comparison PanExplorer_workflow/Perl/ConvertOrthofinderMatrix.pl @ 1:032f6b3806a3 draft

Uploaded
author dereeper
date Thu, 30 May 2024 11:16:08 +0000
parents
children
comparison
equal deleted inserted replaced
0:3cbb01081cde 1:032f6b3806a3
1 #!/usr/bin/perl
2
3 use strict;
4
5 my $indir = $ARGV[0];
6 my $matrix = $ARGV[1];
7 my $out = $ARGV[2];
8 my $strain_names = $ARGV[3];
9
10 my %strains_of_gb;
11 open(F,$strain_names);
12 while(<F>){
13 my $line = $_;
14 $line =~s/\n//g;$line =~s/\r//g;
15 my ($gb,$strain) = split(/\t/,$line);
16 $strains_of_gb{$gb} = $strain;
17 }
18 close(F);
19
20
21
22 my $cl_num = 0;
23 my $nb_strains = 1;
24 open(O,">$out");
25 open(U,">$out.upsetr.txt");
26 open(M,">$out.accessory_01matrix.txt");
27 open(F,$matrix);
28 my $firstline = <F>;
29 $firstline =~s/\n//g;$firstline =~s/\r//g;
30 my @infos = split(/\t/,$firstline);
31 print O "ClutserID";
32 print U "ClutserID";
33 print M "Gene";
34 for (my $j=1; $j <= $#infos; $j++){
35 my $gbfile = $infos[$j];
36 $gbfile =~s/\"//g;
37 $gbfile =~s/\.gb\.filt//g;
38 $gbfile =~s/\.gb\.rmdup//g;
39 my $strain = $strains_of_gb{$gbfile};
40 print O "\t".$strain;
41 print U "\t".$strain;
42 print M "\t".$strain;
43 $nb_strains++;
44 }
45 print O "\n";
46 print U "\n";
47 print M "\n";
48 while(<F>){
49 $cl_num++;
50 my @infos = split(/\t/,$_);
51 print O $cl_num;
52 print U $cl_num;
53 my $concat_accessory = "";
54 for (my $i = 1; $i <= $#infos; $i++){
55 my $val = $infos[$i];
56 $val =~s/\"//g;
57 $val =~s/\n//g;$val =~s/\r//g;
58 if ($val =~/\w+/){
59 print U "\t1";
60 $concat_accessory .= "\t1";
61 }
62 else{
63 print U "\t0";
64 $concat_accessory .= "\t0";
65 }
66 my @genes = split(/, /,$val);
67 my $concat = "";
68 foreach my $gene(@genes){
69 my $prot_id = $gene;
70 $concat .= "$prot_id,"
71 }
72 chop($concat);
73 if (scalar @genes == 0){
74 $concat = "-";
75 }
76 print O "\t".$concat;
77 }
78 if ($concat_accessory =~/0/){
79 print M $cl_num.$concat_accessory."\n";
80 }
81 print O "\n";
82 print U "\n";
83 }
84 close(F);
85 close(O);
86 close(U);
87 close(M);