annotate bin/get_bed_fa_j.pl @ 5:2ebca9da5e42 draft default tip

planemo upload
author bioitcore
date Thu, 07 Sep 2017 17:39:24 -0400
parents adc0f7765d85
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
1
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
# Adapted from Chenghai Xue's script
#
# BED2FA: for every record of a tab-separated BED file, cut the record's
# block sequences out of the matching chromosome FASTA file and write them
# as one FASTA entry. Records with exactly 12 columns use their blockCount /
# blockSizes / blockStarts columns; any other column count is treated as a
# single block spanning chromStart..chromEnd.
#
# Usage: get_bed_fa_j.pl <bed_in> <genome_file_list> <bed_out> <fa_out>
#   bed_in            exon junction file (BED)
#   genome_file_list  text file with one chromosome FASTA path per line; only
#                     paths of the form .../chr<name>fa are used, keyed by the
#                     chr<name> part
#   bed_out           the input records for which a FASTA entry was written
#   fa_out            FASTA output; header is
#                     >name|chrom|chromStart|chromEnd|strand|counter|blockCount
#
# Progress and summary counters are printed to STDOUT.
#
# The script body lives in one bare block so that `use strict` and the
# lexical variables are scoped to it and do not affect the helper subs
# defined further down in this file.
# NOTE(review): `use warnings` is deliberately not enabled here; it would add
# STDERR output for malformed input lines that the script never produced
# before - confirm the calling wrapper tolerates STDERR before turning it on.
{
    use strict;

    my $starttime = time();

    die "Usage: $0 <bed_in> <genome_file_list> <bed_out> <fa_out>\n"
        unless @ARGV >= 4;

    my $input_file_1  = $ARGV[0];    # exon junction file
    my $input_file_2  = $ARGV[1];    # genome file list
    my $output_file_1 = $ARGV[2];    # exon junction bed (might be less than input_file_1)
    my $output_file_2 = $ARGV[3];    # exon junction fa

    # Three-argument open: the file names are taken literally, so characters
    # such as '>' or '|' in an argument cannot change the open mode.
    open(my $bed_in,  '<', $input_file_1)  or die "can't open the input file : $!";
    open(my $list_in, '<', $input_file_2)  or die "can't open the input file : $!";
    open(my $bed_out, '>', $output_file_1) or die "Can not open output_file : $!";
    open(my $fa_out,  '>', $output_file_2) or die "Can not open output_file : $!";

    # Map chromosome name (e.g. "chr1") -> path of its FASTA file.
    my @chromList = <$list_in>;
    chomp(@chromList);
    close($list_in) or die "can't close the input file : $!";
    my $len_chromList = @chromList;
    print "BED2FA: in $input_file_2, found $len_chromList chromosomes\n";

    my %chr_hash;
    foreach my $entry (@chromList) {
        # Drop leading blanks so the path can be opened literally below.
        (my $path = $entry) =~ s/^\s+//;
        if ($path =~ /\/(chr.[^\/]*?)\.*fa$/i) {
            $chr_hash{$1} = $path;
        }
    }

    # The sorted list is not used below; the call is kept because
    # sort_chromNo prints a diagnostic for chromosome names it cannot
    # classify. @key_chr_hash is declared with `our` so that it remains a
    # package variable visible to the helper subs.
    our @key_chr_hash = keys(%chr_hash);
    sort_chromNo(@key_chr_hash);

    my $num_1            = 0;     # input lines read
    my $num_2            = 0;     # records written
    my $num_count_chrom  = 0;     # chromosome files loaded
    my $num_chr_1        = 0;     # lines seen since the last chromosome switch
    my $num_chr_2        = 0;     # records written since the last chromosome switch
    my $len_contigSeqStr = 0;     # length of the currently loaded sequence
    my $contigSeqStr     = '';    # currently loaded chromosome sequence
    my $current_chrom    = "";    # name of the chromosome held in $contigSeqStr
    my $header_printed   = 0;     # true once a "BED2FA: <chrom>: " line is pending

    while (my $line = <$bed_in>) {
        $num_1++;
        chomp $line;

        my @cols = split("\t", $line);
        my ($chrom, $chromStart, $chromEnd, $name, $strand) = @cols[0, 1, 2, 3, 5];

        # Blank lines and lines with an empty first column carry no record.
        next unless defined $chrom && length $chrom;

        my ($blockCount, $blockSizes, $blockStarts);
        if (scalar(@cols) == 12) {
            ($blockCount, $blockSizes, $blockStarts) = @cols[9, 10, 11];
        }
        else {
            # Fewer (or more) columns: treat the interval as one block.
            $blockCount  = 1;
            $blockSizes  = $chromEnd - $chromStart;
            $blockStarts = 0;
        }
        $strand = "+" if !$strand;
        my @a_blockSizes  = split(/\,/, $blockSizes);
        my @a_blockStarts = split(/\,/, $blockStarts);

        if ($chrom ne $current_chrom) {
            # Finish the counter line of the previous chromosome header.
            if ($header_printed) {
                print "$num_chr_1 $num_chr_2 $len_contigSeqStr\n";
            }
            print "BED2FA: $chrom: ";
            $header_printed = 1;

            $num_chr_1 = 0;
            $num_chr_2 = 0;

            if (exists $chr_hash{$chrom}) {
                $num_count_chrom++;
                $current_chrom = $chrom;

                open(my $fin, '<', $chr_hash{$chrom}) or die "can't open the chrom file : $!";
                $contigSeqStr = do { local $/; <$fin> };    # slurp the whole file
                close($fin);
                $contigSeqStr = '' unless defined $contigSeqStr;

                $contigSeqStr =~ s/^\>.*?\n//g;    # drop the FASTA header at the start of the file
                $contigSeqStr =~ s/\s+//g;         # drop line breaks and any other whitespace
                $len_contigSeqStr = length $contigSeqStr;
            }
            else {
                # No FASTA file for this chromosome: skip the record.
                $num_chr_1++;
                next;
            }
        }
        $num_chr_1++;

        # Extract every block; on the minus strand each block is reverse
        # complemented individually.
        my @JuncSeq;
        for (my $i = 0; $i < $blockCount; $i++) {
            my $start = $chromStart + $a_blockStarts[$i];
            my $seq   = uc(substr($contigSeqStr, $start, $a_blockSizes[$i]));
            if ($strand eq "-") {
                $seq = uc(string_reverse_complement(lc $seq));
            }
            push @JuncSeq, $seq;
        }

        $num_2++;
        $num_chr_2++;
        print {$bed_out} "$line\n";
        print {$fa_out} '>'
            . join('|', $name, $chrom, $chromStart, $chromEnd, $strand, $num_2, $blockCount)
            . "\n";

        # Only minus-strand records are emitted in reverse block order. Any
        # other strand value (e.g. ".") was not reverse complemented above, so
        # its blocks must stay in forward order.
        @JuncSeq = reverse @JuncSeq if $strand eq "-";
        print {$fa_out} join("", @JuncSeq), "\n";
    }
    print "$num_chr_1 $num_chr_2 $len_contigSeqStr\n";
    print "BED2FA: in file1, $num_count_chrom chroms, $num_1 beds, $num_2 saved.\n";

    close($bed_in)  or die "can't close the input file : $!";
    close($bed_out) or die "can't close the output file : $!";
    close($fa_out)  or die "can't close the output file : $!";

    #######################################
    my $complete_time = time() - $starttime;
    print "BED2FA: Run $complete_time seconds...Done!\n";
}
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
162
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
163 #######################################
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
164 # sub functions
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
# Return the reverse complement of a lower-case DNA string.
#
#   $string - sequence to process. Only the lower-case letters a, c, g and t
#             are complemented (a<->t, c<->g); every other character,
#             upper-case letters included, becomes 'n'.
#
# Returns a string of the same length as the input, complemented and in
# reverse order. An empty or undefined input yields the empty string.
sub string_reverse_complement {
    my ($string) = @_;

    # `scalar` forces string reversal even when the sub is called in list
    # context, where a bare `reverse` would reverse the argument list instead.
    my $ret = scalar reverse $string;

    $ret =~ tr/acgt/n/c;      # everything that is not a/c/g/t -> n
    $ret =~ tr/acgt/tgca/;    # complement the remaining bases

    return $ret;
}
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
193
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
# Sort a list of chromosome names for display.
#
#   @_ - chromosome names such as "chr1", "chrX", "chr1_random",
#        "chr17_ctg5_hap1" or "chrUn_gl000211".
#
# Returns the names rebuilt from their recognised parts, grouped in this
# order:
#   1. chr<digits>                 sorted numerically
#   2. chr<word>                   sorted as strings
#   3. chr<digits>_random          sorted numerically
#   4. chr<word>_random            sorted as strings
#   5. chr<digits>_<suffix>        by number, then suffix (duplicates collapsed)
#   6. chr<word>_<suffix>          by word, then suffix (duplicates collapsed)
# A name that contains no "chr<word>" part is reported on STDOUT and left out
# of the result.
sub sort_chromNo {
    my @chrom = @_;

    my (@digit, @words, @digit_random, @words_random);
    my (%digit_other, %words_other);    # prefix => { suffix => 0 }

    # Classify each name passed in by the caller. The patterns are tried from
    # most to least specific, so the order of the branches matters.
    for my $name (@chrom) {
        if ($name =~ /chr(\d+)\_random/) {
            push(@digit_random, $1);
        }
        elsif ($name =~ /chr(\w+)\_random/) {
            push(@words_random, $1);
        }
        elsif ($name =~ /chr(\d+)\_([\w\d\_]+)/) {
            $digit_other{$1}{$2} = 0;
        }
        elsif ($name =~ /chr(\w+)\_([\w\d\_]+)/) {
            $words_other{$1}{$2} = 0;
        }
        elsif ($name =~ /chr(\d+)/) {
            push(@digit, $1);
        }
        elsif ($name =~ /chr(\w+)/) {
            push(@words, $1);
        }
        else {
            print "BED2FA: There is unknown type of chromosomes: $name\n";
        }
    }

    my @sort_chr_hash;
    push(@sort_chr_hash, map { "chr" . $_ } sort { $a <=> $b } @digit);
    push(@sort_chr_hash, map { "chr" . $_ } sort { $a cmp $b } @words);
    push(@sort_chr_hash, map { "chr" . $_ . "_random" } sort { $a <=> $b } @digit_random);
    push(@sort_chr_hash, map { "chr" . $_ . "_random" } sort { $a cmp $b } @words_random);

    for my $number (sort { $a <=> $b } keys %digit_other) {
        for my $suffix (sort { $a cmp $b } keys %{ $digit_other{$number} }) {
            push(@sort_chr_hash, "chr" . $number . "_" . $suffix);
        }
    }
    for my $word (sort { $a cmp $b } keys %words_other) {
        for my $suffix (sort { $a cmp $b } keys %{ $words_other{$word} }) {
            push(@sort_chr_hash, "chr" . $word . "_" . $suffix);
        }
    }

    return @sort_chr_hash;
}
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
268
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
# Sort pairs held in two parallel arrays: numerically by the first array's
# value, then as strings by the second array's value.
#
#   $_[0] - reference to the array of numeric primary keys
#   $_[1] - reference to the array of string secondary keys (same length)
#
# Returns a flat list (primary, secondary, primary, secondary, ...) in sorted
# order; identical pairs are collapsed into one. If the two arrays differ in
# length, a message is printed to STDOUT and an empty list is returned.
sub sort_2_array_number_string {
    # Named lexicals rather than $a/$b, which sort() reserves for comparators.
    my ($primary, $secondary) = @_;

    my $len_a = @$primary;
    my $len_b = @$secondary;
    my @ret;

    if ($len_a == $len_b) {
        # primary => { secondary => 0 }; the hash also removes duplicate pairs.
        my %family;
        for my $i (0 .. $len_a - 1) {
            $family{ $primary->[$i] }{ $secondary->[$i] } = 0;
        }
        for my $one (sort { $a <=> $b } keys %family) {
            for my $two (sort { $a cmp $b } keys %{ $family{$one} }) {
                push(@ret, $one, $two);
            }
        }
    }
    else {
        print "ERROR: Sort array is not same size\n";
        print "a $len_a, b $len_b\n";
    }

    return @ret;
}
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
294
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
# Sort pairs held in two parallel arrays: as strings by the first array's
# value, then as strings by the second array's value.
#
#   $_[0] - reference to the array of string primary keys
#   $_[1] - reference to the array of string secondary keys (same length)
#
# Returns a flat list (primary, secondary, primary, secondary, ...) in sorted
# order; identical pairs are collapsed into one. If the two arrays differ in
# length, a message is printed to STDOUT and an empty list is returned.
sub sort_2_array_string_string {
    # Named lexicals rather than $a/$b, which sort() reserves for comparators.
    my ($primary, $secondary) = @_;

    my $len_a = @$primary;
    my $len_b = @$secondary;
    my @ret;

    if ($len_a == $len_b) {
        # primary => { secondary => 0 }; the hash also removes duplicate pairs.
        my %family;
        for my $i (0 .. $len_a - 1) {
            $family{ $primary->[$i] }{ $secondary->[$i] } = 0;
        }
        for my $one (sort { $a cmp $b } keys %family) {
            for my $two (sort { $a cmp $b } keys %{ $family{$one} }) {
                push(@ret, $one, $two);
            }
        }
    }
    else {
        print "ERROR: Sort array is not same size\n";
        print "a $len_a, b $len_b\n";
    }

    return @ret;
}
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
320
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
# Comparator for sort(): orders the two values sort() places in $a and $b by
# numeric value only. Despite the name there is no string fallback; the
# variant with one (numeric first, string compare on ties) is left disabled
# inside the body.
sub by_mostly_numeric {
    # ( $a <=> $b ) || ( $a cmp $b );
    return $a <=> $b;
}
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
325
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
# Comparator for sort(): orders the two values sort() places in $a and $b by
# plain string comparison. The variant that tries a numeric comparison first
# is left disabled inside the body.
sub by_mostly_string {
    # ( $a <=> $b ) || ( $a cmp $b );
    return $a cmp $b;
}
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
330