Mercurial > repos > big-tiandm > sirna_plant
comparison siRNA.pl @ 5:f466394ee1fd draft
Uploaded
author | big-tiandm |
---|---|
date | Thu, 23 Oct 2014 22:08:09 -0400 |
parents | 07745c0958dd |
children |
comparison
equal
deleted
inserted
replaced
4:75180ba26dc1 | 5:f466394ee1fd |
---|---|
10 use lib '/leofs/biotrans/chentt/perl_module/'; | 10 use lib '/leofs/biotrans/chentt/perl_module/'; |
11 #perl ../siRNA.pl -i config -g /leofs/biotrans/projects/rice/smallRNA/sRNA_package/bin/test/ref/genome.fa -f /share_bio/hs4/disk3-4/Reference/Plants/Rice_TIGR/Reference/TIGR/version_6.1/all.dir/all.gff3 -path /leofs/biotrans/projects/rice/smallRNA/sRNA_package/bin/ -o /leofs/biotrans/projects/rice/smallRNA/sRNA_package/bin/test -t 3 -rfam /leofs/biotrans/projects/rice/smallRNA/sRNA_package/bin/test/ref/Rfam.fasta -idx /leofs/biotrans/projects/rice/smallRNA/sRNA_package/bin/test/ref/genome -idx2 /leofs/biotrans/projects/rice/smallRNA/sRNA_package/bin/test/ref/rfam -deg deg -n 25 -nat class/nat_1 -repeat class/repeat_1 -cen centromere_TIGR.txt -format fastq | 11 #perl ../siRNA.pl -i config -g /leofs/biotrans/projects/rice/smallRNA/sRNA_package/bin/test/ref/genome.fa -f /share_bio/hs4/disk3-4/Reference/Plants/Rice_TIGR/Reference/TIGR/version_6.1/all.dir/all.gff3 -path /leofs/biotrans/projects/rice/smallRNA/sRNA_package/bin/ -o /leofs/biotrans/projects/rice/smallRNA/sRNA_package/bin/test -t 3 -rfam /leofs/biotrans/projects/rice/smallRNA/sRNA_package/bin/test/ref/Rfam.fasta -idx /leofs/biotrans/projects/rice/smallRNA/sRNA_package/bin/test/ref/genome -idx2 /leofs/biotrans/projects/rice/smallRNA/sRNA_package/bin/test/ref/rfam -deg deg -n 25 -nat class/nat_1 -repeat class/repeat_1 -cen centromere_TIGR.txt -format fastq |
12 print " | 12 print " |
13 ##################################### | 13 ##################################### |
14 # # | 14 # # |
15 # sRNA cluster # | 15 # sRNA cluster # |
16 # # | 16 # # |
17 ##################################### | 17 ##################################### |
18 "; | 18 "; |
19 ########################################################################################### | 19 ########################################################################################### |
20 my $usage="$0 | 20 my $usage="$0 |
55 -span plot span, default 50000 | 55 -span plot span, default 50000 |
56 "; | 56 "; |
57 | 57 |
58 my %options; | 58 my %options; |
59 GetOptions(\%options,"i:s@","tag:s@","g=s","f=s","o=s","a:s","path:s","p=s","format=s","nat:s","repeat:s","deg:s","n:i","mis:i","rfam:s","t:i","v:i","d:i","l:i","idx:s","idx2:s","cen:s","span:s","h"); | 59 GetOptions(\%options,"i:s@","tag:s@","g=s","f=s","o=s","a:s","path:s","p=s","format=s","nat:s","repeat:s","deg:s","n:i","mis:i","rfam:s","t:i","v:i","d:i","l:i","idx:s","idx2:s","cen:s","span:s","h"); |
60 | 60 #print help if that option is used |
61 my @inputfiles=@{$opts{'i'}}; | 61 if($options{h}){die $usage;} |
62 my @inputtags=@{$opts{'tag'}}; | 62 |
63 my @filein=@{$options{'i'}}; | |
64 my @mark=@{$options{'tag'}}; | |
63 | 65 |
64 #my $config=$options{'i'}; | 66 #my $config=$options{'i'}; |
65 my $genome_fa=$options{'g'}; | 67 my $genome_fa=$options{'g'}; |
66 my $gff=$options{'f'}; | 68 my $gff=$options{'f'}; |
69 | |
70 | |
67 ########################################################################################## | 71 ########################################################################################## |
68 my $predir=`pwd`; | 72 my $predir=`pwd`; |
69 chomp $predir; | 73 chomp $predir; |
70 my $workdir=defined($options{'o'}) ? $options{'o'}:$predir; | 74 my $workdir=defined($options{'o'}) ? $options{'o'}:$predir; |
71 | 75 |
89 #if ($format ne "fastq" && $format ne "fq" && $format ne "fasta" && $format ne "fa") { | 93 #if ($format ne "fastq" && $format ne "fq" && $format ne "fasta" && $format ne "fa") { |
90 # die "Parameter \"-format\" is error! Parameter is fastq, fq, fasta or fa\n"; | 94 # die "Parameter \"-format\" is error! Parameter is fastq, fq, fasta or fa\n"; |
91 #} | 95 #} |
92 | 96 |
93 my $adpter="ATCTCGTATG"; #adapter | 97 my $adpter="ATCTCGTATG"; #adapter |
94 if (defined $opts{'a'}) {$a=$opts{'a'};} | 98 if (defined $options{'a'}) {$adpter=$options{'a'};} |
95 | 99 |
96 #print help if that option is used | |
97 if($options{h}){die $usage;} | |
98 | 100 |
99 my $phred_qv=64; | 101 my $phred_qv=64; |
100 my $sample_number; | 102 my $sample_number; |
101 my ($dir,$dir_tmp); | 103 my ($dir,$dir_tmp); |
102 ################################ MAIN ################################################## | 104 ################################ MAIN ################################################## |
103 print "\ncluster program start:"; | 105 print "\ncluster program start:"; |
104 my $time=Time(); | 106 my $time=Time(); |
105 make_dir_tmp(); | 107 make_dir_tmp(); |
106 | 108 |
107 my (@filein,@mark,@clip); | 109 my @clip; |
108 my $mark; | 110 my $mark; |
109 my $sample_mark; | 111 my $sample_mark; |
110 | 112 |
111 my $config=$workdir."/input_config"; | 113 my $config=$dir."/input_config"; |
112 open CONFIG,">$config"; | 114 open CONFIG,">$config"; |
113 for (my $i=0;$i<@inputfiles;$i++) { | 115 for (my $i=0;$i<@filein;$i++) { |
114 print CONFIG $inputfiles[$i],"\t",$inputtags[$i],"\n"; | 116 print CONFIG $filein[$i],"\t",$mark[$i],"\n"; |
115 } | 117 } |
116 close CONFIG; | 118 close CONFIG; |
117 | 119 if (@filein != @mark) { |
118 read_config(); | 120 die "Maybe config file have some wrong!!!\n"; |
121 } | |
122 $sample_number=@mark; | |
123 $mark=join "\t",@mark; | |
124 $sample_mark=join "\#",@mark; | |
125 | |
126 | |
127 #read_config(); | |
119 | 128 |
120 trim_adapter_and_filter(); | 129 trim_adapter_and_filter(); |
121 | 130 |
122 my $filter_out=$dir."preProcess\/"."collapse_reads_out.fa";## raw clean data | 131 my $filter_out=$dir."preProcess\/"."collapse_reads_out.fa";## raw clean data |
123 my $data2=$filter_out; ### mirbase not mapped reads | 132 my $data2=$filter_out; ### mirbase not mapped reads |
148 | 157 |
149 quantify(); | 158 quantify(); |
150 | 159 |
151 phase(); | 160 phase(); |
152 | 161 |
153 class(); | 162 if (defined($options{'nat'})&&defined($options{'repeat'})) { |
163 class(); | |
164 } | |
165 else{ | |
166 get_genelist(); | |
167 } | |
154 | 168 |
155 annotate(); | 169 annotate(); |
156 | 170 |
157 genome_length(); | 171 genome_length(); |
158 | 172 |
178 $dir="$workdir\/cluster_runs_$time\/"; | 192 $dir="$workdir\/cluster_runs_$time\/"; |
179 print STDERR "mkdir $dir\n\n"; | 193 print STDERR "mkdir $dir\n\n"; |
180 return; | 194 return; |
181 } | 195 } |
182 | 196 |
183 sub read_config{ | 197 #sub read_config{ |
184 open IN,"<$config"; | 198 # open IN,"<$config"; |
185 while (my $aline=<IN>) { | 199 # while (my $aline=<IN>) { |
186 chomp $aline; | 200 # chomp $aline; |
187 my @tmp=split/\t/,$aline; | 201 # my @tmp=split/\t/,$aline; |
188 push @filein,$tmp[0]; | 202 # push @filein,$tmp[0]; |
189 push @mark,$tmp[1]; | 203 # push @mark,$tmp[1]; |
190 } | 204 # } |
191 close IN; | 205 # close IN; |
192 if (@filein != @mark) { | 206 # if (@filein != @mark) { |
193 die "Maybe config file have some wrong!!!\n"; | 207 # die "Maybe config file have some wrong!!!\n"; |
194 } | 208 # } |
195 $sample_number=@mark; | 209 # $sample_number=@mark; |
196 $mark=join "\t",@mark; | 210 # $mark=join "\t",@mark; |
197 $sample_mark=join "\#",@mark; | 211 # $sample_mark=join "\#",@mark; |
198 } | 212 #} |
199 | 213 |
200 | 214 |
201 sub trim_adapter_and_filter{ | 215 sub trim_adapter_and_filter{ |
202 my $time=time(); | 216 my $time=time(); |
203 $preprocess=$dir."preProcess/"; | 217 $preprocess=$dir."preProcess/"; |
233 | 247 |
234 sub clips{ | 248 sub clips{ |
235 my ($filein,$fileout)=@_; | 249 my ($filein,$fileout)=@_; |
236 my $adapter=$dir."preProcess\/$fileout\_clip\.fq"; | 250 my $adapter=$dir."preProcess\/$fileout\_clip\.fq"; |
237 if($format eq "fq" || $format eq "fastq"){ | 251 if($format eq "fq" || $format eq "fastq"){ |
238 my $clip=`$path\/fastx_clipper -a $adpter -M 6 -Q $phred_qv -i $filein -o $adapter`; | 252 my $clip=`fastx_clipper -a $adpter -M 6 -Q $phred_qv -i $filein -o $adapter`; |
239 } | 253 } |
240 if($format eq "fa" || $format eq "fasta"){ | 254 if($format eq "fa" || $format eq "fasta"){ |
241 my $clip=`$path\/fastx_clipper -a $adpter -M 6 -i $filein -o $adapter`; | 255 my $clip=`fastx_clipper -a $adpter -M 6 -i $filein -o $adapter`; |
242 } | 256 } |
243 #my $clean=$dir."preProcess\/$fileout\_clean.fq"; | 257 #my $clean=$dir."preProcess\/$fileout\_clean.fq"; |
244 #my $filter=`filterReadsByLength.pl -i $adapter -o $clean -min 18 -max 40 `; | 258 #my $filter=`filterReadsByLength.pl -i $adapter -o $clean -min 18 -max 40 `; |
245 return $fileout; | 259 return $fileout; |
246 } | 260 } |
350 print "perl $path\/phased_siRNA.pl -i $read_txt -o $annotate_dir\/phase.out\n\n\n"; | 364 print "perl $path\/phased_siRNA.pl -i $read_txt -o $annotate_dir\/phase.out\n\n\n"; |
351 return 0; | 365 return 0; |
352 } | 366 } |
353 | 367 |
354 sub class{ | 368 sub class{ |
355 print "clusters is ready to annotate by source\n\n"; | 369 print "clusters is ready to annotate by sources\n\n"; |
356 my $nat=$options{'nat'}; | 370 my $nat=$options{'nat'}; |
357 my $repeat=$options{'repeat'}; | 371 my $repeat=$options{'repeat'}; |
358 my $class=`perl $path\/ClassAnnotate.pl -i $rpkm -g $gff -n $nat -r $repeat -p $annotate_dir\/phase.out -o $annotate_dir\/sample_class.anno -t $annotate_dir\/nat.out -l $dir\/ref\/genelist.txt`; | 372 my $class=`perl $path\/ClassAnnotate.pl -i $rpkm -g $gff -n $nat -r $repeat -p $annotate_dir\/phase.out -o $annotate_dir\/sample_class.anno -t $annotate_dir\/nat.out -l $dir\/ref\/genelist.txt`; |
359 print "perl $path\/ClassAnnotate.pl -i $rpkm -g $gff -n $nat -r $repeat -p $annotate_dir\/phase.out -o $annotate_dir\/sample_class.anno -t $annotate_dir\/nat.out -l $dir\/ref\/genelist.txt\n\n"; | 373 print "perl $path\/ClassAnnotate.pl -i $rpkm -g $gff -n $nat -r $repeat -p $annotate_dir\/phase.out -o $annotate_dir\/sample_class.anno -t $annotate_dir\/nat.out -l $dir\/ref\/genelist.txt\n\n"; |
360 } | 374 } |
361 | 375 |
362 sub annotate{ | 376 sub annotate{ |
363 print "clusters is ready to annotate by gff file\n\n"; | 377 print "clusters is ready to annotate by gff file\n\n"; |
364 my $file="$annotate_dir\/sample_class.anno"; | 378 my $file; |
379 if (defined($options{'nat'})&&defined($options{'repeat'})) { | |
380 $file="$annotate_dir\/sample_class.anno"; | |
381 } | |
382 else{ | |
383 $file=$rpkm; | |
384 } | |
365 my $annotate=`perl $path\/Annotate.pl -i $file -g $dir\/ref\/genelist.txt -d $up_down_dis -o $annotate_dir\/sample_c_p.anno`; | 385 my $annotate=`perl $path\/Annotate.pl -i $file -g $dir\/ref\/genelist.txt -d $up_down_dis -o $annotate_dir\/sample_c_p.anno`; |
366 print "perl $path\/Annotate.pl -i $file -g $dir\/ref\/genelist.txt -d $up_down_dis -o $annotate_dir\/sample_c_p.anno\n\n"; | 386 print "perl $path\/Annotate.pl -i $file -g $dir\/ref\/genelist.txt -d $up_down_dis -o $annotate_dir\/sample_c_p.anno\n\n"; |
367 return 0; | 387 return 0; |
388 } | |
389 sub get_genelist{ | |
390 | |
391 my $get_genelist=`perl $path\/get_genelist.pl -i $gff -o $dir\/ref\/genelist.txt`; | |
392 print "perl $path\/get_genelist.pl -i $gff -o $dir\/ref\/genelist.txt"; | |
368 } | 393 } |
369 | 394 |
370 sub dec{ | 395 sub dec{ |
371 print "deg reading\n\n"; | 396 print "deg reading\n\n"; |
372 my $deg_file=$options{'deg'}; | 397 my $deg_file=$options{'deg'}; |