comparison miRPlant.pl @ 46:ca05d68aca13 draft

Uploaded
author big-tiandm
date Thu, 13 Nov 2014 22:43:35 -0500
parents 0c4e11018934
children
comparison
equal deleted inserted replaced
45:2cb6add23dfe 46:ca05d68aca13
8 my $version=1.00; 8 my $version=1.00;
9 9
10 use strict; 10 use strict;
11 use Getopt::Long; 11 use Getopt::Long;
12 use threads; 12 use threads;
13 use threads::shared; 13 #use threads::shared;
14 use File::Path; 14 use File::Path;
15 use File::Basename; 15 use File::Basename;
16 use RNA; 16 #use RNA;
17 use Term::ANSIColor; 17 #use Term::ANSIColor;
18 18
19 my %opts; 19 my %opts;
20 GetOptions(\%opts,"i:s@","tag:s@","format=s","gfa=s","pre=s","mat=s","rfam:s","dis:i","flank:i","mfe:f","idx:s","idx2:s","mis:i","r:i","v:i","e:i","f:i","a:s","M:i","t:i","min:i","max:i","o:s","path:s","D","h"); 20 GetOptions(\%opts,"i:s@","tag:s@","format=s","gfa=s","pre=s","mat=s","rfam:s","dis:i","flank:i","mfe:f","idx:s","idx2:s","mis:i","r:i","v:i","e:i","f:i","a:s","M:i","t:i","min:i","max:i","o:s","path:s","D","h");
21 if (!(defined $opts{i} and defined $opts{format} and defined $opts{gfa} and defined $opts{pre} and defined $opts{mat}) || defined $opts{h}) { #necessary arguments 21 if (!(defined $opts{i} and defined $opts{format} and defined $opts{gfa} and defined $opts{pre} and defined $opts{mat}) || defined $opts{h}) { #necessary arguments
22 &usage; 22 &usage;
26 print "miPlant program start:\n The time is $time!\n"; 26 print "miPlant program start:\n The time is $time!\n";
27 print "Command line:\n $0 @ARGV\n"; 27 print "Command line:\n $0 @ARGV\n";
28 28
29 my $format=$opts{'format'}; 29 my $format=$opts{'format'};
30 if ($format ne "fastq" && $format ne "fq" && $format ne "fasta" && $format ne "fa") { 30 if ($format ne "fastq" && $format ne "fq" && $format ne "fasta" && $format ne "fa") {
31 &printErr(); 31 #&printErr();
32 die "Parameter \"-format\" is error! Parameter is fastq, fq, fasta or fa\n"; 32 die "Parameter \"-format\" is error! Parameter is fastq, fq, fasta or fa\n";
33 } 33 }
34 34
35 my $phred_qv=64; 35 my $phred_qv=64;
36 36
272 } 272 }
273 } 273 }
274 sub quantify{ 274 sub quantify{
275 my $tag=join "\\;" ,@mark; 275 my $tag=join "\\;" ,@mark;
276 system("perl $scipt_path/quantify.pl -p $opts{pre} -m $opts{mat} -r $data -o $dir -time $time -mis $mis -t $t -e $upstream -f $downstream -tag $tag"); 276 system("perl $scipt_path/quantify.pl -p $opts{pre} -m $opts{mat} -r $data -o $dir -time $time -mis $mis -t $t -e $upstream -f $downstream -tag $tag");
277 # print "\nquantify.pl -p $opts{pre} -m $opts{mat} -r $data -o $dir -time $time -mis $mis -t $t -e $upstream -f $downstream -tag $tag\n"; 277 print "\nquantify.pl -p $opts{pre} -m $opts{mat} -r $data -o $dir -time $time -mis $mis -t $t -e $upstream -f $downstream -tag $tag\n";
278 } 278 }
279 sub filterbylength{ 279 sub filterbylength{
280 my $tmpmark=join ",", @mark; 280 my $tmpmark=join ",", @mark;
281 system("perl $scipt_path/filterReadsByLength.pl -i $collapsed -o $data -min $min_nt -max $max_nt -mark $tmpmark"); 281 system("perl $scipt_path/filterReadsByLength.pl -i $collapsed -o $data -min $min_nt -max $max_nt -mark $tmpmark");
282 system("perl $scipt_path/Length_Distibution.pl -i $preprocess/reads_length_distribution.txt -o $preprocess/length.html"); 282 system("perl $scipt_path/Length_Distibution.pl -i $preprocess/reads_length_distribution.txt -o $preprocess/length.html");
319 push @mark,$tmp[1]; 319 push @mark,$tmp[1];
320 &check_rawdata($tmp[0]); 320 &check_rawdata($tmp[0]);
321 } 321 }
322 close CON; 322 close CON;
323 if (@filein != @mark) { 323 if (@filein != @mark) {
324 &printErr(); 324 #&printErr();
325 die "Maybe config file have some wrong!!!\n"; 325 die "Maybe config file have some wrong!!!\n";
326 } 326 }
327 } 327 }
328 sub check_rawdata{ 328 sub check_rawdata{
329 my ($fileforcheck)=@_; 329 my ($fileforcheck)=@_;
330 if (!(-s $fileforcheck)) { 330 if (!(-s $fileforcheck)) {
331 &printErr(); 331 #&printErr();
332 die "Can not find $fileforcheck, or file is empty!!!\n"; 332 die "Can not find $fileforcheck, or file is empty!!!\n";
333 } 333 }
334 if ($format eq "fasta" || $format eq "fa") { 334 if ($format eq "fasta" || $format eq "fa") {
335 &checkfa($fileforcheck); 335 &checkfa($fileforcheck);
336 } 336 }
342 my ($file_reads)=@_; 342 my ($file_reads)=@_;
343 open N,"<$file_reads"; 343 open N,"<$file_reads";
344 my $line=<N>; 344 my $line=<N>;
345 chomp $line; 345 chomp $line;
346 if($line !~ /^>\S+/){ 346 if($line !~ /^>\S+/){
347 printErr(); 347 #printErr();
348 die "The first line of file $file_reads does not start with '>identifier' 348 die "The first line of file $file_reads does not start with '>identifier'
349 Reads file $file_reads is not a valid fasta file\n\n"; 349 Reads file $file_reads is not a valid fasta file\n\n";
350 } 350 }
351 if(<N> !~ /^[ACGTNacgtn]*$/){ 351 if(<N> !~ /^[ACGTNacgtn]*$/){
352 printErr(); 352 #printErr();
353 die "File $file_reads contains not allowed characters in sequences 353 die "File $file_reads contains not allowed characters in sequences
354 Allowed characters are ACGTN 354 Allowed characters are ACGTN
355 Reads file $file_reads is not a fasta file\n\n"; 355 Reads file $file_reads is not a fasta file\n\n";
356 } 356 }
357 close N; 357 close N;
368 chomp $a; 368 chomp $a;
369 chomp $b; 369 chomp $b;
370 chomp $c; 370 chomp $c;
371 chomp $d; 371 chomp $d;
372 if($a!~/^\@/){ 372 if($a!~/^\@/){
373 &printErr(); 373 #&printErr();
374 die "$file_reads is not a fastq file\n\n"; 374 die "$file_reads is not a fastq file\n\n";
375 } 375 }
376 if($b!~ /^[ACGTNacgtn]*$/){ 376 if($b!~ /^[ACGTNacgtn]*$/){
377 &printErr(); 377 #&printErr();
378 die "File $file_reads contains not allowed characters in sequences 378 die "File $file_reads contains not allowed characters in sequences
379 Allowed characters are ACGTN 379 Allowed characters are ACGTN
380 Reads file $file_reads is not a fasta file\n\n"; 380 Reads file $file_reads is not a fasta file\n\n";
381 } 381 }
382 if ($c!~/^\@/ && $c!~/^\+/) { 382 if ($c!~/^\@/ && $c!~/^\+/) {
383 &printErr(); 383 #&printErr();
384 die "$file_reads is not a fastq file\n\n"; 384 die "$file_reads is not a fastq file\n\n";
385 } 385 }
386 if ((length $b) != (length $d)) { 386 if ((length $b) != (length $d)) {
387 &printErr(); 387 #&printErr();
388 die "$file_reads is not a fastq file\n\n"; 388 die "$file_reads is not a fastq file\n\n";
389 } 389 }
390 my @qv=split //,$d; 390 my @qv=split //,$d;
391 for (my $j=0;$j<@qv ;$j++) { 391 for (my $j=0;$j<@qv ;$j++) {
392 my $q=ord($qv[$j])-64; 392 my $q=ord($qv[$j])-64;
405 push @ret, $file; 405 push @ret, $file;
406 } 406 }
407 } 407 }
408 closedir I; 408 closedir I;
409 if (@ret != 1) { 409 if (@ret != 1) {
410 &printErr(); 410 #&printErr();
411 411
412 die "Can not find directory or file which name has string: $str !!!\n"; 412 die "Can not find directory or file which name has string: $str !!!\n";
413 } 413 }
414 return $ret[0]; 414 return $ret[0];
415 } 415 }
416
417 =cut
416 418
417 sub printErr{ 419 sub printErr{
418 print STDERR color 'bold red'; 420 print STDERR color 'bold red';
419 print STDERR "Error: "; 421 print STDERR "Error: ";
420 print STDERR color 'reset'; 422 print STDERR color 'reset';
421 } 423 }
422 =cut
423 sub Time{ 424 sub Time{
424 my $time=time(); 425 my $time=time();
425 my ($sec,$min,$hour,$day,$month,$year) = (localtime($time))[0,1,2,3,4,5,6]; 426 my ($sec,$min,$hour,$day,$month,$year) = (localtime($time))[0,1,2,3,4,5,6];
426 $month++; 427 $month++;
427 $year+=1900; 428 $year+=1900;
451 452
452 sub usage{ 453 sub usage{
453 print <<"USAGE"; 454 print <<"USAGE";
454 Version $version 455 Version $version
455 Usage: 456 Usage:
457
456 $0 -i -format -gfa -index -pre -mat -rfam -D -a -M -min -max -mis -e -f -v -t -o -path 458 $0 -i -format -gfa -index -pre -mat -rfam -D -a -M -min -max -mis -e -f -v -t -o -path
457 options: 459 options:
458 -i string, input file#input files information file 460 -i input files, # raw data file, can be multipe eg. -i xxx.fq -i xxx .fq ...
459 /path/filename mark 461 -tag string # raw data file names, -tag xxx -tag xxx
460 /path/filename mark
461 ...
462 462
463 -format string,#specific input rawdata file format : fastq|fq|fasta|fa 463 -format string,#specific input rawdata file format : fastq|fq|fasta|fa
464
465 -path scirpt path
464 466
465 -gfa string, input file # genome fasta. sequence file 467 -gfa string, input file # genome fasta. sequence file
466 -idx string, genome file index, file-prefix #(must be indexed by bowtie-build) The parameter 468 -idx string, genome file index, file-prefix #(must be indexed by bowtie-build) The parameter
467 string must be the prefix of the bowtie index. For instance, if 469 string must be the prefix of the bowtie index. For instance, if
468 the first indexed file is called 'h_sapiens_37_asm.1.ebwt' then 470 the first indexed file is called 'h_sapiens_37_asm.1.ebwt' then