Mercurial > repos > bgruening > trim_galore
comparison trim_galore @ 1:898db63d2e84 draft
upgrade to new version
author | bgruening |
---|---|
date | Wed, 17 Jul 2013 15:05:43 -0400 |
parents | 3c1664caa8e3 |
children | 2c1f0fe810f7 |
comparison
equal
deleted
inserted
replaced
0:3c1664caa8e3 | 1:898db63d2e84 |
---|---|
5 use IPC::Open3; | 5 use IPC::Open3; |
6 use File::Spec; | 6 use File::Spec; |
7 use File::Basename; | 7 use File::Basename; |
8 use Cwd; | 8 use Cwd; |
9 | 9 |
10 ## This program is Copyright (C) 2012, Felix Krueger (felix.krueger@babraham.ac.uk) | 10 ## This program is Copyright (C) 2012-13, Felix Krueger (felix.krueger@babraham.ac.uk) |
11 | 11 |
12 ## This program is free software: you can redistribute it and/or modify | 12 ## This program is free software: you can redistribute it and/or modify |
13 ## it under the terms of the GNU General Public License as published by | 13 ## it under the terms of the GNU General Public License as published by |
14 ## the Free Software Foundation, either version 3 of the License, or | 14 ## the Free Software Foundation, either version 3 of the License, or |
15 ## (at your option) any later version. | 15 ## (at your option) any later version. |
23 ## along with this program. If not, see <http://www.gnu.org/licenses/>. | 23 ## along with this program. If not, see <http://www.gnu.org/licenses/>. |
24 | 24 |
25 | 25 |
26 | 26 |
27 ## this script is taking in FastQ sequences and trims them with Cutadapt | 27 ## this script is taking in FastQ sequences and trims them with Cutadapt |
28 ## last modified on 18 10 2012 | 28 ## last modified on 10 April 2013 |
29 | 29 |
30 ######################################################################## | 30 ######################################################################## |
31 | 31 |
32 # change these paths if needed | 32 # change these paths if needed |
33 | 33 |
35 my $path_to_fastqc = 'fastqc'; | 35 my $path_to_fastqc = 'fastqc'; |
36 | 36 |
37 ######################################################################## | 37 ######################################################################## |
38 | 38 |
39 | 39 |
40 my $trimmer_version = '0.2.5'; | 40 my $trimmer_version = '0.2.8'; |
41 my $DOWARN = 1; # print on screen warning and text by default | 41 my $DOWARN = 1; # print on screen warning and text by default |
42 BEGIN { $SIG{'__WARN__'} = sub { warn $_[0] if $DOWARN } }; | 42 BEGIN { $SIG{'__WARN__'} = sub { warn $_[0] if $DOWARN } }; |
43 | 43 |
44 my ($cutoff,$adapter,$stringency,$rrbs,$length_cutoff,$keep,$fastqc,$non_directional,$phred_encoding,$fastqc_args,$trim,$gzip,$validate,$retain,$length_read_1,$length_read_2,$a2,$error_rate,$output_dir,$no_report_file) = process_commandline(); | 44 my ($cutoff,$adapter,$stringency,$rrbs,$length_cutoff,$keep,$fastqc,$non_directional,$phred_encoding,$fastqc_args,$trim,$gzip,$validate,$retain,$length_read_1,$length_read_2,$a2,$error_rate,$output_dir,$no_report_file,$dont_gzip,$clip_r1,$clip_r2) = process_commandline(); |
45 | |
46 my @filenames = @ARGV; | |
47 | |
48 die "\nPlease provide the filename(s) of one or more FastQ file(s) to launch Trim Galore!\n | |
49 USAGE: 'trim_galore [options] <filename(s)>' or 'trim_galore --help' for more options\n\n" unless (@filenames); | |
50 | |
45 | 51 |
46 ### SETTING DEFAULTS UNLESS THEY WERE SPECIFIED | 52 ### SETTING DEFAULTS UNLESS THEY WERE SPECIFIED |
47 unless (defined $cutoff){ | 53 unless (defined $cutoff){ |
48 $cutoff = 20; | 54 $cutoff = 20; |
49 } | 55 } |
66 | 72 |
67 if ($phred_encoding == 64){ | 73 if ($phred_encoding == 64){ |
68 $cutoff += 31; | 74 $cutoff += 31; |
69 } | 75 } |
70 | 76 |
71 my @filenames = @ARGV; | |
72 | |
73 my $file_1; | 77 my $file_1; |
74 my $file_2; | 78 my $file_2; |
75 | 79 |
76 foreach my $filename (@ARGV){ | 80 foreach my $filename (@ARGV){ |
77 trim ($filename); | 81 trim ($filename); |
153 warn "Length cut-off for read 1: $length_read_1 bp\n"; | 157 warn "Length cut-off for read 1: $length_read_1 bp\n"; |
154 print REPORT "Length cut-off for read 1: $length_read_1 bp\n"; | 158 print REPORT "Length cut-off for read 1: $length_read_1 bp\n"; |
155 } | 159 } |
156 | 160 |
157 if ($length_read_2 == 35){ | 161 if ($length_read_2 == 35){ |
158 warn "Length cut-off for read 2: $length_read_2 b (default)\n"; | 162 warn "Length cut-off for read 2: $length_read_2 bb (default)\n"; |
159 print REPORT "Length cut-off for read 2: $length_read_2 bp (default)\n"; | 163 print REPORT "Length cut-off for read 2: $length_read_2 bp (default)\n"; |
160 } | 164 } |
161 else{ | 165 else{ |
162 warn "Length cut-off for read 2: $length_read_2 bp\n"; | 166 warn "Length cut-off for read 2: $length_read_2 bp\n"; |
163 print REPORT "Length cut-off for read 2: $length_read_2 bp\n"; | 167 print REPORT "Length cut-off for read 2: $length_read_2 bp\n"; |
178 if ($trim){ | 182 if ($trim){ |
179 warn "All sequences will be trimmed by 1 bp on their 3' end to avoid problems with invalid paired-end alignments with Bowtie 1\n"; | 183 warn "All sequences will be trimmed by 1 bp on their 3' end to avoid problems with invalid paired-end alignments with Bowtie 1\n"; |
180 print REPORT "All sequences will be trimmed by 1 bp on their 3' end to avoid problems with invalid paired-end alignments with Bowtie 1\n"; | 184 print REPORT "All sequences will be trimmed by 1 bp on their 3' end to avoid problems with invalid paired-end alignments with Bowtie 1\n"; |
181 } | 185 } |
182 | 186 |
187 if ($clip_r1){ | |
188 warn "All Read 1 sequences will be trimmed by $clip_r1 bp from their 5' end to avoid poor qualities or biases\n"; | |
189 print REPORT "All Read 1 sequences will be trimmed by $clip_r1 bp from their 5' end to avoid poor qualities or biases\n"; | |
190 } | |
191 if ($clip_r2){ | |
192 warn "All Read 2 sequences will be trimmed by $clip_r2 bp from their 5' end to avoid poor qualities or biases (e.g. M-bias for BS-Seq applications)\n"; | |
193 print REPORT "All Read 2 sequences will be trimmed by $clip_r2 bp from their 5' end to avoid poor qualities or biases (e.g. M-bias for BS-Seq applications)\n"; | |
194 } | |
195 | |
196 | |
183 if ($fastqc){ | 197 if ($fastqc){ |
184 warn "Running FastQC on the data once trimming has completed\n"; | 198 warn "Running FastQC on the data once trimming has completed\n"; |
185 print REPORT "Running FastQC on the data once trimming has completed\n"; | 199 print REPORT "Running FastQC on the data once trimming has completed\n"; |
186 | 200 |
187 if ($fastqc_args){ | 201 if ($fastqc_args){ |
193 if ($keep and $rrbs){ | 207 if ($keep and $rrbs){ |
194 warn "Keeping quality trimmed (but not yet adapter trimmed) intermediate FastQ file\n"; | 208 warn "Keeping quality trimmed (but not yet adapter trimmed) intermediate FastQ file\n"; |
195 print REPORT "Keeping quality trimmed (but not yet adapter trimmed) intermediate FastQ file\n"; | 209 print REPORT "Keeping quality trimmed (but not yet adapter trimmed) intermediate FastQ file\n"; |
196 } | 210 } |
197 | 211 |
212 | |
198 if ($gzip or $filename =~ /\.gz$/){ | 213 if ($gzip or $filename =~ /\.gz$/){ |
199 warn "Output file will be GZIP compressed\n"; | 214 $gzip = 1; |
200 print REPORT "Output file will be GZIP compressed\n"; | 215 unless ($dont_gzip){ |
216 warn "Output file(s) will be GZIP compressed\n"; | |
217 print REPORT "Output file will be GZIP compressed\n"; | |
218 } | |
201 } | 219 } |
202 | 220 |
203 warn "\n"; | 221 warn "\n"; |
204 print REPORT "\n"; | 222 print REPORT "\n"; |
205 sleep (3); | 223 sleep (3); |
263 } | 281 } |
264 else{ | 282 else{ |
265 $output_filename =~ s/$/_trimmed.fq/; | 283 $output_filename =~ s/$/_trimmed.fq/; |
266 } | 284 } |
267 | 285 |
286 if ($gzip or $filename =~ /\.gz$/){ | |
287 unless ($dont_gzip){ | |
288 if ($validate){ | |
289 open (OUT,'>',$output_dir.$output_filename) or die "Can't open $output_filename: $!\n"; # don't need to gzip intermediate file | |
290 } | |
291 else{ | |
292 $output_filename .= '.gz'; | |
293 open (OUT,"| gzip -c - > ${output_dir}${output_filename}") or die "Can't write to $output_filename: $!\n"; | |
294 } | |
295 } | |
296 else{ | |
297 open (OUT,'>',$output_dir.$output_filename) or die "Can't open $output_filename: $!\n"; # don't need to gzip intermediate file | |
298 } | |
299 } | |
300 else{ | |
301 open (OUT,'>',$output_dir.$output_filename) or die "Can't open $output_filename: $!\n"; | |
302 } | |
268 warn "Writing final adapter and quality trimmed output to $output_filename\n\n"; | 303 warn "Writing final adapter and quality trimmed output to $output_filename\n\n"; |
269 open (OUT,'>',$output_dir.$output_filename) or die "Can't open $output_filename: $!\n"; | |
270 sleep (2); | |
271 | 304 |
272 my $count = 0; | 305 my $count = 0; |
273 my $too_short = 0; | 306 my $too_short = 0; |
274 my $quality_trimmed = 0; | 307 my $quality_trimmed = 0; |
275 my $rrbs_trimmed = 0; | 308 my $rrbs_trimmed = 0; |
387 ### PRINTING (POTENTIALLY TRIMMED) SEQUENCE | 420 ### PRINTING (POTENTIALLY TRIMMED) SEQUENCE |
388 if ($validate){ # printing the sequence without performing a length check (this is performed for the read pair separately later) | 421 if ($validate){ # printing the sequence without performing a length check (this is performed for the read pair separately later) |
389 print OUT "$l1$seq\n$l3$qual\n"; | 422 print OUT "$l1$seq\n$l3$qual\n"; |
390 } | 423 } |
391 else{ # single end | 424 else{ # single end |
425 | |
426 if ($clip_r1){ | |
427 $seq = substr($seq,$clip_r1); # starting after the sequences to be trimmed until the end of the sequence | |
428 $qual = substr($qual,$clip_r1); | |
429 } | |
430 | |
392 if (length $seq < $length_cutoff){ | 431 if (length $seq < $length_cutoff){ |
393 ++$too_short; | 432 ++$too_short; |
394 next; | 433 next; |
395 } | 434 } |
396 else{ | 435 else{ |
461 ### PRINTING (POTENTIALLY TRIMMED) SEQUENCE | 500 ### PRINTING (POTENTIALLY TRIMMED) SEQUENCE |
462 if ($validate){ # printing the sequence without performing a length check (this is performed for the read pair separately later) | 501 if ($validate){ # printing the sequence without performing a length check (this is performed for the read pair separately later) |
463 print OUT "$l1$seq\n$l3$qual\n"; | 502 print OUT "$l1$seq\n$l3$qual\n"; |
464 } | 503 } |
465 else{ # single end | 504 else{ # single end |
505 | |
506 if ($clip_r1){ | |
507 $seq = substr($seq,$clip_r1); # starting after the sequences to be trimmed until the end of the sequence | |
508 $qual = substr($qual,$clip_r1); | |
509 } | |
510 | |
466 if (length $seq < $length_cutoff){ | 511 if (length $seq < $length_cutoff){ |
467 ++$too_short; | 512 ++$too_short; |
468 next; | 513 next; |
469 } | 514 } |
470 else{ | 515 else{ |
533 } | 578 } |
534 | 579 |
535 warn "\n"; | 580 warn "\n"; |
536 print REPORT "\n"; | 581 print REPORT "\n"; |
537 | 582 |
538 ### RUNNING FASTQC | 583 ### RUNNING FASTQC unless we are dealing with paired-end files |
539 if ($fastqc){ | 584 unless($validate){ |
540 warn "\n >>> Now running FastQC on the data <<<\n\n"; | 585 if ($fastqc){ |
541 sleep (5); | 586 warn "\n >>> Now running FastQC on the data <<<\n\n"; |
542 if ($fastqc_args){ | 587 sleep (5); |
543 system ("$path_to_fastqc $fastqc_args $output_filename"); | 588 if ($fastqc_args){ |
544 } | 589 system ("$path_to_fastqc $fastqc_args $output_dir$output_filename"); |
545 else{ | 590 } |
546 system ("$path_to_fastqc $output_filename"); | 591 else{ |
547 } | 592 system ("$path_to_fastqc $output_dir$output_filename"); |
548 } | 593 } |
549 | |
550 ### COMPRESSING OUTPUT FILE | |
551 unless ($validate){ # not gzipping intermediate files for paired-end files | |
552 if ($gzip or $filename =~ /\.gz$/){ | |
553 warn "\n >>> GZIP-ing the output file <<<\n\n"; | |
554 system ("gzip -f $output_filename"); | |
555 $output_filename = $output_filename.'.gz'; | |
556 } | 594 } |
557 } | 595 } |
558 | 596 |
559 ### VALIDATE PAIRED-END FILES | 597 ### VALIDATE PAIRED-END FILES |
560 if ($validate){ | 598 if ($validate){ |
583 | 621 |
584 warn "\n >>> Now running FastQC on the validated data $val_1<<<\n\n"; | 622 warn "\n >>> Now running FastQC on the validated data $val_1<<<\n\n"; |
585 sleep (3); | 623 sleep (3); |
586 | 624 |
587 if ($fastqc_args){ | 625 if ($fastqc_args){ |
588 system ("$path_to_fastqc $fastqc_args $val_1"); | 626 system ("$path_to_fastqc $fastqc_args $output_dir$val_1"); |
589 } | 627 } |
590 else{ | 628 else{ |
591 system ("$path_to_fastqc $val_1"); | 629 system ("$path_to_fastqc $output_dir$val_1"); |
592 } | 630 } |
593 | 631 |
594 warn "\n >>> Now running FastQC on the validated data $val_2<<<\n\n"; | 632 warn "\n >>> Now running FastQC on the validated data $val_2<<<\n\n"; |
595 sleep (3); | 633 sleep (3); |
596 | 634 |
597 if ($fastqc_args){ | 635 if ($fastqc_args){ |
598 system ("$path_to_fastqc $fastqc_args $val_2"); | 636 system ("$path_to_fastqc $fastqc_args $output_dir$val_2"); |
599 } | 637 } |
600 else{ | 638 else{ |
601 system ("$path_to_fastqc $val_2"); | 639 system ("$path_to_fastqc $output_dir$val_2"); |
602 } | 640 } |
603 | 641 |
604 } | |
605 | |
606 if ($gzip or $filename =~ /\.gz$/){ | |
607 | |
608 # compressing main fastQ output files | |
609 warn "Compressing the validated output file $val_1 ...\n"; | |
610 system ("gzip -f $val_1"); | |
611 | |
612 warn "Compressing the validated output file $val_2 ...\n"; | |
613 system ("gzip -f $val_2"); | |
614 | |
615 | |
616 if ($retain){ # compressing unpaired reads | |
617 warn "Compressing the unpaired read output $un_1 ...\n"; | |
618 system ("gzip -f $un_1"); | |
619 | |
620 warn "Compressing the unpaired read output $un_2 ...\n"; | |
621 system ("gzip -f $un_2"); | |
622 } | |
623 } | 642 } |
624 | 643 |
625 warn "Deleting both intermediate output files $file_1 and $file_2\n"; | 644 warn "Deleting both intermediate output files $file_1 and $file_2\n"; |
626 unlink "$output_dir$file_1"; | 645 unlink "$output_dir$file_1"; |
627 unlink "$output_dir$file_2"; | 646 unlink "$output_dir$file_2"; |
639 sub validate_paired_end_files{ | 658 sub validate_paired_end_files{ |
640 | 659 |
641 my $file_1 = shift; | 660 my $file_1 = shift; |
642 my $file_2 = shift; | 661 my $file_2 = shift; |
643 | 662 |
644 warn "file_1 $file_1 file_2 $file_2\n\n"; | 663 warn "file_1: $file_1, file_2: $file_2\n\n"; |
645 | 664 |
646 if ($file_1 =~ /\.gz$/){ | 665 if ($file_1 =~ /\.gz$/){ |
647 open (IN1,"zcat $output_dir$file_1 |") or die "Couldn't read from file $file_1: $!\n"; | 666 open (IN1,"zcat $output_dir$file_1 |") or die "Couldn't read from file $file_1: $!\n"; |
648 } | 667 } |
649 else{ | 668 else{ |
675 } | 694 } |
676 else{ | 695 else{ |
677 $out_2 =~ s/trimmed\.fq$/val_2.fq/; | 696 $out_2 =~ s/trimmed\.fq$/val_2.fq/; |
678 } | 697 } |
679 | 698 |
680 open (R1,'>',$output_dir.$out_1) or die "Couldn't write to $out_1 $!\n"; | 699 if ($gzip){ |
681 open (R2,'>',$output_dir.$out_2) or die "Couldn't write to $out_2 $!\n"; | 700 if ($dont_gzip){ |
701 open (R1,'>',$output_dir.$out_1) or die "Couldn't write to $out_1 $!\n"; | |
702 } | |
703 else{ | |
704 $out_1 .= '.gz'; | |
705 open (R1,"| gzip -c - > ${output_dir}${out_1}") or die "Can't write to $out_1: $!\n"; | |
706 } | |
707 } | |
708 else{ | |
709 open (R1,'>',$output_dir.$out_1) or die "Couldn't write to $out_1 $!\n"; | |
710 } | |
711 | |
712 if ($gzip){ | |
713 if ($dont_gzip){ | |
714 open (R2,'>',$output_dir.$out_2) or die "Couldn't write to $out_2 $!\n"; | |
715 } | |
716 else{ | |
717 $out_2 .= '.gz'; | |
718 open (R2,"| gzip -c - > ${output_dir}${out_2}") or die "Can't write to $out_2: $!\n"; | |
719 } | |
720 } | |
721 else{ | |
722 open (R2,'>',$output_dir.$out_2) or die "Couldn't write to $out_2 $!\n"; | |
723 } | |
724 | |
682 warn "Writing validated paired-end read 1 reads to $out_1\n"; | 725 warn "Writing validated paired-end read 1 reads to $out_1\n"; |
683 warn "Writing validated paired-end read 2 reads to $out_2\n\n"; | 726 warn "Writing validated paired-end read 2 reads to $out_2\n\n"; |
684 | 727 |
685 my $unpaired_1; | 728 my $unpaired_1; |
686 my $unpaired_2; | 729 my $unpaired_2; |
702 } | 745 } |
703 else{ | 746 else{ |
704 $unpaired_2 =~ s/trimmed\.fq$/unpaired_2.fq/; | 747 $unpaired_2 =~ s/trimmed\.fq$/unpaired_2.fq/; |
705 } | 748 } |
706 | 749 |
707 open (UNPAIRED1,'>',$output_dir.$unpaired_1) or die "Couldn't write to $unpaired_1: $!\n"; | 750 if ($gzip){ |
708 open (UNPAIRED2,'>',$output_dir.$unpaired_2) or die "Couldn't write to $unpaired_2: $!\n"; | 751 if ($dont_gzip){ |
752 open (UNPAIRED1,'>',$output_dir.$unpaired_1) or die "Couldn't write to $unpaired_1: $!\n"; | |
753 } | |
754 else{ | |
755 $unpaired_1 .= '.gz'; | |
756 open (UNPAIRED1,"| gzip -c - > ${output_dir}${unpaired_1}") or die "Can't write to $unpaired_1: $!\n"; | |
757 } | |
758 } | |
759 else{ | |
760 open (UNPAIRED1,'>',$output_dir.$unpaired_1) or die "Couldn't write to $unpaired_1: $!\n"; | |
761 } | |
762 | |
763 if ($gzip){ | |
764 if ($dont_gzip){ | |
765 open (UNPAIRED2,'>',$output_dir.$unpaired_2) or die "Couldn't write to $unpaired_2: $!\n"; | |
766 } | |
767 else{ | |
768 $unpaired_2 .= '.gz'; | |
769 open (UNPAIRED2,"| gzip -c - > ${output_dir}${unpaired_2}") or die "Can't write to $unpaired_2: $!\n"; | |
770 } | |
771 } | |
772 else{ | |
773 open (UNPAIRED2,'>',$output_dir.$unpaired_2) or die "Couldn't write to $unpaired_2: $!\n"; | |
774 } | |
709 | 775 |
710 warn "Writing unpaired read 1 reads to $unpaired_1\n"; | 776 warn "Writing unpaired read 1 reads to $unpaired_1\n"; |
711 warn "Writing unpaired read 2 reads to $unpaired_2\n\n"; | 777 warn "Writing unpaired read 2 reads to $unpaired_2\n\n"; |
712 } | 778 } |
713 | 779 |
731 last unless ($id_2 and $seq_2 and $l3_2 and $qual_2); | 797 last unless ($id_2 and $seq_2 and $l3_2 and $qual_2); |
732 | 798 |
733 ++$count; | 799 ++$count; |
734 | 800 |
735 | 801 |
736 ## small check if the sequence files appear to FastQ files | 802 ## small check if the sequence files appear to be FastQ files |
737 if ($count == 1){ # performed just once | 803 if ($count == 1){ # performed just once |
738 if ($id_1 !~ /^\@/ or $l3_1 !~ /^\+/){ | 804 if ($id_1 !~ /^\@/ or $l3_1 !~ /^\+/){ |
739 die "Input file doesn't seem to be in FastQ format at sequence $count\n"; | 805 die "Input file doesn't seem to be in FastQ format at sequence $count\n"; |
740 } | 806 } |
741 if ($id_2 !~ /^\@/ or $l3_2 !~ /^\+/){ | 807 if ($id_2 !~ /^\@/ or $l3_2 !~ /^\+/){ |
744 } | 810 } |
745 | 811 |
746 chomp $seq_1; | 812 chomp $seq_1; |
747 chomp $seq_2; | 813 chomp $seq_2; |
748 | 814 |
815 if ($clip_r1){ | |
816 $seq_1 = substr($seq_1,$clip_r1); # starting after the sequences to be trimmed until the end of the sequence | |
817 $qual_1 = substr($qual_1,$clip_r1); | |
818 } | |
819 if ($clip_r2){ | |
820 $seq_2 = substr($seq_2,$clip_r2); # starting after the sequences to be trimmed until the end of the sequence | |
821 $qual_2 = substr($qual_2,$clip_r2); | |
822 } | |
749 | 823 |
750 ### making sure that the reads do have a sensible length | 824 ### making sure that the reads do have a sensible length |
751 if ( (length($seq_1) < $length_cutoff) or (length($seq_2) < $length_cutoff) ){ | 825 if ( (length($seq_1) < $length_cutoff) or (length($seq_2) < $length_cutoff) ){ |
752 ++$sequence_pairs_removed; | 826 ++$sequence_pairs_removed; |
753 if ($retain){ # writing out single-end reads if they are longer than the cutoff | 827 if ($retain){ # writing out single-end reads if they are longer than the cutoff |
791 print REPORT "Number of sequence pairs removed because at least one read was shorter than the length cutoff ($length_cutoff bp): $sequence_pairs_removed ($percentage%)\n"; | 865 print REPORT "Number of sequence pairs removed because at least one read was shorter than the length cutoff ($length_cutoff bp): $sequence_pairs_removed ($percentage%)\n"; |
792 | 866 |
793 if ($keep){ | 867 if ($keep){ |
794 warn "Number of unpaired read 1 reads printed: $read_1_printed\n"; | 868 warn "Number of unpaired read 1 reads printed: $read_1_printed\n"; |
795 warn "Number of unpaired read 2 reads printed: $read_2_printed\n"; | 869 warn "Number of unpaired read 2 reads printed: $read_2_printed\n"; |
870 } | |
871 | |
872 close R1 or die $!; | |
873 close R2 or die $!; | |
874 | |
875 if ($retain){ | |
876 close UNPAIRED1 or die $!; | |
877 close UNPAIRED2 or die $!; | |
796 } | 878 } |
797 | 879 |
798 warn "\n"; | 880 warn "\n"; |
799 if ($retain){ | 881 if ($retain){ |
800 return ($out_1,$out_2,$unpaired_1,$unpaired_2); | 882 return ($out_1,$out_2,$unpaired_1,$unpaired_2); |
828 my $length_read_2; | 910 my $length_read_2; |
829 my $error_rate; | 911 my $error_rate; |
830 my $output_dir; | 912 my $output_dir; |
831 my $no_report_file; | 913 my $no_report_file; |
832 my $suppress_warn; | 914 my $suppress_warn; |
915 my $dont_gzip; | |
916 my $clip_r1; | |
917 my $clip_r2; | |
833 | 918 |
834 my $command_line = GetOptions ('help|man' => \$help, | 919 my $command_line = GetOptions ('help|man' => \$help, |
835 'q|quality=i' => \$quality, | 920 'q|quality=i' => \$quality, |
836 'a|adapter=s' => \$adapter, | 921 'a|adapter=s' => \$adapter, |
837 'a2|adapter2=s' => \$adapter2, | 922 'a2|adapter2=s' => \$adapter2, |
854 'length_2|r2=i' => \$length_read_2, | 939 'length_2|r2=i' => \$length_read_2, |
855 'e|error_rate=s' => \$error_rate, | 940 'e|error_rate=s' => \$error_rate, |
856 'o|output_dir=s' => \$output_dir, | 941 'o|output_dir=s' => \$output_dir, |
857 'no_report_file' => \$no_report_file, | 942 'no_report_file' => \$no_report_file, |
858 'suppress_warn' => \$suppress_warn, | 943 'suppress_warn' => \$suppress_warn, |
944 'dont_gzip' => \$dont_gzip, | |
945 'clip_R1=i' => \$clip_r1, | |
946 'clip_R2=i' => \$clip_r2, | |
859 ); | 947 ); |
860 | 948 |
949 | |
861 ### EXIT ON ERROR if there were errors with any of the supplied options | 950 ### EXIT ON ERROR if there were errors with any of the supplied options |
862 unless ($command_line){ | 951 unless ($command_line){ |
863 die "Please respecify command line options\n"; | 952 die "Please respecify command line options\n"; |
864 } | 953 } |
865 | 954 |
877 | 966 |
878 Quality-/Adapter-/RRBS-Trimming | 967 Quality-/Adapter-/RRBS-Trimming |
879 (powered by Cutadapt) | 968 (powered by Cutadapt) |
880 version $trimmer_version | 969 version $trimmer_version |
881 | 970 |
882 Last update: 18 10 2012 | 971 Last update: 10 04 2013 |
883 | 972 |
884 VERSION | 973 VERSION |
885 exit; | 974 exit; |
886 } | 975 } |
887 | 976 |
1009 } | 1098 } |
1010 else{ | 1099 else{ |
1011 $output_dir = ''; | 1100 $output_dir = ''; |
1012 } | 1101 } |
1013 | 1102 |
1014 return ($quality,$adapter,$stringency,$rrbs,$length_cutoff,$keep,$fastqc,$non_directional,$phred_encoding,$fastqc_args,$trim,$gzip,$validate,$retain,$length_read_1,$length_read_2,$adapter2,$error_rate,$output_dir,$no_report_file); | 1103 ### Trimming at the 5' end |
1104 if (defined $clip_r2){ # trimming 5' bases of read 1 | |
1105 die "Clipping the 5' end of read 2 is only allowed for paired-end files (--paired)\n" unless ($validate); | |
1106 } | |
1107 | |
1108 if (defined $clip_r1){ # trimming 5' bases of read 1 | |
1109 unless ($clip_r1 > 0 and $clip_r1 < 100){ | |
1110 die "The 5' clipping value for read 1 should have a sensible value (> 0 and < read length)\n\n"; | |
1111 } | |
1112 } | |
1113 | |
1114 if (defined $clip_r2){ # trimming 5' bases of read 2 | |
1115 unless ($clip_r2 > 0 and $clip_r2 < 100){ | |
1116 die "The 5' clipping value for read 2 should have a sensible value (> 0 and < read length)\n\n"; | |
1117 } | |
1118 } | |
1119 | |
1120 | |
1121 return ($quality,$adapter,$stringency,$rrbs,$length_cutoff,$keep,$fastqc,$non_directional,$phred_encoding,$fastqc_args,$trim,$gzip,$validate,$retain,$length_read_1,$length_read_2,$adapter2,$error_rate,$output_dir,$no_report_file,$dont_gzip,$clip_r1,$clip_r2); | |
1015 } | 1122 } |
1016 | 1123 |
1017 | 1124 |
1018 | 1125 |
1019 | 1126 |
1063 will be trimmed of the 3' end of any read. | 1170 will be trimmed of the 3' end of any read. |
1064 | 1171 |
1065 -e <ERROR RATE> Maximum allowed error rate (no. of errors divided by the length of the matching | 1172 -e <ERROR RATE> Maximum allowed error rate (no. of errors divided by the length of the matching |
1066 region) (default: 0.1) | 1173 region) (default: 0.1) |
1067 | 1174 |
1068 --gzip Compress the output file with gzip. If the input files are gzip-compressed | 1175 --gzip Compress the output file with GZIP. If the input files are GZIP-compressed |
1069 the output files will be automatically gzip compressed as well. | 1176 the output files will automatically be GZIP compressed as well. As of v0.2.8 the |
1177 compression will take place on the fly. | |
1178 | |
1179 --dont_gzip Output files won't be compressed with GZIP. This option overrides --gzip. | |
1070 | 1180 |
1071 --length <INT> Discard reads that became shorter than length INT because of either | 1181 --length <INT> Discard reads that became shorter than length INT because of either |
1072 quality or adapter trimming. A value of '0' effectively disables | 1182 quality or adapter trimming. A value of '0' effectively disables |
1073 this behaviour. Default: 20 bp. | 1183 this behaviour. Default: 20 bp. |
1074 | 1184 |
1081 directory. | 1191 directory. |
1082 | 1192 |
1083 --no_report_file If specified no report file will be generated. | 1193 --no_report_file If specified no report file will be generated. |
1084 | 1194 |
1085 --suppress_warn If specified any output to STDOUT or STDERR will be suppressed. | 1195 --suppress_warn If specified any output to STDOUT or STDERR will be suppressed. |
1196 | |
1197 --clip_R1 <int> Instructs Trim Galore to remove <int> bp from the 5' end of read 1 (or single-end | |
1198 reads). This may be useful if the qualities were very poor, or if there is some | |
1199 sort of unwanted bias at the 5' end. Default: OFF. | |
1200 | |
1201 --clip_R2 <int> Instructs Trim Galore to remove <int> bp from the 5' end of read 2 (paired-end reads | |
1202 only). This may be useful if the qualities were very poor, or if there is some sort | |
1203 of unwanted bias at the 5' end. For paired-end BS-Seq, it is recommended to remove | |
1204 the first few bp because the end-repair reaction may introduce a bias towards low | |
1205 methylation. Please refer to the M-bias plot section in the Bismark User Guide for | |
1206 some examples. Default: OFF. | |
1086 | 1207 |
1087 | 1208 |
1088 | 1209 |
1089 RRBS-specific options (MspI digested material): | 1210 RRBS-specific options (MspI digested material): |
1090 | 1211 |
1150 -r2/--length_2 <INT> Unpaired single-end read length cutoff needed for read 2 to be written to | 1271 -r2/--length_2 <INT> Unpaired single-end read length cutoff needed for read 2 to be written to |
1151 '.unpaired_2.fq' output file. These reads may be mapped in single-end mode. | 1272 '.unpaired_2.fq' output file. These reads may be mapped in single-end mode. |
1152 Default: 35 bp. | 1273 Default: 35 bp. |
1153 | 1274 |
1154 | 1275 |
1155 Last modified on 18 Oct 2012. | 1276 Last modified on 15 July 2013. |
1156 | 1277 |
1157 HELP | 1278 HELP |
1158 exit; | 1279 exit; |
1159 } | 1280 } |