comparison trim_galore @ 1:898db63d2e84 draft

upgrade to new version
author bgruening
date Wed, 17 Jul 2013 15:05:43 -0400
parents 3c1664caa8e3
children 2c1f0fe810f7
comparison
equal deleted inserted replaced
0:3c1664caa8e3 1:898db63d2e84
5 use IPC::Open3; 5 use IPC::Open3;
6 use File::Spec; 6 use File::Spec;
7 use File::Basename; 7 use File::Basename;
8 use Cwd; 8 use Cwd;
9 9
10 ## This program is Copyright (C) 2012, Felix Krueger (felix.krueger@babraham.ac.uk) 10 ## This program is Copyright (C) 2012-13, Felix Krueger (felix.krueger@babraham.ac.uk)
11 11
12 ## This program is free software: you can redistribute it and/or modify 12 ## This program is free software: you can redistribute it and/or modify
13 ## it under the terms of the GNU General Public License as published by 13 ## it under the terms of the GNU General Public License as published by
14 ## the Free Software Foundation, either version 3 of the License, or 14 ## the Free Software Foundation, either version 3 of the License, or
15 ## (at your option) any later version. 15 ## (at your option) any later version.
23 ## along with this program. If not, see <http://www.gnu.org/licenses/>. 23 ## along with this program. If not, see <http://www.gnu.org/licenses/>.
24 24
25 25
26 26
27 ## this script is taking in FastQ sequences and trims them with Cutadapt 27 ## this script is taking in FastQ sequences and trims them with Cutadapt
28 ## last modified on 18 10 2012 28 ## last modified on 10 April 2013
29 29
30 ######################################################################## 30 ########################################################################
31 31
32 # change these paths if needed 32 # change these paths if needed
33 33
35 my $path_to_fastqc = 'fastqc'; 35 my $path_to_fastqc = 'fastqc';
36 36
37 ######################################################################## 37 ########################################################################
38 38
39 39
40 my $trimmer_version = '0.2.5'; 40 my $trimmer_version = '0.2.8';
41 my $DOWARN = 1; # print on screen warning and text by default 41 my $DOWARN = 1; # print on screen warning and text by default
42 BEGIN { $SIG{'__WARN__'} = sub { warn $_[0] if $DOWARN } }; 42 BEGIN { $SIG{'__WARN__'} = sub { warn $_[0] if $DOWARN } };
43 43
44 my ($cutoff,$adapter,$stringency,$rrbs,$length_cutoff,$keep,$fastqc,$non_directional,$phred_encoding,$fastqc_args,$trim,$gzip,$validate,$retain,$length_read_1,$length_read_2,$a2,$error_rate,$output_dir,$no_report_file) = process_commandline(); 44 my ($cutoff,$adapter,$stringency,$rrbs,$length_cutoff,$keep,$fastqc,$non_directional,$phred_encoding,$fastqc_args,$trim,$gzip,$validate,$retain,$length_read_1,$length_read_2,$a2,$error_rate,$output_dir,$no_report_file,$dont_gzip,$clip_r1,$clip_r2) = process_commandline();
45
46 my @filenames = @ARGV;
47
48 die "\nPlease provide the filename(s) of one or more FastQ file(s) to launch Trim Galore!\n
49 USAGE: 'trim_galore [options] <filename(s)>' or 'trim_galore --help' for more options\n\n" unless (@filenames);
50
45 51
46 ### SETTING DEFAULTS UNLESS THEY WERE SPECIFIED 52 ### SETTING DEFAULTS UNLESS THEY WERE SPECIFIED
47 unless (defined $cutoff){ 53 unless (defined $cutoff){
48 $cutoff = 20; 54 $cutoff = 20;
49 } 55 }
66 72
67 if ($phred_encoding == 64){ 73 if ($phred_encoding == 64){
68 $cutoff += 31; 74 $cutoff += 31;
69 } 75 }
70 76
71 my @filenames = @ARGV;
72
73 my $file_1; 77 my $file_1;
74 my $file_2; 78 my $file_2;
75 79
76 foreach my $filename (@ARGV){ 80 foreach my $filename (@ARGV){
77 trim ($filename); 81 trim ($filename);
153 warn "Length cut-off for read 1: $length_read_1 bp\n"; 157 warn "Length cut-off for read 1: $length_read_1 bp\n";
154 print REPORT "Length cut-off for read 1: $length_read_1 bp\n"; 158 print REPORT "Length cut-off for read 1: $length_read_1 bp\n";
155 } 159 }
156 160
157 if ($length_read_2 == 35){ 161 if ($length_read_2 == 35){
158 warn "Length cut-off for read 2: $length_read_2 b (default)\n"; 162 warn "Length cut-off for read 2: $length_read_2 bb (default)\n";
159 print REPORT "Length cut-off for read 2: $length_read_2 bp (default)\n"; 163 print REPORT "Length cut-off for read 2: $length_read_2 bp (default)\n";
160 } 164 }
161 else{ 165 else{
162 warn "Length cut-off for read 2: $length_read_2 bp\n"; 166 warn "Length cut-off for read 2: $length_read_2 bp\n";
163 print REPORT "Length cut-off for read 2: $length_read_2 bp\n"; 167 print REPORT "Length cut-off for read 2: $length_read_2 bp\n";
178 if ($trim){ 182 if ($trim){
179 warn "All sequences will be trimmed by 1 bp on their 3' end to avoid problems with invalid paired-end alignments with Bowtie 1\n"; 183 warn "All sequences will be trimmed by 1 bp on their 3' end to avoid problems with invalid paired-end alignments with Bowtie 1\n";
180 print REPORT "All sequences will be trimmed by 1 bp on their 3' end to avoid problems with invalid paired-end alignments with Bowtie 1\n"; 184 print REPORT "All sequences will be trimmed by 1 bp on their 3' end to avoid problems with invalid paired-end alignments with Bowtie 1\n";
181 } 185 }
182 186
187 if ($clip_r1){
188 warn "All Read 1 sequences will be trimmed by $clip_r1 bp from their 5' end to avoid poor qualities or biases\n";
189 print REPORT "All Read 1 sequences will be trimmed by $clip_r1 bp from their 5' end to avoid poor qualities or biases\n";
190 }
191 if ($clip_r2){
192 warn "All Read 2 sequences will be trimmed by $clip_r2 bp from their 5' end to avoid poor qualities or biases (e.g. M-bias for BS-Seq applications)\n";
193 print REPORT "All Read 2 sequences will be trimmed by $clip_r2 bp from their 5' end to avoid poor qualities or biases (e.g. M-bias for BS-Seq applications)\n";
194 }
195
196
183 if ($fastqc){ 197 if ($fastqc){
184 warn "Running FastQC on the data once trimming has completed\n"; 198 warn "Running FastQC on the data once trimming has completed\n";
185 print REPORT "Running FastQC on the data once trimming has completed\n"; 199 print REPORT "Running FastQC on the data once trimming has completed\n";
186 200
187 if ($fastqc_args){ 201 if ($fastqc_args){
193 if ($keep and $rrbs){ 207 if ($keep and $rrbs){
194 warn "Keeping quality trimmed (but not yet adapter trimmed) intermediate FastQ file\n"; 208 warn "Keeping quality trimmed (but not yet adapter trimmed) intermediate FastQ file\n";
195 print REPORT "Keeping quality trimmed (but not yet adapter trimmed) intermediate FastQ file\n"; 209 print REPORT "Keeping quality trimmed (but not yet adapter trimmed) intermediate FastQ file\n";
196 } 210 }
197 211
212
198 if ($gzip or $filename =~ /\.gz$/){ 213 if ($gzip or $filename =~ /\.gz$/){
199 warn "Output file will be GZIP compressed\n"; 214 $gzip = 1;
200 print REPORT "Output file will be GZIP compressed\n"; 215 unless ($dont_gzip){
216 warn "Output file(s) will be GZIP compressed\n";
217 print REPORT "Output file will be GZIP compressed\n";
218 }
201 } 219 }
202 220
203 warn "\n"; 221 warn "\n";
204 print REPORT "\n"; 222 print REPORT "\n";
205 sleep (3); 223 sleep (3);
263 } 281 }
264 else{ 282 else{
265 $output_filename =~ s/$/_trimmed.fq/; 283 $output_filename =~ s/$/_trimmed.fq/;
266 } 284 }
267 285
286 if ($gzip or $filename =~ /\.gz$/){
287 unless ($dont_gzip){
288 if ($validate){
289 open (OUT,'>',$output_dir.$output_filename) or die "Can't open $output_filename: $!\n"; # don't need to gzip intermediate file
290 }
291 else{
292 $output_filename .= '.gz';
293 open (OUT,"| gzip -c - > ${output_dir}${output_filename}") or die "Can't write to $output_filename: $!\n";
294 }
295 }
296 else{
297 open (OUT,'>',$output_dir.$output_filename) or die "Can't open $output_filename: $!\n"; # don't need to gzip intermediate file
298 }
299 }
300 else{
301 open (OUT,'>',$output_dir.$output_filename) or die "Can't open $output_filename: $!\n";
302 }
268 warn "Writing final adapter and quality trimmed output to $output_filename\n\n"; 303 warn "Writing final adapter and quality trimmed output to $output_filename\n\n";
269 open (OUT,'>',$output_dir.$output_filename) or die "Can't open $output_filename: $!\n";
270 sleep (2);
271 304
272 my $count = 0; 305 my $count = 0;
273 my $too_short = 0; 306 my $too_short = 0;
274 my $quality_trimmed = 0; 307 my $quality_trimmed = 0;
275 my $rrbs_trimmed = 0; 308 my $rrbs_trimmed = 0;
387 ### PRINTING (POTENTIALLY TRIMMED) SEQUENCE 420 ### PRINTING (POTENTIALLY TRIMMED) SEQUENCE
388 if ($validate){ # printing the sequence without performing a length check (this is performed for the read pair separately later) 421 if ($validate){ # printing the sequence without performing a length check (this is performed for the read pair separately later)
389 print OUT "$l1$seq\n$l3$qual\n"; 422 print OUT "$l1$seq\n$l3$qual\n";
390 } 423 }
391 else{ # single end 424 else{ # single end
425
426 if ($clip_r1){
427 $seq = substr($seq,$clip_r1); # starting after the sequences to be trimmed until the end of the sequence
428 $qual = substr($qual,$clip_r1);
429 }
430
392 if (length $seq < $length_cutoff){ 431 if (length $seq < $length_cutoff){
393 ++$too_short; 432 ++$too_short;
394 next; 433 next;
395 } 434 }
396 else{ 435 else{
461 ### PRINTING (POTENTIALLY TRIMMED) SEQUENCE 500 ### PRINTING (POTENTIALLY TRIMMED) SEQUENCE
462 if ($validate){ # printing the sequence without performing a length check (this is performed for the read pair separately later) 501 if ($validate){ # printing the sequence without performing a length check (this is performed for the read pair separately later)
463 print OUT "$l1$seq\n$l3$qual\n"; 502 print OUT "$l1$seq\n$l3$qual\n";
464 } 503 }
465 else{ # single end 504 else{ # single end
505
506 if ($clip_r1){
507 $seq = substr($seq,$clip_r1); # starting after the sequences to be trimmed until the end of the sequence
508 $qual = substr($qual,$clip_r1);
509 }
510
466 if (length $seq < $length_cutoff){ 511 if (length $seq < $length_cutoff){
467 ++$too_short; 512 ++$too_short;
468 next; 513 next;
469 } 514 }
470 else{ 515 else{
533 } 578 }
534 579
535 warn "\n"; 580 warn "\n";
536 print REPORT "\n"; 581 print REPORT "\n";
537 582
538 ### RUNNING FASTQC 583 ### RUNNING FASTQC unless we are dealing with paired-end files
539 if ($fastqc){ 584 unless($validate){
540 warn "\n >>> Now running FastQC on the data <<<\n\n"; 585 if ($fastqc){
541 sleep (5); 586 warn "\n >>> Now running FastQC on the data <<<\n\n";
542 if ($fastqc_args){ 587 sleep (5);
543 system ("$path_to_fastqc $fastqc_args $output_filename"); 588 if ($fastqc_args){
544 } 589 system ("$path_to_fastqc $fastqc_args $output_dir$output_filename");
545 else{ 590 }
546 system ("$path_to_fastqc $output_filename"); 591 else{
547 } 592 system ("$path_to_fastqc $output_dir$output_filename");
548 } 593 }
549
550 ### COMPRESSING OUTPUT FILE
551 unless ($validate){ # not gzipping intermediate files for paired-end files
552 if ($gzip or $filename =~ /\.gz$/){
553 warn "\n >>> GZIP-ing the output file <<<\n\n";
554 system ("gzip -f $output_filename");
555 $output_filename = $output_filename.'.gz';
556 } 594 }
557 } 595 }
558 596
559 ### VALIDATE PAIRED-END FILES 597 ### VALIDATE PAIRED-END FILES
560 if ($validate){ 598 if ($validate){
583 621
584 warn "\n >>> Now running FastQC on the validated data $val_1<<<\n\n"; 622 warn "\n >>> Now running FastQC on the validated data $val_1<<<\n\n";
585 sleep (3); 623 sleep (3);
586 624
587 if ($fastqc_args){ 625 if ($fastqc_args){
588 system ("$path_to_fastqc $fastqc_args $val_1"); 626 system ("$path_to_fastqc $fastqc_args $output_dir$val_1");
589 } 627 }
590 else{ 628 else{
591 system ("$path_to_fastqc $val_1"); 629 system ("$path_to_fastqc $output_dir$val_1");
592 } 630 }
593 631
594 warn "\n >>> Now running FastQC on the validated data $val_2<<<\n\n"; 632 warn "\n >>> Now running FastQC on the validated data $val_2<<<\n\n";
595 sleep (3); 633 sleep (3);
596 634
597 if ($fastqc_args){ 635 if ($fastqc_args){
598 system ("$path_to_fastqc $fastqc_args $val_2"); 636 system ("$path_to_fastqc $fastqc_args $output_dir$val_2");
599 } 637 }
600 else{ 638 else{
601 system ("$path_to_fastqc $val_2"); 639 system ("$path_to_fastqc $output_dir$val_2");
602 } 640 }
603 641
604 }
605
606 if ($gzip or $filename =~ /\.gz$/){
607
608 # compressing main fastQ output files
609 warn "Compressing the validated output file $val_1 ...\n";
610 system ("gzip -f $val_1");
611
612 warn "Compressing the validated output file $val_2 ...\n";
613 system ("gzip -f $val_2");
614
615
616 if ($retain){ # compressing unpaired reads
617 warn "Compressing the unpaired read output $un_1 ...\n";
618 system ("gzip -f $un_1");
619
620 warn "Compressing the unpaired read output $un_2 ...\n";
621 system ("gzip -f $un_2");
622 }
623 } 642 }
624 643
625 warn "Deleting both intermediate output files $file_1 and $file_2\n"; 644 warn "Deleting both intermediate output files $file_1 and $file_2\n";
626 unlink "$output_dir$file_1"; 645 unlink "$output_dir$file_1";
627 unlink "$output_dir$file_2"; 646 unlink "$output_dir$file_2";
639 sub validate_paired_end_files{ 658 sub validate_paired_end_files{
640 659
641 my $file_1 = shift; 660 my $file_1 = shift;
642 my $file_2 = shift; 661 my $file_2 = shift;
643 662
644 warn "file_1 $file_1 file_2 $file_2\n\n"; 663 warn "file_1: $file_1, file_2: $file_2\n\n";
645 664
646 if ($file_1 =~ /\.gz$/){ 665 if ($file_1 =~ /\.gz$/){
647 open (IN1,"zcat $output_dir$file_1 |") or die "Couldn't read from file $file_1: $!\n"; 666 open (IN1,"zcat $output_dir$file_1 |") or die "Couldn't read from file $file_1: $!\n";
648 } 667 }
649 else{ 668 else{
675 } 694 }
676 else{ 695 else{
677 $out_2 =~ s/trimmed\.fq$/val_2.fq/; 696 $out_2 =~ s/trimmed\.fq$/val_2.fq/;
678 } 697 }
679 698
680 open (R1,'>',$output_dir.$out_1) or die "Couldn't write to $out_1 $!\n"; 699 if ($gzip){
681 open (R2,'>',$output_dir.$out_2) or die "Couldn't write to $out_2 $!\n"; 700 if ($dont_gzip){
701 open (R1,'>',$output_dir.$out_1) or die "Couldn't write to $out_1 $!\n";
702 }
703 else{
704 $out_1 .= '.gz';
705 open (R1,"| gzip -c - > ${output_dir}${out_1}") or die "Can't write to $out_1: $!\n";
706 }
707 }
708 else{
709 open (R1,'>',$output_dir.$out_1) or die "Couldn't write to $out_1 $!\n";
710 }
711
712 if ($gzip){
713 if ($dont_gzip){
714 open (R2,'>',$output_dir.$out_2) or die "Couldn't write to $out_2 $!\n";
715 }
716 else{
717 $out_2 .= '.gz';
718 open (R2,"| gzip -c - > ${output_dir}${out_2}") or die "Can't write to $out_2: $!\n";
719 }
720 }
721 else{
722 open (R2,'>',$output_dir.$out_2) or die "Couldn't write to $out_2 $!\n";
723 }
724
682 warn "Writing validated paired-end read 1 reads to $out_1\n"; 725 warn "Writing validated paired-end read 1 reads to $out_1\n";
683 warn "Writing validated paired-end read 2 reads to $out_2\n\n"; 726 warn "Writing validated paired-end read 2 reads to $out_2\n\n";
684 727
685 my $unpaired_1; 728 my $unpaired_1;
686 my $unpaired_2; 729 my $unpaired_2;
702 } 745 }
703 else{ 746 else{
704 $unpaired_2 =~ s/trimmed\.fq$/unpaired_2.fq/; 747 $unpaired_2 =~ s/trimmed\.fq$/unpaired_2.fq/;
705 } 748 }
706 749
707 open (UNPAIRED1,'>',$output_dir.$unpaired_1) or die "Couldn't write to $unpaired_1: $!\n"; 750 if ($gzip){
708 open (UNPAIRED2,'>',$output_dir.$unpaired_2) or die "Couldn't write to $unpaired_2: $!\n"; 751 if ($dont_gzip){
752 open (UNPAIRED1,'>',$output_dir.$unpaired_1) or die "Couldn't write to $unpaired_1: $!\n";
753 }
754 else{
755 $unpaired_1 .= '.gz';
756 open (UNPAIRED1,"| gzip -c - > ${output_dir}${unpaired_1}") or die "Can't write to $unpaired_1: $!\n";
757 }
758 }
759 else{
760 open (UNPAIRED1,'>',$output_dir.$unpaired_1) or die "Couldn't write to $unpaired_1: $!\n";
761 }
762
763 if ($gzip){
764 if ($dont_gzip){
765 open (UNPAIRED2,'>',$output_dir.$unpaired_2) or die "Couldn't write to $unpaired_2: $!\n";
766 }
767 else{
768 $unpaired_2 .= '.gz';
769 open (UNPAIRED2,"| gzip -c - > ${output_dir}${unpaired_2}") or die "Can't write to $unpaired_2: $!\n";
770 }
771 }
772 else{
773 open (UNPAIRED2,'>',$output_dir.$unpaired_2) or die "Couldn't write to $unpaired_2: $!\n";
774 }
709 775
710 warn "Writing unpaired read 1 reads to $unpaired_1\n"; 776 warn "Writing unpaired read 1 reads to $unpaired_1\n";
711 warn "Writing unpaired read 2 reads to $unpaired_2\n\n"; 777 warn "Writing unpaired read 2 reads to $unpaired_2\n\n";
712 } 778 }
713 779
731 last unless ($id_2 and $seq_2 and $l3_2 and $qual_2); 797 last unless ($id_2 and $seq_2 and $l3_2 and $qual_2);
732 798
733 ++$count; 799 ++$count;
734 800
735 801
736 ## small check if the sequence files appear to FastQ files 802 ## small check if the sequence files appear to be FastQ files
737 if ($count == 1){ # performed just once 803 if ($count == 1){ # performed just once
738 if ($id_1 !~ /^\@/ or $l3_1 !~ /^\+/){ 804 if ($id_1 !~ /^\@/ or $l3_1 !~ /^\+/){
739 die "Input file doesn't seem to be in FastQ format at sequence $count\n"; 805 die "Input file doesn't seem to be in FastQ format at sequence $count\n";
740 } 806 }
741 if ($id_2 !~ /^\@/ or $l3_2 !~ /^\+/){ 807 if ($id_2 !~ /^\@/ or $l3_2 !~ /^\+/){
744 } 810 }
745 811
746 chomp $seq_1; 812 chomp $seq_1;
747 chomp $seq_2; 813 chomp $seq_2;
748 814
815 if ($clip_r1){
816 $seq_1 = substr($seq_1,$clip_r1); # starting after the sequences to be trimmed until the end of the sequence
817 $qual_1 = substr($qual_1,$clip_r1);
818 }
819 if ($clip_r2){
820 $seq_2 = substr($seq_2,$clip_r2); # starting after the sequences to be trimmed until the end of the sequence
821 $qual_2 = substr($qual_2,$clip_r2);
822 }
749 823
750 ### making sure that the reads do have a sensible length 824 ### making sure that the reads do have a sensible length
751 if ( (length($seq_1) < $length_cutoff) or (length($seq_2) < $length_cutoff) ){ 825 if ( (length($seq_1) < $length_cutoff) or (length($seq_2) < $length_cutoff) ){
752 ++$sequence_pairs_removed; 826 ++$sequence_pairs_removed;
753 if ($retain){ # writing out single-end reads if they are longer than the cutoff 827 if ($retain){ # writing out single-end reads if they are longer than the cutoff
791 print REPORT "Number of sequence pairs removed because at least one read was shorter than the length cutoff ($length_cutoff bp): $sequence_pairs_removed ($percentage%)\n"; 865 print REPORT "Number of sequence pairs removed because at least one read was shorter than the length cutoff ($length_cutoff bp): $sequence_pairs_removed ($percentage%)\n";
792 866
793 if ($keep){ 867 if ($keep){
794 warn "Number of unpaired read 1 reads printed: $read_1_printed\n"; 868 warn "Number of unpaired read 1 reads printed: $read_1_printed\n";
795 warn "Number of unpaired read 2 reads printed: $read_2_printed\n"; 869 warn "Number of unpaired read 2 reads printed: $read_2_printed\n";
870 }
871
872 close R1 or die $!;
873 close R2 or die $!;
874
875 if ($retain){
876 close UNPAIRED1 or die $!;
877 close UNPAIRED2 or die $!;
796 } 878 }
797 879
798 warn "\n"; 880 warn "\n";
799 if ($retain){ 881 if ($retain){
800 return ($out_1,$out_2,$unpaired_1,$unpaired_2); 882 return ($out_1,$out_2,$unpaired_1,$unpaired_2);
828 my $length_read_2; 910 my $length_read_2;
829 my $error_rate; 911 my $error_rate;
830 my $output_dir; 912 my $output_dir;
831 my $no_report_file; 913 my $no_report_file;
832 my $suppress_warn; 914 my $suppress_warn;
915 my $dont_gzip;
916 my $clip_r1;
917 my $clip_r2;
833 918
834 my $command_line = GetOptions ('help|man' => \$help, 919 my $command_line = GetOptions ('help|man' => \$help,
835 'q|quality=i' => \$quality, 920 'q|quality=i' => \$quality,
836 'a|adapter=s' => \$adapter, 921 'a|adapter=s' => \$adapter,
837 'a2|adapter2=s' => \$adapter2, 922 'a2|adapter2=s' => \$adapter2,
854 'length_2|r2=i' => \$length_read_2, 939 'length_2|r2=i' => \$length_read_2,
855 'e|error_rate=s' => \$error_rate, 940 'e|error_rate=s' => \$error_rate,
856 'o|output_dir=s' => \$output_dir, 941 'o|output_dir=s' => \$output_dir,
857 'no_report_file' => \$no_report_file, 942 'no_report_file' => \$no_report_file,
858 'suppress_warn' => \$suppress_warn, 943 'suppress_warn' => \$suppress_warn,
944 'dont_gzip' => \$dont_gzip,
945 'clip_R1=i' => \$clip_r1,
946 'clip_R2=i' => \$clip_r2,
859 ); 947 );
860 948
949
861 ### EXIT ON ERROR if there were errors with any of the supplied options 950 ### EXIT ON ERROR if there were errors with any of the supplied options
862 unless ($command_line){ 951 unless ($command_line){
863 die "Please respecify command line options\n"; 952 die "Please respecify command line options\n";
864 } 953 }
865 954
877 966
878 Quality-/Adapter-/RRBS-Trimming 967 Quality-/Adapter-/RRBS-Trimming
879 (powered by Cutadapt) 968 (powered by Cutadapt)
880 version $trimmer_version 969 version $trimmer_version
881 970
882 Last update: 18 10 2012 971 Last update: 10 04 2013
883 972
884 VERSION 973 VERSION
885 exit; 974 exit;
886 } 975 }
887 976
1009 } 1098 }
1010 else{ 1099 else{
1011 $output_dir = ''; 1100 $output_dir = '';
1012 } 1101 }
1013 1102
1014 return ($quality,$adapter,$stringency,$rrbs,$length_cutoff,$keep,$fastqc,$non_directional,$phred_encoding,$fastqc_args,$trim,$gzip,$validate,$retain,$length_read_1,$length_read_2,$adapter2,$error_rate,$output_dir,$no_report_file); 1103 ### Trimming at the 5' end
1104 if (defined $clip_r2){ # trimming 5' bases of read 1
1105 die "Clipping the 5' end of read 2 is only allowed for paired-end files (--paired)\n" unless ($validate);
1106 }
1107
1108 if (defined $clip_r1){ # trimming 5' bases of read 1
1109 unless ($clip_r1 > 0 and $clip_r1 < 100){
1110 die "The 5' clipping value for read 1 should have a sensible value (> 0 and < read length)\n\n";
1111 }
1112 }
1113
1114 if (defined $clip_r2){ # trimming 5' bases of read 2
1115 unless ($clip_r2 > 0 and $clip_r2 < 100){
1116 die "The 5' clipping value for read 2 should have a sensible value (> 0 and < read length)\n\n";
1117 }
1118 }
1119
1120
1121 return ($quality,$adapter,$stringency,$rrbs,$length_cutoff,$keep,$fastqc,$non_directional,$phred_encoding,$fastqc_args,$trim,$gzip,$validate,$retain,$length_read_1,$length_read_2,$adapter2,$error_rate,$output_dir,$no_report_file,$dont_gzip,$clip_r1,$clip_r2);
1015 } 1122 }
1016 1123
1017 1124
1018 1125
1019 1126
1063 will be trimmed of the 3' end of any read. 1170 will be trimmed of the 3' end of any read.
1064 1171
1065 -e <ERROR RATE> Maximum allowed error rate (no. of errors divided by the length of the matching 1172 -e <ERROR RATE> Maximum allowed error rate (no. of errors divided by the length of the matching
1066 region) (default: 0.1) 1173 region) (default: 0.1)
1067 1174
1068 --gzip Compress the output file with gzip. If the input files are gzip-compressed 1175 --gzip Compress the output file with GZIP. If the input files are GZIP-compressed
1069 the output files will be automatically gzip compressed as well. 1176 the output files will automatically be GZIP compressed as well. As of v0.2.8 the
1177 compression will take place on the fly.
1178
1179 --dont_gzip Output files won't be compressed with GZIP. This option overrides --gzip.
1070 1180
1071 --length <INT> Discard reads that became shorter than length INT because of either 1181 --length <INT> Discard reads that became shorter than length INT because of either
1072 quality or adapter trimming. A value of '0' effectively disables 1182 quality or adapter trimming. A value of '0' effectively disables
1073 this behaviour. Default: 20 bp. 1183 this behaviour. Default: 20 bp.
1074 1184
1081 directory. 1191 directory.
1082 1192
1083 --no_report_file If specified no report file will be generated. 1193 --no_report_file If specified no report file will be generated.
1084 1194
1085 --suppress_warn If specified any output to STDOUT or STDERR will be suppressed. 1195 --suppress_warn If specified any output to STDOUT or STDERR will be suppressed.
1196
1197 --clip_R1 <int> Instructs Trim Galore to remove <int> bp from the 5' end of read 1 (or single-end
1198 reads). This may be useful if the qualities were very poor, or if there is some
1199 sort of unwanted bias at the 5' end. Default: OFF.
1200
1201 --clip_R2 <int> Instructs Trim Galore to remove <int> bp from the 5' end of read 2 (paired-end reads
1202 only). This may be useful if the qualities were very poor, or if there is some sort
1203 of unwanted bias at the 5' end. For paired-end BS-Seq, it is recommended to remove
1204 the first few bp because the end-repair reaction may introduce a bias towards low
1205 methylation. Please refer to the M-bias plot section in the Bismark User Guide for
1206 some examples. Default: OFF.
1086 1207
1087 1208
1088 1209
1089 RRBS-specific options (MspI digested material): 1210 RRBS-specific options (MspI digested material):
1090 1211
1150 -r2/--length_2 <INT> Unpaired single-end read length cutoff needed for read 2 to be written to 1271 -r2/--length_2 <INT> Unpaired single-end read length cutoff needed for read 2 to be written to
1151 '.unpaired_2.fq' output file. These reads may be mapped in single-end mode. 1272 '.unpaired_2.fq' output file. These reads may be mapped in single-end mode.
1152 Default: 35 bp. 1273 Default: 35 bp.
1153 1274
1154 1275
1155 Last modified on 18 Oct 2012. 1276 Last modified on 15 July 2013.
1156 1277
1157 HELP 1278 HELP
1158 exit; 1279 exit;
1159 } 1280 }