Previous changeset 0:3c1664caa8e3 (2013-07-06) Next changeset 2:9109c2c3be1e (2013-07-19) |
Commit message:
upgrade to new version |
modified:
trim_galore trim_galore_wrapper.xml |
b |
diff -r 3c1664caa8e3 -r 898db63d2e84 trim_galore --- a/trim_galore Sat Jul 06 09:52:23 2013 -0400 +++ b/trim_galore Wed Jul 17 15:05:43 2013 -0400 |
[ |
b'@@ -7,7 +7,7 @@\n use File::Basename;\n use Cwd;\n \n-## This program is Copyright (C) 2012, Felix Krueger (felix.krueger@babraham.ac.uk)\n+## This program is Copyright (C) 2012-13, Felix Krueger (felix.krueger@babraham.ac.uk)\n \n ## This program is free software: you can redistribute it and/or modify\n ## it under the terms of the GNU General Public License as published by\n@@ -25,7 +25,7 @@\n \n \n ## this script is taking in FastQ sequences and trims them with Cutadapt\n-## last modified on 18 10 2012\n+## last modified on 10 April 2013\n \n ########################################################################\n \n@@ -37,11 +37,17 @@\n ########################################################################\n \n \n-my $trimmer_version = \'0.2.5\';\n+my $trimmer_version = \'0.2.8\';\n my $DOWARN = 1; # print on screen warning and text by default\n BEGIN { $SIG{\'__WARN__\'} = sub { warn $_[0] if $DOWARN } };\n \n-my ($cutoff,$adapter,$stringency,$rrbs,$length_cutoff,$keep,$fastqc,$non_directional,$phred_encoding,$fastqc_args,$trim,$gzip,$validate,$retain,$length_read_1,$length_read_2,$a2,$error_rate,$output_dir,$no_report_file) = process_commandline();\n+my ($cutoff,$adapter,$stringency,$rrbs,$length_cutoff,$keep,$fastqc,$non_directional,$phred_encoding,$fastqc_args,$trim,$gzip,$validate,$retain,$length_read_1,$length_read_2,$a2,$error_rate,$output_dir,$no_report_file,$dont_gzip,$clip_r1,$clip_r2) = process_commandline();\n+\n+my @filenames = @ARGV;\n+\n+die "\\nPlease provide the filename(s) of one or more FastQ file(s) to launch Trim Galore!\\n\n+USAGE: \'trim_galore [options] <filename(s)>\' or \'trim_galore --help\' for more options\\n\\n" unless (@filenames);\n+\n \n ### SETTING DEFAULTS UNLESS THEY WERE SPECIFIED\n unless (defined $cutoff){\n@@ -68,8 +74,6 @@\n $cutoff += 31;\n }\n \n-my @filenames = @ARGV;\n-\n my $file_1;\n my $file_2;\n \n@@ -155,7 +159,7 @@\n }\n \n if ($length_read_2 == 35){\n-\twarn "Length cut-off for read 2: $length_read_2 b (default)\\n";\n+\twarn "Length cut-off for read 2: $length_read_2 bb (default)\\n";\n \tprint REPORT "Length cut-off for read 2: $length_read_2 bp (default)\\n";\n }\n else{\n@@ -180,6 +184,16 @@\n print REPORT "All sequences will be trimmed by 1 bp on their 3\' end to avoid problems with invalid paired-end alignments with Bowtie 1\\n";\n }\n \n+ if ($clip_r1){\n+ warn "All Read 1 sequences will be trimmed by $clip_r1 bp from their 5\' end to avoid poor qualities or biases\\n";\n+ print REPORT "All Read 1 sequences will be trimmed by $clip_r1 bp from their 5\' end to avoid poor qualities or biases\\n";\n+ }\n+ if ($clip_r2){\n+ warn "All Read 2 sequences will be trimmed by $clip_r2 bp from their 5\' end to avoid poor qualities or biases (e.g. M-bias for BS-Seq applications)\\n";\n+ print REPORT "All Read 2 sequences will be trimmed by $clip_r2 bp from their 5\' end to avoid poor qualities or biases (e.g. M-bias for BS-Seq applications)\\n";\n+ }\n+\n+\n if ($fastqc){\n warn "Running FastQC on the data once trimming has completed\\n";\n print REPORT "Running FastQC on the data once trimming has completed\\n";\n@@ -195,9 +209,13 @@\n print REPORT "Keeping quality trimmed (but not yet adapter trimmed) intermediate FastQ file\\n";\n }\n \n+\n if ($gzip or $filename =~ /\\.gz$/){\n- warn "Output file will be GZIP compressed\\n";\n- print REPORT "Output file will be GZIP compressed\\n";\n+ $gzip = 1;\n+ unless ($dont_gzip){\n+ warn "Output file(s) will be GZIP compressed\\n";\n+ print REPORT "Output file will be GZIP compressed\\n";\n+ }\n }\n \n warn "\\n";\n@@ -265,9 +283,24 @@\n $output_filename =~ s/$/_trimmed.fq/;\n }\n \n+ if ($gzip or $filename =~ /\\.gz$/){\n+ unless ($dont_gzip){\n+ if ($validate){\n+\topen (OUT,\'>\',$output_dir.$output_filename) or die "Can\'t open $output_filename: $!\\n"; # don\'t need to gzip intermediate file\n+ }\n+ else{\n+\t$output_filename .= \'.gz\';\n+\topen (OUT,"| gzip -c - > ${output_dir}${output_filename}") or die "Can\'t write to $output_filename: $!\\n'..b'12 @@\n \t\t\t\t \'o|output_dir=s\' => \\$output_dir,\n \t\t\t\t \'no_report_file\' => \\$no_report_file,\n \t\t\t\t \'suppress_warn\' => \\$suppress_warn,\n+\t\t\t\t \'dont_gzip\' => \\$dont_gzip,\n+\t\t\t\t \'clip_R1=i\' => \\$clip_r1,\n+\t\t\t\t \'clip_R2=i\' => \\$clip_r2,\n \t\t\t\t);\n- \n+\n+\n ### EXIT ON ERROR if there were errors with any of the supplied options\n unless ($command_line){\n die "Please respecify command line options\\n";\n@@ -879,7 +968,7 @@\n (powered by Cutadapt)\n version $trimmer_version\n \n- Last update: 18 10 2012\n+ Last update: 10 04 2013\n \n VERSION\n exit;\n@@ -1011,7 +1100,25 @@\n $output_dir = \'\';\n }\n \n- return ($quality,$adapter,$stringency,$rrbs,$length_cutoff,$keep,$fastqc,$non_directional,$phred_encoding,$fastqc_args,$trim,$gzip,$validate,$retain,$length_read_1,$length_read_2,$adapter2,$error_rate,$output_dir,$no_report_file);\n+ ### Trimming at the 5\' end\n+ if (defined $clip_r2){ # trimming 5\' bases of read 1\n+ die "Clipping the 5\' end of read 2 is only allowed for paired-end files (--paired)\\n" unless ($validate);\n+ }\n+\n+ if (defined $clip_r1){ # trimming 5\' bases of read 1\n+ unless ($clip_r1 > 0 and $clip_r1 < 100){\n+ die "The 5\' clipping value for read 1 should have a sensible value (> 0 and < read length)\\n\\n";\n+ }\n+ }\n+\n+ if (defined $clip_r2){ # trimming 5\' bases of read 2\n+ unless ($clip_r2 > 0 and $clip_r2 < 100){\n+ die "The 5\' clipping value for read 2 should have a sensible value (> 0 and < read length)\\n\\n";\n+ }\n+ }\n+\n+\n+ return ($quality,$adapter,$stringency,$rrbs,$length_cutoff,$keep,$fastqc,$non_directional,$phred_encoding,$fastqc_args,$trim,$gzip,$validate,$retain,$length_read_1,$length_read_2,$adapter2,$error_rate,$output_dir,$no_report_file,$dont_gzip,$clip_r1,$clip_r2);\n }\n \n \n@@ -1065,8 +1172,11 @@\n -e <ERROR RATE> Maximum allowed error rate (no. of errors divided by the length of the matching\n region) (default: 0.1)\n \n---gzip Compress the output file with gzip. If the input files are gzip-compressed\n- the output files will be automatically gzip compressed as well.\n+--gzip Compress the output file with GZIP. If the input files are GZIP-compressed\n+ the output files will automatically be GZIP compressed as well. As of v0.2.8 the\n+ compression will take place on the fly.\n+\n+--dont_gzip Output files won\'t be compressed with GZIP. This option overrides --gzip.\n \n --length <INT> Discard reads that became shorter than length INT because of either\n quality or adapter trimming. A value of \'0\' effectively disables\n@@ -1084,6 +1194,17 @@\n \n --suppress_warn If specified any output to STDOUT or STDERR will be suppressed.\n \n+--clip_R1 <int> Instructs Trim Galore to remove <int> bp from the 5\' end of read 1 (or single-end\n+ reads). This may be useful if the qualities were very poor, or if there is some\n+ sort of unwanted bias at the 5\' end. Default: OFF.\n+\n+--clip_R2 <int> Instructs Trim Galore to remove <int> bp from the 5\' end of read 2 (paired-end reads\n+ only). This may be useful if the qualities were very poor, or if there is some sort\n+ of unwanted bias at the 5\' end. For paired-end BS-Seq, it is recommended to remove\n+ the first few bp because the end-repair reaction may introduce a bias towards low\n+ methylation. Please refer to the M-bias plot section in the Bismark User Guide for\n+ some examples. Default: OFF.\n+\n \n \n RRBS-specific options (MspI digested material):\n@@ -1152,7 +1273,7 @@\n Default: 35 bp.\n \n \n-Last modified on 18 Oct 2012.\n+Last modified on 15 July 2013.\n \n HELP\n exit;\n' |
b |
diff -r 3c1664caa8e3 -r 898db63d2e84 trim_galore_wrapper.xml --- a/trim_galore_wrapper.xml Sat Jul 06 09:52:23 2013 -0400 +++ b/trim_galore_wrapper.xml Wed Jul 17 15:05:43 2013 -0400 |
b |
b'@@ -1,5 +1,5 @@\n-<tool id="trim_galore" name="Trim Galore" version="0.2.4.1">\n- <!-- Wrapper compatible with Trim Galore version 0.2.4.0 -->\n+<tool id="trim_galore" name="Trim Galore" version="0.2.8">\n+ <!-- Wrapper compatible with Trim Galore version 0.2.8 -->\n <description>adaptive quality and adapter trimmer</description>\n <version_command interpreter="perl">trim_galore --version</version_command>\n <requirements>\n@@ -9,7 +9,37 @@\n <command interpreter="perl">\n #from glob import glob\n #import tempfile, os\n- \n+\n+ ##\n+ ## Creating a temporary directory where trim_galore will store all result files\n+ ##\n+\n+ #set $temp_dir = os.path.abspath(tempfile.mkdtemp())\n+\n+\n+ ## trim_galore removes .fastq and .fq file extensions of input files. \n+ ## That is essential if Galaxy provides links to files (these can have real extensions), but that behaviour is causing an inconsitency in output filenaming.\n+ ## Fix: link every file to $TMP without file extension\n+\n+ #if $singlePaired.sPaired == "single":\n+ #set $input_singles_tmp_handle = tempfile.NamedTemporaryFile( dir=$temp_dir )\n+ #set $input_singles_tmp = $input_singles_tmp_handle.name\n+ #silent $input_singles_tmp_handle.close()\n+ #silent os.system("ln -s %s %s" % (str($singlePaired.input_singles), $input_singles_tmp))\n+ #else:\n+ #set $input_mate1_tmp_handle = tempfile.NamedTemporaryFile( dir=$temp_dir )\n+ #set $input_mate2_tmp_handle = tempfile.NamedTemporaryFile( dir=$temp_dir )\n+ \n+ #set $input_mate1_tmp = $input_mate1_tmp_handle.name\n+ #silent $input_mate1_tmp_handle.close()\n+\n+ #set $input_mate2_tmp = $input_mate2_tmp_handle.name\n+ #silent $input_mate2_tmp_handle.close()\n+\n+ #silent os.system("ln -s %s %s" % (str($singlePaired.input_mate1), $input_mate1_tmp))\n+ #silent os.system("ln -s %s %s" % (str($singlePaired.input_mate2), $input_mate2_tmp))\n+ #end if\n+\n trim_galore\n \n ##\n@@ -35,6 +65,14 @@\n ## default 20\n --length $params.min_length\n \n+ #if int($params.clip_R1) > 0:\n+ --clip_R1 $params.clip_R1\n+ #end if\n+ \n+ #if int($params.clip_R2) > 0:\n+ --clip_R2 $params.clip_R2\n+ #end if\n+\n #if $params.retain_unpaired.settingsType == "retain_unpaired_output":\n --retain_unpaired\n --length_1 $params.retain_unpaired.length_1\n@@ -54,12 +92,6 @@\n \n #end if\n \n- ##\n- ## Creating a temporary directory where trim_galore will store all result files\n- ##\n-\n- #set $temp_dir = os.path.abspath(tempfile.mkdtemp())\n-\n --output_dir $temp_dir\n --suppress_warn\n \n@@ -79,7 +111,7 @@\n #end if\n \n ## input sequence\n- $singlePaired.input_singles\n+ $input_singles_tmp\n #else:\n --paired \n #if $singlePaired.input_mate1.ext == "fastqillumina":\n@@ -100,8 +132,8 @@\n #end if\n \n ## input sequences\n- $singlePaired.input_mate1\n- $singlePaired.input_mate2\n+ $input_mate1_tmp\n+ $input_mate2_tmp\n \n #end if\n \n@@ -113,32 +145,32 @@\n \n \n #if $singlePaired.sPaired == "single":\n- #set $single_end_path = os.path.join($temp_dir, os.path.basename(str($singlePaired.input_singles)) + \'_trimmed.fq\')\n+ #set $single_end_path = os.path.join($temp_dir, os.path.basename(str($input_singles_tmp)) + \'_trimmed.fq\')\n mv $single_end_path $trimmed_reads_single;\n \n #if $params.settingsType == "custom":\n #if $params.report:\n- #set $report_path = os.path.join($temp_dir, os.path.basename(str($singlePaired.input_singles)) + \'_trimming_report.tx'..b' input is treated\n- as though all quality values are high. This is also the default behavior when the input\n- doesn\'t specify quality values (e.g. in -f mode). This option is invariable and on by default.\n-\n-\n-Bowtie 2 paired-end options::\n-\n- --no-mixed This option disables Bowtie 2\'s behavior to try to find alignments for the individual mates if\n- it cannot find a concordant or discordant alignment for a pair. This option is invariable and\n- and on by default.\n-\n- --no-discordant Normally, Bowtie 2 looks for discordant alignments if it cannot find any concordant alignments.\n- A discordant alignment is an alignment where both mates align uniquely, but that does not\n- satisfy the paired-end constraints (--fr/--rf/--ff, -I, -X). This option disables that behavior\n- and it is on by default.\n-\n-\n-Bowtie 2 effort options::\n-\n- -D INT Up to INT consecutive seed extension attempts can "fail" before Bowtie 2 moves on, using\n- the alignments found so far. A seed extension "fails" if it does not yield a new best or a\n- new second-best alignment. Default: 15.\n-\n- -R INT INT is the maximum number of times Bowtie 2 will "re-seed" reads with repetitive seeds.\n- When "re-seeding," Bowtie 2 simply chooses a new set of reads (same length, same number of\n- mismatches allowed) at different offsets and searches for more alignments. A read is considered\n- to have repetitive seeds if the total number of seed hits divided by the number of seeds\n- that aligned at least once is greater than 300. Default: 2.\n-\n-\n-Bowtie 2 Scoring options::\n-\n- --score_min "func" Sets a function governing the minimum alignment score needed for an alignment to be considered\n- "valid" (i.e. good enough to report). This is a function of read length. For instance, specifying\n- L,0,-0.2 sets the minimum-score function f to f(x) = 0 + -0.2 * x, where x is the read length.\n- See also: setting function options at http://bowtie-bio.sourceforge.net/bowtie2. The default is\n- L,0,-0.2.\n-\n-\n-Bowtie 2 Reporting options::\n-\n- --most_valid_alignments INT This used to be the Bowtie 2 parameter -M. As of Bowtie 2 version 2.0.0 beta7 the option -M is\n- deprecated. It will be removed in subsequent versions. What used to be called -M mode is still the\n- default mode, but adjusting the -M setting is deprecated. Use the -D and -R options to adjust the\n- effort expended to find valid alignments.\n-\n- For reference, this used to be the old (now deprecated) description of -M:\n- Bowtie 2 searches for at most INT+1 distinct, valid alignments for each read. The search terminates when it\n- can\'t find more distinct valid alignments, or when it finds INT+1 distinct alignments, whichever\n- happens first. Only the best alignment is reported. Information from the other alignments is used to\n- estimate mapping quality and to set SAM optional fields, such as AS:i and XS:i. Increasing -M makes \n- Bowtie 2 slower, but increases the likelihood that it will pick the correct alignment for a read that\n- aligns many places. For reads that have more than INT+1 distinct, valid alignments, Bowtie 2 does not\n- guarantee that the alignment reported is the best possible in terms of alignment score. -M is\n- always used and its default value is set to 10.\n \n </help>\n </tool>\n' |