Next changeset 1:898db63d2e84 (2013-07-17) |
Commit message:
Uploaded |
added:
tool_dependencies.xml trim_galore trim_galore_wrapper.xml |
b |
diff -r 000000000000 -r 3c1664caa8e3 tool_dependencies.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool_dependencies.xml Sat Jul 06 09:52:23 2013 -0400 |
b |
@@ -0,0 +1,43 @@ +<?xml version="1.0"?> +<tool_dependency> + <package name="cutadapt" version="1.1"> + <install version="1.0"> + <actions> + <action type="download_by_url">http://cutadapt.googlecode.com/files/cutadapt-1.1.tar.gz</action> + <action type="move_directory_files"> + <source_directory>bin</source_directory> + <destination_directory>$INSTALL_DIR/bin</destination_directory> + </action> + <action type="move_directory_files"> + <source_directory>cutadapt</source_directory> + <destination_directory>$INSTALL_DIR/cutadapt</destination_directory> + </action> + <action type="set_environment"> + <environment_variable name="PATH" action="prepend_to">$INSTALL_DIR/bin</environment_variable> + </action> + </actions> + </install> + <readme> + </readme> + </package> + <package name="fastqc" version="0.10.1"> + <install version="1.0"> + <actions> + <action type="download_by_url">http://www.bioinformatics.babraham.ac.uk/projects/fastqc/fastqc_v0.10.1.zip</action> + <action type="move_directory_files"> + <source_directory>../FastQC/</source_directory> + <destination_directory>$INSTALL_DIR/FastQC</destination_directory> + </action> + <action type="set_environment"> + <environment_variable name="PATH" action="prepend_to">$INSTALL_DIR/FastQC</environment_variable> + </action> + </actions> + </install> + <readme> + FastQC needs a java Runtime Environment. + </readme> + </package> +</tool_dependency> + + + |
b |
diff -r 000000000000 -r 3c1664caa8e3 trim_galore --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/trim_galore Sat Jul 06 09:52:23 2013 -0400 |
[ |
b'@@ -0,0 +1,1159 @@\n+#!/usr/bin/perl\n+use strict;\n+use warnings;\n+use Getopt::Long;\n+use IPC::Open3;\n+use File::Spec;\n+use File::Basename;\n+use Cwd;\n+\n+## This program is Copyright (C) 2012, Felix Krueger (felix.krueger@babraham.ac.uk)\n+\n+## This program is free software: you can redistribute it and/or modify\n+## it under the terms of the GNU General Public License as published by\n+## the Free Software Foundation, either version 3 of the License, or\n+## (at your option) any later version.\n+\n+## This program is distributed in the hope that it will be useful,\n+## but WITHOUT ANY WARRANTY; without even the implied warranty of\n+## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the\n+## GNU General Public License for more details.\n+\n+## You should have received a copy of the GNU General Public License\n+## along with this program. If not, see <http://www.gnu.org/licenses/>.\n+\n+\n+\n+## this script is taking in FastQ sequences and trims them with Cutadapt\n+## last modified on 18 10 2012\n+\n+########################################################################\n+\n+# change these paths if needed\n+\n+my $path_to_cutadapt = \'cutadapt\';\n+my $path_to_fastqc = \'fastqc\';\n+\n+########################################################################\n+\n+\n+my $trimmer_version = \'0.2.5\';\n+my $DOWARN = 1; # print on screen warning and text by default\n+BEGIN { $SIG{\'__WARN__\'} = sub { warn $_[0] if $DOWARN } };\n+\n+my ($cutoff,$adapter,$stringency,$rrbs,$length_cutoff,$keep,$fastqc,$non_directional,$phred_encoding,$fastqc_args,$trim,$gzip,$validate,$retain,$length_read_1,$length_read_2,$a2,$error_rate,$output_dir,$no_report_file) = process_commandline();\n+\n+### SETTING DEFAULTS UNLESS THEY WERE SPECIFIED\n+unless (defined $cutoff){\n+ $cutoff = 20;\n+}\n+my $phred_score_cutoff = $cutoff; # only relevant for report\n+\n+unless (defined $adapter){\n+ $adapter = \'AGATCGGAAGAGC\';\n+}\n+unless (defined $a2){ # optional adapter for the second read in a pair. Only works for --paired trimming\n+ $a2 = \'\';\n+}\n+\n+unless (defined $stringency){\n+ $stringency = 1;\n+}\n+\n+unless (defined $length_cutoff){\n+ $length_cutoff = 20;\n+}\n+\n+if ($phred_encoding == 64){\n+ $cutoff += 31;\n+}\n+\n+my @filenames = @ARGV;\n+\n+my $file_1;\n+my $file_2;\n+\n+foreach my $filename (@ARGV){\n+ trim ($filename);\n+}\n+\n+\n+sub trim{\n+ my $filename = shift;\n+\n+ my $output_filename = (split (/\\//,$filename))[-1];\n+ # warn "Here is the outputfile name: $output_filename\\n";\n+\n+ my $report = $output_filename;\n+ $report =~ s/$/_trimming_report.txt/;\n+\n+ if ($no_report_file) {\n+ $report = File::Spec->devnull;\n+ open (REPORT,\'>\',$report) or die "Failed to write to file: $!\\n";\n+ # warn "Redirecting report output to /dev/null\\n";\n+ }\n+ else{\n+ open (REPORT,\'>\',$output_dir.$report) or die "Failed to write to file: $!\\n";\n+ warn "Writing report to \'$output_dir$report\'\\n";\n+ }\n+\n+ warn "\\nSUMMARISING RUN PARAMETERS\\n==========================\\nInput filename: $filename\\n";\n+ print REPORT "\\nSUMMARISING RUN PARAMETERS\\n==========================\\nInput filename: $filename\\n";\n+\n+ warn "Quality Phred score cutoff: $phred_score_cutoff\\n";\n+ print REPORT "Quality Phred score cutoff: $phred_score_cutoff\\n";\n+\n+ warn "Quality encoding type selected: ASCII+$phred_encoding\\n";\n+ print REPORT "Quality encoding type selected: ASCII+$phred_encoding\\n";\n+\n+ warn "Adapter sequence: \'$adapter\'\\n";\n+ print REPORT "Adapter sequence: \'$adapter\'\\n";\n+\n+ if ($error_rate == 0.1){\n+ warn "Maximum trimming error rate: $error_rate (default)\\n";\n+ }\n+ else{\n+ warn "Maximum trimming error rate: $error_rate\\n";\n+ }\n+\n+ print REPORT "Maximum trimming error rate: $error_rate";\n+ if ($error_rate == 0.1){\n+ print REPORT " (default)\\n";\n+ }\n+ else{\n+ print REPORT "\\n";\n+ }\n+\n+ if ($a2){\n+ warn "Optional adapter 2 sequence (only used for read 2 of paired-end files): \'$a2\'\\n";\n+ print REPORT "Optional adapter 2 sequence (only used for read 2 of paired-end files): \'$a2\'\\n";\n+ '..b"he\n+ second MspI site in a sequence is used for methylation calls. Sequences which\n+ were merely trimmed because of poor quality will not be shortened further.\n+\n+--non_directional Selecting this option for non-directional RRBS libraries will screen\n+ quality-trimmed sequences for 'CAA' or 'CGA' at the start of the read\n+ and, if found, removes the first two basepairs. Like with the option\n+ '--rrbs' this avoids using cytosine positions that were filled-in\n+ during the end-repair step. '--non_directional' requires '--rrbs' to\n+ be specified as well.\n+\n+--keep Keep the quality trimmed intermediate file. Default: off, which means\n+ the temporary file is being deleted after adapter trimming. Only has\n+ an effect for RRBS samples since other FastQ files are not trimmed\n+ for poor qualities separately.\n+\n+\n+Note for RRBS using MseI:\n+\n+If your DNA material was digested with MseI (recognition motif: TTAA) instead of MspI it is NOT necessary\n+to specify --rrbs or --non_directional since virtually all reads should start with the sequence\n+'TAA', and this holds true for both directional and non-directional libraries. As the end-repair of 'TAA'\n+restricted sites does not involve any cytosines it does not need to be treated especially. Instead, simply\n+run Trim Galore! in the standard (i.e. non-RRBS) mode.\n+\n+\n+Paired-end specific options:\n+\n+--paired This option performs length trimming of quality/adapter/RRBS trimmed reads for\n+ paired-end files. To pass the validation test, both sequences of a sequence pair\n+ are required to have a certain minimum length which is governed by the option\n+ --length (see above). If only one read passes this length threshold the\n+ other read can be rescued (see option --retain_unpaired). Using this option lets\n+ you discard too short read pairs without disturbing the sequence-by-sequence order\n+ of FastQ files which is required by many aligners.\n+\n+ Trim Galore! expects paired-end files to be supplied in a pairwise fashion, e.g.\n+ file1_1.fq file1_2.fq SRR2_1.fq.gz SRR2_2.fq.gz ... .\n+\n+-t/--trim1 Trims 1 bp off every read from its 3' end. This may be needed for FastQ files that\n+ are to be aligned as paired-end data with Bowtie. This is because Bowtie (1) regards\n+ alignments like this:\n+\n+ R1 ---------------------------> or this: -----------------------> R1\n+ R2 <--------------------------- <----------------- R2\n+\n+ as invalid (whenever a start/end coordinate is contained within the other read).\n+\n+--retain_unpaired If only one of the two paired-end reads became too short, the longer\n+ read will be written to either '.unpaired_1.fq' or '.unpaired_2.fq'\n+ output files. The length cutoff for unpaired single-end reads is\n+ governed by the parameters -r1/--length_1 and -r2/--length_2. Default: OFF.\n+\n+-r1/--length_1 <INT> Unpaired single-end read length cutoff needed for read 1 to be written to\n+ '.unpaired_1.fq' output file. These reads may be mapped in single-end mode.\n+ Default: 35 bp.\n+\n+-r2/--length_2 <INT> Unpaired single-end read length cutoff needed for read 2 to be written to\n+ '.unpaired_2.fq' output file. These reads may be mapped in single-end mode.\n+ Default: 35 bp.\n+\n+\n+Last modified on 18 Oct 2012.\n+\n+HELP\n+ exit;\n+}\n" |
b |
diff -r 000000000000 -r 3c1664caa8e3 trim_galore_wrapper.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/trim_galore_wrapper.xml Sat Jul 06 09:52:23 2013 -0400 |
b |
b'@@ -0,0 +1,577 @@\n+<tool id="trim_galore" name="Trim Galore" version="0.2.4.1">\n+ <!-- Wrapper compatible with Trim Galore version 0.2.4.0 -->\n+ <description>adaptive quality and adapter trimmer</description>\n+ <version_command interpreter="perl">trim_galore --version</version_command>\n+ <requirements>\n+ <requirement type="package" version="1.1">cutadapt</requirement>\n+ <requirement type="package" version="0.10.1">fastqc</requirement>\n+ </requirements>\n+ <command interpreter="perl">\n+ #from glob import glob\n+ #import tempfile, os\n+ \n+ trim_galore\n+\n+ ##\n+ ## Input parameters\n+ ##\n+\n+\n+ #if $params.settingsType == "custom":\n+\n+ $params.fastqc\n+ ## default 20\n+ --quality $params.quality\n+ ## default \'AGATCGGAAGAGC\'\n+ #if $params.adapter.strip() != \'\':\n+ --adapter $params.adapter\n+ #end if\n+ ## default 1\n+ --stringency $params.stringency\n+ \n+ ## default 0.1\n+ -e $params.error_rate\n+\n+ ## default 20\n+ --length $params.min_length\n+\n+ #if $params.retain_unpaired.settingsType == "retain_unpaired_output":\n+ --retain_unpaired\n+ --length_1 $params.retain_unpaired.length_1\n+ --length_2 $params.retain_unpaired.length_2\n+ #end if\n+\n+ #end if\n+\n+ ##\n+ ## RBBS specific options.\n+ ##\n+\n+ #if $rrbs.settingsType == "custom":\n+\n+ $rrbs.rrbs\n+ $rrbs.non_directional\n+\n+ #end if\n+\n+ ##\n+ ## Creating a temporary directory where trim_galore will store all result files\n+ ##\n+\n+ #set $temp_dir = os.path.abspath(tempfile.mkdtemp())\n+\n+ --output_dir $temp_dir\n+ --suppress_warn\n+\n+\n+ #if $singlePaired.sPaired == "single":\n+\n+ #if $singlePaired.input_singles.ext == "fastqillumina":\n+ --phred64\n+ #elif $singlePaired.input_singles.ext == "fastqsanger":\n+ --phred33\n+ #end if\n+\n+ #if $params.settingsType == "custom":\n+ #if not $params.report:\n+ --no_report_file\n+ #end if\n+ #end if\n+\n+ ## input sequence\n+ $singlePaired.input_singles\n+ #else:\n+ --paired \n+ #if $singlePaired.input_mate1.ext == "fastqillumina":\n+ --phred64\n+ #elif $singlePaired.input_mate1.ext == "fastqsanger":\n+ --phred33\n+ #end if\n+\n+ $singlePaired.trim1\n+ #if $singlePaired.adapter2.strip() != \'\':\n+ --adapter2 $singlePaired.adapter2\n+ #end if\n+\n+ #if $params.settingsType == "custom":\n+ #if not $params.report:\n+ --no_report_file\n+ #end if\n+ #end if\n+\n+ ## input sequences\n+ $singlePaired.input_mate1\n+ $singlePaired.input_mate2\n+\n+ #end if\n+\n+ &&\n+\n+ ##\n+ ## Trim Galore! run is finished. Move the result files to the proper place\n+ ##\n+\n+\n+ #if $singlePaired.sPaired == "single":\n+ #set $single_end_path = os.path.join($temp_dir, os.path.basename(str($singlePaired.input_singles)) + \'_trimmed.fq\')\n+ mv $single_end_path $trimmed_reads_single;\n+\n+ #if $params.settingsType == "custom":\n+ #if $params.report:\n+ #set $report_path = os.path.join($temp_dir, os.path.basename(str($singlePaired.input_singles)) + \'_trimming_report.txt\')\n+ mv $report_path $report_file;\n+ #end if\n+ #end if\n+\n+ #else:\n+ #set $paired_end_path_1 = os.path.join($temp_dir, os.path.basename(str($singlePaired.input_mate1)) + \'_val_1.fq\')\n+ #set $'..b' input is treated\n+ as though all quality values are high. This is also the default behavior when the input\n+ doesn\'t specify quality values (e.g. in -f mode). This option is invariable and on by default.\n+\n+\n+Bowtie 2 paired-end options::\n+\n+ --no-mixed This option disables Bowtie 2\'s behavior to try to find alignments for the individual mates if\n+ it cannot find a concordant or discordant alignment for a pair. This option is invariable and\n+ and on by default.\n+\n+ --no-discordant Normally, Bowtie 2 looks for discordant alignments if it cannot find any concordant alignments.\n+ A discordant alignment is an alignment where both mates align uniquely, but that does not\n+ satisfy the paired-end constraints (--fr/--rf/--ff, -I, -X). This option disables that behavior\n+ and it is on by default.\n+\n+\n+Bowtie 2 effort options::\n+\n+ -D INT Up to INT consecutive seed extension attempts can "fail" before Bowtie 2 moves on, using\n+ the alignments found so far. A seed extension "fails" if it does not yield a new best or a\n+ new second-best alignment. Default: 15.\n+\n+ -R INT INT is the maximum number of times Bowtie 2 will "re-seed" reads with repetitive seeds.\n+ When "re-seeding," Bowtie 2 simply chooses a new set of reads (same length, same number of\n+ mismatches allowed) at different offsets and searches for more alignments. A read is considered\n+ to have repetitive seeds if the total number of seed hits divided by the number of seeds\n+ that aligned at least once is greater than 300. Default: 2.\n+\n+\n+Bowtie 2 Scoring options::\n+\n+ --score_min "func" Sets a function governing the minimum alignment score needed for an alignment to be considered\n+ "valid" (i.e. good enough to report). This is a function of read length. For instance, specifying\n+ L,0,-0.2 sets the minimum-score function f to f(x) = 0 + -0.2 * x, where x is the read length.\n+ See also: setting function options at http://bowtie-bio.sourceforge.net/bowtie2. The default is\n+ L,0,-0.2.\n+\n+\n+Bowtie 2 Reporting options::\n+\n+ --most_valid_alignments INT This used to be the Bowtie 2 parameter -M. As of Bowtie 2 version 2.0.0 beta7 the option -M is\n+ deprecated. It will be removed in subsequent versions. What used to be called -M mode is still the\n+ default mode, but adjusting the -M setting is deprecated. Use the -D and -R options to adjust the\n+ effort expended to find valid alignments.\n+\n+ For reference, this used to be the old (now deprecated) description of -M:\n+ Bowtie 2 searches for at most INT+1 distinct, valid alignments for each read. The search terminates when it\n+ can\'t find more distinct valid alignments, or when it finds INT+1 distinct alignments, whichever\n+ happens first. Only the best alignment is reported. Information from the other alignments is used to\n+ estimate mapping quality and to set SAM optional fields, such as AS:i and XS:i. Increasing -M makes \n+ Bowtie 2 slower, but increases the likelihood that it will pick the correct alignment for a read that\n+ aligns many places. For reads that have more than INT+1 distinct, valid alignments, Bowtie 2 does not\n+ guarantee that the alignment reported is the best possible in terms of alignment score. -M is\n+ always used and its default value is set to 10.\n+\n+ </help>\n+</tool>\n' |