diff tools/seq_primer_clip/seq_primer_clip.xml @ 2:ee5acea162a7 draft

Uploaded v0.0.10, README now using RST, MIT licence, automatic Biopython dependency
author peterjc
date Thu, 24 Oct 2013 09:37:25 -0400
parents
children 9b074c1db68e
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/seq_primer_clip/seq_primer_clip.xml	Thu Oct 24 09:37:25 2013 -0400
@@ -0,0 +1,143 @@
+<tool id="seq_primer_clip" name="Primer clip sequences" version="0.0.10">
+    <description>Trim off 5' or 3' primers</description>
+    <requirements>
+        <requirement type="package" version="1.62">biopython</requirement>
+        <requirement type="python-module">Bio</requirement>
+    </requirements>
+    <version_command interpreter="python">seq_primer_clip.py --version</version_command>   
+    <command interpreter="python">
+seq_primer_clip.py $input_file $input_file.ext $primer_fasta $primer_type $mm $min_len $keep_negatives $output_file
+    </command>
+    <stdio>
+        <!-- Anything other than zero is an error -->
+        <exit_code range="1:" />
+        <exit_code range=":-1" />
+    </stdio>
+    <inputs>
+        <param name="input_file" type="data" format="fasta,fastq,sff" label="Sequence file to clip" description="FASTA, FASTQ, or SFF format."/>
+        <param name="primer_fasta" type="data" format="fasta" label="FASTA file containing primer(s)"/>
+        <param name="primer_type" type="select" label="Type of primers">
+            <option value="Forward">Forward (5') primers</option>
+            <option value="Reverse">Reverse (3') primers (given with respect to the forward strand)</option>
+            <option value="Reverse-complement">Reverse (3') primers (given with respect to the reverse strand)</option>
+        </param>
+        <param name="mm" type="integer" value="0" label="How many mismatches to allow? (0, 1 or 2)">
+            <validator type="in_range" min="0" max="2" />
+        </param>
+        <param name="keep_negatives" type="boolean" value="false" label="Keep reads with no matched primer"/>
+        <param name="min_len" type="integer" label="Minimum length for (clipped) sequences " value="1"/>
+    </inputs>
+    <outputs>
+        <data name="output_file" format="data" label="$primer_type primer clipped">
+            <!-- TODO - Replace this with format="input:input_fastq" if/when that works -->
+            <change_format>
+                <when input_dataset="input_file" attribute="extension" value="sff" format="sff" />
+                <when input_dataset="input_file" attribute="extension" value="fasta" format="fasta" />
+                <when input_dataset="input_file" attribute="extension" value="fastq" format="fastq" />
+                <when input_dataset="input_file" attribute="extension" value="fastqsanger" format="fastqsanger" />
+                <when input_dataset="input_file" attribute="extension" value="fastqsolexa" format="fastqsolexa" />
+                <when input_dataset="input_file" attribute="extension" value="fastqillumina" format="fastqillumina" />
+                <when input_dataset="input_file" attribute="extension" value="fastqcssanger" format="fastqcssanger" />
+            </change_format>
+        </data>
+    </outputs>
+    <tests>
+        <test>
+            <param name="input_file" value="MID4_GLZRM4E04_rnd30.fasta" ftype="fasta" />
+            <param name="primer_fasta" value="dop_primers.fasta" />
+            <param name="primer_type" value="Forward" />
+            <param name="mm" value="2" />
+            <param name="keep_negatives" value="false" />
+            <param name="min_len" value="35" />
+            <output name="output_file" file="MID4_GLZRM4E04_rnd30_fclip.fasta" ftype="fasta" />
+        </test>
+        <test>
+            <param name="input_file" value="MID4_GLZRM4E04_rnd30.fastqsanger" ftype="fastqsanger" />
+            <param name="primer_fasta" value="dop_primers.fasta" />
+            <param name="primer_type" value="Forward" />
+            <param name="mm" value="2" />
+            <param name="keep_negatives" value="false" />
+            <param name="min_len" value="35" />
+            <output name="output_file" file="MID4_GLZRM4E04_rnd30_fclip.fastqsanger" ftype="fastqsanger" />
+        </test>
+        <test>
+            <param name="input_file" value="MID4_GLZRM4E04_rnd30.sff" ftype="sff" />
+            <param name="primer_fasta" value="dop_primers.fasta" />
+            <param name="primer_type" value="Forward" />
+            <param name="mm" value="2" />
+            <param name="keep_negatives" value="false" />
+            <param name="min_len" value="35" />
+            <output name="output_file" file="MID4_GLZRM4E04_rnd30_fclip.sff" ftype="sff" />
+        </test>
+        <test>
+            <param name="input_file" value="MID4_GLZRM4E04_rnd30_fclip.fasta" ftype="fasta" />
+            <param name="primer_fasta" value="dop_primers.fasta" />
+            <param name="primer_type" value="Reverse" />
+            <param name="mm" value="2" />
+            <param name="keep_negatives" value="true" />
+            <param name="min_len" value="35" />
+            <output name="output_file" file="MID4_GLZRM4E04_rnd30_frclip.fasta" ftype="fasta" />
+        </test>
+        <test>
+            <param name="input_file" value="MID4_GLZRM4E04_rnd30_fclip.fastqsanger" ftype="fastqsanger" />
+            <param name="primer_fasta" value="dop_primers.fasta" />
+            <param name="primer_type" value="Reverse" />
+            <param name="mm" value="2" />
+            <param name="keep_negatives" value="true" />
+            <param name="min_len" value="35" />
+            <output name="output_file" file="MID4_GLZRM4E04_rnd30_frclip.fastqsanger" ftype="fastqsanger" />
+        </test>
+        <test>
+            <param name="input_file" value="MID4_GLZRM4E04_rnd30_fclip.sff" ftype="sff" />
+            <param name="primer_fasta" value="dop_primers.fasta" />
+            <param name="primer_type" value="Reverse" />
+            <param name="mm" value="2" />
+            <param name="keep_negatives" value="true" />
+            <param name="min_len" value="35" />
+            <output name="output_file" file="MID4_GLZRM4E04_rnd30_frclip.sff" ftype="sff" />
+        </test>
+    </tests>
+    <requirements>
+        <requirement type="python-module">Bio</requirement>
+    </requirements>
+    <help>
+
+**What it does**
+
+Looks for the given primer sequences (within the existing clipped sequence) and
+further clips the reads to remove the primers and any preceding/trailing sequence.
+
+Reads containing a forward primer are reduced to just the sequence after (and
+excluding) the forward primer.
+
+Reads containing a reverse primer are reduced to just the sequence before (and
+excluding) the reverse primer.
+
+Degenerate primers can be specified using the standard IUPAC ambiguity codes,
+thus a primer with an N would match A, C, T or G (or any of the IUPAC ambiguity
+codes) and so on.
+
+Note that for SFF files only the clip/trim positions are edited - you will still
+be able to extract the original full read (with any adapter sequence and poor
+quality sequence) if you need to.
+
+.. class:: warningmark
+
+**Note**. This tool was initially written for Roche 454 data, and should also
+work fine on Sanger or Ion Torrent as well. However, it is probably too slow
+for use on large Illumina datasets.
+
+
+**Citation**
+
+This tool uses Biopython. If you use this tool in scientific work leading to a
+publication, please cite:
+
+Cock et al 2009. Biopython: freely available Python tools for computational
+molecular biology and bioinformatics. Bioinformatics 25(11) 1422-3.
+http://dx.doi.org/10.1093/bioinformatics/btp163 pmid:19304878.
+
+This tool is available to install into other Galaxy Instances via the Galaxy
+Tool Shed at http://toolshed.g2.bx.psu.edu/view/peterjc/seq_primer_clip 
+    </help>
+</tool>