comparison tools/seq_primer_clip/seq_primer_clip.xml @ 2:ee5acea162a7 draft

Uploaded v0.0.10, README now using RST, MIT licence, automatic Biopython dependency
author peterjc
date Thu, 24 Oct 2013 09:37:25 -0400
parents
children 9b074c1db68e
comparison
equal deleted inserted replaced
1:8c02a91a8680 2:ee5acea162a7
1 <tool id="seq_primer_clip" name="Primer clip sequences" version="0.0.10">
2 <description>Trim off 5' or 3' primers</description>
3 <requirements> <!-- Biopython is declared twice: as a versioned package and as the Python module "Bio" -->
4 <requirement version="1.62" type="package">biopython</requirement>
5 <requirement type="python-module">Bio</requirement>
6 </requirements>
7 <version_command interpreter="python">seq_primer_clip.py --version</version_command>
8 <command interpreter="python">
9 seq_primer_clip.py '$input_file' '$input_file.ext' '$primer_fasta' '$primer_type' '$mm' '$min_len' '$keep_negatives' '$output_file'
10 </command> <!-- Arguments are positional, so their order must not change; each substituted value is single-quoted so that a path containing spaces or shell metacharacters still reaches the script as exactly one argument -->
11 <stdio>
12 <!-- Any non-zero exit status, negative or positive, marks the job as failed -->
13 <exit_code range=":-1" level="fatal" />
14 <exit_code range="1:" level="fatal" />
15 </stdio>
16 <inputs> <!-- User-facing parameters; each name below is substituted into the command template. Field hints use the "help" attribute (not "description") so that they are rendered under the field -->
17 <param name="input_file" type="data" format="fasta,fastq,sff" label="Sequence file to clip" help="FASTA, FASTQ, or SFF format."/>
18 <param name="primer_fasta" type="data" format="fasta" label="FASTA file containing primer(s)"/>
19 <param name="primer_type" type="select" label="Type of primers">
20 <option value="Forward">Forward (5') primers</option>
21 <option value="Reverse">Reverse (3') primers (given with respect to the forward strand)</option>
22 <option value="Reverse-complement">Reverse (3') primers (given with respect to the reverse strand)</option>
23 </param>
24 <param name="mm" type="integer" value="0" label="How many mismatches to allow? (0, 1 or 2)">
25 <validator type="in_range" min="0" max="2" />
26 </param>
27 <param name="keep_negatives" type="boolean" value="false" label="Keep reads with no matched primer"/>
28 <param name="min_len" type="integer" label="Minimum length for (clipped) sequences " value="1"/>
29 </inputs>
30 <outputs>
31 <data name="output_file" format="data" label="$primer_type primer clipped"> <!-- Generic "data" default; the table below switches the format to match the extension of input_file -->
32 <!-- TODO - Replace this table with a format taken directly from the input_file dataset (originally noted as format="input:input_fastq", but the parameter is named input_file) if/when that works -->
33 <change_format>
34 <when input_dataset="input_file" attribute="extension" value="sff" format="sff" />
35 <when input_dataset="input_file" attribute="extension" value="fasta" format="fasta" />
36 <when input_dataset="input_file" attribute="extension" value="fastq" format="fastq" />
37 <when input_dataset="input_file" attribute="extension" value="fastqsanger" format="fastqsanger" />
38 <when input_dataset="input_file" attribute="extension" value="fastqsolexa" format="fastqsolexa" />
39 <when input_dataset="input_file" attribute="extension" value="fastqillumina" format="fastqillumina" />
40 <when input_dataset="input_file" attribute="extension" value="fastqcssanger" format="fastqcssanger" />
41 </change_format>
42 </data>
43 </outputs>
44 <tests> <!-- Three forward-primer runs (FASTA, FASTQ, SFF), then three reverse-primer runs whose inputs are the forward-clipped files named as outputs above them -->
45 <test> <!-- Forward primers, FASTA input, keep_negatives off -->
46 <param name="input_file" value="MID4_GLZRM4E04_rnd30.fasta" ftype="fasta" />
47 <param name="primer_fasta" value="dop_primers.fasta" />
48 <param name="primer_type" value="Forward" />
49 <param name="mm" value="2" />
50 <param name="keep_negatives" value="false" />
51 <param name="min_len" value="35" />
52 <output name="output_file" file="MID4_GLZRM4E04_rnd30_fclip.fasta" ftype="fasta" />
53 </test>
54 <test> <!-- Forward primers, FASTQ (Sanger) input, keep_negatives off -->
55 <param name="input_file" value="MID4_GLZRM4E04_rnd30.fastqsanger" ftype="fastqsanger" />
56 <param name="primer_fasta" value="dop_primers.fasta" />
57 <param name="primer_type" value="Forward" />
58 <param name="mm" value="2" />
59 <param name="keep_negatives" value="false" />
60 <param name="min_len" value="35" />
61 <output name="output_file" file="MID4_GLZRM4E04_rnd30_fclip.fastqsanger" ftype="fastqsanger" />
62 </test>
63 <test> <!-- Forward primers, SFF input, keep_negatives off -->
64 <param name="input_file" value="MID4_GLZRM4E04_rnd30.sff" ftype="sff" />
65 <param name="primer_fasta" value="dop_primers.fasta" />
66 <param name="primer_type" value="Forward" />
67 <param name="mm" value="2" />
68 <param name="keep_negatives" value="false" />
69 <param name="min_len" value="35" />
70 <output name="output_file" file="MID4_GLZRM4E04_rnd30_fclip.sff" ftype="sff" />
71 </test>
72 <test> <!-- Reverse primers, forward-clipped FASTA input, keep_negatives on -->
73 <param name="input_file" value="MID4_GLZRM4E04_rnd30_fclip.fasta" ftype="fasta" />
74 <param name="primer_fasta" value="dop_primers.fasta" />
75 <param name="primer_type" value="Reverse" />
76 <param name="mm" value="2" />
77 <param name="keep_negatives" value="true" />
78 <param name="min_len" value="35" />
79 <output name="output_file" file="MID4_GLZRM4E04_rnd30_frclip.fasta" ftype="fasta" />
80 </test>
81 <test> <!-- Reverse primers, forward-clipped FASTQ (Sanger) input, keep_negatives on -->
82 <param name="input_file" value="MID4_GLZRM4E04_rnd30_fclip.fastqsanger" ftype="fastqsanger" />
83 <param name="primer_fasta" value="dop_primers.fasta" />
84 <param name="primer_type" value="Reverse" />
85 <param name="mm" value="2" />
86 <param name="keep_negatives" value="true" />
87 <param name="min_len" value="35" />
88 <output name="output_file" file="MID4_GLZRM4E04_rnd30_frclip.fastqsanger" ftype="fastqsanger" />
89 </test>
90 <test> <!-- Reverse primers, forward-clipped SFF input, keep_negatives on -->
91 <param name="input_file" value="MID4_GLZRM4E04_rnd30_fclip.sff" ftype="sff" />
92 <param name="primer_fasta" value="dop_primers.fasta" />
93 <param name="primer_type" value="Reverse" />
94 <param name="mm" value="2" />
95 <param name="keep_negatives" value="true" />
96 <param name="min_len" value="35" />
97 <output name="output_file" file="MID4_GLZRM4E04_rnd30_frclip.sff" ftype="sff" />
98 </test>
99 </tests>
100 <!-- Dependencies are declared once, in the requirements element placed directly
101 after the tool description. The second requirements element that used to sit
102 here only repeated the python-module "Bio" entry, so it has been removed. -->
103 <help>
104
105 **What it does**
106
107 Looks for the given primer sequences (within the existing clipped sequence) and
108 further clips the reads to remove the primers and any preceding/trailing sequence.
109
110 Reads containing a forward primer are reduced to just the sequence after (and
111 excluding) the forward primer.
112
113 Reads containing a reverse primer are reduced to just the sequence before (and
114 excluding) the reverse primer.
115
116 Degenerate primers can be specified using the standard IUPAC ambiguity codes,
117 thus a primer with an N would match A, C, T or G (or any of the IUPAC ambiguity
118 codes) and so on.
119
120 Note that for SFF files only the clip/trim positions are edited - you will still
121 be able to extract the original full read (with any adapter sequence and poor
122 quality sequence) if you need to.
123
124 .. class:: warningmark
125
126 **Note**. This tool was initially written for Roche 454 data, and should also
127 work fine on Sanger or Ion Torrent data. However, it is probably too slow
128 for use on large Illumina datasets.
129
130
131 **Citation**
132
133 This tool uses Biopython. If you use this tool in scientific work leading to a
134 publication, please cite:
135
136 Cock et al 2009. Biopython: freely available Python tools for computational
137 molecular biology and bioinformatics. Bioinformatics 25(11) 1422-3.
138 http://dx.doi.org/10.1093/bioinformatics/btp163 pmid:19304878.
139
140 This tool is available to install into other Galaxy Instances via the Galaxy
141 Tool Shed at http://toolshed.g2.bx.psu.edu/view/peterjc/seq_primer_clip
142 </help>
143 </tool>