comparison tools/primers/seq_primer_clip.xml @ 0:945053d79e60 draft

Uploaded v0.0.8, first public release
author peterjc
date Mon, 29 Apr 2013 06:11:00 -0400
parents
children 8c02a91a8680
comparison
equal deleted inserted replaced
-1:000000000000 0:945053d79e60
1 <tool id="seq_primer_clip" name="Primer clip sequences" version="0.0.8">
2 <description>Trim off 5' or 3' primers</description>
3 <version_command interpreter="python">seq_primer_clip.py --version</version_command>
4 <!-- Positional arguments for seq_primer_clip.py: input path, input datatype extension, primer FASTA path, primer type, allowed mismatches, minimum length, keep-negatives flag, output path. Each substitution is single-quoted so it reaches the script as exactly one shell argument. --><command interpreter="python">
5 seq_primer_clip.py '$input_file' '$input_file.ext' '$primer_fasta' '$primer_type' '$mm' '$min_len' '$keep_negatives' '$output_file'
6 </command>
7 <stdio>
8 <!-- Anything other than zero is an error: the two ranges below cover exit codes of 1 and above, and of -1 and below. -->
9 <exit_code range="1:" />
10 <exit_code range=":-1" />
11 </stdio>
12 <inputs> <!-- User-facing parameters; each one is referenced by name as $name in the command line. -->
13 <param name="input_file" type="data" format="fasta,fastq,sff" label="Sequence file to clip" help="FASTA, FASTQ, or SFF format."/>
14 <param name="primer_fasta" type="data" format="fasta" label="FASTA file containing primer(s)"/>
15 <param name="primer_type" type="select" label="Type of primers">
16 <option value="Forward">Forward (5') primers</option>
17 <option value="Reverse">Reverse (3') primers (given with respect to the forward strand)</option>
18 <option value="Reverse-complement">Reverse (3') primers (given with respect to the reverse strand)</option>
19 </param>
20 <param name="mm" type="integer" value="0" label="How many mismatches to allow? (0, 1 or 2)">
21 <validator type="in_range" min="0" max="2" /> <!-- Enforces the 0 to 2 range stated in the label. -->
22 </param>
23 <param name="keep_negatives" type="boolean" value="false" label="Keep reads with no matched primer"/>
24 <param name="min_len" type="integer" label="Minimum length for (clipped) sequences " value="1"/>
25 </inputs>
26 <outputs>
27 <data name="output_file" format="data" label="$primer_type primer clipped">
28 <!-- The output is declared as generic "data"; change_format below switches it to the input dataset's own extension for each listed format. TODO - Replace this with format="input:input_fastq" if/when that works. NOTE(review): the input parameter in this file is named input_file, not input_fastq. -->
29 <change_format>
30 <when input_dataset="input_file" attribute="extension" value="sff" format="sff" />
31 <when input_dataset="input_file" attribute="extension" value="fasta" format="fasta" />
32 <when input_dataset="input_file" attribute="extension" value="fastq" format="fastq" />
33 <when input_dataset="input_file" attribute="extension" value="fastqsanger" format="fastqsanger" />
34 <when input_dataset="input_file" attribute="extension" value="fastqsolexa" format="fastqsolexa" />
35 <when input_dataset="input_file" attribute="extension" value="fastqillumina" format="fastqillumina" />
36 <when input_dataset="input_file" attribute="extension" value="fastqcssanger" format="fastqcssanger" />
37 </change_format>
38 </data>
39 </outputs>
40 <tests> <!-- Six tests: the first three clip Forward primers (2 mismatches, minimum length 35, negatives dropped) from FASTA, FASTQ (Sanger) and SFF inputs; the last three clip Reverse primers from the *_fclip files, keeping reads with no match. -->
41 <test>
42 <param name="input_file" value="primers/MID4_GLZRM4E04_rnd30.fasta" ftype="fasta" />
43 <param name="primer_fasta" value="primers/dop_primers.fasta" />
44 <param name="primer_type" value="Forward" />
45 <param name="mm" value="2" />
46 <param name="keep_negatives" value="false" />
47 <param name="min_len" value="35" />
48 <output name="output_file" file="primers/MID4_GLZRM4E04_rnd30_fclip.fasta" ftype="fasta" />
49 </test>
50 <test>
51 <param name="input_file" value="primers/MID4_GLZRM4E04_rnd30.fastqsanger" ftype="fastqsanger" />
52 <param name="primer_fasta" value="primers/dop_primers.fasta" />
53 <param name="primer_type" value="Forward" />
54 <param name="mm" value="2" />
55 <param name="keep_negatives" value="false" />
56 <param name="min_len" value="35" />
57 <output name="output_file" file="primers/MID4_GLZRM4E04_rnd30_fclip.fastqsanger" ftype="fastqsanger" />
58 </test>
59 <test>
60 <param name="input_file" value="primers/MID4_GLZRM4E04_rnd30.sff" ftype="sff" />
61 <param name="primer_fasta" value="primers/dop_primers.fasta" />
62 <param name="primer_type" value="Forward" />
63 <param name="mm" value="2" />
64 <param name="keep_negatives" value="false" />
65 <param name="min_len" value="35" />
66 <output name="output_file" file="primers/MID4_GLZRM4E04_rnd30_fclip.sff" ftype="sff" />
67 </test>
68 <test> <!-- From here on the inputs are the expected outputs of the three Forward tests above. -->
69 <param name="input_file" value="primers/MID4_GLZRM4E04_rnd30_fclip.fasta" ftype="fasta" />
70 <param name="primer_fasta" value="primers/dop_primers.fasta" />
71 <param name="primer_type" value="Reverse" />
72 <param name="mm" value="2" />
73 <param name="keep_negatives" value="true" />
74 <param name="min_len" value="35" />
75 <output name="output_file" file="primers/MID4_GLZRM4E04_rnd30_frclip.fasta" ftype="fasta" />
76 </test>
77 <test>
78 <param name="input_file" value="primers/MID4_GLZRM4E04_rnd30_fclip.fastqsanger" ftype="fastqsanger" />
79 <param name="primer_fasta" value="primers/dop_primers.fasta" />
80 <param name="primer_type" value="Reverse" />
81 <param name="mm" value="2" />
82 <param name="keep_negatives" value="true" />
83 <param name="min_len" value="35" />
84 <output name="output_file" file="primers/MID4_GLZRM4E04_rnd30_frclip.fastqsanger" ftype="fastqsanger" />
85 </test>
86 <test>
87 <param name="input_file" value="primers/MID4_GLZRM4E04_rnd30_fclip.sff" ftype="sff" />
88 <param name="primer_fasta" value="primers/dop_primers.fasta" />
89 <param name="primer_type" value="Reverse" />
90 <param name="mm" value="2" />
91 <param name="keep_negatives" value="true" />
92 <param name="min_len" value="35" />
93 <output name="output_file" file="primers/MID4_GLZRM4E04_rnd30_frclip.sff" ftype="sff" />
94 </test>
95 </tests>
96 <requirements> <!-- Declares the Python module "Bio" as a dependency; the help text of this tool names it as Biopython. -->
97 <requirement type="python-module">Bio</requirement>
98 </requirements>
99 <help>
100
101 **What it does**
102
103 Looks for the given primer sequences (within the existing clipped sequence) and
104 further clips the reads to remove the primers and any preceding/trailing sequence.
105
106 Reads containing a forward primer are reduced to just the sequence after (and
107 excluding) the forward primer.
108
109 Reads containing a reverse primer are reduced to just the sequence before (and
110 excluding) the reverse primer.
111
112 Degenerate primers can be specified using the standard IUPAC ambiguity codes,
113 thus a primer with an N would match A, C, T or G (or any of the IUPAC ambiguity
114 codes) and so on.
115
116 Note that for SFF files only the clip/trim positions are edited - you will still
117 be able to extract the original full read (with any adapter sequence and poor
118 quality sequence) if you need to.
119
120 .. class:: warningmark
121
122 **Note**. This tool was initially written for Roche 454 data, and should also
123 work fine on Sanger or Ion Torrent data. However, it is probably too slow
124 for use on large Illumina datasets.
125
126
127 **Citation**
128
129 This tool uses Biopython. If you use this tool in scientific work leading to a
130 publication, please cite:
131
132 Cock et al 2009. Biopython: freely available Python tools for computational
133 molecular biology and bioinformatics. Bioinformatics 25(11) 1422-3.
134 http://dx.doi.org/10.1093/bioinformatics/btp163 pmid:19304878.
135
136 </help>
137 </tool>