Mercurial > repos > peterjc > seq_primer_clip
diff tools/primers/seq_primer_clip.xml.orig @ 1:8c02a91a8680 draft
Uploaded v0.0.9, modifies tests to cope with current Tool Shed limitation.
author | peterjc |
---|---|
date | Tue, 30 Apr 2013 11:04:43 -0400 |
parents | |
children |
line wrap: on
line diff
<!--
  Galaxy tool wrapper for seq_primer_clip.py.

  Recovered from the added ("+") side of a unified diff against /dev/null
  (hunk 0,0 to 1,137) for b/tools/primers/seq_primer_clip.xml.orig,
  dated Tue Apr 30 11:04:43 2013 -0400.

  NOTE(review): the changeset message mentions v0.0.9 while this .orig copy
  declares version 0.0.8; presumably a stale backup file. Confirm before reuse.
-->
<tool id="seq_primer_clip" name="Primer clip sequences" version="0.0.8">
    <description>Trim off 5' or 3' primers</description>
    <!-- The script reports its own version when called with the version switch. -->
    <version_command interpreter="python">seq_primer_clip.py --version</version_command>
    <!--
      Positional arguments, in order: input file, input datatype extension,
      primer FASTA, primer type, allowed mismatches, minimum length,
      keep-negatives flag, output file.
    -->
    <command interpreter="python">
seq_primer_clip.py $input_file $input_file.ext $primer_fasta $primer_type $mm $min_len $keep_negatives $output_file
    </command>
    <stdio>
        <!-- Anything other than zero is an error -->
        <exit_code range="1:" />
        <exit_code range=":-1" />
    </stdio>
    <inputs>
        <!-- Hint text lives in "help"; the original used "description", which is not a param attribute. -->
        <param name="input_file" type="data" format="fasta,fastq,sff" label="Sequence file to clip" help="FASTA, FASTQ, or SFF format."/>
        <param name="primer_fasta" type="data" format="fasta" label="FASTA file containing primer(s)"/>
        <param name="primer_type" type="select" label="Type of primers">
            <option value="Forward">Forward (5') primers</option>
            <option value="Reverse">Reverse (3') primers (given with respect to the forward strand)</option>
            <option value="Reverse-complement">Reverse (3') primers (given with respect to the reverse strand)</option>
        </param>
        <!-- The validator restricts the mismatch count to the 0..2 range stated in the label. -->
        <param name="mm" type="integer" value="0" label="How many mismatches to allow? (0, 1 or 2)">
            <validator type="in_range" min="0" max="2" />
        </param>
        <!-- NOTE(review): presumably expands to the string true/false on the command line; confirm against the script. -->
        <param name="keep_negatives" type="boolean" value="false" label="Keep reads with no matched primer"/>
        <param name="min_len" type="integer" label="Minimum length for (clipped) sequences " value="1"/>
    </inputs>
    <outputs>
        <data name="output_file" format="data" label="$primer_type primer clipped">
            <!-- TODO - Replace this with a format taken directly from input_file if/when that works -->
            <!-- Until then, each supported input extension is mapped to the identical output format. -->
            <change_format>
                <when input_dataset="input_file" attribute="extension" value="sff" format="sff" />
                <when input_dataset="input_file" attribute="extension" value="fasta" format="fasta" />
                <when input_dataset="input_file" attribute="extension" value="fastq" format="fastq" />
                <when input_dataset="input_file" attribute="extension" value="fastqsanger" format="fastqsanger" />
                <when input_dataset="input_file" attribute="extension" value="fastqsolexa" format="fastqsolexa" />
                <when input_dataset="input_file" attribute="extension" value="fastqillumina" format="fastqillumina" />
                <when input_dataset="input_file" attribute="extension" value="fastqcssanger" format="fastqcssanger" />
            </change_format>
        </data>
    </outputs>
    <tests>
        <!-- Forward primer clipping, one test per input format (FASTA, FASTQ Sanger, SFF). -->
        <test>
            <param name="input_file" value="primers/MID4_GLZRM4E04_rnd30.fasta" ftype="fasta" />
            <param name="primer_fasta" value="primers/dop_primers.fasta" />
            <param name="primer_type" value="Forward" />
            <param name="mm" value="2" />
            <param name="keep_negatives" value="false" />
            <param name="min_len" value="35" />
            <output name="output_file" file="primers/MID4_GLZRM4E04_rnd30_fclip.fasta" ftype="fasta" />
        </test>
        <test>
            <param name="input_file" value="primers/MID4_GLZRM4E04_rnd30.fastqsanger" ftype="fastqsanger" />
            <param name="primer_fasta" value="primers/dop_primers.fasta" />
            <param name="primer_type" value="Forward" />
            <param name="mm" value="2" />
            <param name="keep_negatives" value="false" />
            <param name="min_len" value="35" />
            <output name="output_file" file="primers/MID4_GLZRM4E04_rnd30_fclip.fastqsanger" ftype="fastqsanger" />
        </test>
        <test>
            <param name="input_file" value="primers/MID4_GLZRM4E04_rnd30.sff" ftype="sff" />
            <param name="primer_fasta" value="primers/dop_primers.fasta" />
            <param name="primer_type" value="Forward" />
            <param name="mm" value="2" />
            <param name="keep_negatives" value="false" />
            <param name="min_len" value="35" />
            <output name="output_file" file="primers/MID4_GLZRM4E04_rnd30_fclip.sff" ftype="sff" />
        </test>
        <!-- Reverse primer clipping; inputs are the forward-clipped files expected from the tests above. -->
        <test>
            <param name="input_file" value="primers/MID4_GLZRM4E04_rnd30_fclip.fasta" ftype="fasta" />
            <param name="primer_fasta" value="primers/dop_primers.fasta" />
            <param name="primer_type" value="Reverse" />
            <param name="mm" value="2" />
            <param name="keep_negatives" value="true" />
            <param name="min_len" value="35" />
            <output name="output_file" file="primers/MID4_GLZRM4E04_rnd30_frclip.fasta" ftype="fasta" />
        </test>
        <test>
            <param name="input_file" value="primers/MID4_GLZRM4E04_rnd30_fclip.fastqsanger" ftype="fastqsanger" />
            <param name="primer_fasta" value="primers/dop_primers.fasta" />
            <param name="primer_type" value="Reverse" />
            <param name="mm" value="2" />
            <param name="keep_negatives" value="true" />
            <param name="min_len" value="35" />
            <output name="output_file" file="primers/MID4_GLZRM4E04_rnd30_frclip.fastqsanger" ftype="fastqsanger" />
        </test>
        <test>
            <param name="input_file" value="primers/MID4_GLZRM4E04_rnd30_fclip.sff" ftype="sff" />
            <param name="primer_fasta" value="primers/dop_primers.fasta" />
            <param name="primer_type" value="Reverse" />
            <param name="mm" value="2" />
            <param name="keep_negatives" value="true" />
            <param name="min_len" value="35" />
            <output name="output_file" file="primers/MID4_GLZRM4E04_rnd30_frclip.sff" ftype="sff" />
        </test>
    </tests>
    <requirements>
        <!-- The help text below identifies this module as Biopython. -->
        <requirement type="python-module">Bio</requirement>
    </requirements>
    <help>

**What it does**

Looks for the given primer sequences (within the existing clipped sequence) and
further clips the reads to remove the primers and any preceding/trailing sequence.

Reads containing a forward primer are reduced to just the sequence after (and
excluding) the forward primer.

Reads containing a reverse primer are reduced to just the sequence before (and
excluding) the reverse primer.

Degenerate primers can be specified using the standard IUPAC ambiguity codes,
thus a primer with an N would match A, C, T or G (or any of the IUPAC ambiguity
codes) and so on.

Note that for SFF files only the clip/trim positions are edited - you will still
be able to extract the original full read (with any adapter sequence and poor
quality sequence) if you need to.

.. class:: warningmark

**Note**. This tool was initially written for Roche 454 data, and should also
work fine on Sanger or Ion Torrent as well. However, it is probably too slow
for use on large Illumina datasets.


**Citation**

This tool uses Biopython. If you use this tool in scientific work leading to a
publication, please cite:

Cock et al 2009. Biopython: freely available Python tools for computational
molecular biology and bioinformatics. Bioinformatics 25(11) 1422-3.
http://dx.doi.org/10.1093/bioinformatics/btp163 pmid:19304878.

    </help>
</tool>