diff tools/metag_tools/short_reads_trim_seq.xml @ 0:9071e359b9a3

Uploaded
author xuebing
date Fri, 09 Mar 2012 19:37:19 -0500
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/metag_tools/short_reads_trim_seq.xml	Fri Mar 09 19:37:19 2012 -0500
@@ -0,0 +1,93 @@
+<tool id="trim_reads" name="Select high quality segments" version="1.0.0">
+<description></description>
+<!-- Runs short_reads_trim_seq.py with the python interpreter. Positional arguments, in order: trim, length, output1, input1, input2, then the input3 value of the sequencing_method_choice conditional. -->
+<command interpreter="python">
+ short_reads_trim_seq.py $trim $length $output1 $input1 $input2 $sequencing_method_choice.input3
+</command>
+<inputs> <!-- reads, their quality scores, two thresholds, and one technology-specific option -->
+<page>
+    <param name="input1" type="data" format="fasta" label="Reads" />
+    <param name="input2" type="data" format="qualsolexa,qual454" label="Quality scores" />
+    <param name="trim" type="integer" size="5" value="20" label="Minimal quality score" help="bases scoring below this value will trigger splitting"/>
+    <param name="length" type="integer" size="5" value="100" label="Minimal length of contiguous segment" help="report all high quality segments above this length. Setting this option to '0' will cause the program to return a single longest run of high quality bases per read" />
+    <conditional name="sequencing_method_choice"> <!-- input3 is a yes/no select for 454 or SOLiD and an integer for Solexa -->
+        <param name="sequencer" type="select" label="Select technology">
+            <option value="454">Roche (454) or ABI SOLiD</option>
+            <option value="Solexa">Illumina (Solexa)</option>
+        </param>
+        <when value="454">
+            <param name="input3" type="select" label="Low quality bases in homopolymers" help="if set to 'DO NOT trigger splitting' the program will not count low quality bases that are within or adjacent to homonucleotide runs.  This will significantly reduce fragmentation of 454 data">
+                <option value="yes">DO NOT trigger splitting</option>
+                <option value="no">trigger splitting</option>
+            </param>
+        </when>
+        <when value="Solexa">
+            <param name="input3" type="integer" size="5" value="0" label="Restrict length of each read to" help="('0' = do not trim) The quality of Solexa reads drops towards the end. This option allows selecting the specified number of nucleotides from the beginning and then running the tool." />
+        </when>
+    </conditional>
+</page>
+</inputs>
+<!-- Single FASTA output dataset, referenced as $output1 on the command line. -->
+<outputs>
+    <data name="output1" format="fasta" />
+</outputs>
+<!-- Functional tests: one run per technology branch, each with trim=20 and length=0. -->
+<tests>
+    <test>
+        <param name="sequencer" value="454" />
+        <param name="input1" value="454.fasta" ftype="fasta" />
+        <param name="input2" value="454.qual" ftype="qual454" />
+        <param name="input3" value="no" />
+        <param name="trim" value="20" />
+        <param name="length" value="0" />
+        <output name="output1" file="short_reads_trim_seq_out1.fasta" />
+    </test>
+    <test>
+        <param name="sequencer" value="Solexa" />
+        <param name="input1" value="solexa.fasta" ftype="fasta" />
+        <param name="input2" value="solexa.qual" ftype="qualsolexa" />
+        <param name="input3" value="0" />
+        <param name="trim" value="20" />
+        <param name="length" value="0" />
+        <output name="output1" file="short_reads_trim_seq_out2.fasta" />
+    </test>
+</tests>
+
+<help>
+  
+.. class:: warningmark
+
+To use this tool, your dataset needs to be in the *Quality Score* format. Click the pencil icon next to your dataset to set the datatype to *Quality Score* (see below for examples).
+ 
+-----
+
+**What it does**
+
+This tool finds high quality segments within sequencing reads generated by Roche (454), Illumina (Solexa), or ABI SOLiD machines.
+
+-----
+
+**Example**
+
+
+Suppose this is your sequencing read::
+  
+   5'---------*-------------*------**----3'
+   
+where **dashes** (-) are HIGH quality bases (above 20) and **asterisks** (*) are LOW quality bases (below 20). If the **Minimal length of contiguous segment** is set to **5** (of course, only for the purposes of this example), the tool will return::
+
+   5'---------
+               -------------
+                             -------
+
+You can see that the tool simply splits the read on low quality bases and then returns all segments longer than 5. **Note** that the output of this tool will likely contain a higher number of shorter sequences compared to the original input. If we set the **Minimal length of contiguous segment** to **0**, the tool will only return the single longest segment::
+
+               -------------
+               
+
+               
+
+
+
+</help>
+</tool>