Mercurial > repos > peterjc > fastq_paired_unpaired

diff tools/fastq_paired_unpaired/fastq_paired_unpaired.xml @ 4:09f9f0e29e47 draft
v0.0.6 use format_source; v0.0.5 error handling & citation
author: peterjc
date: Wed, 05 Aug 2015 11:06:38 -0400
children: b38bbcbd458d
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/fastq_paired_unpaired/fastq_paired_unpaired.xml	Wed Aug 05 11:06:38 2015 -0400
@@ -0,0 +1,120 @@
+<tool id="fastq_paired_unpaired" name="Divide FASTQ file into paired and unpaired reads" version="0.1.1">
+    <description>using the read name suffices</description>
+    <requirements>
+        <requirement type="package" version="1.64">biopython</requirement>
+        <requirement type="python-module">Bio</requirement>
+    </requirements>
+    <stdio>
+        <!-- Anything other than zero is an error -->
+        <exit_code range="1:" />
+        <exit_code range=":-1" />
+    </stdio>
+    <version_command interpreter="python">fastq_paired_unpaired.py --version</version_command>
+    <command interpreter="python">
+fastq_paired_unpaired.py $input_fastq.extension $input_fastq
+#if $output_choice_cond.output_choice=="separate"
+ $output_forward $output_reverse
+#elif $output_choice_cond.output_choice=="interleaved"
+ $output_paired
+#end if
+$output_singles
+    </command>
+    <inputs>
+        <param name="input_fastq" type="data" format="fastq" label="FASTQ file to divide into paired and unpaired reads"/>
+        <conditional name="output_choice_cond">
+            <param name="output_choice" type="select" label="How to output paired reads?">
+                <option value="separate">Separate (two FASTQ files, for the forward and reverse reads, in matching order).</option>
+                <option value="interleaved">Interleaved (one FASTQ file, alternating forward read then partner reverse read).</option>
+            </param>
+            <!-- Seems need these dummy entries here, compare this to indels/indel_sam2interval.xml -->
+            <when value="separate" />
+            <when value="interleaved" />
+        </conditional>
+    </inputs>
+    <outputs>
+        <data name="output_singles" format_source="input_fastq" label="Orphan or single reads"/>
+        <data name="output_forward" format_source="input_fastq" label="Forward paired reads">
+            <filter>output_choice_cond["output_choice"] == "separate"</filter>
+        </data>
+        <data name="output_reverse" format_source="input_fastq" label="Reverse paired reads">
+            <filter>output_choice_cond["output_choice"] == "separate"</filter>
+        </data>
+        <data name="output_paired" format_source="input_fastq" label="Interleaved paired reads">
+            <filter>output_choice_cond["output_choice"] == "interleaved"</filter>
+        </data>
+    </outputs>
+    <tests>
+        <test>
+            <param name="input_fastq" value="sanger-pairs-mixed.fastq" ftype="fastq"/>
+            <param name="output_choice" value="separate"/>
+            <output name="output_singles" file="sanger-pairs-singles.fastq" ftype="fastq"/>
+            <output name="output_forward" file="sanger-pairs-forward.fastq" ftype="fastq"/>
+            <output name="output_reverse" file="sanger-pairs-reverse.fastq" ftype="fastq"/>
+        </test>
+        <test>
+            <param name="input_fastq" value="sanger-pairs-mixed.fastq" ftype="fastq"/>
+            <param name="output_choice" value="interleaved"/>
+            <output name="output_singles" file="sanger-pairs-singles.fastq" ftype="fastq"/>
+            <output name="output_paired" file="sanger-pairs-interleaved.fastq" ftype="fastq"/>
+        </test>
+    </tests>
+    <help>
+
+**What it does**
+
+Using the common read name suffix conventions, it divides a FASTQ file into
+paired reads, and orphan or single reads.
+
+The input file should be a valid FASTQ file which has been sorted so that
+any partner forward+reverse reads are consecutive. The output files all
+preserve this sort order. Pairing are recognised based on standard name
+suffices. See below or run the tool with no arguments for more details.
+
+Any reads where the forward/reverse naming suffix used is not recognised
+are treated as orphan reads. The tool supports the /1 and /2 convention
+originally used by Illumina, .f and .r convention, the Sanger convention
+(see http://staden.sourceforge.net/manual/pregap4_unix_50.html for details),
+and the current Illumina convention where the reads get the same identifier
+with the fragment number in the description, for example:
+
+ * @HWI-ST916:79:D04M5ACXX:1:1101:10000:100326 1:N:0:TGNCCA
+ * @HWI-ST916:79:D04M5ACXX:1:1101:10000:100326 2:N:0:TGNCCA 
+
+Note that this does support multiple forward and reverse reads per template
+(which is quite common with Sanger sequencing), e.g. this which is sorted
+alphabetically:
+
+ * WTSI_1055_4p17.p1kapIBF
+ * WTSI_1055_4p17.p1kpIBF
+ * WTSI_1055_4p17.q1kapIBR
+ * WTSI_1055_4p17.q1kpIBR
+
+or this where the reads already come in pairs:
+
+ * WTSI_1055_4p17.p1kapIBF
+ * WTSI_1055_4p17.q1kapIBR
+ * WTSI_1055_4p17.p1kpIBF
+ * WTSI_1055_4p17.q1kpIBR
+
+both become:
+
+ * WTSI_1055_4p17.p1kapIBF paired with WTSI_1055_4p17.q1kapIBR
+ * WTSI_1055_4p17.p1kpIBF paired with WTSI_1055_4p17.q1kpIBR
+
+**References**
+
+If you use this Galaxy tool in work leading to a scientific publication please
+cite the following paper:
+
+Peter J.A. Cock, Björn A. Grüning, Konrad Paszkiewicz and Leighton Pritchard (2013).
+Galaxy tools and workflows for sequence analysis with applications
+in molecular plant pathology. PeerJ 1:e167
+http://dx.doi.org/10.7717/peerj.167
+
+This tool is available to install into other Galaxy Instances via the Galaxy
+Tool Shed at http://toolshed.g2.bx.psu.edu/view/peterjc/fastq_paired_unpaired
+    </help>
+    <citations>
+        <citation type="doi">10.7717/peerj.167</citation>
+    </citations>
+</tool>
author	peterjc
date	Wed, 05 Aug 2015 11:06:38 -0400
parents
children	b38bbcbd458d