changeset 0:c961d16801e4 draft default tip

Uploaded v0.0.2
author peterjc
date Tue, 04 Nov 2014 07:15:50 -0500
parents
children
files test-data/sam_spec_padded.bam test-data/sam_spec_padded.bam2fq.fastq test-data/sam_spec_padded.bam2fq_no_suf.fastq test-data/sam_spec_padded.bam2fq_pairs.fastq test-data/sam_spec_padded.bam2fq_singles.fastq test-data/sam_spec_padded.depad.bam test-data/sam_spec_padded.sam tools/samtools_bam2fq/README.rst tools/samtools_bam2fq/samtools_bam2fq.xml tools/samtools_bam2fq/tool_dependencies.xml
diffstat 10 files changed, 293 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
Binary file test-data/sam_spec_padded.bam has changed
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/sam_spec_padded.bam2fq.fastq	Tue Nov 04 07:15:50 2014 -0500
@@ -0,0 +1,12 @@
+>ref
+AGCATGTTAGATAAGATAGCTGTGCTAGTAGGCAGTCAGCGCCAT
+>r001/1
+TTAGATAAAGGATACTG
+>r002
+AAAAGATAAGGATA
+>r003
+AGCTAA
+>r004
+ATAGCTTCAGC
+>r001/2
+ATGCCGCTG
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/sam_spec_padded.bam2fq_no_suf.fastq	Tue Nov 04 07:15:50 2014 -0500
@@ -0,0 +1,12 @@
+>ref
+AGCATGTTAGATAAGATAGCTGTGCTAGTAGGCAGTCAGCGCCAT
+>r001
+TTAGATAAAGGATACTG
+>r002
+AAAAGATAAGGATA
+>r003
+AGCTAA
+>r004
+ATAGCTTCAGC
+>r001
+ATGCCGCTG
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/sam_spec_padded.bam2fq_pairs.fastq	Tue Nov 04 07:15:50 2014 -0500
@@ -0,0 +1,4 @@
+>r001/1
+TTAGATAAAGGATACTG
+>r001/2
+ATGCCGCTG
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/sam_spec_padded.bam2fq_singles.fastq	Tue Nov 04 07:15:50 2014 -0500
@@ -0,0 +1,8 @@
+>r002
+AAAAGATAAGGATA
+>r003
+AGCTAA
+>r004
+ATAGCTTCAGC
+>ref
+AGCATGTTAGATAAGATAGCTGTGCTAGTAGGCAGTCAGCGCCAT
Binary file test-data/sam_spec_padded.depad.bam has changed
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/sam_spec_padded.sam	Tue Nov 04 07:15:50 2014 -0500
@@ -0,0 +1,10 @@
+@HD	VN:1.5	SO:coordinate
+@SQ	SN:ref	LN:47
+ref	516	ref	1	0	14M2D31M	*	0	0	AGCATGTTAGATAAGATAGCTGTGCTAGTAGGCAGTCAGCGCCAT	*
+r001	99	ref	7	30	14M1D3M	=	39	41	TTAGATAAAGGATACTG	*
+*	768	ref	8	30	1M	*	0	0	*	*	CT:Z:.;Warning;Note=Ref wrong?
+r002	0	ref	9	30	3S6M1D5M	*	0	0	AAAAGATAAGGATA	*	PT:Z:1;4;+;homopolymer
+r003	0	ref	9	30	5H6M	*	0	0	AGCTAA	*	NM:i:1
+r004	0	ref	18	30	6M14D5M	*	0	0	ATAGCTTCAGC	*
+r003	2064	ref	31	30	6H5M	*	0	0	TAGGC	*	NM:i:0
+r001	147	ref	39	30	9M	=	7	-41	CAGCGGCAT	*	NM:i:1
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/samtools_bam2fq/README.rst	Tue Nov 04 07:15:50 2014 -0500
@@ -0,0 +1,104 @@
+Galaxy wrapper for samtools bam2fq
+====================================
+
+This wrapper is copyright 2014 by Peter Cock, The James Hutton Institute
+(formerly SCRI, Scottish Crop Research Institute), UK. All rights reserved.
+See the licence text below.
+
+This is a wrapper for part of the command line samtools suite.
+
+This wrapper is available from the Galaxy Tool Shed at:
+http://toolshed.g2.bx.psu.edu/view/peterjc/samtools_bam2fq
+
+
+Automated Installation
+======================
+
+This should be straightforward; Galaxy should automatically download and install
+samtools if required.
+
+
+Manual Installation
+===================
+
+This expects samtools to be on the $PATH, and was tested using v1.1.
+
+To install the wrapper copy or move the following files under the Galaxy tools
+folder, e.g. in a ``tools/samtools_bam2fq`` folder:
+
+* ``samtools_bam2fq.xml`` (the Galaxy tool definition)
+* ``README.rst`` (this file)
+
+You will also need to modify the ``tool_conf.xml`` file to tell Galaxy to offer
+the tool. Just add the line, perhaps under the NGS tools section::
+
+  <tool file="samtools_bam2fq/samtools_bam2fq.xml" />
+
+If you wish to run the unit tests, also move/copy the ``test-data/`` files
+under Galaxy's ``test-data/`` folder. Then::
+
+    $ ./run_tests.sh -id samtools_bam2fq
+
+That's it.
+
+
+History
+=======
+
+======= ======================================================================
+Version Changes
+------- ----------------------------------------------------------------------
+v0.0.1  - Initial public release, tested with samtools v1.1.
+v0.0.2  - Defaults to pair-aware mode which requires pre-sorting by read name.
+======= ======================================================================
+
+
+Developers
+==========
+
+Development is on this GitHub repository:
+https://github.com/peterjc/pico_galaxy/tree/master/tools/samtools_bam2fq
+
+For making the "Galaxy Tool Shed" http://toolshed.g2.bx.psu.edu/ tarball use
+the following command from the Galaxy root folder::
+
+    $ tar -czf samtools_bam2fq.tar.gz tools/samtools_bam2fq/README.rst tools/samtools_bam2fq/samtools_bam2fq.xml tools/samtools_bam2fq/tool_dependencies.xml test-data/sam_spec_padded.bam test-data/sam_spec_padded.sam test-data/sam_spec_padded.depad.bam test-data/sam_spec_padded.bam2fq.fastq test-data/sam_spec_padded.bam2fq_no_suf.fastq test-data/sam_spec_padded.bam2fq_singles.fastq test-data/sam_spec_padded.bam2fq_pairs.fastq
+
+Check this worked::
+
+    $ tar -tzf samtools_bam2fq.tar.gz
+    tools/samtools_bam2fq/README.rst
+    tools/samtools_bam2fq/samtools_bam2fq.xml
+    tools/samtools_bam2fq/tool_dependencies.xml
+    test-data/sam_spec_padded.bam
+    test-data/sam_spec_padded.sam
+    test-data/sam_spec_padded.depad.bam
+    test-data/sam_spec_padded.bam2fq.fastq
+    test-data/sam_spec_padded.bam2fq_no_suf.fastq
+    test-data/sam_spec_padded.bam2fq_singles.fastq
+    test-data/sam_spec_padded.bam2fq_pairs.fastq
+
+
+Licence (MIT)
+=============
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+
+NOTE: This is the licence for the Galaxy Wrapper only.
+samtools is available and licenced separately.
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/samtools_bam2fq/samtools_bam2fq.xml	Tue Nov 04 07:15:50 2014 -0500
@@ -0,0 +1,137 @@
+<tool id="samtools_bam2fq" name="Convert BAM to FASTQ" version="0.0.2">
+    <description>samtools bam2fq</description>
+    <requirements>
+        <requirement type="binary">samtools</requirement>
+        <requirement type="package" version="1.1">samtools</requirement>
+    </requirements>
+    <version_command>samtools 2&gt;&amp;1 | grep -i "Version:"</version_command>
+    <command>
+        #if $action_mode.mode == "pairs":
+            ## Pair-aware mode: name-sort first so mates are adjacent (gives interlaced FASTQ).
+            ## Galaxy has a tendency to automatically apply co-ordinate sorting,
+            ## so just do this every time. If it was name sorted, pay an IO overhead.
+            ## Note requiring -T is samtools issue 295; suffix and OQ flags apply in this mode too.
+            samtools sort -n -O bam -T TEMP_SORT "$input_bam" | samtools bam2fq $suffices $orig_qual -s "$singletons_fastq" - &gt; "$pairs_fastq"
+        #else
+            ## Naive conversion using order in the input file (single FASTQ output)
+            samtools bam2fq $suffices $orig_qual "$input_bam" &gt; "$out_fastq"
+        #end if
+    </command>
+    <inputs>
+        <!-- samtools 1.1 autodetects SAM vs BAM input (samtools 0.1.x did not) -->
+        <param name="input_bam" type="data" format="bam,sam" label="Input SAM/BAM file" />
+        <param name="suffices" type="boolean" label="Add /1 and /2 suffices to paired reads?"
+               truevalue="" falsevalue="-n" checked="true" />
+        <param name="orig_qual" type="boolean" label="Use original qualities (OQ tags) if present?"
+               truevalue="-O" falsevalue="" checked="false" />
+        <!-- The conditional lets each mode expose different output files; paired mode is the default -->
+        <conditional name="action_mode">
+            <param name="mode" type="select" label="Mode of action">
+                <option value="pairs" selected="true">Sort by name, then divide into paired and singletons (two FASTQ files)</option>
+                <option value="naive">No pre-sorting, all reads in a single FASTQ file</option>
+            </param>
+        </conditional>
+    </inputs>
+    <outputs>
+        <data name="pairs_fastq" format="fastqsanger" label="$input_bam.name (bam2fq pairs)">
+            <filter>(action_mode['mode'] == 'pairs')</filter>
+        </data>
+        <data name="singletons_fastq" format="fastqsanger" label="$input_bam.name (bam2fq singletons)">
+            <filter>(action_mode['mode'] == 'pairs')</filter>
+        </data>
+        <data name="out_fastq" format="fastqsanger" label="$input_bam.name (bam2fq)">
+            <filter>(action_mode['mode'] == 'naive')</filter>
+        </data>
+    </outputs>
+    <stdio>
+        <!-- Assume anything other than zero is an error -->
+        <exit_code range="1:" />
+        <exit_code range=":-1" />
+    </stdio>
+    <tests>
+        <test>
+            <param name="input_bam" value="sam_spec_padded.bam" ftype="bam" />
+            <param name="suffices" value="true" />
+            <param name="orig_qual" value="false" />
+            <param name="mode" value="naive" />
+            <output name="out_fastq" file="sam_spec_padded.bam2fq.fastq" ftype="fastqsanger" />
+        </test>
+        <test>
+            <param name="input_bam" value="sam_spec_padded.bam" ftype="bam" />
+            <param name="suffices" value="true" />
+            <param name="orig_qual" value="true" />
+            <param name="mode" value="naive" />
+            <output name="out_fastq" file="sam_spec_padded.bam2fq.fastq" ftype="fastqsanger" />
+        </test>
+        <test>
+            <param name="input_bam" value="sam_spec_padded.sam" ftype="sam" />
+            <param name="mode" value="naive" />
+            <output name="out_fastq" file="sam_spec_padded.bam2fq.fastq" ftype="fastqsanger" />
+        </test>
+        <test>
+            <param name="input_bam" value="sam_spec_padded.depad.bam" ftype="bam" />
+            <param name="mode" value="naive" />
+            <output name="out_fastq" file="sam_spec_padded.bam2fq.fastq" ftype="fastqsanger" />
+        </test>
+        <test>
+            <param name="input_bam" value="sam_spec_padded.bam" ftype="bam" />
+            <param name="suffices" value="false"/>
+            <param name="mode" value="naive" />
+            <output name="out_fastq" file="sam_spec_padded.bam2fq_no_suf.fastq" ftype="fastqsanger" />
+        </test>
+        <test>
+            <param name="input_bam" value="sam_spec_padded.bam" ftype="bam" />
+            <param name="suffices" value="true" />
+            <param name="orig_qual" value="false" />
+            <param name="mode" value="pairs" />
+            <output name="pairs_fastq" file="sam_spec_padded.bam2fq_pairs.fastq" ftype="fastqsanger" />
+            <output name="singletons_fastq" file="sam_spec_padded.bam2fq_singles.fastq" ftype="fastqsanger" />
+        </test>
+        <test>
+            <param name="input_bam" value="sam_spec_padded.sam" ftype="sam" />
+            <param name="suffices" value="true" />
+            <param name="orig_qual" value="false" />
+            <param name="mode" value="pairs" />
+            <output name="pairs_fastq" file="sam_spec_padded.bam2fq_pairs.fastq" ftype="fastqsanger" />
+            <output name="singletons_fastq" file="sam_spec_padded.bam2fq_singles.fastq" ftype="fastqsanger" />
+        </test>
+    </tests>
+    <help>
+**What it does**
+
+This tool runs the ``samtools bam2fq`` command in the SAMtools toolkit.
+
+By default this will pre-sort your SAM/BAM file by read name and split your
+reads into an interlaced FASTQ file for paired reads, and a separate FASTQ
+file for singleton reads. A naive conversion is also offered which gives a
+single FASTQ file with the reads ordered as in the input SAM/BAM file.
+
+It is quite common to wish to remap high-throughput sequencing data. If you
+only have the mapped reads in SAM/BAM format, this tool can "unmap" them to
+recover FASTQ format reads to input into an alternative mapping tool.
+
+BAM files can hold both aligned reads and unaligned reads, so another example
+usage would be to filter your BAM file to get only the unaligned reads, and
+turn those back into FASTQ using this tool ready for *de novo* assembly, or to
+try mapping against another reference sequence.
+
+
+**Citation**
+
+If you use this Galaxy tool in work leading to a scientific publication please
+cite:
+
+Heng Li et al (2009). The Sequence Alignment/Map format and SAMtools.
+Bioinformatics 25(16), 2078-9.
+http://dx.doi.org/10.1093/bioinformatics/btp352
+
+Peter J.A. Cock (2014), Galaxy wrapper for the samtools bam2fq command
+http://toolshed.g2.bx.psu.edu/view/peterjc/samtools_bam2fq
+
+This wrapper is available to install into other Galaxy Instances via the Galaxy
+Tool Shed at http://toolshed.g2.bx.psu.edu/view/peterjc/samtools_bam2fq
+    </help>
+    <citations>
+        <citation type="doi">10.1093/bioinformatics/btp352</citation>
+    </citations>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/samtools_bam2fq/tool_dependencies.xml	Tue Nov 04 07:15:50 2014 -0500
@@ -0,0 +1,6 @@
+<?xml version="1.0"?>
+<tool_dependency>
+    <package name="samtools" version="1.1">
+        <repository changeset_revision="43f2fbec5d52" name="package_samtools_1_1" owner="iuc" toolshed="https://toolshed.g2.bx.psu.edu" />
+    </package>
+</tool_dependency>