Repository 'ncbi_sra_toolkit'
hg clone https://toolshed.g2.bx.psu.edu/repos/matt-shirley/ncbi_sra_toolkit

Changeset 0:c386fe82db82 (2013-09-25)
Next changeset 1:da2dbe22f80b (2013-09-25)
Commit message:
Initial commit, moving from test tool shed.
added:
sra_tools-04cc8176e86f/datatypes_conf.xml
sra_tools-04cc8176e86f/fastq_dump.xml
sra_tools-04cc8176e86f/sam_dump.xml
sra_tools-04cc8176e86f/sra.py
sra_tools-04cc8176e86f/sra_pileup.xml
sra_tools-04cc8176e86f/tool_dependencies.xml
b
diff -r 000000000000 -r c386fe82db82 sra_tools-04cc8176e86f/datatypes_conf.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/sra_tools-04cc8176e86f/datatypes_conf.xml Wed Sep 25 21:00:05 2013 -0400
b
@@ -0,0 +1,9 @@
+<?xml version="1.0"?>
+<datatypes> <!-- Datatype registration shipped with the SRA tools repository. -->
+  <datatype_files>
+    <datatype_file name="sra.py"/> <!-- Module file that provides the datatype class. -->
+  </datatype_files>
+  <registration>
+    <datatype extension="sra" type="galaxy.datatypes.sra:sra" display_in_upload="true"/> <!-- Binds the "sra" extension to class "sra" of module galaxy.datatypes.sra. -->
+  </registration>
+</datatypes>
b
diff -r 000000000000 -r c386fe82db82 sra_tools-04cc8176e86f/fastq_dump.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/sra_tools-04cc8176e86f/fastq_dump.xml Wed Sep 25 21:00:05 2013 -0400
[
@@ -0,0 +1,123 @@
+<tool id="fastq_dump" name="Extract reads" version="1.1.1"> <!-- Galaxy wrapper for the fastq-dump program; the command below redirects its standard output into the output dataset. -->
+  <description> from NCBI SRA.</description> <!-- The command element that follows is a Cheetah template: each optional flag is emitted only when its parameter is non-empty or equals the tested choice. -->
+  <command>
+    fastq-dump --log-level fatal 
+    #if $input.input_select == "file":
+      --accession '${input.file.name}' 
+    #else:
+      --accession $input.accession 
+    #end if
+    --defline-seq '@\$sn[_\$rn]/\$ri' 
+    --stdout 
+    #if $split == "yes":
+      --split-spot
+    #end if
+    #if str( $alignments ) == "aligned":
+      --aligned
+    #end if
+    #if str( $alignments ) == "unaligned":
+      --unaligned
+    #end if
+    #if str( $minID ) != "":
+      --minSpotId $minID 
+    #end if
+    #if str( $maxID ) != "":
+      --maxSpotId $maxID 
+    #end if
+    #if str( $minlen ) != "":
+      --minReadLen $minlen 
+    #end if
+    #if str( $readfilter ) != "":
+      --read-filter $readfilter 
+    #end if
+    #if str( $region ) != "":
+      --aligned-region $region
+    #end if
+    #if str( $spotgroups ) != "":
+      --spot-groups $spotgroups
+    #end if
+    #if str( $matepairDist ) != "":
+      --matepair-distance $matepairDist
+    #end if
+    #if $clip == "yes":
+      --clip
+    #end if
+    #if str( $outputformat ) == "fasta":
+      --fasta
+    #end if
+    #if $input.input_select=="file":
+      $input.file
+    #else:
+        $input.accession 
+    #end if
+    > $output
+  </command>
+  <version_string>fastq-dump --version</version_string> <!-- NOTE(review): presumably run by Galaxy to record the tool version; confirm against the tool XML documentation. -->
+  <inputs>
+    <conditional name="input"> <!-- Selects the read source: a typed accession or an SRA dataset from the history. -->
+      <param name="input_select" type="select" label="select input type">
+        <option value="accession_number">SRR accession</option>
+        <option value="file">SRA archive in current history</option>
+      </param>
+      <when value="file">
+    <param format="sra" name="file" type="data" label="sra archive"/> <!-- The dataset path is the positional argument; the dataset name is passed as the accession option. -->
+      </when>
+      <when value="accession_number">
+    <param format="text" name="accession" type="text" label="accession"/> <!-- Used both as the accession option and as the positional argument. -->
+      </when>
+    </conditional>
+    <param format="text" name="minID" type="text" label="minimum spot ID"/> <!-- Passed as the minSpotId option when non-empty. -->
+    <param format="text" name="maxID" type="text" label="maximum spot ID"/> <!-- Passed as the maxSpotId option when non-empty. -->
+    <param format="text" name="minlen" type="text" label="minimum read length"/> <!-- Passed as the minReadLen option when non-empty. -->
+    <param format="text" name="split" type="select" value="yes"> <!-- "yes" adds the split-spot flag. -->
+      <label>split spot by read pairs</label>
+      <option value="yes">Yes</option>
+      <option value="no">No</option>
+    </param>
+    <param format="text" name="alignments" type="select" value="both"> <!-- "aligned" and "unaligned" add the flag of the same name; "both" adds neither. -->
+      <label>aligned or unaligned reads</label>
+      <option value="both">both</option>
+      <option value="aligned">aligned only</option>
+        <option value="unaligned">unaligned only</option>
+    </param>
+    <param format="text" name="region" type="text" label="aligned region"/> <!-- Passed as the aligned-region option when non-empty. -->
+    <param format="text" name="matepairDist" type="text" label="mate-pair distance (from-to|unknown)"/> <!-- Passed as the matepair-distance option when non-empty. -->
+    <param format="text" name="readfilter" type="select" value=""> <!-- Any non-empty choice is passed as the read-filter option. -->
+      <label>filter by value</label>
+      <option value="">None</option>
+      <option value="pass">pass</option>
+      <option value="reject">reject</option>
+      <option value="criteria">criteria</option>
+      <option value="redacted">redacted</option>
+    </param>
+    <param name="outputformat" type="select" label="select output format"> <!-- "fasta" adds the fasta flag and also switches the output dataset format. -->
+      <option value="fastqsanger">fastq</option>
+      <option value="fasta">fasta</option>
+    </param>
+    <param format="text" name="spotgroups" type="text" label="filter by spot-groups"/> <!-- Passed as the spot-groups option when non-empty. -->
+    <param format="text" name="clip" type="select" value="no"> <!-- "yes" adds the clip flag. -->
+      <label>apply left and right clips</label>
+      <option value="no">No</option>
+      <option value="yes">Yes</option>
+    </param>
+  </inputs>
+  <outputs>
+    <data format="fastq" name="output"> <!-- fastq by default; change_format below switches it to fasta. -->
+        <change_format>
+            <when input="outputformat" value="fasta" format="fasta" />
+        </change_format>
+    </data>
+  </outputs>
+  <stdio> <!-- Only exit status 127 is mapped to a fatal error here. -->
+    <exit_code range="127" level="fatal" description="Could not locate fastq-dump binary"/>
+  </stdio>
+  <requirements>
+    <requirement type="package" version="2.3.3-3">sra_toolkit</requirement> <!-- Same package name and version as declared in tool_dependencies.xml. -->
+  </requirements>
+  <help>
+    This tool extracts reads from SRA archives using fastq-dump. 
+    Browse the NCBI SRA for SRR accessions at http://www.ncbi.nlm.nih.gov/Traces/sra/sra.cgi?view=studies. 
+    The fastq-dump program is developed at NCBI, and is available at: http://www.ncbi.nlm.nih.gov/Traces/sra/sra.cgi?view=software.
+    Contact Matt Shirley at mdshw5@gmail.com for support and bug reports.
+  </help>
+</tool>
b
diff -r 000000000000 -r c386fe82db82 sra_tools-04cc8176e86f/sam_dump.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/sra_tools-04cc8176e86f/sam_dump.xml Wed Sep 25 21:00:05 2013 -0400
b
@@ -0,0 +1,99 @@
+<tool id="sam_dump" name="Extract reads" version="1.1.1"> <!-- Galaxy wrapper for the sam-dump program; the command below redirects its standard output into the output dataset. -->
+  <description> in SAM format from NCBI SRA.</description> <!-- In the Cheetah command template that follows, the primary flag is emitted only when primary is "yes" and alignments is not "unaligned". -->
+  <command>
+    sam-dump --log-level fatal 
+    #if str( $region ) != "":
+      --aligned-region $region
+    #end if
+    #if str( $matepairDist ) != "":
+      --matepair-distance $matepairDist
+    #end if
+    #if str( $minMapq ) != "":
+      --minmapq $minMapq
+    #end if
+    #if $header == "yes":
+      --header
+    #else:
+      --no-header 
+    #end if
+    #if str( $alignments ) == "both":
+      --unaligned
+    #end if
+    #if str( $alignments ) == "unaligned":
+      --unaligned-spots-only
+    #end if
+    #if (str( $primary ) == "yes") and (str( $alignments ) != "unaligned"):
+      --primary
+    #end if
+    #if str( $fastq ) == "yes":
+      --fastq
+    #end if
+    #if $input.input_select == "file":
+      $input.file
+    #elif $input.input_select == "accession_number":
+      $input.accession 
+    #elif $input.input_select == "text":
+      `cat $input.text`
+    #end if
+    > $output
+  </command>  
+  <version_string>sam-dump --version</version_string> <!-- NOTE(review): presumably run by Galaxy to record the tool version; confirm against the tool XML documentation. -->
+  <inputs>
+    <conditional name="input"> <!-- Selects the read source: typed accession, SRA dataset, or a text dataset whose content is passed via backtick cat. -->
+      <param name="input_select" type="select" label="select input type">
+        <option value="accession_number">SRR accession</option>
+        <option value="file">SRA archive in current history</option>
+        <option value="text">text file containing SRR accession</option>
+      </param>
+      <when value="file">
+        <param format="sra" name="file" type="data" label="sra archive"/>
+      </when>
+      <when value="accession_number">
+        <param format="text" name="accession" type="text" label="accession"/>
+      </when>
+      <when value="text">
+        <param format="txt" name="text" type="data" label="text file"/>
+      </when>
+    </conditional>
+    <param format="text" name="region" type="text" label="aligned region"/> <!-- Passed as the aligned-region option when non-empty. -->
+    <param format="text" name="matepairDist" type="text" label="mate-pair distance (from-to|unknown)"/> <!-- Passed as the matepair-distance option when non-empty. -->
+    <param format="text" name="header" type="select" value="yes"> <!-- "yes" adds the header flag; any other value adds the no-header flag. -->
+      <label>output SAM header</label>
+      <option value="yes">Yes</option>
+      <option value="no">No</option>
+    </param>
+    <param format="text" name="alignments" type="select" value="both"> <!-- "both" adds the unaligned flag, "unaligned" adds unaligned-spots-only, "aligned" adds neither. -->
+      <label>aligned or unaligned reads</label>
+      <option value="both">both</option>
+      <option value="aligned">aligned only</option>
+        <option value="unaligned">unaligned only</option>
+    </param>
+    <param format="text" name="primary" type="select" value="no"> <!-- Ignored by the command template when alignments is "unaligned". -->
+      <label>only primary aligments</label>
+      <option value="no">No</option>
+      <option value="yes">Yes</option>
+    </param>
+    <param format="text" name="minMapq" type="text" label="minimum mapping quality"/> <!-- Passed as the minmapq option when non-empty. -->
+    <param format="text" name="fastq" type="select" value="no"> <!-- "yes" adds the fastq flag and switches the output dataset format. -->
+      <label>output fastq</label>
+      <option value="no">No</option>
+      <option value="yes">Yes</option>
+    </param>
+  </inputs>
+  <outputs>
+    <data name="output" format="sam"> <!-- sam by default; change_format below switches it to fastq. -->
+      <change_format>
+        <when input="fastq" value="yes" format="fastq"/>
+      </change_format>
+    </data>
+  </outputs>
+  <requirements>
+    <requirement type="package" version="2.3.3-3">sra_toolkit</requirement> <!-- Same package name and version as declared in tool_dependencies.xml. -->
+  </requirements>
+  <help>
+    This tool extracts reads from sra archives using sam-dump. 
+    Browse the NCBI SRA for SRR accessions at http://www.ncbi.nlm.nih.gov/Traces/sra/sra.cgi?view=studies. 
+    The sam-dump program is developed at NCBI, and is available at: http://www.ncbi.nlm.nih.gov/Traces/sra/sra.cgi?view=software.
+    Contact Matt Shirley at mdshw5@gmail.com for support and bug reports.
+  </help>
+</tool>
\ No newline at end of file
b
diff -r 000000000000 -r c386fe82db82 sra_tools-04cc8176e86f/sra.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/sra_tools-04cc8176e86f/sra.py Wed Sep 25 21:00:05 2013 -0400
b
@@ -0,0 +1,46 @@
+"""
+NCBI sra class
+"""
+import logging
+import binascii
+from galaxy.datatypes.data import *
+from galaxy.datatypes.sniff import *
+from galaxy.datatypes.binary import *
+from galaxy.datatypes.metadata import *
+
+log = logging.getLogger(__name__)
+
+class sra( Binary ):
+    """ Sequence Read Archive (SRA) """
+    file_ext = 'sra'
+
+    def __init__( self, **kwd ):
+        Binary.__init__( self, **kwd )
+    def sniff( self, filename ):
+        """ The first 8 bytes of any NCBI sra file is 'NCIB.sra', and the file is binary. EBI and DDBJ files may differ, though EBI and DDBJ 
+        submissions through NCBI (ERR and DRR accessions) read 'NCBI.sra'.
+        For details about the format, see http://www.ncbi.nlm.nih.gov/books/n/helpsra/SRA_Overview_BK/#SRA_Overview_BK.4_SRA_Data_Structure 
+        """
+        try:
+            header = open(filename).read(8)
+            if binascii.b2a_hex(header) == binascii.hexlify('NCBI.sra'):
+                return True
+            else:
+                return False
+        except:
+            return False
+    def set_peek(self, dataset, is_multi_byte=False):
+        if not dataset.dataset.purged:
+            dataset.peek  = 'Binary sra file'
+            dataset.blurb = data.nice_size(dataset.get_size())
+        else:
+            dataset.peek = 'file does not exist'
+            dataset.blurb = 'file purged from disk'
+    def display_peek(self, dataset):
+        try:
+            return dataset.peek
+        except:
+            return 'Binary sra file (%s)' % ( data.nice_size(dataset.get_size()))
+
+if hasattr(Binary, 'register_sniffable_binary_format'):  # guard: call the hook only when this Binary class provides it
+    Binary.register_sniffable_binary_format('sra', 'sra', sra)
b
diff -r 000000000000 -r c386fe82db82 sra_tools-04cc8176e86f/sra_pileup.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/sra_tools-04cc8176e86f/sra_pileup.xml Wed Sep 25 21:00:05 2013 -0400
b
@@ -0,0 +1,51 @@
+<tool id="sra_pileup" name="Generate pileup format" version="1.1.1"> <!-- Galaxy wrapper for the sra-pileup program; the command below redirects its standard output into the output dataset. -->
+  <description> from NCBI sra.</description> <!-- The command element that follows is a Cheetah template: optional flags are emitted only when their parameter is non-empty. -->
+  <command>sra-pileup --log-level fatal 
+    #if str( $region ) != "":
+      --aligned-region $region
+    #end if
+    #if str( $minMapq ) != "":
+      --minmapq $minMapq
+    #end if
+    #if $input.input_select == "file":
+      $input.file
+    #elif $input.input_select == "accession_number":
+      $input.accession 
+    #elif $input.input_select == "text":
+      `cat $input.text`
+    #end if
+    > $output</command>
+  <version_string>sra-pileup --version</version_string> <!-- NOTE(review): presumably run by Galaxy to record the tool version; confirm against the tool XML documentation. -->
+  <inputs>
+    <conditional name="input"> <!-- Selects the read source: typed accession, SRA dataset, or a text dataset whose content is passed via backtick cat. -->
+      <param name="input_select" type="select" label="select input type">
+        <option value="accession_number">SRR accession</option>
+        <option value="file">SRA archive in current history</option>
+        <option value="text">text file containing SRR accession</option>
+      </param>
+      <when value="file">
+        <param format="sra" name="file" type="data" label="sra archive"/>
+      </when>
+      <when value="accession_number">
+        <param format="text" name="accession" type="text" label="accession"/>
+      </when>
+      <when value="text">
+        <param format="txt" name="text" type="data" label="text file"/>
+      </when>
+    </conditional>
+    <param format="text" name="region" type="text" label="aligned region"/> <!-- Passed as the aligned-region option when non-empty. -->
+    <param format="text" name="minMapq" type="text" label="minimum mapping quality"/> <!-- Passed as the minmapq option when non-empty. -->
+  </inputs>
+  <outputs>
+    <data format="pileup" name="output"/> <!-- Single output dataset that receives the redirected standard output. -->
+  </outputs>
+  <requirements>
+    <requirement type="package" version="2.3.3-3">sra_toolkit</requirement> <!-- Same package name and version as declared in tool_dependencies.xml. -->
+  </requirements>
+  <help>
+    This tool produces pileup format from sra archives using sra-pileup. 
+    Browse the NCBI SRA for SRR accessions at http://www.ncbi.nlm.nih.gov/Traces/sra/sra.cgi?view=studies. 
+    The sra-pileup program is developed at NCBI, and is available at: http://www.ncbi.nlm.nih.gov/Traces/sra/sra.cgi?view=software.
+    Contact Matt Shirley at mdshw5@gmail.com for support and bug reports.
+  </help>
+</tool>
b
diff -r 000000000000 -r c386fe82db82 sra_tools-04cc8176e86f/tool_dependencies.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/sra_tools-04cc8176e86f/tool_dependencies.xml Wed Sep 25 21:00:05 2013 -0400
b
@@ -0,0 +1,37 @@
+<?xml version="1.0"?>
+<tool_dependency> <!-- Build recipe for the sra_toolkit package that the tool wrappers list as a requirement. -->
+  <package name="sra_toolkit" version="2.3.3-3">
+    <install version="1.0">
+      <actions>
+        <action type="download_by_url">http://ftp-trace.ncbi.nlm.nih.gov/sra/sdk/2.3.3-3/sra_sdk-2.3.3-3.tar.gz</action> <!-- Fetch the SDK source tarball. -->
+        <action type="shell_command">make release</action> <!-- NOTE(review): presumably selects the release build configuration; confirm against the SDK build notes. -->
+        <action type="shell_command">make static</action> <!-- NOTE(review): presumably selects static linking; confirm against the SDK build notes. -->
+        <action type="shell_command">make</action> <!-- Build the toolkit. -->
+        <action type="make_directory">$INSTALL_DIR/bin</action>
+        <action type="make_directory">$INSTALL_DIR/ncbi</action>
+        <action type="make_directory">$INSTALL_DIR/ncbi/public</action>
+        <action type="shell_command">sed -i -e "s|\$(HOME)|$INSTALL_DIR|g" -e "s|cache-enabled = \"true\"|cache-enabled = \"false\"|" bin64/ncbi/default.kfg</action> <!-- Rewrite default.kfg: replace $(HOME) with the install directory and set cache-enabled to "false". -->
+        <action type="shell_command">cp --recursive --dereference bin64/* $INSTALL_DIR/bin</action> <!-- Copy the built bin64 tree, following symlinks, into the install bin directory. -->
+        <action type="set_environment"><environment_variable name="PATH" action="prepend_to">$INSTALL_DIR/bin</environment_variable></action> <!-- Put the installed binaries first on PATH. -->
+      </actions>
+    </install>
+    <readme>Tools from NCBI SRA Toolkit for extracting FASTQ and SAM format reads from SRA format archives. 
+This software release was designed to run under Linux, MacOSX operating systems on Intel x86-compatible 64 bit architectures. 
+When running on Amazon EC2, be sure to keep in mind the size limitation of EBS storage devices when requesting a 
+download of a large SRA data set.
+
+Build Requirements:
+  ar 
+  bash
+  make
+  gcc, g++
+  libxml2
+  libcurl4
+  zlib
+  
+On a debian based Linux OS use:
+
+  apt-get install build-essential libxml2-dev libcurl4-openssl-dev zlib1g-dev
+    </readme>
+  </package>
+</tool_dependency>