# HG changeset patch
# User matt-shirley
# Date 1380157205 14400
# Node ID c386fe82db82609a64ec075ba79f51bef715876d
Initial commit, moving from test tool shed.
diff -r 000000000000 -r c386fe82db82 sra_tools-04cc8176e86f/datatypes_conf.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/sra_tools-04cc8176e86f/datatypes_conf.xml Wed Sep 25 21:00:05 2013 -0400
@@ -0,0 +1,9 @@
+
+
+
+
+
+
+
+
+
diff -r 000000000000 -r c386fe82db82 sra_tools-04cc8176e86f/fastq_dump.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/sra_tools-04cc8176e86f/fastq_dump.xml Wed Sep 25 21:00:05 2013 -0400
@@ -0,0 +1,123 @@
+
+ from NCBI SRA.
+
+ fastq-dump --log-level fatal
+ #if $input.input_select == "file":
+ --accession '${input.file.name}'
+ #else:
+ --accession $input.accession
+ #end if
+ --defline-seq '@\$sn[_\$rn]/\$ri'
+ --stdout
+ #if $split == "yes":
+ --split-spot
+ #end if
+ ## Pass --aligned or --unaligned when that subset is selected; otherwise pass neither flag.
+ #if str( $alignments ) == "aligned":
+ --aligned
+ #elif str( $alignments ) == "unaligned":
+ --unaligned
+ #end if
+ #if str( $minID ) != "":
+ --minSpotId $minID
+ #end if
+ #if str( $maxID ) != "":
+ --maxSpotId $maxID
+ #end if
+ #if str( $minlen ) != "":
+ --minReadLen $minlen
+ #end if
+ #if str( $readfilter ) != "":
+ --read-filter $readfilter
+ #end if
+ #if str( $region ) != "":
+ --aligned-region $region
+ #end if
+ #if str( $spotgroups ) != "":
+ --spot-groups $spotgroups
+ #end if
+ #if str( $matepairDist ) != "":
+ --matepair-distance $matepairDist
+ #end if
+ #if $clip == "yes":
+ --clip
+ #end if
+ #if str( $outputformat ) == "fasta":
+ --fasta
+ #end if
+ #if $input.input_select == "file":
+ $input.file
+ #else:
+ $input.accession
+ #end if
+ > $output
+
+ fastq-dump --version
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ sra_toolkit
+
+
+ This tool extracts reads from SRA archives using fastq-dump.
+ Browse the NCBI SRA for SRR accessions at http://www.ncbi.nlm.nih.gov/Traces/sra/sra.cgi?view=studies.
+ The fastq-dump program is developed at NCBI, and is available at: http://www.ncbi.nlm.nih.gov/Traces/sra/sra.cgi?view=software.
+ Contact Matt Shirley at mdshw5@gmail.com for support and bug reports.
+
+
diff -r 000000000000 -r c386fe82db82 sra_tools-04cc8176e86f/sam_dump.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/sra_tools-04cc8176e86f/sam_dump.xml Wed Sep 25 21:00:05 2013 -0400
@@ -0,0 +1,99 @@
+
+ in SAM format from NCBI SRA.
+
+ sam-dump --log-level fatal
+ #if str( $region ) != "":
+ --aligned-region $region
+ #end if
+ #if str( $matepairDist ) != "":
+ --matepair-distance $matepairDist
+ #end if
+ #if str( $minMapq ) != "":
+ --minmapq $minMapq
+ #end if
+ #if $header == "yes":
+ --header
+ #else:
+ --no-header
+ #end if
+ #if str( $alignments ) == "both":
+ --unaligned
+ #elif str( $alignments ) == "unaligned":
+ --unaligned-spots-only
+ #end if
+ ## Pass --primary only when it is requested and the dump is not restricted to unaligned spots.
+ #if str( $primary ) == "yes" and str( $alignments ) != "unaligned":
+ --primary
+ #end if
+ #if str( $fastq ) == "yes":
+ --fastq
+ #end if
+ ## The input is a history file, a typed accession, or the contents of a text file.
+ #if $input.input_select == "file":
+ $input.file
+ #elif $input.input_select == "accession_number":
+ $input.accession
+ #elif $input.input_select == "text":
+ `cat $input.text`
+ #end if
+ > $output
+
+ sam-dump --version
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ sra_toolkit
+
+
+ This tool extracts reads from SRA archives using sam-dump.
+ Browse the NCBI SRA for SRR accessions at http://www.ncbi.nlm.nih.gov/Traces/sra/sra.cgi?view=studies.
+ The sam-dump program is developed at NCBI, and is available at: http://www.ncbi.nlm.nih.gov/Traces/sra/sra.cgi?view=software.
+ Contact Matt Shirley at mdshw5@gmail.com for support and bug reports.
+
+
\ No newline at end of file
diff -r 000000000000 -r c386fe82db82 sra_tools-04cc8176e86f/sra.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/sra_tools-04cc8176e86f/sra.py Wed Sep 25 21:00:05 2013 -0400
@@ -0,0 +1,46 @@
+"""
+NCBI sra class
+"""
+import logging
+import binascii
+from galaxy.datatypes.data import *
+from galaxy.datatypes.sniff import *
+from galaxy.datatypes.binary import *
+from galaxy.datatypes.metadata import *
+
+log = logging.getLogger(__name__)
+
+class sra( Binary ):
+ """ Sequence Read Archive (SRA) """
+ file_ext = 'sra'
+
+ def __init__( self, **kwd ):
+ Binary.__init__( self, **kwd )
+ def sniff( self, filename ):
+ """ The first 8 bytes of any NCBI sra file is 'NCIB.sra', and the file is binary. EBI and DDBJ files may differ, though EBI and DDBJ
+ submissions through NCBI (ERR and DRR accessions) read 'NCBI.sra'.
+ For details about the format, see http://www.ncbi.nlm.nih.gov/books/n/helpsra/SRA_Overview_BK/#SRA_Overview_BK.4_SRA_Data_Structure
+ """
+ try:
+ header = open(filename).read(8)
+ if binascii.b2a_hex(header) == binascii.hexlify('NCBI.sra'):
+ return True
+ else:
+ return False
+ except:
+ return False
+ def set_peek(self, dataset, is_multi_byte=False):
+ if not dataset.dataset.purged:
+ dataset.peek = 'Binary sra file'
+ dataset.blurb = data.nice_size(dataset.get_size())
+ else:
+ dataset.peek = 'file does not exist'
+ dataset.blurb = 'file purged from disk'
+ def display_peek(self, dataset):
+ try:
+ return dataset.peek
+ except:
+ return 'Binary sra file (%s)' % ( data.nice_size(dataset.get_size()))
+
+if hasattr(Binary, 'register_sniffable_binary_format'):  # register the sra sniffer only when Binary offers this hook
+ Binary.register_sniffable_binary_format('sra', 'sra', sra)
diff -r 000000000000 -r c386fe82db82 sra_tools-04cc8176e86f/sra_pileup.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/sra_tools-04cc8176e86f/sra_pileup.xml Wed Sep 25 21:00:05 2013 -0400
@@ -0,0 +1,51 @@
+
+ from NCBI SRA.
+ sra-pileup --log-level fatal
+ #if str( $region ):
+ --aligned-region $region
+ #end if
+ #if str( $minMapq ):
+ --minmapq $minMapq
+ #end if
+ #if $input.input_select == "file":
+ $input.file
+ #elif $input.input_select == "accession_number":
+ $input.accession
+ #elif $input.input_select == "text":
+ `cat $input.text`
+ #end if
+ > $output
+ sra-pileup --version
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ sra_toolkit
+
+
+ This tool produces pileup format from SRA archives using sra-pileup.
+ Browse the NCBI SRA for SRR accessions at http://www.ncbi.nlm.nih.gov/Traces/sra/sra.cgi?view=studies.
+ The sra-pileup program is developed at NCBI, and is available at: http://www.ncbi.nlm.nih.gov/Traces/sra/sra.cgi?view=software.
+ Contact Matt Shirley at mdshw5@gmail.com for support and bug reports.
+
+
diff -r 000000000000 -r c386fe82db82 sra_tools-04cc8176e86f/tool_dependencies.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/sra_tools-04cc8176e86f/tool_dependencies.xml Wed Sep 25 21:00:05 2013 -0400
@@ -0,0 +1,37 @@
+
+
+
+
+
+ http://ftp-trace.ncbi.nlm.nih.gov/sra/sdk/2.3.3-3/sra_sdk-2.3.3-3.tar.gz
+ make release
+ make static
+ make
+ $INSTALL_DIR/bin
+ $INSTALL_DIR/ncbi
+ $INSTALL_DIR/ncbi/public
+ sed -i -e "s|\$(HOME)|$INSTALL_DIR|g" -e "s|cache-enabled = \"true\"|cache-enabled = \"false\"|" bin64/ncbi/default.kfg
+ cp --recursive --dereference bin64/* $INSTALL_DIR/bin
+ $INSTALL_DIR/bin
+
+
+ Tools from NCBI SRA Toolkit for extracting FASTQ and SAM format reads from SRA format archives.
+This software release was designed to run under Linux and Mac OS X operating systems on Intel x86-compatible 64-bit architectures.
+When running on Amazon EC2, be sure to keep in mind the size limitation of EBS storage devices when requesting a
+download of a large SRA data set.
+
+Build Requirements:
+ ar
+ bash
+ make
+ gcc, g++
+ libxml2
+ libcurl4
+ zlib
+
+On a debian based Linux OS use:
+
+ apt-get install build-essential libxml2-dev libcurl4-openssl-dev zlib1g-dev
+
+
+