# HG changeset patch # User matt-shirley # Date 1380157205 14400 # Node ID c386fe82db82609a64ec075ba79f51bef715876d Initial commit, moving from test tool shed. diff -r 000000000000 -r c386fe82db82 sra_tools-04cc8176e86f/datatypes_conf.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/sra_tools-04cc8176e86f/datatypes_conf.xml Wed Sep 25 21:00:05 2013 -0400 @@ -0,0 +1,9 @@ + + + + + + + + + diff -r 000000000000 -r c386fe82db82 sra_tools-04cc8176e86f/fastq_dump.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/sra_tools-04cc8176e86f/fastq_dump.xml Wed Sep 25 21:00:05 2013 -0400 @@ -0,0 +1,123 @@ + + from NCBI SRA. + + fastq-dump --log-level fatal + #if $input.input_select == "file": + --accession '${input.file.name}' + #else: + --accession $input.accession + #end if + --defline-seq '@\$sn[_\$rn]/\$ri' + --stdout + #if $split == "yes": + --split-spot + #end if + #if str( $alignments ) == "aligned": + --aligned + #end if + #if str( $alignments ) == "unaligned": + --unaligned + #end if + #if str( $minID ) != "": + --minSpotId $minID + #end if + #if str( $maxID ) != "": + --maxSpotId $maxID + #end if + #if str( $minlen ) != "": + --minReadLen $minlen + #end if + #if str( $readfilter ) != "": + --read-filter $readfilter + #end if + #if str( $region ) != "": + --aligned-region $region + #end if + #if str( $spotgroups ) != "": + --spot-groups $spotgroups + #end if + #if str( $matepairDist ) != "": + --matepair-distance $matepairDist + #end if + #if $clip == "yes": + --clip + #end if + #if str( $outputformat ) == "fasta": + --fasta + #end if + #if $input.input_select=="file": + $input.file + #else: + $input.accession + #end if + > $output + + fastq-dump --version + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + sra_toolkit + + + This tool extracts reads from SRA archives using fastq-dump. + Browse the NCBI SRA for SRR accessions at http://www.ncbi.nlm.nih.gov/Traces/sra/sra.cgi?view=studies. 
+ The fastq-dump program is developed at NCBI, and is available at: http://www.ncbi.nlm.nih.gov/Traces/sra/sra.cgi?view=software. + Contact Matt Shirley at mdshw5@gmail.com for support and bug reports. + + diff -r 000000000000 -r c386fe82db82 sra_tools-04cc8176e86f/sam_dump.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/sra_tools-04cc8176e86f/sam_dump.xml Wed Sep 25 21:00:05 2013 -0400 @@ -0,0 +1,99 @@ + + in SAM format from NCBI SRA. + + sam-dump --log-level fatal + #if str( $region ) != "": + --aligned-region $region + #end if + #if str( $matepairDist ) != "": + --matepair-distance $matepairDist + #end if + #if str( $minMapq ) != "": + --minmapq $minMapq + #end if + #if $header == "yes": + --header + #else: + --no-header + #end if + #if str( $alignments ) == "both": + --unaligned + #end if + #if str( $alignments ) == "unaligned": + --unaligned-spots-only + #end if + #if (str( $primary ) == "yes") and (str( $alignments ) != "unaligned"): + --primary + #end if + #if str( $fastq ) == "yes": + --fastq + #end if + #if $input.input_select == "file": + $input.file + #elif $input.input_select == "accession_number": + $input.accession + #elif $input.input_select == "text": + `cat $input.text` + #end if + > $output + + sam-dump --version + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + sra_toolkit + + + This tool extracts reads from sra archives using sam-dump. + Browse the NCBI SRA for SRR accessions at http://www.ncbi.nlm.nih.gov/Traces/sra/sra.cgi?view=studies. + The sam-dump program is developed at NCBI, and is available at: http://www.ncbi.nlm.nih.gov/Traces/sra/sra.cgi?view=software. + Contact Matt Shirley at mdshw5@gmail.com for support and bug reports. 
+ + \ No newline at end of file diff -r 000000000000 -r c386fe82db82 sra_tools-04cc8176e86f/sra.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/sra_tools-04cc8176e86f/sra.py Wed Sep 25 21:00:05 2013 -0400 @@ -0,0 +1,46 @@ +""" +NCBI sra class +""" +import logging +import binascii +from galaxy.datatypes.data import * +from galaxy.datatypes.sniff import * +from galaxy.datatypes.binary import * +from galaxy.datatypes.metadata import * + +log = logging.getLogger(__name__) + +class sra( Binary ): + """ Sequence Read Archive (SRA) """ + file_ext = 'sra' + + def __init__( self, **kwd ): + Binary.__init__( self, **kwd ) + def sniff( self, filename ): + """ The first 8 bytes of any NCBI sra file is 'NCBI.sra', and the file is binary. EBI and DDBJ files may differ, though EBI and DDBJ + submissions through NCBI (ERR and DRR accessions) read 'NCBI.sra'. + For details about the format, see http://www.ncbi.nlm.nih.gov/books/n/helpsra/SRA_Overview_BK/#SRA_Overview_BK.4_SRA_Data_Structure + """ + try: + with open(filename, 'rb') as handle: + header = handle.read(8) + return header == b'NCBI.sra' + except Exception: + return False + def set_peek(self, dataset, is_multi_byte=False): + """ Set the peek and blurb text for the dataset; a purged dataset gets placeholder messages instead. """ + if not dataset.dataset.purged: + dataset.peek = 'Binary sra file' + dataset.blurb = data.nice_size(dataset.get_size()) + else: + dataset.peek = 'file does not exist' + dataset.blurb = 'file purged from disk' + def display_peek(self, dataset): + """ Return the stored peek, falling back to a generic description that includes the file size. """ + try: + return dataset.peek + except Exception: + return 'Binary sra file (%s)' % ( data.nice_size(dataset.get_size())) + +if hasattr(Binary, 'register_sniffable_binary_format'): + Binary.register_sniffable_binary_format('sra', 'sra', sra) diff -r 000000000000 -r c386fe82db82 sra_tools-04cc8176e86f/sra_pileup.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/sra_tools-04cc8176e86f/sra_pileup.xml Wed Sep 25 21:00:05 2013 -0400 @@ -0,0 +1,51 @@ + + from NCBI sra. 
+ sra-pileup --log-level fatal + #if str( $region ) != "": + --aligned-region $region + #end if + #if str( $minMapq ) != "": + --minmapq $minMapq + #end if + #if $input.input_select == "file": + $input.file + #elif $input.input_select == "accession_number": + $input.accession + #elif $input.input_select == "text": + `cat $input.text` + #end if + > $output + sra-pileup --version + + + + + + + + + + + + + + + + + + + + + + + + + sra_toolkit + + + This tool produces pileup format from sra archives using sra-pileup. + Browse the NCBI SRA for SRR accessions at http://www.ncbi.nlm.nih.gov/Traces/sra/sra.cgi?view=studies. + The sra-pileup program is developed at NCBI, and is available at: http://www.ncbi.nlm.nih.gov/Traces/sra/sra.cgi?view=software. + Contact Matt Shirley at mdshw5@gmail.com for support and bug reports. + + diff -r 000000000000 -r c386fe82db82 sra_tools-04cc8176e86f/tool_dependencies.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/sra_tools-04cc8176e86f/tool_dependencies.xml Wed Sep 25 21:00:05 2013 -0400 @@ -0,0 +1,37 @@ + + + + + + http://ftp-trace.ncbi.nlm.nih.gov/sra/sdk/2.3.3-3/sra_sdk-2.3.3-3.tar.gz + make release + make static + make + $INSTALL_DIR/bin + $INSTALL_DIR/ncbi + $INSTALL_DIR/ncbi/public + sed -i -e "s|\$(HOME)|$INSTALL_DIR|g" -e "s|cache-enabled = \"true\"|cache-enabled = \"false\"|" bin64/ncbi/default.kfg + cp --recursive --dereference bin64/* $INSTALL_DIR/bin + $INSTALL_DIR/bin + + + Tools from NCBI SRA Toolkit for extracting FASTQ and SAM format reads from SRA format archives. +This software release was designed to run under Linux, MacOSX operating systems on Intel x86-compatible 64 bit architectures. +When running on Amazon EC2, be sure to keep in mind the size limitation of EBS storage devices when requesting a +download of a large SRA data set. 
+ +Build Requirements: + ar + bash + make + gcc, g++ + libxml2 + libcurl4 + zlib + +On a Debian-based Linux OS use: + + apt-get install build-essential libxml2-dev libcurl4-openssl-dev zlib1g-dev + + +