Repository 'sam_to_bam'
hg clone https://toolshed.g2.bx.psu.edu/repos/devteam/sam_to_bam

Changeset 0:30fdbaccb96b (2013-08-26)
Next changeset 1:93f2e3337a33 (2013-12-11)
Commit message:
Uploaded tool tarball.
added:
sam_to_bam.py
sam_to_bam.xml
test-data/chr_m.fasta
test-data/sam_to_bam_in1.sam
test-data/sam_to_bam_out1.bam
test-data/sam_to_bam_out2.bam
tool-data/sam_fa_indices.loc.sample
tool-data/tool_data_table_conf.xml.sample
tool_dependencies.xml
b
diff -r 000000000000 -r 30fdbaccb96b sam_to_bam.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/sam_to_bam.py Mon Aug 26 14:22:00 2013 -0400
[
b'@@ -0,0 +1,197 @@\n+#!/usr/bin/env python\n+"""\n+Converts SAM data to sorted BAM data.\n+usage: sam_to_bam.py [options]\n+   --input1: SAM file to be converted\n+   --dbkey: dbkey value\n+   --ref_file: Reference file if choosing from history\n+   --output1: output dataset in bam format\n+   --index_dir: GALAXY_DATA_INDEX_DIR\n+"""\n+\n+import optparse, os, sys, subprocess, tempfile, shutil, gzip\n+from galaxy import eggs\n+import pkg_resources; pkg_resources.require( "bx-python" )\n+from bx.cookbook import doc_optparse\n+from galaxy import util\n+\n+def stop_err( msg ):\n+    sys.stderr.write( \'%s\\n\' % msg )\n+    sys.exit()\n+\n+def check_seq_file( dbkey, cached_seqs_pointer_file ):\n+    seq_path = \'\'\n+    for line in open( cached_seqs_pointer_file ):\n+        line = line.rstrip( \'\\r\\n\' )\n+        if line and not line.startswith( \'#\' ) and line.startswith( \'index\' ):\n+            fields = line.split( \'\\t\' )\n+            if len( fields ) < 3:\n+                continue\n+            if fields[1] == dbkey:\n+                seq_path = fields[2].strip()\n+                break\n+    return seq_path\n+\n+def __main__():\n+    #Parse Command Line\n+    parser = optparse.OptionParser()\n+    parser.add_option( \'\', \'--input1\', dest=\'input1\', help=\'The input SAM dataset\' )\n+    parser.add_option( \'\', \'--dbkey\', dest=\'dbkey\', help=\'The build of the reference dataset\' )\n+    parser.add_option( \'\', \'--ref_file\', dest=\'ref_file\', help=\'The reference dataset from the history\' )\n+    parser.add_option( \'\', \'--output1\', dest=\'output1\', help=\'The output BAM dataset\' )\n+    parser.add_option( \'\', \'--index_dir\', dest=\'index_dir\', help=\'GALAXY_DATA_INDEX_DIR\' )\n+    ( options, args ) = parser.parse_args()\n+\n+    # output version # of tool\n+    try:\n+        tmp = tempfile.NamedTemporaryFile().name\n+        tmp_stdout = open( tmp, \'wb\' )\n+        proc = subprocess.Popen( args=\'samtools 2>&1\', shell=True, 
stdout=tmp_stdout )\n+        tmp_stdout.close()\n+        returncode = proc.wait()\n+        stdout = None\n+        for line in open( tmp_stdout.name, \'rb\' ):\n+            if line.lower().find( \'version\' ) >= 0:\n+                stdout = line.strip()\n+                break\n+        if stdout:\n+            sys.stdout.write( \'Samtools %s\\n\' % stdout )\n+        else:\n+            raise Exception\n+    except:\n+        sys.stdout.write( \'Could not determine Samtools version\\n\' )\n+\n+    cached_seqs_pointer_file = \'%s/sam_fa_indices.loc\' % options.index_dir\n+    if not os.path.exists( cached_seqs_pointer_file ):\n+        stop_err( \'The required file (%s) does not exist.\' % cached_seqs_pointer_file )\n+    # If found for the dbkey, seq_path will look something like /galaxy/data/equCab2/sam_index/equCab2.fa,\n+    # and the equCab2.fa file will contain fasta sequences.\n+    seq_path = check_seq_file( options.dbkey, cached_seqs_pointer_file )\n+    tmp_dir = tempfile.mkdtemp( dir=\'.\' )\n+    if not options.ref_file or options.ref_file == \'None\':\n+        # We\'re using locally cached reference sequences( e.g., /galaxy/data/equCab2/sam_index/equCab2.fa ).\n+        # The indexes for /galaxy/data/equCab2/sam_index/equCab2.fa will be contained in\n+        # a file named /galaxy/data/equCab2/sam_index/equCab2.fa.fai\n+        fai_index_file_base = seq_path\n+        fai_index_file_path = \'%s.fai\' % seq_path \n+        if not os.path.exists( fai_index_file_path ):\n+            #clean up temp files\n+            if os.path.exists( tmp_dir ):\n+                shutil.rmtree( tmp_dir )\n+            stop_err( \'No sequences are available for build (%s), request them by reporting this error.\' % options.dbkey )\n+    else:\n+        try:\n+            # Create indexes for history reference ( e.g., ~/database/files/000/dataset_1.dat ) using samtools faidx, which will:\n+            # - index reference sequence in the FASTA format or extract 
subsequence from indexed reference sequence\n+            # - if no region is specified, faidx will index the file and create <ref.fasta>.fai on the disk\n+            # - if re'..b'.close()\n+            if returncode != 0:\n+                raise Exception, stderr \n+            if os.path.getsize( fai_index_file_path ) == 0:\n+                raise Exception, \'Index file empty, there may be an error with your reference file or settings.\'\n+        except Exception, e:\n+            #clean up temp files\n+            if os.path.exists( tmp_dir ):\n+                shutil.rmtree( tmp_dir )\n+            stop_err( \'Error creating indexes from reference (%s), %s\' % ( options.ref_file, str( e ) ) )\n+    try:\n+        # Extract all alignments from the input SAM file to BAM format ( since no region is specified, all the alignments will be extracted ).\n+        tmp_aligns_file = tempfile.NamedTemporaryFile( dir=tmp_dir )\n+        tmp_aligns_file_name = tmp_aligns_file.name\n+        tmp_aligns_file.close()\n+        command = \'samtools view -bt %s -o %s %s\' % ( fai_index_file_path, tmp_aligns_file_name, options.input1 )\n+        tmp = tempfile.NamedTemporaryFile( dir=tmp_dir ).name\n+        tmp_stderr = open( tmp, \'wb\' )\n+        proc = subprocess.Popen( args=command, shell=True, cwd=tmp_dir, stderr=tmp_stderr.fileno() )\n+        returncode = proc.wait()\n+        tmp_stderr.close()\n+        # get stderr, allowing for case where it\'s very large\n+        tmp_stderr = open( tmp, \'rb\' )\n+        stderr = \'\'\n+        buffsize = 1048576\n+        try:\n+            while True:\n+                stderr += tmp_stderr.read( buffsize )\n+                if not stderr or len( stderr ) % buffsize != 0:\n+                    break\n+        except OverflowError:\n+            pass\n+        tmp_stderr.close()\n+        if returncode != 0:\n+            raise Exception, stderr\n+    except Exception, e:\n+        #clean up temp files\n+        if 
os.path.exists( tmp_dir ):\n+            shutil.rmtree( tmp_dir )\n+        stop_err( \'Error extracting alignments from (%s), %s\' % ( options.input1, str( e ) ) )\n+    try:\n+        # Sort alignments by leftmost coordinates. File <out.prefix>.bam will be created. This command\n+        # may also create temporary files <out.prefix>.%d.bam when the whole alignment cannot be fitted\n+        # into memory ( controlled by option -m ).\n+        tmp_sorted_aligns_file = tempfile.NamedTemporaryFile( dir=tmp_dir )\n+        tmp_sorted_aligns_file_name = tmp_sorted_aligns_file.name\n+        tmp_sorted_aligns_file.close()\n+        command = \'samtools sort %s %s\' % ( tmp_aligns_file_name, tmp_sorted_aligns_file_name )\n+        tmp = tempfile.NamedTemporaryFile( dir=tmp_dir ).name\n+        tmp_stderr = open( tmp, \'wb\' )\n+        proc = subprocess.Popen( args=command, shell=True, cwd=tmp_dir, stderr=tmp_stderr.fileno() )\n+        returncode = proc.wait()\n+        tmp_stderr.close()\n+        # get stderr, allowing for case where it\'s very large\n+        tmp_stderr = open( tmp, \'rb\' )\n+        stderr = \'\'\n+        buffsize = 1048576\n+        try:\n+            while True:\n+                stderr += tmp_stderr.read( buffsize )\n+                if not stderr or len( stderr ) % buffsize != 0:\n+                    break\n+        except OverflowError:\n+            pass\n+        tmp_stderr.close()\n+        if returncode != 0:\n+            raise Exception, stderr\n+    except Exception, e:\n+        #clean up temp files\n+        if os.path.exists( tmp_dir ):\n+            shutil.rmtree( tmp_dir )\n+        stop_err( \'Error sorting alignments from (%s), %s\' % ( tmp_aligns_file_name, str( e ) ) )\n+    # Move tmp_aligns_file_name to our output dataset location\n+    sorted_bam_file = \'%s.bam\' % tmp_sorted_aligns_file_name\n+    shutil.move( sorted_bam_file, options.output1 )\n+    #clean up temp files\n+    if os.path.exists( tmp_dir ):\n+        
shutil.rmtree( tmp_dir )\n+    # check that there are results in the output file\n+    if os.path.getsize( options.output1 ) > 0:\n+        sys.stdout.write( \'SAM file converted to BAM\' )\n+    else:\n+        stop_err( \'Error creating sorted version of BAM file.\' )\n+\n+if __name__=="__main__": __main__()\n'
b
diff -r 000000000000 -r 30fdbaccb96b sam_to_bam.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/sam_to_bam.xml Mon Aug 26 14:22:00 2013 -0400
b
@@ -0,0 +1,97 @@
+<tool id="sam_to_bam" name="SAM-to-BAM" version="1.1.2">
+  <description>converts SAM format to BAM format</description>
+  <requirements>
+    <requirement type="package" version="0.1.18">samtools</requirement>
+  </requirements>
+  <command interpreter="python">
+    sam_to_bam.py
+      --input1=$source.input1
+      #if $source.index_source == "history":
+        --dbkey=${ref_file.metadata.dbkey} 
+        --ref_file=$source.ref_file
+      #else
+        --dbkey=${input1.metadata.dbkey} 
+      #end if
+      --output1=$output1
+      --index_dir=${GALAXY_DATA_INDEX_DIR}
+  </command>
+  <inputs>
+    <conditional name="source">
+      <param name="index_source" type="select" label="Choose the source for the reference list">
+        <option value="cached">Locally cached</option>
+        <option value="history">History</option>
+      </param>
+      <when value="cached">
+        <param name="input1" type="data" format="sam" metadata_name="dbkey" label="SAM File to Convert">
+           <validator type="unspecified_build" />
+           <validator type="dataset_metadata_in_file" filename="sam_fa_indices.loc" metadata_name="dbkey" metadata_column="1" message="Sequences are not currently available for the specified build." line_startswith="index" />
+        </param>
+      </when>
+      <when value="history">
+        <param name="input1" type="data" format="sam" label="Convert SAM file" />
+        <param name="ref_file" type="data" format="fasta" metadata_name="dbkey" label="Using reference file" />
+      </when>
+    </conditional>
+  </inputs>
+  <outputs>
+    <data format="bam" name="output1" label="${tool.name} on ${on_string}: converted BAM">
+      <actions>
+        <conditional name="source.index_source">
+          <when value="cached">
+            <action type="metadata" name="dbkey">
+              <option type="from_param" name="source.input1" param_attribute="dbkey" />
+            </action>
+          </when>
+          <when value="history">
+            <action type="metadata" name="dbkey">
+              <option type="from_param" name="source.ref_file" param_attribute="dbkey" />
+            </action>
+          </when>
+        </conditional>
+      </actions>
+    </data>
+  </outputs>
+  <tests>
+    <test>
+      <!--
+      Sam-to-Bam command:
+      cp test-data/chr_m.fasta .
+      samtools faidx chr_m.fasta
+      samtools view -hbt chr_m.fasta.fai -o unsorted.bam test-data/sam_to_bam_in1.sam
+      samtools sort unsorted.bam sam_to_bam_out1
+      chr_m.fasta is the reference file (chrM from equCab2)
+      -->
+      <param name="index_source" value="history" /> 
+      <param name="input1" value="sam_to_bam_in1.sam" ftype="sam" />
+      <param name="ref_file" value="chr_m.fasta" ftype="fasta" dbkey="equCab2" />
+      <output name="output1" file="sam_to_bam_out1.bam" ftype="bam" />
+    </test>
+    <test>
+      <!--
+      Sam-to-Bam command:
+      samtools view -hbt chr_m.fasta.fai -o unsorted.bam test-data/sam_to_bam_in1.sam
+      samtools sort unsorted.bam sam_to_bam_out2
+      chr_m.fasta is the reference file and chr_m.fasta.fai is its index;
+      both should be in the same directory. chrM is from equCab2.
+      -->
+      <param name="index_source" value="cached" />
+      <param name="input1" value="sam_to_bam_in1.sam" ftype="sam" dbkey="chrM" />
+      <output name="output1" file="sam_to_bam_out2.bam" ftype="bam" />
+    </test>
+  </tests>
+  <help>
+
+**What it does**
+
+This tool uses the SAMTools_ toolkit to convert an input SAM file into a coordinate-sorted BAM file.
+
+.. _SAMTools: http://samtools.sourceforge.net/samtools.shtml
+
+------
+
+**Citation**
+
+For the underlying tool, please cite `Li H, Handsaker B, Wysoker A, Fennell T, Ruan J, Homer N, Marth G, Abecasis G, Durbin R; 1000 Genome Project Data Processing Subgroup. The Sequence Alignment/Map format and SAMtools. Bioinformatics. 2009 Aug 15;25(16):2078-9. &lt;http://www.ncbi.nlm.nih.gov/pubmed/19505943&gt;`_
+
+  </help>
+</tool>
b
diff -r 000000000000 -r 30fdbaccb96b test-data/chr_m.fasta
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/chr_m.fasta Mon Aug 26 14:22:00 2013 -0400
b
b'@@ -0,0 +1,335 @@\n+>chrM\n+GTTAATGTAGCTTAATAATATAAAGCAAGGCACTGAAAATGCCTAGATGA\n+GTATTCTTACTCCATAAACACATAGGCTTGGTCCTAGCCTTTTTATTAGT\n+TATTAATAGAATTACACATGCAAGTATCCGCACCCCAGTGAGAATGCCCT\n+CTAAATCACGTCTCTACGATTAAAAGGAGCAGGTATCAAGCACACTAGAA\n+AGTAGCTCATAACACCTTGCTCAGCCACACCCCCACGGGACACAGCAGTG\n+ATAAAAATTAAGCTATGAACGAAAGTTCGACTAAGTCATATTAAATAAGG\n+GTTGGTAAATTTCGTGCCAGCCACCGCGGTCATACGATTAACCCAAATTA\n+ATAAATCTCCGGCGTAAAGCGTGTCAAAGACTAATACCAAAATAAAGTTA\n+AAACCCAGTTAAGCCGTAAAAAGCTACAACCAAAGTAAAATAGACTACGA\n+AAGTGACTTTAATACCTCTGACTACACGATAGCTAAGACCCAAACTGGGA\n+TTAGATACCCCACTATGCTTAGCCCTAAACTAAAATAGCTTACCACAACA\n+AAGCTATTCGCCAGAGTACTACTAGCAACAGCCTAAAACTCAAAGGACTT\n+GGCGGTGCTTTACATCCCTCTAGAGGAGCCTGTTCCATAATCGATAAACC\n+CCGATAAACCCCACCATCCCTTGCTAATTCAGCCTATATACCGCCATCTT\n+CAGCAAACCCTAAACAAGGTACCGAAGTAAGCACAAATATCCAACATAAA\n+AACGTTAGGTCAAGGTGTAGCCCATGGGATGGAGAGAAATGGGCTACATT\n+TTCTACCCTAAGAACAAGAACTTTAACCCGGACGAAAGTCTCCATGAAAC\n+TGGAGACTAAAGGAGGATTTAGCAGTAAATTAAGAATAGAGAGCTTAATT\n+GAATCAGGCCATGAAGCGCGCACACACCGCCCGTCACCCTCCTTAAATAT\n+CACAAATCATAACATAACATAAAACCGTGACCCAAACATATGAAAGGAGA\n+CAAGTCGTAACAAGGTAAGTATACCGGAAGGTGTACTTGGATAACCAAAG\n+TGTAGCTTAAACAAAGCATCCAGCTTACACCTAGAAGATTTCACTCAAAA\n+TGAACACTTTGAACTAAAGCTAGCCCAAACAATACCTAATTCAATTACCC\n+TTAGTCACTTAACTAAAACATTCACCAAACCATTAAAGTATAGGAGATAG\n+AAATTTTAACTTGGCGCTATAGAGAAAGTACCGTAAGGGAACGATGAAAG\n+ATGCATTAAAAGTACTAAACAGCAAAGCTTACCCCTTTTACCTTTTGCAT\n+AATGATTTAACTAGAATAAACTTAGCAAAGAGAACTTAAGCTAAGCACCC\n+CGAAACCAGACGAGCTACCTATGAACAGTTACAAATGAACCAACTCATCT\n+ATGTCGCAAAATAGTGAGAAGATTCGTAGGTAGAGGTGAAAAGCCCAACG\n+AGCCTGGTGATAGCTGGTTGTCCAGAAACAGAATTTCAGTTCAAATTTAA\n+ATTTACCTAAAAACTACTCAATTCTAATGTAAATTTAAATTATAGTCTAA\n+AAAGGTACAGCTTTTTAGATACAGGTTACAACCTTCATTAGAGAGTAAGA\n+ACAAGATAAACCCATAGTTGGCTTAAAAGCAGCCATCAATTAAGAAAGCG\n+TTCAAGCTCAACGACACATCTATCTTAATCCCAACAATCAACCCAAACTA\n+ACTCCTAATCTCATACTGGACTATTCTATCAACACATAGAAGCAATAATG\n+TTAATATGAGTAACAAGAATTATTTCTCCTTGCATAAGCTTATATCAGAA\n+CGAATACTCACTGATAGTTAACAACAAGATAGGGATAATCCAAAAACTAA\n+TCATCTATT
TAAACCATTGTTAACCCAACACAGGCATGCATCTATAAGGA\n+AAGATTAAAAGAAGTAAAAGGAACTCGGCAAACACAAACCCCGCCTGTTT\n+ACCAAAAACATCACCTCTAGCATTTCCAGTATTAGAGGCACTGCCTGCCC\n+AGTGACATCTGTTtaaacggccgcggtatcctaaccgtgcaaaggtagca\n+taatcacttgttccctaaatagggacttgtatgaatggccacacgagggt\n+tttactgtctcttacttccaatcagtgaaattgaccttcccgtgaagagg\n+cgggaatgactaaataagacgagaagaccctatggagcttTAATTAACTG\n+ATTCACAAAAAACAACACACAAACCTTAACCTTCAGGGACAACAAAACTT\n+TTGATTGAATCAGCAATTTCGGTTGGGGTGACCTCGGAGAACAAAACAAC\n+CTCCGAGTGATTTAAATCCAGACTAACCAGTCAAAATATATAATCACTTA\n+TTGATCCAAACCATTGATCAACGGAACAAGTTACCCTAGGGATAACAGCG\n+CAATCCTATTCCAGAGTCCATATCGACAATTAGGGTTTACGACCTCGATG\n+TTGGATCAAGACATCCTAATGGTGCAACCGCTATTAAGGGTTCGTTTGTT\n+CAACGATTAAAGTCTTACGTGATCTGAGTTCAGACCGGAGTAATCCAGGT\n+CGGTTTCTATCTATTCTATACTTTTCCCAGTACGAAAGGACAAGAAAAGT\n+AGGGCCCACTTTACAAGAAGCGCCCTCAAACTAATAGATGACATAATCTA\n+AATCTAACTAATTTATAACTTCTACCGCCCTAGAACAGGGCTCgttaggg\n+tggcagagcccggaaattgcataaaacttaaacctttacactcagaggtt\n+caactcctctccctaacaacaTGTTCATAATTAACGTCCTCCTCCTAATT\n+GTCCCAATCTTGCTCGCCGTAGCATTCCTCACACTAGTTGAACGAAAAGT\n+CTTAGGCTATATGCAACTTCGCAAAGGACCCAACATCGTAGGCCCCTATG\n+GCCTACTACAACCTATTGCCGATGCCCTCAAACTATTTATCAAAGAGCCA\n+CTACAACCACTAACATCATCGACATCCATATTCATCATCGCACCAATCCT\n+AGCCCTAACCCTGGCCTTAACCATATGAATCCCTCTGCCCATACCATACC\n+CACTAATCAACATAAACCTAGGAATTCTATTCATACTAGCCATGTCCAGC\n+CTAGCTGTCTACTCAATCCTTTGATCAGGATGGGCCTCAAACTCAAAATA\n+CGCCCTAATTGGAGCTCTACGAGCAGTAGCACAAACCATCTCATACGAAG\n+TAACTCTAGCAATCATCCTACTCTCAGTCCTCCTAATAAGCGGATCATTC\n+ACATTATCAACACTTATTATTACCCAAGAATACCTCTGATTAATCTTCCC\n+ATCATGACCCTTAGCCATAATGTGATTCATCTCAACATTAGCCGAAACCA\n+ACCGAGCTCCATTTGACCTAACAGAAGGAGAATCAGAACTCGTCTCTGGA\n+TTCAACGTTGAATACGCAGCCGGCCCATTTGCTCTATTCTTCCTAGCAGA\n+ATACGCAAACATCATCATGATAAACATCTTCACAACAACCCTATTTCTAG\n+GAGCATTTCACAACCCCTACCTGCCAGAACTCTACTCAATTAATTTCACC\n+ATTAAAGCTCTCCTTCTAACATGTTCCTTCCTATGAATCCGAGCATCCTA\n+CCCACGATTCCGATATGACCAACTTATACACCTCCTATGAAAGAACTTCC\n+TACCACTCACACTAGCCCTCTGCATATGACACGTCTCACTTCCAATCATA\n+CTATCCAGCATCCCACCACAAACATAGGAAATATGTCTGACAAAAGAG
TT\n+ACTTTGATAGAGTAAAACATAGAGGCTCAAACCCTCTTATTTctagaact\n+acaggaattgaacctgctcct'..b'CATTATCCACAGCCTAAATGACGAGCAAGATATC\n+CGAAAAATAGGCGGACTATTTAATGCAATACCCTTCACCACCACATCTCT\n+AATTATTGGCAGCCTTGCACTCACCGGAATTCCTTTCCTCACAGGCTTCT\n+ACTCCAAAGACCTCATCATCGAAACCGCCAACACATCGTACACCAACGCC\n+TGAGCCCTACTAATAACTCTCATTGCCACATCCCTCACAGCTGTCTACAG\n+TACCCGAATCATCTTCTTTGCACTCCTAGGGCAACCCCGCTTCCTCCCTC\n+TGACCTCAATCAACGAAAATAACCCCTTTCTAATTAACTCCATCAAACGC\n+CTCTTAATTGGCAGCATTTTTGCCGGATTCTTCATCTCCAACAATATCTA\n+CCCCACAACCGTCCCAGAAATAACCATACCTACTTACATAAAACTCACCG\n+CCCTCGCAGTAACCATCCTAGGATTTACACTAGCCCTAGAACTAAGCTTG\n+ATAACCCATAACTTAAAACTAGAACACTCCACCAACGTATTCAAATTCTC\n+CAACCTCCTAGGATACTACCCAACAATTATACACCGACTCCCACCGCTCG\n+CTAACCTATCAATAAGCCAAAAATCAGCATCACTTCTACTAGACTCAATC\n+TGACTAGAAAACATCCTGCCAAAATCTATCTCCCAGTTCCAAATAAAAAC\n+CTCGATCCTAATTTCCACCCAAAAAGGACAAATCAAATTATATTTCCTCT\n+CATTCCTCATCACCCTTACCCTAAGCATACTACTTTTTAATCTCCACGAG\n+TAACCTCTAAAATTACCAAGACCCCAACAAGCAACGATCAACCAGTCACA\n+ATCACAACCCAAGCCCCATAACTATACAATGCAGCAGCCCCTATAATTTC\n+CTCACTAAACGCCCCAGAATCTCCAGTATCATAAATAGCTCAAGCCCCCA\n+CACCACTAAACTTAAACACTACCCCCACTTCCTCACTCTTCAGAACATAT\n+AAAACCAACATAACCTCCATCAACAACCCTAAAAGAAATACCCCCATAAC\n+AGTCGTATTAGACACCCATACCTCAGGATACTGCTCAGTAGCCATAGCCG\n+TTGTATAACCAAAAACAACCAACATTCCTCCCAAATAAATCAAAAACACC\n+ATCAACCCCAAAAAGGACCCTCCAAAATTCATAATAATACCACAACCTAC\n+CCCTCCACTTACAATCAGCACTAAACCCCCATAAATAGGTGAAGGTTTTG\n+AAGAAAACCCCACAAAACTAACAACAAAAATAACACTCAAAATAAACACA\n+ATATATGTCATCATTATTCCCACGTGGAATCTAACCACGACCAATGACAT\n+GAAAAATCATCGTTGTATTTCAACTATAAGAACACCAATGACAAACATCC\n+GGAAATCTCACCCACTAATTAAAATCATCAATCACTCTTTTATTGACCTA\n+CCAGCCCCCTCAAACATTTCATCATGATGAAACTTCGGCTCCCTCCTAGG\n+AATCTGCCTAATCCTCCAAATCTTAACAGGCCTATTCCTAGCCATACACT\n+ACACATCAGACACGACAACTGCCTTCTCATCCGTCACTCACATCTGCCGA\n+GACGTTAACTACGGATGAATTATTCGCTACCTCCATGCCAACGGAGCATC\n+AATATTTTTTATCTGCCTCTTCATTCACGTAGGACGCGGCCTCTACTACG\n+GCTCTTACACATTCCTAGAGACATGAAACATTGGAATCATCCTACTTTTC\n+ACAGTTATAGCTACAGCATTCATGGGCTATGTCCTACCATGAGGCCAAAT\n+ATCCTTTTGAGGAGCAACAGTCAT
CACGAACCTCCTATCAGCAATTCCCT\n+ACATCGGTACTACCCTCGTCGAGTGAATCTGAGGTGGATTCTCAGTAGAC\n+AAAGCCACCCTTACCCGATTTTTTGCTTTCCACTTCATCCTACCCTTCAT\n+CATCACAGCCCTGGTAGTCGTACATTTACTATTTCTTCACGAAACAGGAT\n+CTAATAACCCCTCAGGAATCCCATCCGATATGGACAAAATCCCATTCCAC\n+CCATATTATACAATTAAAGACATCCTAGGACTCCTCCTCCTGATCTTGCT\n+CCTACTAACTCTAGTATTATTCTCCCCCGACCTCCTAGGAGACCCAGACA\n+ACTACACCCCAGCTAACCCTCTCAGCACTCCCCCTCATATTAAACCAGAA\n+TGGTACTTCCTGTTTGCCTACGCCATCCTACGCTCCATTCCCAACAAACT\n+AGGCGGCGTATTAGCCCTAATCCTCTCCATCCTGATCCTAGCACTCATCC\n+CCACCCTCCACATATCAAAACAACGAAGCATAATATTCCGGCCTCTCAGC\n+CAATGCGTATTCTGACTCTTAGTGGCAGACTTACTGACACTAACATGAAT\n+CGGCGGACAGCCAGTGGAACACCCATACGTAATTATCGGCCAACTGGCCT\n+CAATCCTCTACTTCTCCCTAATTCTCATTTTTATACCACTCGCAAGCACC\n+ATCGAAAACAATCTTCTAAAATGAAGAGTCCCTGTAGTATATCGCACATT\n+ACCCTGGTCTTGTAAACCAGAAAAGGGGGAAAACGTTTCCTCCCAAGGAC\n+TATCAAGGAAGAAGCTCTAGCTCCACCATCAACACCCAAAGCTGAAATTC\n+TACTTAAACTATTCCTTGATTTCTTCCCCTAAACGACAACAATTTACCCT\n+CATGTGCTATGTCAGTATCAGATTATACCCCCACATAACACCATACCCAC\n+CTGACATGCAATATCTTATGAATGGCCTATGTACGTCGTGCATTAAATTG\n+TCTGCCCCATGAATAATAAGCATGTACATAATATCATTTATCTTACATAA\n+GTACATTATATTATTGATCGTGCATACCCCATCCAAGTCAAATCATTTCC\n+AGTCAACACGCATATCACAGCCCATGTTCCACGAGCTTAATCACCAAGCC\n+GCGGGAAATCAGCAACCCTCCCAACTACGTGTCCCAATCCTCGCTCCGGG\n+CCCATCCAAACGTGGGGGTTTCTACAATGAAACTATACCTGGCATCTGGT\n+TCTTTCTTCAGGGCCATTCCCACCCAACCTCGCCCATTCTTTCCCCTTAA\n+ATAAGACATCTCGATGGACTAATGACTAATCAGCCCATGCTCACACATAA\n+CTGTGATTTCATGCATTTGGTATCTTTTTATATTTGGGGATGCTATGACT\n+CAGCTATGGCCGTCAAAGGCCTCGACGCAGTCAATTAAATTGAAGCTGGA\n+CTTAAATTGAACGTTATTCCTCCGCATCAGCAACCATAAGGTGTTATTCA\n+GTCCATGGTAGCGGGACATAGGAAACAAgtgcacctgtgcacctgtgcac\n+ctgtgcacctgtgcacctgtgcacctgtgcacctgtgcacctgtgcacct\n+gtgcacctgtgcacctgtgcacctgtgcacctgtgcacctgtgcacctgt\n+gcacctgtgcacctgtgcacctgtgcacctgtgcacctgtgcacctgtgc\n+acctgtgcacctgtgcacctgtgcacctgtgcacctgtgcacctgtgcac\n+ctgtgcacctACCCGCGCAGTAAGCAAGTAATATAGCTTTCTTAATCAAA\n+CCCCCCCTACCCCCCATTAAACTCCACATATGTACATTCAACACAATCTT\n+GCCAAACCCCAAAAACAAGACTAAACAATGCACAATACTTCATGAAGCTT\n+AACCCTCGCA
TGCCAACCATAATAACTCAACACACCTAACAATCTTAACA\n+GAACTTTCCCCCCGCCATTAATACCAACATGCTACTTTAATCAATAAAAT\n+TTCCATAGACAGGCATCCCCCTAGATCTAATTTTCTAAATCTGTCAACCC\n+TTCTTCCCCC\n'
b
diff -r 000000000000 -r 30fdbaccb96b test-data/sam_to_bam_in1.sam
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/sam_to_bam_in1.sam Mon Aug 26 14:22:00 2013 -0400
b
@@ -0,0 +1,13 @@
+@HD VN:1.0 SO:coordinate
+@SQ SN:chrM LN:100001
+@RG ID:rg1 SM:s1
+HWI-EAS91_1_30788AAXX:1:1:1513:715 16 chrM 9563 25 36M * 0 0 CTGACTACCACAACTAAACATCTATGCNNAAAAAAC I+-II?IDIIIIIIIIIIIIIIIIIII""IIIIIII NM:i:1 X1:i:1 MD:Z:7N0N27 RG:Z:rg1
+HWI-EAS91_1_30788AAXX:1:1:1698:516 16 chrM 2735 25 36M * 0 0 TTTACACTCAGAGGTTCAACTCCTCTCNNTAACAAC I9IIIII5IIIIIIIIIIIIIIIIIII""IIIIIII NM:i:1 X1:i:1 MD:Z:7N0N27 RG:Z:rg1
+HWI-EAS91_1_30788AAXX:1:1:1491:637 16 chrM 10864 25 36M * 0 0 TGTAGAAGCCCCAATTGCCGGATCCATNNTGCTAGC DBAIIIIIIIIIIIFIIIIIIIIIIII""IIIIIII NM:i:1 X1:i:1 MD:Z:7N0N27 RG:Z:rg1
+HWI-EAS91_1_30788AAXX:1:1:1711:249 16 chrM 10617 25 36M * 0 0 ACCAAACAGAACGCCTGAACGCAGGCCNNTACTTCC IIIIIIIIIIIIIIIIIIIIIIIIIII""IIIIIII NM:i:1 X1:i:1 MD:Z:7N0N27 RG:Z:rg1
+HWI-EAS91_1_30788AAXX:1:1:1634:211 0 chrM 9350 25 36M * 0 0 GAAGCAGNNGCTTGATACTGACACTTCGTCGACGTA IIIIIII""IIIIIIIIIIIIIIIIIIIIII9IIDF NM:i:1 X1:i:1 MD:Z:7N0N27 RG:Z:rg1
+HWI-EAS91_1_30788AAXX:1:1:1218:141 16 chrM 14062 25 36M * 0 0 ACAAAACTAACAACAAAAATAACACTCNNAATAAAC I+IIII1IIIIIIIIIIIIIIIIIIII""IIIIIII NM:i:1 X1:i:1 MD:Z:7N0N27 RG:Z:rg1
+HWI-EAS91_1_30788AAXX:1:1:1398:854 16 chrM 3921 25 36M * 0 0 CACCCTTCCCGTACTAATAAATCCCCTNNTCTTCAC IIIII=AIIIIIIIIIIIIIIBIIIII""IIIIIII NM:i:1 X1:i:1 MD:Z:7N0N27 RG:Z:rg1
+HWI-EAS91_1_30788AAXX:1:1:1310:991 16 chrM 10002 25 36M * 0 0 CTCCTATGCCTAGAAGGAATAATACTANNACTATTC I:2IEI:IIDIIIIII4IIIIIIIIII""IIIIIII NM:i:1 X1:i:1 MD:Z:7N0N27 RG:Z:rg1
+HWI-EAS91_1_30788AAXX:1:1:1716:413 0 chrM 6040 25 36M * 0 0 GATCCAANNCTTTATCAACACCTATTCTGATTCTTC IIIIIII""IIIIIIIIIIIIIIIIIIIIIIIIIII NM:i:1 X1:i:1 MD:Z:7N0N27 RG:Z:rg1
+HWI-EAS91_1_30788AAXX:1:1:1630:59 16 chrM 12387 25 36M * 0 0 TCATACTCGACCCCAACCTTACCAACCNNCCGCTCC FIIHII;IIIIIIIIIIIIIIIIIIII""IIIIIII NM:i:1 X1:i:1 MD:Z:7N0N27 RG:Z:rg1
b
diff -r 000000000000 -r 30fdbaccb96b test-data/sam_to_bam_out1.bam
b
Binary file test-data/sam_to_bam_out1.bam has changed
b
diff -r 000000000000 -r 30fdbaccb96b test-data/sam_to_bam_out2.bam
b
Binary file test-data/sam_to_bam_out2.bam has changed
b
diff -r 000000000000 -r 30fdbaccb96b tool-data/sam_fa_indices.loc.sample
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool-data/sam_fa_indices.loc.sample Mon Aug 26 14:22:00 2013 -0400
b
@@ -0,0 +1,28 @@
+#This is a sample file distributed with Galaxy that enables tools
+#to use a directory of Samtools-indexed sequence data files.  You will need
+#to create these data files and then create a sam_fa_indices.loc file 
+#similar to this one (store it in this directory) that points to 
+#the directories in which those files are stored. The sam_fa_indices.loc 
+#file has this format (white space characters are TAB characters):
+#
+#index <seq> <location>
+#
+#So, for example, if you had the hg18 index stored in 
+#/depot/data2/galaxy/sam/, 
+#then the sam_fa_indices.loc entry would look like this:
+#
+#index hg18 /depot/data2/galaxy/sam/hg18.fa
+#
+#and your /depot/data2/galaxy/sam/ directory
+#would contain hg18.fa and hg18.fa.fai files:
+#
+#-rw-r--r--  1 james    universe 830134 2005-09-13 10:12 hg18.fa
+#-rw-r--r--  1 james    universe 527388 2005-09-13 10:12 hg18.fa.fai
+#
+#Your sam_fa_indices.loc file should include an entry per line for 
+#each index set you have stored.  The file in the path does actually
+#exist, but it should never be directly used. Instead, the name serves
+#as a prefix for the index file.  For example:
+#
+#index hg18 /depot/data2/galaxy/sam/hg18.fa
+#index hg19 /depot/data2/galaxy/sam/hg19.fa
b
diff -r 000000000000 -r 30fdbaccb96b tool-data/tool_data_table_conf.xml.sample
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool-data/tool_data_table_conf.xml.sample Mon Aug 26 14:22:00 2013 -0400
b
@@ -0,0 +1,8 @@
+<!-- Use the file tool_data_table_conf.xml.oldlocstyle if you don't want to update your loc files as changed in revision 4550:535d276c92bc-->
+<tables>
+    <!-- Location of SAMTools indexes and other files -->
+    <table name="sam_fa_indexes" comment_char="#">
+        <columns>line_type, value, path</columns>
+        <file path="tool-data/sam_fa_indices.loc" />
+    </table>
+</tables>
\ No newline at end of file
b
diff -r 000000000000 -r 30fdbaccb96b tool_dependencies.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_dependencies.xml Mon Aug 26 14:22:00 2013 -0400
b
@@ -0,0 +1,6 @@
+<?xml version="1.0"?>
+<tool_dependency>
+    <package name="samtools" version="0.1.18">
+        <repository changeset_revision="a7936f4ea405" name="package_samtools_0_1_18" owner="devteam" toolshed="http://toolshed.g2.bx.psu.edu" />
+    </package>
+</tool_dependency>