Next changeset 1:93f2e3337a33 (2013-12-11) |
Commit message:
Uploaded tool tarball. |
added:
sam_to_bam.py sam_to_bam.xml test-data/chr_m.fasta test-data/sam_to_bam_in1.sam test-data/sam_to_bam_out1.bam test-data/sam_to_bam_out2.bam tool-data/sam_fa_indices.loc.sample tool-data/tool_data_table_conf.xml.sample tool_dependencies.xml |
b |
diff -r 000000000000 -r 30fdbaccb96b sam_to_bam.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/sam_to_bam.py Mon Aug 26 14:22:00 2013 -0400 |
[ |
b'@@ -0,0 +1,197 @@\n+#!/usr/bin/env python\n+"""\n+Converts SAM data to sorted BAM data.\n+usage: sam_to_bam.py [options]\n+ --input1: SAM file to be converted\n+ --dbkey: dbkey value\n+ --ref_file: Reference file if choosing from history\n+ --output1: output dataset in bam format\n+ --index_dir: GALAXY_DATA_INDEX_DIR\n+"""\n+\n+import optparse, os, sys, subprocess, tempfile, shutil, gzip\n+from galaxy import eggs\n+import pkg_resources; pkg_resources.require( "bx-python" )\n+from bx.cookbook import doc_optparse\n+from galaxy import util\n+\n+def stop_err( msg ):\n+ sys.stderr.write( \'%s\\n\' % msg )\n+ sys.exit()\n+\n+def check_seq_file( dbkey, cached_seqs_pointer_file ):\n+ seq_path = \'\'\n+ for line in open( cached_seqs_pointer_file ):\n+ line = line.rstrip( \'\\r\\n\' )\n+ if line and not line.startswith( \'#\' ) and line.startswith( \'index\' ):\n+ fields = line.split( \'\\t\' )\n+ if len( fields ) < 3:\n+ continue\n+ if fields[1] == dbkey:\n+ seq_path = fields[2].strip()\n+ break\n+ return seq_path\n+\n+def __main__():\n+ #Parse Command Line\n+ parser = optparse.OptionParser()\n+ parser.add_option( \'\', \'--input1\', dest=\'input1\', help=\'The input SAM dataset\' )\n+ parser.add_option( \'\', \'--dbkey\', dest=\'dbkey\', help=\'The build of the reference dataset\' )\n+ parser.add_option( \'\', \'--ref_file\', dest=\'ref_file\', help=\'The reference dataset from the history\' )\n+ parser.add_option( \'\', \'--output1\', dest=\'output1\', help=\'The output BAM dataset\' )\n+ parser.add_option( \'\', \'--index_dir\', dest=\'index_dir\', help=\'GALAXY_DATA_INDEX_DIR\' )\n+ ( options, args ) = parser.parse_args()\n+\n+ # output version # of tool\n+ try:\n+ tmp = tempfile.NamedTemporaryFile().name\n+ tmp_stdout = open( tmp, \'wb\' )\n+ proc = subprocess.Popen( args=\'samtools 2>&1\', shell=True, stdout=tmp_stdout )\n+ tmp_stdout.close()\n+ returncode = proc.wait()\n+ stdout = None\n+ for line in open( tmp_stdout.name, \'rb\' ):\n+ if line.lower().find( 
\'version\' ) >= 0:\n+ stdout = line.strip()\n+ break\n+ if stdout:\n+ sys.stdout.write( \'Samtools %s\\n\' % stdout )\n+ else:\n+ raise Exception\n+ except:\n+ sys.stdout.write( \'Could not determine Samtools version\\n\' )\n+\n+ cached_seqs_pointer_file = \'%s/sam_fa_indices.loc\' % options.index_dir\n+ if not os.path.exists( cached_seqs_pointer_file ):\n+ stop_err( \'The required file (%s) does not exist.\' % cached_seqs_pointer_file )\n+ # If found for the dbkey, seq_path will look something like /galaxy/data/equCab2/sam_index/equCab2.fa,\n+ # and the equCab2.fa file will contain fasta sequences.\n+ seq_path = check_seq_file( options.dbkey, cached_seqs_pointer_file )\n+ tmp_dir = tempfile.mkdtemp( dir=\'.\' )\n+ if not options.ref_file or options.ref_file == \'None\':\n+ # We\'re using locally cached reference sequences( e.g., /galaxy/data/equCab2/sam_index/equCab2.fa ).\n+ # The indexes for /galaxy/data/equCab2/sam_index/equCab2.fa will be contained in\n+ # a file named /galaxy/data/equCab2/sam_index/equCab2.fa.fai\n+ fai_index_file_base = seq_path\n+ fai_index_file_path = \'%s.fai\' % seq_path \n+ if not os.path.exists( fai_index_file_path ):\n+ #clean up temp files\n+ if os.path.exists( tmp_dir ):\n+ shutil.rmtree( tmp_dir )\n+ stop_err( \'No sequences are available for build (%s), request them by reporting this error.\' % options.dbkey )\n+ else:\n+ try:\n+ # Create indexes for history reference ( e.g., ~/database/files/000/dataset_1.dat ) using samtools faidx, which will:\n+ # - index reference sequence in the FASTA format or extract subsequence from indexed reference sequence\n+ # - if no region is specified, faidx will index the file and create <ref.fasta>.fai on the disk\n+ # - if re'..b'.close()\n+ if returncode != 0:\n+ raise Exception, stderr \n+ if os.path.getsize( fai_index_file_path ) == 0:\n+ raise Exception, \'Index file empty, there may be an error with your reference file or settings.\'\n+ except Exception, e:\n+ #clean up temp files\n+ if 
os.path.exists( tmp_dir ):\n+ shutil.rmtree( tmp_dir )\n+ stop_err( \'Error creating indexes from reference (%s), %s\' % ( options.ref_file, str( e ) ) )\n+ try:\n+ # Extract all alignments from the input SAM file to BAM format ( since no region is specified, all the alignments will be extracted ).\n+ tmp_aligns_file = tempfile.NamedTemporaryFile( dir=tmp_dir )\n+ tmp_aligns_file_name = tmp_aligns_file.name\n+ tmp_aligns_file.close()\n+ command = \'samtools view -bt %s -o %s %s\' % ( fai_index_file_path, tmp_aligns_file_name, options.input1 )\n+ tmp = tempfile.NamedTemporaryFile( dir=tmp_dir ).name\n+ tmp_stderr = open( tmp, \'wb\' )\n+ proc = subprocess.Popen( args=command, shell=True, cwd=tmp_dir, stderr=tmp_stderr.fileno() )\n+ returncode = proc.wait()\n+ tmp_stderr.close()\n+ # get stderr, allowing for case where it\'s very large\n+ tmp_stderr = open( tmp, \'rb\' )\n+ stderr = \'\'\n+ buffsize = 1048576\n+ try:\n+ while True:\n+ stderr += tmp_stderr.read( buffsize )\n+ if not stderr or len( stderr ) % buffsize != 0:\n+ break\n+ except OverflowError:\n+ pass\n+ tmp_stderr.close()\n+ if returncode != 0:\n+ raise Exception, stderr\n+ except Exception, e:\n+ #clean up temp files\n+ if os.path.exists( tmp_dir ):\n+ shutil.rmtree( tmp_dir )\n+ stop_err( \'Error extracting alignments from (%s), %s\' % ( options.input1, str( e ) ) )\n+ try:\n+ # Sort alignments by leftmost coordinates. File <out.prefix>.bam will be created. 
This command\n+ # may also create temporary files <out.prefix>.%d.bam when the whole alignment cannot be fitted\n+ # into memory ( controlled by option -m ).\n+ tmp_sorted_aligns_file = tempfile.NamedTemporaryFile( dir=tmp_dir )\n+ tmp_sorted_aligns_file_name = tmp_sorted_aligns_file.name\n+ tmp_sorted_aligns_file.close()\n+ command = \'samtools sort %s %s\' % ( tmp_aligns_file_name, tmp_sorted_aligns_file_name )\n+ tmp = tempfile.NamedTemporaryFile( dir=tmp_dir ).name\n+ tmp_stderr = open( tmp, \'wb\' )\n+ proc = subprocess.Popen( args=command, shell=True, cwd=tmp_dir, stderr=tmp_stderr.fileno() )\n+ returncode = proc.wait()\n+ tmp_stderr.close()\n+ # get stderr, allowing for case where it\'s very large\n+ tmp_stderr = open( tmp, \'rb\' )\n+ stderr = \'\'\n+ buffsize = 1048576\n+ try:\n+ while True:\n+ stderr += tmp_stderr.read( buffsize )\n+ if not stderr or len( stderr ) % buffsize != 0:\n+ break\n+ except OverflowError:\n+ pass\n+ tmp_stderr.close()\n+ if returncode != 0:\n+ raise Exception, stderr\n+ except Exception, e:\n+ #clean up temp files\n+ if os.path.exists( tmp_dir ):\n+ shutil.rmtree( tmp_dir )\n+ stop_err( \'Error sorting alignments from (%s), %s\' % ( tmp_aligns_file_name, str( e ) ) )\n+ # Move tmp_aligns_file_name to our output dataset location\n+ sorted_bam_file = \'%s.bam\' % tmp_sorted_aligns_file_name\n+ shutil.move( sorted_bam_file, options.output1 )\n+ #clean up temp files\n+ if os.path.exists( tmp_dir ):\n+ shutil.rmtree( tmp_dir )\n+ # check that there are results in the output file\n+ if os.path.getsize( options.output1 ) > 0:\n+ sys.stdout.write( \'SAM file converted to BAM\' )\n+ else:\n+ stop_err( \'Error creating sorted version of BAM file.\' )\n+\n+if __name__=="__main__": __main__()\n' |
b |
diff -r 000000000000 -r 30fdbaccb96b sam_to_bam.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/sam_to_bam.xml Mon Aug 26 14:22:00 2013 -0400 |
b |
@@ -0,0 +1,97 @@ +<tool id="sam_to_bam" name="SAM-to-BAM" version="1.1.2"> + <description>converts SAM format to BAM format</description> + <requirements> + <requirement type="package" version="0.1.18">samtools</requirement> + </requirements> + <command interpreter="python"> + sam_to_bam.py + --input1=$source.input1 + #if $source.index_source == "history": + --dbkey=${ref_file.metadata.dbkey} + --ref_file=$source.ref_file + #else + --dbkey=${input1.metadata.dbkey} + #end if + --output1=$output1 + --index_dir=${GALAXY_DATA_INDEX_DIR} + </command> + <inputs> + <conditional name="source"> + <param name="index_source" type="select" label="Choose the source for the reference list"> + <option value="cached">Locally cached</option> + <option value="history">History</option> + </param> + <when value="cached"> + <param name="input1" type="data" format="sam" metadata_name="dbkey" label="SAM File to Convert"> + <validator type="unspecified_build" /> + <validator type="dataset_metadata_in_file" filename="sam_fa_indices.loc" metadata_name="dbkey" metadata_column="1" message="Sequences are not currently available for the specified build." 
line_startswith="index" /> + </param> + </when> + <when value="history"> + <param name="input1" type="data" format="sam" label="Convert SAM file" /> + <param name="ref_file" type="data" format="fasta" metadata_name="dbkey" label="Using reference file" /> + </when> + </conditional> + </inputs> + <outputs> + <data format="bam" name="output1" label="${tool.name} on ${on_string}: converted BAM"> + <actions> + <conditional name="source.index_source"> + <when value="cached"> + <action type="metadata" name="dbkey"> + <option type="from_param" name="source.input1" param_attribute="dbkey" /> + </action> + </when> + <when value="history"> + <action type="metadata" name="dbkey"> + <option type="from_param" name="source.ref_file" param_attribute="dbkey" /> + </action> + </when> + </conditional> + </actions> + </data> + </outputs> + <tests> + <test> + <!-- + Sam-to-Bam command: + cp test-data/chr_m.fasta . + samtools faidx chr_m.fasta + samtools view -hbt chr_m.fasta.fai -o unsorted.bam test-data/sam_to_bam_in1.sam + samtools sort unsorted.bam sam_to_bam_out1 + chr_m.fasta is the reference file (chrM from equCab2) + --> + <param name="index_source" value="history" /> + <param name="input1" value="sam_to_bam_in1.sam" ftype="sam" /> + <param name="ref_file" value="chr_m.fasta" ftype="fasta" dbkey="equCab2" /> + <output name="output1" file="sam_to_bam_out1.bam" ftype="bam" /> + </test> + <test> + <!-- + Sam-to-Bam command: + samtools view -hbt chr_m.fasta.fai -o unsorted.bam test-data/sam_to_bam_in1.sam + samtools sort unsorted.bam sam_to_bam_out2 + chr_m.fasta is the reference file and the index chr_m.fasta.fai + these should be in the same directory, and chrM is from equCab2 + --> + <param name="index_source" value="cached" /> + <param name="input1" value="sam_to_bam_in1.sam" ftype="sam" dbkey="chrM" /> + <output name="output1" file="sam_to_bam_out2.bam" ftype="bam" /> + </test> + </tests> + <help> + +**What it does** + +This tool uses the SAMTools_ toolkit to convert a SAM file into a 
coordinate-sorted BAM file. + +.. _SAMTools: http://samtools.sourceforge.net/samtools.shtml + +------ + +**Citation** + +For the underlying tool, please cite `Li H, Handsaker B, Wysoker A, Fennell T, Ruan J, Homer N, Marth G, Abecasis G, Durbin R; 1000 Genome Project Data Processing Subgroup. The Sequence Alignment/Map format and SAMtools. Bioinformatics. 2009 Aug 15;25(16):2078-9. <http://www.ncbi.nlm.nih.gov/pubmed/19505943>`_ + + </help> +</tool> |
b |
diff -r 000000000000 -r 30fdbaccb96b test-data/chr_m.fasta --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/chr_m.fasta Mon Aug 26 14:22:00 2013 -0400 |
b |
b'@@ -0,0 +1,335 @@\n+>chrM\n+GTTAATGTAGCTTAATAATATAAAGCAAGGCACTGAAAATGCCTAGATGA\n+GTATTCTTACTCCATAAACACATAGGCTTGGTCCTAGCCTTTTTATTAGT\n+TATTAATAGAATTACACATGCAAGTATCCGCACCCCAGTGAGAATGCCCT\n+CTAAATCACGTCTCTACGATTAAAAGGAGCAGGTATCAAGCACACTAGAA\n+AGTAGCTCATAACACCTTGCTCAGCCACACCCCCACGGGACACAGCAGTG\n+ATAAAAATTAAGCTATGAACGAAAGTTCGACTAAGTCATATTAAATAAGG\n+GTTGGTAAATTTCGTGCCAGCCACCGCGGTCATACGATTAACCCAAATTA\n+ATAAATCTCCGGCGTAAAGCGTGTCAAAGACTAATACCAAAATAAAGTTA\n+AAACCCAGTTAAGCCGTAAAAAGCTACAACCAAAGTAAAATAGACTACGA\n+AAGTGACTTTAATACCTCTGACTACACGATAGCTAAGACCCAAACTGGGA\n+TTAGATACCCCACTATGCTTAGCCCTAAACTAAAATAGCTTACCACAACA\n+AAGCTATTCGCCAGAGTACTACTAGCAACAGCCTAAAACTCAAAGGACTT\n+GGCGGTGCTTTACATCCCTCTAGAGGAGCCTGTTCCATAATCGATAAACC\n+CCGATAAACCCCACCATCCCTTGCTAATTCAGCCTATATACCGCCATCTT\n+CAGCAAACCCTAAACAAGGTACCGAAGTAAGCACAAATATCCAACATAAA\n+AACGTTAGGTCAAGGTGTAGCCCATGGGATGGAGAGAAATGGGCTACATT\n+TTCTACCCTAAGAACAAGAACTTTAACCCGGACGAAAGTCTCCATGAAAC\n+TGGAGACTAAAGGAGGATTTAGCAGTAAATTAAGAATAGAGAGCTTAATT\n+GAATCAGGCCATGAAGCGCGCACACACCGCCCGTCACCCTCCTTAAATAT\n+CACAAATCATAACATAACATAAAACCGTGACCCAAACATATGAAAGGAGA\n+CAAGTCGTAACAAGGTAAGTATACCGGAAGGTGTACTTGGATAACCAAAG\n+TGTAGCTTAAACAAAGCATCCAGCTTACACCTAGAAGATTTCACTCAAAA\n+TGAACACTTTGAACTAAAGCTAGCCCAAACAATACCTAATTCAATTACCC\n+TTAGTCACTTAACTAAAACATTCACCAAACCATTAAAGTATAGGAGATAG\n+AAATTTTAACTTGGCGCTATAGAGAAAGTACCGTAAGGGAACGATGAAAG\n+ATGCATTAAAAGTACTAAACAGCAAAGCTTACCCCTTTTACCTTTTGCAT\n+AATGATTTAACTAGAATAAACTTAGCAAAGAGAACTTAAGCTAAGCACCC\n+CGAAACCAGACGAGCTACCTATGAACAGTTACAAATGAACCAACTCATCT\n+ATGTCGCAAAATAGTGAGAAGATTCGTAGGTAGAGGTGAAAAGCCCAACG\n+AGCCTGGTGATAGCTGGTTGTCCAGAAACAGAATTTCAGTTCAAATTTAA\n+ATTTACCTAAAAACTACTCAATTCTAATGTAAATTTAAATTATAGTCTAA\n+AAAGGTACAGCTTTTTAGATACAGGTTACAACCTTCATTAGAGAGTAAGA\n+ACAAGATAAACCCATAGTTGGCTTAAAAGCAGCCATCAATTAAGAAAGCG\n+TTCAAGCTCAACGACACATCTATCTTAATCCCAACAATCAACCCAAACTA\n+ACTCCTAATCTCATACTGGACTATTCTATCAACACATAGAAGCAATAATG\n+TTAATATGAGTAACAAGAATTATTTCTCCTTGCATAAGCTTATATCAGAA\n+CGAATACTCACTGATAGTTAACAACAAGATAGGGATAATCCAAAAACTAA\n+TCATCTATT
TAAACCATTGTTAACCCAACACAGGCATGCATCTATAAGGA\n+AAGATTAAAAGAAGTAAAAGGAACTCGGCAAACACAAACCCCGCCTGTTT\n+ACCAAAAACATCACCTCTAGCATTTCCAGTATTAGAGGCACTGCCTGCCC\n+AGTGACATCTGTTtaaacggccgcggtatcctaaccgtgcaaaggtagca\n+taatcacttgttccctaaatagggacttgtatgaatggccacacgagggt\n+tttactgtctcttacttccaatcagtgaaattgaccttcccgtgaagagg\n+cgggaatgactaaataagacgagaagaccctatggagcttTAATTAACTG\n+ATTCACAAAAAACAACACACAAACCTTAACCTTCAGGGACAACAAAACTT\n+TTGATTGAATCAGCAATTTCGGTTGGGGTGACCTCGGAGAACAAAACAAC\n+CTCCGAGTGATTTAAATCCAGACTAACCAGTCAAAATATATAATCACTTA\n+TTGATCCAAACCATTGATCAACGGAACAAGTTACCCTAGGGATAACAGCG\n+CAATCCTATTCCAGAGTCCATATCGACAATTAGGGTTTACGACCTCGATG\n+TTGGATCAAGACATCCTAATGGTGCAACCGCTATTAAGGGTTCGTTTGTT\n+CAACGATTAAAGTCTTACGTGATCTGAGTTCAGACCGGAGTAATCCAGGT\n+CGGTTTCTATCTATTCTATACTTTTCCCAGTACGAAAGGACAAGAAAAGT\n+AGGGCCCACTTTACAAGAAGCGCCCTCAAACTAATAGATGACATAATCTA\n+AATCTAACTAATTTATAACTTCTACCGCCCTAGAACAGGGCTCgttaggg\n+tggcagagcccggaaattgcataaaacttaaacctttacactcagaggtt\n+caactcctctccctaacaacaTGTTCATAATTAACGTCCTCCTCCTAATT\n+GTCCCAATCTTGCTCGCCGTAGCATTCCTCACACTAGTTGAACGAAAAGT\n+CTTAGGCTATATGCAACTTCGCAAAGGACCCAACATCGTAGGCCCCTATG\n+GCCTACTACAACCTATTGCCGATGCCCTCAAACTATTTATCAAAGAGCCA\n+CTACAACCACTAACATCATCGACATCCATATTCATCATCGCACCAATCCT\n+AGCCCTAACCCTGGCCTTAACCATATGAATCCCTCTGCCCATACCATACC\n+CACTAATCAACATAAACCTAGGAATTCTATTCATACTAGCCATGTCCAGC\n+CTAGCTGTCTACTCAATCCTTTGATCAGGATGGGCCTCAAACTCAAAATA\n+CGCCCTAATTGGAGCTCTACGAGCAGTAGCACAAACCATCTCATACGAAG\n+TAACTCTAGCAATCATCCTACTCTCAGTCCTCCTAATAAGCGGATCATTC\n+ACATTATCAACACTTATTATTACCCAAGAATACCTCTGATTAATCTTCCC\n+ATCATGACCCTTAGCCATAATGTGATTCATCTCAACATTAGCCGAAACCA\n+ACCGAGCTCCATTTGACCTAACAGAAGGAGAATCAGAACTCGTCTCTGGA\n+TTCAACGTTGAATACGCAGCCGGCCCATTTGCTCTATTCTTCCTAGCAGA\n+ATACGCAAACATCATCATGATAAACATCTTCACAACAACCCTATTTCTAG\n+GAGCATTTCACAACCCCTACCTGCCAGAACTCTACTCAATTAATTTCACC\n+ATTAAAGCTCTCCTTCTAACATGTTCCTTCCTATGAATCCGAGCATCCTA\n+CCCACGATTCCGATATGACCAACTTATACACCTCCTATGAAAGAACTTCC\n+TACCACTCACACTAGCCCTCTGCATATGACACGTCTCACTTCCAATCATA\n+CTATCCAGCATCCCACCACAAACATAGGAAATATGTCTGACAAAAGAG
TT\n+ACTTTGATAGAGTAAAACATAGAGGCTCAAACCCTCTTATTTctagaact\n+acaggaattgaacctgctcct'..b'CATTATCCACAGCCTAAATGACGAGCAAGATATC\n+CGAAAAATAGGCGGACTATTTAATGCAATACCCTTCACCACCACATCTCT\n+AATTATTGGCAGCCTTGCACTCACCGGAATTCCTTTCCTCACAGGCTTCT\n+ACTCCAAAGACCTCATCATCGAAACCGCCAACACATCGTACACCAACGCC\n+TGAGCCCTACTAATAACTCTCATTGCCACATCCCTCACAGCTGTCTACAG\n+TACCCGAATCATCTTCTTTGCACTCCTAGGGCAACCCCGCTTCCTCCCTC\n+TGACCTCAATCAACGAAAATAACCCCTTTCTAATTAACTCCATCAAACGC\n+CTCTTAATTGGCAGCATTTTTGCCGGATTCTTCATCTCCAACAATATCTA\n+CCCCACAACCGTCCCAGAAATAACCATACCTACTTACATAAAACTCACCG\n+CCCTCGCAGTAACCATCCTAGGATTTACACTAGCCCTAGAACTAAGCTTG\n+ATAACCCATAACTTAAAACTAGAACACTCCACCAACGTATTCAAATTCTC\n+CAACCTCCTAGGATACTACCCAACAATTATACACCGACTCCCACCGCTCG\n+CTAACCTATCAATAAGCCAAAAATCAGCATCACTTCTACTAGACTCAATC\n+TGACTAGAAAACATCCTGCCAAAATCTATCTCCCAGTTCCAAATAAAAAC\n+CTCGATCCTAATTTCCACCCAAAAAGGACAAATCAAATTATATTTCCTCT\n+CATTCCTCATCACCCTTACCCTAAGCATACTACTTTTTAATCTCCACGAG\n+TAACCTCTAAAATTACCAAGACCCCAACAAGCAACGATCAACCAGTCACA\n+ATCACAACCCAAGCCCCATAACTATACAATGCAGCAGCCCCTATAATTTC\n+CTCACTAAACGCCCCAGAATCTCCAGTATCATAAATAGCTCAAGCCCCCA\n+CACCACTAAACTTAAACACTACCCCCACTTCCTCACTCTTCAGAACATAT\n+AAAACCAACATAACCTCCATCAACAACCCTAAAAGAAATACCCCCATAAC\n+AGTCGTATTAGACACCCATACCTCAGGATACTGCTCAGTAGCCATAGCCG\n+TTGTATAACCAAAAACAACCAACATTCCTCCCAAATAAATCAAAAACACC\n+ATCAACCCCAAAAAGGACCCTCCAAAATTCATAATAATACCACAACCTAC\n+CCCTCCACTTACAATCAGCACTAAACCCCCATAAATAGGTGAAGGTTTTG\n+AAGAAAACCCCACAAAACTAACAACAAAAATAACACTCAAAATAAACACA\n+ATATATGTCATCATTATTCCCACGTGGAATCTAACCACGACCAATGACAT\n+GAAAAATCATCGTTGTATTTCAACTATAAGAACACCAATGACAAACATCC\n+GGAAATCTCACCCACTAATTAAAATCATCAATCACTCTTTTATTGACCTA\n+CCAGCCCCCTCAAACATTTCATCATGATGAAACTTCGGCTCCCTCCTAGG\n+AATCTGCCTAATCCTCCAAATCTTAACAGGCCTATTCCTAGCCATACACT\n+ACACATCAGACACGACAACTGCCTTCTCATCCGTCACTCACATCTGCCGA\n+GACGTTAACTACGGATGAATTATTCGCTACCTCCATGCCAACGGAGCATC\n+AATATTTTTTATCTGCCTCTTCATTCACGTAGGACGCGGCCTCTACTACG\n+GCTCTTACACATTCCTAGAGACATGAAACATTGGAATCATCCTACTTTTC\n+ACAGTTATAGCTACAGCATTCATGGGCTATGTCCTACCATGAGGCCAAAT\n+ATCCTTTTGAGGAGCAACAGTCAT
CACGAACCTCCTATCAGCAATTCCCT\n+ACATCGGTACTACCCTCGTCGAGTGAATCTGAGGTGGATTCTCAGTAGAC\n+AAAGCCACCCTTACCCGATTTTTTGCTTTCCACTTCATCCTACCCTTCAT\n+CATCACAGCCCTGGTAGTCGTACATTTACTATTTCTTCACGAAACAGGAT\n+CTAATAACCCCTCAGGAATCCCATCCGATATGGACAAAATCCCATTCCAC\n+CCATATTATACAATTAAAGACATCCTAGGACTCCTCCTCCTGATCTTGCT\n+CCTACTAACTCTAGTATTATTCTCCCCCGACCTCCTAGGAGACCCAGACA\n+ACTACACCCCAGCTAACCCTCTCAGCACTCCCCCTCATATTAAACCAGAA\n+TGGTACTTCCTGTTTGCCTACGCCATCCTACGCTCCATTCCCAACAAACT\n+AGGCGGCGTATTAGCCCTAATCCTCTCCATCCTGATCCTAGCACTCATCC\n+CCACCCTCCACATATCAAAACAACGAAGCATAATATTCCGGCCTCTCAGC\n+CAATGCGTATTCTGACTCTTAGTGGCAGACTTACTGACACTAACATGAAT\n+CGGCGGACAGCCAGTGGAACACCCATACGTAATTATCGGCCAACTGGCCT\n+CAATCCTCTACTTCTCCCTAATTCTCATTTTTATACCACTCGCAAGCACC\n+ATCGAAAACAATCTTCTAAAATGAAGAGTCCCTGTAGTATATCGCACATT\n+ACCCTGGTCTTGTAAACCAGAAAAGGGGGAAAACGTTTCCTCCCAAGGAC\n+TATCAAGGAAGAAGCTCTAGCTCCACCATCAACACCCAAAGCTGAAATTC\n+TACTTAAACTATTCCTTGATTTCTTCCCCTAAACGACAACAATTTACCCT\n+CATGTGCTATGTCAGTATCAGATTATACCCCCACATAACACCATACCCAC\n+CTGACATGCAATATCTTATGAATGGCCTATGTACGTCGTGCATTAAATTG\n+TCTGCCCCATGAATAATAAGCATGTACATAATATCATTTATCTTACATAA\n+GTACATTATATTATTGATCGTGCATACCCCATCCAAGTCAAATCATTTCC\n+AGTCAACACGCATATCACAGCCCATGTTCCACGAGCTTAATCACCAAGCC\n+GCGGGAAATCAGCAACCCTCCCAACTACGTGTCCCAATCCTCGCTCCGGG\n+CCCATCCAAACGTGGGGGTTTCTACAATGAAACTATACCTGGCATCTGGT\n+TCTTTCTTCAGGGCCATTCCCACCCAACCTCGCCCATTCTTTCCCCTTAA\n+ATAAGACATCTCGATGGACTAATGACTAATCAGCCCATGCTCACACATAA\n+CTGTGATTTCATGCATTTGGTATCTTTTTATATTTGGGGATGCTATGACT\n+CAGCTATGGCCGTCAAAGGCCTCGACGCAGTCAATTAAATTGAAGCTGGA\n+CTTAAATTGAACGTTATTCCTCCGCATCAGCAACCATAAGGTGTTATTCA\n+GTCCATGGTAGCGGGACATAGGAAACAAgtgcacctgtgcacctgtgcac\n+ctgtgcacctgtgcacctgtgcacctgtgcacctgtgcacctgtgcacct\n+gtgcacctgtgcacctgtgcacctgtgcacctgtgcacctgtgcacctgt\n+gcacctgtgcacctgtgcacctgtgcacctgtgcacctgtgcacctgtgc\n+acctgtgcacctgtgcacctgtgcacctgtgcacctgtgcacctgtgcac\n+ctgtgcacctACCCGCGCAGTAAGCAAGTAATATAGCTTTCTTAATCAAA\n+CCCCCCCTACCCCCCATTAAACTCCACATATGTACATTCAACACAATCTT\n+GCCAAACCCCAAAAACAAGACTAAACAATGCACAATACTTCATGAAGCTT\n+AACCCTCGCA
TGCCAACCATAATAACTCAACACACCTAACAATCTTAACA\n+GAACTTTCCCCCCGCCATTAATACCAACATGCTACTTTAATCAATAAAAT\n+TTCCATAGACAGGCATCCCCCTAGATCTAATTTTCTAAATCTGTCAACCC\n+TTCTTCCCCC\n' |
b |
diff -r 000000000000 -r 30fdbaccb96b test-data/sam_to_bam_in1.sam --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/sam_to_bam_in1.sam Mon Aug 26 14:22:00 2013 -0400 |
b |
@@ -0,0 +1,13 @@ +@HD VN:1.0 SO:coordinate +@SQ SN:chrM LN:100001 +@RG ID:rg1 SM:s1 +HWI-EAS91_1_30788AAXX:1:1:1513:715 16 chrM 9563 25 36M * 0 0 CTGACTACCACAACTAAACATCTATGCNNAAAAAAC I+-II?IDIIIIIIIIIIIIIIIIIII""IIIIIII NM:i:1 X1:i:1 MD:Z:7N0N27 RG:Z:rg1 +HWI-EAS91_1_30788AAXX:1:1:1698:516 16 chrM 2735 25 36M * 0 0 TTTACACTCAGAGGTTCAACTCCTCTCNNTAACAAC I9IIIII5IIIIIIIIIIIIIIIIIII""IIIIIII NM:i:1 X1:i:1 MD:Z:7N0N27 RG:Z:rg1 +HWI-EAS91_1_30788AAXX:1:1:1491:637 16 chrM 10864 25 36M * 0 0 TGTAGAAGCCCCAATTGCCGGATCCATNNTGCTAGC DBAIIIIIIIIIIIFIIIIIIIIIIII""IIIIIII NM:i:1 X1:i:1 MD:Z:7N0N27 RG:Z:rg1 +HWI-EAS91_1_30788AAXX:1:1:1711:249 16 chrM 10617 25 36M * 0 0 ACCAAACAGAACGCCTGAACGCAGGCCNNTACTTCC IIIIIIIIIIIIIIIIIIIIIIIIIII""IIIIIII NM:i:1 X1:i:1 MD:Z:7N0N27 RG:Z:rg1 +HWI-EAS91_1_30788AAXX:1:1:1634:211 0 chrM 9350 25 36M * 0 0 GAAGCAGNNGCTTGATACTGACACTTCGTCGACGTA IIIIIII""IIIIIIIIIIIIIIIIIIIIII9IIDF NM:i:1 X1:i:1 MD:Z:7N0N27 RG:Z:rg1 +HWI-EAS91_1_30788AAXX:1:1:1218:141 16 chrM 14062 25 36M * 0 0 ACAAAACTAACAACAAAAATAACACTCNNAATAAAC I+IIII1IIIIIIIIIIIIIIIIIIII""IIIIIII NM:i:1 X1:i:1 MD:Z:7N0N27 RG:Z:rg1 +HWI-EAS91_1_30788AAXX:1:1:1398:854 16 chrM 3921 25 36M * 0 0 CACCCTTCCCGTACTAATAAATCCCCTNNTCTTCAC IIIII=AIIIIIIIIIIIIIIBIIIII""IIIIIII NM:i:1 X1:i:1 MD:Z:7N0N27 RG:Z:rg1 +HWI-EAS91_1_30788AAXX:1:1:1310:991 16 chrM 10002 25 36M * 0 0 CTCCTATGCCTAGAAGGAATAATACTANNACTATTC I:2IEI:IIDIIIIII4IIIIIIIIII""IIIIIII NM:i:1 X1:i:1 MD:Z:7N0N27 RG:Z:rg1 +HWI-EAS91_1_30788AAXX:1:1:1716:413 0 chrM 6040 25 36M * 0 0 GATCCAANNCTTTATCAACACCTATTCTGATTCTTC IIIIIII""IIIIIIIIIIIIIIIIIIIIIIIIIII NM:i:1 X1:i:1 MD:Z:7N0N27 RG:Z:rg1 +HWI-EAS91_1_30788AAXX:1:1:1630:59 16 chrM 12387 25 36M * 0 0 TCATACTCGACCCCAACCTTACCAACCNNCCGCTCC FIIHII;IIIIIIIIIIIIIIIIIIII""IIIIIII NM:i:1 X1:i:1 MD:Z:7N0N27 RG:Z:rg1 |
b |
diff -r 000000000000 -r 30fdbaccb96b test-data/sam_to_bam_out1.bam |
b |
Binary file test-data/sam_to_bam_out1.bam has changed |
b |
diff -r 000000000000 -r 30fdbaccb96b test-data/sam_to_bam_out2.bam |
b |
Binary file test-data/sam_to_bam_out2.bam has changed |
b |
diff -r 000000000000 -r 30fdbaccb96b tool-data/sam_fa_indices.loc.sample --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool-data/sam_fa_indices.loc.sample Mon Aug 26 14:22:00 2013 -0400 |
b |
@@ -0,0 +1,28 @@ +#This is a sample file distributed with Galaxy that enables tools +#to use a directory of Samtools-indexed sequence data files. You will need +#to create these data files and then create a sam_fa_indices.loc file +#similar to this one (store it in this directory) that points to +#the directories in which those files are stored. The sam_fa_indices.loc +#file has this format (white space characters are TAB characters): +# +#index <seq> <location> +# +#So, for example, if you had hg18 indexes stored in +#/depot/data2/galaxy/sam/, +#then the sam_fa_indices.loc entry would look like this: +# +#index hg18 /depot/data2/galaxy/sam/hg18.fa +# +#and your /depot/data2/galaxy/sam/ directory +#would contain hg18.fa and hg18.fa.fai files: +# +#-rw-r--r-- 1 james universe 830134 2005-09-13 10:12 hg18.fa +#-rw-r--r-- 1 james universe 527388 2005-09-13 10:12 hg18.fa.fai +# +#Your sam_fa_indices.loc file should include an entry per line for +#each index set you have stored. The file in the path does actually +#exist, but it should never be directly used. Instead, the name serves +#as a prefix for the index file. For example: +# +#index hg18 /depot/data2/galaxy/sam/hg18.fa +#index hg19 /depot/data2/galaxy/sam/hg19.fa |
b |
diff -r 000000000000 -r 30fdbaccb96b tool-data/tool_data_table_conf.xml.sample --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool-data/tool_data_table_conf.xml.sample Mon Aug 26 14:22:00 2013 -0400 |
b |
@@ -0,0 +1,8 @@ +<!-- Use the file tool_data_table_conf.xml.oldlocstyle if you don't want to update your loc files as changed in revision 4550:535d276c92bc--> +<tables> + <!-- Location of SAMTools indexes and other files --> + <table name="sam_fa_indexes" comment_char="#"> + <columns>line_type, value, path</columns> + <file path="tool-data/sam_fa_indices.loc" /> + </table> +</tables> \ No newline at end of file |
b |
diff -r 000000000000 -r 30fdbaccb96b tool_dependencies.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool_dependencies.xml Mon Aug 26 14:22:00 2013 -0400 |
b |
@@ -0,0 +1,6 @@ +<?xml version="1.0"?> +<tool_dependency> + <package name="samtools" version="0.1.18"> + <repository changeset_revision="a7936f4ea405" name="package_samtools_0_1_18" owner="devteam" toolshed="http://toolshed.g2.bx.psu.edu" /> + </package> +</tool_dependency> |