changeset 1:e7d863c5c5d6

Update sam_pileup to use the fasta_indexes data table.
author Dave Bouvier <dave@bx.psu.edu>
date Wed, 11 Dec 2013 12:54:32 -0500
parents 95612c159681
children 3ff8935743a9
files sam_pileup.py sam_pileup.xml tool-data/fasta_indexes.loc.sample tool-data/sam_fa_indices.loc.sample tool-data/tool_data_table_conf.xml.sample tool_data_table_conf.xml.sample
diffstat 6 files changed, 51 insertions(+), 60 deletions(-) [+]
line wrap: on
line diff
--- a/sam_pileup.py	Mon Aug 26 14:21:12 2013 -0400
+++ b/sam_pileup.py	Wed Dec 11 12:54:32 2013 -0500
@@ -8,9 +8,8 @@
    -o, --output1=o: Output pileup
    -R, --ref=R: Reference file type
    -n, --ownFile=n: User-supplied fasta reference file
-   -d, --dbkey=d: dbkey of user-supplied file
-   -x, --indexDir=x: Index directory
    -b, --bamIndex=b: BAM index file
+   -g, --index=g: Path of the indexed reference genome
    -s, --lastCol=s: Print the mapping quality as the last column
    -i, --indels=i: Only output lines containing indels
    -M, --mapCap=M: Cap mapping quality
@@ -31,24 +30,9 @@
     sys.stderr.write( '%s\n' % msg )
     sys.exit()
 
-def check_seq_file( dbkey, GALAXY_DATA_INDEX_DIR ):
-    seqFile = '%s/sam_fa_indices.loc' % GALAXY_DATA_INDEX_DIR
-    seqPath = ''
-    for line in open( seqFile ):
-        line = line.rstrip( '\r\n' )
-        if line and not line.startswith( '#' ) and line.startswith( 'index' ):
-            fields = line.split( '\t' )
-            if len( fields ) < 3:
-                continue
-            if fields[1] == dbkey:
-                seqPath = fields[2].strip()
-                break
-    return seqPath
-
 def __main__():
     #Parse Command Line
     options, args = doc_optparse.parse( __doc__ )
-    seqPath = check_seq_file( options.dbkey, options.indexDir )
     # output version # of tool
     try:
         tmp = tempfile.NamedTemporaryFile().name
@@ -77,7 +61,6 @@
     tmpf1 = tempfile.NamedTemporaryFile( dir=tmpDir )
     tmpf1_name = tmpf1.name
     tmpf1.close()
-    tmpf1fai_name = '%s.fai' % tmpf1_name
     #link bam and bam index to working directory (can't move because need to leave original)
     os.symlink( options.input1, tmpf0bam_name )
     os.symlink( options.bamIndex, tmpf0bambai_name )
@@ -100,9 +83,9 @@
         try:
             #index reference if necessary and prepare pileup command
             if options.ref == 'indexed':
-                if not os.path.exists( "%s.fai" % seqPath ):
-                    raise Exception, "No sequences are available for '%s', request them by reporting this error." % options.dbkey
-                cmd = cmd % ( opts, seqPath, tmpf0bam_name, options.output1 )
+                if not os.path.exists( "%s.fai" % options.index ):
+                    raise Exception, "Indexed genome %s not present, request it by reporting this error." % options.index
+                cmd = cmd % ( opts, options.index, tmpf0bam_name, options.output1 )
             elif options.ref == 'history':
                 os.symlink( options.ownFile, tmpf1_name )
                 cmdIndex = 'samtools faidx %s' % ( tmpf1_name )
--- a/sam_pileup.xml	Mon Aug 26 14:21:12 2013 -0400
+++ b/sam_pileup.xml	Wed Dec 11 12:54:32 2013 -0500
@@ -1,4 +1,4 @@
-<tool id="sam_pileup" name="Generate pileup" version="1.1.1">
+<tool id="sam_pileup" name="Generate pileup" version="1.1.2">
   <description>from BAM dataset</description>
   <requirements>
     <requirement type="package" version="0.1.16">samtools</requirement>
@@ -11,7 +11,7 @@
       #if $refOrHistory.reference == "history":
         --ownFile=$refOrHistory.ownFile
       #else:
-        --ownFile="None"
+        --index=${refOrHistory.index.fields.path}
       #end if
        --dbkey=${input1.metadata.dbkey}
        --indexDir=${GALAXY_DATA_INDEX_DIR}
@@ -41,7 +41,14 @@
       <when value="indexed">
         <param name="input1" type="data" format="bam" label="Select the BAM file to generate the pileup file for">
            <validator type="unspecified_build" />
-           <validator type="dataset_metadata_in_file" filename="sam_fa_indices.loc" metadata_name="dbkey" metadata_column="1" message="Sequences are not currently available for the specified build." line_startswith="index" />
+           <validator type="dataset_metadata_in_data_table" table_name="fasta_indexes" metadata_name="dbkey" metadata_column="1" message="Sequences are not currently available for the specified build." />
+
+        </param>
+        <param name="index" type="select" label="Using reference genome">
+          <options from_data_table="fasta_indexes">
+            <filter type="data_meta" ref="input1" key="dbkey" column="1" />
+            <validator type="no_options" message="No reference genome is available for the build associated with the selected input dataset" />
+          </options>
         </param>
       </when>
       <when value="history">
@@ -100,6 +107,7 @@
       -->
       <param name="reference" value="indexed" />
       <param name="input1" value="sam_pileup_in1.bam" ftype="bam" dbkey="equCab2" />
+      <param name="index" value="chr_m" />
       <param name="lastCol" value="no" />
       <param name="indels" value="no" />
       <param name="mapCap" value="60" />
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool-data/fasta_indexes.loc.sample	Wed Dec 11 12:54:32 2013 -0500
@@ -0,0 +1,29 @@
+#This is a sample file distributed with Galaxy that enables tools
+#to use a directory of Samtools indexed sequence data files.  You will need
+#to create these data files and then create a fasta_indexes.loc file
+#similar to this one (store it in this directory) that points to
+#the directories in which those files are stored. The fasta_indexes.loc
+#file has this format (white space characters are TAB characters):
+#
+# <unique_build_id>	<dbkey>	<display_name>	<file_base_path>
+#
+#So, for example, if you had the hg19 Canonical index stored in
+#
+# /depot/data2/galaxy/hg19/sam/,
+#
+#then the fasta_indexes.loc entry would look like this:
+#
+#hg19canon	hg19	Human (Homo sapiens): hg19 Canonical	/depot/data2/galaxy/hg19/sam/hg19canon.fa
+#
+#and your /depot/data2/galaxy/hg19/sam/ directory
+#would contain hg19canon.fa and hg19canon.fa.fai files.
+#
+#Your fasta_indexes.loc file should include an entry per line for
+#each index set you have stored.  The file in the path does actually
+#exist, but it should never be directly used. Instead, the name serves
+#as a prefix for the index file.  For example:
+#
+#hg18canon	hg18	Human (Homo sapiens): hg18 Canonical	/depot/data2/galaxy/hg18/sam/hg18canon.fa
+#hg18full	hg18	Human (Homo sapiens): hg18 Full	/depot/data2/galaxy/hg18/sam/hg18full.fa
+#hg19canon	hg19	Human (Homo sapiens): hg19 Canonical	/depot/data2/galaxy/hg19/sam/hg19canon.fa
+#hg19full	hg19	Human (Homo sapiens): hg19 Full	/depot/data2/galaxy/hg19/sam/hg19full.fa
--- a/tool-data/sam_fa_indices.loc.sample	Mon Aug 26 14:21:12 2013 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,28 +0,0 @@
-#This is a sample file distributed with Galaxy that enables tools
-#to use a directory of Samtools indexed sequences data files.  You will need
-#to create these data files and then create a sam_fa_indices.loc file 
-#similar to this one (store it in this directory) that points to 
-#the directories in which those files are stored. The sam_fa_indices.loc 
-#file has this format (white space characters are TAB characters):
-#
-#index	<seq>	<location>
-#
-#So, for example, if you had hg18 indexed stored in 
-#/depot/data2/galaxy/sam/, 
-#then the sam_fa_indices.loc entry would look like this:
-#
-#index	hg18	/depot/data2/galaxy/sam/hg18.fa
-#
-#and your /depot/data2/galaxy/sam/ directory
-#would contain hg18.fa and hg18.fa.fai files:
-#
-#-rw-r--r--  1 james    universe 830134 2005-09-13 10:12 hg18.fa
-#-rw-r--r--  1 james    universe 527388 2005-09-13 10:12 hg18.fa.fai
-#
-#Your sam_fa_indices.loc file should include an entry per line for 
-#each index set you have stored.  The file in the path does actually
-#exist, but it should never be directly used. Instead, the name serves
-#as a prefix for the index file.  For example:
-#
-#index	hg18	/depot/data2/galaxy/sam/hg18.fa
-#index	hg19	/depot/data2/galaxy/sam/hg19.fa
--- a/tool-data/tool_data_table_conf.xml.sample	Mon Aug 26 14:21:12 2013 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,8 +0,0 @@
-<!-- Use the file tool_data_table_conf.xml.oldlocstyle if you don't want to update your loc files as changed in revision 4550:535d276c92bc-->
-<tables>
-    <!-- Location of SAMTools indexes and other files -->
-    <table name="sam_fa_indexes" comment_char="#">
-        <columns>line_type, value, path</columns>
-        <file path="tool-data/sam_fa_indices.loc" />
-    </table>
-</tables>
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_data_table_conf.xml.sample	Wed Dec 11 12:54:32 2013 -0500
@@ -0,0 +1,7 @@
+<!-- Use the file tool_data_table_conf.xml.oldlocstyle if you don't want to update your loc files as changed in revision 4550:535d276c92bc-->
+<tables>
+    <table name="fasta_indexes" comment_char="#">
+        <columns>value, dbkey, name, path</columns>
+        <file path="tool-data/fasta_indexes.loc" />
+    </table>
+</tables>