changeset 2:8b22e9adae34

Update to the new data table specification.
author Dave Bouvier <dave@bx.psu.edu>
date Wed, 04 Dec 2013 13:24:29 -0500
parents 9a3f7a890da6
children cf928aeaaff7
files cuffcompare_wrapper.py cuffcompare_wrapper.xml tool-data/fasta_indexes.loc.sample tool-data/sam_fa_indices.loc.sample tool_data_table_conf.xml.sample
diffstat 5 files changed, 49 insertions(+), 60 deletions(-) [+]
line wrap: on
line diff
--- a/cuffcompare_wrapper.py	Tue Oct 01 15:12:23 2013 -0400
+++ b/cuffcompare_wrapper.py	Wed Dec 04 13:24:29 2013 -0500
@@ -8,20 +8,6 @@
     sys.stderr.write( '%s\n' % msg )
     sys.exit()
 
-# Copied from sam_to_bam.py:
-def check_seq_file( dbkey, cached_seqs_pointer_file ):
-    seq_path = ''
-    for line in open( cached_seqs_pointer_file ):
-        line = line.rstrip( '\r\n' )
-        if line and not line.startswith( '#' ) and line.startswith( 'index' ):
-            fields = line.split( '\t' )
-            if len( fields ) < 3:
-                continue
-            if fields[1] == dbkey:
-                seq_path = fields[2].strip()
-                break
-    return seq_path
-
 def __main__():
     #Parse Command Line
     parser = optparse.OptionParser()
@@ -30,8 +16,7 @@
     parser.add_option( '-s', dest='use_seq_data', action="store_true", help='Causes cuffcompare to look into for fasta files with the underlying genomic sequences (one file per contig) against which your reads were aligned for some optional classification functions. For example, Cufflinks transcripts consisting mostly of lower-case bases are classified as repeats. Note that <seq_dir> must contain one fasta file per reference chromosome, and each file must be named after the chromosome, and have a .fa or .fasta extension.')
     
     # Wrapper / Galaxy options.
-    parser.add_option( '', '--dbkey', dest='dbkey', help='The build of the reference dataset' )
-    parser.add_option( '', '--index_dir', dest='index_dir', help='GALAXY_DATA_INDEX_DIR' )
+    parser.add_option( '', '--index', dest='index', help='The path of the reference genome' )
     parser.add_option( '', '--ref_file', dest='ref_file', help='The reference dataset from the history' )
     
     # Outputs.
@@ -60,21 +45,15 @@
         
     # Set/link to sequence file.
     if options.use_seq_data:
-        if options.ref_file != 'None':
+        if options.ref_file:
             # Sequence data from history.
             # Create symbolic link to ref_file so that index will be created in working directory.
             seq_path = "ref.fa"
             os.symlink( options.ref_file, seq_path  )
         else:
-            # Sequence data from loc file.
-            cached_seqs_pointer_file = os.path.join( options.index_dir, 'sam_fa_indices.loc' )
-            if not os.path.exists( cached_seqs_pointer_file ):
-                stop_err( 'The required file (%s) does not exist.' % cached_seqs_pointer_file )
-            # If found for the dbkey, seq_path will look something like /galaxy/data/equCab2/sam_index/equCab2.fa,
-            # and the equCab2.fa file will contain fasta sequences.
-            seq_path = check_seq_file( options.dbkey, cached_seqs_pointer_file )
-            if seq_path == '':
-                stop_err( 'No sequence data found for dbkey %s, so sequence data cannot be used.' % options.dbkey  )
+            if not os.path.exists( options.index ):
+                stop_err( 'Reference genome %s not present, request it by reporting this error.' % options.index )
+            seq_path = options.index
     
     # Build command.
     
--- a/cuffcompare_wrapper.xml	Tue Oct 01 15:12:23 2013 -0400
+++ b/cuffcompare_wrapper.xml	Wed Dec 04 13:24:29 2013 -0500
@@ -1,4 +1,4 @@
-<tool id="cuffcompare" name="Cuffcompare" version="0.0.5">
+<tool id="cuffcompare" name="Cuffcompare" version="0.0.6">
     <!-- Wrapper supports Cuffcompare versions v1.3.0 and newer -->
     <description>compare assembled transcripts to a reference annotation and track Cufflinks transcripts across multiple experiments</description>
     <requirements>
@@ -18,14 +18,12 @@
             
             ## Use sequence data?
             #if $seq_data.use_seq_data == "Yes":
-	        -s
+            -s
                 #if $seq_data.seq_source.index_source == "history":
                     --ref_file=$seq_data.seq_source.ref_file
                 #else:
-                    --ref_file="None"
+                    --index=${seq_data.seq_source.index.fields.path}
                 #end if
-                --dbkey=${first_input.metadata.dbkey} 
-                --index_dir=${GALAXY_DATA_INDEX_DIR}
             #end if
             
             ## Outputs.
@@ -67,7 +65,14 @@
                     <option value="cached">Locally cached</option>
                     <option value="history">History</option>
                   </param>
-                  <when value="cached"></when>
+                  <when value="cached">
+                    <param name="index" type="select" label="Using reference genome">
+                      <options from_data_table="fasta_indexes">
+                        <filter type="data_meta" ref="first_input" key="dbkey" column="1" />
+                        <validator type="no_options" message="No reference genome is available for the build associated with the selected input dataset" />
+                      </options>
+                    </param>
+                  </when>
                   <when value="history">
                       <param name="ref_file" type="data" format="fasta" label="Using reference file" />
                   </when>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool-data/fasta_indexes.loc.sample	Wed Dec 04 13:24:29 2013 -0500
@@ -0,0 +1,29 @@
+#This is a sample file distributed with Galaxy that enables tools
+#to use a directory of Samtools indexed sequences data files.  You will need
+#to create these data files and then create a fasta_indexes.loc file
+#similar to this one (store it in this directory) that points to
+#the directories in which those files are stored. The fasta_indexes.loc
+#file has this format (white space characters are TAB characters):
+#
+# <unique_build_id>	<dbkey>	<display_name>	<file_base_path>
+#
+#So, for example, if you had the hg19 Canonical index stored in
+#
+# /depot/data2/galaxy/hg19/sam/,
+#
+#then the fasta_indexes.loc entry would look like this:
+#
+#hg19canon	hg19	Human (Homo sapiens): hg19 Canonical	/depot/data2/galaxy/hg19/sam/hg19canon.fa
+#
+#and your /depot/data2/galaxy/hg19/sam/ directory
+#would contain hg19canon.fa and hg19canon.fa.fai files.
+#
+#Your fasta_indexes.loc file should include an entry per line for
+#each index set you have stored.  The file in the path does actually
+#exist, but it should never be directly used. Instead, the name serves
+#as a prefix for the index file.  For example:
+#
+#hg18canon	hg18	Human (Homo sapiens): hg18 Canonical	/depot/data2/galaxy/hg18/sam/hg18canon.fa
+#hg18full	hg18	Human (Homo sapiens): hg18 Full	/depot/data2/galaxy/hg18/sam/hg18full.fa
+#hg19canon	hg19	Human (Homo sapiens): hg19 Canonical	/depot/data2/galaxy/hg19/sam/hg19canon.fa
+#hg19full	hg19	Human (Homo sapiens): hg19 Full	/depot/data2/galaxy/hg19/sam/hg19full.fa
--- a/tool-data/sam_fa_indices.loc.sample	Tue Oct 01 15:12:23 2013 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,28 +0,0 @@
-#This is a sample file distributed with Galaxy that enables tools
-#to use a directory of Samtools indexed sequences data files.  You will need
-#to create these data files and then create a sam_fa_indices.loc file 
-#similar to this one (store it in this directory) that points to 
-#the directories in which those files are stored. The sam_fa_indices.loc 
-#file has this format (white space characters are TAB characters):
-#
-#index	<seq>	<location>
-#
-#So, for example, if you had hg18 indexed stored in 
-#/depot/data2/galaxy/sam/, 
-#then the sam_fa_indices.loc entry would look like this:
-#
-#index	hg18	/depot/data2/galaxy/sam/hg18.fa
-#
-#and your /depot/data2/galaxy/sam/ directory
-#would contain hg18.fa and hg18.fa.fai files:
-#
-#-rw-r--r--  1 james    universe 830134 2005-09-13 10:12 hg18.fa
-#-rw-r--r--  1 james    universe 527388 2005-09-13 10:12 hg18.fa.fai
-#
-#Your sam_fa_indices.loc file should include an entry per line for 
-#each index set you have stored.  The file in the path does actually
-#exist, but it should never be directly used. Instead, the name serves
-#as a prefix for the index file.  For example:
-#
-#index	hg18	/depot/data2/galaxy/sam/hg18.fa
-#index	hg19	/depot/data2/galaxy/sam/hg19.fa
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_data_table_conf.xml.sample	Wed Dec 04 13:24:29 2013 -0500
@@ -0,0 +1,4 @@
+    <table name="fasta_indexes" comment_char="#">
+        <columns>value, dbkey, name, path</columns>
+        <file path="tool-data/fasta_indexes.loc" />
+     </table>