changeset 5:f5eb9afa8f8a draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_star_index_builder commit 9b68f6ae375aed38493f8399b8572347c750336d
author iuc
date Thu, 15 Aug 2019 11:30:16 -0400
parents 6ef6520f14fc
children 64deddb6a8ec
files data_manager/macros.xml data_manager/rna_star_index_builder.py data_manager/rna_star_index_builder.xml data_manager_conf.xml tool-data/rnastar_index2.loc.sample tool-data/rnastar_index2_versioned.loc.sample tool_data_table_conf.xml.sample
diffstat 7 files changed, 188 insertions(+), 57 deletions(-) [+]
line wrap: on
line diff
--- a/data_manager/macros.xml	Wed Jul 18 13:26:12 2018 -0400
+++ b/data_manager/macros.xml	Thu Aug 15 11:30:16 2019 -0400
@@ -1,10 +1,39 @@
 <macros>
+    <!-- REMEMBER to bump the version of rna_star_index_builder_data_manager
+    whenever you make changes to the following two version tokens!
+    The data manager uses a symlink to this macro file to keep the versions in
+    sync. -->
+    <!-- STAR version to be used -->
+    <token name="@VERSION@">2.7.2a</token>
+    <!-- STAR index version compatible with this version of STAR
+    This is the STAR version that introduced the index structure expected
+    by the current version.
+    It can be found for any specific version of STAR with:
+    STAR -h | grep versionGenome
+    or by looking for the versionGenome parameter in source/parametersDefault
+    of STAR's source code -->
+    <token name="@IDX_VERSION@">2.7.1a</token>
+
     <xml name="requirements">
         <requirements>
-            <requirement type="package" version="2.6.0b">star</requirement>
-            <requirement type="package" version="1.8">samtools</requirement>
+            <requirement type="package" version="@VERSION@">star</requirement>
+            <requirement type="package" version="1.9">samtools</requirement>
         </requirements>
     </xml>
+
+    <xml name="index_selection" token_with_gene_model="1"> <!-- Select list of STAR indexes from the versioned data table; the with_gene_model token defaults to "1" and can be overridden per expand -->
+        <param argument="--genomeDir" name="genomeDir" type="select"
+        label="Select reference genome"
+        help="If your genome of interest is not listed, contact the Galaxy team">
+            <options from_data_table="rnastar_index2_versioned"> <!-- table columns are value, dbkey, name, path, with_gene_model, version: filter on columns 4 and 5, sort on column 2 (name) -->
+                <filter type="static_value" column="4" value="@WITH_GENE_MODEL@" />
+                <filter type="static_value" column="5" value="@IDX_VERSION@" />
+                <filter type="sort_by" column="2" />
+                <validator type="no_options" message="No indexes are available for the selected input dataset" />
+            </options>
+        </param>
+    </xml>
+
     <token name="@FASTQ_GZ_OPTION@">
         --readFilesCommand zcat
     </token>
@@ -13,8 +42,8 @@
             <citation type="doi">10.1093/bioinformatics/bts635</citation>
         </citations>
     </xml>
-    <xml name="@SJDBOPTIONS@">
-         <param argument="--sjdbGTFfile" type="data" format="gff3,gtf" label="Gene model (gff3,gtf) file for splice junctions" optional="true" help="Exon junction information for mapping splices"/>
+    <xml name="@SJDBOPTIONS@" token_optional="true">
+         <param argument="--sjdbGTFfile" type="data" format="gff3,gtf" label="Gene model (gff3,gtf) file for splice junctions" optional="@OPTIONAL@" help="Exon junction information for mapping splices"/>
          <param argument="--sjdbOverhang" type="integer" min="1" value="100" label="Length of the genomic sequence around annotated junctions" help="Used in constructing the splice junctions database. Ideal value is ReadLength-1"/>
     </xml>
     <xml name="dbKeyActions">
@@ -22,7 +51,7 @@
             <conditional name="refGenomeSource.geneSource">
                 <when value="indexed">
                     <action type="metadata" name="dbkey">
-                        <option type="from_data_table" name="rnastar_index2" column="1" offset="0">
+                        <option type="from_data_table" name="rnastar_index2_versioned" column="1" offset="0">
                             <filter type="param_value" column="0" value="#" compare="startswith" keep="False"/>
                             <filter type="param_value" ref="refGenomeSource.GTFconditional.genomeDir" column="0"/>
                         </option>
@@ -36,4 +65,99 @@
             </conditional>
         </actions>
     </xml>
+    <token name="@TEMPINDEX@"><![CDATA[
+    ## When the reference genome comes from the history, first build a temporary STAR index in ./tempstargenomedir
+    #if str($refGenomeSource.geneSource) == 'history':
+        mkdir -p tempstargenomedir &&
+        STAR
+            --runMode genomeGenerate
+            --genomeDir 'tempstargenomedir'
+            --genomeFastaFiles '${refGenomeSource.genomeFastaFiles}'
+            ## Pass the splice junction options only when the user chose to build the index with a gene model
+            #if str($refGenomeSource.GTFconditional.GTFselect) == 'with-gtf':
+                --sjdbOverhang '${refGenomeSource.GTFconditional.sjdbOverhang}'
+                --sjdbGTFfile '${refGenomeSource.GTFconditional.sjdbGTFfile}'
+                #if str($refGenomeSource.GTFconditional.sjdbGTFfile.ext) == 'gff3':
+                    --sjdbGTFtagExonParentTranscript Parent
+                #end if
+            #end if
+            #if str($refGenomeSource.genomeSAindexNbases):
+                --genomeSAindexNbases ${refGenomeSource.genomeSAindexNbases}
+            #end if
+            --runThreadN \${GALAXY_SLOTS:-4}
+        &&
+    #end if
+    ]]></token>
+    <token name="@REFGENOMEHANDLING@" ><![CDATA[
+    --runThreadN \${GALAXY_SLOTS:-4}
+    --genomeLoad NoSharedMemory
+    --genomeDir
+    #if str($refGenomeSource.geneSource) == 'history':
+        tempstargenomedir
+    #else:
+        '${refGenomeSource.GTFconditional.genomeDir.fields.path}'
+        ## A built-in index selected via 'with-gtf' has no gene model, so pass the optional user-supplied annotation at mapping time
+        #if str($refGenomeSource.GTFconditional.GTFselect) == 'with-gtf':
+            #if $refGenomeSource.GTFconditional.sjdbGTFfile:
+                --sjdbOverhang $refGenomeSource.GTFconditional.sjdbOverhang
+                --sjdbGTFfile '${refGenomeSource.GTFconditional.sjdbGTFfile}'
+                #if str($refGenomeSource.GTFconditional.sjdbGTFfile.ext) == 'gff3':
+                    --sjdbGTFtagExonParentTranscript Parent
+                #end if
+            #end if
+        #end if
+    #end if
+    ]]></token>
+    <xml name="stdio" > <!-- Output patterns reported at level "fatal"; each regex is declared with source "both" -->
+        <stdio>
+            <regex match="FATAL error" source="both" level="fatal"/>
+            <regex match="EXITING: FATAL INPUT ERROR:" source="both" level="fatal"/>
+            <regex match="EXITING: fatal error trying to allocate genome arrays, exception thrown: std::bad_alloc" source="both" level="fatal"/>
+            <regex match="\[sam_read1\] missing header\? Abort!" source="both" level="fatal"/>
+        </stdio>
+    </xml>
+    <xml name="refgenomehandling" > <!-- Reference genome inputs: pick a built-in index, or supply a FASTA from the history to index on the fly -->
+        <conditional name="refGenomeSource">
+            <param name="geneSource" type="select" label="Custom or built-in reference genome" help="Built-ins were indexed using default options">
+                <option value="indexed" selected="true">Use a built-in index</option>
+                <option value="history">Use reference genome from history and create temporary index</option>
+            </param>
+            <when value="indexed">
+                <conditional name="GTFconditional">
+                    <param name="GTFselect" type="select"
+                           label="Reference genome with or without an annotation"
+                           help="Select the '... with builtin gene-model' option to select from the list of available indexes that were built with splice junction information. Select the '... without builtin gene-model' option to select from the list of available indexes without annotated splice junctions.">
+                        <option value="without-gtf">use genome reference with builtin gene-model</option>
+                        <option value="with-gtf">use genome reference without builtin gene-model</option>
+                    </param>
+                    <when value="with-gtf"> <!-- "with-gtf" means a gene model may be supplied at run time, so only indexes built without one (with_gene_model="0") are offered -->
+                        <expand macro="index_selection" with_gene_model="0" />
+                        <expand macro="@SJDBOPTIONS@" />
+                    </when>
+                    <when value="without-gtf"> <!-- no run-time gene model: offer only indexes that already contain one (with_gene_model="1") -->
+                        <expand macro="index_selection" with_gene_model="1" />
+                    </when>
+                </conditional>
+            </when>
+            <when value="history">
+                <param argument="--genomeFastaFiles" type="data" format="fasta" label="Select a reference genome" />
+                <!-- Currently, this parameter is not exposed in the wrapper,
+                     but used only in the tests to avoid excessive index sizes for
+                     the tiny test genomes. -->
+                <param name="genomeSAindexNbases" type="hidden" value="" />
+                <conditional name="GTFconditional">
+                    <param name="GTFselect" type="select"
+                           label="Build index with or without known splice junctions annotation"
+                           help="To build an index with known splice junctions annotated, you will have to provide a GTF or GFF3 dataset that describes the gene models (the location of genes, transcripts and exons) known for the reference genome.">
+                        <option value="without-gtf">build index without gene-model</option>
+                        <option value="with-gtf">build index with gene-model</option>
+                    </param>
+                    <when value="with-gtf">
+                        <expand macro="@SJDBOPTIONS@" optional="false"/>
+                    </when>
+                    <when value="without-gtf" />
+                </conditional>
+            </when>
+        </conditional>
+    </xml>
 </macros>
--- a/data_manager/rna_star_index_builder.py	Wed Jul 18 13:26:12 2018 -0400
+++ b/data_manager/rna_star_index_builder.py	Thu Aug 15 11:30:16 2019 -0400
@@ -1,29 +1,31 @@
 #!/usr/bin/env python
 
+import argparse
 import json
-import optparse
 
 
 def main():
-    parser = optparse.OptionParser()
-    parser.add_option( '--config-file', dest='config_file', action='store', type="string")
-    parser.add_option( '--value', dest='value', action='store', type="string" )
-    parser.add_option( '--dbkey', dest='dbkey', action='store', type="string" )
-    parser.add_option( '--name', dest='name', action='store', type="string" )
-    parser.add_option( '--subdir', dest='subdir', action='store', type="string" )
-    parser.add_option( '--data-table', dest='data_table', action='store', type="string" )
-    parser.add_option( '--withGTF', dest='withGTF', action='store_true' )
-    (options, args) = parser.parse_args()
+    parser = argparse.ArgumentParser()
+    parser.add_argument( '--config-file' )
+    parser.add_argument( '--value' )
+    parser.add_argument( '--dbkey' )
+    parser.add_argument( '--name' )
+    parser.add_argument( '--subdir' )
+    parser.add_argument( '--data-table' )
+    parser.add_argument( '--with-gene-model', action='store_true' )
+    parser.add_argument( '--index-version' )
 
-    if options.dbkey in [ None, '', '?' ]:
-        raise Exception( '"%s" is not a valid dbkey. You must specify a valid dbkey.' % ( options.dbkey ) )
+    args = parser.parse_args()
+
+    if args.dbkey in [ None, '', '?' ]:
+        raise Exception( '"%s" is not a valid dbkey. You must specify a valid dbkey.' % ( args.dbkey ) )
 
-    withGTF = "0"
-    if options.withGTF:
-        withGTF = "1"
+    with_gene_model = "0"
+    if args.with_gene_model:
+        with_gene_model = "1"
 
-    data_manager_dict = {'data_tables': {options.data_table: [dict({"value": options.value, "dbkey": options.dbkey, "name": options.name, "path": options.subdir, "with-gtf": withGTF} )]}}
-    open( options.config_file, 'wb' ).write( json.dumps( data_manager_dict ) )
+    data_manager_dict = {'data_tables': {args.data_table: [dict({"value": args.value, "dbkey": args.dbkey, "name": args.name, "path": args.subdir, "with_gene_model": with_gene_model, "version": args.index_version} )]}}
+    open( args.config_file, 'w' ).write( json.dumps( data_manager_dict ) )
 
 
 if __name__ == "__main__":
--- a/data_manager/rna_star_index_builder.xml	Wed Jul 18 13:26:12 2018 -0400
+++ b/data_manager/rna_star_index_builder.xml	Thu Aug 15 11:30:16 2019 -0400
@@ -1,4 +1,4 @@
-<tool id="rna_star_index_builder_data_manager" name="rnastar index2" tool_type="manage_data" version="0.0.5" profile="17.01">
+<tool id="rna_star_index_builder_data_manager" name="rnastar index2" tool_type="manage_data" version="2.7.1a" profile="17.01">
     <description>builder</description>
 
     <macros>
@@ -41,15 +41,16 @@
 --config-file '${out_file}'
 --value '${all_fasta_source.fields.value}'
 --dbkey '${all_fasta_source.fields.dbkey}'
+--index-version '@IDX_VERSION@'
 #if $name:
     --name '$name'
 #else
     --name '${all_fasta_source.fields.name}'
 #end if
 #if str($GTFconditional.GTFselect) == "withGTF":
-    --withGTF 1
+    --with-gene-model
 #end if
---data-table rnastar_index2
+--data-table rnastar_index2_versioned
 --subdir '${subdir}'
     ]]></command>
     <inputs>
--- a/data_manager_conf.xml	Wed Jul 18 13:26:12 2018 -0400
+++ b/data_manager_conf.xml	Thu Aug 15 11:30:16 2019 -0400
@@ -1,7 +1,7 @@
 <?xml version="1.0"?>
 <data_managers>
-    <data_manager tool_file="data_manager/rna_star_index_builder.xml" id="rna_star_index_builder" version="0.0.3">
-        <data_table name="rnastar_index2">
+    <data_manager tool_file="data_manager/rna_star_index_builder.xml" id="rna_star_index_builder">
+        <data_table name="rnastar_index2_versioned">
             <output>
                 <column name="value" />
                 <column name="dbkey" />
@@ -12,12 +12,13 @@
                             out_file.extra_files_path is used as base by default
                             if no source, eg for type=directory, then refers to base 
                         -->
-                        <target base="${GALAXY_DATA_MANAGER_DATA_PATH}">${dbkey}/rnastar_index2/${value}</target>
+                        <target base="${GALAXY_DATA_MANAGER_DATA_PATH}">rnastar/${version}/${dbkey}/${value}</target>
                     </move>
-                    <value_translation>${GALAXY_DATA_MANAGER_DATA_PATH}/${dbkey}/rnastar_index2/${value}/${path}</value_translation>
+                    <value_translation>${GALAXY_DATA_MANAGER_DATA_PATH}/rnastar/${version}/${dbkey}/${value}</value_translation>
                     <value_translation type="function">abspath</value_translation>
                 </column>
-                <column name="with-gtf" />
+                <column name="with_gene_model" />
+                <column name="version" />
             </output>
         </data_table>
     </data_manager>
--- a/tool-data/rnastar_index2.loc.sample	Wed Jul 18 13:26:12 2018 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,23 +0,0 @@
-#This is a sample file distributed with Galaxy that enables tools
-#to use a directory of rna-star indexed sequences data files. You will
-#need to create these data files and then create a rnastar_index2.loc
-#file similar to this one (store it in this directory) that points to
-#the directories in which those files are stored. The rnastar_index2.loc
-#file has this format (longer white space characters are TAB characters):
-#
-#<unique_build_id>   <dbkey>   <display_name>   <file_base_path>	<with-gtf>
-#
-#The <with-gtf> column should be 1 or 0, indicating whether the index was made
-#with an annotation (i.e., --sjdbGTFfile and --sjdbOverhang were used) or not,
-#respecively.
-#
-#Note that STAR indices can become quite large. Consequently, it is only
-#advisable to create indices with annotations if it's known ahead of time that
-#(A) the annotations won't be frequently updated and (B) the read lengths used
-#will also rarely vary. If either of these is not the case, it's advisable to
-#create indices without annotations and then specify an annotation file and
-#maximum read length (minus 1) when running STAR.
-#
-#hg19   hg19    hg19 full   /mnt/galaxyIndices/genomes/hg19/rnastar	0
-#hg19Ensembl   hg19Ensembl    hg19 full with Ensembl annotation   /mnt/galaxyIndices/genomes/hg19Ensembl/rnastar	1
-
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool-data/rnastar_index2_versioned.loc.sample	Thu Aug 15 11:30:16 2019 -0400
@@ -0,0 +1,26 @@
+#This is a sample file distributed with Galaxy that enables tools
+#to use a directory of rna-star indexed sequences data files. You will
+#need to create these data files and then create a rnastar_index2_versioned.loc
+#file similar to this one (store it in this directory) that points to
+#the directories in which those files are stored. The rnastar_index2_versioned.loc
+#file has this format (longer white space characters are TAB characters):
+#
+#<unique_build_id>   <dbkey>   <display_name>   <file_base_path>	<with_gene_model>	<version>
+#
+#The <with_gene_model> column should be 1 or 0, indicating whether the index
+#was built with annotations (i.e., --sjdbGTFfile and --sjdbOverhang were used)
+#or not.
+#
+#The <version> column indicates the STAR version that introduced the format of
+#the index, i.e., the oldest STAR version that could make use of the index.
+#
+#Note that STAR indices can become quite large. Consequently, it is only
+#advisable to create indices with annotations if it's known ahead of time that
+#(A) the annotations won't be frequently updated and (B) the read lengths used
+#will also rarely vary. If either of these is not the case, it's advisable to
+#create indices without annotations and then specify an annotation file and
+#maximum read length (minus 1) when running STAR.
+#
+#hg19   hg19    hg19 full   /mnt/galaxyIndices/genomes/hg19/rnastar	0	2.7.1a
+#hg19Ensembl   hg19Ensembl    hg19 full with Ensembl annotation   /mnt/galaxyIndices/genomes/hg19Ensembl/rnastar	1	2.7.1a
+
--- a/tool_data_table_conf.xml.sample	Wed Jul 18 13:26:12 2018 -0400
+++ b/tool_data_table_conf.xml.sample	Thu Aug 15 11:30:16 2019 -0400
@@ -4,9 +4,9 @@
         <columns>value, dbkey, name, path</columns>
         <file path="tool-data/all_fasta.loc" />
     </table>
-    <!-- Locations of indexes in the BWA mapper format -->
-    <table name="rnastar_index2" comment_char="#" allow_duplicate_entries="False">
-        <columns>value, dbkey, name, path, with-gtf</columns>
-        <file path="tool-data/rnastar_index2.loc" />
+    <!-- Locations of STAR indexes -->
+    <table name="rnastar_index2_versioned" comment_char="#" allow_duplicate_entries="False">
+        <columns>value, dbkey, name, path, with_gene_model, version</columns>
+        <file path="tool-data/rnastar_index2_versioned.loc" />
     </table>
 </tables>