Repository 'data_manager_star_index_builder'
hg clone https://toolshed.g2.bx.psu.edu/repos/iuc/data_manager_star_index_builder

Changeset 5:f5eb9afa8f8a (2019-08-15)
Previous changeset 4:6ef6520f14fc (2018-07-18) Next changeset 6:64deddb6a8ec (2019-11-28)
Commit message:
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_star_index_builder commit 9b68f6ae375aed38493f8399b8572347c750336d
modified:
data_manager/macros.xml
data_manager/rna_star_index_builder.py
data_manager/rna_star_index_builder.xml
data_manager_conf.xml
tool_data_table_conf.xml.sample
added:
tool-data/rnastar_index2_versioned.loc.sample
removed:
tool-data/rnastar_index2.loc.sample
b
diff -r 6ef6520f14fc -r f5eb9afa8f8a data_manager/macros.xml
--- a/data_manager/macros.xml Wed Jul 18 13:26:12 2018 -0400
+++ b/data_manager/macros.xml Thu Aug 15 11:30:16 2019 -0400
[
b'@@ -1,10 +1,39 @@\n <macros>\n+    <!-- REMEMBER to bump the version of rna_star_index_builder_data_manager\n+    whenever you make changes to the following two version tokens!\n+    The data manager uses a symlink to this macro file to keep the versions in\n+    sync. -->\n+    <!-- STAR version to be used -->\n+    <token name="@VERSION@">2.7.2a</token>\n+    <!-- STAR index version compatible with this version of STAR\n+    This is the STAR version that introduced the index structure expected\n+    by the current version.\n+    It can be found for any specific version of STAR with:\n+    STAR -h | grep versionGenome\n+    or by looking for the versionGenome parameter in source/parametersDefault\n+    of STAR\'s source code -->\n+    <token name="@IDX_VERSION@">2.7.1a</token>\n+\n     <xml name="requirements">\n         <requirements>\n-            <requirement type="package" version="2.6.0b">star</requirement>\n-            <requirement type="package" version="1.8">samtools</requirement>\n+            <requirement type="package" version="@VERSION@">star</requirement>\n+            <requirement type="package" version="1.9">samtools</requirement>\n         </requirements>\n     </xml>\n+\n+    <xml name="index_selection" token_with_gene_model="1">\n+        <param argument="--genomeDir" name="genomeDir" type="select"\n+        label="Select reference genome"\n+        help="If your genome of interest is not listed, contact the Galaxy team">\n+            <options from_data_table="rnastar_index2_versioned">\n+                <filter type="static_value" column="4" value="@WITH_GENE_MODEL@" />\n+                <filter type="static_value" column="5" value="@IDX_VERSION@" />\n+                <filter type="sort_by" column="2" />\n+                <validator type="no_options" message="No indexes are available for the selected input dataset" />\n+            </options>\n+        </param>\n+    </xml>\n+\n     <token name="@FASTQ_GZ_OPTION@">\n         
--readFilesCommand zcat\n     </token>\n@@ -13,8 +42,8 @@\n             <citation type="doi">10.1093/bioinformatics/bts635</citation>\n         </citations>\n     </xml>\n-    <xml name="@SJDBOPTIONS@">\n-         <param argument="--sjdbGTFfile" type="data" format="gff3,gtf" label="Gene model (gff3,gtf) file for splice junctions" optional="true" help="Exon junction information for mapping splices"/>\n+    <xml name="@SJDBOPTIONS@" token_optional="true">\n+         <param argument="--sjdbGTFfile" type="data" format="gff3,gtf" label="Gene model (gff3,gtf) file for splice junctions" optional="@OPTIONAL@" help="Exon junction information for mapping splices"/>\n          <param argument="--sjdbOverhang" type="integer" min="1" value="100" label="Length of the genomic sequence around annotated junctions" help="Used in constructing the splice junctions database. Ideal value is ReadLength-1"/>\n     </xml>\n     <xml name="dbKeyActions">\n@@ -22,7 +51,7 @@\n             <conditional name="refGenomeSource.geneSource">\n                 <when value="indexed">\n                     <action type="metadata" name="dbkey">\n-                        <option type="from_data_table" name="rnastar_index2" column="1" offset="0">\n+                        <option type="from_data_table" name="rnastar_index2_versioned" column="1" offset="0">\n                             <filter type="param_value" column="0" value="#" compare="startswith" keep="False"/>\n                             <filter type="param_value" ref="refGenomeSource.GTFconditional.genomeDir" column="0"/>\n                         </option>\n@@ -36,4 +65,99 @@\n             </conditional>\n         </actions>\n     </xml>\n+    <token name="@TEMPINDEX@"><![CDATA[\n+    ## Create temporary index for custom reference\n+    #if str($refGenomeSource.geneSource) == \'history\':\n+        mkdir -p tempstargenomedir &&\n+        STAR\n+            --runMode genomeGenerate\n+            --genomeDir \'tempstargenomedir\'\n+            
--genomeFastaFiles \'${refGenomeSource.genomeFastaFiles}\'\n+            ## Handle difference betwee'..b'     #if $refGenomeSource.GTFconditional.sjdbGTFfile:\n+                --sjdbOverhang $refGenomeSource.GTFconditional.sjdbOverhang\n+                --sjdbGTFfile \'${refGenomeSource.GTFconditional.sjdbGTFfile}\'\n+                #if str($refGenomeSource.GTFconditional.sjdbGTFfile.ext) == \'gff3\':\n+                    --sjdbGTFtagExonParentTranscript Parent\n+                #end if\n+            #end if\n+        #end if\n+        #end if\n+        ]]></token>\n+    <xml name="stdio" >\n+        <stdio>\n+            <regex match="FATAL error" source="both" level="fatal"/>\n+            <regex match="EXITING: FATAL INPUT ERROR:" source="both" level="fatal"/>\n+            <regex match="EXITING: fatal error trying to allocate genome arrays, exception thrown: std::bad_alloc" source="both" level="fatal"/>\n+            <regex match="\\[sam_read1\\] missing header\\? Abort!" source="both" level="fatal"/>\n+        </stdio>\n+    </xml>\n+    <xml name="refgenomehandling" >\n+        <conditional name="refGenomeSource">\n+            <param name="geneSource" type="select" label="Custom or built-in reference genome" help="Built-ins were indexed using default options">\n+                <option value="indexed" selected="true">Use a built-in index</option>\n+                <option value="history">Use reference genome from history and create temporary index</option>\n+            </param>\n+            <when value="indexed">\n+                <conditional name="GTFconditional">\n+                    <param name="GTFselect" type="select"\n+                           label="Reference genome with or without an annotation"\n+                           help="Select the \'... with builtin gene-model\' option to select from the list of available indexes that were built with splice junction information. Select the \'... 
without builtin gene-model\' option to select from the list of available indexes without annotated splice junctions.">\n+                        <option value="without-gtf">use genome reference with builtin gene-model</option>\n+                        <option value="with-gtf">use genome reference without builtin gene-model</option>\n+                    </param>\n+                    <when value="with-gtf">\n+                        <expand macro="index_selection" with_gene_model="0" />\n+                        <expand macro="@SJDBOPTIONS@" />\n+                    </when>\n+                    <when value="without-gtf">\n+                        <expand macro="index_selection" with_gene_model="1" />\n+                    </when>\n+                </conditional>\n+            </when>\n+            <when value="history">\n+                <param argument="--genomeFastaFiles" type="data" format="fasta" label="Select a reference genome" />\n+                <!-- Currently, this parameter is not exposed in the wrapper,\n+                     but used only in the tests to avoid excessive index sizes for\n+                     the tiny test genomes. 
-->\n+                <param name="genomeSAindexNbases" type="hidden" value="" />\n+                <conditional name="GTFconditional">\n+                    <param name="GTFselect" type="select"\n+                           label="Build index with or without known splice junctions annotation"\n+                           help="To build an index with known splice junctions annotated, you will have to provide a GTF or GFF3 dataset that describes the gene models (the location of genes, transcripts and exons) known for the reference genome.">\n+                        <option value="without-gtf">build index without gene-model</option>\n+                        <option value="with-gtf">build index with gene-model</option>\n+                    </param>\n+                    <when value="with-gtf">\n+                        <expand macro="@SJDBOPTIONS@" optional="false"/>\n+                    </when>\n+                    <when value="without-gtf" />\n+                </conditional>\n+            </when>\n+        </conditional>\n+    </xml>\n </macros>\n'
b
diff -r 6ef6520f14fc -r f5eb9afa8f8a data_manager/rna_star_index_builder.py
--- a/data_manager/rna_star_index_builder.py Wed Jul 18 13:26:12 2018 -0400
+++ b/data_manager/rna_star_index_builder.py Thu Aug 15 11:30:16 2019 -0400
[
@@ -1,29 +1,31 @@
 #!/usr/bin/env python
 
+import argparse
 import json
-import optparse
 
 
 def main():
-    parser = optparse.OptionParser()
-    parser.add_option( '--config-file', dest='config_file', action='store', type="string")
-    parser.add_option( '--value', dest='value', action='store', type="string" )
-    parser.add_option( '--dbkey', dest='dbkey', action='store', type="string" )
-    parser.add_option( '--name', dest='name', action='store', type="string" )
-    parser.add_option( '--subdir', dest='subdir', action='store', type="string" )
-    parser.add_option( '--data-table', dest='data_table', action='store', type="string" )
-    parser.add_option( '--withGTF', dest='withGTF', action='store_true' )
-    (options, args) = parser.parse_args()
+    parser = argparse.ArgumentParser()
+    parser.add_argument( '--config-file' )
+    parser.add_argument( '--value' )
+    parser.add_argument( '--dbkey' )
+    parser.add_argument( '--name' )
+    parser.add_argument( '--subdir' )
+    parser.add_argument( '--data-table' )
+    parser.add_argument( '--with-gene-model', action='store_true' )
+    parser.add_argument( '--index-version' )
 
-    if options.dbkey in [ None, '', '?' ]:
-        raise Exception( '"%s" is not a valid dbkey. You must specify a valid dbkey.' % ( options.dbkey ) )
+    args = parser.parse_args()
+
+    if args.dbkey in [ None, '', '?' ]:
+        raise Exception( '"%s" is not a valid dbkey. You must specify a valid dbkey.' % ( args.dbkey ) )
 
-    withGTF = "0"
-    if options.withGTF:
-        withGTF = "1"
+    with_gene_model = "0"
+    if args.with_gene_model:
+        with_gene_model = "1"
 
-    data_manager_dict = {'data_tables': {options.data_table: [dict({"value": options.value, "dbkey": options.dbkey, "name": options.name, "path": options.subdir, "with-gtf": withGTF} )]}}
-    open( options.config_file, 'wb' ).write( json.dumps( data_manager_dict ) )
+    data_manager_dict = {'data_tables': {args.data_table: [dict({"value": args.value, "dbkey": args.dbkey, "name": args.name, "path": args.subdir, "with_gene_model": with_gene_model, "version": args.index_version} )]}}
+    open( args.config_file, 'w' ).write( json.dumps( data_manager_dict ) )
 
 
 if __name__ == "__main__":
b
diff -r 6ef6520f14fc -r f5eb9afa8f8a data_manager/rna_star_index_builder.xml
--- a/data_manager/rna_star_index_builder.xml Wed Jul 18 13:26:12 2018 -0400
+++ b/data_manager/rna_star_index_builder.xml Thu Aug 15 11:30:16 2019 -0400
b
@@ -1,4 +1,4 @@
-<tool id="rna_star_index_builder_data_manager" name="rnastar index2" tool_type="manage_data" version="0.0.5" profile="17.01">
+<tool id="rna_star_index_builder_data_manager" name="rnastar index2" tool_type="manage_data" version="2.7.1a" profile="17.01">
     <description>builder</description>
 
     <macros>
@@ -41,15 +41,16 @@
 --config-file '${out_file}'
 --value '${all_fasta_source.fields.value}'
 --dbkey '${all_fasta_source.fields.dbkey}'
+--index-version '@IDX_VERSION@'
 #if $name:
     --name '$name'
 #else
     --name '${all_fasta_source.fields.name}'
 #end if
 #if str($GTFconditional.GTFselect) == "withGTF":
-    --withGTF 1
+    --with-gene-model
 #end if
---data-table rnastar_index2
+--data-table rnastar_index2_versioned
 --subdir '${subdir}'
     ]]></command>
     <inputs>
b
diff -r 6ef6520f14fc -r f5eb9afa8f8a data_manager_conf.xml
--- a/data_manager_conf.xml Wed Jul 18 13:26:12 2018 -0400
+++ b/data_manager_conf.xml Thu Aug 15 11:30:16 2019 -0400
b
@@ -1,7 +1,7 @@
 <?xml version="1.0"?>
 <data_managers>
-    <data_manager tool_file="data_manager/rna_star_index_builder.xml" id="rna_star_index_builder" version="0.0.3">
-        <data_table name="rnastar_index2">
+    <data_manager tool_file="data_manager/rna_star_index_builder.xml" id="rna_star_index_builder">
+        <data_table name="rnastar_index2_versioned">
             <output>
                 <column name="value" />
                 <column name="dbkey" />
@@ -12,12 +12,13 @@
                             out_file.extra_files_path is used as base by default
                             if no source, eg for type=directory, then refers to base 
                         -->
-                        <target base="${GALAXY_DATA_MANAGER_DATA_PATH}">${dbkey}/rnastar_index2/${value}</target>
+                        <target base="${GALAXY_DATA_MANAGER_DATA_PATH}">rnastar/${version}/${dbkey}/${value}</target>
                     </move>
-                    <value_translation>${GALAXY_DATA_MANAGER_DATA_PATH}/${dbkey}/rnastar_index2/${value}/${path}</value_translation>
+                    <value_translation>${GALAXY_DATA_MANAGER_DATA_PATH}/rnastar/${version}/${dbkey}/${value}</value_translation>
                     <value_translation type="function">abspath</value_translation>
                 </column>
-                <column name="with-gtf" />
+                <column name="with_gene_model" />
+                <column name="version" />
             </output>
         </data_table>
     </data_manager>
b
diff -r 6ef6520f14fc -r f5eb9afa8f8a tool-data/rnastar_index2.loc.sample
--- a/tool-data/rnastar_index2.loc.sample Wed Jul 18 13:26:12 2018 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
@@ -1,23 +0,0 @@
-#This is a sample file distributed with Galaxy that enables tools
-#to use a directory of rna-star indexed sequences data files. You will
-#need to create these data files and then create a rnastar_index2.loc
-#file similar to this one (store it in this directory) that points to
-#the directories in which those files are stored. The rnastar_index2.loc
-#file has this format (longer white space characters are TAB characters):
-#
-#<unique_build_id>   <dbkey>   <display_name>   <file_base_path> <with-gtf>
-#
-#The <with-gtf> column should be 1 or 0, indicating whether the index was made
-#with an annotation (i.e., --sjdbGTFfile and --sjdbOverhang were used) or not,
-#respecively.
-#
-#Note that STAR indices can become quite large. Consequently, it is only
-#advisable to create indices with annotations if it's known ahead of time that
-#(A) the annotations won't be frequently updated and (B) the read lengths used
-#will also rarely vary. If either of these is not the case, it's advisable to
-#create indices without annotations and then specify an annotation file and
-#maximum read length (minus 1) when running STAR.
-#
-#hg19   hg19    hg19 full   /mnt/galaxyIndices/genomes/hg19/rnastar 0
-#hg19Ensembl   hg19Ensembl    hg19 full with Ensembl annotation   /mnt/galaxyIndices/genomes/hg19Ensembl/rnastar 1
-
b
diff -r 6ef6520f14fc -r f5eb9afa8f8a tool-data/rnastar_index2_versioned.loc.sample
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool-data/rnastar_index2_versioned.loc.sample Thu Aug 15 11:30:16 2019 -0400
b
@@ -0,0 +1,26 @@
+#This is a sample file distributed with Galaxy that enables tools
+#to use a directory of rna-star indexed sequences data files. You will
+#need to create these data files and then create a rnastar_index2_versioned.loc
+#file similar to this one (store it in this directory) that points to
+#the directories in which those files are stored. The rnastar_index2_versioned.loc
+#file has this format (longer white space characters are TAB characters):
+#
+#<unique_build_id>   <dbkey>   <display_name>   <file_base_path> <with_gene_model> <version>
+#
+#The <with_gene_model> column should be 1 or 0, indicating whether the index
+#was built with annotations (i.e., --sjdbGTFfile and --sjdbOverhang were used)
+#or not.
+#
+#The <version> column indicates the STAR version that introduced the format of
+#the index, i.e., the oldest STAR version that could make use of the index.
+#
+#Note that STAR indices can become quite large. Consequently, it is only
+#advisable to create indices with annotations if it's known ahead of time that
+#(A) the annotations won't be frequently updated and (B) the read lengths used
+#will also rarely vary. If either of these is not the case, it's advisable to
+#create indices without annotations and then specify an annotation file and
+#maximum read length (minus 1) when running STAR.
+#
+#hg19   hg19    hg19 full   /mnt/galaxyIndices/genomes/hg19/rnastar 0 2.7.1a
+#hg19Ensembl   hg19Ensembl    hg19 full with Ensembl annotation   /mnt/galaxyIndices/genomes/hg19Ensembl/rnastar 1 2.7.1a
+
b
diff -r 6ef6520f14fc -r f5eb9afa8f8a tool_data_table_conf.xml.sample
--- a/tool_data_table_conf.xml.sample Wed Jul 18 13:26:12 2018 -0400
+++ b/tool_data_table_conf.xml.sample Thu Aug 15 11:30:16 2019 -0400
b
@@ -4,9 +4,9 @@
         <columns>value, dbkey, name, path</columns>
         <file path="tool-data/all_fasta.loc" />
     </table>
-    <!-- Locations of indexes in the BWA mapper format -->
-    <table name="rnastar_index2" comment_char="#" allow_duplicate_entries="False">
-        <columns>value, dbkey, name, path, with-gtf</columns>
-        <file path="tool-data/rnastar_index2.loc" />
+    <!-- Locations of STAR indexes -->
+    <table name="rnastar_index2_versioned" comment_char="#" allow_duplicate_entries="False">
+        <columns>value, dbkey, name, path, with_gene_model, version</columns>
+        <file path="tool-data/rnastar_index2_versioned.loc" />
     </table>
 </tables>