Mercurial > repos > iuc > data_manager_star_index_builder
changeset 5:f5eb9afa8f8a draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_star_index_builder commit 9b68f6ae375aed38493f8399b8572347c750336d
author | iuc |
---|---|
date | Thu, 15 Aug 2019 11:30:16 -0400 |
parents | 6ef6520f14fc |
children | 64deddb6a8ec |
files | data_manager/macros.xml data_manager/rna_star_index_builder.py data_manager/rna_star_index_builder.xml data_manager_conf.xml tool-data/rnastar_index2.loc.sample tool-data/rnastar_index2_versioned.loc.sample tool_data_table_conf.xml.sample |
diffstat | 7 files changed, 188 insertions(+), 57 deletions(-) [+] |
line wrap: on
line diff
--- a/data_manager/macros.xml Wed Jul 18 13:26:12 2018 -0400 +++ b/data_manager/macros.xml Thu Aug 15 11:30:16 2019 -0400 @@ -1,10 +1,39 @@ <macros> + <!-- REMEMBER to bump the version of rna_star_index_builder_data_manager + whenever you make changes to the following two version tokens! + The data manager uses a symlink to this macro file to keep the versions in + sync. --> + <!-- STAR version to be used --> + <token name="@VERSION@">2.7.2a</token> + <!-- STAR index version compatible with this version of STAR + This is the STAR version that introduced the index structure expected + by the current version. + It can be found for any specific version of STAR with: + STAR -h | grep versionGenome + or by looking for the versionGenome parameter in source/parametersDefault + of STAR's source code --> + <token name="@IDX_VERSION@">2.7.1a</token> + <xml name="requirements"> <requirements> - <requirement type="package" version="2.6.0b">star</requirement> - <requirement type="package" version="1.8">samtools</requirement> + <requirement type="package" version="@VERSION@">star</requirement> + <requirement type="package" version="1.9">samtools</requirement> </requirements> </xml> + + <xml name="index_selection" token_with_gene_model="1"> + <param argument="--genomeDir" name="genomeDir" type="select" + label="Select reference genome" + help="If your genome of interest is not listed, contact the Galaxy team"> + <options from_data_table="rnastar_index2_versioned"> + <filter type="static_value" column="4" value="@WITH_GENE_MODEL@" /> + <filter type="static_value" column="5" value="@IDX_VERSION@" /> + <filter type="sort_by" column="2" /> + <validator type="no_options" message="No indexes are available for the selected input dataset" /> + </options> + </param> + </xml> + <token name="@FASTQ_GZ_OPTION@"> --readFilesCommand zcat </token> @@ -13,8 +42,8 @@ <citation type="doi">10.1093/bioinformatics/bts635</citation> </citations> </xml> - <xml name="@SJDBOPTIONS@"> - <param 
argument="--sjdbGTFfile" type="data" format="gff3,gtf" label="Gene model (gff3,gtf) file for splice junctions" optional="true" help="Exon junction information for mapping splices"/> + <xml name="@SJDBOPTIONS@" token_optional="true"> + <param argument="--sjdbGTFfile" type="data" format="gff3,gtf" label="Gene model (gff3,gtf) file for splice junctions" optional="@OPTIONAL@" help="Exon junction information for mapping splices"/> <param argument="--sjdbOverhang" type="integer" min="1" value="100" label="Length of the genomic sequence around annotated junctions" help="Used in constructing the splice junctions database. Ideal value is ReadLength-1"/> </xml> <xml name="dbKeyActions"> @@ -22,7 +51,7 @@ <conditional name="refGenomeSource.geneSource"> <when value="indexed"> <action type="metadata" name="dbkey"> - <option type="from_data_table" name="rnastar_index2" column="1" offset="0"> + <option type="from_data_table" name="rnastar_index2_versioned" column="1" offset="0"> <filter type="param_value" column="0" value="#" compare="startswith" keep="False"/> <filter type="param_value" ref="refGenomeSource.GTFconditional.genomeDir" column="0"/> </option> @@ -36,4 +65,99 @@ </conditional> </actions> </xml> + <token name="@TEMPINDEX@"><![CDATA[ + ## Create temporary index for custom reference + #if str($refGenomeSource.geneSource) == 'history': + mkdir -p tempstargenomedir && + STAR + --runMode genomeGenerate + --genomeDir 'tempstargenomedir' + --genomeFastaFiles '${refGenomeSource.genomeFastaFiles}' + ## Handle difference between indices with/without annotations + #if str($refGenomeSource.GTFconditional.GTFselect) == 'with-gtf': + --sjdbOverhang '${refGenomeSource.GTFconditional.sjdbOverhang}' + --sjdbGTFfile '${refGenomeSource.GTFconditional.sjdbGTFfile}' + #if str($refGenomeSource.GTFconditional.sjdbGTFfile.ext) == 'gff3': + --sjdbGTFtagExonParentTranscript Parent + #end if + #end if + #if str($refGenomeSource.genomeSAindexNbases): + --genomeSAindexNbases 
${refGenomeSource.genomeSAindexNbases} + #end if + --runThreadN \${GALAXY_SLOTS:-4} + && + #end if + ]]></token> + <token name="@REFGENOMEHANDLING" ><![CDATA[ + --runThreadN \${GALAXY_SLOTS:-4} + --genomeLoad NoSharedMemory + --genomeDir + #if str($refGenomeSource.geneSource) == 'history': + tempstargenomedir + #else: + '${refGenomeSource.GTFconditional.genomeDir.fields.path}' + ## Handle difference between indices with/without annotations + #if str($refGenomeSource.GTFconditional.GTFselect) == 'with-gtf': + #if $refGenomeSource.GTFconditional.sjdbGTFfile: + --sjdbOverhang $refGenomeSource.GTFconditional.sjdbOverhang + --sjdbGTFfile '${refGenomeSource.GTFconditional.sjdbGTFfile}' + #if str($refGenomeSource.GTFconditional.sjdbGTFfile.ext) == 'gff3': + --sjdbGTFtagExonParentTranscript Parent + #end if + #end if + #end if + #end if + ]]></token> + <xml name="stdio" > + <stdio> + <regex match="FATAL error" source="both" level="fatal"/> + <regex match="EXITING: FATAL INPUT ERROR:" source="both" level="fatal"/> + <regex match="EXITING: fatal error trying to allocate genome arrays, exception thrown: std::bad_alloc" source="both" level="fatal"/> + <regex match="\[sam_read1\] missing header\? Abort!" source="both" level="fatal"/> + </stdio> + </xml> + <xml name="refgenomehandling" > + <conditional name="refGenomeSource"> + <param name="geneSource" type="select" label="Custom or built-in reference genome" help="Built-ins were indexed using default options"> + <option value="indexed" selected="true">Use a built-in index</option> + <option value="history">Use reference genome from history and create temporary index</option> + </param> + <when value="indexed"> + <conditional name="GTFconditional"> + <param name="GTFselect" type="select" + label="Reference genome with or without an annotation" + help="Select the '... with builtin gene-model' option to select from the list of available indexes that were built with splice junction information. Select the '... 
without builtin gene-model' option to select from the list of available indexes without annotated splice junctions."> + <option value="without-gtf">use genome reference with builtin gene-model</option> + <option value="with-gtf">use genome reference without builtin gene-model</option> + </param> + <when value="with-gtf"> + <expand macro="index_selection" with_gene_model="0" /> + <expand macro="@SJDBOPTIONS@" /> + </when> + <when value="without-gtf"> + <expand macro="index_selection" with_gene_model="1" /> + </when> + </conditional> + </when> + <when value="history"> + <param argument="--genomeFastaFiles" type="data" format="fasta" label="Select a reference genome" /> + <!-- Currently, this parameter is not exposed in the wrapper, + but used only in the tests to avoid excessive index sizes for + the tiny test genomes. --> + <param name="genomeSAindexNbases" type="hidden" value="" /> + <conditional name="GTFconditional"> + <param name="GTFselect" type="select" + label="Build index with our without known splice junctions annotation" + help="To build an index with known splice junctions annotated, you will have to provide a GTF or GFF3 dataset that describes the gene models (the location of genes, transcripts and exons) known for the reference genome."> + <option value="without-gtf">build index without gene-model</option> + <option value="with-gtf">build index with gene-model</option> + </param> + <when value="with-gtf"> + <expand macro="@SJDBOPTIONS@" optional="false"/> + </when> + <when value="without-gtf" /> + </conditional> + </when> + </conditional> + </xml> </macros>
--- a/data_manager/rna_star_index_builder.py Wed Jul 18 13:26:12 2018 -0400 +++ b/data_manager/rna_star_index_builder.py Thu Aug 15 11:30:16 2019 -0400 @@ -1,29 +1,31 @@ #!/usr/bin/env python +import argparse import json -import optparse def main(): - parser = optparse.OptionParser() - parser.add_option( '--config-file', dest='config_file', action='store', type="string") - parser.add_option( '--value', dest='value', action='store', type="string" ) - parser.add_option( '--dbkey', dest='dbkey', action='store', type="string" ) - parser.add_option( '--name', dest='name', action='store', type="string" ) - parser.add_option( '--subdir', dest='subdir', action='store', type="string" ) - parser.add_option( '--data-table', dest='data_table', action='store', type="string" ) - parser.add_option( '--withGTF', dest='withGTF', action='store_true' ) - (options, args) = parser.parse_args() + parser = argparse.ArgumentParser() + parser.add_argument( '--config-file' ) + parser.add_argument( '--value' ) + parser.add_argument( '--dbkey' ) + parser.add_argument( '--name' ) + parser.add_argument( '--subdir' ) + parser.add_argument( '--data-table' ) + parser.add_argument( '--with-gene-model', action='store_true' ) + parser.add_argument( '--index-version' ) - if options.dbkey in [ None, '', '?' ]: - raise Exception( '"%s" is not a valid dbkey. You must specify a valid dbkey.' % ( options.dbkey ) ) + args = parser.parse_args() + + if args.dbkey in [ None, '', '?' ]: + raise Exception( '"%s" is not a valid dbkey. You must specify a valid dbkey.' 
% ( args.dbkey ) ) - withGTF = "0" - if options.withGTF: - withGTF = "1" + with_gene_model = "0" + if args.with_gene_model: + with_gene_model = "1" - data_manager_dict = {'data_tables': {options.data_table: [dict({"value": options.value, "dbkey": options.dbkey, "name": options.name, "path": options.subdir, "with-gtf": withGTF} )]}} - open( options.config_file, 'wb' ).write( json.dumps( data_manager_dict ) ) + data_manager_dict = {'data_tables': {args.data_table: [dict({"value": args.value, "dbkey": args.dbkey, "name": args.name, "path": args.subdir, "with_gene_model": with_gene_model, "version": args.index_version} )]}} + open( args.config_file, 'w' ).write( json.dumps( data_manager_dict ) ) if __name__ == "__main__":
--- a/data_manager/rna_star_index_builder.xml Wed Jul 18 13:26:12 2018 -0400 +++ b/data_manager/rna_star_index_builder.xml Thu Aug 15 11:30:16 2019 -0400 @@ -1,4 +1,4 @@ -<tool id="rna_star_index_builder_data_manager" name="rnastar index2" tool_type="manage_data" version="0.0.5" profile="17.01"> +<tool id="rna_star_index_builder_data_manager" name="rnastar index2" tool_type="manage_data" version="2.7.1a" profile="17.01"> <description>builder</description> <macros> @@ -41,15 +41,16 @@ --config-file '${out_file}' --value '${all_fasta_source.fields.value}' --dbkey '${all_fasta_source.fields.dbkey}' +--index-version '@IDX_VERSION@' #if $name: --name '$name' #else --name '${all_fasta_source.fields.name}' #end if #if str($GTFconditional.GTFselect) == "withGTF": - --withGTF 1 + --with-gene-model #end if ---data-table rnastar_index2 +--data-table rnastar_index2_versioned --subdir '${subdir}' ]]></command> <inputs>
--- a/data_manager_conf.xml Wed Jul 18 13:26:12 2018 -0400 +++ b/data_manager_conf.xml Thu Aug 15 11:30:16 2019 -0400 @@ -1,7 +1,7 @@ <?xml version="1.0"?> <data_managers> - <data_manager tool_file="data_manager/rna_star_index_builder.xml" id="rna_star_index_builder" version="0.0.3"> - <data_table name="rnastar_index2"> + <data_manager tool_file="data_manager/rna_star_index_builder.xml" id="rna_star_index_builder"> + <data_table name="rnastar_index2_versioned"> <output> <column name="value" /> <column name="dbkey" /> @@ -12,12 +12,13 @@ out_file.extra_files_path is used as base by default if no source, eg for type=directory, then refers to base --> - <target base="${GALAXY_DATA_MANAGER_DATA_PATH}">${dbkey}/rnastar_index2/${value}</target> + <target base="${GALAXY_DATA_MANAGER_DATA_PATH}">rnastar/${version}/${dbkey}/${value}</target> </move> - <value_translation>${GALAXY_DATA_MANAGER_DATA_PATH}/${dbkey}/rnastar_index2/${value}/${path}</value_translation> + <value_translation>${GALAXY_DATA_MANAGER_DATA_PATH}/rnastar/${version}/${dbkey}/${value}</value_translation> <value_translation type="function">abspath</value_translation> </column> - <column name="with-gtf" /> + <column name="with_gene_model" /> + <column name="version" /> </output> </data_table> </data_manager>
--- a/tool-data/rnastar_index2.loc.sample Wed Jul 18 13:26:12 2018 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,23 +0,0 @@ -#This is a sample file distributed with Galaxy that enables tools -#to use a directory of rna-star indexed sequences data files. You will -#need to create these data files and then create a rnastar_index2.loc -#file similar to this one (store it in this directory) that points to -#the directories in which those files are stored. The rnastar_index2.loc -#file has this format (longer white space characters are TAB characters): -# -#<unique_build_id> <dbkey> <display_name> <file_base_path> <with-gtf> -# -#The <with-gtf> column should be 1 or 0, indicating whether the index was made -#with an annotation (i.e., --sjdbGTFfile and --sjdbOverhang were used) or not, -#respecively. -# -#Note that STAR indices can become quite large. Consequently, it is only -#advisable to create indices with annotations if it's known ahead of time that -#(A) the annotations won't be frequently updated and (B) the read lengths used -#will also rarely vary. If either of these is not the case, it's advisable to -#create indices without annotations and then specify an annotation file and -#maximum read length (minus 1) when running STAR. -# -#hg19 hg19 hg19 full /mnt/galaxyIndices/genomes/hg19/rnastar 0 -#hg19Ensembl hg19Ensembl hg19 full with Ensembl annotation /mnt/galaxyIndices/genomes/hg19Ensembl/rnastar 1 -
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool-data/rnastar_index2_versioned.loc.sample Thu Aug 15 11:30:16 2019 -0400 @@ -0,0 +1,26 @@ +#This is a sample file distributed with Galaxy that enables tools +#to use a directory of rna-star indexed sequences data files. You will +#need to create these data files and then create a rnastar_index2_versioned.loc +#file similar to this one (store it in this directory) that points to +#the directories in which those files are stored. The rnastar_index2_versioned.loc +#file has this format (longer white space characters are TAB characters): +# +#<unique_build_id> <dbkey> <display_name> <file_base_path> <with_gene_model> <version> +# +#The <with_gene_model> column should be 1 or 0, indicating whether the index +#was built with annotations (i.e., --sjdbGTFfile and --sjdbOverhang were used) +#or not. +# +#The <version> column indicates the STAR version that introduced the format of +#the index, i.e., the oldest STAR version that could make use of the index. +# +#Note that STAR indices can become quite large. Consequently, it is only +#advisable to create indices with annotations if it's known ahead of time that +#(A) the annotations won't be frequently updated and (B) the read lengths used +#will also rarely vary. If either of these is not the case, it's advisable to +#create indices without annotations and then specify an annotation file and +#maximum read length (minus 1) when running STAR. +# +#hg19 hg19 hg19 full /mnt/galaxyIndices/genomes/hg19/rnastar 0 2.7.1a +#hg19Ensembl hg19Ensembl hg19 full with Ensembl annotation /mnt/galaxyIndices/genomes/hg19Ensembl/rnastar 1 2.7.1a +
--- a/tool_data_table_conf.xml.sample Wed Jul 18 13:26:12 2018 -0400 +++ b/tool_data_table_conf.xml.sample Thu Aug 15 11:30:16 2019 -0400 @@ -4,9 +4,9 @@ <columns>value, dbkey, name, path</columns> <file path="tool-data/all_fasta.loc" /> </table> - <!-- Locations of indexes in the BWA mapper format --> - <table name="rnastar_index2" comment_char="#" allow_duplicate_entries="False"> - <columns>value, dbkey, name, path, with-gtf</columns> - <file path="tool-data/rnastar_index2.loc" /> + <!-- Locations of STAR indexes --> + <table name="rnastar_index2_versioned" comment_char="#" allow_duplicate_entries="False"> + <columns>value, dbkey, name, path, with_gene_model, version</columns> + <file path="tool-data/rnastar_index2_versioned.loc" /> </table> </tables>