comparison data_manager/rna_star_index_builder.xml @ 4:6ef6520f14fc draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_star_index_builder commit 39b9b154845ecf517f7d9e9d76c386b473c3ebd2
author iuc
date Wed, 18 Jul 2018 13:26:12 -0400
parents cdc4d8a998e1
children f5eb9afa8f8a
comparison
equal deleted inserted replaced
3:50ca9af6db2e 4:6ef6520f14fc
1 <tool id="rna_star_index_builder_data_manager" name="rnastar index2" tool_type="manage_data" version="0.0.4" profile="17.01"> 1 <tool id="rna_star_index_builder_data_manager" name="rnastar index2" tool_type="manage_data" version="0.0.5" profile="17.01">
2 <description>builder</description> 2 <description>builder</description>
3 3
4 <macros> 4 <macros>
5 <import>macros.xml</import> 5 <import>macros.xml</import>
6 </macros> 6 </macros>
7 7
8 <expand macro="requirements" /> 8 <expand macro="requirements" />
9 9
10 <command><![CDATA[ 10 <command><![CDATA[
11 #import json, os 11 if [ -z "\$GALAXY_MEMORY_MB" ] ; then
12 #set params = json.loads( open( str($out_file) ).read() ) 12 GALAXY_MEMORY_BYTES=31000000000 ;
13 #set target_directory = $params[ 'output_data' ][0]['extra_files_path'].encode('ascii', 'replace') 13 else
14 #set subdir = os.path.basename(target_directory) 14 GALAXY_MEMORY_BYTES=\$((GALAXY_MEMORY_MB * 1000000)) ;
15 fi ;
15 16
16 mkdir -p '${target_directory}/${subdir}' && 17 #import json, os
18 #set params = json.loads(open(str($out_file)).read())
19 #set target_directory = $params['output_data'][0]['extra_files_path'].encode('ascii', 'replace')
20 #set subdir = os.path.basename(target_directory)
17 21
18 STAR 22 mkdir -p '${target_directory}/${subdir}' &&
19 --runMode genomeGenerate
20 --genomeFastaFiles '${all_fasta_source.fields.path}'
21 --genomeDir '${target_directory}/${subdir}'
22 #if str($GTFconditional.GTFselect) == "withGTF":
23 --sjdbGTFfile '${GTFconditional.sjdbGTFfile}'
24 --sjdbOverhang '${GTFconditional.sjdbOverhang}'
25 #end if
26 --runThreadN \${GALAXY_SLOTS:-2} &&
27 23
28 python ${__tool_directory__}/rna_star_index_builder.py 24 STAR
29 --config-file '${out_file}' 25 --runMode genomeGenerate
30 --value '${all_fasta_source.fields.value}' 26 --genomeFastaFiles '${all_fasta_source.fields.path}'
31 --dbkey '${all_fasta_source.fields.dbkey}' 27 --genomeDir '${target_directory}/${subdir}'
32 #if $name: 28 --limitGenomeGenerateRAM \${GALAXY_MEMORY_BYTES}
33 --name '$name' 29 #if $GTFconditional.GTFselect == "withGTF":
34 #else 30 --sjdbGTFfile '${GTFconditional.sjdbGTFfile}'
35 --name '${all_fasta_source.fields.name}' 31 --sjdbOverhang ${GTFconditional.sjdbOverhang}
36 #end if 32 #end if
37 #if str($GTFconditional.GTFselect) == "withGTF": 33 #if $advanced_options.advanced_options_selector == "advanced":
38 --withGTF 1 34 --genomeSAindexNbases ${advanced_options.genomeSAindexNbases}
39 #end if 35 --genomeChrBinNbits ${advanced_options.genomeChrBinNbits}
40 --data-table 'rnastar_index2' 36 --genomeSAsparseD ${advanced_options.genomeSAsparseD}
41 --subdir '${subdir}' 37 #end if
38 --runThreadN \${GALAXY_SLOTS:-2} &&
39
40 python '${__tool_directory__}/rna_star_index_builder.py'
41 --config-file '${out_file}'
42 --value '${all_fasta_source.fields.value}'
43 --dbkey '${all_fasta_source.fields.dbkey}'
44 #if $name:
45 --name '$name'
46 #else
47 --name '${all_fasta_source.fields.name}'
48 #end if
49 #if str($GTFconditional.GTFselect) == "withGTF":
50 --withGTF 1
51 #end if
52 --data-table rnastar_index2
53 --subdir '${subdir}'
42 ]]></command> 54 ]]></command>
43 <inputs> 55 <inputs>
44 <param name="all_fasta_source" type="select" label="Source FASTA Sequence"> 56 <param name="all_fasta_source" type="select" label="Source FASTA Sequence">
45 <options from_data_table="all_fasta"/> 57 <options from_data_table="all_fasta"/>
46 </param> 58 </param>
47 <param name="name" 59 <param name="name" type="text" value="" label="Informative name for sequence index"
48 type="text" 60 help="By using different settings, you may have several indices per reference genome. Give an appropriate description to the index to distinguish between indices"/>
49 value=""
50 label="Informative name for sequence index"
51 help="By using different settings, you may have several indices per reference genome. Give an appropriate description to the index to distinguish between indices"/>
52 <conditional name="GTFconditional"> 61 <conditional name="GTFconditional">
53 <param name="GTFselect" type="select" label="Reference genome with or without an annotation" help="Must the index have been created WITH a GTF file (if not you can specify one afterward)."> 62 <param name="GTFselect" type="select" label="Reference genome with or without an annotation" help="Must the index have been created WITH a GTF file (if not you can specify one afterward).">
54 <option value="withoutGTF">use genome reference without builtin gene-model</option> 63 <option value="withoutGTF">use genome reference without builtin gene-model</option>
55 <option value="withGTF">use genome reference with builtin gene-model</option> 64 <option value="withGTF">use genome reference with builtin gene-model</option>
56 </param> 65 </param>
57 <when value="withGTF"> 66 <when value="withGTF">
58 <param argument="--sjdbGTFfile" type="data" format="gff3,gtf" label="Gene model (gff3,gtf) file for splice junctions" optional="false" help="Exon junction information for mapping splices"/> 67 <param argument="--sjdbGTFfile" type="data" format="gff3,gtf" label="Gene model (gff3,gtf) file for splice junctions" optional="false" help="Exon junction information for mapping splices"/>
59 <param argument="--sjdbOverhang" type="integer" min="1" value="100" label="Length of the genomic sequence around annotated junctions" help="Used in constructing the splice junctions database. Ideal value is ReadLength-1"/> 68 <param argument="--sjdbOverhang" type="integer" min="1" value="100" label="Length of the genomic sequence around annotated junctions" help="Used in constructing the splice junctions database. Ideal value is ReadLength-1"/>
60 </when> 69 </when>
61 <when value="withoutGTF" /> 70 <when value="withoutGTF" />
71 </conditional>
72 <conditional name="advanced_options">
73 <param name="advanced_options_selector" type="select" label="Advanced options">
74 <option value="default" selected="true">Use default options</option>
75 <option value="advanced">Set advanced options</option>
76 </param>
77 <when value="default" />
78 <when value="advanced">
79 <param argument="--genomeSAindexNbases" type="integer" min="1" value="14"
80 label="Length (bases) of the SA pre-indexing string"
81 help="Typically between 10 and 15. Longer strings will use much more memory, but allow
82 faster searches. For small genomes, the parameter --genomeSAindexNbases must be scaled
83 down to min(14, log2(GenomeLength)/2 - 1). For example, for 1 megaBase genome, this is
84 equal to 9, for 100 kiloBase genome, this is equal to 7."/>
85 <param argument="--genomeChrBinNbits" type="integer" min="1" value="18"
86 label="Log2(chrBin), where chrBin is the size of the bins for genome storage"
87 help="Each chromosome will occupy an integer number of bins. For a genome with large number
88 of contigs, it is recommended to scale this parameter as min(18,
89 log2[max(GenomeLength/NumberOfReferences,ReadLength)]). For example, for 3 gigaBase
90 genome with 100,000 chromosomes/scaffolds, this is equal to 15."/>
91 <param argument="--genomeSAsparseD" type="integer" min="1" value="1" label="Suffix array sparsity"
92 help="The distance between indices: use bigger numbers to decrease needed RAM at the cost of
93 mapping speed reduction"/>
94 </when>
62 </conditional> 95 </conditional>
63 </inputs> 96 </inputs>
64 97
65 <outputs> 98 <outputs>
66 <data name="out_file" format="data_manager_json"/> 99 <data name="out_file" format="data_manager_json"/>
77 <output name="out_file" file="test_star_01.data_manager_json"/> 110 <output name="out_file" file="test_star_01.data_manager_json"/>
78 </test> 111 </test>
79 </tests> 112 </tests>
80 --> 113 -->
81 114
82 <help> 115 <help><![CDATA[
83
84 .. class:: infomark 116 .. class:: infomark
85 117
86 <![CDATA[
87 *What it does* 118 *What it does*
88 119
89 This is a Galaxy datamanager for the rna STAR gap-aware RNA aligner. 120 This is a Galaxy datamanager for the rna STAR gap-aware RNA aligner.
90 121
91 Please read the fine manual - that and the google group are the places to learn about the options above. 122 Please read the fine manual - that and the google group are the places to learn about the options above.
93 *Memory requirements* 124 *Memory requirements*
94 125
95 To run efficiently, RNA-STAR requires enough free memory to 126 To run efficiently, RNA-STAR requires enough free memory to
96 hold the SA-indexed reference genome in RAM. For Human Genome hg19 this 127 hold the SA-indexed reference genome in RAM. For Human Genome hg19 this
97 index is about 27GB and running RNA-STAR requires approximately 30GB of RAM. 128 index is about 27GB and running RNA-STAR requires approximately 30GB of RAM.
98 For custom genomes, the rule of thub is to multiply the size of the 129 For custom genomes, the rule of thumb is to multiply the size of the
99 reference FASTA file by 9 to estimate the required amount of RAM. 130 reference FASTA file by 9 to estimate the required amount of RAM.
100 131
101 *Note on sjdbOverhang* 132 *Note on sjdbOverhang*
102 133
103 From https://groups.google.com/forum/#!topic/rna-star/h9oh10UlvhI:: 134 From https://groups.google.com/forum/#!topic/rna-star/h9oh10UlvhI::
124 3. 'transcript_id' attribute that assigns each exon to a transcript (--this name can be changed with --sjdbGTFtagExonParentTranscript) 155 3. 'transcript_id' attribute that assigns each exon to a transcript (--this name can be changed with --sjdbGTFtagExonParentTranscript)
125 156
126 Cheers 157 Cheers
127 Alex 158 Alex
128 159
129 **Notice:** If you leave name, description, or id blank, it will be generated automatically. 160 **Notice:** If you leave name, description, or id blank, it will be generated automatically.
130 ]]> 161 ]]></help>
131 </help>
132 <expand macro="citations" /> 162 <expand macro="citations" />
133 </tool> 163 </tool>