Mercurial > repos > iuc > data_manager_hisat2_index_builder

diff data_manager/hisat2_index_builder.xml @ 4:d210e1f185bd draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_hisat2_index_builder commit 8652f36a3a3838dca989426961561e81432acf4f
author: iuc
date: Tue, 04 Apr 2017 18:09:40 -0400
parents: 4014cb2c17db
children: 8eac26f44d29
--- a/data_manager/hisat2_index_builder.xml	Mon Nov 23 09:41:52 2015 -0500
+++ b/data_manager/hisat2_index_builder.xml	Tue Apr 04 18:09:40 2017 -0400
@@ -1,46 +1,44 @@
-<tool id="hisat2_index_builder_data_manager" name="HISAT2 index" tool_type="manage_data" version="1.0.0">
+<tool id="hisat2_index_builder_data_manager" name="HISAT2 index" tool_type="manage_data" version="2.0.5">
     <description>builder</description>
     <requirements>
-        <requirement type="package" version="2.0">hisat</requirement>
+        <requirement type="package" version="2.0.5">hisat2</requirement>
     </requirements>
-    <stdio>
-        <exit_code range=":-1" />
-        <exit_code range="1:" />
-    </stdio>
-    <command><![CDATA[
+    <command detect_errors="exit_code"><![CDATA[
         #if $advanced.adv_param_select == 'yes' and $advanced.gtf_input:
-            ln -s "${advanced.gtf_input}" gtf_file.gtf &&
-            python \$HISAT2_ROOT_DIR/bin/extract_splice_sites.py gtf_file.gtf > splice_sites.txt &&
-            python \$HISAT2_ROOT_DIR/bin/extract_exons.py gtf_file.gtf > exon.txt &&
-            ls -lh &&
+            ln -s '${advanced.gtf_input}' gtf_file.gtf &&
+            hisat2_extract_splice_sites.py gtf_file.gtf > splice_sites.txt &&
+            hisat2_extract_exons.py gtf_file.gtf > exon.txt &&
         #end if
         #if $advanced.adv_param_select == 'yes' and $advanced.snps:
-            ln -s "${all_fasta_source.fields.path}" genome.fa &&
-            ln -s "${advanced.snps}" snps.tabular &&
-            python \$HISAT2_ROOT_DIR/bin/extract_snps.py --genome_file genome.fa --snp_file snps.tabular > snps.txt &&
+            ln -s '${advanced.snps}' snps.tabular &&
+            #if $advanced.snps.is_of_type('vcf')
+                hisat2_extract_snps_haplotypes_VCF.py '${all_fasta_source.fields.path}' snps.tabular extracted &&
+            #else
+                hisat2_extract_snps_haplotypes_UCSC.py '${all_fasta_source.fields.path}' snps.tabular extracted &&
+            #end if
         #end if
-        python $__tool_directory__/hisat2_index_builder.py --output "${out_file}" 
-            --fasta_filename "${all_fasta_source.fields.path}" 
-            --fasta_dbkey "${all_fasta_source.fields.dbkey}" 
-            --fasta_description "${all_fasta_source.fields.name}" 
-            --data_table_name "hisat2_indexes"
+        python '$__tool_directory__/hisat2_index_builder.py' --output '${out_file}'
+            --fasta_filename '${all_fasta_source.fields.path}'
+            --fasta_dbkey '${all_fasta_source.fields.dbkey}'
+            --fasta_description '${all_fasta_source.fields.name}'
+            --data_table_name hisat2_indexes
+            --indexer_options "-p \${GALAXY_SLOTS:-1}
             #if $advanced.adv_param_select == 'yes':
-                --indexer_options "
                 --noauto
-                -p \${GALAXY_SLOTS:-1}
-                #if $snps:
-                    --snps `pwd`/snps.txt
+                #if $advanced.snps:
+                    --snps `pwd`/extracted.snp
+                    --haplotype `pwd`/extracted.haplotype
                 #end if
                 #if $advanced.gtf_input:
                     --ss `pwd`/splice_sites.txt
                     --exon `pwd`/exon.txt
                 #end if
                 --bmax $advanced.bmax
                 --bmaxdivn $advanced.bmaxdivn
                 --dcv $advanced.dcv
                 --offrate $advanced.offrate
-                "
             #end if
+            "
         ]]>
     </command>
     <inputs>
@@ -52,21 +50,21 @@
                 <option value="no">Use defaults</option>
                 <option value="yes">Fine-tune indexing parameters</option>
             </param>
+            <when value="no" />
             <when value="yes">
-                <param type="integer" name="bmax" label="Maximum number of suffixes allowed in a block." help="--bmax" value="4" />
-                <param type="integer" name="bmaxdivn" label="Maximum number of suffixes allowed in a block, expressed as a fraction of the length of the reference." help="--bmaxdivn" value="4" />
-                <param type="integer" name="dcv" label="Period for the difference-cover sample." help="--dcv: A larger period yields less memory overhead, but may make suffix sorting slower, especially if repeats are present. Must be a power of 2 no greater than 4096. " value="1024" min="2" max="4096" />
-                <param type="integer" name="offrate" label="Mark rows in the Burrows-Wheeler transform" help="--offrate: To map alignments back to positions on the reference sequences, it's necessary to annotate (&quot;mark&quot;) some or all of the Burrows-Wheeler rows with their corresponding location on the genome. This parameter governs how many rows get marked: the indexer will mark every 2^&lt;int&gt; rows. Marking more rows makes reference-position lookups faster, but requires more memory to hold the annotations at runtime. The default is 4 (every 16th row is marked; for human genome, annotations occupy about 680 megabytes)." value="4" />
-                <param type="data" format="tabular" name="snps" label="Provide a list of SNPs in the UCSC dbSNP format" optional="True" help="This should be a dataset in the Data Manager History (automatically created). If you include SNPs or splice sites and exons, building an index on the human genome will consume up to 200GB RAM as index building involves a graph construction." />
-                <param type="data" format="gtf" name="gtf_input" label="Provide a GTF file for HISAT2 to extract splice sites from" optional="True" help="This should be a dataset in the Data Manager History (automatically created). If you include SNPs or splice sites and exons, building an index on the human genome will consume up to 200GB RAM as index building involves a graph construction." />
+                <param argument="--bmax" type="integer" value="4" label="Maximum number of suffixes allowed in a block" />
+                <param argument="--bmaxdivn" type="integer" value="4" label="Maximum number of suffixes allowed in a block, expressed as a fraction of the length of the reference" />
+                <param argument="--dcv" type="integer" min="2" max="4096" value="1024" label="Period for the difference-cover sample" help="A larger period yields less memory overhead, but may make suffix sorting slower, especially if repeats are present. Must be a power of 2 no greater than 4096" />
+                <param argument="--offrate" type="integer" value="4" label="Mark rows in the Burrows-Wheeler transform" help="To map alignments back to positions on the reference sequences, it's necessary to annotate (&quot;mark&quot;) some or all of the Burrows-Wheeler rows with their corresponding location on the genome. This parameter governs how many rows get marked: the indexer will mark every 2^&lt;int&gt; rows. Marking more rows makes reference-position lookups faster, but requires more memory to hold the annotations at runtime. The default is 4 (every 16th row is marked; for human genome, annotations occupy about 680 megabytes)" />
+                <param name="snps" type="data" format="tabular,vcf" optional="true" label="Provide a list of SNPs in the UCSC dbSNP or VCF format" help="If you include SNPs or splice sites and exons, building an index on the human genome will consume up to 200GB RAM as index building involves a graph construction" />
+                <param name="gtf_input" type="data" format="gtf" optional="true" label="Provide a GTF file for HISAT2 to extract splice sites from" help="If you include SNPs or splice sites and exons, building an index on the human genome will consume up to 200GB RAM as index building involves a graph construction" />
             </when>
-            <when value="no" />
         </conditional>
-        <param label="Name of sequence" name="sequence_name" type="text" value="" />
-        <param label="ID for sequence" name="sequence_id" type="text" value="" />
+        <param name="sequence_name" type="text" value="" label="Name of sequence" />
+        <param name="sequence_id" type="text" value="" label="ID for sequence" />
     </inputs>
     <outputs>
-        <data format="data_manager_json" name="out_file" />
+        <data name="out_file" format="data_manager_json" />
     </outputs>
     <help>
 <![CDATA[
author	iuc
date	Tue, 04 Apr 2017 18:09:40 -0400
parents	4014cb2c17db
children	8eac26f44d29