diff tools/ncbi_blast_plus/ncbi_dustmasker_wrapper.xml @ 11:4c4a0da938ff draft

Uploaded v0.0.22, now wraps BLAST+ 2.2.28 allowing extended tabular output to include the hit descriptions as column 25. Supports $GALAXY_SLOTS. Includes more tests and heavy use of macros.
author peterjc
date Thu, 05 Dec 2013 06:55:59 -0500
parents
children 623f727cdff1
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/ncbi_blast_plus/ncbi_dustmasker_wrapper.xml	Thu Dec 05 06:55:59 2013 -0500
@@ -0,0 +1,101 @@
+<tool id="ncbi_dustmasker_wrapper" name="NCBI BLAST+ dustmasker" version="0.0.22">
+    <!-- dustmasker wrapper from Edward Kirton and Nicola Soranzo -->
+    <description>masks low complexity regions</description>
+    <macros>
+        <token name="@BINARY@">dustmasker</token>
+        <import>ncbi_macros.xml</import>
+    </macros>
+    <expand macro="requirements" />
+    <command>
+## The command is a Cheetah template which allows some Python based syntax.
+## Lines starting hash hash are comments. Galaxy will turn newlines into spaces
+dustmasker
+#if $db_opts.db_opts_selector == "db":
+  -in "${db_opts.database.fields.path}" -infmt blastdb
+#elif $db_opts.db_opts_selector == "histdb":
+  -in "${os.path.join($db_opts.histdb.extra_files_path, 'blastdb')}" -infmt blastdb
+#else:
+  -in "$subject" -infmt fasta
+#end if
+-out "$outfile"
+-window $window -level $level -linker $linker -outfmt $outformat
+    </command>
+    <expand macro="stdio" />
+    <inputs>
+        <expand macro="input_conditional_nucleotide_db" />
+        <param name="window" type="integer" value="64" label="DUST window length" />
+        <param name="level" type="integer" value="20" label="DUST level" help="Score threshold for subwindows" />
+        <param name="linker" type="integer" value="1" label="DUST linker" help="How close masked intervals should be to get merged together" />
+        <param name="outformat" type="select" label="Output format">
+<!-- acclist and maskinfo_xml are listed as possible output formats in
+     "dustmasker -help", but were not recognized by NCBI BLAST up to
+     release 2.2.27+. Fixed in BLAST 2.2.28+.
+     seqloc_* formats are not very useful -->
+<!--            <option value="acclist">acclist</option>-->
+            <option value="fasta">FASTA</option>
+            <option value="interval" selected="true">interval</option>
+            <option value="maskinfo_asn1_bin">maskinfo ASN.1 binary</option>
+            <option value="maskinfo_asn1_text">maskinfo ASN.1 text</option>
+<!--            <option value="maskinfo_xml">maskinfo_xml</option>
+            <option value="seqloc_asn1_bin">seqloc_asn1_bin</option>
+            <option value="seqloc_asn1_text">seqloc_asn1_text</option>
+            <option value="seqloc_xml">seqloc_xml</option>-->
+        </param>
+    </inputs>
+    <outputs>
+        <data name="outfile" format="interval" label="DUST Masked File">
+            <change_format>
+                <when input="outformat" value="fasta" format="fasta" />
+                <when input="outformat" value="maskinfo_asn1_bin" format="maskinfo-asn1-binary" />
+                <when input="outformat" value="maskinfo_asn1_text" format="maskinfo-asn1" />
+            </change_format>
+        </data>
+    </outputs>
+    <tests>
+        <test>
+            <param name="db_opts_selector" value="file" />
+            <param name="subject" value="three_human_mRNA.fasta" ftype="fasta" />
+            <param name="window" value="64" />
+            <param name="level" value="20" />
+            <param name="linker" value="1" />
+            <param name="outformat" value="fasta" />
+            <output name="outfile" file="dustmasker_three_human.fasta" />
+        </test>
+        <test>
+            <param name="db_opts_selector" value="file" />
+            <param name="subject" value="three_human_mRNA.fasta" ftype="fasta" />
+            <param name="window" value="64" />
+            <param name="level" value="20" />
+            <param name="linker" value="1" />
+            <param name="outformat" value="maskinfo_asn1_bin" />
+            <output name="outfile" file="dustmasker_three_human.maskinfo-asn1-binary" />
+        </test>
+        <test>
+            <param name="db_opts_selector" value="file" />
+            <param name="subject" value="three_human_mRNA.fasta" ftype="fasta" />
+            <param name="window" value="64" />
+            <param name="level" value="20" />
+            <param name="linker" value="1" />
+            <param name="outformat" value="maskinfo_asn1_text" />
+            <output name="outfile" file="dustmasker_three_human.maskinfo-asn1" />
+        </test>
+    </tests>
+    <help>
+**What it does**
+
+This tool identifies and masks out low complexity regions of a nucleotide database (or sequences in FASTA format) by using the symmetric DUST algorithm.
+
+If you select *maskinfo ASN.1* (binary or text) as output format, the output file can be used as masking data for NCBI BLAST+ makeblastdb tool.
+
+More information about dustmasker can be found in the `BLAST Command Line Applications User Manual`_.
+
+.. _BLAST Command Line Applications User Manual: http://www.ncbi.nlm.nih.gov/books/NBK1763/
+
+**References**
+
+If you use this Galaxy tool in work leading to a scientific publication please
+cite the following papers (a more specific paper covering this wrapper is planned):
+
+@REFERENCES@
+    </help>
+</tool>