changeset 31:abc79259f5f0 draft

Uploaded
author p.lucas
date Tue, 28 May 2024 14:31:59 +0000
parents 8c668d029008
children ea84e8cc3cfe
files diamond_makedb.xml
diffstat 1 files changed, 90 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/diamond_makedb.xml	Tue May 28 14:31:59 2024 +0000
@@ -0,0 +1,90 @@
+<tool id="pl_diamond_makedb" name="PL_Diamond makedb" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="19.01">
+    <description>Build database from a FASTA file</description>
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+
+    <expand macro="stdio" />
+    <expand macro="version_command" />
+
+    <command detect_errors="aggressive">
+    <!-- DB has two files, *.dmnd and *.tx -->
+    <![CDATA[
+    /usr/bin/diamond makedb
+        --threads "\${GALAXY_SLOTS:-12}"
+        --in '$infile'
+        --db ./database
+
+      #if str($tax_cond.tax_select) == 'yes':
+        --taxonmap '$tax_cond.taxonmap'
+        --taxonnodes '$tax_cond.taxonnodes'
+        --taxonnames '$tax_cond.taxonnames'
+      #end if
+    ]]>
+    </command>
+
+    <inputs>
+      <param name="infile" type="data" format="fasta" label="Input reference file in FASTA format" />
+      <conditional name="tax_cond">
+        <param name="tax_select" type="select" label="Add taxonomic data?" help="Needs to be supplied in order to provide taxonomy features of the aligner">
+          <option value="yes">Yes</option>
+          <option value="no" selected="true">No</option>
+        </param>
+        <when value="yes">
+          <param argument="--taxonmap" type="data" format="tabular" 
+            label="Protein accession to taxid mapping file" 
+            help="Path to mapping file that maps NCBI protein accession numbers to taxon ids (gzip compressed). This parameter is optional and needs to be supplied in order to provide taxonomy features. 
+              A custom file following the same format may be supplied here. Note that the first line of this file is assumed to contain headings and will be ignored" />
+          <param argument="--taxonnodes" type="data" format="tabular" label="Taxonomy nodes.dmp from NCBI" help="This parameter is optional and needs to be supplied in order to provide taxonomy features" />
+          <param argument="--taxonnames" type="data" format="tabular" label="Taxonomy names.dmp from NCBI" help="This parameter is optional and needs to be supplied in order to provide taxonomy features" />
+        </when>
+        <when value="no"/>
+      </conditional>
+    </inputs>
+
+    <outputs>
+        <data format="dmnd" name="outfile" from_work_dir="database.dmnd" label="${tool.name} on ${on_string}"/>
+    </outputs>
+
+    <tests>
+        <test>
+            <param name="infile" value="db.fasta" ftype="fasta"/>
+            <output name="outfile" value="db.dmnd" compare="sim_size" delta="2"/>
+        </test>
+        <test>
+            <param name="infile" value="db.fasta" ftype="fasta"/>
+            <conditional name="tax_cond">
+                <param name="tax_select" value="yes"/>
+                <param name="taxonmap" ftype="tabular" value="prot.accession2taxid" />
+                <param name="taxonnodes" ftype="tabular" value="nodes.dmp" />
+                <param name="taxonnames" ftype="tabular" value="names.dmp" />
+            </conditional>
+            <output name="outfile" value="db-wtax.dmnd" compare="sim_size" delta="2"/>
+        </test>
+    </tests>
+
+    <help>
+<![CDATA[
+
+.. class:: infomark
+
+**What it does**
+
+DIAMOND_ is a new alignment tool for aligning short DNA sequencing reads to a protein reference database such as NCBI-NR.
+On Illumina reads of length 100-150bp, in fast mode, DIAMOND is about 20,000 times faster than BLASTX, while reporting
+about 80-90% of all matches that BLASTX finds, with an e-value of at most 1e-5. In sensitive mode, DIAMOND is about 2,500
+times faster than BLASTX, finding more than 94% of all matches.
+
+.. _DIAMOND: http://ab.inf.uni-tuebingen.de/software/diamond/
+
+
+- taxonmap: Path to mapping file that maps NCBI protein accession numbers to taxon ids (gzip compressed). This parameter is optional and needs to be supplied in order to provide taxonomy features. The file can be downloaded from NCBI: ftp://ftp.ncbi.nlm.nih.gov/pub/taxonomy/accession2taxid/prot.accession2taxid.gz
+
+- taxonnames: Path to the names.dmp file from the NCBI taxonomy. This parameter is optional and needs to be supplied in order to provide taxonomy features. The file is contained within this archive downloadable at NCBI: ftp.ncbi.nlm.nih.gov/pub/taxonomy/taxdmp.zip
+
+- taxonnodes: Path to the nodes.dmp file from the NCBI taxonomy. This parameter is optional and needs to be supplied in order to provide taxonomy features. The file is contained within this archive downloadable at NCBI: ftp.ncbi.nlm.nih.gov/pub/taxonomy/taxdmp.zip
+]]>
+    </help>
+
+    <expand macro="citations" />
+</tool>