changeset 5:226949352e31 draft default tip

planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/cat commit a1c079107b72dc08612fa664897bb9d627624e52-dirty
author jjohnson
date Wed, 27 Nov 2019 12:11:14 -0500
parents aaeb63501369
children
files cat_add_names.xml cat_bins.xml cat_contigs.xml cat_prepare.xml cat_summarise.xml macros.xml
diffstat 6 files changed, 80 insertions(+), 6 deletions(-) [+]
line wrap: on
line diff
--- a/cat_add_names.xml	Wed Nov 27 10:27:23 2019 -0500
+++ b/cat_add_names.xml	Wed Nov 27 12:11:14 2019 -0500
@@ -13,7 +13,7 @@
     && @TXT2TSV@ -i output_names.txt -o $output
     ]]></command>
     <inputs>
-        <param name="input" type="data" format="tabular,txt" label="classification.txt or ORF2LCA.txt"/>
+        <param argument="--input" type="data" format="tabular,txt" label="classification.txt or ORF2LCA.txt"/>
         <expand macro="cat_db" />
         <expand macro="add_names_options" />
     </inputs>
@@ -33,7 +33,18 @@
     </tests>
     <help><![CDATA[
 **CAT/BAT add_names** 
-Add names for the NCBI taxomy IDs.
+
+Add taxonomic names to CAT or BAT output files.
+
+Required arguments:
+  -i, --input_file       Path to input file. Can be either classification
+                         output file or ORF2LCA output file.
+  -t, --taxonomy_folder  Path to folder that contains taxonomy files.
+
+Optional arguments:
+  --only_official        Only output official level names.
+  --exclude_scores       Do not include bit-score support scores in the
+                         lineage.
 
 @COMMON_HELP@
     ]]></help>
--- a/cat_bins.xml	Wed Nov 27 10:27:23 2019 -0500
+++ b/cat_bins.xml	Wed Nov 27 12:11:14 2019 -0500
@@ -107,9 +107,12 @@
 
     </tests>
     <help><![CDATA[
-**CAT bin or bins**
+**CAT bin** or **CAT bins**
+
 Classify metagenomics assembled genomes. 
 
+@OPTIONS_HELP@
+
 @COMMON_HELP@
     ]]></help>
     <expand macro="citations" />
--- a/cat_contigs.xml	Wed Nov 27 10:27:23 2019 -0500
+++ b/cat_contigs.xml	Wed Nov 27 12:11:14 2019 -0500
@@ -66,8 +66,11 @@
     </tests>
     <help><![CDATA[
 **CAT contigs**
+
 Classifiy metagenomics contigs.
 
+@OPTIONS_HELP@
+
 @COMMON_HELP@
     ]]></help>
     <expand macro="citations" />
--- a/cat_prepare.xml	Wed Nov 27 10:27:23 2019 -0500
+++ b/cat_prepare.xml	Wed Nov 27 12:11:14 2019 -0500
@@ -21,6 +21,7 @@
     </outputs>
     <help><![CDATA[
 **CAT prepare**
+
 Prepare CAT reference data for classifying metagomic contigs or genome assemblies.
 
 This requires over a 100GB of RAM, 250GB of disk space, and up to 24 hours.
--- a/cat_summarise.xml	Wed Nov 27 10:27:23 2019 -0500
+++ b/cat_summarise.xml	Wed Nov 27 12:11:14 2019 -0500
@@ -11,9 +11,9 @@
     && @TXT2TSV@ -i output_names_summary.txt -o $output
     ]]></command>
     <inputs>
-        <param name="input" type="data" format="tabular" label="classification.official_names.txt"
+        <param argument="--input" type="data" format="tabular" label="classification.official_names.txt"
             help="The classication must be made with only_official names"/>
-        <param name="contigs_fasta" type="data" format="fasta" optional="true" label="contigs.fasta" 
+        <param argument="--contigs_fasta" type="data" format="fasta" optional="true" label="contigs.fasta" 
             help="Required if a contig2classification.names"/>
     </inputs>
     <outputs>
@@ -32,7 +32,19 @@
     </tests>
     <help><![CDATA[
 **CAT summarise** 
-Produce a summary report of assignments to the ofifcial taxonomic names. 
+
+Summarise taxonomic assignments from a named CAT or BAT classification file.
+
+Required arguments:
+  -i, --input_file     Path to named CAT contig classification file or BAT
+                       bin classification file. Currently only official ranks
+                       are supported, and only classification files
+                       containing a single classification per contig / bin.
+
+Optional arguments:
+  -c, --contigs_fasta
+                        Path to contigs fasta file. This is required if you
+                        want to summarise a contig classification file.
 
 @COMMON_HELP@
     ]]></help>
--- a/macros.xml	Wed Nov 27 10:27:23 2019 -0500
+++ b/macros.xml	Wed Nov 27 12:11:14 2019 -0500
@@ -284,6 +284,50 @@
             </actions>
         </data>
     </xml>
+    <token name="@OPTIONS_HELP@"><![CDATA[
+
+Optional arguments:
+  -r, --range               cut-off range after alignment [0-49] (default: 10).
+  -f, --fraction            fraction of bit-score support for each classification
+                            [0-0.99] (default: 0.5).
+  -p, --proteins_fasta
+                            Path to predicted proteins fasta file. If supplied,
+                            CAT will skip the protein prediction step.
+  -a, --diamond_alignment
+                            Path to DIAMOND alignment table. If supplied, CAT will
+                            skip the DIAMOND alignment step and directly classify
+                            the sequences. A predicted proteins fasta file should
+                            also be supplied with argument [-p / --proteins_fasta].
+
+
+DIAMOND specific optional arguments:
+  --sensitive     Run DIAMOND in sensitive mode (default: not enabled).
+
+  --block_size    DIAMOND block-size parameter (default: 2.0). Lower
+                  numbers will decrease memory and temporary disk space
+                  usage.
+
+  --index_chunks
+                  DIAMOND index-chunks parameter (default: 4). Set to 1
+                  on high memory machines. The parameter has no effect
+                  on temporary disk space usage.
+
+  --top
+                  DIAMOND top parameter [0-50] (default: 50). Governs
+                  hits within range of best hit that are written to the
+                  alignment file. This is not the [-r / --range]
+                  parameter!
+
+
+Setting the DIAMOND --top parameter
+
+You can speed up DIAMOND considerably, and at the same time greatly reduce disk usage, by setting the DIAMOND --top parameter to lower values. This will govern hits within range of the best hit that are written to the alignment file.
+
+You have to be very careful to 1) not confuse this parameter with the -r / --range parameter, which does a similar cut-off but after alignment and 2) be aware that if you want to run CAT or BAT again afterwards with different values of the -r / --range parameter, your options will be limited to the range you have chosen with --top earlier, because all hits that fall outside this range will not be included in the alignment file. Importantly, CAT and BAT currently do not warn you if you choose -r / --range in a second run higher than --top in a previous one, so it's up to you to remember this!
+
+If you have understood all this, or you do not plan to tune -r / --range at all afterwards, you can enjoy a huge speedup with much smaller alignment files! For CAT you can for example set --top 11 and for BAT --top 6.
+
+]]></token>
     <token name="@COMMON_HELP@"><![CDATA[
 The CAT/BAT workflow is described at: https://github.com/dutilh/CAT    
 ]]></token>