Mercurial > repos > jjohnson > contig_annotation_tool
changeset 5:226949352e31 draft default tip
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/cat commit a1c079107b72dc08612fa664897bb9d627624e52-dirty
author | jjohnson |
---|---|
date | Wed, 27 Nov 2019 12:11:14 -0500 |
parents | aaeb63501369 |
children | |
files | cat_add_names.xml cat_bins.xml cat_contigs.xml cat_prepare.xml cat_summarise.xml macros.xml |
diffstat | 6 files changed, 80 insertions(+), 6 deletions(-) [+] |
line wrap: on
line diff
--- a/cat_add_names.xml Wed Nov 27 10:27:23 2019 -0500 +++ b/cat_add_names.xml Wed Nov 27 12:11:14 2019 -0500 @@ -13,7 +13,7 @@ && @TXT2TSV@ -i output_names.txt -o $output ]]></command> <inputs> - <param name="input" type="data" format="tabular,txt" label="classification.txt or ORF2LCA.txt"/> + <param argument="--input" type="data" format="tabular,txt" label="classification.txt or ORF2LCA.txt"/> <expand macro="cat_db" /> <expand macro="add_names_options" /> </inputs> @@ -33,7 +33,18 @@ </tests> <help><![CDATA[ **CAT/BAT add_names** -Add names for the NCBI taxomy IDs. + +Add taxonomic names to CAT or BAT output files. + +Required arguments: + -i, --input_file Path to input file. Can be either classification + output file or ORF2LCA output file. + -t, --taxonomy_folder Path to folder that contains taxonomy files. + +Optional arguments: + --only_official Only output official level names. + --exclude_scores Do not include bit-score support scores in the + lineage. @COMMON_HELP@ ]]></help>
--- a/cat_bins.xml Wed Nov 27 10:27:23 2019 -0500 +++ b/cat_bins.xml Wed Nov 27 12:11:14 2019 -0500 @@ -107,9 +107,12 @@ </tests> <help><![CDATA[ -**CAT bin or bins** +**CAT bin** or **CAT bins** + Classify metagenomics assembled genomes. +@OPTIONS_HELP@ + @COMMON_HELP@ ]]></help> <expand macro="citations" />
--- a/cat_contigs.xml Wed Nov 27 10:27:23 2019 -0500 +++ b/cat_contigs.xml Wed Nov 27 12:11:14 2019 -0500 @@ -66,8 +66,11 @@ </tests> <help><![CDATA[ **CAT contigs** + Classifiy metagenomics contigs. +@OPTIONS_HELP@ + @COMMON_HELP@ ]]></help> <expand macro="citations" />
--- a/cat_prepare.xml Wed Nov 27 10:27:23 2019 -0500 +++ b/cat_prepare.xml Wed Nov 27 12:11:14 2019 -0500 @@ -21,6 +21,7 @@ </outputs> <help><![CDATA[ **CAT prepare** + Prepare CAT reference data for classifying metagomic contigs or genome assemblies. This requires over a 100GB of RAM, 250GB of disk space, and up to 24 hours.
--- a/cat_summarise.xml Wed Nov 27 10:27:23 2019 -0500 +++ b/cat_summarise.xml Wed Nov 27 12:11:14 2019 -0500 @@ -11,9 +11,9 @@ && @TXT2TSV@ -i output_names_summary.txt -o $output ]]></command> <inputs> - <param name="input" type="data" format="tabular" label="classification.official_names.txt" + <param argument="--input" type="data" format="tabular" label="classification.official_names.txt" help="The classication must be made with only_official names"/> - <param name="contigs_fasta" type="data" format="fasta" optional="true" label="contigs.fasta" + <param argument="--contigs_fasta" type="data" format="fasta" optional="true" label="contigs.fasta" help="Required if a contig2classification.names"/> </inputs> <outputs> @@ -32,7 +32,19 @@ </tests> <help><![CDATA[ **CAT summarise** -Produce a summary report of assignments to the ofifcial taxonomic names. + +Summarise taxonomic assignments from a named CAT or BAT classification file. + +Required arguments: + -i, --input_file Path to named CAT contig classification file or BAT + bin classification file. Currently only official ranks + are supported, and only classification files + containing a single classification per contig / bin. + +Optional arguments: + -c, --contigs_fasta + Path to contigs fasta file. This is required if you + want to summarise a contig classification file. @COMMON_HELP@ ]]></help>
--- a/macros.xml Wed Nov 27 10:27:23 2019 -0500 +++ b/macros.xml Wed Nov 27 12:11:14 2019 -0500 @@ -284,6 +284,50 @@ </actions> </data> </xml> + <token name="@OPTIONS_HELP@"><![CDATA[ + +Optional arguments: + -r, --range cut-off range after alignment [0-49] (default: 10). + -f, --fraction fraction of bit-score support for each classification + [0-0.99] (default: 0.5). + -p, --proteins_fasta + Path to predicted proteins fasta file. If supplied, + CAT will skip the protein prediction step. + -a, --diamond_alignment + Path to DIAMOND alignment table. If supplied, CAT will + skip the DIAMOND alignment step and directly classify + the sequences. A predicted proteins fasta file should + also be supplied with argument [-p / --proteins]. + + +DIAMOND specific optional arguments: + --sensitive Run DIAMOND in sensitive mode (default: not enabled). + + --block_size DIAMOND block-size parameter (default: 2.0). Lower + numbers will decrease memory and temporary disk space + usage. + + --index_chunks + DIAMOND index-chunks parameter (default: 4). Set to 1 + on high memory machines. The parameter has no effect + on temporary disk space usage. + + --top + DIAMOND top parameter [0-50] (default: 50). Governs + hits within range of best hit that are written to the + alignment file. This is not the [-r / --range] + parameter! + + +Setting the DIAMOND --top parameter + +You can speed up DIAMOND considerably, and at the same time greatly reduce disk usage, by setting the DIAMOND --top parameter to lower values. This will govern hits within range of the best hit that are written to the alignment file. 
+ +You have to be very careful to 1) not confuse this parameter with the -r / --range parameter, which does a similar cut-off but after alignment and 2) be aware that if you want to run CAT or BAT again afterwards with different values of the -r / --range parameter, your options will be limited to the range you have chosen with --top earlier, because all hits that fall outside this range will not be included in the alignment file. Importantly, CAT and BAT currently do not warn you if you choose -r / --range in a second run higher than --top in a previous one, so it's up to you to remember this! + +If you have understood all this, or you do not plan to tune -r / --range at all afterwards, you can enjoy a huge speedup with much smaller alignment files! For CAT you can for example set --top 11 and for BAT --top 6. + +]]></token> <token name="@COMMON_HELP@"><![CDATA[ The CAT/BAT workflow is described at: https://github.com/dutilh/CAT ]]></token>