Mercurial > repos > iuc > cami_amber
view cami_amber.xml @ 5:e30bc6da7a36 draft default tip
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/amber/ commit 04a067550f7d61ca8ff489e3de64efd0da3abcf1
author | iuc |
---|---|
date | Sun, 08 Sep 2024 14:37:28 +0000 |
parents | 6c71acde9d52 |
children |
line wrap: on
line source
<tool id="cami_amber" name="CAMI AMBER" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@">
    <description>Evaluation package for MAGs</description>
    <macros>
        <import>macros.xml</import>
    </macros>
    <expand macro="requirements" />
    <!--
        The command stages all inputs under predictable names, runs amber.py
        into ./output and finally moves the heatmap image next to the HTML
        report so that the report can reference it.
    -->
    <command detect_errors="exit_code">
    <![CDATA[
        #set $path_to_html = $html.files_path
        ## Values collected from the repeats; each list becomes one comma-separated argument.
        #set $max = []
        #set $min = []
        #set $label = []

        mkdir -p output inputs '$path_to_html' &&

        ## Manually supplied taxonomy dumps are linked under their original names
        ## because the directory is handed to amber.py as a whole.
        #if $tox.ncbi.is_select == 'yes':
            #if $tox.ncbi.input.is_select == 'manually':
                mkdir -p ncbi &&
                #for $file in $tox.ncbi.input.ncbi_dir:
                    ln -s '$file' './ncbi/$file.element_identifier' &&
                #end for
            #end if
        #end if

        ## Binning files are linked as inputs/<index>.tsv; non-empty labels are collected in order.
        #for $i, $file in enumerate($input_files):
            ln -s '$file.binning_files' './inputs/${i}.tsv' &&
            #if $file.labels:
                #silent $label.append(str($file.labels))
            #end if
        #end for

        ## Iterating an empty repeat is a no-op, so no surrounding guard is needed.
        #for $arg in $genome.thresholds:
            #if $arg.min_completeness:
                #silent $min.append(str($arg.min_completeness))
            #end if
            #if $arg.max_contamination:
                #silent $max.append(str($arg.max_contamination))
            #end if
        #end for

        amber.py
        -g '${gold_standard_file}'
        #if $label:
            #set $label_arg = ','.join($label)
            -l '$label_arg'
        #end if
        -p ${filter}
        #if $min_length:
            -n $min_length
        #end if
        #if $desc:
            -d '${desc}'
        #end if
        #if $min:
            #set $min_arg = ','.join($min)
            --min_completeness '$min_arg'
        #end if
        #if $max:
            #set $max_arg = ','.join($max)
            --max_contamination '$max_arg'
        #end if
        #if $genome.remove_genomes:
            -r '$genome.remove_genomes'
        #end if
        #if $genome.remove.is_select == 'yes':
            -k '$genome.remove.keyword'
        #end if
        #if $genome.genome_coverage:
            --genome_coverage '$genome.genome_coverage'
        #end if
        #if $tox.ncbi.is_select == 'yes':
            #if $tox.ncbi.input.is_select == 'manually':
                --ncbi_dir ncbi
            #else:
                --ncbi_dir '$tox.ncbi.input.ncbi_dir.fields.path'
            #end if
        #end if
        -o output
        #for $i, $bin in enumerate($input_files):
            'inputs/${i}.tsv'
        #end for
        &&
        mv 'output/heatmap_bar.png' '$path_to_html'
    ]]>
    </command>
    <inputs>
        <param argument="--gold_standard_file" format="tabular" type="data" label="Mapping of contigs or reads" help="Input the gold standard file here so amber know the correct IDs for each contig/read" />
        <!-- One entry per binning file; the optional label names that file in the report. -->
        <repeat name="input_files" title="Binning files and names " help="Enter multiple binning files and names (names are optional). IMPORTANT: for each binning file you use in the program you need to state one label, this mean for example for 3 binning files you need 3 labels (3 slots) not more or less!">
            <param name="binning_files" format="tabular" type="data" label="Input bin files here" />
            <param argument="--labels" type="text" value="" optional="true" label="Name for bin" />
        </repeat>
        <param argument="--filter" type="integer" value="0" min="0" label="Filter out the n smallest genome bins" help="Optional filter for filter out the n smallest genome bins" />
        <param argument="--min_length" type="integer" value="" optional="true" label="Minimum length of sequences" help="Input how long the sequences has to be" />
        <param argument="--desc" type="text" value="" label="HTML description" help="Enter the HTML page description here" />
        <section name="genome" title="Genome binning-specific options" >
            <repeat name="thresholds" title="Min. completeness and max. contamination thresholds" help="Enter certain thresholds for min. completeness (Default %: 50,70,90) and certain thresholds for max. contamination (Default %: 5, 10), the program itself will transform it to %!" >
                <param argument="--min_completeness" type="integer" value="" min="0" max="100" optional="true" label="Min. completeness threshold" />
                <param argument="--max_contamination" type="integer" value="" min="0" max="100" optional="true" label="Max. contamination threshold" />
            </repeat>
            <param argument="--remove_genomes" type="data" format="tabular" optional="true" label="tsv file for genomes to remove" help="Input a tsv file with binid and type in each line. In the help section is an example. WARNING: IF THE LIST CONTAIN ALL GENOME THE PROGRAM WILL FAIL!" />
            <conditional name="remove">
                <param name="is_select" type="select" label="Remove one or all genomes which are in the list?" help="Select yes and enter a keyword to remove certain type of genomes which are in the list. When all genomes in the list should remove also select yes and do not enter a keyword!">
                    <option value="yes" selected="false">Yes</option>
                    <option value="no" selected="true">No</option>
                </param>
                <when value="yes">
                    <param argument="--keyword" type="text" value="" label="Keyword for removing certain genomes" help="Input a keyword which should be match with binid giving in the file for removing genomes. When no keyword is giving the program remove all genomes which are in the list!" />
                </when>
                <when value="no" />
            </conditional>
            <param argument="--genome_coverage" type="data" format="tabular" optional="true" label="Genome coverages tsv file" help="Input a tsv file where the genome coverage is stated. Look at the help section to see how this file should look like!" />
        </section>
        <section name="tox" title="taxonomic binning-specific option">
            <conditional name="ncbi">
                <param name="is_select" type="select" label="Want to use the NCBI database?" help="Select yes if you want to use the NCBI database to evaluate taxonomic binning">
                    <option value="yes" selected="false">Yes</option>
                    <option value="no" selected="true">No</option>
                </param>
                <when value="yes">
                    <!-- The taxonomy comes either from history datasets or from the ncbi_taxonomy data table. -->
                    <conditional name="input">
                        <param name="is_select" type="select" label="Select how you want to use NCBI database" help="Either select manually input or data manager. For more help read the help of the type of the input you selected." >
                            <option value="manually">Manually</option>
                            <option value="data">Data manager</option>
                        </param>
                        <when value="manually">
                            <param argument="--ncbi_dir" format="tabular" type="data" multiple="true" label="Input .dmp files" help="To use the NCBI database we need to provide followed .dmp files: nodes.dmp, names.dmp and merged.dmp. You can get the via download of this file **ftp://ftp.ncbi.nih.gov/pub/taxonomy/taxdump.tar.gz** and unzip it!" />
                        </when>
                        <when value="data" >
                            <param argument="--ncbi_dir" type="select" label="Assessing taxonomic binning" help="Include the NCBI taxonomy database. For this you can use the data manager data_manager_fetch_ncbi_taxonomy which can be install via galaxy. For more help look at the help section at the bottom!" >
                                <options from_data_table="ncbi_taxonomy">
                                    <validator message="No NCBI database is available" type="no_options"/>
                                </options>
                            </param>
                        </when>
                    </conditional>
                </when>
                <when value="no"/>
            </conditional>
        </section>
    </inputs>
    <outputs>
        <data name="html" format="html" from_work_dir="output/index.html" label="${tool.name}: HTML" />
        <data name="result" format="tabular" from_work_dir="output/results.tsv" label="${tool.name}: Results" />
        <data name="metrics_genome" format="tabular" from_work_dir="output/genome_metrics_cami1.tsv" label="${tool.name}: Genome metrics" />
        <data name="metrics_bin" format="tabular" from_work_dir="output/bin_metrics.tsv" label="${tool.name}: Bin metrics" />
    </outputs>
    <tests>
        <!-- Test 1: required inputs only. -->
        <test expect_num_outputs="4">
            <param name="gold_standard_file" value="gsa_mapping.binning" ftype="tabular" />
            <repeat name="input_files">
                <param name="binning_files" value="elated_franklin_0" ftype="tabular"/>
            </repeat>
            <repeat name="input_files">
                <param name="binning_files" value="goofy_hypatia_2" ftype="tabular"/>
            </repeat>
            <repeat name="input_files">
                <param name="binning_files" value="naughty_carson_2" ftype="tabular"/>
            </repeat>
            <output name="metrics_bin">
                <assert_contents>
                    <has_text text="genome_id" n="1" />
                    <has_text text="sample_id" n="1" />
                </assert_contents>
            </output>
            <output name="metrics_genome">
                <assert_contents>
                    <has_text text="precision_seq" n="1" />
                    <has_text text="total_length" n="1" />
                </assert_contents>
            </output>
        </test>
        <!-- Test 2: labels, thresholds, genome removal, coverage and manually supplied taxonomy dumps. -->
        <test expect_num_outputs="4">
            <param name="gold_standard_file" value="gsa_mapping.binning" ftype="tabular" />
            <repeat name="input_files">
                <param name="binning_files" value="elated_franklin_0" ftype="tabular"/>
                <param name="labels" value="test1" />
            </repeat>
            <repeat name="input_files">
                <param name="binning_files" value="goofy_hypatia_2" ftype="tabular"/>
                <param name="labels" value="test2" />
            </repeat>
            <repeat name="input_files">
                <param name="binning_files" value="naughty_carson_2" ftype="tabular"/>
                <param name="labels" value="test3" />
            </repeat>
            <param name="filter" value="1" />
            <param name="min_length" value="200" />
            <section name="genome" >
                <repeat name="thresholds" >
                    <param name="max_contamination" value="2" />
                    <param name="min_completeness" value="50" />
                </repeat>
                <repeat name="thresholds" >
                    <param name="min_completeness" value="70" />
                </repeat>
                <repeat name="thresholds" >
                    <param name="min_completeness" value="90" />
                </repeat>
                <param name="genome_coverage" value="cami2_mouse_gut_average_genome_coverage.tsv" ftype="tabular" />
                <param name="remove_genomes" value="unique_common.tsv" ftype="tabular" />
                <conditional name="remove" >
                    <param name="is_select" value="yes" />
                    <param name="keyword" value="circular element" />
                </conditional>
            </section>
            <section name="tox">
                <conditional name="ncbi">
                    <param name="is_select" value="yes" />
                    <conditional name="input" >
                        <param name="is_select" value="manually" />
                        <param name="ncbi_dir" value="test-db/nodes.dmp,test-db/merged.dmp,test-db/names.dmp" ftype="tabular" />
                    </conditional>
                </conditional>
            </section>
            <output name="result">
                <assert_contents>
                    <has_text text="test1" n="1" />
                    <has_text text="test2" n="1" />
                    <has_text text="test3" n="1" />
                </assert_contents>
            </output>
        </test>
        <!-- Test 3: HTML description plus taxonomy from the data table. -->
        <test expect_num_outputs="4">
            <param name="gold_standard_file" value="gsa_mapping.binning" ftype="tabular" />
            <repeat name="input_files">
                <param name="binning_files" value="elated_franklin_0" ftype="tabular"/>
            </repeat>
            <repeat name="input_files">
                <param name="binning_files" value="goofy_hypatia_2" ftype="tabular"/>
            </repeat>
            <repeat name="input_files">
                <param name="binning_files" value="naughty_carson_2" ftype="tabular"/>
            </repeat>
            <param name="desc" value="TEST FOR GALAXY" />
            <section name="tox">
                <conditional name="ncbi">
                    <param name="is_select" value="yes" />
                    <conditional name="input" >
                        <param name="is_select" value="data" />
                        <param name="ncbi_dir" value="test-db-tox" />
                    </conditional>
                </conditional>
            </section>
            <output name="html">
                <assert_contents>
                    <has_text text="TEST FOR GALAXY" n="1" />
                </assert_contents>
            </output>
        </test>
        <!-- Test 4: second gold standard / binning data set with taxonomy from the data table. -->
        <test expect_num_outputs="4">
            <param name="gold_standard_file" value="test_gold.tsv" ftype="tabular" />
            <repeat name="input_files">
                <param name="binning_files" value="test_binning.tsv" ftype="tabular"/>
            </repeat>
            <repeat name="input_files">
                <param name="binning_files" value="test_binning2.tsv" ftype="tabular"/>
            </repeat>
            <section name="tox">
                <conditional name="ncbi">
                    <param name="is_select" value="yes" />
                    <conditional name="input" >
                        <param name="is_select" value="data" />
                        <param name="ncbi_dir" value="test-db-tox" />
                    </conditional>
                </conditional>
            </section>
            <output name="metrics_bin">
                <assert_contents>
                    <has_text text="genome_id" n="1" />
                    <has_text text="sample_id" n="1" />
                </assert_contents>
            </output>
            <output name="metrics_genome">
                <assert_contents>
                    <has_text text="precision_seq" n="1" />
                    <has_text text="total_length" n="1" />
                </assert_contents>
            </output>
        </test>
    </tests>
    <help>
    <![CDATA[

.. class:: infomark

**What is AMBER**

AMBER is an evaluation package for the comparative assessment of genome reconstructions and taxonomic assignments from metagenome benchmark datasets.

.. class:: infomark

**What it does**

AMBER calculates multiple metrics per bin and multiple metrics per sample. These are then provided as result rankings and comparative visualizations for assessing multiple programs or parameter effects.

For more information please visit `AMBER in GitHub <https://github.com/CAMI-challenge/AMBER>`_.

**Input**

AMBER uses only 2 required inputs:

- The golden standard file (biobox format)

  This file can be created via the add_length tool

.. class:: infomark

Example(tab separated)

::

    @Version:0.9.1
    @SampleID:CAMI_low
    @@SEQUENCEID    BINID              _LENGTH
    RL|S1|C10817    Sample18_57        20518
    RL|S1|C11497    Sample22_57        37672
    RL|S1|C6571     evo_1286_AP.033    69914
    RL|S1|C10560    evo_1286_AP.033    995657
    RL|S1|C13546    evo_1286_AP.033    626775

Note: This file looks similar to the binning files; the only difference, which is important, is the length column.

- Multiple binning files (biobox format)

  Files can be created via the convert_to_biobox tool

.. class:: infomark

Example(tab separated):

::

    #CAMI Format for Binning
    @Version:0.9.0
    @SampleID:CAMI_low
    @@SEQUENCEID    BINID
    RL|S1|C10       Bin_034
    RL|S1|C100      Bin_023
    RL|S1|C1000     Bin_034
    RL|S1|C10000    Bin_019
    RL|S1|C10002    Bin_035
    RL|S1|C10004    Bin_035
    RL|S1|C10008    Bin_034
    RL|S1|C10011    Bin_035
    RL|S1|C10012    Bin_013
    RL|S1|C10014    Bin_035

There are also additional inputs which can be used:

- A genome list which should be removed (tabular format)

.. class:: infomark

Example(tab separated):

::

    evo_1035930.029    common strain
    1035930            common strain
    evo_1035930.032    common strain
    evo_1035930.011    common strain
    evo_1286_AP.033    common strain
    1286_AP            common strain
    evo_1286_AP.026    common strain
    evo_1286_AP.037    common strain
    evo_1286_AP.008    common strain
    1052944            common strain
    1053058            common strain
    1052947            common strain
    evo_1049056.013    common strain
    evo_1049056.031    common strain
    evo_1049056.011    common strain
    1049056            common strain
    evo_1049056.039    common strain

Note: The first column contains the BINID and the second contains any kind of string.

IMPORTANT: The keyword argument has to match an entry in the second column to filter these kinds of genomes out.
If no keyword is stated and the remove list contains all genomes which should be used, then AMBER will fail since there are no genomes left to use!

- Genome coverage file (tabular format)

.. class:: infomark

Example(tab separated):

::

    @SampleID:gsa_pooled
    @@GENOMEID       COVERAGE
    4378740.0        82.85111527272727
    4378740.1        27.159305090909097
    denovo10559.0    2.1596957142857143
    179927.0         1.6946866666666667
    denovo8373.1     2.07144
    136604.0         9.150489565217391
    denovo8373.0     1.1413460000000002
    269378.0         8.051563333333332
    190114.0         18.253119629629627
    228140.0         3.078681818181818
    135956.0         121.52672015625001
    259846.0         12.298210588235296
    162576.0         9.57867191489362
    184966.0         14.461031521739134

There is also an option to include the NCBI database in AMBER. This can be used when including the link which is stated in the data manager: **data_manager_fetch_ncbi_taxonomy**.
This data manager downloads the current version of the database and stores all files for you. If there are questions about data managers, have a look at this `Tutorial <https://usegalaxy.eu/training-material/topics/admin/tutorials/reference-genomes/tutorial.html>`_.

**Output**

AMBER will output 3 tsv files where each metrics value is stated. The important output is the HTML file where all data are included and also can be visualized with certain plots!

**Additional information**

The package **Bokeh** will create warnings which can be ignored since they only tell you that certain functions in the code are swapped with the new functions.
AMBER was tested with the stated version of **Bokeh** and with the newest version of it and both generate the same output!

    ]]>
    </help>
    <expand macro="citations" />
</tool>