Mercurial > repos > thanhlv > customize_metaphlan_database
diff customize_metaphlan_database.xml @ 0:c0473c69ac9f draft
planemo upload for repository https://github.com/quadram-institute-bioscience/galaxy-tools/tree/master/tools/metaphlan/
author | thanhlv |
---|---|
date | Mon, 13 Feb 2023 11:36:16 +0000 |
parents | |
children | b6e5df1237f2 |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/customize_metaphlan_database.xml Mon Feb 13 11:36:16 2023 +0000
@@ -0,0 +1,304 @@
+<tool id="customize_metaphlan_database" name="Customize the marker sequences and metadata" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@">
+    <description>from the MetaPhlAn database</description>
+    <!--
+        Edits a MetaPhlAn marker database made of a FASTA file (marker sequences)
+        and a JSON file (marker metadata). customizemetadata.py is run on the JSON,
+        then cat or seqtk subseq produces the matching FASTA in the same job.
+    -->
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+    <expand macro="edam_ontology"/>
+    <!-- seqtk is requested in addition to the shared requirements: the command uses "seqtk subseq" -->
+    <expand macro="requirements">
+        <requirement type="package" version="1.3">seqtk</requirement>
+    </expand>
+    <version_command>metaphlan -v</version_command>
+    <command detect_errors="aggressive"><![CDATA[
+## Step 1: hand the JSON metadata and the operation-specific options to the helper script.
+python '$__tool_directory__/customizemetadata.py'
+    $op.operation
+
+#if $op.operation == "add_marker"
+    --in_json '$in_json'
+    --out_json '$out_json'
+    ## Quoted so that a name containing spaces or shell-special characters stays one argument.
+    --name '$op.name'
+    --m_length $op.m_length
+    ## One group of genome/taxonomy options per entry of the "genomes" repeat.
+    #for $s in $op.genomes
+        --g_length $s.g_length
+        #if str($s.gca) != ''
+        --gca '$s.gca'
+        #else
+        --gca ''
+        #end if
+        --k_name '$s.k_name'
+        --k_id $s.k_id
+        --p_name '$s.p_name'
+        --p_id $s.p_id
+        --c_name '$s.c_name'
+        --c_id $s.c_id
+        --o_name '$s.o_name'
+        --o_id $s.o_id
+        --f_name '$s.f_name'
+        --f_id $s.f_id
+        --g_name '$s.g_name'
+        --g_id $s.g_id
+        --s_name '$s.s_name'
+        --s_id $s.s_id
+        --t_name '$s.t_name'
+    #end for
+## Step 2: append the new marker sequence(s) to the existing marker FASTA.
+&&
+cat
+    '$in_fasta'
+    '$op.marker_seq'
+    > '$out_fasta'
+
+#else if $op.operation == "remove_markers"
+    --in_json '$in_json'
+    --markers '$op.markers'
+    --out_json '$out_json'
+    ## 'kept_markers' is a relative path (a file in the current working directory); it is
+    ## read back by seqtk subseq below as the list of sequence names to extract.
+    --kept_markers 'kept_markers'
+&&
+seqtk subseq
+    '$in_fasta'
+    'kept_markers'
+    > '$out_fasta'
+
+#else if $op.operation == "keep_markers"
+    --in_json '$in_json'
+    --markers '$op.markers'
+    --out_json '$out_json'
+## The user-supplied list is used directly as the sequence names to extract.
+&&
+seqtk subseq
+    '$in_fasta'
+    '$op.markers'
+    > '$out_fasta'
+#end if
+    ]]></command>
+    <inputs>
+        <param name="in_fasta" type="data" format="fasta" label="Marker sequences"/>
+        <param argument="--in_json" type="data" format="json" label="Marker metadata"/>
+        <!-- The selected operation decides which extra inputs are shown and which command branch runs -->
+        <conditional name="op">
+            <param name="operation" type="select" label="Customization">
+                <option value="add_marker" selected="true">Add new marker</option>
+                <option value="remove_markers">Remove markers</option>
+                <option value="keep_markers">Keep markers, others will be removed</option>
+            </param>
+            <when value="add_marker">
+                <param name="marker_seq" type="data" format="fasta" label="Sequences of the new markers"/>
+                <param argument="--name" type="text" label="Name of the new marker"/>
+                <param argument="--m_length" type="integer" value="" label="Length of the new marker"/>
+                <!-- Every taxon name below only accepts letters, digits and "_"; other characters are removed -->
+                <repeat name="genomes" min="1" title="Taxonomy of the genomes from which the new marker has been extracted">
+                    <param argument="--g_length" type="integer" min="0" value="" label="Length of the genome"/>
+                    <param argument="--gca" type="text" optional="true" label="GenBank assemblies id (GCA) of the genome"/>
+                    <param argument="--k_name" type="text" label="Kingdom: Name">
+                        <sanitizer invalid_char="">
+                            <valid initial="string.ascii_letters,string.digits">
+                                <add value="_" />
+                            </valid>
+                        </sanitizer>
+                    </param>
+                    <param argument="--k_id" type="integer" min="0" value="" label="Kingdom: NCBI id"/>
+                    <param argument="--p_name" type="text" label="Phylum: Name">
+                        <sanitizer invalid_char="">
+                            <valid initial="string.ascii_letters,string.digits">
+                                <add value="_" />
+                            </valid>
+                        </sanitizer>
+                    </param>
+                    <param argument="--p_id" type="integer" min="0" value="" label="Phylum: NCBI id"/>
+                    <param argument="--c_name" type="text" label="Class: Name">
+                        <sanitizer invalid_char="">
+                            <valid initial="string.ascii_letters,string.digits">
+                                <add value="_" />
+                            </valid>
+                        </sanitizer>
+                    </param>
+                    <param argument="--c_id" type="integer" min="0" value="" label="Class: NCBI id"/>
+                    <param argument="--o_name" type="text" label="Order: Name">
+                        <sanitizer invalid_char="">
+                            <valid initial="string.ascii_letters,string.digits">
+                                <add value="_" />
+                            </valid>
+                        </sanitizer>
+                    </param>
+                    <param argument="--o_id" type="integer" min="0" value="" label="Order: NCBI id"/>
+                    <param argument="--f_name" type="text" label="Family: Name">
+                        <sanitizer invalid_char="">
+                            <valid initial="string.ascii_letters,string.digits">
+                                <add value="_" />
+                            </valid>
+                        </sanitizer>
+                    </param>
+                    <param argument="--f_id" type="integer" min="0" value="" label="Family: NCBI id"/>
+                    <param argument="--g_name" type="text" label="Genus: Name">
+                        <sanitizer invalid_char="">
+                            <valid initial="string.ascii_letters,string.digits">
+                                <add value="_" />
+                            </valid>
+                        </sanitizer>
+                    </param>
+                    <param argument="--g_id" type="integer" min="0" value="" label="Genus: NCBI id"/>
+                    <param argument="--s_name" type="text" label="Species: Name">
+                        <sanitizer invalid_char="">
+                            <valid initial="string.ascii_letters,string.digits">
+                                <add value="_" />
+                            </valid>
+                        </sanitizer>
+                    </param>
+                    <param argument="--s_id" type="integer" min="0" value="" label="Species: NCBI id"/>
+                    <param argument="--t_name" type="text" label="Strain: Name">
+                        <sanitizer invalid_char="">
+                            <valid initial="string.ascii_letters,string.digits">
+                                <add value="_" />
+                            </valid>
+                        </sanitizer>
+                    </param>
+                </repeat>
+            </when>
+            <when value="remove_markers">
+                <param argument="--markers" type="data" format="tabular,txt" label="List of markers to remove" help="1 marker per line"/>
+            </when>
+            <when value="keep_markers">
+                <param argument="--markers" type="data" format="tabular,txt" label="List of markers to keep" help="1 marker per line"/>
+            </when>
+        </conditional>
+    </inputs>
+    <outputs>
+        <data name="out_fasta" format="fasta" label="${tool.name} on ${on_string}: Markers sequences" />
+        <data name="out_json" format="json" label="${tool.name} on ${on_string}: Marker metadata" />
+    </outputs>
+    <tests>
+        <!-- add_marker: re-adding the marker to the reduced database should give back the full one -->
+        <test expect_num_outputs="2">
+            <param name="in_fasta" value="test-db-without-one-marker.fasta"/>
+            <param name="in_json" value="test-db-without-one-marker.json"/>
+            <conditional name="op">
+                <param name="operation" value="add_marker"/>
+                <param name="marker_seq" value="marker_sequence.fasta"/>
+                <param name="name" value="13076__A0A2I1PE66__CYJ72_10760"/>
+                <param name="m_length" value="540"/>
+                <repeat name="genomes">
+                    <param name="g_length" value="2411251"/>
+                    <param name="gca" value="GCA_002847845"/>
+                    <!-- NOTE(review): NCBI taxid 1239 is Firmicutes and 91061 is Bacilli, so the phylum and class -->
+                    <!-- names below look shifted by one rank; confirm against test-db.json before changing them -->
+                    <param name="k_name" value="Bacteria"/>
+                    <param name="k_id" value="2"/>
+                    <param name="p_name" value="Bacilli"/>
+                    <param name="p_id" value="1239"/>
+                    <param name="c_name" value="Negativicutes"/>
+                    <param name="c_id" value="91061"/>
+                    <param name="o_name" value="Lactobacillales"/>
+                    <param name="o_id" value="186826"/>
+                    <param name="f_name" value="Aerococcaceae"/>
+                    <param name="f_id" value="186827"/>
+                    <param name="g_name" value="Globicatella"/>
+                    <param name="g_id" value="13075"/>
+                    <param name="s_name" value="Globicatella_sanguinis"/>
+                    <param name="s_id" value="13076"/>
+                    <param name="t_name" value="GCA_002847845"/>
+                </repeat>
+            </conditional>
+            <output name="out_fasta" file="test-db.fasta" compare="sim_size">
+                <assert_contents>
+                    <has_text text="13076__A0A2I1PE66__CYJ72_10760" />
+                </assert_contents>
+            </output>
+            <output name="out_json" file="test-db.json" compare="sim_size">
+                <assert_contents>
+                    <has_text text="13076__A0A2I1PE66__CYJ72_10760" />
+                </assert_contents>
+            </output>
+        </test>
+        <!-- remove_markers: the listed marker must be absent from both outputs -->
+        <test expect_num_outputs="2">
+            <param name="in_fasta" value="test-db.fasta"/>
+            <param name="in_json" value="test-db.json"/>
+            <conditional name="op">
+                <param name="operation" value="remove_markers"/>
+                <param name="markers" value="marker.txt"/>
+            </conditional>
+            <output name="out_fasta" file="test-db-without-one-marker.fasta" compare="sim_size">
+                <assert_contents>
+                    <not_has_text text="13076__A0A2I1PE66__CYJ72_10760" />
+                </assert_contents>
+            </output>
+            <output name="out_json" file="test-db-without-one-marker.json" compare="sim_size">
+                <assert_contents>
+                    <not_has_text text="13076__A0A2I1PE66__CYJ72_10760" />
+                </assert_contents>
+            </output>
+        </test>
+        <!-- keep_markers: the listed marker must still be present in both outputs -->
+        <test expect_num_outputs="2">
+            <param name="in_fasta" value="test-db.fasta"/>
+            <param name="in_json" value="test-db.json"/>
+            <conditional name="op">
+                <param name="operation" value="keep_markers"/>
+                <param name="markers" value="marker.txt"/>
+            </conditional>
+            <output name="out_fasta" file="test-db-with-one-marker.fasta" compare="sim_size">
+                <assert_contents>
+                    <has_text text="13076__A0A2I1PE66__CYJ72_10760" />
+                </assert_contents>
+            </output>
+            <output name="out_json" file="test-db-with-one-marker.json" compare="sim_size">
+                <assert_contents>
+                    <has_text text="13076__A0A2I1PE66__CYJ72_10760" />
+                </assert_contents>
+            </output>
+        </test>
+    </tests>
+    <help><![CDATA[
+What it does
+============
+
+Customize the marker sequences (in fasta format) and metadata (in JSON) for a MetaPhlAn database:
+
+- Add marker
+- Remove markers
+- Keep markers
+
+Inputs
+======
+
+MetaPhlAn database (can be extracted with dedicated tool)
+
+- Fasta file with marker sequences
+- JSON file with marker metadata
+
+The other inputs depend on the type of customization
+
+- Add marker
+
+  - Fasta file with the sequence of new marker
+  - Information about the new marker and related genomes
+
+- Remove markers
+
+  - File with list of markers to remove
+
+- Keep markers
+
+  - File with list of markers to keep
+
+Outputs
+=======
+
+Customized database (that can be used as input for MetaPhlAn tool)
+
+- Fasta file with marker sequences
+- JSON file with marker metadata
+
+    ]]></help>
+    <expand macro="citations"/>
+</tool>