Mercurial > repos > bgruening > tiara
diff tiara.xml @ 0:3f33a8ac8891 draft default tip
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/tiara commit 3e174d7bc4280c835f5092a44d259cc33f0b54b6
author | bgruening |
---|---|
date | Thu, 30 May 2024 11:10:39 +0000 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tiara.xml Thu May 30 11:10:39 2024 +0000 @@ -0,0 +1,183 @@ +<tool id="tiara" name="tiara" version="@TOOL_VERSION@+galaxy0" profile="21.05"> + <description>Deep-learning-based approach for identification of eukaryotic sequences in the metagenomic data </description> + <macros> + <import>macros.xml</import> + </macros> + <expand macro="biotools"/> + <expand macro="requirements"/> + <command detect_errors="exit_code"><![CDATA[ + + tiara + -t \${GALAXY_SLOTS:-4} + -i '$input' + -o '$output' + + #if $taxonomy_filter + --tf #for $tf in $taxonomy_filter + $tf + #end for + #end if + #if $probabilities + --pr '$probabilities' + #end if + #if $min_len + -m '$min_len' + #end if + #if $cutoff_stage1 + -p $cutoff_stage1 + #if $cutoff_stage2 + $cutoff_stage2 + #end if + #end if + #if $advanced_options.advance.customize_kmer_length == 'customize' + --k1 $advanced_options.advance.first_stage_kmer + --k2 $advanced_options.advance.second_stage_kmer + #end if + + ]]></command> + <inputs> + <param name="input" type="data" format="fasta" label="input fasta,fasta.gz file"/> + <param name="taxonomy_filter" type="select" multiple="true" optional="true" label="Write sequences to fasta,fasta.gz files specified in the arguments to this option." help="all refers to all classes present in input fasta (to separate fasta files)."> + <option value="mit">mitochondria</option> + <option value="pla">plastid</option> + <option value="bac">bacteria</option> + <option value="arc">archea</option> + <option value="euk">eukarya</option> + <option value="unk">unknown</option> + <option value="pro">prokarya</option> + <option value="all">all</option> + </param> + <param argument="probabilities" type="boolean" truevalue="--pr" falsevalue="" checked="false" label="Add probabilities of individual classes for each sequence."/> + <param argument="min_len" type="integer" value="3000" min="1000" optional="true" label="Minimum length of a sequence. Default: 3000 bp." help="Specify the desired minimum length in base pairs.Default value is 3000 bp and we do not recommend classifying sequences shorter than 1000 bp. "/> + <param argument="cutoff_stage1" type="float" value="0.65" min="0.5" max="1" optional="true" label="Probability threshold for the first stage." help="Probability threshold needed for classification in the first stage. Default: 0.65." /> + <param argument="cutoff_stage2" type="float" value="0.65" min="0.5" max="1" optional="true" label="Probability threshold for the second stage." help="Probability threshold needed for classification in the second stage. Default: 0.65." /> + <section name="advanced_options" title="k-mer" expanded="true"> + <conditional name="advance"> + <param argument="customize_kmer_length" type="select" label="Advanced options"> + <option value="default_options">No, Use param defaults</option> + <option value="customize">Yes, See full parameter list</option> + </param> + <when value="customize"> + <param argument="first_stage_kmer" type="select" label="Select k-mer length used in the first stage of classification (Default: 6)."> + <option value="4">k-mer length 4</option> + <option value="5">k-mer length 5</option> + <option value="6" selected="True">default k-mer length</option> + </param> + <param argument="second_stage_kmer" type="select" label="k-mer length used in the second stage of classification (Default: 7)."> + <option value="4">k-mer length 4</option> + <option value="5">k-mer length 5</option> + <option value="6">k-mer length 6</option> + <option value="7" selected="True">default k-mer length</option> + </param> + </when> + <when value="default_options"> + <!-- Define actions or defaults for the default option if necessary --> + </when> + </conditional> + </section> + </inputs> + <outputs> + <data name="output" format="txt" label="${tool.name} on ${on_string}: sequence ID, classification results"/> + </outputs> + <tests> + <test expect_num_outputs="1"> + <param name="input" value="plast_fr.fasta.gz"/> + <param name="taxonomy_filter" value="pla"/> + <output name="output" ftype="txt"> + <assert_contents> + <has_text_matching expression=".*sequence_id*"/> + <has_n_lines n="11" delta="5"/> + </assert_contents> + </output> + </test> + <test expect_num_outputs="1"> + <param name="input" value="mitplas1.fasta"/> + <param name="taxonomy_filter" value="pla,mit"/> + <output name="output" ftype="txt"> + <assert_contents> + <has_text_matching expression=".*sequence_id*"/> + <has_n_lines n="30" delta="5"/> + </assert_contents> + </output> + </test> + <test expect_num_outputs="1"> + <param name="input" value="sample_all.fasta"/> + <param name="taxonomy_filter" value="all"/> + <output name="output" ftype="txt"> + <assert_contents> + <has_text_matching expression=".*sequence_id*"/> + <has_n_lines n="51" delta="5"/> + </assert_contents> + </output> + </test> + <test expect_num_outputs="1"> + <param name="input" value="sample_all.fasta"/> + <param name="taxonomy_filter" value="euk,bac,arc,unk"/> + <output name="output" ftype="txt"> + <assert_contents> + <has_text_matching expression=".*sequence_id*"/> + <has_n_lines n="51" delta="5"/> + </assert_contents> + </output> + </test> + <test expect_num_outputs="1"> + <param name="input" value="eukarya_fr.fasta"/> + <param name="taxonomy_filter" value="euk"/> + <param name="min_len" value="5000"/> + <param name="cutoff_stage1" value="0.65"/> + <param name="cutoff_stage2" value="0.60"/> + <output name="output" ftype="txt"> + <assert_contents> + <has_text_matching expression=".*sequence_id*"/> + <has_n_lines n="11" delta="5"/> + </assert_contents> + </output> + </test> + <test expect_num_outputs="1"> + <param name="input" value="bacteria_fr.fasta"/> + <param name="taxonomy_filter" value="bac"/> + <param name="min_len" value="5000"/> + <param name="cutoff_stage1" value="0.65"/> + <param name="cutoff_stage2" value="0.60"/> + <param name="probabilities" value="true"/> + <output name="output" ftype="txt"> + <assert_contents> + <has_text_matching expression=".*bac*"/> + <has_n_lines n="11" delta="5"/> + </assert_contents> + </output> + </test> + </tests> + <help><![CDATA[ +What it does +============ +Tiara is a Deep-learning-based approach for identification of eukaryotic sequences in the metagenomic data powered by PyTorch. + +How it works +============ +The sequences are classified in two stages: + +First Stage: +Input: Sequences are classified into classes: archaea, bacteria, prokarya, eukarya, organelle, and unknown. +Output: Classifications for each sequence into one of the above classes. + +Second Stage: +Input: Sequences labeled as organelle from the first stage. +Output: Further classification into mitochondria, plastid, or unknown. + +Required Inputs +=============== +The primary input for Tiara is metagenomic sequence data that needs classification. + +Generated Outputs +================= +The output will be the sequences categorized into specific classes as described above. + +Additional Resources +==================== +For a more comprehensive understanding of tiara and detailed usage instructions, please visit the tiara GitHub repository: +tiara GitHub Repository: [https://github.com/ibe-uw/tiara] + + ]]></help> + <expand macro="citations"/> +</tool>