Mercurial > repos > iuc > semibin
view semibin.xml @ 2:99ff9221182c draft default tip
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/semibin commit 13abac83068b126399ec415141007a48c2efaa84
author | iuc |
---|---|
date | Fri, 10 Nov 2023 20:50:01 +0000 |
parents | 6b517dc161e4 |
children |
line wrap: on
line source
<tool id="semibin" name="SemiBin" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@">
    <!--
        Galaxy wrapper around the SemiBin2 "easy bin" sub-commands.
        The three values of mode.select map to:
          single, co : SemiBin2 single_easy_bin
          multi      : SemiBin2 multi_easy_bin
        All @...@ tokens and every <expand macro="..."/> are resolved from
        macros.xml, which is not part of this file.
    -->
    <description>for Semi-supervised Metagenomic Binning</description>
    <macros>
        <import>macros.xml</import>
    </macros>
    <expand macro="biotools"/>
    <expand macro="requirements"/>
    <expand macro="version"/>
    <!--
        Command template (Cheetah). @BAM_FILES@ and @FASTA_FILES@ come from
        macros.xml; presumably they stage the inputs as 'contigs.fasta' and
        '*.bam' in the working directory, which is what the SemiBin2 call
        below reads (TODO confirm against macros.xml). The trailing
        'echo' / 'ls output' only list the produced files in the job log.
    -->
    <command detect_errors="exit_code"><![CDATA[
#import re
@BAM_FILES@
@FASTA_FILES@
SemiBin2
#if $mode.select == 'single' or $mode.select == 'co'
    single_easy_bin
    #if $mode.select == 'single' and str($mode.environment) != ''
        --environment '$mode.environment'
    #end if
    #if $mode.ref.select == "cached"
        --reference-db-data-dir '$mode.ref.cached_db.fields.path'
    #else
        --taxonomy-annotation-table '$mode.ref.taxonomy_annotation_table'
    #end if
#else
    multi_easy_bin
    --separator '$separator'
    #if $mode.ref.select == "cached"
        --reference-db-data-dir '$mode.ref.cached_db.fields.path'
    #else
        --taxonomy-annotation-table
        #for $e in $mode.ref.taxonomy_annotation_table
            '$e'
        #end for
    #end if
#end if
    --input-fasta 'contigs.fasta'
    --input-bam *.bam
    --output 'output'
    --cannot-name 'cannot'
    @MIN_LEN@
    --orf-finder '$orf_finder'
    --random-seed $random_seed
#if str($annot.ml_threshold) != ''
    --ml-threshold $annot.ml_threshold
#end if
    --epoches $training.epoches
    --batch-size $training.batch_size
    --max-node $bin.max_node
    --max-edges $bin.max_edges
    --minfasta-kbs $bin.minfasta_kbs
#if ($mode.select == 'single' or $mode.select == 'co') and "pre_reclustering_bins" in $extra_output
    --write-pre-reclustering-bins
#end if
    --compression none
    --threads \${GALAXY_SLOTS:-1}
    --processes \${GALAXY_SLOTS:-1}
&& echo "output"
&& ls output
    ]]></command>
    <inputs>
        <!-- Binning mode; each branch pulls its FASTA/BAM/reference inputs from macros. -->
        <conditional name="mode">
            <expand macro="mode_select"/>
            <when value="single">
                <expand macro="input-fasta-single"/>
                <expand macro="input-bam-single"/>
                <expand macro="ref-single"/>
                <expand macro="environment"/>
            </when>
            <when value="co">
                <expand macro="input-fasta-single"/>
                <expand macro="input-bam-multi"/>
                <expand macro="ref-single"/>
            </when>
            <when value="multi">
                <expand macro="input-fasta-multi"/>
                <expand macro="input-bam-multi"/>
                <expand macro="ref-multi"/>
            </when>
        </conditional>
        <expand macro="min_len"/>
        <expand macro="orf-finder"/>
        <expand macro="random-seed"/>
        <section name="annot" title="Contig annotations" expanded="true">
            <expand macro="ml-threshold"/>
        </section>
        <section name="training" title="Training">
            <expand macro="epoches"/>
            <expand macro="batch-size"/>
        </section>
        <section name="bin" title="Binning">
            <expand macro="max-node"/>
            <expand macro="max-edges"/>
            <expand macro="minfasta-kbs"/>
        </section>
        <!-- Optional multi-select: it may be empty, so output filters must guard against that. -->
        <param name="extra_output" type="select" multiple="true" optional="true" label="Extra outputs" help="In addition to the training data">
            <option value="data">Training data</option>
            <option value="coverage">Coverage files</option>
            <option value="contigs">Contigs (if multiple sample)</option>
            <option value="pre_reclustering_bins">Pre-reclustering bins (only single sample and co-assembly)</option>
        </param>
    </inputs>
    <outputs>
        <!--
            Bin collections for single / co-assembly mode. Either the pair
            (pre-recluster, after-recluster) or the plain 'output_bins'
            collection is produced, depending on whether
            'pre_reclustering_bins' was selected in extra_output.
        -->
        <collection name="output_pre_recluster_bins" type="list" label="${tool.name} on ${on_string}: Reconstructed bins before reclustering">
            <filter>mode["select"]!="multi" and extra_output and "pre_reclustering_bins" in extra_output</filter>
            <discover_datasets pattern="(?P&lt;designation&gt;.*).fa" format="fasta" directory="output/output_prerecluster_bins"/>
        </collection>
        <collection name="output_after_recluster_bins" type="list" label="${tool.name} on ${on_string}: Reconstructed bins after reclustering">
            <filter>mode["select"]!="multi" and extra_output and "pre_reclustering_bins" in extra_output</filter>
            <discover_datasets pattern="(?P&lt;designation&gt;.*).fa" format="fasta" directory="output/output_recluster_bins"/>
        </collection>
        <collection name="output_bins" type="list" label="${tool.name} on ${on_string}: Reconstructed bins">
            <!-- 'extra_output and ...' keeps the membership test from running on an empty selection. -->
            <filter>mode["select"]!="multi" and not (extra_output and "pre_reclustering_bins" in extra_output)</filter>
            <discover_datasets pattern="(?P&lt;designation&gt;.*).fa" format="fasta" directory="output/output_bins"/>
        </collection>
        <!-- Bin collection for multi-sample mode. -->
        <collection name="multi_bins" type="list" label="${tool.name} on ${on_string}: Reconstructed bins before reclustering (multi_bins)">
            <filter>mode["select"]=="multi"</filter>
            <discover_datasets pattern="(?P&lt;designation&gt;.*).fa" format="fasta" directory="output/bins"/>
        </collection>
        <!-- Training data, only when 'data' is selected in extra_output. -->
        <data name="single_data" format="csv" from_work_dir="output/data.csv" label="${tool.name} on ${on_string}: Training data">
            <filter>(mode["select"]=="single" or mode["select"]=="co") and extra_output and "data" in extra_output</filter>
        </data>
        <data name="single_data_split" format="csv" from_work_dir="output/data_split.csv" label="${tool.name} on ${on_string}: Split training data">
            <filter>(mode["select"]=="single" or mode["select"]=="co") and extra_output and "data" in extra_output</filter>
        </data>
        <collection name="multi_data" type="list" label="${tool.name} on ${on_string}: Training data per sample">
            <filter>mode["select"]=="multi" and extra_output and "data" in extra_output</filter>
            <discover_datasets pattern="(?P&lt;designation&gt;.*)\/data.csv" format="csv" directory="output/samples/" recurse="true" match_relative_path="true"/>
        </collection>
        <collection name="multi_data_split" type="list" label="${tool.name} on ${on_string}: Split training data per sample">
            <filter>mode["select"]=="multi" and extra_output and "data" in extra_output</filter>
            <discover_datasets pattern="(?P&lt;designation&gt;.*)\/data_split.csv" format="csv" directory="output/samples/" recurse="true" match_relative_path="true"/>
        </collection>
        <!-- Coverage / contig outputs are declared in macros.xml. -->
        <expand macro="generate_sequence_features_extra_outputs"/>
    </outputs>
    <tests>
        <!--
            Test parameter names must match the input names read by the
            command template: 'random_seed' at top level and 'select'
            inside the 'ref' conditional.
        -->
        <!-- Test 1: single sample, taxonomy table, prodigal, training data + coverage outputs. -->
        <test expect_num_outputs="5">
            <conditional name="mode">
                <param name="select" value="single"/>
                <param name="input_fasta" ftype="fasta" value="input_single.fasta"/>
                <param name="input_bam" ftype="bam" value="input_single.bam"/>
                <conditional name="ref">
                    <param name="select" value="taxonomy"/>
                    <param name="taxonomy_annotation_table" value="taxonomy.tsv"/>
                </conditional>
                <param name="environment" value="human_gut"/>
            </conditional>
            <conditional name="min_len">
                <param name="method" value="min-len"/>
                <param name="min_len" value="0"/>
            </conditional>
            <param name="orf_finder" value="prodigal"/>
            <param name="random_seed" value="0"/>
            <section name="annot">
                <param name="ml_threshold" value=""/>
            </section>
            <section name="training">
                <param name="epoches" value="20"/>
                <param name="batch_size" value="2048"/>
            </section>
            <section name="bin">
                <param name="max_node" value="1"/>
                <param name="max_edges" value="200"/>
                <param name="minfasta_kbs" value="200"/>
            </section>
            <param name="extra_output" value="data,coverage,contigs"/>
            <output_collection name="output_bins" count="0"/>
            <output name="single_data" ftype="csv">
                <assert_contents>
                    <has_text text="g1k_0"/>
                    <has_text text="g4k_7"/>
                </assert_contents>
            </output>
            <output name="single_data_split" ftype="csv">
                <assert_contents>
                    <has_text text="g1k_0_1"/>
                    <has_text text="g1k_6_2"/>
                </assert_contents>
            </output>
            <output name="single_cov" ftype="csv">
                <assert_contents>
                    <has_text text="g1k_0"/>
                    <has_text text="0.027"/>
                </assert_contents>
            </output>
            <output name="single_split_cov" ftype="csv">
                <assert_contents>
                    <has_size value="1" delta="1"/>
                </assert_contents>
            </output>
        </test>
        <!-- Test 2: co-assembly, taxonomy table, fast-naive ORF finder, coverage outputs. -->
        <test expect_num_outputs="3">
            <conditional name="mode">
                <param name="select" value="co"/>
                <param name="input_fasta" ftype="fasta" value="input_single.fasta"/>
                <param name="input_bam" ftype="bam" value="input_coassembly_sorted1.bam,input_coassembly_sorted2.bam,input_coassembly_sorted3.bam,input_coassembly_sorted4.bam,input_coassembly_sorted5.bam"/>
                <conditional name="ref">
                    <param name="select" value="taxonomy"/>
                    <param name="taxonomy_annotation_table" value="taxonomy.tsv"/>
                </conditional>
            </conditional>
            <conditional name="min_len">
                <param name="method" value="ratio"/>
                <param name="ratio" value="0.05"/>
            </conditional>
            <param name="orf_finder" value="fast-naive"/>
            <param name="random_seed" value="0"/>
            <section name="annot">
                <param name="ml_threshold" value=""/>
            </section>
            <section name="training">
                <param name="epoches" value="20"/>
                <param name="batch_size" value="2048"/>
            </section>
            <section name="bin">
                <param name="max_node" value="1"/>
                <param name="max_edges" value="200"/>
                <param name="minfasta_kbs" value="200"/>
            </section>
            <param name="extra_output" value="coverage"/>
            <output_collection name="output_bins" count="0"/>
            <output_collection name="co_cov" count="5">
                <element name="0" ftype="csv">
                    <assert_contents>
                        <has_text text="g1k_0"/>
                        <has_text text="g2k_7"/>
                    </assert_contents>
                </element>
                <element name="1" ftype="csv">
                    <assert_contents>
                        <has_text text="g1k_0"/>
                        <has_text text="g2k_7"/>
                    </assert_contents>
                </element>
                <element name="4" ftype="csv">
                    <assert_contents>
                        <has_text text="g1k_0"/>
                        <has_text text="g2k_7"/>
                    </assert_contents>
                </element>
            </output_collection>
            <output_collection name="co_split_cov" count="5">
                <element name="0" ftype="csv">
                    <assert_contents>
                        <has_text text="g1k_0_1"/>
                        <has_text text="g2k_7_2"/>
                    </assert_contents>
                </element>
                <element name="1" ftype="csv">
                    <assert_contents>
                        <has_text text="g1k_0_1"/>
                        <has_text text="g2k_7_2"/>
                    </assert_contents>
                </element>
                <element name="2" ftype="csv">
                    <assert_contents>
                        <has_text text="g1k_0_1"/>
                        <has_text text="g2k_7_2"/>
                    </assert_contents>
                </element>
            </output_collection>
        </test>
        <!-- Test 3: same as test 2 but with the fraggenescan ORF finder. -->
        <test expect_num_outputs="3">
            <conditional name="mode">
                <param name="select" value="co"/>
                <param name="input_fasta" ftype="fasta" value="input_single.fasta"/>
                <param name="input_bam" ftype="bam" value="input_coassembly_sorted1.bam,input_coassembly_sorted2.bam,input_coassembly_sorted3.bam,input_coassembly_sorted4.bam,input_coassembly_sorted5.bam"/>
                <conditional name="ref">
                    <param name="select" value="taxonomy"/>
                    <param name="taxonomy_annotation_table" value="taxonomy.tsv"/>
                </conditional>
            </conditional>
            <conditional name="min_len">
                <param name="method" value="ratio"/>
                <param name="ratio" value="0.05"/>
            </conditional>
            <param name="orf_finder" value="fraggenescan"/>
            <param name="random_seed" value="0"/>
            <section name="annot">
                <param name="ml_threshold" value=""/>
            </section>
            <section name="training">
                <param name="epoches" value="20"/>
                <param name="batch_size" value="2048"/>
            </section>
            <section name="bin">
                <param name="max_node" value="1"/>
                <param name="max_edges" value="200"/>
                <param name="minfasta_kbs" value="200"/>
            </section>
            <param name="extra_output" value="coverage"/>
            <output_collection name="output_bins" count="0"/>
            <output_collection name="co_cov" count="5">
                <element name="0" ftype="csv">
                    <assert_contents>
                        <has_text text="g1k_0"/>
                        <has_text text="g2k_7"/>
                    </assert_contents>
                </element>
                <element name="1" ftype="csv">
                    <assert_contents>
                        <has_text text="g1k_0"/>
                        <has_text text="g2k_7"/>
                    </assert_contents>
                </element>
                <element name="4" ftype="csv">
                    <assert_contents>
                        <has_text text="g1k_0"/>
                        <has_text text="g2k_7"/>
                    </assert_contents>
                </element>
            </output_collection>
            <output_collection name="co_split_cov" count="5">
                <element name="0" ftype="csv">
                    <assert_contents>
                        <has_text text="g1k_0_1"/>
                        <has_text text="g2k_7_2"/>
                    </assert_contents>
                </element>
                <element name="1" ftype="csv">
                    <assert_contents>
                        <has_text text="g1k_0_1"/>
                        <has_text text="g2k_7_2"/>
                    </assert_contents>
                </element>
                <element name="2" ftype="csv">
                    <assert_contents>
                        <has_text text="g1k_0_1"/>
                        <has_text text="g2k_7_2"/>
                    </assert_contents>
                </element>
            </output_collection>
        </test>
        <!-- Test 4: single sample, cached reference database, no extra outputs. -->
        <test expect_num_outputs="1">
            <conditional name="mode">
                <param name="select" value="single"/>
                <param name="input_fasta" ftype="fasta" value="input_single.fasta"/>
                <param name="input_bam" ftype="bam" value="input_single.bam"/>
                <conditional name="ref">
                    <param name="select" value="cached"/>
                    <param name="cached_db" value="test-db"/>
                </conditional>
            </conditional>
            <conditional name="min_len">
                <param name="method" value="ratio"/>
                <param name="ratio" value="0.05"/>
            </conditional>
            <param name="orf_finder" value="fraggenescan"/>
            <param name="random_seed" value="0"/>
            <section name="annot">
                <param name="ml_threshold" value=""/>
            </section>
            <section name="training">
                <param name="epoches" value="20"/>
                <param name="batch_size" value="2048"/>
            </section>
            <section name="bin">
                <param name="max_node" value="1"/>
                <param name="max_edges" value="200"/>
                <param name="minfasta_kbs" value="200"/>
            </section>
            <param name="extra_output" value=""/>
            <output_collection name="output_bins" count="1">
                <element name="SemiBin_30" ftype="fasta">
                    <assert_contents>
                        <has_text text=">g3k_0"/>
                    </assert_contents>
                </element>
            </output_collection>
        </test>
        <!-- Test 5: single sample, cached reference database, pre-reclustering bins requested. -->
        <test expect_num_outputs="2">
            <conditional name="mode">
                <param name="select" value="single"/>
                <param name="input_fasta" ftype="fasta" value="input_single.fasta"/>
                <param name="input_bam" ftype="bam" value="input_single.bam"/>
                <conditional name="ref">
                    <param name="select" value="cached"/>
                    <param name="cached_db" value="test-db"/>
                </conditional>
            </conditional>
            <conditional name="min_len">
                <param name="method" value="ratio"/>
                <param name="ratio" value="0.05"/>
            </conditional>
            <param name="orf_finder" value="fraggenescan"/>
            <param name="random_seed" value="0"/>
            <section name="annot">
                <param name="ml_threshold" value=""/>
            </section>
            <section name="training">
                <param name="epoches" value="20"/>
                <param name="batch_size" value="2048"/>
            </section>
            <section name="bin">
                <param name="max_node" value="1"/>
                <param name="max_edges" value="200"/>
                <param name="minfasta_kbs" value="200"/>
            </section>
            <param name="extra_output" value="pre_reclustering_bins"/>
            <output_collection name="output_pre_recluster_bins" count="3">
                <element name="SemiBin_0" ftype="fasta">
                    <assert_contents>
                        <has_text text="g1k_0"/>
                    </assert_contents>
                </element>
                <element name="SemiBin_1" ftype="fasta">
                    <assert_contents>
                        <has_text text="g2k_0"/>
                    </assert_contents>
                </element>
                <element name="SemiBin_2" ftype="fasta">
                    <assert_contents>
                        <has_text text="g3k_0"/>
                    </assert_contents>
                </element>
            </output_collection>
            <output_collection name="output_after_recluster_bins" count="1">
                <element name="SemiBin_30" ftype="fasta">
                    <assert_contents>
                        <has_text text="g3k_0"/>
                    </assert_contents>
                </element>
            </output_collection>
        </test>
        <!-- Test 6: multi-sample mode with a concatenated FASTA, ten BAMs and ten taxonomy tables. -->
        <test expect_num_outputs="8">
            <conditional name="mode">
                <param name="select" value="multi"/>
                <conditional name="multi_fasta">
                    <param name="select" value="concatenated"/>
                    <param name="input_fasta" ftype="fasta" value="input_multi.fasta.gz"/>
                </conditional>
                <param name="input_bam" ftype="bam" value="input_multi_sorted1.bam,input_multi_sorted2.bam,input_multi_sorted3.bam,input_multi_sorted4.bam,input_multi_sorted5.bam,input_multi_sorted6.bam,input_multi_sorted7.bam,input_multi_sorted8.bam,input_multi_sorted9.bam,input_multi_sorted10.bam"/>
                <conditional name="ref">
                    <param name="select" value="taxonomy"/>
                    <param name="taxonomy_annotation_table" value="taxonomy.tsv,taxonomy_2.tsv,taxonomy_3.tsv,taxonomy_4.tsv,taxonomy_5.tsv,taxonomy_6.tsv,taxonomy_7.tsv,taxonomy_8.tsv,taxonomy_9.tsv,taxonomy_10.tsv"/>
                </conditional>
            </conditional>
            <conditional name="min_len">
                <param name="method" value="ratio"/>
                <param name="ratio" value="0.05"/>
            </conditional>
            <param name="orf_finder" value="fraggenescan"/>
            <param name="random_seed" value="0"/>
            <section name="annot">
                <param name="ml_threshold" value=""/>
            </section>
            <section name="training">
                <param name="epoches" value="20"/>
                <param name="batch_size" value="2048"/>
            </section>
            <section name="bin">
                <param name="max_node" value="1"/>
                <param name="max_edges" value="200"/>
                <param name="minfasta_kbs" value="200"/>
            </section>
            <param name="extra_output" value="data,coverage,contigs"/>
            <output_collection name="multi_bins" count="0"/>
            <output_collection name="multi_data" count="10">
                <element name="S8" ftype="csv">
                    <assert_contents>
                        <has_text text="g1k_0,"/>
                    </assert_contents>
                </element>
            </output_collection>
            <output_collection name="multi_data_split" count="10">
                <element name="S8" ftype="csv">
                    <assert_contents>
                        <has_text text="g1k_0_1,"/>
                    </assert_contents>
                </element>
            </output_collection>
            <output_collection name="multi_cov" count="10">
                <element name="8" ftype="csv">
                    <assert_contents>
                        <has_text text="S1:g1k_5,"/>
                    </assert_contents>
                </element>
            </output_collection>
            <output_collection name="multi_cov_sample" count="10">
                <element name="S8" ftype="csv">
                    <assert_contents>
                        <has_text text="g1k_3"/>
                    </assert_contents>
                </element>
            </output_collection>
            <output_collection name="multi_split_cov" count="10">
                <element name="8" ftype="csv">
                    <assert_contents>
                        <has_text text="S1:g1k_5_1,0."/>
                    </assert_contents>
                </element>
            </output_collection>
            <output_collection name="multi_split_cov_sample" count="10">
                <element name="S8" ftype="csv">
                    <assert_contents>
                        <has_text text="g1k_3_1"/>
                    </assert_contents>
                </element>
            </output_collection>
            <output_collection name="multi_contigs" count="10">
                <element name="S8" ftype="fasta">
                    <assert_contents>
                        <has_text text=">g1k_0"/>
                    </assert_contents>
                </element>
            </output_collection>
        </test>
    </tests>
    <help><![CDATA[
@HELP_HEADER@

Inputs
======

@HELP_INPUT_FASTA@

@HELP_INPUT_BAM@
    ]]></help>
    <expand macro="citations"/>
</tool>