Mercurial > repos > mbernt > maxbin2
diff maxbin2.xml @ 5:8a0473eb465e draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/maxbin2/ commit e5b086199e53e32ffdf46ac6ba36c43f8ef6db26
author | iuc |
---|---|
date | Fri, 17 Jun 2022 17:27:40 +0000 |
parents | 4ef88f9a195d |
children | 2bc40bfbcfb4 |
line wrap: on
line diff
--- a/maxbin2.xml Wed Nov 18 10:42:37 2020 +0000 +++ b/maxbin2.xml Fri Jun 17 17:27:40 2022 +0000 @@ -1,157 +1,202 @@ -<tool id="maxbin2" name="MaxBin2" version="@MAXBIN_VERSION@+galaxy2"> +<tool id="maxbin2" name="MaxBin2" version="@MAXBIN_VERSION@+galaxy3"> <description>clusters metagenomic contigs into bins</description> + <xrefs> + <xref type="bio.tools">masigpro</xref> + </xrefs> <macros> <token name="@MAXBIN_VERSION@">2.2.7</token> + <xml name="contig"> + <param argument="-contig" type="data" format="fasta,fasta.gz" label="Contig file"/> + </xml> + <xml name="input_type"> + <param name="type" type="select" label="Input type"> + <option value="reads" selected="true">Sequencing Reads</option> + <option value="abund">Abundances</option> + </param> + </xml> + <xml name="reads_extra_params"> + <param name="output_abundances" type="boolean" checked="false" label="Output abundances" help="" /> + <param argument="--reassembly" type="boolean" truevalue="-reassembly" falsevalue="" checked="false" label="Reassembly" help="Reassembly option is still highly experimental. To use this function, you need to feed MaxBin interleaved paired-end fastq or fasta file." /> + </xml> </macros> <requirements> <requirement type="package" version="@MAXBIN_VERSION@">maxbin2</requirement> </requirements> <version_command><![CDATA[run_MaxBin.pl -version | head -n 1]]></version_command> <command detect_errors="exit_code"><![CDATA[ - ## generate read or abundance files - #if $intype_cond.intype_select == 'rds': - #for $r in $intype_cond.reads +## generate read or abundance files +#if $assembly.inputs.type == 'reads' + #if $assembly.type == 'individual' +echo '$assembly.inputs.reads' >> reads_list && + #else + #for $r in $assembly.inputs.reads #if $r - echo '$r' >> reads_list && +echo '$r' >> reads_list && #end if #end for - #else if $intype_cond.intype_select == 'abdc': - #for $a in $intype_cond.abund + #end if +#else if $assembly.inputs.type == 'abund' + #if $assembly.type == 'individual' +echo '$assembly.inputs.abund' >> abund_list && + #else + #for $a in $assembly.inputs.abund #if $a - echo '$a' >> abund_list && +echo '$a' >> abund_list && #end if #end for #end if +#end if - ## in case of reassembly the IBDA out and err is appended - ## to differentiate this a header is added also befor the - ## MaxBin2 outputs - #if $intype_cond.intype_select == 'rds' and $intype_cond.reassembly != "" - echo "==== MaxBin2 stdout ====" && - echo "==== MaxBin2 stderr ====" 1>&2 && - #end if +## in case of reassembly the IBDA out and err is appended +## to differentiate this a header is added also befor the +## MaxBin2 outputs +#if $assembly.inputs.type == 'reads' and $assembly.inputs.reassembly != "" +echo "==== MaxBin2 stdout ====" && +echo "==== MaxBin2 stderr ====" 1>&2 && +#end if - run_MaxBin.pl +run_MaxBin.pl -contig '$contig' -out out - #if $intype_cond.intype_select == 'rds': - -reads_list reads_list - $intype_cond.reassembly - #else if $intype_cond.intype_select == 'abdc': - -abund_list abund_list - #end if - #if $adv_cond.adv_select == 'yes': - -min_contig_length $adv_cond.min_contig_length - -max_iteration $adv_cond.max_iteration - -prob_threshold $adv_cond.prob_threshold - $adv_cond.plotmarker - -markerset $adv_cond.markerset - #end if +#if $assembly.inputs.type == 'reads': + -reads_list reads_list + $assembly.inputs.reassembly +#else if $assembly.inputs.type == 'abund': + -abund_list abund_list +#end if + -min_contig_length $adv.min_contig_length + -max_iteration $adv.max_iteration + -prob_threshold $adv.prob_threshold + $output.plotmarker + -markerset $output.markerset -thread \${GALAXY_SLOTS:-1} - && gzip -cd out.marker_of_each_bin.tar.gz | tar -xf - +&& gzip -cd out.marker_of_each_bin.tar.gz | tar -xf - - ## redirect the idba out and err file content to stdout and err - ## since this is also wanted in case the error case ';' is used here to - ## separate commands - #if $intype_cond.intype_select == 'rds' and $intype_cond.reassembly != "" - ; echo "==== IDBA stdout ====" - && if [[ -f out.idba.out ]]; then cat out.idba.out; fi - && echo "==== IDBA stderr ====" 1>&2 - && if [[ -f out.idba.err ]]; then cat out.idba.err 1>&2; fi - #end if +## redirect the idba out and err file content to stdout and err +## since this is also wanted in case the error case ';' is used here to +## separate commands +#if $assembly.inputs.type == 'reads' and $assembly.inputs.reassembly != "" +; echo "==== IDBA stdout ====" +&& if [[ -f out.idba.out ]]; then cat out.idba.out; fi +&& echo "==== IDBA stderr ====" 1>&2 +&& if [[ -f out.idba.err ]]; then cat out.idba.err 1>&2; fi +#end if ]]></command> <inputs> - <param argument="-contig" type="data" format="fasta,fasta.gz" label="Contig file"/> - <conditional name="intype_cond"> - <param name="intype_select" type="select" label="Input type"> - <option value="rds" selected="true">Sequencing Reads</option> - <option value="abdc">Abundances</option> - </param> - <when value="rds"> - <param name="reads" type="data" format="fasta,fastq" multiple="true" label="Reads file(s)" help="(-read/-read2/...)"/> - <param name="output_abundances" type="boolean" truevalue="true" falsevalue="false" checked="false" label="Output abundances" help="" /> - <param argument="--reassembly" type="boolean" truevalue="-reassembly" falsevalue="" checked="false" label="Reassembly" help="Reassembly option is still highly experimental. To use this function, you need to feed MaxBin interleaved paired-end fastq or fasta file." /> - </when> - <when value="abdc"> - <param name="abund" type="data" format="tabular" multiple="true" label="Abundance file(s)" help="(-abund/-abund2/...)" /> - </when> - </conditional> - <conditional name="adv_cond"> - <param name="adv_select" type="select" label="Advanced options"> - <option value="yes">Yes</option> - <option value="no" selected="true">No</option> + <expand macro="contig"/> + <conditional name="assembly"> + <param name="type" type="select" label="Assembly type used to generate contig(s)"> + <option value="individual">Assembly of sample(s) one by one (individual assembly)</option> + <option value="coassembly">Assembly of different samples together (co-assembly)</option> </param> - <when value="no"/> - <when value="yes"> - <param argument="-min_contig_length" type="integer" min="0" value="1000" label="minimum contig length" /> - <param argument="-max_iteration" type="integer" min="0" value="50" label="Maximum Expectation-Maximization algorithm iteration number" /> - <param argument="-prob_threshold" type="float" min="0" max="1.0" value="0.5" label="Probability threshold for EM final classification" /> - <param argument="-plotmarker" type="boolean" truevalue="-plotmarker" falsevalue="" checked="false" label="Generate visualization of the marker gene presence numbers" /> - <param name="output_marker" type="boolean" truevalue="true" falsevalue="false" checked="false" label="Output marker gene presence for bins table" /> - <param name="output_markers" type="boolean" truevalue="true" falsevalue="false" checked="false" label="Output marker genes for each bin as fasta" /> - <param name="output_log" type="boolean" truevalue="true" falsevalue="false" checked="false" label="Output log" /> - <param argument="-markerset" type="select" label="Marker gene set"> - <option value="107" selected="true">107 marker genes present in >95% of bacteria</option> - <option value="40">40 marker gene sets that are universal among bacteria and archaea</option> - </param> + <when value="individual"> + <conditional name="inputs"> + <expand macro="input_type"/> + <when value="reads"> + <param argument="-reads" type="data" format="fasta,fastq" label="Reads file"/> + <expand macro="reads_extra_params"/> + </when> + <when value="abund"> + <param argument="-abund" type="data" format="tabular" label="Abundance file"/> + </when> + </conditional> + </when> + <when value="coassembly"> + <conditional name="inputs"> + <expand macro="input_type"/> + <when value="reads"> + <param argument="-reads" type="data" multiple="true" format="fasta,fastq" label="Reads file(s)"/> + <expand macro="reads_extra_params"/> + </when> + <when value="abund"> + <param argument="-abund" type="data" format="tabular" multiple="true" label="Abundance file(s)"/> + </when> + </conditional> </when> </conditional> + <section name="adv" title="Advanced options"> + <param argument="-min_contig_length" type="integer" min="0" value="1000" label="minimum contig length" /> + <param argument="-max_iteration" type="integer" min="0" value="50" label="Maximum Expectation-Maximization algorithm iteration number" /> + <param argument="-prob_threshold" type="float" min="0" max="1.0" value="0.5" label="Probability threshold for EM final classification" /> + </section> + <section name="output" title="Outputs"> + <param argument="-plotmarker" type="boolean" truevalue="-plotmarker" falsevalue="" checked="false" label="Generate visualization of the marker gene presence numbers" /> + <param name="marker" type="boolean" checked="false" label="Output marker gene presence for bins table" /> + <param name="markers" type="boolean" checked="false" label="Output marker genes for each bin as fasta" /> + <param name="log" type="boolean" checked="false" label="Output log" /> + <param argument="-markerset" type="select" label="Marker gene set"> + <option value="107" selected="true">107 marker genes present in >95% of bacteria</option> + <option value="40">40 marker gene sets that are universal among bacteria and archaea</option> + </param> + </section> </inputs> <outputs> <!-- default outputs --> - <collection name="bins" type="list" label="${tool.name} on ${on_string} (bins)"> + <collection name="bins" type="list" label="${tool.name} on ${on_string}: Bins"> <discover_datasets pattern="out.(?P<designation>[0-9]+).fasta" format="fasta" visible="false" /> </collection> - <data name="noclass" format="fasta" label="${tool.name} on ${on_string} (unclassified sequences)" from_work_dir="out.noclass"/> - <data name="toshort" format="fasta" label="${tool.name} on ${on_string} (to short sequences)" from_work_dir="out.tooshort"/> - <data name="summary" format="tabular" label="${tool.name} on ${on_string} (summary)" from_work_dir="out.summary"/> - + <data name="noclass" format="fasta" label="${tool.name} on ${on_string}: Unclassified sequences" from_work_dir="out.noclass"/> + <data name="toshort" format="fasta" label="${tool.name} on ${on_string}: Too short sequences" from_work_dir="out.tooshort"/> + <data name="summary" format="tabular" label="${tool.name} on ${on_string}: Summary" from_work_dir="out.summary"/> <!-- optional outputs --> - <data name="log" format="txt" label="${tool.name} on ${on_string} (log)" from_work_dir="out.log"> - <filter>adv_cond['adv_select']=='yes' and adv_cond['output_log']</filter> + <data name="log" format="txt" label="${tool.name} on ${on_string}: Log" from_work_dir="out.log"> + <filter>output['log']</filter> </data> - <data name="marker" format="tabular" label="${tool.name} on ${on_string} (marker gene presence)" from_work_dir="out.marker"> - <filter>adv_cond['adv_select']=='yes' and adv_cond['output_marker']</filter> + <data name="marker" format="tabular" label="${tool.name} on ${on_string}: Marker gene presence" from_work_dir="out.marker"> + <filter>output['marker']</filter> </data> - <data name="abundout" format="tabular" label="${tool.name} on ${on_string} (abundances)" from_work_dir="out.abund1"> - <filter>intype_cond['intype_select']=='rds' and intype_cond['output_abundances']</filter> + <data name="abundout" format="tabular" label="${tool.name} on ${on_string}: Abundances" from_work_dir="out.abund1"> + <filter>assembly['inputs']['type']=='reads' and assembly['inputs']['output_abundances']</filter> </data> - <data name="plot" format="pdf" label="${tool.name} on ${on_string} (marker gene presence plot)" from_work_dir="out.marker.pdf"> - <filter>adv_cond['adv_select']=='yes' and adv_cond['plotmarker']</filter> + <data name="plot" format="pdf" label="${tool.name} on ${on_string}: Marker gene presence plot" from_work_dir="out.marker.pdf"> + <filter>output['plotmarker']</filter> </data> - <collection name="markers" type="list" label="${tool.name} on ${on_string} (markers prediced for bins)"> + <collection name="markers" type="list" label="${tool.name} on ${on_string}: Markers prediced for bins"> <discover_datasets pattern="out.(?P<designation>[0-9]+).marker.fasta" format="fasta" visible="false" /> - <filter>adv_cond['adv_select']=='yes' and adv_cond['output_markers']</filter> + <filter>output['markers']</filter> </collection> - <!-- additional output in case of reassembly --> - <collection name="reassembly_bins" type="list" label="${tool.name} on ${on_string} (reassembly bins)"> + <collection name="reassembly_bins" type="list" label="${tool.name} on ${on_string}: Reassembly bins"> <discover_datasets directory="out.reassem" pattern="out.(?P<designation>[0-9]+).fasta" format="fasta" visible="false" /> - <filter>intype_cond['intype_select']=='rds' and intype_cond['reassembly']</filter> + <filter>assembly['inputs']['type']=='reads' and assembly['inputs']['reassembly']</filter> </collection> - <collection name="reassembly_reads" type="list" label="${tool.name} on ${on_string} (reassembly reads)"> + <collection name="reassembly_reads" type="list" label="${tool.name} on ${on_string}: Reassembly reads"> <discover_datasets directory="out.reassem" pattern="out.reads.(?P<designation>[0-9]+)" format="fasta" visible="false" /> - <filter>intype_cond['intype_select']=='rds' and intype_cond['reassembly']</filter> + <filter>assembly['inputs']['type']=='reads' and assembly['inputs']['reassembly']</filter> </collection> - <data name="reassembly_noclass" format="fasta" label="${tool.name} on ${on_string} (reassembly unclassified sequences)" from_work_dir="out.reassem/out.reads.noclass"> - <filter>intype_cond['intype_select']=='rds' and intype_cond['reassembly']</filter> + <data name="reassembly_noclass" format="fasta" label="${tool.name} on ${on_string}: Reassembly unclassified sequences" from_work_dir="out.reassem/out.reads.noclass"> + <filter>assembly['inputs']['type']=='reads' and assembly['inputs']['reassembly']</filter> </data> - <data name="reassembly_n50" format="txt" label="${tool.name} on ${on_string} (reassembly N50)" from_work_dir="out.reassem/N50.txt"> - <filter>intype_cond['intype_select']=='rds' and intype_cond['reassembly']</filter> + <data name="reassembly_n50" format="txt" label="${tool.name} on ${on_string}: Reassembly N50" from_work_dir="out.reassem/N50.txt"> + <filter>assembly['inputs']['type']=='reads' and assembly['inputs']['reassembly']</filter> </data> </outputs> <tests> <!-- test w contigs and reads as input --> <test expect_num_outputs="4"> <param name="contig" value="Bin_Sample3_contigs.fasta" ftype="fasta" /> - <conditional name="intype_cond"> - <param name="intype_select" value="rds"/> - <param name="reads" value="interleavedPE_unmapped_Sample3_small.fasta" ftype="fasta"/> + <conditional name="assembly"> + <param name="type" value="individual"/> + <conditional name="inputs"> + <param name="type" value="reads"/> + <param name="reads" value="interleavedPE_unmapped_Sample3_small.fasta" ftype="fasta"/> + <param name="output_abundances" value="false"/> + <param name="reassembly" value=""/> + </conditional> </conditional> - <conditional name="adv_cond"> - <param name="adv_select" value="no"/> - </conditional> + <section name="adv"> + <param name="min_contig_length" value="1000"/> + <param name="max_iteration" value="50"/> + <param name="prob_threshold" value="0.5"/> + </section> + <section name="output"> + <param name="plotmarker" value=""/> + <param name="marker" value="false"/> + <param name="markers" value="false" /> + <param name="log" value="false"/> + <param name="markerset" value="107"/> + </section> <output_collection name="bins" type="list" count="2"> <element name="001" file="1/out.001.fasta" ftype="fasta"/> <element name="002" file="1/out.002.fasta" ftype="fasta"/> @@ -160,29 +205,49 @@ <output name="noclass" file="1/out.noclass" ftype="fasta" /> <output name="toshort" file="1/out.tooshort" ftype="fasta" /> </test> - <!-- test w contigs and reads as input test for optional outputs --> + <!-- test w co-assembled contigs and multiple reads and optional outputs --> <test expect_num_outputs="9"> <param name="contig" value="Bin_Sample3_contigs.fasta" ftype="fasta" /> - <conditional name="intype_cond"> - <param name="intype_select" value="rds"/> - <param name="reads" value="interleavedPE_unmapped_Sample3_small.fasta" ftype="fasta"/> - <param name="output_abundances" value="true" /> + <conditional name="assembly"> + <param name="type" value="coassembly"/> + <conditional name="inputs"> + <param name="type" value="reads"/> + <param name="reads" value="interleavedPE_unmapped_Sample3_small.fasta,interleavedPE_unmapped_Sample3_small.fasta" ftype="fasta"/> + <param name="output_abundances" value="true"/> + <param name="reassembly" value=""/> + </conditional> </conditional> - <conditional name="adv_cond"> - <param name="adv_select" value="yes"/> - <param name="plotmarker" value="true" /> - <param name="output_marker" value="true" /> - <param name="output_markers" value="true" /> - <param name="output_log" value="true" /> - </conditional> + <section name="adv"> + <param name="min_contig_length" value="1000"/> + <param name="max_iteration" value="50"/> + <param name="prob_threshold" value="0.5"/> + </section> + <section name="output"> + <param name="plotmarker" value="true"/> + <param name="marker" value="true"/> + <param name="markers" value="true" /> + <param name="log" value="true"/> + <param name="markerset" value="107"/> + </section> <output_collection name="bins" type="list" count="2"> <element name="001" file="1/out.001.fasta" ftype="fasta"/> <element name="002" file="1/out.002.fasta" ftype="fasta"/> </output_collection> - <output name="summary" file="1/out.summary" ftype="tabular" /> + <output name="summary" ftype="tabular"> + <assert_contents> + <has_text text="Completeness"/> + <has_text text="out.001.fasta"/> + </assert_contents> + </output> <output name="noclass" file="1/out.noclass" ftype="fasta" /> <output name="toshort" file="1/out.tooshort" ftype="fasta" /> - <output name="log" file="1/out.log" ftype="txt" compare="diff" lines_diff="21" /> + <output name="log" ftype="txt" > + <assert_contents> + <has_text text="Input contig"/> + <has_text text="Elapsed time"/> + <has_text text="Yielded 2 bins for contig (scaffold) file"/> + </assert_contents> + </output> <output name="abundout" file="1/out.abund1" ftype="tabular" /> <output name="marker" file="1/out.marker" ftype="tabular" /> <output name="plot" file="1/out.marker.pdf" ftype="pdf" compare="sim_size" /> @@ -193,18 +258,25 @@ <!--test w contigs and abundances as input + advanced options --> <test expect_num_outputs="5"> <param name="contig" value="Bin_Sample3_contigs.fasta" ftype="fasta" /> - <conditional name="intype_cond"> - <param name="intype_select" value="abdc"/> - <param name="abund" value="abundances.tsv" ftype="tabular"/> + <conditional name="assembly"> + <param name="type" value="individual"/> + <conditional name="inputs"> + <param name="type" value="abund"/> + <param name="abund" value="abundances.tsv" ftype="tabular"/> + </conditional> </conditional> - <conditional name="adv_cond"> - <param name="adv_select" value="yes"/> + <section name="adv"> <param name="min_contig_length" value="500"/> <param name="max_iteration" value="10"/> <param name="prob_threshold" value="0.95"/> + </section> + <section name="output"> <param name="plotmarker" value="-plotmarker"/> + <param name="marker" value="false"/> + <param name="markers" value="false" /> + <param name="log" value="false"/> <param name="markerset" value="107"/> - </conditional> + </section> <output_collection name="bins" type="list" count="2"> <element name="001" file="2/out.001.fasta" ftype="fasta"/> <element name="002" file="2/out.002.fasta" ftype="fasta"/> @@ -217,14 +289,27 @@ <!-- test w contigs and reads as input + reassembly--> <test expect_num_outputs="8"> <param name="contig" value="Bin_Sample3_contigs.fasta" ftype="fasta" /> - <conditional name="intype_cond"> - <param name="intype_select" value="rds"/> - <param name="reads" value="interleavedPE_unmapped_Sample3_small.fasta" ftype="fasta"/> - <param name="reassembly" value="-reassembly"/> + <conditional name="assembly"> + <param name="type" value="individual"/> + <conditional name="inputs"> + <param name="type" value="reads"/> + <param name="reads" value="interleavedPE_unmapped_Sample3_small.fasta" ftype="fasta"/> + <param name="output_abundances" value="false"/> + <param name="reassembly" value="-reassembly"/> + </conditional> </conditional> - <conditional name="adv_cond"> - <param name="adv_select" value="no"/> - </conditional> + <section name="adv"> + <param name="min_contig_length" value="1000"/> + <param name="max_iteration" value="50"/> + <param name="prob_threshold" value="0.5"/> + </section> + <section name="output"> + <param name="plotmarker" value=""/> + <param name="marker" value="false"/> + <param name="markers" value="false" /> + <param name="log" value="false"/> + <param name="markerset" value="107"/> + </section> <output_collection name="bins" type="list" count="2"> <element name="001" file="3/out.001.fasta" ftype="fasta"/> <element name="002" file="3/out.002.fasta" ftype="fasta"/> @@ -233,15 +318,31 @@ <output name="noclass" file="3/out.noclass" ftype="fasta" /> <output name="toshort" file="3/out.tooshort" ftype="fasta" /> <output_collection name="reassembly_bins" type="list" count="2"> - <element name="001" file="3/out.reassem/out.001.fasta" ftype="fasta"/> - <element name="002" file="3/out.reassem/out.002.fasta" ftype="fasta"/> + <element name="001" ftype="fasta"> + <assert_contents> + <has_text text=">scaffold_0"/> + <has_text text=">scaffold_523"/> + </assert_contents> + </element> + <element name="002" ftype="fasta"> + <assert_contents> + <has_text text=">scaffold_0"/> + <has_text text=">scaffold_523"/> + </assert_contents> + </element> </output_collection> <output_collection name="reassembly_reads" type="list" count="2"> <element name="001" file="3/out.reassem/out.reads.001" ftype="fasta"/> <element name="002" file="3/out.reassem/out.reads.002" ftype="fasta"/> </output_collection> <output name="reassembly_noclass" file="3/out.reassem/out.reads.noclass" ftype="fasta" /> - <output name="reassembly_n50" file="3/out.reassem/N50.txt" ftype="txt" /> + <output name="reassembly_n50" ftype="txt"> + <assert_contents> + <has_text text="N50 before reassem"/> + <has_text text="out.002.fasta"/> + <has_text text="2878"/> + </assert_contents> + </output> </test> </tests> <help><![CDATA[