concoct: concoct.xml comparison

comparison concoct.xml @ 3:3842ef1b2f34 draft

"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/concoct commit 1a79c139165648b969d82530784cea3fc8f2d2c0"

author	iuc
date	Thu, 07 Jul 2022 08:33:35 +0000
parents	7a145c72d375
children	28e8d2bd6aba

comparison

equal deleted inserted replaced

-:7a145c72d375
+:3842ef1b2f34
 <tool id="concoct" name="CONCOCT" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@">
-<description>metagenome binning</description>
+<description>for metagenome binning</description>
 <macros>
 <import>macros.xml</import>
 </macros>
 <expand macro="requirements"/>
 <command detect_errors="exit_code"><![CDATA[
-#set pca_components_file_name = 'PCA_components_data_gt' + str($advanced.length_threshold) + '.csv'
-#set pca_transformed_file_name = 'PCA_transformed_data_gt' + str($advanced.length_threshold) + '.csv'
-#set clustering_file_name = 'clustering_gt' + str($advanced.length_threshold) + '.csv'
 ## CONCOCT doesn't handle gzipped files.
 #if $composition_file.ext.endswith(".gz")
-gunzip -c '$composition_file' > composition_file.fa &&
+gunzip -c '$composition_file' > 'composition_file.fa' &&
 #else:
-ln -s '$composition_file' composition_file.fa &&
+ln -s '$composition_file' 'composition_file.fa' &&
 #end if
 mkdir outdir &&
 concoct
 --coverage_file '$coverage_file'
---composition_file composition_file.fa
+--composition_file 'composition_file.fa'
 --clusters $advanced.clusters
 --kmer_length $advanced.kmer_length
 --threads \${GALAXY_SLOTS:-4}
 --length_threshold $advanced.length_threshold
 --read_length $advanced.read_length
 --total_percentage_pca $advanced.total_percentage_pca
 --basename 'outdir/'
 --seed $advanced.seed
 --iterations $advanced.iterations
---epsilon $advanced.epsilon
+$advanced.no_cov_normalization
-$advanced.no_cov_normalization
+$output.no_total_coverage
-$advanced.no_total_coverage
+--no_original_data
---no_original_data
+$output.converge_out
-$advanced.converge_out
-## Convert all CONCOCT .csv outputs to tabular.
-&& sed 's/\("\([^"]*\)"\)\?,/\2\t/g' outdir/$pca_components_file_name > '$output_pca_components'
-&& sed 's/\("\([^"]*\)"\)\?,/\2\t/g' outdir/$pca_transformed_file_name > '$output_pca_transformed'
-&& sed 's/\("\([^"]*\)"\)\?,/\2\t/g' outdir/$clustering_file_name > '$output_clustering'
-#if str($advanced.output_process_log) == 'yes':
-&& mv outdir/log.txt '$process_log'
-#end if
 ]]></command>
 <inputs>
-<param argument="--coverage_file" type="data" format="tabular" label="Tabular coverage file" help="Columns correspond to samples and rows to contigs"/>
+<param argument="--coverage_file" type="data" format="tabular" label="Coverage file" help="Table where each row corresponds to a contig, and each column corresponds to a sample. The values are the average coverage for this contig in that sample"/>
-<param argument="--composition_file" type="data" format="fasta,fasta.gz" label="Fasta file" help="Used to calculate the kmer composition (the genomic signature) of each contig"/>
+<param argument="--composition_file" type="data" format="fasta,fasta.gz" label="Composition file with sequences" help="It is named the composition file since it is used to calculate the kmer composition (the genomic signature) of each contig."/>
 <section name="advanced" title="Advanced options">
-<param argument="--clusters" type="integer" value="400" label="Maximum number of clusters for the Variational Gaussian Mixture Model algorithm"/>
+<param argument="--clusters" type="integer" min="0" value="400" label="Maximum number of clusters for the Variational Gaussian Mixture Model (VGMM) algorithm"/>
-<param argument="--kmer_length" type="integer" value="4" label="Kmer length"/>
+<param argument="--kmer_length" type="integer" min="0" value="4" label="Kmer length"/>
-<param argument="--length_threshold" type="integer" value="1000" label="Sequence length threshold" help="Contigs shorter than this value will not be included"/>
+<param argument="--length_threshold" type="integer" min="0" value="1000" label="Sequence length threshold" help="Contigs shorter than this value will not be included"/>
-<param argument="--read_length" type="integer" value="100" label="Read length for coverage"/>
+<param argument="--read_length" type="integer" min="0" value="100" label="Read length for coverage"/>
-<param argument="--total_percentage_pca" type="integer" value="100" label="Percentage of variance explained by the principal components for the combined data"/>
+<param argument="--total_percentage_pca" type="integer" min="0" value="100" label="Percentage of variance explained by the principal components for the combined data"/>
-<param argument="--seed" type="integer" min="0" value="1" label="Integer to use as seed for clustering" help="Zero value will use random seed"/>
+<param argument="--seed" type="integer" min="0" value="1" label="Seed for clustering" help="Zero value will use random seed"/>
-<param argument="--iterations" type="integer" value="500" label="Maximum number of iterations for the Variational Bayes Gaussian Mixture Models"/>
+<param argument="--iterations" type="integer" min="0" value="500" label="Maximum number of iterations for the Variational Bayes Gaussian Mixture Models (VBGMM)"/>
-<param argument="--epsilon" type="float" value="0.000001" label="Epsilon for the Variational Gaussian Mixture Model algorithm"/>
+<param argument="--no_cov_normalization" type="boolean" truevalue="--no_cov_normalization" falsevalue="" checked="false" label="Skip normalization and only do log transform of the coverage?" help="By default, the coverage is normalized for samples, then normalized for contigs and finally log transformed. By setting this flag you skip the normalization and only do log transform of the coverage."/>
-<param argument="--no_cov_normalization" type="boolean" truevalue="--no_cov_normalization" falsevalue="" checked="false" label="Skip normalization and only do log transorm of the coverage?" help="By default, the coverage is normalized for samples, then normalized for contigs and finally log transformed"/>
+</section>
-<param argument="--no_total_coverage" type="boolean" truevalue="--no_total_coverage" falsevalue="" checked="false" label="Eliminate the total coverage column from the coverage data matrix?" help="By default, total coverage is included, independently of coverage normalization but previous to log transformation"/>
+<section name="output" title="Output">
-<param argument="--converge_out" type="boolean" truevalue="--converge_out" falsevalue="" checked="false" label="Output convergence information?"/>
+<param argument="--no_total_coverage" type="boolean" truevalue="--no_total_coverage" falsevalue="" checked="false" label="Eliminate the total coverage column from the coverage data matrix?" help="By default, total coverage is included, independently of coverage normalization but previous to log transformation. Set this flag to disable this behaviour."/>
-<param name="output_process_log" type="select" label="Output process log file?">
+<param argument="--converge_out" type="boolean" truevalue="--converge_out" falsevalue="" checked="false" label="Write convergence information to files?"/>
-<option value="no" selected="true">No</option>
+<param name="log" type="boolean" checked="false" label="Output process log file?"/>
-<option value="yes">Yes</option>
-</param>
 </section>
 </inputs>
 <outputs>
-<data name="process_log" format="txt" label="${tool.name} on ${on_string} (process log)">
+<data name="output_clustering" format="csv" from_work_dir="outdir/clustering_gt*" label="${tool.name} on ${on_string}: Clusters"/>
-<filter>advanced['output_process_log'] == 'yes'</filter>
+<data name="process_log" format="txt" from_work_dir="outdir/log.txt" label="${tool.name} on ${on_string}: Log">
+<filter>output['log']</filter>
 </data>
-<data name="output_pca_components" format="tabular" label="${tool.name} on ${on_string} (PCA components)"/>
+<data name="output_pca_components" format="csv" from_work_dir="outdir/PCA_components_data_gt*" label="${tool.name} on ${on_string}: PCA components"/>
-<data name="output_pca_transformed" format="tabular" label="${tool.name} on ${on_string} (PCA transformed)"/>
+<data name="output_pca_transformed" format="csv" from_work_dir="outdir/PCA_transformed_data_gt*" label="${tool.name} on ${on_string}: PCA transformed clusters"/>
-<data name="output_clustering" format="tabular" label="${tool.name} on ${on_string} (Clusters)"/>
 </outputs>
 <tests>
 <test expect_num_outputs="4">
-<param name="coverage_file" value="input1.tabular" ftype="tabular"/>
+<param name="coverage_file" value="coverage" ftype="tabular"/>
-<param name="composition_file" value="input1.fa.gz" ftype="fasta.gz"/>
+<param name="composition_file" value="composition.fa" ftype="fasta"/>
-<param name="output_process_log" value="yes"/>
+<section name="advanced">
-<output name="process_log" file="process_log.txt" ftype="txt" compare="re_match"/>
+<param name="clusters" value="400"/>
-<output name="output_pca_components" ftype="tabular">
+<param name="kmer_length" value="4"/>
+<param name="length_threshold" value="1000"/>
+<param name="read_length" value="100"/>
+<param name="total_percentage_pca" value="100"/>
+<param name="seed" value="1"/>
+<param name="iterations" value="500"/>
+<param name="no_cov_normalization" value=""/>
+</section>
+<section name="output">
+<param name="no_total_coverage" value=""/>
+<param name="converge_out" value=""/>
+<param name="log" value="true"/>
+</section>
+<output name="process_log" ftype="txt" compare="contains">
 <assert_contents>
-<has_size value="367636"/>
+<has_size value="786"/>
-<has_text text="7377051e-02"/>
+<has_text text="CONCOCT Finished"/>
 </assert_contents>
 </output>
-<output name="output_pca_transformed" ftype="tabular">
+<output name="output_pca_components" ftype="csv">
 <assert_contents>
-<has_size value="737926"/>
+<has_size value="362924" delta="10"/>
-<has_text text="NODE_103_length_20202_cov_8.395357.0"/>
+<has_text text="-5.90697200e-02"/>
 </assert_contents>
 </output>
-<output name="output_clustering" ftype="tabular">
+<output name="output_pca_transformed" ftype="csv">
 <assert_contents>
-<has_size value="12167"/>
+<has_size value="834200" delta="10"/>
-<has_text text="NODE_103_length_20202_cov_8.395357"/>
+<has_text text="contig-21000001"/>
+</assert_contents>
+</output>
+<output name="output_clustering" ftype="csv">
+<assert_contents>
+<has_size value="6923" delta="10"/>
+<has_text text="contig-21000001,"/>
+</assert_contents>
+</output>
+</test>
+<test expect_num_outputs="3">
+<param name="coverage_file" value="coverage" ftype="tabular"/>
+<param name="composition_file" value="composition.fa.gz" ftype="fasta.gz"/>
+<section name="advanced">
+<param name="clusters" value="400"/>
+<param name="kmer_length" value="4"/>
+<param name="length_threshold" value="1000"/>
+<param name="read_length" value="100"/>
+<param name="total_percentage_pca" value="100"/>
+<param name="seed" value="1"/>
+<param name="iterations" value="500"/>
+<param name="no_cov_normalization" value=""/>
+</section>
+<section name="output">
+<param name="no_total_coverage" value=""/>
+<param name="converge_out" value=""/>
+<param name="log" value="false"/>
+</section>
+<output name="output_pca_components" ftype="csv">
+<assert_contents>
+<has_size value="362924" delta="10"/>
+<has_text text="-5.90697200e-02"/>
+</assert_contents>
+</output>
+<output name="output_pca_transformed" ftype="csv">
+<assert_contents>
+<has_size value="834200" delta="10"/>
+<has_text text="contig-21000001"/>
+</assert_contents>
+</output>
+<output name="output_clustering" ftype="csv">
+<assert_contents>
+<has_size value="6923" delta="10"/>
+<has_text text="contig-21000001,"/>
 </assert_contents>
 </output>
 </test>
 </tests>
 <help><![CDATA[
 **What it does**
 CONCOCT (Clustering cONtigs with COverage and ComposiTion) performs unsupervised binning of metagenomic contigs by
 using nucleotide composition - kmer frequencies - and coverage data for multiple samples.  CONCOCT can accurately
 (up to species level) bin metagenomic contigs.
 The tool accepts 2 inputs; a tabular file where each row corresponds to a contig and each column corresponds to a
 sample (the values are the average coverage for this contig in that sample) and a file containing sequences in
 fasta format.

Mercurial > repos > iuc > concoct

comparison concoct.xml @ 3:3842ef1b2f34 draft