comparison concoct.xml @ 0:06c0eb033025 draft

"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/concoct commit 49b42f61ff37c3c33dd15c195e5705e1db066c37"
author iuc
date Fri, 18 Feb 2022 14:18:11 +0000
parents
children 031f84cb2fd3
comparison
equal deleted inserted replaced
-1:000000000000 0:06c0eb033025
1 <tool id="concoct" name="CONCOCT" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@">
2 <description>metagenome binning</description>
3 <macros>
4 <import>macros.xml</import>
5 </macros>
6 <expand macro="requirements"/>
7 <command detect_errors="exit_code"><![CDATA[
8 #set pca_components_file_name = 'PCA_components_data_gt' + str($advanced.length_threshold) + '.csv'
9 #set pca_transformed_file_name = 'PCA_transformed_data_gt' + str($advanced.length_threshold) + '.csv'
10 #set clustering_file_name = 'clustering_gt' + str($advanced.length_threshold) + '.csv'
11 ## The three names above are the CSV files read back from outdir/ further down; each one embeds the selected length threshold.
12 ## CONCOCT doesn't handle gzipped files, so a compressed input is decompressed into the working directory; an uncompressed input is only symlinked.
13 #if $composition_file.ext.endswith(".gz")
14 gunzip -c '$composition_file' > composition_file.fa &&
15 #else:
16 ln -s '$composition_file' composition_file.fa &&
17 #end if
18 ## Run CONCOCT with outdir/ as the basename, so the result files are read from that directory afterwards.
19 mkdir outdir &&
20 concoct
21 --coverage_file '$coverage_file'
22 --composition_file composition_file.fa
23 --clusters $advanced.clusters
24 --kmer_length $advanced.kmer_length
25 --threads \${GALAXY_SLOTS:-4}
26 --length_threshold $advanced.length_threshold
27 --read_length $advanced.read_length
28 --total_percentage_pca $advanced.total_percentage_pca
29 --basename 'outdir/'
30 --seed $advanced.seed
31 --iterations $advanced.iterations
32 --epsilon $advanced.epsilon
33 $advanced.no_cov_normalization
34 $advanced.no_total_coverage
35 --no_original_data
36 $advanced.converge_out
37
38 ## Convert all CONCOCT .csv outputs to tabular: the sed expression replaces every comma with a tab and drops the double quotes around a quoted field that precedes a comma.
39 && sed 's/\("\([^"]*\)"\)\?,/\2\t/g' outdir/$pca_components_file_name > '$output_pca_components'
40 && sed 's/\("\([^"]*\)"\)\?,/\2\t/g' outdir/$pca_transformed_file_name > '$output_pca_transformed'
41 && sed 's/\("\([^"]*\)"\)\?,/\2\t/g' outdir/$clustering_file_name > '$output_clustering'
42 #if str($advanced.output_process_log) == 'yes':
43 && mv outdir/log.txt '$process_log'
44 #end if
45 ]]></command>
46 <inputs> <!-- two required datasets plus an "advanced" section whose values are substituted into the command template -->
47 <param argument="--coverage_file" type="data" format="tabular" label="Tabular coverage file" help="Columns correspond to samples and rows to contigs"/>
48 <param argument="--composition_file" type="data" format="fasta,fasta.gz" label="Fasta file" help="Used to calculate the kmer composition (the genomic signature) of each contig"/>
49 <section name="advanced" title="Advanced options"> <!-- referenced as $advanced.NAME in the command template -->
50 <param argument="--clusters" type="integer" value="400" label="Maximum number of clusters for the Variational Gaussian Mixture Model algorithm"/>
51 <param argument="--kmer_length" type="integer" value="4" label="Kmer length"/>
52 <param argument="--length_threshold" type="integer" value="1000" label="Sequence length threshold" help="Contigs shorter than this value will not be included"/>
53 <param argument="--read_length" type="integer" value="100" label="Read length for coverage"/>
54 <param argument="--total_percentage_pca" type="integer" value="100" label="Percentage of variance explained by the principal components for the combined data"/>
55 <param argument="--seed" type="integer" min="0" value="1" label="Integer to use as seed for clustering" help="Zero value will use random seed"/>
56 <param argument="--iterations" type="integer" value="500" label="Maximum number of iterations for the Variational Bayes Gaussian Mixture Models"/>
57 <param argument="--epsilon" type="float" value="0.000001" label="Epsilon for the Variational Gaussian Mixture Model algorithm"/>
58 <param argument="--no_cov_normalization" type="boolean" truevalue="--no_cov_normalization" falsevalue="" checked="false" label="Skip normalization and only do log transform of the coverage?" help="By default, the coverage is normalized for samples, then normalized for contigs and finally log transformed"/>
59 <param argument="--no_total_coverage" type="boolean" truevalue="--no_total_coverage" falsevalue="" checked="false" label="Eliminate the total coverage column from the coverage data matrix?" help="By default, total coverage is included, independently of coverage normalization but previous to log transformation"/>
60 <param argument="--converge_out" type="boolean" truevalue="--converge_out" falsevalue="" checked="false" label="Output convergence information?"/>
61 <param name="output_process_log" type="select" label="Output process log file?"> <!-- "yes" also enables the process_log output through its filter -->
62 <option value="no" selected="true">No</option>
63 <option value="yes">Yes</option>
64 </param>
65 </section>
66 </inputs>
67 <outputs> <!-- the three tabular results are always declared; the process log is conditional -->
68 <data name="process_log" format="txt" label="${tool.name} on ${on_string} (process log)">
69 <filter>advanced['output_process_log'] == 'yes'</filter> <!-- only kept when the "Output process log file?" select in the advanced section is "yes" -->
70 </data>
71 <data name="output_pca_components" format="tabular" label="${tool.name} on ${on_string} (PCA components)"/>
72 <data name="output_pca_transformed" format="tabular" label="${tool.name} on ${on_string} (PCA transformed)"/>
73 <data name="output_clustering" format="tabular" label="${tool.name} on ${on_string} (Clusters)"/>
74 </outputs>
75 <tests>
76 <test expect_num_outputs="4"> <!-- 4 outputs: the three tabular results plus the process log enabled below -->
77 <param name="coverage_file" value="input1.tabular" ftype="tabular"/>
78 <param name="composition_file" value="input1.fa.gz" ftype="fasta.gz"/> <!-- gzipped FASTA, so the gunzip branch of the command template is taken -->
79 <param name="output_process_log" value="yes"/> <!-- this select is declared inside the "advanced" section; "yes" satisfies the process_log filter -->
80 <output name="process_log" file="process_log.txt" ftype="txt" compare="contains"/>
81 <output name="output_pca_components" ftype="tabular">
82 <assert_contents>
83 <has_size value="367636"/>
84 <has_text text="7377051e-02"/>
85 </assert_contents>
86 </output>
87 <output name="output_pca_transformed" ftype="tabular">
88 <assert_contents>
89 <has_size value="737926"/>
90 <has_text text="NODE_103_length_20202_cov_8.395357.0"/>
91 </assert_contents>
92 </output>
93 <output name="output_clustering" ftype="tabular">
94 <assert_contents>
95 <has_size value="12167"/>
96 <has_text text="NODE_103_length_20202_cov_8.395357"/>
97 </assert_contents>
98 </output>
99 </test>
100 </tests>
101 <help><![CDATA[
102 **What it does**
103
104 CONCOCT (Clustering cONtigs with COverage and ComposiTion) performs unsupervised binning of metagenomic contigs by
105 using nucleotide composition - kmer frequencies - and coverage data for multiple samples. CONCOCT can accurately
106 (up to species level) bin metagenomic contigs.
107
108 The tool accepts 2 inputs: a tabular file where each row corresponds to a contig and each column corresponds to a
109 sample (the values are the average coverage for this contig in that sample) and a file containing sequences in
110 fasta format.
111
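112 Three outputs are produced: the clustering of the contigs that pass the sequence length threshold (1000 by default), the PCA transformed matrix and the PCA components. A process log can optionally be output as well.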
113
114 ]]></help>
115 <expand macro="citations"/>
116 </tool>