comparison genomescope.xml @ 1:3169a38c2656 draft

"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/genomescope commit a0ba4e5bb9dd542bbf1395af64e59b9f72823fec"
author iuc
date Sat, 26 Jun 2021 14:17:47 +0000
parents b2f674562a18
children 01210c4e9144
comparison
equal deleted inserted replaced
0:b2f674562a18 1:3169a38c2656
1 <tool id="genomescope" name="GenomeScope" version="@VERSION@" profile="20.01"> 1 <tool id="genomescope" name="GenomeScope" version="@VERSION@+galaxy@SUFFIX_VERSION@" profile="20.01">
2 <description>Analyze unassembled short reads</description> 2 <description>reference-free genome profiling</description>
3 <macros> 3 <macros>
4 <token name="@VERSION@">2.0</token> 4 <token name="@VERSION@">2.0</token>
5 <token name="@SUFFIX_VERSION@">1</token>
5 </macros> 6 </macros>
6 <requirements> 7 <requirements>
7 <requirement type="package" version="@VERSION@">genomescope2</requirement> 8 <requirement type="package" version="@VERSION@">genomescope2</requirement>
8 </requirements> 9 </requirements>
9 <version_command>genomescope2 --version</version_command> 10 <version_command>genomescope2 --version</version_command>
10 <command detect_errors="exit_code"><![CDATA[ 11 <command detect_errors="exit_code"><![CDATA[
11 genomescope2 12 genomescope2
12 --input '$input' 13 --input '$input'
13 --output . 14 --output .
14 --kmer_length $kmer_length 15 --kmer_length $kmer_length
15 $no_unique_sequence $testing $trace_flag 16 $output_options.no_unique_sequence
17 $advanced_options.testing
18 $advanced_options.trace_flag
16 #if $ploidy: 19 #if $ploidy:
17 --ploidy $ploidy 20 --ploidy $ploidy
18 #end if 21 #end if
19 #if $lambda: 22 #if $lambda:
20 --lambda $lambda 23 --lambda $lambda
21 #end if 24 #end if
22 #if $max_kmercov: 25 #if $max_kmercov:
23 --max_kmercov $max_kmercov 26 --max_kmercov $max_kmercov
24 #end if 27 #end if
25 #if $topology: 28 #if $advanced_options.topology:
26 --topology $topology 29 --topology $advanced_options.topology
27 #end if 30 #end if
28 #if $initial_repetitiveness: 31 #if $advanced_options.initial_repetitiveness:
29 --initial_repetitiveness $initial_repetitiveness 32 --initial_repetitiveness $advanced_options.initial_repetitiveness
30 #end if 33 #end if
31 #if $initial_heterozygosities: 34 #if $advanced_options.initial_heterozygosities:
32 --initial_heterozygosities $initial_heterozygosities 35 --initial_heterozygosities '${advanced_options.initial_heterozygosities}'
33 #end if 36 #end if
34 #if $transform_exp: 37 #if $advanced_options.transform_exp:
35 --transform_exp $transform_exp 38 --transform_exp $advanced_options.transform_exp
36 #end if 39 #end if
37 #if $true_params: 40 #if $advanced_options.true_params:
38 --true_params $true_params 41 --true_params '${advanced_options.true_params}'
39 #end if 42 #end if
40 #if $num_rounds: 43 #if $advanced_options.num_rounds:
41 --num_rounds $num_rounds 44 --num_rounds $advanced_options.num_rounds
42 #end if 45 #end if
43 ]]> 46 ]]>
44 </command> 47 </command>
45 <inputs> 48 <inputs>
46 <param argument="--input" type="data" format="tabular" label="Input histogram file" help="This file is a two column tabular file for example generated with the histo function of Jellyfish."/> 49 <param argument="--input" type="data" format="tabular" label="Input histogram file" help="This file is a two column tabular file for example generated with the histo function of Jellyfish."/>
47 <param name="model_output" type="boolean" label="Add the model parameters to your history"/> 50 <param argument="--ploidy" type="integer" min="1" max="6" optional="true" label="Ploidy for model to use" help="Default: 2"/>
48 <param name="summary_output" type="boolean" label="Output a summary of the analysis"/>
49 <param name="progress_output" type="boolean" label="Additional information for each optimization round"/>
50 <param argument="--ploidy" type="integer" optional="true" label="Ploidy for model to use" help="Default: 2"/>
51 <param argument="--kmer_length" type="integer" value="21" optional="false" label="K-mer length used to calculate k-mer spectra"/> 51 <param argument="--kmer_length" type="integer" value="21" optional="false" label="K-mer length used to calculate k-mer spectra"/>
52 <param argument="--lambda" type="integer" optional="true" label="Optional initial kmercov estimate for model to use"/> 52 <param argument="--lambda" type="integer" optional="true" label="Initial k-mer coverage estimate for model to use" help="This parameter is optional"/>
53 <param argument="--max_kmercov" type="integer" optional="true" label="Optional maximum k-mer coverage threshold" help="K-mers with coverage greater than max_kmercov are ignored by the model"/> 53 <param argument="--max_kmercov" type="integer" optional="true" label="Maximum k-mer coverage threshold" help="K-mers with coverage greater than max_kmercov are ignored by the model. This parameter is optional"/>
54 <param argument="--no_unique_sequence" type="boolean" truevalue="--no_unique_sequence" falsevalue="" label="Turn off yellow unique sequence line in plots"/> 54 <section name="output_options" title="Output options" expanded="true">
55 <param argument="--topology" type="integer" optional="true" label="Flag for topology for model to use"/> 55 <param name="output_files" type="select" multiple="true" display="checkboxes" label="Output files">
56 <param argument="--initial_repetitiveness" type="integer" optional="true" label="Initial value for repetitiveness"/> 56 <option value="model_output">Generate a file with the model parameters</option>
57 <param argument="--initial_heterozygosities" type="integer" optional="true" label="Initial values for nucleotide heterozygosity rates"/> 57 <option value="summary_output">Summary of the analysis</option>
58 <param argument="--transform_exp" type="integer" optional="true" label="Parameter for the exponent when fitting a transformed (x**transform_exp*y vs. x) k-mer histogram" help="Default: 1"/> 58 <option value="progress_output">Additional information for each optimization round</option>
59 <param argument="--testing" type="boolean" truevalue="--testing" falsevalue="" label="Create testing.tsv file with model parameters"/> 59 </param>
60 <param argument="--true_params" type="integer" optional="true" label="Flag to state true simulated parameters for testing mode"/> 60 <param argument="--no_unique_sequence" type="boolean" truevalue="--no_unique_sequence" falsevalue="" label="Turn off yellow unique sequence line in plots"/>
61 <param argument="--trace_flag" type="boolean" truevalue="--trace_flag" falsevalue="" label="Turn on printing of iteration progress of nlsLM function"/> 61 </section>
62 <param argument="--num_rounds" type="integer" min="1" optional="true" label="Number of optimization rounds"/> 62 <section name="advanced_options" title="Advanced options">
63 <param argument="--topology" type="integer" optional="true" label="Topological relationships between the homologous chromosomes" help="Flag for topology for model to use. Topology refers to the similarities between distinct homologues. Each triploid topology consists of two nucleotide heterozygosity forms (e.g. aab and abc), while each tetraploid, pentaploid, and hexaploid topology consists of three, four, and five heterozygosity forms respectively."/>
64 <param argument="--initial_repetitiveness" type="float" min="0" max="1" optional="true" label="Initial value for repetitiveness"/>
65 <param argument="--initial_heterozygosities" type="text" optional="true" label="Initial values for nucleotide heterozygosity rates" help="Nucleotide heterozygosity rates should be float values (separated by commas if necessary)">
66 <sanitizer>
67 <valid initial="string.digits"><add value="," /><add value="." /></valid>
68 </sanitizer>
69 <validator type="regex">[0-9,.]+</validator>
70 </param>
71 <param argument="--transform_exp" type="integer" optional="true" label="Parameter for the exponent when fitting a transformed (x**transform_exp*y vs. x) k-mer histogram" help="Default: 1"/>
72 <param argument="--testing" type="boolean" truevalue="--testing" falsevalue="" label="Create testing.tsv file with model parameters"/>
73 <param argument="--true_params" type="text" optional="true" label="Set true simulated parameters for testing mode">
74 <sanitizer>
75 <valid initial="string.digits"><add value="," /><add value="." /></valid>
76 </sanitizer>
77 <validator type="regex">[0-9,.]+</validator>
78 </param>
79 <param argument="--trace_flag" type="boolean" truevalue="--trace_flag" falsevalue="" label="Turn on printing of iteration progress of nlsLM function"/>
80 <param argument="--num_rounds" type="integer" min="1" optional="true" label="Number of optimization rounds"/>
81 </section>
63 </inputs> 82 </inputs>
64 <outputs> 83 <outputs>
65 <data name="linear_plot" format="png" from_work_dir="linear_plot.png" label="${tool.name} on ${on_string} Linear plot"/> 84 <data name="linear_plot" format="png" from_work_dir="linear_plot.png" label="${tool.name} on ${on_string} Linear plot"/>
66 <data name="log_plot" format="png" from_work_dir="log_plot.png" label="${tool.name} on ${on_string} Log plot"/> 85 <data name="log_plot" format="png" from_work_dir="log_plot.png" label="${tool.name} on ${on_string} Log plot"/>
67 <data name="transformed_linear_plot" format="png" from_work_dir="transformed_linear_plot.png" label="${tool.name} on ${on_string} Transformed linear plot"/> 86 <data name="transformed_linear_plot" format="png" from_work_dir="transformed_linear_plot.png" label="${tool.name} on ${on_string} Transformed linear plot"/>
68 <data name="transformed_log_plot" format="png" from_work_dir="transformed_log_plot.png" label="${tool.name} on ${on_string} Transformed log plot"/> 87 <data name="transformed_log_plot" format="png" from_work_dir="transformed_log_plot.png" label="${tool.name} on ${on_string} Transformed log plot"/>
69 <data name="model" format="txt" from_work_dir="model.txt" label="${tool.name} on ${on_string} Model"> 88 <data name="model" format="txt" from_work_dir="model.txt" label="${tool.name} on ${on_string} Model">
70 <filter>model_output</filter> 89 <filter>output_options['output_files'] and 'model_output' in output_options['output_files']</filter>
71 </data> 90 </data>
72 <data name="summary" format="txt" from_work_dir="summary.txt" label="${tool.name} on ${on_string} Summary"> 91 <data name="summary" format="txt" from_work_dir="summary.txt" label="${tool.name} on ${on_string} Summary">
73 <filter>summary_output</filter> 92 <filter>output_options['output_files'] and 'summary_output' in output_options['output_files']</filter>
74 </data> 93 </data>
75 <data name="progress" format="txt" from_work_dir="progress.txt" label="${tool.name} on ${on_string} Progress"> 94 <data name="progress" format="txt" from_work_dir="progress.txt" label="${tool.name} on ${on_string} Progress">
76 <filter>progress_output</filter> 95 <filter>output_options['output_files'] and 'progress_output' in output_options['output_files']</filter>
77 </data> 96 </data>
78 <data name="model_params" format="tabular" from_work_dir="SIMULATED_testing.tsv" label="${tool.name} on ${on_string} Model parameters"> 97 <data name="model_params" format="tabular" from_work_dir="SIMULATED_testing.tsv" label="${tool.name} on ${on_string} Model parameters">
79 <filter>testing</filter> 98 <filter>advanced_options['testing']</filter>
80 </data> 99 </data>
81 </outputs> 100 </outputs>
82 <tests> 101 <tests>
83 <test expect_num_outputs="8"> 102 <test expect_num_outputs="8">
84 <param name="input" value="genomescope-in1.tab"/> 103 <param name="input" value="genomescope-in1.tab"/>
85 <param name="kmer_length" value="21"/> 104 <param name="kmer_length" value="21"/>
86 <param name="testing" value="true"/> 105 <section name="output_options">
87 <param name="model_output" value="true"/> 106 <param name="output_files" value="model_output,summary_output,progress_output"/>
88 <param name="summary_output" value="true"/> 107 </section>
89 <param name="progress_output" value="true"/> 108 <section name="advanced_options">
109 <param name="testing" value="true"/>
110 </section>
90 <output name="linear_plot" file="genomescope-out1-1.png" ftype="png"/> 111 <output name="linear_plot" file="genomescope-out1-1.png" ftype="png"/>
91 <output name="log_plot" file="genomescope-out1-2.png" ftype="png"/> 112 <output name="log_plot" file="genomescope-out1-2.png" ftype="png"/>
92 <output name="transformed_linear_plot" file="genomescope-out1-3.png" ftype="png"/> 113 <output name="transformed_linear_plot" file="genomescope-out1-3.png" ftype="png"/>
93 <output name="transformed_log_plot" file="genomescope-out1-4.png" ftype="png" compare="sim_size"/> 114 <output name="transformed_log_plot" file="genomescope-out1-4.png" ftype="png" compare="sim_size"/>
94 <output name="model" file="genomescope-out1-1.txt" ftype="txt" lines_diff="2"/> 115 <output name="model" file="genomescope-out1-1.txt" ftype="txt" lines_diff="2"/>
95 <output name="summary" file="genomescope-out1-2.txt" ftype="txt" lines_diff="2"/> 116 <output name="summary" file="genomescope-out1-2.txt" ftype="txt" lines_diff="2"/>
96 <output name="progress" file="genomescope-out1-3.txt" ftype="txt" lines_diff="2"/> 117 <output name="progress" file="genomescope-out1-3.txt" ftype="txt" lines_diff="2"/>
97 <output name="testing" file="genomescope-out1-1.tab" ftype="tabular"/> 118 <output name="model_params" file="genomescope-out1-1.tab" ftype="tabular"/>
119 </test>
120 <!--Test initial_repetitiveness option-->
121 <test expect_num_outputs="5">
122 <param name="input" value="genomescope-in1.tab"/>
123 <param name="kmer_length" value="21"/>
124 <section name="advanced_options">
125 <param name="initial_repetitiveness" value="0.1"/>
126 </section>
127 <section name="output_options">
128 <param name="output_files" value="summary_output"/>
129 </section>
130 <output name="linear_plot" ftype="png">
131 <assert_contents>
132 <has_size value="223370" delta="300"/>
133 </assert_contents>
134 </output>
135 <output name="log_plot" ftype="png">
136 <assert_contents>
137 <has_size value="201425" delta="300"/>
138 </assert_contents>
139 </output>
140 <output name="transformed_linear_plot" ftype="png">
141 <assert_contents>
142 <has_size value="221442" delta="300"/>
143 </assert_contents>
144 </output>
145 <output name="transformed_log_plot" ftype="png">
146 <assert_contents>
147 <has_size value="210889" delta="300"/>
148 </assert_contents>
149 </output>
150 <output name="summary" ftype="txt" lines_diff="2">
151 <assert_contents>
152 <has_line line="initial repetitiveness = 0.1"/>
153 <has_text text="Homozygous (aa) 98.9538% 98.967%"/>
154 </assert_contents>
155 </output>
156 </test>
157 <!--Test initial_heterozygosities option-->
158 <test expect_num_outputs="5">
159 <param name="input" value="genomescope-in1.tab"/>
160 <param name="kmer_length" value="21"/>
161 <param name="ploidy" value="3"/>
162 <section name="advanced_options">
163 <param name="initial_heterozygosities" value="0.04,0.01"/>
164 </section>
165 <section name="output_options">
166 <param name="output_files" value="summary_output"/>
167 </section>
168 <output name="linear_plot" ftype="png">
169 <assert_contents>
170 <has_size value="213366" delta="300"/>
171 </assert_contents>
172 </output>
173 <output name="log_plot" ftype="png">
174 <assert_contents>
175 <has_size value="218425" delta="300"/>
176 </assert_contents>
177 </output>
178 <output name="transformed_linear_plot" ftype="png">
179 <assert_contents>
180 <has_size value="217280" delta="300"/>
181 </assert_contents>
182 </output>
183 <output name="transformed_log_plot" ftype="png">
184 <assert_contents>
185 <has_size value="229021" delta="300"/>
186 </assert_contents>
187 </output>
188 <output name="summary" ftype="txt" lines_diff="2">
189 <assert_contents>
190 <has_line line="initial heterozygosities = 0.04,0.01"/>
191 <has_text text="Homozygous (aaa) 6.03606% 100%"/>
192 </assert_contents>
193 </output>
98 </test> 194 </test>
99 </tests> 195 </tests>
100 <help><![CDATA[ 196 <help><![CDATA[
101 197
102 GenomeScope 2.0: Reference-free profiling of polyploid genomes 198 GenomeScope 2.0: Reference-free profiling of polyploid genomes
103 ============================================================== 199 ==============================================================
104 200
105 GenomeScope 2.0 applies classical insights from combinatorial theory to establish 201 GenomeScope 2.0 applies classical insights from combinatorial theory to establish a detailed mathematical model of how k-mer frequencies will be distributed in heterozygous and polyploid genomes. It employs a polyploid-aware mixture model that, within seconds, accurately infers genome properties from
106 a detailed mathematical model of how k-mer frequencies will be distributed in 202 unassembled sequencing data.
107 heterozygous and polyploid genomes. GenomeScope 2.0 employs a polyploid-aware 203
108 mixture model that, within seconds, accurately infers genome properties from 204 GenomeScope 2.0 uses the k-mer count distribution, e.g. from KMC or Jellyfish, and produces a report and several informative plots describing the genome properties. We validate the approach on simulated polyploid data created using a generative model with parameters for genome size, heterozygosity, repetitiveness, ploidy, and sequencing coverage, and find GenomeScope 2.0 retains accuracy across a broad range of realistic and extreme parameter values.
109 unassembled sequencing data. GenomeScope 2.0 uses the k-mer count distribution, 205
110 e.g. from KMC or Jellyfish, and produces a report and several informative plots 206 -----
111 describing the genome properties. We validate the approach on simulated polyploid 207
112 data created using a generative model with parameters for genome size, heterozygosity, 208 .. class:: infomark
113 repetitiveness, ploidy, and sequencing coverage, and find GenomeScope 2.0 retains 209
114 accuracy across a broad range of realistic and extreme parameter values. We also 210 **Topological relationships**
115 validate GenomeScope 2.0 by analyzing genuine sequence data from 11 diverse 211
116 polyploid genomes with known genome characteristics. 212 In the field of phylogenetics, the evolutionary relationships between species are often depicted in a branching diagram known as a phylogenetic tree. In this setting, the topology of the tree refers to the branching structure of the tree. We may also depict the similarities between homologous chromosomes in a branching diagram. In this case, a topology refers to the similarities between distinct homologues.
213
214 For ploidies of 4 and greater, there are multiple possible topologies. For example, the two tetraploid topologies are:
215
216 ::
217
218 AAAA → AAAB → AABC → ABCD
219 AAAA → AABB → AABC → ABCD
117 220
118 ]]></help> 221 ]]></help>
119 <citations> 222 <citations>
120 <citation type="doi">10.1093/bioinformatics/btx153</citation> 223 <citation type="doi">10.1093/bioinformatics/btx153</citation>
121 <citation type="doi">10.1038/s41467-020-14998-3</citation> 224 <citation type="doi">10.1038/s41467-020-14998-3</citation>