Mercurial > repos > iuc > genomescope
comparison genomescope.xml @ 1:3169a38c2656 draft
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/genomescope commit a0ba4e5bb9dd542bbf1395af64e59b9f72823fec"
author | iuc |
---|---|
date | Sat, 26 Jun 2021 14:17:47 +0000 |
parents | b2f674562a18 |
children | 01210c4e9144 |
comparison
equal
deleted
inserted
replaced
0:b2f674562a18 | 1:3169a38c2656 |
---|---|
1 <tool id="genomescope" name="GenomeScope" version="@VERSION@" profile="20.01"> | 1 <tool id="genomescope" name="GenomeScope" version="@VERSION@+galaxy@SUFFIX_VERSION@" profile="20.01"> |
2 <description>Analyze unassembled short reads</description> | 2 <description>reference-free genome profiling</description> |
3 <macros> | 3 <macros> |
4 <token name="@VERSION@">2.0</token> | 4 <token name="@VERSION@">2.0</token> |
5 <token name="@SUFFIX_VERSION@">1</token> | |
5 </macros> | 6 </macros> |
6 <requirements> | 7 <requirements> |
7 <requirement type="package" version="@VERSION@">genomescope2</requirement> | 8 <requirement type="package" version="@VERSION@">genomescope2</requirement> |
8 </requirements> | 9 </requirements> |
9 <version_command>genomescope2 --version</version_command> | 10 <version_command>genomescope2 --version</version_command> |
10 <command detect_errors="exit_code"><![CDATA[ | 11 <command detect_errors="exit_code"><![CDATA[ |
11 genomescope2 | 12 genomescope2 |
12 --input '$input' | 13 --input '$input' |
13 --output . | 14 --output . |
14 --kmer_length $kmer_length | 15 --kmer_length $kmer_length |
15 $no_unique_sequence $testing $trace_flag | 16 $output_options.no_unique_sequence |
17 $advanced_options.testing | |
18 $advanced_options.trace_flag | |
16 #if $ploidy: | 19 #if $ploidy: |
17 --ploidy $ploidy | 20 --ploidy $ploidy |
18 #end if | 21 #end if |
19 #if $lambda: | 22 #if $lambda: |
20 --lambda $lambda | 23 --lambda $lambda |
21 #end if | 24 #end if |
22 #if $max_kmercov: | 25 #if $max_kmercov: |
23 --max_kmercov $max_kmercov | 26 --max_kmercov $max_kmercov |
24 #end if | 27 #end if |
25 #if $topology: | 28 #if $advanced_options.topology: |
26 --topology $topology | 29 --topology $advanced_options.topology |
27 #end if | 30 #end if |
28 #if $initial_repetitiveness: | 31 #if $advanced_options.initial_repetitiveness: |
29 --initial_repetitiveness $initial_repetitiveness | 32 --initial_repetitiveness $advanced_options.initial_repetitiveness |
30 #end if | 33 #end if |
31 #if $initial_heterozygosities: | 34 #if $advanced_options.initial_heterozygosities: |
32 --initial_heterozygosities $initial_heterozygosities | 35 --initial_heterozygosities '${advanced_options.initial_heterozygosities}' |
33 #end if | 36 #end if |
34 #if $transform_exp: | 37 #if $advanced_options.transform_exp: |
35 --transform_exp $transform_exp | 38 --transform_exp $advanced_options.transform_exp |
36 #end if | 39 #end if |
37 #if $true_params: | 40 #if $advanced_options.true_params: |
38 --true_params $true_params | 41 --true_params '${advanced_options.true_params}' |
39 #end if | 42 #end if |
40 #if $num_rounds: | 43 #if $advanced_options.num_rounds: |
41 --num_rounds $num_rounds | 44 --num_rounds $advanced_options.num_rounds |
42 #end if | 45 #end if |
43 ]]> | 46 ]]> |
44 </command> | 47 </command> |
45 <inputs> | 48 <inputs> |
46 <param argument="--input" type="data" format="tabular" label="Input histogram file" help="This file is a two column tabular file for example generated with the histo function of Jellyfish."/> | 49 <param argument="--input" type="data" format="tabular" label="Input histogram file" help="This file is a two column tabular file for example generated with the histo function of Jellyfish."/> |
47 <param name="model_output" type="boolean" label="Add the model parameters to your history"/> | 50 <param argument="--ploidy" type="integer" min="1" max="6" optional="true" label="Ploidy for model to use" help="Default: 2"/> |
48 <param name="summary_output" type="boolean" label="Output a summary of the analysis"/> | |
49 <param name="progress_output" type="boolean" label="Additional information for each optimization round"/> | |
50 <param argument="--ploidy" type="integer" optional="true" label="Ploidy for model to use" help="Default: 2"/> | |
51 <param argument="--kmer_length" type="integer" value="21" optional="false" label="K-mer length used to calculate k-mer spectra"/> | 51 <param argument="--kmer_length" type="integer" value="21" optional="false" label="K-mer length used to calculate k-mer spectra"/> |
52 <param argument="--lambda" type="integer" optional="true" label="Optional initial kmercov estimate for model to use"/> | 52 <param argument="--lambda" type="integer" optional="true" label="Initial k-mer coverage estimate for model to use" help="This parameter is optional"/> |
53 <param argument="--max_kmercov" type="integer" optional="true" label="Optional maximum k-mer coverage threshold" help="K-mers with coverage greater than max_kmercov are ignored by the model"/> | 53 <param argument="--max_kmercov" type="integer" optional="true" label="Maximum k-mer coverage threshold" help="K-mers with coverage greater than max_kmercov are ignored by the model. This parameter is optional"/> |
54 <param argument="--no_unique_sequence" type="boolean" truevalue="--no_unique_sequence" falsevalue="" label="Turn off yellow unique sequence line in plots"/> | 54 <section name="output_options" title="Output options" expanded="true"> |
55 <param argument="--topology" type="integer" optional="true" label="Flag for topology for model to use"/> | 55 <param name="output_files" type="select" multiple="true" display="checkboxes" label="Output files"> |
56 <param argument="--initial_repetitiveness" type="integer" optional="true" label="Initial value for repetitiveness"/> | 56 <option value="model_output">Generate a file with the model parameters</option> |
57 <param argument="--initial_heterozygosities" type="integer" optional="true" label="Initial values for nucleotide heterozygosity rates"/> | 57 <option value="summary_output">Summary of the analysis</option> |
58 <param argument="--transform_exp" type="integer" optional="true" label="Parameter for the exponent when fitting a transformed (x**transform_exp*y vs. x) k-mer histogram" help="Default: 1"/> | 58 <option value="progress_output">Additional information for each optimization round</option> |
59 <param argument="--testing" type="boolean" truevalue="--testing" falsevalue="" label="Create testing.tsv file with model parameters"/> | 59 </param> |
60 <param argument="--true_params" type="integer" optional="true" label="Flag to state true simulated parameters for testing mode"/> | 60 <param argument="--no_unique_sequence" type="boolean" truevalue="--no_unique_sequence" falsevalue="" label="Turn off yellow unique sequence line in plots"/> |
61 <param argument="--trace_flag" type="boolean" truevalue="--trace_flag" falsevalue="" label="Turn on printing of iteration progress of nlsLM function"/> | 61 </section> |
62 <param argument="--num_rounds" type="integer" min="1" optional="true" label="Number of optimization rounds"/> | 62 <section name="advanced_options" title="Advanced options"> |
63 <param argument="--topology" type="integer" optional="true" label="Topological relationships between the homologous chromosomes" help="Flag for topology for model to use. Topology refers to the similarities between distinct homologues. Each triploid topology consists of two nucleotide heterozygosity forms (e.g. aab and abc), while each tetraploid, pentaploid, and hexaploid topology consists of three, four, and five heterozygosity forms respectively."/> | |
64 <param argument="--initial_repetitiveness" type="float" min="0" max="1" optional="true" label="Initial value for repetitiveness"/> | |
65 <param argument="--initial_heterozygosities" type="text" optional="true" label="Initial values for nucleotide heterozygosity rates" help="Nucleotide heterozygosity rates should be float values (separated by commas if necessary)"> | |
66 <sanitizer> | |
67 <valid initial="string.digits"><add value="," /><add value="." /></valid> | |
68 </sanitizer> | |
69 <validator type="regex">[0-9,.]+</validator> | |
70 </param> | |
71 <param argument="--transform_exp" type="integer" optional="true" label="Parameter for the exponent when fitting a transformed (x**transform_exp*y vs. x) k-mer histogram" help="Default: 1"/> | |
72 <param argument="--testing" type="boolean" truevalue="--testing" falsevalue="" label="Create testing.tsv file with model parameters"/> | |
73 <param argument="--true_params" type="text" optional="true" label="Set true simulated parameters for testing mode"> | |
74 <sanitizer> | |
75 <valid initial="string.digits"><add value="," /><add value="." /></valid> | |
76 </sanitizer> | |
77 <validator type="regex">[0-9,.]+</validator> | |
78 </param> | |
79 <param argument="--trace_flag" type="boolean" truevalue="--trace_flag" falsevalue="" label="Turn on printing of iteration progress of nlsLM function"/> | |
80 <param argument="--num_rounds" type="integer" min="1" optional="true" label="Number of optimization rounds"/> | |
81 </section> | |
63 </inputs> | 82 </inputs> |
64 <outputs> | 83 <outputs> |
65 <data name="linear_plot" format="png" from_work_dir="linear_plot.png" label="${tool.name} on ${on_string} Linear plot"/> | 84 <data name="linear_plot" format="png" from_work_dir="linear_plot.png" label="${tool.name} on ${on_string} Linear plot"/> |
66 <data name="log_plot" format="png" from_work_dir="log_plot.png" label="${tool.name} on ${on_string} Log plot"/> | 85 <data name="log_plot" format="png" from_work_dir="log_plot.png" label="${tool.name} on ${on_string} Log plot"/> |
67 <data name="transformed_linear_plot" format="png" from_work_dir="transformed_linear_plot.png" label="${tool.name} on ${on_string} Transformed linear plot"/> | 86 <data name="transformed_linear_plot" format="png" from_work_dir="transformed_linear_plot.png" label="${tool.name} on ${on_string} Transformed linear plot"/> |
68 <data name="transformed_log_plot" format="png" from_work_dir="transformed_log_plot.png" label="${tool.name} on ${on_string} Transformed log plot"/> | 87 <data name="transformed_log_plot" format="png" from_work_dir="transformed_log_plot.png" label="${tool.name} on ${on_string} Transformed log plot"/> |
69 <data name="model" format="txt" from_work_dir="model.txt" label="${tool.name} on ${on_string} Model"> | 88 <data name="model" format="txt" from_work_dir="model.txt" label="${tool.name} on ${on_string} Model"> |
70 <filter>model_output</filter> | 89 <filter>output_options['output_files'] and 'model_output' in output_options['output_files']</filter> |
71 </data> | 90 </data> |
72 <data name="summary" format="txt" from_work_dir="summary.txt" label="${tool.name} on ${on_string} Summary"> | 91 <data name="summary" format="txt" from_work_dir="summary.txt" label="${tool.name} on ${on_string} Summary"> |
73 <filter>summary_output</filter> | 92 <filter>output_options['output_files'] and 'summary_output' in output_options['output_files']</filter> |
74 </data> | 93 </data> |
75 <data name="progress" format="txt" from_work_dir="progress.txt" label="${tool.name} on ${on_string} Progress"> | 94 <data name="progress" format="txt" from_work_dir="progress.txt" label="${tool.name} on ${on_string} Progress"> |
76 <filter>progress_output</filter> | 95 <filter>output_options['output_files'] and 'progress_output' in output_options['output_files']</filter> |
77 </data> | 96 </data> |
78 <data name="model_params" format="tabular" from_work_dir="SIMULATED_testing.tsv" label="${tool.name} on ${on_string} Model parameters"> | 97 <data name="model_params" format="tabular" from_work_dir="SIMULATED_testing.tsv" label="${tool.name} on ${on_string} Model parameters"> |
79 <filter>testing</filter> | 98 <filter>advanced_options['testing']</filter> |
80 </data> | 99 </data> |
81 </outputs> | 100 </outputs> |
82 <tests> | 101 <tests> |
83 <test expect_num_outputs="8"> | 102 <test expect_num_outputs="8"> |
84 <param name="input" value="genomescope-in1.tab"/> | 103 <param name="input" value="genomescope-in1.tab"/> |
85 <param name="kmer_length" value="21"/> | 104 <param name="kmer_length" value="21"/> |
86 <param name="testing" value="true"/> | 105 <section name="output_options"> |
87 <param name="model_output" value="true"/> | 106 <param name="output_files" value="model_output,summary_output,progress_output"/> |
88 <param name="summary_output" value="true"/> | 107 </section> |
89 <param name="progress_output" value="true"/> | 108 <section name="advanced_options"> |
109 <param name="testing" value="true"/> | |
110 </section> | |
90 <output name="linear_plot" file="genomescope-out1-1.png" ftype="png"/> | 111 <output name="linear_plot" file="genomescope-out1-1.png" ftype="png"/> |
91 <output name="log_plot" file="genomescope-out1-2.png" ftype="png"/> | 112 <output name="log_plot" file="genomescope-out1-2.png" ftype="png"/> |
92 <output name="transformed_linear_plot" file="genomescope-out1-3.png" ftype="png"/> | 113 <output name="transformed_linear_plot" file="genomescope-out1-3.png" ftype="png"/> |
93 <output name="transformed_log_plot" file="genomescope-out1-4.png" ftype="png" compare="sim_size"/> | 114 <output name="transformed_log_plot" file="genomescope-out1-4.png" ftype="png" compare="sim_size"/> |
94 <output name="model" file="genomescope-out1-1.txt" ftype="txt" lines_diff="2"/> | 115 <output name="model" file="genomescope-out1-1.txt" ftype="txt" lines_diff="2"/> |
95 <output name="summary" file="genomescope-out1-2.txt" ftype="txt" lines_diff="2"/> | 116 <output name="summary" file="genomescope-out1-2.txt" ftype="txt" lines_diff="2"/> |
96 <output name="progress" file="genomescope-out1-3.txt" ftype="txt" lines_diff="2"/> | 117 <output name="progress" file="genomescope-out1-3.txt" ftype="txt" lines_diff="2"/> |
97 <output name="testing" file="genomescope-out1-1.tab" ftype="tabular"/> | 118 <output name="model_params" file="genomescope-out1-1.tab" ftype="tabular"/> |
119 </test> | |
120 <!--Test initial_repetitiveness option--> | |
121 <test expect_num_outputs="5"> | |
122 <param name="input" value="genomescope-in1.tab"/> | |
123 <param name="kmer_length" value="21"/> | |
124 <section name="advanced_options"> | |
125 <param name="initial_repetitiveness" value="0.1"/> | |
126 </section> | |
127 <section name="output_options"> | |
128 <param name="output_files" value="summary_output"/> | |
129 </section> | |
130 <output name="linear_plot" ftype="png"> | |
131 <assert_contents> | |
132 <has_size value="223370" delta="300"/> | |
133 </assert_contents> | |
134 </output> | |
135 <output name="log_plot" ftype="png"> | |
136 <assert_contents> | |
137 <has_size value="201425" delta="300"/> | |
138 </assert_contents> | |
139 </output> | |
140 <output name="transformed_linear_plot" ftype="png"> | |
141 <assert_contents> | |
142 <has_size value="221442" delta="300"/> | |
143 </assert_contents> | |
144 </output> | |
145 <output name="transformed_log_plot" ftype="png"> | |
146 <assert_contents> | |
147 <has_size value="210889" delta="300"/> | |
148 </assert_contents> | |
149 </output> | |
150 <output name="summary" ftype="txt" lines_diff="2"> | |
151 <assert_contents> | |
152 <has_line line="initial repetitiveness = 0.1"/> | |
153 <has_text text="Homozygous (aa) 98.9538% 98.967%"/> | |
154 </assert_contents> | |
155 </output> | |
156 </test> | |
157 <!--Test initial_heterozygosities option--> | |
158 <test expect_num_outputs="5"> | |
159 <param name="input" value="genomescope-in1.tab"/> | |
160 <param name="kmer_length" value="21"/> | |
161 <param name="ploidy" value="3"/> | |
162 <section name="advanced_options"> | |
163 <param name="initial_heterozygosities" value="0.04,0.01"/> | |
164 </section> | |
165 <section name="output_options"> | |
166 <param name="output_files" value="summary_output"/> | |
167 </section> | |
168 <output name="linear_plot" ftype="png"> | |
169 <assert_contents> | |
170 <has_size value="213366" delta="300"/> | |
171 </assert_contents> | |
172 </output> | |
173 <output name="log_plot" ftype="png"> | |
174 <assert_contents> | |
175 <has_size value="218425" delta="300"/> | |
176 </assert_contents> | |
177 </output> | |
178 <output name="transformed_linear_plot" ftype="png"> | |
179 <assert_contents> | |
180 <has_size value="217280" delta="300"/> | |
181 </assert_contents> | |
182 </output> | |
183 <output name="transformed_log_plot" ftype="png"> | |
184 <assert_contents> | |
185 <has_size value="229021" delta="300"/> | |
186 </assert_contents> | |
187 </output> | |
188 <output name="summary" ftype="txt" lines_diff="2"> | |
189 <assert_contents> | |
190 <has_line line="initial heterozygosities = 0.04,0.01"/> | |
191 <has_text text="Homozygous (aaa) 6.03606% 100%"/> | |
192 </assert_contents> | |
193 </output> | |
98 </test> | 194 </test> |
99 </tests> | 195 </tests> |
100 <help><![CDATA[ | 196 <help><![CDATA[ |
101 | 197 |
102 GenomeScope 2.0: Reference-free profiling of polyploid genomes | 198 GenomeScope 2.0: Reference-free profiling of polyploid genomes |
103 ============================================================== | 199 ============================================================== |
104 | 200 |
105 GenomeScope 2.0 applies classical insights from combinatorial theory to establish | 201 GenomeScope 2.0 applies classical insights from combinatorial theory to establish a detailed mathematical model of how k-mer frequencies will be distributed in heterozygous and polyploid genomes. It employs a polyploid-aware mixture model that, within seconds, accurately infers genome properties from |
106 a detailed mathematical model of how k-mer frequencies will be distributed in | 202 unassembled sequencing data. |
107 heterozygous and polyploid genomes. GenomeScope 2.0 employs a polyploid-aware | 203 |
108 mixture model that, within seconds, accurately infers genome properties from | 204 GenomeScope 2.0 uses the k-mer count distribution, e.g. from KMC or Jellyfish, and produces a report and several informative plots describing the genome properties. We validate the approach on simulated polyploid data created using a generative model with parameters for genome size, heterozygosity, repetitiveness, ploidy, and sequencing coverage, and find GenomeScope 2.0 retains accuracy across a broad range of realistic and extreme parameter values. |
109 unassembled sequencing data. GenomeScope 2.0 uses the k-mer count distribution, | 205 |
110 e.g. from KMC or Jellyfish, and produces a report and several informative plots | 206 ----- |
111 describing the genome properties. We validate the approach on simulated polyploid | 207 |
112 data created using a generative model with parameters for genome size, heterozygosity, | 208 .. class:: infomark |
113 repetitiveness, ploidy, and sequencing coverage, and find GenomeScope 2.0 retains | 209 |
114 accuracy across a broad range of realistic and extreme parameter values. We also | 210 **Topological relationships** |
115 validate GenomeScope 2.0 by analyzing genuine sequence data from 11 diverse | 211 |
116 polyploid genomes with known genome characteristics. | 212 In the field of phylogenetics, the evolutionary relationships between species are often depicted in a branching diagram known as a phylogenetic tree. In this setting, the topology of the tree refers to the branching structure of the tree. We may also depict the similarities between homologous chromosomes in a branching diagram. In this case, a topology refers to the similarities between distinct homologues. |
213 | |
214 For ploidies of 4 and greater, there are multiple possible topologies. For example, the two tetraploid topologies are: | |
215 | |
216 :: | |
217 | |
218 AAAA → AAAB → AABC → ABCD | |
219 AAAA → AABB → AABC → ABCD | |
117 | 220 |
118 ]]></help> | 221 ]]></help> |
119 <citations> | 222 <citations> |
120 <citation type="doi">10.1093/bioinformatics/btx153</citation> | 223 <citation type="doi">10.1093/bioinformatics/btx153</citation> |
121 <citation type="doi">10.1038/s41467-020-14998-3</citation> | 224 <citation type="doi">10.1038/s41467-020-14998-3</citation> |