Mercurial > repos > iuc > genomescope
diff genomescope.xml @ 0:b2f674562a18 draft
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/genomescope commit aa87b7b1713b749328c5a710f32631aab2acaa3a"
author | iuc |
---|---|
date | Fri, 30 Apr 2021 20:21:25 +0000 |
parents | |
children | 3169a38c2656 |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/genomescope.xml Fri Apr 30 20:21:25 2021 +0000 @@ -0,0 +1,123 @@ +<tool id="genomescope" name="GenomeScope" version="@VERSION@" profile="20.01"> + <description>Analyze unassembled short reads</description> + <macros> + <token name="@VERSION@">2.0</token> + </macros> + <requirements> + <requirement type="package" version="@VERSION@">genomescope2</requirement> + </requirements> + <version_command>genomescope2 --version</version_command> + <command detect_errors="exit_code"><![CDATA[ + genomescope2 + --input '$input' + --output . + --kmer_length $kmer_length + $no_unique_sequence $testing $trace_flag + #if $ploidy: + --ploidy $ploidy + #end if + #if $lambda: + --lambda $lambda + #end if + #if $max_kmercov: + --max_kmercov $max_kmercov + #end if + #if $topology: + --topology $topology + #end if + #if $initial_repetitiveness: + --initial_repetitiveness $initial_repetitiveness + #end if + #if $initial_heterozygosities: + --initial_heterozygosities $initial_heterozygosities + #end if + #if $transform_exp: + --transform_exp $transform_exp + #end if + #if $true_params: + --true_params $true_params + #end if + #if $num_rounds: + --num_rounds $num_rounds + #end if + ]]> + </command> + <inputs> + <param argument="--input" type="data" format="tabular" label="Input histogram file" help="This file is a two column tabular file for example generated with the histo function of Jellyfish."/> + <param name="model_output" type="boolean" label="Add the model parameters to your history"/> + <param name="summary_output" type="boolean" label="Output a summary of the analysis"/> + <param name="progress_output" type="boolean" label="Additional information for each optimization round"/> + <param argument="--ploidy" type="integer" optional="true" label="Ploidy for model to use" help="Default: 2"/> + <param argument="--kmer_length" type="integer" value="21" optional="false" label="K-mer length used to calculate k-mer spectra"/> + <param argument="--lambda" type="integer" optional="true" label="Optional initial kmercov estimate for model to use"/> + <param argument="--max_kmercov" type="integer" optional="true" label="Optional maximum k-mer coverage threshold" help="K-mers with coverage greater than max_kmercov are ignored by the model"/> + <param argument="--no_unique_sequence" type="boolean" truevalue="--no_unique_sequence" falsevalue="" label="Turn off yellow unique sequence line in plots"/> + <param argument="--topology" type="integer" optional="true" label="Flag for topology for model to use"/> + <param argument="--initial_repetitiveness" type="integer" optional="true" label="Initial value for repetitiveness"/> + <param argument="--initial_heterozygosities" type="integer" optional="true" label="Initial values for nucleotide heterozygosity rates"/> + <param argument="--transform_exp" type="integer" optional="true" label="Parameter for the exponent when fitting a transformed (x**transform_exp*y vs. x) k-mer histogram" help="Default: 1"/> + <param argument="--testing" type="boolean" truevalue="--testing" falsevalue="" label="Create testing.tsv file with model parameters"/> + <param argument="--true_params" type="integer" optional="true" label="Flag to state true simulated parameters for testing mode"/> + <param argument="--trace_flag" type="boolean" truevalue="--trace_flag" falsevalue="" label="Turn on printing of iteration progress of nlsLM function"/> + <param argument="--num_rounds" type="integer" min="1" optional="true" label="Number of optimization rounds"/> + </inputs> + <outputs> + <data name="linear_plot" format="png" from_work_dir="linear_plot.png" label="${tool.name} on ${on_string} Linear plot"/> + <data name="log_plot" format="png" from_work_dir="log_plot.png" label="${tool.name} on ${on_string} Log plot"/> + <data name="transformed_linear_plot" format="png" from_work_dir="transformed_linear_plot.png" label="${tool.name} on ${on_string} Transformed linear plot"/> + <data name="transformed_log_plot" format="png" from_work_dir="transformed_log_plot.png" label="${tool.name} on ${on_string} Transformed log plot"/> + <data name="model" format="txt" from_work_dir="model.txt" label="${tool.name} on ${on_string} Model"> + <filter>model_output</filter> + </data> + <data name="summary" format="txt" from_work_dir="summary.txt" label="${tool.name} on ${on_string} Summary"> + <filter>summary_output</filter> + </data> + <data name="progress" format="txt" from_work_dir="progress.txt" label="${tool.name} on ${on_string} Progress"> + <filter>progress_output</filter> + </data> + <data name="model_params" format="tabular" from_work_dir="SIMULATED_testing.tsv" label="${tool.name} on ${on_string} Model parameters"> + <filter>testing</filter> + </data> + </outputs> + <tests> + <test expect_num_outputs="8"> + <param name="input" value="genomescope-in1.tab"/> + <param name="kmer_length" value="21"/> + <param name="testing" value="true"/> + <param name="model_output" value="true"/> + <param name="summary_output" value="true"/> + <param name="progress_output" value="true"/> + <output name="linear_plot" file="genomescope-out1-1.png" ftype="png"/> + <output name="log_plot" file="genomescope-out1-2.png" ftype="png"/> + <output name="transformed_linear_plot" file="genomescope-out1-3.png" ftype="png"/> + <output name="transformed_log_plot" file="genomescope-out1-4.png" ftype="png" compare="sim_size"/> + <output name="model" file="genomescope-out1-1.txt" ftype="txt" lines_diff="2"/> + <output name="summary" file="genomescope-out1-2.txt" ftype="txt" lines_diff="2"/> + <output name="progress" file="genomescope-out1-3.txt" ftype="txt" lines_diff="2"/> + <output name="testing" file="genomescope-out1-1.tab" ftype="tabular"/> + </test> + </tests> + <help><![CDATA[ + +GenomeScope 2.0: Reference-free profiling of polyploid genomes +============================================================== + +GenomeScope 2.0 applies classical insights from combinatorial theory to establish +a detailed mathematical model of how k-mer frequencies will be distributed in +heterozygous and polyploid genomes. GenomeScope 2.0 employs a polyploid-aware +mixture model that, within seconds, accurately infers genome properties from +unassembled sequencing data. GenomeScope 2.0 uses the k-mer count distribution, +e.g. from KMC or Jellyfish, and produces a report and several informative plots +describing the genome properties. We validate the approach on simulated polyploid +data created using a generative model with parameters for genome size, heterozygosity, +repetitiveness, ploidy, and sequencing coverage, and find GenomeScope 2.0 retains +accuracy across a broad range of realistic and extreme parameter values. We also +validate GenomeScope 2.0 by analyzing genuine sequence data from 11 diverse +polyploid genomes with known genome characteristics. + +]]></help> + <citations> + <citation type="doi">10.1093/bioinformatics/btx153</citation> + <citation type="doi">10.1038/s41467-020-14998-3</citation> + </citations> +</tool>