Mercurial > repos > bgruening > gfstats
diff gfastats.xml @ 0:5f250ffcb1af draft default tip
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/gfastats commit 6ca9363cc4e0da886aab9accd79d52663247af29"
author | bgruening |
---|---|
date | Tue, 08 Mar 2022 21:49:13 +0000 |
parents | |
children |
line wrap: on
line diff
<!-- gfastats.xml as first added to the repository (new file, no parent revision),
     reflowed from the collapsed diff rendering into regular indented XML. -->
<tool id="gfastats" name="gfastats" version="@TOOL_VERSION@+galaxy@SUFFIX_VERSION@" profile="20.01">
    <description>the swiss army knife for genome assembly</description>
    <!-- Version tokens and the requirements/biotools/citations/length_macro macros come from macros.xml. -->
    <macros>
        <import>macros.xml</import>
    </macros>
    <expand macro="requirements" />
    <expand macro="biotools"/>
    <version_command>gfastats --version</version_command>
    <command detect_errors="exit_code"><![CDATA[
        ## Argument order: input file, optional expected genome size (positional),
        ## optional target sequence (positional), then the mode-specific flags.
        gfastats
        '$input_file'
        #if $mode_condition.selector == 'statistics'
            #if $mode_condition.statistics_condition.selector == 'assembly'
                $mode_condition.statistics_condition.expected_genomesize
            #end if
        #end if
        #if $target_condition.target_option == 'true'
            ## The sanitizer allows '|' and ' ' in the target, so it has to be quoted for the shell.
            ## The sanitizer does not allow a single quote, so single-quoting is sufficient.
            ## A blank value is skipped so that no empty positional argument is passed.
            #set $target = str($target_condition.target_sequence).strip()
            #if $target
                '$target'
            #end if
            #if $target_condition.include_bed
                --include-bed '$target_condition.include_bed'
            #end if
            #if $target_condition.exclude_bed
                --exclude-bed '$target_condition.exclude_bed'
            #end if
        #end if
        #if $mode_condition.selector == 'manipulation'
            #if $mode_condition.swiss_army_knife
                -k '$mode_condition.swiss_army_knife'
            #end if
            #if $mode_condition.sort
                --sort $mode_condition.sort
            #end if
            $mode_condition.homopolymer_compress
            ## The output extension is taken from the selected format.
            -o dataset.$mode_condition.output_condition.out_format
            ## line_length only exists in the FASTA branches of output_condition
            ## (presumably declared by length_macro in macros.xml).
            #if str($mode_condition.output_condition.out_format) in ['fasta', 'fasta.gz']
                #if $mode_condition.output_condition.line_length
                    --line-length $mode_condition.output_condition.line_length
                #end if
            #end if
            ## Move the result to the fixed name collected through from_work_dir.
            && mv dataset* output_dataset
        #else
            #if $mode_condition.statistics_condition.selector == 'size'
                --out-size $mode_condition.statistics_condition.out_size
            #else if $mode_condition.statistics_condition.selector == 'coordinates'
                --out-coord $mode_condition.statistics_condition.out_coord
            #else if $mode_condition.statistics_condition.selector == 'assembly'
                --nstar-report
            #else
                --seq-report
                $mode_condition.statistics_condition.out_sequence
            #end if
            $mode_condition.tabular > '$stats'
        #end if
    ]]></command>
    <inputs>
        <param name="input_file" argument="--fasta" type="data"
            format="fasta,fastq,fastqsanger,gfa1,fasta.gz,fastq.gz,fastqsanger.gz,gfa1.gz"
            label="Input file"/>
        <!-- Optional restriction of the run to a target sequence and/or BED intervals. -->
        <conditional name="target_condition">
            <param name="target_option" type="select" label="Specify target sequences">
                <option value="false">Disabled</option>
                <option value="true">Enabled</option>
            </param>
            <when value="false"/>
            <when value="true">
                <param name="target_sequence" type="text" value="" label="Target sequence" help="Target specific sequence by header, optionally with coordinates: header[:start-end]">
                    <sanitizer invalid_char="">
                        <valid initial="string.digits,string.letters">
                            <add value=":"/>
                            <add value="-"/>
                            <add value="_"/>
                            <add value="|"/>
                            <add value=" "/>
                        </valid>
                    </sanitizer>
                    <!-- The hyphen must stay last in the character class; placed between ':' and '_'
                         it is read as a range and also admits ; < = > ? @ [ \ ] ^ -->
                    <validator type="regex">[0-9A-Za-z:_| -]+</validator>
                </param>
                <param argument="--include-bed" type="data" optional="true"
                    format="bed" label="Include specific intervals"
                    help="Generates output on a subset list of headers or coordinates in 0-based bed format. It can be combined with --exclude-bed. Optional"/>
                <param argument="--exclude-bed" type="data" format="bed" optional="true"
                    label="Exclude specific intervals"
                    help="Exclude a subset of headers or coordinates in 0-based bed format. It can be combined with --include-bed. Optional"/>
            </when>
        </conditional>
        <!-- Tool mode: either report statistics (tabular/text on stdout) or write edited sequences. -->
        <conditional name="mode_condition">
            <param name="selector" type="select" label="Tool mode">
                <option value="statistics">Summary statistics generation</option>
                <option value="manipulation">Genome assembly manipulation</option>
            </param>
            <when value="manipulation">
                <param argument="--swiss-army-knife" type="data"
                    format="txt" label="SAK input file" optional="true"
                    help="Set of instructions provided as an ordered list"/>
                <conditional name="output_condition">
                    <param argument="--out-format" type="select"
                        label="Output format" help="Outputs selected sequences.">
                        <option value="fasta">FASTA</option>
                        <option value="fasta.gz">FASTA.gz</option>
                        <option value="fastq">FASTQ</option>
                        <option value="fastq.gz" selected="true">FASTQ.gz</option>
                        <option value="gfa">GFA</option>
                        <option value="gfa.gz">GFA.gz</option>
                    </param>
                    <when value="fasta">
                        <expand macro="length_macro"/>
                    </when>
                    <when value="fasta.gz">
                        <expand macro="length_macro"/>
                    </when>
                    <when value="fastq"/>
                    <when value="fastq.gz"/>
                    <when value="gfa"/>
                    <when value="gfa.gz"/>
                </conditional>
                <!-- The empty value means "no sorting"; the command template omits the flag in that case. -->
                <param argument="--sort" type="select" label="Sort sequences" help="Specify how to sort the sequences. Ascending/descending use the sequence/path header.">
                    <option value="" selected="true">Disabled</option>
                    <option value="ascending">Ascending</option>
                    <option value="descending">Descending</option>
                    <option value="largest">Largest</option>
                    <option value="smallest">Smallest</option>
                </param>
                <param argument="--homopolymer-compress" type="boolean" truevalue="--homopolymer-compress" falsevalue="" checked="false"
                    label="Homopolymer compression" help="Compress all the homopolymers in the input"/>
            </when>
            <when value="statistics">
                <conditional name="statistics_condition">
                    <param name="selector" type="select" label="Report mode">
                        <option value="assembly" selected="true">Genome assembly statistics (--nstar-report)</option>
                        <option value="size">Scaffold, contig or gap sizes (--out-size)</option>
                        <option value="coordinates">AGP, contig or gap coordinates (--out-coord)</option>
                        <option value="sequence">Sequence statistics (--seq-report)</option>
                    </param>
                    <when value="size">
                        <param argument="--out-size" type="select" label="Feature for reporting sizes"
                            help="Generate a tabular file with the sequence sizes">
                            <option value="s">Scaffolds</option>
                            <option value="c">Contigs</option>
                            <option value="g">Gaps</option>
                        </param>
                    </when>
                    <when value="coordinates">
                        <param argument="--out-coord" type="select" label="BED coordinates feature"
                            help="Generates bed coordinates of given feature. Default: agp">
                            <option value="a">AGP</option>
                            <option value="c">Contigs</option>
                            <option value="g">Gaps</option>
                        </param>
                    </when>
                    <when value="assembly">
                        <!-- Passed as a bare positional argument right after the input file. -->
                        <param name="expected_genomesize" type="integer" min="0" optional="true"
                            label="Expected genome size" help="Estimated genome size. This parameter is optional, but required for NG* statistics."/>
                    </when>
                    <when value="sequence">
                        <param argument="--out-sequence" type="boolean" truevalue="--out-sequence" falsevalue="" checked="false"
                            label="Report actual sequence" help="It reports also the actual sequence"/>
                    </when>
                </conditional>
                <param argument="--tabular" type="boolean" truevalue="--tabular" falsevalue="" checked="true"
                    label="Tabular-format output" help="Generate output in tabular format"/>
            </when>
        </conditional>
    </inputs>
    <outputs>
        <!-- Statistics mode: stdout of gfastats. -->
        <data name="stats" format="tabular" label="${tool.name} on ${on_string}: stats">
            <filter>mode_condition['selector'] == 'statistics'</filter>
            <!-- NOTE(review): 'tabular' is declared inside mode_condition, while the sibling output
                 below addresses its input by dotted path, and the value compared here is 'false'
                 although the boolean declares an empty falsevalue. Confirm that this rule really
                 matches; test 07 expects ftype tabular with the option switched off. -->
            <change_format>
                <when input="tabular" value="false" format="txt"/>
            </change_format>
        </data>
        <!-- Manipulation mode: file renamed to output_dataset by the command; datatype follows out_format. -->
        <data name="output" format="fastq" from_work_dir="output_dataset" label="${tool.name} on ${on_string}: edited sequences">
            <filter>mode_condition['selector'] == 'manipulation'</filter>
            <change_format>
                <when input="mode_condition.output_condition.out_format" value="fasta" format="fasta"/>
                <when input="mode_condition.output_condition.out_format" value="fasta.gz" format="fasta.gz"/>
                <when input="mode_condition.output_condition.out_format" value="fastq" format="fastq"/>
                <when input="mode_condition.output_condition.out_format" value="fastq.gz" format="fastq.gz"/>
                <when input="mode_condition.output_condition.out_format" value="gfa" format="gfa1"/>
                <when input="mode_condition.output_condition.out_format" value="gfa.gz" format="gfa1.gz"/>
            </change_format>
        </data>
    </outputs>
    <tests>
        <!--Test 01 -->
        <test expect_num_outputs="1">
            <param name="input_file" value="dataset_01.fastq.gz"/>
            <conditional name="target_condition">
                <param name="target_option" value="true"/>
                <param name="target_sequence" value="S1_1"/>
            </conditional>
            <conditional name="mode_condition">
                <param name="selector" value="manipulation"/>
                <param name="swiss_army_knife" value="swiss_army.sak"/>
                <conditional name="output_condition">
                    <param name="out_format" value="fasta.gz"/>
                </conditional>
            </conditional>
            <output name="output" value="test_01.fasta.gz" ftype="fasta.gz"/>
        </test>
        <!--Test 02 -->
        <test expect_num_outputs="1">
            <param name="input_file" value="dataset_01.fastq.gz"/>
            <conditional name="target_condition">
                <param name="target_option" value="true"/>
                <param name="target_sequence" value="S1_1"/>
            </conditional>
            <conditional name="mode_condition">
                <param name="selector" value="statistics"/>
                <conditional name="statistics_condition">
                    <param name="selector" value="size"/>
                    <param name="out_size" value="c"/>
                </conditional>
            </conditional>
            <output name="stats" value="test_02_stats.tabular" ftype="tabular"/>
        </test>
        <!--Test 03 -->
        <test expect_num_outputs="1">
            <param name="input_file" value="dataset_02.fasta.gz"/>
            <conditional name="mode_condition">
                <param name="selector" value="statistics"/>
                <conditional name="statistics_condition">
                    <param name="selector" value="sequence"/>
                </conditional>
            </conditional>
            <output name="stats" value="test_03_stats.tabular" ftype="tabular"/>
        </test>
        <!--Test 04 -->
        <test expect_num_outputs="1">
            <param name="input_file" value="dataset_03.fasta"/>
            <conditional name="mode_condition">
                <param name="selector" value="statistics"/>
                <conditional name="statistics_condition">
                    <param name="selector" value="assembly"/>
                    <param name="expected_genomesize" value="600000"/>
                </conditional>
            </conditional>
            <output name="stats" value="test_04_stats.tabular" ftype="tabular"/>
        </test>
        <!--Test 05 -->
        <test expect_num_outputs="1">
            <param name="input_file" value="dataset_04.gfa"/>
            <conditional name="mode_condition">
                <param name="selector" value="statistics"/>
                <conditional name="statistics_condition">
                    <param name="selector" value="coordinates"/>
                    <param name="out_coord" value="a"/>
                </conditional>
            </conditional>
            <output name="stats" value="test_05_stats.tabular" ftype="tabular"/>
        </test>
        <!--Test 06 -->
        <test expect_num_outputs="1">
            <param name="input_file" value="dataset_04.gfa"/>
            <conditional name="mode_condition">
                <param name="selector" value="manipulation"/>
                <conditional name="output_condition">
                    <param name="out_format" value="fasta.gz"/>
                </conditional>
            </conditional>
            <output name="output" value="test_06.fasta.gz" ftype="fasta.gz"/>
        </test>
        <!--Test 07 -->
        <test expect_num_outputs="1">
            <param name="input_file" value="dataset_03.fasta"/>
            <conditional name="mode_condition">
                <param name="selector" value="statistics"/>
                <conditional name="statistics_condition">
                    <param name="selector" value="assembly"/>
                </conditional>
                <param name="tabular" value="false"/>
            </conditional>
            <output name="stats" value="test_07_stats.tabular" ftype="tabular"/>
        </test>
        <!--Test 08 -->
        <test expect_num_outputs="1">
            <param name="input_file" value="dataset_01.fastq.gz"/>
            <conditional name="mode_condition">
                <param name="selector" value="manipulation"/>
                <conditional name="output_condition">
                    <param name="out_format" value="fasta.gz"/>
                </conditional>
                <param name="sort" value="ascending"/>
                <param name="homopolymer_compress" value="true"/>
            </conditional>
            <output name="output" value="test_08.fasta.gz" ftype="fasta.gz"/>
        </test>
    </tests>
    <help><![CDATA[

.. class:: infomark

**Purpose**

gfastats is a single fast and exhaustive tool for summary statistics and simultaneous genome assembly file manipulation. gfastats also allows seamless format conversion.


.. class:: infomark

**Metrics details**

Typical fast* metrics include:

- Scaffold, contig and gap size
- Number of scaffolds, contigs and gaps
- Total length of scaffolds, contigs and gaps
- Scaffold, contig, gap N50 and statistics (full N*/NG* statistics with the --nstar-report flag)
- Area under the curve (AuN/AuNG) values for scaffolds, contigs and gaps
- Average scaffold, contig, gap size
- Largest scaffold, contig and gap
- Base composition and GC content
- Soft-masked base counts (lower case bases)


Typical gfa metrics include:

- Number of nodes and edges
- Average degree
- Number of connected components, and length of the largest connected component
- Number of dead ends
- Number of disconnected components, and their total length


.. class:: infomark

**Assembly manipulation**

gfastats allows extensive assembly manipulation at the sequence level. Manipulation is achieved using a set of instructions provided as an ordered list in a file to the option **swiss army knife**. See the `instruction wiki <https://github.com/vgl-hub/gfastats/tree/main/instructions>`_ for a full list of instructions.

    ]]></help>
    <expand macro="citations" />
</tool>