Mercurial > repos > bgruening > gfastats
view gfastats.xml @ 9:f4a3eff8ab85 draft
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/gfastats commit 7de96873dbc693ce72d8d0aa734d1272a3d0f456
author | bgruening |
---|---|
date | Wed, 06 Nov 2024 11:28:41 +0000 |
parents | 6ec1b853c8fd |
children | 764f2516d837 |
line wrap: on
line source
<!--
    Galaxy wrapper for gfastats.

    The tool has three modes, selected through the "mode_condition" conditional:
      * statistics   : writes a report to the "stats" output (captured from stdout)
      * manipulation : writes edited sequences to dataset.<format>, then renamed to output_dataset
      * scaffolding  : integrates an AGP file and writes dataset.gfa, then renamed to output_dataset
    An optional "target_condition" conditional restricts processing to given
    headers/coordinates and/or BED intervals.
-->
<tool id="gfastats" name="gfastats" version="@TOOL_VERSION@+galaxy@SUFFIX_VERSION@" profile="23.2">
    <description>The swiss army knife for Genome Assembly</description>
    <macros>
        <import>macros.xml</import>
    </macros>
    <expand macro="biotools"/>
    <expand macro="requirements"/>
    <version_command>gfastats --version</version_command>
    <command detect_errors="exit_code"><![CDATA[
        gfastats '$input_file'

        ## Positional argument: expected genome size (assembly statistics only).
        #if $mode_condition.selector == 'statistics'
            #if $mode_condition.statistics_condition.selector == 'assembly'
                $mode_condition.statistics_condition.expected_genomesize
            #end if
        #end if

        ## Target selection.
        #if $target_condition.target_option == 'true'
            ## The sanitizer allows '|' and ' ' in the target text, so every
            ## whitespace-separated token is quoted individually: a header such
            ## as gi|123 must not be interpreted by the shell as a pipeline.
            ## An empty value yields no tokens and therefore no argument.
            #for $target in str($target_condition.target_sequence).split()
                '$target'
            #end for
            #if $target_condition.include_bed
                --include-bed '$target_condition.include_bed'
            #end if
            #if $target_condition.exclude_bed
                --exclude-bed '$target_condition.exclude_bed'
            #end if
        #end if

        #if $mode_condition.selector == 'manipulation'
            #if $mode_condition.swiss_army_knife
                -k '$mode_condition.swiss_army_knife'
            #end if
            #if $mode_condition.sort
                --sort $mode_condition.sort
            #end if
            ## Compare as string so that an explicit threshold of 0 is not
            ## dropped (0 is falsy, but min="0" makes it a valid input).
            #if str($mode_condition.homopolymer_compress) != ''
                --homopolymer-compress $mode_condition.homopolymer_compress
            #end if
            $mode_condition.discover_paths
            $mode_condition.remove_terminal_gaps
            ## The extension of the output file name selects the output format.
            -o dataset.$mode_condition.output_condition.out_format
            #if $mode_condition.output_condition.out_format in ['fasta','fasta.gz']
                #if $mode_condition.output_condition.line_length
                    --line-length $mode_condition.output_condition.line_length
                #end if
            #end if
            #if $mode_condition.output_condition.out_format in ['gfa','gfa.gz']
                #if $mode_condition.output_condition.terminal_overlaps_condition.terminal_overlaps_select == 'yes'
                    --discover-terminal-overlaps $mode_condition.output_condition.terminal_overlaps_condition.terminal_overlaps_length
                #end if
            #end if
        #else if $mode_condition.selector == "statistics"
            #if $mode_condition.statistics_condition.selector == 'size'
                --out-size $mode_condition.statistics_condition.out_size
            #else if $mode_condition.statistics_condition.selector == 'coordinates'
                --out-coord $mode_condition.statistics_condition.out_coord
            #else if $mode_condition.statistics_condition.selector == 'assembly'
                --nstar-report
            #else
                --segment-report
                $mode_condition.statistics_condition.out_sequence
            #end if
            $mode_condition.locale
            $mode_condition.discover_paths
            $mode_condition.tabular
            > '$stats'
        #else
            ## Scaffolding mode.
            --agp-to-path '$mode_condition.agp_to_path'
            $mode_condition.discover_paths
            -o dataset.gfa
        #end if

        --threads \${GALAXY_SLOTS:-8}

        ## Give the sequence output a fixed name so that from_work_dir can collect it.
        #if $mode_condition.selector == 'manipulation' or $mode_condition.selector == 'scaffolding'
            && mv dataset* output_dataset
        #end if
    ]]></command>
    <inputs>
        <param name="input_file" argument="--fasta" type="data" format="fasta,fastq,fastqsanger,gfa1,fasta.gz,fastq.gz,fastqsanger.gz,gfa1.gz" label="Input file"/>
        <conditional name="target_condition">
            <param name="target_option" type="select" label="Specify target sequences">
                <option value="false">Disabled</option>
                <option value="true">Enabled</option>
            </param>
            <when value="false"/>
            <when value="true">
                <param name="target_sequence" type="text" value="" label="Target sequence" optional="true" help="Target specific sequence by header, optionally with coordinates: header[:start-end]">
                    <sanitizer invalid_char="">
                        <valid initial="string.digits,string.letters">
                            <add value=":"/>
                            <add value="-"/>
                            <add value="_"/>
                            <add value="|"/>
                            <add value=" "/>
                        </valid>
                    </sanitizer>
                    <!-- The hyphen is placed last so it is a literal character and not a range operator. -->
                    <validator type="regex">[0-9A-Za-z:_| -]+</validator>
                </param>
                <param argument="--include-bed" type="data" optional="true" format="bed" label="Include specific intervals" help="Generates output on a subset list of headers or coordinates in 0-based bed format. It can be combined with --exclude-bed. Optional."/>
                <param argument="--exclude-bed" type="data" format="bed" optional="true" label="Exclude specific intervals" help="Exclude a subset of headers or coordinates in 0-based bed format. It can be combined with --include-bed. Optional."/>
            </when>
        </conditional>
        <conditional name="mode_condition">
            <param name="selector" type="select" label="Tool mode">
                <option value="statistics">Summary statistics generation</option>
                <option value="manipulation">Genome assembly manipulation</option>
                <option value="scaffolding">Scaffolding</option>
            </param>
            <when value="manipulation">
                <param argument="--swiss-army-knife" type="data" format="txt" label="SAK input file" optional="true" help="Set of instructions provided as an ordered list"/>
                <conditional name="output_condition">
                    <param argument="--out-format" type="select" label="Output format" help="Outputs selected sequences.">
                        <option value="fasta">FASTA</option>
                        <option value="fasta.gz">FASTA.gz</option>
                        <option value="fastq">FASTQ</option>
                        <option value="fastq.gz" selected="true">FASTQ.gz</option>
                        <option value="gfa">GFA</option>
                        <option value="gfa.gz">GFA.gz</option>
                    </param>
                    <when value="fasta">
                        <expand macro="length_macro"/>
                    </when>
                    <when value="fasta.gz">
                        <expand macro="length_macro"/>
                    </when>
                    <when value="fastq"/>
                    <when value="fastq.gz"/>
                    <when value="gfa">
                        <expand macro="terminal_overlaps_macro"/>
                    </when>
                    <when value="gfa.gz">
                        <expand macro="terminal_overlaps_macro"/>
                    </when>
                </conditional>
                <param argument="--discover-paths" type="boolean" truevalue="--discover-paths" falsevalue="" checked="false" label="Generates the initial set of paths" help="In the graph space, an assembly is a collection of segments and edges/gaps between these segments. A path defines a potential walk through the segments and edges/gaps that corresponds to a hypothesis of the actual linear sequence."/>
                <param argument="--sort" type="select" label="Sort sequences" help="Specify how to sort the sequences. Ascending/descending uses the sequence/path header.">
                    <option value="" selected="true">Disabled</option>
                    <option value="ascending">Ascending</option>
                    <option value="descending">Descending</option>
                    <option value="largest">Largest</option>
                    <option value="smallest">Smallest</option>
                </param>
                <param argument="--remove-terminal-gaps" type="boolean" truevalue="--remove-terminal-gaps" falsevalue="" checked="false" label="Remove gaps from the beginning and end of sequences"/>
                <param argument="--homopolymer-compress" type="integer" min="0" value="" optional="true" label="Homopolymer compression" help="Compress all the homopolymers in the input above the given threshold."/>
            </when>
            <when value="statistics">
                <conditional name="statistics_condition">
                    <param name="selector" type="select" label="Report mode">
                        <option value="assembly" selected="true">Genome assembly statistics (--nstar-report)</option>
                        <option value="size">Scaffold, contig or gap sizes (--out-size)</option>
                        <option value="coordinates">AGP, contig or gap coordinates (--out-coord)</option>
                        <option value="sequence">Sequence statistics (--segment-report)</option>
                    </param>
                    <when value="size">
                        <param argument="--out-size" type="select" label="Feature for reporting sizes" help="Generate a tabular file with the sequence sizes">
                            <option value="s">Scaffolds</option>
                            <option value="c">Contigs</option>
                            <option value="g">Gaps</option>
                        </param>
                    </when>
                    <when value="coordinates">
                        <param argument="--out-coord" type="select" label="BED coordinates feature" help="Generates bed coordinates of given feature. Default: AGP">
                            <option value="a">AGP</option>
                            <option value="c">Contigs</option>
                            <option value="g">Gaps</option>
                        </param>
                    </when>
                    <when value="assembly">
                        <param name="expected_genomesize" type="integer" min="0" optional="true" label="Expected genome size" help="Estimated genome size. This parameter is optional, but required for NG* statistics."/>
                    </when>
                    <when value="sequence">
                        <param argument="--out-sequence" type="boolean" truevalue="--out-sequence" falsevalue="" checked="false" label="Report actual sequence" help="It reports also the actual sequence"/>
                    </when>
                </conditional>
                <param argument="--locale" type="boolean" truevalue="--locale en_US.UTF-8" falsevalue="" checked="true" label="Thousands separator in output" help="Use commas for thousands separator in output?"/>
                <param argument="--tabular" type="boolean" truevalue="--tabular" falsevalue="" checked="true" label="Tabular-format output" help="Generate output in tabular format"/>
                <param argument="--discover-paths" type="boolean" truevalue="--discover-paths" falsevalue="" checked="false" label="Generates the initial set of paths" help="In the graph space an assembly is a collection of segment and edges/gaps between these segments. A path defines a potential walk through the segments and edges/gaps that corresponds to a hypothesis of the actual linear sequence"/>
            </when>
            <when value="scaffolding">
                <param argument="--agp-to-path" type="data" format="agp" label="Input AGP file" help="Integrate the AGP information into the assembly graph as paths. The scaffolding information is converted into paths through the graph"/>
                <param argument="--discover-paths" type="boolean" truevalue="--discover-paths" falsevalue="" checked="false" label="Generates the initial set of paths" help="In the graph space an assembly is a collection of segment and edges/gaps between these segments. A path defines a potential walk through the segments and edges/gaps that corresponds to a hypothesis of the actual linear sequence"/>
            </when>
        </conditional>
    </inputs>
    <outputs>
        <!-- Report captured from stdout; only produced in statistics mode. -->
        <data name="stats" format="tabular" label="${tool.name} on ${on_string}: stats">
            <filter>mode_condition['selector'] == 'statistics'</filter>
            <change_format>
                <!--
                    NOTE(review): this rule looks up an input called "tabular" at the top
                    level, while the tabular switch is declared inside mode_condition, and
                    tests 07 and 12 expect ftype "tabular" with the switch off. The rule
                    therefore appears never to match; confirm and update those tests
                    before correcting the lookup. "txt" is the Galaxy plain-text extension.
                -->
                <when input="tabular" value="false" format="txt"/>
            </change_format>
        </data>
        <!-- Sequence output of manipulation and scaffolding modes, collected from the renamed work file. -->
        <data name="output" format="fastq" from_work_dir="output_dataset" label="${tool.name} on ${on_string}: edited sequences">
            <filter>mode_condition['selector'] == 'manipulation' or mode_condition['selector'] == 'scaffolding'</filter>
            <change_format>
                <when input="mode_condition.output_condition.out_format" value="fasta" format="fasta"/>
                <when input="mode_condition.output_condition.out_format" value="fasta.gz" format="fasta.gz"/>
                <when input="mode_condition.output_condition.out_format" value="fastq" format="fastq"/>
                <when input="mode_condition.output_condition.out_format" value="fastq.gz" format="fastq.gz"/>
                <when input="mode_condition.output_condition.out_format" value="gfa" format="gfa1"/>
                <when input="mode_condition.output_condition.out_format" value="gfa.gz" format="gfa1.gz"/>
                <when input="mode_condition.selector" value="scaffolding" format="gfa1"/>
            </change_format>
        </data>
    </outputs>
    <tests>
        <!--Test 01 -->
        <test expect_num_outputs="1">
            <param name="input_file" value="dataset_01.fastq.gz"/>
            <conditional name="mode_condition">
                <param name="selector" value="manipulation"/>
                <param name="swiss_army_knife" value="swiss_army.sak"/>
                <conditional name="output_condition">
                    <param name="out_format" value="fasta.gz"/>
                </conditional>
            </conditional>
            <output name="output" value="test_01.fasta.gz" ftype="fasta.gz"/>
        </test>
        <!--Test 01 A) -->
        <test expect_num_outputs="1">
            <param name="input_file" value="dataset_01.fastq.gz"/>
            <conditional name="mode_condition">
                <param name="selector" value="manipulation"/>
                <param name="swiss_army_knife" value="swiss_army.sak"/>
                <param name="remove_terminal_gaps" value="true"/>
                <conditional name="output_condition">
                    <param name="out_format" value="fasta.gz"/>
                </conditional>
            </conditional>
            <output name="output" value="test_01_a.fasta.gz" ftype="fasta.gz"/>
        </test>
        <!--Test 02 -->
        <test expect_num_outputs="1">
            <param name="input_file" value="dataset_01.fastq.gz"/>
            <conditional name="target_condition">
                <!-- The select is named target_option; it must be enabled for target_sequence to be used. -->
                <param name="target_option" value="true"/>
                <param name="target_sequence" value="S1_1"/>
            </conditional>
            <conditional name="mode_condition">
                <param name="selector" value="statistics"/>
                <conditional name="statistics_condition">
                    <param name="selector" value="size"/>
                    <param name="out_size" value="c"/>
                </conditional>
                <param name="locale" value="false"/>
            </conditional>
            <output name="stats" ftype="tabular">
                <assert_contents>
                    <has_text text="S1_1.1	8550"/>
                </assert_contents>
            </output>
        </test>
        <!--Test 03 -->
        <test expect_num_outputs="1">
            <param name="input_file" value="dataset_02.fasta.gz"/>
            <conditional name="mode_condition">
                <param name="selector" value="statistics"/>
                <conditional name="statistics_condition">
                    <param name="selector" value="sequence"/>
                </conditional>
                <param name="locale" value="false"/>
            </conditional>
            <output name="stats" ftype="tabular">
                <assert_contents>
                    <has_text text="23029444-6c7d-4f19-ac8e-4c0a478589d0.1		476	198	68	93	117	33.82	0"/>
                </assert_contents>
            </output>
        </test>
        <!--Test 04 -->
        <test expect_num_outputs="1">
            <param name="input_file" value="dataset_03.fasta"/>
            <conditional name="mode_condition">
                <param name="selector" value="statistics"/>
                <conditional name="statistics_condition">
                    <param name="selector" value="assembly"/>
                    <param name="expected_genomesize" value="600000"/>
                </conditional>
                <param name="locale" value="false"/>
            </conditional>
            <output name="stats" value="test_04_stats.tabular" ftype="tabular"/>
        </test>
        <!--Test 05 -->
        <test expect_num_outputs="1">
            <param name="input_file" value="dataset_04.gfa"/>
            <conditional name="mode_condition">
                <param name="selector" value="statistics"/>
                <conditional name="statistics_condition">
                    <param name="selector" value="coordinates"/>
                    <param name="out_coord" value="a"/>
                </conditional>
                <param name="locale" value="false"/>
            </conditional>
            <output name="stats" value="test_05_stats.tabular" ftype="tabular"/>
        </test>
        <!--Test 06 -->
        <test expect_num_outputs="1">
            <param name="input_file" value="dataset_04.gfa"/>
            <conditional name="mode_condition">
                <param name="selector" value="manipulation"/>
                <conditional name="output_condition">
                    <param name="out_format" value="fasta.gz"/>
                </conditional>
            </conditional>
            <output name="output" value="test_06.fasta.gz" ftype="fasta.gz"/>
        </test>
        <!--Test 07 -->
        <test expect_num_outputs="1">
            <param name="input_file" value="dataset_03.fasta"/>
            <conditional name="mode_condition">
                <param name="selector" value="statistics"/>
                <conditional name="statistics_condition">
                    <param name="selector" value="assembly"/>
                </conditional>
                <param name="locale" value="false"/>
                <param name="tabular" value="false"/>
            </conditional>
            <output name="stats" value="test_07_stats.tabular" ftype="tabular"/>
        </test>
        <!--Test 08 -->
        <test expect_num_outputs="1">
            <param name="input_file" value="dataset_01.fastq.gz"/>
            <conditional name="mode_condition">
                <param name="selector" value="manipulation"/>
                <conditional name="output_condition">
                    <param name="out_format" value="fasta.gz"/>
                </conditional>
                <param name="sort" value="ascending"/>
            </conditional>
            <output name="output" value="test_08.fasta.gz" ftype="fasta.gz"/>
        </test>
        <!--Test 09 -->
        <test expect_num_outputs="1">
            <param name="input_file" value="dataset_03.fasta"/>
            <conditional name="mode_condition">
                <param name="selector" value="manipulation"/>
                <conditional name="output_condition">
                    <param name="out_format" value="fasta.gz"/>
                </conditional>
                <param name="homopolymer_compress" value="10"/>
            </conditional>
            <output name="output" value="test_09.fasta.gz" ftype="fasta.gz"/>
        </test>
        <!--Test 10 -->
        <test expect_num_outputs="1">
            <param name="input_file" value="dataset_04.gfa"/>
            <conditional name="mode_condition">
                <param name="selector" value="scaffolding"/>
                <param name="agp_to_path" value="dataset_05.agp"/>
                <param name="discover_paths" value="true"/>
            </conditional>
            <output name="output" value="test_10.gfa" ftype="gfa1"/>
        </test>
        <!--Test 11 -->
        <test expect_num_outputs="1">
            <param name="input_file" value="dataset_03.fasta"/>
            <conditional name="target_condition">
                <param name="target_option" value="true"/>
                <param name="include_bed" value="regions.bed"/>
            </conditional>
            <conditional name="mode_condition">
                <param name="locale" value="false"/>
            </conditional>
            <output name="stats" value="test_11_stats.tabular" ftype="tabular"/>
        </test>
        <!--Test 12 -->
        <test expect_num_outputs="1">
            <param name="input_file" value="dataset_04.gfa"/>
            <conditional name="mode_condition">
                <param name="selector" value="statistics"/>
                <conditional name="statistics_condition">
                    <param name="selector" value="assembly"/>
                </conditional>
                <param name="locale" value="false"/>
                <param name="tabular" value="false"/>
                <param name="discover_paths" value="true"/>
            </conditional>
            <output name="stats" value="test_12_stats.tabular" ftype="tabular"/>
        </test>
    </tests>
    <help><![CDATA[

.. class:: infomark

**Purpose**

gfastats is a single fast and exhaustive tool for summary statistics and simultaneous genome assembly file manipulation. gfastats also allows seamless format conversion.

.. class:: infomark

**Metrics details**

Typical fast* metrics include:

- Scaffold, contig and gap size
- Number of scaffolds, contigs and gaps
- Total length of scaffolds, contigs and gaps
- Scaffold, contig, gap N50 and statistics (full N*/NG* statistics with the --nstar-report flag)
- Area under the curve (AuN/AuNG) values for scaffolds, contigs and gaps
- Average scaffold, contig, gap size
- Largest scaffold, contig and gap
- Base composition and GC content
- Soft-masked base counts (lower case bases)

Typical gfa metrics include:

- Number of nodes and edges
- Average degree
- Number of connected components, and length of the largest connected component
- Number of dead ends
- Number of disconnected components, and their total length

.. class:: infomark

**Assembly manipulation**

gfastats allows extensive assembly manipulation at the sequence level. Manipulation is achieved using a set of instructions provided as an ordered list in a file to the option **swiss army knife**. See the `instruction wiki <https://github.com/vgl-hub/gfastats/tree/main/instructions>`_ for a full list of instructions.

    ]]></help>
    <expand macro="citations"/>
</tool>