Mercurial > repos > iuc > fasta_stats
diff fasta-stats.xml @ 4:0dbb995c7d35 draft default tip
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/fasta_stats/ commit 50f5cce5a8c11001e2c59600a2b99a4243b6d06f"
author | iuc |
---|---|
date | Thu, 18 Nov 2021 20:56:57 +0000 |
parents | 56022eb50bbd |
children |
line wrap: on
line diff
--- a/fasta-stats.xml Mon Jul 05 13:36:26 2021 +0000 +++ b/fasta-stats.xml Thu Nov 18 20:56:57 2021 +0000 @@ -1,66 +1,91 @@ -<tool id="fasta-stats" name="Fasta Statistics" version="1.0.3"> - <description>Display summary statistics for a fasta file.</description> +<tool id="fasta-stats" name="Fasta Statistics" version="2.0" profile="20.05"> + <description>display summary statistics for a FASTA file</description> <requirements> - <requirement type="package" version="5.26">perl</requirement> + <requirement type="package" version="1.21.4">numpy</requirement> + <requirement type="package" version="1.79">biopython</requirement> </requirements> <command detect_errors="exit_code"><![CDATA[ - perl '${__tool_directory__}/fasta-stats.pl' - '$dataset' - #if $genome_size: - $genome_size + python '${__tool_directory__}/fasta-stats.py' + --fasta '$fasta' + --stats_output '$stats_output' + #if $gaps_option + --gaps_output '$gaps_output' #end if - > '$stats' + #if $genome_size + --genome_size $genome_size + #end if ]]> </command> <inputs> - <param name="dataset" type="data" format="fasta" label="fasta or multifasta file" help="fasta dataset to get statistics for."/> - <param name="genome_size" type="float" optional="True" label="Genome size estimate (optional)" help="Estimate of the genome size in bases. If specified, NG50 and LG50 will be calculated."/> + <param argument="--fasta" type="data" format="fasta" label="FASTA or Multi-FASTA file" help="FASTA dataset to get statistics."/> + <param argument="--genome_size" type="integer" min="0" optional="true" label="Estimated genome size" help="This parameter is optional. If provided, it will be used for calculating the NG50 statistic." /> + <param argument="--gaps_option" type="boolean" truevalue="true" falsevalue="false" label="Generate gap stats"/> </inputs> <outputs> - <data name="stats" format="tabular" label="${tool.name} on ${on_string}: Fasta summary stats"/> + <data name="stats_output" format="tabular" label="${tool.name} on ${on_string}: summary stats"/> + <data name="gaps_output" format="bed" label="${tool.name} on ${on_string}: Gap stats"> + <filter>gaps_option</filter> + </data> </outputs> <tests> - <test> - <param name="dataset" value="test.fasta"/> - <output name="stats" file="test_out.txt"/> + <test expect_num_outputs="1"> + <param name="fasta" value="test.fasta" ftype="fasta"/> + <output name="stats_output" file="test_01.tab" ftype="tabular"/> </test> - <test> - <param name="dataset" value="ng50_input.fasta"/> + <!--Test gap options and NG50--> + <test expect_num_outputs="2"> + <param name="fasta" value="ng50_input.fasta" ftype="fasta"/> + <param name="gaps_option" value="true"/> <param name="genome_size" value="4000"/> - <output name="stats" file="ng50_out.txt"/> + <output name="stats_output" file="test_02.tab" ftype="tabular"/> + <output name="gaps_output" file="test_02.bed" ftype="bed"/> + </test> + <!--Compare outputs with QUAST--> + <test expect_num_outputs="1"> + <param name="fasta" value="test_long_sequence.fasta" ftype="fasta"/> + <output name="stats_output" ftype="tabular"> + <assert_contents> + <has_text text="8353"/> + <has_text text="303889"/> + <has_text text="22107"/> + </assert_contents> + </output> </test> </tests> - <help> -**Fasta Stats** -Displays the summary statistics for a fasta file. + <help><![CDATA[ + + .. class:: infomark + +**Purpose** + +Displays the summary statistics for a FASTA file. ------ -Outputs in tabular form: - Lengths: n50, min, max, median and average + .. class:: infomark + +**Outputs** - Number of base pairs: A, C, G, T, N, Total and Total_not_N +This tool generates two outputs: a general summary and an optional gap stats file. - Number of sequences +The general summary includes the following information: - GC content in % - - If an optional genome size estimate is specified, then the NG50 length will also be calculated. +- Lengths: n50, min, max, median and average +- Number of base pairs: A, C, G, T, N, Total and Total_not_N +- Number of sequences +- GC content ------- - -Inputs: - -Fasta dataset +In addition the optional gap stats BED file includes the information about gaps localization. + ]]> </help> <citations> <citation type="bibtex"> -@UNPUBLISHED{Seemann_Gladman2012, - author = {Torsten Seemann and Simon Gladman}, - title = {Fasta Statistics: Display summary statistics for a fasta file.}, - year = {2012}, - url = {https://github.com/galaxyproject/tools-iuc}, -} + @UNPUBLISHED{Anmol_Kyran2021, + author = {Anmol Kyran}, + title = {Fasta Statistics: Display summary statistics for a fasta file.}, + year = {2021}, + url = {https://github.com/galaxyproject/tools-iuc}, + } </citation> </citations> </tool>