diff scoary.xml @ 0:42a1a5750539 draft

"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/scoary commit ce823d6021a7afbc2c49ba60e32faababaffd870"
author iuc
date Sun, 21 Mar 2021 12:21:41 +0000
parents
children 77d50ec2bcf2
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/scoary.xml	Sun Mar 21 12:21:41 2021 +0000
@@ -0,0 +1,218 @@
+<tool id="scoary" name="Scoary" version="@TOOL_VERSION@+galaxy0" profile="20.01">
+    <description>calculates the assocations between all genes in the accessory genome and the traits</description>
+    <macros>
+        <token name="@TOOL_VERSION@">1.6.16</token>
+    </macros>
+    <requirements>
+        <requirement type="package" version="@TOOL_VERSION@">scoary</requirement>
+    </requirements>
+    <version_command>scoary --version</version_command>
+    <command detect_errors="exit_code"><![CDATA[
+scoary
+
+###########
+## Input ##
+###########
+
+-t '$input_traits'
+-g '$input_genes'
+
+#if $input_restricts:
+	-r '$input_restricts'
+#end if
+
+########################
+## Additional Options ##
+########################
+
+#if len($additional_options.series_pc) != 0
+    -p #echo " ".join([ "'%s'" % $s.pvalue for $i, $s in enumerate($additional_options.series_pc) ])
+    -c #echo " ".join([ "'%s'" % $s.correction for $i, $s in enumerate($additional_options.series_pc) ])
+#end if
+
+#if $additional_options.permute != 0:
+    -e str($additional_options.permute)
+#end if
+
+#if $additional_options.maxhits != 0:
+    -m str($additional_options.maxhits)
+#end if
+
+$additional_options.collapse
+$output_options.upgma
+
+#if $input_newicktree:
+    -n '$input_newicktree'
+#end if
+
+#########
+## END ##
+#########
+
+--no-time
+&&
+tail -n +1 *.csv | sed "s/\,/\\t/g" > scoary_output.tsv &&
+mv *.nwk scoary_output.nwk
+
+    ]]></command>
+    <inputs>
+        <param name="input_traits" argument="-t" type="data" format="csv" label="Trait table"/>
+        <param name="input_genes" argument="-g" type="data" format="csv" label="Gene Presence/Absence table from ROARY (default output)"/>
+        <param name="input_restricts" optional="true" argument="-r" type="data" format="tabular" label="Table to analyze a subset of strains" />
+
+        <!-- Additional Options -->
+        <section name="additional_options" title="Additional Options">
+            <repeat name="series_pc" title="P-value cutoff(s) and Correction(s)">
+                <param name="pvalue" argument="-p" type="float" min="0" max="1.0" value="0.05" label="P-value cutoff for one Trait" help="SCOARY will not report genes with higher p-values than this (Default=1.0=All). Provide a single value (applied to all) or exactly as many values as correction criteria and in corresponding order (e.g., 0.05 0.05 for two traits)."/>
+                <param name="correction" argument="-c" type="select" label="P-value correction" help="Apply the p-value corrections to the p-value cutoffs you have entered (Default = Individual p-value)." >
+                    <option value="I" selected="true">Individual (naive) p-value</option>
+                    <option value="B">Bonferroni adjusted p-value</option>
+                    <option value="BH">Benjamini-Hochberg adjusted p</option>
+                    <option value="PW">Best (lowest) pairwise comparison</option>
+                    <option value="EPW">Entire range of pairwise comparison p-values</option>
+                    <option value="P">Empirical p-value from permutations</option>
+                </param>
+            </repeat>
+            <param name="permute" argument="-e" type="integer" min="0" value="0" label="Permutations" help="Perform N number of permutations of the significant results post-analysis. (Default = 0 = None)" />
+            <param name="maxhits" argument="-m" type="integer" min="0" value="0" label="Maximal number of hits to report" help="SCOARY will only report the top max hits results per trait. (Default = 0 = All)" />
+            <param name="collapse" argument="--collapse" type="boolean" checked="false" truevalue="--collapse" falsevalue="" label="Collapse correlated genes" help="Collapse correlated genes (genes that have identical distribution patterns in the sample) into merged units. (Default=false)"/>
+            <param name="input_newicktree" optional="true" argument="-n" type="data" format="newick" label="Supply a custom tree (Newick format) for phylogenetic analyses instead instead of calculating it internally." />
+        </section>
+
+        <!-- Output Options -->
+        <section name="output_options" title="Output Options" expanded="true">
+            <param name="upgma" argument="-u" type="boolean" checked="false" truevalue="-u" falsevalue="" label="UPGMA tree" help="Calculate UPGMA tree to a newick file."/>
+        </section>
+    </inputs>
+    <outputs>
+        <data name="out_tabular" format="tabular" from_work_dir="scoary_output.tsv" label="${tool.name} on ${on_string}: Table" />
+        <data name="out_newick" format="newick" from_work_dir="scoary_output.nwk" label="${tool.name} on ${on_string}: Tree">
+            <filter>(output_options['upgma'] is True)</filter>
+        </data>
+    </outputs>
+    <tests>
+        <test expect_num_outputs="2">
+            <param name="input_traits" ftype="csv" value="Tetracycline_resistance.csv" />
+            <param name="input_genes" ftype="csv" value="Gene_presence_absence.csv" />
+            <param name="upgma" value="Yes" />
+            <repeat name="series_pc">
+                <param name="pvalue" value="0.05"/>
+                <param name="correction" value="I"/>
+            </repeat>
+            <output name="out_tabular" file="scoary_output.tsv" ftype="tabular" sort="true">
+                <assert_contents>
+                    <has_n_lines n="573" />
+                    <has_line line="==&gt; Bogus_trait.results.csv &lt;==" />
+                    <has_line line="==&gt; Tetracycline_resistance.results.csv &lt;==" />
+                </assert_contents>
+            </output>
+            <output name="out_newick" file="scoary_output.nwk" ftype="newick" />
+        </test>
+        <test expect_num_outputs="2">
+            <param name="input_traits" ftype="csv" value="Tetracycline_resistance.csv" />
+            <param name="input_genes" ftype="csv" value="Gene_presence_absence.csv" />
+            <param name="upgma" value="Yes" />
+            <repeat name="series_pc">
+                <param name="pvalue" value="0.05"/>
+                <param name="correction" value="I"/>
+            </repeat>
+            <repeat name="series_pc">
+                <param name="pvalue" value="0.05"/>
+                <param name="correction" value="EPW"/>
+            </repeat>
+            <output name="out_tabular" file="scoary_output_2.tsv" ftype="tabular" sort="true">
+                <assert_contents>
+                    <has_n_lines n="27" />
+                    <has_line line="==&gt; Bogus_trait.results.csv &lt;==" />
+                    <has_line line="==&gt; Tetracycline_resistance.results.csv &lt;==" />
+                </assert_contents>
+            </output>
+            <output name="out_newick" file="scoary_output_2.nwk" ftype="newick" />
+        </test>
+    </tests>
+
+    <help><![CDATA[
+
+.. class:: infomark
+
+**What it does**
+
+-------------------
+
+**Scoary**
+
+Scoary is designed to take the csv file from Roary as well as a traits file created by the user and calculate the assocations between all genes in the accessory genome and the traits. It reports a list of genes sorted by strength of association per trait.
+
+-------------------
+
+**Inputs**
+
+-------------------
+
+Scoary requires two input files: csv file from Roary and a list of traits to test associations to.
+Traits can be anything as long as you can classify it into binary categories (e.g. antibiotic resistance, group membership (yes/no), MIC value higher/lower than 16).
+Make sure you your entires are separated by ','.
+The traits file needs to be formatted in a specific way (please take a look into the (documentation)[https://github.com/AdmiralenOla/Scoary]).
+
+You can also use as input the pan-genome as called from Jason Sahl's program LS-BSR (Large-Scale Blast Score Ratio).
+The program includes a python script for converting LS-BSR output to the Roary/Scoary format.
+
+Trait presence is indicated by 1, trait absence by 0.
+Assumes strain names in the first column and trait names in the first row.
+
+Input gene presence/absence table (comma-separated-values) from ROARY.
+Strain names must be equal to those in the trait table.
+
+-----------
+
+**Outputs**
+
+-----------
+
+Scory outputs a single csv traits file. It uses comma "," as a delimiter.
+The results consists of genes that were found to be associated with the trait, sorted according to significance.
+By default, Scoary reports all genes with a naive p-value < 0.05.
+
+You can find the description of the columns in the (documentation)[https://github.com/AdmiralenOla/Scoary].
+
+--------------------
+
+**More Information**
+
+--------------------
+
+See the excellent `Scoary documentation`_
+
+.. _`Scoary documentation`: https://github.com/AdmiralenOla/Scoary
+
+
+**P-value cutoff (-p)**: For Fishers, Bonferronis, and Benjamini-Hochbergs tests, SCOARY will not report genes with higher p-values than this.
+For empirical p-values, this is treated as an alpha level instead.
+I.e. 0.02 will filter all genes except the lower and upper percentile from this test.
+Run with "-p 1.0" to report all genes. Accepts standard form (e.g. 1E-8).
+Provide a single value (applied to all) or exactly as many values as correction criteria and in corresponding order (e.g., 0.05 0.1 0.05 0.02).
+
+**Correction (-c)**: Apply the indicated filtration measure: I=Individual (naive) p-value, B=Bonferroni adjusted p-value, BH=Benjamini-Hochberg adjusted p, PW=Best (lowest) pairwise comparison, EPW=Entire range of pairwise comparison p-values, P=Empirical p-value from permutations.
+You can enter as many correction criteria as you would like.
+These will be associated with the p-value cutoffs you enter.
+For example "-c I EPW -p 0.1 0.05" will apply the following cutoffs: Naive p-value must be lower than 0.1 AND the entire range of pairwise comparison values are below 0.05 for this gene.
+Note that the empirical p-values should be interpreted at both tails.
+Therefore, running "-c P -p 0.05" will apply an alpha of 0.05 to the empirical (permuted) p-values, i.e. it will filter everything except the upper and lower 2.5 percent of the distribution.
+
+**Permute (-e)**: Perform N number of permutations of the significant results post-analysis.
+Each permutation will do a label switching of the phenotype and a new p-value is calculated according to this new dataset.
+After all N permutations are completed, the results are ordered in ascending order, and the percentile of the original result in the permuted p-value distribution is reported.
+
+--------------------
+
+**Galaxy Wrapper Development**
+
+--------------------
+
+Author: Florian Heyl
+
+	]]></help>
+    <citations>
+	   <citation type="doi">10.1038/s41467-020-15171-6</citation>
+    </citations>
+</tool>