comparison fgsea.xml @ 0:9bb7943b5263 draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/fgsea commit 9a6eda48463d6c19e9c5f3f2f8109f33de74855d-dirty
author iuc
date Sat, 20 Oct 2018 05:47:18 -0400
parents
children 101b208a3e1a
comparison
equal deleted inserted replaced
-1:000000000000 0:9bb7943b5263
1 <tool id="fgsea" name="fgsea" version="1.6.0">
2 <description>- fast preranked gene set enrichment analysis</description>
3 <requirements> <!-- Packages the wrapper depends on; both are pinned to version 1.6.0. -->
4 <requirement type="package" version="1.6.0">bioconductor-fgsea</requirement>
5 <requirement type="package" version="1.6.0">r-optparse</requirement>
6 </requirements>
7 <version_command><![CDATA[
8 echo $(R --version | grep version | grep -v GNU)", fgsea version" $(R --vanilla --slave -e "library(fgsea); cat(sessionInfo()\$otherPkgs\$fgsea\$Version)" 2> /dev/null | grep -v -i "WARNING: ")", optparse version" $(R --vanilla --slave -e "library(optparse); cat(sessionInfo()\$otherPkgs\$optparse\$Version)" 2> /dev/null | grep -v -i "WARNING: ")
9 ]]></version_command> <!-- Echoes a single line: the R version string (GNU lines removed), then the versions of the loaded fgsea and optparse packages. R's stderr is discarded and any "WARNING: " lines are filtered out of the package version output. -->
10 <command detect_errors="exit_code"><![CDATA[
11 #set $gmt = True
12 #if $sets_file.is_of_type("rdata"):
13 #set $gmt = False
14 #end if
15
16 Rscript '$__tool_directory__/fgsea.R'
17 --rnk_file '$rnk_file'
18 --header $header
19 --sets_file '$sets_file'
20 --gmt $gmt
21 --min_size $min_size
22 --max_size $max_size
23 --n_perm $n_perm
24 --out_tab '$out_tab'
25 --plot_opt $plot_opt
26 --top_num $top_num
27 --rda_opt $rda_opt
28
29 ]]></command> <!-- Runs fgsea.R from the tool directory, passing every input as a command-line option. The gmt flag starts as True and is switched to False when the gene sets dataset is of type rdata. Failure is detected from the exit code (detect_errors="exit_code"). -->
30 <inputs> <!-- User-facing parameters; each one is passed to fgsea.R as the option of the same name. -->
31 <param name="rnk_file" type="data" format="tabular" label="Ranked Genes" help="A tabular file with gene symbols in the first column, and a ranked statistic (e.g. t-statistic or log fold-change) in the second column"/>
32 <param name="header" type="boolean" truevalue="True" falsevalue="False" checked="True" label="File has header?" help="If this option is set to Yes, the tool will assume that the ranked genes file has a column header in the first row and the identifiers commence on the second line. Default: Yes" />
33 <param name="sets_file" type="data" format="tabular,rdata" label="Gene Sets" help="A tabular file in GMT format or an RData file containing a list of gene sets, see below for more information"/>
34 <param name="min_size" type="integer" min="0" value="1" label="Minimum Size of Gene Set" help="Minimal size of a gene set to test. All pathways below the threshold are excluded. Default: 1" />
35 <param name="max_size" type="integer" value="500" label="Maximal Size of Gene Set" help="Maximal size of a gene set to test. All pathways above the threshold are excluded. Default: 500" />
36 <param name="n_perm" type="integer" min="0" value="1000" label="Number of Permutations" help="Number of permutations to do. Minimal possible nominal p-value is about 1/nperm. Default: 1000" />
37 <param name="plot_opt" type="boolean" truevalue="True" falsevalue="False" checked="False" label="Output plots" help="Output a PDF file containing plots for top pathways by P value significance. Default: No"/>
38 <param name="top_num" type="integer" value="10" label="Plot top most significant pathways" help="If Output plots is selected the number of top pathways to plot can be specified. Default: 10"/>
39 <param name="rda_opt" type="boolean" truevalue="True" falsevalue="False" checked="False" label="Output RData file?" help="Output all the data used by R in the fgsea analysis, can be loaded into R. Default: No" />
40 </inputs>
41
42 <outputs> <!-- out_tab has no filter and is always declared; out_pdf and out_rdata are collected from the working directory and kept only when plot_opt / rda_opt is True. -->
43 <data name="out_tab" format="tabular" label="${tool.name} on ${on_string}: Ranked category list" />
44 <data name="out_pdf" format="pdf" from_work_dir="fgsea_plots.pdf" label="${tool.name} on ${on_string}: Plots">
45 <filter>plot_opt is True</filter>
46 </data>
47 <data name="out_rdata" format="rdata" from_work_dir="fgsea_analysis.RData" label="${tool.name} on ${on_string}: RData file">
48 <filter>rda_opt is True</filter>
49 </data>
50 </outputs>
51
52 <tests> <!-- Two runs with plots enabled: the first uses a GMT gene set file, the second an RData gene set file. Each checks the table header and the HALLMARK_TNFA_SIGNALING_VIA_NFKB row by regex and compares the PDF against out_t47d.pdf by size. -->
53 <test>
54 <param name="rnk_file" ftype="tabular" value="t47d_Treatment_DEA_Prog-vs-Control_all_for_GSEA.rnk" />
55 <param name="sets_file" ftype="tabular" value="h.all.v6.2.symbols.gmt"/>
56 <param name="plot_opt" value="True"/>
57 <output name="out_tab" >
58 <assert_contents>
59 <has_text_matching expression="pathway.*pval.*padj.*ES.*NES.*nMoreExtreme.*size.*leadingEdge" />
60 <has_text_matching expression="HALLMARK_TNFA_SIGNALING_VIA_NFKB.*0.001" />
61 </assert_contents>
62 </output>
63 <output name="out_pdf" value="out_t47d.pdf" compare="sim_size" delta="12000"/>
64 </test>
65 <test>
66 <param name="rnk_file" ftype="tabular" value="t47d_entrez_ids.rnk" />
67 <param name="sets_file" ftype="rdata" value="human_H_v5p2.rdata"/>
68 <param name="plot_opt" value="True"/>
69 <output name="out_tab" >
70 <assert_contents>
71 <has_text_matching expression="pathway.*pval.*padj.*ES.*NES.*nMoreExtreme.*size.*leadingEdge" />
72 <has_text_matching expression="HALLMARK_TNFA_SIGNALING_VIA_NFKB.*0.001" />
73 </assert_contents>
74 </output>
75 <output name="out_pdf" value="out_t47d.pdf" compare="sim_size"/> <!-- NOTE(review): no delta is given here, unlike the first test (delta="12000") against the same reference PDF; confirm the default tolerance is intended. -->
76 </test>
77 </tests>
78
79 <help><![CDATA[
80 fgsea_ is a Bioconductor package for fast preranked gene set enrichment analysis (GSEA). The performance is achieved by using an algorithm for cumulative GSEA-statistic calculation. This allows samples to be reused between different gene set sizes. See the preprint_ for algorithmic details.
81
82 -----
83
84 **Inputs**
85
86 **Ranked Genes**
87
88 A two-column file containing a ranked list of genes is required. The first column must contain the gene identifiers and the second column the statistic used to rank. Gene identifiers must be unique (not repeated) within the file and must be the same type as the identifiers in the Gene Sets file.
89
90 Example:
91
92 ========= ============
93 Symbol Ranked Stat
94 ========= ============
95 VDR 67.198
96 IL20RA 65.963
97 MPHOSPH10 51.353
98 RCAN1 50.269
99 HILPDA 50.015
100 TSC22D3 47.496
101 FAM107B 45.926
102 ========= ============
103
104 **Gene Sets**
105
106 A Gene Sets file is required. This can be a tabular file in Gene Matrix Transposed (GMT) format. In GMT format, each row represents a gene set, with the set name in the first column, a description in the second, then the identifiers of the genes in the set in the following columns, see the example below. GMT files with any identifiers (e.g. Entrez IDs, Symbols) can be used but the same type of identifiers must be present in the Ranked Genes file. More information on `GMT format`_ can be found at the Broad website. GMT files for human gene sets can be obtained from the Broad's MSigDB_ collections.
107
108 Example:

109 ================== ================================================================== ====== ===== ====
110 HALLMARK_APOPTOSIS http://www.broadinstitute.org/gsea/msigdb/cards/HALLMARK_APOPTOSIS CASP3 CASP9 ...
111 HALLMARK_HYPOXIA http://www.broadinstitute.org/gsea/msigdb/cards/HALLMARK_HYPOXIA PGK1 PDK1 ...
112 ================== ================================================================== ====== ===== ====
113
114
115 Alternatively, an RData file containing a collection of gene sets can be input, like the ones provided here_ containing mouse versions of the MSigDB collections.
116
117 -----
118
119 **Outputs**
120
121 * A Tabular file of gene set rankings
122 * A PDF with plots of top pathways (optional)
* An RData file containing the data used in the fgsea analysis (optional)
123
124 -----
125
126 Wrapper released under MIT License. Copyright (c) 2017 Mark Dunning
127
128 .. _fgsea: https://bioconductor.org/packages/release/bioc/html/fgsea.html
129 .. _preprint: http://biorxiv.org/content/early/2016/06/20/060012
130 .. _GMT format: https://software.broadinstitute.org/cancer/software/gsea/wiki/index.php/Data_formats#GMT:_Gene_Matrix_Transposed_file_format_.28.2A.gmt.29
131 .. _MSigDB: http://software.broadinstitute.org/gsea/msigdb/collections.jsp
132 .. _here: http://bioinf.wehi.edu.au/software/MSigDB/index.html
133
134 ]]></help>
135 <citations> <!-- DOI of the bioRxiv preprint that the help text links to. -->
136 <citation type="doi">10.1101/060012</citation>
137 </citations>
138 </tool>