comparison analyze_diff_expr.xml @ 0:d4128a4e49ca draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/trinity commit 30192223eeb60f33a42046921351c6ba3c80c90c
author iuc
date Mon, 21 Nov 2016 11:59:35 -0500
parents
children 24d072085816
comparison
equal deleted inserted replaced
-1:000000000000 0:d4128a4e49ca
1 <tool id="trinity_analyze_diff_expr" name="Extract and cluster differentially expressed transcripts" version="@WRAPPER_VERSION@.0">
2 <description>from a Trinity assembly</description>
3 <macros>
4 <import>macros.xml</import>
5 </macros>
6 <expand macro="requirements">
7 <requirement type="package" version="2.30.0">bioconductor-biobase</requirement>
8 <requirement type="package" version="2.2.2">bioconductor-qvalue</requirement>
9 <requirement type="package" version="1.22.0">bioconductor-goseq</requirement>
10 <requirement type="package" version="3.3.0">bioconductor-go.db</requirement>
11 </expand>
12 <expand macro="stdio"/>
13 <command><![CDATA[
14 ## DE results input files must be in the working directory and have the suffix .DE_results: symlink each collection element there, named after its element identifier with every character outside [\w\-_.] replaced by '_'
15 #import re
16 #for $input in $DE_results
17 ln -s "${input}" "${re.sub('[^\w\-_.]', '_', input.element_identifier)}.DE_results"
18 &&
19 #end for
20
21 analyze_diff_expr.pl
22 --matrix "${matrix}"
23 --samples "${samples}"
24 -P ${p}
25 -C ${c}
26
27 #if str( $additional_params.max_DE_genes_per_comparison ):
28 --max_DE_genes_per_comparison ${additional_params.max_DE_genes_per_comparison}
29 #end if
30
31 $additional_params.order_columns_by_samples_file
32
33 #if $additional_params.max_genes_clust:
34 --max_genes_clust ${additional_params.max_genes_clust}
35 #end if
36
37 #if str( $additional_params.GO_enrichment.examine_GO_enrichment ) == "yes":
38 --examine_GO_enrichment
39 --GO_annots "${$additional_params.GO_enrichment.GO_annots}"
40 --gene_lengths "${$additional_params.GO_enrichment.gene_lengths}"
41 #end if
42
43 --output results
44 ]]></command>
45 <inputs>
46 <param format="tabular" name="matrix" argument="--matrix" type="data" label="Expression matrix" help="Raw counts matrix produced by 'Build expression matrix for a de novo assembly of RNA-Seq data by Trinity' tool"/>
47 <param format="tabular" name="samples" argument="--samples" type="data" label="Sample description" help="File describing samples and replicates"/>
48 <param format="tabular" name="DE_results" type="data_collection" collection_type="list" label="Differential expression results"/>
49 <param name="p" type="float" argument="-P" value="0.001" label="p-value cutoff for FDR"/>
50 <param name="c" type="float" argument="-C" value="2" label="min abs(log2(a/b)) fold change" help="Default: 2 (meaning 2^(2) or 4-fold)"/>
51 <section name="additional_params" title="Additional Options" expanded="False">
52 <param name="max_DE_genes_per_comparison" argument="--max_DE_genes_per_comparison" type="integer" value="" optional="true" label="Maximum differential expression genes per comparison" help="Extract only up to the top number of DE features within each pairwise comparison. This is useful when you have massive numbers of DE features but still want to make useful heatmaps and other plots with more manageable numbers of data points."/>
53 <param name="order_columns_by_samples_file" argument="--order_columns_by_samples_file" type="boolean" checked="false" truevalue="--order_columns_by_samples_file" falsevalue="" label="Order columns by samples file" help="Instead of clustering samples or replicates hierarchically based on gene expression patterns, order columns according to order in the --samples file."/>
54 <param name="max_genes_clust" argument="--max_genes_clust" type="integer" value="10000" label="Maximum genes in cluster" help="If there are more genes than this value (default: 10000), heatmaps are not generated, since that would be too time-consuming"/>
55 <conditional name="GO_enrichment">
56 <param type="select" name="examine_GO_enrichment" argument="--examine_GO_enrichment" label="Run GO enrichment analysis" help="To examine GO enrichment, you must first run Trinotate and then extract all GO assignments for each gene feature, with the Trinotate script extract_GO_assignments_from_Trinotate_xls.pl">
57 <option value="no">No</option>
58 <option value="yes">Yes</option>
59 </param>
60 <when value="no">
61 </when>
62 <when value="yes">
63 <param format="tabular" name="GO_annots" argument="--GO_annots" type="data" label="Extracted GO assignments file" help="Generated by the Trinotate script extract_GO_assignments_from_Trinotate_xls.pl. Must have 2 columns: feature_id GO:000001,GO:00002,..."/>
64 <param format="tabular" name="gene_lengths" argument="--gene_lengths" type="data" label="Gene length file" help="Must have 2 columns: feature_id length"/>
65 </when>
66 </conditional>
67 </section>
68 </inputs>
69 <outputs>
70 <collection name="extracted_DE_genes" type="list" label="${tool.name} on ${on_string}: extracted differentially expressed genes">
71 <discover_datasets pattern="(?P&lt;name&gt;.+)\.subset$" ext="tabular" />
72 </collection>
73 <collection name="summary_files" type="list" label="${tool.name} on ${on_string}: summary files">
74 <data format="tabular" name="results_matrix" from_work_dir="results.matrix"/>
75 <data format="tabular" name="results_matrix_log2_centered" from_work_dir="results.matrix.log2.centered.dat"/>
76 <data format="pdf" name="results_matrix_log2_centered_heatmap" from_work_dir="results.matrix.log2.centered.genes_vs_samples_heatmap.pdf"/>
77 <data format="tabular" name="results_matrix_log2" from_work_dir="results.matrix.log2.dat"/>
78 <data format="tabular" name="results_matrix_log2_sample_cor" from_work_dir="results.matrix.log2.sample_cor.dat"/>
79 <data format="pdf" name="results_matrix_log2_sample_cor_matrix" from_work_dir="results.matrix.log2.sample_cor_matrix.pdf"/>
80 </collection>
81 <data format="RData" name="rdata" label="${tool.name} on ${on_string}: RData file" from_work_dir="results.matrix.RData"/>
82 <collection name="GOseq_enrichment" type="list" label="${tool.name} on ${on_string}: GOseq enriched and depleted categories">
83 <discover_datasets pattern="(?P&lt;name&gt;.+\.subset\.GOseq\.(enriched|depleted))$" ext="tabular" />
84 </collection>
85 </outputs>
86 <tests>
87 <test>
88 <param name="matrix" value="count/qcheck/matrix.counts.matrix"/>
89 <param name="samples" value="count/samples.txt"/>
90 <param name="DE_results">
91 <collection type="list">
92 <element name="input.matrix.wt_37_vs_wt_GSNO.DESeq2" value="count/exp_diff/input.matrix.wt_37_vs_wt_GSNO.DESeq2.DE_results" ftype="tabular" />
93 <element name="input.matrix.wt_37_vs_wt_ph8.DESeq2" value="count/exp_diff/input.matrix.wt_37_vs_wt_ph8.DESeq2.DE_results" ftype="tabular" />
94 <element name="input.matrix.wt_GSNO_vs_wt_ph8.DESeq2" value="count/exp_diff/input.matrix.wt_GSNO_vs_wt_ph8.DESeq2.DE_results" ftype="tabular" />
95 </collection>
96 </param>
97 <output_collection name="extracted_DE_genes">
98 <element name="input.matrix.wt_37_vs_wt_GSNO.DESeq2.DE_results.P0.001_C2.0.wt_37-UP" compare="sim_size" file="count/analyze_diff_expr/input.matrix.wt_37_vs_wt_GSNO.DESeq2.DE_results.P0.001_C2.wt_37-UP.subset"/>
99 <element name="input.matrix.wt_37_vs_wt_GSNO.DESeq2.DE_results.P0.001_C2.0.wt_GSNO-UP" compare="sim_size" file="count/analyze_diff_expr/input.matrix.wt_37_vs_wt_GSNO.DESeq2.DE_results.P0.001_C2.wt_GSNO-UP.subset"/>
100 <element name="input.matrix.wt_37_vs_wt_ph8.DESeq2.DE_results.P0.001_C2.0.wt_37-UP" compare="sim_size" file="count/analyze_diff_expr/input.matrix.wt_37_vs_wt_ph8.DESeq2.DE_results.P0.001_C2.wt_37-UP.subset"/>
101 <element name="input.matrix.wt_37_vs_wt_ph8.DESeq2.DE_results.P0.001_C2.0.wt_ph8-UP" compare="sim_size" file="count/analyze_diff_expr/input.matrix.wt_37_vs_wt_ph8.DESeq2.DE_results.P0.001_C2.wt_ph8-UP.subset"/>
102 <element name="input.matrix.wt_GSNO_vs_wt_ph8.DESeq2.DE_results.P0.001_C2.0.wt_GSNO-UP" compare="sim_size" file="count/analyze_diff_expr/input.matrix.wt_GSNO_vs_wt_ph8.DESeq2.DE_results.P0.001_C2.wt_GSNO-UP.subset"/>
103 <element name="input.matrix.wt_GSNO_vs_wt_ph8.DESeq2.DE_results.P0.001_C2.0.wt_ph8-UP" compare="sim_size" file="count/analyze_diff_expr/input.matrix.wt_GSNO_vs_wt_ph8.DESeq2.DE_results.P0.001_C2.wt_ph8-UP.subset"/>
104 </output_collection>
105 <output_collection name="summary_files">
106 <element name="results_matrix" compare="sim_size" file="count/analyze_diff_expr/results.matrix"/>
107 <element name="results_matrix_log2_centered" compare="sim_size" file="count/analyze_diff_expr/results.matrix.log2.centered.dat"/>
108 <element name="results_matrix_log2_centered_heatmap" delta="100" compare="sim_size" file="count/analyze_diff_expr/results.matrix.log2.centered.genes_vs_samples_heatmap.pdf"/>
109 <element name="results_matrix_log2" compare="sim_size" file="count/analyze_diff_expr/results.matrix.log2.dat"/>
110 <element name="results_matrix_log2_sample_cor" compare="sim_size" file="count/analyze_diff_expr/results.matrix.log2.sample_cor.dat"/>
111 <element name="results_matrix_log2_sample_cor_matrix" delta="100" compare="sim_size" file="count/analyze_diff_expr/results.matrix.log2.sample_cor_matrix.pdf"/>
112 </output_collection>
113 <output name="rdata" compare="sim_size" file="count/analyze_diff_expr/results.matrix.RData"/>
114 </test>
115 <test>
116 <param name="matrix" value="count/qcheck/matrix.counts.matrix"/>
117 <param name="samples" value="count/samples.txt"/>
118 <param name="DE_results">
119 <collection type="list">
120 <element name="input.matrix.wt_37_vs_wt_GSNO.DESeq2" value="count/exp_diff/input.matrix.wt_37_vs_wt_GSNO.DESeq2.DE_results" ftype="tabular" />
121 <element name="input.matrix.wt_37_vs_wt_ph8.DESeq2" value="count/exp_diff/input.matrix.wt_37_vs_wt_ph8.DESeq2.DE_results" ftype="tabular" />
122 <element name="input.matrix.wt_GSNO_vs_wt_ph8.DESeq2" value="count/exp_diff/input.matrix.wt_GSNO_vs_wt_ph8.DESeq2.DE_results" ftype="tabular" />
123 </collection>
124 </param>
125 <section name="additional_params">
126 <conditional name="GO_enrichment">
127 <param name="examine_GO_enrichment" value="yes"/>
128 <param name="GO_annots" value="count/trinotate/go_annotations.txt"/>
129 <param name="gene_lengths" value="count/trinotate/genes.lengths.txt"/>
130 </conditional>
131 </section>
132 <assert_command>
133 <has_text text="--examine_GO_enrichment" />
134 <has_text text="--GO_annots" />
135 <has_text text="--gene_lengths" />
136 </assert_command>
137 <output_collection name="GOseq_enrichment">
138 <element name="input.matrix.wt_37_vs_wt_ph8.DESeq2.DE_results.P0.001_C2.0.wt_37-UP.subset.GOseq.enriched" compare="sim_size" file="count/analyze_diff_expr/input.matrix.wt_37_vs_wt_ph8.DESeq2.DE_results.P0.001_C2.wt_37-UP.subset.GOseq.enriched"/>
139 </output_collection>
140 </test>
141 </tests>
142 <help>
143 <![CDATA[
144 Trinity_ assembles transcript sequences from Illumina RNA-Seq data.
145 This tool extracts the transcripts that are most differentially expressed (most significant FDR and fold-changes), once differential expression analyses have been run.
146
147 **Inputs**
148
149 This tool uses the raw counts matrix produced by 'Build expression matrix for a de novo assembly of RNA-Seq data by Trinity' tool.
150
151 You must describe your samples and replicates with a tabular file looking like this:
152
153 =========== ================
154 ConditionA CondA_replicate1
155 ----------- ----------------
156 ConditionA CondA_replicate2
157 ----------- ----------------
158 ConditionB CondB_replicate1
159 ----------- ----------------
160 ConditionB CondB_replicate2
161 ----------- ----------------
162 ConditionC CondC_replicate1
163 ----------- ----------------
164 ConditionC CondC_replicate2
165 ----------- ----------------
166 ConditionC CondC_replicate3
167 =========== ================
168
169 This file can be generated with the 'Describe samples and replicates' tool.
170 It will probably be the same file as used in the tool 'RNASeq samples quality check for transcript quantification' or in the tool 'Differential expression analysis'.
171 The names in column 2 must match the names given in the tool 'Build expression matrix for a de novo assembly of RNA-Seq data by Trinity'.
172
173 You must also provide as a data collection the files resulting from the differential expression analysis (outputs of tool 'Differential expression analysis').
174
175 .. _Trinity: http://trinityrnaseq.github.io
176 ]]>
177 </help>
178 <expand macro="citation" />
179 </tool>