diff analyze_diff_expr.xml @ 0:d4128a4e49ca draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/trinity commit 30192223eeb60f33a42046921351c6ba3c80c90c
author iuc
date Mon, 21 Nov 2016 11:59:35 -0500
parents
children 24d072085816
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/analyze_diff_expr.xml	Mon Nov 21 11:59:35 2016 -0500
@@ -0,0 +1,179 @@
+<tool id="trinity_analyze_diff_expr" name="Extract and cluster differentially expressed transcripts" version="@WRAPPER_VERSION@.0">
+    <description>from a Trinity assembly</description>
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+    <expand macro="requirements">
+        <requirement type="package" version="2.30.0">bioconductor-biobase</requirement>
+        <requirement type="package" version="2.2.2">bioconductor-qvalue</requirement>
+        <requirement type="package" version="1.22.0">bioconductor-goseq</requirement>
+        <requirement type="package" version="3.3.0">bioconductor-go.db</requirement>
+    </expand>
+    <expand macro="stdio"/>
+    <command><![CDATA[
+    ## DE results input files must be in the working directory and have suffix .DE_results, so each collection element is symlinked under its identifier with every character other than word characters, '-', '_' and '.' replaced by '_'
+    #import re
+    #for $input in $DE_results
+        ln -s "${input}" "${re.sub('[^\w\-_.]', '_', input.element_identifier)}.DE_results"
+        &&
+    #end for
+    ## -P is the FDR p-value cutoff and -C the minimum abs(log2 fold change); both are always passed
+    analyze_diff_expr.pl
+        --matrix "${matrix}"
+        --samples "${samples}"
+        -P ${p}
+        -C ${c}

+        #if str( $additional_params.max_DE_genes_per_comparison ):
+            --max_DE_genes_per_comparison ${additional_params.max_DE_genes_per_comparison}
+        #end if

+        $additional_params.order_columns_by_samples_file

+        #if $additional_params.max_genes_clust:
+            --max_genes_clust ${additional_params.max_genes_clust}
+        #end if
+        ## the two GO enrichment files are only passed when GO enrichment is switched on
+        #if str( $additional_params.GO_enrichment.examine_GO_enrichment ) == "yes":
+            --examine_GO_enrichment
+            --GO_annots "${additional_params.GO_enrichment.GO_annots}"
+            --gene_lengths "${additional_params.GO_enrichment.gene_lengths}"
+        #end if

+        --output results
+    ]]></command>
+    <inputs> <!-- parameter names below are referenced from the Cheetah command template; keep both in sync -->
+        <param format="tabular" name="matrix" argument="--matrix" type="data" label="Expression matrix" help="Raw counts matrix produced by 'Build expression matrix for a de novo assembly of RNA-Seq data by Trinity' tool"/>
+        <param format="tabular" name="samples" argument="--samples" type="data" label="Sample description" help="File describing samples and replicates"/>
+        <param format="tabular" name="DE_results" type="data_collection" collection_type="list" label="Differential expression results"/>
+        <param name="p" type="float" argument="-P" value="0.001" min="0" max="1" label="p-value cutoff for FDR"/>
+        <param name="c" type="float" argument="-C" value="2" min="0" label="min abs(log2(a/b)) fold change" help="Default: 2 (meaning 2^(2) or 4-fold)"/>
+        <section name="additional_params" title="Additional Options" expanded="False">
+            <param name="max_DE_genes_per_comparison" argument="--max_DE_genes_per_comparison" type="integer" value="" optional="true" label="Maximum differential expression genes per comparison" help="Extract only up to the top number of DE features within each pairwise comparison. This is useful when you have massive numbers of DE features but still want to make useful heatmaps and other plots with more manageable numbers of data points."/>
+            <param name="order_columns_by_samples_file" argument="--order_columns_by_samples_file" type="boolean" checked="false" truevalue="--order_columns_by_samples_file" falsevalue="" label="Order columns by samples file" help="Instead of clustering samples or replicates hierarchically based on gene expression patterns, order columns according to order in the --samples file."/>
+            <param name="max_genes_clust" argument="--max_genes_clust" type="integer" value="10000" label="Maximum genes in cluster" help="If more than 10000, heatmaps are not generated, since too time consuming"/>
+            <conditional name="GO_enrichment">
+                <param type="select" name="examine_GO_enrichment" argument="--examine_GO_enrichment" label="Run GO enrichment analysis" help="To examine GO enrichment, you must first run Trinotate and then extract all GO assignments for each gene feature, with the Trinotate script extract_GO_assignments_from_Trinotate_xls.pl">
+                    <option value="no">No</option>
+                    <option value="yes">Yes</option>
+                </param>
+                <when value="no">
+                </when>
+                <when value="yes">
+                    <param format="tabular" name="GO_annots" argument="--GO_annots" type="data" label="Extracted GO assignments file" help="Generated by the Trinotate script extract_GO_assignments_from_Trinotate_xls.pl. Must have 2 columns: feature_id GO:000001,GO:00002,..."/>
+                    <param format="tabular" name="gene_lengths" argument="--gene_lengths" type="data" label="Gene length file" help="Must have 2 columns: feature_id length"/>
+                </when>
+            </conditional>
+        </section>
+    </inputs>
+    <outputs> <!-- *.subset and *.subset.GOseq.* files are discovered by pattern; fixed-name results are taken from the working directory -->
+        <collection name="extracted_DE_genes" type="list" label="${tool.name} on ${on_string}: extracted differentially expressed genes">
+            <discover_datasets ext="tabular" pattern="(?P&lt;name&gt;.+)\.subset$"/>
+        </collection>
+        <collection name="summary_files" type="list" label="${tool.name} on ${on_string}: summary files">
+            <data name="results_matrix" from_work_dir="results.matrix" format="tabular"/>
+            <data name="results_matrix_log2_centered" from_work_dir="results.matrix.log2.centered.dat" format="tabular"/>
+            <data name="results_matrix_log2_centered_heatmap" from_work_dir="results.matrix.log2.centered.genes_vs_samples_heatmap.pdf" format="pdf"/>
+            <data name="results_matrix_log2" from_work_dir="results.matrix.log2.dat" format="tabular"/>
+            <data name="results_matrix_log2_sample_cor" from_work_dir="results.matrix.log2.sample_cor.dat" format="tabular"/>
+            <data name="results_matrix_log2_sample_cor_matrix" from_work_dir="results.matrix.log2.sample_cor_matrix.pdf" format="pdf"/>
+        </collection>
+        <data name="rdata" from_work_dir="results.matrix.RData" format="RData" label="${tool.name} on ${on_string}: RData file"/>
+        <collection name="GOseq_enrichment" type="list" label="${tool.name} on ${on_string}: GOseq enriched and depleted categories">
+            <discover_datasets ext="tabular" pattern="(?P&lt;name&gt;.+\.subset\.GOseq\.(enriched|depleted))$"/>
+        </collection>
+    </outputs>
+    <tests>
+        <test> <!-- default options only: the extracted subsets, the summary files and the RData output are compared by size -->
+            <param name="matrix" value="count/qcheck/matrix.counts.matrix"/>
+            <param name="samples" value="count/samples.txt"/>
+            <param name="DE_results">
+                <collection type="list">
+                    <element name="input.matrix.wt_37_vs_wt_GSNO.DESeq2" ftype="tabular" value="count/exp_diff/input.matrix.wt_37_vs_wt_GSNO.DESeq2.DE_results"/>
+                    <element name="input.matrix.wt_37_vs_wt_ph8.DESeq2" ftype="tabular" value="count/exp_diff/input.matrix.wt_37_vs_wt_ph8.DESeq2.DE_results"/>
+                    <element name="input.matrix.wt_GSNO_vs_wt_ph8.DESeq2" ftype="tabular" value="count/exp_diff/input.matrix.wt_GSNO_vs_wt_ph8.DESeq2.DE_results"/>
+                </collection>
+            </param>
+            <output_collection name="extracted_DE_genes">
+                <element name="input.matrix.wt_37_vs_wt_GSNO.DESeq2.DE_results.P0.001_C2.0.wt_37-UP" file="count/analyze_diff_expr/input.matrix.wt_37_vs_wt_GSNO.DESeq2.DE_results.P0.001_C2.wt_37-UP.subset" compare="sim_size"/>
+                <element name="input.matrix.wt_37_vs_wt_GSNO.DESeq2.DE_results.P0.001_C2.0.wt_GSNO-UP" file="count/analyze_diff_expr/input.matrix.wt_37_vs_wt_GSNO.DESeq2.DE_results.P0.001_C2.wt_GSNO-UP.subset" compare="sim_size"/>
+                <element name="input.matrix.wt_37_vs_wt_ph8.DESeq2.DE_results.P0.001_C2.0.wt_37-UP" file="count/analyze_diff_expr/input.matrix.wt_37_vs_wt_ph8.DESeq2.DE_results.P0.001_C2.wt_37-UP.subset" compare="sim_size"/>
+                <element name="input.matrix.wt_37_vs_wt_ph8.DESeq2.DE_results.P0.001_C2.0.wt_ph8-UP" file="count/analyze_diff_expr/input.matrix.wt_37_vs_wt_ph8.DESeq2.DE_results.P0.001_C2.wt_ph8-UP.subset" compare="sim_size"/>
+                <element name="input.matrix.wt_GSNO_vs_wt_ph8.DESeq2.DE_results.P0.001_C2.0.wt_GSNO-UP" file="count/analyze_diff_expr/input.matrix.wt_GSNO_vs_wt_ph8.DESeq2.DE_results.P0.001_C2.wt_GSNO-UP.subset" compare="sim_size"/>
+                <element name="input.matrix.wt_GSNO_vs_wt_ph8.DESeq2.DE_results.P0.001_C2.0.wt_ph8-UP" file="count/analyze_diff_expr/input.matrix.wt_GSNO_vs_wt_ph8.DESeq2.DE_results.P0.001_C2.wt_ph8-UP.subset" compare="sim_size"/>
+            </output_collection>
+            <output_collection name="summary_files">
+                <element name="results_matrix" file="count/analyze_diff_expr/results.matrix" compare="sim_size"/>
+                <element name="results_matrix_log2_centered" file="count/analyze_diff_expr/results.matrix.log2.centered.dat" compare="sim_size"/>
+                <element name="results_matrix_log2_centered_heatmap" file="count/analyze_diff_expr/results.matrix.log2.centered.genes_vs_samples_heatmap.pdf" compare="sim_size" delta="100"/>
+                <element name="results_matrix_log2" file="count/analyze_diff_expr/results.matrix.log2.dat" compare="sim_size"/>
+                <element name="results_matrix_log2_sample_cor" file="count/analyze_diff_expr/results.matrix.log2.sample_cor.dat" compare="sim_size"/>
+                <element name="results_matrix_log2_sample_cor_matrix" file="count/analyze_diff_expr/results.matrix.log2.sample_cor_matrix.pdf" compare="sim_size" delta="100"/>
+            </output_collection>
+            <output name="rdata" file="count/analyze_diff_expr/results.matrix.RData" compare="sim_size"/>
+        </test>
+        <test> <!-- GO enrichment switched on: checks the generated command line and one GOseq output by size -->
+            <param name="matrix" value="count/qcheck/matrix.counts.matrix"/>
+            <param name="samples" value="count/samples.txt"/>
+            <param name="DE_results">
+                <collection type="list">
+                    <element name="input.matrix.wt_37_vs_wt_GSNO.DESeq2" ftype="tabular" value="count/exp_diff/input.matrix.wt_37_vs_wt_GSNO.DESeq2.DE_results"/>
+                    <element name="input.matrix.wt_37_vs_wt_ph8.DESeq2" ftype="tabular" value="count/exp_diff/input.matrix.wt_37_vs_wt_ph8.DESeq2.DE_results"/>
+                    <element name="input.matrix.wt_GSNO_vs_wt_ph8.DESeq2" ftype="tabular" value="count/exp_diff/input.matrix.wt_GSNO_vs_wt_ph8.DESeq2.DE_results"/>
+                </collection>
+            </param>
+            <section name="additional_params">
+                <conditional name="GO_enrichment">
+                    <param name="examine_GO_enrichment" value="yes"/>
+                    <param name="GO_annots" value="count/trinotate/go_annotations.txt"/>
+                    <param name="gene_lengths" value="count/trinotate/genes.lengths.txt"/>
+                </conditional>
+            </section>
+            <assert_command>
+                <has_text text="--examine_GO_enrichment"/>
+                <has_text text="--GO_annots"/>
+                <has_text text="--gene_lengths"/>
+            </assert_command>
+            <output_collection name="GOseq_enrichment">
+                <element name="input.matrix.wt_37_vs_wt_ph8.DESeq2.DE_results.P0.001_C2.0.wt_37-UP.subset.GOseq.enriched" file="count/analyze_diff_expr/input.matrix.wt_37_vs_wt_ph8.DESeq2.DE_results.P0.001_C2.wt_37-UP.subset.GOseq.enriched" compare="sim_size"/>
+            </output_collection>
+        </test>
+    </tests>
+    <help>
+<![CDATA[
+Trinity_ assembles transcript sequences from Illumina RNA-Seq data.
+This tool extracts the transcripts that are most differentially expressed (most significant FDR and fold-changes), once differential expression analyses have been run.
+
+**Inputs**
+
+This tool uses the raw counts matrix produced by the 'Build expression matrix for a de novo assembly of RNA-Seq data by Trinity' tool.
+
+You must describe your samples and replicates with a tabular file looking like this:
+
+=========== ================
+ConditionA  CondA_replicate1
+----------- ----------------
+ConditionA  CondA_replicate2
+----------- ----------------
+ConditionB  CondB_replicate1
+----------- ----------------
+ConditionB  CondB_replicate2
+----------- ----------------
+ConditionC  CondC_replicate1
+----------- ----------------
+ConditionC  CondC_replicate2
+----------- ----------------
+ConditionC  CondC_replicate3
+=========== ================
+
+This file can be generated with the 'Describe samples and replicates' tool.
+It will probably be the same file as used in the tool 'RNASeq samples quality check for transcript quantification' or in the tool 'Differential expression analysis'.
+The names in column 2 must match the names given in the tool 'Build expression matrix for a de novo assembly of RNA-Seq data by Trinity'.
+
+You must also provide as a data collection the files resulting from the differential expression analysis (outputs of tool 'Differential expression analysis').
+
+.. _Trinity: http://trinityrnaseq.github.io
+]]>
+    </help>
+    <expand macro="citation" />
+</tool>