diff edger-repenrich.xml @ 0:f6f0f1e5e940 draft

planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit 61e203df0be5ed877ff92b917c7cde6eeeab8310
author artbio
date Wed, 02 Aug 2017 05:17:29 -0400
parents
children 51b4590a972d
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/edger-repenrich.xml	Wed Aug 02 05:17:29 2017 -0400
@@ -0,0 +1,180 @@
+<tool id="edger-repenrich" name="edgeR-repenrich" version="1.4.0">
+    <description>Determines differentially expressed features from RepEnrich counts</description>
+    <requirements> <!-- packages the tool depends on, each pinned to an exact version -->
+        <requirement type="package" version="3.16.5">bioconductor-edger</requirement>
+        <requirement type="package" version="3.30.13">bioconductor-limma</requirement>
+        <requirement type="package" version="1.20.0">r-getopt</requirement>
+        <requirement type="package" version="0.2.15">r-rjson</requirement>
+    </requirements>
+    <stdio> <!-- each pattern is searched in both stdout and stderr; any match is reported at the fatal level -->
+        <regex match="Execution halted"
+           source="both"
+           level="fatal"
+           description="Execution halted." />
+        <regex match="Error in"
+           source="both"
+           level="fatal"
+           description="An undefined error occurred, please check your input carefully and contact your administrator." />
+        <regex match="Fatal error"
+           source="both"
+           level="fatal"
+           description="An undefined error occurred, please check your input carefully and contact your administrator." />
+    </stdio>
+    <!-- Prints the R version followed by the edgeR package version. The two R
+         statements are separated by ";" so that cat() still runs after library(). -->
+    <version_command><![CDATA[
+        echo $(R --version | grep version | grep -v GNU)", edgeR version" $(R --vanilla --slave -e "library(edgeR);
+        cat(sessionInfo()\$otherPkgs\$edgeR\$Version)" 2> /dev/null | grep -v -i "WARNING: ")
+    ]]></version_command>
+    <command>
+    <![CDATA[
+        ## Runs edgeR_repenrich.R on two groups of input files (factor levels A and B).
+        #import json
+        Rscript '${__tool_directory__}/edgeR_repenrich.R'
+            --factorName '$factorName'
+            ## Level A count files, handed to the script as a JSON array.
+            --levelNameA '$factorLevel_A'
+            #set $factorlevelsA = list()
+            #for $file in $countsFiles_A:
+                #silent $factorlevelsA.append(str($file))
+            #end for
+            #silent $factorlevelsA.reverse()
+            --levelAfiles '#echo json.dumps(factorlevelsA)#'
+            ## Level B count files, serialized the same way as level A.
+            --levelNameB '$factorLevel_B'
+            #set $factorlevelsB = list()
+            #for $file in $countsFiles_B:
+                #silent $factorlevelsB.append(str($file))
+            #end for
+            #silent $factorlevelsB.reverse()
+            --levelBfiles '#echo json.dumps(factorlevelsB)#'
+            ## Aligned-read total files for level A, serialized as a JSON array.
+            #set $alignedA = list()
+            #for $file in $alignmentFiles_A:
+                #silent $alignedA.append(str($file))
+            #end for
+            #silent $alignedA.reverse()
+            --alignmentA '#echo json.dumps(alignedA)#'
+            ## Aligned-read total files for level B, serialized as a JSON array.
+            #set $alignedB = list()
+            #for $file in $alignmentFiles_B:
+                #silent $alignedB.append(str($file))
+            #end for
+            #silent $alignedB.reverse()
+            --alignmentB '#echo json.dumps(alignedB)#'
+
+            ## Output datasets: plots, optional normalized counts table, results table.
+            -p '$plots'
+            #if $normCounts:
+                -n '$counts_out'
+            #end if
+            -o '$edger_out'
+    ]]>
+    </command>
+    <inputs>
+        <!-- A single factor with two levels (A and B); each level takes count files and aligned-read total files. -->
+        <param name="factorName" type="text" value="FactorName"
+               label="Specify a factor name, e.g. genotype or age or drug_x" help="Only letters, numbers and underscores will be retained in this field">
+            <sanitizer>
+                <valid initial="string.letters,string.digits"><add value="_" /></valid>
+            </sanitizer>
+        </param>
+        <param name="factorLevel_A" type="text" value="FactorLevel1"
+               label="Specify a factor level, typical values could be 'wildtype' or 'control'" help="Only letters, numbers and underscores will be retained in this field">
+            <sanitizer>
+                <valid initial="string.letters,string.digits"><add value="_" /></valid>
+            </sanitizer>
+        </param>
+        <param name="countsFiles_A" type="data" format="tabular" multiple="true" label="Counts file(s)" help="Count files must have been generated by repenrich" />
+        <param name="alignmentFiles_A" type="data" format="tabular" multiple="true" label="Number of aligned reads file(s)" help="files of total aligned reads generated by repenrich"/>
+        <param name="factorLevel_B" type="text" value="FactorLevel2"
+               label="Specify a factor level, typical values could be 'mutant' or 'Drug_X'" help="Only letters, numbers and underscores will be retained in this field">
+            <sanitizer>
+                <valid initial="string.letters,string.digits"><add value="_" /></valid>
+            </sanitizer>
+        </param>
+        <param name="countsFiles_B" type="data" format="tabular" multiple="true" label="Counts file(s)" help="Count files must have been generated by repenrich tool" />
+        <param name="alignmentFiles_B" type="data" format="tabular" multiple="true" label="Number of aligned reads file(s)" help="files of total aligned reads generated by repenrich"/>
+        <param name="normCounts" type="boolean" truevalue="1" falsevalue="0" checked="false" label="Output normalized counts table" />
+    </inputs>
+    <outputs>
+        <data format="tabular" name="edger_out" label="edgeR: ${factorLevel_A} compared to ${factorLevel_B}">
+            <actions>
+                <action name="column_names" type="metadata" default="Tag,log2(FC),FDR,Class,Type" />
+            </actions>
+        </data>
+        <data format="pdf" name="plots" label="edgeR plots" />
+        <data format="tabular" name="counts_out" label="Normalized counts file">
+            <filter>normCounts == True</filter> <!-- created only when the normCounts option is checked -->
+        </data>
+    </outputs>
+    <tests>
+        <test>
+            <!-- Two count files and two aligned-read files per factor level, with the normalized counts output enabled. -->
+            <param name="factorName" value="Genotype"/>
+            <param name="factorLevel_A" value="Mutant"/>
+            <param name="countsFiles_A" value="355_fraction_counts.tab,356_fraction_counts.tab"/>
+            <param name="alignmentFiles_A" value="aligned_355.tab,aligned_356.tab"/>
+            <param name="factorLevel_B" value="Wildtype"/>
+            <param name="countsFiles_B" value="353_fraction_counts.tab,354_fraction_counts.tab"/>
+            <param name="alignmentFiles_B" value="aligned_353.tab,aligned_354.tab"/>
+            <param name="normCounts" value="True"/>
+            <output name="counts_out" file="Normalized_counts_file.tab"/>
+            <output name="plots" file="edgeR_plots.pdf"/>
+            <output name="edger_out" file="edgeR_result_file.tab"/>
+        </test>
+    </tests>
+    <help>
+<![CDATA[
+.. class:: infomark
+
+**What it does**
+
+Estimates the distance between samples (MDS) and the Biological Coefficient of Variation (BCV) in count data from high-throughput sequencing assays, and tests for differential expression using edgeR_.
+
+**Inputs**
+
+edger-repenrich takes count tables generated by repenrich as input. Count tables must be generated for each sample individually. Here, edgeR_ is handling a single factor (genotype, age, treatment, etc.) that affects your experiment. This factor has two levels/states (for instance, "wild-type" and "mutant").
+You need to select the appropriate count tables from your history for each factor level.
+
+The following table gives some examples of factors and their levels:
+
+========= ============== ===============
+Factor    Factorlevel1   Factorlevel2
+--------- -------------- ---------------
+Treatment Treated        Untreated
+--------- -------------- ---------------
+Genotype  Knockdown      Wildtype
+--------- -------------- ---------------
+TimePoint Day4           Day1
+--------- -------------- ---------------
+Gender    Female         Male
+========= ============== ===============
+
+*Note*: Output log2 fold changes are based on factor level 1 vs. factor level 2, so the order of the factor levels is important. For example, for the factor 'Treatment' given in the table above, edgeR computes fold changes of 'Treated' samples against 'Untreated', i.e. the values correspond to up- or down-regulation of features in the Treated samples.
+
+**Output**
+
+edgeR_ generates a tabular file containing the columns described below, and a PDF file in which the results are visualized:
+
+====== =============================================================================
+Column Description
+------ -----------------------------------------------------------------------------
+     1 Tag (transposon element ID)
+     2 the logarithm (to base 2) of the fold change (see the note under Inputs)
+     3 p value adjusted for multiple testing with the Benjamini-Hochberg procedure
+       which controls false discovery rate (FDR)
+     4 Class the transposon belongs to
+     5 Type the transposon belongs to
+====== =============================================================================
+
+.. _edgeR: http://www.bioconductor.org/packages/release/bioc/html/edgeR.html
+]]>
+
+**Note**: This edgeR_ wrapper was adapted from code available at https://github.com/nskvir/RepEnrich
+
+    </help>
+    <citations>
+        <citation type="doi">10.1093/bioinformatics/btp616</citation>
+    </citations>
+</tool>