diff edger-repenrich2.xml @ 1:6d59fbca2db4 draft

planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich2 commit 4dd520dee5c3c0c526e8319a74c4890da032300f
author artbio
date Sat, 20 Apr 2024 14:46:12 +0000
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/edger-repenrich2.xml	Sat Apr 20 14:46:12 2024 +0000
@@ -0,0 +1,199 @@
+<tool id="edger-repenrich2" name="edgeR-repenrich2" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@">
+    <description>Determines differentially expressed features from RepEnrich2 counts</description>
+    <macros>
+        <import>macros.xml</import>
+    </macros><!-- macros.xml is not part of this file; presumably it defines the @...@ tokens used in the tool tag (verify) -->
+    <expand macro="edgeR_requirements"/><!-- macro body lives in macros.xml; presumably declares the edgeR dependencies (verify) -->
+    <stdio>
+        <!-- Each pattern is searched in both stdout and stderr; a match marks the job as fatal. -->
+        <regex match="Execution halted" source="both" level="fatal"
+               description="Execution halted." />
+        <regex match="Error in" source="both" level="fatal"
+               description="An undefined error occurred, please check your input carefully and contact your administrator." />
+        <regex match="Fatal error" source="both" level="fatal"
+               description="An undefined error occurred, please check your input carefully and contact your administrator." />
+    </stdio>
+    <!-- Prints the R version line followed by the installed edgeR version.
+         The two R statements are separated by ";". Chaining them with the logical
+         AND operator makes R abort, because library() returns a character vector,
+         and since stderr is discarded the edgeR version was silently left empty. -->
+    <version_command>
+    <![CDATA[
+        echo $(R --version | grep version | grep -v GNU)", edgeR version" $(R --vanilla --slave -e "library(edgeR); cat(sessionInfo()\$otherPkgs\$edgeR\$Version)" 2> /dev/null | grep -v -i "WARNING: ")
+    ]]>
+    </version_command>
+    <!-- Builds the Rscript call. The dataset paths of each factor level are collected
+         into a list and handed to the R script as a JSON array. The result table option
+         is passed exactly once and points at the edger_out dataset path; a second,
+         literal 'edger_out' value used to be passed as well and has been removed. -->
+    <command>
+    <![CDATA[
+        #import json
+        Rscript '${__tool_directory__}/edgeR_repenrich2.R'
+            --factorName '$factorName'
+            --levelNameA '$factorLevel_A'
+            ## paths of the count files selected for level A, in reversed selection order
+            #set $factorlevelsA = list()
+            #for $file in $countsFiles_A:
+                #silent $factorlevelsA.append(str($file))
+            #end for
+            #silent $factorlevelsA.reverse()
+            --levelAfiles '#echo json.dumps(factorlevelsA)#'
+            --levelNameB '$factorLevel_B'
+            ## paths of the count files selected for level B, in reversed selection order
+            #set $factorlevelsB = list()
+            #for $file in $countsFiles_B:
+                #silent $factorlevelsB.append(str($file))
+            #end for
+            #silent $factorlevelsB.reverse()
+            --levelBfiles '#echo json.dumps(factorlevelsB)#'
+            -p '$plots'
+            ## the normalized counts table is only requested when the user asked for it
+            #if $normCounts:
+                -n '$counts_out'
+            #end if
+            -o '$edger_out'
+    ]]>
+    </command>
+    <inputs>
+        <!-- Name of the single experimental factor under study. -->
+        <param name="factorName" type="text" value="FactorName"
+               label="Specify a factor name, e.g. genotype or age or drug_x"
+               help="Only letters, numbers and underscores will be retained in this field">
+            <sanitizer>
+                <valid initial="string.letters,string.digits">
+                    <add value="_" />
+                </valid>
+            </sanitizer>
+        </param>
+
+        <!-- First factor level and its count files. -->
+        <param name="factorLevel_A" type="text" value="FactorLevel1"
+               label="Specify a factor level, typical values could be 'mutant' or 'Drug_X'"
+               help="Only letters, numbers and underscores will be retained in this field">
+            <sanitizer>
+                <valid initial="string.letters,string.digits">
+                    <add value="_" />
+                </valid>
+            </sanitizer>
+        </param>
+        <param name="countsFiles_A" type="data" format="tabular" multiple="true"
+               label="Counts file(s)"
+               help="Count files must have been generated by repenrich" />
+
+        <!-- Second factor level and its count files. -->
+        <param name="factorLevel_B" type="text" value="FactorLevel2"
+               label="Specify a factor level, typical values could be 'wildtype' or 'control'"
+               help="Only letters, numbers and underscores will be retained in this field">
+            <sanitizer>
+                <valid initial="string.letters,string.digits">
+                    <add value="_" />
+                </valid>
+            </sanitizer>
+        </param>
+        <param name="countsFiles_B" type="data" format="tabular" multiple="true"
+               label="Counts file(s)"
+               help="Count files must have been generated by repenrich tool" />
+
+        <!-- Toggles the optional normalized counts output. -->
+        <param name="normCounts" type="boolean" truevalue="1" falsevalue="0" checked="false"
+               label="Output normalized counts table" />
+    </inputs>
+    <outputs>
+        <!-- Result table; column names are attached as dataset metadata. -->
+        <data name="edger_out" format="tabular"
+              label="edgeR: ${factorLevel_A} compared to ${factorLevel_B}">
+            <actions>
+                <action type="metadata" name="column_names" default="Tag,log2(FC),FDR,Class,Type" />
+            </actions>
+        </data>
+        <!-- PDF with the plots. -->
+        <data name="plots" format="pdf" label="edgeR plots" />
+        <!-- Only created when the normCounts input is checked. -->
+        <data name="counts_out" format="tabular" label="Normalized counts file">
+            <filter>normCounts == True</filter>
+        </data>
+    </outputs>
+    <tests>
+        <!-- Two samples per level, normalized counts requested: all three outputs are expected. -->
+        <test expect_num_outputs="3">
+            <param name="factorName" value="Genotype" />
+            <param name="factorLevel_A" value="Mutant" />
+            <param name="countsFiles_A" value="355_fraction_counts.tab,356_fraction_counts.tab" />
+            <param name="factorLevel_B" value="Wildtype" />
+            <param name="countsFiles_B" value="353_fraction_counts.tab,354_fraction_counts.tab" />
+            <param name="normCounts" value="True" />
+            <output name="counts_out" file="Normalized_counts_file.tab" />
+            <output name="plots" file="edgeR_plots.pdf" />
+            <output name="edger_out" file="edgeR_result_file.tab" />
+        </test>
+    </tests>
+    <help>
+.. class:: infomark
+**What it does**
+Estimates the distance between samples (MDS) and the biological coefficient of variation (BCV) in count
+data from high-throughput sequencing assays and tests for differential expression using edgeR_.
+edger-repenrich takes count tables generated by repenrich as inputs. A repenrich count table looks like this:
+============== ========== ========== ==========
+LSU-rRNA_Dme    rRNA       rRNA       3659329
+-------------- ---------- ---------- ----------
+FW3_DM          LINE       Jockey     831
+-------------- ---------- ---------- ----------
+DMTOM1_LTR      LTR        Gypsy      1004
+-------------- ---------- ---------- ----------
+R1_DM           LINE       R1         7343
+-------------- ---------- ---------- ----------
+TAHRE           LINE       Jockey     4560
+-------------- ---------- ---------- ----------
+G4_DM           LINE       Jockey     3668
+-------------- ---------- ---------- ----------
+BS              LINE       Jockey     7296
+-------------- ---------- ---------- ----------
+Stalker2_I-int  LTR        Gypsy      12252
+-------------- ---------- ---------- ----------
+Stalker3_LTR    LTR        Gypsy      593
+-------------- ---------- ---------- ----------
+TABOR_I-int     LTR        Gypsy      3947
+-------------- ---------- ---------- ----------
+G7_DM           LINE       Jockey     162
+-------------- ---------- ---------- ----------
+BEL_I-int       LTR        Pao        23757
+-------------- ---------- ---------- ----------
+Gypsy6_I-int    LTR        Gypsy      7489
+============== ========== ========== ==========
+Count tables must be generated for each sample individually. Here, edgeR_ handles a
+single factor (genotype, age, treatment, etc.) that affects your experiment. This factor has
+two levels/states (for instance, "wild-type" and "mutant"). You need to select the appropriate
+count tables from your history for each factor level.
+The following table gives some examples of factors and their levels:
+========= ============== ===============
+Factor    Factorlevel1   Factorlevel2
+--------- -------------- ---------------
+Treatment Treated        Untreated
+--------- -------------- ---------------
+Genotype  Knockdown      Wildtype
+--------- -------------- ---------------
+TimePoint Day4           Day1
+--------- -------------- ---------------
+Gender    Female         Male
+========= ============== ===============
+*Note*: Output log2 fold changes are based on factor level 1 vs. factor level 2.
+Here the order of factor levels is important. For example, for the factor 'Treatment' given
+in the table above, edgeR computes fold changes of 'Treated' samples against 'Untreated',
+i.e. the values correspond to up- or down-regulation of genes in Treated samples.
+edgeR_ generates a tabular file containing the columns described below, and a PDF file
+in which the results are visualized:
+====== =============================================================================
+Column Description
+------ -----------------------------------------------------------------------------
+     1 Tag (transposon element ID)
+     2 the logarithm (to base 2) of the fold change (see the note above)
+     3 p value adjusted for multiple testing with the Benjamini-Hochberg procedure
+       which controls false discovery rate (FDR)
+     4 Class the transposon belongs to
+     5 Type the transposon belongs to
+====== =============================================================================
+.. _edgeR: http://www.bioconductor.org/packages/release/bioc/html/edgeR.html
+**Note**: This edgeR_ wrapper was adapted from code available at
+    </help>
+    <citations>
+        <citation type="doi">10.1093/bioinformatics/btp616</citation><!-- edgeR paper (Robinson, McCarthy and Smyth, Bioinformatics 2010) -->
+    </citations>