changeset 1:6d59fbca2db4 draft

planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich2 commit 4dd520dee5c3c0c526e8319a74c4890da032300f
author artbio
date Sat, 20 Apr 2024 14:46:12 +0000
parents 4905a332a094
children cfb06f8e8f52
files edgeR_repenrich.R edgeR_repenrich2.R edger-repenrich.xml edger-repenrich2.xml macros.xml repenrich2.xml
diffstat 6 files changed, 377 insertions(+), 377 deletions(-) [+]
line wrap: on
line diff
--- a/edgeR_repenrich.R	Sat Apr 20 11:56:53 2024 +0000
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,176 +0,0 @@
-# setup R error handling to go to stderr
-options(show.error.messages = FALSE, error = function() {
-    cat(geterrmessage(), file = stderr())
-    q("no", 1, FALSE)
-})
-
-# To not crash galaxy with an UTF8 error with not-US LC settings.
-loc <- Sys.setlocale("LC_MESSAGES", "en_US.UTF-8")
-
-# load libraries
-library("getopt")
-library("tools")
-library("rjson")
-suppressPackageStartupMessages({
-    library("edgeR")
-    library("limma")
-})
-
-options(stringAsFactors = FALSE, useFancyQuotes = FALSE)
-
-# get options, using the spec as defined by the enclosed list.
-spec <- matrix(
-    c(
-        "quiet", "q", 0, "logical",
-        "outfile", "o", 1, "character",
-        "countsfile", "n", 1, "character",
-        "factorName", "N", 1, "character",
-        "levelNameA", "A", 1, "character",
-        "levelNameB", "B", 1, "character",
-        "levelAfiles", "a", 1, "character",
-        "levelBfiles", "b", 1, "character",
-        "plots", "p", 1, "character"
-    ),
-    byrow = TRUE, ncol = 4
-)
-opt <- getopt(spec)
-
-# build levels A and B file lists
-filesA <- fromJSON(opt$levelAfiles, method = "C", unexpected.escape = "error")
-filesB <- fromJSON(opt$levelBfiles, method = "C", unexpected.escape = "error")
-listA <- list()
-indice <- 0
-listA[["level"]] <- opt$levelNameA
-for (file in filesA) {
-    indice <- indice + 1
-    listA[[paste0(opt$levelNameA, "_", indice)]] <- read.delim(file, header = FALSE)
-}
-listB <- list()
-indice <- 0
-listB[["level"]] <- opt$levelNameB
-for (file in filesB) {
-    indice <- indice + 1
-    listB[[paste0(opt$levelNameB, "_", indice)]] <- read.delim(file, header = FALSE)
-}
-
-# build a counts table
-counts <- data.frame(row.names = listA[[2]][, 1])
-for (element in names(listA[-1])) {
-    counts <- cbind(counts, listA[[element]][, 4])
-}
-for (element in names(listB[-1])) {
-    counts <- cbind(counts, listB[[element]][, 4])
-}
-colnames(counts) <- c(names(listA[-1]), names(listB[-1]))
-sizes <- colSums(counts)
-
-# build a meta data object
-meta <- data.frame(
-    row.names = colnames(counts),
-    condition = c(rep(opt$levelNameA, length(filesA)), rep(opt$levelNameB, length(filesB))),
-    libsize = sizes
-)
-
-
-# Define the library size and conditions for the GLM
-libsize <- meta$libsize
-condition <- factor(meta$condition)
-design <- model.matrix(~ 0 + condition)
-colnames(design) <- levels(condition)
-
-# Build a DGE object for the GLM
-y <- DGEList(counts = counts, lib.size = libsize)
-
-# Normalize the data
-y <- calcNormFactors(y)
-
-# Estimate the variance
-y <- estimateGLMCommonDisp(y, design)
-y <- estimateGLMTrendedDisp(y, design)
-y <- estimateGLMTagwiseDisp(y, design)
-
-# Builds and outputs an object to contain the normalized read abundance in counts per million of reads
-cpm <- cpm(y, log = FALSE, lib.size = libsize)
-cpm <- as.data.frame(cpm)
-colnames(cpm) <- colnames(counts)
-if (!is.null(opt$countsfile)) {
-    normalizedAbundance <- data.frame(Tag = rownames(cpm))
-    normalizedAbundance <- cbind(normalizedAbundance, cpm)
-    write.table(normalizedAbundance, file = opt$countsfile, sep = "\t", col.names = TRUE, row.names = FALSE, quote = FALSE)
-}
-
-# Conduct fitting of the GLM
-yfit <- glmFit(y, design)
-
-# Initialize result matrices to contain the results of the GLM
-results <- matrix(nrow = dim(counts)[1], ncol = 0)
-logfc <- matrix(nrow = dim(counts)[1], ncol = 0)
-
-# Make the comparisons for the GLM
-my.contrasts <- makeContrasts(
-    paste0(opt$levelNameA, "_", opt$levelNameB, " = ", opt$levelNameA, " - ", opt$levelNameB),
-    levels = design
-)
-
-# Define the contrasts used in the comparisons
-allcontrasts <- paste0(opt$levelNameA, " vs ", opt$levelNameB)
-
-# Conduct a for loop that will do the fitting of the GLM for each comparison
-# Put the results into the results objects
-lrt <- glmLRT(yfit, contrast = my.contrasts[, 1])
-res <- topTags(lrt, n = dim(c)[1], sort.by = "none")$table
-results <- cbind(results, res[, c(1, 5)])
-logfc <- cbind(logfc, res[c(1)])
-
-# Add the repeat types back into the results.
-# We should still have the same order as the input data
-results$class <- listA[[2]][, 2]
-results$type <- listA[[2]][, 3]
-# Sort the results table by the FDR
-results <- results[with(results, order(FDR)), ]
-
-# Plot Fold Changes for repeat classes and types
-
-# open the device and plots
-if (!is.null(opt$plots)) {
-    pdf(opt$plots)
-    plotMDS(y, main = "Multidimensional Scaling Plot Of Distances Between Samples")
-    plotBCV(y, xlab = "Gene abundance (Average log CPM)", main = "Biological Coefficient of Variation Plot")
-    logFC <- results[, "logFC"]
-    # Plot the repeat classes
-    classes <- with(results, reorder(class, -logFC, median))
-    classes
-    par(mar = c(6, 10, 4, 1))
-    boxplot(logFC ~ classes,
-        data = results, outline = FALSE, horizontal = TRUE,
-        las = 2, xlab = "log2(Fold Change)", ylab = "", cex.axis = 0.7, main = paste0(allcontrasts, ", by Class")
-    )
-    abline(v = 0)
-    # Plot the repeat types
-    types <- with(results, reorder(type, -logFC, median))
-    boxplot(logFC ~ types,
-        data = results, outline = FALSE, horizontal = TRUE,
-        las = 2, xlab = "log2(Fold Change)", ylab = "", cex.axis = 0.7, main = paste0(allcontrasts, ", by Type")
-    )
-    abline(v = 0)
-    # volcano plot
-    TEdata <- cbind(rownames(results), as.data.frame(results), score = -log(results$FDR, 10))
-    colnames(TEdata) <- c("Tag", "log2FC", "FDR", "Class", "Type", "score")
-    color <- ifelse(TEdata$FDR < 0.05, "red", "black")
-    s <- subset(TEdata, FDR < 0.01)
-    with(TEdata, plot(log2FC, score, pch = 20, col = color, main = "Volcano plot (all tag types)", ylab = "-log10(FDR)"))
-    text(s[, 2], s[, 6], labels = s[, 5], pos = seq(from = 1, to = 3), cex = 0.5)
-}
-
-# close the plot device
-if (!is.null(opt$plots)) {
-    cat("closing plot device\n")
-    dev.off()
-}
-
-# Save the results
-results <- cbind(TE_item = rownames(results), results)
-colnames(results) <- c("TE_item", "log2FC", "FDR", "Class", "Type")
-results$log2FC <- format(results$log2FC, digits = 5)
-results$FDR <- format(results$FDR, digits = 5)
-write.table(results, opt$outfile, quote = FALSE, sep = "\t", col.names = TRUE, row.names = FALSE)
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/edgeR_repenrich2.R	Sat Apr 20 14:46:12 2024 +0000
@@ -0,0 +1,176 @@
+# setup R error handling to go to stderr
+options(show.error.messages = FALSE, error = function() {
+    cat(geterrmessage(), file = stderr())
+    q("no", 1, FALSE)
+})
+
+# To not crash galaxy with an UTF8 error with not-US LC settings.
+loc <- Sys.setlocale("LC_MESSAGES", "en_US.UTF-8")
+
+# load libraries
+library("getopt")
+library("tools")
+library("rjson")
+suppressPackageStartupMessages({
+    library("edgeR")
+    library("limma")
+})
+
+options(stringsAsFactors = FALSE, useFancyQuotes = FALSE)
+
+# get options, using the spec as defined by the enclosed list.
+spec <- matrix(
+    c(
+        "quiet", "q", 0, "logical",
+        "outfile", "o", 1, "character",
+        "countsfile", "n", 1, "character",
+        "factorName", "N", 1, "character",
+        "levelNameA", "A", 1, "character",
+        "levelNameB", "B", 1, "character",
+        "levelAfiles", "a", 1, "character",
+        "levelBfiles", "b", 1, "character",
+        "plots", "p", 1, "character"
+    ),
+    byrow = TRUE, ncol = 4
+)
+opt <- getopt(spec)
+
+# build levels A and B file lists
+filesA <- fromJSON(opt$levelAfiles, method = "C", unexpected.escape = "error")
+filesB <- fromJSON(opt$levelBfiles, method = "C", unexpected.escape = "error")
+listA <- list()
+indice <- 0
+listA[["level"]] <- opt$levelNameA
+for (file in filesA) {
+    indice <- indice + 1
+    listA[[paste0(opt$levelNameA, "_", indice)]] <- read.delim(file, header = FALSE)
+}
+listB <- list()
+indice <- 0
+listB[["level"]] <- opt$levelNameB
+for (file in filesB) {
+    indice <- indice + 1
+    listB[[paste0(opt$levelNameB, "_", indice)]] <- read.delim(file, header = FALSE)
+}
+
+# build a counts table
+counts <- data.frame(row.names = listA[[2]][, 1])
+for (element in names(listA[-1])) {
+    counts <- cbind(counts, listA[[element]][, 4])
+}
+for (element in names(listB[-1])) {
+    counts <- cbind(counts, listB[[element]][, 4])
+}
+colnames(counts) <- c(names(listA[-1]), names(listB[-1]))
+sizes <- colSums(counts)
+
+# build a meta data object
+meta <- data.frame(
+    row.names = colnames(counts),
+    condition = c(rep(opt$levelNameA, length(filesA)), rep(opt$levelNameB, length(filesB))),
+    libsize = sizes
+)
+
+
+# Define the library size and conditions for the GLM
+libsize <- meta$libsize
+condition <- factor(meta$condition)
+design <- model.matrix(~ 0 + condition)
+colnames(design) <- levels(condition)
+
+# Build a DGE object for the GLM
+y <- DGEList(counts = counts, lib.size = libsize)
+
+# Normalize the data
+y <- calcNormFactors(y)
+
+# Estimate the variance
+y <- estimateGLMCommonDisp(y, design)
+y <- estimateGLMTrendedDisp(y, design)
+y <- estimateGLMTagwiseDisp(y, design)
+
+# Builds and outputs an object to contain the normalized read abundance in counts per million of reads
+cpm <- cpm(y, log = FALSE, lib.size = libsize)
+cpm <- as.data.frame(cpm)
+colnames(cpm) <- colnames(counts)
+if (!is.null(opt$countsfile)) {
+    normalizedAbundance <- data.frame(Tag = rownames(cpm))
+    normalizedAbundance <- cbind(normalizedAbundance, cpm)
+    write.table(normalizedAbundance, file = opt$countsfile, sep = "\t", col.names = TRUE, row.names = FALSE, quote = FALSE)
+}
+
+# Conduct fitting of the GLM
+yfit <- glmFit(y, design)
+
+# Initialize result matrices to contain the results of the GLM
+results <- matrix(nrow = dim(counts)[1], ncol = 0)
+logfc <- matrix(nrow = dim(counts)[1], ncol = 0)
+
+# Make the comparisons for the GLM
+my.contrasts <- makeContrasts(
+    paste0(opt$levelNameA, "_", opt$levelNameB, " = ", opt$levelNameA, " - ", opt$levelNameB),
+    levels = design
+)
+
+# Define the contrasts used in the comparisons
+allcontrasts <- paste0(opt$levelNameA, " vs ", opt$levelNameB)
+
+# Run the likelihood ratio test for the single A - B contrast and keep every
+# tag in input order; columns 1 and 5 of the topTags table are logFC and FDR
+lrt <- glmLRT(yfit, contrast = my.contrasts[, 1])
+res <- topTags(lrt, n = nrow(counts), sort.by = "none")$table
+results <- cbind(results, res[, c(1, 5)])
+logfc <- cbind(logfc, res[c(1)])
+
+# Add the repeat types back into the results.
+# We should still have the same order as the input data
+results$class <- listA[[2]][, 2]
+results$type <- listA[[2]][, 3]
+# Sort the results table by the FDR
+results <- results[with(results, order(FDR)), ]
+
+# Plot Fold Changes for repeat classes and types
+
+# open the device and plots
+if (!is.null(opt$plots)) {
+    pdf(opt$plots)
+    plotMDS(y, main = "Multidimensional Scaling Plot Of Distances Between Samples")
+    plotBCV(y, xlab = "Gene abundance (Average log CPM)", main = "Biological Coefficient of Variation Plot")
+    logFC <- results[, "logFC"]
+    # Plot the repeat classes
+    classes <- with(results, reorder(class, -logFC, median))
+    classes
+    par(mar = c(6, 10, 4, 1))
+    boxplot(logFC ~ classes,
+        data = results, outline = FALSE, horizontal = TRUE,
+        las = 2, xlab = "log2(Fold Change)", ylab = "", cex.axis = 0.7, main = paste0(allcontrasts, ", by Class")
+    )
+    abline(v = 0)
+    # Plot the repeat types
+    types <- with(results, reorder(type, -logFC, median))
+    boxplot(logFC ~ types,
+        data = results, outline = FALSE, horizontal = TRUE,
+        las = 2, xlab = "log2(Fold Change)", ylab = "", cex.axis = 0.7, main = paste0(allcontrasts, ", by Type")
+    )
+    abline(v = 0)
+    # volcano plot
+    TEdata <- cbind(rownames(results), as.data.frame(results), score = -log(results$FDR, 10))
+    colnames(TEdata) <- c("Tag", "log2FC", "FDR", "Class", "Type", "score")
+    color <- ifelse(TEdata$FDR < 0.05, "red", "black")
+    s <- subset(TEdata, FDR < 0.01)
+    with(TEdata, plot(log2FC, score, pch = 20, col = color, main = "Volcano plot (all tag types)", ylab = "-log10(FDR)"))
+    text(s[, 2], s[, 6], labels = s[, 5], pos = seq(from = 1, to = 3), cex = 0.5)
+}
+
+# close the plot device
+if (!is.null(opt$plots)) {
+    cat("closing plot device\n")
+    dev.off()
+}
+
+# Save the results
+results <- cbind(TE_item = rownames(results), results)
+colnames(results) <- c("TE_item", "log2FC", "FDR", "Class", "Type")
+results$log2FC <- format(results$log2FC, digits = 5)
+results$FDR <- format(results$FDR, digits = 5)
+write.table(results, opt$outfile, quote = FALSE, sep = "\t", col.names = TRUE, row.names = FALSE)
--- a/edger-repenrich.xml	Sat Apr 20 11:56:53 2024 +0000
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,199 +0,0 @@
-<tool id="edger-repenrich2" name="edgeR-repenrich" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@">
-    <description>Determines differentially expressed features from RepEnrich2 counts</description>
-    <macros>
-        <import>macros.xml</import>
-    </macros>
-    <expand macro="edgeR_requirements"/>
-    <stdio>
-        <regex match="Execution halted"
-           source="both"
-           level="fatal"
-           description="Execution halted." />
-        <regex match="Error in"
-           source="both"
-           level="fatal"
-           description="An undefined error occurred, please check your input carefully and contact your administrator." />
-        <regex match="Fatal error"
-           source="both"
-           level="fatal"
-           description="An undefined error occurred, please check your input carefully and contact your administrator." />
-    </stdio>
-    <version_command>
-    <![CDATA[
-        echo $(R --version | grep version | grep -v GNU)", edgeR version" $(R --vanilla --slave -e "library(edgeR) &&
-        cat(sessionInfo()\$otherPkgs\$edgeR\$Version)" 2> /dev/null | grep -v -i "WARNING: ")
-    ]]>
-    </version_command>
-    <command>
-    <![CDATA[
-        #import json
-        Rscript '${__tool_directory__}/edgeR_repenrich.R'
-            --factorName '$factorName'
-
-            --levelNameA '$factorLevel_A'
-            #set $factorlevelsA = list()
-            #for $file in $countsFiles_A:
-                $factorlevelsA.append(str($file))
-            #end for
-            $factorlevelsA.reverse()
-            --levelAfiles '#echo json.dumps(factorlevelsA)#'
-
-            --levelNameB '$factorLevel_B'
-            #set $factorlevelsB = list()
-            #for $file in $countsFiles_B:
-                $factorlevelsB.append(str($file))
-            #end for
-            $factorlevelsB.reverse()
-            --levelBfiles '#echo json.dumps(factorlevelsB)#'
-
-            -o 'edger_out'
-
-            -p '$plots'
-            #if $normCounts:
-                -n '$counts_out'
-            #end if
-            -o '$edger_out'
-    ]]>
-    </command>
-    <inputs>
-            <param name="factorName" type="text" value="FactorName"  label="Specify a factor name, e.g. genotype or age or drug_x"
-                help="Only letters, numbers and underscores will be retained in this field">
-                <sanitizer>
-                    <valid initial="string.letters,string.digits"><add value="_" /></valid>
-                </sanitizer>
-            </param>
-            <param name="factorLevel_A" type="text" value="FactorLevel1" label="Specify a factor level, typical values could be 'mutant' or 'Drug_X'"
-                   help="Only letters, numbers and underscores will be retained in this field">
-                <sanitizer>
-                    <valid initial="string.letters,string.digits"><add value="_" /></valid>
-                </sanitizer>
-            </param>
-            <param name="countsFiles_A" type="data" format="tabular" multiple="true" label="Counts file(s)" help="Count files must have been generated by repenrich" />
-            <param name="factorLevel_B" type="text" value="FactorLevel2" label="Specify a factor level, typical values could be 'wildtype' or 'control'"
-                   help="Only letters, numbers and underscores will be retained in this field">
-                <sanitizer>
-                    <valid initial="string.letters,string.digits"><add value="_" /></valid>
-                </sanitizer>
-            </param>
-            <param name="countsFiles_B" type="data" format="tabular" multiple="true" label="Counts file(s)" help="Count files must have been generated by repenrich tool" />
-            <param name="normCounts" type="boolean" truevalue="1" falsevalue="0" checked="false"
-            label="Output normalized counts table" />
-    </inputs>
-    <outputs>
-        <data format="tabular" name="edger_out" label="edgeR: ${factorLevel_A} compared to ${factorLevel_B}">
-            <actions>
-                <action name="column_names" type="metadata" default="Tag,log2(FC),FDR,Class,Type" />
-            </actions>
-        </data>
-        <data format="pdf" name="plots" label="edgeR plots" />
-        <data format="tabular" name="counts_out" label="Normalized counts file">
-            <filter>normCounts == True</filter>
-        </data>
-    </outputs>
-    <tests>
-        <test expect_num_outputs="3">
-            <param name="factorName" value="Genotype"/>
-            <param name="factorLevel_A" value="Mutant"/>
-            <param name="countsFiles_A" value="355_fraction_counts.tab,356_fraction_counts.tab"/>
-            <param name="factorLevel_B" value="Wildtype"/>
-            <param name="countsFiles_B" value="353_fraction_counts.tab,354_fraction_counts.tab"/>
-            <param name="normCounts" value="True"/>
-            <output name="counts_out" file="Normalized_counts_file.tab"/>
-            <output name="plots" file="edgeR_plots.pdf"/>
-            <output name="edger_out" file="edgeR_result_file.tab"/>
-
-        </test>
-    </tests>
-    <help>
-<![CDATA[
-.. class:: infomark
-
-**What it does**
-
-Estimate Distance between samples (MDS) and Biological Coefficient Variation (BCV) in count
-data from high-throughput sequencing assays and test for differential expression using edgeR_.
-
-**Inputs**
-
-edger-repenrich takes count tables generated by repenrich as inputs. A repenrich count table looks
-like:
-
-============== ========== ========== ==========
-LSU-rRNA_Dme    rRNA       rRNA       3659329
--------------- ---------- ---------- ----------
-FW3_DM          LINE       Jockey     831
--------------- ---------- ---------- ----------
-DMTOM1_LTR      LTR        Gypsy      1004
--------------- ---------- ---------- ----------
-R1_DM           LINE       R1         7343
--------------- ---------- ---------- ----------
-TAHRE           LINE       Jockey     4560
--------------- ---------- ---------- ----------
-G4_DM           LINE       Jockey     3668
--------------- ---------- ---------- ----------
-BS              LINE       Jockey     7296
--------------- ---------- ---------- ----------
-Stalker2_I-int  LTR        Gypsy      12252
--------------- ---------- ---------- ----------
-Stalker3_LTR    LTR        Gypsy      593
--------------- ---------- ---------- ----------
-TABOR_I-int     LTR        Gypsy      3947
--------------- ---------- ---------- ----------
-G7_DM           LINE       Jockey     162
--------------- ---------- ---------- ----------
-BEL_I-int       LTR        Pao        23757
--------------- ---------- ---------- ----------
-Gypsy6_I-int    LTR        Gypsy      7489
-============== ========== ========== ==========
-
-Count tables must be generated for each sample individually. Here, edgeR_ is handling a
-single factor (genotype, age, treatment, etc) that effect your experiment. This factor has
-two levels/states (for instance, "wild-type" and "mutant". You need to select appropriate
-count table from your history for each factor level.
-
-The following table gives some examples of factors and their levels:
-
-========= ============== ===============
-Factor    Factorlevel1   Factorlevel2
---------- -------------- ---------------
-Treatment Treated        Untreated
---------- -------------- ---------------
-Genotype  Knockdown      Wildtype
---------- -------------- ---------------
-TimePoint Day4           Day1
---------- -------------- ---------------
-Gender    Female         Male
-========= ============== ===============
-
-*Note*: Output log2 fold changes are based on primary factor level 1 vs. factor level2.
-Here the order of factor levels is important. For example, for the factor 'Treatment' given
-in above table, edgeR computes fold changes of 'Treated' samples against 'Untreated',
-i.e. the values correspond to up or down regulations of genes in Treated samples.
-
-**Output**
-
-edgeR_ generates a tabular file containing the different columns and results visualized in
-a PDF:
-
-====== =============================================================================
-Column Description
------- -----------------------------------------------------------------------------
-     1 Tag (transposon element ID)
-     2 the logarithm (to basis 2) of the fold change (See the note in inputs section)
-     3 p value adjusted for multiple testing with the Benjamini-Hochberg procedure
-       which controls false discovery rate (FDR)
-     4 Class the transposon belongs to
-     5 Type the transposon belongs to
-====== =============================================================================
-
-.. _edgeR: http://www.bioconductor.org/packages/release/bioc/html/edgeR.html
-]]>
-
-**Note**: This edgeR_ wrapper was adapted from code available at
-https://github.com/nskvir/RepEnrich
-
-    </help>
-    <citations>
-        <citation type="doi">10.1093/bioinformatics/btp616</citation>
-    </citations>
-</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/edger-repenrich2.xml	Sat Apr 20 14:46:12 2024 +0000
@@ -0,0 +1,199 @@
+<tool id="edger-repenrich2" name="edgeR-repenrich2" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@">
+    <description>Determines differentially expressed features from RepEnrich2 counts</description>
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+    <expand macro="edgeR_requirements"/>
+    <stdio>
+        <regex match="Execution halted"
+           source="both"
+           level="fatal"
+           description="Execution halted." />
+        <regex match="Error in"
+           source="both"
+           level="fatal"
+           description="An undefined error occurred, please check your input carefully and contact your administrator." />
+        <regex match="Fatal error"
+           source="both"
+           level="fatal"
+           description="An undefined error occurred, please check your input carefully and contact your administrator." />
+    </stdio>
+    <version_command>
+    <![CDATA[
+        echo $(R --version | grep version | grep -v GNU)", edgeR version" $(R --vanilla --slave -e "library(edgeR);
+        cat(sessionInfo()\$otherPkgs\$edgeR\$Version)" 2> /dev/null | grep -v -i "WARNING: ")
+    ]]>
+    </version_command>
+    <command>
+    <![CDATA[
+        #import json
+        Rscript '${__tool_directory__}/edgeR_repenrich2.R'
+            --factorName '$factorName'
+
+            --levelNameA '$factorLevel_A'
+            #set $factorlevelsA = list()
+            #for $file in $countsFiles_A:
+                $factorlevelsA.append(str($file))
+            #end for
+            $factorlevelsA.reverse()
+            --levelAfiles '#echo json.dumps(factorlevelsA)#'
+
+            --levelNameB '$factorLevel_B'
+            #set $factorlevelsB = list()
+            #for $file in $countsFiles_B:
+                $factorlevelsB.append(str($file))
+            #end for
+            $factorlevelsB.reverse()
+            --levelBfiles '#echo json.dumps(factorlevelsB)#'
+
+            ## the results file is passed once, via the final -o option below
+
+            -p '$plots'
+            #if $normCounts:
+                -n '$counts_out'
+            #end if
+            -o '$edger_out'
+    ]]>
+    </command>
+    <inputs>
+            <param name="factorName" type="text" value="FactorName"  label="Specify a factor name, e.g. genotype or age or drug_x"
+                help="Only letters, numbers and underscores will be retained in this field">
+                <sanitizer>
+                    <valid initial="string.letters,string.digits"><add value="_" /></valid>
+                </sanitizer>
+            </param>
+            <param name="factorLevel_A" type="text" value="FactorLevel1" label="Specify a factor level, typical values could be 'mutant' or 'Drug_X'"
+                   help="Only letters, numbers and underscores will be retained in this field">
+                <sanitizer>
+                    <valid initial="string.letters,string.digits"><add value="_" /></valid>
+                </sanitizer>
+            </param>
+            <param name="countsFiles_A" type="data" format="tabular" multiple="true" label="Counts file(s)" help="Count files must have been generated by repenrich" />
+            <param name="factorLevel_B" type="text" value="FactorLevel2" label="Specify a factor level, typical values could be 'wildtype' or 'control'"
+                   help="Only letters, numbers and underscores will be retained in this field">
+                <sanitizer>
+                    <valid initial="string.letters,string.digits"><add value="_" /></valid>
+                </sanitizer>
+            </param>
+            <param name="countsFiles_B" type="data" format="tabular" multiple="true" label="Counts file(s)" help="Count files must have been generated by repenrich tool" />
+            <param name="normCounts" type="boolean" truevalue="1" falsevalue="0" checked="false"
+            label="Output normalized counts table" />
+    </inputs>
+    <outputs>
+        <data format="tabular" name="edger_out" label="edgeR: ${factorLevel_A} compared to ${factorLevel_B}">
+            <actions>
+                <action name="column_names" type="metadata" default="Tag,log2(FC),FDR,Class,Type" />
+            </actions>
+        </data>
+        <data format="pdf" name="plots" label="edgeR plots" />
+        <data format="tabular" name="counts_out" label="Normalized counts file">
+            <filter>normCounts == True</filter>
+        </data>
+    </outputs>
+    <tests>
+        <test expect_num_outputs="3">
+            <param name="factorName" value="Genotype"/>
+            <param name="factorLevel_A" value="Mutant"/>
+            <param name="countsFiles_A" value="355_fraction_counts.tab,356_fraction_counts.tab"/>
+            <param name="factorLevel_B" value="Wildtype"/>
+            <param name="countsFiles_B" value="353_fraction_counts.tab,354_fraction_counts.tab"/>
+            <param name="normCounts" value="True"/>
+            <output name="counts_out" file="Normalized_counts_file.tab"/>
+            <output name="plots" file="edgeR_plots.pdf"/>
+            <output name="edger_out" file="edgeR_result_file.tab"/>
+
+        </test>
+    </tests>
+    <help>
+<![CDATA[
+.. class:: infomark
+
+**What it does**
+
+Estimate Distance between samples (MDS) and Biological Coefficient Variation (BCV) in count
+data from high-throughput sequencing assays and test for differential expression using edgeR_.
+
+**Inputs**
+
+edgeR-repenrich2 takes count tables generated by RepEnrich2 as inputs. A RepEnrich2 count table looks
+like:
+
+============== ========== ========== ==========
+LSU-rRNA_Dme    rRNA       rRNA       3659329
+-------------- ---------- ---------- ----------
+FW3_DM          LINE       Jockey     831
+-------------- ---------- ---------- ----------
+DMTOM1_LTR      LTR        Gypsy      1004
+-------------- ---------- ---------- ----------
+R1_DM           LINE       R1         7343
+-------------- ---------- ---------- ----------
+TAHRE           LINE       Jockey     4560
+-------------- ---------- ---------- ----------
+G4_DM           LINE       Jockey     3668
+-------------- ---------- ---------- ----------
+BS              LINE       Jockey     7296
+-------------- ---------- ---------- ----------
+Stalker2_I-int  LTR        Gypsy      12252
+-------------- ---------- ---------- ----------
+Stalker3_LTR    LTR        Gypsy      593
+-------------- ---------- ---------- ----------
+TABOR_I-int     LTR        Gypsy      3947
+-------------- ---------- ---------- ----------
+G7_DM           LINE       Jockey     162
+-------------- ---------- ---------- ----------
+BEL_I-int       LTR        Pao        23757
+-------------- ---------- ---------- ----------
+Gypsy6_I-int    LTR        Gypsy      7489
+============== ========== ========== ==========
+
+Count tables must be generated for each sample individually. Here, edgeR_ handles a
+single factor (genotype, age, treatment, etc.) that affects your experiment. This factor has
+two levels/states (for instance, "wild-type" and "mutant"). You need to select the appropriate
+count tables from your history for each factor level.
+
+The following table gives some examples of factors and their levels:
+
+========= ============== ===============
+Factor    Factorlevel1   Factorlevel2
+--------- -------------- ---------------
+Treatment Treated        Untreated
+--------- -------------- ---------------
+Genotype  Knockdown      Wildtype
+--------- -------------- ---------------
+TimePoint Day4           Day1
+--------- -------------- ---------------
+Gender    Female         Male
+========= ============== ===============
+
+*Note*: Output log2 fold changes are based on primary factor level 1 vs. factor level 2.
+Here the order of factor levels is important. For example, for the factor 'Treatment' given
+in the above table, edgeR computes fold changes of 'Treated' samples against 'Untreated',
+i.e. the values correspond to up- or down-regulation of genes in Treated samples.
+
+**Output**
+
+edgeR_ generates a tabular file containing the different columns and results visualized in
+a PDF:
+
+====== =============================================================================
+Column Description
+------ -----------------------------------------------------------------------------
+     1 Tag (transposon element ID)
+     2 the logarithm (to base 2) of the fold change (See the note in inputs section)
+     3 p value adjusted for multiple testing with the Benjamini-Hochberg procedure
+       which controls false discovery rate (FDR)
+     4 Class the transposon belongs to
+     5 Type the transposon belongs to
+====== =============================================================================
+
+.. _edgeR: http://www.bioconductor.org/packages/release/bioc/html/edgeR.html
+]]>
+
+**Note**: This edgeR_ wrapper was adapted from code available at
+https://github.com/nskvir/RepEnrich
+
+    </help>
+    <citations>
+        <citation type="doi">10.1093/bioinformatics/btp616</citation>
+    </citations>
+</tool>
--- a/macros.xml	Sat Apr 20 11:56:53 2024 +0000
+++ b/macros.xml	Sat Apr 20 14:46:12 2024 +0000
@@ -1,6 +1,6 @@
 <macros>
     <token name="@TOOL_VERSION@">2.31.1</token>
-    <token name="@VERSION_SUFFIX@">0</token>
+    <token name="@VERSION_SUFFIX@">1</token>
     <token name="@PROFILE@">23.0</token>
 
     <xml name="repenrich_requirements">
--- a/repenrich2.xml	Sat Apr 20 11:56:53 2024 +0000
+++ b/repenrich2.xml	Sat Apr 20 14:46:12 2024 +0000
@@ -1,4 +1,4 @@
-<tool id="repenrich2" name="RepEnrich" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@">
+<tool id="repenrich2" name="RepEnrich2" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@">
     <description>Repeat Element Profiling</description>
     <macros>
         <import>macros.xml</import>