Galaxy |

Changeset 0:9bdff28ae1b1 (2017-11-07)

Next changeset 1:2a16413ec60d (2018-01-30)

Commit message:
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/edger commit eac022c9c6e51e661c1513306b9fefdad673487d

added:
edger.R
edger.xml
test-data/Mut1.counts
test-data/Mut2.counts
test-data/Mut3.counts
test-data/WT1.counts
test-data/WT2.counts
test-data/WT3.counts
test-data/anno.txt
test-data/edgeR_Mut-WT.tsv
test-data/edgeR_Mut-WT_2fact.tsv
test-data/edgeR_Mut-WT_2fact_anno.tsv
test-data/edgeR_Mut-WT_anno.tsv
test-data/edgeR_Mut-WT_filt.tsv
test-data/edgeR_WT-Mut.tsv
test-data/edgeR_WT-Mut_2fact_anno.tsv
test-data/edgeR_normcounts.tsv
test-data/edgeR_normcounts_anno.tsv
test-data/factorinfo.txt
test-data/matrix.txt

diff -r 000000000000 -r 9bdff28ae1b1 edger.R
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/edger.R Tue Nov 07 08:18:14 2017 -0500

[

b'@@ -0,0 +1,718 @@\n+# This tool takes in a matrix of feature counts as well as gene annotations and\n+# outputs a table of top expressions as well as various plots for differential\n+# expression analysis\n+#\n+# ARGS: htmlPath", "R", 1, "character" -Path to html file linking to other outputs\n+# outPath", "o", 1, "character" -Path to folder to write all output to\n+# filesPath", "j", 2, "character" -JSON list object if multiple files input\n+# matrixPath", "m", 2, "character" -Path to count matrix\n+# factFile", "f", 2, "character" -Path to factor information file\n+# factInput", "i", 2, "character" -String containing factors if manually input \n+# annoPath", "a", 2, "character" -Path to input containing gene annotations\n+# contrastData", "C", 1, "character" -String containing contrasts of interest\n+# cpmReq", "c", 2, "double" -Float specifying cpm requirement\n+# cntReq", "z", 2, "integer" -Integer specifying minimum total count requirement\n+# sampleReq", "s", 2, "integer" -Integer specifying cpm requirement\n+# normCounts", "x", 0, "logical" -String specifying if normalised counts should be output \n+# rdaOpt", "r", 0, "logical" -String specifying if RData should be output\n+# lfcReq", "l", 1, "double" -Float specifying the log-fold-change requirement \n+# pValReq", "p", 1, "double" -Float specifying the p-value requirement\n+# pAdjOpt", "d", 1, "character" -String specifying the p-value adjustment method \n+# normOpt", "n", 1, "character" -String specifying type of normalisation used \n+# robOpt", "b", 0, "logical" -String specifying if robust options should be used \n+# lrtOpt", "t", 0, "logical" -String specifying whether to perform LRT test instead \n+#\n+# OUT: \n+# MDS Plot \n+# BCV Plot\n+# QL Plot\n+# MD Plot\n+# Expression Table\n+# HTML file linking to the ouputs\n+# Optional:\n+# Normalised counts Table\n+# RData file\n+#\n+# Author: Shian Su - registertonysu@gmail.com - Jan 2014\n+# Modified by: Maria Doyle - Oct 2017 (some code taken from the DESeq2 wrapper)\n+\n+# Record starting time\n+timeStart <- as.character(Sys.time())\n+\n+# setup R error handling to go to stderr\n+options( show.error.messages=F, error = function () { cat( geterrmessage(), file=stderr() ); q( "no", 1, F ) } )\n+\n+# we need that to not crash galaxy with an UTF8 error on German LC settings.\n+loc <- Sys.setlocale("LC_MESSAGES", "en_US.UTF-8")\n+\n+# Load all required libraries\n+library(methods, quietly=TRUE, warn.conflicts=FALSE)\n+library(statmod, quietly=TRUE, warn.conflicts=FALSE)\n+library(splines, quietly=TRUE, warn.conflicts=FALSE)\n+library(edgeR, quietly=TRUE, warn.conflicts=FALSE)\n+library(limma, quietly=TRUE, warn.conflicts=FALSE)\n+library(scales, quietly=TRUE, warn.conflicts=FALSE)\n+library(getopt, quietly=TRUE, warn.conflicts=FALSE)\n+\n+################################################################################\n+### Function Delcaration\n+################################################################################\n+# Function to sanitise contrast equations so there are no whitespaces\n+# surrounding the arithmetic operators, leading or trailing whitespace\n+sanitiseEquation <- function(equation) {\n+ equation <- gsub(" *[+] *", "+", equation)\n+ equation <- gsub(" *[-] *", "-", equation)\n+ equation <- gsub(" *[/] *", "/", equation)\n+ equation <- gsub(" *[*] *", "*", equation)\n+ equation <- gsub("^\\\\s+|\\\\s+$", "", equation)\n+ return(equation)\n+}\n+\n+# Function to sanitise group information\n+sanitiseGroups <- function(string) {\n+ string <- gsub(" *[,] *", ",", string)\n+ string <- gsub("^\\\\s+|\\\\s+$", "", string)\n+ return(string)\n+}\n+\n+# Function to change periods to whitespace in a string\n+unmake.names <- function(string) {\n+ string <- gsub(".", " ", string, fixed=TRUE)'..b'ck floppy disc icon associated history item to download ")\n+cata("all files.</p>\\n")\n+cata("<p>.tsv files can be viewed in Excel or any spreadsheet program.</p>\\n")\n+\n+cata("<h4>Additional Information</h4>\\n")\n+cata("<ul>\\n")\n+\n+if (filtCPM || filtSmpCount || filtTotCount) {\n+ if (filtCPM) {\n+ tempStr <- paste("Genes without more than", opt$cmpReq,\n+ "CPM in at least", opt$sampleReq, "samples are insignificant",\n+ "and filtered out.")\n+ } else if (filtSmpCount) {\n+ tempStr <- paste("Genes without more than", opt$cntReq,\n+ "counts in at least", opt$sampleReq, "samples are insignificant",\n+ "and filtered out.")\n+ } else if (filtTotCount) {\n+ tempStr <- paste("Genes without more than", opt$cntReq,\n+ "counts, after summing counts for all samples, are insignificant",\n+ "and filtered out.")\n+ }\n+\n+ ListItem(tempStr)\n+ filterProp <- round(filteredCount/preFilterCount*100, digits=2)\n+ tempStr <- paste0(filteredCount, " of ", preFilterCount," (", filterProp,\n+ "%) genes were filtered out for low expression.")\n+ ListItem(tempStr)\n+}\n+ListItem(opt$normOpt, " was the method used to normalise library sizes.")\n+if (wantLRT) {\n+ ListItem("The edgeR likelihood ratio test was used.")\n+} else {\n+ if (wantRobust) {\n+ ListItem("The edgeR quasi-likelihood test was used with robust settings (robust=TRUE with estimateDisp and glmQLFit).")\n+ } else {\n+ ListItem("The edgeR quasi-likelihood test was used.")\n+ }\n+}\n+if (opt$pAdjOpt!="none") {\n+ if (opt$pAdjOpt=="BH" || opt$pAdjOpt=="BY") {\n+ tempStr <- paste0("MD-Plot highlighted genes are significant at FDR ",\n+ "of ", opt$pValReq," and exhibit log2-fold-change of at ", \n+ "least ", opt$lfcReq, ".")\n+ ListItem(tempStr)\n+ } else if (opt$pAdjOpt=="holm") {\n+ tempStr <- paste0("MD-Plot highlighted genes are significant at adjusted ",\n+ "p-value of ", opt$pValReq," by the Holm(1979) ",\n+ "method, and exhibit log2-fold-change of at least ", \n+ opt$lfcReq, ".")\n+ ListItem(tempStr)\n+ }\n+} else {\n+ tempStr <- paste0("MD-Plot highlighted genes are significant at p-value ",\n+ "of ", opt$pValReq," and exhibit log2-fold-change of at ", \n+ "least ", opt$lfcReq, ".")\n+ ListItem(tempStr)\n+}\n+cata("</ul>\\n")\n+\n+cata("<h4>Summary of experimental data:</h4>\\n")\n+\n+cata("<p>*CHECK THAT SAMPLES ARE ASSOCIATED WITH CORRECT GROUP(S)*</p>\\n")\n+\n+cata("<table border=\\"1\\" cellpadding=\\"3\\">\\n")\n+cata("<tr>\\n")\n+TableHeadItem("SampleID")\n+TableHeadItem(names(factors)[1], " (Primary Factor)")\n+\n+ if (ncol(factors) > 1) {\n+ for (i in names(factors)[2:length(names(factors))]) {\n+ TableHeadItem(i)\n+ }\n+ cata("</tr>\\n")\n+ }\n+\n+for (i in 1:nrow(factors)) {\n+ cata("<tr>\\n")\n+ TableHeadItem(row.names(factors)[i])\n+ for (j in 1:ncol(factors)) {\n+ TableItem(as.character(unmake.names(factors[i, j])))\n+ }\n+ cata("</tr>\\n")\n+}\n+cata("</table>")\n+\n+for (i in 1:nrow(linkData)) {\n+ if (grepl("session_info", linkData$Link[i])) {\n+ HtmlLink(linkData$Link[i], linkData$Label[i])\n+ }\n+}\n+\n+cata("<table border=\\"0\\">\\n")\n+cata("<tr>\\n")\n+TableItem("Task started at:"); TableItem(timeStart)\n+cata("</tr>\\n")\n+cata("<tr>\\n")\n+TableItem("Task ended at:"); TableItem(timeEnd)\n+cata("</tr>\\n")\n+cata("<tr>\\n")\n+TableItem("Task run time:"); TableItem(timeTaken)\n+cata("<tr>\\n")\n+cata("</table>\\n")\n+\n+cata("</body>\\n")\n+cata("</html>")\n'

diff -r 000000000000 -r 9bdff28ae1b1 edger.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/edger.xml Tue Nov 07 08:18:14 2017 -0500

[

b'@@ -0,0 +1,733 @@\n+<tool id="edger" name="edgeR" version="3.16.5">\n+ <description>\n+ Perform differential expression of count data\n+ </description>\n+\n+ <requirements>\n+ <requirement type="package" version="3.16.5">bioconductor-edger</requirement>\n+ <requirement type="package" version="0.2.15">r-rjson</requirement>\n+ <requirement type="package" version="1.20.0">r-getopt</requirement>\n+ \n+ <requirement type="package" version="0.4.1">r-scales</requirement>\n+ \n+ <requirement type="package" version="1.4.30">r-statmod</requirement>\n+ </requirements>\n+\n+ <version_command><![CDATA[\n+ echo $(R --version | grep version | grep -v GNU)", edgeR version" $(R --vanilla --slave -e "library(edgeR); cat(sessionInfo()\\$otherPkgs\\$edgeR\\$Version)" 2> /dev/null | grep -v -i "WARNING: ")", scales version" $(R --vanilla --slave -e "library(scales); cat(sessionInfo()\\$otherPkgs\\$scales\\$Version)" 2> /dev/null | grep -v -i "WARNING: ")", rjson version" $(R --vanilla --slave -e "library(rjson); cat(sessionInfo()\\$otherPkgs\\$rjson\\$Version)" 2> /dev/null | grep -v -i "WARNING: ")", getopt version" $(R --vanilla --slave -e "library(getopt); cat(sessionInfo()\\$otherPkgs\\$getopt\\$Version)" 2> /dev/null | grep -v -i "WARNING: ")\n+ ]]></version_command>\n+\n+ <command detect_errors="exit_code"><![CDATA[\n+#import json\n+Rscript \'$__tool_directory__/edger.R\'\n+\n+-R \'$outReport\'\n+-o \'$outReport.files_path\'\n+\n+#if $input.format=="files":\n+\n+ ## Adapted from DESeq2 wrapper\n+ #set $temp_factor_names = list()\n+ #for $fact in $input.rep_factor:\n+ #set $temp_factor = list()\n+ #for $g in $fact.rep_group:\n+ #set $count_files = list()\n+ #for $file in $g.countsFile:\n+ $count_files.append(str($file))\n+ #end for\n+ $temp_factor.append( {str($g.groupName): $count_files} )\n+ #end for\n+\n+ $temp_factor.reverse()\n+ $temp_factor_names.append([str($fact.factorName), $temp_factor])\n+ #end for\n+ -j \'#echo json.dumps(temp_factor_names)#\'\n+\n+#elif $input.format=="matrix":\n+ -m \'$input.counts\'\n+ #if $input.fact.ffile==\'yes\':\n+ -f \'$input.fact.finfo\'\n+ #else:\n+ -i \'${ \'|\'.join( [\'%s::%s\' % ($x.factorName, $x.groupNames) for x in $input.fact.rep_factor] ) }\'\n+ #end if\n+#end if\n+\n+#if $anno.annoOpt==\'yes\':\n+ -a \'$anno.geneanno\'\n+#end if\n+\n+-C \'${ \',\'.join( [\'%s\' % $x.contrast for x in $rep_contrast] ) }\'\n+\n+#if $f.filt.filt_select == \'yes\':\n+ #if $f.filt.cformat.format_select == \'cpm\':\n+ -c \'$f.filt.cformat.cpmReq\'\n+ -s \'$f.filt.cformat.cpmSampleReq\'\n+ #elif $f.filt.cformat.format_select == \'counts\':\n+ -z \'$f.filt.cformat.cntReq\'\n+ #if $f.filt.cformat.samples.count_select == \'total\':\n+ -y\n+ #elif $f.filt.cformat.samples.count_select == \'sample\':\n+ -s \'$f.filt.cformat.samples.cntSampleReq\'\n+ #end if\n+ #end if\n+#end if\n+\n+#if $out.normCounts:\n+ -x\n+#end if\n+#if $out.rdaOption:\n+ -r\n+#end if\n+\n+-l \'$adv.lfc\'\n+-p \'$adv.pVal\'\n+-d \'$adv.pAdjust\'\n+-n \'$adv.normalisationOption\'\n+#if $adv.robOption:\n+ -b\n+#end if\n+#if $adv.lrtOption:\n+ -t\n+#end if\n+\n+&&\n+mkdir ./output_dir\n+\n+&&\n+cp \'$outReport.files_path\'/*.tsv output_dir/\n+ ]]></command>\n+\n+ <inputs>\n+\n+ \n+ <conditional name="input">\n+ <param name="format" type="select" label="Count Files or Matrix?"\n+ help="You can choose to input either separate count files (one per sample) or a single count matrix">\n+ <option value="files">Separate Count Files</option>\n+ <option value="matrix">Single Count Matrix</option>\n+ </param>\n+\n+ <when value="files">\n+ <repeat name="rep_factor" title="Factor" m'..b' * **Adjusted Threshold:**\n+ Set the threshold for the resulting value of the multiple testing control\n+ method. Only observations whose statistic falls below this value is\n+ considered significant, thus highlighted in the MD plot.\n+\n+ * **P-Value Adjustment Method:**\n+ Change the multiple testing control method, the options are BH(1995) and\n+ BY(2001) which are both false discovery rate controls. There is also\n+ Holm(1979) which is a method for family-wise error rate control.\n+\n+**Normalisation Method:**\n+The most obvious technical factor that affects the read counts, other than gene expression\n+levels, is the sequencing depth of each RNA sample. edgeR adjusts any differential expression\n+analysis for varying sequencing depths as represented by differing library sizes. This is\n+part of the basic modeling procedure and flows automatically into fold-change or p-value\n+calculations. It is always present, and doesn\xe2\x80\x99t require any user intervention.\n+The second most important technical influence on differential expression is one that is less\n+obvious. RNA-seq provides a measure of the relative abundance of each gene in each RNA\n+sample, but does not provide any measure of the total RNA output on a per-cell basis.\n+This commonly becomes important when a small number of genes are very highly expressed\n+in one sample, but not in another. The highly expressed genes can consume a substantial\n+proportion of the total library size, causing the remaining genes to be under-sampled in that\n+sample. Unless this RNA composition effect is adjusted for, the remaining genes may falsely\n+appear to be down-regulated in that sample . The edgeR `calcNormFactors` function normalizes for RNA composition by finding a set of scaling factors for the library sizes that minimize the log-fold changes between the samples for most genes. The default method for computing these scale factors uses a trimmed mean of M values (TMM) between each pair of samples. We call the product of the original library size and the scaling factor the *effective library size*. The effective library size replaces the original library size in all downsteam analyses. TMM is the recommended method for most RNA-Seq data where the majority (more than half) of the genes are believed not differentially expressed between any pair of the samples. You can change the normalisation method under **Advanced Options** above. For more information, see the `calcNormFactors` section in the `edgeR User\'s Guide`_.\n+\n+**Robust Settings**\n+Option to use robust settings. Using robust settings (robust=TRUE) with the edgeR estimateDisp and glmQLFit functions is usually recommended to protect against outlier genes. This is turned on by default. Note that it is only used with the quasi-likelihood F test method. For more information, see the `edgeR workflow article`_.\n+\n+**Test Method**\n+Option to use the likelihood ratio test instead of the quasi-likelihood F test. For more information, see the `edgeR workflow article`_.\n+\n+.. _edgeR User\'s Guide: http://www.bioconductor.org/packages/release/bioc/html/edgeR.html\n+\n+-----\n+\n+**Outputs**\n+\n+This tool outputs\n+\n+ * a table of differentially expressed genes for each contrast of interest\n+ * a HTML report with plots and additional information\n+\n+Optionally, under **Output Options** you can choose to output\n+\n+ * a normalised counts table\n+ * an RData file\n+\n+-----\n+\n+**Citations**\n+\n+Please try to cite the appropriate articles when you publish results obtained using software, as such citation is the main means by which the authors receive credit for their work. For the edgeR method itself, please cite Robinson et al., 2010, and for this tool (which was developed from the Galaxy limma-voom tool) please cite Liu et al., 2015.\n+\n+ ]]></help>\n+ <citations>\n+ <citation type="doi">10.1093/bioinformatics/btp616</citation>\n+ <citation type="doi">10.1093/nar/gkv412</citation>\n+ </citations>\n+</tool>\n'

diff -r 000000000000 -r 9bdff28ae1b1 test-data/Mut1.counts
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/Mut1.counts Tue Nov 07 08:18:14 2017 -0500

@@ -0,0 +1,7 @@
+GeneID Mut1
+11287 1463
+11298 1345
+11302 5
+11303 1574
+11304 361
+11305 1762

diff -r 000000000000 -r 9bdff28ae1b1 test-data/Mut2.counts
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/Mut2.counts Tue Nov 07 08:18:14 2017 -0500

@@ -0,0 +1,7 @@
+GeneID Mut2
+11287 1441
+11298 1291
+11302 6
+11303 1519
+11304 397
+11305 1942

diff -r 000000000000 -r 9bdff28ae1b1 test-data/Mut3.counts
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/Mut3.counts Tue Nov 07 08:18:14 2017 -0500

@@ -0,0 +1,7 @@
+GeneID Mut3
+11287 1495
+11298 1346
+11302 5
+11303 1654
+11304 346
+11305 2027

diff -r 000000000000 -r 9bdff28ae1b1 test-data/WT1.counts
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/WT1.counts Tue Nov 07 08:18:14 2017 -0500

@@ -0,0 +1,7 @@
+GeneID WT1
+11287 1699
+11298 1905
+11302 6
+11303 2099
+11304 356
+11305 2528

diff -r 000000000000 -r 9bdff28ae1b1 test-data/WT2.counts
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/WT2.counts Tue Nov 07 08:18:14 2017 -0500

@@ -0,0 +1,7 @@
+GeneID WT2
+11287 1528
+11298 1744
+11302 8
+11303 1974
+11304 312
+11305 2438

diff -r 000000000000 -r 9bdff28ae1b1 test-data/WT3.counts
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/WT3.counts Tue Nov 07 08:18:14 2017 -0500

@@ -0,0 +1,7 @@
+GeneID WT3
+11287 1601
+11298 1834
+11302 7
+11303 2100
+11304 337
+11305 2493

diff -r 000000000000 -r 9bdff28ae1b1 test-data/anno.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/anno.txt Tue Nov 07 08:18:14 2017 -0500

@@ -0,0 +1,7 @@
+EntrezID Symbol GeneName Chr Length
+11287 Pzp pregnancy zone protein 6 4681
+11298 Aanat arylalkylamine N-acetyltransferase 11 1455
+11302 Aatk apoptosis-associated tyrosine kinase 11 5743
+11303 Abca1 ATP-binding cassette, sub-family A (ABC1), member 1 4 10260
+11304 Abca4 ATP-binding cassette, sub-family A (ABC1), member 4 3 7248
+11305 Abca2 ATP-binding cassette, sub-family A (ABC1), member 2 2 8061
\ No newline at end of file

diff -r 000000000000 -r 9bdff28ae1b1 test-data/edgeR_Mut-WT.tsv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/edgeR_Mut-WT.tsv Tue Nov 07 08:18:14 2017 -0500

@@ -0,0 +1,7 @@
+"GeneID" "logFC" "logCPM" "F" "PValue" "FDR"
+"11304" 0.458203001410391 15.530162861746 32.6285109553746 6.943370724917e-06 4.1660224349502e-05
+"11287" 0.188840644104212 17.6536729774735 20.5671667733158 0.000135453949597801 0.000406361848793403
+"11298" -0.138359578382475 17.6815280107154 10.8470695851279 0.00306012801564425 0.00612025603128849
+"11303" -0.0561156581317604 17.8897677663033 1.50815092591008 0.231329593888878 0.346994390833318
+"11305" -0.0579340818829784 18.1615839598046 1.09689306676368 0.305382540289637 0.366459048347564
+"11302" -0.0682406105165454 10.0898264751075 0.137130529665157 0.884266488139469 0.884266488139469

diff -r 000000000000 -r 9bdff28ae1b1 test-data/edgeR_Mut-WT_2fact.tsv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/edgeR_Mut-WT_2fact.tsv Tue Nov 07 08:18:14 2017 -0500

@@ -0,0 +1,7 @@
+"GeneID" "logFC" "logCPM" "F" "PValue" "FDR"
+"11287" 0.189281291475186 17.6499778192954 198.646314971919 7.90598427634257e-09 4.74359056580554e-08
+"11298" -0.13798041694802 17.6843133699537 96.2224552671758 4.15830411749776e-06 1.24749123524933e-05
+"11304" 0.458490715244216 15.526484673111 14.5864146735617 0.00244295799161999 0.00488591598323999
+"11303" -0.0560600217169691 17.8909334307093 6.5300693781724 0.0442859767053646 0.0664289650580469
+"11305" -0.0585095825423414 18.1629882429457 1.07140336604322 0.32103822810743 0.385245873728916
+"11302" -0.0716631320244627 10.0898336653124 0.376796260571098 0.878304702615846 0.878304702615846

diff -r 000000000000 -r 9bdff28ae1b1 test-data/edgeR_Mut-WT_2fact_anno.tsv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/edgeR_Mut-WT_2fact_anno.tsv Tue Nov 07 08:18:14 2017 -0500

@@ -0,0 +1,7 @@
+"EntrezID" "Symbol" "GeneName" "Chr" "Length" "logFC" "logCPM" "F" "PValue" "FDR"
+11287 "Pzp" "pregnancy zone protein" 6 4681 0.189281947498313 17.6499778192954 198.646315096405 7.90598424818912e-09 4.74359054891347e-08
+11298 "Aanat" "arylalkylamine N-acetyltransferase" 11 1455 -0.137980416947824 17.6843133699537 96.2224553233548 4.15830411749738e-06 1.24749123524921e-05
+11304 "Abca4" "ATP-binding cassette, sub-family A (ABC1), member 4" 3 7248 0.45849071524422 15.526484673111 14.5864146737822 0.00244295799149183 0.00488591598298366
+11303 "Abca1" "ATP-binding cassette, sub-family A (ABC1), member 1" 4 10260 -0.0560600215744048 17.8909334307093 6.53006938009001 0.0442859767053567 0.066428965058035
+11305 "Abca2" "ATP-binding cassette, sub-family A (ABC1), member 2" 2 8061 -0.0585095828508861 18.1629882429457 1.07140336564628 0.321038228193371 0.385245873832045
+11302 "Aatk" "apoptosis-associated tyrosine kinase" 11 5743 -0.0716631320197652 10.0898336653124 0.376796260576848 0.878304702615841 0.878304702615841

diff -r 000000000000 -r 9bdff28ae1b1 test-data/edgeR_Mut-WT_anno.tsv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/edgeR_Mut-WT_anno.tsv Tue Nov 07 08:18:14 2017 -0500

@@ -0,0 +1,7 @@
+"EntrezID" "Symbol" "GeneName" "Chr" "Length" "logFC" "logCPM" "F" "PValue" "FDR"
+11304 "Abca4" "ATP-binding cassette, sub-family A (ABC1), member 4" 3 7248 0.458203001410391 15.530162861746 32.6285109553746 6.943370724917e-06 4.1660224349502e-05
+11287 "Pzp" "pregnancy zone protein" 6 4681 0.188840644104212 17.6536729774735 20.5671667733158 0.000135453949597801 0.000406361848793403
+11298 "Aanat" "arylalkylamine N-acetyltransferase" 11 1455 -0.138359578382475 17.6815280107154 10.8470695851279 0.00306012801564425 0.00612025603128849
+11303 "Abca1" "ATP-binding cassette, sub-family A (ABC1), member 1" 4 10260 -0.0561156581317604 17.8897677663033 1.50815092591008 0.231329593888878 0.346994390833318
+11305 "Abca2" "ATP-binding cassette, sub-family A (ABC1), member 2" 2 8061 -0.0579340818829784 18.1615839598046 1.09689306676368 0.305382540289637 0.366459048347564
+11302 "Aatk" "apoptosis-associated tyrosine kinase" 11 5743 -0.0682406105165454 10.0898264751075 0.137130529665157 0.884266488139469 0.884266488139469

diff -r 000000000000 -r 9bdff28ae1b1 test-data/edgeR_Mut-WT_filt.tsv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/edgeR_Mut-WT_filt.tsv Tue Nov 07 08:18:14 2017 -0500

@@ -0,0 +1,6 @@
+"GeneID" "logFC" "logCPM" "F" "PValue" "FDR"
+"11287" 0.187201149217925 17.6526225386971 165.500659651998 5.18054239620105e-10 2.59027119810053e-09
+"11298" -0.140077523013286 17.6838446963123 82.0496288033128 2.92613742709898e-06 7.31534356774746e-06
+"11304" 0.456820345055957 15.5288695886958 25.2675517854784 6.46433259176098e-05 0.00010773887652935
+"11303" -0.0578468398229744 17.8912127135125 5.26103367901545 0.0384341523491632 0.048042690436454
+"11305" -0.0593023205976883 18.1634104549086 0.864302521617601 0.363623540536245 0.363623540536245

diff -r 000000000000 -r 9bdff28ae1b1 test-data/edgeR_WT-Mut.tsv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/edgeR_WT-Mut.tsv Tue Nov 07 08:18:14 2017 -0500

@@ -0,0 +1,7 @@
+"GeneID" "logFC" "logCPM" "F" "PValue" "FDR"
+"11304" -0.458203001410391 15.530162861746 32.6285109553746 6.943370724917e-06 4.1660224349502e-05
+"11287" -0.188840644104212 17.6536729774735 20.5671667733158 0.000135453949597801 0.000406361848793403
+"11298" 0.138359578382475 17.6815280107154 10.8470695851279 0.00306012801564425 0.00612025603128849
+"11303" 0.0561156581317604 17.8897677663033 1.50815092591008 0.231329593888878 0.346994390833318
+"11305" 0.0579340818829784 18.1615839598046 1.09689306676368 0.305382540289637 0.366459048347564
+"11302" 0.0682406105165454 10.0898264751075 0.137130529665157 0.884266488139469 0.884266488139469

diff -r 000000000000 -r 9bdff28ae1b1 test-data/edgeR_WT-Mut_2fact_anno.tsv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/edgeR_WT-Mut_2fact_anno.tsv Tue Nov 07 08:18:14 2017 -0500

@@ -0,0 +1,7 @@
+"EntrezID" "Symbol" "GeneName" "Chr" "Length" "logFC" "logCPM" "F" "PValue" "FDR"
+11287 "Pzp" "pregnancy zone protein" 6 4681 -0.189281947498313 17.6499778192954 198.646315096405 7.90598424818912e-09 4.74359054891347e-08
+11298 "Aanat" "arylalkylamine N-acetyltransferase" 11 1455 0.137980416947824 17.6843133699537 96.2224553233548 4.15830411749738e-06 1.24749123524921e-05
+11304 "Abca4" "ATP-binding cassette, sub-family A (ABC1), member 4" 3 7248 -0.45849071524422 15.526484673111 14.5864146737822 0.00244295799149183 0.00488591598298366
+11303 "Abca1" "ATP-binding cassette, sub-family A (ABC1), member 1" 4 10260 0.0560600215744048 17.8909334307093 6.53006938009001 0.0442859767053567 0.066428965058035
+11305 "Abca2" "ATP-binding cassette, sub-family A (ABC1), member 2" 2 8061 0.0585095828508861 18.1629882429457 1.07140336564628 0.321038228193371 0.385245873832045
+11302 "Aatk" "apoptosis-associated tyrosine kinase" 11 5743 0.0716631320197652 10.0898336653124 0.376796260576848 0.878304702615841 0.878304702615841

diff -r 000000000000 -r 9bdff28ae1b1 test-data/edgeR_normcounts.tsv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/edgeR_normcounts.tsv Tue Nov 07 08:18:14 2017 -0500

@@ -0,0 +1,7 @@
+"GeneID" "Mut1" "Mut2" "Mut3" "WT1" "WT2" "WT3"
+"11287" 17.7717801382127 17.7103668584544 17.7656984572699 17.6075444214943 17.5078565133576 17.5637960881114
+"11298" 17.6504754185442 17.55181161064 17.6142553019077 17.7726234935868 17.6985800110028 17.7597848438911
+"11302" 9.64041099082467 9.8551982993804 9.60469198931215 9.52851478148979 9.97869946791847 9.78190633986473
+"11303" 17.8772707356813 17.7864068634935 17.9114914356477 17.9125147871338 17.8772755854201 17.9551530504837
+"11304" 15.753577788623 15.8510977521242 15.6551142861549 15.3537170121875 15.2168364952853 15.3165751633072
+"11305" 18.0400277799982 18.1407817993511 18.2048423497925 18.1807759635442 18.1818136580236 18.2026167343562

diff -r 000000000000 -r 9bdff28ae1b1 test-data/edgeR_normcounts_anno.tsv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/edgeR_normcounts_anno.tsv Tue Nov 07 08:18:14 2017 -0500

@@ -0,0 +1,7 @@
+"EntrezID" "Symbol" "GeneName" "Chr" "Length" "Mut1" "Mut2" "Mut3" "WT1" "WT2" "WT3"
+11287 "Pzp" "pregnancy zone protein" 6 4681 17.7717801382127 17.7103668584544 17.7656984572699 17.6075444214943 17.5078565133576 17.5637960881114
+11298 "Aanat" "arylalkylamine N-acetyltransferase" 11 1455 17.6504754185442 17.55181161064 17.6142553019077 17.7726234935868 17.6985800110028 17.7597848438911
+11302 "Aatk" "apoptosis-associated tyrosine kinase" 11 5743 9.64041099082467 9.8551982993804 9.60469198931215 9.52851478148979 9.97869946791847 9.78190633986473
+11303 "Abca1" "ATP-binding cassette, sub-family A (ABC1), member 1" 4 10260 17.8772707356813 17.7864068634935 17.9114914356477 17.9125147871338 17.8772755854201 17.9551530504837
+11304 "Abca4" "ATP-binding cassette, sub-family A (ABC1), member 4" 3 7248 15.753577788623 15.8510977521242 15.6551142861549 15.3537170121875 15.2168364952853 15.3165751633072
+11305 "Abca2" "ATP-binding cassette, sub-family A (ABC1), member 2" 2 8061 18.0400277799982 18.1407817993511 18.2048423497925 18.1807759635442 18.1818136580236 18.2026167343562

diff -r 000000000000 -r 9bdff28ae1b1 test-data/factorinfo.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/factorinfo.txt Tue Nov 07 08:18:14 2017 -0500

@@ -0,0 +1,7 @@
+Sample Genotype Batch
+Mut1 Mut b1
+Mut2 Mut b2
+Mut3 Mut b3
+WT1 WT b1
+WT2 WT b2
+WT3 WT b3

diff -r 000000000000 -r 9bdff28ae1b1 test-data/matrix.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/matrix.txt Tue Nov 07 08:18:14 2017 -0500

@@ -0,0 +1,7 @@
+GeneID Mut1 Mut2 Mut3 WT1 WT2 WT3
+11287 1463 1441 1495 1699 1528 1601
+11298 1345 1291 1346 1905 1744 1834
+11302 5 6 5 6 8 7
+11303 1574 1519 1654 2099 1974 2100
+11304 361 397 346 356 312 337
+11305 1762 1942 2027 2528 2438 2493