Repository 'repenrich'
hg clone https://toolshed.g2.bx.psu.edu/repos/drosofff/repenrich

Changeset 1:54a3f3a195d6 (2017-05-29)
Previous changeset 0:1435d142041b (2017-05-23) Next changeset 2:aed130b47d36 (2017-05-29)
Commit message:
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit 114b47cc624e39b4f485c8623458fc98494c564d
modified:
repenrich.xml
test-data/tool_wrapper.sh
added:
edgeR_repenrich.R
edger-repenrich.xml
test-data/353_fraction_counts.tab
test-data/354_fraction_counts.tab
test-data/355_fraction_counts.tab
test-data/356_fraction_counts.tab
test-data/Normalized_counts_file.tab
test-data/aligned_353.tab
test-data/aligned_354.tab
test-data/aligned_355.tab
test-data/aligned_356.tab
test-data/aligned_reads.tab
test-data/edgeR_plots.pdf
test-data/edgeR_result_file.tab
b
diff -r 1435d142041b -r 54a3f3a195d6 edgeR_repenrich.R
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/edgeR_repenrich.R Mon May 29 13:11:57 2017 -0400
[
@@ -0,0 +1,221 @@
+#!/usr/bin/env Rscript
+
+# A command-line interface to edgeR for use with Galaxy edger-repenrich
+# written by Christophe Antoniewski drosofff@gmail.com 2017.05.30
+
+
+# Route R errors to stderr, then quit with status 1 (workspace not saved, .Last hooks skipped).
+options(show.error.messages = FALSE, error = function() { cat(geterrmessage(), file = stderr()); q("no", 1, FALSE) })
+
+# Force English messages so Galaxy does not crash on a UTF8 error under non-US LC settings.
+loc <- Sys.setlocale("LC_MESSAGES", "en_US.UTF-8")
+
+library("getopt")
+library("tools")
+options(stringAsFactors = FALSE, useFancyQuotes = FALSE)  # NOTE(review): misspelt ("stringsAsFactors" is R's option), so it has no effect; correcting it changes how strings are read - re-test first
+args <- commandArgs(trailingOnly = TRUE)
+
+# getopt spec, one row per option: long name, short flag, argument mask
+# (0 = no argument, 1 = required argument) and type; options are read from commandArgs(TRUE).
+spec <- matrix(c(
+  "quiet", "q", 0, "logical",
+  "help", "h", 0, "logical",
+  "outfile", "o", 1, "character",
+  "countsfile", "n", 1, "character",
+  "factorName", "N", 1, "character",
+  "levelNameA", "A", 1, "character",
+  "levelNameB", "B", 1, "character",
+  "levelAfiles", "a", 1, "character",
+  "levelBfiles", "b", 1, "character",
+  "alignmentA", "i", 1, "character",
+  "alignmentB", "j", 1, "character",
+  "plots" , "p", 1, "character"),
+  byrow=TRUE, ncol=4)
+opt <- getopt(spec)
+
+# if help was asked for, print the usage text generated from the spec
+# and exit with a non-zero error code
+if (!is.null(opt$help)) {
+  cat(getopt(spec, usage=TRUE))
+  q(status=1)
+}
+
+# enforce the required arguments (scalar tests, hence the short-circuit ||)
+if (is.null(opt$outfile)) {
+  cat("'outfile' is required\n")
+  q(status=1)
+}
+if (is.null(opt$levelAfiles) || is.null(opt$levelBfiles)) {
+  cat("input count files are required for both levels\n")
+  q(status=1)
+}
+if (is.null(opt$alignmentA) || is.null(opt$alignmentB)) {
+  cat("total aligned read files are required for both levels\n")
+  q(status=1)
+}
+# progress messages are on unless --quiet was given
+verbose <- if (is.null(opt$quiet)) {
+  TRUE
+} else {
+  FALSE
+}
+# load the analysis packages without their startup messages
+suppressPackageStartupMessages({
+  library("edgeR")
+  library("limma")
+})
+
+# build levels A and B file lists: --levelAfiles / --levelBfiles are parsed as JSON and iterated as file paths
+# each list stores the level name under "level", then one header-less table per file named "<levelName>_<i>"
+library("rjson")
+filesA <- fromJSON(opt$levelAfiles, method = "C", unexpected.escape = "error")
+filesB <- fromJSON(opt$levelBfiles, method = "C", unexpected.escape = "error")
+listA <- list()
+indice = 0
+listA[["level"]] <- opt$levelNameA
+for (file in filesA) {
+    indice = indice +1
+    listA[[paste0(opt$levelNameA,"_",indice)]] <- read.delim(file, header=FALSE)
+    }
+listB <- list()
+indice = 0
+listB[["level"]] <- opt$levelNameB
+for (file in filesB) {
+    indice = indice +1
+    listB[[paste0(opt$levelNameB,"_",indice)]] <- read.delim(file, header=FALSE)
+    }
+
+# build a counts table: row names from column 1 of the first A file, one column per sample from column 4 (assumes every file lists the same tags in the same order - TODO confirm)
+counts <- data.frame(row.names=listA[[2]][,1])
+for (element in names(listA[-1])) {
+    counts<-cbind(counts, listA[[element]][,4])
+    } 
+for (element in names(listB[-1])) {
+    counts<-cbind(counts, listB[[element]][,4])
+    }
+colnames(counts)=c(names(listA[-1]), names(listB[-1]))
+
+# build aligned counts vector: one library size per sample, level A files first, then level B files
+# each alignment file is read without a header and contributes the value of its first cell ([1,1])
+filesi <- fromJSON(opt$alignmentA, method = "C", unexpected.escape = "error")
+filesj <- fromJSON(opt$alignmentB, method = "C", unexpected.escape = "error")
+sizes <- c()
+for (file in filesi) {
+    sizes <- c(sizes, read.delim(file, header=FALSE)[1,1])
+    }
+for (file in filesj) {
+    sizes <- c(sizes, read.delim(file, header=FALSE)[1,1])
+    }
+
+# build a meta data object: one row per sample (named like the columns of `counts`),
+# holding the level name of the sample (condition) and its library size
+meta <- data.frame(
+    row.names=colnames(counts),
+    condition=c(rep(opt$levelNameA,length(filesA)), rep(opt$levelNameB,length(filesB)) ),
+    libsize=sizes
+)
+
+# Define the library size and conditions for the GLM; design columns are named from the `condition` factor
+# itself (meta$condition is only a factor when data.frame() converts strings, no longer the default in R >= 4.0)
+libsize <- meta$libsize
+condition <- factor(meta$condition)
+design <- model.matrix(~0+condition)
+colnames(design) <- levels(condition)
+
+
+# Build a DGE object for the GLM from the counts table and the aligned-read library sizes
+y <- DGEList(counts=counts, lib.size=libsize)
+
+# Normalize the data
+y <- calcNormFactors(y)
+y$samples
+# plotMDS(y) is drawn later, in the plots section
+
+# Estimate the variance: common, then trended, then tagwise dispersion
+y <- estimateGLMCommonDisp(y, design)
+y <- estimateGLMTrendedDisp(y, design)
+y <- estimateGLMTagwiseDisp(y, design)
+# plotBCV(y) is drawn later, in the plots section
+
+# Builds the normalized read abundance (counts per million of reads); written as a tab-separated table with a leading Tag column when --countsfile is given
+cpm <- cpm(y, log=FALSE, lib.size=libsize)
+cpm <- as.data.frame(cpm)
+colnames(cpm) <- colnames(counts)
+if (!is.null(opt$countsfile)) {
+    normalizedAbundance <- data.frame(Tag=rownames(cpm))
+    normalizedAbundance <- cbind(normalizedAbundance, cpm)
+    write.table(normalizedAbundance, file=opt$countsfile, sep="\t", col.names=TRUE, row.names=FALSE, quote=FALSE)
+}
+
+# debug output: print the raw counts and the CPM table
+print(counts)
+print(cpm)
+
+# Conduct fitting of the GLM
+yfit <- glmFit(y, design)
+
+# Initialize zero-column result matrices (one row per tag) to receive the results of the GLM
+results <- matrix(nrow=nrow(counts),ncol=0)
+logfc <- matrix(nrow=nrow(counts),ncol=0)
+
+# Make the comparison for the GLM: level B minus level A
+my.contrasts <- makeContrasts(
+    paste0(opt$levelNameB,"_",opt$levelNameA," = ", opt$levelNameB, " - ", opt$levelNameA),
+    levels = design
+)
+
+# Label of the comparison, used as plot title
+allcontrasts <- paste0(opt$levelNameB," vs ",opt$levelNameA)
+
+# Likelihood-ratio test for the single B - A contrast (no loop: there is only one comparison).
+# All tags are requested unsorted (sort.by="none") so rows keep the input order; columns 1 and 5 (logFC, FDR) are kept.
+lrt <- glmLRT(yfit, contrast=my.contrasts[,1])
+plotSmear(lrt, de.tags=rownames(y))  # NOTE(review): drawn before pdf(opt$plots) is opened, i.e. on R's default device
+title(allcontrasts)
+res <- topTags(lrt,n=nrow(counts),sort.by="none")$table
+results <- cbind(results,res[,c(1,5)])
+logfc <- cbind(logfc,res[c(1)])
+
+# Add the repeat class (column 2) and type (column 3) of the first level-A file back into the results.
+# We should still have the same order as the input data, since topTags was called with sort.by="none"
+results$class <- listA[[2]][,2]
+results$type <- listA[[2]][,3]
+
+# Sort the results table by the FDR (ascending)
+results <- results[with(results, order(FDR)), ]
+
+# Save the results as a tab-separated file without a header line; row names are written as the first column
+write.table(results, opt$outfile, quote=FALSE, sep="\t", col.names=FALSE)
+
+# Plot Fold Changes for repeat classes and types (only when --plots was given)
+
+# open the PDF device, then draw the MDS plot, the BCV plot and the two logFC boxplots
+if (!is.null(opt$plots)) {
+    if (verbose) cat("creating plots\n")
+    pdf(opt$plots)
+    plotMDS(y, main="Multidimensional Scaling Plot Of Distances Between Samples")
+    plotBCV(y, xlab="Gene abundance (Average log CPM)", main="Biological Coefficient of Variation Plot")
+    logFC <- results[, "logFC"]
+    # Plot the repeat classes, ordered by the median of -logFC
+    classes <- with(results, reorder(class, -logFC, median))
+    par(mar=c(6,10,4,1))
+    boxplot(logFC ~ classes, data=results, outline=FALSE, horizontal=TRUE,
+        las=2, xlab="log(Fold Change)", main=paste0(allcontrasts, ", by Class"))
+    abline(v=0)
+    # Plot the repeat types, ordered by the median of -logFC
+    types <- with(results, reorder(type, -logFC, median))
+    boxplot(logFC ~ types, data=results, outline=FALSE, horizontal=TRUE,
+        las=2, xlab="log(Fold Change)", main=paste0(allcontrasts, ", by Type"))
+    abline(v=0)
+}
+
+# close the plot device (this message is printed even when --quiet is set)
+if (!is.null(opt$plots)) {
+  cat("closing plot device\n")
+  dev.off()
+}
+
+cat("Session information:\n\n")
+
+sessionInfo()
+
b
diff -r 1435d142041b -r 54a3f3a195d6 edger-repenrich.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/edger-repenrich.xml Mon May 29 13:11:57 2017 -0400
[
b'@@ -0,0 +1,180 @@\n+<tool id="edger-repenrich" name="edgeR-repenrich" version="0.3.0">\n+    <description>Determines differentially expressed features from RepEnrich counts</description>\n+    <requirements>\n+        <requirement type="package" version="3.16.5-r3.3.1_0">bioconductor-edger</requirement>\n+        <requirement type="package" version="3.30.13-r3.3.1_0">bioconductor-limma</requirement>\n+        <requirement type="package" version="1.20.0-r3.3.1_0">r-getopt</requirement>\n+        <requirement type="package" version="0.2.15-r3.3.1_0">r-rjson</requirement>\n+    </requirements>\n+    <stdio>\n+        <regex match="Execution halted"\n+           source="both"\n+           level="fatal"\n+           description="Execution halted." />\n+        <regex match="Error in"\n+           source="both"\n+           level="fatal"\n+           description="An undefined error occurred, please check your input carefully and contact your administrator." />\n+        <regex match="Fatal error"\n+           source="both"\n+           level="fatal"\n+           description="An undefined error occurred, please check your input carefully and contact your administrator." 
/>\n+    </stdio>\n+    <version_command>\n+    <![CDATA[\n+        echo $(R --version | grep version | grep -v GNU)", edgeR version" $(R --vanilla --slave -e "library(edgeR) &&\n+        cat(sessionInfo()\\$otherPkgs\\$edgeR\\$Version)" 2> /dev/null | grep -v -i "WARNING: ")\n+    ]]>\n+    </version_command>\n+    <command>\n+    <![CDATA[\n+        #import json\n+        Rscript \'${__tool_directory__}/edgeR_repenrich.R\'\n+            --factorName \'$factorName\'\n+\n+            --levelNameA \'$factorLevel_A\'\n+            #set $factorlevelsA = list()\n+            #for $file in $countsFiles_A:\n+                $factorlevelsA.append(str($file))\n+            #end for\n+            $factorlevelsA.reverse()\n+            --levelAfiles \'#echo json.dumps(factorlevelsA)#\'\n+\n+            --levelNameB \'$factorLevel_B\'\n+            #set $factorlevelsB = list()\n+            #for $file in $countsFiles_B:\n+                $factorlevelsB.append(str($file))\n+            #end for\n+            $factorlevelsB.reverse()\n+            --levelBfiles \'#echo json.dumps(factorlevelsB)#\'\n+\n+            #set $alignedA = list()\n+            #for file in $alignmentFiles_A:\n+                $alignedA.append(str($file))\n+            #end for\n+            $alignedA.reverse()\n+            --alignmentA \'#echo json.dumps(alignedA)#\' \n+\n+            #set $alignedB = list()\n+            #for file in $alignmentFiles_B:\n+                $alignedB.append(str($file))\n+            #end for\n+            $alignedB.reverse()\n+            --alignmentB \'#echo json.dumps(alignedB)#\'\n+\n+            -o \'edger_out\'\n+\n+            -p \'$plots\'\n+            #if $normCounts:\n+                -n \'$counts_out\'\n+            #end if\n+            -o \'$edger_out\'\n+    ]]>\n+    </command>\n+    <inputs>\n+            <param name="factorName" type="text" value="FactorName" label="Specify a factor name, e.g. 
genotype or age or drug_x"\n+                help="Only letters, numbers and underscores will be retained in this field">\n+                <sanitizer>\n+                    <valid initial="string.letters,string.digits"><add value="_" /></valid>\n+                </sanitizer>\n+            </param>\n+            <param name="factorLevel_A" type="text" value="FactorLevel1" label="Specify a factor level, typical values could be \'wildtype\' or \'control\'"\n+                   help="Only letters, numbers and underscores will be retained in this field">\n+                <sanitizer>\n+                    <valid initial="string.letters,string.digits"><add value="_" /></valid>\n+                </sanitizer>\n+            </param>\n+            <param name="countsFiles_A" type="data" format="tabular" multiple="true" label="Counts file(s)" help="Count files must have been generated by repenrich" />\n+            <param name="alignmentFiles_A" type="data" format="tabular" multiple="true" labe'..b'rmalized counts table" />\n+    </inputs>\n+    <outputs>\n+        <data format="tabular" name="edger_out" label="edgeR result file on ${on_string}">\n+            <actions>\n+                <action name="column_names" type="metadata" default="Tag,log2(FC),FDR,Class,Type" />\n+            </actions>\n+        </data>\n+        <data format="pdf" name="plots" label="edgeR plots on ${on_string}" />\n+        <data format="tabular" name="counts_out" label="Normalized counts file on ${on_string}">\n+            <filter>normCounts == True</filter>\n+        </data>\n+    </outputs>\n+    <tests>\n+        <test>\n+            <param name="factorName" value="genotype"/>\n+            <param name="factorLevel_A" value="wildtype"/>\n+            <param name="countsFiles_A" value="353_fraction_counts.tab,354_fraction_counts.tab"/>\n+            <param name="alignmentFiles_A" value="aligned_353.tab,aligned_354.tab"/>\n+            <param name="factorLevel_B" value="mutant"/>\n+            
<param name="countsFiles_B" value="355_fraction_counts.tab,356_fraction_counts.tab"/>\n+            <param name="alignmentFiles_B" value="aligned_355.tab,aligned_356.tab"/>\n+            <param name="normCounts" value="True"/>\n+            <output name="counts_out" file="Normalized_counts_file.tab"/>\n+            <output name="plots" file="edgeR_plots.pdf"/>\n+            <output name="edger_out" file="edgeR_result_file.tab"/>\n+\n+        </test>\n+    </tests>\n+    <help>\n+<![CDATA[\n+.. class:: infomark\n+\n+**What it does**\n+\n+Estimate Distance between samples (MDS) and Biological Coefficient Variation (BCV) in count data from high-throughput sequencing assays and test for differential expression using edgeR_.\n+\n+**Inputs**\n+\n+edger-repenrich takes count tables generated by repenrich as input. Count tables must be generated for each sample individually. Here, edgeR_ is handling a single factor (genotype, age, treatment, etc) that effect your experiment. This factor has two levels/states (for instance, "wild-type" and "mutant".\n+You need to select appropriate count table from your history for each factor level.\n+\n+The following table gives some examples of factors and their levels:\n+\n+========= ============== ===============\n+Factor    Factorlevel1   Factorlevel2\n+--------- -------------- ---------------\n+Treatment Treated        Untreated\n+--------- -------------- ---------------\n+Genotype  Knockdown      Wildtype\n+--------- -------------- ---------------\n+TimePoint Day4           Day1\n+--------- -------------- ---------------\n+Gender    Female         Male\n+========= ============== ===============\n+\n+*Note*: Output log2 fold changes are based on primary factor level 1 vs. factor level2. Here the order of factor levels is important. For example, for the factor \'Treatment\' given in above table, DESeq2 computes fold changes of \'Treated\' samples against \'Untreated\', i.e. 
the values correspond to up or down regulations of genes in Treated samples.\n+\n+**Output**\n+\n+edgeR_ generates a tabular file containing the different columns and results visualized in a PDF:\n+\n+====== =============================================================================\n+Column Description\n+------ -----------------------------------------------------------------------------\n+     1 Tag (transposon element ID)\n+     2 the logarithm (to basis 2) of the fold change (See the note in inputs section)\n+     3 p value adjusted for multiple testing with the Benjamini-Hochberg procedure\n+       which controls false discovery rate (FDR)\n+     4 Class the transposon belongs to\n+     5 Type the transposon belongs to\n+====== =============================================================================\n+\n+.. _edgeR: http://www.bioconductor.org/packages/release/bioc/html/edgeR.html\n+]]>\n+\n+**Note**: This edgeR_ wrapper was adapted from code available at https://github.com/nskvir/RepEnrich\n+\n+    </help>\n+    <citations>\n+        <citation type="doi">10.1093/bioinformatics/btp616</citation>\n+    </citations>\n+</tool>\n'
b
diff -r 1435d142041b -r 54a3f3a195d6 repenrich.xml
--- a/repenrich.xml Tue May 23 18:37:22 2017 -0400
+++ b/repenrich.xml Mon May 29 13:11:57 2017 -0400
b
@@ -1,4 +1,4 @@
-<tool id="repenrich" name="RepEnrich" version="0.2.0">
+<tool id="repenrich" name="RepEnrich" version="0.3.0">
     <description>Repeat Element Profiling</description>
     <requirements>
         <requirement type="package" version="1.2.0">bowtie</requirement>
@@ -18,7 +18,10 @@
         ln -f -s '$input_fastq' '${input_base}.fastq' &&
         bowtie-build '$genome' ${baseReference} &&
         python $__tool_directory__/RepEnrich_setup.py $repeatmasker ${baseReference}.fa setup_folder_${baseReference} &&
-        bowtie $baseReference -p \${GALAXY_SLOTS:-4} -t -m 1 -S --max ${input_base}_multimap.fastq ${input_base}.fastq ${input_base}_unique.sam &&
+        bowtie $baseReference -p \${GALAXY_SLOTS:-4} -t -m 1 -S --max ${input_base}_multimap.fastq ${input_base}.fastq ${input_base}_unique.sam 2>bowtie_alignments.txt &&
+        ALIGNED=\$(grep 'reads with at least one' bowtie_alignments.txt | cut -d ' ' -f 9) &&
+        NONALIGNED=\$(grep 'reads that failed to align:' bowtie_alignments.txt | cut -d ' ' -f 7) &&
+        echo \$((\$ALIGNED-\$NONALIGNED)) > bowtie_aligned.numb &&
         samtools view -bS ${input_base}_unique.sam > ${input_base}_unique.bam &&
         samtools sort ${input_base}_unique.bam ${input_base}_unique_sorted &&
         mv ${input_base}_unique_sorted.bam ${input_base}_unique.bam &&
@@ -38,6 +41,8 @@
     </inputs>
 
     <outputs>
+        <data format="tabular" name="bowtie_alignments" label="RepEnrich on ${on_string}: reads aligned" from_work_dir="bowtie_aligned.numb">
+        </data>
         <data format="tabular" name="class_fraction_counts" label="RepEnrich on ${on_string}: class fraction counts" from_work_dir="class_fraction_counts.tabular">
         </data>
         <data format="tabular" name="family_fraction_counts" label="RepEnrich on ${on_string}: family fraction counts" from_work_dir="family_fraction_counts.tabular">
@@ -51,6 +56,7 @@
             <param name="input_fastq" value="Samp.fastq" ftype="fastq"/>
             <param name="genome" value="chrM.fa" ftype="fasta"/>
             <param name="repeatmasker" value="chrM_repeatmasker.txt" ftype="txt"/>
+            <output name="bowtie_alignments" file="aligned_reads.tab" ftype="tabular"/>
             <output name="class_fraction_counts" file="Samp_class_fraction_counts.tabular" ftype="tabular"/>
             <output name="family_fraction_counts" file="Samp_family_fraction_counts.tabular" ftype="tabular"/>
             <output name="fraction_counts" file="Samp_fraction_counts.tabular" ftype="tabular"/>
b
diff -r 1435d142041b -r 54a3f3a195d6 test-data/353_fraction_counts.tab
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/353_fraction_counts.tab Mon May 29 13:11:57 2017 -0400
b
@@ -0,0 +1,210 @@
+LSU-rRNA_Dme rRNA rRNA 3078755
+FW3_DM LINE Jockey 913
+DMTOM1_LTR LTR Gypsy 1012
+R1_DM LINE R1 5407
+TAHRE LINE Jockey 3914
+G4_DM LINE Jockey 3407
+BS LINE Jockey 5001
+Stalker2_I-int LTR Gypsy 12474
+Stalker3_LTR LTR Gypsy 439
+TABOR_I-int LTR Gypsy 3081
+G7_DM LINE Jockey 98
+BEL_I-int LTR Pao 21860
+Gypsy6_I-int LTR Gypsy 6746
+ACCORD_I-int LTR Gypsy 2165
+DM412B_LTR LTR Gypsy 2910
+G2_DM LINE Jockey 1132
+SSU-rRNA_Hsa rRNA rRNA 172100
+TART_B1 LINE Jockey 21513
+S2_DM DNA TcMar-Tc1 733
+LOOPER1_DM DNA PiggyBac 589
+HOBO DNA hAT-hobo 4404
+ARS406_DM Unknown Unknown 719
+G6_DM LINE Jockey 3473
+DOC2_DM LINE Jockey 4538
+Baggins1 LINE LOA 3893
+NINJA_LTR LTR Pao 406
+TRANSIB2 DNA CMC-Transib 3076
+Gypsy5_LTR LTR Gypsy 451
+MDG3_LTR LTR Gypsy 848
+Gypsy12A_LTR LTR Gypsy 2242
+Gypsy_LTR LTR Gypsy 917
+TIRANT_LTR LTR Gypsy 1279
+I_DM LINE I 7013
+DIVER2_I-int LTR Pao 5627
+P-1_DY DNA P 2
+Invader2_I-int LTR Gypsy 5510
+DOC6_DM LINE Jockey 1240
+BURDOCK_LTR LTR Gypsy 370
+SSU-rRNA_Dme rRNA rRNA 4153
+MAX_LTR LTR Pao 753
+STALKER4_LTR LTR Gypsy 1210
+XDMR_DM Unknown Unknown 4328
+BARI_DM DNA TcMar-Tc1 902
+Transib-N1_DM DNA CMC-Transib 371
+Gypsy5_I-int LTR Gypsy 2186
+XDMR Unknown Unknown 1440
+ACCORD2_LTR LTR Gypsy 192
+POGON1 DNA TcMar-Pogo 403
+BS4_DM LINE Jockey 21
+ALA_DM Unknown Unknown 6
+Gypsy1-I_DM LTR Gypsy 6297
+ACCORD_LTR LTR Gypsy 220
+BURDOCK_I-int LTR Gypsy 3332
+ZAM_I-int LTR Gypsy 4426
+Invader5_I-int LTR Gypsy 335
+G3_DM LINE Jockey 556
+ROOA_I-int LTR Pao 4589
+Gypsy1-LTR_DM LTR Gypsy 795
+DM1731_I-int LTR Copia 4881
+ROO_I-int LTR Pao 28034
+DM412 LTR Gypsy 23566
+PROTOP DNA P 4353
+TART-A LINE Jockey 3408
+ROO_LTR LTR Pao 1851
+DIVER_LTR LTR Pao 429
+Gypsy3_I-int LTR Gypsy 2753
+BATUMI_LTR LTR Pao 403
+IDEFIX_LTR LTR Gypsy 1844
+G5_DM LINE Jockey 2551
+HETA LINE Jockey 24397
+Gypsy11_I-int LTR Gypsy 560
+R1-2_DM LINE R1 177
+Gypsy11_LTR LTR Gypsy 149
+HELENA_RT LINE Jockey 627
+FTZ_DM Unknown Unknown 0
+MICROPIA_I-int LTR Gypsy 10355
+Invader4_I-int LTR Gypsy 1219
+PLACW_DM DNA P 127
+BLOOD_LTR LTR Gypsy 746
+R2_DM LINE R2 8407
+Copia1-I_DM LTR Copia 3518
+Gypsy4_I-int LTR Gypsy 10056
+Chouto_LTR LTR Gypsy 281
+BATUMI_I-int LTR Pao 4789
+DNAREP1_DM RC Helitron 63954
+DMRT1B LINE R1 7119
+DMRT1C LINE R1 2087
+TLD2 LTR Gypsy 17
+DMRT1A LINE R1 3540
+DM1731_LTR LTR Copia 440
+LSU-rRNA_Cel rRNA rRNA 326925
+Copia_LTR LTR Copia 1129
+Gypsy6_LTR LTR Gypsy 484
+Gypsy12_I-int LTR Gypsy 9627
+BLASTOPIA_LTR LTR Gypsy 478
+Bica_LTR LTR Gypsy 125
+Gypsy8_LTR LTR Gypsy 1241
+NINJA_I-int LTR Pao 4503
+PROTOP_B DNA P 8285
+PROTOP_A DNA P 3384
+Invader3_LTR LTR Gypsy 608
+Chimpo_LTR LTR Gypsy 120
+Invader2_LTR LTR Gypsy 472
+BLASTOPIA_I-int LTR Gypsy 9540
+FROGGER_LTR LTR Copia 54
+NOMAD_I-int LTR Gypsy 11594
+QUASIMODO_LTR LTR Gypsy 1174
+TABOR_LTR LTR Gypsy 273
+Stalker2_LTR LTR Gypsy 672
+MICROPIA_LTR LTR Gypsy 863
+Gypsy9_I-int LTR Gypsy 474
+STALKER4_I-int LTR Gypsy 5741
+TRANSIB1 DNA CMC-Transib 2116
+DOC LINE Jockey 16414
+TRANSIB3 DNA CMC-Transib 1132
+Copia_I-int LTR Copia 26733
+TRANSIB4 DNA CMC-Transib 571
+Gypsy10_LTR LTR Gypsy 368
+Invader3_I-int LTR Gypsy 3722
+Invader1_I-int LTR Gypsy 1672
+BS3_DM LINE Jockey 877
+Gypsy2-LTR_DM LTR Gypsy 690
+LSU-rRNA_Hsa rRNA rRNA 0
+NOMAD_LTR LTR Gypsy 822
+Helitron1_DM RC Helitron 6
+LINEJ1_DM LINE Jockey 6329
+Invader4_LTR LTR Gypsy 1228
+MDG3_I-int LTR Gypsy 4971
+LmeSINE1c SINE tRNA-Deu-L2 1414
+ROVER-LTR_DM LTR Gypsy 415
+S_DM DNA TcMar-Tc1 3476
+Invader6_I-int LTR Gypsy 6701
+ROVER-I_DM LTR Gypsy 5589
+QUASIMODO_I-int LTR Gypsy 13018
+Chouto_I-int LTR Gypsy 1304
+NTS_DM Other Other 36205
+Gypsy3_LTR LTR Gypsy 447
+SSU-rRNA_Cel rRNA rRNA 2183
+MINOS DNA TcMar-Tc1 226
+DOC4_DM LINE Jockey 830
+ZAM_LTR LTR Gypsy 225
+QUASIMODO2-LTR_DM LTR Gypsy 200
+Gypsy2-I_DM LTR Gypsy 3896
+TRANSPAC_I-int LTR Gypsy 14982
+Bica_I-int LTR Gypsy 1390
+BS2 LINE Jockey 4991
+BLOOD_I-int LTR Gypsy 17222
+DMCR1A LINE CR1 17957
+QUASIMODO2-I_DM LTR Gypsy 2225
+HMSBEAGLE_I-int LTR Gypsy 5908
+DMLTR5 LTR Gypsy 160
+Gypsy7_LTR LTR Gypsy 87
+G5A_DM LINE Jockey 1595
+MDG1_I-int LTR Gypsy 13740
+Gypsy2_I-int LTR Gypsy 3453
+BARI1 DNA TcMar-Tc1 322
+Invader5_LTR LTR Gypsy 64
+DM176_LTR LTR Gypsy 872
+DOC3_DM LINE Jockey 10659
+Copia1-LTR_DM LTR Copia 566
+TOM_I-int LTR Gypsy 2311
+NOF_FB DNA MULE-NOF 960
+Chimpo_I-int LTR Gypsy 1060
+DIVER_I-int LTR Pao 15627
+TIRANT_I-int LTR Gypsy 8559
+Gypsy2_LTR LTR Gypsy 485
+FB4_DM DNA TcMar-Tc1 6939
+GTWIN_LTR LTR Gypsy 467
+Invader6_LTR LTR Gypsy 588
+Gypsy8_I-int LTR Gypsy 4083
+G_DM LINE Jockey 8035
+TRANSPAC_LTR LTR Gypsy 555
+FUSHI_DM Unknown Unknown 0
+Transib5 DNA CMC-Transib 965
+MuDR-1_DEl DNA MULE-NOF 9
+Mariner2_DM DNA TcMar-Tc1 1591
+DOC5_DM LINE Jockey 2183
+TC1_DM DNA TcMar-Tc1 1960
+Gypsy9_LTR LTR Gypsy 42
+DMRP1 Unknown Unknown 227
+Gypsy10_I-int LTR Gypsy 1930
+Gypsy4_LTR LTR Gypsy 194
+DM297_LTR LTR Gypsy 1478
+ACCORD2_I-int LTR Gypsy 2458
+Invader1_LTR LTR Gypsy 1931
+BEL_LTR LTR Pao 858
+IVK_DM LINE I 2921
+M4DM DNA CMC-Transib 1650
+FW2_DM LINE Jockey 13847
+Copia2_I-int LTR Copia 3455
+MDG1_LTR LTR Gypsy 582
+Gypsy12_LTR LTR Gypsy 5934
+Gypsy7_I-int LTR Gypsy 1009
+FROGGER_I-int LTR Copia 1361
+Gypsy_I-int LTR Gypsy 5634
+5S_DM RNA RNA 10392
+GTWIN_I-int LTR Gypsy 5054
+CIRCE LTR Gypsy 3323
+DIVER2_LTR LTR Pao 535
+POGO DNA TcMar-Pogo 2492
+Gypsy6A_LTR LTR Gypsy 573
+MAX_I-int LTR Pao 9201
+ROOA_LTR LTR Pao 483
+DM176_I-int LTR Gypsy 2813
+Copia2_LTR_DM LTR Copia 486
+DM297_I-int LTR Gypsy 11530
+IDEFIX_I-int LTR Gypsy 5547
+Jockey2 LINE Jockey 1588
+TC1-2_DM DNA TcMar-Tc1 3948
+DMRPR Unknown Unknown 187
b
diff -r 1435d142041b -r 54a3f3a195d6 test-data/354_fraction_counts.tab
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/354_fraction_counts.tab Mon May 29 13:11:57 2017 -0400
b
@@ -0,0 +1,210 @@
+LSU-rRNA_Dme rRNA rRNA 3659329
+FW3_DM LINE Jockey 831
+DMTOM1_LTR LTR Gypsy 1004
+R1_DM LINE R1 7343
+TAHRE LINE Jockey 4560
+G4_DM LINE Jockey 3668
+BS LINE Jockey 7296
+Stalker2_I-int LTR Gypsy 12252
+Stalker3_LTR LTR Gypsy 593
+TABOR_I-int LTR Gypsy 3947
+G7_DM LINE Jockey 162
+BEL_I-int LTR Pao 23757
+Gypsy6_I-int LTR Gypsy 7489
+ACCORD_I-int LTR Gypsy 2723
+DM412B_LTR LTR Gypsy 3078
+G2_DM LINE Jockey 1328
+SSU-rRNA_Hsa rRNA rRNA 318045
+TART_B1 LINE Jockey 25248
+S2_DM DNA TcMar-Tc1 765
+LOOPER1_DM DNA PiggyBac 821
+HOBO DNA hAT-hobo 5031
+ARS406_DM Unknown Unknown 563
+G6_DM LINE Jockey 4827
+DOC2_DM LINE Jockey 4438
+Baggins1 LINE LOA 5136
+NINJA_LTR LTR Pao 491
+TRANSIB2 DNA CMC-Transib 3197
+Gypsy5_LTR LTR Gypsy 651
+MDG3_LTR LTR Gypsy 1037
+Gypsy12A_LTR LTR Gypsy 3059
+Gypsy_LTR LTR Gypsy 1306
+TIRANT_LTR LTR Gypsy 1371
+I_DM LINE I 7987
+DIVER2_I-int LTR Pao 6679
+P-1_DY DNA P 8
+Invader2_I-int LTR Gypsy 6823
+DOC6_DM LINE Jockey 1259
+BURDOCK_LTR LTR Gypsy 287
+SSU-rRNA_Dme rRNA rRNA 7113
+MAX_LTR LTR Pao 1017
+STALKER4_LTR LTR Gypsy 1292
+XDMR_DM Unknown Unknown 3317
+BARI_DM DNA TcMar-Tc1 990
+Transib-N1_DM DNA CMC-Transib 308
+Gypsy5_I-int LTR Gypsy 2448
+XDMR Unknown Unknown 1249
+ACCORD2_LTR LTR Gypsy 235
+POGON1 DNA TcMar-Pogo 331
+BS4_DM LINE Jockey 34
+ALA_DM Unknown Unknown 5
+Gypsy1-I_DM LTR Gypsy 8160
+ACCORD_LTR LTR Gypsy 295
+BURDOCK_I-int LTR Gypsy 3119
+ZAM_I-int LTR Gypsy 5129
+Invader5_I-int LTR Gypsy 416
+G3_DM LINE Jockey 797
+ROOA_I-int LTR Pao 4985
+Gypsy1-LTR_DM LTR Gypsy 1042
+DM1731_I-int LTR Copia 5180
+ROO_I-int LTR Pao 31793
+DM412 LTR Gypsy 24086
+PROTOP DNA P 4942
+TART-A LINE Jockey 3697
+ROO_LTR LTR Pao 1639
+DIVER_LTR LTR Pao 469
+Gypsy3_I-int LTR Gypsy 3399
+BATUMI_LTR LTR Pao 554
+IDEFIX_LTR LTR Gypsy 2191
+G5_DM LINE Jockey 2601
+HETA LINE Jockey 23025
+Gypsy11_I-int LTR Gypsy 671
+R1-2_DM LINE R1 267
+Gypsy11_LTR LTR Gypsy 134
+HELENA_RT LINE Jockey 653
+FTZ_DM Unknown Unknown 0
+MICROPIA_I-int LTR Gypsy 10149
+Invader4_I-int LTR Gypsy 1439
+PLACW_DM DNA P 149
+BLOOD_LTR LTR Gypsy 748
+R2_DM LINE R2 15327
+Copia1-I_DM LTR Copia 3588
+Gypsy4_I-int LTR Gypsy 12113
+Chouto_LTR LTR Gypsy 299
+BATUMI_I-int LTR Pao 6395
+DNAREP1_DM RC Helitron 67821
+DMRT1B LINE R1 9706
+DMRT1C LINE R1 2530
+TLD2 LTR Gypsy 65
+DMRT1A LINE R1 4456
+DM1731_LTR LTR Copia 577
+LSU-rRNA_Cel rRNA rRNA 520168
+Copia_LTR LTR Copia 1408
+Gypsy6_LTR LTR Gypsy 428
+Gypsy12_I-int LTR Gypsy 9778
+BLASTOPIA_LTR LTR Gypsy 550
+Bica_LTR LTR Gypsy 140
+Gypsy8_LTR LTR Gypsy 1311
+NINJA_I-int LTR Pao 5274
+PROTOP_B DNA P 8690
+PROTOP_A DNA P 3533
+Invader3_LTR LTR Gypsy 657
+Chimpo_LTR LTR Gypsy 94
+Invader2_LTR LTR Gypsy 492
+BLASTOPIA_I-int LTR Gypsy 10309
+FROGGER_LTR LTR Copia 41
+NOMAD_I-int LTR Gypsy 16469
+QUASIMODO_LTR LTR Gypsy 1332
+TABOR_LTR LTR Gypsy 384
+Stalker2_LTR LTR Gypsy 726
+MICROPIA_LTR LTR Gypsy 797
+Gypsy9_I-int LTR Gypsy 504
+STALKER4_I-int LTR Gypsy 6207
+TRANSIB1 DNA CMC-Transib 2082
+DOC LINE Jockey 17883
+TRANSIB3 DNA CMC-Transib 1184
+Copia_I-int LTR Copia 27612
+TRANSIB4 DNA CMC-Transib 635
+Gypsy10_LTR LTR Gypsy 314
+Invader3_I-int LTR Gypsy 4667
+Invader1_I-int LTR Gypsy 1832
+BS3_DM LINE Jockey 1252
+Gypsy2-LTR_DM LTR Gypsy 966
+LSU-rRNA_Hsa rRNA rRNA 5
+NOMAD_LTR LTR Gypsy 619
+Helitron1_DM RC Helitron 3
+LINEJ1_DM LINE Jockey 7025
+Invader4_LTR LTR Gypsy 1420
+MDG3_I-int LTR Gypsy 5392
+LmeSINE1c SINE tRNA-Deu-L2 599
+ROVER-LTR_DM LTR Gypsy 564
+S_DM DNA TcMar-Tc1 3612
+Invader6_I-int LTR Gypsy 6931
+ROVER-I_DM LTR Gypsy 6513
+QUASIMODO_I-int LTR Gypsy 14906
+Chouto_I-int LTR Gypsy 1690
+NTS_DM Other Other 65954
+Gypsy3_LTR LTR Gypsy 566
+SSU-rRNA_Cel rRNA rRNA 3463
+MINOS DNA TcMar-Tc1 173
+DOC4_DM LINE Jockey 813
+ZAM_LTR LTR Gypsy 259
+QUASIMODO2-LTR_DM LTR Gypsy 313
+Gypsy2-I_DM LTR Gypsy 4606
+TRANSPAC_I-int LTR Gypsy 9674
+Bica_I-int LTR Gypsy 1852
+BS2 LINE Jockey 5595
+BLOOD_I-int LTR Gypsy 22330
+DMCR1A LINE CR1 20380
+QUASIMODO2-I_DM LTR Gypsy 2717
+HMSBEAGLE_I-int LTR Gypsy 7239
+DMLTR5 LTR Gypsy 161
+Gypsy7_LTR LTR Gypsy 112
+G5A_DM LINE Jockey 1804
+MDG1_I-int LTR Gypsy 16315
+Gypsy2_I-int LTR Gypsy 3190
+BARI1 DNA TcMar-Tc1 395
+Invader5_LTR LTR Gypsy 69
+DM176_LTR LTR Gypsy 899
+DOC3_DM LINE Jockey 11941
+Copia1-LTR_DM LTR Copia 771
+TOM_I-int LTR Gypsy 2563
+NOF_FB DNA MULE-NOF 1272
+Chimpo_I-int LTR Gypsy 1288
+DIVER_I-int LTR Pao 13178
+TIRANT_I-int LTR Gypsy 10917
+Gypsy2_LTR LTR Gypsy 479
+FB4_DM DNA TcMar-Tc1 7149
+GTWIN_LTR LTR Gypsy 598
+Invader6_LTR LTR Gypsy 649
+Gypsy8_I-int LTR Gypsy 4809
+G_DM LINE Jockey 8905
+TRANSPAC_LTR LTR Gypsy 538
+FUSHI_DM Unknown Unknown 0
+Transib5 DNA CMC-Transib 1123
+MuDR-1_DEl DNA MULE-NOF 6
+Mariner2_DM DNA TcMar-Tc1 2011
+DOC5_DM LINE Jockey 3037
+TC1_DM DNA TcMar-Tc1 1840
+Gypsy9_LTR LTR Gypsy 51
+DMRP1 Unknown Unknown 262
+Gypsy10_I-int LTR Gypsy 2258
+Gypsy4_LTR LTR Gypsy 274
+DM297_LTR LTR Gypsy 1489
+ACCORD2_I-int LTR Gypsy 2934
+Invader1_LTR LTR Gypsy 3346
+BEL_LTR LTR Pao 875
+IVK_DM LINE I 3349
+M4DM DNA CMC-Transib 1727
+FW2_DM LINE Jockey 14234
+Copia2_I-int LTR Copia 4218
+MDG1_LTR LTR Gypsy 690
+Gypsy12_LTR LTR Gypsy 6292
+Gypsy7_I-int LTR Gypsy 1256
+FROGGER_I-int LTR Copia 1611
+Gypsy_I-int LTR Gypsy 6822
+5S_DM RNA RNA 6317
+GTWIN_I-int LTR Gypsy 5448
+CIRCE LTR Gypsy 3767
+DIVER2_LTR LTR Pao 793
+POGO DNA TcMar-Pogo 2427
+Gypsy6A_LTR LTR Gypsy 617
+MAX_I-int LTR Pao 12395
+ROOA_LTR LTR Pao 400
+DM176_I-int LTR Gypsy 3627
+Copia2_LTR_DM LTR Copia 891
+DM297_I-int LTR Gypsy 11837
+IDEFIX_I-int LTR Gypsy 6193
+Jockey2 LINE Jockey 1770
+TC1-2_DM DNA TcMar-Tc1 3807
+DMRPR Unknown Unknown 128
b
diff -r 1435d142041b -r 54a3f3a195d6 test-data/355_fraction_counts.tab
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/355_fraction_counts.tab Mon May 29 13:11:57 2017 -0400
b
@@ -0,0 +1,210 @@
+LSU-rRNA_Dme rRNA rRNA 4126525
+FW3_DM LINE Jockey 376
+DMTOM1_LTR LTR Gypsy 458
+R1_DM LINE R1 6019
+TAHRE LINE Jockey 1417
+G4_DM LINE Jockey 1225
+BS LINE Jockey 4135
+Stalker2_I-int LTR Gypsy 4814
+Stalker3_LTR LTR Gypsy 235
+TABOR_I-int LTR Gypsy 3691
+G7_DM LINE Jockey 418
+BEL_I-int LTR Pao 15192
+Gypsy6_I-int LTR Gypsy 3763
+ACCORD_I-int LTR Gypsy 2031
+DM412B_LTR LTR Gypsy 3001
+G2_DM LINE Jockey 764
+SSU-rRNA_Hsa rRNA rRNA 301813
+TART_B1 LINE Jockey 5089
+S2_DM DNA TcMar-Tc1 205
+LOOPER1_DM DNA PiggyBac 31
+HOBO DNA hAT-hobo 2119
+ARS406_DM Unknown Unknown 230
+G6_DM LINE Jockey 786
+DOC2_DM LINE Jockey 2052
+Baggins1 LINE LOA 1832
+NINJA_LTR LTR Pao 114
+TRANSIB2 DNA CMC-Transib 1484
+Gypsy5_LTR LTR Gypsy 326
+MDG3_LTR LTR Gypsy 454
+Gypsy12A_LTR LTR Gypsy 1077
+Gypsy_LTR LTR Gypsy 1737
+TIRANT_LTR LTR Gypsy 1596
+I_DM LINE I 11149
+DIVER2_I-int LTR Pao 2668
+P-1_DY DNA P 4
+Invader2_I-int LTR Gypsy 3157
+DOC6_DM LINE Jockey 768
+BURDOCK_LTR LTR Gypsy 193
+SSU-rRNA_Dme rRNA rRNA 3822
+MAX_LTR LTR Pao 343
+STALKER4_LTR LTR Gypsy 803
+XDMR_DM Unknown Unknown 1587
+BARI_DM DNA TcMar-Tc1 522
+Transib-N1_DM DNA CMC-Transib 130
+Gypsy5_I-int LTR Gypsy 1036
+XDMR Unknown Unknown 903
+ACCORD2_LTR LTR Gypsy 76
+POGON1 DNA TcMar-Pogo 256
+BS4_DM LINE Jockey 23
+ALA_DM Unknown Unknown 0
+Gypsy1-I_DM LTR Gypsy 22753
+ACCORD_LTR LTR Gypsy 344
+BURDOCK_I-int LTR Gypsy 2232
+ZAM_I-int LTR Gypsy 2892
+Invader5_I-int LTR Gypsy 117
+G3_DM LINE Jockey 250
+ROOA_I-int LTR Pao 2579
+Gypsy1-LTR_DM LTR Gypsy 576
+DM1731_I-int LTR Copia 6873
+ROO_I-int LTR Pao 22941
+DM412 LTR Gypsy 23508
+PROTOP DNA P 2059
+TART-A LINE Jockey 1825
+ROO_LTR LTR Pao 1453
+DIVER_LTR LTR Pao 328
+Gypsy3_I-int LTR Gypsy 2258
+BATUMI_LTR LTR Pao 169
+IDEFIX_LTR LTR Gypsy 1012
+G5_DM LINE Jockey 1300
+HETA LINE Jockey 10327
+Gypsy11_I-int LTR Gypsy 168
+R1-2_DM LINE R1 68
+Gypsy11_LTR LTR Gypsy 48
+HELENA_RT LINE Jockey 416
+FTZ_DM Unknown Unknown 0
+MICROPIA_I-int LTR Gypsy 3327
+Invader4_I-int LTR Gypsy 1180
+PLACW_DM DNA P 204
+BLOOD_LTR LTR Gypsy 542
+R2_DM LINE R2 15128
+Copia1-I_DM LTR Copia 1946
+Gypsy4_I-int LTR Gypsy 5348
+Chouto_LTR LTR Gypsy 159
+BATUMI_I-int LTR Pao 1842
+DNAREP1_DM RC Helitron 36953
+DMRT1B LINE R1 4115
+DMRT1C LINE R1 670
+TLD2 LTR Gypsy 1
+DMRT1A LINE R1 1235
+DM1731_LTR LTR Copia 321
+LSU-rRNA_Cel rRNA rRNA 251671
+Copia_LTR LTR Copia 3075
+Gypsy6_LTR LTR Gypsy 760
+Gypsy12_I-int LTR Gypsy 2466
+BLASTOPIA_LTR LTR Gypsy 919
+Bica_LTR LTR Gypsy 67
+Gypsy8_LTR LTR Gypsy 486
+NINJA_I-int LTR Pao 1146
+PROTOP_B DNA P 4300
+PROTOP_A DNA P 1988
+Invader3_LTR LTR Gypsy 227
+Chimpo_LTR LTR Gypsy 33
+Invader2_LTR LTR Gypsy 213
+BLASTOPIA_I-int LTR Gypsy 12045
+FROGGER_LTR LTR Copia 57
+NOMAD_I-int LTR Gypsy 8154
+QUASIMODO_LTR LTR Gypsy 528
+TABOR_LTR LTR Gypsy 88
+Stalker2_LTR LTR Gypsy 356
+MICROPIA_LTR LTR Gypsy 149
+Gypsy9_I-int LTR Gypsy 188
+STALKER4_I-int LTR Gypsy 3132
+TRANSIB1 DNA CMC-Transib 289
+DOC LINE Jockey 21700
+TRANSIB3 DNA CMC-Transib 741
+Copia_I-int LTR Copia 87220
+TRANSIB4 DNA CMC-Transib 153
+Gypsy10_LTR LTR Gypsy 162
+Invader3_I-int LTR Gypsy 2022
+Invader1_I-int LTR Gypsy 1768
+BS3_DM LINE Jockey 453
+Gypsy2-LTR_DM LTR Gypsy 385
+LSU-rRNA_Hsa rRNA rRNA 11
+NOMAD_LTR LTR Gypsy 277
+Helitron1_DM RC Helitron 0
+LINEJ1_DM LINE Jockey 15939
+Invader4_LTR LTR Gypsy 679
+MDG3_I-int LTR Gypsy 3770
+LmeSINE1c SINE tRNA-Deu-L2 653
+ROVER-LTR_DM LTR Gypsy 168
+S_DM DNA TcMar-Tc1 2041
+Invader6_I-int LTR Gypsy 3298
+ROVER-I_DM LTR Gypsy 3069
+QUASIMODO_I-int LTR Gypsy 8000
+Chouto_I-int LTR Gypsy 518
+NTS_DM Other Other 21753
+Gypsy3_LTR LTR Gypsy 212
+SSU-rRNA_Cel rRNA rRNA 1817
+MINOS DNA TcMar-Tc1 93
+DOC4_DM LINE Jockey 658
+ZAM_LTR LTR Gypsy 118
+QUASIMODO2-LTR_DM LTR Gypsy 105
+Gypsy2-I_DM LTR Gypsy 2658
+TRANSPAC_I-int LTR Gypsy 16760
+Bica_I-int LTR Gypsy 618
+BS2 LINE Jockey 2086
+BLOOD_I-int LTR Gypsy 13280
+DMCR1A LINE CR1 8072
+QUASIMODO2-I_DM LTR Gypsy 1811
+HMSBEAGLE_I-int LTR Gypsy 4079
+DMLTR5 LTR Gypsy 94
+Gypsy7_LTR LTR Gypsy 39
+G5A_DM LINE Jockey 582
+MDG1_I-int LTR Gypsy 8043
+Gypsy2_I-int LTR Gypsy 1552
+BARI1 DNA TcMar-Tc1 125
+Invader5_LTR LTR Gypsy 23
+DM176_LTR LTR Gypsy 330
+DOC3_DM LINE Jockey 5205
+Copia1-LTR_DM LTR Copia 389
+TOM_I-int LTR Gypsy 3390
+NOF_FB DNA MULE-NOF 63
+Chimpo_I-int LTR Gypsy 370
+DIVER_I-int LTR Pao 11751
+TIRANT_I-int LTR Gypsy 12728
+Gypsy2_LTR LTR Gypsy 250
+FB4_DM DNA TcMar-Tc1 3348
+GTWIN_LTR LTR Gypsy 938
+Invader6_LTR LTR Gypsy 146
+Gypsy8_I-int LTR Gypsy 1247
+G_DM LINE Jockey 4582
+TRANSPAC_LTR LTR Gypsy 979
+FUSHI_DM Unknown Unknown 0
+Transib5 DNA CMC-Transib 328
+MuDR-1_DEl DNA MULE-NOF 4
+Mariner2_DM DNA TcMar-Tc1 926
+DOC5_DM LINE Jockey 752
+TC1_DM DNA TcMar-Tc1 1485
+Gypsy9_LTR LTR Gypsy 8
+DMRP1 Unknown Unknown 144
+Gypsy10_I-int LTR Gypsy 927
+Gypsy4_LTR LTR Gypsy 218
+DM297_LTR LTR Gypsy 2516
+ACCORD2_I-int LTR Gypsy 1200
+Invader1_LTR LTR Gypsy 1903
+BEL_LTR LTR Pao 605
+IVK_DM LINE I 1769
+M4DM DNA CMC-Transib 918
+FW2_DM LINE Jockey 8864
+Copia2_I-int LTR Copia 1892
+MDG1_LTR LTR Gypsy 281
+Gypsy12_LTR LTR Gypsy 2680
+Gypsy7_I-int LTR Gypsy 742
+FROGGER_I-int LTR Copia 546
+Gypsy_I-int LTR Gypsy 11161
+5S_DM RNA RNA 4146
+GTWIN_I-int LTR Gypsy 2930
+CIRCE LTR Gypsy 1570
+DIVER2_LTR LTR Pao 240
+POGO DNA TcMar-Pogo 1032
+Gypsy6A_LTR LTR Gypsy 219
+MAX_I-int LTR Pao 3905
+ROOA_LTR LTR Pao 276
+DM176_I-int LTR Gypsy 2298
+Copia2_LTR_DM LTR Copia 460
+DM297_I-int LTR Gypsy 42705
+IDEFIX_I-int LTR Gypsy 3286
+Jockey2 LINE Jockey 760
+TC1-2_DM DNA TcMar-Tc1 2287
+DMRPR Unknown Unknown 136
b
diff -r 1435d142041b -r 54a3f3a195d6 test-data/356_fraction_counts.tab
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/356_fraction_counts.tab Mon May 29 13:11:57 2017 -0400
b
@@ -0,0 +1,210 @@
+LSU-rRNA_Dme rRNA rRNA 5583038
+FW3_DM LINE Jockey 647
+DMTOM1_LTR LTR Gypsy 1403
+R1_DM LINE R1 15424
+TAHRE LINE Jockey 3806
+G4_DM LINE Jockey 2223
+BS LINE Jockey 5200
+Stalker2_I-int LTR Gypsy 10966
+Stalker3_LTR LTR Gypsy 543
+TABOR_I-int LTR Gypsy 5504
+G7_DM LINE Jockey 1949
+BEL_I-int LTR Pao 29338
+Gypsy6_I-int LTR Gypsy 11381
+ACCORD_I-int LTR Gypsy 4103
+DM412B_LTR LTR Gypsy 5607
+G2_DM LINE Jockey 1235
+SSU-rRNA_Hsa rRNA rRNA 563673
+TART_B1 LINE Jockey 13382
+S2_DM DNA TcMar-Tc1 716
+LOOPER1_DM DNA PiggyBac 220
+HOBO DNA hAT-hobo 4375
+ARS406_DM Unknown Unknown 755
+G6_DM LINE Jockey 2874
+DOC2_DM LINE Jockey 4511
+Baggins1 LINE LOA 5500
+NINJA_LTR LTR Pao 297
+TRANSIB2 DNA CMC-Transib 3357
+Gypsy5_LTR LTR Gypsy 502
+MDG3_LTR LTR Gypsy 926
+Gypsy12A_LTR LTR Gypsy 2436
+Gypsy_LTR LTR Gypsy 3771
+TIRANT_LTR LTR Gypsy 3225
+I_DM LINE I 11479
+DIVER2_I-int LTR Pao 6446
+P-1_DY DNA P 13
+Invader2_I-int LTR Gypsy 5970
+DOC6_DM LINE Jockey 1718
+BURDOCK_LTR LTR Gypsy 543
+SSU-rRNA_Dme rRNA rRNA 9958
+MAX_LTR LTR Pao 958
+STALKER4_LTR LTR Gypsy 2040
+XDMR_DM Unknown Unknown 4049
+BARI_DM DNA TcMar-Tc1 1117
+Transib-N1_DM DNA CMC-Transib 366
+Gypsy5_I-int LTR Gypsy 2834
+XDMR Unknown Unknown 1771
+ACCORD2_LTR LTR Gypsy 213
+POGON1 DNA TcMar-Pogo 641
+BS4_DM LINE Jockey 8
+ALA_DM Unknown Unknown 9
+Gypsy1-I_DM LTR Gypsy 36962
+ACCORD_LTR LTR Gypsy 790
+BURDOCK_I-int LTR Gypsy 5190
+ZAM_I-int LTR Gypsy 6896
+Invader5_I-int LTR Gypsy 191
+G3_DM LINE Jockey 480
+ROOA_I-int LTR Pao 4103
+Gypsy1-LTR_DM LTR Gypsy 1432
+DM1731_I-int LTR Copia 18829
+ROO_I-int LTR Pao 34023
+DM412 LTR Gypsy 43689
+PROTOP DNA P 4775
+TART-A LINE Jockey 5092
+ROO_LTR LTR Pao 2219
+DIVER_LTR LTR Pao 547
+Gypsy3_I-int LTR Gypsy 5766
+BATUMI_LTR LTR Pao 348
+IDEFIX_LTR LTR Gypsy 2538
+G5_DM LINE Jockey 2714
+HETA LINE Jockey 22459
+Gypsy11_I-int LTR Gypsy 487
+R1-2_DM LINE R1 184
+Gypsy11_LTR LTR Gypsy 159
+HELENA_RT LINE Jockey 934
+FTZ_DM Unknown Unknown 0
+MICROPIA_I-int LTR Gypsy 6941
+Invader4_I-int LTR Gypsy 1888
+PLACW_DM DNA P 277
+BLOOD_LTR LTR Gypsy 1269
+R2_DM LINE R2 54042
+Copia1-I_DM LTR Copia 4100
+Gypsy4_I-int LTR Gypsy 13114
+Chouto_LTR LTR Gypsy 162
+BATUMI_I-int LTR Pao 4723
+DNAREP1_DM RC Helitron 76851
+DMRT1B LINE R1 9003
+DMRT1C LINE R1 1907
+TLD2 LTR Gypsy 33
+DMRT1A LINE R1 3797
+DM1731_LTR LTR Copia 1274
+LSU-rRNA_Cel rRNA rRNA 363697
+Copia_LTR LTR Copia 2612
+Gypsy6_LTR LTR Gypsy 1423
+Gypsy12_I-int LTR Gypsy 5002
+BLASTOPIA_LTR LTR Gypsy 1038
+Bica_LTR LTR Gypsy 123
+Gypsy8_LTR LTR Gypsy 1467
+NINJA_I-int LTR Pao 3676
+PROTOP_B DNA P 9371
+PROTOP_A DNA P 4465
+Invader3_LTR LTR Gypsy 506
+Chimpo_LTR LTR Gypsy 107
+Invader2_LTR LTR Gypsy 417
+BLASTOPIA_I-int LTR Gypsy 13780
+FROGGER_LTR LTR Copia 85
+NOMAD_I-int LTR Gypsy 11614
+QUASIMODO_LTR LTR Gypsy 1625
+TABOR_LTR LTR Gypsy 443
+Stalker2_LTR LTR Gypsy 790
+MICROPIA_LTR LTR Gypsy 460
+Gypsy9_I-int LTR Gypsy 429
+STALKER4_I-int LTR Gypsy 7287
+TRANSIB1 DNA CMC-Transib 1229
+DOC LINE Jockey 29082
+TRANSIB3 DNA CMC-Transib 1802
+Copia_I-int LTR Copia 50888
+TRANSIB4 DNA CMC-Transib 364
+Gypsy10_LTR LTR Gypsy 363
+Invader3_I-int LTR Gypsy 4411
+Invader1_I-int LTR Gypsy 2993
+BS3_DM LINE Jockey 851
+Gypsy2-LTR_DM LTR Gypsy 1155
+LSU-rRNA_Hsa rRNA rRNA 15
+NOMAD_LTR LTR Gypsy 757
+Helitron1_DM RC Helitron 0
+LINEJ1_DM LINE Jockey 24891
+Invader4_LTR LTR Gypsy 1624
+MDG3_I-int LTR Gypsy 7857
+LmeSINE1c SINE tRNA-Deu-L2 627
+ROVER-LTR_DM LTR Gypsy 415
+S_DM DNA TcMar-Tc1 4974
+Invader6_I-int LTR Gypsy 6566
+ROVER-I_DM LTR Gypsy 6947
+QUASIMODO_I-int LTR Gypsy 17910
+Chouto_I-int LTR Gypsy 1231
+NTS_DM Other Other 56043
+Gypsy3_LTR LTR Gypsy 681
+SSU-rRNA_Cel rRNA rRNA 3855
+MINOS DNA TcMar-Tc1 273
+DOC4_DM LINE Jockey 1030
+ZAM_LTR LTR Gypsy 377
+QUASIMODO2-LTR_DM LTR Gypsy 298
+Gypsy2-I_DM LTR Gypsy 7374
+TRANSPAC_I-int LTR Gypsy 28474
+Bica_I-int LTR Gypsy 1561
+BS2 LINE Jockey 5214
+BLOOD_I-int LTR Gypsy 27362
+DMCR1A LINE CR1 17489
+QUASIMODO2-I_DM LTR Gypsy 3977
+HMSBEAGLE_I-int LTR Gypsy 7863
+DMLTR5 LTR Gypsy 189
+Gypsy7_LTR LTR Gypsy 80
+G5A_DM LINE Jockey 1578
+MDG1_I-int LTR Gypsy 16306
+Gypsy2_I-int LTR Gypsy 4226
+BARI1 DNA TcMar-Tc1 370
+Invader5_LTR LTR Gypsy 47
+DM176_LTR LTR Gypsy 560
+DOC3_DM LINE Jockey 11067
+Copia1-LTR_DM LTR Copia 600
+TOM_I-int LTR Gypsy 6929
+NOF_FB DNA MULE-NOF 228
+Chimpo_I-int LTR Gypsy 1366
+DIVER_I-int LTR Pao 24742
+TIRANT_I-int LTR Gypsy 23165
+Gypsy2_LTR LTR Gypsy 869
+FB4_DM DNA TcMar-Tc1 7840
+GTWIN_LTR LTR Gypsy 2533
+Invader6_LTR LTR Gypsy 312
+Gypsy8_I-int LTR Gypsy 4176
+G_DM LINE Jockey 10948
+TRANSPAC_LTR LTR Gypsy 1457
+FUSHI_DM Unknown Unknown 0
+Transib5 DNA CMC-Transib 973
+MuDR-1_DEl DNA MULE-NOF 32
+Mariner2_DM DNA TcMar-Tc1 1096
+DOC5_DM LINE Jockey 1793
+TC1_DM DNA TcMar-Tc1 2618
+Gypsy9_LTR LTR Gypsy 62
+DMRP1 Unknown Unknown 295
+Gypsy10_I-int LTR Gypsy 2236
+Gypsy4_LTR LTR Gypsy 390
+DM297_LTR LTR Gypsy 6128
+ACCORD2_I-int LTR Gypsy 2286
+Invader1_LTR LTR Gypsy 2525
+BEL_LTR LTR Pao 927
+IVK_DM LINE I 3025
+M4DM DNA CMC-Transib 1880
+FW2_DM LINE Jockey 17723
+Copia2_I-int LTR Copia 3614
+MDG1_LTR LTR Gypsy 650
+Gypsy12_LTR LTR Gypsy 7572
+Gypsy7_I-int LTR Gypsy 1162
+FROGGER_I-int LTR Copia 1075
+Gypsy_I-int LTR Gypsy 24082
+5S_DM RNA RNA 6331
+GTWIN_I-int LTR Gypsy 7568
+CIRCE LTR Gypsy 3676
+DIVER2_LTR LTR Pao 648
+POGO DNA TcMar-Pogo 2909
+Gypsy6A_LTR LTR Gypsy 793
+MAX_I-int LTR Pao 8914
+ROOA_LTR LTR Pao 435
+DM176_I-int LTR Gypsy 4726
+Copia2_LTR_DM LTR Copia 747
+DM297_I-int LTR Gypsy 82387
+IDEFIX_I-int LTR Gypsy 7222
+Jockey2 LINE Jockey 1823
+TC1-2_DM DNA TcMar-Tc1 4438
+DMRPR Unknown Unknown 221
b
diff -r 1435d142041b -r 54a3f3a195d6 test-data/Normalized_counts_file.tab
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/Normalized_counts_file.tab Mon May 29 13:11:57 2017 -0400
b
b'@@ -0,0 +1,211 @@\n+Tag\twildtype_1\twildtype_2\tmutant_1\tmutant_2\n+LSU-rRNA_Dme\t157455.221275418\t149201.031867455\t218650.181450173\t354466.098949349\n+FW3_DM\t35.756634311884\t44.2453336153694\t25.3386538652006\t32.2981814492716\n+DMTOM1_LTR\t43.2005545717588\t49.0430203929395\t54.9461072223747\t39.3419337866127\n+R1_DM\t315.957840856997\t262.031236427494\t604.054709763298\t517.028601444589\n+TAHRE\t196.209690086873\t189.678242903128\t149.055512536249\t121.719476366005\n+G4_DM\t157.828320885669\t165.108271224056\t87.0600116574048\t105.226787966377\n+BS\t313.935504138996\t242.355874491196\t203.649150075801\t355.194096523239\n+Stalker2_I-int\t527.184456786045\t604.508533973842\t429.46472687139\t413.519801853174\n+Stalker3_LTR\t25.5158653994552\t21.2745908621546\t21.2656708636846\t20.1863634057947\n+TABOR_I-int\t169.833255871247\t149.309827895896\t215.554792695617\t317.054754599099\n+G7_DM\t6.97060741098101\t4.74922529496845\t76.3292679803337\t35.9059570366902\n+BEL_I-int\t1022.22666828812\t1059.36800967358\t1148.97283940843\t1304.98396962057\n+Gypsy6_I-int\t322.239993215042\t326.921161631196\t445.717495579363\t323.23951274896\n+ACCORD_I-int\t117.166444321613\t104.919109832721\t160.687012069425\t174.461719477315\n+DM412B_LTR\t132.441540808639\t141.022914371002\t219.588612399041\t257.784155662936\n+G2_DM\t57.1417693937208\t54.8583983051458\t48.3666731430026\t65.6271559235199\n+SSU-rRNA_Hsa\t13684.9804569472\t8340.22115575582\t22075.2937251301\t25925.561270609\n+TART_B1\t1086.38207353363\t1042.55187521078\t524.083255060455\t437.142142008891\n+S2_DM\t32.9167572185214\t35.52226674706\t28.0409214335141\t17.6093808433528\n+LOOPER1_DM\t35.3263499037988\t28.5438132524124\t8.61592558013002\t2.66288198118994\n+HOBO\t216.476085707688\t213.42436937797\t171.339429150313\t182.020868327145\n+ARS406_DM\t24.2250121751994\t34.8438059906359\t29.5682900590826\t19.7568663120544\n+G6_DM\t207.698283782749\t168.30672907577\t112.555318714971\t67.5169431359773\n+DOC2_DM\t190.96022030823
3\t219.9182080466\t176.665637690757\t176.265607271025\n+Baggins1\t220.994071992583\t188.660551768492\t215.398139503251\t157.367735146451\n+NINJA_LTR\t21.1269644369856\t19.6753619362979\t11.6314995331755\t9.79253373727915\n+TRANSIB2\t137.561925264854\t149.067520482887\t131.471191693166\t127.474737422125\n+Gypsy5_LTR\t28.0115149663496\t21.8561286533752\t19.6599756419331\t28.0032105118684\n+MDG3_LTR\t44.6204931184401\t41.0953372462576\t36.2652140327291\t38.9983361116205\n+Gypsy12A_LTR\t131.624000433277\t108.650643993054\t95.4017941508943\t92.5136739916635\n+Gypsy_LTR\t56.1951436959333\t44.4391795457762\t147.684797103047\t149.207290365385\n+TIRANT_LTR\t58.9919923484874\t61.9822362475985\t126.301636345088\t137.095472321908\n+I_DM\t343.668156737687\t339.860377485855\t449.55549879233\t957.692619622151\n+DIVER2_I-int\t287.386956160137\t272.692762599872\t252.44661949781\t229.179649219831\n+P-1_DY\t0.344227526468198\t0.0969229652034378\t0.509122875189502\t0.343597674992251\n+Invader2_I-int\t293.583051636564\t267.022769135471\t233.804889606256\t271.184464987634\n+DOC6_DM\t54.1728069779326\t60.0922384261314\t67.2825461211972\t65.9707535985121\n+BURDOCK_LTR\t12.3491625120466\t17.930748562636\t21.2656708636846\t16.5785878183761\n+SSU-rRNA_Dme\t306.061299471036\t201.260537244939\t389.988122395158\t328.307578455096\n+MAX_LTR\t43.7599243022696\t36.4914963990943\t37.5184395716571\t29.4635006305855\n+STALKER4_LTR\t55.5927455246139\t58.6383939480799\t79.8931281066602\t68.9772332546943\n+XDMR_DM\t142.725338161877\t209.741296700239\t158.572193972484\t136.322377553175\n+BARI_DM\t42.5981564004395\t43.7122573067505\t43.7454039682056\t44.8394965864887\n+Transib-N1_DM\t13.2527597690256\t17.9792100452377\t14.333767101489\t11.1669244372481\n+Gypsy5_I-int\t105.333623099269\t105.936800967358\t110.988786791311\t88.9917978229929\n+XDMR\t53.7425225698474\t69.7845349464752\t69.3582009200467\t77.5671751295006\n+ACCORD2_LTR\t10.1116835900033\t9.30460465953003\t8.34178249348953\t6.52835582485276\n+POGON
1\t14.2424139076217\t19.5299774884927\t25.1036740766516\t21.990251199504\n+BS4_DM\t1.46296698748984\t1.0176911346361\t0.313306384732001\t1.97568663120544\n+ALA_DM\t0.215142204042624\t0.290768895610313\t0.352469682823501\t0\n+Gypsy1-I_DM\t351.112076997562\t305'..b'3.420219978632\t447.106474583666\n+Copia1-LTR_DM\t33.1749278633726\t27.4291991525729\t23.4979788549001\t33.4148738929964\n+TOM_I-int\t110.281893792249\t111.994486292572\t271.362492476004\t291.199029555932\n+NOF_FB\t54.7321767084434\t46.5230232976501\t8.92923196486203\t5.41166338112795\n+Chimpo_I-int\t55.4206317613798\t51.369171557822\t53.4970651929892\t31.7827849367832\n+DIVER_I-int\t567.028792974739\t757.307588617061\t968.978321379896\t1009.40406970848\n+TIRANT_I-int\t469.741488306664\t414.781829588112\t907.2178002896\t1093.32780182534\n+Gypsy2_LTR\t20.6106231472833\t23.5038190618337\t34.0329060415136\t21.4748546870157\n+FB4_DM\t307.610323340143\t336.274227773327\t307.040257037361\t287.591253968514\n+GTWIN_LTR\t25.7310076034978\t22.6315123750027\t99.2006340657698\t80.5736547856828\n+Invader6_LTR\t27.9254580847325\t28.4953517698107\t12.218949004548\t12.5413151372172\n+Gypsy8_I-int\t206.923771848195\t197.868233462818\t163.545932830104\t107.116575178834\n+G_DM\t383.168265399913\t389.388012704811\t428.759787505743\t393.591136703623\n+TRANSPAC_LTR\t23.1493011549863\t26.896122843954\t57.0609253193157\t84.0955309543534\n+FUSHI_DM\t0\t0\t0\t0\n+Transib5\t48.3209390279733\t46.7653307106587\t38.1058890430296\t28.1750093493646\n+MuDR-1_DEl\t0.258170644851148\t0.43615334341547\t1.253225538928\t0.343597674992251\n+Mariner2_DM\t86.5301944659432\t77.1022188193348\t42.9229747082841\t79.542861760706\n+DOC5_DM\t130.67737473549\t105.791416519552\t70.2197934780597\t64.5963628985431\n+TC1_DM\t79.1723310876855\t94.9845058993691\t102.529514403547\t127.560636840873\n+Gypsy9_LTR\t2.19445048123476\t2.03538226927219\t2.42812448167301\t0.687195349984501\n+DMRP1\t11.2734514918335\t11.0007565505902\t11.5531729369925\t12.369516299721\n+
Gypsy10_I-int\t97.1582193456488\t93.5306614213175\t87.5691345325943\t79.6287611794541\n+Gypsy4_LTR\t11.7897927815358\t9.40152762473347\t15.273686255685\t18.7260732870777\n+DM297_LTR\t64.0693483638933\t71.6260712853405\t239.992690704713\t216.122937570126\n+ACCORD2_I-int\t126.245445332212\t119.118324235025\t89.5272994371693\t103.079302497675\n+Invader1_LTR\t143.973162945324\t93.5791229039192\t98.8873276810378\t163.466593877563\n+BEL_LTR\t37.6498857074591\t41.5799520722748\t36.3043773308206\t51.9691483425779\n+IVK_DM\t144.102248267749\t141.555990679621\t118.468976726788\t151.956071765323\n+M4DM\t74.3101172763222\t79.9614462928362\t73.6270004120202\t78.8556664107215\n+FW2_DM\t612.466826468541\t671.046149586002\t694.091132075657\t761.412447782827\n+Copia2_I-int\t181.493963330357\t167.434422388939\t141.536159302681\t162.521700271335\n+MDG1_LTR\t29.6896241578821\t28.2045828742004\t25.4561437594751\t24.1377366682056\n+Gypsy12_LTR\t270.734949567238\t287.5704377586\t296.544493148839\t230.210442244808\n+Gypsy7_I-int\t54.0437216555071\t48.8976359451344\t45.5077523823231\t63.7373687110625\n+FROGGER_I-int\t69.3188181425333\t65.9560778209394\t42.1005454483626\t46.9010826364422\n+Gypsy_I-int\t293.540023195756\t273.031992978084\t943.130544639506\t958.723412647127\n+5S_DM\t271.810660587451\t503.611727197063\t247.942840217287\t356.138990129468\n+GTWIN_I-int\t234.418945524843\t244.924333069087\t296.387839956473\t251.685296931824\n+CIRCE\t162.088136525713\t161.037506685512\t143.964283784354\t134.862087434458\n+DIVER2_LTR\t34.1215535611601\t25.9268931919196\t25.3778171632921\t20.615860499535\n+POGO\t104.43002584229\t120.766014643484\t113.926034148174\t88.6482001480007\n+Gypsy6A_LTR\t26.5485479788598\t27.7684295307849\t31.0564953865596\t18.8119727058257\n+MAX_I-int\t533.337523821664\t445.894101418416\t349.101639187632\t335.437230211185\n+ROOA_LTR\t17.2113763234099\t23.4068960966302\t17.0360346698026\t23.7082395744653\n+DM176_I-int\t156.064154812519\t136.322150558635\t185.08574678043\t197.
396864283048\n+Copia2_LTR_DM\t38.3383407603955\t23.5522805444354\t29.2549836743506\t39.5137326241088\n+DM297_I-int\t509.327653850507\t558.760894397819\t3226.54663986442\t3668.33467763602\n+IDEFIX_I-int\t266.475133927194\t268.815843991735\t282.837338816814\t282.265490006134\n+Jockey2\t76.1603402310888\t76.9568343715296\t71.3946924208047\t65.2835582485276\n+TC1-2_DM\t163.809274158054\t191.325933311586\t173.806716930078\t196.451970676819\n+DMRPR\t5.50764042349117\t9.06229724652144\t8.65508887822152\t11.6823209497365\n'
b
diff -r 1435d142041b -r 54a3f3a195d6 test-data/aligned_353.tab
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/aligned_353.tab Mon May 29 13:11:57 2017 -0400
b
@@ -0,0 +1,1 @@
+15862067
b
diff -r 1435d142041b -r 54a3f3a195d6 test-data/aligned_354.tab
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/aligned_354.tab Mon May 29 13:11:57 2017 -0400
b
@@ -0,0 +1,1 @@
+28421096
b
diff -r 1435d142041b -r 54a3f3a195d6 test-data/aligned_355.tab
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/aligned_355.tab Mon May 29 13:11:57 2017 -0400
b
@@ -0,0 +1,1 @@
+10808170
b
diff -r 1435d142041b -r 54a3f3a195d6 test-data/aligned_356.tab
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/aligned_356.tab Mon May 29 13:11:57 2017 -0400
b
@@ -0,0 +1,1 @@
+29256707
b
diff -r 1435d142041b -r 54a3f3a195d6 test-data/aligned_reads.tab
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/aligned_reads.tab Mon May 29 13:11:57 2017 -0400
b
@@ -0,0 +1,1 @@
+2500
b
diff -r 1435d142041b -r 54a3f3a195d6 test-data/edgeR_plots.pdf
b
Binary file test-data/edgeR_plots.pdf has changed
b
diff -r 1435d142041b -r 54a3f3a195d6 test-data/edgeR_result_file.tab
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/edgeR_result_file.tab Mon May 29 13:11:57 2017 -0400
b
b'@@ -0,0 +1,210 @@\n+DM297_I-int\t2.69047105438237\t4.49448726299278e-21\tLTR\tGypsy\n+DM297_LTR\t1.74961921158059\t1.33970311551615e-18\tLTR\tGypsy\n+Gypsy1-I_DM\t2.37388633230267\t4.48026988289666e-16\tLTR\tGypsy\n+NOF_FB\t-2.79644759297337\t4.48026988289666e-16\tDNA\tMULE-NOF\n+G7_DM\t3.25987374806405\t1.55296802933851e-15\tLINE\tJockey\n+Gypsy_I-int\t1.74702179622354\t4.35250852544863e-12\tLTR\tGypsy\n+LINEJ1_DM\t1.94429217926762\t5.18070558191679e-12\tLINE\tJockey\n+TOM_I-int\t1.33935222283437\t6.69227956217608e-12\tLTR\tGypsy\n+GTWIN_LTR\t1.89490932455113\t8.80590960311443e-12\tLTR\tGypsy\n+Gypsy_LTR\t1.56024435809038\t2.99852403113839e-11\tLTR\tGypsy\n+DM1731_I-int\t1.53134429520835\t6.4920552091728e-10\tLTR\tCopia\n+Gypsy6_LTR\t1.53023046204423\t1.07509345482697e-06\tLTR\tGypsy\n+LOOPER1_DM\t-2.46983670012103\t1.61917280285007e-06\tDNA\tPiggyBac\n+TIRANT_LTR\t1.12203469288475\t1.7801266502904e-06\tLTR\tGypsy\n+Gypsy12_I-int\t-1.12204057066646\t2.19485691074398e-06\tLTR\tGypsy\n+R2_DM\t1.67889583354494\t2.73035057005978e-06\tLINE\tR2\n+TRANSPAC_LTR\t1.49364968145495\t2.90210475425899e-06\tLTR\tGypsy\n+Copia_LTR\t1.66649088148806\t1.186041112787e-05\tLTR\tCopia\n+ACCORD_LTR\t1.3724059468295\t1.75953804703817e-05\tLTR\tGypsy\n+TIRANT_I-int\t1.17728139313168\t2.04686461971632e-05\tLTR\tGypsy\n+TART_B1\t-1.14695345949677\t3.0965848472869e-05\tLINE\tJockey\n+TRANSIB1\t-1.3902119423097\t6.16503339250287e-05\tDNA\tCMC-Transib\n+DM412B_LTR\t0.803090050933452\t0.00012443453312394\tLTR\tGypsy\n+MICROPIA_LTR\t-1.29556276189806\t0.000129852108160169\tLTR\tGypsy\n+G6_DM\t-1.05894868960943\t0.000138305097075555\tLINE\tJockey\n+NINJA_I-int\t-0.874264518473233\t0.000155536374045889\tLTR\tPao\n+R1_DM\t0.955905154410719\t0.000189845334696753\tLINE\tR1\n+Invader6_LTR\t-1.18865109431199\t0.000212988712815497\tLTR\tGypsy\n+Copia_I-int\t1.9331690527025\t0.000223130721644621\tLTR\tCopia\n+BLASTOPIA_LTR\t1.34881958569428\t0.000364879309201074\tLTR\tGypsy\n+LSU-rRNA_Hsa\t2.611891183
44978\t0.000801598301190925\trRNA\trRNA\n+TRANSPAC_I-int\t1.16122217809542\t0.000845313292127692\tLTR\tGypsy\n+G4_DM\t-0.74970144243323\t0.000914365923267139\tLINE\tJockey\n+SSU-rRNA_Hsa\t1.12389968473263\t0.0010425909765405\trRNA\trRNA\n+DOC5_DM\t-0.809872869194368\t0.00162410751644232\tLINE\tJockey\n+Invader1_I-int\t0.749727833008358\t0.00198920329215514\tLTR\tGypsy\n+TRANSIB4\t-1.00230688125284\t0.00206051807311839\tDNA\tCMC-Transib\n+MICROPIA_I-int\t-0.751193028928835\t0.00292932704926266\tLTR\tGypsy\n+PLACW_DM\t1.16310826488548\t0.00305178248567248\tDNA\tP\n+TABOR_I-int\t0.737702190628259\t0.00354953953974351\tLTR\tGypsy\n+I_DM\t1.04141382256571\t0.00457151681951996\tLINE\tI\n+Gypsy3_I-int\t0.586594796021762\t0.00533147146680049\tLTR\tGypsy\n+DOC\t0.94015335715084\t0.00552118010545893\tLINE\tJockey\n+Helitron1_DM\t-5.10401049101919\t0.00636579556275411\tRC\tHelitron\n+Invader5_I-int\t-0.973422642391873\t0.00666026639838213\tLTR\tGypsy\n+ACCORD_I-int\t0.593047691019391\t0.00701363615971111\tLTR\tGypsy\n+LSU-rRNA_Dme\t0.902204791675651\t0.0102101724391352\trRNA\trRNA\n+DM412\t0.775970807845826\t0.0108079710929838\tLTR\tGypsy\n+NINJA_LTR\t-0.923408172476265\t0.0109563169817133\tLTR\tPao\n+BATUMI_I-int\t-0.562867612987509\t0.0148551719821076\tLTR\tPao\n+DMRT1C\t-0.664744166716986\t0.0174563775735501\tLINE\tR1\n+TAHRE\t-0.509503033739914\t0.0198535740576257\tLINE\tJockey\n+Gypsy8_I-int\t-0.578276897527121\t0.0248976847994196\tLTR\tGypsy\n+BLASTOPIA_I-int\t0.796987753936179\t0.0301824235833281\tLTR\tGypsy\n+Gypsy11_I-int\t-0.734634622561586\t0.0432538714324547\tLTR\tGypsy\n+DMRT1A\t-0.509608409844153\t0.0432538714324547\tLINE\tR1\n+DM176_LTR\t-0.691384671506076\t0.0432538714324547\tLTR\tGypsy\n+QUASIMODO2-I_DM\t0.470057940126177\t0.044414105251961\tLTR\tGypsy\n+MuDR-1_DEl\t1.35524442785064\t0.053231954651916\tDNA\tMULE-NOF\n+DM1731_LTR\t0.752238181600754\t0.0540130070646255\tLTR\tCopia\n+FROGGER_I-int\t-0.60531971025629\t0.0540130070646255\tLTR\tCopia\n+MAX_I-int\t-0
.516456091983578\t0.064340206781843\tLTR\tPao\n+BURDOCK_I-int\t0.418223330277469\t0.0691214301126655\tLTR\tGypsy\n+FROGGER_LTR\t0.888326398013863\t0.0706362756551064\tLTR\tCopia\n+DIVER_I-int\t0.579052226156673\t0.0738156276915433\tLTR\tPao\n+Chouto_I-int\t-0.551029990754245\t0.0815115938596914\tLTR\tGypsy\n+DM176_I-int\t0.38700876'..b'y\n+Gypsy8_LTR\t-0.22930840819102\t0.572155988619216\tLTR\tGypsy\n+MDG1_LTR\t-0.222278232633914\t0.598330807400729\tLTR\tGypsy\n+ARS406_DM\t-0.253969785738388\t0.616310720407917\tUnknown\tUnknown\n+TRANSIB2\t-0.146222438911029\t0.625405183746989\tDNA\tCMC-Transib\n+Invader3_I-int\t-0.138005789457227\t0.630870214463397\tLTR\tGypsy\n+FW2_DM\t0.18134923098773\t0.631976090646791\tLINE\tJockey\n+Gypsy9_LTR\t-0.341386719736692\t0.637388403082853\tLTR\tGypsy\n+TART-A\t0.137000662462686\t0.641582941743804\tLINE\tJockey\n+PROTOP_A\t0.129470019526978\t0.641582941743804\tDNA\tP\n+MDG3_LTR\t-0.188668582948884\t0.653366733777971\tLTR\tGypsy\n+Invader2_I-int\t-0.151224307192946\t0.653366733777971\tLTR\tGypsy\n+Jockey2\t-0.162959614427421\t0.654568430062825\tLINE\tJockey\n+POGO\t-0.150727724968995\t0.655487951512125\tDNA\tTcMar-Pogo\n+Baggins1\t-0.134826103211095\t0.693165877069107\tLINE\tLOA\n+Stalker3_LTR\t-0.174018068430168\t0.707635948527927\tLTR\tGypsy\n+Bica_LTR\t-0.203027970314789\t0.707635948527927\tLTR\tGypsy\n+HMSBEAGLE_I-int\t0.13884460350775\t0.707635948527927\tLTR\tGypsy\n+TABOR_LTR\t-0.243243320673685\t0.7126900116144\tLTR\tGypsy\n+NTS_DM\t-0.176552072990723\t0.7126900116144\tOther\tOther\n+ZAM_LTR\t0.185889162361338\t0.717882303973929\tLTR\tGypsy\n+G5_DM\t-0.112256553949312\t0.728328879400811\tLINE\tJockey\n+QUASIMODO_I-int\t0.126271255526405\t0.756425760963745\tLTR\tGypsy\n+FB4_DM\t-0.114616335459739\t0.756425760963745\tDNA\tTcMar-Tc1\n+Invader1_LTR\t0.141219962488351\t0.756425760963745\tLTR\tGypsy\n+BEL_LTR\t0.152542861711851\t0.756425760963745\tLTR\tPao\n+DMRT1B\t-0.111235216287692\t0.760051744799183\tLINE\tR1\n+NOMAD_I-int\t-0.137
546703453753\t0.760051744799183\tLTR\tGypsy\n+LmeSINE1c\t-0.228543531933743\t0.76029602974523\tSINE\ttRNA-Deu-L2\n+Gypsy10_LTR\t-0.154651074858764\t0.762401782894161\tLTR\tGypsy\n+QUASIMODO2-LTR_DM\t-0.155430480903998\t0.762401782894161\tLTR\tGypsy\n+Copia2_LTR_DM\t0.147979193864449\t0.762401782894161\tLTR\tCopia\n+BS4_DM\t-0.253251631796478\t0.81092436860891\tLINE\tJockey\n+G_DM\t0.0902785903431559\t0.81092436860891\tLINE\tJockey\n+IVK_DM\t-0.0807211765160779\t0.81870908545043\tLINE\tI\n+Gypsy5_I-int\t-0.0774807293903186\t0.835273508730751\tLTR\tGypsy\n+Gypsy12_LTR\t-0.0832045482918072\t0.835273508730751\tLTR\tGypsy\n+Gypsy6A_LTR\t-0.116239966704279\t0.835273508730751\tLTR\tGypsy\n+IDEFIX_I-int\t0.0781972363147815\t0.835273508730751\tLTR\tGypsy\n+PROTOP_B\t-0.0745501769371703\t0.846311908195535\tDNA\tP\n+DMRP1\t0.100504519100353\t0.846311908195535\tUnknown\tUnknown\n+Copia1-LTR_DM\t-0.0961752276665428\t0.850550157756422\tLTR\tCopia\n+TC1-2_DM\t0.0595720035046981\t0.853067020850901\tDNA\tTcMar-Tc1\n+LSU-rRNA_Cel\t-0.0920825033469626\t0.861339242540399\trRNA\trRNA\n+Gypsy7_I-int\t0.0826410967604942\t0.861339242540399\tLTR\tGypsy\n+Gypsy5_LTR\t-0.0710979993807782\t0.905487490634908\tLTR\tGypsy\n+Gypsy4_I-int\t-0.0516274039610208\t0.905487490634908\tLTR\tGypsy\n+QUASIMODO_LTR\t-0.0643467685962964\t0.905487490634908\tLTR\tGypsy\n+Gypsy2-LTR_DM\t0.0652192786223383\t0.905487490634908\tLTR\tGypsy\n+DMLTR5\t0.0726894766496021\t0.905487490634908\tLTR\tGypsy\n+Stalker2_LTR\t-0.0523210063310555\t0.917308330496028\tLTR\tGypsy\n+ROVER-I_DM\t-0.0408110836395496\t0.922716163337709\tLTR\tGypsy\n+MDG1_I-int\t-0.0411424164719924\t0.937452811422426\tLTR\tGypsy\n+ALA_DM\t-0.11425480149938\t0.942917068804402\tUnknown\tUnknown\n+DNAREP1_DM\t0.039354033592028\t0.942917068804402\tRC\tHelitron\n+BARI_DM\t0.0372716083617889\t0.94985097135805\tDNA\tTcMar-Tc1\n+Gypsy2_I-int\t-0.0261599267227275\t0.95196577198178\tLTR\tGypsy\n+DMTOM1_LTR\t0.0349640382618226\t0.953829042791583\tLTR\tGypsy\n+IDEF
IX_LTR\t0.0219961145617681\t0.957715681080671\tLTR\tGypsy\n+STALKER4_I-int\t0.0241275307638275\t0.957715681080671\tLTR\tGypsy\n+Gypsy3_LTR\t-0.0298883238815861\t0.957715681080671\tLTR\tGypsy\n+MINOS\t0.0330479824589295\t0.957715681080671\tDNA\tTcMar-Tc1\n+G2_DM\t0.0226097987515835\t0.962242582650612\tLINE\tJockey\n+Copia1-I_DM\t0.0124236833262227\t0.963045943681216\tLTR\tCopia\n+Invader4_LTR\t0.016495431966038\t0.963045943681216\tLTR\tGypsy\n+M4DM\t-0.0173659751457427\t0.963045943681216\tDNA\tCMC-Transib\n+BS\t0.00565217021634194\t0.997858051773989\tLINE\tJockey\n+FTZ_DM\t0\t1\tUnknown\tUnknown\n+FUSHI_DM\t0\t1\tUnknown\tUnknown\n+ROOA_LTR\t-0.0005425128837839\t1\tLTR\tPao\n'
b
diff -r 1435d142041b -r 54a3f3a195d6 test-data/tool_wrapper.sh
--- a/test-data/tool_wrapper.sh Tue May 23 18:37:22 2017 -0400
+++ b/test-data/tool_wrapper.sh Mon May 29 13:11:57 2017 -0400
b
@@ -1,5 +1,5 @@
-input_base=$1
-baseReference=$2
+input_base=Samp
+baseReference=chrM
 
 bowtie-build ${baseReference}.fa ${baseReference}