Repository 'crosscontamination_barcode_filter'
hg clone https://toolshed.g2.bx.psu.edu/repos/iuc/crosscontamination_barcode_filter

Changeset 0:582b7bd4ae4c (2019-01-24)
Next changeset 1:253c9448f524 (2019-06-03)
Commit message:
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/crosscontamination_barcode_filter commit 6f73edc667e61fabdab8b24a7ff40942588fee5b
added:
crosscontamination_barcode_filter.xml
scripts/batch_plotting_functions.R
scripts/config_assertions.R
scripts/contamination_plot.R
scripts/crosscontamination_filter.R
scripts/reorder_matrix_headers.R
static/images/crosscontam_pretopost.png
test-data/celseq_barcodes.192.raw
test-data/out3.subtable
test-data/test.pdf
test-data/test.table
b
diff -r 000000000000 -r 582b7bd4ae4c crosscontamination_barcode_filter.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/crosscontamination_barcode_filter.xml Thu Jan 24 09:52:58 2019 -0500
[
b'@@ -0,0 +1,247 @@\n+<tool id="crosscontamination_barcode_filter" name="Cross-contamination Barcode Filter" version="@VERSION@">\n+    <description>for use in plate-based barcoded analyses</description>\n+    <macros>\n+        <token name="@VERSION@">0.1</token>\n+        <macro name="assert_conts" >\n+            <assert_contents>\n+                <has_text text="/CreationDate" />\n+                <has_text text="/Producer" />\n+                <has_line line="startxref" />\n+                <has_line line="%%EOF" />\n+            </assert_contents>\n+        </macro>\n+        <macro name="sanitize_batch">\n+            <sanitizer invalid_char="">\n+                <valid initial="string.digits">\n+                    <add value=","/>\n+                </valid>\n+            </sanitizer>\n+        </macro>\n+        <macro name="sanitize_regex">\n+            <sanitizer invalid_char="">\n+                <valid initial="string.letters,string.digits">\n+                    <add value="!"/>\n+                    <add value="="/>\n+                    <add value="-"/>\n+                    <add value="."/>\n+                    <add value="*"/>\n+                    <add value="?"/>\n+                    <add value="+"/>\n+                    <add value="\\\\"/>\n+                    <add value="_"/>\n+                    <add value="&#91;"/> <!-- left square bracket, e.g subselecting from vec[1] -->\n+                    <add value="&#93;"/> <!-- right square bracket -->\n+                    <add value="&#40;"/> <!-- left parenthesis -->\n+                    <add value="&#41;"/> <!-- right parenthesis -->\n+                </valid>\n+            </sanitizer>\n+        </macro>        \n+    </macros>\n+    <requirements>\n+        <requirement type="package" version="2.2.1" >r-ggplot2</requirement>\n+    </requirements>\n+    <version_command><![CDATA[\n+        Rscript \'$__tool_directory__/scripts/crosscontamination_filter.R\' | head -1 | cut -d\' \' -f 
2\n+    ]]></version_command>\n+    <command detect_errors="exit_code"><![CDATA[\n+        Rscript \'$__tool_directory__/scripts/crosscontamination_filter.R\' \'$crossconf\'\n+    ]]></command>\n+    <configfiles>\n+        <configfile name="crossconf"><![CDATA[\n+script.dir = \'$__tool_directory__/scripts\'\n+input_matrix <- read.table(\n+    \'$input_table\',\n+    stringsAsFactors = F,\n+    na.strings=c("NA", "-", "?", "."),\n+    header=TRUE,\n+    row.names=1\n+)\n+input_matrix[is.na(input_matrix)] <- 0\n+#if str($inbuilt_spec.select_use) == "mpi_sagar":\n+spec = list(\n+    barcodes = \'$input_barcodes\',\n+    format = list(\n+        "1-96"   = c(1,3,5,7),\n+        "97-192" = c(2,4,6,8)\n+    ),\n+    plates = list(\n+        "1" = c(1,2,3,4),\n+        "2" = c(5,6,7,8)\n+    )\n+)\n+#elif str($inbuilt_spec.select_use) == "custom":\n+spec = list(\n+    barcodes = \'$input_barcodes\',\n+    format = list(\n+    #for $i, $s in enumerate($inbuilt_spec.barcode_format)\n+        "${s.range_start}-${s.range_end}" = c( ${s.batches} ) \n+        #if $i < len(list($inbuilt_spec.barcode_format)) - 1\n+        ,\n+        #end if\n+    #end for\n+    ),\n+    plates = list(\n+    #for $i, $s in enumerate($inbuilt_spec.plate_format)\n+        "${s.plate}" = c( ${s.batches} )\n+        #if $i < len(list($inbuilt_spec.plate_format)) - 1\n+        ,\n+        #end if\n+    #end for\n+    )\n+)\n+#end if\n+regex.extract = \'$advanced.regex_extract\'\n+regex.display = \'$advanced.regex_display\'\n+out.pdf = \'$out_plots\'\n+out.table = \'$out_table\'\n+]]>\n+        </configfile>\n+    </configfiles>\n+    <inputs>\n+        <param name="input_table" type="data" format="tsv,tabular" label="Input Matrix" />\n+        <param name="input_barcodes" type="data" format="tsv,tabular,txt" label="Complete Barcodes" />\n+        <conditional name="inbuilt_spec" >\n+            <param name="select_use" type="select" label="Plate Protocol" >\n+                <option 
value="mpi_sagar">CelSeq2 Plate Protocol (Sagar)</option>\n+                <option value="custom">Custom</option>\n+            </param>\n+            <whe'..b'ditional name="inbuilt_spec" >\n+                <param name="select_use" value="mpi_sagar" />\n+            </conditional>\n+            <output name="out_plots" >\n+                <expand macro="assert_conts" />\n+            </output>\n+            <output name="out_table" value="test.table" />\n+        </test>\n+        <test><!-- Plate and Lane test -->\n+            <param name="input_table" value="out3.subtable" />\n+            <param name="input_barcodes" value="celseq_barcodes.192.raw" />\n+            <conditional name="inbuilt_spec" >\n+                <param name="select_use" value="custom" />\n+                <repeat name="barcode_format" >\n+                    <param name="range_start" value="1"/>\n+                    <param name="range_end" value="96" />\n+                    <param name="batches" value="1,3,5,7" />\n+                </repeat>\n+                <repeat name="barcode_format" >\n+                    <param name="range_start" value="97"/>\n+                    <param name="range_end" value="192" />\n+                    <param name="batches" value="2,4,6,8" />\n+                </repeat>\n+                <repeat name="plate_format" >\n+                    <param name="plate" value="1" />\n+                    <param name="batches" value="1,2,3,4" />\n+                </repeat>\n+                <repeat name="plate_format" >\n+                    <param name="plate" value="2" />\n+                    <param name="batches" value="5,6,7,8" />\n+                </repeat>\n+            </conditional>\n+            <output name="out_plots" >\n+                <expand macro="assert_conts" />\n+            </output>\n+            <output name="out_table" value="test.table" />\n+        </test>\n+    </tests>\n+    <help><![CDATA[\n+Cross-contamination Filter 
Plot\n+###################################\n+\n+For a set of barcodes and an experimental setup that uses a subset of these barcodes for each batch, this tool compares each batch against the full range of barcodes in order to determine whether any cross-contamination between batches has occured.\n+\n+If a significant number of transcripts are shown in a batch for cell barcodes that were not designed for that batch, then this tool will show that. In the below plot, we can see that there is no significant cross-contamination taking place (pre-filter), and so we can filter out the false barcodes (post-filter).\n+\n+.. image:: $PATH_TO_IMAGES/crosscontam_pretopost.png\n+   :scale: 50 %\n+\n+\n+Example\n+~~~~~~~~\n+\n+Consider the following experimental setup, with a list of 100 possible barcodes, used over 3 sequencing plates, with each plate containing 4 unique batches, and each plate using a specific subset of the 100 barcodes.\n+\n+:: \n+\n+ Barcodes\n+    \n+  1 - 10 | AAA AAC AAT AAG ACA AGA ATA CAC GAG TAT\n+ 11 - 20 | CCC CCA CCT CCG CTC CGC TCT GCG TCT CGT\n+    .\n+    .\n+ 91 -100 | TTT TAT TCT TGT TTA TTC TTG TCC TGG TAA\n+\n+\n+\n+ Plate 1  +-------+-------+-------+-------+\n+          |  B1   |  B2   |  B3   |  B4   |\n+          +-------+-------+-------+-------+\n+             1-50   51-100  51-100   1-50  \n+\n+ Plate 2  +-------+-------+-------+-------+\n+          |  B5   |  B6   |  B7   |  B8   |\n+          +-------+-------+-------+-------+\n+             1-40   41-80    1-40   41-80\n+\n+ Plate 3  +-------+-------+-------+-------+\n+          |  B9   |  B10  |  B11  |  B12  |\n+          +-------+-------+-------+-------+\n+             1-40   41-80    1-40   41-80 \n+\n+\n+****\n+\n+The above plate and barcoding setup can be more textually represented by specifying barcode ranges and plate numbers, with each denoting which batch numbers they describe as outlined below:\n+\n+::\n+\n+ *Barcodes \xe2\x86\x92 Batches*\n+  1- 50: B1, B4\n+ 51-100: B2, 
B3\n+  1- 40: B5, B7, B9 , B11\n+ 41- 80: B6, B8, B10, B12\n+\n+ *Plates \xe2\x86\x92 Batches*\n+   1: B1, B2 , B3 , B4\n+   2: B5, B6 , B7 , B8\n+   3: B9, B10, B11, B12\n+\n+]]></help>\n+    <citations>\n+        <citation type="doi">10.1007/978-1-4939-7768-0_15</citation>\n+    </citations>\n+</tool>\n+\n'
b
diff -r 000000000000 -r 582b7bd4ae4c scripts/batch_plotting_functions.R
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/scripts/batch_plotting_functions.R Thu Jan 24 09:52:58 2019 -0500
[
@@ -0,0 +1,95 @@
+#!/usr/bin/env R
+
+##
+## Batch Plotting Functions
+##
+calculatePlateIndexes <- function(plate.form, full.barcode.size, num.plates){
+    #' Plate-boundary x-positions for the unfiltered ("false model") layout
+    #'
+    #' Every plate is taken to hold as many batches as the first entry of
+    #' plate.form, and every batch is taken to span the complete barcode list.
+    #'
+    #' @param plate.form list of vectors mapping plates to batches
+    #' @param full.barcode.size size of the complete barcodes list
+    #' @param num.plates number of plates
+    #' @return sequence 0, w, 2w, ..., num.plates * w where w is the width of
+    #'         one plate (batches on first plate * full.barcode.size)
+    n.batches.first.plate <- length(plate.form[[1]])
+    plate.width <- n.batches.first.plate * full.barcode.size
+    last.boundary <- num.plates * plate.width
+
+    return(seq(from = 0, to = last.boundary, by = plate.width))
+}
+
+
+calculateFullBarcodeIndexes <- function(num.batches, full.barcode.size){
+    #' Batch-boundary x-positions when each batch spans all barcodes
+    #'
+    #' Under the "false model" every batch occupies full.barcode.size
+    #' consecutive columns, so the boundaries are the multiples of that size.
+    #'
+    #' @param num.batches number of batches in experiment
+    #' @param full.barcode.size size of all barcodes
+    #' @return sequence from 0 to num.batches * full.barcode.size in steps of
+    #'         full.barcode.size
+    last.boundary <- num.batches * full.barcode.size
+    return(seq(from = 0, to = last.boundary, by = full.barcode.size))
+}
+
+
+
+calculateRealBarcodeIndexes <- function(barcode.form, full.barcode.size){
+    #' True batch-boundary positions derived from the barcode spec
+    #'
+    #' Each name of barcode.form is a "start-end" barcode range and its value
+    #' is the vector of batch numbers that use that range. The number of
+    #' barcodes in the range is recorded as the size of each of those batches.
+    #'
+    #' @param barcode.form list of barcode formats and the batches they map to
+    #' @param full.barcode.size size of all barcodes
+    #' @return list with
+    #'         unfiltered: for batch b, (b-1) * full.barcode.size + size of b
+    #'         filtered:   running total of the batch sizes
+    #'         batches:    batch sizes indexed by batch number
+    batches <- c()
+    for (range.key in names(barcode.form)){
+        bounds <- as.integer(unlist(strsplit(range.key, '-')))
+        n.in.range <- length(seq(bounds[1], bounds[2]))
+        ## every batch mapped to this range gets the range's size
+        for (bat in barcode.form[[range.key]]){
+            batches[[bat]] <- n.in.range
+        }
+    }
+
+    ## Sizes are now stored in batch order; walk them once to build both
+    ## coordinate systems.
+    positions <- c()
+    real_positions <- c()
+    running.total <- 0
+
+    for (b in 1:length(batches)){
+        this.size <- batches[[b]]
+        offset.in.full.layout <- (b-1) * full.barcode.size
+        positions <- c(positions, offset.in.full.layout + this.size)
+        running.total <- this.size + running.total
+        real_positions <- c(real_positions, running.total)
+    }
+
+    return(list(unfiltered=positions,filtered=real_positions, batches=batches))
+}
+
+calculateRealPlateIndexes <- function(plate.form, batches, num.plates){
+    #' Plate-boundary positions once only true barcodes are kept
+    #'
+    #' The width of a plate is the sum of the sizes of the batches listed for
+    #' the first plate; all plates are assumed to share that width.
+    #'
+    #' @param plate.form list of vectors mapping plates to batches
+    #' @param batches batch sizes, indexable by batch number
+    #' @param num.plates number of plates
+    #' @return sequence of plate positions from 0 to num.plates * plate width
+    first.plate.batches <- plate.form[[1]]
+
+    plate.width <- 0
+    for (batch in first.plate.batches){
+        plate.width <- plate.width + batches[[batch]]
+    }
+
+    return(seq(from = 0, to = num.plates * plate.width, by = plate.width))
+}
b
diff -r 000000000000 -r 582b7bd4ae4c scripts/config_assertions.R
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/scripts/config_assertions.R Thu Jan 24 09:52:58 2019 -0500
[
@@ -0,0 +1,91 @@
+#!/usr/bin/env R
+##
+## Sanity Check Methods
+##
+checkNoMissingRanges <- function(format, barcodes){
+    #' Checks coverage of barcodes across all specified ranges
+    #'
+    #' e.g. 1-50, 60-80 -- barcodes 51-59 are not specified. This
+    #'       is not a fatal error, but a warning is issued.
+    #'
+    #' The range keys are 1-based positions into barcodes. Barcodes are
+    #' therefore looked up by position; a logical mask built over
+    #' min(range)..max(range) is NOT aligned with barcodes unless the ranges
+    #' start at 1 and end at length(barcodes), and would be silently recycled.
+    #'
+    #' @param format barcode range and the batches they map to
+    #' @param barcodes full list of barcodes
+    #' @return vector of the barcodes whose position lies in a specified range
+    ranges <- c()
+    for (key in names(format)){
+        rng <- as.integer(unlist(strsplit(key, '-')))
+        ranges <- c(ranges, seq(rng[1],rng[2]))
+    }
+    full.range <- seq(min(ranges),max(ranges))
+    not.in <- !(full.range %in% ranges)
+    ## translate the mask back into barcode positions before indexing
+    missing.idx <- full.range[not.in]
+    used.idx <- full.range[!not.in]
+
+    if (length(missing.idx) != 0) {
+        message("Warning: values[",
+                paste(missing.idx, collapse=","),
+                "] -> barcodes[",
+                paste(barcodes[missing.idx], collapse=","),
+                "] are not used."
+                )
+    } else {
+        message("All barcodes accounted for.")
+    }
+    return(barcodes[used.idx])
+}
+
+checkNoMissingBarcodes <- function(headers, barcodes){
+    #' Extracts barcodes in the headers and compares them with those in barcodes
+    #'
+    #' Only informational: a message is emitted either way and nothing is
+    #' stopped. Unknown barcodes are listed comma-separated so they stay
+    #' readable (message() would otherwise glue the vector elements together).
+    #'
+    #' @param headers matrix headers, must be of P1_B2_ACTG format
+    #' @param barcodes list of barcodes the header barcodes are checked against
+    barcs.in.matrix <- unique(sort(sub(".*_.*_([ACTG]+)", "\\1", headers)))
+    not.in <- !(barcs.in.matrix %in% barcodes)
+    if (sum(not.in) > 0){
+        message("Warning: Barcodes in matrix not in barcodes file\n",
+                paste(barcs.in.matrix[not.in], collapse=", "))
+    } else {
+        message("All input matrix barcodes accounted for.")
+    }
+}
+
+assertNoMissingBatches <- function(format, plates){
+    #' Verifies that the barcode spec and the plate spec agree on batches
+    #'
+    #' Both specs must list the same number of batches, and within each spec
+    #' the batch numbers must form a gap-free run from their minimum to their
+    #' maximum. Any violation is fatal.
+    #'
+    #' @param format barcode format, ranges to batches
+    #' @param plates plate format, plates to batches
+    #' @return number of batches (length of the min..max run of the barcode
+    #'         format's batch numbers)
+    ## Flatten each spec into one vector of batch numbers; the list names are
+    ## dropped so they do not leak into the combined vector.
+    batches.form <- do.call(c, unname(format))
+    batches.plate <- do.call(c, unname(plates))
+
+    if (length(batches.plate) != length(batches.form)){
+        stop("Error: The number of batches specified in the plate do not match those given in the barcode format")
+    }
+
+    expected.form <- seq(min(batches.form), max(batches.form))
+    expected.plate <- seq(min(batches.plate), max(batches.plate))
+
+    if (!all(expected.form %in% batches.form)){
+        stop("Error: Missing batch in barcode format")
+    }
+    if (!all(expected.plate %in% batches.plate)){
+        stop("Error: Missing batch in plate format")
+    }
+    return(length(expected.form))
+}
+
+sanityCheck <- function(spec, matrix.headers){
+    #' Checks specification and matrix headers for consistency
+    #'
+    #' Reads the barcode file named in the spec (one barcode per line), warns
+    #' about uncovered barcode ranges and about header barcodes that are not in
+    #' the used set, and stops if the barcode and plate specs disagree on the
+    #' batches.
+    #'
+    #' @param spec experiment specification with elements barcodes (file path),
+    #'        format (ranges to batches) and plates (plates to batches)
+    #' @param matrix.headers column names of input matrix
+    #' @return list of barcodes, as well as number of barcodes, plates, and batches
+    barcodes <- scan(spec$barcodes, what="", sep="\n")
+    num.barcodes <- length(barcodes)
+    ## use the element's full name: `spec$plate` only resolved to `plates`
+    ## through `$` partial matching
+    num.plates <- length(names(spec$plates))
+    used.barcodes <- checkNoMissingRanges(spec$format, barcodes)
+    num.batches <- assertNoMissingBatches(spec$format, spec$plates)
+    checkNoMissingBarcodes(matrix.headers, used.barcodes)
+
+    return(list(barc=barcodes, barc.n=num.barcodes, plates.n=num.plates, batch.n=num.batches))
+}
b
diff -r 000000000000 -r 582b7bd4ae4c scripts/contamination_plot.R
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/scripts/contamination_plot.R Thu Jan 24 09:52:58 2019 -0500
[
@@ -0,0 +1,140 @@
+#!/usr/bin/env R
+
+require(ggplot2)
+
+log10histoPlot <- function(columncounts, title=""){
+    #' Histogram of log10-transformed library sizes
+    #'
+    #' @param columncounts per-column totals, e.g. colSums(input_matrix)
+    #' @param title Title of plot
+    #' @return ggplot object
+    log.sizes <- data.frame(colcounts=log10(columncounts))
+
+    bars <- geom_histogram(binwidth = 0.05, color="black",fill="white")
+    centred.title <- theme(plot.title = element_text(hjust = 0.5))
+    axis.labels <- labs(title=title, y="Frequency", x="Library Size (Log10)")
+
+    return(ggplot(log.sizes, aes(x=colcounts)) + bars + centred.title + axis.labels)
+}
+
+contaminationPlot <- function(columncounts, title = "",
+                              indexes.plates, indexes.fullbc, indexes.truebc,
+                              filtered=FALSE)
+{
+    #' Plots library size per column with plate and batch boundary lines
+    #'
+    #' Points are drawn at x = 1..N in the order of columncounts. Vertical
+    #' segments mark plate boundaries and batch boundaries; text annotations
+    #' label batches and plates. When filtered is FALSE both the full-barcode
+    #' boundaries and the true-barcode boundaries are drawn; when TRUE only
+    #' the true-barcode boundaries are drawn.
+    #'
+    #' @param columncounts colSums(input_matrix)
+    #' @param title plot title, appended to "Contamination Plot"
+    #' @param indexes.plates plate line positions
+    #' @param indexes.fullbc full batch line positions
+    #' @param indexes.truebc true batch line positions
+    #' @param filtered specifies whether the positions have been adjusted for true barcodes
+    #' @return ggplot object
+    dfer <- data.frame(colcounts=columncounts)
+
+    ## Remove indexes where plates and full barcodes mix
+    ## (a batch boundary that coincides with a plate boundary is drawn only
+    ## as a plate line)
+    indexes.fullbc = indexes.fullbc[!(indexes.fullbc %in% indexes.plates)]
+    
+    nit <- length(indexes.truebc)
+    nif <- length(indexes.fullbc)
+    nip <- length(indexes.plates)
+    ## largest library size; used as the top of the boundary lines
+    mval <- max(dfer)
+  
+    ## Aesthetics
+    ## All heights are in y-axis (library size) units and all spacings in
+    ## x-axis (column index) units; the constants are hard-coded.
+    min.height <- -200
+    tf.spacing.left <- 12
+    tf.spacing.right <- 12   
+    tf.height <- mval - 10000
+    bn.height <- mval - 2000
+    plate.color <- "grey"
+    plate.text.color <- "black"
+    plate.text.alpha <- 0.5
+    plate.text.size <- 3
+    plate.height <- 2* mval / 5
+    plate.spacing <- if (filtered) 12 else 24
+    plate.height.text <- plate.height - 3000
+    
+    ## One row per vertical segment: (x, y) -> (xend, yend)
+    truebcs <- data.frame(
+        x=indexes.truebc, y=rep(min.height,nit),
+        xend=indexes.truebc, yend=rep(mval,nit)
+    )
+    fullbcs <- data.frame(
+        x=indexes.fullbc, y=rep(min.height,nif),
+        xend=indexes.fullbc, yend=rep(mval,nif)
+    )
+    platess <- data.frame(
+        x=indexes.plates, y=rep(min.height,nip),
+        xend=indexes.plates, yend=rep(plate.height,nip)
+    )
+    ## Horizontal bar at min.height joining the first and last plate line
+    connecting.bar <- data.frame(
+        xsta = min(indexes.plates), ysta = min.height,
+        xfin = max(indexes.plates), yfin = min.height
+    )
+      
+    p1 <- ggplot()
+
+    if (!filtered){
+        ## true boundaries: dashed grey, stopping 4000 below the maximum;
+        ## full boundaries: translucent blue
+        p1 <- p1 +
+            geom_segment(data=truebcs, aes(x=x,y=y,xend=xend,yend=yend - 4000), col='grey', lty=2, size=0.2) + 
+            geom_segment(data=fullbcs, aes(x=x,y=y,xend=xend,yend=yend), col='blue', lty=1, size=0.4, alpha=0.2)
+    }
+    else {
+        ## only true boundaries exist after filtering; draw them in blue
+        p1 <- p1 +
+            geom_segment(data=truebcs, aes(x=x,y=y,xend=xend,yend=yend), col='blue', lty=1, size=0.4, alpha=0.2)
+    }
+    
+    ## Plate lines, the connecting bar, the data points, then theme/labels/scales
+    p1 <- p1 +
+        geom_segment(data=platess, aes(x=x,y=y,xend=xend,yend=yend), col=plate.color, lty=1, size=1) +
+        geom_segment(data=connecting.bar, aes(x=xsta,y=ysta,xend=xfin,yend=yfin), col=plate.color, lty=1, size=1) +
+        geom_point(
+            data=dfer, aes(x=1:length(rownames(dfer)), y=dfer$colcounts),
+            pch = 16, cex = 1) +
+        theme(plot.title = element_text(hjust = 0.5),
+              axis.ticks.x=element_blank(), axis.ticks.y=element_blank(),
+              axis.text.x=element_blank()) +
+        labs(title=paste("Contamination Plot\n", title), y="Library Size", x="Barcode Index") +
+        scale_y_continuous(breaks=seq(0,mval + 10000, 10000)) +
+        scale_x_continuous(breaks=NULL)
+
+    ## Add true/false and batch labels
+    ## The batch number is the position of the boundary within indexes.truebc
+    ## (match() returns the first matching position). p1 is updated in the
+    ## enclosing function via <<- because the work happens inside lapply.
+    res <- lapply(indexes.truebc, function(xval){
+        batch <- match(xval, indexes.truebc)
+
+        if (!filtered){
+            p1 <<- p1 +
+                annotate("text", x=xval - tf.spacing.left, size=2, y=tf.height, angle=90, label=" true positives", color = "dark blue", alpha = 0.5) +
+                annotate("text", x=xval + tf.spacing.right, size=2, y=tf.height, angle=-90,label="false positives", color = "black", alpha = 0.5) +
+                annotate("text", x=xval , size=4, y=bn.height, angle=-90, label=paste("B",batch,sep=""), color = "grey", alpha = 0.8)
+        }
+        else {
+            ## label is shifted 48 columns to the left of the boundary
+            p1 <<- p1 +
+                annotate("text", x=xval - 48, size=4, y=bn.height, angle=-90, label=paste("B",batch,sep=""), color = "grey", alpha = 0.8)
+        }
+    })
+
+    ## Add Plate labels
+    ## Boundary k (1-based position in indexes.plates) gets "Plate<k>" on its
+    ## right, unless it is the last boundary, and "Plate<k-1>" on its left,
+    ## unless it is the first.
+    res <- lapply(indexes.plates, function(p){
+        plate.num <- match(p, indexes.plates)
+        c.label <- paste("Plate", plate.num, sep="")
+        b.label <- paste("Plate", plate.num - 1, sep="")
+
+        # Right label
+        if (plate.num <  length(indexes.plates)){
+            p1 <<- p1 +
+                annotate("text", x=p + plate.spacing, size=plate.text.size, y=plate.height.text, angle=-90,
+                         label=c.label, color = plate.text.color, alpha=plate.text.alpha)
+        }
+
+        # Left label
+        if (plate.num > 1){
+            p1 <<- p1 +
+                annotate("text", x=p - plate.spacing, size=plate.text.size, y=plate.height.text, angle=90,
+                         label=b.label, color = plate.text.color,  alpha=plate.text.alpha)
+        }
+    })
+        
+    return(p1)
+}
+
+
b
diff -r 000000000000 -r 582b7bd4ae4c scripts/crosscontamination_filter.R
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/scripts/crosscontamination_filter.R Thu Jan 24 09:52:58 2019 -0500
[
@@ -0,0 +1,73 @@
+#!/usr/bin/env R
+## Driver script: loads the generated config, checks it, builds the pre- and
+## post-filter contamination plots plus a histogram, and writes the filtered
+## matrix.
+VERSION = "0.1"
+
+args = commandArgs(trailingOnly = T)
+
+## Called without exactly one argument: report the version (via message) and
+## abort.
+if (length(args) != 1){
+     message(paste("VERSION:", VERSION))
+     stop("Please provide the config file")
+}
+
+## The config file is R code. The tool's configfile template defines
+## script.dir, input_matrix, spec, regex.extract, regex.display, out.pdf and
+## out.table, all of which are used below.
+source(args[1])
+source(file.path(script.dir, "config_assertions.R"))
+source(file.path(script.dir, "batch_plotting_functions.R"))
+source(file.path(script.dir, "reorder_matrix_headers.R"))
+source(file.path(script.dir, "contamination_plot.R"))
+
+
+## Rewrite the column names with the user-supplied regex pair before any
+## header-based matching is done.
+colnames(input_matrix) <- convertHeadersToSensible(
+    regex.extract,
+    regex.display,
+    colnames(input_matrix)
+)
+
+sc <- sanityCheck(spec, colnames(input_matrix))
+
+barcodes <- sc$barc
+num.barcodes <- sc$barc.n
+num.batches <- sc$batch.n
+num.plates <- sc$plates.n
+
+## Line positions for the plots: real.indexes holds both the unfiltered and
+## filtered batch positions plus the per-batch sizes.
+real.indexes = calculateRealBarcodeIndexes(spec$format, num.barcodes)
+plate.indexes = calculatePlateIndexes(spec$plates, num.barcodes, num.plates)
+plate.indexes.real = calculateRealPlateIndexes(spec$plates, real.indexes$batches, num.plates)
+
+## ordering$all: every matched header, grouped per batch with wanted barcodes
+## first; ordering$correct: only the wanted barcodes per batch.
+ordering <- reorderMatrixHeaders(barcodes, colnames(input_matrix), spec$format)
+
+## Unfiltered
+nmatrix <- input_matrix[,ordering$all]
+
+plot.prefilter <- contaminationPlot(
+    colSums(nmatrix),
+    title="Pre-Filter",
+    plate.indexes,
+    calculateFullBarcodeIndexes(num.batches, num.barcodes),
+    real.indexes$unfiltered
+)
+
+
+## Filtered
+cmatrix <- input_matrix[,ordering$correct]
+
+plot.postfilter <- contaminationPlot(
+    colSums(cmatrix),
+    title="Post-Filter",
+    plate.indexes.real,
+    calculateFullBarcodeIndexes(num.batches, num.barcodes),
+    real.indexes$filtered,
+    filtered = T
+)
+
+plot.histogram <- log10histoPlot(
+    colSums(cmatrix),
+    "Histogram of Post-Filter Matrix Counts"
+)
+
+## NOTE(review): the three plot objects below are bare top-level expressions,
+## so rendering into the PDF appears to rely on auto-printing; explicit
+## print() calls would be needed if this code were moved into a function or
+## loaded with source() -- confirm.
+pdf(out.pdf)
+plot.prefilter
+plot.postfilter
+plot.histogram
+dev.off()
+
+
+## Tab-separated output of the filtered matrix; NA values are written as 0.
+write.table(cmatrix, file=out.table, quote=FALSE, na="0", sep="\t")
b
diff -r 000000000000 -r 582b7bd4ae4c scripts/reorder_matrix_headers.R
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/scripts/reorder_matrix_headers.R Thu Jan 24 09:52:58 2019 -0500
[
@@ -0,0 +1,61 @@
+#!/usr/bin/env R
+##
+## Reorder matrix
+##
+convertHeadersToSensible <- function(regex.from, regex.to, col.names){
+    #' Rewrites column names with a single regex substitution
+    #'
+    #' Only the first match in each name is replaced (sub, not gsub); names
+    #' that do not match regex.from are returned unchanged.
+    #'
+    #' @param regex.from pattern used to extract plate, batch, and barcode
+    #' @param regex.to replacement, may refer to groups captured by regex.from
+    #' @param col.names character vector of column names to rewrite
+    #' @return updated names
+    renamed <- sub(pattern = regex.from, replacement = regex.to, x = col.names)
+    return(renamed)
+}
+
+reorderMatrixHeaders <- function(barcodes, headers, barcode.format){
+    #' Reorder headers to segment wanted and unwanted barcodes on opposite sides
+    #' of each batch
+    #'
+    #' Headers are expected to contain P<plate>_B<batch>_<BARCODE>. Plate and
+    #' batch numbers may have any number of digits; matching them with a
+    #' single-character pattern would leave the barcode of e.g. B10 unextracted
+    #' and silently drop every column of that batch.
+    #'
+    #' @param barcodes list of full barcodes
+    #' @param headers input matrix headers
+    #' @param barcode.format batch list specifying valid barcodes for each batch
+    #' @return list with
+    #'         all:     per batch (in batch-number order) the headers with
+    #'                  wanted barcodes followed by those with unwanted ones
+    #'         correct: per batch the headers with wanted barcodes only
+    form <- barcode.format
+    batch.ordering <- list()
+    batch.ordering.correct <- list()
+
+    for (key in names(form)){
+        rng <- as.integer(unlist(strsplit(key, '-')))
+        ranges <- seq(rng[1],rng[2])
+
+        barc.wanted <- barcodes[ranges]
+        barc.unwant <- barcodes[!(barcodes %in% barc.wanted)]
+
+        for (bat in form[[key]]){  # 1,3,5,7 or 2,4,6,8
+            ## the trailing "_" after the batch number keeps B1 from matching B10
+            batch_bar <- headers[grepl(paste("P\\d+_B",bat,"_([ACGT]+)", sep=""), headers)]
+            barcs.in.batch <- sub("P\\d+_B\\d+_([ACGT]+)", "\\1", batch_bar)
+            b.wanted <- batch_bar[barcs.in.batch %in% barc.wanted]
+            b.unwant <- batch_bar[barcs.in.batch %in% barc.unwant]
+
+            if (sum(b.wanted %in% b.unwant) > 0){
+                stop("Barcode given twice!", b.wanted[b.wanted %in% b.unwant])
+            }
+            ## stored by batch number so the final order follows the batches
+            batch.ordering[[bat]] <- c(b.wanted, b.unwant)
+            batch.ordering.correct[[bat]] <- b.wanted
+        }
+    }
+
+    ## Concatenate in batch order; batch numbers that were never assigned are
+    ## NULL entries and contribute nothing.
+    barcode.ordering <- unlist(batch.ordering)
+    barcode.ordering.correct <- unlist(batch.ordering.correct)
+
+    return(list(all=barcode.ordering,correct=barcode.ordering.correct))
+}
b
diff -r 000000000000 -r 582b7bd4ae4c static/images/crosscontam_pretopost.png
b
Binary file static/images/crosscontam_pretopost.png has changed
b
diff -r 000000000000 -r 582b7bd4ae4c test-data/celseq_barcodes.192.raw
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/celseq_barcodes.192.raw Thu Jan 24 09:52:58 2019 -0500
b
@@ -0,0 +1,192 @@
+AGTGTC
+ACCATG
+GAGTGA
+CACTCA
+CATGTC
+ACAGGA
+GTACCA
+ACAGAC
+ACGTTG
+ACCAAC
+GTGAAG
+CACTTC
+GAGTTG
+GAAGAC
+TGCAGA
+CTAGGA
+ACCAGA
+GTGACA
+CTAGAC
+AGCTCA
+ACTCGA
+CTGTTG
+CATGCA
+CAGAAG
+GTCTCA
+GTGATC
+TGTCTG
+GACAGA
+ACTCTG
+TGCAAC
+GAAGGA
+GTTGAG
+AGACCA
+TGGTTG
+GATCTG
+CTAGTG
+CTCAGA
+CTTCGA
+AGCTAG
+GATCGA
+GTACTC
+TGTCGA
+ACGTGA
+AGGATC
+CTCATG
+AGACTC
+GACAAC
+AGGACA
+ACTCAC
+GTACAG
+AGGAAG
+AGTGCA
+TGGTGA
+AGACAG
+AGCTTC
+TGAGGA
+ACGTAC
+TCACAG
+ACAGTG
+CGATTG
+TCTTGC
+GGTAAC
+TCATCC
+TAGGAC
+TTCACC
+AACGAG
+GTGGAA
+ATGTCG
+ATCACG
+GAATCC
+CGATGA
+GAATGG
+GCAACA
+TTCTCG
+ATTGCG
+TAGTGG
+AAGCCA
+CTATCC
+TCCGAA
+TGAACC
+TGTACG
+GACGAA
+CCACAA
+CACCAA
+CTAAGC
+GATACG
+ACAAGC
+TGAAGG
+TAACGG
+AACCTC
+CGTCTA
+CCATAG
+TTCCAG
+GGACAA
+ACTTCG
+TTGTGC
+GGTATG
+CTGCTA
+ATGAGG
+GGTAGA
+ATCGTG
+ATGGAC
+AGTAGG
+CCATCA
+AGTACC
+CCAGTA
+CGTTAC
+GAGGTA
+TTGGCA
+CAATGC
+GCGTTA
+TAGCTC
+TTCGAC
+GAGCAA
+TTGCTG
+TTGCGA
+GCAGAA
+CCTACA
+GCATGA
+AACTGG
+CGGTTA
+CTAACG
+CACGTA
+TTGGAG
+GCAATG
+TATCCG
+ATGCAG
+GCTCTA
+ATTCGC
+TGTTGG
+ATGACC
+CCGTAA
+TGATCG
+TACAGG
+AGAACG
+GCCATA
+ACGGTA
+AAGCAC
+CGAACA
+ATGCTC
+GGCTTA
+ATCGCA
+GGATCA
+CCAATC
+AAGGTG
+ATCTCC
+GTATCG
+TGTTCC
+GGTGTA
+TACTCC
+ATCAGC
+AAGTGC
+AGGCTA
+CCTATG
+TATCGC
+CGCTAA
+GTAACC
+ACATGG
+CCGATA
+GGATAC
+GTTAGG
+TACGCA
+AGATGC
+TTGCAC
+CAGGAA
+TCTAGG
+GCTTCA
+TTGGTC
+TACCGA
+CATTGG
+CTCGAA
+GCTTAC
+ATACGG
+GTATGC
+TGTAGC
+CGTAAG
+TTACGC
+TACCAC
+CGCATA
+GCTAAG
+ATCCAC
+CCTTGA
+AGCGAA
+GGTTAG
+GATTGC
+CGTTCA
+ATCCGA
+GCATTC
+CCTGAA
+GGAATC
+TCAACG
+AACACC
b
diff -r 000000000000 -r 582b7bd4ae4c test-data/out3.subtable
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/out3.subtable Thu Jan 24 09:52:58 2019 -0500
b
b'@@ -0,0 +1,176 @@\n+WD_DP_120218_P1_1.fastq_WD_DP_120218_P1_1_AACACC\tWD_DP_120218_P1_1.fastq_WD_DP_120218_P1_1_AACCTC\tWD_DP_120218_P1_1.fastq_WD_DP_120218_P1_1_AACGAG\tWD_DP_120218_P1_1.fastq_WD_DP_120218_P1_1_AACTGG\tWD_DP_120218_P1_1.fastq_WD_DP_120218_P1_1_AAGCAC\tWD_DP_120218_P1_1.fastq_WD_DP_120218_P1_1_AAGCCA\tWD_DP_120218_P1_1.fastq_WD_DP_120218_P1_1_AAGGTG\tWD_DP_120218_P1_1.fastq_WD_DP_120218_P1_1_AAGTGC\tWD_DP_120218_P1_1.fastq_WD_DP_120218_P1_1_ACAAGC\tWD_DP_120218_P1_1.fastq_WD_DP_120218_P1_1_ACAGAC\tWD_DP_120218_P1_1.fastq_WD_DP_120218_P1_1_ACAGGA\tWD_DP_120218_P1_1.fastq_WD_DP_120218_P1_1_ACAGTG\tWD_DP_120218_P1_1.fastq_WD_DP_120218_P1_1_ACATGG\tWD_DP_120218_P1_1.fastq_WD_DP_120218_P1_1_ACCAAC\tWD_DP_120218_P1_1.fastq_WD_DP_120218_P1_1_ACCAGA\tWD_DP_120218_P1_1.fastq_WD_DP_120218_P1_1_ACCATG\tWD_DP_120218_P1_1.fastq_WD_DP_120218_P1_1_ACGGTA\tWD_DP_120218_P1_1.fastq_WD_DP_120218_P1_1_ACGTAC\tWD_DP_120218_P1_1.fastq_WD_DP_120218_P1_1_ACGTGA\tWD_DP_120218_P1_1.fastq_WD_DP_120218_P1_1_ACGTTG\tWD_DP_120218_P1_1.fastq_WD_DP_120218_P1_1_ACTCAC\tWD_DP_120218_P1_1.fastq_WD_DP_120218_P1_1_ACTCGA\tWD_DP_120218_P1_1.fastq_WD_DP_120218_P1_1_ACTCTG\tWD_DP_120218_P1_1.fastq_WD_DP_120218_P1_1_ACTTCG\tWD_DP_120218_P1_1.fastq_WD_DP_120218_P1_1_AGAACG\tWD_DP_120218_P1_1.fastq_WD_DP_120218_P1_1_AGACAG\tWD_DP_120218_P1_1.fastq_WD_DP_120218_P1_1_AGACCA\tWD_DP_120218_P1_1.fastq_WD_DP_120218_P1_1_AGACTC\tWD_DP_120218_P1_1.fastq_WD_DP_120218_P1_1_AGATGC\tWD_DP_120218_P1_1.fastq_WD_DP_120218_P1_1_AGCGAA\tWD_DP_120218_P1_1.fastq_WD_DP_120218_P1_1_AGCTAG\tWD_DP_120218_P1_1.fastq_WD_DP_120218_P1_1_AGCTCA\tWD_DP_120218_P1_1.fastq_WD_DP_120218_P1_1_AGCTTC\tWD_DP_120218_P1_1.fastq_WD_DP_120218_P1_1_AGGAAG\tWD_DP_120218_P1_1.fastq_WD_DP_120218_P1_1_AGGACA\tWD_DP_120218_P1_1.fastq_WD_DP_120218_P1_1_AGGATC\tWD_DP_120218_P1_1.fastq_WD_DP_120218_P1_1_AGGCTA\tWD_DP_120218_P1_1.fastq_WD_DP_120218_P1_1_AGTACC\tWD_DP_120218_P1_1.fastq_WD_DP_120218_P1_1_AGTAGG\tWD_DP_120218_P1_1.fastq_WD_D
P_120218_P1_1_AGTGCA\tWD_DP_120218_P1_1.fastq_WD_DP_120218_P1_1_AGTGTC\tWD_DP_120218_P1_1.fastq_WD_DP_120218_P1_1_ATACGG\tWD_DP_120218_P1_1.fastq_WD_DP_120218_P1_1_ATCACG\tWD_DP_120218_P1_1.fastq_WD_DP_120218_P1_1_ATCAGC\tWD_DP_120218_P1_1.fastq_WD_DP_120218_P1_1_ATCCAC\tWD_DP_120218_P1_1.fastq_WD_DP_120218_P1_1_ATCCGA\tWD_DP_120218_P1_1.fastq_WD_DP_120218_P1_1_ATCGCA\tWD_DP_120218_P1_1.fastq_WD_DP_120218_P1_1_ATCGTG\tWD_DP_120218_P1_1.fastq_WD_DP_120218_P1_1_ATCTCC\tWD_DP_120218_P1_1.fastq_WD_DP_120218_P1_1_ATGACC\tWD_DP_120218_P1_1.fastq_WD_DP_120218_P1_1_ATGAGG\tWD_DP_120218_P1_1.fastq_WD_DP_120218_P1_1_ATGCAG\tWD_DP_120218_P1_1.fastq_WD_DP_120218_P1_1_ATGCTC\tWD_DP_120218_P1_1.fastq_WD_DP_120218_P1_1_ATGGAC\tWD_DP_120218_P1_1.fastq_WD_DP_120218_P1_1_ATGTCG\tWD_DP_120218_P1_1.fastq_WD_DP_120218_P1_1_ATTCGC\tWD_DP_120218_P1_1.fastq_WD_DP_120218_P1_1_ATTGCG\tWD_DP_120218_P1_1.fastq_WD_DP_120218_P1_1_CAATGC\tWD_DP_120218_P1_1.fastq_WD_DP_120218_P1_1_CACCAA\tWD_DP_120218_P1_1.fastq_WD_DP_120218_P1_1_CACGTA\tWD_DP_120218_P1_1.fastq_WD_DP_120218_P1_1_CACTCA\tWD_DP_120218_P1_1.fastq_WD_DP_120218_P1_1_CACTTC\tWD_DP_120218_P1_1.fastq_WD_DP_120218_P1_1_CAGAAG\tWD_DP_120218_P1_1.fastq_WD_DP_120218_P1_1_CAGGAA\tWD_DP_120218_P1_1.fastq_WD_DP_120218_P1_1_CATGCA\tWD_DP_120218_P1_1.fastq_WD_DP_120218_P1_1_CATGTC\tWD_DP_120218_P1_1.fastq_WD_DP_120218_P1_1_CATTGG\tWD_DP_120218_P1_1.fastq_WD_DP_120218_P1_1_CCAATC\tWD_DP_120218_P1_1.fastq_WD_DP_120218_P1_1_CCACAA\tWD_DP_120218_P1_1.fastq_WD_DP_120218_P1_1_CCAGTA\tWD_DP_120218_P1_1.fastq_WD_DP_120218_P1_1_CCATAG\tWD_DP_120218_P1_1.fastq_WD_DP_120218_P1_1_CCATCA\tWD_DP_120218_P1_1.fastq_WD_DP_120218_P1_1_CCGATA\tWD_DP_120218_P1_1.fastq_WD_DP_120218_P1_1_CCGTAA\tWD_DP_120218_P1_1.fastq_WD_DP_120218_P1_1_CCTACA\tWD_DP_120218_P1_1.fastq_WD_DP_120218_P1_1_CCTATG\tWD_DP_120218_P1_1.fastq_WD_DP_120218_P1_1_CCTGAA\tWD_DP_120218_P1_1.fastq_WD_DP_120218_P1_1_CCTTGA\tWD_DP_120218_P1_1.fastq_WD_DP_120218_P1_1_CGAACA\tWD_DP_120218_P1_1.fastq_WD_D
P_120218_P1_1_CGATGA\tWD_DP_120218_P1_1.fastq_WD_DP_120218_P1_1_CGATTG\tWD_DP_12021'..b'\t0\t0\t0\t7\t0\t0\t0\t0\t0\t1\t1\t0\t5\t1\t0\t0\t0\t3\t0\t1\t0\t2\t7\t1\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\t0\t0\t2\t0\t1\t0\t0\t0\t0\t0\t0\t0\t0\t5\t0\t0\t2\t1\t0\t0\t5\t0\t0\t0\t0\t0\t1\t8\t6\t0\t0\t0\t0\t3\t2\t0\t0\t0\t8\t0\t0\t0\t1\t0\t0\t0\t1\t0\t1\t2\t0\t0\t0\t10\t0\t0\t0\t0\t0\t3\t0\t2\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t2\t1\t9\t0\t0\t0\t0\t1\t0\t0\t4\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\t0\t1\t0\t0\t1\t0\t2\t0\t1\t8\t1\t5\t0\t0\t0\t4\t2\t0\t4\t4\t5\t0\t2\t0\t0\t0\t0\t0\t1\t2\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\t0\t0\t2\t0\t0\t1\t0\t1\t0\t0\t1\t0\t0\t0\t1\t6\t0\t0\t0\t2\t4\t4\t0\t0\t0\t1\t0\t0\t0\t0\t2\t0\t5\t0\t0\t1\t0\t0\t0\t0\t1\t0\t1\t0\t0\t0\t0\t0\t0\t2\t0\t0\t0\t3\t1\t0\t2\t0\t1\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\t0\t0\t1\t0\t0\t3\t1\t0\t0\t2\t1\t4\t0\t2\t2\t0\t0\t1\t0\t0\t1\t0\t0\t0\t5\t0\t0\t0\t1\t0\t0\t0\t0\t0\t0\t0\t4\t3\t0\t1\t0\t2\t2\t0\t0\t0\t4\t1\t0\t0\t1\t1\t1\t7\t0\t0\t0\t0\t0\t0\t0\t0\t8\t0\t0\t0\t0\t0\t0\t0\t0\t0\t4\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\t1\t0\t1\t0\t1\t0\t2\t1\t0\t0\t0\t1\t6\t0\t1\t3\t0\t0\t0\t0\t0\t0\t1\t3\t0\t0\t0\t0\t0\t0\t0\t0\t0\t3\t0\t7\t0\t3\t0\t0\t0\t0\t2\t0\t0\t0\t3\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t2\t0\t1\t0\t0\t0\n+ENSDARG00000104782\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t3\t0\t0\t2\t3\t2\t0\t1\t0\t4\t4\t0\t1\t0\t0\t12\t0\t5\t0\t0\t1\t1\t2\t0\t1\t2\t0\t0\t0\t2\t2\t0\t0\t0\t0\t0\t1\t0\t0\t0\t0\t0\t0\t0\t2\t0\t6\t0\t0\t0\t5\t0\t6\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\t0\t0\t0\t0\t0\t3\t1\t0\t0\t0\t0\t1\t0\t0\t0\t1\t1\t2\t0\t0\t1\t0\t0\t0\t1\t1\t0\t1\t7\t0\t0\t1\t3\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\t1\t0\t0\t0\t0\t0\t0\t0\t0\t4\t1\t0\t0\t5\t0\t0\t0\t0\t0\t0\t4\t13\t0\t0\t0\t0\t8\t0\t1\t0\t1\t0\t0\t0\t0\t1\t0\t0\t4\t0\t0\t0\t0\t1\t1\t4\t0\t3\t1\t2\t1\t0\t1\t2\t1\t1\t0\t0\t1\t2\t1\t0\t0\t3\t0\t0\t0\t0\t0\t0\t0\t0\t0\
t6\t0\t0\t0\t1\t0\t0\t3\t0\t0\t0\t0\t0\t0\t0\t0\t5\t0\t0\t0\t2\t0\t0\t0\t0\t2\t0\t0\t0\t0\t1\t0\t0\t0\t0\t0\t0\t4\t3\t0\t0\t3\t0\t0\t2\t0\t1\t0\t2\t0\t0\t5\t2\t5\t4\t0\t0\t1\t1\t0\t0\t0\t0\t0\t0\t0\t0\t13\t1\t0\t0\t0\t0\t0\t0\t13\t1\t0\t0\t1\t0\t0\t1\t0\t0\t10\t0\t0\t1\t9\t0\t0\t0\t0\t0\t0\t0\t3\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t6\t6\t0\t0\t1\t0\t0\t2\t0\t0\t0\t5\t3\t0\t0\t7\t0\t0\t0\t0\t0\t3\t1\t1\t1\t0\t0\t0\t9\t0\t0\t0\t0\t3\t0\t0\t0\t0\t0\t0\t5\t0\t0\t0\t0\t3\t0\t4\t0\t0\t0\t1\t0\t3\t0\t1\t2\t2\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\t6\t1\t0\t0\t0\t1\t0\t0\t1\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\t1\t1\t0\t0\t1\t6\t4\t0\t0\t2\t2\t0\t0\t0\t1\t0\t0\t2\t0\t0\t8\t7\t1\t0\t0\t0\t7\t0\t0\t0\t0\t0\t0\t0\t3\t3\t0\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\t1\t0\t0\t0\t1\t5\t0\t0\t2\t0\t0\t0\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\t3\t0\t0\t0\t0\t1\t0\t0\t0\t0\t0\t4\t0\t0\t6\t0\t0\t0\t0\t5\t1\t3\t0\t0\t0\t0\t1\t0\t0\t0\t0\t0\t4\t3\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\t1\t0\t0\t0\t0\t0\t9\t2\t0\t0\t1\t0\t0\t0\t0\t0\t2\t3\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t2\t0\t0\t0\t0\t1\t0\t0\t0\t21\t1\t0\t2\t1\t0\t2\t5\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\t0\t0\t0\t0\t3\t1\t0\t0\t5\t0\t0\t0\t0\t0\t0\t0\t4\t2\t0\t0\t1\t0\t0\t0\t0\t4\t1\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\t6\t1\t1\t0\t3\t0\t0\t0\t0\t3\t3\t1\t0\t3\t1\t0\t0\t3\t0\t1\t0\t0\t0\t1\t0\t0\t1\t0\t0\t0\t0\t6\t0\t0\t0\t1\t5\t3\t7\t0\t3\t3\t3\t0\t0\t0\t0\t0\t2\t0\t1\t9\t2\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\t0\t0\t2\t0\t0\t0\t0\t5\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t4\t2\t7\t0\t1\t0\t0\t5\t0\t0\t5\t1\t4\t0\t10\t0\t0\t1\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\t1\t0\t0\t0\t4\t1\t1\t0\t8\t0\t0\t0\t11\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t2\t0\t0\t0\t0\t1\t0\t0\t6\t0\t0\t4\t3\t6\t1\t2\t0\t0\t4\t0\t0\t0\t0\t0\t0\t1\t3\t4\t9\t0\t4\t1\t5\t0\t0\t5\t1\t0\t0\t0\t0\t0\t1\t8\t0\t0\t0\t0\t13\t0\t2\t6\t0\t0\t0\t0\t1\t24\t0\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t7\t0\t9\t0\t1\t1\t2\t5\t0\t0\t0\t1\t0\t1\t3\t0\t1\t0\t1\t1\t0\t0\t0\t0\t0\t0\t8\t0\t0\t0\t0\t3\t0\
t0\t0\t0\t7\t0\t8\t2\t12\t4\t0\t0\t0\t4\t7\t6\t0\t20\t0\t1\t3\t1\t0\t4\t15\t2\t0\t1\t0\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t2\t0\t0\t1\t4\t0\t0\t0\t0\t0\t0\t21\t4\t0\t0\t0\t0\t4\t4\t10\t0\t10\t1\t0\t0\t0\t0\t1\t0\t0\t1\t0\t0\t0\t2\t3\t8\t0\t1\t3\t0\t0\t0\t1\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\t0\t5\t0\t0\t0\t0\t0\t0\t0\t0\t0\t10\t0\t0\t10\t0\t0\t6\t0\t0\t0\t0\t0\t0\t0\t0\t15\t1\t0\t0\t1\t0\t0\t0\t0\t4\t0\t0\t0\t0\t7\t0\t0\t0\t0\t0\t0\t0\t2\t1\t0\t5\t0\t0\t0\t0\t4\t2\t0\t0\t1\t1\t18\t1\t2\t0\t6\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t2\t0\t0\t0\t1\t8\t3\t8\t1\t0\t0\t3\t1\t0\t0\t0\t0\t1\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\t0\t1\t7\t0\t0\t1\t0\t0\t1\t0\t0\t0\t0\t5\t0\t0\t0\t0\t0\t0\t0\t1\t0\t10\t0\t1\t0\t1\t8\t12\t3\t0\t0\t12\t0\t0\t0\t5\t1\t1\t0\t0\t0\t0\t1\t1\t3\t0\t0\t0\t0\t0\t3\t1\t0\t3\t4\t0\t1\t0\t6\t0\t0\t0\t3\t1\t0\t0\t0\t1\t0\t0\t0\t0\t1\t0\t0\t0\t0\t0\t4\t0\t0\t3\t0\t0\t0\t0\t0\t0\t0\t6\t1\t0\t4\t0\t0\t0\t4\t1\t1\t0\t0\t0\t0\t8\t4\t2\t0\t0\t0\t2\t4\t0\t0\t1\t15\t3\t0\t1\t1\t0\t2\t3\t4\t0\t0\t2\t0\t1\t0\t8\t0\t0\t0\t0\t0\t5\t0\t0\t0\t0\t0\t1\t0\t0\t0\t0\t0\t0\t1\t0\t0\t8\t0\t0\t0\t5\t2\t0\t0\t0\t0\t0\t0\t4\t0\t3\t1\t0\t0\t1\t0\t0\t0\t0\t1\t1\t0\t0\t0\t0\t0\t0\t1\t0\t1\t4\t1\t0\t4\t0\t3\t0\t0\t0\t1\t5\t0\t5\t3\t0\t0\t4\t0\t0\t0\t3\t2\t0\t2\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t8\t8\t0\t0\t0\t0\t0\t1\t0\t0\t0\t2\t1\t0\t0\t1\t0\t0\t0\t4\t12\t1\t0\t0\t1\t15\t8\t0\t1\t0\t7\t0\t0\t0\t1\t1\t1\t3\t0\t0\t0\t0\t0\t0\t0\t0\t0\t2\t0\t0\t0\t0\t0\t0\t3\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\t0\t0\t0\t0\t3\t0\t0\t0\t4\t0\t0\t0\t0\t0\t1\t0\t0\t1\t0\t0\t0\t0\t0\t0\t0\t1\t3\t0\t0\t10\t1\t0\t0\t2\t5\t1\t0\t0\t1\t0\t0\t0\t2\t0\t3\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\t2\t1\t0\t0\t0\t4\t1\t0\t3\t0\t0\t2\t0\t0\t0\t2\t2\t0\t0\t0\t0\t4\t3\t0\t0\t0\t1\t0\t0\t0\t0\t8\t0\t1\t0\t0\t0\t0\t0\t0\t0\t2\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\t1\t0\t0\t1\t0\t0\t4\t2\t0\t0\t2\t1\t7\t0\t2\t15\t0\t0\t0\t0\t0\t0\t2\t1\t0\t0\t1\t0\t0\t0\t0\t0\t2\t4\t2\t12\t0\t5\t0\t0\t0\t1\t0\t0\t0\t0\t5\t0\t0\t0\t0\t0\t0\t
0\t0\t0\t0\t1\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\t1\t0\t2\t2\t0\t0\n'
b
diff -r 000000000000 -r 582b7bd4ae4c test-data/test.pdf
b
Binary file test-data/test.pdf has changed
b
diff -r 000000000000 -r 582b7bd4ae4c test-data/test.table
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test.table Thu Jan 24 09:52:58 2019 -0500
b
b'@@ -0,0 +1,176 @@\n+P1_B1_AACCTC\tP1_B1_AACGAG\tP1_B1_AAGCCA\tP1_B1_ACAAGC\tP1_B1_ACAGAC\tP1_B1_ACAGGA\tP1_B1_ACAGTG\tP1_B1_ACCAAC\tP1_B1_ACCAGA\tP1_B1_ACCATG\tP1_B1_ACGTAC\tP1_B1_ACGTGA\tP1_B1_ACGTTG\tP1_B1_ACTCAC\tP1_B1_ACTCGA\tP1_B1_ACTCTG\tP1_B1_ACTTCG\tP1_B1_AGACAG\tP1_B1_AGACCA\tP1_B1_AGACTC\tP1_B1_AGCTAG\tP1_B1_AGCTCA\tP1_B1_AGCTTC\tP1_B1_AGGAAG\tP1_B1_AGGACA\tP1_B1_AGGATC\tP1_B1_AGTGCA\tP1_B1_AGTGTC\tP1_B1_ATCACG\tP1_B1_ATGTCG\tP1_B1_ATTGCG\tP1_B1_CACCAA\tP1_B1_CACTCA\tP1_B1_CACTTC\tP1_B1_CAGAAG\tP1_B1_CATGCA\tP1_B1_CATGTC\tP1_B1_CCACAA\tP1_B1_CCATAG\tP1_B1_CGATGA\tP1_B1_CGATTG\tP1_B1_CGTCTA\tP1_B1_CTAAGC\tP1_B1_CTAGAC\tP1_B1_CTAGGA\tP1_B1_CTAGTG\tP1_B1_CTATCC\tP1_B1_CTCAGA\tP1_B1_CTCATG\tP1_B1_CTGTTG\tP1_B1_CTTCGA\tP1_B1_GAAGAC\tP1_B1_GAAGGA\tP1_B1_GAATCC\tP1_B1_GAATGG\tP1_B1_GACAAC\tP1_B1_GACAGA\tP1_B1_GACGAA\tP1_B1_GAGTGA\tP1_B1_GAGTTG\tP1_B1_GATACG\tP1_B1_GATCGA\tP1_B1_GATCTG\tP1_B1_GCAACA\tP1_B1_GGACAA\tP1_B1_GGTAAC\tP1_B1_GTACAG\tP1_B1_GTACCA\tP1_B1_GTACTC\tP1_B1_GTCTCA\tP1_B1_GTGAAG\tP1_B1_GTGACA\tP1_B1_GTGATC\tP1_B1_GTGGAA\tP1_B1_GTTGAG\tP1_B1_TAACGG\tP1_B1_TAGGAC\tP1_B1_TAGTGG\tP1_B1_TCACAG\tP1_B1_TCATCC\tP1_B1_TCCGAA\tP1_B1_TCTTGC\tP1_B1_TGAACC\tP1_B1_TGAAGG\tP1_B1_TGAGGA\tP1_B1_TGCAAC\tP1_B1_TGCAGA\tP1_B1_TGGTGA\tP1_B1_TGGTTG\tP1_B1_TGTACG\tP1_B1_TGTCGA\tP1_B1_TGTCTG\tP1_B1_TTCACC\tP1_B1_TTCCAG\tP1_B1_TTCTCG\tP1_B1_TTGTGC\tP1_B2_AACACC\tP1_B2_AACTGG\tP1_B2_AAGCAC\tP1_B2_AAGGTG\tP1_B2_AAGTGC\tP1_B2_ACATGG\tP1_B2_ACGGTA\tP1_B2_AGAACG\tP1_B2_AGATGC\tP1_B2_AGCGAA\tP1_B2_AGGCTA\tP1_B2_AGTACC\tP1_B2_AGTAGG\tP1_B2_ATACGG\tP1_B2_ATCAGC\tP1_B2_ATCCAC\tP1_B2_ATCCGA\tP1_B2_ATCGCA\tP1_B2_ATCGTG\tP1_B2_ATCTCC\tP1_B2_ATGACC\tP1_B2_ATGAGG\tP1_B2_ATGCAG\tP1_B2_ATGCTC\tP1_B2_ATGGAC\tP1_B2_ATTCGC\tP1_B2_CAATGC\tP1_B2_CACGTA\tP1_B2_CAGGAA\tP1_B2_CATTGG\tP1_B2_CCAATC\tP1_B2_CCAGTA\tP1_B2_CCATCA\tP1_B2_CCGATA\tP1_B2_CCGTAA\tP1_B2_CCTACA\tP1_B2_CCTATG\tP1_B2_CCTGAA\tP1_B2_CCTTGA\tP1_B2_CGAACA\tP1_B2_CGCATA\tP1_B2_CGCTAA\tP1_B2_CGGTTA\tP1_B2_CGTAAG\tP1_B2_CGTTAC\tP1_B
2_CGTTCA\tP1_B2_CTAACG\tP1_B2_CTCGAA\tP1_B2_CTGCTA\tP1_B2_GAGCAA\tP1_B2_GAGGTA\tP1_B2_GATTGC\tP1_B2_GCAATG\tP1_B2_GCAGAA\tP1_B2_GCATGA\tP1_B2_GCATTC\tP1_B2_GCCATA\tP1_B2_GCGTTA\tP1_B2_GCTAAG\tP1_B2_GCTCTA\tP1_B2_GCTTAC\tP1_B2_GCTTCA\tP1_B2_GGAATC\tP1_B2_GGATAC\tP1_B2_GGATCA\tP1_B2_GGCTTA\tP1_B2_GGTAGA\tP1_B2_GGTATG\tP1_B2_GGTGTA\tP1_B2_GGTTAG\tP1_B2_GTAACC\tP1_B2_GTATCG\tP1_B2_GTATGC\tP1_B2_GTTAGG\tP1_B2_TACAGG\tP1_B2_TACCAC\tP1_B2_TACCGA\tP1_B2_TACGCA\tP1_B2_TACTCC\tP1_B2_TAGCTC\tP1_B2_TATCCG\tP1_B2_TATCGC\tP1_B2_TCAACG\tP1_B2_TCTAGG\tP1_B2_TGATCG\tP1_B2_TGTAGC\tP1_B2_TGTTCC\tP1_B2_TGTTGG\tP1_B2_TTACGC\tP1_B2_TTCGAC\tP1_B2_TTGCAC\tP1_B2_TTGCGA\tP1_B2_TTGCTG\tP1_B2_TTGGAG\tP1_B2_TTGGCA\tP1_B2_TTGGTC\tP1_B3_AACCTC\tP1_B3_AACGAG\tP1_B3_AAGCCA\tP1_B3_ACAAGC\tP1_B3_ACAGAC\tP1_B3_ACAGGA\tP1_B3_ACAGTG\tP1_B3_ACCAAC\tP1_B3_ACCAGA\tP1_B3_ACCATG\tP1_B3_ACGTAC\tP1_B3_ACGTGA\tP1_B3_ACGTTG\tP1_B3_ACTCAC\tP1_B3_ACTCGA\tP1_B3_ACTCTG\tP1_B3_ACTTCG\tP1_B3_AGACAG\tP1_B3_AGACCA\tP1_B3_AGACTC\tP1_B3_AGCTAG\tP1_B3_AGCTCA\tP1_B3_AGCTTC\tP1_B3_AGGAAG\tP1_B3_AGGACA\tP1_B3_AGGATC\tP1_B3_AGTGCA\tP1_B3_AGTGTC\tP1_B3_ATCACG\tP1_B3_ATGTCG\tP1_B3_ATTGCG\tP1_B3_CACCAA\tP1_B3_CACTCA\tP1_B3_CACTTC\tP1_B3_CAGAAG\tP1_B3_CATGCA\tP1_B3_CATGTC\tP1_B3_CCACAA\tP1_B3_CCATAG\tP1_B3_CGATGA\tP1_B3_CGATTG\tP1_B3_CGTCTA\tP1_B3_CTAAGC\tP1_B3_CTAGAC\tP1_B3_CTAGGA\tP1_B3_CTAGTG\tP1_B3_CTATCC\tP1_B3_CTCAGA\tP1_B3_CTCATG\tP1_B3_CTGTTG\tP1_B3_CTTCGA\tP1_B3_GAAGAC\tP1_B3_GAAGGA\tP1_B3_GAATCC\tP1_B3_GAATGG\tP1_B3_GACAAC\tP1_B3_GACAGA\tP1_B3_GACGAA\tP1_B3_GAGTGA\tP1_B3_GAGTTG\tP1_B3_GATACG\tP1_B3_GATCGA\tP1_B3_GATCTG\tP1_B3_GCAACA\tP1_B3_GGACAA\tP1_B3_GGTAAC\tP1_B3_GTACAG\tP1_B3_GTACCA\tP1_B3_GTACTC\tP1_B3_GTCTCA\tP1_B3_GTGAAG\tP1_B3_GTGACA\tP1_B3_GTGATC\tP1_B3_GTGGAA\tP1_B3_GTTGAG\tP1_B3_TAACGG\tP1_B3_TAGGAC\tP1_B3_TAGTGG\tP1_B3_TCACAG\tP1_B3_TCATCC\tP1_B3_TCCGAA\tP1_B3_TCTTGC\tP1_B3_TGAACC\tP1_B3_TGAAGG\tP1_B3_TGAGGA\tP1_B3_TGCAAC\tP1_B3_TGCAGA\tP1_B3_TGGTGA\tP1_B3_TGGTTG\tP1_B3_TGTACG\tP1_B3_TGTCGA\tP1_B3_TGTCTG\tP1
_B3_TTCACC\tP1_B3_TTCCAG\tP1_B3_TTCTCG\tP1_B3_TTGTGC\tP1_B4_AACACC\tP1_B4_AACTGG\tP1_B4_AAGCAC\tP1_B4_AAGGTG\tP1_B4_AAGTGC\tP1_B4_ACATGG\tP1_B4_ACGGTA\tP1_B4_AGAACG\tP1_B4_AGATGC\tP1_B4_AGCGAA\tP1_B4_AGGCTA\tP1_B4_AGTACC\tP1_B4_AGTAGG\tP1_B4_ATACGG\tP1_B4_ATCAGC\tP1_B4_ATCCAC\tP1_B4_ATCCGA\tP1_B4_ATCGCA\tP1'..b'\t0\t0\t0\t0\t0\t1\t7\t0\t0\t0\t3\t0\t1\t0\t55\t0\t9\t0\t0\t0\t0\t3\t5\t5\t4\t0\t3\t0\t1\t0\t0\t0\t0\t0\t0\t14\t0\t0\t47\t0\t0\t8\t1\t0\t26\t0\t6\t24\t0\t6\t6\t0\t0\t4\t19\t0\t1\t4\t0\t0\t9\t0\t11\t1\t19\t15\t0\t10\t39\t0\t0\t4\t0\t14\t1\t1\t0\t3\t3\t0\t0\t8\t1\t0\t9\t12\t0\t1\t15\t0\t30\t1\t2\t1\t5\t21\t2\t7\t0\t6\t0\t0\t0\t0\t0\t0\t1\t0\t1\t0\t10\t0\t1\t0\t23\t0\t9\t2\t1\t4\t10\t0\t8\t1\t3\t0\t0\t0\t2\t0\t0\t0\t0\t2\t1\t1\t0\t6\t4\t0\t0\t0\t12\t1\t0\t2\t7\t34\t2\t0\t11\t4\t1\t0\t0\t1\t1\t4\t3\t0\t2\t7\t0\t0\t0\t0\t0\t12\t0\t0\t0\t0\t22\t14\t10\t0\t6\t2\t0\t9\t1\t0\t1\t0\t0\t7\t5\t2\t0\t0\t1\t0\t0\t0\t2\t14\t4\t7\t6\t2\t0\t0\t1\t0\t9\t0\t2\t9\t0\t6\t1\t0\t7\t10\t2\t0\t1\t0\t6\t2\t2\t16\t0\t0\t0\t4\t4\t1\t0\t1\t2\t1\t0\t19\t0\t0\t5\t1\t1\t3\t2\t1\t1\t0\t1\t0\t11\t0\t0\t0\t0\t0\t8\t2\t4\t10\t0\t2\t0\t9\t8\t0\t4\t4\t8\t0\t6\t0\t0\t10\t0\t10\t0\t1\t0\t0\t0\t0\t0\t0\t3\t0\t1\t3\t1\t0\t19\t8\t7\t7\t0\t0\t5\t0\t0\t1\t1\t0\t16\t0\t0\t0\t0\t12\t1\t0\t1\t0\t3\t0\t1\t1\t1\t0\t0\t1\t1\t0\t3\t0\t2\t5\t3\t0\t0\t1\t0\t0\t2\t0\t12\t0\t3\t5\t3\t1\t0\t5\t1\t2\t3\t0\t0\t3\t1\t4\t2\t0\t0\t4\t2\t11\t3\t13\t9\t0\t0\t4\t0\t4\t0\t0\t0\t3\t1\t2\t9\t0\t0\t3\t11\t1\t6\t0\t0\t2\t7\t0\t0\t3\t6\t0\t9\t0\t0\t0\t0\t3\t10\t0\t2\t0\t1\t0\t0\t0\t4\t0\t4\t0\t0\n+ENSDARG00000104458\t0\t0\t0\t0\t0\t0\t0\t1\t3\t0\t0\t0\t10\t0\t0\t3\t0\t1\t1\t2\t0\t0\t1\t0\t0\t3\t0\t2\t0\t1\t4\t0\t8\t0\t3\t0\t0\t0\t0\t7\t0\t3\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t7\t0\t0\t0\t4\t3\t0\t1\t0\t0\t2\t0\t1\t0\t6\t8\t0\t0\t2\t2\t0\t3\t0\t0\t2\t0\t0\t2\t2\t0\t5\t0\t0\t0\t0\t0\t0\t1\t2\t0\t0\t1\t6\t0\t0\t0\t2\t0\t0\t0\t5\t0\t1\t1\t2\t0\t0\t4\t0\t1\t0\t1\t1\t0\t2\t0\t0\t1\t1\t0\t1\t0\t3\t0\t0\t0\t0\t1\t5\t0\t0\t1\t0\t1\t3\t0\t4
\t3\t0\t5\t3\t0\t4\t5\t2\t0\t0\t4\t2\t0\t0\t9\t0\t1\t0\t0\t1\t0\t11\t0\t0\t2\t4\t0\t3\t0\t4\t7\t0\t9\t0\t3\t0\t0\t1\t15\t3\t0\t0\t6\t2\t0\t8\t0\t0\t0\t0\t0\t0\t0\t1\t5\t0\t0\t4\t2\t0\t1\t0\t0\t0\t0\t0\t0\t1\t0\t0\t8\t1\t0\t5\t0\t0\t0\t0\t2\t4\t0\t0\t5\t0\t7\t8\t0\t4\t0\t4\t0\t0\t0\t5\t0\t2\t3\t0\t0\t10\t0\t0\t9\t4\t2\t0\t0\t0\t0\t0\t0\t1\t0\t4\t3\t0\t0\t2\t0\t11\t2\t1\t0\t0\t0\t2\t1\t1\t0\t0\t5\t2\t0\t1\t1\t0\t0\t4\t2\t0\t0\t8\t0\t10\t0\t0\t0\t0\t0\t2\t0\t0\t12\t0\t3\t0\t0\t2\t10\t1\t0\t2\t0\t0\t9\t6\t2\t3\t2\t1\t1\t0\t2\t0\t0\t2\t0\t0\t4\t0\t0\t5\t0\t0\t0\t2\t1\t5\t0\t0\t2\t1\t0\t12\t0\t1\t5\t4\t0\t0\t0\t0\t5\t1\t0\t0\t1\t0\t1\t7\t0\t4\t0\t2\t1\t0\t0\t0\t1\t2\t2\t0\t9\t0\t1\t0\t1\t0\t1\t0\t0\t0\t0\t3\t0\t2\t2\t5\t0\t1\t0\t12\t1\t8\t0\t0\t0\t0\t6\t2\t3\t4\t0\t0\t0\t6\t0\t0\t0\t0\t0\t0\t9\t0\t0\t18\t0\t0\t1\t2\t0\t9\t3\t4\t10\t1\t4\t1\t0\t0\t0\t3\t0\t0\t9\t0\t1\t1\t0\t6\t2\t7\t5\t0\t3\t7\t0\t0\t7\t0\t5\t0\t5\t0\t0\t0\t0\t0\t3\t0\t0\t4\t1\t0\t2\t3\t0\t12\t0\t3\t3\t1\t7\t1\t3\t5\t3\t3\t0\t0\t0\t1\t1\t0\t2\t0\t0\t2\t1\t0\t0\t6\t0\t6\t0\t0\t1\t2\t0\t3\t1\t2\t0\t0\t0\t0\t0\t0\t0\t0\t2\t0\t2\t0\t0\t0\t0\t0\t0\t5\t0\t0\t2\t0\t5\t0\t0\t4\t2\t0\t1\t0\t4\t0\t2\t0\t1\t1\t7\t0\t0\t0\t0\t0\t14\t0\t2\t0\t2\t7\t5\t3\t0\t7\t1\t0\t3\t0\t0\t1\t0\t0\t7\t1\t5\t1\t0\t0\t0\t3\t0\t2\t7\t0\t1\t2\t0\t0\t0\t0\t0\t5\t0\t0\t2\t1\t5\t0\t0\t8\t6\t0\t0\t0\t3\t2\t0\t0\t8\t0\t0\t0\t1\t0\t0\t1\t2\t0\t0\t0\t10\t0\t0\t3\t2\t0\t0\t2\t9\t0\t0\t0\t1\t4\t0\t0\t1\t1\t0\t2\t0\t1\t8\t1\t5\t0\t4\t2\t0\t4\t4\t5\t0\t2\t0\t0\t0\t1\t2\t0\t0\t0\t0\t1\t0\t0\t0\t2\t0\t0\t0\t1\t1\t6\t2\t4\t4\t0\t0\t1\t0\t0\t0\t2\t0\t5\t0\t1\t0\t1\t1\t2\t0\t3\t1\t2\t0\t0\t0\t0\t0\t0\t0\t1\t0\t1\t0\t2\t1\t4\t0\t2\t2\t0\t0\t1\t0\t1\t0\t5\t1\t0\t0\t0\t4\t3\t0\t1\t0\t2\t2\t0\t4\t1\t0\t0\t1\t1\t7\t0\t8\t4\t0\t0\t1\t0\t1\t0\t1\t0\t1\t0\t2\t1\t0\t0\t1\t6\t1\t3\t0\t0\t0\t1\t3\t0\t0\t3\t0\t7\t0\t3\t0\t0\t2\t3\t0\t0\t0\t0\t0\t0\t0\t2\t0\t1\t0\t0\n+ENSDARG00000104782\t0\t0\t0\t0\t0\t3\t0\t2\t3\t2\t1\t0\t4\t4\t0\t1\t0\t12\t0\t5\t1\t1\t2\t0\t1\t2\t
2\t2\t0\t2\t6\t0\t5\t0\t6\t0\t0\t0\t1\t3\t1\t1\t1\t1\t2\t0\t0\t1\t0\t1\t1\t0\t1\t7\t0\t0\t1\t3\t0\t0\t0\t0\t0\t0\t4\t5\t0\t4\t13\t0\t0\t8\t0\t1\t1\t0\t4\t0\t1\t1\t4\t3\t1\t2\t1\t1\t2\t1\t1\t0\t1\t2\t3\t0\t0\t6\t0\t1\t0\t3\t0\t0\t0\t2\t0\t1\t4\t3\t0\t0\t2\t0\t1\t0\t2\t0\t0\t5\t2\t5\t4\t0\t1\t0\t0\t13\t1\t0\t0\t0\t0\t13\t1\t0\t0\t1\t1\t0\t0\t10\t0\t1\t9\t3\t0\t6\t6\t2\t0\t0\t5\t3\t0\t0\t7\t0\t0\t0\t0\t3\t1\t1\t0\t0\t0\t9\t0\t3\t0\t5\t0\t0\t3\t0\t4\t0\t1\t0\t3\t2\t0\t0\t6\t1\t0\t1\t0\t1\t0\t0\t0\t0\t0\t1\t1\t0\t1\t6\t4\t0\t2\t2\t0\t0\t1\t0\t0\t2\t0\t8\t7\t1\t0\t7\t0\t0\t0\t0\t3\t3\t1\t0\t1\t0\t1\t5\t0\t2\t0\t1\t0\t1\t3\t1\t0\t0\t4\t0\t0\t6\t0\t0\t5\t1\t3\t0\t0\t0\t0\t1\t0\t0\t0\t4\t3\t0\t0\t1\t0\t9\t2\t1\t0\t0\t0\t0\t2\t3\t0\t0\t2\t0\t0\t0\t1\t0\t0\t21\t1\t0\t2\t1\t2\t5\t0\t0\t0\t0\t0\t3\t1\t0\t5\t0\t2\t4\t0\t0\t6\t1\t1\t0\t0\t0\t3\t3\t1\t0\t3\t1\t0\t0\t3\t1\t0\t1\t0\t0\t6\t0\t1\t5\t3\t7\t0\t3\t3\t3\t0\t0\t0\t2\t1\t9\t2\t1\t0\t5\t1\t0\t0\t0\t4\t2\t7\t0\t1\t0\t0\t5\t0\t5\t1\t4\t10\t0\t0\t1\t0\t0\t0\t0\t4\t1\t1\t0\t8\t0\t11\t0\t0\t0\t0\t2\t0\t0\t1\t6\t0\t4\t3\t6\t1\t2\t4\t0\t0\t1\t3\t4\t9\t4\t1\t5\t0\t5\t1\t0\t0\t0\t0\t1\t8\t0\t0\t13\t0\t2\t6\t0\t1\t24\t1\t7\t9\t1\t2\t5\t0\t0\t1\t3\t1\t0\t8\t3\t0\t7\t0\t8\t2\t12\t4\t0\t4\t7\t6\t0\t20\t0\t1\t3\t4\t15\t2\t0\t1\t1\t2\t4\t0\t21\t4\t0\t0\t4\t4\t10\t10\t1\t0\t1\t2\t3\t8\t1\t3\t0\t0\t1\t0\t0\t0\t0\t0\t1\t0\t5\t0\t0\t10\t10\t0\t6\t0\t0\t1\t4\t0\t7\t0\t2\t1\t0\t0\t0\t4\t2\t0\t0\t1\t1\t18\t1\t2\t6\t0\t0\t0\t2\t0\t0\t8\t3\t8\t1\t0\t0\t3\t1\t0\t0\t1\t0\t0\t0\t0\t1\t7\t0\t5\t0\t1\t0\t10\t0\t1\t0\t1\t8\t12\t3\t0\t12\t0\t0\t5\t1\t1\t0\t0\t1\t3\t3\t3\t4\t0\t1\t0\t6\t0\t3\t1\t1\t1\t4\t3\t0\t0\t0\t0\t6\t1\t0\t4\t0\t4\t1\t0\t8\t4\t2\t0\t0\t2\t4\t0\t1\t15\t3\t0\t1\t1\t2\t3\t4\t2\t0\t1\t0\t8\t0\t0\t5\t0\t0\t8\t0\t5\t2\t0\t0\t0\t4\t3\t1\t1\t0\t1\t4\t1\t0\t4\t0\t3\t0\t1\t5\t0\t5\t3\t0\t0\t4\t0\t3\t2\t0\t2\t0\t1\t0\t8\t8\t0\t0\t0\t1\t0\t0\t2\t1\t4\t12\t1\t15\t8\t1\t0\t7\t0\t0\t1\t1\t1\t3\t0\t0\t0\t0\t2\t3\t0\t0\t0\t0\t1\t0\t0\t0\t1\t0\t1\t3\t0\
t1\t0\t2\t5\t1\t0\t0\t1\t0\t0\t0\t2\t3\t0\t0\t1\t2\t1\t0\t4\t1\t0\t3\t0\t0\t2\t0\t2\t2\t0\t0\t0\t4\t3\t0\t8\t2\t0\t0\t0\t1\t1\t0\t0\t1\t0\t0\t4\t2\t0\t2\t1\t7\t2\t15\t0\t0\t0\t2\t1\t0\t2\t4\t2\t12\t0\t5\t0\t1\t0\t5\t0\t1\t0\t0\t0\t1\t0\t1\t0\t2\t2\t0\n'