Repository 'small_read_size_histograms'
hg clone https://toolshed.g2.bx.psu.edu/repos/artbio/small_read_size_histograms

Changeset 0:234b83159ea8 (2017-07-11)
Next changeset 1:dce695815b0f (2017-07-11)
Commit message:
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
added:
size_histogram.py
size_histogram.r
size_histogram.xml
smRtools.py
smRtools.pyc
static/images/size_histogram.png
test-data/Size_distribution.pdf
test-data/Size_distribution_dataframe.tab
test-data/sample1.srbowtie_out
test-data/sample2.srbowtie_out
test-data/sample3.srbowtie_out
test-data/transposons.fasta
tool-data/bowtie_indices.loc.sample
tool_data_table_conf.xml.sample
tool_dependencies.xml
b
diff -r 000000000000 -r 234b83159ea8 size_histogram.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/size_histogram.py Tue Jul 11 11:44:36 2017 -0400
[
@@ -0,0 +1,119 @@
+#!/usr/bin/python
+# python parser module for size distributions, guided by GFF3
+
+import argparse
+import subprocess
+from collections import OrderedDict
+from smRtools import extractsubinstance
+from smRtools import HandleSmRNAwindows
+
+
+def Parser():
+  '''Parse the command line of size_histogram.py and return the argparse namespace.
+
+  --input, --ext, --label and --normalization_factor each take one or more
+  values; the rest of the script reads them position by position, so the n-th
+  value of each option must describe the same alignment file.
+  --global_size and --collapse are flags that default to False.
+  '''
+  the_parser = argparse.ArgumentParser()
+  the_parser.add_argument('--output_size_distribution', action="store", type=str, help="output file for the size distribution dataframe")
+  # the help text used to read "output file", although this option is an input reference
+  the_parser.add_argument('--reference_fasta', action="store", type=str, help="path to a fasta file used as reference")
+  the_parser.add_argument('--reference_bowtie_index', action='store', help="path to a bowtie index used as reference")
+  the_parser.add_argument('--input', nargs='+', help="paths to multiple input files")
+  the_parser.add_argument('--ext', nargs='+', help="input file type")
+  the_parser.add_argument('--label', nargs='+', help="labels of multiple input files")
+  the_parser.add_argument('--normalization_factor', nargs='+', type=float, help="Normalization factor for input file")
+  the_parser.add_argument('--gff', type=str, help="GFF containing regions of interest")
+  the_parser.add_argument('--minquery', type=int, help="Minimum readsize")
+  the_parser.add_argument('--maxquery', type=int, help="Maximum readsize")
+  the_parser.add_argument('--global_size', action="store_true", help="if specified, size distribution is calculated for the sum of all items")
+  the_parser.add_argument('--collapse', action="store_true", help="if specified, forward and reverse reads are collapsed")
+  return the_parser.parse_args()
+
+
+args=Parser()
+# Select the reference. A bowtie index, when given, takes precedence over a
+# fasta reference (same outcome as the two consecutive "if" tests used before).
+if args.reference_bowtie_index:
+  genomeRefFormat = "bowtieIndex"
+  genomeRefFile = args.reference_bowtie_index
+elif args.reference_fasta:
+  genomeRefFormat = "fastaSource"
+  genomeRefFile = args.reference_fasta
+else:
+  # Without this guard both names stay unbound and the script only fails later,
+  # inside process_samples(), with a NameError.
+  raise SystemExit("size_histogram.py: one of --reference_fasta or --reference_bowtie_index is required")
+size_distribution_file=args.output_size_distribution
+minquery=args.minquery
+maxquery=args.maxquery
+filePath=args.input
+fileExt=args.ext
+fileLabel=args.label
+normalization_factor=args.normalization_factor
+global_size=args.global_size
+collapse=args.collapse
+
+# Polarities written to the dataframe: a single "both" class when strands are
+# collapsed, otherwise forward ("F") and reverse ("R") separately.
+if collapse:
+  pol=["both"]
+else:
+  pol=["F", "R"]
+
+# Filled by process_samples(): sample label -> HandleSmRNAwindows object
+MasterListOfGenomes = OrderedDict()
+
+def process_samples(filePath):
+  '''Load every alignment file into the module-level MasterListOfGenomes.
+
+  filePath is the list of alignment file paths. For the file at position i a
+  HandleSmRNAwindows object is built with fileExt[i] as format, fileLabel[i] as
+  biosample and normalization_factor[i] as norm, and stored under fileLabel[i].
+  Each label is printed to stdout as it is processed.
+  Returns MasterListOfGenomes (the same object that was filled in place).
+  '''
+  for index, alignment in enumerate(filePath):
+    factor = normalization_factor[index]
+    label = fileLabel[index]
+    print label
+    MasterListOfGenomes[label] = HandleSmRNAwindows(
+      alignmentFile=alignment,
+      alignmentFileFormat=fileExt[index],
+      genomeRefFile=genomeRefFile,
+      genomeRefFormat=genomeRefFormat,
+      biosample=label,
+      size_inf=minquery,
+      size_sup=maxquery,
+      norm=factor)
+  return MasterListOfGenomes
+
+
+def write_size_distribution_dataframe(readDict, size_distribution_file, pol=["both"] ):
+  '''Write one size histogram per gene and per sample as a tab-separated table.
+
+  readDict maps sample labels to either a {gene: instance} mapping (when the
+  script was run with --gff) or an object exposing such a mapping as
+  .instanceDict (otherwise). Each instance must provide size_histogram(),
+  whose result is indexed by the polarities listed in pol and yields
+  size -> count pairs.
+  The file starts with the header "gene polarity size count sample".
+  '''
+  row_template = "%s\t%s\t%s\t%s\t%s"
+  with open(size_distribution_file, 'w') as out:
+    print >>out, "gene\tpolarity\tsize\tcount\tsample"
+    for sample in readDict:
+      content = readDict[sample]
+      # with a GFF, the sample entry already is the {gene: instance} mapping
+      genes = content if args.gff else content.instanceDict
+      for gene in genes.keys():
+        per_polarity = genes[gene].size_histogram()
+        for polarity in pol:
+          for size, count in per_polarity[polarity].iteritems():
+            print >>out, row_template % (gene, polarity, size, count, sample)
+
+
+def write_size_distribution_dataframe_global(readDict, size_distribution_file, pol=["both"]):
+  '''Write one size histogram per sample (all items pooled) as a tab-separated table.
+
+  readDict maps sample labels to objects providing size_histogram(); the
+  result is indexed by the polarities listed in pol and yields size -> count
+  pairs. The "gene" column is filled with the constant "sample".
+  '''
+  gene = "sample"
+  with open(size_distribution_file, 'w') as out:
+    print >>out, "gene\tpolarity\tsize\tcount\tsample"
+    for sample in readDict:
+      per_polarity = readDict[sample].size_histogram()
+      for polarity in pol:
+        for size, count in per_polarity[polarity].iteritems():
+          fields = (gene, polarity, size, count, sample)
+          print >>out, "%s\t%s\t%s\t%s\t%s" % fields
+
+
+def gff_item_subinstances(readDict, gff3):
+  '''Build, for every sample, one sub-instance per feature of a GFF3 file.
+
+  readDict maps sample labels to objects whose .instanceDict is indexed by
+  the sequence name found in GFF column 1. gff3 is the path to a tab-separated
+  GFF3 file; comment lines (starting with "#") and blank lines are skipped.
+
+  For each feature, extractsubinstance() is called with the start (column 4)
+  and end (column 5) coordinates. For features on the "-" strand (column 7)
+  the keys of the sub-instance readDict are negated.
+  The feature name is the text following the last "Name=" of the last column,
+  up to the next ";"; when there is no "Name=" this is the first attribute.
+
+  Returns an OrderedDict {sample: {feature name: sub-instance}}. Features
+  sharing a name overwrite each other. A sequence name missing from
+  .instanceDict raises KeyError.
+  '''
+  GFFinstanceDict=OrderedDict()
+  with open(gff3) as gff:
+    for line in gff:
+      # rstrip rather than line[:-1]: the latter chops a real character when the
+      # last line has no trailing newline, and leaves "\r" on CRLF files
+      line = line.rstrip("\r\n")
+      # blank lines used to reach the column indexing below and raise IndexError
+      if not line.strip() or line.startswith("#"):
+        continue
+      gff_fields = line.split("\t")
+      chrom = gff_fields[0]
+      gff_name = gff_fields[-1].split("Name=")[-1].split(";")[0] # to isolate the GFF Name
+      item_upstream_coordinate = int(gff_fields[3])
+      item_downstream_coordinate = int(gff_fields[4])
+      item_polarity = gff_fields[6]
+      for sample in readDict.keys():
+        subinstance=extractsubinstance(item_upstream_coordinate, item_downstream_coordinate, readDict[sample].instanceDict[chrom])
+        if item_polarity == '-':
+          subinstance.readDict={-key:value for key, value in subinstance.readDict.iteritems()}
+        subinstance.gene=gff_name
+        GFFinstanceDict.setdefault(sample, {})[gff_name]=subinstance
+  return GFFinstanceDict
+
+# gff_item_subinstances() replaces each sample entry by a plain
+# {gene: sub-instance} dict, while the global writer calls .size_histogram()
+# directly on the sample entry. The combination used to fail with an
+# AttributeError after the output file had already been truncated.
+if args.gff and global_size:
+  raise SystemExit("size_histogram.py: --global_size cannot be combined with --gff")
+
+MasterListOfGenomes=process_samples(filePath)
+
+if args.gff:
+  MasterListOfGenomes=gff_item_subinstances(MasterListOfGenomes, args.gff)
+
+if global_size:
+  write_size_distribution_dataframe_global(MasterListOfGenomes, size_distribution_file, pol)
+else:
+  write_size_distribution_dataframe(MasterListOfGenomes, size_distribution_file, pol)
\ No newline at end of file
b
diff -r 000000000000 -r 234b83159ea8 size_histogram.r
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/size_histogram.r Tue Jul 11 11:44:36 2017 -0400
[
@@ -0,0 +1,110 @@
+## Setup R error handling to go to stderr
+options( show.error.messages=F,
+         error = function () { cat( geterrmessage(), file=stderr() ); q( "no", 1, F ) } )
+library(RColorBrewer)
+library(lattice)
+library(latticeExtra)
+library(grid)
+library(gridExtra)
+library(optparse)
+
+# Parse arguments
+# --yrange and --rows_per_page carry defaults so that the tests on args$yrange
+# and the layout computation below do not fail on NULL when an option is omitted.
+option_list <- list(
+    make_option(c("-g", "--global"), type="character", help="Whether distribution is plotted globally or by chromosome"),
+    make_option(c("-s", "--size_distribution_tab"), type="character", help="Path to file with tabular size distribution"),
+    make_option("--size_distribution_pdf", type="character", help="Path to file with size distribution plot"),
+    make_option("--title", type="character", help="Title for readmaps and size distribution"),
+    make_option("--ylabel", type="character", help="ylabel for readmaps and size distribution"),
+    make_option("--yrange", type="integer", default=0L, help="Y-axis range, 0 means auto-scaling"),
+    make_option("--rows_per_page", type="integer", default=8L, help="rows_per_page")
+    )
+
+parser <- OptionParser(usage = "%prog [options] file", option_list=option_list)
+args = parse_args(parser)
+
+##cheetahtemplate data frame implementation
+# columns read from the table: gene, polarity, size, count, sample
+size=read.delim(args$size_distribution_tab, header=T, row.names=NULL)
+n_samples = length(unique (size$sample))
+# unique() works whether the gene column was read as factor or as character,
+# whereas levels() returns NULL for a character column
+n_genes = length (unique (size$gene))
+
+if (args$yrange != 0) {
+   # This is used for specifying the y-axis limits
+   ylim=c(-args$yrange, args$yrange)
+} else { ylim="" }   # "" is the auto-scaling sentinel tested by the plotting section
+
+par.settings.size=list(layout.heights=list(top.padding=1, bottom.padding=1),
+                       strip.background = list(col = c("lightblue", "lightgreen"))
+                       )
+
+# Prepanel function returning y limits symmetric around zero;
+# use it when the y axis should sit in the middle of the plot.
+smR.prepanel = function(x, y, ...) {
+    bound = max(abs(y))
+    list(ylim = c(-bound, bound))
+}
+
+# Bar chart of read counts by read size, conditioned on sample and gene, with
+# bars stacked by polarity (red / blue).
+#   df  : data frame with columns count, size, sample, gene and polarity
+#   ... : further arguments handed to barchart() (e.g. par.settings, ylim)
+# Reads args$ylabel, args$title, args$rows_per_page and n_samples from the
+# enclosing script. Returns the trellis object; nothing is drawn here.
+plot_size_distribution = function(df, ...) {
+    chart = barchart(count ~ as.factor(size) | factor(sample, levels=unique(sample)) + gene,
+                     data = df,
+                     origin = 0,
+                     horizontal = FALSE,
+                     group = polarity,
+                     stack = TRUE,
+                     col = c('red', 'blue'),
+                     cex = 0.75,
+                     scales = list(y=list(tick.number=4, rot=90, relation="free", cex=0.5, alternating=TRUE),
+                                   x=list(cex=.6)),
+                     xlab = "readsize in nucleotides",
+                     ylab = args$ylabel,
+                     main = args$title,
+                     par.strip.text = list(cex=0.75),
+                     as.table = TRUE,
+                     newpage = TRUE,
+                     ...)
+
+    # move the strips of the second conditioning variable to the left margin
+    outer_strips = useOuterStrips(chart,
+                                  strip.left = strip.custom(par.strip.text = list(cex=0.5)))
+    # one column per sample, args$rows_per_page rows on each page
+    paged = update(outer_strips, layout = c(n_samples, args$rows_per_page))
+    combineLimits(paged, margin.x = FALSE, margin.y = 1)
+    }
+
+# per_gene_size=lapply(genes, function(x) subset(size, gene==x)) # no object in this script
+
+# Page size in inches (8.2677 x 11.69, presumably A4); per-item plots get a
+# quarter of that width for each sample column.
+per_item = args$global == "no"
+if (per_item) {
+    width = 8.2677*n_samples/4
+} else {
+    width = 8.2677
+}
+
+options(warn=-1)
+pdf(file=args$size_distribution_pdf, paper="special", height=11.69, width=width)
+
+# ylim is either the "" sentinel (auto-scaling) or the vector c(-yrange, yrange).
+# identical() always yields a single TRUE/FALSE; `ylim == ""` on the two-element
+# vector yields two values, which `&&` rejects in recent R versions.
+has_ylim = !identical(ylim, "")
+
+# Global plot: one panel per sample, bars stacked by polarity.
+# Extra arguments (ylim) are handed to barchart().
+plot_global_size_distribution = function(df, ...) {
+    barchart(count~as.factor(size)|factor(sample, levels=unique(sample)),
+        data = df, origin = 0,
+        horizontal=FALSE,
+        group=polarity,
+        stack=TRUE,
+        col=c('red', 'blue'),
+        scales=list(y=list(tick.number=4, rot=90, relation="same"), cex=1),
+        xlab = "readsize in nucleotides",
+        ylab = args$ylabel,
+        main = args$title, as.table=TRUE, newpage = T,
+        aspect=0.5,
+        strip = strip.custom(par.strip.text = list(cex = 1), which.given=1, bg="lightblue"),
+        ...)
+}
+
+# As before, nothing is drawn when --global is neither "no" nor "yes".
+chart = NULL
+if (per_item) {
+    if (has_ylim) {
+        chart = plot_size_distribution(size, par.settings=par.settings.size, ylim=ylim)
+    } else {
+        chart = plot_size_distribution(size, par.settings=par.settings.size)
+    }
+} else if (args$global == "yes") {
+    if (has_ylim) {
+        chart = plot_global_size_distribution(size, ylim=ylim)
+    } else {
+        chart = plot_global_size_distribution(size)
+    }
+}
+
+# Trellis objects are only drawn when printed; print explicitly instead of
+# relying on top-level auto-printing.
+if (!is.null(chart)) {
+    print(chart)
+}
+
+devname=dev.off()
b
diff -r 000000000000 -r 234b83159ea8 size_histogram.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/size_histogram.xml Tue Jul 11 11:44:36 2017 -0400
[
b'@@ -0,0 +1,168 @@\n+<tool id="artbio_size_histogram" name="Generate read size histograms" version="1.0.0">\n+    <description>from alignment files</description>\n+    <requirements>\n+        <requirement type="package" version="1.2.0=py27_0">bowtie</requirement>\n+        <requirement type="package" version="0.11.2.1=py27_0">pysam</requirement>\n+        <requirement type="package" version="1.9.3">numpy</requirement>\n+        <requirement type="package" version="1.3.2=r3.3.2_0">r-optparse</requirement>\n+        <requirement type="package" version="0.6_28=r3.3.2_0">r-latticeextra</requirement>\n+        <requirement type="package" version="2.2.1=r3.3.2_0">r-gridextra</requirement>\n+    </requirements>\n+    <command detect_errors="exit_code"><![CDATA[\n+        python \'$__tool_directory__\'/size_histogram.py\n+        #if $refGenomeSource.genomeSource == "history":\n+            --reference_fasta  ## sys.argv[2]\n+            \'$refGenomeSource.ownFile\' ## index source\n+        #else:\n+            #silent reference= filter( lambda x: str( x[0] ) == str( $refGenomeSource.series[0].input.dbkey ), $__app__.tool_data_tables[ \'bowtie_indexes\' ].get_fields() )[0][-1]\n+            --reference_bowtie_index\n+            \'$reference\'\n+        #end if\n+        --output_size_distribution\n+        \'$size_distribution_dataframe\'\n+        --minquery\n+        $minquery\n+        --maxquery\n+        $maxquery\n+        --input\n+        #for $i in $refGenomeSource.series\n+            \'$i.input\'\n+        #end for\n+        --ext\n+        #for $i in $refGenomeSource.series\n+            \'$i.input.ext\'\n+        #end for\n+        --label\n+        #for $i in $refGenomeSource.series\n+            "$i.input.element_identifier"\n+        #end for\n+        #if $gff:\n+            --gff \'$gff\'\n+        #end if\n+        #if $global.value == \'yes\':\n+            --global_size\n+        #end if\n+        #if $collapsestrands.value == \'yes\':\n+           
 --collapse\n+        #end if\n+        --normalization_factor\n+        #for $i in $refGenomeSource.series\n+            $i.norm\n+        #end for\n+        &&\n+        Rscript \'$__tool_directory__\'/size_histogram.r\n+            --global \'$global\'\n+            --size_distribution_tab \'$size_distribution_dataframe\'\n+            --size_distribution_pdf \'$size_PDF\'\n+            --title \'$title\'\n+            --ylabel \'$ylabel\'\n+            --yrange \'$yrange\'\n+            --rows_per_page \'$rows_per_page\'\n+    ]]></command>\n+    <inputs>\n+        <conditional name="refGenomeSource">\n+            <param name="genomeSource" type="select" label="Will you select a reference genome from your history or use a built-in index?" help="Built-ins were indexed using default options">\n+                <option value="indexed">Use a built-in index</option>\n+                <option value="history">Use one from the history</option>\n+            </param>\n+            <when value="indexed">\n+                <repeat name="series" title="Add alignment files">\n+                    <param name="input" type="data" label="Select multiple alignments to parse" format="tabular,sam,bam">\n+                        <validator type="dataset_metadata_in_data_table" table_name="bowtie_indexes" metadata_name="dbkey" metadata_column="0" message="database not set for this bowtie output. 
Select the database(=genome used for matching) manually, or select a reference fasta from your history."/>\n+                    </param>\n+                    <param name="norm" type="float" value="1" label="Indicate a normalization factor to compare multiple aligments"/>\n+                </repeat>\n+            </when>\n+            <when value="history">\n+                <param name="ownFile" type="data" format="fasta" label="Select a fasta file, to serve as index reference" />\n+                <repeat name="series" title="Add alignment files">\n+                    <param name="input" type="data" label="Select multiple alignments to parse" format="tabular,sam,bam"/>\n+                    <param nam'..b'y" metadata_column="0" message="GFF database and alignment file databse do not match!"/> -->\n+        <param name="global" type="select" label="Generate size distribution for each item, or generate a global alignment">\n+            <option value="no">for each item</option>\n+            <option value="yes">global</option>\n+        </param>\n+        <param name="collapsestrands" type="select" label="Whether + and - reads should be collapsed or not">\n+            <option value="no">Do not collapse</option>\n+            <option value="yes">Collapse + and - reads</option>\n+        </param>\n+        <param name="minquery" type="integer" size="3" value="18" label="Min size of reads to plot" help="\'15\' = 15 nucleotides"/>\n+        <param name="maxquery" type="integer" size="3" value="28" label="Max size of reads to plot" help="\'30\' = 30 nucleotides"/>\n+        <param name="title" type="text" size="15" value="Size distribution" label="Main Titles"/>\n+        <param name="xlabel" type="text" size="15" value="Size in nucleotides" label="x axis label"/>\n+        <param name="ylabel" type="text" size="15" value="Number of reads" label="y axis label"/>\n+        <param name="yrange" type="integer" size="3" value="0" label="y axis range for size 
distributions. 0 means auto-scaling."/>\n+        <param name="rows_per_page" type="text" size="9" value="8" label="How many items to display per page?">\n+            <validator type="in_range" min="6" max="20" message="Select between 6 and 20 rows, as the readability will suffer otherwise."/>\n+        </param>\n+    </inputs>\n+\n+    <outputs>\n+        <data format="tabular" name="size_distribution_dataframe" label="Size_distribution_dataframe.tab"/>\n+        <data format="pdf" name="size_PDF" label="Size_distribution.pdf"/>\n+    </outputs>\n+\n+<help>\n+\n+**What it does**\n+\n+Takes one or more alignment files (BAM, SAM or tabular bowtie output) as input and produces a histogram of read sizes,\n+where by default for each "chromosome" a histogram of read sizes is drawn.\n+Reads that map in sense are on the top (red), reads that map antisense are on the bottom (blue).\n+\n+\n+.. class:: warningmark\n+\n+\'\'\'TIP\'\'\' The input data can be produced using the sRbowtie tool.\n+\n+----\n+\n+\'\'\'Example\'\'\'\n+\n+Query sequence::\n+For a SAM file as the following:\n+\n+  5\t16\t2L_79\t24393\t255\t17M\t*\t0\t0\tCCTTCATCTTTTTTTTT\tIIIIIIIIIIIIIIIII\tXA:i:0\tMD:Z:17\tNM:i:0\n+\n+  11\t0\t2R_1\t12675\t255\t21M\t*\t0\t0\tAAAAAAAACGCGTCCTTGTGC\tIIIIIIIIIIIIIIIIIIIII\tXA:i:0\tMD:Z:21\tNM:i:0\n+\n+  2\t16\t2L_5\t669\t255\t23M\t*\t0\t0\tTGTTGCTGCATTTCTTTTTTTTT\tIIIIIIIIIIIIIIIIIIIIIII\tXA:i:0\tMD:Z:23\tNM:i:0\n+\n+produce a plot like this:\n+\n+----\n+\n+.. 
image:: static/images/size_histogram.png\n+    :height: 800\n+    :width: 500\n+\n+</help>\n+    <tests>\n+        <test>\n+            <param name="genomeSource" value="history" />\n+            <param name="ownFile" value="transposons.fasta" ftype="fasta" />\n+            <param name="series_0|input" value="sample1.srbowtie_out" ftype="tabular"/>\n+            <param name="series_0|norm" value="1" />\n+            <param name="series_1|input" value="sample2.srbowtie_out" ftype="tabular"/>\n+            <param name="series_1|norm" value="1" />\n+            <param name="series_2|input" value="sample3.srbowtie_out" ftype="tabular"/>\n+            <param name="series_2|norm" value="1" />\n+            <param name="global" value="no" />\n+            <param name="collapsestrands" value="no" />\n+            <param name="minquery" value="18"/>\n+            <param name="maxquery" value="30"/>\n+            <param name="title" value="Size distribution"/>\n+            <param name="xlabel" value="Size in nucleotides"/>\n+            <param name="ylabel" value="Number of reads"/>\n+            <param name="rows_per_page" value="10"/>\n+            <output name="size_distribution_dataframe" ftype="tabular" file="Size_distribution_dataframe.tab" />\n+            <output name="size_PDF" ftype="pdf" file="Size_distribution.pdf" />\n+        </test>\n+    </tests>\n+</tool>\n+\n'
b
diff -r 000000000000 -r 234b83159ea8 smRtools.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/smRtools.py Tue Jul 11 11:44:36 2017 -0400
[
b'@@ -0,0 +1,704 @@\n+#!/usr/bin/python\n+# version 1 7-5-2012 unification of the SmRNAwindow class\n+\n+import sys, subprocess\n+from collections import defaultdict\n+from numpy import mean, median, std\n+##Disable scipy import temporarily, as no working scipy on toolshed.\n+##from scipy import stats\n+\n+def get_fasta (index="/home/galaxy/galaxy-dist/bowtie/5.37_Dmel/5.37_Dmel"):\n+  \'\'\'This function will return a dictionary containing fasta identifiers as keys and the\n+  sequence as values. Index must be the path to a fasta file.\'\'\'\n+  p = subprocess.Popen(args=["bowtie-inspect","-a", "0", index], stdout=subprocess.PIPE, stderr=subprocess.STDOUT) # bowtie-inspect outputs sequences on single lines\n+  outputlines = p.stdout.readlines()\n+  p.wait()\n+  item_dic = {}\n+  for line in outputlines:\n+    if (line[0] == ">"):\n+      try:\n+        item_dic[current_item] = "".join(stringlist) # to dump the sequence of the previous item - try because of the keyerror of the first item\n+      except: pass\n+      current_item = line[1:].rstrip().split()[0] #take the first word before space because bowtie splits headers !\n+      item_dic[current_item] = ""\n+      stringlist=[]\n+    else:\n+      stringlist.append(line.rstrip() )\n+  item_dic[current_item] = "".join(stringlist) # for the last item\n+  return item_dic\n+\n+def get_fasta_headers (index):\n+  p = subprocess.Popen(args=["bowtie-inspect","-n", index], stdout=subprocess.PIPE, stderr=subprocess.STDOUT) # bowtie-inspect outputs sequences on single lines\n+  outputlines = p.stdout.readlines()\n+  p.wait()\n+  item_dic = {}\n+  for line in outputlines:\n+    header = line.rstrip().split()[0] #take the first word before space because bowtie splits headers !\n+    item_dic[header] = 1\n+  return item_dic\n+\n+\n+def get_file_sample (file, numberoflines):\n+  \'\'\'import random to use this function\'\'\'\n+  F=open(file)\n+  fullfile = F.read().splitlines()\n+  F.close()\n+  if len(fullfile) < 
numberoflines:\n+    return "sample size exceeds file size"\n+  return random.sample(fullfile, numberoflines)\n+\n+def get_fasta_from_history (file):\n+  F = open (file, "r")\n+  item_dic = {}\n+  for line in F:\n+    if (line[0] == ">"):\n+      try:\n+        item_dic[current_item] = "".join(stringlist) # to dump the sequence of the previous item - try because of the keyerror of the first item\n+      except: pass\n+      current_item = line[1:-1].split()[0] #take the first word before space because bowtie splits headers !\n+      item_dic[current_item] = ""\n+      stringlist=[]\n+    else:\n+      stringlist.append(line[:-1])\n+  item_dic[current_item] = "".join(stringlist) # for the last item\n+  return item_dic\n+\n+def antipara (sequence):\n+    antidict = {"A":"T", "T":"A", "G":"C", "C":"G", "N":"N"}\n+    revseq = sequence[::-1]\n+    return "".join([antidict[i] for i in revseq])\n+\n+def RNAtranslate (sequence):\n+    return "".join([i if i in "AGCN" else "U" for i in sequence])\n+def DNAtranslate (sequence):\n+    return "".join([i if i in "AGCN" else "T" for i in sequence])\n+\n+def RNAfold (sequence_list):\n+  thestring= "\\n".join(sequence_list)\n+  p = subprocess.Popen(args=["RNAfold","--noPS"], stdin= subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)\n+  output=p.communicate(thestring)[0]\n+  p.wait()\n+  output=output.split("\\n")\n+  if not output[-1]: output = output[:-1] # nasty patch to remove last empty line\n+  buffer=[]\n+  for line in output:\n+    if line[0] in ["N","A","T","U","G","C"]:\n+      buffer.append(DNAtranslate(line))\n+    if line[0] in ["(",".",")"]:\n+      fields=line.split("(")\n+      energy= fields[-1]\n+      energy = energy[:-1] # remove the ) parenthesis\n+      energy=float(energy)\n+      buffer.append(str(energy))\n+  return dict(zip(buffer[::2], buffer[1::2]))\n+\n+def extractsubinstance (start, end, instance):\n+  \'\'\' Testing whether this can be an function external to the class to save 
memory\'\'\'\n+  subinstance = SmRNAwindow (instance.gene, instance.sequence[start-1:end], start)\n+  subinstance.gene = "%s %s %s" % (subin'..b'e[offset:offset+queryhash[offset][i]]) )\n+            paired_sequences.append("-%s" % RNAtranslate (antipara (self.sequence[-matched_offset-targethash[matched_offset][i]+1:-matched_offset+1]) ) )\n+        if offset < 0:\n+          for i in range (paired):\n+            paired_sequences.append("-%s" % RNAtranslate (antipara (self.sequence[-offset-queryhash[offset][i]+1:-offset+1]) ) )\n+            paired_sequences.append("+%s" % RNAtranslate (self.sequence[matched_offset:matched_offset+targethash[matched_offset][i]] ) )\n+    return paired_sequences\n+\n+  def pairable (self, overlap, minquery, maxquery, mintarget, maxtarget):\n+    queryhash = defaultdict(list)\n+    targethash = defaultdict(list)\n+    query_range = range (int(minquery), int(maxquery)+1)\n+    target_range = range (int(mintarget), int(maxtarget)+1)\n+    paired_sequences = []\n+\n+    for offset in self.readDict: # selection of data\n+      for size in self.readDict[offset]:\n+        if size in query_range:\n+          queryhash[offset].append(size)\n+        if size in target_range:\n+          targethash[offset].append(size)\n+\n+    for offset in queryhash:\n+      matched_offset = -offset - overlap + 1\n+      if targethash[matched_offset]:\n+        if offset >= 0:\n+          for i in queryhash[offset]:\n+            paired_sequences.append("+%s" % RNAtranslate (self.sequence[offset:offset+i]) )\n+          for i in targethash[matched_offset]:\n+            paired_sequences.append( "-%s" % RNAtranslate (antipara (self.sequence[-matched_offset-i+1:-matched_offset+1]) ) )\n+        if offset < 0:\n+          for i in queryhash[offset]:\n+            paired_sequences.append("-%s" %  RNAtranslate (antipara (self.sequence[-offset-i+1:-offset+1]) ) )\n+          for i in targethash[matched_offset]:\n+            paired_sequences.append("+%s" %  
RNAtranslate (self.sequence[matched_offset:matched_offset+i] ) )\n+    return paired_sequences\n+\n+  def newpairable_bowtie (self, overlap, minquery, maxquery, mintarget, maxtarget):\n+    \'\'\' revision of pairable on 3-12-2012, with focus on the offset shift problem (bowtie is 1-based cooordinates whereas python strings are 0-based coordinates\'\'\'\n+    queryhash = defaultdict(list)\n+    targethash = defaultdict(list)\n+    query_range = range (int(minquery), int(maxquery)+1)\n+    target_range = range (int(mintarget), int(maxtarget)+1)\n+    bowtie_output = []\n+\n+    for offset in self.readDict: # selection of data\n+      for size in self.readDict[offset]:\n+        if size in query_range:\n+          queryhash[offset].append(size)\n+        if size in target_range:\n+          targethash[offset].append(size)\n+    counter = 0\n+    for offset in queryhash:\n+      matched_offset = -offset - overlap + 1\n+      if targethash[matched_offset]:\n+        if offset >= 0:\n+          for i in queryhash[offset]:\n+            counter += 1\n+            bowtie_output.append("%s\\t%s\\t%s\\t%s\\t%s" % (counter, "+", self.gene, offset-1, self.sequence[offset-1:offset-1+i]) ) # attention a la base 1-0 de l\'offset \n+        if offset < 0:\n+          for i in queryhash[offset]:\n+            counter += 1\n+            bowtie_output.append("%s\\t%s\\t%s\\t%s\\t%s" % (counter, "-", self.gene, -offset-i, self.sequence[-offset-i:-offset])) # attention a la base 1-0 de l\'offset\n+    return bowtie_output\n+\n+\n+def __main__(bowtie_index_path, bowtie_output_path):\n+  sequenceDic = get_fasta (bowtie_index_path)\n+  objDic = {}\n+  F = open (bowtie_output_path, "r") # F is the bowtie output taken as input\n+  for line in F:\n+    fields = line.split()\n+    polarity = fields[1]\n+    gene = fields[2]\n+    offset = int(fields[3])\n+    size = len (fields[4])\n+    try:\n+      objDic[gene].addread (polarity, offset, size)\n+    except KeyError:\n+      objDic[gene] = 
SmRNAwindow(gene, sequenceDic[gene])\n+      objDic[gene].addread (polarity, offset, size)\n+  F.close()\n+  for gene in objDic:\n+    print gene, objDic[gene].pairer(19,19,23,19,23)\n+\n+if __name__ == "__main__" : __main__(sys.argv[1], sys.argv[2]) \n'
b
diff -r 000000000000 -r 234b83159ea8 smRtools.pyc
b
Binary file smRtools.pyc has changed
b
diff -r 000000000000 -r 234b83159ea8 static/images/size_histogram.png
b
Binary file static/images/size_histogram.png has changed
b
diff -r 000000000000 -r 234b83159ea8 test-data/Size_distribution.pdf
b
Binary file test-data/Size_distribution.pdf has changed
b
diff -r 000000000000 -r 234b83159ea8 test-data/Size_distribution_dataframe.tab
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/Size_distribution_dataframe.tab Tue Jul 11 11:44:36 2017 -0400
b
b'@@ -0,0 +1,1461 @@\n+gene\tpolarity\tsize\tcount\tsample\n+FBti0020401\tF\t18\t1.0\tsample1.srbowtie_out\n+FBti0020401\tF\t19\t1.0\tsample1.srbowtie_out\n+FBti0020401\tF\t21\t0.0\tsample1.srbowtie_out\n+FBti0020401\tF\t22\t5.0\tsample1.srbowtie_out\n+FBti0020401\tF\t23\t1.0\tsample1.srbowtie_out\n+FBti0020401\tF\t24\t1.0\tsample1.srbowtie_out\n+FBti0020401\tF\t25\t5.0\tsample1.srbowtie_out\n+FBti0020401\tF\t26\t9.0\tsample1.srbowtie_out\n+FBti0020401\tF\t27\t4.0\tsample1.srbowtie_out\n+FBti0020401\tR\t18\t0.0\tsample1.srbowtie_out\n+FBti0020401\tR\t19\t0.0\tsample1.srbowtie_out\n+FBti0020401\tR\t21\t-2.0\tsample1.srbowtie_out\n+FBti0020401\tR\t22\t0.0\tsample1.srbowtie_out\n+FBti0020401\tR\t23\t-3.0\tsample1.srbowtie_out\n+FBti0020401\tR\t24\t-11.0\tsample1.srbowtie_out\n+FBti0020401\tR\t25\t-9.0\tsample1.srbowtie_out\n+FBti0020401\tR\t26\t-10.0\tsample1.srbowtie_out\n+FBti0020401\tR\t27\t-1.0\tsample1.srbowtie_out\n+FBti0020406\tF\t24\t0.0\tsample1.srbowtie_out\n+FBti0020406\tF\t25\t1.0\tsample1.srbowtie_out\n+FBti0020406\tF\t26\t0.0\tsample1.srbowtie_out\n+FBti0020406\tF\t28\t0.0\tsample1.srbowtie_out\n+FBti0020406\tR\t24\t-1.0\tsample1.srbowtie_out\n+FBti0020406\tR\t25\t0.0\tsample1.srbowtie_out\n+FBti0020406\tR\t26\t-1.0\tsample1.srbowtie_out\n+FBti0020406\tR\t28\t-1.0\tsample1.srbowtie_out\n+FBti0019511\tF\t18\t1.0\tsample1.srbowtie_out\n+FBti0019511\tR\t18\t0.0\tsample1.srbowtie_out\n+FBti0019512\tF\t21\t0\tsample1.srbowtie_out\n+FBti0019512\tR\t21\t0\tsample1.srbowtie_out\n+FBti0019513\tF\t21\t0\tsample1.srbowtie_out\n+FBti0019513\tR\t21\t0\tsample1.srbowtie_out\n+FBti0019473\tF\t18\t0.0\tsample1.srbowtie_out\n+FBti0019473\tF\t20\t1.0\tsample1.srbowtie_out\n+FBti0019473\tF\t21\t0.0\tsample1.srbowtie_out\n+FBti0019473\tF\t22\t0.0\tsample1.srbowtie_out\n+FBti0019473\tF\t23\t2.0\tsample1.srbowtie_out\n+FBti0019473\tF\t24\t4.0\tsample1.srbowtie_out\n+FBti0019473\tF\t25\t2.0\tsample1.srbowtie_out\n+FBti0019473\tF\t26\t9.0\tsample1.srbowtie_out\n+FBti0019473\tF\t
27\t6.0\tsample1.srbowtie_out\n+FBti0019473\tR\t18\t-1.0\tsample1.srbowtie_out\n+FBti0019473\tR\t20\t-1.0\tsample1.srbowtie_out\n+FBti0019473\tR\t21\t-2.0\tsample1.srbowtie_out\n+FBti0019473\tR\t22\t-5.0\tsample1.srbowtie_out\n+FBti0019473\tR\t23\t-3.0\tsample1.srbowtie_out\n+FBti0019473\tR\t24\t-17.0\tsample1.srbowtie_out\n+FBti0019473\tR\t25\t-10.0\tsample1.srbowtie_out\n+FBti0019473\tR\t26\t-9.0\tsample1.srbowtie_out\n+FBti0019473\tR\t27\t-3.0\tsample1.srbowtie_out\n+FBti0019518\tF\t20\t0.0\tsample1.srbowtie_out\n+FBti0019518\tF\t23\t0.0\tsample1.srbowtie_out\n+FBti0019518\tF\t24\t2.0\tsample1.srbowtie_out\n+FBti0019518\tF\t25\t1.0\tsample1.srbowtie_out\n+FBti0019518\tF\t26\t2.0\tsample1.srbowtie_out\n+FBti0019518\tF\t27\t0.0\tsample1.srbowtie_out\n+FBti0019518\tR\t20\t-1.0\tsample1.srbowtie_out\n+FBti0019518\tR\t23\t-1.0\tsample1.srbowtie_out\n+FBti0019518\tR\t24\t-1.0\tsample1.srbowtie_out\n+FBti0019518\tR\t25\t-1.0\tsample1.srbowtie_out\n+FBti0019518\tR\t26\t0.0\tsample1.srbowtie_out\n+FBti0019518\tR\t27\t-1.0\tsample1.srbowtie_out\n+FBti0019519\tF\t19\t1.0\tsample1.srbowtie_out\n+FBti0019519\tF\t21\t1.0\tsample1.srbowtie_out\n+FBti0019519\tF\t22\t3.0\tsample1.srbowtie_out\n+FBti0019519\tF\t23\t4.0\tsample1.srbowtie_out\n+FBti0019519\tF\t24\t2.0\tsample1.srbowtie_out\n+FBti0019519\tF\t25\t3.0\tsample1.srbowtie_out\n+FBti0019519\tF\t26\t1.0\tsample1.srbowtie_out\n+FBti0019519\tF\t27\t4.0\tsample1.srbowtie_out\n+FBti0019519\tF\t28\t1.0\tsample1.srbowtie_out\n+FBti0019519\tR\t19\t0.0\tsample1.srbowtie_out\n+FBti0019519\tR\t21\t0.0\tsample1.srbowtie_out\n+FBti0019519\tR\t22\t0.0\tsample1.srbowtie_out\n+FBti0019519\tR\t23\t-1.0\tsample1.srbowtie_out\n+FBti0019519\tR\t24\t-1.0\tsample1.srbowtie_out\n+FBti0019519\tR\t25\t0.0\tsample1.srbowtie_out\n+FBti0019519\tR\t26\t-2.0\tsample1.srbowtie_out\n+FBti0019519\tR\t27\t0.0\tsample1.srbowtie_out\n+FBti0019519\tR\t28\t0.0\tsample1.srbowtie_out\n+FBti0019498\tF\t21\t0\tsample1.srbowtie_out\n+FBti0019498\tR\t21\t0\tsample1.
srbowtie_out\n+FBti0019514\tF\t23\t1.0\tsample1.srbowtie_out\n+FBti0019514\tR\t23\t0.0\tsample1.srbowtie_out\n+FBti0019515\tF\t18\t1.0\tsample1.srbowtie_out\n+FBti0019515\tF\t20\t1.0\tsample1.srbowtie_out\n+FBti0019515\tF\t23\t0.0\tsample1.srbowtie_out\n+FBti0019515\tF\t24\t1.0\tsample1.srbowtie_out\n+FBti0019515\tF\t25\t1.0\tsample1.srbowtie_out\n+FBti0019515\tF\t26\t2.0\tsample1.srbowtie_out\n+FBti0019515\tF\t27\t0.0\tsample1.srbowtie_out\n+FBti0019515\tF\t2'..b'Bti0020410\tF\t28\t1.0\tsample3.srbowtie_out\n+FBti0020410\tR\t19\t-1.0\tsample3.srbowtie_out\n+FBti0020410\tR\t20\t0.0\tsample3.srbowtie_out\n+FBti0020410\tR\t22\t-2.0\tsample3.srbowtie_out\n+FBti0020410\tR\t23\t-3.0\tsample3.srbowtie_out\n+FBti0020410\tR\t24\t-7.0\tsample3.srbowtie_out\n+FBti0020410\tR\t25\t-15.0\tsample3.srbowtie_out\n+FBti0020410\tR\t26\t-10.0\tsample3.srbowtie_out\n+FBti0020410\tR\t27\t-8.0\tsample3.srbowtie_out\n+FBti0020410\tR\t28\t-2.0\tsample3.srbowtie_out\n+FBti0020403\tF\t24\t3.0\tsample3.srbowtie_out\n+FBti0020403\tF\t25\t3.0\tsample3.srbowtie_out\n+FBti0020403\tF\t21\t2.0\tsample3.srbowtie_out\n+FBti0020403\tF\t22\t0.0\tsample3.srbowtie_out\n+FBti0020403\tF\t23\t0.0\tsample3.srbowtie_out\n+FBti0020403\tR\t24\t0.0\tsample3.srbowtie_out\n+FBti0020403\tR\t25\t-3.0\tsample3.srbowtie_out\n+FBti0020403\tR\t21\t0.0\tsample3.srbowtie_out\n+FBti0020403\tR\t22\t-1.0\tsample3.srbowtie_out\n+FBti0020403\tR\t23\t-3.0\tsample3.srbowtie_out\n+FBti0019486\tF\t19\t1.0\tsample3.srbowtie_out\n+FBti0019486\tF\t20\t0.0\tsample3.srbowtie_out\n+FBti0019486\tF\t23\t0.0\tsample3.srbowtie_out\n+FBti0019486\tF\t24\t1.0\tsample3.srbowtie_out\n+FBti0019486\tF\t25\t0.0\tsample3.srbowtie_out\n+FBti0019486\tF\t26\t0.0\tsample3.srbowtie_out\n+FBti0019486\tR\t19\t0.0\tsample3.srbowtie_out\n+FBti0019486\tR\t20\t-1.0\tsample3.srbowtie_out\n+FBti0019486\tR\t23\t-1.0\tsample3.srbowtie_out\n+FBti0019486\tR\t24\t-1.0\tsample3.srbowtie_out\n+FBti0019486\tR\t25\t-1.0\tsample3.srbowtie_out\n+FBti0019486\tR\t26\t-3.0\ts
ample3.srbowtie_out\n+FBti0019489\tF\t21\t0\tsample3.srbowtie_out\n+FBti0019489\tR\t21\t0\tsample3.srbowtie_out\n+FBti0019484\tF\t20\t0.0\tsample3.srbowtie_out\n+FBti0019484\tF\t23\t1.0\tsample3.srbowtie_out\n+FBti0019484\tF\t24\t3.0\tsample3.srbowtie_out\n+FBti0019484\tF\t25\t3.0\tsample3.srbowtie_out\n+FBti0019484\tF\t26\t2.0\tsample3.srbowtie_out\n+FBti0019484\tF\t27\t2.0\tsample3.srbowtie_out\n+FBti0019484\tR\t20\t-2.0\tsample3.srbowtie_out\n+FBti0019484\tR\t23\t0.0\tsample3.srbowtie_out\n+FBti0019484\tR\t24\t-2.0\tsample3.srbowtie_out\n+FBti0019484\tR\t25\t0.0\tsample3.srbowtie_out\n+FBti0019484\tR\t26\t-1.0\tsample3.srbowtie_out\n+FBti0019484\tR\t27\t0.0\tsample3.srbowtie_out\n+FBti0019485\tF\t24\t1.0\tsample3.srbowtie_out\n+FBti0019485\tF\t25\t0.0\tsample3.srbowtie_out\n+FBti0019485\tF\t23\t0.0\tsample3.srbowtie_out\n+FBti0019485\tR\t24\t0.0\tsample3.srbowtie_out\n+FBti0019485\tR\t25\t-1.0\tsample3.srbowtie_out\n+FBti0019485\tR\t23\t-1.0\tsample3.srbowtie_out\n+FBti0019482\tF\t26\t0.0\tsample3.srbowtie_out\n+FBti0019482\tF\t23\t0.0\tsample3.srbowtie_out\n+FBti0019482\tR\t26\t-1.0\tsample3.srbowtie_out\n+FBti0019482\tR\t23\t-1.0\tsample3.srbowtie_out\n+FBti0020400\tF\t18\t2.0\tsample3.srbowtie_out\n+FBti0020400\tF\t19\t2.0\tsample3.srbowtie_out\n+FBti0020400\tF\t20\t5.0\tsample3.srbowtie_out\n+FBti0020400\tF\t21\t6.0\tsample3.srbowtie_out\n+FBti0020400\tF\t22\t6.0\tsample3.srbowtie_out\n+FBti0020400\tF\t23\t36.0\tsample3.srbowtie_out\n+FBti0020400\tF\t24\t76.0\tsample3.srbowtie_out\n+FBti0020400\tF\t25\t89.0\tsample3.srbowtie_out\n+FBti0020400\tF\t26\t82.0\tsample3.srbowtie_out\n+FBti0020400\tF\t27\t20.0\tsample3.srbowtie_out\n+FBti0020400\tF\t28\t12.0\tsample3.srbowtie_out\n+FBti0020400\tR\t18\t0.0\tsample3.srbowtie_out\n+FBti0020400\tR\t19\t-1.0\tsample3.srbowtie_out\n+FBti0020400\tR\t20\t-5.0\tsample3.srbowtie_out\n+FBti0020400\tR\t21\t-2.0\tsample3.srbowtie_out\n+FBti0020400\tR\t22\t-7.0\tsample3.srbowtie_out\n+FBti0020400\tR\t23\t-6.0\tsample3.srbowtie_ou
t\n+FBti0020400\tR\t24\t-23.0\tsample3.srbowtie_out\n+FBti0020400\tR\t25\t-22.0\tsample3.srbowtie_out\n+FBti0020400\tR\t26\t-11.0\tsample3.srbowtie_out\n+FBti0020400\tR\t27\t-11.0\tsample3.srbowtie_out\n+FBti0020400\tR\t28\t-1.0\tsample3.srbowtie_out\n+FBti0019480\tF\t21\t0.0\tsample3.srbowtie_out\n+FBti0019480\tF\t22\t1.0\tsample3.srbowtie_out\n+FBti0019480\tF\t23\t0.0\tsample3.srbowtie_out\n+FBti0019480\tF\t24\t3.0\tsample3.srbowtie_out\n+FBti0019480\tF\t25\t2.0\tsample3.srbowtie_out\n+FBti0019480\tF\t26\t2.0\tsample3.srbowtie_out\n+FBti0019480\tF\t27\t1.0\tsample3.srbowtie_out\n+FBti0019480\tR\t21\t-1.0\tsample3.srbowtie_out\n+FBti0019480\tR\t22\t0.0\tsample3.srbowtie_out\n+FBti0019480\tR\t23\t-2.0\tsample3.srbowtie_out\n+FBti0019480\tR\t24\t-5.0\tsample3.srbowtie_out\n+FBti0019480\tR\t25\t-1.0\tsample3.srbowtie_out\n+FBti0019480\tR\t26\t0.0\tsample3.srbowtie_out\n+FBti0019480\tR\t27\t0.0\tsample3.srbowtie_out\n'
b
diff -r 000000000000 -r 234b83159ea8 test-data/sample1.srbowtie_out
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/sample1.srbowtie_out Tue Jul 11 11:44:36 2017 -0400
b
b'@@ -0,0 +1,2000 @@\n+12\t-\tFBti0019493\t2417\tAGTTCTTCCGAAAGGCTGCAGATG\n+19\t+\tFBti0020400\t2078\tTCGATGCGTAGATTTTTGGACGGGGC\n+60\t-\tFBti0019473\t2975\tCACATACCACAAGCCTACAAAACA\n+83\t+\tFBti0020400\t899\tTGAGCGACCGGCATTCCCTCGTCGAA\n+204\t-\tFBti0019493\t454\tCATCGGCCCAGAAGAGTTCATGCA\n+215\t+\tFBti0019509\t1142\tTGGACTTGGAGTGCAGCGCCTTGG\n+241\t-\tFBti0020402\t3533\tTGGTATACTTGGACGACATTATTGTA\n+253\t+\tFBti0020400\t6961\tTTTGGCTAGCGCTAGATCGGAAGCAGC\n+258\t+\tFBti0019493\t2877\tATCGTAGAAACGTGGGGAGCGT\n+280\t+\tFBti0019493\t855\tTCTCGAATGCTTGCCCGATATACGGAGC\n+271\t+\tFBti0020400\t591\tCGAAGACTTGTAGAATTAGCGCGGGC\n+282\t-\tFBti0019493\t454\tCATCGGCCCAGAAGAGTTCATGCA\n+284\t-\tFBti0020410\t2118\tAACATAACTCGAGCAGCAAAGCAA\n+242\t-\tFBti0019495\t3364\tCCCTTATGGACCCACAGTAACTTA\n+287\t+\tFBti0019500\t727\tTGTAAACGGTAGCTAATTCGAGCGGC\n+300\t-\tFBti0019495\t915\tTCCATACTCTGCCAAACGTCATAA\n+309\t+\tFBti0019503\t150\tTAGATGTCTGTACGAAAGCGAGAAGC\n+310\t+\tFBti0020400\t552\tTCCTAGTTTGCTGAAAAAAAGACCACT\n+349\t+\tFBti0019493\t1165\tCTGGTCGTTAAAGGGAGTTTTGGC\n+359\t-\tFBti0019493\t4308\tTGGCGTATCCAGACACATGCCCCATA\n+382\t+\tFBti0019493\t416\tGAGAAGCCGAAGGTCGTCGTC\n+405\t+\tFBti0019493\t4387\tCACGCCACGTAAAATAAGTTCGGAGG\n+428\t+\tFBti0019519\t1380\tTGCTAGAAGGCGATTTTCGGGC\n+431\t-\tFBti0020400\t8266\tTCCACACTTGGAATTTTGCAC\n+435\t+\tFBti0019493\t2979\tTTTGCTGGAGCGAACCTTCCGTA\n+337\t-\tFBti0019493\t3242\tGCGACTAGCCTCCTGCCAGA\n+467\t+\tFBti0020400\t6587\tTCGCCTGCGAAAACTTCTGTATCGC\n+419\t+\tFBti0019493\t1906\tTGGATGACTGGAAACGCTTCATGG\n+458\t-\tFBti0019500\t898\tCCGAAAATCGTCTTCTAGCA\n+488\t-\tFBti0020404\t1358\tTCTCAGCGTGAGCAAGCGGAAAGAG\n+495\t-\tFBti0019493\t453\tCCATCGGCCCAGAAGAGTTCATGCA\n+500\t+\tFBti0020410\t3150\tGTGGAAGTTACCGATGACTTGGG\n+514\t-\tFBti0020400\t2066\tACGACCATGTCATCGATGCGCA\n+522\t+\tFBti0019509\t1142\tTGGACTTGGAGTGCAGCGCCT\n+555\t+\tFBti0020400\t6826\tTTCCTTCTCAACCTTGTCCCATAGGG\n+565\t+\tFBti0019493\t2928\tACGGTAATCATGCTGCTGAAAGGTGC\n+599\t-\tFBti0019509\t52\tACCGCGCGCGCAGTTG
AGAGCATA\n+637\t+\tFBti0019493\t2296\tTCGTCTCGCTGCTCAAAGGGCCAGA\n+741\t-\tFBti0019493\t3939\tTCGTAAGAGACGACTAATACAGCGATA\n+742\t-\tFBti0020401\t52\tTCAGCACCGCCAGATGGCCCGTAATC\n+773\t-\tFBti0019495\t3364\tCCCTTATGGACCCACAGTAACTTA\n+768\t-\tFBti0019493\t1947\tCCATGGGGGCGCGTCTACAAGATTT\n+787\t+\tFBti0019493\t3809\tTGATCAAGAGAAGATTCAACGGCT\n+828\t+\tFBti0020400\t5460\tTCTTGGACTGAGCAGCTACTGTT\n+835\t+\tFBti0019493\t2408\tTGTGAGAGAAGTTCTTCCGGAAGGC\n+834\t-\tFBti0019517\t381\tTGGAACAGTCTTTATTTGTAAGCTTA\n+844\t+\tFBti0020400\t4451\tATACATGAAATTTCGAATGACA\n+855\t-\tFBti0019493\t454\tCATCGGCCCAGAAGAGTTCATGCA\n+853\t-\tFBti0019473\t2977\tCATACCTCAAGCCTACAAAACA\n+868\t+\tFBti0019493\t1385\tTGCTGTCAGAGTAGATGCTGGAG\n+877\t+\tFBti0020400\t5460\tTCTTGGACTGAGCAGCTACTGTT\n+882\t+\tFBti0020400\t2662\tCGTTAAGTCAGATGTAGGCTCTAAA\n+940\t+\tFBti0019503\t199\tAAGTATTGGTGTATGCGGACTAGAACA\n+985\t+\tFBti0020400\t6725\tTCATAGACTGAATAGCAATAACTG\n+1011\t-\tFBti0019493\t850\tTATGCTCTCGAATGCTTGCCCGATATA\n+1039\t-\tFBti0019515\t1251\tGCTCGAGATCGTATCACACCAACA\n+1044\t-\tFBti0019495\t4491\tTCCCAATAAGATCGAAGCTATAAAAAA\n+1062\t-\tFBti0020401\t2809\tTAGTCCCTGGCATCCGTCTGCAAC\n+1072\t+\tFBti0020400\t6725\tTCATAGACTGAATAGCAATAACTGTA\n+1084\t-\tFBti0019503\t302\tTCAGTCAGTTTTCGATCGTTACGCA\n+1131\t+\tFBti0019500\t90\tTCTCGAATTTTTGTTAGAGAGCGAGA\n+1098\t+\tFBti0019493\t2445\tTTTGGATTTCGCCAAGGACGATGTG\n+1117\t+\tFBti0019509\t398\tTGGATGACTGGAAACGCTTCGTGGG\n+1112\t+\tFBti0020400\t8274\tTGGAATTTTGCACTGTTCGGATGAATA\n+1140\t-\tFBti0020410\t1557\tTCAAATTCAGAGCACAATCAATCG\n+982\t+\tFBti0020398\t323\tTGAAAAATAACGGATGATTAGTGA\n+1155\t-\tFBti0019493\t1370\tACAGACGGGGTGAGCTGCTGTCAGA\n+1156\t+\tFBti0019507\t160\tTGATAGTAGACAACTGTATGTGTGC\n+1166\t-\tFBti0019493\t812\tCGGAACTGCCGTCACAGAGGGCAA\n+1189\t+\tFBti0020400\t635\tTATTGATCGTGGTTAGTTTTACAC\n+1245\t-\tFBti0019493\t694\tCCACAGATGTGTGGGTTTCGACCATA\n+1242\t+\tFBti0019493\t3838\tTGTTTGCGGAAGAAGTGTTCCTGAGGA\n+1246\t-\tFBti0020402\t1593\tTCTACTATTGGAGGACTTCCGGAATA\n+1257\t+\tF
Bti0019507\t660\tTCAGATGAGAGACAAATTAGAAT\n+1261\t-\tFBti0020400\t4393\tCTTGCACCATGAGCTGAGCTTCTT\n+1275\t+\tFBti0019493\t1601\tGAGTAGAACAGCCGAACTTCCGGA\n+1300\t+\tFBti0019493\t1133\tTGGAAACCCTCACCACAGATTATGGC\n+1329\t-\tFBti0019492\t405\tGGCTCTCTGACTGGCGAATA\n+1361\t+\tFBti0020400\t2669\tTCAGATGTAGGCTCTAAATGGATGG\n+1387\t-\tFBti0019493\t454\tCATCGGCCCAGAAGAGTTCATGCA\n+1298\t+\tFBti0020400\t1829\tTGAAAAAT'..b'AATGGTCAAGTCGGACT\n+30606\t-\tFBti0019493\t1390\tTCTGAGTGGATGCTGGAGGCAAGAGTC\n+30653\t-\tFBti0019495\t4445\tCAGAATTCCTTGGCCACATCGTTTCA\n+30669\t-\tFBti0020410\t6017\tTACAACTACTTGCAGGATGCACAGCA\n+30670\t+\tFBti0019473\t2781\tTACATCTCTACACCCCCTCTCCA\n+30682\t-\tFBti0019504\t690\tACTGTTCATACAGCGATTCGTAGGAA\n+30698\t-\tFBti0019473\t3016\tCCTTAAGCCAAACACCGACAAAACGAA\n+30706\t-\tFBti0019509\t52\tACCGCGCGCGCAGTTGAGAGCATAGC\n+30743\t-\tFBti0019493\t1092\tAGCCATCCTAGTGGATCATCAGGAA\n+30752\t-\tFBti0020401\t478\tTCTCCGTCAATTGCACACAGGTGA\n+30825\t-\tFBti0019473\t4745\tCTTATCCTCACCCCAAACATAAA\n+30836\t-\tFBti0019507\t625\tTGTTTCAAAAAAAATATTTA\n+30875\t+\tFBti0019493\t3284\tTTCGGTATGCCGAACAGTGTCCAGAGT\n+30878\t+\tFBti0020400\t5802\tCACGGCAAGTAAAACAATTGTGAGTG\n+30880\t+\tFBti0019493\t3510\tTACGACAGCGAACCAGGACGGGTG\n+30889\t-\tFBti0019493\t1199\tTTTGCGCCGCATACTGCCAGTTCGA\n+30885\t+\tFBti0020401\t3437\tTAATTTATCATCGGCATCGGGTACG\n+30902\t+\tFBti0020400\t7164\tTAATCGAAGAAATACGAAATGGAAGGGA\n+30916\t-\tFBti0020400\t6345\tGCACATTAAACAGTATTTTCAATTGA\n+30907\t+\tFBti0019493\t3160\tTATATGACGGACTCATGGCACCTTG\n+30924\t+\tFBti0019493\t3823\tCTCAACGGCTGGCAATGTTCGCGGA\n+30937\t+\tFBti0019519\t1380\tTGCTAGAAGACGATTTTCGGGCCGA\n+30959\t+\tFBti0019509\t893\tTCCAGAGTTGGCAGAACAGATGGGA\n+30971\t+\tFBti0019493\t3085\tTGACCGGAGTCGTTGGAGCATTGGCG\n+30983\t+\tFBti0019500\t55\tTTTTTCGCCGTGGCTCTAGAGGTGGC\n+31000\t+\tFBti0019472\t690\tTCAGATGAGAGACAAATTAGAAT\n+31015\t-\tFBti0019515\t2449\tCGTCAACTCCACAAGACAACCATGTA\n+31039\t-\tFBti0019473\t4922\tGACCAAATAAAAATAATACGACTTCG\n+31049\t-\tFBti0020400\t8266\tTCCA
CATTTGGAATTTTGCACTGTTCGG\n+31059\t-\tFBti0020410\t3332\tTGCAGCGACTGTGTATACAAAAGTAGGC\n+31080\t-\tFBti0020410\t4859\tAGTATGACAATCAGCTCTTTGGATA\n+31154\t+\tFBti0019493\t2610\tTGCCGGGAAATGGGCTTGTGGCAGA\n+31152\t-\tFBti0019493\t454\tCATCGGCCCAGAAGAGTTCATGC\n+31164\t-\tFBti0019493\t3648\tGGGAGAGCCCTCAGCGATACA\n+31207\t-\tFBti0020400\t3561\tTCTGCTGGATTATGCTCAGATTGAACA\n+31217\t+\tFBti0020400\t6725\tTCATAGACTGAATAGCAATAACTGTATT\n+31225\t-\tFBti0020402\t5551\tCATTACTACAACACACAATTCAAAA\n+31278\t+\tFBti0019509\t704\tCATCAATGGCACTATCTGCAAGGCAGTC\n+31285\t-\tFBti0019473\t4756\tCCCAAACATAAATACCACAGATATAA\n+31317\t+\tFBti0020400\t1771\tTGTTGACGAGGAGCCATTCTGCGCG\n+31346\t+\tFBti0019493\t287\tCATGAGGTACGTGAGCTCCGTCGAGG\n+31361\t+\tFBti0020400\t1131\tTCCTGGAGCGAATCGTCGTCGTGCAT\n+31377\t+\tFBti0019493\t1823\tTGACGATGCCGCTGTAGAGCTTGTAG\n+31415\t+\tFBti0019507\t263\tTTTTTGATCAATTGGCACCATGCGAA\n+31463\t-\tFBti0020402\t3479\tCCTTCCAACGATGTATGAACGATGTA\n+31488\t-\tFBti0019493\t454\tCATCGGCCCAGAAGAGTTCATGCA\n+31490\t-\tFBti0019493\t1270\tCAGGCCAGCAGAACCCCCGC\n+31534\t+\tFBti0020400\t2132\tTTGAGGTACTCTTCTTTCCATCGCA\n+31560\t-\tFBti0019499\t1538\tTCTTTTGCTTTAAGCTTTGTACTATGA\n+31564\t-\tFBti0019509\t1129\tTGCGGGACCTAGATGGACTTGGA\n+31601\t+\tFBti0020400\t4711\tTCAGAATTTGAATGGTCAAGTCGGA\n+31651\t-\tFBti0019515\t2460\tCAAGACAACCATGTATGCAGCACA\n+31670\t-\tFBti0020400\t484\tTAGTTTCTTTGATATGACTTTTGCA\n+31688\t-\tFBti0019493\t1213\tTGCCAGTTCGATGCACCTCTGGAA\n+31731\t-\tFBti0019493\t453\tCCATCGGCCCAGAAGAGTTCATGCA\n+31745\t+\tFBti0019493\t3692\tATATGAGGACTGGATGCACATTTTG\n+31799\t-\tFBti0019492\t248\tCCCCAAGGCAGCGTTCTTG\n+31842\t+\tFBti0019503\t283\tTTTGTGAAGAAGAGATCAGTCAGTCAGT\n+31850\t+\tFBti0020400\t7003\tGCGCTGTACTGTGGCTTTGGATGGAG\n+31849\t+\tFBti0019483\t588\tTTATTTGTAAGCTTATTCGTGGTGA\n+31846\t-\tFBti0020402\t3385\tGACCCAGAGTCTATCCCCAAAACA\n+31884\t+\tFBti0019473\t2833\tGTATCTAGAACTTAGCTCAGCACT\n+31893\t-\tFBti0019495\t3364\tCCCTTATGGACCCACAGTAACTTA\n+31908\t+\tFBti0019518\t616\tTGTAAACGGTAGCTAATTC
GAGCG\n+31919\t+\tFBti0019493\t1601\tGAGTAGAACAGCCGAACTTCCGGA\n+31977\t+\tFBti0019473\t4801\tCCTTGCGACAAAACAGAAAGAACACA\n+31985\t-\tFBti0019509\t83\tTGCCGTCCTGGAACTTCTGCAATG\n+31992\t+\tFBti0020400\t3602\tTGTTTTTGTGGCCTGGGCGATCTT\n+32016\t-\tFBti0020410\t6206\tTCTGAAAAATAACAAGGTACACATTGA\n+32036\t-\tFBti0020400\t6230\tAGCAGGCAGTCCCAGTTCTCAGTGTTAA\n+32029\t-\tFBti0019493\t2862\tGCACAACTAATGTCCATCGTAGAAA\n+32060\t-\tFBti0019473\t2975\tCACATACCTCAAGCCTACAAAACA\n+32080\t+\tFBti0019493\t3160\tTATATGACGGACTCATGGCACCTTG\n+32089\t+\tFBti0019493\t2877\tATCGTAGAAACGTGGGGAGCGGA\n+32097\t-\tFBti0019493\t1915\tCGAAACGCTTCGTGGGAGATCATG\n+32115\t+\tFBti0019493\t1829\tTGCCGCTGTAGAGCTTGTAGTGG\n+32124\t-\tFBti0019509\t687\tTCTCCCGGGTTGGACGGCATCAATG\n+32134\t-\tFBti0020403\t837\tCCCTAATATGTACCATGTAAATT\n'
b
diff -r 000000000000 -r 234b83159ea8 test-data/sample2.srbowtie_out
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/sample2.srbowtie_out Tue Jul 11 11:44:36 2017 -0400
b
b'@@ -0,0 +1,2000 @@\n+78\t-\tFBti0020400\t2880\tTCACACATACTTTGCACGAATTTA\n+66\t-\tFBti0019493\t2648\tCCGAAGAGCAGTGATCCGAAGCAG\n+121\t+\tFBti0019493\t3157\tCCATATATGACGGACTCGTGGCACC\n+120\t+\tFBti0019519\t1324\tTGCTTGCTTGTGTGAGTAAAAACAAGA\n+152\t-\tFBti0019473\t2278\tTACCTCCCACAACGTAAA\n+162\t-\tFBti0019504\t869\tCTCACATGGCACAAATCGATAATA\n+171\t+\tFBti0020401\t4591\tATCGCTCTTGGGATCTGGTTGA\n+199\t+\tFBti0020400\t2669\tTCAGATGTAGGCTCTAAATGGATGGCC\n+222\t-\tFBti0019493\t2862\tGCACAACTAATGTCCATCGTAGAAA\n+231\t+\tFBti0020398\t169\tTTCCGAAGAGGATGGATACCGCGG\n+250\t+\tFBti0019493\t1601\tGAGTAGAATAGCCGAACTTCCGG\n+256\t+\tFBti0019503\t150\tTAGATGTCTGTACGAAAGCGAGAAGC\n+333\t-\tFBti0019499\t672\tGTTAGTAGTTCTACAATTCGTCGCA\n+435\t+\tFBti0019519\t1982\tTCTAAGAACTTCTGAGGTGAAGGGC\n+453\t+\tFBti0020400\t5598\tTAAAGTTAGTGCCAAGATGGGAGA\n+476\t+\tFBti0020400\t8514\tACTTATCTTATAAGTTGGTCGCTGAT\n+493\t+\tFBti0019493\t2681\tTGTGGATGTACCGGTAACTAGAGGC\n+495\t+\tFBti0020400\t6752\tTTAGCCCTGAGAATAGGCATGTTGCT\n+507\t-\tFBti0020400\t3630\tACCCTATTGGCTACAAATGTGGTCCA\n+513\t+\tFBti0019493\t1823\tTGACGATGCCGCTGTAGAGCTTG\n+515\t+\tFBti0019509\t1132\tGGGACCTAGATGGACTTGGAGTGC\n+523\t+\tFBti0019493\t519\tCCCAGTTTAAAAAGTCGGTGCACC\n+555\t+\tFBti0020400\t8437\tTCTTACAGCAAATTGAACGAAATTGGA\n+567\t+\tFBti0019493\t1670\tCAGTATAGTACATAGTGTATGTGA\n+599\t+\tFBti0019493\t3693\tTATGAGGACTGGATGCACATTTTG\n+603\t+\tFBti0019476\t1666\tTATGATTTGTGTGATTGCCCGACCT\n+607\t-\tFBti0019486\t1174\tCACAGTGCGTAAACAGTTTCTTGA\n+645\t+\tFBti0019493\t2928\tACGGTAATCATGCTGCTGAAAGGTG\n+642\t+\tFBti0019500\t727\tTGTAAACGGTAGCTAATTCGAGCGG\n+654\t+\tFBti0020400\t8215\tCTTTGGACTTTAGAAATTTTCACT\n+670\t+\tFBti0019493\t3510\tTACGACAGCGAACCAGGACGGGT\n+671\t+\tFBti0019493\t983\tTGAGATCGGAGTTAATGTTTGCTC\n+683\t-\tFBti0020410\t1670\tTGCAATCGATGCTCCAAAGGACACA\n+761\t+\tFBti0019493\t4279\tGCTATAGCTTCTAGTCCG\n+774\t+\tFBti0020400\t5355\tTTGGGTAGGAAGGCGGATTTCCGGC\n+777\t+\tFBti0019493\t1601\tGAGTAGAACAGCCGAACTTCCGGAA\n+785\t-\tFBti0020400\t2344\tTTCT
ATTTTTGCCAAGAGCGTGGAGA\n+784\t-\tFBti0020398\t420\tCCACTTTTGCCACTGGACGTTTAATA\n+796\t-\tFBti0020401\t6160\tACATCCTGCTCACCGAAGTCATAC\n+818\t+\tFBti0019493\t3576\tTCAAGTTTTGGATTCTCGATGAGGACGT\n+843\t+\tFBti0019493\t3696\tGAGGACTGGATGCACATTTTGTGCGC\n+882\t+\tFBti0020400\t635\tTATTGATCGTGGTTAGTTTTACACTAA\n+927\t-\tFBti0019504\t1138\tCCTGGGTTAAAAGATCGTTCGAACA\n+933\t-\tFBti0020395\t404\tCCCTATGAAACCACAAACAATAA\n+953\t+\tFBti0020400\t6426\tAGCGGAAACCCTCGTTGGT\n+966\t-\tFBti0020401\t3795\tTCCCGTTGAGTAAACTGGTATTGTTA\n+970\t-\tFBti0020400\t3047\tATCACTGGATGCCGTTCATTGTA\n+1010\t+\tFBti0019473\t2594\tGCCAATATAAGACGCTTCTGCGGACT\n+1030\t+\tFBti0020400\t3944\tGAAATCTGGACGAAACGATAGC\n+1026\t-\tFBti0020395\t509\tGCATTCAAGCTGAAGTCTGTGCTATTG\n+1064\t-\tFBti0019493\t2915\tCTCGACCAGCAAGACGGTAATCA\n+1104\t+\tFBti0020400\t5501\tTGGACTTGAACCACCTGGAA\n+1112\t-\tFBti0020401\t2670\tCCTGACGGCCCATACTATTGCTAAA\n+1118\t+\tFBti0019493\t1801\tTCCAAGATGGAAGGCGTCGAGATGACGA\n+1158\t+\tFBti0020400\t4101\tCGAACGATAAACGGTGCTAACCA\n+1167\t+\tFBti0020400\t2078\tTCGATGCGGAGATTTTTGGACGGGG\n+1178\t+\tFBti0020400\t7009\tTACTGTGGCTTTGGATGGGGTAGC\n+1189\t+\tFBti0019493\t3809\tTGATCAAGAGAAGACTCAACGGCTGGC\n+1188\t-\tFBti0019493\t1318\tTCCCCCATGTGGCTTAGCAAACTCT\n+1213\t+\tFBti0019504\t2148\tAAAGATGGCACTAGTGATCGT\n+1263\t-\tFBti0019515\t605\tTATCGGAGAAAACAAGTTCCATGTTA\n+1260\t-\tFBti0019509\t1129\tTGCGGGACCTAGATGGACTTGGA\n+1264\t-\tFBti0020400\t4555\tCTCCAAAGGTTACTGTTTTCAATTCGA\n+1267\t+\tFBti0019516\t712\tTAGAATTGAACATAAATATAAATGTG\n+1296\t+\tFBti0019519\t1324\tTGCTTGATTGTGTGAGTAAAAACA\n+1334\t+\tFBti0020400\t8659\tCGAAGACTTGTAGAATTAGCGCGGGC\n+1358\t+\tFBti0019493\t4042\tAGCTATCCAAGACTGCTCATTGAGG\n+1359\t-\tFBti0019493\t3338\tCCCGCTTGATCTGGCTGCTAAGTTA\n+1370\t-\tFBti0019476\t3131\tCACATCACTCAGAATCCATAAGAA\n+1398\t-\tFBti0019515\t1124\tTCCTAAGAATAAAAACGACAGTTCAA\n+1421\t+\tFBti0019493\t2408\tTGTGAGAGAAGTTCTACCGGAAGGC\n+1424\t+\tFBti0019504\t843\tGGAGATTGCGGTTTCGGTAGCGT\n+1464\t+\tFBti0020400\t2078\tTCGATGCGTAGATTTT
TGGACGGGGC\n+1477\t-\tFBti0020400\t7023\tATGGAGTAGCACAGTCGTCGG\n+1491\t+\tFBti0020402\t841\tTAAGGAAGTAAACCCAAATCAATCAGT\n+1497\t-\tFBti0019499\t847\tCATTTCAGTACCAGGGGTCATACA\n+1498\t-\tFBti0019495\t7910\tTCCTGGCCGTCCATCACAA\n+1509\t+\tFBti0019493\t3692\tATATGAGGACTGGATGCACATTTTG\n+1523\t-\tFBti0020400\t2880\tTCACACATACTTTGCACGAATTTA\n+1542\t-\tFBti0019473\t2981\tCCTCAAGCCTACAAAACA\n+1543\t-\tFBti00194'..b'\t934\tTCTTGTTTTTACTCACACAAGCAAGCA\n+44318\t+\tFBti0019493\t3692\tATATGAGGACTGGATGCACATTTTG\n+44340\t+\tFBti0020400\t4711\tTCAGAATTTGAATGGTCAAGTCGGA\n+44362\t+\tFBti0019499\t682\tCTACAATTCGTCGCAGGCTAAAA\n+44375\t+\tFBti0019519\t1982\tTCTAAGAACTTCTGAGGTGAAGGGCATT\n+44413\t+\tFBti0019493\t2877\tATCGTAGAAACGTGGGGAGCGGA\n+44456\t-\tFBti0019473\t2977\tCATACCACAAGCCTACAAAACA\n+44536\t+\tFBti0020400\t5460\tCCTTGGACTGAGCAGCTACTGTT\n+44541\t-\tFBti0020402\t3277\tGCAGACAAATACCCAATACCAAATA\n+44552\t-\tFBti0019473\t2743\tTTCAGAGGAGTTCCGGAACAATAA\n+44559\t+\tFBti0019509\t847\tTGTCTAAGCTGGGAGCAGAGGAAGA\n+44575\t+\tFBti0020400\t5294\tTCATAGAATTTTGGATCCGCCAGT\n+44614\t+\tFBti0019473\t302\tATTCATCATAAAAAAATCGGTGGAC\n+44702\t+\tFBti0020400\t6743\tTAACTGTATTTAGCCCTGAGAATAGG\n+44729\t+\tFBti0019503\t283\tTTTGTGAAGAAGAGATCAGTCAGT\n+44754\t-\tFBti0019493\t1894\tGGAGGGCGAAGATGGATGACTG\n+44795\t+\tFBti0019493\t3809\tTGATCAAGAGAAGACTCAACGGC\n+44791\t+\tFBti0019493\t2877\tATCGTAGAAACGTGGGGAGCGG\n+44829\t-\tFBti0019499\t380\tGCCCAAAACAAAAGAGTTAACAGTTG\n+44865\t+\tFBti0019495\t2740\tCAGCAATTACCGCAGCGGACG\n+44892\t+\tFBti0019501\t493\tGTTGCCTTGCACCCACGATGATGGT\n+44898\t+\tFBti0020400\t659\tTAAAACTTATTCTAATGCGTTGGGT\n+44958\t+\tFBti0020400\t6540\tTATAAATGGCTGTGAAAAGATCCCGGA\n+44965\t+\tFBti0020400\t7019\tTTGGATGGAGTAGCACAGTCGTCG\n+44971\t-\tFBti0019515\t1093\tTTTGCAGCGATGCCCACAGTACTGTAAA\n+44970\t-\tFBti0019493\t1544\tTGTGCCGTCCTGGAACTTCTGCAATG\n+45003\t+\tFBti0019492\t393\tACATTGAGAAGTGGCTCTCTGA\n+45007\t+\tFBti0019493\t3696\tGAGGACTGGATGCACATTTTGTGCGC\n+45002\t-\tFBti0019507\t899\tTCTTGTTTTTA
CTCACACAAGCAAGCA\n+45023\t+\tFBti0019473\t1008\tTTCGAACGTCACGGCTTCCAAACGA\n+45028\t+\tFBti0020400\t591\tCGAAGACTTGTAGAATTAGCGCGGGC\n+45065\t+\tFBti0019504\t732\tTATTTTGATGAACGTTTTGAACGGACG\n+45070\t+\tFBti0020400\t6066\tTCGGTACGGACGGTCTCACATCAT\n+45075\t-\tFBti0019493\t3939\tTCGTAAGAGACGACTAATACAGCGATA\n+45083\t-\tFBti0020410\t4863\tTGACAATCAGCTCTTTGGATAGA\n+45095\t-\tFBti0019517\t459\tTCCAAGTAGATTTACTAGAAACTATA\n+45156\t-\tFBti0019493\t2444\tATTTGGATTTCGCCAAGGACGATGTGT\n+45213\t+\tFBti0019493\t3507\tGACGACGACAGCGAACCAGAACGGG\n+45261\t+\tFBti0019499\t715\tTTAAGACATACGTTGTTCGCAAAACGA\n+45306\t-\tFBti0020401\t2725\tTCCGCTGCGTTTAGCGTTCTACTGGA\n+45316\t+\tFBti0019509\t665\tTGCCCGGTTGAAGAGCAGGCGCTCT\n+45315\t+\tFBti0020400\t635\tTATTGATCGTGGTTATTTTTACA\n+45322\t+\tFBti0020400\t5293\tCTCATAGAATTTTTGATCCGCCAGT\n+45330\t+\tFBti0020400\t6050\tTTGGAATGCGACTGACTCGGTACGGAA\n+45335\t-\tFBti0019473\t2743\tTTCAGAGGAGTTCCGGAACAATAA\n+45348\t+\tFBti0020401\t4578\tTAGTAGTTCCGGCATCGCTCTTGGGA\n+45361\t+\tFBti0019503\t105\tTTTCTGCCGAACGTAGTCTGGTCGCGG\n+45401\t-\tFBti0019517\t355\tTCCCTAGCTAACCGTAGAACTATGAT\n+45403\t-\tFBti0020401\t2670\tCCTGACGGCCCATATTATTGCTAAA\n+45456\t+\tFBti0020410\t4100\tCTATAATGTAAAACATCCAATTATA\n+45494\t+\tFBti0020400\t6918\tTCTGAGCCTCAAATTCGCTTAAGCGGT\n+45504\t-\tFBti0019500\t223\tCCCTTCACCTTAGAAGTTCGTTGA\n+45506\t-\tFBti0020402\t2308\tACCCATCAACAGTTCAATCCTTACATG\n+45507\t+\tFBti0020400\t5460\tTCTTGGACTGAGCAGCTACTGTT\n+45550\t-\tFBti0019473\t733\tTCCCACTTCGATGCAAAAAATGCCTC\n+45568\t+\tFBti0019495\t4548\tTAAGTCTTTCCTAGGCTTGTTGGGA\n+45573\t-\tFBti0019473\t2742\tTTTCAGAGGAATTCCGGAACAATA\n+45574\t-\tFBti0019476\t1654\tTCCCACCGAATTTATGATTTGT\n+45593\t-\tFBti0019476\t3129\tGACACATCACTCAGAATCCATAAGAA\n+45609\t-\tFBti0019493\t690\tCCTGCCACAGATGCGTGGGTTTCGACCA\n+45680\t+\tFBti0020410\t4198\tTAAGAACTTATATTCAGAGGGAGT\n+45720\t+\tFBti0020400\t2078\tTCGATGCGGAGATTTTTGGACG\n+45722\t-\tFBti0020402\t3533\tTGGTATACTTGGACGACATTATTGTA\n+45733\t+\tFBti0020400\t4711\tTCAGAATTTGAATGG
TCAAGTCGGA\n+45738\t-\tFBti0019504\t690\tACTGTTCATACAGCGATTCGTAGGAA\n+45760\t+\tFBti0020398\t113\tTAATGAGGACTGCTCGCAAACGC\n+45771\t+\tFBti0019493\t1742\tTTGGTGGACCGCCGACCTCTGCGCT\n+45795\t+\tFBti0020401\t5810\tTGAAGAAACAGCAGAACATGTGCTA\n+45806\t+\tFBti0019493\t4098\tGTGGTTGGTACCCATATCGCGGG\n+45816\t+\tFBti0019493\t3809\tTGATCAAGAGAAGACTCAACGGC\n+45858\t-\tFBti0019499\t509\tAAAGTTTGATACAGTTGGCACATTAA\n+45859\t+\tFBti0019502\t145\tTGATAGCAGACAACTGTATGTGTGC\n+45884\t+\tFBti0020400\t5697\tTTGGAACGAAATTGGCCTGATTAGC\n+45906\t+\tFBti0020401\t5836\tTGCACTGCTCCAGGTTTACGGAGGA\n+45911\t+\tFBti0020401\t5810\tTGAAGAAACAGCAGAACATGTG\n+45927\t+\tFBti0019495\t4845\tCTTGTCGGCTGCGGAAACAAAC\n+45935\t-\tFBti0019493\t265\tACCCTCCCTCGGAGTCAGAGTACATG\n+45937\t-\tFBti0019493\t307\tTCGAGGTGGTGGTGCGATCATTCGTA\n'
b
diff -r 000000000000 -r 234b83159ea8 test-data/sample3.srbowtie_out
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/sample3.srbowtie_out Tue Jul 11 11:44:36 2017 -0400
b
b'@@ -0,0 +1,2000 @@\n+64\t+\tFBti0020400\t1381\tTGAACTCCTACGCTGAGTTGGCGGAT\n+96\t+\tFBti0020400\t6361\tTTTCAATTGACTGTTTACCAACAAT\n+199\t-\tFBti0019516\t34\tTACGATTTTTTGGCACACGATTTTT\n+239\t-\tFBti0020400\t6021\tTTCTAGAGGGCCCACTTGAGTTCA\n+240\t-\tFBti0019499\t667\tAACAAGTTAGTAGTTCTACAATTC\n+391\t+\tFBti0019499\t789\tGAATATGTTAAGAAGCCTCTTGA\n+396\t+\tFBti0019516\t588\tGTAGTTATAATAATTTCTATTGTACT\n+409\t+\tFBti0020400\t6880\tCATCGTGACGGTTGGAGCTGCGG\n+416\t-\tFBti0019473\t2973\tGCCACATACCACAAGCCTACAAAACA\n+433\t-\tFBti0019504\t690\tACTGTTCATACAGCGATTCGTAGGAA\n+476\t+\tFBti0020400\t7167\tTCGAAGAAATACGAAATGGAAGGGA\n+487\t-\tFBti0019517\t79\tATAACACGAGTTGATGATCTTGGT\n+509\t-\tFBti0019492\t447\tAGCACGTGACGTTTACGCTAAACA\n+606\t-\tFBti0019493\t1947\tCCATGGGGGCGCGTCTACAAGATTT\n+638\t-\tFBti0019493\t4256\tCTCGAATTGCTACCGGGGTGGTCGCTA\n+654\t+\tFBti0020405\t414\tTGATTGTTGAGTGCTTGTGTC\n+710\t+\tFBti0019517\t14\tTGACCTTTTGTCGTGCCAACCCAA\n+746\t-\tFBti0019503\t160\tTACGAAAGCGAGAAGCATACAGAAA\n+1019\t-\tFBti0019493\t4048\tCCAAGACTGCTCATTGAGGTA\n+1042\t+\tFBti0020400\t5460\tTCTTGGACTGAGCAGCTACTGTTTGG\n+1162\t+\tFBti0019504\t248\tTGAAGTTTGACTGCTGAAGTCGA\n+1260\t-\tFBti0020410\t649\tTCCAACGGATCACGCCAAAGAATACA\n+1276\t+\tFBti0020401\t5810\tTGAAGAAACAGCTGAACATGTG\n+1289\t-\tFBti0019515\t2533\tGACGACATCGAAGGGAGTGGCAGGA\n+1376\t-\tFBti0019484\t118\tGCGAAGAGCGCTACAGCGAA\n+1390\t+\tFBti0019493\t4178\tTGAGACCCAGAGATCAGTAGAGATT\n+1406\t+\tFBti0020400\t2298\tTCGGATCCTCAGACATAGGAGAGA\n+1414\t+\tFBti0019493\t3510\tGACGACAGCGAACCAGGACGGGTGAC\n+1423\t+\tFBti0019503\t283\tTTTGTGAAGAAGAGATCAGTCAGT\n+1453\t+\tFBti0020410\t6223\tTACACATTGATGACTTGATTTTCCA\n+1481\t+\tFBti0019503\t332\tGACATGCCTCGCTCAAGCGCCAGA\n+1513\t+\tFBti0019503\t199\tAAGTATTGGTGTATGCGGACTAGAAC\n+1522\t-\tFBti0020402\t5498\tTAGAACAAACAGACCCTGACCATTA\n+1527\t+\tFBti0020400\t8703\tTATTGATCGTGGTTAGTTTTACACTAAA\n+1598\t+\tFBti0019480\t593\tCGAATAGTGATTGCGAACCCCCGAAT\n+1607\t+\tFBti0019493\t1553\tCTGGAACTTCTCCAATGCACGTTGGCGA\n+1623\t+\tFBti0
019493\t1165\tCTGGTCGTTAAAGGGAGTTTTGGC\n+1731\t-\tFBti0020410\t6102\tCTATCTTCCCCAGTTTACGAATTA\n+1745\t+\tFBti0020400\t3416\tTTTTAGAGCACGCTTCTCCAGGTCGG\n+1797\t-\tFBti0019493\t3335\tTCCCCCGCTTGATCTGGCT\n+1836\t+\tFBti0019493\t1906\tTGGATGACTGGAAACGCTTCGTGGG\n+1851\t+\tFBti0020400\t7689\tTTATTGTTCGGAAATTAATTTGGA\n+1858\t+\tFBti0019473\t2880\tCAGGACTAAATAGAATCTCGTATCAA\n+1885\t-\tFBti0020410\t6415\tTCTATTTATGGGCTGCAATAAACATG\n+1896\t+\tFBti0019493\t3596\tTAGGACGTCTTTCCTGCTTACAGGGC\n+1912\t+\tFBti0019503\t283\tTTTGTGAAGAAGAGATCAGTCAGT\n+1946\t-\tFBti0019499\t664\tGCAAACAAGTTAGTAGTTCTACA\n+1985\t-\tFBti0020400\t3128\tAGAAACGGGTTCATGCTTAGGAT\n+1992\t+\tFBti0020400\t7019\tTTGGATGGGGTAGCACAGTCGTCGG\n+2021\t-\tFBti0019493\t454\tCATCGGCCCAGAAGAGTTCATGCA\n+2053\t+\tFBti0019519\t1382\tCTAGAAGACGATTTTCGGGCCGAAT\n+2055\t+\tFBti0020400\t5091\tCCAGTGAATTATCGTACGCGTGGGA\n+2063\t-\tFBti0020410\t4762\tGCTAAACTCGCGTCCATTAGTCACTGTA\n+2090\t-\tFBti0019499\t740\tGATTGAGATCACACCAACCAACAA\n+2103\t-\tFBti0019493\t3942\tTAAGAGACGACTAATACAGCGATA\n+2166\t+\tFBti0020404\t904\tTTTCAAGTCGACTTGAAGGTCATA\n+2177\t+\tFBti0020400\t5644\tAAGGATAGCTCTTGAGCCCGTGG\n+2207\t-\tFBti0019504\t1425\tCCAAAGTTATCAAAGCCGTTCAAAA\n+2260\t+\tFBti0019499\t677\tTAGTTCTACAATTCGTCGCAGGC\n+2390\t+\tFBti0019493\t1906\tTGGATGACTGGAAACGCTTCGTGG\n+2430\t+\tFBti0019503\t99\tTTAGAGTTTCTGCCGAACGTAGTCTGG\n+2518\t+\tFBti0019493\t2932\tTAATCATGCTGCTGAAAGGTGCCT\n+2541\t-\tFBti0019493\t2486\tGCACGTGAAGAGCAGTGTTGGTGCCA\n+2574\t+\tFBti0020400\t5460\tTCTTGGACTGAGCAGCTACTGTT\n+2619\t+\tFBti0019493\t2781\tTATTGTCAGCTGAGTGCATACGCGGA\n+2667\t-\tFBti0019493\t3466\tGCCTAGAGGAGTGTTTAATCCAG\n+2705\t+\tFBti0019502\t694\tTGTAAACGGTAGCTAATTCGAGCGGC\n+2754\t+\tFBti0019493\t3693\tTATGAGGACTGGATGCACATTTTG\n+2862\t+\tFBti0019493\t1385\tTGCTGTCAGAGTGGATGCTGGAGG\n+2878\t+\tFBti0019493\t1385\tTGCTGTCAGAGTGGATGCTGGA\n+2948\t-\tFBti0019515\t2449\tCGTCAACTCCACAAGACAACCATGTA\n+2967\t+\tFBti0019473\t1713\tTATTTGGGGGCGTAGCACTACTAG\n+3015\t-\tFBti0019493\t1318\t
TCCCTCATGTGGCTTAGCAAACTCT\n+3031\t-\tFBti0020400\t5418\tACGCGGGCCTAGTCGGAGATCGGATG\n+3063\t+\tFBti0020400\t5644\tAAGGATAGCTCTTGAGCCCGTGGCGA\n+3084\t+\tFBti0020395\t887\tTACCAGGACTTAGGCAGTCTAAGAT\n+3087\t-\tFBti0019499\t1343\tACCAAAAAGACTTCAAGCAGTTA\n+3117\t-\tFBti0019493\t1277\tGCAGAACCCCCGCAATCCTG\n+3148\t+\tFBti0020410\t6255\tCGCGGACACTCGGCTCTAGGGTTG\n+3179\t+\tFBti0019493\t2548\tTCAAAGGAGCATTCGA'..b'CGAAGAGCAGTGA\n+64863\t-\tFBti0019515\t1250\tAGCTCGAGATCGTATCACACCAACA\n+64895\t+\tFBti0019493\t3484\tTCCAGAGTTGGCAGAACAGATGGGAC\n+64911\t-\tFBti0019473\t2977\tCATACCACAAGCCTACAAAACA\n+64952\t-\tFBti0019515\t1952\tTCTCTGAGAAAAAATTCACAACACTTT\n+65132\t+\tFBti0019493\t1385\tTGCTGTCAGAGTGGATGCTGGAG\n+65271\t+\tFBti0020401\t6217\tCAAGGACTGTAGAGCACGCGG\n+65280\t-\tFBti0019499\t846\tGCATTTCAGTACCAGGGGTCATAA\n+65278\t+\tFBti0019480\t492\tCTACAATGTTTTAAATCGCTCGGA\n+65332\t+\tFBti0019476\t1666\tTATGATTTGTGTGATTGCCCGACCA\n+65360\t+\tFBti0019493\t1600\tTGAGTAGAACAGCCGAACTTCCGGA\n+65386\t+\tFBti0020400\t2537\tTGAAGGAAATCGCGGGAAAGCAGG\n+65401\t-\tFBti0020400\t4234\tTCCAGCGAATACCGAGGGTTTTGGC\n+65456\t-\tFBti0019507\t226\tTTCACCTTCAAAGTTCTTTGA\n+65474\t-\tFBti0020400\t6321\tCCCCAAATTCCTGTGGTATCGACTGCA\n+65504\t+\tFBti0019493\t1821\tGATGACGATGCCGCTGTAGAGCTTG\n+65520\t-\tFBti0019483\t119\tAAGCTCACTGCTTAACGATCTTC\n+65521\t-\tFBti0019473\t3381\tTCCGCGTCGGTCCGCATACATCAA\n+65543\t+\tFBti0020400\t5752\tCGAATTGCTGGAACAGAGGTTGTT\n+65574\t-\tFBti0019482\t86\tTCCGCTTCTTTTTACCTTATTCATTA\n+65605\t+\tFBti0020400\t7110\tAAATAGACCTGGGTTTGTCAGCGG\n+65621\t+\tFBti0019493\t1599\tGTGAGTAGAATAGCCGAACTTCCGG\n+65736\t+\tFBti0019519\t1820\tAATATTCTTACATAAAGTCATTTT\n+65749\t+\tFBti0019493\t1906\tTGGATGACTGGAAACGCTTCGTGG\n+65790\t-\tFBti0019507\t223\tCCCTTCACCTTCAAAGTTCTTTGA\n+65814\t-\tFBti0020402\t3475\tGCTACCTTCCAACGATGTATGAACGA\n+65836\t-\tFBti0019499\t1178\tGCCCCCCCAAAGCCCAGACCTTA\n+65932\t+\tFBti0020410\t6646\tTATGTAAGAATGAATAAAAGGC\n+65987\t+\tFBti0020410\t3214\tTACGCTAAAGGTCTATTATAGAAAATAA\n+66015\t-
\tFBti0019502\t365\tTTTCAAAAATGACTTTATATAAGA\n+66043\t-\tFBti0020402\t3941\tCCATACTTCGATCACCAGACTTCA\n+66064\t+\tFBti0019493\t4191\tTCAGTAGAGATTTTAGGTAGATCT\n+66090\t+\tFBti0019493\t705\tTGGGTTTCGACCACAAGGTTAGTGAA\n+66102\t+\tFBti0020400\t6881\tATCGTGACGGTTGGAGCTGC\n+66100\t+\tFBti0019504\t248\tTGAAGTTTGACTGCTGACGTCGA\n+66127\t+\tFBti0019503\t370\tCCATAACGAGAGTAGTGAAGAGGAA\n+66159\t+\tFBti0019493\t1601\tTAGTAGAACAGCCGAACTTCCGGA\n+66278\t+\tFBti0020400\t6743\tTAACTGTATTTAGCCCTGAGAATAGG\n+66282\t-\tFBti0019480\t478\tTCCCGTAAGACCGTCTACAATGTT\n+66311\t+\tFBti0019473\t2880\tCAGGACTAAATAGAATCTCGTATC\n+66319\t+\tFBti0020403\t160\tTGATAGCAGACAACTTTATGTGTGC\n+66424\t+\tFBti0019493\t3479\tTTTAATCCAGAGTTGGCAGAACA\n+66468\t+\tFBti0019503\t283\tTTTGTGAAGAAGAGATCAGTCAGTCA\n+66570\t+\tFBti0020400\t3708\tCAGTAAAGTTCGGAGTTAATCGTA\n+66586\t+\tFBti0019503\t100\tTAGAGTTTCTGCCGAACGTAGTC\n+66578\t+\tFBti0020401\t249\tTACGATGGGATCTTGGGCATCAACA\n+66619\t+\tFBti0019503\t150\tTAGATGTCTGTACGAAAGCGAGAAGC\n+66659\t+\tFBti0019499\t1555\tTGTACTATGAACCGTTATCTTTCGT\n+66698\t+\tFBti0020400\t8437\tTCTTACAGCAAATTGAACGAAATTGGA\n+66707\t+\tFBti0019493\t3693\tTATGAGGACTGGATGCACATTTTG\n+66720\t-\tFBti0019473\t2160\tTCACAACACTATTCCCAACAACCAAT\n+66826\t-\tFBti0020402\t4884\tCACGCCAAGTATAGAATTCTGTAGAGA\n+66824\t+\tFBti0019484\t360\tCATATTATCACAAAAATAAATTTCAAA\n+66875\t-\tFBti0019493\t1318\tTCCCTCATGTGGCTTAGCAAACTCT\n+66973\t+\tFBti0019509\t398\tTGGATGACTGGAAACGCTTCGTGGG\n+67015\t+\tFBti0019493\t448\tCACAGCCATCGGCCCAGAAGAGTTCATG\n+67035\t+\tFBti0020400\t3035\tTTATACGGAAGAATCACTGGATGGC\n+67038\t+\tFBti0019519\t1382\tCTAGAAGACGATTTTCGGGC\n+67062\t-\tFBti0020404\t808\tGACTGAAAAATCGAGCAATATATAATA\n+67067\t+\tFBti0019507\t263\tTTTTTGATCAATTGGCACCATGCGAAA\n+67146\t+\tFBti0019516\t736\tTGTAAACGGTAGCTAATTCGAGCGGC\n+67195\t-\tFBti0019493\t3337\tCCCCGCTTGATCTGGCTGCTAA\n+67241\t-\tFBti0020400\t5474\tGCTACTGTTTGGTTTAAGCCCGAGA\n+67329\t+\tFBti0019499\t192\tGAGATCTTTATCAGTTGTCAGAA\n+67390\t+\tFBti0019493\t2548\tTCAAAGGAGCA
TTCGACAACGTCGAA\n+67395\t+\tFBti0020395\t638\tTAACATCTAAGCTAGTAAGTGAAGT\n+67394\t+\tFBti0020400\t8405\tTGTCGGAGAAATCCGTTAGATCTGA\n+67412\t-\tFBti0020404\t809\tACTGAAAAATCGAGCAATATATAATA\n+67515\t+\tFBti0019509\t398\tTGGATGACTGGAAACGCTTCGTGG\n+67622\t+\tFBti0019503\t283\tTTTGTGAAGAAGAGATCAGTC\n+67639\t-\tFBti0020400\t4259\tAGTACTTTCTGCATCGATCTCGAGAAA\n+67667\t+\tFBti0019493\t1385\tTGCTGTCAGAGTGGATGCTGGAG\n+67712\t+\tFBti0020400\t5460\tTCTTGGACTGAGCAGCTACTGTT\n+67719\t+\tFBti0019493\t3162\tTATGACGGACTCATGGCACCTT\n+67734\t+\tFBti0020400\t7019\tTTGGATGGAGTAGCACAGTCGTCG\n+67769\t+\tFBti0019517\t517\tTATGAGTTGCATGACCCTTACAGA\n+67784\t+\tFBti0019503\t100\tTAGAGTTTCTGCCGAACGTAGTCT\n+67826\t+\tFBti0019480\t22\tCACACTGTGAAATAAGTTGAATTTTT\n+67869\t-\tFBti0020402\t4612\tGCCCATAAAGAAATTATCAACCCAA\n'
b
diff -r 000000000000 -r 234b83159ea8 test-data/transposons.fasta
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/transposons.fasta Tue Jul 11 11:44:36 2017 -0400
[
b'@@ -0,0 +1,1992 @@\n+>FBti0019472 type=transposable_element; loc=4:complement(73579..74622); name=1360{}ci[1482]; dbxref=FlyBase_Annotation_IDs:TE19472,FlyBase:FBti0019472; MD5=dbdb9ad5b597dd3bae178bef975a9472; length=1044; release=r5.49; species=Dmel; \n+CAAAGACACTAGAATAACAAGATGCGTAACGCCATACAATTTTTTGGCAC\n+ACGATTTTTTCGCCGTGGCTCTAGAGGTGGCTCCAGGCTCTCTCGAATTT\n+TTGTTAGAGAGCGAGAGAGCAGAGAGTGCTACAGCGAACAGCTCTTTTCA\n+TCGCATAAAGTGATAGCAGACAACTGTATGTGTGCACACGTATGCTCATG\n+CATTGTAAATTTGACAAAATATGCCCTTCACCTTAGAAGTTCTTTGACTT\n+TAAATCTATATTATTTTTGATCAATTGGCACCATGTGAAAAATTCTTGTT\n+TTGCATTGCCTTAACGTTATTATTATATGAAAATAGATTAGAAAGAGCCA\n+AATCTATGTACATATTATCACAAAAATAAATTTAAAAATGACTTTATATA\n+TTTAGAATATTTGTCATTAGAGTATTCGTCTTGCGGCGTGTAAAAATTAA\n+TTAGGCAATGATTGTTGAGTGCTTGTGTCCGCACTTCGTGCCTCAAGATA\n+TGACCAAAGCAAAGACACTAGAATAATTCTAGACTTTTGCAATAAACAGT\n+TATCATATTCTTATGGAATTTATGAAATTCCAGTAGTTATAATAATTTCT\n+ATTGTACTTCCTTTAATTATTTACTATATTTATTAAGTCATTTAACATTA\n+AAAGTGTTTCAAAAAAAAATATTTCGCTTTTAAAAAATTGTCAGATGAGA\n+GACAAATTAGAATTAAACATAAATATATATGTGTAAACGGTAGCTAATTC\n+GATCGGCGATTTTAACAAACAAATTTTAAAAGCTTTAATTAAGCGCGAAT\n+TTTAAAAAAATTATTTAATTTCATCATATTTCTAGGAAATTGGCAAAAAC\n+TACCCTAATATGTACAATGTAAATTCGTTTTTTCGATCAGAATTGATTTC\n+GGCCCGAAAATAGTCTTCTAGCACAACACGCACACATATACGCGTTCTCG\n+TCTCTTGTTTTTACTCACACAAGCAAGCAAATTCTATTTTTAGATTTCTT\n+ACGCTCTCAGCGTGAGCGAGCGGAAAGAGAGCAATTTTGGTCGT\n+>FBti0019473 type=transposable_element; loc=4:97180..102547; name=I{}1483; dbxref=FlyBase_Annotation_IDs:TE19473,FlyBase:FBti0019473; MD5=2348e74b1f342684866e0fbde8690c16; length=5368; release=r5.49; species=Dmel; 
\n+TACCACTTCAACCTCCGAAGAGATAAGTCGTGCCTCTCAGTCTAAAGCCT\n+CGCTTCGCGTAAGCCCAAAACTCTTATCAGCAAAATCTTGATAAACAAAT\n+ATCAACCACAAAGAGAAAATAAAAAACTTAACAACAAAAACAACAATACC\n+GCTAATCCGGGCTCAAGCCCTTAACCAACAATCATGACAGACCCACCAAA\n+CATTTACAAAATCACTTCAAAAACATACCAATCCCAATTAGGCGAACCTA\n+AATTTATAATTATTAAAAGAAATGACAACAACTCTTTCGAAAGAACTTCA\n+CCATTCATCATAAAAAAATCGGTGGACTTTGCCTGTGGAGGAGAAGTTGA\n+GGGATGCAAACGTACAAGAGACGGCAACCTGCTAATAAAAACCAAAAATG\n+AATTACAAGCCAGAAAACTCCTAAAACTAACAAAAATTGCAGATGAGGAT\n+GTAACAGCAAGTGAACATAAAACATTAAACTTCTCTAAGGGAGTTATTTA\n+CTGTAACGACCTTAGACACATCGACGAAGACACAATTCTACAAGAACTAA\n+AACCACAAAAAGTATCTGAAGTTAAAAAAATAATGAAACGGCAAAACCCC\n+AACTCTAACTCCGACACCAACAACATCACATTAGTTGAAACTGGACTCAT\n+AATTATAACCTTTGAATCGCATAAGCTCCCCGAGATAGTACGAATCGGGT\n+ACGAAACAGTCCGAGTACGAGACTATATCCCACTCCCACTTCGATGCAAA\n+AAATGCCTCCGCTTCGGTCATCCAACACCCATATGCAAAAGTGTAGAAAC\n+TTGCATCAATTGCTCTGAAACAAAACACACAAACGACGGAGAAAAATGCA\n+CAAACGAAAAAAACTGCTTAAATTGCCGAAATAACCCAGAACTTGACCAT\n+CAACACAGCCCAATTGACCGCAAATGCCCTACGTTCATAAAAAACCAGGA\n+ATTAACAGCAATTAAAACCACACAAAAAGTTGACCATAAAACGGCCCAAC\n+ACATATATTTCGAACGTCACGGCTTCCAAACGAAAAACACCTACGCCAAA\n+ACACTTACAAACGGCACAACCCAGAGGACAACAAACACTCCATCACCTAA\n+TATTCACACAAACACAACCCAATCACAACAACAAAATCCGCACCACACAC\n+CCAAATCAGCAGCACAAAACACTTCAGCTAAGACACCAACAACTGAACCA\n+GCCAAAACAACCTTACTATCCAACCAACCACACCAACACCACCACCACCA\n+CAGCTACGACAAACTAGAAGACATGGATACCGACTACACACCTACCAGAA\n+AACCATCTACGGCATACTCATCACAACTCACAGAAGACCTAAAAATAAAA\n+ATCTTCCCTAAAGATAAGTCCAATAACCTATCCATAAACCTTAAAGCATC\n+AAAACTAAAGGCCAAAGCCCACAAAAACAAGCACACTAACAACAGCGACA\n+GCGAATCCATATAGAACTCTACACAAAACCCTAACCGTTAACACTACCTT\n+TAAGTAAGTTATAAGCTTTAATTTTCTCACAAATGTCCCTAACTATAATC\n+CAATGGAATCTAAAAGGATATCTAAACAACTACAGCCATCTCCTTATTCT\n+AATCAAAAAATACTCCCCCCACATAATTTCCCTCCAAGAAACCCATATAC\n+AATACACTAATAACATTCCAACCCCAATAAACTACAAACTATTAACAAAT\n+ATTGCCACCAACAGATTTGGGGGCGTAGCACTACTAGTGCATAAGTCAAT\n+ACAACACACTGTCCTCAACATAACAATCGATATAGAAGCAATAGCCATAA\n+ATATAGAATCTAAACTTAAATTAAACATATTTTCCACATACATTTCTCCG\n+ACCAAAAACATAACTAACCAGACACTCCATAACACA
TTTAACATACAACA\n+AACACCCTCTCTAATTACGGGAGATTTTAATGGATGGCACCCATCCTGGG\n+GCTCCCCAACAACAAATAAACGAGGAAAAATAACTCATAGATTCATTGAC\n+AACATGCACCTTATCCTGTTAAACGACAAATCTCCCACACACTTTTCAAC\n+ACACAATACATACACACACATAGACCTCACACTCTGCTCTCCAATCCTAG\n+CCCCCCACGCCAAGTGGAAAATACTAAACGATCTTCACGGTAGCGACCAT\n+TTCCCTATTATCACAACACTATTCCCAACAACCAATCCACAAAAATTCTA\n+CAGACCCTTTTTTAAACTCAAAGAAGCCAACTGGGAACAGTTCAACGCTC\n+TTACCCACCAAACCAACAAGAAATACCCCACCTCCCACAACGTAAACAAA\n+GAAGCCGCTCTAATCAATAGAATCATCCTTTATAGCGCAAACCTCTCCAT\n'..b'AGCGCAGCAAAGATAAGAAGAAGAAATTAATAAACTAATTCCTAATG\n+TCAAATGGAATTACGTTAAATCGAAAGACAATCCAGCAGATGTGTCTTCA\n+AGAGGGATATCACCGCAAGCTCTTAAAATCTGTGAAATTTGGTGGAGAGG\n+GCCGAATTGGCTAGCTATAGATTCACAACACTGGCCCACTCAAAAGGAAT\n+CGGAAATTGTTGTGGTATCCACATTGATAAAATCCGAATATCTGCAAAAT\n+CATCTTTTATCGAAGTATTCATCGATCGACAAACTTCTTAGAGTAATGGT\n+GTATGTATTACGCTTCATAACAAAGCTGAGAGGAAAATCGCAACAGCCGT\n+CACATCTTACGGCAGAGGAATTAAAGCTAGCAAAGATTGCCGTGGTAAAG\n+ATACAACAACAGCTGGGTTTTGGACACGAAGTCAGACTACTCAAAAACAA\n+AAGACCATTCGACCCAAAGAGTAAGTTACAGGCGCTAACCGTTTTTGGAT\n+AGTGATGGCGTACTTCGAGTTGGTGGACGATTACAAAACGCAATGATACC\n+CTATAATGTAAAACATCCAAATATATACAAATCACATTTGACTTGGTTAA\n+TTGCAAAGGATGCTTATAAAGAAACTCTGCATGGCGGAATTAACATTATG\n+AGAACTTATATTCAGAGGGAGTTCTGGATATTTGGCATACAAAATCCCTT\n+AAAGAAATATTTAAGGGAATGTATTGTATGCATACGATACAAGCAAGAGA\n+TGTCCAGTCAACTGATGGGAAATTTACCAGTTTACCGAGTAACGACTGAT\n+TACTCGTTTCAAAATACTGGAATAGACTACGTCAGACCGTTCCAGATTCG\n+CTGCTCAAAGGCAAGAGGTCAAAAAACGTATAAAGGATACTTTTGTGTAT\n+TTGTTTGTATGGCAACAAAAGCAATACATCTGGAAGCGACCTTTCGTCAG\n+ACAAATTCCTGGAGGCTCATCGACGGTTCTTTGCAAGACGAGGCAAGAGT\n+GAGAACCTATACTCAGATAATGGAACAAACTTCGTGGGTGCTTCAAGAGT\n+ATTGGACAAAGAATTTGTAGCTGCCATTAAAAACAATAATGAGTTAGCCC\n+CCACATGGGATGTTTATGGGAAGCCGGTGAAGCATCACCTTAAACGAGTT\n+ATTGGTGAAAACAGATTTACATATGAAGAATTTGCATCGCTGCTATGGTA\n+AATCGAAGCAGTGCTAAACTCGGGTCCATTAGTCACTGTAAGGAGCGAAA\n+ACGATGGTGAGGACATATTACCGTCATTTTCTGGTGGGAAGACCTCTAAT\n+TTTGGCGAAAGTAAGACAATCAGCTCTTTGGATAGATGAAAGCTTATTCA\n+ACGCATCAGAGGTGATTTTTGGAAGAAATGAAAAGAGAAGTATCTGGTGT\n+CATTGCAACAGCGAACCAGAT
GGCGCCAAGAAAAGCCGAATCTGAAGGAG\n+GGACAGCTGGTTCTTATAAAACATGAGAACACTCATCCTGCAAAATGGCC\n+TGCATAAAACAATCAGAGGACTTCCTGGGAGACTTCAAGGACTACTGCGA\n+TTTCTTCGGCACGACAATACGGACAAAAATTGACAACATCAAAGAAAAAG\n+ACAAAATACTACGGCACCGTACCAACCGAAAGAAAAGGTTTATACTGTTC\n+TTTGATATGGGAAATGCAAATAGAATACAGGAAAATATGTAAGCGATCAT\n+AAAAAACGAAAAACATCTAATGGAATATGTTGACAATCAGATGACAAACT\n+ACGATAGAAGCAAACCGGAATTTGGGAAAACTGACCCAACAAGTCAATAT\n+TATTGCAGAAACCATGAAGGAGCACTTTATGGTATATAAGGAGTCAATTA\n+AATTCCTTATGTTATCAAATCAAGTGCGATTGAAGAGGCAGAAAGCCTAC\n+AAGCAACAGCGATCTCAATGATAACGGAAATTAGTGAAGGAAGAATCTTA\n+CACTAATTGCGCCTAACAAAATGCTGGAGGAGCTCGAAAAAGTTAAGCAA\n+AAATAAGGACGAAAACAAATGCTACCGAGTGGAAATTCAGTTATACAATT\n+ACCACTGATCTATAAACTGATGAAGGCCCAAGCTATGTTGAAGGATAATG\n+TCCTATTCATTGAAGCAAAATTGCCGATATACAACAATCAGGAAACGGAT\n+CTCTTTGAAGTAATCCCAATACCACTGTGGACAAACGGAACAAAGCTTAT\n+TCCAAAATTGAATTCTAAATTTTTTGCGTTCAATACAGACATAAACGCAT\n+ATCAGCTAATGTCTGAAATGGAAATTAACTAATGCAGACATGAGGATTCG\n+ACAACATGGCTTTGCGAAAATAATTGGGCATGGAAAAACGCGGATGAGTA\n+CCAAGCAAGGCACACTCATGCGAAATGATGGAATTCCAAGGATATTCGTT\n+CATCAAGCAGCTAATATAATATGCAACGAACAGCATCAAGTTATAGGACT\n+GCCCAATCAAGGCATTATACAACTACTTGCAGGATGCACAGCAATATTAG\n+GGGATACAACAAGAATTACTCCAAAAAGTAATTTCGACAGCGTCTGAAAT\n+GTCTATCTTTCCCAGTTTACGAATTATAGACGAGAAATGGAACGTGGTCC\n+CGCTGAAGCACTTGATTGTGAACAACACTGCAAAATCTTCAAATGCGCAT\n+CAAGACTCTGAAAAATAACAAGGTACACATTGATAACTTGATTTTCCACA\n+CGGCAAGCGGACACTCGGCTCTAGGGTTGACAACGATTATCATAATTATA\n+TTGGTCATTTATATCCGGAGGCAACGCATAAATGAGAGACGACTACTGGC\n+CGTACACTCAAGGGAATGCCTTAATATGTGTTTAGATATGATAAGTAGGT\n+AAACTATAAAAATGTTCTATTTATGGGCTGCAATAAACATGTCACCGGAC\n+AGTATAAGTGGCAACTACAGATAAGTACGATTGCAGCGGCCTATTGCCGA\n+AGTGTCAAGAGATATGACCATGCGGGAGGTGATTAGCGCGGTCATAGTCC\n+TCAAACATAGATTTAAGAATAAAACTTAGCTGCATTAACCAACGCAGACT\n+GCGGCGTCTTACAAGCGCTGCATTATATAATTATATGATAAGAACCTATG\n+TAAGAATGAATAAAAGGCGACGCCCTCGCAGCAGCGAGTCCGTTAGATTC\n+AAACACCCGAATTGAACTCATTAAGTGTACGCACAAGTTTATAGTATGAA\n+CA\n+>FBti0020412 type=transposable_element; loc=4:complement(318364..318786); name=Tc1{}1500; 
dbxref=FlyBase_Annotation_IDs:TE20412,FlyBase:FBti0020412; MD5=a6f5fa9c9f579836fdcefcd416b6996f; length=423; release=r5.49; species=Dmel; \n+CAGCTGCGGTTAAAATAATAGCACTACTGCAGGTGGAAAGTTGATTTCCT\n+AAAAAAAATTATTAAATCTTTATATTTTTTAAAGTCAGATTGCATGAATA\n+ATAAGTACCATATGTTGGCTCTCTGAGCAAGAAATTTTTAGTCTCTCAAT\n+GTAACGGTTCTTTTTGTTTTTGGGCACTTGCTGCAAAAGTGCGCGAAATA\n+AGGCGGTAACAAAAATAGCACTGACCACGTTTTTGCTGAATAAAATTAAT\n+AGGAGTGATTGCTTTGGGTTTTTTCGAAAAATTTTGAAAAAAGGAGTTGT\n+ATTAAAGGTTTTAACTGAATTTTTTCCCAACGAAGACCAAAAATTCTCTA\n+GTCATGGGTCGCGGAAAGCATTGTACCGTCGAAAAAAGAAATTTGATTAA\n+AAATATGATCTCTGAAGGTAAAA\n'
b
diff -r 000000000000 -r 234b83159ea8 tool-data/bowtie_indices.loc.sample
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool-data/bowtie_indices.loc.sample Tue Jul 11 11:44:36 2017 -0400
b
@@ -0,0 +1,37 @@
+#This is a sample file distributed with Galaxy that enables tools
+#to use a directory of Bowtie indexed sequences data files. You will
+#need to create these data files and then create a bowtie_indices.loc
+#file similar to this one (store it in this directory) that points to
+#the directories in which those files are stored. The bowtie_indices.loc
+#file has this format (longer runs of white space are TAB characters):
+#
+#<unique_build_id>   <dbkey>   <display_name>   <file_base_path>
+#
+#So, for example, if you had hg18 indexes stored in
+#/depot/data2/galaxy/bowtie/hg18/,
+#then the bowtie_indices.loc entry would look like this:
+#
+#hg18 hg18 hg18 /depot/data2/galaxy/bowtie/hg18/hg18
+#
+#and your /depot/data2/galaxy/bowtie/hg18/ directory
+#would contain hg18.*.ebwt files:
+#
+#-rw-r--r--  1 james    universe 830134 2005-09-13 10:12 hg18.1.ebwt
+#-rw-r--r--  1 james    universe 527388 2005-09-13 10:12 hg18.2.ebwt
+#-rw-r--r--  1 james    universe 269808 2005-09-13 10:12 hg18.3.ebwt
+#...etc...
+#
+#Your bowtie_indices.loc file should include an entry per line for each
+#index set you have stored. The "file" in the path does not actually
+#exist, but it is the prefix for the actual index files. For example:
+#
+#hg18canon  hg18 hg18 Canonical /depot/data2/galaxy/bowtie/hg18/hg18canon
+#hg18full  hg18 hg18 Full  /depot/data2/galaxy/bowtie/hg18/hg18full
+#/orig/path/hg19 hg19 hg19  /depot/data2/galaxy/bowtie/hg19/hg19
+#...etc...
+#
+#Note that for backwards compatibility with workflows, the unique ID of
+#an entry must be the path that was in the original loc file, because that
+#is the value stored in the workflow for that parameter. That is why the
+#hg19 entry above looks odd. New genomes can be better-looking.
+#
b
diff -r 000000000000 -r 234b83159ea8 tool_data_table_conf.xml.sample
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_data_table_conf.xml.sample Tue Jul 11 11:44:36 2017 -0400
b
@@ -0,0 +1,8 @@
+<!-- Use the file tool_data_table_conf.xml.oldlocstyle if you don't want to update your loc files as changed in revision 4550:535d276c92bc-->
+<tables>
+    <!-- Locations of indexes in the Bowtie mapper format -->
+    <table name="bowtie_indexes" comment_char="#">
+        <columns>value, dbkey, name, path</columns>
+        <file path="tool-data/bowtie_indices.loc" />
+    </table>
+</tables>
b
diff -r 000000000000 -r 234b83159ea8 tool_dependencies.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_dependencies.xml Tue Jul 11 11:44:36 2017 -0400
b
@@ -0,0 +1,18 @@
+<?xml version="1.0"?>
+<tool_dependency>
+    <package name="bowtie" version="1.1.2">
+        <repository changeset_revision="a1c1a92e13a6" name="package_bowtie_1_1_2" owner="iuc" toolshed="https://toolshed.g2.bx.psu.edu" />
+    </package>
+    <package name="pysam" version="0.8.3">
+        <repository changeset_revision="08db58be052a" name="package_python_2_7_pysam_0_8_3" owner="iuc" toolshed="https://toolshed.g2.bx.psu.edu" />
+    </package>
+    <package name="numpy" version="1.9">
+        <repository changeset_revision="f24fc0b630fc" name="package_python_2_7_numpy_1_9" owner="iuc" toolshed="https://toolshed.g2.bx.psu.edu" />
+    </package>
+    <package name="R" version="3.1.2">
+        <repository changeset_revision="4d2fd1413b56" name="package_r_3_1_2" owner="iuc" toolshed="https://toolshed.g2.bx.psu.edu" />
+    </package>
+    <package name="biocbasics" version="2.14">
+        <repository changeset_revision="f0ef1a7b157e" name="package_biocbasics_2_14" owner="mvdbeek" toolshed="https://toolshed.g2.bx.psu.edu" />
+    </package>
+</tool_dependency>