Next changeset 1:dce695815b0f (2017-07-11) |
Commit message:
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869 |
added:
size_histogram.py size_histogram.r size_histogram.xml smRtools.py smRtools.pyc static/images/size_histogram.png test-data/Size_distribution.pdf test-data/Size_distribution_dataframe.tab test-data/sample1.srbowtie_out test-data/sample2.srbowtie_out test-data/sample3.srbowtie_out test-data/transposons.fasta tool-data/bowtie_indices.loc.sample tool_data_table_conf.xml.sample tool_dependencies.xml |
b |
diff -r 000000000000 -r 234b83159ea8 size_histogram.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/size_histogram.py Tue Jul 11 11:44:36 2017 -0400 |
[ |
#!/usr/bin/python
# python parser module for size distributions, guided by GFF3
#
# Command-line script: builds one read container per alignment file, optionally
# restricts it to the regions listed in a GFF3 file, and writes a tabular
# "gene / polarity / size / count / sample" dataframe.
#
# NOTE: the script is written so that it behaves identically under Python 2.7
# (the interpreter pinned by the tool requirements); no Python-3-only syntax
# is used.

import argparse
import subprocess
from collections import OrderedDict
from smRtools import extractsubinstance
from smRtools import HandleSmRNAwindows

# Column header shared by both dataframe writers.
DATAFRAME_HEADER = "gene\tpolarity\tsize\tcount\tsample"


def Parser():
    """Parse and validate the command line.

    Returns the argparse namespace.  Exits through ``ArgumentParser.error``
    (usage message on stderr, exit status 2) when no reference is given, when
    no output or input file is given, or when --ext / --label /
    --normalization_factor provide fewer values than --input.  All of these
    situations previously surfaced later as NameError / TypeError / IndexError.
    """
    the_parser = argparse.ArgumentParser()
    the_parser.add_argument('--output_size_distribution', action="store", type=str,
                            help="size distribution dataframe")
    the_parser.add_argument('--reference_fasta', action="store", type=str,
                            help="fasta file used as reference")
    the_parser.add_argument('--reference_bowtie_index', action='store',
                            help="paths to indexed or fasta references")
    the_parser.add_argument('--input', nargs='+', help="paths to multiple input files")
    the_parser.add_argument('--ext', nargs='+', help="input file type")
    the_parser.add_argument('--label', nargs='+', help="labels of multiple input files")
    the_parser.add_argument('--normalization_factor', nargs='+', type=float,
                            help="Normalization factor for input file")
    the_parser.add_argument('--gff', type=str, help="GFF containing regions of interest")
    the_parser.add_argument('--minquery', type=int, help="Minimum readsize")
    the_parser.add_argument('--maxquery', type=int, help="Maximum readsize")
    the_parser.add_argument('--global_size', action="store_true",
                            help="if specified, size distribution is calculated for the sum of all items")
    the_parser.add_argument('--collapse', action="store_true",
                            help="if specified, forward and reverse reads are collapsed")
    args = the_parser.parse_args()

    if not (args.reference_fasta or args.reference_bowtie_index):
        the_parser.error("one of --reference_fasta or --reference_bowtie_index is required")
    if not args.output_size_distribution:
        the_parser.error("--output_size_distribution is required")
    if not args.input:
        the_parser.error("--input requires at least one alignment file")
    # Each per-sample option is indexed with the position of the input file,
    # so it must be at least as long as --input.
    for option in ("ext", "label", "normalization_factor"):
        values = getattr(args, option)
        if values is None or len(values) < len(args.input):
            the_parser.error("--%s must provide one value per --input file" % option)
    return args


args = Parser()
# When both references are supplied the bowtie index takes precedence
# (same outcome as the original pair of consecutive `if` statements).
if args.reference_bowtie_index:
    genomeRefFormat = "bowtieIndex"
    genomeRefFile = args.reference_bowtie_index
else:
    genomeRefFormat = "fastaSource"
    genomeRefFile = args.reference_fasta
size_distribution_file = args.output_size_distribution
minquery = args.minquery
maxquery = args.maxquery
filePath = args.input
fileExt = args.ext
fileLabel = args.label
normalization_factor = args.normalization_factor
global_size = args.global_size
collapse = args.collapse

# Polarity keys looked up in the histograms returned by size_histogram().
if collapse:
    pol = ["both"]
else:
    pol = ["F", "R"]

MasterListOfGenomes = OrderedDict()


def process_samples(filePath):
    """Build one HandleSmRNAwindows object per alignment file.

    filePath: list of alignment file paths; position i is paired with
    fileExt[i], fileLabel[i] and normalization_factor[i] (module globals).

    Fills the module-level MasterListOfGenomes (keyed by sample label, in
    input order) and returns it.  Two inputs sharing a label overwrite each
    other, the last one winning.
    """
    for i, alignment_path in enumerate(filePath):
        label = fileLabel[i]
        print(label)  # single argument: same output under Python 2 and 3
        MasterListOfGenomes[label] = HandleSmRNAwindows(
            alignmentFile=alignment_path,
            alignmentFileFormat=fileExt[i],
            genomeRefFile=genomeRefFile,
            genomeRefFormat=genomeRefFormat,
            biosample=label,
            size_inf=minquery,
            size_sup=maxquery,
            norm=normalization_factor[i])
    return MasterListOfGenomes


def _write_histogram_rows(handle, gene, histogram, sample, pol):
    """Write one dataframe row per (polarity, size) entry of `histogram`.

    `histogram` is indexed by the polarity keys listed in `pol`; each value
    is a mapping of read size to count.
    """
    for polarity in pol:
        for size, count in histogram[polarity].items():
            handle.write("%s\t%s\t%s\t%s\t%s\n" % (gene, polarity, size, count, sample))


def write_size_distribution_dataframe(readDict, size_distribution_file, pol=("both",)):
    """Write the per-item size distribution dataframe.

    readDict: mapping sample -> container.  When --gff was given (module
    global `args.gff`) the container is already a mapping of item name to
    object; otherwise the items are read from its `instanceDict` attribute.
    Every item must provide `size_histogram()`.
    size_distribution_file: path of the tabular file to create/overwrite.
    pol: polarity keys to export.
    """
    with open(size_distribution_file, 'w') as size_distrib:
        size_distrib.write(DATAFRAME_HEADER + "\n")
        for sample in readDict:
            if args.gff:
                items = readDict[sample]
            else:
                items = readDict[sample].instanceDict
            for gene in items:
                histogram = items[gene].size_histogram()
                _write_histogram_rows(size_distrib, gene, histogram, sample, pol)


def write_size_distribution_dataframe_global(readDict, size_distribution_file, pol=("both",)):
    """Write one size distribution per sample (all items pooled).

    Calls `size_histogram()` directly on each value of readDict and writes
    the rows with the literal gene name "sample".
    """
    with open(size_distribution_file, 'w') as size_distrib:
        size_distrib.write(DATAFRAME_HEADER + "\n")
        for sample in readDict:
            histogram = readDict[sample].size_histogram()
            _write_histogram_rows(size_distrib, "sample", histogram, sample, pol)


def gff_item_subinstances(readDict, gff3):
    """Extract, for every sample, one sub-instance per GFF3 feature.

    readDict: mapping sample -> object exposing `instanceDict[chrom]`.
    gff3: path to a tab-separated GFF3 file.  Column 1 is the reference name,
    columns 4 and 5 the coordinates, column 7 the strand; the item name is the
    text following "Name=" (up to the next ";") in the last column.

    Returns an OrderedDict sample -> {item name: sub-instance}.  Features
    sharing a name overwrite each other.  A reference name absent from
    `instanceDict` raises KeyError, as before.
    """
    GFFinstanceDict = OrderedDict()
    with open(gff3) as gff:
        for line in gff:
            # Skip comments/directives and blank lines (a blank line used to
            # raise IndexError).
            if line.startswith("#") or not line.strip():
                continue
            # rstrip instead of line[:-1]: a last line without trailing
            # newline no longer loses its final character, and CRLF files do
            # not leave "\r" inside the item name.
            gff_fields = line.rstrip("\r\n").split("\t")
            chrom = gff_fields[0]
            gff_name = gff_fields[-1].split("Name=")[-1].split(";")[0]  # to isolate the GFF Name
            item_upstream_coordinate = int(gff_fields[3])
            item_downstream_coordinate = int(gff_fields[4])
            item_polarity = gff_fields[6]
            for sample in readDict:
                if sample not in GFFinstanceDict:
                    GFFinstanceDict[sample] = {}
                subinstance = extractsubinstance(item_upstream_coordinate,
                                                 item_downstream_coordinate,
                                                 readDict[sample].instanceDict[chrom])
                if item_polarity == '-':
                    # Negate every readDict key for minus-strand features
                    # (presumably swaps read polarity relative to the
                    # feature -- confirm against smRtools).
                    subinstance.readDict = dict(
                        (key * -1, value) for key, value in subinstance.readDict.items())
                subinstance.gene = gff_name
                GFFinstanceDict[sample][gff_name] = subinstance
    return GFFinstanceDict


MasterListOfGenomes = process_samples(filePath)

if args.gff:
    MasterListOfGenomes = gff_item_subinstances(MasterListOfGenomes, args.gff)

if global_size:
    write_size_distribution_dataframe_global(MasterListOfGenomes, size_distribution_file, pol)
else:
    write_size_distribution_dataframe(MasterListOfGenomes, size_distribution_file, pol)
b |
diff -r 000000000000 -r 234b83159ea8 size_histogram.r --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/size_histogram.r Tue Jul 11 11:44:36 2017 -0400 |
[ |
## Plot read size distributions (lattice barcharts) from the tabular dataframe
## written by size_histogram.py, into a PDF.

## Setup R error handling to go to stderr
options( show.error.messages=F,
         error = function () { cat( geterrmessage(), file=stderr() ); q( "no", 1, F ) } )
library(RColorBrewer)
library(lattice)
library(latticeExtra)
library(grid)
library(gridExtra)
library(optparse)

# Parse arguments
option_list <- list(
    make_option(c("-g", "--global"), type="character", help="Whether distribution is plotted globally or by chromosome"),
    make_option(c("-s", "--size_distribution_tab"), type="character", help="Path to file with tabular size distribution"),
    make_option("--size_distribution_pdf", type="character", help="Path to file with size distribution plot"),
    make_option("--title", type="character", help="Title for readmaps and size distribution"),
    make_option("--ylabel", type="character", help="ylabel for readmaps and size distribution"),
    make_option("--yrange", type="integer", help="Y-axis range"),
    make_option("--rows_per_page", type="integer", help="rows_per_page")
    )

parser <- OptionParser(usage = "%prog [options] file", option_list=option_list)
args = parse_args(parser)

# --global selects the plot type; anything but "yes"/"no" used to yield an
# empty PDF (or a zero-length `if` condition error when the option was absent).
if (is.null(args$global) || !(args$global %in% c("yes", "no"))) {
    stop("--global must be either 'yes' or 'no'")
}
per_item = args$global == "no"

## data frame with columns gene, polarity, size, count, sample
size = read.delim(args$size_distribution_tab, header=T, row.names=NULL)
n_samples = length(unique(size$sample))

# A y range of 0 (or no --yrange at all) means auto-scaling.  The decision is
# kept in a scalar flag: the previous `ylim == ""` test compared a length-2
# vector inside `&&`, which is an error in recent R versions.
has_ylim = !is.null(args$yrange) && !is.na(args$yrange) && args$yrange != 0
if (has_ylim) {
    # This is used for specifying the y-axis limits
    ylim = c(-args$yrange, args$yrange)
}

par.settings.size=list(layout.heights=list(top.padding=1, bottom.padding=1),
                       strip.background = list(col = c("lightblue", "lightgreen"))
                       )

# use if one wants the y axis in the middle of the plot (currently not used below)
smR.prepanel=function(x,y,...){; yscale=c(-max(abs(y)), max(abs(y)));list(ylim=yscale);}

# One panel per (sample, gene): samples as columns, genes as rows with outer
# strips, args$rows_per_page rows on each page.  Extra arguments (par.settings,
# ylim) are forwarded to barchart().
plot_size_distribution = function(df, ...) {
    bc = barchart(count~as.factor(size)|factor(sample, levels=unique(sample))+gene, data = df, origin = 0,
                  horizontal=FALSE,
                  group=polarity,
                  stack=TRUE,
                  col=c('red', 'blue'),
                  cex=0.75,
                  scales=list(y=list(tick.number=4, rot=90, relation="free", cex=0.5, alternating=T), x=list(cex=.6 ) ),
                  xlab = "readsize in nucleotides",
                  ylab = args$ylabel,
                  main = args$title,
                  par.strip.text = list(cex=0.75),
                  as.table=TRUE,
                  newpage = T,
                  ...)

    combineLimits(update(useOuterStrips(bc,
                                        strip.left = strip.custom(par.strip.text = list(cex=0.5))
                                        ),
                         layout=c(n_samples,args$rows_per_page)),
                  margin.x=F, margin.y=1)
}

# One panel per sample (all genes pooled).  Extra arguments (ylim) are
# forwarded to barchart(); this replaces two otherwise identical calls.
plot_global_size_distribution = function(df, ...) {
    barchart(count~as.factor(size)|factor(sample, levels=unique(sample)),
             data = df, origin = 0,
             horizontal=FALSE,
             group=polarity,
             stack=TRUE,
             col=c('red', 'blue'),
             scales=list(y=list(tick.number=4, rot=90, relation="same"), cex=1),
             xlab = "readsize in nucleotides",
             ylab = args$ylabel,
             main = args$title, as.table=TRUE, newpage = T,
             aspect=0.5,
             strip = strip.custom(par.strip.text = list(cex = 1), which.given=1, bg="lightblue"),
             ...)
}

# Page width scales with the number of samples for the per-item layout.
if (per_item) {
    width = 8.2677*n_samples/4
} else { width = 8.2677 }

options(warn=-1)
pdf(file=args$size_distribution_pdf, paper="special", height=11.69, width=width)

# ylim is only passed when requested, so that barchart() keeps its own
# auto-scaling otherwise.
if (per_item) {
    if (has_ylim) {
        size_plot = plot_size_distribution(size, par.settings=par.settings.size, ylim=ylim)
    } else {
        size_plot = plot_size_distribution(size, par.settings=par.settings.size)
    }
} else {
    if (has_ylim) {
        size_plot = plot_global_size_distribution(size, ylim=ylim)
    } else {
        size_plot = plot_global_size_distribution(size)
    }
}
# Explicit print: trellis objects are only drawn when printed, and relying on
# top-level auto-printing is fragile.
print(size_plot)

devname=dev.off()
b |
diff -r 000000000000 -r 234b83159ea8 size_histogram.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/size_histogram.xml Tue Jul 11 11:44:36 2017 -0400 |
[ |
b'@@ -0,0 +1,168 @@\n+<tool id="artbio_size_histogram" name="Generate read size histograms" version="1.0.0">\n+ <description>from alignment files</description>\n+ <requirements>\n+ <requirement type="package" version="1.2.0=py27_0">bowtie</requirement>\n+ <requirement type="package" version="0.11.2.1=py27_0">pysam</requirement>\n+ <requirement type="package" version="1.9.3">numpy</requirement>\n+ <requirement type="package" version="1.3.2=r3.3.2_0">r-optparse</requirement>\n+ <requirement type="package" version="0.6_28=r3.3.2_0">r-latticeextra</requirement>\n+ <requirement type="package" version="2.2.1=r3.3.2_0">r-gridextra</requirement>\n+ </requirements>\n+ <command detect_errors="exit_code"><![CDATA[\n+ python \'$__tool_directory__\'/size_histogram.py\n+ #if $refGenomeSource.genomeSource == "history":\n+ --reference_fasta ## sys.argv[2]\n+ \'$refGenomeSource.ownFile\' ## index source\n+ #else:\n+ #silent reference= filter( lambda x: str( x[0] ) == str( $refGenomeSource.series[0].input.dbkey ), $__app__.tool_data_tables[ \'bowtie_indexes\' ].get_fields() )[0][-1]\n+ --reference_bowtie_index\n+ \'$reference\'\n+ #end if\n+ --output_size_distribution\n+ \'$size_distribution_dataframe\'\n+ --minquery\n+ $minquery\n+ --maxquery\n+ $maxquery\n+ --input\n+ #for $i in $refGenomeSource.series\n+ \'$i.input\'\n+ #end for\n+ --ext\n+ #for $i in $refGenomeSource.series\n+ \'$i.input.ext\'\n+ #end for\n+ --label\n+ #for $i in $refGenomeSource.series\n+ "$i.input.element_identifier"\n+ #end for\n+ #if $gff:\n+ --gff \'$gff\'\n+ #end if\n+ #if $global.value == \'yes\':\n+ --global_size\n+ #end if\n+ #if $collapsestrands.value == \'yes\':\n+ --collapse\n+ #end if\n+ --normalization_factor\n+ #for $i in $refGenomeSource.series\n+ $i.norm\n+ #end for\n+ &&\n+ Rscript \'$__tool_directory__\'/size_histogram.r\n+ --global \'$global\'\n+ --size_distribution_tab \'$size_distribution_dataframe\'\n+ --size_distribution_pdf \'$size_PDF\'\n+ --title \'$title\'\n+ --ylabel \'$ylabel\'\n+ 
--yrange \'$yrange\'\n+ --rows_per_page \'$rows_per_page\'\n+ ]]></command>\n+ <inputs>\n+ <conditional name="refGenomeSource">\n+ <param name="genomeSource" type="select" label="Will you select a reference genome from your history or use a built-in index?" help="Built-ins were indexed using default options">\n+ <option value="indexed">Use a built-in index</option>\n+ <option value="history">Use one from the history</option>\n+ </param>\n+ <when value="indexed">\n+ <repeat name="series" title="Add alignment files">\n+ <param name="input" type="data" label="Select multiple alignments to parse" format="tabular,sam,bam">\n+ <validator type="dataset_metadata_in_data_table" table_name="bowtie_indexes" metadata_name="dbkey" metadata_column="0" message="database not set for this bowtie output. Select the database(=genome used for matching) manually, or select a reference fasta from your history."/>\n+ </param>\n+ <param name="norm" type="float" value="1" label="Indicate a normalization factor to compare multiple aligments"/>\n+ </repeat>\n+ </when>\n+ <when value="history">\n+ <param name="ownFile" type="data" format="fasta" label="Select a fasta file, to serve as index reference" />\n+ <repeat name="series" title="Add alignment files">\n+ <param name="input" type="data" label="Select multiple alignments to parse" format="tabular,sam,bam"/>\n+ <param nam'..b'y" metadata_column="0" message="GFF database and alignment file databse do not match!"/> -->\n+ <param name="global" type="select" label="Generate size distribution for each item, or generate a global alignment">\n+ <option value="no">for each item</option>\n+ <option value="yes">global</option>\n+ </param>\n+ <param name="collapsestrands" type="select" label="Whether + and - reads should be collapsed or not">\n+ <option value="no">Do not collapse</option>\n+ <option value="yes">Collapse + and - reads</option>\n+ </param>\n+ <param name="minquery" type="integer" size="3" value="18" label="Min size of reads to plot" 
help="\'15\' = 15 nucleotides"/>\n+ <param name="maxquery" type="integer" size="3" value="28" label="Max size of reads to plot" help="\'30\' = 30 nucleotides"/>\n+ <param name="title" type="text" size="15" value="Size distribution" label="Main Titles"/>\n+ <param name="xlabel" type="text" size="15" value="Size in nucleotides" label="x axis label"/>\n+ <param name="ylabel" type="text" size="15" value="Number of reads" label="y axis label"/>\n+ <param name="yrange" type="integer" size="3" value="0" label="y axis range for size distributions. 0 means auto-scaling."/>\n+ <param name="rows_per_page" type="text" size="9" value="8" label="How many items to display per page?">\n+ <validator type="in_range" min="6" max="20" message="Select between 6 and 20 rows, as the readability will suffer otherwise."/>\n+ </param>\n+ </inputs>\n+\n+ <outputs>\n+ <data format="tabular" name="size_distribution_dataframe" label="Size_distribution_dataframe.tab"/>\n+ <data format="pdf" name="size_PDF" label="Size_distribution.pdf"/>\n+ </outputs>\n+\n+<help>\n+\n+**What it does**\n+\n+Takes one or more alignment files (BAM, SAM or tabular bowtie output) as input and produces a histogram of read sizes,\n+where by default for each "chromosome" a histogram of read sizes is drawn.\n+Reads that map in sense are on the top (red), reads that map antisense are on the bottom (blue).\n+\n+\n+.. class:: warningmark\n+\n+\'\'\'TIP\'\'\' The input data can be produced using the sRbowtie tool.\n+\n+----\n+\n+\'\'\'Example\'\'\'\n+\n+Query sequence::\n+For a SAM file as the following:\n+\n+ 5\t16\t2L_79\t24393\t255\t17M\t*\t0\t0\tCCTTCATCTTTTTTTTT\tIIIIIIIIIIIIIIIII\tXA:i:0\tMD:Z:17\tNM:i:0\n+\n+ 11\t0\t2R_1\t12675\t255\t21M\t*\t0\t0\tAAAAAAAACGCGTCCTTGTGC\tIIIIIIIIIIIIIIIIIIIII\tXA:i:0\tMD:Z:21\tNM:i:0\n+\n+ 2\t16\t2L_5\t669\t255\t23M\t*\t0\t0\tTGTTGCTGCATTTCTTTTTTTTT\tIIIIIIIIIIIIIIIIIIIIIII\tXA:i:0\tMD:Z:23\tNM:i:0\n+\n+produce a plot like this:\n+\n+----\n+\n+.. 
image:: static/images/size_histogram.png\n+ :height: 800\n+ :width: 500\n+\n+</help>\n+ <tests>\n+ <test>\n+ <param name="genomeSource" value="history" />\n+ <param name="ownFile" value="transposons.fasta" ftype="fasta" />\n+ <param name="series_0|input" value="sample1.srbowtie_out" ftype="tabular"/>\n+ <param name="series_0|norm" value="1" />\n+ <param name="series_1|input" value="sample2.srbowtie_out" ftype="tabular"/>\n+ <param name="series_1|norm" value="1" />\n+ <param name="series_2|input" value="sample3.srbowtie_out" ftype="tabular"/>\n+ <param name="series_2|norm" value="1" />\n+ <param name="global" value="no" />\n+ <param name="collapsestrands" value="no" />\n+ <param name="minquery" value="18"/>\n+ <param name="maxquery" value="30"/>\n+ <param name="title" value="Size distribution"/>\n+ <param name="xlabel" value="Size in nucleotides"/>\n+ <param name="ylabel" value="Number of reads"/>\n+ <param name="rows_per_page" value="10"/>\n+ <output name="size_distribution_dataframe" ftype="tabular" file="Size_distribution_dataframe.tab" />\n+ <output name="size_PDF" ftype="pdf" file="Size_distribution.pdf" />\n+ </test>\n+ </tests>\n+</tool>\n+\n' |
b |
diff -r 000000000000 -r 234b83159ea8 smRtools.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/smRtools.py Tue Jul 11 11:44:36 2017 -0400 |
[ |
b'@@ -0,0 +1,704 @@\n+#!/usr/bin/python\n+# version 1 7-5-2012 unification of the SmRNAwindow class\n+\n+import sys, subprocess\n+from collections import defaultdict\n+from numpy import mean, median, std\n+##Disable scipy import temporarily, as no working scipy on toolshed.\n+##from scipy import stats\n+\n+def get_fasta (index="/home/galaxy/galaxy-dist/bowtie/5.37_Dmel/5.37_Dmel"):\n+ \'\'\'This function will return a dictionary containing fasta identifiers as keys and the\n+ sequence as values. Index must be the path to a fasta file.\'\'\'\n+ p = subprocess.Popen(args=["bowtie-inspect","-a", "0", index], stdout=subprocess.PIPE, stderr=subprocess.STDOUT) # bowtie-inspect outputs sequences on single lines\n+ outputlines = p.stdout.readlines()\n+ p.wait()\n+ item_dic = {}\n+ for line in outputlines:\n+ if (line[0] == ">"):\n+ try:\n+ item_dic[current_item] = "".join(stringlist) # to dump the sequence of the previous item - try because of the keyerror of the first item\n+ except: pass\n+ current_item = line[1:].rstrip().split()[0] #take the first word before space because bowtie splits headers !\n+ item_dic[current_item] = ""\n+ stringlist=[]\n+ else:\n+ stringlist.append(line.rstrip() )\n+ item_dic[current_item] = "".join(stringlist) # for the last item\n+ return item_dic\n+\n+def get_fasta_headers (index):\n+ p = subprocess.Popen(args=["bowtie-inspect","-n", index], stdout=subprocess.PIPE, stderr=subprocess.STDOUT) # bowtie-inspect outputs sequences on single lines\n+ outputlines = p.stdout.readlines()\n+ p.wait()\n+ item_dic = {}\n+ for line in outputlines:\n+ header = line.rstrip().split()[0] #take the first word before space because bowtie splits headers !\n+ item_dic[header] = 1\n+ return item_dic\n+\n+\n+def get_file_sample (file, numberoflines):\n+ \'\'\'import random to use this function\'\'\'\n+ F=open(file)\n+ fullfile = F.read().splitlines()\n+ F.close()\n+ if len(fullfile) < numberoflines:\n+ return "sample size exceeds file size"\n+ return 
random.sample(fullfile, numberoflines)\n+\n+def get_fasta_from_history (file):\n+ F = open (file, "r")\n+ item_dic = {}\n+ for line in F:\n+ if (line[0] == ">"):\n+ try:\n+ item_dic[current_item] = "".join(stringlist) # to dump the sequence of the previous item - try because of the keyerror of the first item\n+ except: pass\n+ current_item = line[1:-1].split()[0] #take the first word before space because bowtie splits headers !\n+ item_dic[current_item] = ""\n+ stringlist=[]\n+ else:\n+ stringlist.append(line[:-1])\n+ item_dic[current_item] = "".join(stringlist) # for the last item\n+ return item_dic\n+\n+def antipara (sequence):\n+ antidict = {"A":"T", "T":"A", "G":"C", "C":"G", "N":"N"}\n+ revseq = sequence[::-1]\n+ return "".join([antidict[i] for i in revseq])\n+\n+def RNAtranslate (sequence):\n+ return "".join([i if i in "AGCN" else "U" for i in sequence])\n+def DNAtranslate (sequence):\n+ return "".join([i if i in "AGCN" else "T" for i in sequence])\n+\n+def RNAfold (sequence_list):\n+ thestring= "\\n".join(sequence_list)\n+ p = subprocess.Popen(args=["RNAfold","--noPS"], stdin= subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)\n+ output=p.communicate(thestring)[0]\n+ p.wait()\n+ output=output.split("\\n")\n+ if not output[-1]: output = output[:-1] # nasty patch to remove last empty line\n+ buffer=[]\n+ for line in output:\n+ if line[0] in ["N","A","T","U","G","C"]:\n+ buffer.append(DNAtranslate(line))\n+ if line[0] in ["(",".",")"]:\n+ fields=line.split("(")\n+ energy= fields[-1]\n+ energy = energy[:-1] # remove the ) parenthesis\n+ energy=float(energy)\n+ buffer.append(str(energy))\n+ return dict(zip(buffer[::2], buffer[1::2]))\n+\n+def extractsubinstance (start, end, instance):\n+ \'\'\' Testing whether this can be an function external to the class to save memory\'\'\'\n+ subinstance = SmRNAwindow (instance.gene, instance.sequence[start-1:end], start)\n+ subinstance.gene = "%s %s %s" % (subin'..b'e[offset:offset+queryhash[offset][i]]) )\n+ 
paired_sequences.append("-%s" % RNAtranslate (antipara (self.sequence[-matched_offset-targethash[matched_offset][i]+1:-matched_offset+1]) ) )\n+ if offset < 0:\n+ for i in range (paired):\n+ paired_sequences.append("-%s" % RNAtranslate (antipara (self.sequence[-offset-queryhash[offset][i]+1:-offset+1]) ) )\n+ paired_sequences.append("+%s" % RNAtranslate (self.sequence[matched_offset:matched_offset+targethash[matched_offset][i]] ) )\n+ return paired_sequences\n+\n+ def pairable (self, overlap, minquery, maxquery, mintarget, maxtarget):\n+ queryhash = defaultdict(list)\n+ targethash = defaultdict(list)\n+ query_range = range (int(minquery), int(maxquery)+1)\n+ target_range = range (int(mintarget), int(maxtarget)+1)\n+ paired_sequences = []\n+\n+ for offset in self.readDict: # selection of data\n+ for size in self.readDict[offset]:\n+ if size in query_range:\n+ queryhash[offset].append(size)\n+ if size in target_range:\n+ targethash[offset].append(size)\n+\n+ for offset in queryhash:\n+ matched_offset = -offset - overlap + 1\n+ if targethash[matched_offset]:\n+ if offset >= 0:\n+ for i in queryhash[offset]:\n+ paired_sequences.append("+%s" % RNAtranslate (self.sequence[offset:offset+i]) )\n+ for i in targethash[matched_offset]:\n+ paired_sequences.append( "-%s" % RNAtranslate (antipara (self.sequence[-matched_offset-i+1:-matched_offset+1]) ) )\n+ if offset < 0:\n+ for i in queryhash[offset]:\n+ paired_sequences.append("-%s" % RNAtranslate (antipara (self.sequence[-offset-i+1:-offset+1]) ) )\n+ for i in targethash[matched_offset]:\n+ paired_sequences.append("+%s" % RNAtranslate (self.sequence[matched_offset:matched_offset+i] ) )\n+ return paired_sequences\n+\n+ def newpairable_bowtie (self, overlap, minquery, maxquery, mintarget, maxtarget):\n+ \'\'\' revision of pairable on 3-12-2012, with focus on the offset shift problem (bowtie is 1-based cooordinates whereas python strings are 0-based coordinates\'\'\'\n+ queryhash = defaultdict(list)\n+ targethash = 
defaultdict(list)\n+ query_range = range (int(minquery), int(maxquery)+1)\n+ target_range = range (int(mintarget), int(maxtarget)+1)\n+ bowtie_output = []\n+\n+ for offset in self.readDict: # selection of data\n+ for size in self.readDict[offset]:\n+ if size in query_range:\n+ queryhash[offset].append(size)\n+ if size in target_range:\n+ targethash[offset].append(size)\n+ counter = 0\n+ for offset in queryhash:\n+ matched_offset = -offset - overlap + 1\n+ if targethash[matched_offset]:\n+ if offset >= 0:\n+ for i in queryhash[offset]:\n+ counter += 1\n+ bowtie_output.append("%s\\t%s\\t%s\\t%s\\t%s" % (counter, "+", self.gene, offset-1, self.sequence[offset-1:offset-1+i]) ) # attention a la base 1-0 de l\'offset \n+ if offset < 0:\n+ for i in queryhash[offset]:\n+ counter += 1\n+ bowtie_output.append("%s\\t%s\\t%s\\t%s\\t%s" % (counter, "-", self.gene, -offset-i, self.sequence[-offset-i:-offset])) # attention a la base 1-0 de l\'offset\n+ return bowtie_output\n+\n+\n+def __main__(bowtie_index_path, bowtie_output_path):\n+ sequenceDic = get_fasta (bowtie_index_path)\n+ objDic = {}\n+ F = open (bowtie_output_path, "r") # F is the bowtie output taken as input\n+ for line in F:\n+ fields = line.split()\n+ polarity = fields[1]\n+ gene = fields[2]\n+ offset = int(fields[3])\n+ size = len (fields[4])\n+ try:\n+ objDic[gene].addread (polarity, offset, size)\n+ except KeyError:\n+ objDic[gene] = SmRNAwindow(gene, sequenceDic[gene])\n+ objDic[gene].addread (polarity, offset, size)\n+ F.close()\n+ for gene in objDic:\n+ print gene, objDic[gene].pairer(19,19,23,19,23)\n+\n+if __name__ == "__main__" : __main__(sys.argv[1], sys.argv[2]) \n' |
b |
diff -r 000000000000 -r 234b83159ea8 smRtools.pyc |
b |
Binary file smRtools.pyc has changed |
b |
diff -r 000000000000 -r 234b83159ea8 static/images/size_histogram.png |
b |
Binary file static/images/size_histogram.png has changed |
b |
diff -r 000000000000 -r 234b83159ea8 test-data/Size_distribution.pdf |
b |
Binary file test-data/Size_distribution.pdf has changed |
b |
diff -r 000000000000 -r 234b83159ea8 test-data/Size_distribution_dataframe.tab --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/Size_distribution_dataframe.tab Tue Jul 11 11:44:36 2017 -0400 |
b |
b'@@ -0,0 +1,1461 @@\n+gene\tpolarity\tsize\tcount\tsample\n+FBti0020401\tF\t18\t1.0\tsample1.srbowtie_out\n+FBti0020401\tF\t19\t1.0\tsample1.srbowtie_out\n+FBti0020401\tF\t21\t0.0\tsample1.srbowtie_out\n+FBti0020401\tF\t22\t5.0\tsample1.srbowtie_out\n+FBti0020401\tF\t23\t1.0\tsample1.srbowtie_out\n+FBti0020401\tF\t24\t1.0\tsample1.srbowtie_out\n+FBti0020401\tF\t25\t5.0\tsample1.srbowtie_out\n+FBti0020401\tF\t26\t9.0\tsample1.srbowtie_out\n+FBti0020401\tF\t27\t4.0\tsample1.srbowtie_out\n+FBti0020401\tR\t18\t0.0\tsample1.srbowtie_out\n+FBti0020401\tR\t19\t0.0\tsample1.srbowtie_out\n+FBti0020401\tR\t21\t-2.0\tsample1.srbowtie_out\n+FBti0020401\tR\t22\t0.0\tsample1.srbowtie_out\n+FBti0020401\tR\t23\t-3.0\tsample1.srbowtie_out\n+FBti0020401\tR\t24\t-11.0\tsample1.srbowtie_out\n+FBti0020401\tR\t25\t-9.0\tsample1.srbowtie_out\n+FBti0020401\tR\t26\t-10.0\tsample1.srbowtie_out\n+FBti0020401\tR\t27\t-1.0\tsample1.srbowtie_out\n+FBti0020406\tF\t24\t0.0\tsample1.srbowtie_out\n+FBti0020406\tF\t25\t1.0\tsample1.srbowtie_out\n+FBti0020406\tF\t26\t0.0\tsample1.srbowtie_out\n+FBti0020406\tF\t28\t0.0\tsample1.srbowtie_out\n+FBti0020406\tR\t24\t-1.0\tsample1.srbowtie_out\n+FBti0020406\tR\t25\t0.0\tsample1.srbowtie_out\n+FBti0020406\tR\t26\t-1.0\tsample1.srbowtie_out\n+FBti0020406\tR\t28\t-1.0\tsample1.srbowtie_out\n+FBti0019511\tF\t18\t1.0\tsample1.srbowtie_out\n+FBti0019511\tR\t18\t0.0\tsample1.srbowtie_out\n+FBti0019512\tF\t21\t0\tsample1.srbowtie_out\n+FBti0019512\tR\t21\t0\tsample1.srbowtie_out\n+FBti0019513\tF\t21\t0\tsample1.srbowtie_out\n+FBti0019513\tR\t21\t0\tsample1.srbowtie_out\n+FBti0019473\tF\t18\t0.0\tsample1.srbowtie_out\n+FBti0019473\tF\t20\t1.0\tsample1.srbowtie_out\n+FBti0019473\tF\t21\t0.0\tsample1.srbowtie_out\n+FBti0019473\tF\t22\t0.0\tsample1.srbowtie_out\n+FBti0019473\tF\t23\t2.0\tsample1.srbowtie_out\n+FBti0019473\tF\t24\t4.0\tsample1.srbowtie_out\n+FBti0019473\tF\t25\t2.0\tsample1.srbowtie_out\n+FBti0019473\tF\t26\t9.0\tsample1.srbowtie_out\n+FBti0019473\tF\t
27\t6.0\tsample1.srbowtie_out\n+FBti0019473\tR\t18\t-1.0\tsample1.srbowtie_out\n+FBti0019473\tR\t20\t-1.0\tsample1.srbowtie_out\n+FBti0019473\tR\t21\t-2.0\tsample1.srbowtie_out\n+FBti0019473\tR\t22\t-5.0\tsample1.srbowtie_out\n+FBti0019473\tR\t23\t-3.0\tsample1.srbowtie_out\n+FBti0019473\tR\t24\t-17.0\tsample1.srbowtie_out\n+FBti0019473\tR\t25\t-10.0\tsample1.srbowtie_out\n+FBti0019473\tR\t26\t-9.0\tsample1.srbowtie_out\n+FBti0019473\tR\t27\t-3.0\tsample1.srbowtie_out\n+FBti0019518\tF\t20\t0.0\tsample1.srbowtie_out\n+FBti0019518\tF\t23\t0.0\tsample1.srbowtie_out\n+FBti0019518\tF\t24\t2.0\tsample1.srbowtie_out\n+FBti0019518\tF\t25\t1.0\tsample1.srbowtie_out\n+FBti0019518\tF\t26\t2.0\tsample1.srbowtie_out\n+FBti0019518\tF\t27\t0.0\tsample1.srbowtie_out\n+FBti0019518\tR\t20\t-1.0\tsample1.srbowtie_out\n+FBti0019518\tR\t23\t-1.0\tsample1.srbowtie_out\n+FBti0019518\tR\t24\t-1.0\tsample1.srbowtie_out\n+FBti0019518\tR\t25\t-1.0\tsample1.srbowtie_out\n+FBti0019518\tR\t26\t0.0\tsample1.srbowtie_out\n+FBti0019518\tR\t27\t-1.0\tsample1.srbowtie_out\n+FBti0019519\tF\t19\t1.0\tsample1.srbowtie_out\n+FBti0019519\tF\t21\t1.0\tsample1.srbowtie_out\n+FBti0019519\tF\t22\t3.0\tsample1.srbowtie_out\n+FBti0019519\tF\t23\t4.0\tsample1.srbowtie_out\n+FBti0019519\tF\t24\t2.0\tsample1.srbowtie_out\n+FBti0019519\tF\t25\t3.0\tsample1.srbowtie_out\n+FBti0019519\tF\t26\t1.0\tsample1.srbowtie_out\n+FBti0019519\tF\t27\t4.0\tsample1.srbowtie_out\n+FBti0019519\tF\t28\t1.0\tsample1.srbowtie_out\n+FBti0019519\tR\t19\t0.0\tsample1.srbowtie_out\n+FBti0019519\tR\t21\t0.0\tsample1.srbowtie_out\n+FBti0019519\tR\t22\t0.0\tsample1.srbowtie_out\n+FBti0019519\tR\t23\t-1.0\tsample1.srbowtie_out\n+FBti0019519\tR\t24\t-1.0\tsample1.srbowtie_out\n+FBti0019519\tR\t25\t0.0\tsample1.srbowtie_out\n+FBti0019519\tR\t26\t-2.0\tsample1.srbowtie_out\n+FBti0019519\tR\t27\t0.0\tsample1.srbowtie_out\n+FBti0019519\tR\t28\t0.0\tsample1.srbowtie_out\n+FBti0019498\tF\t21\t0\tsample1.srbowtie_out\n+FBti0019498\tR\t21\t0\tsample1.
srbowtie_out\n+FBti0019514\tF\t23\t1.0\tsample1.srbowtie_out\n+FBti0019514\tR\t23\t0.0\tsample1.srbowtie_out\n+FBti0019515\tF\t18\t1.0\tsample1.srbowtie_out\n+FBti0019515\tF\t20\t1.0\tsample1.srbowtie_out\n+FBti0019515\tF\t23\t0.0\tsample1.srbowtie_out\n+FBti0019515\tF\t24\t1.0\tsample1.srbowtie_out\n+FBti0019515\tF\t25\t1.0\tsample1.srbowtie_out\n+FBti0019515\tF\t26\t2.0\tsample1.srbowtie_out\n+FBti0019515\tF\t27\t0.0\tsample1.srbowtie_out\n+FBti0019515\tF\t2'..b'Bti0020410\tF\t28\t1.0\tsample3.srbowtie_out\n+FBti0020410\tR\t19\t-1.0\tsample3.srbowtie_out\n+FBti0020410\tR\t20\t0.0\tsample3.srbowtie_out\n+FBti0020410\tR\t22\t-2.0\tsample3.srbowtie_out\n+FBti0020410\tR\t23\t-3.0\tsample3.srbowtie_out\n+FBti0020410\tR\t24\t-7.0\tsample3.srbowtie_out\n+FBti0020410\tR\t25\t-15.0\tsample3.srbowtie_out\n+FBti0020410\tR\t26\t-10.0\tsample3.srbowtie_out\n+FBti0020410\tR\t27\t-8.0\tsample3.srbowtie_out\n+FBti0020410\tR\t28\t-2.0\tsample3.srbowtie_out\n+FBti0020403\tF\t24\t3.0\tsample3.srbowtie_out\n+FBti0020403\tF\t25\t3.0\tsample3.srbowtie_out\n+FBti0020403\tF\t21\t2.0\tsample3.srbowtie_out\n+FBti0020403\tF\t22\t0.0\tsample3.srbowtie_out\n+FBti0020403\tF\t23\t0.0\tsample3.srbowtie_out\n+FBti0020403\tR\t24\t0.0\tsample3.srbowtie_out\n+FBti0020403\tR\t25\t-3.0\tsample3.srbowtie_out\n+FBti0020403\tR\t21\t0.0\tsample3.srbowtie_out\n+FBti0020403\tR\t22\t-1.0\tsample3.srbowtie_out\n+FBti0020403\tR\t23\t-3.0\tsample3.srbowtie_out\n+FBti0019486\tF\t19\t1.0\tsample3.srbowtie_out\n+FBti0019486\tF\t20\t0.0\tsample3.srbowtie_out\n+FBti0019486\tF\t23\t0.0\tsample3.srbowtie_out\n+FBti0019486\tF\t24\t1.0\tsample3.srbowtie_out\n+FBti0019486\tF\t25\t0.0\tsample3.srbowtie_out\n+FBti0019486\tF\t26\t0.0\tsample3.srbowtie_out\n+FBti0019486\tR\t19\t0.0\tsample3.srbowtie_out\n+FBti0019486\tR\t20\t-1.0\tsample3.srbowtie_out\n+FBti0019486\tR\t23\t-1.0\tsample3.srbowtie_out\n+FBti0019486\tR\t24\t-1.0\tsample3.srbowtie_out\n+FBti0019486\tR\t25\t-1.0\tsample3.srbowtie_out\n+FBti0019486\tR\t26\t-3.0\ts
ample3.srbowtie_out\n+FBti0019489\tF\t21\t0\tsample3.srbowtie_out\n+FBti0019489\tR\t21\t0\tsample3.srbowtie_out\n+FBti0019484\tF\t20\t0.0\tsample3.srbowtie_out\n+FBti0019484\tF\t23\t1.0\tsample3.srbowtie_out\n+FBti0019484\tF\t24\t3.0\tsample3.srbowtie_out\n+FBti0019484\tF\t25\t3.0\tsample3.srbowtie_out\n+FBti0019484\tF\t26\t2.0\tsample3.srbowtie_out\n+FBti0019484\tF\t27\t2.0\tsample3.srbowtie_out\n+FBti0019484\tR\t20\t-2.0\tsample3.srbowtie_out\n+FBti0019484\tR\t23\t0.0\tsample3.srbowtie_out\n+FBti0019484\tR\t24\t-2.0\tsample3.srbowtie_out\n+FBti0019484\tR\t25\t0.0\tsample3.srbowtie_out\n+FBti0019484\tR\t26\t-1.0\tsample3.srbowtie_out\n+FBti0019484\tR\t27\t0.0\tsample3.srbowtie_out\n+FBti0019485\tF\t24\t1.0\tsample3.srbowtie_out\n+FBti0019485\tF\t25\t0.0\tsample3.srbowtie_out\n+FBti0019485\tF\t23\t0.0\tsample3.srbowtie_out\n+FBti0019485\tR\t24\t0.0\tsample3.srbowtie_out\n+FBti0019485\tR\t25\t-1.0\tsample3.srbowtie_out\n+FBti0019485\tR\t23\t-1.0\tsample3.srbowtie_out\n+FBti0019482\tF\t26\t0.0\tsample3.srbowtie_out\n+FBti0019482\tF\t23\t0.0\tsample3.srbowtie_out\n+FBti0019482\tR\t26\t-1.0\tsample3.srbowtie_out\n+FBti0019482\tR\t23\t-1.0\tsample3.srbowtie_out\n+FBti0020400\tF\t18\t2.0\tsample3.srbowtie_out\n+FBti0020400\tF\t19\t2.0\tsample3.srbowtie_out\n+FBti0020400\tF\t20\t5.0\tsample3.srbowtie_out\n+FBti0020400\tF\t21\t6.0\tsample3.srbowtie_out\n+FBti0020400\tF\t22\t6.0\tsample3.srbowtie_out\n+FBti0020400\tF\t23\t36.0\tsample3.srbowtie_out\n+FBti0020400\tF\t24\t76.0\tsample3.srbowtie_out\n+FBti0020400\tF\t25\t89.0\tsample3.srbowtie_out\n+FBti0020400\tF\t26\t82.0\tsample3.srbowtie_out\n+FBti0020400\tF\t27\t20.0\tsample3.srbowtie_out\n+FBti0020400\tF\t28\t12.0\tsample3.srbowtie_out\n+FBti0020400\tR\t18\t0.0\tsample3.srbowtie_out\n+FBti0020400\tR\t19\t-1.0\tsample3.srbowtie_out\n+FBti0020400\tR\t20\t-5.0\tsample3.srbowtie_out\n+FBti0020400\tR\t21\t-2.0\tsample3.srbowtie_out\n+FBti0020400\tR\t22\t-7.0\tsample3.srbowtie_out\n+FBti0020400\tR\t23\t-6.0\tsample3.srbowtie_ou
t\n+FBti0020400\tR\t24\t-23.0\tsample3.srbowtie_out\n+FBti0020400\tR\t25\t-22.0\tsample3.srbowtie_out\n+FBti0020400\tR\t26\t-11.0\tsample3.srbowtie_out\n+FBti0020400\tR\t27\t-11.0\tsample3.srbowtie_out\n+FBti0020400\tR\t28\t-1.0\tsample3.srbowtie_out\n+FBti0019480\tF\t21\t0.0\tsample3.srbowtie_out\n+FBti0019480\tF\t22\t1.0\tsample3.srbowtie_out\n+FBti0019480\tF\t23\t0.0\tsample3.srbowtie_out\n+FBti0019480\tF\t24\t3.0\tsample3.srbowtie_out\n+FBti0019480\tF\t25\t2.0\tsample3.srbowtie_out\n+FBti0019480\tF\t26\t2.0\tsample3.srbowtie_out\n+FBti0019480\tF\t27\t1.0\tsample3.srbowtie_out\n+FBti0019480\tR\t21\t-1.0\tsample3.srbowtie_out\n+FBti0019480\tR\t22\t0.0\tsample3.srbowtie_out\n+FBti0019480\tR\t23\t-2.0\tsample3.srbowtie_out\n+FBti0019480\tR\t24\t-5.0\tsample3.srbowtie_out\n+FBti0019480\tR\t25\t-1.0\tsample3.srbowtie_out\n+FBti0019480\tR\t26\t0.0\tsample3.srbowtie_out\n+FBti0019480\tR\t27\t0.0\tsample3.srbowtie_out\n' |
b |
diff -r 000000000000 -r 234b83159ea8 test-data/sample1.srbowtie_out --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/sample1.srbowtie_out Tue Jul 11 11:44:36 2017 -0400 |
b |
b'@@ -0,0 +1,2000 @@\n+12\t-\tFBti0019493\t2417\tAGTTCTTCCGAAAGGCTGCAGATG\n+19\t+\tFBti0020400\t2078\tTCGATGCGTAGATTTTTGGACGGGGC\n+60\t-\tFBti0019473\t2975\tCACATACCACAAGCCTACAAAACA\n+83\t+\tFBti0020400\t899\tTGAGCGACCGGCATTCCCTCGTCGAA\n+204\t-\tFBti0019493\t454\tCATCGGCCCAGAAGAGTTCATGCA\n+215\t+\tFBti0019509\t1142\tTGGACTTGGAGTGCAGCGCCTTGG\n+241\t-\tFBti0020402\t3533\tTGGTATACTTGGACGACATTATTGTA\n+253\t+\tFBti0020400\t6961\tTTTGGCTAGCGCTAGATCGGAAGCAGC\n+258\t+\tFBti0019493\t2877\tATCGTAGAAACGTGGGGAGCGT\n+280\t+\tFBti0019493\t855\tTCTCGAATGCTTGCCCGATATACGGAGC\n+271\t+\tFBti0020400\t591\tCGAAGACTTGTAGAATTAGCGCGGGC\n+282\t-\tFBti0019493\t454\tCATCGGCCCAGAAGAGTTCATGCA\n+284\t-\tFBti0020410\t2118\tAACATAACTCGAGCAGCAAAGCAA\n+242\t-\tFBti0019495\t3364\tCCCTTATGGACCCACAGTAACTTA\n+287\t+\tFBti0019500\t727\tTGTAAACGGTAGCTAATTCGAGCGGC\n+300\t-\tFBti0019495\t915\tTCCATACTCTGCCAAACGTCATAA\n+309\t+\tFBti0019503\t150\tTAGATGTCTGTACGAAAGCGAGAAGC\n+310\t+\tFBti0020400\t552\tTCCTAGTTTGCTGAAAAAAAGACCACT\n+349\t+\tFBti0019493\t1165\tCTGGTCGTTAAAGGGAGTTTTGGC\n+359\t-\tFBti0019493\t4308\tTGGCGTATCCAGACACATGCCCCATA\n+382\t+\tFBti0019493\t416\tGAGAAGCCGAAGGTCGTCGTC\n+405\t+\tFBti0019493\t4387\tCACGCCACGTAAAATAAGTTCGGAGG\n+428\t+\tFBti0019519\t1380\tTGCTAGAAGGCGATTTTCGGGC\n+431\t-\tFBti0020400\t8266\tTCCACACTTGGAATTTTGCAC\n+435\t+\tFBti0019493\t2979\tTTTGCTGGAGCGAACCTTCCGTA\n+337\t-\tFBti0019493\t3242\tGCGACTAGCCTCCTGCCAGA\n+467\t+\tFBti0020400\t6587\tTCGCCTGCGAAAACTTCTGTATCGC\n+419\t+\tFBti0019493\t1906\tTGGATGACTGGAAACGCTTCATGG\n+458\t-\tFBti0019500\t898\tCCGAAAATCGTCTTCTAGCA\n+488\t-\tFBti0020404\t1358\tTCTCAGCGTGAGCAAGCGGAAAGAG\n+495\t-\tFBti0019493\t453\tCCATCGGCCCAGAAGAGTTCATGCA\n+500\t+\tFBti0020410\t3150\tGTGGAAGTTACCGATGACTTGGG\n+514\t-\tFBti0020400\t2066\tACGACCATGTCATCGATGCGCA\n+522\t+\tFBti0019509\t1142\tTGGACTTGGAGTGCAGCGCCT\n+555\t+\tFBti0020400\t6826\tTTCCTTCTCAACCTTGTCCCATAGGG\n+565\t+\tFBti0019493\t2928\tACGGTAATCATGCTGCTGAAAGGTGC\n+599\t-\tFBti0019509\t52\tACCGCGCGCGCAGTTG
AGAGCATA\n+637\t+\tFBti0019493\t2296\tTCGTCTCGCTGCTCAAAGGGCCAGA\n+741\t-\tFBti0019493\t3939\tTCGTAAGAGACGACTAATACAGCGATA\n+742\t-\tFBti0020401\t52\tTCAGCACCGCCAGATGGCCCGTAATC\n+773\t-\tFBti0019495\t3364\tCCCTTATGGACCCACAGTAACTTA\n+768\t-\tFBti0019493\t1947\tCCATGGGGGCGCGTCTACAAGATTT\n+787\t+\tFBti0019493\t3809\tTGATCAAGAGAAGATTCAACGGCT\n+828\t+\tFBti0020400\t5460\tTCTTGGACTGAGCAGCTACTGTT\n+835\t+\tFBti0019493\t2408\tTGTGAGAGAAGTTCTTCCGGAAGGC\n+834\t-\tFBti0019517\t381\tTGGAACAGTCTTTATTTGTAAGCTTA\n+844\t+\tFBti0020400\t4451\tATACATGAAATTTCGAATGACA\n+855\t-\tFBti0019493\t454\tCATCGGCCCAGAAGAGTTCATGCA\n+853\t-\tFBti0019473\t2977\tCATACCTCAAGCCTACAAAACA\n+868\t+\tFBti0019493\t1385\tTGCTGTCAGAGTAGATGCTGGAG\n+877\t+\tFBti0020400\t5460\tTCTTGGACTGAGCAGCTACTGTT\n+882\t+\tFBti0020400\t2662\tCGTTAAGTCAGATGTAGGCTCTAAA\n+940\t+\tFBti0019503\t199\tAAGTATTGGTGTATGCGGACTAGAACA\n+985\t+\tFBti0020400\t6725\tTCATAGACTGAATAGCAATAACTG\n+1011\t-\tFBti0019493\t850\tTATGCTCTCGAATGCTTGCCCGATATA\n+1039\t-\tFBti0019515\t1251\tGCTCGAGATCGTATCACACCAACA\n+1044\t-\tFBti0019495\t4491\tTCCCAATAAGATCGAAGCTATAAAAAA\n+1062\t-\tFBti0020401\t2809\tTAGTCCCTGGCATCCGTCTGCAAC\n+1072\t+\tFBti0020400\t6725\tTCATAGACTGAATAGCAATAACTGTA\n+1084\t-\tFBti0019503\t302\tTCAGTCAGTTTTCGATCGTTACGCA\n+1131\t+\tFBti0019500\t90\tTCTCGAATTTTTGTTAGAGAGCGAGA\n+1098\t+\tFBti0019493\t2445\tTTTGGATTTCGCCAAGGACGATGTG\n+1117\t+\tFBti0019509\t398\tTGGATGACTGGAAACGCTTCGTGGG\n+1112\t+\tFBti0020400\t8274\tTGGAATTTTGCACTGTTCGGATGAATA\n+1140\t-\tFBti0020410\t1557\tTCAAATTCAGAGCACAATCAATCG\n+982\t+\tFBti0020398\t323\tTGAAAAATAACGGATGATTAGTGA\n+1155\t-\tFBti0019493\t1370\tACAGACGGGGTGAGCTGCTGTCAGA\n+1156\t+\tFBti0019507\t160\tTGATAGTAGACAACTGTATGTGTGC\n+1166\t-\tFBti0019493\t812\tCGGAACTGCCGTCACAGAGGGCAA\n+1189\t+\tFBti0020400\t635\tTATTGATCGTGGTTAGTTTTACAC\n+1245\t-\tFBti0019493\t694\tCCACAGATGTGTGGGTTTCGACCATA\n+1242\t+\tFBti0019493\t3838\tTGTTTGCGGAAGAAGTGTTCCTGAGGA\n+1246\t-\tFBti0020402\t1593\tTCTACTATTGGAGGACTTCCGGAATA\n+1257\t+\tF
Bti0019507\t660\tTCAGATGAGAGACAAATTAGAAT\n+1261\t-\tFBti0020400\t4393\tCTTGCACCATGAGCTGAGCTTCTT\n+1275\t+\tFBti0019493\t1601\tGAGTAGAACAGCCGAACTTCCGGA\n+1300\t+\tFBti0019493\t1133\tTGGAAACCCTCACCACAGATTATGGC\n+1329\t-\tFBti0019492\t405\tGGCTCTCTGACTGGCGAATA\n+1361\t+\tFBti0020400\t2669\tTCAGATGTAGGCTCTAAATGGATGG\n+1387\t-\tFBti0019493\t454\tCATCGGCCCAGAAGAGTTCATGCA\n+1298\t+\tFBti0020400\t1829\tTGAAAAAT'..b'AATGGTCAAGTCGGACT\n+30606\t-\tFBti0019493\t1390\tTCTGAGTGGATGCTGGAGGCAAGAGTC\n+30653\t-\tFBti0019495\t4445\tCAGAATTCCTTGGCCACATCGTTTCA\n+30669\t-\tFBti0020410\t6017\tTACAACTACTTGCAGGATGCACAGCA\n+30670\t+\tFBti0019473\t2781\tTACATCTCTACACCCCCTCTCCA\n+30682\t-\tFBti0019504\t690\tACTGTTCATACAGCGATTCGTAGGAA\n+30698\t-\tFBti0019473\t3016\tCCTTAAGCCAAACACCGACAAAACGAA\n+30706\t-\tFBti0019509\t52\tACCGCGCGCGCAGTTGAGAGCATAGC\n+30743\t-\tFBti0019493\t1092\tAGCCATCCTAGTGGATCATCAGGAA\n+30752\t-\tFBti0020401\t478\tTCTCCGTCAATTGCACACAGGTGA\n+30825\t-\tFBti0019473\t4745\tCTTATCCTCACCCCAAACATAAA\n+30836\t-\tFBti0019507\t625\tTGTTTCAAAAAAAATATTTA\n+30875\t+\tFBti0019493\t3284\tTTCGGTATGCCGAACAGTGTCCAGAGT\n+30878\t+\tFBti0020400\t5802\tCACGGCAAGTAAAACAATTGTGAGTG\n+30880\t+\tFBti0019493\t3510\tTACGACAGCGAACCAGGACGGGTG\n+30889\t-\tFBti0019493\t1199\tTTTGCGCCGCATACTGCCAGTTCGA\n+30885\t+\tFBti0020401\t3437\tTAATTTATCATCGGCATCGGGTACG\n+30902\t+\tFBti0020400\t7164\tTAATCGAAGAAATACGAAATGGAAGGGA\n+30916\t-\tFBti0020400\t6345\tGCACATTAAACAGTATTTTCAATTGA\n+30907\t+\tFBti0019493\t3160\tTATATGACGGACTCATGGCACCTTG\n+30924\t+\tFBti0019493\t3823\tCTCAACGGCTGGCAATGTTCGCGGA\n+30937\t+\tFBti0019519\t1380\tTGCTAGAAGACGATTTTCGGGCCGA\n+30959\t+\tFBti0019509\t893\tTCCAGAGTTGGCAGAACAGATGGGA\n+30971\t+\tFBti0019493\t3085\tTGACCGGAGTCGTTGGAGCATTGGCG\n+30983\t+\tFBti0019500\t55\tTTTTTCGCCGTGGCTCTAGAGGTGGC\n+31000\t+\tFBti0019472\t690\tTCAGATGAGAGACAAATTAGAAT\n+31015\t-\tFBti0019515\t2449\tCGTCAACTCCACAAGACAACCATGTA\n+31039\t-\tFBti0019473\t4922\tGACCAAATAAAAATAATACGACTTCG\n+31049\t-\tFBti0020400\t8266\tTCCA
CATTTGGAATTTTGCACTGTTCGG\n+31059\t-\tFBti0020410\t3332\tTGCAGCGACTGTGTATACAAAAGTAGGC\n+31080\t-\tFBti0020410\t4859\tAGTATGACAATCAGCTCTTTGGATA\n+31154\t+\tFBti0019493\t2610\tTGCCGGGAAATGGGCTTGTGGCAGA\n+31152\t-\tFBti0019493\t454\tCATCGGCCCAGAAGAGTTCATGC\n+31164\t-\tFBti0019493\t3648\tGGGAGAGCCCTCAGCGATACA\n+31207\t-\tFBti0020400\t3561\tTCTGCTGGATTATGCTCAGATTGAACA\n+31217\t+\tFBti0020400\t6725\tTCATAGACTGAATAGCAATAACTGTATT\n+31225\t-\tFBti0020402\t5551\tCATTACTACAACACACAATTCAAAA\n+31278\t+\tFBti0019509\t704\tCATCAATGGCACTATCTGCAAGGCAGTC\n+31285\t-\tFBti0019473\t4756\tCCCAAACATAAATACCACAGATATAA\n+31317\t+\tFBti0020400\t1771\tTGTTGACGAGGAGCCATTCTGCGCG\n+31346\t+\tFBti0019493\t287\tCATGAGGTACGTGAGCTCCGTCGAGG\n+31361\t+\tFBti0020400\t1131\tTCCTGGAGCGAATCGTCGTCGTGCAT\n+31377\t+\tFBti0019493\t1823\tTGACGATGCCGCTGTAGAGCTTGTAG\n+31415\t+\tFBti0019507\t263\tTTTTTGATCAATTGGCACCATGCGAA\n+31463\t-\tFBti0020402\t3479\tCCTTCCAACGATGTATGAACGATGTA\n+31488\t-\tFBti0019493\t454\tCATCGGCCCAGAAGAGTTCATGCA\n+31490\t-\tFBti0019493\t1270\tCAGGCCAGCAGAACCCCCGC\n+31534\t+\tFBti0020400\t2132\tTTGAGGTACTCTTCTTTCCATCGCA\n+31560\t-\tFBti0019499\t1538\tTCTTTTGCTTTAAGCTTTGTACTATGA\n+31564\t-\tFBti0019509\t1129\tTGCGGGACCTAGATGGACTTGGA\n+31601\t+\tFBti0020400\t4711\tTCAGAATTTGAATGGTCAAGTCGGA\n+31651\t-\tFBti0019515\t2460\tCAAGACAACCATGTATGCAGCACA\n+31670\t-\tFBti0020400\t484\tTAGTTTCTTTGATATGACTTTTGCA\n+31688\t-\tFBti0019493\t1213\tTGCCAGTTCGATGCACCTCTGGAA\n+31731\t-\tFBti0019493\t453\tCCATCGGCCCAGAAGAGTTCATGCA\n+31745\t+\tFBti0019493\t3692\tATATGAGGACTGGATGCACATTTTG\n+31799\t-\tFBti0019492\t248\tCCCCAAGGCAGCGTTCTTG\n+31842\t+\tFBti0019503\t283\tTTTGTGAAGAAGAGATCAGTCAGTCAGT\n+31850\t+\tFBti0020400\t7003\tGCGCTGTACTGTGGCTTTGGATGGAG\n+31849\t+\tFBti0019483\t588\tTTATTTGTAAGCTTATTCGTGGTGA\n+31846\t-\tFBti0020402\t3385\tGACCCAGAGTCTATCCCCAAAACA\n+31884\t+\tFBti0019473\t2833\tGTATCTAGAACTTAGCTCAGCACT\n+31893\t-\tFBti0019495\t3364\tCCCTTATGGACCCACAGTAACTTA\n+31908\t+\tFBti0019518\t616\tTGTAAACGGTAGCTAATTC
GAGCG\n+31919\t+\tFBti0019493\t1601\tGAGTAGAACAGCCGAACTTCCGGA\n+31977\t+\tFBti0019473\t4801\tCCTTGCGACAAAACAGAAAGAACACA\n+31985\t-\tFBti0019509\t83\tTGCCGTCCTGGAACTTCTGCAATG\n+31992\t+\tFBti0020400\t3602\tTGTTTTTGTGGCCTGGGCGATCTT\n+32016\t-\tFBti0020410\t6206\tTCTGAAAAATAACAAGGTACACATTGA\n+32036\t-\tFBti0020400\t6230\tAGCAGGCAGTCCCAGTTCTCAGTGTTAA\n+32029\t-\tFBti0019493\t2862\tGCACAACTAATGTCCATCGTAGAAA\n+32060\t-\tFBti0019473\t2975\tCACATACCTCAAGCCTACAAAACA\n+32080\t+\tFBti0019493\t3160\tTATATGACGGACTCATGGCACCTTG\n+32089\t+\tFBti0019493\t2877\tATCGTAGAAACGTGGGGAGCGGA\n+32097\t-\tFBti0019493\t1915\tCGAAACGCTTCGTGGGAGATCATG\n+32115\t+\tFBti0019493\t1829\tTGCCGCTGTAGAGCTTGTAGTGG\n+32124\t-\tFBti0019509\t687\tTCTCCCGGGTTGGACGGCATCAATG\n+32134\t-\tFBti0020403\t837\tCCCTAATATGTACCATGTAAATT\n' |
b |
diff -r 000000000000 -r 234b83159ea8 test-data/sample2.srbowtie_out --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/sample2.srbowtie_out Tue Jul 11 11:44:36 2017 -0400 |
b |
b'@@ -0,0 +1,2000 @@\n+78\t-\tFBti0020400\t2880\tTCACACATACTTTGCACGAATTTA\n+66\t-\tFBti0019493\t2648\tCCGAAGAGCAGTGATCCGAAGCAG\n+121\t+\tFBti0019493\t3157\tCCATATATGACGGACTCGTGGCACC\n+120\t+\tFBti0019519\t1324\tTGCTTGCTTGTGTGAGTAAAAACAAGA\n+152\t-\tFBti0019473\t2278\tTACCTCCCACAACGTAAA\n+162\t-\tFBti0019504\t869\tCTCACATGGCACAAATCGATAATA\n+171\t+\tFBti0020401\t4591\tATCGCTCTTGGGATCTGGTTGA\n+199\t+\tFBti0020400\t2669\tTCAGATGTAGGCTCTAAATGGATGGCC\n+222\t-\tFBti0019493\t2862\tGCACAACTAATGTCCATCGTAGAAA\n+231\t+\tFBti0020398\t169\tTTCCGAAGAGGATGGATACCGCGG\n+250\t+\tFBti0019493\t1601\tGAGTAGAATAGCCGAACTTCCGG\n+256\t+\tFBti0019503\t150\tTAGATGTCTGTACGAAAGCGAGAAGC\n+333\t-\tFBti0019499\t672\tGTTAGTAGTTCTACAATTCGTCGCA\n+435\t+\tFBti0019519\t1982\tTCTAAGAACTTCTGAGGTGAAGGGC\n+453\t+\tFBti0020400\t5598\tTAAAGTTAGTGCCAAGATGGGAGA\n+476\t+\tFBti0020400\t8514\tACTTATCTTATAAGTTGGTCGCTGAT\n+493\t+\tFBti0019493\t2681\tTGTGGATGTACCGGTAACTAGAGGC\n+495\t+\tFBti0020400\t6752\tTTAGCCCTGAGAATAGGCATGTTGCT\n+507\t-\tFBti0020400\t3630\tACCCTATTGGCTACAAATGTGGTCCA\n+513\t+\tFBti0019493\t1823\tTGACGATGCCGCTGTAGAGCTTG\n+515\t+\tFBti0019509\t1132\tGGGACCTAGATGGACTTGGAGTGC\n+523\t+\tFBti0019493\t519\tCCCAGTTTAAAAAGTCGGTGCACC\n+555\t+\tFBti0020400\t8437\tTCTTACAGCAAATTGAACGAAATTGGA\n+567\t+\tFBti0019493\t1670\tCAGTATAGTACATAGTGTATGTGA\n+599\t+\tFBti0019493\t3693\tTATGAGGACTGGATGCACATTTTG\n+603\t+\tFBti0019476\t1666\tTATGATTTGTGTGATTGCCCGACCT\n+607\t-\tFBti0019486\t1174\tCACAGTGCGTAAACAGTTTCTTGA\n+645\t+\tFBti0019493\t2928\tACGGTAATCATGCTGCTGAAAGGTG\n+642\t+\tFBti0019500\t727\tTGTAAACGGTAGCTAATTCGAGCGG\n+654\t+\tFBti0020400\t8215\tCTTTGGACTTTAGAAATTTTCACT\n+670\t+\tFBti0019493\t3510\tTACGACAGCGAACCAGGACGGGT\n+671\t+\tFBti0019493\t983\tTGAGATCGGAGTTAATGTTTGCTC\n+683\t-\tFBti0020410\t1670\tTGCAATCGATGCTCCAAAGGACACA\n+761\t+\tFBti0019493\t4279\tGCTATAGCTTCTAGTCCG\n+774\t+\tFBti0020400\t5355\tTTGGGTAGGAAGGCGGATTTCCGGC\n+777\t+\tFBti0019493\t1601\tGAGTAGAACAGCCGAACTTCCGGAA\n+785\t-\tFBti0020400\t2344\tTTCT
ATTTTTGCCAAGAGCGTGGAGA\n+784\t-\tFBti0020398\t420\tCCACTTTTGCCACTGGACGTTTAATA\n+796\t-\tFBti0020401\t6160\tACATCCTGCTCACCGAAGTCATAC\n+818\t+\tFBti0019493\t3576\tTCAAGTTTTGGATTCTCGATGAGGACGT\n+843\t+\tFBti0019493\t3696\tGAGGACTGGATGCACATTTTGTGCGC\n+882\t+\tFBti0020400\t635\tTATTGATCGTGGTTAGTTTTACACTAA\n+927\t-\tFBti0019504\t1138\tCCTGGGTTAAAAGATCGTTCGAACA\n+933\t-\tFBti0020395\t404\tCCCTATGAAACCACAAACAATAA\n+953\t+\tFBti0020400\t6426\tAGCGGAAACCCTCGTTGGT\n+966\t-\tFBti0020401\t3795\tTCCCGTTGAGTAAACTGGTATTGTTA\n+970\t-\tFBti0020400\t3047\tATCACTGGATGCCGTTCATTGTA\n+1010\t+\tFBti0019473\t2594\tGCCAATATAAGACGCTTCTGCGGACT\n+1030\t+\tFBti0020400\t3944\tGAAATCTGGACGAAACGATAGC\n+1026\t-\tFBti0020395\t509\tGCATTCAAGCTGAAGTCTGTGCTATTG\n+1064\t-\tFBti0019493\t2915\tCTCGACCAGCAAGACGGTAATCA\n+1104\t+\tFBti0020400\t5501\tTGGACTTGAACCACCTGGAA\n+1112\t-\tFBti0020401\t2670\tCCTGACGGCCCATACTATTGCTAAA\n+1118\t+\tFBti0019493\t1801\tTCCAAGATGGAAGGCGTCGAGATGACGA\n+1158\t+\tFBti0020400\t4101\tCGAACGATAAACGGTGCTAACCA\n+1167\t+\tFBti0020400\t2078\tTCGATGCGGAGATTTTTGGACGGGG\n+1178\t+\tFBti0020400\t7009\tTACTGTGGCTTTGGATGGGGTAGC\n+1189\t+\tFBti0019493\t3809\tTGATCAAGAGAAGACTCAACGGCTGGC\n+1188\t-\tFBti0019493\t1318\tTCCCCCATGTGGCTTAGCAAACTCT\n+1213\t+\tFBti0019504\t2148\tAAAGATGGCACTAGTGATCGT\n+1263\t-\tFBti0019515\t605\tTATCGGAGAAAACAAGTTCCATGTTA\n+1260\t-\tFBti0019509\t1129\tTGCGGGACCTAGATGGACTTGGA\n+1264\t-\tFBti0020400\t4555\tCTCCAAAGGTTACTGTTTTCAATTCGA\n+1267\t+\tFBti0019516\t712\tTAGAATTGAACATAAATATAAATGTG\n+1296\t+\tFBti0019519\t1324\tTGCTTGATTGTGTGAGTAAAAACA\n+1334\t+\tFBti0020400\t8659\tCGAAGACTTGTAGAATTAGCGCGGGC\n+1358\t+\tFBti0019493\t4042\tAGCTATCCAAGACTGCTCATTGAGG\n+1359\t-\tFBti0019493\t3338\tCCCGCTTGATCTGGCTGCTAAGTTA\n+1370\t-\tFBti0019476\t3131\tCACATCACTCAGAATCCATAAGAA\n+1398\t-\tFBti0019515\t1124\tTCCTAAGAATAAAAACGACAGTTCAA\n+1421\t+\tFBti0019493\t2408\tTGTGAGAGAAGTTCTACCGGAAGGC\n+1424\t+\tFBti0019504\t843\tGGAGATTGCGGTTTCGGTAGCGT\n+1464\t+\tFBti0020400\t2078\tTCGATGCGTAGATTTT
TGGACGGGGC\n+1477\t-\tFBti0020400\t7023\tATGGAGTAGCACAGTCGTCGG\n+1491\t+\tFBti0020402\t841\tTAAGGAAGTAAACCCAAATCAATCAGT\n+1497\t-\tFBti0019499\t847\tCATTTCAGTACCAGGGGTCATACA\n+1498\t-\tFBti0019495\t7910\tTCCTGGCCGTCCATCACAA\n+1509\t+\tFBti0019493\t3692\tATATGAGGACTGGATGCACATTTTG\n+1523\t-\tFBti0020400\t2880\tTCACACATACTTTGCACGAATTTA\n+1542\t-\tFBti0019473\t2981\tCCTCAAGCCTACAAAACA\n+1543\t-\tFBti00194'..b'\t934\tTCTTGTTTTTACTCACACAAGCAAGCA\n+44318\t+\tFBti0019493\t3692\tATATGAGGACTGGATGCACATTTTG\n+44340\t+\tFBti0020400\t4711\tTCAGAATTTGAATGGTCAAGTCGGA\n+44362\t+\tFBti0019499\t682\tCTACAATTCGTCGCAGGCTAAAA\n+44375\t+\tFBti0019519\t1982\tTCTAAGAACTTCTGAGGTGAAGGGCATT\n+44413\t+\tFBti0019493\t2877\tATCGTAGAAACGTGGGGAGCGGA\n+44456\t-\tFBti0019473\t2977\tCATACCACAAGCCTACAAAACA\n+44536\t+\tFBti0020400\t5460\tCCTTGGACTGAGCAGCTACTGTT\n+44541\t-\tFBti0020402\t3277\tGCAGACAAATACCCAATACCAAATA\n+44552\t-\tFBti0019473\t2743\tTTCAGAGGAGTTCCGGAACAATAA\n+44559\t+\tFBti0019509\t847\tTGTCTAAGCTGGGAGCAGAGGAAGA\n+44575\t+\tFBti0020400\t5294\tTCATAGAATTTTGGATCCGCCAGT\n+44614\t+\tFBti0019473\t302\tATTCATCATAAAAAAATCGGTGGAC\n+44702\t+\tFBti0020400\t6743\tTAACTGTATTTAGCCCTGAGAATAGG\n+44729\t+\tFBti0019503\t283\tTTTGTGAAGAAGAGATCAGTCAGT\n+44754\t-\tFBti0019493\t1894\tGGAGGGCGAAGATGGATGACTG\n+44795\t+\tFBti0019493\t3809\tTGATCAAGAGAAGACTCAACGGC\n+44791\t+\tFBti0019493\t2877\tATCGTAGAAACGTGGGGAGCGG\n+44829\t-\tFBti0019499\t380\tGCCCAAAACAAAAGAGTTAACAGTTG\n+44865\t+\tFBti0019495\t2740\tCAGCAATTACCGCAGCGGACG\n+44892\t+\tFBti0019501\t493\tGTTGCCTTGCACCCACGATGATGGT\n+44898\t+\tFBti0020400\t659\tTAAAACTTATTCTAATGCGTTGGGT\n+44958\t+\tFBti0020400\t6540\tTATAAATGGCTGTGAAAAGATCCCGGA\n+44965\t+\tFBti0020400\t7019\tTTGGATGGAGTAGCACAGTCGTCG\n+44971\t-\tFBti0019515\t1093\tTTTGCAGCGATGCCCACAGTACTGTAAA\n+44970\t-\tFBti0019493\t1544\tTGTGCCGTCCTGGAACTTCTGCAATG\n+45003\t+\tFBti0019492\t393\tACATTGAGAAGTGGCTCTCTGA\n+45007\t+\tFBti0019493\t3696\tGAGGACTGGATGCACATTTTGTGCGC\n+45002\t-\tFBti0019507\t899\tTCTTGTTTTTA
CTCACACAAGCAAGCA\n+45023\t+\tFBti0019473\t1008\tTTCGAACGTCACGGCTTCCAAACGA\n+45028\t+\tFBti0020400\t591\tCGAAGACTTGTAGAATTAGCGCGGGC\n+45065\t+\tFBti0019504\t732\tTATTTTGATGAACGTTTTGAACGGACG\n+45070\t+\tFBti0020400\t6066\tTCGGTACGGACGGTCTCACATCAT\n+45075\t-\tFBti0019493\t3939\tTCGTAAGAGACGACTAATACAGCGATA\n+45083\t-\tFBti0020410\t4863\tTGACAATCAGCTCTTTGGATAGA\n+45095\t-\tFBti0019517\t459\tTCCAAGTAGATTTACTAGAAACTATA\n+45156\t-\tFBti0019493\t2444\tATTTGGATTTCGCCAAGGACGATGTGT\n+45213\t+\tFBti0019493\t3507\tGACGACGACAGCGAACCAGAACGGG\n+45261\t+\tFBti0019499\t715\tTTAAGACATACGTTGTTCGCAAAACGA\n+45306\t-\tFBti0020401\t2725\tTCCGCTGCGTTTAGCGTTCTACTGGA\n+45316\t+\tFBti0019509\t665\tTGCCCGGTTGAAGAGCAGGCGCTCT\n+45315\t+\tFBti0020400\t635\tTATTGATCGTGGTTATTTTTACA\n+45322\t+\tFBti0020400\t5293\tCTCATAGAATTTTTGATCCGCCAGT\n+45330\t+\tFBti0020400\t6050\tTTGGAATGCGACTGACTCGGTACGGAA\n+45335\t-\tFBti0019473\t2743\tTTCAGAGGAGTTCCGGAACAATAA\n+45348\t+\tFBti0020401\t4578\tTAGTAGTTCCGGCATCGCTCTTGGGA\n+45361\t+\tFBti0019503\t105\tTTTCTGCCGAACGTAGTCTGGTCGCGG\n+45401\t-\tFBti0019517\t355\tTCCCTAGCTAACCGTAGAACTATGAT\n+45403\t-\tFBti0020401\t2670\tCCTGACGGCCCATATTATTGCTAAA\n+45456\t+\tFBti0020410\t4100\tCTATAATGTAAAACATCCAATTATA\n+45494\t+\tFBti0020400\t6918\tTCTGAGCCTCAAATTCGCTTAAGCGGT\n+45504\t-\tFBti0019500\t223\tCCCTTCACCTTAGAAGTTCGTTGA\n+45506\t-\tFBti0020402\t2308\tACCCATCAACAGTTCAATCCTTACATG\n+45507\t+\tFBti0020400\t5460\tTCTTGGACTGAGCAGCTACTGTT\n+45550\t-\tFBti0019473\t733\tTCCCACTTCGATGCAAAAAATGCCTC\n+45568\t+\tFBti0019495\t4548\tTAAGTCTTTCCTAGGCTTGTTGGGA\n+45573\t-\tFBti0019473\t2742\tTTTCAGAGGAATTCCGGAACAATA\n+45574\t-\tFBti0019476\t1654\tTCCCACCGAATTTATGATTTGT\n+45593\t-\tFBti0019476\t3129\tGACACATCACTCAGAATCCATAAGAA\n+45609\t-\tFBti0019493\t690\tCCTGCCACAGATGCGTGGGTTTCGACCA\n+45680\t+\tFBti0020410\t4198\tTAAGAACTTATATTCAGAGGGAGT\n+45720\t+\tFBti0020400\t2078\tTCGATGCGGAGATTTTTGGACG\n+45722\t-\tFBti0020402\t3533\tTGGTATACTTGGACGACATTATTGTA\n+45733\t+\tFBti0020400\t4711\tTCAGAATTTGAATGG
TCAAGTCGGA\n+45738\t-\tFBti0019504\t690\tACTGTTCATACAGCGATTCGTAGGAA\n+45760\t+\tFBti0020398\t113\tTAATGAGGACTGCTCGCAAACGC\n+45771\t+\tFBti0019493\t1742\tTTGGTGGACCGCCGACCTCTGCGCT\n+45795\t+\tFBti0020401\t5810\tTGAAGAAACAGCAGAACATGTGCTA\n+45806\t+\tFBti0019493\t4098\tGTGGTTGGTACCCATATCGCGGG\n+45816\t+\tFBti0019493\t3809\tTGATCAAGAGAAGACTCAACGGC\n+45858\t-\tFBti0019499\t509\tAAAGTTTGATACAGTTGGCACATTAA\n+45859\t+\tFBti0019502\t145\tTGATAGCAGACAACTGTATGTGTGC\n+45884\t+\tFBti0020400\t5697\tTTGGAACGAAATTGGCCTGATTAGC\n+45906\t+\tFBti0020401\t5836\tTGCACTGCTCCAGGTTTACGGAGGA\n+45911\t+\tFBti0020401\t5810\tTGAAGAAACAGCAGAACATGTG\n+45927\t+\tFBti0019495\t4845\tCTTGTCGGCTGCGGAAACAAAC\n+45935\t-\tFBti0019493\t265\tACCCTCCCTCGGAGTCAGAGTACATG\n+45937\t-\tFBti0019493\t307\tTCGAGGTGGTGGTGCGATCATTCGTA\n' |
b |
diff -r 000000000000 -r 234b83159ea8 test-data/sample3.srbowtie_out --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/sample3.srbowtie_out Tue Jul 11 11:44:36 2017 -0400 |
b |
b'@@ -0,0 +1,2000 @@\n+64\t+\tFBti0020400\t1381\tTGAACTCCTACGCTGAGTTGGCGGAT\n+96\t+\tFBti0020400\t6361\tTTTCAATTGACTGTTTACCAACAAT\n+199\t-\tFBti0019516\t34\tTACGATTTTTTGGCACACGATTTTT\n+239\t-\tFBti0020400\t6021\tTTCTAGAGGGCCCACTTGAGTTCA\n+240\t-\tFBti0019499\t667\tAACAAGTTAGTAGTTCTACAATTC\n+391\t+\tFBti0019499\t789\tGAATATGTTAAGAAGCCTCTTGA\n+396\t+\tFBti0019516\t588\tGTAGTTATAATAATTTCTATTGTACT\n+409\t+\tFBti0020400\t6880\tCATCGTGACGGTTGGAGCTGCGG\n+416\t-\tFBti0019473\t2973\tGCCACATACCACAAGCCTACAAAACA\n+433\t-\tFBti0019504\t690\tACTGTTCATACAGCGATTCGTAGGAA\n+476\t+\tFBti0020400\t7167\tTCGAAGAAATACGAAATGGAAGGGA\n+487\t-\tFBti0019517\t79\tATAACACGAGTTGATGATCTTGGT\n+509\t-\tFBti0019492\t447\tAGCACGTGACGTTTACGCTAAACA\n+606\t-\tFBti0019493\t1947\tCCATGGGGGCGCGTCTACAAGATTT\n+638\t-\tFBti0019493\t4256\tCTCGAATTGCTACCGGGGTGGTCGCTA\n+654\t+\tFBti0020405\t414\tTGATTGTTGAGTGCTTGTGTC\n+710\t+\tFBti0019517\t14\tTGACCTTTTGTCGTGCCAACCCAA\n+746\t-\tFBti0019503\t160\tTACGAAAGCGAGAAGCATACAGAAA\n+1019\t-\tFBti0019493\t4048\tCCAAGACTGCTCATTGAGGTA\n+1042\t+\tFBti0020400\t5460\tTCTTGGACTGAGCAGCTACTGTTTGG\n+1162\t+\tFBti0019504\t248\tTGAAGTTTGACTGCTGAAGTCGA\n+1260\t-\tFBti0020410\t649\tTCCAACGGATCACGCCAAAGAATACA\n+1276\t+\tFBti0020401\t5810\tTGAAGAAACAGCTGAACATGTG\n+1289\t-\tFBti0019515\t2533\tGACGACATCGAAGGGAGTGGCAGGA\n+1376\t-\tFBti0019484\t118\tGCGAAGAGCGCTACAGCGAA\n+1390\t+\tFBti0019493\t4178\tTGAGACCCAGAGATCAGTAGAGATT\n+1406\t+\tFBti0020400\t2298\tTCGGATCCTCAGACATAGGAGAGA\n+1414\t+\tFBti0019493\t3510\tGACGACAGCGAACCAGGACGGGTGAC\n+1423\t+\tFBti0019503\t283\tTTTGTGAAGAAGAGATCAGTCAGT\n+1453\t+\tFBti0020410\t6223\tTACACATTGATGACTTGATTTTCCA\n+1481\t+\tFBti0019503\t332\tGACATGCCTCGCTCAAGCGCCAGA\n+1513\t+\tFBti0019503\t199\tAAGTATTGGTGTATGCGGACTAGAAC\n+1522\t-\tFBti0020402\t5498\tTAGAACAAACAGACCCTGACCATTA\n+1527\t+\tFBti0020400\t8703\tTATTGATCGTGGTTAGTTTTACACTAAA\n+1598\t+\tFBti0019480\t593\tCGAATAGTGATTGCGAACCCCCGAAT\n+1607\t+\tFBti0019493\t1553\tCTGGAACTTCTCCAATGCACGTTGGCGA\n+1623\t+\tFBti0
019493\t1165\tCTGGTCGTTAAAGGGAGTTTTGGC\n+1731\t-\tFBti0020410\t6102\tCTATCTTCCCCAGTTTACGAATTA\n+1745\t+\tFBti0020400\t3416\tTTTTAGAGCACGCTTCTCCAGGTCGG\n+1797\t-\tFBti0019493\t3335\tTCCCCCGCTTGATCTGGCT\n+1836\t+\tFBti0019493\t1906\tTGGATGACTGGAAACGCTTCGTGGG\n+1851\t+\tFBti0020400\t7689\tTTATTGTTCGGAAATTAATTTGGA\n+1858\t+\tFBti0019473\t2880\tCAGGACTAAATAGAATCTCGTATCAA\n+1885\t-\tFBti0020410\t6415\tTCTATTTATGGGCTGCAATAAACATG\n+1896\t+\tFBti0019493\t3596\tTAGGACGTCTTTCCTGCTTACAGGGC\n+1912\t+\tFBti0019503\t283\tTTTGTGAAGAAGAGATCAGTCAGT\n+1946\t-\tFBti0019499\t664\tGCAAACAAGTTAGTAGTTCTACA\n+1985\t-\tFBti0020400\t3128\tAGAAACGGGTTCATGCTTAGGAT\n+1992\t+\tFBti0020400\t7019\tTTGGATGGGGTAGCACAGTCGTCGG\n+2021\t-\tFBti0019493\t454\tCATCGGCCCAGAAGAGTTCATGCA\n+2053\t+\tFBti0019519\t1382\tCTAGAAGACGATTTTCGGGCCGAAT\n+2055\t+\tFBti0020400\t5091\tCCAGTGAATTATCGTACGCGTGGGA\n+2063\t-\tFBti0020410\t4762\tGCTAAACTCGCGTCCATTAGTCACTGTA\n+2090\t-\tFBti0019499\t740\tGATTGAGATCACACCAACCAACAA\n+2103\t-\tFBti0019493\t3942\tTAAGAGACGACTAATACAGCGATA\n+2166\t+\tFBti0020404\t904\tTTTCAAGTCGACTTGAAGGTCATA\n+2177\t+\tFBti0020400\t5644\tAAGGATAGCTCTTGAGCCCGTGG\n+2207\t-\tFBti0019504\t1425\tCCAAAGTTATCAAAGCCGTTCAAAA\n+2260\t+\tFBti0019499\t677\tTAGTTCTACAATTCGTCGCAGGC\n+2390\t+\tFBti0019493\t1906\tTGGATGACTGGAAACGCTTCGTGG\n+2430\t+\tFBti0019503\t99\tTTAGAGTTTCTGCCGAACGTAGTCTGG\n+2518\t+\tFBti0019493\t2932\tTAATCATGCTGCTGAAAGGTGCCT\n+2541\t-\tFBti0019493\t2486\tGCACGTGAAGAGCAGTGTTGGTGCCA\n+2574\t+\tFBti0020400\t5460\tTCTTGGACTGAGCAGCTACTGTT\n+2619\t+\tFBti0019493\t2781\tTATTGTCAGCTGAGTGCATACGCGGA\n+2667\t-\tFBti0019493\t3466\tGCCTAGAGGAGTGTTTAATCCAG\n+2705\t+\tFBti0019502\t694\tTGTAAACGGTAGCTAATTCGAGCGGC\n+2754\t+\tFBti0019493\t3693\tTATGAGGACTGGATGCACATTTTG\n+2862\t+\tFBti0019493\t1385\tTGCTGTCAGAGTGGATGCTGGAGG\n+2878\t+\tFBti0019493\t1385\tTGCTGTCAGAGTGGATGCTGGA\n+2948\t-\tFBti0019515\t2449\tCGTCAACTCCACAAGACAACCATGTA\n+2967\t+\tFBti0019473\t1713\tTATTTGGGGGCGTAGCACTACTAG\n+3015\t-\tFBti0019493\t1318\t
TCCCTCATGTGGCTTAGCAAACTCT\n+3031\t-\tFBti0020400\t5418\tACGCGGGCCTAGTCGGAGATCGGATG\n+3063\t+\tFBti0020400\t5644\tAAGGATAGCTCTTGAGCCCGTGGCGA\n+3084\t+\tFBti0020395\t887\tTACCAGGACTTAGGCAGTCTAAGAT\n+3087\t-\tFBti0019499\t1343\tACCAAAAAGACTTCAAGCAGTTA\n+3117\t-\tFBti0019493\t1277\tGCAGAACCCCCGCAATCCTG\n+3148\t+\tFBti0020410\t6255\tCGCGGACACTCGGCTCTAGGGTTG\n+3179\t+\tFBti0019493\t2548\tTCAAAGGAGCATTCGA'..b'CGAAGAGCAGTGA\n+64863\t-\tFBti0019515\t1250\tAGCTCGAGATCGTATCACACCAACA\n+64895\t+\tFBti0019493\t3484\tTCCAGAGTTGGCAGAACAGATGGGAC\n+64911\t-\tFBti0019473\t2977\tCATACCACAAGCCTACAAAACA\n+64952\t-\tFBti0019515\t1952\tTCTCTGAGAAAAAATTCACAACACTTT\n+65132\t+\tFBti0019493\t1385\tTGCTGTCAGAGTGGATGCTGGAG\n+65271\t+\tFBti0020401\t6217\tCAAGGACTGTAGAGCACGCGG\n+65280\t-\tFBti0019499\t846\tGCATTTCAGTACCAGGGGTCATAA\n+65278\t+\tFBti0019480\t492\tCTACAATGTTTTAAATCGCTCGGA\n+65332\t+\tFBti0019476\t1666\tTATGATTTGTGTGATTGCCCGACCA\n+65360\t+\tFBti0019493\t1600\tTGAGTAGAACAGCCGAACTTCCGGA\n+65386\t+\tFBti0020400\t2537\tTGAAGGAAATCGCGGGAAAGCAGG\n+65401\t-\tFBti0020400\t4234\tTCCAGCGAATACCGAGGGTTTTGGC\n+65456\t-\tFBti0019507\t226\tTTCACCTTCAAAGTTCTTTGA\n+65474\t-\tFBti0020400\t6321\tCCCCAAATTCCTGTGGTATCGACTGCA\n+65504\t+\tFBti0019493\t1821\tGATGACGATGCCGCTGTAGAGCTTG\n+65520\t-\tFBti0019483\t119\tAAGCTCACTGCTTAACGATCTTC\n+65521\t-\tFBti0019473\t3381\tTCCGCGTCGGTCCGCATACATCAA\n+65543\t+\tFBti0020400\t5752\tCGAATTGCTGGAACAGAGGTTGTT\n+65574\t-\tFBti0019482\t86\tTCCGCTTCTTTTTACCTTATTCATTA\n+65605\t+\tFBti0020400\t7110\tAAATAGACCTGGGTTTGTCAGCGG\n+65621\t+\tFBti0019493\t1599\tGTGAGTAGAATAGCCGAACTTCCGG\n+65736\t+\tFBti0019519\t1820\tAATATTCTTACATAAAGTCATTTT\n+65749\t+\tFBti0019493\t1906\tTGGATGACTGGAAACGCTTCGTGG\n+65790\t-\tFBti0019507\t223\tCCCTTCACCTTCAAAGTTCTTTGA\n+65814\t-\tFBti0020402\t3475\tGCTACCTTCCAACGATGTATGAACGA\n+65836\t-\tFBti0019499\t1178\tGCCCCCCCAAAGCCCAGACCTTA\n+65932\t+\tFBti0020410\t6646\tTATGTAAGAATGAATAAAAGGC\n+65987\t+\tFBti0020410\t3214\tTACGCTAAAGGTCTATTATAGAAAATAA\n+66015\t-
\tFBti0019502\t365\tTTTCAAAAATGACTTTATATAAGA\n+66043\t-\tFBti0020402\t3941\tCCATACTTCGATCACCAGACTTCA\n+66064\t+\tFBti0019493\t4191\tTCAGTAGAGATTTTAGGTAGATCT\n+66090\t+\tFBti0019493\t705\tTGGGTTTCGACCACAAGGTTAGTGAA\n+66102\t+\tFBti0020400\t6881\tATCGTGACGGTTGGAGCTGC\n+66100\t+\tFBti0019504\t248\tTGAAGTTTGACTGCTGACGTCGA\n+66127\t+\tFBti0019503\t370\tCCATAACGAGAGTAGTGAAGAGGAA\n+66159\t+\tFBti0019493\t1601\tTAGTAGAACAGCCGAACTTCCGGA\n+66278\t+\tFBti0020400\t6743\tTAACTGTATTTAGCCCTGAGAATAGG\n+66282\t-\tFBti0019480\t478\tTCCCGTAAGACCGTCTACAATGTT\n+66311\t+\tFBti0019473\t2880\tCAGGACTAAATAGAATCTCGTATC\n+66319\t+\tFBti0020403\t160\tTGATAGCAGACAACTTTATGTGTGC\n+66424\t+\tFBti0019493\t3479\tTTTAATCCAGAGTTGGCAGAACA\n+66468\t+\tFBti0019503\t283\tTTTGTGAAGAAGAGATCAGTCAGTCA\n+66570\t+\tFBti0020400\t3708\tCAGTAAAGTTCGGAGTTAATCGTA\n+66586\t+\tFBti0019503\t100\tTAGAGTTTCTGCCGAACGTAGTC\n+66578\t+\tFBti0020401\t249\tTACGATGGGATCTTGGGCATCAACA\n+66619\t+\tFBti0019503\t150\tTAGATGTCTGTACGAAAGCGAGAAGC\n+66659\t+\tFBti0019499\t1555\tTGTACTATGAACCGTTATCTTTCGT\n+66698\t+\tFBti0020400\t8437\tTCTTACAGCAAATTGAACGAAATTGGA\n+66707\t+\tFBti0019493\t3693\tTATGAGGACTGGATGCACATTTTG\n+66720\t-\tFBti0019473\t2160\tTCACAACACTATTCCCAACAACCAAT\n+66826\t-\tFBti0020402\t4884\tCACGCCAAGTATAGAATTCTGTAGAGA\n+66824\t+\tFBti0019484\t360\tCATATTATCACAAAAATAAATTTCAAA\n+66875\t-\tFBti0019493\t1318\tTCCCTCATGTGGCTTAGCAAACTCT\n+66973\t+\tFBti0019509\t398\tTGGATGACTGGAAACGCTTCGTGGG\n+67015\t+\tFBti0019493\t448\tCACAGCCATCGGCCCAGAAGAGTTCATG\n+67035\t+\tFBti0020400\t3035\tTTATACGGAAGAATCACTGGATGGC\n+67038\t+\tFBti0019519\t1382\tCTAGAAGACGATTTTCGGGC\n+67062\t-\tFBti0020404\t808\tGACTGAAAAATCGAGCAATATATAATA\n+67067\t+\tFBti0019507\t263\tTTTTTGATCAATTGGCACCATGCGAAA\n+67146\t+\tFBti0019516\t736\tTGTAAACGGTAGCTAATTCGAGCGGC\n+67195\t-\tFBti0019493\t3337\tCCCCGCTTGATCTGGCTGCTAA\n+67241\t-\tFBti0020400\t5474\tGCTACTGTTTGGTTTAAGCCCGAGA\n+67329\t+\tFBti0019499\t192\tGAGATCTTTATCAGTTGTCAGAA\n+67390\t+\tFBti0019493\t2548\tTCAAAGGAGCA
TTCGACAACGTCGAA\n+67395\t+\tFBti0020395\t638\tTAACATCTAAGCTAGTAAGTGAAGT\n+67394\t+\tFBti0020400\t8405\tTGTCGGAGAAATCCGTTAGATCTGA\n+67412\t-\tFBti0020404\t809\tACTGAAAAATCGAGCAATATATAATA\n+67515\t+\tFBti0019509\t398\tTGGATGACTGGAAACGCTTCGTGG\n+67622\t+\tFBti0019503\t283\tTTTGTGAAGAAGAGATCAGTC\n+67639\t-\tFBti0020400\t4259\tAGTACTTTCTGCATCGATCTCGAGAAA\n+67667\t+\tFBti0019493\t1385\tTGCTGTCAGAGTGGATGCTGGAG\n+67712\t+\tFBti0020400\t5460\tTCTTGGACTGAGCAGCTACTGTT\n+67719\t+\tFBti0019493\t3162\tTATGACGGACTCATGGCACCTT\n+67734\t+\tFBti0020400\t7019\tTTGGATGGAGTAGCACAGTCGTCG\n+67769\t+\tFBti0019517\t517\tTATGAGTTGCATGACCCTTACAGA\n+67784\t+\tFBti0019503\t100\tTAGAGTTTCTGCCGAACGTAGTCT\n+67826\t+\tFBti0019480\t22\tCACACTGTGAAATAAGTTGAATTTTT\n+67869\t-\tFBti0020402\t4612\tGCCCATAAAGAAATTATCAACCCAA\n' |
b |
diff -r 000000000000 -r 234b83159ea8 test-data/transposons.fasta --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/transposons.fasta Tue Jul 11 11:44:36 2017 -0400 |
[ |
b'@@ -0,0 +1,1992 @@\n+>FBti0019472 type=transposable_element; loc=4:complement(73579..74622); name=1360{}ci[1482]; dbxref=FlyBase_Annotation_IDs:TE19472,FlyBase:FBti0019472; MD5=dbdb9ad5b597dd3bae178bef975a9472; length=1044; release=r5.49; species=Dmel; \n+CAAAGACACTAGAATAACAAGATGCGTAACGCCATACAATTTTTTGGCAC\n+ACGATTTTTTCGCCGTGGCTCTAGAGGTGGCTCCAGGCTCTCTCGAATTT\n+TTGTTAGAGAGCGAGAGAGCAGAGAGTGCTACAGCGAACAGCTCTTTTCA\n+TCGCATAAAGTGATAGCAGACAACTGTATGTGTGCACACGTATGCTCATG\n+CATTGTAAATTTGACAAAATATGCCCTTCACCTTAGAAGTTCTTTGACTT\n+TAAATCTATATTATTTTTGATCAATTGGCACCATGTGAAAAATTCTTGTT\n+TTGCATTGCCTTAACGTTATTATTATATGAAAATAGATTAGAAAGAGCCA\n+AATCTATGTACATATTATCACAAAAATAAATTTAAAAATGACTTTATATA\n+TTTAGAATATTTGTCATTAGAGTATTCGTCTTGCGGCGTGTAAAAATTAA\n+TTAGGCAATGATTGTTGAGTGCTTGTGTCCGCACTTCGTGCCTCAAGATA\n+TGACCAAAGCAAAGACACTAGAATAATTCTAGACTTTTGCAATAAACAGT\n+TATCATATTCTTATGGAATTTATGAAATTCCAGTAGTTATAATAATTTCT\n+ATTGTACTTCCTTTAATTATTTACTATATTTATTAAGTCATTTAACATTA\n+AAAGTGTTTCAAAAAAAAATATTTCGCTTTTAAAAAATTGTCAGATGAGA\n+GACAAATTAGAATTAAACATAAATATATATGTGTAAACGGTAGCTAATTC\n+GATCGGCGATTTTAACAAACAAATTTTAAAAGCTTTAATTAAGCGCGAAT\n+TTTAAAAAAATTATTTAATTTCATCATATTTCTAGGAAATTGGCAAAAAC\n+TACCCTAATATGTACAATGTAAATTCGTTTTTTCGATCAGAATTGATTTC\n+GGCCCGAAAATAGTCTTCTAGCACAACACGCACACATATACGCGTTCTCG\n+TCTCTTGTTTTTACTCACACAAGCAAGCAAATTCTATTTTTAGATTTCTT\n+ACGCTCTCAGCGTGAGCGAGCGGAAAGAGAGCAATTTTGGTCGT\n+>FBti0019473 type=transposable_element; loc=4:97180..102547; name=I{}1483; dbxref=FlyBase_Annotation_IDs:TE19473,FlyBase:FBti0019473; MD5=2348e74b1f342684866e0fbde8690c16; length=5368; release=r5.49; species=Dmel; 
\n+TACCACTTCAACCTCCGAAGAGATAAGTCGTGCCTCTCAGTCTAAAGCCT\n+CGCTTCGCGTAAGCCCAAAACTCTTATCAGCAAAATCTTGATAAACAAAT\n+ATCAACCACAAAGAGAAAATAAAAAACTTAACAACAAAAACAACAATACC\n+GCTAATCCGGGCTCAAGCCCTTAACCAACAATCATGACAGACCCACCAAA\n+CATTTACAAAATCACTTCAAAAACATACCAATCCCAATTAGGCGAACCTA\n+AATTTATAATTATTAAAAGAAATGACAACAACTCTTTCGAAAGAACTTCA\n+CCATTCATCATAAAAAAATCGGTGGACTTTGCCTGTGGAGGAGAAGTTGA\n+GGGATGCAAACGTACAAGAGACGGCAACCTGCTAATAAAAACCAAAAATG\n+AATTACAAGCCAGAAAACTCCTAAAACTAACAAAAATTGCAGATGAGGAT\n+GTAACAGCAAGTGAACATAAAACATTAAACTTCTCTAAGGGAGTTATTTA\n+CTGTAACGACCTTAGACACATCGACGAAGACACAATTCTACAAGAACTAA\n+AACCACAAAAAGTATCTGAAGTTAAAAAAATAATGAAACGGCAAAACCCC\n+AACTCTAACTCCGACACCAACAACATCACATTAGTTGAAACTGGACTCAT\n+AATTATAACCTTTGAATCGCATAAGCTCCCCGAGATAGTACGAATCGGGT\n+ACGAAACAGTCCGAGTACGAGACTATATCCCACTCCCACTTCGATGCAAA\n+AAATGCCTCCGCTTCGGTCATCCAACACCCATATGCAAAAGTGTAGAAAC\n+TTGCATCAATTGCTCTGAAACAAAACACACAAACGACGGAGAAAAATGCA\n+CAAACGAAAAAAACTGCTTAAATTGCCGAAATAACCCAGAACTTGACCAT\n+CAACACAGCCCAATTGACCGCAAATGCCCTACGTTCATAAAAAACCAGGA\n+ATTAACAGCAATTAAAACCACACAAAAAGTTGACCATAAAACGGCCCAAC\n+ACATATATTTCGAACGTCACGGCTTCCAAACGAAAAACACCTACGCCAAA\n+ACACTTACAAACGGCACAACCCAGAGGACAACAAACACTCCATCACCTAA\n+TATTCACACAAACACAACCCAATCACAACAACAAAATCCGCACCACACAC\n+CCAAATCAGCAGCACAAAACACTTCAGCTAAGACACCAACAACTGAACCA\n+GCCAAAACAACCTTACTATCCAACCAACCACACCAACACCACCACCACCA\n+CAGCTACGACAAACTAGAAGACATGGATACCGACTACACACCTACCAGAA\n+AACCATCTACGGCATACTCATCACAACTCACAGAAGACCTAAAAATAAAA\n+ATCTTCCCTAAAGATAAGTCCAATAACCTATCCATAAACCTTAAAGCATC\n+AAAACTAAAGGCCAAAGCCCACAAAAACAAGCACACTAACAACAGCGACA\n+GCGAATCCATATAGAACTCTACACAAAACCCTAACCGTTAACACTACCTT\n+TAAGTAAGTTATAAGCTTTAATTTTCTCACAAATGTCCCTAACTATAATC\n+CAATGGAATCTAAAAGGATATCTAAACAACTACAGCCATCTCCTTATTCT\n+AATCAAAAAATACTCCCCCCACATAATTTCCCTCCAAGAAACCCATATAC\n+AATACACTAATAACATTCCAACCCCAATAAACTACAAACTATTAACAAAT\n+ATTGCCACCAACAGATTTGGGGGCGTAGCACTACTAGTGCATAAGTCAAT\n+ACAACACACTGTCCTCAACATAACAATCGATATAGAAGCAATAGCCATAA\n+ATATAGAATCTAAACTTAAATTAAACATATTTTCCACATACATTTCTCCG\n+ACCAAAAACATAACTAACCAGACACTCCATAACACA
TTTAACATACAACA\n+AACACCCTCTCTAATTACGGGAGATTTTAATGGATGGCACCCATCCTGGG\n+GCTCCCCAACAACAAATAAACGAGGAAAAATAACTCATAGATTCATTGAC\n+AACATGCACCTTATCCTGTTAAACGACAAATCTCCCACACACTTTTCAAC\n+ACACAATACATACACACACATAGACCTCACACTCTGCTCTCCAATCCTAG\n+CCCCCCACGCCAAGTGGAAAATACTAAACGATCTTCACGGTAGCGACCAT\n+TTCCCTATTATCACAACACTATTCCCAACAACCAATCCACAAAAATTCTA\n+CAGACCCTTTTTTAAACTCAAAGAAGCCAACTGGGAACAGTTCAACGCTC\n+TTACCCACCAAACCAACAAGAAATACCCCACCTCCCACAACGTAAACAAA\n+GAAGCCGCTCTAATCAATAGAATCATCCTTTATAGCGCAAACCTCTCCAT\n'..b'AGCGCAGCAAAGATAAGAAGAAGAAATTAATAAACTAATTCCTAATG\n+TCAAATGGAATTACGTTAAATCGAAAGACAATCCAGCAGATGTGTCTTCA\n+AGAGGGATATCACCGCAAGCTCTTAAAATCTGTGAAATTTGGTGGAGAGG\n+GCCGAATTGGCTAGCTATAGATTCACAACACTGGCCCACTCAAAAGGAAT\n+CGGAAATTGTTGTGGTATCCACATTGATAAAATCCGAATATCTGCAAAAT\n+CATCTTTTATCGAAGTATTCATCGATCGACAAACTTCTTAGAGTAATGGT\n+GTATGTATTACGCTTCATAACAAAGCTGAGAGGAAAATCGCAACAGCCGT\n+CACATCTTACGGCAGAGGAATTAAAGCTAGCAAAGATTGCCGTGGTAAAG\n+ATACAACAACAGCTGGGTTTTGGACACGAAGTCAGACTACTCAAAAACAA\n+AAGACCATTCGACCCAAAGAGTAAGTTACAGGCGCTAACCGTTTTTGGAT\n+AGTGATGGCGTACTTCGAGTTGGTGGACGATTACAAAACGCAATGATACC\n+CTATAATGTAAAACATCCAAATATATACAAATCACATTTGACTTGGTTAA\n+TTGCAAAGGATGCTTATAAAGAAACTCTGCATGGCGGAATTAACATTATG\n+AGAACTTATATTCAGAGGGAGTTCTGGATATTTGGCATACAAAATCCCTT\n+AAAGAAATATTTAAGGGAATGTATTGTATGCATACGATACAAGCAAGAGA\n+TGTCCAGTCAACTGATGGGAAATTTACCAGTTTACCGAGTAACGACTGAT\n+TACTCGTTTCAAAATACTGGAATAGACTACGTCAGACCGTTCCAGATTCG\n+CTGCTCAAAGGCAAGAGGTCAAAAAACGTATAAAGGATACTTTTGTGTAT\n+TTGTTTGTATGGCAACAAAAGCAATACATCTGGAAGCGACCTTTCGTCAG\n+ACAAATTCCTGGAGGCTCATCGACGGTTCTTTGCAAGACGAGGCAAGAGT\n+GAGAACCTATACTCAGATAATGGAACAAACTTCGTGGGTGCTTCAAGAGT\n+ATTGGACAAAGAATTTGTAGCTGCCATTAAAAACAATAATGAGTTAGCCC\n+CCACATGGGATGTTTATGGGAAGCCGGTGAAGCATCACCTTAAACGAGTT\n+ATTGGTGAAAACAGATTTACATATGAAGAATTTGCATCGCTGCTATGGTA\n+AATCGAAGCAGTGCTAAACTCGGGTCCATTAGTCACTGTAAGGAGCGAAA\n+ACGATGGTGAGGACATATTACCGTCATTTTCTGGTGGGAAGACCTCTAAT\n+TTTGGCGAAAGTAAGACAATCAGCTCTTTGGATAGATGAAAGCTTATTCA\n+ACGCATCAGAGGTGATTTTTGGAAGAAATGAAAAGAGAAGTATCTGGTGT\n+CATTGCAACAGCGAACCAGAT
GGCGCCAAGAAAAGCCGAATCTGAAGGAG\n+GGACAGCTGGTTCTTATAAAACATGAGAACACTCATCCTGCAAAATGGCC\n+TGCATAAAACAATCAGAGGACTTCCTGGGAGACTTCAAGGACTACTGCGA\n+TTTCTTCGGCACGACAATACGGACAAAAATTGACAACATCAAAGAAAAAG\n+ACAAAATACTACGGCACCGTACCAACCGAAAGAAAAGGTTTATACTGTTC\n+TTTGATATGGGAAATGCAAATAGAATACAGGAAAATATGTAAGCGATCAT\n+AAAAAACGAAAAACATCTAATGGAATATGTTGACAATCAGATGACAAACT\n+ACGATAGAAGCAAACCGGAATTTGGGAAAACTGACCCAACAAGTCAATAT\n+TATTGCAGAAACCATGAAGGAGCACTTTATGGTATATAAGGAGTCAATTA\n+AATTCCTTATGTTATCAAATCAAGTGCGATTGAAGAGGCAGAAAGCCTAC\n+AAGCAACAGCGATCTCAATGATAACGGAAATTAGTGAAGGAAGAATCTTA\n+CACTAATTGCGCCTAACAAAATGCTGGAGGAGCTCGAAAAAGTTAAGCAA\n+AAATAAGGACGAAAACAAATGCTACCGAGTGGAAATTCAGTTATACAATT\n+ACCACTGATCTATAAACTGATGAAGGCCCAAGCTATGTTGAAGGATAATG\n+TCCTATTCATTGAAGCAAAATTGCCGATATACAACAATCAGGAAACGGAT\n+CTCTTTGAAGTAATCCCAATACCACTGTGGACAAACGGAACAAAGCTTAT\n+TCCAAAATTGAATTCTAAATTTTTTGCGTTCAATACAGACATAAACGCAT\n+ATCAGCTAATGTCTGAAATGGAAATTAACTAATGCAGACATGAGGATTCG\n+ACAACATGGCTTTGCGAAAATAATTGGGCATGGAAAAACGCGGATGAGTA\n+CCAAGCAAGGCACACTCATGCGAAATGATGGAATTCCAAGGATATTCGTT\n+CATCAAGCAGCTAATATAATATGCAACGAACAGCATCAAGTTATAGGACT\n+GCCCAATCAAGGCATTATACAACTACTTGCAGGATGCACAGCAATATTAG\n+GGGATACAACAAGAATTACTCCAAAAAGTAATTTCGACAGCGTCTGAAAT\n+GTCTATCTTTCCCAGTTTACGAATTATAGACGAGAAATGGAACGTGGTCC\n+CGCTGAAGCACTTGATTGTGAACAACACTGCAAAATCTTCAAATGCGCAT\n+CAAGACTCTGAAAAATAACAAGGTACACATTGATAACTTGATTTTCCACA\n+CGGCAAGCGGACACTCGGCTCTAGGGTTGACAACGATTATCATAATTATA\n+TTGGTCATTTATATCCGGAGGCAACGCATAAATGAGAGACGACTACTGGC\n+CGTACACTCAAGGGAATGCCTTAATATGTGTTTAGATATGATAAGTAGGT\n+AAACTATAAAAATGTTCTATTTATGGGCTGCAATAAACATGTCACCGGAC\n+AGTATAAGTGGCAACTACAGATAAGTACGATTGCAGCGGCCTATTGCCGA\n+AGTGTCAAGAGATATGACCATGCGGGAGGTGATTAGCGCGGTCATAGTCC\n+TCAAACATAGATTTAAGAATAAAACTTAGCTGCATTAACCAACGCAGACT\n+GCGGCGTCTTACAAGCGCTGCATTATATAATTATATGATAAGAACCTATG\n+TAAGAATGAATAAAAGGCGACGCCCTCGCAGCAGCGAGTCCGTTAGATTC\n+AAACACCCGAATTGAACTCATTAAGTGTACGCACAAGTTTATAGTATGAA\n+CA\n+>FBti0020412 type=transposable_element; loc=4:complement(318364..318786); name=Tc1{}1500; 
dbxref=FlyBase_Annotation_IDs:TE20412,FlyBase:FBti0020412; MD5=a6f5fa9c9f579836fdcefcd416b6996f; length=423; release=r5.49; species=Dmel; \n+CAGCTGCGGTTAAAATAATAGCACTACTGCAGGTGGAAAGTTGATTTCCT\n+AAAAAAAATTATTAAATCTTTATATTTTTTAAAGTCAGATTGCATGAATA\n+ATAAGTACCATATGTTGGCTCTCTGAGCAAGAAATTTTTAGTCTCTCAAT\n+GTAACGGTTCTTTTTGTTTTTGGGCACTTGCTGCAAAAGTGCGCGAAATA\n+AGGCGGTAACAAAAATAGCACTGACCACGTTTTTGCTGAATAAAATTAAT\n+AGGAGTGATTGCTTTGGGTTTTTTCGAAAAATTTTGAAAAAAGGAGTTGT\n+ATTAAAGGTTTTAACTGAATTTTTTCCCAACGAAGACCAAAAATTCTCTA\n+GTCATGGGTCGCGGAAAGCATTGTACCGTCGAAAAAAGAAATTTGATTAA\n+AAATATGATCTCTGAAGGTAAAA\n' |
b |
diff -r 000000000000 -r 234b83159ea8 tool-data/bowtie_indices.loc.sample --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool-data/bowtie_indices.loc.sample Tue Jul 11 11:44:36 2017 -0400 |
b |
@@ -0,0 +1,37 @@ +#This is a sample file distributed with Galaxy that enables tools +#to use a directory of Bowtie indexed sequences data files. You will +#need to create these data files and then create a bowtie_indices.loc +#file similar to this one (store it in this directory) that points to +#the directories in which those files are stored. The bowtie_indices.loc +#file has this format (longer white space characters are TAB characters): +# +#<unique_build_id> <dbkey> <display_name> <file_base_path> +# +#So, for example, if you had hg18 indexed stored in +#/depot/data2/galaxy/bowtie/hg18/, +#then the bowtie_indices.loc entry would look like this: +# +#hg18 hg18 hg18 /depot/data2/galaxy/bowtie/hg18/hg18 +# +#and your /depot/data2/galaxy/bowtie/hg18/ directory +#would contain hg18.*.ebwt files: +# +#-rw-r--r-- 1 james universe 830134 2005-09-13 10:12 hg18.1.ebwt +#-rw-r--r-- 1 james universe 527388 2005-09-13 10:12 hg18.2.ebwt +#-rw-r--r-- 1 james universe 269808 2005-09-13 10:12 hg18.3.ebwt +#...etc... +# +#Your bowtie_indices.loc file should include an entry per line for each +#index set you have stored. The "file" in the path does not actually +#exist, but it is the prefix for the actual index files. For example: +# +#hg18canon hg18 hg18 Canonical /depot/data2/galaxy/bowtie/hg18/hg18canon +#hg18full hg18 hg18 Full /depot/data2/galaxy/bowtie/hg18/hg18full +#/orig/path/hg19 hg19 hg19 /depot/data2/galaxy/bowtie/hg19/hg19 +#...etc... +# +#Note that for backwards compatibility with workflows, the unique ID of +#an entry must be the path that was in the original loc file, because that +#is the value stored in the workflow for that parameter. That is why the +#hg19 entry above looks odd. New genomes can be better-looking. +# |
b |
diff -r 000000000000 -r 234b83159ea8 tool_data_table_conf.xml.sample --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool_data_table_conf.xml.sample Tue Jul 11 11:44:36 2017 -0400 |
b |
@@ -0,0 +1,8 @@ +<!-- Use the file tool_data_table_conf.xml.oldlocstyle if you don't want to update your loc files as changed in revision 4550:535d276c92bc--> +<tables> + <!-- Locations of indexes in the Bowtie mapper format --> + <table name="bowtie_indexes" comment_char="#"> + <columns>value, dbkey, name, path</columns> + <file path="tool-data/bowtie_indices.loc" /> + </table> +</tables> |
b |
diff -r 000000000000 -r 234b83159ea8 tool_dependencies.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool_dependencies.xml Tue Jul 11 11:44:36 2017 -0400 |
b |
@@ -0,0 +1,18 @@ +<?xml version="1.0"?> +<tool_dependency> + <package name="bowtie" version="1.1.2"> + <repository changeset_revision="a1c1a92e13a6" name="package_bowtie_1_1_2" owner="iuc" toolshed="https://toolshed.g2.bx.psu.edu" /> + </package> + <package name="pysam" version="0.8.3"> + <repository changeset_revision="08db58be052a" name="package_python_2_7_pysam_0_8_3" owner="iuc" toolshed="https://toolshed.g2.bx.psu.edu" /> + </package> + <package name="numpy" version="1.9"> + <repository changeset_revision="f24fc0b630fc" name="package_python_2_7_numpy_1_9" owner="iuc" toolshed="https://toolshed.g2.bx.psu.edu" /> + </package> + <package name="R" version="3.1.2"> + <repository changeset_revision="4d2fd1413b56" name="package_r_3_1_2" owner="iuc" toolshed="https://toolshed.g2.bx.psu.edu" /> + </package> + <package name="biocbasics" version="2.14"> + <repository changeset_revision="f0ef1a7b157e" name="package_biocbasics_2_14" owner="mvdbeek" toolshed="https://toolshed.g2.bx.psu.edu" /> + </package> +</tool_dependency> |