Repository 'msp_sr_signature'
hg clone https://toolshed.g2.bx.psu.edu/repos/drosofff/msp_sr_signature

Changeset 0:a2f293717ce3 (2015-10-21)
Next changeset 1:6218b518cd16 (2017-06-07)
Commit message:
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/msp_sr_signature commit fe40dec87779c1fcfbd03330e653aa886f4a2cda
added:
signature.py
signature.xml
smRtools.py
test-data/ensembl.fa
test-data/signature.pdf
test-data/signature.tab
test-data/sr_bowtie.bam
tool-data/bowtie_indices.loc.sample
tool_data_table_conf.xml.sample
tool_dependencies.xml
b
diff -r 000000000000 -r a2f293717ce3 signature.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/signature.py Wed Oct 21 11:35:25 2015 -0400
[
@@ -0,0 +1,122 @@
+#!/usr/bin/python
+# script for computing overlap signatures from a bowtie output
+# Christophe Antoniewski <drosofff@gmail.com>
+# Usage: signature.py --input <alignment file> --inputFormat <tabular|bam|sam> --minquery <int> --maxquery <int>
+#   --mintarget <int> --maxtarget <int> --minscope <int> --maxscope <int> --outputOverlapDataframe <output file>
+#   --referenceGenome <bowtie index or fasta> [--extract_index] --graph <global|lattice> --rcode <R script>
+# version 2.0.0
+
+import sys
+import subprocess
+import argparse
+from smRtools import *
+from collections import defaultdict  # test whether it is required
+
+
+def Parser():
+    the_parser = argparse.ArgumentParser()
+    the_parser.add_argument(
+        '--input', action="store", type=str, help="input alignment file")
+    the_parser.add_argument('--inputFormat', action="store", type=str, choices=[
+                            "tabular", "bam", "sam"], help="format of alignment file (tabular/bam/sam)")
+    the_parser.add_argument(
+        '--minquery', type=int, help="Minimum readsize of query reads (nt) - must be an integer")
+    the_parser.add_argument(
+        '--maxquery', type=int, help="Maximum readsize of query reads (nt) - must be an integer")
+    the_parser.add_argument(
+        '--mintarget', type=int, help="Minimum readsize of target reads (nt) - must be an integer")
+    the_parser.add_argument(
+        '--maxtarget', type=int, help="Maximum readsize of target reads (nt) - must be an integer")
+    the_parser.add_argument(
+        '--minscope', type=int, help="Minimum overlap analyzed (nt) - must be an integer")
+    the_parser.add_argument(
+        '--maxscope', type=int, help="Maximum overlap analyzed (nt) - must be an integer")
+    the_parser.add_argument(
+        '--outputOverlapDataframe', action="store", type=str, help="Overlap dataframe")
+    the_parser.add_argument('--referenceGenome', action='store',
+                            help="path to the bowtie-indexed or fasta reference")
+    the_parser.add_argument('--extract_index', action='store_true',
+                            help="specify if the reference is an indexed Bowtie reference")
+    the_parser.add_argument('--graph', action='store', choices=[
+                            "global", "lattice"], help="small RNA signature is computed either globally or by item (global-lattice)")
+    the_parser.add_argument(
+        '--rcode', type=str, help="R code to be passed to the python script")
+    args = the_parser.parse_args()
+    return args
+
+args = Parser()
+
+if args.extract_index:
+    GenomeFormat = "bowtieIndex"
+else:
+    GenomeFormat = "fastaSource"
+
+if args.inputFormat == "tabular":
+    Genome = HandleSmRNAwindows(
+        args.input, args.inputFormat, args.referenceGenome, GenomeFormat)
+elif args.inputFormat == "sam":
+    Genome = HandleSmRNAwindows(
+        args.input, args.inputFormat, args.referenceGenome, GenomeFormat)
+else:
+    Genome = HandleSmRNAwindows(
+        args.input, args.inputFormat, args.referenceGenome, GenomeFormat)
+
+# replace objDic by Genome.instanceDict or... objDic = Genome.instanceDict
+objDic = Genome.instanceDict
+
+args.maxscope += 1
+
+general_frequency_table = dict(
+    [(i, 0) for i in range(args.minscope, args.maxscope)])
+general_percent_table = dict(
+    [(i, 0) for i in range(args.minscope, args.maxscope)])
+OUT = open(args.outputOverlapDataframe, "w")
+
+if args.graph == "global":
+    # for normalized summing of local_percent_table(s)
+    readcount_dic = {}
+    Total_read_in_objDic = 0
+    for item in objDic:
+        readcount_dic[item] = objDic[item].readcount(
+            args.minquery, args.maxquery)
+        Total_read_in_objDic += readcount_dic[item]
+    ######
+    for x in (objDic):
+        local_frequency_table = objDic[x].signature(
+            args.minquery, args.maxquery, args.mintarget, args.maxtarget, range(args.minscope, args.maxscope))
+        local_percent_table = objDic[x].hannon_signature(
+            args.minquery, args.maxquery, args.mintarget, args.maxtarget, range(args.minscope, args.maxscope))
+        try:
+            for overlap in local_frequency_table.keys():
+                general_frequency_table[overlap] = general_frequency_table.get(
+                    overlap, 0) + local_frequency_table[overlap]
+        except:
+            pass
+        try:
+            for overlap in local_percent_table.keys():
+                general_percent_table[overlap] = general_percent_table.get(
+                    overlap, 0) + (1. / Total_read_in_objDic * readcount_dic[x] * local_percent_table[overlap])
+        except:
+            pass
+    print >> OUT, "overlap\tnum of pairs\tprobability"
+    for classe in sorted(general_frequency_table):
+        print >> OUT, "%i\t%i\t%f" % (
+            classe, general_frequency_table[classe], general_percent_table[classe])
+
+else:
+    print >> OUT, "overlap\tnum of pairs\tprobability\titem"
+    for x in (objDic):
+        local_frequency_table = objDic[x].signature(
+            args.minquery, args.maxquery, args.mintarget, args.maxtarget, range(args.minscope, args.maxscope))
+        local_percent_table = objDic[x].hannon_signature(
+            args.minquery, args.maxquery, args.mintarget, args.maxtarget, range(args.minscope, args.maxscope))
+        for classe in range(args.minscope, args.maxscope):
+            print >> OUT, "%i\t%i\t%f\t%s" % (
+                classe, local_frequency_table[classe], local_percent_table[classe], x)
+
+OUT.close()
+
+# Run the R script that is defined in the xml using the Rscript binary
+# provided with R.
+R_command = "Rscript " + args.rcode
+process = subprocess.Popen(R_command.split())
b
diff -r 000000000000 -r a2f293717ce3 signature.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/signature.xml Wed Oct 21 11:35:25 2015 -0400
[
b'@@ -0,0 +1,149 @@\n+<tool id="signature" name="Small RNA Signatures" version="2.0.5">\n+    <description />\n+    <requirements>\n+        <requirement type="package" version="0.12.7">bowtie</requirement>\n+        <requirement type="package" version="0.7.7">pysam</requirement>\n+        <requirement type="package" version="3.1.2">R</requirement>\n+        <requirement type="package" version="2.14">biocbasics</requirement>\n+        <requirement type="package" version="1.9">numpy</requirement>\n+    </requirements>\n+    <command interpreter="python">\n+           signature.py\n+           --input $refGenomeSource.input\n+           --inputFormat $refGenomeSource.input.ext\n+           --minquery $minquery\n+           --maxquery $maxquery\n+           --mintarget $mintarget\n+           --maxtarget $maxtarget\n+           --minscope $minscope\n+           --maxscope $maxscope\n+           --outputOverlapDataframe $output\n+          #if $refGenomeSource.genomeSource == "history":\n+            --referenceGenome $refGenomeSource.ownFile\n+          #else:\n+            #silent reference= filter( lambda x: str( x[0] ) == str( $input.dbkey ), $__app__.tool_data_tables[ \'bowtie_indexes\' ].get_fields() )[0][-1]\n+            --referenceGenome $reference\n+            --extract_index\n+          #end if\n+\t  --graph $graph_type \n+          --rcode $sigplotter\n+       </command>\n+    <inputs>\n+        <conditional name="refGenomeSource">\n+            <param help="Built-ins were indexed using default options" label="Will you select a reference genome from your history or use a built-in index?" 
name="genomeSource" type="select">\n+                <option value="indexed">Use a built-in index</option>\n+                <option value="history">Use one from the history</option>\n+            </param>\n+            <when value="indexed">\n+                <param format="tabular,sam,bam" label="Compute signature from this bowtie standard output" name="input" type="data">\n+                    <validator message="database not set for this bowtie output. Select the database(=genome used for matching) manually, or select a reference fasta from your history." metadata_column="0" metadata_name="dbkey" table_name="bowtie_indexes" type="dataset_metadata_in_data_table" />\n+                </param>\n+            </when>\n+            <when value="history">\n+                <param format="fasta" label="Select the fasta reference" name="ownFile" type="data" />\n+                <param format="tabular,sam,bam" label="Compute signature from this bowtie standard output" name="input" type="data" />\n+            </when>\n+        </conditional>\n+        <param help="\'23\' = 23 nucleotides" label="Min size of query small RNAs" name="minquery" size="3" type="integer" value="23" />\n+        <param help="\'29\' = 29 nucleotides" label="Max size of query small RNAs" name="maxquery" size="3" type="integer" value="29" />\n+        <param help="\'23\' = 23 nucleotides" label="Min size of target small RNAs" name="mintarget" size="3" type="integer" value="23" />\n+        <param help="\'29\' = 29 nucleotides" label="Max size of target small RNAs" name="maxtarget" size="3" type="integer" value="29" />\n+        <param help="\'1\' = 1 nucleotide overlap" label="Minimal relative overlap analyzed" name="minscope" size="3" type="integer" value="1" />\n+        <param help="\'1\' = 1 nucleotide overlap" label="Maximal relative overlap analyzed" name="maxscope" size="3" type="integer" value="26" />\n+        <param help="Signature can be computed globally or by item present in the 
alignment file" label="Graph type" name="graph_type" type="select">\n+            <option selected="True" value="global">Global</option>\n+            <option value="lattice">Lattice</option>\n+        </param>\n+    </inputs>\n+    <outputs>\n+        <data format="tabular" label="signature data frame" name="output" />\n+        <data format="pdf" label="Overlap probabilities" name="output2" />\n+    </outputs>\n+    <tests>\n+        <test>\n+            <p'..b'lue="23" />\n+            <param name="maxtarget" value="29" />\n+            <param name="minscope" value="5" />\n+            <param name="maxscope" value="15" />\n+            <param name="graph_type" value="global" />\n+            <output file="signature.tab" ftype="tabular" name="output" />\n+            <output file="signature.pdf" ftype="pdf" name="output2" />\n+        </test>\n+    </tests>\n+    <help>\n+\n+**What it does**\n+\n+This tool computes the number of pairs by overlap classes (in nt) from a bowtie output file, the z-score calculated from these numbers of pairs, and the ping-pong signal as described in Brennecke et al (2009) Science.   \n+The numerical options set the min and max size of both the query small rna class and the target small rna class.   \n+Three type of signals are plotted in separate pdf files, the number of pairs founds, the z-score calculated from these numbers of pairs, and the ping-pong signal as described in Brennecke et al (2009) Science.   
\n+\n+        </help>\n+    <citations>\n+            <citation type="doi">10.1007/978-1-4939-0931-5_12</citation>\n+    </citations>\n+    <configfiles>\n+        <configfile name="sigplotter">\n+      graph_type = "${graph_type}"\n+\n+      globalgraph = function () {\n+        ## Setup R error handling to go to stderr\n+        options( show.error.messages=F,\n+                 error = function () { cat( geterrmessage(), file=stderr() ); q( "no", 1, F ) } )\n+        signature = read.delim("${output}", header=TRUE)\n+        signaturez=data.frame(signature[,1], (signature[,2] -mean(signature[,2]))/sd(signature[,2]))\n+        overlap_prob_z=data.frame(signature[,1], (signature[,3] -mean(signature[,3]))/sd(signature[,3]))\n+        YLIM=max(signature[,2])\n+        \n+\n+        ## Open output2 PDF file\n+        pdf( "${output2}" )\n+        if (YLIM!=0) {\n+          par(mfrow=c(2,2),oma = c(0, 0, 3, 0))\n+\n+          plot(signature[,1:2], type = "h", main="Numbers of pairs", cex.main=1, xlab="overlap (nt)", ylim=c(0,YLIM), ylab="Numbers of pairs", col="darkslateblue", lwd=4)\n+\n+          plot(signaturez, type = "l", main="Number of pairs Z-scores", cex.main=1, xlab="overlap (nt)", ylab="z-score", pch=19, cex=0.2, col="darkslateblue", lwd=2)\n+\n+          plot(signature[,1], signature[,3]*100, type = "l", main="Overlap probabilities",\n+             cex.main=1, xlab="overlap (nt)", ylab="Probability [%]", ylim=c(0,50),\n+             pch=19, col="darkslateblue", lwd=2)\n+\n+          plot(overlap_prob_z, type = "l", main="Overlap Probability Z-scores", cex.main=1, xlab="overlap (nt)", ylab="z-score", pch=19, cex=0.2, col="darkslateblue", lwd=2)\n+\n+          mtext("Overlap Signatures of ${minquery}-${maxquery} against ${mintarget}-${maxtarget}nt small RNAs", outer = TRUE, cex=1)\n+        }\n+        devname = dev.off()\n+        ## Close the PDF file\n+      }\n+\n+      treillisgraph = function () {\n+        ## Open output2 PDF file\n+        pdf( 
"${output2}", paper="special", height=11.69, width=8.2677 )\n+        signature = read.delim("${output}", header=TRUE)\n+        options( show.error.messages=F,\n+               error = function () { cat( geterrmessage(), file=stderr() ); q( "no", 1, F ) } )\n+        library(lattice)\n+        print (xyplot(signature[,3]*100~signature[,1]|signature[,4], type = "l", xlim=c(${minscope},${maxscope}), main="ping-pong Signature of ${minquery}-${maxquery} against ${mintarget}-${maxtarget}nt small RNAs",\n+             par.strip.text=list(cex=.5), strip=strip.custom(which.given=1, bg="lightblue"), scales=list(cex=0.5),\n+             cex.main=1, cex=.5, xlab="overlap (nt)", ylab="ping-pong signal [%]",\n+             pch=19, col="darkslateblue", lwd =1.5, cex.lab=1.2, cex.axis=1.2,\n+             layout=c(4,12), as.table=TRUE, newpage = T) )\n+        devnname = dev.off()\n+      }\n+\n+      if (graph_type=="global") {\n+        globalgraph()\n+\n+      }\n+      if(graph_type=="lattice") {\n+        treillisgraph()\n+      }\n+    </configfile>\n+    </configfiles>\n+</tool>\n'
b
diff -r 000000000000 -r a2f293717ce3 smRtools.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/smRtools.py Wed Oct 21 11:35:25 2015 -0400
[
b'@@ -0,0 +1,758 @@\n+#!/usr/bin/python\n+# version 1 7-5-2012 unification of the SmRNAwindow class\n+\n+import sys, subprocess\n+from collections import defaultdict\n+from numpy import mean, median, std\n+##Disable scipy import temporarily, as no working scipy on toolshed.\n+##from scipy import stats\n+\n+def get_fasta (index="/home/galaxy/galaxy-dist/bowtie/5.37_Dmel/5.37_Dmel"):\n+  \'\'\'This function will return a dictionary containing fasta identifiers as keys and the\n+  sequence as values. Index must be the path to a fasta file.\'\'\'\n+  p = subprocess.Popen(args=["bowtie-inspect","-a", "0", index], stdout=subprocess.PIPE, stderr=subprocess.STDOUT) # bowtie-inspect outputs sequences on single lines\n+  outputlines = p.stdout.readlines()\n+  p.wait()\n+  item_dic = {}\n+  for line in outputlines:\n+    if (line[0] == ">"):\n+      try:\n+        item_dic[current_item] = "".join(stringlist) # to dump the sequence of the previous item - try because of the keyerror of the first item\n+      except: pass\n+      current_item = line[1:].rstrip().split()[0] #take the first word before space because bowtie splits headers !\n+      item_dic[current_item] = ""\n+      stringlist=[]\n+    else:\n+      stringlist.append(line.rstrip() )\n+  item_dic[current_item] = "".join(stringlist) # for the last item\n+  return item_dic\n+\n+def get_fasta_headers (index):\n+  p = subprocess.Popen(args=["bowtie-inspect","-n", index], stdout=subprocess.PIPE, stderr=subprocess.STDOUT) # bowtie-inspect outputs sequences on single lines\n+  outputlines = p.stdout.readlines()\n+  p.wait()\n+  item_dic = {}\n+  for line in outputlines:\n+    header = line.rstrip().split()[0] #take the first word before space because bowtie splits headers !\n+    item_dic[header] = 1\n+  return item_dic\n+\n+\n+def get_file_sample (file, numberoflines):\n+  \'\'\'import random to use this function\'\'\'\n+  F=open(file)\n+  fullfile = F.read().splitlines()\n+  F.close()\n+  if len(fullfile) < 
numberoflines:\n+    return "sample size exceeds file size"\n+  return random.sample(fullfile, numberoflines)\n+\n+def get_fasta_from_history (file):\n+  F = open (file, "r")\n+  item_dic = {}\n+  for line in F:\n+    if (line[0] == ">"):\n+      try:\n+        item_dic[current_item] = "".join(stringlist) # to dump the sequence of the previous item - try because of the keyerror of the first item\n+      except: pass\n+      current_item = line[1:-1].split()[0] #take the first word before space because bowtie splits headers !\n+      item_dic[current_item] = ""\n+      stringlist=[]\n+    else:\n+      stringlist.append(line[:-1])\n+  item_dic[current_item] = "".join(stringlist) # for the last item\n+  return item_dic\n+\n+def antipara (sequence):\n+    antidict = {"A":"T", "T":"A", "G":"C", "C":"G", "N":"N"}\n+    revseq = sequence[::-1]\n+    return "".join([antidict[i] for i in revseq])\n+\n+def RNAtranslate (sequence):\n+    return "".join([i if i in "AGCN" else "U" for i in sequence])\n+def DNAtranslate (sequence):\n+    return "".join([i if i in "AGCN" else "T" for i in sequence])\n+\n+def RNAfold (sequence_list):\n+  thestring= "\\n".join(sequence_list)\n+  p = subprocess.Popen(args=["RNAfold","--noPS"], stdin= subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)\n+  output=p.communicate(thestring)[0]\n+  p.wait()\n+  output=output.split("\\n")\n+  if not output[-1]: output = output[:-1] # nasty patch to remove last empty line\n+  buffer=[]\n+  for line in output:\n+    if line[0] in ["N","A","T","U","G","C"]:\n+      buffer.append(DNAtranslate(line))\n+    if line[0] in ["(",".",")"]:\n+      fields=line.split("(")\n+      energy= fields[-1]\n+      energy = energy[:-1] # remove the ) parenthesis\n+      energy=float(energy)\n+      buffer.append(str(energy))\n+  return dict(zip(buffer[::2], buffer[1::2]))\n+\n+def extractsubinstance (start, end, instance):\n+  \'\'\' Testing whether this can be an function external to the class to save 
memory\'\'\'\n+  subinstance = SmRNAwindow (instance.gene, instance.sequence[start-1:end], start)\n+  subinstance.gene = "%s %s %s" % (subin'..b'e[offset:offset+queryhash[offset][i]]) )\n+            paired_sequences.append("-%s" % RNAtranslate (antipara (self.sequence[-matched_offset-targethash[matched_offset][i]+1:-matched_offset+1]) ) )\n+        if offset < 0:\n+          for i in range (paired):\n+            paired_sequences.append("-%s" % RNAtranslate (antipara (self.sequence[-offset-queryhash[offset][i]+1:-offset+1]) ) )\n+            paired_sequences.append("+%s" % RNAtranslate (self.sequence[matched_offset:matched_offset+targethash[matched_offset][i]] ) )\n+    return paired_sequences\n+\n+  def pairable (self, overlap, minquery, maxquery, mintarget, maxtarget):\n+    queryhash = defaultdict(list)\n+    targethash = defaultdict(list)\n+    query_range = range (int(minquery), int(maxquery)+1)\n+    target_range = range (int(mintarget), int(maxtarget)+1)\n+    paired_sequences = []\n+\n+    for offset in self.readDict: # selection of data\n+      for size in self.readDict[offset]:\n+        if size in query_range:\n+          queryhash[offset].append(size)\n+        if size in target_range:\n+          targethash[offset].append(size)\n+\n+    for offset in queryhash:\n+      matched_offset = -offset - overlap + 1\n+      if targethash[matched_offset]:\n+        if offset >= 0:\n+          for i in queryhash[offset]:\n+            paired_sequences.append("+%s" % RNAtranslate (self.sequence[offset:offset+i]) )\n+          for i in targethash[matched_offset]:\n+            paired_sequences.append( "-%s" % RNAtranslate (antipara (self.sequence[-matched_offset-i+1:-matched_offset+1]) ) )\n+        if offset < 0:\n+          for i in queryhash[offset]:\n+            paired_sequences.append("-%s" %  RNAtranslate (antipara (self.sequence[-offset-i+1:-offset+1]) ) )\n+          for i in targethash[matched_offset]:\n+            paired_sequences.append("+%s" %  
RNAtranslate (self.sequence[matched_offset:matched_offset+i] ) )\n+    return paired_sequences\n+\n+  def newpairable_bowtie (self, overlap, minquery, maxquery, mintarget, maxtarget):\n+    \'\'\' revision of pairable on 3-12-2012, with focus on the offset shift problem (bowtie is 1-based cooordinates whereas python strings are 0-based coordinates\'\'\'\n+    queryhash = defaultdict(list)\n+    targethash = defaultdict(list)\n+    query_range = range (int(minquery), int(maxquery)+1)\n+    target_range = range (int(mintarget), int(maxtarget)+1)\n+    bowtie_output = []\n+\n+    for offset in self.readDict: # selection of data\n+      for size in self.readDict[offset]:\n+        if size in query_range:\n+          queryhash[offset].append(size)\n+        if size in target_range:\n+          targethash[offset].append(size)\n+    counter = 0\n+    for offset in queryhash:\n+      matched_offset = -offset - overlap + 1\n+      if targethash[matched_offset]:\n+        if offset >= 0:\n+          for i in queryhash[offset]:\n+            counter += 1\n+            bowtie_output.append("%s\\t%s\\t%s\\t%s\\t%s" % (counter, "+", self.gene, offset-1, self.sequence[offset-1:offset-1+i]) ) # attention a la base 1-0 de l\'offset \n+        if offset < 0:\n+          for i in queryhash[offset]:\n+            counter += 1\n+            bowtie_output.append("%s\\t%s\\t%s\\t%s\\t%s" % (counter, "-", self.gene, -offset-i, self.sequence[-offset-i:-offset])) # attention a la base 1-0 de l\'offset\n+    return bowtie_output\n+\n+\n+def __main__(bowtie_index_path, bowtie_output_path):\n+  sequenceDic = get_fasta (bowtie_index_path)\n+  objDic = {}\n+  F = open (bowtie_output_path, "r") # F is the bowtie output taken as input\n+  for line in F:\n+    fields = line.split()\n+    polarity = fields[1]\n+    gene = fields[2]\n+    offset = int(fields[3])\n+    size = len (fields[4])\n+    try:\n+      objDic[gene].addread (polarity, offset, size)\n+    except KeyError:\n+      objDic[gene] = 
SmRNAwindow(gene, sequenceDic[gene])\n+      objDic[gene].addread (polarity, offset, size)\n+  F.close()\n+  for gene in objDic:\n+    print gene, objDic[gene].pairer(19,19,23,19,23)\n+\n+if __name__ == "__main__" : __main__(sys.argv[1], sys.argv[2]) \n'
b
diff -r 000000000000 -r a2f293717ce3 test-data/ensembl.fa
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/ensembl.fa Wed Oct 21 11:35:25 2015 -0400
b
b'@@ -0,0 +1,10060 @@\n+>FBgn0026065_Idefix\n+GTGACATATCCATAAGTCCCTAAGACTTAAGCATATGCCTACATACTAATACACTTACAA\n+CACATACACCCCAATACAACATACACTACTCCGGATGTACCCAACAGATACCAGATAAGA\n+ATAAGATTGTTATATGATCCTCGAGAATGGAAAAAACCCCAATTCTAGATAAGTCACCCA\n+CTGGTAGACTAAACATCCGTCCCCTAATTTAAACAATTCCTTGCTTAAGCCTCACCCCAT\n+CGTCACATTCCCACGTTCAAAGCTCGGAGCCGCAATCCCGAAAAACAAAAGTATCGATTT\n+CAATAAACAAATTATAAGAATCTAAGAGCACTTGTATCCAAGAGCAAATGCACTTGAATC\n+CAAGAGAAACGCAAAGCTTTTTCTCTTTACGATCAGAATCCTAAAGTCTAAAGTCCATAT\n+TAGAAAAGCTCGATACCGAGGCTTGAACGTCAACCAAATCAGAATAATTATCAGAGTTCA\n+GTTTGAGACCTAATTGTAAAAGGTTCGGTGTTCTTCTCAAATAAAAAGATTGTAATCATT\n+TAGTGAAATAAAAATTATATTTTTTTCACTTATAAATATTGCAAGTATTTAATTGGCGCA\n+GTCGGTTAGGATCCAATAAAATAAAAGAGTCCTTTTAGTACGGTACTGATCAACTGAAGG\n+ATATGCTATACGACTAGCTATCCAAGATCAGCGAATTAAAATAGTGATTCAAAAATATTT\n+TTTAATCCGCAAAAGAATCTACGTGAAAGTAGTATTCAAAATAAAATCCCGTGCGGTCGG\n+AAACAAAAATTAATTTAAATTTTTTAATTCCGAAACTTAAAACCAAGTTTAAAGAAAACT\n+TAAAATCAAGAAAACTTAAAACCAAGTTTAAAGAAAACTTAAAATCAAGAAAACTTAAAA\n+CCAAGTTTAAAGAAAACTTAAAATCAAGAAAACTTAAAACCAAGTTTAAAGAAAACTCAA\n+AATCAAGAAAACTTAAAGCCAAAATAAGCTAGAAAACTAAAAGACATCATGGCAGTCCCA\n+CAACTCTCAGAAACACACCTAAACCAACTGCTAAACCAAATCAAAGAATTAAACTACTAC\n+GATGGCGCACCTGGCAAATTATCTGGATTCGTCAACCAAGTGGAACAACTGCTCAGTTTA\n+TACCCAACACAGGAAGCAAGACAGGCACACGTCATATATGGAGCAGTGAAGCGGTTATTA\n+GTGGATTCAGCCTTAGAAGTCGTAACCCAGGAAAGAGCTAACACATGGCTGGACATGAAG\n+AAAGCACTGGCAATGGCATTCAAAGACCATAGACCTTATGTAACTCTCATCAGACAATTA\n+GAAGACATATCATACCCAGGAAGTATCTGTAAGTTTATAGAAAAATTAGAAACACAATAC\n+TGGATTATGTTCGATAAGTTAGAATTAGAAAGTGACCATGTTGATAAATCGAATTATACC\n+GAAATGTTAAACAAAACTGTTAAATCAGTAATAGATCGAAAACTGCCGGATAGAATTTAT\n+ATGTCTTTGGCACGTAAAGATATTGATACAATTTATAAATTAAAACAAGCATCAATGGAA\n+TTAGGCCTTTATGATGCTATTCCAGAAAATCACCGTTCTAATAGAACAGAAATGAATAAA\n+CGTAGGAACAGGGGAAACTATAATCAAAATAATAATCAAAAATATTACAATAATAGAAAT\n+CACAACTACAGTAATTATTATCCTAGCATGAATCAGAATCATAATACACAACCACCTCAG\n+AATCCGACTCAACCTATGACAAATCAAAACCAATATTCACCGCGTTTCATACCGAATAAT\n+CAAAGAGGGAATTATTATGCATTTAGACGAGACTTAACACAAGCTCAGCAGAACAACCCA\n+C
TTAATAACACCCTTAACTTCCAACCTTCGACATCGAATAATATTAACAGACAAGGGCCA\n+GTAAAAAGACAACGCGAGAGTCAGAGTGACCAAAGCAGGATGGATGTAAATTTTCATCAA\n+GCTGCCTCGGACACTCAAATGATAGAGAAGGACATACAAGTCCCTATGTAAAAATAATTC\n+ATCATAATAAAAATTATAAGGGAATGATCGATACAGGATCATCAATTAACATCATAAGAG\n+AAAATTTTGAGAACTTAGAAGAAAAGGAAGAAAACCTAATAGTATACACTATTAAAGGAC\n+CAATAACACTAAAGAGAAGTATAATAATAAAACCTACTTCAGTATGTCCGTCTGCTCAAA\n+AATTCTACATTCACAAATTTTCTGATAACTATGATTTCTTGTTAGGTCGAAAGTATTTAG\n+AAGATACAAAAGCTAAAATAGATTATGCTAACGAAACAGTAACACTAGGCTCAAAAGTAT\n+TTAAGTTTCTCTATGAAGAAAAGAAGGGCGAGACCGCATCCAAATGCCTTGACCCACAAG\n+AAAAGAATGATTCCGCTCTAGTGGACAGAACCAAACCAAAAATGCAAAAGGTTAAGACCG\n+CACCTAAGTGCCTTAAACCAAAGCATCAACAGCAGAAGAAAGAGACCGCATTACCCAAAT\n+GCCTCATTTCAAATGTTGTTAAAGACACAGTGGACAATGATGTAACACATCTCGATCCCA\n+TGTCCGTTGACAACGATATAGTCAACTTCGCGATTAACAATGAGTTACGCGAATGTAACG\n+AGTATAGACTCGAACACTTAAATGCAGAGGAAGTTGAATGTTTAAAGAAGTTCCTATACG\n+AATATAGAGACATTCAGTACAAAGAGGGCGAAAATTTGACCTTCACCAGTACTATTAAAC\n+ATGTCATCCAGACTCAACACGAAGACCCAGTATACCGTAAACCCTACAAGTACCCTCAAA\n+GCGTTGACCAAGAAGTTAACAAACAAATTAAAGAAATGATAGAACAAGGGATTGTTCGCA\n+AATCGAAGTCCCCTTATTGTTCTCCTATTTGGGTGGTCCCCAAGAAGGCAGACGCCTCTG\n+GGAAACAAAAATTCAGGTTGGTAGTCGATTACAGGAACCTAAATGAGATAACTGTTAACG\n+ACAAATTTCCCATTCCCCGAATGGATGAGATATTGGACAAACTAGGTAGATGCCAATACT\n+TTACCACTATAGATCTAGCCAAGGGTTTTCACCAAATCCAAATGGATGAAAATTCTATTG\n+CAAAAACAGCTTTTTCAACTAAGCATGGGCATTATGAATATACTCGTATGCCCTTTGGTT\n+TAAAAAACGCTCCAGCTACTTTTCAGAGATGCATGAATAATCTTCTGGAAGATTTAATCT\n+ACAAAGACTGTTTAGTCTATTTAGACGATATTATTGTTTATTCCACTCCATTGGAAGAAC\n+ACATTTTATCCCTAAAGAAAGTCTTTGAAAAACTGAGAGACGCTAATTTAAAGTTGCAAC\n+TAGATAAATGTGAATTCATGAAGAAAGAAACTGAATTCCTAGGACACATCGTCACAACAA\n+ATGGCATCAAACCAAATCCAAATAAAACTAAAGCAATTACAAATTTTCCATTACCCAAGA\n+CACCTAAGCAAATAAAATCATTTTTGGGATTATGTGGATTCTATCGCAAGTTTATTCCTA\n+ACTTTGCCAAAATAGTTAAACCCATGACCCTCAAATTAAAGAAAGGTGCTATAATAGACA\n+CCAAATGTAAAGAATACATCGAATCATTTGAAAAATTAAAAGTTTTGATAACTTCAGACC\n+CGATATTAATCTATCCTGATTTTTCAAAACCTTTTTCTTTGACAACTGATGCTAGCAACG\n+TAGCTATTGGTGCAGTGTTATCACAAAATCACAAGCCAGTTTGTTATG
CCAGTAGAACGC\n+TAAACGAACATGAAATCAACTATGCTACGATTGAAAAAGAATTGTTAGCTA'..b'CCC\n+GTTCCAGAAAATTCTACCAGCAAACGAGCTCGCCTACACCCAAATGTTAAGTGCCACTTT\n+TGTGGAAAAATTGGCCACAAGATAGCTGACTGCCGCTCCATGAAAAACAACTTAAAGAAT\n+CAACAAGGATCTAGTTCGAGTATTGGGCGCTTATCTGACTCTAAACCTGGGTCAATTACT\n+TGCTATAGATGTGGAAACCAGGGGCATATAGCGTCAGCTTGCCCTGCAAGACAATCGTTG\n+TCAAACCAAACTAAAGCCGACGAGAAGCGTGTCAACGTGTGTCACGTAGTCGAGCCAATT\n+GGGACATTGATATCATCTGGTGAGTCGTATCCATTTTATTTCGACTCTGGAGCCGAATGC\n+TCACTTGTAAGAGAATCTGTGTCCACCCAACTCTCGGGCACACGAATTAACAACAATGTA\n+GTTTTAAAGGGTATCGGAAATAATACTGTTACCAGTACATTACAAATTTTGTCAAACGTA\n+ACAATAAGTGGTTACTGTCTCGAAGTGCTTTTTCACGTAATTCTTAATGATTGCATTAAT\n+TATAATATTATAATTGGACGCGAAATTTTAAGTCAGGGATTTAGTGCTACTATAACAATA\n+GATAAAATAGAGTTATGTAAAACAAGGTCTGTGCAAACCCTATCTGCTTAGAGTAGTAGT\n+TTTAGTCTTGAAAATGTTAATACCGAATTGTGTGGCGAGGATAGGAAAATCTTGGTAAAT\n+CTTTTGAATAAATTCTGTGACTCATTTATAGACGGTTTTCCCAAAAATCGTGTTACAACT\n+GGCGAACTAGAAGTACGCTTAATTGATCCAATAAAAACTGTACACAGACGACCGTACCGA\n+CTTAGTATAGAGGAAAAACAAATTGTCCGAAACAAGGTTAATGAGCTGCTGTTAGATAAC\n+ATCATCCGTCCTAGCAGCTCACCGTTCGCCAGTCCAGTTTTACTCGTTAAAAAGAAAAAT\n+GGTTCTGATCGCCTTTGCGTGGATTACCGCGAACTAAATACAAACACAGTTGCAGAGAAA\n+TATCCCTTACCACTAATTAGTGACCAAATATCTAGGTTGCGTGGAGCAAGTTTCTTTAGT\n+TGCTTGGATATGGCCAGCGGGTTTCATCAGATACCTATTCACGCAAATTCAATTGAGCGC\n+ACGGCTTTTGTGACACCTGACGGCCAATTCGAATTTCTAACTATGCCCTTCGGGTTAAAG\n+AATGCCCCATCCGTGTTCCAGCGTGCAGTTATGAAAGCTTTGGGTGAGCTTGCCCACTCT\n+TACGTTATCGTTTATATGGACGATATAATGATTATCGCAGAAACAAAAGAAGAAGCTTTT\n+GTAAGGTTAAGGACAGTTTTGAAAATATTATCGCAGGCTGGGTTTTCTTTTAATATCGGA\n+AAATGTTCATTCCTGAAATCTTGCATTGAATATCTGGGGTTTGTGGTAAAAGAGGGCGAA\n+ATAAGACCAAATCCATCTAAGATAAAAGCATTAGTCGCTTTACCGCCTCCGCAGTCTGTT\n+ACCCAAGTAAGACAAATTATTGGCCTAGCCTCTTATTTTAGGCAGTTTGTGCCAAAGTTT\n+TCAGAAATCATGAAACCCTTATATAGACTGACCTGCAAAAACAAAATATTTGAATGGAAA\n+CTTGAACACGAACAAATTCGTCAAAAAGTCACTAAATTGCTTACAGATGAGCCCGTCCTT\n+GTTATCTTCGATCCTCGGCATCCCATTGAACTGCATACAGATGCCAGTATGGATGGCTAC\n+GGAGCAATTCTACTCCACAAAATAGATAATAAACGTCGTGTAGTTGAGTATTACAGCAAA\n+CAAACATCCTTGACGGAATCTCGATATCATTCG
TACGAGCTTGAAACTTTAGCTGTGTAT\n+AACTCCATGAGACACTTTCGTCACTATTTACATGGGCGAATTTGTTGTTTTTACAGACTG\n+TAATTCCCTAAAAGCTACTCGCAACAAGACTGAACTAACGCCGAGAGTACACCGTTGGTG\n+GGCATATATGCAGTCCTTCGACTTTGACTAGAATGACTTAGACTTAGAATATAGACCTGG\n+TGCCATAATGGCACATGTTGATTTCTTGTCACGCAATCCACTGCCATCTGCTCGGGTTAT\n+TACTGGTGAGGAAGAAAAACATGTTCTATTGGCCAAAATAACGGACAACTGGTTACTTGC\n+AGAACAGCAAAAGGATTCAGAGATTTCCACGATTGTTGTTAAAATACAGAACAATGAATT\n+GGGTGAGAGCTCGGCAAAAAGTTATGAATTACGCTCGAAAATGCTTTTTCGCAAAATTCA\n+AAGGAACGGTAAAACTCGTTGCCTGCCAGTTGCCCCCAGATCATTCAGATGGTCAGTAGT\n+GAACCAGGTCCATGAAGCAGTTGTACATTTGGGTGGGAAAAGACTTTAGACAAAATGTAC\n+GAATTTTACTGGTTTGAGAACATGGCCAAATATGTTCGTAAGTTCGTTGATAATTGCATT\n+ACGTGTAAGTTAACTAAGCCTCCGTCAGGAAAATTGCCAATCGAACTCCACCCCATACCA\n+AAAGTAGAAATTCCATGGCTATAAGTTGTACGACAAATCGCATAACGAAAGCCAGTCCTC\n+TTGAATTACTAATCGGAAAAGAATGTAGACCATTTAATATGTTACCAATATGTGAACAAG\n+TTAATAAAGTCGATGTAAATATTATAAGAAATATCGCGAGAGAAAATATTAAGAAGAACG\n+CCTTGTATGAAAAAACTAGATTCGATAAGCACAAAGCCAAATTTGATAACTTTGGTGTTG\n+GCGATTATGTTTTACTTAAGAACGAAGAAAGGCACCAAACAAAATTAGACCAAAAATATA\n+AAGGACCTTTCCTCGTGACAGAGGTACTTAAGGGAGATCGTTATATTTTAAAATCTTTAA\n+CTAATAAGCGGACTTATAAGTACCCACATGAAGCTTTGCGCAGTATGCCAACAGAGGAGA\n+TCCCCAAAGAGTTAGATCTATGTGACGATCAAGAAAACGTTGAAAGAGACGTTAGAAATC\n+CCTTGGTGGATTCCAATGTGGATGAAAACGTCGAAAGAGACGTTAGAAATCCCTTGGTGG\n+ATTCCAATGGGGATGAAAACGTTGAAAGAGACGTTAGAAATCCGTTGGTGGATGCCAATG\n+TGAGCGAAAAGTTACTGAGTTGTTTGAAGACTCAAGTGAATGAGAGGCATTGATGGATTT\n+CAATGCGAGATTGGGGACACATGCAACGTCGCCAAGTTGCCAGTGCTAGTAGGTACAAGT\n+GTTACTGTGTTGACTTATTTGATGTCTGGTGACTGGCGGCGTGGCGGGTTGAATTGTCCT\n+AGTGTGTTGCTAATAATAACAAACGATCTTCTTGGTACTTCTGTCACTCGAGTTGGTCGA\n+TAACAAGAAAAATAATAATAATAATTACGTTTAATGTTATCTTTCTAGATTAAGCTTGTT\n+TAATTTCAAAACTTATATTACACACGAGGACGTGTGCTGGTCAGGAAGGCCGTGTCGCAT\n+CATTATTAGTCTTATTTTTATTTTCTATGTTCCATCTCTAATAAACATGTCATCTCTATT\n+AAATAAAATTCGTATCGAGCTGTTCTTGTCTTCGTTTCTCTTTGATCGCTGTTCGCTGTG\n+TTCCGTTATGCGAGTTTAACGGGTTTTGCTCTGTTCTACATAGTCTCGGTTCGACGATGC\n+GTTAGAGTGAGACAAATGCTTGTCCTGTGGTGAGTTCGGACCAGCATGTATCAAGCGAGA\n+TAGAGCGATGTTGAAAT
GTACACGGGGCACTTATGTTTGAAAACTCTGAGAAAGCGGACG\n+CGTGAATATGTCGCAACCGAGGAAGTGTACGACTCGCGGGCGGAGCGCGGCAACAGAGGA\n+CCCCGAATCAGTTAACTTCCCGACA\n'
b
diff -r 000000000000 -r a2f293717ce3 test-data/signature.pdf
b
Binary file test-data/signature.pdf has changed
b
diff -r 000000000000 -r a2f293717ce3 test-data/signature.tab
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/signature.tab Wed Oct 21 11:35:25 2015 -0400
b
@@ -0,0 +1,12 @@
+overlap num of pairs probability
+5 2 0.000051
+6 43 0.003106
+7 44 0.003745
+8 60 0.005621
+9 71 0.004685
+10 254 0.023097
+11 70 0.004458
+12 43 0.002481
+13 43 0.003913
+14 58 0.007665
+15 71 0.008706
b
diff -r 000000000000 -r a2f293717ce3 test-data/sr_bowtie.bam
b
Binary file test-data/sr_bowtie.bam has changed
b
diff -r 000000000000 -r a2f293717ce3 tool-data/bowtie_indices.loc.sample
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool-data/bowtie_indices.loc.sample Wed Oct 21 11:35:25 2015 -0400
b
@@ -0,0 +1,37 @@
+#This is a sample file distributed with Galaxy that enables tools
+#to use a directory of Bowtie indexed sequences data files. You will
+#need to create these data files and then create a bowtie_indices.loc
+#file similar to this one (store it in this directory) that points to
+#the directories in which those files are stored. The bowtie_indices.loc
+#file has this format (longer white space characters are TAB characters):
+#
+#<unique_build_id>   <dbkey>   <display_name>   <file_base_path>
+#
+#So, for example, if you had hg18 indexed stored in
+#/depot/data2/galaxy/bowtie/hg18/,
+#then the bowtie_indices.loc entry would look like this:
+#
+#hg18 hg18 hg18 /depot/data2/galaxy/bowtie/hg18/hg18
+#
+#and your /depot/data2/galaxy/bowtie/hg18/ directory
+#would contain hg18.*.ebwt files:
+#
+#-rw-r--r--  1 james    universe 830134 2005-09-13 10:12 hg18.1.ebwt
+#-rw-r--r--  1 james    universe 527388 2005-09-13 10:12 hg18.2.ebwt
+#-rw-r--r--  1 james    universe 269808 2005-09-13 10:12 hg18.3.ebwt
+#...etc...
+#
+#Your bowtie_indices.loc file should include an entry per line for each
+#index set you have stored. The "file" in the path does not actually
+#exist, but it is the prefix for the actual index files. For example:
+#
+#hg18canon  hg18 hg18 Canonical /depot/data2/galaxy/bowtie/hg18/hg18canon
+#hg18full  hg18 hg18 Full  /depot/data2/galaxy/bowtie/hg18/hg18full
+#/orig/path/hg19 hg19 hg19  /depot/data2/galaxy/bowtie/hg19/hg19
+#...etc...
+#
+#Note that for backwards compatibility with workflows, the unique ID of
+#an entry must be the path that was in the original loc file, because that
+#is the value stored in the workflow for that parameter. That is why the
+#hg19 entry above looks odd. New genomes can be better-looking.
+#
b
diff -r 000000000000 -r a2f293717ce3 tool_data_table_conf.xml.sample
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_data_table_conf.xml.sample Wed Oct 21 11:35:25 2015 -0400
b
@@ -0,0 +1,8 @@
+<!-- Use the file tool_data_table_conf.xml.oldlocstyle if you don't want to update your loc files as changed in revision 4550:535d276c92bc-->
+<tables>
+    <!-- Locations of indexes in the Bowtie mapper format -->
+    <table name="bowtie_indexes" comment_char="#">
+        <columns>value, dbkey, name, path</columns>
+        <file path="tool-data/bowtie_indices.loc" />
+    </table>
+</tables>
b
diff -r 000000000000 -r a2f293717ce3 tool_dependencies.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_dependencies.xml Wed Oct 21 11:35:25 2015 -0400
b
@@ -0,0 +1,18 @@
+<?xml version="1.0"?>
+<tool_dependency>
+  <package name="bowtie" version="0.12.7">
+      <repository changeset_revision="9f9f38617a98" name="package_bowtie_0_12_7" owner="devteam" toolshed="https://toolshed.g2.bx.psu.edu" />
+    </package>
+    <package name="pysam" version="0.7.7">
+      <repository changeset_revision="0a5141bdf9d0" name="package_pysam_0_7_7" owner="iuc" toolshed="https://toolshed.g2.bx.psu.edu" />
+    </package>
+    <package name="numpy" version="1.9">
+        <repository changeset_revision="9cc1138e5e3e" name="package_numpy_1_9" owner="iuc" toolshed="https://toolshed.g2.bx.psu.edu" />
+    </package>
+    <package name="R" version="3.1.2">
+        <repository changeset_revision="9f2fddb9d6e2" name="package_r_3_1_2" owner="iuc" toolshed="https://toolshed.g2.bx.psu.edu" />
+    </package>
+    <package name="biocbasics" version="2.14">
+        <repository changeset_revision="f0ef1a7b157e" name="package_biocbasics_2_14" owner="mvdbeek" toolshed="https://toolshed.g2.bx.psu.edu" />
+    </package>
+</tool_dependency>