Next changeset 1:6218b518cd16 (2017-06-07) |
Commit message:
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/msp_sr_signature commit fe40dec87779c1fcfbd03330e653aa886f4a2cda |
added:
signature.py signature.xml smRtools.py test-data/ensembl.fa test-data/signature.pdf test-data/signature.tab test-data/sr_bowtie.bam tool-data/bowtie_indices.loc.sample tool_data_table_conf.xml.sample tool_dependencies.xml |
b |
diff -r 000000000000 -r a2f293717ce3 signature.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/signature.py Wed Oct 21 11:35:25 2015 -0400 |
[ |
@@ -0,0 +1,122 @@ +#!/usr/bin/python +# script for computing overlap signatures from a bowtie output +# Christophe Antoniewski <drosofff@gmail.com> +# Usage signature.py <1:input> <2:format of input> <3:minsize query> <4:maxsize query> <5:minsize target> <6:maxsize target> +# <7:minscope> <8:maxscope> <9:output> <10:bowtie index> <11:procedure option> <12: graph (global or lattice)> +# <13: R code> +# version 2.0.0 + +import sys +import subprocess +import argparse +from smRtools import * +from collections import defaultdict # test whether it is required + + +def Parser(): + the_parser = argparse.ArgumentParser() + the_parser.add_argument( + '--input', action="store", type=str, help="input alignment file") + the_parser.add_argument('--inputFormat', action="store", type=str, choices=[ + "tabular", "bam", "sam"], help="format of alignment file (tabular/bam/sam)") + the_parser.add_argument( + '--minquery', type=int, help="Minimum readsize of query reads (nt) - must be an integer") + the_parser.add_argument( + '--maxquery', type=int, help="Maximum readsize of query reads (nt) - must be an integer") + the_parser.add_argument( + '--mintarget', type=int, help="Minimum readsize of target reads (nt) - must be an integer") + the_parser.add_argument( + '--maxtarget', type=int, help="Maximum readsize of target reads (nt) - must be an integer") + the_parser.add_argument( + '--minscope', type=int, help="Minimum overlap analyzed (nt) - must be an integer") + the_parser.add_argument( + '--maxscope', type=int, help="Maximum overlap analyzed (nt) - must be an integer") + the_parser.add_argument( + '--outputOverlapDataframe', action="store", type=str, help="Overlap dataframe") + the_parser.add_argument('--referenceGenome', action='store', + help="path to the bowtie-indexed or fasta reference") + the_parser.add_argument('--extract_index', action='store_true', + help="specify if the reference is an indexed Bowtie reference") + the_parser.add_argument('--graph', action='store', choices=[ + "global", "lattice"], help="small RNA signature is computed either globally or by item (global-lattice)") + the_parser.add_argument( + '--rcode', type=str, help="R code to be passed to the python script") + args = the_parser.parse_args() + return args + +args = Parser() + +if args.extract_index: + GenomeFormat = "bowtieIndex" +else: + GenomeFormat = "fastaSource" + +if args.inputFormat == "tabular": + Genome = HandleSmRNAwindows( + args.input, args.inputFormat, args.referenceGenome, GenomeFormat) +elif args.inputFormat == "sam": + Genome = HandleSmRNAwindows( + args.input, args.inputFormat, args.referenceGenome, GenomeFormat) +else: + Genome = HandleSmRNAwindows( + args.input, args.inputFormat, args.referenceGenome, GenomeFormat) + +# replace objDic by Genome.instanceDict or... objDic = Genome.instanceDict +objDic = Genome.instanceDict + +args.maxscope += 1 + +general_frequency_table = dict( + [(i, 0) for i in range(args.minscope, args.maxscope)]) +general_percent_table = dict( + [(i, 0) for i in range(args.minscope, args.maxscope)]) +OUT = open(args.outputOverlapDataframe, "w") + +if args.graph == "global": + # for normalized summing of local_percent_table(s) + readcount_dic = {} + Total_read_in_objDic = 0 + for item in objDic: + readcount_dic[item] = objDic[item].readcount( + args.minquery, args.maxquery) + Total_read_in_objDic += readcount_dic[item] + ###### + for x in (objDic): + local_frequency_table = objDic[x].signature( + args.minquery, args.maxquery, args.mintarget, args.maxtarget, range(args.minscope, args.maxscope)) + local_percent_table = objDic[x].hannon_signature( + args.minquery, args.maxquery, args.mintarget, args.maxtarget, range(args.minscope, args.maxscope)) + try: + for overlap in local_frequency_table.keys(): + general_frequency_table[overlap] = general_frequency_table.get( + overlap, 0) + local_frequency_table[overlap] + except: + pass + try: + for overlap in local_percent_table.keys(): + general_percent_table[overlap] = general_percent_table.get( + overlap, 0) + (1. / Total_read_in_objDic * readcount_dic[x] * local_percent_table[overlap]) + except: + pass + print >> OUT, "overlap\tnum of pairs\tprobability" + for classe in sorted(general_frequency_table): + print >> OUT, "%i\t%i\t%f" % ( + classe, general_frequency_table[classe], general_percent_table[classe]) + +else: + print >> OUT, "overlap\tnum of pairs\tprobability\titem" + for x in (objDic): + local_frequency_table = objDic[x].signature( + args.minquery, args.maxquery, args.mintarget, args.maxtarget, range(args.minscope, args.maxscope)) + local_percent_table = objDic[x].hannon_signature( + args.minquery, args.maxquery, args.mintarget, args.maxtarget, range(args.minscope, args.maxscope)) + for classe in range(args.minscope, args.maxscope): + print >> OUT, "%i\t%i\t%f\t%s" % ( + classe, local_frequency_table[classe], local_percent_table[classe], x) + +OUT.close() + +# Run the R script that is defined in the xml using the Rscript binary +# provided with R. +R_command = "Rscript " + args.rcode +process = subprocess.Popen(R_command.split()) |
b |
diff -r 000000000000 -r a2f293717ce3 signature.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/signature.xml Wed Oct 21 11:35:25 2015 -0400 |
[ |
b'@@ -0,0 +1,149 @@\n+<tool id="signature" name="Small RNA Signatures" version="2.0.5">\n+ <description />\n+ <requirements>\n+ <requirement type="package" version="0.12.7">bowtie</requirement>\n+ <requirement type="package" version="0.7.7">pysam</requirement>\n+ <requirement type="package" version="3.1.2">R</requirement>\n+ <requirement type="package" version="2.14">biocbasics</requirement>\n+ <requirement type="package" version="1.9">numpy</requirement>\n+ </requirements>\n+ <command interpreter="python">\n+ signature.py\n+ --input $refGenomeSource.input\n+ --inputFormat $refGenomeSource.input.ext\n+ --minquery $minquery\n+ --maxquery $maxquery\n+ --mintarget $mintarget\n+ --maxtarget $maxtarget\n+ --minscope $minscope\n+ --maxscope $maxscope\n+ --outputOverlapDataframe $output\n+ #if $refGenomeSource.genomeSource == "history":\n+ --referenceGenome $refGenomeSource.ownFile\n+ #else:\n+ #silent reference= filter( lambda x: str( x[0] ) == str( $input.dbkey ), $__app__.tool_data_tables[ \'bowtie_indexes\' ].get_fields() )[0][-1]\n+ --referenceGenome $reference\n+ --extract_index\n+ #end if\n+\t --graph $graph_type \n+ --rcode $sigplotter\n+ </command>\n+ <inputs>\n+ <conditional name="refGenomeSource">\n+ <param help="Built-ins were indexed using default options" label="Will you select a reference genome from your history or use a built-in index?" name="genomeSource" type="select">\n+ <option value="indexed">Use a built-in index</option>\n+ <option value="history">Use one from the history</option>\n+ </param>\n+ <when value="indexed">\n+ <param format="tabular,sam,bam" label="Compute signature from this bowtie standard output" name="input" type="data">\n+ <validator message="database not set for this bowtie output. Select the database(=genome used for matching) manually, or select a reference fasta from your history." metadata_column="0" metadata_name="dbkey" table_name="bowtie_indexes" type="dataset_metadata_in_data_table" />\n+ </param>\n+ </when>\n+ <when value="history">\n+ <param format="fasta" label="Select the fasta reference" name="ownFile" type="data" />\n+ <param format="tabular,sam,bam" label="Compute signature from this bowtie standard output" name="input" type="data" />\n+ </when>\n+ </conditional>\n+ <param help="\'23\' = 23 nucleotides" label="Min size of query small RNAs" name="minquery" size="3" type="integer" value="23" />\n+ <param help="\'29\' = 29 nucleotides" label="Max size of query small RNAs" name="maxquery" size="3" type="integer" value="29" />\n+ <param help="\'23\' = 23 nucleotides" label="Min size of target small RNAs" name="mintarget" size="3" type="integer" value="23" />\n+ <param help="\'29\' = 29 nucleotides" label="Max size of target small RNAs" name="maxtarget" size="3" type="integer" value="29" />\n+ <param help="\'1\' = 1 nucleotide overlap" label="Minimal relative overlap analyzed" name="minscope" size="3" type="integer" value="1" />\n+ <param help="\'1\' = 1 nucleotide overlap" label="Maximal relative overlap analyzed" name="maxscope" size="3" type="integer" value="26" />\n+ <param help="Signature can be computed globally or by item present in the alignment file" label="Graph type" name="graph_type" type="select">\n+ <option selected="True" value="global">Global</option>\n+ <option value="lattice">Lattice</option>\n+ </param>\n+ </inputs>\n+ <outputs>\n+ <data format="tabular" label="signature data frame" name="output" />\n+ <data format="pdf" label="Overlap probabilities" name="output2" />\n+ </outputs>\n+ <tests>\n+ <test>\n+ <p'..b'lue="23" />\n+ <param name="maxtarget" value="29" />\n+ <param name="minscope" value="5" />\n+ <param name="maxscope" value="15" />\n+ <param name="graph_type" value="global" />\n+ <output file="signature.tab" ftype="tabular" name="output" />\n+ <output file="signature.pdf" ftype="pdf" name="output2" />\n+ </test>\n+ </tests>\n+ <help>\n+\n+**What it does**\n+\n+This tool computes the number of pairs by overlap classes (in nt) from a bowtie output file, the z-score calculated from these numbers of pairs, and the ping-pong signal as described in Brennecke et al (2009) Science. \n+The numerical options set the min and max size of both the query small rna class and the target small rna class. \n+Three type of signals are plotted in separate pdf files, the number of pairs founds, the z-score calculated from these numbers of pairs, and the ping-pong signal as described in Brennecke et al (2009) Science. \n+\n+ </help>\n+ <citations>\n+ <citation type="doi">10.1007/978-1-4939-0931-5_12</citation>\n+ </citations>\n+ <configfiles>\n+ <configfile name="sigplotter">\n+ graph_type = "${graph_type}"\n+\n+ globalgraph = function () {\n+ ## Setup R error handling to go to stderr\n+ options( show.error.messages=F,\n+ error = function () { cat( geterrmessage(), file=stderr() ); q( "no", 1, F ) } )\n+ signature = read.delim("${output}", header=TRUE)\n+ signaturez=data.frame(signature[,1], (signature[,2] -mean(signature[,2]))/sd(signature[,2]))\n+ overlap_prob_z=data.frame(signature[,1], (signature[,3] -mean(signature[,3]))/sd(signature[,3]))\n+ YLIM=max(signature[,2])\n+ \n+\n+ ## Open output2 PDF file\n+ pdf( "${output2}" )\n+ if (YLIM!=0) {\n+ par(mfrow=c(2,2),oma = c(0, 0, 3, 0))\n+\n+ plot(signature[,1:2], type = "h", main="Numbers of pairs", cex.main=1, xlab="overlap (nt)", ylim=c(0,YLIM), ylab="Numbers of pairs", col="darkslateblue", lwd=4)\n+\n+ plot(signaturez, type = "l", main="Number of pairs Z-scores", cex.main=1, xlab="overlap (nt)", ylab="z-score", pch=19, cex=0.2, col="darkslateblue", lwd=2)\n+\n+ plot(signature[,1], signature[,3]*100, type = "l", main="Overlap probabilities",\n+ cex.main=1, xlab="overlap (nt)", ylab="Probability [%]", ylim=c(0,50),\n+ pch=19, col="darkslateblue", lwd=2)\n+\n+ plot(overlap_prob_z, type = "l", main="Overlap Probability Z-scores", cex.main=1, xlab="overlap (nt)", ylab="z-score", pch=19, cex=0.2, col="darkslateblue", lwd=2)\n+\n+ mtext("Overlap Signatures of ${minquery}-${maxquery} against ${mintarget}-${maxtarget}nt small RNAs", outer = TRUE, cex=1)\n+ }\n+ devname = dev.off()\n+ ## Close the PDF file\n+ }\n+\n+ treillisgraph = function () {\n+ ## Open output2 PDF file\n+ pdf( "${output2}", paper="special", height=11.69, width=8.2677 )\n+ signature = read.delim("${output}", header=TRUE)\n+ options( show.error.messages=F,\n+ error = function () { cat( geterrmessage(), file=stderr() ); q( "no", 1, F ) } )\n+ library(lattice)\n+ print (xyplot(signature[,3]*100~signature[,1]|signature[,4], type = "l", xlim=c(${minscope},${maxscope}), main="ping-pong Signature of ${minquery}-${maxquery} against ${mintarget}-${maxtarget}nt small RNAs",\n+ par.strip.text=list(cex=.5), strip=strip.custom(which.given=1, bg="lightblue"), scales=list(cex=0.5),\n+ cex.main=1, cex=.5, xlab="overlap (nt)", ylab="ping-pong signal [%]",\n+ pch=19, col="darkslateblue", lwd =1.5, cex.lab=1.2, cex.axis=1.2,\n+ layout=c(4,12), as.table=TRUE, newpage = T) )\n+ devnname = dev.off()\n+ }\n+\n+ if (graph_type=="global") {\n+ globalgraph()\n+\n+ }\n+ if(graph_type=="lattice") {\n+ treillisgraph()\n+ }\n+ </configfile>\n+ </configfiles>\n+</tool>\n' |
b |
diff -r 000000000000 -r a2f293717ce3 smRtools.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/smRtools.py Wed Oct 21 11:35:25 2015 -0400 |
[ |
b'@@ -0,0 +1,758 @@\n+#!/usr/bin/python\n+# version 1 7-5-2012 unification of the SmRNAwindow class\n+\n+import sys, subprocess\n+from collections import defaultdict\n+from numpy import mean, median, std\n+##Disable scipy import temporarily, as no working scipy on toolshed.\n+##from scipy import stats\n+\n+def get_fasta (index="/home/galaxy/galaxy-dist/bowtie/5.37_Dmel/5.37_Dmel"):\n+ \'\'\'This function will return a dictionary containing fasta identifiers as keys and the\n+ sequence as values. Index must be the path to a fasta file.\'\'\'\n+ p = subprocess.Popen(args=["bowtie-inspect","-a", "0", index], stdout=subprocess.PIPE, stderr=subprocess.STDOUT) # bowtie-inspect outputs sequences on single lines\n+ outputlines = p.stdout.readlines()\n+ p.wait()\n+ item_dic = {}\n+ for line in outputlines:\n+ if (line[0] == ">"):\n+ try:\n+ item_dic[current_item] = "".join(stringlist) # to dump the sequence of the previous item - try because of the keyerror of the first item\n+ except: pass\n+ current_item = line[1:].rstrip().split()[0] #take the first word before space because bowtie splits headers !\n+ item_dic[current_item] = ""\n+ stringlist=[]\n+ else:\n+ stringlist.append(line.rstrip() )\n+ item_dic[current_item] = "".join(stringlist) # for the last item\n+ return item_dic\n+\n+def get_fasta_headers (index):\n+ p = subprocess.Popen(args=["bowtie-inspect","-n", index], stdout=subprocess.PIPE, stderr=subprocess.STDOUT) # bowtie-inspect outputs sequences on single lines\n+ outputlines = p.stdout.readlines()\n+ p.wait()\n+ item_dic = {}\n+ for line in outputlines:\n+ header = line.rstrip().split()[0] #take the first word before space because bowtie splits headers !\n+ item_dic[header] = 1\n+ return item_dic\n+\n+\n+def get_file_sample (file, numberoflines):\n+ \'\'\'import random to use this function\'\'\'\n+ F=open(file)\n+ fullfile = F.read().splitlines()\n+ F.close()\n+ if len(fullfile) < numberoflines:\n+ return "sample size exceeds file size"\n+ return random.sample(fullfile, numberoflines)\n+\n+def get_fasta_from_history (file):\n+ F = open (file, "r")\n+ item_dic = {}\n+ for line in F:\n+ if (line[0] == ">"):\n+ try:\n+ item_dic[current_item] = "".join(stringlist) # to dump the sequence of the previous item - try because of the keyerror of the first item\n+ except: pass\n+ current_item = line[1:-1].split()[0] #take the first word before space because bowtie splits headers !\n+ item_dic[current_item] = ""\n+ stringlist=[]\n+ else:\n+ stringlist.append(line[:-1])\n+ item_dic[current_item] = "".join(stringlist) # for the last item\n+ return item_dic\n+\n+def antipara (sequence):\n+ antidict = {"A":"T", "T":"A", "G":"C", "C":"G", "N":"N"}\n+ revseq = sequence[::-1]\n+ return "".join([antidict[i] for i in revseq])\n+\n+def RNAtranslate (sequence):\n+ return "".join([i if i in "AGCN" else "U" for i in sequence])\n+def DNAtranslate (sequence):\n+ return "".join([i if i in "AGCN" else "T" for i in sequence])\n+\n+def RNAfold (sequence_list):\n+ thestring= "\\n".join(sequence_list)\n+ p = subprocess.Popen(args=["RNAfold","--noPS"], stdin= subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)\n+ output=p.communicate(thestring)[0]\n+ p.wait()\n+ output=output.split("\\n")\n+ if not output[-1]: output = output[:-1] # nasty patch to remove last empty line\n+ buffer=[]\n+ for line in output:\n+ if line[0] in ["N","A","T","U","G","C"]:\n+ buffer.append(DNAtranslate(line))\n+ if line[0] in ["(",".",")"]:\n+ fields=line.split("(")\n+ energy= fields[-1]\n+ energy = energy[:-1] # remove the ) parenthesis\n+ energy=float(energy)\n+ buffer.append(str(energy))\n+ return dict(zip(buffer[::2], buffer[1::2]))\n+\n+def extractsubinstance (start, end, instance):\n+ \'\'\' Testing whether this can be an function external to the class to save memory\'\'\'\n+ subinstance = SmRNAwindow (instance.gene, instance.sequence[start-1:end], start)\n+ subinstance.gene = "%s %s %s" % (subin'..b'e[offset:offset+queryhash[offset][i]]) )\n+ paired_sequences.append("-%s" % RNAtranslate (antipara (self.sequence[-matched_offset-targethash[matched_offset][i]+1:-matched_offset+1]) ) )\n+ if offset < 0:\n+ for i in range (paired):\n+ paired_sequences.append("-%s" % RNAtranslate (antipara (self.sequence[-offset-queryhash[offset][i]+1:-offset+1]) ) )\n+ paired_sequences.append("+%s" % RNAtranslate (self.sequence[matched_offset:matched_offset+targethash[matched_offset][i]] ) )\n+ return paired_sequences\n+\n+ def pairable (self, overlap, minquery, maxquery, mintarget, maxtarget):\n+ queryhash = defaultdict(list)\n+ targethash = defaultdict(list)\n+ query_range = range (int(minquery), int(maxquery)+1)\n+ target_range = range (int(mintarget), int(maxtarget)+1)\n+ paired_sequences = []\n+\n+ for offset in self.readDict: # selection of data\n+ for size in self.readDict[offset]:\n+ if size in query_range:\n+ queryhash[offset].append(size)\n+ if size in target_range:\n+ targethash[offset].append(size)\n+\n+ for offset in queryhash:\n+ matched_offset = -offset - overlap + 1\n+ if targethash[matched_offset]:\n+ if offset >= 0:\n+ for i in queryhash[offset]:\n+ paired_sequences.append("+%s" % RNAtranslate (self.sequence[offset:offset+i]) )\n+ for i in targethash[matched_offset]:\n+ paired_sequences.append( "-%s" % RNAtranslate (antipara (self.sequence[-matched_offset-i+1:-matched_offset+1]) ) )\n+ if offset < 0:\n+ for i in queryhash[offset]:\n+ paired_sequences.append("-%s" % RNAtranslate (antipara (self.sequence[-offset-i+1:-offset+1]) ) )\n+ for i in targethash[matched_offset]:\n+ paired_sequences.append("+%s" % RNAtranslate (self.sequence[matched_offset:matched_offset+i] ) )\n+ return paired_sequences\n+\n+ def newpairable_bowtie (self, overlap, minquery, maxquery, mintarget, maxtarget):\n+ \'\'\' revision of pairable on 3-12-2012, with focus on the offset shift problem (bowtie is 1-based cooordinates whereas python strings are 0-based coordinates\'\'\'\n+ queryhash = defaultdict(list)\n+ targethash = defaultdict(list)\n+ query_range = range (int(minquery), int(maxquery)+1)\n+ target_range = range (int(mintarget), int(maxtarget)+1)\n+ bowtie_output = []\n+\n+ for offset in self.readDict: # selection of data\n+ for size in self.readDict[offset]:\n+ if size in query_range:\n+ queryhash[offset].append(size)\n+ if size in target_range:\n+ targethash[offset].append(size)\n+ counter = 0\n+ for offset in queryhash:\n+ matched_offset = -offset - overlap + 1\n+ if targethash[matched_offset]:\n+ if offset >= 0:\n+ for i in queryhash[offset]:\n+ counter += 1\n+ bowtie_output.append("%s\\t%s\\t%s\\t%s\\t%s" % (counter, "+", self.gene, offset-1, self.sequence[offset-1:offset-1+i]) ) # attention a la base 1-0 de l\'offset \n+ if offset < 0:\n+ for i in queryhash[offset]:\n+ counter += 1\n+ bowtie_output.append("%s\\t%s\\t%s\\t%s\\t%s" % (counter, "-", self.gene, -offset-i, self.sequence[-offset-i:-offset])) # attention a la base 1-0 de l\'offset\n+ return bowtie_output\n+\n+\n+def __main__(bowtie_index_path, bowtie_output_path):\n+ sequenceDic = get_fasta (bowtie_index_path)\n+ objDic = {}\n+ F = open (bowtie_output_path, "r") # F is the bowtie output taken as input\n+ for line in F:\n+ fields = line.split()\n+ polarity = fields[1]\n+ gene = fields[2]\n+ offset = int(fields[3])\n+ size = len (fields[4])\n+ try:\n+ objDic[gene].addread (polarity, offset, size)\n+ except KeyError:\n+ objDic[gene] = SmRNAwindow(gene, sequenceDic[gene])\n+ objDic[gene].addread (polarity, offset, size)\n+ F.close()\n+ for gene in objDic:\n+ print gene, objDic[gene].pairer(19,19,23,19,23)\n+\n+if __name__ == "__main__" : __main__(sys.argv[1], sys.argv[2]) \n' |
b |
diff -r 000000000000 -r a2f293717ce3 test-data/ensembl.fa --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/ensembl.fa Wed Oct 21 11:35:25 2015 -0400 |
b |
b'@@ -0,0 +1,10060 @@\n+>FBgn0026065_Idefix\n+GTGACATATCCATAAGTCCCTAAGACTTAAGCATATGCCTACATACTAATACACTTACAA\n+CACATACACCCCAATACAACATACACTACTCCGGATGTACCCAACAGATACCAGATAAGA\n+ATAAGATTGTTATATGATCCTCGAGAATGGAAAAAACCCCAATTCTAGATAAGTCACCCA\n+CTGGTAGACTAAACATCCGTCCCCTAATTTAAACAATTCCTTGCTTAAGCCTCACCCCAT\n+CGTCACATTCCCACGTTCAAAGCTCGGAGCCGCAATCCCGAAAAACAAAAGTATCGATTT\n+CAATAAACAAATTATAAGAATCTAAGAGCACTTGTATCCAAGAGCAAATGCACTTGAATC\n+CAAGAGAAACGCAAAGCTTTTTCTCTTTACGATCAGAATCCTAAAGTCTAAAGTCCATAT\n+TAGAAAAGCTCGATACCGAGGCTTGAACGTCAACCAAATCAGAATAATTATCAGAGTTCA\n+GTTTGAGACCTAATTGTAAAAGGTTCGGTGTTCTTCTCAAATAAAAAGATTGTAATCATT\n+TAGTGAAATAAAAATTATATTTTTTTCACTTATAAATATTGCAAGTATTTAATTGGCGCA\n+GTCGGTTAGGATCCAATAAAATAAAAGAGTCCTTTTAGTACGGTACTGATCAACTGAAGG\n+ATATGCTATACGACTAGCTATCCAAGATCAGCGAATTAAAATAGTGATTCAAAAATATTT\n+TTTAATCCGCAAAAGAATCTACGTGAAAGTAGTATTCAAAATAAAATCCCGTGCGGTCGG\n+AAACAAAAATTAATTTAAATTTTTTAATTCCGAAACTTAAAACCAAGTTTAAAGAAAACT\n+TAAAATCAAGAAAACTTAAAACCAAGTTTAAAGAAAACTTAAAATCAAGAAAACTTAAAA\n+CCAAGTTTAAAGAAAACTTAAAATCAAGAAAACTTAAAACCAAGTTTAAAGAAAACTCAA\n+AATCAAGAAAACTTAAAGCCAAAATAAGCTAGAAAACTAAAAGACATCATGGCAGTCCCA\n+CAACTCTCAGAAACACACCTAAACCAACTGCTAAACCAAATCAAAGAATTAAACTACTAC\n+GATGGCGCACCTGGCAAATTATCTGGATTCGTCAACCAAGTGGAACAACTGCTCAGTTTA\n+TACCCAACACAGGAAGCAAGACAGGCACACGTCATATATGGAGCAGTGAAGCGGTTATTA\n+GTGGATTCAGCCTTAGAAGTCGTAACCCAGGAAAGAGCTAACACATGGCTGGACATGAAG\n+AAAGCACTGGCAATGGCATTCAAAGACCATAGACCTTATGTAACTCTCATCAGACAATTA\n+GAAGACATATCATACCCAGGAAGTATCTGTAAGTTTATAGAAAAATTAGAAACACAATAC\n+TGGATTATGTTCGATAAGTTAGAATTAGAAAGTGACCATGTTGATAAATCGAATTATACC\n+GAAATGTTAAACAAAACTGTTAAATCAGTAATAGATCGAAAACTGCCGGATAGAATTTAT\n+ATGTCTTTGGCACGTAAAGATATTGATACAATTTATAAATTAAAACAAGCATCAATGGAA\n+TTAGGCCTTTATGATGCTATTCCAGAAAATCACCGTTCTAATAGAACAGAAATGAATAAA\n+CGTAGGAACAGGGGAAACTATAATCAAAATAATAATCAAAAATATTACAATAATAGAAAT\n+CACAACTACAGTAATTATTATCCTAGCATGAATCAGAATCATAATACACAACCACCTCAG\n+AATCCGACTCAACCTATGACAAATCAAAACCAATATTCACCGCGTTTCATACCGAATAAT\n+CAAAGAGGGAATTATTATGCATTTAGACGAGACTTAACACAAGCTCAGCAGAACAACCCA\n+CTTAATAACACCCTTAACTTCCAACCTTCGACATCGAATAATATTAACAGACAAGGGCCA\n+GTAAAAAGACAACGCGAGAGTCAGAGTGACCAAAGCAGGATGGATGTAAATTTTCATCAA\n+GCTGCCTCGGACACTCAAATGATAGAGAAGGACATACAAGTCCCTATGTAAAAATAATTC\n+ATCATAATAAAAATTATAAGGGAATGATCGATACAGGATCATCAATTAACATCATAAGAG\n+AAAATTTTGAGAACTTAGAAGAAAAGGAAGAAAACCTAATAGTATACACTATTAAAGGAC\n+CAATAACACTAAAGAGAAGTATAATAATAAAACCTACTTCAGTATGTCCGTCTGCTCAAA\n+AATTCTACATTCACAAATTTTCTGATAACTATGATTTCTTGTTAGGTCGAAAGTATTTAG\n+AAGATACAAAAGCTAAAATAGATTATGCTAACGAAACAGTAACACTAGGCTCAAAAGTAT\n+TTAAGTTTCTCTATGAAGAAAAGAAGGGCGAGACCGCATCCAAATGCCTTGACCCACAAG\n+AAAAGAATGATTCCGCTCTAGTGGACAGAACCAAACCAAAAATGCAAAAGGTTAAGACCG\n+CACCTAAGTGCCTTAAACCAAAGCATCAACAGCAGAAGAAAGAGACCGCATTACCCAAAT\n+GCCTCATTTCAAATGTTGTTAAAGACACAGTGGACAATGATGTAACACATCTCGATCCCA\n+TGTCCGTTGACAACGATATAGTCAACTTCGCGATTAACAATGAGTTACGCGAATGTAACG\n+AGTATAGACTCGAACACTTAAATGCAGAGGAAGTTGAATGTTTAAAGAAGTTCCTATACG\n+AATATAGAGACATTCAGTACAAAGAGGGCGAAAATTTGACCTTCACCAGTACTATTAAAC\n+ATGTCATCCAGACTCAACACGAAGACCCAGTATACCGTAAACCCTACAAGTACCCTCAAA\n+GCGTTGACCAAGAAGTTAACAAACAAATTAAAGAAATGATAGAACAAGGGATTGTTCGCA\n+AATCGAAGTCCCCTTATTGTTCTCCTATTTGGGTGGTCCCCAAGAAGGCAGACGCCTCTG\n+GGAAACAAAAATTCAGGTTGGTAGTCGATTACAGGAACCTAAATGAGATAACTGTTAACG\n+ACAAATTTCCCATTCCCCGAATGGATGAGATATTGGACAAACTAGGTAGATGCCAATACT\n+TTACCACTATAGATCTAGCCAAGGGTTTTCACCAAATCCAAATGGATGAAAATTCTATTG\n+CAAAAACAGCTTTTTCAACTAAGCATGGGCATTATGAATATACTCGTATGCCCTTTGGTT\n+TAAAAAACGCTCCAGCTACTTTTCAGAGATGCATGAATAATCTTCTGGAAGATTTAATCT\n+ACAAAGACTGTTTAGTCTATTTAGACGATATTATTGTTTATTCCACTCCATTGGAAGAAC\n+ACATTTTATCCCTAAAGAAAGTCTTTGAAAAACTGAGAGACGCTAATTTAAAGTTGCAAC\n+TAGATAAATGTGAATTCATGAAGAAAGAAACTGAATTCCTAGGACACATCGTCACAACAA\n+ATGGCATCAAACCAAATCCAAATAAAACTAAAGCAATTACAAATTTTCCATTACCCAAGA\n+CACCTAAGCAAATAAAATCATTTTTGGGATTATGTGGATTCTATCGCAAGTTTATTCCTA\n+ACTTTGCCAAAATAGTTAAACCCATGACCCTCAAATTAAAGAAAGGTGCTATAATAGACA\n+CCAAATGTAAAGAATACATCGAATCATTTGAAAAATTAAAAGTTTTGATAACTTCAGACC\n+CGATATTAATCTATCCTGATTTTTCAAAACCTTTTTCTTTGACAACTGATGCTAGCAACG\n+TAGCTATTGGTGCAGTGTTATCACAAAATCACAAGCCAGTTTGTTATGCCAGTAGAACGC\n+TAAACGAACATGAAATCAACTATGCTACGATTGAAAAAGAATTGTTAGCTA'..b'CCC\n+GTTCCAGAAAATTCTACCAGCAAACGAGCTCGCCTACACCCAAATGTTAAGTGCCACTTT\n+TGTGGAAAAATTGGCCACAAGATAGCTGACTGCCGCTCCATGAAAAACAACTTAAAGAAT\n+CAACAAGGATCTAGTTCGAGTATTGGGCGCTTATCTGACTCTAAACCTGGGTCAATTACT\n+TGCTATAGATGTGGAAACCAGGGGCATATAGCGTCAGCTTGCCCTGCAAGACAATCGTTG\n+TCAAACCAAACTAAAGCCGACGAGAAGCGTGTCAACGTGTGTCACGTAGTCGAGCCAATT\n+GGGACATTGATATCATCTGGTGAGTCGTATCCATTTTATTTCGACTCTGGAGCCGAATGC\n+TCACTTGTAAGAGAATCTGTGTCCACCCAACTCTCGGGCACACGAATTAACAACAATGTA\n+GTTTTAAAGGGTATCGGAAATAATACTGTTACCAGTACATTACAAATTTTGTCAAACGTA\n+ACAATAAGTGGTTACTGTCTCGAAGTGCTTTTTCACGTAATTCTTAATGATTGCATTAAT\n+TATAATATTATAATTGGACGCGAAATTTTAAGTCAGGGATTTAGTGCTACTATAACAATA\n+GATAAAATAGAGTTATGTAAAACAAGGTCTGTGCAAACCCTATCTGCTTAGAGTAGTAGT\n+TTTAGTCTTGAAAATGTTAATACCGAATTGTGTGGCGAGGATAGGAAAATCTTGGTAAAT\n+CTTTTGAATAAATTCTGTGACTCATTTATAGACGGTTTTCCCAAAAATCGTGTTACAACT\n+GGCGAACTAGAAGTACGCTTAATTGATCCAATAAAAACTGTACACAGACGACCGTACCGA\n+CTTAGTATAGAGGAAAAACAAATTGTCCGAAACAAGGTTAATGAGCTGCTGTTAGATAAC\n+ATCATCCGTCCTAGCAGCTCACCGTTCGCCAGTCCAGTTTTACTCGTTAAAAAGAAAAAT\n+GGTTCTGATCGCCTTTGCGTGGATTACCGCGAACTAAATACAAACACAGTTGCAGAGAAA\n+TATCCCTTACCACTAATTAGTGACCAAATATCTAGGTTGCGTGGAGCAAGTTTCTTTAGT\n+TGCTTGGATATGGCCAGCGGGTTTCATCAGATACCTATTCACGCAAATTCAATTGAGCGC\n+ACGGCTTTTGTGACACCTGACGGCCAATTCGAATTTCTAACTATGCCCTTCGGGTTAAAG\n+AATGCCCCATCCGTGTTCCAGCGTGCAGTTATGAAAGCTTTGGGTGAGCTTGCCCACTCT\n+TACGTTATCGTTTATATGGACGATATAATGATTATCGCAGAAACAAAAGAAGAAGCTTTT\n+GTAAGGTTAAGGACAGTTTTGAAAATATTATCGCAGGCTGGGTTTTCTTTTAATATCGGA\n+AAATGTTCATTCCTGAAATCTTGCATTGAATATCTGGGGTTTGTGGTAAAAGAGGGCGAA\n+ATAAGACCAAATCCATCTAAGATAAAAGCATTAGTCGCTTTACCGCCTCCGCAGTCTGTT\n+ACCCAAGTAAGACAAATTATTGGCCTAGCCTCTTATTTTAGGCAGTTTGTGCCAAAGTTT\n+TCAGAAATCATGAAACCCTTATATAGACTGACCTGCAAAAACAAAATATTTGAATGGAAA\n+CTTGAACACGAACAAATTCGTCAAAAAGTCACTAAATTGCTTACAGATGAGCCCGTCCTT\n+GTTATCTTCGATCCTCGGCATCCCATTGAACTGCATACAGATGCCAGTATGGATGGCTAC\n+GGAGCAATTCTACTCCACAAAATAGATAATAAACGTCGTGTAGTTGAGTATTACAGCAAA\n+CAAACATCCTTGACGGAATCTCGATATCATTCGTACGAGCTTGAAACTTTAGCTGTGTAT\n+AACTCCATGAGACACTTTCGTCACTATTTACATGGGCGAATTTGTTGTTTTTACAGACTG\n+TAATTCCCTAAAAGCTACTCGCAACAAGACTGAACTAACGCCGAGAGTACACCGTTGGTG\n+GGCATATATGCAGTCCTTCGACTTTGACTAGAATGACTTAGACTTAGAATATAGACCTGG\n+TGCCATAATGGCACATGTTGATTTCTTGTCACGCAATCCACTGCCATCTGCTCGGGTTAT\n+TACTGGTGAGGAAGAAAAACATGTTCTATTGGCCAAAATAACGGACAACTGGTTACTTGC\n+AGAACAGCAAAAGGATTCAGAGATTTCCACGATTGTTGTTAAAATACAGAACAATGAATT\n+GGGTGAGAGCTCGGCAAAAAGTTATGAATTACGCTCGAAAATGCTTTTTCGCAAAATTCA\n+AAGGAACGGTAAAACTCGTTGCCTGCCAGTTGCCCCCAGATCATTCAGATGGTCAGTAGT\n+GAACCAGGTCCATGAAGCAGTTGTACATTTGGGTGGGAAAAGACTTTAGACAAAATGTAC\n+GAATTTTACTGGTTTGAGAACATGGCCAAATATGTTCGTAAGTTCGTTGATAATTGCATT\n+ACGTGTAAGTTAACTAAGCCTCCGTCAGGAAAATTGCCAATCGAACTCCACCCCATACCA\n+AAAGTAGAAATTCCATGGCTATAAGTTGTACGACAAATCGCATAACGAAAGCCAGTCCTC\n+TTGAATTACTAATCGGAAAAGAATGTAGACCATTTAATATGTTACCAATATGTGAACAAG\n+TTAATAAAGTCGATGTAAATATTATAAGAAATATCGCGAGAGAAAATATTAAGAAGAACG\n+CCTTGTATGAAAAAACTAGATTCGATAAGCACAAAGCCAAATTTGATAACTTTGGTGTTG\n+GCGATTATGTTTTACTTAAGAACGAAGAAAGGCACCAAACAAAATTAGACCAAAAATATA\n+AAGGACCTTTCCTCGTGACAGAGGTACTTAAGGGAGATCGTTATATTTTAAAATCTTTAA\n+CTAATAAGCGGACTTATAAGTACCCACATGAAGCTTTGCGCAGTATGCCAACAGAGGAGA\n+TCCCCAAAGAGTTAGATCTATGTGACGATCAAGAAAACGTTGAAAGAGACGTTAGAAATC\n+CCTTGGTGGATTCCAATGTGGATGAAAACGTCGAAAGAGACGTTAGAAATCCCTTGGTGG\n+ATTCCAATGGGGATGAAAACGTTGAAAGAGACGTTAGAAATCCGTTGGTGGATGCCAATG\n+TGAGCGAAAAGTTACTGAGTTGTTTGAAGACTCAAGTGAATGAGAGGCATTGATGGATTT\n+CAATGCGAGATTGGGGACACATGCAACGTCGCCAAGTTGCCAGTGCTAGTAGGTACAAGT\n+GTTACTGTGTTGACTTATTTGATGTCTGGTGACTGGCGGCGTGGCGGGTTGAATTGTCCT\n+AGTGTGTTGCTAATAATAACAAACGATCTTCTTGGTACTTCTGTCACTCGAGTTGGTCGA\n+TAACAAGAAAAATAATAATAATAATTACGTTTAATGTTATCTTTCTAGATTAAGCTTGTT\n+TAATTTCAAAACTTATATTACACACGAGGACGTGTGCTGGTCAGGAAGGCCGTGTCGCAT\n+CATTATTAGTCTTATTTTTATTTTCTATGTTCCATCTCTAATAAACATGTCATCTCTATT\n+AAATAAAATTCGTATCGAGCTGTTCTTGTCTTCGTTTCTCTTTGATCGCTGTTCGCTGTG\n+TTCCGTTATGCGAGTTTAACGGGTTTTGCTCTGTTCTACATAGTCTCGGTTCGACGATGC\n+GTTAGAGTGAGACAAATGCTTGTCCTGTGGTGAGTTCGGACCAGCATGTATCAAGCGAGA\n+TAGAGCGATGTTGAAATGTACACGGGGCACTTATGTTTGAAAACTCTGAGAAAGCGGACG\n+CGTGAATATGTCGCAACCGAGGAAGTGTACGACTCGCGGGCGGAGCGCGGCAACAGAGGA\n+CCCCGAATCAGTTAACTTCCCGACA\n' |
b |
diff -r 000000000000 -r a2f293717ce3 test-data/signature.pdf |
b |
Binary file test-data/signature.pdf has changed |
b |
diff -r 000000000000 -r a2f293717ce3 test-data/signature.tab --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/signature.tab Wed Oct 21 11:35:25 2015 -0400 |
b |
@@ -0,0 +1,12 @@ +overlap num of pairs probability +5 2 0.000051 +6 43 0.003106 +7 44 0.003745 +8 60 0.005621 +9 71 0.004685 +10 254 0.023097 +11 70 0.004458 +12 43 0.002481 +13 43 0.003913 +14 58 0.007665 +15 71 0.008706 |
b |
diff -r 000000000000 -r a2f293717ce3 test-data/sr_bowtie.bam |
b |
Binary file test-data/sr_bowtie.bam has changed |
b |
diff -r 000000000000 -r a2f293717ce3 tool-data/bowtie_indices.loc.sample --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool-data/bowtie_indices.loc.sample Wed Oct 21 11:35:25 2015 -0400 |
b |
@@ -0,0 +1,37 @@ +#This is a sample file distributed with Galaxy that enables tools +#to use a directory of Bowtie indexed sequences data files. You will +#need to create these data files and then create a bowtie_indices.loc +#file similar to this one (store it in this directory) that points to +#the directories in which those files are stored. The bowtie_indices.loc +#file has this format (longer white space characters are TAB characters): +# +#<unique_build_id> <dbkey> <display_name> <file_base_path> +# +#So, for example, if you had hg18 indexed stored in +#/depot/data2/galaxy/bowtie/hg18/, +#then the bowtie_indices.loc entry would look like this: +# +#hg18 hg18 hg18 /depot/data2/galaxy/bowtie/hg18/hg18 +# +#and your /depot/data2/galaxy/bowtie/hg18/ directory +#would contain hg18.*.ebwt files: +# +#-rw-r--r-- 1 james universe 830134 2005-09-13 10:12 hg18.1.ebwt +#-rw-r--r-- 1 james universe 527388 2005-09-13 10:12 hg18.2.ebwt +#-rw-r--r-- 1 james universe 269808 2005-09-13 10:12 hg18.3.ebwt +#...etc... +# +#Your bowtie_indices.loc file should include an entry per line for each +#index set you have stored. The "file" in the path does not actually +#exist, but it is the prefix for the actual index files. For example: +# +#hg18canon hg18 hg18 Canonical /depot/data2/galaxy/bowtie/hg18/hg18canon +#hg18full hg18 hg18 Full /depot/data2/galaxy/bowtie/hg18/hg18full +#/orig/path/hg19 hg19 hg19 /depot/data2/galaxy/bowtie/hg19/hg19 +#...etc... +# +#Note that for backwards compatibility with workflows, the unique ID of +#an entry must be the path that was in the original loc file, because that +#is the value stored in the workflow for that parameter. That is why the +#hg19 entry above looks odd. New genomes can be better-looking. +# |
b |
diff -r 000000000000 -r a2f293717ce3 tool_data_table_conf.xml.sample --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool_data_table_conf.xml.sample Wed Oct 21 11:35:25 2015 -0400 |
b |
@@ -0,0 +1,8 @@ +<!-- Use the file tool_data_table_conf.xml.oldlocstyle if you don't want to update your loc files as changed in revision 4550:535d276c92bc--> +<tables> + <!-- Locations of indexes in the Bowtie mapper format --> + <table name="bowtie_indexes" comment_char="#"> + <columns>value, dbkey, name, path</columns> + <file path="tool-data/bowtie_indices.loc" /> + </table> +</tables> |
b |
diff -r 000000000000 -r a2f293717ce3 tool_dependencies.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool_dependencies.xml Wed Oct 21 11:35:25 2015 -0400 |
b |
@@ -0,0 +1,18 @@ +<?xml version="1.0"?> +<tool_dependency> + <package name="bowtie" version="0.12.7"> + <repository changeset_revision="9f9f38617a98" name="package_bowtie_0_12_7" owner="devteam" toolshed="https://toolshed.g2.bx.psu.edu" /> + </package> + <package name="pysam" version="0.7.7"> + <repository changeset_revision="0a5141bdf9d0" name="package_pysam_0_7_7" owner="iuc" toolshed="https://toolshed.g2.bx.psu.edu" /> + </package> + <package name="numpy" version="1.9"> + <repository changeset_revision="9cc1138e5e3e" name="package_numpy_1_9" owner="iuc" toolshed="https://toolshed.g2.bx.psu.edu" /> + </package> + <package name="R" version="3.1.2"> + <repository changeset_revision="9f2fddb9d6e2" name="package_r_3_1_2" owner="iuc" toolshed="https://toolshed.g2.bx.psu.edu" /> + </package> + <package name="biocbasics" version="2.14"> + <repository changeset_revision="f0ef1a7b157e" name="package_biocbasics_2_14" owner="mvdbeek" toolshed="https://toolshed.g2.bx.psu.edu" /> + </package> +</tool_dependency> |