# HG changeset patch # User jjkoehorst # Date 1424536011 18000 # Node ID 1d78a849ad70076a443dd5720a15775d3d7d2c4c # Parent 27a2341f86cc6db93ce7e4b4378c9748db7ae27d Deleted selected files diff -r 27a2341f86cc -r 1d78a849ad70 genetic_elements/.DS_Store Binary file genetic_elements/.DS_Store has changed diff -r 27a2341f86cc -r 1d78a849ad70 genetic_elements/aragorn.py --- a/genetic_elements/aragorn.py Sat Feb 21 17:24:22 2015 +0100 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,125 +0,0 @@ -def delete_galaxy(): - import sys - for index, path in enumerate(sys.path): - if "galaxy-dist/" in path: - sys.path[index] = '' - -#Some modules that are required by RDFLIB are also in galaxy, this messes up the RDF import function. This is not an elegant solution but it works for now. -delete_galaxy() - -from rdflib import Graph, URIRef, Literal,Namespace, XSD, BNode,RDF,RDFS,OWL, ConjunctiveGraph, plugin - -# Import RDFLib's default Graph implementation. -from rdflib.graph import Graph - -import sys, os - -import rdflib -import subprocess -import hashlib -global URI -global SubClassOfDict -SubClassOfDict = {} - -URI = "http://csb.wur.nl/genome/" -global seeAlso -seeAlso = "rdfs:seeAlso" -global coreURI -coreURI = Namespace(URI) - -def createClass(uri): - #genomeGraph.add((uri,RDF.type,OWL.Class)) - #genomeGraph.add((uri,RDFS.subClassOf,OWL.Thing)) - #genomeGraph.add((coreURI["Feature"],RDFS.subClassOf,OWL.Thing)) - #genomeGraph.add((coreURI["Rna"],RDFS.subClassOf,coreURI["Feature"])) - #genomeGraph.add((uri,RDFS.subClassOf,coreURI["Rna"])) - return uri - -def tmp(): - import time - global tmpFolder - tmpFolder = "/tmp/"+str(time.time())+"/" - os.mkdir(tmpFolder) - -def query(): - global genomeGraph - genomeGraph = Graph() - filename = sys.argv[1] - genomeGraph.parse(filename, format="turtle") - qres = genomeGraph.query('select ?class ?sequence where {?class a ssb:DnaObject . ?class ssb:sequence ?sequence .}') - sequences = [] - for row in qres: - print ("Header:",row[0]) - sequences += [[">"+str(row[0]),str(row[1].strip())]] #.replace("/","-").replace("","") - - return sequences - -def aragorn(sequences): - for sequence in sequences: - #Call aragorn for each contig, for ease of parsing - open(tmpFolder+"tmp.seq","w").write('\n'.join(sequence)) - folder = os.path.realpath(__file__).rsplit("/",2)[0]+"/" - cmd = folder+"/tools/aragorn1.2.36/aragorn -fasta "+tmpFolder+"tmp.seq "+' '.join(sys.argv[3:-2])+" > "+tmpFolder+"aragorn.output" - print (cmd) - os.system(cmd) - aragorn = open(tmpFolder+"aragorn.output").readlines() -# string = ''.join(aragorn) - - contig = sequence[0].strip(">").replace("http://csb.wur.nl/genome/","") - dnaobjectURI = coreURI[contig] - #print (contig) - for line in aragorn: - if ">" in line: - print (line.split()) - try: - trna, pos = line.split()[1:] - except: - try: - trna, pos = line.split() - except: - if "(Permuted)" in line: - trna, permute, pos = line.split()[1:] - - if "tRNA-" in line: - trna, codon = (trna.strip(">)").split("(",1)) - else: - trna = trna.strip(">").strip() #Actually a tmRNA... - codon = '' - trnaClass = createClass(coreURI[trna.split("-")[0].title()]) #trna or tmrna - SubClassOfDict[trna.split("-")[0].title()] = 1 - if "c" in pos[0]: #complementary - stop, start = pos.split("[")[1].split("]")[0].split(",") - else: - start, stop = pos.split("[")[1].split("]")[0].split(",") - trnaURI = coreURI[contig+"/trna-aragorn_1_2_36-"+trna.lower() +"/"+ start +"_"+ stop] - genomeGraph.add((dnaobjectURI, coreURI["feature"] , trnaURI)) - genomeGraph.add((trnaURI, RDF.type,trnaClass)) - genomeGraph.add((trnaURI, coreURI["begin"] , Literal(start,datatype=XSD.integer))) - genomeGraph.add((trnaURI, coreURI["end"] , Literal(stop,datatype=XSD.integer))) - genomeGraph.add((trnaURI, coreURI["trna_type"] , Literal(trna))) - genomeGraph.add((trnaURI, coreURI["trna_anti"] , Literal(codon))) - genomeGraph.add((trnaURI, coreURI["tool"] , Literal("aragorn"))) - genomeGraph.add((trnaURI, coreURI["version"] , Literal("1.2.36"))) - genomeGraph.add((trnaURI, coreURI["sourcedb"], Literal(sys.argv[sys.argv.index("-sourcedb")+1]))) - -def subClassOfBuilder(): - for subclass in SubClassOfDict: - genomeGraph.add((coreURI["Feature"],RDFS.subClassOf,OWL.Thing)) - genomeGraph.add((coreURI["Rna"],RDFS.subClassOf,coreURI["Feature"])) - genomeGraph.add((coreURI[subclass],RDFS.subClassOf,coreURI["Rna"])) - genomeGraph.add((coreURI["Rna"], RDF.type,OWL.Class)) - -def save(): - #Create the subclass off instances - #subClassOfBuilder() - ## Saves the file - data = genomeGraph.serialize(format='turtle') - open(sys.argv[2],"wb").write(data) - -def main(): - tmp() - sequences = query() - aragorn(sequences) - save() - -main() diff -r 27a2341f86cc -r 1d78a849ad70 genetic_elements/aragorn.xml --- a/genetic_elements/aragorn.xml Sat Feb 21 17:24:22 2015 +0100 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,77 +0,0 @@ - - - python - rdflib - aragorn - - Aragon - aragorn.py '$input' '$output' '-gc$genbank_gencode' '$tmRNA' '$tRNA' '$topology' '-fon' '-sourcedb' 'SAPP' - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -**What it does** - -Aragorn_ predicts tRNA (and tmRNA) in nucleotide sequences. - -.. _Aragorn: http://mbio-serv2.mbioekol.lu.se/ARAGORN/ - ------ - -It requires an RDF genome file - - - - - @article{Laslett2004, -abstract = {A computer program, ARAGORN, identifies tRNA and tmRNA genes. The program employs heuristic algorithms to predict tRNA secondary structure, based on homology with recognized tRNA consensus sequences and ability to form a base-paired cloverleaf. tmRNA genes are identified using a modified version of the BRUCE program. ARAGORN achieves a detection sensitivity of 99\% from a set of 1290 eubacterial, eukaryotic and archaeal tRNA genes and detects all complete tmRNA sequences in the tmRNA database, improving on the performance of the BRUCE program. Recently discovered tmRNA genes in the chloroplasts of two species from the 'green' algae lineage are detected. The output of the program reports the proposed tRNA secondary structure and, for tmRNA genes, the secondary structure of the tRNA domain, the tmRNA gene sequence, the tag peptide and a list of organisms with matching tmRNA peptide tags.}, -author = {Laslett, Dean and Canback, Bjorn}, -doi = {10.1093/nar/gkh152}, -file = {:Users/koeho006/Library/Application Support/Mendeley Desktop/Downloaded/Laslett, Canback - 2004 - ARAGORN, a program to detect tRNA genes and tmRNA genes in nucleotide sequences.pdf:pdf}, -isbn = {1362-4962 (Electronic)$\backslash$n1362-4962 (Linking)}, -issn = {03051048}, -journal = {Nucleic Acids Research}, -mendeley-groups = {VAPP Application note}, -pages = {11--16}, -pmid = {14704338}, -title = {{ARAGORN, a program to detect tRNA genes and tmRNA genes in nucleotide sequences}}, -volume = {32}, -year = {2004} -} - - - - -