annotate genetic_elements/aragorn.py @ 17:2561c51e6605

aragorn addition
author jjkoehorst <jasperkoehorst@gmail.com>
date Sat, 21 Feb 2015 17:20:05 +0100
parents 74b8ba5e2d5b
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
16
74b8ba5e2d5b aragorn addition
jjkoehorst <jasperkoehorst@gmail.com>
parents:
diff changeset
def delete_galaxy():
    """Remove every Galaxy distribution directory from ``sys.path``.

    Any search-path entry whose text contains ``"galaxy-dist/"`` is dropped.
    The entries are removed rather than overwritten with ``''``: an empty
    string in ``sys.path`` stands for the current working directory, so
    blanking the entries would let files in the job directory shadow real
    modules.

    Returns:
        None. ``sys.path`` is modified in place.
    """
    # Imported locally because this function is called before the
    # module-level imports have run.
    import sys

    # Slice assignment mutates the existing list object, so any other
    # reference to sys.path sees the cleaned-up contents as well.
    sys.path[:] = [path for path in sys.path if "galaxy-dist/" not in path]


# Galaxy ships its own copies of some modules that rdflib depends on, and
# those copies break the rdflib import that follows. Stripping Galaxy from
# the module search path first is not elegant, but it works for now.
delete_galaxy()
74b8ba5e2d5b aragorn addition
jjkoehorst <jasperkoehorst@gmail.com>
parents:
diff changeset
9
74b8ba5e2d5b aragorn addition
jjkoehorst <jasperkoehorst@gmail.com>
parents:
diff changeset
10 from rdflib import Graph, URIRef, Literal,Namespace, XSD, BNode,RDF,RDFS,OWL, ConjunctiveGraph, plugin
74b8ba5e2d5b aragorn addition
jjkoehorst <jasperkoehorst@gmail.com>
parents:
diff changeset
11
74b8ba5e2d5b aragorn addition
jjkoehorst <jasperkoehorst@gmail.com>
parents:
diff changeset
12 # Import RDFLib's default Graph implementation.
74b8ba5e2d5b aragorn addition
jjkoehorst <jasperkoehorst@gmail.com>
parents:
diff changeset
13 from rdflib.graph import Graph
74b8ba5e2d5b aragorn addition
jjkoehorst <jasperkoehorst@gmail.com>
parents:
diff changeset
14
74b8ba5e2d5b aragorn addition
jjkoehorst <jasperkoehorst@gmail.com>
parents:
diff changeset
15 import sys, os
74b8ba5e2d5b aragorn addition
jjkoehorst <jasperkoehorst@gmail.com>
parents:
diff changeset
16
74b8ba5e2d5b aragorn addition
jjkoehorst <jasperkoehorst@gmail.com>
parents:
diff changeset
17 import rdflib
74b8ba5e2d5b aragorn addition
jjkoehorst <jasperkoehorst@gmail.com>
parents:
diff changeset
18 import subprocess
74b8ba5e2d5b aragorn addition
jjkoehorst <jasperkoehorst@gmail.com>
parents:
diff changeset
19 import hashlib
74b8ba5e2d5b aragorn addition
jjkoehorst <jasperkoehorst@gmail.com>
parents:
diff changeset
# Module-level state shared by the functions below. Names assigned at module
# scope are already global, so no ``global`` statements are needed here.

# Keys are the title-cased RNA class names recorded by aragorn(); the values
# are always 1, so the dict is effectively used as a set.
SubClassOfDict = {}

# Base URI under which every resource created by this script is minted.
URI = "http://csb.wur.nl/genome/"
seeAlso = "rdfs:seeAlso"
# rdflib namespace over URI; coreURI["name"] yields the URI for "name".
coreURI = Namespace(URI)
74b8ba5e2d5b aragorn addition
jjkoehorst <jasperkoehorst@gmail.com>
parents:
diff changeset
29
74b8ba5e2d5b aragorn addition
jjkoehorst <jasperkoehorst@gmail.com>
parents:
diff changeset
def createClass(uri):
    """Hand *uri* back to the caller unchanged.

    The function is currently a pass-through. The statements that once
    registered *uri* in the graph as an OWL class / subclass are disabled;
    comparable triples are built by subClassOfBuilder().

    Args:
        uri: The class identifier to return.

    Returns:
        The very same object that was passed in.
    """
    class_uri = uri
    return class_uri
74b8ba5e2d5b aragorn addition
jjkoehorst <jasperkoehorst@gmail.com>
parents:
diff changeset
37
74b8ba5e2d5b aragorn addition
jjkoehorst <jasperkoehorst@gmail.com>
parents:
diff changeset
def tmp():
    """Create a private scratch directory and publish it as ``tmpFolder``.

    The directory is created with ``tempfile.mkdtemp``, which chooses a
    unique name and creates the directory in one step. The previous scheme
    (``/tmp/<time.time()>/``) used a predictable name and could fail or
    collide when two runs started at the same moment.

    Nothing in this function removes the directory afterwards.

    Returns:
        The directory path, ending in a path separator so that callers can
        build file names by plain string concatenation. The same value is
        stored in the module-global ``tmpFolder``.
    """
    import tempfile
    global tmpFolder
    # Joining with "" appends the trailing separator that the
    # ``tmpFolder + "file"`` concatenations elsewhere rely on.
    tmpFolder = os.path.join(tempfile.mkdtemp(prefix="aragorn_"), "")
    return tmpFolder
74b8ba5e2d5b aragorn addition
jjkoehorst <jasperkoehorst@gmail.com>
parents:
diff changeset
43
74b8ba5e2d5b aragorn addition
jjkoehorst <jasperkoehorst@gmail.com>
parents:
diff changeset
def query():
    """Load the input graph and collect the sequence of every DNA object.

    The Turtle file named by ``sys.argv[1]`` is parsed into a fresh graph
    that is stored in the module-global ``genomeGraph``. Each subject typed
    ``ssb:DnaObject`` is then selected together with its ``ssb:sequence``.

    Returns:
        A list of ``[header, sequence]`` pairs, where ``header`` is ``">"``
        followed by the subject URI and ``sequence`` is the sequence value
        with surrounding whitespace stripped.
    """
    global genomeGraph
    genomeGraph = Graph()
    genomeGraph.parse(sys.argv[1], format="turtle")
    # NOTE(review): the "ssb:" prefix is not declared in the query text, so
    # it presumably has to be bound by the parsed file - confirm.
    rows = genomeGraph.query('select ?class ?sequence where {?class a ssb:DnaObject . ?class ssb:sequence ?sequence .}')

    sequences = []
    for row in rows:
        print ("Header:",row[0])
        header = ">" + str(row[0])
        residues = str(row[1].strip())
        sequences.append([header, residues])
    return sequences
74b8ba5e2d5b aragorn addition
jjkoehorst <jasperkoehorst@gmail.com>
parents:
diff changeset
56
74b8ba5e2d5b aragorn addition
jjkoehorst <jasperkoehorst@gmail.com>
parents:
diff changeset
def _parse_aragorn_header(line):
    """Split one ">" line of aragorn output into ``(trna, pos)``.

    Three layouts are recognised, checked in this order:

    * three whitespace-separated fields: the 2nd and 3rd are used;
    * two fields: both are used;
    * four fields on a line containing "(Permuted)": the 2nd and 4th are used.

    Returns:
        A ``(trna, pos)`` tuple of raw field strings, or ``None`` when the
        line matches none of the layouts.
    """
    fields = line.split()
    if len(fields) == 3:
        return fields[1], fields[2]
    if len(fields) == 2:
        return fields[0], fields[1]
    if len(fields) == 4 and "(Permuted)" in line:
        return fields[1], fields[3]
    return None


def aragorn(sequences):
    """Run aragorn on each sequence and record its hits in ``genomeGraph``.

    For every ``[header, sequence]`` pair the pair is written to
    ``tmp.seq`` in ``tmpFolder``, the bundled aragorn 1.2.36 executable is
    run on that file with its stdout captured in ``aragorn.output``, and
    every output line containing ">" is parsed as a hit. Each hit becomes a
    feature resource attached to the contig, with type, begin, end,
    trna_type, trna_anti, tool, version and sourcedb triples. The title-cased
    RNA class name of each hit is also recorded in ``SubClassOfDict``.

    Args:
        sequences: Iterable of ``[header, sequence]`` string pairs; the
            header is ">" followed by the contig URI.

    Returns:
        None.

    Raises:
        OSError: If the aragorn executable cannot be started.
        ValueError, IndexError: If a hit line has a malformed position
            field, or a "tRNA-" hit has no "(anticodon)" part. A missing
            ``-sourcedb`` argument also raises ``ValueError``, but only once
            a hit is found.
    """
    # Everything about the command is the same for every contig, so build it
    # once. The executable lives two directory levels above this file.
    folder = os.path.realpath(__file__).rsplit("/", 2)[0] + "/"
    fasta_path = tmpFolder + "tmp.seq"
    output_path = tmpFolder + "aragorn.output"
    # sys.argv[3:-2] are the options forwarded to aragorn. They are passed as
    # an argument list (no shell), so shell metacharacters in them are inert.
    command = [folder + "/tools/aragorn1.2.36/aragorn", "-fasta", fasta_path] + sys.argv[3:-2]

    for sequence in sequences:
        # Call aragorn for each contig separately, for ease of parsing.
        with open(fasta_path, "w") as handle:
            handle.write('\n'.join(sequence))

        print (' '.join(command) + " > " + output_path)
        with open(output_path, "w") as handle:
            # The exit status is deliberately not checked, as before.
            subprocess.call(command, stdout=handle)
        with open(output_path) as handle:
            report = handle.readlines()

        contig = sequence[0].strip(">").replace("http://csb.wur.nl/genome/","")
        dnaobjectURI = coreURI[contig]

        for line in report:
            if ">" not in line:
                continue
            print (line.split())

            parsed = _parse_aragorn_header(line)
            if parsed is None:
                # Unrecognised layout: skip it instead of reusing the
                # previous hit's values, which attached a bogus empty
                # anticodon to that earlier feature.
                continue
            trna, pos = parsed

            if "tRNA-" in line:
                # The name carries the anticodon in parentheses: "name(anticodon)".
                trna, codon = trna.strip(">)").split("(", 1)
            else:
                trna = trna.strip(">").strip()  # Actually a tmRNA...
                codon = ''

            rna_kind = trna.split("-")[0].title()  # trna or tmrna
            trnaClass = createClass(coreURI[rna_kind])
            SubClassOfDict[rna_kind] = 1

            coordinates = pos.split("[")[1].split("]")[0].split(",")
            if pos[0] == "c":
                # Complementary strand: the two coordinates swap roles.
                stop, start = coordinates
            else:
                start, stop = coordinates

            trnaURI = coreURI[contig+"/trna-aragorn_1_2_36-"+trna.lower() +"/"+ start +"_"+ stop]
            genomeGraph.add((dnaobjectURI, coreURI["feature"], trnaURI))
            genomeGraph.add((trnaURI, RDF.type, trnaClass))
            genomeGraph.add((trnaURI, coreURI["begin"], Literal(start, datatype=XSD.integer)))
            genomeGraph.add((trnaURI, coreURI["end"], Literal(stop, datatype=XSD.integer)))
            genomeGraph.add((trnaURI, coreURI["trna_type"], Literal(trna)))
            genomeGraph.add((trnaURI, coreURI["trna_anti"], Literal(codon)))
            genomeGraph.add((trnaURI, coreURI["tool"], Literal("aragorn")))
            genomeGraph.add((trnaURI, coreURI["version"], Literal("1.2.36")))
            genomeGraph.add((trnaURI, coreURI["sourcedb"], Literal(sys.argv[sys.argv.index("-sourcedb")+1])))
74b8ba5e2d5b aragorn addition
jjkoehorst <jasperkoehorst@gmail.com>
parents:
diff changeset
104
74b8ba5e2d5b aragorn addition
jjkoehorst <jasperkoehorst@gmail.com>
parents:
diff changeset
def subClassOfBuilder():
    """Add the RNA class hierarchy triples to ``genomeGraph``.

    For every class name recorded in ``SubClassOfDict`` four triples are
    added: Feature is a subclass of owl:Thing, Rna is a subclass of Feature,
    the recorded class is a subclass of Rna, and Rna is an owl:Class. When
    the dictionary is empty nothing is added at all.
    """
    feature_class = coreURI["Feature"]
    rna_class = coreURI["Rna"]
    for class_name in SubClassOfDict:
        hierarchy = (
            (feature_class, RDFS.subClassOf, OWL.Thing),
            (rna_class, RDFS.subClassOf, feature_class),
            (coreURI[class_name], RDFS.subClassOf, rna_class),
            (rna_class, RDF.type, OWL.Class),
        )
        for triple in hierarchy:
            genomeGraph.add(triple)
74b8ba5e2d5b aragorn addition
jjkoehorst <jasperkoehorst@gmail.com>
parents:
diff changeset
111
74b8ba5e2d5b aragorn addition
jjkoehorst <jasperkoehorst@gmail.com>
parents:
diff changeset
def save():
    """Serialize ``genomeGraph`` as Turtle into the file named by ``sys.argv[2]``.

    The class hierarchy triples are not added here: the call to
    subClassOfBuilder() that would create them is disabled.

    Returns:
        None.
    """
    data = genomeGraph.serialize(format='turtle')
    # The file is opened in binary mode, so the payload must be bytes.
    # Encode it if the installed rdflib returned text instead.
    if not isinstance(data, bytes):
        data = data.encode("utf-8")
    # The context manager closes (and thereby flushes) the output file.
    with open(sys.argv[2], "wb") as handle:
        handle.write(data)
74b8ba5e2d5b aragorn addition
jjkoehorst <jasperkoehorst@gmail.com>
parents:
diff changeset
118
74b8ba5e2d5b aragorn addition
jjkoehorst <jasperkoehorst@gmail.com>
parents:
diff changeset
def main():
    """Run the whole pipeline in order.

    Creates the scratch directory, loads the sequences from the input graph,
    annotates them with aragorn and finally writes the graph back out.
    """
    tmp()
    aragorn(query())
    save()


# Called unconditionally (no __name__ guard), so the pipeline runs whenever
# this file is executed or imported.
main()