annotate genetic_elements/aragorn/aragorn.py @ 30:0a947cb25a3d draft

Uploaded
author jjkoehorst
date Wed, 29 Jun 2016 01:34:59 -0400
parents 2561c51e6605
children 9610ddbca991
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
17
2561c51e6605 aragorn addition
jjkoehorst <jasperkoehorst@gmail.com>
parents:
diff changeset
def delete_galaxy():
    """Blank out every ``sys.path`` entry that contains ``"galaxy-dist/"``.

    Matching entries are overwritten in place with the empty string, so the
    list object and the positions of all other entries stay the same.
    """
    import sys

    # Slice assignment mutates the existing list instead of rebinding it.
    sys.path[:] = ["" if "galaxy-dist/" in entry else entry for entry in sys.path]


# Some modules required by rdflib also exist inside Galaxy, which breaks the
# rdflib import below. This is not an elegant solution, but it works for now.
delete_galaxy()
2561c51e6605 aragorn addition
jjkoehorst <jasperkoehorst@gmail.com>
parents:
diff changeset
9
2561c51e6605 aragorn addition
jjkoehorst <jasperkoehorst@gmail.com>
parents:
diff changeset
10 from rdflib import Graph, URIRef, Literal,Namespace, XSD, BNode,RDF,RDFS,OWL, ConjunctiveGraph, plugin
2561c51e6605 aragorn addition
jjkoehorst <jasperkoehorst@gmail.com>
parents:
diff changeset
11
2561c51e6605 aragorn addition
jjkoehorst <jasperkoehorst@gmail.com>
parents:
diff changeset
12 # Import RDFLib's default Graph implementation.
2561c51e6605 aragorn addition
jjkoehorst <jasperkoehorst@gmail.com>
parents:
diff changeset
13 from rdflib.graph import Graph
2561c51e6605 aragorn addition
jjkoehorst <jasperkoehorst@gmail.com>
parents:
diff changeset
14
2561c51e6605 aragorn addition
jjkoehorst <jasperkoehorst@gmail.com>
parents:
diff changeset
15 import sys, os
2561c51e6605 aragorn addition
jjkoehorst <jasperkoehorst@gmail.com>
parents:
diff changeset
16
2561c51e6605 aragorn addition
jjkoehorst <jasperkoehorst@gmail.com>
parents:
diff changeset
17 import rdflib
2561c51e6605 aragorn addition
jjkoehorst <jasperkoehorst@gmail.com>
parents:
diff changeset
18 import subprocess
2561c51e6605 aragorn addition
jjkoehorst <jasperkoehorst@gmail.com>
parents:
diff changeset
19 import hashlib
2561c51e6605 aragorn addition
jjkoehorst <jasperkoehorst@gmail.com>
parents:
diff changeset
# Module-level state shared by the functions below. Plain assignments at
# module scope are already global, so no ``global`` declarations are needed.

# Base IRI under which every resource created by this script is minted.
URI = "http://csb.wur.nl/genome/"

# rdflib namespace built on URI; indexing it yields a term under that IRI.
coreURI = Namespace(URI)

# Predicate name kept as a plain string.
seeAlso = "rdfs:seeAlso"

# Class names seen while parsing aragorn output (the keys are what matter).
SubClassOfDict = {}
2561c51e6605 aragorn addition
jjkoehorst <jasperkoehorst@gmail.com>
parents:
diff changeset
29
2561c51e6605 aragorn addition
jjkoehorst <jasperkoehorst@gmail.com>
parents:
diff changeset
def createClass(uri):
    """Return ``uri`` unchanged.

    The statements that used to register the class and its superclasses in
    the graph are disabled, so this is currently a pass-through kept for its
    call sites.
    """
    class_uri = uri
    return class_uri
2561c51e6605 aragorn addition
jjkoehorst <jasperkoehorst@gmail.com>
parents:
diff changeset
37
2561c51e6605 aragorn addition
jjkoehorst <jasperkoehorst@gmail.com>
parents:
diff changeset
def tmp():
    """Create a private scratch directory and publish it as ``tmpFolder``.

    The directory is created with ``tempfile.mkdtemp`` rather than a
    timestamp-derived name under a hard-coded ``/tmp/``: the name cannot
    collide with a concurrent run and the directory is only accessible to
    the current user.

    Returns:
        The directory path with a trailing path separator, so callers can
        keep building file names by plain concatenation
        (``tmpFolder + "tmp.seq"``). The same value is stored in the
        module-level ``tmpFolder``.
    """
    global tmpFolder
    import tempfile

    # Joining with "" appends the trailing separator the callers rely on.
    tmpFolder = os.path.join(tempfile.mkdtemp(prefix="aragorn_"), "")
    return tmpFolder
2561c51e6605 aragorn addition
jjkoehorst <jasperkoehorst@gmail.com>
parents:
diff changeset
43
2561c51e6605 aragorn addition
jjkoehorst <jasperkoehorst@gmail.com>
parents:
diff changeset
def query():
    """Load the input graph and collect its DNA sequences as FASTA pairs.

    Parses the Turtle file named by ``sys.argv[1]`` into a fresh graph stored
    in the module-level ``genomeGraph``, then selects every ``ssb:DnaObject``
    together with its ``ssb:sequence`` value.

    Returns:
        A list of ``[header, sequence]`` pairs, where ``header`` is ``">"``
        followed by the object's IRI and ``sequence`` is the stripped
        sequence text.
    """
    global genomeGraph
    genomeGraph = Graph()
    genomeGraph.parse(sys.argv[1], format="turtle")

    results = genomeGraph.query('select ?class ?sequence where {?class a ssb:DnaObject . ?class ssb:sequence ?sequence .}')

    sequences = []
    for row in results:
        print ("Header:",row[0])
        header = ">" + str(row[0])
        residues = str(row[1].strip())
        sequences.append([header, residues])
    return sequences
2561c51e6605 aragorn addition
jjkoehorst <jasperkoehorst@gmail.com>
parents:
diff changeset
56
2561c51e6605 aragorn addition
jjkoehorst <jasperkoehorst@gmail.com>
parents:
diff changeset
def _parse_aragorn_header(line):
    """Split one ``>`` header line of aragorn output into its fields.

    Recognised layouts (whitespace-separated tokens):
      * three tokens: ``<prefix> <gene> <position>``
      * two tokens: ``<gene> <position>``
      * four tokens containing ``(Permuted)``: ``<prefix> <gene> (Permuted) <position>``

    Returns:
        ``(trna, codon, start, stop)`` as strings, or ``None`` when the line
        does not match any layout above or its position has no ``[a,b]`` part.
        ``codon`` is ``''`` for lines without ``tRNA-`` (tmRNA hits).
    """
    tokens = line.split()
    if len(tokens) == 3:
        trna, pos = tokens[1], tokens[2]
    elif len(tokens) == 2:
        trna, pos = tokens
    elif len(tokens) == 4 and "(Permuted)" in line:
        trna, pos = tokens[1], tokens[3]
    else:
        return None

    if "tRNA-" in line:
        # e.g. "tRNA-Ala(tgc)" -> name "tRNA-Ala", anticodon "tgc"
        trna, opening, codon = trna.strip(">)").partition("(")
        if not opening:
            return None
    else:
        trna = trna.strip(">").strip()  # Actually a tmRNA...
        codon = ''

    try:
        first, second = pos.split("[")[1].split("]")[0].split(",")
    except (IndexError, ValueError):
        return None
    if pos.startswith("c"):
        # Complementary strand: the coordinates are listed stop-first.
        start, stop = second, first
    else:
        start, stop = first, second
    return trna, codon, start, stop


def aragorn(sequences):
    """Run aragorn on every contig and add the reported genes to the graph.

    For each ``[header, sequence]`` pair the contig is written to
    ``tmpFolder + "tmp.seq"``, the bundled aragorn 1.2.36 binary is run on it
    with ``-fasta`` plus the extra options taken from ``sys.argv[3:-2]``, and
    every ``>`` header line of its output is turned into triples on the
    module-level ``genomeGraph``. The class name of each hit is also recorded
    in ``SubClassOfDict``.

    Header lines that cannot be parsed are reported on stdout and skipped,
    instead of reusing values left over from a previous line.

    Args:
        sequences: list of ``[">" + contig IRI, sequence]`` pairs.

    Raises:
        OSError: if the aragorn executable cannot be started.
        ValueError: if a hit is found and ``-sourcedb`` is not in ``sys.argv``.
    """
    # Loop-invariant paths and command line.
    tool_root = os.path.dirname(os.path.dirname(os.path.realpath(__file__)))
    executable = os.path.join(tool_root, "tools", "aragorn1.2.36", "aragorn")
    fasta_path = tmpFolder + "tmp.seq"
    output_path = tmpFolder + "aragorn.output"
    # Re-split on whitespace, as the shell did when the command was a string;
    # this also drops empty arguments.
    extra_args = ' '.join(sys.argv[3:-2]).split()
    command = [executable, "-fasta", fasta_path] + extra_args

    for sequence in sequences:
        # Call aragorn for each contig, for ease of parsing
        with open(fasta_path, "w") as handle:
            handle.write('\n'.join(sequence))

        print (' '.join(command) + " > " + output_path)
        # An argument list with an explicit stdout file avoids passing
        # command-line text through a shell.
        with open(output_path, "w") as handle:
            status = subprocess.call(command, stdout=handle)
        if status != 0:
            print ("aragorn exited with status " + str(status))

        with open(output_path) as handle:
            report_lines = handle.readlines()

        contig = sequence[0].strip(">").replace("http://csb.wur.nl/genome/","")
        dnaobjectURI = coreURI[contig]

        for line in report_lines:
            if ">" not in line:
                continue
            print (line.split())
            hit = _parse_aragorn_header(line)
            if hit is None:
                print ("Skipping unrecognised aragorn header: " + line.strip())
                continue
            trna, codon, start, stop = hit

            class_name = trna.split("-")[0].title()  # trna or tmrna
            trnaClass = createClass(coreURI[class_name])
            SubClassOfDict[class_name] = 1

            trnaURI = coreURI[contig+"/trna-aragorn_1_2_36-"+trna.lower() +"/"+ start +"_"+ stop]
            genomeGraph.add((dnaobjectURI, coreURI["feature"], trnaURI))
            genomeGraph.add((trnaURI, RDF.type, trnaClass))
            genomeGraph.add((trnaURI, coreURI["begin"], Literal(start, datatype=XSD.integer)))
            genomeGraph.add((trnaURI, coreURI["end"], Literal(stop, datatype=XSD.integer)))
            genomeGraph.add((trnaURI, coreURI["trna_type"], Literal(trna)))
            genomeGraph.add((trnaURI, coreURI["trna_anti"], Literal(codon)))
            genomeGraph.add((trnaURI, coreURI["tool"], Literal("aragorn")))
            genomeGraph.add((trnaURI, coreURI["version"], Literal("1.2.36")))
            genomeGraph.add((trnaURI, coreURI["sourcedb"], Literal(sys.argv[sys.argv.index("-sourcedb")+1])))
2561c51e6605 aragorn addition
jjkoehorst <jasperkoehorst@gmail.com>
parents:
diff changeset
104
2561c51e6605 aragorn addition
jjkoehorst <jasperkoehorst@gmail.com>
parents:
diff changeset
def subClassOfBuilder():
    """Add the class-hierarchy triples for every recorded class name.

    For each key of ``SubClassOfDict`` the graph receives: Feature as a
    subclass of ``owl:Thing``, Rna as a subclass of Feature, the recorded
    class as a subclass of Rna, and Rna typed as ``owl:Class``.
    """
    # NOTE(review): all four statements are assumed to belong to the loop
    # body, so nothing is added when SubClassOfDict is empty - confirm.
    for class_name in SubClassOfDict:
        hierarchy = (
            (coreURI["Feature"], RDFS.subClassOf, OWL.Thing),
            (coreURI["Rna"], RDFS.subClassOf, coreURI["Feature"]),
            (coreURI[class_name], RDFS.subClassOf, coreURI["Rna"]),
            (coreURI["Rna"], RDF.type, OWL.Class),
        )
        for triple in hierarchy:
            genomeGraph.add(triple)
2561c51e6605 aragorn addition
jjkoehorst <jasperkoehorst@gmail.com>
parents:
diff changeset
111
2561c51e6605 aragorn addition
jjkoehorst <jasperkoehorst@gmail.com>
parents:
diff changeset
def save():
    """Serialize ``genomeGraph`` as Turtle into the file named by ``sys.argv[2]``.

    The serialization is written as bytes. A text result is encoded as UTF-8
    first, so the write succeeds whether ``serialize`` returns ``bytes`` or
    ``str``. The output file is closed deterministically.
    """
    data = genomeGraph.serialize(format='turtle')
    if not isinstance(data, bytes):
        # The file is opened in binary mode; text must be encoded first.
        data = data.encode("utf-8")
    with open(sys.argv[2], "wb") as handle:
        handle.write(data)
2561c51e6605 aragorn addition
jjkoehorst <jasperkoehorst@gmail.com>
parents:
diff changeset
118
2561c51e6605 aragorn addition
jjkoehorst <jasperkoehorst@gmail.com>
parents:
diff changeset
def main():
    """Run the whole pipeline: scratch dir, load graph, run aragorn, save.

    The scratch directory created by ``tmp()`` is removed afterwards, even
    when a later step fails, so runs do not accumulate temporary directories.
    """
    import shutil

    tmp()
    try:
        sequences = query()
        aragorn(sequences)
        save()
    finally:
        # Best-effort cleanup: never mask an error from the pipeline itself.
        shutil.rmtree(tmpFolder, ignore_errors=True)


# Only run when executed as a script, so importing the module has no effect.
if __name__ == "__main__":
    main()