comparison gbk2rdf/gbktordf.py @ 6:ec73c34af97b

FASTA2RDF
author jjkoehorst <jasperkoehorst@gmail.com>
date Sat, 21 Feb 2015 15:19:42 +0100
parents db04e12b8779
children
comparison
equal deleted inserted replaced
4:47d1b27466ee 6:ec73c34af97b
106 106
107 try: 107 try:
108 gi = record.annotations["gi"] 108 gi = record.annotations["gi"]
109 typ = str(gi) 109 typ = str(gi)
110 except: 110 except:
111 scaf_value += 1 111 try:
112 typ = "scaffold_"+str(scaf_value) 112 gi = record.annotations["accessions"][0]
113 typ = str(gi)
114 except:
115 scaf_value += 1
116 typ = "scaffold_"+str(scaf_value)
113 genomeURI = coreURI[genome] 117 genomeURI = coreURI[genome]
114 gbkURI = coreURI[genome + "/" + typ] 118 gbkURI = coreURI[genome + "/" + typ]
115 #To contig connection to connect all data to it 119 #To contig connection to connect all data to it
116 genomeGraph.add((genomeURI, coreURI["dnaobject"] , gbkURI)) 120 genomeGraph.add((genomeURI, coreURI["dnaobject"] , gbkURI))
117 121
146 else: 150 else:
147 for a in record.annotations[annot]: 151 for a in record.annotations[annot]:
148 int_add(gbkURI,coreURI[annot.lower()],str(a)) 152 int_add(gbkURI,coreURI[annot.lower()],str(a))
149 else: 153 else:
150 int_add(gbkURI,coreURI[annot.lower()],str(record.annotations[annot])) 154 int_add(gbkURI,coreURI[annot.lower()],str(record.annotations[annot]))
151 155
152
153 #####END of RECORD#### 156 #####END of RECORD####
154 if len(sequence) > 0: 157 if len(sequence) > 0:
155 genomeGraph.add((gbkURI, coreURI["sequence"] , Literal(sequence))) 158 genomeGraph.add((gbkURI, coreURI["sequence"] , Literal(sequence)))
156 genomeGraph.add((genomeURI, RDF.type,genomeClass)) 159 genomeGraph.add((genomeURI, RDF.type,genomeClass))
157 genomeGraph.add((gbkURI, RDF.type,typeClass)) 160 genomeGraph.add((gbkURI, RDF.type,typeClass))
165 168
166 strand = str(feature.location.strand) 169 strand = str(feature.location.strand)
167 170
168 if strand == 'None': 171 if strand == 'None':
169 strand = 0 172 strand = 0
170
171 # if feature_type == "gene":
172 # gene = feature
173 #Store gene in next feature....
174 # gene_location_start = end = str(gene.location.end).replace(">","").replace("<","")
175 # gene_location_stop = str(gene.location.start).replace(">","").replace("<","")
176 # gene_qualifiers = gene.qualifiers
177 else: 173 else:
178 if feature.type == "misc_feature": #Store as part of previous cds or something... 174 if feature.type == "misc_feature": #Store as part of previous cds or something...
179 if strand == "-1": 175 if strand == "-1":
180 miscURI = coreURI[genome + "/" + typ + "/"+feature_type+"/gbk/"+str(end)+"_"+str(start)] 176 miscURI = coreURI[genome + "/" + typ + "/"+feature_type+"/gbk/"+str(end)+"_"+str(start)]
181 else: 177 else:
182 miscURI = coreURI[genome + "/" + typ + "/"+feature_type+"/gbk/"+str(start)+"_"+str(end)] 178 miscURI = coreURI[genome + "/" + typ + "/"+feature_type+"/gbk/"+str(start)+"_"+str(end)]
183 179
184 # genomeGraph.add((generalURI,coreURI["subFeature"],miscURI))
185
186 # TODO: Check if biopython has an overlap function... 180 # TODO: Check if biopython has an overlap function...
187 if int(prevObjStart) <= int(start): 181 if int(prevObjStart) <= int(start):
188 if int(end) <= int(prevObjStop): 182 if int(end) <= int(prevObjStop):
189 pass 183 pass
190 # genomeGraph.add((typeURI,coreURI["feature"],miscURI)) 184 # genomeGraph.add((typeURI,coreURI["feature"],miscURI))
199 store_general_information(miscURI,feature,record) 193 store_general_information(miscURI,feature,record)
200 else: 194 else:
201 prevObjStart = start 195 prevObjStart = start
202 prevObjStop = end 196 prevObjStop = end
203 197
204
205 if strand == "-1": 198 if strand == "-1":
206 typeURI = coreURI[genome + "/" + typ + "/" + feature_type+"/gbk/"+str(end)+"_"+str(start)] 199 typeURI = coreURI[genome + "/" + typ + "/" + feature_type+"/gbk/"+str(end)+"_"+str(start)]
207 else: 200 else:
208 typeURI = coreURI[genome + "/" + typ + "/" + feature_type+"/gbk/"+str(start)+"_"+str(end)] 201 typeURI = coreURI[genome + "/" + typ + "/" + feature_type+"/gbk/"+str(start)+"_"+str(end)]
209 202
210 # cds_sequence = str(feature.extract(sequence)) 203 #Contig specific connection
211 #Contig specific connection
212
213 genomeGraph.add((gbkURI, coreURI["feature"] , typeURI)) 204 genomeGraph.add((gbkURI, coreURI["feature"] , typeURI))
214 ############################ 205 ############################
215 206
216 store_general_information(typeURI,feature,record) 207 store_general_information(typeURI,feature,record)
217 208
225 subURI = coreURI[genome + "/" + typ + "/" + subfeature_type+"/gbk/"+str(end)+"_"+str(start)] 216 subURI = coreURI[genome + "/" + typ + "/" + subfeature_type+"/gbk/"+str(end)+"_"+str(start)]
226 else: 217 else:
227 subURI = coreURI[genome + "/" + typ + "/" + subfeature_type+"/gbk/"+str(start)+"_"+str(end)] 218 subURI = coreURI[genome + "/" + typ + "/" + subfeature_type+"/gbk/"+str(start)+"_"+str(end)]
228 genomeGraph.add((typeURI, coreURI["feature"] , subURI)) 219 genomeGraph.add((typeURI, coreURI["feature"] , subURI))
229 store_general_information(subURI,subfeature,record,feature) 220 store_general_information(subURI,subfeature,record,feature)
221
230 222
231 def store_general_information(generalURI,feature,record,superfeature=""): 223 def store_general_information(generalURI,feature,record,superfeature=""):
232 proteinClass = createClass(coreURI["Protein"], root=True) 224 proteinClass = createClass(coreURI["Protein"], root=True)
233 sequence = str(record.seq) 225 sequence = str(record.seq)
234 cds_sequence = str(feature.extract(sequence)) 226 cds_sequence = str(feature.extract(sequence))
275 #Feature is normally submitted to this function 267 #Feature is normally submitted to this function
276 #IF a subfeature is submitted it is submitted as a feature 268 #IF a subfeature is submitted it is submitted as a feature
277 #And subfeature variable will contain the superfeature 269 #And subfeature variable will contain the superfeature
278 if superfeature: 270 if superfeature:
279 codon = superfeature.qualifiers["transl_table"][0] 271 codon = superfeature.qualifiers["transl_table"][0]
280 # else:
281 # codon = subfeature.qualifiers["transl_table"][0]
282 except: 272 except:
283 #Default codon table 11 273 #Default codon table 11
284 codon = "11" 274 codon = "11"
285 #Protein linkage 275 #Protein linkage
286 translation = "" 276 translation = ""
354 def subClassOfBuilderRna(): 344 def subClassOfBuilderRna():
355 for subclass in SubClassOfDictRna: 345 for subclass in SubClassOfDictRna:
356 genomeGraph.add((coreURI["Feature"],RDFS.subClassOf,OWL.Thing)) 346 genomeGraph.add((coreURI["Feature"],RDFS.subClassOf,OWL.Thing))
357 genomeGraph.add((coreURI["Rna"],RDFS.subClassOf,coreURI["Feature"])) 347 genomeGraph.add((coreURI["Rna"],RDFS.subClassOf,coreURI["Feature"]))
358 genomeGraph.add((coreURI[subclass],RDFS.subClassOf,coreURI["Rna"])) 348 genomeGraph.add((coreURI[subclass],RDFS.subClassOf,coreURI["Rna"]))
359 genomeGraph.add((coreURI[subclass],RDFS.subClassOf,coreURI["Rna"]))
360 genomeGraph.add((coreURI[subclass],RDF.type,OWL.Class)) 349 genomeGraph.add((coreURI[subclass],RDF.type,OWL.Class))
361 350
362 def main(): 351 def main():
363 tmp() 352 tmp()
364 gbk_parser() 353 gbk_parser()