| 
3
 | 
     1 #!/usr/bin/env python3.4
 | 
| 
 | 
     2 # Author: Jasper Jan Koehorst
 | 
| 
 | 
     3 # Date created: Feb 21 2015
 | 
| 
 | 
     4 # Function: generation of a RDF file from Genbank/EMBL
 | 
| 
 | 
     5 
 | 
| 
 | 
     6 import warnings
 | 
| 
 | 
     7 warnings.filterwarnings("ignore")
 | 
| 
 | 
     8 
 | 
| 
 | 
     9 def delete_galaxy():
 | 
| 
 | 
    10 	import sys
 | 
| 
 | 
    11 	for index, path in enumerate(sys.path):
 | 
| 
 | 
    12 		if "galaxy-dist/" in path:
 | 
| 
 | 
    13 			sys.path[index] = ''
 | 
| 
 | 
    14 
 | 
| 
 | 
    15 #Some modules that are required by RDFLIB are also in galaxy, this messes up the RDF import function. This is not an elegant solution but it works for now.
 | 
| 
 | 
    16 delete_galaxy()
 | 
| 
 | 
    17 
 | 
| 
 | 
    18 from Bio import SeqIO
 | 
| 
 | 
    19 # Import RDFLib's default Graph implementation.
 | 
| 
 | 
    20 import os, sys
 | 
| 
 | 
    21 from Bio.Seq import Seq
 | 
| 
 | 
    22 
 | 
| 
 | 
    23 from rdflib import Graph, URIRef, Literal,Namespace,RDF,RDFS,OWL, plugin
 | 
| 
 | 
    24 from rdflib.store import Store
 | 
| 
 | 
    25 import hashlib
 | 
| 
 | 
    26 store = plugin.get('IOMemory', Store)()
 | 
| 
 | 
    27 
 | 
| 
 | 
    28 global URI
 | 
| 
 | 
    29 URI = "http://csb.wur.nl/genome/"
 | 
| 
 | 
    30 global seeAlso
 | 
| 
 | 
    31 seeAlso = "rdfs:seeAlso"
 | 
| 
 | 
    32 global coreURI
 | 
| 
 | 
    33 coreURI = Namespace(URI)
 | 
| 
 | 
    34 
 | 
| 
 | 
    35 global SubClassOfDict
 | 
| 
 | 
    36 SubClassOfDict = {}
 | 
| 
 | 
    37 global SubClassOfDictRna
 | 
| 
 | 
    38 SubClassOfDictRna = {}
 | 
| 
 | 
    39 
 | 
| 
 | 
    40 def createClass(uri, root=True):
 | 
| 
 | 
    41 	genomeGraph.add((uri,RDF.type,OWL.Class))
 | 
| 
 | 
    42 	if root:
 | 
| 
 | 
    43 		genomeGraph.add((uri,RDFS.subClassOf,OWL.Thing))
 | 
| 
 | 
    44 	return uri
 | 
| 
 | 
    45 
 | 
| 
 | 
    46 def tmp():
 | 
| 
 | 
    47 	import time
 | 
| 
 | 
    48 	global tmpFolder
 | 
| 
 | 
    49 	tmpFolder = "/tmp/"+str(time.time())+"/"
 | 
| 
 | 
    50 	os.mkdir(tmpFolder)
 | 
| 
 | 
    51 
 | 
| 
 | 
    52 def cleantmp():
 | 
| 
 | 
    53 	os.system("ls "+tmpFolder)
 | 
| 
 | 
    54 	os.system("rm -rf "+tmpFolder)
 | 
| 
 | 
    55 
 | 
| 
 | 
    56 def crawler():
 | 
| 
 | 
    57 	#From input folder it looks for GBK file (gz files are in progress)
 | 
| 
 | 
    58 	input_file = sys.argv[sys.argv.index("-input")+1]
 | 
| 
 | 
    59 	gbk_parser(input_file)
 | 
| 
 | 
    60 
 | 
| 
 | 
    61 def gbk_parser():
 | 
| 
 | 
    62 	prevObjStart = -1
 | 
| 
 | 
    63 	prevObjStop = -1	
 | 
| 
 | 
    64 	store = plugin.get('IOMemory', Store)()
 | 
| 
 | 
    65 	global genomeGraph
 | 
| 
 | 
    66 	genomeGraph = Graph(store,URIRef(URI))
 | 
| 
 | 
    67 	genomeGraph.bind("ssb",coreURI)
 | 
| 
 | 
    68 	input_file = sys.argv[sys.argv.index("-input")+1]
 | 
| 
 | 
    69 
 | 
| 
 | 
    70 	#CLASS definitions
 | 
| 
 | 
    71 	genomeClass = createClass(coreURI["Genome"], root=True)
 | 
| 
 | 
    72 	typeClass = createClass(coreURI["DnaObject"], root=True)
 | 
| 
 | 
    73 	createClass(coreURI["Protein"], root=True)
 | 
| 
 | 
    74 	pubmedClass = createClass(coreURI["Pubmed"], root=True)
 | 
| 
 | 
    75 	miscClass = createClass(coreURI["MiscFeature"], root=False)
 | 
| 
 | 
    76 	createClass(coreURI["Feature"], root=True)
 | 
| 
 | 
    77 	SubClassOfDict["MiscFeature"] = 1
 | 
| 
 | 
    78 	SubClassOfDictRna["Trna"] = 1
 | 
| 
 | 
    79 	SubClassOfDictRna["Rrna"] = 1
 | 
| 
 | 
    80 	SubClassOfDictRna["Tmrna"] = 1
 | 
| 
 | 
    81 	SubClassOfDictRna["Ncrna"] = 1
 | 
| 
 | 
    82 
 | 
| 
 | 
    83 # 	codon = "11" #Default initialization if no CDS are present
 | 
| 
 | 
    84 	##################
 | 
| 
 | 
    85 	weird_chars = list(''',./?<>:;"'|\}]{[+=_-)(*&^%$#@!±§~` ''')
 | 
| 
 | 
    86 	scaf_value = 0
 | 
| 
 | 
    87 	#Which files are already done
 | 
| 
 | 
    88 	########
 | 
| 
 | 
    89 	formatGBK = sys.argv[sys.argv.index("-format")+1]
 | 
| 
 | 
    90 	for record in SeqIO.parse(input_file, formatGBK):
 | 
| 
 | 
    91 		#Read first feature for genome name and information...
 | 
| 
 | 
    92 		#Ignore the empty GBK file due to the lack of features?
 | 
| 
 | 
    93 
 | 
| 
 | 
    94 		for index, feature in enumerate(record.features):
 | 
| 
 | 
    95 			if index == 0:
 | 
| 
 | 
    96 				if "-identifier" in sys.argv:
 | 
| 
 | 
    97 					genome = sys.argv[sys.argv.index("-identifier")+1]
 | 
| 
 | 
    98 				else:
 | 
| 
 | 
    99 					try:
 | 
| 
 | 
   100 						genome = feature.qualifiers["organism"][0].replace(" ","_")
 | 
| 
 | 
   101 					except:
 | 
| 
 | 
   102 						#BUG: THIS IS A TEMP FIX, USE GALAXY -IDENTIFIER TO CAPTURE THIS
 | 
| 
 | 
   103 						genome = "XNoneX"
 | 
| 
 | 
   104 				for char in weird_chars:
 | 
| 
 | 
   105 					genome = genome.replace(char,"_")
 | 
| 
 | 
   106 
 | 
| 
 | 
   107 				try:
 | 
| 
 | 
   108 					gi = record.annotations["gi"]
 | 
| 
 | 
   109 					typ = str(gi)
 | 
| 
 | 
   110 				except:
 | 
| 
6
 | 
   111 					try:
 | 
| 
 | 
   112 						gi = record.annotations["accessions"][0]
 | 
| 
 | 
   113 						typ = str(gi)
 | 
| 
 | 
   114 					except:
 | 
| 
 | 
   115 						scaf_value += 1
 | 
| 
 | 
   116 						typ = "scaffold_"+str(scaf_value)
 | 
| 
3
 | 
   117 				genomeURI = coreURI[genome]
 | 
| 
 | 
   118 				gbkURI = coreURI[genome + "/" + typ]
 | 
| 
 | 
   119 				#To contig connection to connect all data to it
 | 
| 
 | 
   120 				genomeGraph.add((genomeURI, coreURI["dnaobject"] , gbkURI))
 | 
| 
 | 
   121 
 | 
| 
 | 
   122 				#General genome features also stored in the class...
 | 
| 
 | 
   123 				if "genome" in feature.qualifiers:
 | 
| 
 | 
   124 					genomeGraph.add((genomeURI, coreURI["organism"],Literal(feature.qualifiers["organism"][0])))
 | 
| 
 | 
   125 				if "strain" in feature.qualifiers:
 | 
| 
 | 
   126 					genomeGraph.add((genomeURI, coreURI["strain"],Literal(feature.qualifiers["strain"][0])))
 | 
| 
 | 
   127 				if "taxonomy" in record.annotations:
 | 
| 
 | 
   128 					for taxon in record.annotations["taxonomy"]:
 | 
| 
 | 
   129 						genomeGraph.add((genomeURI, coreURI["taxonomy"],Literal(taxon)))
 | 
| 
 | 
   130 					record.annotations["taxonomy"] = []
 | 
| 
 | 
   131 				#Genome sequence#
 | 
| 
 | 
   132 				sequence = str(record.seq)
 | 
| 
 | 
   133 				#Verify if sequence was not empty and is now full of X or N
 | 
| 
 | 
   134 				filtered_sequence = sequence.replace("X","").replace("N","")
 | 
| 
 | 
   135 				if len(filtered_sequence) == 0:
 | 
| 
 | 
   136 					sequence = ""
 | 
| 
 | 
   137 				#Record parsing#
 | 
| 
 | 
   138 				for annot in record.annotations:
 | 
| 
 | 
   139 					if type(record.annotations[annot]) == list:
 | 
| 
 | 
   140 						if annot == "references":
 | 
| 
 | 
   141 							for references in record.annotations[annot]:
 | 
| 
 | 
   142 								if references.pubmed_id != "":
 | 
| 
 | 
   143 									pubmed = references.pubmed_id
 | 
| 
 | 
   144 									genomeGraph.add((gbkURI, coreURI[annot.lower()] , coreURI["pubmed/"+pubmed]))
 | 
| 
 | 
   145 									obj_dict = references.__dict__
 | 
| 
 | 
   146 									for key in obj_dict:
 | 
| 
 | 
   147 										genomeGraph.add((coreURI["pubmed/"+pubmed], coreURI[key.lower()], Literal(str(obj_dict[key]))))
 | 
| 
 | 
   148 									genomeGraph.add((coreURI["pubmed/"+pubmed], RDF.type, pubmedClass))
 | 
| 
 | 
   149 									
 | 
| 
 | 
   150 						else:
 | 
| 
 | 
   151 							for a in record.annotations[annot]:
 | 
| 
 | 
   152 								int_add(gbkURI,coreURI[annot.lower()],str(a))
 | 
| 
 | 
   153 					else:
 | 
| 
 | 
   154 						int_add(gbkURI,coreURI[annot.lower()],str(record.annotations[annot]))
 | 
| 
6
 | 
   155 						
 | 
| 
3
 | 
   156 				#####END of RECORD####
 | 
| 
 | 
   157 				if len(sequence) > 0:
 | 
| 
 | 
   158 					genomeGraph.add((gbkURI, coreURI["sequence"] ,  Literal(sequence)))
 | 
| 
 | 
   159 				genomeGraph.add((genomeURI, RDF.type,genomeClass))
 | 
| 
 | 
   160 				genomeGraph.add((gbkURI, RDF.type,typeClass))
 | 
| 
 | 
   161 				for key in feature.qualifiers:
 | 
| 
 | 
   162 					genomeGraph.add((gbkURI, coreURI[key.lower()] , Literal(feature.qualifiers[key][0])))
 | 
| 
 | 
   163 				#break
 | 
| 
 | 
   164 			else: #The rest of the GBK file
 | 
| 
 | 
   165 				feature_type = feature.type
 | 
| 
 | 
   166 				end = str(feature.location.end).replace(">","").replace("<","")
 | 
| 
 | 
   167 				start = str(feature.location.start).replace(">","").replace("<","")
 | 
| 
 | 
   168 				
 | 
| 
 | 
   169 				strand = str(feature.location.strand)
 | 
| 
 | 
   170 
 | 
| 
 | 
   171 				if strand == 'None':
 | 
| 
 | 
   172 					strand = 0
 | 
| 
 | 
   173 				else:
 | 
| 
 | 
   174 					if feature.type == "misc_feature": #Store as part of previous cds or something...
 | 
| 
 | 
   175 						if strand == "-1":
 | 
| 
 | 
   176 							miscURI = coreURI[genome + "/" + typ + "/"+feature_type+"/gbk/"+str(end)+"_"+str(start)]
 | 
| 
 | 
   177 						else:
 | 
| 
 | 
   178 							miscURI = coreURI[genome + "/" + typ + "/"+feature_type+"/gbk/"+str(start)+"_"+str(end)]
 | 
| 
 | 
   179 						
 | 
| 
 | 
   180 						# TODO: Check if biopython has an overlap function...
 | 
| 
 | 
   181 						if int(prevObjStart) <= int(start):
 | 
| 
 | 
   182 							if int(end) <= int(prevObjStop):
 | 
| 
 | 
   183 								pass
 | 
| 
 | 
   184 # 								genomeGraph.add((typeURI,coreURI["feature"],miscURI))
 | 
| 
 | 
   185 # 								genomeGraph.add((miscURI,RDF.type,miscClass))
 | 
| 
 | 
   186 							else:
 | 
| 
 | 
   187 								genomeGraph.add((gbkURI, coreURI["feature"] , miscURI))
 | 
| 
 | 
   188 								genomeGraph.add((miscURI,RDF.type,miscClass))
 | 
| 
 | 
   189 						else:
 | 
| 
 | 
   190 							genomeGraph.add((gbkURI, coreURI["feature"] , miscURI))
 | 
| 
 | 
   191 							genomeGraph.add((miscURI,RDF.type,miscClass))
 | 
| 
 | 
   192 
 | 
| 
 | 
   193 						store_general_information(miscURI,feature,record)
 | 
| 
 | 
   194 					else:
 | 
| 
 | 
   195 						prevObjStart = start
 | 
| 
 | 
   196 						prevObjStop = end
 | 
| 
 | 
   197 						
 | 
| 
 | 
   198 						if strand == "-1":
 | 
| 
 | 
   199 							typeURI = coreURI[genome + "/" + typ + "/" + feature_type+"/gbk/"+str(end)+"_"+str(start)]
 | 
| 
 | 
   200 						else:
 | 
| 
 | 
   201 							typeURI = coreURI[genome + "/" + typ + "/" + feature_type+"/gbk/"+str(start)+"_"+str(end)]
 | 
| 
 | 
   202 
 | 
| 
6
 | 
   203 						#Contig specific connection						
 | 
| 
3
 | 
   204 						genomeGraph.add((gbkURI, coreURI["feature"] , typeURI))
 | 
| 
 | 
   205 						############################
 | 
| 
 | 
   206 
 | 
| 
 | 
   207 						store_general_information(typeURI,feature,record)
 | 
| 
 | 
   208 
 | 
| 
 | 
   209 						for subfeature in feature.sub_features:
 | 
| 
 | 
   210 							strand = str(subfeature.location.strand)
 | 
| 
 | 
   211 							subfeature_type = subfeature.type
 | 
| 
 | 
   212 							end = str(subfeature.location.end).replace(">","").replace("<","")
 | 
| 
 | 
   213 							start = str(subfeature.location.start).replace(">","").replace("<","")
 | 
| 
 | 
   214 
 | 
| 
 | 
   215 							if strand == "-1":
 | 
| 
 | 
   216 								subURI = coreURI[genome + "/" + typ + "/" + subfeature_type+"/gbk/"+str(end)+"_"+str(start)]
 | 
| 
 | 
   217 							else:
 | 
| 
 | 
   218 								subURI = coreURI[genome + "/" + typ + "/" + subfeature_type+"/gbk/"+str(start)+"_"+str(end)]
 | 
| 
 | 
   219 							genomeGraph.add((typeURI, coreURI["feature"] , subURI))
 | 
| 
 | 
   220 							store_general_information(subURI,subfeature,record,feature)
 | 
| 
 | 
   221 
 | 
| 
6
 | 
   222 
 | 
| 
3
 | 
   223 def store_general_information(generalURI,feature,record,superfeature=""):
 | 
| 
 | 
   224 	proteinClass = createClass(coreURI["Protein"], root=True)
 | 
| 
 | 
   225 	sequence = str(record.seq)
 | 
| 
 | 
   226 	cds_sequence = str(feature.extract(sequence))
 | 
| 
 | 
   227 	#Fixes the 0 count instead of 1-count in biopython vs humans
 | 
| 
 | 
   228 	feature_type = feature.type
 | 
| 
 | 
   229 	end = str(feature.location.end).replace(">","").replace("<","")
 | 
| 
 | 
   230 	start = str(feature.location.start).replace(">","").replace("<","")
 | 
| 
 | 
   231 	strand = str(feature.location.strand)	
 | 
| 
 | 
   232 	if strand == "None":
 | 
| 
 | 
   233 		strand = 0
 | 
| 
 | 
   234 
 | 
| 
 | 
   235 	genomeGraph.add((generalURI,coreURI["sourcedb"],Literal(sys.argv[sys.argv.index("-sourcedb")+1])))
 | 
| 
 | 
   236 
 | 
| 
 | 
   237 	if strand == "-1":
 | 
| 
 | 
   238 		genomeGraph.add((generalURI,coreURI["end"],Literal(int(start)+1)))
 | 
| 
 | 
   239 		genomeGraph.add((generalURI,coreURI["begin"],Literal(int(end))))
 | 
| 
 | 
   240 	else:
 | 
| 
 | 
   241 		genomeGraph.add((generalURI,coreURI["begin"],Literal(int(start)+1)))
 | 
| 
 | 
   242 		genomeGraph.add((generalURI,coreURI["end"],Literal(int(end))))	
 | 
| 
 | 
   243 	genomeGraph.add((generalURI,coreURI["strand"],Literal(int(strand))))
 | 
| 
 | 
   244 	if feature.type != "misc_feature":
 | 
| 
 | 
   245 		try:
 | 
| 
 | 
   246 			genomeGraph.add((generalURI,coreURI["sequence"],Literal(cds_sequence)))
 | 
| 
 | 
   247 		except: #When protein sequence is not given for whatever reason
 | 
| 
 | 
   248 			print ("wrong?")
 | 
| 
 | 
   249 
 | 
| 
 | 
   250 	if feature.type == "misc_feature":
 | 
| 
 | 
   251 		pass
 | 
| 
 | 
   252 	else:
 | 
| 
 | 
   253 		genomeGraph.add((generalURI,RDF.type,createClass(coreURI[feature_type.lower().title()], root=False)))
 | 
| 
 | 
   254 		if feature_type.lower() != "rrna" and feature_type.lower() != "trna" and feature_type.lower() != "tmrna" and feature_type.lower() != "ncrna":
 | 
| 
 | 
   255 			SubClassOfDict[feature_type.lower().title()] = 1
 | 
| 
 | 
   256 	for key in feature.qualifiers:
 | 
| 
 | 
   257 		values = feature.qualifiers[key]
 | 
| 
 | 
   258 		if key == "translation":
 | 
| 
 | 
   259 			pass
 | 
| 
 | 
   260 		elif type(values) == list:
 | 
| 
 | 
   261 			for v in values:
 | 
| 
 | 
   262 				int_add(generalURI,coreURI[key.lower()],v)
 | 
| 
 | 
   263 		else:
 | 
| 
 | 
   264 			int_add(generalURI,coreURI[key.lower()],values)
 | 
| 
 | 
   265 	if feature.type == "CDS":
 | 
| 
 | 
   266 		try:
 | 
| 
 | 
   267 			#Feature is normally submitted to this function
 | 
| 
 | 
   268 			#IF a subfeature is submitted it is submitted as a feature
 | 
| 
 | 
   269 			#And subfeature variable will contain the superfeature
 | 
| 
 | 
   270 			if superfeature:
 | 
| 
 | 
   271 				codon = superfeature.qualifiers["transl_table"][0]
 | 
| 
 | 
   272 		except:
 | 
| 
 | 
   273 			#Default codon table 11
 | 
| 
 | 
   274 			codon = "11"
 | 
| 
 | 
   275 		#Protein linkage
 | 
| 
 | 
   276 		translation = ""
 | 
| 
 | 
   277 		try:
 | 
| 
 | 
   278 			translation = feature.qualifiers["translation"][0].strip("*")
 | 
| 
 | 
   279 		except KeyError:
 | 
| 
 | 
   280 			#When protein sequence is not given...
 | 
| 
 | 
   281 			if len(feature.location.parts) > 1:
 | 
| 
 | 
   282 				#Exon boundaries?
 | 
| 
 | 
   283 				seq = ''
 | 
| 
 | 
   284 				for loc in feature.location:
 | 
| 
 | 
   285 					seq += record.seq[loc]
 | 
| 
 | 
   286 				if int(feature.location.strand) == -1:
 | 
| 
 | 
   287 					seq = Seq(seq).complement()
 | 
| 
 | 
   288 				else:
 | 
| 
 | 
   289 					seq = Seq(seq)
 | 
| 
 | 
   290 				translation = str(seq.translate(feature.qualifiers["transl_table"][0]))
 | 
| 
 | 
   291 			elif int(feature.location.strand) == -1:
 | 
| 
 | 
   292 				if str(record.seq[feature.location.nofuzzy_start:feature.location.nofuzzy_end].reverse_complement().translate(codon)).strip("*") != translation:
 | 
| 
 | 
   293 					if len(str(record.seq[feature.location.nofuzzy_start:feature.location.nofuzzy_end])) % 3 == 0:
 | 
| 
 | 
   294 						translation = str(record.seq[feature.location.nofuzzy_start:feature.location.nofuzzy_end].reverse_complement().translate(codon))
 | 
| 
 | 
   295 					else:
 | 
| 
 | 
   296 						translation = ''
 | 
| 
 | 
   297 			elif int(feature.location.strand) == +1:
 | 
| 
 | 
   298 					if len(str(record.seq[feature.location.nofuzzy_start:feature.location.nofuzzy_end])) % 3 == 0:
 | 
| 
 | 
   299 						translation = str(record.seq[feature.location.nofuzzy_start:feature.location.nofuzzy_end].translate(codon))
 | 
| 
 | 
   300 					else:
 | 
| 
 | 
   301 						translation = ''
 | 
| 
 | 
   302 			
 | 
| 
 | 
   303 			if translation:
 | 
| 
 | 
   304 				translation = list(translation)
 | 
| 
 | 
   305 				translation[0] = "M"
 | 
| 
 | 
   306 				translation = ''.join(translation).strip("*")
 | 
| 
 | 
   307 				if "*" in translation:
 | 
| 
 | 
   308 					pass		
 | 
| 
 | 
   309 
 | 
| 
 | 
   310 		translation = translation.encode('utf-8')
 | 
| 
 | 
   311 		md5_protein = hashlib.md5(translation).hexdigest()
 | 
| 
 | 
   312 		proteinURI = coreURI["protein/"+md5_protein]
 | 
| 
 | 
   313 		genomeGraph.add((generalURI,coreURI["protein"],proteinURI))
 | 
| 
 | 
   314 		for key in feature.qualifiers:
 | 
| 
 | 
   315 			for v in feature.qualifiers[key]:
 | 
| 
 | 
   316 				if key == "translation":
 | 
| 
 | 
   317 					genomeGraph.add((proteinURI,coreURI["md5"],Literal(md5_protein)))
 | 
| 
 | 
   318 					genomeGraph.add((proteinURI,coreURI["sequence"],Literal(translation)))
 | 
| 
 | 
   319 					genomeGraph.add((proteinURI,RDF.type,proteinClass))
 | 
| 
 | 
   320 				else:
 | 
| 
 | 
   321 					for v in feature.qualifiers[key]:
 | 
| 
 | 
   322 						int_add(generalURI,coreURI[key.lower()],v)
 | 
| 
 | 
   323 	
 | 
| 
 | 
   324 def int_add(subject, predicate, obj):
 | 
| 
 | 
   325 	try:
 | 
| 
 | 
   326 		object_float = float(obj.replace('"',''))
 | 
| 
 | 
   327 		object_int = int(obj.replace('"',''))
 | 
| 
 | 
   328 		if object_int == object_float:
 | 
| 
 | 
   329 			genomeGraph.add((subject,predicate,Literal(object_int)))
 | 
| 
 | 
   330 		else:
 | 
| 
 | 
   331 			genomeGraph.add((subject,predicate,Literal(object_float)))
 | 
| 
 | 
   332 	except:
 | 
| 
 | 
   333 		genomeGraph.add((subject,predicate,Literal(obj.replace('"',''))))
 | 
| 
 | 
   334 				
 | 
| 
 | 
   335 def save():
 | 
| 
 | 
   336 	data = genomeGraph.serialize(format='turtle')
 | 
| 
 | 
   337 	open(sys.argv[sys.argv.index("-output")+1],"wb").write(data)
 | 
| 
 | 
   338 
 | 
| 
 | 
   339 def subClassOfBuilder():
 | 
| 
 | 
   340 	for subclass in SubClassOfDict:
 | 
| 
 | 
   341 		genomeGraph.add((coreURI["Feature"],RDFS.subClassOf,OWL.Thing))
 | 
| 
 | 
   342 		genomeGraph.add((coreURI[subclass],RDFS.subClassOf,coreURI["Feature"]))
 | 
| 
 | 
   343 
 | 
| 
 | 
   344 def subClassOfBuilderRna():
 | 
| 
 | 
   345 	for subclass in SubClassOfDictRna:
 | 
| 
 | 
   346 		genomeGraph.add((coreURI["Feature"],RDFS.subClassOf,OWL.Thing))
 | 
| 
 | 
   347 		genomeGraph.add((coreURI["Rna"],RDFS.subClassOf,coreURI["Feature"]))
 | 
| 
 | 
   348 		genomeGraph.add((coreURI[subclass],RDFS.subClassOf,coreURI["Rna"]))
 | 
| 
 | 
   349 		genomeGraph.add((coreURI[subclass],RDF.type,OWL.Class))
 | 
| 
 | 
   350 
 | 
| 
 | 
   351 def main():
 | 
| 
 | 
   352 	tmp()
 | 
| 
 | 
   353 	gbk_parser()
 | 
| 
 | 
   354 	subClassOfBuilder()
 | 
| 
 | 
   355 	subClassOfBuilderRna()
 | 
| 
 | 
   356 	save()
 | 
| 
 | 
   357 	cleantmp()
 | 
| 
 | 
   358 
 | 
| 
 | 
   359 if __name__ == "__main__":
 | 
| 
 | 
   360 	main() |