# HG changeset patch
# User jjkoehorst
# Date 1424535605 -3600
# Node ID 2561c51e66058eacc3debba7afa07167abc5459c
# Parent 74b8ba5e2d5bf6774559f17ff324a75471731659
# aragorn addition
#
# diff -r 74b8ba5e2d5b -r 2561c51e6605 genetic_elements/.DS_Store
# Binary file genetic_elements/.DS_Store has changed
# diff -r 74b8ba5e2d5b -r 2561c51e6605 genetic_elements/aragorn/aragorn.py
# --- /dev/null Thu Jan 01 00:00:00 1970 +0000
# +++ b/genetic_elements/aragorn/aragorn.py Sat Feb 21 17:20:05 2015 +0100
# @@ -0,0 +1,125 @@
"""Annotate an RDF genome file with tRNA/tmRNA predictions from ARAGORN.

Command line, as read from ``sys.argv`` by the functions below::

    aragorn.py INPUT_TURTLE OUTPUT_TURTLE [ARAGORN OPTIONS...] -sourcedb NAME

* ``sys.argv[1]``    turtle file that is parsed into the genome graph
* ``sys.argv[2]``    file the augmented graph is written to (turtle)
* ``sys.argv[3:-2]`` options passed through to the aragorn binary
* the value following ``-sourcedb`` is stored on every predicted feature
"""

import hashlib
import os
import shutil
import subprocess
import sys
import tempfile


def delete_galaxy():
    """Blank out every ``sys.path`` entry that contains ``galaxy-dist/``.

    The entries are replaced by ``''`` rather than removed, so the length and
    order of ``sys.path`` are unchanged.
    """
    for index, path in enumerate(sys.path):
        if "galaxy-dist/" in path:
            sys.path[index] = ''


# Some modules that are required by RDFLIB are also in galaxy, this messes up
# the RDF import function. This is not an elegant solution but it works for
# now. It has to run before rdflib is imported.
delete_galaxy()

import rdflib
from rdflib import Graph, URIRef, Literal, Namespace, XSD, BNode, RDF, RDFS, OWL, ConjunctiveGraph, plugin

# Import RDFLib's default Graph implementation.
from rdflib.graph import Graph

URI = "http://csb.wur.nl/genome/"
coreURI = Namespace(URI)
seeAlso = "rdfs:seeAlso"

# Names of the RNA classes seen so far (keys only; the value is always 1).
SubClassOfDict = {}

# Set by query(): the rdflib graph that is read, extended and saved.
genomeGraph = None
# Set by tmp(): scratch directory (with trailing slash) for aragorn I/O.
tmpFolder = None

ARAGORN_VERSION = "1.2.36"


def createClass(uri):
    """Return *uri* unchanged.

    Placeholder for declaring *uri* as an OWL class in the genome graph; no
    triples are added here.
    """
    return uri


def tmp():
    """Create a fresh scratch directory and store it in ``tmpFolder``.

    Uses ``tempfile.mkdtemp`` so the directory name is unique and cannot
    collide with another run started within the same clock tick.

    Returns:
        The directory path, ending in ``/``.
    """
    global tmpFolder
    tmpFolder = tempfile.mkdtemp() + "/"
    return tmpFolder


def query():
    """Parse the input turtle file and collect all DnaObject sequences.

    Reads ``sys.argv[1]`` into the module-level ``genomeGraph``.

    Returns:
        A list of ``[header, sequence]`` pairs, where ``header`` is ``">"``
        followed by the URI of the DnaObject and ``sequence`` is its
        whitespace-stripped ``ssb:sequence`` value.
    """
    global genomeGraph
    genomeGraph = Graph()
    filename = sys.argv[1]
    genomeGraph.parse(filename, format="turtle")
    # NOTE(review): the ssb: prefix is not declared in the query itself, so it
    # presumably has to be bound by the parsed turtle file -- confirm.
    qres = genomeGraph.query(
        'select ?class ?sequence where {?class a ssb:DnaObject . '
        '?class ssb:sequence ?sequence .}')
    sequences = []
    for row in qres:
        print("Header:", row[0])
        sequences.append([">" + str(row[0]), str(row[1].strip())])
    return sequences


def _parse_aragorn_header(line):
    """Split one ``>`` line of aragorn output into its parts.

    Accepted layouts, by number of whitespace-separated fields:

    * 3 fields: the first is dropped, then gene name and position
    * 2 fields: gene name and position
    * 4 fields containing ``(Permuted)``: the first field and the permutation
      marker are dropped

    Returns:
        ``(trna, codon, start, stop)`` as strings, or ``None`` when the line
        has another layout or carries no ``[start,stop]`` position. For a
        position starting with ``c`` (complementary strand) the two
        coordinates are swapped.
    """
    fields = line.split()
    if len(fields) == 3:
        token, pos = fields[1:]
    elif len(fields) == 2:
        token, pos = fields
    elif len(fields) == 4 and "(Permuted)" in line:
        token, pos = fields[1], fields[3]
    else:
        return None

    if "tRNA-" in line:
        # "name(codon)" -> name, codon
        trna, _, codon = token.strip(">)").partition("(")
    else:
        trna = token.strip(">").strip()  # Actually a tmRNA...
        codon = ''

    if "[" not in pos:
        return None
    coords = pos.split("[")[1].split("]")[0].split(",")
    if len(coords) != 2:
        return None
    if "c" in pos[0]:  # complementary
        stop, start = coords
    else:
        start, stop = coords
    return trna, codon, start, stop


def aragorn(sequences):
    """Run aragorn on every sequence and add the hits to ``genomeGraph``.

    Each contig is written to its own FASTA file and processed separately,
    for ease of parsing. The binary is expected at
    ``<grandparent dir of this file>/tools/aragorn1.2.36/aragorn`` and is
    called without a shell; the options in ``sys.argv[3:-2]`` are split on
    whitespace and appended.

    Args:
        sequences: list of ``[header, sequence]`` pairs as built by
            ``query()``.

    Raises:
        OSError: if the aragorn binary cannot be executed.
        ValueError: if a hit is found and ``-sourcedb`` is not in
            ``sys.argv``.
    """
    tool_root = os.path.dirname(os.path.dirname(os.path.realpath(__file__)))
    binary = os.path.join(
        tool_root, "tools", "aragorn" + ARAGORN_VERSION, "aragorn")
    fasta_path = os.path.join(tmpFolder, "tmp.seq")
    output_path = os.path.join(tmpFolder, "aragorn.output")
    # Join-then-split keeps the whitespace splitting the shell used to do and
    # drops options that Galaxy passed as empty strings.
    extra_args = ' '.join(sys.argv[3:-2]).split()
    uri_tag = "/trna-aragorn_" + ARAGORN_VERSION.replace(".", "_") + "-"

    for sequence in sequences:
        with open(fasta_path, "w") as handle:
            handle.write('\n'.join(sequence))

        cmd = [binary, "-fasta", fasta_path] + extra_args
        print(' '.join(cmd))
        with open(output_path, "w") as handle:
            status = subprocess.call(cmd, stdout=handle)
        if status != 0:
            print("aragorn exited with status %s" % status)

        with open(output_path) as handle:
            output_lines = handle.readlines()

        contig = sequence[0].strip(">").replace(URI, "")
        dnaobjectURI = coreURI[contig]
        for line in output_lines:
            if ">" not in line:
                continue
            print(line.split())
            parsed = _parse_aragorn_header(line)
            if parsed is None:
                # Not a gene header in a layout we understand; skip it
                # instead of reusing values from the previous hit.
                continue
            trna, codon, start, stop = parsed

            class_name = trna.split("-")[0].title()  # trna or tmrna
            trnaClass = createClass(coreURI[class_name])
            SubClassOfDict[class_name] = 1

            trnaURI = coreURI[
                contig + uri_tag + trna.lower() + "/" + start + "_" + stop]
            source_db = sys.argv[sys.argv.index("-sourcedb") + 1]
            genomeGraph.add((dnaobjectURI, coreURI["feature"], trnaURI))
            genomeGraph.add((trnaURI, RDF.type, trnaClass))
            properties = [
                ("begin", Literal(start, datatype=XSD.integer)),
                ("end", Literal(stop, datatype=XSD.integer)),
                ("trna_type", Literal(trna)),
                ("trna_anti", Literal(codon)),
                ("tool", Literal("aragorn")),
                ("version", Literal(ARAGORN_VERSION)),
                ("sourcedb", Literal(source_db)),
            ]
            for name, value in properties:
                genomeGraph.add((trnaURI, coreURI[name], value))


def subClassOfBuilder():
    """Add the Feature > Rna > <class> hierarchy for every class seen.

    Nothing is added when no class has been recorded in ``SubClassOfDict``.
    """
    if not SubClassOfDict:
        return
    genomeGraph.add((coreURI["Feature"], RDFS.subClassOf, OWL.Thing))
    genomeGraph.add((coreURI["Rna"], RDFS.subClassOf, coreURI["Feature"]))
    genomeGraph.add((coreURI["Rna"], RDF.type, OWL.Class))
    for subclass in SubClassOfDict:
        genomeGraph.add((coreURI[subclass], RDFS.subClassOf, coreURI["Rna"]))


def save():
    """Serialize ``genomeGraph`` as turtle into the file ``sys.argv[2]``.

    The class hierarchy is not written; the call to subClassOfBuilder() is
    intentionally left disabled.
    """
    # subClassOfBuilder()
    data = genomeGraph.serialize(format='turtle')
    # Depending on the rdflib version serialize() returns bytes or text;
    # the file is opened in binary mode, so encode when needed.
    if not isinstance(data, bytes):
        data = data.encode("utf-8")
    with open(sys.argv[2], "wb") as handle:
        handle.write(data)


def main():
    """Run the pipeline: scratch dir, query, predict, save, clean up."""
    tmp()
    try:
        sequences = query()
        aragorn(sequences)
        save()
    finally:
        # The scratch directory only holds intermediate aragorn files.
        shutil.rmtree(tmpFolder, ignore_errors=True)


if __name__ == "__main__":
    main()

# diff -r 74b8ba5e2d5b -r 2561c51e6605 genetic_elements/aragorn/aragorn.xml
# --- /dev/null Thu Jan 01 00:00:00 1970 +0000
# +++ b/genetic_elements/aragorn/aragorn.xml Sat Feb 21 17:20:05 2015 +0100
# @@ -0,0 +1,77 @@
# + + + python + rdflib + aragorn + + Aragon + aragorn.py '$input' '$output' '-gc$genbank_gencode' '$tmRNA' '$tRNA' '$topology' '-fon' '-sourcedb' 'SAPP' + + + +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +**What it does** + +Aragorn_ predicts tRNA (and tmRNA) in nucleotide sequences. + +.. _Aragorn: http://mbio-serv2.mbioekol.lu.se/ARAGORN/ + +----- + +It requires an RDF genome file + + + + + @article{Laslett2004, +abstract = {A computer program, ARAGORN, identifies tRNA and tmRNA genes. The program employs heuristic algorithms to predict tRNA secondary structure, based on homology with recognized tRNA consensus sequences and ability to form a base-paired cloverleaf. tmRNA genes are identified using a modified version of the BRUCE program. ARAGORN achieves a detection sensitivity of 99\% from a set of 1290 eubacterial, eukaryotic and archaeal tRNA genes and detects all complete tmRNA sequences in the tmRNA database, improving on the performance of the BRUCE program. Recently discovered tmRNA genes in the chloroplasts of two species from the 'green' algae lineage are detected. The output of the program reports the proposed tRNA secondary structure and, for tmRNA genes, the secondary structure of the tRNA domain, the tmRNA gene sequence, the tag peptide and a list of organisms with matching tmRNA peptide tags.}, +author = {Laslett, Dean and Canback, Bjorn}, +doi = {10.1093/nar/gkh152}, +file = {:Users/koeho006/Library/Application Support/Mendeley Desktop/Downloaded/Laslett, Canback - 2004 - ARAGORN, a program to detect tRNA genes and tmRNA genes in nucleotide sequences.pdf:pdf}, +isbn = {1362-4962 (Electronic)$\backslash$n1362-4962 (Linking)}, +issn = {03051048}, +journal = {Nucleic Acids Research}, +mendeley-groups = {VAPP Application note}, +pages = {11--16}, +pmid = {14704338}, +title = {{ARAGORN, a program to detect tRNA genes and tmRNA genes in nucleotide sequences}}, +volume = {32}, +year = {2004} +} + + + + +