changeset 17:2561c51e6605

aragorn addition
author jjkoehorst <jasperkoehorst@gmail.com>
date Sat, 21 Feb 2015 17:20:05 +0100
parents 74b8ba5e2d5b
children 27a2341f86cc
files genetic_elements/.DS_Store genetic_elements/aragorn/aragorn.py genetic_elements/aragorn/aragorn.xml
diffstat 3 files changed, 202 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
Binary file genetic_elements/.DS_Store has changed
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/genetic_elements/aragorn/aragorn.py	Sat Feb 21 17:20:05 2015 +0100
@@ -0,0 +1,125 @@
+def delete_galaxy():  # Blank out every sys.path entry whose text contains "galaxy-dist/".
+	import sys
+	for index, path in enumerate(sys.path):
+		if "galaxy-dist/" in path:
+			sys.path[index] = ''  # entry is overwritten, not removed; NOTE(review): Python treats an empty sys.path entry as the current directory - confirm that is acceptable
+
+# Some modules that rdflib requires also ship with Galaxy, and the Galaxy copies break the rdflib import. Blanking the Galaxy entries in sys.path is not an elegant solution, but it works for now.
+delete_galaxy()  # runs at module load, ahead of the rdflib import that follows
+
+from rdflib import Graph, URIRef, Literal,Namespace, XSD, BNode,RDF,RDFS,OWL, ConjunctiveGraph, plugin
+
+# Import RDFLib's default Graph implementation.
+from rdflib.graph import Graph
+
+import sys, os
+
+import rdflib
+import subprocess
+import hashlib
+global URI  # NOTE(review): a `global` statement at module level has no effect; these names are module-scoped anyway
+global SubClassOfDict
+SubClassOfDict = {}  # class name -> 1; filled by aragorn(), iterated by subClassOfBuilder()
+
+URI = "http://csb.wur.nl/genome/"  # namespace root; the same literal is stripped from FASTA headers in aragorn()
+global seeAlso
+seeAlso = "rdfs:seeAlso"  # not referenced anywhere else in this file
+global coreURI
+coreURI = Namespace(URI)  # coreURI["x"] is used below to build every URI under the namespace root
+
+def createClass(uri):  # Currently a pass-through: returns `uri` unchanged and adds nothing to the graph.
+	#genomeGraph.add((uri,RDF.type,OWL.Class))
+	#genomeGraph.add((uri,RDFS.subClassOf,OWL.Thing))
+	#genomeGraph.add((coreURI["Feature"],RDFS.subClassOf,OWL.Thing))
+	#genomeGraph.add((coreURI["Rna"],RDFS.subClassOf,coreURI["Feature"]))
+	#genomeGraph.add((uri,RDFS.subClassOf,coreURI["Rna"]))
+	return uri  # the class/subclass triples above are disabled
+
+def tmp():  # Create a scratch directory and publish its path in the global `tmpFolder`.
+	import time
+	global tmpFolder
+	tmpFolder = "/tmp/"+str(time.time())+"/"  # directory name is the current epoch time; the trailing "/" lets callers append file names directly
+	os.mkdir(tmpFolder)  # NOTE(review): raises if the directory already exists, and nothing in this file removes it afterwards
+
+def query():  # Parse the Turtle file named by sys.argv[1] into the global `genomeGraph` and return its DNA sequences as [header, sequence] pairs.
+	global genomeGraph
+	genomeGraph = Graph()
+	filename = sys.argv[1]
+	genomeGraph.parse(filename, format="turtle")
+	qres = genomeGraph.query('select ?class ?sequence where {?class a ssb:DnaObject . ?class ssb:sequence ?sequence .}')  # NOTE(review): the ssb: prefix is not declared in the query text; presumably it is bound by the parsed input file - confirm
+	sequences = []
+	for row in qres:
+		print ("Header:",row[0])
+		sequences += [[">"+str(row[0]),str(row[1].strip())]] # entry = [">" + subject URI, stripped sequence]; formerly considered: .replace("/","-").replace("","")
+
+	return sequences
+
+def aragorn(sequences):  # Run the ARAGORN binary on each [header, sequence] pair and add one feature per parsed hit line to the global genomeGraph.
+	for sequence in sequences:
+		#Call aragorn for each contig, for ease of parsing
+		open(tmpFolder+"tmp.seq","w").write('\n'.join(sequence))  # header line + sequence line; the same file is overwritten on every iteration
+		folder = os.path.realpath(__file__).rsplit("/",2)[0]+"/"  # parent of the directory that contains this script
+		cmd = folder+"/tools/aragorn1.2.36/aragorn -fasta "+tmpFolder+"tmp.seq "+' '.join(sys.argv[3:-2])+" > "+tmpFolder+"aragorn.output"  # sys.argv[3:-2]: everything after the output path except the last two arguments
+		print (cmd)
+		os.system(cmd)  # NOTE(review): the command is a shell string and its exit status is ignored
+		aragorn = open(tmpFolder+"aragorn.output").readlines()  # local name shadows this function inside its own body
+# 		string = ''.join(aragorn)
+
+		contig = sequence[0].strip(">").replace("http://csb.wur.nl/genome/","")  # header without ">" and without the namespace root
+		dnaobjectURI = coreURI[contig]
+		#print (contig)
+		for line in aragorn:
+			if ">" in line:  # presumably the per-hit header lines of the ARAGORN output - confirm against the -fon format
+				print (line.split())
+				try:
+					trna, pos = line.split()[1:]  # succeeds only for exactly three whitespace-separated fields
+				except:  # NOTE(review): bare except; only the unpacking failure is expected here
+					try:
+						trna, pos = line.split()  # succeeds only for exactly two fields
+					except:  # NOTE(review): bare except, see above
+						if "(Permuted)" in line:  # NOTE(review): when this is False, trna/pos keep the previous line's values or are unbound
+							trna, permute, pos = line.split()[1:]  # four fields; `permute` is not used afterwards
+							
+				if "tRNA-" in line:
+					trna, codon = (trna.strip(">)").split("(",1))  # drops ">" / ")" from both ends, then splits at the first "(" into name and codon text
+				else:
+					trna = trna.strip(">").strip() #Actually a tmRNA...
+					codon = ''
+				trnaClass = createClass(coreURI[trna.split("-")[0].title()]) #trna or tmrna
+				SubClassOfDict[trna.split("-")[0].title()] = 1  # remember the class name for subClassOfBuilder()
+				if "c" in pos[0]: #complementary: the first coordinate becomes stop and the second start
+					stop, start = pos.split("[")[1].split("]")[0].split(",")
+				else:
+					start, stop = pos.split("[")[1].split("]")[0].split(",")  # text between "[" and "]", split on ","
+				trnaURI = coreURI[contig+"/trna-aragorn_1_2_36-"+trna.lower() +"/"+ start +"_"+ stop]  # feature URI: contig / tool-version-name / start_stop
+				genomeGraph.add((dnaobjectURI, coreURI["feature"] , trnaURI))
+				genomeGraph.add((trnaURI, RDF.type,trnaClass))
+				genomeGraph.add((trnaURI, coreURI["begin"] , Literal(start,datatype=XSD.integer)))
+				genomeGraph.add((trnaURI, coreURI["end"] , Literal(stop,datatype=XSD.integer)))
+				genomeGraph.add((trnaURI, coreURI["trna_type"] , Literal(trna)))
+				genomeGraph.add((trnaURI, coreURI["trna_anti"] , Literal(codon)))  # empty string for the non-"tRNA-" branch
+				genomeGraph.add((trnaURI, coreURI["tool"] , Literal("aragorn")))
+				genomeGraph.add((trnaURI, coreURI["version"] , Literal("1.2.36")))
+				genomeGraph.add((trnaURI, coreURI["sourcedb"], Literal(sys.argv[sys.argv.index("-sourcedb")+1])))  # argument that follows "-sourcedb"; list.index raises ValueError when the flag is absent
+
+def subClassOfBuilder():  # Add the Feature/Rna class hierarchy plus one subClassOf triple per key of SubClassOfDict; its only call (in save) is commented out.
+	for subclass in SubClassOfDict:
+		genomeGraph.add((coreURI["Feature"],RDFS.subClassOf,OWL.Thing))  # does not depend on `subclass`; re-added on every iteration
+		genomeGraph.add((coreURI["Rna"],RDFS.subClassOf,coreURI["Feature"]))  # likewise loop-invariant
+		genomeGraph.add((coreURI[subclass],RDFS.subClassOf,coreURI["Rna"]))
+		genomeGraph.add((coreURI["Rna"], RDF.type,OWL.Class))  # likewise loop-invariant
+
+def save():  # Serialize the global genomeGraph as Turtle and write it to the path in sys.argv[2].
+	#Create the subclass-of statements (the call below is disabled)
+	#subClassOfBuilder()
+	## Saves the file
+	data = genomeGraph.serialize(format='turtle')
+	open(sys.argv[2],"wb").write(data)  # NOTE(review): "wb" requires bytes; newer rdflib releases return str from serialize() unless an encoding is passed - confirm against the pinned rdflib
+
+def main():  # Pipeline: create scratch dir -> load graph and sequences -> run ARAGORN and annotate -> write the graph.
+	tmp()
+	sequences = query()
+	aragorn(sequences)
+	save()
+
+main()  # no __main__ guard: the pipeline also runs whenever this module is imported
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/genetic_elements/aragorn/aragorn.xml	Sat Feb 21 17:20:05 2015 +0100
@@ -0,0 +1,77 @@
+<tool id="SAPP_aragorn_trna" name="tRNA and tmRNA" version="0.3">
+    <requirements>
+        <requirement type='package' version="3.4">python</requirement>
+        <requirement type='package' version="1.0">rdflib</requirement>
+        <requirement type="package" version="1.2.36">aragorn</requirement>
+    </requirements>
+    <description>Aragorn</description>
+    <command interpreter="python3.4">aragorn.py '$input' '$output' '-gc$genbank_gencode' '$tmRNA' '$tRNA' '$topology' '-fon' '-sourcedb' 'SAPP'
+    </command>
+    <inputs>
+        <param name="input" type="data" format="rdf" label="RDF Genome"/>
+
+        <param name="genbank_gencode" type="select" label="Genetic code"> <!-- the chosen value is appended to "-gc" on the command line -->
+            <option value="1" selected="true">1. Standard</option>
+            <option value="2">2. Vertebrate Mitochondrial</option>
+            <option value="3">3. Yeast Mitochondrial</option>
+            <option value="4">4. Mold, Protozoan, and Coelenterate Mitochondrial Code and the Mycoplasma/Spiroplasma Code</option>
+            <option value="5">5. Invertebrate Mitochondrial</option>
+            <option value="6">6. Ciliate, Dasycladacean and Hexamita Nuclear Code</option>
+            <option value="9">9. Echinoderm Mitochondrial</option>
+            <option value="10">10. Euplotid Nuclear</option>
+            <option value="11">11. Bacteria and Archaea</option>
+            <option value="12">12. Alternative Yeast Nuclear</option>
+            <option value="13">13. Ascidian Mitochondrial</option>
+            <option value="14">14. Flatworm Mitochondrial</option>
+            <option value="15">15. Blepharisma Macronuclear</option>
+            <option value="16">16. Chlorophycean Mitochondrial</option>
+            <option value="21">21. Trematode Mitochondrial</option>
+            <option value="22">22. Scenedesmus obliquus mitochondrial</option>
+            <option value="23">23. Thraustochytrium Mitochondrial</option>
+            <option value="24">24. Pterobranchia mitochondrial</option>
+        </param>
+        <param name="topology" type="select" label="Topology">
+            <option value="-c">Assume that each sequence has a circular topology</option>
+            <option value="-l">Assume that each sequence has a linear topology</option>
+        </param>
+        <param name='tmRNA' type='boolean' label='Search for tmRNA genes (-m)' truevalue='-m' falsevalue='' checked="true" help='' />
+        <param name='tRNA' type='boolean' label='Search for tRNA genes (-t)' truevalue='-t' falsevalue='' checked="true" help='' />
+    </inputs>
+    <outputs>
+        <data format="rdf" name="output" label="Aragorn: ${input.name}"></data>
+    </outputs>
+    <help>
+
+**What it does**
+
+Aragorn_ predicts tRNA (and tmRNA) in nucleotide sequences.
+
+.. _Aragorn: http://mbio-serv2.mbioekol.lu.se/ARAGORN/
+
+-----
+
+It requires an RDF genome file
+
+    </help>
+    <citations>
+        <citation type="bibtex">
+            @article{Laslett2004,
+abstract = {A computer program, ARAGORN, identifies tRNA and tmRNA genes. The program employs heuristic algorithms to predict tRNA secondary structure, based on homology with recognized tRNA consensus sequences and ability to form a base-paired cloverleaf. tmRNA genes are identified using a modified version of the BRUCE program. ARAGORN achieves a detection sensitivity of 99\% from a set of 1290 eubacterial, eukaryotic and archaeal tRNA genes and detects all complete tmRNA sequences in the tmRNA database, improving on the performance of the BRUCE program. Recently discovered tmRNA genes in the chloroplasts of two species from the 'green' algae lineage are detected. The output of the program reports the proposed tRNA secondary structure and, for tmRNA genes, the secondary structure of the tRNA domain, the tmRNA gene sequence, the tag peptide and a list of organisms with matching tmRNA peptide tags.},
+author = {Laslett, Dean and Canback, Bjorn},
+doi = {10.1093/nar/gkh152},
+file = {:Users/koeho006/Library/Application Support/Mendeley Desktop/Downloaded/Laslett, Canback - 2004 - ARAGORN, a program to detect tRNA genes and tmRNA genes in nucleotide sequences.pdf:pdf},
+isbn = {1362-4962 (Electronic)$\backslash$n1362-4962 (Linking)},
+issn = {03051048},
+journal = {Nucleic Acids Research},
+mendeley-groups = {VAPP Application note},
+pages = {11--16},
+pmid = {14704338},
+title = {{ARAGORN, a program to detect tRNA genes and tmRNA genes in nucleotide sequences}},
+volume = {32},
+year = {2004}
+}
+</citation>
+</citations>
+
+</tool>
+