# HG changeset patch
# User bgruening
# Date 1421950551 18000
# Node ID d788d1abe23817a8c9135129079d6c74a7a6bbf6
# Parent d34f31cbc9ddba7c9bdbe9253ec644a4c62bd9e5
Uploaded
diff -r d34f31cbc9dd -r d788d1abe238 aragorn.xml
--- a/aragorn.xml Sat Jul 06 10:37:13 2013 -0400
+++ b/aragorn.xml Thu Jan 22 13:15:51 2015 -0500
@@ -1,11 +1,13 @@
-
- prediction (Aragon)
+
+ prediction (Aragorn)
aragorn
+ TRNAPRED_SCRIPT_PATH
- aragorn
- $input
+ $gff3_output_file;
+#end if
+]]>
@@ -48,6 +64,7 @@
+
@@ -55,6 +72,9 @@
+
+ gff3_output
+
@@ -65,12 +85,15 @@
-
+
+
+
+
diff -r d34f31cbc9dd -r d788d1abe238 aragorn_out_to_gff3.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/aragorn_out_to_gff3.py Thu Jan 22 13:15:51 2015 -0500
@@ -0,0 +1,165 @@
+#!/usr/bin/env python
+import re
+
+def start_pattern(string):
+ return re.match(r'^[0-9]+\.$', string) \
+ or string.startswith('Number of possible') \
+ or string.startswith('Searching for')
+
+def blank_line(string):
+ return re.match(r'^\s*$', string)
+
+def blocks(iterable):
+ accumulator = []
+ run_of_blanklines = 0
+ for line in iterable:
+ # Count blank lines
+ if blank_line(line):
+ run_of_blanklines += 1
+ else:
+ run_of_blanklines = 0
+
+ if start_pattern(line) or run_of_blanklines > 2 or 'Mean G+C' in line:
+ if accumulator:
+ yield accumulator
+ accumulator = [line]
+ else:
+ accumulator.append(line)
+ if accumulator:
+ yield accumulator
+
+IMPORTANT_INFO = {
+ 'trna': re.compile(r'tRNA-(?P<codon>[A-Za-z]{3})\((?P<anticodon>[A-Za-z]{3})\)'),
+ 'trna-alt': re.compile(r'tRNA-\?\((?P<codon>[^\)]+)\)\((?P<anticodon>[A-Za-z]{2,})\)'),
+ 'bases': re.compile(r'(?P<bases>[0-9]+) bases, %GC = (?P<gc>[0-9.]+)'),
+ 'sequence': re.compile(r'Sequence (?P<complement>[c]{0,1})\[(?P<start>\d+),(?P<end>\d+)\]'),
+ 'possible_pseudogene': re.compile(r'(?P<pseudo>Possible Pseudogene)'),
+}
+INFO_GROUPS = ('codon', 'anticodon', 'bases', 'gc', 'complement', 'start', 'end', 'pseudo')
+
+def important_info(block):
+ info = {}
+ for line in block:
+ for matcher in IMPORTANT_INFO:
+ matches = IMPORTANT_INFO[matcher].search(line)
+ if matches:
+ for group in INFO_GROUPS:
+ try:
+ info[group] = matches.group(group)
+ except:
+ pass
+ return info
+
+IMPORTANT_INFO_TMRNA = {
+ 'tag_peptide': re.compile(r'Tag peptide:\s+(?P<pep>[A-Z*]*)'),
+ 'location': re.compile(r'Location (?P<complement>[c]{0,1})\[(?P<start>\d+),(?P<end>\d+)\]'),
+}
+INFO_GROUPS_TMRNA = ('start', 'end', 'pep')
+
+def important_info_tmrna(block):
+ info = {}
+ for line in block:
+ for matcher in IMPORTANT_INFO_TMRNA:
+ matches = IMPORTANT_INFO_TMRNA[matcher].search(line)
+ if matches:
+ for group in INFO_GROUPS_TMRNA:
+ try:
+ info[group] = matches.group(group)
+ except:
+ pass
+ return info
+
+import fileinput
+stdin_data = []
+for line in fileinput.input():
+ stdin_data.append(line)
+
+possible_blocks = [line for line in blocks(stdin_data)]
+
+seqid = None
+print '##gff-version-3'
+# We're off to a GREAT start, if I'm accessing by index you just know that I'm going to do terrible
+# awful things
+for block_idx in range(len(possible_blocks)):
+ block = possible_blocks[block_idx]
+ data = None
+ fasta_defline = None
+
+ if block[0].startswith('Searching for') or 'nucleotides in sequence' in block[-1]:
+ # Try and get a sequence ID out of it
+ try:
+ fasta_defline = block[-2].strip()
+ except:
+ # Failing that, ignore it.
+ pass
+ else:
+ # They DUPLICATE results in multiple places, including a fasta version
+ # in the 'full report'.
+ possible_ugliness = [x for x in block if x.startswith('>t')]
+ if len(possible_ugliness) > 0:
+ continue
+
+ # However, if it didn't have one of those all important pieces of
+ # information, then it's either a different important piece of
+ # information, or complete junk
+ data = important_info(block)
+
+ # I am not proud of any of this. We essentially say "if that block
+ # didn't come up with useful info, then try making it a tmrna"
+ if len(data.keys()) == 0:
+ data = important_info_tmrna(block)
+ # And if that fails, just none it.
+ if len(data.keys()) == 0:
+ data = None
+ else:
+ # But if it didn't, confirm that we're a tmRNA
+ data['type'] = 'tmRNA'
+ else:
+ # If we did have keys, and didn't pass through any of the tmRNA
+ # checks, we're tRNA
+ data['type'] = 'tRNA'
+
+ # If we got a sequence ID in this block, set the defline
+ if 'nucleotides in sequence' in block[-1]:
+ try:
+ fasta_defline = block[-2].strip()
+ except:
+ pass
+
+ # if a defline is available, try and extract the fasta header ID
+ if fasta_defline is not None:
+ try:
+ seqid = fasta_defline[0:fasta_defline.index(' ')]
+ except:
+ seqid = fasta_defline
+
+ # If there's data
+ if data is not None and len(data.keys()) > 1:
+
+ # Deal with our flags/notes.
+ if data['type'] == 'tRNA':
+ # Are these acceptable GFF3 tags?
+ notes = {
+ 'Codon': data['codon'],
+ 'Anticodon': data['anticodon'],
+ }
+ if 'pseudo' in data:
+ notes['Note'] = 'Possible pseudogene'
+ else:
+ notes = {
+ 'Note': 'Tag peptide: ' + data['pep'] + ''
+ }
+
+ notestr = ';'.join(['%s="%s"' % (k,v) for k,v in notes.iteritems()])
+
+ print '\t'.join([
+ seqid,
+ 'aragorn',
+ data['type'],
+ data['start'],
+ data['end'],
+ '.',
+ '.',
+ '.',
+ notestr
+ ])
diff -r d34f31cbc9dd -r d788d1abe238 tRNAscan.xml
--- a/tRNAscan.xml Sat Jul 06 10:37:13 2013 -0400
+++ b/tRNAscan.xml Thu Jan 22 13:15:51 2015 -0500
@@ -5,6 +5,7 @@
biopython
+
@@ -54,6 +56,7 @@
+
diff -r d34f31cbc9dd -r d788d1abe238 test-data/aragorn_tansl-table-1_tmRNA_tRNA.gff3
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/aragorn_tansl-table-1_tmRNA_tRNA.gff3 Thu Jan 22 13:15:51 2015 -0500
@@ -0,0 +1,2 @@
+##gff-version-3
+gi|240255695:23036500-23037000 aragorn tRNA 381 453 . . . Anticodon=tgc;Codon=Ala
diff -r d34f31cbc9dd -r d788d1abe238 tool_dependencies.xml
--- a/tool_dependencies.xml Sat Jul 06 10:37:13 2013 -0400
+++ b/tool_dependencies.xml Thu Jan 22 13:15:51 2015 -0500
@@ -1,47 +1,15 @@
-
+
-
-
- http://mbio-serv2.mbioekol.lu.se/ARAGORN/Downloads/aragorn1.2.36.tgz
- $INSTALL_DIR/bin/
- gcc -O3 -ffast-math -finline-functions -o aragorn aragorn1.2.36.c
-
-
- $INSTALL_DIR/bin
-
-
- $INSTALL_DIR/bin
-
-
-
- Compiling ARAGORN requires gcc.
+
-
-
- http://lowelab.ucsc.edu/software/tRNAscan-SE.tar.gz
- $INSTALL_DIR/bin/
- $INSTALL_DIR/lib/tRNAscan-SE/
- $INSTALL_DIR/man/
-
- cd ./tRNAscan-SE-1.3.1 && sed 's%^BINDIR = .*%BINDIR = $INSTALL_DIR/bin/%' Makefile | sed 's%^LIBDIR = .*%LIBDIR = $INSTALL_DIR/lib/tRNAscan-SE/%' | sed 's%^MANDIR = .*%MANDIR = $INSTALL_DIR/man%' > Makefile_new
- cd ./tRNAscan-SE-1.3.1 && rm Makefile && mv Makefile_new Makefile
- cd ./tRNAscan-SE-1.3.1 && make && make install
-
-
- wget ftp://selab.janelia.org/pub/software/infernal/infernal-1.0.2.tar.gz
- tar xfvz infernal-1.0.2.tar.gz
- cd infernal-1.0.2 && ./configure --prefix=$INSTALL_DIR && make && make install
-
- $INSTALL_DIR/bin
- $INSTALL_DIR/bin/
-
-
-
- Compiling and running tRNAScan-SE requires gcc a PERL environment.
+
+
+ $REPOSITORY_INSTALL_DIR
+