# HG changeset patch
# User yating-l
# Date 1492033067 14400
# Node ID 57299471d6c1ef7645f8ddea25151cedfdd8dc86
planemo upload commit 402a746f69e9f1dbb57007536fc36dc6ce3180de
diff -r 000000000000 -r 57299471d6c1 Group.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/Group.py Wed Apr 12 17:37:47 2017 -0400
@@ -0,0 +1,59 @@
+from operator import itemgetter
+
+# Input: A group: a list of rows (each row a list of column values) that belong to the same gene
+class Group:
+ # Modify the "type" and "attributes" columns; initialize id, gene, source, stream. The rows of `group` are modified in place.
+ def __init__(self, group):
+ self.group = group
+ self.id = str(group[0][0])  # column 0 of the first row
+ self.source = str(group[0][1])  # column 1 of the first row
+ self.stream = str(group[0][6])  # column 6 of the first row; order() expects "+" or "-"
+ self.gene = str(group[0][8])  # column 8 of the first row, read before the loop below overwrites it
+ for x in range(0, len(group)):
+ self.group[x][2] = "CDS"  # every row's type column becomes "CDS"
+ self.group[x][8] = "Parent=mRNA_" + self.gene
+ self.group[x][3] = int(self.group[x][3])  # converted to int so that order() sorts numerically
+ self.group[x][4] = int(self.group[x][4])
+
+ # Order the group elements according to stream ("+": ascending, "-": descending, keyed on column 3); also sets num, min_item and max_item
+ def order(self):
+ self.num = len(self.group)
+ if self.stream == "+":
+ self.group = sorted(self.group, key=itemgetter(3))
+ self.min_item = self.group[0][3]  # smallest column-3 value
+ self.max_item = self.group[self.num-1][4]  # column 4 of the row with the largest column 3; NOTE(review): assumes that row also has the largest column 4
+ elif self.stream == "-":
+ self.group = sorted(self.group, key=itemgetter(3), reverse=True)
+ self.min_item = self.group[self.num-1][3]  # descending order, so the last row has the smallest column 3
+ self.max_item = self.group[0][4]
+ else:
+ print("Stream in invalid!\n")  # NOTE(review): min_item/max_item are not set on this path, so writer() would fail with AttributeError
+
+ def phaseCalculator(self, i, donor = 0):  # Recursively fill column 7 for rows i..num-1; donor is the remainder carried over from the previous row
+ if i >= self.num:
+ pass  # past the last row: the recursion ends here
+ else:
+ self.type = self.group[i][2]
+ self.size = self.group[i][4] - self.group[i][3] + 1  # inclusive length of the row's interval
+ if self.num == 1:
+ if self.type == "Eterm":  # NOTE(review): __init__ sets every type to "CDS", so this branch looks unreachable - confirm
+ self.group[i][7] = str(self.size % 3)
+ else:
+ self.group[i][7] = "0"
+ elif self.num > 1 and i < self.num:  # i < self.num always holds inside this else branch
+ accept = (3 - donor) % 3
+ self.group[i][7] = str(accept)
+ donor = (self.size - accept) % 3  # remainder handed to the next row
+ i = i + 1
+ self.phaseCalculator(i, donor)  # one recursive call per row
+
+
+ def writer(self, gff3):  # Sort the rows, fill column 7, then write a gene line, an mRNA line and every row to gff3 (any object with write())
+ self.order()
+ self.phaseCalculator(0)
+ gff3.write(self.id + "\t" + self.source + "\tgene\t" + str(self.min_item) + "\t" + str(self.max_item) + "\t.\t" + self.stream + "\t.\t" + "ID=" + self.gene + "\n")
+ gff3.write(self.id + "\t" + self.source + "\tmRNA\t" + str(self.min_item) + "\t" + str(self.max_item) + "\t.\t" + self.stream + "\t.\t" + "ID=mRNA_" + self.gene + ";Parent=" + self.gene + "\n")
+ for x in range(0, len(self.group)):
+ self.group[x][3] = str(self.group[x][3])  # back to str so that "\t".join() below accepts the row
+ self.group[x][4] = str(self.group[x][4])
+ gff3.write("\t".join(self.group[x]) + "\n")
\ No newline at end of file
diff -r 000000000000 -r 57299471d6c1 README.md
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/README.md Wed Apr 12 17:37:47 2017 -0400
@@ -0,0 +1,2 @@
+# SNAP
+Galaxy wrapper for SNAP gene prediction tool
diff -r 000000000000 -r 57299471d6c1 gff2Togff3.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/gff2Togff3.py Wed Apr 12 17:37:47 2017 -0400
@@ -0,0 +1,59 @@
+import argparse
+import sys
+import fileinput
+from Group import Group
+
+def main():  # Parse --input/--output, build a Convertor and run the conversion
+ parser = argparse.ArgumentParser(description='Get a gff file and the output gff3 file')
+ parser.add_argument('--input', help='input gff file')
+ parser.add_argument('--output', help='output gff3 file', required=True)
+ args = parser.parse_args()
+ input = args.input  # None when --input is omitted
+ output = args.output
+ if not sys.stdin.isatty():  # stdin is not a terminal (e.g. piped): rows are read from stdin
+ c = Convertor(sys.stdin, output)  # NOTE(review): --input is ignored on this path even when it was given
+ else:
+ c = Convertor(input, output)  # NOTE(review): input may be None here; Convertor would then try to iterate None
+ c.convert()
+
+class Convertor:  # Loads tab-separated rows and writes them, grouped by column 8, to a GFF3 file
+ def __init__(self, input, output):  # input: a file path (str) or an iterable of lines; output: path of the file to write
+ if type(input) is str:
+ with open(input) as self.f:
+ self.li = [line.rstrip().split("\t") for line in self.f]  # rstrip() strips all trailing whitespace, including trailing tabs
+ else:
+ self.li = [line.rstrip().split("\t") for line in input]
+ self.gff3 = open(output, "w")  # stays open until convert() closes it
+ self.gff3.write("##gff-version 3\n")
+
+ def convert(self):  # Write every group in input order, then close the output file
+ index = 0
+ while index in range(0, len(self.li)):  # i.e. while 0 <= index < len(self.li)
+ index = self.groupAsgene(index)  # returns the index of the first row of the next group
+ self.gff3.close()
+
+
+ def groupAsgene(self, start = 0):  # Write the run of consecutive rows sharing column 8 with row `start`; return the index just past that run
+ gene = self.li[start][8]  # NOTE(review): raises IndexError for rows with fewer than 9 fields (e.g. a blank line)
+ index = len(self.li)  # default: the group runs to the end of the input
+ for i in range(start+1, len(self.li)):
+ line = self.li[i]
+ if gene != line[8]:
+ index = i  # first row that belongs to a different gene
+ break
+ if index >= len(self.li):
+ group = self.li[start:len(self.li)]  # both branches are equivalent to self.li[start:index]
+ else:
+ group = self.li[start:index]
+ g = Group(group)
+ g.writer(self.gff3)
+ return index
+
+
+
+
+if __name__ == "__main__":  # run the conversion only when executed as a script, not on import
+ main()
+
+
+
\ No newline at end of file
diff -r 000000000000 -r 57299471d6c1 readme.rst
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/readme.rst Wed Apr 12 17:37:47 2017 -0400
@@ -0,0 +1,46 @@
+Galaxy wrapper for SNAP
+========================
+
+This wrapper is copyright 2016-2017 by Yating Liu
+
+This is a wrapper for the gene prediction tool SNAP. SNAP is a general purpose gene finding program suitable for both eukaryotic and prokaryotic genomes. SNAP is an acronym for Semi-HMM-based Nucleic Acid
+Parser.
+
+Reference
+----------------------
+
+ Korf I. Gene finding in novel Genomes. BMC Bioinformatics 2004, 5:59
+
+Installation
+-----------------------
+
+To install SNAP, please download SNAP from
+
+http://korflab.ucdavis.edu/Software/snap-2013-11-29.tar.gz
+
+and follow the installation instructions. The software is routinely compiled and tested on Mac OS X. It should compile
+fine on any Linux/Unix-type operating system.
+The default compiler is gcc. If you have gcc installed, the easiest is to just compile as:
+```
+ make
+```
+
+The ZOE environment variable is used by SNAP to find the HMM files. Set it
+to the SNAP installation directory (the one containing the HMM directory). For example, if you unpacked the tarball in /usr/local/snap, set the ZOE environment variable to /usr/local/snap
+
+```
+ setenv ZOE /usr/local/snap # csh, tcsh, etc
+```
+ or
+```
+ export ZOE=/usr/local/snap # sh, bash, etc
+```
+To install the wrapper, copy the snap folder into the Galaxy tools directory and modify the $GALAXY_ROOT/config/tool_conf.xml file to make the tool available to Galaxy. For example:
+```
+
+```
+
+
+
+
+
diff -r 000000000000 -r 57299471d6c1 snap.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/snap.xml Wed Apr 12 17:37:47 2017 -0400
@@ -0,0 +1,170 @@
+
+
+ snap
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ proteins == "-aa"
+
+
+ transcripts == "-tx"
+
+
+
+
+
+
+
+
+
+ [options]
+
+HMM file:
+
+ The most convenient way to specify the HMM file is by name. This requires
+ that the ZOE environment variable is set. In this case, snap will look
+ for the HMM file in $ZOE/HMM. You may also specify the HMM file by an
+ explicit path. The following are equivalent if $ZOE is in /usr/local:
+
+ snap C.elegans.hmm ...
+ snap /usr/local/Zoe/HMM/C.elegans.hmm ...
+ snap worm ... # there are a few convenient aliases in $ZOE/HMM
+
+FASTA file:
+
+ If you have several sequences to analyze, it is more efficient to run
+ snap on a concatenated FASTA file rather than separate runs on single
+ sequence files. The sequence may be in a compressed format.
+
+ If sequences have been masked with lowercase letters, use -lcmask to
+ prevent exons from appearing in masked DNA.
+
+Output:
+
+ Annotation is reported to stdout in a non-standard format (ZFF). You can
+ change to GFF or ACEDB with the -gff or -ace options. Proteins and
+ transcripts are reported to FASTA files with the -aa and -tx options.
+
+External definitions:
+
+ SNAP allows you to adjust the score of any sequence model at any point
+ in a sequence. This behavior is invoked by giving a ZFF file to SNAP:
+
+ snap -xdef
+
+ Each feature description uses the 'group' field to issue a command:
+
+ SET set the score
+ ADJ adjust the score up or down
+ OK set non-canonical scores
+
+ >FOO
+ Acceptor 120 120 + +50 . . . SET (sets an Acceptor to 50)
+ Donor 212 212 + -20 . . . ADJ (lowers a Donor by -20)
+ Inter 338 579 + -2 . . . ADJ (lowers Inter by -2 in a range)
+ Coding 440 512 - +3 . . . ADJ (raises Coding by +3 in a range)
+ Donor 625 638 + -5 . . . OK (sets range of odd Donors to -5)
+
+If the output has scrolled off your screen, try 'snap -help | more'
+
+ ]]>
+
+
+@misc{renameTODO,
+ author = {LastTODO, FirstTODO},
+ year = {TODO},
+ title = {TODO},
+ url = {http://korflab.ucdavis.edu/Software/snap-2013-11-29.tar.gz},
+}
+
+
diff -r 000000000000 -r 57299471d6c1 test-data/thale.dna.gz
Binary file test-data/thale.dna.gz has changed
diff -r 000000000000 -r 57299471d6c1 tool_dependencies.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_dependencies.xml Wed Apr 12 17:37:47 2017 -0400
@@ -0,0 +1,25 @@
+
+
+
+
+
+
+ http://korflab.ucdavis.edu/Software/snap-2013-11-29.tar.gz
+ make
+
+ .
+ $INSTALL_DIR
+
+
+ $INSTALL_DIR
+ $INSTALL_DIR
+
+
+
+
+ SNAP is a general purpose gene finding program suitable for both eukaryotic
+ and prokaryotic genomes. SNAP is an acronym for Semi-HMM-based Nucleic Acid
+ Parser.
+
+
+