Mercurial > repos > yating-l > snap
diff Group.py @ 0:57299471d6c1 draft default tip
planemo upload commit 402a746f69e9f1dbb57007536fc36dc6ce3180de
author | yating-l |
---|---|
date | Wed, 12 Apr 2017 17:37:47 -0400 |
parents | |
children |
line wrap: on
line diff
from operator import itemgetter


# Input: A group: a list that contains lines belonging to the same gene
class Group:
    """Rows of one gene, rewritten as a gene / mRNA / CDS hierarchy.

    Each row is a mutable sequence of nine columns.  The columns this class
    touches are: 0 (id), 1 (source), 2 (type), 3 (start), 4 (end),
    6 (stream, "+" or "-"), 7 (phase) and 8 (attributes).

    The rows passed to the constructor are modified in place; ``self.group``
    initially refers to the very list that was passed in.
    """

    # Modify the "type" and "attributes" columns; initialize id, gene,
    # source and stream from the first row.
    def __init__(self, group):
        """Take ownership of *group* and normalise its rows.

        Raises IndexError when *group* is empty or a row has fewer than
        nine columns, and ValueError when a start/end cannot be converted
        to int.
        """
        self.group = group
        first = group[0]
        self.id = str(first[0])
        self.source = str(first[1])
        self.stream = str(first[6])
        self.gene = str(first[8])
        # Remember the incoming type labels (input order) *before* column 2
        # is overwritten below; phaseCalculator needs the label of a lone
        # row, and it would otherwise always read "CDS".
        self.original_types = [str(row[2]) for row in group]
        parent = "Parent=mRNA_" + self.gene
        for row in group:
            row[2] = "CDS"
            row[8] = parent
            row[3] = int(row[3])
            row[4] = int(row[4])

    # Order the group elements according to stream:
    # "+": ascending start, "-": descending start.
    def order(self):
        """Sort the rows by start for the stream and record the gene span.

        Sets ``self.num``, ``self.min_item`` (smallest start) and
        ``self.max_item`` (largest end).  For any stream other than "+" or
        "-" a warning is printed and the rows keep their current order.
        """
        self.num = len(self.group)
        if self.stream == "+":
            self.group = sorted(self.group, key=itemgetter(3))
        elif self.stream == "-":
            self.group = sorted(self.group, key=itemgetter(3), reverse=True)
        else:
            print("Stream in invalid!\n")
        # Take the span over every row instead of trusting the first/last
        # sorted row: a row with a smaller start may still have the larger
        # end.  Also set for an unrecognised stream so writer() does not
        # fail on a missing attribute.
        self.min_item = min(row[3] for row in self.group)
        self.max_item = max(row[4] for row in self.group)

    def phaseCalculator(self, i, donor=0):
        """Fill the phase column (7) of the rows from index *i* onwards.

        *donor* is the number of bases left over from the preceding row,
        i.e. ``(size - phase) % 3`` of that row; 0 for the first row.

        With several rows, each row gets phase ``(3 - donor) % 3``.  A lone
        row gets "0", unless its original type label was "Eterm", in which
        case it gets ``size % 3``.
        """
        count = len(self.group)
        if i >= count:
            return

        if count == 1:
            row = self.group[i]
            # type/size stay instance attributes because the original
            # implementation exposed them.
            self.type = row[2]
            self.size = row[4] - row[3] + 1
            if self.original_types[0] == "Eterm":
                row[7] = str(self.size % 3)
            else:
                row[7] = "0"
            return

        # Iterative walk (the original recursed once per row).
        for index in range(i, count):
            row = self.group[index]
            self.type = row[2]
            self.size = row[4] - row[3] + 1
            accept = (3 - donor) % 3
            row[7] = str(accept)
            donor = (self.size - accept) % 3

    def writer(self, gff3):
        """Write the gene line, the mRNA line and every CDS row to *gff3*.

        *gff3* is any object with a ``write(str)`` method.  The rows are
        ordered and their phases computed first.  Start/end stay integers
        on the rows, so calling writer() again produces the same output.
        """
        self.order()
        self.phaseCalculator(0)

        start = str(self.min_item)
        end = str(self.max_item)
        gene_line = [self.id, self.source, "gene", start, end,
                     ".", self.stream, ".", "ID=" + self.gene]
        mrna_line = [self.id, self.source, "mRNA", start, end,
                     ".", self.stream, ".",
                     "ID=mRNA_" + self.gene + ";Parent=" + self.gene]
        gff3.write("\t".join(gene_line) + "\n")
        gff3.write("\t".join(mrna_line) + "\n")
        for row in self.group:
            # Stringify at join time only; never mutate the numeric
            # coordinates, and tolerate non-string columns.
            gff3.write("\t".join([str(field) for field in row]) + "\n")