annotate gbk_to_gff3.py @ 1:bb6332a85aa6 draft

planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
author cpt
date Mon, 05 Jun 2023 02:43:04 +0000
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
1
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
1 #!/usr/bin/env python
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
2
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
3 import argparse
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
4 import sys
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
5
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
6 from Bio import SeqIO
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
7 from Bio.SeqRecord import SeqRecord
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
8 from Bio.SeqFeature import FeatureLocation
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
9 from CPT_GFFParser import gffSeqFeature, gffWrite
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
10
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
11 bottomFeatTypes = ["exon", "RBS", "CDS"]
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
12
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
13
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
14 def makeGffFeat(inFeat, num, recName, identifier):
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
15 if inFeat.type == "RBS" or (
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
16 inFeat.type == "regulatory"
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
17 and "regulatory_class" in inFeat.qualifiers.keys()
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
18 and inFeat.qualifiers["regulatory_class"][0] == "ribosome_binding_site"
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
19 ):
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
20 inFeat.type = "Shine_Dalgarno_sequence"
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
21 if "codon_start" in inFeat.qualifiers.keys():
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
22 shift = int(inFeat.qualifiers["codon_start"][0]) - 1
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
23 else:
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
24 shift = "."
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
25 if identifier in inFeat.qualifiers.keys():
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
26 name = inFeat.qualifiers[identifier][0] + "." + inFeat.type
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
27 if num > 0:
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
28 name += "." + str(num)
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
29 else:
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
30 name = recName + "." + inFeat.type + "." + str(num)
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
31
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
32 outFeat = gffSeqFeature(
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
33 inFeat.location,
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
34 inFeat.type,
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
35 "",
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
36 inFeat.strand,
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
37 name,
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
38 inFeat.qualifiers,
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
39 None,
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
40 None,
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
41 None,
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
42 shift,
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
43 0,
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
44 "GbkToGff",
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
45 )
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
46 outFeat.qualifiers["ID"] = [name]
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
47 return outFeat
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
48
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
49
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
50 def main(inFile, makeMRNA, makeGene, identifier, fastaFile, outFile):
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
51
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
52 ofh = sys.stdout
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
53 if outFile:
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
54 ofh = outFile
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
55
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
56 outRec = []
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
57 failed = 0
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
58 for rec in SeqIO.parse(inFile, "genbank"):
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
59 recID = rec.name
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
60
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
61 if len(str(rec.seq)) > 0:
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
62 seqs_pending_writes = True
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
63 outSeq = str(rec.seq)
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
64 seqLen = len(outSeq)
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
65
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
66 locBucket = {}
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
67 outFeats = []
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
68 topTypeDict = {}
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
69 seekingParent = []
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
70 geneNum = 0
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
71 autoGeneNum = 0
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
72 for feat in rec.features:
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
73 if (
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
74 identifier not in feat.qualifiers.keys()
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
75 ): # Allow metadata features and other features with no ID (Output warning?) - AJC
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
76 if feat.type in bottomFeatTypes:
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
77 seekingParent.append(
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
78 [feat, [], []]
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
79 ) # [Feature, all parent candidates, strongest parent candidates]
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
80 continue
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
81 elif feat.type not in topTypeDict.keys():
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
82 topTypeDict[feat.type] = 1
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
83 else:
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
84 topTypeDict[feat.type] += 1
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
85 outFeats.append(
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
86 makeGffFeat(feat, topTypeDict[feat.type], recID, identifier)
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
87 )
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
88 continue
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
89 elif feat.qualifiers[identifier][0] not in locBucket.keys():
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
90 locBucket[feat.qualifiers[identifier][0]] = []
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
91 locBucket[feat.qualifiers[identifier][0]].append(feat)
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
92
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
93 for locus in locBucket.keys():
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
94 minLoc = locBucket[locus][0].location.start
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
95 maxLoc = locBucket[locus][0].location.end
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
96 for feat in locBucket[locus]:
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
97 minLoc = min(minLoc, feat.location.start)
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
98 maxLoc = max(maxLoc, feat.location.end)
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
99 for x in seekingParent:
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
100 if x[0].location.start >= minLoc and x[0].location.end <= maxLoc:
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
101 x[1].append(locus)
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
102 if x[0].location.start == minLoc or x[0].location.end == maxLoc:
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
103 x[2].append(locus)
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
104
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
105 for x in seekingParent: # Reformat to [Feature, Locus, Unused/Free]
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
106 if len(x[2]) == 1:
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
107 finList = ""
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
108 if len(x[1]) > 1:
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
109 for loc in x[1]:
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
110 if loc != x[2][0]:
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
111 finList += loc + ", "
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
112 finList = (
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
113 str(x[0].type)
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
114 + " had no locus tag set in .gbk file, automatically derived. Other, weaker candidate(s) were "
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
115 + finList[0:-2]
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
116 + "."
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
117 )
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
118 else:
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
119 finList = (
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
120 str(x[0].type)
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
121 + " had no locus tag set in .gbk file, automatically derived."
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
122 )
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
123 if "Notes" not in x[0].qualifiers.keys():
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
124 x[0].qualifiers["Notes"] = []
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
125 x[0].qualifiers["Notes"].append(finList)
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
126 x[1] = x[2][0]
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
127 elif len(x[2]) > 1:
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
128 candidate = x[2][0] # Arbitrarily choose first one
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
129 finList = ""
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
130 strongList = ""
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
131 for loc in x[2]:
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
132 if loc != candidate:
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
133 finList += loc + ", "
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
134 strongList += loc + ", "
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
135 for loc in x[1]:
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
136 if loc not in x[2]:
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
137 finList += loc + ", "
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
138 finList = (
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
139 str(x[0].type)
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
140 + " had no locus tag set in .gbk file, automatically derived. Other candidate(s) were "
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
141 + finList[0:-2]
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
142 + " (Equally strong candidate(s): "
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
143 + strongList[0:-2]
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
144 + ")."
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
145 )
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
146 if "Notes" not in x[0].qualifiers.keys():
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
147 x[0].qualifiers["Notes"] = []
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
148 x[0].qualifiers["Notes"].append(finList)
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
149 x[1] = candidate
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
150 elif len(x[1]) == 1:
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
151 x[1] = x[1][0]
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
152 if "Notes" not in x[0].qualifiers.keys():
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
153 x[0].qualifiers["Notes"] = []
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
154 finList = (
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
155 str(x[0].type)
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
156 + " had no locus tag set in .gbk file, automatically derived."
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
157 )
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
158 x[0].qualifiers["Notes"].append(finList)
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
159 elif len(x[1]) > 1:
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
160 candidate = x[1][0] # Arbitrarily choose first one
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
161 finList = ""
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
162 for loc in x[1]:
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
163 if loc != candidate:
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
164 finList += loc + ", "
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
165 finList = (
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
166 str(x[0].type)
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
167 + " had no locus tag set in .gbk file, automatically derived. Other candidates were "
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
168 + finList[0:-2]
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
169 + "."
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
170 )
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
171 if "Notes" not in x[0].qualifiers.keys():
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
172 x[0].qualifiers["Notes"] = []
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
173 x[0].qualifiers["Notes"].append(finList)
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
174 x[1] = candidate
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
175 else:
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
176 if makeGene:
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
177 sys.stderr.write(
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
178 "Warning: Unable to find potential parent for feature with no "
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
179 + identifier
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
180 + " of type "
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
181 + str(x[0].type)
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
182 + " at location ["
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
183 + str(x[0].location.start + 1)
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
184 + ", "
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
185 + str(x[0].location.end)
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
186 + "], creating standalone gene.\n"
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
187 )
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
188 autoGeneNum += 1
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
189 x[0].source = "GbkToGff"
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
190 x[0].score = 0
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
191 x[0].shift = 0
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
192 if "ID" not in x[0].qualifiers.keys():
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
193 x[0].qualifiers["ID"] = [
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
194 recID + ".standalone_" + x[0].type + "." + str(autoGeneNum)
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
195 ]
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
196 tempName = recID + ".derived_Gene." + str(autoGeneNum)
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
197 tempQuals = {
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
198 "ID": [tempName],
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
199 "Notes": [
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
200 "Gene feature automatically generated by Gbk to GFF conversion"
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
201 ],
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
202 }
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
203 tempGene = gffSeqFeature(
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
204 FeatureLocation(
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
205 x[0].location.start, x[0].location.end, x[0].location.strand
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
206 ),
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
207 "gene",
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
208 "",
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
209 x[0].strand,
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
210 tempName,
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
211 tempQuals,
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
212 None,
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
213 None,
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
214 None,
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
215 ".",
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
216 0,
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
217 "GbkToGff",
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
218 )
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
219 if makeMRNA:
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
220 tempName = recID + ".derived_mRNA." + str(autoGeneNum)
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
221 tempQuals = {
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
222 "ID": [tempName],
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
223 "Notes": [
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
224 "mRNA feature automatically generated by Gbk to GFF conversion"
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
225 ],
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
226 }
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
227 tempGene.sub_features.append(
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
228 gffSeqFeature(
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
229 FeatureLocation(
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
230 x[0].location.start,
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
231 x[0].location.end,
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
232 x[0].location.strand,
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
233 ),
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
234 "mRNA",
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
235 "",
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
236 x[0].strand,
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
237 tempName,
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
238 tempQuals,
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
239 None,
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
240 None,
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
241 None,
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
242 ".",
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
243 0,
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
244 "GbkToGff",
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
245 )
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
246 )
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
247 tempGene.sub_features[-1].sub_features.append(x[0])
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
248 else:
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
249 tempGene.sub_features.append(x[0])
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
250
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
251 outFeats.append(tempGene)
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
252 else:
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
253 sys.stderr.write(
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
254 "Warning: Unable to find potential parent for feature with no "
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
255 + identifier
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
256 + " of type "
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
257 + str(x[0].type)
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
258 + " at location ["
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
259 + str(x[0].location.start + 1)
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
260 + ", "
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
261 + str(x[0].location.end)
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
262 + "].\n"
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
263 )
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
264 if x[0].type not in topTypeDict.keys():
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
265 topTypeDict[x[0].type] = 1
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
266 else:
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
267 topTypeDict[x[0].type] += 1
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
268 outFeats.append(
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
269 makeGffFeat(x[0], topTypeDict[x[0].type], recID, identifier)
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
270 )
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
271
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
272 for locus in locBucket.keys():
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
273 if len(locBucket[locus]) == 1: # No heirarchy to be made
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
274 outFeats.append(makeGffFeat(locBucket[locus][0], 0, recID, identifier))
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
275 continue
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
276 topFeat = None
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
277 midFeat = None
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
278 bottomFeats = []
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
279 typeDict = {}
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
280 minLoc = locBucket[locus][0].location.start
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
281 maxLoc = locBucket[locus][0].location.end
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
282 geneNum += 1
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
283 for feat in locBucket[locus]:
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
284 # If we want to make our own top-level feat?
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
285 minLoc = min(minLoc, feat.location.start)
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
286 maxLoc = max(maxLoc, feat.location.end)
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
287
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
288 # Gene->mRNA->CDS included as example, to add other feature-heirarchys in the appropriate slot
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
289 if feat.type in ["gene"]:
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
290 if not topFeat:
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
291 topFeat = feat
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
292 # Else handle multiple top features
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
293 elif feat.type in ["mRNA", "tRNA", "rRNA"]:
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
294 if not midFeat:
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
295 midFeat = feat
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
296 # Else handle multiple mid feats (May need another elif type-in-list statement if we actually expect a list of mid feats)
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
297 else:
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
298 if feat.type not in typeDict.keys():
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
299 typeDict[feat.type] = 1
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
300 else:
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
301 typeDict[feat.type] += 1
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
302 bottomFeats.append(feat)
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
303
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
304 for x in seekingParent:
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
305 if type(x[1]) != "list" and locus == x[1]:
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
306 x[0].qualifiers[identifier] = [locus]
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
307 bottomFeats.append(x[0])
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
308 if x[0].type not in typeDict.keys():
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
309 typeDict[x[0].type] = 1
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
310 else:
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
311 typeDict[x[0].type] += 1
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
312
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
313 # if not topFeat: # Make our own top-level feature based off minLoc, maxLoc bounds
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
314
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
315 for (
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
316 x
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
317 ) in (
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
318 typeDict.keys()
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
319 ): # If only 1, set it to 0 so we don't append a number to the name
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
320 if (
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
321 typeDict[x] == 1
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
322 ): # Else, set to 1 so that we count up as we encounter the features
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
323 typeDict[x] = 0
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
324 else:
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
325 typeDict[x] = 1
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
326
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
327 if not topFeat:
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
328 if makeGene:
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
329 if midFeat:
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
330 possibleStrand = midFeat.strand
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
331 else:
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
332 possibleStrand = bottomFeats[0].strand
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
333 tempName = recID + ".gene." + str(geneNum)
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
334 tempQuals = {
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
335 identifier: [locus],
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
336 "ID": [tempName],
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
337 "Notes": [
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
338 "Gene feature automatically generated by Gbk to GFF conversion"
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
339 ],
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
340 }
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
341 topFeat = gffSeqFeature(
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
342 FeatureLocation(minLoc, maxLoc, possibleStrand),
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
343 "gene",
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
344 "",
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
345 possibleStrand,
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
346 tempName,
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
347 tempQuals,
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
348 None,
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
349 None,
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
350 None,
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
351 ".",
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
352 0,
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
353 "GbkToGff",
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
354 )
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
355 else:
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
356 sys.stderr.write(
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
357 "Unable to create a feature heirarchy at location [%d, %d] with features: \n"
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
358 % (minLoc, maxLoc)
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
359 )
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
360 for x in locBucket[locus]:
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
361 sys.stderr.write(str(x))
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
362 sys.stderr.write("\n")
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
363 failed = 1
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
364 continue
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
365
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
366 outFeats.append(makeGffFeat(topFeat, 0, recID, identifier))
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
367 if not midFeat and topFeat.type == "gene" and makeMRNA:
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
368 if identifier in topFeat.qualifiers.keys():
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
369 tempName = topFeat.qualifiers[identifier][0] + ".mRNA"
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
370 tempQuals = {
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
371 identifier: topFeat.qualifiers[identifier],
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
372 "ID": [tempName],
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
373 "Notes": [
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
374 "mRNA feature automatically generated by Gbk to GFF conversion"
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
375 ],
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
376 }
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
377 else:
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
378 tempName = outFeats[-1].ID + ".mRNA"
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
379 tempQuals = {
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
380 identifier: topFeat.qualifiers[identifier],
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
381 "ID": [tempName],
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
382 "Notes": [
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
383 "mRNA feature automatically generated by Gbk to GFF conversion"
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
384 ],
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
385 }
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
386 midFeat = gffSeqFeature(
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
387 FeatureLocation(minLoc, maxLoc, topFeat.strand),
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
388 "mRNA",
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
389 "",
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
390 topFeat.strand,
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
391 tempName,
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
392 tempQuals,
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
393 None,
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
394 None,
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
395 None,
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
396 ".",
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
397 0,
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
398 "GbkToGff",
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
399 )
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
400
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
401 if (
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
402 midFeat
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
403 ): # Again, need a new if statement if we want to handle multiple mid-tier features
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
404 outFeats[-1].sub_features.append(
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
405 makeGffFeat(midFeat, 0, recID, identifier)
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
406 )
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
407 outFeats[-1].sub_features[-1].qualifiers["Parent"] = [outFeats[-1].id]
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
408 for x in bottomFeats:
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
409 typeDict[x.type] += 1
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
410 outFeats[-1].sub_features[-1].sub_features.append(
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
411 makeGffFeat(x, typeDict[x.type], recID, identifier)
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
412 )
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
413 outFeats[-1].sub_features[-1].sub_features[-1].qualifiers[
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
414 "Parent"
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
415 ] = [outFeats[-1].sub_features[-1].id]
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
416 else: # No midFeat, append bottom feats directly to top feats
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
417 for x in bottomFeats:
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
418 typeDict[x.type] += 1
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
419 outFeats[-1].sub_features.append(
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
420 makeGffFeat(x, typeDict[x.type], recID, identifier)
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
421 )
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
422 outFeats[-1].sub_features[-1].qualifiers["Parent"] = [
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
423 outFeats[-1].id
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
424 ]
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
425
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
426 outRec.append(
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
427 SeqRecord(
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
428 rec.seq,
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
429 recID,
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
430 rec.name,
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
431 rec.description,
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
432 rec.dbxrefs,
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
433 sorted(outFeats, key=lambda x: x.location.start),
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
434 rec.annotations,
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
435 rec.letter_annotations,
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
436 )
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
437 )
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
438 SeqIO.write([outRec[-1]], fastaFile, "fasta")
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
439 gffWrite(outRec, ofh)
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
440 exit(failed) # 0 if all features handled, 1 if unable to handle some
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
441
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
442
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
443 if __name__ == "__main__":
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
444 parser = argparse.ArgumentParser(
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
445 description="Biopython solution to Gbk to GFF conversion"
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
446 )
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
447
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
448 parser.add_argument(
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
449 "inFile", type=argparse.FileType("r"), help="Path to an input GBK file"
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
450 )
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
451 parser.add_argument(
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
452 "--makeMRNA",
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
453 action="store_true",
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
454 required=False,
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
455 help="Automatically create mRNA features",
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
456 )
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
457 parser.add_argument(
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
458 "--makeGene",
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
459 action="store_true",
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
460 required=False,
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
461 help="Automatically create missing Gene features",
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
462 )
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
463 parser.add_argument(
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
464 "--identifier",
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
465 type=str,
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
466 default="locus_tag",
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
467 required=False,
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
468 help="Qualifier to derive ID property from",
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
469 )
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
470 parser.add_argument(
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
471 "--fastaFile", type=argparse.FileType("w"), help="Fasta output for sequences"
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
472 )
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
473 parser.add_argument(
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
474 "--outFile", type=argparse.FileType("w"), help="GFF feature output"
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
475 )
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
476 args = parser.parse_args()
bb6332a85aa6 planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
477 main(**vars(args))