annotate cpt_related_genome_prot/relatedness_prot.py @ 0:ebcc87a27f9c draft default tip

Uploaded
author cpt
date Fri, 10 Jun 2022 08:46:28 +0000
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
ebcc87a27f9c Uploaded
cpt
parents:
diff changeset
1 #!/usr/bin/env python
ebcc87a27f9c Uploaded
cpt
parents:
diff changeset
2 import sys
ebcc87a27f9c Uploaded
cpt
parents:
diff changeset
3 import argparse
ebcc87a27f9c Uploaded
cpt
parents:
diff changeset
4 import json
ebcc87a27f9c Uploaded
cpt
parents:
diff changeset
5 import logging
ebcc87a27f9c Uploaded
cpt
parents:
diff changeset
6 from Bio.Blast import NCBIXML
ebcc87a27f9c Uploaded
cpt
parents:
diff changeset
7
ebcc87a27f9c Uploaded
cpt
parents:
diff changeset
8 logging.basicConfig(level=logging.DEBUG)
ebcc87a27f9c Uploaded
cpt
parents:
diff changeset
9 log = logging.getLogger()
ebcc87a27f9c Uploaded
cpt
parents:
diff changeset
10
ebcc87a27f9c Uploaded
cpt
parents:
diff changeset
11 def parse_blast(blast, isXML = False):
ebcc87a27f9c Uploaded
cpt
parents:
diff changeset
12 res = []
ebcc87a27f9c Uploaded
cpt
parents:
diff changeset
13 finalRes = []
ebcc87a27f9c Uploaded
cpt
parents:
diff changeset
14 if isXML:
ebcc87a27f9c Uploaded
cpt
parents:
diff changeset
15 for iter_num, blast_record in enumerate(NCBIXML.parse(blast), 1):
ebcc87a27f9c Uploaded
cpt
parents:
diff changeset
16 for alignment in blast_record.alignments:
ebcc87a27f9c Uploaded
cpt
parents:
diff changeset
17 tempID = alignment.hit_id[alignment.hit_id.find("gb|") + 3:]
ebcc87a27f9c Uploaded
cpt
parents:
diff changeset
18 tempID = tempID[:tempID.find("|")]
ebcc87a27f9c Uploaded
cpt
parents:
diff changeset
19 tempDesc = alignment.title
ebcc87a27f9c Uploaded
cpt
parents:
diff changeset
20 while tempDesc.find("|") >= 0:
ebcc87a27f9c Uploaded
cpt
parents:
diff changeset
21 tempDesc = tempDesc[tempDesc.find("|") + 1:]
ebcc87a27f9c Uploaded
cpt
parents:
diff changeset
22 tempDesc = tempDesc.strip()
ebcc87a27f9c Uploaded
cpt
parents:
diff changeset
23 tempID = tempID.strip()
ebcc87a27f9c Uploaded
cpt
parents:
diff changeset
24 #for hsp in alignment.hsps:
ebcc87a27f9c Uploaded
cpt
parents:
diff changeset
25 line = [str(blast_record.query)[:str(blast_record.query).find("[")].strip()]
ebcc87a27f9c Uploaded
cpt
parents:
diff changeset
26 line.append(alignment.hit_id)
ebcc87a27f9c Uploaded
cpt
parents:
diff changeset
27 line.append(tempDesc)
ebcc87a27f9c Uploaded
cpt
parents:
diff changeset
28 line.append(alignment.accession)
ebcc87a27f9c Uploaded
cpt
parents:
diff changeset
29 res.append(line)
ebcc87a27f9c Uploaded
cpt
parents:
diff changeset
30 blast.seek(0)
ebcc87a27f9c Uploaded
cpt
parents:
diff changeset
31 resInd = -1
ebcc87a27f9c Uploaded
cpt
parents:
diff changeset
32 taxLine = blast.readline()
ebcc87a27f9c Uploaded
cpt
parents:
diff changeset
33 while taxLine:
ebcc87a27f9c Uploaded
cpt
parents:
diff changeset
34 if "<Hit>" in taxLine:
ebcc87a27f9c Uploaded
cpt
parents:
diff changeset
35 resInd += 1
ebcc87a27f9c Uploaded
cpt
parents:
diff changeset
36 taxSlice = ""
ebcc87a27f9c Uploaded
cpt
parents:
diff changeset
37 elif "<taxid>" in taxLine:
ebcc87a27f9c Uploaded
cpt
parents:
diff changeset
38 taxSlice = taxLine[taxLine.find("<taxid>") + 7:taxLine.find("</taxid>")]
ebcc87a27f9c Uploaded
cpt
parents:
diff changeset
39 finalRes.append(res[resInd])
ebcc87a27f9c Uploaded
cpt
parents:
diff changeset
40 finalRes[-1].append(taxSlice)
ebcc87a27f9c Uploaded
cpt
parents:
diff changeset
41 taxLine = blast.readline()
ebcc87a27f9c Uploaded
cpt
parents:
diff changeset
42 return finalRes
ebcc87a27f9c Uploaded
cpt
parents:
diff changeset
43 else:
ebcc87a27f9c Uploaded
cpt
parents:
diff changeset
44 for line in blast:
ebcc87a27f9c Uploaded
cpt
parents:
diff changeset
45 finalRes.append(line.strip("\n").split("\t"))
ebcc87a27f9c Uploaded
cpt
parents:
diff changeset
46 return finalRes
ebcc87a27f9c Uploaded
cpt
parents:
diff changeset
47
ebcc87a27f9c Uploaded
cpt
parents:
diff changeset
48 def with_dice(blast):
ebcc87a27f9c Uploaded
cpt
parents:
diff changeset
49 for data in blast:
ebcc87a27f9c Uploaded
cpt
parents:
diff changeset
50 dice = 2 * int(data[14]) / (float(data[22]) + float(data[23]))
ebcc87a27f9c Uploaded
cpt
parents:
diff changeset
51 yield data + [dice]
ebcc87a27f9c Uploaded
cpt
parents:
diff changeset
52
ebcc87a27f9c Uploaded
cpt
parents:
diff changeset
53
ebcc87a27f9c Uploaded
cpt
parents:
diff changeset
54 def filter_dice(blast, threshold=0.5):
ebcc87a27f9c Uploaded
cpt
parents:
diff changeset
55 for data in blast:
ebcc87a27f9c Uploaded
cpt
parents:
diff changeset
56 if data[-1] > threshold:
ebcc87a27f9c Uploaded
cpt
parents:
diff changeset
57 yield data
ebcc87a27f9c Uploaded
cpt
parents:
diff changeset
58
ebcc87a27f9c Uploaded
cpt
parents:
diff changeset
59
ebcc87a27f9c Uploaded
cpt
parents:
diff changeset
60 def split_identifiers_nucl(_, ident):
ebcc87a27f9c Uploaded
cpt
parents:
diff changeset
61 if "<>" in ident:
ebcc87a27f9c Uploaded
cpt
parents:
diff changeset
62 idents = ident.split("<>")
ebcc87a27f9c Uploaded
cpt
parents:
diff changeset
63 else:
ebcc87a27f9c Uploaded
cpt
parents:
diff changeset
64 idents = [ident]
ebcc87a27f9c Uploaded
cpt
parents:
diff changeset
65 return idents
ebcc87a27f9c Uploaded
cpt
parents:
diff changeset
66
ebcc87a27f9c Uploaded
cpt
parents:
diff changeset
67
ebcc87a27f9c Uploaded
cpt
parents:
diff changeset
68 def split_identifiers_prot(_, ident):
ebcc87a27f9c Uploaded
cpt
parents:
diff changeset
69 if "<>" in ident:
ebcc87a27f9c Uploaded
cpt
parents:
diff changeset
70 idents = ident.split("<>")
ebcc87a27f9c Uploaded
cpt
parents:
diff changeset
71 else:
ebcc87a27f9c Uploaded
cpt
parents:
diff changeset
72 idents = [ident]
ebcc87a27f9c Uploaded
cpt
parents:
diff changeset
73 return [
ebcc87a27f9c Uploaded
cpt
parents:
diff changeset
74 x[x.index("[") + 1 : x.rindex("]")]
ebcc87a27f9c Uploaded
cpt
parents:
diff changeset
75 for x in idents
ebcc87a27f9c Uploaded
cpt
parents:
diff changeset
76 # MULTISPECIES: recombination-associated protein RdgC [Enterobacteriaceae]<>RecName: Full=Recombination-associated protein RdgC<>putative exonuclease, RdgC [Enterobacter sp. 638]
ebcc87a27f9c Uploaded
cpt
parents:
diff changeset
77 if "[" in x and "]" in x
ebcc87a27f9c Uploaded
cpt
parents:
diff changeset
78 ]
ebcc87a27f9c Uploaded
cpt
parents:
diff changeset
79
ebcc87a27f9c Uploaded
cpt
parents:
diff changeset
80
ebcc87a27f9c Uploaded
cpt
parents:
diff changeset
81 def split_identifiers_phage(par, ident):
ebcc87a27f9c Uploaded
cpt
parents:
diff changeset
82 par = par.replace("lcl|", "")
ebcc87a27f9c Uploaded
cpt
parents:
diff changeset
83 par = par[0 : par.index("_prot_")]
ebcc87a27f9c Uploaded
cpt
parents:
diff changeset
84 return [par]
ebcc87a27f9c Uploaded
cpt
parents:
diff changeset
85
ebcc87a27f9c Uploaded
cpt
parents:
diff changeset
86
ebcc87a27f9c Uploaded
cpt
parents:
diff changeset
87 def important_only(blast, split_identifiers):
ebcc87a27f9c Uploaded
cpt
parents:
diff changeset
88 for data in blast:
ebcc87a27f9c Uploaded
cpt
parents:
diff changeset
89 yield [
ebcc87a27f9c Uploaded
cpt
parents:
diff changeset
90 data[0], # 01 Query Seq-id (ID of your sequence)
ebcc87a27f9c Uploaded
cpt
parents:
diff changeset
91 data[1], # 13 All subject Seq-id(s), separated by a ';'
ebcc87a27f9c Uploaded
cpt
parents:
diff changeset
92 split_identifiers(
ebcc87a27f9c Uploaded
cpt
parents:
diff changeset
93 data[1], data[2]
ebcc87a27f9c Uploaded
cpt
parents:
diff changeset
94 ), # 25 All subject title(s), separated by a '<>'
ebcc87a27f9c Uploaded
cpt
parents:
diff changeset
95 data[3].split(";"), # Extra: All Subject Accessions
ebcc87a27f9c Uploaded
cpt
parents:
diff changeset
96 data[4].split(";"), # Extra: All TaxIDs
ebcc87a27f9c Uploaded
cpt
parents:
diff changeset
97 ]
ebcc87a27f9c Uploaded
cpt
parents:
diff changeset
98
ebcc87a27f9c Uploaded
cpt
parents:
diff changeset
99
ebcc87a27f9c Uploaded
cpt
parents:
diff changeset
100 def deform_scores(blast):
ebcc87a27f9c Uploaded
cpt
parents:
diff changeset
101 for data in blast:
ebcc87a27f9c Uploaded
cpt
parents:
diff changeset
102 for org in data[2]:
ebcc87a27f9c Uploaded
cpt
parents:
diff changeset
103 yield [data[0], data[1], org, data[3], data[4]]
ebcc87a27f9c Uploaded
cpt
parents:
diff changeset
104
ebcc87a27f9c Uploaded
cpt
parents:
diff changeset
105
ebcc87a27f9c Uploaded
cpt
parents:
diff changeset
106 def expand_fields(blast):
ebcc87a27f9c Uploaded
cpt
parents:
diff changeset
107 for data in blast:
ebcc87a27f9c Uploaded
cpt
parents:
diff changeset
108 for x in range(0, len(data[4])):
ebcc87a27f9c Uploaded
cpt
parents:
diff changeset
109 yield [data[0], data[1], data[2][x], data[3], int(data[4][x])]
ebcc87a27f9c Uploaded
cpt
parents:
diff changeset
110
ebcc87a27f9c Uploaded
cpt
parents:
diff changeset
111 def expand_taxIDs(blast, taxFilter):
ebcc87a27f9c Uploaded
cpt
parents:
diff changeset
112 for data in blast:
ebcc87a27f9c Uploaded
cpt
parents:
diff changeset
113 # if(len(data[4]) > 0):
ebcc87a27f9c Uploaded
cpt
parents:
diff changeset
114 # print(data[0])
ebcc87a27f9c Uploaded
cpt
parents:
diff changeset
115 for ID in data[4]:
ebcc87a27f9c Uploaded
cpt
parents:
diff changeset
116 if ID != "N/A":
ebcc87a27f9c Uploaded
cpt
parents:
diff changeset
117 filterOut = False
ebcc87a27f9c Uploaded
cpt
parents:
diff changeset
118 for tax in taxFilter:
ebcc87a27f9c Uploaded
cpt
parents:
diff changeset
119 if str(ID).strip() == tax:
ebcc87a27f9c Uploaded
cpt
parents:
diff changeset
120 filterOut = True
ebcc87a27f9c Uploaded
cpt
parents:
diff changeset
121 if not filterOut:
ebcc87a27f9c Uploaded
cpt
parents:
diff changeset
122 yield [data[0], data[1], data[2], data[3], int(ID)]
ebcc87a27f9c Uploaded
cpt
parents:
diff changeset
123
ebcc87a27f9c Uploaded
cpt
parents:
diff changeset
124
ebcc87a27f9c Uploaded
cpt
parents:
diff changeset
125 def expand_titles(blast):
ebcc87a27f9c Uploaded
cpt
parents:
diff changeset
126 for data in blast:
ebcc87a27f9c Uploaded
cpt
parents:
diff changeset
127 for title in data[2]:
ebcc87a27f9c Uploaded
cpt
parents:
diff changeset
128 yield [data[0], data[1], title, data[3], data[4]]
ebcc87a27f9c Uploaded
cpt
parents:
diff changeset
129
ebcc87a27f9c Uploaded
cpt
parents:
diff changeset
130
ebcc87a27f9c Uploaded
cpt
parents:
diff changeset
131 def filter_phage(blast, phageTaxLookup, phageSciNames):
ebcc87a27f9c Uploaded
cpt
parents:
diff changeset
132 for data in blast:
ebcc87a27f9c Uploaded
cpt
parents:
diff changeset
133 for x in range(0, len(phageTaxLookup)):
ebcc87a27f9c Uploaded
cpt
parents:
diff changeset
134 if (data[4]) == phageTaxLookup[x]:
ebcc87a27f9c Uploaded
cpt
parents:
diff changeset
135 yield [data[0], data[1], phageSciNames[x], data[3], data[4]]
ebcc87a27f9c Uploaded
cpt
parents:
diff changeset
136 break
ebcc87a27f9c Uploaded
cpt
parents:
diff changeset
137
ebcc87a27f9c Uploaded
cpt
parents:
diff changeset
138
ebcc87a27f9c Uploaded
cpt
parents:
diff changeset
139 def remove_dupes(data):
ebcc87a27f9c Uploaded
cpt
parents:
diff changeset
140 has_seen = {}
ebcc87a27f9c Uploaded
cpt
parents:
diff changeset
141 for row in data:
ebcc87a27f9c Uploaded
cpt
parents:
diff changeset
142 # qseqid, sseqid
ebcc87a27f9c Uploaded
cpt
parents:
diff changeset
143 key = (row[0], row[4])
ebcc87a27f9c Uploaded
cpt
parents:
diff changeset
144 # If we've seen the key before, we can exit
ebcc87a27f9c Uploaded
cpt
parents:
diff changeset
145 if key in has_seen:
ebcc87a27f9c Uploaded
cpt
parents:
diff changeset
146 continue
ebcc87a27f9c Uploaded
cpt
parents:
diff changeset
147
ebcc87a27f9c Uploaded
cpt
parents:
diff changeset
148 # Otherwise, continue on
ebcc87a27f9c Uploaded
cpt
parents:
diff changeset
149 has_seen[key] = True
ebcc87a27f9c Uploaded
cpt
parents:
diff changeset
150 # Pretty simple
ebcc87a27f9c Uploaded
cpt
parents:
diff changeset
151 yield row
ebcc87a27f9c Uploaded
cpt
parents:
diff changeset
152
ebcc87a27f9c Uploaded
cpt
parents:
diff changeset
153 def scoreMap(blast):
ebcc87a27f9c Uploaded
cpt
parents:
diff changeset
154 c = {}
ebcc87a27f9c Uploaded
cpt
parents:
diff changeset
155 m = {}
ebcc87a27f9c Uploaded
cpt
parents:
diff changeset
156 for (qseq, subID, subTitle, access, ID) in blast:
ebcc87a27f9c Uploaded
cpt
parents:
diff changeset
157 if (str(subTitle), ID) not in c:
ebcc87a27f9c Uploaded
cpt
parents:
diff changeset
158 m[(str(subTitle), ID)] = access
ebcc87a27f9c Uploaded
cpt
parents:
diff changeset
159 c[(str(subTitle), ID)] = 0
ebcc87a27f9c Uploaded
cpt
parents:
diff changeset
160
ebcc87a27f9c Uploaded
cpt
parents:
diff changeset
161 c[(str(subTitle), ID)] += 1
ebcc87a27f9c Uploaded
cpt
parents:
diff changeset
162 return c, m
ebcc87a27f9c Uploaded
cpt
parents:
diff changeset
163
ebcc87a27f9c Uploaded
cpt
parents:
diff changeset
164
ebcc87a27f9c Uploaded
cpt
parents:
diff changeset
165 if __name__ == "__main__":
ebcc87a27f9c Uploaded
cpt
parents:
diff changeset
166 parser = argparse.ArgumentParser(description="Top related genomes")
ebcc87a27f9c Uploaded
cpt
parents:
diff changeset
167 parser.add_argument(
ebcc87a27f9c Uploaded
cpt
parents:
diff changeset
168 "blast", type=argparse.FileType("r"), help="Blast 25 Column Results"
ebcc87a27f9c Uploaded
cpt
parents:
diff changeset
169 )
ebcc87a27f9c Uploaded
cpt
parents:
diff changeset
170 parser.add_argument("phagedb", type=argparse.FileType("r"))
ebcc87a27f9c Uploaded
cpt
parents:
diff changeset
171 parser.add_argument("--access", action="store_true")
ebcc87a27f9c Uploaded
cpt
parents:
diff changeset
172 parser.add_argument("--protein", action="store_true")
ebcc87a27f9c Uploaded
cpt
parents:
diff changeset
173 parser.add_argument("--canonical", action="store_true")
ebcc87a27f9c Uploaded
cpt
parents:
diff changeset
174 parser.add_argument("--noFilter", action="store_true")
ebcc87a27f9c Uploaded
cpt
parents:
diff changeset
175 #parser.add_argument("--title", action="store_true") # Add when ready to update XML after semester
ebcc87a27f9c Uploaded
cpt
parents:
diff changeset
176 parser.add_argument("--hits", type=int, default=5)
ebcc87a27f9c Uploaded
cpt
parents:
diff changeset
177 parser.add_argument("--xmlMode", action="store_true")
ebcc87a27f9c Uploaded
cpt
parents:
diff changeset
178 parser.add_argument("--taxFilter", type=str)
ebcc87a27f9c Uploaded
cpt
parents:
diff changeset
179
ebcc87a27f9c Uploaded
cpt
parents:
diff changeset
180 args = parser.parse_args()
ebcc87a27f9c Uploaded
cpt
parents:
diff changeset
181
ebcc87a27f9c Uploaded
cpt
parents:
diff changeset
182 phageDb = args.phagedb
ebcc87a27f9c Uploaded
cpt
parents:
diff changeset
183 phageTaxLookup = []
ebcc87a27f9c Uploaded
cpt
parents:
diff changeset
184 sciName = []
ebcc87a27f9c Uploaded
cpt
parents:
diff changeset
185 line = phageDb.readline()
ebcc87a27f9c Uploaded
cpt
parents:
diff changeset
186
ebcc87a27f9c Uploaded
cpt
parents:
diff changeset
187 taxList = []
ebcc87a27f9c Uploaded
cpt
parents:
diff changeset
188 if args.taxFilter and args.taxFilter != "" :
ebcc87a27f9c Uploaded
cpt
parents:
diff changeset
189 args.taxFilter = args.taxFilter.split(" ")
ebcc87a27f9c Uploaded
cpt
parents:
diff changeset
190 for ind in args.taxFilter:
ebcc87a27f9c Uploaded
cpt
parents:
diff changeset
191 taxList.append(ind.strip())
ebcc87a27f9c Uploaded
cpt
parents:
diff changeset
192
ebcc87a27f9c Uploaded
cpt
parents:
diff changeset
193 while line:
ebcc87a27f9c Uploaded
cpt
parents:
diff changeset
194 line = line.split("\t")
ebcc87a27f9c Uploaded
cpt
parents:
diff changeset
195 phageTaxLookup.append(int(line[0]))
ebcc87a27f9c Uploaded
cpt
parents:
diff changeset
196 line[1] = line[1].strip()
ebcc87a27f9c Uploaded
cpt
parents:
diff changeset
197 if (line[1] == ""):
ebcc87a27f9c Uploaded
cpt
parents:
diff changeset
198 line[1] = "Novel Genome"
ebcc87a27f9c Uploaded
cpt
parents:
diff changeset
199 sciName.append(line[1])
ebcc87a27f9c Uploaded
cpt
parents:
diff changeset
200 line = phageDb.readline()
ebcc87a27f9c Uploaded
cpt
parents:
diff changeset
201
ebcc87a27f9c Uploaded
cpt
parents:
diff changeset
202 if args.protein:
ebcc87a27f9c Uploaded
cpt
parents:
diff changeset
203 splitId = split_identifiers_prot
ebcc87a27f9c Uploaded
cpt
parents:
diff changeset
204 # phageNameLookup = {k['source'].rstrip('.'): k['id'] for k in phageDb}
ebcc87a27f9c Uploaded
cpt
parents:
diff changeset
205 elif args.canonical:
ebcc87a27f9c Uploaded
cpt
parents:
diff changeset
206 splitId = split_identifiers_phage
ebcc87a27f9c Uploaded
cpt
parents:
diff changeset
207 # phageNameLookup = {k['source'].rstrip('.'): k['id'] for k in phageDb}
ebcc87a27f9c Uploaded
cpt
parents:
diff changeset
208 else:
ebcc87a27f9c Uploaded
cpt
parents:
diff changeset
209 splitId = split_identifiers_nucl
ebcc87a27f9c Uploaded
cpt
parents:
diff changeset
210 # phageNameLookup = {k['desc'].rstrip('.'): k['id'] for k in phageDb}
ebcc87a27f9c Uploaded
cpt
parents:
diff changeset
211
ebcc87a27f9c Uploaded
cpt
parents:
diff changeset
212 data = parse_blast(args.blast, args.xmlMode)
ebcc87a27f9c Uploaded
cpt
parents:
diff changeset
213 # data = with_dice(data)
ebcc87a27f9c Uploaded
cpt
parents:
diff changeset
214 # data = filter_dice(data, threshold=0.0)
ebcc87a27f9c Uploaded
cpt
parents:
diff changeset
215 data = important_only(data, splitId)
ebcc87a27f9c Uploaded
cpt
parents:
diff changeset
216
ebcc87a27f9c Uploaded
cpt
parents:
diff changeset
217 data = expand_taxIDs(data, taxList)
ebcc87a27f9c Uploaded
cpt
parents:
diff changeset
218 data = remove_dupes(data)
ebcc87a27f9c Uploaded
cpt
parents:
diff changeset
219 if not args.noFilter:
ebcc87a27f9c Uploaded
cpt
parents:
diff changeset
220 data = filter_phage(data, phageTaxLookup, sciName)
ebcc87a27f9c Uploaded
cpt
parents:
diff changeset
221 listify = []
ebcc87a27f9c Uploaded
cpt
parents:
diff changeset
222 for x in data:
ebcc87a27f9c Uploaded
cpt
parents:
diff changeset
223 listify.append(x)
ebcc87a27f9c Uploaded
cpt
parents:
diff changeset
224 #listify = greatest_taxID(listify)
ebcc87a27f9c Uploaded
cpt
parents:
diff changeset
225
ebcc87a27f9c Uploaded
cpt
parents:
diff changeset
226 count_label = "Similar Unique Proteins"
ebcc87a27f9c Uploaded
cpt
parents:
diff changeset
227
ebcc87a27f9c Uploaded
cpt
parents:
diff changeset
228 counts, accessions = scoreMap(listify)
ebcc87a27f9c Uploaded
cpt
parents:
diff changeset
229
ebcc87a27f9c Uploaded
cpt
parents:
diff changeset
230 nameRec = listify[0][0]
ebcc87a27f9c Uploaded
cpt
parents:
diff changeset
231 sys.stdout.write(
ebcc87a27f9c Uploaded
cpt
parents:
diff changeset
232 "Top %d matches for BLASTp results of %s\n"
ebcc87a27f9c Uploaded
cpt
parents:
diff changeset
233 % (args.hits, nameRec)
ebcc87a27f9c Uploaded
cpt
parents:
diff changeset
234 )
ebcc87a27f9c Uploaded
cpt
parents:
diff changeset
235 header = "# TaxID\t"
ebcc87a27f9c Uploaded
cpt
parents:
diff changeset
236 #if args.title:
ebcc87a27f9c Uploaded
cpt
parents:
diff changeset
237 header += "Name\t"
ebcc87a27f9c Uploaded
cpt
parents:
diff changeset
238 if args.access:
ebcc87a27f9c Uploaded
cpt
parents:
diff changeset
239 header += "Accessions\t"
ebcc87a27f9c Uploaded
cpt
parents:
diff changeset
240 header += "Similar Unique Proteins\n"
ebcc87a27f9c Uploaded
cpt
parents:
diff changeset
241 sys.stdout.write(header)
ebcc87a27f9c Uploaded
cpt
parents:
diff changeset
242
ebcc87a27f9c Uploaded
cpt
parents:
diff changeset
243 for idx, ((name, ID), num) in enumerate(
ebcc87a27f9c Uploaded
cpt
parents:
diff changeset
244 sorted(counts.items(), key=lambda item: -item[1])
ebcc87a27f9c Uploaded
cpt
parents:
diff changeset
245 ):
ebcc87a27f9c Uploaded
cpt
parents:
diff changeset
246 if idx > args.hits - 1:
ebcc87a27f9c Uploaded
cpt
parents:
diff changeset
247 break
ebcc87a27f9c Uploaded
cpt
parents:
diff changeset
248 line = str(ID) + "\t"
ebcc87a27f9c Uploaded
cpt
parents:
diff changeset
249 #if args.title:
ebcc87a27f9c Uploaded
cpt
parents:
diff changeset
250 line += str(name) + "\t"
ebcc87a27f9c Uploaded
cpt
parents:
diff changeset
251 if args.access:
ebcc87a27f9c Uploaded
cpt
parents:
diff changeset
252 line += str(accessions[(name, ID)][0]) + "\t"
ebcc87a27f9c Uploaded
cpt
parents:
diff changeset
253 line += str(num) + "\n"
ebcc87a27f9c Uploaded
cpt
parents:
diff changeset
254 sys.stdout.write(line)