Mercurial > repos > cpt > cpt_gbk_to_5col
comparison gbk_to_five_col.py @ 1:1bdd481d5c25 draft
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
author | cpt |
---|---|
date | Mon, 05 Jun 2023 02:42:57 +0000 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
0:66143811fe8a | 1:1bdd481d5c25 |
---|---|
1 #!/usr/bin/env python | |
2 import BIO_FIX_TOPO # NOQA | |
3 import argparse | |
4 import logging | |
5 from Bio import SeqIO | |
6 | |
7 logging.basicConfig(level=logging.INFO) | |
8 log = logging.getLogger() | |
9 | |
10 | |
11 # Read in Genbank file and parse features | |
12 # Output features into Five Column format | |
13 | |
14 """ | |
15 >Feature SeqID | |
16 Line 1 | |
17 Column 1: Start location (first nucleotide) of a feature | |
18 Column 2: Stop location (last nucleotide) of a feature | |
19 Column 3: Feature name (for example, 'CDS' or 'mRNA' or 'rRNA' or 'gene' or 'exon') | |
20 Line 2 | |
21 Column 4: Qualifier name (for example, 'product' or 'number' or 'gene' or 'note') | |
22 Column 5: Qualifier value | |
23 | |
24 Repeat for each feature in a seq | |
25 Repeat Line 2 for each qualifier in a feature | |
26 """ | |
27 | |
28 | |
def gbk_to_5col(genbank):
    """Convert a GenBank file to BankIt five column format on stdout.

    For every record a ``>Feature <record id>`` header is printed. Each
    feature other than ``source`` then contributes:

    * one line per location part holding the start and stop coordinates;
      the feature type is appended as a third column on the first part only;
    * one line per qualifier value, indented by three tabs, holding the
      qualifier name and the value.

    Coordinates are emitted 1-based and inclusive. For parts that are not on
    the forward strand the two coordinates are swapped (start > stop).

    Args:
        genbank: Input handed straight to ``SeqIO.parse`` with the
            ``"genbank"`` format (the command-line entry point passes an
            open text file handle).
    """
    for record in SeqIO.parse(genbank, "genbank"):
        print(">Feature %s" % record.id)
        for feature in record.features:
            # The source feature describes the whole record and is not
            # written to the table.
            if feature.type == "source":
                continue

            for index, part in enumerate(feature.location.parts):
                # Parsed start is 0-based and end is exclusive, so only the
                # start needs shifting to obtain 1-based inclusive values.
                low = int(part.start) + 1
                high = int(part.end)

                # A part may carry strand None (unstranded); comparing None
                # with 0 raises TypeError on Python 3, so such parts are
                # written in forward orientation. Any other non-positive
                # strand keeps the swapped (start > stop) order.
                if part.strand is None or part.strand > 0:
                    start, end = low, high
                else:
                    start, end = high, low

                if index == 0:
                    # Only the first interval of a feature names its type.
                    print("%d\t%d\t%s" % (start, end, feature.type))
                else:
                    print("%d\t%d" % (start, end))

            for (qualifier, values) in feature.qualifiers.items():
                for value in values:
                    print("\t\t\t%s\t%s" % (qualifier, value))
52 | |
53 | |
if __name__ == "__main__":
    # Command-line entry point: a single positional argument naming the
    # GenBank input, which argparse opens for reading.
    cli = argparse.ArgumentParser(
        description="Convert a Genbank file into five column format"
    )
    cli.add_argument(
        "genbank",
        help="Genbank file",
        type=argparse.FileType("r"),
    )

    options = cli.parse_args()
    gbk_to_5col(genbank=options.genbank)