comparison tbl2gff3.py @ 0:965674d88d34 draft

"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/tbl2gff3 commit 725ff8b6a4f01bf532052ce46400f529551a317d"
author iuc
date Tue, 07 Jul 2020 04:20:40 -0400
parents
children 4a7f4b0cc0a3
comparison
equal deleted inserted replaced
-1:000000000000 0:965674d88d34
1 #!/usr/bin/env python
2 import argparse
3 import csv
4 import sys
5
6 from BCBio import GFF
7 from Bio.Seq import Seq
8 from Bio.SeqFeature import FeatureLocation, SeqFeature
9 from Bio.SeqRecord import SeqRecord
10
11
def c(row, v, default=None):
    """Resolve a column selector against one table row.

    Args:
        row: the current row, indexed by position (a list of cell strings
            when it comes from ``csv.reader``).
        v: the selector. ``None`` means "not supplied"; anything ``int()``
            accepts is taken as a 1-based column number; any other value is
            used as a literal.
        default: value returned when ``v`` is ``None``.

    Returns:
        ``default``, the cell at column ``v``, or ``v`` itself.

    Raises:
        IndexError: if the column number is larger than the row is long.
    """
    if v is None:
        return default

    try:
        column = int(v)
    except ValueError:
        # Not a number, so the selector is itself the value to use.
        return v

    # Column numbers are 1-based.
    # NOTE(review): 0 or a negative selector is not rejected; with a list row
    # it indexes from the end (0 yields the last cell). Confirm callers never
    # pass such values expecting a literal.
    return row[column - 1]
21
22
def tbl2gff3(
    table,
    rid,
    begin,
    end,
    source=None,
    type=None,
    score=None,
    frame=None,
    a=None,
    strand_column=None,
    strand_value=None,
):
    """Build one SeqRecord per record id from a tab-separated table.

    Each non-empty row becomes a SeqFeature appended to the record named by
    its id. Selector arguments are resolved per row with ``c``: a number is
    a 1-based column, any other string is used literally.

    Args:
        table: iterable of text lines (e.g. an open file) read as TSV.
        rid: selector for the record id.
        begin: selector for the feature start; converted with ``int``.
        end: selector for the feature end; converted with ``int``.
        source: selector for the ``source`` qualifier; ``"tbl2gff3"`` is
            used when omitted.
        type: selector for the feature type.
        score: selector for the ``score`` qualifier; converted with
            ``float`` when it resolves to a value.
        frame: accepted for interface compatibility; not used.
        a: list of ``key:selector`` attribute specs, or ``None`` for none.
            Repeated keys accumulate their values in a list.
        strand_column: selector for a per-row strand; converted with ``int``.
        strand_value: strand applied to every row when ``strand_column`` is
            not given; converted with ``int``.

    Returns:
        dict mapping each record id to its SeqRecord.

    Raises:
        ValueError: if an attribute spec has no ``:`` separator, or a
            begin/end/score/strand value is not numeric.
    """
    # ``a`` is None when no attribute was requested (argparse's "append"
    # action leaves the default untouched), so fall back to an empty list.
    # The specs do not depend on the row, so split them once up front.
    attributes = []
    for spec in a or []:
        key, sep, selector = spec.partition(":")
        if not sep:
            raise ValueError("attribute %r is not in key:value form" % spec)
        attributes.append((key, selector))

    records = {}

    for row in csv.reader(table, delimiter="\t"):
        if not row:
            # csv.reader yields an empty list for blank lines; there is
            # nothing to convert, and indexing it would raise IndexError.
            continue

        # If we haven't seen this record before, populate it.
        recid = c(row, rid)
        if recid not in records:
            # The sequence is a fixed placeholder; only features are filled.
            records[recid] = SeqRecord(Seq("ACTG"), id=recid)
        record = records[recid]

        qualifiers = {}
        score_value = c(row, score)
        if score_value is not None:
            qualifiers["score"] = float(score_value)

        qualifiers["source"] = c(row, source, "tbl2gff3")

        strand = None
        if strand_column is not None:
            strand = int(c(row, strand_column))
        elif strand_value is not None:
            strand = int(strand_value)

        for key, selector in attributes:
            value = c(row, selector)
            if key in qualifiers:
                qualifiers[key].append(value)
            else:
                qualifiers[key] = [value]

        # The strand belongs to the location. Passing it to SeqFeature is
        # not accepted by newer Biopython releases, while FeatureLocation
        # takes it in old and new releases alike.
        # NOTE(review): Biopython locations use a 0-based start; confirm the
        # table's coordinate convention matches what the GFF writer emits.
        location = FeatureLocation(
            int(c(row, begin)), int(c(row, end)), strand=strand
        )
        feature = SeqFeature(
            location,
            type=c(row, type),
            qualifiers=qualifiers,
        )
        record.features.append(feature)

    return records
77
78
if __name__ == "__main__":
    # Command-line entry point: parse the selectors, convert the table and
    # write every resulting record to stdout as GFF.
    cli = argparse.ArgumentParser(description="Convert tables to gff3", epilog="")
    cli.add_argument("table", type=argparse.FileType("r"), help="Tabular Input")

    # Required positional selectors.
    for name, text in (
        ("rid", "id column"),
        ("begin", "begin column"),
        ("end", "end column"),
    ):
        cli.add_argument(name, help=text)

    # Optional selectors. A frame option is intentionally not exposed.
    for flag, text in (
        ("--type", "feature type column"),
        ("--score", "score column"),
        ("--source", "source column"),
        ("--strand_column", "strand column"),
        ("--strand_value", "strand value"),
    ):
        cli.add_argument(flag, help=text)

    cli.add_argument("-a", action="append", help="attribute column (-a k:v)")

    options = vars(cli.parse_args())
    converted = tbl2gff3(**options)

    # Each record is written with its own GFF.write call.
    for record in converted.values():
        GFF.write([record], sys.stdout)