comparison unique_mlg_id.py @ 0:c39c543605a2 draft

Uploaded
author greg
date Wed, 02 Jun 2021 19:18:10 +0000
parents
children 6c10123cb4a7
comparison
equal deleted inserted replaced
-1:000000000000 0:c39c543605a2
1 #!/usr/bin/env python
2 import argparse
3 import sys
4
5 import psycopg2
6
7 from sqlalchemy import MetaData, create_engine
8 from sqlalchemy.engine.url import make_url
9
10
11 class UniqueMGLIDGenerator(object):
12 def __init__(self):
13 self.args = None
14 self.conn = None
15 self.parse_args()
16 self.outfh = open(self.args.output, "w")
17 self.connect_db()
18 self.engine = create_engine(self.args.database_connection_string)
19 self.metadata = MetaData(self.engine)
20
21 def parse_args(self):
22 parser = argparse.ArgumentParser()
23 parser.add_argument('--database_connection_string', dest='database_connection_string', help='Postgres database connection string'),
24 parser.add_argument('--output', dest='output', help='Output dataset'),
25 self.args = parser.parse_args()
26
27 def connect_db(self):
28 url = make_url(self.args.database_connection_string)
29 args = url.translate_connect_args(username='user')
30 args.update(url.query)
31 assert url.get_dialect().name == 'postgresql', 'This script can only be used with PostgreSQL.'
32 self.conn = psycopg2.connect(**args)
33
34 def run(self):
35 cmd = """
36 SELECT DISTINCT coral_mlg_rep_sample_id
37 FROM genotype
38 WHERE coral_mlg_rep_sample_id is not NULL
39 ORDER BY coral_mlg_rep_sample_id;
40 """
41 cur = self.conn.cursor()
42 cur.execute(cmd)
43 rows = cur.fetchall()
44 for tup in rows:
45 self.outfh.write("%s\n" % tup[0])
46 self.outfh.close()
47
48 def shutdown(self):
49 self.conn.close()
50
51 def stop_err(self, msg):
52 sys.stderr.write(msg)
53 self.outfh.flush()
54 self.outfh.close()
55 sys.exit(1)
56
57
58 if __name__ == '__main__':
59 umlgidg = UniqueMGLIDGenerator()
60 umlgidg.run()
61 umlgidg.shutdown()