annotate unique_mlg_id.py @ 1:6c10123cb4a7 draft default tip

Uploaded
author greg
date Wed, 11 Dec 2024 16:28:27 +0000
parents c39c543605a2
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
c39c543605a2 Uploaded
greg
parents:
diff changeset
1 #!/usr/bin/env python
c39c543605a2 Uploaded
greg
parents:
diff changeset
2 import argparse
c39c543605a2 Uploaded
greg
parents:
diff changeset
3 import sys
c39c543605a2 Uploaded
greg
parents:
diff changeset
4
c39c543605a2 Uploaded
greg
parents:
diff changeset
5 import psycopg2
c39c543605a2 Uploaded
greg
parents:
diff changeset
6
1
6c10123cb4a7 Uploaded
greg
parents: 0
diff changeset
7 from sqlalchemy import create_engine
0
c39c543605a2 Uploaded
greg
parents:
diff changeset
8 from sqlalchemy.engine.url import make_url
c39c543605a2 Uploaded
greg
parents:
diff changeset
9
c39c543605a2 Uploaded
greg
parents:
diff changeset
10
c39c543605a2 Uploaded
greg
parents:
diff changeset
class UniqueMGLIDGenerator(object):
    """Write the distinct, non-NULL ``coral_mlg_rep_sample_id`` values to a file.

    Constructing an instance parses the command line, opens the output file
    for writing and connects to the database.  ``run()`` executes the query
    and writes one value per line, ``shutdown()`` closes the database
    connection, and ``stop_err()`` aborts the process with exit status 1.
    """

    def __init__(self):
        """Parse arguments, open the output file and connect to the database.

        If connecting fails, the output file handle (and any connection that
        was already established) is closed before the exception propagates.
        """
        self.args = None
        self.conn = None
        self.parse_args()
        self.outfh = open(self.args.output, "w")
        try:
            self.connect_db()
            # No method of this class reads ``engine``; it is kept so the
            # attribute stays available to any external user of the object.
            self.engine = create_engine(self.args.database_connection_string)
        except Exception:
            # Do not leak the output handle or a half-initialised connection.
            self.outfh.close()
            if self.conn is not None:
                self.conn.close()
            raise

    def parse_args(self):
        """Parse the command line options into ``self.args``.

        Both options are required: without them the constructor cannot open
        the output file or connect to the database.
        """
        parser = argparse.ArgumentParser()
        parser.add_argument('--database_connection_string', dest='database_connection_string', required=True, help='Postgres database connection string')
        parser.add_argument('--output', dest='output', required=True, help='Output dataset')
        self.args = parser.parse_args()

    def connect_db(self):
        """Open a psycopg2 connection described by the connection string.

        Raises:
            ValueError: if the connection string does not use the
                ``postgresql`` dialect.
        """
        url = make_url(self.args.database_connection_string)
        # An explicit raise instead of ``assert``: assertions are removed when
        # Python runs with -O, which would silently skip this validation.
        if url.get_dialect().name != 'postgresql':
            raise ValueError('This script can only be used with PostgreSQL.')
        args = url.translate_connect_args(username='user')
        args.update(url.query)
        self.conn = psycopg2.connect(**args)

    def run(self):
        """Query the distinct ids and write them, one per line, to the output.

        The cursor and the output file are closed whether or not the query
        or the write succeeds.
        """
        cmd = """
            SELECT DISTINCT coral_mlg_rep_sample_id
            FROM genotype
            WHERE coral_mlg_rep_sample_id is not NULL
            ORDER BY coral_mlg_rep_sample_id;
        """
        try:
            cur = self.conn.cursor()
            try:
                cur.execute(cmd)
                self.outfh.writelines("%s\n" % row[0] for row in cur.fetchall())
            finally:
                cur.close()
        finally:
            self.outfh.close()

    def shutdown(self):
        """Close the database connection, if one was opened."""
        if self.conn is not None:
            self.conn.close()

    def stop_err(self, msg):
        """Write ``msg`` to stderr, close the output file and exit with status 1."""
        sys.stderr.write(msg)
        outfh = getattr(self, 'outfh', None)
        # ``run()`` may already have closed the handle; flushing a closed file
        # would raise ValueError and mask the intended exit status.
        if outfh is not None and not outfh.closed:
            outfh.flush()
            outfh.close()
        sys.exit(1)
c39c543605a2 Uploaded
greg
parents:
diff changeset
55
c39c543605a2 Uploaded
greg
parents:
diff changeset
56
c39c543605a2 Uploaded
greg
parents:
diff changeset
if __name__ == '__main__':
    # Script entry point: build the generator, write the ids, then disconnect.
    umlgidg = UniqueMGLIDGenerator()
    try:
        umlgidg.run()
    finally:
        # Close the database connection even when the query or write fails.
        umlgidg.shutdown()