Mercurial > repos > greg > unique_mlg_id
changeset 0:c39c543605a2 draft
Uploaded
author | greg |
---|---|
date | Wed, 02 Jun 2021 19:18:10 +0000 |
parents | |
children | 6c10123cb4a7 |
files | unique_mlg_id.py unique_mlg_id.xml |
diffstat | 2 files changed, 88 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/unique_mlg_id.py Wed Jun 02 19:18:10 2021 +0000 @@ -0,0 +1,61 @@ +#!/usr/bin/env python +import argparse +import sys + +import psycopg2 + +from sqlalchemy import MetaData, create_engine +from sqlalchemy.engine.url import make_url + + +class UniqueMGLIDGenerator(object): + def __init__(self): + self.args = None + self.conn = None + self.parse_args() + self.outfh = open(self.args.output, "w") + self.connect_db() + self.engine = create_engine(self.args.database_connection_string) + self.metadata = MetaData(self.engine) + + def parse_args(self): + parser = argparse.ArgumentParser() + parser.add_argument('--database_connection_string', dest='database_connection_string', help='Postgres database connection string'), + parser.add_argument('--output', dest='output', help='Output dataset'), + self.args = parser.parse_args() + + def connect_db(self): + url = make_url(self.args.database_connection_string) + args = url.translate_connect_args(username='user') + args.update(url.query) + assert url.get_dialect().name == 'postgresql', 'This script can only be used with PostgreSQL.' + self.conn = psycopg2.connect(**args) + + def run(self): + cmd = """ + SELECT DISTINCT coral_mlg_rep_sample_id + FROM genotype + WHERE coral_mlg_rep_sample_id is not NULL + ORDER BY coral_mlg_rep_sample_id; + """ + cur = self.conn.cursor() + cur.execute(cmd) + rows = cur.fetchall() + for tup in rows: + self.outfh.write("%s\n" % tup[0]) + self.outfh.close() + + def shutdown(self): + self.conn.close() + + def stop_err(self, msg): + sys.stderr.write(msg) + self.outfh.flush() + self.outfh.close() + sys.exit(1) + + +if __name__ == '__main__': + umlgidg = UniqueMGLIDGenerator() + umlgidg.run() + umlgidg.shutdown()
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/unique_mlg_id.xml Wed Jun 02 19:18:10 2021 +0000 @@ -0,0 +1,27 @@ +<tool id="unique_mlg_id" name="Unique MLG IDs" version="1.0.0"> + <description></description> + <command detect_errors="exit_code"><![CDATA[ +python $__tool_directory__/unique_mlg_id.py +--database_connection_string '$__app__.config.corals_database_connection' +--output '$output' +]]></command> + <inputs> + </inputs> + <outputs> + <data name="output" format="tabular"/> + </outputs> + <tests> + <test> + <!--Testing this tool is a bit difficult at the current time.--> + </test> + </tests> + <help> +**What it does** + +Queries the corals (stag) database to retrieve the unique corals multi-locus genotype representative sample ids. + </help> + <citations> + <citation type="doi">https://doi.org/10.1038/s41598-020-69101-z</citation> + </citations> +</tool> +