Mercurial > repos > greg > genotype_population_info

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/.shed.yml	Thu Aug 15 10:58:24 2019 -0400
@@ -0,0 +1,11 @@
+name: genotype_population_info
+owner: greg
+description: |
+  Contains a tool that generates the genotype population information file for use as input to the coral_multilocus_genotype tool.
+homepage_url: http://baumslab.org
+long_description: |
+  Contains a tool that generates the genotype population information file for use as input to the coral_multilocus_genotype tool.
+remote_repository_url: https://github.com/gregvonkuster/galaxy_tools/tree/master/tools/corals/genotype_population_info
+type: unrestricted
+categories:
+  - Micro-array Analysis
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/genotype_population_info.py	Thu Aug 15 10:58:24 2019 -0400
@@ -0,0 +1,93 @@
+#!/usr/bin/env python
+import argparse
+import sys
+
+import psycopg2
+from sqlalchemy import create_engine, MetaData
+from sqlalchemy.engine.url import make_url
+
+
+class GenotypeInfoGenerator(object):
+    def __init__(self):
+        self.args = None
+        self.conn = None
+        self.parse_args()
+        self.outfh = open(self.args.output, "w")
+        self.connect_db()
+        self.engine = create_engine(self.args.database_connection_string)
+        self.metadata = MetaData(self.engine)
+
+    def parse_args(self):
+        parser = argparse.ArgumentParser()
+        parser.add_argument('--database_connection_string', dest='database_connection_string', help='Postgres database connection string'),
+        parser.add_argument('--input_partial_info', dest='input_partial_info', help='Tabular file containing part of the genotype info')
+        parser.add_argument('--output', dest='output', help='Output dataset'),
+        self.args = parser.parse_args()
+
+    def connect_db(self):
+        url = make_url(self.args.database_connection_string)
+        args = url.translate_connect_args(username='user')
+        args.update(url.query)
+        assert url.get_dialect().name == 'postgresql', 'This script can only be used with PostgreSQL.'
+        self.conn = psycopg2.connect(**args)
+
+    def run(self):
+        sql = """
+              SELECT sample.user_specimen_id,
+                     reef.region
+              FROM sample
+              LEFT OUTER JOIN colony
+                              ON sample.colony_id = colony.id
+              LEFT OUTER JOIN reef
+                              ON reef.id = colony.reef_id
+              WHERE sample.affy_id = '%s';
+        """
+        with open(self.args.input_partial_info, "r") as fh:
+            for line in fh:
+                line = line.strip()
+                out_items = []
+                items = line.split('\t')
+                # Item number.
+                out_items.append(items[0])
+                affy_id = items[1]
+                out_items.append(affy_id)
+                if len(items) == 2:
+                    # Example line:
+                    # 1 a100000-4368120-060520-256_I07.CEL
+                    # The line is missing the user_specimen_id and
+                    # region, so retrieve it from the database.
+                    query = sql % affy_id
+                    cur = self.conn.cursor()
+                    cur.execute(query)
+                    try:
+                        missing_items = cur.fetchone()
+                        # user_specimen_id
+                        out_items.append(missing_items[0])
+                        # region
+                        out_items.append(missing_items[1])
+                    except Exception as e:
+                        msg = "Error retrieving user_specimen_id and region from the database for affy_id %s: %s" % (affy_id, e)
+                        self.stop_err(msg)
+                else:
+                    # The line contains all of the information we need.
+                    # user_specimen_id
+                    out_items.append(items[3])
+                    # region
+                    out_items.append(items[9])
+                self.outfh.write("%s\n" % "\t".join(out_items))
+        self.outfh.close()
+
+    def shutdown(self):
+        self.conn.close()
+
+    def stop_err(self, msg):
+        sys.stderr.write(msg)
+        self.outfh.flush()
+        self.outfh.close()
+        sys.exit(1)
+
+
+if __name__ == '__main__':
+    gig = GenotypeInfoGenerator()
+    gig.run()
+    gig.shutdown()
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/genotype_population_info.xml	Thu Aug 15 10:58:24 2019 -0400
@@ -0,0 +1,40 @@
+<tool id="genotype_population_info" name="Generate genotype population" version="1.0.0">
+    <description>information from a partial dataset</description>
+    <command detect_errors="exit_code"><![CDATA[
+python $__tool_directory__/genotype_population_info.py
+--database_connection_string '$__app__.config.corals_database_connection'
+--input_partial_info '$input_partial_info'
+--output '$output'
+]]></command>
+    <inputs>
+        <param name="input_partial_info" type="data" format="tabular" label="Partial genotype population file"/>
+    </inputs>
+    <outputs>
+        <data name="output" format="tabular"/>
+    </outputs>
+    <tests>
+        <test>
+            <!--Testing this tool is a bit difficult at the current time.-->
+        </test>
+    </tests>
+    <help>
+**What it does**
+
+Accepts a file that contains Affymetrix identifiers and a subset of the user specimen identifiers and
+regions for genotyping.  The corals (stag) database is queried to retrieve the user specimen identifiers
+and regions that are missing for each Affymetrix identifier, and a complete set of genotype population
+information is produced for use as input to the Coral Multilocus Genotype tool.  This tool must be able
+to access the corals (stag) database.
+    </help>
+    <citations>
+        <citation type="bibtex">
+            @misc{None,
+            journal = {None},
+            author = {Baums I},
+            title = {Manuscript in preparation},
+            year = {None},
+            url = {http://baumslab.org}
+        </citation>
+    </citations>
+</tool>
+