Mercurial > repos > greg > export_all_sample_date
comparison export_all_sample_data.py @ 0:f15e63d204b1 draft
Uploaded
author | greg |
---|---|
date | Mon, 23 Sep 2019 14:12:31 -0400 |
parents | |
children | 633cb7831526 |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:f15e63d204b1 |
---|---|
1 #!/usr/bin/env python | |
2 from __future__ import print_function | |
3 | |
4 import argparse | |
5 import datetime | |
6 import psycopg2 | |
7 | |
8 from sqlalchemy import create_engine | |
9 from sqlalchemy import MetaData | |
10 from sqlalchemy.engine.url import make_url | |
11 | |
12 metadata = MetaData() | |
13 | |
14 COLUMNS = ["Affymetrix ID", "Sample ID", "User Specimen ID", "Field Call", "Sample Depth", | |
15 "Percent Missing Data Coral", "Percent Heterozygous Coral", "Percent Acerv Coral", | |
16 "Percent Apalm Coral", "Bcoral Genet Id", "Registry ID", "DNA Extraction Method", | |
17 "DNA Concentration", "Colony Location", "Colony Latitude", "Colony Longitude", | |
18 "Colony Depth", "Reef Name", "Region", "Reef Latitude", "Reef Longitude", | |
19 "GPS Coordinates Associated With", "Coral Mlg Clonal ID", "Coral Mlg Rep Sample ID", | |
20 "Genetic Coral Species Call", "Spawning", "Sperm Motility", "TLE", "Disease Resist", | |
21 "Bleach Resist", "Mortality", "Healing Time", "Sequencing Facility", "Array Version", | |
22 "Plate Barcode", "Collector Last Name", "First Name", "Organization", "Email", | |
23 "Collection Date"] | |
24 | |
25 | |
26 class ExportAllSampleData(object): | |
27 def __init__(self): | |
28 self.args = None | |
29 self.conn = None | |
30 self.parse_args() | |
31 self.outfh = open(self.args.output, "w") | |
32 self.outfh.write("%s\n" % "\t".join(COLUMNS)) | |
33 self.connect_db() | |
34 self.engine = create_engine(self.args.database_connection_string) | |
35 | |
36 def connect_db(self): | |
37 url = make_url(self.args.database_connection_string) | |
38 args = url.translate_connect_args(username='user') | |
39 args.update(url.query) | |
40 assert url.get_dialect().name == 'postgresql', 'This script can only be used with PostgreSQL.' | |
41 self.conn = psycopg2.connect(**args) | |
42 | |
43 def export_from_db(self): | |
44 today = datetime.date.today() | |
45 cmd = """ | |
46 SELECT sample.affy_id, sample.sample_id, sample.genotype_id, | |
47 sample.phenotype_id, sample.experiment_id, sample.colony_id, | |
48 sample.colony_location, sample.collector_id, sample.collection_date, | |
49 sample.user_specimen_id, sample.registry_id, sample.depth AS sample_depth, | |
50 sample.dna_extraction_method, sample.dna_concentration, | |
51 sample.percent_missing_data_coral, sample.percent_acerv_coral, | |
52 sample.percent_apalm_coral, sample.percent_heterozygous_coral, | |
53 sample.field_call, sample.bcoral_genet_id, genotype.coral_mlg_clonal_id, | |
54 genotype.coral_mlg_rep_sample_id, genotype.genetic_coral_species_call, | |
55 phenotype.spawning, phenotype.sperm_motility, phenotype.tle, | |
56 phenotype.disease_resist, phenotype.bleach_resist, phenotype.mortality, | |
57 phenotype.healing_time, experiment.seq_facility, experiment.array_version, | |
58 experiment.plate_barcode, colony.latitude AS colony_latitude, | |
59 colony.longitude AS colony_longitude, colony.depth AS colony_depth, | |
60 reef.name, reef.region, reef.latitude AS reef_latitude, reef.longitude AS reef_longitude, | |
61 reef.geographic_origin, person.last_name, person.first_name, | |
62 person.organization, person.email | |
63 FROM sample | |
64 LEFT OUTER JOIN genotype | |
65 ON sample.genotype_id = genotype.id | |
66 LEFT OUTER JOIN phenotype | |
67 ON sample.phenotype_id = phenotype.id | |
68 LEFT OUTER JOIN experiment | |
69 ON sample.experiment_id = experiment.id | |
70 LEFT OUTER JOIN colony | |
71 ON sample.colony_id = colony.id | |
72 LEFT OUTER JOIN reef | |
73 ON reef.id = colony.reef_id | |
74 LEFT OUTER JOIN person | |
75 ON sample.collector_id = person.id | |
76 WHERE sample.public OR sample.public_after_date < date'%s' | |
77 ORDER BY affy_id;""" % today | |
78 # Instantiate the cursor. | |
79 cur = self.conn.cursor() | |
80 # Execute the query. | |
81 cur.execute(cmd) | |
82 rows = cur.fetchall() | |
83 for tup in rows: | |
84 values = self.extract_values(tup) | |
85 # Output the row. | |
86 self.outfh.write("%s\n" % "\t".join(values)) | |
87 | |
88 def extract_values(self, tup): | |
89 values = [] | |
90 # Extract the items from the tuple. | |
91 affy_id = self.get_value(tup[0]) | |
92 sample_id = self.get_value(tup[1]) | |
93 colony_location = self.get_value(tup[6]) | |
94 collection_date = self.get_value(tup[8]) | |
95 if len(collection_date) > 0: | |
96 collection_date = collection_date[:10] | |
97 user_specimen_id = self.get_value(tup[9]) | |
98 registry_id = self.get_value(tup[10]) | |
99 sample_depth = self.get_value(tup[11]) | |
100 dna_extraction_method = self.get_value(tup[12]) | |
101 dna_concentration = self.get_value(tup[13]) | |
102 percent_missing_data_coral = self.get_value(tup[14]) | |
103 percent_acerv_coral = self.get_value(tup[15]) | |
104 percent_apalm_coral = self.get_value(tup[16]) | |
105 percent_heterozygous_coral = self.get_value(tup[17]) | |
106 field_call = self.get_value(tup[18]) | |
107 bcoral_genet_id = self.get_value(tup[19]) | |
108 coral_mlg_clonal_id = self.get_value(tup[20]) | |
109 coral_mlg_rep_sample_id = self.get_value(tup[21]) | |
110 genetic_coral_species_call = self.get_value(tup[22]) | |
111 spawning = self.get_value(tup[23]) | |
112 sperm_motility = self.get_value(tup[24]) | |
113 tle = self.get_value(tup[25]) | |
114 disease_resist = self.get_value(tup[26]) | |
115 bleach_resist = self.get_value(tup[27]) | |
116 mortality = self.get_value(tup[28]) | |
117 healing_time = self.get_value(tup[29]) | |
118 seq_facility = self.get_value(tup[30]) | |
119 array_version = self.get_value(tup[31]) | |
120 plate_barcode = self.get_value(tup[32]) | |
121 colony_latitude = self.get_value(tup[33]) | |
122 colony_longitude = self.get_value(tup[34]) | |
123 colony_depth = self.get_value(tup[35]) | |
124 reef_name = self.get_value(tup[36]) | |
125 region = self.get_value(tup[37]) | |
126 reef_latitude = self.get_value(tup[38]) | |
127 reef_longitude = self.get_value(tup[39]) | |
128 geographic_origin = self.get_value(tup[40]) | |
129 last_name = self.get_value(tup[41]) | |
130 first_name = self.get_value(tup[42]) | |
131 organization = self.get_value(tup[43]) | |
132 email = self.get_value(tup[44]) | |
133 # Append the columns in the specified order. | |
134 values.append(affy_id) | |
135 values.append(sample_id) | |
136 values.append(user_specimen_id) | |
137 values.append(field_call) | |
138 values.append(sample_depth) | |
139 values.append(percent_missing_data_coral) | |
140 values.append(percent_heterozygous_coral) | |
141 values.append(percent_acerv_coral) | |
142 values.append(percent_apalm_coral) | |
143 values.append(bcoral_genet_id) | |
144 values.append(registry_id) | |
145 values.append(dna_extraction_method) | |
146 values.append(dna_concentration) | |
147 values.append(colony_location) | |
148 values.append(colony_latitude) | |
149 values.append(colony_longitude) | |
150 values.append(colony_depth) | |
151 values.append(reef_name) | |
152 values.append(region) | |
153 values.append(reef_latitude) | |
154 values.append(reef_longitude) | |
155 values.append(geographic_origin) | |
156 values.append(coral_mlg_clonal_id) | |
157 values.append(coral_mlg_rep_sample_id) | |
158 values.append(genetic_coral_species_call) | |
159 values.append(spawning) | |
160 values.append(sperm_motility) | |
161 values.append(tle) | |
162 values.append(disease_resist) | |
163 values.append(bleach_resist) | |
164 values.append(mortality) | |
165 values.append(healing_time) | |
166 values.append(seq_facility) | |
167 values.append(array_version) | |
168 values.append(plate_barcode) | |
169 values.append(last_name) | |
170 values.append(first_name) | |
171 values.append(organization) | |
172 values.append(email) | |
173 values.append(collection_date) | |
174 return values | |
175 | |
176 def get_value(self, loc): | |
177 return str(loc) or "" | |
178 | |
179 def parse_args(self): | |
180 parser = argparse.ArgumentParser() | |
181 parser.add_argument('--database_connection_string', dest='database_connection_string', help='Postgres database connection string'), | |
182 parser.add_argument('--output', dest='output', help='Output dataset'), | |
183 self.args = parser.parse_args() | |
184 | |
185 def run(self): | |
186 self.export_from_db() | |
187 | |
188 def shutdown(self): | |
189 self.outfh.flush() | |
190 self.outfh.close() | |
191 self.conn.close() | |
192 | |
193 | |
194 if __name__ == '__main__': | |
195 easd = ExportAllSampleData() | |
196 easd.run() | |
197 easd.shutdown() |