0
|
1 #!/usr/bin/env python
|
|
2 from __future__ import print_function
|
|
3
|
|
4 import argparse
|
|
5 import datetime
|
|
6 import psycopg2
|
|
7
|
|
8 from sqlalchemy import create_engine
|
|
9 from sqlalchemy import MetaData
|
|
10 from sqlalchemy.engine.url import make_url
|
|
11
|
|
12 metadata = MetaData()
|
|
13
|
|
14 COLUMNS = ["Affymetrix ID", "Sample ID", "User Specimen ID", "Field Call", "Sample Depth",
|
|
15 "Percent Missing Data Coral", "Percent Heterozygous Coral", "Percent Acerv Coral",
|
|
16 "Percent Apalm Coral", "Bcoral Genet Id", "Registry ID", "DNA Extraction Method",
|
|
17 "DNA Concentration", "Colony Location", "Colony Latitude", "Colony Longitude",
|
|
18 "Colony Depth", "Reef Name", "Region", "Reef Latitude", "Reef Longitude",
|
|
19 "GPS Coordinates Associated With", "Coral Mlg Clonal ID", "Coral Mlg Rep Sample ID",
|
|
20 "Genetic Coral Species Call", "Spawning", "Sperm Motility", "TLE", "Disease Resist",
|
|
21 "Bleach Resist", "Mortality", "Healing Time", "Sequencing Facility", "Array Version",
|
|
22 "Plate Barcode", "Collector Last Name", "First Name", "Organization", "Email",
|
|
23 "Collection Date"]
|
|
24
|
|
25
|
|
26 class ExportAllSampleData(object):
|
|
27 def __init__(self):
|
|
28 self.args = None
|
|
29 self.conn = None
|
|
30 self.parse_args()
|
|
31 self.outfh = open(self.args.output, "w")
|
|
32 self.outfh.write("%s\n" % "\t".join(COLUMNS))
|
|
33 self.connect_db()
|
|
34 self.engine = create_engine(self.args.database_connection_string)
|
|
35
|
|
36 def connect_db(self):
|
|
37 url = make_url(self.args.database_connection_string)
|
|
38 args = url.translate_connect_args(username='user')
|
|
39 args.update(url.query)
|
|
40 assert url.get_dialect().name == 'postgresql', 'This script can only be used with PostgreSQL.'
|
|
41 self.conn = psycopg2.connect(**args)
|
|
42
|
|
43 def export_from_db(self):
|
|
44 today = datetime.date.today()
|
|
45 cmd = """
|
|
46 SELECT sample.affy_id, sample.sample_id, sample.genotype_id,
|
|
47 sample.phenotype_id, sample.experiment_id, sample.colony_id,
|
|
48 sample.colony_location, sample.collector_id, sample.collection_date,
|
|
49 sample.user_specimen_id, sample.registry_id, sample.depth AS sample_depth,
|
|
50 sample.dna_extraction_method, sample.dna_concentration,
|
|
51 sample.percent_missing_data_coral, sample.percent_acerv_coral,
|
|
52 sample.percent_apalm_coral, sample.percent_heterozygous_coral,
|
|
53 sample.field_call, sample.bcoral_genet_id, genotype.coral_mlg_clonal_id,
|
|
54 genotype.coral_mlg_rep_sample_id, genotype.genetic_coral_species_call,
|
|
55 phenotype.spawning, phenotype.sperm_motility, phenotype.tle,
|
|
56 phenotype.disease_resist, phenotype.bleach_resist, phenotype.mortality,
|
|
57 phenotype.healing_time, experiment.seq_facility, experiment.array_version,
|
|
58 experiment.plate_barcode, colony.latitude AS colony_latitude,
|
|
59 colony.longitude AS colony_longitude, colony.depth AS colony_depth,
|
|
60 reef.name, reef.region, reef.latitude AS reef_latitude, reef.longitude AS reef_longitude,
|
|
61 reef.geographic_origin, person.last_name, person.first_name,
|
|
62 person.organization, person.email
|
|
63 FROM sample
|
|
64 LEFT OUTER JOIN genotype
|
|
65 ON sample.genotype_id = genotype.id
|
|
66 LEFT OUTER JOIN phenotype
|
|
67 ON sample.phenotype_id = phenotype.id
|
|
68 LEFT OUTER JOIN experiment
|
|
69 ON sample.experiment_id = experiment.id
|
|
70 LEFT OUTER JOIN colony
|
|
71 ON sample.colony_id = colony.id
|
|
72 LEFT OUTER JOIN reef
|
|
73 ON reef.id = colony.reef_id
|
|
74 LEFT OUTER JOIN person
|
|
75 ON sample.collector_id = person.id
|
|
76 WHERE sample.public OR sample.public_after_date < date'%s'
|
|
77 ORDER BY affy_id;""" % today
|
|
78 # Instantiate the cursor.
|
|
79 cur = self.conn.cursor()
|
|
80 # Execute the query.
|
|
81 cur.execute(cmd)
|
|
82 rows = cur.fetchall()
|
|
83 for tup in rows:
|
|
84 values = self.extract_values(tup)
|
|
85 # Output the row.
|
|
86 self.outfh.write("%s\n" % "\t".join(values))
|
|
87
|
|
88 def extract_values(self, tup):
|
|
89 values = []
|
|
90 # Extract the items from the tuple.
|
|
91 affy_id = self.get_value(tup[0])
|
|
92 sample_id = self.get_value(tup[1])
|
|
93 colony_location = self.get_value(tup[6])
|
|
94 collection_date = self.get_value(tup[8])
|
|
95 if len(collection_date) > 0:
|
|
96 collection_date = collection_date[:10]
|
|
97 user_specimen_id = self.get_value(tup[9])
|
|
98 registry_id = self.get_value(tup[10])
|
|
99 sample_depth = self.get_value(tup[11])
|
|
100 dna_extraction_method = self.get_value(tup[12])
|
|
101 dna_concentration = self.get_value(tup[13])
|
|
102 percent_missing_data_coral = self.get_value(tup[14])
|
|
103 percent_acerv_coral = self.get_value(tup[15])
|
|
104 percent_apalm_coral = self.get_value(tup[16])
|
|
105 percent_heterozygous_coral = self.get_value(tup[17])
|
|
106 field_call = self.get_value(tup[18])
|
|
107 bcoral_genet_id = self.get_value(tup[19])
|
|
108 coral_mlg_clonal_id = self.get_value(tup[20])
|
|
109 coral_mlg_rep_sample_id = self.get_value(tup[21])
|
|
110 genetic_coral_species_call = self.get_value(tup[22])
|
|
111 spawning = self.get_value(tup[23])
|
|
112 sperm_motility = self.get_value(tup[24])
|
|
113 tle = self.get_value(tup[25])
|
|
114 disease_resist = self.get_value(tup[26])
|
|
115 bleach_resist = self.get_value(tup[27])
|
|
116 mortality = self.get_value(tup[28])
|
|
117 healing_time = self.get_value(tup[29])
|
|
118 seq_facility = self.get_value(tup[30])
|
|
119 array_version = self.get_value(tup[31])
|
|
120 plate_barcode = self.get_value(tup[32])
|
|
121 colony_latitude = self.get_value(tup[33])
|
|
122 colony_longitude = self.get_value(tup[34])
|
|
123 colony_depth = self.get_value(tup[35])
|
|
124 reef_name = self.get_value(tup[36])
|
|
125 region = self.get_value(tup[37])
|
|
126 reef_latitude = self.get_value(tup[38])
|
|
127 reef_longitude = self.get_value(tup[39])
|
|
128 geographic_origin = self.get_value(tup[40])
|
|
129 last_name = self.get_value(tup[41])
|
|
130 first_name = self.get_value(tup[42])
|
|
131 organization = self.get_value(tup[43])
|
|
132 email = self.get_value(tup[44])
|
|
133 # Append the columns in the specified order.
|
|
134 values.append(affy_id)
|
|
135 values.append(sample_id)
|
|
136 values.append(user_specimen_id)
|
|
137 values.append(field_call)
|
|
138 values.append(sample_depth)
|
|
139 values.append(percent_missing_data_coral)
|
|
140 values.append(percent_heterozygous_coral)
|
|
141 values.append(percent_acerv_coral)
|
|
142 values.append(percent_apalm_coral)
|
|
143 values.append(bcoral_genet_id)
|
|
144 values.append(registry_id)
|
|
145 values.append(dna_extraction_method)
|
|
146 values.append(dna_concentration)
|
|
147 values.append(colony_location)
|
|
148 values.append(colony_latitude)
|
|
149 values.append(colony_longitude)
|
|
150 values.append(colony_depth)
|
|
151 values.append(reef_name)
|
|
152 values.append(region)
|
|
153 values.append(reef_latitude)
|
|
154 values.append(reef_longitude)
|
|
155 values.append(geographic_origin)
|
|
156 values.append(coral_mlg_clonal_id)
|
|
157 values.append(coral_mlg_rep_sample_id)
|
|
158 values.append(genetic_coral_species_call)
|
|
159 values.append(spawning)
|
|
160 values.append(sperm_motility)
|
|
161 values.append(tle)
|
|
162 values.append(disease_resist)
|
|
163 values.append(bleach_resist)
|
|
164 values.append(mortality)
|
|
165 values.append(healing_time)
|
|
166 values.append(seq_facility)
|
|
167 values.append(array_version)
|
|
168 values.append(plate_barcode)
|
|
169 values.append(last_name)
|
|
170 values.append(first_name)
|
|
171 values.append(organization)
|
|
172 values.append(email)
|
|
173 values.append(collection_date)
|
|
174 return values
|
|
175
|
|
176 def get_value(self, loc):
|
|
177 return str(loc) or ""
|
|
178
|
|
179 def parse_args(self):
|
|
180 parser = argparse.ArgumentParser()
|
|
181 parser.add_argument('--database_connection_string', dest='database_connection_string', help='Postgres database connection string'),
|
|
182 parser.add_argument('--output', dest='output', help='Output dataset'),
|
|
183 self.args = parser.parse_args()
|
|
184
|
|
185 def run(self):
|
|
186 self.export_from_db()
|
|
187
|
|
188 def shutdown(self):
|
|
189 self.outfh.flush()
|
|
190 self.outfh.close()
|
|
191 self.conn.close()
|
|
192
|
|
193
|
|
194 if __name__ == '__main__':
|
|
195 easd = ExportAllSampleData()
|
|
196 easd.run()
|
|
197 easd.shutdown()
|