Mercurial > repos > greg > export_all_sample_date
comparison export_all_sample_data.py @ 0:f15e63d204b1 draft
Uploaded
| author | greg | 
|---|---|
| date | Mon, 23 Sep 2019 14:12:31 -0400 | 
| parents | |
| children | 633cb7831526 | 
   comparison
  equal
  deleted
  inserted
  replaced
| -1:000000000000 | 0:f15e63d204b1 | 
|---|---|
| 1 #!/usr/bin/env python | |
| 2 from __future__ import print_function | |
| 3 | |
| 4 import argparse | |
| 5 import datetime | |
| 6 import psycopg2 | |
| 7 | |
| 8 from sqlalchemy import create_engine | |
| 9 from sqlalchemy import MetaData | |
| 10 from sqlalchemy.engine.url import make_url | |
| 11 | |
| 12 metadata = MetaData() | |
| 13 | |
| 14 COLUMNS = ["Affymetrix ID", "Sample ID", "User Specimen ID", "Field Call", "Sample Depth", | |
| 15 "Percent Missing Data Coral", "Percent Heterozygous Coral", "Percent Acerv Coral", | |
| 16 "Percent Apalm Coral", "Bcoral Genet Id", "Registry ID", "DNA Extraction Method", | |
| 17 "DNA Concentration", "Colony Location", "Colony Latitude", "Colony Longitude", | |
| 18 "Colony Depth", "Reef Name", "Region", "Reef Latitude", "Reef Longitude", | |
| 19 "GPS Coordinates Associated With", "Coral Mlg Clonal ID", "Coral Mlg Rep Sample ID", | |
| 20 "Genetic Coral Species Call", "Spawning", "Sperm Motility", "TLE", "Disease Resist", | |
| 21 "Bleach Resist", "Mortality", "Healing Time", "Sequencing Facility", "Array Version", | |
| 22 "Plate Barcode", "Collector Last Name", "First Name", "Organization", "Email", | |
| 23 "Collection Date"] | |
| 24 | |
| 25 | |
| 26 class ExportAllSampleData(object): | |
| 27 def __init__(self): | |
| 28 self.args = None | |
| 29 self.conn = None | |
| 30 self.parse_args() | |
| 31 self.outfh = open(self.args.output, "w") | |
| 32 self.outfh.write("%s\n" % "\t".join(COLUMNS)) | |
| 33 self.connect_db() | |
| 34 self.engine = create_engine(self.args.database_connection_string) | |
| 35 | |
| 36 def connect_db(self): | |
| 37 url = make_url(self.args.database_connection_string) | |
| 38 args = url.translate_connect_args(username='user') | |
| 39 args.update(url.query) | |
| 40 assert url.get_dialect().name == 'postgresql', 'This script can only be used with PostgreSQL.' | |
| 41 self.conn = psycopg2.connect(**args) | |
| 42 | |
| 43 def export_from_db(self): | |
| 44 today = datetime.date.today() | |
| 45 cmd = """ | |
| 46 SELECT sample.affy_id, sample.sample_id, sample.genotype_id, | |
| 47 sample.phenotype_id, sample.experiment_id, sample.colony_id, | |
| 48 sample.colony_location, sample.collector_id, sample.collection_date, | |
| 49 sample.user_specimen_id, sample.registry_id, sample.depth AS sample_depth, | |
| 50 sample.dna_extraction_method, sample.dna_concentration, | |
| 51 sample.percent_missing_data_coral, sample.percent_acerv_coral, | |
| 52 sample.percent_apalm_coral, sample.percent_heterozygous_coral, | |
| 53 sample.field_call, sample.bcoral_genet_id, genotype.coral_mlg_clonal_id, | |
| 54 genotype.coral_mlg_rep_sample_id, genotype.genetic_coral_species_call, | |
| 55 phenotype.spawning, phenotype.sperm_motility, phenotype.tle, | |
| 56 phenotype.disease_resist, phenotype.bleach_resist, phenotype.mortality, | |
| 57 phenotype.healing_time, experiment.seq_facility, experiment.array_version, | |
| 58 experiment.plate_barcode, colony.latitude AS colony_latitude, | |
| 59 colony.longitude AS colony_longitude, colony.depth AS colony_depth, | |
| 60 reef.name, reef.region, reef.latitude AS reef_latitude, reef.longitude AS reef_longitude, | |
| 61 reef.geographic_origin, person.last_name, person.first_name, | |
| 62 person.organization, person.email | |
| 63 FROM sample | |
| 64 LEFT OUTER JOIN genotype | |
| 65 ON sample.genotype_id = genotype.id | |
| 66 LEFT OUTER JOIN phenotype | |
| 67 ON sample.phenotype_id = phenotype.id | |
| 68 LEFT OUTER JOIN experiment | |
| 69 ON sample.experiment_id = experiment.id | |
| 70 LEFT OUTER JOIN colony | |
| 71 ON sample.colony_id = colony.id | |
| 72 LEFT OUTER JOIN reef | |
| 73 ON reef.id = colony.reef_id | |
| 74 LEFT OUTER JOIN person | |
| 75 ON sample.collector_id = person.id | |
| 76 WHERE sample.public OR sample.public_after_date < date'%s' | |
| 77 ORDER BY affy_id;""" % today | |
| 78 # Instantiate the cursor. | |
| 79 cur = self.conn.cursor() | |
| 80 # Execute the query. | |
| 81 cur.execute(cmd) | |
| 82 rows = cur.fetchall() | |
| 83 for tup in rows: | |
| 84 values = self.extract_values(tup) | |
| 85 # Output the row. | |
| 86 self.outfh.write("%s\n" % "\t".join(values)) | |
| 87 | |
| 88 def extract_values(self, tup): | |
| 89 values = [] | |
| 90 # Extract the items from the tuple. | |
| 91 affy_id = self.get_value(tup[0]) | |
| 92 sample_id = self.get_value(tup[1]) | |
| 93 colony_location = self.get_value(tup[6]) | |
| 94 collection_date = self.get_value(tup[8]) | |
| 95 if len(collection_date) > 0: | |
| 96 collection_date = collection_date[:10] | |
| 97 user_specimen_id = self.get_value(tup[9]) | |
| 98 registry_id = self.get_value(tup[10]) | |
| 99 sample_depth = self.get_value(tup[11]) | |
| 100 dna_extraction_method = self.get_value(tup[12]) | |
| 101 dna_concentration = self.get_value(tup[13]) | |
| 102 percent_missing_data_coral = self.get_value(tup[14]) | |
| 103 percent_acerv_coral = self.get_value(tup[15]) | |
| 104 percent_apalm_coral = self.get_value(tup[16]) | |
| 105 percent_heterozygous_coral = self.get_value(tup[17]) | |
| 106 field_call = self.get_value(tup[18]) | |
| 107 bcoral_genet_id = self.get_value(tup[19]) | |
| 108 coral_mlg_clonal_id = self.get_value(tup[20]) | |
| 109 coral_mlg_rep_sample_id = self.get_value(tup[21]) | |
| 110 genetic_coral_species_call = self.get_value(tup[22]) | |
| 111 spawning = self.get_value(tup[23]) | |
| 112 sperm_motility = self.get_value(tup[24]) | |
| 113 tle = self.get_value(tup[25]) | |
| 114 disease_resist = self.get_value(tup[26]) | |
| 115 bleach_resist = self.get_value(tup[27]) | |
| 116 mortality = self.get_value(tup[28]) | |
| 117 healing_time = self.get_value(tup[29]) | |
| 118 seq_facility = self.get_value(tup[30]) | |
| 119 array_version = self.get_value(tup[31]) | |
| 120 plate_barcode = self.get_value(tup[32]) | |
| 121 colony_latitude = self.get_value(tup[33]) | |
| 122 colony_longitude = self.get_value(tup[34]) | |
| 123 colony_depth = self.get_value(tup[35]) | |
| 124 reef_name = self.get_value(tup[36]) | |
| 125 region = self.get_value(tup[37]) | |
| 126 reef_latitude = self.get_value(tup[38]) | |
| 127 reef_longitude = self.get_value(tup[39]) | |
| 128 geographic_origin = self.get_value(tup[40]) | |
| 129 last_name = self.get_value(tup[41]) | |
| 130 first_name = self.get_value(tup[42]) | |
| 131 organization = self.get_value(tup[43]) | |
| 132 email = self.get_value(tup[44]) | |
| 133 # Append the columns in the specified order. | |
| 134 values.append(affy_id) | |
| 135 values.append(sample_id) | |
| 136 values.append(user_specimen_id) | |
| 137 values.append(field_call) | |
| 138 values.append(sample_depth) | |
| 139 values.append(percent_missing_data_coral) | |
| 140 values.append(percent_heterozygous_coral) | |
| 141 values.append(percent_acerv_coral) | |
| 142 values.append(percent_apalm_coral) | |
| 143 values.append(bcoral_genet_id) | |
| 144 values.append(registry_id) | |
| 145 values.append(dna_extraction_method) | |
| 146 values.append(dna_concentration) | |
| 147 values.append(colony_location) | |
| 148 values.append(colony_latitude) | |
| 149 values.append(colony_longitude) | |
| 150 values.append(colony_depth) | |
| 151 values.append(reef_name) | |
| 152 values.append(region) | |
| 153 values.append(reef_latitude) | |
| 154 values.append(reef_longitude) | |
| 155 values.append(geographic_origin) | |
| 156 values.append(coral_mlg_clonal_id) | |
| 157 values.append(coral_mlg_rep_sample_id) | |
| 158 values.append(genetic_coral_species_call) | |
| 159 values.append(spawning) | |
| 160 values.append(sperm_motility) | |
| 161 values.append(tle) | |
| 162 values.append(disease_resist) | |
| 163 values.append(bleach_resist) | |
| 164 values.append(mortality) | |
| 165 values.append(healing_time) | |
| 166 values.append(seq_facility) | |
| 167 values.append(array_version) | |
| 168 values.append(plate_barcode) | |
| 169 values.append(last_name) | |
| 170 values.append(first_name) | |
| 171 values.append(organization) | |
| 172 values.append(email) | |
| 173 values.append(collection_date) | |
| 174 return values | |
| 175 | |
| 176 def get_value(self, loc): | |
| 177 return str(loc) or "" | |
| 178 | |
| 179 def parse_args(self): | |
| 180 parser = argparse.ArgumentParser() | |
| 181 parser.add_argument('--database_connection_string', dest='database_connection_string', help='Postgres database connection string'), | |
| 182 parser.add_argument('--output', dest='output', help='Output dataset'), | |
| 183 self.args = parser.parse_args() | |
| 184 | |
| 185 def run(self): | |
| 186 self.export_from_db() | |
| 187 | |
| 188 def shutdown(self): | |
| 189 self.outfh.flush() | |
| 190 self.outfh.close() | |
| 191 self.conn.close() | |
| 192 | |
| 193 | |
| 194 if __name__ == '__main__': | |
| 195 easd = ExportAllSampleData() | |
| 196 easd.run() | |
| 197 easd.shutdown() | 
