Mercurial > repos > greg > validate_temperature_data
comparison validate_temperature_data.py @ 0:26415eac98c3 draft default tip
Uploaded
| author | greg |
|---|---|
| date | Tue, 27 Nov 2018 11:08:20 -0500 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:26415eac98c3 |
|---|---|
| 1 #!/usr/bin/env python | |
| 2 import argparse | |
| 3 import datetime | |
| 4 import decimal | |
| 5 import re | |
| 6 import shutil | |
| 7 import sys | |
| 8 | |
# Command-line interface.  --data_type selects which input is validated:
# "normals" reads --input_normals, any other value reads --input_actuals.
parser = argparse.ArgumentParser()
parser.add_argument('--data_type', dest='data_type', default=None, help='Temperature data type, normals or actuals')
parser.add_argument('--input_actuals', dest='input_actuals', default=None, help='Daily actuals temperature data')
parser.add_argument('--input_normals', dest='input_normals', default=None, help='30 year normals temperature data')
# No trailing comma here: it would turn the call into a discarded 1-tuple.
parser.add_argument('--output', dest='output', help='Output dataset')
args = parser.parse_args()

# The first line of each input file must match the corresponding header
# exactly (compared after stripping the line terminator).
ACTUALS_HEADER = "LATITUDE,LONGITUDE,DATE,DOY,TMIN,TMAX"
NORMALS_HEADER = "stationid,latitude,longitude,elev_m,name,st,mmdd,doy,tmin,tmax"
| 18 | |
def add_error_msg(accumulated_msgs, msg):
    """Return the accumulated error text with msg appended on its own line.

    accumulated_msgs -- error text collected so far ("" when there is none).
    msg -- the new error message to append.

    The first message is returned as-is so the final report does not start
    with a blank line; later messages are separated by a newline.
    """
    if not accumulated_msgs:
        return msg
    return "%s\n%s" % (accumulated_msgs, msg)
| 21 | |
| 22 | |
def empty_value(line_no, label, accumulated_msgs):
    """Record that the required field named label is blank on line line_no.

    Returns the accumulated error text with the new message appended.
    """
    missing_msg = "The required %s value is missing on line %d." % (label, line_no)
    return add_error_msg(accumulated_msgs, missing_msg)
| 25 | |
| 26 | |
def stop_error(msg):
    """Terminate the script immediately, using msg as the exit message."""
    # Raising SystemExit directly is exactly what sys.exit(msg) does.
    raise SystemExit(msg)
| 29 | |
| 30 | |
def validate_date_string(line_no, date_string, accumulated_msgs):
    """Check that date_string parses with the '%Y-%m-%d' format.

    Returns accumulated_msgs unchanged when the date parses; otherwise
    returns it with an error message for line line_no appended.
    """
    parse = datetime.datetime.strptime
    try:
        parse(date_string, '%Y-%m-%d')
    except ValueError:
        bad_format = "Line %d contains an incorrect date format (%s must be YYYY-MM-DD)." % (line_no, date_string)
        return add_error_msg(accumulated_msgs, bad_format)
    return accumulated_msgs
| 37 | |
| 38 | |
def validate_decimal(line_no, decimal_string, accumulated_msgs, label):
    """Check that decimal_string is a finite decimal number.

    line_no -- line number used in the error message.
    decimal_string -- the raw field text to validate.
    accumulated_msgs -- error text collected so far.
    label -- name of the field, used in the error message.

    Returns accumulated_msgs unchanged when the value is valid; otherwise
    returns it with an error message appended.
    """
    try:
        value = decimal.Decimal(decimal_string)
    except (decimal.InvalidOperation, TypeError, ValueError):
        value = None
    # Decimal() happily parses "NaN" and "Infinity"; neither is a usable
    # coordinate or temperature, so only finite values are accepted.
    if value is not None and value.is_finite():
        return accumulated_msgs
    return add_error_msg(accumulated_msgs, "Line %d contains an incorrect %s decimal value (%s)." % (line_no, label, decimal_string))
| 45 | |
| 46 | |
def validate_integer(line_no, integer_string, accumulated_msgs, label):
    """Check that integer_string is a non-empty run of ASCII digits.

    line_no -- line number used in the error message.
    integer_string -- the raw field text to validate.
    accumulated_msgs -- error text collected so far.
    label -- name of the field, used in the error message.

    Returns accumulated_msgs unchanged when the value is valid; otherwise
    returns it with an error message appended.
    """
    # str.isdigit() is also true for characters such as superscript digits
    # that int() cannot convert, so match ASCII digits explicitly.  \Z (not
    # $) keeps a trailing newline from slipping through.
    if re.match(r'[0-9]+\Z', integer_string):
        return accumulated_msgs
    return add_error_msg(accumulated_msgs, "Line %d contains an incorrect %s integer value (%s)." % (line_no, label, integer_string))
| 51 | |
| 52 | |
def validate_mmdd(line_no, mmdd, accumulated_msgs):
    """Check that mmdd is a month-day string in mm-dd form (02-29 allowed).

    line_no -- line number used in the error message.
    mmdd -- the raw field text to validate.
    accumulated_msgs -- error text collected so far.

    Returns accumulated_msgs unchanged when the value is valid; otherwise
    returns it with an error message appended.
    """
    try:
        # Parse against a fixed leap year so that Feb 29 is accepted by the
        # parser itself; any trailing junk (e.g. "02-29-99") is rejected.
        datetime.datetime.strptime("2000-%s" % mmdd, '%Y-%m-%d')
        return accumulated_msgs
    except ValueError:
        return add_error_msg(accumulated_msgs, "Line %d contains an incorrect date format (%s must be mm-dd)." % (line_no, mmdd))
| 69 | |
| 70 | |
def _check_consecutive_doy(line_no, doy, last_doy, accumulated_msgs):
    """Return the new last DOY, stopping the script if doy does not follow last_doy.

    A doy that is not an integer leaves last_doy unchanged: the caller has
    already recorded that problem through validate_integer.
    """
    try:
        doy_value = int(doy)
    except ValueError:
        return last_doy
    if doy_value != last_doy + 1:
        stop_error(add_error_msg(accumulated_msgs, "Line %d contains a DOY (%s) that is not consecutive (previous DOY is %d)." % (line_no, doy, last_doy)))
    return doy_value


accumulated_msgs = ""
last_doy = 0
# Parse the input file, checking the header line and then validating each
# data line: normals data lines must have 10 comma-separated items and
# actuals data lines must have 6.  Line numbers in messages are the
# zero-based index of the line, so the first data line is reported as 1.
if args.data_type == "normals":
    input_file = args.input_normals
    num_normals_rows = 0
else:
    input_file = args.input_actuals
with open(input_file, "r") as ih:
    for i, line in enumerate(ih):
        line = line.rstrip("\r\n")
        items = line.split(",")
        if args.data_type == "normals":
            num_normals_rows += 1
            if i == 0:
                if line != NORMALS_HEADER:
                    accumulated_msgs = add_error_msg(accumulated_msgs, "The header is invalid, must be %s" % NORMALS_HEADER)
                continue
            # i is zero-based, so index 367 is already the 368th line.
            if i > 366:
                accumulated_msgs = add_error_msg(accumulated_msgs, "The input file contains more than 367 lines (must be 1 header line and 366 data lines).")
                stop_error(accumulated_msgs)
            if len(items) != 10:
                accumulated_msgs = add_error_msg(accumulated_msgs, "Line %d contains %s columns, (must be 10)." % (i, len(items)))
                stop_error(accumulated_msgs)
            stationid = items[0].strip()
            if len(stationid) == 0:
                accumulated_msgs = empty_value(i, "stationid", accumulated_msgs)
            latitude = items[1].strip()
            accumulated_msgs = validate_decimal(i, latitude, accumulated_msgs, "latitude")
            longitude = items[2].strip()
            accumulated_msgs = validate_decimal(i, longitude, accumulated_msgs, "longitude")
            elev_m = items[3].strip()
            accumulated_msgs = validate_decimal(i, elev_m, accumulated_msgs, "elev_m")
            name = items[4].strip()
            if len(name) == 0:
                accumulated_msgs = empty_value(i, "name", accumulated_msgs)
            st = items[5].strip()
            if len(st) == 0:
                accumulated_msgs = empty_value(i, "st", accumulated_msgs)
            mmdd = items[6].strip()
            accumulated_msgs = validate_mmdd(i, mmdd, accumulated_msgs)
            doy = items[7].strip()
            accumulated_msgs = validate_integer(i, doy, accumulated_msgs, "doy")
            # Make sure the DOY values are consecutive, starting from 1
            # (last_doy is initialised to 0).
            last_doy = _check_consecutive_doy(i, doy, last_doy, accumulated_msgs)
            tmin = items[8].strip()
            accumulated_msgs = validate_decimal(i, tmin, accumulated_msgs, "tmin")
            tmax = items[9].strip()
            accumulated_msgs = validate_decimal(i, tmax, accumulated_msgs, "tmax")
        else:
            if i == 0:
                if line != ACTUALS_HEADER:
                    accumulated_msgs = add_error_msg(accumulated_msgs, "The header is invalid, must be %s" % ACTUALS_HEADER)
                continue
            # i is zero-based, so index 367 is already the 368th line.
            if i > 366:
                accumulated_msgs = add_error_msg(accumulated_msgs, "The input file contains more than 367 lines (must be 1 header line and no more than 366 data lines).")
                stop_error(accumulated_msgs)
            if len(items) != 6:
                accumulated_msgs = add_error_msg(accumulated_msgs, "Line %d contains %s columns, (must be 6)." % (i, len(items)))
                stop_error(accumulated_msgs)
            latitude = items[0].strip()
            accumulated_msgs = validate_decimal(i, latitude, accumulated_msgs, "LATITUDE")
            longitude = items[1].strip()
            accumulated_msgs = validate_decimal(i, longitude, accumulated_msgs, "LONGITUDE")
            date_string = items[2].strip()
            accumulated_msgs = validate_date_string(i, date_string, accumulated_msgs)
            doy = items[3].strip()
            accumulated_msgs = validate_integer(i, doy, accumulated_msgs, "doy")
            # Make sure the DOY values are consecutive.  Actuals may start
            # on any day of the year, so the first data line seeds last_doy.
            if i == 1:
                try:
                    last_doy = int(doy)
                except ValueError:
                    # The error for an invalid integer was captured above.
                    pass
            else:
                last_doy = _check_consecutive_doy(i, doy, last_doy, accumulated_msgs)
            tmin = items[4].strip()
            accumulated_msgs = validate_decimal(i, tmin, accumulated_msgs, "tmin")
            tmax = items[5].strip()
            accumulated_msgs = validate_decimal(i, tmax, accumulated_msgs, "tmax")
# Normals files must be complete: 1 header line plus 366 data lines.
if args.data_type == "normals" and num_normals_rows != 367:
    accumulated_msgs = add_error_msg(accumulated_msgs, "The input file contains %d rows, (must be 367)." % num_normals_rows)

if len(accumulated_msgs) > 0:
    stop_error(accumulated_msgs)

# The input passed every check, so it becomes the output dataset unchanged.
shutil.copyfile(input_file, args.output)
