# HG changeset patch # User greg # Date 1528199579 14400 # Node ID 1b6864c5b50aaf5bed6d66f9130431f855e2beb9 # Parent 99e1c1300fcd428a6647b57569e4e16c119fbff6 Uploaded diff -r 99e1c1300fcd -r 1b6864c5b50a insect_phenology_model.R --- a/insect_phenology_model.R Tue Jun 05 07:52:51 2018 -0400 +++ b/insect_phenology_model.R Tue Jun 05 07:52:59 2018 -0400 @@ -6,6 +6,7 @@ make_option(c("--adult_mortality"), action="store", dest="adult_mortality", type="integer", help="Adjustment rate for adult mortality"), make_option(c("--adult_accumulation"), action="store", dest="adult_accumulation", type="integer", help="Adjustment of degree-days accumulation (old nymph->adult)"), make_option(c("--egg_mortality"), action="store", dest="egg_mortality", type="integer", help="Adjustment rate for egg mortality"), + make_option(c("--end_date"), action="store", dest="end_date", default=NULL, help="End date for custom date interval"), make_option(c("--input_norm"), action="store", dest="input_norm", help="30 year normals temperature data for selected station"), make_option(c("--input_ytd"), action="store", dest="input_ytd", default=NULL, help="Year-to-date temperature data for selected location"), make_option(c("--insect"), action="store", dest="insect", help="Insect name"), @@ -24,6 +25,7 @@ make_option(c("--plot_generations_separately"), action="store", dest="plot_generations_separately", help="Plot Plot P, F1 and F2 as separate lines or pool across them"), make_option(c("--plot_std_error"), action="store", dest="plot_std_error", help="Plot Standard error"), make_option(c("--replications"), action="store", dest="replications", type="integer", help="Number of replications"), + make_option(c("--start_date"), action="store", dest="start_date", default=NULL, help="Start date for custom date interval"), make_option(c("--young_nymph_accumulation"), action="store", dest="young_nymph_accumulation", type="integer", help="Adjustment of degree-days accumulation (egg->young nymph)") ) @@ -31,10 +33,11 @@ args <- parse_args(parser, positional_arguments=TRUE); opt <- args$options; -add_daylight_length = function(temperature_data_frame, num_rows) { - # Return a vector of daylight length (photoperido profile) for - # the number of days specified in the input_ytd temperature data - # (from Forsythe 1995). +add_daylight_length = function(temperature_data_frame) { + # Return temperature_data_frame with an added column + # of daylight length (photoperido profile). + num_rows = dim(temperature_data_frame)[1]; + # From Forsythe 1995. p = 0.8333; latitude = temperature_data_frame$LATITUDE[1]; daylight_length_vector = NULL; @@ -63,6 +66,26 @@ return(data_frame); } +extract_date_interval_rows = function(df, start_date, end_date) { + date_interval_rows = df[df$DATE >= start_date & df$DATE <= end_date]; + return(date_interval_rows); +} + +from_30_year_normals = function(norm_data_frame, start_date_doy, end_date_doy, year) { + # The data we want is fully contained within the 30 year normals data. + first_norm_row = which(norm_data_frame$DOY==start_date_doy); + last_norm_row = which(norm_data_frame$DOY==end_date_doy); + # Add 1 to the number of rows to ensure that the end date is included. + tmp_data_frame_rows = last_norm_row - first_norm_row + 1; + tmp_data_frame = get_new_temperature_data_frame(nrow=tmp_data_frame_rows); + j = 0; + for (i in first_norm_row:last_norm_row) { + j = j + 1; + tmp_data_frame[j,] = get_next_normals_row(norm_data_frame, year, i); + } + return (tmp_data_frame); +} + get_file_path = function(life_stage, base_name, life_stage_nymph=NULL, life_stage_adult=NULL) { if (!is.null(life_stage_nymph)) { lsi = get_life_stage_index(life_stage, life_stage_nymph=life_stage_nymph); @@ -123,7 +146,49 @@ return(list(p_m, p_se, f1_m, f1_se, f2_m, f2_se)) } -get_next_normals_row = function(norm_data_frame, year, is_leap_year, index) { +get_new_norm_data_frame = function(is_leap_year, input_norm=NULL, nrow=0) { + # The input_norm data has the following 10 columns: + # STATIONID, LATITUDE, LONGITUDE, ELEV_M, NAME, ST, MMDD, DOY, TMIN, TMAX + column_names = c("STATIONID", "LATITUDE","LONGITUDE", "ELEV_M", "NAME", "ST", "MMDD", "DOY", "TMIN", "TMAX"); + if (is.null(input_norm)) { + norm_data_frame = data.frame(matrix(ncol=10, nrow)); + # Set the norm_data_frame column names for access. + colnames(norm_data_frame) = column_names; + } else { + norm_data_frame = read.csv(file=input_norm, header=T, strip.white=TRUE, stringsAsFactors=FALSE, sep=","); + # Set the norm_data_frame column names for access. + colnames(norm_data_frame) = column_names; + if (!is_leap_year) { + # All normals data includes Feb 29 which is row 60 in + # the data, so delete that row if we're not in a leap year. + norm_data_frame = norm_data_frame[-c(60),]; + # Since we've removed row 60, we need to subtract 1 from + # each value in the DOY column of the data frame starting + # with the 60th row. + num_rows = dim(norm_data_frame)[1]; + for (i in 60:num_rows) { + leap_year_doy = norm_data_frame$DOY[i]; + non_leap_year_doy = leap_year_doy - 1; + norm_data_frame$DOY[i] = non_leap_year_doy; + } + } + } + return (norm_data_frame); +} + +get_new_temperature_data_frame = function(input_ytd=NULL, nrow=0) { + # The input_ytd data has the following 6 columns: + # LATITUDE, LONGITUDE, DATE, DOY, TMIN, TMAX + if (is.null(input_ytd)) { + temperature_data_frame = data.frame(matrix(ncol=6, nrow)); + } else { + temperature_data_frame = read.csv(file=input_ytd, header=T, strip.white=TRUE, stringsAsFactors=FALSE, sep=","); + } + colnames(temperature_data_frame) = c("LATITUDE", "LONGITUDE", "DATE", "DOY", "TMIN", "TMAX"); + return(temperature_data_frame); +} + +get_next_normals_row = function(norm_data_frame, year, index) { # Return the next 30 year normals row formatted # appropriately for the year-to-date data. latitude = norm_data_frame[index,"LATITUDE"][1]; @@ -132,18 +197,12 @@ mmdd = norm_data_frame[index,"MMDD"][1]; date_str = paste(year, mmdd, sep="-"); doy = norm_data_frame[index,"DOY"][1]; - if (!is_leap_year) { - # Since all normals data includes Feb 29, we have to - # subtract 1 from DOY if we're not in a leap year since - # we removed the Feb 29 row from the data frame above. - doy = as.integer(doy) - 1; - } tmin = norm_data_frame[index,"TMIN"][1]; tmax = norm_data_frame[index,"TMAX"][1]; return(list(latitude, longitude, date_str, doy, tmin, tmax)); } -get_temperature_at_hour = function(latitude, temperature_data_frame, row, num_days) { +get_temperature_at_hour = function(latitude, temperature_data_frame, row) { # Base development threshold for Brown Marmorated Stink Bug # insect phenology model. threshold = 14.17; @@ -214,7 +273,7 @@ return(c(curr_mean_temp, averages)) } -get_tick_index = function(index, last_tick, ticks, month_labels) { +get_tick_index = function(index, last_tick, ticks, tick_labels, tick_sep) { # The R code tries hard not to draw overlapping tick labels, and so # will omit labels where they would abut or overlap previously drawn # labels. This can result in, for example, every other tick being @@ -225,8 +284,8 @@ return(length(ticks)+1); } last_saved_tick = ticks[[length(ticks)]]; - if (index-last_saved_tick<3) { - last_saved_month = month_labels[[length(month_labels)]]; + if (index-last_saved_tick 0) { + prepend_end_norm_row = which(temperature_data_frame$DOY==prepend_end_doy_norm); + } else { + prepend_end_norm_row = 0; + } + if (append_start_doy_norm > 0) { + append_start_norm_row = which(temperature_data_frame$DOY==append_start_doy_norm); + } else { + append_start_norm_row = 0; + } + num_rows = dim(temperature_data_frame)[1]; + tick_labels = list(); ticks = list(); current_month_label = NULL; last_tick = 0; + if (date_interval) { + tick_sep = 0; + } else { + tick_sep = 3; + } for (i in 1:num_rows) { - if (start_doy_ytd > 1 & i==start_doy_ytd-1) { + # Get the year and month from the date which + # has the format YYYY-MM-DD. + date = format(temperature_data_frame$DATE[i]); + # Get the month label. + items = strsplit(date, "-")[[1]]; + month = items[2]; + month_label = month.abb[as.integer(month)]; + day = as.integer(items[3]); + doy = as.integer(temperature_data_frame$DOY[i]); + # We're plotting the entire year, so ticks will + # occur on Sundays and the first of each month. + if (i == prepend_end_norm_row) { # Add a tick for the end of the 30 year normnals data # that was prepended to the year-to-date data. - tick_index = get_tick_index(i, last_tick, ticks, month_labels) + label_str = "End prepended 30 year normals"; + tick_index = get_tick_index(i, last_tick, ticks, tick_labels, tick_sep) ticks[tick_index] = i; - month_labels[tick_index] = "End prepended 30 year normals"; + if (date_interval) { + # Append the day to label_str + tick_labels[tick_index] = paste(label_str, day, sep=" "); + } else { + tick_labels[tick_index] = label_str; + } last_tick = i; - } else if (end_doy_ytd > 0 & i==end_doy_ytd+1) { + } else if (doy == append_start_doy_norm) { # Add a tick for the start of the 30 year normnals data # that was appended to the year-to-date data. - tick_index = get_tick_index(i, last_tick, ticks, month_labels) + label_str = "Start appended 30 year normals"; + tick_index = get_tick_index(i, last_tick, ticks, tick_labels, tick_sep) ticks[tick_index] = i; - month_labels[tick_index] = "Start appended 30 year normals"; + if (!identical(current_month_label, month_label)) { + # Append the month to label_str. + label_str = paste(label_str, month_label, spe=" "); + current_month_label = month_label; + } + if (date_interval) { + # Append the day to label_str + label_str = paste(label_str, day, sep=" "); + } + tick_labels[tick_index] = label_str; last_tick = i; } else if (i==num_rows) { # Add a tick for the last day of the year. - tick_index = get_tick_index(i, last_tick, ticks, month_labels) + label_str = ""; + tick_index = get_tick_index(i, last_tick, ticks, tick_labels, tick_sep) ticks[tick_index] = i; - month_labels[tick_index] = ""; - last_tick = i; + if (!identical(current_month_label, month_label)) { + # Append the month to label_str. + label_str = month_label; + current_month_label = month_label; + } + if (date_interval) { + # Append the day to label_str + label_str = paste(label_str, day, sep=" "); + } + tick_labels[tick_index] = label_str; } else { - # Get the year and month from the date which - # has the format YYYY-MM-DD. - date = format(temperature_data_frame$DATE[i]); - # Get the month label. - items = strsplit(date, "-")[[1]]; - month = items[2]; - month_label = month.abb[as.integer(month)]; if (!identical(current_month_label, month_label)) { - # Add an x-axis tick for the month. - tick_index = get_tick_index(i, last_tick, ticks, month_labels) + # Add a tick for the month. + tick_index = get_tick_index(i, last_tick, ticks, tick_labels, tick_sep) ticks[tick_index] = i; - month_labels[tick_index] = month_label; + if (date_interval) { + # Append the day to the month. + tick_labels[tick_index] = paste(month_label, day, sep=" "); + } else { + tick_labels[tick_index] = month_label; + } current_month_label = month_label; last_tick = i; } - tick_index = get_tick_index(i, last_tick, ticks, month_labels) + tick_index = get_tick_index(i, last_tick, ticks, tick_labels, tick_sep) if (!is.null(tick_index)) { - # Get the day. - day = weekdays(as.Date(date)); - if (day=="Sunday") { - # Add an x-axis tick if we're on a Sunday. - ticks[tick_index] = i; - # Add a blank month label so it is not displayed. - month_labels[tick_index] = ""; - last_tick = i; + if (date_interval) { + # Add a tick for every day. The first tick is the + # month label, so add a tick only if i is not 1 + if (i>1 & day>1) { + tick_index = get_tick_index(i, last_tick, ticks, tick_labels, tick_sep) + ticks[tick_index] = i; + # Add the day as the label. + tick_labels[tick_index] = day; + last_tick = i; + } + } else { + # Get the day. + day = weekdays(as.Date(date)); + if (day=="Sunday") { + # Add a tick if we're on a Sunday. + ticks[tick_index] = i; + # Add a blank month label so it is not displayed. + tick_labels[tick_index] = ""; + last_tick = i; + } } } } } - return(list(ticks, month_labels)); + return(list(ticks, tick_labels)); +} + +get_year_from_date = function(date_str) { + date_str_items = strsplit(date_str, "-")[[1]]; + return (date_str_items[1]); } is_leap_year = function(date_str) { @@ -353,86 +479,246 @@ return(mortality.probability); } -parse_input_data = function(input_ytd, input_norm, num_days_ytd, location) { - if (is.null(input_ytd)) { - # We're analysing only the 30 year normals data, so create an empty - # data frame for containing temperature data after it is converted - # from the 30 year normals format to the year-to-date format. - temperature_data_frame = data.frame(matrix(ncol=6, nrow=0)); - colnames(temperature_data_frame) = c("LATITUDE", "LONGITUDE", "DATE", "DOY", "TMIN", "TMAX"); - # Base all dates on the current date since 30 year - # normals data does not include any dates. - year = format(Sys.Date(), "%Y"); - start_date = paste(year, "01", "01", sep="-"); - end_date = paste(year, "12", "31", sep="-"); - # Set invalid start and end DOY. - start_doy_ytd = 0; - end_doy_ytd = 0; +parse_input_data = function(input_ytd, input_norm, location, start_date, end_date) { + # The end DOY for norm data prepended to ytd data. + prepend_end_doy_norm = 0; + # The start DOY for norm data appended to ytd data. + append_start_doy_norm = 0; + if (is.null(start_date) && is.null(end_date)) { + # We're not dealing with a date interval. + date_interval = FALSE; + if (is.null(input_ytd)) { + # Base all dates on the current date since 30 year + # normals data does not include any dates. + year = format(Sys.Date(), "%Y"); + } } else { - # Read the input_ytd temperature datafile into a data frame. - # The input_ytd data has the following 6 columns: - # LATITUDE, LONGITUDE, DATE, DOY, TMIN, TMAX - temperature_data_frame = read.csv(file=input_ytd, header=T, strip.white=TRUE, stringsAsFactors=FALSE, sep=","); - # Set the temperature_data_frame column names for access. - colnames(temperature_data_frame) = c("LATITUDE", "LONGITUDE", "DATE", "DOY", "TMIN", "TMAX"); - # Get the start date. - start_date = temperature_data_frame$DATE[1]; - end_date = temperature_data_frame$DATE[num_days_ytd]; - # Extract the year from the start date. - date_str = format(start_date); - date_str_items = strsplit(date_str, "-")[[1]]; - year = date_str_items[1]; - # Save the first DOY to later check if start_date is Jan 1. - start_doy_ytd = as.integer(temperature_data_frame$DOY[1]); - end_doy_ytd = as.integer(temperature_data_frame$DOY[num_days_ytd]); + date_interval = TRUE; + year = get_year_from_date(start_date); + # Get the DOY for start_date and end_date. + start_date_doy = as.integer(strftime(start_date, format="%j")); + end_date_doy = as.integer(strftime(end_date, format="%j")); + } + if (is.null(input_ytd)) { + # We're processing only the 30 year normals data. + processing_year_to_date_data = FALSE; + if (is.null(start_date) && is.null(end_date)) { + # We're processing the entire year, so we can + # set the start_date to Jan 1. + start_date = paste(year, "01", "01", sep="-"); + } + } else { + processing_year_to_date_data = TRUE; + # Read the input_ytd temperature data file into a data frame. + temperature_data_frame = get_new_temperature_data_frame(input_ytd=input_ytd); + num_ytd_rows = dim(temperature_data_frame)[1]; + if (!date_interval) { + start_date = temperature_data_frame$DATE[1]; + year = get_year_from_date(start_date); + } } # See if we're in a leap year. is_leap_year = is_leap_year(start_date); - # Get the number of days in the year. - total_days = get_total_days(is_leap_year); # Read the input_norm temperature datafile into a data frame. - # The input_norm data has the following 10 columns: - # STATIONID, LATITUDE, LONGITUDE, ELEV_M, NAME, ST, MMDD, DOY, TMIN, TMAX - norm_data_frame = read.csv(file=input_norm, header=T, strip.white=TRUE, stringsAsFactors=FALSE, sep=","); - # Set the norm_data_frame column names for access. - colnames(norm_data_frame) = c("STATIONID", "LATITUDE","LONGITUDE", "ELEV_M", "NAME", "ST", "MMDD", "DOY", "TMIN", "TMAX"); - # All normals data includes Feb 29 which is row 60 in - # the data, so delete that row if we're not in a leap year. - if (!is_leap_year) { - norm_data_frame = norm_data_frame[-c(60),]; + norm_data_frame = get_new_norm_data_frame(is_leap_year, input_norm=input_norm); + if (processing_year_to_date_data) { + if (date_interval) { + # We're plotting a date interval. + start_date_ytd_row = which(temperature_data_frame$DATE==start_date); + if (length(start_date_ytd_row) > 0) { + # The start date is contained within the input_ytd data. + start_date_ytd_row = start_date_ytd_row[1]; + start_doy_ytd = as.integer(temperature_data_frame$DOY[start_date_ytd_row]); + } else { + # The start date is contained within the input_norm data. + start_date_ytd_row = 0; + start_date_norm_row = which(norm_data_frame$DOY==start_date_doy); + } + end_date_ytd_row = which(temperature_data_frame$DATE==end_date); + if (length(end_date_ytd_row) > 0) { + end_date_ytd_row = end_date_ytd_row[1]; + # The end date is contained within the input_ytd data. + end_doy_ytd = as.integer(temperature_data_frame$DOY[end_date_ytd_row]); + } else { + end_date_ytd_row = 0; + } + } else { + # We're plotting an entire year. + # Get the start date and end date from temperature_data_frame. + start_date_ytd_row = 1; + # Temporarily set start_date to get the year. + start_date = temperature_data_frame$DATE[1]; + end_date_ytd_row = num_ytd_rows; + end_date = temperature_data_frame$DATE[num_ytd_rows]; + date_str = format(start_date); + # Extract the year from the start date. + date_str_items = strsplit(date_str, "-")[[1]]; + # Get the year. + year = date_str_items[1]; + # Properly set the start_date to be Jan 1 of the year. + start_date = paste(year, "01", "01", sep="-"); + # Properly set the end_date to be Dec 31 of the year. + end_date = paste(year, "12", "31", sep="-"); + # Save the first DOY to later check if start_date is Jan 1. + start_doy_ytd = as.integer(temperature_data_frame$DOY[1]); + end_doy_ytd = as.integer(temperature_data_frame$DOY[num_ytd_rows]); + } + } else { + # We're processing only the 30 year normals data, so create an empty + # data frame for containing temperature data after it is converted + # from the 30 year normals format to the year-to-date format. + temperature_data_frame = get_new_temperature_data_frame(); + if (date_interval) { + # We're plotting a date interval. + # Extract the year, month and day from the start date. + start_date_str = format(start_date); + start_date_str_items = strsplit(start_date_str, "-")[[1]]; + year = start_date_str_items[1]; + start_date_month = start_date_str_items[2]; + start_date_day = start_date_str_items[3]; + start_date = paste(year, start_date_month, start_date_day, sep="-"); + # Extract the month and day from the end date. + end_date_str = format(start_date); + end_date_str_items = strsplit(end_date_str, "-")[[1]]; + end_date_month = end_date_str_items[2]; + end_date_day = end_date_str_items[3]; + end_date = paste(year, end_date_month, end_date_day, sep="-"); + } else { + # We're plotting an entire year. + start_date = paste(year, "01", "01", sep="-"); + end_date = paste(year, "12", "31", sep="-"); + } } - # Set the location to be the station name if the user elected no to enter it. - if (is.null(location) | length(location)==0) { + # Set the location to be the station name if the user elected not to enter it. + if (is.null(location) | length(location) == 0) { location = norm_data_frame$NAME[1]; } - if (is.null(input_ytd)) { - # Convert the 30 year normals data to the year-to-date format. - for (i in 1:total_days) { - temperature_data_frame[i,] = get_next_normals_row(norm_data_frame, year, is_leap_year, i); + if (processing_year_to_date_data) { + # Merge the year-to-date data with the 30 year normals data. + if (date_interval) { + # The values of start_date_ytd_row and end_date_ytd_row were set above. + if (start_date_ytd_row > 0 & end_date_ytd_row > 0) { + # The date interval is contained within the input_ytd + # data, so we don't need to merge the 30 year normals data. + temperature_data_frame = temperature_data_frame[start_date_ytd_row:end_date_ytd_row,]; + } else if (start_date_ytd_row == 0 & end_date_ytd_row > 0) { + # The date interval starts in input_norm and ends in + # input_ytd, so prepend appropriate rows from input_norm + # to appropriate rows from input_ytd. + first_norm_row = which(norm_data_frame$DOY==start_date_doy); + # Get the first DOY from temperature_data_frame. + first_ytd_doy = temperature_data_frame$DOY[1]; + # End DOY of input_norm data prepended to input_ytd. + prepend_end_doy_norm = first_ytd_doy - 1; + # Get the number of rows for the restricted date interval + # that are contained in temperature_data_frame. + num_temperature_data_frame_rows = end_date_ytd_row; + # Get the last row needed from the 30 year normals data. + last_norm_row = which(norm_data_frame$DOY==prepend_end_doy_norm); + # Get the number of rows for the restricted date interval + # that are contained in norm_data_frame. + num_norm_data_frame_rows = last_norm_row - first_norm_row; + # Create a temporary data frame to contain the 30 year normals + # data from the start date to the date immediately prior to the + # first row of the input_ytd data. + tmp_norm_data_frame = get_new_temperature_data_frame(nrow=num_temperature_data_frame_rows+num_norm_data_frame_rows); + j = 1; + for (i in first_norm_row:last_norm_row) { + # Populate the temp_data_frame row with + # values from norm_data_frame. + tmp_norm_data_frame[j,] = get_next_normals_row(norm_data_frame, year, i); + j = j + 1; + } + # Create a second temporary data frame containing the + # appropriate rows from temperature_data_frame. + tmp_temperature_data_frame = temperature_data_frame[1:num_temperature_data_frame_rows,]; + # Merge the 2 temporary data frames. + temperature_data_frame = rbind(tmp_norm_data_frame, tmp_temperature_data_frame); + } else if (start_date_ytd_row > 0 & end_date_ytd_row == 0) { + # The date interval starts in input_ytd and ends in input_norm, + # so append appropriate rows from input_norm to appropriate rows + # from input_ytd. First, get the number of rows for the restricted + # date interval that are contained in temperature_data_frame. + num_temperature_data_frame_rows = num_ytd_rows - start_date_ytd_row + 1; + # Get the DOY of the last row in the input_ytd data. + last_ytd_doy = temperature_data_frame$DOY[num_ytd_rows]; + # Get the DOYs for the first and last rows from norm_data_frame + # that will be appended to temperature_data_frame. + append_start_doy_norm = last_ytd_doy + 1; + # Get the row from norm_data_frame containing first_norm_doy. + first_norm_row = which(norm_data_frame$DOY == append_start_doy_norm); + # Get the row from norm_data_frame containing end_date_doy. + last_norm_row = which(norm_data_frame$DOY == end_date_doy); + # Get the number of rows for the restricted date interval + # that are contained in norm_data_frame. + num_norm_data_frame_rows = last_norm_row - first_norm_row; + # Create a temporary data frame to contain the data + # taken from both temperature_data_frame and norm_data_frame + # for the date interval. + tmp_data_frame = get_new_temperature_data_frame(nrow=num_temperature_data_frame_rows+num_norm_data_frame_rows); + # Populate tmp_data_frame with the appropriate rows from temperature_data_frame. + j = start_date_ytd_row; + for (i in 1:num_temperature_data_frame_rows) { + tmp_data_frame[i,] = temperature_data_frame[j,]; + j = j + 1; + } + # Apppend the appropriate rows from norm_data_frame to tmp_data_frame. + current_iteration = num_temperature_data_frame_rows + 1; + num_iterations = current_iteration + num_norm_data_frame_rows; + j = first_norm_row; + for (i in current_iteration:num_iterations) { + tmp_data_frame[i,] = get_next_normals_row(norm_data_frame, year, j); + j = j + 1; + } + temperature_data_frame = tmp_data_frame[,]; + } else if (start_date_ytd_row == 0 & end_date_ytd_row == 0) { + # The date interval is contained witin input_norm. + temperature_data_frame = from_30_year_normals(norm_data_frame, start_date_doy, end_date_doy, year); + } + } else { + # We're plotting an entire year. + if (start_doy_ytd > 1) { + # The input_ytd data starts after Jan 1, so prepend + # appropriate rows from input_norm to temperature_data_frame. + prepend_end_doy_norm = start_doy_ytd - 1; + last_norm_row = which(norm_data_frame$DOY == prepend_end_doy_norm); + # Create a temporary data frame to contain the input_norm data + # from Jan 1 to the date immediately prior to start_date. + tmp_data_frame = temperature_data_frame[FALSE,]; + # Populate tmp_data_frame with appropriate rows from norm_data_frame. + for (i in 1:last_norm_row) { + tmp_data_frame[i,] = get_next_normals_row(norm_data_frame, year, i); + } + # Merge the temporary data frame with temperature_data_frame. + temperature_data_frame = rbind(tmp_data_frame, temperature_data_frame); + } + # Set the value of total_days. + total_days = get_total_days(is_leap_year); + if (end_doy_ytd < total_days) { + # Define the next row for the year-to-date data from the 30 year normals data. + append_start_doy_norm = end_doy_ytd + 1; + first_norm_row = which(norm_data_frame$DOY == append_start_doy_norm); + # Append the 30 year normals data to the year-to-date data. + for (i in first_norm_row:total_days) { + temperature_data_frame[i,] = get_next_normals_row(norm_data_frame, year, i); + } + } } } else { - # Merge the year-to-date data with the 30 year normals data. - if (start_doy_ytd > 1) { - # The year-to-date data starts after Jan 1, so create a - # temporary data frame to contain the 30 year normals data - # from Jan 1 to the date immediately prior to start_date. - tmp_data_frame = temperature_data_frame[FALSE,]; - for (i in 1:start_doy_ytd-1) { - tmp_data_frame[i,] = get_next_normals_row(norm_data_frame, year, is_leap_year, i); + # We're processing only the 30 year normals data. + if (date_interval) { + # Populate temperature_data_frame from norm_data_frame. + temperature_data_frame = from_30_year_normals(norm_data_frame, start_date_doy, end_date_doy, year); + } else { + total_days = get_total_days(is_leap_year); + for (i in 1:total_days) { + temperature_data_frame[i,] = get_next_normals_row(norm_data_frame, year, i); } - # Next merge the temporary data frame with the year-to-date data frame. - temperature_data_frame = rbind(tmp_data_frame, temperature_data_frame); - } - # Define the next row for the year-to-date data from the 30 year normals data. - first_normals_append_row = end_doy_ytd + 1; - # Append the 30 year normals data to the year-to-date data. - for (i in first_normals_append_row:total_days) { - temperature_data_frame[i,] = get_next_normals_row(norm_data_frame, year, is_leap_year, i); } } # Add a column containing the daylight length for each day. - temperature_data_frame = add_daylight_length(temperature_data_frame, total_days); - return(list(temperature_data_frame, start_date, end_date, start_doy_ytd, end_doy_ytd, is_leap_year, total_days, location)); + temperature_data_frame = add_daylight_length(temperature_data_frame); + return(list(temperature_data_frame, start_date, end_date, prepend_end_doy_norm, append_start_doy_norm, is_leap_year, location)); } render_chart = function(ticks, date_labels, chart_type, plot_std_error, insect, location, latitude, start_date, end_date, days, maxval, @@ -519,28 +805,85 @@ } } +stop_err = function(msg) { + cat(msg, file=stderr()); + quit(save="no", status=1); +} + +validate_date = function(date_str) { + valid_date = as.Date(date_str, format="%Y-%m-%d"); + if( class(valid_date)=="try-error" || is.na(valid_date)) { + msg = paste("Invalid date: ", date_str, ", valid date format is yyyy-mm-dd.", sep=""); + stop_err(msg); + } + return(valid_date); +} + +if (is.null(opt$input_ytd)) { + processing_year_to_date_data = FALSE; +} else { + processing_year_to_date_data = TRUE; +} # Determine if we're plotting generations separately. if (opt$plot_generations_separately=="yes") { plot_generations_separately = TRUE; } else { plot_generations_separately = FALSE; } -# Display the total number of days in the Galaxy history item blurb. -cat("Year-to-date number of days: ", opt$num_days_ytd, "\n"); - # Parse the inputs. -data_list = parse_input_data(opt$input_ytd, opt$input_norm, opt$num_days_ytd, opt$location); +data_list = parse_input_data(opt$input_ytd, opt$input_norm, opt$location, opt$start_date, opt$end_date); temperature_data_frame = data_list[[1]]; -# Information needed for plots. +# Information needed for plots, some of these values are +# being reset here since in some case they were set above. start_date = data_list[[2]]; end_date = data_list[[3]]; -start_doy_ytd = data_list[[4]]; -end_doy_ytd = data_list[[5]]; +prepend_end_doy_norm = data_list[[4]]; +append_start_doy_norm = data_list[[5]]; is_leap_year = data_list[[6]]; -total_days = data_list[[7]]; -total_days_vector = c(1:total_days); -location = data_list[[8]]; +location = data_list[[7]]; +if (is.null(opt$start_date) && is.null(opt$end_date)) { + # We're plotting an entire year. + date_interval = FALSE; + # Display the total number of days in the Galaxy history item blurb. + if (processing_year_to_date_data) { + cat("Number of days year-to-date: ", opt$num_days_ytd, "\n"); + } else { + if (is_leap_year) { + num_days = 366; + } else { + num_days = 365; + } + cat("Number of days in year: ", num_days, "\n"); + } +} else { + # FIXME: currently custom date fields are free text, but + # Galaxy should soon include support for a date selector + # at which point this tool should be enhanced to use it. + # Validate start_date. + date_interval = TRUE; + # Calaculate the number of days in the date interval rather + # than using the number of rows in the input temperature data. + start_date = validate_date(opt$start_date); + # Validate end_date. + end_date = validate_date(opt$end_date); + if (start_date >= end_date) { + stop_err("The start date must be between 1 and 50 days before the end date when setting date intervals for plots."); + } + # Calculate the number of days in the date interval. + num_days = difftime(end_date, start_date, units=c("days")); + # Add 1 to the number of days to make the dates inclusive. For + # example, if the user enters a date range of 2018-01-01 to + # 2018-01-31, they likely expect the end date to be included. + num_days = num_days + 1; + if (num_days > 50) { + # We need to restrict date intervals since + # plots render tick marks for each day. + stop_err("Date intervals for plotting cannot exceed 50 days."); + } + # Display the total number of days in the Galaxy history item blurb. + cat("Number of days in date interval: ", num_days, "\n"); +} # Create copies of the temperature data for generations P, F1 and F2 if we're plotting generations separately. if (plot_generations_separately) { temperature_data_frame_P = data.frame(temperature_data_frame); @@ -549,7 +892,7 @@ } # Get the ticks date labels for plots. -ticks_and_labels = get_x_axis_ticks_and_labels(temperature_data_frame, total_days, start_doy_ytd, end_doy_ytd); +ticks_and_labels = get_x_axis_ticks_and_labels(temperature_data_frame, prepend_end_doy_norm, append_start_doy_norm, date_interval); ticks = c(unlist(ticks_and_labels[1])); date_labels = c(unlist(ticks_and_labels[2])); # All latitude values are the same, so get the value for plots from the first row. @@ -615,6 +958,7 @@ } } # Initialize matrices. +total_days = dim(temperature_data_frame)[1]; if (process_eggs) { Eggs.replications = matrix(rep(0, total_days*opt$replications), ncol=opt$replications); } @@ -778,7 +1122,7 @@ doy = temperature_data_frame$DOY[row]; # Photoperiod in the day. photoperiod = temperature_data_frame$DAYLEN[row]; - temp.profile = get_temperature_at_hour(latitude, temperature_data_frame, row, total_days); + temp.profile = get_temperature_at_hour(latitude, temperature_data_frame, row); mean.temp = temp.profile[1]; averages.temp = temp.profile[2]; averages.day[row] = averages.temp; @@ -1459,6 +1803,7 @@ write.csv(temperature_data_frame_F2, file=file_path, row.names=F); } +total_days_vector = c(1:dim(temperature_data_frame)[1]); if (plot_generations_separately) { for (life_stage in life_stages) { if (life_stage == "Egg") {