Mercurial > repos > jay > gaiac_regression_plot
changeset 5:0b581fb9fc70 draft
planemo upload for repository https://github.com/jaidevjoshi83/gaiac commit d93e649fdfa6940af167c34c2d6de23f231e2f1b-dirty
| author | jay |
|---|---|
| date | Wed, 28 Jan 2026 14:33:14 +0000 |
| parents | 72b3a1ec495b |
| children | 2502e3ee6397 |
| files | gaiac_time_sync/gaiac_time_sync.py gaiac_time_sync/test_output.csv gaiac_time_sync/test_output_3files.csv |
| diffstat | 3 files changed, 89 insertions(+), 20 deletions(-) [+] |
line wrap: on
line diff
--- a/gaiac_time_sync/gaiac_time_sync.py Tue Jan 27 03:08:31 2026 +0000 +++ b/gaiac_time_sync/gaiac_time_sync.py Wed Jan 28 14:33:14 2026 +0000 @@ -4,43 +4,103 @@ def align_sensor_data(file_list, date_time, sep=',', output_mode='multiple', output='aligned.tsv'): - file_list = file_list.split(',') + if isinstance(file_list, str): + file_list = file_list.split(',') + if len(file_list) < 2: print("Please provide at least two files.") return - # Read all files into a list of dataframes - dfs = [pd.read_csv(file, sep=sep, parse_dates=[date_time]) for file in file_list] + # Check if date_time is numeric (column index) or string (column name) + use_index = False + try: + # User input '1' likely means 1st column (index 0) + col_idx = int(date_time) - 1 + if col_idx < 0: + raise ValueError("Column index must be >= 1") + use_index = True + print(f"Using column index {col_idx} (from input '{date_time}')") + except ValueError: + # Not an integer, treat as column name + merge_col = date_time + print(f"Using column name '{merge_col}'") - # Get common timestamps by successive inner merges - common_times = dfs[0][[date_time]] + dfs = [] + for file in file_list: + file = file.strip() # clean whitespace + if not file: continue + + if use_index: + # Parse dates using index + df = pd.read_csv(file, sep=sep, parse_dates=[col_idx]) + + original_col_name = df.columns[col_idx] + merge_col = "__common_timestamp__" + df.rename(columns={original_col_name: merge_col}, inplace=True) + else: + # Parse dates using name + df = pd.read_csv(file, sep=sep, parse_dates=[date_time]) + + dfs.append(df) + + if not dfs: + print("No valid dataframes loaded.") + return + + common_times = dfs[0][[merge_col]] for df in dfs[1:]: - common_times = common_times.merge(df[[date_time]], on=date_time, how='inner') + common_times = common_times.merge(df[[merge_col]], on=merge_col, how='inner') - # Now filter each dataframe to contain only common timestamps aligned_dfs = [ - 
df[df[date_time].isin(common_times[date_time])].reset_index(drop=True) + df[df[merge_col].isin(common_times[merge_col])].reset_index(drop=True) for df in dfs ] + + if use_index and output_mode == 'multiple': + for df in aligned_dfs: + df.rename(columns={merge_col: "Date_Time"}, inplace=True) + # Update merge_col to new name so single mode merging works if triggered + merge_col = "Date_Time" + # Output files if output_mode == 'multiple': for i, (file, df) in enumerate(zip(file_list, aligned_dfs)): - filename = os.path.splitext(os.path.basename(file))[0] + # Clean filename logic (handle paths) + filename = os.path.splitext(os.path.basename(file.strip()))[0] + + pass + + + if output_mode == 'single': + # Merge all aligned dataframes + merged_df = aligned_dfs[0] + # Rename back if needed? + if use_index: + merged_df.rename(columns={merge_col: "Date_Time"}, inplace=True) + merge_col = "Date_Time" + + for i, df in enumerate(aligned_dfs[1:]): + + if use_index: + df.rename(columns={'__common_timestamp__': merge_col}, inplace=True) + + merged_df = merged_df.merge(df, on=merge_col, how='inner') + + merged_df.to_csv(output, index=False, sep=sep) + print("Single merged file saved.") + + elif output_mode == 'multiple': # Original logic for multiple + + for i, (file, df) in enumerate(zip(file_list, aligned_dfs)): + filename = os.path.splitext(os.path.basename(file.strip()))[0] + output_prefix = os.path.splitext(output)[0] output_file = f"{output_prefix}_{filename}.csv" df.to_csv(output_file, index=False, sep=sep) - print("Aligned files saved individually.") - elif output_mode == 'single': - # Merge all aligned dataframes on date_time - merged_df = aligned_dfs[0] - for df in aligned_dfs[1:]: - merged_df = merged_df.merge(df, on=date_time, how='inner') + print("Aligned files saved individually.") + else: + print("Invalid output mode.") - - merged_df.to_csv(output, index=False, sep=sep) - print("Single merged file saved.") - else: - print("Invalid output mode. 
Use 'multiple' or 'single'.") def main():
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/gaiac_time_sync/test_output.csv Wed Jan 28 14:33:14 2026 +0000
@@ -0,0 +1,5 @@
+Date_Time Temparature1 Humidity1 Temparature2 Humidity2
+2019-06-07 13:28:00 39.0 50.471 39.0 50.471
+2019-06-07 13:29:00 39.0 51.0 39.0 50.471
+2019-06-07 13:30:00 39.588 49.647 39.588 49.647
+2019-06-07 13:31:00 38.9 50.0 40.0 49.0
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/gaiac_time_sync/test_output_3files.csv Wed Jan 28 14:33:14 2026 +0000
@@ -0,0 +1,4 @@
+Date_Time Temparature1 Humidity1 Temparature2 Humidity2 Temparature3 humidity3
+2019-06-07 13:28:00 39.0 50.471 39.0 50.471 38 53.0
+2019-06-07 13:29:00 39.0 51.0 39.0 50.471 38 52.824
+2019-06-07 13:31:00 38.9 50.0 40.0 49.0 38 51.944
