Mercurial > repos > jay > gaiac_precision
diff gaiac_time_sync/gaiac_time_sync.py @ 0:287d6cc86582 draft
planemo upload for repository https://github.com/jaidevjoshi83/gaiac.git commit c29a769ed165f313a6410925be24f776652a9663-dirty
author | jay |
---|---|
date | Thu, 15 May 2025 14:45:45 +0000 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/gaiac_time_sync/gaiac_time_sync.py Thu May 15 14:45:45 2025 +0000 @@ -0,0 +1,91 @@ +import pandas as pd +import os +import argparse + +def align_sensor_data(file_list, date_time, sep=',', output_mode='multiple', output='aligned.tsv'): + + file_list = file_list.split(',') + if len(file_list) < 2: + print("Please provide at least two files.") + return + + # Read all files into a list of dataframes + dfs = [pd.read_csv(file, sep=sep, parse_dates=[date_time]) for file in file_list] + + # Get common timestamps by successive inner merges + common_times = dfs[0][[date_time]] + for df in dfs[1:]: + common_times = common_times.merge(df[[date_time]], on=date_time, how='inner') + + # Now filter each dataframe to contain only common timestamps + aligned_dfs = [ + df[df[date_time].isin(common_times[date_time])].reset_index(drop=True) + for df in dfs + ] + + # Output files + if output_mode == 'multiple': + for i, (file, df) in enumerate(zip(file_list, aligned_dfs)): + filename = os.path.splitext(os.path.basename(file))[0] + output_file = f"{output_prefix}_{filename}.csv" + df.to_csv(output_file, index=False, sep=sep) + print("Aligned files saved individually.") + elif output_mode == 'single': + # Merge all aligned dataframes on date_time + merged_df = aligned_dfs[0] + for df in aligned_dfs[1:]: + merged_df = merged_df.merge(df, on=date_time, how='inner') + + + merged_df.to_csv(output, index=False, sep=sep) + print("Single merged file saved.") + else: + print("Invalid output mode. Use 'multiple' or 'single'.") + + +def main(): + parser = argparse.ArgumentParser(description="Align sensor data files on common timestamps.") + + parser.add_argument( + '-f', '--files', + required=True, + help='List of input file paths (at least two)' + ) + + parser.add_argument( + '-s', '--sep', + default='\t', + help='Separator used in the input files (default: ,)' + ) + + parser.add_argument( + '-m', '--mode', + choices=['multiple', 'single'], + default='single', + help="Output mode: 'multiple' for individual files, 'single' for one merged file (default: multiple)" + ) + + parser.add_argument( + '-o', '--output', + default='aligned', + help="Output filename" + ) + + parser.add_argument( + '-t', '--date_time_column', + default='date_time', + help="Provide the name of the date and time column." + ) + + args = parser.parse_args() + + align_sensor_data( + file_list=args.files, + date_time=args.date_time_column, + sep=args.sep, + output_mode=args.mode, + output=args.output + ) + +if __name__ == '__main__': + main()