Mercurial > repos > jay > gaiac_box_plot
comparison gaiac_time_sync/gaiac_time_sync.py @ 0:0763bb545f98 draft
planemo upload for repository https://github.com/jaidevjoshi83/gaiac.git commit c29a769ed165f313a6410925be24f776652a9663-dirty
author | jay |
---|---|
date | Thu, 15 May 2025 14:44:13 +0000 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:0763bb545f98 |
---|---|
1 import pandas as pd | |
2 import os | |
3 import argparse | |
4 | |
def align_sensor_data(file_list, date_time, sep=',', output_mode='multiple', output='aligned.tsv'):
    """Align delimited sensor files on the timestamps they all share.

    Every input file is read with pandas, the set of ``date_time`` values
    present in all of them is computed, and each file is filtered down to
    those rows.

    Args:
        file_list: Comma-separated string of input file paths; at least two
            paths are required.
        date_time: Name of the timestamp column present in every input file.
        sep: Field separator used both to read the inputs and to write the
            outputs.
        output_mode: 'multiple' writes one aligned file per input; 'single'
            writes one file with all aligned inputs merged on ``date_time``.
        output: In 'single' mode, the path of the merged file. In 'multiple'
            mode, its extension is dropped and the remainder is used as a
            prefix: each file is written as ``<prefix>_<input basename>.csv``.

    Returns:
        None. Results are written to disk and a status line is printed.
    """
    file_list = file_list.split(',')
    if len(file_list) < 2:
        print("Please provide at least two files.")
        return

    # Read all files into a list of dataframes
    dfs = [pd.read_csv(file, sep=sep, parse_dates=[date_time]) for file in file_list]

    # Get common timestamps by successive inner merges. Duplicates are dropped
    # first so repeated timestamps cannot multiply rows in this helper frame;
    # only the set of values matters for the isin() filter below.
    common_times = dfs[0][[date_time]].drop_duplicates()
    for df in dfs[1:]:
        common_times = common_times.merge(
            df[[date_time]].drop_duplicates(), on=date_time, how='inner'
        )

    # Now filter each dataframe to contain only common timestamps
    aligned_dfs = [
        df[df[date_time].isin(common_times[date_time])].reset_index(drop=True)
        for df in dfs
    ]

    # Output files
    if output_mode == 'multiple':
        # The original referenced an undefined name here (NameError); the
        # prefix is now derived from `output` with its extension removed.
        output_prefix = os.path.splitext(output)[0]
        for file, df in zip(file_list, aligned_dfs):
            filename = os.path.splitext(os.path.basename(file))[0]
            output_file = f"{output_prefix}_{filename}.csv"
            df.to_csv(output_file, index=False, sep=sep)
        print("Aligned files saved individually.")
    elif output_mode == 'single':
        # Merge all aligned dataframes on date_time
        merged_df = aligned_dfs[0]
        for df in aligned_dfs[1:]:
            merged_df = merged_df.merge(df, on=date_time, how='inner')

        merged_df.to_csv(output, index=False, sep=sep)
        print("Single merged file saved.")
    else:
        print("Invalid output mode. Use 'multiple' or 'single'.")
44 | |
45 | |
def main():
    """Parse command-line arguments and run ``align_sensor_data``.

    Options: ``-f/--files`` (required), ``-s/--sep``, ``-m/--mode``,
    ``-o/--output`` and ``-t/--date_time_column``. The parsed values are
    passed straight through to ``align_sensor_data``.
    """
    parser = argparse.ArgumentParser(description="Align sensor data files on common timestamps.")

    parser.add_argument(
        '-f', '--files',
        required=True,
        # The value is split on ',' downstream, so it is one string, not
        # several space-separated arguments.
        help='Comma-separated list of input file paths (at least two)'
    )

    parser.add_argument(
        '-s', '--sep',
        default='\t',
        # Help text previously claimed ',' although the default is a tab.
        help='Separator used in the input files (default: tab)'
    )

    parser.add_argument(
        '-m', '--mode',
        choices=['multiple', 'single'],
        default='single',
        # Help text previously claimed 'multiple' although the default is 'single'.
        help="Output mode: 'multiple' for individual files, 'single' for one merged file (default: single)"
    )

    parser.add_argument(
        '-o', '--output',
        default='aligned',
        help="Output filename"
    )

    parser.add_argument(
        '-t', '--date_time_column',
        default='date_time',
        help="Provide the name of the date and time column."
    )

    args = parser.parse_args()

    align_sensor_data(
        file_list=args.files,
        date_time=args.date_time_column,
        sep=args.sep,
        output_mode=args.mode,
        output=args.output
    )
89 | |
# Run the command-line interface only when executed as a script, not on import.
if __name__ == "__main__":
    main()