annotate gaiac_time_sync/gaiac_time_sync.py @ 3:9de2b10ed246 draft default tip

planemo upload for repository https://github.com/jaidevjoshi83/gaiac commit e9587f93346c7b55e1be00bad5844bf2db3ed03d-dirty
author jay
date Thu, 10 Jul 2025 19:40:38 +0000
parents abfc2c9779d6
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
abfc2c9779d6 planemo upload for repository https://github.com/jaidevjoshi83/gaiac.git commit c29a769ed165f313a6410925be24f776652a9663-dirty
jay
parents:
diff changeset
1 import pandas as pd
abfc2c9779d6 planemo upload for repository https://github.com/jaidevjoshi83/gaiac.git commit c29a769ed165f313a6410925be24f776652a9663-dirty
jay
parents:
diff changeset
2 import os
abfc2c9779d6 planemo upload for repository https://github.com/jaidevjoshi83/gaiac.git commit c29a769ed165f313a6410925be24f776652a9663-dirty
jay
parents:
diff changeset
3 import argparse
abfc2c9779d6 planemo upload for repository https://github.com/jaidevjoshi83/gaiac.git commit c29a769ed165f313a6410925be24f776652a9663-dirty
jay
parents:
diff changeset
4
abfc2c9779d6 planemo upload for repository https://github.com/jaidevjoshi83/gaiac.git commit c29a769ed165f313a6410925be24f776652a9663-dirty
jay
parents:
diff changeset
5 def align_sensor_data(file_list, date_time, sep=',', output_mode='multiple', output='aligned.tsv'):
abfc2c9779d6 planemo upload for repository https://github.com/jaidevjoshi83/gaiac.git commit c29a769ed165f313a6410925be24f776652a9663-dirty
jay
parents:
diff changeset
6
abfc2c9779d6 planemo upload for repository https://github.com/jaidevjoshi83/gaiac.git commit c29a769ed165f313a6410925be24f776652a9663-dirty
jay
parents:
diff changeset
7 file_list = file_list.split(',')
abfc2c9779d6 planemo upload for repository https://github.com/jaidevjoshi83/gaiac.git commit c29a769ed165f313a6410925be24f776652a9663-dirty
jay
parents:
diff changeset
8 if len(file_list) < 2:
abfc2c9779d6 planemo upload for repository https://github.com/jaidevjoshi83/gaiac.git commit c29a769ed165f313a6410925be24f776652a9663-dirty
jay
parents:
diff changeset
9 print("Please provide at least two files.")
abfc2c9779d6 planemo upload for repository https://github.com/jaidevjoshi83/gaiac.git commit c29a769ed165f313a6410925be24f776652a9663-dirty
jay
parents:
diff changeset
10 return
abfc2c9779d6 planemo upload for repository https://github.com/jaidevjoshi83/gaiac.git commit c29a769ed165f313a6410925be24f776652a9663-dirty
jay
parents:
diff changeset
11
abfc2c9779d6 planemo upload for repository https://github.com/jaidevjoshi83/gaiac.git commit c29a769ed165f313a6410925be24f776652a9663-dirty
jay
parents:
diff changeset
12 # Read all files into a list of dataframes
abfc2c9779d6 planemo upload for repository https://github.com/jaidevjoshi83/gaiac.git commit c29a769ed165f313a6410925be24f776652a9663-dirty
jay
parents:
diff changeset
13 dfs = [pd.read_csv(file, sep=sep, parse_dates=[date_time]) for file in file_list]
abfc2c9779d6 planemo upload for repository https://github.com/jaidevjoshi83/gaiac.git commit c29a769ed165f313a6410925be24f776652a9663-dirty
jay
parents:
diff changeset
14
abfc2c9779d6 planemo upload for repository https://github.com/jaidevjoshi83/gaiac.git commit c29a769ed165f313a6410925be24f776652a9663-dirty
jay
parents:
diff changeset
15 # Get common timestamps by successive inner merges
abfc2c9779d6 planemo upload for repository https://github.com/jaidevjoshi83/gaiac.git commit c29a769ed165f313a6410925be24f776652a9663-dirty
jay
parents:
diff changeset
16 common_times = dfs[0][[date_time]]
abfc2c9779d6 planemo upload for repository https://github.com/jaidevjoshi83/gaiac.git commit c29a769ed165f313a6410925be24f776652a9663-dirty
jay
parents:
diff changeset
17 for df in dfs[1:]:
abfc2c9779d6 planemo upload for repository https://github.com/jaidevjoshi83/gaiac.git commit c29a769ed165f313a6410925be24f776652a9663-dirty
jay
parents:
diff changeset
18 common_times = common_times.merge(df[[date_time]], on=date_time, how='inner')
abfc2c9779d6 planemo upload for repository https://github.com/jaidevjoshi83/gaiac.git commit c29a769ed165f313a6410925be24f776652a9663-dirty
jay
parents:
diff changeset
19
abfc2c9779d6 planemo upload for repository https://github.com/jaidevjoshi83/gaiac.git commit c29a769ed165f313a6410925be24f776652a9663-dirty
jay
parents:
diff changeset
20 # Now filter each dataframe to contain only common timestamps
abfc2c9779d6 planemo upload for repository https://github.com/jaidevjoshi83/gaiac.git commit c29a769ed165f313a6410925be24f776652a9663-dirty
jay
parents:
diff changeset
21 aligned_dfs = [
abfc2c9779d6 planemo upload for repository https://github.com/jaidevjoshi83/gaiac.git commit c29a769ed165f313a6410925be24f776652a9663-dirty
jay
parents:
diff changeset
22 df[df[date_time].isin(common_times[date_time])].reset_index(drop=True)
abfc2c9779d6 planemo upload for repository https://github.com/jaidevjoshi83/gaiac.git commit c29a769ed165f313a6410925be24f776652a9663-dirty
jay
parents:
diff changeset
23 for df in dfs
abfc2c9779d6 planemo upload for repository https://github.com/jaidevjoshi83/gaiac.git commit c29a769ed165f313a6410925be24f776652a9663-dirty
jay
parents:
diff changeset
24 ]
abfc2c9779d6 planemo upload for repository https://github.com/jaidevjoshi83/gaiac.git commit c29a769ed165f313a6410925be24f776652a9663-dirty
jay
parents:
diff changeset
25
abfc2c9779d6 planemo upload for repository https://github.com/jaidevjoshi83/gaiac.git commit c29a769ed165f313a6410925be24f776652a9663-dirty
jay
parents:
diff changeset
26 # Output files
abfc2c9779d6 planemo upload for repository https://github.com/jaidevjoshi83/gaiac.git commit c29a769ed165f313a6410925be24f776652a9663-dirty
jay
parents:
diff changeset
27 if output_mode == 'multiple':
abfc2c9779d6 planemo upload for repository https://github.com/jaidevjoshi83/gaiac.git commit c29a769ed165f313a6410925be24f776652a9663-dirty
jay
parents:
diff changeset
28 for i, (file, df) in enumerate(zip(file_list, aligned_dfs)):
abfc2c9779d6 planemo upload for repository https://github.com/jaidevjoshi83/gaiac.git commit c29a769ed165f313a6410925be24f776652a9663-dirty
jay
parents:
diff changeset
29 filename = os.path.splitext(os.path.basename(file))[0]
abfc2c9779d6 planemo upload for repository https://github.com/jaidevjoshi83/gaiac.git commit c29a769ed165f313a6410925be24f776652a9663-dirty
jay
parents:
diff changeset
30 output_file = f"{output_prefix}_{filename}.csv"
abfc2c9779d6 planemo upload for repository https://github.com/jaidevjoshi83/gaiac.git commit c29a769ed165f313a6410925be24f776652a9663-dirty
jay
parents:
diff changeset
31 df.to_csv(output_file, index=False, sep=sep)
abfc2c9779d6 planemo upload for repository https://github.com/jaidevjoshi83/gaiac.git commit c29a769ed165f313a6410925be24f776652a9663-dirty
jay
parents:
diff changeset
32 print("Aligned files saved individually.")
abfc2c9779d6 planemo upload for repository https://github.com/jaidevjoshi83/gaiac.git commit c29a769ed165f313a6410925be24f776652a9663-dirty
jay
parents:
diff changeset
33 elif output_mode == 'single':
abfc2c9779d6 planemo upload for repository https://github.com/jaidevjoshi83/gaiac.git commit c29a769ed165f313a6410925be24f776652a9663-dirty
jay
parents:
diff changeset
34 # Merge all aligned dataframes on date_time
abfc2c9779d6 planemo upload for repository https://github.com/jaidevjoshi83/gaiac.git commit c29a769ed165f313a6410925be24f776652a9663-dirty
jay
parents:
diff changeset
35 merged_df = aligned_dfs[0]
abfc2c9779d6 planemo upload for repository https://github.com/jaidevjoshi83/gaiac.git commit c29a769ed165f313a6410925be24f776652a9663-dirty
jay
parents:
diff changeset
36 for df in aligned_dfs[1:]:
abfc2c9779d6 planemo upload for repository https://github.com/jaidevjoshi83/gaiac.git commit c29a769ed165f313a6410925be24f776652a9663-dirty
jay
parents:
diff changeset
37 merged_df = merged_df.merge(df, on=date_time, how='inner')
abfc2c9779d6 planemo upload for repository https://github.com/jaidevjoshi83/gaiac.git commit c29a769ed165f313a6410925be24f776652a9663-dirty
jay
parents:
diff changeset
38
abfc2c9779d6 planemo upload for repository https://github.com/jaidevjoshi83/gaiac.git commit c29a769ed165f313a6410925be24f776652a9663-dirty
jay
parents:
diff changeset
39
abfc2c9779d6 planemo upload for repository https://github.com/jaidevjoshi83/gaiac.git commit c29a769ed165f313a6410925be24f776652a9663-dirty
jay
parents:
diff changeset
40 merged_df.to_csv(output, index=False, sep=sep)
abfc2c9779d6 planemo upload for repository https://github.com/jaidevjoshi83/gaiac.git commit c29a769ed165f313a6410925be24f776652a9663-dirty
jay
parents:
diff changeset
41 print("Single merged file saved.")
abfc2c9779d6 planemo upload for repository https://github.com/jaidevjoshi83/gaiac.git commit c29a769ed165f313a6410925be24f776652a9663-dirty
jay
parents:
diff changeset
42 else:
abfc2c9779d6 planemo upload for repository https://github.com/jaidevjoshi83/gaiac.git commit c29a769ed165f313a6410925be24f776652a9663-dirty
jay
parents:
diff changeset
43 print("Invalid output mode. Use 'multiple' or 'single'.")
abfc2c9779d6 planemo upload for repository https://github.com/jaidevjoshi83/gaiac.git commit c29a769ed165f313a6410925be24f776652a9663-dirty
jay
parents:
diff changeset
44
abfc2c9779d6 planemo upload for repository https://github.com/jaidevjoshi83/gaiac.git commit c29a769ed165f313a6410925be24f776652a9663-dirty
jay
parents:
diff changeset
45
abfc2c9779d6 planemo upload for repository https://github.com/jaidevjoshi83/gaiac.git commit c29a769ed165f313a6410925be24f776652a9663-dirty
jay
parents:
diff changeset
46 def main():
abfc2c9779d6 planemo upload for repository https://github.com/jaidevjoshi83/gaiac.git commit c29a769ed165f313a6410925be24f776652a9663-dirty
jay
parents:
diff changeset
47 parser = argparse.ArgumentParser(description="Align sensor data files on common timestamps.")
abfc2c9779d6 planemo upload for repository https://github.com/jaidevjoshi83/gaiac.git commit c29a769ed165f313a6410925be24f776652a9663-dirty
jay
parents:
diff changeset
48
abfc2c9779d6 planemo upload for repository https://github.com/jaidevjoshi83/gaiac.git commit c29a769ed165f313a6410925be24f776652a9663-dirty
jay
parents:
diff changeset
49 parser.add_argument(
abfc2c9779d6 planemo upload for repository https://github.com/jaidevjoshi83/gaiac.git commit c29a769ed165f313a6410925be24f776652a9663-dirty
jay
parents:
diff changeset
50 '-f', '--files',
abfc2c9779d6 planemo upload for repository https://github.com/jaidevjoshi83/gaiac.git commit c29a769ed165f313a6410925be24f776652a9663-dirty
jay
parents:
diff changeset
51 required=True,
abfc2c9779d6 planemo upload for repository https://github.com/jaidevjoshi83/gaiac.git commit c29a769ed165f313a6410925be24f776652a9663-dirty
jay
parents:
diff changeset
52 help='List of input file paths (at least two)'
abfc2c9779d6 planemo upload for repository https://github.com/jaidevjoshi83/gaiac.git commit c29a769ed165f313a6410925be24f776652a9663-dirty
jay
parents:
diff changeset
53 )
abfc2c9779d6 planemo upload for repository https://github.com/jaidevjoshi83/gaiac.git commit c29a769ed165f313a6410925be24f776652a9663-dirty
jay
parents:
diff changeset
54
abfc2c9779d6 planemo upload for repository https://github.com/jaidevjoshi83/gaiac.git commit c29a769ed165f313a6410925be24f776652a9663-dirty
jay
parents:
diff changeset
55 parser.add_argument(
abfc2c9779d6 planemo upload for repository https://github.com/jaidevjoshi83/gaiac.git commit c29a769ed165f313a6410925be24f776652a9663-dirty
jay
parents:
diff changeset
56 '-s', '--sep',
abfc2c9779d6 planemo upload for repository https://github.com/jaidevjoshi83/gaiac.git commit c29a769ed165f313a6410925be24f776652a9663-dirty
jay
parents:
diff changeset
57 default='\t',
abfc2c9779d6 planemo upload for repository https://github.com/jaidevjoshi83/gaiac.git commit c29a769ed165f313a6410925be24f776652a9663-dirty
jay
parents:
diff changeset
58 help='Separator used in the input files (default: ,)'
abfc2c9779d6 planemo upload for repository https://github.com/jaidevjoshi83/gaiac.git commit c29a769ed165f313a6410925be24f776652a9663-dirty
jay
parents:
diff changeset
59 )
abfc2c9779d6 planemo upload for repository https://github.com/jaidevjoshi83/gaiac.git commit c29a769ed165f313a6410925be24f776652a9663-dirty
jay
parents:
diff changeset
60
abfc2c9779d6 planemo upload for repository https://github.com/jaidevjoshi83/gaiac.git commit c29a769ed165f313a6410925be24f776652a9663-dirty
jay
parents:
diff changeset
61 parser.add_argument(
abfc2c9779d6 planemo upload for repository https://github.com/jaidevjoshi83/gaiac.git commit c29a769ed165f313a6410925be24f776652a9663-dirty
jay
parents:
diff changeset
62 '-m', '--mode',
abfc2c9779d6 planemo upload for repository https://github.com/jaidevjoshi83/gaiac.git commit c29a769ed165f313a6410925be24f776652a9663-dirty
jay
parents:
diff changeset
63 choices=['multiple', 'single'],
abfc2c9779d6 planemo upload for repository https://github.com/jaidevjoshi83/gaiac.git commit c29a769ed165f313a6410925be24f776652a9663-dirty
jay
parents:
diff changeset
64 default='single',
abfc2c9779d6 planemo upload for repository https://github.com/jaidevjoshi83/gaiac.git commit c29a769ed165f313a6410925be24f776652a9663-dirty
jay
parents:
diff changeset
65 help="Output mode: 'multiple' for individual files, 'single' for one merged file (default: multiple)"
abfc2c9779d6 planemo upload for repository https://github.com/jaidevjoshi83/gaiac.git commit c29a769ed165f313a6410925be24f776652a9663-dirty
jay
parents:
diff changeset
66 )
abfc2c9779d6 planemo upload for repository https://github.com/jaidevjoshi83/gaiac.git commit c29a769ed165f313a6410925be24f776652a9663-dirty
jay
parents:
diff changeset
67
abfc2c9779d6 planemo upload for repository https://github.com/jaidevjoshi83/gaiac.git commit c29a769ed165f313a6410925be24f776652a9663-dirty
jay
parents:
diff changeset
68 parser.add_argument(
abfc2c9779d6 planemo upload for repository https://github.com/jaidevjoshi83/gaiac.git commit c29a769ed165f313a6410925be24f776652a9663-dirty
jay
parents:
diff changeset
69 '-o', '--output',
abfc2c9779d6 planemo upload for repository https://github.com/jaidevjoshi83/gaiac.git commit c29a769ed165f313a6410925be24f776652a9663-dirty
jay
parents:
diff changeset
70 default='aligned',
abfc2c9779d6 planemo upload for repository https://github.com/jaidevjoshi83/gaiac.git commit c29a769ed165f313a6410925be24f776652a9663-dirty
jay
parents:
diff changeset
71 help="Output filename"
abfc2c9779d6 planemo upload for repository https://github.com/jaidevjoshi83/gaiac.git commit c29a769ed165f313a6410925be24f776652a9663-dirty
jay
parents:
diff changeset
72 )
abfc2c9779d6 planemo upload for repository https://github.com/jaidevjoshi83/gaiac.git commit c29a769ed165f313a6410925be24f776652a9663-dirty
jay
parents:
diff changeset
73
abfc2c9779d6 planemo upload for repository https://github.com/jaidevjoshi83/gaiac.git commit c29a769ed165f313a6410925be24f776652a9663-dirty
jay
parents:
diff changeset
74 parser.add_argument(
abfc2c9779d6 planemo upload for repository https://github.com/jaidevjoshi83/gaiac.git commit c29a769ed165f313a6410925be24f776652a9663-dirty
jay
parents:
diff changeset
75 '-t', '--date_time_column',
abfc2c9779d6 planemo upload for repository https://github.com/jaidevjoshi83/gaiac.git commit c29a769ed165f313a6410925be24f776652a9663-dirty
jay
parents:
diff changeset
76 default='date_time',
abfc2c9779d6 planemo upload for repository https://github.com/jaidevjoshi83/gaiac.git commit c29a769ed165f313a6410925be24f776652a9663-dirty
jay
parents:
diff changeset
77 help="Provide the name of the date and time column."
abfc2c9779d6 planemo upload for repository https://github.com/jaidevjoshi83/gaiac.git commit c29a769ed165f313a6410925be24f776652a9663-dirty
jay
parents:
diff changeset
78 )
abfc2c9779d6 planemo upload for repository https://github.com/jaidevjoshi83/gaiac.git commit c29a769ed165f313a6410925be24f776652a9663-dirty
jay
parents:
diff changeset
79
abfc2c9779d6 planemo upload for repository https://github.com/jaidevjoshi83/gaiac.git commit c29a769ed165f313a6410925be24f776652a9663-dirty
jay
parents:
diff changeset
80 args = parser.parse_args()
abfc2c9779d6 planemo upload for repository https://github.com/jaidevjoshi83/gaiac.git commit c29a769ed165f313a6410925be24f776652a9663-dirty
jay
parents:
diff changeset
81
abfc2c9779d6 planemo upload for repository https://github.com/jaidevjoshi83/gaiac.git commit c29a769ed165f313a6410925be24f776652a9663-dirty
jay
parents:
diff changeset
82 align_sensor_data(
abfc2c9779d6 planemo upload for repository https://github.com/jaidevjoshi83/gaiac.git commit c29a769ed165f313a6410925be24f776652a9663-dirty
jay
parents:
diff changeset
83 file_list=args.files,
abfc2c9779d6 planemo upload for repository https://github.com/jaidevjoshi83/gaiac.git commit c29a769ed165f313a6410925be24f776652a9663-dirty
jay
parents:
diff changeset
84 date_time=args.date_time_column,
abfc2c9779d6 planemo upload for repository https://github.com/jaidevjoshi83/gaiac.git commit c29a769ed165f313a6410925be24f776652a9663-dirty
jay
parents:
diff changeset
85 sep=args.sep,
abfc2c9779d6 planemo upload for repository https://github.com/jaidevjoshi83/gaiac.git commit c29a769ed165f313a6410925be24f776652a9663-dirty
jay
parents:
diff changeset
86 output_mode=args.mode,
abfc2c9779d6 planemo upload for repository https://github.com/jaidevjoshi83/gaiac.git commit c29a769ed165f313a6410925be24f776652a9663-dirty
jay
parents:
diff changeset
87 output=args.output
abfc2c9779d6 planemo upload for repository https://github.com/jaidevjoshi83/gaiac.git commit c29a769ed165f313a6410925be24f776652a9663-dirty
jay
parents:
diff changeset
88 )
abfc2c9779d6 planemo upload for repository https://github.com/jaidevjoshi83/gaiac.git commit c29a769ed165f313a6410925be24f776652a9663-dirty
jay
parents:
diff changeset
89
abfc2c9779d6 planemo upload for repository https://github.com/jaidevjoshi83/gaiac.git commit c29a769ed165f313a6410925be24f776652a9663-dirty
jay
parents:
diff changeset
90 if __name__ == '__main__':
abfc2c9779d6 planemo upload for repository https://github.com/jaidevjoshi83/gaiac.git commit c29a769ed165f313a6410925be24f776652a9663-dirty
jay
parents:
diff changeset
91 main()