changeset 5:0b581fb9fc70 draft

planemo upload for repository https://github.com/jaidevjoshi83/gaiac commit d93e649fdfa6940af167c34c2d6de23f231e2f1b-dirty
author jay
date Wed, 28 Jan 2026 14:33:14 +0000
parents 72b3a1ec495b
children 2502e3ee6397
files gaiac_time_sync/gaiac_time_sync.py gaiac_time_sync/test_output.csv gaiac_time_sync/test_output_3files.csv
diffstat 3 files changed, 89 insertions(+), 20 deletions(-) [+]
line wrap: on
line diff
--- a/gaiac_time_sync/gaiac_time_sync.py	Tue Jan 27 03:08:31 2026 +0000
+++ b/gaiac_time_sync/gaiac_time_sync.py	Wed Jan 28 14:33:14 2026 +0000
@@ -4,43 +4,103 @@
 
 def align_sensor_data(file_list, date_time, sep=',', output_mode='multiple', output='aligned.tsv'):
     
-    file_list =  file_list.split(',')
+    if isinstance(file_list, str):
+        file_list = file_list.split(',')
+        
     if len(file_list) < 2:
         print("Please provide at least two files.")
         return
 
-    # Read all files into a list of dataframes
-    dfs = [pd.read_csv(file, sep=sep, parse_dates=[date_time]) for file in file_list]
+    # Check if date_time is numeric (column index) or string (column name)
+    use_index = False
+    try:
+        # User input '1' likely means 1st column (index 0)
+        col_idx = int(date_time) - 1 
+        if col_idx < 0:
+            raise ValueError("Column index must be >= 1")
+        use_index = True
+        print(f"Using column index {col_idx} (from input '{date_time}')")
+    except ValueError:
+        # Not an integer, treat as column name
+        merge_col = date_time
+        print(f"Using column name '{merge_col}'")
 
-    # Get common timestamps by successive inner merges
-    common_times = dfs[0][[date_time]]
+    dfs = []
+    for file in file_list:
+        file = file.strip() # clean whitespace
+        if not file: continue
+            
+        if use_index:
+            # Parse dates using index
+            df = pd.read_csv(file, sep=sep, parse_dates=[col_idx])
+
+            original_col_name = df.columns[col_idx]
+            merge_col = "__common_timestamp__"
+            df.rename(columns={original_col_name: merge_col}, inplace=True)
+        else:
+            # Parse dates using name
+            df = pd.read_csv(file, sep=sep, parse_dates=[date_time])
+        
+        dfs.append(df)
+
+    if not dfs:
+        print("No valid dataframes loaded.")
+        return
+
+    common_times = dfs[0][[merge_col]]
     for df in dfs[1:]:
-        common_times = common_times.merge(df[[date_time]], on=date_time, how='inner')
+        common_times = common_times.merge(df[[merge_col]], on=merge_col, how='inner')
 
-    # Now filter each dataframe to contain only common timestamps
     aligned_dfs = [
-        df[df[date_time].isin(common_times[date_time])].reset_index(drop=True)
+        df[df[merge_col].isin(common_times[merge_col])].reset_index(drop=True)
         for df in dfs
     ]
 
+    
+    if use_index and output_mode == 'multiple':
+        for df in aligned_dfs:
+            df.rename(columns={merge_col: "Date_Time"}, inplace=True)
+        # Update merge_col to new name so single mode merging works if triggered
+        merge_col = "Date_Time" 
+
     # Output files
     if output_mode == 'multiple':
         for i, (file, df) in enumerate(zip(file_list, aligned_dfs)):
-            filename = os.path.splitext(os.path.basename(file))[0]
+            # Clean filename logic (handle paths)
+            filename = os.path.splitext(os.path.basename(file.strip()))[0]
+            
+            pass
+
+
+    if output_mode == 'single':
+        # Merge all aligned dataframes
+        merged_df = aligned_dfs[0]
+        # Rename back if needed? 
+        if use_index:
+             merged_df.rename(columns={merge_col: "Date_Time"}, inplace=True)
+             merge_col = "Date_Time"
+
+        for i, df in enumerate(aligned_dfs[1:]):
+             
+             if use_index:
+                 df.rename(columns={'__common_timestamp__': merge_col}, inplace=True)
+                 
+             merged_df = merged_df.merge(df, on=merge_col, how='inner')
+
+        merged_df.to_csv(output, index=False, sep=sep)
+        print("Single merged file saved.")
+        
+    elif output_mode == 'multiple': # Original logic for multiple
+         
+         for i, (file, df) in enumerate(zip(file_list, aligned_dfs)):
+            filename = os.path.splitext(os.path.basename(file.strip()))[0]
+            output_prefix = os.path.splitext(output)[0]
             output_file = f"{output_prefix}_{filename}.csv"
             df.to_csv(output_file, index=False, sep=sep)
-        print("Aligned files saved individually.")
-    elif output_mode == 'single':
-        # Merge all aligned dataframes on date_time
-        merged_df = aligned_dfs[0]
-        for df in aligned_dfs[1:]:
-            merged_df = merged_df.merge(df, on=date_time, how='inner')
+         print("Aligned files saved individually.")
+    else:
+        print("Invalid output mode.")
 
-       
-        merged_df.to_csv(output, index=False, sep=sep)
-        print("Single merged file saved.")
-    else:
-        print("Invalid output mode. Use 'multiple' or 'single'.")
 
 
 def main():
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/gaiac_time_sync/test_output.csv	Wed Jan 28 14:33:14 2026 +0000
@@ -0,0 +1,5 @@
+Date_Time	Temparature1	Humidity1	Temparature2	Humidity2
+2019-06-07 13:28:00	39.0	50.471	39.0	50.471
+2019-06-07 13:29:00	39.0	51.0	39.0	50.471
+2019-06-07 13:30:00	39.588	49.647	39.588	49.647
+2019-06-07 13:31:00	38.9	50.0	40.0	49.0
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/gaiac_time_sync/test_output_3files.csv	Wed Jan 28 14:33:14 2026 +0000
@@ -0,0 +1,4 @@
+Date_Time	Temparature1	Humidity1	Temparature2	Humidity2	Temparature3	humidity3
+2019-06-07 13:28:00	39.0	50.471	39.0	50.471	38	53.0
+2019-06-07 13:29:00	39.0	51.0	39.0	50.471	38	52.824
+2019-06-07 13:31:00	38.9	50.0	40.0	49.0	38	51.944