changeset 6:eac57ce5ca9c draft default tip

planemo upload for repository https://github.com/jaidevjoshi83/gaiac commit e62320eeba2e8bae9b1965507caa8faf027f6eaf-dirty
author jay
date Wed, 28 Jan 2026 14:47:37 +0000
parents 23562132641e
children
files gaiac_time_sync/gaiac_time_sync.py gaiac_time_sync/test-data/out.tsv gaiac_time_sync/test_output.csv gaiac_time_sync/test_output_3files.csv
diffstat 4 files changed, 7 insertions(+), 68 deletions(-) [+]
line wrap: on
line diff
--- a/gaiac_time_sync/gaiac_time_sync.py	Wed Jan 28 14:31:37 2026 +0000
+++ b/gaiac_time_sync/gaiac_time_sync.py	Wed Jan 28 14:47:37 2026 +0000
@@ -33,8 +33,7 @@
         if use_index:
             # Parse dates using index
             df = pd.read_csv(file, sep=sep, parse_dates=[col_idx])
-            # Normalize the column name to specific identifier for merging
-            # This handles cases where different files describe the date column with different headers
+
             original_col_name = df.columns[col_idx]
             merge_col = "__common_timestamp__"
             df.rename(columns={original_col_name: merge_col}, inplace=True)
@@ -48,76 +47,32 @@
         print("No valid dataframes loaded.")
         return
 
-    # Get common timestamps by successive inner merges
     common_times = dfs[0][[merge_col]]
     for df in dfs[1:]:
         common_times = common_times.merge(df[[merge_col]], on=merge_col, how='inner')
 
-    # Now filter each dataframe to contain only common timestamps
     aligned_dfs = [
         df[df[merge_col].isin(common_times[merge_col])].reset_index(drop=True)
         for df in dfs
     ]
 
-    # After filtering, if we used a placeholder name, we might want to restore original names?
-    # Or keep it universal.
-    # The requirement says "returns files with time synchronized data".
-    # If output_mode='multiple', we dump them back. 
-    # If we renamed the timestamp column to '__common_timestamp__', it will appear as such in output.
-    # User might prefer the original name.
-    # But if input files had DIFFERENT names for that column, which one should we use?
-    # Let's simple restore it to "date_time" (user input) or something generic if it was index.
     
-    # Actually, simplest is to rename it back to "Date_Time" or similar if we changed it.
     if use_index and output_mode == 'multiple':
         for df in aligned_dfs:
             df.rename(columns={merge_col: "Date_Time"}, inplace=True)
-        # Update merge_col to new name so single mode merging works if triggered
+
         merge_col = "Date_Time" 
 
     # Output files
-    if output_mode == 'multiple':
-        for i, (file, df) in enumerate(zip(file_list, aligned_dfs)):
-            # Clean filename logic (handle paths)
-            filename = os.path.splitext(os.path.basename(file.strip()))[0]
-            # output is just a prefix-ish or single file arg? 
-            # In XML, -o $out. But $out is a single file path in Galaxy typically unless discover_datasets used.
-            # Wait, XML output is: <data name='out' ... />
-            # If output_mode is 'multiple', the script generates multiple files?
-            # XML says one output 'out'. 
-            # The script default is 'single' in argparse, but XML doesn't set mode!
-            # XML command: python ... -o $out
-            # XML inputs don't allow selecting mode.
-            # Python script default mode is 'single'.
-            
-            # So output_mode is likely 'single'.
-            pass
-
-    # Re-eval python default arguments:
-    # parser.add_argument('-m', '--mode', default='single', ...)
-    # XML doesn't pass -m. So it uses 'single'.
-    # So we merge into one file.
-
     if output_mode == 'single':
         # Merge all aligned dataframes
         merged_df = aligned_dfs[0]
-        # Rename back if needed? 
         if use_index:
              merged_df.rename(columns={merge_col: "Date_Time"}, inplace=True)
              merge_col = "Date_Time"
 
         for i, df in enumerate(aligned_dfs[1:]):
-             # When merging 'single', we end up with wide format?
-             # Or just inner join?
-             # Original code:
-             # merged_df = merged_df.merge(df, on=date_time, how='inner')
              
-             # If we merge, we need suffixes if other columns have same names!
-             # Original code didn't specify suffixes, so pandas defaults _x, _y.
-             # With >2 files, it gets messy (_x, _y, _x, _y...)
-             # But let's keep original logic for suffixes.
-             
-             # If we used index, the column is named 'merge_col' in 'df' too.
              if use_index:
                  df.rename(columns={'__common_timestamp__': merge_col}, inplace=True)
                  
@@ -126,19 +81,12 @@
         merged_df.to_csv(output, index=False, sep=sep)
         print("Single merged file saved.")
         
-    elif output_mode == 'multiple': # Original logic for multiple
-         # ...
-         # The original code's "multiple" block was slightly broken or unused by Galaxy XML
-         # because Galaxy XML expects specific output file or discovery.
-         # But I digress, I just need to fix the Date parsing error.
+    elif output_mode == 'multiple':
          
          for i, (file, df) in enumerate(zip(file_list, aligned_dfs)):
             filename = os.path.splitext(os.path.basename(file.strip()))[0]
-            # output is passed as full path 'out.tsv' probably.
-            # If default output was 'aligned', it tries 'aligned_filename.csv'
-            # Here 'output' arg is likely a file path from Galaxy.
             output_prefix = os.path.splitext(output)[0]
-            output_file = f"{output_prefix}_{filename}.csv"
+            output_file = f"{output_prefix}_{filename}.tsv"
             df.to_csv(output_file, index=False, sep=sep)
          print("Aligned files saved individually.")
     else:
@@ -158,7 +106,7 @@
     parser.add_argument(
         '-s', '--sep',
         default='\t',
-        help='Separator used in the input files (default: ,)'
+        help='Separator used in the input files (default: tab)'
     )
 
     parser.add_argument(
@@ -170,7 +118,7 @@
 
     parser.add_argument(
         '-o', '--output',
-        default='aligned',
+        default='aligned.tsv',
         help="Output filename"
     )
 
--- a/gaiac_time_sync/test-data/out.tsv	Wed Jan 28 14:31:37 2026 +0000
+++ b/gaiac_time_sync/test-data/out.tsv	Wed Jan 28 14:47:37 2026 +0000
@@ -1,4 +1,4 @@
-date_time	Temparature1	Humidity1	Temparature2	Humidity2	Temparature3	humidity3
+Date_Time	Temparature1	Humidity1	Temparature2	Humidity2	Temparature3	humidity3
 2019-06-07 13:28:00	39.0	50.471	39.0	50.471	38	53.0
 2019-06-07 13:29:00	39.0	51.0	39.0	50.471	38	52.824
 2019-06-07 13:31:00	38.9	50.0	40.0	49.0	38	51.944
--- a/gaiac_time_sync/test_output.csv	Wed Jan 28 14:31:37 2026 +0000
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,5 +0,0 @@
-Date_Time	Temparature1	Humidity1	Temparature2	Humidity2
-2019-06-07 13:28:00	39.0	50.471	39.0	50.471
-2019-06-07 13:29:00	39.0	51.0	39.0	50.471
-2019-06-07 13:30:00	39.588	49.647	39.588	49.647
-2019-06-07 13:31:00	38.9	50.0	40.0	49.0
--- a/gaiac_time_sync/test_output_3files.csv	Wed Jan 28 14:31:37 2026 +0000
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,4 +0,0 @@
-Date_Time	Temparature1	Humidity1	Temparature2	Humidity2	Temparature3	humidity3
-2019-06-07 13:28:00	39.0	50.471	39.0	50.471	38	53.0
-2019-06-07 13:29:00	39.0	51.0	39.0	50.471	38	52.824
-2019-06-07 13:31:00	38.9	50.0	40.0	49.0	38	51.944