changeset 339:1fd4e4e93c85 draft

Uploaded
author luca_milaz
date Thu, 04 Sep 2025 12:40:01 +0000
parents 111dbc8e0415
children da4f5f4a9046
files COBRAxy/utils/general_utils.py
diffstat 1 files changed, 57 insertions(+), 43 deletions(-) [+]
line wrap: on
line diff
--- a/COBRAxy/utils/general_utils.py	Thu Sep 04 12:26:56 2025 +0000
+++ b/COBRAxy/utils/general_utils.py	Thu Sep 04 12:40:01 2025 +0000
@@ -17,48 +17,57 @@
 import bz2
 from io import StringIO
 
-# FILES
+class ValueErr(Exception):
+    def __init__(self, param_name, expected, actual):
+        super().__init__(f"Invalid value for {param_name}: expected {expected}, got {actual}")
+
+class PathErr(Exception):
+    def __init__(self, path, message):
+        super().__init__(f"Path error for '{path}': {message}")
+
 class FileFormat(Enum):
     """
     Encodes possible file extensions to conditionally save data in a different format.
     """
     DAT    = ("dat",) # this is how galaxy treats all your files!
     CSV    = ("csv",) # this is how most editable input data is written
-    TSV    = ("tsv",) # this is how most editable input data is ACTUALLY written
-    
+    TSV    = ("tsv",) # this is how most editable input data is ACTUALLY written TODO:more support pls!!
     SVG    = ("svg",) # this is how most metabolic maps are written
     PNG    = ("png",) # this is a common output format for images (such as metabolic maps)
     PDF    = ("pdf",) # this is also a common output format for images, as it's required in publications.
-
-    XML    = ("xml","xml.gz", "xml.zip", "xml.bz2") # SBML files are XML files, sometimes compressed
-    JSON   = ("json","json.gz", "json.zip", "json.bz2") # COBRA models can be stored as JSON files, sometimes compressed
-
+    
+    # Updated to include compressed variants
+    XML    = ("xml", "xml.gz", "xml.zip", "xml.bz2") # SBML files are XML files, sometimes compressed
+    JSON   = ("json", "json.gz", "json.zip", "json.bz2") # COBRA models can be stored as JSON files, sometimes compressed
+    
     TXT = ("txt",) # this is how most output data is written
-    
     PICKLE = ("pickle", "pk", "p") # this is how all runtime data structures are saved
 
-   
+    def __init__(self, *extensions):
+        self.extensions = extensions
+        # Store original extension when set via fromExt
+        self._original_extension = None
+
     @classmethod
-    def fromExt(cls, ext :str) -> "FileFormat":
+    def fromExt(cls, ext: str) -> "FileFormat":
         """
         Converts a file extension string to a FileFormat instance.
-
         Args:
             ext : The file extension as a string.
-
         Returns:
             FileFormat: The FileFormat instance corresponding to the file extension.
         """
         variantName = ext.upper()
         if variantName in FileFormat.__members__: 
             instance = FileFormat[variantName]
-            instance.original_extension = ext
+            instance._original_extension = ext
             return instance
         
-        variantName = variantName.lower()
+        variantName = ext.lower()
         for member in cls:
             if variantName in member.value: 
-                member.original_extension = ext
+                # Create a copy-like behavior by storing the original extension
+                member._original_extension = ext
                 return member
         
         raise ValueErr("ext", "a valid FileFormat file extension", ext)
@@ -66,62 +75,51 @@
     def __str__(self) -> str:
         """
         (Private) converts to str representation. Good practice for usage with argparse.
-
         Returns:
             str : the string representation of the file extension.
         """
-
-        if(self.value[0] in  ["json", "xml"]): #return the original string extension for compressed files
-            return self.original_extension
-        else:
-            return self.value[-1] # for all other formats and pickle
+        # If we have an original extension stored (for compressed files), use it
+        if hasattr(self, '_original_extension') and self._original_extension:
+            return self._original_extension
+        
+        # TODO: fix, it's the dumb pickle thing keep this behaviour if we are not dealing with XML or JSON
+        return self.value[-1]
 
 class FilePath():
     """
     Represents a file path. View this as an attempt to standardize file-related operations by expecting
     values of this type in any process requesting a file path.
     """
-    def __init__(self, filePath :str, ext :FileFormat, *, prefix = "") -> None:
+    def __init__(self, filePath: str, ext: FileFormat, *, prefix="") -> None:
         """
         (Private) Initializes an instance of FilePath.
-
         Args:
             path : the end of the path, containing the file name.
             ext : the file's extension.
             prefix : anything before path, if the last '/' isn't there it's added by the code.
-        
         Returns:
             None : practically, a FilePath instance.
         """
-        self.ext      = ext
+        self.ext = ext
         self.filePath = filePath
 
-        if prefix and prefix[-1] != '/': prefix += '/'
+        if prefix and prefix[-1] != '/': 
+            prefix += '/'
         self.prefix = prefix
     
     @classmethod
-    def fromStrPath(cls, path :str) -> "FilePath":
+    def fromStrPath(cls, path: str) -> "FilePath":
         """
         Factory method to parse a string from which to obtain, if possible, a valid FilePath instance.
         It detects double extensions such as .json.gz and .xml.bz2, which are common in COBRA models.
         These double extensions are not supported for other file types such as .csv.
-
         Args:
             path : the string containing the path
-        
         Raises:
             PathErr : if the provided string doesn't represent a valid path.
-        
         Returns:
             FilePath : the constructed instance.
         """
-        # This method is often used to construct FilePath instances from ARGS UI arguments. These arguments *should*
-        # always be correct paths and could be used as raw strings, however most if not all functions that work with
-        # file paths request the FilePath objects specifically, which is a very good thing in any case other than this.
-        # What ends up happening is we spend time parsing a string into a FilePath so that the function accepts it, only
-        # to call show() immediately to bring back the string and open the file it points to.
-        # TODO: this is an indication that the arguments SHOULD BE OF TYPE FilePath if they are filepaths, this ENSURES
-        # their correctness when modifying the UI and avoids the pointless back-and-forth.
         result = re.search(r"^(?P<prefix>.*\/)?(?P<name>.*)\.(?P<ext>[^.]*)$", path)
         if not result or not result["name"] or not result["ext"]:
             raise PathErr(path, "cannot recognize folder structure or extension in path")
@@ -129,27 +127,43 @@
         prefix = result["prefix"] if result["prefix"] else ""
         name, ext = result["name"], result["ext"]
 
-        # Split path into parts
+        # Check for double extensions (json.gz, xml.zip, etc.)
         parts = path.split(".")
         if len(parts) >= 3:  
             penultimate = parts[-2]
             last = parts[-1]
-            if penultimate in {"json", "xml"}:
+            double_ext = f"{penultimate}.{last}"
+            
+            # Try the double extension first
+            try:
+                ext_format = FileFormat.fromExt(double_ext)
                 name = ".".join(parts[:-2])
-                ext = f"{penultimate}.{last}"
+                # Extract prefix if it exists
+                if '/' in name:
+                    prefix = name[:name.rfind('/') + 1]
+                    name = name[name.rfind('/') + 1:]
+                return cls(name, ext_format, prefix=prefix)
+            except ValueErr:
+                # If double extension doesn't work, fall back to single extension
+                pass
 
-        return cls(name, FileFormat.fromExt(ext), prefix=prefix)
+        # Single extension fallback (original logic)
+        try:
+            ext_format = FileFormat.fromExt(ext)
+            return cls(name, ext_format, prefix=prefix)
+        except ValueErr:
+            raise PathErr(path, f"unsupported file extension: {ext}")
 
     def show(self) -> str:
         """
         Shows the path as a string.
-
         Returns:
             str : the path shown as a string.
         """
         return f"{self.prefix}{self.filePath}.{self.ext}"
     
-    def __str__(self) -> str: return self.show()
+    def __str__(self) -> str: 
+        return self.show()
 
 # ERRORS
 def terminate(msg :str) -> None: