Mercurial > repos > bimib > cobraxy
comparison COBRAxy/utils/general_utils.py @ 339:1fd4e4e93c85 draft
Uploaded
| author | luca_milaz |
|---|---|
| date | Thu, 04 Sep 2025 12:40:01 +0000 |
| parents | 111dbc8e0415 |
| children | da4f5f4a9046 |
comparison
equal
deleted
inserted
replaced
| 338:111dbc8e0415 | 339:1fd4e4e93c85 |
|---|---|
| 15 import zipfile | 15 import zipfile |
| 16 import gzip | 16 import gzip |
| 17 import bz2 | 17 import bz2 |
| 18 from io import StringIO | 18 from io import StringIO |
| 19 | 19 |
| 20 # FILES | 20 class ValueErr(Exception): |
| 21 def __init__(self, param_name, expected, actual): | |
| 22 super().__init__(f"Invalid value for {param_name}: expected {expected}, got {actual}") | |
| 23 | |
| 24 class PathErr(Exception): | |
| 25 def __init__(self, path, message): | |
| 26 super().__init__(f"Path error for '{path}': {message}") | |
| 27 | |
| 21 class FileFormat(Enum): | 28 class FileFormat(Enum): |
| 22 """ | 29 """ |
| 23 Encodes possible file extensions to conditionally save data in a different format. | 30 Encodes possible file extensions to conditionally save data in a different format. |
| 24 """ | 31 """ |
| 25 DAT = ("dat",) # this is how galaxy treats all your files! | 32 DAT = ("dat",) # this is how galaxy treats all your files! |
| 26 CSV = ("csv",) # this is how most editable input data is written | 33 CSV = ("csv",) # this is how most editable input data is written |
| 27 TSV = ("tsv",) # this is how most editable input data is ACTUALLY written | 34 TSV = ("tsv",) # this is how most editable input data is ACTUALLY written TODO:more support pls!! |
| 28 | |
| 29 SVG = ("svg",) # this is how most metabolic maps are written | 35 SVG = ("svg",) # this is how most metabolic maps are written |
| 30 PNG = ("png",) # this is a common output format for images (such as metabolic maps) | 36 PNG = ("png",) # this is a common output format for images (such as metabolic maps) |
| 31 PDF = ("pdf",) # this is also a common output format for images, as it's required in publications. | 37 PDF = ("pdf",) # this is also a common output format for images, as it's required in publications. |
| 32 | 38 |
| 33 XML = ("xml","xml.gz", "xml.zip", "xml.bz2") # SBML files are XML files, sometimes compressed | 39 # Updated to include compressed variants |
| 34 JSON = ("json","json.gz", "json.zip", "json.bz2") # COBRA models can be stored as JSON files, sometimes compressed | 40 XML = ("xml", "xml.gz", "xml.zip", "xml.bz2") # SBML files are XML files, sometimes compressed |
| 35 | 41 JSON = ("json", "json.gz", "json.zip", "json.bz2") # COBRA models can be stored as JSON files, sometimes compressed |
| 42 | |
| 36 TXT = ("txt",) # this is how most output data is written | 43 TXT = ("txt",) # this is how most output data is written |
| 37 | |
| 38 PICKLE = ("pickle", "pk", "p") # this is how all runtime data structures are saved | 44 PICKLE = ("pickle", "pk", "p") # this is how all runtime data structures are saved |
| 39 | 45 |
| 40 | 46 def __init__(self, *extensions): |
| 47 self.extensions = extensions | |
| 48 # Store original extension when set via fromExt | |
| 49 self._original_extension = None | |
| 50 | |
| 41 @classmethod | 51 @classmethod |
| 42 def fromExt(cls, ext :str) -> "FileFormat": | 52 def fromExt(cls, ext: str) -> "FileFormat": |
| 43 """ | 53 """ |
| 44 Converts a file extension string to a FileFormat instance. | 54 Converts a file extension string to a FileFormat instance. |
| 45 | |
| 46 Args: | 55 Args: |
| 47 ext : The file extension as a string. | 56 ext : The file extension as a string. |
| 48 | |
| 49 Returns: | 57 Returns: |
| 50 FileFormat: The FileFormat instance corresponding to the file extension. | 58 FileFormat: The FileFormat instance corresponding to the file extension. |
| 51 """ | 59 """ |
| 52 variantName = ext.upper() | 60 variantName = ext.upper() |
| 53 if variantName in FileFormat.__members__: | 61 if variantName in FileFormat.__members__: |
| 54 instance = FileFormat[variantName] | 62 instance = FileFormat[variantName] |
| 55 instance.original_extension = ext | 63 instance._original_extension = ext |
| 56 return instance | 64 return instance |
| 57 | 65 |
| 58 variantName = variantName.lower() | 66 variantName = ext.lower() |
| 59 for member in cls: | 67 for member in cls: |
| 60 if variantName in member.value: | 68 if variantName in member.value: |
| 61 member.original_extension = ext | 69 # Create a copy-like behavior by storing the original extension |
| 70 member._original_extension = ext | |
| 62 return member | 71 return member |
| 63 | 72 |
| 64 raise ValueErr("ext", "a valid FileFormat file extension", ext) | 73 raise ValueErr("ext", "a valid FileFormat file extension", ext) |
| 65 | 74 |
| 66 def __str__(self) -> str: | 75 def __str__(self) -> str: |
| 67 """ | 76 """ |
| 68 (Private) converts to str representation. Good practice for usage with argparse. | 77 (Private) converts to str representation. Good practice for usage with argparse. |
| 69 | |
| 70 Returns: | 78 Returns: |
| 71 str : the string representation of the file extension. | 79 str : the string representation of the file extension. |
| 72 """ | 80 """ |
| 73 | 81 # If we have an original extension stored (for compressed files), use it |
| 74 if(self.value[0] in ["json", "xml"]): #return the original string extension for compressed files | 82 if hasattr(self, '_original_extension') and self._original_extension: |
| 75 return self.original_extension | 83 return self._original_extension |
| 76 else: | 84 |
| 77 return self.value[-1] # for all other formats and pickle | 85 # TODO: fix, it's the dumb pickle thing keep this behaviour if we are not dealing with XML or JSON |
| 86 return self.value[-1] | |
| 78 | 87 |
| 79 class FilePath(): | 88 class FilePath(): |
| 80 """ | 89 """ |
| 81 Represents a file path. View this as an attempt to standardize file-related operations by expecting | 90 Represents a file path. View this as an attempt to standardize file-related operations by expecting |
| 82 values of this type in any process requesting a file path. | 91 values of this type in any process requesting a file path. |
| 83 """ | 92 """ |
| 84 def __init__(self, filePath :str, ext :FileFormat, *, prefix = "") -> None: | 93 def __init__(self, filePath: str, ext: FileFormat, *, prefix="") -> None: |
| 85 """ | 94 """ |
| 86 (Private) Initializes an instance of FilePath. | 95 (Private) Initializes an instance of FilePath. |
| 87 | |
| 88 Args: | 96 Args: |
| 89 path : the end of the path, containing the file name. | 97 path : the end of the path, containing the file name. |
| 90 ext : the file's extension. | 98 ext : the file's extension. |
| 91 prefix : anything before path, if the last '/' isn't there it's added by the code. | 99 prefix : anything before path, if the last '/' isn't there it's added by the code. |
| 92 | |
| 93 Returns: | 100 Returns: |
| 94 None : practically, a FilePath instance. | 101 None : practically, a FilePath instance. |
| 95 """ | 102 """ |
| 96 self.ext = ext | 103 self.ext = ext |
| 97 self.filePath = filePath | 104 self.filePath = filePath |
| 98 | 105 |
| 99 if prefix and prefix[-1] != '/': prefix += '/' | 106 if prefix and prefix[-1] != '/': |
| 107 prefix += '/' | |
| 100 self.prefix = prefix | 108 self.prefix = prefix |
| 101 | 109 |
| 102 @classmethod | 110 @classmethod |
| 103 def fromStrPath(cls, path :str) -> "FilePath": | 111 def fromStrPath(cls, path: str) -> "FilePath": |
| 104 """ | 112 """ |
| 105 Factory method to parse a string from which to obtain, if possible, a valid FilePath instance. | 113 Factory method to parse a string from which to obtain, if possible, a valid FilePath instance. |
| 106 It detects double extensions such as .json.gz and .xml.bz2, which are common in COBRA models. | 114 It detects double extensions such as .json.gz and .xml.bz2, which are common in COBRA models. |
| 107 These double extensions are not supported for other file types such as .csv. | 115 These double extensions are not supported for other file types such as .csv. |
| 108 | |
| 109 Args: | 116 Args: |
| 110 path : the string containing the path | 117 path : the string containing the path |
| 111 | |
| 112 Raises: | 118 Raises: |
| 113 PathErr : if the provided string doesn't represent a valid path. | 119 PathErr : if the provided string doesn't represent a valid path. |
| 114 | |
| 115 Returns: | 120 Returns: |
| 116 FilePath : the constructed instance. | 121 FilePath : the constructed instance. |
| 117 """ | 122 """ |
| 118 # This method is often used to construct FilePath instances from ARGS UI arguments. These arguments *should* | |
| 119 # always be correct paths and could be used as raw strings, however most if not all functions that work with | |
| 120 # file paths request the FilePath objects specifically, which is a very good thing in any case other than this. | |
| 121 # What ends up happening is we spend time parsing a string into a FilePath so that the function accepts it, only | |
| 122 # to call show() immediately to bring back the string and open the file it points to. | |
| 123 # TODO: this is an indication that the arguments SHOULD BE OF TYPE FilePath if they are filepaths, this ENSURES | |
| 124 # their correctness when modifying the UI and avoids the pointless back-and-forth. | |
| 125 result = re.search(r"^(?P<prefix>.*\/)?(?P<name>.*)\.(?P<ext>[^.]*)$", path) | 123 result = re.search(r"^(?P<prefix>.*\/)?(?P<name>.*)\.(?P<ext>[^.]*)$", path) |
| 126 if not result or not result["name"] or not result["ext"]: | 124 if not result or not result["name"] or not result["ext"]: |
| 127 raise PathErr(path, "cannot recognize folder structure or extension in path") | 125 raise PathErr(path, "cannot recognize folder structure or extension in path") |
| 128 | 126 |
| 129 prefix = result["prefix"] if result["prefix"] else "" | 127 prefix = result["prefix"] if result["prefix"] else "" |
| 130 name, ext = result["name"], result["ext"] | 128 name, ext = result["name"], result["ext"] |
| 131 | 129 |
| 132 # Split path into parts | 130 # Check for double extensions (json.gz, xml.zip, etc.) |
| 133 parts = path.split(".") | 131 parts = path.split(".") |
| 134 if len(parts) >= 3: | 132 if len(parts) >= 3: |
| 135 penultimate = parts[-2] | 133 penultimate = parts[-2] |
| 136 last = parts[-1] | 134 last = parts[-1] |
| 137 if penultimate in {"json", "xml"}: | 135 double_ext = f"{penultimate}.{last}" |
| 136 | |
| 137 # Try the double extension first | |
| 138 try: | |
| 139 ext_format = FileFormat.fromExt(double_ext) | |
| 138 name = ".".join(parts[:-2]) | 140 name = ".".join(parts[:-2]) |
| 139 ext = f"{penultimate}.{last}" | 141 # Extract prefix if it exists |
| 140 | 142 if '/' in name: |
| 141 return cls(name, FileFormat.fromExt(ext), prefix=prefix) | 143 prefix = name[:name.rfind('/') + 1] |
| 144 name = name[name.rfind('/') + 1:] | |
| 145 return cls(name, ext_format, prefix=prefix) | |
| 146 except ValueErr: | |
| 147 # If double extension doesn't work, fall back to single extension | |
| 148 pass | |
| 149 | |
| 150 # Single extension fallback (original logic) | |
| 151 try: | |
| 152 ext_format = FileFormat.fromExt(ext) | |
| 153 return cls(name, ext_format, prefix=prefix) | |
| 154 except ValueErr: | |
| 155 raise PathErr(path, f"unsupported file extension: {ext}") | |
| 142 | 156 |
| 143 def show(self) -> str: | 157 def show(self) -> str: |
| 144 """ | 158 """ |
| 145 Shows the path as a string. | 159 Shows the path as a string. |
| 146 | |
| 147 Returns: | 160 Returns: |
| 148 str : the path shown as a string. | 161 str : the path shown as a string. |
| 149 """ | 162 """ |
| 150 return f"{self.prefix}{self.filePath}.{self.ext}" | 163 return f"{self.prefix}{self.filePath}.{self.ext}" |
| 151 | 164 |
| 152 def __str__(self) -> str: return self.show() | 165 def __str__(self) -> str: |
| 166 return self.show() | |
| 153 | 167 |
| 154 # ERRORS | 168 # ERRORS |
| 155 def terminate(msg :str) -> None: | 169 def terminate(msg :str) -> None: |
| 156 """ | 170 """ |
| 157 Terminate the execution of the script with an error message. | 171 Terminate the execution of the script with an error message. |
