Mercurial > repos > bimib > cobraxy
comparison COBRAxy/utils/general_utils.py @ 339:1fd4e4e93c85 draft
Uploaded
author | luca_milaz |
---|---|
date | Thu, 04 Sep 2025 12:40:01 +0000 |
parents | 111dbc8e0415 |
children | da4f5f4a9046 |
comparison
equal
deleted
inserted
replaced
338:111dbc8e0415 | 339:1fd4e4e93c85 |
---|---|
15 import zipfile | 15 import zipfile |
16 import gzip | 16 import gzip |
17 import bz2 | 17 import bz2 |
18 from io import StringIO | 18 from io import StringIO |
19 | 19 |
class ValueErr(Exception):
    """
    Raised when a parameter receives a value that is not among the accepted ones.

    Args:
        param_name : name of the offending parameter.
        expected : human-readable description of what would have been accepted.
        actual : the value that was received instead.
    """
    def __init__(self, param_name, expected, actual):
        super().__init__(f"Invalid value for {param_name}: expected {expected}, got {actual}")
        # Keep the raw pieces around so handlers don't have to parse the message.
        self.param_name = param_name
        self.expected = expected
        self.actual = actual

    def __reduce__(self):
        # By default exceptions are rebuilt as cls(*self.args) when pickled or copied, but
        # args only holds the formatted message while the constructor needs three arguments.
        # Rebuild from the original arguments instead.
        return (type(self), (self.param_name, self.expected, self.actual))
23 | |
class PathErr(Exception):
    """
    Raised when a string cannot be interpreted as a usable file path.

    Args:
        path : the path that caused the problem.
        message : explanation of what is wrong with it.
    """
    def __init__(self, path, message):
        super().__init__(f"Path error for '{path}': {message}")
        # Keep the raw pieces around so handlers don't have to parse the message.
        self.path = path
        self.message = message

    def __reduce__(self):
        # By default exceptions are rebuilt as cls(*self.args) when pickled or copied, but
        # args only holds the formatted message while the constructor needs two arguments.
        # Rebuild from the original arguments instead.
        return (type(self), (self.path, self.message))
27 | |
class FileFormat(Enum):
    """
    Encodes possible file extensions to conditionally save data in a different format.

    The value of each member is the tuple of extension strings it recognizes.
    """
    DAT = ("dat",) # this is how galaxy treats all your files!
    CSV = ("csv",) # this is how most editable input data is written
    TSV = ("tsv",) # this is how most editable input data is ACTUALLY written TODO: needs more support

    SVG = ("svg",) # this is how most metabolic maps are written
    PNG = ("png",) # this is a common output format for images (such as metabolic maps)
    PDF = ("pdf",) # this is also a common output format for images, as it's required in publications.

    # The first entry is the plain extension, the others are its compressed variants
    XML = ("xml", "xml.gz", "xml.zip", "xml.bz2") # SBML files are XML files, sometimes compressed
    JSON = ("json", "json.gz", "json.zip", "json.bz2") # COBRA models can be stored as JSON files, sometimes compressed

    TXT = ("txt",) # this is how most output data is written

    PICKLE = ("pickle", "pk", "p") # this is how all runtime data structures are saved

    def __init__(self, *extensions):
        """
        (Private) Initializes a member from the extensions listed in its value.

        Args:
            extensions : every extension string recognized by this member.

        Returns:
            None
        """
        self.extensions = extensions
        # Filled in by fromExt with the exact string the member was looked up with
        self._original_extension = None

    @classmethod
    def fromExt(cls, ext: str) -> "FileFormat":
        """
        Converts a file extension string to a FileFormat instance.

        The lookup is case-insensitive: the member name is tried first, then every extension
        listed in each member's value. The string is remembered on the member exactly as given,
        so that converting the member back to str yields the same extension.

        NOTE: enum members are singletons, so the remembered extension is shared by every user
        of that member and is overwritten by the next successful lookup that resolves to it.

        Args:
            ext : The file extension as a string.

        Raises:
            ValueErr : if no member recognizes the extension.

        Returns:
            FileFormat: The FileFormat instance corresponding to the file extension.
        """
        variantName = ext.upper()
        if variantName in cls.__members__:
            instance = cls[variantName]
            instance._original_extension = ext
            return instance

        variantName = ext.lower()
        for member in cls:
            if variantName in member.value:
                member._original_extension = ext
                return member

        raise ValueErr("ext", "a valid FileFormat file extension", ext)

    def __str__(self) -> str:
        """
        (Private) converts to str representation. Good practice for usage with argparse.

        Returns:
            str : the string representation of the file extension.
        """
        # An extension recorded by fromExt always wins (needed for compressed files)
        if self._original_extension:
            return self._original_extension

        # With nothing recorded, XML and JSON must resolve to their plain extension: the last
        # entry of their value is a compressed variant ("xml.bz2", "json.bz2").
        if self.name in ("XML", "JSON"):
            return self.value[0]

        # Historical behaviour for every other format (for pickle this is "p")
        return self.value[-1]
78 | 87 |
class FilePath():
    """
    Represents a file path. View this as an attempt to standardize file-related operations by expecting
    values of this type in any process requesting a file path.
    """
    def __init__(self, filePath: str, ext: FileFormat, *, prefix="") -> None:
        """
        (Private) Initializes an instance of FilePath.

        Args:
            filePath : the end of the path, containing the file name.
            ext : the file's extension.
            prefix : anything before filePath, if the last '/' isn't there it's added by the code.

        Returns:
            None : practically, a FilePath instance.
        """
        self.ext = ext
        self.filePath = filePath

        if prefix and prefix[-1] != '/':
            prefix += '/'
        self.prefix = prefix

    @classmethod
    def fromStrPath(cls, path: str) -> "FilePath":
        """
        Factory method to parse a string from which to obtain, if possible, a valid FilePath instance.
        It detects double extensions such as .json.gz and .xml.bz2, which are common in COBRA models.
        These double extensions are not supported for other file types such as .csv.

        Args:
            path : the string containing the path

        Raises:
            PathErr : if the provided string doesn't represent a valid path, or its extension
                isn't recognized by FileFormat.

        Returns:
            FilePath : the constructed instance.
        """
        result = re.search(r"^(?P<prefix>.*\/)?(?P<name>.*)\.(?P<ext>[^.]*)$", path)
        if not result or not result["name"] or not result["ext"]:
            raise PathErr(path, "cannot recognize folder structure or extension in path")

        prefix = result["prefix"] or ""
        name, ext = result["name"], result["ext"]

        # Check for double extensions (json.gz, xml.zip, etc.). Only the file name is inspected,
        # so dots in the folders can't interfere, and a non-empty name must remain once the
        # double extension is taken off.
        stem, dot, inner = name.rpartition(".")
        if dot and stem:
            try:
                double_format = FileFormat.fromExt(f"{inner}.{ext}")
            except ValueErr:
                # Not a known double extension, fall back to the single one
                double_format = None

            if double_format is not None:
                return cls(stem, double_format, prefix=prefix)

        # Single extension fallback
        try:
            ext_format = FileFormat.fromExt(ext)
        except ValueErr as err:
            raise PathErr(path, f"unsupported file extension: {ext}") from err

        return cls(name, ext_format, prefix=prefix)

    def show(self) -> str:
        """
        Shows the path as a string.

        Returns:
            str : the path shown as a string.
        """
        return f"{self.prefix}{self.filePath}.{self.ext}"

    def __str__(self) -> str:
        return self.show()
153 | 167 |
154 # ERRORS | 168 # ERRORS |
155 def terminate(msg :str) -> None: | 169 def terminate(msg :str) -> None: |
156 """ | 170 """ |
157 Terminate the execution of the script with an error message. | 171 Terminate the execution of the script with an error message. |