comparison COBRAxy/utils/general_utils.py @ 339:1fd4e4e93c85 draft

Uploaded
author luca_milaz
date Thu, 04 Sep 2025 12:40:01 +0000
parents 111dbc8e0415
children da4f5f4a9046
comparison
equal deleted inserted replaced
338:111dbc8e0415 339:1fd4e4e93c85
15 import zipfile 15 import zipfile
16 import gzip 16 import gzip
17 import bz2 17 import bz2
18 from io import StringIO 18 from io import StringIO
19 19
20 # FILES 20 class ValueErr(Exception):
21 def __init__(self, param_name, expected, actual):
22 super().__init__(f"Invalid value for {param_name}: expected {expected}, got {actual}")
23
24 class PathErr(Exception):
25 def __init__(self, path, message):
26 super().__init__(f"Path error for '{path}': {message}")
27
21 class FileFormat(Enum): 28 class FileFormat(Enum):
22 """ 29 """
23 Encodes possible file extensions to conditionally save data in a different format. 30 Encodes possible file extensions to conditionally save data in a different format.
24 """ 31 """
25 DAT = ("dat",) # this is how galaxy treats all your files! 32 DAT = ("dat",) # this is how galaxy treats all your files!
26 CSV = ("csv",) # this is how most editable input data is written 33 CSV = ("csv",) # this is how most editable input data is written
27 TSV = ("tsv",) # this is how most editable input data is ACTUALLY written 34 TSV = ("tsv",) # this is how most editable input data is ACTUALLY written. TODO: add broader TSV support.
28
29 SVG = ("svg",) # this is how most metabolic maps are written 35 SVG = ("svg",) # this is how most metabolic maps are written
30 PNG = ("png",) # this is a common output format for images (such as metabolic maps) 36 PNG = ("png",) # this is a common output format for images (such as metabolic maps)
31 PDF = ("pdf",) # this is also a common output format for images, as it's required in publications. 37 PDF = ("pdf",) # this is also a common output format for images, as it's required in publications.
32 38
33 XML = ("xml","xml.gz", "xml.zip", "xml.bz2") # SBML files are XML files, sometimes compressed 39 # Updated to include compressed variants
34 JSON = ("json","json.gz", "json.zip", "json.bz2") # COBRA models can be stored as JSON files, sometimes compressed 40 XML = ("xml", "xml.gz", "xml.zip", "xml.bz2") # SBML files are XML files, sometimes compressed
35 41 JSON = ("json", "json.gz", "json.zip", "json.bz2") # COBRA models can be stored as JSON files, sometimes compressed
42
36 TXT = ("txt",) # this is how most output data is written 43 TXT = ("txt",) # this is how most output data is written
37
38 PICKLE = ("pickle", "pk", "p") # this is how all runtime data structures are saved 44 PICKLE = ("pickle", "pk", "p") # this is how all runtime data structures are saved
39 45
40 46 def __init__(self, *extensions):
47 self.extensions = extensions
48 # Store original extension when set via fromExt
49 self._original_extension = None
50
41 @classmethod 51 @classmethod
42 def fromExt(cls, ext :str) -> "FileFormat": 52 def fromExt(cls, ext: str) -> "FileFormat":
43 """ 53 """
44 Converts a file extension string to a FileFormat instance. 54 Converts a file extension string to a FileFormat instance.
45
46 Args: 55 Args:
47 ext : The file extension as a string. 56 ext : The file extension as a string.
48
49 Returns: 57 Returns:
50 FileFormat: The FileFormat instance corresponding to the file extension. 58 FileFormat: The FileFormat instance corresponding to the file extension.
51 """ 59 """
52 variantName = ext.upper() 60 variantName = ext.upper()
53 if variantName in FileFormat.__members__: 61 if variantName in FileFormat.__members__:
54 instance = FileFormat[variantName] 62 instance = FileFormat[variantName]
55 instance.original_extension = ext 63 instance._original_extension = ext
56 return instance 64 return instance
57 65
58 variantName = variantName.lower() 66 variantName = ext.lower()
59 for member in cls: 67 for member in cls:
60 if variantName in member.value: 68 if variantName in member.value:
61 member.original_extension = ext 69 # Store the original extension on the enum member itself (members are shared; no copy is made)
70 member._original_extension = ext
62 return member 71 return member
63 72
64 raise ValueErr("ext", "a valid FileFormat file extension", ext) 73 raise ValueErr("ext", "a valid FileFormat file extension", ext)
65 74
66 def __str__(self) -> str: 75 def __str__(self) -> str:
67 """ 76 """
68 (Private) converts to str representation. Good practice for usage with argparse. 77 (Private) converts to str representation. Good practice for usage with argparse.
69
70 Returns: 78 Returns:
71 str : the string representation of the file extension. 79 str : the string representation of the file extension.
72 """ 80 """
73 81 # If we have an original extension stored (for compressed files), use it
74 if(self.value[0] in ["json", "xml"]): #return the original string extension for compressed files 82 if hasattr(self, '_original_extension') and self._original_extension:
75 return self.original_extension 83 return self._original_extension
76 else: 84
77 return self.value[-1] # for all other formats and pickle 85 # TODO: fix. Returning the last listed extension exists for the pickle case; keep this behaviour when not dealing with XML or JSON.
86 return self.value[-1]
78 87
79 class FilePath(): 88 class FilePath():
80 """ 89 """
81 Represents a file path. View this as an attempt to standardize file-related operations by expecting 90 Represents a file path. View this as an attempt to standardize file-related operations by expecting
82 values of this type in any process requesting a file path. 91 values of this type in any process requesting a file path.
83 """ 92 """
84 def __init__(self, filePath :str, ext :FileFormat, *, prefix = "") -> None: 93 def __init__(self, filePath: str, ext: FileFormat, *, prefix="") -> None:
85 """ 94 """
86 (Private) Initializes an instance of FilePath. 95 (Private) Initializes an instance of FilePath.
87
88 Args: 96 Args:
89 path : the end of the path, containing the file name. 97 path : the end of the path, containing the file name.
90 ext : the file's extension. 98 ext : the file's extension.
91 prefix : anything before path, if the last '/' isn't there it's added by the code. 99 prefix : anything before path, if the last '/' isn't there it's added by the code.
92
93 Returns: 100 Returns:
94 None : practically, a FilePath instance. 101 None : practically, a FilePath instance.
95 """ 102 """
96 self.ext = ext 103 self.ext = ext
97 self.filePath = filePath 104 self.filePath = filePath
98 105
99 if prefix and prefix[-1] != '/': prefix += '/' 106 if prefix and prefix[-1] != '/':
107 prefix += '/'
100 self.prefix = prefix 108 self.prefix = prefix
101 109
102 @classmethod 110 @classmethod
103 def fromStrPath(cls, path :str) -> "FilePath": 111 def fromStrPath(cls, path: str) -> "FilePath":
104 """ 112 """
105 Factory method to parse a string from which to obtain, if possible, a valid FilePath instance. 113 Factory method to parse a string from which to obtain, if possible, a valid FilePath instance.
106 It detects double extensions such as .json.gz and .xml.bz2, which are common in COBRA models. 114 It detects double extensions such as .json.gz and .xml.bz2, which are common in COBRA models.
107 These double extensions are not supported for other file types such as .csv. 115 These double extensions are not supported for other file types such as .csv.
108
109 Args: 116 Args:
110 path : the string containing the path 117 path : the string containing the path
111
112 Raises: 118 Raises:
113 PathErr : if the provided string doesn't represent a valid path. 119 PathErr : if the provided string doesn't represent a valid path.
114
115 Returns: 120 Returns:
116 FilePath : the constructed instance. 121 FilePath : the constructed instance.
117 """ 122 """
118 # This method is often used to construct FilePath instances from ARGS UI arguments. These arguments *should*
119 # always be correct paths and could be used as raw strings, however most if not all functions that work with
120 # file paths request the FilePath objects specifically, which is a very good thing in any case other than this.
121 # What ends up happening is we spend time parsing a string into a FilePath so that the function accepts it, only
122 # to call show() immediately to bring back the string and open the file it points to.
123 # TODO: this is an indication that the arguments SHOULD BE OF TYPE FilePath if they are filepaths, this ENSURES
124 # their correctness when modifying the UI and avoids the pointless back-and-forth.
125 result = re.search(r"^(?P<prefix>.*\/)?(?P<name>.*)\.(?P<ext>[^.]*)$", path) 123 result = re.search(r"^(?P<prefix>.*\/)?(?P<name>.*)\.(?P<ext>[^.]*)$", path)
126 if not result or not result["name"] or not result["ext"]: 124 if not result or not result["name"] or not result["ext"]:
127 raise PathErr(path, "cannot recognize folder structure or extension in path") 125 raise PathErr(path, "cannot recognize folder structure or extension in path")
128 126
129 prefix = result["prefix"] if result["prefix"] else "" 127 prefix = result["prefix"] if result["prefix"] else ""
130 name, ext = result["name"], result["ext"] 128 name, ext = result["name"], result["ext"]
131 129
132 # Split path into parts 130 # Check for double extensions (json.gz, xml.zip, etc.)
133 parts = path.split(".") 131 parts = path.split(".")
134 if len(parts) >= 3: 132 if len(parts) >= 3:
135 penultimate = parts[-2] 133 penultimate = parts[-2]
136 last = parts[-1] 134 last = parts[-1]
137 if penultimate in {"json", "xml"}: 135 double_ext = f"{penultimate}.{last}"
136
137 # Try the double extension first
138 try:
139 ext_format = FileFormat.fromExt(double_ext)
138 name = ".".join(parts[:-2]) 140 name = ".".join(parts[:-2])
139 ext = f"{penultimate}.{last}" 141 # Extract prefix if it exists
140 142 if '/' in name:
141 return cls(name, FileFormat.fromExt(ext), prefix=prefix) 143 prefix = name[:name.rfind('/') + 1]
144 name = name[name.rfind('/') + 1:]
145 return cls(name, ext_format, prefix=prefix)
146 except ValueErr:
147 # If double extension doesn't work, fall back to single extension
148 pass
149
150 # Single extension fallback (original logic)
151 try:
152 ext_format = FileFormat.fromExt(ext)
153 return cls(name, ext_format, prefix=prefix)
154 except ValueErr:
155 raise PathErr(path, f"unsupported file extension: {ext}")
142 156
143 def show(self) -> str: 157 def show(self) -> str:
144 """ 158 """
145 Shows the path as a string. 159 Shows the path as a string.
146
147 Returns: 160 Returns:
148 str : the path shown as a string. 161 str : the path shown as a string.
149 """ 162 """
150 return f"{self.prefix}{self.filePath}.{self.ext}" 163 return f"{self.prefix}{self.filePath}.{self.ext}"
151 164
152 def __str__(self) -> str: return self.show() 165 def __str__(self) -> str:
166 return self.show()
153 167
154 # ERRORS 168 # ERRORS
155 def terminate(msg :str) -> None: 169 def terminate(msg :str) -> None:
156 """ 170 """
157 Terminate the execution of the script with an error message. 171 Terminate the execution of the script with an error message.