Mercurial > repos > recetox > rem_complex
annotate rem_complex.py @ 4:60db8070a5c3 draft default tip
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/rem_complex commit d42dca433f84f2540d0901a8add71d01637f5179
author | recetox |
---|---|
date | Tue, 11 Jun 2024 09:10:34 +0000 |
parents | 567327a97ad2 |
children |
rev | line source |
---|---|
0
a0e07a0bc047
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/rem_complex commit 833f7671d1e1b713d52ba5c7e59d28be38b92b1e
recetox
parents:
diff
changeset
|
1 import argparse |
a0e07a0bc047
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/rem_complex commit 833f7671d1e1b713d52ba5c7e59d28be38b92b1e
recetox
parents:
diff
changeset
|
2 |
a0e07a0bc047
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/rem_complex commit 833f7671d1e1b713d52ba5c7e59d28be38b92b1e
recetox
parents:
diff
changeset
|
3 import pandas as pd |
a0e07a0bc047
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/rem_complex commit 833f7671d1e1b713d52ba5c7e59d28be38b92b1e
recetox
parents:
diff
changeset
|
4 from openbabel import openbabel, pybel |
a0e07a0bc047
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/rem_complex commit 833f7671d1e1b713d52ba5c7e59d28be38b92b1e
recetox
parents:
diff
changeset
|
# Configure Open Babel's global error log verbosity once at import time
# (0: suppress warnings; 1: warnings).
openbabel.obErrorLog.SetOutputLevel(1)
a0e07a0bc047
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/rem_complex commit 833f7671d1e1b713d52ba5c7e59d28be38b92b1e
recetox
parents:
diff
changeset
|
6 |
a0e07a0bc047
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/rem_complex commit 833f7671d1e1b713d52ba5c7e59d28be38b92b1e
recetox
parents:
diff
changeset
|
7 |
a0e07a0bc047
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/rem_complex commit 833f7671d1e1b713d52ba5c7e59d28be38b92b1e
recetox
parents:
diff
changeset
|
def parse_arguments() -> argparse.Namespace:
    """Parse the command-line options of the script.

    Returns:
        argparse.Namespace: Parsed options, exposed as ``input_format``
            (optional, ``None`` when omitted), ``input_filename`` and
            ``output_filename`` (both mandatory).
    """
    cli = argparse.ArgumentParser()
    cli.add_argument('-iformat', '--input_format', help='Input file format')

    # The two file options share the same shape, so register them from a table.
    required_options = (
        ('-i', '--input_filename', 'Input file name'),
        ('-o', '--output_filename', 'Output file name'),
    )
    for short_flag, long_flag, description in required_options:
        cli.add_argument(short_flag, long_flag, type=str, required=True, help=description)

    return cli.parse_args()
a0e07a0bc047
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/rem_complex commit 833f7671d1e1b713d52ba5c7e59d28be38b92b1e
recetox
parents:
diff
changeset
|
15 |
a0e07a0bc047
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/rem_complex commit 833f7671d1e1b713d52ba5c7e59d28be38b92b1e
recetox
parents:
diff
changeset
|
16 |
3
567327a97ad2
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/rem_complex commit 4b8a43b863ff8a0ff1d5a08e516068853adf358d
recetox
parents:
0
diff
changeset
|
def filter_csv_tsv_molecules(file_name: str, output_file_name: str, sep: str) -> None:
    """Remove rows whose SMILES string contains '.' from a csv or tsv file.

    The table is read with the given separator, rows whose ``smiles`` value
    contains a literal '.' are dropped, and the remaining rows are written
    tab-separated without the index. Rows with a missing ``smiles`` value are
    kept, because a missing value is treated as "does not contain '.'".

    Args:
        file_name (str): Path to csv or tsv file that contains metadata.
        output_file_name (str): Path to destination file, tsv format.
        sep (str): Separator used in the file (',' for csv, '\\t' for tsv).

    Raises:
        KeyError: If the input table has no ``smiles`` column.
    """
    df = pd.read_csv(file_name, sep=sep)
    if 'smiles' not in df.columns:
        # Same exception type as the plain column lookup, with a usable message.
        raise KeyError(
            f"Column 'smiles' not found in {file_name!r}; "
            f"available columns: {list(df.columns)}"
        )

    # regex=False makes '.' a literal dot rather than "any character".
    has_dot = df['smiles'].str.contains(".", na=False, regex=False)
    # Vectorised negation instead of a per-element Python lambda.
    df[~has_dot].to_csv(output_file_name, index=False, sep='\t')
0
a0e07a0bc047
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/rem_complex commit 833f7671d1e1b713d52ba5c7e59d28be38b92b1e
recetox
parents:
diff
changeset
|
29 |
a0e07a0bc047
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/rem_complex commit 833f7671d1e1b713d52ba5c7e59d28be38b92b1e
recetox
parents:
diff
changeset
|
30 |
a0e07a0bc047
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/rem_complex commit 833f7671d1e1b713d52ba5c7e59d28be38b92b1e
recetox
parents:
diff
changeset
|
def _smiles_has_multiple_components(mol) -> bool:
    """Return True when the molecule's SMILES (not its title) contains '.'."""
    record = mol.write('smi')
    # The 'smi' record is the SMILES followed by whitespace and the molecule
    # title. Only the SMILES field may be inspected: a title such as
    # "compound.1" must not cause the molecule to be dropped.
    if not record or record[0].isspace():
        # Empty SMILES (record starts with the separator): nothing to reject.
        return False
    smiles = record.split(maxsplit=1)[0]
    return "." in smiles


def filter_other_format_molecules(file_name: str, output_file_name: str, input_format: str) -> None:
    """Removes molecules with '.' in SMILES string from smi or inchi files.

    Every molecule is read with Open Babel, converted to SMILES, and kept only
    when the SMILES itself contains no '.'. The molecule title is ignored for
    this check. Kept molecules are written back in the input format.

    Args:
        file_name (str): Path to smi or inchi files.
        output_file_name (str): Path to destination files, in smi or inchi formats.
        input_format (str): Input file format.
    """
    # Read the whole input before opening the output, so a parsing failure
    # does not leave a truncated output file behind.
    molecules = list(pybel.readfile(input_format, file_name))
    filtered_molecules = [mol for mol in molecules if not _smiles_has_multiple_components(mol)]

    with open(output_file_name, 'w') as f:
        f.writelines(mol.write(input_format) for mol in filtered_molecules)
a0e07a0bc047
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/rem_complex commit 833f7671d1e1b713d52ba5c7e59d28be38b92b1e
recetox
parents:
diff
changeset
|
45 |
a0e07a0bc047
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/rem_complex commit 833f7671d1e1b713d52ba5c7e59d28be38b92b1e
recetox
parents:
diff
changeset
|
46 |
a0e07a0bc047
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/rem_complex commit 833f7671d1e1b713d52ba5c7e59d28be38b92b1e
recetox
parents:
diff
changeset
|
def filter_complex_molecules(file_name: str, output_file_name: str, input_format: str) -> None:
    """Dispatch complex removal to the handler matching the input format.

    Tabular inputs ('csv' and 'tsv') go to the table-based filter with the
    matching column separator; every other format is handed to the Open Babel
    based filter.

    Args:
        file_name (str): Path to csv, tsv, smi, or inchi files.
        output_file_name (str): Path to destination files, in corresponding formats.
        input_format (str): Input file format.
    """
    if input_format == 'csv':
        delimiter = ','
    elif input_format == 'tsv':
        delimiter = '\t'
    else:
        # Not a table: let Open Babel parse the file.
        filter_other_format_molecules(file_name, output_file_name, input_format)
        return

    filter_csv_tsv_molecules(file_name, output_file_name, delimiter)
a0e07a0bc047
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/rem_complex commit 833f7671d1e1b713d52ba5c7e59d28be38b92b1e
recetox
parents:
diff
changeset
|
60 |
a0e07a0bc047
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/rem_complex commit 833f7671d1e1b713d52ba5c7e59d28be38b92b1e
recetox
parents:
diff
changeset
|
61 |
a0e07a0bc047
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/rem_complex commit 833f7671d1e1b713d52ba5c7e59d28be38b92b1e
recetox
parents:
diff
changeset
|
if __name__ == "__main__":
    # Script entry point: read the CLI options and run the filter once.
    args = parse_arguments()
    filter_complex_molecules(
        file_name=args.input_filename,
        output_file_name=args.output_filename,
        input_format=args.input_format,
    )