Mercurial > repos > bgruening > sdf_to_tab
annotate sdf_to_tab.py @ 6:4beb3e026bbb draft default tip
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/rdkit commit c1d813d3f0fec60ea6efe8a11e59d98bfdc1636f"
author | bgruening |
---|---|
date | Sat, 04 Dec 2021 16:39:05 +0000 |
parents | 351fbd750a6d |
children |
rev | line source |
---|---|
0
06828e0cc8a7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/rdkit commit 714e984db6ba1198cacf4dcf325320a5889fa02c"
bgruening
parents:
diff
changeset
|
1 #!/usr/bin/env python3 |
06828e0cc8a7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/rdkit commit 714e984db6ba1198cacf4dcf325320a5889fa02c"
bgruening
parents:
diff
changeset
|
2 import argparse |
5
351fbd750a6d
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/rdkit commit 4d0bfcf37bfbedafc7ff0672dfe452766ca8a606"
bgruening
parents:
3
diff
changeset
|
3 |
0
06828e0cc8a7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/rdkit commit 714e984db6ba1198cacf4dcf325320a5889fa02c"
bgruening
parents:
diff
changeset
|
4 import pandas as pd |
06828e0cc8a7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/rdkit commit 714e984db6ba1198cacf4dcf325320a5889fa02c"
bgruening
parents:
diff
changeset
|
5 from rdkit import Chem |
06828e0cc8a7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/rdkit commit 714e984db6ba1198cacf4dcf325320a5889fa02c"
bgruening
parents:
diff
changeset
|
6 |
5
351fbd750a6d
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/rdkit commit 4d0bfcf37bfbedafc7ff0672dfe452766ca8a606"
bgruening
parents:
3
diff
changeset
|
7 |
0
06828e0cc8a7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/rdkit commit 714e984db6ba1198cacf4dcf325320a5889fa02c"
bgruening
parents:
diff
changeset
|
def sdf_to_tab(vars):
    """Write the properties of every molecule in an SDF file as a table.

    One row is produced per molecule that could be read; unreadable records
    are reported on stdout and skipped. Rows are indexed by the position of
    the molecule in the input file and columns are written in sorted order.

    Args:
        vars: Parsed command-line options. The attributes read are:
            inp: path of the SDF file to read.
            out: path of the tab-separated file to write.
            props: comma-separated property names to keep; a blank string
                keeps every property whose value has no newline or tab.
            header: whether to write the column names as the first row.
            name: whether to add the molecule name as ``SDFMoleculeName``.
            smiles: whether to add a non-isomeric ``SMILES`` column.
    """
    # Parse the requested property names once, not once per property.
    # NOTE: all spaces are stripped, so a property whose name itself contains
    # a space cannot be selected here.
    if vars.props.strip() == "":
        wanted = None  # none specified, return all
    else:
        wanted = set(vars.props.replace(" ", "").split(","))

    # Collect plain dicts and build the frame once at the end:
    # DataFrame.append was removed in pandas 2.0 and copied the whole frame
    # on every call.
    rows = []
    supplier = Chem.SDMolSupplier(vars.inp, sanitize=False)
    for n, mol in enumerate(supplier):
        if not mol:
            print("Molecule could not be read - skipped.")
            continue

        props = mol.GetPropsAsDict()
        if wanted is None:
            # Drop values containing newlines or tabs; they would break the
            # tab-separated layout.
            row = {
                prop: val
                for prop, val in props.items()
                if not any(x in str(val) for x in ["\n", "\t"])
            }
        else:
            # Keep only the properties requested via the CLI.
            row = {prop: val for prop, val in props.items() if prop in wanted}

        if vars.name:
            row["SDFMoleculeName"] = mol.GetProp("_Name")
        if vars.smiles:
            row["SMILES"] = Chem.MolToSmiles(mol, isomericSmiles=False)
        row["Index"] = int(n)
        rows.append(row)

    if rows:
        df = pd.DataFrame(rows)
    else:
        # No readable molecule: keep an "Index" column so the output is an
        # empty table instead of a KeyError.
        df = pd.DataFrame({"Index": pd.Series(dtype=int)})

    df = df.astype({"Index": int}).set_index("Index")
    sorted_cols = sorted(df.columns.values.tolist())
    df.to_csv(vars.out, sep="\t", header=vars.header, columns=sorted_cols)
0
06828e0cc8a7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/rdkit commit 714e984db6ba1198cacf4dcf325320a5889fa02c"
bgruening
parents:
diff
changeset
|
41 |
5
351fbd750a6d
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/rdkit commit 4d0bfcf37bfbedafc7ff0672dfe452766ca8a606"
bgruening
parents:
3
diff
changeset
|
42 |
0
06828e0cc8a7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/rdkit commit 714e984db6ba1198cacf4dcf325320a5889fa02c"
bgruening
parents:
diff
changeset
|
def main():
    """Parse the command-line options and run the SDF-to-tabular conversion."""
    cli = argparse.ArgumentParser(description="Convert SDF to tabular")

    # Options that take a value; every one of them is mandatory.
    valued_options = (
        ("--inp", "-i", "The input file"),
        ("--out", "-o", "The output file"),
        ("--props", "-p", "Properties to filter (leave blank for all)"),
    )
    for long_opt, short_opt, text in valued_options:
        cli.add_argument(long_opt, short_opt, help=text, required=True)

    # Boolean switches; each is False unless given on the command line.
    switches = (
        ("--header", "-t", "Write property name as the first row."),
        ("--smiles", "-s", "Include SMILES in output."),
        ("--name", "-n", "Include molecule name in output."),
    )
    for long_opt, short_opt, text in switches:
        cli.add_argument(long_opt, short_opt, action="store_true", help=text)

    options = cli.parse_args()
    sdf_to_tab(options)
5
351fbd750a6d
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/rdkit commit 4d0bfcf37bfbedafc7ff0672dfe452766ca8a606"
bgruening
parents:
3
diff
changeset
|
66 |
0
06828e0cc8a7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/rdkit commit 714e984db6ba1198cacf4dcf325320a5889fa02c"
bgruening
parents:
diff
changeset
|
67 |
06828e0cc8a7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/rdkit commit 714e984db6ba1198cacf4dcf325320a5889fa02c"
bgruening
parents:
diff
changeset
|
# Script entry point: run the command-line interface only when this file is
# executed directly, not when it is imported as a module.
if __name__ == "__main__":
    main()