annotate sdf_to_tab.py @ 6:4beb3e026bbb draft default tip

"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/rdkit commit c1d813d3f0fec60ea6efe8a11e59d98bfdc1636f"
author bgruening
date Sat, 04 Dec 2021 16:39:05 +0000
parents 351fbd750a6d
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
06828e0cc8a7 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/rdkit commit 714e984db6ba1198cacf4dcf325320a5889fa02c"
bgruening
parents:
diff changeset
1 #!/usr/bin/env python3
06828e0cc8a7 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/rdkit commit 714e984db6ba1198cacf4dcf325320a5889fa02c"
bgruening
parents:
diff changeset
2 import argparse
5
351fbd750a6d "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/rdkit commit 4d0bfcf37bfbedafc7ff0672dfe452766ca8a606"
bgruening
parents: 3
diff changeset
3
0
06828e0cc8a7 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/rdkit commit 714e984db6ba1198cacf4dcf325320a5889fa02c"
bgruening
parents:
diff changeset
4 import pandas as pd
06828e0cc8a7 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/rdkit commit 714e984db6ba1198cacf4dcf325320a5889fa02c"
bgruening
parents:
diff changeset
5 from rdkit import Chem
06828e0cc8a7 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/rdkit commit 714e984db6ba1198cacf4dcf325320a5889fa02c"
bgruening
parents:
diff changeset
6
5
351fbd750a6d "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/rdkit commit 4d0bfcf37bfbedafc7ff0672dfe452766ca8a606"
bgruening
parents: 3
diff changeset
7
0
06828e0cc8a7 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/rdkit commit 714e984db6ba1198cacf4dcf325320a5889fa02c"
bgruening
parents:
diff changeset
def sdf_to_tab(vars):
    """Convert the molecules of an SDF file into a tab-separated table.

    One output row is written per readable molecule, indexed by the
    molecule's position in the input file. Columns are written in sorted
    order.

    Args:
        vars: Parsed command-line namespace (the name shadows the builtin
            ``vars`` but is kept so existing callers continue to work).
            The attributes read are:

            * ``inp`` -- path of the SDF file to read.
            * ``out`` -- path of the tabular file to write.
            * ``props`` -- comma-separated property names to keep; a blank
              string keeps every property whose value contains neither a
              newline nor a tab.
            * ``name`` -- if true, add the ``_Name`` property of each
              molecule as column ``SDFMoleculeName``.
            * ``smiles`` -- if true, add a non-isomeric SMILES string as
              column ``SMILES``.
            * ``header`` -- if true, write the column names as first row.
    """
    # Parse the requested property names once instead of once per property
    # of every molecule. Spaces are removed before splitting on commas,
    # exactly as before. ``None`` means "no filter requested".
    if vars.props.strip() == "":
        requested = None
    else:
        requested = set(vars.props.replace(" ", "").split(","))

    rows = []
    # Iterating the supplier parses every record once; indexing it
    # repeatedly (``mols[n]``) re-reads the same record on each access.
    for n, mol in enumerate(Chem.SDMolSupplier(vars.inp, sanitize=False)):
        if not mol:
            print("Molecule could not be read - skipped.")
            continue

        props = mol.GetPropsAsDict()
        if requested is None:
            # Keep everything except values that would break the
            # tab-separated layout of the output.
            row = {
                prop: val
                for prop, val in props.items()
                if not any(x in str(val) for x in ["\n", "\t"])
            }
        else:
            # Keep only the properties requested via the CLI.
            row = {
                prop: val for prop, val in props.items() if prop in requested
            }

        if vars.name:
            row["SDFMoleculeName"] = mol.GetProp("_Name")
        if vars.smiles:
            row["SMILES"] = Chem.MolToSmiles(mol, isomericSmiles=False)
        row["Index"] = n
        rows.append(row)

    # Build the frame in one go: DataFrame.append was removed in pandas 2.0
    # and copied the whole frame on every call.
    if rows:
        df = pd.DataFrame(rows)
    else:
        # No readable molecule: still provide the "Index" column so the
        # steps below produce an empty table instead of raising KeyError.
        df = pd.DataFrame(columns=["Index"])

    df = df.astype({"Index": int}).set_index("Index")
    sorted_cols = sorted(df.columns.values.tolist())
    df.to_csv(vars.out, sep="\t", header=vars.header, columns=sorted_cols)
0
06828e0cc8a7 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/rdkit commit 714e984db6ba1198cacf4dcf325320a5889fa02c"
bgruening
parents:
diff changeset
41
5
351fbd750a6d "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/rdkit commit 4d0bfcf37bfbedafc7ff0672dfe452766ca8a606"
bgruening
parents: 3
diff changeset
42
0
06828e0cc8a7 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/rdkit commit 714e984db6ba1198cacf4dcf325320a5889fa02c"
bgruening
parents:
diff changeset
def main(argv=None):
    """Parse the command line and run the SDF-to-tabular conversion.

    Args:
        argv: Optional list of argument strings. When ``None`` (the
            default) argparse reads the arguments from ``sys.argv``.
    """
    parser = argparse.ArgumentParser(description="Convert SDF to tabular")
    parser.add_argument("--inp", "-i", help="The input file", required=True)
    parser.add_argument("--out", "-o", help="The output file", required=True)
    # The help text promises that a blank value selects all properties, so
    # the option is optional and defaults to the blank string; passing
    # ``--props ""`` explicitly still works as before.
    parser.add_argument(
        "--props",
        "-p",
        help="Properties to filter (leave blank for all)",
        default="",
    )
    parser.add_argument(
        "--header",
        "-t",
        action="store_true",
        help="Write property name as the first row.",
    )
    parser.add_argument(
        "--smiles", "-s", action="store_true", help="Include SMILES in output."
    )
    parser.add_argument(
        "--name", "-n", action="store_true", help="Include molecule name in output."
    )
    sdf_to_tab(parser.parse_args(argv))
5
351fbd750a6d "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/rdkit commit 4d0bfcf37bfbedafc7ff0672dfe452766ca8a606"
bgruening
parents: 3
diff changeset
66
0
06828e0cc8a7 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/rdkit commit 714e984db6ba1198cacf4dcf325320a5889fa02c"
bgruening
parents:
diff changeset
67
06828e0cc8a7 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/rdkit commit 714e984db6ba1198cacf4dcf325320a5889fa02c"
bgruening
parents:
diff changeset
if __name__ == "__main__":
    # Executed as a script (not imported): run the command-line interface.
    main()