comparison sdf_to_tab.py @ 0:5c501eb8d56c draft default tip

"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/rdkit commit c1d813d3f0fec60ea6efe8a11e59d98bfdc1636f"
author bgruening
date Sat, 04 Dec 2021 16:39:31 +0000
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:5c501eb8d56c
1 #!/usr/bin/env python3
2 import argparse
3
4 import pandas as pd
5 from rdkit import Chem
6
7
def sdf_to_tab(vars):
    """Write the properties of every molecule in an SD file as a TSV table.

    Args:
        vars: Parsed command-line options. The attributes read here are
            ``inp`` (input SD file path), ``out`` (output path), ``props``
            (comma-separated property names; blank selects all),
            ``header`` (passed to ``to_csv`` as ``header``), ``name`` (add
            an ``SDFMoleculeName`` column) and ``smiles`` (add a ``SMILES``
            column).

    Each readable molecule becomes one row, indexed by its position in the
    input file. Unreadable entries are reported on stdout and skipped.
    Columns are written in sorted order.
    """
    mols = Chem.SDMolSupplier(vars.inp, sanitize=False)

    # Parse the requested property names once instead of re-splitting the
    # string for every property of every molecule. ``None`` means "all".
    if vars.props.strip() == "":
        wanted = None
    else:
        wanted = set(vars.props.replace(" ", "").split(","))

    # Rows are collected in a list and turned into a DataFrame in one step:
    # DataFrame.append was removed in pandas 2.0 and copied the whole frame
    # on every call.
    # NOTE(review): building the frame at once lets pandas infer dtypes from
    # all rows, so numeric formatting may differ from row-wise appending --
    # confirm against the expected tool output.
    records = []
    for n, mol in enumerate(mols):
        if not mol:
            print("Molecule could not be read - skipped.")
            continue

        d = mol.GetPropsAsDict()
        if wanted is None:
            # No filter given: keep everything except values that contain
            # newlines or tabs, which would break the tabular layout.
            d = {
                prop: val
                for prop, val in d.items()
                if not any(x in str(val) for x in ("\n", "\t"))
            }
        else:
            # Keep only the properties requested via the CLI.
            d = {prop: val for prop, val in d.items() if prop in wanted}

        if vars.name:
            d["SDFMoleculeName"] = mol.GetProp("_Name")
        if vars.smiles:
            d["SMILES"] = Chem.MolToSmiles(mol, isomericSmiles=False)
        d["Index"] = int(n)
        records.append(d)

    if records:
        df = pd.DataFrame(records)
    else:
        # No readable molecule: provide an empty "Index" column so the
        # steps below still work instead of raising KeyError.
        df = pd.DataFrame({"Index": pd.Series(dtype=int)})

    df = df.astype({"Index": int}).set_index("Index")
    sorted_cols = sorted(df.columns.values.tolist())
    df.to_csv(vars.out, sep="\t", header=vars.header, columns=sorted_cols)
41
42
def main():
    """Parse the command-line options and run the SDF-to-tabular conversion."""
    cli = argparse.ArgumentParser(description="Convert SDF to tabular")

    # Options that take a value; all three must be supplied.
    cli.add_argument("--inp", "-i", required=True, help="The input file")
    cli.add_argument("--out", "-o", required=True, help="The output file")
    cli.add_argument(
        "--props",
        "-p",
        required=True,
        help="Properties to filter (leave blank for all)",
    )

    # Boolean switches, registered in the same order as before so the
    # generated usage/help text is unchanged.
    switches = (
        ("--header", "-t", "Write property name as the first row."),
        ("--smiles", "-s", "Include SMILES in output."),
        ("--name", "-n", "Include molecule name in output."),
    )
    for long_flag, short_flag, text in switches:
        cli.add_argument(long_flag, short_flag, action="store_true", help=text)

    options = cli.parse_args()
    sdf_to_tab(options)
66
67
# Script entry point: run the CLI only when this file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    main()