Mercurial > repos > bgruening > rdconf
annotate sdf_to_tab.py @ 0:5c501eb8d56c draft default tip
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/rdkit commit c1d813d3f0fec60ea6efe8a11e59d98bfdc1636f"
author | bgruening |
---|---|
date | Sat, 04 Dec 2021 16:39:31 +0000 |
parents | |
children |
rev | line source |
---|---|
0
5c501eb8d56c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/rdkit commit c1d813d3f0fec60ea6efe8a11e59d98bfdc1636f"
bgruening
parents:
diff
changeset
|
1 #!/usr/bin/env python3 |
5c501eb8d56c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/rdkit commit c1d813d3f0fec60ea6efe8a11e59d98bfdc1636f"
bgruening
parents:
diff
changeset
|
2 import argparse |
5c501eb8d56c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/rdkit commit c1d813d3f0fec60ea6efe8a11e59d98bfdc1636f"
bgruening
parents:
diff
changeset
|
3 |
5c501eb8d56c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/rdkit commit c1d813d3f0fec60ea6efe8a11e59d98bfdc1636f"
bgruening
parents:
diff
changeset
|
4 import pandas as pd |
5c501eb8d56c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/rdkit commit c1d813d3f0fec60ea6efe8a11e59d98bfdc1636f"
bgruening
parents:
diff
changeset
|
5 from rdkit import Chem |
5c501eb8d56c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/rdkit commit c1d813d3f0fec60ea6efe8a11e59d98bfdc1636f"
bgruening
parents:
diff
changeset
|
6 |
5c501eb8d56c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/rdkit commit c1d813d3f0fec60ea6efe8a11e59d98bfdc1636f"
bgruening
parents:
diff
changeset
|
7 |
5c501eb8d56c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/rdkit commit c1d813d3f0fec60ea6efe8a11e59d98bfdc1636f"
bgruening
parents:
diff
changeset
|
8 def sdf_to_tab(vars): |
5c501eb8d56c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/rdkit commit c1d813d3f0fec60ea6efe8a11e59d98bfdc1636f"
bgruening
parents:
diff
changeset
|
9 mols = Chem.SDMolSupplier(vars.inp, sanitize=False) |
5c501eb8d56c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/rdkit commit c1d813d3f0fec60ea6efe8a11e59d98bfdc1636f"
bgruening
parents:
diff
changeset
|
10 df = pd.DataFrame() # for output |
5c501eb8d56c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/rdkit commit c1d813d3f0fec60ea6efe8a11e59d98bfdc1636f"
bgruening
parents:
diff
changeset
|
11 |
5c501eb8d56c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/rdkit commit c1d813d3f0fec60ea6efe8a11e59d98bfdc1636f"
bgruening
parents:
diff
changeset
|
12 for n in range(len(mols)): |
5c501eb8d56c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/rdkit commit c1d813d3f0fec60ea6efe8a11e59d98bfdc1636f"
bgruening
parents:
diff
changeset
|
13 if mols[n]: |
5c501eb8d56c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/rdkit commit c1d813d3f0fec60ea6efe8a11e59d98bfdc1636f"
bgruening
parents:
diff
changeset
|
14 d = mols[n].GetPropsAsDict() |
5c501eb8d56c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/rdkit commit c1d813d3f0fec60ea6efe8a11e59d98bfdc1636f"
bgruening
parents:
diff
changeset
|
15 # filter dict for desired props |
5c501eb8d56c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/rdkit commit c1d813d3f0fec60ea6efe8a11e59d98bfdc1636f"
bgruening
parents:
diff
changeset
|
16 if vars.props.strip() == "": # none specified, return all |
5c501eb8d56c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/rdkit commit c1d813d3f0fec60ea6efe8a11e59d98bfdc1636f"
bgruening
parents:
diff
changeset
|
17 d = { |
5c501eb8d56c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/rdkit commit c1d813d3f0fec60ea6efe8a11e59d98bfdc1636f"
bgruening
parents:
diff
changeset
|
18 prop: val |
5c501eb8d56c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/rdkit commit c1d813d3f0fec60ea6efe8a11e59d98bfdc1636f"
bgruening
parents:
diff
changeset
|
19 for (prop, val) in d.items() |
5c501eb8d56c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/rdkit commit c1d813d3f0fec60ea6efe8a11e59d98bfdc1636f"
bgruening
parents:
diff
changeset
|
20 if not any(x in str(val) for x in ["\n", "\t"]) |
5c501eb8d56c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/rdkit commit c1d813d3f0fec60ea6efe8a11e59d98bfdc1636f"
bgruening
parents:
diff
changeset
|
21 } # remove items containing newlines or tabs |
5c501eb8d56c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/rdkit commit c1d813d3f0fec60ea6efe8a11e59d98bfdc1636f"
bgruening
parents:
diff
changeset
|
22 else: |
5c501eb8d56c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/rdkit commit c1d813d3f0fec60ea6efe8a11e59d98bfdc1636f"
bgruening
parents:
diff
changeset
|
23 d = { |
5c501eb8d56c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/rdkit commit c1d813d3f0fec60ea6efe8a11e59d98bfdc1636f"
bgruening
parents:
diff
changeset
|
24 prop: val |
5c501eb8d56c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/rdkit commit c1d813d3f0fec60ea6efe8a11e59d98bfdc1636f"
bgruening
parents:
diff
changeset
|
25 for (prop, val) in d.items() |
5c501eb8d56c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/rdkit commit c1d813d3f0fec60ea6efe8a11e59d98bfdc1636f"
bgruening
parents:
diff
changeset
|
26 if prop in vars.props.replace(" ", "").split(",") |
5c501eb8d56c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/rdkit commit c1d813d3f0fec60ea6efe8a11e59d98bfdc1636f"
bgruening
parents:
diff
changeset
|
27 } # remove items not requested via CLI |
5c501eb8d56c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/rdkit commit c1d813d3f0fec60ea6efe8a11e59d98bfdc1636f"
bgruening
parents:
diff
changeset
|
28 if vars.name: |
5c501eb8d56c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/rdkit commit c1d813d3f0fec60ea6efe8a11e59d98bfdc1636f"
bgruening
parents:
diff
changeset
|
29 d["SDFMoleculeName"] = mols[n].GetProp("_Name") |
5c501eb8d56c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/rdkit commit c1d813d3f0fec60ea6efe8a11e59d98bfdc1636f"
bgruening
parents:
diff
changeset
|
30 if vars.smiles: |
5c501eb8d56c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/rdkit commit c1d813d3f0fec60ea6efe8a11e59d98bfdc1636f"
bgruening
parents:
diff
changeset
|
31 d["SMILES"] = Chem.MolToSmiles(mols[n], isomericSmiles=False) |
5c501eb8d56c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/rdkit commit c1d813d3f0fec60ea6efe8a11e59d98bfdc1636f"
bgruening
parents:
diff
changeset
|
32 d["Index"] = int(n) |
5c501eb8d56c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/rdkit commit c1d813d3f0fec60ea6efe8a11e59d98bfdc1636f"
bgruening
parents:
diff
changeset
|
33 |
5c501eb8d56c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/rdkit commit c1d813d3f0fec60ea6efe8a11e59d98bfdc1636f"
bgruening
parents:
diff
changeset
|
34 df = df.append(d, ignore_index=True) |
5c501eb8d56c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/rdkit commit c1d813d3f0fec60ea6efe8a11e59d98bfdc1636f"
bgruening
parents:
diff
changeset
|
35 else: |
5c501eb8d56c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/rdkit commit c1d813d3f0fec60ea6efe8a11e59d98bfdc1636f"
bgruening
parents:
diff
changeset
|
36 print("Molecule could not be read - skipped.") |
5c501eb8d56c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/rdkit commit c1d813d3f0fec60ea6efe8a11e59d98bfdc1636f"
bgruening
parents:
diff
changeset
|
37 |
5c501eb8d56c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/rdkit commit c1d813d3f0fec60ea6efe8a11e59d98bfdc1636f"
bgruening
parents:
diff
changeset
|
38 df = df.astype({"Index": int}).set_index("Index") |
5c501eb8d56c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/rdkit commit c1d813d3f0fec60ea6efe8a11e59d98bfdc1636f"
bgruening
parents:
diff
changeset
|
39 sorted_cols = sorted(df.columns.values.tolist()) |
5c501eb8d56c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/rdkit commit c1d813d3f0fec60ea6efe8a11e59d98bfdc1636f"
bgruening
parents:
diff
changeset
|
40 df.to_csv(vars.out, sep="\t", header=vars.header, columns=sorted_cols) |
5c501eb8d56c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/rdkit commit c1d813d3f0fec60ea6efe8a11e59d98bfdc1636f"
bgruening
parents:
diff
changeset
|
41 |
5c501eb8d56c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/rdkit commit c1d813d3f0fec60ea6efe8a11e59d98bfdc1636f"
bgruening
parents:
diff
changeset
|
42 |
5c501eb8d56c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/rdkit commit c1d813d3f0fec60ea6efe8a11e59d98bfdc1636f"
bgruening
parents:
diff
changeset
|
43 def main(): |
5c501eb8d56c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/rdkit commit c1d813d3f0fec60ea6efe8a11e59d98bfdc1636f"
bgruening
parents:
diff
changeset
|
44 parser = argparse.ArgumentParser(description="Convert SDF to tabular") |
5c501eb8d56c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/rdkit commit c1d813d3f0fec60ea6efe8a11e59d98bfdc1636f"
bgruening
parents:
diff
changeset
|
45 parser.add_argument("--inp", "-i", help="The input file", required=True) |
5c501eb8d56c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/rdkit commit c1d813d3f0fec60ea6efe8a11e59d98bfdc1636f"
bgruening
parents:
diff
changeset
|
46 parser.add_argument("--out", "-o", help="The output file", required=True) |
5c501eb8d56c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/rdkit commit c1d813d3f0fec60ea6efe8a11e59d98bfdc1636f"
bgruening
parents:
diff
changeset
|
47 parser.add_argument( |
5c501eb8d56c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/rdkit commit c1d813d3f0fec60ea6efe8a11e59d98bfdc1636f"
bgruening
parents:
diff
changeset
|
48 "--props", |
5c501eb8d56c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/rdkit commit c1d813d3f0fec60ea6efe8a11e59d98bfdc1636f"
bgruening
parents:
diff
changeset
|
49 "-p", |
5c501eb8d56c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/rdkit commit c1d813d3f0fec60ea6efe8a11e59d98bfdc1636f"
bgruening
parents:
diff
changeset
|
50 help="Properties to filter (leave blank for all)", |
5c501eb8d56c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/rdkit commit c1d813d3f0fec60ea6efe8a11e59d98bfdc1636f"
bgruening
parents:
diff
changeset
|
51 required=True, |
5c501eb8d56c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/rdkit commit c1d813d3f0fec60ea6efe8a11e59d98bfdc1636f"
bgruening
parents:
diff
changeset
|
52 ) |
5c501eb8d56c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/rdkit commit c1d813d3f0fec60ea6efe8a11e59d98bfdc1636f"
bgruening
parents:
diff
changeset
|
53 parser.add_argument( |
5c501eb8d56c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/rdkit commit c1d813d3f0fec60ea6efe8a11e59d98bfdc1636f"
bgruening
parents:
diff
changeset
|
54 "--header", |
5c501eb8d56c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/rdkit commit c1d813d3f0fec60ea6efe8a11e59d98bfdc1636f"
bgruening
parents:
diff
changeset
|
55 "-t", |
5c501eb8d56c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/rdkit commit c1d813d3f0fec60ea6efe8a11e59d98bfdc1636f"
bgruening
parents:
diff
changeset
|
56 action="store_true", |
5c501eb8d56c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/rdkit commit c1d813d3f0fec60ea6efe8a11e59d98bfdc1636f"
bgruening
parents:
diff
changeset
|
57 help="Write property name as the first row.", |
5c501eb8d56c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/rdkit commit c1d813d3f0fec60ea6efe8a11e59d98bfdc1636f"
bgruening
parents:
diff
changeset
|
58 ) |
5c501eb8d56c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/rdkit commit c1d813d3f0fec60ea6efe8a11e59d98bfdc1636f"
bgruening
parents:
diff
changeset
|
59 parser.add_argument( |
5c501eb8d56c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/rdkit commit c1d813d3f0fec60ea6efe8a11e59d98bfdc1636f"
bgruening
parents:
diff
changeset
|
60 "--smiles", "-s", action="store_true", help="Include SMILES in output." |
5c501eb8d56c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/rdkit commit c1d813d3f0fec60ea6efe8a11e59d98bfdc1636f"
bgruening
parents:
diff
changeset
|
61 ) |
5c501eb8d56c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/rdkit commit c1d813d3f0fec60ea6efe8a11e59d98bfdc1636f"
bgruening
parents:
diff
changeset
|
62 parser.add_argument( |
5c501eb8d56c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/rdkit commit c1d813d3f0fec60ea6efe8a11e59d98bfdc1636f"
bgruening
parents:
diff
changeset
|
63 "--name", "-n", action="store_true", help="Include molecule name in output." |
5c501eb8d56c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/rdkit commit c1d813d3f0fec60ea6efe8a11e59d98bfdc1636f"
bgruening
parents:
diff
changeset
|
64 ) |
5c501eb8d56c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/rdkit commit c1d813d3f0fec60ea6efe8a11e59d98bfdc1636f"
bgruening
parents:
diff
changeset
|
65 sdf_to_tab(parser.parse_args()) |
5c501eb8d56c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/rdkit commit c1d813d3f0fec60ea6efe8a11e59d98bfdc1636f"
bgruening
parents:
diff
changeset
|
66 |
5c501eb8d56c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/rdkit commit c1d813d3f0fec60ea6efe8a11e59d98bfdc1636f"
bgruening
parents:
diff
changeset
|
67 |
5c501eb8d56c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/rdkit commit c1d813d3f0fec60ea6efe8a11e59d98bfdc1636f"
bgruening
parents:
diff
changeset
|
68 if __name__ == "__main__": |
5c501eb8d56c
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/rdkit commit c1d813d3f0fec60ea6efe8a11e59d98bfdc1636f"
bgruening
parents:
diff
changeset
|
69 main() |