Mercurial > repos > bgruening > enumerate_charges
annotate sdf_to_tab.py @ 5:67ee76f0e497 draft default tip
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/rdkit commit c1d813d3f0fec60ea6efe8a11e59d98bfdc1636f"
author | bgruening |
---|---|
date | Sat, 04 Dec 2021 16:40:23 +0000 |
parents | bbbf5fb356dd |
children |
rev | line source |
---|---|
0
0f3e5c69251e
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/rdkit commit 20df7e562341cd30e89a14d6bde9054956fadc06"
bgruening
parents:
diff
changeset
|
1 #!/usr/bin/env python3 |
0f3e5c69251e
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/rdkit commit 20df7e562341cd30e89a14d6bde9054956fadc06"
bgruening
parents:
diff
changeset
|
2 import argparse |
4
bbbf5fb356dd
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/rdkit commit 4d0bfcf37bfbedafc7ff0672dfe452766ca8a606"
bgruening
parents:
2
diff
changeset
|
3 |
0
0f3e5c69251e
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/rdkit commit 20df7e562341cd30e89a14d6bde9054956fadc06"
bgruening
parents:
diff
changeset
|
4 import pandas as pd |
0f3e5c69251e
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/rdkit commit 20df7e562341cd30e89a14d6bde9054956fadc06"
bgruening
parents:
diff
changeset
|
5 from rdkit import Chem |
0f3e5c69251e
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/rdkit commit 20df7e562341cd30e89a14d6bde9054956fadc06"
bgruening
parents:
diff
changeset
|
6 |
4
bbbf5fb356dd
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/rdkit commit 4d0bfcf37bfbedafc7ff0672dfe452766ca8a606"
bgruening
parents:
2
diff
changeset
|
7 |
0
0f3e5c69251e
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/rdkit commit 20df7e562341cd30e89a14d6bde9054956fadc06"
bgruening
parents:
diff
changeset
|
def sdf_to_tab(vars):
    """Write the properties of the molecules in an SDF file as a tab-separated table.

    One row is produced per readable molecule, indexed by the molecule's
    position in the input file. Columns are written in sorted order.

    Args:
        vars: Parsed command-line arguments providing ``inp`` (input SDF
            path), ``out`` (output path), ``props`` (comma-separated property
            names, blank for all), and the flags ``header``, ``smiles`` and
            ``name``.
    """
    supplier = Chem.SDMolSupplier(vars.inp, sanitize=False)

    # Parse the property selection once instead of once per property per molecule.
    keep_all = vars.props.strip() == ""
    requested = set(vars.props.replace(" ", "").split(","))

    # Rows are collected in a list and turned into a DataFrame in one step:
    # DataFrame.append was removed in pandas 2.0 and copies the whole frame
    # on every call.
    # NOTE(review): compared with the append-based version on older pandas,
    # integer-valued properties may now keep an integer dtype in the output
    # - confirm against the expected test outputs.
    rows = []
    for n, mol in enumerate(supplier):
        if mol is None:
            print("Molecule could not be read - skipped.")
            continue

        d = mol.GetPropsAsDict()
        if keep_all:
            # none specified, return all - except items containing newlines
            # or tabs, which would break the tabular output
            d = {
                prop: val
                for (prop, val) in d.items()
                if not any(x in str(val) for x in ["\n", "\t"])
            }
        else:
            # remove items not requested via CLI
            d = {prop: val for (prop, val) in d.items() if prop in requested}

        if vars.name:
            d["SDFMoleculeName"] = mol.GetProp("_Name")
        if vars.smiles:
            d["SMILES"] = Chem.MolToSmiles(mol, isomericSmiles=False)
        d["Index"] = n
        rows.append(d)

    if rows:
        df = pd.DataFrame(rows)
    else:
        # No readable molecule: keep an "Index" column so the steps below
        # write an empty table instead of raising KeyError.
        df = pd.DataFrame(columns=["Index"])

    df = df.astype({"Index": int}).set_index("Index")
    sorted_cols = sorted(df.columns.values.tolist())
    df.to_csv(vars.out, sep="\t", header=vars.header, columns=sorted_cols)
0
0f3e5c69251e
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/rdkit commit 20df7e562341cd30e89a14d6bde9054956fadc06"
bgruening
parents:
diff
changeset
|
41 |
4
bbbf5fb356dd
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/rdkit commit 4d0bfcf37bfbedafc7ff0672dfe452766ca8a606"
bgruening
parents:
2
diff
changeset
|
42 |
0
0f3e5c69251e
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/rdkit commit 20df7e562341cd30e89a14d6bde9054956fadc06"
bgruening
parents:
diff
changeset
|
def main():
    """Parse the command-line arguments and run the SDF-to-tabular conversion."""
    parser = argparse.ArgumentParser(description="Convert SDF to tabular")
    parser.add_argument("--inp", "-i", help="The input file", required=True)
    parser.add_argument("--out", "-o", help="The output file", required=True)
    # The help text says the option may be left blank, so it is optional and
    # defaults to the empty string, which sdf_to_tab treats as "all properties".
    parser.add_argument(
        "--props",
        "-p",
        help="Properties to filter (leave blank for all)",
        default="",
    )
    parser.add_argument(
        "--header",
        "-t",
        action="store_true",
        help="Write property name as the first row.",
    )
    parser.add_argument(
        "--smiles", "-s", action="store_true", help="Include SMILES in output."
    )
    parser.add_argument(
        "--name", "-n", action="store_true", help="Include molecule name in output."
    )
    sdf_to_tab(parser.parse_args())
4
bbbf5fb356dd
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/rdkit commit 4d0bfcf37bfbedafc7ff0672dfe452766ca8a606"
bgruening
parents:
2
diff
changeset
|
66 |
0
0f3e5c69251e
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/rdkit commit 20df7e562341cd30e89a14d6bde9054956fadc06"
bgruening
parents:
diff
changeset
|
67 |
0f3e5c69251e
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/rdkit commit 20df7e562341cd30e89a14d6bde9054956fadc06"
bgruening
parents:
diff
changeset
|
if __name__ == "__main__":
    # Run the command-line interface only when executed as a script,
    # not when the module is imported.
    main()