annotate sdf_to_tab.py @ 4:bbbf5fb356dd draft

"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/rdkit commit 4d0bfcf37bfbedafc7ff0672dfe452766ca8a606"
author bgruening
date Wed, 17 Feb 2021 12:59:24 +0000
parents 2a868592ebcb
children 67ee76f0e497
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
0f3e5c69251e "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/rdkit commit 20df7e562341cd30e89a14d6bde9054956fadc06"
bgruening
parents:
diff changeset
1 #!/usr/bin/env python3
0f3e5c69251e "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/rdkit commit 20df7e562341cd30e89a14d6bde9054956fadc06"
bgruening
parents:
diff changeset
2 import argparse
4
bbbf5fb356dd "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/rdkit commit 4d0bfcf37bfbedafc7ff0672dfe452766ca8a606"
bgruening
parents: 2
diff changeset
3
0
0f3e5c69251e "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/rdkit commit 20df7e562341cd30e89a14d6bde9054956fadc06"
bgruening
parents:
diff changeset
4 import pandas as pd
0f3e5c69251e "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/rdkit commit 20df7e562341cd30e89a14d6bde9054956fadc06"
bgruening
parents:
diff changeset
5 from rdkit import Chem
0f3e5c69251e "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/rdkit commit 20df7e562341cd30e89a14d6bde9054956fadc06"
bgruening
parents:
diff changeset
6
4
bbbf5fb356dd "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/rdkit commit 4d0bfcf37bfbedafc7ff0672dfe452766ca8a606"
bgruening
parents: 2
diff changeset
7
0
0f3e5c69251e "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/rdkit commit 20df7e562341cd30e89a14d6bde9054956fadc06"
bgruening
parents:
diff changeset
def sdf_to_tab(vars):
    """Convert the molecules of an SDF file into a tab-separated table.

    Every readable molecule becomes one row, indexed by its position in the
    input file; its SD properties become the columns, written in
    alphabetical order. Records that cannot be read are reported on stdout
    and skipped.

    Args:
        vars: Parsed command-line namespace providing ``inp`` (SDF path),
            ``out`` (output path), ``props`` (comma-separated property
            names, blank for all) and the booleans ``header``, ``smiles``
            and ``name``.
    """
    mols = Chem.SDMolSupplier(vars.inp, sanitize=False)

    # Parse the requested property names once, not once per property of
    # every molecule. Spaces are removed entirely (as before), so a request
    # for "a b" matches the property "ab". None means "keep everything".
    if vars.props.strip() == '':
        wanted = None
    else:
        wanted = frozenset(vars.props.replace(' ', '').split(','))

    # Collect plain dict rows and build the frame once at the end:
    # DataFrame.append was removed in pandas 2.0 and copied the whole frame
    # on every call.
    # NOTE(review): building the frame in one step may type (and therefore
    # format) numeric columns differently from row-wise append - check
    # against the tool's expected outputs.
    rows = []
    for n, mol in enumerate(mols):
        # The supplier yields a falsy entry for a record it cannot parse.
        if not mol:
            print("Molecule could not be read - skipped.")
            continue

        d = mol.GetPropsAsDict()
        if wanted is None:
            # No filter given: keep every property except those whose value
            # would break the tabular layout (embedded newline or tab).
            d = {prop: val for prop, val in d.items()
                 if not any(x in str(val) for x in ['\n', '\t'])}
        else:
            # Keep only the properties requested on the command line.
            d = {prop: val for prop, val in d.items() if prop in wanted}

        if vars.name:
            d['SDFMoleculeName'] = mol.GetProp('_Name')
        if vars.smiles:
            d['SMILES'] = Chem.MolToSmiles(mol, isomericSmiles=False)
        d['Index'] = n
        rows.append(d)

    if rows:
        df = pd.DataFrame(rows)
    else:
        # Nothing could be read: keep an 'Index' column so the steps below
        # still work and an empty (or header-only) table is written instead
        # of failing with a KeyError.
        df = pd.DataFrame(columns=['Index'])

    df = df.astype({'Index': int}).set_index('Index')
    sorted_cols = sorted(df.columns)
    df.to_csv(vars.out, sep='\t', header=vars.header, columns=sorted_cols)
0
0f3e5c69251e "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/rdkit commit 20df7e562341cd30e89a14d6bde9054956fadc06"
bgruening
parents:
diff changeset
33
4
bbbf5fb356dd "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/rdkit commit 4d0bfcf37bfbedafc7ff0672dfe452766ca8a606"
bgruening
parents: 2
diff changeset
34
0
0f3e5c69251e "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/rdkit commit 20df7e562341cd30e89a14d6bde9054956fadc06"
bgruening
parents:
diff changeset
def main():
    """Parse the command line and run the SDF-to-tabular conversion."""
    cli = argparse.ArgumentParser(description="Convert SDF to tabular")

    # Options that take a value; all three must be supplied.
    value_options = (
        ('--inp', '-i', "The input file"),
        ('--out', '-o', "The output file"),
        ('--props', '-p', "Properties to filter (leave blank for all)"),
    )
    for long_flag, short_flag, text in value_options:
        cli.add_argument(long_flag, short_flag, help=text, required=True)

    # Boolean switches; each is False unless present on the command line.
    switches = (
        ('--header', '-t', "Write property name as the first row."),
        ('--smiles', '-s', "Include SMILES in output."),
        ('--name', '-n', "Include molecule name in output."),
    )
    for long_flag, short_flag, text in switches:
        cli.add_argument(long_flag, short_flag, action='store_true', help=text)

    options = cli.parse_args()
    sdf_to_tab(options)
4
bbbf5fb356dd "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/rdkit commit 4d0bfcf37bfbedafc7ff0672dfe452766ca8a606"
bgruening
parents: 2
diff changeset
47
0
0f3e5c69251e "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/rdkit commit 20df7e562341cd30e89a14d6bde9054956fadc06"
bgruening
parents:
diff changeset
48
0f3e5c69251e "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/rdkit commit 20df7e562341cd30e89a14d6bde9054956fadc06"
bgruening
parents:
diff changeset
# Run the converter only when executed as a script, not when imported.
if __name__ == "__main__":
    main()