# HG changeset patch # User bgruening # Date 1567770615 14400 # Node ID f56492688f43bb1a6849ad30e0b2ac82bb5d6611 # Parent 6f8458d1cf461bce2dcc8232a1454538001a2fd1 "planemo upload for repository https://github.com/chembl/chembl_webresource_client commit d7fca21744d3c87cf173c1539c069a5c720084f3" diff -r 6f8458d1cf46 -r f56492688f43 chembl.py --- a/chembl.py Mon Aug 12 12:57:32 2019 -0400 +++ b/chembl.py Fri Sep 06 07:50:15 2019 -0400 @@ -13,7 +13,7 @@ """ smiles = set() for smi in res: - smiles.add(smi['molecule_structures']['canonical_smiles']) + smiles.add('{}\t{}'.format(smi['molecule_structures']['canonical_smiles'], smi['molecule_chembl_id'])) return smiles def sim_search(smiles, tanimoto): @@ -21,14 +21,14 @@ Return compounds which are within a Tanimoto range of the SMILES input """ similarity = new_client.similarity - return similarity.filter(smiles=smiles, similarity=tanimoto).only(['molecule_structures']) + return similarity.filter(smiles=smiles, similarity=tanimoto).only(['molecule_structures', 'molecule_chembl_id']) def substr_search(smiles): """ Return compounds which contain the SMILES substructure input """ substructure = new_client.substructure - return substructure.filter(smiles=smiles).only(['molecule_structures']) + return substructure.filter(smiles=smiles).only(['molecule_structures', 'molecule_chembl_id']) def filter_drugs(mols): """ diff -r 6f8458d1cf46 -r f56492688f43 test-data/out1.smi --- a/test-data/out1.smi Mon Aug 12 12:57:32 2019 -0400 +++ b/test-data/out1.smi Fri Sep 06 07:50:15 2019 -0400 @@ -1,47 +1,47 @@ -CN1CCCC1c2cccnc2 -CN1CCC[C@@H]1c2cccnc2 -CN1CCC[C@H]1c2cccnc2 -CCN1CCCC1c2cccnc2 -CN1CCCC1c2ccc(C)nc2 -CCc1ccc(cn1)C2CCCN2C -CN1CCCC1c2cncc(C)c2 -CCCc1ccc(cn1)C2CCCN2C -CCc1cncc(c1)C2CCCN2C -CN1CCCC[C@H]1c2cccnc2 -CN1CCCCC1c2cccnc2 -CCCc1cncc(c1)C2CCCN2C -CN1CCCC1c2cccnc2C -CCCCc1ccc(cn1)C2CCCN2C -CCCCCc1ccc(cn1)C2CCCN2C -CC1CCN(C)[C@@H]1c2cccnc2 -CN1CCCC1c2ccc(CCCc3ccccc3)nc2 -CN1CCCC1c2cncc(Cl)c2 -CN1CCCC1c2ccc(CCc3ccccc3)nc2 -CN1CCC[C@H]1c2ccccc2 -CN1CCCC1c2ccccc2 -CN1CCC[C@H]1c2ccccn2 -CN1CCCC1c2cncc(F)c2 -COc1cncc(c1)C2CCCN2C -CN1CCCC1c2cncc(Br)c2 -CN1CCCC1c2ccc(nc2)c3ccccc3 -CN1CCCC1c2ccc(\C=C\c3ccccc3)nc2 -COc1ccncc1C2CCCN2C -CCCC[C@H]1CC[C@H](N1C)c2cccnc2 -CCCC[C@@H]1CC[C@H](N1C)c2cccnc2 -CN1CCCC1c2cnccc2N -C[C@H]1C[C@H](N(C)C1)c2cccnc2 -CC[C@H]1C[C@H](N(C)C1)c2cccnc2 -CN1CCCC1c2ccc(CCc3ccc(Cl)cc3)nc2 -CN1CCCC1c2ccc(Cl)nc2 -CN1CCCC1c2ccc(C)cc2 -COCC1CCN(C)[C@@H]1c2cccnc2 -C(N1CCCC1c2cccnc2)c3ccccc3 -C[C@H]1CC[C@H](N1C)c2cccnc2 -C[C@@H]1CC[C@H](N1C)c2cccnc2 -CN1CCCC1c2ccc(Cl)cc2 -CN1CCCC1c2ccc(F)nc2 -CN1CCC(CF)[C@H]1c2cccnc2 -CN1CCCC1c2ccc(Br)nc2 -COc1ccc(CCc2ccc(cn2)C3CCCN3C)cc1 -CN1CCC(CO)[C@H]1c2cccnc2 -CN1CCCC1c2cnc3ccccc3c2 \ No newline at end of file +CN1CCCC1c2ccc(Cl)cc2 CHEMBL279352 +CCCC[C@H]1CC[C@H](N1C)c2cccnc2 CHEMBL119581 +CN1CCCC1c2ccc(F)nc2 CHEMBL72470 +C[C@H]1CC[C@H](N1C)c2cccnc2 CHEMBL116645 +CCCC[C@@H]1CC[C@H](N1C)c2cccnc2 CHEMBL119574 +CN1CCCC1c2ccc(CCc3ccc(Cl)cc3)nc2 CHEMBL189470 +CN1CCC[C@H]1c2cccnc2 CHEMBL3 +CN1CCCC1c2cncc(Cl)c2 CHEMBL61013 +CCCCCc1ccc(cn1)C2CCCN2C CHEMBL103566 +CN1CCCC1c2cccnc2 CHEMBL440464 +CN1CCCC1c2ccc(nc2)c3ccccc3 CHEMBL350748 +CN1CCCC1c2cnccc2N CHEMBL193763 +C(N1CCCC1c2cccnc2)c3ccccc3 CHEMBL163552 +COCC1CCN(C)[C@@H]1c2cccnc2 CHEMBL119020 +CN1CCCC1c2ccc(C)nc2 CHEMBL294757 +CN1CCCC1c2ccc(C)cc2 CHEMBL180376 +CN1CCCC[C@H]1c2cccnc2 CHEMBL3640783 +C[C@@H]1CC[C@H](N1C)c2cccnc2 CHEMBL118038 +CN1CCC[C@H]1c2ccccn2 CHEMBL2068760 +CN1CCCC1c2cccnc2C CHEMBL160034 +CCc1ccc(cn1)C2CCCN2C CHEMBL103225 +C[C@H]1C[C@H](N(C)C1)c2cccnc2 CHEMBL432837 +CC[C@H]1C[C@H](N(C)C1)c2cccnc2 CHEMBL115793 +CN1CCCC1c2ccccc2 CHEMBL367126 +CN1CCCC1c2ccc(CCCc3ccccc3)nc2 CHEMBL361703 +CC1CCN(C)[C@@H]1c2cccnc2 CHEMBL119019 +CN1CCCC1c2ccc(Br)nc2 CHEMBL58231 +CN1CCCCC1c2cccnc2 CHEMBL102246 +CN1CCCC1c2ccc(CCc3ccccc3)nc2 CHEMBL190295 +CN1CCC[C@H]1c2ccccc2 CHEMBL1199383 +COc1cncc(c1)C2CCCN2C CHEMBL303530 +COc1ccc(CCc2ccc(cn2)C3CCCN3C)cc1 CHEMBL190652 +CN1CCCC1c2cncc(F)c2 CHEMBL62128 +CN1CCCC1c2cncc(C)c2 CHEMBL364003 +CN1CCC(CO)[C@H]1c2cccnc2 CHEMBL325426 +CCN1CCCC1c2cccnc2 CHEMBL163042 +COc1ccncc1C2CCCN2C CHEMBL370168 +CCc1cncc(c1)C2CCCN2C CHEMBL111526 +CN1CCC[C@@H]1c2cccnc2 CHEMBL9732 +CN1CCC(CF)[C@H]1c2cccnc2 CHEMBL119016 +CN1CCCC1c2ccc(Cl)nc2 CHEMBL70050 +CN1CCCC1c2cncc(Br)c2 CHEMBL61772 +CN1CCCC1c2ccc(\C=C\c3ccccc3)nc2 CHEMBL190678 +CCCc1cncc(c1)C2CCCN2C CHEMBL382287 +CN1CCCC1c2cnc3ccccc3c2 CHEMBL161944 +CCCc1ccc(cn1)C2CCCN2C CHEMBL103537 +CCCCc1ccc(cn1)C2CCCN2C CHEMBL320023 \ No newline at end of file diff -r 6f8458d1cf46 -r f56492688f43 test-data/out2.smi --- a/test-data/out2.smi Mon Aug 12 12:57:32 2019 -0400 +++ b/test-data/out2.smi Fri Sep 06 07:50:15 2019 -0400 @@ -1,1 +1,1 @@ -CN1CCC[C@H]1c2cccnc2 \ No newline at end of file +CN1CCC[C@H]1c2cccnc2 CHEMBL3 \ No newline at end of file diff -r 6f8458d1cf46 -r f56492688f43 test-data/out3.smi --- a/test-data/out3.smi Mon Aug 12 12:57:32 2019 -0400 +++ b/test-data/out3.smi Fri Sep 06 07:50:15 2019 -0400 @@ -1,72 +1,72 @@ -CN1CCC[C@H]1c2cccnc2 -CN1CCC[C@H]1c2ccc[n+]([BH2-]C#N)c2 -CN1CC[C@H]2CCc3ncccc3[C@@H]12 -CN1CC[C@H]2CCc3c(ccc[n+]3[BH2-]C#N)[C@@H]12 -CN1[C@@H](CC[C@H]1c2cccnc2)C#N -CN1[C@H](CC[C@H]1c2cccnc2)C#N -CN1CCC[C@H]1c2cncc(c2)C#C -CN1C[C@@H](Cc2ccccc2)C[C@H]1c3cccnc3 -C[C@@H]1CC[C@H](N1C)c2cccnc2 -C[C@H]1CC[C@H](N1C)c2cccnc2 -CC[C@H]1C[C@H](N(C)C1)c2cccnc2 -CN1C[C@@H](O)C[C@H]1c2cccnc2 -CN1CCC(CO)[C@H]1c2cccnc2 -CSC[C@H]1C[C@H](N(C)C1)c2cccnc2 -CN1C[C@H](CO)C[C@H]1c2cccnc2 -CN1C[C@@H](CC#N)C[C@H]1c2cccnc2 -CN1C[C@@H](CF)C[C@H]1c2cccnc2 -CO[C@H]1C[C@H](N(C)C1)c2cccnc2 -CN1CCC(CF)[C@H]1c2cccnc2 -CC1CCN(C)[C@@H]1c2cccnc2 -COCC1CCN(C)[C@@H]1c2cccnc2 -CN1C[C@@H](CO)C[C@H]1c2cccnc2 -CN1C[C@H](C[C@H]1c2cccnc2)OC(=O)C -CN1C[C@@H](C[C@H]1c2cccnc2)C#N -CC1CN(C)[C@@H](C1C)c2cccnc2 -C[C@H]1C[C@H](N(C)C1)c2cccnc2 -CN1C[C@H](C[C@H]1c2cccnc2)OS(=O)(=O)C -COC[C@H]1C[C@H](N(C)C1)c2cccnc2 -CCCC[C@@H]1CC[C@H](N1C)c2cccnc2 -CCCC[C@H]1CC[C@H](N1C)c2cccnc2 -CN1[C@@H](CC[C@H]1c2cccnc2)c3ccccc3 -CN1[C@@H](CC[C@@H]1c2ccccc2)c3cccnc3 -Clc1ccc(OC[C@H]2CN3C(=O)CC[C@@]3(O2)c4cccnc4)cc1 -Clc1ccc(OC[C@@H]2CN3C(=O)CC[C@@]3(O2)c4cccnc4)cc1 -CN1[C@@H](CCC1=O)c2cccnc2 -CCCCCCCCC[n+]1cccc2[C@@H]3[C@@H](CCN3C)CCc12 -CN1CCC[C@H]1c2ccc[n+](CCCCCCCCCCCCCC[n+]3cccc(c3)[C@@H]4CCCN4C)c2 -CCCCCCCCCCCC[n+]1cccc2[C@@H]3[C@@H](CCN3C)CCc12 -CCCCCCCCCCCC[n+]1cccc(c1)[C@@H]2CCCN2C -CCCCCCCCCC[n+]1cccc(c1)[C@@H]2CCCN2C -CN1CCC[C@H]1c2ccc[n+](CCCCCC[n+]3cccc(c3)[C@@H]4CCCN4C)c2 -CN1CCC[C@H]1c2ccc[n+](CCCCCCCCCCC[n+]3cccc(c3)[C@@H]4CCCN4C)c2 -CN1CCC[C@H]1c2ccc[n+](CCCCCCCC[n+]3cccc(c3)[C@@H]4CCCN4C)c2 -CN1CCC[C@H]1c2ccc[n+](CCCCCCCCCC[n+]3cccc(c3)[C@@H]4CCCN4C)c2 -CN1CCC[C@H]1c2ccc[n+](CCCCc3ccc(CCCC[n+]4cccc(c4)[C@@H]5CCCN5C)cc3)c2 -CN1CCC[C@H]1c2ccc[n+](CCCCCc3ccccc3CCCCC[n+]4cccc(c4)[C@@H]5CCCN5C)c2 -CN1CCC[C@H]1c2ccc[n+](CCCC#Cc3ccccc3C#CCCC[n+]4cccc(c4)[C@@H]5CCCN5C)c2 -CN1CCC[C@H]1c2ccc[n+](CCCCCc3cc(CCCCC[n+]4cccc(c4)[C@@H]5CCCN5C)cc(CCCCC[n+]6cccc(c6)[C@@H]7CCCN7C)c3)c2 -CN1CCC[C@H]1c2ccc[n+](CCCCCCCCC[n+]3cccc(c3)[C@@H]4CCCN4C)c2 -CCCCCCCCCC[n+]1cccc2c1CC[C@]3(C)CCN(C)[C@]23C -CN1CCC[C@H]1c2ccc[n+](CCCC#Cc3cccc(c3)C#CCCC[n+]4cccc(c4)[C@@H]5CCCN5C)c2 -CCCCCCCC[n+]1cccc2[C@@H]3[C@@H](CCN3C)CCc12 -CN1CCC[C@H]1c2ccc[n+](CCCC#Cc3cc(cc(c3)C#CCCC[n+]4cccc(c4)[C@@H]5CCCN5C)C#CCCC[n+]6cccc(c6)[C@@H]7CCCN7C)c2 -CCCCCCCCCCC[n+]1cccc2[C@@H]3[C@@H](CCN3C)CCc12 -CCCCCCCCCC[n+]1cccc2[C@@H]3[C@@H](CCN3C)CCc12 -C[N@+]1(CC[N@+]2(C)CCC[C@@H]2c3cccnc3)CCC[C@H]1c4cccnc4 -CN1[C@@H](C[C@@H](OC2O[C@@H]([C@@H](O)[C@H](O)[C@H]2O)C(=O)O)C1=O)c3cccnc3 -CN1C(=O)CC[C@@]1(O)c2cccnc2 -OCN1[C@@H](CCC1=O)c2cccnc2 -CN1CCC[C@H]1c2ccc[n+](c2)[C@@H]3O[C@@H]([C@@H](O)[C@H](O)[C@H]3O)C(=O)O -C[N+]1([O-])CCC[C@H]1c2cccnc2 -CN1CCC[C@@]1(O)c2cccnc2 -CN1[C@@H](CCC1=O)c2ccc[n+](C)c2 -CN1[C@@H](CCC1=O)c2ccc[n+]([O-])c2 -CN1[C@@H](CCC1=O)c2ccc[n+](c2)C3O[C@@H]([C@@H](O)[C@H](O)[C@H]3O)C(=O)C -Cc1cncc(c1)[C@@H]2CCC[N+]2(C)[O-] -COc1ncc(cc1c2ncc(cc2[C@@H]3CC[C@H]4[C@H](OC(=O)N34)c5cc(cc(c5)C(F)(F)F)C(F)(F)F)C(F)(F)F)c6c(C)cc(cc6C)C(=O)O -COc1ccc(cc1c2ncc(cc2[C@@H]3CC[C@H]4[C@H](OC(=O)N34)c5cc(cc(c5)C(F)(F)F)C(F)(F)F)C(F)(F)F)c6c(C)cc(cc6C)C(=O)O -COc1ncc(cc1c2ncc(cc2[C@@H]3CC[C@H]4[C@H](OC(=O)N34)c5cc(cc(c5)C(F)(F)F)C(F)(F)F)C(F)(F)F)c6ccc(cc6C)C(=O)O -COCCOc1ncccc1[C@@H]2C(C(=O)C(C)C)C(=O)C(=O)N2c3ccc(cc3)c4ccsc4 -COCCOc1ncccc1[C@@H]2C(C(=O)C(C)C)C(=O)C(=O)N2c3ccc(cc3)c4ccc(C)s4 -O=S(=O)(Nc1ncns1)c2ccc3c(cccc3c2)N4CCC[C@H]4c5cccnc5 \ No newline at end of file +COc1ncc(cc1c2ncc(cc2[C@@H]3CC[C@H]4[C@H](OC(=O)N34)c5cc(cc(c5)C(F)(F)F)C(F)(F)F)C(F)(F)F)c6ccc(cc6C)C(=O)O CHEMBL3657536 +CN1CCC[C@@]1(O)c2cccnc2 CHEMBL3544720 +CN1CCC[C@H]1c2ccc[n+](CCCC#Cc3cc(cc(c3)C#CCCC[n+]4cccc(c4)[C@@H]5CCCN5C)C#CCCC[n+]6cccc(c6)[C@@H]7CCCN7C)c2 CHEMBL1185865 +CN1CCC[C@H]1c2ccc[n+](CCCCCC[n+]3cccc(c3)[C@@H]4CCCN4C)c2 CHEMBL1179917 +CN1C[C@@H](Cc2ccccc2)C[C@H]1c3cccnc3 CHEMBL420694 +CC1CCN(C)[C@@H]1c2cccnc2 CHEMBL119019 +C[C@H]1C[C@H](N(C)C1)c2cccnc2 CHEMBL432837 +CN1C(=O)CC[C@@]1(O)c2cccnc2 CHEMBL3544599 +C[C@@H]1CC[C@H](N1C)c2cccnc2 CHEMBL118038 +CO[C@H]1C[C@H](N(C)C1)c2cccnc2 CHEMBL326597 +CCCC[C@@H]1CC[C@H](N1C)c2cccnc2 CHEMBL119574 +CN1[C@@H](CCC1=O)c2ccc[n+]([O-])c2 CHEMBL3544793 +CN1C[C@@H](CF)C[C@H]1c2cccnc2 CHEMBL116706 +CN1C[C@@H](CO)C[C@H]1c2cccnc2 CHEMBL118849 +CN1C[C@@H](O)C[C@H]1c2cccnc2 CHEMBL117339 +CC[C@H]1C[C@H](N(C)C1)c2cccnc2 CHEMBL115793 +CN1CCC[C@H]1c2cncc(c2)C#C CHEMBL111659 +CN1CCC(CO)[C@H]1c2cccnc2 CHEMBL325426 +CN1CC[C@H]2CCc3ncccc3[C@@H]12 CHEMBL281410 +CN1[C@@H](CC[C@@H]1c2ccccc2)c3cccnc3 CHEMBL443018 +CN1CCC[C@H]1c2ccc[n+](CCCCc3ccc(CCCC[n+]4cccc(c4)[C@@H]5CCCN5C)cc3)c2 CHEMBL1182422 +CN1CCC[C@H]1c2ccc[n+](c2)[C@@H]3O[C@@H]([C@@H](O)[C@H](O)[C@H]3O)C(=O)O CHEMBL3544704 +CN1[C@@H](CCC1=O)c2ccc[n+](c2)C3O[C@@H]([C@@H](O)[C@H](O)[C@H]3O)C(=O)C CHEMBL3559662 +CN1CCC[C@H]1c2ccc[n+](CCCCCc3ccccc3CCCCC[n+]4cccc(c4)[C@@H]5CCCN5C)c2 CHEMBL1182431 +COC[C@H]1C[C@H](N(C)C1)c2cccnc2 CHEMBL119227 +CN1C[C@H](CO)C[C@H]1c2cccnc2 CHEMBL434607 +CCCCCCCCCC[n+]1cccc(c1)[C@@H]2CCCN2C CHEMBL1179042 +CCCCCCCCCC[n+]1cccc2c1CC[C@]3(C)CCN(C)[C@]23C CHEMBL1183842 +CN1[C@H](CC[C@H]1c2cccnc2)C#N CHEMBL84210 +CN1C[C@@H](C[C@H]1c2cccnc2)C#N CHEMBL119352 +COc1ccc(cc1c2ncc(cc2[C@@H]3CC[C@H]4[C@H](OC(=O)N34)c5cc(cc(c5)C(F)(F)F)C(F)(F)F)C(F)(F)F)c6c(C)cc(cc6C)C(=O)O CHEMBL3657535 +CCCCCCCC[n+]1cccc2[C@@H]3[C@@H](CCN3C)CCc12 CHEMBL1185506 +CCCCCCCCCCCC[n+]1cccc2[C@@H]3[C@@H](CCN3C)CCc12 CHEMBL1179033 +CN1[C@@H](CCC1=O)c2ccc[n+](C)c2 CHEMBL3544791 +CN1CCC[C@H]1c2ccc[n+](CCCCCCCC[n+]3cccc(c3)[C@@H]4CCCN4C)c2 CHEMBL1179945 +CN1[C@@H](CCC1=O)c2cccnc2 CHEMBL578211 +CN1[C@@H](CC[C@H]1c2cccnc2)C#N CHEMBL81899 +C[C@H]1CC[C@H](N1C)c2cccnc2 CHEMBL116645 +CC1CN(C)[C@@H](C1C)c2cccnc2 CHEMBL334222 +CSC[C@H]1C[C@H](N(C)C1)c2cccnc2 CHEMBL119144 +CN1CCC[C@H]1c2cccnc2 CHEMBL3 +CN1CCC[C@H]1c2ccc[n+](CCCCCCCCCCCCCC[n+]3cccc(c3)[C@@H]4CCCN4C)c2 CHEMBL1179025 +CN1CC[C@H]2CCc3c(ccc[n+]3[BH2-]C#N)[C@@H]12 CHEMBL277416 +CN1CCC[C@H]1c2ccc[n+](CCCC#Cc3cccc(c3)C#CCCC[n+]4cccc(c4)[C@@H]5CCCN5C)c2 CHEMBL1185135 +COCCOc1ncccc1[C@@H]2C(C(=O)C(C)C)C(=O)C(=O)N2c3ccc(cc3)c4ccsc4 CHEMBL3674905 +C[N+]1([O-])CCC[C@H]1c2cccnc2 CHEMBL3544718 +CN1C[C@H](C[C@H]1c2cccnc2)OS(=O)(=O)C CHEMBL119186 +CN1CCC[C@H]1c2ccc[n+](CCCC#Cc3ccccc3C#CCCC[n+]4cccc(c4)[C@@H]5CCCN5C)c2 CHEMBL1182432 +CCCCCCCCCCC[n+]1cccc2[C@@H]3[C@@H](CCN3C)CCc12 CHEMBL1185915 +CN1CCC[C@H]1c2ccc[n+](CCCCCc3cc(CCCCC[n+]4cccc(c4)[C@@H]5CCCN5C)cc(CCCCC[n+]6cccc(c6)[C@@H]7CCCN7C)c3)c2 CHEMBL1182507 +CN1CCC[C@H]1c2ccc[n+](CCCCCCCCCCC[n+]3cccc(c3)[C@@H]4CCCN4C)c2 CHEMBL1179942 +Cc1cncc(c1)[C@@H]2CCC[N+]2(C)[O-] CHEMBL3640778 +O=S(=O)(Nc1ncns1)c2ccc3c(cccc3c2)N4CCC[C@H]4c5cccnc5 CHEMBL3692144 +CCCC[C@H]1CC[C@H](N1C)c2cccnc2 CHEMBL119581 +CCCCCCCCCCCC[n+]1cccc(c1)[C@@H]2CCCN2C CHEMBL1179038 +CN1CCC[C@H]1c2ccc[n+]([BH2-]C#N)c2 CHEMBL282473 +COCC1CCN(C)[C@@H]1c2cccnc2 CHEMBL119020 +CN1CCC[C@H]1c2ccc[n+](CCCCCCCCCC[n+]3cccc(c3)[C@@H]4CCCN4C)c2 CHEMBL1179963 +CCCCCCCCCC[n+]1cccc2[C@@H]3[C@@H](CCN3C)CCc12 CHEMBL1185930 +CN1CCC(CF)[C@H]1c2cccnc2 CHEMBL119016 +CN1[C@@H](CC[C@H]1c2cccnc2)c3ccccc3 CHEMBL116998 +C[N@+]1(CC[N@+]2(C)CCC[C@@H]2c3cccnc3)CCC[C@H]1c4cccnc4 CHEMBL1358826 +CN1[C@@H](C[C@@H](OC2O[C@@H]([C@@H](O)[C@H](O)[C@H]2O)C(=O)O)C1=O)c3cccnc3 CHEMBL3544598 +CN1C[C@@H](CC#N)C[C@H]1c2cccnc2 CHEMBL334001 +OCN1[C@@H](CCC1=O)c2cccnc2 CHEMBL3544621 +COCCOc1ncccc1[C@@H]2C(C(=O)C(C)C)C(=O)C(=O)N2c3ccc(cc3)c4ccc(C)s4 CHEMBL3679738 +Clc1ccc(OC[C@@H]2CN3C(=O)CC[C@@]3(O2)c4cccnc4)cc1 CHEMBL343159 +CN1CCC[C@H]1c2ccc[n+](CCCCCCCCC[n+]3cccc(c3)[C@@H]4CCCN4C)c2 CHEMBL1183837 +COc1ncc(cc1c2ncc(cc2[C@@H]3CC[C@H]4[C@H](OC(=O)N34)c5cc(cc(c5)C(F)(F)F)C(F)(F)F)C(F)(F)F)c6c(C)cc(cc6C)C(=O)O CHEMBL3657533 +CN1C[C@H](C[C@H]1c2cccnc2)OC(=O)C CHEMBL419230 +CCCCCCCCC[n+]1cccc2[C@@H]3[C@@H](CCN3C)CCc12 CHEMBL1179016 +Clc1ccc(OC[C@H]2CN3C(=O)CC[C@@]3(O2)c4cccnc4)cc1 CHEMBL442410 \ No newline at end of file diff -r 6f8458d1cf46 -r f56492688f43 test-data/out4.smi --- a/test-data/out4.smi Mon Aug 12 12:57:32 2019 -0400 +++ b/test-data/out4.smi Fri Sep 06 07:50:15 2019 -0400 @@ -1,4 +1,4 @@ -C1CCCCC1 -C1CCCCCCCCCCC1 -C1CCCCCCC1 -C1CCCC1 \ No newline at end of file +C1CCCC1 CHEMBL1370850 +C1CCCCCCCCCCC1 CHEMBL3185808 +C1CCCCCCC1 CHEMBL452651 +C1CCCCC1 CHEMBL15980 \ No newline at end of file