Mercurial > repos > marcel > caddsuite_linux_x86_64
comparison CADDSuite-1.0.1/data/OpenBabel/SMARTS_InteLigand.txt @ 9:2cff9609f2c7
Uploaded
author | marcel |
---|---|
date | Tue, 15 Nov 2011 10:40:26 -0500 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
8:9ab6da6cd36c | 9:2cff9609f2c7 |
---|---|
1 # | |
2 # SMARTS Patterns for Functional Group Classification | |
3 # | |
4 # written by Christian Laggner | |
5 # Copyright 2005 Inte:Ligand Software-Entwicklungs und Consulting GmbH | |
6 # | |
7 # Released under the Lesser General Public License (LGPL license) | |
8 # see http://www.gnu.org/copyleft/lesser.html | |
9 # Modified from Version 221105 | |
10 ##################################################################################################### | |
11 | |
12 # General Stuff: | |
13 # These patterns were written in an attempt to represent the classification of organic compounds | |
14 # from the viewpoint of an organic chemist. | |
15 # They are often very restrictive. This may be generally a good thing, but it also takes some time | |
16 # for filtering/indexing large compound sets. | |
17 # For filtering undesired groups (in druglike compounds) one will want to have more general patterns | |
18 # (e.g. you don't want *any* halide of *any* acid, *neither* aldehyde *nor* formyl esters and amides, ...). | |
19 # | |
20 | |
21 # Part I: Carbon | |
22 # ============== | |
23 | |
24 | |
25 # I.1: Carbon-Carbon Bonds | |
26 # ------------------------ | |
27 | |
28 # I.1.1 Alkanes: | |
29 | |
30 Primary_carbon: [CX4H3][#6] | |
31 | |
32 Secondary_carbon: [CX4H2]([#6])[#6] | |
33 | |
34 Tertiary_carbon: [CX4H1]([#6])([#6])[#6] | |
35 | |
36 Quaternary_carbon: [CX4]([#6])([#6])([#6])[#6] | |
37 | |
38 | |
39 # I.1.2 C-C double and Triple Bonds | |
40 | |
41 Alkene: [CX3;$([H2]),$([H1][#6]),$(C([#6])[#6])]=[CX3;$([H2]),$([H1][#6]),$(C([#6])[#6])] | |
42 # sp2 C may be substituted only by C or H - | |
43 # does not hit ketenes and allenes, nor enamines, enols and the like | |
44 | |
45 Alkyne: [CX2]#[CX2] | |
46 # non-carbon substituents (e.g. alkynol ethers) are rather rare, thus no further discrimination | |
47 | |
48 Allene: [CX3]=[CX2]=[CX3] | |
49 | |
50 | |
51 # I.2: One Carbon-Hetero Bond | |
52 # --------------------------- | |
53 | |
54 | |
55 # I.2.1 Alkyl Halogenides | |
56 | |
57 Alkylchloride: [ClX1][CX4] | |
58 # will also hit chloromethylethers and the like, but no chloroalkenes, -alkynes or -aromats | |
59 # a more restrictive version can be obtained by modifying the Alcohol string. | |
60 | |
61 Alkylfluoride: [FX1][CX4] | |
62 | |
63 Alkylbromide: [BrX1][CX4] | |
64 | |
65 Alkyliodide: [IX1][CX4] | |
66 | |
67 | |
68 # I.2.2 Alcohols and Ethers | |
69 | |
70 Alcohol: [OX2H][CX4;!$(C([OX2H])[O,S,#7,#15])] | |
71 # nonspecific definition, no acetals, aminals, and the like | |
72 | |
73 Primary_alcohol: [OX2H][CX4H2;!$(C([OX2H])[O,S,#7,#15])] | |
74 | |
75 Secondary_alcohol: [OX2H][CX4H;!$(C([OX2H])[O,S,#7,#15])] | |
76 | |
77 Tertiary_alcohol: [OX2H][CX4D4;!$(C([OX2H])[O,S,#7,#15])] | |
78 | |
79 Dialkylether: [OX2]([CX4;!$(C([OX2])[O,S,#7,#15,F,Cl,Br,I])])[CX4;!$(C([OX2])[O,S,#7,#15])] | |
80 # no acetals and the like; no enolethers | |
81 | |
82 Dialkylthioether: [SX2]([CX4;!$(C([SX2])[O,S,#7,#15,F,Cl,Br,I])])[CX4;!$(C([SX2])[O,S,#7,#15])] | |
83 # no acetals and the like; no enolethers | |
84 | |
85 Alkylarylether: [OX2](c)[CX4;!$(C([OX2])[O,S,#7,#15,F,Cl,Br,I])] | |
86 # no acetals and the like; no enolethers | |
87 | |
88 Diarylether: [c][OX2][c] | |
89 | |
90 Alkylarylthioether: [SX2](c)[CX4;!$(C([SX2])[O,S,#7,#15,F,Cl,Br,I])] | |
91 | |
92 Diarylthioether: [c][SX2][c] | |
93 | |
94 Oxonium: [O+;!$([O]~[!#6]);!$([O]*~[#7,#8,#15,#16])] | |
95 # can't be aromatic, thus O and not #8 | |
96 | |
97 # I.2.3 Amines | |
98 | |
99 Amine: [NX3+0,NX4+;!$([N]~[!#6]);!$([N]*~[#7,#8,#15,#16])] | |
100 # hits all amines (prim/sec/tert/quart), including ammonium salts, also enamines, but not amides, imides, aminals, ... | |
101 | |
102 # the following amines include also the protonated forms | |
103 | |
104 Primary_aliph_amine: [NX3H2+0,NX4H3+;!$([N][!C]);!$([N]*~[#7,#8,#15,#16])] | |
105 | |
106 Secondary_aliph_amine: [NX3H1+0,NX4H2+;!$([N][!C]);!$([N]*~[#7,#8,#15,#16])] | |
107 | |
108 Tertiary_aliph_amine: [NX3H0+0,NX4H1+;!$([N][!C]);!$([N]*~[#7,#8,#15,#16])] | |
109 | |
110 Quaternary_aliph_ammonium: [NX4H0+;!$([N][!C]);!$([N]*~[#7,#8,#15,#16])] | |
111 | |
112 Primary_arom_amine: [NX3H2+0,NX4H3+]c | |
113 | |
114 Secondary_arom_amine: [NX3H1+0,NX4H2+;!$([N][!c]);!$([N]*~[#7,#8,#15,#16])] | |
115 | |
116 Tertiary_arom_amine: [NX3H0+0,NX4H1+;!$([N][!c]);!$([N]*~[#7,#8,#15,#16])] | |
117 | |
118 Quaternary_arom_ammonium: [NX4H0+;!$([N][!c]);!$([N]*~[#7,#8,#15,#16])] | |
119 | |
120 Secondary_mixed_amine: [NX3H1+0,NX4H2+;$([N]([c])[C]);!$([N]*~[#7,#8,#15,#16])] | |
121 | |
122 Tertiary_mixed_amine: [NX3H0+0,NX4H1+;$([N]([c])([C])[#6]);!$([N]*~[#7,#8,#15,#16])] | |
123 | |
124 Quaternary_mixed_ammonium: [NX4H0+;$([N]([c])([C])([#6])[#6]);!$([N]*~[#7,#8,#15,#16])] | |
125 | |
126 Ammonium: [N+;!$([N]~[!#6]);!$(N=*);!$([N]*~[#7,#8,#15,#16])] | |
127 # only C and H substituents allowed. Quaternary or protonated amines | |
128 # NX4+ or Nv4+ is not recognized by Daylight's depictmatch if less than four C are present | |
129 | |
130 | |
131 # I.2.4 Others | |
132 | |
133 Alkylthiol: [SX2H][CX4;!$(C([SX2H])~[O,S,#7,#15])] | |
134 | |
135 Dialkylthioether: [SX2]([CX4;!$(C([SX2])[O,S,#7,#15,F,Cl,Br,I])])[CX4;!$(C([SX2])[O,S,#7,#15])] | |
136 | |
137 Alkylarylthioether: [SX2](c)[CX4;!$(C([SX2])[O,S,#7,#15])] | |
138 | |
139 Disulfide: [SX2D2][SX2D2] | |
140 | |
141 1,2-Aminoalcohol: [OX2H][CX4;!$(C([OX2H])[O,S,#7,#15,F,Cl,Br,I])][CX4;!$(C([N])[O,S,#7,#15])][NX3;!$(NC=[O,S,N])] | |
142 # does not hit alpha-amino acids, enaminoalcohols, 1,2-aminoacetals, o-aminophenols, etc. | |
143 | |
144 1,2-Diol: [OX2H][CX4;!$(C([OX2H])[O,S,#7,#15])][CX4;!$(C([OX2H])[O,S,#7,#15])][OX2H] | |
145 # does not hit alpha-hydroxy acids, enolalcohols, 1,2-hydroxyacetals, 1,2-diphenols, etc. | |
146 | |
147 1,1-Diol: [OX2H][CX4;!$(C([OX2H])([OX2H])[O,S,#7,#15])][OX2H] | |
148 | |
149 Hydroperoxide: [OX2H][OX2] | |
150 # does not necessarily have to be connected to a carbon atom, includes also hydrotrioxides | |
151 | |
152 Peroxo: [OX2D2][OX2D2] | |
153 | |
154 Organolithium_compounds: [LiX1][#6,#14] | |
155 | |
156 Organomagnesium_compounds: [MgX2][#6,#14] | |
157 # not restricted to Grignard compounds, also dialkyl Mg | |
158 | |
159 Organometallic_compounds: [!#1;!#5;!#6;!#7;!#8;!#9;!#14;!#15;!#16;!#17;!#33;!#34;!#35;!#52;!#53;!#85]~[#6;!-] | |
160 # very general, includes all metals covalently bound to carbon | |
161 | |
162 | |
163 # I.3: Two Carbon-Hetero Bonds (Carbonyl and Derivatives) | |
164 # ---------------------------- | |
165 | |
166 # I.3.1 Double Bond to Hetero | |
167 | |
168 Aldehyde: [$([CX3H][#6]),$([CX3H2])]=[OX1] | |
169 # hits aldehydes including formaldehyde | |
170 | |
171 Ketone: [#6][CX3](=[OX1])[#6] | |
172 # does not include oxo-groups connected to a (hetero-) aromatic ring | |
173 | |
174 Thioaldehyde: [$([CX3H][#6]),$([CX3H2])]=[SX1] | |
175 | |
176 Thioketone: [#6][CX3](=[SX1])[#6] | |
177 # does not include thioxo-groups connected to a (hetero-) aromatic ring | |
178 | |
179 Imine: [NX2;$([N][#6]),$([NH]);!$([N][CX3]=[#7,#8,#15,#16])]=[CX3;$([CH2]),$([CH][#6]),$([C]([#6])[#6])] | |
180 # nitrogen is not part of an amide-like structure, nor of an aromatic ring, but can be part of an aminal or similar | |
181 | |
182 Immonium: [NX3+;!$([N][!#6]);!$([N][CX3]=[#7,#8,#15,#16])] | |
183 | |
184 Oxime: [NX2](=[CX3;$([CH2]),$([CH][#6]),$([C]([#6])[#6])])[OX2H] | |
185 | |
186 Oximether: [NX2](=[CX3;$([CH2]),$([CH][#6]),$([C]([#6])[#6])])[OX2][#6;!$(C=[#7,#8])] | |
187 # ether, not ester or amide; does not hit isoxazole | |
188 | |
189 | |
190 # I.3.2. Two Single Bonds to Hetero | |
191 | |
192 Acetal: [OX2]([#6;!$(C=[O,S,N])])[CX4;!$(C(O)(O)[!#6])][OX2][#6;!$(C=[O,S,N])] | |
193 # does not hit hydroxy-methylesters, ketenacetals, hemiacetals, orthoesters, etc. | |
194 | |
195 Hemiacetal: [OX2H][CX4;!$(C(O)(O)[!#6])][OX2][#6;!$(C=[O,S,N])] | |
196 | |
197 Aminal: [NX3v3;!$(NC=[#7,#8,#15,#16])]([#6])[CX4;!$(C(N)(N)[!#6])][NX3v3;!$(NC=[#7,#8,#15,#16])][#6] | |
198 # Ns are not part of an amide or similar. v3 is to exclude nitro and similar groups | |
199 | |
200 Hemiaminal: [NX3v3;!$(NC=[#7,#8,#15,#16])]([#6])[CX4;!$(C(N)(N)[!#6])][OX2H] | |
201 | |
202 Thioacetal: [SX2]([#6;!$(C=[O,S,N])])[CX4;!$(C(S)(S)[!#6])][SX2][#6;!$(C=[O,S,N])] | |
203 | |
204 Thiohemiacetal: [SX2]([#6;!$(C=[O,S,N])])[CX4;!$(C(S)(S)[!#6])][OX2H] | |
205 | |
206 Halogen_acetal_like: [NX3v3,SX2,OX2;!$(*C=[#7,#8,#15,#16])][CX4;!$(C([N,S,O])([N,S,O])[!#6])][FX1,ClX1,BrX1,IX1] | |
207 # hits chloromethylenethers and other reactive alkylating agents | |
208 | |
209 Acetal_like: [NX3v3,SX2,OX2;!$(*C=[#7,#8,#15,#16])][CX4;!$(C([N,S,O])([N,S,O])[!#6])][FX1,ClX1,BrX1,IX1,NX3v3,SX2,OX2;!$(*C=[#7,#8,#15,#16])] | |
210 # includes all of the above and other combinations (S-C-N, hydrates, ...), but still no aminomethylenesters and similar | |
211 | |
212 Halogenmethylen_ester_and_similar: [NX3v3,SX2,OX2;$(**=[#7,#8,#15,#16])][CX4;!$(C([N,S,O])([N,S,O])[!#6])][FX1,ClX1,BrX1,IX1] | |
213 # also reactive alkylating agents. Acid does not have to be carboxylic acid, also S- and P-based acids allowed | |
214 | |
215 NOS_methylen_ester_and_similar: [NX3v3,SX2,OX2;$(**=[#7,#8,#15,#16])][CX4;!$(C([N,S,O])([N,S,O])[!#6])][NX3v3,SX2,OX2;!$(*C=[#7,#8,#15,#16])] | |
216 # Same as above, but N,O or S instead of halogen. Ester/amide allowed only on one side | |
217 | |
218 Hetero_methylen_ester_and_similar: [NX3v3,SX2,OX2;$(**=[#7,#8,#15,#16])][CX4;!$(C([N,S,O])([N,S,O])[!#6])][FX1,ClX1,BrX1,IX1,NX3v3,SX2,OX2;!$(*C=[#7,#8,#15,#16])] | |
219 # Combination of the last two patterns | |
220 | |
221 Cyanhydrine: [NX1]#[CX2][CX4;$([CH2]),$([CH]([CX2])[#6]),$(C([CX2])([#6])[#6])][OX2H] | |
222 | |
223 | |
224 # I.3.3 Single Bond to Hetero, C=C Double Bond (Enols and Similar) | |
225 | |
226 Chloroalkene: [ClX1][CX3]=[CX3] | |
227 | |
228 Fluoroalkene: [FX1][CX3]=[CX3] | |
229 | |
230 Bromoalkene: [BrX1][CX3]=[CX3] | |
231 | |
232 Iodoalkene: [IX1][CX3]=[CX3] | |
233 | |
234 Enol: [OX2H][CX3;$([H1]),$(C[#6])]=[CX3] | |
235 # no phenols | |
236 | |
237 Endiol: [OX2H][CX3;$([H1]),$(C[#6])]=[CX3;$([H1]),$(C[#6])][OX2H] | |
238 # no 1,2-diphenols, ketenacetals, ... | |
239 | |
240 Enolether: [OX2]([#6;!$(C=[N,O,S])])[CX3;$([H0][#6]),$([H1])]=[CX3] | |
241 # also finds endiol diethers, but not enol esters; no aromatics | |
242 | |
243 Enolester: [OX2]([CX3]=[OX1])[#6X3;$([#6][#6]),$([H1])]=[#6X3;!$(C[OX2H])] | |
244 | |
245 | |
246 Enamine: [NX3;$([NH2][CX3]),$([NH1]([CX3])[#6]),$([N]([CX3])([#6])[#6]);!$([N]*=[#7,#8,#15,#16])][CX3;$([CH]),$([C][#6])]=[CX3] | |
247 # does not hit amines attached to aromatic rings, nor may the nitrogen be aromatic | |
248 | |
249 Thioenol: [SX2H][CX3;$([H1]),$(C[#6])]=[CX3] | |
250 | |
251 Thioenolether: [SX2]([#6;!$(C=[N,O,S])])[CX3;$(C[#6]),$([CH])]=[CX3] | |
252 | |
253 | |
254 # I.4: Three Carbon-Hetero Bonds (Carboxyl and Derivatives) | |
255 # ------------------------------ | |
256 | |
257 Acylchloride: [CX3;$([R0][#6]),$([H1R0])](=[OX1])[ClX1] | |
258 | |
259 Acylfluoride: [CX3;$([R0][#6]),$([H1R0])](=[OX1])[FX1] | |
260 | |
261 Acylbromide: [CX3;$([R0][#6]),$([H1R0])](=[OX1])[BrX1] | |
262 | |
263 Acyliodide: [CX3;$([R0][#6]),$([H1R0])](=[OX1])[IX1] | |
264 | |
265 Acylhalide: [CX3;$([R0][#6]),$([H1R0])](=[OX1])[FX1,ClX1,BrX1,IX1] | |
266 # all of the above | |
267 | |
268 | |
269 # The following contains all simple carboxylic combinations of O, N, S, & Hal - | |
270 # - acids, esters, amides, ... as well as a few extra cases (anhydride, hydrazide...) | |
271 # Cyclic structures (including aromats) like lactones, lactames, ... got their own | |
272 # definitions. Structures where both heteroatoms are part of an aromatic ring | |
273 # (oxazoles, imidazoles, ...) were excluded. | |
274 | |
275 Carboxylic_acid: [CX3;$([R0][#6]),$([H1R0])](=[OX1])[$([OX2H]),$([OX1-])] | |
276 # includes carboxylate anions | |
277 | |
278 Carboxylic_ester: [CX3;$([R0][#6]),$([H1R0])](=[OX1])[OX2][#6;!$(C=[O,N,S])] | |
279 # does not hit anhydrides or lactones | |
280 | |
281 Lactone: [#6][#6X3R](=[OX1])[#8X2][#6;!$(C=[O,N,S])] | |
282 # may also be aromatic | |
283 | |
284 Carboxylic_anhydride: [CX3;$([H0][#6]),$([H1])](=[OX1])[#8X2][CX3;$([H0][#6]),$([H1])](=[OX1]) | |
285 # anhydride formed by two carboxylic acids, no mixed anhydrides (e.g. between carboxylic acid and sulfuric acid); may be part of a ring, even aromatic | |
286 | |
287 Carboxylic_acid_derivative: [$([#6X3H0][#6]),$([#6X3H])](=[!#6])[!#6] | |
288 # includes most of the structures of I.4 and many more, also 1,3-heteroaromatics such as isoxazole | |
289 | |
290 Carbothioic_acid: [CX3;!R;$([C][#6]),$([CH]);$([C](=[OX1])[$([SX2H]),$([SX1-])]),$([C](=[SX1])[$([OX2H]),$([OX1-])])] | |
291 # hits both tautomeric forms, as well as anions | |
292 | |
293 Carbothioic_S_ester: [CX3;$([R0][#6]),$([H1R0])](=[OX1])[SX2][#6;!$(C=[O,N,S])] | |
294 | |
295 Carbothioic_S_lactone: [#6][#6X3R](=[OX1])[#16X2][#6;!$(C=[O,N,S])] | |
296 # may also be aromatic | |
297 | |
298 Carbothioic_O_ester: [CX3;$([H0][#6]),$([H1])](=[SX1])[OX2][#6;!$(C=[O,N,S])] | |
299 | |
300 Carbothioic_O_lactone: [#6][#6X3R](=[SX1])[#8X2][#6;!$(C=[O,N,S])] | |
301 | |
302 Carbothioic_halide: [CX3;$([H0][#6]),$([H1])](=[SX1])[FX1,ClX1,BrX1,IX1] | |
303 | |
304 Carbodithioic_acid: [CX3;!R;$([C][#6]),$([CH]);$([C](=[SX1])[SX2H])] | |
305 | |
306 Carbodithioic_ester: [CX3;!R;$([C][#6]),$([CH]);$([C](=[SX1])[SX2][#6;!$(C=[O,N,S])])] | |
307 | |
308 Carbodithiolactone: [#6][#6X3R](=[SX1])[#16X2][#6;!$(C=[O,N,S])] | |
309 | |
310 | |
311 Amide: [CX3;$([R0][#6]),$([H1R0])](=[OX1])[#7X3;$([H2]),$([H1][#6;!$(C=[O,N,S])]),$([#7]([#6;!$(C=[O,N,S])])[#6;!$(C=[O,N,S])])] | |
312 # does not hit lactames | |
313 | |
314 Primary_amide: [CX3;$([R0][#6]),$([H1R0])](=[OX1])[NX3H2] | |
315 | |
316 Secondary_amide: [CX3;$([R0][#6]),$([H1R0])](=[OX1])[#7X3H1][#6;!$(C=[O,N,S])] | |
317 | |
318 Tertiary_amide: [CX3;$([R0][#6]),$([H1R0])](=[OX1])[#7X3H0]([#6;!$(C=[O,N,S])])[#6;!$(C=[O,N,S])] | |
319 | |
320 Lactam: [#6R][#6X3R](=[OX1])[#7X3;$([H1][#6;!$(C=[O,N,S])]),$([H0]([#6;!$(C=[O,N,S])])[#6;!$(C=[O,N,S])])] | |
321 # cyclic amides, may also be aromatic | |
322 | |
323 Alkyl_imide: [#6X3;$([H0][#6]),$([H1])](=[OX1])[#7X3H0]([#6])[#6X3;$([H0][#6]),$([H1])](=[OX1]) | |
324 # may be part of a ring, even aromatic. only C allowed at central N. May also be triacyl amide | |
325 | |
326 N_hetero_imide: [#6X3;$([H0][#6]),$([H1])](=[OX1])[#7X3H0]([!#6])[#6X3;$([H0][#6]),$([H1])](=[OX1]) | |
327 # everything else than H or C at central N | |
328 | |
329 Imide_acidic: [#6X3;$([H0][#6]),$([H1])](=[OX1])[#7X3H1][#6X3;$([H0][#6]),$([H1])](=[OX1]) | |
330 # can be deprotonated | |
331 | |
332 Thioamide: [$([CX3;!R][#6]),$([CX3H;!R])](=[SX1])[#7X3;$([H2]),$([H1][#6;!$(C=[O,N,S])]),$([#7]([#6;!$(C=[O,N,S])])[#6;!$(C=[O,N,S])])] | |
333 # does not hit thiolactames | |
334 | |
335 Thiolactam: [#6R][#6X3R](=[SX1])[#7X3;$([H1][#6;!$(C=[O,N,S])]),$([H0]([#6;!$(C=[O,N,S])])[#6;!$(C=[O,N,S])])] | |
336 # cyclic thioamides, may also be aromatic | |
337 | |
338 | |
339 Oximester: [#6X3;$([H0][#6]),$([H1])](=[OX1])[#8X2][#7X2]=,:[#6X3;$([H0]([#6])[#6]),$([H1][#6]),$([H2])] | |
340 # may also be part of a ring / aromatic | |
341 | |
342 Amidine: [NX3;!$(NC=[O,S])][CX3;$([CH]),$([C][#6])]=[NX2;!$(NC=[O,S])] | |
343 # only basic amidines, not as part of aromatic ring (e.g. imidazole) | |
344 | |
345 Hydroxamic_acid: [CX3;$([H0][#6]),$([H1])](=[OX1])[#7X3;$([H1]),$([H0][#6;!$(C=[O,N,S])])][$([OX2H]),$([OX1-])] | |
346 | |
347 Hydroxamic_acid_ester: [CX3;$([H0][#6]),$([H1])](=[OX1])[#7X3;$([H1]),$([H0][#6;!$(C=[O,N,S])])][OX2][#6;!$(C=[O,N,S])] | |
348 # does not hit anhydrides of carboxylic acids with hydroxamic acids | |
349 | |
350 | |
351 Imidoacid: [CX3R0;$([H0][#6]),$([H1])](=[NX2;$([H1]),$([H0][#6;!$(C=[O,N,S])])])[$([OX2H]),$([OX1-])] | |
352 # not cyclic | |
353 | |
354 Imidoacid_cyclic: [#6R][#6X3R](=,:[#7X2;$([H1]),$([H0][#6;!$(C=[O,N,S])])])[$([OX2H]),$([OX1-])] | |
355 # the enamide-form of lactames. may be aromatic like 2-hydroxypyridine | |
356 | |
357 Imidoester: [CX3R0;$([H0][#6]),$([H1])](=[NX2;$([H1]),$([H0][#6;!$(C=[O,N,S])])])[OX2][#6;!$(C=[O,N,S])] | |
358 # esters of the above structures. no anhydrides. | |
359 | |
360 Imidolactone: [#6R][#6X3R](=,:[#7X2;$([H1]),$([H0][#6;!$(C=[O,N,S])])])[OX2][#6;!$(C=[O,N,S])] | |
361 # no oxazoles and similar | |
362 | |
363 Imidothioacid: [CX3R0;$([H0][#6]),$([H1])](=[NX2;$([H1]),$([H0][#6;!$(C=[O,N,S])])])[$([SX2H]),$([SX1-])] | |
364 # not cyclic | |
365 | |
366 Imidothioacid_cyclic: [#6R][#6X3R](=,:[#7X2;$([H1]),$([H0][#6;!$(C=[O,N,S])])])[$([SX2H]),$([SX1-])] | |
367 # the enamide-form of thiolactames. may be aromatic like 2-thiopyridine | |
368 | |
369 Imidothioester: [CX3R0;$([H0][#6]),$([H1])](=[NX2;$([H1]),$([H0][#6;!$(C=[O,N,S])])])[SX2][#6;!$(C=[O,N,S])] | |
370 # thioesters of the above structures. no anhydrides. | |
371 | |
372 Imidothiolactone: [#6R][#6X3R](=,:[#7X2;$([H1]),$([H0][#6;!$(C=[O,N,S])])])[SX2][#6;!$(C=[O,N,S])] | |
373 # no thioxazoles and similar | |
374 | |
375 Amidine: [#7X3v3;!$(N([#6X3]=[#7X2])C=[O,S])][CX3R0;$([H1]),$([H0][#6])]=[NX2v3;!$(N(=[#6X3][#7X3])C=[O,S])] | |
376 # only basic amidines, not substituted by carbonyl or thiocarbonyl, not as part of a ring | |
377 | |
378 Imidolactam: [#6][#6X3R;$([H0](=[NX2;!$(N(=[#6X3][#7X3])C=[O,S])])[#7X3;!$(N([#6X3]=[#7X2])C=[O,S])]),$([H0](-[NX3;!$(N([#6X3]=[#7X2])C=[O,S])])=,:[#7X2;!$(N(=[#6X3][#7X3])C=[O,S])])] | |
379 # one of the two C~N bonds is part of a ring (may be aromatic), but not both - thus no imidazole | |
380 | |
381 Imidoylhalide: [CX3R0;$([H0][#6]),$([H1])](=[NX2;$([H1]),$([H0][#6;!$(C=[O,N,S])])])[FX1,ClX1,BrX1,IX1] | |
382 # not cyclic | |
383 | |
384 Imidoylhalide_cyclic: [#6R][#6X3R](=,:[#7X2;$([H1]),$([H0][#6;!$(C=[O,N,S])])])[FX1,ClX1,BrX1,IX1] | |
385 # may also be aromatic | |
386 | |
387 # may be ring, aromatic, substituted with carbonyls, hetero, ... | |
388 # (everything else would get too complicated) | |
389 | |
390 Amidrazone: [$([$([#6X3][#6]),$([#6X3H])](=[#7X2v3])[#7X3v3][#7X3v3]),$([$([#6X3][#6]),$([#6X3H])]([#7X3v3])=[#7X2v3][#7X3v3])] | |
391 # hits both tautomers. as above, it may be ring, aromatic, substituted with carbonyls, hetero, ... | |
392 | |
393 | |
394 Alpha_aminoacid: [NX3,NX4+;!$([N]~[!#6]);!$([N]*~[#7,#8,#15,#16])][C][CX3](=[OX1])[OX2H,OX1-] | |
395 # N may be alkylated, but not part of an amide (as in peptides), ionic forms are included | |
396 # includes also non-natural aminoacids with double-bonded or two aliph./arom. substituents at alpha-C | |
397 # N may not be aromatic as in 1H-pyrrole-2-carboxylic acid | |
398 | |
399 Alpha_hydroxyacid: [OX2H][C][CX3](=[OX1])[OX2H,OX1-] | |
400 | |
401 Peptide_middle: [NX3;$([N][CX3](=[OX1])[C][NX3,NX4+])][C][CX3](=[OX1])[NX3;$([N][C][CX3](=[OX1])[NX3,OX2,OX1-])] | |
402 # finds peptidic structures which are neither C- nor N-terminal. Both neighbours must be amino-acids/peptides | |
403 | |
404 Peptide_C_term: [NX3;$([N][CX3](=[OX1])[C][NX3,NX4+])][C][CX3](=[OX1])[OX2H,OX1-] | |
405 # finds C-terminal amino acids | |
406 | |
407 Peptide_N_term: [NX3,NX4+;!$([N]~[!#6]);!$([N]*~[#7,#8,#15,#16])][C][CX3](=[OX1])[NX3;$([N][C][CX3](=[OX1])[NX3,OX2,OX1-])] | |
408 # finds N-terminal amino acids. As above, N may be substituted, but not part of an amide-bond. | |
409 | |
410 | |
411 Carboxylic_orthoester: [#6][OX2][CX4;$(C[#6]),$([CH])]([OX2][#6])[OX2][#6] | |
412 # hits also anhydride-like structures (e.g. HC(OMe)2-OC=O residues) | |
413 | |
414 Ketene: [CX3]=[CX2]=[OX1] | |
415 | |
416 Ketenacetal: [#7X3,#8X2,#16X2;$(*[#6,#14])][#6X3]([#7X3,#8X2,#16X2;$(*[#6,#14])])=[#6X3] | |
417 # includes aminals, silylacetals, ketenesters, etc. C=C DB is not aromatic, everything else may be | |
418 | |
419 Nitrile: [NX1]#[CX2] | |
420 # includes cyanhydrines | |
421 | |
422 Isonitrile: [CX1-]#[NX2+] | |
423 | |
424 | |
425 Vinylogous_carbonyl_or_carboxyl_derivative: [#6X3](=[OX1])[#6X3]=,:[#6X3][#7,#8,#16,F,Cl,Br,I] | |
426 # may be part of a ring, even aromatic | |
427 | |
428 Vinylogous_acid: [#6X3](=[OX1])[#6X3]=,:[#6X3][$([OX2H]),$([OX1-])] | |
429 | |
430 Vinylogous_ester: [#6X3](=[OX1])[#6X3]=,:[#6X3][#8X2][#6;!$(C=[O,N,S])] | |
431 | |
432 Vinylogous_amide: [#6X3](=[OX1])[#6X3]=,:[#6X3][#7X3;$([H2]),$([H1][#6;!$(C=[O,N,S])]),$([#7]([#6;!$(C=[O,N,S])])[#6;!$(C=[O,N,S])])] | |
433 | |
434 Vinylogous_halide: [#6X3](=[OX1])[#6X3]=,:[#6X3][FX1,ClX1,BrX1,IX1] | |
435 | |
436 | |
437 | |
438 # I.5: Four Carbon-Hetero Bonds (Carbonic Acid and Derivatives) | |
439 # ----------------------------- | |
440 | |
441 Carbonic_acid_dieester: [#6;!$(C=[O,N,S])][#8X2][#6X3](=[OX1])[#8X2][#6;!$(C=[O,N,S])] | |
442 # may be part of a ring, even aromatic | |
443 | |
444 Carbonic_acid_esterhalide: [#6;!$(C=[O,N,S])][OX2;!R][CX3](=[OX1])[FX1,ClX1,BrX1,IX1] | |
445 | |
446 Carbonic_acid_monoester: [#6;!$(C=[O,N,S])][OX2;!R][CX3](=[OX1])[$([OX2H]),$([OX1-])] | |
447 # unstable | |
448 | |
449 Carbonic_acid_derivatives: [!#6][#6X3](=[!#6])[!#6] | |
450 | |
451 | |
452 Thiocarbonic_acid_dieester: [#6;!$(C=[O,N,S])][#8X2][#6X3](=[SX1])[#8X2][#6;!$(C=[O,N,S])] | |
453 # may be part of a ring, even aromatic | |
454 | |
455 Thiocarbonic_acid_esterhalide: [#6;!$(C=[O,N,S])][OX2;!R][CX3](=[SX1])[FX1,ClX1,BrX1,IX1] | |
456 | |
457 Thiocarbonic_acid_monoester: [#6;!$(C=[O,N,S])][OX2;!R][CX3](=[SX1])[$([OX2H]),$([OX1-])] | |
458 | |
459 | |
460 Urea: [#7X3;!$([#7][!#6])][#6X3](=[OX1])[#7X3;!$([#7][!#6])] | |
461 # no check whether part of imide, biuret, etc. Aromatic structures are only hit if | |
462 # both N share no double bonds, like in the dioxo-form of uracil | |
463 | |
464 Thiourea: [#7X3;!$([#7][!#6])][#6X3](=[SX1])[#7X3;!$([#7][!#6])] | |
465 | |
466 Isourea: [#7X2;!$([#7][!#6])]=,:[#6X3]([#8X2&!$([#8][!#6]),OX1-])[#7X3;!$([#7][!#6])] | |
467 # O may be substituted. no check whether further amide-like bonds are present. Aromatic | |
468 # structures are only hit if single bonded N shares no additional double bond, like in | |
469 # the 1-hydroxy-3-oxo form of uracil | |
470 | |
471 Isothiourea: [#7X2;!$([#7][!#6])]=,:[#6X3]([#16X2&!$([#16][!#6]),SX1-])[#7X3;!$([#7][!#6])] | |
472 | |
473 Guanidine: [N;v3X3,v4X4+][CX3](=[N;v3X2,v4X3+])[N;v3X3,v4X4+] | |
474 # also hits guanidinium salts. v3 and v4 to avoid nitroamidines | |
475 | |
476 Carbaminic_acid: [NX3]C(=[OX1])[O;X2H,X1-] | |
477 # quite unstable, unlikely to be found. Also hits salts | |
478 | |
479 Urethan: [#7X3][#6](=[OX1])[#8X2][#6] | |
480 # also hits when part of a ring, no check whether the last C is part of carbonyl | |
481 | |
482 Biuret: [#7X3][#6](=[OX1])[#7X3][#6](=[OX1])[#7X3] | |
483 | |
484 Semicarbazide: [#7X3][#7X3][#6X3]([#7X3;!$([#7][#7])])=[OX1] | |
485 | |
486 Carbazide: [#7X3][#7X3][#6X3]([#7X3][#7X3])=[OX1] | |
487 | |
488 Semicarbazone: [#7X2](=[#6])[#7X3][#6X3]([#7X3;!$([#7][#7])])=[OX1] | |
489 | |
490 Carbazone: [#7X2](=[#6])[#7X3][#6X3]([#7X3][#7X3])=[OX1] | |
491 | |
492 Thiosemicarbazide: [#7X3][#7X3][#6X3]([#7X3;!$([#7][#7])])=[SX1] | |
493 | |
494 Thiocarbazide: [#7X3][#7X3][#6X3]([#7X3][#7X3])=[SX1] | |
495 | |
496 Thiosemicarbazone: [#7X2](=[#6])[#7X3][#6X3]([#7X3;!$([#7][#7])])=[SX1] | |
497 | |
498 Thiocarbazone: [#7X2](=[#6])[#7X3][#6X3]([#7X3][#7X3])=[SX1] | |
499 | |
500 | |
501 Isocyanate: [NX2]=[CX2]=[OX1] | |
502 | |
503 Cyanate: [OX2][CX2]#[NX1] | |
504 | |
505 Isothiocyanate: [NX2]=[CX2]=[SX1] | |
506 | |
507 Thiocyanate: [SX2][CX2]#[NX1] | |
508 | |
509 Carbodiimide: [NX2]=[CX2]=[NX2] | |
510 | |
511 Orthocarbonic_derivatives: [CX4H0]([O,S,#7])([O,S,#7])([O,S,#7])[O,S,#7,F,Cl,Br,I] | |
512 # halogen allowed just once, to avoid mapping to -OCF3 and similar groups (much more | |
513 # stable as for example C(OCH3)4) | |
514 | |
515 | |
516 # I.6 Aromatics | |
517 # ------------- | |
518 | |
519 # I know that this classification is not very logical, arylamines are found under I.2 ... | |
520 | |
521 Phenol: [OX2H][c] | |
522 | |
523 1,2-Diphenol: [OX2H][c][c][OX2H] | |
524 | |
525 Arylchloride: [Cl][c] | |
526 | |
527 Arylfluoride: [F][c] | |
528 | |
529 Arylbromide: [Br][c] | |
530 | |
531 Aryliodide: [I][c] | |
532 | |
533 Arylthiol: [SX2H][c] | |
534 | |
535 Iminoarene: [c]=[NX2;$([H1]),$([H0][#6;!$([C]=[N,S,O])])] | |
536 # N may be substituted with H or C, but not carbonyl or similar | |
537 # aromatic atom is always C, not S or P (these are not planar when substituted) | |
538 | |
539 Oxoarene: [c]=[OX1] | |
540 | |
541 Thioarene: [c]=[SX1] | |
542 | |
543 Hetero_N_basic_H: [nX3H1+0] | |
544 # as in pyrrole. uncharged to exclude pyridinium ions | |
545 | |
546 Hetero_N_basic_no_H: [nX3H0+0] | |
547 # as in N-methylpyrrole. uncharged to exclude pyridinium ions | |
548 | |
549 Hetero_N_nonbasic: [nX2,nX3+] | |
550 # as in pyridine, pyridinium | |
551 | |
552 Hetero_O: [o] | |
553 | |
554 Hetero_S: [sX2] | |
555 # X2 because Daylight's depictmatch falsely describes C1=CS(=O)C=C1 as aromatic | |
556 # (is not planar because of lonepair at S) | |
557 | |
558 Heteroaromatic: [a;!c] | |
559 | |
560 | |
561 # Part II: N, S, P, Si, B | |
562 # ======================= | |
563 | |
564 | |
565 # II.1 Nitrogen | |
566 # ------------- | |
567 | |
568 Nitrite: [NX2](=[OX1])[O;$([X2]),$([X1-])] | |
569 # hits nitrous acid, its anion, esters, and other O-substituted derivatives | |
570 | |
571 Thionitrite: [SX2][NX2]=[OX1] | |
572 | |
573 Nitrate: [$([NX3](=[OX1])(=[OX1])[O;$([X2]),$([X1-])]),$([NX3+]([OX1-])(=[OX1])[O;$([X2]),$([X1-])])] | |
574 # hits nitric acid, its anion, esters, and other O-substituted derivatives | |
575 | |
576 Nitro: [$([NX3](=O)=O),$([NX3+](=O)[O-])][!#8] | |
577 # hits nitro groups attached to C,N, ... but not nitrates | |
578 | |
579 Nitroso: [NX2](=[OX1])[!#7;!#8] | |
580 # no nitrites, no nitrosamines | |
581 | |
582 Azide: [NX1]~[NX2]~[NX2,NX1] | |
583 # hits both mesomeric forms, also anion | |
584 | |
585 Acylazide: [CX3](=[OX1])[NX2]~[NX2]~[NX1] | |
586 | |
587 Diazo: [$([#6]=[NX2+]=[NX1-]),$([#6-]-[NX2+]#[NX1])] | |
588 | |
589 Diazonium: [#6][NX2+]#[NX1] | |
590 | |
591 Nitrosamine: [#7;!$(N[!#7]=O)][NX2]=[OX1] | |
592 | |
593 Nitrosamide: [NX2](=[OX1])N-*=O | |
594 # includes nitrososulfonamides | |
595 | |
596 N-Oxide: [$([#7+][OX1-]),$([#7v5]=[OX1]);!$([#7](~[O])~[O]);!$([#7]=[#7])] | |
597 # Hits both forms. Won't hit azoxy, nitro, nitroso, or nitrate. | |
598 | |
599 | |
600 Hydrazine: [NX3;$([H2]),$([H1][#6]),$([H0]([#6])[#6]);!$(NC=[O,N,S])][NX3;$([H2]),$([H1][#6]),$([H0]([#6])[#6]);!$(NC=[O,N,S])] | |
601 # no hydrazides | |
602 | |
603 Hydrazone: [NX3;$([H2]),$([H1][#6]),$([H0]([#6])[#6]);!$(NC=[O,N,S])][NX2]=[#6] | |
604 | |
605 Hydroxylamine: [NX3;$([H2]),$([H1][#6]),$([H0]([#6])[#6]);!$(NC=[O,N,S])][OX2;$([H1]),$(O[#6;!$(C=[N,O,S])])] | |
606 # no discrimination between O-, N-, and O,N-substitution | |
607 | |
608 | |
609 # II.2 Sulfur | |
610 # ----------- | |
611 | |
612 Sulfon: [$([SX4](=[OX1])(=[OX1])([#6])[#6]),$([SX4+2]([OX1-])([OX1-])([#6])[#6])] | |
613 # can't be aromatic, thus S and not #16 | |
614 | |
615 Sulfoxide: [$([SX3](=[OX1])([#6])[#6]),$([SX3+]([OX1-])([#6])[#6])] | |
616 | |
617 Sulfonium: [S+;!$([S]~[!#6]);!$([S]*~[#7,#8,#15,#16])] | |
618 # can't be aromatic, thus S and not #16 | |
619 | |
620 Sulfuric_acid: [SX4](=[OX1])(=[OX1])([$([OX2H]),$([OX1-])])[$([OX2H]),$([OX1-])] | |
621 # includes anions | |
622 | |
623 Sulfuric_monoester: [SX4](=[OX1])(=[OX1])([$([OX2H]),$([OX1-])])[OX2][#6;!$(C=[O,N,S])] | |
624 | |
625 Sulfuric_diester: [SX4](=[OX1])(=[OX1])([OX2][#6;!$(C=[O,N,S])])[OX2][#6;!$(C=[O,N,S])] | |
626 | |
627 Sulfuric_monoamide: [SX4](=[OX1])(=[OX1])([#7X3;$([H2]),$([H1][#6;!$(C=[O,N,S])]),$([#7]([#6;!$(C=[O,N,S])])[#6;!$(C=[O,N,S])])])[$([OX2H]),$([OX1-])] | |
628 | |
629 Sulfuric_diamide: [SX4](=[OX1])(=[OX1])([#7X3;$([H2]),$([H1][#6;!$(C=[O,N,S])]),$([#7]([#6;!$(C=[O,N,S])])[#6;!$(C=[O,N,S])])])[#7X3;$([H2]),$([H1][#6;!$(C=[O,N,S])]),$([#7]([#6;!$(C=[O,N,S])])[#6;!$(C=[O,N,S])])] | |
630 | |
631 Sulfuric_esteramide: [SX4](=[OX1])(=[OX1])([#7X3][#6;!$(C=[O,N,S])])[OX2][#6;!$(C=[O,N,S])] | |
632 | |
633 Sulfuric_derivative: [SX4D4](=[!#6])(=[!#6])([!#6])[!#6] | |
634 # everything else (would not be a "true" derivative of sulfuric acid, if one of the substituents were less electronegative | |
635 # than sulfur, but this should be very very rare, anyway) | |
636 | |
637 | |
638 | |
639 #### sulfurous acid and derivatives missing!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! | |
640 | |
641 | |
642 | |
643 | |
644 Sulfonic_acid: [SX4;$([H1]),$([H0][#6])](=[OX1])(=[OX1])[$([OX2H]),$([OX1-])] | |
645 | |
646 Sulfonamide: [SX4;$([H1]),$([H0][#6])](=[OX1])(=[OX1])[#7X3;$([H2]),$([H1][#6;!$(C=[O,N,S])]),$([#7]([#6;!$(C=[O,N,S])])[#6;!$(C=[O,N,S])])] | |
647 | |
648 Sulfonic_ester: [SX4;$([H1]),$([H0][#6])](=[OX1])(=[OX1])[OX2][#6;!$(C=[O,N,S])] | |
649 | |
650 Sulfonic_halide: [SX4;$([H1]),$([H0][#6])](=[OX1])(=[OX1])[FX1,ClX1,BrX1,IX1] | |
651 | |
652 Sulfonic_derivative: [SX4;$([H1]),$([H0][#6])](=[!#6])(=[!#6])[!#6] | |
653 # includes all of the above and many more | |
654 # for comparison: this is what "all sulfonic derivatives but not the ones above" would look like: | |
655 # [$([SX4;$([H1]),$([H0][#6])](=[!#6])(=[!#6;!O])[!#6]),$([SX4;$([H1]),$([H0][#6])](=[OX1])(=[OX1])[!$([FX1,ClX1,BrX1,IX1]);!$([#6]);!$([OX2H]);!$([OX1-]);!$([OX2][#6;!$(C=[O,N,S])]);!$([#7X3;$([H2]),$([H1][#6;!$(C=[O,N,S])]),$([#7]([#6;!$(C=[O,N,S])])[#6;!$(C=[O,N,S])])])])] | |
656 | |
657 | |
658 Sulfinic_acid: [SX3;$([H1]),$([H0][#6])](=[OX1])[$([OX2H]),$([OX1-])] | |
659 | |
660 Sulfinic_amide: [SX3;$([H1]),$([H0][#6])](=[OX1])[#7X3;$([H2]),$([H1][#6;!$(C=[O,N,S])]),$([#7]([#6;!$(C=[O,N,S])])[#6;!$(C=[O,N,S])])] | |
661 | |
662 Sulfinic_ester: [SX3;$([H1]),$([H0][#6])](=[OX1])[OX2][#6;!$(C=[O,N,S])] | |
663 | |
664 Sulfinic_halide: [SX3;$([H1]),$([H0][#6])](=[OX1])[FX1,ClX1,BrX1,IX1] | |
665 | |
666 Sulfinic_derivative: [SX3;$([H1]),$([H0][#6])](=[!#6])[!#6] | |
667 | |
668 Sulfenic_acid: [SX2;$([H1]),$([H0][#6])][$([OX2H]),$([OX1-])] | |
669 | |
670 Sulfenic_amide: [SX2;$([H1]),$([H0][#6])][#7X3;$([H2]),$([H1][#6;!$(C=[O,N,S])]),$([#7]([#6;!$(C=[O,N,S])])[#6;!$(C=[O,N,S])])] | |
671 | |
672 Sulfenic_ester: [SX2;$([H1]),$([H0][#6])][OX2][#6;!$(C=[O,N,S])] | |
673 | |
674 Sulfenic_halide: [SX2;$([H1]),$([H0][#6])][FX1,ClX1,BrX1,IX1] | |
675 | |
676 Sulfenic_derivative: [SX2;$([H1]),$([H0][#6])][!#6] | |
677 | |
678 | |
679 # II.3 Phosphorus | |
680 # --------------- | |
681 | |
682 Phosphine: [PX3;$([H3]),$([H2][#6]),$([H1]([#6])[#6]),$([H0]([#6])([#6])[#6])] | |
683 # similar to amine, but less restrictive: includes also amide- and aminal-analogues | |
684 | |
685 Phosphine_oxide: [PX4;$([H3]=[OX1]),$([H2](=[OX1])[#6]),$([H1](=[OX1])([#6])[#6]),$([H0](=[OX1])([#6])([#6])[#6])] | |
686 | |
687 Phosphonium: [P+;!$([P]~[!#6]);!$([P]*~[#7,#8,#15,#16])] | |
688 # similar to Ammonium | |
689 | |
690 Phosphorylen: [PX4;$([H3]=[CX3]),$([H2](=[CX3])[#6]),$([H1](=[CX3])([#6])[#6]),$([H0](=[CX3])([#6])([#6])[#6])] | |
691 | |
692 | |
693 # conventions for the following acids and derivatives: | |
694 # acids find protonated and deprotonated acids | |
695 # esters do not find mixed anhydrides ( ...P-O-C(=O)) | |
696 # derivatives: substituents which go in place of the OH and =O are not H or C (may also be O, | |
697 # thus including acids and esters) | |
698 | |
699 Phosphonic_acid: [PX4;$([H1]),$([H0][#6])](=[OX1])([$([OX2H]),$([OX1-])])[$([OX2H]),$([OX1-])] | |
700 # includes anions | |
701 | |
702 Phosphonic_monoester: [PX4;$([H1]),$([H0][#6])](=[OX1])([$([OX2H]),$([OX1-])])[OX2][#6;!$(C=[O,N,S])] | |
703 | |
704 Phosphonic_diester: [PX4;$([H1]),$([H0][#6])](=[OX1])([OX2][#6;!$(C=[O,N,S])])[OX2][#6;!$(C=[O,N,S])] | |
705 | |
706 Phosphonic_monoamide: [PX4;$([H1]),$([H0][#6])](=[OX1])([$([OX2H]),$([OX1-])])[#7X3;$([H2]),$([H1][#6;!$(C=[O,N,S])]),$([#7]([#6;!$(C=[O,N,S])])[#6;!$(C=[O,N,S])])] | |
707 | |
708 Phosphonic_diamide: [PX4;$([H1]),$([H0][#6])](=[OX1])([#7X3;$([H2]),$([H1][#6;!$(C=[O,N,S])]),$([#7]([#6;!$(C=[O,N,S])])[#6;!$(C=[O,N,S])])])[#7X3;$([H2]),$([H1][#6;!$(C=[O,N,S])]),$([#7]([#6;!$(C=[O,N,S])])[#6;!$(C=[O,N,S])])] | |
709 | |
710 Phosphonic_esteramide: [PX4;$([H1]),$([H0][#6])](=[OX1])([OX2][#6;!$(C=[O,N,S])])[#7X3;$([H2]),$([H1][#6;!$(C=[O,N,S])]),$([#7]([#6;!$(C=[O,N,S])])[#6;!$(C=[O,N,S])])] | |
711 | |
712 Phosphonic_acid_derivative: [PX4;$([H1]),$([H0][#6])](=[!#6])([!#6])[!#6] | |
713 # all of the above and much more | |
714 | |
715 | |
716 Phosphoric_acid: [PX4D4](=[OX1])([$([OX2H]),$([OX1-])])([$([OX2H]),$([OX1-])])[$([OX2H]),$([OX1-])] | |
717 # includes anions | |
718 | |
719 Phosphoric_monoester: [PX4D4](=[OX1])([$([OX2H]),$([OX1-])])([$([OX2H]),$([OX1-])])[OX2][#6;!$(C=[O,N,S])] | |
720 | |
721 Phosphoric_diester: [PX4D4](=[OX1])([$([OX2H]),$([OX1-])])([OX2][#6;!$(C=[O,N,S])])[OX2][#6;!$(C=[O,N,S])] | |
722 | |
723 Phosphoric_triester: [PX4D4](=[OX1])([OX2][#6;!$(C=[O,N,S])])([OX2][#6;!$(C=[O,N,S])])[OX2][#6;!$(C=[O,N,S])] | |
724 | |
725 Phosphoric_monoamide: [PX4D4](=[OX1])([$([OX2H]),$([OX1-])])([$([OX2H]),$([OX1-])])[#7X3;$([H2]),$([H1][#6;!$(C=[O,N,S])]),$([#7]([#6;!$(C=[O,N,S])])[#6;!$(C=[O,N,S])])] | |
726 | |
727 Phosphoric_diamide: [PX4D4](=[OX1])([$([OX2H]),$([OX1-])])([#7X3;$([H2]),$([H1][#6;!$(C=[O,N,S])]),$([#7]([#6;!$(C=[O,N,S])])[#6;!$(C=[O,N,S])])])[#7X3;$([H2]),$([H1][#6;!$(C=[O,N,S])]),$([#7]([#6;!$(C=[O,N,S])])[#6;!$(C=[O,N,S])])] | |
728 | |
729 Phosphoric_triamide: [PX4D4](=[OX1])([#7X3;$([H2]),$([H1][#6;!$(C=[O,N,S])]),$([#7]([#6;!$(C=[O,N,S])])[#6;!$(C=[O,N,S])])])([#7X3;$([H2]),$([H1][#6;!$(C=[O,N,S])]),$([#7]([#6;!$(C=[O,N,S])])[#6;!$(C=[O,N,S])])])[#7X3;$([H2]),$([H1][#6;!$(C=[O,N,S])]),$([#7]([#6;!$(C=[O,N,S])])[#6;!$(C=[O,N,S])])] | |
730 | |
731 Phosphoric_monoestermonoamide: [PX4D4](=[OX1])([$([OX2H]),$([OX1-])])([OX2][#6;!$(C=[O,N,S])])[#7X3;$([H2]),$([H1][#6;!$(C=[O,N,S])]),$([#7]([#6;!$(C=[O,N,S])])[#6;!$(C=[O,N,S])])] | |
732 | |
733 Phosphoric_diestermonoamide: [PX4D4](=[OX1])([OX2][#6;!$(C=[O,N,S])])([OX2][#6;!$(C=[O,N,S])])[#7X3;$([H2]),$([H1][#6;!$(C=[O,N,S])]),$([#7]([#6;!$(C=[O,N,S])])[#6;!$(C=[O,N,S])])] | |
734 | |
735 Phosphoric_monoesterdiamide: [PX4D4](=[OX1])([OX2][#6;!$(C=[O,N,S])])([#7X3;$([H2]),$([H1][#6;!$(C=[O,N,S])]),$([#7]([#6;!$(C=[O,N,S])])[#6;!$(C=[O,N,S])])])[#7X3;$([H2]),$([H1][#6;!$(C=[O,N,S])]),$([#7]([#6;!$(C=[O,N,S])])[#6;!$(C=[O,N,S])])] | |
736 | |
737 Phosphoric_acid_derivative: [PX4D4](=[!#6])([!#6])([!#6])[!#6] | |
738 | |
739 | |
740 Phosphinic_acid: [PX4;$([H2]),$([H1][#6]),$([H0]([#6])[#6])](=[OX1])[$([OX2H]),$([OX1-])] | |
741 | |
742 Phosphinic_ester: [PX4;$([H2]),$([H1][#6]),$([H0]([#6])[#6])](=[OX1])[OX2][#6;!$(C=[O,N,S])] | |
743 | |
744 Phosphinic_amide: [PX4;$([H2]),$([H1][#6]),$([H0]([#6])[#6])](=[OX1])[#7X3;$([H2]),$([H1][#6;!$(C=[O,N,S])]),$([#7]([#6;!$(C=[O,N,S])])[#6;!$(C=[O,N,S])])] | |
745 | |
746 Phosphinic_acid_derivative: [PX4;$([H2]),$([H1][#6]),$([H0]([#6])[#6])](=[!#6])[!#6] | |
747 | |
748 | |
749 Phosphonous_acid: [PX3;$([H1]),$([H0][#6])]([$([OX2H]),$([OX1-])])[$([OX2H]),$([OX1-])] | |
750 | |
751 Phosphonous_monoester: [PX3;$([H1]),$([H0][#6])]([$([OX2H]),$([OX1-])])[OX2][#6;!$(C=[O,N,S])] | |
752 | |
753 Phosphonous_diester: [PX3;$([H1]),$([H0][#6])]([OX2][#6;!$(C=[O,N,S])])[OX2][#6;!$(C=[O,N,S])] | |
754 | |
755 Phosphonous_monoamide: [PX3;$([H1]),$([H0][#6])]([$([OX2H]),$([OX1-])])[#7X3;$([H2]),$([H1][#6;!$(C=[O,N,S])]),$([#7]([#6;!$(C=[O,N,S])])[#6;!$(C=[O,N,S])])] | |
756 | |
757 Phosphonous_diamide: [PX3;$([H1]),$([H0][#6])]([#7X3;$([H2]),$([H1][#6;!$(C=[O,N,S])]),$([#7]([#6;!$(C=[O,N,S])])[#6;!$(C=[O,N,S])])])[#7X3;$([H2]),$([H1][#6;!$(C=[O,N,S])]),$([#7]([#6;!$(C=[O,N,S])])[#6;!$(C=[O,N,S])])] | |
758 | |
759 Phosphonous_esteramide: [PX3;$([H1]),$([H0][#6])]([OX2][#6;!$(C=[O,N,S])])[#7X3;$([H2]),$([H1][#6;!$(C=[O,N,S])]),$([#7]([#6;!$(C=[O,N,S])])[#6;!$(C=[O,N,S])])] | |
760 | |
761 Phosphonous_derivatives: [PX3;$([D2]),$([D3][#6])]([!#6])[!#6] | |
762 | |
763 | |
764 Phosphinous_acid: [PX3;$([H2]),$([H1][#6]),$([H0]([#6])[#6])][$([OX2H]),$([OX1-])] | |
765 | |
766 Phosphinous_ester: [PX3;$([H2]),$([H1][#6]),$([H0]([#6])[#6])][OX2][#6;!$(C=[O,N,S])] | |
767 | |
768 Phosphinous_amide: [PX3;$([H2]),$([H1][#6]),$([H0]([#6])[#6])][#7X3;$([H2]),$([H1][#6;!$(C=[O,N,S])]),$([#7]([#6;!$(C=[O,N,S])])[#6;!$(C=[O,N,S])])] | |
769 | |
770 Phosphinous_derivatives: [PX3;$([H2]),$([H1][#6]),$([H0]([#6])[#6])][!#6] | |
771 | |
772 | |
773 # II.4 Silicon | |
774 # ------------ | |
775 | |
776 Quart_silane: [SiX4]([#6])([#6])([#6])[#6] | |
777 # four C-substituents. non-reactive, non-toxic, in experimental phase for drug development | |
778 | |
779 Non-quart_silane: [SiX4;$([H1]([#6])([#6])[#6]),$([H2]([#6])[#6]),$([H3][#6]),$([H4])] | |
780 # has 1-4 hydride(s), reactive. Daylight's depictmatch does not add hydrogens automatically to | |
781 # the free positions at Si, thus Hs had to be added explicitly | |
782 | |
783 Silylmonohalide: [SiX4]([FX1,ClX1,BrX1,IX1])([#6])([#6])[#6] | |
784 # reagents for inserting protection groups | |
785 | |
786 Het_trialkylsilane: [SiX4]([!#6])([#6])([#6])[#6] | |
787 # mostly acid-labile protection groups such as trimethylsilyl-ethers | |
788 | |
789 Dihet_dialkylsilane: [SiX4]([!#6])([!#6])([#6])[#6] | |
790 | |
791 Trihet_alkylsilane: [SiX4]([!#6])([!#6])([!#6])[#6] | |
792 | |
793 Silicic_acid_derivative: [SiX4]([!#6])([!#6])([!#6])[!#6] | |
794 # four substituents which are neither C nor H | |
795 | |
796 | |
797 # II.5 Boron | |
798 # ---------- | |
799 | |
800 Trialkylborane: [BX3]([#6])([#6])[#6] | |
801 # also carbonyls allowed | |
802 | |
803 Boric_acid_derivatives: [BX3]([!#6])([!#6])[!#6] | |
804 # includes acids, esters, amides, ... H-substituent at B is very rare. | |
805 | |
806 Boronic_acid_derivative: [BX3]([!#6])([!#6])[#6] | |
807 # one C-substituent and two non-C substituents at B; includes acids, esters, amides, ... | |
808 | |
809 Borohydride: [BH1,BH2,BH3,BH4] | |
810 # at least one H attached to B | |
811 | |
812 Quaternary_boron: [BX4] | |
813 # mostly borates (negative charge), in complex with Lewis-base | |
814 | |
815 | |
816 | |
817 # Part III: Some Special Patterns | |
818 # =============================== | |
819 | |
820 | |
821 # III.1 Chains | |
822 # ------------ | |
823 | |
824 # some simple chains | |
825 | |
826 | |
827 | |
828 # III.2 Rings | |
829 # ----------- | |
830 | |
831 Aromatic: a | |
832 | |
833 Heterocyclic: [!#6;!R0] | |
834 # may be aromatic or not | |
835 | |
836 Epoxide: [OX2r3]1[#6r3][#6r3]1 | |
837 # toxic/reactive. may be annelated to aromat, but must not be aromatic itself (oxirane-2,3-dione) | |
838 | |
839 NH_aziridine: [NX3H1r3]1[#6r3][#6r3]1 | |
840 # toxic/reactive according to Maybridge's garbage filter | |
841 | |
842 Spiro: [D4R;$(*(@*)(@*)(@*)@*)] | |
843 # at least two different rings can be found which are sharing just one atom. | |
844 # these two rings can be connected by a third ring, so it matches also some | |
845 # bridged systems, like morphine | |
846 | |
847 Annelated_rings: [R;$(*(@*)(@*)@*);!$([R2;$(*(@*)(@*)(@*)@*)])]@[R;$(*(@*)(@*)@*);!$([R2;$(*(@*)(@*)(@*)@*)])] | |
848 # two different rings sharing exactly two atoms | |
849 | |
850 Bridged_rings: [R;$(*(@*)(@*)@*);!$([D4R;$(*(@*)(@*)(@*)@*)]);!$([R;$(*(@*)(@*)@*);!$([R2;$(*(@*)(@*)(@*)@*)])]@[R;$(*(@*)(@*)@*);!$([R2;$(*(@*)(@*)(@*)@*)])])] | |
851 # part of two or more rings, not spiro, not annelated -> finds bridgehead atoms, | |
852 # but only if they are not annelated at the same time - otherwise impossible (?) | |
853 # to distinguish from non-bridgehead annelated atoms | |
854 | |
855 # some basic ring-patterns (just size, no other information): | |
856 | |
857 | |
858 | |
859 | |
860 | |
861 # III.3 Sugars and Nucleosides/Nucleotides, Steroids | |
862 # -------------------------------------------------- | |
863 | |
864 # because of the large variety of sugar derivatives, different patterns can be applied. | |
865 # The choice of patterns and their combinations will depend on the contents of the database | |
866 # e.g. natural products, nucleoside analogues with modified sugars, ... as well as on the | |
867 # desired restriction | |
868 | |
869 | |
870 Sugar_pattern_1: [OX2;$([r5]1@C@C@C(O)@C1),$([r6]1@C@C@C(O)@C(O)@C1)] | |
871 # 5 or 6-membered ring containing one O and at least one (r5) or two (r6) oxygen-substituents. | |
872 | |
873 Sugar_pattern_2: [OX2;$([r5]1@C(!@[OX2,NX3,SX2,FX1,ClX1,BrX1,IX1])@C@C@C1),$([r6]1@C(!@[OX2,NX3,SX2,FX1,ClX1,BrX1,IX1])@C@C@C@C1)] | |
874 # 5 or 6-membered ring containing one O and an acetal-like bond at position 2. | |
875 | |
876 Sugar_pattern_combi: [OX2;$([r5]1@C(!@[OX2,NX3,SX2,FX1,ClX1,BrX1,IX1])@C@C(O)@C1),$([r6]1@C(!@[OX2,NX3,SX2,FX1,ClX1,BrX1,IX1])@C@C(O)@C(O)@C1)] | |
877 # combination of the two above | |
878 | |
879 Sugar_pattern_2_reducing: [OX2;$([r5]1@C(!@[OX2H1])@C@C@C1),$([r6]1@C(!@[OX2H1])@C@C@C@C1)] | |
880 # 5 or 6-membered cyclic hemi-acetal | |
881 | |
882 Sugar_pattern_2_alpha: [OX2;$([r5]1@[C@@](!@[OX2,NX3,SX2,FX1,ClX1,BrX1,IX1])@C@C@C1),$([r6]1@[C@@](!@[OX2,NX3,SX2,FX1,ClX1,BrX1,IX1])@C@C@C@C1)] | |
883 # same as Sugar_pattern_2, but with the chirality at position 2 specified as @@ | |
884 | |
885 Sugar_pattern_2_beta: [OX2;$([r5]1@[C@](!@[OX2,NX3,SX2,FX1,ClX1,BrX1,IX1])@C@C@C1),$([r6]1@[C@](!@[OX2,NX3,SX2,FX1,ClX1,BrX1,IX1])@C@C@C@C1)] | |
886 # same as Sugar_pattern_2, but with the chirality at position 2 specified as @ | |
887 | |
888 ##Poly_sugar_1: ([OX2;$([r5]1@C@C@C(O)@C1),$([r6]1@C@C@C(O)@C(O)@C1)].[OX2;$([r5]1@C@C@C(O)@C1),$([r6]1@C@C@C(O)@C(O)@C1)]) | |
889 # pattern1 occurs more than once (in same molecule, but moieties don't have to be adjacent!) | |
890 | |
891 ##Poly_sugar_2: ([OX2;$([r5]1@C(!@[OX2,NX3,SX2,FX1,ClX1,BrX1,IX1])@C@C@C1),$([r6]1@C(!@[OX2,NX3,SX2,FX1,ClX1,BrX1,IX1])@C@C@C@C1)].[OX2;$([r5]1@C(!@[OX2,NX3,SX2,FX1,ClX1,BrX1,IX1])@C@C@C1),$([r6]1@C(!@[OX2,NX3,SX2,FX1,ClX1,BrX1,IX1])@C@C@C@C1)]) | |
892 # pattern2 occurs more than once (in same molecule, but moieties don't have to be adjacent!) | |
893 | |
894 | |
895 # III.4 Everything else... | |
896 # ------------------------ | |
897 | |
898 Conjugated_double_bond: *=*[*]=,#,:[*] | |
899 | |
900 Conjugated_tripple_bond: *#*[*]=,#,:[*] | |
901 | |
902 Cis_double_bond: */[D2]=[D2]\* | |
903 # only one single-bonded substituent on each DB-atom. no aromatics. | |
904 # only found when character of DB is explicitly stated. | |
905 | |
906 Trans_double_bond: */[D2]=[D2]/* | |
907 # analogous to Cis_double_bond | |
908 | |
909 Mixed_anhydrides: [$(*=O),$([#16,#14,#5]),$([#7]([#6]=[OX1]))][#8X2][$(*=O),$([#16,#14,#5]),$([#7]([#6]=[OX1]))] | |
910 # should hit all combinations of two acids | |
911 | |
912 Halogen_on_hetero: [FX1,ClX1,BrX1,IX1][!#6] | |
913 | |
914 Halogen_multi_subst: [F,Cl,Br,I;!$([X1]);!$([X0-])] | |
915 # Halogen which is not mono-substituted nor an anion, e.g. chlorate. | |
916 # Most of these cases should be also filtered by Halogen_on_hetero. | |
917 | |
918 Trifluoromethyl: [FX1][CX4;!$([H0][Cl,Br,I]);!$(C([F])([F])([F])[F])]([FX1])([FX1]) | |
919 # C with three F attached, connected to anything which is not another halogen (CF4 is excluded) | |
920 | |
921 C_ONS_bond: [#6]~[#7,#8,#16] | |
922 # probably all drug-like molecules have at least one O, N, or S connected to a C -> nice filter | |
923 | |
924 ## Mixture: (*).(*) | |
925 # two or more separate parts, may also be salt | |
926 # component-level grouping is not yet supported in Open Babel Version 2.0 | |
927 | |
928 | |
929 Charged: [!+0] | |
930 | |
931 Anion: [-1,-2,-3,-4,-5,-6,-7] | |
932 | |
933 Kation: [+1,+2,+3,+4,+5,+6,+7] | |
934 | |
935 Salt: ([-1,-2,-3,-4,-5,-6,-7]).([+1,+2,+3,+4,+5,+6,+7]) | |
936 # two or more separate components with opposite charges | |
937 | |
938 ##Zwitterion: ([-1,-2,-3,-4,-5,-6,-7].[+1,+2,+3,+4,+5,+6,+7]) | |
939 # both negative and positive charges somewhere within the same molecule. | |
940 | |
941 1,3-Tautomerizable: [$([#7X2,OX1,SX1]=*[!H0;!$([a;!n])]),$([#7X3,OX2,SX2;!H0]*=*),$([#7X3,OX2,SX2;!H0]*:n)] | |
942 # 1,3 migration of H allowed. Includes keto/enol and amide/enamide. | |
943 # Aromatic rings must stay aromatic - no keto form of phenol | |
944 | |
945 1,5-Tautomerizable: [$([#7X2,OX1,SX1]=,:**=,:*[!H0;!$([a;!n])]),$([#7X3,OX2,SX2;!H0]*=**=*),$([#7X3,OX2,SX2;!H0]*=,:**:n)] | |
946 | |
947 Rotatable_bond: [!$(*#*)&!D1]-!@[!$(*#*)&!D1] | |
948 # taken from http://www.daylight.com/support/contrib/smarts/content.html | |
949 | |
950 Michael_acceptor: [CX3]=[CX3][$([CX3]=[O,N,S]),$(C#[N]),$([S,P]=[OX1]),$([NX3]=O),$([NX3+](=O)[O-])] | |
951 # the classical case: C=C near carbonyl, nitrile, nitro, or similar | |
952 # Oxo-heteroaromats and similar are not included. | |
953 | |
954 Dicarbodiazene: [CX3](=[OX1])[NX2]=[NX2][CX3](=[OX1]) | |
955 # Michael-like acceptor, see Mitsunobu reaction | |
956 | |
957 # H-Bond_donor: | |
958 | |
959 # H-Bond_acceptor: | |
960 | |
961 # Pos_ionizable: | |
962 | |
963 # Neg_ionizable: | |
964 | |
965 # Unlikely_ions: | |
966 # O+,N-,C+,C-, ... | |
967 | |
968 CH-acidic: [$([CX4;!$([H0]);!$(C[!#6;!$([P,S]=O);!$(N(~O)~O)])][$([CX3]=[O,N,S]),$(C#[N]),$([S,P]=[OX1]),$([NX3]=O),$([NX3+](=O)[O-]);!$(*[S,O,N;H1,H2]);!$([*+0][S,O;X1-])]),$([CX4;!$([H0])]1[CX3]=[CX3][CX3]=[CX3]1)] | |
969 # C-H alpha to carbonyl, nitro or similar, C is not double-bonded, only C, H, S,P=O and nitro substituents allowed. | |
970 # pentadiene is included. acids, their salts, prim./sec. amides, and imides are excluded. | |
971 # hits also CH-acidic_strong | |
972 | |
973 CH-acidic_strong: [CX4;!$([H0]);!$(C[!#6;!$([P,S]=O);!$(N(~O)~O)])]([$([CX3]=[O,N,S]),$(C#[N]),$([S,P]=[OX1]),$([NX3]=O),$([NX3+](=O)[O-]);!$(*[S,O,N;H1,H2]);!$([*+0][S,O;X1-])])[$([CX3]=[O,N,S]),$(C#[N]),$([S,P]=[OX1]),$([NX3]=O),$([NX3+](=O)[O-]);!$(*[S,O,N;H1,H2]);!$([*+0][S,O;X1-])] | |
974 # same as above (without pentadiene), but carbonyl or similar on two or three sides | |
975 | |
976 Chiral_center_specified: [$([*@](~*)(~*)(*)*),$([*@H](*)(*)*),$([*@](~*)(*)*),$([*@H](~*)~*)] | |
977 # Hits atoms with tetrahedral chirality, if chiral center is specified in the SMILES string | |
978 # depictmatch does not find oxonium, sulfonium, or sulfoxides! | |
979 | |
980 # Chiral_center_unspecified: [$([*@?](~*)(~*)(*)*),$([*@?H](*)(*)*),$([*@?](~*)(*)*),$([*@?H](~*)~*)] | |
981 # Hits atoms with tetrahedral chirality, if chiral center is not specified in the SMILES string | |
982 # "@?" (unspecified chirality) is not yet supported in Open Babel Version 2.0 | |
983 |