0
|
1 <tool id="ctb_filter" name="Filter" version="1.0">
|
|
<!--
  Tool metadata.
  description: the text starts with a space; presumably Galaxy renders it directly
    after the tool name ("Filter a set of molecules from a file"). TODO confirm.
  parallelism: names `infile` as the input to split (split_mode to_size, split_size
    10000) and `outfile` as the output to merge; no shared inputs are declared.
  requirements: two packages are requested, openbabel 2.3.2 and cheminfolib 0.1.
-->
2 <description> a set of molecules from a file</description>
|
|
3 <parallelism method="multi" split_inputs="infile" split_mode="to_size" split_size="10000" shared_inputs="" merge_outputs="outfile"></parallelism>
|
|
4 <requirements>
|
|
5 <requirement type="package" version="2.3.2">openbabel</requirement>
|
|
6 <requirement type="package" version="0.1">cheminfolib</requirement>
|
|
7 </requirements>
|
|
<!--
  Command line: runs ob_filter.py (through the python interpreter) on `infile` and
  writes `outfile`. Both the input and the output format are taken from the
  extension of the input dataset, so the output keeps the input's format.

  The filters argument is a brace-delimited, JSON-like object assembled by the
  Cheetah template, mapping a property name to a [min, max] pair:
    * ruleof5, LeadLike, DrugLike, FragmentLike emit fixed, hard-coded ranges;
    * any other choice (i.e. the "user" option) falls into the else branch and
      emits one entry per element of the `filter_set` repeat. The bound values are
      fetched by name: the selected property value plus "_min" / "_max".

  NOTE(review): every emitted entry ends with a comma, so the generated object has
  a trailing comma before the closing brace (and is just "{ }" when the user adds
  no filter). Presumably ob_filter.py tolerates this; confirm before changing the
  template or the parser.
-->
8 <command interpreter="python">
|
2
|
9 <![CDATA[
|
0
|
10 ob_filter.py
|
|
11 -i "${infile}"
|
|
12 -o "${outfile}"
|
|
13 -iformat "${infile.ext}"
|
|
14 -oformat "${infile.ext}"
|
|
15 --filters '{
|
|
16 #if $filter_methods.filter_methods_opts == "ruleof5":
|
|
17 "hbd" : [0, 5],
|
|
18 "hba" : [0, 10],
|
|
19 "molwt" : [0, 500],
|
|
20 "logp" : [-5, 5],
|
|
21 #elif $filter_methods.filter_methods_opts == "LeadLike":
|
|
22 "rotbonds" : [0, 7],
|
|
23 "molwt" : [0, 350],
|
|
24 "logp" : [-5, 3.5],
|
|
25 #elif $filter_methods.filter_methods_opts == "DrugLike":
|
|
26 "hba" : [0, 10],
|
|
27 "rotbonds" : [0, 8],
|
|
28 "molwt" : [150, 500],
|
|
29 "logp" : [-5, 5],
|
|
30 "psa" : [0, 150],
|
|
31 #elif $filter_methods.filter_methods_opts == "FragmentLike":
|
|
32 "rotbonds" : [0, 5],
|
|
33 "molwt" : [0, 250],
|
|
34 "logp" : [-5, 2.5],
|
|
35 #else:
|
|
36 #for $filter in $filter_methods.filter_set:
|
|
37 #set $filter_selected = $filter.filter_sel.filter_sel_opts
|
|
38 #set $filter_min = $filter_selected + "_min"
|
|
39 #set $filter_max = $filter_selected + "_max"
|
|
40 "$filter_selected" : [$filter.filter_sel[$filter_min], $filter.filter_sel[$filter_max] ],
|
|
41 #end for
|
|
42 #end if
|
|
43 }'
|
2
|
44 ]]>
|
0
|
45 </command>
|
|
<!--
  Inputs.
  infile: the dataset to filter; accepted formats are sdf, smi, mol, mol2, cml, inchi.
  filter_methods: chooses either a pre-defined rule set (ruleof5, LeadLike, DrugLike,
    FragmentLike; their `when` blocks are empty, so they take no further parameters)
    or "user", which exposes the repeatable `filter_set`.
  filter_set / filter_sel: each repetition picks one property through
    `filter_sel_opts` and supplies a lower and an upper bound. The command template
    builds the bound names as the selected value plus "_min" / "_max", so every
    `when` block must keep naming its two params VALUE_min and VALUE_max.

  NOTE(review): the psa and molwt bounds are declared as integer while logp and mr
  are float; confirm whether fractional psa / molwt thresholds should be allowed.
-->
46 <inputs>
|
|
47 <param name="infile" type="data" format="sdf,smi,mol,mol2,cml,inchi" label="Select input file previously created with the 'Compute physico-chemical properties' tool"/>
|
|
48 <conditional name="filter_methods">
|
|
49 <param name="filter_methods_opts" type="select" label="Select a pre-defined filtering set">
|
|
50 <option value="user">User-defined properties</option>
|
|
51 <option value="ruleof5">Lipinski's Rule-of-Five</option>
|
|
52 <option value="LeadLike">Lead-like properties</option>
|
|
53 <option value="DrugLike">Drug-like properties</option>
|
|
54 <option value="FragmentLike">Fragment-like properties</option>
|
|
55 </param>
|
|
56 <when value="ruleof5" />
|
|
57 <when value="LeadLike" />
|
|
58 <when value="DrugLike" />
|
|
59 <when value="FragmentLike" />
|
|
60 <when value="user">
|
|
61 <repeat name="filter_set" title="Filters selection">
|
|
62 <conditional name="filter_sel">
|
|
63 <param name="filter_sel_opts" type="select" label="Select properties to filter">
|
|
64 <option value="hbd">Number of Hydrogen-bond donor groups</option>
|
|
65 <option value="hba">Number of Hydrogen-bond acceptor groups</option>
|
|
66 <option value="psa">Total polar Surface Area</option>
|
|
67 <option value="rotbonds">Number of rotatable bonds</option>
|
|
68 <option value="molwt">Molecular weight</option>
|
|
69 <option value="logp">Predicted value of LogP</option>
|
|
70 <option value="mr">Predicted value for the Molecular Refractivity</option>
|
|
71 <option value="atoms">Number of atoms</option>
|
|
72 <!-- Add later, we need to add a new smarts pattern to plugindefines.txt
|
|
73 <option value="hatoms">Number of heavy atoms</option>-->
|
|
74 <!-- The rings option is skipped; for further information please read: http://www.dalkescientific.com/writings/diary/archive/2011/06/04/dealing_with_sssr.html -->
|
|
75 <!--<option value="rings">Number of rings</option>-->
|
|
76 </param>
|
|
77 <when value="hbd">
|
|
78 <param name="hbd_min" size="3" type="integer" value="" label="Minimum number of HB donors"/>
|
|
79 <param name="hbd_max" size="3" type="integer" value="" label="Maximum number of HB donors"/>
|
|
80 </when>
|
|
81 <when value="hba">
|
|
82 <param name="hba_min" size="3" type="integer" value="" label="Minimum number of HB acceptors"/>
|
|
83 <param name="hba_max" size="3" type="integer" value="" label="Maximum number of HB acceptors"/>
|
|
84 </when>
|
|
85 <when value="psa">
|
|
86 <param name="psa_min" size="5" type="integer" value="" label="Minimum threshold for the Total Polar Surface Area"/>
|
|
87 <param name="psa_max" size="5" type="integer" value="" label="Maximum threshold for the Total Polar Surface Area"/>
|
|
88 </when>
|
|
89 <when value="rotbonds">
|
|
90 <param name="rotbonds_min" size="3" type="integer" value="" label="Minimum number of rotatable bonds"/>
|
|
91 <param name="rotbonds_max" size="3" type="integer" value="" label="Maximum number of rotatable bonds"/>
|
|
92 </when>
|
|
93 <when value="molwt">
|
|
94 <param name="molwt_min" size="5" type="integer" value="" label="Minimum threshold value for the Molecular Weight"/>
|
|
95 <param name="molwt_max" size="5" type="integer" value="" label="Maximum threshold value for the Molecular Weight"/>
|
|
96 </when>
|
|
97 <when value="logp">
|
|
98 <param name="logp_min" size="4" type="float" value="" label="Minimum threshold value for the log-P"/>
|
|
99 <param name="logp_max" size="4" type="float" value="" label="Maximum threshold value for the log-P"/>
|
|
100 </when>
|
|
101 <when value="mr">
|
|
102 <param name="mr_min" size="4" type="float" value="" label="Minimum threshold value for the Molecular Refractivity"/>
|
|
103 <param name="mr_max" size="4" type="float" value="" label="Maximum threshold value for the Molecular Refractivity"/>
|
|
104 </when>
|
|
105 <when value="atoms">
|
|
106 <param name="atoms_min" size="3" type="integer" value="" label="Minimum number of atoms"/>
|
|
107 <param name="atoms_max" size="3" type="integer" value="" label="Maximum number of atoms"/>
|
|
108 </when>
|
|
109 <!--<when value="rings">
|
|
110 <param name="rings_min" size="3" type="integer" value="" label="Minimum number of rings"/>
|
|
111 <param name="rings_max" size="3" type="integer" value="" label="Maximum number of rings"/>
|
|
112 </when>-->
|
|
113 </conditional>
|
|
114 </repeat>
|
|
115 </when>
|
|
116 </conditional>
|
|
117 </inputs>
|
|
<!--
  Single output dataset `outfile`. format_source="infile" ties its datatype to the
  input dataset, which matches the command passing the input extension as both the
  input and the output format.
-->
118 <outputs>
|
|
119 <data format_source="infile" name="outfile" />
|
|
120 </outputs>
|
|
<!--
  Tests. The only active test runs the pre-defined ruleof5 set on CID_2244.smi and
  compares the result with ob_filter_on_CID2244.smi.
  The user-defined test is kept disabled (see the limitation noted below). Its
  selector is set to hbd so that it matches the hbd_min / hbd_max bounds it
  provides; those two params only exist under the hbd choice of filter_sel_opts.
  NOTE(review): confirm that hbd (not hba) is what ob_filter_on_CID2244_2.smi was
  generated with before re-enabling the test.
-->
121 <tests>
|
|
122 <test>
|
|
123 <param name="infile" ftype="smi" value="CID_2244.smi"/>
|
|
124 <param name="filter_methods_opts" value="ruleof5" />
|
|
125 <output name="outfile" ftype="smi" file="ob_filter_on_CID2244.smi" />
|
|
126 </test>
|
|
127 <!--
|
|
128 Limitation of the test framework:
|
|
129 https://trello.com/card/disambiguated-conditional-parameters-not-supported-in-unit-tests/506338ce32ae458f6d15e4b3/820
|
|
130 <test>
|
|
131 <param name="infile" ftype="smi" value="CID_2244.smi"/>
|
|
132 <param name="filter_methods_opts" value="user" />
|
|
133 <param name="filter_sel_opts" value="hbd" />
|
|
134 <param name="hbd_min" value="0" />
|
|
135 <param name="hbd_max" value="5" />
|
|
136 <output name="outfile" ftype="smi" file="ob_filter_on_CID2244_2.smi" />
|
|
137 </test>
|
|
138 -->
|
|
139 </tests>
|
|
140 <help>
|
2
|
141 <![CDATA[
|
0
|
142
|
|
143
|
|
144 .. class:: infomark
|
|
145
|
|
146 **What this tool does**
|
|
147
|
2
|
148 Filters a library of compounds based on user-defined physico-chemical parameters or predefined options (e.g. Ro5, lead-like properties, etc.). Multiple parameters can be selected for more specific queries.
|
0
|
149
|
|
150 -----
|
|
151
|
|
152 .. class:: warningmark
|
|
153
|
|
154 **Hint**
|
|
155
|
|
156 | If your input file is in SDF format you can use the *Compute physico-chemical properties* tool to precalculate the properties and use the filter on that precomputed dataset. It should be faster and can be reused but it's bigger than a SMILES file.
|
2
|
157 |
|
0
|
158 | For exact matches please use the target value for both minimum and maximum parameters (e.g. a selection of exactly 4 rotatable bonds can be performed by selecting 4 as minimum and maximum value).
|
2
|
159 |
|
0
|
160 | Selecting the same property multiple times with different parameters will result in querying the largest overlapping subset of values for the parameter (e.g. a selection of between 0 and 3 rotatable bonds plus a selection between 2 and 4 will result in a query for compounds between 2 and 3 rotatable bonds).
|
|
161
|
|
162 -----
|
|
163
|
|
164 .. class:: infomark
|
|
165
|
|
166 **Definition of the pre-defined filtering rules**
|
|
167
|
|
168 **# Lipinski's Rule of Five:**
|
2
|
169 =< 5 Hydrogen-bond donor groups
|
0
|
170
|
2
|
171 =< 10 Hydrogen-bond acceptor groups
|
0
|
172
|
2
|
173 =< 500 Molecular weight
|
0
|
174
|
2
|
175 =< 5 octanol/water partition coefficient (log P)
|
0
|
176
|
|
177 **# Lead Like properties** (Teague, Davis, Leeson, Oprea, Angew Chem Int Ed Engl. 1999 Dec 16;38(24):3743-3748):
|
2
|
178 =< 7 rotatable bonds
|
0
|
179
|
2
|
180 =< 350 Molecular weight
|
0
|
181
|
2
|
182 =< 3.5 octanol/water partition coefficient (log P)
|
0
|
183
|
|
184 **# Drug Like properties** (Lipinski, J Pharmacol Toxicol Methods. 2000 Jul-Aug;44(1):235-49):
|
2
|
185 =< 10 Hydrogen-bond acceptor groups
|
0
|
186
|
2
|
187 =< 8 rotatable bonds
|
0
|
188
|
2
|
189 150 =< Molecular weight =< 500
|
0
|
190
|
2
|
191 =< 150 Polar Surface Area
|
0
|
192
|
2
|
193 =< 5 octanol/water partition coefficient (log P)
|
0
|
194
|
|
195 **# Fragment Like properties** (Carr RA, Congreve M, Murray CW, Rees DC, Drug Discov Today. 2005 Jul 15;10(14):987):
|
2
|
196 =< 5 rotatable bonds
|
0
|
197
|
2
|
198 =< 250 Molecular weight
|
0
|
199
|
2
|
200 =< 2.5 octanol/water partition coefficient (log P)
|
0
|
201
|
|
202 -----
|
|
203
|
|
204 .. class:: infomark
|
|
205
|
|
206 **Input**
|
|
207
|
|
208 | - `SD-Format`_
|
|
209 | - `SMILES Format`_
|
|
210
|
|
211 .. _SD-Format: http://en.wikipedia.org/wiki/Chemical_table_file
|
|
212 .. _SMILES Format: http://en.wikipedia.org/wiki/Simplified_molecular_input_line_entry_specification
|
|
213
|
|
214 -----
|
|
215
|
|
216 .. class:: infomark
|
|
217
|
2
|
218 **Output**
|
0
|
219
|
|
220 | SDF formatted coordinates of the molecules, with selected properties stored as meta-data for each compound.
|
2
|
221 |
|
0
|
222 | SMILES, InChI or mol2 formatted files containing the 1D strings or 3D coordinates of each compound.
|
|
223
|
|
224 -----
|
|
225
|
|
226 .. class:: infomark
|
|
227
|
|
228 **Cite**
|
|
229
|
|
230 N M O'Boyle, C Morley and G R Hutchison - `Pybel: a Python wrapper for the OpenBabel cheminformatics toolkit`_
|
|
231
|
|
232 .. _`Pybel: a Python wrapper for the OpenBabel cheminformatics toolkit`: http://journal.chemistrycentral.com/content/2/1/5
|
|
233
|
|
234 N M O'Boyle, M Banck, C A James, C Morley, T Vandermeersch, and G R Hutchison - `Open Babel: An open chemical toolbox.`_
|
|
235
|
|
236 .. _`Open Babel: An open chemical toolbox.`: http://www.jcheminf.com/content/3/1/33
|
|
237
|
|
238 `Open Babel`_
|
|
239
|
|
240 .. _`Open Babel`: http://openbabel.org/wiki/Main_Page
|
|
241
|
2
|
242 ]]>
|
0
|
243 </help>
|
|
244 </tool>
|