# HG changeset patch # User bgruening # Date 1376551506 14400 # Node ID 527ecd2fc500383995df6187d1ef636e54293d3e Uploaded diff -r 000000000000 -r 527ecd2fc500 convert/ob_convert.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/convert/ob_convert.xml Thu Aug 15 03:25:06 2013 -0400 @@ -0,0 +1,460 @@ + + Converts various chemistry and molecular modeling data files + + + openbabel + + +## The command is a Cheetah template which allows some Python based syntax. +## Lines starting hash hash are comments. Galaxy will turn newlines into spaces + +#set $format = $oformat.oformat_opts_selector + + #if $format == "fs": + ## For the fastsearch index we need to copy the original molecule files to the composite datatype of obfs. + ## Because openbabel likes file extensions, we give the molecule file a proper file extension. + mkdir $outfile.extra_files_path; + cp "${infile}" ${os.path.join($outfile.extra_files_path, 'molecule.%s' % $infile.ext )}; + #end if + +obabel -i "${infile.ext}" + + #if $format == "fs": + ## the fs filetype need his own symlink path, all others can take the original ones + ${os.path.join($outfile.extra_files_path, 'molecule.%s' % $infile.ext )} + -o "$format" -e + -O ${os.path.join($outfile.extra_files_path,'molecule.fs')} + #if int($oformat.fs_fold) > 0: + -xN$oformat.fs_fold + #end if + ${oformat.fs_fptype} + #else: + "${infile}" + -o "$format" + -O "${outfile}" + -e + #end if + + + #if $format == 'cml': + $oformat.cml_array + $oformat.cml_cml1 + $oformat.cml_aromatic + $oformat.cml_hydrogen + $oformat.cml_metadata + $oformat.cml_omit + $oformat.cml_continuous + $oformat.cml_properties + $oformat.cml_gen2d + $oformat.cml_gen3d + #elif $format == 'inchi': + ##ignore less import warnings + -w + #if $oformat.inchi_truncate: + #set $truncate = ''.join( str( $oformat.inchi_truncate ).split( ',' ) ) + -xT ${truncate} + #end if + + #if $oformat.inchi_additional: + #set $additional = ' '.join( str( $oformat.inchi_additional ).split( ',' ) ) + -xX 
'${additional}' + #end if + + $oformat.inchi_key + $oformat.inchi_name + $oformat.inchi_unique + $oformat.inchi_unique_sort + #elif $format == 'can': + $oformat.can_exp_h + $oformat.can_iso_chi + $oformat.can_rad + $oformat.can_atomclass_out + #elif $format == 'smi': + $oformat.smi_exp_h + $oformat.smi_iso_chi + $oformat.smi_rad + $oformat.smi_atomclass_out + $oformat.smi_can + $oformat.smi_coordinates + #elif $format == 'sdf': + $oformat.sdf_exp_h + $oformat.sdf_no_prop + $oformat.sdf_wedge_bonds + $oformat.sdf_alias_out + $oformat.sdf_gen2d + $oformat.sdf_gen3d + #elif $format == 'fpt': + $oformat.fpt_fptype + #if int($oformat.fpt_fold) > 0: + $oformat.fpt_fold + #end if + $oformat.fpt_hex_multiple + $oformat.fpt_hex + $oformat.fpt_set + $oformat.fpt_unset + #elif $format == 'mol2': + $oformat.mol2_ignore_res + $oformat.mol2_gen2d + $oformat.mol2_gen3d + #end if + + ## Uniqueness according to stripped InChI's or canonical SMILES + #if str($unique.unique_opts_selector): + #if $unique.unique_opts_selector == 'inchi': + #if $unique.truncate: + #set $truncate = ''.join( str( $unique.truncate ).split( ',' ) ) + --unique $truncate + #end if + #else + --unique $unique.unique_opts_selector + #end if + #end if + + + #if str($appendtotitle).strip(): + --addtotitle '${appendtotitle}' + #end if + + $remove_h + $dative_bonds + + #if int($ph) >= 0: + -p $ph + #end if + + 2>&1 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +.. 
class:: infomark + +**What this tool does** + +The compound converter joins several `Open Babel`_ command prompt converters in an easy to use tool. It converts various chemistry and molecular modeling data files. +The output format can be specified as well as several parameters. Some parameters are available for all tools (e.g. protonation state and pH) +others are specific for a given output format (e.g. exclude isotopes for conversion to canSMI). + +----- + +.. class:: infomark + +**Output** + +Can be specified manually. + +----- + +.. class:: infomark + +**Cite** + +N M O'Boyle, M Banck, C A James, C Morley, T Vandermeersch, and G R Hutchison - `Open Babel: An open chemical toolbox.`_ + +.. _`Open Babel: An open chemical toolbox.`: http://www.jcheminf.com/content/3/1/33 + +`Open Babel`_ + +.. _`Open Babel`: http://openbabel.org/wiki/Main_Page + + + + diff -r 000000000000 -r 527ecd2fc500 filter/multi_obgrep.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/filter/multi_obgrep.py Thu Aug 15 03:25:06 2013 -0400 @@ -0,0 +1,101 @@ +#!/usr/bin/env python +""" + Input: Molecules in SDF, SMILES ... + Output: Molecule file filtered with obgrep. 
+ Copyright 2013, Bjoern Gruening and Xavier Lucas +""" +import sys, os +import argparse +import openbabel +openbabel.obErrorLog.StopLogging() +import pybel +import multiprocessing +import tempfile +import subprocess +import shutil +import shlex + +def parse_command_line(): + parser = argparse.ArgumentParser() + parser.add_argument('-i', '--infile', required=True, help='Molecule file.') + parser.add_argument('-q', '--query', required=True, help='Query file, containing different SMARTS in each line.') + parser.add_argument('-o', '--outfile', required=True, help='Path to the output file.') + parser.add_argument("--iformat", help="Input format, like smi, sdf, inchi") + parser.add_argument("--n-times", dest="n_times", type=int, + default=0, help="Print a molecule only if the pattern occurs # times inside the molecule.") + parser.add_argument('-p', '--processors', type=int, default=multiprocessing.cpu_count()) + parser.add_argument("--invert-matches", dest="invert_matches", action="store_true", + default=False, help="Invert the matching, print non-matching molecules.") + parser.add_argument("--only-name", dest="only_name", action="store_true", + default=False, help="Only print the name of the molecules.") + parser.add_argument("--full-match", dest="full_match", action="store_true", + default=False, help="Full match, print matching-molecules only when the number of heavy atoms is also equal to the number of atoms in the SMARTS pattern.") + parser.add_argument("--number-of-matches", dest="number_of_matches", action="store_true", + default=False, help="Print the number of matches.") + return parser.parse_args() + +results = list() +def mp_callback(res): + results.append(res) + +def mp_helper( query, args ): + """ + Helper function for multiprocessing. + That function is a wrapper around obgrep. 
+ """ + + cmd_list = [] + if args.invert_matches: + cmd_list.append('-v') + if args.only_name: + cmd_list.append('-n') + if args.full_match: + cmd_list.append('-f') + if args.number_of_matches: + cmd_list.append('-c') + if args.n_times: + cmd_list.append('-t %s' % str(args.n_times)) + + tmp = tempfile.NamedTemporaryFile(delete=False) + cmd = 'obgrep %s "%s" %s' % (' '.join(cmd_list), query, args.infile) + child = subprocess.Popen(shlex.split(cmd), + stdout=open(tmp.name, 'w+'), stderr=subprocess.PIPE) + + stdout, stderr = child.communicate() + return (tmp.name, query) + + +def obgrep( args ): + + temp_file = tempfile.NamedTemporaryFile() + temp_link = "%s.%s" % (temp_file.name, args.iformat) + temp_file.close() + os.symlink(args.infile, temp_link) + args.infile = temp_link + + pool = multiprocessing.Pool( args.processors ) + for query in open( args.query ): + pool.apply_async(mp_helper, args=(query.strip(), args), callback=mp_callback) + #mp_callback( mp_helper(query.strip(), args) ) + pool.close() + pool.join() + + out_handle = open( args.outfile, 'wb' ) + for result_file, query in results: + res_handle = open(result_file,'rb') + shutil.copyfileobj( res_handle, out_handle ) + res_handle.close() + os.remove( result_file ) + out_handle.close() + + os.remove( temp_link ) + +def __main__(): + """ + Multiprocessing obgrep search. + """ + args = parse_command_line() + obgrep( args ) + +if __name__ == "__main__" : + __main__() diff -r 000000000000 -r 527ecd2fc500 filter/multi_obgrep.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/filter/multi_obgrep.xml Thu Aug 15 03:25:06 2013 -0400 @@ -0,0 +1,84 @@ + + an advanced molecular grep program using SMARTS + + openbabel + + + + multi_obgrep.py + -i $infile + --iformat ${infile.ext} + -q $query + -o "${outfile}" + $invert_matches + --n-times $n_times + $only_name + $full_match + $number_of_matches + --processors 10 + + + + + + + + + + + + + + + + + + + + +.. 
class:: infomark + +**What this tool does** + +Uses the Open Babel Obgrep_ to search for molecules inside multi-molecule files (e.g. SMI, SDF, etc.) or across multiple files. +It is known that not all SMARTS features from the Daylight Toolkit are supported, please have a look here_. + +.. _Obgrep: http://openbabel.org/wiki/Obgrep +.. _here: http://openbabel.org/wiki/SMARTS + +----- + +.. class:: infomark + +**Input** + +| - `SD-Format`_ +| - `SMILES Format`_ + +.. _SD-Format: http://en.wikipedia.org/wiki/Chemical_table_file +.. _SMILES Format: http://en.wikipedia.org/wiki/Simplified_molecular_input_line_entry_specification + +----- + +.. class:: infomark + +**Output** + +Same as input format. + +----- + +.. class:: infomark + +**Cite** + +N M O'Boyle, M Banck, C A James, C Morley, T Vandermeersch, and G R Hutchison - `Open Babel: An open chemical toolbox.`_ + +.. _`Open Babel: An open chemical toolbox.`: http://www.jcheminf.com/content/3/1/33 + +`Open Babel`_ + +.. _`Open Babel`: http://openbabel.org/wiki/Main_Page + + + diff -r 000000000000 -r 527ecd2fc500 filter/ob_filter.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/filter/ob_filter.py Thu Aug 15 03:25:06 2013 -0400 @@ -0,0 +1,112 @@ +#!/usr/bin/env python +""" + Input: set of molecules with pre-calculated physico-chemical properties + Output: set of molecules that pass all the filters + Copyright 2012, Bjoern Gruening and Xavier Lucas + + TODO: AND/OR conditions? 
+""" +import sys, os +import argparse +import cheminfolib +import json +import pybel +import shlex, subprocess + +cheminfolib.pybel_stop_logging() + +def parse_command_line(): + parser = argparse.ArgumentParser() + parser.add_argument('-i', '--input', help='Input file name') + parser.add_argument('-iformat', help='Input file format') + parser.add_argument('-oformat', + default='smi', + help='Output file format') + parser.add_argument('-o', '--output', + help='Output file name', + required=True) + parser.add_argument('--filters', + help="Specify the filters to apply", + required=True, + ) + return parser.parse_args() + +def filter_precalculated_compounds(args, filters): + outfile = pybel.Outputfile(args.oformat, args.output, overwrite=True) + for mol in pybel.readfile('sdf', args.input): + for key, elem in filters.items(): + # map the short description to the larger metadata names stored in the sdf file + property = cheminfolib.ColumnNames[key] + min = elem[0] + max = elem[1] + if float(mol.data[property]) >= float(min) and float(mol.data[property]) <= float(max): + pass + else: + # leave the filter loop, because one filter constrained are not satisfied + break + else: + # if the filter loop terminates in a normal way (no break) all filter rules are satisfied, so save the compound + outfile.write(mol) + outfile.close() + +def filter_new_compounds(args, filters): + + if args.iformat == args.oformat: + # use the -ocopy option from openbabel to speed up the filtering, additionally no conversion is carried out + # http://openbabel.org/docs/dev/FileFormats/Copy_raw_text.html#copy-raw-text + cmd = 'obabel -i%s %s -ocopy -O %s --filter' % (args.iformat, args.input, args.output) + else: + cmd = 'obabel -i%s %s -o%s -O %s --filter' % (args.iformat, args.input, args.oformat, args.output) + filter_cmd = '' + # OBDescriptor stores a mapping from our desc shortcut to the OB name [0] and a long description [1] + for key, elem in filters.items(): + ob_descriptor_name = 
cheminfolib.OBDescriptor[key][0] + min = elem[0] + max = elem[1] + filter_cmd += ' %s>=%s %s<=%s ' % (ob_descriptor_name, min, ob_descriptor_name, max) + + args = shlex.split('%s "%s"' % (cmd, filter_cmd)) + #print '%s "%s"' % (cmd, filter_cmd) + # calling openbabel with subprocess and pipe potential errors occuring in openbabel to stdout + child = subprocess.Popen(args, + stdout=subprocess.PIPE, stderr=subprocess.PIPE) + + stdout, stderr = child.communicate() + return_code = child.returncode + + if return_code: + sys.stdout.write(stdout) + sys.stderr.write(stderr) + sys.stderr.write("Return error code %i from command:\n" % return_code) + sys.stderr.write("%s\n" % cmd) + else: + sys.stdout.write(stdout) + sys.stdout.write(stderr) + + +def __main__(): + """ + Select compounds with certain properties from a small library + """ + args = parse_command_line() + # Its a small trick to get the parameters in an easy way from the xml file. + # To keep it readable in the xml file, many white-spaces are included in that string it needs to be removed. + # Also the last loop creates a ',{' that is not an valid jason expression. 
+ filters = json.loads((args.filters).replace(' ', '').replace(',}', '}')) + if args.iformat == 'sdf': + # Check if the sdf file contains all of the required metadata to invoke the precalculation filtering + mol = pybel.readfile('sdf', args.input).next() + for key, elem in filters.items(): + property = cheminfolib.ColumnNames[key] + if not property in mol.data: + break + else: + # if the for loop finishes in a normal way, we should habe all properties at least in the first molecule + # assume it is the same for all other molecules and start the precalculated filtering + filter_precalculated_compounds(args, filters) + return True + filter_new_compounds(args, filters) + + +if __name__ == "__main__" : + __main__() diff -r 000000000000 -r 527ecd2fc500 filter/ob_filter.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/filter/ob_filter.xml Thu Aug 15 03:25:06 2013 -0400 @@ -0,0 +1,240 @@ + + a set of molecules from a file + + + openbabel + cheminfolib + + + ob_filter.py + -i "${infile}" + -o "${outfile}" + -iformat "${infile.ext}" + -oformat "${infile.ext}" + --filters '{ + #if $filter_methods.filter_methods_opts == "ruleof5": + "hbd" : [0, 5], + "hba" : [0, 10], + "molwt" : [0, 500], + "logp" : [-5, 5], + #elif $filter_methods.filter_methods_opts == "LeadLike": + "rotbonds" : [0, 7], + "molwt" : [0, 350], + "logp" : [-5, 3.5], + #elif $filter_methods.filter_methods_opts == "DrugLike": + "hba" : [0, 10], + "rotbonds" : [0, 8], + "molwt" : [150, 500], + "logp" : [-5, 5], + "psa" : [0, 150], + #elif $filter_methods.filter_methods_opts == "FragmentLike": + "rotbonds" : [0, 5], + "molwt" : [0, 250], + "logp" : [-5, 2.5], + #else: + #for $filter in $filter_methods.filter_set: + #set $filter_selected = $filter.filter_sel.filter_sel_opts + #set $filter_min = $filter_selected + "_min" + #set $filter_max = $filter_selected + "_max" + "$filter_selected" : [$filter.filter_sel[$filter_min], $filter.filter_sel[$filter_max] ], + #end for + #end if + }' + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +.. class:: infomark + +**What this tool does** + +Filters a library of compounds based on user-defined physico-chemical parameters or predefined options (e.g. Ro5, lead-like properties, etc.). Multiple parameters can be selected for more specific queries. + +----- + +.. class:: warningmark + +**Hint** + +| If your input file is in SDF format you can use the *Compute physico-chemical properties* tool to precalulate the properties and use the filter on that precomputed dataset. It should be faster and can be reused but it's bigger than a SMILES file. +| +| For exact matches please use the target value for both minimum and maximum parameters (e.g. a selection of exactly 4 rotatable bonds can be performed by selecting 4 as minimum and maximum value). +| +| Selecting the same property multiple times with different parameters will result in querying the largest overlapping subset of values for the parameter (e.g. a selection of between 0 and 3 rotatable bonds plus a selection between 2 and 4 will result in a query for compounds between 2 and 3 rotatable bonds). + +----- + +.. class:: infomark + +**Definition of the pre-defined filtering rules** + + **# Lipinski's Rule of Five:** + =< 5 Hydrogen-bond donor groups + + =< 10 Hydrogen-bond acceptor groups + + =< 500 Molecular weight + + =< 5 octanol/water partition coefficient (log P) + + **# Lead Like properties** (Teague, Davis, Leeson, Oprea, Angew Chem Int Ed Engl. 1999 Dec 16;38(24):3743-3748): + =< 7 rotatable bonds + + =< 350 Molecular weight + + =< 3.5 octanol/water partition coefficient (log P) + + **# Drug Like properties** (Lipinski, J Pharmacol Toxicol Methods. 
2000 Jul-Aug;44(1):235-49): + =< 10 Hydrogen-bond acceptor groups + + =< 8 rotatable bonds + + 150 =< Molecular weight =< 500 + + =< 150 Polar Surface Area + + =< 5 octanol/water partition coefficient (log P) + + **# Fragment Like properties** (Carr RA, Congreve M, Murray CW, Rees DC, Drug Discov Today. 2005 Jul 15;10(14):987): + =< 5 rotatable bonds + + =< 250 Molecular weight + + =< 2.5 octanol/water partition coefficient (log P) + +----- + +.. class:: infomark + +**Input** + +| - `SD-Format`_ +| - `SMILES Format`_ + +.. _SD-Format: http://en.wikipedia.org/wiki/Chemical_table_file +.. _SMILES Format: http://en.wikipedia.org/wiki/Simplified_molecular_input_line_entry_specification + +----- + +.. class:: infomark + +**Output** + +| SDF formatted coordinates of the molecules, with selected properties stored as meta-data for each compound. +| +| SMILES, InChI or mol2 formatted files containing the 1D strings or 3D coordinates of each compound. + +----- + +.. class:: infomark + +**Cite** + +N M O'Boyle, C Morley and G R Hutchison - `Pybel: a Python wrapper for the OpenBabel cheminformatics toolkit`_ + +.. _`Pybel: a Python wrapper for the OpenBabel cheminformatics toolkit`: http://journal.chemistrycentral.com/content/2/1/5 + +N M O'Boyle, M Banck, C A James, C Morley, T Vandermeersch, and G R Hutchison - `Open Babel: An open chemical toolbox.`_ + +.. _`Open Babel: An open chemical toolbox.`: http://www.jcheminf.com/content/3/1/33 + +`Open Babel`_ + +.. _`Open Babel`: http://openbabel.org/wiki/Main_Page + + + diff -r 000000000000 -r 527ecd2fc500 filter/ob_grep.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/filter/ob_grep.xml Thu Aug 15 03:25:06 2013 -0400 @@ -0,0 +1,98 @@ + + an advanced molecular grep program using SMARTS + + + openbabel + + + ## The command is a Cheetah template which allows some Python based syntax. + ## Lines starting hash hash are comments. 
Galaxy will turn newlines into spaces + obgrep + + $invert_matches + #if $n_times != 0: + -t $n_times + #end if + + $only_name + $full_match + $number_of_matches + -i ${infile.ext} + "${smarts_pattern}" + "${infile}" + > "${outfile}" 2>&1 + + + + + + + + + + + + + + + + + + + + + + + + + + + + +.. class:: infomark + +**What this tool does** + +Uses the Open Babel Obgrep_ to search for molecules inside multi-molecule files (e.g. SMI, SDF, etc.) or across multiple files. +It is known that not all SMARTS features from the Daylight Toolkit are supported, please have a look here_. + + +.. _Obgrep: http://openbabel.org/wiki/Obgrep +.. _here: http://openbabel.org/wiki/SMARTS + +----- + +.. class:: infomark + +**Input** + +| - `SD-Format`_ +| - `SMILES Format`_ + +.. _SD-Format: http://en.wikipedia.org/wiki/Chemical_table_file +.. _SMILES Format: http://en.wikipedia.org/wiki/Simplified_molecular_input_line_entry_specification + +----- + +.. class:: infomark + +**Output** + +Same as input format. + +----- + +.. class:: infomark + +**Cite** + +N M O'Boyle, M Banck, C A James, C Morley, T Vandermeersch, and G R Hutchison - `Open Babel: An open chemical toolbox.`_ + +.. _`Open Babel: An open chemical toolbox.`: http://www.jcheminf.com/content/3/1/33 + +`Open Babel`_ + +.. _`Open Babel`: http://openbabel.org/wiki/Main_Page + + + diff -r 000000000000 -r 527ecd2fc500 filter/ob_remDuplicates.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/filter/ob_remDuplicates.xml Thu Aug 15 03:25:06 2013 -0400 @@ -0,0 +1,86 @@ + + + + + openbabel + + + obabel + -i"${infile.ext}" + "${infile}" + -ocopy + --unique "${descriptor}" + -O "${outfile}" + -e + 2>&1 + + + + + + + + + + + + + + + + + + + + + +.. class:: infomark + +**What this tool does** + +Filters a library of compounds and removes duplicated molecules. + +----- + +.. 
class:: warningmark + +**Hint** + +Comparison based on Canonical SMILES without stereochemistry may be useful in cases where this information is not crucial for library preparation. Several VS tools will automatically generate stereoisomeric forms. + +----- + +.. class:: infomark + +**Input** + +| - `InChI`_ +| - `SMILES Format`_ + +.. _InChI: http://www.iupac.org/home/publications/e-resources/inchi.html +.. _SMILES Format: http://en.wikipedia.org/wiki/Simplified_molecular_input_line_entry_specification + +----- + +.. class:: infomark + +**Output** + +Same as input format. + +----- + +.. class:: infomark + +**Cite** + +N M O'Boyle, M Banck, C A James, C Morley, T Vandermeersch, and G R Hutchison - `Open Babel: An open chemical toolbox.`_ + +.. _`Open Babel: An open chemical toolbox.`: http://www.jcheminf.com/content/3/1/33 + +`Open Babel`_ + +.. _`Open Babel`: http://openbabel.org/wiki/Main_Page + + + diff -r 000000000000 -r 527ecd2fc500 filter/ob_remIons.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/filter/ob_remIons.py Thu Aug 15 03:25:06 2013 -0400 @@ -0,0 +1,38 @@ +#!/usr/bin/env python +""" + Input: molecular input file. + Output: Molecule file with removed ions and fragments. 
+ Copyright 2012, Bjoern Gruening and Xavier Lucas +""" +import sys, os +import argparse +import openbabel +openbabel.obErrorLog.StopLogging() +import pybel + +def parse_command_line(): + parser = argparse.ArgumentParser() + parser.add_argument('-iformat', default='sdf' , help='input file format') + parser.add_argument('-i', '--input', required=True, help='input file name') + parser.add_argument('-o', '--output', required=True, help='output file name') + return parser.parse_args() + +def remove_ions(args): + outfile = pybel.Outputfile(args.iformat, args.output, overwrite=True) + for mol in pybel.readfile(args.iformat, args.input): + if mol.OBMol.NumHvyAtoms() > 5: + mol.OBMol.StripSalts(0) + # Check if new small fragments have been created and remove them + if mol.OBMol.NumHvyAtoms() > 5: + outfile.write(mol) + outfile.close() + +def __main__(): + """ + Remove any counterion and delete any fragment but the largest one for each molecule. + """ + args = parse_command_line() + remove_ions(args) + +if __name__ == "__main__" : + __main__() diff -r 000000000000 -r 527ecd2fc500 filter/ob_remIons.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/filter/ob_remIons.xml Thu Aug 15 03:25:06 2013 -0400 @@ -0,0 +1,62 @@ + + + + + openbabel + + + ob_remIons.py + -i "${infile}" + -iformat "${infile.ext}" + -o "${outfile}" + + + + + + + + + + + + + + + +.. class:: infomark + +**What this tool does** + +Parses a multiple molecules file and deletes any present counterions or fragments. + +----- + +.. class:: warningmark + +**Hint** + +| Only the **largest fragment** on every molecule is extracted. +| +| Only molecules with more than 5 heavy atoms are parsed. + +----- + +.. class:: infomark + +**Cite** + +N M O'Boyle, C Morley and G R Hutchison - `Pybel: a Python wrapper for the OpenBabel cheminformatics toolkit`_ + +.. 
_`Pybel: a Python wrapper for the OpenBabel cheminformatics toolkit`: http://journal.chemistrycentral.com/content/2/1/5 + +N M O'Boyle, M Banck, C A James, C Morley, T Vandermeersch, and G R Hutchison - `Open Babel: An open chemical toolbox.`_ + +.. _`Open Babel: An open chemical toolbox.`: http://www.jcheminf.com/content/3/1/33 + +`Open Babel`_ + +.. _`Open Babel`: http://openbabel.org/wiki/Main_Page + + + diff -r 000000000000 -r 527ecd2fc500 filter/ob_remSmall.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/filter/ob_remSmall.xml Thu Aug 15 03:25:06 2013 -0400 @@ -0,0 +1,70 @@ + + + + + openbabel + + + obabel + -i"${infile.ext}" + "${infile}" + -ocopy + -O "${outfile}" + --filter "atoms > $cutoff" + -e + 2>&1 + + + + + + + + + + + + + + + + + + +.. class:: infomark + +**What this tool does** + +Filters a library of compounds and removes small molecules below a predefined input number of atoms. + +----- + +.. class:: warningmark + +**Hint** + +Some libraries may contain molecules without a 1D/3D descriptor. These molecules may provoke crashes of any other tool. It is strongly advised to run this tool before proceeding to any further steps. + +----- + +.. class:: infomark + +**Output** + +Same as input format. + +----- + +.. class:: infomark + +**Cite** + +N M O'Boyle, M Banck, C A James, C Morley, T Vandermeersch, and G R Hutchison - `Open Babel: An open chemical toolbox.`_ + +.. _`Open Babel: An open chemical toolbox.`: http://www.jcheminf.com/content/3/1/33 + +`Open Babel`_ + +.. 
_`Open Babel`: http://openbabel.org/wiki/Main_Page + + + diff -r 000000000000 -r 527ecd2fc500 filter/test-data/2_mol.smi --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/filter/test-data/2_mol.smi Thu Aug 15 03:25:06 2013 -0400 @@ -0,0 +1,2 @@ +CC(=O)OC1=CC=CC=C1C(=O)[O-] +CC(=O)OC1=CC=CC=C1C(=O)[O-] diff -r 000000000000 -r 527ecd2fc500 filter/test-data/3_mol .smi --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/filter/test-data/3_mol .smi Thu Aug 15 03:25:06 2013 -0400 @@ -0,0 +1,3 @@ +CC(=O)OC1=CC=CC=C1C(=O)[O-] +CC(=O)OC1=CC=CC=C1C(=O)[O-] +C diff -r 000000000000 -r 527ecd2fc500 filter/test-data/8_mol.smi --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/filter/test-data/8_mol.smi Thu Aug 15 03:25:06 2013 -0400 @@ -0,0 +1,8 @@ +C1NC2=CC(=C(C=C2S(=O)(=O)N1)S(=O)(=O)N)Cl +C1=COC(=C1)CNC2=CC(=C(C=C2C(=O)O)S(=O)(=O)N)Cl +CO[C@H]1[C@@H](C[C@@H]2CN3CCC4=C([C@H]3C[C@@H]2[C@@H]1C(=O)OC)NC5=C4C=CC(=C5)OC)OC(=O)C6=CC(=C(C(=C6)OC)OC)OC +CO[C@H]1[C@@H](C[C@@H]2CN3CCC4=C([C@H]3C[C@@H]2[C@@H]1C(=O)OC)NC5=C4C=CC(=C5)OC)OC(=O)C6=CC(=C(C(=C6)OC)OC)OC.C1NC2=CC(=C(C=C2S(=O)(=O)N1)S(=O)(=O)N)Cl +CCC1(N=C2C=C3C(=NC4=CC=CC=C4N3C5=CC=CC=C5)C=C2N1C6=CC=CC=C6)CC +CCC1(N=C2C=C3C(=NC4=CC=CC=C4N3C5=CC=CC=C5)C=C2N1C6=CC=CC=C6)C +CC(=O)OC1=CC=CC=C1C(=O)[O-] +CC(=O)OC1=CC=CC=C1C(=O)[O-] diff -r 000000000000 -r 527ecd2fc500 filter/test-data/CID_2244.can --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/filter/test-data/CID_2244.can Thu Aug 15 03:25:06 2013 -0400 @@ -0,0 +1,1 @@ +CC(=O)Oc1ccccc1C(=O)O 2244 diff -r 000000000000 -r 527ecd2fc500 filter/test-data/CID_2244.inchi --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/filter/test-data/CID_2244.inchi Thu Aug 15 03:25:06 2013 -0400 @@ -0,0 +1,1 @@ +InChI=1S/C9H8O4/c1-6(10)13-8-5-3-2-4-7(8)9(11)12/h2-5H,1H3,(H,11,12) diff -r 000000000000 -r 527ecd2fc500 filter/test-data/CID_2244.sdf --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/filter/test-data/CID_2244.sdf Thu Aug 15 03:25:06 2013 -0400 @@ -0,0 +1,155 @@ +2244 + 
-OEChem-05151212332D + + 21 21 0 0 0 0 0 0 0999 V2000 + 3.7320 -0.0600 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0 + 6.3301 1.4400 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0 + 4.5981 1.4400 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0 + 2.8660 -1.5600 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0 + 4.5981 -0.5600 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 5.4641 -0.0600 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 4.5981 -1.5600 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 6.3301 -0.5600 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 5.4641 -2.0600 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 6.3301 -1.5600 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 5.4641 0.9400 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 2.8660 -0.5600 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 2.0000 -0.0600 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 4.0611 -1.8700 0.0000 H 0 0 0 0 0 0 0 0 0 0 0 0 + 6.8671 -0.2500 0.0000 H 0 0 0 0 0 0 0 0 0 0 0 0 + 5.4641 -2.6800 0.0000 H 0 0 0 0 0 0 0 0 0 0 0 0 + 6.8671 -1.8700 0.0000 H 0 0 0 0 0 0 0 0 0 0 0 0 + 2.3100 0.4769 0.0000 H 0 0 0 0 0 0 0 0 0 0 0 0 + 1.4631 0.2500 0.0000 H 0 0 0 0 0 0 0 0 0 0 0 0 + 1.6900 -0.5969 0.0000 H 0 0 0 0 0 0 0 0 0 0 0 0 + 6.3301 2.0600 0.0000 H 0 0 0 0 0 0 0 0 0 0 0 0 + 1 5 1 0 0 0 0 + 1 12 1 0 0 0 0 + 2 11 1 0 0 0 0 + 2 21 1 0 0 0 0 + 3 11 2 0 0 0 0 + 4 12 2 0 0 0 0 + 5 6 1 0 0 0 0 + 5 7 2 0 0 0 0 + 6 8 2 0 0 0 0 + 6 11 1 0 0 0 0 + 7 9 1 0 0 0 0 + 7 14 1 0 0 0 0 + 8 10 1 0 0 0 0 + 8 15 1 0 0 0 0 + 9 10 2 0 0 0 0 + 9 16 1 0 0 0 0 + 10 17 1 0 0 0 0 + 12 13 1 0 0 0 0 + 13 18 1 0 0 0 0 + 13 19 1 0 0 0 0 + 13 20 1 0 0 0 0 +M END +> +2244 + +> +1 + +> +212 + +> +4 + +> +1 + +> +3 + +> +AAADccBwOAAAAAAAAAAAAAAAAAAAAAAAAAAwAAAAAAAAAAABAAAAGgAACAAADASAmAAyDoAABgCIAiDSCAACCAAkIAAIiAEGCMgMJzaENRqCe2Cl4BEIuYeIyCCOAAAAAAAIAAAAAAAAABAAAAAAAAAAAA== + +> +2-acetoxybenzoic acid + +> +2-acetyloxybenzoic acid + +> +2-acetyloxybenzoic acid + +> +2-acetyloxybenzoic acid + +> +2-acetoxybenzoic acid + +> +InChI=1S/C9H8O4/c1-6(10)13-8-5-3-2-4-7(8)9(11)12/h2-5H,1H3,(H,11,12) + +> +BSYNRYMUTXBXSQ-UHFFFAOYSA-N + +> +1.2 + +> +180.042259 + +> +C9H8O4 + +> +180.15742 + +> 
+CC(=O)OC1=CC=CC=C1C(=O)O + +> +CC(=O)OC1=CC=CC=C1C(=O)O + +> +63.6 + +> +180.042259 + +> +0 + +> +13 + +> +0 + +> +0 + +> +0 + +> +0 + +> +0 + +> +1 + +> +1 + +> +1 +5 +255 + +> +5 6 8 +5 7 8 +6 8 8 +7 9 8 +8 10 8 +9 10 8 + +$$$$ + diff -r 000000000000 -r 527ecd2fc500 filter/test-data/CID_2244.smi --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/filter/test-data/CID_2244.smi Thu Aug 15 03:25:06 2013 -0400 @@ -0,0 +1,1 @@ +O(c1c(cccc1)C(=O)O)C(=O)C 2244 diff -r 000000000000 -r 527ecd2fc500 filter/test-data/CID_2244_FP2.fps --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/filter/test-data/CID_2244_FP2.fps Thu Aug 15 03:25:06 2013 -0400 @@ -0,0 +1,7 @@ +#FPS1 +#num_bits=1021 +#type=OpenBabel-FP2/1 +#software=OpenBabel/2.3.1 +#source=CID_2244.sdf +#date=2012-05-15T16:40:38 +00000010004000c00000020000030000010000000008000000000080000000000400400000000010200a020800000000000042000000000000800002000002000c200800010001010000000002808002208000400000000040080000000100000008000000000002004002000010000000020100080100200808000000000004 2244 diff -r 000000000000 -r 527ecd2fc500 filter/test-data/CID_2244_FP3.fps --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/filter/test-data/CID_2244_FP3.fps Thu Aug 15 03:25:06 2013 -0400 @@ -0,0 +1,7 @@ +#FPS1 +#num_bits=55 +#type=OpenBabel-FP3/1 +#software=OpenBabel/2.3.1 +#source=CID_2244.sdf +#date=2012-05-15T16:59:15 +0400000c50b007 2244 diff -r 000000000000 -r 527ecd2fc500 filter/test-data/CID_2244_FP4.fps --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/filter/test-data/CID_2244_FP4.fps Thu Aug 15 03:25:06 2013 -0400 @@ -0,0 +1,7 @@ +#FPS1 +#num_bits=307 +#type=OpenBabel-FP4/1 +#software=OpenBabel/2.3.1 +#source=CID_2244.sdf +#date=2012-05-15T16:59:22 +010000000000000000009800000000004001000000000000000000000000000000000240402801 2244 diff -r 000000000000 -r 527ecd2fc500 filter/test-data/CID_2244_addh.can --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/filter/test-data/CID_2244_addh.can Thu Aug 15 03:25:06 2013 -0400 @@ -0,0 
+1,1 @@ +CC(=O)Oc1ccccc1C(=O)[O-] 2244 diff -r 000000000000 -r 527ecd2fc500 filter/test-data/CID_2244_maccs.fps --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/filter/test-data/CID_2244_maccs.fps Thu Aug 15 03:25:06 2013 -0400 @@ -0,0 +1,7 @@ +#FPS1 +#num_bits=166 +#type=OpenBabel-MACCS/2 +#software=OpenBabel/2.3.1 +#source=CID_2244.sdf +#date=2012-05-15T17:00:39 +0000000000000000000000010000016480cca2d21e 2244 diff -r 000000000000 -r 527ecd2fc500 filter/test-data/ob_filter_on_CID2244.sdf --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/filter/test-data/ob_filter_on_CID2244.sdf Thu Aug 15 03:25:06 2013 -0400 @@ -0,0 +1,154 @@ +2244 + OpenBabel07101213142D + + 21 21 0 0 0 0 0 0 0 0999 V2000 + 3.7320 -0.0600 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0 + 6.3301 1.4400 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0 + 4.5981 1.4400 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0 + 2.8660 -1.5600 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0 + 4.5981 -0.5600 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 5.4641 -0.0600 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 4.5981 -1.5600 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 6.3301 -0.5600 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 5.4641 -2.0600 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 6.3301 -1.5600 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 5.4641 0.9400 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 2.8660 -0.5600 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 2.0000 -0.0600 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 4.0611 -1.8700 0.0000 H 0 0 0 0 0 0 0 0 0 0 0 0 + 6.8671 -0.2500 0.0000 H 0 0 0 0 0 0 0 0 0 0 0 0 + 5.4641 -2.6800 0.0000 H 0 0 0 0 0 0 0 0 0 0 0 0 + 6.8671 -1.8700 0.0000 H 0 0 0 0 0 0 0 0 0 0 0 0 + 2.3100 0.4769 0.0000 H 0 0 0 0 0 0 0 0 0 0 0 0 + 1.4631 0.2500 0.0000 H 0 0 0 0 0 0 0 0 0 0 0 0 + 1.6900 -0.5969 0.0000 H 0 0 0 0 0 0 0 0 0 0 0 0 + 6.3301 2.0600 0.0000 H 0 0 0 0 0 0 0 0 0 0 0 0 + 1 5 1 0 0 0 0 + 1 12 1 0 0 0 0 + 2 11 1 0 0 0 0 + 2 21 1 0 0 0 0 + 3 11 2 0 0 0 0 + 4 12 2 0 0 0 0 + 5 6 1 0 0 0 0 + 5 7 2 0 0 0 0 + 6 8 2 0 0 0 0 + 6 11 1 0 0 0 0 + 7 9 1 0 0 0 0 + 7 14 1 0 0 0 0 + 8 10 1 0 0 0 0 + 8 15 1 0 0 0 0 + 9 10 2 0 0 0 0 + 
9 16 1 0 0 0 0 + 10 17 1 0 0 0 0 + 12 13 1 0 0 0 0 + 13 18 1 0 0 0 0 + 13 19 1 0 0 0 0 + 13 20 1 0 0 0 0 +M END +> +2244 + +> +1 + +> +212 + +> +4 + +> +1 + +> +3 + +> +AAADccBwOAAAAAAAAAAAAAAAAAAAAAAAAAAwAAAAAAAAAAABAAAAGgAACAAADASAmAAyDoAABgCIAiDSCAACCAAkIAAIiAEGCMgMJzaENRqCe2Cl4BEIuYeIyCCOAAAAAAAIAAAAAAAAABAAAAAAAAAAAA== + +> +2-acetoxybenzoic acid + +> +2-acetyloxybenzoic acid + +> +2-acetyloxybenzoic acid + +> +2-acetyloxybenzoic acid + +> +2-acetoxybenzoic acid + +> +InChI=1S/C9H8O4/c1-6(10)13-8-5-3-2-4-7(8)9(11)12/h2-5H,1H3,(H,11,12) + +> +BSYNRYMUTXBXSQ-UHFFFAOYSA-N + +> +1.2 + +> +180.042259 + +> +C9H8O4 + +> +180.15742 + +> +CC(=O)OC1=CC=CC=C1C(=O)O + +> +CC(=O)OC1=CC=CC=C1C(=O)O + +> +63.6 + +> +180.042259 + +> +0 + +> +13 + +> +0 + +> +0 + +> +0 + +> +0 + +> +0 + +> +1 + +> +1 + +> +1 +5 +255 + +> +5 6 8 +5 7 8 +6 8 8 +7 9 8 +8 10 8 +9 10 8 + +$$$$ diff -r 000000000000 -r 527ecd2fc500 filter/test-data/ob_filter_on_CID2244.smi --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/filter/test-data/ob_filter_on_CID2244.smi Thu Aug 15 03:25:06 2013 -0400 @@ -0,0 +1,1 @@ +O(c1c(cccc1)C(=O)O)C(=O)C 2244 diff -r 000000000000 -r 527ecd2fc500 filter/test-data/ob_filter_on_CID2244_2.smi --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/filter/test-data/ob_filter_on_CID2244_2.smi Thu Aug 15 03:25:06 2013 -0400 @@ -0,0 +1,1 @@ +O(c1c(cccc1)C(=O)O)C(=O)C 2244 diff -r 000000000000 -r 527ecd2fc500 filter/test-data/ob_remDuplicates_on_2_mol.smi --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/filter/test-data/ob_remDuplicates_on_2_mol.smi Thu Aug 15 03:25:06 2013 -0400 @@ -0,0 +1,3 @@ +Removed - a duplicate of (#1) +CC(=O)Oc1ccccc1C(=O)[O-] +1 molecule converted diff -r 000000000000 -r 527ecd2fc500 filter/test-data/obgrep_on_8_mol.smi --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/filter/test-data/obgrep_on_8_mol.smi Thu Aug 15 03:25:06 2013 -0400 @@ -0,0 +1,4 @@ +c1coc(c1)CNc1cc(c(cc1C(=O)O)S(=O)(=O)N)Cl 
+CO[C@H]1[C@@H](C[C@@H]2CN3CCc4c([C@H]3C[C@@H]2[C@@H]1C(=O)OC)[nH]c1c4ccc(c1)OC)OC(=O)c1cc(c(c(c1)OC)OC)OC +CO[C@H]1[C@@H](C[C@@H]2CN3CCc4c([C@H]3C[C@@H]2[C@@H]1C(=O)OC)[nH]c1c4ccc(c1)OC)OC(=O)c1cc(c(c(c1)OC)OC)OC.C1Nc2cc(c(cc2S(=O)(=O)N1)S(=O)(=O)N)Cl + diff -r 000000000000 -r 527ecd2fc500 filter/test-data/obremsmall_on_3_mol.smi --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/filter/test-data/obremsmall_on_3_mol.smi Thu Aug 15 03:25:06 2013 -0400 @@ -0,0 +1,2 @@ +CC(=O)OC1=CC=CC=C1C(=O)[O-] +CC(=O)OC1=CC=CC=C1C(=O)[O-] diff -r 000000000000 -r 527ecd2fc500 filter/test-data/obrmions_on_2_mol.smi --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/filter/test-data/obrmions_on_2_mol.smi Thu Aug 15 03:25:06 2013 -0400 @@ -0,0 +1,2 @@ +CC(=O)Oc1ccccc1C(=O)[O-] +CC(=O)Oc1ccccc1C(=O)[O-] diff -r 000000000000 -r 527ecd2fc500 modify/change_title_to_metadata_value.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/modify/change_title_to_metadata_value.py Thu Aug 15 03:25:06 2013 -0400 @@ -0,0 +1,42 @@ +#!/usr/bin/env python +# -*- coding: UTF-8 -*- + +""" + Change the title from a molecule file to metadata + value of a given-id of the same molecule file. 
+""" + +import os, sys +import argparse +import openbabel +openbabel.obErrorLog.StopLogging() +import pybel + + +def main(): + parser = argparse.ArgumentParser( + description="Change the title from a molecule file to metadata \ +value of a given-id of the same molecule file.", + ) + parser.add_argument('--infile', '-i', + required=True, help="path to the input file") + parser.add_argument('--outfile', '-o', + required=True, help="path to the output file") + parser.add_argument('--key', '-k', + required=True, help="the metadata key from the sdf file which should inlcude the new title") + + args = parser.parse_args() + + output = pybel.Outputfile("sdf", args.outfile, overwrite=True) + + for mol in pybel.readfile("sdf", args.infile): + if args.key in mol.data: + mol.title = mol.data[args.key] + output.write( mol ) + + output.close() + + +if __name__ == "__main__": + main() + diff -r 000000000000 -r 527ecd2fc500 modify/change_title_to_metadata_value.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/modify/change_title_to_metadata_value.xml Thu Aug 15 03:25:06 2013 -0400 @@ -0,0 +1,64 @@ + + to meta-data value. + + + openbabel + + + ## The command is a Cheetah template which allows some Python based syntax. + ## Lines starting hash hash are comments. Galaxy will turn newlines into spaces + change_title_to_metadata_value.py + --infile "${infile}" + --key "${key}" + --outfile "${outfile}" + + + + + + + + + + + + + + + +.. class:: infomark + +**What this tool does** + +Changes the title of a molecule file to a metadata value of a given ID in the same molecule file. + +----- + +.. class:: infomark + +**Input** + +`SD-file`_ with metadata including the given ID. + +.. _SD-file: http://en.wikipedia.org/wiki/Chemical_table_file + +----- + +.. class:: infomark + +**Output** + +Same as input with changed title tag. + +----- + +.. class:: infomark + +**Cite** + +`Open Babel`_ + +.. 
_Open Babel: http://openbabel.org/wiki/Main_Page + + + diff -r 000000000000 -r 527ecd2fc500 modify/ob_addh.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/modify/ob_addh.py Thu Aug 15 03:25:06 2013 -0400 @@ -0,0 +1,38 @@ +#!/usr/bin/env python +""" + Input: Molecule file + Output: Molecule file with hydrogen atoms added at the target pH. +""" +import sys, os +import argparse +import openbabel +openbabel.obErrorLog.StopLogging() +import pybel + +def parse_command_line(argv): + parser = argparse.ArgumentParser() + parser.add_argument('--iformat', type=str, default='sdf' , help='input file format') + parser.add_argument('-i', '--input', type=str, required=True, help='input file name') + parser.add_argument('-o', '--output', type=str, required=True, help='output file name') + parser.add_argument('--polar', action="store_true", default=False, help='Add hydrogen atoms only to polar atoms') + parser.add_argument('--pH', type=float, default="7.4", help='Specify target pH value') + return parser.parse_args() + +def addh(args): + outfile = pybel.Outputfile(args.iformat, args.output, overwrite=True) + for mol in pybel.readfile(args.iformat, args.input): + if mol.OBMol.NumHvyAtoms() > 5: + mol.removeh() + mol.OBMol.AddHydrogens(args.polar, True, args.pH) + outfile.write(mol) + outfile.close() + +def __main__(): + """ + Add hydrogen atoms at a certain pH value + """ + args = parse_command_line(sys.argv) + addh(args) + +if __name__ == "__main__" : + __main__() diff -r 000000000000 -r 527ecd2fc500 modify/ob_addh.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/modify/ob_addh.xml Thu Aug 15 03:25:06 2013 -0400 @@ -0,0 +1,85 @@ + + at a certain pH value + + + openbabel + + + ob_addh.py + -i "${infile}" + --iformat "${infile.ext}" + -o "${outfile}" + $polar + --pH "${pH_value}" + + + + + + + + + + + + + + + + + + + +.. class:: infomark + +**What this tool does** + +Parses a molecular file and adds hydrogen atoms at a user-defined pH value. + +* Protocol:: + + 1. 
The hydrogen atoms included in the input molecule are deleted. + 2. Protonation state is predicted at the target pH and the corresponding hydrogen atoms added accordingly. + +----- + +.. class:: infomark + +**Input** + +3D format files are required, e.g. SDF_ + +.. _SDF: http://en.wikipedia.org/wiki/Chemical_table_file + +----- + +.. class:: warningmark + +**Hint** + +To avoid possible crashes, only molecules with more than five heavy atoms are parsed. + +----- + +.. class:: infomark + +**Output** + +Same output format as the input format. + +----- + +.. class:: infomark + +**Cite** + +`Open Babel`_ + +.. _Open Babel: http://openbabel.org/wiki/Main_Page + +N M O'Boyle, C Morley and G R Hutchison - `Pybel: a Python wrapper for the OpenBabel cheminformatics toolkit`_ + +.. _`Pybel: a Python wrapper for the OpenBabel cheminformatics toolkit`: http://www.biomedcentral.com/content/pdf/1752-153X-2-5.pdf + + + diff -r 000000000000 -r 527ecd2fc500 modify/ob_genProp.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/modify/ob_genProp.py Thu Aug 15 03:25:06 2013 -0400 @@ -0,0 +1,52 @@ +#!/usr/bin/env python +""" + Input: Molecular input file. + Output: Physico-chemical properties are computed and stored as metadata in the sdf output file. 
+ Copyright 2012, Bjoern Gruening and Xavier Lucas +""" +import sys, os +import argparse +import openbabel +openbabel.obErrorLog.StopLogging() +import pybel +import cheminfolib + + +def parse_command_line(argv): + parser = argparse.ArgumentParser() + parser.add_argument('--iformat', default='sdf' , help='input file format') + parser.add_argument('-i', '--input', required=True, help='input file name') + parser.add_argument('--oformat', default='sdf', choices = ['sdf', 'table'] , help='output file format') + parser.add_argument('--header', type=bool, help='Include the header as the first line of the output table') + parser.add_argument('-o', '--output', required=True, help='output file name') + return parser.parse_args() + +def compute_properties(args): + if args.oformat == 'sdf': + outfile = pybel.Outputfile(args.oformat, args.output, overwrite=True) + else: + outfile = open(args.output, 'w') + if args.header: + mol = pybel.readfile(args.iformat, args.input).next() + metadata = cheminfolib.get_properties_ext(mol) + outfile.write( '%s\n' % '\t'.join( [ cheminfolib.ColumnNames[key] for key in metadata ] ) ) + + for mol in pybel.readfile(args.iformat, args.input): + if mol.OBMol.NumHvyAtoms() > 5: + metadata = cheminfolib.get_properties_ext(mol) + if args.oformat == 'sdf': + [ mol.data.update( { cheminfolib.ColumnNames[key] : metadata[key] } ) for key in metadata ] + outfile.write(mol) + else: + outfile.write( '%s\n' % ('\t'.join( [ str(metadata[key]) for key in metadata ] ) ) ) + outfile.close() + +def __main__(): + """ + Physico-chemical properties are computed and stored as metadata in the sdf output file + """ + args = parse_command_line(sys.argv) + compute_properties(args) + +if __name__ == "__main__" : + __main__() diff -r 000000000000 -r 527ecd2fc500 modify/ob_genProp.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/modify/ob_genProp.xml Thu Aug 15 03:25:06 2013 -0400 @@ -0,0 +1,135 @@ + + for a set of molecules + + + openbabel + cheminfolib + + + 
ob_genProp.py + -i "${infile}" + --iformat "${infile.ext}" + --oformat "${output_opts.output_format_types}" + #if $output_opts.header.value: + --header $output_opts.header + #end if + -o "${outfile}" + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +.. class:: infomark + +**What this tool does** + +Computes several physico-chemical properties for a set of molecules. + +The following physico-chemical properties and descriptors are computed for each molecule: + + - number of hydrogen-bond donor and acceptor groups + + - number of rotatable bonds + + - logP + + - number of rings + + - number of heavy atoms + + - molecular weight + + - total Polar Surface Area + + - molecular refractivity + + - Canonical SMILES + + - InChI string + + - InChI-Key + + - Spectrophores(TM) + +----- + +.. class:: infomark + +**Input** + +- SDF_ +- MOL2_ + +.. _SDF: http://en.wikipedia.org/wiki/Chemical_table_file +.. _MOL2: http://www.tripos.com/index.php?family=modules,SimplePage,Mol2_File_Format2009 + +3D coordinates of the molecules have to be provided. + +----- + +.. class:: warningmark + +**Hint** the generation of Spectrophores(TM) requires the previous addition of explicit hydrogen atoms and the proper definition of 3D coordinates. The user is directed towards the corresponding tools if accurate Spectrophores(TM) descriptors are required. + +----- + +.. class:: infomark + +**Output** + +Either a SD-file containing several computed physico-chemical properties stored as metadata or a tabular file with the metadata stored in columns. + +----- + +.. class:: infomark + +**Cite** + +N M O'Boyle, C Morley and G R Hutchison - `Pybel: a Python wrapper for the OpenBabel cheminformatics toolkit`_ + +Silicos_ - |Spectrophores (TM)| is a registered tool implemented in the open-source OpenBabel. + +.. |Spectrophores (TM)| unicode:: Spectrophores U+2122 + +`Open Babel`_ + +.. _Open Babel: http://openbabel.org/wiki/Main_Page +.. 
_`Pybel: a Python wrapper for the OpenBabel cheminformatics toolkit`: http://www.biomedcentral.com/content/pdf/1752-153X-2-5.pdf +.. _Silicos: http://openbabel.org/docs/dev/Fingerprints/spectrophore.html + + + + diff -r 000000000000 -r 527ecd2fc500 modify/remove_protonation_state.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/modify/remove_protonation_state.py Thu Aug 15 03:25:06 2013 -0400 @@ -0,0 +1,35 @@ +#!/usr/bin/env python +""" + Input: molecular input file. + Output: Molecule file with removed ions and fragments. + Copyright 2013, Bjoern Gruening and Xavier Lucas +""" +import sys, os +import argparse +import openbabel +openbabel.obErrorLog.StopLogging() +import pybel + +def parse_command_line(): + parser = argparse.ArgumentParser() + parser.add_argument('--iformat', default='sdf' , help='input file format') + parser.add_argument('-i', '--input', required=True, help='input file name') + parser.add_argument('-o', '--output', required=True, help='output file name') + return parser.parse_args() + +def remove_protonation( args ): + outfile = pybel.Outputfile(args.iformat, args.output, overwrite=True) + for mol in pybel.readfile(args.iformat, args.input): + [atom.OBAtom.SetFormalCharge(0) for atom in mol.atoms] + outfile.write( mol ) + outfile.close() + +def __main__(): + """ + Remove any protonation state from each atom in each molecule. + """ + args = parse_command_line() + remove_protonation( args ) + +if __name__ == "__main__" : + __main__() diff -r 000000000000 -r 527ecd2fc500 modify/remove_protonation_state.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/modify/remove_protonation_state.xml Thu Aug 15 03:25:06 2013 -0400 @@ -0,0 +1,43 @@ + + of every atom + + + openbabel + + + remove_protonation_state.py + -i $infile + -o $outfile + --iformat "${infile.ext}" + 2>&1 + + + + + + + + + + + + + +.. class:: infomark + +**What this tool does** + +Removes the protonation state of every atom. + +----- + +.. 
class:: infomark + +**Cite** + +`Open Babel`_ + +.. _Open Babel: http://openbabel.org/wiki/Main_Page + + + diff -r 000000000000 -r 527ecd2fc500 modify/test-data/CID_2244.can --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/modify/test-data/CID_2244.can Thu Aug 15 03:25:06 2013 -0400 @@ -0,0 +1,1 @@ +CC(=O)Oc1ccccc1C(=O)O 2244 diff -r 000000000000 -r 527ecd2fc500 modify/test-data/CID_2244.inchi --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/modify/test-data/CID_2244.inchi Thu Aug 15 03:25:06 2013 -0400 @@ -0,0 +1,1 @@ +InChI=1S/C9H8O4/c1-6(10)13-8-5-3-2-4-7(8)9(11)12/h2-5H,1H3,(H,11,12) diff -r 000000000000 -r 527ecd2fc500 modify/test-data/CID_2244.sdf --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/modify/test-data/CID_2244.sdf Thu Aug 15 03:25:06 2013 -0400 @@ -0,0 +1,155 @@ +2244 + -OEChem-05151212332D + + 21 21 0 0 0 0 0 0 0999 V2000 + 3.7320 -0.0600 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0 + 6.3301 1.4400 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0 + 4.5981 1.4400 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0 + 2.8660 -1.5600 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0 + 4.5981 -0.5600 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 5.4641 -0.0600 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 4.5981 -1.5600 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 6.3301 -0.5600 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 5.4641 -2.0600 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 6.3301 -1.5600 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 5.4641 0.9400 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 2.8660 -0.5600 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 2.0000 -0.0600 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 4.0611 -1.8700 0.0000 H 0 0 0 0 0 0 0 0 0 0 0 0 + 6.8671 -0.2500 0.0000 H 0 0 0 0 0 0 0 0 0 0 0 0 + 5.4641 -2.6800 0.0000 H 0 0 0 0 0 0 0 0 0 0 0 0 + 6.8671 -1.8700 0.0000 H 0 0 0 0 0 0 0 0 0 0 0 0 + 2.3100 0.4769 0.0000 H 0 0 0 0 0 0 0 0 0 0 0 0 + 1.4631 0.2500 0.0000 H 0 0 0 0 0 0 0 0 0 0 0 0 + 1.6900 -0.5969 0.0000 H 0 0 0 0 0 0 0 0 0 0 0 0 + 6.3301 2.0600 0.0000 H 0 0 0 0 0 0 0 0 0 0 0 0 + 1 5 1 0 0 0 0 + 1 12 1 0 0 0 0 + 2 11 1 0 0 0 0 + 2 21 1 0 0 0 0 + 3 11 2 0 0 0 0 + 4 12 2 0 0 
0 0 + 5 6 1 0 0 0 0 + 5 7 2 0 0 0 0 + 6 8 2 0 0 0 0 + 6 11 1 0 0 0 0 + 7 9 1 0 0 0 0 + 7 14 1 0 0 0 0 + 8 10 1 0 0 0 0 + 8 15 1 0 0 0 0 + 9 10 2 0 0 0 0 + 9 16 1 0 0 0 0 + 10 17 1 0 0 0 0 + 12 13 1 0 0 0 0 + 13 18 1 0 0 0 0 + 13 19 1 0 0 0 0 + 13 20 1 0 0 0 0 +M END +> +2244 + +> +1 + +> +212 + +> +4 + +> +1 + +> +3 + +> +AAADccBwOAAAAAAAAAAAAAAAAAAAAAAAAAAwAAAAAAAAAAABAAAAGgAACAAADASAmAAyDoAABgCIAiDSCAACCAAkIAAIiAEGCMgMJzaENRqCe2Cl4BEIuYeIyCCOAAAAAAAIAAAAAAAAABAAAAAAAAAAAA== + +> +2-acetoxybenzoic acid + +> +2-acetyloxybenzoic acid + +> +2-acetyloxybenzoic acid + +> +2-acetyloxybenzoic acid + +> +2-acetoxybenzoic acid + +> +InChI=1S/C9H8O4/c1-6(10)13-8-5-3-2-4-7(8)9(11)12/h2-5H,1H3,(H,11,12) + +> +BSYNRYMUTXBXSQ-UHFFFAOYSA-N + +> +1.2 + +> +180.042259 + +> +C9H8O4 + +> +180.15742 + +> +CC(=O)OC1=CC=CC=C1C(=O)O + +> +CC(=O)OC1=CC=CC=C1C(=O)O + +> +63.6 + +> +180.042259 + +> +0 + +> +13 + +> +0 + +> +0 + +> +0 + +> +0 + +> +0 + +> +1 + +> +1 + +> +1 +5 +255 + +> +5 6 8 +5 7 8 +6 8 8 +7 9 8 +8 10 8 +9 10 8 + +$$$$ + diff -r 000000000000 -r 527ecd2fc500 modify/test-data/CID_2244.smi --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/modify/test-data/CID_2244.smi Thu Aug 15 03:25:06 2013 -0400 @@ -0,0 +1,1 @@ +O(c1c(cccc1)C(=O)O)C(=O)C 2244 diff -r 000000000000 -r 527ecd2fc500 modify/test-data/CID_2244_FP2.fps --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/modify/test-data/CID_2244_FP2.fps Thu Aug 15 03:25:06 2013 -0400 @@ -0,0 +1,7 @@ +#FPS1 +#num_bits=1021 +#type=OpenBabel-FP2/1 +#software=OpenBabel/2.3.1 +#source=CID_2244.sdf +#date=2012-05-15T16:40:38 +00000010004000c00000020000030000010000000008000000000080000000000400400000000010200a020800000000000042000000000000800002000002000c200800010001010000000002808002208000400000000040080000000100000008000000000002004002000010000000020100080100200808000000000004 2244 diff -r 000000000000 -r 527ecd2fc500 modify/test-data/CID_2244_FP3.fps --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ 
b/modify/test-data/CID_2244_FP3.fps Thu Aug 15 03:25:06 2013 -0400 @@ -0,0 +1,7 @@ +#FPS1 +#num_bits=55 +#type=OpenBabel-FP3/1 +#software=OpenBabel/2.3.1 +#source=CID_2244.sdf +#date=2012-05-15T16:59:15 +0400000c50b007 2244 diff -r 000000000000 -r 527ecd2fc500 modify/test-data/CID_2244_FP4.fps --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/modify/test-data/CID_2244_FP4.fps Thu Aug 15 03:25:06 2013 -0400 @@ -0,0 +1,7 @@ +#FPS1 +#num_bits=307 +#type=OpenBabel-FP4/1 +#software=OpenBabel/2.3.1 +#source=CID_2244.sdf +#date=2012-05-15T16:59:22 +010000000000000000009800000000004001000000000000000000000000000000000240402801 2244 diff -r 000000000000 -r 527ecd2fc500 modify/test-data/CID_2244_addh.can --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/modify/test-data/CID_2244_addh.can Thu Aug 15 03:25:06 2013 -0400 @@ -0,0 +1,1 @@ +CC(=O)Oc1ccccc1C(=O)[O-] 2244 diff -r 000000000000 -r 527ecd2fc500 modify/test-data/CID_2244_maccs.fps --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/modify/test-data/CID_2244_maccs.fps Thu Aug 15 03:25:06 2013 -0400 @@ -0,0 +1,7 @@ +#FPS1 +#num_bits=166 +#type=OpenBabel-MACCS/2 +#software=OpenBabel/2.3.1 +#source=CID_2244.sdf +#date=2012-05-15T17:00:39 +0000000000000000000000010000016480cca2d21e 2244 diff -r 000000000000 -r 527ecd2fc500 modify/test-data/CID_3033.sdf --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/modify/test-data/CID_3033.sdf Thu Aug 15 03:25:06 2013 -0400 @@ -0,0 +1,271 @@ +3033 + -OEChem-08231107463D + + 30 31 0 0 0 0 0 0 0999 V2000 + 1.9541 1.1500 -2.5078 Cl 0 0 0 0 0 0 0 0 0 0 0 0 + 1.1377 -1.6392 2.1136 Cl 0 0 0 0 0 0 0 0 0 0 0 0 + -3.2620 -2.9284 -1.0647 O 0 0 0 0 0 0 0 0 0 0 0 0 + -2.7906 -1.9108 0.9092 O 0 0 0 0 0 0 0 0 0 0 0 0 + 0.2679 -0.2051 -0.3990 N 0 0 0 0 0 0 0 0 0 0 0 0 + -2.0640 0.5139 -0.3769 C 0 0 0 0 0 0 0 0 0 0 0 0 + -0.7313 0.7178 -0.0192 C 0 0 0 0 0 0 0 0 0 0 0 0 + -2.4761 -0.6830 -1.1703 C 0 0 0 0 0 0 0 0 0 0 0 0 + 1.6571 -0.2482 -0.1795 C 0 0 0 0 0 0 0 0 0 0 0 0 + -3.0382 1.4350 0.0081 C 0 0 0 
0 0 0 0 0 0 0 0 0 + -0.3728 1.8429 0.7234 C 0 0 0 0 0 0 0 0 0 0 0 0 + -2.6797 2.5600 0.7506 C 0 0 0 0 0 0 0 0 0 0 0 0 + -1.3470 2.7640 1.1083 C 0 0 0 0 0 0 0 0 0 0 0 0 + 2.5353 0.3477 -1.0918 C 0 0 0 0 0 0 0 0 0 0 0 0 + 2.1740 -0.8865 0.9534 C 0 0 0 0 0 0 0 0 0 0 0 0 + -2.8480 -1.8749 -0.3123 C 0 0 0 0 0 0 0 0 0 0 0 0 + 3.9124 0.3058 -0.8739 C 0 0 0 0 0 0 0 0 0 0 0 0 + 3.5511 -0.9285 1.1713 C 0 0 0 0 0 0 0 0 0 0 0 0 + 4.4203 -0.3324 0.2576 C 0 0 0 0 0 0 0 0 0 0 0 0 + -1.7086 -0.9792 -1.8930 H 0 0 0 0 0 0 0 0 0 0 0 0 + -3.3614 -0.4266 -1.7676 H 0 0 0 0 0 0 0 0 0 0 0 0 + -0.0861 -1.1146 -0.6780 H 0 0 0 0 0 0 0 0 0 0 0 0 + -4.0812 1.2885 -0.2604 H 0 0 0 0 0 0 0 0 0 0 0 0 + 0.6569 2.0278 1.0167 H 0 0 0 0 0 0 0 0 0 0 0 0 + -3.4382 3.2769 1.0511 H 0 0 0 0 0 0 0 0 0 0 0 0 + -1.0683 3.6399 1.6868 H 0 0 0 0 0 0 0 0 0 0 0 0 + 4.6037 0.7654 -1.5758 H 0 0 0 0 0 0 0 0 0 0 0 0 + 3.9635 -1.4215 2.0480 H 0 0 0 0 0 0 0 0 0 0 0 0 + 5.4925 -0.3651 0.4274 H 0 0 0 0 0 0 0 0 0 0 0 0 + -3.5025 -3.7011 -0.5102 H 0 0 0 0 0 0 0 0 0 0 0 0 + 1 14 1 0 0 0 0 + 2 15 1 0 0 0 0 + 3 16 1 0 0 0 0 + 3 30 1 0 0 0 0 + 4 16 2 0 0 0 0 + 5 7 1 0 0 0 0 + 5 9 1 0 0 0 0 + 5 22 1 0 0 0 0 + 6 7 1 0 0 0 0 + 6 8 1 0 0 0 0 + 6 10 2 0 0 0 0 + 7 11 2 0 0 0 0 + 8 16 1 0 0 0 0 + 8 20 1 0 0 0 0 + 8 21 1 0 0 0 0 + 9 14 2 0 0 0 0 + 9 15 1 0 0 0 0 + 10 12 1 0 0 0 0 + 10 23 1 0 0 0 0 + 11 13 1 0 0 0 0 + 11 24 1 0 0 0 0 + 12 13 2 0 0 0 0 + 12 25 1 0 0 0 0 + 13 26 1 0 0 0 0 + 14 17 1 0 0 0 0 + 15 18 2 0 0 0 0 + 17 19 2 0 0 0 0 + 17 27 1 0 0 0 0 + 18 19 1 0 0 0 0 + 18 28 1 0 0 0 0 + 19 29 1 0 0 0 0 +M END +> +3033 + +> +0.6 + +> +1 +20 +18 +39 +29 +42 +38 +35 +30 +25 +33 +28 +32 +36 +26 +24 +40 +11 +27 +37 +7 +41 +10 +19 +43 +8 +6 +16 +44 +23 +34 +14 +15 +31 +9 +13 +17 +21 +22 +5 +12 +2 +3 +4 + +> +28 +1 -0.18 +10 -0.15 +11 -0.15 +12 -0.15 +13 -0.15 +14 0.18 +15 0.18 +16 0.66 +17 -0.15 +18 -0.15 +19 -0.15 +2 -0.18 +22 0.4 +23 0.15 +24 0.15 +25 0.15 +26 0.15 +27 0.15 +28 0.15 +29 0.15 +3 -0.65 +30 0.5 +4 -0.57 +5 -0.6 +6 
-0.14 +7 0.1 +8 0.2 +9 0.1 + +> +4 + +> +7 +1 3 acceptor +1 4 acceptor +1 5 cation +1 5 donor +3 3 4 16 anion +6 6 7 10 11 12 13 rings +6 9 14 15 17 18 19 rings + +> +19 + +> +0 + +> +0 + +> +0 + +> +0 + +> +0 + +> +1 + +> +1 + +> +00000BD900000001 + +> +65.6362 + +> +35.578 + +> +10366900 7 17386020514759110480 +114674 6 16903282898360328323 +11578080 2 17913245089295617604 +11582403 64 14544541357940910356 +11640471 11 18127963303313961600 +12236239 1 18272088352834916308 +12363563 72 18042978579496277287 +12553582 1 18190740839094073615 +12596599 1 18201439237582433270 +12788726 201 18410285909464206003 +13032168 30 18201440238019390274 +13140716 1 18187086113919468457 +13538477 17 18339642338307470464 +13583140 156 17241914119188522922 +13764800 53 17895191172601517065 +13965767 371 17259888045752176376 +14115302 16 18342181093776810149 +14787075 74 17907866106787333628 +15279307 12 18198622322777022915 +15375462 189 18270674264943931347 +15669948 3 18336550511731321249 +16752209 62 18336841852664817743 +16945 1 18188484791351783177 +19433438 48 18059583550169763352 +200 152 18130792217719576158 +20645476 183 18270115859187436189 +20905425 154 17970632883131290416 +21452121 199 18046637711133085653 +21639500 275 16988270998321974524 +22112679 90 18342446063036096292 +23419403 2 17835564502519425292 +23493267 7 18115023138028600728 +23526113 38 16660924516543134566 +23557571 272 17821721762863303772 +23559900 14 17896315990920094510 +23598288 3 18411412925846384519 +23598291 2 18059009613384180254 +238 59 16343141308025475526 +4340502 62 17273677940604857177 +6049 1 17240202131864233360 +6992083 37 18058168521433072460 +7615 1 18201433675414973908 +77492 1 18272651289913926852 +81228 2 17968373550240022809 +9709674 26 17896035610527288590 + +> +378.03 +7.01 +2.75 +1.77 +0.78 +1.58 +0.3 +0.41 +1.94 +-1.08 +1.9 +-8.69 +11.04 +2.58 + +> +790.335 + +> +214.7 + +> +2 +5 +255 + +$$$$ + diff -r 000000000000 -r 527ecd2fc500 modify/test-data/change_title_on_CID_3033.sdf 
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/modify/test-data/change_title_on_CID_3033.sdf Thu Aug 15 03:25:06 2013 -0400 @@ -0,0 +1,270 @@ +214.7 + OpenBabel06291213403D + + 30 31 0 0 0 0 0 0 0 0999 V2000 + 1.9541 1.1500 -2.5078 Cl 0 0 0 0 0 0 0 0 0 0 0 0 + 1.1377 -1.6392 2.1136 Cl 0 0 0 0 0 0 0 0 0 0 0 0 + -3.2620 -2.9284 -1.0647 O 0 0 0 0 0 0 0 0 0 0 0 0 + -2.7906 -1.9108 0.9092 O 0 0 0 0 0 0 0 0 0 0 0 0 + 0.2679 -0.2051 -0.3990 N 0 0 0 0 0 0 0 0 0 0 0 0 + -2.0640 0.5139 -0.3769 C 0 0 0 0 0 0 0 0 0 0 0 0 + -0.7313 0.7178 -0.0192 C 0 0 0 0 0 0 0 0 0 0 0 0 + -2.4761 -0.6830 -1.1703 C 0 0 0 0 0 0 0 0 0 0 0 0 + 1.6571 -0.2482 -0.1795 C 0 0 0 0 0 0 0 0 0 0 0 0 + -3.0382 1.4350 0.0081 C 0 0 0 0 0 0 0 0 0 0 0 0 + -0.3728 1.8429 0.7234 C 0 0 0 0 0 0 0 0 0 0 0 0 + -2.6797 2.5600 0.7506 C 0 0 0 0 0 0 0 0 0 0 0 0 + -1.3470 2.7640 1.1083 C 0 0 0 0 0 0 0 0 0 0 0 0 + 2.5353 0.3477 -1.0918 C 0 0 0 0 0 0 0 0 0 0 0 0 + 2.1740 -0.8865 0.9534 C 0 0 0 0 0 0 0 0 0 0 0 0 + -2.8480 -1.8749 -0.3123 C 0 0 0 0 0 0 0 0 0 0 0 0 + 3.9124 0.3058 -0.8739 C 0 0 0 0 0 0 0 0 0 0 0 0 + 3.5511 -0.9285 1.1713 C 0 0 0 0 0 0 0 0 0 0 0 0 + 4.4203 -0.3324 0.2576 C 0 0 0 0 0 0 0 0 0 0 0 0 + -1.7086 -0.9792 -1.8930 H 0 0 0 0 0 0 0 0 0 0 0 0 + -3.3614 -0.4266 -1.7676 H 0 0 0 0 0 0 0 0 0 0 0 0 + -0.0861 -1.1146 -0.6780 H 0 0 0 0 0 0 0 0 0 0 0 0 + -4.0812 1.2885 -0.2604 H 0 0 0 0 0 0 0 0 0 0 0 0 + 0.6569 2.0278 1.0167 H 0 0 0 0 0 0 0 0 0 0 0 0 + -3.4382 3.2769 1.0511 H 0 0 0 0 0 0 0 0 0 0 0 0 + -1.0683 3.6399 1.6868 H 0 0 0 0 0 0 0 0 0 0 0 0 + 4.6037 0.7654 -1.5758 H 0 0 0 0 0 0 0 0 0 0 0 0 + 3.9635 -1.4215 2.0480 H 0 0 0 0 0 0 0 0 0 0 0 0 + 5.4925 -0.3651 0.4274 H 0 0 0 0 0 0 0 0 0 0 0 0 + -3.5025 -3.7011 -0.5102 H 0 0 0 0 0 0 0 0 0 0 0 0 + 1 14 1 0 0 0 0 + 2 15 1 0 0 0 0 + 3 16 1 0 0 0 0 + 3 30 1 0 0 0 0 + 4 16 2 0 0 0 0 + 5 7 1 0 0 0 0 + 5 9 1 0 0 0 0 + 5 22 1 0 0 0 0 + 6 7 1 0 0 0 0 + 6 8 1 0 0 0 0 + 6 10 2 0 0 0 0 + 7 11 2 0 0 0 0 + 8 16 1 0 0 0 0 + 8 20 1 0 0 0 0 + 8 21 1 0 0 0 0 + 9 14 2 0 0 0 
0 + 9 15 1 0 0 0 0 + 10 12 1 0 0 0 0 + 10 23 1 0 0 0 0 + 11 13 1 0 0 0 0 + 11 24 1 0 0 0 0 + 12 13 2 0 0 0 0 + 12 25 1 0 0 0 0 + 13 26 1 0 0 0 0 + 14 17 1 0 0 0 0 + 15 18 2 0 0 0 0 + 17 19 2 0 0 0 0 + 17 27 1 0 0 0 0 + 18 19 1 0 0 0 0 + 18 28 1 0 0 0 0 + 19 29 1 0 0 0 0 +M END +> +3033 + +> +0.6 + +> +1 +20 +18 +39 +29 +42 +38 +35 +30 +25 +33 +28 +32 +36 +26 +24 +40 +11 +27 +37 +7 +41 +10 +19 +43 +8 +6 +16 +44 +23 +34 +14 +15 +31 +9 +13 +17 +21 +22 +5 +12 +2 +3 +4 + +> +28 +1 -0.18 +10 -0.15 +11 -0.15 +12 -0.15 +13 -0.15 +14 0.18 +15 0.18 +16 0.66 +17 -0.15 +18 -0.15 +19 -0.15 +2 -0.18 +22 0.4 +23 0.15 +24 0.15 +25 0.15 +26 0.15 +27 0.15 +28 0.15 +29 0.15 +3 -0.65 +30 0.5 +4 -0.57 +5 -0.6 +6 -0.14 +7 0.1 +8 0.2 +9 0.1 + +> +4 + +> +7 +1 3 acceptor +1 4 acceptor +1 5 cation +1 5 donor +3 3 4 16 anion +6 6 7 10 11 12 13 rings +6 9 14 15 17 18 19 rings + +> +19 + +> +0 + +> +0 + +> +0 + +> +0 + +> +0 + +> +1 + +> +1 + +> +00000BD900000001 + +> +65.6362 + +> +35.578 + +> +10366900 7 17386020514759110480 +114674 6 16903282898360328323 +11578080 2 17913245089295617604 +11582403 64 14544541357940910356 +11640471 11 18127963303313961600 +12236239 1 18272088352834916308 +12363563 72 18042978579496277287 +12553582 1 18190740839094073615 +12596599 1 18201439237582433270 +12788726 201 18410285909464206003 +13032168 30 18201440238019390274 +13140716 1 18187086113919468457 +13538477 17 18339642338307470464 +13583140 156 17241914119188522922 +13764800 53 17895191172601517065 +13965767 371 17259888045752176376 +14115302 16 18342181093776810149 +14787075 74 17907866106787333628 +15279307 12 18198622322777022915 +15375462 189 18270674264943931347 +15669948 3 18336550511731321249 +16752209 62 18336841852664817743 +16945 1 18188484791351783177 +19433438 48 18059583550169763352 +200 152 18130792217719576158 +20645476 183 18270115859187436189 +20905425 154 17970632883131290416 +21452121 199 18046637711133085653 +21639500 275 16988270998321974524 +22112679 90 18342446063036096292 
+23419403 2 17835564502519425292 +23493267 7 18115023138028600728 +23526113 38 16660924516543134566 +23557571 272 17821721762863303772 +23559900 14 17896315990920094510 +23598288 3 18411412925846384519 +23598291 2 18059009613384180254 +238 59 16343141308025475526 +4340502 62 17273677940604857177 +6049 1 17240202131864233360 +6992083 37 18058168521433072460 +7615 1 18201433675414973908 +77492 1 18272651289913926852 +81228 2 17968373550240022809 +9709674 26 17896035610527288590 + +> +378.03 +7.01 +2.75 +1.77 +0.78 +1.58 +0.3 +0.41 +1.94 +-1.08 +1.9 +-8.69 +11.04 +2.58 + +> +790.335 + +> +214.7 + +> +2 +5 +255 + +$$$$ diff -r 000000000000 -r 527ecd2fc500 modify/test-data/ob_genprop_on_CID2244.sdf --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/modify/test-data/ob_genprop_on_CID2244.sdf Thu Aug 15 03:25:06 2013 -0400 @@ -0,0 +1,193 @@ +2244 + OpenBabel07101213512D + + 21 21 0 0 0 0 0 0 0 0999 V2000 + 3.7320 -0.0600 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0 + 6.3301 1.4400 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0 + 4.5981 1.4400 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0 + 2.8660 -1.5600 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0 + 4.5981 -0.5600 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 5.4641 -0.0600 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 4.5981 -1.5600 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 6.3301 -0.5600 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 5.4641 -2.0600 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 6.3301 -1.5600 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 5.4641 0.9400 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 2.8660 -0.5600 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 2.0000 -0.0600 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 4.0611 -1.8700 0.0000 H 0 0 0 0 0 0 0 0 0 0 0 0 + 6.8671 -0.2500 0.0000 H 0 0 0 0 0 0 0 0 0 0 0 0 + 5.4641 -2.6800 0.0000 H 0 0 0 0 0 0 0 0 0 0 0 0 + 6.8671 -1.8700 0.0000 H 0 0 0 0 0 0 0 0 0 0 0 0 + 2.3100 0.4769 0.0000 H 0 0 0 0 0 0 0 0 0 0 0 0 + 1.4631 0.2500 0.0000 H 0 0 0 0 0 0 0 0 0 0 0 0 + 1.6900 -0.5969 0.0000 H 0 0 0 0 0 0 0 0 0 0 0 0 + 6.3301 2.0600 0.0000 H 0 0 0 0 0 0 0 0 0 0 0 0 + 1 5 1 0 0 0 0 + 1 12 1 0 0 0 0 + 2 11 1 0 
0 0 0 + 2 21 1 0 0 0 0 + 3 11 2 0 0 0 0 + 4 12 2 0 0 0 0 + 5 6 1 0 0 0 0 + 5 7 2 0 0 0 0 + 6 8 2 0 0 0 0 + 6 11 1 0 0 0 0 + 7 9 1 0 0 0 0 + 7 14 1 0 0 0 0 + 8 10 1 0 0 0 0 + 8 15 1 0 0 0 0 + 9 10 2 0 0 0 0 + 9 16 1 0 0 0 0 + 10 17 1 0 0 0 0 + 12 13 1 0 0 0 0 + 13 18 1 0 0 0 0 + 13 19 1 0 0 0 0 + 13 20 1 0 0 0 0 +M END +> +2244 + +> +1 + +> +212 + +> +4 + +> +1 + +> +3 + +> +AAADccBwOAAAAAAAAAAAAAAAAAAAAAAAAAAwAAAAAAAAAAABAAAAGgAACAAADASAmAAyDoAABgCIAiDSCAACCAAkIAAIiAEGCMgMJzaENRqCe2Cl4BEIuYeIyCCOAAAAAAAIAAAAAAAAABAAAAAAAAAAAA== + +> +2-acetoxybenzoic acid + +> +2-acetyloxybenzoic acid + +> +2-acetyloxybenzoic acid + +> +2-acetyloxybenzoic acid + +> +2-acetoxybenzoic acid + +> +InChI=1S/C9H8O4/c1-6(10)13-8-5-3-2-4-7(8)9(11)12/h2-5H,1H3,(H,11,12) + +> +BSYNRYMUTXBXSQ-UHFFFAOYSA-N + +> +1.2 + +> +180.042259 + +> +C9H8O4 + +> +180.15742 + +> +CC(=O)OC1=CC=CC=C1C(=O)O + +> +CC(=O)OC1=CC=CC=C1C(=O)O + +> +63.6 + +> +180.042259 + +> +0 + +> +13 + +> +0 + +> +0 + +> +0 + +> +0 + +> +0 + +> +1 + +> +1 + +> +1 +5 +255 + +> +5 6 8 +5 7 8 +6 8 8 +7 9 8 +8 10 8 +9 10 8 + +> +1 + +> +BSYNRYMUTXBXSQ-UHFFFAOYSA-N + +> +1.3101 + +> +63.6 + +> +13 + +> +180.15742 + +> +InChI=1S/C9H8O4/c1-6(10)13-8-5-3-2-4-7(8)9(11)12/h2-5H,1H3,(H,11,12) + +> +-1.454, -1.106, -1.162, 0.285, 0.369, 1.008, 0.144, -0.956, 0.133, 0.883, 1.828, 0.029, -1.312, -1.424, -1.287, -0.644, 0.59, 0.746, -0.127, 0.535, 0.368, 0.93, 1.721, -0.097, -1.591, -0.888, -1.332, 0.013, 0.889, 0.249, -0.343, -0.346, -0.047, 1.471, 1.595, 0.329, -1.101, -1.186, 0.953, 0.953, -0.644, -0.653, -1.136, 1.547, 0.533, -0.262, -0.274, 1.27 + +> +1 + +> +CC(=O)Oc1ccccc1C(=O)O + +> +44.9003 + +> +4 + +> +3 + +$$$$ diff -r 000000000000 -r 527ecd2fc500 modify/test-data/ob_genprop_on_CID2244.tabular --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/modify/test-data/ob_genprop_on_CID2244.tabular Thu Aug 15 03:25:06 2013 -0400 @@ -0,0 +1,2 @@ +Hydrogen-bond donors InChI key logP Polar surface area Number of heavy atoms Molecular weight 
InChI Spectrophores(TM) Number of rings Canonical SMILES Molecular refractivity Hydrogen-bond acceptors Rotatable bonds +1 BSYNRYMUTXBXSQ-UHFFFAOYSA-N 1.3101 63.6 13 180.15742 InChI=1S/C9H8O4/c1-6(10)13-8-5-3-2-4-7(8)9(11)12/h2-5H,1H3,(H,11,12) -1.454, -1.106, -1.162, 0.285, 0.369, 1.008, 0.144, -0.956, 0.133, 0.883, 1.828, 0.029, -1.312, -1.424, -1.287, -0.644, 0.59, 0.746, -0.127, 0.535, 0.368, 0.93, 1.721, -0.097, -1.591, -0.888, -1.332, 0.013, 0.889, 0.249, -0.343, -0.346, -0.047, 1.471, 1.595, 0.329, -1.101, -1.186, 0.953, 0.953, -0.644, -0.653, -1.136, 1.547, 0.533, -0.262, -0.274, 1.27 1 CC(=O)Oc1ccccc1C(=O)O 44.9003 4 3 diff -r 000000000000 -r 527ecd2fc500 repository_dependencies.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/repository_dependencies.xml Thu Aug 15 03:25:06 2013 -0400 @@ -0,0 +1,4 @@ + + + + diff -r 000000000000 -r 527ecd2fc500 search/ob_spectrophore_search.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/search/ob_spectrophore_search.py Thu Aug 15 03:25:06 2013 -0400 @@ -0,0 +1,66 @@ +#!/usr/bin/env python +""" + Input: tabular format file with one column storing the unique id for the compounds and any other with the Spectrophores(TM) descriptors. + Output: parse the target file using the same protocol used to generate the databases in our servers. Physico-chemical properties are computed and stored as metadata in the sdf output file. 
+ Copyright 2012, Bjoern Gruening and Xavier Lucas +""" +import sys, os +import argparse +import openbabel +openbabel.obErrorLog.StopLogging() +import pybel +import math +import numpy as np + +#TODO get rid of eval() + +global spectrophore +spectrophore = pybel.ob.OBSpectrophore() + +def parse_command_line(): + parser = argparse.ArgumentParser() + parser.add_argument('--target', required=True, help='target file name in sdf format with Spectrophores(TM) descriptors stored as meta-data') + parser.add_argument('--library', required=True, help='library of compounds with pre-computed physico-chemical properties, including Spectrophores(TM) in tabular format') + parser.add_argument('-c', '--column', required=True, type=int, help='#column containing the Spectrophores(TM) descriptors in the library file') + parser.add_argument('-o', '--output', required=True, help='output file name') + parser.add_argument('-n', '--normalization', default="ZeroMeanAndUnitStd", choices=['No', 'ZeroMean', 'UnitStd', 'ZeroMeanAndUnitStd'], help='Normalization method') + parser.add_argument('-a', '--accuracy', default="20", choices=['1', '2', '5', '10', '15', '20', '30', '36', '45', '60'], help='Accuracy expressed as angular stepsize') + parser.add_argument('-s', '--stereo', default="No", choices=['No', 'Unique', 'Mirror', 'All'], help='Stereospecificity of the cage') + parser.add_argument('-r', '--resolution', type=float, default="3.0", help='Resolution') + return parser.parse_args() + +def set_parameters(args): + if args.normalization == 'No': + spectrophore.SetNormalization( spectrophore.NoNormalization ) + else: + spectrophore.SetNormalization( eval('spectrophore.NormalizationTowards' + args.normalization) ) + spectrophore.SetAccuracy( eval('spectrophore.AngStepSize' + args.accuracy) ) + spectrophore.SetStereo( eval('spectrophore.' 
+ args.stereo + 'StereoSpecificProbes') ) + spectrophore.SetResolution( args.resolution ) + return True + +def Compute_Spectrophores_distance(target_spectrophore, args): + outfile = open(args.output, 'w') + for mol in open(args.library, 'r'): + try: + distance = ( ( np.asarray( target_spectrophore, dtype=float ) - np.asarray( mol.split('\t')[ args.column - 1 ].strip().split(', '), dtype=float) )**2).sum() + except ValueError: + distance = 0 + outfile.write( '%s\t%f\n' % (mol.strip(), distance ) ) + outfile.close() + +def __main__(): + """ + Computation of Spectrophores(TM) distances to a target molecule. + """ + args = parse_command_line() + # This sets up the parameters for the Spectrophore generation. Parameters are set to fit those of our standard parsing tool + set_parameters(args) + + mol = pybel.readfile('sdf', args.target).next() + target_spectrophore = mol.data["Spectrophores(TM)"].strip().split(', ') + # Compute the paired-distance between every molecule in the library and the target + distances = Compute_Spectrophores_distance(target_spectrophore, args) + +if __name__ == "__main__" : + __main__() diff -r 000000000000 -r 527ecd2fc500 search/ob_spectrophore_search.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/search/ob_spectrophore_search.xml Thu Aug 15 03:25:06 2013 -0400 @@ -0,0 +1,88 @@ + + similarity search based on 1D chemical features + + openbabel + numpy + + + ob_spectrophore_search.py + --target $target + --library $library + --output $outfile + --column $column + + + + + + + + + + + + + + + + + + + +.. class:: infomark + +**What does this tool do?** + +This tool computes the Euclidean distance between the Spectrophores(TM) descriptors of the target to each molecule stored in the library. + +|Spectrophores (TM)| search + |Spectrophores (TM)| is a screening technology by Silicos_ which converts three-dimensional molecular property data into one-dimensional spectra. 
Typical characteristics that can be converted include electrostatic potentials, molecular shape, lipophilicity, hardness and softness potentials. The computation is independent of the position and orientation of a molecule and allows an easy comparison of |Spectrophores (TM)| of different molecules. + + Molecules with similar three-dimensional properties and shape, and therefore also similar biological activities, always have similar |Spectrophores (TM)|. As a result this technique is a very powerful tool to investigate the similarity of molecules and can be applied as a screening tool for molecular databases, virtual screening, and database characterisations. + +*Advantages:* + +- |Spectrophores (TM)| can realistically compute ligand-protein interactions based on aforementioned molecular descriptors +- |Spectrophores (TM)| can be applied in both a ligand- or target-based setting +- |Spectrophores (TM)| can distinguish, if needed, between the different enantiomers of stereo-selective compounds +- |Spectrophores (TM)| can be computed fast + +.. |Spectrophores (TM)| unicode:: Spectrophores U+2122 + +----- + +.. class:: warningmark + +**Hint** this tool is useful to select compounds with similar chemical features to a target, but accounting for the discovery of diverse scaffolds. This is in contrast to the results expected in a similarity search based on atom connectivity. + +----- + +.. class:: infomark + +**Input** + +The target molecule must be a SD formatted file with the |Spectrophores (TM)| descriptors stored as metadata. Such files can be generated using the *Compute physico-chemical properties* tool. + +----- + +.. class:: infomark + +**Output** + +The library of compounds is a tabular file with one line per compound. One column contains the |Spectrophores (TM)| descriptors. + +----- + +.. 
class:: infomark + +**Cite** + +N M O'Boyle, C Morley and G R Hutchison - `Pybel: a Python wrapper for the OpenBabel cheminformatics toolkit`_ + +Silicos_ - |Spectrophores (TM)| is a registered tool implemented in the open-source OpenBabel. + +.. _`Pybel: a Python wrapper for the OpenBabel cheminformatics toolkit`: http://www.biomedcentral.com/content/pdf/1752-153X-2-5.pdf +.. _Silicos: http://openbabel.org/docs/dev/Fingerprints/spectrophore.html + + + diff -r 000000000000 -r 527ecd2fc500 search/test-data/CID2244_with_spectrophore.sdf --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/search/test-data/CID2244_with_spectrophore.sdf Thu Aug 15 03:25:06 2013 -0400 @@ -0,0 +1,193 @@ +2244 + OpenBabel07101216402D + + 21 21 0 0 0 0 0 0 0 0999 V2000 + 3.7320 -0.0600 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0 + 6.3301 1.4400 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0 + 4.5981 1.4400 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0 + 2.8660 -1.5600 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0 + 4.5981 -0.5600 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 5.4641 -0.0600 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 4.5981 -1.5600 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 6.3301 -0.5600 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 5.4641 -2.0600 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 6.3301 -1.5600 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 5.4641 0.9400 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 2.8660 -0.5600 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 2.0000 -0.0600 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 4.0611 -1.8700 0.0000 H 0 0 0 0 0 0 0 0 0 0 0 0 + 6.8671 -0.2500 0.0000 H 0 0 0 0 0 0 0 0 0 0 0 0 + 5.4641 -2.6800 0.0000 H 0 0 0 0 0 0 0 0 0 0 0 0 + 6.8671 -1.8700 0.0000 H 0 0 0 0 0 0 0 0 0 0 0 0 + 2.3100 0.4769 0.0000 H 0 0 0 0 0 0 0 0 0 0 0 0 + 1.4631 0.2500 0.0000 H 0 0 0 0 0 0 0 0 0 0 0 0 + 1.6900 -0.5969 0.0000 H 0 0 0 0 0 0 0 0 0 0 0 0 + 6.3301 2.0600 0.0000 H 0 0 0 0 0 0 0 0 0 0 0 0 + 1 5 1 0 0 0 0 + 1 12 1 0 0 0 0 + 2 11 1 0 0 0 0 + 2 21 1 0 0 0 0 + 3 11 2 0 0 0 0 + 4 12 2 0 0 0 0 + 5 6 1 0 0 0 0 + 5 7 2 0 0 0 0 + 6 8 2 0 0 0 0 + 6 11 1 0 0 0 0 + 7 9 1 0 0 0 0 + 7 14 1 0 0 0 0 + 8 
10 1 0 0 0 0 + 8 15 1 0 0 0 0 + 9 10 2 0 0 0 0 + 9 16 1 0 0 0 0 + 10 17 1 0 0 0 0 + 12 13 1 0 0 0 0 + 13 18 1 0 0 0 0 + 13 19 1 0 0 0 0 + 13 20 1 0 0 0 0 +M END +> +2244 + +> +1 + +> +212 + +> +4 + +> +1 + +> +3 + +> +AAADccBwOAAAAAAAAAAAAAAAAAAAAAAAAAAwAAAAAAAAAAABAAAAGgAACAAADASAmAAyDoAABgCIAiDSCAACCAAkIAAIiAEGCMgMJzaENRqCe2Cl4BEIuYeIyCCOAAAAAAAIAAAAAAAAABAAAAAAAAAAAA== + +> +2-acetoxybenzoic acid + +> +2-acetyloxybenzoic acid + +> +2-acetyloxybenzoic acid + +> +2-acetyloxybenzoic acid + +> +2-acetoxybenzoic acid + +> +InChI=1S/C9H8O4/c1-6(10)13-8-5-3-2-4-7(8)9(11)12/h2-5H,1H3,(H,11,12) + +> +BSYNRYMUTXBXSQ-UHFFFAOYSA-N + +> +1.2 + +> +180.042259 + +> +C9H8O4 + +> +180.15742 + +> +CC(=O)OC1=CC=CC=C1C(=O)O + +> +CC(=O)OC1=CC=CC=C1C(=O)O + +> +63.6 + +> +180.042259 + +> +0 + +> +13 + +> +0 + +> +0 + +> +0 + +> +0 + +> +0 + +> +1 + +> +1 + +> +1 +5 +255 + +> +5 6 8 +5 7 8 +6 8 8 +7 9 8 +8 10 8 +9 10 8 + +> +1 + +> +BSYNRYMUTXBXSQ-UHFFFAOYSA-N + +> +1.3101 + +> +63.6 + +> +13 + +> +180.15742 + +> +InChI=1S/C9H8O4/c1-6(10)13-8-5-3-2-4-7(8)9(11)12/h2-5H,1H3,(H,11,12) + +> +-1.454, -1.106, -1.162, 0.285, 0.369, 1.008, 0.144, -0.956, 0.133, 0.883, 1.828, 0.029, -1.312, -1.424, -1.287, -0.644, 0.59, 0.746, -0.127, 0.535, 0.368, 0.93, 1.721, -0.097, -1.591, -0.888, -1.332, 0.013, 0.889, 0.249, -0.343, -0.346, -0.047, 1.471, 1.595, 0.329, -1.101, -1.186, 0.953, 0.953, -0.644, -0.653, -1.136, 1.547, 0.533, -0.262, -0.274, 1.27 + +> +1 + +> +CC(=O)Oc1ccccc1C(=O)O + +> +44.9003 + +> +4 + +> +3 + +$$$$ diff -r 000000000000 -r 527ecd2fc500 search/test-data/lib.tabular --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/search/test-data/lib.tabular Thu Aug 15 03:25:06 2013 -0400 @@ -0,0 +1,9 @@ +Hydrogen-bond donors InChI key logP Polar surface area Number of heavy atoms Molecular weight InChI Spectrophores(TM) Number of rings Canonical SMILES Molecular refractivity Hydrogen-bond acceptors Rotatable bonds +3 JZUFKLXOESDKRF-UHFFFAOYSA-N 2.9774 135.12 17 297.73912 
InChI=1S/C7H8ClN3O4S2/c8-4-1-5-7(2-6(4)16(9,12)13)17(14,15)11-3-10-5/h1-2,10-11H,3H2,(H2,9,12,13) nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, -1.245, -1.245, -0.178, -0.178, -0.178, -0.178, -0.178, -0.178, -0.178, 1.956, 1.956, -0.178, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan 2 Clc1cc2NCNS(=O)(=O)c2cc1S(=O)(=O)N 67.5768 7 1 +3 ZZUFCTLCJUWOSV-UHFFFAOYSA-N 3.7448 131.01 21 330.74414 InChI=1S/C12H11ClN2O5S/c13-9-5-10(15-6-7-2-1-3-20-7)8(12(16)17)4-11(9)21(14,18)19/h1-5,15H,6H2,(H,16,17)(H2,14,18,19) nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, -2.141, -2.141, 0.428, 0.428, 0.428, 0.428, 0.428, 0.428, 0.428, 0.428, 0.428, 0.428, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan 2 OC(=O)c1cc(c(cc1NCc1ccco1)Cl)S(=O)(=O)N 75.4644 7 5 +1 QEVHRUUCFGRFIF-MDEJGZGSSA-N 4.109 117.78 44 608.6787 InChI=1S/C33H40N2O9/c1-38-19-7-8-20-21-9-10-35-16-18-13-27(44-32(36)17-11-25(39-2)30(41-4)26(12-17)40-3)31(42-5)28(33(37)43-6)22(18)15-24(35)29(21)34-23(20)14-19/h7-8,11-12,14,18,22,24,27-28,31,34H,9-10,13,15-16H2,1-6H3/t18-,22+,24-,27-,28+,31+/m1/s1 nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan 6 COc1ccc2c(c1)[nH]c1c2CCN2[C@@H]1C[C@H]1[C@@H](C2)C[C@H]([C@@H]([C@H]1C(=O)OC)OC)OC(=O)c1cc(OC)c(c(c1)OC)OC 165.5222 10 10 +4 NJKRHQRIYAWMRO-BQTSRIDJSA-N 7.0864 252.9 61 906.41782 InChI=1S/C33H40N2O9.C7H8ClN3O4S2/c1-38-19-7-8-20-21-9-10-35-16-18-13-27(44-32(36)17-11-25(39-2)30(41-4)26(12-17)40-3)31(42-5)28(33(37)43-6)22(18)15-24(35)29(21)34-23(20)14-19;8-4-1-5-7(2-6(4)16(9,12)13)17(14,15)11-3-10-5/h7-8,11-12,14,18,22,24,27-28,31,34H,9-10,13,15-16H2,1-6H3;1-2,10-11H,3H2,(H2,9,12,13)/t18-,22+,24-,27-,28+,31+;/m1./s1 nan, nan, nan, 
nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan 8 Clc1cc2NCNS(=O)(=O)c2cc1S(=O)(=O)N.COc1ccc2c(c1)[nH]c1c2CCN2[C@@H]1C[C@H]1[C@@H](C2)C[C@H]([C@@H]([C@H]1C(=O)OC)OC)OC(=O)c1cc(OC)c(c(c1)OC)OC 233.099 17 11 +0 UOOUPHIKFMZJEA-UHFFFAOYSA-N 6.248 33.42 33 430.54354 InChI=1S/C29H26N4/c1-3-29(4-2)31-25-20-27-24(19-28(25)33(29)22-15-9-6-10-16-22)30-23-17-11-12-18-26(23)32(27)21-13-7-5-8-14-21/h5-20H,3-4H2,1-2H3 nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, -2.141, -2.141, 0.428, 0.428, 0.428, 0.428, 0.428, 0.428, 0.428, 0.428, 0.428, 0.428, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan 6 CCC1(CC)N=c2c(N1c1ccccc1)cc1c(c2)n(c2ccccc2)c2c(n1)cccc2 143.054 3 4 +0 YQCDIJPZZOKCLA-UHFFFAOYSA-N 5.8579 33.42 32 416.51696 InChI=1S/C28H24N4/c1-3-28(2)30-24-19-26-23(18-27(24)32(28)21-14-8-5-9-15-21)29-22-16-10-11-17-25(22)31(26)20-12-6-4-7-13-20/h4-19H,3H2,1-2H3 nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan 6 CCC1(C)N=c2c(N1c1ccccc1)cc1c(c2)n(c2ccccc2)c2c(n1)cccc2 138.247 3 3 +0 BSYNRYMUTXBXSQ-UHFFFAOYSA-M -0.0246 66.43 13 179.14948 InChI=1S/C9H8O4/c1-6(10)13-8-5-3-2-4-7(8)9(11)12/h2-5H,1H3,(H,11,12)/p-1 nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, 2.141, 2.141, -0.428, -0.428, -0.428, -0.428, -0.428, -0.428, -0.428, -0.428, -0.428, -0.428, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan 1 CC(=O)Oc1ccccc1C(=O)[O-] 42.958 4 3 +0 BSYNRYMUTXBXSQ-UHFFFAOYSA-M -0.0246 66.43 13 179.14948 InChI=1S/C9H8O4/c1-6(10)13-8-5-3-2-4-7(8)9(11)12/h2-5H,1H3,(H,11,12)/p-1 
nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, 2.141, 2.141, -0.428, -0.428, -0.428, -0.428, -0.428, -0.428, -0.428, -0.428, -0.428, -0.428, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan 1 CC(=O)Oc1ccccc1C(=O)[O-] 42.958 4 3 diff -r 000000000000 -r 527ecd2fc500 search/test-data/ob_spectrophore_search.tabular --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/search/test-data/ob_spectrophore_search.tabular Thu Aug 15 03:25:06 2013 -0400 @@ -0,0 +1,9 @@ +Hydrogen-bond donors InChI key logP Polar surface area Number of heavy atoms Molecular weight InChI Spectrophores(TM) Number of rings Canonical SMILES Molecular refractivity Hydrogen-bond acceptors Rotatable bonds 0.000000 +3 JZUFKLXOESDKRF-UHFFFAOYSA-N 2.9774 135.12 17 297.73912 InChI=1S/C7H8ClN3O4S2/c8-4-1-5-7(2-6(4)16(9,12)13)17(14,15)11-3-10-5/h1-2,10-11H,3H2,(H2,9,12,13) nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, -1.245, -1.245, -0.178, -0.178, -0.178, -0.178, -0.178, -0.178, -0.178, 1.956, 1.956, -0.178, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan 2 Clc1cc2NCNS(=O)(=O)c2cc1S(=O)(=O)N 67.5768 7 1 nan +3 ZZUFCTLCJUWOSV-UHFFFAOYSA-N 3.7448 131.01 21 330.74414 InChI=1S/C12H11ClN2O5S/c13-9-5-10(15-6-7-2-1-3-20-7)8(12(16)17)4-11(9)21(14,18)19/h1-5,15H,6H2,(H,16,17)(H2,14,18,19) nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, -2.141, -2.141, 0.428, 0.428, 0.428, 0.428, 0.428, 0.428, 0.428, 0.428, 0.428, 0.428, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan 2 OC(=O)c1cc(c(cc1NCc1ccco1)Cl)S(=O)(=O)N 75.4644 7 5 nan +1 QEVHRUUCFGRFIF-MDEJGZGSSA-N 4.109 117.78 44 608.6787 
InChI=1S/C33H40N2O9/c1-38-19-7-8-20-21-9-10-35-16-18-13-27(44-32(36)17-11-25(39-2)30(41-4)26(12-17)40-3)31(42-5)28(33(37)43-6)22(18)15-24(35)29(21)34-23(20)14-19/h7-8,11-12,14,18,22,24,27-28,31,34H,9-10,13,15-16H2,1-6H3/t18-,22+,24-,27-,28+,31+/m1/s1 nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan 6 COc1ccc2c(c1)[nH]c1c2CCN2[C@@H]1C[C@H]1[C@@H](C2)C[C@H]([C@@H]([C@H]1C(=O)OC)OC)OC(=O)c1cc(OC)c(c(c1)OC)OC 165.5222 10 10 nan +4 NJKRHQRIYAWMRO-BQTSRIDJSA-N 7.0864 252.9 61 906.41782 InChI=1S/C33H40N2O9.C7H8ClN3O4S2/c1-38-19-7-8-20-21-9-10-35-16-18-13-27(44-32(36)17-11-25(39-2)30(41-4)26(12-17)40-3)31(42-5)28(33(37)43-6)22(18)15-24(35)29(21)34-23(20)14-19;8-4-1-5-7(2-6(4)16(9,12)13)17(14,15)11-3-10-5/h7-8,11-12,14,18,22,24,27-28,31,34H,9-10,13,15-16H2,1-6H3;1-2,10-11H,3H2,(H2,9,12,13)/t18-,22+,24-,27-,28+,31+;/m1./s1 nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan 8 Clc1cc2NCNS(=O)(=O)c2cc1S(=O)(=O)N.COc1ccc2c(c1)[nH]c1c2CCN2[C@@H]1C[C@H]1[C@@H](C2)C[C@H]([C@@H]([C@H]1C(=O)OC)OC)OC(=O)c1cc(OC)c(c(c1)OC)OC 233.099 17 11 nan +0 UOOUPHIKFMZJEA-UHFFFAOYSA-N 6.248 33.42 33 430.54354 InChI=1S/C29H26N4/c1-3-29(4-2)31-25-20-27-24(19-28(25)33(29)22-15-9-6-10-16-22)30-23-17-11-12-18-26(23)32(27)21-13-7-5-8-14-21/h5-20H,3-4H2,1-2H3 nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, -2.141, -2.141, 0.428, 0.428, 0.428, 0.428, 0.428, 0.428, 0.428, 0.428, 0.428, 0.428, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan 6 CCC1(CC)N=c2c(N1c1ccccc1)cc1c(c2)n(c2ccccc2)c2c(n1)cccc2 143.054 3 4 nan +0 YQCDIJPZZOKCLA-UHFFFAOYSA-N 5.8579 
33.42 32 416.51696 InChI=1S/C28H24N4/c1-3-28(2)30-24-19-26-23(18-27(24)32(28)21-14-8-5-9-15-21)29-22-16-10-11-17-25(22)31(26)20-12-6-4-7-13-20/h4-19H,3H2,1-2H3 nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan 6 CCC1(C)N=c2c(N1c1ccccc1)cc1c(c2)n(c2ccccc2)c2c(n1)cccc2 138.247 3 3 nan +0 BSYNRYMUTXBXSQ-UHFFFAOYSA-M -0.0246 66.43 13 179.14948 InChI=1S/C9H8O4/c1-6(10)13-8-5-3-2-4-7(8)9(11)12/h2-5H,1H3,(H,11,12)/p-1 nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, 2.141, 2.141, -0.428, -0.428, -0.428, -0.428, -0.428, -0.428, -0.428, -0.428, -0.428, -0.428, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan 1 CC(=O)Oc1ccccc1C(=O)[O-] 42.958 4 3 nan +0 BSYNRYMUTXBXSQ-UHFFFAOYSA-M -0.0246 66.43 13 179.14948 InChI=1S/C9H8O4/c1-6(10)13-8-5-3-2-4-7(8)9(11)12/h2-5H,1H3,(H,11,12)/p-1 nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, 2.141, 2.141, -0.428, -0.428, -0.428, -0.428, -0.428, -0.428, -0.428, -0.428, -0.428, -0.428, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan 1 CC(=O)Oc1ccccc1C(=O)[O-] 42.958 4 3 nan diff -r 000000000000 -r 527ecd2fc500 subsearch/subsearch.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/subsearch/subsearch.py Thu Aug 15 03:25:06 2013 -0400 @@ -0,0 +1,116 @@ +#!/usr/bin/env python +""" + Input: Molecules in SDF, SMILES ... + Output: Moleculs filtered with specified substructures. 
+ Copyright 2013, Bjoern Gruening and Xavier Lucas +""" +import sys, os +import argparse +import openbabel +openbabel.obErrorLog.StopLogging() +import pybel +import multiprocessing +import tempfile +import subprocess +import shutil + +def parse_command_line(): + parser = argparse.ArgumentParser() + parser.add_argument('-i', '--infile', required=True, help='Molecule file.') + parser.add_argument('--iformat', help='Input format.') + parser.add_argument('--fastsearch-index', dest="fastsearch_index", + required=True, help='Path to the openbabel fastsearch index.') + parser.add_argument('-o', '--outfile', required=True, help='Path to the output file.') + parser.add_argument('--oformat', + default='smi', help='Output file format') + parser.add_argument("--max-candidates", dest="max_candidates", type=int, + default=4000, help="The maximum number of candidates.") + parser.add_argument('-p', '--processors', type=int, + default=multiprocessing.cpu_count()) + return parser.parse_args() + +results = list() +def mp_callback(res): + results.append(res) + +def mp_helper( query, args ): + """ + Helper function for multiprocessing. 
+ That function is a wrapper around the following command: + obabel file.fs -s"smarts" -Ooutfile.smi -al 999999999 + """ + + if args.oformat == 'names': + opts = '-osmi -xt' + else: + opts = '-o%s' % args.oformat + + tmp = tempfile.NamedTemporaryFile(delete=False) + cmd = 'obabel -ifs %s -O %s %s -s%s -al %s' % (args.fastsearch_index, tmp.name, opts, query, args.max_candidates) + + child = subprocess.Popen(cmd.split(), + stdout=subprocess.PIPE, stderr=subprocess.PIPE) + + stdout, stderr = child.communicate() + return_code = child.returncode + + if return_code: + sys.stdout.write(stdout) + sys.stderr.write(stderr) + sys.stderr.write("Return error code %i from command:\n" % return_code) + sys.stderr.write("%s\n" % cmd) + else: + sys.stdout.write(stdout) + sys.stdout.write(stderr) + return (tmp.name, query) + + +def get_smiles_or_smarts( args ): + """ + Wrapper to retrieve a stripped SMILES or SMARTS string from different input formats. + """ + if args.iformat in ['smi', 'text', 'tabular']: + with open( args.infile ) as text_file: + for line in text_file: + yield line.split('\t')[0].strip() + else: + # inchi or sdf files + for mol in pybel.readfile( args.iformat, args.infile ): + yield mol.write('smiles').split('\t')[0] + +def substructure_search( args ): + + pool = multiprocessing.Pool( args.processors ) + for query in get_smiles_or_smarts( args ): + pool.apply_async(mp_helper, args=(query, args), callback=mp_callback) + #mp_callback( mp_helper(query, args) ) + pool.close() + pool.join() + + if args.oformat == 'names': + out_handle = open( args.outfile, 'w' ) + for result_file, query in results: + with open(result_file) as res_handle: + for line in res_handle: + out_handle.write('%s\t%s\n' % ( line.strip(), query )) + os.remove( result_file ) + out_handle.close() + else: + out_handle = open( args.outfile, 'wb' ) + for result_file, query in results: + res_handle = open(result_file,'rb') + shutil.copyfileobj( res_handle, out_handle ) + res_handle.close() + os.remove( 
result_file ) + out_handle.close() + + +def __main__(): + """ + Multiprocessing Open Babel Substructure Search. + """ + args = parse_command_line() + substructure_search( args ) + +if __name__ == "__main__" : + __main__() diff -r 000000000000 -r 527ecd2fc500 subsearch/subsearch.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/subsearch/subsearch.xml Thu Aug 15 03:25:06 2013 -0400 @@ -0,0 +1,73 @@ + + of fingerprint data sets + + openbabel + + + + subsearch.py + -i $query + --iformat "${query.ext}" + --fastsearch-index "${os.path.join($fastsearch.extra_files_path,'molecule.fs')}" + -o "${outfile}" + --oformat $oformat + --max-candidates $max_candidates + --processors 10 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +.. class:: infomark + +**What this tool does** + +Substructure search is based on Open Babel FastSearch_ Index. It uses molecular fingerprints to prepare and search an index of a multi-molecule datafile. + +.. _FastSearch: http://openbabel.org/wiki/FastSearch + +----- + +.. class:: infomark + +**Input** + +SMILES or SMARTS patterns are possible. SD- and InChI files are converted to SMILES. + +----- + +.. class:: infomark + +**Cite** + +`Open Babel`_ + +.. _Open Babel: http://openbabel.org/wiki/Main_Page + + + diff -r 000000000000 -r 527ecd2fc500 tool_dependencies.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool_dependencies.xml Thu Aug 15 03:25:06 2013 -0400 @@ -0,0 +1,24 @@ + + + + + + + + + + + + https://raw.github.com/bgruening/galaxytools/552bffa1b5d9e0ef6f8b84ed35a619dc3e58def1/chemicaltoolbox/cheminfolib.py + + cheminfolib.py + $INSTALL_DIR + + + $INSTALL_DIR + + + + cheminfolib is just a small helper library for the chemicaltoolbox project and has no further dependencies. 
+ + diff -r 000000000000 -r 527ecd2fc500 visualisation/ob_depiction_svg.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/visualisation/ob_depiction_svg.xml Thu Aug 15 03:25:06 2013 -0400 @@ -0,0 +1,127 @@ + + of compounds + + openbabel + + + obabel -i${infile.ext} $infile + $embed_molecule_as_cml + $draw_all_carbon_atoms + -xC + -O + $outfile -o$oformat + #if str($sort) != "none": + --sort $sort + #end if + #if str($display_name) != "None" and len(str($display_name)) > 0 and str($display_name) != 'title': + --append "$display_name" + --title "" + #end if + $thick_lines + 2>&1 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +.. class:: infomark + +**What this tool does** + +Creates an .svg or .png image of a small set of molecules (few hundreds). Based on Open Babel PNG_/SVG_ 2D depiction. + +.. _PNG: http://openbabel.org/docs/dev/FileFormats/PNG_2D_depiction.html +.. _SVG: http://openbabel.org/docs/dev/FileFormats/SVG_2D_depiction.html + +----- + +.. class:: warningmark + +**Hint** + +Use only libraries with at most a few hundred molecules. + +----- + +.. class:: infomark + +**Cite** + +`Open Babel`_ + +.. 
_Open Babel: http://openbabel.org/wiki/Main_Page + + + diff -r 000000000000 -r 527ecd2fc500 visualisation/test-data/8_mol.smi --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/visualisation/test-data/8_mol.smi Thu Aug 15 03:25:06 2013 -0400 @@ -0,0 +1,8 @@ +C1NC2=CC(=C(C=C2S(=O)(=O)N1)S(=O)(=O)N)Cl +C1=COC(=C1)CNC2=CC(=C(C=C2C(=O)O)S(=O)(=O)N)Cl +CO[C@H]1[C@@H](C[C@@H]2CN3CCC4=C([C@H]3C[C@@H]2[C@@H]1C(=O)OC)NC5=C4C=CC(=C5)OC)OC(=O)C6=CC(=C(C(=C6)OC)OC)OC +CO[C@H]1[C@@H](C[C@@H]2CN3CCC4=C([C@H]3C[C@@H]2[C@@H]1C(=O)OC)NC5=C4C=CC(=C5)OC)OC(=O)C6=CC(=C(C(=C6)OC)OC)OC.C1NC2=CC(=C(C=C2S(=O)(=O)N1)S(=O)(=O)N)Cl +CCC1(N=C2C=C3C(=NC4=CC=CC=C4N3C5=CC=CC=C5)C=C2N1C6=CC=CC=C6)CC +CCC1(N=C2C=C3C(=NC4=CC=CC=C4N3C5=CC=CC=C5)C=C2N1C6=CC=CC=C6)C +CC(=O)OC1=CC=CC=C1C(=O)[O-] +CC(=O)OC1=CC=CC=C1C(=O)[O-] diff -r 000000000000 -r 527ecd2fc500 visualisation/test-data/ob_depiction_svg_on_8_mol.svg --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/visualisation/test-data/ob_depiction_svg_on_8_mol.svg Thu Aug 15 03:25:06 2013 -0400 @@ -0,0 +1,537 @@ + + +OBDepict + + + + + + + + + + + + + + + + + + + + + + + + + + + + +HN +S +O +O +HN +S +O +O +NH +2 +Cl + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +O +HN +O +OH +S +O +O +NH +2 +Cl + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +O +H +H +H +N +H +H +H +O +O +NH +O +O +O +O +O +O + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +O +H +H +H +N +H +H +H +O +O +NH +O +O +O +O +O +O +HN +S +O +O +HN +S +O +O +NH +2 +Cl + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +N +N +N +N + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +N +N +N +N + + + + + + + + + + + + + + + + 
+ + + + + + + +O +O +O +- +O + + + + + + + + + + + + + + + + + + + + + + + +O +O +O +- +O + + + + +