diff convert/ob_convert.xml @ 0:527ecd2fc500 draft

Uploaded
author bgruening
date Thu, 15 Aug 2013 03:25:06 -0400
parents
children 125da3a296ca
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/convert/ob_convert.xml	Thu Aug 15 03:25:06 2013 -0400
@@ -0,0 +1,460 @@
+<tool id="ctb_compound_convert" name="Compound Convert" version="0.1">
+    <description>Converts various chemistry and molecular modeling data files</description>
+    <!--<parallelism method="multi" split_inputs="infile" split_mode="to_size" split_size="10000" shared_inputs="" merge_outputs="outfile"></parallelism>
+        We can't use that currently, because a few output-datatypes, like the fastsearch index do not support merging. We need to patch galaxy to not abort in such a case
+        and run the job in non-multi mode.
+    -->
+    <requirements>
+        <requirement type="package" version="2.3.2">openbabel</requirement>
+    </requirements>
+    <command>
+## The command is a Cheetah template which allows some Python based syntax.
+## Lines starting hash hash are comments. Galaxy will turn newlines into spaces
+
+#set $format = $oformat.oformat_opts_selector
+
+    #if $format == "fs":
+        ## For the fastsearch index we need to copy the original molecule files to the composite datatype of obfs.
+        ## Because openbabel likes file extensions, we give the molecule file a proper file extension.
+        mkdir $outfile.extra_files_path;
+        cp "${infile}" ${os.path.join($outfile.extra_files_path, 'molecule.%s' % $infile.ext )};
+    #end if
+
+obabel -i "${infile.ext}"
+
+    #if $format == "fs":
+        ## the fs filetype need his own symlink path, all others can take the original ones
+        ${os.path.join($outfile.extra_files_path, 'molecule.%s' % $infile.ext )}
+        -o "$format" -e
+        -O ${os.path.join($outfile.extra_files_path,'molecule.fs')}
+        #if int($oformat.fs_fold) > 0:
+            -xN$oformat.fs_fold
+        #end if
+        ${oformat.fs_fptype}
+    #else:
+        "${infile}"
+        -o "$format"
+        -O "${outfile}"
+        -e
+    #end if
+
+
+    #if $format == 'cml':
+        $oformat.cml_array
+        $oformat.cml_cml1
+        $oformat.cml_aromatic
+        $oformat.cml_hydrogen
+        $oformat.cml_metadata
+        $oformat.cml_omit
+        $oformat.cml_continuous
+        $oformat.cml_properties
+        $oformat.cml_gen2d
+        $oformat.cml_gen3d
+    #elif $format == 'inchi':
+        ##ignore less import warnings
+        -w
+        #if $oformat.inchi_truncate:
+            #set $truncate = ''.join( str( $oformat.inchi_truncate ).split( ',' ) )
+            -xT ${truncate}
+        #end if
+
+        #if $oformat.inchi_additional:
+            #set $additional = ' '.join( str( $oformat.inchi_additional ).split( ',' ) )
+            -xX '${additional}'
+        #end if
+
+        $oformat.inchi_key
+        $oformat.inchi_name
+        $oformat.inchi_unique
+        $oformat.inchi_unique_sort
+    #elif $format == 'can':
+        $oformat.can_exp_h
+        $oformat.can_iso_chi
+        $oformat.can_rad
+        $oformat.can_atomclass_out
+    #elif $format == 'smi':
+        $oformat.smi_exp_h
+        $oformat.smi_iso_chi
+        $oformat.smi_rad
+        $oformat.smi_atomclass_out
+        $oformat.smi_can
+        $oformat.smi_coordinates
+    #elif $format == 'sdf':
+        $oformat.sdf_exp_h
+        $oformat.sdf_no_prop
+        $oformat.sdf_wedge_bonds
+        $oformat.sdf_alias_out
+        $oformat.sdf_gen2d
+        $oformat.sdf_gen3d
+    #elif $format == 'fpt':
+        $oformat.fpt_fptype
+        #if int($oformat.fpt_fold) > 0:
+            $oformat.fpt_fold
+        #end if
+        $oformat.fpt_hex_multiple
+        $oformat.fpt_hex
+        $oformat.fpt_set
+        $oformat.fpt_unset
+    #elif $format == 'mol2':
+        $oformat.mol2_ignore_res
+        $oformat.mol2_gen2d
+        $oformat.mol2_gen3d
+    #end if
+
+    ## Uniqueness according to stripped InChI's or canonical SMILES
+    #if str($unique.unique_opts_selector):
+        #if $unique.unique_opts_selector == 'inchi':
+            #if $unique.truncate:
+                #set $truncate = ''.join( str( $unique.truncate ).split( ',' ) )
+                --unique $truncate
+            #end if
+        #else
+            --unique $unique.unique_opts_selector
+        #end if
+    #end if
+
+
+    #if str($appendtotitle).strip():
+        --addtotitle '${appendtotitle}'
+    #end if
+
+    $remove_h
+    $dative_bonds
+
+    #if int($ph) >= 0:
+        -p $ph
+    #end if
+
+    2>&#38;1
+    </command>
+    <inputs>
+        <param name="infile" type="data" format="sdf,mol2,cml,inchi,smi" label="Select input file with molecules"/>
+        <conditional name="oformat">
+            <param name="oformat_opts_selector" type="select" label="Output format">
+                <option value="acr">Carine ASCI Crystal</option>
+                <option value="alc">Alchemy format</option>
+                <option value="bgf">MSI BGF format</option>
+                <option value="box">Dock 3.5 Box format</option>
+                <option value="bs">Ball and Stick format</option>
+                <option value="c3d1">Chem3D Cartesian 1 format</option>
+                <option value="c3d2">Chem3D Cartesian 2 format</option>
+                <option value="caccrt">Cacao Cartesian format</option>
+                <option value="cache">CAChe MolStruct format</option>
+                <option value="cacint">Cacao Internal format</option>
+                <option value="can" selected="True">Canonical SMILES format (can)</option>
+                <option value="cdxml">ChemDraw CDXML format</option>
+                <option value="cht">Chemtool format</option>
+                <option value="cif">Crystallographic Information File</option>
+                <option value="cml">Chemical Markup Language</option>
+                <option value="cmlr">CML Reaction format</option>
+                <option value="com">Gaussian 98/03 Cartesian Input</option>
+                <option value="copy">Copies raw text</option>
+                <option value="crk2d">Chemical Resource Kit 2D diagram format</option>
+                <option value="crk3d">Chemical Resource Kit 3D format</option>
+                <option value="csr">Accelrys/MSI Quanta CSR format</option>
+                <option value="cssr">CSD CSSR format</option>
+                <option value="ct">ChemDraw Connection Table format</option>
+                <option value="dmol">DMol3 coordinates format</option>
+                <!--<option value="ent">Protein Data Bank format</option>
+                <option value="fa">FASTA format</option>-->
+                <option value="fasta">FASTA format</option>
+                <option value="feat">Feature format</option>
+                <option value="fh">Fenske-Hall Z-Matrix format</option>
+                <option value="fix">SMILES FIX format</option>
+                <option value="fpt">Fingerprint format (fpt)</option>
+                <option value="fract">Free Form Fractional format</option>
+                <option value="fs">Open Babel FastSearching database (fs)</option>
+                <!--<option value="fsa">FASTA format</option>-->
+                <option value="gamin">GAMESS Input</option>
+                <option value="gau">Gaussian 98/03 Cartesian Input</option>
+                <!--<option value="gjc">Gaussian 98/03 Cartesian Input</option>
+                <option value="gjf">Gaussian 98/03 Cartesian Input</option>-->
+                <option value="gpr">Ghemical format</option>
+                <option value="gr96">GROMOS96 format</option>
+                <option value="hin">HyperChem HIN format</option>
+                <option value="inchi">IUPAC InChI</option>
+                <option value="inp">GAMESS Input</option>
+                <option value="jin">Jaguar input format</option>
+                <!--<option value="mdl">MDL MOL format (mol)</option>-->
+                <option value="mmd">MacroModel format</option>
+                <option value="mmod">MacroModel format</option>
+                <!--<option value="mol">MDL MOL format (mol)</option> use SDF-->
+                <option value="mol2">Sybyl Mol2 format (mol2)</option>
+                <option value="molreport">Open Babel molecule report</option>
+                <option value="mop">MOPAC Cartesian format</option>
+                <option value="mopcrt">MOPAC Cartesian format</option>
+                <option value="mopin">MOPAC Internal</option>
+                <option value="mpc">MOPAC Cartesian format</option>
+                <option value="mpd">Sybyl descriptor format</option>
+                <option value="mpqcin">MPQC simplified input format</option>
+                <option value="nw">NWChem input format</option>
+                <option value="pcm">PCModel format</option>
+                <option value="pdb">Protein Data Bank format (pdb)</option>
+                <option value="pov">POV-Ray input format</option>
+                <option value="pqs">Parallel Quantum Solutions format</option>
+                <option value="qcin">Q-Chem input format</option>
+                <option value="report">Open Babel report format</option>
+                <option value="rxn">MDL RXN format</option>
+                <!--<option value="sd">MDL MOL format</option>-->
+                <option value="sdf">MDL MOL format (sdf, mol)</option>
+                <option value="smi">SMILES format (smi)</option>
+                <!--<option value="sy2">Sybyl Mol2 format</option>-->
+                <option value="tdd">Thermo format</option>
+                <option value="test">Test format</option>
+                <option value="therm">Thermo format</option>
+                <option value="tmol">TurboMole Coordinate format</option>
+                <option value="txyz">Tinker MM2 format</option>
+                <option value="unixyz">UniChem XYZ format</option>
+                <option value="vmol">ViewMol format</option>
+                <option value="xed">XED format</option>
+                <option value="xyz">XYZ cartesian coordinates format</option>
+                <option value="yob">YASARA.org YOB format</option>
+                <option value="zin">ZINDO input format</option>
+            </param>
+            <when value="acr" />
+            <when value="alc" />
+            <when value="bgf" />
+            <when value="box" />
+            <when value="bs" />
+            <when value="c3d1" />
+            <when value="c3d2" />
+            <when value="caccrt" />
+            <when value="cache" />
+            <when value="cacint" />
+            <when value="can">
+                <param name="can_exp_h" type="boolean" label="Output explicit hydrogens as such (-xh)" truevalue="-xh" falsevalue="" checked="false" />
+                <param name="can_iso_chi" type="boolean" label="Do not include isotopic or chiral markings (-xi)" truevalue="-xi" falsevalue="" checked="false" />
+                <param name="can_rad" type="boolean" label="Radicals lower case eg ethyl is Cc (-xr)" truevalue="-xr" falsevalue="" checked="false" />
+                <param name="can_atomclass_out" type="boolean" label="Output atomclass like [C:2] (-xa)" truevalue="-xa" falsevalue="" checked="false" />
+            </when>
+            <when value="cdxml" />
+            <when value="cht" />
+            <when value="cif" />
+            <when value="cml">
+                <param name="cml_cml1" type="boolean" label="convert to CML 1 (rather than CML 2) (-x1)" truevalue="-x1" falsevalue="" checked="false" />
+                <param name="cml_array" type="boolean" label="write array format for atoms and bonds (-xa)" truevalue="-xa" falsevalue="" checked="false" />
+                <param name="cml_aromatic" type="boolean" label="write aromatic bonds as such, not Kekule form (-xA)" truevalue="-xA" falsevalue="" checked="false" />
+                <param name="cml_hydrogen" type="boolean" label="use hydrogen Count for all hydrogens (-xh)" truevalue="-xh" falsevalue="" checked="false" />
+                <param name="cml_metadata" type="boolean" label="write metadata (-xm)" truevalue="-xm" falsevalue="" checked="false" />
+                <param name="cml_omit" type="boolean" label="omit XML and namespace declarations (-xx)" truevalue="-xx" falsevalue="" checked="false" />
+                <param name="cml_continuous" type="boolean" label="continuous output: no formatting (-xc)" truevalue="-xc" falsevalue="" checked="false" />
+                <param name="cml_properties" type="boolean" label="write properties (-xp)" truevalue="-xp" falsevalue="" checked="false" />
+                <param name="cml_gen2d" type="boolean" label="Generate 2D coordinates (--gen2d)" truevalue="--gen2d" falsevalue="" checked="false" />
+                <param name="cml_gen3d" type="boolean" label="Generate 3D coordinates (--gen3d)" truevalue="--gen3d" falsevalue="" checked="false" />
+            </when>
+            <when value="cmlr" />
+            <when value="com" />
+            <when value="copy" />
+            <when value="crk2d" />
+            <when value="crk3d" />
+            <when value="csr" />
+            <when value="cssr" />
+            <when value="ct" />
+            <when value="dmol" />
+            <when value="fasta" />
+            <when value="feat" />
+            <when value="fh" />
+            <when value="fix" />
+            <when value="fpt">
+                <param name="fpt_fptype" type="select" label="Fingerprint type">
+                    <option value="-xfFP2" selected="True">FP2</option>
+                    <option value="-xfFP3">FP3</option>
+                    <option value="-xfFP4">FP4</option>
+                    <option value="-xfMACCS">MACCS</option>
+                </param>
+                <param name="fpt_fold" type="integer" value="0" size="20" label="Fold fingerprint to the number of specifed bits (32, 64, 128, etc.)" help="Use zero for default">
+                   <validator type="in_range" min="0" />
+                </param>
+                <param name="fpt_hex_multiple" type="boolean" label="hex output when multiple molecules (-xh)" truevalue="-xh" falsevalue="" checked="false" />
+                <param name="fpt_hex" type="boolean" label="hex output only (-xo)" truevalue="-xo" falsevalue="" checked="false" />
+                <param name="fpt_set" type="boolean" label="describe each set bit (-xs)" truevalue="-xs" falsevalue="" checked="false" />
+                <param name="fpt_unset" type="boolean" label="describe each unset bit (-xu)" truevalue="-xu" falsevalue="" checked="false" />
+            </when>
+            <when value="fract" />
+            <when value="fs">
+                <param name="fs_fptype" type="select" label="Fingerprint type">
+                    <option value="-xfFP2" selected="True">FP2</option>
+                    <option value="-xfFP3">FP3</option>
+                    <option value="-xfFP4">FP4</option>
+                    <option value="-xfMACCS">MACCS</option>
+                </param>
+                <param name="fs_fold" type="integer" value="0" size="20" label="Fold fingerprint to the number of specifed bits (32, 64, 128, etc.)" help="Use zero for default">
+                   <validator type="in_range" min="0" />
+                </param>
+            </when>
+            <when value="gamin" />
+            <when value="gau" />
+            <when value="gpr" />
+            <when value="gr96" />
+            <when value="hin" />
+            <when value="inchi">
+                <param name="inchi_key" type="boolean" label="output InChIKey only (-xK)" truevalue="-xK" falsevalue="" checked="false" />
+                <param name="inchi_name" type="boolean" label="add molecule name after InChI (-xt)" truevalue="-xt" falsevalue="" checked="true" />
+                <param name="inchi_unique" type="boolean" label="output only unique molecules (-xu)" truevalue="-xu" falsevalue="" checked="false" />
+                <param name="inchi_unique_sort" type="boolean" label="output only unique molecules and sort them (-xU)" truevalue="-xU" falsevalue="" checked="false" />
+                <param name="inchi_truncate" type="select" multiple="True" display="checkboxes" label="truncate InChI according to various parameters">
+                    <option value="/formula">formula only</option>
+                    <option value="/connect">formula and connectivity only</option>
+                    <option value="/nostereo">ignore E/Z and sp3 stereochemistry</option>
+                    <option value="/sp3">ignore sp3 stereochemistry</option>
+                    <option value="/noEZ">ignore E/Z steroeochemistry</option>
+                    <option value="/nochg">ignore charge and protonation</option>
+                    <option value="/noiso">ignore isotopes</option>
+                </param>
+
+                <param name="inchi_additional" type="select" multiple="True" display="checkboxes" label="Additional InChI options" help="For more information please see the InChI documentation (http://www.inchi-trust.org/fileadmin/user_upload/html/inchifaq/inchi-faq.html).">
+                    <option value="NEWPSOFF">Narrow End of Wedge Points to Stereo is OFF (NEWPSOFF - stdInChI)</option>
+                    <option value="DoNotAddH">Skip the addition of hydrogen atoms (DoNotAddH - stdInChI)</option>
+                    <option value="SNon">Exclude stereo (SNon - stdInChI)</option>
+                    <option value="SRel">Relative stereo (SRel)</option>
+                    <option value="SRac">Racemic stereo (SRac)</option>
+                    <option value="SUCF">Use Chiral Flag (SUCF)</option>
+                    <option value="ChiralFlagON">Set Chiral Flag (ChrialFlagON)</option>
+                    <option value="ChiralFlagOFF">Set Not-Chiral Flag (ChrialFlagOFF)</option>
+                    <option value="SUU">Include omitted unknown/undefined stereo (SUU)</option>
+                    <option value="SLUUD">Stereo labels for "unknown" and "undefined" are different, 'u' and '?', respectively (SLUUD) </option>
+                    <option value="FixedH">Mobile H Perception Off (FixedH)</option>
+                    <option value="RecMet">Include reconnected bond to metal results (RecMet)</option>
+                    <option value="KET">Keto-enol tautomerism (KET)</option>
+                    <option value="15T">1,5-tautomerism (15T)</option>
+                </param>
+            </when>
+            <when value="inp" />
+            <when value="jin" />
+            <when value="mmd" />
+            <when value="mmod" />
+            <when value="mol2">
+                <param name="mol2_ignore_res" type="boolean" label="Output ignores residue information for ligands (-xl)" truevalue="-xl" falsevalue="" checked="false" />
+                <param name="mol2_gen2d" type="boolean" label="Generate 2D coordinates (--gen2d)" truevalue="--gen2d" falsevalue="" checked="false" />
+                <param name="mol2_gen3d" type="boolean" label="Generate 3D coordinates (--gen3d)" truevalue="--gen3d" falsevalue="" checked="false" />
+            </when>
+            <when value="molreport" />
+            <when value="mop" />
+            <when value="mopcrt" />
+            <when value="mopin" />
+            <when value="mpc" />
+            <when value="mpd" />
+            <when value="mpqcin" />
+            <when value="nw" />
+            <when value="pcm" />
+            <when value="pdb" />
+            <when value="pov" />
+            <when value="pqs" />
+            <when value="qcin" />
+            <when value="report" />
+            <when value="rxn" />
+            <when value="sdf">
+                <param name="sdf_exp_h" type="boolean" label="output V3000 not V2000 (used for >999 atoms/bonds) (-x3)" truevalue="-x3" falsevalue="" checked="false" />
+                <param name="sdf_no_prop" type="boolean" label="write no properties (-xm)" truevalue="-xm" falsevalue="" checked="false" />
+                <param name="sdf_wedge_bonds" type="boolean" label="use wedge and hash bonds from input (2D structures only) (-xw)" truevalue="-xw" falsevalue="" checked="false" />
+                <param name="sdf_alias_out" type="boolean" label="output in Alias form, e.g. Ph (-xA)" truevalue="-xA" falsevalue="" checked="false" />
+                <param name="sdf_gen2d" type="boolean" label="Generate 2D coordinates (--gen2d)" truevalue="--gen2d" falsevalue="" checked="false" />
+                <param name="sdf_gen3d" type="boolean" label="Generate 3D coordinates (--gen3d)" truevalue="--gen3d" falsevalue="" checked="false" />
+            </when>
+            <when value="smi">
+                <param name="smi_exp_h" type="boolean" label="Output explicit hydrogens as such (-xh)" truevalue="-xh" falsevalue="" checked="false" />
+                <param name="smi_iso_chi" type="boolean" label="Do not include isotopic or chiral markings (-xi)" truevalue="-xi" falsevalue="" checked="false" />
+                <param name="smi_rad" type="boolean" label="Radicals lower case eg ethyl is Cc (-xr)" truevalue="-xr" falsevalue="" checked="false" />
+                <param name="smi_atomclass_out" type="boolean" label="Output atomclass like [C:2] (-xa)" truevalue="-xa" falsevalue="" checked="false" />
+                <param name="smi_can" type="boolean" label="Output in canonical form (-xc)" truevalue="-xc" falsevalue="" checked="false" />
+                <param name="smi_coordinates" type="boolean" label="append X/Y coordinates in canonical-SMILES order (-xx)" truevalue="-xx" falsevalue="" checked="false" />
+            </when>
+            <when value="tdd" />
+            <when value="test" />
+            <when value="therm" />
+            <when value="tmol" />
+            <when value="txyz" />
+            <when value="unixyz" />
+            <when value="vmol" />
+            <when value="xed" />
+            <when value="xyz" />
+            <when value="yob" />
+            <when value="zin" />
+        </conditional>
+
+
+        <!-- Options for all formats.-->
+        <param name="remove_h" type="boolean" label="Delete hydrogen atoms, make all hydrogen implicit (-d)" truevalue="-d" falsevalue="" />
+        <param name="ph" size="5" type="float" value="-1" label="Add hydrogens appropriate for pH (-p)" help="-1 means deactivated"/>
+        <param name="dative_bonds" type="boolean" label="Convert dative bonds, e.g. [N+]([O-])=O to N(=O)=O (-b)" truevalue="-b" falsevalue="" />
+
+        <param name="appendtotitle" type="text" value="" size="20" label="Append the specified text after each molecule title"/>
+
+        <!-- Uniqueness -->
+        <conditional name="unique">
+            <param name="unique_opts_selector" type="select" label="Uniqueness according to">
+                <option value="" selected="True">No unique filter</option>
+                <option value="inchi">InChI</option>
+                <option value="cansmi">canonical SMILES with stereochemical information</option>
+                <option value="cansmiNS">canonical SMILES without stereochemical information</option>
+                <option value="title">title</option>
+            </param>
+            <when value="" />
+            <when value="cansmi" />
+            <when value="cansmiNS" />
+            <when value="title" />
+            <when value="inchi">
+                <param name="truncate" type="select" multiple="True" display="checkboxes" label="Uniqueness defined as truncated InChI">
+                    <option value="/formula">formula only</option>
+                    <option value="/connect">formula and connectivity only</option>
+                    <option value="/nostereo">ignore E/Z and sp3 stereochemistry</option>
+                    <option value="/sp3">ignore sp3 stereochemistry</option>
+                    <option value="/noEZ">ignore E/Z steroeochemistry</option>
+                    <option value="/nochg">ignore charge and protonation</option>
+                    <option value="/noiso">ignore isotopes</option>
+                </param>
+            </when>
+        </conditional>
+    </inputs>
+
+
+    <outputs>
+        <data name="outfile" type="data" format="text" label="Convert to ${oformat.oformat_opts_selector} from ${on_string}">
+            <change_format>
+                <when input="oformat.oformat_opts_selector" value="sdf" format="sdf"/>
+                <when input="oformat.oformat_opts_selector" value="can" format="smi"/>
+                <when input="oformat.oformat_opts_selector" value="smi" format="smi"/>
+                <when input="oformat.oformat_opts_selector" value="mol2" format="mol2"/>
+                <when input="oformat.oformat_opts_selector" value="inchi" format="inchi"/>
+                <when input="oformat.oformat_opts_selector" value="cml" format="cml"/>
+                <when input="oformat.oformat_opts_selector" value="mol" format="mol"/>
+                <when input="oformat.oformat_opts_selector" value="pdb" format="pdb"/>
+                <when input="oformat.oformat_opts_selector" value="fs" format="obfs"/>
+            </change_format>
+        </data>
+    </outputs>
+    <help>
+
+.. class:: infomark
+
+**What this tool does**
+
+The compound converter joins several `Open Babel`_ command prompt converters in an easy to use tool. It converts various chemistry and moleculare modeling data files. 
+The output format can be specified as well as several parameters. Some parameters are available for all tools (e.g. protonation state and pH) 
+others are specific for a given output format (e.g. exclude isotopes for conversion to canSMI).
+
+-----
+
+.. class:: infomark
+
+**Output**
+
+Can be specified manually.
+
+-----
+
+.. class:: infomark
+
+**Cite**
+
+N M O'Boyle, M Banck, C A James, C Morley, T Vandermeersch, and G R Hutchison - `Open Babel: An open chemical toolbox.`_
+
+.. _`Open Babel: An open chemical toolbox.`: http://www.jcheminf.com/content/3/1/33
+
+`Open Babel`_
+
+.. _`Open Babel`: http://openbabel.org/wiki/Main_Page
+
+
+  </help>
+</tool>