Repository 'biotransformer'
hg clone https://toolshed.g2.bx.psu.edu/repos/recetox/biotransformer

Changeset 0:0b86600b715e (2023-06-06)
Next changeset 1:296bd426527f (2023-06-22)
Commit message:
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/biotransformer commit 9b8e9941cdf0689518021bc0aa4b7196b28d25d7
added:
biotransformer.xml
macros.xml
test-data/output1.tsv
test-data/output2.tsv
test-data/output3.tsv
test-data/smiles.csv
wrapper_biotransformer.py
b
diff -r 000000000000 -r 0b86600b715e biotransformer.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/biotransformer.xml Tue Jun 06 11:23:51 2023 +0000
[
@@ -0,0 +1,70 @@
+<tool id="biotransformer" name="BioTransformer" version="@TOOL_VERSION@+galaxy0" profile="21.09">
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+    <expand macro="creator"/>
+    <requirements>
+        <requirement type="package" version="@TOOL_VERSION@">biotransformer</requirement>
+        <requirement type="package" version="3.1.1">openbabel</requirement>
+        <requirement type="package" version="1.1.1">pandas</requirement>
+    </requirements>
+    <command detect_errors="aggressive"><![CDATA[
+        python3 '${__tool_directory__}/wrapper_biotransformer.py'
+         -k $k
+         -b $b
+         -s $steps
+         -t $tolerance
+         -a
+         -icsv $input_file
+         -ocsv $output_file
+         -ocsvDup $output_file2
+         -ocsvDup2 $output_file3
+          ]]>
+    </command>
+    <inputs>
+        <param name="k" type="select" value="pred"
+               label="The task to be performed [pred=prediction, cid=compound identification].">
+            <option value="pred" selected="true">pred</option>
+            <option value="cid">cid</option>
+        </param>
+        <param name="b" type="select" value="ecbased" label="The type of description.">
+            <option value="ecbased" selected="true">EC-based</option>
+            <option value="cyp450">CYP450</option>
+            <option value="phaseII">Phase II</option>
+            <option value="hgut">Human gut microbial</option>
+            <option value="superbio">Human super transformer 1 (superbio)</option>
+            <option value="allHuman">Human super transformer 2 (allHuman)</option>
+            <option value="envimicro">Environmental microbial</option>
+        </param>
+        <param name="steps" type="integer" value="1" label=" The number of steps for the prediction."/>
+        <param name="tolerance" type="float" value="0.01" label="Mass tolerance for metabolite identification."/>
+        <param name="input_file" type="data" format="csv" label="Input CSV file."/>
+    </inputs>
+
+    <outputs>
+        <data format="tsv" name="output_file" label="BioTransformer on ${on_string}"/>
+        <data format="tsv" name="output_file2" label="BioTransformer with filter on ${on_string}"/>
+        <data format="tsv" name="output_file3" label="BioTransformer with super filter on ${on_string}"/>
+    </outputs>
+
+    <tests>
+        <test>
+            <param name="k" value="pred"/>
+            <param name="b" value="allHuman"/>
+            <param name="steps" value="1"/>
+            <param name="tolerance" value="0.01"/>
+            <param name="input_file" value="smiles.csv"/>
+            <output name="output_file" value="output1.tsv"/>
+            <output name="output_file2" value="output2.tsv"/>
+            <output name="output_file3" value="output3.tsv"/>
+        </test>
+    </tests>
+    <help>
+        <![CDATA[
+            @HELP@
+        ]]>
+    </help>
+    <citations>
+        <citation type="doi">https://doi.org/10.1186/s13321-018-0324-5</citation>
+    </citations>
+</tool>
b
diff -r 000000000000 -r 0b86600b715e macros.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/macros.xml Tue Jun 06 11:23:51 2023 +0000
[
@@ -0,0 +1,57 @@
+<macros>
+    <token name="@TOOL_VERSION@">3.0_20230403</token>
+    <xml name="creator">
+        <creator>
+            <person
+                givenName="Martin"
+                familyName="Čech"
+                url="https://github.com/martenson"
+                identifier="0000-0002-9318-1781" />
+            <person
+                givenName="Karolína"
+                familyName="Trachtová"
+                url="https://github.com/trachtok" />
+            <organization
+                url="https://www.recetox.muni.cz/"
+                email="GalaxyToolsDevelopmentandDeployment@space.muni.cz"
+                name="RECETOX MUNI" />
+        </creator>
+    </xml>
+    <token name="@HELP@">
+        <![CDATA[
+            BioTransformer is a software tool that predicts small molecule metabolism in mammals, their gut microbiota,
+            as well as the soil/aquatic microbiota. BioTransformer also assists scientists in metabolite identification,
+            based on the metabolism prediction.
+
+            BioTransformer is offered to the public as a freely accessible software package under the GNU License GPL v3.
+
+            Users are free to copy and redistribute the material in any medium or format. Moreover, they may modify and
+            build upon the material under the condition that they must give appropriate credit, provide links to the license,
+            and indicate if changes were made. Furthermore, the above copyright notice and this permission notice must be
+            included. Use and re-distribution of these resources, in whole or in part, for commercial purposes requires
+            explicit permission of the authors. We ask all users of the BioTransformer software tool, the BioTransformer
+            web server, or BioTransformerDB to cite the BioTransformer reference in any resulting publications, and to
+            acknowledge the authors.
+
+            Parameters explanation:
+
+            **Input.** Currently, only a CSV file with one SMILES per line is accepted.
+
+            **The type of prediction:** EC-based (ecbased), CYP450 (cyp450), Phase II (phaseII), Human gut
+            microbial (hgut), human super transformer* (superbio, or allHuman), Environmental microbial (envimicro).
+
+            **The number of steps for the prediction:** this option will be used for the EC-based, CYP450, Phase II, and Environmental
+            microbial biotransformers. The default value is 1.
+
+            **Mass tolerance for metabolite identification** (default is 0.01).
+
+            **Output of BioTransformer** for a CSV input is 3 TSV files: one without any filtering, a second with duplicates
+            removed based on 6 columns (InChI, InChIKey, Synonyms, Molecular formula, Major Isotope Mass, ALogP), and a third with
+            duplicates removed based on 3 columns (Molecular formula, Major Isotope Mass, ALogP).
+
+            (* ) While the 'superbio' option runs a set number of transformation steps in a pre-defined order (e.g. deconjugation
+            first, then Oxidation/reduction, etc.), the 'allHuman' option predicts all possible metabolites from any applicable
+            reaction (oxidation, reduction, (de-)conjugation) at each step.
+        ]]>
+    </token>
+</macros>
b
diff -r 000000000000 -r 0b86600b715e test-data/output1.tsv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/output1.tsv Tue Jun 06 11:23:51 2023 +0000
[
b'@@ -0,0 +1,215 @@\n+\tSMILES query\tSMILES target\tInChI\tInChIKey\tSMILES\tSynonyms\tPUBCHEM_CID\tMolecular formula\tMajor Isotope Mass\tALogP\tLipinski_Violations\tInsecticide_Likeness_Violations\tPost_Em_Herbicide_Likeness_Violations\tMetabolite ID\tcdk:Title\tReaction\tReaction ID\tEnzyme(s)\tBiosystem\tPrecursor ID\tPrecursor SMILES\tPrecursor InChI\tPrecursor InChIKey\tPrecursor ALogP\tPrecursor Major Isotope Mass\n+0\tCC(C)C1=CC=C(C)C=C1O\t"CC(C)c1ccc(C)cc1OC1C(C(C(C(C(=O)O)O1)O)O)O\t\n+"\tInChI=1S/C16H22O7/c1-7(2)9-5-4-8(3)6-10(9)22-16-13(19)11(17)12(18)14(23-16)15(20)21/h4-7,11-14,16-19H,1-3H3,(H,20,21)\tADQJSAVCKZSGMK-UHFFFAOYSA-N\tCC(C)C1=CC=C(C)C=C1OC2OC(C(O)=O)C(C(C2O)O)O\t"NSC404789\n+NSC-404789\n+(2S,3S,4S,5R)-3,4,5-trihydroxy-6-(5-methyl-2-propan-2-ylphenoxy)oxane-2-carboxylic acid\n+3,5-trihydroxy-6-(2-isopropyl-5-methyl-phenoxy)tetrahydro-2H-pyran-2-carboxylic acid"\t346530\tC16H22O7\t326.13655304400004\t1.5474999999999992\t0\t1\t1\tBTM00001\tBTM00001\tAromatic OH-glucuronidation\tBTMR0166\tEC 2.4.1.17\tHUMAN\tNSC404789\tCC(C)C1=CC=C(C)C=C1O\tInChI=1S/C10H14O/c1-7(2)9-5-4-8(3)6-10(9)11/h4-7,11H,1-3H3\tMGSRCZKZVOBKFT-UHFFFAOYSA-N\t\t150.1044\n+1\tCC(C)C1=CC=C(C)C=C1O\t"CC(C)c1ccc(C)cc1OS(=O)(=O)O\t\n+"\tInChI=1S/C10H14O4S/c1-7(2)9-5-4-8(3)6-10(9)14-15(11,12)13/h4-7H,1-3H3,(H,11,12,13)\tNODSEPOUFZPJEQ-UHFFFAOYSA-N\tCC(C)C1=CC=C(C)C=C1OS(O)(=O)=O\t"thymol sulfate\n+Thymol sulphate\n+Thymol sulfuric acid\n+Thymol sulphuric acid\n+SCHEMBL235717\n+CHEBI:82911\n+(5-methyl-2-propan-2-ylphenyl) hydrogen sulfate\n+5-methyl-2-(propan-2-yl)phenyl hydrogen sulfate\n+Q27156452"\t12456386\tC10H14O4S\t230.061279928\t2.5061999999999993\t0\t0\t0\tBTM00002\tBTM00002\tSulfonation of phenolic compound\tBTMR1376\tEC 
2.8.2.1\tHUMAN\tNSC404789\tCC(C)C1=CC=C(C)C=C1O\tInChI=1S/C10H14O/c1-7(2)9-5-4-8(3)6-10(9)11/h4-7,11H,1-3H3\tMGSRCZKZVOBKFT-UHFFFAOYSA-N\t\t150.1044\n+2\tCC(C)C1=CC=C(C)C=C1O\t"CC(C)c1ccc(C)cc1OC\t\n+"\tInChI=1S/C11H16O/c1-8(2)10-6-5-9(3)7-11(10)12-4/h5-8H,1-4H3\tLSQXNMXDFRRDSJ-UHFFFAOYSA-N\tCC(C)C1=CC=C(C)C=C1OC\t"Thymol methyl ether\n+2-Isopropyl-5-methylanisole\n+1076-56-8\n+O-Methylthymol\n+Thymyl methyl ether\n+Methyl thymyl ether\n+3-Methoxy-p-cymene\n+4-Isopropyl-3-methoxytoluene\n+1-Isopropyl-2-methoxy-4-methylbenzene\n+Methyl thymol ether\n+Benzene, 2-methoxy-4-methyl-1-(1-methylethyl)-\n+ANISOLE, 2-ISOPROPYL-5-METHYL-\n+2-methoxy-4-methyl-1-propan-2-ylbenzene\n+1-Methyl-3-methoxy-4-isopropylbenzene\n+Benzene,2-methoxy-4-methyl-1-(1-methylethyl)-\n+methylthymol\n+FEMA No. 3436\n+Thymol methyl\n+thymol Me ether\n+2-Methoxy-4-methyl-1-(1-methylethyl)benzene\n+2-methoxy-4-methyl-1-(propan-2-yl)benzene\n+Fema3436\n+VTE0C4390U\n+DTXSID5047617\n+NSC-404221\n+Methylthymol, o-\n+EINECS 214-063-9\n+NSC 404221\n+BRN 2042889\n+UNII-VTE0C4390U\n+AI3-03431\n+thymyl methyl oxide\n+Methyl THYMYL oxide\n+starbld0009587\n+Thymol derivative, 21\n+4-06-00-03335 (Beilstein Handbook Reference)\n+3-METHOXY-PARA-CYMENE\n+SCHEMBL196752\n+2-Isopropyl-5-methyl-Anisole\n+CHEMBL2424841\n+DTXCID3027617\n+CHEBI:167336\n+BDBM248170\n+Tox21_302575\n+MFCD01674973\n+NSC404221\n+2-ISO PROPYL-5-METHYLANISOLE\n+AKOS015914183\n+Thymol methyl ether (= methyl thymol)\n+NCGC00256877-01\n+LS-13985\n+CAS-1076-56-8\n+CS-0335474\n+FT-0754651\n+I0996\n+1-Methyl-3-methoxy-4-isopropylbenzene, 98%\n+D91215\n+Q27292012\n+1-METHYL-3-METHOXY-4-ISOPROPYL BENZENE [FHFI]\n+1-Isopropyl-2-methoxy-4-methylbenzene, analytical standard"\t14104\tC11H16O\t164.120115132\t3.493999999999998\t0\t0\t0\tBTM00003\tBTM00003\tMethylation of phenolic compound\tBTMR1377\tEC 
2.1.1.25\tHUMAN\tNSC404789\tCC(C)C1=CC=C(C)C=C1O\tInChI=1S/C10H14O/c1-7(2)9-5-4-8(3)6-10(9)11/h4-7,11H,1-3H3\tMGSRCZKZVOBKFT-UHFFFAOYSA-N\t\t150.1044\n+3\tCC(C)C1=CC=C(C)C=C1O\t"Cc1ccc(c(c1)O)C(C)(C)O\t\n+"\tInChI=1S/C10H14O2/c1-7-4-5-8(9(11)6-7)10(2,3)12/h4-6,11-12H,1-3H3\tUWRRYLNXMGBJKK-UHFFFAOYSA-N\tCC(C)(C1=CC=C(C)C=C1O)O\t"SCHEMBL22652590\n+2-(2-hydroxypropan-2-yl)-5-methylphenol\n+EN300-1838871\n+4478-33-5"\t11332674\tC10H14O2\t166.099379688\t2.0267999999999997\t0\t0\t0\tBTM00004\tBTM00004\tHydroxylation of non-terminal aliphatic carbon adjacent to aromatic ring\tBTMR1077\t"CYP1A2\n+CYP2C8\n+CYP2C9\n+CYP2D6\n'..b'NSC-34803\n+BRN 2084452\n+Thymohydrochinon\n+2-methyl-5-propan-2-ylbenzene-1,4-diol\n+2-hydroxythymol\n+2,5-DIHYDROXY-P-CYMENE\n+Thymohydroquinone (I)\n+UNII-1C2ICM1R8V\n+SCHEMBL69082\n+CHEMBL4204349\n+2-isopropyl-5-methylhydroquinone\n+DTXSID70176706\n+WLN: QR DQ B1 EY1&1\n+NSC34803\n+1, 2-methyl-5-(1-methylethyl)-\n+AKOS006274324\n+2-isopropyl-5-methyl-benzene-1,4-diol\n+CS-0259073\n+FT-0700031\n+4-HYDROXY-5-ISOPROPYL-2-METHYLPHENOL\n+EN300-722422\n+Z1198148655\n+2-Methyl-5-(1-Methylethyl)cyclohexa-2,5-Diene-1,4-Dione\n+9J9\n+IMW"\t95779\tC10H14O2\t166.099379688\t2.9756\t0\t0\t0\tBTM00005\tBTM00005\tp-Hydroxylation of phenol\tBTMR1038\t"CYP1A2\n+CYP2A6\n+CYP2B6\n+CYP2C8\n+CYP2C9\n+CYP2C19\n+CYP2D6\n+CYP2E1\n+CYP3A4"\tHUMAN\tNSC404789\tCC(C)C1=CC=C(C)C=C1O\tInChI=1S/C10H14O/c1-7(2)9-5-4-8(3)6-10(9)11/h4-7,11H,1-3H3\tMGSRCZKZVOBKFT-UHFFFAOYSA-N\t\t150.1044\n+5\tCC(C)C1=CC=C(C)C=C1O\t"CC(C)c1ccc(cc1O)CO\t\n+"\tInChI=1S/C10H14O2/c1-7(2)9-4-3-8(6-11)5-10(9)12/h3-5,7,11-12H,6H2,1-2H3\tUNNQYEJIPIBHFS-UHFFFAOYSA-N\tCC(C)C1=CC=C(CO)C=C1O\t"77311-68-3\n+5-(Hydroxymethyl)-2-(propan-2-yl)phenol\n+5-hydroxymethyl-2-isopropylphenol\n+DTXSID70554040\n+2-Isopropyl-5-(hydroxymethyl)phenol"\t14002478\tC10H14O2\t166.099379688\t2.1523000000000003\t0\t0\t0\tBTM00006\tBTM00006\tAliphatic hydroxylation of methyl carbon adjacent to aromatic 
ring\tBTMR1058\t"CYP1A2\n+CYP2A6\n+CYP2B6\n+CYP2C8\n+CYP2C9\n+CYP2C19\n+CYP2D6\n+CYP2E1\n+CYP3A4"\tHUMAN\tNSC404789\tCC(C)C1=CC=C(C)C=C1O\tInChI=1S/C10H14O/c1-7(2)9-5-4-8(3)6-10(9)11/h4-7,11H,1-3H3\tMGSRCZKZVOBKFT-UHFFFAOYSA-N\t\t150.1044\n+6\tCC(C)C1=CC=C(C)C=C1O\t"Cc1ccc(C(C)CO)c(c1)O\t\n+"\tInChI=1S/C10H14O2/c1-7-3-4-9(8(2)6-11)10(12)5-7/h3-5,8,11-12H,6H2,1-2H3\tCLJPRXFHCRIUKW-UHFFFAOYSA-N\tC(C(C)C1=CC=C(C)C=C1O)O\t"9-Hydroxythymol\n+61955-76-8\n+2-(1-hydroxypropan-2-yl)-5-methylphenol\n+p-cymene-3,8-diol\n+HY-N10925\n+p-Mentha-1,3,5-triene-3,9-diol\n+CS-0637580"\t14432748\tC10H14O2\t166.099379688\t2.0848000000000004\t0\t0\t0\tBTM00007\tBTM00007\tHydroxylation of terminal methyl\tBTMR1061\t"CYP1A2\n+CYP2A6\n+CYP2B6\n+CYP2C8\n+CYP2C9\n+CYP2C19\n+CYP2D6\n+CYP2E1\n+CYP3A4"\tHUMAN\tNSC404789\tCC(C)C1=CC=C(C)C=C1O\tInChI=1S/C10H14O/c1-7(2)9-5-4-8(3)6-10(9)11/h4-7,11H,1-3H3\tMGSRCZKZVOBKFT-UHFFFAOYSA-N\t\t150.1044\n+7\tCC(C)C1=CC=C(C)C=C1O\t"C=C(C)c1ccc(C)cc1O\t\n+"\tInChI=1S/C10H12O/c1-7(2)9-5-4-8(3)6-10(9)11/h4-6,11H,1H2,2-3H3\tIHWFPRKZRRGTTI-UHFFFAOYSA-N\tCC(=C)C1=CC=C(C)C=C1O\t"8,9-Dehydrothymol\n+2-isopropenyl-5-methylphenol\n+18612-99-2\n+5-methyl-2-prop-1-en-2-ylphenol\n+Phenol, 5-methyl-2-(1-methylethenyl)-\n+m-Cresol, 6-isopropenyl-\n+SCHEMBL686122\n+2-Isopropenyl-5-methyl-phenol\n+DTXSID60423892\n+5-Methyl-2-(1-methylethenyl)phenol\n+5-Methyl-2-(prop-1-en-2-yl)phenol"\t6429037\tC10H12O\t148.088815004\t3.0469000000000004\t0\t0\t0\tBTM00008\tBTM00008\tTerminal 
desaturation\tBTMR1190\t"CYP1A2\n+CYP2A6\n+CYP2C9\n+CYP2D6\n+CYP3A4"\tHUMAN\tNSC404789\tCC(C)C1=CC=C(C)C=C1O\tInChI=1S/C10H14O/c1-7(2)9-5-4-8(3)6-10(9)11/h4-7,11H,1-3H3\tMGSRCZKZVOBKFT-UHFFFAOYSA-N\t\t150.1044\n+8\tCC(C)C1=CC=C(C)C=C1O\t"CC(C)c1ccc(C)c(c1O)O\t\n+"\tInChI=1S/C10H14O2/c1-6(2)8-5-4-7(3)9(11)10(8)12/h4-6,11-12H,1-3H3\tLYUBXLHGANLIMX-UHFFFAOYSA-N\tCC(C)C1=CC=C(C)C(=C1O)O\t"Cymopyrocatechol\n+490-06-2\n+3-Isopropyl-6-methylpyrocatechol\n+p-cymene-2,3-diol\n+3-isopropyl-6-methylbenzene-1,2-diol\n+3-isopropyl-6-methylcatechol\n+Pyrocatechol, 2-isopropyl-6-methyl-\n+93XFQ715UL\n+3-methyl-6-(propan-2-yl)benzene-1,2-diol\n+1,2-Benzenediol, 3-methyl-6-(1-methylethyl)-\n+NSC-40567\n+NSC 40567\n+BRN 2248022\n+3-methyl-6-propan-2-ylbenzene-1,2-diol\n+NSC40567\n+UNII-93XFQ715UL\n+SCHEMBL1494556\n+2,3-DIHYDROXY-P-CYMENE\n+DTXSID10197652\n+AKOS006275160\n+3-isopropyl-6-methyl-1,2-benzenediol\n+3-methyl-6-propan-2-yl-benzene-1,2-diol\n+CS-0353716\n+EN300-1599484\n+2,3-DIHYDROXY-4-ISOPROPYL-1-METHYLBENZENE\n+A828568\n+3-METHYL-6-(1-METHYLETHYL)-1,2-BENZENEDIOL"\t95873\tC10H14O2\t166.099379688\t2.9756\t0\t0\t0\tBTM00009\tBTM00009\tO-Hydroxylation of phenol\tBTMR1037\t"CYP1A2\n+CYP2A6\n+CYP2B6\n+CYP2C8\n+CYP2C9\n+CYP2C19\n+CYP2D6\n+CYP2E1\n+CYP3A4"\tHUMAN\tNSC404789\tCC(C)C1=CC=C(C)C=C1O\tInChI=1S/C10H14O/c1-7(2)9-5-4-8(3)6-10(9)11/h4-7,11H,1-3H3\tMGSRCZKZVOBKFT-UHFFFAOYSA-N\t\t150.1044\n'
b
diff -r 000000000000 -r 0b86600b715e test-data/output2.tsv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/output2.tsv Tue Jun 06 11:23:51 2023 +0000
[
b'@@ -0,0 +1,215 @@\n+\tSMILES query\tSMILES target\tInChI\tInChIKey\tSMILES\tSynonyms\tPUBCHEM_CID\tMolecular formula\tMajor Isotope Mass\tALogP\tLipinski_Violations\tInsecticide_Likeness_Violations\tPost_Em_Herbicide_Likeness_Violations\tMetabolite ID\tcdk:Title\tReaction\tReaction ID\tEnzyme(s)\tBiosystem\tPrecursor ID\tPrecursor SMILES\tPrecursor InChI\tPrecursor InChIKey\tPrecursor ALogP\tPrecursor Major Isotope Mass\n+0\tCC(C)C1=CC=C(C)C=C1O\t"CC(C)c1ccc(C)cc1OC1C(C(C(C(C(=O)O)O1)O)O)O\t\n+"\tInChI=1S/C16H22O7/c1-7(2)9-5-4-8(3)6-10(9)22-16-13(19)11(17)12(18)14(23-16)15(20)21/h4-7,11-14,16-19H,1-3H3,(H,20,21)\tADQJSAVCKZSGMK-UHFFFAOYSA-N\tCC(C)C1=CC=C(C)C=C1OC2OC(C(O)=O)C(C(C2O)O)O\t"NSC404789\n+NSC-404789\n+(2S,3S,4S,5R)-3,4,5-trihydroxy-6-(5-methyl-2-propan-2-ylphenoxy)oxane-2-carboxylic acid\n+3,5-trihydroxy-6-(2-isopropyl-5-methyl-phenoxy)tetrahydro-2H-pyran-2-carboxylic acid"\t346530\tC16H22O7\t326.13655304400004\t1.5474999999999992\t0\t1\t1\tBTM00001\tBTM00001\tAromatic OH-glucuronidation\tBTMR0166\tEC 2.4.1.17\tHUMAN\tNSC404789\tCC(C)C1=CC=C(C)C=C1O\tInChI=1S/C10H14O/c1-7(2)9-5-4-8(3)6-10(9)11/h4-7,11H,1-3H3\tMGSRCZKZVOBKFT-UHFFFAOYSA-N\t\t150.1044\n+1\tCC(C)C1=CC=C(C)C=C1O\t"CC(C)c1ccc(C)cc1OS(=O)(=O)O\t\n+"\tInChI=1S/C10H14O4S/c1-7(2)9-5-4-8(3)6-10(9)14-15(11,12)13/h4-7H,1-3H3,(H,11,12,13)\tNODSEPOUFZPJEQ-UHFFFAOYSA-N\tCC(C)C1=CC=C(C)C=C1OS(O)(=O)=O\t"thymol sulfate\n+Thymol sulphate\n+Thymol sulfuric acid\n+Thymol sulphuric acid\n+SCHEMBL235717\n+CHEBI:82911\n+(5-methyl-2-propan-2-ylphenyl) hydrogen sulfate\n+5-methyl-2-(propan-2-yl)phenyl hydrogen sulfate\n+Q27156452"\t12456386\tC10H14O4S\t230.061279928\t2.5061999999999993\t0\t0\t0\tBTM00002\tBTM00002\tSulfonation of phenolic compound\tBTMR1376\tEC 
2.8.2.1\tHUMAN\tNSC404789\tCC(C)C1=CC=C(C)C=C1O\tInChI=1S/C10H14O/c1-7(2)9-5-4-8(3)6-10(9)11/h4-7,11H,1-3H3\tMGSRCZKZVOBKFT-UHFFFAOYSA-N\t\t150.1044\n+2\tCC(C)C1=CC=C(C)C=C1O\t"CC(C)c1ccc(C)cc1OC\t\n+"\tInChI=1S/C11H16O/c1-8(2)10-6-5-9(3)7-11(10)12-4/h5-8H,1-4H3\tLSQXNMXDFRRDSJ-UHFFFAOYSA-N\tCC(C)C1=CC=C(C)C=C1OC\t"Thymol methyl ether\n+2-Isopropyl-5-methylanisole\n+1076-56-8\n+O-Methylthymol\n+Thymyl methyl ether\n+Methyl thymyl ether\n+3-Methoxy-p-cymene\n+4-Isopropyl-3-methoxytoluene\n+1-Isopropyl-2-methoxy-4-methylbenzene\n+Methyl thymol ether\n+Benzene, 2-methoxy-4-methyl-1-(1-methylethyl)-\n+ANISOLE, 2-ISOPROPYL-5-METHYL-\n+2-methoxy-4-methyl-1-propan-2-ylbenzene\n+1-Methyl-3-methoxy-4-isopropylbenzene\n+Benzene,2-methoxy-4-methyl-1-(1-methylethyl)-\n+methylthymol\n+FEMA No. 3436\n+Thymol methyl\n+thymol Me ether\n+2-Methoxy-4-methyl-1-(1-methylethyl)benzene\n+2-methoxy-4-methyl-1-(propan-2-yl)benzene\n+Fema3436\n+VTE0C4390U\n+DTXSID5047617\n+NSC-404221\n+Methylthymol, o-\n+EINECS 214-063-9\n+NSC 404221\n+BRN 2042889\n+UNII-VTE0C4390U\n+AI3-03431\n+thymyl methyl oxide\n+Methyl THYMYL oxide\n+starbld0009587\n+Thymol derivative, 21\n+4-06-00-03335 (Beilstein Handbook Reference)\n+3-METHOXY-PARA-CYMENE\n+SCHEMBL196752\n+2-Isopropyl-5-methyl-Anisole\n+CHEMBL2424841\n+DTXCID3027617\n+CHEBI:167336\n+BDBM248170\n+Tox21_302575\n+MFCD01674973\n+NSC404221\n+2-ISO PROPYL-5-METHYLANISOLE\n+AKOS015914183\n+Thymol methyl ether (= methyl thymol)\n+NCGC00256877-01\n+LS-13985\n+CAS-1076-56-8\n+CS-0335474\n+FT-0754651\n+I0996\n+1-Methyl-3-methoxy-4-isopropylbenzene, 98%\n+D91215\n+Q27292012\n+1-METHYL-3-METHOXY-4-ISOPROPYL BENZENE [FHFI]\n+1-Isopropyl-2-methoxy-4-methylbenzene, analytical standard"\t14104\tC11H16O\t164.120115132\t3.493999999999998\t0\t0\t0\tBTM00003\tBTM00003\tMethylation of phenolic compound\tBTMR1377\tEC 
2.1.1.25\tHUMAN\tNSC404789\tCC(C)C1=CC=C(C)C=C1O\tInChI=1S/C10H14O/c1-7(2)9-5-4-8(3)6-10(9)11/h4-7,11H,1-3H3\tMGSRCZKZVOBKFT-UHFFFAOYSA-N\t\t150.1044\n+3\tCC(C)C1=CC=C(C)C=C1O\t"Cc1ccc(c(c1)O)C(C)(C)O\t\n+"\tInChI=1S/C10H14O2/c1-7-4-5-8(9(11)6-7)10(2,3)12/h4-6,11-12H,1-3H3\tUWRRYLNXMGBJKK-UHFFFAOYSA-N\tCC(C)(C1=CC=C(C)C=C1O)O\t"SCHEMBL22652590\n+2-(2-hydroxypropan-2-yl)-5-methylphenol\n+EN300-1838871\n+4478-33-5"\t11332674\tC10H14O2\t166.099379688\t2.0267999999999997\t0\t0\t0\tBTM00004\tBTM00004\tHydroxylation of non-terminal aliphatic carbon adjacent to aromatic ring\tBTMR1077\t"CYP1A2\n+CYP2C8\n+CYP2C9\n+CYP2D6\n'..b'NSC-34803\n+BRN 2084452\n+Thymohydrochinon\n+2-methyl-5-propan-2-ylbenzene-1,4-diol\n+2-hydroxythymol\n+2,5-DIHYDROXY-P-CYMENE\n+Thymohydroquinone (I)\n+UNII-1C2ICM1R8V\n+SCHEMBL69082\n+CHEMBL4204349\n+2-isopropyl-5-methylhydroquinone\n+DTXSID70176706\n+WLN: QR DQ B1 EY1&1\n+NSC34803\n+1, 2-methyl-5-(1-methylethyl)-\n+AKOS006274324\n+2-isopropyl-5-methyl-benzene-1,4-diol\n+CS-0259073\n+FT-0700031\n+4-HYDROXY-5-ISOPROPYL-2-METHYLPHENOL\n+EN300-722422\n+Z1198148655\n+2-Methyl-5-(1-Methylethyl)cyclohexa-2,5-Diene-1,4-Dione\n+9J9\n+IMW"\t95779\tC10H14O2\t166.099379688\t2.9756\t0\t0\t0\tBTM00005\tBTM00005\tp-Hydroxylation of phenol\tBTMR1038\t"CYP1A2\n+CYP2A6\n+CYP2B6\n+CYP2C8\n+CYP2C9\n+CYP2C19\n+CYP2D6\n+CYP2E1\n+CYP3A4"\tHUMAN\tNSC404789\tCC(C)C1=CC=C(C)C=C1O\tInChI=1S/C10H14O/c1-7(2)9-5-4-8(3)6-10(9)11/h4-7,11H,1-3H3\tMGSRCZKZVOBKFT-UHFFFAOYSA-N\t\t150.1044\n+5\tCC(C)C1=CC=C(C)C=C1O\t"CC(C)c1ccc(cc1O)CO\t\n+"\tInChI=1S/C10H14O2/c1-7(2)9-4-3-8(6-11)5-10(9)12/h3-5,7,11-12H,6H2,1-2H3\tUNNQYEJIPIBHFS-UHFFFAOYSA-N\tCC(C)C1=CC=C(CO)C=C1O\t"77311-68-3\n+5-(Hydroxymethyl)-2-(propan-2-yl)phenol\n+5-hydroxymethyl-2-isopropylphenol\n+DTXSID70554040\n+2-Isopropyl-5-(hydroxymethyl)phenol"\t14002478\tC10H14O2\t166.099379688\t2.1523000000000003\t0\t0\t0\tBTM00006\tBTM00006\tAliphatic hydroxylation of methyl carbon adjacent to aromatic 
ring\tBTMR1058\t"CYP1A2\n+CYP2A6\n+CYP2B6\n+CYP2C8\n+CYP2C9\n+CYP2C19\n+CYP2D6\n+CYP2E1\n+CYP3A4"\tHUMAN\tNSC404789\tCC(C)C1=CC=C(C)C=C1O\tInChI=1S/C10H14O/c1-7(2)9-5-4-8(3)6-10(9)11/h4-7,11H,1-3H3\tMGSRCZKZVOBKFT-UHFFFAOYSA-N\t\t150.1044\n+6\tCC(C)C1=CC=C(C)C=C1O\t"Cc1ccc(C(C)CO)c(c1)O\t\n+"\tInChI=1S/C10H14O2/c1-7-3-4-9(8(2)6-11)10(12)5-7/h3-5,8,11-12H,6H2,1-2H3\tCLJPRXFHCRIUKW-UHFFFAOYSA-N\tC(C(C)C1=CC=C(C)C=C1O)O\t"9-Hydroxythymol\n+61955-76-8\n+2-(1-hydroxypropan-2-yl)-5-methylphenol\n+p-cymene-3,8-diol\n+HY-N10925\n+p-Mentha-1,3,5-triene-3,9-diol\n+CS-0637580"\t14432748\tC10H14O2\t166.099379688\t2.0848000000000004\t0\t0\t0\tBTM00007\tBTM00007\tHydroxylation of terminal methyl\tBTMR1061\t"CYP1A2\n+CYP2A6\n+CYP2B6\n+CYP2C8\n+CYP2C9\n+CYP2C19\n+CYP2D6\n+CYP2E1\n+CYP3A4"\tHUMAN\tNSC404789\tCC(C)C1=CC=C(C)C=C1O\tInChI=1S/C10H14O/c1-7(2)9-5-4-8(3)6-10(9)11/h4-7,11H,1-3H3\tMGSRCZKZVOBKFT-UHFFFAOYSA-N\t\t150.1044\n+7\tCC(C)C1=CC=C(C)C=C1O\t"C=C(C)c1ccc(C)cc1O\t\n+"\tInChI=1S/C10H12O/c1-7(2)9-5-4-8(3)6-10(9)11/h4-6,11H,1H2,2-3H3\tIHWFPRKZRRGTTI-UHFFFAOYSA-N\tCC(=C)C1=CC=C(C)C=C1O\t"8,9-Dehydrothymol\n+2-isopropenyl-5-methylphenol\n+18612-99-2\n+5-methyl-2-prop-1-en-2-ylphenol\n+Phenol, 5-methyl-2-(1-methylethenyl)-\n+m-Cresol, 6-isopropenyl-\n+SCHEMBL686122\n+2-Isopropenyl-5-methyl-phenol\n+DTXSID60423892\n+5-Methyl-2-(1-methylethenyl)phenol\n+5-Methyl-2-(prop-1-en-2-yl)phenol"\t6429037\tC10H12O\t148.088815004\t3.0469000000000004\t0\t0\t0\tBTM00008\tBTM00008\tTerminal 
desaturation\tBTMR1190\t"CYP1A2\n+CYP2A6\n+CYP2C9\n+CYP2D6\n+CYP3A4"\tHUMAN\tNSC404789\tCC(C)C1=CC=C(C)C=C1O\tInChI=1S/C10H14O/c1-7(2)9-5-4-8(3)6-10(9)11/h4-7,11H,1-3H3\tMGSRCZKZVOBKFT-UHFFFAOYSA-N\t\t150.1044\n+8\tCC(C)C1=CC=C(C)C=C1O\t"CC(C)c1ccc(C)c(c1O)O\t\n+"\tInChI=1S/C10H14O2/c1-6(2)8-5-4-7(3)9(11)10(8)12/h4-6,11-12H,1-3H3\tLYUBXLHGANLIMX-UHFFFAOYSA-N\tCC(C)C1=CC=C(C)C(=C1O)O\t"Cymopyrocatechol\n+490-06-2\n+3-Isopropyl-6-methylpyrocatechol\n+p-cymene-2,3-diol\n+3-isopropyl-6-methylbenzene-1,2-diol\n+3-isopropyl-6-methylcatechol\n+Pyrocatechol, 2-isopropyl-6-methyl-\n+93XFQ715UL\n+3-methyl-6-(propan-2-yl)benzene-1,2-diol\n+1,2-Benzenediol, 3-methyl-6-(1-methylethyl)-\n+NSC-40567\n+NSC 40567\n+BRN 2248022\n+3-methyl-6-propan-2-ylbenzene-1,2-diol\n+NSC40567\n+UNII-93XFQ715UL\n+SCHEMBL1494556\n+2,3-DIHYDROXY-P-CYMENE\n+DTXSID10197652\n+AKOS006275160\n+3-isopropyl-6-methyl-1,2-benzenediol\n+3-methyl-6-propan-2-yl-benzene-1,2-diol\n+CS-0353716\n+EN300-1599484\n+2,3-DIHYDROXY-4-ISOPROPYL-1-METHYLBENZENE\n+A828568\n+3-METHYL-6-(1-METHYLETHYL)-1,2-BENZENEDIOL"\t95873\tC10H14O2\t166.099379688\t2.9756\t0\t0\t0\tBTM00009\tBTM00009\tO-Hydroxylation of phenol\tBTMR1037\t"CYP1A2\n+CYP2A6\n+CYP2B6\n+CYP2C8\n+CYP2C9\n+CYP2C19\n+CYP2D6\n+CYP2E1\n+CYP3A4"\tHUMAN\tNSC404789\tCC(C)C1=CC=C(C)C=C1O\tInChI=1S/C10H14O/c1-7(2)9-5-4-8(3)6-10(9)11/h4-7,11H,1-3H3\tMGSRCZKZVOBKFT-UHFFFAOYSA-N\t\t150.1044\n'
b
diff -r 000000000000 -r 0b86600b715e test-data/output3.tsv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/output3.tsv Tue Jun 06 11:23:51 2023 +0000
[
@@ -0,0 +1,179 @@
+ SMILES query SMILES target InChI InChIKey SMILES Synonyms PUBCHEM_CID Molecular formula Major Isotope Mass ALogP Lipinski_Violations Insecticide_Likeness_Violations Post_Em_Herbicide_Likeness_Violations Metabolite ID cdk:Title Reaction Reaction ID Enzyme(s) Biosystem Precursor ID Precursor SMILES Precursor InChI Precursor InChIKey Precursor ALogP Precursor Major Isotope Mass
+0 CC(C)C1=CC=C(C)C=C1O "CC(C)c1ccc(C)cc1OC1C(C(C(C(C(=O)O)O1)O)O)O
+" InChI=1S/C16H22O7/c1-7(2)9-5-4-8(3)6-10(9)22-16-13(19)11(17)12(18)14(23-16)15(20)21/h4-7,11-14,16-19H,1-3H3,(H,20,21) ADQJSAVCKZSGMK-UHFFFAOYSA-N CC(C)C1=CC=C(C)C=C1OC2OC(C(O)=O)C(C(C2O)O)O "NSC404789
+NSC-404789
+(2S,3S,4S,5R)-3,4,5-trihydroxy-6-(5-methyl-2-propan-2-ylphenoxy)oxane-2-carboxylic acid
+3,5-trihydroxy-6-(2-isopropyl-5-methyl-phenoxy)tetrahydro-2H-pyran-2-carboxylic acid" 346530 C16H22O7 326.13655304400004 1.5474999999999992 0 1 1 BTM00001 BTM00001 Aromatic OH-glucuronidation BTMR0166 EC 2.4.1.17 HUMAN NSC404789 CC(C)C1=CC=C(C)C=C1O InChI=1S/C10H14O/c1-7(2)9-5-4-8(3)6-10(9)11/h4-7,11H,1-3H3 MGSRCZKZVOBKFT-UHFFFAOYSA-N 150.1044
+1 CC(C)C1=CC=C(C)C=C1O "CC(C)c1ccc(C)cc1OS(=O)(=O)O
+" InChI=1S/C10H14O4S/c1-7(2)9-5-4-8(3)6-10(9)14-15(11,12)13/h4-7H,1-3H3,(H,11,12,13) NODSEPOUFZPJEQ-UHFFFAOYSA-N CC(C)C1=CC=C(C)C=C1OS(O)(=O)=O "thymol sulfate
+Thymol sulphate
+Thymol sulfuric acid
+Thymol sulphuric acid
+SCHEMBL235717
+CHEBI:82911
+(5-methyl-2-propan-2-ylphenyl) hydrogen sulfate
+5-methyl-2-(propan-2-yl)phenyl hydrogen sulfate
+Q27156452" 12456386 C10H14O4S 230.061279928 2.5061999999999993 0 0 0 BTM00002 BTM00002 Sulfonation of phenolic compound BTMR1376 EC 2.8.2.1 HUMAN NSC404789 CC(C)C1=CC=C(C)C=C1O InChI=1S/C10H14O/c1-7(2)9-5-4-8(3)6-10(9)11/h4-7,11H,1-3H3 MGSRCZKZVOBKFT-UHFFFAOYSA-N 150.1044
+2 CC(C)C1=CC=C(C)C=C1O "CC(C)c1ccc(C)cc1OC
+" InChI=1S/C11H16O/c1-8(2)10-6-5-9(3)7-11(10)12-4/h5-8H,1-4H3 LSQXNMXDFRRDSJ-UHFFFAOYSA-N CC(C)C1=CC=C(C)C=C1OC "Thymol methyl ether
+2-Isopropyl-5-methylanisole
+1076-56-8
+O-Methylthymol
+Thymyl methyl ether
+Methyl thymyl ether
+3-Methoxy-p-cymene
+4-Isopropyl-3-methoxytoluene
+1-Isopropyl-2-methoxy-4-methylbenzene
+Methyl thymol ether
+Benzene, 2-methoxy-4-methyl-1-(1-methylethyl)-
+ANISOLE, 2-ISOPROPYL-5-METHYL-
+2-methoxy-4-methyl-1-propan-2-ylbenzene
+1-Methyl-3-methoxy-4-isopropylbenzene
+Benzene,2-methoxy-4-methyl-1-(1-methylethyl)-
+methylthymol
+FEMA No. 3436
+Thymol methyl
+thymol Me ether
+2-Methoxy-4-methyl-1-(1-methylethyl)benzene
+2-methoxy-4-methyl-1-(propan-2-yl)benzene
+Fema3436
+VTE0C4390U
+DTXSID5047617
+NSC-404221
+Methylthymol, o-
+EINECS 214-063-9
+NSC 404221
+BRN 2042889
+UNII-VTE0C4390U
+AI3-03431
+thymyl methyl oxide
+Methyl THYMYL oxide
+starbld0009587
+Thymol derivative, 21
+4-06-00-03335 (Beilstein Handbook Reference)
+3-METHOXY-PARA-CYMENE
+SCHEMBL196752
+2-Isopropyl-5-methyl-Anisole
+CHEMBL2424841
+DTXCID3027617
+CHEBI:167336
+BDBM248170
+Tox21_302575
+MFCD01674973
+NSC404221
+2-ISO PROPYL-5-METHYLANISOLE
+AKOS015914183
+Thymol methyl ether (= methyl thymol)
+NCGC00256877-01
+LS-13985
+CAS-1076-56-8
+CS-0335474
+FT-0754651
+I0996
+1-Methyl-3-methoxy-4-isopropylbenzene, 98%
+D91215
+Q27292012
+1-METHYL-3-METHOXY-4-ISOPROPYL BENZENE [FHFI]
+1-Isopropyl-2-methoxy-4-methylbenzene, analytical standard" 14104 C11H16O 164.120115132 3.493999999999998 0 0 0 BTM00003 BTM00003 Methylation of phenolic compound BTMR1377 EC 2.1.1.25 HUMAN NSC404789 CC(C)C1=CC=C(C)C=C1O InChI=1S/C10H14O/c1-7(2)9-5-4-8(3)6-10(9)11/h4-7,11H,1-3H3 MGSRCZKZVOBKFT-UHFFFAOYSA-N 150.1044
+3 CC(C)C1=CC=C(C)C=C1O "Cc1ccc(c(c1)O)C(C)(C)O
+" InChI=1S/C10H14O2/c1-7-4-5-8(9(11)6-7)10(2,3)12/h4-6,11-12H,1-3H3 UWRRYLNXMGBJKK-UHFFFAOYSA-N CC(C)(C1=CC=C(C)C=C1O)O "SCHEMBL22652590
+2-(2-hydroxypropan-2-yl)-5-methylphenol
+EN300-1838871
+4478-33-5" 11332674 C10H14O2 166.099379688 2.0267999999999997 0 0 0 BTM00004 BTM00004 Hydroxylation of non-terminal aliphatic carbon adjacent to aromatic ring BTMR1077 "CYP1A2
+CYP2C8
+CYP2C9
+CYP2D6
+CYP3A4" HUMAN NSC404789 CC(C)C1=CC=C(C)C=C1O InChI=1S/C10H14O/c1-7(2)9-5-4-8(3)6-10(9)11/h4-7,11H,1-3H3 MGSRCZKZVOBKFT-UHFFFAOYSA-N 150.1044
+4 CC(C)C1=CC=C(C)C=C1O "CC(C)c1cc(c(C)cc1O)O
+" InChI=1S/C10H14O2/c1-6(2)8-5-9(11)7(3)4-10(8)12/h4-6,11-12H,1-3H3 OQIOHYHRGZNZCW-UHFFFAOYSA-N CC(C)C1=CC(=C(C)C=C1O)O "Thymohydroquinone
+2217-60-9
+Hydrothymoquinone
+Thymoquinol
+p-Cymene-2,5-diol
+1,4-Benzenediol, 2-methyl-5-(1-methylethyl)-
+2-Methyl-5-isopropylhydroquinone
+Hydroquinone, 5-isopropyl-2-methyl-
+2-methyl-5-(propan-2-yl)benzene-1,4-diol
+NSC 34803
+2-ISOPROPYL-5-METHYLBENZENE-1,4-DIOL
+1C2ICM1R8V
+2-methyl-5-(1-methylethyl)-1,4-benzenediol
+NSC-34803
+BRN 2084452
+Thymohydrochinon
+2-methyl-5-propan-2-ylbenzene-1,4-diol
+2-hydroxythymol
+2,5-DIHYDROXY-P-CYMENE
+Thymohydroquinone (I)
+UNII-1C2ICM1R8V
+SCHEMBL69082
+CHEMBL4204349
+2-isopropyl-5-methylhydroquinone
+DTXSID70176706
+WLN: QR DQ B1 EY1&1
+NSC34803
+1, 2-methyl-5-(1-methylethyl)-
+AKOS006274324
+2-isopropyl-5-methyl-benzene-1,4-diol
+CS-0259073
+FT-0700031
+4-HYDROXY-5-ISOPROPYL-2-METHYLPHENOL
+EN300-722422
+Z1198148655
+2-Methyl-5-(1-Methylethyl)cyclohexa-2,5-Diene-1,4-Dione
+9J9
+IMW" 95779 C10H14O2 166.099379688 2.9756 0 0 0 BTM00005 BTM00005 p-Hydroxylation of phenol BTMR1038 "CYP1A2
+CYP2A6
+CYP2B6
+CYP2C8
+CYP2C9
+CYP2C19
+CYP2D6
+CYP2E1
+CYP3A4" HUMAN NSC404789 CC(C)C1=CC=C(C)C=C1O InChI=1S/C10H14O/c1-7(2)9-5-4-8(3)6-10(9)11/h4-7,11H,1-3H3 MGSRCZKZVOBKFT-UHFFFAOYSA-N 150.1044
+5 CC(C)C1=CC=C(C)C=C1O "CC(C)c1ccc(cc1O)CO
+" InChI=1S/C10H14O2/c1-7(2)9-4-3-8(6-11)5-10(9)12/h3-5,7,11-12H,6H2,1-2H3 UNNQYEJIPIBHFS-UHFFFAOYSA-N CC(C)C1=CC=C(CO)C=C1O "77311-68-3
+5-(Hydroxymethyl)-2-(propan-2-yl)phenol
+5-hydroxymethyl-2-isopropylphenol
+DTXSID70554040
+2-Isopropyl-5-(hydroxymethyl)phenol" 14002478 C10H14O2 166.099379688 2.1523000000000003 0 0 0 BTM00006 BTM00006 Aliphatic hydroxylation of methyl carbon adjacent to aromatic ring BTMR1058 "CYP1A2
+CYP2A6
+CYP2B6
+CYP2C8
+CYP2C9
+CYP2C19
+CYP2D6
+CYP2E1
+CYP3A4" HUMAN NSC404789 CC(C)C1=CC=C(C)C=C1O InChI=1S/C10H14O/c1-7(2)9-5-4-8(3)6-10(9)11/h4-7,11H,1-3H3 MGSRCZKZVOBKFT-UHFFFAOYSA-N 150.1044
+6 CC(C)C1=CC=C(C)C=C1O "Cc1ccc(C(C)CO)c(c1)O
+" InChI=1S/C10H14O2/c1-7-3-4-9(8(2)6-11)10(12)5-7/h3-5,8,11-12H,6H2,1-2H3 CLJPRXFHCRIUKW-UHFFFAOYSA-N C(C(C)C1=CC=C(C)C=C1O)O "9-Hydroxythymol
+61955-76-8
+2-(1-hydroxypropan-2-yl)-5-methylphenol
+p-cymene-3,8-diol
+HY-N10925
+p-Mentha-1,3,5-triene-3,9-diol
+CS-0637580" 14432748 C10H14O2 166.099379688 2.0848000000000004 0 0 0 BTM00007 BTM00007 Hydroxylation of terminal methyl BTMR1061 "CYP1A2
+CYP2A6
+CYP2B6
+CYP2C8
+CYP2C9
+CYP2C19
+CYP2D6
+CYP2E1
+CYP3A4" HUMAN NSC404789 CC(C)C1=CC=C(C)C=C1O InChI=1S/C10H14O/c1-7(2)9-5-4-8(3)6-10(9)11/h4-7,11H,1-3H3 MGSRCZKZVOBKFT-UHFFFAOYSA-N 150.1044
+7 CC(C)C1=CC=C(C)C=C1O "C=C(C)c1ccc(C)cc1O
+" InChI=1S/C10H12O/c1-7(2)9-5-4-8(3)6-10(9)11/h4-6,11H,1H2,2-3H3 IHWFPRKZRRGTTI-UHFFFAOYSA-N CC(=C)C1=CC=C(C)C=C1O "8,9-Dehydrothymol
+2-isopropenyl-5-methylphenol
+18612-99-2
+5-methyl-2-prop-1-en-2-ylphenol
+Phenol, 5-methyl-2-(1-methylethenyl)-
+m-Cresol, 6-isopropenyl-
+SCHEMBL686122
+2-Isopropenyl-5-methyl-phenol
+DTXSID60423892
+5-Methyl-2-(1-methylethenyl)phenol
+5-Methyl-2-(prop-1-en-2-yl)phenol" 6429037 C10H12O 148.088815004 3.0469000000000004 0 0 0 BTM00008 BTM00008 Terminal desaturation BTMR1190 "CYP1A2
+CYP2A6
+CYP2C9
+CYP2D6
+CYP3A4" HUMAN NSC404789 CC(C)C1=CC=C(C)C=C1O InChI=1S/C10H14O/c1-7(2)9-5-4-8(3)6-10(9)11/h4-7,11H,1-3H3 MGSRCZKZVOBKFT-UHFFFAOYSA-N 150.1044
b
diff -r 000000000000 -r 0b86600b715e test-data/smiles.csv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/smiles.csv Tue Jun 06 11:23:51 2023 +0000
b
@@ -0,0 +1,1 @@
+CC(C)C1=CC=C(C)C=C1O
\ No newline at end of file
b
diff -r 000000000000 -r 0b86600b715e wrapper_biotransformer.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/wrapper_biotransformer.py Tue Jun 06 11:23:51 2023 +0000
[
@@ -0,0 +1,75 @@
+import re
+import subprocess
+import sys
+import tempfile
+
+import pandas
+from openbabel import openbabel, pybel
+openbabel.obErrorLog.StopLogging()
+
+
+def InchiToSmiles(df):
+    '''Translate inchi to smiles'''
+    sm = []
+    for item in df['InChI']:
+        tmp = pybel.readstring("inchi", item)
+        sm.append(tmp.write("smi"))
+    return sm
+
+
+executable = ["biotransformer"]
+
+argv = sys.argv[1:]
+icsv = argv.pop(argv.index("-icsv") + 1)
+argv.remove("-icsv")
+ocsv = argv.pop(argv.index("-ocsv") + 1)
+argv.remove("-ocsv")
+ocsv_dup = argv.pop(argv.index("-ocsvDup") + 1)
+argv.remove("-ocsvDup")
+ocsv_dup2 = argv.pop(argv.index("-ocsvDup2") + 1)
+argv.remove("-ocsvDup2")
+
+in_df = pandas.read_csv(icsv, header=None)
+out_df1 = pandas.DataFrame()  # all results
+out_df2 = pandas.DataFrame()  # filtered results based on 6 columns
+out_df3 = pandas.DataFrame()  # filtered results based on 3 columns
+
+smList1 = []  # list with smiles string
+smList2 = []
+smList3 = []
+for _, (smiles,) in in_df.iterrows():
+    with tempfile.NamedTemporaryFile() as out:
+        print("Working on compound: " + smiles)
+        if not re.search(r'\.', smiles):
+            subprocess.run(executable + argv + ["-ismi", smiles] + ["-ocsv", out.name])
+            try:
+                bio_out = pandas.read_csv(out.name)
+                tmp2 = bio_out.drop_duplicates(subset=["InChI", "InChIKey", "Synonyms", "Molecular formula", "Major Isotope Mass", "ALogP"])
+                tmp3 = bio_out.drop_duplicates(subset=["Molecular formula", "Major Isotope Mass", "ALogP"])
+
+                smList1.append([smiles] * bio_out.shape[0])
+                smList2.append([smiles] * tmp2.shape[0])
+                smList3.append([smiles] * tmp3.shape[0])
+
+                out_df1 = pandas.concat([out_df1, bio_out])
+                out_df2 = pandas.concat([out_df2, tmp2])
+                out_df3 = pandas.concat([out_df3, tmp3])
+            except pandas.errors.EmptyDataError:
+                continue
+        else:
+            print("ERROR: Input compound cannot be a mixture.")
+smList1 = sum(smList1, [])  # merge sublists into one list
+smList2 = sum(smList2, [])
+smList3 = sum(smList3, [])
+
+out_df1.insert(0, "SMILES query", smList1)
+out_df1.insert(1, "SMILES target", InchiToSmiles(out_df1))
+out_df1.to_csv(ocsv, sep='\t')
+
+out_df2.insert(0, "SMILES query", smList2)
+out_df2.insert(1, "SMILES target", InchiToSmiles(out_df2))
+out_df2.to_csv(ocsv_dup, sep='\t')
+
+out_df3.insert(0, "SMILES query", smList3)
+out_df3.insert(1, "SMILES target", InchiToSmiles(out_df3))
+out_df3.to_csv(ocsv_dup2, sep='\t')