Repository 'rem_complex'
hg clone https://toolshed.g2.bx.psu.edu/repos/recetox/rem_complex

Changeset 0:a0e07a0bc047 (2023-11-27)
Next changeset 1:e0ca9dfcdb18 (2023-12-01)
Commit message:
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/rem_complex commit 833f7671d1e1b713d52ba5c7e59d28be38b92b1e
added:
macros.xml
rem_complex.py
rem_complex.xml
test-data/input.csv
test-data/input.inchi
test-data/input.smi
test-data/sample_output.csv
test-data/sample_output.inchi
test-data/sample_output.smi
b
diff -r 000000000000 -r a0e07a0bc047 macros.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/macros.xml Mon Nov 27 09:04:04 2023 +0000
[
@@ -0,0 +1,29 @@
+<macros>
+    <token name="@TOOL_VERSION@">1.0.0</token>
+    <xml name="creator">
+    <creator>
+        <person
+            givenName="Wudmir"
+            familyName="Rojas"
+            url="https://github.com/wverastegui"
+            identifier="0000-0001-7036-9987" />
+        <person
+            givenName="Helge"
+            familyName="Hecht"
+            url="https://github.com/hechth"
+            identifier="0000-0001-6744-996X" />
+        <organization
+            url="https://www.recetox.muni.cz/"
+            email="GalaxyToolsDevelopmentandDeployment@space.muni.cz"
+            name="RECETOX MUNI"/>
+    </creator>
+    </xml>
+    <token name="@HELP@"><![CDATA[
+            The remove complex tool filters out coordination complexes from a list of SMILES or InChI.
+        
+            Documentation
+                The rem complex tool removes coordination complexes from a list of SMILES or InChI. The tool accepts input files in csv, smi and inchi formats,
+                and returns its output in the same format as the input file. The tool is based on the Open Babel Python library.
+    ]]>
+    </token>
+</macros>
b
diff -r 000000000000 -r a0e07a0bc047 rem_complex.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/rem_complex.py Mon Nov 27 09:04:04 2023 +0000
[
@@ -0,0 +1,62 @@
+import argparse
+
+import pandas as pd
+from openbabel import openbabel, pybel
+openbabel.obErrorLog.SetOutputLevel(1)  # 0: suppress warnings; 1: warnings
+
+
+def parse_arguments() -> argparse.Namespace:
+    parser = argparse.ArgumentParser()
+    parser.add_argument('-iformat', '--input_format', help='Input file format')
+    parser.add_argument('-i', '--input_filename', type=str, required=True, help='Input file name')
+    parser.add_argument('-o', '--output_filename', type=str, required=True, help='Output file name')
+    args = parser.parse_args()
+    return args
+
+
+def filter_csv_molecules(file_name: str, output_file_name: str) -> None:
+    """Removes molecules with '.' in SMILES string from csv file.
+
+    Args:
+        file_name (str): Path to csv file that contains metadata.
+        output_file_name (str): Path to destination file, in csv format.
+    """
+    df = pd.read_csv(file_name)
+    mask = df['smiles'].str.contains(".", na=False, regex=False)
+    mask = mask.apply(lambda x: not x)
+    df[mask].to_csv(output_file_name, index=False)
+
+
+def filter_other_format_molecules(file_name: str, output_file_name: str, input_format: str) -> None:
+    """Removes molecules with '.' in SMILES string from smi or inchi files.
+
+    Args:
+        file_name (str): Path to smi or inchi files.
+        output_file_name (str): Path to destination files, in smi or inchi formats.
+        input_format (str): Input file format.
+    """
+    molecules = list(pybel.readfile(input_format, file_name))
+    filtered_molecules = [mol for mol in molecules if "." not in mol.write('smi').strip()]
+
+    with open(output_file_name, 'w') as f:
+        for mol in filtered_molecules:
+            f.write(mol.write(input_format))
+
+
+def filter_complex_molecules(file_name: str, output_file_name: str, input_format: str) -> None:
+    """Removes molecular complexes depending on the input format.
+
+    Args:
+        file_name (str): Path to csv, smi or inchi files
+        output_file_name (str): Path to destination files, in csv, smi or inchi formats.
+        input_format (str): Input file format.
+    """
+    if input_format == 'csv':
+        filter_csv_molecules(file_name, output_file_name)
+    else:
+        filter_other_format_molecules(file_name, output_file_name, input_format)
+
+
+if __name__ == "__main__":
+    args = parse_arguments()
+    filter_complex_molecules(args.input_filename, args.output_filename, args.input_format)
b
diff -r 000000000000 -r a0e07a0bc047 rem_complex.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/rem_complex.xml Mon Nov 27 09:04:04 2023 +0000
[
@@ -0,0 +1,44 @@
+<tool id="rem_complex" name="Remove coordination complexes" version="@TOOL_VERSION@+galaxy0"  profile="21.09">
+    <description>Remove molecular coordination complexes from a list of structure representations</description>
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+    <requirements>
+        <requirement type="package" version="1.5.3">pandas</requirement>
+        <requirement type="package" version="3.1.1">openbabel</requirement>
+    </requirements>
+    <command detect_errors="exit_code"><![CDATA[
+        python  $__tool_directory__/rem_complex.py
+        -i '${input}'
+        -iformat '${input.ext}'
+        -o '${output}' 
+    ]]></command>
+    <inputs>
+        <param name="input" format="inchi,smi,csv" type="data" help="Accepted input formats: CSV, SMI, and InChI."/>
+    </inputs>
+    <outputs>
+        <data name="output" format_source="input" />
+    </outputs>
+    <tests>
+        <test>
+            <param name="input" ftype="smi" value="input.smi" />
+            <output name="output" ftype="smi" file="sample_output.smi"/>
+        </test>
+        <test>
+            <param name="input" ftype="inchi" value="input.inchi"/>
+            <output name="output" ftype="inchi" file="sample_output.inchi"/>
+        </test>
+        <test>
+            <param name="input" ftype="csv" value="input.csv"/>
+            <output name="output" ftype="csv" file="sample_output.csv"/>
+        </test>
+    </tests>
+    <help>
+        <![CDATA[
+            @HELP@
+        ]]>
+    </help>
+    <citations>
+        <citation type="doi">10.5281/zenodo.6035335</citation>
+    </citations>
+</tool>
b
diff -r 000000000000 -r a0e07a0bc047 test-data/input.csv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/input.csv Mon Nov 27 09:04:04 2023 +0000
[
@@ -0,0 +1,16 @@
+inchi,formula,num_peaks,compound_name,smiles,comment,retention_index
+InChI=1S/C6H4ClO2Si.C5H5.2CO.Fe/c7-10-8-5-3-1-2-4-6(5)9-10;1-2-4-5-3-1;2*1-2;/h1-4H;1-5H;;;,C13H9ClFeO4Si,3,"((.eta.5-Cyclopentadienylironbiscarbonyl)(1,2-phenylenedioxysilyl)chloride complex",Cl[Si]1Oc2ccccc2O1.[C-]#[O+].[C-]#[O+].[CH]1C=CC=C1.[Fe],SpectrumID: 1519953; Source: C4-1998-38-3; Class: Benzenoids; CASRN not real!,
+"InChI=1S/C13H14O/c1-13(10-14)9-12(13)8-7-11-5-3-2-4-6-11/h2-6,12,14H,9-10H2,1H3/t12-,13-/m0/s1",C13H14O,20,"((1R*,2R*)-1-Methyl-2-phenylethynylcyclopropyl)methanol",C[C@@]1(CO)C[C@@H]1C#Cc1ccccc1,SpectrumID: 1752764; Source: A1-13-956/SMS7-13; DOI: 10.1021/ol1029996; QI: 383; Class: Benzene and substituted derivatives; CASRN not real! |RI:1588|,1588
+"InChI=1S/C34H54O4/c1-21(2)24-12-17-34(20-37-22(3)35)19-18-32(8)25(29(24)34)10-11-27-31(7)15-14-28(38-23(4)36)30(5,6)26(31)13-16-33(27,32)9/h24-29H,1,10-20H2,2-9H3/t24-,25+,26-,27+,28-,29+,31-,32+,33+,34+/m0/s1",C34H54O4,14,"((1R,3aS,5aR,5bR,7aR,9S,11aR,11bR,13aR,13bR)-9-acetoxy-5a,5b,8,8,11a-pentamethyl-1-(prop-1-en-2-yl)icosahydro-1H-cyclopenta[a]chrysen-3a-yl)methyl acetate",C=C(C)[C@@H]1CC[C@]2(COC(C)=O)CC[C@]3(C)[C@H](CC[C@@H]4[C@@]5(C)CC[C@H](OC(C)=O)C(C)(C)[C@@H]5CC[C@]43C)[C@@H]12,SpectrumID: 1800193; Source: PA-7-239-4(DIP); DOI: 10.1002_(SICI)1099-1565(199605)7_3_136; Class: Triterpenoids; CASRN not real! |RI:3353|,3353
+"InChI=1S/C11H15NO2S/c1-9-2-4-10(5-3-9)15-7-6-12-8-11(13)14/h2-5,12H,6-8H2,1H3,(H,13,14)",C11H15NO2S,20,((2-[(4-Methylphenyl)sulfanyl]ethyl)amino)acetic acid,Cc1ccc(SCCNCC(=O)O)cc1,SpectrumID: 1226271; Source: W5-1989-35586-29950; QI: 400; Class: Alpha amino acids |RI:2011|,2011
+"InChI=1S/C12H16N2O2S/c1-12(7-15)8-17-11(14-12)13-9-3-5-10(16-2)6-4-9/h3-6,15H,7-8H2,1-2H3,(H,13,14)",C12H16N2O2S,167,"((2Z)-2-[(4-Methoxyphenyl)imino]-4-methyl-1,3-thiazolidin-4-yl)methanol",COc1ccc(NC2=NC(C)(CO)CS2)cc1,SpectrumID: 1432066; Source: AD-0-2532-0; QI: 900; Class: Methoxyanilines |RI:2319|,2319
+"InChI=1S/C23H17F3N2O/c24-23(25,26)22-19(16-10-4-1-5-11-16)20(21(29)17-12-6-2-7-13-17)28(27-22)18-14-8-3-9-15-18/h1-15,19-20H/t19-,20+/m1/s1",C23H17F3N2O,9,"((3S,4R)-2,4-Diphenyl-5-trifluoromethyl-3,4-dihydro-2H-pyrazol-3-yl)-phenyl-methanone",O=C(c1ccccc1)[C@@H]1[C@@H](c2ccccc2)C(C(F)(F)F)=NN1c1ccccc1,SpectrumID: 1676300; Source: F4-43-2771-4a; QI: 56; Class: Alkyl-phenylketones; CASRN not real! |RI:2735|,2735
+"InChI=1S/C15H24O/c1-10(2)13-6-4-11(3)14-7-5-12(9-16)8-15(13)14/h4,8,10,13-16H,5-7,9H2,1-3H3/t13-,14+,15-/m0/s1",C15H24O,156,"((4aS,8S,8aR)-8-isopropyl-5-methyl-3,4,4a,7,8,8a-hexahydronaphthalen-2-yl)methanol",CC1=CC[C@@H](C(C)C)[C@@H]2C=C(CO)CC[C@H]12,"SpectrumID: 1815091; Source: N.Kacem, et al. Industrial Crops and Products, V.90, 2016, P.87-93; QI: 881; Class: Sesquiterpenoids; CASRN not real! |RI:1683|",1683
+"InChI=1S/C17H28NO6P/c1-13(2)23-25(21,24-14(3)4)18(12-17(19)20)11-10-15-6-8-16(22-5)9-7-15/h6-9,13-14H,10-12H2,1-5H3,(H,19,20)",C17H28NO6P,12,((Diisopropoxyphosphoryl)[2-(4-methoxyphenyl)ethyl]amino)acetic acid,COc1ccc(CCN(CC(=O)O)P(=O)(OC(C)C)OC(C)C)cc1,SpectrumID: 1356024; Source: W5-1989-35199-30337; QI: 78; Class: Alpha amino acids and derivatives,
+"InChI=1S/C19H27NO/c1-3-19-11-7-13-20(18(19)14-17(21)10-12-19)15(2)16-8-5-4-6-9-16/h4-6,8-9,15,18H,3,7,10-14H2,1-2H3/t15-,18-,19+/m0/s1",C19H27NO,21,"(+)-(1'S,4aR,8aS)-4a-Ethyl-1-(1'-phenylethyl)octahydroquinolin-7-one",CC[C@]12CCCN([C@@H](C)c3ccccc3)[C@H]1CC(=O)CC2,SpectrumID: 1634636; Source: KD-12-2102-6; QI: 147; Class: Quinolidines; CASRN not real! |RI:2267|,2267
+"InChI=1S/C22H36O5/c1-14(23)9-10-18-21(6)12-8-11-20(4,5)19(21)17(26-15(2)24)13-22(18,7)27-16(3)25/h17-19H,8-13H2,1-7H3/t17-,18+,19-,21+,22+/m0/s1",C22H36O5,9,"(+)-(1R,2R,4S,4aS,8aS)-4-(Acetyloxy)-2,5,5,8a-tetramethyl-1-(3-oxobutyl)decahydro-2-naphthlenyl acetate",CC(=O)CC[C@@H]1[C@@]2(C)CCCC(C)(C)[C@@H]2[C@@H](OC(C)=O)C[C@@]1(C)OC(C)=O,SpectrumID: 1624823; Source: KC-57-5677-51; QI: 106; Class: Sesquiterpenoids; CASRN not real! |RI:2534|,2534
+"InChI=1S/C13H20O2/c1-9-7-8-13(15-9,12(4,5)6)10(2)11(3)14/h7-9H,2H2,1,3-6H3/t9-,13+/m0/s1",C13H20O2,14,"(+)-(2'S,5'S)-3-(2'-tert-Butyl-5'-methyl-2',5'-dihydrofuran-2'-yl)but-3-en-2-one",C=C(C(C)=O)[C@@]1(C(C)(C)C)C=C[C@H](C)O1,"SpectrumID: 1618842; Source: F4-0-2808-29; QI: 171; Class: Alpha-branched alpha,beta-unsaturated ketones; CASRN not real! |RI:1382|",1382
+"InChI=1S/C15H18O3/c1-10-13(8-15-12(16)7-14(10)18-15)17-9-11-5-3-2-4-6-11/h2-6,10,13-15H,7-9H2,1H3/t10-,13+,14-,15-/m1/s1",C15H18O3,11,"(+)-(1S,3R,4S,5R)-3-Benzyloxy-4-methyl-8-oxabicyclo[3.2.1]octan-6-one",C[C@@H]1[C@@H](OCc2ccccc2)C[C@H]2O[C@@H]1CC2=O,SpectrumID: 871257; Source: QC-10-1542-4; QI: 23; Class: Benzylethers; CASRN not real! |RI:1893|,1893
+"InChI=1S/C14H21NO2.HI/c1-9-6-11-7-12(17-5)8-13(16)14(11)10(2)15(9,3)4;/h7-10H,6H2,1-5H3;1H/t9-,10-;/m0./s1",C14H22INO2,7,"(1S,3S)-8-Hydroxy-6-methoxy-N,N-dimethyl-1,3-dimethyl-1,2,3,4-tetrahydroisoquinolinium iodide",COc1cc([O-])c2c(c1)C[C@H](C)[N+](C)(C)[C@H]2C.I,SpectrumID: 855698; Source: F-56-584-1; QI: 92; Class: Tetrahydroisoquinolines; CASRN not real!,
+"InChI=1S/C18H20NOS.HI/c1-19-11-14-5-3-4-6-16(14)18(17(19)12-20)13-7-9-15(21-2)10-8-13;/h3-11,17-18,20H,12H2,1-2H3;1H/q+1;/p-1/t17-,18+;/m0./s1",C18H20INOS,20,"(3R,4R)-3-Hydroxymethyl-2-methyl-4-(4-methylthiophenyl)-3,4-dihydtoisoquinolinium iodide",CSc1ccc([C@@H]2c3ccccc3C=[N+](C)[C@H]2CO)cc1.[I-],SpectrumID: 1637146; Source: KD-15-2504-16; QI: 95; Class: Dihydroisoquinolines; CASRN not real!,
+"InChI=1S/C28H40N4O6S2.2ClHO4/c1-19-25-11-15-37-27(35)24(30-22(4)34)10-6-8-14-32-18-40-26(20(32)2)12-16-38-28(36)23(29-21(3)33)9-5-7-13-31(19)17-39-25;2*2-1(3,4)5/h17-18,23-24H,5-16H2,1-4H3;2*(H,2,3,4,5)/t23-,24-;;/m0../s1",C28H42Cl2N4O14S2,4,"(9S,19S)-9,19-Diacetamido-4,14-dimethyl-10,20-dioxo-1,11-dioxa-4,14(5,3)bis*thiazole)icosaphane-4,14-diium di-perchlorate salt",CC([O-])=N[C@H]1CCCC[n+]2csc(c2C)CCOC(=O)[C@@H](N=C(C)[O-])CCCC[n+]2csc(c2C)CCOC1=O.[O-][Cl+3]([O-])([O-])O.[O-][Cl+3]([O-])([O-])O,SpectrumID: 783347; Source: KC-0-871-38b; QI: 19; Class: N-acyl-alpha amino acids and derivatives; CASRN not real!,
b
diff -r 000000000000 -r a0e07a0bc047 test-data/input.inchi
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/input.inchi Mon Nov 27 09:04:04 2023 +0000
b
@@ -0,0 +1,15 @@
+InChI=1S/C6H4ClO2Si.C5H5.2CO.Fe/c7-10-8-5-3-1-2-4-6(5)9-10;1-2-4-5-3-1;2*1-2;/h1-4H;1-5H;;;
+InChI=1S/C13H14O/c1-13(10-14)9-12(13)8-7-11-5-3-2-4-6-11/h2-6,12,14H,9-10H2,1H3/t12-,13-/m0/s1
+InChI=1S/C34H54O4/c1-21(2)24-12-17-34(20-37-22(3)35)19-18-32(8)25(29(24)34)10-11-27-31(7)15-14-28(38-23(4)36)30(5,6)26(31)13-16-33(27,32)9/h24-29H,1,10-20H2,2-9H3/t24-,25+,26-,27+,28-,29+,31-,32+,33+,34+/m0/s1
+InChI=1S/C11H15NO2S/c1-9-2-4-10(5-3-9)15-7-6-12-8-11(13)14/h2-5,12H,6-8H2,1H3,(H,13,14)
+InChI=1S/C12H16N2O2S/c1-12(7-15)8-17-11(14-12)13-9-3-5-10(16-2)6-4-9/h3-6,15H,7-8H2,1-2H3,(H,13,14)
+InChI=1S/C23H17F3N2O/c24-23(25,26)22-19(16-10-4-1-5-11-16)20(21(29)17-12-6-2-7-13-17)28(27-22)18-14-8-3-9-15-18/h1-15,19-20H/t19-,20+/m1/s1
+InChI=1S/C15H24O/c1-10(2)13-6-4-11(3)14-7-5-12(9-16)8-15(13)14/h4,8,10,13-16H,5-7,9H2,1-3H3/t13-,14+,15-/m0/s1
+InChI=1S/C17H28NO6P/c1-13(2)23-25(21,24-14(3)4)18(12-17(19)20)11-10-15-6-8-16(22-5)9-7-15/h6-9,13-14H,10-12H2,1-5H3,(H,19,20)
+InChI=1S/C19H27NO/c1-3-19-11-7-13-20(18(19)14-17(21)10-12-19)15(2)16-8-5-4-6-9-16/h4-6,8-9,15,18H,3,7,10-14H2,1-2H3/t15-,18-,19+/m0/s1
+InChI=1S/C22H36O5/c1-14(23)9-10-18-21(6)12-8-11-20(4,5)19(21)17(26-15(2)24)13-22(18,7)27-16(3)25/h17-19H,8-13H2,1-7H3/t17-,18+,19-,21+,22+/m0/s1
+InChI=1S/C13H20O2/c1-9-7-8-13(15-9,12(4,5)6)10(2)11(3)14/h7-9H,2H2,1,3-6H3/t9-,13+/m0/s1
+InChI=1S/C15H18O3/c1-10-13(8-15-12(16)7-14(10)18-15)17-9-11-5-3-2-4-6-11/h2-6,10,13-15H,7-9H2,1H3/t10-,13+,14-,15-/m1/s1
+InChI=1S/C14H21NO2.HI/c1-9-6-11-7-12(17-5)8-13(16)14(11)10(2)15(9,3)4;/h7-10H,6H2,1-5H3;1H/t9-,10-;/m0./s1
+InChI=1S/C18H20NOS.HI/c1-19-11-14-5-3-4-6-16(14)18(17(19)12-20)13-7-9-15(21-2)10-8-13;/h3-11,17-18,20H,12H2,1-2H3;1H/q+1;/p-1/t17-,18+;/m0./s1
+InChI=1S/C28H40N4O6S2.2ClHO4/c1-19-25-11-15-37-27(35)24(30-22(4)34)10-6-8-14-32-18-40-26(20(32)2)12-16-38-28(36)23(29-21(3)33)9-5-7-13-31(19)17-39-25;2*2-1(3,4)5/h17-18,23-24H,5-16H2,1-4H3;2*(H,2,3,4,5)/t23-,24-;;/m0../s1
b
diff -r 000000000000 -r a0e07a0bc047 test-data/input.smi
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/input.smi Mon Nov 27 09:04:04 2023 +0000
[
@@ -0,0 +1,15 @@
+Cl[Si]1Oc2ccccc2O1.[C-]#[O+].[C-]#[O+].[CH]1C=CC=C1.[Fe]
+C[C@@]1(CO)C[C@@H]1C#Cc1ccccc1
+C=C(C)[C@@H]1CC[C@]2(COC(C)=O)CC[C@]3(C)[C@H](CC[C@@H]4[C@@]5(C)CC[C@H](OC(C)=O)C(C)(C)[C@@H]5CC[C@]43C)[C@@H]12
+Cc1ccc(SCCNCC(=O)O)cc1
+COc1ccc(NC2=NC(C)(CO)CS2)cc1
+O=C(c1ccccc1)[C@@H]1[C@@H](c2ccccc2)C(C(F)(F)F)=NN1c1ccccc1
+CC1=CC[C@@H](C(C)C)[C@@H]2C=C(CO)CC[C@H]12
+COc1ccc(CCN(CC(=O)O)P(=O)(OC(C)C)OC(C)C)cc1
+CC[C@]12CCCN([C@@H](C)c3ccccc3)[C@H]1CC(=O)CC2
+CC(=O)CC[C@@H]1[C@@]2(C)CCCC(C)(C)[C@@H]2[C@@H](OC(C)=O)C[C@@]1(C)OC(C)=O
+C=C(C(C)=O)[C@@]1(C(C)(C)C)C=C[C@H](C)O1
+C[C@@H]1[C@@H](OCc2ccccc2)C[C@H]2O[C@@H]1CC2=O
+COc1cc([O-])c2c(c1)C[C@H](C)[N+](C)(C)[C@H]2C.I
+CSc1ccc([C@@H]2c3ccccc3C=[N+](C)[C@H]2CO)cc1.[I-]
+CC([O-])=N[C@H]1CCCC[n+]2csc(c2C)CCOC(=O)[C@@H](N=C(C)[O-])CCCC[n+]2csc(c2C)CCOC1=O.[O-][Cl+3]([O-])([O-])O.[O-][Cl+3]([O-])([O-])O
b
diff -r 000000000000 -r a0e07a0bc047 test-data/sample_output.csv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/sample_output.csv Mon Nov 27 09:04:04 2023 +0000
[
@@ -0,0 +1,12 @@
+inchi,formula,num_peaks,compound_name,smiles,comment,retention_index
+"InChI=1S/C13H14O/c1-13(10-14)9-12(13)8-7-11-5-3-2-4-6-11/h2-6,12,14H,9-10H2,1H3/t12-,13-/m0/s1",C13H14O,20,"((1R*,2R*)-1-Methyl-2-phenylethynylcyclopropyl)methanol",C[C@@]1(CO)C[C@@H]1C#Cc1ccccc1,SpectrumID: 1752764; Source: A1-13-956/SMS7-13; DOI: 10.1021/ol1029996; QI: 383; Class: Benzene and substituted derivatives; CASRN not real! |RI:1588|,1588.0
+"InChI=1S/C34H54O4/c1-21(2)24-12-17-34(20-37-22(3)35)19-18-32(8)25(29(24)34)10-11-27-31(7)15-14-28(38-23(4)36)30(5,6)26(31)13-16-33(27,32)9/h24-29H,1,10-20H2,2-9H3/t24-,25+,26-,27+,28-,29+,31-,32+,33+,34+/m0/s1",C34H54O4,14,"((1R,3aS,5aR,5bR,7aR,9S,11aR,11bR,13aR,13bR)-9-acetoxy-5a,5b,8,8,11a-pentamethyl-1-(prop-1-en-2-yl)icosahydro-1H-cyclopenta[a]chrysen-3a-yl)methyl acetate",C=C(C)[C@@H]1CC[C@]2(COC(C)=O)CC[C@]3(C)[C@H](CC[C@@H]4[C@@]5(C)CC[C@H](OC(C)=O)C(C)(C)[C@@H]5CC[C@]43C)[C@@H]12,SpectrumID: 1800193; Source: PA-7-239-4(DIP); DOI: 10.1002_(SICI)1099-1565(199605)7_3_136; Class: Triterpenoids; CASRN not real! |RI:3353|,3353.0
+"InChI=1S/C11H15NO2S/c1-9-2-4-10(5-3-9)15-7-6-12-8-11(13)14/h2-5,12H,6-8H2,1H3,(H,13,14)",C11H15NO2S,20,((2-[(4-Methylphenyl)sulfanyl]ethyl)amino)acetic acid,Cc1ccc(SCCNCC(=O)O)cc1,SpectrumID: 1226271; Source: W5-1989-35586-29950; QI: 400; Class: Alpha amino acids |RI:2011|,2011.0
+"InChI=1S/C12H16N2O2S/c1-12(7-15)8-17-11(14-12)13-9-3-5-10(16-2)6-4-9/h3-6,15H,7-8H2,1-2H3,(H,13,14)",C12H16N2O2S,167,"((2Z)-2-[(4-Methoxyphenyl)imino]-4-methyl-1,3-thiazolidin-4-yl)methanol",COc1ccc(NC2=NC(C)(CO)CS2)cc1,SpectrumID: 1432066; Source: AD-0-2532-0; QI: 900; Class: Methoxyanilines |RI:2319|,2319.0
+"InChI=1S/C23H17F3N2O/c24-23(25,26)22-19(16-10-4-1-5-11-16)20(21(29)17-12-6-2-7-13-17)28(27-22)18-14-8-3-9-15-18/h1-15,19-20H/t19-,20+/m1/s1",C23H17F3N2O,9,"((3S,4R)-2,4-Diphenyl-5-trifluoromethyl-3,4-dihydro-2H-pyrazol-3-yl)-phenyl-methanone",O=C(c1ccccc1)[C@@H]1[C@@H](c2ccccc2)C(C(F)(F)F)=NN1c1ccccc1,SpectrumID: 1676300; Source: F4-43-2771-4a; QI: 56; Class: Alkyl-phenylketones; CASRN not real! |RI:2735|,2735.0
+"InChI=1S/C15H24O/c1-10(2)13-6-4-11(3)14-7-5-12(9-16)8-15(13)14/h4,8,10,13-16H,5-7,9H2,1-3H3/t13-,14+,15-/m0/s1",C15H24O,156,"((4aS,8S,8aR)-8-isopropyl-5-methyl-3,4,4a,7,8,8a-hexahydronaphthalen-2-yl)methanol",CC1=CC[C@@H](C(C)C)[C@@H]2C=C(CO)CC[C@H]12,"SpectrumID: 1815091; Source: N.Kacem, et al. Industrial Crops and Products, V.90, 2016, P.87-93; QI: 881; Class: Sesquiterpenoids; CASRN not real! |RI:1683|",1683.0
+"InChI=1S/C17H28NO6P/c1-13(2)23-25(21,24-14(3)4)18(12-17(19)20)11-10-15-6-8-16(22-5)9-7-15/h6-9,13-14H,10-12H2,1-5H3,(H,19,20)",C17H28NO6P,12,((Diisopropoxyphosphoryl)[2-(4-methoxyphenyl)ethyl]amino)acetic acid,COc1ccc(CCN(CC(=O)O)P(=O)(OC(C)C)OC(C)C)cc1,SpectrumID: 1356024; Source: W5-1989-35199-30337; QI: 78; Class: Alpha amino acids and derivatives,
+"InChI=1S/C19H27NO/c1-3-19-11-7-13-20(18(19)14-17(21)10-12-19)15(2)16-8-5-4-6-9-16/h4-6,8-9,15,18H,3,7,10-14H2,1-2H3/t15-,18-,19+/m0/s1",C19H27NO,21,"(+)-(1'S,4aR,8aS)-4a-Ethyl-1-(1'-phenylethyl)octahydroquinolin-7-one",CC[C@]12CCCN([C@@H](C)c3ccccc3)[C@H]1CC(=O)CC2,SpectrumID: 1634636; Source: KD-12-2102-6; QI: 147; Class: Quinolidines; CASRN not real! |RI:2267|,2267.0
+"InChI=1S/C22H36O5/c1-14(23)9-10-18-21(6)12-8-11-20(4,5)19(21)17(26-15(2)24)13-22(18,7)27-16(3)25/h17-19H,8-13H2,1-7H3/t17-,18+,19-,21+,22+/m0/s1",C22H36O5,9,"(+)-(1R,2R,4S,4aS,8aS)-4-(Acetyloxy)-2,5,5,8a-tetramethyl-1-(3-oxobutyl)decahydro-2-naphthlenyl acetate",CC(=O)CC[C@@H]1[C@@]2(C)CCCC(C)(C)[C@@H]2[C@@H](OC(C)=O)C[C@@]1(C)OC(C)=O,SpectrumID: 1624823; Source: KC-57-5677-51; QI: 106; Class: Sesquiterpenoids; CASRN not real! |RI:2534|,2534.0
+"InChI=1S/C13H20O2/c1-9-7-8-13(15-9,12(4,5)6)10(2)11(3)14/h7-9H,2H2,1,3-6H3/t9-,13+/m0/s1",C13H20O2,14,"(+)-(2'S,5'S)-3-(2'-tert-Butyl-5'-methyl-2',5'-dihydrofuran-2'-yl)but-3-en-2-one",C=C(C(C)=O)[C@@]1(C(C)(C)C)C=C[C@H](C)O1,"SpectrumID: 1618842; Source: F4-0-2808-29; QI: 171; Class: Alpha-branched alpha,beta-unsaturated ketones; CASRN not real! |RI:1382|",1382.0
+"InChI=1S/C15H18O3/c1-10-13(8-15-12(16)7-14(10)18-15)17-9-11-5-3-2-4-6-11/h2-6,10,13-15H,7-9H2,1H3/t10-,13+,14-,15-/m1/s1",C15H18O3,11,"(+)-(1S,3R,4S,5R)-3-Benzyloxy-4-methyl-8-oxabicyclo[3.2.1]octan-6-one",C[C@@H]1[C@@H](OCc2ccccc2)C[C@H]2O[C@@H]1CC2=O,SpectrumID: 871257; Source: QC-10-1542-4; QI: 23; Class: Benzylethers; CASRN not real! |RI:1893|,1893.0
b
diff -r 000000000000 -r a0e07a0bc047 test-data/sample_output.inchi
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/sample_output.inchi Mon Nov 27 09:04:04 2023 +0000
b
@@ -0,0 +1,11 @@
+InChI=1S/C13H14O/c1-13(10-14)9-12(13)8-7-11-5-3-2-4-6-11/h2-6,12,14H,9-10H2,1H3/t12-,13-/m0/s1
+InChI=1S/C34H54O4/c1-21(2)24-12-17-34(20-37-22(3)35)19-18-32(8)25(29(24)34)10-11-27-31(7)15-14-28(38-23(4)36)30(5,6)26(31)13-16-33(27,32)9/h24-29H,1,10-20H2,2-9H3/t24-,25+,26-,27+,28-,29+,31-,32+,33+,34+/m0/s1
+InChI=1S/C11H15NO2S/c1-9-2-4-10(5-3-9)15-7-6-12-8-11(13)14/h2-5,12H,6-8H2,1H3,(H,13,14)
+InChI=1S/C12H16N2O2S/c1-12(7-15)8-17-11(14-12)13-9-3-5-10(16-2)6-4-9/h3-6,15H,7-8H2,1-2H3,(H,13,14)
+InChI=1S/C23H17F3N2O/c24-23(25,26)22-19(16-10-4-1-5-11-16)20(21(29)17-12-6-2-7-13-17)28(27-22)18-14-8-3-9-15-18/h1-15,19-20H/t19-,20+/m1/s1
+InChI=1S/C15H24O/c1-10(2)13-6-4-11(3)14-7-5-12(9-16)8-15(13)14/h4,8,10,13-16H,5-7,9H2,1-3H3/t13-,14+,15-/m0/s1
+InChI=1S/C17H28NO6P/c1-13(2)23-25(21,24-14(3)4)18(12-17(19)20)11-10-15-6-8-16(22-5)9-7-15/h6-9,13-14H,10-12H2,1-5H3,(H,19,20)
+InChI=1S/C19H27NO/c1-3-19-11-7-13-20(18(19)14-17(21)10-12-19)15(2)16-8-5-4-6-9-16/h4-6,8-9,15,18H,3,7,10-14H2,1-2H3/t15-,18-,19+/m0/s1
+InChI=1S/C22H36O5/c1-14(23)9-10-18-21(6)12-8-11-20(4,5)19(21)17(26-15(2)24)13-22(18,7)27-16(3)25/h17-19H,8-13H2,1-7H3/t17-,18+,19-,21+,22+/m0/s1
+InChI=1S/C13H20O2/c1-9-7-8-13(15-9,12(4,5)6)10(2)11(3)14/h7-9H,2H2,1,3-6H3/t9-,13+/m0/s1
+InChI=1S/C15H18O3/c1-10-13(8-15-12(16)7-14(10)18-15)17-9-11-5-3-2-4-6-11/h2-6,10,13-15H,7-9H2,1H3/t10-,13+,14-,15-/m1/s1
b
diff -r 000000000000 -r a0e07a0bc047 test-data/sample_output.smi
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/sample_output.smi Mon Nov 27 09:04:04 2023 +0000
[
@@ -0,0 +1,11 @@
+C[C@@]1(CO)C[C@@H]1C#Cc1ccccc1
+C=C(C)[C@@H]1CC[C@]2(COC(=O)C)CC[C@]3(C)[C@H](CC[C@@H]4[C@@]5(C)CC[C@H](OC(=O)C)C(C)(C)[C@@H]5CC[C@@]34C)[C@@H]12
+Cc1ccc(SCCNCC(=O)O)cc1
+COc1ccc(NC2=NC(C)(CO)CS2)cc1
+O=C(c1ccccc1)[C@@H]1[C@@H](c2ccccc2)C(=NN1c1ccccc1)C(F)(F)F
+CC1=CC[C@@H](C(C)C)[C@@H]2C=C(CO)CC[C@H]12
+COc1ccc(CCN(CC(=O)O)P(=O)(OC(C)C)OC(C)C)cc1
+CC[C@]12CCCN([C@@H](C)c3ccccc3)[C@H]1CC(=O)CC2
+CC(=O)CC[C@@H]1[C@@]2(C)CCCC(C)(C)[C@@H]2[C@@H](OC(=O)C)C[C@@]1(C)OC(=O)C
+C=C(C(=O)C)[C@@]1(C(C)(C)C)C=C[C@H](C)O1
+C[C@@H]1[C@@H](OCc2ccccc2)C[C@H]2O[C@@H]1CC2=O