Commit message:
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/data_source/get_online_data commit aed18d7d09e332efe57d00b33c2b8249abefaedb |
added:
get_online_data.py get_online_data.xml get_pdb.xml test-data/1AKI.pdb test-data/gztest.txt test-data/ziptest.txt |
b |
diff -r 000000000000 -r 2538366eb8fb get_online_data.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/get_online_data.py Wed May 22 07:43:41 2019 -0400 |
[ |
import argparse
import gzip
import os
import shutil
import subprocess
import tempfile
import urllib.request
import zipfile
from io import BytesIO

# Real character -> escape token found in the tool's text parameters
# (presumably produced by Galaxy's text-parameter sanitisation; the order is
# kept as in the original mapping because replacements are applied in turn).
_ESCAPED_CHARS = {
    '>': '__gt__',
    '<': '__lt__',
    "'": '__sq__',
    '"': '__dq__',
    '[': '__ob__',
    ']': '__cb__',
    '{': '__oc__',
    '}': '__cc__',
    '@': '__at__',
    '\n': '__cn__',
    '\r': '__cr__',
    '\t': '__tc__',
}

# Extensions extracted from ZIP archives when no whitelist is supplied.
_DEFAULT_EXTENSIONS = ['.sdf', '.smi', '.inchi', '.mol']

_GZIP_MAGIC = b'\x1f\x8b'
_ZIP_MAGIC = b'PK'


def unescape(cond_text):
    """Return *cond_text* with every escape token replaced by its character.

    Tokens such as ``__gt__`` or ``__cn__`` are substituted one after the
    other with the character they stand for (``>``, newline, ...). Text
    without any token is returned unchanged.
    """
    for char, token in _ESCAPED_CHARS.items():
        cond_text = cond_text.replace(token, char)
    return cond_text


def _parse_urls(raw_urls):
    """Split the (escaped) URL parameter into a list of non-empty URLs."""
    # strip() also removes the '\r' left behind by Windows line endings;
    # blank lines would otherwise reach urlopen() and raise ValueError.
    candidates = (line.strip() for line in unescape(raw_urls).split('\n'))
    return [url for url in candidates if url]


def _parse_whitelist(raw_whitelist):
    """Turn the (escaped) whitelist parameter into normalised extensions.

    Entries are lower-cased and given a leading dot so that ``sdf``, ``.sdf``
    and ``.SDF`` all match ``os.path.splitext(name)[-1].lower()``. Blank
    entries are ignored; an empty or missing whitelist yields the defaults.
    """
    if not raw_whitelist:
        return list(_DEFAULT_EXTENSIONS)
    extensions = []
    for entry in unescape(raw_whitelist).split('\n'):
        ext = entry.strip().lower()
        if not ext:
            continue
        if not ext.startswith('.'):
            ext = '.' + ext
        extensions.append(ext)
    return extensions or list(_DEFAULT_EXTENSIONS)


def _download(url):
    """Fetch *url* once and return the complete response body as bytes."""
    with urllib.request.urlopen(urllib.request.Request(url)) as response:
        return response.read()


def _append_zip_members(payload, allowed_extensions, out):
    """Append every whitelisted file of the ZIP archive *payload* to *out*.

    The archive is unpacked into a temporary directory which is walked
    recursively; files whose lower-cased extension is in
    *allowed_extensions* are copied to the open binary file *out*. The
    temporary directory is removed even if extraction or copying fails.
    """
    with tempfile.TemporaryDirectory() as tmpdir:
        with zipfile.ZipFile(BytesIO(payload), allowZip64=True) as archive:
            # A single extractall() unpacks all members.
            archive.extractall(tmpdir)
        for root, _dirs, files in os.walk(tmpdir):
            for filename in files:
                if os.path.splitext(filename)[-1].lower() in allowed_extensions:
                    with open(os.path.join(root, filename), 'rb') as member:
                        shutil.copyfileobj(member, out)


def get_files(options):
    """Download all URLs in *options* and concatenate them into one file.

    Parameters read from *options*:
        url        newline-separated list of URLs (escape tokens allowed).
        whitelist  newline-separated file extensions to extract from ZIP
                   archives, or a false value to use the default set.
        out        path of the output file; it is overwritten.

    Each response is inspected by its leading magic bytes: gzip data is
    decompressed, ZIP archives contribute their whitelisted members, and
    anything else is written unchanged. Results are appended in URL order.
    Errors raised by urllib, gzip or zipfile propagate to the caller.
    """
    urls = _parse_urls(options.url)
    allowed_extensions = _parse_whitelist(options.whitelist)

    with open(options.out, 'wb') as out:
        for url in urls:
            payload = _download(url)
            magic = payload[:2]
            if magic == _GZIP_MAGIC:
                out.write(gzip.decompress(payload))
            elif magic == _ZIP_MAGIC:
                _append_zip_members(payload, allowed_extensions, out)
            else:
                out.write(payload)


def main():
    """Parse the command line and run the download."""
    parser = argparse.ArgumentParser(description="""Download compressed files and extract files of with chosen extensions
    """)
    parser.add_argument('--url', dest='url', required=True, help='URL')
    parser.add_argument('--whitelist', dest='whitelist', default=None, help='whitelist')
    parser.add_argument('--out', dest='out', required=True, help='output')

    options = parser.parse_args()
    get_files(options)


if __name__ == "__main__":
    main()
b |
diff -r 000000000000 -r 2538366eb8fb get_online_data.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/get_online_data.xml Wed May 22 07:43:41 2019 -0400 |
[ |
@@ -0,0 +1,58 @@ +<tool id="ctb_online_data_fetch" name="Online data" version="0.4"> + <description>fetching</description> + <requirements> + <requirement type="package" version="3">python</requirement> + </requirements> + <command> +<![CDATA[ + python '$__tool_directory__/get_online_data.py' + --url '$url_paste' + --out '$output' + --whitelist '$whitelist' +]]> + </command> + <inputs> + <param name="url_paste" type="text" area="true" size="5x55" label="URLs" help="List of URLs (one per line)."/> + <param name="whitelist" type="text" area="true" size="10x20" + label="Whitelist of filename extensions" + help="Specify a list of file extensions which should be extracted (default: sdf, mol, smi, inchi). Each extension should be placed on a new line."/> + </inputs> + <outputs> + <data format="txt" name="output" /> + </outputs> + <tests> + <test> + <param name="url_paste" value="https://github.com/simonbray/test-files/blob/master/get_online_data/1AKI.pdb.gz?raw=true" /> + <param name="whitelist" value=".pdb"/> + <output name="output" file="gztest.txt" /> + </test> + <test> + <param name="url_paste" value="https://github.com/simonbray/test-files/blob/master/get_online_data/sdfs.zip?raw=true" /> + <output name="output" file="ziptest.txt" /> + </test> + </tests> + <help> +<![CDATA[ + +.. class:: infomark + +**What this tool does** + +Fetch data via FTP or HTTP and store them in your history. Supply one or more URLs; all files with the chosen file extensions will be extracted. Caution: all files are concatenated together. + +----- + +.. class:: infomark + +**Input** + +Supported filetypes are: + - gz/gzip + - ZIP (with recursive extracting of specific filetypes) + + +]]> + </help> + <citations> + </citations> +</tool> |
b |
diff -r 000000000000 -r 2538366eb8fb get_pdb.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/get_pdb.xml Wed May 22 07:43:41 2019 -0400 |
[ |
@@ -0,0 +1,29 @@ +<tool id="get_pdb" name="Get PDB file" version="0.1.0"> + <description>from Protein Data Bank</description> + <requirements> + <requirement type="package" version="1.20.1">wget</requirement> + </requirements> + <command detect_errors="exit_code"><![CDATA[ + wget https://files.rcsb.org/download/${pdb_id}.pdb -O '$output' + ]]></command> + <inputs> + <param name="pdb_id" type="text" label="PDB accession code"> + <validator type="regex" message="Invalid accession code">^[0-9][a-zA-Z0-9]{3}$</validator> + </param> + </inputs> + <outputs> + <data name="output" format="pdb" /> + </outputs> + <tests> + <test> + <param name="pdb_id" value="1AKI"/> + <output name="output" file="1AKI.pdb"/> + </test> + </tests> + <help><![CDATA[ + Download a protein structure in PDB format from the Protein Data Bank using its four-letter accession code. + ]]></help> + <citations> + <citation type="doi">10.1093/nar/28.1.235</citation> + </citations> +</tool> \ No newline at end of file |
b |
diff -r 000000000000 -r 2538366eb8fb test-data/1AKI.pdb --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/1AKI.pdb Wed May 22 07:43:41 2019 -0400 |
b |
b'@@ -0,0 +1,1436 @@\n+HEADER HYDROLASE 19-MAY-97 1AKI \n+TITLE THE STRUCTURE OF THE ORTHORHOMBIC FORM OF HEN EGG-WHITE \n+TITLE 2 LYSOZYME AT 1.5 ANGSTROMS RESOLUTION \n+COMPND MOL_ID: 1; \n+COMPND 2 MOLECULE: LYSOZYME; \n+COMPND 3 CHAIN: A; \n+COMPND 4 EC: 3.2.1.17 \n+SOURCE MOL_ID: 1; \n+SOURCE 2 ORGANISM_SCIENTIFIC: GALLUS GALLUS; \n+SOURCE 3 ORGANISM_COMMON: CHICKEN; \n+SOURCE 4 ORGANISM_TAXID: 9031; \n+SOURCE 5 CELL: EGG \n+KEYWDS HYDROLASE, GLYCOSIDASE \n+EXPDTA X-RAY DIFFRACTION \n+AUTHOR D.CARTER,J.HE,J.R.RUBLE,B.WRIGHT \n+REVDAT 2 24-FEB-09 1AKI 1 VERSN \n+REVDAT 1 19-NOV-97 1AKI 0 \n+JRNL AUTH P.J.ARTYMIUK,C.C.F.BLAKE,D.W.RICE,K.S.WILSON \n+JRNL TITL THE STRUCTURES OF THE MONOCLINIC AND ORTHORHOMBIC \n+JRNL TITL 2 FORMS OF HEN EGG-WHITE LYSOZYME AT 6 ANGSTROMS \n+JRNL TITL 3 RESOLUTION \n+JRNL REF ACTA CRYSTALLOGR.,SECT.B V. 38 778 1982 \n+JRNL REFN ISSN 0108-7681 \n+REMARK 1 \n+REMARK 2 \n+REMARK 2 RESOLUTION. 1.50 ANGSTROMS. \n+REMARK 3 \n+REMARK 3 REFINEMENT. \n+REMARK 3 PROGRAM : GPRLSA, X-PLOR \n+REMARK 3 AUTHORS : FUREY \n+REMARK 3 \n+REMARK 3 DATA USED IN REFINEMENT. \n+REMARK 3 RESOLUTION RANGE HIGH (ANGSTROMS) : 1.50 \n+REMARK 3 RESOLUTION RANGE LOW (ANGSTROMS) : 10.00 \n+REMARK 3 DATA CUTOFF (SIGMA(F)) : 1.000 \n+REMARK 3 COMPLETENESS FOR RANGE (%) : 91.1 \n+REMARK 3 NUMBER OF REFLECTIONS : 16327 \n+REMARK 3 \n+REMARK 3 FIT TO DATA USED IN REFINEMENT. \n+REMARK 3 CROSS-VALIDATION METHOD : NULL \n+REMARK 3 FREE R VALUE TEST SET SELECTION : NULL \n+REMARK 3 R VALUE (WORKING + TEST SET) : NULL \n+REMARK 3 R VALUE (WORKING SET) : 0.212 \n+REMARK 3 FREE R VALUE : NULL \n+REMARK 3 FREE R VALUE TEST SET SIZE (%) : NULL \n+REMARK 3 FREE R VALUE TEST SET COUNT : NULL \n+REMARK 3 \n+REMARK 3 FIT/AGREEMENT OF MODEL WITH ALL DATA. 
\n+REMARK 3 R VALUE (WORKING + TEST SET,'..b'OH A 169 22.984 29.224 13.124 0.75 22.56 O \n+HETATM 1043 O HOH A 170 30.778 7.794 -3.514 0.65 21.58 O \n+HETATM 1044 O HOH A 171 42.965 14.657 4.991 0.63 23.91 O \n+HETATM 1045 O HOH A 172 36.927 17.948 -13.093 0.62 23.36 O \n+HETATM 1046 O HOH A 173 35.412 25.852 -11.575 0.58 23.42 O \n+HETATM 1047 O HOH A 174 37.428 32.540 -5.787 0.62 21.98 O \n+HETATM 1048 O HOH A 175 37.317 8.592 7.456 0.64 22.92 O \n+HETATM 1049 O HOH A 176 9.314 36.705 -11.546 0.69 23.77 O \n+HETATM 1050 O HOH A 177 39.972 23.760 -2.655 0.86 18.96 O \n+HETATM 1051 O HOH A 178 22.128 30.274 -0.543 0.76 18.78 O \n+HETATM 1052 O HOH A 179 22.244 15.813 10.000 0.68 19.66 O \n+HETATM 1053 O HOH A 180 40.729 9.223 0.292 0.64 20.15 O \n+HETATM 1054 O HOH A 181 12.500 15.267 4.097 0.56 20.12 O \n+HETATM 1055 O HOH A 182 20.372 28.618 -2.353 0.64 20.17 O \n+HETATM 1056 O HOH A 183 22.793 15.462 -6.673 0.63 20.60 O \n+HETATM 1057 O HOH A 184 23.138 31.809 15.121 0.55 20.90 O \n+HETATM 1058 O HOH A 185 22.671 38.691 8.245 0.48 21.16 O \n+HETATM 1059 O HOH A 186 33.966 33.112 6.837 0.59 19.45 O \n+HETATM 1060 O HOH A 187 19.572 25.423 -1.420 0.53 19.94 O \n+HETATM 1061 O HOH A 188 14.790 15.672 7.259 0.52 21.22 O \n+HETATM 1062 O HOH A 189 19.112 28.022 -14.647 0.49 19.83 O \n+HETATM 1063 O HOH A 190 17.302 39.059 -12.453 0.52 20.14 O \n+HETATM 1064 O HOH A 191 16.198 14.502 5.577 0.46 20.78 O \n+HETATM 1065 O HOH A 192 17.345 46.346 -7.080 0.50 18.13 O \n+HETATM 1066 O HOH A 193 14.992 31.300 -4.242 0.46 17.90 O \n+HETATM 1067 O HOH A 194 28.196 44.775 -3.148 0.44 18.15 O \n+HETATM 1068 O HOH A 195 29.479 13.863 -9.107 0.44 18.30 O \n+HETATM 1069 O HOH A 196 23.613 44.811 2.608 0.45 17.66 O \n+HETATM 1070 O HOH A 197 40.572 22.184 -6.358 0.42 18.06 O \n+HETATM 1071 O HOH A 198 12.475 31.860 -6.226 0.47 17.85 O \n+HETATM 1072 O HOH A 199 16.684 13.594 -5.832 0.31 18.51 O \n+HETATM 1073 O HOH A 200 27.534 38.059 -12.862 0.48 18.19 O \n+HETATM 1074 O HOH A 201 
25.892 35.973 11.563 0.46 18.15 O \n+HETATM 1075 O HOH A 202 24.790 25.182 16.063 0.46 17.64 O \n+HETATM 1076 O HOH A 203 12.580 21.214 5.006 0.51 17.97 O \n+HETATM 1077 O HOH A 204 19.687 23.750 -4.851 0.37 18.08 O \n+HETATM 1078 O HOH A 205 27.098 35.956 -12.358 0.39 18.71 O \n+HETATM 1079 O HOH A 206 37.255 9.634 10.002 0.46 18.39 O \n+HETATM 1080 O HOH A 207 43.755 23.843 8.038 0.38 17.96 O \n+CONECT 48 981 \n+CONECT 238 889 \n+CONECT 513 630 \n+CONECT 601 724 \n+CONECT 630 513 \n+CONECT 724 601 \n+CONECT 889 238 \n+CONECT 981 48 \n+MASTER 290 0 0 8 2 0 0 6 1079 1 8 10 \n+END \n' |
b |
diff -r 000000000000 -r 2538366eb8fb test-data/gztest.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/gztest.txt Wed May 22 07:43:41 2019 -0400 |
b |
b'@@ -0,0 +1,1436 @@\n+HEADER HYDROLASE 19-MAY-97 1AKI \n+TITLE THE STRUCTURE OF THE ORTHORHOMBIC FORM OF HEN EGG-WHITE \n+TITLE 2 LYSOZYME AT 1.5 ANGSTROMS RESOLUTION \n+COMPND MOL_ID: 1; \n+COMPND 2 MOLECULE: LYSOZYME; \n+COMPND 3 CHAIN: A; \n+COMPND 4 EC: 3.2.1.17 \n+SOURCE MOL_ID: 1; \n+SOURCE 2 ORGANISM_SCIENTIFIC: GALLUS GALLUS; \n+SOURCE 3 ORGANISM_COMMON: CHICKEN; \n+SOURCE 4 ORGANISM_TAXID: 9031; \n+SOURCE 5 CELL: EGG \n+KEYWDS HYDROLASE, GLYCOSIDASE \n+EXPDTA X-RAY DIFFRACTION \n+AUTHOR D.CARTER,J.HE,J.R.RUBLE,B.WRIGHT \n+REVDAT 2 24-FEB-09 1AKI 1 VERSN \n+REVDAT 1 19-NOV-97 1AKI 0 \n+JRNL AUTH P.J.ARTYMIUK,C.C.F.BLAKE,D.W.RICE,K.S.WILSON \n+JRNL TITL THE STRUCTURES OF THE MONOCLINIC AND ORTHORHOMBIC \n+JRNL TITL 2 FORMS OF HEN EGG-WHITE LYSOZYME AT 6 ANGSTROMS \n+JRNL TITL 3 RESOLUTION \n+JRNL REF ACTA CRYSTALLOGR.,SECT.B V. 38 778 1982 \n+JRNL REFN ISSN 0108-7681 \n+REMARK 1 \n+REMARK 2 \n+REMARK 2 RESOLUTION. 1.50 ANGSTROMS. \n+REMARK 3 \n+REMARK 3 REFINEMENT. \n+REMARK 3 PROGRAM : GPRLSA, X-PLOR \n+REMARK 3 AUTHORS : FUREY \n+REMARK 3 \n+REMARK 3 DATA USED IN REFINEMENT. \n+REMARK 3 RESOLUTION RANGE HIGH (ANGSTROMS) : 1.50 \n+REMARK 3 RESOLUTION RANGE LOW (ANGSTROMS) : 10.00 \n+REMARK 3 DATA CUTOFF (SIGMA(F)) : 1.000 \n+REMARK 3 COMPLETENESS FOR RANGE (%) : 91.1 \n+REMARK 3 NUMBER OF REFLECTIONS : 16327 \n+REMARK 3 \n+REMARK 3 FIT TO DATA USED IN REFINEMENT. \n+REMARK 3 CROSS-VALIDATION METHOD : NULL \n+REMARK 3 FREE R VALUE TEST SET SELECTION : NULL \n+REMARK 3 R VALUE (WORKING + TEST SET) : NULL \n+REMARK 3 R VALUE (WORKING SET) : 0.212 \n+REMARK 3 FREE R VALUE : NULL \n+REMARK 3 FREE R VALUE TEST SET SIZE (%) : NULL \n+REMARK 3 FREE R VALUE TEST SET COUNT : NULL \n+REMARK 3 \n+REMARK 3 FIT/AGREEMENT OF MODEL WITH ALL DATA. 
\n+REMARK 3 R VALUE (WORKING + TEST SET,'..b'OH A 169 22.984 29.224 13.124 0.75 22.56 O \n+HETATM 1043 O HOH A 170 30.778 7.794 -3.514 0.65 21.58 O \n+HETATM 1044 O HOH A 171 42.965 14.657 4.991 0.63 23.91 O \n+HETATM 1045 O HOH A 172 36.927 17.948 -13.093 0.62 23.36 O \n+HETATM 1046 O HOH A 173 35.412 25.852 -11.575 0.58 23.42 O \n+HETATM 1047 O HOH A 174 37.428 32.540 -5.787 0.62 21.98 O \n+HETATM 1048 O HOH A 175 37.317 8.592 7.456 0.64 22.92 O \n+HETATM 1049 O HOH A 176 9.314 36.705 -11.546 0.69 23.77 O \n+HETATM 1050 O HOH A 177 39.972 23.760 -2.655 0.86 18.96 O \n+HETATM 1051 O HOH A 178 22.128 30.274 -0.543 0.76 18.78 O \n+HETATM 1052 O HOH A 179 22.244 15.813 10.000 0.68 19.66 O \n+HETATM 1053 O HOH A 180 40.729 9.223 0.292 0.64 20.15 O \n+HETATM 1054 O HOH A 181 12.500 15.267 4.097 0.56 20.12 O \n+HETATM 1055 O HOH A 182 20.372 28.618 -2.353 0.64 20.17 O \n+HETATM 1056 O HOH A 183 22.793 15.462 -6.673 0.63 20.60 O \n+HETATM 1057 O HOH A 184 23.138 31.809 15.121 0.55 20.90 O \n+HETATM 1058 O HOH A 185 22.671 38.691 8.245 0.48 21.16 O \n+HETATM 1059 O HOH A 186 33.966 33.112 6.837 0.59 19.45 O \n+HETATM 1060 O HOH A 187 19.572 25.423 -1.420 0.53 19.94 O \n+HETATM 1061 O HOH A 188 14.790 15.672 7.259 0.52 21.22 O \n+HETATM 1062 O HOH A 189 19.112 28.022 -14.647 0.49 19.83 O \n+HETATM 1063 O HOH A 190 17.302 39.059 -12.453 0.52 20.14 O \n+HETATM 1064 O HOH A 191 16.198 14.502 5.577 0.46 20.78 O \n+HETATM 1065 O HOH A 192 17.345 46.346 -7.080 0.50 18.13 O \n+HETATM 1066 O HOH A 193 14.992 31.300 -4.242 0.46 17.90 O \n+HETATM 1067 O HOH A 194 28.196 44.775 -3.148 0.44 18.15 O \n+HETATM 1068 O HOH A 195 29.479 13.863 -9.107 0.44 18.30 O \n+HETATM 1069 O HOH A 196 23.613 44.811 2.608 0.45 17.66 O \n+HETATM 1070 O HOH A 197 40.572 22.184 -6.358 0.42 18.06 O \n+HETATM 1071 O HOH A 198 12.475 31.860 -6.226 0.47 17.85 O \n+HETATM 1072 O HOH A 199 16.684 13.594 -5.832 0.31 18.51 O \n+HETATM 1073 O HOH A 200 27.534 38.059 -12.862 0.48 18.19 O \n+HETATM 1074 O HOH A 201 
25.892 35.973 11.563 0.46 18.15 O \n+HETATM 1075 O HOH A 202 24.790 25.182 16.063 0.46 17.64 O \n+HETATM 1076 O HOH A 203 12.580 21.214 5.006 0.51 17.97 O \n+HETATM 1077 O HOH A 204 19.687 23.750 -4.851 0.37 18.08 O \n+HETATM 1078 O HOH A 205 27.098 35.956 -12.358 0.39 18.71 O \n+HETATM 1079 O HOH A 206 37.255 9.634 10.002 0.46 18.39 O \n+HETATM 1080 O HOH A 207 43.755 23.843 8.038 0.38 17.96 O \n+CONECT 48 981 \n+CONECT 238 889 \n+CONECT 513 630 \n+CONECT 601 724 \n+CONECT 630 513 \n+CONECT 724 601 \n+CONECT 889 238 \n+CONECT 981 48 \n+MASTER 290 0 0 8 2 0 0 6 1079 1 8 10 \n+END \n' |
b |
diff -r 000000000000 -r 2538366eb8fb test-data/ziptest.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/ziptest.txt Wed May 22 07:43:41 2019 -0400 |
b |
b'@@ -0,0 +1,646 @@\n+3037\n+ -OEChem-08231108593D\n+\n+ 27 28 0 0 0 0 0 0 0999 V2000\n+ -4.8550 1.3401 0.2120 Cl 0 0 0 0 0 0 0 0 0 0 0 0\n+ 4.8529 -1.3406 0.2121 Cl 0 0 0 0 0 0 0 0 0 0 0 0\n+ -0.1809 -2.1668 -0.3789 O 0 0 0 0 0 0 0 0 0 0 0 0\n+ 0.1788 2.1664 -0.3787 O 0 0 0 0 0 0 0 0 0 0 0 0\n+ -0.0011 -0.0002 1.4744 C 0 0 0 0 0 0 0 0 0 0 0 0\n+ -1.2222 -0.2738 0.6597 C 0 0 0 0 0 0 0 0 0 0 0 0\n+ 1.2377 0.2772 0.6480 C 0 0 0 0 0 0 0 0 0 0 0 0\n+ -1.2586 -1.3462 -0.2316 C 0 0 0 0 0 0 0 0 0 0 0 0\n+ 1.2565 1.3457 -0.2314 C 0 0 0 0 0 0 0 0 0 0 0 0\n+ -2.3343 0.5568 0.7972 C 0 0 0 0 0 0 0 0 0 0 0 0\n+ 2.3322 -0.5574 0.7972 C 0 0 0 0 0 0 0 0 0 0 0 0\n+ -2.4069 -1.5879 -0.9855 C 0 0 0 0 0 0 0 0 0 0 0 0\n+ 2.4048 1.5875 -0.9852 C 0 0 0 0 0 0 0 0 0 0 0 0\n+ -3.4827 0.3152 0.0433 C 0 0 0 0 0 0 0 0 0 0 0 0\n+ 3.4807 -0.3156 0.0435 C 0 0 0 0 0 0 0 0 0 0 0 0\n+ -3.5190 -0.7571 -0.8481 C 0 0 0 0 0 0 0 0 0 0 0 0\n+ 3.5170 0.7568 -0.8478 C 0 0 0 0 0 0 0 0 0 0 0 0\n+ -0.1548 0.8649 2.1342 H 0 0 0 0 0 0 0 0 0 0 0 0\n+ 0.1601 -0.8435 2.1593 H 0 0 0 0 0 0 0 0 0 0 0 0\n+ -2.3089 1.3938 1.4913 H 0 0 0 0 0 0 0 0 0 0 0 0\n+ 2.3053 -1.3909 1.4943 H 0 0 0 0 0 0 0 0 0 0 0 0\n+ -2.4415 -2.4213 -1.6818 H 0 0 0 0 0 0 0 0 0 0 0 0\n+ 2.4469 2.4191 -1.6835 H 0 0 0 0 0 0 0 0 0 0 0 0\n+ -4.4070 -0.9574 -1.4422 H 0 0 0 0 0 0 0 0 0 0 0 0\n+ 4.4050 0.9570 -1.4418 H 0 0 0 0 0 0 0 0 0 0 0 0\n+ 0.2961 -2.2262 0.4641 H 0 0 0 0 0 0 0 0 0 0 0 0\n+ 0.3872 2.8487 -1.0397 H 0 0 0 0 0 0 0 0 0 0 0 0\n+ 1 14 1 0 0 0 0\n+ 2 15 1 0 0 0 0\n+ 3 8 1 0 0 0 0\n+ 3 26 1 0 0 0 0\n+ 4 9 1 0 0 0 0\n+ 4 27 1 0 0 0 0\n+ 5 6 1 0 0 0 0\n+ 5 7 1 0 0 0 0\n+ 5 18 1 0 0 0 0\n+ 5 19 1 0 0 0 0\n+ 6 8 2 0 0 0 0\n+ 6 10 1 0 0 0 0\n+ 7 9 2 0 0 0 0\n+ 7 11 1 0 0 0 0\n+ 8 12 1 0 0 0 0\n+ 9 13 1 0 0 0 0\n+ 10 14 2 0 0 0 0\n+ 10 20 1 0 0 0 0\n+ 11 15 2 0 0 0 0\n+ 11 21 1 0 0 0 0\n+ 12 16 2 0 0 0 0\n+ 12 22 1 0 0 0 0\n+ 13 17 2 0 0 0 0\n+ 13 23 1 0 0 0 0\n+ 14 16 1 0 0 0 0\n+ 15 17 1 0 0 0 0\n+ 16 24 1 0 0 0 0\n+ 17 25 1 0 0 0 0\n+M END\n+> 
<PUBCHEM_COMPOUND_CID>\n+3037\n+\n+> <PUBCHEM_CONFORMER_RMSD>\n+0.6\n+\n+> <PUBCHEM_CONFORMER_DIVERSEORDER>\n+8\n+10\n+12\n+1\n+7\n+5\n+11\n+3\n+6\n+9\n+4\n+2\n+\n+> <PUBCHEM_MMFF94_PARTIAL_CHARGES>\n+25\n+1 -0.18\n+10 -0.15\n+11 -0.15\n+12 -0.15\n+13 -0.15\n+14 0.18\n+15 0.18\n+16 -0.15\n+17 -0.15\n+2 -0.18\n+20 0.15\n+21 0.15\n+22 0.15\n+23 0.15\n+24 0.15\n+25 0.15\n+26 0.45\n+27 0.45\n+3 -0.53\n+4 -0.53\n+5 0.29\n+6 -0.14\n+7 -0.14\n+8 0.08\n+9 0.08\n+\n+> <PUBCHEM_EFFECTIVE_ROTOR_COUNT>\n+2\n+\n+> <PUBCHEM_PHARMACOPHORE_FEATURES>\n+4\n+1 3 donor\n+1 4 donor\n+6 6 8 10 12 14 16 rings\n+6 7 9 11 13 15 17 rings\n+\n+> <PUBCHEM_HEAVY_ATOM_COUNT>\n+17\n+\n+> <PUBCHEM_ATOM_DEF_STEREO_COUNT>\n+0\n+\n+> <PUBCHEM_ATOM_UDEF_STEREO_COUNT>\n+0\n+\n+> <PUBCHEM_BOND_DEF_STEREO_COUNT>\n+0\n+\n+> <PUBCHEM_BOND_UDEF_STEREO_COUNT>\n+0\n+\n+> <PUBCHEM_ISOTOPIC_ATOM_COUNT>\n+0\n+\n+> <PUBCHEM_COMPONENT_COUNT>\n+1\n+\n+> <PUBCHEM_CACTVS_TAUTO_COUNT>\n+5\n+\n+> <PUBCHEM_CONFORMER_ID>\n+00000BDD00000008\n+\n+> <PUBCHEM_MMFF94_ENERGY>\n+44.6858\n+\n+> <PUBCHEM_FEATURE_SELFOVERLAP>\n+20.297\n+\n+> <PUBCHEM_SHAPE_FINGERPRINT>\n+10062212 137 18261117369936506423\n+104564 63 17986963035811110412\n+11458722 120 18339359768245870841\n+11471102 22 5472872458301843344\n+11578080 2 18190204380446433792\n+116883 192 18265608969609498196\n+12236239 1 18410856576819659107\n+12592029 89 18338223951597366363\n+13549 16 18410575084668353682\n+13693'..b'0527288590\n+\n+> <PUBCHEM_SHAPE_MULTIPOLES>\n+378.03\n+7.01\n+2.75\n+1.77\n+0.78\n+1.58\n+0.3\n+0.41\n+1.94\n+-1.08\n+1.9\n+-8.69\n+11.04\n+2.58\n+\n+> <PUBCHEM_SHAPE_SELFOVERLAP>\n+790.335\n+\n+> <PUBCHEM_SHAPE_VOLUME>\n+214.7\n+\n+> <PUBCHEM_COORDINATE_TYPE>\n+2\n+5\n+255\n+\n+$$$$\n+\n+2244\n+ -OEChem-05151212332D\n+\n+ 21 21 0 0 0 0 0 0 0999 V2000\n+ 3.7320 -0.0600 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0\n+ 6.3301 1.4400 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0\n+ 4.5981 1.4400 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0\n+ 2.8660 -1.5600 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 
0\n+ 4.5981 -0.5600 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n+ 5.4641 -0.0600 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n+ 4.5981 -1.5600 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n+ 6.3301 -0.5600 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n+ 5.4641 -2.0600 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n+ 6.3301 -1.5600 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n+ 5.4641 0.9400 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n+ 2.8660 -0.5600 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n+ 2.0000 -0.0600 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n+ 4.0611 -1.8700 0.0000 H 0 0 0 0 0 0 0 0 0 0 0 0\n+ 6.8671 -0.2500 0.0000 H 0 0 0 0 0 0 0 0 0 0 0 0\n+ 5.4641 -2.6800 0.0000 H 0 0 0 0 0 0 0 0 0 0 0 0\n+ 6.8671 -1.8700 0.0000 H 0 0 0 0 0 0 0 0 0 0 0 0\n+ 2.3100 0.4769 0.0000 H 0 0 0 0 0 0 0 0 0 0 0 0\n+ 1.4631 0.2500 0.0000 H 0 0 0 0 0 0 0 0 0 0 0 0\n+ 1.6900 -0.5969 0.0000 H 0 0 0 0 0 0 0 0 0 0 0 0\n+ 6.3301 2.0600 0.0000 H 0 0 0 0 0 0 0 0 0 0 0 0\n+ 1 5 1 0 0 0 0\n+ 1 12 1 0 0 0 0\n+ 2 11 1 0 0 0 0\n+ 2 21 1 0 0 0 0\n+ 3 11 2 0 0 0 0\n+ 4 12 2 0 0 0 0\n+ 5 6 1 0 0 0 0\n+ 5 7 2 0 0 0 0\n+ 6 8 2 0 0 0 0\n+ 6 11 1 0 0 0 0\n+ 7 9 1 0 0 0 0\n+ 7 14 1 0 0 0 0\n+ 8 10 1 0 0 0 0\n+ 8 15 1 0 0 0 0\n+ 9 10 2 0 0 0 0\n+ 9 16 1 0 0 0 0\n+ 10 17 1 0 0 0 0\n+ 12 13 1 0 0 0 0\n+ 13 18 1 0 0 0 0\n+ 13 19 1 0 0 0 0\n+ 13 20 1 0 0 0 0\n+M END\n+> <PUBCHEM_COMPOUND_CID>\n+2244\n+\n+> <PUBCHEM_COMPOUND_CANONICALIZED>\n+1\n+\n+> <PUBCHEM_CACTVS_COMPLEXITY>\n+212\n+\n+> <PUBCHEM_CACTVS_HBOND_ACCEPTOR>\n+4\n+\n+> <PUBCHEM_CACTVS_HBOND_DONOR>\n+1\n+\n+> <PUBCHEM_CACTVS_ROTATABLE_BOND>\n+3\n+\n+> <PUBCHEM_CACTVS_SUBSKEYS>\n+AAADccBwOAAAAAAAAAAAAAAAAAAAAAAAAAAwAAAAAAAAAAABAAAAGgAACAAADASAmAAyDoAABgCIAiDSCAACCAAkIAAIiAEGCMgMJzaENRqCe2Cl4BEIuYeIyCCOAAAAAAAIAAAAAAAAABAAAAAAAAAAAA==\n+\n+> <PUBCHEM_IUPAC_OPENEYE_NAME>\n+2-acetoxybenzoic acid\n+\n+> <PUBCHEM_IUPAC_CAS_NAME>\n+2-acetyloxybenzoic acid\n+\n+> <PUBCHEM_IUPAC_NAME>\n+2-acetyloxybenzoic acid\n+\n+> <PUBCHEM_IUPAC_SYSTEMATIC_NAME>\n+2-acetyloxybenzoic acid\n+\n+> <PUBCHEM_IUPAC_TRADITIONAL_NAME>\n+2-acetoxybenzoic acid\n+\n+> 
<PUBCHEM_IUPAC_INCHI>\n+InChI=1S/C9H8O4/c1-6(10)13-8-5-3-2-4-7(8)9(11)12/h2-5H,1H3,(H,11,12)\n+\n+> <PUBCHEM_IUPAC_INCHIKEY>\n+BSYNRYMUTXBXSQ-UHFFFAOYSA-N\n+\n+> <PUBCHEM_XLOGP3>\n+1.2\n+\n+> <PUBCHEM_EXACT_MASS>\n+180.042259\n+\n+> <PUBCHEM_MOLECULAR_FORMULA>\n+C9H8O4\n+\n+> <PUBCHEM_MOLECULAR_WEIGHT>\n+180.15742\n+\n+> <PUBCHEM_OPENEYE_CAN_SMILES>\n+CC(=O)OC1=CC=CC=C1C(=O)O\n+\n+> <PUBCHEM_OPENEYE_ISO_SMILES>\n+CC(=O)OC1=CC=CC=C1C(=O)O\n+\n+> <PUBCHEM_CACTVS_TPSA>\n+63.6\n+\n+> <PUBCHEM_MONOISOTOPIC_WEIGHT>\n+180.042259\n+\n+> <PUBCHEM_TOTAL_CHARGE>\n+0\n+\n+> <PUBCHEM_HEAVY_ATOM_COUNT>\n+13\n+\n+> <PUBCHEM_ATOM_DEF_STEREO_COUNT>\n+0\n+\n+> <PUBCHEM_ATOM_UDEF_STEREO_COUNT>\n+0\n+\n+> <PUBCHEM_BOND_DEF_STEREO_COUNT>\n+0\n+\n+> <PUBCHEM_BOND_UDEF_STEREO_COUNT>\n+0\n+\n+> <PUBCHEM_ISOTOPIC_ATOM_COUNT>\n+0\n+\n+> <PUBCHEM_COMPONENT_COUNT>\n+1\n+\n+> <PUBCHEM_CACTVS_TAUTO_COUNT>\n+1\n+\n+> <PUBCHEM_COORDINATE_TYPE>\n+1\n+5\n+255\n+\n+> <PUBCHEM_BONDANNOTATIONS>\n+5 6 8\n+5 7 8\n+6 8 8\n+7 9 8\n+8 10 8\n+9 10 8\n+\n+$$$$\n+\n' |