# HG changeset patch # User bgruening # Date 1557250370 14400 # Node ID 0d88631bb7de19e113dea785899d209c45880f33 # Parent e3a7d6cc87af95c2d62359ddb50ec26042304bf0 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/chemfp commit ed9b6859de648aa5f7cde483732f5df20aaff90e diff -r e3a7d6cc87af -r 0d88631bb7de butina_clustering.xml --- a/butina_clustering.xml Fri Mar 23 03:26:33 2018 -0400 +++ b/butina_clustering.xml Tue May 07 13:32:50 2019 -0400 @@ -1,7 +1,7 @@ - + of molecular fingerprints - chemfp + chemfp openbabel @@ -14,8 +14,8 @@ ]]> - - + + @@ -35,7 +35,7 @@ **What this tool does** -Unsupervised non-hierarchical clustering method based on the Taylor-Butina algorithm, which guarantees that every cluster contains molecules which are within a distance cutoff of the central molecule. This tool is based on the chemfp_ project. +Unsupervised non-hierarchical clustering of molecular fingerprints, based on the Taylor-Butina algorithm, which guarantees that every cluster contains molecules which are within a distance cutoff of the central molecule. This tool is based on the chemfp_ project. .. _chemfp: http://chemfp.com/ @@ -89,5 +89,6 @@ 10.1186/1758-2946-5-S1-P36 + 10.1021/ci9803381 diff -r e3a7d6cc87af -r 0d88631bb7de mol2fps.xml --- a/mol2fps.xml Fri Mar 23 03:26:33 2018 -0400 +++ b/mol2fps.xml Tue May 07 13:32:50 2019 -0400 @@ -1,21 +1,20 @@ - - with different fingerprint types + + conversion to several different fingerprint formats - chemfp - rdkit + chemfp + rdkit openbabel - + @@ -68,51 +67,51 @@ - + - + - + - + + label="Include information about the number of hydrogens on each atom" /> - + - + - + - + + label="Include information about the number of hydrogens on each atom" /> + label="Include information about the number of hydrogens on each atom" /> + label="Include information about the number of hydrogens on each atom" /> - + - + - + @@ -128,47 +127,73 @@ - + + + - + + + - + + + - + + + - + + + - + + + - + + + - + + + + + + + + + + + + + + of molecular fingerprints - chemfp - python - matplotlib - scipy + chemfp + matplotlib + scipy openbabel @@ -16,43 +15,44 @@ --cluster '$image' #end if #if str($output_files) in ['both', 'matrix']: - --smatrix '$smilarity_matrix' + --smatrix '$similarity_matrix' #end if --oformat '$oformat' ]]> - - + + - + - + - + output_files == "both" or output_files == "image" - + output_files == "both" or output_files == "matrix" - + - - + + + @@ -62,8 +62,7 @@ **What this tool does** -Generating hierarchical clusters and visualizing clusters with dendrograms. -For the clustering and the fingerprint handling the chemfp_ project is used. +Based on a set of fingerprints, generates a square self-similarity (NxN) matrix, as well as a dendrogram visualizing the clusters derived from it using hierarchical clustering. For the clustering and the fingerprint handling the chemfp_ project is used. .. _chemfp: http://chemfp.com/ @@ -73,7 +72,7 @@ **Hint** -The plotting of the cluster image is sensible only with a small dataset. +The plotting of the dendrogram is sensible only with a small dataset - if more than around 20 fingerprints are used the plot will be unreadable. ----- diff -r e3a7d6cc87af -r 0d88631bb7de sdf2fps.xml --- a/sdf2fps.xml Fri Mar 23 03:26:33 2018 -0400 +++ b/sdf2fps.xml Tue May 07 13:32:50 2019 -0400 @@ -1,8 +1,8 @@ - - extract fingerprints from sdf files metadata + + - extract fingerprints from sdf file metadata - chemfp + chemfp openbabel @@ -11,7 +11,7 @@ ]]> - + @@ -37,7 +37,7 @@ **Input** -`SD-Format`_ +`SD-Format`_, storing the atom types, together with the Cartesian coordinates. .. _`SD-Format`: http://en.wikipedia.org/wiki/Chemical_table_file @@ -93,6 +93,8 @@ **Output** +After the first few lines, starting with a hash symbol, which contain generic information, the fingerprints are listed as hexadecimal strings. + * Example:: #FPS1 diff -r e3a7d6cc87af -r 0d88631bb7de test-data/CID_2244_FP2.fps --- a/test-data/CID_2244_FP2.fps Fri Mar 23 03:26:33 2018 -0400 +++ b/test-data/CID_2244_FP2.fps Tue May 07 13:32:50 2019 -0400 @@ -1,7 +1,7 @@ #FPS1 #num_bits=1021 #type=OpenBabel-FP2/1 -#software=OpenBabel/2.4.1 -#source=/tmp/tmptaAke4/files/000/dataset_3.dat -#date=2017-05-19T13:52:59 +#software=OpenBabel/2.4.1 chemfp/1.5 +#source=CID_2244.sdf +#date=2019-05-03T12:39:13 00000010004000c00000020000030000010000000008000000000080000000000400400000000010200a020800000000000042000000000000800002000002000c200800010001010000000002808002208000400000000040080000000100000008000000000002004002000010000000020100080100200808000000000004 2244 diff -r e3a7d6cc87af -r 0d88631bb7de test-data/CID_2244_FP3.fps --- a/test-data/CID_2244_FP3.fps Fri Mar 23 03:26:33 2018 -0400 +++ b/test-data/CID_2244_FP3.fps Tue May 07 13:32:50 2019 -0400 @@ -1,7 +1,7 @@ #FPS1 -#num_bits=1021 -#type=OpenBabel-FP2/1 -#software=OpenBabel/2.4.1 -#source=/tmp/tmptaAke4/files/000/dataset_7.dat -#date=2017-05-19T13:53:45 -00000010004000c00000020000030000010000000008000000000080000000000400400000000010200a020800000000000042000000000000800002000002000c200800010001010000000002808002208000400000000040080000000100000008000000000002004002000010000000020100080100200808000000000004 2244 +#num_bits=55 +#type=OpenBabel-FP3/1 +#software=OpenBabel/2.4.1 chemfp/1.5 +#source=CID_2244.sdf +#date=2019-05-03T12:39:21 +0400000c50b007 2244 diff -r e3a7d6cc87af -r 0d88631bb7de test-data/CID_2244_FP4.fps --- a/test-data/CID_2244_FP4.fps Fri Mar 23 03:26:33 2018 -0400 +++ b/test-data/CID_2244_FP4.fps Tue May 07 13:32:50 2019 -0400 @@ -1,7 +1,7 @@ #FPS1 -#num_bits=1021 -#type=OpenBabel-FP2/1 -#software=OpenBabel/2.4.1 -#source=/tmp/tmptaAke4/files/000/dataset_11.dat -#date=2017-05-19T13:54:39 -00000010004000c00000020000030000010000000008000000000080000000000400400000000010200a020800000000000042000000000000800002000002000c200800010001010000000002808002208000400000000040080000000100000008000000000002004002000010000000020100080100200808000000000004 2244 +#num_bits=307 +#type=OpenBabel-FP4/1 +#software=OpenBabel/2.4.1 chemfp/1.5 +#source=CID_2244.sdf +#date=2019-05-03T12:39:27 +010000000000000000009800000000004001000000000000000000000000000000000240402801 2244 diff -r e3a7d6cc87af -r 0d88631bb7de test-data/CID_2244_MACCS.fps --- a/test-data/CID_2244_MACCS.fps Fri Mar 23 03:26:33 2018 -0400 +++ b/test-data/CID_2244_MACCS.fps Tue May 07 13:32:50 2019 -0400 @@ -1,7 +1,7 @@ #FPS1 -#num_bits=1021 -#type=OpenBabel-FP2/1 -#software=OpenBabel/2.4.1 -#source=/tmp/tmptaAke4/files/000/dataset_15.dat -#date=2017-05-19T13:55:30 -00000010004000c00000020000030000010000000008000000000080000000000400400000000010200a020800000000000042000000000000800002000002000c200800010001010000000002808002208000400000000040080000000100000008000000000002004002000010000000020100080100200808000000000004 2244 +#num_bits=166 +#type=OpenBabel-MACCS/2 +#software=OpenBabel/2.4.1 chemfp/1.5 +#source=CID_2244.sdf +#date=2019-05-03T12:39:49 +0000000000000000000000010000016480cca2d21e 2244 diff -r e3a7d6cc87af -r 0d88631bb7de test-data/CID_2244_torsions.fps --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/CID_2244_torsions.fps Tue May 07 13:32:50 2019 -0400 @@ -0,0 +1,7 @@ +#FPS1 +#num_bits=2048 +#type=RDKit-Torsion/2 fpSize=2048 targetSize=4 +#software=RDKit/2018.09.3 chemfp/1.5 +#source=test-data/CID_2244.smi +#date=2019-04-25T15:11:02 +00100010000003000000000000000000000000000000000000000000000000000000000000000000000000000000000300000001000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000100000000000000000000000000000000000000000000000000000000100000000000000000000001000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000100000010000001000000000000110000000000000000000000000000000000001001000000000000000000001000000000000000000000 2244 diff -r e3a7d6cc87af -r 0d88631bb7de test-data/NxN_Clustering_on_q.svg --- a/test-data/NxN_Clustering_on_q.svg Fri Mar 23 03:26:33 2018 -0400 +++ b/test-data/NxN_Clustering_on_q.svg Tue May 07 13:32:50 2019 -0400 @@ -2,7 +2,7 @@ - + - + @@ -209,7 +209,7 @@ z " id="DejaVuSans-32"/> - + @@ -288,7 +288,7 @@ z " id="DejaVuSans-33"/> - + @@ -303,7 +303,7 @@ - + @@ -337,7 +337,7 @@ z " id="DejaVuSans-34"/> - + @@ -352,7 +352,7 @@ - + @@ -367,7 +367,7 @@ - + @@ -382,7 +382,7 @@ - + @@ -396,7 +396,7 @@ - + @@ -410,7 +410,7 @@ - + @@ -425,7 +425,7 @@ - + @@ -440,7 +440,7 @@ - + @@ -454,7 +454,7 @@ - + @@ -472,10 +472,10 @@ +" id="mccd7e1f0e4" style="stroke:#000000;stroke-width:0.8;"/> - + @@ -499,7 +499,7 @@ - + @@ -515,7 +515,7 @@ - + @@ -531,7 +531,7 @@ - + @@ -547,7 +547,7 @@ - + @@ -563,7 +563,7 @@ - + @@ -579,7 +579,7 @@ - + @@ -595,7 +595,7 @@ - + @@ -610,68 +610,68 @@ - - - - - - - - - - - - + diff -r e3a7d6cc87af -r 0d88631bb7de test-data/NxN_Clustering_on_q.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/NxN_Clustering_on_q.txt Tue May 07 13:32:50 2019 -0400 @@ -0,0 +1,13 @@ +0.000000000000000000e+00 6.578947368421017572e-03 4.487179487179482340e-02 1.948051948051943150e-02 1.948051948051943150e-02 1.948051948051943150e-02 1.948051948051943150e-02 2.580645161290318068e-02 3.205128205128204844e-02 3.205128205128204844e-02 3.821656050955413164e-02 4.430379746835444443e-02 4.430379746835444443e-02 +6.578947368421017572e-03 0.000000000000000000e+00 5.095541401273884219e-02 2.580645161290318068e-02 1.298701298701299134e-02 1.298701298701299134e-02 1.298701298701299134e-02 3.205128205128204844e-02 3.821656050955413164e-02 2.564102564102566095e-02 3.184713375796177637e-02 3.797468354430377779e-02 3.797468354430377779e-02 +4.487179487179482340e-02 5.095541401273884219e-02 0.000000000000000000e+00 3.821656050955413164e-02 5.063291139240511107e-02 5.063291139240511107e-02 5.063291139240511107e-02 4.430379746835444443e-02 2.547770700636942109e-02 6.250000000000000000e-02 5.625000000000002220e-02 6.211180124223603105e-02 6.211180124223603105e-02 +1.948051948051943150e-02 2.580645161290318068e-02 3.821656050955413164e-02 0.000000000000000000e+00 2.564102564102566095e-02 2.564102564102566095e-02 2.564102564102566095e-02 6.451612903225822926e-03 2.547770700636942109e-02 3.797468354430377779e-02 1.910828025477706582e-02 5.000000000000004441e-02 2.531645569620255554e-02 +1.948051948051943150e-02 1.298701298701299134e-02 5.063291139240511107e-02 2.564102564102566095e-02 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 3.184713375796177637e-02 3.797468354430377779e-02 2.547770700636942109e-02 3.164556962025311115e-02 3.773584905660376521e-02 3.773584905660376521e-02 +1.948051948051943150e-02 1.298701298701299134e-02 5.063291139240511107e-02 2.564102564102566095e-02 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 3.184713375796177637e-02 3.797468354430377779e-02 2.547770700636942109e-02 3.164556962025311115e-02 3.773584905660376521e-02 3.773584905660376521e-02 +1.948051948051943150e-02 1.298701298701299134e-02 5.063291139240511107e-02 2.564102564102566095e-02 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 3.184713375796177637e-02 3.797468354430377779e-02 2.547770700636942109e-02 3.164556962025311115e-02 3.773584905660376521e-02 3.773584905660376521e-02 +2.580645161290318068e-02 3.205128205128204844e-02 4.430379746835444443e-02 6.451612903225822926e-03 3.184713375796177637e-02 3.184713375796177637e-02 3.184713375796177637e-02 0.000000000000000000e+00 3.164556962025311115e-02 4.402515723270439274e-02 1.273885350318471055e-02 5.590062111801241684e-02 1.898734177215188890e-02 +3.205128205128204844e-02 3.821656050955413164e-02 2.547770700636942109e-02 2.547770700636942109e-02 3.797468354430377779e-02 3.797468354430377779e-02 3.797468354430377779e-02 3.164556962025311115e-02 0.000000000000000000e+00 3.773584905660376521e-02 3.144654088050313767e-02 3.749999999999997780e-02 3.749999999999997780e-02 +3.205128205128204844e-02 2.564102564102566095e-02 6.250000000000000000e-02 3.797468354430377779e-02 2.547770700636942109e-02 2.547770700636942109e-02 2.547770700636942109e-02 4.402515723270439274e-02 3.773584905660376521e-02 0.000000000000000000e+00 3.144654088050313767e-02 1.265822784810122226e-02 3.749999999999997780e-02 +3.821656050955413164e-02 3.184713375796177637e-02 5.625000000000002220e-02 1.910828025477706582e-02 3.164556962025311115e-02 3.164556962025311115e-02 3.164556962025311115e-02 1.273885350318471055e-02 3.144654088050313767e-02 3.144654088050313767e-02 0.000000000000000000e+00 4.347826086956518843e-02 6.329113924050666640e-03 +4.430379746835444443e-02 3.797468354430377779e-02 6.211180124223603105e-02 5.000000000000004441e-02 3.773584905660376521e-02 3.773584905660376521e-02 3.773584905660376521e-02 5.590062111801241684e-02 3.749999999999997780e-02 1.265822784810122226e-02 4.347826086956518843e-02 0.000000000000000000e+00 4.938271604938271331e-02 +4.430379746835444443e-02 3.797468354430377779e-02 6.211180124223603105e-02 2.531645569620255554e-02 3.773584905660376521e-02 3.773584905660376521e-02 3.773584905660376521e-02 1.898734177215188890e-02 3.749999999999997780e-02 3.749999999999997780e-02 6.329113924050666640e-03 4.938271604938271331e-02 0.000000000000000000e+00