diff nxn_clustering.py @ 12:3b14765c22ee draft default tip

"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/chemfp commit 7fb96a3844b4771084f18de2346ed6d5e241d839"
author bgruening
date Sat, 25 Sep 2021 19:07:44 +0000
parents 198b1e30c739
children
line wrap: on
line diff
--- a/nxn_clustering.py	Wed Jun 24 13:12:05 2020 -0400
+++ b/nxn_clustering.py	Sat Sep 25 19:07:44 2021 +0000
@@ -1,34 +1,38 @@
-#!/usr/bin/env python
+# !/usr/bin/env python
 """
     Modified version of code examples from the chemfp project.
     http://code.google.com/p/chem-fingerprints/
     Thanks to Andrew Dalke of Andrew Dalke Scientific!
 """
-import matplotlib
-matplotlib.use('Agg')
-from matplotlib import rcParams
-rcParams.update({'figure.autolayout': True})
+
 import argparse
-import os
+
 import chemfp
-import scipy.cluster.hierarchy as hcluster
-import pylab
-import numpy
+import matplotlib
+matplotlib.use("Agg")  # noqa
+from matplotlib import rcParams  # noqa
+rcParams.update({"figure.autolayout": True})  # noqa
+import numpy  # noqa
+import pylab  # noqa
+import scipy.cluster.hierarchy as hcluster  # noqa
 
-def distance_matrix(arena, tanimoto_threshold = 0.0):
+
+def distance_matrix(arena, tanimoto_threshold=0.0):
     n = len(arena)
     # Start off a similarity matrix with 1.0s along the diagonal
     try:
         similarities = numpy.identity(n, "d")
-    except:
-        raise Exception('Input dataset is to large!')
-    chemfp.set_num_threads( args.processors )
+    except Exception:
+        raise Exception("Input dataset is to large!")
+    chemfp.set_num_threads(args.processors)
 
-    ## Compute the full similarity matrix.
+    # Compute the full similarity matrix.
     # The implementation computes the upper-triangle then copies
     # the upper-triangle into lower-triangle. It does not include
     # terms for the diagonal.
-    results = chemfp.search.threshold_tanimoto_search_symmetric(arena, threshold=tanimoto_threshold)
+    results = chemfp.search.threshold_tanimoto_search_symmetric(
+        arena, threshold=tanimoto_threshold
+    )
 
     # Copy the results into the NumPy array.
     for row_index, row in enumerate(results.iter_indices_and_scores()):
@@ -40,41 +44,58 @@
 
 
 if __name__ == "__main__":
-    parser = argparse.ArgumentParser(description="""NxN clustering for fps files.
+    parser = argparse.ArgumentParser(
+        description="""NxN clustering for fps files.
 For more details please see the chemfp documentation:
 https://chemfp.readthedocs.org
-""")
+"""
+    )
 
-    parser.add_argument("-i", "--input", dest="input_path",
-                    required=True,
-                    help="Path to the input file.")
+    parser.add_argument(
+        "-i",
+        "--input",
+        dest="input_path",
+        required=True,
+        help="Path to the input file.",
+    )
 
-    parser.add_argument("-c", "--cluster", dest="cluster_image",
-                    help="Path to the output cluster image.")
+    parser.add_argument(
+        "-c",
+        "--cluster",
+        dest="cluster_image",
+        help="Path to the output cluster image.",
+    )
 
-    parser.add_argument("-s", "--smatrix", dest="similarity_matrix",
-                    help="Path to the similarity matrix output file.")
+    parser.add_argument(
+        "-s",
+        "--smatrix",
+        dest="similarity_matrix",
+        help="Path to the similarity matrix output file.",
+    )
 
-    parser.add_argument("-t", "--threshold", dest="tanimoto_threshold", 
-                    type=float, default=0.0,
-                    help="Tanimoto threshold [0.0]")
+    parser.add_argument(
+        "-t",
+        "--threshold",
+        dest="tanimoto_threshold",
+        type=float,
+        default=0.0,
+        help="Tanimoto threshold [0.0]",
+    )
 
-    parser.add_argument("--oformat", default='png', help="Output format (png, svg)")
+    parser.add_argument("--oformat", default="png", help="Output format (png, svg)")
 
-    parser.add_argument('-p', '--processors', type=int, 
-        default=4)
+    parser.add_argument("-p", "--processors", type=int, default=4)
 
     args = parser.parse_args()
 
-    targets = chemfp.open( args.input_path, format='fps' )
-    arena = chemfp.load_fingerprints( targets )
-    distances  = distance_matrix( arena, args.tanimoto_threshold )
+    targets = chemfp.open(args.input_path, format="fps")
+    arena = chemfp.load_fingerprints(targets)
+    distances = distance_matrix(arena, args.tanimoto_threshold)
 
     if args.similarity_matrix:
         numpy.savetxt(args.similarity_matrix, distances)
 
     if args.cluster_image:
         linkage = hcluster.linkage(distances, method="single", metric="euclidean")
-        hcluster.dendrogram(linkage, labels=arena.ids, leaf_rotation=90.)
+        hcluster.dendrogram(linkage, labels=arena.ids, leaf_rotation=90.0)
         pylab.savefig(args.cluster_image, format=args.oformat)
-