chemfp: nxn_clustering.py comparison

comparison nxn_clustering.py @ 12:3b14765c22ee draft default tip

"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/chemfp commit 7fb96a3844b4771084f18de2346ed6d5e241d839"

author	bgruening
date	Sat, 25 Sep 2021 19:07:44 +0000
parents	198b1e30c739
children

comparison

equal deleted inserted replaced

-:92c7cdc243e8
+:3b14765c22ee
-#!/usr/bin/env python
+# !/usr/bin/env python
 """
 Modified version of code examples from the chemfp project.
 http://code.google.com/p/chem-fingerprints/
 Thanks to Andrew Dalke of Andrew Dalke Scientific!
 """
+import argparse
+import chemfp
 import matplotlib
-matplotlib.use('Agg')
+matplotlib.use("Agg")  # noqa
-from matplotlib import rcParams
+from matplotlib import rcParams  # noqa
-rcParams.update({'figure.autolayout': True})
+rcParams.update({"figure.autolayout": True})  # noqa
-import argparse
+import numpy  # noqa
-import os
+import pylab  # noqa
-import chemfp
+import scipy.cluster.hierarchy as hcluster  # noqa
-import scipy.cluster.hierarchy as hcluster
-import pylab
-import numpy
-def distance_matrix(arena, tanimoto_threshold = 0.0):
+def distance_matrix(arena, tanimoto_threshold=0.0):
 n = len(arena)
 # Start off a similarity matrix with 1.0s along the diagonal
 try:
 similarities = numpy.identity(n, "d")
-except:
+except Exception:
-raise Exception('Input dataset is to large!')
+raise Exception("Input dataset is to large!")
-chemfp.set_num_threads( args.processors )
+chemfp.set_num_threads(args.processors)
-## Compute the full similarity matrix.
+# Compute the full similarity matrix.
 # The implementation computes the upper-triangle then copies
 # the upper-triangle into lower-triangle. It does not include
 # terms for the diagonal.
-results = chemfp.search.threshold_tanimoto_search_symmetric(arena, threshold=tanimoto_threshold)
+results = chemfp.search.threshold_tanimoto_search_symmetric(
+arena, threshold=tanimoto_threshold
+)
 # Copy the results into the NumPy array.
 for row_index, row in enumerate(results.iter_indices_and_scores()):
 for target_index, target_score in row:
 similarities[row_index, target_index] = target_score
 # Return the distance matrix using the similarity matrix
 return 1.0 - similarities
 if __name__ == "__main__":
-parser = argparse.ArgumentParser(description="""NxN clustering for fps files.
+parser = argparse.ArgumentParser(
+description="""NxN clustering for fps files.
 For more details please see the chemfp documentation:
 https://chemfp.readthedocs.org
-""")
+"""
+)
-parser.add_argument("-i", "--input", dest="input_path",
+parser.add_argument(
-required=True,
+"-i",
-help="Path to the input file.")
+"--input",
+dest="input_path",
+required=True,
+help="Path to the input file.",
+)
-parser.add_argument("-c", "--cluster", dest="cluster_image",
+parser.add_argument(
-help="Path to the output cluster image.")
+"-c",
+"--cluster",
+dest="cluster_image",
+help="Path to the output cluster image.",
+)
-parser.add_argument("-s", "--smatrix", dest="similarity_matrix",
+parser.add_argument(
-help="Path to the similarity matrix output file.")
+"-s",
+"--smatrix",
+dest="similarity_matrix",
+help="Path to the similarity matrix output file.",
+)
-parser.add_argument("-t", "--threshold", dest="tanimoto_threshold",
+parser.add_argument(
-type=float, default=0.0,
+"-t",
-help="Tanimoto threshold [0.0]")
+"--threshold",
+dest="tanimoto_threshold",
+type=float,
+default=0.0,
+help="Tanimoto threshold [0.0]",
+)
-parser.add_argument("--oformat", default='png', help="Output format (png, svg)")
+parser.add_argument("--oformat", default="png", help="Output format (png, svg)")
-parser.add_argument('-p', '--processors', type=int,
+parser.add_argument("-p", "--processors", type=int, default=4)
-default=4)
 args = parser.parse_args()
-targets = chemfp.open( args.input_path, format='fps' )
+targets = chemfp.open(args.input_path, format="fps")
-arena = chemfp.load_fingerprints( targets )
+arena = chemfp.load_fingerprints(targets)
-distances  = distance_matrix( arena, args.tanimoto_threshold )
+distances = distance_matrix(arena, args.tanimoto_threshold)
 if args.similarity_matrix:
 numpy.savetxt(args.similarity_matrix, distances)
 if args.cluster_image:
 linkage = hcluster.linkage(distances, method="single", metric="euclidean")
-hcluster.dendrogram(linkage, labels=arena.ids, leaf_rotation=90.)
+hcluster.dendrogram(linkage, labels=arena.ids, leaf_rotation=90.0)
 pylab.savefig(args.cluster_image, format=args.oformat)

Mercurial > repos > bgruening > chemfp

comparison nxn_clustering.py @ 12:3b14765c22ee draft default tip