Mercurial > repos > bgruening > chemfp
diff nxn_clustering.py @ 12:3b14765c22ee draft default tip
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/chemfp commit 7fb96a3844b4771084f18de2346ed6d5e241d839"
author | bgruening |
---|---|
date | Sat, 25 Sep 2021 19:07:44 +0000 |
parents | 198b1e30c739 |
children |
line wrap: on
line diff
--- a/nxn_clustering.py Wed Jun 24 13:12:05 2020 -0400
+++ b/nxn_clustering.py Sat Sep 25 19:07:44 2021 +0000
@@ -1,34 +1,38 @@
-#!/usr/bin/env python
+# !/usr/bin/env python
 """
     Modified version of code examples from the chemfp project.
     http://code.google.com/p/chem-fingerprints/
     Thanks to Andrew Dalke of Andrew Dalke Scientific!
 """
-import matplotlib
-matplotlib.use('Agg')
-from matplotlib import rcParams
-rcParams.update({'figure.autolayout': True})
+
 import argparse
-import os
+
 import chemfp
-import scipy.cluster.hierarchy as hcluster
-import pylab
-import numpy
+import matplotlib
+matplotlib.use("Agg")  # noqa
+from matplotlib import rcParams  # noqa
+rcParams.update({"figure.autolayout": True})  # noqa
+import numpy  # noqa
+import pylab  # noqa
+import scipy.cluster.hierarchy as hcluster  # noqa
 
-def distance_matrix(arena, tanimoto_threshold = 0.0):
+
+def distance_matrix(arena, tanimoto_threshold=0.0):
     n = len(arena)
     # Start off a similarity matrix with 1.0s along the diagonal
     try:
         similarities = numpy.identity(n, "d")
-    except:
-        raise Exception('Input dataset is to large!')
-    chemfp.set_num_threads( args.processors )
+    except Exception:
+        raise Exception("Input dataset is to large!")
+    chemfp.set_num_threads(args.processors)
 
-    ## Compute the full similarity matrix.
+    # Compute the full similarity matrix.
     # The implementation computes the upper-triangle then copies
     # the upper-triangle into lower-triangle. It does not include
     # terms for the diagonal.
-    results = chemfp.search.threshold_tanimoto_search_symmetric(arena, threshold=tanimoto_threshold)
+    results = chemfp.search.threshold_tanimoto_search_symmetric(
+        arena, threshold=tanimoto_threshold
+    )
 
     # Copy the results into the NumPy array.
     for row_index, row in enumerate(results.iter_indices_and_scores()):
@@ -40,41 +44,58 @@
 
 
 if __name__ == "__main__":
-    parser = argparse.ArgumentParser(description="""NxN clustering for fps files.
+    parser = argparse.ArgumentParser(
+        description="""NxN clustering for fps files.
 For more details please see the chemfp documentation:
 https://chemfp.readthedocs.org
-""")
+"""
+    )
 
-    parser.add_argument("-i", "--input", dest="input_path",
-                    required=True,
-                    help="Path to the input file.")
+    parser.add_argument(
+        "-i",
+        "--input",
+        dest="input_path",
+        required=True,
+        help="Path to the input file.",
+    )
 
-    parser.add_argument("-c", "--cluster", dest="cluster_image",
-                    help="Path to the output cluster image.")
+    parser.add_argument(
+        "-c",
+        "--cluster",
+        dest="cluster_image",
+        help="Path to the output cluster image.",
+    )
 
-    parser.add_argument("-s", "--smatrix", dest="similarity_matrix",
-                    help="Path to the similarity matrix output file.")
+    parser.add_argument(
+        "-s",
+        "--smatrix",
+        dest="similarity_matrix",
+        help="Path to the similarity matrix output file.",
+    )
 
-    parser.add_argument("-t", "--threshold", dest="tanimoto_threshold",
-                    type=float, default=0.0,
-                    help="Tanimoto threshold [0.0]")
+    parser.add_argument(
+        "-t",
+        "--threshold",
+        dest="tanimoto_threshold",
+        type=float,
+        default=0.0,
+        help="Tanimoto threshold [0.0]",
+    )
 
-    parser.add_argument("--oformat", default='png', help="Output format (png, svg)")
+    parser.add_argument("--oformat", default="png", help="Output format (png, svg)")
 
-    parser.add_argument('-p', '--processors', type=int,
-                    default=4)
+    parser.add_argument("-p", "--processors", type=int, default=4)
 
     args = parser.parse_args()
 
-    targets = chemfp.open( args.input_path, format='fps' )
-    arena = chemfp.load_fingerprints( targets )
-    distances = distance_matrix( arena, args.tanimoto_threshold )
+    targets = chemfp.open(args.input_path, format="fps")
+    arena = chemfp.load_fingerprints(targets)
+    distances = distance_matrix(arena, args.tanimoto_threshold)
 
     if args.similarity_matrix:
         numpy.savetxt(args.similarity_matrix, distances)
 
     if args.cluster_image:
         linkage = hcluster.linkage(distances, method="single", metric="euclidean")
-        hcluster.dendrogram(linkage, labels=arena.ids, leaf_rotation=90.)
+        hcluster.dendrogram(linkage, labels=arena.ids, leaf_rotation=90.0)
         pylab.savefig(args.cluster_image, format=args.oformat)
-