1
2
3
4
5 from rdkit.SimDivFilters import rdSimDivPickers as rdsimdiv
6 if rdsimdiv is None:
7 raise ImportError('rdSimDivPickers not built')
8 from rdkit import DataStructs
9 import numpy
10
12 """ Class to cluster a set of bits based on their correlation
13
14 The correlation matrix is first built by reading the fingerprints
15 from a database or a list of fingerprints
16 """
17
def __init__(self, idList, nCluster, type=rdsimdiv.ClusterMethod.WARD):
    """ Record the bit ids and the clustering settings; no clustering is done here

    ARGUMENTS:
      - idList : the ids of the bits to be clustered (stored as passed, not copied)
      - nCluster : the number of clusters
      - type : the clustering method, rdsimdiv.ClusterMethod.WARD by default
    """
    # Start with no clusters assigned.
    self._clusters = list()

    # Keep the caller's arguments as-is for later use by the other methods.
    self._bidList, self._nClusters, self._type = idList, nCluster, type
24
26
27 distMat = 1/corrMat
28
29 pkr = rdsimdiv.HierarchicalClusterPicker(self._type)
30
31 cls = pkr.Cluster(distMat, len(self._bidList), self._nClusters)
32
33 self._clusters = []
34 for cl in cls :
35 bcls = []
36 for i in cl :
37 bid = self._bidList[i]
38 bcls.append(bid)
39 self._clusters.append(bcls)
40
42 assert len(clusters) == self._nClusters
43 self._clusters = clusters
44
47
49 """ Map the fingerprint to a real-valued vector of scores based on the bit clusters
50
51 The dimension of the vector is the same as the number of clusters. Each value in the
52 vector corresponds to the number of bits in the corresponding cluster
53 that are turned on in the fingerprint
54
55 ARGUMENTS:
56 - fp : the fingerprint
57 """
58
59 scores = [0]*self._nClusters
60
61 i = 0
62 for cls in self._clusters:
63 for bid in cls :
64 if fp[bid] :
65 scores[i] += 1
66
67 i += 1
68
69 return scores
70
72 """ Map the fingerprint to a smaller sized (= number of clusters) fingerprint
73
74 Each cluster gets a bit in the new fingerprint, and that bit is turned on if any of the bits in
75 the cluster are turned on in the original fingerprint"""
76
77 ebv = DataStructs.ExplicitBitVect(self._nClusters)
78 i = 0
79
80 for cls in self._clusters:
81 for bid in cls :
82 if fp[bid] :
83 ebv.SetBit(i)
84 break
85 i += 1
86
87 return ebv
88