Package rdkit :: Package ML :: Package InfoTheory :: Module BitClusterer
[hide private]
[frames] | no frames]

Source Code for Module rdkit.ML.InfoTheory.BitClusterer

 1  # 
 2  #  Copyright (C) 2000-2008  Greg Landrum and Rational Discovery LLC 
 3  # 
 4   
 5  from rdkit.SimDivFilters import rdSimDivPickers as rdsimdiv 
 6  if rdsimdiv is None: 
 7     raise ImportError('rdSimDivPickers not built') 
 8  from rdkit import DataStructs 
 9  import numpy 
10   
class BitClusterer(object):
    """Cluster a set of fingerprint bits based on their correlation.

    The caller supplies a correlation matrix for the bits (built elsewhere,
    e.g. by reading fingerprints from a database or from a list of
    fingerprints). The bits are grouped into a fixed number of clusters,
    which can then be used to reduce a fingerprint to per-cluster scores
    or to a smaller per-cluster fingerprint.
    """

    def __init__(self, idList, nCluster, type=rdsimdiv.ClusterMethod.WARD):
        """Store the bit ids and the clustering settings.

        ARGUMENTS:
          - idList : sequence of bit ids; the indices returned by the
                     cluster picker are looked up in this sequence
          - nCluster : the number of clusters to generate
          - type : clustering method handed to
                   rdsimdiv.HierarchicalClusterPicker (default: WARD)
        """
        self._clusters = []
        self._bidList = idList
        self._nClusters = nCluster
        # 'type' shadows the builtin, but the parameter name is part of the
        # public signature and is therefore kept.
        self._type = type

    def ClusterBits(self, corrMat):
        """Cluster the bits and store the result as lists of bit ids.

        ARGUMENTS:
          - corrMat : the correlation matrix for the bits in idList
                      (presumably a numpy array in the layout expected by
                      HierarchicalClusterPicker.Cluster -- TODO confirm)

        The clusters are retrievable afterwards through GetClusters().
        """
        # The clustering code actually needs distances, so take 1/val for
        # each element of the correlation matrix.
        # NOTE(review): a zero correlation leads to a division by zero
        # here -- confirm that callers never pass zero entries.
        distMat = 1 / corrMat

        picker = rdsimdiv.HierarchicalClusterPicker(self._type)
        picked = picker.Cluster(distMat, len(self._bidList), self._nClusters)

        # Map the cluster members (indices) to the actual bit ids.
        self._clusters = [[self._bidList[idx] for idx in members]
                          for members in picked]

    def SetClusters(self, clusters):
        """Replace the stored clusters with a caller-supplied set.

        ARGUMENTS:
          - clusters : sequence of clusters, each a sequence of bit ids

        RAISES:
          - AssertionError : if the number of clusters does not match the
                             number given at construction time
        """
        # Raised explicitly (rather than via an assert statement) so that
        # the check is still performed when Python runs with -O; the
        # exception type is the same one the assert statement produced.
        if len(clusters) != self._nClusters:
            raise AssertionError(
                'expected %d clusters, got %d'
                % (self._nClusters, len(clusters)))
        self._clusters = clusters

    def GetClusters(self):
        """Return the current clusters (a list of lists of bit ids)."""
        return self._clusters

    def MapToClusterScores(self, fp):
        """Map the fingerprint to a vector of scores based on the bit clusters.

        The dimension of the vector is the same as the number of clusters.
        Each value in the vector is the number of bits of the corresponding
        cluster that are turned on in the fingerprint.

        ARGUMENTS:
          - fp : the fingerprint (indexable by bit id)

        RETURNS: a list of integer counts, one entry per cluster
        """
        scores = [0] * self._nClusters
        for pos, cluster in enumerate(self._clusters):
            for bid in cluster:
                if fp[bid]:
                    scores[pos] += 1
        return scores

    def MapToClusterFP(self, fp):
        """Map the fingerprint to a smaller (= number of clusters) fingerprint.

        Each cluster gets one bit in the new fingerprint; that bit is turned
        on if any of the bits in the cluster are turned on in the original
        fingerprint.

        ARGUMENTS:
          - fp : the fingerprint (indexable by bit id)

        RETURNS: a DataStructs.ExplicitBitVect with one bit per cluster
        """
        ebv = DataStructs.ExplicitBitVect(self._nClusters)
        for pos, cluster in enumerate(self._clusters):
            # any() stops at the first bit that is on, like the original
            # loop-and-break.
            if any(fp[bid] for bid in cluster):
                ebv.SetBit(pos)
        return ebv
88