Package rdkit :: Package Chem :: Package Draw :: Module SimilarityMaps
[hide private]
[frames] | no frames]

Source Code for Module rdkit.Chem.Draw.SimilarityMaps

  1  # $Id$ 
  2  # 
  3  #  Copyright (c) 2013, Novartis Institutes for BioMedical Research Inc. 
  4  #  All rights reserved. 
  5  #  
  6  # Redistribution and use in source and binary forms, with or without 
  7  # modification, are permitted provided that the following conditions are 
  8  # met:  
  9  # 
 10  #     * Redistributions of source code must retain the above copyright  
 11  #       notice, this list of conditions and the following disclaimer. 
 12  #     * Redistributions in binary form must reproduce the above 
 13  #       copyright notice, this list of conditions and the following  
 14  #       disclaimer in the documentation and/or other materials provided  
 15  #       with the distribution. 
 16  #     * Neither the name of Novartis Institutes for BioMedical Research Inc.  
 17  #       nor the names of its contributors may be used to endorse or promote  
 18  #       products derived from this software without specific prior written permission. 
 19  # 
 20  # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 
 21  # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 
 22  # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 
 23  # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 
 24  # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 
 25  # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 
 26  # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 
 27  # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 
 28  # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 
 29  # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 
 30  # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
 31  # 
 32  # Created by Sereina Riniker, Aug 2013 
 33   
 34   
 35  from rdkit import Chem 
 36  from rdkit import RDConfig 
 37  from rdkit import DataStructs 
 38  from rdkit.Chem import rdMolDescriptors as rdMD 
 39  from rdkit.Chem import rdmolops 
 40  from rdkit.Chem import Draw 
 41  from rdkit.six import iteritems 
 42  import numpy 
 43  import math 
 44  import copy 
 45  from matplotlib import cm 
 46   
 47   
48 -def GetAtomicWeightsForFingerprint(refMol, probeMol, fpFunction, metric=DataStructs.DiceSimilarity):
49 """ 50 Calculates the atomic weights for the probe molecule 51 based on a fingerprint function and a metric. 52 53 Parameters: 54 refMol -- the reference molecule 55 probeMol -- the probe molecule 56 fpFunction -- the fingerprint function 57 metric -- the similarity metric 58 59 Note: 60 If fpFunction needs additional parameters, use a lambda construct 61 """ 62 if hasattr(probeMol, '_fpInfo'): delattr(probeMol, '_fpInfo') 63 if hasattr(refMol, '_fpInfo'): delattr(refMol, '_fpInfo') 64 refFP = fpFunction(refMol, -1) 65 probeFP = fpFunction(probeMol, -1) 66 baseSimilarity = metric(refFP, probeFP) 67 # loop over atoms 68 weights = [] 69 for atomId in range(probeMol.GetNumAtoms()): 70 newFP = fpFunction(probeMol, atomId) 71 newSimilarity = metric(refFP, newFP) 72 weights.append(baseSimilarity - newSimilarity) 73 if hasattr(probeMol, '_fpInfo'): delattr(probeMol, '_fpInfo') 74 if hasattr(refMol, '_fpInfo'): delattr(refMol, '_fpInfo') 75 return weights
76 77
78 -def GetAtomicWeightsForModel(probeMol, fpFunction, predictionFunction):
79 """ 80 Calculates the atomic weights for the probe molecule based on 81 a fingerprint function and the prediction function of a ML model. 82 83 Parameters: 84 probeMol -- the probe molecule 85 fpFunction -- the fingerprint function 86 predictionFunction -- the prediction function of the ML model 87 """ 88 if hasattr(probeMol, '_fpInfo'): delattr(probeMol, '_fpInfo') 89 probeFP = fpFunction(probeMol, -1) 90 baseProba = predictionFunction(probeFP) 91 # loop over atoms 92 weights = [] 93 for atomId in range(probeMol.GetNumAtoms()): 94 newFP = fpFunction(probeMol, atomId) 95 newProba = predictionFunction(newFP) 96 weights.append(baseProba - newProba) 97 if hasattr(probeMol, '_fpInfo'): delattr(probeMol, '_fpInfo') 98 return weights
99 100
101 -def GetStandardizedWeights(weights):
102 """ 103 Normalizes the weights, 104 such that the absolute maximum weight equals 1.0. 105 106 Parameters: 107 weights -- the list with the atomic weights 108 """ 109 tmp = [math.fabs(w) for w in weights] 110 currentMax = max(tmp) 111 if currentMax > 0: 112 return [w/currentMax for w in weights], currentMax 113 else: 114 return weights, currentMax
115 116
117 -def GetSimilarityMapFromWeights(mol, weights, colorMap=cm.PiYG, scale=-1, size=(250, 250), sigma=None, #@UndefinedVariable #pylint: disable=E1101 118 coordScale=1.5, step=0.01, colors='k', contourLines=10, alpha=0.5, **kwargs):
119 """ 120 Generates the similarity map for a molecule given the atomic weights. 121 122 Parameters: 123 mol -- the molecule of interest 124 colorMap -- the matplotlib color map scheme 125 scale -- the scaling: scale < 0 -> the absolute maximum weight is used as maximum scale 126 scale = double -> this is the maximum scale 127 size -- the size of the figure 128 sigma -- the sigma for the Gaussians 129 coordScale -- scaling factor for the coordinates 130 step -- the step for calcAtomGaussian 131 colors -- color of the contour lines 132 contourLines -- if integer number N: N contour lines are drawn 133 if list(numbers): contour lines at these numbers are drawn 134 alpha -- the alpha blending value for the contour lines 135 kwargs -- additional arguments for drawing 136 """ 137 if mol.GetNumAtoms() < 2: raise ValueError("too few atoms") 138 fig = Draw.MolToMPL(mol, coordScale=coordScale, size=size, **kwargs) 139 if sigma is None: 140 if mol.GetNumBonds() > 0: 141 bond = mol.GetBondWithIdx(0) 142 idx1 = bond.GetBeginAtomIdx() 143 idx2 = bond.GetEndAtomIdx() 144 sigma = 0.3 * math.sqrt(sum([(mol._atomPs[idx1][i]-mol._atomPs[idx2][i])**2 for i in range(2)])) 145 else: 146 sigma = 0.3 * math.sqrt(sum([(mol._atomPs[0][i]-mol._atomPs[1][i])**2 for i in range(2)])) 147 sigma = round(sigma, 2) 148 x, y, z = Draw.calcAtomGaussians(mol, sigma, weights=weights, step=step) 149 # scaling 150 if scale <= 0.0: maxScale = max(math.fabs(numpy.min(z)), math.fabs(numpy.max(z))) 151 else: maxScale = scale 152 # coloring 153 fig.axes[0].imshow(z, cmap=colorMap, interpolation='bilinear', origin='lower', extent=(0,1,0,1), vmin=-maxScale, vmax=maxScale) 154 # contour lines 155 # only draw them when at least one weight is not zero 156 if len([w for w in weights if w != 0.0]): 157 fig.axes[0].contour(x, y, z, contourLines, colors=colors, alpha=alpha, **kwargs) 158 return fig
159 160
161 -def GetSimilarityMapForFingerprint(refMol, probeMol, fpFunction, metric=DataStructs.DiceSimilarity, **kwargs):
162 """ 163 Generates the similarity map for a given reference and probe molecule, 164 fingerprint function and similarity metric. 165 166 Parameters: 167 refMol -- the reference molecule 168 probeMol -- the probe molecule 169 fpFunction -- the fingerprint function 170 metric -- the similarity metric. 171 kwargs -- additional arguments for drawing 172 """ 173 weights = GetAtomicWeightsForFingerprint(refMol, probeMol, fpFunction, metric) 174 weights, maxWeight = GetStandardizedWeights(weights) 175 fig = GetSimilarityMapFromWeights(probeMol, weights, **kwargs) 176 return fig, maxWeight
177 178
179 -def GetSimilarityMapForModel(probeMol, fpFunction, predictionFunction, **kwargs):
180 """ 181 Generates the similarity map for a given ML model and probe molecule, 182 and fingerprint function. 183 184 Parameters: 185 probeMol -- the probe molecule 186 fpFunction -- the fingerprint function 187 predictionFunction -- the prediction function of the ML model 188 kwargs -- additional arguments for drawing 189 """ 190 weights = GetAtomicWeightsForModel(probeMol, fpFunction, predictionFunction) 191 weights, maxWeight = GetStandardizedWeights(weights) 192 fig = GetSimilarityMapFromWeights(probeMol, weights, **kwargs) 193 return fig, maxWeight
194 195 196 apDict = {} 197 apDict['normal'] = lambda m, bits, minl, maxl, bpe, ia: rdMD.GetAtomPairFingerprint(m, minLength=minl, maxLength=maxl, ignoreAtoms=ia) 198 apDict['hashed'] = lambda m, bits, minl, maxl, bpe, ia: rdMD.GetHashedAtomPairFingerprint(m, nBits=bits, minLength=minl, maxLength=maxl, ignoreAtoms=ia) 199 apDict['bv'] = lambda m, bits, minl, maxl, bpe, ia: rdMD.GetHashedAtomPairFingerprintAsBitVect(m, nBits=bits, minLength=minl, maxLength=maxl, nBitsPerEntry=bpe, ignoreAtoms=ia) 200 201 # usage: lambda m,i: GetAPFingerprint(m, i, fpType, nBits, minLength, maxLength, nBitsPerEntry)
202 -def GetAPFingerprint(mol, atomId=-1, fpType='normal', nBits=2048, minLength=1, maxLength=30, nBitsPerEntry=4):
203 """ 204 Calculates the atom pairs fingerprint with the torsions of atomId removed. 205 206 Parameters: 207 mol -- the molecule of interest 208 atomId -- the atom to remove the pairs for (if -1, no pair is removed) 209 fpType -- the type of AP fingerprint ('normal', 'hashed', 'bv') 210 nBits -- the size of the bit vector (only for fpType='bv') 211 minLength -- the minimum path length for an atom pair 212 maxLength -- the maxmimum path length for an atom pair 213 nBitsPerEntry -- the number of bits available for each pair 214 """ 215 if fpType not in ['normal', 'hashed', 'bv']: raise ValueError("Unknown Atom pairs fingerprint type") 216 if atomId < 0: 217 return apDict[fpType](mol, nBits, minLength, maxLength, nBitsPerEntry, 0) 218 if atomId >= mol.GetNumAtoms(): raise ValueError("atom index greater than number of atoms") 219 return apDict[fpType](mol, nBits, minLength, maxLength, nBitsPerEntry, [atomId])
220 221 222 ttDict = {} 223 ttDict['normal'] = lambda m, bits, ts, bpe, ia: rdMD.GetTopologicalTorsionFingerprint(m, targetSize=ts, ignoreAtoms=ia) 224 ttDict['hashed'] = lambda m, bits, ts, bpe, ia: rdMD.GetHashedTopologicalTorsionFingerprint(m, nBits=bits, targetSize=ts, ignoreAtoms=ia) 225 ttDict['bv'] = lambda m, bits, ts, bpe, ia: rdMD.GetHashedTopologicalTorsionFingerprintAsBitVect(m, nBits=bits, targetSize=ts, nBitsPerEntry=bpe, ignoreAtoms=ia) 226 227 # usage: lambda m,i: GetTTFingerprint(m, i, fpType, nBits, targetSize)
228 -def GetTTFingerprint(mol, atomId=-1, fpType='normal', nBits=2048, targetSize=4, nBitsPerEntry=4):
229 """ 230 Calculates the topological torsion fingerprint with the pairs of atomId removed. 231 232 Parameters: 233 mol -- the molecule of interest 234 atomId -- the atom to remove the torsions for (if -1, no torsion is removed) 235 fpType -- the type of TT fingerprint ('normal', 'hashed', 'bv') 236 nBits -- the size of the bit vector (only for fpType='bv') 237 minLength -- the minimum path length for an atom pair 238 maxLength -- the maxmimum path length for an atom pair 239 nBitsPerEntry -- the number of bits available for each torsion 240 """ 241 if fpType not in ['normal', 'hashed', 'bv']: raise ValueError("Unknown Topological torsion fingerprint type") 242 if atomId < 0: 243 return ttDict[fpType](mol, nBits, targetSize, nBitsPerEntry, 0) 244 if atomId >= mol.GetNumAtoms(): raise ValueError("atom index greater than number of atoms") 245 return ttDict[fpType](mol, nBits, targetSize, nBitsPerEntry, [atomId])
246 247 248 # usage: lambda m,i: GetMorganFingerprint(m, i, radius, fpType, nBits, useFeatures)
249 -def GetMorganFingerprint(mol, atomId=-1, radius=2, fpType='bv', nBits=2048, useFeatures=False):
250 """ 251 Calculates the Morgan fingerprint with the environments of atomId removed. 252 253 Parameters: 254 mol -- the molecule of interest 255 radius -- the maximum radius 256 fpType -- the type of Morgan fingerprint: 'count' or 'bv' 257 atomId -- the atom to remove the environments for (if -1, no environments is removed) 258 nBits -- the size of the bit vector (only for fpType = 'bv') 259 useFeatures -- if false: ConnectivityMorgan, if true: FeatureMorgan 260 """ 261 if fpType not in ['bv', 'count']: raise ValueError("Unknown Morgan fingerprint type") 262 if not hasattr(mol, '_fpInfo'): 263 info = {} 264 # get the fingerprint 265 if fpType == 'bv': molFp = rdMD.GetMorganFingerprintAsBitVect(mol, radius, nBits=nBits, useFeatures=useFeatures, bitInfo=info) 266 else: molFp = rdMD.GetMorganFingerprint(mol, radius, useFeatures=useFeatures, bitInfo=info) 267 # construct the bit map 268 if fpType == 'bv': bitmap = [DataStructs.ExplicitBitVect(nBits) for x in range(mol.GetNumAtoms())] 269 else: bitmap = [[] for x in range(mol.GetNumAtoms())] 270 for bit, es in iteritems(info): 271 for at1, rad in es: 272 if rad == 0: # for radius 0 273 if fpType == 'bv': bitmap[at1][bit] = 1 274 else: bitmap[at1].append(bit) 275 else: # for radii > 0 276 env = Chem.FindAtomEnvironmentOfRadiusN(mol, rad, at1) 277 amap = {} 278 submol = Chem.PathToSubmol(mol, env, atomMap=amap) 279 for at2 in amap.keys(): 280 if fpType == 'bv': bitmap[at2][bit] = 1 281 else: bitmap[at2].append(bit) 282 mol._fpInfo = (molFp, bitmap) 283 284 if atomId < 0: 285 return mol._fpInfo[0] 286 else: # remove the bits of atomId 287 if atomId >= mol.GetNumAtoms(): raise ValueError("atom index greater than number of atoms") 288 if len(mol._fpInfo) != 2: raise ValueError("_fpInfo not set") 289 if fpType == 'bv': 290 molFp = mol._fpInfo[0] ^ mol._fpInfo[1][atomId] # xor 291 else: # count 292 molFp = copy.deepcopy(mol._fpInfo[0]) 293 # delete the bits with atomId 294 for bit in mol._fpInfo[1][atomId]: 295 molFp[bit] -= 1 296 return molFp
297 298 # usage: lambda m,i: GetRDKFingerprint(m, i, fpType, nBits, minPath, maxPath, nBitsPerHash)
299 -def GetRDKFingerprint(mol, atomId=-1, fpType='bv', nBits=2048, minPath=1, maxPath=5, nBitsPerHash=2):
300 """ 301 Calculates the RDKit fingerprint with the paths of atomId removed. 302 303 Parameters: 304 mol -- the molecule of interest 305 atomId -- the atom to remove the paths for (if -1, no path is removed) 306 fpType -- the type of RDKit fingerprint: 'bv' 307 nBits -- the size of the bit vector 308 minPath -- minimum path length 309 maxPath -- maximum path length 310 nBitsPerHash -- number of to set per path 311 """ 312 if fpType not in ['bv', '']: raise ValueError("Unknown RDKit fingerprint type") 313 fpType = 'bv' 314 if not hasattr(mol, '_fpInfo'): 315 info = [] # list with bits for each atom 316 # get the fingerprint 317 molFp = Chem.RDKFingerprint(mol, fpSize=nBits, minPath=minPath, maxPath=maxPath, nBitsPerHash=nBitsPerHash, atomBits=info) 318 mol._fpInfo = (molFp, info) 319 320 if atomId < 0: 321 return mol._fpInfo[0] 322 else: # remove the bits of atomId 323 if atomId >= mol.GetNumAtoms(): raise ValueError("atom index greater than number of atoms") 324 if len(mol._fpInfo) != 2: raise ValueError("_fpInfo not set") 325 molFp = copy.deepcopy(mol._fpInfo[0]) 326 molFp.UnSetBitsFromList(mol._fpInfo[1][atomId]) 327 return molFp
328