1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35 from rdkit import Chem
36 from rdkit import RDConfig
37 from rdkit import DataStructs
38 from rdkit.Chem import rdMolDescriptors as rdMD
39 from rdkit.Chem import rdmolops
40 from rdkit.Chem import Draw
41 from rdkit.six import iteritems
42 import numpy
43 import math
44 import copy
45 from matplotlib import cm
46
47
49 """
50 Calculates the atomic weights for the probe molecule
51 based on a fingerprint function and a metric.
52
53 Parameters:
54 refMol -- the reference molecule
55 probeMol -- the probe molecule
56 fpFunction -- the fingerprint function
57 metric -- the similarity metric
58
59 Note:
60 If fpFunction needs additional parameters, use a lambda construct
61 """
62 if hasattr(probeMol, '_fpInfo'): delattr(probeMol, '_fpInfo')
63 if hasattr(refMol, '_fpInfo'): delattr(refMol, '_fpInfo')
64 refFP = fpFunction(refMol, -1)
65 probeFP = fpFunction(probeMol, -1)
66 baseSimilarity = metric(refFP, probeFP)
67
68 weights = []
69 for atomId in range(probeMol.GetNumAtoms()):
70 newFP = fpFunction(probeMol, atomId)
71 newSimilarity = metric(refFP, newFP)
72 weights.append(baseSimilarity - newSimilarity)
73 if hasattr(probeMol, '_fpInfo'): delattr(probeMol, '_fpInfo')
74 if hasattr(refMol, '_fpInfo'): delattr(refMol, '_fpInfo')
75 return weights
76
77
99
100
102 """
103 Normalizes the weights,
104 such that the absolute maximum weight equals 1.0.
105
106 Parameters:
107 weights -- the list with the atomic weights
108 """
109 tmp = [math.fabs(w) for w in weights]
110 currentMax = max(tmp)
111 if currentMax > 0:
112 return [w/currentMax for w in weights], currentMax
113 else:
114 return weights, currentMax
115
116
117 -def GetSimilarityMapFromWeights(mol, weights, colorMap=cm.PiYG, scale=-1, size=(250, 250), sigma=None,
118 coordScale=1.5, step=0.01, colors='k', contourLines=10, alpha=0.5, **kwargs):
119 """
120 Generates the similarity map for a molecule given the atomic weights.
121
122 Parameters:
123 mol -- the molecule of interest
124 colorMap -- the matplotlib color map scheme
125 scale -- the scaling: scale < 0 -> the absolute maximum weight is used as maximum scale
126 scale = double -> this is the maximum scale
127 size -- the size of the figure
128 sigma -- the sigma for the Gaussians
129 coordScale -- scaling factor for the coordinates
130 step -- the step for calcAtomGaussian
131 colors -- color of the contour lines
132 contourLines -- if integer number N: N contour lines are drawn
133 if list(numbers): contour lines at these numbers are drawn
134 alpha -- the alpha blending value for the contour lines
135 kwargs -- additional arguments for drawing
136 """
137 if mol.GetNumAtoms() < 2: raise ValueError("too few atoms")
138 fig = Draw.MolToMPL(mol, coordScale=coordScale, size=size, **kwargs)
139 if sigma is None:
140 if mol.GetNumBonds() > 0:
141 bond = mol.GetBondWithIdx(0)
142 idx1 = bond.GetBeginAtomIdx()
143 idx2 = bond.GetEndAtomIdx()
144 sigma = 0.3 * math.sqrt(sum([(mol._atomPs[idx1][i]-mol._atomPs[idx2][i])**2 for i in range(2)]))
145 else:
146 sigma = 0.3 * math.sqrt(sum([(mol._atomPs[0][i]-mol._atomPs[1][i])**2 for i in range(2)]))
147 sigma = round(sigma, 2)
148 x, y, z = Draw.calcAtomGaussians(mol, sigma, weights=weights, step=step)
149
150 if scale <= 0.0: maxScale = max(math.fabs(numpy.min(z)), math.fabs(numpy.max(z)))
151 else: maxScale = scale
152
153 fig.axes[0].imshow(z, cmap=colorMap, interpolation='bilinear', origin='lower', extent=(0,1,0,1), vmin=-maxScale, vmax=maxScale)
154
155
156 if len([w for w in weights if w != 0.0]):
157 fig.axes[0].contour(x, y, z, contourLines, colors=colors, alpha=alpha, **kwargs)
158 return fig
159
160
162 """
163 Generates the similarity map for a given reference and probe molecule,
164 fingerprint function and similarity metric.
165
166 Parameters:
167 refMol -- the reference molecule
168 probeMol -- the probe molecule
169 fpFunction -- the fingerprint function
170 metric -- the similarity metric.
171 kwargs -- additional arguments for drawing
172 """
173 weights = GetAtomicWeightsForFingerprint(refMol, probeMol, fpFunction, metric)
174 weights, maxWeight = GetStandardizedWeights(weights)
175 fig = GetSimilarityMapFromWeights(probeMol, weights, **kwargs)
176 return fig, maxWeight
177
178
194
195
196 apDict = {}
197 apDict['normal'] = lambda m, bits, minl, maxl, bpe, ia: rdMD.GetAtomPairFingerprint(m, minLength=minl, maxLength=maxl, ignoreAtoms=ia)
198 apDict['hashed'] = lambda m, bits, minl, maxl, bpe, ia: rdMD.GetHashedAtomPairFingerprint(m, nBits=bits, minLength=minl, maxLength=maxl, ignoreAtoms=ia)
199 apDict['bv'] = lambda m, bits, minl, maxl, bpe, ia: rdMD.GetHashedAtomPairFingerprintAsBitVect(m, nBits=bits, minLength=minl, maxLength=maxl, nBitsPerEntry=bpe, ignoreAtoms=ia)
200
201
202 -def GetAPFingerprint(mol, atomId=-1, fpType='normal', nBits=2048, minLength=1, maxLength=30, nBitsPerEntry=4):
203 """
204 Calculates the atom pairs fingerprint with the torsions of atomId removed.
205
206 Parameters:
207 mol -- the molecule of interest
208 atomId -- the atom to remove the pairs for (if -1, no pair is removed)
209 fpType -- the type of AP fingerprint ('normal', 'hashed', 'bv')
210 nBits -- the size of the bit vector (only for fpType='bv')
211 minLength -- the minimum path length for an atom pair
212 maxLength -- the maxmimum path length for an atom pair
213 nBitsPerEntry -- the number of bits available for each pair
214 """
215 if fpType not in ['normal', 'hashed', 'bv']: raise ValueError("Unknown Atom pairs fingerprint type")
216 if atomId < 0:
217 return apDict[fpType](mol, nBits, minLength, maxLength, nBitsPerEntry, 0)
218 if atomId >= mol.GetNumAtoms(): raise ValueError("atom index greater than number of atoms")
219 return apDict[fpType](mol, nBits, minLength, maxLength, nBitsPerEntry, [atomId])
220
221
222 ttDict = {}
223 ttDict['normal'] = lambda m, bits, ts, bpe, ia: rdMD.GetTopologicalTorsionFingerprint(m, targetSize=ts, ignoreAtoms=ia)
224 ttDict['hashed'] = lambda m, bits, ts, bpe, ia: rdMD.GetHashedTopologicalTorsionFingerprint(m, nBits=bits, targetSize=ts, ignoreAtoms=ia)
225 ttDict['bv'] = lambda m, bits, ts, bpe, ia: rdMD.GetHashedTopologicalTorsionFingerprintAsBitVect(m, nBits=bits, targetSize=ts, nBitsPerEntry=bpe, ignoreAtoms=ia)
226
227
228 -def GetTTFingerprint(mol, atomId=-1, fpType='normal', nBits=2048, targetSize=4, nBitsPerEntry=4):
229 """
230 Calculates the topological torsion fingerprint with the pairs of atomId removed.
231
232 Parameters:
233 mol -- the molecule of interest
234 atomId -- the atom to remove the torsions for (if -1, no torsion is removed)
235 fpType -- the type of TT fingerprint ('normal', 'hashed', 'bv')
236 nBits -- the size of the bit vector (only for fpType='bv')
237 minLength -- the minimum path length for an atom pair
238 maxLength -- the maxmimum path length for an atom pair
239 nBitsPerEntry -- the number of bits available for each torsion
240 """
241 if fpType not in ['normal', 'hashed', 'bv']: raise ValueError("Unknown Topological torsion fingerprint type")
242 if atomId < 0:
243 return ttDict[fpType](mol, nBits, targetSize, nBitsPerEntry, 0)
244 if atomId >= mol.GetNumAtoms(): raise ValueError("atom index greater than number of atoms")
245 return ttDict[fpType](mol, nBits, targetSize, nBitsPerEntry, [atomId])
246
247
248
250 """
251 Calculates the Morgan fingerprint with the environments of atomId removed.
252
253 Parameters:
254 mol -- the molecule of interest
255 radius -- the maximum radius
256 fpType -- the type of Morgan fingerprint: 'count' or 'bv'
257 atomId -- the atom to remove the environments for (if -1, no environments is removed)
258 nBits -- the size of the bit vector (only for fpType = 'bv')
259 useFeatures -- if false: ConnectivityMorgan, if true: FeatureMorgan
260 """
261 if fpType not in ['bv', 'count']: raise ValueError("Unknown Morgan fingerprint type")
262 if not hasattr(mol, '_fpInfo'):
263 info = {}
264
265 if fpType == 'bv': molFp = rdMD.GetMorganFingerprintAsBitVect(mol, radius, nBits=nBits, useFeatures=useFeatures, bitInfo=info)
266 else: molFp = rdMD.GetMorganFingerprint(mol, radius, useFeatures=useFeatures, bitInfo=info)
267
268 if fpType == 'bv': bitmap = [DataStructs.ExplicitBitVect(nBits) for x in range(mol.GetNumAtoms())]
269 else: bitmap = [[] for x in range(mol.GetNumAtoms())]
270 for bit, es in iteritems(info):
271 for at1, rad in es:
272 if rad == 0:
273 if fpType == 'bv': bitmap[at1][bit] = 1
274 else: bitmap[at1].append(bit)
275 else:
276 env = Chem.FindAtomEnvironmentOfRadiusN(mol, rad, at1)
277 amap = {}
278 submol = Chem.PathToSubmol(mol, env, atomMap=amap)
279 for at2 in amap.keys():
280 if fpType == 'bv': bitmap[at2][bit] = 1
281 else: bitmap[at2].append(bit)
282 mol._fpInfo = (molFp, bitmap)
283
284 if atomId < 0:
285 return mol._fpInfo[0]
286 else:
287 if atomId >= mol.GetNumAtoms(): raise ValueError("atom index greater than number of atoms")
288 if len(mol._fpInfo) != 2: raise ValueError("_fpInfo not set")
289 if fpType == 'bv':
290 molFp = mol._fpInfo[0] ^ mol._fpInfo[1][atomId]
291 else:
292 molFp = copy.deepcopy(mol._fpInfo[0])
293
294 for bit in mol._fpInfo[1][atomId]:
295 molFp[bit] -= 1
296 return molFp
297
298
299 -def GetRDKFingerprint(mol, atomId=-1, fpType='bv', nBits=2048, minPath=1, maxPath=5, nBitsPerHash=2):
300 """
301 Calculates the RDKit fingerprint with the paths of atomId removed.
302
303 Parameters:
304 mol -- the molecule of interest
305 atomId -- the atom to remove the paths for (if -1, no path is removed)
306 fpType -- the type of RDKit fingerprint: 'bv'
307 nBits -- the size of the bit vector
308 minPath -- minimum path length
309 maxPath -- maximum path length
310 nBitsPerHash -- number of to set per path
311 """
312 if fpType not in ['bv', '']: raise ValueError("Unknown RDKit fingerprint type")
313 fpType = 'bv'
314 if not hasattr(mol, '_fpInfo'):
315 info = []
316
317 molFp = Chem.RDKFingerprint(mol, fpSize=nBits, minPath=minPath, maxPath=maxPath, nBitsPerHash=nBitsPerHash, atomBits=info)
318 mol._fpInfo = (molFp, info)
319
320 if atomId < 0:
321 return mol._fpInfo[0]
322 else:
323 if atomId >= mol.GetNumAtoms(): raise ValueError("atom index greater than number of atoms")
324 if len(mol._fpInfo) != 2: raise ValueError("_fpInfo not set")
325 molFp = copy.deepcopy(mol._fpInfo[0])
326 molFp.UnSetBitsFromList(mol._fpInfo[1][atomId])
327 return molFp
328