1
2
3
4
5
6
7
8
9
10
11 from __future__ import print_function
12 from rdkit import RDConfig
13
14 import sys,time,math
15 from rdkit.ML.Data import Stats
16 import rdkit.DistanceGeometry as DG
17 from rdkit import Chem
18 import numpy
19 from rdkit.Chem import rdDistGeom as MolDG
20 from rdkit.Chem import ChemicalFeatures
21 from rdkit.Chem import ChemicalForceFields
22 import Pharmacophore,ExcludedVolume
23 from rdkit import Geometry
24 _times = {}
25
26 from rdkit import RDLogger as logging
27 logger = logging.logger()
28 defaultFeatLength=2.0
29
31 """ returns a list of the heavy-atom neighbors of the
32 atom passed in:
33
34 >>> m = Chem.MolFromSmiles('CCO')
35 >>> l = GetAtomHeavyNeighbors(m.GetAtomWithIdx(0))
36 >>> len(l)
37 1
38 >>> isinstance(l[0],Chem.Atom)
39 True
40 >>> l[0].GetIdx()
41 1
42
43 >>> l = GetAtomHeavyNeighbors(m.GetAtomWithIdx(1))
44 >>> len(l)
45 2
46 >>> l[0].GetIdx()
47 0
48 >>> l[1].GetIdx()
49 2
50
51 """
52 res=[]
53 for nbr in atom.GetNeighbors():
54 if nbr.GetAtomicNum() != 1:
55 res.append(nbr)
56 return res
57
59 """ Adds an entry at the end of the bounds matrix for a point at
60 the center of a multi-point feature
61
62 returns a 2-tuple:
63 new bounds mat
64 index of point added
65
66 >>> boundsMat = numpy.array([[0.0,2.0,2.0],[1.0,0.0,2.0],[1.0,1.0,0.0]])
67 >>> match = [0,1,2]
68 >>> bm,idx = ReplaceGroup(match,boundsMat,slop=0.0)
69
70 the index is at the end:
71 >>> idx == 3
72 True
73
74 and the matrix is one bigger:
75 >>> bm.shape == (4, 4)
76 True
77
78 but the original bounds mat is not altered:
79 >>> boundsMat.shape == (3, 3)
80 True
81
82
83 We make the assumption that the points of the
84 feature form a regular polygon, are listed in order
85 (i.e. pt 0 is a neighbor to pt 1 and pt N-1)
86 and that the replacement point goes at the center:
87 >>> print(', '.join(['%.3f'%x for x in bm[-1]]))
88 0.577, 0.577, 0.577, 0.000
89 >>> print(', '.join(['%.3f'%x for x in bm[:,-1]]))
90 1.155, 1.155, 1.155, 0.000
91
92 The slop argument (default = 0.01) is fractional:
93 >>> bm,idx = ReplaceGroup(match,boundsMat)
94 >>> print(', '.join(['%.3f'%x for x in bm[-1]]))
95 0.572, 0.572, 0.572, 0.000
96 >>> print(', '.join(['%.3f'%x for x in bm[:,-1]]))
97 1.166, 1.166, 1.166, 0.000
98
99
100 """
101 maxVal = -1000.0
102 minVal = 1e8
103 nPts = len(match)
104 for i in range(nPts):
105 idx0 = match[i]
106 if i<nPts-1:
107 idx1 = match[i+1]
108 else:
109 idx1 = match[0]
110 if idx1<idx0:
111 idx0,idx1 = idx1,idx0
112 minVal = min(minVal,bounds[idx1,idx0])
113 maxVal = max(maxVal,bounds[idx0,idx1])
114 maxVal *= (1+slop)
115 minVal *= (1-slop)
116
117 scaleFact = 1.0/(2.0*math.sin(math.pi/nPts))
118 minVal *= scaleFact
119 maxVal *= scaleFact
120
121 replaceIdx = bounds.shape[0]
122 if not useDirs:
123 bm = numpy.zeros((bounds.shape[0]+1,bounds.shape[1]+1),numpy.float)
124 else:
125 bm = numpy.zeros((bounds.shape[0]+2,bounds.shape[1]+2),numpy.float)
126 bm[0:bounds.shape[0],0:bounds.shape[1]]=bounds
127 bm[:replaceIdx,replaceIdx]=1000.
128
129 if useDirs:
130 bm[:replaceIdx+1,replaceIdx+1]=1000.
131
132 bm[replaceIdx,replaceIdx+1]=dirLength+slop
133 bm[replaceIdx+1,replaceIdx]=dirLength-slop
134
135 for idx1 in match:
136 bm[idx1,replaceIdx]=maxVal
137 bm[replaceIdx,idx1]=minVal
138 if useDirs:
139
140 bm[idx1,replaceIdx+1] = numpy.sqrt(bm[replaceIdx,replaceIdx+1]**2+maxVal**2)
141 bm[replaceIdx+1,idx1] = numpy.sqrt(bm[replaceIdx+1,replaceIdx]**2+minVal**2)
142 return bm,replaceIdx
143
144 -def EmbedMol(mol,bm,atomMatch=None,weight=2.0,randomSeed=-1,
145 excludedVolumes=None):
146 """ Generates an embedding for a molecule based on a bounds matrix and adds
147 a conformer (id 0) to the molecule
148
149 if the optional argument atomMatch is provided, it will be used to provide
150 supplemental weights for the embedding routine (used in the optimization
151 phase to ensure that the resulting geometry really does satisfy the
152 pharmacophore).
153
154 if the excludedVolumes is provided, it should be a sequence of
155 ExcludedVolume objects
156
157 >>> m = Chem.MolFromSmiles('c1ccccc1C')
158 >>> bounds = MolDG.GetMoleculeBoundsMatrix(m)
159 >>> bounds.shape == (7, 7)
160 True
161 >>> m.GetNumConformers()
162 0
163 >>> EmbedMol(m,bounds,randomSeed=23)
164 >>> m.GetNumConformers()
165 1
166
167
168 """
169 nAts = mol.GetNumAtoms()
170 weights=[]
171 if(atomMatch):
172 for i in range(len(atomMatch)):
173 for j in range(i+1,len(atomMatch)):
174 weights.append((i,j,weight))
175 if(excludedVolumes):
176 for vol in excludedVolumes:
177 idx = vol.index
178
179 for i in range(nAts):
180 weights.append((i,idx,weight))
181 coords = DG.EmbedBoundsMatrix(bm,weights=weights,numZeroFail=1,randomSeed=randomSeed)
182
183
184
185 conf = Chem.Conformer(nAts)
186 conf.SetId(0)
187 for i in range(nAts):
188 conf.SetAtomPosition(i,list(coords[i]))
189 if excludedVolumes:
190 for vol in excludedVolumes:
191 vol.pos = numpy.array(coords[vol.index])
192
193
194 mol.AddConformer(conf)
195
197 """ Adds a set of excluded volumes to the bounds matrix
198 and returns the new matrix
199
200 excludedVolumes is a list of ExcludedVolume objects
201
202
203 >>> boundsMat = numpy.array([[0.0,2.0,2.0],[1.0,0.0,2.0],[1.0,1.0,0.0]])
204 >>> ev1 = ExcludedVolume.ExcludedVolume(([(0,),0.5,1.0],),exclusionDist=1.5)
205 >>> bm = AddExcludedVolumes(boundsMat,(ev1,))
206
207 the results matrix is one bigger:
208 >>> bm.shape == (4, 4)
209 True
210
211 and the original bounds mat is not altered:
212 >>> boundsMat.shape == (3, 3)
213 True
214
215 >>> print(', '.join(['%.3f'%x for x in bm[-1]]))
216 0.500, 1.500, 1.500, 0.000
217 >>> print(', '.join(['%.3f'%x for x in bm[:,-1]]))
218 1.000, 3.000, 3.000, 0.000
219
220 """
221 oDim = bm.shape[0]
222 dim = oDim+len(excludedVolumes)
223 res = numpy.zeros((dim,dim),numpy.float)
224 res[:oDim,:oDim] = bm
225 for i,vol in enumerate(excludedVolumes):
226 bmIdx = oDim+i
227 vol.index = bmIdx
228
229
230 res[bmIdx,:bmIdx] = vol.exclusionDist
231 res[:bmIdx,bmIdx] = 1000.0
232
233
234 for indices,minV,maxV in vol.featInfo:
235 for index in indices:
236 try:
237 res[bmIdx,index] = minV
238 res[index,bmIdx] = maxV
239 except IndexError:
240 logger.error('BAD INDEX: res[%d,%d], shape is %s'%(bmIdx,index,str(res.shape)))
241 raise IndexError
242
243
244 for j in range(bmIdx+1,dim):
245 res[bmIdx,j:dim] = 0
246 res[j:dim,bmIdx] = 1000
247
248 if smoothIt: DG.DoTriangleSmoothing(res)
249 return res
250
254 """ loops over a distance bounds matrix and replaces the elements
255 that are altered by a pharmacophore
256
257 **NOTE** this returns the resulting bounds matrix, but it may also
258 alter the input matrix
259
260 atomMatch is a sequence of sequences containing atom indices
261 for each of the pharmacophore's features.
262
263 >>> feats = [ChemicalFeatures.FreeChemicalFeature('HBondAcceptor', 'HAcceptor1', Geometry.Point3D(0.0, 0.0, 0.0)),
264 ... ChemicalFeatures.FreeChemicalFeature('HBondDonor', 'HDonor1', Geometry.Point3D(2.65, 0.0, 0.0)),
265 ... ]
266 >>> pcophore=Pharmacophore.Pharmacophore(feats)
267 >>> pcophore.setLowerBound(0,1, 1.0)
268 >>> pcophore.setUpperBound(0,1, 2.0)
269
270 >>> boundsMat = numpy.array([[0.0,3.0,3.0],[2.0,0.0,3.0],[2.0,2.0,0.0]])
271 >>> atomMatch = ((0,),(1,))
272 >>> bm = UpdatePharmacophoreBounds(boundsMat,atomMatch,pcophore)
273
274
275 In this case, there are no multi-atom features, so the result matrix
276 is the same as the input:
277 >>> bm is boundsMat
278 True
279
280 this means, of course, that the input boundsMat is altered:
281 >>> print(', '.join(['%.3f'%x for x in boundsMat[0]]))
282 0.000, 2.000, 3.000
283 >>> print(', '.join(['%.3f'%x for x in boundsMat[1]]))
284 1.000, 0.000, 3.000
285 >>> print(', '.join(['%.3f'%x for x in boundsMat[2]]))
286 2.000, 2.000, 0.000
287
288 """
289 replaceMap = {}
290 for i,matchI in enumerate(atomMatch):
291 if len(matchI)>1:
292 bm,replaceIdx = ReplaceGroup(matchI,bm,useDirs=useDirs)
293 replaceMap[i] = replaceIdx
294
295 for i,matchI in enumerate(atomMatch):
296 mi = replaceMap.get(i,matchI[0])
297 for j in range(i+1,len(atomMatch)):
298 mj = replaceMap.get(j,atomMatch[j][0])
299 if mi<mj:
300 idx0,idx1 = mi,mj
301 else:
302 idx0,idx1 = mj,mi
303 bm[idx0,idx1] = pcophore.getUpperBound(i,j)
304 bm[idx1,idx0] = pcophore.getLowerBound(i,j)
305
306 return bm
307
308
309
310 -def EmbedPharmacophore(mol,atomMatch,pcophore,randomSeed=-1,count=10,smoothFirst=True,
311 silent=False,bounds=None,excludedVolumes=None,targetNumber=-1,
312 useDirs=False):
313 """ Generates one or more embeddings for a molecule that satisfy a pharmacophore
314
315 atomMatch is a sequence of sequences containing atom indices
316 for each of the pharmacophore's features.
317
318 - count: is the maximum number of attempts to make a generating an embedding
319 - smoothFirst: toggles triangle smoothing of the molecular bounds matix
320 - bounds: if provided, should be the molecular bounds matrix. If this isn't
321 provided, the matrix will be generated.
322 - targetNumber: if this number is positive, it provides a maximum number
323 of embeddings to generate (i.e. we'll have count attempts to generate
324 targetNumber embeddings).
325
326 returns: a 3 tuple:
327 1) the molecular bounds matrix adjusted for the pharmacophore
328 2) a list of embeddings (molecules with a single conformer)
329 3) the number of failed attempts at embedding
330
331
332 >>> m = Chem.MolFromSmiles('OCCN')
333 >>> feats = [ChemicalFeatures.FreeChemicalFeature('HBondAcceptor', 'HAcceptor1', Geometry.Point3D(0.0, 0.0, 0.0)),
334 ... ChemicalFeatures.FreeChemicalFeature('HBondDonor', 'HDonor1', Geometry.Point3D(2.65, 0.0, 0.0)),
335 ... ]
336 >>> pcophore=Pharmacophore.Pharmacophore(feats)
337 >>> pcophore.setLowerBound(0,1, 2.5)
338 >>> pcophore.setUpperBound(0,1, 3.5)
339 >>> atomMatch = ((0,),(3,))
340
341 >>> bm,embeds,nFail = EmbedPharmacophore(m,atomMatch,pcophore,randomSeed=23,silent=1)
342 >>> len(embeds)
343 10
344 >>> nFail
345 0
346
347 Set up a case that can't succeed:
348 >>> pcophore=Pharmacophore.Pharmacophore(feats)
349 >>> pcophore.setLowerBound(0,1, 2.0)
350 >>> pcophore.setUpperBound(0,1, 2.1)
351 >>> atomMatch = ((0,),(3,))
352
353 >>> bm,embeds,nFail = EmbedPharmacophore(m,atomMatch,pcophore,randomSeed=23,silent=1)
354 >>> len(embeds)
355 0
356 >>> nFail
357 10
358
359 """
360 global _times
361 if not hasattr(mol,'_chiralCenters'):
362 mol._chiralCenters = Chem.FindMolChiralCenters(mol)
363
364 if bounds is None:
365 bounds = MolDG.GetMoleculeBoundsMatrix(mol)
366 if smoothFirst: DG.DoTriangleSmoothing(bounds)
367
368 bm = bounds.copy()
369
370
371
372
373
374
375 bm = UpdatePharmacophoreBounds(bm,atomMatch,pcophore,useDirs=useDirs,mol=mol)
376
377 if excludedVolumes:
378 bm = AddExcludedVolumes(bm,excludedVolumes,smoothIt=False)
379
380 if not DG.DoTriangleSmoothing(bm):
381 raise ValueError("could not smooth bounds matrix")
382
383
384
385
386
387
388
389
390 if targetNumber<=0:
391 targetNumber=count
392 nFailed = 0
393 res = []
394 for i in range(count):
395 tmpM = bm[:,:]
396 m2 = Chem.Mol(mol)
397 t1 = time.time()
398 try:
399 if randomSeed<=0:
400 seed = i*10+1
401 else:
402 seed = i*10+randomSeed
403 EmbedMol(m2,tmpM,atomMatch,randomSeed=seed,
404 excludedVolumes=excludedVolumes)
405 except ValueError:
406 if not silent:
407 logger.info('Embed failed')
408 nFailed += 1
409 else:
410 t2 = time.time()
411 _times['embed'] = _times.get('embed',0)+t2-t1
412 keepIt=True
413 for idx,stereo in mol._chiralCenters:
414 if stereo in ('R','S'):
415 vol = ComputeChiralVolume(m2,idx)
416 if (stereo=='R' and vol>=0) or \
417 (stereo=='S' and vol<=0):
418 keepIt=False
419 break
420 if keepIt:
421 res.append(m2)
422 else:
423 logger.debug('Removed embedding due to chiral constraints.')
424 if len(res)==targetNumber: break
425 return bm,res,nFailed
426
428 """ provides an OS independent way of detecting NaNs
429 This is intended to be used with values returned from the C++
430 side of things.
431
432 We can't actually test this from Python (which traps
433 zero division errors), but it would work something like
434 this if we could:
435 >>> isNaN(0)
436 False
437
438 #>>> isNan(1/0)
439 #True
440
441 """
442 if v!=v and sys.platform=='win32':
443 return True
444 elif v==0 and v==1 and sys.platform!='win32':
445 return True
446 return False
447
448
449 -def OptimizeMol(mol,bm,atomMatches=None,excludedVolumes=None,
450 forceConstant=1200.0,
451 maxPasses=5,verbose=False):
452 """ carries out a UFF optimization for a molecule optionally subject
453 to the constraints in a bounds matrix
454
455 - atomMatches, if provided, is a sequence of sequences
456 - forceConstant is the force constant of the spring used to enforce
457 the constraints
458
459 returns a 2-tuple:
460 1) the energy of the initial conformation
461 2) the energy post-embedding
462 NOTE that these energies include the energies of the constraints
463
464
465
466 >>> m = Chem.MolFromSmiles('OCCN')
467 >>> feats = [ChemicalFeatures.FreeChemicalFeature('HBondAcceptor', 'HAcceptor1', Geometry.Point3D(0.0, 0.0, 0.0)),
468 ... ChemicalFeatures.FreeChemicalFeature('HBondDonor', 'HDonor1', Geometry.Point3D(2.65, 0.0, 0.0)),
469 ... ]
470 >>> pcophore=Pharmacophore.Pharmacophore(feats)
471 >>> pcophore.setLowerBound(0,1, 2.5)
472 >>> pcophore.setUpperBound(0,1, 2.8)
473 >>> atomMatch = ((0,),(3,))
474 >>> bm,embeds,nFail = EmbedPharmacophore(m,atomMatch,pcophore,randomSeed=23,silent=1)
475 >>> len(embeds)
476 10
477 >>> testM = embeds[0]
478
479 Do the optimization:
480 >>> e1,e2 = OptimizeMol(testM,bm,atomMatches=atomMatch)
481
482 Optimizing should have lowered the energy:
483 >>> e2 < e1
484 True
485
486 Check the constrained distance:
487 >>> conf = testM.GetConformer(0)
488 >>> p0 = conf.GetAtomPosition(0)
489 >>> p3 = conf.GetAtomPosition(3)
490 >>> d03 = p0.Distance(p3)
491 >>> d03 >= pcophore.getLowerBound(0,1)-.01
492 True
493 >>> d03 <= pcophore.getUpperBound(0,1)+.01
494 True
495
496 If we optimize without the distance constraints (provided via the atomMatches
497 argument) we're not guaranteed to get the same results, particularly in a case
498 like the current one where the pharmcophore brings the atoms uncomfortably
499 close together:
500 >>> testM = embeds[1]
501 >>> e1,e2 = OptimizeMol(testM,bm)
502 >>> e2 < e1
503 True
504 >>> conf = testM.GetConformer(0)
505 >>> p0 = conf.GetAtomPosition(0)
506 >>> p3 = conf.GetAtomPosition(3)
507 >>> d03 = p0.Distance(p3)
508 >>> d03 >= pcophore.getLowerBound(0,1)-.01
509 True
510 >>> d03 <= pcophore.getUpperBound(0,1)+.01
511 False
512
513 """
514 try:
515 ff = ChemicalForceFields.UFFGetMoleculeForceField(mol)
516 except Exception:
517 logger.info('Problems building molecular forcefield',exc_info=True)
518 return -1.0,-1.0
519
520 weights=[]
521 if(atomMatches):
522 for k in range(len(atomMatches)):
523 for i in atomMatches[k]:
524 for l in range(k+1,len(atomMatches)):
525 for j in atomMatches[l]:
526 weights.append((i,j))
527 for i,j in weights:
528 if j<i:
529 i,j = j,i
530 minV = bm[j,i]
531 maxV = bm[i,j]
532 ff.AddDistanceConstraint(i,j,minV,maxV,forceConstant)
533 if excludedVolumes:
534 nAts = mol.GetNumAtoms()
535 conf = mol.GetConformer()
536 idx = nAts
537 for exVol in excludedVolumes:
538 assert exVol.pos is not None
539 logger.debug('ff.AddExtraPoint(%.4f,%.4f,%.4f)'%(exVol.pos[0],exVol.pos[1],
540 exVol.pos[2]))
541 ff.AddExtraPoint(exVol.pos[0],exVol.pos[1],exVol.pos[2],True)
542 indices = []
543 for localIndices,foo,bar in exVol.featInfo:
544 indices += list(localIndices)
545 for i in range(nAts):
546 v = numpy.array(conf.GetAtomPosition(i))-numpy.array(exVol.pos)
547 d = numpy.sqrt(numpy.dot(v,v))
548 if i not in indices:
549 if d<5.0:
550 logger.debug('ff.AddDistanceConstraint(%d,%d,%.3f,%d,%.0f)'%(i,idx,exVol.exclusionDist,1000,forceConstant))
551 ff.AddDistanceConstraint(i,idx,exVol.exclusionDist,1000,
552 forceConstant)
553
554 else:
555 logger.debug('ff.AddDistanceConstraint(%d,%d,%.3f,%.3f,%.0f)'%(i,idx,bm[exVol.index,i],bm[i,exVol.index],forceConstant))
556 ff.AddDistanceConstraint(i,idx,bm[exVol.index,i],bm[i,exVol.index],
557 forceConstant)
558 idx += 1
559 ff.Initialize()
560 e1 = ff.CalcEnergy()
561 if isNaN(e1):
562 raise ValueError('bogus energy')
563
564 if verbose:
565 print(Chem.MolToMolBlock(mol))
566 for i,vol in enumerate(excludedVolumes):
567 pos = ff.GetExtraPointPos(i)
568 print(' % 7.4f % 7.4f % 7.4f As 0 0 0 0 0 0 0 0 0 0 0 0'%tuple(pos), file=sys.stderr)
569 needsMore=ff.Minimize()
570 nPasses=0
571 while needsMore and nPasses<maxPasses:
572 needsMore=ff.Minimize()
573 nPasses+=1
574 e2 = ff.CalcEnergy()
575 if isNaN(e2):
576 raise ValueError('bogus energy')
577
578 if verbose:
579 print('--------')
580 print(Chem.MolToMolBlock(mol))
581 for i,vol in enumerate(excludedVolumes):
582 pos = ff.GetExtraPointPos(i)
583 print(' % 7.4f % 7.4f % 7.4f Sb 0 0 0 0 0 0 0 0 0 0 0 0'%tuple(pos), file=sys.stderr)
584 ff = None
585 return e1,e2
586
587 -def EmbedOne(mol,name,match,pcophore,count=1,silent=0,**kwargs):
588 """ generates statistics for a molecule's embeddings
589
590 Four energies are computed for each embedding:
591 1) E1: the energy (with constraints) of the initial embedding
592 2) E2: the energy (with constraints) of the optimized embedding
593 3) E3: the energy (no constraints) the geometry for E2
594 4) E4: the energy (no constraints) of the optimized free-molecule
595 (starting from the E3 geometry)
596
597 Returns a 9-tuple:
598 1) the mean value of E1
599 2) the sample standard deviation of E1
600 3) the mean value of E2
601 4) the sample standard deviation of E2
602 5) the mean value of E3
603 6) the sample standard deviation of E3
604 7) the mean value of E4
605 8) the sample standard deviation of E4
606 9) The number of embeddings that failed
607
608 """
609 global _times
610 atomMatch = [list(x.GetAtomIds()) for x in match]
611 bm,ms,nFailed = EmbedPharmacophore(mol,atomMatch,pcophore,count=count,
612 silent=silent,**kwargs)
613 e1s = []
614 e2s = []
615 e3s = []
616 e4s = []
617 d12s = []
618 d23s = []
619 d34s = []
620 for m in ms:
621 t1 = time.time()
622 try:
623 e1,e2 = OptimizeMol(m,bm,atomMatch)
624 except ValueError:
625 pass
626 else:
627 t2 = time.time()
628 _times['opt1'] = _times.get('opt1',0)+t2-t1
629
630 e1s.append(e1)
631 e2s.append(e2)
632
633 d12s.append(e1-e2)
634 t1 = time.time()
635 try:
636 e3,e4 = OptimizeMol(m,bm)
637 except ValueError:
638 pass
639 else:
640 t2 = time.time()
641 _times['opt2'] = _times.get('opt2',0)+t2-t1
642 e3s.append(e3)
643 e4s.append(e4)
644 d23s.append(e2-e3)
645 d34s.append(e3-e4)
646 count += 1
647 try:
648 e1,e1d = Stats.MeanAndDev(e1s)
649 except Exception:
650 e1 = -1.0
651 e1d=-1.0
652 try:
653 e2,e2d = Stats.MeanAndDev(e2s)
654 except Exception:
655 e2 = -1.0
656 e2d=-1.0
657 try:
658 e3,e3d = Stats.MeanAndDev(e3s)
659 except Exception:
660 e3 = -1.0
661 e3d=-1.0
662
663 try:
664 e4,e4d = Stats.MeanAndDev(e4s)
665 except Exception:
666 e4 = -1.0
667 e4d=-1.0
668 if not silent:
669 print('%s(%d): %.2f(%.2f) -> %.2f(%.2f) : %.2f(%.2f) -> %.2f(%.2f)' %
670 (name,nFailed,e1,e1d,e2,e2d,e3,e3d,e4,e4d))
671 return e1,e1d,e2,e2d,e3,e3d,e4,e4d,nFailed
672
674 """ generates a list of all possible mappings of a pharmacophore to a molecule
675
676 Returns a 2-tuple:
677 1) a boolean indicating whether or not all features were found
678 2) a list, numFeatures long, of sequences of features
679
680
681 >>> import os.path
682 >>> dataDir = os.path.join(RDConfig.RDCodeDir,'Chem/Pharm3D/test_data')
683 >>> featFactory = ChemicalFeatures.BuildFeatureFactory(os.path.join(dataDir,'BaseFeatures.fdef'))
684 >>> activeFeats = [ChemicalFeatures.FreeChemicalFeature('Acceptor', Geometry.Point3D(0.0, 0.0, 0.0)),
685 ... ChemicalFeatures.FreeChemicalFeature('Donor',Geometry.Point3D(0.0, 0.0, 0.0))]
686 >>> pcophore= Pharmacophore.Pharmacophore(activeFeats)
687 >>> m = Chem.MolFromSmiles('FCCN')
688 >>> match,mList = MatchPharmacophoreToMol(m,featFactory,pcophore)
689 >>> match
690 True
691
692 Two feature types:
693 >>> len(mList)
694 2
695
696 The first feature type, Acceptor, has two matches:
697 >>> len(mList[0])
698 2
699 >>> mList[0][0].GetAtomIds()
700 (0,)
701 >>> mList[0][1].GetAtomIds()
702 (3,)
703
704 The first feature type, Donor, has a single match:
705 >>> len(mList[1])
706 1
707 >>> mList[1][0].GetAtomIds()
708 (3,)
709
710 """
711 return MatchFeatsToMol(mol, featFactory, pcophore.getFeatures())
712
714 """ **INTERNAL USE ONLY**
715
716 >>> import os.path
717 >>> dataDir = os.path.join(RDConfig.RDCodeDir,'Chem/Pharm3D/test_data')
718 >>> featFactory = ChemicalFeatures.BuildFeatureFactory(os.path.join(dataDir,'BaseFeatures.fdef'))
719 >>> activeFeats = [ChemicalFeatures.FreeChemicalFeature('Acceptor', Geometry.Point3D(0.0, 0.0, 0.0)),
720 ... ChemicalFeatures.FreeChemicalFeature('Donor',Geometry.Point3D(0.0, 0.0, 0.0))]
721 >>> m = Chem.MolFromSmiles('FCCN')
722 >>> d =_getFeatDict(m,featFactory,activeFeats)
723 >>> sorted(list(d.keys()))
724 ['Acceptor', 'Donor']
725 >>> donors = d['Donor']
726 >>> len(donors)
727 1
728 >>> donors[0].GetAtomIds()
729 (3,)
730 >>> acceptors = d['Acceptor']
731 >>> len(acceptors)
732 2
733 >>> acceptors[0].GetAtomIds()
734 (0,)
735 >>> acceptors[1].GetAtomIds()
736 (3,)
737
738 """
739 molFeats = {}
740 for feat in features:
741 family = feat.GetFamily()
742 if not family in molFeats:
743 matches = featFactory.GetFeaturesForMol(mol,includeOnly=family)
744 molFeats[family] = matches
745 return molFeats
746
748 """ generates a list of all possible mappings of each feature to a molecule
749
750 Returns a 2-tuple:
751 1) a boolean indicating whether or not all features were found
752 2) a list, numFeatures long, of sequences of features
753
754
755 >>> import os.path
756 >>> dataDir = os.path.join(RDConfig.RDCodeDir,'Chem/Pharm3D/test_data')
757 >>> featFactory = ChemicalFeatures.BuildFeatureFactory(os.path.join(dataDir,'BaseFeatures.fdef'))
758 >>> activeFeats = [ChemicalFeatures.FreeChemicalFeature('Acceptor', Geometry.Point3D(0.0, 0.0, 0.0)),
759 ... ChemicalFeatures.FreeChemicalFeature('Donor',Geometry.Point3D(0.0, 0.0, 0.0))]
760 >>> m = Chem.MolFromSmiles('FCCN')
761 >>> match,mList = MatchFeatsToMol(m,featFactory,activeFeats)
762 >>> match
763 True
764
765 Two feature types:
766 >>> len(mList)
767 2
768
769 The first feature type, Acceptor, has two matches:
770 >>> len(mList[0])
771 2
772 >>> mList[0][0].GetAtomIds()
773 (0,)
774 >>> mList[0][1].GetAtomIds()
775 (3,)
776
777 The first feature type, Donor, has a single match:
778 >>> len(mList[1])
779 1
780 >>> mList[1][0].GetAtomIds()
781 (3,)
782
783 """
784 molFeats = _getFeatDict(mol,featFactory,features)
785 res = []
786 for feat in features:
787 matches = molFeats.get(feat.GetFamily(),[])
788 if len(matches) == 0 :
789 return False, None
790 res.append(matches)
791 return True, res
792
794 """ This generator takes a sequence of sequences as an argument and
795 provides all combinations of the elements of the subsequences:
796
797 >>> gen = CombiEnum(((1,2),(10,20)))
798 >>> next(gen)
799 [1, 10]
800 >>> next(gen)
801 [1, 20]
802
803 >>> [x for x in CombiEnum(((1,2),(10,20)))]
804 [[1, 10], [1, 20], [2, 10], [2, 20]]
805
806 >>> [x for x in CombiEnum(((1,2),(10,20),(100,200)))]
807 [[1, 10, 100], [1, 10, 200], [1, 20, 100], [1, 20, 200], [2, 10, 100], [2, 10, 200], [2, 20, 100], [2, 20, 200]]
808
809 """
810 if not len(sequence):
811 yield []
812 elif len(sequence)==1:
813 for entry in sequence[0]:
814 yield [entry]
815 else:
816 for entry in sequence[0]:
817 for subVal in CombiEnum(sequence[1:]):
818 yield [entry]+subVal
819
821 """ removes rows from a bounds matrix that are
822 that are greater than a threshold value away from a set of
823 other points
824
825 returns the modfied bounds matrix
826
827 The goal of this function is to remove rows from the bounds matrix
828 that correspond to atoms that are likely to be quite far from
829 the pharmacophore we're interested in. Because the bounds smoothing
830 we eventually have to do is N^3, this can be a big win
831
832 >>> boundsMat = numpy.array([[0.0,3.0,4.0],[2.0,0.0,3.0],[2.0,2.0,0.0]])
833 >>> bm = DownsampleBoundsMatrix(boundsMat,(0,),3.5)
834 >>> bm.shape == (2, 2)
835 True
836
837 we don't touch the input matrix:
838 >>> boundsMat.shape == (3, 3)
839 True
840
841 >>> print(', '.join(['%.3f'%x for x in bm[0]]))
842 0.000, 3.000
843 >>> print(', '.join(['%.3f'%x for x in bm[1]]))
844 2.000, 0.000
845
846 if the threshold is high enough, we don't do anything:
847 >>> boundsMat = numpy.array([[0.0,4.0,3.0],[2.0,0.0,3.0],[2.0,2.0,0.0]])
848 >>> bm = DownsampleBoundsMatrix(boundsMat,(0,),5.0)
849 >>> bm.shape == (3, 3)
850 True
851
852 If there's a max value that's close enough to *any* of the indices
853 we pass in, we'll keep it:
854 >>> boundsMat = numpy.array([[0.0,4.0,3.0],[2.0,0.0,3.0],[2.0,2.0,0.0]])
855 >>> bm = DownsampleBoundsMatrix(boundsMat,(0,1),3.5)
856 >>> bm.shape == (3, 3)
857 True
858
859 """
860 nPts = bm.shape[0]
861 k = numpy.zeros(nPts,numpy.int0)
862 for idx in indices: k[idx]=1
863 for i in indices:
864 row = bm[i]
865 for j in range(i+1,nPts):
866 if not k[j] and row[j]<maxThresh:
867 k[j]=1
868 keep = numpy.nonzero(k)[0]
869 bm2 = numpy.zeros((len(keep),len(keep)),numpy.float)
870 for i,idx in enumerate(keep):
871 row = bm[idx]
872 bm2[i] = numpy.take(row,keep)
873 return bm2
874
876 """
877
878 >>> feats = [ChemicalFeatures.FreeChemicalFeature('HBondAcceptor', 'HAcceptor1', Geometry.Point3D(0.0, 0.0, 0.0)),
879 ... ChemicalFeatures.FreeChemicalFeature('HBondDonor', 'HDonor1', Geometry.Point3D(2.65, 0.0, 0.0)),
880 ... ChemicalFeatures.FreeChemicalFeature('Aromatic', 'Aromatic1', Geometry.Point3D(5.12, 0.908, 0.0)),
881 ... ]
882 >>> pcophore=Pharmacophore.Pharmacophore(feats)
883 >>> pcophore.setLowerBound(0,1, 1.1)
884 >>> pcophore.setUpperBound(0,1, 1.9)
885 >>> pcophore.setLowerBound(0,2, 2.1)
886 >>> pcophore.setUpperBound(0,2, 2.9)
887 >>> pcophore.setLowerBound(1,2, 2.1)
888 >>> pcophore.setUpperBound(1,2, 3.9)
889
890 >>> bounds = numpy.array([[0,2,3],[1,0,4],[2,3,0]],numpy.float)
891 >>> CoarseScreenPharmacophore(((0,),(1,)),bounds,pcophore)
892 True
893
894 >>> CoarseScreenPharmacophore(((0,),(2,)),bounds,pcophore)
895 False
896
897 >>> CoarseScreenPharmacophore(((1,),(2,)),bounds,pcophore)
898 False
899
900 >>> CoarseScreenPharmacophore(((0,),(1,),(2,)),bounds,pcophore)
901 True
902
903 >>> CoarseScreenPharmacophore(((1,),(0,),(2,)),bounds,pcophore)
904 False
905
906 >>> CoarseScreenPharmacophore(((2,),(1,),(0,)),bounds,pcophore)
907 False
908
909 # we ignore the point locations here and just use their definitions:
910 >>> feats = [ChemicalFeatures.FreeChemicalFeature('HBondAcceptor', 'HAcceptor1', Geometry.Point3D(0.0, 0.0, 0.0)),
911 ... ChemicalFeatures.FreeChemicalFeature('HBondDonor', 'HDonor1', Geometry.Point3D(2.65, 0.0, 0.0)),
912 ... ChemicalFeatures.FreeChemicalFeature('Aromatic', 'Aromatic1', Geometry.Point3D(5.12, 0.908, 0.0)),
913 ... ChemicalFeatures.FreeChemicalFeature('HBondDonor', 'HDonor1', Geometry.Point3D(2.65, 0.0, 0.0)),
914 ... ]
915 >>> pcophore=Pharmacophore.Pharmacophore(feats)
916 >>> pcophore.setLowerBound(0,1, 2.1)
917 >>> pcophore.setUpperBound(0,1, 2.9)
918 >>> pcophore.setLowerBound(0,2, 2.1)
919 >>> pcophore.setUpperBound(0,2, 2.9)
920 >>> pcophore.setLowerBound(0,3, 2.1)
921 >>> pcophore.setUpperBound(0,3, 2.9)
922 >>> pcophore.setLowerBound(1,2, 1.1)
923 >>> pcophore.setUpperBound(1,2, 1.9)
924 >>> pcophore.setLowerBound(1,3, 1.1)
925 >>> pcophore.setUpperBound(1,3, 1.9)
926 >>> pcophore.setLowerBound(2,3, 1.1)
927 >>> pcophore.setUpperBound(2,3, 1.9)
928 >>> bounds = numpy.array([[0,3,3,3],[2,0,2,2],[2,1,0,2],[2,1,1,0]],numpy.float)
929
930 >>> CoarseScreenPharmacophore(((0,),(1,),(2,),(3,)),bounds,pcophore)
931 True
932
933 >>> CoarseScreenPharmacophore(((0,),(1,),(3,),(2,)),bounds,pcophore)
934 True
935
936 >>> CoarseScreenPharmacophore(((1,),(0,),(3,),(2,)),bounds,pcophore)
937 False
938
939 """
940 for k in range(len(atomMatch)):
941 if len(atomMatch[k])==1:
942 for l in range(k+1,len(atomMatch)):
943 if len(atomMatch[l])==1:
944 idx0 = atomMatch[k][0]
945 idx1 = atomMatch[l][0]
946 if idx1<idx0:
947 idx0,idx1=idx1,idx0
948 if bounds[idx1,idx0] >= pcophore.getUpperBound(k, l) or \
949 bounds[idx0,idx1] <= pcophore.getLowerBound(k, l) :
950 if verbose:
951 print('\t (%d,%d) [%d,%d] fail'%(idx1,idx0,k,l))
952 print('\t %f,%f - %f,%f' %
953 (bounds[idx1,idx0],pcophore.getUpperBound(k,l),
954 bounds[idx0,idx1],pcophore.getLowerBound(k,l)))
955
956
957
958
959 return False
960 return True
961
963 """ checks to see if a particular mapping of features onto
964 a molecule satisfies a pharmacophore's 2D restrictions
965
966
967 >>> activeFeats = [ChemicalFeatures.FreeChemicalFeature('Acceptor', Geometry.Point3D(0.0, 0.0, 0.0)),
968 ... ChemicalFeatures.FreeChemicalFeature('Donor',Geometry.Point3D(0.0, 0.0, 0.0))]
969 >>> pcophore= Pharmacophore.Pharmacophore(activeFeats)
970 >>> pcophore.setUpperBound2D(0,1,3)
971 >>> m = Chem.MolFromSmiles('FCC(N)CN')
972 >>> Check2DBounds(((0,),(3,)),m,pcophore)
973 True
974 >>> Check2DBounds(((0,),(5,)),m,pcophore)
975 False
976
977 """
978 dm = Chem.GetDistanceMatrix(mol,False,False,False)
979 nFeats = len(atomMatch)
980 for i in range(nFeats):
981 for j in range(i+1,nFeats):
982 lowerB = pcophore._boundsMat2D[j,i]
983 upperB = pcophore._boundsMat2D[i,j]
984 dij=10000
985 for atomI in atomMatch[i]:
986 for atomJ in atomMatch[j]:
987 try:
988 dij = min(dij,dm[atomI,atomJ])
989 except IndexError:
990 print('bad indices:',atomI,atomJ)
991 print(' shape:',dm.shape)
992 print(' match:',atomMatch)
993 print(' mol:')
994 print(Chem.MolToMolBlock(mol))
995 raise IndexError
996 if dij<lowerB or dij>upperB:
997 return False
998 return True
999
1000 -def _checkMatch(match,mol,bounds,pcophore,use2DLimits):
1001 """ **INTERNAL USE ONLY**
1002
1003 checks whether a particular atom match can be satisfied by
1004 a molecule
1005
1006 """
1007 atomMatch = ChemicalFeatures.GetAtomMatch(match)
1008 if not atomMatch:
1009 return None
1010 elif use2DLimits:
1011 if not Check2DBounds(atomMatch,mol,pcophore):
1012 return None
1013 if not CoarseScreenPharmacophore(atomMatch,bounds,pcophore):
1014 return None
1015 return atomMatch
1016
1017 -def ConstrainedEnum(matches,mol,pcophore,bounds,use2DLimits=False,
1018 index=0,soFar=[]):
1019 """ Enumerates the list of atom mappings a molecule
1020 has to a particular pharmacophore.
1021 We do check distance bounds here.
1022
1023
1024 """
1025 nMatches = len(matches)
1026 if index>=nMatches:
1027 yield soFar,[]
1028 elif index==nMatches-1:
1029 for entry in matches[index]:
1030 nextStep = soFar+[entry]
1031 if index != 0:
1032 atomMatch = _checkMatch(nextStep,mol,bounds,pcophore,use2DLimits)
1033 else:
1034 atomMatch = ChemicalFeatures.GetAtomMatch(nextStep)
1035 if atomMatch:
1036 yield soFar+[entry],atomMatch
1037 else:
1038 for entry in matches[index]:
1039 nextStep = soFar+[entry]
1040 if index != 0:
1041 atomMatch = _checkMatch(nextStep,mol,bounds,pcophore,use2DLimits)
1042 if not atomMatch:
1043 continue
1044 for val in ConstrainedEnum(matches,mol,pcophore,bounds,use2DLimits=use2DLimits,
1045 index=index+1,soFar=nextStep):
1046 if val:
1047 yield val
1048
1049 -def MatchPharmacophore(matches,bounds,pcophore,useDownsampling=False,
1050 use2DLimits=False,mol=None,excludedVolumes=None,
1051 useDirs=False):
1052 """
1053
1054 if use2DLimits is set, the molecule must also be provided and topological
1055 distances will also be used to filter out matches
1056
1057 """
1058 for match,atomMatch in ConstrainedEnum(matches,mol,pcophore,bounds,
1059 use2DLimits=use2DLimits):
1060 bm = bounds.copy()
1061 bm = UpdatePharmacophoreBounds(bm,atomMatch,pcophore,useDirs=useDirs,mol=mol);
1062
1063 if excludedVolumes:
1064 localEvs = []
1065 for eV in excludedVolumes:
1066 featInfo = []
1067 for i,entry in enumerate(atomMatch):
1068 info = list(eV.featInfo[i])
1069 info[0] = entry
1070 featInfo.append(info)
1071 localEvs.append(ExcludedVolume.ExcludedVolume(featInfo,eV.index,
1072 eV.exclusionDist))
1073 bm = AddExcludedVolumes(bm,localEvs,smoothIt=False)
1074
1075 sz = bm.shape[0]
1076 if useDownsampling:
1077 indices = []
1078 for entry in atomMatch:
1079 indices.extend(entry)
1080 if excludedVolumes:
1081 for vol in localEvs:
1082 indices.append(vol.index)
1083 bm = DownsampleBoundsMatrix(bm,indices)
1084 if DG.DoTriangleSmoothing(bm):
1085 return 0,bm,match,(sz,bm.shape[0])
1086
1087 return 1,None,None,None
1088
1089 -def GetAllPharmacophoreMatches(matches,bounds,pcophore,useDownsampling=0,
1090 progressCallback=None,
1091 use2DLimits=False,mol=None,
1092 verbose=False):
1093 res = []
1094 nDone = 0
1095 for match in CombiEnum(matches):
1096 atomMatch = ChemicalFeatures.GetAtomMatch(match)
1097 if atomMatch and use2DLimits and mol:
1098 pass2D = Check2DBounds(atomMatch,mol,pcophore)
1099 if verbose:
1100 print('..',atomMatch)
1101 print(' ..Pass2d:',pass2D)
1102 else:
1103 pass2D = True
1104 if atomMatch and pass2D and \
1105 CoarseScreenPharmacophore(atomMatch,bounds,pcophore,verbose=verbose):
1106 if verbose:
1107 print(' ..CoarseScreen: Pass')
1108
1109 bm = bounds.copy()
1110 if verbose:
1111 print('pre update:')
1112 for row in bm:
1113 print(' ',' '.join(['% 4.2f'%x for x in row]))
1114 bm = UpdatePharmacophoreBounds(bm,atomMatch,pcophore);
1115 sz = bm.shape[0]
1116 if verbose:
1117 print('pre downsample:')
1118 for row in bm:
1119 print(' ',' '.join(['% 4.2f'%x for x in row]))
1120
1121 if useDownsampling:
1122 indices = []
1123 for entry in atomMatch:
1124 indices += list(entry)
1125 bm = DownsampleBoundsMatrix(bm,indices)
1126 if verbose:
1127 print('post downsample:')
1128 for row in bm:
1129 print(' ',' '.join(['% 4.2f'%x for x in row]))
1130
1131 if DG.DoTriangleSmoothing(bm):
1132 res.append(match)
1133 elif verbose:
1134 print('cannot smooth')
1135 nDone+=1
1136 if progressCallback:
1137 progressCallback(nDone)
1138 return res
1139
1140
1142 """ Computes the chiral volume of an atom
1143
1144 We're using the chiral volume formula from Figure 7 of
1145 Blaney and Dixon, Rev. Comp. Chem. V, 299-335 (1994)
1146
1147 >>> import os.path
1148 >>> dataDir = os.path.join(RDConfig.RDCodeDir,'Chem/Pharm3D/test_data')
1149
1150 R configuration atoms give negative volumes:
1151 >>> mol = Chem.MolFromMolFile(os.path.join(dataDir,'mol-r.mol'))
1152 >>> Chem.AssignStereochemistry(mol)
1153 >>> mol.GetAtomWithIdx(1).GetProp('_CIPCode')
1154 'R'
1155 >>> ComputeChiralVolume(mol,1) < 0
1156 True
1157
1158 S configuration atoms give positive volumes:
1159 >>> mol = Chem.MolFromMolFile(os.path.join(dataDir,'mol-s.mol'))
1160 >>> Chem.AssignStereochemistry(mol)
1161 >>> mol.GetAtomWithIdx(1).GetProp('_CIPCode')
1162 'S'
1163 >>> ComputeChiralVolume(mol,1) > 0
1164 True
1165
1166 Non-chiral (or non-specified) atoms give zero volume:
1167 >>> ComputeChiralVolume(mol,0) == 0.0
1168 True
1169
1170 We also work on 3-coordinate atoms (with implicit Hs):
1171 >>> mol = Chem.MolFromMolFile(os.path.join(dataDir,'mol-r-3.mol'))
1172 >>> Chem.AssignStereochemistry(mol)
1173 >>> mol.GetAtomWithIdx(1).GetProp('_CIPCode')
1174 'R'
1175 >>> ComputeChiralVolume(mol,1)<0
1176 True
1177
1178 >>> mol = Chem.MolFromMolFile(os.path.join(dataDir,'mol-s-3.mol'))
1179 >>> Chem.AssignStereochemistry(mol)
1180 >>> mol.GetAtomWithIdx(1).GetProp('_CIPCode')
1181 'S'
1182 >>> ComputeChiralVolume(mol,1)>0
1183 True
1184
1185
1186
1187 """
1188 conf = mol.GetConformer(confId)
1189 Chem.AssignStereochemistry(mol)
1190 center = mol.GetAtomWithIdx(centerIdx)
1191 if not center.HasProp('_CIPCode'):
1192 return 0.0
1193
1194 nbrs = center.GetNeighbors()
1195 nbrRanks = []
1196 for nbr in nbrs:
1197 rank = int(nbr.GetProp('_CIPRank'))
1198 pos = conf.GetAtomPosition(nbr.GetIdx())
1199 nbrRanks.append((rank,pos))
1200
1201
1202
1203 if len(nbrRanks)==3:
1204 nbrRanks.append((-1,conf.GetAtomPosition(centerIdx)))
1205 nbrRanks.sort()
1206
1207 ps = [x[1] for x in nbrRanks]
1208 v1 = ps[0]-ps[3]
1209 v2 = ps[1]-ps[3]
1210 v3 = ps[2]-ps[3]
1211
1212 res = v1.DotProduct(v2.CrossProduct(v3))
1213 return res
1214
1215
1216
1217
1218
1219
1220
1221
1222
1223
1225 import doctest,sys
1226 return doctest.testmod(sys.modules["__main__"])
1227
1228 if __name__ == '__main__':
1229 import sys
1230 failed,tried = _test()
1231 sys.exit(failed)
1232