1
2
3
4
5
6
7
8 """ Torsion Fingerprints (Deviation) (TFD)
9 According to a paper from Schulz-Gasch et al., JCIM, 52, 1499-1512 (2012).
10
11 """
12 from rdkit import rdBase
13 from rdkit import RDConfig
14 from rdkit import Geometry
15 from rdkit import Chem
16 from rdkit.Chem import rdchem
17 from rdkit.Chem import rdMolDescriptors
18 import math, os
19
21 """ Helper function to check if all atoms in the list are the same
22
23 Arguments:
24 - inv: atom invariants (used to define equivalence of atoms)
25 - atoms: list of atoms to check
26
27 Return: boolean
28 """
29 match = True
30 for i in range(len(atoms)-1):
31 for j in range(i+1, len(atoms)):
32 if (inv[atoms[i].GetIdx()] != inv[atoms[j].GetIdx()]):
33 match = False
34 return match
35 return match
36
38 """ Helper function to check if all atoms in the list are NOT the same
39
40 Arguments:
41 - inv: atom invariants (used to define equivalence of atoms)
42 - atoms: list of atoms to check
43
44 Return: boolean
45 """
46 match = True
47 for i in range(len(atoms)-1):
48 for j in range(i+1, len(atoms)):
49 if (inv[atoms[i].GetIdx()] == inv[atoms[j].GetIdx()]):
50 match = False
51 return match
52 return match
53
55 """ Helper function to check if two atoms in the list are the same,
56 and one not
57 Note: Works only for three atoms
58
59 Arguments:
60 - inv: atom invariants (used to define equivalence of atoms)
61 - atoms: list of atoms to check
62
63 Return: atom that is different
64 """
65 if len(atoms) != 3:
66 raise ValueError("Number of atoms must be three")
67 a1 = atoms[0].GetIdx()
68 a2 = atoms[1].GetIdx()
69 a3 = atoms[2].GetIdx()
70 if (inv[a1] == inv[a2] and inv[a1] != inv[a3] and inv[a2] != inv[a3]):
71 return atoms[2]
72 elif (inv[a1] != inv[a2] and inv[a1] == inv[a3] and inv[a2] != inv[a3]):
73 return atoms[1]
74 elif (inv[a1] != inv[a2] and inv[a1] != inv[a3] and inv[a2] == inv[a3]):
75 return atoms[0]
76 return None
77
79 """ Helper function to calculate the atom invariants for each atom
80 with a given radius
81
82 Arguments:
83 - mol: the molecule of interest
84 - radius: the radius for the Morgan fingerprint
85
86 Return: list of atom invariants
87 """
88 inv = []
89 for i in range(mol.GetNumAtoms()):
90 info = {}
91 fp = rdMolDescriptors.GetMorganFingerprint(mol, radius, fromAtoms=[i], bitInfo=info)
92 for k in info.keys():
93 if info[k][0][1] == radius:
94 inv.append(k)
95 return inv
96
98 """ Helper function to calculate the number of heavy atom neighbors.
99
100 Arguments:
101 - atom1: the atom of interest
102 - aid2: atom index that should be excluded from neighbors (default: none)
103
104 Return: a list of heavy atom neighbors of the given atom
105 """
106 if aid2 < 0:
107 return [n for n in atom1.GetNeighbors() if n.GetSymbol()!='H']
108 else:
109 return [n for n in atom1.GetNeighbors() if (n.GetSymbol()!='H' and n.GetIdx()!=aid2)]
110
112 """ Helper function to calculate the index of the reference atom for
113 a given atom
114
115 Arguments:
116 - neighbors: list of the neighbors of the atom
117 - inv: atom invariants
118
119 Return: list of atom indices as reference for torsion
120 """
121 if len(neighbors) == 1:
122 return [neighbors[0]]
123 elif _doMatch(inv, neighbors):
124 return neighbors
125 elif _doNotMatch(inv, neighbors):
126
127 neighbors = sorted(neighbors, key = lambda x: inv[x.GetIdx()])
128 return [neighbors[0]]
129 at = _doMatchExcept1(inv, neighbors)
130 if at is None:
131 raise ValueError("Atom neighbors are either all the same or all different")
132 return [at]
133
135 """ Determine the bonds (or pair of atoms treated like a bond) for which
136 torsions should be calculated.
137
138 Arguments:
139 - refmol: the molecule of interest
140 - ignoreColinearBonds: if True (default), single bonds adjacent to
141 triple bonds are ignored
142 if False, alternative not-covalently bound
143 atoms are used to define the torsion
144 """
145
146
147 patts = [Chem.MolFromSmarts(x) for x in ['*#*', '[$([C](=*)=*)]']]
148 atomFlags = [0]*mol.GetNumAtoms()
149 for p in patts:
150 if mol.HasSubstructMatch(p):
151 matches = mol.GetSubstructMatches(p)
152 for match in matches:
153 for a in match:
154 atomFlags[a] = 1
155
156 bonds = []
157 doneBonds = [0]*mol.GetNumBonds()
158 for b in mol.GetBonds():
159 if b.IsInRing(): continue
160 a1 = b.GetBeginAtomIdx()
161 a2 = b.GetEndAtomIdx()
162 nb1 = _getHeavyAtomNeighbors(b.GetBeginAtom(), a2)
163 nb2 = _getHeavyAtomNeighbors(b.GetEndAtom(), a1)
164 if not doneBonds[b.GetIdx()] and (nb1 and nb2):
165 doneBonds[b.GetIdx()] = 1;
166
167 if atomFlags[a1] or atomFlags[a2]:
168 if not ignoreColinearBonds:
169 while len(nb1)==1 and atomFlags[a1]:
170 a1old = a1
171 a1 = nb1[0].GetIdx()
172 b = mol.GetBondBetweenAtoms(a1old, a1)
173 if b.GetEndAtom().GetIdx() == a1old:
174 nb1 = _getHeavyAtomNeighbors(b.GetBeginAtom(), a1old)
175 else:
176 nb1 = _getHeavyAtomNeighbors(b.GetEndAtom(), a1old)
177 doneBonds[b.GetIdx()] = 1;
178 while len(nb2)==1 and atomFlags[a2]:
179 doneBonds[b.GetIdx()] = 1;
180 a2old = a2
181 a2 = nb2[0].GetIdx()
182 b = mol.GetBondBetweenAtoms(a2old, a2)
183 if b.GetBeginAtom().GetIdx() == a2old:
184 nb2 = _getHeavyAtomNeighbors(b.GetEndAtom(), a2old)
185 else:
186 nb2 = _getHeavyAtomNeighbors(b.GetBeginAtom(), a2old)
187 doneBonds[b.GetIdx()] = 1;
188 if nb1 and nb2:
189 bonds.append((a1, a2, nb1, nb2))
190
191 else:
192 bonds.append((a1, a2, nb1, nb2))
193 return bonds
194
196 """ Calculate a list of torsions for a given molecule. For each torsion
197 the four atom indices are determined and stored in a set.
198
199 Arguments:
200 - mol: the molecule of interest
201 - maxDev: maximal deviation used for normalization
202 'equal': all torsions are normalized using 180.0 (default)
203 'spec': each torsion is normalized using its specific
204 maximal deviation as given in the paper
205 - symmRadius: radius used for calculating the atom invariants
206 (default: 2)
207 - ignoreColinearBonds: if True (default), single bonds adjacent to
208 triple bonds are ignored
209 if False, alternative not-covalently bound
210 atoms are used to define the torsion
211
212 Return: two lists of torsions: non-ring and ring torsions
213 """
214 if maxDev not in ['equal', 'spec']:
215 raise ValueError("maxDev must be either equal or spec")
216
217 bonds = _getBondsForTorsions(mol, ignoreColinearBonds)
218
219 if symmRadius > 0:
220 inv = _getAtomInvariantsWithRadius(mol, symmRadius)
221 else:
222 inv = rdMolDescriptors.GetConnectivityInvariants(mol)
223
224 tors_list = []
225 for a1, a2, nb1, nb2 in bonds:
226 d1 = _getIndexforTorsion(nb1, inv)
227 d2 = _getIndexforTorsion(nb2, inv)
228 if len(d1) == 1 and len(d2) == 1:
229 tors_list.append(([(d1[0].GetIdx(), a1, a2, d2[0].GetIdx())], 180.0))
230 elif len(d1) == 1:
231 if len(nb2) == 2:
232 tors_list.append(([(d1[0].GetIdx(), a1, a2, nb.GetIdx()) for nb in d2], 90.0))
233 else:
234 tors_list.append(([(d1[0].GetIdx(), a1, a2, nb.GetIdx()) for nb in d2], 60.0))
235 elif len(d2) == 1:
236 if len(nb1) == 2:
237 tors_list.append(([(nb.GetIdx(), a1, a2, d2[0].GetIdx()) for nb in d1], 90.0))
238 else:
239 tors_list.append(([(nb.GetIdx(), a1, a2, d2[0].GetIdx()) for nb in d1], 60.0))
240 else:
241 tmp = []
242 for n1 in d1:
243 for n2 in d2:
244 tmp.append((n1.GetIdx(), a1, a2, n2.GetIdx()))
245 if len(nb1) == 2 and len(nb2) == 2:
246 tors_list.append((tmp, 90.0))
247 elif len(nb1) == 3 and len(nb2) == 3:
248 tors_list.append((tmp, 60.0))
249 else:
250 tors_list.append((tmp, 30.0))
251
252 if maxDev == 'equal':
253 tors_list = [(t,180.0) for t,d in tors_list]
254
255 rings = Chem.GetSymmSSSR(mol)
256 tors_list_rings = []
257 for r in rings:
258
259 tmp = []
260 num = len(r)
261 maxdev = 180.0 * math.exp(-0.025*(num-14)*(num-14))
262 for i in range(len(r)):
263 tmp.append((r[i], r[(i+1)%num], r[(i+2)%num], r[(i+3)%num]))
264 tors_list_rings.append((tmp,maxdev))
265 return tors_list, tors_list_rings
266
268 """ Helper function to retrieve the coordinates of the four atoms
269 in a torsion
270
271 Arguments:
272 - atoms: list with the four atoms
273 - conf: conformation of the molecule
274
275 Return: Point3D objects of the four atoms
276 """
277 if len(atoms) != 4:
278 raise ValueError("List must contain exactly four atoms")
279 p1 = conf.GetAtomPosition(atoms[0])
280 p2 = conf.GetAtomPosition(atoms[1])
281 p3 = conf.GetAtomPosition(atoms[2])
282 p4 = conf.GetAtomPosition(atoms[3])
283 return p1, p2, p3, p4
284
286 """ Calculate the torsion angles for a list of non-ring and
287 a list of ring torsions.
288
289 Arguments:
290 - mol: the molecule of interest
291 - tors_list: list of non-ring torsions
292 - tors_list_rings: list of ring torsions
293 - confId: index of the conformation (default: first conformer)
294
295 Return: list of torsion angles
296 """
297 torsions = []
298 conf = mol.GetConformer(confId)
299 for quartets,maxdev in tors_list:
300 tors = []
301
302 for atoms in quartets:
303 p1, p2, p3, p4 = _getTorsionAtomPositions(atoms, conf)
304 tmpTors = (Geometry.ComputeSignedDihedralAngle(p1, p2, p3, p4)/math.pi)*180.0
305 if tmpTors < 0: tmpTors += 360.0
306 tors.append(tmpTors)
307 torsions.append((tors, maxdev))
308
309 for quartets,maxdev in tors_list_rings:
310 num = len(quartets)
311
312 tors = 0
313 for atoms in quartets:
314 p1, p2, p3, p4 = _getTorsionAtomPositions(atoms, conf)
315 tmpTors = abs((Geometry.ComputeSignedDihedralAngle(p1, p2, p3, p4)/math.pi)*180.0)
316 tors += tmpTors
317 tors /= num
318 torsions.append(([tors], maxdev))
319 return torsions
320
322 """ Helper function to identify the atoms of the most central bond.
323
324 Arguments:
325 - mol: the molecule of interest
326 - distmat: distance matrix of the molecule
327
328 Return: atom indices of the two most central atoms (in order)
329 """
330 from numpy import std
331
332 stds = []
333 for i in range(mol.GetNumAtoms()):
334
335 if len(_getHeavyAtomNeighbors(mol.GetAtomWithIdx(i))) < 2: continue
336 tmp = [d for d in distmat[i]]
337 tmp.pop(i)
338 stds.append((std(tmp), i))
339 stds.sort()
340 aid1 = stds[0][1]
341
342 i = 1
343 while 1:
344 if mol.GetBondBetweenAtoms(aid1, stds[i][1]) is None:
345 i += 1
346 else:
347 aid2 = stds[i][1]
348 break
349 return aid1, aid2
350
352 """ Helper function to calculate the beta for torsion weights
353 according to the formula in the paper.
354 w(dmax/2) = 0.1
355
356 Arguments:
357 - mol: the molecule of interest
358 - distmat: distance matrix of the molecule
359 - aid1: atom index of the most central atom
360
361 Return: value of beta (float)
362 """
363
364 bonds = []
365 for b in mol.GetBonds():
366 nb1 = _getHeavyAtomNeighbors(b.GetBeginAtom())
367 nb2 = _getHeavyAtomNeighbors(b.GetEndAtom())
368 if len(nb2) > 1 and len(nb2) > 1:
369 bonds.append(b)
370
371 dmax = 0
372 for b in bonds:
373 bid1 = b.GetBeginAtom().GetIdx()
374 bid2 = b.GetEndAtom().GetIdx()
375 d = max([distmat[aid1][bid1], distmat[aid1][bid2]])
376 if (d > dmax): dmax = d
377 dmax2 = dmax/2.0
378 beta = -math.log(0.1)/(dmax2*dmax2)
379 return beta
380
382 """ Calculate the weights for the torsions in a molecule.
383 By default, the highest weight is given to the bond
384 connecting the two most central atoms.
385 If desired, two alternate atoms can be specified (must
386 be connected by a bond).
387
388 Arguments:
389 - mol: the molecule of interest
390 - aid1: index of the first atom (default: most central)
391 - aid2: index of the second atom (default: second most central)
392 - ignoreColinearBonds: if True (default), single bonds adjacent to
393 triple bonds are ignored
394 if False, alternative not-covalently bound
395 atoms are used to define the torsion
396
397 Return: list of torsion weights (both non-ring and ring)
398 """
399
400 distmat = Chem.GetDistanceMatrix(mol)
401 if aid1 < 0 and aid2 < 0:
402 aid1, aid2 = _findCentralBond(mol, distmat)
403 else:
404 b = mol.GetBondBetweenAtoms(aid1, aid2)
405 if b is None:
406 raise ValueError("Specified atoms must be connected by a bond.")
407
408 beta = _calculateBeta(mol, distmat, aid1)
409
410 bonds = _getBondsForTorsions(mol, ignoreColinearBonds)
411
412 weights = []
413 for bid1, bid2, nb1, nb2 in bonds:
414 if ((bid1, bid2) == (aid1, aid2)
415 or (bid2, bid1) == (aid1, aid2)):
416 d = 0
417 else:
418
419 d = min(distmat[aid1][bid1], distmat[aid1][bid2], distmat[aid2][bid1], distmat[aid2][bid2])+1
420 w = math.exp(-beta*(d*d))
421 weights.append(w)
422
423
424 rings = mol.GetRingInfo()
425 for r in rings.BondRings():
426
427 tmp = []
428 num = len(r)
429 for bidx in r:
430 b = mol.GetBondWithIdx(bidx)
431 bid1 = b.GetBeginAtomIdx()
432 bid2 = b.GetEndAtomIdx()
433
434 d = min(distmat[aid1][bid1], distmat[aid1][bid2], distmat[aid2][bid1], distmat[aid2][bid2])+1
435 tmp.append(d)
436
437
438
439
440 w = sum(tmp)/float(num)
441 w = math.exp(-beta*(w*w))
442 weights.append(w*(num/2.0))
443 return weights
444
446 """ Calculate the torsion deviation fingerprint (TFD) given two lists of
447 torsion angles.
448
449 Arguments:
450 - torsions1: torsion angles of conformation 1
451 - torsions2: torsion angles of conformation 2
452 - weights: list of torsion weights (default: None)
453
454 Return: TFD value (float)
455 """
456 if len(torsions1) != len(torsions2):
457 raise ValueError("List of torsions angles must have the same size.")
458
459 deviations = []
460 for tors1, tors2 in zip(torsions1, torsions2):
461 mindiff = 180.0
462 for t1 in tors1[0]:
463 for t2 in tors2[0]:
464 diff = abs(t1-t2)
465 if (360.0-diff) < diff:
466 diff = 360.0 - diff
467
468 if diff < mindiff:
469 mindiff = diff
470 deviations.append(mindiff/tors1[1])
471
472 if weights is not None:
473 if len(weights) != len(torsions1):
474 raise ValueError("List of torsions angles and weights must have the same size.")
475 deviations = [d*w for d,w in zip(deviations, weights)]
476 sum_weights = sum(weights)
477 else:
478 sum_weights = len(deviations)
479 tfd = sum(deviations)
480 if sum_weights != 0:
481 tfd /= sum_weights
482 return tfd
483
485 """ Generate a new molecule with the atom order of mol1 and coordinates
486 from mol2.
487
488 Arguments:
489 - mol1: first instance of the molecule of interest
490 - mol2: second instance the molecule of interest
491
492 Return: RDKit molecule
493 """
494 match = mol2.GetSubstructMatch(mol1)
495 atomNums = tuple(range(mol1.GetNumAtoms()))
496 if match != atomNums:
497
498 mol3 = Chem.Mol(mol1)
499 mol3.RemoveAllConformers()
500 for conf2 in mol2.GetConformers():
501 confId = conf2.GetId()
502 conf = rdchem.Conformer(mol1.GetNumAtoms())
503 conf.SetId(confId)
504 for i in range(mol1.GetNumAtoms()):
505 conf.SetAtomPosition(i, mol2.GetConformer(confId).GetAtomPosition(match[i]))
506 cid = mol3.AddConformer(conf)
507 return mol3
508 else:
509 return Chem.Mol(mol2)
510
511
548
549 -def GetTFDBetweenMolecules(mol1, mol2, confId1=-1, confId2=-1, useWeights=True, maxDev='equal', symmRadius=2, ignoreColinearBonds=True):
550 """ Wrapper to calculate the TFD between two molecules.
551 Important: The two molecules must be instances of the same molecule
552
553 Arguments:
554 - mol1: first instance of the molecule of interest
555 - mol2: second instance the molecule of interest
556 - confId1: conformer index for mol1 (default: first conformer)
557 - confId2: conformer index for mol2 (default: first conformer)
558 - useWeights: flag for using torsion weights in the TFD calculation
559 - maxDev: maximal deviation used for normalization
560 'equal': all torsions are normalized using 180.0 (default)
561 'spec': each torsion is normalized using its specific
562 maximal deviation as given in the paper
563 - symmRadius: radius used for calculating the atom invariants
564 (default: 2)
565 - ignoreColinearBonds: if True (default), single bonds adjacent to
566 triple bonds are ignored
567 if False, alternative not-covalently bound
568 atoms are used to define the torsion
569
570 Return: TFD value
571 """
572 if (Chem.MolToSmiles(mol1) != Chem.MolToSmiles(mol2)):
573 raise ValueError("The two molecules must be instances of the same molecule!")
574 mol2 = _getSameAtomOrder(mol1, mol2)
575 tl, tlr = CalculateTorsionLists(mol1, maxDev=maxDev, symmRadius=symmRadius, ignoreColinearBonds=ignoreColinearBonds)
576
577 torsion1 = CalculateTorsionAngles(mol1, tl, tlr, confId=confId1)
578
579 torsion2 = CalculateTorsionAngles(mol2, tl, tlr, confId=confId2)
580 if useWeights:
581 weights = CalculateTorsionWeights(mol1, ignoreColinearBonds=ignoreColinearBonds)
582 tfd = CalculateTFD(torsion1, torsion2, weights=weights)
583 else:
584 tfd = CalculateTFD(torsion1, torsion2)
585 return tfd
586
587 -def GetTFDMatrix(mol, useWeights=True, maxDev='equal', symmRadius=2, ignoreColinearBonds=True):
588 """ Wrapper to calculate the matrix of TFD values for the
589 conformers of a molecule.
590
591 Arguments:
592 - mol: the molecule of interest
593 - useWeights: flag for using torsion weights in the TFD calculation
594 - maxDev: maximal deviation used for normalization
595 'equal': all torsions are normalized using 180.0 (default)
596 'spec': each torsion is normalized using its specific
597 maximal deviation as given in the paper
598 - symmRadius: radius used for calculating the atom invariants
599 (default: 2)
600 - ignoreColinearBonds: if True (default), single bonds adjacent to
601 triple bonds are ignored
602 if False, alternative not-covalently bound
603 atoms are used to define the torsion
604
605 Return: matrix of TFD values
606 Note that the returned matrix is symmetrical, i.e. it is the
607 lower half of the matrix, e.g. for 5 conformers:
608 matrix = [ a,
609 b, c,
610 d, e, f,
611 g, h, i, j]
612 """
613 tl, tlr = CalculateTorsionLists(mol, maxDev=maxDev, symmRadius=symmRadius, ignoreColinearBonds=ignoreColinearBonds)
614 numconf = mol.GetNumConformers()
615 torsions = [CalculateTorsionAngles(mol, tl, tlr, confId=conf.GetId()) for conf in mol.GetConformers()]
616 tfdmat = []
617 if useWeights:
618 weights = CalculateTorsionWeights(mol, ignoreColinearBonds=ignoreColinearBonds)
619 for i in range(0, numconf):
620 for j in range(0, i):
621 tfdmat.append(CalculateTFD(torsions[i], torsions[j], weights=weights))
622 else:
623 for i in range(0, numconf):
624 for j in range(0, i):
625 tfdmat.append(CalculateTFD(torsions[i], torsions[j]))
626 return tfdmat
627