1
2
3
4
5
6
7
8
9
10
11 """ Import all RDKit chemistry modules
12
13 """
14 from rdkit import rdBase
15 from rdkit import RDConfig
16 from rdkit import DataStructs
17 from rdkit.Geometry import rdGeometry
18 from rdkit.Chem import *
19 from rdkit.Chem.rdPartialCharges import *
20 from rdkit.Chem.rdDepictor import *
21 from rdkit.Chem.rdForceFieldHelpers import *
22 from rdkit.Chem.ChemicalFeatures import *
23 from rdkit.Chem.rdDistGeom import *
24 from rdkit.Chem.rdMolAlign import *
25 from rdkit.Chem.rdMolTransforms import *
26 from rdkit.Chem.rdShapeHelpers import *
27 from rdkit.Chem.rdChemReactions import *
28 from rdkit.Chem.rdReducedGraphs import *
29 try:
30 from rdkit.Chem.rdSLNParse import *
31 except ImportError:
32 pass
33 from rdkit.Chem.rdMolDescriptors import *
34 from rdkit.Chem.rdqueries import *
35 from rdkit import ForceField
36 Mol.Compute2DCoords = Compute2DCoords
37 Mol.ComputeGasteigerCharges = ComputeGasteigerCharges
38 import numpy, os
39 from rdkit.RDLogger import logger
40 logger = logger()
41 import warnings
56
58 """ returns a grid representation of the molecule's shape
59 """
60 res = rdGeometry.UniformGrid3D(boxDim[0],boxDim[1],boxDim[2],spacing=spacing)
61 EncodeShape(mol,res,confId,**kwargs)
62 return res
63
65 """ Calculates the volume of a particular conformer of a molecule
66 based on a grid-encoding of the molecular shape.
67
68 """
69 mol = rdchem.Mol(mol)
70 conf = mol.GetConformer(confId)
71 CanonicalizeConformer(conf)
72 box = ComputeConfBox(conf)
73 sideLen = ( box[1].x-box[0].x + 2*boxMargin, \
74 box[1].y-box[0].y + 2*boxMargin, \
75 box[1].z-box[0].z + 2*boxMargin )
76 shape = rdGeometry.UniformGrid3D(sideLen[0],sideLen[1],sideLen[2],
77 spacing=gridSpacing)
78 EncodeShape(mol,shape,confId,ignoreHs=False,vdwScale=1.0)
79 voxelVol = gridSpacing**3
80 occVect = shape.GetOccupancyVect()
81 voxels = [1 for x in occVect if x==3]
82 vol = voxelVol*len(voxels)
83 return vol
84
89 """ Generates a depiction for a molecule where a piece of the molecule
90 is constrained to have the same coordinates as a reference.
91
92 This is useful for, for example, generating depictions of SAR data
93 sets so that the cores of the molecules are all oriented the same
94 way.
95
96 Arguments:
97 - mol: the molecule to be aligned, this will come back
98 with a single conformer.
99 - reference: a molecule with the reference atoms to align to;
100 this should have a depiction.
101 - confId: (optional) the id of the reference conformation to use
102 - referencePattern: (optional) an optional molecule to be used to
103 generate the atom mapping between the molecule
104 and the reference.
105 - acceptFailure: (optional) if True, standard depictions will be generated
106 for molecules that don't have a substructure match to the
107 reference; if False, a ValueError will be raised
108
109 """
110 if reference and referencePattern:
111 if not reference.GetNumAtoms(onlyExplicit=True)==referencePattern.GetNumAtoms(onlyExplicit=True):
112 raise ValueError('When a pattern is provided, it must have the same number of atoms as the reference')
113 referenceMatch = reference.GetSubstructMatch(referencePattern)
114 if not referenceMatch:
115 raise ValueError("Reference does not map to itself")
116 else:
117 referenceMatch = range(reference.GetNumAtoms(onlyExplicit=True))
118 if referencePattern:
119 match = mol.GetSubstructMatch(referencePattern)
120 else:
121 match = mol.GetSubstructMatch(reference)
122
123 if not match:
124 if not acceptFailure:
125 raise ValueError('Substructure match with reference not found.')
126 else:
127 coordMap={}
128 else:
129 conf = reference.GetConformer()
130 coordMap={}
131 for i,idx in enumerate(match):
132 pt3 = conf.GetAtomPosition(referenceMatch[i])
133 pt2 = rdGeometry.Point2D(pt3.x,pt3.y)
134 coordMap[idx] = pt2
135 Compute2DCoords(mol,clearConfs=True,coordMap=coordMap,canonOrient=False)
136
139 """ Generates a depiction for a molecule where a piece of the molecule
140 is constrained to have coordinates similar to those of a 3D reference
141 structure.
142
143 Arguments:
144 - mol: the molecule to be aligned, this will come back
145 with a single conformer.
146 - reference: a molecule with the reference atoms to align to;
147 this should have a depiction.
148 - confId: (optional) the id of the reference conformation to use
149
150 """
151 nAts = mol.GetNumAtoms()
152 dm = []
153 conf = reference.GetConformer(confId)
154 for i in range(nAts):
155 pi = conf.GetAtomPosition(i)
156
157 for j in range(i+1,nAts):
158 pj = conf.GetAtomPosition(j)
159
160 dm.append((pi-pj).Length())
161 dm = numpy.array(dm)
162 Compute2DCoordsMimicDistmat(mol,dm,**kwargs)
163
164 -def GetBestRMS(ref,probe,refConfId=-1,probeConfId=-1,maps=None):
165 """ Returns the optimal RMS for aligning two molecules, taking
166 symmetry into account. As a side-effect, the probe molecule is
167 left in the aligned state.
168
169 Arguments:
170 - ref: the reference molecule
171 - probe: the molecule to be aligned to the reference
172 - refConfId: (optional) reference conformation to use
173 - probeConfId: (optional) probe conformation to use
174 - maps: (optional) a list of lists of (probeAtomId,refAtomId)
175 tuples with the atom-atom mappings of the two molecules.
176 If not provided, these will be generated using a substructure
177 search.
178
179 Note:
180 This function will attempt to align all permutations of matching atom
181 orders in both molecules, for some molecules it will lead to 'combinatorial
182 explosion' especially if hydrogens are present.
183 Use 'rdkit.Chem.AllChem.AlignMol' to align molecules without changing the
184 atom order.
185
186 """
187 if not maps:
188 matches = ref.GetSubstructMatches(probe,uniquify=False)
189 if not matches:
190 raise ValueError('mol %s does not match mol %s'%(ref.GetProp('_Name'),
191 probe.GetProp('_Name')))
192 if len(matches) > 1e6:
193 warnings.warn("{} matches detected for molecule {}, this may lead to a performance slowdown.".format(len(matches), probe.GetProp('_Name')))
194 maps = [list(enumerate(match)) for match in matches]
195 bestRMS=1000.
196 for amap in maps:
197 rms=AlignMol(probe,ref,probeConfId,refConfId,atomMap=amap)
198 if rms<bestRMS:
199 bestRMS=rms
200 bestMap = amap
201
202
203 if bestMap != amap:
204 AlignMol(probe,ref,probeConfId,refConfId,atomMap=bestMap)
205 return bestRMS
206
241
283
285 """ Returns a generator for the virtual library defined by
286 a reaction and a sequence of sidechain sets
287
288 >>> from rdkit import Chem
289 >>> from rdkit.Chem import AllChem
290 >>> s1=[Chem.MolFromSmiles(x) for x in ('NC','NCC')]
291 >>> s2=[Chem.MolFromSmiles(x) for x in ('OC=O','OC(=O)C')]
292 >>> rxn = AllChem.ReactionFromSmarts('[O:2]=[C:1][OH].[N:3]>>[O:2]=[C:1][N:3]')
293 >>> r = AllChem.EnumerateLibraryFromReaction(rxn,[s2,s1])
294 >>> [Chem.MolToSmiles(x[0]) for x in list(r)]
295 ['CNC=O', 'CCNC=O', 'CNC(C)=O', 'CCNC(C)=O']
296
297 Note that this is all done in a lazy manner, so "infinitely" large libraries can
298 be done without worrying about running out of memory. Your patience will run out first:
299
300 Define a set of 10000 amines:
301 >>> amines = (Chem.MolFromSmiles('N'+'C'*x) for x in range(10000))
302
303 ... a set of 10000 acids
304 >>> acids = (Chem.MolFromSmiles('OC(=O)'+'C'*x) for x in range(10000))
305
306 ... now the virtual library (1e8 compounds in principle):
307 >>> r = AllChem.EnumerateLibraryFromReaction(rxn,[acids,amines])
308
309 ... look at the first 4 compounds:
310 >>> [Chem.MolToSmiles(next(r)[0]) for x in range(4)]
311 ['NC=O', 'CNC=O', 'CCNC=O', 'CCCNC=O']
312
313
314 """
315 if len(sidechainSets) != reaction.GetNumReactantTemplates():
316 raise ValueError('%d sidechains provided, %d required' %
317 (len(sidechainSets),reaction.GetNumReactantTemplates()))
318
319 def _combiEnumerator(items,depth=0):
320 for item in items[depth]:
321 if depth+1 < len(items):
322 v = _combiEnumerator(items,depth+1)
323 for entry in v:
324 l=[item]
325 l.extend(entry)
326 yield l
327 else:
328 yield [item]
329 for chains in _combiEnumerator(sidechainSets):
330 prodSets = reaction.RunReactants(chains)
331 for prods in prodSets:
332 yield prods
333
334 -def ConstrainedEmbed(mol,core,useTethers=True,coreConfId=-1,
335 randomseed=2342,getForceField=UFFGetMoleculeForceField,**kwargs):
336 """ generates an embedding of a molecule where part of the molecule
337 is constrained to have particular coordinates
338
339 Arguments
340 - mol: the molecule to embed
341 - core: the molecule to use as a source of constraints
342 - useTethers: (optional) if True, the final conformation will be
343 optimized subject to a series of extra forces that pull the
344 matching atoms to the positions of the core atoms. Otherwise
345 simple distance constraints based on the core atoms will be
346 used in the optimization.
347 - coreConfId: (optional) id of the core conformation to use
348 - randomSeed: (optional) seed for the random number generator
349
350
351 An example, start by generating a template with a 3D structure:
352 >>> from rdkit.Chem import AllChem
353 >>> template = AllChem.MolFromSmiles("c1nn(Cc2ccccc2)cc1")
354 >>> AllChem.EmbedMolecule(template)
355 0
356 >>> AllChem.UFFOptimizeMolecule(template)
357 0
358
359 Here's a molecule:
360 >>> mol = AllChem.MolFromSmiles("c1nn(Cc2ccccc2)cc1-c3ccccc3")
361
362 Now do the constrained embedding
363 >>> newmol=AllChem.ConstrainedEmbed(mol, template)
364
365 Demonstrate that the positions are the same:
366 >>> newp=newmol.GetConformer().GetAtomPosition(0)
367 >>> molp=mol.GetConformer().GetAtomPosition(0)
368 >>> list(newp-molp)==[0.0,0.0,0.0]
369 True
370 >>> newp=newmol.GetConformer().GetAtomPosition(1)
371 >>> molp=mol.GetConformer().GetAtomPosition(1)
372 >>> list(newp-molp)==[0.0,0.0,0.0]
373 True
374
375 """
376 match = mol.GetSubstructMatch(core)
377 if not match:
378 raise ValueError("molecule doesn't match the core")
379 coordMap={}
380 coreConf = core.GetConformer(coreConfId)
381 for i,idxI in enumerate(match):
382 corePtI = coreConf.GetAtomPosition(i)
383 coordMap[idxI]=corePtI
384
385 ci = EmbedMolecule(mol,coordMap=coordMap,randomSeed=randomseed,**kwargs)
386 if ci<0:
387 raise ValueError('Could not embed molecule.')
388
389 algMap=[(j,i) for i,j in enumerate(match)]
390
391 if not useTethers:
392
393 ff = getForceField(mol,confId=0)
394 for i,idxI in enumerate(match):
395 for j in range(i+1,len(match)):
396 idxJ = match[j]
397 d = coordMap[idxI].Distance(coordMap[idxJ])
398 ff.AddDistanceConstraint(idxI,idxJ,d,d,100.)
399 ff.Initialize()
400 n=4
401 more=ff.Minimize()
402 while more and n:
403 more=ff.Minimize()
404 n-=1
405
406 rms =AlignMol(mol,core,atomMap=algMap)
407 else:
408
409 rms = AlignMol(mol,core,atomMap=algMap)
410 ff = getForceField(mol,confId=0)
411 conf = core.GetConformer()
412 for i in range(core.GetNumAtoms()):
413 p =conf.GetAtomPosition(i)
414 pIdx=ff.AddExtraPoint(p.x,p.y,p.z,fixed=True)-1
415 ff.AddDistanceConstraint(pIdx,match[i],0,0,100.)
416 ff.Initialize()
417 n=4
418 more=ff.Minimize(energyTol=1e-4,forceTol=1e-3)
419 while more and n:
420 more=ff.Minimize(energyTol=1e-4,forceTol=1e-3)
421 n-=1
422
423 rms = AlignMol(mol,core,atomMap=algMap)
424 mol.SetProp('EmbedRMS',str(rms))
425 return mol
426
428 """ assigns bond orders to a molecule based on the
429 bond orders in a template molecule
430
431 Arguments
432 - refmol: the template molecule
433 - mol: the molecule to assign bond orders to
434
435 An example, start by generating a template from a SMILES
436 and read in the PDB structure of the molecule
437 >>> from rdkit.Chem import AllChem
438 >>> template = AllChem.MolFromSmiles("CN1C(=NC(C1=O)(c2ccccc2)c3ccccc3)N")
439 >>> mol = AllChem.MolFromPDBFile(os.path.join(RDConfig.RDCodeDir, 'Chem', 'test_data', '4DJU_lig.pdb'))
440 >>> len([1 for b in template.GetBonds() if b.GetBondTypeAsDouble() == 1.0])
441 8
442 >>> len([1 for b in mol.GetBonds() if b.GetBondTypeAsDouble() == 1.0])
443 22
444
445 Now assign the bond orders based on the template molecule
446 >>> newMol = AllChem.AssignBondOrdersFromTemplate(template, mol)
447 >>> len([1 for b in newMol.GetBonds() if b.GetBondTypeAsDouble() == 1.0])
448 8
449
450 Note that the template molecule should have no explicit hydrogens
451 else the algorithm will fail.
452
453 It also works if there are different formal charges (this was github issue 235):
454 >>> template=AllChem.MolFromSmiles('CN(C)C(=O)Cc1ccc2c(c1)NC(=O)c3ccc(cc3N2)c4ccc(c(c4)OC)[N+](=O)[O-]')
455 >>> mol = AllChem.MolFromMolFile(os.path.join(RDConfig.RDCodeDir, 'Chem', 'test_data', '4FTR_lig.mol'))
456 >>> AllChem.MolToSmiles(mol)
457 'COC1CC(C2CCC3C(O)NC4CC(CC(O)N(C)C)CCC4NC3C2)CCC1N(O)O'
458 >>> newMol = AllChem.AssignBondOrdersFromTemplate(template, mol)
459 >>> AllChem.MolToSmiles(newMol)
460 'COc1cc(-c2ccc3c(c2)Nc2ccc(CC(=O)N(C)C)cc2NC3=O)ccc1[N+](=O)[O-]'
461
462 """
463 refmol2 = rdchem.Mol(refmol)
464 mol2 = rdchem.Mol(mol)
465
466 matching = mol2.GetSubstructMatch(refmol2)
467 if not matching:
468
469 for b in mol2.GetBonds():
470 if b.GetBondType() != BondType.SINGLE:
471 b.SetBondType(BondType.SINGLE)
472 b.SetIsAromatic(False)
473
474 for b in refmol2.GetBonds():
475 b.SetBondType(BondType.SINGLE)
476 b.SetIsAromatic(False)
477
478 for a in refmol2.GetAtoms():
479 a.SetFormalCharge(0)
480 for a in mol2.GetAtoms():
481 a.SetFormalCharge(0)
482
483 matching = mol2.GetSubstructMatches(refmol2, uniquify=False)
484
485 if matching:
486 if len(matching) > 1:
487 logger.warning("More than one matching pattern found - picking one")
488 matching = matching[0]
489
490 for b in refmol.GetBonds():
491 atom1 = matching[b.GetBeginAtomIdx()]
492 atom2 = matching[b.GetEndAtomIdx()]
493 b2 = mol2.GetBondBetweenAtoms(atom1, atom2)
494 b2.SetBondType(b.GetBondType())
495 b2.SetIsAromatic(b.GetIsAromatic())
496
497 for a in refmol.GetAtoms():
498 a2 = mol2.GetAtomWithIdx(matching[a.GetIdx()])
499 a2.SetHybridization(a.GetHybridization())
500 a2.SetIsAromatic(a.GetIsAromatic())
501 a2.SetNumExplicitHs(a.GetNumExplicitHs())
502 a2.SetFormalCharge(a.GetFormalCharge())
503 SanitizeMol(mol2)
504 if hasattr(mol2, '__sssAtoms'):
505 mol2.__sssAtoms = None
506 else:
507 raise ValueError("No matching found")
508 return mol2
509
510
511
512
513
514
516 import doctest,sys
517 return doctest.testmod(sys.modules["__main__"])
518
519
520 if __name__ == '__main__':
521 import sys
522 failed,tried = _test()
523 sys.exit(failed)
524