Package rdkit :: Package Chem :: Module AllChem
[hide private]
[frames] | no frames]

Source Code for Module rdkit.Chem.AllChem

  1  # $Id$ 
  2  # 
  3  #  Copyright (C) 2006-2011  greg Landrum and Rational Discovery LLC 
  4  # 
  5  #   @@ All Rights Reserved @@ 
  6  #  This file is part of the RDKit. 
  7  #  The contents are covered by the terms of the BSD license 
  8  #  which is included in the file license.txt, found at the root 
  9  #  of the RDKit source tree. 
 10  # 
 11  """ Import all RDKit chemistry modules 
 12   
 13  """ 
 14  from rdkit import rdBase 
 15  from rdkit import RDConfig 
 16  from rdkit import DataStructs 
 17  from rdkit.Geometry import rdGeometry 
 18  from rdkit.Chem import * 
 19  from rdkit.Chem.rdPartialCharges import * 
 20  from rdkit.Chem.rdDepictor import * 
 21  from rdkit.Chem.rdForceFieldHelpers import * 
 22  from rdkit.Chem.ChemicalFeatures import * 
 23  from rdkit.Chem.rdDistGeom import * 
 24  from rdkit.Chem.rdMolAlign import * 
 25  from rdkit.Chem.rdMolTransforms import * 
 26  from rdkit.Chem.rdShapeHelpers import * 
 27  from rdkit.Chem.rdChemReactions import * 
 28  from rdkit.Chem.rdReducedGraphs import * 
 29  try: 
 30    from rdkit.Chem.rdSLNParse import * 
 31  except ImportError: 
 32    pass 
 33  from rdkit.Chem.rdMolDescriptors import * 
 34  from rdkit.Chem.rdqueries import * 
 35  from rdkit import ForceField 
 36  Mol.Compute2DCoords = Compute2DCoords 
 37  Mol.ComputeGasteigerCharges = ComputeGasteigerCharges 
 38  import numpy, os 
 39  from rdkit.RDLogger import logger 
 40  logger = logger() 
 41  import warnings 
42 -def TransformMol(mol,tform,confId=-1,keepConfs=False):
43 """ Applies the transformation (usually a 4x4 double matrix) to a molecule 44 if keepConfs is False then all but that conformer are removed 45 46 """ 47 refConf = mol.GetConformer(confId) 48 TransformConformer(refConf,tform) 49 if not keepConfs: 50 if confId==-1: confId=0 51 allConfIds = [c.GetId() for c in mol.GetConformers()] 52 for id in allConfIds: 53 if not id==confId: mol.RemoveConformer(id) 54 #reset the conf Id to zero since there is only one conformer left 55 mol.GetConformer(confId).SetId(0)
56
57 -def ComputeMolShape(mol,confId=-1,boxDim=(20,20,20),spacing=0.5,**kwargs):
58 """ returns a grid representation of the molecule's shape 59 """ 60 res = rdGeometry.UniformGrid3D(boxDim[0],boxDim[1],boxDim[2],spacing=spacing) 61 EncodeShape(mol,res,confId,**kwargs) 62 return res
63
64 -def ComputeMolVolume(mol,confId=-1,gridSpacing=0.2,boxMargin=2.0):
65 """ Calculates the volume of a particular conformer of a molecule 66 based on a grid-encoding of the molecular shape. 67 68 """ 69 mol = rdchem.Mol(mol) 70 conf = mol.GetConformer(confId) 71 CanonicalizeConformer(conf) 72 box = ComputeConfBox(conf) 73 sideLen = ( box[1].x-box[0].x + 2*boxMargin, \ 74 box[1].y-box[0].y + 2*boxMargin, \ 75 box[1].z-box[0].z + 2*boxMargin ) 76 shape = rdGeometry.UniformGrid3D(sideLen[0],sideLen[1],sideLen[2], 77 spacing=gridSpacing) 78 EncodeShape(mol,shape,confId,ignoreHs=False,vdwScale=1.0) 79 voxelVol = gridSpacing**3 80 occVect = shape.GetOccupancyVect() 81 voxels = [1 for x in occVect if x==3] 82 vol = voxelVol*len(voxels) 83 return vol
84
85 -def GenerateDepictionMatching2DStructure(mol,reference,confId=-1, 86 referencePattern=None, 87 acceptFailure=False, 88 **kwargs):
89 """ Generates a depiction for a molecule where a piece of the molecule 90 is constrained to have the same coordinates as a reference. 91 92 This is useful for, for example, generating depictions of SAR data 93 sets so that the cores of the molecules are all oriented the same 94 way. 95 96 Arguments: 97 - mol: the molecule to be aligned, this will come back 98 with a single conformer. 99 - reference: a molecule with the reference atoms to align to; 100 this should have a depiction. 101 - confId: (optional) the id of the reference conformation to use 102 - referencePattern: (optional) an optional molecule to be used to 103 generate the atom mapping between the molecule 104 and the reference. 105 - acceptFailure: (optional) if True, standard depictions will be generated 106 for molecules that don't have a substructure match to the 107 reference; if False, a ValueError will be raised 108 109 """ 110 if reference and referencePattern: 111 if not reference.GetNumAtoms(onlyExplicit=True)==referencePattern.GetNumAtoms(onlyExplicit=True): 112 raise ValueError('When a pattern is provided, it must have the same number of atoms as the reference') 113 referenceMatch = reference.GetSubstructMatch(referencePattern) 114 if not referenceMatch: 115 raise ValueError("Reference does not map to itself") 116 else: 117 referenceMatch = range(reference.GetNumAtoms(onlyExplicit=True)) 118 if referencePattern: 119 match = mol.GetSubstructMatch(referencePattern) 120 else: 121 match = mol.GetSubstructMatch(reference) 122 123 if not match: 124 if not acceptFailure: 125 raise ValueError('Substructure match with reference not found.') 126 else: 127 coordMap={} 128 else: 129 conf = reference.GetConformer() 130 coordMap={} 131 for i,idx in enumerate(match): 132 pt3 = conf.GetAtomPosition(referenceMatch[i]) 133 pt2 = rdGeometry.Point2D(pt3.x,pt3.y) 134 coordMap[idx] = pt2 135 Compute2DCoords(mol,clearConfs=True,coordMap=coordMap,canonOrient=False)
136
137 -def GenerateDepictionMatching3DStructure(mol,reference,confId=-1, 138 **kwargs):
139 """ Generates a depiction for a molecule where a piece of the molecule 140 is constrained to have coordinates similar to those of a 3D reference 141 structure. 142 143 Arguments: 144 - mol: the molecule to be aligned, this will come back 145 with a single conformer. 146 - reference: a molecule with the reference atoms to align to; 147 this should have a depiction. 148 - confId: (optional) the id of the reference conformation to use 149 150 """ 151 nAts = mol.GetNumAtoms() 152 dm = [] 153 conf = reference.GetConformer(confId) 154 for i in range(nAts): 155 pi = conf.GetAtomPosition(i) 156 #npi.z=0 157 for j in range(i+1,nAts): 158 pj = conf.GetAtomPosition(j) 159 #pj.z=0 160 dm.append((pi-pj).Length()) 161 dm = numpy.array(dm) 162 Compute2DCoordsMimicDistmat(mol,dm,**kwargs)
163
164 -def GetBestRMS(ref,probe,refConfId=-1,probeConfId=-1,maps=None):
165 """ Returns the optimal RMS for aligning two molecules, taking 166 symmetry into account. As a side-effect, the probe molecule is 167 left in the aligned state. 168 169 Arguments: 170 - ref: the reference molecule 171 - probe: the molecule to be aligned to the reference 172 - refConfId: (optional) reference conformation to use 173 - probeConfId: (optional) probe conformation to use 174 - maps: (optional) a list of lists of (probeAtomId,refAtomId) 175 tuples with the atom-atom mappings of the two molecules. 176 If not provided, these will be generated using a substructure 177 search. 178 179 Note: 180 This function will attempt to align all permutations of matching atom 181 orders in both molecules, for some molecules it will lead to 'combinatorial 182 explosion' especially if hydrogens are present. 183 Use 'rdkit.Chem.AllChem.AlignMol' to align molecules without changing the 184 atom order. 185 186 """ 187 if not maps: 188 matches = ref.GetSubstructMatches(probe,uniquify=False) 189 if not matches: 190 raise ValueError('mol %s does not match mol %s'%(ref.GetProp('_Name'), 191 probe.GetProp('_Name'))) 192 if len(matches) > 1e6: 193 warnings.warn("{} matches detected for molecule {}, this may lead to a performance slowdown.".format(len(matches), probe.GetProp('_Name'))) 194 maps = [list(enumerate(match)) for match in matches] 195 bestRMS=1000. 196 for amap in maps: 197 rms=AlignMol(probe,ref,probeConfId,refConfId,atomMap=amap) 198 if rms<bestRMS: 199 bestRMS=rms 200 bestMap = amap 201 202 # finally repeate the best alignment : 203 if bestMap != amap: 204 AlignMol(probe,ref,probeConfId,refConfId,atomMap=bestMap) 205 return bestRMS
206
207 -def GetConformerRMS(mol,confId1,confId2,atomIds=None,prealigned=False):
208 """ Returns the RMS between two conformations. 209 By default, the conformers will be aligned to the first conformer 210 of the molecule (i.e. the reference) before RMS calculation and, 211 as a side-effect, will be left in the aligned state. 212 213 Arguments: 214 - mol: the molecule 215 - confId1: the id of the first conformer 216 - confId2: the id of the second conformer 217 - atomIds: (optional) list of atom ids to use a points for 218 alingment - defaults to all atoms 219 - prealigned: (optional) by default the conformers are assumed 220 be unaligned and will therefore be aligned to the 221 first conformer 222 223 """ 224 # align the conformers if necessary 225 # Note: the reference conformer is always the first one 226 if not prealigned: 227 if atomIds: 228 AlignMolConformers(mol, confIds=[confId1,confId2], atomIds=atomIds) 229 else: 230 AlignMolConformers(mol, confIds=[confId1,confId2]) 231 232 # calculate the RMS between the two conformations 233 conf1 = mol.GetConformer(id=confId1) 234 conf2 = mol.GetConformer(id=confId2) 235 ssr = 0 236 for i in range(mol.GetNumAtoms()): 237 d = conf1.GetAtomPosition(i).Distance(conf2.GetAtomPosition(i)) 238 ssr += d*d 239 ssr /= mol.GetNumAtoms() 240 return numpy.sqrt(ssr)
241
242 -def GetConformerRMSMatrix(mol,atomIds=None,prealigned=False):
243 """ Returns the RMS matrix of the conformers of a molecule. 244 As a side-effect, the conformers will be aligned to the first 245 conformer (i.e. the reference) and will left in the aligned state. 246 247 Arguments: 248 - mol: the molecule 249 - atomIds: (optional) list of atom ids to use a points for 250 alingment - defaults to all atoms 251 - prealigned: (optional) by default the conformers are assumed 252 be unaligned and will therefore be aligned to the 253 first conformer 254 255 Note that the returned RMS matrix is symmetrically, i.e. it is the 256 lower half of the matrix, e.g. for 5 conformers: 257 rmsmatrix = [ a, 258 b, c, 259 d, e, f, 260 g, h, i, j] 261 This way it can be directly used as distance matrix in e.g. Butina 262 clustering. 263 264 """ 265 # if necessary, align the conformers 266 # Note: the reference conformer is always the first one 267 rmsvals = [] 268 if not prealigned: 269 if atomIds: 270 AlignMolConformers(mol, atomIds=atomIds, RMSlist=rmsvals) 271 else: 272 AlignMolConformers(mol, RMSlist=rmsvals) 273 else: # already prealigned 274 for i in range(1, mol.GetNumConformers()): 275 rmsvals.append(GetConformerRMS(mol, 0, i, atomIds=atomIds, prealigned=prealigned)) 276 # loop over the conformations (except the reference one) 277 cmat = [] 278 for i in range(1, mol.GetNumConformers()): 279 cmat.append(rmsvals[i-1]) 280 for j in range(1,i): 281 cmat.append(GetConformerRMS(mol, i, j, atomIds=atomIds, prealigned=True)) 282 return cmat
283
284 -def EnumerateLibraryFromReaction(reaction,sidechainSets) :
285 """ Returns a generator for the virtual library defined by 286 a reaction and a sequence of sidechain sets 287 288 >>> from rdkit import Chem 289 >>> from rdkit.Chem import AllChem 290 >>> s1=[Chem.MolFromSmiles(x) for x in ('NC','NCC')] 291 >>> s2=[Chem.MolFromSmiles(x) for x in ('OC=O','OC(=O)C')] 292 >>> rxn = AllChem.ReactionFromSmarts('[O:2]=[C:1][OH].[N:3]>>[O:2]=[C:1][N:3]') 293 >>> r = AllChem.EnumerateLibraryFromReaction(rxn,[s2,s1]) 294 >>> [Chem.MolToSmiles(x[0]) for x in list(r)] 295 ['CNC=O', 'CCNC=O', 'CNC(C)=O', 'CCNC(C)=O'] 296 297 Note that this is all done in a lazy manner, so "infinitely" large libraries can 298 be done without worrying about running out of memory. Your patience will run out first: 299 300 Define a set of 10000 amines: 301 >>> amines = (Chem.MolFromSmiles('N'+'C'*x) for x in range(10000)) 302 303 ... a set of 10000 acids 304 >>> acids = (Chem.MolFromSmiles('OC(=O)'+'C'*x) for x in range(10000)) 305 306 ... now the virtual library (1e8 compounds in principle): 307 >>> r = AllChem.EnumerateLibraryFromReaction(rxn,[acids,amines]) 308 309 ... look at the first 4 compounds: 310 >>> [Chem.MolToSmiles(next(r)[0]) for x in range(4)] 311 ['NC=O', 'CNC=O', 'CCNC=O', 'CCCNC=O'] 312 313 314 """ 315 if len(sidechainSets) != reaction.GetNumReactantTemplates(): 316 raise ValueError('%d sidechains provided, %d required' % 317 (len(sidechainSets),reaction.GetNumReactantTemplates())) 318 319 def _combiEnumerator(items,depth=0): 320 for item in items[depth]: 321 if depth+1 < len(items): 322 v = _combiEnumerator(items,depth+1) 323 for entry in v: 324 l=[item] 325 l.extend(entry) 326 yield l 327 else: 328 yield [item]
329 for chains in _combiEnumerator(sidechainSets): 330 prodSets = reaction.RunReactants(chains) 331 for prods in prodSets: 332 yield prods 333
334 -def ConstrainedEmbed(mol,core,useTethers=True,coreConfId=-1, 335 randomseed=2342,getForceField=UFFGetMoleculeForceField,**kwargs):
336 """ generates an embedding of a molecule where part of the molecule 337 is constrained to have particular coordinates 338 339 Arguments 340 - mol: the molecule to embed 341 - core: the molecule to use as a source of constraints 342 - useTethers: (optional) if True, the final conformation will be 343 optimized subject to a series of extra forces that pull the 344 matching atoms to the positions of the core atoms. Otherwise 345 simple distance constraints based on the core atoms will be 346 used in the optimization. 347 - coreConfId: (optional) id of the core conformation to use 348 - randomSeed: (optional) seed for the random number generator 349 350 351 An example, start by generating a template with a 3D structure: 352 >>> from rdkit.Chem import AllChem 353 >>> template = AllChem.MolFromSmiles("c1nn(Cc2ccccc2)cc1") 354 >>> AllChem.EmbedMolecule(template) 355 0 356 >>> AllChem.UFFOptimizeMolecule(template) 357 0 358 359 Here's a molecule: 360 >>> mol = AllChem.MolFromSmiles("c1nn(Cc2ccccc2)cc1-c3ccccc3") 361 362 Now do the constrained embedding 363 >>> newmol=AllChem.ConstrainedEmbed(mol, template) 364 365 Demonstrate that the positions are the same: 366 >>> newp=newmol.GetConformer().GetAtomPosition(0) 367 >>> molp=mol.GetConformer().GetAtomPosition(0) 368 >>> list(newp-molp)==[0.0,0.0,0.0] 369 True 370 >>> newp=newmol.GetConformer().GetAtomPosition(1) 371 >>> molp=mol.GetConformer().GetAtomPosition(1) 372 >>> list(newp-molp)==[0.0,0.0,0.0] 373 True 374 375 """ 376 match = mol.GetSubstructMatch(core) 377 if not match: 378 raise ValueError("molecule doesn't match the core") 379 coordMap={} 380 coreConf = core.GetConformer(coreConfId) 381 for i,idxI in enumerate(match): 382 corePtI = coreConf.GetAtomPosition(i) 383 coordMap[idxI]=corePtI 384 385 ci = EmbedMolecule(mol,coordMap=coordMap,randomSeed=randomseed,**kwargs) 386 if ci<0: 387 raise ValueError('Could not embed molecule.') 388 389 algMap=[(j,i) for i,j in enumerate(match)] 390 391 if not useTethers: 392 # clean up the conformation 393 ff = getForceField(mol,confId=0) 394 for i,idxI in enumerate(match): 395 for j in range(i+1,len(match)): 396 idxJ = match[j] 397 d = coordMap[idxI].Distance(coordMap[idxJ]) 398 ff.AddDistanceConstraint(idxI,idxJ,d,d,100.) 399 ff.Initialize() 400 n=4 401 more=ff.Minimize() 402 while more and n: 403 more=ff.Minimize() 404 n-=1 405 # rotate the embedded conformation onto the core: 406 rms =AlignMol(mol,core,atomMap=algMap) 407 else: 408 # rotate the embedded conformation onto the core: 409 rms = AlignMol(mol,core,atomMap=algMap) 410 ff = getForceField(mol,confId=0) 411 conf = core.GetConformer() 412 for i in range(core.GetNumAtoms()): 413 p =conf.GetAtomPosition(i) 414 pIdx=ff.AddExtraPoint(p.x,p.y,p.z,fixed=True)-1 415 ff.AddDistanceConstraint(pIdx,match[i],0,0,100.) 416 ff.Initialize() 417 n=4 418 more=ff.Minimize(energyTol=1e-4,forceTol=1e-3) 419 while more and n: 420 more=ff.Minimize(energyTol=1e-4,forceTol=1e-3) 421 n-=1 422 # realign 423 rms = AlignMol(mol,core,atomMap=algMap) 424 mol.SetProp('EmbedRMS',str(rms)) 425 return mol
426
427 -def AssignBondOrdersFromTemplate(refmol, mol):
428 """ assigns bond orders to a molecule based on the 429 bond orders in a template molecule 430 431 Arguments 432 - refmol: the template molecule 433 - mol: the molecule to assign bond orders to 434 435 An example, start by generating a template from a SMILES 436 and read in the PDB structure of the molecule 437 >>> from rdkit.Chem import AllChem 438 >>> template = AllChem.MolFromSmiles("CN1C(=NC(C1=O)(c2ccccc2)c3ccccc3)N") 439 >>> mol = AllChem.MolFromPDBFile(os.path.join(RDConfig.RDCodeDir, 'Chem', 'test_data', '4DJU_lig.pdb')) 440 >>> len([1 for b in template.GetBonds() if b.GetBondTypeAsDouble() == 1.0]) 441 8 442 >>> len([1 for b in mol.GetBonds() if b.GetBondTypeAsDouble() == 1.0]) 443 22 444 445 Now assign the bond orders based on the template molecule 446 >>> newMol = AllChem.AssignBondOrdersFromTemplate(template, mol) 447 >>> len([1 for b in newMol.GetBonds() if b.GetBondTypeAsDouble() == 1.0]) 448 8 449 450 Note that the template molecule should have no explicit hydrogens 451 else the algorithm will fail. 452 453 It also works if there are different formal charges (this was github issue 235): 454 >>> template=AllChem.MolFromSmiles('CN(C)C(=O)Cc1ccc2c(c1)NC(=O)c3ccc(cc3N2)c4ccc(c(c4)OC)[N+](=O)[O-]') 455 >>> mol = AllChem.MolFromMolFile(os.path.join(RDConfig.RDCodeDir, 'Chem', 'test_data', '4FTR_lig.mol')) 456 >>> AllChem.MolToSmiles(mol) 457 'COC1CC(C2CCC3C(O)NC4CC(CC(O)N(C)C)CCC4NC3C2)CCC1N(O)O' 458 >>> newMol = AllChem.AssignBondOrdersFromTemplate(template, mol) 459 >>> AllChem.MolToSmiles(newMol) 460 'COc1cc(-c2ccc3c(c2)Nc2ccc(CC(=O)N(C)C)cc2NC3=O)ccc1[N+](=O)[O-]' 461 462 """ 463 refmol2 = rdchem.Mol(refmol) 464 mol2 = rdchem.Mol(mol) 465 # do the molecules match already? 466 matching = mol2.GetSubstructMatch(refmol2) 467 if not matching: # no, they don't match 468 # check if bonds of mol are SINGLE 469 for b in mol2.GetBonds(): 470 if b.GetBondType() != BondType.SINGLE: 471 b.SetBondType(BondType.SINGLE) 472 b.SetIsAromatic(False) 473 # set the bonds of mol to SINGLE 474 for b in refmol2.GetBonds(): 475 b.SetBondType(BondType.SINGLE) 476 b.SetIsAromatic(False) 477 # set atom charges to zero; 478 for a in refmol2.GetAtoms(): 479 a.SetFormalCharge(0) 480 for a in mol2.GetAtoms(): 481 a.SetFormalCharge(0) 482 483 matching = mol2.GetSubstructMatches(refmol2, uniquify=False) 484 # do the molecules match now? 485 if matching: 486 if len(matching) > 1: 487 logger.warning("More than one matching pattern found - picking one") 488 matching = matching[0] 489 # apply matching: set bond properties 490 for b in refmol.GetBonds(): 491 atom1 = matching[b.GetBeginAtomIdx()] 492 atom2 = matching[b.GetEndAtomIdx()] 493 b2 = mol2.GetBondBetweenAtoms(atom1, atom2) 494 b2.SetBondType(b.GetBondType()) 495 b2.SetIsAromatic(b.GetIsAromatic()) 496 # apply matching: set atom properties 497 for a in refmol.GetAtoms(): 498 a2 = mol2.GetAtomWithIdx(matching[a.GetIdx()]) 499 a2.SetHybridization(a.GetHybridization()) 500 a2.SetIsAromatic(a.GetIsAromatic()) 501 a2.SetNumExplicitHs(a.GetNumExplicitHs()) 502 a2.SetFormalCharge(a.GetFormalCharge()) 503 SanitizeMol(mol2) 504 if hasattr(mol2, '__sssAtoms'): 505 mol2.__sssAtoms = None # we don't want all bonds highlighted 506 else: 507 raise ValueError("No matching found") 508 return mol2
509 510 511 #------------------------------------ 512 # 513 # doctest boilerplate 514 #
515 -def _test():
516 import doctest,sys 517 return doctest.testmod(sys.modules["__main__"])
518 519 520 if __name__ == '__main__': 521 import sys 522 failed,tried = _test() 523 sys.exit(failed) 524