Package rdkit :: Package Chem :: Package Pharm2D :: Module LazyGenerator
[hide private]
[frames | no frames]

Source Code for Module rdkit.Chem.Pharm2D.LazyGenerator

  1  # $Id$ 
  2  # 
  3  # Copyright (C) 2003-2006 greg Landrum and Rational Discovery LLC 
  4  # 
  5  #   @@ All Rights Reserved @@ 
  6  #  This file is part of the RDKit. 
  7  #  The contents are covered by the terms of the BSD license 
  8  #  which is included in the file license.txt, found at the root 
  9  #  of the RDKit source tree. 
 10  # 
 11  from __future__ import print_function 
 12   
 13  raise NotImplementedError('not finished yet') 
 14  """ lazy generator of 2D pharmacophore signature data 
 15   
 16  """ 
 17  import rdkit.Chem 
 18  from rdkit.Chem.Pharm2D import SigFactory,Matcher,Utils 
 19   
class Generator(object):
    """Lazily evaluates single bits of a molecule's 2D pharmacophore signature.

    Bits are resolved one at a time, on request, through GetBit() (or the
    equivalent indexing syntax) rather than by building the whole
    fingerprint up front.

    Important attributes:

     - mol: the molecule whose signature is being worked with

     - sigFactory : the SigFactory object with signature parameters
           NOTE: no preprocessing is carried out for _sigFactory_.
                 It *must* be pre-initialized.

     - sig: the signature object used for size queries and bit matching

     - dMat: the distance matrix used when matching bits

     - bits: dict mapping already-queried bit indices to 0/1, or None
           when caching is disabled

     - pattMatches: per-feature-family lists of matching atom ids, in
           the same order as the non-skipped feature families

    """

    def __init__(self, sigFactory, mol, dMat=None, bitCache=True):
        """ constructor

        **Arguments**

          - sigFactory: a signature factory, see class docs

          - mol: a molecule, see class docs

          - dMat: (optional) a distance matrix for the molecule.  If this
            is not provided, one will be calculated

          - bitCache: (optional) if nonzero, a local cache of which bits
            have been queried will be maintained.  Otherwise things must
            be recalculated each time a bit is queried.

        **Raises**

          - ValueError: if sigFactory is not a SigFactory.SigFactory

        """
        if not isinstance(sigFactory, SigFactory.SigFactory):
            raise ValueError('bad factory')

        self.sigFactory = sigFactory
        self.mol = mol

        # GetBit() and __len__() read self.sig, which was previously never
        # assigned.
        # NOTE(review): GetSignature() is what this file's __main__ demo uses
        # to obtain an object exposing GetSize(); confirm that it is also the
        # object Matcher.GetAtomsMatchingBit() expects as its first argument.
        self.sig = sigFactory.GetSignature()

        if dMat is None:
            useBO = sigFactory.includeBondOrder
            # the module does `import rdkit.Chem`, which binds only `rdkit`,
            # so the fully qualified name is required here
            dMat = rdkit.Chem.GetDistanceMatrix(mol, useBO)
        self.dMat = dMat

        self.bits = {} if bitCache else None

        featFactory = sigFactory.featFactory
        skipFeats = sigFactory.skipFeats
        featFamilies = [fam for fam in featFactory.GetFeatureFamilies()
                        if fam not in skipFeats]

        # collect the atom ids of every feature, grouped by feature family
        featMatches = {fam: [] for fam in featFamilies}
        for feat in featFactory.GetFeaturesForMol(mol):
            fam = feat.GetFamily()
            if fam not in skipFeats:
                featMatches[fam].append(feat.GetAtomIds())

        # flatten to a list ordered like featFamilies
        # NOTE(review): presumably the matcher indexes matchingAtoms by
        # feature-family position -- verify against Matcher.
        self.pattMatches = [featMatches[fam] for fam in featFamilies]

    def GetBit(self, idx):
        """ returns 1 if the bit is set in the molecule's signature, 0 otherwise

        **Arguments**

          - idx: the index of the bit to query

        **Raises**

          - IndexError: if idx is negative or not smaller than the
            signature size

        """
        if idx < 0 or idx >= self.sig.GetSize():
            raise IndexError('Index %d invalid' % (idx))

        cache = self.bits
        # `in` replaces dict.has_key(), which does not exist in Python 3
        if cache is not None and idx in cache:
            return cache[idx]

        # justOne=1: a single match is enough to decide that the bit is set
        matches = Matcher.GetAtomsMatchingBit(self.sig, idx, self.mol,
                                              dMat=self.dMat, justOne=1,
                                              matchingAtoms=self.pattMatches)
        res = 1 if matches else 0

        if cache is not None:
            cache[idx] = res
        return res

    def __len__(self):
        """ allows class to support len(); returns the signature size

        """
        return self.sig.GetSize()

    def __getitem__(self, itm):
        """ allows class to support random access.
          Calls self.GetBit()

        """
        return self.GetBit(itm)
if __name__ == '__main__':
    # Small timing comparison: query the same random bits through the lazy
    # Generator and through fully generated 2D fingerprints.
    # NOTE: the module-level `raise NotImplementedError` near the top of this
    # file fires before execution can get here.
    import itertools
    import random
    import time

    from rdkit import RDConfig, Chem
    from rdkit.Chem.Pharm2D import Gobbi_Pharm2D, Generate

    factory = Gobbi_Pharm2D.factory
    nToDo = 100
    # read only the first nToDo lines and make sure the file gets closed
    with open(RDConfig.RDDataDir + "/NCI/first_5K.smi", 'r') as inF:
        inD = list(itertools.islice(inF, nToDo))

    # the SMILES is the first tab-separated field; str.strip() returns a new
    # string, so its result has to be used (it was previously discarded)
    mols = [Chem.MolFromSmiles(line.split('\t')[0].strip()) for line in inD]

    sig = factory.GetSignature()

    nBits = 300
    random.seed(23)  # fixed seed: both timing runs query the same bits
    bits = [random.randint(0, sig.GetSize() - 1) for _ in range(nBits)]

    print('Using the Lazy Generator')
    t1 = time.time()
    for i, mol in enumerate(mols):
        if not i % 10:
            print('done mol %d of %d' % (i, len(mols)))
        gen = Generator(factory, mol)
        for bit in bits:
            _ = gen[bit]
    t2 = time.time()
    print('\tthat took %4.2f seconds' % (t2 - t1))

    print('Generating and checking signatures')
    t1 = time.time()
    for i, mol in enumerate(mols):
        if not i % 10:
            print('done mol %d of %d' % (i, len(mols)))
        fp = Generate.Gen2DFingerprint(mol, factory)
        for bit in bits:
            _ = fp[bit]
    t2 = time.time()
    print('\tthat took %4.2f seconds' % (t2 - t1))