Package rdkit :: Package Chem :: Module Crippen
[hide private]
[frames] | no frames]

Source Code for Module rdkit.Chem.Crippen

  1  # $Id$ 
  2  # 
  3  # Copyright (C) 2002-2008 greg Landrum and Rational Discovery LLC 
  4  # 
  5  #   @@ All Rights Reserved @@ 
  6  #  This file is part of the RDKit. 
  7  #  The contents are covered by the terms of the BSD license 
  8  #  which is included in the file license.txt, found at the root 
  9  #  of the RDKit source tree. 
 10  # 
 11  """ Atom-based calculation of LogP and MR using Crippen's approach 
 12   
 13   
 14      Reference: 
 15        S. A. Wildman and G. M. Crippen *JCICS* _39_ 868-873 (1999) 
 16   
 17   
 18  """ 
 19  from __future__ import print_function 
 20  import os 
 21  from rdkit import RDConfig 
 22  from rdkit import Chem 
 23  from rdkit.Chem import rdMolDescriptors 
 24  import numpy 
 25   
 26  _smartsPatterns = {} 
 27  _patternOrder = [] 
 28  # this is the file containing the atom contributions 
 29  defaultPatternFileName = os.path.join(RDConfig.RDDataDir,'Crippen.txt') 
 30   
31 -def _ReadPatts(fileName):
32 """ *Internal Use Only* 33 34 parses the pattern list from the data file 35 36 """ 37 patts = {} 38 order = [] 39 with open(fileName,'r') as f: 40 lines = f.readlines() 41 for line in lines: 42 if line[0] != '#': 43 splitLine = line.split('\t') 44 if len(splitLine)>=4 and splitLine[0] != '': 45 sma = splitLine[1] 46 if sma!='SMARTS': 47 sma.replace('"','') 48 p = Chem.MolFromSmarts(sma) 49 if p: 50 if len(splitLine[0])>1 and splitLine[0][1] not in 'S0123456789': 51 cha = splitLine[0][:2] 52 else: 53 cha = splitLine[0][0] 54 logP = float(splitLine[2]) 55 if splitLine[3] != '': 56 mr = float(splitLine[3]) 57 else: 58 mr = 0.0 59 if cha not in order: 60 order.append(cha) 61 l = patts.get(cha,[]) 62 l.append((sma,p,logP,mr)) 63 patts[cha] = l 64 else: 65 print('Problems parsing smarts: %s'%(sma)) 66 return order,patts
67 68 _GetAtomContribs=rdMolDescriptors._CalcCrippenContribs
69 -def _pyGetAtomContribs(mol,patts=None,order=None,verbose=0,force=0):
70 """ *Internal Use Only* 71 72 calculates atomic contributions to the LogP and MR values 73 74 if the argument *force* is not set, we'll use the molecules stored 75 _crippenContribs value when possible instead of re-calculating. 76 77 **Note:** Changes here affect the version numbers of MolLogP and MolMR 78 as well as the VSA descriptors in Chem.MolSurf 79 80 """ 81 if not force and hasattr(mol,'_crippenContribs'): 82 return mol._crippenContribs 83 84 if patts is None: 85 patts = _smartsPatterns 86 order = _patternOrder 87 88 nAtoms = mol.GetNumAtoms() 89 atomContribs = [(0.,0.)]*nAtoms 90 doneAtoms=[0]*nAtoms 91 nAtomsFound=0 92 done = False 93 for cha in order: 94 pattVect = patts[cha] 95 for sma,patt,logp,mr in pattVect: 96 #print('try:',entry[0]) 97 for match in mol.GetSubstructMatches(patt,False,False): 98 firstIdx = match[0] 99 if not doneAtoms[firstIdx]: 100 doneAtoms[firstIdx]=1 101 atomContribs[firstIdx] = (logp,mr) 102 if verbose: 103 print('\tAtom %d: %s %4.4f %4.4f'%(match[0],sma,logp,mr)) 104 nAtomsFound+=1 105 if nAtomsFound>=nAtoms: 106 done=True 107 break 108 if done: break 109 mol._crippenContribs = atomContribs 110 return atomContribs
111
112 -def _Init():
113 global _smartsPatterns,_patternOrder 114 if _smartsPatterns == {}: 115 _patternOrder,_smartsPatterns = _ReadPatts(defaultPatternFileName)
116
117 -def _pyMolLogP(inMol,patts=None,order=None,verbose=0,addHs=1):
118 """ DEPRECATED 119 """ 120 if addHs < 0: 121 mol = Chem.AddHs(inMol,1) 122 elif addHs > 0: 123 mol = Chem.AddHs(inMol,0) 124 else: 125 mol = inMol 126 127 if patts is None: 128 global _smartsPatterns,_patternOrder 129 if _smartsPatterns == {}: 130 _patternOrder,_smartsPatterns = _ReadPatts(defaultPatternFileName) 131 patts = _smartsPatterns 132 order = _patternOrder 133 atomContribs = _pyGetAtomContribs(mol,patts,order,verbose=verbose) 134 return numpy.sum(atomContribs,0)[0]
135 _pyMolLogP.version="1.1.0" 136
137 -def _pyMolMR(inMol,patts=None,order=None,verbose=0,addHs=1):
138 """ DEPRECATED 139 """ 140 if addHs < 0: 141 mol = Chem.AddHs(inMol,1) 142 elif addHs > 0: 143 mol = Chem.AddHs(inMol,0) 144 else: 145 mol = inMol 146 147 if patts is None: 148 global _smartsPatterns,_patternOrder 149 if _smartsPatterns == {}: 150 _patternOrder,_smartsPatterns = _ReadPatts(defaultPatternFileName) 151 patts = _smartsPatterns 152 order = _patternOrder 153 154 atomContribs = _pyGetAtomContribs(mol,patts,order,verbose=verbose) 155 return numpy.sum(atomContribs,0)[1]
156 _pyMolMR.version="1.1.0" 157 158 MolLogP=lambda *x,**y:rdMolDescriptors.CalcCrippenDescriptors(*x,**y)[0] 159 MolLogP.version=rdMolDescriptors._CalcCrippenDescriptors_version 160 MolLogP.__doc__=""" Wildman-Crippen LogP value 161 162 Uses an atom-based scheme based on the values in the paper: 163 S. A. Wildman and G. M. Crippen JCICS 39 868-873 (1999) 164 165 **Arguments** 166 167 - inMol: a molecule 168 169 - addHs: (optional) toggles adding of Hs to the molecule for the calculation. 170 If true, hydrogens will be added to the molecule and used in the calculation. 171 172 """ 173 174 MolMR=lambda *x,**y:rdMolDescriptors.CalcCrippenDescriptors(*x,**y)[1] 175 MolMR.version=rdMolDescriptors._CalcCrippenDescriptors_version 176 MolMR.__doc__=""" Wildman-Crippen MR value 177 178 Uses an atom-based scheme based on the values in the paper: 179 S. A. Wildman and G. M. Crippen JCICS 39 868-873 (1999) 180 181 **Arguments** 182 183 - inMol: a molecule 184 185 - addHs: (optional) toggles adding of Hs to the molecule for the calculation. 186 If true, hydrogens will be added to the molecule and used in the calculation. 187 188 """ 189 190 if __name__=='__main__': 191 import sys 192 193 if len(sys.argv): 194 ms = [] 195 verbose=0 196 if '-v' in sys.argv: 197 verbose=1 198 sys.argv.remove('-v') 199 for smi in sys.argv[1:]: 200 ms.append((smi,Chem.MolFromSmiles(smi))) 201 202 for smi,m in ms: 203 print('Mol: %s'%(smi)) 204 logp = MolLogP(m,verbose=verbose) 205 print('----') 206 mr = MolMR(m,verbose=verbose) 207 print('Res:',logp,mr) 208 newM = Chem.AddHs(m) 209 logp = MolLogP(newM,addHs=0) 210 mr = MolMR(newM,addHs=0) 211 print('\t',logp,mr) 212 print('-*-*-*-*-*-*-*-*') 213