1
2
3
4
5
6
7
8
9
10
11 """ Atom-based calculation of LogP and MR using Crippen's approach
12
13
14 Reference:
15 S. A. Wildman and G. M. Crippen *JCICS* _39_ 868-873 (1999)
16
17
18 """
19 from __future__ import print_function
20 import os
21 from rdkit import RDConfig
22 from rdkit import Chem
23 from rdkit.Chem import rdMolDescriptors
24 import numpy
25
# Module-level pattern table and its key ordering.  The atom-contribution
# code in this module falls back on these two objects whenever the caller
# does not hand in a pattern table of its own.
_smartsPatterns = dict()
_patternOrder = list()

# Location of the tab-delimited Crippen parameter file inside the RDKit
# data directory.
defaultPatternFileName = os.path.join(RDConfig.RDDataDir, 'Crippen.txt')
30
def _ReadPatts(fileName):
    """ *Internal Use Only*

    parses the pattern list from the data file

    The file is read as tab-delimited text.  A line is ignored when it
    starts with '#', has fewer than four tab-separated fields, has an empty
    first field, or is the column-header row (second field 'SMARTS').

    **Arguments**

      - fileName: name of the tab-delimited pattern file; the fields used
        are: label, SMARTS, logP contribution, MR contribution

    **Returns**

      a 2-tuple (order, patts):

        - order: list of the atom-type keys in order of first appearance

        - patts: dict mapping each atom-type key to a list of
          (smarts, pattern, logP, mr) tuples, in file order

    **Notes**

      - SMARTS strings that cannot be parsed are reported on stdout and
        skipped.

      - a blank MR field is stored as 0.0

    """
    patts = {}
    order = []
    with open(fileName, 'r') as f:
        for line in f:
            if line.startswith('#'):
                continue
            fields = line.split('\t')
            if len(fields) < 4 or fields[0] == '':
                continue

            # str.replace returns a new string, so the result has to be kept;
            # otherwise quoted SMARTS would reach the parser with the quotes
            # still in place.
            sma = fields[1].replace('"', '')
            if sma == 'SMARTS':
                # column-header row
                continue

            p = Chem.MolFromSmarts(sma)
            if not p:
                print('Problems parsing smarts: %s'%(sma))
                continue

            # The atom-type key is the first character of the label, or the
            # first two when the second character is neither a digit nor 'S'.
            label = fields[0]
            if len(label) > 1 and label[1] not in 'S0123456789':
                cha = label[:2]
            else:
                cha = label[0]

            logP = float(fields[2])
            # When MR is the last column it still carries the line terminator,
            # so strip before deciding whether the field is blank.
            mrText = fields[3].strip()
            mr = float(mrText) if mrText else 0.0

            if cha not in patts:
                order.append(cha)
                patts[cha] = []
            patts[cha].append((sma, p, logP, mr))
    return order, patts
67
68 _GetAtomContribs=rdMolDescriptors._CalcCrippenContribs
def _assignAtomContribs(mol, patts, order, atomContribs, verbose):
    """ *Internal Use Only*

    fills *atomContribs* in place with the (logP, MR) pair of the first
    pattern whose match starts at each atom, and returns as soon as every
    atom has been assigned.

    """
    nAtoms = len(atomContribs)
    assigned = [False] * nAtoms
    nFound = 0
    for cha in order:
        for sma, patt, logp, mr in patts[cha]:
            for match in mol.GetSubstructMatches(patt, False, False):
                # only the first atom of a match receives the contribution
                idx = match[0]
                if assigned[idx]:
                    continue
                assigned[idx] = True
                atomContribs[idx] = (logp, mr)
                if verbose:
                    print('\tAtom %d: %s %4.4f %4.4f'%(idx, sma, logp, mr))
                nFound += 1
                if nFound >= nAtoms:
                    # every atom is typed: no later pattern can change
                    # anything, so skip all remaining substructure searches
                    return


def _pyGetAtomContribs(mol, patts=None, order=None, verbose=0, force=0):
    """ *Internal Use Only*

    calculates atomic contributions to the LogP and MR values

    if the argument *force* is not set, we'll use the molecule's stored
    _crippenContribs value when possible instead of re-calculating.

    **Arguments**

      - mol: the molecule; must provide GetNumAtoms() and
        GetSubstructMatches()

      - patts: (optional) dict mapping atom-type keys to lists of
        (smarts, pattern, logP, mr) tuples.  When None, the module-level
        pattern table and ordering are used and *order* is ignored.

      - order: (optional) sequence of keys of *patts*, in the order the
        pattern groups are to be tried

      - verbose: (optional) if true, each assignment is printed

      - force: (optional) if true, any cached result is ignored

    **Returns**

      a list with one (logP, MR) tuple per atom; atoms matched by no
      pattern keep (0.0, 0.0).  The list is also stored on the molecule
      as _crippenContribs.

    **Note:** Changes here affect the version numbers of MolLogP and MolMR
      as well as the VSA descriptors in Chem.MolSurf

    """
    if not force and hasattr(mol, '_crippenContribs'):
        return mol._crippenContribs

    if patts is None:
        patts = _smartsPatterns
        order = _patternOrder

    nAtoms = mol.GetNumAtoms()
    atomContribs = [(0., 0.)] * nAtoms
    # nothing to match for an empty molecule
    if nAtoms:
        _assignAtomContribs(mol, patts, order, atomContribs, verbose)
    mol._crippenContribs = atomContribs
    return atomContribs
111
116
117 -def _pyMolLogP(inMol,patts=None,order=None,verbose=0,addHs=1):
135 _pyMolLogP.version="1.1.0"
136
137 -def _pyMolMR(inMol,patts=None,order=None,verbose=0,addHs=1):
156 _pyMolMR.version="1.1.0"
157
# NOTE(review): every positional and keyword argument is forwarded verbatim to
# rdMolDescriptors.CalcCrippenDescriptors; confirm that it accepts the `addHs`
# keyword documented below.
def MolLogP(*x, **y):
    """ Wildman-Crippen LogP value

    Uses an atom-based scheme based on the values in the paper:
       S. A. Wildman and G. M. Crippen JCICS 39 868-873 (1999)

    **Arguments**

      - inMol: a molecule

      - addHs: (optional) toggles adding of Hs to the molecule for the calculation.
        If true, hydrogens will be added to the molecule and used in the calculation.

    """
    # element 0 of the descriptor result is the logP value
    return rdMolDescriptors.CalcCrippenDescriptors(*x, **y)[0]


MolLogP.version = rdMolDescriptors._CalcCrippenDescriptors_version
173
# NOTE(review): every positional and keyword argument is forwarded verbatim to
# rdMolDescriptors.CalcCrippenDescriptors; confirm that it accepts the `addHs`
# keyword documented below.
def MolMR(*x, **y):
    """ Wildman-Crippen MR value

    Uses an atom-based scheme based on the values in the paper:
       S. A. Wildman and G. M. Crippen JCICS 39 868-873 (1999)

    **Arguments**

      - inMol: a molecule

      - addHs: (optional) toggles adding of Hs to the molecule for the calculation.
        If true, hydrogens will be added to the molecule and used in the calculation.

    """
    # element 1 of the descriptor result is the MR value
    return rdMolDescriptors.CalcCrippenDescriptors(*x, **y)[1]


MolMR.version = rdMolDescriptors._CalcCrippenDescriptors_version
189
if __name__ == '__main__':
    # Command-line driver: prints LogP and MR for every SMILES given as an
    # argument; '-v' anywhere on the command line turns on per-atom output.
    import sys

    smilesArgs = sys.argv[1:]
    verbose = 0
    if '-v' in smilesArgs:
        verbose = 1
        smilesArgs.remove('-v')

    for smi in smilesArgs:
        print('Mol: %s'%(smi))
        m = Chem.MolFromSmiles(smi)
        if m is None:
            # unparsable input: report it and carry on with the next one
            print('Problems parsing smiles: %s'%(smi))
            continue

        # The Python implementations are used here because their signatures
        # take the `verbose` and `addHs` keywords this script relies on;
        # MolLogP/MolMR hand their keywords straight to the compiled
        # descriptor function instead.
        logp = _pyMolLogP(m, verbose=verbose)
        print('----')
        mr = _pyMolMR(m, verbose=verbose)
        print('Res:', logp, mr)

        # Same calculation on a molecule whose hydrogens were added up front.
        newM = Chem.AddHs(m)
        logp = _pyMolLogP(newM, addHs=0)
        mr = _pyMolMR(newM, addHs=0)
        print('\t', logp, mr)
        print('-*-*-*-*-*-*-*-*')
213