1
2
3
4
5
6 """ The "parser" for compound descriptors.
7
8 I almost hesitate to document this, because it's not the prettiest
9 thing the world has ever seen... but it does work (for at least some
10 definitions of the word).
11
12 Rather than getting into the whole mess of writing a parser for the
13 compound descriptor expressions, I'm just using string substitutions
14 and python's wonderful ability to *eval* code.
15
16 It would probably be a good idea at some point to replace this with a
17 real parser, if only for the flexibility and intelligent error
18 messages that would become possible.
19
20 The general idea is that we're going to deal with expressions where
21 atomic descriptors have some kind of method applied to them which
22 reduces them to a single number for the entire composition. Compound
23 descriptors (those applicable to the compound as a whole) are not
24 operated on by anything in particular (except for standard math stuff).
25
26 Here's the general flow of things:
27
 1) Composition descriptor references ($a, $b, etc.) are replaced with the
    corresponding descriptor names using string substitution.
30 (*_SubForCompoundDescriptors*)
31
32 2) Atomic descriptor references ($1, $2, etc) are replaced with lookups
33 into the atomic dict with "DEADBEEF" in place of the atom name.
34 (*_SubForAtomicVars*)
35
36 3) Calls to Calculator Functions are augmented with a reference to
37 the composition and atomic dictionary
38 (*_SubMethodArgs*)
39
40 **NOTE:**
41
42 anytime we don't know the answer for a descriptor, rather than
43 throwing a (completely incomprehensible) exception, we just return
44 -666. So bad descriptor values should stand out like sore thumbs.
45
46 """
47 from __future__ import print_function
48 __DEBUG=0
49 from rdkit import RDConfig
50
51
52 from math import *
53
54
55
56
57
# Names of the calculator methods whose calls get their argument lists
# rewritten by _SubMethodArgs() before an expression is evaluated.
knownMethods = ['SUM','MIN','MAX','MEAN','AVG','DEV','HAS']
59
def HAS(strArg, composList, atomDict):
    """ *Calculator Method*

    does a string search

    **Arguments**

      - strArg: the arguments in string form.  Two comma-separated
        expressions are expected: the first is evaluated once per atom
        (with 'DEADBEEF' replaced by the atom name) and must yield a
        string; the second is evaluated as written and is the substring
        to look for.

      - composList: the composition vector, a sequence of
        (atom, amount) pairs

      - atomDict: the atomic dictionary (referenced by name from the
        evaluated expressions)

    **Returns**

      1 if the substring is found for any atom in the composition,
      0 if it is found for none, -666 if fewer than two arguments
      were supplied

    """
    # str.split() is used instead of string.split(): the `string` module is
    # not imported by this file and string.split() is gone in Python 3.
    splitArgs = strArg.split(',')
    if len(splitArgs) < 2:
        return -666

    for atom, num in composList:
        # NOTE: both pieces are passed to eval(); they must come from a
        # trusted source.  eval() runs in this function's scope, which is
        # how the expressions reach `atomDict`.
        where = eval(splitArgs[0].replace('DEADBEEF', atom))
        what = eval(splitArgs[1])
        if where.find(what) != -1:
            return 1
    return 0
89
def SUM(strArg, composList, atomDict):
    """ *Calculator Method*

    composition-weighted sum of a descriptor expression

    **Arguments**

      - strArg: the expression in string form; every 'DEADBEEF' in it
        is replaced by the atom name before it is evaluated

      - composList: the composition vector, a sequence of
        (atom, amount) pairs

      - atomDict: the atomic dictionary (referenced by name from the
        evaluated expression)

    **Returns**

      a float: the sum over the composition of value*amount
      (0.0 for an empty composition)

    """
    total = 0.0
    for atom, num in composList:
        # NOTE: eval() of the expression text -- trusted input only.  It
        # runs in this function's scope so that `atomDict` resolves.
        total += eval(strArg.replace('DEADBEEF', atom)) * num
    return total
113
def MEAN(strArg, composList, atomDict):
    """ *Calculator Method*

    composition-weighted average of a descriptor expression

    **Arguments**

      - strArg: the expression in string form; every 'DEADBEEF' in it
        is replaced by the atom name before it is evaluated

      - composList: the composition vector, a sequence of
        (atom, amount) pairs

      - atomDict: the atomic dictionary (referenced by name from the
        evaluated expression)

    **Returns**

      a float: sum(value*amount) / sum(amount)

    **Notes**

      - an empty composition (total amount of zero) raises
        ZeroDivisionError

    """
    weightedTotal = 0.0
    totalAmount = 0
    for atom, num in composList:
        # NOTE: eval() of the expression text -- trusted input only.  It
        # runs in this function's scope so that `atomDict` resolves.
        weightedTotal += eval(strArg.replace('DEADBEEF', atom)) * num
        totalAmount += num
    return weightedTotal / totalAmount


# AVG is an alias for MEAN
AVG = MEAN
140
def DEV(strArg, composList, atomDict):
    """ *Calculator Method*

    composition-weighted average absolute deviation of a descriptor
    expression from its MEAN()

    **Arguments**

      - strArg: the expression in string form; every 'DEADBEEF' in it
        is replaced by the atom name before it is evaluated

      - composList: the composition vector, a sequence of
        (atom, amount) pairs

      - atomDict: the atomic dictionary (referenced by name from the
        evaluated expression)

    **Returns**

      a float: sum(|value - mean|*amount) / sum(amount)

    """
    center = MEAN(strArg, composList, atomDict)
    deviation = 0.0
    totalAmount = 0.0
    for atom, num in composList:
        # NOTE: eval() of the expression text -- trusted input only.  It
        # runs in this function's scope so that `atomDict` resolves.
        deviation += abs(eval(strArg.replace('DEADBEEF', atom)) - center) * num
        totalAmount += num
    return deviation / totalAmount
167
def MIN(strArg, composList, atomDict):
    """ *Calculator Method*

    smallest value of a descriptor expression across a composition

    **Arguments**

      - strArg: the expression in string form; every 'DEADBEEF' in it
        is replaced by the atom name before it is evaluated

      - composList: the composition vector, a sequence of
        (atom, amount) pairs; the amounts are not used

      - atomDict: the atomic dictionary (referenced by name from the
        evaluated expression)

    **Returns**

      the minimum of the per-atom values

    **Notes**

      - an empty composition makes min() raise ValueError

    """
    values = []
    for atom, num in composList:
        # NOTE: eval() of the expression text -- trusted input only.  It
        # runs in this function's scope so that `atomDict` resolves.
        perAtomExpr = strArg.replace('DEADBEEF', atom)
        values.append(eval(perAtomExpr))
    return min(values)
191
def MAX(strArg, composList, atomDict):
    """ *Calculator Method*

    largest value of a descriptor expression across a composition

    **Arguments**

      - strArg: the expression in string form; every 'DEADBEEF' in it
        is replaced by the atom name before it is evaluated

      - composList: the composition vector, a sequence of
        (atom, amount) pairs; the amounts are not used

      - atomDict: the atomic dictionary (referenced by name from the
        evaluated expression)

    **Returns**

      the maximum of the per-atom values

    **Notes**

      - an empty composition makes max() raise ValueError

    """
    values = []
    for atom, num in composList:
        # NOTE: eval() of the expression text -- trusted input only.  It
        # runs in this function's scope so that `atomDict` resolves.
        perAtomExpr = strArg.replace('DEADBEEF', atom)
        values.append(eval(perAtomExpr))
    return max(values)
215
216
217
218
219
220
221
223 """ replace atomic variables with the appropriate dictionary lookup
224
225 *Not intended for client use*
226
227 """
228 for i in range(len(varList)):
229 cExpr = cExpr.replace('$%d'%(i+1),
230 '%s["DEADBEEF"]["%s"]'%(dictName,varList[i]))
231 return cExpr
232
234 """ replace compound variables with the appropriate list index
235
236 *Not intended for client use*
237
238 """
239 for i in range(len(varList)):
240 cExpr = cExpr.replace('$%s'%chr(ord('a')+i),
241 '%s["%s"]'%(dictName,varList[i]))
242 return cExpr
243
245 """ alters the arguments of calls to calculator methods
246
247 *Not intended for client use*
248
249 This is kind of putrid (and the code ain't so pretty either)
250 The general idea is that the various special methods for atomic
251 descriptors need two extra arguments (the composition and the atomic
252 dict). Rather than make the user type those in, we just find
253 invocations of these methods and fill out the function calls using
254 string replacements.
255 """
256 res = cExpr
257 for method in knownMethods:
258 p = 0
259 while p != -1 and p < len(res):
260 p = res.find(method,p)
261 if p != -1:
262 p = p + len(method) + 1
263 start = p
264 parenCount = 1
265 while parenCount and p < len(res):
266 if res[p] == ')':
267 parenCount = parenCount - 1
268 elif res[p] == '(':
269 parenCount = parenCount + 1
270 p = p + 1
271 if p <= len(res):
272 res = res[0:start]+"'%s',compos,atomDict"%(res[start:p-1])+res[p-1:]
273 return res
274
def CalcSingleCompoundDescriptor(compos, argVect, atomDict, propDict):
    """ calculates the value of the descriptor for a single compound

    **ARGUMENTS:**

      - compos: a vector/tuple containing the composition
         information... in the form:
         '[("Fe",1.),("Pt",2.),("Rh",0.02)]'

      - argVect: a vector/tuple with three elements:

           1) AtomicDescriptorNames:  a list/tuple of the names of the
             atomic descriptors being used. These determine the
             meaning of $1, $2, etc. in the expression

           2) CompoundDescriptorNames:  a list/tuple of the names of the
             compound descriptors being used. These determine the
             meaning of $a, $b, etc. in the expression

           3) Expr: a string containing the expression to be used to
             evaluate the final result.

      - atomDict:
           a dictionary of atomic descriptors.  Each atomic entry is
           another dictionary containing the individual descriptors
           and their values

      - propDict:
           the descriptors for the composition; $a, $b, etc. are
           turned into lookups in it by descriptor name

    **RETURNS:**

      the value of the descriptor, -666 if a problem was encountered

    **RAISES:**

      RuntimeError, but only when the module-level __DEBUG flag is
      set; otherwise failures are reported through the -666 value

    **NOTE:**

      - because it takes rather a lot of work to get everything set
          up to calculate a descriptor, if you are calculating the
          same descriptor for multiple compounds, you probably want to
          be calling _CalcMultipleCompoundsDescriptor()_.

      - the rewritten expression is passed to eval() and refers to
          the parameters *compos*, *atomDict* and *propDict* by name;
          only expressions from trusted sources should be used.

    """
    # Bound up front so that the debug output below can always print them,
    # even if the failure happened before they would have been assigned.
    formula = None
    evalTarget = None
    try:
        atomVarNames = argVect[0]
        compositionVarNames = argVect[1]
        formula = argVect[2]
        formula = _SubForCompoundDescriptors(formula, compositionVarNames, 'propDict')
        formula = _SubForAtomicVars(formula, atomVarNames, 'atomDict')
        evalTarget = _SubMethodArgs(formula, knownMethods)
    except Exception:
        if __DEBUG:
            import traceback
            print('Sub Failure!')
            traceback.print_exc()
            print(evalTarget)
            print(propDict)
            raise RuntimeError('Failure 1')
        else:
            return -666

    try:
        # evaluated in this function's scope: the target names compos,
        # atomDict and propDict
        v = eval(evalTarget)
    except Exception:
        if __DEBUG:
            import traceback
            # the with-block closes the log even if one of the writes fails
            with open(RDConfig.RDCodeDir+'/ml/descriptors/log.txt', 'a+') as outF:
                outF.write('#------------------------------\n')
                outF.write('formula: %s\n'%repr(formula))
                outF.write('target: %s\n'%repr(evalTarget))
                outF.write('propDict: %s\n'%(repr(propDict)))
                outF.write('keys: %s\n'%(repr(sorted(atomDict))))
            print('ick!')
            print('formula:', formula)
            print('target:', evalTarget)
            print('propDict:', propDict)
            print('keys:', atomDict.keys())
            traceback.print_exc()
            raise RuntimeError('Failure 2')
        else:
            v = -666
    return v
358
def CalcMultipleCompoundsDescriptor(composVect, argVect, atomDict, propDictList):
    """ calculates the value of the descriptor for a list of compounds

    The expression is prepared once and then evaluated for every
    compound in turn.

    **ARGUMENTS:**

      - composVect: a vector of vector/tuple containing the composition
         information, one entry per compound.
         See _CalcSingleCompoundDescriptor()_ for an explanation of the
         elements.

      - argVect: a vector/tuple with three elements:

           1) AtomicDescriptorNames:  a list/tuple of the names of the
             atomic descriptors being used. These determine the
             meaning of $1, $2, etc. in the expression

           2) CompoundDescriptorNames:  a list/tuple of the names of the
             compound descriptors being used. These determine the
             meaning of $a, $b, etc. in the expression

           3) Expr: a string containing the expression to be used to
             evaluate the final result.

      - atomDict:
           a dictionary of atomic descriptors.  Each atomic entry is
           another dictionary containing the individual descriptors
           and their values

      - propDictList:
           the compound descriptors, one entry per compound, indexed in
           step with composVect

    **RETURNS:**

      a vector containing the values of the descriptor for each
      compound.  Any given entry will be -666 if problems were
      encountered; if the expression itself cannot be prepared, every
      entry is -666

    **NOTE:**

      - the rewritten expression is passed to eval() and refers to
          the local names *compos*, *atomDict* and *propDict*; only
          expressions from trusted sources should be used.

    """
    nCompounds = len(composVect)
    res = [-666]*nCompounds

    # build the eval target a single time; it is the same for every compound
    try:
        atomVarNames, compositionVarNames, formula = argVect[0], argVect[1], argVect[2]
        formula = _SubForCompoundDescriptors(formula, compositionVarNames, 'propDict')
        formula = _SubForAtomicVars(formula, atomVarNames, 'atomDict')
        evalTarget = _SubMethodArgs(formula, knownMethods)
    except Exception:
        return res

    for idx in range(nCompounds):
        # these two names (and atomDict) are what the eval target refers to,
        # so they must be locals with exactly these names
        propDict = propDictList[idx]
        compos = composVect[idx]
        try:
            res[idx] = eval(evalTarget)
        except Exception:
            res[idx] = -666
    return res
415
416
417
418
if __name__ == '__main__':
    # Demo: evaluate a handful of expressions for a two-atom composition,
    # first through the single-compound entry point and then through the
    # multi-compound one (fed the same compound twice).
    aDict = {'Fe':{'d1':1.,'d2':2.},'Pt':{'d1':10.,'d2':20.}}
    pDict = {'d1':100.,'d2':200.}
    compos = [('Fe',1),('Pt',1)]

    # atomic descriptor names ($1, $2) followed by compound ones ($a, $b)
    descriptorNames = [['d1','d2'],['d1','d2']]

    # the last entry, "foo", is not a name this script defines
    cExprs = [
        "SUM($1)",
        "SUM($1)+SUM($2)",
        "SUM($1)+SUM($1)",
        "MEAN($1)",
        "DEV($2)",
        "MAX($1)",
        "MIN($1)/MAX($1)",
        "MIN($2)",
        "SUM($1)/$a",
        "sqrt($a+$b)",
        "SUM((3.*$1)/($2))",
        "foo",
    ]

    for cExpr in cExprs:
        argVect = descriptorNames + [cExpr]
        print(cExpr)
        print(CalcSingleCompoundDescriptor(compos, argVect, aDict, pDict))
        print(CalcMultipleCompoundsDescriptor([compos, compos], argVect, aDict, [pDict, pDict]))
433