Package rdkit :: Package Chem :: Package Pharm2D :: Module Utils
[hide private]
[frames] | no frames]

Source Code for Module rdkit.Chem.Pharm2D.Utils

  1  # $Id$ 
  2  # 
  3  # Copyright (C) 2002-2006 greg Landrum and Rational Discovery LLC 
  4  # 
  5  #   @@ All Rights Reserved @@ 
  6  #  This file is part of the RDKit. 
  7  #  The contents are covered by the terms of the BSD license 
  8  #  which is included in the file license.txt, found at the root 
  9  #  of the RDKit source tree. 
 10  # 
 11  """ utility functionality for the 2D pharmacophores code 
 12   
 13    See Docs/Chem/Pharm2D.triangles.jpg for an illustration of the way 
 14    pharmacophores are broken into triangles and labelled. 
 15   
 16    See Docs/Chem/Pharm2D.signatures.jpg for an illustration of bit 
 17    numbering 
 18   
 19  """ 
 20  from __future__ import print_function, division 
 21  import numpy 
 22   
 23  # 
 24  #  number of points in a scaffold -> sequence of distances (p1,p2) in 
 25  #   the scaffold    
 26  # 
 27  nPointDistDict = { 
 28    2: ((0,1),), 
 29    3: ((0,1),(0,2), 
 30        (1,2)), 
 31    4: ((0,1),(0,2),(0,3), 
 32        (1,2),(2,3)), 
 33    5: ((0,1),(0,2),(0,3),(0,4), 
 34        (1,2),(2,3),(3,4)), 
 35    6: ((0,1),(0,2),(0,3),(0,4),(0,5), 
 36        (1,2),(2,3),(3,4),(4,5)), 
 37    7: ((0,1),(0,2),(0,3),(0,4),(0,5),(0,6), 
 38        (1,2),(2,3),(3,4),(4,5),(5,6)), 
 39    8: ((0,1),(0,2),(0,3),(0,4),(0,5),(0,6),(0,7), 
 40        (1,2),(2,3),(3,4),(4,5),(5,6),(6,7)), 
 41    9: ((0,1),(0,2),(0,3),(0,4),(0,5),(0,6),(0,7),(0,8), 
 42        (1,2),(2,3),(3,4),(4,5),(5,6),(6,7),(7,8)), 
 43    10: ((0,1),(0,2),(0,3),(0,4),(0,5),(0,6),(0,7),(0,8),(0,9), 
 44         (1,2),(2,3),(3,4),(4,5),(5,6),(6,7),(7,8),(8,9)), 
 45    } 
 46   
 47  # 
 48  #  number of distances in a scaffold -> number of points in the scaffold 
 49  # 
 50  nDistPointDict = { 
 51    1:2, 
 52    3:3, 
 53    5:4, 
 54    7:5, 
 55    9:6, 
 56    11:7, 
 57    13:8, 
 58    15:9, 
 59    17:10, 
 60    } 
 61   
 62  _trianglesInPharmacophore = {} 
63 -def GetTriangles(nPts):
64 """ returns a tuple with the distance indices for 65 triangles composing an nPts-pharmacophore 66 67 """ 68 global _trianglesInPharmacophore 69 if nPts < 3: return [] 70 res = _trianglesInPharmacophore.get(nPts,[]) 71 if not res: 72 idx1,idx2,idx3=(0,1,nPts-1) 73 while idx1<nPts-2: 74 res.append((idx1,idx2,idx3)) 75 idx1 += 1 76 idx2 += 1 77 idx3 += 1 78 res = tuple(res) 79 _trianglesInPharmacophore[nPts] = res 80 return res
81 82
83 -def _fact(x):
84 if x <= 1: 85 return 1 86 87 accum = 1 88 for i in range(x): 89 accum *= i+1 90 return accum
91
92 -def BinsTriangleInequality(d1,d2,d3):
93 """ checks the triangle inequality for combinations 94 of distance bins. 95 96 the general triangle inequality is: 97 d1 + d2 >= d3 98 the conservative binned form of this is: 99 d1(upper) + d2(upper) >= d3(lower) 100 101 """ 102 if d1[1]+d2[1]<d3[0]: return False 103 if d2[1]+d3[1]<d1[0]: return False 104 if d3[1]+d1[1]<d2[0]: return False 105 106 return True
107
108 -def ScaffoldPasses(combo,bins=None):
109 """ checks the scaffold passed in to see if all 110 contributing triangles can satisfy the triangle inequality 111 112 the scaffold itself (encoded in combo) is a list of binned distances 113 114 """ 115 # this is the number of points in the pharmacophore 116 nPts = nDistPointDict[len(combo)] 117 tris = GetTriangles(nPts) 118 for tri in tris: 119 ds = [bins[combo[x]] for x in tri] 120 if not BinsTriangleInequality(ds[0],ds[1],ds[2]): 121 return False 122 return True
123 124 125 _numCombDict = {}
126 -def NumCombinations(nItems,nSlots):
127 """ returns the number of ways to fit nItems into nSlots 128 129 We assume that (x,y) and (y,x) are equivalent, and 130 (x,x) is allowed. 131 132 General formula is, for N items and S slots: 133 res = (N+S-1)! / ( (N-1)! * S! ) 134 135 """ 136 global _numCombDict 137 res = _numCombDict.get((nItems,nSlots),-1) 138 if res == -1: 139 res = _fact(nItems+nSlots-1) // (_fact(nItems-1)*_fact(nSlots)) 140 _numCombDict[(nItems,nSlots)] = res 141 return res
142 143 _verbose = 0 144 145 _countCache={}
146 -def CountUpTo(nItems,nSlots,vs,idx=0,startAt=0):
147 """ Figures out where a given combination of indices would 148 occur in the combinatorial explosion generated by _GetIndexCombinations_ 149 150 **Arguments** 151 152 - nItems: the number of items to distribute 153 154 - nSlots: the number of slots in which to distribute them 155 156 - vs: a sequence containing the values to find 157 158 - idx: used in the recursion 159 160 - startAt: used in the recursion 161 162 **Returns** 163 164 an integer 165 166 """ 167 global _countCache 168 if _verbose: 169 print(' '*idx,'CountUpTo(%d)'%idx,vs[idx],startAt) 170 if idx==0 and (nItems,nSlots,tuple(vs)) in _countCache: 171 return _countCache[(nItems,nSlots,tuple(vs))] 172 elif idx >= nSlots: 173 accum = 0 174 elif idx == nSlots-1: 175 accum = vs[idx]-startAt 176 else: 177 accum = 0 178 # get the digit at idx correct 179 for i in range(startAt,vs[idx]): 180 nLevsUnder = nSlots-idx-1 181 nValsOver = nItems-i 182 if _verbose: 183 print(' '*idx,' ',i,nValsOver,nLevsUnder, 184 NumCombinations(nValsOver,nLevsUnder)) 185 accum += NumCombinations(nValsOver,nLevsUnder) 186 accum += CountUpTo(nItems,nSlots,vs,idx+1,vs[idx]) 187 if _verbose: print(' '*idx,'>',accum) 188 if idx == 0: 189 _countCache[(nItems,nSlots,tuple(vs))] = accum 190 return accum
191 192 _indexCombinations={}
193 -def GetIndexCombinations(nItems,nSlots,slot=0,lastItemVal=0):
194 """ Generates all combinations of nItems in nSlots without including 195 duplicates 196 197 **Arguments** 198 199 - nItems: the number of items to distribute 200 201 - nSlots: the number of slots in which to distribute them 202 203 - slot: used in recursion 204 205 - lastItemVal: used in recursion 206 207 **Returns** 208 209 a list of lists 210 211 """ 212 global _indexCombinations 213 if not slot and (nItems,nSlots) in _indexCombinations: 214 res = _indexCombinations[(nItems,nSlots)] 215 elif slot >= nSlots: 216 res = [] 217 elif slot == nSlots-1: 218 res = [[x] for x in range(lastItemVal,nItems)] 219 else: 220 res = [] 221 for x in range(lastItemVal,nItems): 222 tmp = GetIndexCombinations(nItems,nSlots,slot+1,x) 223 for entry in tmp: 224 res.append([x]+entry) 225 if not slot: 226 _indexCombinations[(nItems,nSlots)] = res 227 return res
228
229 -def GetAllCombinations(choices,noDups=1,which=0):
230 """ Does the combinatorial explosion of the possible combinations 231 of the elements of _choices_. 232 233 **Arguments** 234 235 - choices: sequence of sequences with the elements to be enumerated 236 237 - noDups: (optional) if this is nonzero, results with duplicates, 238 e.g. (1,1,0), will not be generated 239 240 - which: used in recursion 241 242 **Returns** 243 244 a list of lists 245 246 >>> GetAllCombinations([(0,),(1,),(2,)]) 247 [[0, 1, 2]] 248 >>> GetAllCombinations([(0,),(1,3),(2,)]) 249 [[0, 1, 2], [0, 3, 2]] 250 251 >>> GetAllCombinations([(0,1),(1,3),(2,)]) 252 [[0, 1, 2], [0, 3, 2], [1, 3, 2]] 253 254 """ 255 if which >= len(choices): 256 res = [] 257 elif which == len(choices)-1: 258 res = [[x] for x in choices[which]] 259 else: 260 res = [] 261 tmp = GetAllCombinations(choices,noDups=noDups, 262 which=which+1) 263 for thing in choices[which]: 264 for other in tmp: 265 if not noDups or thing not in other: 266 res.append([thing]+other) 267 return res
268
269 -def GetUniqueCombinations(choices,classes,which=0):
270 """ Does the combinatorial explosion of the possible combinations 271 of the elements of _choices_. 272 273 """ 274 assert len(choices)==len(classes) 275 if which >= len(choices): 276 res = [] 277 elif which == len(choices)-1: 278 res = [[(classes[which],x)] for x in choices[which]] 279 else: 280 res = [] 281 tmp = GetUniqueCombinations(choices,classes, 282 which=which+1) 283 for thing in choices[which]: 284 for other in tmp: 285 idxThere=0 286 for x in other: 287 if x[1]==thing:idxThere+=1 288 if not idxThere: 289 newL = [(classes[which],thing)]+other 290 newL.sort() 291 if newL not in res: 292 res.append(newL) 293 return res
294
295 -def UniquifyCombinations(combos):
296 """ uniquifies the combinations in the argument 297 298 **Arguments**: 299 300 - combos: a sequence of sequences 301 302 **Returns** 303 304 - a list of tuples containing the unique combos 305 306 """ 307 print('>>> u:',combos) 308 resD = {} 309 for combo in combos: 310 k = combo[:] 311 k.sort() 312 resD[tuple(k)] = tuple(combo) 313 print(' >>> u:',resD.values()) 314 return resD.values()
315
316 -def GetPossibleScaffolds(nPts,bins,useTriangleInequality=True):
317 """ gets all realizable scaffolds (passing the triangle inequality) with the 318 given number of points and returns them as a list of tuples 319 320 """ 321 if nPts < 2: 322 res = 0 323 elif nPts == 2: 324 res = [(x,) for x in range(len(bins))] 325 else: 326 nDists = len(nPointDistDict[nPts]) 327 combos = GetAllCombinations([range(len(bins))]*nDists,noDups=0) 328 res = [] 329 for combo in combos: 330 if not useTriangleInequality or ScaffoldPasses(combo,bins): 331 res.append(tuple(combo)) 332 return res
333
334 -def OrderTriangle(featIndices,dists):
335 """ 336 put the distances for a triangle into canonical order 337 338 It's easy if the features are all different: 339 >>> OrderTriangle([0,2,4],[1,2,3]) 340 ([0, 2, 4], [1, 2, 3]) 341 342 It's trickiest if they are all the same: 343 >>> OrderTriangle([0,0,0],[1,2,3]) 344 ([0, 0, 0], [3, 2, 1]) 345 >>> OrderTriangle([0,0,0],[2,1,3]) 346 ([0, 0, 0], [3, 2, 1]) 347 >>> OrderTriangle([0,0,0],[1,3,2]) 348 ([0, 0, 0], [3, 2, 1]) 349 >>> OrderTriangle([0,0,0],[3,1,2]) 350 ([0, 0, 0], [3, 2, 1]) 351 >>> OrderTriangle([0,0,0],[3,2,1]) 352 ([0, 0, 0], [3, 2, 1]) 353 354 >>> OrderTriangle([0,0,1],[3,2,1]) 355 ([0, 0, 1], [3, 2, 1]) 356 >>> OrderTriangle([0,0,1],[1,3,2]) 357 ([0, 0, 1], [1, 3, 2]) 358 >>> OrderTriangle([0,0,1],[1,2,3]) 359 ([0, 0, 1], [1, 3, 2]) 360 >>> OrderTriangle([0,0,1],[1,3,2]) 361 ([0, 0, 1], [1, 3, 2]) 362 363 """ 364 if len(featIndices)!=3: raise ValueError('bad indices') 365 if len(dists)!=3: raise ValueError('bad dists') 366 367 fs = set(featIndices) 368 if len(fs)==3: 369 return featIndices,dists 370 371 dSums=[0]*3 372 dSums[0] = dists[0]+dists[1] 373 dSums[1] = dists[0]+dists[2] 374 dSums[2] = dists[1]+dists[2] 375 mD = max(dSums) 376 if len(fs)==1: 377 if dSums[0]==mD: 378 if dists[0]>dists[1]: 379 ireorder=(0,1,2) 380 dreorder=(0,1,2) 381 else: 382 ireorder=(0,2,1) 383 dreorder=(1,0,2) 384 elif dSums[1]==mD: 385 if dists[0]>dists[2]: 386 ireorder=(1,0,2) 387 dreorder=(0,2,1) 388 else: 389 ireorder=(1,2,0) 390 dreorder=(2,0,1) 391 else: 392 if dists[1]>dists[2]: 393 ireorder = (2,0,1) 394 dreorder=(1,2,0) 395 else: 396 ireorder = (2,1,0) 397 dreorder=(2,1,0) 398 else: 399 # two classes 400 if featIndices[0]==featIndices[1]: 401 if dists[1]>dists[2]: 402 ireorder=(0,1,2) 403 dreorder=(0,1,2) 404 else: 405 ireorder=(1,0,2) 406 dreorder=(0,2,1) 407 elif featIndices[0]==featIndices[2]: 408 if dists[0]>dists[2]: 409 ireorder=(0,1,2) 410 dreorder=(0,1,2) 411 else: 412 ireorder=(2,1,0) 413 dreorder=(2,1,0) 414 else: #featIndices[1]==featIndices[2]: 415 if dists[0]>dists[1]: 416 ireorder=(0,1,2) 417 dreorder=(0,1,2) 418 else: 419 ireorder=(0,2,1) 420 dreorder=(1,0,2) 421 dists = [dists[x] for x in dreorder] 422 featIndices = [featIndices[x] for x in ireorder] 423 return featIndices,dists
424 425 #------------------------------------ 426 # 427 # doctest boilerplate 428 #
429 -def _test():
430 import doctest,sys 431 return doctest.testmod(sys.modules["__main__"])
432 433 434 if __name__ == '__main__': 435 import sys 436 failed,tried = _test() 437 sys.exit(failed) 438