Package rdkit :: Package Chem :: Package AtomPairs :: Module Utils
[hide private]
[frames] | no frames]

Source Code for Module rdkit.Chem.AtomPairs.Utils

  1  # $Id$ 
  2  # 
  3  #  Copyright (C) 2004-2006 Rational Discovery LLC 
  4  # 
  5  #   @@ All Rights Reserved @@ 
  6  #  This file is part of the RDKit. 
  7  #  The contents are covered by the terms of the BSD license 
  8  #  which is included in the file license.txt, found at the root 
  9  #  of the RDKit source tree. 
 10  # 
 11  from __future__ import print_function 
 12  from rdkit import Chem 
 13  from rdkit.Chem import rdMolDescriptors 
 14  import math 
 15   
16 -def ExplainAtomCode(code,branchSubtract=0):
17 """ 18 19 **Arguments**: 20 21 - the code to be considered 22 23 - branchSubtract: (optional) the constant that was subtracted off 24 the number of neighbors before integrating it into the code. 25 This is used by the topological torsions code. 26 27 28 >>> m = Chem.MolFromSmiles('C=CC(=O)O') 29 >>> code = GetAtomCode(m.GetAtomWithIdx(0)) 30 >>> ExplainAtomCode(code) 31 ('C', 1, 1) 32 >>> code = GetAtomCode(m.GetAtomWithIdx(1)) 33 >>> ExplainAtomCode(code) 34 ('C', 2, 1) 35 >>> code = GetAtomCode(m.GetAtomWithIdx(2)) 36 >>> ExplainAtomCode(code) 37 ('C', 3, 1) 38 >>> code = GetAtomCode(m.GetAtomWithIdx(3)) 39 >>> ExplainAtomCode(code) 40 ('O', 1, 1) 41 >>> code = GetAtomCode(m.GetAtomWithIdx(4)) 42 >>> ExplainAtomCode(code) 43 ('O', 1, 0) 44 45 """ 46 typeMask = (1<<rdMolDescriptors.AtomPairsParameters.numTypeBits)-1 47 branchMask = (1<<rdMolDescriptors.AtomPairsParameters.numBranchBits)-1 48 piMask = (1<<rdMolDescriptors.AtomPairsParameters.numPiBits)-1 49 50 nBranch = int(code&branchMask) 51 #print(code,end='') 52 code = code>>rdMolDescriptors.AtomPairsParameters.numBranchBits 53 nPi = int(code&piMask) 54 #print(code,end='') 55 code = code>>rdMolDescriptors.AtomPairsParameters.numPiBits 56 #print(code,end='') 57 typeIdx=int(code&typeMask) 58 if typeIdx<len(rdMolDescriptors.AtomPairsParameters.atomTypes): 59 atomNum = rdMolDescriptors.AtomPairsParameters.atomTypes[typeIdx] 60 atomSymbol=Chem.GetPeriodicTable().GetElementSymbol(atomNum) 61 else: 62 atomSymbol='X' 63 return (atomSymbol,nBranch,nPi)
64 65 GetAtomCode=rdMolDescriptors.GetAtomPairAtomCode 66
67 -def NumPiElectrons(atom):
68 """ Returns the number of electrons an atom is using for pi bonding 69 70 >>> m = Chem.MolFromSmiles('C=C') 71 >>> NumPiElectrons(m.GetAtomWithIdx(0)) 72 1 73 74 >>> m = Chem.MolFromSmiles('C#CC') 75 >>> NumPiElectrons(m.GetAtomWithIdx(0)) 76 2 77 >>> NumPiElectrons(m.GetAtomWithIdx(1)) 78 2 79 80 >>> m = Chem.MolFromSmiles('O=C=CC') 81 >>> NumPiElectrons(m.GetAtomWithIdx(0)) 82 1 83 >>> NumPiElectrons(m.GetAtomWithIdx(1)) 84 2 85 >>> NumPiElectrons(m.GetAtomWithIdx(2)) 86 1 87 >>> NumPiElectrons(m.GetAtomWithIdx(3)) 88 0 89 90 FIX: this behaves oddly in these cases: 91 >>> m = Chem.MolFromSmiles('S(=O)(=O)') 92 >>> NumPiElectrons(m.GetAtomWithIdx(0)) 93 2 94 95 >>> m = Chem.MolFromSmiles('S(=O)(=O)(O)O') 96 >>> NumPiElectrons(m.GetAtomWithIdx(0)) 97 0 98 99 In the second case, the S atom is tagged as sp3 hybridized. 100 101 """ 102 103 res = 0 104 if atom.GetIsAromatic(): 105 res = 1 106 elif atom.GetHybridization() != Chem.HybridizationType.SP3: 107 # the number of pi electrons is just the number of 108 # unsaturations (valence - degree): 109 res = atom.GetExplicitValence() - atom.GetNumExplicitHs() 110 if res<atom.GetDegree(): 111 raise ValueError("explicit valence exceeds atom degree") 112 res -= atom.GetDegree() 113 return res
114 115
116 -def BitsInCommon(v1,v2):
117 """ Returns the number of bits in common between two vectors 118 119 **Arguments**: 120 121 - two vectors (sequences of bit ids) 122 123 **Returns**: an integer 124 125 **Notes** 126 127 - the vectors must be sorted 128 129 - duplicate bit IDs are counted more than once 130 131 >>> BitsInCommon( (1,2,3,4,10), (2,4,6) ) 132 2 133 134 Here's how duplicates are handled: 135 >>> BitsInCommon( (1,2,2,3,4), (2,2,4,5,6) ) 136 3 137 138 """ 139 res = 0 140 v2Pos = 0 141 nV2 = len(v2) 142 for val in v1: 143 while v2Pos<nV2 and v2[v2Pos]<val: 144 v2Pos+=1 145 if v2Pos >= nV2: 146 break 147 if v2[v2Pos]==val: 148 res += 1 149 v2Pos += 1 150 return res
151 152
153 -def DiceSimilarity(v1,v2,bounds=None):
154 """ Implements the DICE similarity metric. 155 This is the recommended metric in both the Topological torsions 156 and Atom pairs papers. 157 158 **Arguments**: 159 160 - two vectors (sequences of bit ids) 161 162 **Returns**: a float. 163 164 **Notes** 165 166 - the vectors must be sorted 167 168 169 >>> DiceSimilarity( (1,2,3), (1,2,3) ) 170 1.0 171 >>> DiceSimilarity( (1,2,3), (5,6) ) 172 0.0 173 >>> DiceSimilarity( (1,2,3,4), (1,3,5,7) ) 174 0.5 175 >>> DiceSimilarity( (1,2,3,4,5,6), (1,3) ) 176 0.5 177 178 Note that duplicate bit IDs count multiple times: 179 >>> DiceSimilarity( (1,1,3,4,5,6), (1,1) ) 180 0.5 181 182 but only if they are duplicated in both vectors: 183 >>> DiceSimilarity( (1,1,3,4,5,6), (1,) )==2./7 184 True 185 186 """ 187 denom = 1.0*(len(v1)+len(v2)) 188 if not denom: 189 res = 0.0 190 else: 191 if bounds and (min(len(v1),len(v2))/denom) < bounds: 192 numer = 0.0 193 else: 194 numer = 2.0*BitsInCommon(v1,v2) 195 res = numer/denom 196 197 return res
198 199
200 -def Dot(v1,v2):
201 """ Returns the Dot product between two vectors: 202 203 **Arguments**: 204 205 - two vectors (sequences of bit ids) 206 207 **Returns**: an integer 208 209 **Notes** 210 211 - the vectors must be sorted 212 213 - duplicate bit IDs are counted more than once 214 215 >>> Dot( (1,2,3,4,10), (2,4,6) ) 216 2 217 218 Here's how duplicates are handled: 219 >>> Dot( (1,2,2,3,4), (2,2,4,5,6) ) 220 5 221 >>> Dot( (1,2,2,3,4), (2,4,5,6) ) 222 2 223 >>> Dot( (1,2,2,3,4), (5,6) ) 224 0 225 >>> Dot( (), (5,6) ) 226 0 227 228 """ 229 res = 0 230 nV1 = len(v1) 231 nV2 = len(v2) 232 i = 0 233 j = 0 234 while i<nV1: 235 v1Val = v1[i] 236 v1Count = 1 237 i+=1 238 while i<nV1 and v1[i]==v1Val: 239 v1Count += 1 240 i+=1 241 while j<nV2 and v2[j]<v1Val: 242 j+=1 243 if j < nV2 and v2[j]==v1Val: 244 v2Val = v2[j] 245 v2Count = 1 246 j+=1 247 while j<nV2 and v2[j]==v1Val: 248 v2Count+=1 249 j+=1 250 commonCount=min(v1Count,v2Count) 251 res += commonCount*commonCount 252 elif j>=nV2: 253 break 254 return res
255
256 -def CosineSimilarity(v1,v2):
257 """ Implements the Cosine similarity metric. 258 This is the recommended metric in the LaSSI paper 259 260 **Arguments**: 261 262 - two vectors (sequences of bit ids) 263 264 **Returns**: a float. 265 266 **Notes** 267 268 - the vectors must be sorted 269 270 >>> print('%.3f'%CosineSimilarity( (1,2,3,4,10), (2,4,6) )) 271 0.516 272 >>> print('%.3f'%CosineSimilarity( (1,2,2,3,4), (2,2,4,5,6) )) 273 0.714 274 >>> print('%.3f'%CosineSimilarity( (1,2,2,3,4), (1,2,2,3,4) )) 275 1.000 276 >>> print('%.3f'%CosineSimilarity( (1,2,2,3,4), (5,6,7) )) 277 0.000 278 >>> print('%.3f'%CosineSimilarity( (1,2,2,3,4), () )) 279 0.000 280 281 """ 282 d1 = Dot(v1,v1) 283 d2 = Dot(v2,v2) 284 denom = math.sqrt(d1*d2) 285 if not denom: 286 res = 0.0 287 else: 288 numer = Dot(v1,v2) 289 res = numer/denom 290 return res
291 292 293 294 295 #------------------------------------ 296 # 297 # doctest boilerplate 298 #
299 -def _test():
300 import doctest,sys 301 return doctest.testmod(sys.modules["__main__"])
302 303 304 if __name__ == '__main__': 305 import sys 306 failed,tried = _test() 307 sys.exit(failed) 308