1
2
3
4
5
6
7
8
9
10
11 from __future__ import print_function
12 from rdkit import Chem
13 from rdkit.Chem import rdMolDescriptors
14 import math
15
17 """
18
19 **Arguments**:
20
21 - the code to be considered
22
23 - branchSubtract: (optional) the constant that was subtracted off
24 the number of neighbors before integrating it into the code.
25 This is used by the topological torsions code.
26
27
28 >>> m = Chem.MolFromSmiles('C=CC(=O)O')
29 >>> code = GetAtomCode(m.GetAtomWithIdx(0))
30 >>> ExplainAtomCode(code)
31 ('C', 1, 1)
32 >>> code = GetAtomCode(m.GetAtomWithIdx(1))
33 >>> ExplainAtomCode(code)
34 ('C', 2, 1)
35 >>> code = GetAtomCode(m.GetAtomWithIdx(2))
36 >>> ExplainAtomCode(code)
37 ('C', 3, 1)
38 >>> code = GetAtomCode(m.GetAtomWithIdx(3))
39 >>> ExplainAtomCode(code)
40 ('O', 1, 1)
41 >>> code = GetAtomCode(m.GetAtomWithIdx(4))
42 >>> ExplainAtomCode(code)
43 ('O', 1, 0)
44
45 """
46 typeMask = (1<<rdMolDescriptors.AtomPairsParameters.numTypeBits)-1
47 branchMask = (1<<rdMolDescriptors.AtomPairsParameters.numBranchBits)-1
48 piMask = (1<<rdMolDescriptors.AtomPairsParameters.numPiBits)-1
49
50 nBranch = int(code&branchMask)
51
52 code = code>>rdMolDescriptors.AtomPairsParameters.numBranchBits
53 nPi = int(code&piMask)
54
55 code = code>>rdMolDescriptors.AtomPairsParameters.numPiBits
56
57 typeIdx=int(code&typeMask)
58 if typeIdx<len(rdMolDescriptors.AtomPairsParameters.atomTypes):
59 atomNum = rdMolDescriptors.AtomPairsParameters.atomTypes[typeIdx]
60 atomSymbol=Chem.GetPeriodicTable().GetElementSymbol(atomNum)
61 else:
62 atomSymbol='X'
63 return (atomSymbol,nBranch,nPi)
64
65 GetAtomCode=rdMolDescriptors.GetAtomPairAtomCode
66
68 """ Returns the number of electrons an atom is using for pi bonding
69
70 >>> m = Chem.MolFromSmiles('C=C')
71 >>> NumPiElectrons(m.GetAtomWithIdx(0))
72 1
73
74 >>> m = Chem.MolFromSmiles('C#CC')
75 >>> NumPiElectrons(m.GetAtomWithIdx(0))
76 2
77 >>> NumPiElectrons(m.GetAtomWithIdx(1))
78 2
79
80 >>> m = Chem.MolFromSmiles('O=C=CC')
81 >>> NumPiElectrons(m.GetAtomWithIdx(0))
82 1
83 >>> NumPiElectrons(m.GetAtomWithIdx(1))
84 2
85 >>> NumPiElectrons(m.GetAtomWithIdx(2))
86 1
87 >>> NumPiElectrons(m.GetAtomWithIdx(3))
88 0
89
90 FIX: this behaves oddly in these cases:
91 >>> m = Chem.MolFromSmiles('S(=O)(=O)')
92 >>> NumPiElectrons(m.GetAtomWithIdx(0))
93 2
94
95 >>> m = Chem.MolFromSmiles('S(=O)(=O)(O)O')
96 >>> NumPiElectrons(m.GetAtomWithIdx(0))
97 0
98
99 In the second case, the S atom is tagged as sp3 hybridized.
100
101 """
102
103 res = 0
104 if atom.GetIsAromatic():
105 res = 1
106 elif atom.GetHybridization() != Chem.HybridizationType.SP3:
107
108
109 res = atom.GetExplicitValence() - atom.GetNumExplicitHs()
110 if res<atom.GetDegree():
111 raise ValueError("explicit valence exceeds atom degree")
112 res -= atom.GetDegree()
113 return res
114
115
117 """ Returns the number of bits in common between two vectors
118
119 **Arguments**:
120
121 - two vectors (sequences of bit ids)
122
123 **Returns**: an integer
124
125 **Notes**
126
127 - the vectors must be sorted
128
129 - duplicate bit IDs are counted more than once
130
131 >>> BitsInCommon( (1,2,3,4,10), (2,4,6) )
132 2
133
134 Here's how duplicates are handled:
135 >>> BitsInCommon( (1,2,2,3,4), (2,2,4,5,6) )
136 3
137
138 """
139 res = 0
140 v2Pos = 0
141 nV2 = len(v2)
142 for val in v1:
143 while v2Pos<nV2 and v2[v2Pos]<val:
144 v2Pos+=1
145 if v2Pos >= nV2:
146 break
147 if v2[v2Pos]==val:
148 res += 1
149 v2Pos += 1
150 return res
151
152
154 """ Implements the DICE similarity metric.
155 This is the recommended metric in both the Topological torsions
156 and Atom pairs papers.
157
158 **Arguments**:
159
160 - two vectors (sequences of bit ids)
161
162 **Returns**: a float.
163
164 **Notes**
165
166 - the vectors must be sorted
167
168
169 >>> DiceSimilarity( (1,2,3), (1,2,3) )
170 1.0
171 >>> DiceSimilarity( (1,2,3), (5,6) )
172 0.0
173 >>> DiceSimilarity( (1,2,3,4), (1,3,5,7) )
174 0.5
175 >>> DiceSimilarity( (1,2,3,4,5,6), (1,3) )
176 0.5
177
178 Note that duplicate bit IDs count multiple times:
179 >>> DiceSimilarity( (1,1,3,4,5,6), (1,1) )
180 0.5
181
182 but only if they are duplicated in both vectors:
183 >>> DiceSimilarity( (1,1,3,4,5,6), (1,) )==2./7
184 True
185
186 """
187 denom = 1.0*(len(v1)+len(v2))
188 if not denom:
189 res = 0.0
190 else:
191 if bounds and (min(len(v1),len(v2))/denom) < bounds:
192 numer = 0.0
193 else:
194 numer = 2.0*BitsInCommon(v1,v2)
195 res = numer/denom
196
197 return res
198
199
201 """ Returns the Dot product between two vectors:
202
203 **Arguments**:
204
205 - two vectors (sequences of bit ids)
206
207 **Returns**: an integer
208
209 **Notes**
210
211 - the vectors must be sorted
212
213 - duplicate bit IDs are counted more than once
214
215 >>> Dot( (1,2,3,4,10), (2,4,6) )
216 2
217
218 Here's how duplicates are handled:
219 >>> Dot( (1,2,2,3,4), (2,2,4,5,6) )
220 5
221 >>> Dot( (1,2,2,3,4), (2,4,5,6) )
222 2
223 >>> Dot( (1,2,2,3,4), (5,6) )
224 0
225 >>> Dot( (), (5,6) )
226 0
227
228 """
229 res = 0
230 nV1 = len(v1)
231 nV2 = len(v2)
232 i = 0
233 j = 0
234 while i<nV1:
235 v1Val = v1[i]
236 v1Count = 1
237 i+=1
238 while i<nV1 and v1[i]==v1Val:
239 v1Count += 1
240 i+=1
241 while j<nV2 and v2[j]<v1Val:
242 j+=1
243 if j < nV2 and v2[j]==v1Val:
244 v2Val = v2[j]
245 v2Count = 1
246 j+=1
247 while j<nV2 and v2[j]==v1Val:
248 v2Count+=1
249 j+=1
250 commonCount=min(v1Count,v2Count)
251 res += commonCount*commonCount
252 elif j>=nV2:
253 break
254 return res
255
257 """ Implements the Cosine similarity metric.
258 This is the recommended metric in the LaSSI paper
259
260 **Arguments**:
261
262 - two vectors (sequences of bit ids)
263
264 **Returns**: a float.
265
266 **Notes**
267
268 - the vectors must be sorted
269
270 >>> print('%.3f'%CosineSimilarity( (1,2,3,4,10), (2,4,6) ))
271 0.516
272 >>> print('%.3f'%CosineSimilarity( (1,2,2,3,4), (2,2,4,5,6) ))
273 0.714
274 >>> print('%.3f'%CosineSimilarity( (1,2,2,3,4), (1,2,2,3,4) ))
275 1.000
276 >>> print('%.3f'%CosineSimilarity( (1,2,2,3,4), (5,6,7) ))
277 0.000
278 >>> print('%.3f'%CosineSimilarity( (1,2,2,3,4), () ))
279 0.000
280
281 """
282 d1 = Dot(v1,v1)
283 d2 = Dot(v2,v2)
284 denom = math.sqrt(d1*d2)
285 if not denom:
286 res = 0.0
287 else:
288 numer = Dot(v1,v2)
289 res = numer/denom
290 return res
291
292
293
294
295
296
297
298
300 import doctest,sys
301 return doctest.testmod(sys.modules["__main__"])
302
303
304 if __name__ == '__main__':
305 import sys
306 failed,tried = _test()
307 sys.exit(failed)
308