1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32 import sys
33 from rdkit import Chem,DataStructs
34
35
36 rdkitFpParams=dict(maxPath=5,fpSize=1024,nBitsPerHash=2)
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
61
62
63 em = Chem.EditableMol(mol)
64
65
66
67
68 for b in bonds:
69
70 em.RemoveBond(b[0],b[1])
71
72
73 newAtomA = em.AddAtom(Chem.Atom(0))
74 em.AddBond(b[0],newAtomA,Chem.BondType.SINGLE)
75
76 newAtomB = em.AddAtom(Chem.Atom(0))
77 em.AddBond(b[1],newAtomB,Chem.BondType.SINGLE)
78
79
80
81 modifiedMol = em.GetMol()
82
83
84 Chem.SanitizeMol(modifiedMol,Chem.SanitizeFlags.SANITIZE_PROPERTIES|Chem.SanitizeFlags.SANITIZE_SYMMRINGS)
85
86 fragmented_smi = Chem.MolToSmiles(modifiedMol,True)
87
88
89 fraggle_framentation = select_fragments(fragmented_smi,ftype,hac)
90
91 return fraggle_framentation
92
94
95
96 atom_count = 0
97 valid = False
98
99 m = Chem.MolFromSmiles(smi)
100 if m is not None:
101
102 if(m.HasSubstructMatch(cSma1) or m.HasSubstructMatch(cSma2)):
103 atom_count = m.GetNumAtoms()
104 valid = True
105
106 return valid,atom_count
107
108
110
111 result = ""
112 result_hcount = 0
113 fragments = f_smi.split(".")
114
115 if(ftype == "acyclic"):
116 for f in fragments:
117 attachments = f.count("*")
118
119
120 if(attachments == 1):
121 fMol = Chem.MolFromSmiles(f)
122 fhac = fMol.GetNumAtoms()
123
124
125
126
127
128 if(fhac > 3):
129 result = "%s.%s" % (result,f)
130 result_hcount = result_hcount + fhac
131
132
133 if( (result != "") and (result_hcount > 0.6*hac) ):
134
135 result = result[1:]
136 else:
137 result = None
138
139 elif(ftype == "cyclic"):
140 result = None
141
142 if( len(fragments) == 2):
143 for f in fragments:
144
145 valid,result_hcount = is_ring_cut_valid(f)
146
147 if(valid):
148
149 if((result_hcount > 3) and (result_hcount > 0.4*hac)):
150 result = f
151
152
153 elif(ftype == "cyclic_and_acyclic"):
154
155 result = ""
156
157
158
159 for f in fragments:
160 attachments = f.count("*")
161
162 if(attachments >= 3):
163 continue
164
165 if(attachments == 2):
166
167 valid,result_hcount = is_ring_cut_valid(f)
168 if(valid):
169
170 if(result_hcount > 3):
171 result = "%s.%s" % (result,f)
172
173 elif(attachments == 1):
174 fMol = Chem.MolFromSmiles(f)
175 fhac = fMol.GetNumAtoms()
176
177 if(fhac > 3):
178 result = "%s.%s" % (result,f)
179 result_hcount = result_hcount + fhac
180
181
182
183
184
185
186 if( (result != "") and (result.count(".") == 2) ):
187
188 fMol = Chem.MolFromSmiles(result[1:])
189 result_hcount = fMol.GetNumAtoms()
190
191 if((result_hcount > 3) and (result_hcount > 0.6*hac)):
192
193 result = result[1:]
194 else:
195 result = None
196 else:
197 result = None
198
199 return result
200
201
202
203 acyc_smarts = Chem.MolFromSmarts("[*]!@!=!#[*]")
204
205
206 cyc_smarts = Chem.MolFromSmarts("[R1,R2]@[r;!R1]")
207
208
209
210
211
212 cSma1 = Chem.MolFromSmarts("[#0][r].[r][#0]")
213 cSma2 = Chem.MolFromSmarts("[#0][r][#0]")
214
216 """
217 >>> q = Chem.MolFromSmiles('COc1cc(CN2CCC(NC(=O)c3cncc(C)c3)CC2)c(OC)c2ccccc12')
218 >>> list(generate_fraggle_fragmentation(q))
219 ['[*]C(=O)NC1CCN(Cc2cc(OC)c3ccccc3c2OC)CC1', '[*]C(=O)c1cncc(C)c1.[*]C1CCN(Cc2cc(OC)c3ccccc3c2OC)CC1', '[*]C(=O)c1cncc(C)c1.[*]Cc1cc(OC)c2ccccc2c1OC', '[*]C(=O)c1cncc(C)c1.[*]c1cc(OC)c2ccccc2c1OC', '[*]C1CCN(Cc2cc(OC)c3ccccc3c2OC)CC1', '[*]C1CCN(Cc2cc(OC)c3ccccc3c2OC)CC1.[*]c1cncc(C)c1', '[*]Cc1cc(OC)c2ccccc2c1OC.[*]NC(=O)c1cncc(C)c1', '[*]Cc1cc(OC)c2ccccc2c1OC.[*]c1cncc(C)c1', '[*]N1CCC(NC(=O)c2cncc(C)c2)CC1.[*]c1cc(OC)c2ccccc2c1OC', '[*]NC(=O)c1cncc(C)c1.[*]c1cc(OC)c2ccccc2c1OC', '[*]NC1CCN(Cc2cc(OC)c3ccccc3c2OC)CC1', '[*]NC1CCN(Cc2cc(OC)c3ccccc3c2OC)CC1.[*]c1cncc(C)c1', '[*]c1c(CN2CCC(NC(=O)c3cncc(C)c3)CC2)cc(OC)c2ccccc12', '[*]c1c(OC)cc(CN2CCC(NC(=O)c3cncc(C)c3)CC2)c(OC)c1[*]', '[*]c1cc(CN2CCC(NC(=O)c3cncc(C)c3)CC2)c(OC)c2ccccc12', '[*]c1cc(OC)c2ccccc2c1OC.[*]c1cncc(C)c1']
220 """
221
222 hac = mol.GetNumAtoms()
223
224
225
226 out_fragments = set()
227
228
229
230
231
232
233 acyclic_matching_atoms = mol.GetSubstructMatches(acyc_smarts)
234
235
236
237 total_acyclic = len(acyclic_matching_atoms)
238 bonds_selected = []
239
240
241 for x in range( total_acyclic ):
242
243
244
245 for y in range(x+1,total_acyclic):
246
247 bonds_selected.append(acyclic_matching_atoms[x])
248 bonds_selected.append(acyclic_matching_atoms[y])
249 fragment = delete_bonds(mol,bonds_selected,"acyclic",hac)
250 if fragment is not None:
251
252 out_fragments.add(fragment)
253 bonds_selected = []
254
255
256
257
258
259
260
261 cyclic_matching_atoms = mol.GetSubstructMatches(cyc_smarts)
262
263
264
265
266 total_cyclic = len(cyclic_matching_atoms)
267 bonds_selected = []
268
269
270 for x in range( total_cyclic ):
271 for y in range(x+1,total_cyclic):
272
273 bonds_selected.append(cyclic_matching_atoms[x])
274 bonds_selected.append(cyclic_matching_atoms[y])
275 fragment = delete_bonds(mol,bonds_selected,"cyclic",hac)
276 bonds_selected = []
277
278 if fragment is not None:
279
280 out_fragments.add(fragment)
281
282
283 for z in range(total_acyclic):
284 bonds_selected.append(cyclic_matching_atoms[x])
285 bonds_selected.append(cyclic_matching_atoms[y])
286 bonds_selected.append(acyclic_matching_atoms[z])
287 fragment = delete_bonds(mol,bonds_selected,"cyclic_and_acyclic",hac)
288 if fragment is not None:
289
290 out_fragments.add(fragment)
291 bonds_selected = []
292
293 return sorted(list(out_fragments))
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
312 marked = {}
313
314 def partialFP(atomID,tverskyThresh):
315
316
317 modifiedFP = DataStructs.ExplicitBitVect(1024)
318
319 modifiedFP.SetBitsFromList(aBits[atomID])
320
321 tverskySim = DataStructs.TverskySimilarity(subsFp,modifiedFP,0,1)
322
323 if(tverskySim < tverskyThresh):
324
325 marked[atomID] = 1
326
327
328 aBits = [];
329
330 pMol = Chem.Mol(mol.ToBinary())
331 pMolFp = Chem.RDKFingerprint(pMol, atomBits=aBits, **rdkitFpParams)
332
333
334 qsMol = Chem.MolFromSmiles(subs)
335 subsFp = Chem.RDKFingerprint(qsMol, **rdkitFpParams)
336
337
338 for atom in pMol.GetAtoms():
339
340 partialFP(atom.GetIdx(),tverskyThresh)
341
342
343 ssr = pMol.GetRingInfo().AtomRings()
344
345
346 ringsToChange = {}
347 for ringList in range(len(ssr)):
348
349 for ringAtom in range(len(ssr[ringList])):
350
351 if ssr[ringList][ringAtom] in marked:
352
353 ringsToChange[ringList] = 1
354
355
356 for ringList in ringsToChange:
357 for ringAtom in range(len(ssr[ringList])):
358 marked[ ssr[ringList][ringAtom] ] = 1
359
360 if(len(marked) > 0):
361
362 for key in marked:
363
364 if( pMol.GetAtomWithIdx(key).GetIsAromatic() ):
365
366
367 pMol.GetAtomWithIdx(key).SetAtomicNum(0)
368 pMol.GetAtomWithIdx(key).SetNoImplicit(True)
369 else:
370
371 pMol.GetAtomWithIdx(key).SetAtomicNum(21)
372
373
374
375 try:
376 Chem.SanitizeMol(pMol,sanitizeOps=Chem.SANITIZE_ALL^Chem.SANITIZE_KEKULIZE^Chem.SANITIZE_SETAROMATICITY)
377 except Exception:
378 sys.stderr.write("Can't parse smiles: %s\n" % (Chem.MolToSmiles(pMol)))
379 pMol = Chem.Mol(mol.ToBinary())
380
381 return pMol
382
383
384 modified_query_fps = {}
386 qFP = Chem.RDKFingerprint(qMol, **rdkitFpParams)
387 iFP = Chem.RDKFingerprint(inMol, **rdkitFpParams)
388
389 rdkit_sim = DataStructs.TanimotoSimilarity(qFP,iFP)
390
391 qm_key = "%s_%s" % (qSubs,qSmi)
392 if qm_key in modified_query_fps:
393 qmMolFp = modified_query_fps[qm_key]
394 else:
395 qmMol = atomContrib(qSubs,qMol,tverskyThresh)
396 qmMolFp = Chem.RDKFingerprint(qmMol, **rdkitFpParams)
397 modified_query_fps[qm_key] = qmMolFp
398
399 rmMol = atomContrib(qSubs,inMol,tverskyThresh)
400
401
402 try:
403 rmMolFp = Chem.RDKFingerprint(rmMol, **rdkitFpParams)
404
405 fraggle_sim=max(DataStructs.FingerprintSimilarity(qmMolFp,rmMolFp),
406 rdkit_sim)
407
408
409 except Exception:
410 sys.stderr.write("Can't generate fp for: %s\n" % (Chem.MolToSmiles(rmMol,True)))
411 fraggle_sim = 0.0
412
413 return rdkit_sim,fraggle_sim
414
416 """ return the Fraggle similarity between two molecules
417
418 >>> q = Chem.MolFromSmiles('COc1cc(CN2CCC(NC(=O)c3cncc(C)c3)CC2)c(OC)c2ccccc12')
419 >>> m = Chem.MolFromSmiles('COc1cc(CN2CCC(NC(=O)c3ccccc3)CC2)c(OC)c2ccccc12')
420 >>> sim,match = GetFraggleSimilarity(q,m)
421 >>> sim
422 0.980...
423 >>> match
424 '[*]C1CCN(Cc2cc(OC)c3ccccc3c2OC)CC1'
425
426 >>> m = Chem.MolFromSmiles('COc1cc(CN2CCC(Nc3nc4ccccc4s3)CC2)c(OC)c2ccccc12')
427 >>> sim,match = GetFraggleSimilarity(q,m)
428 >>> sim
429 0.794...
430 >>> match
431 '[*]C1CCN(Cc2cc(OC)c3ccccc3c2OC)CC1'
432
433 >>> q = Chem.MolFromSmiles('COc1ccccc1')
434 >>> sim,match = GetFraggleSimilarity(q,m)
435 >>> sim
436 0.347...
437 >>> match
438 '[*]c1ccccc1'
439
440 """
441 if hasattr(queryMol,'_fraggleDecomp'):
442 frags = queryMol._fraggleDecomp
443 else:
444 frags = generate_fraggle_fragmentation(queryMol)
445 queryMol._fraggleDecomp = frags
446 qSmi = Chem.MolToSmiles(queryMol,True)
447 result=0.0
448 bestMatch=None
449 for frag in frags:
450 rdksim,fragsim= compute_fraggle_similarity_for_subs(refMol,queryMol,qSmi,frag,tverskyThresh)
451 if fragsim>result:
452 result=fragsim
453 bestMatch=frag
454 return result,bestMatch
455
456
457
458
459
460
461
463 import doctest,sys
464 return doctest.testmod(sys.modules["__main__"],optionflags=doctest.ELLIPSIS+doctest.NORMALIZE_WHITESPACE)
465
466
467 if __name__ == '__main__':
468 import sys
469 failed,tried = _test()
470 sys.exit(failed)
471