1
2
3
4
5
6 from __future__ import print_function
7 from rdkit import RDLogger as logging
8 logger = logging.logger()
9 logger.setLevel(logging.INFO)
10 from rdkit import Chem
11 from rdkit.Chem import Crippen
12 from rdkit.Chem import AllChem
13 from rdkit.Chem.ChemUtils.AlignDepict import AlignDepict
14
15 import sys
16 _version="0.8.0"
17 _greet="This is TemplateExpand version %s"%_version
18
19 _usage="""
20 Usage: TemplateExpand [options] template <sidechains>
21
22 Unless otherwise indicated, the template and sidechains are assumed to be
23 Smiles
24
25 Each sidechain entry should be:
26 [-r] SMARTS filename
27 The SMARTS pattern is used to recognize the attachment point,
28 if the -r argument is not provided, then atoms matching the pattern
29 will be removed from the sidechains.
30 or
31 -n filename
32 where the attachment atom is the first atom in each molecule
33 The filename provides the list of potential sidechains.
34
35 options:
36 -o filename.sdf: provides the name of the output file, otherwise
37 stdout is used
38
39 --sdf : expect the sidechains to be in SD files
40
41 --moltemplate: the template(s) are in a mol/SD file, new depiction(s)
42 will not be generated unless the --redraw argument is also
43 provided
44
45 --smilesFileTemplate: extract the template(s) from a SMILES file instead of
46 expecting SMILES on the command line.
47
48 --redraw: generate a new depiction for the molecular template(s)
49
50 --useall:
51 or
52 --useallmatches: generate a product for each possible match of the attachment
53 pattern to each sidechain. If this is not provided, the first
54 match (not canonically defined) will be used.
55
56 --force: by default, the program prompts the user if the library is
57 going to contain more than 1000 compounds. This argument
58 disables the prompt.
59
60 --templateSmarts="smarts": provides a space-delimited list containing the SMARTS
61 patterns to be used to recognize attachment points in
62 the template
63
64 --autoNames: when set this toggle causes the resulting compounds to be named
65 based on there sequence id in the file, e.g.
66 "TemplateEnum: Mol_1", "TemplateEnum: Mol_2", etc.
67 otherwise the names of the template and building blocks (from
68 the input files) will be combined to form a name for each
69 product molecule.
70
71 --3D : Generate 3d coordinates for the product molecules instead of 2d coordinates,
72 requires the --moltemplate option
73
74 --tether : refine the 3d conformations using a tethered minimization
75
76
77 """
79 print(_usage, file=sys.stderr)
80 sys.exit(-1)
81
82
83 nDumped=0
84 -def _exploder(mol,depth,sidechains,core,chainIndices,autoNames=True,templateName='',
85 resetCounter=True,do3D=False,useTethers=False):
86 global nDumped
87 if resetCounter:
88 nDumped=0
89 ourChains = sidechains[depth]
90 patt = '[%d*]'%(depth+1)
91 patt = Chem.MolFromSmiles(patt)
92 for i,(chainIdx,chain) in enumerate(ourChains):
93 tchain = chainIndices[:]
94 tchain.append((i,chainIdx))
95 rs = Chem.ReplaceSubstructs(mol,patt,chain,replaceAll=True)
96 if rs:
97 r = rs[0]
98 if depth<len(sidechains)-1:
99 for entry in _exploder(r,depth+1,sidechains,core,tchain,
100 autoNames=autoNames,templateName=templateName,
101 resetCounter=0,do3D=do3D,useTethers=useTethers):
102 yield entry
103 else:
104 try:
105 Chem.SanitizeMol(r)
106 except ValueError:
107 import traceback
108 traceback.print_exc()
109 continue
110 if not do3D:
111 if r.HasSubstructMatch(core):
112 try:
113 AlignDepict(r,core)
114 except Exception:
115 import traceback
116 traceback.print_exc()
117 print(Chem.MolToSmiles(r), file=sys.stderr)
118 else:
119 print('>>> no match', file=sys.stderr)
120 AllChem.Compute2DCoords(r)
121 else:
122 r = Chem.AddHs(r)
123 AllChem.ConstrainedEmbed(r,core,useTethers)
124 Chem.Kekulize(r)
125 if autoNames:
126 tName = "TemplateEnum: Mol_%d"%(nDumped+1)
127 else:
128 tName = templateName
129 for bbI,bb in enumerate(tchain):
130 bbMol = sidechains[bbI][bb[0]][1]
131 if bbMol.HasProp('_Name'):
132 bbNm = bbMol.GetProp('_Name')
133 else:
134 bbNm = str(bb[1])
135 tName += '_' + bbNm
136
137 r.SetProp("_Name",tName)
138 r.SetProp('seq_num',str(nDumped+1))
139 r.SetProp('reagent_indices','_'.join([str(x[1]) for x in tchain]))
140 for bbI,bb in enumerate(tchain):
141 bbMol = sidechains[bbI][bb[0]][1]
142 if bbMol.HasProp('_Name'):
143 bbNm = bbMol.GetProp('_Name')
144 else:
145 bbNm = str(bb[1])
146 r.SetProp('building_block_%d'%(bbI+1),bbNm)
147 for propN in bbMol.GetPropNames():
148 r.SetProp('building_block_%d_%s'%(bbI+1,propN),bbMol.GetProp(propN))
149 nDumped += 1
150 if not nDumped%100:
151 logger.info('Done %d molecules'%nDumped)
152 yield r
153
154 -def Explode(template,sidechains,outF,autoNames=True,do3D=False,useTethers=False):
155 chainIndices=[]
156 core = Chem.DeleteSubstructs(template,Chem.MolFromSmiles('[*]'))
157 try:
158 templateName = template.GetProp('_Name')
159 except KeyError:
160 templateName="template"
161 for mol in _exploder(template,0,sidechains,core,chainIndices,autoNames=autoNames,
162 templateName=templateName,do3D=do3D,useTethers=useTethers):
163 outF.write(Chem.MolToMolBlock(mol))
164 for pN in mol.GetPropNames():
165 print('> <%s>\n%s\n'%(pN,mol.GetProp(pN)), file=outF)
166 print('$$$$', file=outF)
167
169 dummyPatt=Chem.MolFromSmiles('[*]')
170 matches = mol.GetSubstructMatches(dummyPatt)
171 res = []
172 for match in matches:
173 matchIdx = match[0]
174 smi = Chem.MolToSmiles(mol,True,rootedAtAtom=matchIdx)
175 entry = Chem.MolFromSmiles(smi)
176
177
178 entry = Chem.DeleteSubstructs(entry,dummyPatt)
179 for propN in mol.GetPropNames():
180 entry.SetProp(propN,mol.GetProp(propN));
181
182
183
184 res.append(entry)
185 if not useAll:
186 break
187 return res
188
190 if sma:
191 patt = Chem.MolFromSmarts(sma)
192 if patt is None:
193 logger.error('could not construct pattern from smarts: %s'%sma,
194 exc_info=True)
195 return None
196 else:
197 patt = None
198
199 if replace:
200 replacement = Chem.MolFromSmiles('[*]')
201
202 res = []
203 for idx,mol in enumerate(suppl):
204 if not mol:
205 continue
206 if patt:
207 if not mol.HasSubstructMatch(patt):
208 logger.warning('The substructure pattern did not match sidechain %d. This may result in errors.'%(idx+1))
209 if replace:
210 tmp = list(Chem.ReplaceSubstructs(mol,patt,replacement))
211 if not useAll: tmp = [tmp[0]]
212 for i,entry in enumerate(tmp):
213 entry = MoveDummyNeighborsToBeginning(entry)
214 if not entry:
215 continue
216 entry = entry[0]
217
218 for propN in mol.GetPropNames():
219 entry.SetProp(propN,mol.GetProp(propN));
220
221
222 tmp[i] = (idx+1,entry)
223 else:
224
225
226 matches = mol.GetSubstructMatches(patt)
227 if matches:
228 tmp = []
229 for match in matches:
230 smi = Chem.MolToSmiles(mol,True,rootedAtAtom=match[0])
231 entry = Chem.MolFromSmiles(smi)
232 for propN in mol.GetPropNames():
233 entry.SetProp(propN,mol.GetProp(propN));
234
235
236
237 tmp.append((idx+1,entry))
238 else:
239 tmp = None
240 else:
241 tmp = [(idx+1,mol)]
242 if tmp:
243 res.extend(tmp)
244 return res
245
246 if __name__=='__main__':
247 import getopt
248 print(_greet, file=sys.stderr)
249
250 try:
251 args,extras = getopt.getopt(sys.argv[1:],'o:h',[
252 'sdf',
253 'moltemplate',
254 'molTemplate',
255 'smilesFileTemplate',
256 'templateSmarts=',
257 'redraw',
258 'force',
259 'useall',
260 'useallmatches',
261 'autoNames',
262 '3D','3d',
263 'tethers',
264 'tether',
265 ])
266 except Exception:
267 import traceback
268 traceback.print_exc()
269 Usage()
270
271 if len(extras)<3:
272 Usage()
273
274 tooLong=1000
275 sdLigands=False
276 molTemplate=False
277 redrawTemplate=False
278 outF=None
279 forceIt=False
280 useAll=False
281 templateSmarts=[]
282 smilesFileTemplate=False
283 autoNames=False
284 do3D=False
285 useTethers=False
286 for arg,val in args:
287 if arg=='-o':
288 outF=val
289 elif arg=='--sdf':
290 sdLigands=True
291 elif arg in ('--moltemplate','--molTemplate'):
292 molTemplate=True
293 elif arg=='--smilesFileTemplate':
294 smilesFileTemplate=True
295 elif arg=='--templateSmarts':
296 templateSmarts = val
297 elif arg=='--redraw':
298 redrawTemplate=True
299 elif arg=='--force':
300 forceIt=True
301 elif arg=='--autoNames':
302 autoNames=True
303 elif arg in ('--useall','--useallmatches'):
304 useAll=True
305 elif arg in ('--3D','--3d'):
306 do3D=True
307 elif arg in ('--tethers','--tether'):
308 useTethers=True
309 elif arg=='-h':
310 Usage()
311 sys.exit(0)
312
313 if do3D:
314 if not molTemplate:
315 raise ValueError('the --3D option is only useable in combination with --moltemplate')
316 if redrawTemplate:
317 logger.warning('--redrawTemplate does not make sense in combination with --molTemplate. removing it')
318 redrawTemplate=False
319
320
321 if templateSmarts:
322 splitL = templateSmarts.split(' ')
323 templateSmarts = []
324 for i,sma in enumerate(splitL):
325 patt = Chem.MolFromSmarts(sma)
326 if not patt:
327 raise ValueError('could not convert smarts "%s" to a query'%sma)
328 if i>=4:
329 i+=1
330 replace = Chem.MolFromSmiles('[%d*]'%(i+1))
331 templateSmarts.append((patt,replace))
332
333 if molTemplate:
334 removeHs = not do3D
335 try:
336 s = Chem.SDMolSupplier(extras[0],removeHs=removeHs)
337 templates = [x for x in s]
338 except Exception:
339 logger.error('Could not construct templates from input file: %s'%extras[0],
340 exc_info=True)
341 sys.exit(1)
342 if redrawTemplate:
343 for template in templates:
344 AllChem.Compute2DCoords(template)
345 else:
346 if not smilesFileTemplate:
347 try:
348 templates = [Chem.MolFromSmiles(extras[0])]
349 except Exception:
350 logger.error('Could not construct template from smiles: %s'%extras[0],
351 exc_info=True)
352 sys.exit(1)
353 else:
354 try:
355 s = Chem.SmilesMolSupplier(extras[0],titleLine=False)
356 templates = [x for x in s]
357 except Exception:
358 logger.error('Could not construct templates from input file: %s'%extras[0],
359 exc_info=True)
360 sys.exit(1)
361 for template in templates:
362 AllChem.Compute2DCoords(template)
363 if templateSmarts:
364 finalTs = []
365 for i,template in enumerate(templates):
366 for j,(patt,replace) in enumerate(templateSmarts):
367 if not template.HasSubstructMatch(patt):
368 logger.error('template %d did not match sidechain pattern %d, skipping it'%(i+1,j+1))
369 template =None
370 break
371 template = Chem.ReplaceSubstructs(template,patt,replace)[0]
372 if template:
373 Chem.SanitizeMol(template)
374 finalTs.append(template)
375 templates = finalTs
376
377 sidechains = []
378 pos = 1
379 while pos<len(extras):
380 if extras[pos]=='-r':
381 replaceIt=False
382 pos += 1
383 else:
384 replaceIt=True
385 if extras[pos]=='-n':
386 sma = None
387 else:
388 sma = extras[pos]
389 pos += 1
390 try:
391 dat = extras[pos]
392 except IndexError:
393 logger.error('missing a sidechain filename')
394 sys.exit(-1)
395 pos += 1
396 if sdLigands:
397 try:
398 suppl = Chem.SDMolSupplier(dat)
399 except Exception:
400 logger.error('could not construct supplier from SD file: %s'%dat,
401 exc_info=True)
402 suppl = []
403 else:
404 tmpF = file(dat,'r')
405 inL = tmpF.readline()
406 if len(inL.split(' '))<2:
407 nmCol=-1
408 else:
409 nmCol=1
410 try:
411 suppl = Chem.SmilesMolSupplier(dat,nameColumn=nmCol)
412 except Exception:
413 logger.error('could not construct supplier from smiles file: %s'%dat,
414 exc_info=True)
415 suppl = []
416 suppl = [x for x in suppl]
417 chains = ConstructSidechains(suppl,sma=sma,replace=replaceIt,useAll=useAll)
418 if chains:
419 sidechains.append(chains)
420 count = 1
421 for chain in sidechains:
422 count *= len(chain)
423 count *= len(templates)
424 if not sidechains or not count:
425 print("No molecules to be generated.", file=sys.stderr)
426 sys.exit(0)
427
428 if not forceIt and count>tooLong:
429 print("This will generate %d molecules."%count, file=sys.stderr)
430 print("Continue anyway? [no] ", file=sys.stderr, end='')
431 sys.stderr.flush()
432 ans = sys.stdin.readline().strip()
433 if ans not in ('y','yes','Y','YES'):
434 sys.exit(0)
435
436 if outF and outF!="-":
437 try:
438 outF = file(outF,'w+')
439 except IOError:
440 logger.error('could not open file %s for writing'%(outF),
441 exc_info=True)
442 else:
443 outF = sys.stdout
444
445 for template in templates:
446 Explode(template,sidechains,outF,autoNames=autoNames,do3D=do3D,
447 useTethers=useTethers)
448