Package rdkit :: Package Chem :: Package ChemUtils :: Module TemplateExpand
[hide private]
[frames] | no frames]

Source Code for Module rdkit.Chem.ChemUtils.TemplateExpand

  1  # $Id: TemplateExpand.py 1053 2008-07-30 12:03:29Z landrgr1 $ 
  2  # 
  3  #  Created by Greg Landrum August, 2006 
  4  # 
  5  # 
  6  from __future__ import print_function 
  7  from rdkit import RDLogger as logging 
  8  logger = logging.logger() 
  9  logger.setLevel(logging.INFO) 
 10  from rdkit import Chem 
 11  from rdkit.Chem import Crippen 
 12  from rdkit.Chem import AllChem 
 13  from rdkit.Chem.ChemUtils.AlignDepict import AlignDepict 
 14   
 15  import sys 
 16  _version="0.8.0" 
 17  _greet="This is TemplateExpand version %s"%_version 
 18   
 19  _usage=""" 
 20  Usage: TemplateExpand [options] template <sidechains> 
 21   
 22   Unless otherwise indicated, the template and sidechains are assumed to be 
 23     Smiles 
 24   
 25   Each sidechain entry should be: 
 26     [-r] SMARTS filename 
 27       The SMARTS pattern is used to recognize the attachment point, 
 28       if the -r argument is not provided, then atoms matching the pattern 
 29       will be removed from the sidechains. 
 30     or 
 31     -n filename 
 32       where the attachment atom is the first atom in each molecule 
 33     The filename provides the list of potential sidechains. 
 34   
 35   options: 
 36     -o filename.sdf:      provides the name of the output file, otherwise 
 37                           stdout is used 
 38   
 39     --sdf :               expect the sidechains to be in SD files 
 40   
 41     --moltemplate:        the template(s) are in a mol/SD file, new depiction(s) 
 42                           will not be generated unless the --redraw argument is also 
 43                           provided 
 44   
 45     --smilesFileTemplate: extract the template(s) from a SMILES file instead of  
 46                           expecting SMILES on the command line. 
 47   
 48     --redraw:             generate a new depiction for the molecular template(s) 
 49   
 50     --useall: 
 51       or 
 52     --useallmatches:      generate a product for each possible match of the attachment 
 53                           pattern to each sidechain. If this is not provided, the first 
 54                           match (not canonically defined) will be used. 
 55   
 56     --force:              by default, the program prompts the user if the library is  
 57                           going to contain more than 1000 compounds. This argument  
 58                           disables the prompt. 
 59      
 60     --templateSmarts="smarts":  provides a space-delimited list containing the SMARTS  
 61                                 patterns to be used to recognize attachment points in 
 62                                                   the template 
 63                
 64     --autoNames:          when set this toggle causes the resulting compounds to be named 
 65                           based on there sequence id in the file, e.g.  
 66                           "TemplateEnum: Mol_1", "TemplateEnum: Mol_2", etc. 
 67                           otherwise the names of the template and building blocks (from 
 68                           the input files) will be combined to form a name for each 
 69                           product molecule. 
 70   
 71     --3D :                Generate 3d coordinates for the product molecules instead of 2d coordinates, 
 72                           requires the --moltemplate option 
 73   
 74     --tether :            refine the 3d conformations using a tethered minimization 
 75   
 76   
 77  """ 
78 -def Usage():
79 print(_usage, file=sys.stderr) 80 sys.exit(-1)
81 82 #pylint: disable=C0111,C0103,C0322,C0324,C0323 83 nDumped=0
84 -def _exploder(mol,depth,sidechains,core,chainIndices,autoNames=True,templateName='', 85 resetCounter=True,do3D=False,useTethers=False):
86 global nDumped 87 if resetCounter: 88 nDumped=0 89 ourChains = sidechains[depth] 90 patt = '[%d*]'%(depth+1) 91 patt = Chem.MolFromSmiles(patt) 92 for i,(chainIdx,chain) in enumerate(ourChains): 93 tchain = chainIndices[:] 94 tchain.append((i,chainIdx)) 95 rs = Chem.ReplaceSubstructs(mol,patt,chain,replaceAll=True) 96 if rs: 97 r = rs[0] 98 if depth<len(sidechains)-1: 99 for entry in _exploder(r,depth+1,sidechains,core,tchain, 100 autoNames=autoNames,templateName=templateName, 101 resetCounter=0,do3D=do3D,useTethers=useTethers): 102 yield entry 103 else: 104 try: 105 Chem.SanitizeMol(r) 106 except ValueError: 107 import traceback 108 traceback.print_exc() 109 continue 110 if not do3D: 111 if r.HasSubstructMatch(core): 112 try: 113 AlignDepict(r,core) 114 except Exception: 115 import traceback 116 traceback.print_exc() 117 print(Chem.MolToSmiles(r), file=sys.stderr) 118 else: 119 print('>>> no match', file=sys.stderr) 120 AllChem.Compute2DCoords(r) 121 else: 122 r = Chem.AddHs(r) 123 AllChem.ConstrainedEmbed(r,core,useTethers) 124 Chem.Kekulize(r) 125 if autoNames: 126 tName = "TemplateEnum: Mol_%d"%(nDumped+1) 127 else: 128 tName = templateName 129 for bbI,bb in enumerate(tchain): 130 bbMol = sidechains[bbI][bb[0]][1] 131 if bbMol.HasProp('_Name'): 132 bbNm = bbMol.GetProp('_Name') 133 else: 134 bbNm = str(bb[1]) 135 tName += '_' + bbNm 136 137 r.SetProp("_Name",tName) 138 r.SetProp('seq_num',str(nDumped+1)) 139 r.SetProp('reagent_indices','_'.join([str(x[1]) for x in tchain])) 140 for bbI,bb in enumerate(tchain): 141 bbMol = sidechains[bbI][bb[0]][1] 142 if bbMol.HasProp('_Name'): 143 bbNm = bbMol.GetProp('_Name') 144 else: 145 bbNm = str(bb[1]) 146 r.SetProp('building_block_%d'%(bbI+1),bbNm) 147 for propN in bbMol.GetPropNames(): 148 r.SetProp('building_block_%d_%s'%(bbI+1,propN),bbMol.GetProp(propN)) 149 nDumped += 1 150 if not nDumped%100: 151 logger.info('Done %d molecules'%nDumped) 152 yield r
153
154 -def Explode(template,sidechains,outF,autoNames=True,do3D=False,useTethers=False):
155 chainIndices=[] 156 core = Chem.DeleteSubstructs(template,Chem.MolFromSmiles('[*]')) 157 try: 158 templateName = template.GetProp('_Name') 159 except KeyError: 160 templateName="template" 161 for mol in _exploder(template,0,sidechains,core,chainIndices,autoNames=autoNames, 162 templateName=templateName,do3D=do3D,useTethers=useTethers): 163 outF.write(Chem.MolToMolBlock(mol)) 164 for pN in mol.GetPropNames(): 165 print('> <%s>\n%s\n'%(pN,mol.GetProp(pN)), file=outF) 166 print('$$$$', file=outF)
167
168 -def MoveDummyNeighborsToBeginning(mol,useAll=False):
169 dummyPatt=Chem.MolFromSmiles('[*]') 170 matches = mol.GetSubstructMatches(dummyPatt) 171 res = [] 172 for match in matches: 173 matchIdx = match[0] 174 smi = Chem.MolToSmiles(mol,True,rootedAtAtom=matchIdx) 175 entry = Chem.MolFromSmiles(smi) 176 # entry now has [*] as atom 0 and the neighbor 177 # as atom 1. Cleave the [*]: 178 entry = Chem.DeleteSubstructs(entry,dummyPatt) 179 for propN in mol.GetPropNames(): 180 entry.SetProp(propN,mol.GetProp(propN)); 181 182 # now we have a molecule with the atom to be joined 183 # in position zero; Keep that: 184 res.append(entry) 185 if not useAll: 186 break 187 return res
188
189 -def ConstructSidechains(suppl,sma=None,replace=True,useAll=False):
190 if sma: 191 patt = Chem.MolFromSmarts(sma) 192 if patt is None: 193 logger.error('could not construct pattern from smarts: %s'%sma, 194 exc_info=True) 195 return None 196 else: 197 patt = None 198 199 if replace: 200 replacement = Chem.MolFromSmiles('[*]') 201 202 res = [] 203 for idx,mol in enumerate(suppl): 204 if not mol: 205 continue 206 if patt: 207 if not mol.HasSubstructMatch(patt): 208 logger.warning('The substructure pattern did not match sidechain %d. This may result in errors.'%(idx+1)) 209 if replace: 210 tmp = list(Chem.ReplaceSubstructs(mol,patt,replacement)) 211 if not useAll: tmp = [tmp[0]] 212 for i,entry in enumerate(tmp): 213 entry = MoveDummyNeighborsToBeginning(entry) 214 if not entry: 215 continue 216 entry = entry[0] 217 218 for propN in mol.GetPropNames(): 219 entry.SetProp(propN,mol.GetProp(propN)); 220 # now we have a molecule with the atom to be joined 221 # in position zero; Keep that: 222 tmp[i] = (idx+1,entry) 223 else: 224 # no replacement, use the pattern to reorder 225 # atoms only: 226 matches = mol.GetSubstructMatches(patt) 227 if matches: 228 tmp = [] 229 for match in matches: 230 smi = Chem.MolToSmiles(mol,True,rootedAtAtom=match[0]) 231 entry = Chem.MolFromSmiles(smi) 232 for propN in mol.GetPropNames(): 233 entry.SetProp(propN,mol.GetProp(propN)); 234 235 # now we have a molecule with the atom to be joined 236 # in position zero; Keep that: 237 tmp.append((idx+1,entry)) 238 else: 239 tmp = None 240 else: 241 tmp = [(idx+1,mol)] 242 if tmp: 243 res.extend(tmp) 244 return res
245 246 if __name__=='__main__': 247 import getopt 248 print(_greet, file=sys.stderr) 249 250 try: 251 args,extras = getopt.getopt(sys.argv[1:],'o:h',[ 252 'sdf', 253 'moltemplate', 254 'molTemplate', 255 'smilesFileTemplate', 256 'templateSmarts=', 257 'redraw', 258 'force', 259 'useall', 260 'useallmatches', 261 'autoNames', 262 '3D','3d', 263 'tethers', 264 'tether', 265 ]) 266 except Exception: 267 import traceback 268 traceback.print_exc() 269 Usage() 270 271 if len(extras)<3: 272 Usage() 273 274 tooLong=1000 275 sdLigands=False 276 molTemplate=False 277 redrawTemplate=False 278 outF=None 279 forceIt=False 280 useAll=False 281 templateSmarts=[] 282 smilesFileTemplate=False 283 autoNames=False 284 do3D=False 285 useTethers=False 286 for arg,val in args: 287 if arg=='-o': 288 outF=val 289 elif arg=='--sdf': 290 sdLigands=True 291 elif arg in ('--moltemplate','--molTemplate'): 292 molTemplate=True 293 elif arg=='--smilesFileTemplate': 294 smilesFileTemplate=True 295 elif arg=='--templateSmarts': 296 templateSmarts = val 297 elif arg=='--redraw': 298 redrawTemplate=True 299 elif arg=='--force': 300 forceIt=True 301 elif arg=='--autoNames': 302 autoNames=True 303 elif arg in ('--useall','--useallmatches'): 304 useAll=True 305 elif arg in ('--3D','--3d'): 306 do3D=True 307 elif arg in ('--tethers','--tether'): 308 useTethers=True 309 elif arg=='-h': 310 Usage() 311 sys.exit(0) 312 313 if do3D: 314 if not molTemplate: 315 raise ValueError('the --3D option is only useable in combination with --moltemplate') 316 if redrawTemplate: 317 logger.warning('--redrawTemplate does not make sense in combination with --molTemplate. removing it') 318 redrawTemplate=False 319 320 321 if templateSmarts: 322 splitL = templateSmarts.split(' ') #pylint: disable=E1103 323 templateSmarts = [] 324 for i,sma in enumerate(splitL): 325 patt = Chem.MolFromSmarts(sma) 326 if not patt: 327 raise ValueError('could not convert smarts "%s" to a query'%sma) 328 if i>=4: 329 i+=1 330 replace = Chem.MolFromSmiles('[%d*]'%(i+1)) 331 templateSmarts.append((patt,replace)) 332 333 if molTemplate: 334 removeHs = not do3D 335 try: 336 s = Chem.SDMolSupplier(extras[0],removeHs=removeHs) 337 templates = [x for x in s] 338 except Exception: 339 logger.error('Could not construct templates from input file: %s'%extras[0], 340 exc_info=True) 341 sys.exit(1) 342 if redrawTemplate: 343 for template in templates: 344 AllChem.Compute2DCoords(template) 345 else: 346 if not smilesFileTemplate: 347 try: 348 templates = [Chem.MolFromSmiles(extras[0])] 349 except Exception: 350 logger.error('Could not construct template from smiles: %s'%extras[0], 351 exc_info=True) 352 sys.exit(1) 353 else: 354 try: 355 s = Chem.SmilesMolSupplier(extras[0],titleLine=False) 356 templates = [x for x in s] 357 except Exception: 358 logger.error('Could not construct templates from input file: %s'%extras[0], 359 exc_info=True) 360 sys.exit(1) 361 for template in templates: 362 AllChem.Compute2DCoords(template) 363 if templateSmarts: 364 finalTs = [] 365 for i,template in enumerate(templates): 366 for j,(patt,replace) in enumerate(templateSmarts): 367 if not template.HasSubstructMatch(patt): 368 logger.error('template %d did not match sidechain pattern %d, skipping it'%(i+1,j+1)) 369 template =None 370 break 371 template = Chem.ReplaceSubstructs(template,patt,replace)[0] 372 if template: 373 Chem.SanitizeMol(template) 374 finalTs.append(template) 375 templates = finalTs 376 377 sidechains = [] 378 pos = 1 379 while pos<len(extras): 380 if extras[pos]=='-r': 381 replaceIt=False 382 pos += 1 383 else: 384 replaceIt=True 385 if extras[pos]=='-n': 386 sma = None 387 else: 388 sma = extras[pos] 389 pos += 1 390 try: 391 dat = extras[pos] 392 except IndexError: 393 logger.error('missing a sidechain filename') 394 sys.exit(-1) 395 pos += 1 396 if sdLigands: 397 try: 398 suppl = Chem.SDMolSupplier(dat) 399 except Exception: 400 logger.error('could not construct supplier from SD file: %s'%dat, 401 exc_info=True) 402 suppl = [] 403 else: 404 tmpF = file(dat,'r') 405 inL = tmpF.readline() 406 if len(inL.split(' '))<2: 407 nmCol=-1 408 else: 409 nmCol=1 410 try: 411 suppl = Chem.SmilesMolSupplier(dat,nameColumn=nmCol) 412 except Exception: 413 logger.error('could not construct supplier from smiles file: %s'%dat, 414 exc_info=True) 415 suppl = [] 416 suppl = [x for x in suppl] 417 chains = ConstructSidechains(suppl,sma=sma,replace=replaceIt,useAll=useAll) 418 if chains: 419 sidechains.append(chains) 420 count = 1 421 for chain in sidechains: 422 count *= len(chain) 423 count *= len(templates) 424 if not sidechains or not count: 425 print("No molecules to be generated.", file=sys.stderr) 426 sys.exit(0) 427 428 if not forceIt and count>tooLong: 429 print("This will generate %d molecules."%count, file=sys.stderr) 430 print("Continue anyway? [no] ", file=sys.stderr, end='') 431 sys.stderr.flush() 432 ans = sys.stdin.readline().strip() 433 if ans not in ('y','yes','Y','YES'): 434 sys.exit(0) 435 436 if outF and outF!="-": 437 try: 438 outF = file(outF,'w+') 439 except IOError: 440 logger.error('could not open file %s for writing'%(outF), 441 exc_info=True) 442 else: 443 outF = sys.stdout 444 445 for template in templates: 446 Explode(template,sidechains,outF,autoNames=autoNames,do3D=do3D, 447 useTethers=useTethers) 448