Package rdkit :: Package Chem :: Package SimpleEnum :: Module Enumerator
[hide private]
[frames] | [no frames]

Source Code for Module rdkit.Chem.SimpleEnum.Enumerator

  1  # 
  2  #  Copyright (c) 2014, Novartis Institutes for BioMedical Research Inc. 
  3  #  All rights reserved. 
  4  #  
  5  # Redistribution and use in source and binary forms, with or without 
  6  # modification, are permitted provided that the following conditions are 
  7  # met:  
  8  # 
  9  #     * Redistributions of source code must retain the above copyright  
 10  #       notice, this list of conditions and the following disclaimer. 
 11  #     * Redistributions in binary form must reproduce the above 
 12  #       copyright notice, this list of conditions and the following  
 13  #       disclaimer in the documentation and/or other materials provided  
 14  #       with the distribution. 
 15  #     * Neither the name of Novartis Institutes for BioMedical Research Inc.  
 16  #       nor the names of its contributors may be used to endorse or promote  
 17  #       products derived from this software without specific prior written permission. 
 18  # 
 19  # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 
 20  # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 
 21  # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 
 22  # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 
 23  # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 
 24  # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 
 25  # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 
 26  # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 
 27  # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 
 28  # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 
 29  # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
 30  # 
 31  # Created by Greg Landrum, May 2009 
 32  from __future__ import print_function 
 33  from rdkit import RDConfig 
 34  from rdkit import Chem 
 35  from rdkit.Chem import AllChem 
 36  from rdkit.Chem import FunctionalGroups 
 37  from rdkit.Chem import rdChemReactions 
 38   
 39   
 40  import os 
 41   
def PreprocessReaction(reaction,funcGroupFilename=os.path.join(RDConfig.RDDataDir,'Functional_Group_Hierarchy.txt'),propName='molFileValue'):
    """Initialize and validate a reaction, then add functional-group queries to it.

    Arguments:
      - reaction: the reaction to process; it is modified in place
        (implicit-properties flag set, initialized, queries added).
      - funcGroupFilename: path handed to Chem.ParseMolQueryDefFile to build
        the dictionary of functional-group queries.
      - propName: name of the atom property holding the functional-group
        label(s); passed through to AddRecursiveQueriesToReaction.

    Returns the tuple (nWarn, nError, nReactants, nProducts, reactantLabels).
    reactantLabels is an empty list when validation reports errors.

    Raises IOError('cannot open', funcGroupFilename) if
    Chem.ParseMolQueryDefFile raises any Exception.

    >>> testFile = os.path.join(RDConfig.RDCodeDir,'Chem','SimpleEnum','test_data','boronic1.rxn')
    >>> rxn = AllChem.ReactionFromRxnFile(testFile)
    >>> nWarn,nError,nReacts,nProds,reactantLabels = PreprocessReaction(rxn)
    >>> nWarn
    0
    >>> nError
    0
    >>> nReacts
    2
    >>> nProds
    1
    >>> reactantLabels
    (((0, 'halogen.bromine.aromatic'),), ((1, 'boronicacid'),))

    If there are functional group labels in the input reaction (via atoms with molFileValue properties),
    the corresponding atoms will have queries added to them so that they only match such things. We can
    see this here:
    >>> rxn = AllChem.ReactionFromRxnFile(testFile)
    >>> r1 = rxn.GetReactantTemplate(0)
    >>> m1 = Chem.MolFromSmiles('CCBr')
    >>> m2 = Chem.MolFromSmiles('c1ccccc1Br')

    These both match because the reaction file itself just has R1-Br:
    >>> m1.HasSubstructMatch(r1)
    True
    >>> m2.HasSubstructMatch(r1)
    True

    After preprocessing, we only match the aromatic Br:
    >>> d = PreprocessReaction(rxn)
    >>> m1.HasSubstructMatch(r1)
    False
    >>> m2.HasSubstructMatch(r1)
    True

    We also support or queries in the values field (separated by commas):
    >>> testFile = os.path.join(RDConfig.RDCodeDir,'Chem','SimpleEnum','test_data','azide_reaction.rxn')
    >>> rxn = AllChem.ReactionFromRxnFile(testFile)
    >>> reactantLabels = PreprocessReaction(rxn)[-1]
    >>> reactantLabels
    (((1, 'azide'),), ((1, 'carboxylicacid,acidchloride'),))
    >>> m1 = Chem.MolFromSmiles('CC(=O)O')
    >>> m2 = Chem.MolFromSmiles('CC(=O)Cl')
    >>> m3 = Chem.MolFromSmiles('CC(=O)N')
    >>> r2 = rxn.GetReactantTemplate(1)
    >>> m1.HasSubstructMatch(r2)
    True
    >>> m2.HasSubstructMatch(r2)
    True
    >>> m3.HasSubstructMatch(r2)
    False

    Unrecognized functional group types are not silently skipped; the examples
    below show them surfacing as a RuntimeError (KeyErrorException):
    >>> testFile = os.path.join(RDConfig.RDCodeDir,'Chem','SimpleEnum','test_data','bad_value1.rxn')
    >>> rxn = AllChem.ReactionFromRxnFile(testFile)
    >>> nWarn,nError,nReacts,nProds,reactantLabels = PreprocessReaction(rxn)
    Traceback (most recent call last):
      File "/usr/prog/python/2.6.6_gnu/lib/python2.6/doctest.py", line 1253, in __run
        compileflags, 1) in test.globs
      File "<doctest __main__.PreprocessReaction[36]>", line 1, in <module>
        nWarn,nError,nReacts,nProds,reactantLabels = PreprocessReaction(rxn)
      File "Enumerator.py", line 105, in PreprocessReaction
        reactantLabels = reaction.AddRecursiveQueriesToReaction(queryDict, propName='molFileValue', getLabels=True)
    RuntimeError: KeyErrorException

    One unrecognized group type in a comma-separated list makes the whole thing fail:
    >>> testFile = os.path.join(RDConfig.RDCodeDir,'Chem','SimpleEnum','test_data','bad_value2.rxn')
    >>> rxn = AllChem.ReactionFromRxnFile(testFile)
    >>> nWarn,nError,nReacts,nProds,reactantLabels = PreprocessReaction(rxn)
    Traceback (most recent call last):
      File "/usr/prog/python/2.6.6_gnu/lib/python2.6/doctest.py", line 1253, in __run
        compileflags, 1) in test.globs
      File "<doctest __main__.PreprocessReaction[36]>", line 1, in <module>
        nWarn,nError,nReacts,nProds,reactantLabels = PreprocessReaction(rxn)
      File "Enumerator.py", line 105, in PreprocessReaction
        reactantLabels = reaction.AddRecursiveQueriesToReaction(queryDict, propName='molFileValue', getLabels=True)
    RuntimeError: KeyErrorException
    >>> testFile = os.path.join(RDConfig.RDCodeDir,'Chem','SimpleEnum','test_data','bad_value3.rxn')
    >>> rxn = AllChem.ReactionFromRxnFile(testFile)
    >>> nWarn,nError,nReacts,nProds,reactantLabels = PreprocessReaction(rxn)
    Traceback (most recent call last):
      File "/usr/prog/python/2.6.6_gnu/lib/python2.6/doctest.py", line 1253, in __run
        compileflags, 1) in test.globs
      File "<doctest __main__.PreprocessReaction[36]>", line 1, in <module>
        nWarn,nError,nReacts,nProds,reactantLabels = PreprocessReaction(rxn)
      File "Enumerator.py", line 105, in PreprocessReaction
        reactantLabels = reaction.AddRecursiveQueriesToReaction(queryDict, propName='molFileValue', getLabels=True)
    RuntimeError: KeyErrorException
    >>> rxn = rdChemReactions.ChemicalReaction()
    >>> nWarn,nError,nReacts,nProds,reactantLabels = PreprocessReaction(rxn)
    >>> reactantLabels == []
    True
    """
    # Prepare the reaction before asking it anything about its templates.
    reaction._setImplicitPropertiesFlag(True)
    reaction.Initialize()
    numReactantTemplates = reaction.GetNumReactantTemplates()
    numProductTemplates = reaction.GetNumProductTemplates()
    numWarnings, numErrors = reaction.Validate()

    # A reaction that fails validation gets no queries and no labels.
    if numErrors:
        return numWarnings, numErrors, numReactantTemplates, numProductTemplates, []

    try:
        groupQueries = Chem.ParseMolQueryDefFile(funcGroupFilename)
    except Exception:
        # Any failure while loading the definitions is reported as IOError.
        raise IOError('cannot open', funcGroupFilename)

    labels = reaction.AddRecursiveQueriesToReaction(groupQueries, propName, getLabels=True)
    return numWarnings, numErrors, numReactantTemplates, numProductTemplates, labels
154
def EnumerateReaction(reaction,bbLists,uniqueProductsOnly=False,funcGroupFilename=os.path.join(RDConfig.RDDataDir,'Functional_Group_Hierarchy.txt'),propName='molFileValue'):
    """Preprocess a reaction and enumerate its products over building-block lists.

    Arguments:
      - reaction: the reaction to enumerate; it is preprocessed in place via
        PreprocessReaction before enumeration.
      - bbLists: one sequence of building-block molecules per reactant
        template; its length must equal the number of reactant templates.
      - uniqueProductsOnly: if true, product sets whose sorted, '.'-joined
        SMILES has already been produced are dropped (and empty product sets
        are skipped). This keeps every SMILES seen so far in memory.
      - funcGroupFilename: functional-group definition file, forwarded to
        PreprocessReaction.
      - propName: atom property holding the functional-group label(s),
        forwarded to PreprocessReaction.

    Returns the result of AllChem.EnumerateLibraryFromReaction, or a generator
    filtering it when uniqueProductsOnly is set.

    Raises ValueError if the reaction fails validation or if the number of
    building-block lists does not match the number of reactants. Errors
    raised by PreprocessReaction (e.g. IOError for an unreadable definition
    file) propagate unchanged.

    >>> testFile = os.path.join(RDConfig.RDCodeDir,'Chem','SimpleEnum','test_data','boronic1.rxn')
    >>> rxn = AllChem.ReactionFromRxnFile(testFile)
    >>> reacts1=['Brc1ccccc1','Brc1ncccc1','Brc1cnccc1']
    >>> reacts1=[Chem.MolFromSmiles(x) for x in reacts1]
    >>> reacts2=['CCB(O)O','CCCB(O)O']
    >>> reacts2=[Chem.MolFromSmiles(x) for x in reacts2]

    >>> prods = EnumerateReaction(rxn,(reacts1,reacts2))
    >>> prods = list(prods)

    This is a bit nasty because of the symmetry of the boronic acid:
    >>> len(prods)
    12

    >>> smis = list(set([Chem.MolToSmiles(x[0]) for x in prods]))
    >>> smis.sort()
    >>> len(smis)
    6
    >>> print(smis)
    ['CCCc1ccccc1', 'CCCc1ccccn1', 'CCCc1cccnc1', 'CCc1ccccc1', 'CCc1ccccn1', 'CCc1cccnc1']

    The nastiness can be avoided at the cost of some memory by asking for only unique products:
    >>> prods = EnumerateReaction(rxn,(reacts1,reacts2),uniqueProductsOnly=True)
    >>> prods = list(prods)
    >>> len(prods)
    6
    >>> print(sorted([Chem.MolToSmiles(x[0]) for x in prods]))
    ['CCCc1ccccc1', 'CCCc1ccccn1', 'CCCc1cccnc1', 'CCc1ccccc1', 'CCc1ccccn1', 'CCc1cccnc1']


    """
    # Forward the caller's functional-group file and property name; these
    # were previously accepted but ignored, so custom values had no effect.
    nWarn, nError, nReacts, nProds, reactantLabels = PreprocessReaction(
        reaction, funcGroupFilename=funcGroupFilename, propName=propName)
    if nError:
        raise ValueError('bad reaction')
    if len(bbLists) != nReacts:
        raise ValueError('%d reactants in reaction, %d bb lists supplied' % (nReacts, len(bbLists)))

    def _uniqueOnly(lst):
        """Yield each non-empty product set the first time its SMILES key appears."""
        seen = set()  # set membership keeps the duplicate check O(1) per entry
        for entry in lst:
            if not entry:
                continue
            # Sorting makes the key independent of product order in the set.
            smi = '.'.join(sorted([Chem.MolToSmiles(x, True) for x in entry]))
            if smi in seen:
                continue
            seen.add(smi)
            yield entry

    ps = AllChem.EnumerateLibraryFromReaction(reaction, bbLists)
    if uniqueProductsOnly:
        return _uniqueOnly(ps)
    return ps


#------------------------------------
#
#  doctest boilerplate
#
def _test():
    """Run the doctests of the module registered as ``__main__``.

    Returns whatever ``doctest.testmod`` returns for that module.
    """
    import doctest
    import sys

    main_module = sys.modules["__main__"]
    return doctest.testmod(main_module)


if __name__ == '__main__':
    # Run the doctests and use the failure count as the process exit status.
    import sys
    num_failed, _num_tried = _test()
    sys.exit(num_failed)