1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32 from __future__ import print_function
33 from rdkit import RDConfig
34 from rdkit import Chem
35 from rdkit.Chem import AllChem
36 from rdkit.Chem import FunctionalGroups
37 from rdkit.Chem import rdChemReactions
38
39
40 import os
41
def PreprocessReaction(reaction,
                       funcGroupFilename=os.path.join(RDConfig.RDDataDir,
                                                      'Functional_Group_Hierarchy.txt'),
                       propName='molFileValue'):
    """Validate a reaction and add functional-group queries to it.

    The implicit-properties flag is set on the reaction, which is then
    initialized and validated.  If validation reports no errors, the query
    definitions in ``funcGroupFilename`` are parsed and added to the reaction
    as recursive queries, using the atom property named ``propName`` to find
    the functional-group labels.  The reaction is modified in place.

    Arguments:
      - reaction: the reaction to preprocess
      - funcGroupFilename: file containing the functional-group query
        definitions
      - propName: name of the atom property holding the group label(s)

    Returns the 5-tuple
      (nWarn, nError, nReactants, nProducts, reactantLabels)
    where reactantLabels is an empty list when validation reported errors.

    Raises IOError if the query definition file cannot be parsed.

    >>> testFile = os.path.join(RDConfig.RDCodeDir,'Chem','SimpleEnum','test_data','boronic1.rxn')
    >>> rxn = AllChem.ReactionFromRxnFile(testFile)
    >>> nWarn,nError,nReacts,nProds,reactantLabels = PreprocessReaction(rxn)
    >>> nWarn
    0
    >>> nError
    0
    >>> nReacts
    2
    >>> nProds
    1
    >>> reactantLabels
    (((0, 'halogen.bromine.aromatic'),), ((1, 'boronicacid'),))

    If there are functional group labels in the input reaction (via atoms
    with molFileValue properties), the corresponding atoms will have queries
    added to them so that they only match such things. We can see this here:

    >>> rxn = AllChem.ReactionFromRxnFile(testFile)
    >>> r1 = rxn.GetReactantTemplate(0)
    >>> m1 = Chem.MolFromSmiles('CCBr')
    >>> m2 = Chem.MolFromSmiles('c1ccccc1Br')

    These both match because the reaction file itself just has R1-Br:

    >>> m1.HasSubstructMatch(r1)
    True
    >>> m2.HasSubstructMatch(r1)
    True

    After preprocessing, we only match the aromatic Br:

    >>> d = PreprocessReaction(rxn)
    >>> m1.HasSubstructMatch(r1)
    False
    >>> m2.HasSubstructMatch(r1)
    True

    We also support or queries in the values field (separated by commas):

    >>> testFile = os.path.join(RDConfig.RDCodeDir,'Chem','SimpleEnum','test_data','azide_reaction.rxn')
    >>> rxn = AllChem.ReactionFromRxnFile(testFile)
    >>> reactantLabels = PreprocessReaction(rxn)[-1]
    >>> reactantLabels
    (((1, 'azide'),), ((1, 'carboxylicacid,acidchloride'),))
    >>> m1 = Chem.MolFromSmiles('CC(=O)O')
    >>> m2 = Chem.MolFromSmiles('CC(=O)Cl')
    >>> m3 = Chem.MolFromSmiles('CC(=O)N')
    >>> r2 = rxn.GetReactantTemplate(1)
    >>> m1.HasSubstructMatch(r2)
    True
    >>> m2.HasSubstructMatch(r2)
    True
    >>> m3.HasSubstructMatch(r2)
    False

    Unrecognized functional group types cause an exception to be raised:

    >>> testFile = os.path.join(RDConfig.RDCodeDir,'Chem','SimpleEnum','test_data','bad_value1.rxn')
    >>> rxn = AllChem.ReactionFromRxnFile(testFile)
    >>> nWarn,nError,nReacts,nProds,reactantLabels = PreprocessReaction(rxn)
    Traceback (most recent call last):
      ...
    RuntimeError: KeyErrorException

    One unrecognized group type in a comma-separated list makes the whole
    thing fail:

    >>> testFile = os.path.join(RDConfig.RDCodeDir,'Chem','SimpleEnum','test_data','bad_value2.rxn')
    >>> rxn = AllChem.ReactionFromRxnFile(testFile)
    >>> nWarn,nError,nReacts,nProds,reactantLabels = PreprocessReaction(rxn)
    Traceback (most recent call last):
      ...
    RuntimeError: KeyErrorException
    >>> testFile = os.path.join(RDConfig.RDCodeDir,'Chem','SimpleEnum','test_data','bad_value3.rxn')
    >>> rxn = AllChem.ReactionFromRxnFile(testFile)
    >>> nWarn,nError,nReacts,nProds,reactantLabels = PreprocessReaction(rxn)
    Traceback (most recent call last):
      ...
    RuntimeError: KeyErrorException

    An empty reaction produces an empty list of reactant labels:

    >>> rxn = rdChemReactions.ChemicalReaction()
    >>> nWarn,nError,nReacts,nProds,reactantLabels = PreprocessReaction(rxn)
    >>> reactantLabels == []
    True
    """
    reaction._setImplicitPropertiesFlag(True)
    reaction.Initialize()
    reactantCount = reaction.GetNumReactantTemplates()
    productCount = reaction.GetNumProductTemplates()
    warnCount, errCount = reaction.Validate()

    # Start from "no labels"; they are only filled in for a valid reaction.
    labels = []
    if not errCount:
        try:
            groupQueries = Chem.ParseMolQueryDefFile(funcGroupFilename)
        except Exception:
            raise IOError('cannot open', funcGroupFilename)
        # Deliberately outside the try: failures while adding the queries
        # (e.g. unknown group names) propagate to the caller unchanged.
        labels = reaction.AddRecursiveQueriesToReaction(groupQueries, propName,
                                                        getLabels=True)

    return warnCount, errCount, reactantCount, productCount, labels
154
def EnumerateReaction(reaction, bbLists, uniqueProductsOnly=False,
                      funcGroupFilename=os.path.join(RDConfig.RDDataDir,
                                                     'Functional_Group_Hierarchy.txt'),
                      propName='molFileValue'):
    """Enumerate the products of a reaction over lists of building blocks.

    The reaction is first run through PreprocessReaction() (using
    ``funcGroupFilename`` and ``propName``) and is then enumerated against
    ``bbLists`` with AllChem.EnumerateLibraryFromReaction().

    Arguments:
      - reaction: the reaction to enumerate; it is preprocessed in place
      - bbLists: a sequence with one list of building-block molecules per
        reactant template of the reaction
      - uniqueProductsOnly: if true, skip empty product sets and any product
        set whose SMILES have already been produced
      - funcGroupFilename: functional-group query definitions, forwarded to
        PreprocessReaction()
      - propName: atom property holding the functional-group labels,
        forwarded to PreprocessReaction()

    Returns the result of AllChem.EnumerateLibraryFromReaction(), or, when
    ``uniqueProductsOnly`` is set, a generator over its unique entries.

    Raises ValueError if the reaction fails validation or if the number of
    building-block lists does not match the number of reactants.

    >>> testFile = os.path.join(RDConfig.RDCodeDir,'Chem','SimpleEnum','test_data','boronic1.rxn')
    >>> rxn = AllChem.ReactionFromRxnFile(testFile)
    >>> reacts1=['Brc1ccccc1','Brc1ncccc1','Brc1cnccc1']
    >>> reacts1=[Chem.MolFromSmiles(x) for x in reacts1]
    >>> reacts2=['CCB(O)O','CCCB(O)O']
    >>> reacts2=[Chem.MolFromSmiles(x) for x in reacts2]

    >>> prods = EnumerateReaction(rxn,(reacts1,reacts2))
    >>> prods = list(prods)

    This is a bit nasty because of the symmetry of the boronic acid:

    >>> len(prods)
    12

    >>> smis = list(set([Chem.MolToSmiles(x[0]) for x in prods]))
    >>> smis.sort()
    >>> len(smis)
    6
    >>> print(smis)
    ['CCCc1ccccc1', 'CCCc1ccccn1', 'CCCc1cccnc1', 'CCc1ccccc1', 'CCc1ccccn1', 'CCc1cccnc1']

    The nastiness can be avoided at the cost of some memory by asking for
    only unique products:

    >>> prods = EnumerateReaction(rxn,(reacts1,reacts2),uniqueProductsOnly=True)
    >>> prods = list(prods)
    >>> len(prods)
    6
    >>> print(sorted([Chem.MolToSmiles(x[0]) for x in prods]))
    ['CCCc1ccccc1', 'CCCc1ccccn1', 'CCCc1cccnc1', 'CCc1ccccc1', 'CCc1ccccn1', 'CCc1cccnc1']

    """
    # Forward the caller's query file and property name; previously these
    # arguments were accepted but silently ignored in favour of the defaults.
    # Only the error and reactant counts of the 5-tuple are needed here.
    nError, nReacts = PreprocessReaction(reaction,
                                         funcGroupFilename=funcGroupFilename,
                                         propName=propName)[1:3]
    if nError:
        raise ValueError('bad reaction')
    if len(bbLists) != nReacts:
        raise ValueError('%d reactants in reaction, %d bb lists supplied' %
                         (nReacts, len(bbLists)))

    def _uniqueOnly(lst):
        # Yield each non-empty entry the first time its product SMILES appear.
        seen = set()  # set membership keeps de-duplication linear overall
        for entry in lst:
            if entry:
                # Sorting makes the key independent of product order.
                smi = '.'.join(sorted([Chem.MolToSmiles(x, True) for x in entry]))
                if smi not in seen:
                    seen.add(smi)
                    yield entry

    ps = AllChem.EnumerateLibraryFromReaction(reaction, bbLists)
    if not uniqueProductsOnly:
        return ps
    return _uniqueOnly(ps)
205
206
207
208
209
210
211
212
214 import doctest,sys
215 return doctest.testmod(sys.modules["__main__"])
216
217
# Script entry point: run the module's self-test and use the number of
# failures as the process exit status (0 means everything passed).
if __name__ == '__main__':
    import sys
    # _test() is expected to return a (failed, tried) pair.
    nFailed, nTried = _test()
    sys.exit(nFailed)
222