1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34 import logging
35 import re
36 import unittest
37 from rdkit import Chem
38
39 from rdkit.Chem import inchi
# Fail at import time: the code in this module calls RDKit's InChI bindings,
# so there is no point in loading it when they were not compiled in.
if not inchi.INCHI_AVAILABLE:
    raise ImportError("This code requires the RDKit to be built with InChI support")
42
def _is_achiral_by_symmetry(INCHI):
    """Report whether RDKit finds no chiral centre in the given InChI.

    The text is first parsed as-is.  If that yields nothing it is parsed
    again with ``'InChI=1/'`` prepended, so a layer string without its
    version prefix is accepted as well.

    Args:
        INCHI: an InChI string, or InChI layer text without the version
            prefix.

    Returns:
        True when ``Chem.FindMolChiralCenters`` returns an empty list for
        the parsed molecule.  False when it returns at least one centre,
        when neither parse attempt produces a molecule, or when the
        chiral-centre search raises.
    """
    mol = Chem.MolFromInchi(INCHI)
    if not mol:
        # Retry with a version prefix for prefix-less layer text.
        mol = Chem.MolFromInchi('InChI=1/{0}'.format(INCHI))

    if mol is None:
        # Nothing to inspect; say so explicitly instead of relying on the
        # exception handler below to absorb a call made with None.
        return False

    try:
        # Positional flags kept exactly as before (in RDKit's signature
        # these are ``force`` and ``includeUnassigned``).
        chiral_centers = Chem.FindMolChiralCenters(mol, True, True)
    except Exception:
        # Deliberate best effort: any failure counts as "not achiral".
        return False

    return len(chiral_centers) == 0
55
# Module-level logging objects.  The handler is only created here; nothing in
# this section attaches it to the logger.
console = logging.StreamHandler()
UPD_APP = logging.getLogger('inchiinfo.application')

# Layer splitters.  Each captures (text before the marker, text after it);
# the first group is non-greedy, so the split happens at the first marker.
version_re = re.compile(r'(.*?)/(.*)')  # first '/': version prefix | rest
reconnected_re = re.compile(r'(.*?)/r(.*)')  # '/r' marker
fixed_h_re = re.compile(r'(.*?)/f(.*)')  # '/f' marker
isotope_re = re.compile(r'(.*?)/i(.*)')  # '/i' marker

# Raw strings keep the pattern text byte-identical to the previous literals
# while avoiding Python's invalid-escape-sequence warning for \d, \/, \? ...

# '/t' content when another '/' follows it.
stereo_re = re.compile(r'.*\/t(.*?)\/.*')
# '/t' content up to the next '/' or the end of the text.
stereo_all_re = re.compile(r'.*\/t([^\/]+)')
# Number immediately followed by '?'.
undef_stereo_re = re.compile(r'(\d+)\?')
# Number immediately followed by '?', '+' or '-'.
all_stereo_re = re.compile(r'(\d+)[?+-]')
# Number immediately followed by '+' or '-'.
defined_stereo_re = re.compile(r'(\d+)[+-]')
# Everything after the last '/h' in the text (the leading '.*' is greedy).
h_layer_re = re.compile(r'.*\/h(.*)\/?')
# Shortest parenthesised run starting with '(H'.
mobile_h_group_re = re.compile(r'(\(H.+?\))')
# Number preceded by a comma.
mobile_h_atoms_re = re.compile(r',(\d+)')
72
74
class InchiInfo(object):
    """Split an InChI string into layers and report stereo / mobile-H facts.

    ``self.parsed_inchi`` holds the raw layer text in three nested dicts:

    1. connection layer -- ``'id'``, or ``'id_disconnected'`` and
       ``'id_reconnected'`` when the text contains ``/r``;
    2. fixed-H layer -- ``'main'`` plus ``'fixed_h'`` when the text contains
       ``/f``;
    3. isotopic layer -- ``'non-isotopic'`` plus ``'isotopic'`` when the text
       contains ``/i``.
    """

    def __init__(self, inchi_str):
        """Parse *inchi_str* into ``self.parsed_inchi``.

        Args:
            inchi_str: InChI string including its version prefix.

        Raises:
            AttributeError: if *inchi_str* contains no ``/``, because the
                version regex then fails to match.
        """
        # The version prefix is split off and not used any further.
        _, rest = version_re.match(inchi_str).groups()

        reconn_match = reconnected_re.match(rest)
        if reconn_match:
            disconnected, reconnected = reconn_match.groups()
            connection_layers = {
                'id_disconnected': disconnected,
                'id_reconnected': reconnected,
            }
        else:
            connection_layers = {'id': rest}

        parsed = {}
        for conn_key, conn_text in connection_layers.items():
            fixed_h_layers = self._split_layer(fixed_h_re, conn_text, 'main', 'fixed_h')
            parsed[conn_key] = {
                fixed_key: self._split_layer(isotope_re, fixed_text,
                                             'non-isotopic', 'isotopic')
                for fixed_key, fixed_text in fixed_h_layers.items()
            }

        self.parsed_inchi = parsed

    @staticmethod
    def _split_layer(pattern, text, before_key, after_key):
        """Split *text* at the marker matched by *pattern*.

        Returns ``{before_key: ..., after_key: ...}`` when the marker is
        present, otherwise ``{before_key: text}``.
        """
        match = pattern.match(text)
        if match:
            before, after = match.groups()
            return {before_key: before, after_key: after}
        return {before_key: text}

    def get_sp3_stereo(self):
        """Retrieve sp3 stereo information from the ``/t`` layer.

        Returns:
            A dict ``{fixed_h_key: {isotopic_key: info}}`` where ``info`` is
            a 4-item tuple:

            1. number of stereocentres: numbers in the ``/t`` text followed
               by ``?``, ``+`` or ``-``;
            2. number of undefined stereocentres: numbers followed by ``?``;
            3. meso flag: True when no further layer follows ``/t`` and it
               lists more than one ``+``/``-`` centre, or when there is more
               than one undefined centre and ``_is_achiral_by_symmetry``
               accepts the layer text;
            4. the raw text of the ``/t`` layer.

            Without a ``/t`` layer the tuple is ``(0, 0, False, '')``.

        NOTE: the result is keyed by fixed-H layer only, so for a reconnected
        InChI the entries of the connection layer visited last replace those
        of the one visited first.
        """
        sp3_stereo = {}

        for fixed_layers in self.parsed_inchi.values():
            for fixed_key, iso_layers in fixed_layers.items():
                per_iso = {}
                for iso_key, layer_text in iso_layers.items():
                    stereo = ''
                    is_meso = False

                    stereo_match = stereo_re.match(layer_text)
                    if stereo_match:
                        # '/t' is followed by at least one more layer.
                        stereo = stereo_match.group(1)
                    else:
                        stereo_all_match = stereo_all_re.match(layer_text)
                        if stereo_all_match:
                            # '/t' is the last layer of this text.
                            stereo = stereo_all_match.group(1)
                            is_meso = len(defined_stereo_re.findall(stereo)) > 1

                    num_stereo = len(all_stereo_re.findall(stereo))
                    num_undef_stereo = len(undef_stereo_re.findall(stereo))

                    # The structure-based check is only consulted when the
                    # text alone did not decide and several centres are
                    # undefined.
                    is_meso = is_meso or (num_undef_stereo > 1
                                          and _is_achiral_by_symmetry(layer_text))
                    per_iso[iso_key] = (num_stereo, num_undef_stereo, is_meso, stereo)
                sp3_stereo[fixed_key] = per_iso
        return sp3_stereo

    def get_mobile_h(self):
        """Retrieve mobile H (tautomer) information from the ``/h`` layer.

        Returns:
            A dict ``{fixed_h_key: {isotopic_key: info}}`` where ``info`` is
            a 2-item tuple:

            1. number of mobile hydrogen groups, i.e. parenthesised
               ``(H...)`` runs found after the last ``/h``;
            2. those groups joined with commas, ``''`` when there are none.

        NOTE: keyed by fixed-H layer only; see ``get_sp3_stereo`` for the
        consequence on reconnected InChIs.
        """
        mobile_h = {}
        for fixed_layers in self.parsed_inchi.values():
            for fixed_key, iso_layers in fixed_layers.items():
                per_iso = {}
                for iso_key, layer_text in iso_layers.items():
                    groups = []
                    h_layer_match = h_layer_re.match(layer_text)
                    if h_layer_match:
                        groups = mobile_h_group_re.findall(h_layer_match.group(1))
                    per_iso[iso_key] = (len(groups), ','.join(groups))
                mobile_h[fixed_key] = per_iso
        return mobile_h
168
169
170
# Sample InChI strings exercised by the unit tests in this module.

# One mobile-H group in the /h layer.
GUANINE = 'InChI=1S/C5H5N5O/c6-5-9-3-2(4(11)10-5)7-1-8-3/h1H0,(H4,6,7,8,9,10,11)'

# Non-standard (version "1") InChIs carrying a fixed-H (/f) layer.
UREA1 = 'InChI=1/CH4N2O/c2-1(3)4/h(H4,2,3,4)/f/h2,4H,3H2/b2-1?'
UREA2 = 'InChI=1/CH4N2O/c2-1(3)4/h(H4,2,3,4)/f/h2-3H2'

# Same skeleton with an isotopic (/i) layer.
TRITIATED_UREA = 'InChI=1S/CH4N2O/c2-1(3)4/h(H4,2,3,4)/i/hT3'
DEUTERATED_UREA = 'InChI=1S/CH4N2O/c2-1(3)4/h(H4,2,3,4)/i/hD2'

# NOTE(review): the formula layer of the next two strings is C3H6O2, which
# does not look like acetic acid (C2H4O2) -- confirm.  Names are kept because
# the tests refer to them.
ACETIC_ACID = 'InChI=1S/C3H6O2/c1-2-3(4)5/h2H2,1H3,(H,4,5)'
ACETATE = 'InChI=1S/C3H6O2/c1-2-3(4)5/h2H2,1H3,(H,4,5)/p-1'

# Two mobile-H groups each.
mobile1 = 'InChI=1S/C5H5N3O2/c6-4(9)3-1-7-2-8-5(3)10/h1-2H,(H2,6,9)(H,7,8,10)'
mobile2 = 'InChI=1S/C7H10N4O/c1-4-2-5(3-6(8)12)11-7(9)10-4/h2H,3H2,1H3,(H2,8,12)(H2,9,10,11)'

# /t layers: five centres with one undefined, five defined, two with one undefined.
sugar1 = 'InChI=1S/C14H20O9/c1-6-11(20-7(2)15)12(21-8(3)16)13(22-9(4)17)14(19-6)23-10(5)18/h6,11-14H,1-5H3/t6-,11-,12+,13+,14?/m0/s1'
sugar2 = 'InChI=1S/C12H20O6/c1-11(2)14-5-6(16-11)8-7(13)9-10(15-8)18-12(3,4)17-9/h6-10,13H,5H2,1-4H3/t6-,7-,8-,9-,10-/m1/s1'
sp3_unk = 'InChI=1S/C12H21NO4/c1-8(2)10(12(15)16-3)13-11(14)9-5-4-6-17-7-9/h8-10H,4-7H2,1-3H3,(H,13,14)/t9?,10-/m0/s1'
187
189
class TestInchiInfo(unittest.TestCase):
    """Checks InchiInfo's stereo and mobile-H counts on the sample InChIs."""

    # NOTE(review): the class name and the test-method names below were
    # rebuilt from the constants they exercise -- confirm against the
    # project's own naming before merging.

    def doTest(self, inchi, numSp3=0, numUndefSp3=0, numMobileHGroups=0, layer='non-isotopic'):
        """Parse *inchi* and compare its counts for the 'main' fixed-H layer.

        Args:
            inchi: InChI string to parse (the name shadows the imported
                ``inchi`` module inside this method only).
            numSp3: expected number of sp3 stereocentres.
            numUndefSp3: expected number of undefined sp3 stereocentres.
            numMobileHGroups: expected number of mobile-H groups.
            layer: isotopic-layer key to inspect.
        """
        ii = InchiInfo(inchi)
        # The meso flag and the raw /t text are not checked here.
        (nSp3, nUndefSp3, _isMeso, _sp3Atoms) = ii.get_sp3_stereo()['main'][layer]
        self.assertEqual(nSp3, numSp3)
        self.assertEqual(nUndefSp3, numUndefSp3)

        (nMobileHGroups, _mobileHGroups) = ii.get_mobile_h()['main'][layer]
        self.assertEqual(nMobileHGroups, numMobileHGroups)

    def testGuanine(self):
        self.doTest(GUANINE, 0, 0, 1)

    def testTritiatedUrea(self):
        self.doTest(TRITIATED_UREA, 0, 0, 1)

    def testDeuteratedUrea(self):
        self.doTest(DEUTERATED_UREA, 0, 0, 1)

    def testAceticAcid(self):
        self.doTest(ACETIC_ACID, 0, 0, 1)

    def testAcetate(self):
        self.doTest(ACETATE, 0, 0, 1)

    def testMobile1(self):
        self.doTest(mobile1, 0, 0, 2)

    def testMobile2(self):
        self.doTest(mobile2, 0, 0, 2)

    def testSugar1(self):
        self.doTest(sugar1, 5, 1, 0)

    def testSugar2(self):
        self.doTest(sugar2, 5, 0, 0)

    def testSp3Unk(self):
        self.doTest(sp3_unk, 2, 1, 1)
223
# Run the unit tests when the module is executed as a script.
if __name__ == '__main__':
    unittest.main()
226