1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35 from rdkit import Chem
36 import os,re
37
38 from rdkit import RDConfig
39
# Class-level defaults; the code below reads them through ``self``.

# File the salt definitions are read from when no in-memory definition
# text is set: ``Salts.txt`` inside RDKit's data directory.
defnFilename = os.path.join(RDConfig.RDDataDir, 'Salts.txt')

# Salt definitions supplied as text.  When truthy, this text is parsed
# instead of opening ``defnFilename``.
defnData = None

# Becomes the list of salt query molecules built with
# ``Chem.MolFromSmarts`` once the definitions have been parsed.
salts = None
44 - def __init__(self,defnFilename=None,defnData=None):
49
51 """
52
53 >>> remover = SaltRemover()
54 >>> len(remover.salts)>0
55 True
56
57 >>> remover = SaltRemover(defnData="[Cl,Br]")
58 >>> len(remover.salts)
59 1
60
61 >>> remover = SaltRemover(defnData="[Cl,fail]")
62 Traceback (most recent call last):
63 ...
64 ValueError: [Cl,fail]
65
66 """
67 whitespace = re.compile(r'[\t ]+')
68 if self.defnData:
69 from rdkit.six.moves import cStringIO as StringIO
70 inF = StringIO(self.defnData)
71 else:
72 inF = open(self.defnFilename,'r')
73 self.salts = []
74 for line in inF:
75 line = line.strip().split('//')[0]
76 if line:
77 splitL = whitespace.split(line)
78 salt = Chem.MolFromSmarts(splitL[0])
79 if salt is None:
80 raise ValueError(line)
81 self.salts.append(salt)
82
def StripMol(self, mol, dontRemoveEverything=False):
    """Return ``mol`` with the fragments matching ``self.salts`` removed.

    The salt patterns are applied one after another, each one repeatedly
    until it stops removing atoms.  If any pattern produced a new
    molecule object and the result still has atoms, the result is
    sanitized with ``Chem.SanitizeMol`` before being returned.

    Arguments:
      - mol: the molecule to strip.
      - dontRemoveEverything: when true, a deletion that would leave an
        empty molecule is not applied, and processing stops as soon as
        the molecule consists of at most one fragment.  A molecule that
        already has at most one fragment is returned as-is.

    Returns the stripped molecule; this is the input object itself when
    nothing was applied.

    >>> remover = SaltRemover(defnData="[Cl,Br]")
    >>> len(remover.salts)
    1

    >>> mol = Chem.MolFromSmiles('CN(C)C.Cl')
    >>> res = remover.StripMol(mol)
    >>> res is not None
    True
    >>> res.GetNumAtoms()
    4

    Notice that all salts are removed:
    >>> mol = Chem.MolFromSmiles('CN(C)C.Cl.Cl.Br')
    >>> res = remover.StripMol(mol)
    >>> res.GetNumAtoms()
    4

    Matching (e.g. "salt-like") atoms in the molecule are unchanged:
    >>> mol = Chem.MolFromSmiles('CN(Br)Cl')
    >>> res = remover.StripMol(mol)
    >>> res.GetNumAtoms()
    4

    >>> mol = Chem.MolFromSmiles('CN(Br)Cl.Cl')
    >>> res = remover.StripMol(mol)
    >>> res.GetNumAtoms()
    4

    Charged salts are handled reasonably:
    >>> mol = Chem.MolFromSmiles('C[NH+](C)(C).[Cl-]')
    >>> res = remover.StripMol(mol)
    >>> res.GetNumAtoms()
    4


    Watch out for this case (everything removed):
    >>> remover = SaltRemover()
    >>> len(remover.salts)>1
    True
    >>> mol = Chem.MolFromSmiles('CC(=O)O.[Na]')
    >>> res = remover.StripMol(mol)
    >>> res.GetNumAtoms()
    0

    dontRemoveEverything helps with this by leaving the last salt:
    >>> res = remover.StripMol(mol,dontRemoveEverything=True)
    >>> res.GetNumAtoms()
    4

    but in cases where the last salts are the same, it can't choose
    between them, so it returns all of them:
    >>> mol = Chem.MolFromSmiles('Cl.Cl')
    >>> res = remover.StripMol(mol,dontRemoveEverything=True)
    >>> res.GetNumAtoms()
    2

    """

    def _has_single_fragment(candidate):
        # True when the molecule has at most one fragment.
        return len(Chem.GetMolFrags(candidate)) <= 1

    def _strip_with_pattern(current, pattern, keep_something):
        # Apply one salt pattern over and over until a pass removes no
        # further atoms.  Returns `current` itself when nothing is applied.
        previous_count = current.GetNumAtoms()
        if not previous_count:
            return current

        # NOTE(review): the positional True is presumably RDKit's
        # `onlyFrags` flag (delete only whole matching fragments) - the
        # doctests above are consistent with that; confirm against the
        # RDKit API reference.
        attempt = Chem.DeleteSubstructs(current, pattern, True)
        if not attempt:
            return current
        if keep_something and attempt.GetNumAtoms() == 0:
            # Accepting this deletion would leave an empty molecule.
            return current
        current = attempt

        while True:
            remaining = current.GetNumAtoms()
            # Stop once the molecule is empty or the previous pass did
            # not shrink it.
            if not remaining or remaining >= previous_count:
                break
            previous_count = remaining
            attempt = Chem.DeleteSubstructs(current, pattern, True)
            if keep_something and attempt.GetNumAtoms() == 0:
                break
            current = attempt
        return current

    if dontRemoveEverything and _has_single_fragment(mol):
        return mol

    changed = False
    for pattern in self.salts:
        stripped = _strip_with_pattern(mol, pattern, dontRemoveEverything)
        if stripped is mol:
            continue
        mol = stripped
        changed = True
        # Nothing left to choose between: keep the last fragment.
        if dontRemoveEverything and _has_single_fragment(mol):
            break

    if changed and mol.GetNumAtoms() > 0:
        Chem.SanitizeMol(mol)
    return mol
176
def __call__(self, mol, dontRemoveEverything=False):
    """Make the remover callable; equivalent to calling ``StripMol``.

    Both arguments are forwarded unchanged to ``self.StripMol`` and its
    result is returned.

    >>> remover = SaltRemover(defnData="[Cl,Br]")
    >>> len(remover.salts)
    1

    >>> mol = Chem.MolFromSmiles('CN(C)C.Cl')
    >>> res = remover(mol)
    >>> res is not None
    True
    >>> res.GetNumAtoms()
    4

    """
    stripped = self.StripMol(
        mol,
        dontRemoveEverything=dontRemoveEverything,
    )
    return stripped
193
194
195
196
197
198
200 import doctest,sys
201 return doctest.testmod(sys.modules["__main__"])
202
203
# Script entry point: run the doctests and use the number of failures as
# the process exit status (0 means every doctest passed).
if __name__ == '__main__':
    import sys

    failure_count, _attempted = _test()
    sys.exit(failure_count)
208