Package rdkit :: Package Chem :: Module SaltRemover
[hide private]
[frames | no frames]

Source Code for Module rdkit.Chem.SaltRemover

  1  # $Id$ 
  2  # 
  3  #  Copyright (c) 2010, Novartis Institutes for BioMedical Research Inc. 
  4  #  All rights reserved. 
  5  #  
  6  # Redistribution and use in source and binary forms, with or without 
  7  # modification, are permitted provided that the following conditions are 
  8  # met:  
  9  # 
 10  #     * Redistributions of source code must retain the above copyright  
 11  #       notice, this list of conditions and the following disclaimer. 
 12  #     * Redistributions in binary form must reproduce the above 
 13  #       copyright notice, this list of conditions and the following  
 14  #       disclaimer in the documentation and/or other materials provided  
 15  #       with the distribution. 
 16  #     * Neither the name of Novartis Institutes for BioMedical Research Inc.  
 17  #       nor the names of its contributors may be used to endorse or promote  
 18  #       products derived from this software without specific prior written permission. 
 19  # 
 20  # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 
 21  # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 
 22  # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 
 23  # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 
 24  # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 
 25  # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 
 26  # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 
 27  # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 
 28  # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 
 29  # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 
 30  # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
 31  # 
 32  # Created by Greg Landrum, Dec 2006 
 33  # 
 34   
 35  from rdkit import Chem 
 36  import os,re 
 37   
 38  from rdkit import RDConfig 
 39   
class SaltRemover(object):
    """Remove fragments matching salt SMARTS patterns from molecules.

    Salt definitions are SMARTS patterns, one per line.  They are read from
    the string ``defnData`` when it is non-empty, otherwise from the file
    named by ``defnFilename``.  On each line, everything after ``//`` is
    ignored and only the first whitespace-separated token is parsed.
    """
    # Default definitions file; an instance may override it via __init__.
    defnFilename = os.path.join(RDConfig.RDDataDir, 'Salts.txt')
    # Optional in-memory definitions text; takes precedence over the file.
    defnData = None
    # List of parsed SMARTS query molecules, filled by _initPatterns().
    salts = None

    def __init__(self, defnFilename=None, defnData=None):
        """Store the definition source and parse the salt patterns.

        Arguments:
          - defnFilename: path of a salt definitions file.  A falsy value
            keeps the class-level default.
          - defnData: salt definitions as a string.  A falsy value means the
            definitions file is read instead.

        Raises ValueError (from _initPatterns) if a definition cannot be
        parsed as SMARTS.
        """
        if defnFilename:
            self.defnFilename = defnFilename
        self.defnData = defnData
        self._initPatterns()

    def _initPatterns(self):
        """Parse the salt definitions into ``self.salts``.

        Raises ValueError, carrying the offending (comment-stripped) line,
        when Chem.MolFromSmarts() returns None for a definition.

        >>> remover = SaltRemover()
        >>> len(remover.salts)>0
        True

        >>> remover = SaltRemover(defnData="[Cl,Br]")
        >>> len(remover.salts)
        1

        >>> remover = SaltRemover(defnData="[Cl,fail]")
        Traceback (most recent call last):
          ...
        ValueError: [Cl,fail]

        """
        whitespace = re.compile(r'[\t ]+')
        if self.defnData:
            # Splitting on '\n' yields the same lines a StringIO would, once
            # each one is stripped below, without needing a py2/3 shim.
            lines = self.defnData.split('\n')
        else:
            # Read everything up front so the file handle is always closed,
            # even when a bad pattern raises further down.
            with open(self.defnFilename, 'r') as inF:
                lines = inF.readlines()
        self.salts = []
        for line in lines:
            # Drop surrounding whitespace, then any trailing '//' comment.
            line = line.strip().split('//')[0]
            if line:
                splitL = whitespace.split(line)
                salt = Chem.MolFromSmarts(splitL[0])
                if salt is None:
                    raise ValueError(line)
                self.salts.append(salt)

    def StripMol(self, mol, dontRemoveEverything=False):
        """Apply every salt pattern to ``mol`` and return the result.

        Arguments:
          - mol: the molecule to process.
          - dontRemoveEverything: when true, a deletion that would leave an
            empty molecule is not applied, and processing stops as soon as
            at most one fragment remains.

        Returns the resulting molecule.  This can be the input object itself,
        e.g. when ``dontRemoveEverything`` is set and the input has at most
        one fragment.  If any pattern produced a new molecule and atoms
        remain, the result is sanitized before being returned.

        >>> remover = SaltRemover(defnData="[Cl,Br]")
        >>> len(remover.salts)
        1

        >>> mol = Chem.MolFromSmiles('CN(C)C.Cl')
        >>> res = remover.StripMol(mol)
        >>> res is not None
        True
        >>> res.GetNumAtoms()
        4

        Notice that all salts are removed:
        >>> mol = Chem.MolFromSmiles('CN(C)C.Cl.Cl.Br')
        >>> res = remover.StripMol(mol)
        >>> res.GetNumAtoms()
        4

        Matching (e.g. "salt-like") atoms in the molecule are unchanged:
        >>> mol = Chem.MolFromSmiles('CN(Br)Cl')
        >>> res = remover.StripMol(mol)
        >>> res.GetNumAtoms()
        4

        >>> mol = Chem.MolFromSmiles('CN(Br)Cl.Cl')
        >>> res = remover.StripMol(mol)
        >>> res.GetNumAtoms()
        4

        Charged salts are handled reasonably:
        >>> mol = Chem.MolFromSmiles('C[NH+](C)(C).[Cl-]')
        >>> res = remover.StripMol(mol)
        >>> res.GetNumAtoms()
        4


        Watch out for this case (everything removed):
        >>> remover = SaltRemover()
        >>> len(remover.salts)>1
        True
        >>> mol = Chem.MolFromSmiles('CC(=O)O.[Na]')
        >>> res = remover.StripMol(mol)
        >>> res.GetNumAtoms()
        0

        dontRemoveEverything helps with this by leaving the last salt:
        >>> res = remover.StripMol(mol,dontRemoveEverything=True)
        >>> res.GetNumAtoms()
        4

        but in cases where the last salts are the same, it can't choose
        between them, so it returns all of them:
        >>> mol = Chem.MolFromSmiles('Cl.Cl')
        >>> res = remover.StripMol(mol,dontRemoveEverything=True)
        >>> res.GetNumAtoms()
        2

        """
        def _applyPattern(m, salt, notEverything):
            # Repeatedly delete matches of one salt pattern from m.
            nAts = m.GetNumAtoms()
            if not nAts:
                return m
            res = m

            # NOTE(review): the third positional argument (True) presumably
            # restricts deletion to whole matching fragments, which is what
            # the doctests above show -- confirm against the RDKit docs.
            t = Chem.DeleteSubstructs(res, salt, True)
            if not t or (notEverything and t.GetNumAtoms() == 0):
                return res
            res = t
            # Keep going for as long as the previous pass removed atoms.
            while res.GetNumAtoms() and nAts > res.GetNumAtoms():
                nAts = res.GetNumAtoms()
                t = Chem.DeleteSubstructs(res, salt, True)
                if notEverything and t.GetNumAtoms() == 0:
                    # This pass would empty the molecule: keep the last
                    # non-empty result instead.
                    break
                res = t
            return res

        # With at most one fragment there is nothing that may be removed.
        if dontRemoveEverything and len(Chem.GetMolFrags(mol)) <= 1:
            return mol
        modified = False
        for salt in self.salts:
            tMol = _applyPattern(mol, salt, dontRemoveEverything)
            if tMol is not mol:
                mol = tMol
                modified = True
                if dontRemoveEverything and len(Chem.GetMolFrags(mol)) <= 1:
                    break
        if modified and mol.GetNumAtoms() > 0:
            Chem.SanitizeMol(mol)
        return mol

    def __call__(self, mol, dontRemoveEverything=False):
        """Call StripMol() with the same arguments and return its result.

        >>> remover = SaltRemover(defnData="[Cl,Br]")
        >>> len(remover.salts)
        1

        >>> mol = Chem.MolFromSmiles('CN(C)C.Cl')
        >>> res = remover(mol)
        >>> res is not None
        True
        >>> res.GetNumAtoms()
        4

        """
        return self.StripMol(mol, dontRemoveEverything=dontRemoveEverything)
#------------------------------------
#
#  doctest boilerplate
#
199 -def _test():
200 import doctest,sys 201 return doctest.testmod(sys.modules["__main__"])
if __name__ == '__main__':
    # Script entry point: run the doctests and use the number of failures
    # as the process exit status (0 means every doctest passed).
    import sys
    failed, tried = _test()
    sys.exit(failed)