Package rdkit :: Package Chem :: Package EState :: Module AtomTypes
[hide private]
[frames | no frames]

Source Code for Module rdkit.Chem.EState.AtomTypes

  1  # $Id$ 
  2  # 
  3  #  Copyright (C) 2002-2006  greg Landrum and Rational Discovery LLC 
  4  # 
  5  #   @@ All Rights Reserved @@ 
  6  #  This file is part of the RDKit. 
  7  #  The contents are covered by the terms of the BSD license 
  8  #  which is included in the file license.txt, found at the root 
  9  #  of the RDKit source tree. 
 10  # 
 11  """ contains SMARTS definitions and calculators for EState atom types 
 12   
 13   defined in: Hall and Kier JCICS _35_ 1039-1045 (1995)  Table 1 
 14  """ 
 15  from rdkit import Chem 
 16   
 17  _rawD = [ 
 18    ('sLi','[LiD1]-*'), 
 19   
 20    ('ssBe','[BeD2](-*)-*'), 
 21    ('ssssBe','[BeD4](-*)(-*)(-*)-*'), 
 22   
 23    ('ssBH', '[BD2H](-*)-*'), 
 24    ('sssB', '[BD3](-*)(-*)-*'), 
 25    ('ssssB','[BD4](-*)(-*)(-*)-*'), 
 26   
 27    ('sCH3', '[CD1H3]-*'), 
 28    ('dCH2', '[CD1H2]=*'), 
 29    ('ssCH2','[CD2H2](-*)-*'), 
 30    ('tCH',  '[CD1H]#*'), 
 31    ('dsCH', '[CD2H](=*)-*'), 
 32    ('aaCH', '[C,c;D2H](:*):*'), 
 33    ('sssCH','[CD3H](-*)(-*)-*'), 
 34    ('ddC',  '[CD2H0](=*)=*'), 
 35    ('tsC',  '[CD2H0](#*)-*'), 
 36    ('dssC', '[CD3H0](=*)(-*)-*'),   
 37    ('aasC', '[C,c;D3H0](:*)(:*)-*'), 
 38    ('aaaC', '[C,c;D3H0](:*)(:*):*'), 
 39    ('ssssC','[CD4H0](-*)(-*)(-*)-*'), 
 40   
 41    ('sNH3', '[ND1H3]-*'), 
 42    ('sNH2', '[ND1H2]-*'), 
 43    ('ssNH2','[ND2H2](-*)-*'), 
 44    ('dNH',  '[ND1H]=*'), 
 45    ('ssNH', '[ND2H](-*)-*'), 
 46    ('aaNH', '[N,nD2H](:*):*'), 
 47    ('tN',   '[ND1H0]#*'), 
 48    ('sssNH','[ND3H](-*)(-*)-*'), 
 49    ('dsN',  '[ND2H0](=*)-*'), 
 50    ('aaN',  '[N,nD2H0](:*):*'), 
 51    ('sssN', '[ND3H0](-*)(-*)-*'), 
 52    ('ddsN', '[ND3H0](~[OD1H0])(~[OD1H0])-,:*'),  # mod 
 53    ('aasN', '[N,nD3H0](:*)(:*)-,:*'),              # mod 
 54    ('ssssN','[ND4H0](-*)(-*)(-*)-*'), 
 55   
 56    ('sOH','[OD1H]-*'), 
 57    ('dO', '[OD1H0]=*'), 
 58    ('ssO','[OD2H0](-*)-*'), 
 59    ('aaO','[O,oD2H0](:*):*'), 
 60   
 61    ('sF','[FD1]-*'), 
 62   
 63    ('sSiH3', '[SiD1H3]-*'), 
 64    ('ssSiH2','[SiD2H2](-*)-*'), 
 65    ('sssSiH','[SiD3H1](-*)(-*)-*'), 
 66    ('ssssSi','[SiD4H0](-*)(-*)(-*)-*'), 
 67   
 68    ('sPH2',  '[PD1H2]-*'), 
 69    ('ssPH',  '[PD2H1](-*)-*'), 
 70    ('sssP',  '[PD3H0](-*)(-*)-*'), 
 71    ('dsssP', '[PD4H0](=*)(-*)(-*)-*'), 
 72    ('sssssP','[PD5H0](-*)(-*)(-*)(-*)-*'), 
 73      
 74    ('sSH',  '[SD1H1]-*'), 
 75    ('dS',   '[SD1H0]=*'), 
 76    ('ssS',  '[SD2H0](-*)-*'), 
 77    ('aaS',  '[S,sD2H0](:*):*'), 
 78    ('dssS', '[SD3H0](=*)(-*)-*'), 
 79    ('ddssS','[SD4H0](~[OD1H0])(~[OD1H0])(-*)-*'),  # mod 
 80   
 81    ('sCl', '[ClD1]-*'), 
 82   
 83    ('sGeH3', '[GeD1H3](-*)'), 
 84    ('ssGeH2','[GeD2H2](-*)-*'), 
 85    ('sssGeH','[GeD3H1](-*)(-*)-*'), 
 86    ('ssssGe','[GeD4H0](-*)(-*)(-*)-*'), 
 87   
 88    ('sAsH2',  '[AsD1H2]-*'), 
 89    ('ssAsH',  '[AsD2H1](-*)-*'), 
 90    ('sssAs',  '[AsD3H0](-*)(-*)-*'), 
 91    ('sssdAs', '[AsD4H0](=*)(-*)(-*)-*'), 
 92    ('sssssAs','[AsD5H0](-*)(-*)(-*)(-*)-*'), 
 93   
 94    ('sSeH',  '[SeD1H1]-*'), 
 95    ('dSe',   '[SeD1H0]=*'), 
 96    ('ssSe',  '[SeD2H0](-*)-*'), 
 97    ('aaSe',  '[SeD2H0](:*):*'), 
 98    ('dssSe', '[SeD3H0](=*)(-*)-*'), 
 99    ('ddssSe','[SeD4H0](=*)(=*)(-*)-*'), 
100   
101    ('sBr','[BrD1]-*'), 
102   
103    ('sSnH3', '[SnD1H3]-*'), 
104    ('ssSnH2','[SnD2H2](-*)-*'), 
105    ('sssSnH','[SnD3H1](-*)(-*)-*'), 
106    ('ssssSn','[SnD4H0](-*)(-*)(-*)-*'), 
107   
108    ('sI','[ID1]-*'), 
109   
110    ('sPbH3', '[PbD1H3]-*'), 
111    ('ssPbH2','[PbD2H2](-*)-*'), 
112    ('sssPbH','[PbD3H1](-*)(-*)-*'), 
113    ('ssssPb','[PbD4H0](-*)(-*)(-*)-*'), 
114  ] 
115   
116  esPatterns=None 
def BuildPatts(rawV=None):
  """ Internal Use Only

  Compiles a table of SMARTS definitions and stores the result in the
  module-level esPatterns list.

  **Arguments:**

    - rawV: optional sequence of (name, SMARTS) 2-tuples; when None the
      module-level _rawD table is used.

  **Notes:**

    - esPatterns gets exactly one slot per entry of rawV, in the same order.
      A slot holds the tuple (name, compiled pattern), or None when
      Chem.MolFromSmarts returned None for that SMARTS; in that case a
      warning is written to stderr.

  """
  # Imported here because the module header does not import sys; without it
  # the warning branch below raised NameError instead of reporting the
  # offending pattern.
  import sys

  global esPatterns
  if rawV is None:
    rawV = _rawD

  esPatterns = [None] * len(rawV)
  for i, (name, sma) in enumerate(rawV):
    patt = Chem.MolFromSmarts(sma)
    if patt is None:
      sys.stderr.write('WARNING: problems with pattern %s (name: %s), skipped.\n'%(sma,name))
    else:
      esPatterns[i] = name, patt
def TypeAtoms(mol):
  """ assigns each atom in a molecule to an EState type

  **Arguments:**

    - mol: the molecule to type; it must provide GetNumAtoms() and
      GetSubstructMatches().

  **Returns:**

     list of tuples (atoms can possibly match multiple patterns) with atom types

     The list has one entry per atom; an atom that matches no pattern gets
     an empty tuple.  Names appear in the order of the pattern table and
     are never repeated for the same atom.

  """
  if esPatterns is None:
    BuildPatts()

  typesByAtom = [[] for _ in range(mol.GetNumAtoms())]
  for entry in esPatterns:
    if entry is None:
      # BuildPatts leaves None in the slot of a pattern that failed to
      # compile; skip it instead of failing on the tuple unpacking below.
      continue
    name, patt = entry
    for match in mol.GetSubstructMatches(patt, uniquify=0):
      # the first atom of each match is the one being typed
      names = typesByAtom[match[0]]
      if name not in names:
        names.append(name)
  return [tuple(names) for names in typesByAtom]