Package rdkit :: Package Chem :: Package Suppliers :: Module DbMolSupplier
[hide private]
[frames] | no frames]

Source Code for Module rdkit.Chem.Suppliers.DbMolSupplier

  1  # $Id$ 
  2  # 
  3  # Copyright (C) 2003-2006 greg Landrum and Rational Discovery LLC 
  4  # 
  5  #   @@ All Rights Reserved @@ 
  6  #  This file is part of the RDKit. 
  7  #  The contents are covered by the terms of the BSD license 
  8  #  which is included in the file license.txt, found at the root 
  9  #  of the RDKit source tree. 
 10  # 
 11  """ Supplies a class for working with molecules from databases 
 12  #DOC  
 13   
 14  """ 
 15  from rdkit import Chem 
 16  from rdkit.Chem.Suppliers.MolSupplier import MolSupplier 
 17  import sys 
18 -def warning(msg,dest=sys.stderr):
19 dest.write(msg)
20
21 -class DbMolSupplier(MolSupplier):
22 """ 23 new molecules come back with all additional fields from the 24 database set in a "_fieldsFromDb" data member 25 26 """
27 - def __init__(self,dbResults, 28 molColumnFormats={'SMILES':'SMI', 29 'SMI':'SMI', 30 'MOLPKL':'PKL'}, 31 nameCol='', 32 transformFunc=None, 33 **kwargs):
34 """ 35 36 DbResults should be a subclass of Dbase.DbResultSet.DbResultBase 37 38 """ 39 self._data = dbResults 40 self._colNames = [x.upper() for x in self._data.GetColumnNames()] 41 nameCol = nameCol.upper() 42 self.molCol = -1 43 self.transformFunc=transformFunc 44 try: 45 self.nameCol = self._colNames.index(nameCol) 46 except ValueError: 47 self.nameCol = -1 48 for name in molColumnFormats.keys(): 49 name = name.upper() 50 try: 51 idx = self._colNames.index(name) 52 except ValueError: 53 pass 54 else: 55 self.molCol = idx 56 self.molFmt = molColumnFormats[name] 57 break 58 if self.molCol < 0: 59 raise ValueError('DbResultSet has no recognizable molecule column') 60 del self._colNames[self.molCol] 61 self._colNames = tuple(self._colNames) 62 self._numProcessed=0
63 - def GetColumnNames(self):
64 return self._colNames
65
66 - def _BuildMol(self,data):
67 data = list(data) 68 molD = data[self.molCol] 69 del data[self.molCol] 70 self._numProcessed+=1; 71 try: 72 if self.molFmt =='SMI': 73 newM = Chem.MolFromSmiles(str(molD)) 74 if not newM: 75 warning('Problems processing mol %d, smiles: %s\n'%(self._numProcessed,molD)) 76 elif self.molFmt =='PKL': 77 newM = Chem.Mol(str(molD)) 78 except Exception: 79 import traceback 80 traceback.print_exc() 81 newM = None 82 else: 83 if newM and self.transformFunc: 84 try: 85 newM = self.transformFunc(newM,data) 86 except Exception: 87 import traceback 88 traceback.print_exc() 89 newM = None 90 if newM: 91 newM._fieldsFromDb = data 92 nFields = len(data) 93 for i in range(nFields): 94 newM.SetProp(self._colNames[i],str(data[i])) 95 if self.nameCol >=0 : 96 newM.SetProp('_Name',str(data[self.nameCol])) 97 return newM
98
99 -class ForwardDbMolSupplier(DbMolSupplier):
100 """ DbMol supplier supporting only forward iteration 101 102 103 new molecules come back with all additional fields from the 104 database set in a "_fieldsFromDb" data member 105 106 """
107 - def __init__(self,dbResults,**kwargs):
108 """ 109 110 DbResults should be an iterator for Dbase.DbResultSet.DbResultBase 111 112 """ 113 DbMolSupplier.__init__(self,dbResults,**kwargs) 114 self.Reset()
115
116 - def Reset(self):
117 self._dataIter = iter(self._data)
118
119 - def NextMol(self):
120 """ 121 122 NOTE: this has side effects 123 124 """ 125 try: 126 d = self._dataIter.next() 127 except StopIteration: 128 d = None 129 if d is not None: 130 newM = self._BuildMol(d) 131 else: 132 newM = None 133 134 return newM
135
136 -class RandomAccessDbMolSupplier(DbMolSupplier):
137 - def __init__(self,dbResults,**kwargs):
138 """ 139 140 DbResults should be a Dbase.DbResultSet.RandomAccessDbResultSet 141 142 """ 143 DbMolSupplier.__init__(self,dbResults,**kwargs) 144 self._pos = -1
145
146 - def __len__(self):
147 return len(self._data)
148
149 - def __getitem__(self,idx):
150 newD = self._data[idx] 151 return self._BuildMol(newD)
152
153 - def Reset(self):
154 self._pos = -1
155 - def NextMol(self):
156 self._pos += 1 157 res = None 158 if self._pos < len(self): 159 res = self[self._pos] 160 return res
161