Package rdkit :: Package Chem :: Package Fingerprints :: Module DbFpSupplier
[hide private]
[frames] | no frames]

Source Code for Module rdkit.Chem.Fingerprints.DbFpSupplier

  1  # $Id$ 
  2  # 
  3  # Copyright (C) 2003-2006 greg Landrum and Rational Discovery LLC 
  4  # 
  5  #   @@ All Rights Reserved @@ 
  6  #  This file is part of the RDKit. 
  7  #  The contents are covered by the terms of the BSD license 
  8  #  which is included in the file license.txt, found at the root 
  9  #  of the RDKit source tree. 
 10  # 
 11  """ Supplies a class for working with fingerprints from databases 
 12  #DOC  
 13   
 14  """ 
 15  from rdkit import RDConfig 
 16  from rdkit.VLib.Node import VLibNode 
 17  from rdkit import DataStructs 
 18  from rdkit import six 
 19  from rdkit.six.moves import cPickle 
 20  import sys 
def warning(msg,dest=sys.stderr):
    """Write a message to a stream, exactly as given.

    Arguments:
      - msg: the text to write; no newline or prefix is added
      - dest: an object with a write() method.  The default is the
        sys.stderr object that was current when this function was defined.

    """
    dest.write(msg)
23
class DbFpSupplier(VLibNode):
    """Base class for nodes that supply fingerprints read from a database.

    New fps come back with all additional fields from the
    database set in a "_fieldsFromDb" data member.

    This class does not define NextItem() itself; next() calls it, so
    concrete suppliers are expected to provide it.

    """
    def __init__(self,dbResults,fpColName='AutoFragmentFp',usePickles=True):
        """

        Arguments:
          - dbResults: the result set to pull rows from.  It should be a
            subclass of Dbase.DbResultSet.DbResultBase; the only method used
            here is GetColumnNames().
          - fpColName: name of the column holding the fingerprint.  The
            comparison with the result set's column names is done on
            upper-cased names, so it is case insensitive.
          - usePickles: if true the fingerprint column is unpickled,
            otherwise its contents are handed to DataStructs.ExplicitBitVect.

        Raises ValueError if the fingerprint column is not in the result set.

        """
        VLibNode.__init__(self)
        self._usePickles = usePickles
        self._data = dbResults
        self._fpColName = fpColName.upper()
        self._colNames = [x.upper() for x in self._data.GetColumnNames()]
        if self._fpColName not in self._colNames:
            raise ValueError('fp column name "%s" not found in result set: %s'%(self._fpColName,str(self._colNames)))
        # remember where the fingerprint lives, then drop it from the list of
        # column names so that the names line up with "_fieldsFromDb"
        self.fpCol = self._colNames.index(self._fpColName)
        del self._colNames[self.fpCol]
        self._colNames = tuple(self._colNames)
        self._numProcessed=0

    def GetColumnNames(self):
        """Return the upper-cased column names, without the fingerprint column, as a tuple."""
        return self._colNames

    def _BuildFp(self,data):
        """Convert one row of the result set into a fingerprint.

        Arguments:
          - data: a sequence of column values for a single row

        Returns the fingerprint with the remaining column values stored in
        its "_fieldsFromDb" attribute, or None if the fingerprint could not
        be constructed (the traceback is printed in that case).

        """
        data = list(data)
        rawFp = data[self.fpCol]
        if six.PY3:
            if isinstance(rawFp,(bytes,bytearray,memoryview)):
                # bytes(x,encoding=...) raises a TypeError unless x is a str,
                # so binary values are passed through unchanged
                pkl = bytes(rawFp)
            else:
                pkl = bytes(rawFp,encoding='Latin1')
        else:
            pkl = str(rawFp)
        del data[self.fpCol]
        self._numProcessed += 1
        try:
            if self._usePickles:
                # NOTE(security): unpickling can execute arbitrary code; only
                # use this with databases whose contents are trusted.
                if six.PY3:
                    newFp = cPickle.loads(pkl,encoding='bytes')
                else:
                    # the python2 loads() does not accept an encoding argument
                    newFp = cPickle.loads(pkl)
            else:
                newFp = DataStructs.ExplicitBitVect(pkl)
        except Exception:
            # best effort: report the problem and hand back None for this row
            import traceback
            traceback.print_exc()
            newFp = None
        # compare with None explicitly so that a fingerprint that happens to
        # evaluate as false still gets its database fields
        if newFp is not None:
            newFp._fieldsFromDb = data
        return newFp

    def next(self):
        """Return the next fingerprint; raises StopIteration once NextItem() returns None."""
        itm = self.NextItem()
        if itm is None:
            raise StopIteration
        return itm

    __next__ = next # py3
80 81
class ForwardDbFpSupplier(DbFpSupplier):
    """ DbFp supplier supporting only forward iteration

    >>> import os.path
    >>> from rdkit.Dbase.DbConnection import DbConnect
    >>> fName = RDConfig.RDTestDatabase
    >>> conn = DbConnect(fName,'simple_combined')
    >>> suppl = ForwardDbFpSupplier(conn.GetData())

    we can loop over the supplied fingerprints:
    >>> fps = []
    >>> for fp in suppl:
    ...   fps.append(fp)
    >>> len(fps)
    12

    """
    def __init__(self,*args,**kwargs):
        """Takes the same arguments as DbFpSupplier and starts at the first row."""
        DbFpSupplier.__init__(self,*args,**kwargs)
        self.reset()

    def reset(self):
        """Restart the iteration by asking the result set for a fresh iterator."""
        DbFpSupplier.reset(self)
        self._dataIter = iter(self._data)

    def NextItem(self):
        """Return the fingerprint for the next row, or None when the rows are used up.

        NOTE: this has side effects

        """
        try:
            # the next() builtin works with both python2 and python3
            # iterators; the .next() method only exists in python2
            row = next(self._dataIter)
        except StopIteration:
            return None
        if row is None:
            return None
        return self._BuildFp(row)
122
class RandomAccessDbFpSupplier(DbFpSupplier):
    """ DbFp supplier supporting random access:

    >>> import os.path
    >>> from rdkit.Dbase.DbConnection import DbConnect
    >>> fName = RDConfig.RDTestDatabase
    >>> conn = DbConnect(fName,'simple_combined')
    >>> suppl = RandomAccessDbFpSupplier(conn.GetData())
    >>> len(suppl)
    12

    we can pull individual fingerprints:
    >>> fp = suppl[5]
    >>> fp.GetNumBits()
    128
    >>> fp.GetNumOnBits()
    54

    a standard loop over the fingerprints:
    >>> fps = []
    >>> for fp in suppl:
    ...   fps.append(fp)
    >>> len(fps)
    12

    or we can use an indexed loop:
    >>> fps = [None]*len(suppl)
    >>> for i in range(len(suppl)):
    ...   fps[i] = suppl[i]
    >>> len(fps)
    12

    """
    def __init__(self,*args,**kwargs):
        """Takes the same arguments as DbFpSupplier and starts before the first row."""
        DbFpSupplier.__init__(self,*args,**kwargs)
        self.reset()

    def __len__(self):
        """Return the number of rows in the result set."""
        return len(self._data)

    def __getitem__(self,idx):
        """Return the fingerprint built from row idx of the result set."""
        newD = self._data[idx]
        return self._BuildFp(newD)

    def reset(self):
        """Move the iteration position back to just before the first row."""
        self._pos = -1

    def NextItem(self):
        """Advance one row and return its fingerprint, or None past the last row.

        The position is advanced on every call, including calls made
        after the end has been reached.

        """
        self._pos += 1
        res = None
        if self._pos < len(self):
            res = self[self._pos]
        return res
#------------------------------------
#
#  doctest boilerplate
#
def _test():
    """Run the doctests of the module registered as "__main__".

    Returns the result of doctest.testmod(), which unpacks as
    (number of failures, number of tests attempted).

    """
    import doctest,sys
    return doctest.testmod(sys.modules["__main__"])
# when run as a script: execute the doctests and use the number of
# failures as the exit status (0 means everything passed)
if __name__ == '__main__':
    import sys
    failed,tried = _test()
    sys.exit(failed)