1
2
3
4
5
6
7
8
9
10
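""" Supplies classes for working with fingerprints from databases

Forward-only and random-access suppliers are provided; each one builds
fingerprint objects from the rows of a database result set.

"""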
15 from rdkit import RDConfig
16 from rdkit.VLib.Node import VLibNode
17 from rdkit import DataStructs
18 from rdkit import six
19 from rdkit.six.moves import cPickle
20 import sys
23
"""
New fingerprints come back with all additional (non-fingerprint) fields
from their database row stored in a "_fieldsFromDb" data member.

"""
def __init__(self,dbResults,fpColName='AutoFragmentFp',usePickles=True):
    """ Sets the supplier up on top of a database result set

    Arguments:
      - dbResults: the result set to pull fingerprints from; it should be
        a subclass of Dbase.DbResultSet.DbResultBase
      - fpColName: name of the column holding the fingerprints; it is
        matched against the result set's column names ignoring case
      - usePickles: if true, fingerprint column values are unpickled,
        otherwise they are handed to DataStructs.ExplicitBitVect

    Raises ValueError if the fingerprint column is not in the result set.

    """
    VLibNode.__init__(self)
    self._usePickles = usePickles
    self._data = dbResults
    target = fpColName.upper()
    self._fpColName = target
    # everything is upper-cased so that the column lookup ignores case
    upperNames = [colName.upper() for colName in self._data.GetColumnNames()]
    self._colNames = upperNames
    if target not in upperNames:
        raise ValueError('fp column name "%s" not found in result set: %s' % (target, str(upperNames)))
    fpIdx = upperNames.index(target)
    self.fpCol = fpIdx
    # only the names of the non-fingerprint columns are kept, as a tuple
    self._colNames = tuple(upperNames[:fpIdx] + upperNames[fpIdx + 1:])
    self._numProcessed = 0
47
48
51
def _BuildFp(self,data):
    """ Builds a fingerprint from one row of the result set

    Arguments:
      - data: a sequence with the column values of a single row; the
        element at position self.fpCol holds the serialized fingerprint

    Returns the new fingerprint, or None if it could not be constructed
    (the traceback is printed in that case). The remaining column values
    of the row are attached to the fingerprint as its _fieldsFromDb
    attribute.

    NOTE: this has side effects (self._numProcessed is incremented)

    """
    data = list(data)
    raw = data[self.fpCol]
    if six.PY3:
        if isinstance(raw, (bytes, bytearray, memoryview)):
            # a binary column may already arrive as a bytes-like object,
            # which bytes() refuses to combine with an encoding argument
            pkl = bytes(raw)
        else:
            pkl = bytes(raw, encoding='Latin1')
    else:
        pkl = str(raw)
    del data[self.fpCol]
    self._numProcessed += 1
    try:
        if self._usePickles:
            # NOTE(security): unpickling can execute arbitrary code; this is
            # only safe for databases whose contents are trusted.
            if six.PY3:
                newFp = cPickle.loads(pkl, encoding='bytes')
            else:
                # the Python 2 cPickle.loads() takes no keyword arguments
                newFp = cPickle.loads(pkl)
        else:
            newFp = DataStructs.ExplicitBitVect(pkl)
    except Exception:
        # deliberately best-effort: report the problem and signal it to the
        # caller with None instead of propagating the exception
        import traceback
        traceback.print_exc()
        newFp = None
    # None is the failure sentinel; do not rely on the fingerprint's truth value
    if newFp is not None:
        newFp._fieldsFromDb = data
    return newFp
72
def next(self):
    """ Returns the next item from the supplier (iterator protocol)

    The item is whatever self.NextItem() returns; a return value of None
    is taken as the end of the data and raises StopIteration.

    """
    itm = self.NextItem()
    if itm is None:
        raise StopIteration
    return itm
78
79 __next__ = next
80
81
83 """ DbFp supplier supporting only forward iteration
84
85 >>> import os.path
86 >>> from rdkit.Dbase.DbConnection import DbConnect
87 >>> fName = RDConfig.RDTestDatabase
88 >>> conn = DbConnect(fName,'simple_combined')
89 >>> suppl = ForwardDbFpSupplier(conn.GetData())
90
91 we can loop over the supplied fingerprints:
92 >>> fps = []
93 >>> for fp in suppl:
94 ... fps.append(fp)
95 >>> len(fps)
96 12
97
98 """
102
106
def NextItem(self):
    """ Returns the next fingerprint, or None once the data is exhausted

    The next row is pulled from self._dataIter and passed to
    self._BuildFp(); None is returned if the iterator is exhausted or if
    it yields None.

    NOTE: this has side effects (self._dataIter is advanced)

    """
    advance = getattr(self._dataIter, 'next', None)
    if advance is None:
        # plain Python 3 iterators only provide __next__()
        advance = self._dataIter.__next__
    try:
        row = advance()
    except StopIteration:
        return None
    if row is None:
        return None
    return self._BuildFp(row)
122
124 """ DbFp supplier supporting random access:
125 >>> import os.path
126 >>> from rdkit.Dbase.DbConnection import DbConnect
127 >>> fName = RDConfig.RDTestDatabase
128 >>> conn = DbConnect(fName,'simple_combined')
129 >>> suppl = RandomAccessDbFpSupplier(conn.GetData())
130 >>> len(suppl)
131 12
132
133 we can pull individual fingerprints:
134 >>> fp = suppl[5]
135 >>> fp.GetNumBits()
136 128
137 >>> fp.GetNumOnBits()
138 54
139
140 a standard loop over the fingerprints:
141 >>> fps = []
142 >>> for fp in suppl:
143 ... fps.append(fp)
144 >>> len(fps)
145 12
146
147 or we can use an indexed loop:
148 >>> fps = [None]*len(suppl)
149 >>> for i in range(len(suppl)):
150 ... fps[i] = suppl[i]
151 >>> len(fps)
152 12
153
154 """
158
def __len__(self):
    """ Returns the number of entries in the underlying result set """
    return len(self._data)
161
def __getitem__(self,idx):
    """ Returns the fingerprint built from entry idx of the result set

    The row is looked up with self._data[idx] and converted with
    self._BuildFp().

    """
    return self._BuildFp(self._data[idx])
165
168
def NextItem(self):
    """ Moves to the next position and returns the fingerprint found there

    Returns None once the position has moved past the last entry.

    NOTE: this has side effects (self._pos is incremented on every call)

    """
    self._pos += 1
    if self._pos < len(self):
        return self[self._pos]
    return None
175
176
177
178
179
180
181
def _test():
    """ Runs the doctests of the module that is executing as __main__

    Returns the result of doctest.testmod().

    """
    import doctest
    import sys
    return doctest.testmod(sys.modules["__main__"])
185
if __name__ == '__main__':
    # run the doctests and hand the number of failures to the shell as the
    # exit status
    import sys
    results = _test()
    failed, tried = results
    sys.exit(failed)
190