1
2
3
4
5
6
7
8
9
10
11 from __future__ import print_function
12 import copy,struct,sys
13 from rdkit.six.moves import cPickle
14 from rdkit.six import iterkeys
15 from rdkit import six
16 from rdkit import DataStructs
17
class VectCollection(object):
    """A keyed collection of bit vectors that behaves like their bitwise OR.

    Child vectors are stored under unique ids.  Length, indexing, GetBit()
    and GetOnBits() all refer to the OR of every child; that OR vector is
    cached and rebuilt lazily after the collection changes.

    Child vectors must support copy.copy(), ``|=``, GetNumBits(), GetBit(),
    GetOnBits() and ToBinary().

    >>> vc = VectCollection()
    >>> bv1 = DataStructs.ExplicitBitVect(10)
    >>> bv1.SetBitsFromList((1,3,5))
    >>> vc.AddVect(1,bv1)
    >>> bv1 = DataStructs.ExplicitBitVect(10)
    >>> bv1.SetBitsFromList((6,8))
    >>> vc.AddVect(2,bv1)
    >>> len(vc)
    10
    >>> vc.GetNumBits()
    10
    >>> vc[0]
    0
    >>> vc[1]
    1
    >>> vc[9]
    0
    >>> vc[6]
    1
    >>> vc.GetBit(6)
    1
    >>> list(vc.GetOnBits())
    [1, 3, 5, 6, 8]

    Keys must be unique, so adding a vector under an existing key replaces
    the previous value:
    >>> bv1 = DataStructs.ExplicitBitVect(10)
    >>> bv1.SetBitsFromList((7,9))
    >>> vc.AddVect(1,bv1)
    >>> len(vc)
    10
    >>> vc[1]
    0
    >>> vc[9]
    1
    >>> vc[6]
    1

    We can also query the children:
    >>> vc.NumChildren()
    2
    >>> cs = vc.GetChildren()
    >>> id,fp = cs[0]
    >>> id
    1
    >>> list(fp.GetOnBits())
    [7, 9]
    >>> id,fp = cs[1]
    >>> id
    2
    >>> list(fp.GetOnBits())
    [6, 8]

    Attach/detach operations:
    >>> bv1 = DataStructs.ExplicitBitVect(10)
    >>> bv1.SetBitsFromList((5,6))
    >>> vc.AddVect(3,bv1)
    >>> vc.NumChildren()
    3
    >>> list(vc.GetOnBits())
    [5, 6, 7, 8, 9]
    >>> vc.DetachVectsNotMatchingBit(6)
    >>> vc.NumChildren()
    2
    >>> list(vc.GetOnBits())
    [5, 6, 8]


    >>> bv1 = DataStructs.ExplicitBitVect(10)
    >>> bv1.SetBitsFromList((7,9))
    >>> vc.AddVect(1,bv1)
    >>> vc.NumChildren()
    3
    >>> list(vc.GetOnBits())
    [5, 6, 7, 8, 9]
    >>> vc.DetachVectsMatchingBit(6)
    >>> vc.NumChildren()
    1
    >>> list(vc.GetOnBits())
    [7, 9]


    To copy VectCollections, use the copy module:
    >>> bv1 = DataStructs.ExplicitBitVect(10)
    >>> bv1.SetBitsFromList((5,6))
    >>> vc.AddVect(3,bv1)
    >>> list(vc.GetOnBits())
    [5, 6, 7, 9]
    >>> vc2 = copy.copy(vc)
    >>> vc.DetachVectsNotMatchingBit(6)
    >>> list(vc.GetOnBits())
    [5, 6]
    >>> list(vc2.GetOnBits())
    [5, 6, 7, 9]

    The Uniquify() method can be used to remove duplicate vectors:
    >>> vc = VectCollection()
    >>> bv1 = DataStructs.ExplicitBitVect(10)
    >>> bv1.SetBitsFromList((7,9))
    >>> vc.AddVect(1,bv1)
    >>> vc.AddVect(2,bv1)
    >>> bv1 = DataStructs.ExplicitBitVect(10)
    >>> bv1.SetBitsFromList((2,3,5))
    >>> vc.AddVect(3,bv1)
    >>> vc.NumChildren()
    3
    >>> vc.Uniquify()
    >>> vc.NumChildren()
    2



    """

    def __init__(self):
        """Create an empty collection with an invalid (unbuilt) OR cache."""
        self.__vects = {}  # id -> child bit vector
        self.__orVect = None  # cached OR of all children; None when empty
        self.__numBits = -1  # size of the cached OR vector; -1 until built
        self.__needReset = True  # True whenever the cache may be stale

    def GetOrVect(self):
        """Return the OR of all child vectors (None if there are none)."""
        if self.__needReset:
            self.Reset()
        return self.__orVect

    orVect = property(GetOrVect)

    def AddVect(self, id, vect):
        """Store `vect` under key `id`, replacing any vector already there."""
        self.__vects[id] = vect
        self.__needReset = True

    def Reset(self):
        """Rebuild the cached OR vector if the collection has changed.

        Does nothing when the cache is current.  With no children the cached
        vector is cleared to None and the stored bit count is left untouched.
        """
        if not self.__needReset:
            return
        self.__orVect = None
        if not self.__vects:
            return
        children = iter(self.__vects.values())
        # Copy the first child so OR-ing the rest in never mutates a child.
        combined = copy.copy(next(children))
        self.__numBits = combined.GetNumBits()
        for vect in children:
            combined |= vect
        self.__orVect = combined
        self.__needReset = False

    def NumChildren(self):
        """Return the number of child vectors."""
        return len(self.__vects)

    def GetChildren(self):
        """Return a tuple of (id, vector) pairs, one per child."""
        return tuple(self.__vects.items())

    def GetBit(self, id):
        """Return bit `id` of the OR vector (same as ``self[id]``)."""
        # __getitem__ refreshes the cache itself, so no Reset() is needed here.
        return self[id]

    def GetNumBits(self):
        """Return the number of bits in the OR vector (same as ``len(self)``)."""
        return len(self)

    def GetOnBits(self):
        """Return the on-bit indices of the OR vector, as the vector reports them."""
        if self.__needReset:
            self.Reset()
        return self.__orVect.GetOnBits()

    def DetachVectsNotMatchingBit(self, bit):
        """Remove every child that does NOT have `bit` set."""
        # Collect first: the dict must not change size while being iterated.
        doomed = [k for k, v in self.__vects.items() if not v.GetBit(bit)]
        for key in doomed:
            del self.__vects[key]
        if doomed:
            self.__needReset = True

    def DetachVectsMatchingBit(self, bit):
        """Remove every child that DOES have `bit` set."""
        doomed = [k for k, v in self.__vects.items() if v.GetBit(bit)]
        for key in doomed:
            del self.__vects[key]
        if doomed:
            self.__needReset = True

    def Uniquify(self, verbose=False):
        """Drop children whose on-bits duplicate those of an earlier child.

        For each distinct set of on-bits, the first child encountered in the
        dictionary's iteration order is kept.

        Arguments:
          - verbose: if true, print the child count before and after.
        """
        seen = set()
        unique = {}
        for key, vect in self.__vects.items():
            signature = tuple(vect.GetOnBits())
            if signature not in seen:
                seen.add(signature)
                unique[key] = vect
        if verbose:
            print('uniquify:', len(self.__vects), '->', len(unique))
        self.__vects = unique
        # The flag is spelled __needReset everywhere else; writing to any
        # other name would leave the cache marked as current.
        self.__needReset = True

    def __len__(self):
        """Return the number of bits in the OR vector."""
        if self.__needReset:
            self.Reset()
        # NOTE: this is still -1 for a collection that has never held a
        # vector, and Python's len() raises ValueError on a negative result.
        return self.__numBits

    def __getitem__(self, id):
        """Return bit `id` of the OR vector.

        Raises IndexError when there are no children or `id` is out of range,
        which is also what lets ``for bit in collection`` terminate.
        """
        if self.__needReset:
            self.Reset()
        if self.__orVect is None or id < 0 or id >= self.__numBits:
            raise IndexError(id)
        return self.__orVect.GetBit(id)

    #
    # set up our support for pickling:
    #
    def __getstate__(self):
        """Serialize the children to a byte string.

        Layout (little-endian): uint32 child count, then for each child a
        uint32 key, a uint32 payload length and the vector's ToBinary()
        payload.  Keys must therefore be integers that fit in 32 unsigned bits.
        """
        chunks = [struct.pack('<I', len(self.__vects))]
        for key, vect in self.__vects.items():
            payload = vect.ToBinary()
            size = len(payload)
            chunks.append(struct.pack('<II', key, size))
            chunks.append(struct.pack('%ds' % size, payload))
        return b''.join(chunks)

    def __setstate__(self, pkl):
        """Restore the collection from a byte string made by __getstate__()."""
        if six.PY3 and isinstance(pkl, str):
            pkl = bytes(pkl, encoding='Latin1')

        self.__vects = {}
        self.__orVect = None
        self.__numBits = -1
        self.__needReset = True
        # Measure with the same standard-size format that is used for reading.
        szI = struct.calcsize('<I')
        offset = 0
        nToRead = struct.unpack_from('<I', pkl, offset)[0]
        offset += szI
        for _ in range(nToRead):
            key, size = struct.unpack_from('<II', pkl, offset)
            offset += 2 * szI
            # unpack_from raises struct.error if the payload is truncated.
            payload = struct.unpack_from('%ds' % size, pkl, offset)[0]
            offset += size
            self.AddVect(key, DataStructs.ExplicitBitVect(payload))
267
268
269
270
271
272
273
def _test():
    """Run the doctests of the module executing as ``__main__``.

    Returns whatever doctest.testmod() returns.
    """
    import doctest
    import sys
    main_module = sys.modules["__main__"]
    return doctest.testmod(main_module)
277
278
# When run as a script, execute the doctests and use the number of failed
# examples as the process exit status (0 means every example passed).
if __name__ == '__main__':
    import sys
    results = _test()
    num_failed, num_tried = results
    sys.exit(num_failed)
283