Package rdkit :: Package Chem :: Package Fingerprints :: Module SimilarityScreener
[hide private]
[frames | no frames]

Source Code for Module rdkit.Chem.Fingerprints.SimilarityScreener

  1  # $Id$ 
  2  # 
  3  # Copyright (C) 2003-2006 Greg Landrum and Rational Discovery LLC 
  4  # 
  5  #   @@ All Rights Reserved @@ 
  6  #  This file is part of the RDKit. 
  7  #  The contents are covered by the terms of the BSD license 
  8  #  which is included in the file license.txt, found at the root 
  9  #  of the RDKit source tree. 
 10  # 
 11  """ class definitions for similarity screening 
 12   
 13  See _SimilarityScreener_ for overview of required API 
 14   
 15  """ 
 16  from rdkit import DataStructs 
 17  from rdkit.DataStructs import TopNContainer 
 18  from rdkit import RDConfig 
 19  from rdkit import six 
 20   
class SimilarityScreener(object):
    """ base class for the similarity screeners

      important attributes:
        probe: the probe fingerprint against which we screen.

        metric: a function that takes two arguments and returns a
           similarity measure between them

        dataSource: the source pool from which to draw, needs to
           support a next() method

        fingerprinter: a function that takes a molecule and returns a
           fingerprint of the appropriate format

      **Notes**
         subclasses must support either an iterator interface
         or __len__ and __getitem__
    """

    def __init__(self, probe=None, metric=None, dataSource=None, fingerprinter=None):
        """ stores the four collaborators as attributes; nothing is
        validated or evaluated here
        """
        self.probe = probe
        self.fingerprinter = fingerprinter
        self.dataSource = dataSource
        self.metric = metric

    def Reset(self):
        """ hook for screeners that behave as iterators

        The base class keeps no iteration state, so this does nothing.
        """
        return None

    # FIX: add setters/getters for attributes
    def SetProbe(self, probeFingerprint):
        """ replaces the probe fingerprint used for screening """
        self.probe = probeFingerprint

    def GetSingleFingerprint(self, probe):
        """ runs our fingerprinter on a single probe object and returns
        the result

        This is potentially useful in initializing our internal
        probe object.
        """
        make_fingerprint = self.fingerprinter
        return make_fingerprint(probe)
65
class ThresholdScreener(SimilarityScreener):
    """ Used to return all compounds that have a similarity
      to the probe beyond a threshold value

     **Notes**:

       - This is as lazy as possible, so the data source isn't
         queried until the client asks for a hit.

       - In addition to being lazy, this class is as thin as possible.
         (Who'd have thought it was possible!)
         Hits are *not* stored locally, so if a client resets
         the iteration and starts over, the same amount of work must
         be done to retrieve the hits.

       - The thinness and laziness forces us to support only forward
         iteration (not random access)

       - Each hit is returned as a (similarity, object) 2-tuple.

    """

    def __init__(self, threshold, **kwargs):
        """
          threshold: minimum similarity (inclusive) for an object to be
            reported as a hit

          kwargs: forwarded unchanged to SimilarityScreener.__init__
            (probe, metric, dataSource, fingerprinter).  An iterable
            dataSource is required, because an iterator over it is
            created immediately.
        """
        SimilarityScreener.__init__(self, **kwargs)
        self.threshold = threshold
        self.dataIter = iter(self.dataSource)

    # FIX: add setters/getters for attributes

    def _nextMatch(self):
        """ *Internal use only*

        Pulls objects from the data iterator until one reaches the
        threshold and returns (similarity, object) for it.

        Raises StopIteration when the data iterator is exhausted before
        another hit is found.
        """
        while True:
            # The built-in next() (not this class's next method) raises
            # StopIteration when the data source runs dry; letting that
            # propagate is how iteration over the screener terminates.
            obj = next(self.dataIter)
            fp = self.fingerprinter(obj)
            sim = DataStructs.FingerprintSimilarity(fp, self.probe, self.metric)
            if sim >= self.threshold:
                return sim, obj

    def Reset(self):
        """ used to reset our internal state so that iteration
        starts again from the beginning

        If the data source provides a reset() method it is called first.
        Sources without one (lists, tuples, ...) are simply re-iterated,
        so they can be screened too.
        """
        rewind = getattr(self.dataSource, 'reset', None)
        if rewind is not None:
            rewind()
        self.dataIter = iter(self.dataSource)

    def __iter__(self):
        """ returns an iterator for this screener

        Note that this restarts the scan from the beginning of the
        data source.
        """
        self.Reset()
        return self

    def next(self):
        """ required part of iterator interface """
        return self._nextMatch()

    __next__ = next
125 126
class TopNScreener(SimilarityScreener):
    """ A screener that only returns the top N hits found

      **Notes**

        - supports forward iteration and getitem

        - the whole data source is scanned once, the first time results
          are requested; the results are cached in a TopNContainer
          afterwards

    """

    def __init__(self, num, **kwargs):
        """
          num: the number of hits to keep

          kwargs: forwarded unchanged to SimilarityScreener.__init__
            (probe, metric, dataSource, fingerprinter)
        """
        SimilarityScreener.__init__(self, **kwargs)
        self.numToGet = num
        # filled in lazily by _initTopN()
        self.topN = None
        self._pos = 0

    def Reset(self):
        """ rewinds iteration to the first hit; cached hits are kept """
        self._pos = 0

    def __iter__(self):
        """ returns an iterator for this screener, starting from the
        first hit
        """
        if self.topN is None:
            self._initTopN()
        self.Reset()
        return self

    def next(self):
        """ required part of iterator interface

        Returns the entry of the hit container at the current position
        and advances.  Raises StopIteration once numToGet entries have
        been returned.
        """
        if self._pos >= self.numToGet:
            raise StopIteration
        # Build the hit list on demand, as __iter__, __len__ and
        # __getitem__ do, so that calling next() directly on a fresh
        # screener works instead of failing on topN being None.
        if self.topN is None:
            self._initTopN()
        res = self.topN[self._pos]
        self._pos += 1
        return res

    __next__ = next

    def _initTopN(self):
        """ *Internal use only*

        Scans the entire data source, scoring each object against the
        probe, and stores the scores in a fresh TopNContainer.
        """
        self.topN = TopNContainer.TopNContainer(self.numToGet)
        for obj in self.dataSource:
            fp = self.fingerprinter(obj)
            sim = DataStructs.FingerprintSimilarity(fp, self.probe, self.metric)
            self.topN.Insert(sim, obj)

    def __len__(self):
        """ returns the requested number of hits (numToGet), triggering
        the scan of the data source if it has not happened yet
        """
        if self.topN is None:
            self._initTopN()
        return self.numToGet

    def __getitem__(self, idx):
        """ returns the idx-th entry of the hit container, triggering
        the scan of the data source if it has not happened yet

        NOTE(review): the entry is presumably a (similarity, object)
        pair -- confirm against TopNContainer.__getitem__.
        """
        if self.topN is None:
            self._initTopN()
        return self.topN[idx]
175