Package rdkit :: Package VLib :: Package NodeLib :: Module SmilesDupeFilter
[hide private]
[frames | no frames]

Source Code for Module rdkit.VLib.NodeLib.SmilesDupeFilter

 1  #  $Id$ 
 2  # 
 3  #  Copyright (C) 2003 Rational Discovery LLC 
 4  #     All Rights Reserved 
 5  # 
 6  from rdkit import RDConfig 
 7  from rdkit import six 
 8  import sys,os 
 9  from rdkit import Chem 
10  from rdkit.VLib.Filter import FilterNode 
11   
class DupeFilter(FilterNode):
    """ canonical-smiles based duplicate filter

    Each incoming molecule is converted to SMILES with
    ``Chem.MolToSmiles``; the filter function answers 1 the first time
    a given SMILES string is encountered and 0 on every later
    occurrence.

    Assumptions:

      - inputs are molecules


    Sample Usage:
      >>> from rdkit.VLib.NodeLib.SDSupply import SDSupplyNode
      >>> fileN = os.path.join(RDConfig.RDCodeDir,'VLib','NodeLib',
      ...                      'test_data','NCI_aids.10.sdf')
      >>> suppl = SDSupplyNode(fileN)
      >>> filt = DupeFilter()
      >>> filt.AddParent(suppl)
      >>> ms = [x for x in filt]
      >>> len(ms)
      10
      >>> ms[0].GetProp("_Name")
      '48'
      >>> ms[1].GetProp("_Name")
      '78'
      >>> filt.reset()
      >>> filt.next().GetProp("_Name")
      '48'


    """

    def __init__(self, **kwargs):
        """Create the node, registering :meth:`filter` as its filter function.

        All keyword arguments are forwarded unchanged to
        ``FilterNode.__init__``.
        """
        FilterNode.__init__(self, func=self.filter, **kwargs)
        # A set keeps the "already seen?" test constant-time; the previous
        # list made every lookup a linear scan (quadratic over a stream).
        self._smisSeen = set()

    def reset(self):
        """Reset the underlying ``FilterNode`` and forget every SMILES seen."""
        FilterNode.reset(self)
        self._smisSeen = set()

    def filter(self, cmpd):
        """Report whether ``cmpd`` has a SMILES that was not seen before.

        Arguments:

          - cmpd: the molecule to check (passed to ``Chem.MolToSmiles``)

        Returns 1 if the molecule's SMILES is new (and records it),
        0 if that SMILES has already been recorded since the last reset.
        """
        smi = Chem.MolToSmiles(cmpd)
        if smi in self._smisSeen:
            return 0
        self._smisSeen.add(smi)
        return 1

#------------------------------------
#
#  doctest boilerplate
#
def _test():
    """Run the doctests of the ``__main__`` module.

    Returns whatever ``doctest.testmod`` returns for that module.
    """
    import doctest
    import sys

    main_module = sys.modules["__main__"]
    outcome = doctest.testmod(main_module)
    return outcome

if __name__ == '__main__':
    # Script entry point: run the module doctests and use the number of
    # failures as the process exit status (0 means every doctest passed).
    import sys

    num_failed, num_tried = _test()
    sys.exit(num_failed)