Package rdkit :: Package VLib :: Package NodeLib :: Module SmartsMolFilter
[hide private]
[frames | no frames]

Source Code for Module rdkit.VLib.NodeLib.SmartsMolFilter

  1  #  $Id$ 
  2  # 
  3  #  Copyright (C) 2003 Rational Discovery LLC 
  4  #     All Rights Reserved 
  5  # 
  6  from rdkit import RDConfig 
  7  from rdkit import six 
  8  import sys,os,types 
  9  from rdkit import Chem 
 10  from rdkit.VLib.Filter import FilterNode 
 11   
class SmartsFilter(FilterNode):
    """ filter molecules according to how often they match SMARTS patterns

    There is a count associated with each pattern.  A molecule triggers
    the filter as soon as it matches any one pattern at least that many
    times (the patterns are OR'ed together); molecules that stay below
    the count for every pattern do not trigger it.

    Assumptions:

      - inputs are molecules


    Sample Usage:
      >>> smis = ['C1CCC1','C1CCC1C=O','CCCC','CCC=O','CC(=O)C','CCN','NCCN','NCC=O']
      >>> mols = [Chem.MolFromSmiles(x) for x in smis]
      >>> from rdkit.VLib.Supply import SupplyNode
      >>> suppl = SupplyNode(contents=mols)
      >>> ms = [x for x in suppl]
      >>> len(ms)
      8

      We can pass in SMARTS strings:
      >>> smas = ['C=O','CN']
      >>> counts = [1,2]
      >>> filt = SmartsFilter(patterns=smas,counts=counts)
      >>> filt.AddParent(suppl)
      >>> ms = [x for x in filt]
      >>> len(ms)
      5

      Alternatively, we can pass in molecule objects:
      >>> mols =[Chem.MolFromSmarts(x) for x in smas]
      >>> counts = [1,2]
      >>> filt.Destroy()
      >>> filt = SmartsFilter(patterns=mols,counts=counts)
      >>> filt.AddParent(suppl)
      >>> ms = [x for x in filt]
      >>> len(ms)
      5

      Negation does what you'd expect:
      >>> filt.SetNegate(1)
      >>> ms = [x for x in filt]
      >>> len(ms)
      3


    """

    def __init__(self, patterns=None, counts=None, **kwargs):
        """ Constructor

        Arguments:

          - patterns: sequence of SMARTS strings and/or pattern molecules;
            None is treated as an empty sequence

          - counts: optional sequence of match-count thresholds, one per
            pattern; when empty or None every pattern gets a count of 1

          - kwargs: forwarded unchanged to FilterNode.__init__

        Raises ValueError for mismatched lengths or unparseable SMARTS
        (see _initPatterns).
        """
        FilterNode.__init__(self, func=self.filter, **kwargs)
        self._initPatterns(patterns, counts)

    def _initPatterns(self, patterns, counts):
        """ Validates the patterns/counts and stores them on the instance

        The result is kept in self._patterns as a tuple of
        (pattern, count) pairs.  Patterns supplied as str or bytes are
        converted with Chem.MolFromSmarts; anything else is stored as-is.

        Raises ValueError when counts is non-empty but differs in length
        from patterns, or when a SMARTS string cannot be converted.
        """
        if patterns is None:
            patterns = []
        if counts is None:
            counts = []

        nPatts = len(patterns)
        # len() rather than truthiness: stays valid for sequence types whose
        # truth value is not simply "non-empty".
        if len(counts) and len(counts) != nPatts:
            raise ValueError('if counts is specified, it must match patterns in length')
        if not len(counts):
            counts = [1] * nPatts

        targets = []
        for p, c in zip(patterns, counts):
            # isinstance also accepts subclasses of str/bytes, which an
            # exact type() comparison would have stored unparsed.
            if isinstance(p, (str, bytes)):
                m = Chem.MolFromSmarts(p)
                if not m:
                    raise ValueError('bad smarts: %s' % (p))
                p = m
            targets.append((p, c))
        self._patterns = tuple(targets)

    def filter(self, cmpd):
        """ Returns 1 if cmpd reaches the count of any pattern, else 0

        cmpd must provide GetSubstructMatches().  The node's negation
        flag is not consulted here; judging by the sample usage in the
        class docstring it is presumably applied by the FilterNode
        machinery that calls this function.
        """
        for patt, count in self._patterns:
            nMatches = len(cmpd.GetSubstructMatches(patt))
            if nMatches >= count:
                # this query is an or, so we short circuit true:
                return 1
        return 0
93 94 #------------------------------------ 95 # 96 # doctest boilerplate 97 #
98 -def _test():
99 import doctest,sys 100 return doctest.testmod(sys.modules["__main__"])
if __name__ == "__main__":
    import sys

    # _test() hands back a (failed, attempted) pair; the number of failed
    # doctests is used directly as the process exit status, so 0 == success.
    failed, tried = _test()
    sys.exit(failed)