Package rdkit :: Package Chem :: Package Pharm2D :: Module Generate
[hide private]
[frames | no frames]

Source Code for Module rdkit.Chem.Pharm2D.Generate

  1  # $Id$ 
  2  # 
  3  # Copyright (C) 2002-2008 greg Landrum and Rational Discovery LLC 
  4  # 
  5  #   @@ All Rights Reserved @@ 
  6  #  This file is part of the RDKit. 
  7  #  The contents are covered by the terms of the BSD license 
  8  #  which is included in the file license.txt, found at the root 
  9  #  of the RDKit source tree. 
 10  # 
 11  """ generation of 2D pharmacophores 
 12   
 13  **Notes** 
 14   
 15    - The terminology for this gets a bit rocky, so here's a glossary of what 
 16      terms used here mean: 
 17   
 18        1) *N-point pharmacophore*: a combination of N features along with 
 19           distances between them. 
 20   
 21        2) *N-point proto-pharmacophore*: a combination of N feature 
 22           definitions without distances.  Each N-point 
 23           proto-pharmacophore defines a manifold of potential N-point 
 24           pharmacophores. 
 25   
 26        3) *N-point scaffold*: a collection of the distances defining 
 27           an N-point pharmacophore without feature identities. 
 28   
 29    See Docs/Chem/Pharm2D.triangles.jpg for an illustration of the way 
 30    pharmacophores are broken into triangles and labelled. 
 31   
 32    See Docs/Chem/Pharm2D.signatures.jpg for an illustration of bit 
 33    numbering 
 34   
 35  """ 
 36  from __future__ import print_function 
 37  from rdkit.Chem.Pharm2D import Utils,SigFactory 
 38  from rdkit.RDLogger import logger 
 39  logger = logger() 
 40   
 41  _verbose = 0 
 42   
def _ShortestPathsMatch(match, featureSet, sig, dMat, sigFactory):
    """Internal use only.

    Records a single match of a proto-pharmacophore in ``sig``.  The
    distance between two pharmacophore points is taken to be the smallest
    ``dMat`` entry found between any atom of the first point and any atom
    of the second.

    **Arguments**

      - match: a sequence with one entry per pharmacophore point; each
        entry is a sequence of atom indices usable as indices into ``dMat``

      - featureSet: the feature indices of the proto-pharmacophore, handed
        unchanged to ``sigFactory.GetBitIdx()``

      - sig: the signature being filled in

      - dMat: the distance matrix, indexed as ``dMat[i, j]``

      - sigFactory: the factory supplying the distance bins and bit index

    **Returns**

      the index of the bit that was set (or incremented), or None if one
      of the distances was rejected by the distance filter, in which case
      ``sig`` is left untouched.

    """
    if _verbose:
        print('match:', match)

    # the pairs of point positions whose separations define the scaffold
    pointPairs = Utils.nPointDistDict[len(match)]
    nPairs = len(pointPairs)
    distances = [0] * nPairs

    # the allowed distance window runs from the start of the first bin
    # to the end of the last one
    binList = sigFactory.GetBins()
    lowerBound = binList[0][0]
    upperBound = binList[-1][1]

    for slot in range(nPairs):
        first, second = pointPairs[slot]
        closest = upperBound
        for atomA in match[first]:
            for atomB in match[second]:
                closest = min(closest, dMat[atomA, atomB])
                # bail out as soon as the pair is known to be too close
                if closest == 0 or closest < lowerBound:
                    return None
        # FIX: this won't be an int if we're using the bond order.
        truncated = int(closest)
        # do a quick distance filter
        if truncated == 0 or truncated < lowerBound or truncated >= upperBound:
            return None
        distances[slot] = truncated

    bitIdx = sigFactory.GetBitIdx(featureSet, distances, sortIndices=False)
    if _verbose:
        print('\t', distances, lowerBound, upperBound, bitIdx)

    if not sigFactory.useCounts:
        sig.SetBit(bitIdx)
    else:
        sig[bitIdx] = sig[bitIdx] + 1
    return bitIdx
79 80
def Gen2DFingerprint(mol, sigFactory, perms=None, dMat=None, bitInfo=None):
    """ generates a 2D pharmacophore fingerprint for a molecule using the
    parameters in _sigFactory_

    **Arguments**

      - mol: the molecule for which the signature should be generated

      - sigFactory : the SigFactory object with signature parameters
             NOTE: no preprocessing is carried out for _sigFactory_.
                   It *must* be pre-initialized.

      - perms: (optional) a sequence of permutation indices limiting which
        pharmacophore combinations are allowed.  When omitted, all index
        combinations from the factory's minPointCount up to its
        maxPointCount (capped at 3) are generated.

      - dMat: (optional) the distance matrix to be used.  When omitted it
        is computed from _mol_ with Chem.GetDistanceMatrix(), passing the
        factory's includeBondOrder setting.

      - bitInfo: (optional) used to return the atoms involved in the bits:
        for each bit index that gets set, the match responsible is appended
        to the list stored under that index.

    **Returns**

      the signature obtained from sigFactory.GetSignature(), with bits set
      for every match accepted by _ShortestPathsMatch().  Matches are only
      processed when sigFactory.shortestPathsOnly is true.

    **Raises**

      ValueError if _sigFactory_ is not a SigFactory.SigFactory instance.

    """
    if not isinstance(sigFactory, SigFactory.SigFactory):
        raise ValueError('bad factory')

    families = sigFactory.GetFeatFamilies()
    if _verbose:
        print('* feat famillies:', families)
    nFamilies = len(families)

    fewestPoints = sigFactory.minPointCount
    mostPoints = sigFactory.maxPointCount
    if mostPoints > 3:
        logger.warning(' Pharmacophores with more than 3 points are not currently supported.\nSetting maxCount to 3.')
        mostPoints = 3

    # generate the molecule's distance matrix, if required
    if dMat is None:
        from rdkit import Chem
        withBondOrder = sigFactory.includeBondOrder
        dMat = Chem.GetDistanceMatrix(mol, withBondOrder)

    # generate the permutations, if required
    if perms is None:
        perms = []
        for nPoints in range(fewestPoints, mostPoints + 1):
            perms.extend(Utils.GetIndexCombinations(nFamilies, nPoints))

    # generate the matches:
    molFeats = sigFactory.GetMolFeats(mol)
    if _verbose:
        print(' featMatches:', molFeats)

    sig = sigFactory.GetSignature()
    for perm in perms:
        # the permutation is a combination of feature indices defining the
        # feature set for a proto-pharmacophore; positions holding the same
        # feature index as their predecessor share a class id
        classId = 0
        featClasses = [classId]
        for pos in range(1, len(perm)):
            if perm[pos] != perm[pos - 1]:
                classId += 1
            featClasses.append(classId)

        # Get a set of matches at each index of the proto-pharmacophore.
        candidates = [molFeats[featIdx] for featIdx in perm]
        if _verbose:
            print('\n->Perm: %s' % (str(perm)))
            print(' matchPerms: %s' % (str(candidates)))

        # Get all unique combinations of those possible matches, then keep
        # only the second element of each member of every combination
        # (rewritten in place, as the list is reused below)
        combos = Utils.GetUniqueCombinations(candidates, featClasses)
        for slot, combo in enumerate(combos):
            combos[slot] = [member[1] for member in combo]
        if _verbose:
            print(' mtM:', combos)

        for match in combos:
            if not sigFactory.shortestPathsOnly:
                continue
            bitIdx = _ShortestPathsMatch(match, perm, sig, dMat, sigFactory)
            if bitIdx is None or bitInfo is None:
                continue
            # remember which atoms were responsible for this bit
            contributors = bitInfo.get(bitIdx, [])
            contributors.append(match)
            bitInfo[bitIdx] = contributors
    return sig
164