1
2
3
4
5
6
7
8
9
10
""" generation of 2D pharmacophores

**Notes**

  - The terminology for this gets a bit rocky, so here's a glossary of what
    terms used here mean:

    1) *N-point pharmacophore*: a combination of N features along with
       distances between them.

    2) *N-point proto-pharmacophore*: a combination of N feature
       definitions without distances.  Each N-point
       proto-pharmacophore defines a manifold of potential N-point
       pharmacophores.

    3) *N-point scaffold*: a collection of the distances defining
       an N-point pharmacophore without feature identities.

  See Docs/Chem/Pharm2D.triangles.jpg for an illustration of the way
  pharmacophores are broken into triangles and labelled.

  See Docs/Chem/Pharm2D.signatures.jpg for an illustration of bit
  numbering

"""
36 from __future__ import print_function
37 from rdkit.Chem.Pharm2D import Utils,SigFactory
38 from rdkit.RDLogger import logger
# Module-level logger instance.  NOTE: this rebinds the name `logger`, which
# until this line refers to the factory imported from rdkit.RDLogger.
logger = logger()

# Set to a true value to print debugging traces while fingerprints are built.
_verbose = 0
42
def _ShortestPathsMatch(match, featureSet, sig, dMat, sigFactory):
    """ Internal use only

    Updates _sig_ for a single feature match, using the smallest distance
    found in _dMat_ between each pair of matched points.

    **Arguments**

      - match: a sequence with one entry per pharmacophore point; each
        entry is an iterable of indices into _dMat_

      - featureSet: passed through unchanged to _sigFactory.GetBitIdx()_

      - sig: the signature to update

      - dMat: the distance matrix, indexed as _dMat[i,j]_

      - sigFactory: provides _GetBins()_, _GetBitIdx()_ and _useCounts_

    **Returns**

      the index of the bit that was updated, or None (with _sig_ left
      untouched) if any point-to-point distance is zero, is below
      _bins[0][0]_, or is not below _bins[-1][1]_.

    """
    if _verbose:
        print('match:', match)

    # pairs of point indices whose separations define this N-point match
    distsToCheck = Utils.nPointDistDict[len(match)]
    dist = [0] * len(distsToCheck)
    bins = sigFactory.GetBins()
    # presumably the lower edge of the first bin and the upper edge of the
    # last one -- confirm against SigFactory.GetBins()
    minD, maxD = bins[0][0], bins[-1][1]

    for i, (pt0, pt1) in enumerate(distsToCheck):
        # find the shortest separation between any index matching pt0 and
        # any index matching pt1
        minSeen = maxD
        for idx1 in match[pt0]:
            for idx2 in match[pt1]:
                minSeen = min(minSeen, dMat[idx1, idx2])
                # minSeen can only shrink, so bail out as soon as it is
                # already too small
                if minSeen == 0 or minSeen < minD:
                    return None

        # truncate to an integer distance (dMat entries may be non-integral)
        d = int(minSeen)

        # quick distance filter
        if d == 0 or d < minD or d >= maxD:
            return None
        dist[i] = d

    idx = sigFactory.GetBitIdx(featureSet, dist, sortIndices=False)
    if _verbose:
        print('\t', dist, minD, maxD, idx)

    if sigFactory.useCounts:
        sig[idx] += 1
    else:
        sig.SetBit(idx)
    return idx
79
80
def Gen2DFingerprint(mol, sigFactory, perms=None, dMat=None, bitInfo=None):
    """ generates a 2D fingerprint for a molecule using the
    parameters in _sigFactory_

    **Arguments**

      - mol: the molecule for which the signature should be generated

      - sigFactory : the SigFactory object with signature parameters
        NOTE: no preprocessing is carried out for _sigFactory_.
        It *must* be pre-initialized.

      - perms: (optional) a sequence of permutation indices limiting which
        pharmacophore combinations are allowed.  If not provided, all
        combinations of the factory's feature families with between
        _minPointCount_ and _maxPointCount_ (capped at 3) points are used.

      - dMat: (optional) the distance matrix to be used.  If not provided
        it is computed with _Chem.GetDistanceMatrix()_.

      - bitInfo: (optional) used to return the atoms involved in the bits:
        for every bit that is set, the match responsible is appended to
        the list stored under that bit index.

    **Returns**

      the signature obtained from _sigFactory.GetSignature()_, updated
      with the bits for _mol_

    **Raises**

      ValueError if _sigFactory_ is not a SigFactory instance

    """
    if not isinstance(sigFactory, SigFactory.SigFactory):
        raise ValueError('bad factory')
    featFamilies = sigFactory.GetFeatFamilies()
    if _verbose:
        print('* feat famillies:', featFamilies)
    nFeats = len(featFamilies)
    minCount = sigFactory.minPointCount
    maxCount = sigFactory.maxPointCount
    if maxCount > 3:
        logger.warning(' Pharmacophores with more than 3 points are not currently supported.\nSetting maxCount to 3.')
        maxCount = 3

    # generate the molecule's distance matrix, if required
    if dMat is None:
        # imported here so that rdkit.Chem is only loaded when it is needed
        from rdkit import Chem
        useBO = sigFactory.includeBondOrder
        dMat = Chem.GetDistanceMatrix(mol, useBO)

    # generate the permutations, if required
    if perms is None:
        perms = []
        for count in range(minCount, maxCount + 1):
            perms += Utils.GetIndexCombinations(nFeats, count)

    # generate the matches
    featMatches = sigFactory.GetMolFeats(mol)
    if _verbose:
        print(' featMatches:', featMatches)

    sig = sigFactory.GetSignature()
    for perm in perms:
        # the permutation is a combination of feature indices; consecutive
        # equal entries are given the same class id so that
        # GetUniqueCombinations() can treat them as interchangeable
        featClasses = [0]
        for i in range(1, len(perm)):
            if perm[i] == perm[i - 1]:
                featClasses.append(featClasses[-1])
            else:
                featClasses.append(featClasses[-1] + 1)

        # collect the matches for each feature in this permutation
        matchPerms = [featMatches[x] for x in perm]
        if _verbose:
            print('\n->Perm: %s' % (str(perm)))
            print(' matchPerms: %s' % (str(matchPerms)))

        # keep only element [1] of every item in each unique combination
        # (presumably the matched atom ids -- confirm against
        # Utils.GetUniqueCombinations)
        matchesToMap = [[x[1] for x in entry]
                        for entry in Utils.GetUniqueCombinations(matchPerms, featClasses)]
        if _verbose:
            print(' mtM:', matchesToMap)

        for match in matchesToMap:
            # only the shortest-paths mode sets bits here; for any other
            # factory setting the match is skipped
            if sigFactory.shortestPathsOnly:
                idx = _ShortestPathsMatch(match, perm, sig, dMat, sigFactory)
                if idx is not None and bitInfo is not None:
                    bitInfo.setdefault(idx, []).append(match)
    return sig
164