1
2
3
4
5
6
7
8
9
10
11 """ Contains an implementation of Atom-pair fingerprints, as
12 described in:
13
14 R.E. Carhart, D.H. Smith, R. Venkataraghavan;
15 "Atom Pairs as Molecular Features in Structure-Activity Studies:
16 Definition and Applications" JCICS 25, 64-73 (1985).
17
18 """
19 from rdkit.DataStructs import IntSparseIntVect
20 from rdkit import Chem
21 from rdkit.Chem import rdMolDescriptors
22 from rdkit.Chem.AtomPairs import Utils
23 from rdkit import DataStructs
24
25 from rdkit.Chem.rdMolDescriptors import GetAtomPairFingerprint,GetHashedAtomPairFingerprint
26 GetAtomPairFingerprintAsIntVect=rdMolDescriptors.GetAtomPairFingerprint
27
28 numPathBits=rdMolDescriptors.AtomPairsParameters.numPathBits
29 _maxPathLen=(1<<numPathBits)-1
30 numFpBits=numPathBits+2*rdMolDescriptors.AtomPairsParameters.codeSize
31 fpLen=1<<numFpBits
32
34 """ Returns a score for an individual atom pair.
35
36 >>> m = Chem.MolFromSmiles('CCCCC')
37 >>> c1 = Utils.GetAtomCode(m.GetAtomWithIdx(0))
38 >>> c2 = Utils.GetAtomCode(m.GetAtomWithIdx(1))
39 >>> c3 = Utils.GetAtomCode(m.GetAtomWithIdx(2))
40 >>> t = 1 | min(c1,c2)<<numPathBits | max(c1,c2)<<(rdMolDescriptors.AtomPairsParameters.codeSize+numPathBits)
41 >>> pyScorePair(m.GetAtomWithIdx(0),m.GetAtomWithIdx(1),1)==t
42 1
43 >>> pyScorePair(m.GetAtomWithIdx(1),m.GetAtomWithIdx(0),1)==t
44 1
45 >>> t = 2 | min(c1,c3)<<numPathBits | max(c1,c3)<<(rdMolDescriptors.AtomPairsParameters.codeSize+numPathBits)
46 >>> pyScorePair(m.GetAtomWithIdx(0),m.GetAtomWithIdx(2),2)==t
47 1
48 >>> pyScorePair(m.GetAtomWithIdx(0),m.GetAtomWithIdx(2),2,
49 ... atomCodes=(Utils.GetAtomCode(m.GetAtomWithIdx(0)),Utils.GetAtomCode(m.GetAtomWithIdx(2))))==t
50 1
51
52 """
53 if not atomCodes:
54 code1 = Utils.GetAtomCode(at1)
55 code2 = Utils.GetAtomCode(at2)
56 else:
57 code1,code2=atomCodes
58 accum = int(dist) % _maxPathLen
59 accum |= min(code1,code2) << numPathBits
60 accum |= max(code1,code2) << (rdMolDescriptors.AtomPairsParameters.codeSize+numPathBits)
61 return accum
62
64 """
65 >>> m = Chem.MolFromSmiles('C=CC')
66 >>> score = pyScorePair(m.GetAtomWithIdx(0),m.GetAtomWithIdx(1),1)
67 >>> ExplainPairScore(score)
68 (('C', 1, 1), 1, ('C', 2, 1))
69 >>> score = pyScorePair(m.GetAtomWithIdx(0),m.GetAtomWithIdx(2),2)
70 >>> ExplainPairScore(score)
71 (('C', 1, 0), 2, ('C', 1, 1))
72 >>> score = pyScorePair(m.GetAtomWithIdx(1),m.GetAtomWithIdx(2),1)
73 >>> ExplainPairScore(score)
74 (('C', 1, 0), 1, ('C', 2, 1))
75 >>> score = pyScorePair(m.GetAtomWithIdx(2),m.GetAtomWithIdx(1),1)
76 >>> ExplainPairScore(score)
77 (('C', 1, 0), 1, ('C', 2, 1))
78
79 """
80 codeMask = (1<<rdMolDescriptors.AtomPairsParameters.codeSize)-1
81 pathMask = (1<<numPathBits)-1
82 dist = score&pathMask
83
84 score = score>>numPathBits
85 code1 = score&codeMask
86 score = score>>rdMolDescriptors.AtomPairsParameters.codeSize
87 code2 = score&codeMask
88
89 res = Utils.ExplainAtomCode(code1),dist,Utils.ExplainAtomCode(code2)
90 return res
91
93 """ Returns the Atom-pair fingerprint for a molecule as
94 a SparseBitVect. Note that this doesn't match the standard
95 definition of atom pairs, which uses counts of the
96 pairs, not just their presence.
97
98 **Arguments**:
99
100 - mol: a molecule
101
102 **Returns**: a SparseBitVect
103
104 >>> m = Chem.MolFromSmiles('CCC')
105 >>> v = [ pyScorePair(m.GetAtomWithIdx(0),m.GetAtomWithIdx(1),1),
106 ... pyScorePair(m.GetAtomWithIdx(0),m.GetAtomWithIdx(2),2),
107 ... ]
108 >>> v.sort()
109 >>> fp = GetAtomPairFingerprintAsBitVect(m)
110 >>> list(fp.GetOnBits())==v
111 True
112
113 """
114 res = DataStructs.SparseBitVect(fpLen)
115 fp = rdMolDescriptors.GetAtomPairFingerprint(mol)
116 for val in fp.GetNonzeroElements().keys():
117 res.SetBit(val)
118 return res
119
120
121
122
123
125 import doctest,sys
126 return doctest.testmod(sys.modules["__main__"])
127
128
129 if __name__ == '__main__':
130 import sys
131 failed,tried = _test()
132 sys.exit(failed)
133