1 """
2 $Id$
3
4 Scoring - Calculate rank statistics
5
6 Created by Sereina Riniker, October 2012
7 after a file from Peter Gedeck, Greg Landrum
8
9 """
10
11 import math, exceptions
12
13 """
14 \param scores: ordered list with descending similarity containing
15 active/inactive information
16 \param col: column index in scores where active/inactive information is stored
17 \param fractions: list of fractions at which the value shall be calculated
18 \param alpha: exponential weight
19 """
20
# NOTE(review): the def line was missing from the reviewed text; the name and
# parameters are reconstructed from the call CalcROC(scores, col) -- confirm.
def CalcROC(scores, col):
    """ Determines a ROC curve

    Arguments:
      scores -- ranked sequence of rows (ordered by descending similarity);
                scores[i][col] is truthy when entry i is an active.
      col    -- index inside each row where the active/inactive flag is stored.

    Returns [TNR, TPR], two lists with one value per entry of scores:
      TPR[i] -- actives found in entries 0..i divided by the total number of
                actives.
      TNR[i] -- inactives found in entries 0..i divided by the total number of
                inactives. Despite its historical name this is the cumulative
                fraction of inactives retrieved, i.e. the x axis of the curve.
    If there are no actives (or no inactives) the corresponding list is left
    as unnormalised zeros.

    Raises ValueError if scores is empty.
    """
    numMol = len(scores)
    if numMol == 0:
        raise ValueError('score list is empty')

    TPR = []
    TNR = []
    numActives = 0
    numInactives = 0

    # Running counts of actives / inactives seen down to each rank.
    for row in scores:
        if row[col]:
            numActives += 1
        else:
            numInactives += 1
        TPR.append(numActives)
        TNR.append(numInactives)

    # Normalise to rates; skip a class that never occurs to avoid dividing by 0.
    if numActives > 0:
        TPR = [1.0 * count / numActives for count in TPR]
    if numInactives > 0:
        TNR = [1.0 * count / numInactives for count in TNR]

    return [TNR, TPR]
46
# NOTE(review): the def line was missing from the reviewed text; the name and
# parameters are reconstructed from the docstring and body -- confirm.
def CalcAUC(scores, col):
    """ Determines the area under the ROC curve

    Arguments:
      scores -- ranked sequence of rows (ordered by descending similarity);
                scores[i][col] is truthy when entry i is an active.
      col    -- index inside each row where the active/inactive flag is stored.

    The area is integrated with the trapezoidal rule over consecutive points
    returned by CalcROC. When the list holds only actives or only inactives
    every trapezoid is empty and the result is 0.

    Raises ValueError (from CalcROC) if scores is empty.
    """
    TNR, TPR = CalcROC(scores, col)

    AUC = 0
    # Each term is twice the trapezoid area between points i and i+1;
    # the common factor 0.5 is applied once at the end.
    for i in range(len(scores) - 1):
        AUC += (TNR[i + 1] - TNR[i]) * (TPR[i + 1] + TPR[i])

    return 0.5 * AUC
62
# NOTE(review): the def line was missing from the reviewed text; the name and
# parameters are reconstructed from the calls _RIEHelper(scores, col, alpha)
# -- confirm.
def _RIEHelper(scores, col, alpha):
    """ Computes the RIE value and the number of actives in one pass

    Arguments:
      scores -- ranked sequence of rows (ordered by descending similarity);
                scores[i][col] is truthy when entry i is an active.
      col    -- index inside each row where the active/inactive flag is stored.
      alpha  -- exponential weight; converted with float() and must be > 0.

    Returns the tuple (RIE, numActives). RIE is 0.0 when there are no actives.

    Raises ValueError if scores is empty or alpha is not greater than zero.
    """
    numMol = len(scores)
    alpha = float(alpha)
    if numMol == 0:
        raise ValueError('score list is empty')
    if alpha <= 0.0:
        raise ValueError('alpha must be greater than zero')

    # Closed form of (1/numMol) * sum_{k=1..numMol} exp(-alpha*k/numMol),
    # i.e. the mean exponential weight over all ranks.
    denom = 1.0 / numMol * ((1 - math.exp(-alpha)) / (math.exp(alpha / numMol) - 1))

    numActives = 0
    sum_exp = 0
    # Ranks are 1-based; every active contributes exp(-alpha * rank / numMol).
    # The running sum is kept explicit so the additions happen in rank order.
    for rank, row in enumerate(scores, 1):
        if row[col]:
            numActives += 1
            sum_exp += math.exp(-(alpha * rank) / numMol)

    if numActives > 0:
        RIE = sum_exp / (numActives * denom)
    else:
        RIE = 0.0

    return RIE, numActives
86
# NOTE(review): the def line was missing from the reviewed text; the name and
# parameters are reconstructed from the docstring and body -- confirm.
def CalcRIE(scores, col, alpha):
    """ RIE originally defined here:
    Sheridan, R.P., Singh, S.B., Fluder, E.M. & Kearsley, S.K.
    Protocols for Bridging the Peptide to Nonpeptide Gap in Topological Similarity Searches.
    J. Chem. Inf. Comp. Sci. 41, 1395-1406 (2001).

    Arguments:
      scores -- ranked sequence of rows (ordered by descending similarity);
                scores[i][col] is truthy when entry i is an active.
      col    -- index inside each row where the active/inactive flag is stored.
      alpha  -- exponential weight, must be greater than zero.

    Returns the RIE value computed by _RIEHelper (0.0 when there are no
    actives). Raises ValueError for an empty list or a non-positive alpha.
    """
    # The helper also reports the number of actives, which is not needed here.
    RIE, _ = _RIEHelper(scores, col, alpha)
    return RIE
95
# NOTE(review): the def line was missing from the reviewed text; the name and
# parameters are reconstructed from the docstring and body -- confirm.
def CalcBEDROC(scores, col, alpha):
    """ BEDROC originally defined here:
    Truchon, J. & Bayly, C.I.
    Evaluating Virtual Screening Methods: Good and Bad Metric for the "Early Recognition"
    Problem. J. Chem. Inf. Model. 47, 488-508 (2007).

    Arguments:
      scores -- ranked sequence of rows (ordered by descending similarity);
                scores[i][col] is truthy when entry i is an active.
      col    -- index inside each row where the active/inactive flag is stored.
      alpha  -- exponential weight, must be greater than zero.

    Returns RIE rescaled between RIEmin and RIEmax, 0.0 when there are no
    actives, and 1.0 when RIEmin and RIEmax coincide (for example when every
    entry is active). Raises ValueError for an empty list or a non-positive
    alpha (raised by _RIEHelper).
    """
    RIE, numActives = _RIEHelper(scores, col, alpha)

    if numActives == 0:
        return 0.0

    # Fraction of the list that is active.
    ratio = 1.0 * numActives / len(scores)
    RIEmax = (1 - math.exp(-alpha * ratio)) / (ratio * (1 - math.exp(-alpha)))
    RIEmin = (1 - math.exp(alpha * ratio)) / (ratio * (1 - math.exp(alpha)))

    # Identical bounds would make the rescaling divide by zero.
    if RIEmax == RIEmin:
        return 1.0

    return (RIE - RIEmin) / (RIEmax - RIEmin)
119
# NOTE(review): the def line was missing from the reviewed text; the name and
# parameters are reconstructed from the docstring and body -- confirm.
def CalcEnrichment(scores, col, fractions):
    """ Determines the enrichment factor for a set of fractions

    Arguments:
      scores    -- ranked sequence of rows (ordered by descending similarity);
                   scores[i][col] is truthy when entry i is an active.
      col       -- index inside each row where the active/inactive flag is
                   stored.
      fractions -- sequence of fractions in [0, 1] at which the enrichment is
                   evaluated.

    Returns a list with exactly one enrichment factor per entry of fractions,
    in the same order. For a fraction f the top n = ceil(len(scores) * f)
    entries are inspected and the value is
        (actives in top n / n) / (total actives / len(scores)).
    A fraction that rounds to zero entries is evaluated on the single
    top-ranked entry. Every fraction is evaluated independently, so
    duplicated, unsorted, or full-list (1.0) fractions are handled. When the
    list contains no actives all values are 0.0.

    Raises ValueError if scores or fractions is empty, or if any fraction lies
    outside [0, 1].
    """
    numMol = len(scores)
    if numMol == 0:
        raise ValueError('score list is empty')
    if len(fractions) == 0:
        raise ValueError('fraction list is empty')
    for f in fractions:
        if f > 1 or f < 0:
            raise ValueError('fractions must be between [0,1]')

    # cumActives[n] = number of actives among the n top-ranked entries.
    cumActives = [0]
    for row in scores:
        cumActives.append(cumActives[-1] + (1 if row[col] else 0))

    numActives = cumActives[-1]
    if numActives == 0:
        return [0.0] * len(fractions)

    enrich = []
    for f in fractions:
        # int() because math.ceil returns a float on Python 2; at least one
        # entry is always inspected so the division below is well defined.
        numTop = max(int(math.ceil(numMol * f)), 1)
        enrich.append(1.0 * cumActives[numTop] * numMol / numTop / numActives)
    return enrich
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176