recsys_evl.py
'''This module implements common metrics used to evaluate the
performance of a recommender algorithm.
'''
import math
# RMSE: root-mean-square error
def rmse(results):
    '''rmse(list) -> float
    Return the root-mean-square error of the recommendation results.
    type of results : list.
    results[i] : [uid, iid, rscore, pscore].
    '''
    return math.sqrt(
        sum((rscore - pscore) ** 2 for uid, iid, rscore, pscore in results)
        / len(results))
# MAE: mean-absolute error
def mae(results):
    '''mae(list) -> float
    Return the mean-absolute error of the recommendation results.
    type of results : list.
    results[i] : [uid, iid, rscore, pscore].
    '''
    return (sum(abs(rscore - pscore) for uid, iid, rscore, pscore in results)
            / len(results))
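# A minimal usage sketch for rmse/mae on made-up data (the tuples below
# are illustrative, not real recommendation output):
#   results = [('u1', 'i1', 4.0, 3.5), ('u1', 'i2', 2.0, 2.5)]
#   rmse(results)  # sqrt(((4.0-3.5)**2 + (2.0-2.5)**2) / 2) = 0.5
#   mae(results)   # (0.5 + 0.5) / 2 = 0.5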
# Evaluate the result using precision & recall in (Top-N & 0-1) recommendation
def get_precision_recall(train, test, func, sim_matrix):
    '''get_precision_recall(dict, dict, func, dict) -> float, float
    Return the precision and recall of the recommendation results
    compared to the test dataset.
    Note: func and sim_matrix are kept for interface consistency with
    get_recall/get_precision but are not used here.
    '''
    hit = 0
    train_num = 0
    test_num = 0
    for u in test:
        if u not in train:
            continue
        tr = train[u]
        te = test[u]
        # Count the items that appear both in this user's train-side
        # results and in the test set.
        hit += len(set(tr.values()) & set(te.values()))
        train_num += len(tr)
        test_num += len(te)
    return hit / float(train_num), hit / float(test_num)
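# A hedged sketch of the expected input shape (inferred from the code,
# not documented elsewhere): train/test map each user id to a dict whose
# values are item ids, e.g.
#   train = {'u1': {0: 'i1', 1: 'i2'}}
#   test  = {'u1': {0: 'i2', 1: 'i3'}}
#   get_precision_recall(train, test, None, None)  # -> (0.5, 0.5)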
# Evaluate the coverage percentage of the recommender system
def get_coverage(result, items):
    '''get_coverage(dict, list) -> string
    Compute the coverage of the result: the fraction of all items that
    appear in at least one recommendation list.
    Coverage reflects the ability to discover long-tail items.
    '''
    result_iid = set()
    for iids in result.values():
        for i in iids:
            result_iid.add(i)
    # Coverage is distinct recommended items over all items.
    return 'coverage: %f' % (len(result_iid) / float(len(items)))
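# Toy example (made-up ids): with 4 items in the catalogue and only
# 2 distinct items ever recommended, coverage is 2/4 = 0.5:
#   result = {'u1': ['i1', 'i2'], 'u2': ['i2']}
#   items = ['i1', 'i2', 'i3', 'i4']
#   get_coverage(result, items)  # -> 'coverage: 0.500000'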
# Get the popularity of the items
def get_popularity(result, items):
    '''get_popularity(dict, list) -> dict
    Return the popularity of each item in the result, normalized by the
    total number of recommendations.
    '''
    popularity = dict()
    len_of_result = 0
    for iids in result.values():
        for i in iids:
            popularity[i] = popularity.get(i, 0) + 1
            len_of_result += 1
    # Normalize the counts; items that were never recommended get 0.
    for iid in items:
        if iid not in popularity:
            popularity[iid] = 0.0
        else:
            popularity[iid] /= float(len_of_result)
    return popularity
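# Toy example (made-up ids): 'i2' is recommended twice out of three
# recommendations in total, so its normalized popularity is 2/3:
#   result = {'u1': ['i1', 'i2'], 'u2': ['i2']}
#   items = ['i1', 'i2', 'i3']
#   get_popularity(result, items)
#   # -> {'i1': 0.333..., 'i2': 0.667..., 'i3': 0.0}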
# Get the recall of the recommendation
def get_recall(train, test, func, sim_matrix):
    '''get_recall(dict, dict, func, dict) -> float
    Return the recall of the recommendation compared to the test data.
    sim_matrix: the similarity matrix built from the train set.
    '''
    hit = 0
    num = 0
    for u in train:
        if u not in test:
            continue
        tu = test[u]
        result = func(u, train, sim_matrix)
        for item, pui in result.items():
            if item in tu:
                hit += 1
        # Recall is measured against the size of the user's test record.
        num += len(tu)
    return hit / float(num)
# Get the precision of the recommendation
def get_precision(train, test, func, sim_matrix):
    '''get_precision(dict, dict, func, dict) -> float
    Return the precision of the recommendation compared to the test data.
    sim_matrix: the similarity matrix built from the train set.
    '''
    hit = 0
    num = 0
    for u in test:
        if u not in train:
            continue
        result = func(u, train, sim_matrix)
        for item, pui in result.items():
            if item in test[u]:
                hit += 1
        # Precision is measured against the number of recommended items,
        # not the size of the user's train record.
        num += len(result)
    return hit / float(num)
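# A minimal end-to-end sketch, assuming func(u, train, sim_matrix) returns
# a {item: predicted_score} dict; the dummy recommender below is a made-up
# stand-in for a real model and exists only to show the calling convention.
if __name__ == '__main__':
    def dummy_recommend(u, train, sim_matrix):
        # Always recommend the same two items, regardless of the user.
        return {'i2': 0.9, 'i3': 0.4}

    train = {'u1': {'i1': 1}}
    test = {'u1': {'i2': 1, 'i4': 1}}
    # One hit ('i2') out of 2 recommended items and 2 test items.
    print(get_precision(train, test, dummy_recommend, None))  # 0.5
    print(get_recall(train, test, dummy_recommend, None))     # 0.5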