-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmap5.py
77 lines (67 loc) · 2.37 KB
/
map5.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
from __future__ import division
import numpy as np
import pandas as pd
# --- Module-level data loading -------------------------------------------
# The user ids to evaluate come from a tab-separated file with a `user_id`
# column.
target_users = pd.read_csv("target_users.csv", delimiter='\t')
users = target_users['user_id'].values
# Loaded for completeness; not referenced by the code visible in this file.
training_data = pd.read_csv('training_data.csv', delimiter='\t', index_col=0)
# Ground truth: a comma-separated file with `user_id` and
# `recommended_items` columns, the latter a space-separated string of ids.
test_data = pd.read_csv('test_data.csv', delimiter=',')

# Build the ground-truth item list for each of the first 10000 target users,
# in the same order as `users`, so index u always refers to users[u].
# Real lists are built (rather than `map(...)`), because on Python 3 `map`
# returns a one-shot iterator that has no len() and is exhausted by the
# first membership test.
actual = []
for user in users[:10000]:
    # `.values[0]` takes the first matching row; a user missing from
    # test_data raises IndexError here.
    items = test_data[test_data['user_id'] == user]['recommended_items'].values[0]
    # split() with no argument tolerates repeated/trailing whitespace.
    actual.append([int(item) for item in items.split()])
def apk(actual, predicted, k):
    """
    Computes the average precision at k.

    This function computes the average precision at k between two
    collections of items.

    Parameters
    ----------
    actual : iterable
             The elements that are to be predicted (order doesn't matter)
    predicted : iterable
                The predicted elements (order does matter)
    k : int
        The maximum number of predicted elements

    Returns
    -------
    score : double
            The average precision at k over the inputs; 0.0 when
            `actual` is empty
    """
    # Materialise both inputs once so that lazy iterables (generators,
    # Python 3 `map` objects) work: the code below needs len(), slicing and
    # repeated membership tests, none of which a one-shot iterator supports.
    actual = list(actual)
    predicted = list(predicted)[:k]

    if not actual:
        return 0.0

    score = 0.0
    num_hits = 0.0
    for i, p in enumerate(predicted):
        # A prediction counts only the first time it appears, so duplicated
        # predictions cannot inflate the score.
        if p in actual and p not in predicted[:i]:
            num_hits += 1.0
            score += num_hits / (i + 1.0)

    return score / min(len(actual), k)
def mapk(actual, predicted, k):
    """
    Computes the mean average precision at k.

    Pairs each entry of `actual` with the entry of `predicted` at the same
    position, scores every pair with `apk`, and averages the scores.

    Parameters
    ----------
    actual : list
             A list of lists of elements that are to be predicted
             (order doesn't matter in the lists)
    predicted : list
                A list of lists of predicted elements
                (order matters in the lists)
    k : int
        The maximum number of predicted elements

    Returns
    -------
    score : double
            The mean average precision at k over the input lists
    """
    per_pair_scores = []
    # zip stops at the shorter input, so unmatched trailing entries are ignored.
    for relevant, ranked in zip(actual, predicted):
        per_pair_scores.append(apk(relevant, ranked, k))
    return np.mean(per_pair_scores)
def evaluate_submission(file):
    """
    Scores a submission file and prints its MAP@5.

    The submission is read as a comma-separated CSV with a `user_id` column
    and a `recommended_items` column holding a space-separated string of
    integer item ids. Predictions are collected for the first 10000 entries
    of the module-level `users` array, in that order, and compared against
    the module-level `actual` ground truth using `mapk` with k=5.

    Parameters
    ----------
    file : str
           Path of the submission CSV; also echoed in the printed report

    Returns
    -------
    None
        The result is printed, not returned.
    """
    submission = pd.read_csv(file, delimiter=',')

    # Build real lists (not `map(...)`): on Python 3 `map` yields a one-shot
    # iterator, which has no len() and cannot be sliced by the scorer.
    predicted = []
    for user in users[:10000]:
        # `.values[0]` takes the first matching row; a user missing from the
        # submission raises IndexError here.
        items = submission[submission['user_id'] == user]['recommended_items'].values[0]
        # split() with no argument tolerates repeated/trailing whitespace.
        predicted.append([int(item) for item in items.split()])

    print("Submission: " + file + " - MAP@5: " + str(mapk(actual, predicted, 5)))