1
+ # -*- coding: utf-8 -*-
2
+ # @Author: WuLC
3
+ # @Date: 2016-04-12 15:53:02
4
+ # @Last modified by: WuLC
5
+ # @Last Modified time: 2016-04-12 19:42:16
6
+
7
+ # @Function: implementation of user-based collaborative filtering
8
+ # @Referer: chapter 2 of the book 《programming-collective-intelligence》
9
+
10
+
11
# Sample ratings used for testing, laid out as
# reviewer name -> {movie title -> rating}.
critics = {
    'Lisa Rose': {
        'Lady in the Water': 2.5,
        'Snakes on a Plane': 3.5,
        'Just My Luck': 3.0,
        'Superman Returns': 3.5,
        'You, Me and Dupree': 2.5,
        'The Night Listener': 3.0,
    },
    'Gene Seymour': {
        'Lady in the Water': 3.0,
        'Snakes on a Plane': 3.5,
        'Just My Luck': 1.5,
        'Superman Returns': 5.0,
        'The Night Listener': 3.0,
        'You, Me and Dupree': 3.5,
    },
    'Michael Phillips': {
        'Lady in the Water': 2.5,
        'Snakes on a Plane': 3.0,
        'Superman Returns': 3.5,
        'The Night Listener': 4.0,
    },
    'Claudia Puig': {
        'Snakes on a Plane': 3.5,
        'Just My Luck': 3.0,
        'The Night Listener': 4.5,
        'Superman Returns': 4.0,
        'You, Me and Dupree': 2.5,
    },
    'Mick LaSalle': {
        'Lady in the Water': 3.0,
        'Snakes on a Plane': 4.0,
        'Just My Luck': 2.0,
        'Superman Returns': 3.0,
        'The Night Listener': 3.0,
        'You, Me and Dupree': 2.0,
    },
    'Jack Matthews': {
        'Lady in the Water': 3.0,
        'Snakes on a Plane': 4.0,
        'The Night Listener': 3.0,
        'Superman Returns': 5.0,
        'You, Me and Dupree': 3.5,
    },
    'Toby': {
        'Snakes on a Plane': 4.5,
        'You, Me and Dupree': 1.0,
        'Superman Returns': 4.0,
    },
}
29
+
30
+ import math
31
+
32
+ # four different methods to calculate users' similarity
33
def user_similarity_on_euclidean(scores, user1, user2):
    """Calculate the similarity of two users from their Euclidean distance.

    Only movies rated by both users contribute to the distance.

    Args:
        scores (dict{dict{}}): ratings keyed by user, then by movie
        user1 (str): one of the users
        user2 (str): the other user

    Returns:
        float: 1 / (1 + distance), a value in (0, 1]; the bigger the more
            similar. 0 is returned when the users share no movie.
    """
    ratings1 = scores[user1]
    common = [movie for movie in ratings1 if movie in scores[user2]]
    if not common:  # no movie rated by both users
        return 0
    ratings2 = scores[user2]
    squared_sum = sum((ratings1[movie] - ratings2[movie]) ** 2
                      for movie in common)
    # The distance itself (not its square) feeds the score; adding 1 keeps
    # the denominator non-zero when the shared ratings are identical.
    return 1.0 / (1.0 + math.sqrt(squared_sum))
51
+
52
+
53
def user_similarity_on_cosine(scores, user1, user2):
    """Calculate the cosine similarity of two users' rating vectors.

    The vectors are restricted to the movies both users have rated.

    Args:
        scores (dict{dict{}}): ratings keyed by user, then by movie
        user1 (str): one of the users
        user2 (str): the other user

    Returns:
        float: cosine of the angle between the two rating vectors, the
            bigger the more similar. 0 is returned when the users share no
            movie or when either vector has zero length.
    """
    first = scores[user1]
    shared = [title for title in first if title in scores[user2]]
    if not shared:  # nothing rated by both users
        return 0
    second = scores[user2]

    # Accumulate both squared norms and the dot product in a single pass.
    norm_sq_first = 0
    norm_sq_second = 0
    dot_product = 0
    for title in shared:
        rating_a = first[title]
        rating_b = second[title]
        norm_sq_first += math.pow(rating_a, 2)
        norm_sq_second += math.pow(rating_b, 2)
        dot_product += rating_a * rating_b

    # A zero-length vector has no direction, so report no similarity.
    if norm_sq_first == 0 or norm_sq_second == 0:
        return 0
    return dot_product / math.sqrt(norm_sq_second * norm_sq_first)
76
+
77
+
78
def user_similarity_on_modified_cosine(scores, user1, user2):
    """Calculate the modified (mean-centred) cosine similarity of two users.

    Every rating is centred on its user's mean rating, taken over all the
    movies that user rated. The numerator sums over the movies rated by both
    users; the denominator uses each user's full set of ratings.

    Args:
        scores (dict{dict{}}): ratings keyed by user, then by movie
        user1 (str): one of the users
        user2 (str): the other user

    Returns:
        float: modified cosine similarity, the bigger the more similar.
            0 is returned when the users share no movie, or when either
            user's ratings are all equal (the denominator would be zero).
    """
    ratings1 = scores[user1]
    common = [movie for movie in ratings1 if movie in scores[user2]]
    if not common:  # no movie rated by both users
        return 0
    ratings2 = scores[user2]

    average1 = float(sum(ratings1.values())) / len(ratings1)
    average2 = float(sum(ratings2.values())) / len(ratings2)

    # numerator: co-deviation over the movies both users rated
    numerator = sum((ratings1[movie] - average1) * (ratings2[movie] - average2)
                    for movie in common)
    # denominator: spread of each user's ratings over everything they rated
    pow_sum_1 = sum(math.pow(rating - average1, 2)
                    for rating in ratings1.values())
    pow_sum_2 = sum(math.pow(rating - average2, 2)
                    for rating in ratings2.values())
    denominator = math.sqrt(pow_sum_1 * pow_sum_2)

    # A user whose ratings never deviate from their mean carries no
    # preference signal; report no similarity instead of dividing by zero.
    if denominator == 0:
        return 0
    return float(numerator) / denominator
102
+
103
+
104
def user_similarity_on_pearson(scores, user1, user2):
    """Calculate the Pearson-style correlation of two users' ratings.

    Every rating is centred on its user's mean rating, taken over all the
    movies that user rated. Both the numerator and the denominator sum over
    the movies rated by both users only.

    Args:
        scores (dict{dict{}}): ratings keyed by user, then by movie
        user1 (str): one of the users
        user2 (str): the other user

    Returns:
        float: correlation coefficient, the bigger the more similar.
            0 is returned when the users share no movie, or when either
            user's shared ratings all equal that user's mean (the
            denominator would be zero).
    """
    ratings1 = scores[user1]
    common = [movie for movie in ratings1 if movie in scores[user2]]
    if not common:  # no movie rated by both users
        return 0
    ratings2 = scores[user2]

    average1 = float(sum(ratings1.values())) / len(ratings1)
    average2 = float(sum(ratings2.values())) / len(ratings2)

    # numerator: co-deviation over the movies both users rated
    numerator = sum((ratings1[movie] - average1) * (ratings2[movie] - average2)
                    for movie in common)
    # denominator: each user's spread, restricted to the shared movies
    pow_sum_1 = sum(math.pow(ratings1[movie] - average1, 2) for movie in common)
    pow_sum_2 = sum(math.pow(ratings2[movie] - average2, 2) for movie in common)
    denominator = math.sqrt(pow_sum_1 * pow_sum_2)

    # No deviation on the shared movies means no correlation can be
    # measured; report no similarity instead of dividing by zero.
    if denominator == 0:
        return 0
    return float(numerator) / denominator
128
+
129
+
130
def find_similar_users(scores, user, similar_function=user_similarity_on_cosine):
    """Rank every other user by similarity to the given user.

    Args:
        scores (dict{dict{}}): ratings keyed by user, then by movie
        user (str): the user to compare everyone else against
        similar_function (function): callable taking (scores, user1, user2)
            and returning a similarity score

    Returns:
        list[tuple]: (similarity, other user) pairs, most similar first.
            Equal similarities are ordered by user name, descending, because
            the tuples are compared element by element.
    """
    # Compare within the ratings that were passed in, not the module-level
    # sample data, so the function works for any data set.
    similar_users = [(similar_function(scores, user, other_user), other_user)
                     for other_user in scores if other_user != user]
    similar_users.sort(reverse=True)
    return similar_users
146
+
147
+
148
def recommend_item(scores, user):
    """Rank movies for a user by similarity-weighted average rating.

    For every movie appearing in ``scores``, the ratings given by the other
    users are averaged, each weighted by that user's similarity to ``user``
    (as returned by ``find_similar_users`` with its default similarity
    function). Movies that ``user`` has already rated are scored as well.

    Args:
        scores (dict{dict{}}): ratings keyed by user, then by movie
        user (str): the user to recommend for

    Returns:
        list[tuple]: (predicted score, movie) pairs, highest score first.
            A movie is left out when the similarities of the users who rated
            it sum to zero, since no weighted average exists for it.
    """
    similar_users = find_similar_users(scores, user)

    # Every movie rated by anyone; order is irrelevant because the result
    # is sorted before it is returned.
    all_movies = set()
    for ratings in scores.values():
        all_movies.update(ratings)

    item_score = []
    for movie in all_movies:
        score_sum = 0.0
        similarity_sum = 0.0
        for similarity, other_user in similar_users:
            other_ratings = scores[other_user]
            if movie in other_ratings:
                score_sum += other_ratings[movie] * similarity
                similarity_sum += similarity
        # Nobody comparable rated this movie: skip it rather than divide
        # by zero.
        if similarity_sum == 0:
            continue
        item_score.append((score_sum / similarity_sum, movie))

    item_score.sort(reverse=True)
    return item_score
178
+
179
+
180
if __name__ == '__main__':
    # Demo: print the ranked (score, movie) recommendations for one of the
    # sample reviewers. To list the most similar users instead, iterate over
    # find_similar_users(critics, 'Lisa Rose') and print each entry.
    item_score = recommend_item(critics, 'Lisa Rose')
    for score, movie in item_score:
        # A single pre-formatted string prints the same "score movie" line
        # under both the Python 2 print statement and the Python 3 function.
        print('%s %s' % (score, movie))
189
+
0 commit comments