Skip to content

Commit bdb04d6

Browse files
committed
Add DecisionTree and CollaborativeFiltering
1 parent 58542fd commit bdb04d6

File tree

3 files changed

+456
-10
lines changed

3 files changed

+456
-10
lines changed

python/CollaborativeFiltering.py

+189
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,189 @@
1+
# -*- coding: utf-8 -*-
2+
# @Author: WuLC
3+
# @Date: 2016-04-12 15:53:02
4+
# @Last modified by: WuLC
5+
# @Last Modified time: 2016-04-12 19:42:16
6+
7+
# @Function: implementation of user-based collaborative filtering
8+
# @Referer: chapter 2 of the book 《programming-collective-intelligence》
9+
10+
11+
# sample data for test
12+
# Sample ratings used for testing: each critic's name maps to a dict of
# {movie title: score given by that critic}.
critics = {
    'Lisa Rose': {
        'Lady in the Water': 2.5,
        'Snakes on a Plane': 3.5,
        'Just My Luck': 3.0,
        'Superman Returns': 3.5,
        'You, Me and Dupree': 2.5,
        'The Night Listener': 3.0,
    },
    'Gene Seymour': {
        'Lady in the Water': 3.0,
        'Snakes on a Plane': 3.5,
        'Just My Luck': 1.5,
        'Superman Returns': 5.0,
        'The Night Listener': 3.0,
        'You, Me and Dupree': 3.5,
    },
    'Michael Phillips': {
        'Lady in the Water': 2.5,
        'Snakes on a Plane': 3.0,
        'Superman Returns': 3.5,
        'The Night Listener': 4.0,
    },
    'Claudia Puig': {
        'Snakes on a Plane': 3.5,
        'Just My Luck': 3.0,
        'The Night Listener': 4.5,
        'Superman Returns': 4.0,
        'You, Me and Dupree': 2.5,
    },
    'Mick LaSalle': {
        'Lady in the Water': 3.0,
        'Snakes on a Plane': 4.0,
        'Just My Luck': 2.0,
        'Superman Returns': 3.0,
        'The Night Listener': 3.0,
        'You, Me and Dupree': 2.0,
    },
    'Jack Matthews': {
        'Lady in the Water': 3.0,
        'Snakes on a Plane': 4.0,
        'The Night Listener': 3.0,
        'Superman Returns': 5.0,
        'You, Me and Dupree': 3.5,
    },
    'Toby': {
        'Snakes on a Plane': 4.5,
        'You, Me and Dupree': 1.0,
        'Superman Returns': 4.0,
    },
}
29+
30+
import math
31+
32+
# four different methods to calculate users' similarity
33+
def user_similarity_on_euclidean(scores, user1, user2):
    """Calculate the similarity of two users based on Euclidean distance.

    Only the movies rated by both users are taken into account.

    Args:
        scores (dict{dict{}}): ratings keyed by user name, then by movie title
        user1 (str): one of the users
        user2 (str): the other user

    Returns:
        float: 1 / (1 + d), where d is the Euclidean distance between the two
            users' ratings on their common movies. The value lies in (0, 1];
            the bigger, the more similar. 0 is returned when the two users
            have no movie in common.
    """
    common = [movie for movie in scores[user1] if movie in scores[user2]]
    if not common:  # no common item of the two users
        return 0
    squared_sum = sum((scores[user1][movie] - scores[user2][movie]) ** 2
                      for movie in common)
    # Use the distance itself (not the squared sum) so the result matches the
    # documented "reciprocal of Euclidean distance" definition.
    distance = math.sqrt(squared_sum)
    # The +1 keeps the result finite when the ratings are identical; the float
    # literals avoid integer division on Python 2.
    return 1.0 / (1.0 + distance)
51+
52+
53+
def user_similarity_on_cosine(scores, user1, user2):
    """Compute the cosine similarity between two users' ratings.

    Only the movies rated by both users take part in the computation.

    Args:
        scores (dict{dict{}}): ratings keyed by user name, then by movie title
        user1 (str): name of the first user
        user2 (str): name of the second user

    Returns:
        float: cosine of the angle between the two rating vectors restricted
            to the shared movies; larger means more similar. 0 is returned
            when there is no shared movie or when either restricted vector
            has zero length.
    """
    shared = []
    for title in scores[user1]:
        if title in scores[user2]:
            shared.append(title)
    if not shared:  # the two users have nothing in common
        return 0

    squared_norm_1 = sum(math.pow(scores[user1][title], 2) for title in shared)
    squared_norm_2 = sum(math.pow(scores[user2][title], 2) for title in shared)
    dot_product = sum(scores[user1][title] * scores[user2][title]
                      for title in shared)

    # A zero-length vector has no direction, so report "no similarity"
    # instead of dividing by zero.
    if squared_norm_1 == 0 or squared_norm_2 == 0:
        return 0
    return dot_product / math.sqrt(squared_norm_2 * squared_norm_1)
76+
77+
78+
def user_similarity_on_modified_cosine(scores, user1, user2):
    """Calculate the similarity of two users based on modified cosine similarity.

    Each rating is first centred on its user's mean rating (taken over every
    movie that user rated). The numerator runs over the movies both users
    rated, while each user's squared deviation in the denominator runs over
    every movie that user rated.

    Args:
        scores (dict{dict{}}): ratings keyed by user name, then by movie title
        user1 (str): one of the users
        user2 (str): the other user

    Returns:
        float: modified cosine similarity (range [-1, 1]) between user1 and
            user2; the bigger, the more similar. 0 is returned when the users
            have no movie in common or when either user gave the same score
            to every movie (zero variance, so the measure is undefined).
    """
    common = [movie for movie in scores[user1] if movie in scores[user2]]
    if not common:  # no common item of the two users
        return 0
    average1 = float(sum(scores[user1].values())) / len(scores[user1])
    average2 = float(sum(scores[user2].values())) / len(scores[user2])
    # numerator: co-deviation over the movies both users rated
    numerator = sum((scores[user1][movie] - average1) * (scores[user2][movie] - average2)
                    for movie in common)
    # denominator: each user's squared deviation over ALL movies that user rated
    pow_sum_1 = sum(math.pow(score - average1, 2) for score in scores[user1].values())
    pow_sum_2 = sum(math.pow(score - average2, 2) for score in scores[user2].values())
    denominator = math.sqrt(pow_sum_1 * pow_sum_2)
    if denominator == 0:
        # A user with constant ratings has no deviation to compare; avoid
        # ZeroDivisionError and report "no similarity", like the cosine variant.
        return 0
    return float(numerator) / denominator
102+
103+
104+
def user_similarity_on_pearson(scores, user1, user2):
    """Calculate the similarity of two users based on the Pearson correlation coefficient.

    Each rating is centred on its user's mean rating, where the mean is taken
    over every movie that user rated. Both the numerator and the squared
    deviations in the denominator run only over the movies both users rated.

    Args:
        scores (dict{dict{}}): ratings keyed by user name, then by movie title
        user1 (str): one of the users
        user2 (str): the other user

    Returns:
        float: correlation (range [-1, 1]) between user1 and user2; the
            bigger, the more similar. 0 is returned when the users have no
            movie in common or when either user's deviations on the common
            movies are all zero (the coefficient is undefined there).
    """
    common = [movie for movie in scores[user1] if movie in scores[user2]]
    if not common:  # no common item of the two users
        return 0
    average1 = float(sum(scores[user1].values())) / len(scores[user1])
    average2 = float(sum(scores[user2].values())) / len(scores[user2])
    # numerator: co-deviation over the common movies
    numerator = sum((scores[user1][movie] - average1) * (scores[user2][movie] - average2)
                    for movie in common)
    # denominator: squared deviations, restricted to the common movies
    pow_sum_1 = sum(math.pow(scores[user1][movie] - average1, 2) for movie in common)
    pow_sum_2 = sum(math.pow(scores[user2][movie] - average2, 2) for movie in common)
    denominator = math.sqrt(pow_sum_1 * pow_sum_2)
    if denominator == 0:
        # No deviation on the common movies for at least one user; avoid
        # ZeroDivisionError and report "no similarity".
        return 0
    return float(numerator) / denominator
128+
129+
130+
def find_similar_users(scores, user, similar_function=user_similarity_on_cosine):
    """Rank every other user by similarity to the given user.

    Args:
        scores (dict{dict{}}): ratings keyed by user name, then by movie title
        user (str): the user to compare everybody else against
        similar_function (function): callable taking (scores, user1, user2)
            and returning a similarity score; defaults to cosine similarity

    Returns:
        list[tuple]: (similarity, other user) pairs for every user in
            `scores` except `user`, sorted from most to least similar
    """
    # Compare against the `scores` argument rather than the module-level
    # sample data, so the function works for any data set passed in.
    similar_users = [(similar_function(scores, user, other_user), other_user)
                     for other_user in scores if other_user != user]
    similar_users.sort(reverse=True)
    return similar_users
146+
147+
148+
def recommend_item(scores, user):
    """Score every movie for a user from the ratings of similar users.

    A movie's score is the similarity-weighted average of the ratings given
    to it by the other users, with similarities taken from
    find_similar_users (default similarity function).

    Args:
        scores (dict{dict{}}): ratings keyed by user name, then by movie title
        user (str): the user to recommend items to

    Returns:
        list[tuple]: (predicted score, movie) pairs sorted from highest to
            lowest score. Movies for which the similarities of the other
            users who rated them sum to zero are left out, because no
            weighted average can be formed for them.
    """
    similar_users = find_similar_users(scores, user)

    # Every movie rated by anybody in the data set passed in (not the
    # module-level sample data).
    all_movies = set()
    for ratings in scores.values():
        all_movies.update(ratings)

    item_score = []
    for movie in all_movies:
        score_sum = 0
        similarity_sum = 0
        for similarity, other_user in similar_users:
            if movie in scores[other_user]:
                score_sum += scores[other_user][movie] * similarity
                similarity_sum += similarity
        if similarity_sum == 0:
            # Nobody comparable rated this movie; skip it instead of
            # dividing by zero.
            continue
        item_score.append((float(score_sum) / similarity_sum, movie))

    item_score.sort(reverse=True)
    return item_score
178+
179+
180+
if __name__ == '__main__':
    # Demo: print the predicted score of every movie for one sample critic.
    #
    # To list the users most similar to that critic instead:
    #     for entry in find_similar_users(critics, 'Lisa Rose'):
    #         print(entry)
    item_score = recommend_item(critics, 'Lisa Rose')
    for score, movie in item_score:
        # A single pre-formatted string prints identically with the Python 2
        # print statement and the Python 3 print function.
        print('%s %s' % (score, movie))
189+

0 commit comments

Comments
 (0)