python实现基于用户和项目的协同过滤算法
注:行为用户,列为物品,相似度根据曼哈顿距离计算。根据用户对物品的评分(1-10分,0 表示未评分)来计算用户之间、物品之间的相似度。
A B C D E F
tom 6 7 9 2 4 10
jerry 3 7 2 5 6 9
hank 5 7 0 2 8 5
alex 0 6 8 5 6 9
cary 6 8 4 6 4 6
jack 2 7 2 5 6 9
ben 1 0 0 6 6 0
# encoding=utf-8
class Recommendation:
    """User-based and item-based collaborative filtering on a rating table.

    Ratings live in ``self.information`` as
    ``{user: {good: score}}``. A good that is missing from a user's dict
    (or scored ``0``) is treated as "not rated by that user".

    Similarity between two users (or two goods) is derived from the
    Manhattan distance over their co-rated entries::

        similarity = 1 / (distance + 1)

    so the value lies in ``(0, 1]`` and a LARGER value means MORE similar.

    NOTE: two users (or goods) with no co-rated entries have distance 0 and
    therefore similarity 1; the formula cannot tell "identical" apart from
    "no overlap".
    """

    def __init__(self, information=None):
        """Create a recommender.

        Args:
            information: optional ``{user: {good: score}}`` rating table.
                When omitted, the built-in sample data set is used.
        """
        if information is None:
            information = self._default_information()
        self.information = information

    @staticmethod
    def _default_information():
        """Return a fresh copy of the built-in sample rating table."""
        return {
            'Tom': {'A': 6.0, 'B': 7.0, 'C': 9.0, 'D': 2.0, 'E': 4.0, 'F': 10.0},
            'Jerry': {'A': 3.0, 'B': 7.0, 'C': 2.0, 'D': 5.0, 'E': 6.0, 'F': 9.0},
            'Hank': {'A': 5.0, 'B': 7.0, 'D': 2.0, 'E': 8.0, 'F': 5.0},
            'Alex': {'B': 6.0, 'C': 8.0, 'D': 5.0, 'E': 6.0, 'F': 9.0},
            'Cary': {'A': 6, 'B': 8, 'C': 4, 'D': 6, 'E': 4, 'F': 6},
            'Jack': {'A': 2.0, 'B': 7.0, 'C': 4, 'D': 5.0, 'E': 6.0, 'F': 9.0},
            'Ben': {'A': 2.0, 'D': 6.0, 'E': 6.0},
        }

    def _cal_mh_dis(self, user1, user2):
        """Return the Manhattan distance between two users.

        Only goods that BOTH users have rated (non-zero score) contribute;
        the result is the sum of absolute score differences over those goods.

        Raises:
            KeyError: if either user is not in ``self.information``.
        """
        ratings1 = self.information[user1]
        ratings2 = self.information[user2]
        distance = 0
        for good, score1 in ratings1.items():
            score2 = ratings2.get(good, 0)
            # A score of 0 (or a missing entry) means "not rated": skip it.
            if score1 == 0 or score2 == 0:
                continue
            distance += abs(score1 - score2)
        return distance

    def _similarity(self, user1, user2):
        """Return the similarity of two users in ``(0, 1]``.

        The distance is normalised as ``1 / (distance + 1)``: the larger the
        value, the MORE similar the two users are.
        """
        distance = self._cal_mh_dis(user1, user2)
        return 1 / (distance + 1)

    def _transform(self):
        """Transpose the user -> good table into a good -> user table.

        Returns:
            ``{good: {user: score}}``, for example::

                {'A': {'Tom': 6.0, 'Jerry': 3.0, 'Hank': 5.0, ...},
                 'B': {'Tom': 7.0, 'Jerry': 7.0, 'Hank': 7.0, ...}}
        """
        result = {}
        for user, ratings in self.information.items():
            for item, score in ratings.items():
                result.setdefault(item, {})[user] = score
        return result

    def _top_matches(self, user, k=2):
        """Return the ``k`` users most similar to ``user``.

        Returns:
            A list of ``(other_user, similarity)`` tuples sorted by
            similarity, highest first. Ties keep the order in which the
            users appear in ``self.information``.
        """
        distances = {
            other: self._similarity(other, user)
            for other in self.information
            if other != user
        }
        ranked = sorted(distances.items(), key=lambda x: x[1], reverse=True)
        return ranked[0:k]

    def recommend_by_people(self, user, k=2):
        """User-based collaborative filtering.

        For every good that ``user`` has not rated, predict a score as the
        similarity-weighted average of the scores given by the ``k`` most
        similar users who did rate that good.

        Args:
            user: the user to recommend for.
            k: number of nearest neighbours to consult (default 2).

        Returns:
            ``{good: predicted_score}``. Goods that none of the neighbours
            rated are omitted, because no prediction can be made for them.
        """
        neighbours = self._top_matches(user, k=k)
        own_ratings = self.information[user]
        recommend = {}
        for good in self._transform():
            if good in own_ratings:
                continue
            # Accumulators are per good; sharing them across goods would
            # blend unrelated goods into one running average.
            weighted_sum = 0
            weight_total = 0
            for neighbour, similarity in neighbours:
                neighbour_ratings = self.information[neighbour]
                if good in neighbour_ratings:
                    weighted_sum += neighbour_ratings[good] * similarity
                    weight_total += similarity
            if weight_total:
                recommend[good] = weighted_sum / weight_total
        return recommend

    def _cal_goods_similarity(self):
        """Build the good-to-good similarity table.

        For each ordered pair of distinct goods the Manhattan distance is
        summed over the users who rated both, then normalised as
        ``1 / (distance + 1)``.

        Returns:
            ``{good: {other_good: similarity}}``; the table is symmetric.
        """
        goods = self._transform()
        good_simi_dict = {}
        for good_i, raters_i in goods.items():
            inner_dict = {}
            for good_j, raters_j in goods.items():
                if good_i == good_j:
                    continue
                # The distance starts from zero for every pair.
                distance = sum(
                    abs(score - raters_j[rater])
                    for rater, score in raters_i.items()
                    if rater in raters_j
                )
                inner_dict[good_j] = 1 / (distance + 1)
            good_simi_dict[good_i] = inner_dict
        return good_simi_dict

    def recommend_by_item(self, user):
        """Item-based collaborative filtering.

        For every good that ``user`` has not rated, predict a score as the
        average of the user's own ratings, weighted by how similar each
        rated good is to the candidate good.

        Returns:
            ``{good: predicted_score}``. Empty when the user has rated
            nothing, since there is then nothing to weight.
        """
        good_simi_dict = self._cal_goods_similarity()
        own_ratings = self.information[user]
        recommend = {}
        for good, similarities in good_simi_dict.items():
            # Only goods the user has not rated yet are candidates.
            if good in own_ratings:
                continue
            score = 0
            simi = 0
            for other_good, similarity in similarities.items():
                if other_good in own_ratings:
                    score += similarity * own_ratings[other_good]
                    simi += similarity
            if simi:
                recommend[good] = score / simi
        return recommend
if __name__ == "__main__":
    # Demo run against the built-in sample data set.
    system = Recommendation()

    # Similarity between two users.
    similarity = system._similarity('Jerry', 'Jack')
    print("当前两用户的相似度为:{0}".format(similarity))

    # The k users most similar to a given user.
    top_k_matches = system._top_matches("Tom", k=2)
    print("与当前用户相似度最高的是:{0}".format(top_k_matches))

    # User-based collaborative filtering recommendation.
    recommendation = system.recommend_by_people("Ben")
    print("给当前用户推荐是:{0}".format(recommendation))

    # Item-based recommendation. In the mangled listing this print had been
    # swallowed by the preceding comment and never executed.
    print("基于物品相似度推荐物品影:{0}".format(system.recommend_by_item("Ben")))
版权归原作者 回一幻 所有, 如有侵权,请联系我们删除。