As a continuation of "Implementing item-based collaborative filtering in Python, using MovieLens as an example", I tried implementing user-based collaborative filtering.
import numpy as np
import pandas as pd
from scipy.spatial.distance import cosine
from scipy.stats import pearsonr
# Load the tab-separated rating records: one (user, item, rating, timestamp) per line.
df = pd.read_csv('u.data', sep='\t', names=['user_id', 'item_id', 'rating', 'timestamp'])

# IDs in the file are 1-based, so the largest ID in each column gives the matrix size.
shape = (int(df['user_id'].max()), int(df['item_id'].max()))

# Dense user x item rating matrix; cells that never receive a rating stay 0.
R = np.zeros(shape)

# Column access replaces the `.ix` indexer, which was removed in pandas 1.0.
# Keeping only the last record per (user, item) pair reproduces what a
# row-by-row fill would leave behind and makes the vectorized assignment
# below deterministic.
ratings = df.drop_duplicates(subset=['user_id', 'item_id'], keep='last')
R[ratings['user_id'].values - 1, ratings['item_id'].values - 1] = ratings['rating'].values
class CollaborativeFiltering:
    """User-based collaborative filtering on a dense user x item rating matrix.

    Throughout the class a value of 0 means "not rated"; only strictly
    positive entries are treated as ratings when computing means.
    """

    def fit(self, rating_matrix):
        """Store the rating matrix and the per-user mean ratings.

        Args:
            rating_matrix: 2-D numpy array with one row per user and one
                column per item.

        After the call, ``self.rating_matrix`` is the given matrix and
        ``self.rating_matrix_mean`` has the same shape, holding each user's
        mean rating in the cells that user rated and 0 everywhere else.
        """
        rated_mask = (rating_matrix > 0) * 1
        mean_ratings = np.array(
            [self.evaluated_mean(user_row) for user_row in rating_matrix]
        )
        self.rating_matrix = rating_matrix
        # Broadcast each user's mean across that user's rated cells only.
        self.rating_matrix_mean = rated_mask * mean_ratings[:, np.newaxis]

    def predict(self, x):
        """Predict a rating for every item for the user described by ``x``.

        Args:
            x: 1-D numpy array of the target user's ratings, one entry per
                item column of the fitted matrix.

        Returns:
            1-D numpy array with one predicted rating per item. Items for
            which no prediction can be formed (the normalizer is 0) get 0.0.
        """
        sims = self.user_similarities(x)
        # Similarity-weighted sum of each user's deviation from their own mean.
        scores = sims.dot(self.rating_matrix - self.rating_matrix_mean)
        # Normalizer: total absolute similarity of the users who rated each item.
        norms = np.absolute(sims).dot((self.rating_matrix > 0) * 1)
        x_mean = self.evaluated_mean(x)
        # 0/0 is expected for items with a zero normalizer; silence the
        # warning here and turn the resulting NaNs into 0.0 below.
        with np.errstate(divide='ignore', invalid='ignore'):
            p = scores / norms + x_mean
        p[np.isnan(p)] = 0.0
        return p

    def evaluated_mean(self, v):
        """Return the mean of the strictly positive entries of ``v``.

        Returns 0.0 when ``v`` has no positive entry.
        """
        ev = v[v > 0]
        if ev.size > 0:
            return np.mean(ev)
        return 0.

    def user_similarities(self, x):
        """Return the similarity between ``x`` and every fitted user.

        Args:
            x: 1-D numpy array of ratings, one entry per item column.

        Returns:
            1-D numpy array with one similarity per row of the fitted matrix.
        """
        # `similarity` is a method, so it has to be reached through `self`;
        # a bare `similarity(...)` call raises NameError.
        return np.array(
            [self.similarity(x, user_row) for user_row in self.rating_matrix]
        )

    def similarity(self, v1, v2):
        """Return the Pearson correlation of ``v1`` and ``v2`` over co-rated items.

        Only positions where both vectors are non-zero are compared.

        Returns:
            The correlation coefficient, or 0.0 when fewer than two positions
            are shared or the coefficient is undefined (NaN).
        """
        # Positions where both vectors are non-zero.
        idx = np.logical_and(v1 != 0, v2 != 0)
        v1_non_zero = v1[idx]
        v2_non_zero = v2[idx]
        sim = 0.0
        # pearsonr needs at least two samples; with a single shared item it
        # raises ValueError on current SciPy, so treat that case as 0.0.
        if v1_non_zero.size > 1:
            coef, _ = pearsonr(v1_non_zero, v2_non_zero)
            # pearsonr yields NaN when either input is constant.
            if not np.isnan(coef):
                sim = coef
        return sim
# Create the recommender and fit it on the rating matrix R built above.
cf = CollaborativeFiltering()
cf.fit(R)
Algorithm of recommender system
Recommended Posts