User-based collaborative filtering in python

As a follow-up to "Implementing item-based collaborative filtering in Python - using MovieLens as an example", I implemented user-based collaborative filtering.

import numpy as np
import pandas as pd
from  scipy.spatial.distance import cosine
from scipy.stats import pearsonr

# Load the ratings file `u.data`: tab-separated, no header row, one rating per line.
df = pd.read_csv('u.data', sep='\t', names=['user_id', 'item_id', 'rating', 'timestamp'])

# The IDs are used as 1-based indices below, so the largest ID gives each
# dimension of the matrix. (`.ix` was removed from pandas; plain column
# access is used instead.)
shape = (int(df['user_id'].max()), int(df['item_id'].max()))

# R[u, i] holds the rating user u+1 gave item i+1; 0 means "not rated".
R = np.zeros(shape)

# Fill the matrix in a single vectorized assignment instead of a per-row
# Python loop. If a (user, item) pair is repeated, the last row wins, as it
# did with the loop.
R[df['user_id'].values - 1, df['item_id'].values - 1] = df['rating'].values


class CollaborativeFiltering:
    """User-based collaborative filtering on a dense user x item rating matrix.

    Entries greater than 0 are treated as ratings; everything else is
    treated as "not rated". Similarity between users is the Pearson
    correlation over the items both users have rated.
    """

    def fit(self, rating_matrix):
        """Store the rating matrix and the per-user mean ratings.

        Args:
            rating_matrix: 2-D NumPy array indexed as [user, item].
        """
        rated = (rating_matrix > 0) * 1
        mean_ratings = np.array(
            [self.evaluated_mean(row) for row in rating_matrix], dtype=float
        )

        self.rating_matrix = rating_matrix
        # Each user's mean, placed only at the positions that user rated, so
        # that (rating_matrix - rating_matrix_mean) is 0 for unrated items.
        self.rating_matrix_mean = rated * mean_ratings[:, np.newaxis]

    def predict(self, x):
        """Predict a score for every item for the user with rating vector x.

        Args:
            x: 1-D NumPy array of ratings over the same items as the fitted
                matrix (values <= 0 count as "not rated").

        Returns:
            1-D NumPy array with one predicted score per item. Items for
            which no prediction can be computed (no user with a non-zero
            similarity has rated them) get 0.0.
        """
        sims = self.user_similarities(x)

        # Similarity-weighted sum of each user's deviation from their mean.
        scores = sims.dot(self.rating_matrix - self.rating_matrix_mean)
        # Normalizer: sum of |similarity| over the users who rated each item.
        norms = np.absolute(sims).dot((self.rating_matrix > 0) * 1)
        x_mean = self.evaluated_mean(x)

        # 0 / 0 is expected for items without usable neighbours; silence the
        # warning and map the resulting NaN to 0.0 below.
        with np.errstate(divide='ignore', invalid='ignore'):
            p = scores / norms + x_mean

        p[np.isnan(p)] = 0.0
        return p

    def evaluated_mean(self, v):
        """Return the mean of the entries of v that are > 0, or 0.0 if none."""
        ev = v[v > 0]
        if ev.size > 0:
            return np.mean(ev)
        return 0.

    def user_similarities(self, x):
        """Return the similarity between x and every user in the fitted matrix.

        Args:
            x: 1-D NumPy rating vector.

        Returns:
            1-D NumPy array with one similarity per fitted user.
        """
        # Must go through `self.`; a bare `similarity(...)` is a NameError.
        return np.array([self.similarity(x, row) for row in self.rating_matrix])

    def similarity(self, v1, v2):
        """Return the Pearson correlation of v1 and v2 over co-rated items.

        Only positions where both vectors are non-zero are compared. The
        result is 0.0 when the correlation is undefined: fewer than two
        common items, or either vector constant on the common items.
        """
        # Items that both users have rated (non-zero in both vectors).
        common = np.logical_and(v1 != 0, v2 != 0)

        a = v1[common]
        b = v2[common]

        # pearsonr needs at least two points and rejects shorter input.
        if a.size < 2:
            return 0.0

        # A constant vector has zero variance, so the correlation is
        # undefined (pearsonr would warn and return NaN).
        if a.max() == a.min() or b.max() == b.min():
            return 0.0

        coef, _ = pearsonr(a, b)
        if np.isnan(coef):
            return 0.0
        return coef

# Create the recommender and fit it on the user x item rating matrix R.
# After fitting, cf.predict(v) can be called with a user's rating vector v.
cf = CollaborativeFiltering()
cf.fit(R)

Reference

Algorithm of recommender system

Recommended Posts

User-based collaborative filtering in python
Notes for implementing simple collaborative filtering in Python
Quadtree in Python --2
Python in optimization
CURL in python
Metaprogramming in Python
Python 3.3 in Anaconda
Geocoding in python
SendKeys in Python
Meta-analysis in Python
Unittest in python
Epoch in Python
Discord in Python
Sudoku in Python
DCI in Python
quicksort in python
nCr in python
N-Gram in Python
Programming in python
Plink in Python
Constant in python
Lifegame in Python.
FizzBuzz in Python
Sqlite in python
StepAIC in Python
N-gram in python
LINE-Bot [0] in Python
Csv in python
Disassemble in Python
Reflection in Python
Constant in python
nCr in Python.
format in python
Scons in Python3
Puyo Puyo in python
python in virtualenv
PPAP in Python
Quad-tree in Python
Reflection in Python
Chemistry in Python
Hashable in python
DirectLiNGAM in Python
LiNGAM in Python
Flatten in python
flatten in python
Sorted list in Python
Daily AtCoder # 36 in Python
Clustering text in Python
Daily AtCoder # 2 in Python
Implement Enigma in python
Daily AtCoder # 32 in Python
Daily AtCoder # 6 in Python
Daily AtCoder # 18 in Python
Edit fonts in Python
Singleton pattern in Python
File operations in Python
Read DXF in python
Daily AtCoder # 53 in Python
Key input in Python
Use config.ini in Python
Daily AtCoder # 33 in Python