Source code for zero.sgd2
import numpy as np
from scipy.sparse import coo_matrix, hstack, diags
import time
from zero.recommendation_algorithm import (RecommendationAlgorithm,
register_algorithm)
[docs]def onehotize(col, depth):
nb_events = len(col)
rows = list(range(nb_events))
return coo_matrix(([1] * nb_events, (rows, col)), shape=(nb_events, depth))
[docs]@register_algorithm('sgd2')
class MangakiSGD2(RecommendationAlgorithm):
def __init__(self, nb_components=20, nb_iterations=10,
gamma=0.01, lambda_=0.1, batches=400,
*args, **kwargs):
super().__init__(*args, **kwargs)
self.nb_components = nb_components
self.nb_iterations = nb_iterations
self.gamma = gamma
self.lambda_ = lambda_
self.batches = batches
[docs] def fit(self, X, y):
N = self.nb_users + self.nb_works
self.w = np.random.random(N)
self.V = np.random.random((N, self.nb_components))
X_users = onehotize(X[:, 0], self.nb_users)
X_works = onehotize(X[:, 1], self.nb_works)
X_fm = hstack([X_users, X_works]).tocsr()
batch_size = max(1, len(X) // self.batches)
for epoch in range(self.nb_iterations):
step = 0
dt = time.time()
batch = np.random.permutation(len(X))
for i in range(self.batches):
X_batch = X_fm[batch[i * batch_size:(i + 1) * batch_size]]
X_bT = X_batch.T.tocsr()
y_batch = y[batch[i * batch_size:(i + 1) * batch_size]]
pred_batch = self.predict_fm(X_batch)
error_batch = pred_batch - y_batch
error_feat = X_bT.dot(error_batch)
w_grad = error_feat / batch_size + self.lambda_ * self.w
V_grad = ((1 / batch_size + self.lambda_) *
(X_bT @ diags(error_batch) @ X_bT.T -
diags(error_feat))) @ self.V
self.w -= self.gamma * w_grad
self.V -= self.gamma * V_grad
step += 1
print('elapsed', time.time() - dt)
self.compute_metrics()
[docs] def fit_single_user(self, rated_works, ratings):
pass
[docs] def predict(self, X):
X_users = onehotize(X[:, 0], self.nb_users)
X_works = onehotize(X[:, 1], self.nb_works)
X_fm = hstack([X_users, X_works]).tocsr()
return self.predict_fm(X_fm)
[docs] def predict_fm(self, X):
return X @ self.w + 1/2 * (np.sum((X @ self.V) ** 2 -
X @ (self.V ** 2), axis=1))
[docs] def predict_single_user(self, work_ids, user_parameters):
pass
@property
def is_serializable(self):
return False # Not yet, but easy to do
def __str__(self):
return '[SGD2] NB_COMPONENTS = %d' % self.nb_components
[docs] def get_shortname(self):
return 'sgd2-%d' % self.nb_components