# 推荐系统之SVD：奇异值分解

$A = U S V^{T}$

## 基于奇异值分解 (SVD) 的电影推荐

```python
import numpy as np
import pandas as pd
```

``````data = pd.io.parsers.read_csv('data/ratings.dat', names=['user_id', 'movie_id', 'rating', 'time'],
engine='python', delimiter='::')
movie_data = pd.io.parsers.read_csv('data/movies.dat', names=['movie_id', 'title', 'genre'],
engine='python', delimiter='::')``````

``````ratings_mat = np.ndarray(shape=(np.max(data.movie_id.values), np.max(data.user_id.values)),
dtype=np.uint8)
ratings_mat[data.movie_id.values-1, data.user_id.values-1] = data.rating.values``````

``normalised_mat = ratings_mat - np.asarray([(np.mean(ratings_mat, 1))]).T``

``````A = normalised_mat.T / np.sqrt(ratings_mat.shape[0] - 1)
U, S, V = np.linalg.svd(A)``````

``````def top_cosine_similarity(data, movie_id, top_n=10):
index = movie_id - 1 # Movie id starts from 1 in the dataset
movie_row = data[index, :]
magnitude = np.sqrt(np.einsum('ij, ij -> i', data, data))
similarity = np.dot(movie_row, data.T) / (magnitude[index] * magnitude)
sort_indexes = np.argsort(-similarity)
return sort_indexes[:top_n]``````

``````def print_similar_movies(movie_data, movie_id, top_indexes):
print('Recommendations for {0}: n'.format(
movie_data[movie_data.movie_id == movie_id].title.values[0]))
for id in top_indexes + 1:
print(movie_data[movie_data.movie_id == id].title.values[0])``````

``````k = 50
movie_id = 10 # (getting an id from movies.dat)
top_n = 10
sliced = V.T[:, :k] # representative data
indexes = top_cosine_similarity(sliced, movie_id, top_n)``````

``print_similar_movies(movie_data, movie_id, indexes)``

## Reference:

Aharon, M., Elad, M., & Bruckstein, A. (2006). K-SVD: An algorithm for designing overcomplete dictionaries for sparse representation. IEEE Transactions on Signal Processing, 54(11), 4311-4322.

Ba, Q., Li, X., & Bai, Z. (2013, May). Clustering collaborative filtering recommendation system based on SVD algorithm. In 2013 IEEE 4th International Conference on Software Engineering and Service Science (pp. 963-967). IEEE.