知らなかったものを見つける。
Kotoenプロジェクトで協調フィルタリングを使いたかったので、例を探していたら見つけた次の記事のコードをRubyで書き直してみました。
特異値分解を用いたレコメンデーション - NO!と言えるようになりたい
コードは以下の通り。Pythonからの書き換えはほとんど逐語訳でいけたので、結構すんなり出来あがり。
# from http://d.hatena.ne.jp/ytakano/20081012/1223805723
# This program implements SVD-based recommendation algorithms.
#
# see section 3 of
#   Bhaskar Mehta, Thomas Hofmann, and Wolfgang Nejdl, Robust Collaborative Filtering,
#   In Proceedings of the 1st ACM Conference on Recommender Systems, ACM Press, October 2007,
#   pp. 49-56
# and
#   Simon's blog (http://sifter.org/~simon/journal/20061211.html)

# Recommender that factorises the user/item rating table into per-user and
# per-item component vectors, training one component at a time with a plain
# gradient step (see #updated_factors).
class Hebbian
  # users - Array of Hashes, one per user, mapping item => numeric rating.
  #         The user's position in the array is the user id used by
  #         #predict and #recommends.
  # items - Array of every item that may appear as a key in +users+.
  def initialize(users, items)
    @lrate = 0.1 # learning rate
    @comps = 40  # the number of components
    # Pristine deep copy of the ratings; never modified after construction.
    @ratings = Marshal.load(Marshal.dump(users))
    # Working copy that #svd deflates in place while training.
    @users = Marshal.load(Marshal.dump(@ratings))
    @items = items
    @item2idx = {}
    items.each_with_index { |item, idx| @item2idx[item] = idx }
    @trained = false
  end

  # Resets the training state: restores the working ratings from the pristine
  # copy and fills every user/item component with the same starting value,
  # sqrt(mean rating / number of components).
  def init_uv
    # Start from the original ratings so that repeated training runs do not
    # learn from residuals left behind by an earlier #svd call.
    @users = Marshal.load(Marshal.dump(@ratings))
    @trained = false

    total = 0
    count = 0
    @users.each do |ratings|
      ratings.each_value do |val|
        total += val
        count += 1
      end
    end

    # With no ratings at all the mean is undefined; fall back to 0.0 instead
    # of propagating NaN into every prediction.
    ave = count.zero? ? 0.0 : total / count.to_f
    pred = (ave / @comps) ** 0.5

    @u = Array.new(@users.size) { Array.new(@comps, pred) }
    @v = Array.new(@items.size) { Array.new(@comps, pred) }
  end

  # Trains all components, making a single pass over the ratings for each,
  # and deflates the working ratings as it goes.
  #
  # Expects #init_uv to have been called first.
  def svd
    @comps.times do |comp|
      each_rating do |i, j, item|
        u = @u[i][comp]
        v = @v[j][comp]
        x = @users[i][item] # real
        e = u * v           # estimation
        r = x - e           # residual
        @u[i][comp], @v[j][comp] = updated_factors(u, v, r)
      end

      # remove
      # NOTE(review): the component subtracted here is comp - 1, not the one
      # just trained, so components 0 and 1 are both fitted to the raw
      # ratings. Kept exactly as in the original port - confirm against the
      # reference implementation before changing.
      next if comp.zero?

      each_rating do |i, j, item|
        @users[i][item] -= @u[i][comp - 1] * @v[j][comp - 1]
      end
    end
  end

  # Returns the predicted rating (Float) of +item+ for the user at index
  # +user+, training the model first if that has not happened yet.
  #
  # Raises KeyError when +item+ is not in the item list.
  def predict(user, item)
    ensure_trained
    j = @item2idx.fetch(item)
    pred = 0.0
    # Plain left-to-right accumulation; Array#sum would use compensated
    # summation and could change the low-order bits of the result.
    @comps.times { |comp| pred += @u[user][comp] * @v[j][comp] }
    pred
  end

  # Returns [[item, predicted rating], ...] for every item the user at index
  # +user+ has not rated, ordered from highest to lowest prediction.
  def recommends(user)
    ensure_trained
    rated = @ratings[user]
    candidates = @items.reject { |item| rated.key?(item) }
    scored = candidates.map { |item| [item, predict(user, item)] }
    scored.sort { |x, y| y[1] <=> x[1] }
  end

  private

  # Runs #init_uv and #svd once; later calls are no-ops until #init_uv is
  # called again.
  def ensure_trained
    return if @trained

    init_uv
    svd
    @trained = true
  end

  # Yields (user index, item index, item) for every rating in the working
  # copy. Raises KeyError when a rated item is missing from the item list.
  def each_rating
    @users.each_with_index do |ratings, i|
      # Iterate over a snapshot of the keys because callers modify the hash
      # values while iterating.
      ratings.keys.each do |item|
        yield i, @item2idx.fetch(item), item
      end
    end
  end

  # Returns the new [u, v] pair for one gradient step given residual +r+.
  # Both results are computed from the values of u and v before the step.
  def updated_factors(u, v, r)
    [u + @lrate * v * r, v + @lrate * u * r]
  end
end

# Same training loop as Hebbian, with a regularisation term weighted by @k
# added to each gradient step.
class Simon < Hebbian
  def initialize(user, item)
    @k = 0.02
    super(user, item)
  end

  private

  # Gradient step with the @k * factor penalty subtracted from each gradient.
  def updated_factors(u, v, r)
    [u + @lrate * (v * r - @k * u), v + @lrate * (u * r - @k * v)]
  end
end

users = [{'A'=> 4, 'B'=> 5, 'C'=> 2, 'D'=> 4,          'F'=> 5}, # user 0
         {'A'=> 2,          'C'=> 3, 'D'=> 4, 'E'=> 3         }, # user 1
         {'A'=> 1, 'B'=> 4,          'D'=> 5, 'E'=> 3, 'F'=> 4}, # user 2
         {         'B'=> 5,                   'E'=> 2, 'F'=> 4}, # user 3
         {         'B'=> 3, 'C'=> 1, 'D'=> 3,          'F'=> 3}] # user 4

items = ['A', 'B', 'C', 'D', 'E', 'F']

svd = Hebbian.new(users, items)
p svd.recommends(3)

svd = Simon.new(users, items)
p svd.recommends(3)