知らなかったものを見つける。

Kotoenプロジェクトで協調フィルタを使いたかったので、例を探してたら見つけた次の記事のコードをRubyで書き直してみました。
特異値分解を用いたレコメンデーション - NO!と言えるようになりたい
コードは以下の通り。Pythonからの書き換えはほとんど逐語訳でいけたので、結構すんなり出来あがり。

# from http://d.hatena.ne.jp/ytakano/20081012/1223805723

# this program implements SVD-based recommendation algorithms
#
# see section 3 of
# Bhaskar Mehta, Thomas Hofmann, and Wolfgang Nejdl, Robust Collaborative Filtering,
# In Proceedings of the 1st ACM Conference on Recommender Systems, ACM Press, October 2007,
# pp. 49-56
# and
# Simon's blog (http://sifter.org/~simon/journal/20061211.html)

# Recommender that approximates every known rating as the dot product of a
# per-user vector (@u) and a per-item vector (@v), each holding @comps
# components, and ranks the items a user has not rated by that product.
#
# The factors are fitted one component at a time by a single gradient-style
# pass over the known ratings (see #svd).
class Hebbian
  # @param users [Array<Hash>] one hash per user (array index = user id)
  #   mapping item => numeric rating
  # @param items [Array] every item that can be rated or recommended
  # @param lrate [Numeric] learning rate used by the update rule
  # @param comps [Integer] number of components per user/item vector
  def initialize(users, items, lrate: 0.1, comps: 40)
    @lrate = lrate # learning rate
    @comps = comps # the number of components

    # Deep copy: the caller's rating hashes are never modified.
    @users = deep_copy(users)
    @items = items

    # item => column index into @v (a repeated item keeps its last index).
    @item2idx = {}
    items.each_with_index { |item, idx| @item2idx[item] = idx }
  end

  # Resets @u (one row per user) and @v (one row per item) so that every
  # entry equals sqrt(mean_rating / comps); the initial dot product of any
  # user row with any item row is therefore the mean rating.
  # With no ratings at all the mean is taken as 0.0 instead of 0 / 0.0 (NaN).
  def init_uv
    ratings = @users.flat_map(&:values)
    # Plain left-to-right addition, same order as iterating users then values.
    total = ratings.reduce(0) { |acc, val| acc + val }
    ave = ratings.empty? ? 0.0 : total / ratings.size.to_f
    pred = (ave / @comps) ** 0.5

    @u = Array.new(@users.size) { Array.new(@comps, pred) }
    @v = Array.new(@items.size) { Array.new(@comps, pred) }
  end

  # Fits the components in order. Each component gets one pass over all known
  # ratings; afterwards the product of the *previous* component is subtracted
  # from the values stored in @users, so this method consumes @users.
  #
  # NOTE(review): the subtraction lags one component behind the one that was
  # just trained (comp - 1, nothing after component 0). This is kept exactly
  # as found; confirm against the referenced algorithm whether it is intended.
  def svd
    @comps.times do |comp|
      train_component(comp)
      subtract_component(comp - 1) if comp > 0
    end
  end

  # @param user [Integer] index of the user in the users array
  # @param item [Object] an element of the items array
  # @return [Float] sum over all components of @u[user][c] * @v[item][c]
  def predict(user, item)
    col = @item2idx[item]
    pred = 0.0
    @comps.times { |comp| pred += @u[user][comp] * @v[col][comp] }
    pred
  end

  # Retrains the model and ranks the items +user+ has not rated.
  #
  # Training runs on a scratch copy of the ratings that is discarded
  # afterwards, because #svd (including overrides in subclasses) rewrites
  # @users while it works. Repeated calls therefore return the same result.
  #
  # @param user [Integer] index of the user in the users array
  # @return [Array<Array>] [item, predicted rating] pairs, highest first
  def recommends(user)
    ratings = @users
    @users = deep_copy(ratings)
    begin
      init_uv
      svd
    ensure
      @users = ratings
    end

    rated = ratings[user]
    unseen = @items.reject { |item| rated.key?(item) }
    unseen.map { |item| [item, predict(user, item)] }
          .sort { |x, y| y[1] <=> x[1] }
  end

  private

  # Independent copy of a nested ratings structure.
  def deep_copy(obj)
    Marshal.load(Marshal.dump(obj))
  end

  # One pass over every known rating, nudging component +comp+ of the user
  # and item vectors towards the value currently stored in @users.
  def train_component(comp)
    @users.each_with_index do |ratings, i|
      ratings.each do |item, x| # x: real value
        j = @item2idx[item]
        u = @u[i][comp]
        v = @v[j][comp]

        r = x - u * v # residual = real - estimation
        @u[i][comp] += @lrate * v * r
        @v[j][comp] += @lrate * u * r
      end
    end
  end

  # Subtracts the product of component +comp+ from every stored rating.
  def subtract_component(comp)
    @users.each_with_index do |ratings, i|
      ratings.keys.each do |item|
        j = @item2idx[item]
        ratings[item] -= @u[i][comp] * @v[j][comp]
      end
    end
  end
end

# Variant of Hebbian whose update rule additionally subtracts @k times the
# current factor value from each step.
class Simon < Hebbian
  def initialize(user, item)
    @k = 0.02 # weight of the extra term in the update rule
    super
  end

  # Same component-by-component procedure as the parent class, with the
  # @k-weighted term added to both factor updates. Rewrites the values
  # stored in @users as it goes.
  def svd
    @comps.times do |c|
      # Training pass for component c over every known rating.
      @users.each_with_index do |ratings, row|
        ratings.each do |item, actual|
          col = @item2idx[item]
          u_old = @u[row][c]
          v_old = @v[col][c]

          residual = actual - u_old * v_old
          @u[row][c] += @lrate * (v_old * residual - @k * u_old)
          @v[col][c] += @lrate * (u_old * residual - @k * v_old)
        end
      end

      # Nothing is removed after the very first component.
      next if c.zero?

      # Subtract the previous component's product from the stored ratings.
      prev = c - 1
      @users.each_with_index do |ratings, row|
        ratings.keys.each do |item|
          col = @item2idx[item]
          ratings[item] -= @u[row][prev] * @v[col][prev]
        end
      end
    end
  end
end

# Sample data: one hash per user (array index = user id), item => rating.
users = [
  { 'A' => 4, 'B' => 5, 'C' => 2, 'D' => 4, 'F' => 5 }, # user 0
  { 'A' => 2, 'C' => 3, 'D' => 4, 'E' => 3 },           # user 1
  { 'A' => 1, 'B' => 4, 'D' => 5, 'E' => 3, 'F' => 4 }, # user 2
  { 'B' => 5, 'E' => 2, 'F' => 4 },                     # user 3
  { 'B' => 3, 'C' => 1, 'D' => 3, 'F' => 3 }            # user 4
]

items = %w[A B C D E F]

# Print the ranked recommendations for user 3 from each recommender in turn.
# `svd` stays a top-level local and ends up bound to the last recommender.
svd = nil
[Hebbian, Simon].each do |recommender|
  svd = recommender.new(users, items)
  p svd.recommends(3)
end