# Naive Bayes text classifier over a fixed set of categories.
#
# Category names are normalised (underscores become spaces, first letter
# capitalised, converted to a Symbol), so :not_spam, "not_spam" and
# "Not spam" all refer to the same category.
#
# Every text passed in must respond to +word_hash+ and return a Hash that
# maps words to occurrence counts.
#
# Training is available through #train and through dynamic
# +train_<category>+ methods, e.g. +train_spam("some text", "more text")+.
class BayesianClassifier
    # Pseudo-count used for a word that was never seen in a category, so the
    # logarithm stays finite for trained categories.
    UNSEEN_WORD_WEIGHT = 0.1

    # categories - any number of category names (Symbols or Strings).
    def initialize(*categories)
        @categories = {}
        categories.each { |category| @categories[normalize_category(category)] = {} }
        # Running total of all word occurrences seen by #train.
        @total_words = 0
    end

    # Adds the word counts of +text+ to +category+.
    #
    # Raises ArgumentError when +category+ was not given to the constructor
    # (instead of failing with a NoMethodError on nil).
    # Returns the word hash of +text+.
    def train(category, text)
        key = normalize_category(category)
        words = @categories.fetch(key) do
            raise ArgumentError, "No such category: #{key}"
        end
        text.word_hash.each do |word, count|
            words[word] = words.fetch(word, 0) + count
            @total_words += count
        end
    end

    # Returns the name (String) of the highest-scoring category for +text+,
    # or nil when the classifier has no categories.
    def classify(text)
        best = classifications(text).max_by { |_category, score| score }
        best && best.first
    end

    # Returns a Hash mapping each category name (String) to its score for
    # +text+: the sum, over the distinct words of +text+, of
    # log(count_in_category / total_count_in_category). How often a word
    # occurs inside +text+ itself is not taken into account.
    #
    # The score stays 0 when +text+ yields no words. A category without any
    # training data scores -Infinity as soon as +text+ has a word, so it can
    # never outrank a trained category.
    def classifications(text)
        # The word list does not depend on the category; compute it once.
        words = text.word_hash.keys
        @categories.each_with_object({}) do |(category, category_words), scores|
            total = category_words.values.inject(0) { |sum, count| sum + count }.to_f
            scores[category.to_s] = words.inject(0) do |score, word|
                score + word_log_likelihood(category_words, word, total)
            end
        end
    end

    # Implements the dynamic +train_<category>+ methods: every argument is
    # trained into the named category. A bare category name (without the
    # +train_+ prefix) is accepted as well. Any other unknown method is
    # passed on to +super+, which raises NoMethodError.
    def method_missing(name, *args)
        category = dynamic_category(name)
        return super unless @categories.key?(category)
        args.each { |text| train(category, text) }
    end

    # Keeps respond_to? in agreement with #method_missing.
    def respond_to_missing?(name, include_private = false)
        @categories.key?(dynamic_category(name)) || super
    end

    private

    # Canonical Symbol key for a category name.
    def normalize_category(name)
        name.to_s.tr("_", " ").capitalize.intern
    end

    # Category key addressed by a dynamic method name such as :train_spam.
    def dynamic_category(name)
        normalize_category(name.to_s.gsub(/train_([\w]+)/, '\1'))
    end

    # Log-likelihood contribution of a single word for one category.
    def word_log_likelihood(category_words, word, total)
        # Dividing by a zero total would give +Infinity and let an untrained
        # category win every classification.
        return -Float::INFINITY if total.zero?
        Math.log(category_words.fetch(word, UNSEEN_WORD_WEIGHT) / total)
    end
end
class String
    # Counts the whitespace-separated words of this string that are longer
    # than three characters.
    #
    # Returns a Hash mapping each such word, converted to a Symbol exactly
    # as written, to the number of times it occurs.
    def word_hash
        split.each_with_object({}) do |token, tally|
            next unless token.length > 3
            key = token.intern
            tally[key] = (tally[key] || 0) + 1
        end
    end
end

Figure 1: Bayesian classification in Ruby.

Back to Article