# A small naive Bayes text classifier.
#
# The set of categories is fixed at construction time. Each category keeps a
# table of word => occurrence count that #train fills in. #classifications
# scores a text against every category by summing the log of each word's
# relative frequency in that category, and #classify returns the name of the
# best-scoring category.
#
# Every text handed to this class must respond to +word_hash+ and return a
# Hash of word => count.
#
# Training is also available through dynamic methods named after a category,
# e.g. +classifier.train_not_interesting("text one", "text two")+.
class BayesianClassifier
  # Stand-in count for a word that was never seen in a category, so that one
  # unknown word does not push the log score to -Infinity.
  UNSEEN_WORD_COUNT = 0.1

  # @param categories [Array<#to_s>] category names. Underscores become
  #   spaces and the first letter is capitalized, so +:not_spam+ is stored
  #   as +:"Not spam"+.
  def initialize(*categories)
    @categories = {}
    categories.each { |category| @categories[normalize_category(category)] = {} }
    # Running total of every word occurrence trained so far. It is maintained
    # by #train but not read anywhere else in this class.
    @total_words = 0
  end

  # Adds the words of +text+ to the counts of +category+.
  #
  # @param category [#to_s] category name, normalized like in #initialize
  # @param text [#word_hash] the training text
  # @return [Hash] the word hash of +text+
  # @raise [ArgumentError] if the category was not given to #initialize
  def train(category, text)
    key = normalize_category(category)
    words = @categories[key]
    raise ArgumentError, "No such category: #{key}" unless words

    text.word_hash.each do |word, count|
      words[word] = words.fetch(word, 0) + count
      @total_words += count
    end
  end

  # @param text [#word_hash] the text to classify
  # @return [String, nil] name of the highest-scoring category, or nil when
  #   the classifier has no categories
  def classify(text)
    best = classifications(text).max_by { |_category, score| score }
    best && best.first
  end

  # Scores +text+ against every category.
  #
  # Each distinct word of the text contributes once, however often it occurs
  # in the text. A text without any countable words scores 0 everywhere.
  #
  # @param text [#word_hash] the text to score
  # @return [Hash{String => Numeric}] category name => log score (higher is
  #   a better match)
  def classifications(text)
    words = text.word_hash
    @categories.each_with_object({}) do |(category, category_words), scores|
      scores[category.to_s] = score_for(category_words, words)
    end
  end

  # Implements the dynamic trainers. The category is derived from the method
  # name by dropping a +train_+ prefix and normalizing the rest; every
  # argument is then trained into that category. Names that do not resolve to
  # a known category are passed on to the default handler, which raises
  # NoMethodError, so typos are not silently ignored.
  #
  # @return [Array] the texts that were trained
  def method_missing(name, *args)
    category = category_for_method(name)
    return super unless @categories.key?(category)

    args.each { |text| train(category, text) }
  end

  # Keeps #respond_to? in step with #method_missing.
  def respond_to_missing?(name, include_private = false)
    @categories.key?(category_for_method(name)) || super
  end

  private

  # Canonical internal key for a category name: underscores to spaces, first
  # letter upper-cased and the rest lower-cased, as a Symbol.
  def normalize_category(name)
    name.to_s.tr('_', ' ').capitalize.to_sym
  end

  # Category key that a dynamic method name refers to.
  def category_for_method(name)
    normalize_category(name.to_s.gsub(/train_([\w]+)/, '\1'))
  end

  # Log score of the given text words against one category's word counts.
  def score_for(category_words, words)
    total = category_words.values.inject(0, :+).to_f
    # A category without training data has no frequencies to compare against.
    # Dividing by its zero total would yield +Infinity and make the empty
    # category win every classification, so it is ranked last instead.
    return -Float::INFINITY if total.zero? && !words.empty?

    words.each_key.inject(0) do |score, word|
      score + Math.log(category_words.fetch(word, UNSEEN_WORD_COUNT) / total)
    end
  end
end
# Core-class extension: BayesianClassifier calls #word_hash on the texts it
# trains on and scores.
class String
  # Counts the whitespace-separated words of this string that are longer than
  # three characters. Words are taken as-is: case and punctuation are kept.
  #
  # @return [Hash{Symbol => Integer}] each qualifying word, as a Symbol,
  #   mapped to its number of occurrences, in order of first appearance
  def word_hash
    long_words = split.select { |token| token.length > 3 }
    long_words.each_with_object({}) do |token, counts|
      key = token.intern
      counts[key] = counts.fetch(key, 0) + 1
    end
  end
end
# Figure 1: Bayesian classification in Ruby.
# (Back to Article)