Class: Classifier::CRM114

Inherits:
Object
  • Object
show all
Defined in:
lib/crm114.rb

Constant Summary

# Classifier flags; substituted for the first %s of both OPT_LEARN and OPT_CLASSIFY.
CLASSIFICATION_TYPE =
'<osb unique microgroom>'
# File suffix; classify passes it as the last %s of OPT_CLASSIFY, where it follows
# the captured category name inside the match pattern.
FILE_EXTENSION =
'.css'
# Command prefix every invocation starts with; the crm executable is looked up via env.
CMD_CRM =
'/usr/bin/env crm'
# Format template for the inline program used by learn!:
# slot 1 = CLASSIFICATION_TYPE, slot 2 = the path returned by css_file_path(category).
OPT_LEARN =
'-{ learn %s ( %s ) }'
# Format template for the inline program used by classify. Slots, in order:
# CLASSIFICATION_TYPE, the space-joined css_file_path results, the slash-escaped
# @path, and FILE_EXTENSION. The program outputs ":best:" and ":prob:" separated by
# a tab, which classify splits on.
OPT_CLASSIFY =
'-{ isolate (:stats:); classify %s ( %s ) (:stats:); match [:stats:] (:: :best: :prob:) /Best match to file .. \\(%s\\/([[:graph:]]+)\\%s\\) prob: ([0-9.]+)/; output /:*:best:\\t:*:prob:/ }'

Class Method Summary

Instance Method Summary

Constructor Details

- (CRM114) initialize(categories, options = {})

Returns a new CRM114 classifier defined by the given categories.

Parameters:

  • (Array<#to_s>) categories

Options Hash (options):

  • (String) :path — default: '.'


25
26
27
28
29
# File 'lib/crm114.rb', line 25

# Sets up a classifier for the given categories.
#
# @param categories [#to_a] category names; each element is converted with
#   +to_s.to_sym+ and the resulting symbols are stored in @categories
# @param options [Hash]
# @option options [String] :path ('.') directory, stored in expanded (absolute) form
# @option options [Boolean] :debug (false) when truthy, classify and learn!
#   print the command line they are about to run
def initialize(categories, options = {})
  @categories = categories.to_a.map { |name| name.to_s.to_sym }

  base_dir = options[:path] || '.'
  @path = File.expand_path(base_dir)

  debug_flag = options[:debug]
  @debug = debug_flag || false
end

Dynamic Method Handling

This class handles dynamic methods through the method_missing method

- (Object) method_missing(symbol, *args)



76
77
78
79
80
81
82
83
84
85
86
# File 'lib/crm114.rb', line 76

# Handles dynamic, category-named calls.
#
# For every category in @categories (say +:spam+):
# * +spam!(text)+ is forwarded to +learn!(:spam, text)+
# * +spam?(text)+ is true when +classify(text)+ picks +:spam+ as best match
#
# A block given to the dynamic call is passed through to learn!/classify, so
# the block forms of those methods work here too. Any other method name is
# handed to +super+, which raises NoMethodError.
#
# @param symbol [Symbol] name of the method that was called
# @param args [Array] arguments forwarded to learn!/classify
# @return [Object] the result of learn!, or a Boolean for predicates
def method_missing(symbol, *args, &block) # :nodoc:
  name = symbol.to_s
  if name.end_with?('!', '?')
    category = name.chop.to_sym
    if @categories.include?(category)
      return learn!(category, *args, &block) if name.end_with?('!')
      # Otherwise it's a predicate: compare the best match with the category.
      return classify(*args, &block).first == category
    end
  end
  super
end

# Keeps respond_to? and Object#method in agreement with method_missing:
# true for +<category>!+ and +<category>?+ when the category is known.
#
# @param symbol [Symbol] method name being queried
# @param include_private [Boolean] passed through to +super+
# @return [Boolean]
def respond_to_missing?(symbol, include_private = false) # :nodoc:
  name = symbol.to_s
  (name.end_with?('!', '?') && @categories.include?(name.chop.to_sym)) || super
end

Class Method Details

+ (String?) version

Returns a string containing the installed CRM114 engine version in a format such as "20060118-BlameTheReavers".

Returns:

  • (String, nil)


16
17
18
# File 'lib/crm114.rb', line 16

# Runs the crm command with +-v+ and extracts the version token from the first
# line it prints.
#
# Uses +gets+ rather than +readline+ so that a command which prints nothing
# (for instance when crm is not installed) yields nil instead of raising
# EOFError.
#
# @return [String, nil] the version (e.g. "20060118-BlameTheReavers"), or nil
#   when no line was read or the line does not contain a version banner
def self.version
  banner = IO.popen(CMD_CRM + ' -v', 'r') { |pipe| pipe.gets }
  banner && banner[/CRM114, version ([\d\w\-\.]+)/, 1]
end

Instance Method Details

- (Array(Symbol, Float)) classify(text = nil, &block)

Returns the classification of the provided text as a tuple containing the highest-probability category and a confidence indicator in the range of 0.5..1.0.

Parameters:

  • (String) text (defaults to: nil)

Returns:

  • (Array(Symbol, Float))


62
63
64
65
66
67
68
69
70
71
72
73
74
# File 'lib/crm114.rb', line 62

# Classifies text by piping it through the crm command.
#
# The inline program is built from OPT_CLASSIFY, filled with
# CLASSIFICATION_TYPE, the css_file_path of every category, the slash-escaped
# @path and FILE_EXTENSION. When @debug is set the full command is printed.
#
# @param text [String, nil] written to the command's stdin when no block is given
# @yield [pipe] when a block is given it is called with the open pipe instead
#   of writing +text+
# @return [Array(Symbol, Float)] the first tab-separated field as a Symbol and
#   the last one as a Float; +[nil, 0.0]+ when no line containing a tab was read
def classify(text = nil, &block)
  css_files = @categories.map { |category| css_file_path(category) }
  escaped_path = @path.gsub(/\//, '\/')
  program = OPT_CLASSIFY % [CLASSIFICATION_TYPE, css_files.join(' '), escaped_path, FILE_EXTENSION]
  cmd = "#{CMD_CRM} '#{program}'"
  puts cmd if @debug

  output = IO.popen(cmd, 'r+') do |pipe|
    if block_given?
      block.call(pipe)
    else
      pipe.write(text)
    end
    # Signal end of input so the command can produce its answer.
    pipe.close_write
    (pipe.closed? || pipe.eof?) ? nil : pipe.readline
  end

  if output && output.include?("\t")
    fields = output.split("\t")
    [fields.first.to_sym, fields.last.to_f]
  else
    [nil, 0.0]
  end
end

- (void) learn!(category, text, &block) Also known as: train!

This method returns an undefined value.

Trains the classifier to consider the given text to be a sample from the set named by category.

Parameters:

  • (#to_s) category
  • (String) text


38
39
40
41
42
# File 'lib/crm114.rb', line 38

# Feeds a training sample for +category+ to the crm command.
#
# The inline program is OPT_LEARN filled with CLASSIFICATION_TYPE and
# css_file_path(category). When @debug is set the full command is printed.
#
# @param category [Object] passed to css_file_path to pick the target file
# @param text [String] written to the command's stdin when no block is given
# @yield [pipe] when a block is given it is called with the open pipe instead
#   of writing +text+
# @return [Object] whatever the write (or the block) returned; callers should
#   not rely on it
def learn!(category, text, &block)
  program = OPT_LEARN % [CLASSIFICATION_TYPE, css_file_path(category)]
  cmd = "#{CMD_CRM} '#{program}'"
  puts cmd if @debug

  IO.popen(cmd, 'w') do |pipe|
    if block_given?
      block.call(pipe)
    else
      pipe.write(text)
    end
  end
end

- (void) unlearn!(category, text, &block) Also known as: untrain!

This method returns an undefined value.

Raises:

  • NotImplementedError


49
50
51
# File 'lib/crm114.rb', line 49

# Placeholder for removing a training sample; not available yet.
#
# @param category [Object] unused
# @param text [String] unused
# @raise [NotImplementedError] always
def unlearn!(category, text, &block) # :nodoc:
  raise NotImplementedError, 'unlearning not supported at present'
end