use v6-alpha;

# Word-count based text classifier.
#
# Usage:
#   add <category> <file>   count the words of <file> under <category>
#   classify <file>         print every known category with its score for
#                           <file>, best-scoring category first
#
# Counts are kept between runs in a tab-separated database file, one
# "<category>-<word>\t<count>" record per line.

# Name of the on-disk database, shared by load_db and save_db.
my $db_file = "words.db.pl";

# In-memory copy of the database: "<category>-<word>" => count.
my %words;

# Read the database file into %words.
# Does nothing when the file does not exist yet; dies if it exists but
# cannot be opened.
sub load_db returns Void {
    return() unless $db_file ~~ :e;
    my $db = open($db_file) err die "Cannot open the $db_file file: $!";
    for (=$db) -> $_line {
        my $line = $_line;
        my ($key, $value) = split("\t", $line);
        # Skip blank or malformed records (no tab-separated count) instead
        # of storing an entry with an undefined value.
        if (defined($value)) {
            # Store the count as a number so it is written back cleanly.
            %words{$key} = +$value;
        }
    }
    $db.close();
}

# Write %words back to the database file, one "key<TAB>count" per line.
# Dies if the file cannot be opened for writing.
sub save_db returns Void {
    my $db = open($db_file, :w) err die "Cannot open the $db_file file: $!";
    for (%words.kv) -> $key, $value {
        $db.say($key ~ "\t" ~ $value);
    }
    $db.close();
}

# Count the words of a file.
#   $file - path of the text file to read
# Returns a hash mapping each lower-cased word to its number of occurrences.
# A "word" is a run of \w characters followed by whitespace or by the end of
# the line. Dies if the file cannot be opened.
sub parse_file (Str $file) returns Hash {
    my %words_in_file;
    my $fh = open($file) err die "Cannot open the '$file' file: $!";
    for (=$fh) -> $_line {
        # Work on a copy (the substitution below consumes it) and append a
        # newline sentinel: the pattern only accepts a word that is followed
        # by a whitespace character, so without the sentinel the last word
        # of a line that has no trailing newline would be dropped.
        my $line = $_line ~ "\n";
        while ($line ~~ s:perl5/(\w+)[ \t\n\r]//) {
            %words_in_file{lc($0)}++;
        }
    }
    $fh.close;
    return %words_in_file;
}

# Add the word counts of one document to a category.
#   $category      - category name the document belongs to
#   %words_in_file - word => count, as returned by parse_file
# Updates the global %words in place.
sub add_words (Str $category, %words_in_file) returns Void {
    for (%words_in_file.kv) -> $key, $value {
        %words{"$category-$key"} += $value;
    }
}

# Score a document against every known category and print the result.
#   %words_in_file - word => count, as returned by parse_file (only the
#                    distinct words are used, not their counts)
# Prints one "<category> <score>" line per category, highest score first.
# The score is a naive-Bayes-style sum of logarithms.
sub classify (%words_in_file) returns Void {
    # Total word count per category, and over all categories.
    my %count;
    my $total = 0;
    for (%words.kv) -> $key, $value {
        # Keys look like "<category>-<word>". Ignore anything that does not
        # match, so a stale or undefined $0 is never used as a category.
        if ($key ~~ rx:perl5/^(.+)-(.+)$/) {
            %count{$0} += $value;
            $total += $value;
        }
    }

    my %score;
    for (%words_in_file.keys) -> $word {
        for (%count.kv) -> $category, $count {
            if (defined(%words{"$category-$word"})) {
                %score{$category} += log(%words{"$category-$word"} / $count);
            }
            else {
                # Word never seen in this category: 0.01 stands in for the
                # missing count so the logarithm stays finite.
                %score{$category} += log(0.01 / $count);
            }
        }
    }

    # Add the share of all counted words that belongs to each category.
    for (%count.kv) -> $category, $count {
        %score{$category} += log($count / $total);
    }

    # do this weird sort block because:
    #   %score{$^a} <=> %score{$^b}
    # does not currently work
    for (%count.keys.sort:{
            %score{$^a} == %score{$^b} ?? 0 !!
            %score{$^a} >  %score{$^b} ?? -1 !! 1
        }) -> $category {
        say("$category %score{$category}");
    }
}

load_db();

# Check the argument count first so @*ARGS[0] is only read when it exists.
if (+@*ARGS == 3 && @*ARGS[0] eq 'add') {
    add_words(@*ARGS[1], parse_file(@*ARGS[2]));
}
elsif (+@*ARGS == 2 && @*ARGS[0] eq 'classify') {
    classify(parse_file(@*ARGS[1]));
}
else {
    say('USAGE: add <category> <file>');
    say('       classify <file>');
}

# The database is written back on every run, whichever branch was taken.
save_db();