diff options
author | David Phillips <david@sighup.nz> | 2019-06-20 22:13:00 +1200 |
---|---|---|
committer | David Phillips <david@sighup.nz> | 2019-06-20 22:13:18 +1200 |
commit | 5aa0effba2d5268b988db09080ff68e325993b43 (patch) | |
tree | c8c72f43b9735a7938920d62de7919d7a2fc02ff | |
parent | 1a7948225cbdcae9f798ec62a7eaa941f1e8a7cf (diff) | |
download | markov-thing-5aa0effba2d5268b988db09080ff68e325993b43.tar.xz |
Currently chooses only one token based on the immediate previous token.
In future, looking around at 2 tokens might produce more coherent results.
-rwxr-xr-x | process.pl | 64 |
1 files changed, 64 insertions, 0 deletions
diff --git a/process.pl b/process.pl new file mode 100755 index 0000000..e1f1fae --- /dev/null +++ b/process.pl @@ -0,0 +1,64 @@ +#!/usr/bin/env perl + +use strict; +use warnings; + +use utf8; +binmode(STDOUT, ":utf8"); + +$| = 1; + +use Data::Dumper; + +sub some { + return $_[rand(@_)]; +} + +# Takes plain text lines in on stdin, performs analysis, outputs custom markov +# file on stdout + +# Global markov data +my %markov_data; + +print "\"" x 80; +print "\nLearning words...\n"; + +while (<STDIN>) { + chomp; + utf8::upgrade($_); + my @words = split /\s+/, $_; + + # Leaning is the same for all but last word + for (my $i = 0; $i < @words - 1; $i++) { + my $word = $words[$i]; + my $next_word = $words[$i + 1]; # +1 safe beacuse of loop bounds + + push @{$markov_data{$word}}, $next_word; + } + + # Now handle special case; last word must be learned as being followed by EOL ("") + push @{$markov_data{$words[@words - 1]}}, ""; +} + +print "Word Patterns:\n"; +#print Dumper(%markov_data); +print "\"" x 80; +print "\n"; + + +while (1) { + my $word = $ARGV[0] || some(keys %markov_data); + print "Taking \"$word\" as the seed\n"; + my $i = 0; + do { + $i++; + print "$word"; + $word = some(@{$markov_data{$word}}); + } until($word eq "" or $i == 100); + print "\n"; + sleep 1; +} + +print "\n"; +print "\"" x 80; +print "\n"; |