aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDavid Phillips <david@sighup.nz>2019-06-20 22:13:00 +1200
committerDavid Phillips <david@sighup.nz>2019-06-20 22:13:18 +1200
commit5aa0effba2d5268b988db09080ff68e325993b43 (patch)
treec8c72f43b9735a7938920d62de7919d7a2fc02ff
parent1a7948225cbdcae9f798ec62a7eaa941f1e8a7cf (diff)
downloadmarkov-thing-5aa0effba2d5268b988db09080ff68e325993b43.tar.xz
Add initial cut of markov chain toolHEADmaster
Currently chooses only one token based on the immediate previous token. In future, looking around at 2 tokens might produce more coherent results.
-rwxr-xr-xprocess.pl64
1 files changed, 64 insertions, 0 deletions
diff --git a/process.pl b/process.pl
new file mode 100755
index 0000000..e1f1fae
--- /dev/null
+++ b/process.pl
@@ -0,0 +1,64 @@
+#!/usr/bin/env perl
+
+use strict;
+use warnings;
+
+use utf8;
+binmode(STDOUT, ":utf8");
+
+$| = 1;
+
+use Data::Dumper;
+
+sub some {
+ return $_[rand(@_)];
+}
+
+# Takes plain text lines in on stdin, performs analysis, outputs custom markov
+# file on stdout
+
+# Global markov data
+my %markov_data;
+
+print "\"" x 80;
+print "\nLearning words...\n";
+
+while (<STDIN>) {
+ chomp;
+ utf8::upgrade($_);
+ my @words = split /\s+/, $_;
+
+ # Leaning is the same for all but last word
+ for (my $i = 0; $i < @words - 1; $i++) {
+ my $word = $words[$i];
+ my $next_word = $words[$i + 1]; # +1 safe beacuse of loop bounds
+
+ push @{$markov_data{$word}}, $next_word;
+ }
+
+ # Now handle special case; last word must be learned as being followed by EOL ("")
+ push @{$markov_data{$words[@words - 1]}}, "";
+}
+
+print "Word Patterns:\n";
+#print Dumper(%markov_data);
+print "\"" x 80;
+print "\n";
+
+
+while (1) {
+ my $word = $ARGV[0] || some(keys %markov_data);
+ print "Taking \"$word\" as the seed\n";
+ my $i = 0;
+ do {
+ $i++;
+ print "$word";
+ $word = some(@{$markov_data{$word}});
+ } until($word eq "" or $i == 100);
+ print "\n";
+ sleep 1;
+}
+
+print "\n";
+print "\"" x 80;
+print "\n";