Add initial cut of markov chain toolHEAD master

Currently chooses only one token based on the immediate previous token. In future, looking around at 2 tokens might produce more coherent results.
author: David Phillips <david@sighup.nz> 2019-06-20 22:13:00 +1200
committer: David Phillips <david@sighup.nz> 2019-06-20 22:13:18 +1200
commit: 5aa0effba2d5268b988db09080ff68e325993b43 (patch)
tree: c8c72f43b9735a7938920d62de7919d7a2fc02ff
parent: 1a7948225cbdcae9f798ec62a7eaa941f1e8a7cf (diff)
download: markov-thing-5aa0effba2d5268b988db09080ff68e325993b43.tar.xz
1 files changed, 64 insertions, 0 deletions
diff --git a/process.pl b/process.pl
new file mode 100755
index 0000000..e1f1fae
--- /dev/null
+++ b/process.pl
@@ -0,0 +1,64 @@
+#!/usr/bin/env perl
+
+use strict;
+use warnings;
+
+use utf8;
+binmode(STDOUT, ":utf8");
+
+$| = 1;
+
+use Data::Dumper;
+
+sub some {
+	return $_[rand(@_)];
+}
+
+# Takes plain text lines in on stdin, performs analysis, outputs custom markov
+# file on stdout
+
+# Global markov data
+my %markov_data;
+
+print "\"" x 80;
+print "\nLearning words...\n";
+
+while (<STDIN>) {
+	chomp;
+	utf8::upgrade($_);
+	my @words = split /\s+/, $_;
+	
+	# Leaning is the same for all but last word
+	for (my $i = 0; $i < @words - 1; $i++) {
+		my $word = $words[$i];
+		my $next_word = $words[$i + 1]; # +1 safe beacuse of loop bounds
+
+		push @{$markov_data{$word}}, $next_word;
+	}
+
+	# Now handle special case; last word must be learned as being followed by EOL ("")
+	push @{$markov_data{$words[@words - 1]}}, "";
+}
+
+print "Word Patterns:\n";
+#print Dumper(%markov_data);
+print "\"" x 80;
+print "\n";
+
+
+while (1) {
+	my $word = $ARGV[0] || some(keys %markov_data);
+	print "Taking \"$word\" as the seed\n";
+	my $i = 0;
+	do {
+		$i++;
+		print "$word";
+		$word = some(@{$markov_data{$word}});
+	} until($word eq "" or $i == 100);
+	print "\n";
+	sleep 1;
+}
+
+print "\n";
+print "\"" x 80;
+print "\n";
author	David Phillips <david@sighup.nz>	2019-06-20 22:13:00 +1200
committer	David Phillips <david@sighup.nz>	2019-06-20 22:13:18 +1200
commit	5aa0effba2d5268b988db09080ff68e325993b43 (patch)
tree	c8c72f43b9735a7938920d62de7919d7a2fc02ff
parent	1a7948225cbdcae9f798ec62a7eaa941f1e8a7cf (diff)
download	markov-thing-5aa0effba2d5268b988db09080ff68e325993b43.tar.xz