#!/usr/bin/env perl
#
# Markov chain tool.
#
# Reads plain-text lines on STDIN, learns word-to-word transition data,
# then repeatedly prints generated word sequences, seeded by $ARGV[0]
# (if supplied) or by a random learned word.
#
# Currently chooses only one token based on the immediate previous token.
# In future, looking around at 2 tokens might produce more coherent results.

use strict;
use warnings;

use utf8;
binmode(STDOUT, ":utf8");

$| = 1;    # unbuffered STDOUT so progress and output appear immediately

use Data::Dumper;

# Return one element of the argument list chosen uniformly at random.
# Returns undef when called with an empty list.
sub some {
    return $_[rand @_];
}

# Global markov data: maps a word to the list of words observed to follow
# it. Duplicates are kept deliberately, so frequency weighting falls out of
# the uniform random pick in some().
my %markov_data;

print "\"" x 80;
print "\nLearning words...\n";

while (my $line = <STDIN>) {
    chomp $line;
    utf8::upgrade($line);
    my @words = split /\s+/, $line;

    # Skip blank lines: otherwise $words[-1] below is undef and we would
    # both warn and learn a bogus "" key.
    next unless @words;

    # Learning is the same for all but the last word.
    for my $i (0 .. $#words - 1) {
        # $i + 1 is safe because of the loop bounds.
        push @{ $markov_data{ $words[$i] } }, $words[$i + 1];
    }

    # Special case: the last word is learned as being followed by EOL ("").
    push @{ $markov_data{ $words[-1] } }, "";
}

print "Word Patterns:\n";
#print Dumper(\%markov_data);
print "\"" x 80;
print "\n";

while (1) {
    my $word = $ARGV[0] || some(keys %markov_data);

    # No training data at all (empty/blank STDIN): nothing to generate.
    last unless defined $word;

    print "Taking \"$word\" as the seed\n";
    my $count = 0;
    do {
        $count++;
        print $word;
        # A word may have no learned successors (e.g. an unseen $ARGV[0]
        # seed); treat that as end-of-line rather than warning on undef.
        # The "// []" also avoids autovivifying an empty entry.
        $word = some(@{ $markov_data{$word} // [] }) // "";
    } until ($word eq "" or $count == 100);    # cap runaway sentences at 100 words
    print "\n";
    sleep 1;
}