From 989c3e973f18e6533da414384ef0add95c692b89 Mon Sep 17 00:00:00 2001
From: David Phillips <dbphillipsnz@gmail.com>
Date: Thu, 5 May 2016 23:46:02 +1200
Subject: Invert load sharing for thread vs cluster node

Rows are now divided between cluster nodes and columns between threads;
previously it was the other way around.
This is a major change for anyone using this in a cluster, but it was done
to enable the frame interlacer to use buffered reads and writes.
---
 algorithms/burning-ship-lattice.c | 12 +++++------
 algorithms/burning-ship.c         | 12 +++++------
 algorithms/mandelbrot.c           | 12 +++++------
 fractal-gen.c                     | 43 ++++++++++++++++++++++++---------------
 fractal-gen.h                     |  1 +
 5 files changed, 46 insertions(+), 34 deletions(-)

diff --git a/algorithms/burning-ship-lattice.c b/algorithms/burning-ship-lattice.c
index 60851cc..cb3e268 100644
--- a/algorithms/burning-ship-lattice.c
+++ b/algorithms/burning-ship-lattice.c
@@ -42,12 +42,12 @@ void *generate_burning_ship_lattice_section(void *section)
 	double left = -1.8f;
 
 	/* FIXME document this */
-	b = (d->core*(size_units/size)+top);
+	b = clust_id*(size_units/size)+top; /* FIXME document this */
 
-	for (y = d->core; y < size; y += cores)
+	for (y = clust_id; y < size; y+=clust_total)
 	{
-		a = clust_id*(size_units/size)+left; /* FIXME document this */
-		for (x = clust_id; x < size; x+=clust_total)
+		a = d->core*(size_units/size)+left;
+		for (x = d->core; x < size; x += cores)
 		{
 			z = 0;
 			c = a+I*b;
@@ -59,9 +59,9 @@ void *generate_burning_ship_lattice_section(void *section)
 				z = cpow( fabs(creal(z)) + I*fabs(cimag(z)) , power) + c;
 			}
 			d->data[d->idx++] = (255*i)/iterat;
-			a += (clust_total*size_units)/size;
+			a += cores*(size_units/size);
 		}
-		b += (cores*size_units)/size;
+		b += clust_total*(size_units/size);
 	}
 	return NULL;
 }
diff --git a/algorithms/burning-ship.c b/algorithms/burning-ship.c
index 5f14c1d..6aea927 100644
--- a/algorithms/burning-ship.c
+++ b/algorithms/burning-ship.c
@@ -38,12 +38,12 @@ void *generate_burning_ship_section(void *section)
 	double left = -2.2f;
 
 	/* FIXME document this */
-	b = (d->core*(size_units/size)+top);
+	b = clust_id*(size_units/size)+top; /* FIXME document this */
 
-	for (y = d->core; y < size; y += cores)
+	for (y = clust_id; y < size; y += clust_total)
 	{
-		a = clust_id*(size_units/size)+left; /* FIXME document this */
-		for (x = clust_id; x < size; x+=clust_total)
+		a = d->core*(size_units/size)+left;
+		for (x = d->core; x < size; x += cores)
 		{
 			z = 0;
 			c = a+I*b;
@@ -55,9 +55,9 @@ void *generate_burning_ship_section(void *section)
 				z = cpow( fabs(creal(z)) + I*fabs(cimag(z)) , power) + c;
 			}
 			d->data[d->idx++] = (255*i)/iterat;
-			a += (clust_total*size_units)/size;
+			a += cores*(size_units/size);
 		}
-		b += (cores*size_units)/size;
+		b += clust_total*(size_units/size);
 	}
 	return NULL;
 }
diff --git a/algorithms/mandelbrot.c b/algorithms/mandelbrot.c
index 1f95843..2d0a2b4 100644
--- a/algorithms/mandelbrot.c
+++ b/algorithms/mandelbrot.c
@@ -39,12 +39,12 @@ void *generate_mandelbrot_section(void *section)
 
 
 	/* FIXME document this */
-	b = (d->core*(size_units/size)+top);
+	b = clust_id*(size_units/size)+top; /* FIXME document this */
 
-	for (y = d->core; y < size; y += cores)
+	for (y = clust_id; y < size; y += clust_total)
 	{
-		a = clust_id*(size_units/size)+left; /* FIXME document this */
-		for (x = clust_id; x < size; x+=clust_total)
+		a = d->core*(size_units/size)+left;
+		for (x = d->core; x < size; x += cores)
 		{
 			z = 0;
 			c = a + I*b;
@@ -56,9 +56,9 @@ void *generate_mandelbrot_section(void *section)
 				z = cpow(z , power) + c;
 			}
 			d->data[d->idx++] = (255*i)/iterat;
-			a += (clust_total*size_units)/size;
+			a += cores*(size_units/size);
 		}
-		b += (cores*size_units)/size;
+		b += clust_total*(size_units/size);
 	}
 	return NULL;
 }
diff --git a/fractal-gen.c b/fractal-gen.c
index 9ceef5d..612e7ce 100644
--- a/fractal-gen.c
+++ b/fractal-gen.c
@@ -45,12 +45,15 @@ static struct section_generator generators[] = {
 int main(int argc, char **argv)
 {
 	unsigned long x = 0;
+	unsigned long width = 0;
+	size_t toalloc = 0;
 	unsigned long y = 0;
 	unsigned long i = 0;
 	double ram_nice = 0.f; /* Forecast RAM usage, divided down to < 1024 */
 	char* ram_unit = NULL; /* Unit for ram_nice */
 	char* bname = NULL;
 	data_section* sections = NULL;
+	data_section *s = NULL;
 	generator_func generator = NULL;
 
 	/* who are we? */
@@ -102,16 +105,16 @@ int main(int argc, char **argv)
 	{
 		/* A bit complex, icky, will document later */
 		if (i < (size%cores))
-			x = (size/cores)+1;
+			width = (size/cores)+1;
 		else
-			x = (size/cores);
+			width = (size/cores);
 
-		x *= size;
-		x = ceilf((double)x/clust_total);
+		toalloc = width*size;
+		toalloc = ceilf((double)toalloc/clust_total);
 
-		if ((sections[i].data = malloc(x)) == NULL)
+		if ((sections[i].data = malloc(toalloc)) == NULL)
 		{
-			fprintf(stderr, "\nmalloc of %lu bytes failed\n", x);
+			fprintf(stderr, "\nmalloc of %lu bytes failed\n", toalloc);
 			perror("malloc");
 
 			/* Free already allocated chunks of memory */
@@ -123,14 +126,18 @@ int main(int argc, char **argv)
 			return 1;
 		}
 		sections[i].core = i;
-		sections[i].datasize = x;
+		sections[i].width = width;
+		sections[i].datasize = toalloc;
 		fprintf(stderr, " -> Thread %lu\r", i);
 		pthread_create(&sections[i].thread, NULL, generator, &(sections[i]));
 	}
 
-	while((x = sections[0].idx) < sections[0].datasize)
+	s = &(sections[cores-1]);
+	while((x = s->idx) < s->datasize)
 	{
-		fprintf(stderr, "Thread 0: %.4f%%\r", 100.f*(double)x/sections[0].datasize );
+		fprintf(stderr, "Thread %d: %.4f%%\r",
+		        cores-1,
+		        100.f*(double)x/s->datasize);
 		sleep(1);
 	}
 
@@ -140,14 +147,18 @@ int main(int argc, char **argv)
 
 
 	/* Output PGM Header */
-	printf("P5\n%d\n%d\n255\n",size/clust_total,size);
-
-	/* Vomit the data segments back onto the screen, deinterlacing
-	 * TO DO: look at fwrite performance benefits over putchar */
-	for (y = 0; y < size; y++)
-		for (x = 0; x < size/clust_total; x++)
-			putchar(sections[y%cores].data[(y/cores)*(size/clust_total) + x]);
+	printf("P5\n%d\n%d\n255\n",size,size/clust_total);
 
+	/* Vomit the data segments onto stdout, interlacing frames from threads
+	 * FIXME: look at buffering if at all possible */
+	for (y = 0; y < size/clust_total; y++)
+	{
+		for (x = 0; x < size; x++)
+		{
+			s = &(sections[x%cores]);
+			putchar(s->data[y*(s->width) + x/cores]);
+		}
+	}
 	fprintf(stderr, "\nDone\n");
 
 	/* Free the memory we allocated for point data */
diff --git a/fractal-gen.h b/fractal-gen.h
index 392f43a..9c9a768 100644
--- a/fractal-gen.h
+++ b/fractal-gen.h
@@ -32,6 +32,7 @@ typedef struct
 {
 	volatile unsigned long idx;
 	unsigned long core;
+	unsigned long width;
 	unsigned long datasize;
 	char* data;
 	pthread_t thread;
-- 
cgit v1.1