From 989c3e973f18e6533da414384ef0add95c692b89 Mon Sep 17 00:00:00 2001 From: David Phillips Date: Thu, 5 May 2016 23:46:02 +1200 Subject: Invert load sharing for thread vs cluster node Rows are divided between cluster nodes and columns between threads. This is a major change for anyone using this in a cluster, but it was done so to enable the frame interlacer to use buffered reads and writes. --- algorithms/burning-ship-lattice.c | 12 +++++------ algorithms/burning-ship.c | 12 +++++------ algorithms/mandelbrot.c | 12 +++++------ fractal-gen.c | 43 ++++++++++++++++++++++++--------------- fractal-gen.h | 1 + 5 files changed, 46 insertions(+), 34 deletions(-) diff --git a/algorithms/burning-ship-lattice.c b/algorithms/burning-ship-lattice.c index 60851cc..cb3e268 100644 --- a/algorithms/burning-ship-lattice.c +++ b/algorithms/burning-ship-lattice.c @@ -42,12 +42,12 @@ void *generate_burning_ship_lattice_section(void *section) double left = -1.8f; /* FIXME document this */ - b = (d->core*(size_units/size)+top); + b = clust_id*(size_units/size)+top; /* FIXME document this */ - for (y = d->core; y < size; y += cores) + for (y = clust_id; y < size; y+=clust_total) { - a = clust_id*(size_units/size)+left; /* FIXME document this */ - for (x = clust_id; x < size; x+=clust_total) + a = d->core*(size_units/size)+left; + for (x = d->core; x < size; x += cores) { z = 0; c = a+I*b; @@ -59,9 +59,9 @@ void *generate_burning_ship_lattice_section(void *section) z = cpow( fabs(creal(z)) + I*fabs(cimag(z)) , power) + c; } d->data[d->idx++] = (255*i)/iterat; - a += (clust_total*size_units)/size; + a += cores*(size_units/size); } - b += (cores*size_units)/size; + b += clust_total*(size_units/size); } return NULL; } diff --git a/algorithms/burning-ship.c b/algorithms/burning-ship.c index 5f14c1d..6aea927 100644 --- a/algorithms/burning-ship.c +++ b/algorithms/burning-ship.c @@ -38,12 +38,12 @@ void *generate_burning_ship_section(void *section) double left = -2.2f; /* FIXME document this 
*/ - b = (d->core*(size_units/size)+top); + b = clust_id*(size_units/size)+top; /* FIXME document this */ - for (y = d->core; y < size; y += cores) + for (y = clust_id; y < size; y += clust_total) { - a = clust_id*(size_units/size)+left; /* FIXME document this */ - for (x = clust_id; x < size; x+=clust_total) + a = d->core*(size_units/size)+left; + for (x = d->core; x < size; x += cores) { z = 0; c = a+I*b; @@ -55,9 +55,9 @@ void *generate_burning_ship_section(void *section) z = cpow( fabs(creal(z)) + I*fabs(cimag(z)) , power) + c; } d->data[d->idx++] = (255*i)/iterat; - a += (clust_total*size_units)/size; + a += cores*(size_units/size); } - b += (cores*size_units)/size; + b += clust_total*(size_units/size); } return NULL; } diff --git a/algorithms/mandelbrot.c b/algorithms/mandelbrot.c index 1f95843..2d0a2b4 100644 --- a/algorithms/mandelbrot.c +++ b/algorithms/mandelbrot.c @@ -39,12 +39,12 @@ void *generate_mandelbrot_section(void *section) /* FIXME document this */ - b = (d->core*(size_units/size)+top); + b = clust_id*(size_units/size)+top; /* FIXME document this */ - for (y = d->core; y < size; y += cores) + for (y = clust_id; y < size; y += clust_total) { - a = clust_id*(size_units/size)+left; /* FIXME document this */ - for (x = clust_id; x < size; x+=clust_total) + a = d->core*(size_units/size)+left; + for (x = d->core; x < size; x += cores) { z = 0; c = a + I*b; @@ -56,9 +56,9 @@ void *generate_mandelbrot_section(void *section) z = cpow(z , power) + c; } d->data[d->idx++] = (255*i)/iterat; - a += (clust_total*size_units)/size; + a += cores*(size_units/size); } - b += (cores*size_units)/size; + b += clust_total*(size_units/size); } return NULL; } diff --git a/fractal-gen.c b/fractal-gen.c index 9ceef5d..612e7ce 100644 --- a/fractal-gen.c +++ b/fractal-gen.c @@ -45,12 +45,15 @@ static struct section_generator generators[] = { int main(int argc, char **argv) { unsigned long x = 0; + unsigned long width = 0; + size_t toalloc = 0; unsigned long y = 0; unsigned 
long i = 0; double ram_nice = 0.f; /* Forecast RAM usage, divided down to < 1024 */ char* ram_unit = NULL; /* Unit for ram_nice */ char* bname = NULL; data_section* sections = NULL; + data_section *s = NULL; generator_func generator = NULL; /* who are we? */ @@ -102,16 +105,16 @@ int main(int argc, char **argv) { /* A bit complex, icky, will document later */ if (i < (size%cores)) - x = (size/cores)+1; + width = (size/cores)+1; else - x = (size/cores); + width = (size/cores); - x *= size; - x = ceilf((double)x/clust_total); + toalloc = width*size; + toalloc = ceilf((double)toalloc/clust_total); - if ((sections[i].data = malloc(x)) == NULL) + if ((sections[i].data = malloc(toalloc)) == NULL) { - fprintf(stderr, "\nmalloc of %lu bytes failed\n", x); + fprintf(stderr, "\nmalloc of %lu bytes failed\n", toalloc); perror("malloc"); /* Free already allocated chunks of memory */ @@ -123,14 +126,18 @@ int main(int argc, char **argv) return 1; } sections[i].core = i; - sections[i].datasize = x; + sections[i].width = width; + sections[i].datasize = toalloc; fprintf(stderr, " -> Thread %lu\r", i); pthread_create(&sections[i].thread, NULL, generator, &(sections[i])); } - while((x = sections[0].idx) < sections[0].datasize) + s = &(sections[cores-1]); + while((x = s->idx) < s->datasize) { - fprintf(stderr, "Thread 0: %.4f%%\r", 100.f*(double)x/sections[0].datasize ); + fprintf(stderr, "Thread %d: %.4f%%\r", + cores-1, + 100.f*(double)x/s->datasize); sleep(1); } @@ -140,14 +147,18 @@ int main(int argc, char **argv) /* Output PGM Header */ - printf("P5\n%d\n%d\n255\n",size/clust_total,size); - - /* Vomit the data segments back onto the screen, deinterlacing - * TO DO: look at fwrite performance benefits over putchar */ - for (y = 0; y < size; y++) - for (x = 0; x < size/clust_total; x++) - putchar(sections[y%cores].data[(y/cores)*(size/clust_total) + x]); + printf("P5\n%d\n%d\n255\n",size,size/clust_total); + /* Vomit the data segments onto stdout, interlacing frames from threads + *
FIXME: look at buffering if at all possible */ + for (y = 0; y < size/clust_total; y++) + { + for (x = 0; x < size; x++) + { + s = &(sections[x%cores]); + putchar(s->data[y*(s->width) + x/cores]); + } + } fprintf(stderr, "\nDone\n"); /* Free the memory we allocated for point data */ diff --git a/fractal-gen.h b/fractal-gen.h index 392f43a..9c9a768 100644 --- a/fractal-gen.h +++ b/fractal-gen.h @@ -32,6 +32,7 @@ typedef struct { volatile unsigned long idx; unsigned long core; + unsigned long width; unsigned long datasize; char* data; pthread_t thread; -- cgit v1.1