Skip to content

Commit

Permalink
fix 6.4{5,6}
Browse files Browse the repository at this point in the history
  • Loading branch information
DreamAndDead committed Jul 12, 2018
1 parent 5824aa5 commit 68a3981
Show file tree
Hide file tree
Showing 2 changed files with 132 additions and 89 deletions.
113 changes: 71 additions & 42 deletions chapter6/code/convert.c
Original file line number Diff line number Diff line change
@@ -1,83 +1,112 @@
/*
* convert.c
*/
#include <assert.h>
#include <stdio.h>
#include <stdlib.h>
#include <assert.h>

#include <fcntl.h>
#include <string.h>
#include <unistd.h>
#include <time.h>

// a large prime number
#define MATRIX_N 9973
#define MEM_SIZE (sizeof(int) * MATRIX_N * MATRIX_N)
#define LOOP 1000
#define MAX 1024
#define LEN MAX*MAX
#define BLOCK 16

void convert(int* src, int N) {
/*
 * randomize - fill an N x N row-major matrix with pseudo-random 0/1 values.
 *
 * arr: destination buffer holding at least N * N ints.
 * N:   matrix dimension; nothing is written when N <= 0.
 *
 * The generator is reseeded from the current time on every call.
 */
void randomize(int *arr, int N) {
  srand(time(NULL));

  for (int row = 0; row < N; row++) {
    /* Walk one row at a time so cells are filled in row-major order. */
    int *cell = arr + row * N;
    for (int col = 0; col < N; col++) {
      *cell++ = rand() % 2;
    }
  }
}

/*
 * convert - make an N x N row-major matrix symmetric, in place.
 *
 * src: matrix of N * N ints, modified in place.
 * N:   matrix dimension; nothing happens when N <= 0.
 *
 * On return, entries (i, j) and (j, i) both hold the logical OR (0 or 1)
 * of the two values they held on entry; diagonal entries become 0 or 1.
 *
 * This is the straightforward version that test() compares
 * effective_convert() against, so the loop order is deliberately left
 * naive. One pass is sufficient: a second identical pass changes nothing.
 */
void convert(int *src, int N) {
  if (N <= 0)
    return;

  /* Index in size_t so i * n + j cannot overflow int for large N. */
  const size_t n = (size_t)N;

  for (size_t i = 0; i < n; i++)
    for (size_t j = 0; j < n; j++)
      src[j * n + i] = src[i * n + j] || src[j * n + i];
}

/*
 * effective_convert - blocked, in-place equivalent of convert().
 *
 * src: row-major matrix of N * N ints, modified in place.
 * N:   matrix dimension; nothing happens when N <= 0.
 *
 * On return, entries (i, j) and (j, i) both hold the logical OR (0 or 1)
 * of the two values they held on entry.
 *
 * The matrix is processed in three regions so that N does not have to be
 * a multiple of BLOCK:
 *   1. whole BLOCK x BLOCK tiles on or above the diagonal,
 *   2. the leftover rows against the tiled columns,
 *   3. the leftover bottom-right corner.
 */
void effective_convert(int *src, int N) {
  if (N <= 0)
    return;

  /* Index in size_t so a * n + b cannot overflow int for large N. */
  const size_t n = (size_t)N;
  /* First row/column index that is not covered by a whole tile. */
  const size_t offset = n - n % BLOCK;

  for (size_t i = 0; i < offset; i += BLOCK)
    /* Start at j = i: each iteration stores both (a, b) and (b, a), so
     * tiles below the diagonal never need to be visited. */
    for (size_t j = i; j < offset; j += BLOCK)
      for (size_t a = i; a < i + BLOCK; a++)
        for (size_t b = j; b < j + BLOCK; b++) {
          int tmp = src[b * n + a] || src[a * n + b];
          src[b * n + a] = tmp;
          src[a * n + b] = tmp;
        }

  /* Leftover rows paired with the tiled columns (and, through the mirrored
   * store, the leftover columns paired with the tiled rows). */
  for (size_t i = offset; i < n; i++)
    for (size_t b = 0; b < offset; b++) {
      int tmp = src[b * n + i] || src[i * n + b];
      src[b * n + i] = tmp;
      src[i * n + b] = tmp;
    }

  /* Leftover corner; j restarts for every row. */
  for (size_t i = offset; i < n; i++)
    for (size_t j = i; j < n; j++) {
      int tmp = src[j * n + i] || src[i * n + j];
      src[j * n + i] = tmp;
      src[i * n + j] = tmp;
    }
}

/*
 * test - check that effective_convert() produces the same matrix as
 * convert().
 *
 * Two identical random MATRIX_N x MATRIX_N matrices are converted, one by
 * each implementation, and the results are compared. The assertion fails
 * if they differ; the process exits with an error if memory cannot be
 * allocated.
 */
void test(void) {
  int *s = malloc(MEM_SIZE);
  int *e = malloc(MEM_SIZE);
  if (s == NULL || e == NULL) {
    perror("malloc");
    free(s);
    free(e);
    exit(EXIT_FAILURE);
  }

  /* Both implementations must start from the same input. */
  randomize(s, MATRIX_N);
  memcpy(e, s, MEM_SIZE);

  convert(s, MATRIX_N);
  effective_convert(e, MATRIX_N);

  assert(memcmp(s, e, MEM_SIZE) == 0);

  free(s);
  free(e);
}

/*
 * prof - profiling driver for convert().
 *
 * Runs convert() LOOP times on one MATRIX_N x MATRIX_N matrix. Exits with
 * an error if the matrix cannot be allocated.
 */
void prof(void) {
  int *s = malloc(MEM_SIZE);
  if (s == NULL) {
    perror("malloc");
    exit(EXIT_FAILURE);
  }

  /* malloc() memory is indeterminate; give convert() defined input. */
  randomize(s, MATRIX_N);

  for (int c = 0; c < LOOP; c++)
    convert(s, MATRIX_N);

  free(s);
}

/*
 * prof_effect - profiling driver for effective_convert().
 *
 * Runs effective_convert() LOOP times on one MATRIX_N x MATRIX_N matrix.
 * Exits with an error if the matrix cannot be allocated.
 */
void prof_effect(void) {
  int *e = malloc(MEM_SIZE);
  if (e == NULL) {
    perror("malloc");
    exit(EXIT_FAILURE);
  }

  /* malloc() memory is indeterminate; give the routine defined input. */
  randomize(e, MATRIX_N);

  for (int c = 0; c < LOOP; c++)
    effective_convert(e, MATRIX_N);

  free(e);
}

/*
 * Entry point.
 *
 * With no argument (or an unrecognized one) the correctness check test()
 * runs. Passing "prof" or "prof_effect" as the first argument runs the
 * corresponding profiling driver instead, so the source does not have to
 * be edited to switch between them.
 */
int main(int argc, char *argv[]) {
  const char *mode = (argc > 1) ? argv[1] : "";

  if (strcmp(mode, "prof") == 0)
    prof();
  else if (strcmp(mode, "prof_effect") == 0)
    prof_effect();
  else
    test();

  return 0;
}


108 changes: 61 additions & 47 deletions chapter6/code/transpose.c
Original file line number Diff line number Diff line change
@@ -1,87 +1,101 @@
/*
* transpose.c
*/
#include <assert.h>
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <assert.h>
#include <string.h>
#include <unistd.h>

// a large prime number
#define MATRIX_N 9973
#define MEM_SIZE (sizeof(int) * MATRIX_N * MATRIX_N)
#define LOOP 1000
#define MAX 1024
#define LEN MAX*MAX
#define BLOCK 16

void transpose(int* dst, int* src, int N) {
/*
 * randomize - fill a buffer with bytes read from /dev/urandom.
 *
 * mem:  destination buffer of at least `size` bytes.
 * size: number of bytes to fill; 0 leaves the buffer untouched.
 *
 * read() may return fewer bytes than requested, so the read is repeated
 * until the whole buffer is filled. If the device cannot be opened or
 * read, a message is printed and the process exits, because the function
 * has no way to report failure to its caller.
 */
void randomize(void *mem, size_t size) {
  int rnd = open("/dev/urandom", O_RDONLY);
  if (rnd < 0) {
    perror("open /dev/urandom");
    exit(EXIT_FAILURE);
  }

  unsigned char *dst = mem;
  size_t left = size;

  while (left > 0) {
    ssize_t got = read(rnd, dst, left);
    if (got < 0) {
      perror("read /dev/urandom");
      close(rnd);
      exit(EXIT_FAILURE);
    }
    if (got == 0) {
      fprintf(stderr, "read /dev/urandom: unexpected end of file\n");
      close(rnd);
      exit(EXIT_FAILURE);
    }
    dst += got;
    left -= (size_t)got;
  }

  close(rnd);
}

/*
 * transpose - write the transpose of an N x N row-major matrix.
 *
 * dst: destination matrix of N * N ints; receives dst[j][i] = src[i][j].
 * src: source matrix of N * N ints; only read.
 * N:   matrix dimension; nothing happens when N <= 0.
 *
 * This is the straightforward version that test() uses as the baseline
 * for effective_transpose(), so the loop order is deliberately left naive.
 */
void transpose(int *dst, int *src, int N) {
  if (N <= 0)
    return;

  /* Index in size_t so i * n + j cannot overflow int for large N. */
  const size_t n = (size_t)N;

  for (size_t i = 0; i < n; i++)
    for (size_t j = 0; j < n; j++)
      dst[j * n + i] = src[i * n + j];
}

/*
 * effective_transpose - blocked equivalent of transpose().
 *
 * dst: destination matrix of N * N ints; receives dst[j][i] = src[i][j].
 * src: source matrix of N * N ints; only read.
 * N:   matrix dimension; nothing happens when N <= 0.
 *
 * The matrix is processed in three regions so that N does not have to be
 * a multiple of BLOCK:
 *   1. whole BLOCK x BLOCK tiles,
 *   2. the leftover rows against the tiled columns,
 *   3. the leftover columns against every row (including the corner).
 */
void effective_transpose(int *dst, int *src, int N) {
  if (N <= 0)
    return;

  /* Index in size_t so a * n + b cannot overflow int for large N. */
  const size_t n = (size_t)N;
  /* First row/column index that is not covered by a whole tile. */
  const size_t offset = n - n % BLOCK;

  for (size_t i = 0; i < offset; i += BLOCK)
    for (size_t j = 0; j < offset; j += BLOCK)
      for (size_t a = i; a < i + BLOCK; a++)
        for (size_t b = j; b < j + BLOCK; b++)
          dst[b * n + a] = src[a * n + b];

  /* Leftover rows, tiled columns. */
  for (size_t i = offset; i < n; i++)
    for (size_t b = 0; b < offset; b++)
      dst[b * n + i] = src[i * n + b];

  /* Leftover columns, all rows; j restarts for every row. */
  for (size_t i = 0; i < n; i++)
    for (size_t j = offset; j < n; j++)
      dst[j * n + i] = src[i * n + j];
}

/*
 * test - check transpose() and effective_transpose() on a random
 * MATRIX_N x MATRIX_N matrix.
 *
 * Each routine's output is verified element by element against the
 * source. The assertion fails on the first mismatch; the process exits
 * with an error if memory cannot be allocated.
 */
void test(void) {
  int *d = malloc(MEM_SIZE);
  int *s = malloc(MEM_SIZE);
  if (d == NULL || s == NULL) {
    perror("malloc");
    free(d);
    free(s);
    exit(EXIT_FAILURE);
  }

  randomize(s, MEM_SIZE);

  transpose(d, s, MATRIX_N);

  for (int i = 0; i < MATRIX_N; i++)
    for (int j = 0; j < MATRIX_N; j++)
      assert(s[i * MATRIX_N + j] == d[j * MATRIX_N + i]);

  /* Clear the destination so results left by transpose() cannot make the
   * second check pass by accident. */
  memset(d, 0, MEM_SIZE);
  effective_transpose(d, s, MATRIX_N);

  for (int i = 0; i < MATRIX_N; i++)
    for (int j = 0; j < MATRIX_N; j++)
      assert(s[i * MATRIX_N + j] == d[j * MATRIX_N + i]);

  free(d);
  free(s);
}

/*
 * prof - profiling driver for transpose().
 *
 * Runs transpose() LOOP times on one MATRIX_N x MATRIX_N source matrix.
 * Exits with an error if the matrices cannot be allocated.
 */
void prof(void) {
  int *d = malloc(MEM_SIZE);
  int *s = malloc(MEM_SIZE);
  if (d == NULL || s == NULL) {
    perror("malloc");
    free(d);
    free(s);
    exit(EXIT_FAILURE);
  }

  /* malloc() memory is indeterminate; give transpose() defined input. */
  randomize(s, MEM_SIZE);

  for (int c = 0; c < LOOP; c++)
    transpose(d, s, MATRIX_N);

  free(d);
  free(s);
}

/*
 * prof_effect - profiling driver for effective_transpose().
 *
 * Runs effective_transpose() LOOP times on one MATRIX_N x MATRIX_N source
 * matrix. Exits with an error if the matrices cannot be allocated.
 */
void prof_effect(void) {
  int *d = malloc(MEM_SIZE);
  int *s = malloc(MEM_SIZE);
  if (d == NULL || s == NULL) {
    perror("malloc");
    free(d);
    free(s);
    exit(EXIT_FAILURE);
  }

  /* malloc() memory is indeterminate; give the routine defined input. */
  randomize(s, MEM_SIZE);

  for (int c = 0; c < LOOP; c++)
    effective_transpose(d, s, MATRIX_N);

  free(d);
  free(s);
}

/*
 * Entry point.
 *
 * With no argument (or an unrecognized one) the correctness check test()
 * runs. Passing "prof" or "prof_effect" as the first argument runs the
 * corresponding profiling driver instead, so the source does not have to
 * be edited to switch between them.
 */
int main(int argc, char *argv[]) {
  const char *mode = (argc > 1) ? argv[1] : "";

  if (strcmp(mode, "prof") == 0)
    prof();
  else if (strcmp(mode, "prof_effect") == 0)
    prof_effect();
  else
    test();

  return 0;
}


0 comments on commit 68a3981

Please sign in to comment.