/* * Original author: UNKNOWN * * Modified: Kai Shen (January 2010) */ #define _GNU_SOURCE #include #include #include #include #include #include #include #include /* #define DEBUG */ #define SWAP(a, b) \ { \ double tmp; \ tmp = a; \ a = b; \ b = tmp; \ } /* Solve the equation: * matrix * X = R */ double **matrix, *X, *R; /* Pre-set solution. */ double *X__; /* Initialize the matirx. */ int initMatrix(const char *fname) { FILE *file; int l1, l2, l3; double d; int nsize; int i, j; double *tmp; char buffer[1024]; if ((file = fopen(fname, "r")) == NULL) { fprintf(stderr, "The matrix file open error\n"); exit(-1); } /* Parse the first line to get the matrix size. */ fgets(buffer, 1024, file); sscanf(buffer, "%d %d %d", &l1, &l2, &l3); nsize = l1; #ifdef DEBUG fprintf(stdout, "matrix size is %d\n", nsize); #endif /* Initialize the space and set all elements to zero. */ matrix = (double **)malloc(nsize * sizeof(double *)); assert(matrix != NULL); tmp = (double *)malloc(nsize * nsize * sizeof(double)); assert(tmp != NULL); for (i = 0; i < nsize; i++) { matrix[i] = tmp; tmp = tmp + nsize; } for (i = 0; i < nsize; i++) { for (j = 0; j < nsize; j++) { matrix[i][j] = 0.0; } } /* Parse the rest of the input file to fill the matrix. */ for (;;) { fgets(buffer, 1024, file); sscanf(buffer, "%d %d %lf", &l1, &l2, &d); if (l1 == 0) break; matrix[l1 - 1][l2 - 1] = d; #ifdef DEBUG fprintf(stdout, "row %d column %d of matrix is %e\n", l1 - 1, l2 - 1, matrix[l1 - 1][l2 - 1]); #endif } fclose(file); return nsize; } /* Initialize the right-hand-side following the pre-set solution. */ void initRHS(int nsize) { int i, j; X__ = (double *)malloc(nsize * sizeof(double)); assert(X__ != NULL); for (i = 0; i < nsize; i++) { X__[i] = i + 1; } R = (double *)malloc(nsize * sizeof(double)); assert(R != NULL); for (i = 0; i < nsize; i++) { R[i] = 0.0; for (j = 0; j < nsize; j++) { R[i] += matrix[i][j] * X__[j]; } } } /* Initialize the results. */ void initResult(int nsize) { int i; X = (double *)malloc(nsize * sizeof(double)); assert(X != NULL); for (i = 0; i < nsize; i++) { X[i] = 0.0; } } int task_num = 1; int nsize = 0; pthread_mutex_t mut = PTHREAD_MUTEX_INITIALIZER; pthread_cond_t cond = PTHREAD_COND_INITIALIZER; /* references: sor_pthread.c */ void barrier(int expect) { static int arrived = 0; pthread_mutex_lock(&mut); // lock arrived++; if (arrived < expect) { pthread_cond_wait(&cond, &mut); } else { arrived = 0; // reset the barrier before broadcast is important pthread_cond_broadcast(&cond); } pthread_mutex_unlock(&mut); // unlock } int *globalMax; int *maxArr; /* Get the pivot - the element on column with largest absolute value. */ void getPivot(int nsize, int currow, int task_id) { int i, pivotrow; pivotrow = currow; for (i = currow + 1; i < nsize; i++) { if (i % task_num != task_id) { continue; } if (fabs(matrix[i][currow]) > fabs(matrix[pivotrow][currow])) { pivotrow = i; } } maxArr[task_id] = pivotrow; barrier(task_num); if (task_id == 0) { *globalMax = maxArr[0]; for (i = 1; i < task_num; i++) { if (fabs(matrix[*globalMax][currow]) < fabs(matrix[maxArr[i]][currow])) { *globalMax = maxArr[i]; } } if (fabs(matrix[*globalMax][currow]) == 0.0) { fprintf(stderr, "The matrix is singular\n"); exit(-1); } if (*globalMax != currow) { #ifdef DEBUG fprintf(stdout, "pivot row at step %5d is %5d\n", currow, pivotrow); #endif for (i = currow; i < nsize; i++) { SWAP(matrix[*globalMax][i], matrix[currow][i]); } SWAP(R[*globalMax], R[currow]); *globalMax = 0; } } } void errexit(const char *err_str) { fprintf(stderr, "%s", err_str); exit(1); } //======================= TODO: Assignement Part 2 and 3======================= /* For all the rows, get the pivot and eliminate all rows and columns * for that particular pivot row. */ void *computeGaussThreadCyclical(void *lp) { /* TODO: Assignment part 2. Implement gauss thread cyclical */ // Comment the exit(1) when completing the assignment exit(1); // This line should be commented when filling this function return NULL; } void *computeGaussThreadStripe(void *lp) { /* TODO: Assignment part 3. Implement gauss striped.*/ exit(1); // This line should be commented when filling this function return NULL; } //============================================================================== /* Solve the equation. */ void solveGauss(int nsize) { int i, j; X[nsize - 1] = R[nsize - 1]; for (i = nsize - 2; i >= 0; i--) { X[i] = R[i]; for (j = nsize - 1; j > i; j--) { X[i] -= matrix[i][j] * X[j]; } } #ifdef DEBUG fprintf(stdout, "X = ["); for (i = 0; i < nsize; i++) { fprintf(stdout, "%.6f ", X[i]); } fprintf(stdout, "];\n"); #endif } int main(int argc, char *argv[]) { // ===== DECLRATION ===== int i, c; struct timeval start, finish; double error; /* use stripe or not */ int isStripe = 0; // thread related var pthread_attr_t attr; pthread_t *tid; int *id; globalMax = (int *)malloc(sizeof(int)); int s; cpu_set_t cpuset; while ((c = getopt(argc, argv, "m:p:s")) != -1) { switch (c) { case 'm': nsize = initMatrix(optarg); break; case 's': isStripe = 1; break; case 'p': task_num = atoi(optarg); break; } } if (nsize == 0) { errexit("-m is mandatory\n"); } // ===== INITIALIZATION ===== initRHS(nsize); initResult(nsize); maxArr = (int *)malloc(sizeof(int) * task_num); int process_num = sysconf(_SC_NPROCESSORS_ONLN); gettimeofday(&start, 0); // ===== Bind the threads. // bind the main process to the core 0 CPU_ZERO(&cpuset); CPU_SET(0, &cpuset); s = sched_setaffinity(getpid(), sizeof(cpu_set_t), &cpuset); if (s != 0) { errexit("self sched_setaffinity error\n"); } // ===== Create P-1 worker threads id = (int *)malloc(sizeof(int) * task_num); tid = (pthread_t *)malloc(sizeof(pthread_t) * task_num); if (!id || !tid) { errexit("out of shared memory"); } pthread_attr_init(&attr); pthread_attr_setscope(&attr, PTHREAD_SCOPE_SYSTEM); for (i = 1; i < task_num; i++) { id[i] = i; CPU_ZERO(&cpuset); // need to reset it each time CPU_SET(i % process_num, &cpuset); // mod the i if the # of threads # of cores s = pthread_attr_setaffinity_np(&attr, sizeof(cpu_set_t), &cpuset); if (s != 0) { errexit("pthread_setaffinity_np error\n"); } if (isStripe) { pthread_create(&tid[i], &attr, computeGaussThreadStripe, &id[i]); } else { pthread_create(&tid[i], &attr, computeGaussThreadCyclical, &id[i]); } } //==================== Assigning work to the main thread ================== id[0] = 0; if (isStripe) { computeGaussThreadStripe(&id[0]); } else { computeGaussThreadCyclical(&id[0]); } for (i = 1; i < task_num; i++) { pthread_join(tid[i], NULL); } gettimeofday(&finish, 0); solveGauss(nsize); fprintf(stdout, "Time: %f seconds\n", (finish.tv_sec - start.tv_sec) + (finish.tv_usec - start.tv_usec) * 0.000001); error = 0.0; for (i = 0; i < nsize; i++) { double error__ = (X__[i] == 0.0) ? 1.0 : fabs((X[i] - X__[i]) / X__[i]); if (error < error__) { error = error__; } } fprintf(stdout, "Error: %e\n", error); return 0; }