00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035
00036
00037
00038
00039 #include <ProfileSPDLinDirectThreadSolver.h>
00040 #include <ProfileSPDLinSOE.h>
00041 #include <math.h>
00042 #include <Channel.h>
00043 #include <FEM_ObjectBroker.h>
00044
00045 #define _REENTRANT
00046
00047 #include <thread.h>
00048
00049 #include <Timer.h>
00050
00051
00052
00053 struct thread_control_block {
00054 mutex_t start_mutex;
00055 mutex_t end_mutex;
00056 mutex_t startBlock_mutex;
00057 mutex_t endBlock_mutex;
00058
00059 cond_t start_cond;
00060 cond_t end_cond;
00061 cond_t startBlock_cond;
00062 cond_t endBlock_cond;
00063
00064 double *A,*X,*B;
00065 int *iDiagLoc;
00066 int size;
00067 int blockSize;
00068 int maxColHeight;
00069 double minDiagTol;
00070 int *RowTop;
00071 double **topRowPtr, *invD;
00072
00073 int currentBlock;
00074 int workToDo;
00075 int info;
00076 int threadID;
00077 int numThreads;
00078 int threadsDone;
00079 int threadsDoneBlock;
00080 } TCB_ProfileSPDDirectThreadSolver;
00081
00082
00083 void *ProfileSPDLinDirectThreadSolver_Worker(void *arg);
00084
00085 ProfileSPDLinDirectThreadSolver::ProfileSPDLinDirectThreadSolver()
00086 :ProfileSPDLinSolver(SOLVER_TAGS_ProfileSPDLinDirectThreadSolver),
00087 NP(2), running(false),
00088 minDiagTol(1.0e-12), blockSize(4), maxColHeight(0),
00089 size(0), RowTop(0), topRowPtr(0), invD(0)
00090 {
00091
00092 }
00093
00094 ProfileSPDLinDirectThreadSolver::ProfileSPDLinDirectThreadSolver
00095 (int numProcessors, int blckSize, double tol)
00096 :ProfileSPDLinSolver(SOLVER_TAGS_ProfileSPDLinDirectThreadSolver),
00097 NP(numProcessors), running(false),
00098 minDiagTol(tol), blockSize(blckSize), maxColHeight(0),
00099 size(0), RowTop(0), topRowPtr(0), invD(0)
00100 {
00101
00102 }
00103
00104
00105 ProfileSPDLinDirectThreadSolver::~ProfileSPDLinDirectThreadSolver()
00106 {
00107 if (RowTop != 0) delete [] RowTop;
00108 if (topRowPtr != 0) free((void *)topRowPtr);
00109 if (invD != 0) delete [] invD;
00110 }
00111
00112 int
00113 ProfileSPDLinDirectThreadSolver::setSize(void)
00114 {
00115 if (theSOE == 0) {
00116 cerr << "ProfileSPDLinDirectThreadSolver::setSize()";
00117 cerr << " No system has been set\n";
00118 return -1;
00119 }
00120
00121
00122 if (theSOE->size == 0)
00123 return 0;
00124 if (size != theSOE->size) {
00125 size = theSOE->size;
00126
00127 if (RowTop != 0) delete [] RowTop;
00128 if (topRowPtr != 0) delete [] topRowPtr;
00129 if (invD != 0) delete [] invD;
00130
00131 RowTop = new int[size];
00132
00133
00134 topRowPtr = (double **)malloc(size *sizeof(double *));
00135
00136 invD = new double[size];
00137
00138 if (RowTop == 0 || topRowPtr == 0 || invD == 0) {
00139 cerr << "Warning :ProfileSPDLinDirectThreadSolver::ProfileSPDLinDirectThreadSolver :";
00140 cerr << " ran out of memory for work areas \n";
00141 return -1;
00142 }
00143 }
00144
00145
00146
00147 double *A = theSOE->A;
00148 int *iDiagLoc = theSOE->iDiagLoc;
00149
00150
00151
00152 maxColHeight = 0;
00153 RowTop[0] = 0;
00154 topRowPtr[0] = A;
00155 for (int j=1; j<size; j++) {
00156 int icolsz = iDiagLoc[j] - iDiagLoc[j-1];
00157 if (icolsz > maxColHeight) maxColHeight = icolsz;
00158 RowTop[j] = j - icolsz + 1;
00159 topRowPtr[j] = &A[iDiagLoc[j-1]];
00160 }
00161
00162 size = theSOE->size;
00163 return 0;
00164 }
00165
00166
00167 int
00168 ProfileSPDLinDirectThreadSolver::solve(void)
00169 {
00170
00171 if (theSOE == 0) {
00172 cerr << "ProfileSPDLinDirectThreadSolver::solve(void): ";
00173 cerr << " - No ProfileSPDSOE has been assigned\n";
00174 return -1;
00175 }
00176
00177 if (theSOE->size == 0)
00178 return 0;
00179
00180
00181 double *A = theSOE->A;
00182 double *B = theSOE->B;
00183 double *X = theSOE->X;
00184 int *iDiagLoc = theSOE->iDiagLoc;
00185 int size = theSOE->size;
00186
00187
00188 for (int ii=0; ii<size; ii++)
00189 X[ii] = B[ii];
00190
00191 if (theSOE->isAfactored == false) {
00192
00193 if (running == false) {
00194 mutex_init(&TCB_ProfileSPDDirectThreadSolver.start_mutex, USYNC_THREAD, 0);
00195 mutex_init(&TCB_ProfileSPDDirectThreadSolver.end_mutex, USYNC_THREAD, 0);
00196 mutex_init(&TCB_ProfileSPDDirectThreadSolver.startBlock_mutex, USYNC_THREAD, 0);
00197 mutex_init(&TCB_ProfileSPDDirectThreadSolver.endBlock_mutex, USYNC_THREAD, 0);
00198 cond_init(&TCB_ProfileSPDDirectThreadSolver.start_cond, USYNC_THREAD, 0);
00199 cond_init(&TCB_ProfileSPDDirectThreadSolver.end_cond, USYNC_THREAD, 0);
00200 cond_init(&TCB_ProfileSPDDirectThreadSolver.startBlock_cond, USYNC_THREAD, 0);
00201 cond_init(&TCB_ProfileSPDDirectThreadSolver.endBlock_cond, USYNC_THREAD, 0);
00202 TCB_ProfileSPDDirectThreadSolver.workToDo = 0;
00203
00204
00205
00206 for (int j = 0; j < NP; j++)
00207 thr_create(NULL,0, ProfileSPDLinDirectThreadSolver_Worker,
00208 NULL, THR_BOUND|THR_DAEMON, NULL);
00209
00210 running = true;
00211 }
00212
00213
00214 mutex_lock(&TCB_ProfileSPDDirectThreadSolver.start_mutex);
00215
00216 invD[0] = 1.0/A[0];
00217
00218
00219 TCB_ProfileSPDDirectThreadSolver.A = A;
00220 TCB_ProfileSPDDirectThreadSolver.X = X;
00221 TCB_ProfileSPDDirectThreadSolver.B = B;
00222 TCB_ProfileSPDDirectThreadSolver.size = size;
00223 TCB_ProfileSPDDirectThreadSolver.minDiagTol = minDiagTol;
00224 TCB_ProfileSPDDirectThreadSolver.maxColHeight = maxColHeight;
00225 TCB_ProfileSPDDirectThreadSolver.RowTop = RowTop;
00226 TCB_ProfileSPDDirectThreadSolver.topRowPtr = topRowPtr;
00227 TCB_ProfileSPDDirectThreadSolver.invD = invD;
00228 TCB_ProfileSPDDirectThreadSolver.blockSize = blockSize;
00229 TCB_ProfileSPDDirectThreadSolver.iDiagLoc = iDiagLoc;
00230
00231 TCB_ProfileSPDDirectThreadSolver.info = 0;
00232 TCB_ProfileSPDDirectThreadSolver.workToDo = NP;
00233 TCB_ProfileSPDDirectThreadSolver.currentBlock = -1;
00234 TCB_ProfileSPDDirectThreadSolver.threadID = 0;
00235 TCB_ProfileSPDDirectThreadSolver.numThreads = NP;
00236 TCB_ProfileSPDDirectThreadSolver.threadsDone = 0;
00237 TCB_ProfileSPDDirectThreadSolver.threadsDoneBlock = NP;
00238
00239
00240 cond_broadcast(&TCB_ProfileSPDDirectThreadSolver.start_cond);
00241 mutex_unlock(&TCB_ProfileSPDDirectThreadSolver.start_mutex);
00242
00243
00244 thr_yield();
00245
00246
00247 mutex_lock(&TCB_ProfileSPDDirectThreadSolver.end_mutex);
00248
00249 while (TCB_ProfileSPDDirectThreadSolver.threadsDone < NP)
00250 cond_wait(&TCB_ProfileSPDDirectThreadSolver.end_cond, &TCB_ProfileSPDDirectThreadSolver.end_mutex);
00251
00252 mutex_unlock(&TCB_ProfileSPDDirectThreadSolver.end_mutex);
00253
00254
00255 theSOE->isAfactored = true;
00256
00257
00258 double *bjPtr = X;
00259 double *aiiPtr = invD;
00260 for (int j=0; j<size; j++)
00261 *bjPtr++ = *aiiPtr++ * X[j];
00262
00263
00264
00265 for (int k=(size-1); k>0; k--) {
00266
00267 int rowktop = RowTop[k];
00268 double bk = X[k];
00269 double *ajiPtr = topRowPtr[k];
00270
00271 for (int j=rowktop; j<k; j++)
00272 X[j] -= *ajiPtr++ * bk;
00273 }
00274 } else {
00275
00276
00277 for (int i=1; i<size; i++) {
00278
00279 int rowitop = RowTop[i];
00280 double *ajiPtr = topRowPtr[i];
00281 double *bjPtr = &X[rowitop];
00282 double tmp = 0;
00283
00284 for (int j=rowitop; j<i; j++)
00285 tmp -= *ajiPtr++ * *bjPtr++;
00286
00287 X[i] += tmp;
00288 }
00289
00290
00291 double *bjPtr = X;
00292 double *aiiPtr = invD;
00293 for (int j=0; j<size; j++)
00294 *bjPtr++ = *aiiPtr++ * X[j];
00295
00296
00297
00298 for (int k=(size-1); k>0; k--) {
00299
00300 int rowktop = RowTop[k];
00301 double bk = X[k];
00302 double *ajiPtr = topRowPtr[k];
00303
00304 for (int j=rowktop; j<k; j++)
00305 X[j] -= *ajiPtr++ * bk;
00306 }
00307 }
00308 return 0;
00309 }
00310
00311 int
00312 ProfileSPDLinDirectThreadSolver::setProfileSOE(ProfileSPDLinSOE &theNewSOE)
00313 {
00314 if (theSOE != 0) {
00315 cerr << "ProfileSPDLinDirectThreadSolver::setProfileSOE() - ";
00316 cerr << " has already been called \n";
00317 return -1;
00318 }
00319
00320 theSOE = &theNewSOE;
00321 return 0;
00322 }
00323
00324 int
00325 ProfileSPDLinDirectThreadSolver::sendSelf(int cTag,
00326 Channel &theChannel)
00327 {
00328 if (size != 0)
00329 cerr << "ProfileSPDLinDirectThreadSolver::sendSelf - does not send itself YET\n";
00330 return 0;
00331 }
00332
00333
00334 int
00335 ProfileSPDLinDirectThreadSolver::recvSelf(int cTag,
00336 Channel &theChannel,
00337 FEM_ObjectBroker &theBroker)
00338 {
00339 return 0;
00340 }
00341
00342
00343
00344 void *ProfileSPDLinDirectThreadSolver_Worker(void *arg)
00345 {
00346
00347 while(true)
00348 {
00349
00350
00351 mutex_lock(&TCB_ProfileSPDDirectThreadSolver.start_mutex);
00352
00353 while (TCB_ProfileSPDDirectThreadSolver.workToDo == 0)
00354 cond_wait(&TCB_ProfileSPDDirectThreadSolver.start_cond, &TCB_ProfileSPDDirectThreadSolver.start_mutex);
00355
00356 TCB_ProfileSPDDirectThreadSolver.workToDo--;
00357
00358
00359 int myID = TCB_ProfileSPDDirectThreadSolver.threadID++;
00360
00361 mutex_unlock(&TCB_ProfileSPDDirectThreadSolver.start_mutex);
00362
00363
00364 double *A = TCB_ProfileSPDDirectThreadSolver.A;
00365 double *B = TCB_ProfileSPDDirectThreadSolver.B;
00366 double *X = TCB_ProfileSPDDirectThreadSolver.X;
00367 int *iDiagLoc = TCB_ProfileSPDDirectThreadSolver.iDiagLoc;
00368 int size = TCB_ProfileSPDDirectThreadSolver.size;
00369 double minDiagTol = TCB_ProfileSPDDirectThreadSolver.minDiagTol;
00370 int maxColHeight = TCB_ProfileSPDDirectThreadSolver.maxColHeight;
00371 int *RowTop = TCB_ProfileSPDDirectThreadSolver.RowTop;
00372 double **topRowPtr = TCB_ProfileSPDDirectThreadSolver.topRowPtr;
00373 double *invD = TCB_ProfileSPDDirectThreadSolver.invD;
00374 int blockSize = TCB_ProfileSPDDirectThreadSolver.blockSize;
00375
00376 int currentBlock =0;
00377 int numThreads = TCB_ProfileSPDDirectThreadSolver.numThreads;
00378
00379
00380
00381 int startRow = 0;
00382 int lastRow = startRow+blockSize-1;
00383 int lastColEffected = lastRow+maxColHeight -1;
00384 int nBlck = size/blockSize;
00385 if ((size % blockSize) != 0)
00386 nBlck++;
00387
00388
00389 for (int i=0; i<nBlck; i++) {
00390
00391 int currentDiagThread = i%numThreads;
00392 if (myID == currentDiagThread) {
00393
00394
00395 int j;
00396 for (j=0; j<blockSize; j++) {
00397 int currentRow = startRow + j;
00398
00399 if (currentRow < size) {
00400
00401 int rowjTop = RowTop[currentRow];
00402
00403 double *akjPtr = topRowPtr[currentRow];
00404 int maxRowijTop;
00405 if (rowjTop < startRow) {
00406 akjPtr += startRow-rowjTop;
00407 maxRowijTop = startRow;
00408 } else
00409 maxRowijTop = rowjTop;
00410
00411 int k;
00412 for (k=maxRowijTop; k<currentRow; k++) {
00413 double tmp = *akjPtr;
00414 int rowkTop = RowTop[k];
00415 int maxRowkjTop;
00416 double *alkPtr, *aljPtr;
00417 if (rowkTop < rowjTop) {
00418 alkPtr = topRowPtr[k] + (rowjTop - rowkTop);
00419 aljPtr = topRowPtr[currentRow];
00420 maxRowkjTop = rowjTop;
00421 } else {
00422 alkPtr = topRowPtr[k];
00423 aljPtr = topRowPtr[currentRow] + (rowkTop - rowjTop);
00424 maxRowkjTop = rowkTop;
00425 }
00426
00427 for (int l = maxRowkjTop; l<k; l++)
00428 tmp -= *alkPtr++ * *aljPtr++;
00429
00430 *akjPtr++ = tmp;
00431 }
00432
00433 double ajj = *akjPtr;
00434 akjPtr = topRowPtr[currentRow];
00435
00436 double *bjPtr = &X[rowjTop];
00437 double tmp = 0;
00438
00439 for (k=rowjTop; k<currentRow; k++){
00440 double akj = *akjPtr;
00441 double lkj = akj * invD[k];
00442 tmp -= lkj * *bjPtr++;
00443 *akjPtr++ = lkj;
00444 ajj = ajj -lkj * akj;
00445 }
00446
00447 X[currentRow] += tmp;
00448
00449
00450 if (ajj <= 0.0) {
00451 cerr << "ProfileSPDLinDirectThreadSolver::solve() - ";
00452 cerr << " aii < 0 (i, aii): (" << currentRow << ", " << ajj << ")\n";
00453 TCB_ProfileSPDDirectThreadSolver.info = -2;
00454 return NULL;
00455 }
00456 if (ajj <= minDiagTol) {
00457 cerr << "ProfileSPDLinDirectThreadSolver::solve() - ";
00458 cerr << " aii < minDiagTol (i, aii): (" << currentRow;
00459 cerr << ", " << ajj << ")\n";
00460 TCB_ProfileSPDDirectThreadSolver.info = -2;
00461 return NULL;
00462 }
00463 invD[currentRow] = 1.0/ajj;
00464
00465 } else
00466 j = blockSize;
00467
00468 }
00469
00470
00471 mutex_lock(&TCB_ProfileSPDDirectThreadSolver.startBlock_mutex);
00472 TCB_ProfileSPDDirectThreadSolver.currentBlock = i;
00473
00474 cond_broadcast(&TCB_ProfileSPDDirectThreadSolver.startBlock_cond);
00475 mutex_unlock(&TCB_ProfileSPDDirectThreadSolver.startBlock_mutex);
00476
00477
00478
00479
00480 int currentCol = startRow + blockSize;
00481 for (j=i+1; j<nBlck; j++) {
00482
00483 if (j%numThreads == myID) {
00484
00485 for (int k=0; k<blockSize; k++) {
00486
00487 if (currentCol < size) {
00488
00489 int rowkTop = RowTop[currentCol];
00490 double *alkPtr = topRowPtr[currentCol];
00491
00492 int maxRowikTop;
00493 if (rowkTop < startRow) {
00494 alkPtr += startRow-rowkTop;
00495 maxRowikTop = startRow;
00496 } else
00497 maxRowikTop = rowkTop;
00498
00499 for (int l=maxRowikTop; l<=lastRow; l++) {
00500 double tmp = *alkPtr;
00501 int rowlTop = RowTop[l];
00502 int maxRowklTop;
00503 double *amlPtr, *amkPtr;
00504 if (rowlTop < rowkTop) {
00505 amlPtr = topRowPtr[l] + (rowkTop - rowlTop);
00506 amkPtr = topRowPtr[currentCol];
00507 maxRowklTop = rowkTop;
00508 } else {
00509 amlPtr = topRowPtr[l];
00510 amkPtr = topRowPtr[currentCol] + (rowlTop - rowkTop);
00511 maxRowklTop = rowlTop;
00512 }
00513
00514 for (int m = maxRowklTop; m<l; m++)
00515 tmp -= *amkPtr++ * *amlPtr++;
00516
00517 *alkPtr++ = tmp;
00518 }
00519 currentCol++;
00520 if (currentCol > lastColEffected) {
00521 k = blockSize;
00522 j = nBlck;
00523 }
00524
00525 } else
00526 k = blockSize;
00527 }
00528 } else
00529 currentCol += blockSize;
00530 }
00531 } else {
00532
00533 mutex_lock(&TCB_ProfileSPDDirectThreadSolver.startBlock_mutex);
00534 while (TCB_ProfileSPDDirectThreadSolver.currentBlock < i)
00535 cond_wait(&TCB_ProfileSPDDirectThreadSolver.startBlock_cond, &TCB_ProfileSPDDirectThreadSolver.startBlock_mutex);
00536 mutex_unlock(&TCB_ProfileSPDDirectThreadSolver.startBlock_mutex);
00537
00538
00539
00540
00541 int currentCol = startRow + blockSize;
00542 for (int j=i+1; j<nBlck; j++) {
00543
00544 if (j%numThreads == myID) {
00545
00546 for (int k=0; k<blockSize; k++) {
00547
00548 if (currentCol < size) {
00549
00550 int rowkTop = RowTop[currentCol];
00551 double *alkPtr = topRowPtr[currentCol];
00552
00553 int maxRowikTop;
00554 if (rowkTop < startRow) {
00555 alkPtr += startRow-rowkTop;
00556 maxRowikTop = startRow;
00557 } else
00558 maxRowikTop = rowkTop;
00559
00560 for (int l=maxRowikTop; l<=lastRow; l++) {
00561 double tmp = *alkPtr;
00562 int rowlTop = RowTop[l];
00563 int maxRowklTop;
00564 double *amlPtr, *amkPtr;
00565 if (rowlTop < rowkTop) {
00566 amlPtr = topRowPtr[l] + (rowkTop - rowlTop);
00567 amkPtr = topRowPtr[currentCol];
00568 maxRowklTop = rowkTop;
00569 } else {
00570 amlPtr = topRowPtr[l];
00571 amkPtr = topRowPtr[currentCol] + (rowlTop - rowkTop);
00572 maxRowklTop = rowlTop;
00573 }
00574
00575 for (int m = maxRowklTop; m<l; m++)
00576 tmp -= *amkPtr++ * *amlPtr++;
00577
00578 *alkPtr++ = tmp;
00579 }
00580 currentCol++;
00581 if (currentCol > lastColEffected) {
00582 k = blockSize;
00583 j = nBlck;
00584 }
00585
00586 } else
00587 k = blockSize;
00588 }
00589 } else
00590 currentCol += blockSize;
00591
00592 }
00593
00594 }
00595
00596
00597 startRow += blockSize;
00598 lastRow = startRow + blockSize -1;
00599 lastColEffected = lastRow + maxColHeight -1;
00600 }
00601
00602
00603 mutex_lock(&TCB_ProfileSPDDirectThreadSolver.end_mutex);
00604 TCB_ProfileSPDDirectThreadSolver.threadsDone++;
00605
00606 cond_signal(&TCB_ProfileSPDDirectThreadSolver.end_cond);
00607 mutex_unlock(&TCB_ProfileSPDDirectThreadSolver.end_mutex);
00608 }
00609 }