00001 #include "numchol.h"
00002
00003 static void UpdSnode(int m,
00004 int n,
00005 int s,
00006 double diaga[],
00007 double *a,
00008 int fira[],
00009 double diagb[],
00010 double *b,
00011 int firb[])
00012 {
00013 int i,k,t,sze;
00014 double rtemp1, rtemp2, rtemp3, rtemp4,
00015 rtemp5, rtemp6, rtemp7, rtemp8,
00016 rtemp9, rtemp10,rtemp11,rtemp12,
00017 rtemp13,rtemp14,rtemp15,rtemp16,
00018 *a1,*a2, *a3, *a4, *a5, *a6, *a7, *a8,
00019 *a9,*a10,*a11,*a12,*a13,*a14,*a15,*a16,
00020 *b00,*b1,*b0;
00021
00022 for(i=0; i+1<s; i+=2) {
00023 b00 = b+firb[i];
00024 b0 = b00+1;
00025 b1 = b+firb[i+1];
00026 sze = m-i-2;
00027 k = 0;
00028
00029 for(; k+3<n; k+=4) {
00030 a1 = a+(fira[k+0 ]+i);
00031 a2 = a+(fira[k+1 ]+i);
00032 a3 = a+(fira[k+2 ]+i);
00033 a4 = a+(fira[k+3 ]+i);
00034
00035 rtemp1 = a1[0]/diaga[k+0];
00036 rtemp2 = a2[0]/diaga[k+1];
00037 rtemp3 = a3[0]/diaga[k+2];
00038 rtemp4 = a4[0]/diaga[k+3];
00039
00040 diagb[i] -= rtemp1 * a1[0]
00041 + rtemp2 * a2[0]
00042 + rtemp3 * a3[0]
00043 + rtemp4 * a4[0];
00044
00045 ++ a1;
00046 ++ a2;
00047 ++ a3;
00048 ++ a4;
00049
00050 rtemp5 = a1[0]/diaga[k+0];
00051 rtemp6 = a2[0]/diaga[k+1];
00052 rtemp7 = a3[0]/diaga[k+2];
00053 rtemp8 = a4[0]/diaga[k+3];
00054
00055 b00[0] -= rtemp1 * a1[0]
00056 + rtemp2 * a2[0]
00057 + rtemp3 * a3[0]
00058 + rtemp4 * a4[0];
00059
00060 diagb[i+1] -= rtemp5 * a1[0]
00061 + rtemp6 * a2[0]
00062 + rtemp7 * a3[0]
00063 + rtemp8 * a4[0];
00064
00065 ++ a1;
00066 ++ a2;
00067 ++ a3;
00068 ++ a4;
00069
00070 for(t=0; t<sze; ++t) {
00071 b0[t] -= rtemp1 * a1[t]
00072 + rtemp2 * a2[t]
00073 + rtemp3 * a3[t]
00074 + rtemp4 * a4[t];
00075
00076 b1[t] -= rtemp5 * a1[t]
00077 + rtemp6 * a2[t]
00078 + rtemp7 * a3[t]
00079 + rtemp8 * a4[t];
00080 }
00081 }
00082
00083 for(; k+1<n; k+=2) {
00084 a1 = a+(fira[k+0 ]+i);
00085 a2 = a+(fira[k+1 ]+i);
00086
00087 rtemp1 = a1[0] / diaga[k+0];
00088 rtemp2 = a2[0] / diaga[k+1];
00089
00090 diagb[i] -= a1[0] * rtemp1
00091 + a2[0] * rtemp2;
00092
00093 ++ a1;
00094 ++ a2;
00095
00096 rtemp5 = a1[0] / diaga[k+0];
00097 rtemp6 = a2[0] / diaga[k+1];
00098
00099 b00[0] -= a1[0] * rtemp1
00100 + a2[0] * rtemp2;
00101
00102 diagb[i+1] -= a1[0] * rtemp5
00103 + a2[0] * rtemp6;
00104
00105
00106 ++ a1;
00107 ++ a2;
00108
00109 for(t=0; t<sze; ++t) {
00110 b0[t] -= a1[t] * rtemp1
00111 + a2[t] * rtemp2;
00112
00113 b1[t] -= a1[t] * rtemp5
00114 + a2[t] * rtemp6;
00115 }
00116 }
00117
00118 for(; k<n; ++k) {
00119 a1 = a+(fira[k+0 ]+i);
00120
00121 rtemp1 = a1[0] / diaga[k+0];
00122
00123 diagb[i] -= a1[0] * rtemp1;
00124
00125 ++ a1;
00126
00127 rtemp5 = a1[0] / diaga[k+0];
00128
00129 diagb[i+1] -= a1[0] * rtemp5;
00130
00131 b00[0] -= a1[0] * rtemp1;
00132
00133 ++ a1;
00134
00135 for(t=0; t<sze; ++t)
00136 {
00137 b0[t] -= rtemp1 * a1[t];
00138 b1[t] -= rtemp5 * a1[t];
00139 }
00140 }
00141 }
00142
00143 for(; i<s; ++i) {
00144
00145 b0 = b+firb[i];
00146 sze = m-i-1;
00147 k = 0;
00148
00149 for(; k+15<n; k+=16) {
00150 a1 = a+(fira[k+0 ]+i);
00151 a2 = a+(fira[k+1 ]+i);
00152 a3 = a+(fira[k+2 ]+i);
00153 a4 = a+(fira[k+3 ]+i);
00154 a5 = a+(fira[k+4 ]+i);
00155 a6 = a+(fira[k+5 ]+i);
00156 a7 = a+(fira[k+6 ]+i);
00157 a8 = a+(fira[k+7 ]+i);
00158 a9 = a+(fira[k+8 ]+i);
00159 a10 = a+(fira[k+9 ]+i);
00160 a11 = a+(fira[k+10]+i);
00161 a12 = a+(fira[k+11]+i);
00162 a13 = a+(fira[k+12]+i);
00163 a14 = a+(fira[k+13]+i);
00164 a15 = a+(fira[k+14]+i);
00165 a16 = a+(fira[k+15]+i);
00166
00167 rtemp1 = *a1/diaga[k+0];
00168 rtemp2 = *a2/diaga[k+1];
00169 rtemp3 = *a3/diaga[k+2];
00170 rtemp4 = *a4/diaga[k+3];
00171 rtemp5 = *a5/diaga[k+4];
00172 rtemp6 = *a6/diaga[k+5];
00173 rtemp7 = *a7/diaga[k+6];
00174 rtemp8 = *a8/diaga[k+7];
00175 rtemp9 = *a9/diaga[k+8];
00176 rtemp10 = *a10/diaga[k+9];
00177 rtemp11 = *a11/diaga[k+10];
00178 rtemp12 = *a12/diaga[k+11];
00179 rtemp13 = *a13/diaga[k+12];
00180 rtemp14 = *a14/diaga[k+13];
00181 rtemp15 = *a15/diaga[k+14];
00182 rtemp16 = *a16/diaga[k+15];
00183
00184 diagb[i] -= rtemp1 * (*a1)
00185 + rtemp2 * (*a2)
00186 + rtemp3 * (*a3)
00187 + rtemp4 * (*a4)
00188 + rtemp5 * (*a5)
00189 + rtemp6 * (*a6)
00190 + rtemp7 * (*a7)
00191 + rtemp8 * (*a8)
00192 + rtemp9 * (*a9)
00193 + rtemp10 * (*a10)
00194 + rtemp11 * (*a11)
00195 + rtemp12 * (*a12)
00196 + rtemp13 * (*a13)
00197 + rtemp14 * (*a14)
00198 + rtemp15 * (*a15)
00199 + rtemp16 * (*a16);
00200
00201
00202 ++ a1;
00203 ++ a2;
00204 ++ a3;
00205 ++ a4;
00206 ++ a5;
00207 ++ a6;
00208 ++ a7;
00209 ++ a8;
00210 ++ a9;
00211 ++ a10;
00212 ++ a11;
00213 ++ a12;
00214 ++ a13;
00215 ++ a14;
00216 ++ a15;
00217 ++ a16;
00218
00219 for(t=0; t<sze; ++t)
00220 b0[t] -= rtemp1 * a1[t]
00221 + rtemp2 * a2[t]
00222 + rtemp3 * a3[t]
00223 + rtemp4 * a4[t]
00224 + rtemp5 * a5[t]
00225 + rtemp6 * a6[t]
00226 + rtemp7 * a7[t]
00227 + rtemp8 * a8[t]
00228 + rtemp9 * a9[t]
00229 + rtemp10 * a10[t]
00230 + rtemp11 * a11[t]
00231 + rtemp12 * a12[t]
00232 + rtemp13 * a13[t]
00233 + rtemp14 * a14[t]
00234 + rtemp15 * a15[t]
00235 + rtemp16 * a16[t];
00236
00237 }
00238
00239 for(; k+11<n; k+=12) {
00240 a1 = a+(fira[k+0 ]+i);
00241 a2 = a+(fira[k+1 ]+i);
00242 a3 = a+(fira[k+2 ]+i);
00243 a4 = a+(fira[k+3 ]+i);
00244 a5 = a+(fira[k+4 ]+i);
00245 a6 = a+(fira[k+5 ]+i);
00246 a7 = a+(fira[k+6 ]+i);
00247 a8 = a+(fira[k+7 ]+i);
00248 a9 = a+(fira[k+8 ]+i);
00249 a10 = a+(fira[k+9 ]+i);
00250 a11 = a+(fira[k+10]+i);
00251 a12 = a+(fira[k+11]+i);
00252
00253 rtemp1 = *a1/diaga[k+0];
00254 rtemp2 = *a2/diaga[k+1];
00255 rtemp3 = *a3/diaga[k+2];
00256 rtemp4 = *a4/diaga[k+3];
00257 rtemp5 = *a5/diaga[k+4];
00258 rtemp6 = *a6/diaga[k+5];
00259 rtemp7 = *a7/diaga[k+6];
00260 rtemp8 = *a8/diaga[k+7];
00261 rtemp9 = *a9/diaga[k+8];
00262 rtemp10 = *a10/diaga[k+9];
00263 rtemp11 = *a11/diaga[k+10];
00264 rtemp12 = *a12/diaga[k+11];
00265
00266 diagb[i] -= rtemp1 * (*a1)
00267 + rtemp2 * (*a2)
00268 + rtemp3 * (*a3)
00269 + rtemp4 * (*a4)
00270 + rtemp5 * (*a5)
00271 + rtemp6 * (*a6)
00272 + rtemp7 * (*a7)
00273 + rtemp8 * (*a8)
00274 + rtemp9 * (*a9)
00275 + rtemp10 * (*a10)
00276 + rtemp11 * (*a11)
00277 + rtemp12 * (*a12);
00278
00279 ++ a1;
00280 ++ a2;
00281 ++ a3;
00282 ++ a4;
00283 ++ a5;
00284 ++ a6;
00285 ++ a7;
00286 ++ a8;
00287 ++ a9;
00288 ++ a10;
00289 ++ a11;
00290 ++ a12;
00291
00292 for(t=0; t<sze; ++t)
00293 b0[t] -= rtemp1 * a1[t]
00294 + rtemp2 * a2[t]
00295 + rtemp3 * a3[t]
00296 + rtemp4 * a4[t]
00297 + rtemp5 * a5[t]
00298 + rtemp6 * a6[t]
00299 + rtemp7 * a7[t]
00300 + rtemp8 * a8[t]
00301 + rtemp9 * a9[t]
00302 + rtemp10 * a10[t]
00303 + rtemp11 * a11[t]
00304 + rtemp12 * a12[t];
00305
00306 }
00307
00308 for(; k+7<n; k+=8) {
00309 a1 = a+(fira[k+0 ]+i);
00310 a2 = a+(fira[k+1 ]+i);
00311 a3 = a+(fira[k+2 ]+i);
00312 a4 = a+(fira[k+3 ]+i);
00313 a5 = a+(fira[k+4 ]+i);
00314 a6 = a+(fira[k+5 ]+i);
00315 a7 = a+(fira[k+6 ]+i);
00316 a8 = a+(fira[k+7 ]+i);
00317
00318 rtemp1 = *a1/diaga[k+0];
00319 rtemp2 = *a2/diaga[k+1];
00320 rtemp3 = *a3/diaga[k+2];
00321 rtemp4 = *a4/diaga[k+3];
00322 rtemp5 = *a5/diaga[k+4];
00323 rtemp6 = *a6/diaga[k+5];
00324 rtemp7 = *a7/diaga[k+6];
00325 rtemp8 = *a8/diaga[k+7];
00326
00327 diagb[i] -= rtemp1 * (*a1)
00328 + rtemp2 * (*a2)
00329 + rtemp3 * (*a3)
00330 + rtemp4 * (*a4)
00331 + rtemp5 * (*a5)
00332 + rtemp6 * (*a6)
00333 + rtemp7 * (*a7)
00334 + rtemp8 * (*a8);
00335
00336 ++ a1;
00337 ++ a2;
00338 ++ a3;
00339 ++ a4;
00340 ++ a5;
00341 ++ a6;
00342 ++ a7;
00343 ++ a8;
00344
00345 for(t=0; t<sze; ++t)
00346 b0[t] -= rtemp1 * a1[t]
00347 + rtemp2 * a2[t]
00348 + rtemp3 * a3[t]
00349 + rtemp4 * a4[t]
00350 + rtemp5 * a5[t]
00351 + rtemp6 * a6[t]
00352 + rtemp7 * a7[t]
00353 + rtemp8 * a8[t];
00354
00355 }
00356
00357 for(; k+3<n; k+=4) {
00358 a1 = a+(fira[k+0 ]+i);
00359 a2 = a+(fira[k+1 ]+i);
00360 a3 = a+(fira[k+2 ]+i);
00361 a4 = a+(fira[k+3 ]+i);
00362
00363 rtemp1 = *a1/diaga[k+0];
00364 rtemp2 = *a2/diaga[k+1];
00365 rtemp3 = *a3/diaga[k+2];
00366 rtemp4 = *a4/diaga[k+3];
00367
00368 diagb[i] -= rtemp1 * (*a1)
00369 + rtemp2 * (*a2)
00370 + rtemp3 * (*a3)
00371 + rtemp4 * (*a4);
00372
00373 ++ a1;
00374 ++ a2;
00375 ++ a3;
00376 ++ a4;
00377
00378 for(t=0; t<sze; ++t)
00379 b0[t] -= rtemp1 * a1[t]
00380 + rtemp2 * a2[t]
00381 + rtemp3 * a3[t]
00382 + rtemp4 * a4[t];
00383
00384 }
00385
00386 for(; k+1<n; k+=2) {
00387 a1 = a+(fira[k+0 ]+i);
00388 a2 = a+(fira[k+1 ]+i);
00389
00390 rtemp1 = *a1/diaga[k+0];
00391 rtemp2 = *a2/diaga[k+1];
00392
00393 diagb[i] -= rtemp1 * (*a1)
00394 + rtemp2 * (*a2);
00395
00396 ++ a1;
00397 ++ a2;
00398
00399 for(t=0; t<sze; ++t)
00400 b0[t] -= rtemp1 * a1[t]
00401 + rtemp2 * a2[t];
00402 }
00403
00404 for(; k<n; ++k) {
00405 a1 = a+(fira[k+0 ]+i);
00406
00407 rtemp1 = *a1/diaga[k+0];
00408
00409 diagb[i] -= rtemp1 * (*a1);
00410
00411 ++ a1;
00412
00413 for(t=0; t<sze; ++t)
00414 b0[t] -= rtemp1 * a1[t];
00415 }
00416 }
00417 }
00418
00419 static void iUpdSnode(chfac *cf,
00420 int snde,
00421 int f,
00422 int l,
00423 int uf,
00424 int ul,
00425 int iw[])
00426 {
00427 int k,
00428 *ujsze=cf->ujsze,*uhead=cf->uhead,*subg=cf->subg;
00429 double *diag=cf->diag,*uval=cf->uval;
00430
00431 if (f==l || uf==ul)
00432 return;
00433
00434 f += subg[snde];
00435 l += subg[snde];
00436 uf += subg[snde];
00437 ul += subg[snde];
00438
00439 for(k=f; k<l; ++k)
00440 iw[k-f] = uhead[k]+uf-k-1;
00441
00442 UpdSnode(1+ujsze[uf],l-f,ul-uf,diag+f,uval,iw,diag+uf,uval,uhead+uf);
00443 }
00444
00445 static int DiagUpdate(double *dii,
00446 int mode)
00447 {
00448 int id=TRUE;
00449
00450 if (mode) {
00451 if (*dii<1.0e-13)
00452 return FALSE;
00453 }
00454 else {
00455 if (fabs(*dii)<1.0e-35) {
00456 printf(" diagonal nearly zero: %5.1e.\n",(*dii));
00457 return FALSE;
00458 }
00459 }
00460 return id;
00461 }
00462
00463 static int FacSnode(chfac *cf,
00464 int snde,
00465 int f,
00466 int l,
00467 int iw[],
00468 int mode)
00469 {
00470 int itemp,k;
00471
00472 if (f==l)
00473 return (CfcOk);
00474
00475 itemp = cf->subg[snde]+f;
00476
00477 if (!DiagUpdate(cf->diag+itemp,
00478 mode))
00479 return (CfcIndef);
00480
00481 if (fabs(cf->diag[itemp])<=cf->tolpiv) {
00482 printf("Singular d[%d]=%e\n",
00483 cf->subg[snde]+f,cf->diag[cf->subg[snde]+f]);
00484 return (CfcIndef);
00485 }
00486
00487 for(k=f+1; k<l; ++k) {
00488 iUpdSnode(cf,snde,f,k,k,k+1,iw);
00489
00490 itemp = cf->subg[snde]+k;
00491
00492 if (!DiagUpdate(&cf->diag[itemp],
00493 mode))
00494 return (CfcIndef);
00495
00496 if (fabs(cf->diag[itemp])<=cf->tolpiv) {
00497 printf(" singular d[%d]=%e\n",
00498 cf->subg[snde]+k,cf->diag[cf->subg[snde]+k]);
00499
00500 return (CfcIndef);
00501 }
00502 }
00503
00504 return CfcOk;
00505 }
00506
00507 static void UpdSnodes(int m,
00508 int n,
00509 int s,
00510 double diaga[],
00511 double *a,
00512 int fira[],
00513 double diagb[],
00514 double *b,
00515 int firb[],
00516 int subb[])
00517 {
00518 int i,j,k,t,u,sze,delay,
00519 *ls;
00520 double rtemp1,rtemp2,rtemp3,rtemp4,
00521 rtemp5,rtemp6,rtemp7,rtemp8,
00522 rtemp9,rtemp10,rtemp11,rtemp12,
00523 rtemp13,rtemp14,rtemp15,rtemp16,
00524 *a1,*a2,*a3,*a4,*a5,*a6,*a7,*a8,
00525 *a9,*a10,*a11,*a12,*a13,*a14,*a15,*a16,
00526 *b0;
00527
00528 if (m<s)
00529 exit(0);
00530
00531 if (m==0 || n==0)
00532 return;
00533
00534 for(i=0; i<s; ++i) {
00535 j = subb[i];
00536 u = j-subb[0];
00537
00538 b0 = b+firb[u];
00539
00540 delay = j+1;
00541 sze = m-i-1;
00542 ls = subb+i+1;
00543
00544 k = 0;
00545
00546 for(; k+15<n; k+=16) {
00547 a1 = a+(fira[k+0 ]+i);
00548 a2 = a+(fira[k+1 ]+i);
00549 a3 = a+(fira[k+2 ]+i);
00550 a4 = a+(fira[k+3 ]+i);
00551 a5 = a+(fira[k+4 ]+i);
00552 a6 = a+(fira[k+5 ]+i);
00553 a7 = a+(fira[k+6 ]+i);
00554 a8 = a+(fira[k+7 ]+i);
00555 a9 = a+(fira[k+8 ]+i);
00556 a10 = a+(fira[k+9 ]+i);
00557 a11 = a+(fira[k+10]+i);
00558 a12 = a+(fira[k+11]+i);
00559 a13 = a+(fira[k+12]+i);
00560 a14 = a+(fira[k+13]+i);
00561 a15 = a+(fira[k+14]+i);
00562 a16 = a+(fira[k+15]+i);
00563
00564 rtemp1 = *a1/diaga[k+0];
00565 rtemp2 = *a2/diaga[k+1];
00566 rtemp3 = *a3/diaga[k+2];
00567 rtemp4 = *a4/diaga[k+3];
00568 rtemp5 = *a5/diaga[k+4];
00569 rtemp6 = *a6/diaga[k+5];
00570 rtemp7 = *a7/diaga[k+6];
00571 rtemp8 = *a8/diaga[k+7];
00572 rtemp9 = *a9/diaga[k+8];
00573 rtemp10 = *a10/diaga[k+9];
00574 rtemp11 = *a11/diaga[k+10];
00575 rtemp12 = *a12/diaga[k+11];
00576 rtemp13 = *a13/diaga[k+12];
00577 rtemp14 = *a14/diaga[k+13];
00578 rtemp15 = *a15/diaga[k+14];
00579 rtemp16 = *a16/diaga[k+15];
00580
00581 diagb[u] -= rtemp1 * (*a1)
00582 + rtemp2 * (*a2)
00583 + rtemp3 * (*a3)
00584 + rtemp4 * (*a4)
00585 + rtemp5 * (*a5)
00586 + rtemp6 * (*a6)
00587 + rtemp7 * (*a7)
00588 + rtemp8 * (*a8)
00589 + rtemp9 * (*a9)
00590 + rtemp10 * (*a10)
00591 + rtemp11 * (*a11)
00592 + rtemp12 * (*a12)
00593 + rtemp13 * (*a13)
00594 + rtemp14 * (*a14)
00595 + rtemp15 * (*a15)
00596 + rtemp16 * (*a16);
00597
00598
00599 ++ a1;
00600 ++ a2;
00601 ++ a3;
00602 ++ a4;
00603 ++ a5;
00604 ++ a6;
00605 ++ a7;
00606 ++ a8;
00607 ++ a9;
00608 ++ a10;
00609 ++ a11;
00610 ++ a12;
00611 ++ a13;
00612 ++ a14;
00613 ++ a15;
00614 ++ a16;
00615
00616 for(t=0; t<sze; ++t)
00617 b0[ls[t]-delay] -= rtemp1 * a1[t]
00618 + rtemp2 * a2[t]
00619 + rtemp3 * a3[t]
00620 + rtemp4 * a4[t]
00621 + rtemp5 * a5[t]
00622 + rtemp6 * a6[t]
00623 + rtemp7 * a7[t]
00624 + rtemp8 * a8[t]
00625 + rtemp9 * a9[t]
00626 + rtemp10 * a10[t]
00627 + rtemp11 * a11[t]
00628 + rtemp12 * a12[t]
00629 + rtemp13 * a13[t]
00630 + rtemp14 * a14[t]
00631 + rtemp15 * a15[t]
00632 + rtemp16 * a16[t];
00633 }
00634
00635 for(; k+11<n; k+=12) {
00636 a1 = a+(fira[k+0 ]+i);
00637 a2 = a+(fira[k+1 ]+i);
00638 a3 = a+(fira[k+2 ]+i);
00639 a4 = a+(fira[k+3 ]+i);
00640 a5 = a+(fira[k+4 ]+i);
00641 a6 = a+(fira[k+5 ]+i);
00642 a7 = a+(fira[k+6 ]+i);
00643 a8 = a+(fira[k+7 ]+i);
00644 a9 = a+(fira[k+8 ]+i);
00645 a10 = a+(fira[k+9 ]+i);
00646 a11 = a+(fira[k+10]+i);
00647 a12 = a+(fira[k+11]+i);
00648
00649 rtemp1 = *a1/diaga[k+0];
00650 rtemp2 = *a2/diaga[k+1];
00651 rtemp3 = *a3/diaga[k+2];
00652 rtemp4 = *a4/diaga[k+3];
00653 rtemp5 = *a5/diaga[k+4];
00654 rtemp6 = *a6/diaga[k+5];
00655 rtemp7 = *a7/diaga[k+6];
00656 rtemp8 = *a8/diaga[k+7];
00657 rtemp9 = *a9/diaga[k+8];
00658 rtemp10 = *a10/diaga[k+9];
00659 rtemp11 = *a11/diaga[k+10];
00660 rtemp12 = *a12/diaga[k+11];
00661
00662 diagb[u] -= rtemp1 * (*a1)
00663 + rtemp2 * (*a2)
00664 + rtemp3 * (*a3)
00665 + rtemp4 * (*a4)
00666 + rtemp5 * (*a5)
00667 + rtemp6 * (*a6)
00668 + rtemp7 * (*a7)
00669 + rtemp8 * (*a8)
00670 + rtemp9 * (*a9)
00671 + rtemp10 * (*a10)
00672 + rtemp11 * (*a11)
00673 + rtemp12 * (*a12);
00674
00675 ++ a1;
00676 ++ a2;
00677 ++ a3;
00678 ++ a4;
00679 ++ a5;
00680 ++ a6;
00681 ++ a7;
00682 ++ a8;
00683 ++ a9;
00684 ++ a10;
00685 ++ a11;
00686 ++ a12;
00687
00688 for(t=0; t<sze; ++t)
00689 b0[ls[t]-delay] -= rtemp1 * a1[t]
00690 + rtemp2 * a2[t]
00691 + rtemp3 * a3[t]
00692 + rtemp4 * a4[t]
00693 + rtemp5 * a5[t]
00694 + rtemp6 * a6[t]
00695 + rtemp7 * a7[t]
00696 + rtemp8 * a8[t]
00697 + rtemp9 * a9[t]
00698 + rtemp10 * a10[t]
00699 + rtemp11 * a11[t]
00700 + rtemp12 * a12[t];
00701 }
00702
00703 for(; k+7<n; k+=8) {
00704 a1 = a+(fira[k+0 ]+i);
00705 a2 = a+(fira[k+1 ]+i);
00706 a3 = a+(fira[k+2 ]+i);
00707 a4 = a+(fira[k+3 ]+i);
00708 a5 = a+(fira[k+4 ]+i);
00709 a6 = a+(fira[k+5 ]+i);
00710 a7 = a+(fira[k+6 ]+i);
00711 a8 = a+(fira[k+7 ]+i);
00712
00713 rtemp1 = *a1/diaga[k+0];
00714 rtemp2 = *a2/diaga[k+1];
00715 rtemp3 = *a3/diaga[k+2];
00716 rtemp4 = *a4/diaga[k+3];
00717 rtemp5 = *a5/diaga[k+4];
00718 rtemp6 = *a6/diaga[k+5];
00719 rtemp7 = *a7/diaga[k+6];
00720 rtemp8 = *a8/diaga[k+7];
00721
00722 diagb[u] -= rtemp1 * (*a1)
00723 + rtemp2 * (*a2)
00724 + rtemp3 * (*a3)
00725 + rtemp4 * (*a4)
00726 + rtemp5 * (*a5)
00727 + rtemp6 * (*a6)
00728 + rtemp7 * (*a7)
00729 + rtemp8 * (*a8);
00730
00731 ++ a1;
00732 ++ a2;
00733 ++ a3;
00734 ++ a4;
00735 ++ a5;
00736 ++ a6;
00737 ++ a7;
00738 ++ a8;
00739
00740 for(t=0; t<sze; ++t)
00741 b0[ls[t]-delay] -= rtemp1 * a1[t]
00742 + rtemp2 * a2[t]
00743 + rtemp3 * a3[t]
00744 + rtemp4 * a4[t]
00745 + rtemp5 * a5[t]
00746 + rtemp6 * a6[t]
00747 + rtemp7 * a7[t]
00748 + rtemp8 * a8[t];
00749
00750 }
00751
00752 for(; k+3<n; k+=4) {
00753 a1 = a+(fira[k+0 ]+i);
00754 a2 = a+(fira[k+1 ]+i);
00755 a3 = a+(fira[k+2 ]+i);
00756 a4 = a+(fira[k+3 ]+i);
00757
00758 rtemp1 = *a1/diaga[k+0];
00759 rtemp2 = *a2/diaga[k+1];
00760 rtemp3 = *a3/diaga[k+2];
00761 rtemp4 = *a4/diaga[k+3];
00762
00763 diagb[u]-= rtemp1 * (*a1)
00764 +rtemp2 * (*a2)
00765 +rtemp3 * (*a3)
00766 +rtemp4 * (*a4);
00767
00768 ++ a1;
00769 ++ a2;
00770 ++ a3;
00771 ++ a4;
00772
00773 for(t=0; t<sze; ++t)
00774 b0[ls[t]-delay] -= rtemp1 * a1[t]
00775 + rtemp2 * a2[t]
00776 + rtemp3 * a3[t]
00777 + rtemp4 * a4[t];
00778
00779 }
00780
00781 for(; k+1<n; k+=2) {
00782 a1 = a+(fira[k+0 ]+i);
00783 a2 = a+(fira[k+1 ]+i);
00784
00785 rtemp1 = *a1/diaga[k+0];
00786 rtemp2 = *a2/diaga[k+1];
00787
00788 diagb[u] -= rtemp1 * (*a1)
00789 + rtemp2 * (*a2);
00790
00791 ++ a1;
00792 ++ a2;
00793
00794 for(t=0; t<sze; ++t)
00795 b0[ls[t]-delay] -= rtemp1 * a1[t]
00796 + rtemp2 * a2[t];
00797 }
00798
00799 for(; k<n; ++k) {
00800 a1 = a+(fira[k+0 ]+i);
00801
00802 rtemp1 = *a1/diaga[k+0];
00803
00804 diagb[u] -= rtemp1 * (*a1);
00805
00806 ++ a1;
00807
00808 for(t=0; t<sze; ++t)
00809 b0[ls[t]-delay] -= rtemp1 * a1[t];
00810 }
00811 }
00812 }
00813
00814 static void ExtUpdSnode(chfac *cf,
00815 int snde,
00816 int usnde,
00817 int f,
00818 int l,
00819 int start,
00820 int iw[])
00821 {
00822 int k,sze,
00823 *ls,
00824 *subg=cf->subg,
00825 *ujsze=cf->ujsze,*usub=cf->usub,*ujbeg=cf->ujbeg,*uhead=cf->uhead;
00826 double *diag=cf->diag,*uval=cf->uval;
00827
00828 f += subg[snde];
00829 l += subg[snde];
00830
00831 if (usnde==cf->nsnds-1) {
00832 if (usub[ujbeg[f]+start]<subg[usnde]) {
00833 printf("\n Index error");
00834 exit(0);
00835 }
00836
00837 if (cf->sdens)
00838 exit(0);
00839
00840 ls = usub+ujbeg[f]+start;
00841 sze = ujsze[f]-start;
00842
00843 for(k=f; k<l; ++k)
00844 iw[k-f] =uhead[k]+start-(k-f);
00845
00846 UpdSnodes(sze,l-f,sze,diag+f,uval,iw,diag+ls[0],uval,uhead+ls[0],ls);
00847 }
00848 else
00849 exit(0);
00850 }
00851
00852 static void PushFward(chfac *cf,
00853 int snde,
00854 int f,
00855 int l,
00856 int iw[])
00857 {
00858 int j,s,t,u,k,stops,offset,sze,itemp,
00859 *ls0,*ls1,
00860 *map=iw,*subg=cf->subg,
00861 *ujsze=cf->ujsze,*uhead=cf->uhead,*ujbeg=cf->ujbeg,*usub=cf->usub;
00862 double rtemp1,*l0,*l1,
00863 *diag=cf->diag,*uval=cf->uval;
00864
00865 if (f>subg[snde+1]-subg[snde]) {
00866 printf("\n PushFward");
00867 exit(0);
00868 }
00869
00870 if (f==l)
00871 return;
00872
00873 f += subg[snde];
00874 l += subg[snde];
00875
00876 offset = subg[snde+1]-f-1;
00877 sze = ujsze[f] - offset;
00878 ls1 = usub+ujbeg[f]+offset;
00879
00880 if (f+1==l) {
00881 l1 = uval+uhead[f]+offset;
00882
00883 stops = sze;
00884 for(t=0; t<sze; ++t) {
00885 j = ls1[0];
00886
00887 if (j>=subg[cf->nsnds-1])
00888 break;
00889
00890 rtemp1 = l1[0]/diag[f];
00891 diag[j] -= rtemp1*l1[0];
00892 ++ l1;
00893
00894 l0 = uval+uhead[j];
00895 ls0 = usub+ujbeg[j];
00896
00897 ++ ls1;
00898 -- stops;
00899
00900 if (stops && ls1[stops-1]==ls0[stops-1]) {
00901 for(s=0; s<stops; ++s)
00902 l0[s] -= rtemp1 * l1[s];
00903 }
00904
00905 else {
00906 for(s=0, u=0; s<stops; ++u) {
00907 if (ls0[u]==ls1[s]) {
00908 l0[u] -= rtemp1 * l1[s];
00909 ++ s;
00910 }
00911 }
00912 }
00913 }
00914
00915 if (t<sze && !cf->sdens)
00916 ExtUpdSnode(cf,snde,cf->nsnds-1,f-subg[snde],l-subg[snde],t,iw);
00917 }
00918
00919 else {
00920 stops = sze;
00921 for(t=0; t<sze; ++t, ++offset) {
00922 j = ls1[0];
00923
00924 if (j>=subg[cf->nsnds-1]) {
00925 if (!cf->sdens)
00926 ExtUpdSnode(cf,snde,cf->nsnds-1,f-subg[snde],
00927 l-subg[snde],offset,iw);
00928 break;
00929 }
00930
00931 ls0 = usub+ujbeg[j];
00932 l0 = uval+uhead[j];
00933
00934 ++ ls1;
00935 -- stops;
00936
00937 k = f;
00938 itemp = offset+f;
00939
00940 if (stops && ls1[stops-1]==ls0[stops-1]) {
00941 for(k=f; k<l; ++k)
00942 map[k-f] = uhead[k]+itemp-k;
00943
00944 UpdSnode(1+stops,l-f,1,diag+f,uval,map,diag+j,uval,uhead+j);
00945 }
00946
00947 else {
00948 map[l] = 0;
00949 for(s=0, u=0; s<stops; ++u) {
00950 if (ls1[s]==ls0[u]) {
00951 map[1+l+s] = 1+u;
00952 ++ s;
00953 }
00954 }
00955
00956 for(k=f; k<l; ++k)
00957 map[k-f] = uhead[k]+itemp-k;
00958
00959 UpdSnodes(1+stops,l-f,1,diag+f,uval,map,diag+j,uval,uhead+j,map+l);
00960 }
00961 }
00962 }
00963 }
00964
00965 static int FacDenNode(chfac *cf,
00966 int iw[],
00967 double rw[],
00968 int mode)
00969 {
00970 int c,d,j,s,t,sncl,k,k0,m,cacsze,sze,offset,
00971 *subg=cf->subg,*ujsze=cf->ujsze,
00972 *ujbeg=cf->ujbeg,*uhead=cf->uhead,
00973 *usub=cf->usub,*dhead=cf->dhead,
00974 *dsub=cf->dsub,*dbeg=cf->dbeg,*ls;
00975 double *diag=cf->diag,*uval=cf->uval;
00976 int sresp;
00977
00978 cacsze = cf->cachesize*cf->cacheunit;
00979
00980 if (cf->sdens) {
00981 for(d=0; d<cf->ndens; ++d) {
00982 c = 0;
00983 for(k=dhead[d]; k<dhead[d+1]; ++k) {
00984 offset = dbeg[k];
00985 s = dsub[k];
00986 if (usub[ujbeg[subg[s]]+offset]<subg[cf->nsnds-1]) {
00987 printf("\nindex error1");
00988 exit(0);
00989 }
00990
00991 for(j=subg[s]; j<subg[s+1]; ++c, ++j) {
00992 rw[c] = diag[j];
00993 iw[c] = uhead[j]+offset-(j-subg[s]);
00994
00995 if (usub[ujbeg[j]+offset-(j-subg[s])]<subg[cf->nsnds-1]) {
00996 printf("\nindex error");
00997 exit(0);
00998 }
00999 }
01000 }
01001
01002 if (c) {
01003 k = dhead[d];
01004 s = dsub[k];
01005 m = ujsze[subg[s]]-dbeg[k];
01006 ls = usub+ujbeg[subg[s]]+dbeg[k];
01007 if (m) {
01008 for(k=0; k<c;) {
01009 t = cacsze/(m*sizeof(double));
01010 t = max(t,1);
01011 t = min(c-k,t);
01012
01013 UpdSnodes(m,t,m,rw+k,uval,iw+k,
01014 diag+ls[0],uval,uhead+ls[0],ls);
01015 k+=t;
01016 }
01017 }
01018 }
01019 }
01020 }
01021
01022 s = cf->nsnds-1;
01023
01024 sncl = cf->subg[s+1]-cf->subg[s];
01025 for(k=0; k<sncl;) {
01026 k0 = k;
01027 for(sze=0; sze<cacsze && k<sncl; ++k)
01028 sze += ujsze[subg[s]+k] * sizeof(double);
01029
01030 if (k==k0)
01031 ++k;
01032 else if (k>=k0+2 && sze>cacsze)
01033 --k;
01034
01035 if (k>sncl)
01036 exit(0);
01037
01038 sresp = FacSnode(cf,s,k0,k,iw,mode);
01039 if (sresp!=CfcOk)
01040 return (sresp);
01041
01042 iUpdSnode(cf,s,k0,k,k,sncl,iw);
01043
01044 }
01045 return (CfcOk);
01046 }
01047
01048 int ChlFact(chfac *cf,
01049 int *iw,
01050 double *rw,
01051 int mode)
01052 {
01053 int s,sncl,k,k0,cacsze,sze,
01054 *subg=cf->subg,*ujsze=cf->ujsze;
01055 int cid;
01056
01057 cacsze=cf->cachesize*cf->cacheunit;
01058
01059 for(s=0; s+1<cf->nsnds; ++s) {
01060 sncl = cf->subg[s+1]-cf->subg[s];
01061
01062 for(k=0; k<sncl;) {
01063 k0 = k;
01064 for(sze=0; sze<=cacsze && k<sncl; ++k)
01065 sze += ujsze[subg[s]+k]*sizeof(double);
01066
01067 if (k==k0)
01068 ++k;
01069 else if (k>=k0+2 && sze>cacsze)
01070 --k;
01071
01072 if (k>sncl)
01073 exit(0);
01074
01075 cid=FacSnode(cf,s,k0,k,iw,mode);
01076 if (cid!=CfcOk)
01077 return (cid);
01078
01079 iUpdSnode(cf,s,k0,k,k,sncl,iw);
01080
01081 PushFward(cf,s,k0,k,iw);
01082 }
01083 }
01084
01085 cid=FacDenNode(cf,iw,rw,mode);
01086
01087 for (k=0;k<cf->nrow;k++){
01088 cf->sqrtdiag[k]=sqrt(fabs(cf->diag[k]));
01089 }
01090
01091 return cid;
01092 }