sdpnfac.c

00001 #include "numchol.h"
00002 
00003 static void UpdSnode(int    m,
00004                      int    n,
00005                      int    s,
00006                      double diaga[],
00007                      double *a,
00008                      int    fira[],
00009                      double diagb[],
00010                      double *b,
00011                      int    firb[])
00012 {
00013   int    i,k,t,sze;
00014   double rtemp1, rtemp2, rtemp3, rtemp4,
00015          rtemp5, rtemp6, rtemp7, rtemp8,
00016          rtemp9, rtemp10,rtemp11,rtemp12,
00017          rtemp13,rtemp14,rtemp15,rtemp16,
00018          *a1,*a2, *a3, *a4, *a5, *a6, *a7, *a8,
00019          *a9,*a10,*a11,*a12,*a13,*a14,*a15,*a16,
00020          *b00,*b1,*b0;
00021     
00022   for(i=0; i+1<s; i+=2) {
00023     b00 = b+firb[i];
00024     b0  = b00+1;
00025     b1  = b+firb[i+1];
00026     sze = m-i-2;
00027     k   = 0;
00028 
00029     for(; k+3<n; k+=4) {
00030       a1          = a+(fira[k+0 ]+i);
00031       a2          = a+(fira[k+1 ]+i);
00032       a3          = a+(fira[k+2 ]+i);
00033       a4          = a+(fira[k+3 ]+i);
00034 
00035       rtemp1      = a1[0]/diaga[k+0];
00036       rtemp2      = a2[0]/diaga[k+1];
00037       rtemp3      = a3[0]/diaga[k+2];
00038       rtemp4      = a4[0]/diaga[k+3];
00039 
00040       diagb[i]   -=   rtemp1  * a1[0]
00041                     + rtemp2  * a2[0]
00042                     + rtemp3  * a3[0]
00043                     + rtemp4  * a4[0];
00044 
00045                  ++ a1;
00046                  ++ a2;
00047                  ++ a3;
00048                  ++ a4;
00049 
00050       rtemp5      = a1[0]/diaga[k+0];
00051       rtemp6      = a2[0]/diaga[k+1];
00052       rtemp7      = a3[0]/diaga[k+2];
00053       rtemp8      = a4[0]/diaga[k+3];
00054 
00055       b00[0]     -=   rtemp1 * a1[0]
00056                     + rtemp2 * a2[0]
00057                     + rtemp3 * a3[0]
00058                     + rtemp4 * a4[0];
00059 
00060       diagb[i+1] -=   rtemp5  * a1[0]
00061                     + rtemp6  * a2[0]
00062                     + rtemp7  * a3[0]
00063                     + rtemp8  * a4[0];
00064 
00065                ++ a1;
00066                ++ a2;
00067                ++ a3;
00068                ++ a4;
00069 
00070       for(t=0; t<sze; ++t) {
00071         b0[t] -=   rtemp1  * a1[t]
00072                  + rtemp2  * a2[t]
00073                  + rtemp3  * a3[t]
00074                  + rtemp4  * a4[t];
00075 
00076         b1[t] -=   rtemp5  * a1[t]
00077                  + rtemp6  * a2[t]
00078                  + rtemp7  * a3[t]
00079                  + rtemp8  * a4[t];
00080       }
00081     }
00082 
00083     for(; k+1<n; k+=2) {
00084       a1          = a+(fira[k+0 ]+i);
00085       a2          = a+(fira[k+1 ]+i);
00086 
00087       rtemp1      =    a1[0] / diaga[k+0];
00088       rtemp2      =    a2[0] / diaga[k+1];
00089 
00090       diagb[i]   -=   a1[0] * rtemp1
00091                     + a2[0] * rtemp2;
00092 
00093                  ++ a1;
00094                  ++ a2;
00095 
00096       rtemp5      =   a1[0] / diaga[k+0];
00097       rtemp6      =   a2[0] / diaga[k+1];
00098 
00099       b00[0]     -=   a1[0] * rtemp1
00100                     + a2[0] * rtemp2;
00101 
00102       diagb[i+1] -=   a1[0] * rtemp5
00103                     + a2[0] * rtemp6;
00104 
00105 
00106                ++ a1;
00107                ++ a2;
00108 
00109       for(t=0; t<sze; ++t) {
00110         b0[t] -=   a1[t] * rtemp1
00111                  + a2[t] * rtemp2;
00112 
00113         b1[t] -=   a1[t] * rtemp5
00114                  + a2[t] * rtemp6;
00115       }
00116     }
00117 
00118     for(; k<n; ++k) {
00119       a1          = a+(fira[k+0 ]+i);
00120 
00121       rtemp1      = a1[0] / diaga[k+0];
00122 
00123       diagb[i]   -= a1[0] * rtemp1;
00124 
00125                  ++ a1;
00126 
00127       rtemp5      = a1[0] / diaga[k+0];
00128 
00129       diagb[i+1] -= a1[0] * rtemp5;
00130 
00131       b00[0]     -= a1[0] * rtemp1;
00132 
00133                  ++ a1;
00134 
00135       for(t=0; t<sze; ++t)
00136       {
00137         b0[t] -= rtemp1 * a1[t];
00138         b1[t] -= rtemp5 * a1[t];
00139       }
00140     }
00141   }
00142 
00143   for(; i<s; ++i) {
00144 
00145     b0  = b+firb[i];
00146     sze = m-i-1;
00147     k   = 0;
00148 
00149     for(; k+15<n; k+=16) {
00150       a1       = a+(fira[k+0 ]+i);
00151       a2       = a+(fira[k+1 ]+i);
00152       a3       = a+(fira[k+2 ]+i);
00153       a4       = a+(fira[k+3 ]+i);
00154       a5       = a+(fira[k+4 ]+i);
00155       a6       = a+(fira[k+5 ]+i);
00156       a7       = a+(fira[k+6 ]+i);
00157       a8       = a+(fira[k+7 ]+i);
00158       a9       = a+(fira[k+8 ]+i);
00159       a10      = a+(fira[k+9 ]+i);
00160       a11      = a+(fira[k+10]+i);
00161       a12      = a+(fira[k+11]+i);
00162       a13      = a+(fira[k+12]+i);
00163       a14      = a+(fira[k+13]+i);
00164       a15      = a+(fira[k+14]+i);
00165       a16      = a+(fira[k+15]+i);
00166 
00167       rtemp1    = *a1/diaga[k+0];
00168       rtemp2    = *a2/diaga[k+1];
00169       rtemp3    = *a3/diaga[k+2];
00170       rtemp4    = *a4/diaga[k+3];
00171       rtemp5    = *a5/diaga[k+4];
00172       rtemp6    = *a6/diaga[k+5];
00173       rtemp7    = *a7/diaga[k+6];
00174       rtemp8    = *a8/diaga[k+7];
00175       rtemp9    = *a9/diaga[k+8];
00176       rtemp10   = *a10/diaga[k+9];
00177       rtemp11   = *a11/diaga[k+10];
00178       rtemp12   = *a12/diaga[k+11];
00179       rtemp13   = *a13/diaga[k+12];
00180       rtemp14   = *a14/diaga[k+13];
00181       rtemp15   = *a15/diaga[k+14];
00182       rtemp16   = *a16/diaga[k+15];
00183 
00184       diagb[i] -=   rtemp1  * (*a1)
00185                   + rtemp2  * (*a2)
00186                   + rtemp3  * (*a3)
00187                   + rtemp4  * (*a4)
00188                   + rtemp5  * (*a5)
00189                   + rtemp6  * (*a6)
00190                   + rtemp7  * (*a7)
00191                   + rtemp8  * (*a8)
00192                   + rtemp9  * (*a9)
00193                   + rtemp10 * (*a10)
00194                   + rtemp11 * (*a11)
00195                   + rtemp12 * (*a12)
00196                   + rtemp13 * (*a13)
00197                   + rtemp14 * (*a14)
00198                   + rtemp15 * (*a15)
00199                   + rtemp16 * (*a16);
00200 
00201 
00202                ++ a1;
00203                ++ a2;
00204                ++ a3;
00205                ++ a4;
00206                ++ a5;
00207                ++ a6;
00208                ++ a7;
00209                ++ a8;
00210                ++ a9;
00211                ++ a10;
00212                ++ a11;
00213                ++ a12;
00214                ++ a13;
00215                ++ a14;
00216                ++ a15;
00217                ++ a16;
00218 
00219       for(t=0; t<sze; ++t)
00220         b0[t] -=   rtemp1  * a1[t]
00221                  + rtemp2  * a2[t]
00222                  + rtemp3  * a3[t]
00223                  + rtemp4  * a4[t]
00224                  + rtemp5  * a5[t]
00225                  + rtemp6  * a6[t]
00226                  + rtemp7  * a7[t]
00227                  + rtemp8  * a8[t]
00228                  + rtemp9  * a9[t]
00229                  + rtemp10 * a10[t]
00230                  + rtemp11 * a11[t]
00231                  + rtemp12 * a12[t]
00232                  + rtemp13 * a13[t]
00233                  + rtemp14 * a14[t]
00234                  + rtemp15 * a15[t]
00235                  + rtemp16 * a16[t];
00236 
00237     }
00238 
00239     for(; k+11<n; k+=12) {
00240       a1       = a+(fira[k+0 ]+i);
00241       a2       = a+(fira[k+1 ]+i);
00242       a3       = a+(fira[k+2 ]+i);
00243       a4       = a+(fira[k+3 ]+i);
00244       a5       = a+(fira[k+4 ]+i);
00245       a6       = a+(fira[k+5 ]+i);
00246       a7       = a+(fira[k+6 ]+i);
00247       a8       = a+(fira[k+7 ]+i);
00248       a9       = a+(fira[k+8 ]+i);
00249       a10      = a+(fira[k+9 ]+i);
00250       a11      = a+(fira[k+10]+i);
00251       a12      = a+(fira[k+11]+i);
00252 
00253       rtemp1    = *a1/diaga[k+0];
00254       rtemp2    = *a2/diaga[k+1];
00255       rtemp3    = *a3/diaga[k+2];
00256       rtemp4    = *a4/diaga[k+3];
00257       rtemp5    = *a5/diaga[k+4];
00258       rtemp6    = *a6/diaga[k+5];
00259       rtemp7    = *a7/diaga[k+6];
00260       rtemp8    = *a8/diaga[k+7];
00261       rtemp9    = *a9/diaga[k+8];
00262       rtemp10   = *a10/diaga[k+9];
00263       rtemp11   = *a11/diaga[k+10];
00264       rtemp12   = *a12/diaga[k+11];
00265 
00266       diagb[i] -=   rtemp1  * (*a1)
00267                   + rtemp2  * (*a2)
00268                   + rtemp3  * (*a3)
00269                   + rtemp4  * (*a4)
00270                   + rtemp5  * (*a5)
00271                   + rtemp6  * (*a6)
00272                   + rtemp7  * (*a7)
00273                   + rtemp8  * (*a8)
00274                   + rtemp9  * (*a9)
00275                   + rtemp10 * (*a10)
00276                   + rtemp11 * (*a11)
00277                   + rtemp12 * (*a12);
00278 
00279                ++ a1;
00280                ++ a2;
00281                ++ a3;
00282                ++ a4;
00283                ++ a5;
00284                ++ a6;
00285                ++ a7;
00286                ++ a8;
00287                ++ a9;
00288                ++ a10;
00289                ++ a11;
00290                ++ a12;
00291 
00292       for(t=0; t<sze; ++t)
00293         b0[t] -=   rtemp1  * a1[t]
00294                  + rtemp2  * a2[t]
00295                  + rtemp3  * a3[t]
00296                  + rtemp4  * a4[t]
00297                  + rtemp5  * a5[t]
00298                  + rtemp6  * a6[t]
00299                  + rtemp7  * a7[t]
00300                  + rtemp8  * a8[t]
00301                  + rtemp9  * a9[t]
00302                  + rtemp10 * a10[t]
00303                  + rtemp11 * a11[t]
00304                  + rtemp12 * a12[t];
00305 
00306     }
00307     
00308     for(; k+7<n; k+=8) {
00309       a1       = a+(fira[k+0 ]+i);
00310       a2       = a+(fira[k+1 ]+i);
00311       a3       = a+(fira[k+2 ]+i);
00312       a4       = a+(fira[k+3 ]+i);
00313       a5       = a+(fira[k+4 ]+i);
00314       a6       = a+(fira[k+5 ]+i);
00315       a7       = a+(fira[k+6 ]+i);
00316       a8       = a+(fira[k+7 ]+i);
00317 
00318       rtemp1    = *a1/diaga[k+0];
00319       rtemp2    = *a2/diaga[k+1];
00320       rtemp3    = *a3/diaga[k+2];
00321       rtemp4    = *a4/diaga[k+3];
00322       rtemp5    = *a5/diaga[k+4];
00323       rtemp6    = *a6/diaga[k+5];
00324       rtemp7    = *a7/diaga[k+6];
00325       rtemp8    = *a8/diaga[k+7];
00326 
00327       diagb[i] -=   rtemp1  * (*a1)
00328                   + rtemp2  * (*a2)
00329                   + rtemp3  * (*a3)
00330                   + rtemp4  * (*a4)
00331                   + rtemp5  * (*a5)
00332                   + rtemp6  * (*a6)
00333                   + rtemp7  * (*a7)
00334                   + rtemp8  * (*a8);
00335 
00336                ++ a1;
00337                ++ a2;
00338                ++ a3;
00339                ++ a4;
00340                ++ a5;
00341                ++ a6;
00342                ++ a7;
00343                ++ a8;
00344 
00345       for(t=0; t<sze; ++t)
00346         b0[t] -=   rtemp1  * a1[t]
00347                  + rtemp2  * a2[t]
00348                  + rtemp3  * a3[t]
00349                  + rtemp4  * a4[t]
00350                  + rtemp5  * a5[t]
00351                  + rtemp6  * a6[t]
00352                  + rtemp7  * a7[t]
00353                  + rtemp8  * a8[t];
00354 
00355     }
00356 
00357     for(; k+3<n; k+=4) {
00358       a1       = a+(fira[k+0 ]+i);
00359       a2       = a+(fira[k+1 ]+i);
00360       a3       = a+(fira[k+2 ]+i);
00361       a4       = a+(fira[k+3 ]+i);
00362 
00363       rtemp1    = *a1/diaga[k+0];
00364       rtemp2    = *a2/diaga[k+1];
00365       rtemp3    = *a3/diaga[k+2];
00366       rtemp4    = *a4/diaga[k+3];
00367 
00368       diagb[i] -=   rtemp1  * (*a1)
00369                   + rtemp2  * (*a2)
00370                   + rtemp3  * (*a3)
00371                   + rtemp4  * (*a4);
00372 
00373                ++ a1;
00374                ++ a2;
00375                ++ a3;
00376                ++ a4;
00377 
00378       for(t=0; t<sze; ++t)
00379         b0[t] -=   rtemp1  * a1[t]
00380                  + rtemp2  * a2[t]
00381                  + rtemp3  * a3[t]
00382                  + rtemp4  * a4[t];
00383 
00384     }
00385 
00386     for(; k+1<n; k+=2) {
00387       a1       = a+(fira[k+0 ]+i);
00388       a2       = a+(fira[k+1 ]+i);
00389 
00390       rtemp1    = *a1/diaga[k+0];
00391       rtemp2    = *a2/diaga[k+1];
00392 
00393       diagb[i] -=   rtemp1  * (*a1)
00394                   + rtemp2  * (*a2);
00395 
00396                ++ a1;
00397                ++ a2;
00398 
00399       for(t=0; t<sze; ++t)
00400         b0[t] -=   rtemp1  * a1[t]
00401                  + rtemp2  * a2[t];
00402     }
00403 
00404     for(; k<n; ++k) {
00405       a1        = a+(fira[k+0 ]+i);
00406 
00407       rtemp1    = *a1/diaga[k+0];
00408 
00409       diagb[i] -= rtemp1  * (*a1);
00410 
00411                ++ a1;
00412 
00413       for(t=0; t<sze; ++t)
00414         b0[t] -=   rtemp1  * a1[t];
00415     }
00416   }
00417 } /*  UpdSnode */
00418 
00419 static void iUpdSnode(chfac  *cf,
00420                         int    snde,
00421                         int    f,
00422                         int    l,
00423                         int    uf,
00424                         int    ul,
00425                         int    iw[])
00426 {
00427   int    k,
00428          *ujsze=cf->ujsze,*uhead=cf->uhead,*subg=cf->subg;
00429   double *diag=cf->diag,*uval=cf->uval;
00430 
00431   if (f==l || uf==ul)
00432     return;
00433 
00434   f  += subg[snde];
00435   l  += subg[snde];
00436   uf += subg[snde];
00437   ul += subg[snde];
00438 
00439   for(k=f; k<l; ++k)
00440     iw[k-f] = uhead[k]+uf-k-1;
00441 
00442   UpdSnode(1+ujsze[uf],l-f,ul-uf,diag+f,uval,iw,diag+uf,uval,uhead+uf);
00443 } /* iUpdSnode */
00444 
00445 static int DiagUpdate(double *dii,
00446                       int    mode)
00447 {
00448   int    id=TRUE;
00449   
00450   if (mode) {
00451     if (*dii<1.0e-13)
00452       return FALSE;
00453   }
00454   else {
00455     if (fabs(*dii)<1.0e-35) {
00456       printf(" diagonal nearly zero: %5.1e.\n",(*dii));
00457       return FALSE;
00458     }
00459   }
00460   return id;
00461 } /* DiagUpdate */
00462 
00463 static int FacSnode(chfac  *cf,
00464                     int    snde,
00465                     int    f,
00466                     int    l,
00467                     int    iw[],
00468                     int    mode)
00469 {
00470   int    itemp,k;
00471 
00472   if (f==l)
00473     return (CfcOk);
00474 
00475   itemp = cf->subg[snde]+f;
00476 
00477   if (!DiagUpdate(cf->diag+itemp,
00478                   mode))
00479     return (CfcIndef);
00480   
00481   if (fabs(cf->diag[itemp])<=cf->tolpiv) {
00482       printf("Singular d[%d]=%e\n",
00483       cf->subg[snde]+f,cf->diag[cf->subg[snde]+f]);
00484       return (CfcIndef);
00485   }
00486    
00487   for(k=f+1; k<l; ++k) {
00488     iUpdSnode(cf,snde,f,k,k,k+1,iw);
00489      
00490     itemp = cf->subg[snde]+k;
00491 
00492     if (!DiagUpdate(&cf->diag[itemp],
00493                     mode))
00494       return (CfcIndef);
00495     
00496     if (fabs(cf->diag[itemp])<=cf->tolpiv) {
00497       printf(" singular d[%d]=%e\n",
00498              cf->subg[snde]+k,cf->diag[cf->subg[snde]+k]);
00499 
00500       return (CfcIndef);
00501    }
00502   }
00503   
00504   return CfcOk;
00505 } /* FacSnode */
00506 
00507 static void UpdSnodes(int    m,
00508                       int    n,
00509                       int    s,
00510                       double diaga[],
00511                       double *a,
00512                       int    fira[],
00513                       double diagb[],
00514                       double *b,
00515                       int    firb[],
00516                       int    subb[])
00517 {
00518   int    i,j,k,t,u,sze,delay,
00519           *ls;
00520   double rtemp1,rtemp2,rtemp3,rtemp4,
00521          rtemp5,rtemp6,rtemp7,rtemp8,
00522          rtemp9,rtemp10,rtemp11,rtemp12,
00523          rtemp13,rtemp14,rtemp15,rtemp16,
00524          *a1,*a2,*a3,*a4,*a5,*a6,*a7,*a8,
00525          *a9,*a10,*a11,*a12,*a13,*a14,*a15,*a16,
00526          *b0;
00527 
00528   if (m<s)
00529     exit(0);
00530 
00531   if (m==0 || n==0)
00532     return;
00533 
00534   for(i=0; i<s; ++i) {
00535     j     = subb[i];
00536     u     = j-subb[0];
00537 
00538     b0    = b+firb[u];
00539 
00540     delay = j+1;
00541     sze   = m-i-1;
00542     ls    = subb+i+1;
00543 
00544     k     = 0;
00545 
00546     for(; k+15<n; k+=16) {
00547       a1       = a+(fira[k+0 ]+i);
00548       a2       = a+(fira[k+1 ]+i);
00549       a3       = a+(fira[k+2 ]+i);
00550       a4       = a+(fira[k+3 ]+i);
00551       a5       = a+(fira[k+4 ]+i);
00552       a6       = a+(fira[k+5 ]+i);
00553       a7       = a+(fira[k+6 ]+i);
00554       a8       = a+(fira[k+7 ]+i);
00555       a9       = a+(fira[k+8 ]+i);
00556       a10      = a+(fira[k+9 ]+i);
00557       a11      = a+(fira[k+10]+i);
00558       a12      = a+(fira[k+11]+i);
00559       a13      = a+(fira[k+12]+i);
00560       a14      = a+(fira[k+13]+i);
00561       a15      = a+(fira[k+14]+i);
00562       a16      = a+(fira[k+15]+i);
00563 
00564       rtemp1    = *a1/diaga[k+0];
00565       rtemp2    = *a2/diaga[k+1];
00566       rtemp3    = *a3/diaga[k+2];
00567       rtemp4    = *a4/diaga[k+3];
00568       rtemp5    = *a5/diaga[k+4];
00569       rtemp6    = *a6/diaga[k+5];
00570       rtemp7    = *a7/diaga[k+6];
00571       rtemp8    = *a8/diaga[k+7];
00572       rtemp9    = *a9/diaga[k+8];
00573       rtemp10   = *a10/diaga[k+9];
00574       rtemp11   = *a11/diaga[k+10];
00575       rtemp12   = *a12/diaga[k+11];
00576       rtemp13   = *a13/diaga[k+12];
00577       rtemp14   = *a14/diaga[k+13];
00578       rtemp15   = *a15/diaga[k+14];
00579       rtemp16   = *a16/diaga[k+15];
00580 
00581       diagb[u] -=   rtemp1  * (*a1)
00582                   + rtemp2  * (*a2)
00583                   + rtemp3  * (*a3)
00584                   + rtemp4  * (*a4)
00585                   + rtemp5  * (*a5)
00586                   + rtemp6  * (*a6)
00587                   + rtemp7  * (*a7)
00588                   + rtemp8  * (*a8)
00589                   + rtemp9  * (*a9)
00590                   + rtemp10 * (*a10)
00591                   + rtemp11 * (*a11)
00592                   + rtemp12 * (*a12)
00593                   + rtemp13 * (*a13)
00594                   + rtemp14 * (*a14)
00595                   + rtemp15 * (*a15)
00596                   + rtemp16 * (*a16);
00597 
00598 
00599                ++ a1;
00600                ++ a2;
00601                ++ a3;
00602                ++ a4;
00603                ++ a5;
00604                ++ a6;
00605                ++ a7;
00606                ++ a8;
00607                ++ a9;
00608                ++ a10;
00609                ++ a11;
00610                ++ a12;
00611                ++ a13;
00612                ++ a14;
00613                ++ a15;
00614                ++ a16;
00615 
00616       for(t=0; t<sze; ++t)
00617         b0[ls[t]-delay] -=   rtemp1  * a1[t]
00618                            + rtemp2  * a2[t]
00619                            + rtemp3  * a3[t]
00620                            + rtemp4  * a4[t]
00621                            + rtemp5  * a5[t]
00622                            + rtemp6  * a6[t]
00623                            + rtemp7  * a7[t]
00624                            + rtemp8  * a8[t]
00625                            + rtemp9  * a9[t]
00626                            + rtemp10 * a10[t]
00627                            + rtemp11 * a11[t]
00628                            + rtemp12 * a12[t]
00629                            + rtemp13 * a13[t]
00630                            + rtemp14 * a14[t]
00631                            + rtemp15 * a15[t]
00632                            + rtemp16 * a16[t];
00633     }
00634     
00635     for(; k+11<n; k+=12) {
00636       a1       = a+(fira[k+0 ]+i);
00637       a2       = a+(fira[k+1 ]+i);
00638       a3       = a+(fira[k+2 ]+i);
00639       a4       = a+(fira[k+3 ]+i);
00640       a5       = a+(fira[k+4 ]+i);
00641       a6       = a+(fira[k+5 ]+i);
00642       a7       = a+(fira[k+6 ]+i);
00643       a8       = a+(fira[k+7 ]+i);
00644       a9       = a+(fira[k+8 ]+i);
00645       a10      = a+(fira[k+9 ]+i);
00646       a11      = a+(fira[k+10]+i);
00647       a12      = a+(fira[k+11]+i);
00648 
00649       rtemp1    = *a1/diaga[k+0];
00650       rtemp2    = *a2/diaga[k+1];
00651       rtemp3    = *a3/diaga[k+2];
00652       rtemp4    = *a4/diaga[k+3];
00653       rtemp5    = *a5/diaga[k+4];
00654       rtemp6    = *a6/diaga[k+5];
00655       rtemp7    = *a7/diaga[k+6];
00656       rtemp8    = *a8/diaga[k+7];
00657       rtemp9    = *a9/diaga[k+8];
00658       rtemp10   = *a10/diaga[k+9];
00659       rtemp11   = *a11/diaga[k+10];
00660       rtemp12   = *a12/diaga[k+11];
00661 
00662       diagb[u] -=   rtemp1  * (*a1)
00663                   + rtemp2  * (*a2)
00664                   + rtemp3  * (*a3)
00665                   + rtemp4  * (*a4)
00666                   + rtemp5  * (*a5)
00667                   + rtemp6  * (*a6)
00668                   + rtemp7  * (*a7)
00669                   + rtemp8  * (*a8)
00670                   + rtemp9  * (*a9)
00671                   + rtemp10 * (*a10)
00672                   + rtemp11 * (*a11)
00673                   + rtemp12 * (*a12);
00674 
00675                ++ a1;
00676                ++ a2;
00677                ++ a3;
00678                ++ a4;
00679                ++ a5;
00680                ++ a6;
00681                ++ a7;
00682                ++ a8;
00683                ++ a9;
00684                ++ a10;
00685                ++ a11;
00686                ++ a12;
00687 
00688       for(t=0; t<sze; ++t)
00689         b0[ls[t]-delay] -=   rtemp1  * a1[t]
00690                            + rtemp2  * a2[t]
00691                            + rtemp3  * a3[t]
00692                            + rtemp4  * a4[t]
00693                            + rtemp5  * a5[t]
00694                            + rtemp6  * a6[t]
00695                            + rtemp7  * a7[t]
00696                            + rtemp8  * a8[t]
00697                            + rtemp9  * a9[t]
00698                            + rtemp10 * a10[t]
00699                            + rtemp11 * a11[t]
00700                            + rtemp12 * a12[t];
00701     }
00702     
00703     for(; k+7<n; k+=8) {
00704       a1       = a+(fira[k+0 ]+i);
00705       a2       = a+(fira[k+1 ]+i);
00706       a3       = a+(fira[k+2 ]+i);
00707       a4       = a+(fira[k+3 ]+i);
00708       a5       = a+(fira[k+4 ]+i);
00709       a6       = a+(fira[k+5 ]+i);
00710       a7       = a+(fira[k+6 ]+i);
00711       a8       = a+(fira[k+7 ]+i);
00712 
00713       rtemp1    = *a1/diaga[k+0];
00714       rtemp2    = *a2/diaga[k+1];
00715       rtemp3    = *a3/diaga[k+2];
00716       rtemp4    = *a4/diaga[k+3];
00717       rtemp5    = *a5/diaga[k+4];
00718       rtemp6    = *a6/diaga[k+5];
00719       rtemp7    = *a7/diaga[k+6];
00720       rtemp8    = *a8/diaga[k+7];
00721 
00722       diagb[u] -=   rtemp1  * (*a1)
00723                   + rtemp2  * (*a2)
00724                   + rtemp3  * (*a3)
00725                   + rtemp4  * (*a4)
00726                   + rtemp5  * (*a5)
00727                   + rtemp6  * (*a6)
00728                   + rtemp7  * (*a7)
00729                   + rtemp8  * (*a8);
00730 
00731                ++ a1;
00732                ++ a2;
00733                ++ a3;
00734                ++ a4;
00735                ++ a5;
00736                ++ a6;
00737                ++ a7;
00738                ++ a8;
00739 
00740       for(t=0; t<sze; ++t)
00741         b0[ls[t]-delay] -=   rtemp1  * a1[t]
00742                            + rtemp2  * a2[t]
00743                            + rtemp3  * a3[t]
00744                            + rtemp4  * a4[t]
00745                            + rtemp5  * a5[t]
00746                            + rtemp6  * a6[t]
00747                            + rtemp7  * a7[t]
00748                            + rtemp8  * a8[t];
00749 
00750     }
00751 
00752     for(; k+3<n; k+=4) {
00753       a1       = a+(fira[k+0 ]+i);
00754       a2       = a+(fira[k+1 ]+i);
00755       a3       = a+(fira[k+2 ]+i);
00756       a4       = a+(fira[k+3 ]+i);
00757 
00758       rtemp1   = *a1/diaga[k+0];
00759       rtemp2   = *a2/diaga[k+1];
00760       rtemp3   = *a3/diaga[k+2];
00761       rtemp4   = *a4/diaga[k+3];
00762 
00763       diagb[u]-=   rtemp1  * (*a1)
00764                   +rtemp2  * (*a2)
00765                   +rtemp3  * (*a3)
00766                   +rtemp4  * (*a4);
00767 
00768                ++ a1;
00769                ++ a2;
00770                ++ a3;
00771                ++ a4;
00772 
00773       for(t=0; t<sze; ++t)
00774         b0[ls[t]-delay] -=   rtemp1  * a1[t]
00775                            + rtemp2  * a2[t]
00776                            + rtemp3  * a3[t]
00777                            + rtemp4  * a4[t];
00778 
00779     }
00780 
00781     for(; k+1<n; k+=2) {
00782       a1       = a+(fira[k+0 ]+i);
00783       a2       = a+(fira[k+1 ]+i);
00784 
00785       rtemp1    = *a1/diaga[k+0];
00786       rtemp2    = *a2/diaga[k+1];
00787 
00788       diagb[u] -=   rtemp1  * (*a1)
00789                   + rtemp2  * (*a2);
00790 
00791                ++ a1;
00792                ++ a2;
00793 
00794       for(t=0; t<sze; ++t)
00795         b0[ls[t]-delay] -=   rtemp1  * a1[t]
00796                            + rtemp2  * a2[t];
00797     }
00798 
00799     for(; k<n; ++k) {
00800       a1        = a+(fira[k+0 ]+i);
00801 
00802       rtemp1    = *a1/diaga[k+0];
00803 
00804       diagb[u] -= rtemp1  * (*a1);
00805 
00806                ++ a1;
00807 
00808       for(t=0; t<sze; ++t)
00809         b0[ls[t]-delay] -=   rtemp1  * a1[t];
00810     }
00811   }
00812 } /*  UpdSnodes */
00813 
00814 static void ExtUpdSnode(chfac  *cf,
00815                         int    snde,
00816                         int    usnde,
00817                         int    f,
00818                         int    l,
00819                         int    start,
00820                         int    iw[])
00821 {
00822   int    k,sze,
00823           *ls,
00824           *subg=cf->subg,
00825           *ujsze=cf->ujsze,*usub=cf->usub,*ujbeg=cf->ujbeg,*uhead=cf->uhead;
00826   double *diag=cf->diag,*uval=cf->uval;
00827 
00828   f += subg[snde];
00829   l += subg[snde];
00830 
00831   if (usnde==cf->nsnds-1) {
00832     if (usub[ujbeg[f]+start]<subg[usnde]) {
00833       printf("\n Index error");
00834       exit(0);
00835     }
00836 
00837     if (cf->sdens)
00838       exit(0);
00839 
00840     ls  = usub+ujbeg[f]+start;
00841     sze = ujsze[f]-start;
00842 
00843     for(k=f; k<l; ++k)
00844       iw[k-f] =uhead[k]+start-(k-f);
00845 
00846     UpdSnodes(sze,l-f,sze,diag+f,uval,iw,diag+ls[0],uval,uhead+ls[0],ls);
00847   }
00848   else
00849     exit(0);
00850 } /* ExtUpdSnode */
00851 
00852 static void PushFward(chfac  *cf,
00853                       int    snde,
00854                       int    f,
00855                       int    l,
00856                       int    iw[])
00857 {
00858   int    j,s,t,u,k,stops,offset,sze,itemp,
00859           *ls0,*ls1,
00860           *map=iw,*subg=cf->subg,
00861           *ujsze=cf->ujsze,*uhead=cf->uhead,*ujbeg=cf->ujbeg,*usub=cf->usub;
00862   double rtemp1,*l0,*l1,
00863           *diag=cf->diag,*uval=cf->uval;
00864 
00865   if (f>subg[snde+1]-subg[snde]) {
00866     printf("\n PushFward");
00867     exit(0);
00868   }
00869 
00870   if (f==l)
00871     return;
00872 
00873   f      += subg[snde];
00874   l      += subg[snde];
00875 
00876   offset  = subg[snde+1]-f-1;
00877   sze     = ujsze[f] - offset;
00878   ls1     = usub+ujbeg[f]+offset;
00879 
00880   if (f+1==l) {
00881     l1     = uval+uhead[f]+offset;
00882 
00883     stops = sze;
00884     for(t=0; t<sze; ++t) {
00885       j = ls1[0];
00886 
00887       if (j>=subg[cf->nsnds-1])
00888         break;
00889 
00890       rtemp1   = l1[0]/diag[f];
00891       diag[j] -= rtemp1*l1[0];
00892               ++ l1;
00893 
00894       l0       = uval+uhead[j];
00895       ls0      = usub+ujbeg[j];
00896 
00897               ++ ls1;
00898               -- stops;
00899 
00900       if (stops && ls1[stops-1]==ls0[stops-1]) {
00901         for(s=0; s<stops; ++s)
00902           l0[s] -= rtemp1 * l1[s];
00903       }
00904       
00905       else {
00906         for(s=0, u=0; s<stops; ++u) {
00907           if (ls0[u]==ls1[s]) {
00908             l0[u] -= rtemp1 * l1[s];
00909                   ++ s;
00910           }
00911         }
00912       }
00913     }
00914 
00915     if (t<sze && !cf->sdens)
00916       ExtUpdSnode(cf,snde,cf->nsnds-1,f-subg[snde],l-subg[snde],t,iw);
00917   }
00918   
00919   else {
00920     stops = sze;
00921     for(t=0; t<sze; ++t, ++offset) {
00922       j      = ls1[0];
00923 
00924       if (j>=subg[cf->nsnds-1]) {
00925         if (!cf->sdens)
00926           ExtUpdSnode(cf,snde,cf->nsnds-1,f-subg[snde],
00927                       l-subg[snde],offset,iw);
00928         break;
00929       }
00930 
00931       ls0   = usub+ujbeg[j];
00932       l0    = uval+uhead[j];
00933 
00934            ++ ls1;
00935            -- stops;
00936 
00937       k     = f;
00938       itemp = offset+f;
00939 
00940       if (stops && ls1[stops-1]==ls0[stops-1]) {
00941         for(k=f; k<l; ++k)
00942           map[k-f] = uhead[k]+itemp-k;
00943 
00944         UpdSnode(1+stops,l-f,1,diag+f,uval,map,diag+j,uval,uhead+j);
00945       }
00946       
00947       else {
00948         map[l] = 0;
00949         for(s=0, u=0; s<stops; ++u) {
00950           if (ls1[s]==ls0[u]) {
00951             map[1+l+s]  = 1+u;
00952                        ++ s;
00953           }
00954         }
00955 
00956         for(k=f; k<l; ++k)
00957           map[k-f] = uhead[k]+itemp-k;
00958 
00959         UpdSnodes(1+stops,l-f,1,diag+f,uval,map,diag+j,uval,uhead+j,map+l);
00960       }
00961     }
00962   }
00963 } /* PushFwardard */
00964 
00965 static int FacDenNode(chfac  *cf,
00966                       int    iw[],
00967                       double rw[],
00968                       int    mode)
00969 {
00970   int    c,d,j,s,t,sncl,k,k0,m,cacsze,sze,offset,
00971          *subg=cf->subg,*ujsze=cf->ujsze,
00972          *ujbeg=cf->ujbeg,*uhead=cf->uhead,
00973          *usub=cf->usub,*dhead=cf->dhead,
00974          *dsub=cf->dsub,*dbeg=cf->dbeg,*ls;
00975   double *diag=cf->diag,*uval=cf->uval;
00976   int    sresp;
00977 
00978   cacsze = cf->cachesize*cf->cacheunit;
00979 
00980   if (cf->sdens) {
00981     for(d=0; d<cf->ndens; ++d) {
00982       c = 0;
00983       for(k=dhead[d]; k<dhead[d+1]; ++k) {
00984         offset = dbeg[k];
00985         s      = dsub[k];
00986         if (usub[ujbeg[subg[s]]+offset]<subg[cf->nsnds-1]) {
00987           printf("\nindex error1");
00988           exit(0);
00989         }
00990 
00991         for(j=subg[s]; j<subg[s+1]; ++c, ++j) {
00992           rw[c] = diag[j];
00993           iw[c] = uhead[j]+offset-(j-subg[s]);
00994 
00995           if (usub[ujbeg[j]+offset-(j-subg[s])]<subg[cf->nsnds-1]) {
00996             printf("\nindex error");
00997             exit(0);
00998           }
00999         }
01000       }
01001 
01002       if (c) {
01003         k  = dhead[d];
01004         s  = dsub[k];
01005         m  = ujsze[subg[s]]-dbeg[k];
01006         ls = usub+ujbeg[subg[s]]+dbeg[k];
01007         if (m) {
01008           for(k=0; k<c;) {
01009             t = cacsze/(m*sizeof(double));
01010             t = max(t,1);
01011             t = min(c-k,t);
01012 
01013             UpdSnodes(m,t,m,rw+k,uval,iw+k,
01014                       diag+ls[0],uval,uhead+ls[0],ls);
01015             k+=t;
01016           }
01017         }
01018       }
01019     }
01020   }
01021 
01022   s = cf->nsnds-1;
01023 
01024   sncl = cf->subg[s+1]-cf->subg[s];
01025   for(k=0; k<sncl;) {
01026     k0  = k;
01027     for(sze=0; sze<cacsze && k<sncl; ++k)
01028       sze += ujsze[subg[s]+k] * sizeof(double);
01029 
01030     if (k==k0)
01031       ++k;
01032     else if (k>=k0+2 && sze>cacsze)
01033       --k;
01034 
01035     if (k>sncl)
01036       exit(0);
01037 
01038     sresp = FacSnode(cf,s,k0,k,iw,mode);
01039     if (sresp!=CfcOk)
01040       return (sresp);
01041 
01042     iUpdSnode(cf,s,k0,k,k,sncl,iw);
01043 
01044   }
01045   return (CfcOk);
01046 } /* FacDenNode */
01047 
01048 int ChlFact(chfac  *cf,
01049             int    *iw,
01050             double *rw,
01051             int    mode)
01052 {
01053   int s,sncl,k,k0,cacsze,sze,
01054       *subg=cf->subg,*ujsze=cf->ujsze;
01055   int cid;
01056 
01057   cacsze=cf->cachesize*cf->cacheunit;
01058 
01059   for(s=0; s+1<cf->nsnds; ++s) {
01060     sncl = cf->subg[s+1]-cf->subg[s];
01061 
01062     for(k=0; k<sncl;) {
01063       k0  = k;
01064       for(sze=0; sze<=cacsze && k<sncl; ++k)
01065         sze += ujsze[subg[s]+k]*sizeof(double);
01066        
01067       if (k==k0)
01068         ++k;
01069       else if (k>=k0+2 && sze>cacsze)
01070         --k;
01071       
01072       if (k>sncl)
01073         exit(0);
01074 
01075       cid=FacSnode(cf,s,k0,k,iw,mode);
01076       if (cid!=CfcOk)
01077         return (cid);
01078 
01079       iUpdSnode(cf,s,k0,k,k,sncl,iw);
01080 
01081       PushFward(cf,s,k0,k,iw);
01082     }
01083   }
01084 
01085   cid=FacDenNode(cf,iw,rw,mode);
01086 
01087   for (k=0;k<cf->nrow;k++){
01088     cf->sqrtdiag[k]=sqrt(fabs(cf->diag[k]));
01089   }
01090 
01091   return cid;
01092 } /* ChlFact */

Generated on Sun Mar 23 07:30:49 2008 for DSDP by  doxygen 1.5.5