Actual source code: dshep.c
1: /*
2: - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3: SLEPc - Scalable Library for Eigenvalue Problem Computations
4: Copyright (c) 2002-2013, Universitat Politecnica de Valencia, Spain
6: This file is part of SLEPc.
8: SLEPc is free software: you can redistribute it and/or modify it under the
9: terms of version 3 of the GNU Lesser General Public License as published by
10: the Free Software Foundation.
12: SLEPc is distributed in the hope that it will be useful, but WITHOUT ANY
13: WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
14: FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for
15: more details.
17: You should have received a copy of the GNU Lesser General Public License
18: along with SLEPc. If not, see <http://www.gnu.org/licenses/>.
19: - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
20: */
22: #include <slepc-private/dsimpl.h> /*I "slepcds.h" I*/
23: #include <slepcblaslapack.h>
27: PetscErrorCode DSAllocate_HEP(DS ds,PetscInt ld)
28: {
32: DSAllocateMat_Private(ds,DS_MAT_A);
33: DSAllocateMat_Private(ds,DS_MAT_Q);
34: DSAllocateMatReal_Private(ds,DS_MAT_T);
35: PetscFree(ds->perm);
36: PetscMalloc(ld*sizeof(PetscInt),&ds->perm);
37: PetscLogObjectMemory(ds,ld*sizeof(PetscInt));
38: return(0);
39: }
41: /* 0 l k n-1
42: -----------------------------------------
43: |* · · |
44: | * · · |
45: | * · · |
46: | * · · |
47: |· · · · o o |
48: | o o |
49: | o o |
50: | o o |
51: | o o |
52: | o o |
53: |· · · · o o o o o o o x |
54: | x x x |
55: | x x x |
56: | x x x |
57: | x x x |
58: | x x x |
59: | x x x |
60: | x x x |
61: | x x x|
62: | x x|
63: -----------------------------------------
64: */
68: static PetscErrorCode DSSwitchFormat_HEP(DS ds,PetscBool tocompact)
69: {
71: PetscReal *T = ds->rmat[DS_MAT_T];
72: PetscScalar *A = ds->mat[DS_MAT_A];
73: PetscInt i,n=ds->n,k=ds->k,ld=ds->ld;
76: if (tocompact) { /* switch from dense (arrow) to compact storage */
77: PetscMemzero(T,3*ld*sizeof(PetscReal));
78: for (i=0;i<k;i++) {
79: T[i] = PetscRealPart(A[i+i*ld]);
80: T[i+ld] = PetscRealPart(A[k+i*ld]);
81: }
82: for (i=k;i<n-1;i++) {
83: T[i] = PetscRealPart(A[i+i*ld]);
84: T[i+ld] = PetscRealPart(A[i+1+i*ld]);
85: }
86: T[n-1] = PetscRealPart(A[n-1+(n-1)*ld]);
87: if (ds->extrarow) T[n-1+ld] = PetscRealPart(A[n+(n-1)*ld]);
88: } else { /* switch from compact (arrow) to dense storage */
89: PetscMemzero(A,ld*ld*sizeof(PetscScalar));
90: for (i=0;i<k;i++) {
91: A[i+i*ld] = T[i];
92: A[k+i*ld] = T[i+ld];
93: A[i+k*ld] = T[i+ld];
94: }
95: A[k+k*ld] = T[k];
96: for (i=k+1;i<n;i++) {
97: A[i+i*ld] = T[i];
98: A[i-1+i*ld] = T[i-1+ld];
99: A[i+(i-1)*ld] = T[i-1+ld];
100: }
101: if (ds->extrarow) A[n+(n-1)*ld] = T[n-1+ld];
102: }
103: return(0);
104: }
108: PetscErrorCode DSView_HEP(DS ds,PetscViewer viewer)
109: {
110: PetscErrorCode ierr;
111: PetscViewerFormat format;
112: PetscInt i,j,r,c,rows;
113: PetscReal value;
114: const char *methodname[] = {
115: "Implicit QR method (_steqr)",
116: "Relatively Robust Representations (_stevr)",
117: "Divide and Conquer method (_stedc)"
118: };
119: const int nmeth=sizeof(methodname)/sizeof(methodname[0]);
122: PetscViewerGetFormat(viewer,&format);
123: if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
124: if (ds->method>=nmeth) {
125: PetscViewerASCIIPrintf(viewer,"solving the problem with: INVALID METHOD\n");
126: } else {
127: PetscViewerASCIIPrintf(viewer,"solving the problem with: %s\n",methodname[ds->method]);
128: }
129: return(0);
130: }
131: if (ds->compact) {
132: PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);
133: rows = ds->extrarow? ds->n+1: ds->n;
134: if (format == PETSC_VIEWER_ASCII_MATLAB) {
135: PetscViewerASCIIPrintf(viewer,"%% Size = %D %D\n",rows,ds->n);
136: PetscViewerASCIIPrintf(viewer,"zzz = zeros(%D,3);\n",3*ds->n);
137: PetscViewerASCIIPrintf(viewer,"zzz = [\n");
138: for (i=0;i<ds->n;i++) {
139: PetscViewerASCIIPrintf(viewer,"%D %D %18.16e\n",i+1,i+1,*(ds->rmat[DS_MAT_T]+i));
140: }
141: for (i=0;i<rows-1;i++) {
142: r = PetscMax(i+2,ds->k+1);
143: c = i+1;
144: PetscViewerASCIIPrintf(viewer,"%D %D %18.16e\n",r,c,*(ds->rmat[DS_MAT_T]+ds->ld+i));
145: if (i<ds->n-1 && ds->k<ds->n) { /* do not print vertical arrow when k=n */
146: PetscViewerASCIIPrintf(viewer,"%D %D %18.16e\n",c,r,*(ds->rmat[DS_MAT_T]+ds->ld+i));
147: }
148: }
149: PetscViewerASCIIPrintf(viewer,"];\n%s = spconvert(zzz);\n",DSMatName[DS_MAT_T]);
150: } else {
151: for (i=0;i<rows;i++) {
152: for (j=0;j<ds->n;j++) {
153: if (i==j) value = *(ds->rmat[DS_MAT_T]+i);
154: else if ((i<ds->k && j==ds->k) || (i==ds->k && j<ds->k)) value = *(ds->rmat[DS_MAT_T]+ds->ld+PetscMin(i,j));
155: else if (i==j+1 && i>ds->k) value = *(ds->rmat[DS_MAT_T]+ds->ld+i-1);
156: else if (i+1==j && j>ds->k) value = *(ds->rmat[DS_MAT_T]+ds->ld+j-1);
157: else value = 0.0;
158: PetscViewerASCIIPrintf(viewer," %18.16e ",value);
159: }
160: PetscViewerASCIIPrintf(viewer,"\n");
161: }
162: }
163: PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);
164: PetscViewerFlush(viewer);
165: } else {
166: DSViewMat_Private(ds,viewer,DS_MAT_A);
167: }
168: if (ds->state>DS_STATE_INTERMEDIATE) {
169: DSViewMat_Private(ds,viewer,DS_MAT_Q);
170: }
171: return(0);
172: }
176: PetscErrorCode DSVectors_HEP(DS ds,DSMatType mat,PetscInt *j,PetscReal *rnorm)
177: {
178: PetscScalar *Q = ds->mat[DS_MAT_Q];
179: PetscInt ld = ds->ld,i;
183: switch (mat) {
184: case DS_MAT_X:
185: case DS_MAT_Y:
186: if (j) {
187: if (ds->state>=DS_STATE_CONDENSED) {
188: PetscMemcpy(ds->mat[mat]+(*j)*ld,Q+(*j)*ld,ld*sizeof(PetscScalar));
189: } else {
190: PetscMemzero(ds->mat[mat]+(*j)*ld,ld*sizeof(PetscScalar));
191: *(ds->mat[mat]+(*j)+(*j)*ld) = 1.0;
192: }
193: } else {
194: if (ds->state>=DS_STATE_CONDENSED) {
195: PetscMemcpy(ds->mat[mat],Q,ld*ld*sizeof(PetscScalar));
196: } else {
197: PetscMemzero(ds->mat[mat],ld*ld*sizeof(PetscScalar));
198: for (i=0;i<ds->n;i++) *(ds->mat[mat]+i+i*ld) = 1.0;
199: }
200: }
201: if (rnorm) *rnorm = PetscAbsScalar(Q[ds->n-1+(*j)*ld]);
202: break;
203: case DS_MAT_U:
204: case DS_MAT_VT:
205: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not implemented yet");
206: break;
207: default:
208: SETERRQ(PetscObjectComm((PetscObject)ds),PETSC_ERR_ARG_OUTOFRANGE,"Invalid mat parameter");
209: }
210: return(0);
211: }
215: PetscErrorCode DSNormalize_HEP(DS ds,DSMatType mat,PetscInt col)
216: {
218: switch (mat) {
219: case DS_MAT_X:
220: case DS_MAT_Y:
221: case DS_MAT_Q:
222: /* All the matrices resulting from DSVectors and DSSolve are already normalized */
223: break;
224: case DS_MAT_U:
225: case DS_MAT_VT:
226: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not implemented yet");
227: break;
228: default:
229: SETERRQ(PetscObjectComm((PetscObject)ds),PETSC_ERR_ARG_OUTOFRANGE,"Invalid mat parameter");
230: }
231: return(0);
232: }
236: /*
237: ARROWTRIDIAG reduces a symmetric arrowhead matrix of the form
239: [ d 0 0 0 e ]
240: [ 0 d 0 0 e ]
241: A = [ 0 0 d 0 e ]
242: [ 0 0 0 d e ]
243: [ e e e e d ]
245: to tridiagonal form
247: [ d e 0 0 0 ]
248: [ e d e 0 0 ]
249: T = Q'*A*Q = [ 0 e d e 0 ],
250: [ 0 0 e d e ]
251: [ 0 0 0 e d ]
253: where Q is an orthogonal matrix. Rutishauser's algorithm is used to
254: perform the reduction, which requires O(n**2) flops. The accumulation
255: of the orthogonal factor Q, however, requires O(n**3) flops.
257: Arguments
258: =========
260: N (input) INTEGER
261: The order of the matrix A. N >= 0.
263: D (input/output) DOUBLE PRECISION array, dimension (N)
264: On entry, the diagonal entries of the matrix A to be
265: reduced.
266: On exit, the diagonal entries of the reduced matrix T.
268: E (input/output) DOUBLE PRECISION array, dimension (N-1)
269: On entry, the off-diagonal entries of the matrix A to be
270: reduced.
271: On exit, the subdiagonal entries of the reduced matrix T.
273: Q (input/output) DOUBLE PRECISION array, dimension (LDQ, N)
274: On exit, the orthogonal matrix Q.
276: LDQ (input) INTEGER
277: The leading dimension of the array Q.
279: Note
280: ====
281: Based on Fortran code contributed by Daniel Kressner
282: */
283: static PetscErrorCode ArrowTridiag(PetscBLASInt n,PetscReal *d,PetscReal *e,PetscScalar *Q,PetscBLASInt ld)
284: {
285: #if defined(SLEPC_MISSING_LAPACK_LARTG)
287: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"LARTG - Lapack routine is unavailable");
288: #else
289: PetscBLASInt i,j,j2,one=1;
290: PetscReal c,s,p,off,temp;
293: if (n<=2) return(0);
295: for (j=0;j<n-2;j++) {
297: /* Eliminate entry e(j) by a rotation in the planes (j,j+1) */
298: temp = e[j+1];
299: PetscStackCallBLAS("LAPACKlartg",LAPACKlartg_(&temp,&e[j],&c,&s,&e[j+1]));
300: s = -s;
302: /* Apply rotation to diagonal elements */
303: temp = d[j+1];
304: e[j] = c*s*(temp-d[j]);
305: d[j+1] = s*s*d[j] + c*c*temp;
306: d[j] = c*c*d[j] + s*s*temp;
308: /* Apply rotation to Q */
309: j2 = j+2;
310: PetscStackCallBLAS("BLASrot",BLASrot_(&j2,Q+j*ld,&one,Q+(j+1)*ld,&one,&c,&s));
312: /* Chase newly introduced off-diagonal entry to the top left corner */
313: for (i=j-1;i>=0;i--) {
314: off = -s*e[i];
315: e[i] = c*e[i];
316: temp = e[i+1];
317: PetscStackCallBLAS("LAPACKlartg",LAPACKlartg_(&temp,&off,&c,&s,&e[i+1]));
318: s = -s;
319: temp = (d[i]-d[i+1])*s - 2.0*c*e[i];
320: p = s*temp;
321: d[i+1] += p;
322: d[i] -= p;
323: e[i] = -e[i] - c*temp;
324: j2 = j+2;
325: PetscStackCallBLAS("BLASrot",BLASrot_(&j2,Q+i*ld,&one,Q+(i+1)*ld,&one,&c,&s));
326: }
327: }
328: return(0);
329: #endif
330: }
334: /*
335: Reduce to tridiagonal form by means of ArrowTridiag.
336: */
337: static PetscErrorCode DSIntermediate_HEP(DS ds)
338: {
339: #if defined(SLEPC_MISSING_LAPACK_SYTRD) || defined(SLEPC_MISSING_LAPACK_ORGTR)
341: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SYTRD/ORGTR - Lapack routine is unavailable");
342: #else
344: PetscInt i;
345: PetscBLASInt n1,n2,n3,lwork,info,l,n,ld,off;
346: PetscScalar *A,*Q,*work,*tau;
347: PetscReal *d,*e;
350: PetscBLASIntCast(ds->n,&n);
351: PetscBLASIntCast(ds->l,&l);
352: PetscBLASIntCast(ds->ld,&ld);
353: PetscBLASIntCast(ds->k-l+1,&n1); /* size of leading block, excl. locked */
354: PetscBLASIntCast(n-ds->k-1,&n2); /* size of trailing block */
355: n3 = n1+n2;
356: off = l+l*ld;
357: A = ds->mat[DS_MAT_A];
358: Q = ds->mat[DS_MAT_Q];
359: d = ds->rmat[DS_MAT_T];
360: e = ds->rmat[DS_MAT_T]+ld;
361: PetscMemzero(Q,ld*ld*sizeof(PetscScalar));
362: for (i=0;i<n;i++) Q[i+i*ld] = 1.0;
364: if (ds->compact) {
366: if (ds->state<DS_STATE_INTERMEDIATE) ArrowTridiag(n1,d+l,e+l,Q+off,ld);
368: } else {
370: for (i=0;i<l;i++) { d[i] = PetscRealPart(A[i+i*ld]); e[i] = 0.0; }
372: if (ds->state<DS_STATE_INTERMEDIATE) {
373: DSCopyMatrix_Private(ds,DS_MAT_Q,DS_MAT_A);
374: DSAllocateWork_Private(ds,ld+ld*ld,0,0);
375: tau = ds->work;
376: work = ds->work+ld;
377: lwork = ld*ld;
378: PetscStackCallBLAS("LAPACKsytrd",LAPACKsytrd_("L",&n3,Q+off,&ld,d+l,e+l,tau,work,&lwork,&info));
379: if (info) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in Lapack xSYTRD %d",info);
380: PetscStackCallBLAS("LAPACKorgtr",LAPACKorgtr_("L",&n3,Q+off,&ld,tau,work,&lwork,&info));
381: if (info) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in Lapack xORGTR %d",info);
382: } else {
383: /* copy tridiagonal to d,e */
384: for (i=l;i<n;i++) d[i] = PetscRealPart(A[i+i*ld]);
385: for (i=l;i<n-1;i++) e[i] = PetscRealPart(A[(i+1)+i*ld]);
386: }
387: }
388: return(0);
389: #endif
390: }
394: PetscErrorCode DSSort_HEP(DS ds,PetscScalar *wr,PetscScalar *wi,PetscScalar *rr,PetscScalar *ri,PetscInt *k)
395: {
397: PetscInt n,l,i,*perm,ld=ds->ld;
398: PetscScalar *A;
399: PetscReal *d;
402: if (!ds->comparison) return(0);
403: n = ds->n;
404: l = ds->l;
405: A = ds->mat[DS_MAT_A];
406: d = ds->rmat[DS_MAT_T];
407: perm = ds->perm;
408: if (!rr) {
409: DSSortEigenvaluesReal_Private(ds,d,perm);
410: } else {
411: DSSortEigenvalues_Private(ds,rr,ri,perm,PETSC_FALSE);
412: }
413: for (i=l;i<n;i++) wr[i] = d[perm[i]];
414: DSPermuteColumns_Private(ds,l,n,DS_MAT_Q,perm);
415: for (i=l;i<n;i++) d[i] = PetscRealPart(wr[i]);
416: if (!ds->compact) {
417: for (i=l;i<n;i++) A[i+i*ld] = wr[i];
418: }
419: return(0);
420: }
424: PetscErrorCode DSUpdateExtraRow_HEP(DS ds)
425: {
427: PetscInt i;
428: PetscBLASInt n,ld,incx=1;
429: PetscScalar *A,*Q,*x,*y,one=1.0,zero=0.0;
430: PetscReal *e,beta;
433: PetscBLASIntCast(ds->n,&n);
434: PetscBLASIntCast(ds->ld,&ld);
435: A = ds->mat[DS_MAT_A];
436: Q = ds->mat[DS_MAT_Q];
437: e = ds->rmat[DS_MAT_T]+ld;
439: if (ds->compact) {
440: beta = e[n-1];
441: for (i=0;i<n;i++) e[i] = PetscRealPart(beta*Q[n-1+i*ld]);
442: ds->k = n;
443: } else {
444: DSAllocateWork_Private(ds,2*ld,0,0);
445: x = ds->work;
446: y = ds->work+ld;
447: for (i=0;i<n;i++) x[i] = A[n+i*ld];
448: PetscStackCallBLAS("BLASgemv",BLASgemv_("C",&n,&n,&one,Q,&ld,x,&incx,&zero,y,&incx));
449: for (i=0;i<n;i++) A[n+i*ld] = y[i];
450: ds->k = n;
451: }
452: return(0);
453: }
457: PetscErrorCode DSSolve_HEP_QR(DS ds,PetscScalar *wr,PetscScalar *wi)
458: {
459: #if defined(PETSC_MISSING_LAPACK_STEQR)
461: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"STEQR - Lapack routine is unavailable");
462: #else
464: PetscInt i;
465: PetscBLASInt n1,n2,n3,info,l,n,ld,off;
466: PetscScalar *Q,*A;
467: PetscReal *d,*e;
470: PetscBLASIntCast(ds->n,&n);
471: PetscBLASIntCast(ds->l,&l);
472: PetscBLASIntCast(ds->ld,&ld);
473: PetscBLASIntCast(ds->k-l+1,&n1); /* size of leading block, excl. locked */
474: PetscBLASIntCast(n-ds->k-1,&n2); /* size of trailing block */
475: n3 = n1+n2;
476: off = l+l*ld;
477: Q = ds->mat[DS_MAT_Q];
478: A = ds->mat[DS_MAT_A];
479: d = ds->rmat[DS_MAT_T];
480: e = ds->rmat[DS_MAT_T]+ld;
482: /* Reduce to tridiagonal form */
483: DSIntermediate_HEP(ds);
485: /* Solve the tridiagonal eigenproblem */
486: for (i=0;i<l;i++) wr[i] = d[i];
488: DSAllocateWork_Private(ds,0,2*ld,0);
489: PetscStackCallBLAS("LAPACKsteqr",LAPACKsteqr_("V",&n3,d+l,e+l,Q+off,&ld,ds->rwork,&info));
490: if (info) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in Lapack xSTEQR %d",info);
491: for (i=l;i<n;i++) wr[i] = d[i];
493: /* Create diagonal matrix as a result */
494: if (ds->compact) {
495: PetscMemzero(e,(n-1)*sizeof(PetscReal));
496: } else {
497: for (i=l;i<n;i++) {
498: PetscMemzero(A+l+i*ld,(n-l)*sizeof(PetscScalar));
499: }
500: for (i=l;i<n;i++) A[i+i*ld] = d[i];
501: }
503: /* Set zero wi */
504: if (wi) for (i=l;i<n;i++) wi[i] = 0.0;
505: return(0);
506: #endif
507: }
511: PetscErrorCode DSSolve_HEP_MRRR(DS ds,PetscScalar *wr,PetscScalar *wi)
512: {
513: #if defined(SLEPC_MISSING_LAPACK_STEVR)
515: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"STEVR - Lapack routine is unavailable");
516: #else
518: PetscInt i;
519: PetscBLASInt n1,n2,n3,lwork,liwork,info,l,n,m,ld,off,il,iu,*isuppz;
520: PetscScalar *A,*Q,*W=NULL,one=1.0,zero=0.0;
521: PetscReal *d,*e,abstol=0.0,vl,vu;
522: #if defined(PETSC_USE_COMPLEX)
523: PetscInt j;
524: PetscReal *ritz;
525: #endif
528: PetscBLASIntCast(ds->n,&n);
529: PetscBLASIntCast(ds->l,&l);
530: PetscBLASIntCast(ds->ld,&ld);
531: PetscBLASIntCast(ds->k-l+1,&n1); /* size of leading block, excl. locked */
532: PetscBLASIntCast(n-ds->k-1,&n2); /* size of trailing block */
533: n3 = n1+n2;
534: off = l+l*ld;
535: A = ds->mat[DS_MAT_A];
536: Q = ds->mat[DS_MAT_Q];
537: d = ds->rmat[DS_MAT_T];
538: e = ds->rmat[DS_MAT_T]+ld;
540: /* Reduce to tridiagonal form */
541: DSIntermediate_HEP(ds);
543: /* Solve the tridiagonal eigenproblem */
544: for (i=0;i<l;i++) wr[i] = d[i];
546: if (ds->state<DS_STATE_INTERMEDIATE) { /* Q contains useful info */
547: DSAllocateMat_Private(ds,DS_MAT_W);
548: DSCopyMatrix_Private(ds,DS_MAT_W,DS_MAT_Q);
549: W = ds->mat[DS_MAT_W];
550: }
551: #if defined(PETSC_USE_COMPLEX)
552: DSAllocateMatReal_Private(ds,DS_MAT_Q);
553: #endif
554: lwork = 20*ld;
555: liwork = 10*ld;
556: DSAllocateWork_Private(ds,0,lwork+ld,liwork+2*ld);
557: isuppz = ds->iwork+liwork;
558: #if defined(PETSC_USE_COMPLEX)
559: ritz = ds->rwork+lwork;
560: PetscStackCallBLAS("LAPACKstevr",LAPACKstevr_("V","A",&n3,d+l,e+l,&vl,&vu,&il,&iu,&abstol,&m,ritz+l,ds->rmat[DS_MAT_Q]+off,&ld,isuppz,ds->rwork,&lwork,ds->iwork,&liwork,&info));
561: for (i=l;i<n;i++) wr[i] = ritz[i];
562: #else
563: PetscStackCallBLAS("LAPACKstevr",LAPACKstevr_("V","A",&n3,d+l,e+l,&vl,&vu,&il,&iu,&abstol,&m,wr+l,Q+off,&ld,isuppz,ds->rwork,&lwork,ds->iwork,&liwork,&info));
564: #endif
565: if (info) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in Lapack DSTEVR %d",info);
566: #if defined(PETSC_USE_COMPLEX)
567: for (i=l;i<n;i++)
568: for (j=l;j<n;j++)
569: Q[i+j*ld] = (ds->rmat[DS_MAT_Q])[i+j*ld];
570: #endif
571: if (ds->state<DS_STATE_INTERMEDIATE) { /* accumulate previous Q */
572: PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&n3,&n3,&n3,&one,W+off,&ld,Q+off,&ld,&zero,A+off,&ld));
573: DSCopyMatrix_Private(ds,DS_MAT_Q,DS_MAT_A);
574: }
575: for (i=l;i<n;i++) d[i] = PetscRealPart(wr[i]);
577: /* Create diagonal matrix as a result */
578: if (ds->compact) {
579: PetscMemzero(e,(n-1)*sizeof(PetscReal));
580: } else {
581: for (i=l;i<n;i++) {
582: PetscMemzero(A+l+i*ld,(n-l)*sizeof(PetscScalar));
583: }
584: for (i=l;i<n;i++) A[i+i*ld] = d[i];
585: }
587: /* Set zero wi */
588: if (wi) for (i=l;i<n;i++) wi[i] = 0.0;
589: return(0);
590: #endif
591: }
595: PetscErrorCode DSSolve_HEP_DC(DS ds,PetscScalar *wr,PetscScalar *wi)
596: {
597: #if defined(SLEPC_MISSING_LAPACK_STEDC) || defined(SLEPC_MISSING_LAPACK_ORMTR)
599: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"STEDC/ORMTR - Lapack routine is unavailable");
600: #else
602: PetscInt i;
603: PetscBLASInt n1,info,l,ld,off,lrwork,liwork;
604: PetscScalar *Q,*A;
605: PetscReal *d,*e;
606: #if defined(PETSC_USE_COMPLEX)
607: PetscBLASInt lwork;
608: PetscInt j;
609: #endif
612: PetscBLASIntCast(ds->l,&l);
613: PetscBLASIntCast(ds->ld,&ld);
614: PetscBLASIntCast(ds->n-ds->l,&n1);
615: off = l+l*ld;
616: Q = ds->mat[DS_MAT_Q];
617: A = ds->mat[DS_MAT_A];
618: d = ds->rmat[DS_MAT_T];
619: e = ds->rmat[DS_MAT_T]+ld;
621: /* Reduce to tridiagonal form */
622: DSIntermediate_HEP(ds);
624: /* Solve the tridiagonal eigenproblem */
625: for (i=0;i<l;i++) wr[i] = d[i];
627: lrwork = 5*n1*n1+3*n1+1;
628: liwork = 5*n1*n1+6*n1+6;
629: #if !defined(PETSC_USE_COMPLEX)
630: DSAllocateWork_Private(ds,0,lrwork,liwork);
631: PetscStackCallBLAS("LAPACKstedc",LAPACKstedc_("V",&n1,d+l,e+l,Q+off,&ld,ds->rwork,&lrwork,ds->iwork,&liwork,&info));
632: #else
633: lwork = ld*ld;
634: DSAllocateWork_Private(ds,lwork,lrwork,liwork);
635: PetscStackCallBLAS("LAPACKstedc",LAPACKstedc_("V",&n1,d+l,e+l,Q+off,&ld,ds->work,&lwork,ds->rwork,&lrwork,ds->iwork,&liwork,&info));
636: /* Fixing Lapack bug*/
637: for (j=ds->l;j<ds->n;j++)
638: for (i=0;i<ds->l;i++) Q[i+j*ld] = 0.0;
639: #endif
640: if (info) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in Lapack xSTEDC %d",info);
641: for (i=l;i<ds->n;i++) wr[i] = d[i];
643: /* Create diagonal matrix as a result */
644: if (ds->compact) {
645: PetscMemzero(e,(ds->n-1)*sizeof(PetscReal));
646: } else {
647: for (i=l;i<ds->n;i++) {
648: PetscMemzero(A+l+i*ld,(ds->n-l)*sizeof(PetscScalar));
649: }
650: for (i=l;i<ds->n;i++) A[i+i*ld] = d[i];
651: }
653: /* Set zero wi */
654: if (wi) for (i=l;i<ds->n;i++) wi[i] = 0.0;
655: return(0);
656: #endif
657: }
661: PetscErrorCode DSTruncate_HEP(DS ds,PetscInt n)
662: {
663: PetscInt i,ld=ds->ld,l=ds->l;
664: PetscScalar *A;
667: if (ds->state==DS_STATE_CONDENSED) ds->t = ds->n;
668: A = ds->mat[DS_MAT_A];
669: if (!ds->compact && ds->extrarow && ds->k==ds->n) {
670: for (i=l;i<n;i++) A[n+i*ld] = A[ds->n+i*ld];
671: }
672: if (ds->extrarow) ds->k = n;
673: else ds->k = 0;
674: ds->n = n;
675: return(0);
676: }
680: PetscErrorCode DSCond_HEP(DS ds,PetscReal *cond)
681: {
682: #if defined(PETSC_MISSING_LAPACK_GETRF) || defined(SLEPC_MISSING_LAPACK_GETRI) || defined(SLEPC_MISSING_LAPACK_LANGE) || defined(SLEPC_MISSING_LAPACK_LANHS)
684: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"GETRF/GETRI/LANGE/LANHS - Lapack routines are unavailable");
685: #else
687: PetscScalar *work;
688: PetscReal *rwork;
689: PetscBLASInt *ipiv;
690: PetscBLASInt lwork,info,n,ld;
691: PetscReal hn,hin;
692: PetscScalar *A;
695: PetscBLASIntCast(ds->n,&n);
696: PetscBLASIntCast(ds->ld,&ld);
697: lwork = 8*ld;
698: DSAllocateWork_Private(ds,lwork,ld,ld);
699: work = ds->work;
700: rwork = ds->rwork;
701: ipiv = ds->iwork;
702: DSSwitchFormat_HEP(ds,PETSC_FALSE);
704: /* use workspace matrix W to avoid overwriting A */
705: DSAllocateMat_Private(ds,DS_MAT_W);
706: A = ds->mat[DS_MAT_W];
707: PetscMemcpy(A,ds->mat[DS_MAT_A],sizeof(PetscScalar)*ds->ld*ds->ld);
709: /* norm of A */
710: hn = LAPACKlange_("I",&n,&n,A,&ld,rwork);
712: /* norm of inv(A) */
713: PetscStackCallBLAS("LAPACKgetrf",LAPACKgetrf_(&n,&n,A,&ld,ipiv,&info));
714: if (info) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in Lapack xGETRF %d",info);
715: PetscStackCallBLAS("LAPACKgetri",LAPACKgetri_(&n,A,&ld,ipiv,work,&lwork,&info));
716: if (info) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in Lapack xGETRI %d",info);
717: hin = LAPACKlange_("I",&n,&n,A,&ld,rwork);
719: *cond = hn*hin;
720: return(0);
721: #endif
722: }
726: PetscErrorCode DSTranslateRKS_HEP(DS ds,PetscScalar alpha)
727: {
728: #if defined(PETSC_MISSING_LAPACK_GEQRF) || defined(SLEPC_MISSING_LAPACK_ORGQR)
730: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"GEQRF/ORGQR - Lapack routines are unavailable");
731: #else
733: PetscInt i,j,k=ds->k;
734: PetscScalar *Q,*A,*R,*tau,*work;
735: PetscBLASInt ld,n1,n0,lwork,info;
738: PetscBLASIntCast(ds->ld,&ld);
739: DSAllocateWork_Private(ds,ld*ld,0,0);
740: tau = ds->work;
741: work = ds->work+ld;
742: PetscBLASIntCast(ld*(ld-1),&lwork);
743: DSAllocateMat_Private(ds,DS_MAT_W);
744: A = ds->mat[DS_MAT_A];
745: Q = ds->mat[DS_MAT_Q];
746: R = ds->mat[DS_MAT_W];
747: /* Copy I+alpha*A */
748: PetscMemzero(Q,ld*ld*sizeof(PetscScalar));
749: PetscMemzero(R,ld*ld*sizeof(PetscScalar));
750: for (i=0;i<k;i++) {
751: Q[i+i*ld] = 1.0 + alpha*A[i+i*ld];
752: Q[k+i*ld] = alpha*A[k+i*ld];
753: }
754: /* Compute qr */
755: PetscBLASIntCast(k+1,&n1);
756: PetscBLASIntCast(k,&n0);
757: PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&n1,&n0,Q,&ld,tau,work,&lwork,&info));
758: if (info) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in Lapack xGEQRF %d",info);
759: /* Copy R from Q */
760: for (j=0;j<k;j++)
761: for (i=0;i<=j;i++)
762: R[i+j*ld] = Q[i+j*ld];
763: /* Compute orthogonal matrix in Q */
764: PetscStackCallBLAS("LAPACKorgqr",LAPACKorgqr_(&n1,&n1,&n0,Q,&ld,tau,work,&lwork,&info));
765: if (info) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in Lapack xORGQR %d",info);
766: /* Compute the updated matrix of projected problem */
767: for (j=0;j<k;j++)
768: for (i=0;i<k+1;i++)
769: A[j*ld+i] = Q[i*ld+j];
770: alpha = -1.0/alpha;
771: PetscStackCallBLAS("BLAStrsm",BLAStrsm_("R","U","N","N",&n1,&n0,&alpha,R,&ld,A,&ld));
772: for (i=0;i<k;i++)
773: A[ld*i+i]-=alpha;
774: return(0);
775: #endif
776: }
780: PetscErrorCode DSFunction_EXP_HEP_DIAG(DS ds)
781: {
783: PetscScalar *eig,one=1.0,zero=0.0;
784: PetscInt i,j;
785: PetscBLASInt n,ld;
786: PetscScalar *F,*Q,*W;
789: PetscBLASIntCast(ds->n,&n);
790: PetscBLASIntCast(ds->ld,&ld);
791: PetscMalloc(n*sizeof(PetscScalar),&eig);
792: DSSolve(ds,eig,NULL);
793: if (!ds->mat[DS_MAT_W]) {
794: DSAllocateMat_Private(ds,DS_MAT_W);
795: }
796: W = ds->mat[DS_MAT_W];
797: Q = ds->mat[DS_MAT_Q];
798: F = ds->mat[DS_MAT_F];
800: /* W = exp(Lambda)*Q' */
801: for (i=0;i<n;i++) {
802: for (j=0;j<n;j++) {
803: W[i+j*ld] = Q[j+i*ld]*PetscExpScalar(eig[i]);
804: }
805: }
806: /* F = Q*W */
807: PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&n,&n,&n,&one,Q,&ld,W,&ld,&zero,F,&ld));
808: PetscFree(eig);
809: return(0);
810: }
814: PETSC_EXTERN PetscErrorCode DSCreate_HEP(DS ds)
815: {
817: ds->ops->allocate = DSAllocate_HEP;
818: ds->ops->view = DSView_HEP;
819: ds->ops->vectors = DSVectors_HEP;
820: ds->ops->solve[0] = DSSolve_HEP_QR;
821: ds->ops->solve[1] = DSSolve_HEP_MRRR;
822: ds->ops->solve[2] = DSSolve_HEP_DC;
823: ds->ops->sort = DSSort_HEP;
824: ds->ops->truncate = DSTruncate_HEP;
825: ds->ops->update = DSUpdateExtraRow_HEP;
826: ds->ops->cond = DSCond_HEP;
827: ds->ops->transrks = DSTranslateRKS_HEP;
828: ds->ops->normalize = DSNormalize_HEP;
830: ds->ops->computefun[SLEPC_FUNCTION_EXP][0] = DSFunction_EXP_HEP_DIAG;
831: return(0);
832: }