Actual source code: dvd_blas.c
1: /*
2: - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3: SLEPc - Scalable Library for Eigenvalue Problem Computations
4: Copyright (c) 2002-2013, Universitat Politecnica de Valencia, Spain
6: This file is part of SLEPc.
8: SLEPc is free software: you can redistribute it and/or modify it under the
9: terms of version 3 of the GNU Lesser General Public License as published by
10: the Free Software Foundation.
12: SLEPc is distributed in the hope that it will be useful, but WITHOUT ANY
13: WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
14: FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for
15: more details.
17: You should have received a copy of the GNU Lesser General Public License
18: along with SLEPc. If not, see <http://www.gnu.org/licenses/>.
19: - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
20: */
22: #include <slepc-private/vecimplslepc.h>
23: #include davidson.h
25: PetscLogEvent SLEPC_SlepcDenseMatProd = 0;
26: PetscLogEvent SLEPC_SlepcDenseNorm = 0;
27: PetscLogEvent SLEPC_SlepcDenseCopy = 0;
28: PetscLogEvent SLEPC_VecsMult = 0;
30: void dvd_sum_local(void *in,void *out,PetscMPIInt *cnt,MPI_Datatype *t);
31: PetscErrorCode VecsMultS_copy_func(PetscScalar *out,PetscInt size_out,void *ptr);
35: /*
36: Compute C <- a*A*B + b*C, where
37: ldC, the leading dimension of C,
38: ldA, the leading dimension of A,
39: rA, cA, rows and columns of A,
40: At, if true use the transpose of A instead,
41: ldB, the leading dimension of B,
42: rB, cB, rows and columns of B,
43: Bt, if true use the transpose of B instead
44: */
45: PetscErrorCode SlepcDenseMatProd(PetscScalar *C,PetscInt _ldC,PetscScalar b,PetscScalar a,const PetscScalar *A,PetscInt _ldA,PetscInt rA,PetscInt cA,PetscBool At,const PetscScalar *B,PetscInt _ldB,PetscInt rB,PetscInt cB,PetscBool Bt)
46: {
47: PetscErrorCode ierr;
48: PetscInt tmp;
49: PetscBLASInt m, n, k, ldA = _ldA, ldB = _ldB, ldC = _ldC;
50: const char *N = "N", *T = "C", *qA = N, *qB = N;
53: if ((rA == 0) || (cB == 0)) return(0);
58: PetscLogEventBegin(SLEPC_SlepcDenseMatProd,0,0,0,0);
60: /* Transpose if needed */
61: if (At) tmp = rA, rA = cA, cA = tmp, qA = T;
62: if (Bt) tmp = rB, rB = cB, cB = tmp, qB = T;
64: /* Check size */
65: if (cA != rB) SETERRQ(PETSC_COMM_SELF,1, "Matrix dimensions do not match");
67: /* Do stub */
68: if ((rA == 1) && (cA == 1) && (cB == 1)) {
69: if (!At && !Bt) *C = *A * *B;
70: else if (At && !Bt) *C = PetscConj(*A) * *B;
71: else if (!At && Bt) *C = *A * PetscConj(*B);
72: else *C = PetscConj(*A) * PetscConj(*B);
73: m = n = k = 1;
74: } else {
75: m = rA; n = cB; k = cA;
76: PetscStackCallBLAS("BLASgemm",BLASgemm_(qA,qB,&m,&n,&k,&a,(PetscScalar*)A,&ldA,(PetscScalar*)B,&ldB,&b,C,&ldC));
77: }
79: PetscLogFlops(m*n*2*k);
80: PetscLogEventEnd(SLEPC_SlepcDenseMatProd,0,0,0,0);
81: return(0);
82: }
86: /*
87: Compute C <- A*B, where
88: sC, structure of C,
89: ldC, the leading dimension of C,
90: sA, structure of A,
91: ldA, the leading dimension of A,
92: rA, cA, rows and columns of A,
93: At, if true use the transpose of A instead,
94: sB, structure of B,
95: ldB, the leading dimension of B,
96: rB, cB, rows and columns of B,
97: Bt, if true use the transpose of B instead
98: */
99: PetscErrorCode SlepcDenseMatProdTriang(PetscScalar *C,MatType_t sC,PetscInt ldC,const PetscScalar *A,MatType_t sA,PetscInt ldA,PetscInt rA,PetscInt cA,PetscBool At,const PetscScalar *B,MatType_t sB,PetscInt ldB,PetscInt rB,PetscInt cB,PetscBool Bt)
100: {
101: PetscErrorCode ierr;
102: PetscInt tmp;
103: PetscScalar one=1.0, zero=0.0;
104: PetscBLASInt rC, cC, _ldA = ldA, _ldB = ldB, _ldC = ldC;
107: if ((rA == 0) || (cB == 0)) return(0);
112: /* Transpose if needed */
113: if (At) tmp = rA, rA = cA, cA = tmp;
114: if (Bt) tmp = rB, rB = cB, cB = tmp;
116: /* Check size */
117: if (cA != rB) SETERRQ(PETSC_COMM_SELF,1, "Matrix dimensions do not match");
118: if (sB != 0) SETERRQ(PETSC_COMM_SELF,1, "Matrix type not supported for B");
120: /* Optimized version: trivial case */
121: if ((rA == 1) && (cA == 1) && (cB == 1)) {
122: if (!At && !Bt) *C = *A * *B;
123: else if (At && !Bt) *C = PetscConj(*A) * *B;
124: else if (!At && Bt) *C = *A * PetscConj(*B);
125: else if (At && Bt) *C = PetscConj(*A) * PetscConj(*B);
126: return(0);
127: }
129: /* Optimized versions: sA == 0 && sB == 0 */
130: if ((sA == 0) && (sB == 0)) {
131: if (At) tmp = rA, rA = cA, cA = tmp;
132: if (Bt) tmp = rB, rB = cB, cB = tmp;
133: SlepcDenseMatProd(C, ldC, 0.0, 1.0, A, ldA, rA, cA, At, B, ldB, rB, cB, Bt);
134: return(0);
135: }
137: /* Optimized versions: A hermitian && (B not triang) */
138: if (DVD_IS(sA,DVD_MAT_HERMITIAN) &&
139: DVD_ISNOT(sB,DVD_MAT_UTRIANG) &&
140: DVD_ISNOT(sB,DVD_MAT_LTRIANG)) {
141: PetscLogEventBegin(SLEPC_SlepcDenseMatProd,0,0,0,0);
142: rC = rA; cC = cB;
143: PetscStackCallBLAS("BLASsymm",BLASsymm_("L",DVD_ISNOT(sA,DVD_MAT_LTRIANG)?"U":"L",&rC,&cC,&one,(PetscScalar*)A,&_ldA,(PetscScalar*)B,&_ldB,&zero,C,&_ldC));
144: PetscLogFlops(rA*cB*cA);
145: PetscLogEventEnd(SLEPC_SlepcDenseMatProd,0,0,0,0);
146: return(0);
147: }
149: /* Optimized versions: B hermitian && (A not triang) */
150: if (DVD_IS(sB,DVD_MAT_HERMITIAN) &&
151: DVD_ISNOT(sA,DVD_MAT_UTRIANG) &&
152: DVD_ISNOT(sA,DVD_MAT_LTRIANG)) {
153: PetscLogEventBegin(SLEPC_SlepcDenseMatProd,0,0,0,0);
154: rC = rA; cC = cB;
155: PetscStackCallBLAS("BLASsymm",BLASsymm_("R",DVD_ISNOT(sB,DVD_MAT_LTRIANG)?"U":"L",&rC,&cC,&one,(PetscScalar*)B,&_ldB,(PetscScalar*)A,&_ldA,&zero,C,&_ldC));
156: PetscLogFlops(rA*cB*cA);
157: PetscLogEventEnd(SLEPC_SlepcDenseMatProd,0,0,0,0);
158: return(0);
159: }
161: SETERRQ(PETSC_COMM_SELF,1, "Matrix type not supported for A");
162: }
166: /*
167: Normalize the columns of the matrix A, where
168: ldA, the leading dimension of A,
169: rA, cA, rows and columns of A.
170: if eigi is given, the pairs of contiguous columns i i+1 such as eigi[i] != 0
171: are normalized as being one column.
172: */
173: PetscErrorCode SlepcDenseNorm(PetscScalar *A,PetscInt ldA,PetscInt _rA,PetscInt cA,PetscScalar *eigi)
174: {
175: PetscErrorCode ierr;
176: PetscInt i;
177: PetscScalar norm, norm0;
178: PetscBLASInt rA = _rA, one=1;
184: PetscLogEventBegin(SLEPC_SlepcDenseNorm,0,0,0,0);
186: for (i=0;i<cA;i++) {
187: if (eigi && eigi[i] != 0.0) {
188: norm = BLASnrm2_(&rA, &A[i*ldA], &one);
189: norm0 = BLASnrm2_(&rA, &A[(i+1)*ldA], &one);
190: norm = 1.0/PetscSqrtScalar(norm*norm + norm0*norm0);
191: PetscStackCallBLAS("BLASscal",BLASscal_(&rA, &norm, &A[i*ldA], &one));
192: PetscStackCallBLAS("BLASscal",BLASscal_(&rA, &norm, &A[(i+1)*ldA], &one));
193: i++;
194: } else {
195: norm = BLASnrm2_(&rA, &A[i*ldA], &one);
196: norm = 1.0 / norm;
197: PetscStackCallBLAS("BLASscal",BLASscal_(&rA, &norm, &A[i*ldA], &one));
198: }
199: }
201: PetscLogEventEnd(SLEPC_SlepcDenseNorm,0,0,0,0);
202: return(0);
203: }
207: /*
208: Y <- X, where
209: ldX, leading dimension of X,
210: rX, cX, rows and columns of X
211: ldY, leading dimension of Y
212: */
213: PetscErrorCode SlepcDenseCopy(PetscScalar *Y,PetscInt ldY,PetscScalar *X,PetscInt ldX,PetscInt rX,PetscInt cX)
214: {
215: PetscErrorCode ierr;
216: PetscInt i;
222: if ((ldX < rX) || (ldY < rX)) SETERRQ(PETSC_COMM_SELF,1, "Leading dimension error");
224: /* Quick exit */
225: if (Y == X) {
226: if (ldX != ldY) SETERRQ(PETSC_COMM_SELF,1, "Leading dimension error");
227: return(0);
228: }
230: PetscLogEventBegin(SLEPC_SlepcDenseCopy,0,0,0,0);
231: for (i=0;i<cX;i++) {
232: PetscMemcpy(&Y[ldY*i], &X[ldX*i], sizeof(PetscScalar)*rX);
233: }
234: PetscLogEventEnd(SLEPC_SlepcDenseCopy,0,0,0,0);
235: return(0);
236: }
240: /*
241: Y <- X, where
242: ldX, leading dimension of X,
243: rX, cX, rows and columns of X
244: ldY, leading dimension of Y
245: */
246: PetscErrorCode SlepcDenseCopyTriang(PetscScalar *Y,MatType_t sY,PetscInt ldY,PetscScalar *X,MatType_t sX,PetscInt ldX,PetscInt rX,PetscInt cX)
247: {
248: PetscErrorCode ierr;
249: PetscInt i,j,c;
255: if ((ldX < rX) || (ldY < rX)) SETERRQ(PETSC_COMM_SELF,1, "Leading dimension error");
257: if (sY == 0 && sX == 0) {
258: SlepcDenseCopy(Y, ldY, X, ldX, rX, cX);
259: return(0);
260: }
262: if (rX != cX) SETERRQ(PETSC_COMM_SELF,1, "Rectangular matrices not supported");
264: if (DVD_IS(sX,DVD_MAT_UTRIANG) &&
265: DVD_ISNOT(sX,DVD_MAT_LTRIANG)) { /* UpTr to ... */
266: if (DVD_IS(sY,DVD_MAT_UTRIANG) &&
267: DVD_ISNOT(sY,DVD_MAT_LTRIANG)) /* ... UpTr, */
268: c = 0; /* so copy */
269: else if (DVD_ISNOT(sY,DVD_MAT_UTRIANG) &&
270: DVD_IS(sY,DVD_MAT_LTRIANG)) /* ... LoTr, */
271: c = 1; /* so transpose */
272: else /* ... Full, */
273: c = 2; /* so reflect from up */
274: } else if (DVD_ISNOT(sX,DVD_MAT_UTRIANG) &&
275: DVD_IS(sX,DVD_MAT_LTRIANG)) { /* LoTr to ... */
276: if (DVD_IS(sY,DVD_MAT_UTRIANG) &&
277: DVD_ISNOT(sY,DVD_MAT_LTRIANG)) /* ... UpTr, */
278: c = 1; /* so transpose */
279: else if (DVD_ISNOT(sY,DVD_MAT_UTRIANG) &&
280: DVD_IS(sY,DVD_MAT_LTRIANG)) /* ... LoTr, */
281: c = 0; /* so copy */
282: else /* ... Full, */
283: c = 3; /* so reflect fr. down */
284: } else /* Full to any, */
285: c = 0; /* so copy */
287: PetscLogEventBegin(SLEPC_SlepcDenseCopy,0,0,0,0);
289: switch (c) {
290: case 0: /* copy */
291: for (i=0;i<cX;i++) {
292: PetscMemcpy(&Y[ldY*i],&X[ldX*i],sizeof(PetscScalar)*rX);
293: }
294: break;
296: case 1: /* transpose */
297: for (i=0;i<cX;i++)
298: for (j=0;j<rX;j++)
299: Y[ldY*j+i] = PetscConj(X[ldX*i+j]);
300: break;
302: case 2: /* reflection from up */
303: for (i=0;i<cX;i++)
304: for (j=0;j<PetscMin(i+1,rX);j++)
305: Y[ldY*j+i] = PetscConj(Y[ldY*i+j] = X[ldX*i+j]);
306: break;
308: case 3: /* reflection from down */
309: for (i=0;i<cX;i++)
310: for (j=i;j<rX;j++)
311: Y[ldY*j+i] = PetscConj(Y[ldY*i+j] = X[ldX*i+j]);
312: break;
313: }
314: PetscLogEventEnd(SLEPC_SlepcDenseCopy,0,0,0,0);
315: return(0);
316: }
320: /*
321: Compute Y[0..cM-1] <- alpha * X[0..cX-1] * M + beta * Y[0..cM-1],
322: where X and Y are contiguous global vectors.
323: */
324: PetscErrorCode SlepcUpdateVectorsZ(Vec *Y,PetscScalar beta,PetscScalar alpha,Vec *X,PetscInt cX,const PetscScalar *M,PetscInt ldM,PetscInt rM,PetscInt cM)
325: {
326: PetscErrorCode ierr;
329: SlepcUpdateVectorsS(Y,1,beta,alpha,X,cX,1,M,ldM,rM,cM);
330: return(0);
331: }
335: /*
336: Compute Y[0:dY:cM*dY-1] <- alpha * X[0:dX:cX-1] * M + beta * Y[0:dY:cM*dY-1],
337: where X and Y are contiguous global vectors.
338: */
339: PetscErrorCode SlepcUpdateVectorsS(Vec *Y,PetscInt dY,PetscScalar beta,PetscScalar alpha,Vec *X,PetscInt cX,PetscInt dX,const PetscScalar *M,PetscInt ldM,PetscInt rM,PetscInt cM)
340: {
341: PetscErrorCode ierr;
342: const PetscScalar *px;
343: PetscScalar *py;
344: PetscInt rX, rY, ldX, ldY, i, rcX;
347: SlepcValidVecsContiguous(Y,cM*dY,1);
348: SlepcValidVecsContiguous(X,cX,5);
351: /* Compute the real number of columns */
352: rcX = cX/dX;
353: if (rcX != rM) SETERRQ(PetscObjectComm((PetscObject)*Y),1, "Matrix dimensions do not match");
355: if ((rcX == 0) || (rM == 0) || (cM == 0)) return(0);
356: else if ((Y + cM <= X) || (X + cX <= Y) ||
357: ((X != Y) && ((PetscMax(dX,dY))%(PetscMin(dX,dY))!=0))) {
358: /* If Y[0..cM-1] and X[0..cX-1] are not overlapped... */
360: /* Get the dense matrices and dimensions associated to Y and X */
361: VecGetLocalSize(X[0], &rX);
362: VecGetLocalSize(Y[0], &rY);
363: if (rX != rY) SETERRQ(PetscObjectComm((PetscObject)*Y),1, "The multivectors do not have the same dimension");
364: VecGetArrayRead(X[0], &px);
365: VecGetArray(Y[0], &py);
367: /* Update the strides */
368: ldX = rX*dX; ldY= rY*dY;
370: /* Do operation */
371: SlepcDenseMatProd(py, ldY, beta, alpha, px, ldX, rX, rcX,
372: PETSC_FALSE, M, ldM, rM, cM, PETSC_FALSE);
374: VecRestoreArrayRead(X[0], &px);
375: VecRestoreArray(Y[0], &py);
376: for (i=1;i<cM;i++) {
377: PetscObjectStateIncrease((PetscObject)Y[dY*i]);
378: }
380: } else if ((Y >= X) && (beta == 0.0) && (dY == dX)) {
381: /* If not, call to SlepcUpdateVectors */
382: SlepcUpdateStrideVectors(cX, X, Y-X, dX, Y-X+cM*dX, M-ldM*(Y-X),
383: ldM, PETSC_FALSE);
384: if (alpha != 1.0)
385: for (i=0;i<cM;i++) {
386: VecScale(Y[i],alpha);
387: }
388: } else SETERRQ(PetscObjectComm((PetscObject)*Y),1, "Unsupported case");
389: return(0);
390: }
394: /*
395: Compute X <- alpha * X[0:dX:cX-1] * M
396: where X is a matrix with non-consecutive columns
397: */
398: PetscErrorCode SlepcUpdateVectorsD(Vec *X,PetscInt cX,PetscScalar alpha,const PetscScalar *M,PetscInt ldM,PetscInt rM,PetscInt cM,PetscScalar *work,PetscInt lwork)
399: {
401: PetscScalar **px, *Y, *Z;
402: PetscInt rX, i, j, rY, rY0, ldY;
405: SlepcValidVecsContiguous(X,cX,1);
409: if (cX != rM) SETERRQ(PetscObjectComm((PetscObject)*X),1, "Matrix dimensions do not match");
411: rY = (lwork/2)/cX;
412: if (rY <= 0) SETERRQ(PetscObjectComm((PetscObject)*X),1, "Insufficient work space given");
413: Y = work; Z = &Y[cX*rY]; ldY = rY;
415: if ((cX == 0) || (rM == 0) || (cM == 0)) return(0);
417: /* Get the dense vectors associated to the columns of X */
418: PetscMalloc(sizeof(Vec)*cX,&px);
419: for (i=0;i<cX;i++) {
420: VecGetArray(X[i],&px[i]);
421: }
422: VecGetLocalSize(X[0],&rX);
424: for (i=0,rY0=0;i<rX;i+=rY0) {
425: rY0 = PetscMin(rY, rX-i);
427: /* Y <- X[i0:i1,:] */
428: for (j=0;j<cX;j++) {
429: SlepcDenseCopy(&Y[ldY*j], ldY, px[j]+i, rX, rY0, 1);
430: }
432: /* Z <- Y * M */
433: SlepcDenseMatProd(Z, ldY, 0.0, alpha, Y, ldY, rY0, cX, PETSC_FALSE,
434: M, ldM, rM, cM, PETSC_FALSE);
436: /* X <- Z */
437: for (j=0;j<cM;j++) {
438: SlepcDenseCopy(px[j]+i, rX, &Z[j*ldY], ldY, rY0, 1);
439: }
440: }
442: for (i=0;i<cX;i++) {
443: VecRestoreArray(X[i],&px[i]);
444: }
445: PetscFree(px);
446: return(0);
447: }
451: /* Computes M <- [ M(0:sU-1, 0:sV-1) W(0:sU-1, sV:eV-1) ]
452: [ W(sU:eU-1, 0:sV-1) W(sU:eU-1, sV:eV-1) ]
453: where W = U' * V.
454: workS0 and workS1 are an auxiliary scalar vector of size
455: (eU-sU)*sV*(sU!=0)+(eV-sV)*eU. But, if sU == 0, sV == 0 and eU == ldM, only workS0
456: is needed, and of size eU*eV.
457: */
458: PetscErrorCode VecsMult(PetscScalar *M,MatType_t sM,PetscInt ldM,Vec *U,PetscInt sU,PetscInt eU,Vec *V,PetscInt sV,PetscInt eV,PetscScalar *workS0,PetscScalar *workS1)
459: {
460: PetscErrorCode ierr;
461: PetscInt ldU, ldV, i, j, k, ms = (eU-sU)*sV*(sU==0?0:1)+(eV-sV)*eU;
462: const PetscScalar *pu, *pv;
463: PetscScalar *W, *Wr;
466: /* Check if quick exit */
467: if ((eU-sU == 0) || (eV-sV == 0)) return(0);
469: SlepcValidVecsContiguous(U,eU,4);
470: SlepcValidVecsContiguous(V,eV,7);
473: /* Get the dense matrices and dimensions associated to U and V */
474: VecGetLocalSize(U[0], &ldU);
475: VecGetLocalSize(V[0], &ldV);
476: if (ldU != ldV) SETERRQ(PetscObjectComm((PetscObject)*U),1, "Matrix dimensions do not match");
477: VecGetArrayRead(U[0], &pu);
478: VecGetArrayRead(V[0], &pv);
480: if (workS0) {
482: W = workS0;
483: } else {
484: PetscMalloc(sizeof(PetscScalar)*ms,&W);
485: }
487: PetscLogEventBegin(SLEPC_VecsMult,0,0,0,0);
489: if ((sU == 0) && (sV == 0) && (eU == ldM)) {
490: /* Use the smart memory usage version */
492: /* W <- U' * V */
493: SlepcDenseMatProdTriang(W, sM, eU,
494: pu, 0, ldU, ldU, eU, PETSC_TRUE,
495: pv, 0, ldV, ldV, eV, PETSC_FALSE);
497: /* ReduceAll(W, SUM) */
498: MPI_Allreduce(W, M, eU*eV, MPIU_SCALAR, MPIU_SUM,
499: PetscObjectComm((PetscObject)U[0]));
500: /* Full M matrix */
501: } else if (DVD_ISNOT(sM,DVD_MAT_UTRIANG) &&
502: DVD_ISNOT(sM,DVD_MAT_LTRIANG)) {
503: if (workS1) {
505: Wr = workS1;
506: if (PetscAbs(PetscMin(W-workS1, workS1-W)) < ms) SETERRQ(PETSC_COMM_SELF,1, "Consistency broken");
507: } else {
508: PetscMalloc(sizeof(PetscScalar)*ms,&Wr);
509: }
511: /* W(0:(eU-sU)*sV-1) <- U(sU:eU-1)' * V(0:sV-1) */
512: if (sU > 0) {
513: SlepcDenseMatProd(W, eU-sU, 0.0, 1.0,
514: pu+ldU*sU, ldU, ldU, eU-sU, PETSC_TRUE,
515: pv , ldV, ldV, sV, PETSC_FALSE);
516: }
518: /* W((eU-sU)*sV:(eU-sU)*sV+(eV-sV)*eU-1) <- U(0:eU-1)' * V(sV:eV-1) */
519: SlepcDenseMatProd(W+(eU-sU)*sV*(sU > 0?1:0), eU, 0.0, 1.0,
520: pu, ldU, ldU, eU, PETSC_TRUE,
521: pv+ldV*sV, ldV, ldV, eV-sV, PETSC_FALSE);
523: /* ReduceAll(W, SUM) */
524: MPI_Allreduce(W, Wr, ms, MPIU_SCALAR,
525: MPIU_SUM, PetscObjectComm((PetscObject)U[0]));
527: /* M(...,...) <- W */
528: k = 0;
529: if (sU > 0) for (i=0; i<sV; i++)
530: for (j=ldM*i+sU; j<ldM*i+eU; j++,k++) M[j] = Wr[k];
531: for (i=sV; i<eV; i++)
532: for (j=ldM*i; j<ldM*i+eU; j++,k++) M[j] = Wr[k];
534: if (!workS1) {
535: PetscFree(Wr);
536: }
538: /* Upper triangular M matrix */
539: } else if (DVD_IS(sM,DVD_MAT_UTRIANG) &&
540: DVD_ISNOT(sM,DVD_MAT_LTRIANG)) {
541: if (workS1) {
543: Wr = workS1;
544: if (PetscAbs(PetscMin(W-workS1,workS1-W)) < (eV-sV)*eU) SETERRQ(PETSC_COMM_SELF,1, "Consistency broken");
545: } else {
546: PetscMalloc(sizeof(PetscScalar)*(eV-sV)*eU,&Wr);
547: }
549: /* W(0:(eV-sV)*eU-1) <- U(0:eU-1)' * V(sV:eV-1) */
550: SlepcDenseMatProd(W, eU, 0.0, 1.0,
551: pu, ldU, ldU, eU, PETSC_TRUE,
552: pv+ldV*sV, ldV, ldV, eV-sV, PETSC_FALSE);
554: /* ReduceAll(W, SUM) */
555: MPI_Allreduce(W, Wr, (eV-sV)*eU, MPIU_SCALAR, MPIU_SUM,
556: PetscObjectComm((PetscObject)U[0]));
558: /* M(...,...) <- W */
559: for (i=sV,k=0; i<eV; i++)
560: for (j=ldM*i; j<ldM*i+eU; j++,k++) M[j] = Wr[k];
562: if (!workS1) {
563: PetscFree(Wr);
564: }
566: /* Lower triangular M matrix */
567: } else if (DVD_ISNOT(sM,DVD_MAT_UTRIANG) &&
568: DVD_IS(sM,DVD_MAT_LTRIANG)) {
569: if (workS1) {
571: Wr = workS1;
572: if (PetscMin(W - workS1, workS1 - W) < (eU-sU)*eV) SETERRQ(PETSC_COMM_SELF,1, "Consistency broken");
573: } else {
574: PetscMalloc(sizeof(PetscScalar)*(eU-sU)*eV, &Wr);
575: }
577: /* W(0:(eU-sU)*eV-1) <- U(sU:eU-1)' * V(0:eV-1) */
578: SlepcDenseMatProd(W, eU-sU, 0.0, 1.0,
579: pu+ldU*sU, ldU, ldU, eU-sU, PETSC_TRUE,
580: pv , ldV, ldV, eV, PETSC_FALSE);
582: /* ReduceAll(W, SUM) */
583: MPI_Allreduce(W, Wr, (eU-sU)*eV, MPIU_SCALAR, MPIU_SUM,
584: PetscObjectComm((PetscObject)U[0]));
586: /* M(...,...) <- W */
587: for (i=0,k=0; i<eV; i++)
588: for (j=ldM*i+sU; j<ldM*i+eU; j++,k++) M[j] = Wr[k];
590: if (!workS1) {
591: PetscFree(Wr);
592: }
593: }
595: PetscLogEventEnd(SLEPC_VecsMult,0,0,0,0);
597: if (!workS0) {
598: PetscFree(W);
599: }
601: VecRestoreArrayRead(U[0],&pu);
602: VecRestoreArrayRead(V[0],&pv);
603: return(0);
604: }
608: /* Computes M <- [ M(0:sU-1, 0:sV-1) W(0:sU-1, sV:eV-1) ]
609: [ W(sU:eU-1, 0:sV-1) W(sU:eU-1, sV:eV-1) ]
610: where W = local_U' * local_V. Needs VecsMultIb for completing the operation!
611: workS0 and workS1 are an auxiliary scalar vector of size
612: (eU-sU)*sV+(eV-sV)*eU. But, if sU == 0, sV == 0 and eU == ldM, only workS0
613: is needed, and of size eU*eV.
614: */
615: PetscErrorCode VecsMultIa(PetscScalar *M,MatType_t sM,PetscInt ldM,Vec *U,PetscInt sU,PetscInt eU,Vec *V,PetscInt sV,PetscInt eV)
616: {
617: PetscErrorCode ierr;
618: PetscInt ldU, ldV;
619: PetscScalar *pu, *pv;
622: /* Check if quick exit */
623: if ((eU-sU == 0) || (eV-sV == 0)) return(0);
625: SlepcValidVecsContiguous(U,eU,4);
626: SlepcValidVecsContiguous(V,eV,7);
629: /* Get the dense matrices and dimensions associated to U and V */
630: VecGetLocalSize(U[0],&ldU);
631: VecGetLocalSize(V[0],&ldV);
632: if (ldU != ldV) SETERRQ(PetscObjectComm((PetscObject)*U),1, "Matrix dimensions do not match");
633: VecGetArray(U[0],&pu);
634: VecGetArray(V[0],&pv);
636: if ((sU == 0) && (sV == 0) && (eU == ldM)) {
637: /* M <- local_U' * local_V */
638: SlepcDenseMatProdTriang(M, sM, eU,
639: pu, 0, ldU, ldU, eU, PETSC_TRUE,
640: pv, 0, ldV, ldV, eV, PETSC_FALSE);
642: /* Full M matrix */
643: } else if (DVD_ISNOT(sM,DVD_MAT_UTRIANG) &&
644: DVD_ISNOT(sM,DVD_MAT_LTRIANG)) {
645: /* M(sU:eU-1,0:sV-1) <- U(sU:eU-1)' * V(0:sV-1) */
646: SlepcDenseMatProd(&M[sU], ldM, 0.0, 1.0,
647: pu+ldU*sU, ldU, ldU, eU-sU, PETSC_TRUE,
648: pv , ldV, ldV, sV, PETSC_FALSE);
650: /* M(0:eU-1,sV:eV-1) <- U(0:eU-1)' * V(sV:eV-1) */
651: SlepcDenseMatProd(&M[ldM*sV], ldM, 0.0, 1.0,
652: pu, ldU, ldU, eU, PETSC_TRUE,
653: pv+ldV*sV, ldV, ldV, eV-sV, PETSC_FALSE);
655: /* Other structures */
656: } else SETERRQ(PetscObjectComm((PetscObject)*U),1, "Matrix structure not supported");
658: VecRestoreArray(U[0],&pu);
659: PetscObjectStateDecrease((PetscObject)U[0]);
660: VecRestoreArray(V[0],&pv);
661: PetscObjectStateDecrease((PetscObject)V[0]);
662: return(0);
663: }
667: /* Computes M <- nprocs*M
668: where nprocs is the number of processors.
669: */
670: PetscErrorCode VecsMultIc(PetscScalar *M,MatType_t sM,PetscInt ldM,PetscInt rM,PetscInt cM,Vec V)
671: {
672: PetscInt i,j;
673: PetscMPIInt n;
676: /* Check if quick exit */
677: if ((rM == 0) || (cM == 0)) return(0);
680: if (sM != 0) SETERRQ(PetscObjectComm((PetscObject)V),1, "Matrix structure not supported");
682: MPI_Comm_size(PetscObjectComm((PetscObject)V), &n);
684: for (i=0;i<cM;i++)
685: for (j=0;j<rM;j++)
686: M[ldM*i+j] /= (PetscScalar)n;
687: return(0);
688: }
692: /* Computes N <- Allreduce([ M(0:sU-1, 0:sV-1) W(0:sU-1, sV:eV-1) ])
693: ([ W(sU:eU-1, 0:sV-1) W(sU:eU-1, sV:eV-1) ])
694: where W = U' * V.
695: workS0 and workS1 are an auxiliary scalar vector of size
696: (eU-sU)*sV+(eV-sV)*eU. But, if sU == 0, sV == 0 and eU == ldM, only workS0
697: is needed, and of size eU*eV.
698: */
699: PetscErrorCode VecsMultIb(PetscScalar *M,MatType_t sM,PetscInt ldM,PetscInt rM,PetscInt cM,PetscScalar *auxS,Vec V)
700: {
702: PetscScalar *W, *Wr;
705: /* Check if quick exit */
706: if ((rM == 0) || (cM == 0)) return(0);
709: if (auxS) {
711: W = auxS;
712: } else {
713: PetscMalloc(sizeof(PetscScalar)*rM*cM*2,&W);
714: }
715: Wr = W + rM*cM;
717: PetscLogEventBegin(SLEPC_VecsMult,0,0,0,0);
719: if (sM == 0) {
720: /* W <- M */
721: SlepcDenseCopy(W, rM, M, ldM, rM, cM);
723: /* Wr <- ReduceAll(W, SUM) */
724: MPI_Allreduce(W, Wr, rM*cM, MPIU_SCALAR, MPIU_SUM,
725: PetscObjectComm((PetscObject)V));
727: /* M <- Wr */
728: SlepcDenseCopy(M, ldM, Wr, rM, rM, cM);
730: /* Other structures */
731: } else SETERRQ(PetscObjectComm((PetscObject)V),1, "Matrix structure not supported");
733: PetscLogEventEnd(SLEPC_VecsMult,0,0,0,0);
735: if (!auxS) {
736: PetscFree(W);
737: }
738: return(0);
739: }
743: /* Computes M <- [ M(0:sU-1, 0:sV-1) W(0:sU-1, sV:eV-1) ]
744: [ W(sU:eU-1, 0:sV-1) W(sU:eU-1, sV:eV-1) ]
745: where W = U' * V.
746: r, a DvdReduction structure,
747: sr, an structure DvdMult_copy_func.
748: */
749: PetscErrorCode VecsMultS(PetscScalar *M,MatType_t sM,PetscInt ldM,Vec *U,PetscInt sU,PetscInt eU,Vec *V,PetscInt sV,PetscInt eV,DvdReduction *r,DvdMult_copy_func *sr)
750: {
751: PetscErrorCode ierr;
752: PetscInt ldU, ldV, ms = (eU-sU)*sV*(sU==0?0:1)+(eV-sV)*eU;
753: const PetscScalar *pu, *pv;
754: PetscScalar *W;
757: /* Check if quick exit */
758: if ((eU-sU == 0) || (eV-sV == 0)) return(0);
760: SlepcValidVecsContiguous(U,eU,4);
761: SlepcValidVecsContiguous(V,eV,7);
764: /* Get the dense matrices and dimensions associated to U and V */
765: VecGetLocalSize(U[0],&ldU);
766: VecGetLocalSize(V[0],&ldV);
767: if (ldU != ldV) SETERRQ(PetscObjectComm((PetscObject)*U),1, "Matrix dimensions do not match");
768: VecGetArrayRead(U[0],&pu);
769: VecGetArrayRead(V[0],&pv);
771: PetscLogEventBegin(SLEPC_VecsMult,0,0,0,0);
773: if ((sU == 0) && (sV == 0)) {
774: /* Use the smart memory usage version */
776: /* Add the reduction to r */
777: SlepcAllReduceSum(r, eU*eV, VecsMultS_copy_func, sr, &W);
779: /* W <- U' * V */
780: SlepcDenseMatProdTriang(W, sM, eU,
781: pu, 0, ldU, ldU, eU, PETSC_TRUE,
782: pv, 0, ldV, ldV, eV, PETSC_FALSE);
784: /* M <- ReduceAll(W, SUM) */
785: sr->M = M; sr->ld = ldM;
786: sr->i0 = 0; sr->i1 = eV; sr->s0 = sU; sr->e0 = eU;
787: sr->i2 = eV;
789: /* Full M matrix */
790: } else if (DVD_ISNOT(sM,DVD_MAT_UTRIANG) &&
791: DVD_ISNOT(sM,DVD_MAT_LTRIANG)) {
792: /* Add the reduction to r */
793: SlepcAllReduceSum(r, ms, VecsMultS_copy_func, sr, &W);
795: /* W(0:(eU-sU)*sV-1) <- U(sU:eU-1)' * V(0:sV-1) */
796: SlepcDenseMatProd(W, eU-sU, 0.0, 1.0,
797: pu+ldU*sU, ldU, ldU, eU-sU, PETSC_TRUE,
798: pv , ldV, ldV, sV, PETSC_FALSE);
800: /* W((eU-sU)*sV:(eU-sU)*sV+(eV-sV)*eU-1) <- U(0:eU-1)' * V(sV:eV-1) */
801: SlepcDenseMatProd(W+(eU-sU)*sV*(sU > 0?1:0), eU, 0.0, 1.0,
802: pu, ldU, ldU, eU, PETSC_TRUE,
803: pv+ldV*sV, ldV, ldV, eV-sV, PETSC_FALSE);
805: /* M <- ReduceAll(W, SUM) */
806: sr->M = M; sr->ld = ldM;
807: sr->i0 = sU>0?0:sV; sr->i1 = sV; sr->s0 = sU; sr->e0 = eU;
808: sr->i2 = eV; sr->s1 = 0; sr->e1 = eU;
810: /* Upper triangular M matrix */
811: } else if (DVD_IS(sM,DVD_MAT_UTRIANG) &&
812: DVD_ISNOT(sM,DVD_MAT_LTRIANG)) {
813: /* Add the reduction to r */
814: SlepcAllReduceSum(r, (eV-sV)*eU, VecsMultS_copy_func, sr, &W);
816: /* W(0:(eV-sV)*eU-1) <- U(0:eU-1)' * V(sV:eV-1) */
817: SlepcDenseMatProd(W, eU, 0.0, 1.0,
818: pu, ldU, ldU, eU, PETSC_TRUE,
819: pv+ldV*sV, ldV, ldV, eV-sV, PETSC_FALSE);
821: /* M <- ReduceAll(W, SUM) */
822: sr->M = M; sr->ld = ldM;
823: sr->i0 = sV; sr->i1 = eV; sr->s0 = 0; sr->e0 = eU;
824: sr->i2 = eV;
826: /* Lower triangular M matrix */
827: } else if (DVD_ISNOT(sM,DVD_MAT_UTRIANG) &&
828: DVD_IS(sM,DVD_MAT_LTRIANG)) {
829: /* Add the reduction to r */
830: SlepcAllReduceSum(r, (eU-sU)*eV, VecsMultS_copy_func, sr, &W);
832: /* W(0:(eU-sU)*eV-1) <- U(sU:eU-1)' * V(0:eV-1) */
833: SlepcDenseMatProd(W, eU-sU, 0.0, 1.0,
834: pu+ldU*sU, ldU, ldU, eU-sU, PETSC_TRUE,
835: pv , ldV, ldV, eV, PETSC_FALSE);
837: /* ReduceAll(W, SUM) */
838: sr->M = M; sr->ld = ldM;
839: sr->i0 = 0; sr->i1 = eV; sr->s0 = sU; sr->e0 = eU;
840: sr->i2 = eV;
841: }
843: PetscLogEventEnd(SLEPC_VecsMult,0,0,0,0);
845: VecRestoreArrayRead(U[0],&pu);
846: VecRestoreArrayRead(V[0],&pv);
847: return(0);
848: }
852: PetscErrorCode VecsMultS_copy_func(PetscScalar *out,PetscInt size_out,void *ptr)
853: {
854: PetscInt i, j, k;
855: DvdMult_copy_func *sr = (DvdMult_copy_func*)ptr;
860: for (i=sr->i0,k=0; i<sr->i1; i++)
861: for (j=sr->ld*i+sr->s0; j<sr->ld*i+sr->e0; j++,k++) sr->M[j] = out[k];
862: for (i=sr->i1; i<sr->i2; i++)
863: for (j=sr->ld*i+sr->s1; j<sr->ld*i+sr->e1; j++,k++) sr->M[j] = out[k];
865: if (k != size_out) SETERRQ(PETSC_COMM_SELF,1, "Wrong size");
866: return(0);
867: }
871: /* Orthonormalize a chunk of parallel vector.
872: NOTE: wS0 and wS1 must be of size n*n.
873: */
874: PetscErrorCode VecsOrthonormalize(Vec *V,PetscInt n,PetscScalar *wS0,PetscScalar *wS1)
875: {
876: #if defined(PETSC_MISSING_LAPACK_PBTRF)
878: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"PBTRF - Lapack routine is unavailable");
879: #else
880: PetscErrorCode ierr;
881: PetscBLASInt nn = n, info, ld;
882: PetscInt ldV, i;
883: PetscScalar *H, *T, one=1.0, *pv;
886: if (!wS0) {
887: PetscMalloc(sizeof(PetscScalar)*n*n,&H);
888: } else {
890: H = wS0;
891: }
892: if (!wS1) {
893: PetscMalloc(sizeof(PetscScalar)*n*n,&T);
894: } else {
896: T = wS1;
897: }
899: /* H <- V' * V */
900: VecsMult(H, 0, n, V, 0, n, V, 0, n, T, NULL);
902: /* H <- chol(H) */
903: PetscFPTrapPush(PETSC_FP_TRAP_OFF);
904: PetscStackCallBLAS("LAPACKpbtrf",LAPACKpbtrf_("U", &nn, &nn, H, &nn, &info));
905: PetscFPTrapPop();
906: if (info) SETERRQ1(PetscObjectComm((PetscObject)*V),PETSC_ERR_LIB, "Error in Lapack PBTRF %d", info);
908: /* V <- V * inv(H) */
909: VecGetLocalSize(V[0],&ldV);
910: VecGetArray(V[0],&pv);
911: ld = ldV;
912: PetscStackCallBLAS("BLAStrsm",BLAStrsm_("R", "U", "N", "N", &ld, &nn, &one, H, &nn, pv, &ld));
913: VecRestoreArray(V[0],&pv);
914: for (i=1;i<n;i++) {
915: PetscObjectStateIncrease((PetscObject)V[i]);
916: }
918: if (!wS0) {
919: PetscFree(H);
920: }
921: if (!wS1) {
922: PetscFree(T);
923: }
924: return(0);
925: #endif
926: }
930: /*
931: Sum up several arrays with only one call to MPIReduce.
932: */
933: PetscErrorCode SlepcAllReduceSumBegin(DvdReductionChunk *ops,PetscInt max_size_ops,PetscScalar *in,PetscScalar *out,PetscInt max_size_in,DvdReduction *r,MPI_Comm comm)
934: {
939: r->in = in;
940: r->out = out;
941: r->size_in = 0;
942: r->max_size_in = max_size_in;
943: r->ops = ops;
944: r->size_ops = 0;
945: r->max_size_ops = max_size_ops;
946: r->comm = comm;
947: return(0);
948: }
952: PetscErrorCode SlepcAllReduceSum(DvdReduction *r,PetscInt size_in,DvdReductionPostF f,void *ptr,PetscScalar **in)
953: {
955: *in = r->in + r->size_in;
956: r->ops[r->size_ops].out = r->out + r->size_in;
957: r->ops[r->size_ops].size_out = size_in;
958: r->ops[r->size_ops].f = f;
959: r->ops[r->size_ops].ptr = ptr;
960: if (++(r->size_ops) > r->max_size_ops) SETERRQ(PETSC_COMM_SELF,1, "max_size_ops is not large enough");
961: if ((r->size_in+= size_in) > r->max_size_in) SETERRQ(PETSC_COMM_SELF,1, "max_size_in is not large enough");
962: return(0);
963: }
967: PetscErrorCode SlepcAllReduceSumEnd(DvdReduction *r)
968: {
969: PetscErrorCode ierr;
970: PetscInt i;
973: /* Check if quick exit */
974: if (r->size_ops == 0) return(0);
976: /* Call the MPIAllReduce routine */
977: MPI_Allreduce(r->in, r->out, r->size_in, MPIU_SCALAR, MPIU_SUM, r->comm);
979: /* Call the postponed routines */
980: for (i=0;i<r->size_ops;i++) {
981: r->ops[i].f(r->ops[i].out, r->ops[i].size_out, r->ops[i].ptr);
982: }
984: /* Tag the operation as done */
985: r->size_ops = 0;
986: return(0);
987: }
991: /* auxS: size_cX+V_new_e */
992: PetscErrorCode dvd_orthV(IP ip,Vec *defl,PetscInt size_DS,Vec *cX,PetscInt size_cX,Vec *V,PetscInt V_new_s,PetscInt V_new_e,PetscScalar *auxS,PetscRandom rand)
993: {
994: PetscErrorCode ierr;
995: PetscInt i, j;
996: PetscBool lindep;
997: PetscReal norm;
998: PetscScalar *auxS0 = auxS;
1001: /* Orthonormalize V with IP */
1002: for (i=V_new_s;i<V_new_e;i++) {
1003: for (j=0;j<3;j++) {
1004: if (j>0) { SlepcVecSetRandom(V[i], rand); }
1005: if (cX + size_cX == V) {
1006: /* If cX and V are contiguous, orthogonalize in one step */
1007: IPOrthogonalize(ip, size_DS, defl, size_cX+i, NULL, cX,
1008: V[i], auxS0, &norm, &lindep);
1009: } else if (defl) {
1010: /* Else orthogonalize first against defl, and then against cX and V */
1011: IPOrthogonalize(ip, size_DS, defl, size_cX, NULL, cX,
1012: V[i], auxS0, NULL, &lindep);
1013: if (!lindep) {
1014: IPOrthogonalize(ip, 0, NULL, i, NULL, V,
1015: V[i], auxS0, &norm, &lindep);
1016: }
1017: } else {
1018: /* Else orthogonalize first against cX and then against V */
1019: IPOrthogonalize(ip, size_cX, cX, i, NULL, V,
1020: V[i], auxS0, &norm, &lindep);
1021: }
1022: if (!lindep && (norm > PETSC_SQRT_MACHINE_EPSILON)) break;
1023: PetscInfo1(ip,"Orthonormalization problems adding the vector %D to the searching subspace\n",i);
1024: }
1025: if (lindep || (norm < PETSC_SQRT_MACHINE_EPSILON)) SETERRQ(PetscObjectComm((PetscObject)ip),1, "Error during orthonormalization of eigenvectors");
1026: VecScale(V[i],1.0/norm);
1027: }
1028: return(0);
1029: }
1033: /* auxS: size_cX+V_new_e+1 */
1034: PetscErrorCode dvd_BorthV_faster(IP ip,Vec *defl,Vec *BDS,PetscReal *BDSn,PetscInt size_DS,Vec *cX,Vec *BcX,PetscReal *BcXn,PetscInt size_cX,Vec *V,Vec *BV,PetscReal *BVn,PetscInt V_new_s,PetscInt V_new_e,PetscScalar *auxS,PetscRandom rand)
1035: {
1036: PetscErrorCode ierr;
1037: PetscInt i, j;
1038: PetscBool lindep;
1039: PetscReal norm;
1040: PetscScalar *auxS0 = auxS;
1043: /* Orthonormalize V with IP */
1044: for (i=V_new_s;i<V_new_e;i++) {
1045: for (j=0;j<3;j++) {
1046: if (j>0) { SlepcVecSetRandom(V[i],rand); }
1047: if (cX + size_cX == V && BcX + size_cX == BV) {
1048: /* If cX and V are contiguous, orthogonalize in one step */
1049: IPBOrthogonalize(ip, size_DS, defl, BDS, BDSn, size_cX+i, NULL, cX, BcX, BcXn,
1050: V[i], BV[i], auxS0, &norm, &lindep);
1051: } else if (defl) {
1052: /* Else orthogonalize first against defl, and then against cX and V */
1053: IPBOrthogonalize(ip, size_DS, defl, BDS, BDSn, size_cX, NULL, cX, BcX, BcXn,
1054: V[i], BV[i], auxS0, NULL, &lindep);
1055: if (!lindep) {
1056: IPBOrthogonalize(ip, 0, NULL, NULL, NULL, i, NULL, V, BV, BVn,
1057: V[i], BV[i], auxS0, &norm, &lindep);
1058: }
1059: } else {
1060: /* Else orthogonalize first against cX and then against V */
1061: IPBOrthogonalize(ip, size_cX, cX, BcX, BcXn, i, NULL, V, BV, BVn,
1062: V[i], BV[i], auxS0, &norm, &lindep);
1063: }
1064: if (!lindep && (PetscAbs(norm) > PETSC_SQRT_MACHINE_EPSILON)) break;
1065: PetscInfo1(ip, "Orthonormalization problems adding the vector %d to the searching subspace\n", i);
1066: }
1067: if (lindep || (PetscAbs(norm) < PETSC_SQRT_MACHINE_EPSILON)) {
1068: SETERRQ(PetscObjectComm((PetscObject)ip),1, "Error during the orthonormalization of the eigenvectors");
1069: }
1070: if (BVn) BVn[i] = norm > 0.0 ? 1.0 : -1.0;
1071: norm = PetscAbs(norm);
1072: VecScale(V[i],1.0/norm);
1073: VecScale(BV[i],1.0/norm);
1074: }
1075: return(0);
1076: }
1080: /* auxS: size_cX+V_new_e+1 */
1081: PetscErrorCode dvd_BorthV_stable(IP ip,Vec *defl,PetscReal *BDSn,PetscInt size_DS,Vec *cX,PetscReal *BcXn,PetscInt size_cX,Vec *V,PetscReal *BVn,PetscInt V_new_s,PetscInt V_new_e,PetscScalar *auxS,PetscRandom rand)
1082: {
1083: PetscErrorCode ierr;
1084: PetscInt i, j;
1085: PetscBool lindep;
1086: PetscReal norm;
1087: PetscScalar *auxS0 = auxS;
1090: /* Orthonormalize V with IP */
1091: for (i=V_new_s;i<V_new_e;i++) {
1092: for (j=0;j<3;j++) {
1093: if (j>0) {
1094: SlepcVecSetRandom(V[i],rand);
1095: PetscInfo1(ip,"Orthonormalization problems adding the vector %d to the searching subspace\n",i);
1096: }
1097: /* Orthogonalize against the deflation, if needed */
1098: if (defl) {
1099: IPPseudoOrthogonalize(ip,size_DS,defl,BDSn,V[i],auxS0,NULL,&lindep);
1100: if (lindep) continue;
1101: }
1102: /* If cX and V are contiguous, orthogonalize in one step */
1103: if (cX + size_cX == V) {
1104: IPPseudoOrthogonalize(ip,size_cX+i,cX,BcXn,V[i],auxS0,&norm,&lindep);
1105: /* Else orthogonalize first against cX and then against V */
1106: } else {
1107: IPPseudoOrthogonalize(ip,size_cX,cX,BcXn,V[i],auxS0,NULL,&lindep);
1108: if (lindep) continue;
1109: IPPseudoOrthogonalize(ip,i,V,BVn,V[i],auxS0,&norm,&lindep);
1110: }
1111: if (!lindep && (PetscAbs(norm) > PETSC_MACHINE_EPSILON)) break;
1112: }
1113: if (lindep || (PetscAbs(norm) < PETSC_MACHINE_EPSILON)) {
1114: SETERRQ(PetscObjectComm((PetscObject)ip),1, "Error during the orthonormalization of the eigenvectors");
1115: }
1116: if (BVn) BVn[i] = norm > 0.0 ? 1.0 : -1.0;
1117: norm = PetscAbs(norm);
1118: VecScale(V[i],1.0/norm);
1119: }
1120: return(0);
1121: }