Revision | a132630a082c59e6da5ef8edf2d33d4357393ec5 (tree) |
---|---|
Time | 2013-10-14 16:26:38 |
Author | Mikiya Fujii <mikiya.fujii@gmai...> |
Commiter | Mikiya Fujii |
Refactoring: INTEL and GNU use Lapacke. #32235
git-svn-id: https://svn.sourceforge.jp/svnroot/molds/trunk@1540 1136aad2-a195-0410-b898-f5ea1d11b9d8
@@ -514,6 +514,7 @@ void Cndo2::DoSCF(bool requiresGuess){ | ||
514 | 514 | double*** diisStoredDensityMatrix = NULL; |
515 | 515 | double*** diisStoredErrorVect = NULL; |
516 | 516 | double** diisErrorProducts = NULL; |
517 | + double** tmpDiisErrorProducts = NULL; | |
517 | 518 | double* diisErrorCoefficients = NULL; |
518 | 519 | |
519 | 520 | try{ |
@@ -521,6 +522,7 @@ void Cndo2::DoSCF(bool requiresGuess){ | ||
521 | 522 | &diisStoredDensityMatrix, |
522 | 523 | &diisStoredErrorVect, |
523 | 524 | &diisErrorProducts, |
525 | + &tmpDiisErrorProducts, | |
524 | 526 | &diisErrorCoefficients); |
525 | 527 | // calculate electron integral |
526 | 528 | this->CalcGammaAB(this->gammaAB, *this->molecule); |
@@ -590,6 +592,7 @@ void Cndo2::DoSCF(bool requiresGuess){ | ||
590 | 592 | diisStoredDensityMatrix, |
591 | 593 | diisStoredErrorVect, |
592 | 594 | diisErrorProducts, |
595 | + tmpDiisErrorProducts, | |
593 | 596 | diisErrorCoefficients, |
594 | 597 | diisError, |
595 | 598 | hasAppliedDIIS, |
@@ -612,6 +615,7 @@ void Cndo2::DoSCF(bool requiresGuess){ | ||
612 | 615 | &diisStoredDensityMatrix, |
613 | 616 | &diisStoredErrorVect, |
614 | 617 | &diisErrorProducts, |
618 | + &tmpDiisErrorProducts, | |
615 | 619 | &diisErrorCoefficients); |
616 | 620 | |
617 | 621 | throw ex; |
@@ -620,6 +624,7 @@ void Cndo2::DoSCF(bool requiresGuess){ | ||
620 | 624 | &diisStoredDensityMatrix, |
621 | 625 | &diisStoredErrorVect, |
622 | 626 | &diisErrorProducts, |
627 | + &tmpDiisErrorProducts, | |
623 | 628 | &diisErrorCoefficients); |
624 | 629 | |
625 | 630 | double ompEndTime = omp_get_wtime(); |
@@ -750,6 +755,7 @@ void Cndo2::FreeSCFTemporaryMatrices(double*** oldOrbitalElectronPopulation, | ||
750 | 755 | double**** diisStoredDensityMatrix, |
751 | 756 | double**** diisStoredErrorVect, |
752 | 757 | double*** diisErrorProducts, |
758 | + double*** tmpDiisErrorProducts, | |
753 | 759 | double** diisErrorCoefficients) const{ |
754 | 760 | |
755 | 761 | int diisNumErrorVect = Parameters::GetInstance()->GetDiisNumErrorVectSCF(); |
@@ -767,6 +773,9 @@ void Cndo2::FreeSCFTemporaryMatrices(double*** oldOrbitalElectronPopulation, | ||
767 | 773 | MallocerFreer::GetInstance()->Free<double>(diisErrorProducts, |
768 | 774 | diisNumErrorVect+1, |
769 | 775 | diisNumErrorVect+1); |
776 | + MallocerFreer::GetInstance()->Free<double>(tmpDiisErrorProducts, | |
777 | + diisNumErrorVect+1, | |
778 | + diisNumErrorVect+1); | |
770 | 779 | MallocerFreer::GetInstance()->Free<double>(diisErrorCoefficients, |
771 | 780 | diisNumErrorVect+1); |
772 | 781 | } |
@@ -775,6 +784,7 @@ void Cndo2::MallocSCFTemporaryMatrices(double*** oldOrbitalElectronPopulation, | ||
775 | 784 | double**** diisStoredDensityMatrix, |
776 | 785 | double**** diisStoredErrorVect, |
777 | 786 | double*** diisErrorProducts, |
787 | + double*** tmpDiisErrorProducts, | |
778 | 788 | double** diisErrorCoefficients){ |
779 | 789 | |
780 | 790 | int diisNumErrorVect = Parameters::GetInstance()->GetDiisNumErrorVectSCF(); |
@@ -791,6 +801,7 @@ void Cndo2::MallocSCFTemporaryMatrices(double*** oldOrbitalElectronPopulation, | ||
791 | 801 | this->molecule->GetTotalNumberAOs(), |
792 | 802 | this->molecule->GetTotalNumberAOs()); |
793 | 803 | MallocerFreer::GetInstance()->Malloc<double>(diisErrorProducts, diisNumErrorVect+1, diisNumErrorVect+1); |
804 | + MallocerFreer::GetInstance()->Malloc<double>(tmpDiisErrorProducts, diisNumErrorVect+1, diisNumErrorVect+1); | |
794 | 805 | MallocerFreer::GetInstance()->Malloc<double>(diisErrorCoefficients, diisNumErrorVect+1); |
795 | 806 | } |
796 | 807 | } |
@@ -805,6 +816,7 @@ void Cndo2::DoDIIS(double** orbitalElectronPopulation, | ||
805 | 816 | double*** diisStoredDensityMatrix, |
806 | 817 | double*** diisStoredErrorVect, |
807 | 818 | double** diisErrorProducts, |
819 | + double** tmpDiisErrorProducts, | |
808 | 820 | double* diisErrorCoefficients, |
809 | 821 | double& diisError, |
810 | 822 | bool& hasAppliedDIIS, |
@@ -873,7 +885,13 @@ void Cndo2::DoDIIS(double** orbitalElectronPopulation, | ||
873 | 885 | if(diisNumErrorVect <= step && diisEndError<diisError && diisError<diisStartError){ |
874 | 886 | hasAppliedDIIS = true; |
875 | 887 | try{ |
876 | - MolDS_wrappers::Lapack::GetInstance()->Dsysv(diisErrorProducts, | |
888 | +#pragma omp parallel for schedule(auto) | |
889 | + for(int i=0; i<diisNumErrorVect+1; i++){ | |
890 | + for(int j=0; j<diisNumErrorVect+1; j++){ | |
891 | + tmpDiisErrorProducts[i][j] = diisErrorProducts[i][j]; | |
892 | + } | |
893 | + } | |
894 | + MolDS_wrappers::Lapack::GetInstance()->Dsysv(tmpDiisErrorProducts, | |
877 | 895 | diisErrorCoefficients, |
878 | 896 | diisNumErrorVect+1); |
879 | 897 | }catch(MolDSException ex){ |
@@ -485,6 +485,7 @@ private: | ||
485 | 485 | double*** diisStoredDensityMatrix, |
486 | 486 | double*** diisStoredErrorVect, |
487 | 487 | double** diisErrorProducts, |
488 | + double** tmpDiisErrorProducts, | |
488 | 489 | double* diisErrorCoefficients, |
489 | 490 | double& diisError, |
490 | 491 | bool& hasAppliedDIIS, |
@@ -510,11 +511,13 @@ private: | ||
510 | 511 | double**** diisStoredDensityMatrix, |
511 | 512 | double**** diisStoredErrorVect, |
512 | 513 | double*** diisErrorProducts, |
514 | + double*** tmpDiisErrorProducts, | |
513 | 515 | double** diisErrorCoefficients) const; |
514 | 516 | void MallocSCFTemporaryMatrices(double*** oldOrbitalElectronPopulation, |
515 | 517 | double**** diisStoredDensityMatrix, |
516 | 518 | double**** diisStoredErrorVect, |
517 | 519 | double*** diisErrorProducts, |
520 | + double*** tmpDiisErrorProducts, | |
518 | 521 | double** diisErrorCoefficients); |
519 | 522 | }; |
520 | 523 |
@@ -141,25 +141,25 @@ private: | ||
141 | 141 | int tagBase = origianlTag*numChunks; |
142 | 142 | if(elementsLimit < 0){ |
143 | 143 | std::stringstream ss; |
144 | - ss << this->errorMessageSplitMessageElemLimNegative << elementsLimit << endl; | |
144 | + ss << this->errorMessageSplitMessageElemLimNegative << elementsLimit << std::endl; | |
145 | 145 | MolDS_base::MolDSException ex(ss.str()); |
146 | 146 | throw ex; |
147 | 147 | } |
148 | 148 | if(numChunks < 0){ |
149 | 149 | std::stringstream ss; |
150 | - ss << this->errorMessageSplitMessageNumChnkNegative << numChunks << endl; | |
150 | + ss << this->errorMessageSplitMessageNumChnkNegative << numChunks << std::endl; | |
151 | 151 | MolDS_base::MolDSException ex(ss.str()); |
152 | 152 | throw ex; |
153 | 153 | } |
154 | 154 | if(remaining < 0){ |
155 | 155 | std::stringstream ss; |
156 | - ss << this->errorMessageSplitMessageRemainingNegative << remaining << endl; | |
156 | + ss << this->errorMessageSplitMessageRemainingNegative << remaining << std::endl; | |
157 | 157 | MolDS_base::MolDSException ex(ss.str()); |
158 | 158 | throw ex; |
159 | 159 | } |
160 | 160 | if(tagBase < 0){ |
161 | 161 | std::stringstream ss; |
162 | - ss << this->errorMessageSplitMessageTagBaseNegative << tagBase << endl; | |
162 | + ss << this->errorMessageSplitMessageTagBaseNegative << tagBase << std::endl; | |
163 | 163 | MolDS_base::MolDSException ex(ss.str()); |
164 | 164 | throw ex; |
165 | 165 | } |
@@ -98,18 +98,10 @@ void Lapack::DeleteInstance(){ | ||
98 | 98 | * ***/ |
99 | 99 | molds_lapack_int Lapack::Dsyevd(double** matrix, double* eigenValues, molds_lapack_int size, bool calcEigenVectors){ |
100 | 100 | molds_lapack_int info = 0; |
101 | - molds_lapack_int k = 0; | |
102 | - molds_lapack_int lwork; | |
103 | - molds_lapack_int liwork; | |
104 | - char job; | |
105 | 101 | char uplo = 'U'; |
106 | 102 | molds_lapack_int lda = size; |
107 | - double* convertedMatrix; | |
108 | - double* tempEigenValues; | |
109 | - double* work; | |
110 | - molds_lapack_int* iwork; | |
111 | - | |
112 | 103 | // set job type |
104 | + char job; | |
113 | 105 | if(calcEigenVectors){ |
114 | 106 | job = 'V'; |
115 | 107 | } |
@@ -117,84 +109,47 @@ molds_lapack_int Lapack::Dsyevd(double** matrix, double* eigenValues, molds_lapa | ||
117 | 109 | job = 'N'; |
118 | 110 | } |
119 | 111 | |
120 | - // calc. lwork and liwork | |
121 | - if(size < 1 ){ | |
122 | - stringstream ss; | |
123 | - ss << errorMessageDsyevdSize; | |
124 | - MolDSException ex(ss.str()); | |
125 | - ex.SetKeyValue<int>(LapackInfo, info); | |
126 | - throw ex; | |
127 | - } | |
128 | - else if(size == 1){ | |
129 | - lwork = 1; | |
130 | - liwork = 1; | |
131 | - } | |
132 | - else if(1 < size && job == 'N'){ | |
133 | - lwork = 2*size + 1; | |
134 | - liwork = 2; | |
135 | - } | |
136 | - else{ | |
137 | - // calc. k | |
138 | - double temp = log((double)size)/log(2.0); | |
139 | - if( (double)((molds_lapack_int)temp) < temp ){ | |
140 | - k = (molds_lapack_int)temp + 1; | |
141 | - } | |
142 | - else{ | |
143 | - k = (molds_lapack_int)temp; | |
144 | - } | |
145 | - lwork = 3*size*size + (5+2*k)*size + 1; | |
146 | - liwork = 5*size + 3; | |
147 | - } | |
148 | - | |
149 | - // malloc | |
150 | - work = (double*)MOLDS_LAPACK_malloc( sizeof(double)*lwork, 16 ); | |
151 | - iwork = (molds_lapack_int*)MOLDS_LAPACK_malloc( sizeof(molds_lapack_int)*liwork, 16 ); | |
152 | - convertedMatrix = (double*)MOLDS_LAPACK_malloc( sizeof(double)*size*size, 16 ); | |
153 | - tempEigenValues = (double*)MOLDS_LAPACK_malloc( sizeof(double)*size, 16 ); | |
154 | - | |
155 | - for(molds_lapack_int i = 0; i < size; i++){ | |
156 | - for(molds_lapack_int j = i; j < size; j++){ | |
157 | - convertedMatrix[i+j*size] = matrix[i][j]; | |
158 | - } | |
159 | - } | |
160 | - | |
161 | 112 | // call Lapack |
162 | 113 | #ifdef __INTEL_COMPILER |
163 | - dsyevd(&job, &uplo, &size, convertedMatrix, &lda, tempEigenValues, work, &lwork, iwork, &liwork, &info); | |
114 | + info = LAPACKE_dsyevd(LAPACK_ROW_MAJOR, job, uplo, size, &matrix[0][0], lda, eigenValues); | |
164 | 115 | #else |
165 | - info = LAPACKE_dsyevd_work(LAPACK_COL_MAJOR, job, uplo, size, convertedMatrix, lda, tempEigenValues, work, lwork, iwork, liwork); | |
116 | + info = LAPACKE_dsyevd(LAPACK_ROW_MAJOR, job, uplo, size, &matrix[0][0], lda, eigenValues); | |
166 | 117 | #endif |
167 | 118 | |
168 | - for(molds_lapack_int i = 0; i < size; i++){ | |
169 | - for(molds_lapack_int j = 0; j < size; j++){ | |
170 | - matrix[i][j] = convertedMatrix[j+i*size]; //i-th row is i-th eigen vector | |
171 | - //matrix[j][i] = convertedMatrix[j+i*size]; //i-th column is i-th eigen vector | |
119 | + // make i-th row i-the eigenvector | |
120 | + double** tmpMatrix=NULL; | |
121 | + try{ | |
122 | + MallocerFreer::GetInstance()->Malloc<double>(&tmpMatrix, size, size); | |
123 | + for(molds_lapack_int i = 0; i < size; i++){ | |
124 | + for(molds_lapack_int j = 0; j < size; j++){ | |
125 | + tmpMatrix[j][i] = matrix[i][j]; | |
126 | + } | |
127 | + } | |
128 | + for(molds_lapack_int i = 0; i < size; i++){ | |
129 | + for(molds_lapack_int j = 0; j < size; j++){ | |
130 | + matrix[i][j] = tmpMatrix[i][j]; | |
131 | + } | |
172 | 132 | } |
173 | 133 | } |
134 | + catch(MolDSException ex){ | |
135 | + MallocerFreer::GetInstance()->Free<double>(&tmpMatrix, size, size); | |
136 | + throw ex; | |
137 | + } | |
138 | + MallocerFreer::GetInstance()->Free<double>(&tmpMatrix, size, size); | |
174 | 139 | |
140 | + // adjust phase of eigenvectors | |
175 | 141 | for(molds_lapack_int i=0;i<size;i++){ |
176 | - double temp = 0.0; | |
142 | + double tmp = 0.0; | |
177 | 143 | for(molds_lapack_int j=0;j<size;j++){ |
178 | - temp += matrix[i][j]; | |
144 | + tmp += matrix[i][j]; | |
179 | 145 | } |
180 | - if(temp<0){ | |
146 | + if(tmp<0){ | |
181 | 147 | for(molds_lapack_int j=0;j<size;j++){ |
182 | 148 | matrix[i][j]*=-1.0; |
183 | 149 | } |
184 | 150 | } |
185 | 151 | } |
186 | 152 | |
187 | - for(molds_lapack_int i = 0; i < size; i++){ | |
188 | - eigenValues[i] = tempEigenValues[i]; | |
189 | - } | |
190 | - //this->OutputLog(boost::format("size=%d lwork=%d liwork=%d k=%d info=%d\n") % size % lwork % liwork % k % info); | |
191 | - | |
192 | - // free | |
193 | - MOLDS_LAPACK_free(work); | |
194 | - MOLDS_LAPACK_free(iwork); | |
195 | - MOLDS_LAPACK_free(convertedMatrix); | |
196 | - MOLDS_LAPACK_free(tempEigenValues); | |
197 | - | |
198 | 153 | if(info != 0){ |
199 | 154 | stringstream ss; |
200 | 155 | ss << errorMessageDsyevdInfo; |
@@ -210,18 +165,15 @@ molds_lapack_int Lapack::Dsyevd(double** matrix, double* eigenValues, molds_lapa | ||
210 | 165 | * |
211 | 166 | * "matrix*X=b" is solved, then we get X by this method. |
212 | 167 | * The X will be stored in b. |
168 | + * The matrix will be overwriten by this method. | |
213 | 169 | * |
214 | 170 | */ |
215 | -molds_lapack_int Lapack::Dsysv(double const* const* matrix, double* b, molds_lapack_int size){ | |
171 | +molds_lapack_int Lapack::Dsysv(double** matrix, double* b, molds_lapack_int size){ | |
216 | 172 | molds_lapack_int info = 0; |
217 | - molds_lapack_int lwork; | |
218 | - char uplo = 'U'; | |
219 | - molds_lapack_int lda = size; | |
220 | - molds_lapack_int ldb = size; | |
173 | + char uplo = 'U'; | |
221 | 174 | molds_lapack_int nrhs = 1; |
222 | - double* convertedMatrix; | |
223 | - double* work; | |
224 | - double* tempB; | |
175 | + molds_lapack_int lda = size; | |
176 | + molds_lapack_int ldb = nrhs; | |
225 | 177 | molds_lapack_int* ipiv; |
226 | 178 | |
227 | 179 | if(size < 1 ){ |
@@ -232,50 +184,15 @@ molds_lapack_int Lapack::Dsysv(double const* const* matrix, double* b, molds_lap | ||
232 | 184 | |
233 | 185 | // malloc |
234 | 186 | ipiv = (molds_lapack_int*)MOLDS_LAPACK_malloc( sizeof(molds_lapack_int)*2*size, 16 ); |
235 | - convertedMatrix = (double*)MOLDS_LAPACK_malloc( sizeof(double)*size*size, 16 ); | |
236 | - tempB = (double*)MOLDS_LAPACK_malloc( sizeof(double)*size, 16 ); | |
237 | 187 | |
238 | - for(molds_lapack_int i = 0; i < size; i++){ | |
239 | - for(molds_lapack_int j = i; j < size; j++){ | |
240 | - convertedMatrix[i+j*size] = matrix[i][j]; | |
241 | - } | |
242 | - } | |
243 | - for(molds_lapack_int i = 0; i < size; i++){ | |
244 | - tempB[i] = b[i]; | |
245 | - } | |
246 | - | |
247 | - // calc. lwork | |
248 | - double blockSize=0.0; | |
249 | -#pragma omp critical | |
250 | - { | |
251 | - lwork = -1; | |
252 | - double tempWork[3]={0.0, 0.0, 0.0}; | |
253 | 188 | #ifdef __INTEL_COMPILER |
254 | - dsysv(&uplo, &size, &nrhs, convertedMatrix, &lda, ipiv, tempB, &ldb, tempWork, &lwork, &info); | |
189 | + info = LAPACKE_dsysv(LAPACK_ROW_MAJOR, uplo, size, nrhs, &matrix[0][0], lda, ipiv, b, ldb); | |
255 | 190 | #else |
256 | - info = LAPACKE_dsysv_work(LAPACK_COL_MAJOR, uplo, size, nrhs, convertedMatrix, lda, ipiv, tempB, ldb, tempWork, lwork); | |
191 | + info = LAPACKE_dsysv(LAPACK_ROW_MAJOR, uplo, size, nrhs, &matrix[0][0], lda, ipiv, b, ldb); | |
257 | 192 | #endif |
258 | - blockSize = tempWork[0]/size; | |
259 | - } | |
260 | - info = 0; | |
261 | - lwork = blockSize*size; | |
262 | - work = (double*)MOLDS_LAPACK_malloc( sizeof(double)*lwork, 16 ); | |
263 | - | |
264 | - // call Lapack | |
265 | -#ifdef __INTEL_COMPILER | |
266 | - dsysv(&uplo, &size, &nrhs, convertedMatrix, &lda, ipiv, tempB, &ldb, work, &lwork, &info); | |
267 | -#else | |
268 | - info = LAPACKE_dsysv_work(LAPACK_COL_MAJOR, uplo, size, nrhs, convertedMatrix, lda, ipiv, tempB, ldb, work, lwork); | |
269 | -#endif | |
270 | - for(molds_lapack_int i = 0; i < size; i++){ | |
271 | - b[i] = tempB[i]; | |
272 | - } | |
273 | 193 | |
274 | 194 | // free |
275 | - MOLDS_LAPACK_free(convertedMatrix); | |
276 | 195 | MOLDS_LAPACK_free(ipiv); |
277 | - MOLDS_LAPACK_free(work); | |
278 | - MOLDS_LAPACK_free(tempB); | |
279 | 196 | |
280 | 197 | if(info != 0){ |
281 | 198 | stringstream ss; |
@@ -291,17 +208,17 @@ molds_lapack_int Lapack::Dsysv(double const* const* matrix, double* b, molds_lap | ||
291 | 208 | /*** |
292 | 209 | * |
293 | 210 | * "matrix*X[i]=b[i] (i=0, 1, ... , nrhs-1) is solved, then we get X[i] by this method. |
294 | - * The X[i] will be stored in b[i]. | |
295 | - * b[i][j] is j-th element of i-th solution, b[i]. | |
211 | + * The X[i] will be stored in b[i], namely | |
212 | + * the b[i][j] will be j-th element of i-th solution, b[i]. | |
213 | + * Besides, the matrix will be overwriten by this method. | |
296 | 214 | * |
297 | 215 | */ |
298 | -molds_lapack_int Lapack::Dgetrs(double const* const* matrix, double** b, molds_lapack_int size, molds_lapack_int nrhs) const{ | |
216 | +molds_lapack_int Lapack::Dgetrs(double** matrix, double** b, molds_lapack_int size, molds_lapack_int nrhs) const{ | |
299 | 217 | molds_lapack_int info = 0; |
300 | 218 | char trans = 'N'; |
301 | 219 | molds_lapack_int lda = size; |
302 | - molds_lapack_int ldb = size; | |
303 | - double* convertedMatrix; | |
304 | - double* convertedB; | |
220 | + molds_lapack_int ldb = nrhs; | |
221 | + double* tmpB; | |
305 | 222 | molds_lapack_int* ipiv; |
306 | 223 | |
307 | 224 | if(size < 1 ){ |
@@ -310,45 +227,37 @@ molds_lapack_int Lapack::Dgetrs(double const* const* matrix, double** b, molds_l | ||
310 | 227 | throw MolDSException(ss.str()); |
311 | 228 | } |
312 | 229 | |
313 | - | |
314 | 230 | try{ |
315 | 231 | // malloc |
316 | 232 | ipiv = (molds_lapack_int*)MOLDS_LAPACK_malloc( sizeof(molds_lapack_int)*2*size, 16 ); |
317 | - convertedMatrix = (double*)MOLDS_LAPACK_malloc( sizeof(double)*size*size, 16 ); | |
318 | - convertedB = (double*)MOLDS_LAPACK_malloc( sizeof(double)*nrhs*size, 16 ); | |
319 | - for(molds_lapack_int i = 0; i < size; i++){ | |
320 | - for(molds_lapack_int j = 0; j < size; j++){ | |
321 | - convertedMatrix[i+j*size] = matrix[i][j]; | |
322 | - } | |
323 | - } | |
233 | + tmpB = (double*)MOLDS_LAPACK_malloc( sizeof(double)*size*nrhs, 16 ); | |
234 | + // matrix b should be transposed | |
324 | 235 | for(molds_lapack_int i = 0; i < nrhs; i++){ |
325 | 236 | for(molds_lapack_int j = 0; j < size; j++){ |
326 | - convertedB[j+i*size] = b[i][j]; | |
237 | + tmpB[j*nrhs+i] = b[i][j]; | |
327 | 238 | } |
328 | 239 | } |
329 | - this->Dgetrf(convertedMatrix, ipiv, size, size); | |
240 | + this->Dgetrf(&matrix[0][0], ipiv, size, size); | |
330 | 241 | #ifdef __INTEL_COMPILER |
331 | - dgetrs(&trans, &size, &nrhs, convertedMatrix, &lda, ipiv, convertedB, &ldb, &info); | |
242 | + info = LAPACKE_dgetrs(LAPACK_ROW_MAJOR, trans, size, nrhs, &matrix[0][0], lda, ipiv, tmpB, ldb); | |
332 | 243 | #else |
333 | - info = LAPACKE_dgetrs_work(LAPACK_COL_MAJOR, trans, size, nrhs, convertedMatrix, lda, ipiv, convertedB, ldb); | |
244 | + info = LAPACKE_dgetrs(LAPACK_ROW_MAJOR, trans, size, nrhs, &matrix[0][0], lda, ipiv, tmpB, ldb); | |
334 | 245 | #endif |
335 | 246 | for(molds_lapack_int i = 0; i < nrhs; i++){ |
336 | 247 | for(molds_lapack_int j = 0; j < size; j++){ |
337 | - b[i][j] = convertedB[j+i*size]; | |
248 | + b[i][j] = tmpB[j*nrhs+i]; | |
338 | 249 | } |
339 | 250 | } |
340 | 251 | } |
341 | 252 | catch(MolDSException ex){ |
342 | 253 | // free |
343 | - MOLDS_LAPACK_free(convertedMatrix); | |
344 | - MOLDS_LAPACK_free(convertedB); | |
254 | + MOLDS_LAPACK_free(tmpB); | |
345 | 255 | MOLDS_LAPACK_free(ipiv); |
346 | 256 | throw ex; |
347 | 257 | } |
348 | 258 | // free |
349 | - MOLDS_LAPACK_free(convertedMatrix); | |
350 | - MOLDS_LAPACK_free(convertedB); | |
351 | 259 | MOLDS_LAPACK_free(ipiv); |
260 | + MOLDS_LAPACK_free(tmpB); | |
352 | 261 | |
353 | 262 | if(info != 0){ |
354 | 263 | stringstream ss; |
@@ -363,41 +272,29 @@ molds_lapack_int Lapack::Dgetrs(double const* const* matrix, double** b, molds_l | ||
363 | 272 | // Argument "matrix" will be LU-decomposed. |
364 | 273 | molds_lapack_int Lapack::Dgetrf(double** matrix, molds_lapack_int sizeM, molds_lapack_int sizeN) const{ |
365 | 274 | molds_lapack_int* ipiv = (molds_lapack_int*)MOLDS_LAPACK_malloc( sizeof(molds_lapack_int)*2*sizeM, 16 ); |
366 | - this->Dgetrf(matrix, ipiv, sizeM, sizeN); | |
275 | + this->Dgetrf(&matrix[0][0], ipiv, sizeM, sizeN); | |
367 | 276 | MOLDS_LAPACK_free(ipiv); |
368 | 277 | molds_lapack_int info = 0; |
369 | 278 | return info; |
370 | 279 | } |
371 | 280 | |
372 | -// Argument "matrix" is sizeM*sizeN matrix. | |
281 | +// Argument "matrix" is sizeM*sizeN matrix in Row-major (C/C++ style) | |
373 | 282 | // Argument "matrix" will be LU-decomposed. |
374 | 283 | molds_lapack_int Lapack::Dgetrf(double** matrix, molds_lapack_int* ipiv, molds_lapack_int sizeM, molds_lapack_int sizeN) const{ |
375 | - double* convertedMatrix = (double*)MOLDS_LAPACK_malloc( sizeof(double)*sizeM*sizeN, 16 ); | |
376 | - for(molds_lapack_int i=0; i<sizeM; i++){ | |
377 | - for(molds_lapack_int j=0; j<sizeN; j++){ | |
378 | - convertedMatrix[i+j*sizeM] = matrix[i][j]; | |
379 | - } | |
380 | - } | |
381 | - this->Dgetrf(convertedMatrix, ipiv, sizeM, sizeN); | |
382 | - for(molds_lapack_int i=0; i<sizeM; i++){ | |
383 | - for(molds_lapack_int j=0; j<sizeN; j++){ | |
384 | - matrix[i][j] = convertedMatrix[i+j*sizeM]; | |
385 | - } | |
386 | - } | |
387 | - MOLDS_LAPACK_free(convertedMatrix); | |
284 | + this->Dgetrf(&matrix[0][0], ipiv, sizeM, sizeN); | |
388 | 285 | molds_lapack_int info = 0; |
389 | 286 | return info; |
390 | 287 | } |
391 | 288 | |
392 | 289 | // Argument "matrix" is sizeM*sizeN matrix. |
393 | -// The each element of "matrix" should be stored in 1-dimensional vecotre with column major (Fortran type). | |
290 | +// The each element of "matrix" should be stored in 1-dimensional vecotre with Row major (C/C++ style). | |
394 | 291 | molds_lapack_int Lapack::Dgetrf(double* matrix, molds_lapack_int* ipiv, molds_lapack_int sizeM, molds_lapack_int sizeN) const{ |
395 | 292 | molds_lapack_int info = 0; |
396 | 293 | molds_lapack_int lda = sizeM; |
397 | 294 | #ifdef __INTEL_COMPILER |
398 | - dgetrf(&sizeM, &sizeN, matrix, &lda, ipiv, &info); | |
295 | + info = LAPACKE_dgetrf(LAPACK_ROW_MAJOR, sizeM, sizeN, matrix, lda, ipiv); | |
399 | 296 | #else |
400 | - info = LAPACKE_dgetrf_work(LAPACK_COL_MAJOR, sizeM, sizeN, matrix, lda, ipiv); | |
297 | + info = LAPACKE_dgetrf(LAPACK_ROW_MAJOR, sizeM, sizeN, matrix, lda, ipiv); | |
401 | 298 | #endif |
402 | 299 | if(info != 0){ |
403 | 300 | stringstream ss; |
@@ -27,8 +27,8 @@ public: | ||
27 | 27 | static Lapack* GetInstance(); |
28 | 28 | static void DeleteInstance(); |
29 | 29 | molds_lapack_int Dsyevd(double** matrix, double* eigenValues, molds_lapack_int size, bool calcEigenVectors); |
30 | - molds_lapack_int Dsysv(double const* const* matrix, double* b, molds_lapack_int size); | |
31 | - molds_lapack_int Dgetrs(double const* const* matrix, double** b, molds_lapack_int size, molds_lapack_int nrhs) const; | |
30 | + molds_lapack_int Dsysv(double** matrix, double* b, molds_lapack_int size); | |
31 | + molds_lapack_int Dgetrs(double** matrix, double** b, molds_lapack_int size, molds_lapack_int nrhs) const; | |
32 | 32 | molds_lapack_int Dgetrf(double** matrix, molds_lapack_int sizeM, molds_lapack_int sizeN) const; |
33 | 33 | molds_lapack_int Dgetrf(double** matrix, molds_lapack_int* ipiv, molds_lapack_int sizeM, molds_lapack_int sizeN) const; |
34 | 34 | private: |