OSDN > Developer > ktns > Chamber > MolDS > Commit

MolDS
Fork

(Original repository, No fork origin)

R/O
HTTP
SSH
HTTPS

Commit

Commit MetaInfo

Revision	a132630a082c59e6da5ef8edf2d33d4357393ec5 (tree)
Time	2013-10-14 16:26:38
Author	Mikiya Fujii <mikiya.fujii@gmai...>
Commiter	Mikiya Fujii

Log Message

Refactoring: INTEL and GNU use Lapacke. #32235

git-svn-id: https://svn.sourceforge.jp/svnroot/molds/trunk@1540 1136aad2-a195-0410-b898-f5ea1d11b9d8

Change Summary

modified: src/cndo/Cndo2.cpp (diff)
modified: src/cndo/Cndo2.h (diff)
modified: src/mpi/MpiProcess.h (diff)
modified: src/wrappers/Lapack.cpp (diff)
modified: src/wrappers/Lapack.h (diff)

Incremental Difference

--- a/src/cndo/Cndo2.cpp

+++ b/src/cndo/Cndo2.cpp

		@@ -514,6 +514,7 @@ void Cndo2::DoSCF(bool requiresGuess){
514	514	double*** diisStoredDensityMatrix = NULL;
515	515	double*** diisStoredErrorVect = NULL;
516	516	double** diisErrorProducts = NULL;
	517	+ double** tmpDiisErrorProducts = NULL;
517	518	double* diisErrorCoefficients = NULL;
518	519
519	520	try{

		@@ -521,6 +522,7 @@ void Cndo2::DoSCF(bool requiresGuess){
521	522	&diisStoredDensityMatrix,
522	523	&diisStoredErrorVect,
523	524	&diisErrorProducts,
	525	+ &tmpDiisErrorProducts,
524	526	&diisErrorCoefficients);
525	527	// calculate electron integral
526	528	this->CalcGammaAB(this->gammaAB, *this->molecule);

		@@ -590,6 +592,7 @@ void Cndo2::DoSCF(bool requiresGuess){
590	592	diisStoredDensityMatrix,
591	593	diisStoredErrorVect,
592	594	diisErrorProducts,
	595	+ tmpDiisErrorProducts,
593	596	diisErrorCoefficients,
594	597	diisError,
595	598	hasAppliedDIIS,

		@@ -612,6 +615,7 @@ void Cndo2::DoSCF(bool requiresGuess){
612	615	&diisStoredDensityMatrix,
613	616	&diisStoredErrorVect,
614	617	&diisErrorProducts,
	618	+ &tmpDiisErrorProducts,
615	619	&diisErrorCoefficients);
616	620
617	621	throw ex;

		@@ -620,6 +624,7 @@ void Cndo2::DoSCF(bool requiresGuess){
620	624	&diisStoredDensityMatrix,
621	625	&diisStoredErrorVect,
622	626	&diisErrorProducts,
	627	+ &tmpDiisErrorProducts,
623	628	&diisErrorCoefficients);
624	629
625	630	double ompEndTime = omp_get_wtime();

		@@ -750,6 +755,7 @@ void Cndo2::FreeSCFTemporaryMatrices(double*** oldOrbitalElectronPopulation,
750	755	double**** diisStoredDensityMatrix,
751	756	double**** diisStoredErrorVect,
752	757	double*** diisErrorProducts,
	758	+ double*** tmpDiisErrorProducts,
753	759	double** diisErrorCoefficients) const{
754	760
755	761	int diisNumErrorVect = Parameters::GetInstance()->GetDiisNumErrorVectSCF();

		@@ -767,6 +773,9 @@ void Cndo2::FreeSCFTemporaryMatrices(double*** oldOrbitalElectronPopulation,
767	773	MallocerFreer::GetInstance()->Free<double>(diisErrorProducts,
768	774	diisNumErrorVect+1,
769	775	diisNumErrorVect+1);
	776	+ MallocerFreer::GetInstance()->Free<double>(tmpDiisErrorProducts,
	777	+ diisNumErrorVect+1,
	778	+ diisNumErrorVect+1);
770	779	MallocerFreer::GetInstance()->Free<double>(diisErrorCoefficients,
771	780	diisNumErrorVect+1);
772	781	}

		@@ -775,6 +784,7 @@ void Cndo2::MallocSCFTemporaryMatrices(double*** oldOrbitalElectronPopulation,
775	784	double**** diisStoredDensityMatrix,
776	785	double**** diisStoredErrorVect,
777	786	double*** diisErrorProducts,
	787	+ double*** tmpDiisErrorProducts,
778	788	double** diisErrorCoefficients){
779	789
780	790	int diisNumErrorVect = Parameters::GetInstance()->GetDiisNumErrorVectSCF();

		@@ -791,6 +801,7 @@ void Cndo2::MallocSCFTemporaryMatrices(double*** oldOrbitalElectronPopulation,
791	801	this->molecule->GetTotalNumberAOs(),
792	802	this->molecule->GetTotalNumberAOs());
793	803	MallocerFreer::GetInstance()->Malloc<double>(diisErrorProducts, diisNumErrorVect+1, diisNumErrorVect+1);
	804	+ MallocerFreer::GetInstance()->Malloc<double>(tmpDiisErrorProducts, diisNumErrorVect+1, diisNumErrorVect+1);
794	805	MallocerFreer::GetInstance()->Malloc<double>(diisErrorCoefficients, diisNumErrorVect+1);
795	806	}
796	807	}

		@@ -805,6 +816,7 @@ void Cndo2::DoDIIS(double** orbitalElectronPopulation,
805	816	double*** diisStoredDensityMatrix,
806	817	double*** diisStoredErrorVect,
807	818	double** diisErrorProducts,
	819	+ double** tmpDiisErrorProducts,
808	820	double* diisErrorCoefficients,
809	821	double& diisError,
810	822	bool& hasAppliedDIIS,

		@@ -873,7 +885,13 @@ void Cndo2::DoDIIS(double** orbitalElectronPopulation,
873	885	if(diisNumErrorVect <= step && diisEndError<diisError && diisError<diisStartError){
874	886	hasAppliedDIIS = true;
875	887	try{
876		- MolDS_wrappers::Lapack::GetInstance()->Dsysv(diisErrorProducts,
	888	+#pragma omp parallel for schedule(auto)
	889	+ for(int i=0; i<diisNumErrorVect+1; i++){
	890	+ for(int j=0; j<diisNumErrorVect+1; j++){
	891	+ tmpDiisErrorProducts[i][j] = diisErrorProducts[i][j];
	892	+ }
	893	+ }
	894	+ MolDS_wrappers::Lapack::GetInstance()->Dsysv(tmpDiisErrorProducts,
877	895	diisErrorCoefficients,
878	896	diisNumErrorVect+1);
879	897	}catch(MolDSException ex){

--- a/src/cndo/Cndo2.h

+++ b/src/cndo/Cndo2.h

		@@ -485,6 +485,7 @@ private:
485	485	double*** diisStoredDensityMatrix,
486	486	double*** diisStoredErrorVect,
487	487	double** diisErrorProducts,
	488	+ double** tmpDiisErrorProducts,
488	489	double* diisErrorCoefficients,
489	490	double& diisError,
490	491	bool& hasAppliedDIIS,

		@@ -510,11 +511,13 @@ private:
510	511	double**** diisStoredDensityMatrix,
511	512	double**** diisStoredErrorVect,
512	513	double*** diisErrorProducts,
	514	+ double*** tmpDiisErrorProducts,
513	515	double** diisErrorCoefficients) const;
514	516	void MallocSCFTemporaryMatrices(double*** oldOrbitalElectronPopulation,
515	517	double**** diisStoredDensityMatrix,
516	518	double**** diisStoredErrorVect,
517	519	double*** diisErrorProducts,
	520	+ double*** tmpDiisErrorProducts,
518	521	double** diisErrorCoefficients);
519	522	};
520	523

--- a/src/mpi/MpiProcess.h

+++ b/src/mpi/MpiProcess.h

		@@ -141,25 +141,25 @@ private:
141	141	int tagBase = origianlTag*numChunks;
142	142	if(elementsLimit < 0){
143	143	std::stringstream ss;
144		- ss << this->errorMessageSplitMessageElemLimNegative << elementsLimit << endl;
	144	+ ss << this->errorMessageSplitMessageElemLimNegative << elementsLimit << std::endl;
145	145	MolDS_base::MolDSException ex(ss.str());
146	146	throw ex;
147	147	}
148	148	if(numChunks < 0){
149	149	std::stringstream ss;
150		- ss << this->errorMessageSplitMessageNumChnkNegative << numChunks << endl;
	150	+ ss << this->errorMessageSplitMessageNumChnkNegative << numChunks << std::endl;
151	151	MolDS_base::MolDSException ex(ss.str());
152	152	throw ex;
153	153	}
154	154	if(remaining < 0){
155	155	std::stringstream ss;
156		- ss << this->errorMessageSplitMessageRemainingNegative << remaining << endl;
	156	+ ss << this->errorMessageSplitMessageRemainingNegative << remaining << std::endl;
157	157	MolDS_base::MolDSException ex(ss.str());
158	158	throw ex;
159	159	}
160	160	if(tagBase < 0){
161	161	std::stringstream ss;
162		- ss << this->errorMessageSplitMessageTagBaseNegative << tagBase << endl;
	162	+ ss << this->errorMessageSplitMessageTagBaseNegative << tagBase << std::endl;
163	163	MolDS_base::MolDSException ex(ss.str());
164	164	throw ex;
165	165	}

--- a/src/wrappers/Lapack.cpp

+++ b/src/wrappers/Lapack.cpp

		@@ -98,18 +98,10 @@ void Lapack::DeleteInstance(){
98	98	* ***/
99	99	molds_lapack_int Lapack::Dsyevd(double** matrix, double* eigenValues, molds_lapack_int size, bool calcEigenVectors){
100	100	molds_lapack_int info = 0;
101		- molds_lapack_int k = 0;
102		- molds_lapack_int lwork;
103		- molds_lapack_int liwork;
104		- char job;
105	101	char uplo = 'U';
106	102	molds_lapack_int lda = size;
107		- double* convertedMatrix;
108		- double* tempEigenValues;
109		- double* work;
110		- molds_lapack_int* iwork;
111		-
112	103	// set job type
	104	+ char job;
113	105	if(calcEigenVectors){
114	106	job = 'V';
115	107	}

		@@ -117,84 +109,47 @@ molds_lapack_int Lapack::Dsyevd(double** matrix, double* eigenValues, molds_lapa
117	109	job = 'N';
118	110	}
119	111
120		- // calc. lwork and liwork
121		- if(size < 1 ){
122		- stringstream ss;
123		- ss << errorMessageDsyevdSize;
124		- MolDSException ex(ss.str());
125		- ex.SetKeyValue<int>(LapackInfo, info);
126		- throw ex;
127		- }
128		- else if(size == 1){
129		- lwork = 1;
130		- liwork = 1;
131		- }
132		- else if(1 < size && job == 'N'){
133		- lwork = 2*size + 1;
134		- liwork = 2;
135		- }
136		- else{
137		- // calc. k
138		- double temp = log((double)size)/log(2.0);
139		- if( (double)((molds_lapack_int)temp) < temp ){
140		- k = (molds_lapack_int)temp + 1;
141		- }
142		- else{
143		- k = (molds_lapack_int)temp;
144		- }
145		- lwork = 3sizesize + (5+2k)size + 1;
146		- liwork = 5*size + 3;
147		- }
148		-
149		- // malloc
150		- work = (double)MOLDS_LAPACK_malloc( sizeof(double)lwork, 16 );
151		- iwork = (molds_lapack_int)MOLDS_LAPACK_malloc( sizeof(molds_lapack_int)liwork, 16 );
152		- convertedMatrix = (double)MOLDS_LAPACK_malloc( sizeof(double)size*size, 16 );
153		- tempEigenValues = (double)MOLDS_LAPACK_malloc( sizeof(double)size, 16 );
154		-
155		- for(molds_lapack_int i = 0; i < size; i++){
156		- for(molds_lapack_int j = i; j < size; j++){
157		- convertedMatrix[i+j*size] = matrix[i][j];
158		- }
159		- }
160		-
161	112	// call Lapack
162	113	#ifdef __INTEL_COMPILER
163		- dsyevd(&job, &uplo, &size, convertedMatrix, &lda, tempEigenValues, work, &lwork, iwork, &liwork, &info);
	114	+ info = LAPACKE_dsyevd(LAPACK_ROW_MAJOR, job, uplo, size, &matrix[0][0], lda, eigenValues);
164	115	#else
165		- info = LAPACKE_dsyevd_work(LAPACK_COL_MAJOR, job, uplo, size, convertedMatrix, lda, tempEigenValues, work, lwork, iwork, liwork);
	116	+ info = LAPACKE_dsyevd(LAPACK_ROW_MAJOR, job, uplo, size, &matrix[0][0], lda, eigenValues);
166	117	#endif
167	118
168		- for(molds_lapack_int i = 0; i < size; i++){
169		- for(molds_lapack_int j = 0; j < size; j++){
170		- matrix[i][j] = convertedMatrix[j+i*size]; //i-th row is i-th eigen vector
171		- //matrix[j][i] = convertedMatrix[j+i*size]; //i-th column is i-th eigen vector
	119	+ // make i-th row i-the eigenvector
	120	+ double** tmpMatrix=NULL;
	121	+ try{
	122	+ MallocerFreer::GetInstance()->Malloc<double>(&tmpMatrix, size, size);
	123	+ for(molds_lapack_int i = 0; i < size; i++){
	124	+ for(molds_lapack_int j = 0; j < size; j++){
	125	+ tmpMatrix[j][i] = matrix[i][j];
	126	+ }
	127	+ }
	128	+ for(molds_lapack_int i = 0; i < size; i++){
	129	+ for(molds_lapack_int j = 0; j < size; j++){
	130	+ matrix[i][j] = tmpMatrix[i][j];
	131	+ }
172	132	}
173	133	}
	134	+ catch(MolDSException ex){
	135	+ MallocerFreer::GetInstance()->Free<double>(&tmpMatrix, size, size);
	136	+ throw ex;
	137	+ }
	138	+ MallocerFreer::GetInstance()->Free<double>(&tmpMatrix, size, size);
174	139
	140	+ // adjust phase of eigenvectors
175	141	for(molds_lapack_int i=0;i<size;i++){
176		- double temp = 0.0;
	142	+ double tmp = 0.0;
177	143	for(molds_lapack_int j=0;j<size;j++){
178		- temp += matrix[i][j];
	144	+ tmp += matrix[i][j];
179	145	}
180		- if(temp<0){
	146	+ if(tmp<0){
181	147	for(molds_lapack_int j=0;j<size;j++){
182	148	matrix[i][j]*=-1.0;
183	149	}
184	150	}
185	151	}
186	152
187		- for(molds_lapack_int i = 0; i < size; i++){
188		- eigenValues[i] = tempEigenValues[i];
189		- }
190		- //this->OutputLog(boost::format("size=%d lwork=%d liwork=%d k=%d info=%d\n") % size % lwork % liwork % k % info);
191		-
192		- // free
193		- MOLDS_LAPACK_free(work);
194		- MOLDS_LAPACK_free(iwork);
195		- MOLDS_LAPACK_free(convertedMatrix);
196		- MOLDS_LAPACK_free(tempEigenValues);
197		-
198	153	if(info != 0){
199	154	stringstream ss;
200	155	ss << errorMessageDsyevdInfo;

		@@ -210,18 +165,15 @@ molds_lapack_int Lapack::Dsyevd(double** matrix, double* eigenValues, molds_lapa
210	165	*
211	166	* "matrix*X=b" is solved, then we get X by this method.
212	167	* The X will be stored in b.
	168	+ * The matrix will be overwriten by this method.
213	169	*
214	170	*/
215		-molds_lapack_int Lapack::Dsysv(double const* const* matrix, double* b, molds_lapack_int size){
	171	+molds_lapack_int Lapack::Dsysv(double** matrix, double* b, molds_lapack_int size){
216	172	molds_lapack_int info = 0;
217		- molds_lapack_int lwork;
218		- char uplo = 'U';
219		- molds_lapack_int lda = size;
220		- molds_lapack_int ldb = size;
	173	+ char uplo = 'U';
221	174	molds_lapack_int nrhs = 1;
222		- double* convertedMatrix;
223		- double* work;
224		- double* tempB;
	175	+ molds_lapack_int lda = size;
	176	+ molds_lapack_int ldb = nrhs;
225	177	molds_lapack_int* ipiv;
226	178
227	179	if(size < 1 ){

		@@ -232,50 +184,15 @@ molds_lapack_int Lapack::Dsysv(double const* const* matrix, double* b, molds_lap
232	184
233	185	// malloc
234	186	ipiv = (molds_lapack_int)MOLDS_LAPACK_malloc( sizeof(molds_lapack_int)2*size, 16 );
235		- convertedMatrix = (double)MOLDS_LAPACK_malloc( sizeof(double)size*size, 16 );
236		- tempB = (double)MOLDS_LAPACK_malloc( sizeof(double)size, 16 );
237	187
238		- for(molds_lapack_int i = 0; i < size; i++){
239		- for(molds_lapack_int j = i; j < size; j++){
240		- convertedMatrix[i+j*size] = matrix[i][j];
241		- }
242		- }
243		- for(molds_lapack_int i = 0; i < size; i++){
244		- tempB[i] = b[i];
245		- }
246		-
247		- // calc. lwork
248		- double blockSize=0.0;
249		-#pragma omp critical
250		- {
251		- lwork = -1;
252		- double tempWork[3]={0.0, 0.0, 0.0};
253	188	#ifdef __INTEL_COMPILER
254		- dsysv(&uplo, &size, &nrhs, convertedMatrix, &lda, ipiv, tempB, &ldb, tempWork, &lwork, &info);
	189	+ info = LAPACKE_dsysv(LAPACK_ROW_MAJOR, uplo, size, nrhs, &matrix[0][0], lda, ipiv, b, ldb);
255	190	#else
256		- info = LAPACKE_dsysv_work(LAPACK_COL_MAJOR, uplo, size, nrhs, convertedMatrix, lda, ipiv, tempB, ldb, tempWork, lwork);
	191	+ info = LAPACKE_dsysv(LAPACK_ROW_MAJOR, uplo, size, nrhs, &matrix[0][0], lda, ipiv, b, ldb);
257	192	#endif
258		- blockSize = tempWork[0]/size;
259		- }
260		- info = 0;
261		- lwork = blockSize*size;
262		- work = (double)MOLDS_LAPACK_malloc( sizeof(double)lwork, 16 );
263		-
264		- // call Lapack
265		-#ifdef __INTEL_COMPILER
266		- dsysv(&uplo, &size, &nrhs, convertedMatrix, &lda, ipiv, tempB, &ldb, work, &lwork, &info);
267		-#else
268		- info = LAPACKE_dsysv_work(LAPACK_COL_MAJOR, uplo, size, nrhs, convertedMatrix, lda, ipiv, tempB, ldb, work, lwork);
269		-#endif
270		- for(molds_lapack_int i = 0; i < size; i++){
271		- b[i] = tempB[i];
272		- }
273	193
274	194	// free
275		- MOLDS_LAPACK_free(convertedMatrix);
276	195	MOLDS_LAPACK_free(ipiv);
277		- MOLDS_LAPACK_free(work);
278		- MOLDS_LAPACK_free(tempB);
279	196
280	197	if(info != 0){
281	198	stringstream ss;

		@@ -291,17 +208,17 @@ molds_lapack_int Lapack::Dsysv(double const* const* matrix, double* b, molds_lap
291	208	/***
292	209	*
293	210	* "matrix*X[i]=b[i] (i=0, 1, ... , nrhs-1) is solved, then we get X[i] by this method.
294		- * The X[i] will be stored in b[i].
295		- * b[i][j] is j-th element of i-th solution, b[i].
	211	+ * The X[i] will be stored in b[i], namely
	212	+ * the b[i][j] will be j-th element of i-th solution, b[i].
	213	+ * Besides, the matrix will be overwriten by this method.
296	214	*
297	215	*/
298		-molds_lapack_int Lapack::Dgetrs(double const* const* matrix, double** b, molds_lapack_int size, molds_lapack_int nrhs) const{
	216	+molds_lapack_int Lapack::Dgetrs(double matrix, double b, molds_lapack_int size, molds_lapack_int nrhs) const{
299	217	molds_lapack_int info = 0;
300	218	char trans = 'N';
301	219	molds_lapack_int lda = size;
302		- molds_lapack_int ldb = size;
303		- double* convertedMatrix;
304		- double* convertedB;
	220	+ molds_lapack_int ldb = nrhs;
	221	+ double* tmpB;
305	222	molds_lapack_int* ipiv;
306	223
307	224	if(size < 1 ){

		@@ -310,45 +227,37 @@ molds_lapack_int Lapack::Dgetrs(double const* const* matrix, double** b, molds_l
310	227	throw MolDSException(ss.str());
311	228	}
312	229
313		-
314	230	try{
315	231	// malloc
316	232	ipiv = (molds_lapack_int)MOLDS_LAPACK_malloc( sizeof(molds_lapack_int)2*size, 16 );
317		- convertedMatrix = (double)MOLDS_LAPACK_malloc( sizeof(double)size*size, 16 );
318		- convertedB = (double)MOLDS_LAPACK_malloc( sizeof(double)nrhs*size, 16 );
319		- for(molds_lapack_int i = 0; i < size; i++){
320		- for(molds_lapack_int j = 0; j < size; j++){
321		- convertedMatrix[i+j*size] = matrix[i][j];
322		- }
323		- }
	233	+ tmpB = (double)MOLDS_LAPACK_malloc( sizeof(double)size*nrhs, 16 );
	234	+ // matrix b should be transposed
324	235	for(molds_lapack_int i = 0; i < nrhs; i++){
325	236	for(molds_lapack_int j = 0; j < size; j++){
326		- convertedB[j+i*size] = b[i][j];
	237	+ tmpB[j*nrhs+i] = b[i][j];
327	238	}
328	239	}
329		- this->Dgetrf(convertedMatrix, ipiv, size, size);
	240	+ this->Dgetrf(&matrix[0][0], ipiv, size, size);
330	241	#ifdef __INTEL_COMPILER
331		- dgetrs(&trans, &size, &nrhs, convertedMatrix, &lda, ipiv, convertedB, &ldb, &info);
	242	+ info = LAPACKE_dgetrs(LAPACK_ROW_MAJOR, trans, size, nrhs, &matrix[0][0], lda, ipiv, tmpB, ldb);
332	243	#else
333		- info = LAPACKE_dgetrs_work(LAPACK_COL_MAJOR, trans, size, nrhs, convertedMatrix, lda, ipiv, convertedB, ldb);
	244	+ info = LAPACKE_dgetrs(LAPACK_ROW_MAJOR, trans, size, nrhs, &matrix[0][0], lda, ipiv, tmpB, ldb);
334	245	#endif
335	246	for(molds_lapack_int i = 0; i < nrhs; i++){
336	247	for(molds_lapack_int j = 0; j < size; j++){
337		- b[i][j] = convertedB[j+i*size];
	248	+ b[i][j] = tmpB[j*nrhs+i];
338	249	}
339	250	}
340	251	}
341	252	catch(MolDSException ex){
342	253	// free
343		- MOLDS_LAPACK_free(convertedMatrix);
344		- MOLDS_LAPACK_free(convertedB);
	254	+ MOLDS_LAPACK_free(tmpB);
345	255	MOLDS_LAPACK_free(ipiv);
346	256	throw ex;
347	257	}
348	258	// free
349		- MOLDS_LAPACK_free(convertedMatrix);
350		- MOLDS_LAPACK_free(convertedB);
351	259	MOLDS_LAPACK_free(ipiv);
	260	+ MOLDS_LAPACK_free(tmpB);
352	261
353	262	if(info != 0){
354	263	stringstream ss;

		@@ -363,41 +272,29 @@ molds_lapack_int Lapack::Dgetrs(double const* const* matrix, double** b, molds_l
363	272	// Argument "matrix" will be LU-decomposed.
364	273	molds_lapack_int Lapack::Dgetrf(double** matrix, molds_lapack_int sizeM, molds_lapack_int sizeN) const{
365	274	molds_lapack_int* ipiv = (molds_lapack_int)MOLDS_LAPACK_malloc( sizeof(molds_lapack_int)2*sizeM, 16 );
366		- this->Dgetrf(matrix, ipiv, sizeM, sizeN);
	275	+ this->Dgetrf(&matrix[0][0], ipiv, sizeM, sizeN);
367	276	MOLDS_LAPACK_free(ipiv);
368	277	molds_lapack_int info = 0;
369	278	return info;
370	279	}
371	280
372		-// Argument "matrix" is sizeM*sizeN matrix.
	281	+// Argument "matrix" is sizeM*sizeN matrix in Row-major (C/C++ style)
373	282	// Argument "matrix" will be LU-decomposed.
374	283	molds_lapack_int Lapack::Dgetrf(double** matrix, molds_lapack_int* ipiv, molds_lapack_int sizeM, molds_lapack_int sizeN) const{
375		- double* convertedMatrix = (double)MOLDS_LAPACK_malloc( sizeof(double)sizeM*sizeN, 16 );
376		- for(molds_lapack_int i=0; i<sizeM; i++){
377		- for(molds_lapack_int j=0; j<sizeN; j++){
378		- convertedMatrix[i+j*sizeM] = matrix[i][j];
379		- }
380		- }
381		- this->Dgetrf(convertedMatrix, ipiv, sizeM, sizeN);
382		- for(molds_lapack_int i=0; i<sizeM; i++){
383		- for(molds_lapack_int j=0; j<sizeN; j++){
384		- matrix[i][j] = convertedMatrix[i+j*sizeM];
385		- }
386		- }
387		- MOLDS_LAPACK_free(convertedMatrix);
	284	+ this->Dgetrf(&matrix[0][0], ipiv, sizeM, sizeN);
388	285	molds_lapack_int info = 0;
389	286	return info;
390	287	}
391	288
392	289	// Argument "matrix" is sizeM*sizeN matrix.
393		-// The each element of "matrix" should be stored in 1-dimensional vecotre with column major (Fortran type).
	290	+// The each element of "matrix" should be stored in 1-dimensional vecotre with Row major (C/C++ style).
394	291	molds_lapack_int Lapack::Dgetrf(double* matrix, molds_lapack_int* ipiv, molds_lapack_int sizeM, molds_lapack_int sizeN) const{
395	292	molds_lapack_int info = 0;
396	293	molds_lapack_int lda = sizeM;
397	294	#ifdef __INTEL_COMPILER
398		- dgetrf(&sizeM, &sizeN, matrix, &lda, ipiv, &info);
	295	+ info = LAPACKE_dgetrf(LAPACK_ROW_MAJOR, sizeM, sizeN, matrix, lda, ipiv);
399	296	#else
400		- info = LAPACKE_dgetrf_work(LAPACK_COL_MAJOR, sizeM, sizeN, matrix, lda, ipiv);
	297	+ info = LAPACKE_dgetrf(LAPACK_ROW_MAJOR, sizeM, sizeN, matrix, lda, ipiv);
401	298	#endif
402	299	if(info != 0){
403	300	stringstream ss;

--- a/src/wrappers/Lapack.h

+++ b/src/wrappers/Lapack.h

		@@ -27,8 +27,8 @@ public:
27	27	static Lapack* GetInstance();
28	28	static void DeleteInstance();
29	29	molds_lapack_int Dsyevd(double** matrix, double* eigenValues, molds_lapack_int size, bool calcEigenVectors);
30		- molds_lapack_int Dsysv(double const* const* matrix, double* b, molds_lapack_int size);
31		- molds_lapack_int Dgetrs(double const* const* matrix, double** b, molds_lapack_int size, molds_lapack_int nrhs) const;
	30	+ molds_lapack_int Dsysv(double** matrix, double* b, molds_lapack_int size);
	31	+ molds_lapack_int Dgetrs(double matrix, double b, molds_lapack_int size, molds_lapack_int nrhs) const;
32	32	molds_lapack_int Dgetrf(double** matrix, molds_lapack_int sizeM, molds_lapack_int sizeN) const;
33	33	molds_lapack_int Dgetrf(double** matrix, molds_lapack_int* ipiv, molds_lapack_int sizeM, molds_lapack_int sizeN) const;
34	34	private:

MolDS Fork