• R/O
  • HTTP
  • SSH
  • HTTPS

Commit

Tags
No Tags

Frequently used words (click to add to your profile)

javac++androidlinuxc#windowsobjective-ccocoa誰得qtpythonphprubygameguibathyscaphec計画中(planning stage)翻訳omegatframeworktwitterdomtestvb.netdirectxゲームエンジンbtronarduinopreviewer

Commit MetaInfo

Revision85f1753149951fd114e5b89a4369e19aac083cdf (tree)
Time2013-10-16 10:44:17
AuthorMikiya Fujii <mikiya.fujii@gmai...>
CommiterMikiya Fujii

Log Message

trunk.r1540 is merged to branches/fx10. Refactoring:DGEMM and DGEMMM to cache temporary memory. #32094 #32299

git-svn-id: https://svn.sourceforge.jp/svnroot/molds/branches/fx10@1548 1136aad2-a195-0410-b898-f5ea1d11b9d8

Change Summary

Incremental Difference

--- a/src/cndo/Cndo2.cpp
+++ b/src/cndo/Cndo2.cpp
@@ -182,10 +182,14 @@ void Cndo2::SetMessages(){
182182 = "Error in cndo::Cndo2::RotateDiatmicOverlapAOsToSpaceFrame diatomicOverlapAOs is NULL.\n";
183183 this->errorMessageRotDiaOverlapAOsToSpaceFrameNullRotMatrix
184184 = "Error in cndo::Cndo2::RotateDiatmicOverlapAOsToSpaceFrame: rotatingMatrix is NULL.\n";
185+ this->errorMessageRotDiaOverlapAOsToSpaceFrameNullTmpDiaMatrix
186+ = "Error in cndo::Cndo2::RotateDiatmicOverlapAOsToSpaceFrame: tmpDiatomicOverlapAOs is NULL.\n";
185187 this->errorMessageRotDiaOverlapAOsToSpaceFrameNullTmpOldDiaMatrix
186188 = "Error in cndo::Cndo2::RotateDiatmicOverlapAOsToSpaceFrame: tmpOldDiatomicOverlapAOs is NULL.\n";
187189 this->errorMessageRotDiaOverlapAOsToSpaceFrameNullTmpMatrixBC
188190 = "Error in cndo::Cndo2::RotateDiatmicOverlapAOsToSpaceFrame: tmpMatrixBC is NULL.\n";
191+ this->errorMessageRotDiaOverlapAOsToSpaceFrameNullTmpVectorBC
192+ = "Error in cndo::Cndo2::RotateDiatmicOverlapAOsToSpaceFrame: tmpVectorBC is NULL.\n";
189193 this->errorMessageSetOverlapAOsElementNullDiaMatrix
190194 = "Error in cndo::Cndo2::SetOverlapAOsElement: diatomicOverlapAOs is NULL.\n";
191195 this->errorMessageCalcElectronicTransitionDipoleMomentBadState
@@ -3923,10 +3927,12 @@ void Cndo2::CalcOverlapAOs(double** overlapAOs, const Molecule& molecule) const{
39233927 stringstream ompErrors;
39243928 #pragma omp parallel
39253929 {
3926- double** diatomicOverlapAOs = NULL;
3927- double** rotatingMatrix = NULL;
3930+ double** diatomicOverlapAOs = NULL;
3931+ double** rotatingMatrix = NULL;
3932+ double* tmpDiatomicOverlapAOs = NULL;
39283933 double** tmpOldDiatomicOverlapAOs = NULL;
3929- double** tmpMatrixBC = NULL;
3934+ double** tmpMatrixBC = NULL;
3935+ double* tmpVectorBC = NULL;
39303936 try{
39313937 // malloc
39323938 MallocerFreer::GetInstance()->Malloc<double>(&diatomicOverlapAOs,
@@ -3935,19 +3941,23 @@ void Cndo2::CalcOverlapAOs(double** overlapAOs, const Molecule& molecule) const{
39353941 MallocerFreer::GetInstance()->Malloc<double>(&rotatingMatrix,
39363942 OrbitalType_end,
39373943 OrbitalType_end);
3944+ MallocerFreer::GetInstance()->Malloc<double>(&tmpDiatomicOverlapAOs,
3945+ OrbitalType_end*OrbitalType_end);
39383946 MallocerFreer::GetInstance()->Malloc<double>(&tmpOldDiatomicOverlapAOs,
39393947 OrbitalType_end,
39403948 OrbitalType_end);
39413949 MallocerFreer::GetInstance()->Malloc<double>(&tmpMatrixBC,
39423950 OrbitalType_end,
39433951 OrbitalType_end);
3952+ MallocerFreer::GetInstance()->Malloc<double>(&tmpVectorBC,
3953+ OrbitalType_end*OrbitalType_end);
39443954 bool symmetrize = false;
39453955 #pragma omp for schedule(auto)
39463956 for(int B=A+1; B<totalAtomNumber; B++){
39473957 const Atom& atomB = *molecule.GetAtom(B);
39483958 this->CalcDiatomicOverlapAOsInDiatomicFrame(diatomicOverlapAOs, atomA, atomB);
39493959 this->CalcRotatingMatrix(rotatingMatrix, atomA, atomB);
3950- this->RotateDiatmicOverlapAOsToSpaceFrame(diatomicOverlapAOs, rotatingMatrix, tmpOldDiatomicOverlapAOs, tmpMatrixBC);
3960+ this->RotateDiatmicOverlapAOsToSpaceFrame(diatomicOverlapAOs, rotatingMatrix, tmpDiatomicOverlapAOs, tmpOldDiatomicOverlapAOs, tmpMatrixBC, tmpVectorBC);
39513961 this->SetOverlapAOsElement(overlapAOs, diatomicOverlapAOs, atomA, atomB, symmetrize);
39523962 } // end of loop B parallelized with openMP
39533963
@@ -3957,12 +3967,16 @@ void Cndo2::CalcOverlapAOs(double** overlapAOs, const Molecule& molecule) const{
39573967 ex.Serialize(ompErrors);
39583968 }
39593969 this->FreeDiatomicOverlapAOsAndRotatingMatrix(&diatomicOverlapAOs, &rotatingMatrix);
3970+ MallocerFreer::GetInstance()->Free<double>(&tmpDiatomicOverlapAOs,
3971+ OrbitalType_end*OrbitalType_end);
39603972 MallocerFreer::GetInstance()->Free<double>(&tmpOldDiatomicOverlapAOs,
39613973 OrbitalType_end,
39623974 OrbitalType_end);
39633975 MallocerFreer::GetInstance()->Free<double>(&tmpMatrixBC,
39643976 OrbitalType_end,
39653977 OrbitalType_end);
3978+ MallocerFreer::GetInstance()->Free<double>(&tmpVectorBC,
3979+ OrbitalType_end*OrbitalType_end);
39663980 } // end of omp-parallelized region
39673981 // Exception throwing for omp-region
39683982 if(!ompErrors.str().empty()){
@@ -4020,7 +4034,9 @@ void Cndo2::CalcDiatomicOverlapAOs1stDerivatives(double*** diatomicOverlapAOs1st
40204034 double** tmpRotMat1stDeriv,
40214035 double*** tmpRotMat1stDerivs,
40224036 double** tmpRotatedDiatomicOverlap,
4023- double** tmpMatrix,
4037+ double* tmpRotatedDiatomicOverlapVec,
4038+ double** tmpMatrixBC,
4039+ double* tmpVectorBC,
40244040 const Atom& atomA,
40254041 const Atom& atomB) const{
40264042 double cartesian[CartesianType_end] = {atomA.GetXyz()[XAxis] - atomB.GetXyz()[XAxis],
@@ -4055,7 +4071,9 @@ void Cndo2::CalcDiatomicOverlapAOs1stDerivatives(double*** diatomicOverlapAOs1st
40554071 tmpRotMat,
40564072 beta,
40574073 tmpRotatedDiatomicOverlap,
4058- tmpMatrix);
4074+ tmpRotatedDiatomicOverlapVec,
4075+ tmpMatrixBC,
4076+ tmpVectorBC);
40594077 alpha = 1.0;
40604078 beta = 1.0;
40614079 MolDS_wrappers::Blas::GetInstance()->Dgemmm(isColumnMajorRotatingMatrix,
@@ -4068,7 +4086,9 @@ void Cndo2::CalcDiatomicOverlapAOs1stDerivatives(double*** diatomicOverlapAOs1st
40684086 tmpRotMat,
40694087 beta,
40704088 tmpRotatedDiatomicOverlap,
4071- tmpMatrix);
4089+ tmpRotatedDiatomicOverlapVec,
4090+ tmpMatrixBC,
4091+ tmpVectorBC);
40724092 MolDS_wrappers::Blas::GetInstance()->Dgemmm(isColumnMajorRotatingMatrix,
40734093 isColumnMajorDiaOverlapAOs,
40744094 !isColumnMajorRotatingMatrix,
@@ -4079,7 +4099,9 @@ void Cndo2::CalcDiatomicOverlapAOs1stDerivatives(double*** diatomicOverlapAOs1st
40794099 tmpRotMat1stDeriv,
40804100 beta,
40814101 tmpRotatedDiatomicOverlap,
4082- tmpMatrix);
4102+ tmpRotatedDiatomicOverlapVec,
4103+ tmpMatrixBC,
4104+ tmpVectorBC);
40834105 MolDS_wrappers::Blas::GetInstance()->Dcopy(OrbitalType_end*OrbitalType_end,
40844106 &tmpRotatedDiatomicOverlap[0][0], incrementOne,
40854107 &diatomicOverlapAOs1stDerivs[0][0][c], CartesianType_end);
@@ -4123,7 +4145,9 @@ void Cndo2::CalcDiatomicOverlapAOs1stDerivatives(double*** diatomicOverlapAOs1st
41234145 double** tmpRotMat1stDeriv,
41244146 double*** tmpRotMat1stDerivs,
41254147 double** tmpRotatedDiatomicOverlap,
4126- double** tmpMatrix,
4148+ double* tmpRotatedDiatomicOverlapVec,
4149+ double** tmpMatrixBC,
4150+ double* tmpVectorBC,
41274151 int indexAtomA,
41284152 int indexAtomB) const{
41294153 this->CalcDiatomicOverlapAOs1stDerivatives(diatomicOverlapAOs1stDerivs,
@@ -4133,7 +4157,9 @@ void Cndo2::CalcDiatomicOverlapAOs1stDerivatives(double*** diatomicOverlapAOs1st
41334157 tmpRotMat1stDeriv,
41344158 tmpRotMat1stDerivs,
41354159 tmpRotatedDiatomicOverlap,
4136- tmpMatrix,
4160+ tmpRotatedDiatomicOverlapVec,
4161+ tmpMatrixBC,
4162+ tmpVectorBC,
41374163 *this->molecule->GetAtom(indexAtomA),
41384164 *this->molecule->GetAtom(indexAtomB));
41394165 }
@@ -5940,10 +5966,12 @@ void Cndo2::CalcDiatomicOverlapAOs2ndDerivativeInDiatomicFrame(double** diatomic
59405966 }
59415967
59425968 // see (B.63) in Pople book.
5943-void Cndo2::RotateDiatmicOverlapAOsToSpaceFrame(double** diatomicOverlapAOs,
5969+void Cndo2::RotateDiatmicOverlapAOsToSpaceFrame(double** diatomicOverlapAOs,
59445970 double const* const* rotatingMatrix,
5945- double** tmpOldDiatomicOverlapAOs,
5946- double** tmpMatrixBC) const{
5971+ double* tmpDiatomicOverlapAOs,
5972+ double** tmpOldDiatomicOverlapAOs,
5973+ double** tmpMatrixBC,
5974+ double* tmpVectorBC) const{
59475975 #ifdef MOLDS_DBG
59485976 if(diatomicOverlapAOs==NULL){
59495977 throw MolDSException(this->errorMessageRotDiaOverlapAOsToSpaceFrameNullDiaMatrix);
@@ -5951,12 +5979,18 @@ void Cndo2::RotateDiatmicOverlapAOsToSpaceFrame(double** diatomicOverlapAOs,
59515979 if(rotatingMatrix==NULL){
59525980 throw MolDSException(this->errorMessageRotDiaOverlapAOsToSpaceFrameNullRotMatrix);
59535981 }
5982+ if(tmpDiatomicOverlapAOs==NULL){
5983+ throw MolDSException(this->errorMessageRotDiaOverlapAOsToSpaceFrameNullTmpDiaMatrix);
5984+ }
59545985 if(tmpOldDiatomicOverlapAOs==NULL){
59555986 throw MolDSException(this->errorMessageRotDiaOverlapAOsToSpaceFrameNullTmpOldDiaMatrix);
59565987 }
59575988 if(tmpMatrixBC==NULL){
59585989 throw MolDSException(this->errorMessageRotDiaOverlapAOsToSpaceFrameNullTmpMatrixBC);
59595990 }
5991+ if(tmpVectorBC==NULL){
5992+ throw MolDSException(this->errorMessageRotDiaOverlapAOsToSpaceFrameNullTmpVectorBC);
5993+ }
59605994 #endif
59615995 for(int i=0; i<OrbitalType_end; i++){
59625996 for(int j=0; j<OrbitalType_end; j++){
@@ -5978,7 +6012,9 @@ void Cndo2::RotateDiatmicOverlapAOsToSpaceFrame(double** diatomicOverlapAOs,
59786012 rotatingMatrix,
59796013 beta,
59806014 diatomicOverlapAOs,
5981- tmpMatrixBC);
6015+ tmpDiatomicOverlapAOs,
6016+ tmpMatrixBC,
6017+ tmpVectorBC);
59826018 /*
59836019 for(int i=0;i<OrbitalType_end;i++){
59846020 for(int j=0;j<OrbitalType_end;j++){
--- a/src/cndo/Cndo2.h
+++ b/src/cndo/Cndo2.h
@@ -201,7 +201,9 @@ protected:
201201 double** tmpRotMat1stDeriv,
202202 double*** tmpRotMat1stDerivs,
203203 double** tmpRotatedDiatomicOverlap,
204- double** tmpMatrix,
204+ double* tmpRotatedDiatomicOverlapVec,
205+ double** tmpMatrixBC,
206+ double* tmpVectorBC,
205207 const MolDS_base_atoms::Atom& atomA,
206208 const MolDS_base_atoms::Atom& atomB) const;
207209 void CalcDiatomicOverlapAOs1stDerivatives(double*** diatomicOverlapAOs1stDerivs,
@@ -211,7 +213,9 @@ protected:
211213 double** tmpRotMat1stDeriv,
212214 double*** tmpRotMat1stDerivs,
213215 double** tmpRotatedDiatomicOverlap,
214- double** tmpMatrix,
216+ double* tmpRotatedDiatomicOverlapVec,
217+ double** tmpMatrixBC,
218+ double* tmpVectorBC,
215219 int indexAtomA,
216220 int indexAtomB) const;
217221 void CalcDiatomicOverlapAOs2ndDerivatives(double**** overlapAOs2ndDeri,
@@ -280,8 +284,10 @@ private:
280284 std::string errorMessageCalcRotatingMatrixNullRotMatrix;
281285 std::string errorMessageRotDiaOverlapAOsToSpaceFrameNullDiaMatrix;
282286 std::string errorMessageRotDiaOverlapAOsToSpaceFrameNullRotMatrix;
287+ std::string errorMessageRotDiaOverlapAOsToSpaceFrameNullTmpDiaMatrix;
283288 std::string errorMessageRotDiaOverlapAOsToSpaceFrameNullTmpOldDiaMatrix;
284289 std::string errorMessageRotDiaOverlapAOsToSpaceFrameNullTmpMatrixBC;
290+ std::string errorMessageRotDiaOverlapAOsToSpaceFrameNullTmpVectorBC;
285291 std::string errorMessageSetOverlapAOsElementNullDiaMatrix;
286292 std::string errorMessageCalcOverlapAOsDifferentConfigurationsDiffAOs;
287293 std::string errorMessageCalcOverlapAOsDifferentConfigurationsDiffAtoms;
@@ -459,10 +465,12 @@ private:
459465 double const* atomicElectronPopulation,
460466 double const* const* const* const* const* const* twoElecTwoCore,
461467 bool isGuess) const;
462- void RotateDiatmicOverlapAOsToSpaceFrame(double** diatomicOverlapAOs,
468+ void RotateDiatmicOverlapAOsToSpaceFrame(double** diatomicOverlapAOs,
463469 double const* const* rotatingMatrix,
464- double** oldDiatomicOverlapAOs,
465- double** tmpMatrixBC) const;
470+ double* tmpDiatomicOverlapAOs,
471+ double** tmpOldDiatomicOverlapAOs,
472+ double** tmpMatrixBC,
473+ double* tmpVectorBC) const;
466474 void SetOverlapAOsElement(double** overlapAOs,
467475 double const* const* diatomicOverlapAOs,
468476 const MolDS_base_atoms::Atom& atomA,
--- a/src/mndo/Mndo.cpp
+++ b/src/mndo/Mndo.cpp
@@ -1006,7 +1006,9 @@ void Mndo::MallocTempMatricesEachThreadCalcHessianSCF(double***** diatomicOve
10061006 double**** tmpDiaOverlapAOs1stDerivs,
10071007 double***** tmpDiaOverlapAOs2ndDerivs,
10081008 double*** tmpRotatedDiatomicOverlap,
1009- double*** tmpMatrix) const{
1009+ double** tmpRotatedDiatomicOverlapVec,
1010+ double*** tmpMatrixBC,
1011+ double** tmpVectorBC) const{
10101012 MallocerFreer::GetInstance()->Malloc<double>(diatomicOverlapAOs1stDerivs,
10111013 this->molecule->GetNumberAtoms(),
10121014 OrbitalType_end,
@@ -1079,9 +1081,13 @@ void Mndo::MallocTempMatricesEachThreadCalcHessianSCF(double***** diatomicOve
10791081 CartesianType_end);
10801082 MallocerFreer::GetInstance()->Malloc<double>(tmpRotatedDiatomicOverlap,
10811083 OrbitalType_end, OrbitalType_end);
1082- MallocerFreer::GetInstance()->Malloc<double>(tmpMatrix,
1084+ MallocerFreer::GetInstance()->Malloc<double>(tmpRotatedDiatomicOverlapVec,
1085+ OrbitalType_end*OrbitalType_end);
1086+ MallocerFreer::GetInstance()->Malloc<double>(tmpMatrixBC,
10831087 OrbitalType_end,
10841088 OrbitalType_end);
1089+ MallocerFreer::GetInstance()->Malloc<double>(tmpVectorBC,
1090+ OrbitalType_end*OrbitalType_end);
10851091 }
10861092
10871093 void Mndo::FreeTempMatricesEachThreadCalcHessianSCF(double***** diatomicOverlapAOs1stDerivs,
@@ -1100,7 +1106,9 @@ void Mndo::FreeTempMatricesEachThreadCalcHessianSCF(double***** diatomicOverl
11001106 double**** tmpDiaOverlapAOs1stDerivs,
11011107 double***** tmpDiaOverlapAOs2ndDerivs,
11021108 double*** tmpRotatedDiatomicOverlap,
1103- double*** tmpMatrix) const{
1109+ double** tmpRotatedDiatomicOverlapVec,
1110+ double*** tmpMatrixBC,
1111+ double** tmpVectorBC) const{
11041112 MallocerFreer::GetInstance()->Free<double>(diatomicOverlapAOs1stDerivs,
11051113 this->molecule->GetNumberAtoms(),
11061114 OrbitalType_end,
@@ -1174,9 +1182,13 @@ void Mndo::FreeTempMatricesEachThreadCalcHessianSCF(double***** diatomicOverl
11741182 MallocerFreer::GetInstance()->Free<double>(tmpRotatedDiatomicOverlap,
11751183 OrbitalType_end,
11761184 OrbitalType_end);
1177- MallocerFreer::GetInstance()->Free<double>(tmpMatrix,
1185+ MallocerFreer::GetInstance()->Free<double>(tmpRotatedDiatomicOverlapVec,
1186+ OrbitalType_end*OrbitalType_end);
1187+ MallocerFreer::GetInstance()->Free<double>(tmpMatrixBC,
11781188 OrbitalType_end,
11791189 OrbitalType_end);
1190+ MallocerFreer::GetInstance()->Free<double>(tmpVectorBC,
1191+ OrbitalType_end*OrbitalType_end);
11801192 }
11811193
11821194 // mu and nu is included in atomA' AO.
@@ -1734,7 +1746,9 @@ void Mndo::CalcHessianSCF(double** hessianSCF, bool isMassWeighted) const{
17341746 double**** tmpDiaOverlapAOs2ndDerivs = NULL; //sedond derivatives of the diaOverlapAOs. This derivatives are related to the all Cartesian coordinates.
17351747 double** tmpRotMat1stDeriv = NULL;
17361748 double** tmpRotatedDiatomicOverlap = NULL;
1737- double** tmpMatrix = NULL;
1749+ double* tmpRotatedDiatomicOverlapVec = NULL; // used in dgemmm
1750+ double** tmpMatrixBC = NULL; // used in dgemmm
1751+ double* tmpVectorBC = NULL; // used in dgemmm
17381752
17391753 try{
17401754 this->MallocTempMatricesEachThreadCalcHessianSCF(&diatomicOverlapAOs1stDerivs,
@@ -1753,7 +1767,9 @@ void Mndo::CalcHessianSCF(double** hessianSCF, bool isMassWeighted) const{
17531767 &tmpDiaOverlapAOs1stDerivs,
17541768 &tmpDiaOverlapAOs2ndDerivs,
17551769 &tmpRotatedDiatomicOverlap,
1756- &tmpMatrix);
1770+ &tmpRotatedDiatomicOverlapVec,
1771+ &tmpMatrixBC,
1772+ &tmpVectorBC);
17571773 #pragma omp for schedule(auto)
17581774 for(int indexAtomA=0; indexAtomA<this->molecule->GetNumberAtoms(); indexAtomA++){
17591775 const Atom& atomA = *this->molecule->GetAtom(indexAtomA);
@@ -1771,7 +1787,9 @@ void Mndo::CalcHessianSCF(double** hessianSCF, bool isMassWeighted) const{
17711787 tmpRotMat1stDeriv,
17721788 tmpRotMat1stDerivs,
17731789 tmpRotatedDiatomicOverlap,
1774- tmpMatrix,
1790+ tmpRotatedDiatomicOverlapVec,
1791+ tmpMatrixBC,
1792+ tmpVectorBC,
17751793 indexAtomA,
17761794 indexAtomB);
17771795 this->CalcDiatomicOverlapAOs2ndDerivatives(diatomicOverlapAOs2ndDerivs[indexAtomB],
@@ -1868,7 +1886,9 @@ void Mndo::CalcHessianSCF(double** hessianSCF, bool isMassWeighted) const{
18681886 &tmpDiaOverlapAOs1stDerivs,
18691887 &tmpDiaOverlapAOs2ndDerivs,
18701888 &tmpRotatedDiatomicOverlap,
1871- &tmpMatrix);
1889+ &tmpRotatedDiatomicOverlapVec,
1890+ &tmpMatrixBC,
1891+ &tmpVectorBC);
18721892 }// end of omp-region
18731893 // Exception throwing for omp-region
18741894 if(!ompErrors.str().empty()){
@@ -2059,7 +2079,9 @@ void Mndo::CalcStaticFirstOrderFock(double* staticFirstOrderFock,
20592079 double** tmpDiaOverlapAOs1stDerivInDiaFrame = NULL; // first derivative of the diaOverlapAOs. This derivative is related to the distance between two atoms.
20602080 double** tmpRotMat1stDeriv = NULL;
20612081 double** tmpRotatedDiatomicOverlap = NULL;
2062- double** tmpMatrix = NULL;
2082+ double* tmpRotatedDiatomicOverlapVec = NULL;
2083+ double** tmpMatrixBC = NULL;
2084+ double* tmpVectorBC = NULL;
20632085 try{
20642086 this->MallocTempMatricesStaticFirstOrderFock(&diatomicTwoElecTwoCore1stDerivs,
20652087 &diatomicOverlapAOs1stDerivs,
@@ -2070,7 +2092,9 @@ void Mndo::CalcStaticFirstOrderFock(double* staticFirstOrderFock,
20702092 MallocerFreer::GetInstance()->Malloc<double>(&tmpDiaOverlapAOs1stDerivInDiaFrame, OrbitalType_end, OrbitalType_end);
20712093 MallocerFreer::GetInstance()->Malloc<double>(&tmpRotMat1stDeriv, OrbitalType_end, OrbitalType_end);
20722094 MallocerFreer::GetInstance()->Malloc<double>(&tmpRotatedDiatomicOverlap, OrbitalType_end, OrbitalType_end);
2073- MallocerFreer::GetInstance()->Malloc<double>(&tmpMatrix, OrbitalType_end, OrbitalType_end);
2095+ MallocerFreer::GetInstance()->Malloc<double>(&tmpRotatedDiatomicOverlapVec, OrbitalType_end*OrbitalType_end);
2096+ MallocerFreer::GetInstance()->Malloc<double>(&tmpMatrixBC, OrbitalType_end, OrbitalType_end);
2097+ MallocerFreer::GetInstance()->Malloc<double>(&tmpVectorBC, OrbitalType_end*OrbitalType_end);
20742098 const Atom& atomA = *molecule->GetAtom(indexAtomA);
20752099 int firstAOIndexA = atomA.GetFirstAOIndex();
20762100 int lastAOIndexA = atomA.GetLastAOIndex();
@@ -2096,7 +2120,9 @@ void Mndo::CalcStaticFirstOrderFock(double* staticFirstOrderFock,
20962120 tmpRotMat1stDeriv,
20972121 tmpRotMat1stDerivs,
20982122 tmpRotatedDiatomicOverlap,
2099- tmpMatrix,
2123+ tmpRotatedDiatomicOverlapVec,
2124+ tmpMatrixBC,
2125+ tmpVectorBC,
21002126 atomA,
21012127 atomB);
21022128
@@ -2200,7 +2226,9 @@ void Mndo::CalcStaticFirstOrderFock(double* staticFirstOrderFock,
22002226 MallocerFreer::GetInstance()->Free<double>(&tmpRotMat1stDeriv, OrbitalType_end, OrbitalType_end);
22012227 //MallocerFreer::GetInstance()->Free<double>(&tmpRotMat1stDerivs, OrbitalType_end, OrbitalType_end, CartesianType_end);
22022228 MallocerFreer::GetInstance()->Free<double>(&tmpRotatedDiatomicOverlap, OrbitalType_end, OrbitalType_end);
2203- MallocerFreer::GetInstance()->Free<double>(&tmpMatrix, OrbitalType_end, OrbitalType_end);
2229+ MallocerFreer::GetInstance()->Free<double>(&tmpRotatedDiatomicOverlapVec, OrbitalType_end*OrbitalType_end);
2230+ MallocerFreer::GetInstance()->Free<double>(&tmpMatrixBC, OrbitalType_end, OrbitalType_end);
2231+ MallocerFreer::GetInstance()->Free<double>(&tmpVectorBC, OrbitalType_end*OrbitalType_end);
22042232 throw ex;
22052233 }
22062234 this->FreeTempMatricesStaticFirstOrderFock(&diatomicTwoElecTwoCore1stDerivs,
@@ -2214,7 +2242,9 @@ void Mndo::CalcStaticFirstOrderFock(double* staticFirstOrderFock,
22142242 MallocerFreer::GetInstance()->Free<double>(&tmpRotMat1stDeriv, OrbitalType_end, OrbitalType_end);
22152243 //MallocerFreer::GetInstance()->Free<double>(&tmpRotMat1stDerivs, OrbitalType_end, OrbitalType_end, CartesianType_end);
22162244 MallocerFreer::GetInstance()->Free<double>(&tmpRotatedDiatomicOverlap, OrbitalType_end, OrbitalType_end);
2217- MallocerFreer::GetInstance()->Free<double>(&tmpMatrix, OrbitalType_end, OrbitalType_end);
2245+ MallocerFreer::GetInstance()->Free<double>(&tmpRotatedDiatomicOverlapVec, OrbitalType_end*OrbitalType_end);
2246+ MallocerFreer::GetInstance()->Free<double>(&tmpMatrixBC, OrbitalType_end, OrbitalType_end);
2247+ MallocerFreer::GetInstance()->Free<double>(&tmpVectorBC, OrbitalType_end*OrbitalType_end);
22182248
22192249 /*
22202250 printf("staticFirstOrderFock(atomA:%d axis:%s)\n",indexAtomA,CartesianTypeStr(axisA));
@@ -2585,8 +2615,10 @@ void Mndo::CalcForce(const vector<int>& elecStates){
25852615 double** tmpDiaOverlapAOsInDiaFrame = NULL; // diatomic overlapAOs in diatomic frame
25862616 double** tmpDiaOverlapAOs1stDerivInDiaFrame = NULL; // first derivative of the diaOverlapAOs. This derivative is related to the distance between two atoms.
25872617 double** tmpRotMat1stDeriv = NULL;
2588- double** tmpRotatedDiatomicOverlap = NULL;
2589- double** tmpMatrix = NULL;
2618+ double** tmpRotatedDiatomicOverlap = NULL; // used in dgemmm
2619+ double* tmpRotatedDiatomicOverlapVec = NULL; // used in dgemmm
2620+ double** tmpMatrixBC = NULL; // used in dgemmm
2621+ double* tmpVectorBC = NULL; // used in dgemmm
25902622 try{
25912623 this->MallocTempMatricesCalcForce(&diatomicOverlapAOs1stDerivs,
25922624 &diatomicTwoElecTwoCore1stDerivs,
@@ -2596,7 +2628,9 @@ void Mndo::CalcForce(const vector<int>& elecStates){
25962628 &tmpRotMat1stDeriv,
25972629 &tmpRotMat1stDerivs,
25982630 &tmpRotatedDiatomicOverlap,
2599- &tmpMatrix,
2631+ &tmpRotatedDiatomicOverlapVec,
2632+ &tmpMatrixBC,
2633+ &tmpVectorBC,
26002634 &tmpDiatomicTwoElecTwoCore);
26012635
26022636 #pragma omp for schedule(auto)
@@ -2614,7 +2648,9 @@ void Mndo::CalcForce(const vector<int>& elecStates){
26142648 tmpRotMat1stDeriv,
26152649 tmpRotMat1stDerivs,
26162650 tmpRotatedDiatomicOverlap,
2617- tmpMatrix,
2651+ tmpRotatedDiatomicOverlapVec,
2652+ tmpMatrixBC,
2653+ tmpVectorBC,
26182654 atomA,
26192655 atomB);
26202656 // calc. first derivative of two elec two core interaction
@@ -2737,7 +2773,9 @@ void Mndo::CalcForce(const vector<int>& elecStates){
27372773 &tmpRotMat1stDeriv,
27382774 &tmpRotMat1stDerivs,
27392775 &tmpRotatedDiatomicOverlap,
2740- &tmpMatrix,
2776+ &tmpRotatedDiatomicOverlapVec,
2777+ &tmpMatrixBC,
2778+ &tmpVectorBC,
27412779 &tmpDiatomicTwoElecTwoCore);
27422780 } // end of omp-parallelized region
27432781 // Exception throwing for omp-region
@@ -2759,7 +2797,9 @@ void Mndo::MallocTempMatricesCalcForce(double**** diatomicOverlapAOs1stDerivs,
27592797 double*** tmpRotMat1stDeriv,
27602798 double**** tmpRotMat1stDerivs,
27612799 double*** tmpRotatedDiatomicOverlap,
2762- double*** tmpMatrix,
2800+ double** tmpRotatedDiatomicOverlapVec,
2801+ double*** tmpMatrixBC,
2802+ double** tmpVectorBC,
27632803 double***** tmpDiatomicTwoElecTwoCore) const{
27642804 MallocerFreer::GetInstance()->Malloc<double>(diatomicOverlapAOs1stDerivs,
27652805 OrbitalType_end,
@@ -2790,9 +2830,13 @@ void Mndo::MallocTempMatricesCalcForce(double**** diatomicOverlapAOs1stDerivs,
27902830 MallocerFreer::GetInstance()->Malloc<double>(tmpRotatedDiatomicOverlap,
27912831 OrbitalType_end,
27922832 OrbitalType_end);
2793- MallocerFreer::GetInstance()->Malloc<double>(tmpMatrix,
2833+ MallocerFreer::GetInstance()->Malloc<double>(tmpRotatedDiatomicOverlapVec,
2834+ OrbitalType_end*OrbitalType_end);
2835+ MallocerFreer::GetInstance()->Malloc<double>(tmpMatrixBC,
27942836 OrbitalType_end,
27952837 OrbitalType_end);
2838+ MallocerFreer::GetInstance()->Malloc<double>(tmpVectorBC,
2839+ OrbitalType_end*OrbitalType_end);
27962840 MallocerFreer::GetInstance()->Malloc<double>(tmpDiatomicTwoElecTwoCore,
27972841 dxy,
27982842 dxy,
@@ -2808,7 +2852,9 @@ void Mndo::FreeTempMatricesCalcForce(double**** diatomicOverlapAOs1stDerivs,
28082852 double*** tmpRotMat1stDeriv,
28092853 double**** tmpRotMat1stDerivs,
28102854 double*** tmpRotatedDiatomicOverlap,
2811- double*** tmpMatrix,
2855+ double** tmpRotatedDiatomicOverlapVec,
2856+ double*** tmpMatrixBC,
2857+ double** tmpVectorBC,
28122858 double***** tmpDiatomicTwoElecTwoCore) const{
28132859 MallocerFreer::GetInstance()->Free<double>(diatomicOverlapAOs1stDerivs,
28142860 OrbitalType_end,
@@ -2839,9 +2885,13 @@ void Mndo::FreeTempMatricesCalcForce(double**** diatomicOverlapAOs1stDerivs,
28392885 MallocerFreer::GetInstance()->Free<double>(tmpRotatedDiatomicOverlap,
28402886 OrbitalType_end,
28412887 OrbitalType_end);
2842- MallocerFreer::GetInstance()->Free<double>(tmpMatrix,
2888+ MallocerFreer::GetInstance()->Free<double>(tmpRotatedDiatomicOverlapVec,
2889+ OrbitalType_end*OrbitalType_end);
2890+ MallocerFreer::GetInstance()->Free<double>(tmpMatrixBC,
28432891 OrbitalType_end,
28442892 OrbitalType_end);
2893+ MallocerFreer::GetInstance()->Free<double>(tmpVectorBC,
2894+ OrbitalType_end*OrbitalType_end);
28452895 MallocerFreer::GetInstance()->Free<double>(tmpDiatomicTwoElecTwoCore,
28462896 dxy,
28472897 dxy,
@@ -3463,17 +3513,26 @@ void Mndo::CalcTwoElecTwoCore(double****** twoElecTwoCore,
34633513 stringstream ompErrors;
34643514 #pragma omp parallel
34653515 {
3466- double**** diatomicTwoElecTwoCore = NULL;
3467- double** tmpRotMat = NULL;
3468- double** tmpMatrixBC = NULL;
3516+ double**** diatomicTwoElecTwoCore = NULL;
3517+ double* tmpDiatomicTwoElecTwoCore = NULL;
3518+ double** tmpRotMat = NULL;
3519+ double** tmpMatrixBC = NULL;
3520+ double* tmpVectorBC = NULL;
34693521 try{
3470- MallocerFreer::GetInstance()->Malloc<double>(&diatomicTwoElecTwoCore, dxy, dxy, dxy, dxy);
3471- MallocerFreer::GetInstance()->Malloc<double>(&tmpRotMat, OrbitalType_end, OrbitalType_end);
3472- MallocerFreer::GetInstance()->Malloc<double>(&tmpMatrixBC, dxy*dxy, dxy*dxy);
3522+ MallocerFreer::GetInstance()->Malloc<double>(&diatomicTwoElecTwoCore, dxy, dxy, dxy, dxy);
3523+ MallocerFreer::GetInstance()->Malloc<double>(&tmpDiatomicTwoElecTwoCore, dxy*dxy*dxy*dxy);
3524+ MallocerFreer::GetInstance()->Malloc<double>(&tmpRotMat, OrbitalType_end, OrbitalType_end);
3525+ MallocerFreer::GetInstance()->Malloc<double>(&tmpMatrixBC, dxy*dxy, dxy*dxy);
3526+ MallocerFreer::GetInstance()->Malloc<double>(&tmpVectorBC, dxy*dxy*dxy*dxy);
34733527 // note that terms with condition a==b are not needed to calculate.
34743528 #pragma omp for schedule(auto)
34753529 for(int b=a+1; b<totalNumberAtoms; b++){
3476- this->CalcDiatomicTwoElecTwoCore(diatomicTwoElecTwoCore, tmpRotMat, tmpMatrixBC, a, b);
3530+ this->CalcDiatomicTwoElecTwoCore(diatomicTwoElecTwoCore,
3531+ tmpDiatomicTwoElecTwoCore,
3532+ tmpRotMat,
3533+ tmpMatrixBC,
3534+ tmpVectorBC,
3535+ a, b);
34773536
34783537 int i=0;
34793538 for(int mu=0; mu<dxy; mu++){
@@ -3498,9 +3557,11 @@ void Mndo::CalcTwoElecTwoCore(double****** twoElecTwoCore,
34983557 #pragma omp critical
34993558 ex.Serialize(ompErrors);
35003559 }
3501- MallocerFreer::GetInstance()->Free<double>(&diatomicTwoElecTwoCore, dxy, dxy, dxy, dxy);
3502- MallocerFreer::GetInstance()->Free<double>(&tmpRotMat, OrbitalType_end, OrbitalType_end);
3503- MallocerFreer::GetInstance()->Free<double>(&tmpMatrixBC, dxy*dxy, dxy*dxy);
3560+ MallocerFreer::GetInstance()->Free<double>(&diatomicTwoElecTwoCore, dxy, dxy, dxy, dxy);
3561+ MallocerFreer::GetInstance()->Free<double>(&tmpDiatomicTwoElecTwoCore, dxy*dxy*dxy*dxy);
3562+ MallocerFreer::GetInstance()->Free<double>(&tmpRotMat, OrbitalType_end, OrbitalType_end);
3563+ MallocerFreer::GetInstance()->Free<double>(&tmpMatrixBC, dxy*dxy, dxy*dxy);
3564+ MallocerFreer::GetInstance()->Free<double>(&tmpVectorBC, dxy*dxy*dxy*dxy);
35043565 } // end of omp-parallelized region
35053566 // Exception throwing for omp-region
35063567 if(!ompErrors.str().empty()){
@@ -3556,8 +3617,10 @@ void Mndo::CalcTwoElecTwoCore(double****** twoElecTwoCore,
35563617 // Note taht d-orbital cannot be treated,
35573618 // that is, matrix[dxy][dxy][dxy][dxy] cannot be treatable.
35583619 void Mndo::CalcDiatomicTwoElecTwoCore(double**** matrix,
3559- double** tmpRotMat,
3560- double** tmpMatrixBC,
3620+ double* tmpVec,
3621+ double** tmpRotMat,
3622+ double** tmpMatrixBC,
3623+ double* tmpVectorBC,
35613624 int indexAtomA,
35623625 int indexAtomB) const{
35633626 const Atom& atomA = *this->molecule->GetAtom(indexAtomA);
@@ -3601,7 +3664,7 @@ void Mndo::CalcDiatomicTwoElecTwoCore(double**** matrix,
36013664 }
36023665 // rotate matirix into the space frame
36033666 this->CalcRotatingMatrix(tmpRotMat, atomA, atomB);
3604- this->RotateDiatomicTwoElecTwoCoreToSpaceFrame(matrix, tmpRotMat, tmpMatrixBC);
3667+ this->RotateDiatomicTwoElecTwoCoreToSpaceFrame(matrix, tmpVec, tmpRotMat, tmpMatrixBC, tmpVectorBC);
36053668
36063669 /*
36073670 this->OutputLog("(mu, nu | lambda, sigma) matrix\n");
@@ -3800,8 +3863,10 @@ void Mndo::CalcDiatomicTwoElecTwoCore2ndDerivatives(double****** matrix,
38003863 // Rotate 4-dimensional matrix from diatomic frame to space frame
38013864 // Note tha in this method d-orbitals can not be treatable.
38023865 void Mndo::RotateDiatomicTwoElecTwoCoreToSpaceFrame(double**** matrix,
3866+ double* tmpVec,
38033867 double const* const* rotatingMatrix,
3804- double** tmpMatrixBC) const{
3868+ double** tmpMatrixBC,
3869+ double* tmpVectorBC) const{
38053870 double oldMatrix[dxy][dxy][dxy][dxy];
38063871 MolDS_wrappers::Blas::GetInstance()->Dcopy(dxy*dxy*dxy*dxy, &matrix[0][0][0][0], &oldMatrix[0][0][0][0]);
38073872
@@ -3838,7 +3903,9 @@ void Mndo::RotateDiatomicTwoElecTwoCoreToSpaceFrame(double**** matrix,
38383903 &ptrTwiceRotatingMatrix[0],
38393904 beta,
38403905 &ptrMatrix[0],
3841- tmpMatrixBC);
3906+ tmpVec,
3907+ tmpMatrixBC,
3908+ tmpVectorBC);
38423909
38433910 /*
38443911 // rotate (slow algorithm)
@@ -3886,7 +3953,9 @@ void Mndo::RotateDiatomicTwoElecTwoCore1stDerivativesToSpaceFrame(
38863953 double** twiceRotatingMatrixDerivB = NULL;
38873954 double** oldMatrix = NULL;
38883955 double** rotatedMatrix = NULL;
3956+ double* tmpRotatedVec = NULL;
38893957 double** tmpMatrix = NULL;
3958+ double* tmpVector = NULL;
38903959 double** ptrDiatomic = NULL;
38913960 try{
38923961 this->MallocTempMatricesRotateDiatomicTwoElecTwoCore1stDerivs(&twiceRotatingMatrix,
@@ -3894,7 +3963,9 @@ void Mndo::RotateDiatomicTwoElecTwoCore1stDerivativesToSpaceFrame(
38943963 &twiceRotatingMatrixDerivB,
38953964 &oldMatrix,
38963965 &rotatedMatrix,
3966+ &tmpRotatedVec,
38973967 &tmpMatrix,
3968+ &tmpVector,
38983969 &ptrDiatomic);
38993970 for(int mu=0; mu<dxy; mu++){
39003971 for(int nu=0; nu<dxy; nu++){
@@ -3937,7 +4008,9 @@ void Mndo::RotateDiatomicTwoElecTwoCore1stDerivativesToSpaceFrame(
39374008 twiceRotatingMatrix,
39384009 beta,
39394010 rotatedMatrix,
3940- tmpMatrix);
4011+ tmpRotatedVec,
4012+ tmpMatrix,
4013+ tmpVector);
39414014 alpha = 1.0;
39424015 beta = 1.0;
39434016 MolDS_wrappers::Blas::GetInstance()->Dgemmm(isColumnMajorTwiceRotatingMatrix,
@@ -3950,7 +4023,9 @@ void Mndo::RotateDiatomicTwoElecTwoCore1stDerivativesToSpaceFrame(
39504023 twiceRotatingMatrix,
39514024 beta,
39524025 rotatedMatrix,
3953- tmpMatrix);
4026+ tmpRotatedVec,
4027+ tmpMatrix,
4028+ tmpVector);
39544029 MolDS_wrappers::Blas::GetInstance()->Dgemmm(isColumnMajorTwiceRotatingMatrix,
39554030 isColumnMajorOldMatrix,
39564031 !isColumnMajorTwiceRotatingMatrix,
@@ -3961,7 +4036,9 @@ void Mndo::RotateDiatomicTwoElecTwoCore1stDerivativesToSpaceFrame(
39614036 twiceRotatingMatrix,
39624037 beta,
39634038 rotatedMatrix,
3964- tmpMatrix);
4039+ tmpRotatedVec,
4040+ tmpMatrix,
4041+ tmpVector);
39654042 MolDS_wrappers::Blas::GetInstance()->Dgemmm(isColumnMajorTwiceRotatingMatrix,
39664043 isColumnMajorOldMatrix,
39674044 !isColumnMajorTwiceRotatingMatrix,
@@ -3972,7 +4049,9 @@ void Mndo::RotateDiatomicTwoElecTwoCore1stDerivativesToSpaceFrame(
39724049 twiceRotatingMatrixDerivA,
39734050 beta,
39744051 rotatedMatrix,
3975- tmpMatrix);
4052+ tmpRotatedVec,
4053+ tmpMatrix,
4054+ tmpVector);
39764055 MolDS_wrappers::Blas::GetInstance()->Dgemmm(isColumnMajorTwiceRotatingMatrix,
39774056 isColumnMajorOldMatrix,
39784057 !isColumnMajorTwiceRotatingMatrix,
@@ -3983,7 +4062,9 @@ void Mndo::RotateDiatomicTwoElecTwoCore1stDerivativesToSpaceFrame(
39834062 twiceRotatingMatrixDerivB,
39844063 beta,
39854064 rotatedMatrix,
3986- tmpMatrix);
4065+ tmpRotatedVec,
4066+ tmpMatrix,
4067+ tmpVector);
39874068
39884069 MolDS_wrappers::Blas::GetInstance()->Dcopy(dxy*dxy*dxy*dxy,
39894070 &rotatedMatrix[0][0] , incrementOne,
@@ -3996,7 +4077,9 @@ void Mndo::RotateDiatomicTwoElecTwoCore1stDerivativesToSpaceFrame(
39964077 &twiceRotatingMatrixDerivB,
39974078 &oldMatrix,
39984079 &rotatedMatrix,
4080+ &tmpRotatedVec,
39994081 &tmpMatrix,
4082+ &tmpVector,
40004083 &ptrDiatomic);
40014084 throw ex;
40024085 }
@@ -4005,7 +4088,9 @@ void Mndo::RotateDiatomicTwoElecTwoCore1stDerivativesToSpaceFrame(
40054088 &twiceRotatingMatrixDerivB,
40064089 &oldMatrix,
40074090 &rotatedMatrix,
4091+ &tmpRotatedVec,
40084092 &tmpMatrix,
4093+ &tmpVector,
40094094 &ptrDiatomic);
40104095
40114096 /*
@@ -4067,14 +4152,18 @@ void Mndo::MallocTempMatricesRotateDiatomicTwoElecTwoCore1stDerivs(double*** twi
40674152 double*** twiceRotatingMatrixDerivB,
40684153 double*** oldMatrix,
40694154 double*** rotatedMatrix,
4155+ double** tmpRotatedVec,
40704156 double*** tmpMatrix,
4157+ double** tmpVector,
40714158 double*** ptrDiatomic) const{
40724159 MallocerFreer::GetInstance()->Malloc<double>(twiceRotatingMatrix, dxy*dxy, dxy*dxy);
40734160 MallocerFreer::GetInstance()->Malloc<double>(twiceRotatingMatrixDerivA, dxy*dxy, dxy*dxy);
40744161 MallocerFreer::GetInstance()->Malloc<double>(twiceRotatingMatrixDerivB, dxy*dxy, dxy*dxy);
40754162 MallocerFreer::GetInstance()->Malloc<double>(oldMatrix, dxy*dxy, dxy*dxy);
40764163 MallocerFreer::GetInstance()->Malloc<double>(rotatedMatrix, dxy*dxy, dxy*dxy);
4164+ MallocerFreer::GetInstance()->Malloc<double>(tmpRotatedVec, dxy*dxy*dxy*dxy);
40774165 MallocerFreer::GetInstance()->Malloc<double>(tmpMatrix, dxy*dxy, dxy*dxy);
4166+ MallocerFreer::GetInstance()->Malloc<double>(tmpVector, dxy*dxy*dxy*dxy);
40784167 MallocerFreer::GetInstance()->Malloc<double*>(ptrDiatomic, dxy*dxy);
40794168 }
40804169
@@ -4083,14 +4172,18 @@ void Mndo::FreeTempMatricesRotateDiatomicTwoElecTwoCore1stDerivs(double*** twice
40834172 double*** twiceRotatingMatrixDerivB,
40844173 double*** oldMatrix,
40854174 double*** rotatedMatrix,
4175+ double** tmpRotatedVec,
40864176 double*** tmpMatrix,
4177+ double** tmpVector,
40874178 double*** ptrDiatomic) const{
40884179 MallocerFreer::GetInstance()->Free<double>(twiceRotatingMatrix, dxy*dxy, dxy*dxy);
40894180 MallocerFreer::GetInstance()->Free<double>(twiceRotatingMatrixDerivA, dxy*dxy, dxy*dxy);
40904181 MallocerFreer::GetInstance()->Free<double>(twiceRotatingMatrixDerivB, dxy*dxy, dxy*dxy);
40914182 MallocerFreer::GetInstance()->Free<double>(oldMatrix, dxy*dxy, dxy*dxy);
4092- MallocerFreer::GetInstance()->Free<double>(rotatedMatrix, dxy*dxy, dxy*dxy);
4183+ MallocerFreer::GetInstance()->Free<double>(rotatedMatrix, dxy*dxy, dxy*dxy);
4184+ MallocerFreer::GetInstance()->Free<double>(tmpRotatedVec, dxy*dxy*dxy*dxy);
40934185 MallocerFreer::GetInstance()->Free<double>(tmpMatrix, dxy*dxy, dxy*dxy);
4186+ MallocerFreer::GetInstance()->Free<double>(tmpVector, dxy*dxy*dxy*dxy);
40944187 MallocerFreer::GetInstance()->Free<double*>(ptrDiatomic, dxy*dxy);
40954188 }
40964189
--- a/src/mndo/Mndo.h
+++ b/src/mndo/Mndo.h
@@ -164,7 +164,9 @@ private:
164164 double**** tmpDiaOverlapAOs1stDerivs,
165165 double***** tmpDiaOverlapAOs2ndDerivs,
166166 double*** tmpRotatedDiatomicOverlap,
167- double*** tmpMatrix) const;
167+ double** tmpRotatedDiatomicOverlapVec,
168+ double*** tmpMatrixBC,
169+ double** tmpVectorBC) const;
168170 void FreeTempMatricesEachThreadCalcHessianSCF(double***** diatomicOverlapAOs1stDerivs,
169171 double****** diatomicOverlapAOs2ndDerivs,
170172 double******* diatomicTwoElecTwoCore1stDerivs,
@@ -181,7 +183,9 @@ private:
181183 double**** tmpDiaOverlapAOs1stDerivs,
182184 double***** tmpDiaOverlapAOs2ndDerivs,
183185 double*** tmpRotatedDiatomicOverlap,
184- double*** tmpMatrix) const;
186+ double** tmpRotatedDiatomicOverlapVec,
187+ double*** tmpMatrixBC,
188+ double** tmpVectorBC) const;
185189 double GetAuxiliaryHessianElement1(int mu,
186190 int nu,
187191 int indexAtomA,
@@ -298,8 +302,10 @@ private:
298302 double const* const* const* const* const* diatomicTwoElecTwoCore1stDerivatives,
299303 MolDS_base::CartesianType axisA) const;
300304 void CalcDiatomicTwoElecTwoCore(double**** matrix,
305+ double* tmpVec,
301306 double** tmpRotMat,
302307 double** tmpMatrixBC,
308+ double* tmpVectorBC,
303309 int indexAtomA,
304310 int indexAtomB) const;
305311 void CalcDiatomicTwoElecTwoCore1stDerivatives(double***** matrix,
@@ -317,8 +323,10 @@ private:
317323 int indexAtomA,
318324 int indexAtomB) const;
319325 void RotateDiatomicTwoElecTwoCoreToSpaceFrame(double**** matrix,
326+ double* tmpVec,
320327 double const* const* rotatingMatrix,
321- double** tmpMatrixBC) const;
328+ double** tmpMatrixBC,
329+ double* tmpVectorBC) const;
322330 void RotateDiatomicTwoElecTwoCore1stDerivativesToSpaceFrame(double***** matrix,
323331 double const* const* const* const* diatomicTwoElecTwoCore,
324332 double const* const* rotatingMatrix,
@@ -334,14 +342,18 @@ private:
334342 double*** twiceRotatingMatrixDerivB,
335343 double*** oldMatrix,
336344 double*** rotatedMatrix,
345+ double** tmpRotatedVec,
337346 double*** tmpMatrix,
347+ double** tmpVector,
338348 double*** ptrDiatomic) const;
339349 void FreeTempMatricesRotateDiatomicTwoElecTwoCore1stDerivs(double*** twiceRotatingMatrix,
340350 double*** twiceRotatingMatrixDerivA,
341351 double*** twiceRotatingMatrixDerivB,
342352 double*** oldMatrix,
343353 double*** rotatedMatrix,
354+ double** tmpRotatedVec,
344355 double*** tmpMatrix,
356+ double** tmpVector,
345357 double*** ptrDiatomic) const;
346358 double GetNddoRepulsionIntegral(const MolDS_base_atoms::Atom& atomA,
347359 MolDS_base::OrbitalType mu,
@@ -387,7 +399,9 @@ private:
387399 double*** tmpRotMat1stDeriv,
388400 double**** tmpRotMat1stDerivs,
389401 double*** tmpRotatedDiatomicOverlap,
390- double*** tmpMatrix,
402+ double** tmpRotatedDiatomicOverlapVec,
403+ double*** tmpMatrixBC,
404+ double** tmpVectorBC,
391405 double***** tmpDiatomicTwoElecTwoCore) const;
392406 void FreeTempMatricesCalcForce(double**** diatomicOverlapAOs1stDerivs,
393407 double****** diatomicTwoElecTwoCore1stDerivs,
@@ -397,7 +411,9 @@ private:
397411 double*** tmpRotMat1stDeriv,
398412 double**** tmpRotMat1stDerivs,
399413 double*** tmpRotatedDiatomicOverlap,
400- double*** tmpMatrix,
414+ double** tmpRotatedDiatomicOverlapVec,
415+ double*** tmpMatrixBC,
416+ double** tmpVectorBC,
401417 double***** tmpDiatomicTwoElecTwoCore) const;
402418 void CalcForceSCFElecCoreAttractionPart(double* force,
403419 int indexAtomA,
--- a/src/wrappers/Blas.cpp
+++ b/src/wrappers/Blas.cpp
@@ -358,9 +358,65 @@ void Blas::Dgemm(bool isColumnMajorMatrixA,
358358 double const* const* matrixB,
359359 double beta,
360360 double** matrixC) const{
361+ double* tmpC;
362+#ifdef __INTEL_COMPILER
363+ tmpC = (double*)mkl_malloc( sizeof(double)*m*n, 16 );
364+#else
365+ tmpC = (double*)malloc( sizeof(double)*m*n);
366+#endif
367+ this->Dgemm(isColumnMajorMatrixA,
368+ isColumnMajorMatrixB,
369+ isColumnMajorMatrixC,
370+ m, n, k,
371+ alpha,
372+ matrixA,
373+ matrixB,
374+ beta,
375+ matrixC,
376+ tmpC);
377+
378+#ifdef __INTEL_COMPILER
379+ mkl_free(tmpC);
380+#else
381+ free(tmpC);
382+#endif
383+}
384+
385+// matrixC = alpha*matrixA*matrixB + beta*matrixC
386+// matrixA: m*k-matrix
387+// matrixB: k*n-matrix
388+// matrixC: m*n-matrix (matrixC[m][n] in row-major (C/C++ style))
389+// tmpC: temporary 1-dimensional m*n-array for matrixC
390+void Blas::Dgemm(bool isColumnMajorMatrixA,
391+ bool isColumnMajorMatrixB,
392+ molds_blas_int m, molds_blas_int n, molds_blas_int k,
393+ double alpha,
394+ double const* const* matrixA,
395+ double const* const* matrixB,
396+ double beta,
397+ double** matrixC,
398+ double* tmpC) const{
399+ bool isColumnMajorMatrixC = false;
400+ this->Dgemm(isColumnMajorMatrixA, isColumnMajorMatrixB, isColumnMajorMatrixC,m, n, k, alpha, matrixA, matrixB, beta, matrixC, tmpC);
401+}
402+
403+// matrixC = alpha*matrixA*matrixB + beta*matrixC
404+// matrixA: m*k-matrix
405+// matrixB: k*n-matrix
406+// matrixC: m*n-matrix
407+// tmpC: temporary 1-dimensional m*n-array for matrixC
408+void Blas::Dgemm(bool isColumnMajorMatrixA,
409+ bool isColumnMajorMatrixB,
410+ bool isColumnMajorMatrixC,
411+ molds_blas_int m, molds_blas_int n, molds_blas_int k,
412+ double alpha,
413+ double const* const* matrixA,
414+ double const* const* matrixB,
415+ double beta,
416+ double** matrixC,
417+ double* tmpC) const{
361418 double* a = const_cast<double*>(&matrixA[0][0]);
362419 double* b = const_cast<double*>(&matrixB[0][0]);
363-// double* c = &matrixC[0][0];
364420
365421 molds_blas_int lda;
366422 #ifdef __FCC_VERSION
@@ -410,12 +466,6 @@ void Blas::Dgemm(bool isColumnMajorMatrixA,
410466 }
411467 #endif
412468
413- double* tmpC;
414-#ifdef __INTEL_COMPILER
415- tmpC = (double*)mkl_malloc( sizeof(double)*m*n, 16 );
416-#else
417- tmpC = (double*)malloc( sizeof(double)*m*n);
418-#endif
419469 molds_blas_int ldc = m;
420470 if(isColumnMajorMatrixC){
421471 this->Dcopy(m*n, &matrixC[0][0], tmpC);
@@ -444,11 +494,6 @@ void Blas::Dgemm(bool isColumnMajorMatrixA,
444494 }
445495 }
446496 }
447-#ifdef __INTEL_COMPILER
448- mkl_free(tmpC);
449-#else
450- free(tmpC);
451-#endif
452497 }
453498
454499 // matrixD = matrixA*matrixB*matrixC
@@ -531,6 +576,36 @@ void Blas::Dgemmm(bool isColumnMajorMatrixA,
531576 this->Dgemm(isColumnMajorMatrixA, isColumnMajorMatrixBC, m, n, k, alpha, matrixA, tmpMatrixBC, beta, matrixD );
532577 }
533578
579+// matrixD = alpha*matrixA*matrixB*matrixC + beta*matrixD
580+// matrixA: m*k-matrix
581+// matrixB: k*l-matrix
582+// matrixC: l*n-matrix
583+// matrixD: m*n-matrix (matrixC[m][n] in row-major (C/C++ style))
584+// tmpMatrixBC is temporary calculated matrix in row-major, (C/C++ style)
585+// tmpMatrixBC = matrixB*matrixC
586+// tmpVectorBC is temporary 1 dimensional k*n-array for matrixBC
587+// tmpVectorD is temporary 1 dimensional m*n-array for matrixD
588+void Blas::Dgemmm(bool isColumnMajorMatrixA,
589+ bool isColumnMajorMatrixB,
590+ bool isColumnMajorMatrixC,
591+ molds_blas_int m, molds_blas_int n, molds_blas_int k, molds_blas_int l,
592+ double alpha,
593+ double const* const* matrixA,
594+ double const* const* matrixB,
595+ double const* const* matrixC,
596+ double beta,
597+ double** matrixD,
598+ double* tmpVectorD,
599+ double** tmpMatrixBC,
600+ double* tmpVectorBC) const{
601+
602+ double alphaBC = 1.0;
603+ double betaBC = 0.0;
604+ bool isColumnMajorMatrixBC = false;
605+ this->Dgemm(isColumnMajorMatrixB, isColumnMajorMatrixC, k, n, l, alphaBC, matrixB, matrixC, betaBC, tmpMatrixBC, tmpVectorBC);
606+ this->Dgemm(isColumnMajorMatrixA, isColumnMajorMatrixBC, m, n, k, alpha, matrixA, tmpMatrixBC, beta, matrixD, tmpVectorD);
607+}
608+
534609 // matrixC = matrixA*matrixA^T
535610 // matrixA: n*k-matrix
536611 // matrixC: n*n-matrix,symmetric (Use the upper triangular part, and copy it to the lower part.)
--- a/src/wrappers/Blas.h
+++ b/src/wrappers/Blas.h
@@ -102,6 +102,25 @@ public:
102102 double const* const* matrixB,
103103 double beta,
104104 double** matrixC) const;
105+ void Dgemm(bool isColumnMajorMatrixA,
106+ bool isColumnMajorMatrixB,
107+ molds_blas_int m, molds_blas_int n, molds_blas_int k,
108+ double alpha,
109+ double const* const* matrixA,
110+ double const* const* matrixB,
111+ double beta,
112+ double** matrixC,
113+ double* tmpC) const;
114+ void Dgemm(bool isColumnMajorMatrixA,
115+ bool isColumnMajorMatrixB,
116+ bool isColumnMajorMatrixC,
117+ molds_blas_int m, molds_blas_int n, molds_blas_int k,
118+ double alpha,
119+ double const* const* matrixA,
120+ double const* const* matrixB,
121+ double beta,
122+ double** matrixC,
123+ double* tmpC) const;
105124 void Dgemmm(molds_blas_int m, molds_blas_int n, molds_blas_int k, molds_blas_int l,
106125 double const* const* matrixA,
107126 double const* const* matrixB,
@@ -128,6 +147,19 @@ public:
128147 double beta,
129148 double** matrixD,
130149 double** tmpMatrixBC) const;
150+ void Dgemmm(bool isColumnMajorMatrixA,
151+ bool isColumnMajorMatrixB,
152+ bool isColumnMajorMatrixC,
153+ molds_blas_int m, molds_blas_int n, molds_blas_int k, molds_blas_int l,
154+ double alpha,
155+ double const* const* matrixA,
156+ double const* const* matrixB,
157+ double const* const* matrixC,
158+ double beta,
159+ double** matrixD,
160+ double* tmpVectorD,
161+ double** tmpMatrixBC,
162+ double* tmpVectorBC) const;
131163 void Dsyrk(molds_blas_int n, molds_blas_int k,
132164 double const *const* matrixA,
133165 double** matrixC)const;
--- a/src/zindo/ZindoS.cpp
+++ b/src/zindo/ZindoS.cpp
@@ -3692,8 +3692,10 @@ void ZindoS::CalcForce(const vector<int>& elecStates){
36923692 double** tmpRotMat = NULL; // rotating Matrix from the diatomic frame to space fixed frame.
36933693 double** tmpRotMat1stDeriv = NULL;
36943694 double*** tmpRotMat1stDerivs = NULL; // first derivatives of the rotMat.
3695- double** tmpRotatedDiatomicOverlap = NULL;
3696- double** tmpMatrix = NULL;
3695+ double** tmpRotatedDiatomicOverlap = NULL; // used in dgemmm
3696+ double* tmpRotatedDiatomicOverlapVec = NULL; // used in dgemmm
3697+ double** tmpMatrixBC = NULL; // used in dgemmm
3698+ double* tmpVectorBC = NULL; // used in dgemmm
36973699 try{
36983700 MallocTempMatricesCalcForce(&diatomicOverlapAOs1stDerivs,
36993701 &diatomicTwoElecTwoCore1stDerivs,
@@ -3703,7 +3705,9 @@ void ZindoS::CalcForce(const vector<int>& elecStates){
37033705 &tmpRotMat1stDeriv,
37043706 &tmpRotMat1stDerivs,
37053707 &tmpRotatedDiatomicOverlap,
3706- &tmpMatrix);
3708+ &tmpRotatedDiatomicOverlapVec,
3709+ &tmpMatrixBC,
3710+ &tmpVectorBC);
37073711 #pragma omp for schedule(auto)
37083712 for(int b=0; b<this->molecule->GetNumberAtoms(); b++){
37093713 if(a == b){continue;}
@@ -3720,7 +3724,9 @@ void ZindoS::CalcForce(const vector<int>& elecStates){
37203724 tmpRotMat1stDeriv,
37213725 tmpRotMat1stDerivs,
37223726 tmpRotatedDiatomicOverlap,
3723- tmpMatrix,
3727+ tmpRotatedDiatomicOverlapVec,
3728+ tmpMatrixBC,
3729+ tmpVectorBC,
37243730 atomA,
37253731 atomB);
37263732
@@ -3842,7 +3848,9 @@ void ZindoS::CalcForce(const vector<int>& elecStates){
38423848 &tmpRotMat1stDeriv,
38433849 &tmpRotMat1stDerivs,
38443850 &tmpRotatedDiatomicOverlap,
3845- &tmpMatrix);
3851+ &tmpRotatedDiatomicOverlapVec,
3852+ &tmpMatrixBC,
3853+ &tmpVectorBC);
38463854 } //end of omp-parallelized region
38473855 // Exception throwing for omp-region
38483856 if(!ompErrors.str().empty()){
@@ -3945,7 +3953,9 @@ void ZindoS::MallocTempMatricesCalcForce(double**** diatomicOverlapAOs1stDerivs,
39453953 double*** tmpRotMat1stDeriv,
39463954 double**** tmpRotMat1stDerivs,
39473955 double*** tmpRotatedDiatomicOverlap,
3948- double*** tmpMatrix) const{
3956+ double** tmpRotatedDiatomicOverlapVec,
3957+ double*** tmpMatrixBC,
3958+ double** tmpVectorBC) const{
39493959 MallocerFreer::GetInstance()->Malloc<double>(diatomicOverlapAOs1stDerivs, OrbitalType_end, OrbitalType_end, CartesianType_end);
39503960 MallocerFreer::GetInstance()->Malloc<double>(diatomicTwoElecTwoCore1stDerivs, OrbitalType_end, OrbitalType_end, CartesianType_end);
39513961 MallocerFreer::GetInstance()->Malloc<double>(tmpDiaOverlapAOsInDiaFrame, OrbitalType_end, OrbitalType_end);
@@ -3954,7 +3964,9 @@ void ZindoS::MallocTempMatricesCalcForce(double**** diatomicOverlapAOs1stDerivs,
39543964 MallocerFreer::GetInstance()->Malloc<double>(tmpRotMat1stDeriv, OrbitalType_end, OrbitalType_end);
39553965 MallocerFreer::GetInstance()->Malloc<double>(tmpRotMat1stDerivs, OrbitalType_end, OrbitalType_end, CartesianType_end);
39563966 MallocerFreer::GetInstance()->Malloc<double>(tmpRotatedDiatomicOverlap, OrbitalType_end, OrbitalType_end);
3957- MallocerFreer::GetInstance()->Malloc<double>(tmpMatrix, OrbitalType_end, OrbitalType_end);
3967+ MallocerFreer::GetInstance()->Malloc<double>(tmpRotatedDiatomicOverlapVec, OrbitalType_end*OrbitalType_end);
3968+ MallocerFreer::GetInstance()->Malloc<double>(tmpMatrixBC, OrbitalType_end, OrbitalType_end);
3969+ MallocerFreer::GetInstance()->Malloc<double>(tmpVectorBC, OrbitalType_end*OrbitalType_end);
39583970 }
39593971
39603972 void ZindoS::FreeTempMatricesCalcForce(double**** diatomicOverlapAOs1stDerivs,
@@ -3965,7 +3977,9 @@ void ZindoS::FreeTempMatricesCalcForce(double**** diatomicOverlapAOs1stDerivs,
39653977 double*** tmpRotMat1stDeriv,
39663978 double**** tmpRotMat1stDerivs,
39673979 double*** tmpRotatedDiatomicOverlap,
3968- double*** tmpMatrix) const{
3980+ double** tmpRotatedDiatomicOverlapVec,
3981+ double*** tmpMatrixBC,
3982+ double** tmpVectorBC) const{
39693983 MallocerFreer::GetInstance()->Free<double>(diatomicOverlapAOs1stDerivs, OrbitalType_end, OrbitalType_end, CartesianType_end);
39703984 MallocerFreer::GetInstance()->Free<double>(diatomicTwoElecTwoCore1stDerivs, OrbitalType_end, OrbitalType_end, CartesianType_end);
39713985 MallocerFreer::GetInstance()->Free<double>(tmpDiaOverlapAOsInDiaFrame, OrbitalType_end, OrbitalType_end);
@@ -3974,7 +3988,9 @@ void ZindoS::FreeTempMatricesCalcForce(double**** diatomicOverlapAOs1stDerivs,
39743988 MallocerFreer::GetInstance()->Free<double>(tmpRotMat1stDeriv, OrbitalType_end, OrbitalType_end);
39753989 MallocerFreer::GetInstance()->Free<double>(tmpRotMat1stDerivs, OrbitalType_end, OrbitalType_end, CartesianType_end);
39763990 MallocerFreer::GetInstance()->Free<double>(tmpRotatedDiatomicOverlap, OrbitalType_end, OrbitalType_end);
3977- MallocerFreer::GetInstance()->Free<double>(tmpMatrix, OrbitalType_end, OrbitalType_end);
3991+ MallocerFreer::GetInstance()->Free<double>(tmpRotatedDiatomicOverlapVec, OrbitalType_end*OrbitalType_end);
3992+ MallocerFreer::GetInstance()->Free<double>(tmpMatrixBC, OrbitalType_end, OrbitalType_end);
3993+ MallocerFreer::GetInstance()->Free<double>(tmpVectorBC, OrbitalType_end*OrbitalType_end);
39783994 }
39793995
39803996 void ZindoS::CalcForceExcitedStaticPart(double* force,
--- a/src/zindo/ZindoS.h
+++ b/src/zindo/ZindoS.h
@@ -279,7 +279,9 @@ private:
279279 double*** tmpRotMat1stDeriv,
280280 double**** tmpRotMat1stDerivs,
281281 double*** tmpRotatedDiatomicOverlap,
282- double*** tmpMatrix) const;
282+ double** tmpRotatedDiatomicOverlapVec,
283+ double*** tmpMatrixBC,
284+ double** tmpVectorBC) const;
283285 void FreeTempMatricesCalcForce(double**** diatomicOverlapAOs1stDerivs,
284286 double**** diatomicTwoElecTwoCore1stDerivs,
285287 double*** tmpDiaOverlapAOsInDiaFrame,
@@ -288,7 +290,9 @@ private:
288290 double*** tmpRotMat1stDeriv,
289291 double**** tmpRotMat1stDerivs,
290292 double*** tmpRotatedDiatomicOverlap,
291- double*** tmpMatrix) const;
293+ double** tmpRotatedDiatomicOverlapVec,
294+ double*** tmpMatrixBC,
295+ double** tmpVectorBC) const;
292296 void CalcForceExcitedStaticPart(double* force,
293297 int elecStateIndex,
294298 int indexAtomA,