• R/O
  • HTTP
  • SSH
  • HTTPS

Commit

Tags
No Tags

Frequently used words (click to add to your profile)

java, c++, android, linux, c#, windows, objective-c, cocoa, 誰得, qt, python, php, ruby, game, gui, bathyscaphe, c, 計画中(planning stage), 翻訳, omegat, framework, twitter, dom, test, vb.net, directx, ゲームエンジン, btron, arduino, previewer

Commit MetaInfo

Revision: 3a4ea2bbc5b21d2264be15a420010fa15335fe74 (tree)
Time: 2013-10-16 10:34:51
Author: Mikiya Fujii <mikiya.fujii@gmai...>
Committer: Mikiya Fujii

Log Message

Refactoring of using DGEMM and DGEMMM to cache temporary memory. #32299

git-svn-id: https://svn.sourceforge.jp/svnroot/molds/trunk@1547 1136aad2-a195-0410-b898-f5ea1d11b9d8

Change Summary

Incremental Difference

--- a/src/cndo/Cndo2.cpp
+++ b/src/cndo/Cndo2.cpp
@@ -4034,7 +4034,9 @@ void Cndo2::CalcDiatomicOverlapAOs1stDerivatives(double*** diatomicOverlapAOs1st
40344034 double** tmpRotMat1stDeriv,
40354035 double*** tmpRotMat1stDerivs,
40364036 double** tmpRotatedDiatomicOverlap,
4037+ double* tmpRotatedDiatomicOverlapVec,
40374038 double** tmpMatrixBC,
4039+ double* tmpVectorBC,
40384040 const Atom& atomA,
40394041 const Atom& atomB) const{
40404042 double cartesian[CartesianType_end] = {atomA.GetXyz()[XAxis] - atomB.GetXyz()[XAxis],
@@ -4069,7 +4071,9 @@ void Cndo2::CalcDiatomicOverlapAOs1stDerivatives(double*** diatomicOverlapAOs1st
40694071 tmpRotMat,
40704072 beta,
40714073 tmpRotatedDiatomicOverlap,
4072- tmpMatrixBC);
4074+ tmpRotatedDiatomicOverlapVec,
4075+ tmpMatrixBC,
4076+ tmpVectorBC);
40734077 alpha = 1.0;
40744078 beta = 1.0;
40754079 MolDS_wrappers::Blas::GetInstance()->Dgemmm(isColumnMajorRotatingMatrix,
@@ -4082,7 +4086,9 @@ void Cndo2::CalcDiatomicOverlapAOs1stDerivatives(double*** diatomicOverlapAOs1st
40824086 tmpRotMat,
40834087 beta,
40844088 tmpRotatedDiatomicOverlap,
4085- tmpMatrixBC);
4089+ tmpRotatedDiatomicOverlapVec,
4090+ tmpMatrixBC,
4091+ tmpVectorBC);
40864092 MolDS_wrappers::Blas::GetInstance()->Dgemmm(isColumnMajorRotatingMatrix,
40874093 isColumnMajorDiaOverlapAOs,
40884094 !isColumnMajorRotatingMatrix,
@@ -4093,7 +4099,9 @@ void Cndo2::CalcDiatomicOverlapAOs1stDerivatives(double*** diatomicOverlapAOs1st
40934099 tmpRotMat1stDeriv,
40944100 beta,
40954101 tmpRotatedDiatomicOverlap,
4096- tmpMatrixBC);
4102+ tmpRotatedDiatomicOverlapVec,
4103+ tmpMatrixBC,
4104+ tmpVectorBC);
40974105 MolDS_wrappers::Blas::GetInstance()->Dcopy(OrbitalType_end*OrbitalType_end,
40984106 &tmpRotatedDiatomicOverlap[0][0], incrementOne,
40994107 &diatomicOverlapAOs1stDerivs[0][0][c], CartesianType_end);
@@ -4137,7 +4145,9 @@ void Cndo2::CalcDiatomicOverlapAOs1stDerivatives(double*** diatomicOverlapAOs1st
41374145 double** tmpRotMat1stDeriv,
41384146 double*** tmpRotMat1stDerivs,
41394147 double** tmpRotatedDiatomicOverlap,
4148+ double* tmpRotatedDiatomicOverlapVec,
41404149 double** tmpMatrixBC,
4150+ double* tmpVectorBC,
41414151 int indexAtomA,
41424152 int indexAtomB) const{
41434153 this->CalcDiatomicOverlapAOs1stDerivatives(diatomicOverlapAOs1stDerivs,
@@ -4147,7 +4157,9 @@ void Cndo2::CalcDiatomicOverlapAOs1stDerivatives(double*** diatomicOverlapAOs1st
41474157 tmpRotMat1stDeriv,
41484158 tmpRotMat1stDerivs,
41494159 tmpRotatedDiatomicOverlap,
4160+ tmpRotatedDiatomicOverlapVec,
41504161 tmpMatrixBC,
4162+ tmpVectorBC,
41514163 *this->molecule->GetAtom(indexAtomA),
41524164 *this->molecule->GetAtom(indexAtomB));
41534165 }
--- a/src/cndo/Cndo2.h
+++ b/src/cndo/Cndo2.h
@@ -201,7 +201,9 @@ protected:
201201 double** tmpRotMat1stDeriv,
202202 double*** tmpRotMat1stDerivs,
203203 double** tmpRotatedDiatomicOverlap,
204+ double* tmpRotatedDiatomicOverlapVec,
204205 double** tmpMatrixBC,
206+ double* tmpVectorBC,
205207 const MolDS_base_atoms::Atom& atomA,
206208 const MolDS_base_atoms::Atom& atomB) const;
207209 void CalcDiatomicOverlapAOs1stDerivatives(double*** diatomicOverlapAOs1stDerivs,
@@ -211,7 +213,9 @@ protected:
211213 double** tmpRotMat1stDeriv,
212214 double*** tmpRotMat1stDerivs,
213215 double** tmpRotatedDiatomicOverlap,
216+ double* tmpRotatedDiatomicOverlapVec,
214217 double** tmpMatrixBC,
218+ double* tmpVectorBC,
215219 int indexAtomA,
216220 int indexAtomB) const;
217221 void CalcDiatomicOverlapAOs2ndDerivatives(double**** overlapAOs2ndDeri,
--- a/src/mndo/Mndo.cpp
+++ b/src/mndo/Mndo.cpp
@@ -1006,7 +1006,9 @@ void Mndo::MallocTempMatricesEachThreadCalcHessianSCF(double***** diatomicOve
10061006 double**** tmpDiaOverlapAOs1stDerivs,
10071007 double***** tmpDiaOverlapAOs2ndDerivs,
10081008 double*** tmpRotatedDiatomicOverlap,
1009- double*** tmpMatrix) const{
1009+ double** tmpRotatedDiatomicOverlapVec,
1010+ double*** tmpMatrixBC,
1011+ double** tmpVectorBC) const{
10101012 MallocerFreer::GetInstance()->Malloc<double>(diatomicOverlapAOs1stDerivs,
10111013 this->molecule->GetNumberAtoms(),
10121014 OrbitalType_end,
@@ -1079,9 +1081,13 @@ void Mndo::MallocTempMatricesEachThreadCalcHessianSCF(double***** diatomicOve
10791081 CartesianType_end);
10801082 MallocerFreer::GetInstance()->Malloc<double>(tmpRotatedDiatomicOverlap,
10811083 OrbitalType_end, OrbitalType_end);
1082- MallocerFreer::GetInstance()->Malloc<double>(tmpMatrix,
1084+ MallocerFreer::GetInstance()->Malloc<double>(tmpRotatedDiatomicOverlapVec,
1085+ OrbitalType_end*OrbitalType_end);
1086+ MallocerFreer::GetInstance()->Malloc<double>(tmpMatrixBC,
10831087 OrbitalType_end,
10841088 OrbitalType_end);
1089+ MallocerFreer::GetInstance()->Malloc<double>(tmpVectorBC,
1090+ OrbitalType_end*OrbitalType_end);
10851091 }
10861092
10871093 void Mndo::FreeTempMatricesEachThreadCalcHessianSCF(double***** diatomicOverlapAOs1stDerivs,
@@ -1100,7 +1106,9 @@ void Mndo::FreeTempMatricesEachThreadCalcHessianSCF(double***** diatomicOverl
11001106 double**** tmpDiaOverlapAOs1stDerivs,
11011107 double***** tmpDiaOverlapAOs2ndDerivs,
11021108 double*** tmpRotatedDiatomicOverlap,
1103- double*** tmpMatrix) const{
1109+ double** tmpRotatedDiatomicOverlapVec,
1110+ double*** tmpMatrixBC,
1111+ double** tmpVectorBC) const{
11041112 MallocerFreer::GetInstance()->Free<double>(diatomicOverlapAOs1stDerivs,
11051113 this->molecule->GetNumberAtoms(),
11061114 OrbitalType_end,
@@ -1174,9 +1182,13 @@ void Mndo::FreeTempMatricesEachThreadCalcHessianSCF(double***** diatomicOverl
11741182 MallocerFreer::GetInstance()->Free<double>(tmpRotatedDiatomicOverlap,
11751183 OrbitalType_end,
11761184 OrbitalType_end);
1177- MallocerFreer::GetInstance()->Free<double>(tmpMatrix,
1185+ MallocerFreer::GetInstance()->Free<double>(tmpRotatedDiatomicOverlapVec,
1186+ OrbitalType_end*OrbitalType_end);
1187+ MallocerFreer::GetInstance()->Free<double>(tmpMatrixBC,
11781188 OrbitalType_end,
11791189 OrbitalType_end);
1190+ MallocerFreer::GetInstance()->Free<double>(tmpVectorBC,
1191+ OrbitalType_end*OrbitalType_end);
11801192 }
11811193
11821194 // mu and nu are included in atomA's AOs.
@@ -1734,7 +1746,9 @@ void Mndo::CalcHessianSCF(double** hessianSCF, bool isMassWeighted) const{
17341746 double**** tmpDiaOverlapAOs2ndDerivs = NULL; //second derivatives of the diaOverlapAOs. These derivatives are related to all the Cartesian coordinates.
17351747 double** tmpRotMat1stDeriv = NULL;
17361748 double** tmpRotatedDiatomicOverlap = NULL;
1737- double** tmpMatrixBC = NULL;
1749+ double* tmpRotatedDiatomicOverlapVec = NULL; // used in dgemmm
1750+ double** tmpMatrixBC = NULL; // used in dgemmm
1751+ double* tmpVectorBC = NULL; // used in dgemmm
17381752
17391753 try{
17401754 this->MallocTempMatricesEachThreadCalcHessianSCF(&diatomicOverlapAOs1stDerivs,
@@ -1753,7 +1767,9 @@ void Mndo::CalcHessianSCF(double** hessianSCF, bool isMassWeighted) const{
17531767 &tmpDiaOverlapAOs1stDerivs,
17541768 &tmpDiaOverlapAOs2ndDerivs,
17551769 &tmpRotatedDiatomicOverlap,
1756- &tmpMatrixBC);
1770+ &tmpRotatedDiatomicOverlapVec,
1771+ &tmpMatrixBC,
1772+ &tmpVectorBC);
17571773 #pragma omp for schedule(auto)
17581774 for(int indexAtomA=0; indexAtomA<this->molecule->GetNumberAtoms(); indexAtomA++){
17591775 const Atom& atomA = *this->molecule->GetAtom(indexAtomA);
@@ -1771,7 +1787,9 @@ void Mndo::CalcHessianSCF(double** hessianSCF, bool isMassWeighted) const{
17711787 tmpRotMat1stDeriv,
17721788 tmpRotMat1stDerivs,
17731789 tmpRotatedDiatomicOverlap,
1790+ tmpRotatedDiatomicOverlapVec,
17741791 tmpMatrixBC,
1792+ tmpVectorBC,
17751793 indexAtomA,
17761794 indexAtomB);
17771795 this->CalcDiatomicOverlapAOs2ndDerivatives(diatomicOverlapAOs2ndDerivs[indexAtomB],
@@ -1868,7 +1886,9 @@ void Mndo::CalcHessianSCF(double** hessianSCF, bool isMassWeighted) const{
18681886 &tmpDiaOverlapAOs1stDerivs,
18691887 &tmpDiaOverlapAOs2ndDerivs,
18701888 &tmpRotatedDiatomicOverlap,
1871- &tmpMatrixBC);
1889+ &tmpRotatedDiatomicOverlapVec,
1890+ &tmpMatrixBC,
1891+ &tmpVectorBC);
18721892 }// end of omp-region
18731893 // Exception throwing for omp-region
18741894 if(!ompErrors.str().empty()){
@@ -2059,7 +2079,9 @@ void Mndo::CalcStaticFirstOrderFock(double* staticFirstOrderFock,
20592079 double** tmpDiaOverlapAOs1stDerivInDiaFrame = NULL; // first derivative of the diaOverlapAOs. This derivative is related to the distance between two atoms.
20602080 double** tmpRotMat1stDeriv = NULL;
20612081 double** tmpRotatedDiatomicOverlap = NULL;
2082+ double* tmpRotatedDiatomicOverlapVec = NULL;
20622083 double** tmpMatrixBC = NULL;
2084+ double* tmpVectorBC = NULL;
20632085 try{
20642086 this->MallocTempMatricesStaticFirstOrderFock(&diatomicTwoElecTwoCore1stDerivs,
20652087 &diatomicOverlapAOs1stDerivs,
@@ -2070,7 +2092,9 @@ void Mndo::CalcStaticFirstOrderFock(double* staticFirstOrderFock,
20702092 MallocerFreer::GetInstance()->Malloc<double>(&tmpDiaOverlapAOs1stDerivInDiaFrame, OrbitalType_end, OrbitalType_end);
20712093 MallocerFreer::GetInstance()->Malloc<double>(&tmpRotMat1stDeriv, OrbitalType_end, OrbitalType_end);
20722094 MallocerFreer::GetInstance()->Malloc<double>(&tmpRotatedDiatomicOverlap, OrbitalType_end, OrbitalType_end);
2095+ MallocerFreer::GetInstance()->Malloc<double>(&tmpRotatedDiatomicOverlapVec, OrbitalType_end*OrbitalType_end);
20732096 MallocerFreer::GetInstance()->Malloc<double>(&tmpMatrixBC, OrbitalType_end, OrbitalType_end);
2097+ MallocerFreer::GetInstance()->Malloc<double>(&tmpVectorBC, OrbitalType_end*OrbitalType_end);
20742098 const Atom& atomA = *molecule->GetAtom(indexAtomA);
20752099 int firstAOIndexA = atomA.GetFirstAOIndex();
20762100 int lastAOIndexA = atomA.GetLastAOIndex();
@@ -2096,7 +2120,9 @@ void Mndo::CalcStaticFirstOrderFock(double* staticFirstOrderFock,
20962120 tmpRotMat1stDeriv,
20972121 tmpRotMat1stDerivs,
20982122 tmpRotatedDiatomicOverlap,
2123+ tmpRotatedDiatomicOverlapVec,
20992124 tmpMatrixBC,
2125+ tmpVectorBC,
21002126 atomA,
21012127 atomB);
21022128
@@ -2200,7 +2226,9 @@ void Mndo::CalcStaticFirstOrderFock(double* staticFirstOrderFock,
22002226 MallocerFreer::GetInstance()->Free<double>(&tmpRotMat1stDeriv, OrbitalType_end, OrbitalType_end);
22012227 //MallocerFreer::GetInstance()->Free<double>(&tmpRotMat1stDerivs, OrbitalType_end, OrbitalType_end, CartesianType_end);
22022228 MallocerFreer::GetInstance()->Free<double>(&tmpRotatedDiatomicOverlap, OrbitalType_end, OrbitalType_end);
2229+ MallocerFreer::GetInstance()->Free<double>(&tmpRotatedDiatomicOverlapVec, OrbitalType_end*OrbitalType_end);
22032230 MallocerFreer::GetInstance()->Free<double>(&tmpMatrixBC, OrbitalType_end, OrbitalType_end);
2231+ MallocerFreer::GetInstance()->Free<double>(&tmpVectorBC, OrbitalType_end*OrbitalType_end);
22042232 throw ex;
22052233 }
22062234 this->FreeTempMatricesStaticFirstOrderFock(&diatomicTwoElecTwoCore1stDerivs,
@@ -2214,7 +2242,9 @@ void Mndo::CalcStaticFirstOrderFock(double* staticFirstOrderFock,
22142242 MallocerFreer::GetInstance()->Free<double>(&tmpRotMat1stDeriv, OrbitalType_end, OrbitalType_end);
22152243 //MallocerFreer::GetInstance()->Free<double>(&tmpRotMat1stDerivs, OrbitalType_end, OrbitalType_end, CartesianType_end);
22162244 MallocerFreer::GetInstance()->Free<double>(&tmpRotatedDiatomicOverlap, OrbitalType_end, OrbitalType_end);
2245+ MallocerFreer::GetInstance()->Free<double>(&tmpRotatedDiatomicOverlapVec, OrbitalType_end*OrbitalType_end);
22172246 MallocerFreer::GetInstance()->Free<double>(&tmpMatrixBC, OrbitalType_end, OrbitalType_end);
2247+ MallocerFreer::GetInstance()->Free<double>(&tmpVectorBC, OrbitalType_end*OrbitalType_end);
22182248
22192249 /*
22202250 printf("staticFirstOrderFock(atomA:%d axis:%s)\n",indexAtomA,CartesianTypeStr(axisA));
@@ -2585,8 +2615,10 @@ void Mndo::CalcForce(const vector<int>& elecStates){
25852615 double** tmpDiaOverlapAOsInDiaFrame = NULL; // diatomic overlapAOs in diatomic frame
25862616 double** tmpDiaOverlapAOs1stDerivInDiaFrame = NULL; // first derivative of the diaOverlapAOs. This derivative is related to the distance between two atoms.
25872617 double** tmpRotMat1stDeriv = NULL;
2588- double** tmpRotatedDiatomicOverlap = NULL;
2589- double** tmpMatrixBC = NULL;
2618+ double** tmpRotatedDiatomicOverlap = NULL; // used in dgemmm
2619+ double* tmpRotatedDiatomicOverlapVec = NULL; // used in dgemmm
2620+ double** tmpMatrixBC = NULL; // used in dgemmm
2621+ double* tmpVectorBC = NULL; // used in dgemmm
25902622 try{
25912623 this->MallocTempMatricesCalcForce(&diatomicOverlapAOs1stDerivs,
25922624 &diatomicTwoElecTwoCore1stDerivs,
@@ -2596,7 +2628,9 @@ void Mndo::CalcForce(const vector<int>& elecStates){
25962628 &tmpRotMat1stDeriv,
25972629 &tmpRotMat1stDerivs,
25982630 &tmpRotatedDiatomicOverlap,
2631+ &tmpRotatedDiatomicOverlapVec,
25992632 &tmpMatrixBC,
2633+ &tmpVectorBC,
26002634 &tmpDiatomicTwoElecTwoCore);
26012635
26022636 #pragma omp for schedule(auto)
@@ -2614,7 +2648,9 @@ void Mndo::CalcForce(const vector<int>& elecStates){
26142648 tmpRotMat1stDeriv,
26152649 tmpRotMat1stDerivs,
26162650 tmpRotatedDiatomicOverlap,
2651+ tmpRotatedDiatomicOverlapVec,
26172652 tmpMatrixBC,
2653+ tmpVectorBC,
26182654 atomA,
26192655 atomB);
26202656 // calc. first derivative of two elec two core interaction
@@ -2737,7 +2773,9 @@ void Mndo::CalcForce(const vector<int>& elecStates){
27372773 &tmpRotMat1stDeriv,
27382774 &tmpRotMat1stDerivs,
27392775 &tmpRotatedDiatomicOverlap,
2776+ &tmpRotatedDiatomicOverlapVec,
27402777 &tmpMatrixBC,
2778+ &tmpVectorBC,
27412779 &tmpDiatomicTwoElecTwoCore);
27422780 } // end of omp-parallelized region
27432781 // Exception throwing for omp-region
@@ -2759,7 +2797,9 @@ void Mndo::MallocTempMatricesCalcForce(double**** diatomicOverlapAOs1stDerivs,
27592797 double*** tmpRotMat1stDeriv,
27602798 double**** tmpRotMat1stDerivs,
27612799 double*** tmpRotatedDiatomicOverlap,
2800+ double** tmpRotatedDiatomicOverlapVec,
27622801 double*** tmpMatrixBC,
2802+ double** tmpVectorBC,
27632803 double***** tmpDiatomicTwoElecTwoCore) const{
27642804 MallocerFreer::GetInstance()->Malloc<double>(diatomicOverlapAOs1stDerivs,
27652805 OrbitalType_end,
@@ -2790,9 +2830,13 @@ void Mndo::MallocTempMatricesCalcForce(double**** diatomicOverlapAOs1stDerivs,
27902830 MallocerFreer::GetInstance()->Malloc<double>(tmpRotatedDiatomicOverlap,
27912831 OrbitalType_end,
27922832 OrbitalType_end);
2833+ MallocerFreer::GetInstance()->Malloc<double>(tmpRotatedDiatomicOverlapVec,
2834+ OrbitalType_end*OrbitalType_end);
27932835 MallocerFreer::GetInstance()->Malloc<double>(tmpMatrixBC,
27942836 OrbitalType_end,
27952837 OrbitalType_end);
2838+ MallocerFreer::GetInstance()->Malloc<double>(tmpVectorBC,
2839+ OrbitalType_end*OrbitalType_end);
27962840 MallocerFreer::GetInstance()->Malloc<double>(tmpDiatomicTwoElecTwoCore,
27972841 dxy,
27982842 dxy,
@@ -2808,7 +2852,9 @@ void Mndo::FreeTempMatricesCalcForce(double**** diatomicOverlapAOs1stDerivs,
28082852 double*** tmpRotMat1stDeriv,
28092853 double**** tmpRotMat1stDerivs,
28102854 double*** tmpRotatedDiatomicOverlap,
2855+ double** tmpRotatedDiatomicOverlapVec,
28112856 double*** tmpMatrixBC,
2857+ double** tmpVectorBC,
28122858 double***** tmpDiatomicTwoElecTwoCore) const{
28132859 MallocerFreer::GetInstance()->Free<double>(diatomicOverlapAOs1stDerivs,
28142860 OrbitalType_end,
@@ -2839,9 +2885,13 @@ void Mndo::FreeTempMatricesCalcForce(double**** diatomicOverlapAOs1stDerivs,
28392885 MallocerFreer::GetInstance()->Free<double>(tmpRotatedDiatomicOverlap,
28402886 OrbitalType_end,
28412887 OrbitalType_end);
2888+ MallocerFreer::GetInstance()->Free<double>(tmpRotatedDiatomicOverlapVec,
2889+ OrbitalType_end*OrbitalType_end);
28422890 MallocerFreer::GetInstance()->Free<double>(tmpMatrixBC,
28432891 OrbitalType_end,
28442892 OrbitalType_end);
2893+ MallocerFreer::GetInstance()->Free<double>(tmpVectorBC,
2894+ OrbitalType_end*OrbitalType_end);
28452895 MallocerFreer::GetInstance()->Free<double>(tmpDiatomicTwoElecTwoCore,
28462896 dxy,
28472897 dxy,
--- a/src/mndo/Mndo.h
+++ b/src/mndo/Mndo.h
@@ -164,7 +164,9 @@ private:
164164 double**** tmpDiaOverlapAOs1stDerivs,
165165 double***** tmpDiaOverlapAOs2ndDerivs,
166166 double*** tmpRotatedDiatomicOverlap,
167- double*** tmpMatrix) const;
167+ double** tmpRotatedDiatomicOverlapVec,
168+ double*** tmpMatrixBC,
169+ double** tmpVectorBC) const;
168170 void FreeTempMatricesEachThreadCalcHessianSCF(double***** diatomicOverlapAOs1stDerivs,
169171 double****** diatomicOverlapAOs2ndDerivs,
170172 double******* diatomicTwoElecTwoCore1stDerivs,
@@ -181,7 +183,9 @@ private:
181183 double**** tmpDiaOverlapAOs1stDerivs,
182184 double***** tmpDiaOverlapAOs2ndDerivs,
183185 double*** tmpRotatedDiatomicOverlap,
184- double*** tmpMatrix) const;
186+ double** tmpRotatedDiatomicOverlapVec,
187+ double*** tmpMatrixBC,
188+ double** tmpVectorBC) const;
185189 double GetAuxiliaryHessianElement1(int mu,
186190 int nu,
187191 int indexAtomA,
@@ -395,7 +399,9 @@ private:
395399 double*** tmpRotMat1stDeriv,
396400 double**** tmpRotMat1stDerivs,
397401 double*** tmpRotatedDiatomicOverlap,
402+ double** tmpRotatedDiatomicOverlapVec,
398403 double*** tmpMatrixBC,
404+ double** tmpVectorBC,
399405 double***** tmpDiatomicTwoElecTwoCore) const;
400406 void FreeTempMatricesCalcForce(double**** diatomicOverlapAOs1stDerivs,
401407 double****** diatomicTwoElecTwoCore1stDerivs,
@@ -405,7 +411,9 @@ private:
405411 double*** tmpRotMat1stDeriv,
406412 double**** tmpRotMat1stDerivs,
407413 double*** tmpRotatedDiatomicOverlap,
414+ double** tmpRotatedDiatomicOverlapVec,
408415 double*** tmpMatrixBC,
416+ double** tmpVectorBC,
409417 double***** tmpDiatomicTwoElecTwoCore) const;
410418 void CalcForceSCFElecCoreAttractionPart(double* force,
411419 int indexAtomA,
--- a/src/zindo/ZindoS.cpp
+++ b/src/zindo/ZindoS.cpp
@@ -3677,8 +3677,10 @@ void ZindoS::CalcForce(const vector<int>& elecStates){
36773677 double** tmpRotMat = NULL; // rotating Matrix from the diatomic frame to space fixed frame.
36783678 double** tmpRotMat1stDeriv = NULL;
36793679 double*** tmpRotMat1stDerivs = NULL; // first derivatives of the rotMat.
3680- double** tmpRotatedDiatomicOverlap = NULL;
3681- double** tmpMatrixBC = NULL;
3680+ double** tmpRotatedDiatomicOverlap = NULL; // used in dgemmm
3681+ double* tmpRotatedDiatomicOverlapVec = NULL; // used in dgemmm
3682+ double** tmpMatrixBC = NULL; // used in dgemmm
3683+ double* tmpVectorBC = NULL; // used in dgemmm
36823684 try{
36833685 MallocTempMatricesCalcForce(&diatomicOverlapAOs1stDerivs,
36843686 &diatomicTwoElecTwoCore1stDerivs,
@@ -3688,7 +3690,9 @@ void ZindoS::CalcForce(const vector<int>& elecStates){
36883690 &tmpRotMat1stDeriv,
36893691 &tmpRotMat1stDerivs,
36903692 &tmpRotatedDiatomicOverlap,
3691- &tmpMatrixBC);
3693+ &tmpRotatedDiatomicOverlapVec,
3694+ &tmpMatrixBC,
3695+ &tmpVectorBC);
36923696 #pragma omp for schedule(auto)
36933697 for(int b=0; b<this->molecule->GetNumberAtoms(); b++){
36943698 if(a == b){continue;}
@@ -3705,7 +3709,9 @@ void ZindoS::CalcForce(const vector<int>& elecStates){
37053709 tmpRotMat1stDeriv,
37063710 tmpRotMat1stDerivs,
37073711 tmpRotatedDiatomicOverlap,
3712+ tmpRotatedDiatomicOverlapVec,
37083713 tmpMatrixBC,
3714+ tmpVectorBC,
37093715 atomA,
37103716 atomB);
37113717
@@ -3827,7 +3833,9 @@ void ZindoS::CalcForce(const vector<int>& elecStates){
38273833 &tmpRotMat1stDeriv,
38283834 &tmpRotMat1stDerivs,
38293835 &tmpRotatedDiatomicOverlap,
3830- &tmpMatrixBC);
3836+ &tmpRotatedDiatomicOverlapVec,
3837+ &tmpMatrixBC,
3838+ &tmpVectorBC);
38313839 } //end of omp-parallelized region
38323840 // Exception throwing for omp-region
38333841 if(!ompErrors.str().empty()){
@@ -3930,7 +3938,9 @@ void ZindoS::MallocTempMatricesCalcForce(double**** diatomicOverlapAOs1stDerivs,
39303938 double*** tmpRotMat1stDeriv,
39313939 double**** tmpRotMat1stDerivs,
39323940 double*** tmpRotatedDiatomicOverlap,
3933- double*** tmpMatrixBC) const{
3941+ double** tmpRotatedDiatomicOverlapVec,
3942+ double*** tmpMatrixBC,
3943+ double** tmpVectorBC) const{
39343944 MallocerFreer::GetInstance()->Malloc<double>(diatomicOverlapAOs1stDerivs, OrbitalType_end, OrbitalType_end, CartesianType_end);
39353945 MallocerFreer::GetInstance()->Malloc<double>(diatomicTwoElecTwoCore1stDerivs, OrbitalType_end, OrbitalType_end, CartesianType_end);
39363946 MallocerFreer::GetInstance()->Malloc<double>(tmpDiaOverlapAOsInDiaFrame, OrbitalType_end, OrbitalType_end);
@@ -3939,7 +3949,9 @@ void ZindoS::MallocTempMatricesCalcForce(double**** diatomicOverlapAOs1stDerivs,
39393949 MallocerFreer::GetInstance()->Malloc<double>(tmpRotMat1stDeriv, OrbitalType_end, OrbitalType_end);
39403950 MallocerFreer::GetInstance()->Malloc<double>(tmpRotMat1stDerivs, OrbitalType_end, OrbitalType_end, CartesianType_end);
39413951 MallocerFreer::GetInstance()->Malloc<double>(tmpRotatedDiatomicOverlap, OrbitalType_end, OrbitalType_end);
3952+ MallocerFreer::GetInstance()->Malloc<double>(tmpRotatedDiatomicOverlapVec, OrbitalType_end*OrbitalType_end);
39423953 MallocerFreer::GetInstance()->Malloc<double>(tmpMatrixBC, OrbitalType_end, OrbitalType_end);
3954+ MallocerFreer::GetInstance()->Malloc<double>(tmpVectorBC, OrbitalType_end*OrbitalType_end);
39433955 }
39443956
39453957 void ZindoS::FreeTempMatricesCalcForce(double**** diatomicOverlapAOs1stDerivs,
@@ -3950,7 +3962,9 @@ void ZindoS::FreeTempMatricesCalcForce(double**** diatomicOverlapAOs1stDerivs,
39503962 double*** tmpRotMat1stDeriv,
39513963 double**** tmpRotMat1stDerivs,
39523964 double*** tmpRotatedDiatomicOverlap,
3953- double*** tmpMatrixBC) const{
3965+ double** tmpRotatedDiatomicOverlapVec,
3966+ double*** tmpMatrixBC,
3967+ double** tmpVectorBC) const{
39543968 MallocerFreer::GetInstance()->Free<double>(diatomicOverlapAOs1stDerivs, OrbitalType_end, OrbitalType_end, CartesianType_end);
39553969 MallocerFreer::GetInstance()->Free<double>(diatomicTwoElecTwoCore1stDerivs, OrbitalType_end, OrbitalType_end, CartesianType_end);
39563970 MallocerFreer::GetInstance()->Free<double>(tmpDiaOverlapAOsInDiaFrame, OrbitalType_end, OrbitalType_end);
@@ -3959,7 +3973,9 @@ void ZindoS::FreeTempMatricesCalcForce(double**** diatomicOverlapAOs1stDerivs,
39593973 MallocerFreer::GetInstance()->Free<double>(tmpRotMat1stDeriv, OrbitalType_end, OrbitalType_end);
39603974 MallocerFreer::GetInstance()->Free<double>(tmpRotMat1stDerivs, OrbitalType_end, OrbitalType_end, CartesianType_end);
39613975 MallocerFreer::GetInstance()->Free<double>(tmpRotatedDiatomicOverlap, OrbitalType_end, OrbitalType_end);
3976+ MallocerFreer::GetInstance()->Free<double>(tmpRotatedDiatomicOverlapVec, OrbitalType_end*OrbitalType_end);
39623977 MallocerFreer::GetInstance()->Free<double>(tmpMatrixBC, OrbitalType_end, OrbitalType_end);
3978+ MallocerFreer::GetInstance()->Free<double>(tmpVectorBC, OrbitalType_end*OrbitalType_end);
39633979 }
39643980
39653981 void ZindoS::CalcForceExcitedStaticPart(double* force,
--- a/src/zindo/ZindoS.h
+++ b/src/zindo/ZindoS.h
@@ -279,7 +279,9 @@ private:
279279 double*** tmpRotMat1stDeriv,
280280 double**** tmpRotMat1stDerivs,
281281 double*** tmpRotatedDiatomicOverlap,
282- double*** tmpMatrixBC) const;
282+ double** tmpRotatedDiatomicOverlapVec,
283+ double*** tmpMatrixBC,
284+ double** tmpVectorBC) const;
283285 void FreeTempMatricesCalcForce(double**** diatomicOverlapAOs1stDerivs,
284286 double**** diatomicTwoElecTwoCore1stDerivs,
285287 double*** tmpDiaOverlapAOsInDiaFrame,
@@ -288,7 +290,9 @@ private:
288290 double*** tmpRotMat1stDeriv,
289291 double**** tmpRotMat1stDerivs,
290292 double*** tmpRotatedDiatomicOverlap,
291- double*** tmpMatrixBC) const;
293+ double** tmpRotatedDiatomicOverlapVec,
294+ double*** tmpMatrixBC,
295+ double** tmpVectorBC) const;
292296 void CalcForceExcitedStaticPart(double* force,
293297 int elecStateIndex,
294298 int indexAtomA,