Revision | 85f1753149951fd114e5b89a4369e19aac083cdf (tree) |
---|---|
Time | 2013-10-16 10:44:17 |
Author | Mikiya Fujii <mikiya.fujii@gmai...> |
Committer | Mikiya Fujii |
trunk.r1540 is merged to branches/fx10. Refactoring:DGEMM and DGEMMM to cache temporary memory. #32094 #32299
git-svn-id: https://svn.sourceforge.jp/svnroot/molds/branches/fx10@1548 1136aad2-a195-0410-b898-f5ea1d11b9d8
@@ -182,10 +182,14 @@ void Cndo2::SetMessages(){ | ||
182 | 182 | = "Error in cndo::Cndo2::RotateDiatmicOverlapAOsToSpaceFrame diatomicOverlapAOs is NULL.\n"; |
183 | 183 | this->errorMessageRotDiaOverlapAOsToSpaceFrameNullRotMatrix |
184 | 184 | = "Error in cndo::Cndo2::RotateDiatmicOverlapAOsToSpaceFrame: rotatingMatrix is NULL.\n"; |
185 | + this->errorMessageRotDiaOverlapAOsToSpaceFrameNullTmpDiaMatrix | |
186 | + = "Error in cndo::Cndo2::RotateDiatmicOverlapAOsToSpaceFrame: tmpDiatomicOverlapAOs is NULL.\n"; | |
185 | 187 | this->errorMessageRotDiaOverlapAOsToSpaceFrameNullTmpOldDiaMatrix |
186 | 188 | = "Error in cndo::Cndo2::RotateDiatmicOverlapAOsToSpaceFrame: tmpOldDiatomicOverlapAOs is NULL.\n"; |
187 | 189 | this->errorMessageRotDiaOverlapAOsToSpaceFrameNullTmpMatrixBC |
188 | 190 | = "Error in cndo::Cndo2::RotateDiatmicOverlapAOsToSpaceFrame: tmpMatrixBC is NULL.\n"; |
191 | + this->errorMessageRotDiaOverlapAOsToSpaceFrameNullTmpVectorBC | |
192 | + = "Error in cndo::Cndo2::RotateDiatmicOverlapAOsToSpaceFrame: tmpVectorBC is NULL.\n"; | |
189 | 193 | this->errorMessageSetOverlapAOsElementNullDiaMatrix |
190 | 194 | = "Error in cndo::Cndo2::SetOverlapAOsElement: diatomicOverlapAOs is NULL.\n"; |
191 | 195 | this->errorMessageCalcElectronicTransitionDipoleMomentBadState |
@@ -3923,10 +3927,12 @@ void Cndo2::CalcOverlapAOs(double** overlapAOs, const Molecule& molecule) const{ | ||
3923 | 3927 | stringstream ompErrors; |
3924 | 3928 | #pragma omp parallel |
3925 | 3929 | { |
3926 | - double** diatomicOverlapAOs = NULL; | |
3927 | - double** rotatingMatrix = NULL; | |
3930 | + double** diatomicOverlapAOs = NULL; | |
3931 | + double** rotatingMatrix = NULL; | |
3932 | + double* tmpDiatomicOverlapAOs = NULL; | |
3928 | 3933 | double** tmpOldDiatomicOverlapAOs = NULL; |
3929 | - double** tmpMatrixBC = NULL; | |
3934 | + double** tmpMatrixBC = NULL; | |
3935 | + double* tmpVectorBC = NULL; | |
3930 | 3936 | try{ |
3931 | 3937 | // malloc |
3932 | 3938 | MallocerFreer::GetInstance()->Malloc<double>(&diatomicOverlapAOs, |
@@ -3935,19 +3941,23 @@ void Cndo2::CalcOverlapAOs(double** overlapAOs, const Molecule& molecule) const{ | ||
3935 | 3941 | MallocerFreer::GetInstance()->Malloc<double>(&rotatingMatrix, |
3936 | 3942 | OrbitalType_end, |
3937 | 3943 | OrbitalType_end); |
3944 | + MallocerFreer::GetInstance()->Malloc<double>(&tmpDiatomicOverlapAOs, | |
3945 | + OrbitalType_end*OrbitalType_end); | |
3938 | 3946 | MallocerFreer::GetInstance()->Malloc<double>(&tmpOldDiatomicOverlapAOs, |
3939 | 3947 | OrbitalType_end, |
3940 | 3948 | OrbitalType_end); |
3941 | 3949 | MallocerFreer::GetInstance()->Malloc<double>(&tmpMatrixBC, |
3942 | 3950 | OrbitalType_end, |
3943 | 3951 | OrbitalType_end); |
3952 | + MallocerFreer::GetInstance()->Malloc<double>(&tmpVectorBC, | |
3953 | + OrbitalType_end*OrbitalType_end); | |
3944 | 3954 | bool symmetrize = false; |
3945 | 3955 | #pragma omp for schedule(auto) |
3946 | 3956 | for(int B=A+1; B<totalAtomNumber; B++){ |
3947 | 3957 | const Atom& atomB = *molecule.GetAtom(B); |
3948 | 3958 | this->CalcDiatomicOverlapAOsInDiatomicFrame(diatomicOverlapAOs, atomA, atomB); |
3949 | 3959 | this->CalcRotatingMatrix(rotatingMatrix, atomA, atomB); |
3950 | - this->RotateDiatmicOverlapAOsToSpaceFrame(diatomicOverlapAOs, rotatingMatrix, tmpOldDiatomicOverlapAOs, tmpMatrixBC); | |
3960 | + this->RotateDiatmicOverlapAOsToSpaceFrame(diatomicOverlapAOs, rotatingMatrix, tmpDiatomicOverlapAOs, tmpOldDiatomicOverlapAOs, tmpMatrixBC, tmpVectorBC); | |
3951 | 3961 | this->SetOverlapAOsElement(overlapAOs, diatomicOverlapAOs, atomA, atomB, symmetrize); |
3952 | 3962 | } // end of loop B parallelized with openMP |
3953 | 3963 |
@@ -3957,12 +3967,16 @@ void Cndo2::CalcOverlapAOs(double** overlapAOs, const Molecule& molecule) const{ | ||
3957 | 3967 | ex.Serialize(ompErrors); |
3958 | 3968 | } |
3959 | 3969 | this->FreeDiatomicOverlapAOsAndRotatingMatrix(&diatomicOverlapAOs, &rotatingMatrix); |
3970 | + MallocerFreer::GetInstance()->Free<double>(&tmpDiatomicOverlapAOs, | |
3971 | + OrbitalType_end*OrbitalType_end); | |
3960 | 3972 | MallocerFreer::GetInstance()->Free<double>(&tmpOldDiatomicOverlapAOs, |
3961 | 3973 | OrbitalType_end, |
3962 | 3974 | OrbitalType_end); |
3963 | 3975 | MallocerFreer::GetInstance()->Free<double>(&tmpMatrixBC, |
3964 | 3976 | OrbitalType_end, |
3965 | 3977 | OrbitalType_end); |
3978 | + MallocerFreer::GetInstance()->Free<double>(&tmpVectorBC, | |
3979 | + OrbitalType_end*OrbitalType_end); | |
3966 | 3980 | } // end of omp-parallelized region |
3967 | 3981 | // Exception throwing for omp-region |
3968 | 3982 | if(!ompErrors.str().empty()){ |
@@ -4020,7 +4034,9 @@ void Cndo2::CalcDiatomicOverlapAOs1stDerivatives(double*** diatomicOverlapAOs1st | ||
4020 | 4034 | double** tmpRotMat1stDeriv, |
4021 | 4035 | double*** tmpRotMat1stDerivs, |
4022 | 4036 | double** tmpRotatedDiatomicOverlap, |
4023 | - double** tmpMatrix, | |
4037 | + double* tmpRotatedDiatomicOverlapVec, | |
4038 | + double** tmpMatrixBC, | |
4039 | + double* tmpVectorBC, | |
4024 | 4040 | const Atom& atomA, |
4025 | 4041 | const Atom& atomB) const{ |
4026 | 4042 | double cartesian[CartesianType_end] = {atomA.GetXyz()[XAxis] - atomB.GetXyz()[XAxis], |
@@ -4055,7 +4071,9 @@ void Cndo2::CalcDiatomicOverlapAOs1stDerivatives(double*** diatomicOverlapAOs1st | ||
4055 | 4071 | tmpRotMat, |
4056 | 4072 | beta, |
4057 | 4073 | tmpRotatedDiatomicOverlap, |
4058 | - tmpMatrix); | |
4074 | + tmpRotatedDiatomicOverlapVec, | |
4075 | + tmpMatrixBC, | |
4076 | + tmpVectorBC); | |
4059 | 4077 | alpha = 1.0; |
4060 | 4078 | beta = 1.0; |
4061 | 4079 | MolDS_wrappers::Blas::GetInstance()->Dgemmm(isColumnMajorRotatingMatrix, |
@@ -4068,7 +4086,9 @@ void Cndo2::CalcDiatomicOverlapAOs1stDerivatives(double*** diatomicOverlapAOs1st | ||
4068 | 4086 | tmpRotMat, |
4069 | 4087 | beta, |
4070 | 4088 | tmpRotatedDiatomicOverlap, |
4071 | - tmpMatrix); | |
4089 | + tmpRotatedDiatomicOverlapVec, | |
4090 | + tmpMatrixBC, | |
4091 | + tmpVectorBC); | |
4072 | 4092 | MolDS_wrappers::Blas::GetInstance()->Dgemmm(isColumnMajorRotatingMatrix, |
4073 | 4093 | isColumnMajorDiaOverlapAOs, |
4074 | 4094 | !isColumnMajorRotatingMatrix, |
@@ -4079,7 +4099,9 @@ void Cndo2::CalcDiatomicOverlapAOs1stDerivatives(double*** diatomicOverlapAOs1st | ||
4079 | 4099 | tmpRotMat1stDeriv, |
4080 | 4100 | beta, |
4081 | 4101 | tmpRotatedDiatomicOverlap, |
4082 | - tmpMatrix); | |
4102 | + tmpRotatedDiatomicOverlapVec, | |
4103 | + tmpMatrixBC, | |
4104 | + tmpVectorBC); | |
4083 | 4105 | MolDS_wrappers::Blas::GetInstance()->Dcopy(OrbitalType_end*OrbitalType_end, |
4084 | 4106 | &tmpRotatedDiatomicOverlap[0][0], incrementOne, |
4085 | 4107 | &diatomicOverlapAOs1stDerivs[0][0][c], CartesianType_end); |
@@ -4123,7 +4145,9 @@ void Cndo2::CalcDiatomicOverlapAOs1stDerivatives(double*** diatomicOverlapAOs1st | ||
4123 | 4145 | double** tmpRotMat1stDeriv, |
4124 | 4146 | double*** tmpRotMat1stDerivs, |
4125 | 4147 | double** tmpRotatedDiatomicOverlap, |
4126 | - double** tmpMatrix, | |
4148 | + double* tmpRotatedDiatomicOverlapVec, | |
4149 | + double** tmpMatrixBC, | |
4150 | + double* tmpVectorBC, | |
4127 | 4151 | int indexAtomA, |
4128 | 4152 | int indexAtomB) const{ |
4129 | 4153 | this->CalcDiatomicOverlapAOs1stDerivatives(diatomicOverlapAOs1stDerivs, |
@@ -4133,7 +4157,9 @@ void Cndo2::CalcDiatomicOverlapAOs1stDerivatives(double*** diatomicOverlapAOs1st | ||
4133 | 4157 | tmpRotMat1stDeriv, |
4134 | 4158 | tmpRotMat1stDerivs, |
4135 | 4159 | tmpRotatedDiatomicOverlap, |
4136 | - tmpMatrix, | |
4160 | + tmpRotatedDiatomicOverlapVec, | |
4161 | + tmpMatrixBC, | |
4162 | + tmpVectorBC, | |
4137 | 4163 | *this->molecule->GetAtom(indexAtomA), |
4138 | 4164 | *this->molecule->GetAtom(indexAtomB)); |
4139 | 4165 | } |
@@ -5940,10 +5966,12 @@ void Cndo2::CalcDiatomicOverlapAOs2ndDerivativeInDiatomicFrame(double** diatomic | ||
5940 | 5966 | } |
5941 | 5967 | |
5942 | 5968 | // see (B.63) in Pople book. |
5943 | -void Cndo2::RotateDiatmicOverlapAOsToSpaceFrame(double** diatomicOverlapAOs, | |
5969 | +void Cndo2::RotateDiatmicOverlapAOsToSpaceFrame(double** diatomicOverlapAOs, | |
5944 | 5970 | double const* const* rotatingMatrix, |
5945 | - double** tmpOldDiatomicOverlapAOs, | |
5946 | - double** tmpMatrixBC) const{ | |
5971 | + double* tmpDiatomicOverlapAOs, | |
5972 | + double** tmpOldDiatomicOverlapAOs, | |
5973 | + double** tmpMatrixBC, | |
5974 | + double* tmpVectorBC) const{ | |
5947 | 5975 | #ifdef MOLDS_DBG |
5948 | 5976 | if(diatomicOverlapAOs==NULL){ |
5949 | 5977 | throw MolDSException(this->errorMessageRotDiaOverlapAOsToSpaceFrameNullDiaMatrix); |
@@ -5951,12 +5979,18 @@ void Cndo2::RotateDiatmicOverlapAOsToSpaceFrame(double** diatomicOverlapAOs, | ||
5951 | 5979 | if(rotatingMatrix==NULL){ |
5952 | 5980 | throw MolDSException(this->errorMessageRotDiaOverlapAOsToSpaceFrameNullRotMatrix); |
5953 | 5981 | } |
5982 | + if(tmpDiatomicOverlapAOs==NULL){ | |
5983 | + throw MolDSException(this->errorMessageRotDiaOverlapAOsToSpaceFrameNullTmpDiaMatrix); | |
5984 | + } | |
5954 | 5985 | if(tmpOldDiatomicOverlapAOs==NULL){ |
5955 | 5986 | throw MolDSException(this->errorMessageRotDiaOverlapAOsToSpaceFrameNullTmpOldDiaMatrix); |
5956 | 5987 | } |
5957 | 5988 | if(tmpMatrixBC==NULL){ |
5958 | 5989 | throw MolDSException(this->errorMessageRotDiaOverlapAOsToSpaceFrameNullTmpMatrixBC); |
5959 | 5990 | } |
5991 | + if(tmpVectorBC==NULL){ | |
5992 | + throw MolDSException(this->errorMessageRotDiaOverlapAOsToSpaceFrameNullTmpVectorBC); | |
5993 | + } | |
5960 | 5994 | #endif |
5961 | 5995 | for(int i=0; i<OrbitalType_end; i++){ |
5962 | 5996 | for(int j=0; j<OrbitalType_end; j++){ |
@@ -5978,7 +6012,9 @@ void Cndo2::RotateDiatmicOverlapAOsToSpaceFrame(double** diatomicOverlapAOs, | ||
5978 | 6012 | rotatingMatrix, |
5979 | 6013 | beta, |
5980 | 6014 | diatomicOverlapAOs, |
5981 | - tmpMatrixBC); | |
6015 | + tmpDiatomicOverlapAOs, | |
6016 | + tmpMatrixBC, | |
6017 | + tmpVectorBC); | |
5982 | 6018 | /* |
5983 | 6019 | for(int i=0;i<OrbitalType_end;i++){ |
5984 | 6020 | for(int j=0;j<OrbitalType_end;j++){ |
@@ -201,7 +201,9 @@ protected: | ||
201 | 201 | double** tmpRotMat1stDeriv, |
202 | 202 | double*** tmpRotMat1stDerivs, |
203 | 203 | double** tmpRotatedDiatomicOverlap, |
204 | - double** tmpMatrix, | |
204 | + double* tmpRotatedDiatomicOverlapVec, | |
205 | + double** tmpMatrixBC, | |
206 | + double* tmpVectorBC, | |
205 | 207 | const MolDS_base_atoms::Atom& atomA, |
206 | 208 | const MolDS_base_atoms::Atom& atomB) const; |
207 | 209 | void CalcDiatomicOverlapAOs1stDerivatives(double*** diatomicOverlapAOs1stDerivs, |
@@ -211,7 +213,9 @@ protected: | ||
211 | 213 | double** tmpRotMat1stDeriv, |
212 | 214 | double*** tmpRotMat1stDerivs, |
213 | 215 | double** tmpRotatedDiatomicOverlap, |
214 | - double** tmpMatrix, | |
216 | + double* tmpRotatedDiatomicOverlapVec, | |
217 | + double** tmpMatrixBC, | |
218 | + double* tmpVectorBC, | |
215 | 219 | int indexAtomA, |
216 | 220 | int indexAtomB) const; |
217 | 221 | void CalcDiatomicOverlapAOs2ndDerivatives(double**** overlapAOs2ndDeri, |
@@ -280,8 +284,10 @@ private: | ||
280 | 284 | std::string errorMessageCalcRotatingMatrixNullRotMatrix; |
281 | 285 | std::string errorMessageRotDiaOverlapAOsToSpaceFrameNullDiaMatrix; |
282 | 286 | std::string errorMessageRotDiaOverlapAOsToSpaceFrameNullRotMatrix; |
287 | + std::string errorMessageRotDiaOverlapAOsToSpaceFrameNullTmpDiaMatrix; | |
283 | 288 | std::string errorMessageRotDiaOverlapAOsToSpaceFrameNullTmpOldDiaMatrix; |
284 | 289 | std::string errorMessageRotDiaOverlapAOsToSpaceFrameNullTmpMatrixBC; |
290 | + std::string errorMessageRotDiaOverlapAOsToSpaceFrameNullTmpVectorBC; | |
285 | 291 | std::string errorMessageSetOverlapAOsElementNullDiaMatrix; |
286 | 292 | std::string errorMessageCalcOverlapAOsDifferentConfigurationsDiffAOs; |
287 | 293 | std::string errorMessageCalcOverlapAOsDifferentConfigurationsDiffAtoms; |
@@ -459,10 +465,12 @@ private: | ||
459 | 465 | double const* atomicElectronPopulation, |
460 | 466 | double const* const* const* const* const* const* twoElecTwoCore, |
461 | 467 | bool isGuess) const; |
462 | - void RotateDiatmicOverlapAOsToSpaceFrame(double** diatomicOverlapAOs, | |
468 | + void RotateDiatmicOverlapAOsToSpaceFrame(double** diatomicOverlapAOs, | |
463 | 469 | double const* const* rotatingMatrix, |
464 | - double** oldDiatomicOverlapAOs, | |
465 | - double** tmpMatrixBC) const; | |
470 | + double* tmpDiatomicOverlapAOs, | |
471 | + double** tmpOldDiatomicOverlapAOs, | |
472 | + double** tmpMatrixBC, | |
473 | + double* tmpVectorBC) const; | |
466 | 474 | void SetOverlapAOsElement(double** overlapAOs, |
467 | 475 | double const* const* diatomicOverlapAOs, |
468 | 476 | const MolDS_base_atoms::Atom& atomA, |
@@ -1006,7 +1006,9 @@ void Mndo::MallocTempMatricesEachThreadCalcHessianSCF(double***** diatomicOve | ||
1006 | 1006 | double**** tmpDiaOverlapAOs1stDerivs, |
1007 | 1007 | double***** tmpDiaOverlapAOs2ndDerivs, |
1008 | 1008 | double*** tmpRotatedDiatomicOverlap, |
1009 | - double*** tmpMatrix) const{ | |
1009 | + double** tmpRotatedDiatomicOverlapVec, | |
1010 | + double*** tmpMatrixBC, | |
1011 | + double** tmpVectorBC) const{ | |
1010 | 1012 | MallocerFreer::GetInstance()->Malloc<double>(diatomicOverlapAOs1stDerivs, |
1011 | 1013 | this->molecule->GetNumberAtoms(), |
1012 | 1014 | OrbitalType_end, |
@@ -1079,9 +1081,13 @@ void Mndo::MallocTempMatricesEachThreadCalcHessianSCF(double***** diatomicOve | ||
1079 | 1081 | CartesianType_end); |
1080 | 1082 | MallocerFreer::GetInstance()->Malloc<double>(tmpRotatedDiatomicOverlap, |
1081 | 1083 | OrbitalType_end, OrbitalType_end); |
1082 | - MallocerFreer::GetInstance()->Malloc<double>(tmpMatrix, | |
1084 | + MallocerFreer::GetInstance()->Malloc<double>(tmpRotatedDiatomicOverlapVec, | |
1085 | + OrbitalType_end*OrbitalType_end); | |
1086 | + MallocerFreer::GetInstance()->Malloc<double>(tmpMatrixBC, | |
1083 | 1087 | OrbitalType_end, |
1084 | 1088 | OrbitalType_end); |
1089 | + MallocerFreer::GetInstance()->Malloc<double>(tmpVectorBC, | |
1090 | + OrbitalType_end*OrbitalType_end); | |
1085 | 1091 | } |
1086 | 1092 | |
1087 | 1093 | void Mndo::FreeTempMatricesEachThreadCalcHessianSCF(double***** diatomicOverlapAOs1stDerivs, |
@@ -1100,7 +1106,9 @@ void Mndo::FreeTempMatricesEachThreadCalcHessianSCF(double***** diatomicOverl | ||
1100 | 1106 | double**** tmpDiaOverlapAOs1stDerivs, |
1101 | 1107 | double***** tmpDiaOverlapAOs2ndDerivs, |
1102 | 1108 | double*** tmpRotatedDiatomicOverlap, |
1103 | - double*** tmpMatrix) const{ | |
1109 | + double** tmpRotatedDiatomicOverlapVec, | |
1110 | + double*** tmpMatrixBC, | |
1111 | + double** tmpVectorBC) const{ | |
1104 | 1112 | MallocerFreer::GetInstance()->Free<double>(diatomicOverlapAOs1stDerivs, |
1105 | 1113 | this->molecule->GetNumberAtoms(), |
1106 | 1114 | OrbitalType_end, |
@@ -1174,9 +1182,13 @@ void Mndo::FreeTempMatricesEachThreadCalcHessianSCF(double***** diatomicOverl | ||
1174 | 1182 | MallocerFreer::GetInstance()->Free<double>(tmpRotatedDiatomicOverlap, |
1175 | 1183 | OrbitalType_end, |
1176 | 1184 | OrbitalType_end); |
1177 | - MallocerFreer::GetInstance()->Free<double>(tmpMatrix, | |
1185 | + MallocerFreer::GetInstance()->Free<double>(tmpRotatedDiatomicOverlapVec, | |
1186 | + OrbitalType_end*OrbitalType_end); | |
1187 | + MallocerFreer::GetInstance()->Free<double>(tmpMatrixBC, | |
1178 | 1188 | OrbitalType_end, |
1179 | 1189 | OrbitalType_end); |
1190 | + MallocerFreer::GetInstance()->Free<double>(tmpVectorBC, | |
1191 | + OrbitalType_end*OrbitalType_end); | |
1180 | 1192 | } |
1181 | 1193 | |
1182 | 1194 | // mu and nu is included in atomA' AO. |
@@ -1734,7 +1746,9 @@ void Mndo::CalcHessianSCF(double** hessianSCF, bool isMassWeighted) const{ | ||
1734 | 1746 | double**** tmpDiaOverlapAOs2ndDerivs = NULL; //sedond derivatives of the diaOverlapAOs. This derivatives are related to the all Cartesian coordinates. |
1735 | 1747 | double** tmpRotMat1stDeriv = NULL; |
1736 | 1748 | double** tmpRotatedDiatomicOverlap = NULL; |
1737 | - double** tmpMatrix = NULL; | |
1749 | + double* tmpRotatedDiatomicOverlapVec = NULL; // used in dgemmm | |
1750 | + double** tmpMatrixBC = NULL; // used in dgemmm | |
1751 | + double* tmpVectorBC = NULL; // used in dgemmm | |
1738 | 1752 | |
1739 | 1753 | try{ |
1740 | 1754 | this->MallocTempMatricesEachThreadCalcHessianSCF(&diatomicOverlapAOs1stDerivs, |
@@ -1753,7 +1767,9 @@ void Mndo::CalcHessianSCF(double** hessianSCF, bool isMassWeighted) const{ | ||
1753 | 1767 | &tmpDiaOverlapAOs1stDerivs, |
1754 | 1768 | &tmpDiaOverlapAOs2ndDerivs, |
1755 | 1769 | &tmpRotatedDiatomicOverlap, |
1756 | - &tmpMatrix); | |
1770 | + &tmpRotatedDiatomicOverlapVec, | |
1771 | + &tmpMatrixBC, | |
1772 | + &tmpVectorBC); | |
1757 | 1773 | #pragma omp for schedule(auto) |
1758 | 1774 | for(int indexAtomA=0; indexAtomA<this->molecule->GetNumberAtoms(); indexAtomA++){ |
1759 | 1775 | const Atom& atomA = *this->molecule->GetAtom(indexAtomA); |
@@ -1771,7 +1787,9 @@ void Mndo::CalcHessianSCF(double** hessianSCF, bool isMassWeighted) const{ | ||
1771 | 1787 | tmpRotMat1stDeriv, |
1772 | 1788 | tmpRotMat1stDerivs, |
1773 | 1789 | tmpRotatedDiatomicOverlap, |
1774 | - tmpMatrix, | |
1790 | + tmpRotatedDiatomicOverlapVec, | |
1791 | + tmpMatrixBC, | |
1792 | + tmpVectorBC, | |
1775 | 1793 | indexAtomA, |
1776 | 1794 | indexAtomB); |
1777 | 1795 | this->CalcDiatomicOverlapAOs2ndDerivatives(diatomicOverlapAOs2ndDerivs[indexAtomB], |
@@ -1868,7 +1886,9 @@ void Mndo::CalcHessianSCF(double** hessianSCF, bool isMassWeighted) const{ | ||
1868 | 1886 | &tmpDiaOverlapAOs1stDerivs, |
1869 | 1887 | &tmpDiaOverlapAOs2ndDerivs, |
1870 | 1888 | &tmpRotatedDiatomicOverlap, |
1871 | - &tmpMatrix); | |
1889 | + &tmpRotatedDiatomicOverlapVec, | |
1890 | + &tmpMatrixBC, | |
1891 | + &tmpVectorBC); | |
1872 | 1892 | }// end of omp-region |
1873 | 1893 | // Exception throwing for omp-region |
1874 | 1894 | if(!ompErrors.str().empty()){ |
@@ -2059,7 +2079,9 @@ void Mndo::CalcStaticFirstOrderFock(double* staticFirstOrderFock, | ||
2059 | 2079 | double** tmpDiaOverlapAOs1stDerivInDiaFrame = NULL; // first derivative of the diaOverlapAOs. This derivative is related to the distance between two atoms. |
2060 | 2080 | double** tmpRotMat1stDeriv = NULL; |
2061 | 2081 | double** tmpRotatedDiatomicOverlap = NULL; |
2062 | - double** tmpMatrix = NULL; | |
2082 | + double* tmpRotatedDiatomicOverlapVec = NULL; | |
2083 | + double** tmpMatrixBC = NULL; | |
2084 | + double* tmpVectorBC = NULL; | |
2063 | 2085 | try{ |
2064 | 2086 | this->MallocTempMatricesStaticFirstOrderFock(&diatomicTwoElecTwoCore1stDerivs, |
2065 | 2087 | &diatomicOverlapAOs1stDerivs, |
@@ -2070,7 +2092,9 @@ void Mndo::CalcStaticFirstOrderFock(double* staticFirstOrderFock, | ||
2070 | 2092 | MallocerFreer::GetInstance()->Malloc<double>(&tmpDiaOverlapAOs1stDerivInDiaFrame, OrbitalType_end, OrbitalType_end); |
2071 | 2093 | MallocerFreer::GetInstance()->Malloc<double>(&tmpRotMat1stDeriv, OrbitalType_end, OrbitalType_end); |
2072 | 2094 | MallocerFreer::GetInstance()->Malloc<double>(&tmpRotatedDiatomicOverlap, OrbitalType_end, OrbitalType_end); |
2073 | - MallocerFreer::GetInstance()->Malloc<double>(&tmpMatrix, OrbitalType_end, OrbitalType_end); | |
2095 | + MallocerFreer::GetInstance()->Malloc<double>(&tmpRotatedDiatomicOverlapVec, OrbitalType_end*OrbitalType_end); | |
2096 | + MallocerFreer::GetInstance()->Malloc<double>(&tmpMatrixBC, OrbitalType_end, OrbitalType_end); | |
2097 | + MallocerFreer::GetInstance()->Malloc<double>(&tmpVectorBC, OrbitalType_end*OrbitalType_end); | |
2074 | 2098 | const Atom& atomA = *molecule->GetAtom(indexAtomA); |
2075 | 2099 | int firstAOIndexA = atomA.GetFirstAOIndex(); |
2076 | 2100 | int lastAOIndexA = atomA.GetLastAOIndex(); |
@@ -2096,7 +2120,9 @@ void Mndo::CalcStaticFirstOrderFock(double* staticFirstOrderFock, | ||
2096 | 2120 | tmpRotMat1stDeriv, |
2097 | 2121 | tmpRotMat1stDerivs, |
2098 | 2122 | tmpRotatedDiatomicOverlap, |
2099 | - tmpMatrix, | |
2123 | + tmpRotatedDiatomicOverlapVec, | |
2124 | + tmpMatrixBC, | |
2125 | + tmpVectorBC, | |
2100 | 2126 | atomA, |
2101 | 2127 | atomB); |
2102 | 2128 |
@@ -2200,7 +2226,9 @@ void Mndo::CalcStaticFirstOrderFock(double* staticFirstOrderFock, | ||
2200 | 2226 | MallocerFreer::GetInstance()->Free<double>(&tmpRotMat1stDeriv, OrbitalType_end, OrbitalType_end); |
2201 | 2227 | //MallocerFreer::GetInstance()->Free<double>(&tmpRotMat1stDerivs, OrbitalType_end, OrbitalType_end, CartesianType_end); |
2202 | 2228 | MallocerFreer::GetInstance()->Free<double>(&tmpRotatedDiatomicOverlap, OrbitalType_end, OrbitalType_end); |
2203 | - MallocerFreer::GetInstance()->Free<double>(&tmpMatrix, OrbitalType_end, OrbitalType_end); | |
2229 | + MallocerFreer::GetInstance()->Free<double>(&tmpRotatedDiatomicOverlapVec, OrbitalType_end*OrbitalType_end); | |
2230 | + MallocerFreer::GetInstance()->Free<double>(&tmpMatrixBC, OrbitalType_end, OrbitalType_end); | |
2231 | + MallocerFreer::GetInstance()->Free<double>(&tmpVectorBC, OrbitalType_end*OrbitalType_end); | |
2204 | 2232 | throw ex; |
2205 | 2233 | } |
2206 | 2234 | this->FreeTempMatricesStaticFirstOrderFock(&diatomicTwoElecTwoCore1stDerivs, |
@@ -2214,7 +2242,9 @@ void Mndo::CalcStaticFirstOrderFock(double* staticFirstOrderFock, | ||
2214 | 2242 | MallocerFreer::GetInstance()->Free<double>(&tmpRotMat1stDeriv, OrbitalType_end, OrbitalType_end); |
2215 | 2243 | //MallocerFreer::GetInstance()->Free<double>(&tmpRotMat1stDerivs, OrbitalType_end, OrbitalType_end, CartesianType_end); |
2216 | 2244 | MallocerFreer::GetInstance()->Free<double>(&tmpRotatedDiatomicOverlap, OrbitalType_end, OrbitalType_end); |
2217 | - MallocerFreer::GetInstance()->Free<double>(&tmpMatrix, OrbitalType_end, OrbitalType_end); | |
2245 | + MallocerFreer::GetInstance()->Free<double>(&tmpRotatedDiatomicOverlapVec, OrbitalType_end*OrbitalType_end); | |
2246 | + MallocerFreer::GetInstance()->Free<double>(&tmpMatrixBC, OrbitalType_end, OrbitalType_end); | |
2247 | + MallocerFreer::GetInstance()->Free<double>(&tmpVectorBC, OrbitalType_end*OrbitalType_end); | |
2218 | 2248 | |
2219 | 2249 | /* |
2220 | 2250 | printf("staticFirstOrderFock(atomA:%d axis:%s)\n",indexAtomA,CartesianTypeStr(axisA)); |
@@ -2585,8 +2615,10 @@ void Mndo::CalcForce(const vector<int>& elecStates){ | ||
2585 | 2615 | double** tmpDiaOverlapAOsInDiaFrame = NULL; // diatomic overlapAOs in diatomic frame |
2586 | 2616 | double** tmpDiaOverlapAOs1stDerivInDiaFrame = NULL; // first derivative of the diaOverlapAOs. This derivative is related to the distance between two atoms. |
2587 | 2617 | double** tmpRotMat1stDeriv = NULL; |
2588 | - double** tmpRotatedDiatomicOverlap = NULL; | |
2589 | - double** tmpMatrix = NULL; | |
2618 | + double** tmpRotatedDiatomicOverlap = NULL; // used in dgemmm | |
2619 | + double* tmpRotatedDiatomicOverlapVec = NULL; // used in dgemmm | |
2620 | + double** tmpMatrixBC = NULL; // used in dgemmm | |
2621 | + double* tmpVectorBC = NULL; // used in dgemmm | |
2590 | 2622 | try{ |
2591 | 2623 | this->MallocTempMatricesCalcForce(&diatomicOverlapAOs1stDerivs, |
2592 | 2624 | &diatomicTwoElecTwoCore1stDerivs, |
@@ -2596,7 +2628,9 @@ void Mndo::CalcForce(const vector<int>& elecStates){ | ||
2596 | 2628 | &tmpRotMat1stDeriv, |
2597 | 2629 | &tmpRotMat1stDerivs, |
2598 | 2630 | &tmpRotatedDiatomicOverlap, |
2599 | - &tmpMatrix, | |
2631 | + &tmpRotatedDiatomicOverlapVec, | |
2632 | + &tmpMatrixBC, | |
2633 | + &tmpVectorBC, | |
2600 | 2634 | &tmpDiatomicTwoElecTwoCore); |
2601 | 2635 | |
2602 | 2636 | #pragma omp for schedule(auto) |
@@ -2614,7 +2648,9 @@ void Mndo::CalcForce(const vector<int>& elecStates){ | ||
2614 | 2648 | tmpRotMat1stDeriv, |
2615 | 2649 | tmpRotMat1stDerivs, |
2616 | 2650 | tmpRotatedDiatomicOverlap, |
2617 | - tmpMatrix, | |
2651 | + tmpRotatedDiatomicOverlapVec, | |
2652 | + tmpMatrixBC, | |
2653 | + tmpVectorBC, | |
2618 | 2654 | atomA, |
2619 | 2655 | atomB); |
2620 | 2656 | // calc. first derivative of two elec two core interaction |
@@ -2737,7 +2773,9 @@ void Mndo::CalcForce(const vector<int>& elecStates){ | ||
2737 | 2773 | &tmpRotMat1stDeriv, |
2738 | 2774 | &tmpRotMat1stDerivs, |
2739 | 2775 | &tmpRotatedDiatomicOverlap, |
2740 | - &tmpMatrix, | |
2776 | + &tmpRotatedDiatomicOverlapVec, | |
2777 | + &tmpMatrixBC, | |
2778 | + &tmpVectorBC, | |
2741 | 2779 | &tmpDiatomicTwoElecTwoCore); |
2742 | 2780 | } // end of omp-parallelized region |
2743 | 2781 | // Exception throwing for omp-region |
@@ -2759,7 +2797,9 @@ void Mndo::MallocTempMatricesCalcForce(double**** diatomicOverlapAOs1stDerivs, | ||
2759 | 2797 | double*** tmpRotMat1stDeriv, |
2760 | 2798 | double**** tmpRotMat1stDerivs, |
2761 | 2799 | double*** tmpRotatedDiatomicOverlap, |
2762 | - double*** tmpMatrix, | |
2800 | + double** tmpRotatedDiatomicOverlapVec, | |
2801 | + double*** tmpMatrixBC, | |
2802 | + double** tmpVectorBC, | |
2763 | 2803 | double***** tmpDiatomicTwoElecTwoCore) const{ |
2764 | 2804 | MallocerFreer::GetInstance()->Malloc<double>(diatomicOverlapAOs1stDerivs, |
2765 | 2805 | OrbitalType_end, |
@@ -2790,9 +2830,13 @@ void Mndo::MallocTempMatricesCalcForce(double**** diatomicOverlapAOs1stDerivs, | ||
2790 | 2830 | MallocerFreer::GetInstance()->Malloc<double>(tmpRotatedDiatomicOverlap, |
2791 | 2831 | OrbitalType_end, |
2792 | 2832 | OrbitalType_end); |
2793 | - MallocerFreer::GetInstance()->Malloc<double>(tmpMatrix, | |
2833 | + MallocerFreer::GetInstance()->Malloc<double>(tmpRotatedDiatomicOverlapVec, | |
2834 | + OrbitalType_end*OrbitalType_end); | |
2835 | + MallocerFreer::GetInstance()->Malloc<double>(tmpMatrixBC, | |
2794 | 2836 | OrbitalType_end, |
2795 | 2837 | OrbitalType_end); |
2838 | + MallocerFreer::GetInstance()->Malloc<double>(tmpVectorBC, | |
2839 | + OrbitalType_end*OrbitalType_end); | |
2796 | 2840 | MallocerFreer::GetInstance()->Malloc<double>(tmpDiatomicTwoElecTwoCore, |
2797 | 2841 | dxy, |
2798 | 2842 | dxy, |
@@ -2808,7 +2852,9 @@ void Mndo::FreeTempMatricesCalcForce(double**** diatomicOverlapAOs1stDerivs, | ||
2808 | 2852 | double*** tmpRotMat1stDeriv, |
2809 | 2853 | double**** tmpRotMat1stDerivs, |
2810 | 2854 | double*** tmpRotatedDiatomicOverlap, |
2811 | - double*** tmpMatrix, | |
2855 | + double** tmpRotatedDiatomicOverlapVec, | |
2856 | + double*** tmpMatrixBC, | |
2857 | + double** tmpVectorBC, | |
2812 | 2858 | double***** tmpDiatomicTwoElecTwoCore) const{ |
2813 | 2859 | MallocerFreer::GetInstance()->Free<double>(diatomicOverlapAOs1stDerivs, |
2814 | 2860 | OrbitalType_end, |
@@ -2839,9 +2885,13 @@ void Mndo::FreeTempMatricesCalcForce(double**** diatomicOverlapAOs1stDerivs, | ||
2839 | 2885 | MallocerFreer::GetInstance()->Free<double>(tmpRotatedDiatomicOverlap, |
2840 | 2886 | OrbitalType_end, |
2841 | 2887 | OrbitalType_end); |
2842 | - MallocerFreer::GetInstance()->Free<double>(tmpMatrix, | |
2888 | + MallocerFreer::GetInstance()->Free<double>(tmpRotatedDiatomicOverlapVec, | |
2889 | + OrbitalType_end*OrbitalType_end); | |
2890 | + MallocerFreer::GetInstance()->Free<double>(tmpMatrixBC, | |
2843 | 2891 | OrbitalType_end, |
2844 | 2892 | OrbitalType_end); |
2893 | + MallocerFreer::GetInstance()->Free<double>(tmpVectorBC, | |
2894 | + OrbitalType_end*OrbitalType_end); | |
2845 | 2895 | MallocerFreer::GetInstance()->Free<double>(tmpDiatomicTwoElecTwoCore, |
2846 | 2896 | dxy, |
2847 | 2897 | dxy, |
@@ -3463,17 +3513,26 @@ void Mndo::CalcTwoElecTwoCore(double****** twoElecTwoCore, | ||
3463 | 3513 | stringstream ompErrors; |
3464 | 3514 | #pragma omp parallel |
3465 | 3515 | { |
3466 | - double**** diatomicTwoElecTwoCore = NULL; | |
3467 | - double** tmpRotMat = NULL; | |
3468 | - double** tmpMatrixBC = NULL; | |
3516 | + double**** diatomicTwoElecTwoCore = NULL; | |
3517 | + double* tmpDiatomicTwoElecTwoCore = NULL; | |
3518 | + double** tmpRotMat = NULL; | |
3519 | + double** tmpMatrixBC = NULL; | |
3520 | + double* tmpVectorBC = NULL; | |
3469 | 3521 | try{ |
3470 | - MallocerFreer::GetInstance()->Malloc<double>(&diatomicTwoElecTwoCore, dxy, dxy, dxy, dxy); | |
3471 | - MallocerFreer::GetInstance()->Malloc<double>(&tmpRotMat, OrbitalType_end, OrbitalType_end); | |
3472 | - MallocerFreer::GetInstance()->Malloc<double>(&tmpMatrixBC, dxy*dxy, dxy*dxy); | |
3522 | + MallocerFreer::GetInstance()->Malloc<double>(&diatomicTwoElecTwoCore, dxy, dxy, dxy, dxy); | |
3523 | + MallocerFreer::GetInstance()->Malloc<double>(&tmpDiatomicTwoElecTwoCore, dxy*dxy*dxy*dxy); | |
3524 | + MallocerFreer::GetInstance()->Malloc<double>(&tmpRotMat, OrbitalType_end, OrbitalType_end); | |
3525 | + MallocerFreer::GetInstance()->Malloc<double>(&tmpMatrixBC, dxy*dxy, dxy*dxy); | |
3526 | + MallocerFreer::GetInstance()->Malloc<double>(&tmpVectorBC, dxy*dxy*dxy*dxy); | |
3473 | 3527 | // note that terms with condition a==b are not needed to calculate. |
3474 | 3528 | #pragma omp for schedule(auto) |
3475 | 3529 | for(int b=a+1; b<totalNumberAtoms; b++){ |
3476 | - this->CalcDiatomicTwoElecTwoCore(diatomicTwoElecTwoCore, tmpRotMat, tmpMatrixBC, a, b); | |
3530 | + this->CalcDiatomicTwoElecTwoCore(diatomicTwoElecTwoCore, | |
3531 | + tmpDiatomicTwoElecTwoCore, | |
3532 | + tmpRotMat, | |
3533 | + tmpMatrixBC, | |
3534 | + tmpVectorBC, | |
3535 | + a, b); | |
3477 | 3536 | |
3478 | 3537 | int i=0; |
3479 | 3538 | for(int mu=0; mu<dxy; mu++){ |
@@ -3498,9 +3557,11 @@ void Mndo::CalcTwoElecTwoCore(double****** twoElecTwoCore, | ||
3498 | 3557 | #pragma omp critical |
3499 | 3558 | ex.Serialize(ompErrors); |
3500 | 3559 | } |
3501 | - MallocerFreer::GetInstance()->Free<double>(&diatomicTwoElecTwoCore, dxy, dxy, dxy, dxy); | |
3502 | - MallocerFreer::GetInstance()->Free<double>(&tmpRotMat, OrbitalType_end, OrbitalType_end); | |
3503 | - MallocerFreer::GetInstance()->Free<double>(&tmpMatrixBC, dxy*dxy, dxy*dxy); | |
3560 | + MallocerFreer::GetInstance()->Free<double>(&diatomicTwoElecTwoCore, dxy, dxy, dxy, dxy); | |
3561 | + MallocerFreer::GetInstance()->Free<double>(&tmpDiatomicTwoElecTwoCore, dxy*dxy*dxy*dxy); | |
3562 | + MallocerFreer::GetInstance()->Free<double>(&tmpRotMat, OrbitalType_end, OrbitalType_end); | |
3563 | + MallocerFreer::GetInstance()->Free<double>(&tmpMatrixBC, dxy*dxy, dxy*dxy); | |
3564 | + MallocerFreer::GetInstance()->Free<double>(&tmpVectorBC, dxy*dxy*dxy*dxy); | |
3504 | 3565 | } // end of omp-parallelized region |
3505 | 3566 | // Exception throwing for omp-region |
3506 | 3567 | if(!ompErrors.str().empty()){ |
@@ -3556,8 +3617,10 @@ void Mndo::CalcTwoElecTwoCore(double****** twoElecTwoCore, | ||
3556 | 3617 | // Note taht d-orbital cannot be treated, |
3557 | 3618 | // that is, matrix[dxy][dxy][dxy][dxy] cannot be treatable. |
3558 | 3619 | void Mndo::CalcDiatomicTwoElecTwoCore(double**** matrix, |
3559 | - double** tmpRotMat, | |
3560 | - double** tmpMatrixBC, | |
3620 | + double* tmpVec, | |
3621 | + double** tmpRotMat, | |
3622 | + double** tmpMatrixBC, | |
3623 | + double* tmpVectorBC, | |
3561 | 3624 | int indexAtomA, |
3562 | 3625 | int indexAtomB) const{ |
3563 | 3626 | const Atom& atomA = *this->molecule->GetAtom(indexAtomA); |
@@ -3601,7 +3664,7 @@ void Mndo::CalcDiatomicTwoElecTwoCore(double**** matrix, | ||
3601 | 3664 | } |
3602 | 3665 | // rotate matirix into the space frame |
3603 | 3666 | this->CalcRotatingMatrix(tmpRotMat, atomA, atomB); |
3604 | - this->RotateDiatomicTwoElecTwoCoreToSpaceFrame(matrix, tmpRotMat, tmpMatrixBC); | |
3667 | + this->RotateDiatomicTwoElecTwoCoreToSpaceFrame(matrix, tmpVec, tmpRotMat, tmpMatrixBC, tmpVectorBC); | |
3605 | 3668 | |
3606 | 3669 | /* |
3607 | 3670 | this->OutputLog("(mu, nu | lambda, sigma) matrix\n"); |
@@ -3800,8 +3863,10 @@ void Mndo::CalcDiatomicTwoElecTwoCore2ndDerivatives(double****** matrix, | ||
3800 | 3863 | // Rotate 4-dimensional matrix from diatomic frame to space frame |
3801 | 3864 | // Note that in this method d-orbitals cannot be treated. |
3802 | 3865 | void Mndo::RotateDiatomicTwoElecTwoCoreToSpaceFrame(double**** matrix, |
3866 | + double* tmpVec, | |
3803 | 3867 | double const* const* rotatingMatrix, |
3804 | - double** tmpMatrixBC) const{ | |
3868 | + double** tmpMatrixBC, | |
3869 | + double* tmpVectorBC) const{ | |
3805 | 3870 | double oldMatrix[dxy][dxy][dxy][dxy]; |
3806 | 3871 | MolDS_wrappers::Blas::GetInstance()->Dcopy(dxy*dxy*dxy*dxy, &matrix[0][0][0][0], &oldMatrix[0][0][0][0]); |
3807 | 3872 |
@@ -3838,7 +3903,9 @@ void Mndo::RotateDiatomicTwoElecTwoCoreToSpaceFrame(double**** matrix, | ||
3838 | 3903 | &ptrTwiceRotatingMatrix[0], |
3839 | 3904 | beta, |
3840 | 3905 | &ptrMatrix[0], |
3841 | - tmpMatrixBC); | |
3906 | + tmpVec, | |
3907 | + tmpMatrixBC, | |
3908 | + tmpVectorBC); | |
3842 | 3909 | |
3843 | 3910 | /* |
3844 | 3911 | // rotate (slow algorithm) |
@@ -3886,7 +3953,9 @@ void Mndo::RotateDiatomicTwoElecTwoCore1stDerivativesToSpaceFrame( | ||
3886 | 3953 | double** twiceRotatingMatrixDerivB = NULL; |
3887 | 3954 | double** oldMatrix = NULL; |
3888 | 3955 | double** rotatedMatrix = NULL; |
3956 | + double* tmpRotatedVec = NULL; | |
3889 | 3957 | double** tmpMatrix = NULL; |
3958 | + double* tmpVector = NULL; | |
3890 | 3959 | double** ptrDiatomic = NULL; |
3891 | 3960 | try{ |
3892 | 3961 | this->MallocTempMatricesRotateDiatomicTwoElecTwoCore1stDerivs(&twiceRotatingMatrix, |
@@ -3894,7 +3963,9 @@ void Mndo::RotateDiatomicTwoElecTwoCore1stDerivativesToSpaceFrame( | ||
3894 | 3963 | &twiceRotatingMatrixDerivB, |
3895 | 3964 | &oldMatrix, |
3896 | 3965 | &rotatedMatrix, |
3966 | + &tmpRotatedVec, | |
3897 | 3967 | &tmpMatrix, |
3968 | + &tmpVector, | |
3898 | 3969 | &ptrDiatomic); |
3899 | 3970 | for(int mu=0; mu<dxy; mu++){ |
3900 | 3971 | for(int nu=0; nu<dxy; nu++){ |
@@ -3937,7 +4008,9 @@ void Mndo::RotateDiatomicTwoElecTwoCore1stDerivativesToSpaceFrame( | ||
3937 | 4008 | twiceRotatingMatrix, |
3938 | 4009 | beta, |
3939 | 4010 | rotatedMatrix, |
3940 | - tmpMatrix); | |
4011 | + tmpRotatedVec, | |
4012 | + tmpMatrix, | |
4013 | + tmpVector); | |
3941 | 4014 | alpha = 1.0; |
3942 | 4015 | beta = 1.0; |
3943 | 4016 | MolDS_wrappers::Blas::GetInstance()->Dgemmm(isColumnMajorTwiceRotatingMatrix, |
@@ -3950,7 +4023,9 @@ void Mndo::RotateDiatomicTwoElecTwoCore1stDerivativesToSpaceFrame( | ||
3950 | 4023 | twiceRotatingMatrix, |
3951 | 4024 | beta, |
3952 | 4025 | rotatedMatrix, |
3953 | - tmpMatrix); | |
4026 | + tmpRotatedVec, | |
4027 | + tmpMatrix, | |
4028 | + tmpVector); | |
3954 | 4029 | MolDS_wrappers::Blas::GetInstance()->Dgemmm(isColumnMajorTwiceRotatingMatrix, |
3955 | 4030 | isColumnMajorOldMatrix, |
3956 | 4031 | !isColumnMajorTwiceRotatingMatrix, |
@@ -3961,7 +4036,9 @@ void Mndo::RotateDiatomicTwoElecTwoCore1stDerivativesToSpaceFrame( | ||
3961 | 4036 | twiceRotatingMatrix, |
3962 | 4037 | beta, |
3963 | 4038 | rotatedMatrix, |
3964 | - tmpMatrix); | |
4039 | + tmpRotatedVec, | |
4040 | + tmpMatrix, | |
4041 | + tmpVector); | |
3965 | 4042 | MolDS_wrappers::Blas::GetInstance()->Dgemmm(isColumnMajorTwiceRotatingMatrix, |
3966 | 4043 | isColumnMajorOldMatrix, |
3967 | 4044 | !isColumnMajorTwiceRotatingMatrix, |
@@ -3972,7 +4049,9 @@ void Mndo::RotateDiatomicTwoElecTwoCore1stDerivativesToSpaceFrame( | ||
3972 | 4049 | twiceRotatingMatrixDerivA, |
3973 | 4050 | beta, |
3974 | 4051 | rotatedMatrix, |
3975 | - tmpMatrix); | |
4052 | + tmpRotatedVec, | |
4053 | + tmpMatrix, | |
4054 | + tmpVector); | |
3976 | 4055 | MolDS_wrappers::Blas::GetInstance()->Dgemmm(isColumnMajorTwiceRotatingMatrix, |
3977 | 4056 | isColumnMajorOldMatrix, |
3978 | 4057 | !isColumnMajorTwiceRotatingMatrix, |
@@ -3983,7 +4062,9 @@ void Mndo::RotateDiatomicTwoElecTwoCore1stDerivativesToSpaceFrame( | ||
3983 | 4062 | twiceRotatingMatrixDerivB, |
3984 | 4063 | beta, |
3985 | 4064 | rotatedMatrix, |
3986 | - tmpMatrix); | |
4065 | + tmpRotatedVec, | |
4066 | + tmpMatrix, | |
4067 | + tmpVector); | |
3987 | 4068 | |
3988 | 4069 | MolDS_wrappers::Blas::GetInstance()->Dcopy(dxy*dxy*dxy*dxy, |
3989 | 4070 | &rotatedMatrix[0][0] , incrementOne, |
@@ -3996,7 +4077,9 @@ void Mndo::RotateDiatomicTwoElecTwoCore1stDerivativesToSpaceFrame( | ||
3996 | 4077 | &twiceRotatingMatrixDerivB, |
3997 | 4078 | &oldMatrix, |
3998 | 4079 | &rotatedMatrix, |
4080 | + &tmpRotatedVec, | |
3999 | 4081 | &tmpMatrix, |
4082 | + &tmpVector, | |
4000 | 4083 | &ptrDiatomic); |
4001 | 4084 | throw ex; |
4002 | 4085 | } |
@@ -4005,7 +4088,9 @@ void Mndo::RotateDiatomicTwoElecTwoCore1stDerivativesToSpaceFrame( | ||
4005 | 4088 | &twiceRotatingMatrixDerivB, |
4006 | 4089 | &oldMatrix, |
4007 | 4090 | &rotatedMatrix, |
4091 | + &tmpRotatedVec, | |
4008 | 4092 | &tmpMatrix, |
4093 | + &tmpVector, | |
4009 | 4094 | &ptrDiatomic); |
4010 | 4095 | |
4011 | 4096 | /* |
@@ -4067,14 +4152,18 @@ void Mndo::MallocTempMatricesRotateDiatomicTwoElecTwoCore1stDerivs(double*** twi | ||
4067 | 4152 | double*** twiceRotatingMatrixDerivB, |
4068 | 4153 | double*** oldMatrix, |
4069 | 4154 | double*** rotatedMatrix, |
4155 | + double** tmpRotatedVec, | |
4070 | 4156 | double*** tmpMatrix, |
4157 | + double** tmpVector, | |
4071 | 4158 | double*** ptrDiatomic) const{ |
4072 | 4159 | MallocerFreer::GetInstance()->Malloc<double>(twiceRotatingMatrix, dxy*dxy, dxy*dxy); |
4073 | 4160 | MallocerFreer::GetInstance()->Malloc<double>(twiceRotatingMatrixDerivA, dxy*dxy, dxy*dxy); |
4074 | 4161 | MallocerFreer::GetInstance()->Malloc<double>(twiceRotatingMatrixDerivB, dxy*dxy, dxy*dxy); |
4075 | 4162 | MallocerFreer::GetInstance()->Malloc<double>(oldMatrix, dxy*dxy, dxy*dxy); |
4076 | 4163 | MallocerFreer::GetInstance()->Malloc<double>(rotatedMatrix, dxy*dxy, dxy*dxy); |
4164 | + MallocerFreer::GetInstance()->Malloc<double>(tmpRotatedVec, dxy*dxy*dxy*dxy); | |
4077 | 4165 | MallocerFreer::GetInstance()->Malloc<double>(tmpMatrix, dxy*dxy, dxy*dxy); |
4166 | + MallocerFreer::GetInstance()->Malloc<double>(tmpVector, dxy*dxy*dxy*dxy); | |
4078 | 4167 | MallocerFreer::GetInstance()->Malloc<double*>(ptrDiatomic, dxy*dxy); |
4079 | 4168 | } |
4080 | 4169 |
@@ -4083,14 +4172,18 @@ void Mndo::FreeTempMatricesRotateDiatomicTwoElecTwoCore1stDerivs(double*** twice | ||
4083 | 4172 | double*** twiceRotatingMatrixDerivB, |
4084 | 4173 | double*** oldMatrix, |
4085 | 4174 | double*** rotatedMatrix, |
4175 | + double** tmpRotatedVec, | |
4086 | 4176 | double*** tmpMatrix, |
4177 | + double** tmpVector, | |
4087 | 4178 | double*** ptrDiatomic) const{ |
4088 | 4179 | MallocerFreer::GetInstance()->Free<double>(twiceRotatingMatrix, dxy*dxy, dxy*dxy); |
4089 | 4180 | MallocerFreer::GetInstance()->Free<double>(twiceRotatingMatrixDerivA, dxy*dxy, dxy*dxy); |
4090 | 4181 | MallocerFreer::GetInstance()->Free<double>(twiceRotatingMatrixDerivB, dxy*dxy, dxy*dxy); |
4091 | 4182 | MallocerFreer::GetInstance()->Free<double>(oldMatrix, dxy*dxy, dxy*dxy); |
4092 | - MallocerFreer::GetInstance()->Free<double>(rotatedMatrix, dxy*dxy, dxy*dxy); | |
4183 | + MallocerFreer::GetInstance()->Free<double>(rotatedMatrix, dxy*dxy, dxy*dxy); | |
4184 | + MallocerFreer::GetInstance()->Free<double>(tmpRotatedVec, dxy*dxy*dxy*dxy); | |
4093 | 4185 | MallocerFreer::GetInstance()->Free<double>(tmpMatrix, dxy*dxy, dxy*dxy); |
4186 | + MallocerFreer::GetInstance()->Free<double>(tmpVector, dxy*dxy*dxy*dxy); | |
4094 | 4187 | MallocerFreer::GetInstance()->Free<double*>(ptrDiatomic, dxy*dxy); |
4095 | 4188 | } |
4096 | 4189 |
@@ -164,7 +164,9 @@ private: | ||
164 | 164 | double**** tmpDiaOverlapAOs1stDerivs, |
165 | 165 | double***** tmpDiaOverlapAOs2ndDerivs, |
166 | 166 | double*** tmpRotatedDiatomicOverlap, |
167 | - double*** tmpMatrix) const; | |
167 | + double** tmpRotatedDiatomicOverlapVec, | |
168 | + double*** tmpMatrixBC, | |
169 | + double** tmpVectorBC) const; | |
168 | 170 | void FreeTempMatricesEachThreadCalcHessianSCF(double***** diatomicOverlapAOs1stDerivs, |
169 | 171 | double****** diatomicOverlapAOs2ndDerivs, |
170 | 172 | double******* diatomicTwoElecTwoCore1stDerivs, |
@@ -181,7 +183,9 @@ private: | ||
181 | 183 | double**** tmpDiaOverlapAOs1stDerivs, |
182 | 184 | double***** tmpDiaOverlapAOs2ndDerivs, |
183 | 185 | double*** tmpRotatedDiatomicOverlap, |
184 | - double*** tmpMatrix) const; | |
186 | + double** tmpRotatedDiatomicOverlapVec, | |
187 | + double*** tmpMatrixBC, | |
188 | + double** tmpVectorBC) const; | |
185 | 189 | double GetAuxiliaryHessianElement1(int mu, |
186 | 190 | int nu, |
187 | 191 | int indexAtomA, |
@@ -298,8 +302,10 @@ private: | ||
298 | 302 | double const* const* const* const* const* diatomicTwoElecTwoCore1stDerivatives, |
299 | 303 | MolDS_base::CartesianType axisA) const; |
300 | 304 | void CalcDiatomicTwoElecTwoCore(double**** matrix, |
305 | + double* tmpVec, | |
301 | 306 | double** tmpRotMat, |
302 | 307 | double** tmpMatrixBC, |
308 | + double* tmpVectorBC, | |
303 | 309 | int indexAtomA, |
304 | 310 | int indexAtomB) const; |
305 | 311 | void CalcDiatomicTwoElecTwoCore1stDerivatives(double***** matrix, |
@@ -317,8 +323,10 @@ private: | ||
317 | 323 | int indexAtomA, |
318 | 324 | int indexAtomB) const; |
319 | 325 | void RotateDiatomicTwoElecTwoCoreToSpaceFrame(double**** matrix, |
326 | + double* tmpVec, | |
320 | 327 | double const* const* rotatingMatrix, |
321 | - double** tmpMatrixBC) const; | |
328 | + double** tmpMatrixBC, | |
329 | + double* tmpVectorBC) const; | |
322 | 330 | void RotateDiatomicTwoElecTwoCore1stDerivativesToSpaceFrame(double***** matrix, |
323 | 331 | double const* const* const* const* diatomicTwoElecTwoCore, |
324 | 332 | double const* const* rotatingMatrix, |
@@ -334,14 +342,18 @@ private: | ||
334 | 342 | double*** twiceRotatingMatrixDerivB, |
335 | 343 | double*** oldMatrix, |
336 | 344 | double*** rotatedMatrix, |
345 | + double** tmpRotatedVec, | |
337 | 346 | double*** tmpMatrix, |
347 | + double** tmpVector, | |
338 | 348 | double*** ptrDiatomic) const; |
339 | 349 | void FreeTempMatricesRotateDiatomicTwoElecTwoCore1stDerivs(double*** twiceRotatingMatrix, |
340 | 350 | double*** twiceRotatingMatrixDerivA, |
341 | 351 | double*** twiceRotatingMatrixDerivB, |
342 | 352 | double*** oldMatrix, |
343 | 353 | double*** rotatedMatrix, |
354 | + double** tmpRotatedVec, | |
344 | 355 | double*** tmpMatrix, |
356 | + double** tmpVector, | |
345 | 357 | double*** ptrDiatomic) const; |
346 | 358 | double GetNddoRepulsionIntegral(const MolDS_base_atoms::Atom& atomA, |
347 | 359 | MolDS_base::OrbitalType mu, |
@@ -387,7 +399,9 @@ private: | ||
387 | 399 | double*** tmpRotMat1stDeriv, |
388 | 400 | double**** tmpRotMat1stDerivs, |
389 | 401 | double*** tmpRotatedDiatomicOverlap, |
390 | - double*** tmpMatrix, | |
402 | + double** tmpRotatedDiatomicOverlapVec, | |
403 | + double*** tmpMatrixBC, | |
404 | + double** tmpVectorBC, | |
391 | 405 | double***** tmpDiatomicTwoElecTwoCore) const; |
392 | 406 | void FreeTempMatricesCalcForce(double**** diatomicOverlapAOs1stDerivs, |
393 | 407 | double****** diatomicTwoElecTwoCore1stDerivs, |
@@ -397,7 +411,9 @@ private: | ||
397 | 411 | double*** tmpRotMat1stDeriv, |
398 | 412 | double**** tmpRotMat1stDerivs, |
399 | 413 | double*** tmpRotatedDiatomicOverlap, |
400 | - double*** tmpMatrix, | |
414 | + double** tmpRotatedDiatomicOverlapVec, | |
415 | + double*** tmpMatrixBC, | |
416 | + double** tmpVectorBC, | |
401 | 417 | double***** tmpDiatomicTwoElecTwoCore) const; |
402 | 418 | void CalcForceSCFElecCoreAttractionPart(double* force, |
403 | 419 | int indexAtomA, |
@@ -358,9 +358,65 @@ void Blas::Dgemm(bool isColumnMajorMatrixA, | ||
358 | 358 | double const* const* matrixB, |
359 | 359 | double beta, |
360 | 360 | double** matrixC) const{ |
361 | + double* tmpC; | |
362 | +#ifdef __INTEL_COMPILER | |
363 | + tmpC = (double*)mkl_malloc( sizeof(double)*m*n, 16 ); | |
364 | +#else | |
365 | + tmpC = (double*)malloc( sizeof(double)*m*n); | |
366 | +#endif | |
367 | + this->Dgemm(isColumnMajorMatrixA, | |
368 | + isColumnMajorMatrixB, | |
369 | + isColumnMajorMatrixC, | |
370 | + m, n, k, | |
371 | + alpha, | |
372 | + matrixA, | |
373 | + matrixB, | |
374 | + beta, | |
375 | + matrixC, | |
376 | + tmpC); | |
377 | + | |
378 | +#ifdef __INTEL_COMPILER | |
379 | + mkl_free(tmpC); | |
380 | +#else | |
381 | + free(tmpC); | |
382 | +#endif | |
383 | +} | |
384 | + | |
385 | +// matrixC = alpha*matrixA*matrixB + beta*matrixC | |
386 | +// matrixA: m*k-matrix | |
387 | +// matrixB: k*n-matrix | |
388 | +// matrixC: m*n-matrix (matrixC[m][n] in row-major (C/C++ style)) | |
389 | +// tmpC: temporary 1-dimensional m*n-array for matrixC | |
390 | +void Blas::Dgemm(bool isColumnMajorMatrixA, | |
391 | + bool isColumnMajorMatrixB, | |
392 | + molds_blas_int m, molds_blas_int n, molds_blas_int k, | |
393 | + double alpha, | |
394 | + double const* const* matrixA, | |
395 | + double const* const* matrixB, | |
396 | + double beta, | |
397 | + double** matrixC, | |
398 | + double* tmpC) const{ | |
399 | + bool isColumnMajorMatrixC = false; | |
400 | + this->Dgemm(isColumnMajorMatrixA, isColumnMajorMatrixB, isColumnMajorMatrixC,m, n, k, alpha, matrixA, matrixB, beta, matrixC, tmpC); | |
401 | +} | |
402 | + | |
403 | +// matrixC = alpha*matrixA*matrixB + beta*matrixC | |
404 | +// matrixA: m*k-matrix | |
405 | +// matrixB: k*n-matrix | |
406 | +// matrixC: m*n-matrix | |
407 | +// tmpC: temporary 1-dimensional m*n-array for matrixC | |
408 | +void Blas::Dgemm(bool isColumnMajorMatrixA, | |
409 | + bool isColumnMajorMatrixB, | |
410 | + bool isColumnMajorMatrixC, | |
411 | + molds_blas_int m, molds_blas_int n, molds_blas_int k, | |
412 | + double alpha, | |
413 | + double const* const* matrixA, | |
414 | + double const* const* matrixB, | |
415 | + double beta, | |
416 | + double** matrixC, | |
417 | + double* tmpC) const{ | |
361 | 418 | double* a = const_cast<double*>(&matrixA[0][0]); |
362 | 419 | double* b = const_cast<double*>(&matrixB[0][0]); |
363 | -// double* c = &matrixC[0][0]; | |
364 | 420 | |
365 | 421 | molds_blas_int lda; |
366 | 422 | #ifdef __FCC_VERSION |
@@ -410,12 +466,6 @@ void Blas::Dgemm(bool isColumnMajorMatrixA, | ||
410 | 466 | } |
411 | 467 | #endif |
412 | 468 | |
413 | - double* tmpC; | |
414 | -#ifdef __INTEL_COMPILER | |
415 | - tmpC = (double*)mkl_malloc( sizeof(double)*m*n, 16 ); | |
416 | -#else | |
417 | - tmpC = (double*)malloc( sizeof(double)*m*n); | |
418 | -#endif | |
419 | 469 | molds_blas_int ldc = m; |
420 | 470 | if(isColumnMajorMatrixC){ |
421 | 471 | this->Dcopy(m*n, &matrixC[0][0], tmpC); |
@@ -444,11 +494,6 @@ void Blas::Dgemm(bool isColumnMajorMatrixA, | ||
444 | 494 | } |
445 | 495 | } |
446 | 496 | } |
447 | -#ifdef __INTEL_COMPILER | |
448 | - mkl_free(tmpC); | |
449 | -#else | |
450 | - free(tmpC); | |
451 | -#endif | |
452 | 497 | } |
453 | 498 | |
454 | 499 | // matrixD = matrixA*matrixB*matrixC |
@@ -531,6 +576,36 @@ void Blas::Dgemmm(bool isColumnMajorMatrixA, | ||
531 | 576 | this->Dgemm(isColumnMajorMatrixA, isColumnMajorMatrixBC, m, n, k, alpha, matrixA, tmpMatrixBC, beta, matrixD ); |
532 | 577 | } |
533 | 578 | |
579 | +// matrixD = alpha*matrixA*matrixB*matrixC + beta*matrixD | |
580 | +// matrixA: m*k-matrix | |
581 | +// matrixB: k*l-matrix | |
582 | +// matrixC: l*n-matrix | |
583 | +// matrixD: m*n-matrix (matrixD[m][n] in row-major (C/C++ style)) | |
584 | +// tmpMatrixBC is temporary calculated matrix in row-major, (C/C++ style) | |
585 | +// tmpMatrixBC = matrixB*matrixC | |
586 | +// tmpVectorBC is temporary 1 dimensional k*n-array for matrixBC | |
587 | +// tmpVectorD is temporary 1 dimensional m*n-array for matrixD | |
588 | +void Blas::Dgemmm(bool isColumnMajorMatrixA, | |
589 | + bool isColumnMajorMatrixB, | |
590 | + bool isColumnMajorMatrixC, | |
591 | + molds_blas_int m, molds_blas_int n, molds_blas_int k, molds_blas_int l, | |
592 | + double alpha, | |
593 | + double const* const* matrixA, | |
594 | + double const* const* matrixB, | |
595 | + double const* const* matrixC, | |
596 | + double beta, | |
597 | + double** matrixD, | |
598 | + double* tmpVectorD, | |
599 | + double** tmpMatrixBC, | |
600 | + double* tmpVectorBC) const{ | |
601 | + | |
602 | + double alphaBC = 1.0; | |
603 | + double betaBC = 0.0; | |
604 | + bool isColumnMajorMatrixBC = false; | |
605 | + this->Dgemm(isColumnMajorMatrixB, isColumnMajorMatrixC, k, n, l, alphaBC, matrixB, matrixC, betaBC, tmpMatrixBC, tmpVectorBC); | |
606 | + this->Dgemm(isColumnMajorMatrixA, isColumnMajorMatrixBC, m, n, k, alpha, matrixA, tmpMatrixBC, beta, matrixD, tmpVectorD); | |
607 | +} | |
608 | + | |
534 | 609 | // matrixC = matrixA*matrixA^T |
535 | 610 | // matrixA: n*k-matrix |
536 | 611 | // matrixC: n*n-matrix,symmetric (Use the upper triangular part, and copy it to the lower part.) |
@@ -102,6 +102,25 @@ public: | ||
102 | 102 | double const* const* matrixB, |
103 | 103 | double beta, |
104 | 104 | double** matrixC) const; |
105 | + void Dgemm(bool isColumnMajorMatrixA, | |
106 | + bool isColumnMajorMatrixB, | |
107 | + molds_blas_int m, molds_blas_int n, molds_blas_int k, | |
108 | + double alpha, | |
109 | + double const* const* matrixA, | |
110 | + double const* const* matrixB, | |
111 | + double beta, | |
112 | + double** matrixC, | |
113 | + double* tmpC) const; | |
114 | + void Dgemm(bool isColumnMajorMatrixA, | |
115 | + bool isColumnMajorMatrixB, | |
116 | + bool isColumnMajorMatrixC, | |
117 | + molds_blas_int m, molds_blas_int n, molds_blas_int k, | |
118 | + double alpha, | |
119 | + double const* const* matrixA, | |
120 | + double const* const* matrixB, | |
121 | + double beta, | |
122 | + double** matrixC, | |
123 | + double* tmpC) const; | |
105 | 124 | void Dgemmm(molds_blas_int m, molds_blas_int n, molds_blas_int k, molds_blas_int l, |
106 | 125 | double const* const* matrixA, |
107 | 126 | double const* const* matrixB, |
@@ -128,6 +147,19 @@ public: | ||
128 | 147 | double beta, |
129 | 148 | double** matrixD, |
130 | 149 | double** tmpMatrixBC) const; |
150 | + void Dgemmm(bool isColumnMajorMatrixA, | |
151 | + bool isColumnMajorMatrixB, | |
152 | + bool isColumnMajorMatrixC, | |
153 | + molds_blas_int m, molds_blas_int n, molds_blas_int k, molds_blas_int l, | |
154 | + double alpha, | |
155 | + double const* const* matrixA, | |
156 | + double const* const* matrixB, | |
157 | + double const* const* matrixC, | |
158 | + double beta, | |
159 | + double** matrixD, | |
160 | + double* tmpVectorD, | |
161 | + double** tmpMatrixBC, | |
162 | + double* tmpVectorBC) const; | |
131 | 163 | void Dsyrk(molds_blas_int n, molds_blas_int k, |
132 | 164 | double const *const* matrixA, |
133 | 165 | double** matrixC)const; |
@@ -3692,8 +3692,10 @@ void ZindoS::CalcForce(const vector<int>& elecStates){ | ||
3692 | 3692 | double** tmpRotMat = NULL; // rotating Matrix from the diatomic frame to space fixed frame. |
3693 | 3693 | double** tmpRotMat1stDeriv = NULL; |
3694 | 3694 | double*** tmpRotMat1stDerivs = NULL; // first derivatives of the rotMat. |
3695 | - double** tmpRotatedDiatomicOverlap = NULL; | |
3696 | - double** tmpMatrix = NULL; | |
3695 | + double** tmpRotatedDiatomicOverlap = NULL; // used in dgemmm | |
3696 | + double* tmpRotatedDiatomicOverlapVec = NULL; // used in dgemmm | |
3697 | + double** tmpMatrixBC = NULL; // used in dgemmm | |
3698 | + double* tmpVectorBC = NULL; // used in dgemmm | |
3697 | 3699 | try{ |
3698 | 3700 | MallocTempMatricesCalcForce(&diatomicOverlapAOs1stDerivs, |
3699 | 3701 | &diatomicTwoElecTwoCore1stDerivs, |
@@ -3703,7 +3705,9 @@ void ZindoS::CalcForce(const vector<int>& elecStates){ | ||
3703 | 3705 | &tmpRotMat1stDeriv, |
3704 | 3706 | &tmpRotMat1stDerivs, |
3705 | 3707 | &tmpRotatedDiatomicOverlap, |
3706 | - &tmpMatrix); | |
3708 | + &tmpRotatedDiatomicOverlapVec, | |
3709 | + &tmpMatrixBC, | |
3710 | + &tmpVectorBC); | |
3707 | 3711 | #pragma omp for schedule(auto) |
3708 | 3712 | for(int b=0; b<this->molecule->GetNumberAtoms(); b++){ |
3709 | 3713 | if(a == b){continue;} |
@@ -3720,7 +3724,9 @@ void ZindoS::CalcForce(const vector<int>& elecStates){ | ||
3720 | 3724 | tmpRotMat1stDeriv, |
3721 | 3725 | tmpRotMat1stDerivs, |
3722 | 3726 | tmpRotatedDiatomicOverlap, |
3723 | - tmpMatrix, | |
3727 | + tmpRotatedDiatomicOverlapVec, | |
3728 | + tmpMatrixBC, | |
3729 | + tmpVectorBC, | |
3724 | 3730 | atomA, |
3725 | 3731 | atomB); |
3726 | 3732 |
@@ -3842,7 +3848,9 @@ void ZindoS::CalcForce(const vector<int>& elecStates){ | ||
3842 | 3848 | &tmpRotMat1stDeriv, |
3843 | 3849 | &tmpRotMat1stDerivs, |
3844 | 3850 | &tmpRotatedDiatomicOverlap, |
3845 | - &tmpMatrix); | |
3851 | + &tmpRotatedDiatomicOverlapVec, | |
3852 | + &tmpMatrixBC, | |
3853 | + &tmpVectorBC); | |
3846 | 3854 | } //end of omp-parallelized region |
3847 | 3855 | // Exception throwing for omp-region |
3848 | 3856 | if(!ompErrors.str().empty()){ |
@@ -3945,7 +3953,9 @@ void ZindoS::MallocTempMatricesCalcForce(double**** diatomicOverlapAOs1stDerivs, | ||
3945 | 3953 | double*** tmpRotMat1stDeriv, |
3946 | 3954 | double**** tmpRotMat1stDerivs, |
3947 | 3955 | double*** tmpRotatedDiatomicOverlap, |
3948 | - double*** tmpMatrix) const{ | |
3956 | + double** tmpRotatedDiatomicOverlapVec, | |
3957 | + double*** tmpMatrixBC, | |
3958 | + double** tmpVectorBC) const{ | |
3949 | 3959 | MallocerFreer::GetInstance()->Malloc<double>(diatomicOverlapAOs1stDerivs, OrbitalType_end, OrbitalType_end, CartesianType_end); |
3950 | 3960 | MallocerFreer::GetInstance()->Malloc<double>(diatomicTwoElecTwoCore1stDerivs, OrbitalType_end, OrbitalType_end, CartesianType_end); |
3951 | 3961 | MallocerFreer::GetInstance()->Malloc<double>(tmpDiaOverlapAOsInDiaFrame, OrbitalType_end, OrbitalType_end); |
@@ -3954,7 +3964,9 @@ void ZindoS::MallocTempMatricesCalcForce(double**** diatomicOverlapAOs1stDerivs, | ||
3954 | 3964 | MallocerFreer::GetInstance()->Malloc<double>(tmpRotMat1stDeriv, OrbitalType_end, OrbitalType_end); |
3955 | 3965 | MallocerFreer::GetInstance()->Malloc<double>(tmpRotMat1stDerivs, OrbitalType_end, OrbitalType_end, CartesianType_end); |
3956 | 3966 | MallocerFreer::GetInstance()->Malloc<double>(tmpRotatedDiatomicOverlap, OrbitalType_end, OrbitalType_end); |
3957 | - MallocerFreer::GetInstance()->Malloc<double>(tmpMatrix, OrbitalType_end, OrbitalType_end); | |
3967 | + MallocerFreer::GetInstance()->Malloc<double>(tmpRotatedDiatomicOverlapVec, OrbitalType_end*OrbitalType_end); | |
3968 | + MallocerFreer::GetInstance()->Malloc<double>(tmpMatrixBC, OrbitalType_end, OrbitalType_end); | |
3969 | + MallocerFreer::GetInstance()->Malloc<double>(tmpVectorBC, OrbitalType_end*OrbitalType_end); | |
3958 | 3970 | } |
3959 | 3971 | |
3960 | 3972 | void ZindoS::FreeTempMatricesCalcForce(double**** diatomicOverlapAOs1stDerivs, |
@@ -3965,7 +3977,9 @@ void ZindoS::FreeTempMatricesCalcForce(double**** diatomicOverlapAOs1stDerivs, | ||
3965 | 3977 | double*** tmpRotMat1stDeriv, |
3966 | 3978 | double**** tmpRotMat1stDerivs, |
3967 | 3979 | double*** tmpRotatedDiatomicOverlap, |
3968 | - double*** tmpMatrix) const{ | |
3980 | + double** tmpRotatedDiatomicOverlapVec, | |
3981 | + double*** tmpMatrixBC, | |
3982 | + double** tmpVectorBC) const{ | |
3969 | 3983 | MallocerFreer::GetInstance()->Free<double>(diatomicOverlapAOs1stDerivs, OrbitalType_end, OrbitalType_end, CartesianType_end); |
3970 | 3984 | MallocerFreer::GetInstance()->Free<double>(diatomicTwoElecTwoCore1stDerivs, OrbitalType_end, OrbitalType_end, CartesianType_end); |
3971 | 3985 | MallocerFreer::GetInstance()->Free<double>(tmpDiaOverlapAOsInDiaFrame, OrbitalType_end, OrbitalType_end); |
@@ -3974,7 +3988,9 @@ void ZindoS::FreeTempMatricesCalcForce(double**** diatomicOverlapAOs1stDerivs, | ||
3974 | 3988 | MallocerFreer::GetInstance()->Free<double>(tmpRotMat1stDeriv, OrbitalType_end, OrbitalType_end); |
3975 | 3989 | MallocerFreer::GetInstance()->Free<double>(tmpRotMat1stDerivs, OrbitalType_end, OrbitalType_end, CartesianType_end); |
3976 | 3990 | MallocerFreer::GetInstance()->Free<double>(tmpRotatedDiatomicOverlap, OrbitalType_end, OrbitalType_end); |
3977 | - MallocerFreer::GetInstance()->Free<double>(tmpMatrix, OrbitalType_end, OrbitalType_end); | |
3991 | + MallocerFreer::GetInstance()->Free<double>(tmpRotatedDiatomicOverlapVec, OrbitalType_end*OrbitalType_end); | |
3992 | + MallocerFreer::GetInstance()->Free<double>(tmpMatrixBC, OrbitalType_end, OrbitalType_end); | |
3993 | + MallocerFreer::GetInstance()->Free<double>(tmpVectorBC, OrbitalType_end*OrbitalType_end); | |
3978 | 3994 | } |
3979 | 3995 | |
3980 | 3996 | void ZindoS::CalcForceExcitedStaticPart(double* force, |
@@ -279,7 +279,9 @@ private: | ||
279 | 279 | double*** tmpRotMat1stDeriv, |
280 | 280 | double**** tmpRotMat1stDerivs, |
281 | 281 | double*** tmpRotatedDiatomicOverlap, |
282 | - double*** tmpMatrix) const; | |
282 | + double** tmpRotatedDiatomicOverlapVec, | |
283 | + double*** tmpMatrixBC, | |
284 | + double** tmpVectorBC) const; | |
283 | 285 | void FreeTempMatricesCalcForce(double**** diatomicOverlapAOs1stDerivs, |
284 | 286 | double**** diatomicTwoElecTwoCore1stDerivs, |
285 | 287 | double*** tmpDiaOverlapAOsInDiaFrame, |
@@ -288,7 +290,9 @@ private: | ||
288 | 290 | double*** tmpRotMat1stDeriv, |
289 | 291 | double**** tmpRotMat1stDerivs, |
290 | 292 | double*** tmpRotatedDiatomicOverlap, |
291 | - double*** tmpMatrix) const; | |
293 | + double** tmpRotatedDiatomicOverlapVec, | |
294 | + double*** tmpMatrixBC, | |
295 | + double** tmpVectorBC) const; | |
292 | 296 | void CalcForceExcitedStaticPart(double* force, |
293 | 297 | int elecStateIndex, |
294 | 298 | int indexAtomA, |