• R/O
  • HTTP
  • SSH
  • HTTPS

Commit

Tags
No Tags

Frequently used words (click to add to your profile)

javac++androidlinuxc#windowsobjective-ccocoa誰得qtpythonphprubygameguibathyscaphec計画中(planning stage)翻訳omegatframeworktwitterdomtestvb.netdirectxゲームエンジンbtronarduinopreviewer

Commit MetaInfo

Revisiondcb26e32571feb61101d55b77dfcbbd4dc85b164 (tree)
Time2013-03-09 06:34:46
AuthorMikiya Fujii <mikiya.fujii@gmai...>
CommiterMikiya Fujii

Log Message

speed up of calculation of forces on the GS #30829

git-svn-id: https://svn.sourceforge.jp/svnroot/molds/trunk@1309 1136aad2-a195-0410-b898-f5ea1d11b9d8

Change Summary

Incremental Difference

--- a/src/cndo/Cndo2.cpp
+++ b/src/cndo/Cndo2.cpp
@@ -3702,22 +3702,76 @@ void Cndo2::CalcDiatomicOverlapAOs1stDerivatives(double*** diatomicOverlapAOs1st
37023702 pow(cartesian[YAxis],2.0) +
37033703 pow(cartesian[ZAxis],2.0) );
37043704
3705- double** diaOverlapAOsInDiaFrame = NULL; // diatomic overlapAOs in diatomic frame
3706- double** diaOverlapAOs1stDerivInDiaFrame = NULL; // first derivative of the diaOverlapAOs. This derivative is related to the distance between two atoms.
3705+ double** diaOverlapAOsInDiaFrame = NULL; // diatomic overlapAOs in diatomic frame
3706+ double** diaOverlapAOs1stDerivInDiaFrame = NULL; // first derivative of the diaOverlapAOs. This derivative is related to the distance between two atoms.
37073707 double** rotMat = NULL; // rotating Matrix from the diatomic frame to space fixed frame.
37083708 double*** rotMat1stDerivs = NULL; // first derivatives of the rotMat.
3709+ double** tmpRotMat1stDeriv = NULL;
3710+ double** tmpMatrix = NULL;
37093711
37103712 try{
37113713 this->MallocDiatomicOverlapAOs1stDeriTemps(&diaOverlapAOsInDiaFrame,
37123714 &diaOverlapAOs1stDerivInDiaFrame,
37133715 &rotMat,
3714- &rotMat1stDerivs);
3716+ &rotMat1stDerivs,
3717+ &tmpRotMat1stDeriv,
3718+ &tmpMatrix);
37153719 this->CalcDiatomicOverlapAOsInDiatomicFrame(diaOverlapAOsInDiaFrame, atomA, atomB);
37163720 this->CalcDiatomicOverlapAOs1stDerivativeInDiatomicFrame(diaOverlapAOs1stDerivInDiaFrame, atomA, atomB);
37173721 this->CalcRotatingMatrix(rotMat, atomA, atomB);
37183722 this->CalcRotatingMatrix1stDerivatives(rotMat1stDerivs, atomA, atomB);
37193723
3720- // rotate
3724+ // rotate (fast algorithm, see also slow algorithm shown later)
3725+ int incrementOne = 1;
3726+ bool isColumnMajorRotatingMatrix = false;
3727+ bool isColumnMajorDiaOverlapAOs = false;
3728+ double alpha = 0.0;
3729+ double beta = 0.0;
3730+ for(int c=0; c<CartesianType_end; c++){
3731+ MolDS_wrappers::Blas::GetInstance()->Dcopy(OrbitalType_end*OrbitalType_end,
3732+ &rotMat1stDerivs[0][0][c], CartesianType_end,
3733+ &tmpRotMat1stDeriv[0][0], incrementOne);
3734+ alpha = cartesian[c]/R;
3735+ beta = 0.0;
3736+ MolDS_wrappers::Blas::GetInstance()->Dgemmm(isColumnMajorRotatingMatrix,
3737+ isColumnMajorDiaOverlapAOs,
3738+ !isColumnMajorRotatingMatrix,
3739+ OrbitalType_end, OrbitalType_end, OrbitalType_end, OrbitalType_end,
3740+ alpha,
3741+ rotMat,
3742+ diaOverlapAOs1stDerivInDiaFrame,
3743+ rotMat,
3744+ beta,
3745+ tmpMatrix);
3746+ alpha = 1.0;
3747+ beta = 1.0;
3748+ MolDS_wrappers::Blas::GetInstance()->Dgemmm(isColumnMajorRotatingMatrix,
3749+ isColumnMajorDiaOverlapAOs,
3750+ !isColumnMajorRotatingMatrix,
3751+ OrbitalType_end, OrbitalType_end, OrbitalType_end, OrbitalType_end,
3752+ alpha,
3753+ tmpRotMat1stDeriv,
3754+ diaOverlapAOsInDiaFrame,
3755+ rotMat,
3756+ beta,
3757+ tmpMatrix);
3758+ MolDS_wrappers::Blas::GetInstance()->Dgemmm(isColumnMajorRotatingMatrix,
3759+ isColumnMajorDiaOverlapAOs,
3760+ !isColumnMajorRotatingMatrix,
3761+ OrbitalType_end, OrbitalType_end, OrbitalType_end, OrbitalType_end,
3762+ alpha,
3763+ rotMat,
3764+ diaOverlapAOsInDiaFrame,
3765+ tmpRotMat1stDeriv,
3766+ beta,
3767+ tmpMatrix);
3768+ MolDS_wrappers::Blas::GetInstance()->Dcopy(OrbitalType_end*OrbitalType_end,
3769+ &tmpMatrix[0][0], incrementOne,
3770+ &diatomicOverlapAOs1stDerivs[0][0][c], CartesianType_end);
3771+ }
3772+
3773+ /*
3774+ // rotate (slow)
37213775 for(int i=0; i<OrbitalType_end; i++){
37223776 for(int j=0; j<OrbitalType_end; j++){
37233777 for(int c=0; c<CartesianType_end; c++){
@@ -3744,19 +3798,25 @@ void Cndo2::CalcDiatomicOverlapAOs1stDerivatives(double*** diatomicOverlapAOs1st
37443798 }
37453799 }
37463800 }
3801+ */
3802+
37473803 }
37483804 catch(MolDSException ex){
37493805 this->FreeDiatomicOverlapAOs1stDeriTemps(&diaOverlapAOsInDiaFrame,
37503806 &diaOverlapAOs1stDerivInDiaFrame,
37513807 &rotMat,
3752- &rotMat1stDerivs);
3808+ &rotMat1stDerivs,
3809+ &tmpRotMat1stDeriv,
3810+ &tmpMatrix);
37533811 throw ex;
37543812 }
37553813 // free
37563814 this->FreeDiatomicOverlapAOs1stDeriTemps(&diaOverlapAOsInDiaFrame,
37573815 &diaOverlapAOs1stDerivInDiaFrame,
37583816 &rotMat,
3759- &rotMat1stDerivs);
3817+ &rotMat1stDerivs,
3818+ &tmpRotMat1stDeriv,
3819+ &tmpMatrix);
37603820 }
37613821
37623822 void Cndo2::CalcDiatomicOverlapAOs1stDerivatives(double*** diatomicOverlapAOs1stDerivs,
@@ -3944,14 +4004,18 @@ double Cndo2::Get2ndDerivativeElementFromDistanceDerivatives(double firstDistanc
39444004 return value;
39454005 }
39464006
3947-void Cndo2::MallocDiatomicOverlapAOs1stDeriTemps(double*** diaOverlapAOsInDiaFrame,
3948- double*** diaOverlapAOs1stDerivInDiaFrame,
3949- double*** rotMat,
3950- double**** rotMat1stDerivs) const{
4007+void Cndo2::MallocDiatomicOverlapAOs1stDeriTemps(double*** diaOverlapAOsInDiaFrame,
4008+ double*** diaOverlapAOs1stDerivInDiaFrame,
4009+ double*** rotMat,
4010+ double**** rotMat1stDerivs,
4011+ double*** tmpRotMat1stDeriv,
4012+ double*** tmpMatrix) const{
39514013 MallocerFreer::GetInstance()->Malloc<double>(diaOverlapAOsInDiaFrame, OrbitalType_end, OrbitalType_end);
39524014 MallocerFreer::GetInstance()->Malloc<double>(diaOverlapAOs1stDerivInDiaFrame, OrbitalType_end, OrbitalType_end);
39534015 MallocerFreer::GetInstance()->Malloc<double>(rotMat, OrbitalType_end, OrbitalType_end);
39544016 MallocerFreer::GetInstance()->Malloc<double>(rotMat1stDerivs, OrbitalType_end, OrbitalType_end, CartesianType_end);
4017+ MallocerFreer::GetInstance()->Malloc<double>(tmpMatrix, OrbitalType_end, OrbitalType_end);
4018+ MallocerFreer::GetInstance()->Malloc<double>(tmpRotMat1stDeriv, OrbitalType_end, OrbitalType_end);
39554019 }
39564020
39574021 void Cndo2::MallocDiatomicOverlapAOs2ndDeriTemps(double*** diaOverlapAOsInDiaFrame,
@@ -3975,11 +4039,15 @@ void Cndo2::MallocDiatomicOverlapAOs2ndDeriTemps(double*** diaOverlapAOsInDiaFra
39754039 void Cndo2::FreeDiatomicOverlapAOs1stDeriTemps(double*** diaOverlapAOsInDiaFrame,
39764040 double*** diaOverlapAOs1stDerivInDiaFrame,
39774041 double*** rotMat,
3978- double**** rotMat1stDerivs) const{
4042+ double**** rotMat1stDerivs,
4043+ double*** tmpRotMat1stDeriv,
4044+ double*** tmpMatrix) const{
39794045 MallocerFreer::GetInstance()->Free<double>(diaOverlapAOsInDiaFrame, OrbitalType_end, OrbitalType_end);
39804046 MallocerFreer::GetInstance()->Free<double>(diaOverlapAOs1stDerivInDiaFrame, OrbitalType_end, OrbitalType_end);
39814047 MallocerFreer::GetInstance()->Free<double>(rotMat, OrbitalType_end, OrbitalType_end);
39824048 MallocerFreer::GetInstance()->Free<double>(rotMat1stDerivs, OrbitalType_end, OrbitalType_end, CartesianType_end);
4049+ MallocerFreer::GetInstance()->Free<double>(tmpMatrix, OrbitalType_end, OrbitalType_end);
4050+ MallocerFreer::GetInstance()->Free<double>(tmpRotMat1stDeriv, OrbitalType_end, OrbitalType_end);
39834051 }
39844052
39854053 void Cndo2::FreeDiatomicOverlapAOs2ndDeriTemps(double*** diaOverlapAOsInDiaFrame,
--- a/src/cndo/Cndo2.h
+++ b/src/cndo/Cndo2.h
@@ -462,7 +462,9 @@ private:
462462 void MallocDiatomicOverlapAOs1stDeriTemps(double*** diaOverlapAOsInDiaFrame,
463463 double*** diaOverlapAOs1stDerivInDiaFrame,
464464 double*** rotMat,
465- double**** rotMat1stDerivs) const;
465+ double**** rotMat1stDerivs,
466+ double*** tmpRotMat1stDeriv,
467+ double*** tmpMatrix) const;
466468 void MallocDiatomicOverlapAOs2ndDeriTemps(double*** diaOverlapAOsInDiaFrame,
467469 double*** diaOverlapAOs1stDerivInDiaFrame,
468470 double*** diaOverlapAOs2ndDerivInDiaFrame,
@@ -474,7 +476,9 @@ private:
474476 void FreeDiatomicOverlapAOs1stDeriTemps(double*** diaOverlapAOsInDiaFrame,
475477 double*** diaOverlapAOs1stDerivInDiaFrame,
476478 double*** rotMat,
477- double**** rotMat1stDerivs) const;
479+ double**** rotMat1stDerivs,
480+ double*** tmpRotMat1stDeriv,
481+ double*** tmpMatrix) const;
478482 void FreeDiatomicOverlapAOs2ndDeriTemps(double*** diaOverlapAOsInDiaFrame,
479483 double*** diaOverlapAOs1stDerivInDiaFrame,
480484 double*** diaOverlapAOs2ndDerivInDiaFrame,
--- a/src/md/MD.cpp
+++ b/src/md/MD.cpp
@@ -97,19 +97,9 @@ void MD::DoMD(){
9797 for(int s=0; s<totalSteps; s++){
9898 this->OutputLog(boost::format("%s%d\n") % this->messageStartStepMD.c_str() % (s+1) );
9999
100- // update momenta
101- this->UpdateMomenta(*this->molecule, matrixForce, dt);
102-
103- // update coordinates
104- for(int a=0; a<this->molecule->GetNumberAtoms(); a++){
105- Atom* atom = this->molecule->GetAtom(a);
106- double coreMass = atom->GetAtomicMass() - static_cast<double>(atom->GetNumberValenceElectrons());
107- for(int i=0; i<CartesianType_end; i++){
108- atom->GetXyz()[i] += dt*atom->GetPxyz()[i]/coreMass;
109- }
110- }
111- this->molecule->CalcXyzCOM();
112- this->molecule->CalcXyzCOC();
100+ // update momenta & coordinates
101+ this->UpdateMomenta (*this->molecule, matrixForce, dt);
102+ this->UpdateCoordinates(*this->molecule, dt);
113103
114104 // update electronic structure
115105 electronicStructure->DoSCF(requireGuess);
@@ -138,6 +128,7 @@ void MD::DoMD(){
138128 }
139129
140130 void MD::UpdateMomenta(const Molecule& molecule, double const* const* matrixForce, double dt) const{
131+#pragma omp parallel for schedule(auto)
141132 for(int a=0; a<molecule.GetNumberAtoms(); a++){
142133 Atom* atom = molecule.GetAtom(a);
143134 for(int i=0; i<CartesianType_end; i++){
@@ -146,6 +137,19 @@ void MD::UpdateMomenta(const Molecule& molecule, double const* const* matrixForc
146137 }
147138 }
148139
140+void MD::UpdateCoordinates(Molecule& molecule, double dt) const{
141+#pragma omp parallel for schedule(auto)
142+ for(int a=0; a<molecule.GetNumberAtoms(); a++){
143+ Atom* atom = molecule.GetAtom(a);
144+ double coreMass = atom->GetAtomicMass() - static_cast<double>(atom->GetNumberValenceElectrons());
145+ for(int i=0; i<CartesianType_end; i++){
146+ atom->GetXyz()[i] += dt*atom->GetPxyz()[i]/coreMass;
147+ }
148+ }
149+ molecule.CalcXyzCOM();
150+ molecule.CalcXyzCOC();
151+}
152+
149153 void MD::SetMessages(){
150154 this->errorMessageTheoryType = "\ttheory type = ";
151155 this->errorMessageNotEnebleTheoryType
--- a/src/md/MD.h
+++ b/src/md/MD.h
@@ -52,7 +52,8 @@ private:
5252 void CheckEnableTheoryType(MolDS_base::TheoryType theoryType);
5353 void SetMessages();
5454 void SetEnableTheoryTypes();
55- void UpdateMomenta(const MolDS_base::Molecule& molecule, double const* const* matrixForce, double dt) const;
55+ void UpdateMomenta (const MolDS_base::Molecule& molecule, double const* const* matrixForce, double dt) const;
56+ void UpdateCoordinates( MolDS_base::Molecule& molecule, double dt) const;
5657 void OutputEnergies(boost::shared_ptr<MolDS_base::ElectronicStructure> electronicStructure, double initialEnergy);
5758 double OutputEnergies(boost::shared_ptr<MolDS_base::ElectronicStructure> electronicStructure);
5859 };
--- a/src/mndo/Mndo.cpp
+++ b/src/mndo/Mndo.cpp
@@ -3935,40 +3935,46 @@ void Mndo::RotateDiatomicTwoElecTwoCoreToSpaceFrame(double**** matrix,
39353935 double** twiceRotatingMatrix = NULL;
39363936 double** ptrOldMatrix = NULL;
39373937 double** ptrMatrix = NULL;
3938- MallocerFreer::GetInstance()->Malloc<double>(&twiceRotatingMatrix, dxy*dxy, dxy*dxy);
3939- MallocerFreer::GetInstance()->Malloc<double*>(&ptrOldMatrix, dxy*dxy);
3940- MallocerFreer::GetInstance()->Malloc<double*>(&ptrMatrix, dxy*dxy);
3941- for(int mu=0; mu<dxy; mu++){
3942- for(int nu=0; nu<dxy; nu++){
3943- int i=mu*dxy+nu;
3944- for(int lambda=0; lambda<dxy; lambda++){
3945- for(int sigma=0; sigma<dxy; sigma++){
3946- int j=lambda*dxy+sigma;
3947- twiceRotatingMatrix[i][j] = rotatingMatrix[mu][lambda]*rotatingMatrix[nu][sigma];
3938+ try{
3939+ MallocTempMatricesRotateDiatomicTwoElecTwoCore(&twiceRotatingMatrix,
3940+ &ptrOldMatrix,
3941+ &ptrMatrix);
3942+ for(int mu=0; mu<dxy; mu++){
3943+ for(int nu=0; nu<dxy; nu++){
3944+ int i=mu*dxy+nu;
3945+ for(int lambda=0; lambda<dxy; lambda++){
3946+ for(int sigma=0; sigma<dxy; sigma++){
3947+ int j=lambda*dxy+sigma;
3948+ twiceRotatingMatrix[i][j] = rotatingMatrix[mu][lambda]*rotatingMatrix[nu][sigma];
3949+ }
39483950 }
3951+ ptrOldMatrix[i] = &oldMatrix[mu][nu][0][0];
3952+ ptrMatrix [i] = &matrix [mu][nu][0][0];
39493953 }
3950- ptrOldMatrix[i] = &oldMatrix[mu][nu][0][0];
3951- ptrMatrix [i] = &matrix [mu][nu][0][0];
39523954 }
3955+ bool isColumnMajorTwiceRotatingMatrix = false;
3956+ bool isColumnMajorPtrOldMatrix = false;
3957+ double alpha = 1.0;
3958+ double beta = 0.0;
3959+ MolDS_wrappers::Blas::GetInstance()->Dgemmm(isColumnMajorTwiceRotatingMatrix,
3960+ isColumnMajorPtrOldMatrix,
3961+ !isColumnMajorTwiceRotatingMatrix,
3962+ dxy*dxy, dxy*dxy, dxy*dxy, dxy*dxy,
3963+ alpha,
3964+ twiceRotatingMatrix,
3965+ ptrOldMatrix,
3966+ twiceRotatingMatrix,
3967+ beta,
3968+ ptrMatrix);
39533969 }
3954- bool isColumnMajorTwiceRotatingMatrix = false;
3955- bool isColumnMajorPtrOldMatrix = false;
3956- double alpha = 1.0;
3957- double beta = 0.0;
3958- MolDS_wrappers::Blas::GetInstance()->Dgemmm(isColumnMajorTwiceRotatingMatrix,
3959- isColumnMajorPtrOldMatrix,
3960- !isColumnMajorTwiceRotatingMatrix,
3961- dxy*dxy, dxy*dxy, dxy*dxy, dxy*dxy,
3962- alpha,
3963- twiceRotatingMatrix,
3964- ptrOldMatrix,
3965- twiceRotatingMatrix,
3966- beta,
3967- ptrMatrix);
3968- MallocerFreer::GetInstance()->Free<double>(&twiceRotatingMatrix, dxy*dxy, dxy*dxy);
3969- MallocerFreer::GetInstance()->Free<double*>(&ptrOldMatrix, dxy*dxy);
3970- MallocerFreer::GetInstance()->Free<double*>(&ptrMatrix, dxy*dxy);
3971-
3970+ catch(MolDSException ex){
3971+ FreeTempMatricesRotateDiatomicTwoElecTwoCore(&twiceRotatingMatrix,
3972+ &ptrOldMatrix,
3973+ &ptrMatrix);
3974+ }
3975+ FreeTempMatricesRotateDiatomicTwoElecTwoCore(&twiceRotatingMatrix,
3976+ &ptrOldMatrix,
3977+ &ptrMatrix);
39723978 /*
39733979 // rotate (slow algorithm)
39743980 for(int mu=0; mu<dxy; mu++){
@@ -3996,86 +4002,153 @@ void Mndo::RotateDiatomicTwoElecTwoCoreToSpaceFrame(double**** matrix,
39964002 */
39974003 }
39984004
4005+void Mndo::MallocTempMatricesRotateDiatomicTwoElecTwoCore(double*** twiceRotatingMatrix,
4006+ double*** ptrOldMatrix,
4007+ double*** ptrMatrix) const{
4008+ MallocerFreer::GetInstance()->Malloc<double>(twiceRotatingMatrix, dxy*dxy, dxy*dxy);
4009+ MallocerFreer::GetInstance()->Malloc<double*>(ptrOldMatrix, dxy*dxy);
4010+ MallocerFreer::GetInstance()->Malloc<double*>(ptrMatrix, dxy*dxy);
4011+}
4012+
4013+void Mndo::FreeTempMatricesRotateDiatomicTwoElecTwoCore(double*** twiceRotatingMatrix,
4014+ double*** ptrOldMatrix,
4015+ double*** ptrMatrix) const{
4016+ MallocerFreer::GetInstance()->Free<double>(twiceRotatingMatrix, dxy*dxy, dxy*dxy);
4017+ MallocerFreer::GetInstance()->Free<double*>(ptrOldMatrix, dxy*dxy);
4018+ MallocerFreer::GetInstance()->Free<double*>(ptrMatrix, dxy*dxy);
4019+}
4020+
39994021 // Rotate 5-dimensional matrix from diatomic frame to space frame
40004022 // Note tha in this method d-orbitals can not be treatable.
40014023 void Mndo::RotateDiatomicTwoElecTwoCore1stDerivativesToSpaceFrame(
40024024 double***** matrix,
4003- double const* const* const* const* diatomicTwoElecTwoCore,
4025+ double const* const*const* const* diatomicTwoElecTwoCore,
40044026 double const* const* rotatingMatrix,
40054027 double const* const* const* rotMat1stDerivatives) const{
4006- double oldMatrix[dxy][dxy][dxy][dxy][CartesianType_end];
4007- for(int mu=0; mu<dxy; mu++){
4008- for(int nu=0; nu<dxy; nu++){
4009- for(int lambda=0; lambda<dxy; lambda++){
4010- for(int sigma=0; sigma<dxy; sigma++){
4011- for(int c=0; c<CartesianType_end; c++){
4012- oldMatrix[mu][nu][lambda][sigma][c] = matrix[mu][nu][lambda][sigma][c];
4028+
4029+ // rotate (fast algorithm, see also slow algorithm shown later)
4030+ int incrementOne = 1;
4031+ bool isColumnMajorTwiceRotatingMatrix = false;
4032+ bool isColumnMajorOldMatrix = false;
4033+ double alpha;
4034+ double beta;
4035+ double** twiceRotatingMatrix = NULL;
4036+ double** twiceRotatingMatrixDerivA = NULL;
4037+ double** twiceRotatingMatrixDerivB = NULL;
4038+ double** oldMatrix = NULL;
4039+ double** tmpMatrix = NULL;
4040+ double** ptrDiatomic = NULL;
4041+ try{
4042+ this->MallocTempMatricesRotateDiatomicTwoElecTwoCore1stDerivs(&twiceRotatingMatrix,
4043+ &twiceRotatingMatrixDerivA,
4044+ &twiceRotatingMatrixDerivB,
4045+ &oldMatrix,
4046+ &tmpMatrix,
4047+ &ptrDiatomic);
4048+ for(int mu=0; mu<dxy; mu++){
4049+ for(int nu=0; nu<dxy; nu++){
4050+ int i=mu*dxy+nu;
4051+ for(int lambda=0; lambda<dxy; lambda++){
4052+ for(int sigma=0; sigma<dxy; sigma++){
4053+ int j=lambda*dxy+sigma;
4054+ twiceRotatingMatrix[i][j] = rotatingMatrix[mu][lambda]
4055+ *rotatingMatrix[nu][sigma ];
40134056 }
40144057 }
4058+ ptrDiatomic[i] = const_cast<double*>(&diatomicTwoElecTwoCore[mu][nu][0][0]);
40154059 }
40164060 }
4017- }
4018-
4019- // rotate (fast algorithm, see also slow algorithm shown later)
4020- for(int mu=0; mu<dxy; mu++){
4021- for(int nu=mu; nu<dxy; nu++){
4022- for(int lambda=0; lambda<dxy; lambda++){
4023- for(int sigma=lambda; sigma<dxy; sigma++){
4024- for(int c=0; c<CartesianType_end; c++){
4025-
4026- double value=0.0;
4027- for(int i=0; i<dxy; i++){
4028- double tempI_1 = 0.0;
4029- double tempI_2 = 0.0;
4030- double tempI_3 = 0.0;
4031- double tempI_4 = 0.0;
4032- double tempI_5 = 0.0;
4033- for(int j=0; j<dxy; j++){
4034- double tempIJ_1 = 0.0;
4035- double tempIJ_2 = 0.0;
4036- double tempIJ_3 = 0.0;
4037- double tempIJ_4 = 0.0;
4038- double tempIJ_5 = 0.0;
4039- for(int k=0; k<dxy; k++){
4040- double tempIJK_1 = 0.0;
4041- double tempIJK_2 = 0.0;
4042- double tempIJK_3 = 0.0;
4043- double tempIJK_4 = 0.0;
4044- double tempIJK_5 = 0.0;
4045- for(int l=0; l<dxy; l++){
4046- tempIJK_1 += oldMatrix[i][j][k][l][c]*rotatingMatrix[sigma][l];
4047- tempIJK_2 += diatomicTwoElecTwoCore[i][j][k][l]*rotatingMatrix[sigma][l];
4048- tempIJK_3 += diatomicTwoElecTwoCore[i][j][k][l]*rotatingMatrix[sigma][l];
4049- tempIJK_4 += diatomicTwoElecTwoCore[i][j][k][l]*rotatingMatrix[sigma][l];
4050- tempIJK_5 += diatomicTwoElecTwoCore[i][j][k][l]*rotMat1stDerivatives[sigma][l][c];
4051- }
4052- tempIJ_1 += tempIJK_1*rotatingMatrix[lambda][k];
4053- tempIJ_2 += tempIJK_2*rotatingMatrix[lambda][k];
4054- tempIJ_3 += tempIJK_3*rotatingMatrix[lambda][k];
4055- tempIJ_4 += tempIJK_4*rotMat1stDerivatives[lambda][k][c];
4056- tempIJ_5 += tempIJK_5*rotatingMatrix[lambda][k];
4057- }
4058- tempI_1 += tempIJ_1*rotatingMatrix[nu][j];
4059- tempI_2 += tempIJ_2*rotatingMatrix[nu][j];
4060- tempI_3 += tempIJ_3*rotMat1stDerivatives[nu][j][c];
4061- tempI_4 += tempIJ_4*rotatingMatrix[nu][j];
4062- tempI_5 += tempIJ_5*rotatingMatrix[nu][j];
4063- }
4064- value += tempI_1*rotatingMatrix[mu][i];
4065- value += tempI_2*rotMat1stDerivatives[mu][i][c];
4066- value += tempI_3*rotatingMatrix[mu][i];
4067- value += tempI_4*rotatingMatrix[mu][i];
4068- value += tempI_5*rotatingMatrix[mu][i];
4061+ for(int axis=0; axis<CartesianType_end; axis++){
4062+ for(int mu=0; mu<dxy; mu++){
4063+ for(int nu=0; nu<dxy; nu++){
4064+ int i=mu*dxy+nu;
4065+ for(int lambda=0; lambda<dxy; lambda++){
4066+ for(int sigma=0; sigma<dxy; sigma++){
4067+ int j=lambda*dxy+sigma;
4068+ twiceRotatingMatrixDerivA[i][j] = rotMat1stDerivatives[mu][lambda][axis]
4069+ *rotatingMatrix [nu][sigma ];
4070+ twiceRotatingMatrixDerivB[i][j] = rotatingMatrix [mu][lambda]
4071+ *rotMat1stDerivatives[nu][sigma ][axis];
4072+ oldMatrix[i][j] = matrix[mu][nu][lambda][sigma][axis];
40694073 }
4070- matrix[mu][nu][lambda][sigma][c] = value;
4071- matrix[mu][nu][sigma][lambda][c] = value;
4072- matrix[nu][mu][lambda][sigma][c] = value;
4073- matrix[nu][mu][sigma][lambda][c] = value;
40744074 }
40754075 }
40764076 }
4077+ alpha = 1.0;
4078+ beta = 0.0;
4079+ MolDS_wrappers::Blas::GetInstance()->Dgemmm(isColumnMajorTwiceRotatingMatrix,
4080+ isColumnMajorOldMatrix,
4081+ !isColumnMajorTwiceRotatingMatrix,
4082+ dxy*dxy, dxy*dxy, dxy*dxy, dxy*dxy,
4083+ alpha,
4084+ twiceRotatingMatrix,
4085+ oldMatrix,
4086+ twiceRotatingMatrix,
4087+ beta,
4088+ tmpMatrix);
4089+ alpha = 1.0;
4090+ beta = 1.0;
4091+ MolDS_wrappers::Blas::GetInstance()->Dgemmm(isColumnMajorTwiceRotatingMatrix,
4092+ isColumnMajorOldMatrix,
4093+ !isColumnMajorTwiceRotatingMatrix,
4094+ dxy*dxy, dxy*dxy, dxy*dxy, dxy*dxy,
4095+ alpha,
4096+ twiceRotatingMatrixDerivA,
4097+ ptrDiatomic,
4098+ twiceRotatingMatrix,
4099+ beta,
4100+ tmpMatrix);
4101+ MolDS_wrappers::Blas::GetInstance()->Dgemmm(isColumnMajorTwiceRotatingMatrix,
4102+ isColumnMajorOldMatrix,
4103+ !isColumnMajorTwiceRotatingMatrix,
4104+ dxy*dxy, dxy*dxy, dxy*dxy, dxy*dxy,
4105+ alpha,
4106+ twiceRotatingMatrixDerivB,
4107+ ptrDiatomic,
4108+ twiceRotatingMatrix,
4109+ beta,
4110+ tmpMatrix);
4111+ MolDS_wrappers::Blas::GetInstance()->Dgemmm(isColumnMajorTwiceRotatingMatrix,
4112+ isColumnMajorOldMatrix,
4113+ !isColumnMajorTwiceRotatingMatrix,
4114+ dxy*dxy, dxy*dxy, dxy*dxy, dxy*dxy,
4115+ alpha,
4116+ twiceRotatingMatrix,
4117+ ptrDiatomic,
4118+ twiceRotatingMatrixDerivA,
4119+ beta,
4120+ tmpMatrix);
4121+ MolDS_wrappers::Blas::GetInstance()->Dgemmm(isColumnMajorTwiceRotatingMatrix,
4122+ isColumnMajorOldMatrix,
4123+ !isColumnMajorTwiceRotatingMatrix,
4124+ dxy*dxy, dxy*dxy, dxy*dxy, dxy*dxy,
4125+ alpha,
4126+ twiceRotatingMatrix,
4127+ ptrDiatomic,
4128+ twiceRotatingMatrixDerivB,
4129+ beta,
4130+ tmpMatrix);
4131+
4132+ MolDS_wrappers::Blas::GetInstance()->Dcopy(dxy*dxy*dxy*dxy,
4133+ &tmpMatrix[0][0] , incrementOne,
4134+ &matrix[0][0][0][0][axis], CartesianType_end);
40774135 }
40784136 }
4137+ catch(MolDSException ex){
4138+ this->FreeTempMatricesRotateDiatomicTwoElecTwoCore1stDerivs(&twiceRotatingMatrix,
4139+ &twiceRotatingMatrixDerivA,
4140+ &twiceRotatingMatrixDerivB,
4141+ &oldMatrix,
4142+ &tmpMatrix,
4143+ &ptrDiatomic);
4144+ throw ex;
4145+ }
4146+ this->FreeTempMatricesRotateDiatomicTwoElecTwoCore1stDerivs(&twiceRotatingMatrix,
4147+ &twiceRotatingMatrixDerivA,
4148+ &twiceRotatingMatrixDerivB,
4149+ &oldMatrix,
4150+ &tmpMatrix,
4151+ &ptrDiatomic);
40794152
40804153 /*
40814154 // rotate (slow algorithm)
@@ -4131,6 +4204,34 @@ void Mndo::RotateDiatomicTwoElecTwoCore1stDerivativesToSpaceFrame(
41314204 */
41324205 }
41334206
4207+void Mndo::MallocTempMatricesRotateDiatomicTwoElecTwoCore1stDerivs(double*** twiceRotatingMatrix,
4208+ double*** twiceRotatingMatrixDerivA,
4209+ double*** twiceRotatingMatrixDerivB,
4210+ double*** oldMatrix,
4211+ double*** tmpMatrix,
4212+ double*** ptrDiatomic) const{
4213+ MallocerFreer::GetInstance()->Malloc<double>(twiceRotatingMatrix, dxy*dxy, dxy*dxy);
4214+ MallocerFreer::GetInstance()->Malloc<double>(twiceRotatingMatrixDerivA, dxy*dxy, dxy*dxy);
4215+ MallocerFreer::GetInstance()->Malloc<double>(twiceRotatingMatrixDerivB, dxy*dxy, dxy*dxy);
4216+ MallocerFreer::GetInstance()->Malloc<double>(oldMatrix, dxy*dxy, dxy*dxy);
4217+ MallocerFreer::GetInstance()->Malloc<double>(tmpMatrix, dxy*dxy, dxy*dxy);
4218+ MallocerFreer::GetInstance()->Malloc<double*>(ptrDiatomic, dxy*dxy);
4219+}
4220+
4221+void Mndo::FreeTempMatricesRotateDiatomicTwoElecTwoCore1stDerivs(double*** twiceRotatingMatrix,
4222+ double*** twiceRotatingMatrixDerivA,
4223+ double*** twiceRotatingMatrixDerivB,
4224+ double*** oldMatrix,
4225+ double*** tmpMatrix,
4226+ double*** ptrDiatomic) const{
4227+ MallocerFreer::GetInstance()->Free<double>(twiceRotatingMatrix, dxy*dxy, dxy*dxy);
4228+ MallocerFreer::GetInstance()->Free<double>(twiceRotatingMatrixDerivA, dxy*dxy, dxy*dxy);
4229+ MallocerFreer::GetInstance()->Free<double>(twiceRotatingMatrixDerivB, dxy*dxy, dxy*dxy);
4230+ MallocerFreer::GetInstance()->Free<double>(oldMatrix, dxy*dxy, dxy*dxy);
4231+ MallocerFreer::GetInstance()->Free<double>(tmpMatrix, dxy*dxy, dxy*dxy);
4232+ MallocerFreer::GetInstance()->Free<double*>(ptrDiatomic, dxy*dxy);
4233+}
4234+
41344235 // Rotate 6-dimensional matrix from diatomic frame to space frame
41354236 // Note tha in this method d-orbitals can not be treatable.
41364237 void Mndo::RotateDiatomicTwoElecTwoCore2ndDerivativesToSpaceFrame(
--- a/src/mndo/Mndo.h
+++ b/src/mndo/Mndo.h
@@ -360,19 +360,35 @@ private:
360360 double***** diatomicTwoElecTwoCore,
361361 double****** diatomicTwoElecTwoCore1stDerivatives) const;
362362 void RotateDiatomicTwoElecTwoCoreToSpaceFrame(double**** matrix,
363- double const* const* rotatingMatrix) const;
364- void RotateDiatomicTwoElecTwoCore1stDerivativesToSpaceFrame(
365- double***** matrix,
366- double const* const* const* const* diatomicTwoElecTwoCore,
367- double const* const* rotatingMatrix,
368- double const* const* const* rotMat1stDerivatives) const;
369- void RotateDiatomicTwoElecTwoCore2ndDerivativesToSpaceFrame(
370- double****** matrix,
371- double const* const* const* const* diatomicTwoElecTwoCore,
372- double const* const* const* const* const* diatomicTwoElecTwoCore1stDerivatives,
373- double const* const* rotatingMatrix,
374- double const* const* const* rotMat1stDerivatives,
375- double const* const* const* const* rotMat2ndDerivatives) const;
363+ double const* const* rotatingMatrix) const;
364+ void RotateDiatomicTwoElecTwoCore1stDerivativesToSpaceFrame(double***** matrix,
365+ double const* const* const* const* diatomicTwoElecTwoCore,
366+ double const* const* rotatingMatrix,
367+ double const* const* const* rotMat1stDerivatives) const;
368+ void RotateDiatomicTwoElecTwoCore2ndDerivativesToSpaceFrame(double****** matrix,
369+ double const* const* const* const* diatomicTwoElecTwoCore,
370+ double const* const* const* const* const* diatomicTwoElecTwoCore1stDerivatives,
371+ double const* const* rotatingMatrix,
372+ double const* const* const* rotMat1stDerivatives,
373+ double const* const* const* const* rotMat2ndDerivatives) const;
374+ void MallocTempMatricesRotateDiatomicTwoElecTwoCore(double*** twiceRotatingMatrix,
375+ double*** ptrOldMatrix,
376+ double*** ptrMatrix) const;
377+ void FreeTempMatricesRotateDiatomicTwoElecTwoCore(double*** twiceRotatingMatrix,
378+ double*** ptrOldMatrix,
379+ double*** ptrMatrix) const;
380+ void MallocTempMatricesRotateDiatomicTwoElecTwoCore1stDerivs(double*** twiceRotatingMatrix,
381+ double*** twiceRotatingMatrixDerivA,
382+ double*** twiceRotatingMatrixDerivB,
383+ double*** oldMatrix,
384+ double*** tmpMatrix,
385+ double*** ptrDiatomic) const;
386+ void FreeTempMatricesRotateDiatomicTwoElecTwoCore1stDerivs(double*** twiceRotatingMatrix,
387+ double*** twiceRotatingMatrixDerivA,
388+ double*** twiceRotatingMatrixDerivB,
389+ double*** oldMatrix,
390+ double*** tmpMatrix,
391+ double*** ptrDiatomic) const;
376392 double GetNddoRepulsionIntegral(const MolDS_base_atoms::Atom& atomA,
377393 MolDS_base::OrbitalType mu,
378394 MolDS_base::OrbitalType nu,