Revision | dcb26e32571feb61101d55b77dfcbbd4dc85b164 (tree) |
---|---|
Time | 2013-03-09 06:34:46 |
Author | Mikiya Fujii <mikiya.fujii@gmai...> |
Commiter | Mikiya Fujii |
speed up of calculation of forces on the GS #30829
git-svn-id: https://svn.sourceforge.jp/svnroot/molds/trunk@1309 1136aad2-a195-0410-b898-f5ea1d11b9d8
@@ -3702,22 +3702,76 @@ void Cndo2::CalcDiatomicOverlapAOs1stDerivatives(double*** diatomicOverlapAOs1st | ||
3702 | 3702 | pow(cartesian[YAxis],2.0) + |
3703 | 3703 | pow(cartesian[ZAxis],2.0) ); |
3704 | 3704 | |
3705 | - double** diaOverlapAOsInDiaFrame = NULL; // diatomic overlapAOs in diatomic frame | |
3706 | - double** diaOverlapAOs1stDerivInDiaFrame = NULL; // first derivative of the diaOverlapAOs. This derivative is related to the distance between two atoms. | |
3705 | + double** diaOverlapAOsInDiaFrame = NULL; // diatomic overlapAOs in diatomic frame | |
3706 | + double** diaOverlapAOs1stDerivInDiaFrame = NULL; // first derivative of the diaOverlapAOs. This derivative is related to the distance between two atoms. | |
3707 | 3707 | double** rotMat = NULL; // rotating Matrix from the diatomic frame to space fixed frame. |
3708 | 3708 | double*** rotMat1stDerivs = NULL; // first derivatives of the rotMat. |
3709 | + double** tmpRotMat1stDeriv = NULL; | |
3710 | + double** tmpMatrix = NULL; | |
3709 | 3711 | |
3710 | 3712 | try{ |
3711 | 3713 | this->MallocDiatomicOverlapAOs1stDeriTemps(&diaOverlapAOsInDiaFrame, |
3712 | 3714 | &diaOverlapAOs1stDerivInDiaFrame, |
3713 | 3715 | &rotMat, |
3714 | - &rotMat1stDerivs); | |
3716 | + &rotMat1stDerivs, | |
3717 | + &tmpRotMat1stDeriv, | |
3718 | + &tmpMatrix); | |
3715 | 3719 | this->CalcDiatomicOverlapAOsInDiatomicFrame(diaOverlapAOsInDiaFrame, atomA, atomB); |
3716 | 3720 | this->CalcDiatomicOverlapAOs1stDerivativeInDiatomicFrame(diaOverlapAOs1stDerivInDiaFrame, atomA, atomB); |
3717 | 3721 | this->CalcRotatingMatrix(rotMat, atomA, atomB); |
3718 | 3722 | this->CalcRotatingMatrix1stDerivatives(rotMat1stDerivs, atomA, atomB); |
3719 | 3723 | |
3720 | - // rotate | |
3724 | + // rotate (fast algorithm, see also slow algorithm shown later) | |
3725 | + int incrementOne = 1; | |
3726 | + bool isColumnMajorRotatingMatrix = false; | |
3727 | + bool isColumnMajorDiaOverlapAOs = false; | |
3728 | + double alpha = 0.0; | |
3729 | + double beta = 0.0; | |
3730 | + for(int c=0; c<CartesianType_end; c++){ | |
3731 | + MolDS_wrappers::Blas::GetInstance()->Dcopy(OrbitalType_end*OrbitalType_end, | |
3732 | + &rotMat1stDerivs[0][0][c], CartesianType_end, | |
3733 | + &tmpRotMat1stDeriv[0][0], incrementOne); | |
3734 | + alpha = cartesian[c]/R; | |
3735 | + beta = 0.0; | |
3736 | + MolDS_wrappers::Blas::GetInstance()->Dgemmm(isColumnMajorRotatingMatrix, | |
3737 | + isColumnMajorDiaOverlapAOs, | |
3738 | + !isColumnMajorRotatingMatrix, | |
3739 | + OrbitalType_end, OrbitalType_end, OrbitalType_end, OrbitalType_end, | |
3740 | + alpha, | |
3741 | + rotMat, | |
3742 | + diaOverlapAOs1stDerivInDiaFrame, | |
3743 | + rotMat, | |
3744 | + beta, | |
3745 | + tmpMatrix); | |
3746 | + alpha = 1.0; | |
3747 | + beta = 1.0; | |
3748 | + MolDS_wrappers::Blas::GetInstance()->Dgemmm(isColumnMajorRotatingMatrix, | |
3749 | + isColumnMajorDiaOverlapAOs, | |
3750 | + !isColumnMajorRotatingMatrix, | |
3751 | + OrbitalType_end, OrbitalType_end, OrbitalType_end, OrbitalType_end, | |
3752 | + alpha, | |
3753 | + tmpRotMat1stDeriv, | |
3754 | + diaOverlapAOsInDiaFrame, | |
3755 | + rotMat, | |
3756 | + beta, | |
3757 | + tmpMatrix); | |
3758 | + MolDS_wrappers::Blas::GetInstance()->Dgemmm(isColumnMajorRotatingMatrix, | |
3759 | + isColumnMajorDiaOverlapAOs, | |
3760 | + !isColumnMajorRotatingMatrix, | |
3761 | + OrbitalType_end, OrbitalType_end, OrbitalType_end, OrbitalType_end, | |
3762 | + alpha, | |
3763 | + rotMat, | |
3764 | + diaOverlapAOsInDiaFrame, | |
3765 | + tmpRotMat1stDeriv, | |
3766 | + beta, | |
3767 | + tmpMatrix); | |
3768 | + MolDS_wrappers::Blas::GetInstance()->Dcopy(OrbitalType_end*OrbitalType_end, | |
3769 | + &tmpMatrix[0][0], incrementOne, | |
3770 | + &diatomicOverlapAOs1stDerivs[0][0][c], CartesianType_end); | |
3771 | + } | |
3772 | + | |
3773 | + /* | |
3774 | + // rotate (slow) | |
3721 | 3775 | for(int i=0; i<OrbitalType_end; i++){ |
3722 | 3776 | for(int j=0; j<OrbitalType_end; j++){ |
3723 | 3777 | for(int c=0; c<CartesianType_end; c++){ |
@@ -3744,19 +3798,25 @@ void Cndo2::CalcDiatomicOverlapAOs1stDerivatives(double*** diatomicOverlapAOs1st | ||
3744 | 3798 | } |
3745 | 3799 | } |
3746 | 3800 | } |
3801 | + */ | |
3802 | + | |
3747 | 3803 | } |
3748 | 3804 | catch(MolDSException ex){ |
3749 | 3805 | this->FreeDiatomicOverlapAOs1stDeriTemps(&diaOverlapAOsInDiaFrame, |
3750 | 3806 | &diaOverlapAOs1stDerivInDiaFrame, |
3751 | 3807 | &rotMat, |
3752 | - &rotMat1stDerivs); | |
3808 | + &rotMat1stDerivs, | |
3809 | + &tmpRotMat1stDeriv, | |
3810 | + &tmpMatrix); | |
3753 | 3811 | throw ex; |
3754 | 3812 | } |
3755 | 3813 | // free |
3756 | 3814 | this->FreeDiatomicOverlapAOs1stDeriTemps(&diaOverlapAOsInDiaFrame, |
3757 | 3815 | &diaOverlapAOs1stDerivInDiaFrame, |
3758 | 3816 | &rotMat, |
3759 | - &rotMat1stDerivs); | |
3817 | + &rotMat1stDerivs, | |
3818 | + &tmpRotMat1stDeriv, | |
3819 | + &tmpMatrix); | |
3760 | 3820 | } |
3761 | 3821 | |
3762 | 3822 | void Cndo2::CalcDiatomicOverlapAOs1stDerivatives(double*** diatomicOverlapAOs1stDerivs, |
@@ -3944,14 +4004,18 @@ double Cndo2::Get2ndDerivativeElementFromDistanceDerivatives(double firstDistanc | ||
3944 | 4004 | return value; |
3945 | 4005 | } |
3946 | 4006 | |
3947 | -void Cndo2::MallocDiatomicOverlapAOs1stDeriTemps(double*** diaOverlapAOsInDiaFrame, | |
3948 | - double*** diaOverlapAOs1stDerivInDiaFrame, | |
3949 | - double*** rotMat, | |
3950 | - double**** rotMat1stDerivs) const{ | |
4007 | +void Cndo2::MallocDiatomicOverlapAOs1stDeriTemps(double*** diaOverlapAOsInDiaFrame, | |
4008 | + double*** diaOverlapAOs1stDerivInDiaFrame, | |
4009 | + double*** rotMat, | |
4010 | + double**** rotMat1stDerivs, | |
4011 | + double*** tmpRotMat1stDeriv, | |
4012 | + double*** tmpMatrix) const{ | |
3951 | 4013 | MallocerFreer::GetInstance()->Malloc<double>(diaOverlapAOsInDiaFrame, OrbitalType_end, OrbitalType_end); |
3952 | 4014 | MallocerFreer::GetInstance()->Malloc<double>(diaOverlapAOs1stDerivInDiaFrame, OrbitalType_end, OrbitalType_end); |
3953 | 4015 | MallocerFreer::GetInstance()->Malloc<double>(rotMat, OrbitalType_end, OrbitalType_end); |
3954 | 4016 | MallocerFreer::GetInstance()->Malloc<double>(rotMat1stDerivs, OrbitalType_end, OrbitalType_end, CartesianType_end); |
4017 | + MallocerFreer::GetInstance()->Malloc<double>(tmpMatrix, OrbitalType_end, OrbitalType_end); | |
4018 | + MallocerFreer::GetInstance()->Malloc<double>(tmpRotMat1stDeriv, OrbitalType_end, OrbitalType_end); | |
3955 | 4019 | } |
3956 | 4020 | |
3957 | 4021 | void Cndo2::MallocDiatomicOverlapAOs2ndDeriTemps(double*** diaOverlapAOsInDiaFrame, |
@@ -3975,11 +4039,15 @@ void Cndo2::MallocDiatomicOverlapAOs2ndDeriTemps(double*** diaOverlapAOsInDiaFra | ||
3975 | 4039 | void Cndo2::FreeDiatomicOverlapAOs1stDeriTemps(double*** diaOverlapAOsInDiaFrame, |
3976 | 4040 | double*** diaOverlapAOs1stDerivInDiaFrame, |
3977 | 4041 | double*** rotMat, |
3978 | - double**** rotMat1stDerivs) const{ | |
4042 | + double**** rotMat1stDerivs, | |
4043 | + double*** tmpRotMat1stDeriv, | |
4044 | + double*** tmpMatrix) const{ | |
3979 | 4045 | MallocerFreer::GetInstance()->Free<double>(diaOverlapAOsInDiaFrame, OrbitalType_end, OrbitalType_end); |
3980 | 4046 | MallocerFreer::GetInstance()->Free<double>(diaOverlapAOs1stDerivInDiaFrame, OrbitalType_end, OrbitalType_end); |
3981 | 4047 | MallocerFreer::GetInstance()->Free<double>(rotMat, OrbitalType_end, OrbitalType_end); |
3982 | 4048 | MallocerFreer::GetInstance()->Free<double>(rotMat1stDerivs, OrbitalType_end, OrbitalType_end, CartesianType_end); |
4049 | + MallocerFreer::GetInstance()->Free<double>(tmpMatrix, OrbitalType_end, OrbitalType_end); | |
4050 | + MallocerFreer::GetInstance()->Free<double>(tmpRotMat1stDeriv, OrbitalType_end, OrbitalType_end); | |
3983 | 4051 | } |
3984 | 4052 | |
3985 | 4053 | void Cndo2::FreeDiatomicOverlapAOs2ndDeriTemps(double*** diaOverlapAOsInDiaFrame, |
@@ -462,7 +462,9 @@ private: | ||
462 | 462 | void MallocDiatomicOverlapAOs1stDeriTemps(double*** diaOverlapAOsInDiaFrame, |
463 | 463 | double*** diaOverlapAOs1stDerivInDiaFrame, |
464 | 464 | double*** rotMat, |
465 | - double**** rotMat1stDerivs) const; | |
465 | + double**** rotMat1stDerivs, | |
466 | + double*** tmpRotMat1stDeriv, | |
467 | + double*** tmpMatrix) const; | |
466 | 468 | void MallocDiatomicOverlapAOs2ndDeriTemps(double*** diaOverlapAOsInDiaFrame, |
467 | 469 | double*** diaOverlapAOs1stDerivInDiaFrame, |
468 | 470 | double*** diaOverlapAOs2ndDerivInDiaFrame, |
@@ -474,7 +476,9 @@ private: | ||
474 | 476 | void FreeDiatomicOverlapAOs1stDeriTemps(double*** diaOverlapAOsInDiaFrame, |
475 | 477 | double*** diaOverlapAOs1stDerivInDiaFrame, |
476 | 478 | double*** rotMat, |
477 | - double**** rotMat1stDerivs) const; | |
479 | + double**** rotMat1stDerivs, | |
480 | + double*** tmpRotMat1stDeriv, | |
481 | + double*** tmpMatrix) const; | |
478 | 482 | void FreeDiatomicOverlapAOs2ndDeriTemps(double*** diaOverlapAOsInDiaFrame, |
479 | 483 | double*** diaOverlapAOs1stDerivInDiaFrame, |
480 | 484 | double*** diaOverlapAOs2ndDerivInDiaFrame, |
@@ -97,19 +97,9 @@ void MD::DoMD(){ | ||
97 | 97 | for(int s=0; s<totalSteps; s++){ |
98 | 98 | this->OutputLog(boost::format("%s%d\n") % this->messageStartStepMD.c_str() % (s+1) ); |
99 | 99 | |
100 | - // update momenta | |
101 | - this->UpdateMomenta(*this->molecule, matrixForce, dt); | |
102 | - | |
103 | - // update coordinates | |
104 | - for(int a=0; a<this->molecule->GetNumberAtoms(); a++){ | |
105 | - Atom* atom = this->molecule->GetAtom(a); | |
106 | - double coreMass = atom->GetAtomicMass() - static_cast<double>(atom->GetNumberValenceElectrons()); | |
107 | - for(int i=0; i<CartesianType_end; i++){ | |
108 | - atom->GetXyz()[i] += dt*atom->GetPxyz()[i]/coreMass; | |
109 | - } | |
110 | - } | |
111 | - this->molecule->CalcXyzCOM(); | |
112 | - this->molecule->CalcXyzCOC(); | |
100 | + // update momenta & coordinates | |
101 | + this->UpdateMomenta (*this->molecule, matrixForce, dt); | |
102 | + this->UpdateCoordinates(*this->molecule, dt); | |
113 | 103 | |
114 | 104 | // update electronic structure |
115 | 105 | electronicStructure->DoSCF(requireGuess); |
@@ -138,6 +128,7 @@ void MD::DoMD(){ | ||
138 | 128 | } |
139 | 129 | |
140 | 130 | void MD::UpdateMomenta(const Molecule& molecule, double const* const* matrixForce, double dt) const{ |
131 | +#pragma omp parallel for schedule(auto) | |
141 | 132 | for(int a=0; a<molecule.GetNumberAtoms(); a++){ |
142 | 133 | Atom* atom = molecule.GetAtom(a); |
143 | 134 | for(int i=0; i<CartesianType_end; i++){ |
@@ -146,6 +137,19 @@ void MD::UpdateMomenta(const Molecule& molecule, double const* const* matrixForc | ||
146 | 137 | } |
147 | 138 | } |
148 | 139 | |
140 | +void MD::UpdateCoordinates(Molecule& molecule, double dt) const{ | |
141 | +#pragma omp parallel for schedule(auto) | |
142 | + for(int a=0; a<molecule.GetNumberAtoms(); a++){ | |
143 | + Atom* atom = molecule.GetAtom(a); | |
144 | + double coreMass = atom->GetAtomicMass() - static_cast<double>(atom->GetNumberValenceElectrons()); | |
145 | + for(int i=0; i<CartesianType_end; i++){ | |
146 | + atom->GetXyz()[i] += dt*atom->GetPxyz()[i]/coreMass; | |
147 | + } | |
148 | + } | |
149 | + molecule.CalcXyzCOM(); | |
150 | + molecule.CalcXyzCOC(); | |
151 | +} | |
152 | + | |
149 | 153 | void MD::SetMessages(){ |
150 | 154 | this->errorMessageTheoryType = "\ttheory type = "; |
151 | 155 | this->errorMessageNotEnebleTheoryType |
@@ -52,7 +52,8 @@ private: | ||
52 | 52 | void CheckEnableTheoryType(MolDS_base::TheoryType theoryType); |
53 | 53 | void SetMessages(); |
54 | 54 | void SetEnableTheoryTypes(); |
55 | - void UpdateMomenta(const MolDS_base::Molecule& molecule, double const* const* matrixForce, double dt) const; | |
55 | + void UpdateMomenta (const MolDS_base::Molecule& molecule, double const* const* matrixForce, double dt) const; | |
56 | + void UpdateCoordinates( MolDS_base::Molecule& molecule, double dt) const; | |
56 | 57 | void OutputEnergies(boost::shared_ptr<MolDS_base::ElectronicStructure> electronicStructure, double initialEnergy); |
57 | 58 | double OutputEnergies(boost::shared_ptr<MolDS_base::ElectronicStructure> electronicStructure); |
58 | 59 | }; |
@@ -3935,40 +3935,46 @@ void Mndo::RotateDiatomicTwoElecTwoCoreToSpaceFrame(double**** matrix, | ||
3935 | 3935 | double** twiceRotatingMatrix = NULL; |
3936 | 3936 | double** ptrOldMatrix = NULL; |
3937 | 3937 | double** ptrMatrix = NULL; |
3938 | - MallocerFreer::GetInstance()->Malloc<double>(&twiceRotatingMatrix, dxy*dxy, dxy*dxy); | |
3939 | - MallocerFreer::GetInstance()->Malloc<double*>(&ptrOldMatrix, dxy*dxy); | |
3940 | - MallocerFreer::GetInstance()->Malloc<double*>(&ptrMatrix, dxy*dxy); | |
3941 | - for(int mu=0; mu<dxy; mu++){ | |
3942 | - for(int nu=0; nu<dxy; nu++){ | |
3943 | - int i=mu*dxy+nu; | |
3944 | - for(int lambda=0; lambda<dxy; lambda++){ | |
3945 | - for(int sigma=0; sigma<dxy; sigma++){ | |
3946 | - int j=lambda*dxy+sigma; | |
3947 | - twiceRotatingMatrix[i][j] = rotatingMatrix[mu][lambda]*rotatingMatrix[nu][sigma]; | |
3938 | + try{ | |
3939 | + MallocTempMatricesRotateDiatomicTwoElecTwoCore(&twiceRotatingMatrix, | |
3940 | + &ptrOldMatrix, | |
3941 | + &ptrMatrix); | |
3942 | + for(int mu=0; mu<dxy; mu++){ | |
3943 | + for(int nu=0; nu<dxy; nu++){ | |
3944 | + int i=mu*dxy+nu; | |
3945 | + for(int lambda=0; lambda<dxy; lambda++){ | |
3946 | + for(int sigma=0; sigma<dxy; sigma++){ | |
3947 | + int j=lambda*dxy+sigma; | |
3948 | + twiceRotatingMatrix[i][j] = rotatingMatrix[mu][lambda]*rotatingMatrix[nu][sigma]; | |
3949 | + } | |
3948 | 3950 | } |
3951 | + ptrOldMatrix[i] = &oldMatrix[mu][nu][0][0]; | |
3952 | + ptrMatrix [i] = &matrix [mu][nu][0][0]; | |
3949 | 3953 | } |
3950 | - ptrOldMatrix[i] = &oldMatrix[mu][nu][0][0]; | |
3951 | - ptrMatrix [i] = &matrix [mu][nu][0][0]; | |
3952 | 3954 | } |
3955 | + bool isColumnMajorTwiceRotatingMatrix = false; | |
3956 | + bool isColumnMajorPtrOldMatrix = false; | |
3957 | + double alpha = 1.0; | |
3958 | + double beta = 0.0; | |
3959 | + MolDS_wrappers::Blas::GetInstance()->Dgemmm(isColumnMajorTwiceRotatingMatrix, | |
3960 | + isColumnMajorPtrOldMatrix, | |
3961 | + !isColumnMajorTwiceRotatingMatrix, | |
3962 | + dxy*dxy, dxy*dxy, dxy*dxy, dxy*dxy, | |
3963 | + alpha, | |
3964 | + twiceRotatingMatrix, | |
3965 | + ptrOldMatrix, | |
3966 | + twiceRotatingMatrix, | |
3967 | + beta, | |
3968 | + ptrMatrix); | |
3953 | 3969 | } |
3954 | - bool isColumnMajorTwiceRotatingMatrix = false; | |
3955 | - bool isColumnMajorPtrOldMatrix = false; | |
3956 | - double alpha = 1.0; | |
3957 | - double beta = 0.0; | |
3958 | - MolDS_wrappers::Blas::GetInstance()->Dgemmm(isColumnMajorTwiceRotatingMatrix, | |
3959 | - isColumnMajorPtrOldMatrix, | |
3960 | - !isColumnMajorTwiceRotatingMatrix, | |
3961 | - dxy*dxy, dxy*dxy, dxy*dxy, dxy*dxy, | |
3962 | - alpha, | |
3963 | - twiceRotatingMatrix, | |
3964 | - ptrOldMatrix, | |
3965 | - twiceRotatingMatrix, | |
3966 | - beta, | |
3967 | - ptrMatrix); | |
3968 | - MallocerFreer::GetInstance()->Free<double>(&twiceRotatingMatrix, dxy*dxy, dxy*dxy); | |
3969 | - MallocerFreer::GetInstance()->Free<double*>(&ptrOldMatrix, dxy*dxy); | |
3970 | - MallocerFreer::GetInstance()->Free<double*>(&ptrMatrix, dxy*dxy); | |
3971 | - | |
3970 | + catch(MolDSException ex){ | |
3971 | + FreeTempMatricesRotateDiatomicTwoElecTwoCore(&twiceRotatingMatrix, | |
3972 | + &ptrOldMatrix, | |
3973 | + &ptrMatrix); | |
3974 | + } | |
3975 | + FreeTempMatricesRotateDiatomicTwoElecTwoCore(&twiceRotatingMatrix, | |
3976 | + &ptrOldMatrix, | |
3977 | + &ptrMatrix); | |
3972 | 3978 | /* |
3973 | 3979 | // rotate (slow algorithm) |
3974 | 3980 | for(int mu=0; mu<dxy; mu++){ |
@@ -3996,86 +4002,153 @@ void Mndo::RotateDiatomicTwoElecTwoCoreToSpaceFrame(double**** matrix, | ||
3996 | 4002 | */ |
3997 | 4003 | } |
3998 | 4004 | |
4005 | +void Mndo::MallocTempMatricesRotateDiatomicTwoElecTwoCore(double*** twiceRotatingMatrix, | |
4006 | + double*** ptrOldMatrix, | |
4007 | + double*** ptrMatrix) const{ | |
4008 | + MallocerFreer::GetInstance()->Malloc<double>(twiceRotatingMatrix, dxy*dxy, dxy*dxy); | |
4009 | + MallocerFreer::GetInstance()->Malloc<double*>(ptrOldMatrix, dxy*dxy); | |
4010 | + MallocerFreer::GetInstance()->Malloc<double*>(ptrMatrix, dxy*dxy); | |
4011 | +} | |
4012 | + | |
4013 | +void Mndo::FreeTempMatricesRotateDiatomicTwoElecTwoCore(double*** twiceRotatingMatrix, | |
4014 | + double*** ptrOldMatrix, | |
4015 | + double*** ptrMatrix) const{ | |
4016 | + MallocerFreer::GetInstance()->Free<double>(twiceRotatingMatrix, dxy*dxy, dxy*dxy); | |
4017 | + MallocerFreer::GetInstance()->Free<double*>(ptrOldMatrix, dxy*dxy); | |
4018 | + MallocerFreer::GetInstance()->Free<double*>(ptrMatrix, dxy*dxy); | |
4019 | +} | |
4020 | + | |
3999 | 4021 | // Rotate 5-dimensional matrix from diatomic frame to space frame |
4000 | 4022 | // Note tha in this method d-orbitals can not be treatable. |
4001 | 4023 | void Mndo::RotateDiatomicTwoElecTwoCore1stDerivativesToSpaceFrame( |
4002 | 4024 | double***** matrix, |
4003 | - double const* const* const* const* diatomicTwoElecTwoCore, | |
4025 | + double const* const*const* const* diatomicTwoElecTwoCore, | |
4004 | 4026 | double const* const* rotatingMatrix, |
4005 | 4027 | double const* const* const* rotMat1stDerivatives) const{ |
4006 | - double oldMatrix[dxy][dxy][dxy][dxy][CartesianType_end]; | |
4007 | - for(int mu=0; mu<dxy; mu++){ | |
4008 | - for(int nu=0; nu<dxy; nu++){ | |
4009 | - for(int lambda=0; lambda<dxy; lambda++){ | |
4010 | - for(int sigma=0; sigma<dxy; sigma++){ | |
4011 | - for(int c=0; c<CartesianType_end; c++){ | |
4012 | - oldMatrix[mu][nu][lambda][sigma][c] = matrix[mu][nu][lambda][sigma][c]; | |
4028 | + | |
4029 | + // rotate (fast algorithm, see also slow algorithm shown later) | |
4030 | + int incrementOne = 1; | |
4031 | + bool isColumnMajorTwiceRotatingMatrix = false; | |
4032 | + bool isColumnMajorOldMatrix = false; | |
4033 | + double alpha; | |
4034 | + double beta; | |
4035 | + double** twiceRotatingMatrix = NULL; | |
4036 | + double** twiceRotatingMatrixDerivA = NULL; | |
4037 | + double** twiceRotatingMatrixDerivB = NULL; | |
4038 | + double** oldMatrix = NULL; | |
4039 | + double** tmpMatrix = NULL; | |
4040 | + double** ptrDiatomic = NULL; | |
4041 | + try{ | |
4042 | + this->MallocTempMatricesRotateDiatomicTwoElecTwoCore1stDerivs(&twiceRotatingMatrix, | |
4043 | + &twiceRotatingMatrixDerivA, | |
4044 | + &twiceRotatingMatrixDerivB, | |
4045 | + &oldMatrix, | |
4046 | + &tmpMatrix, | |
4047 | + &ptrDiatomic); | |
4048 | + for(int mu=0; mu<dxy; mu++){ | |
4049 | + for(int nu=0; nu<dxy; nu++){ | |
4050 | + int i=mu*dxy+nu; | |
4051 | + for(int lambda=0; lambda<dxy; lambda++){ | |
4052 | + for(int sigma=0; sigma<dxy; sigma++){ | |
4053 | + int j=lambda*dxy+sigma; | |
4054 | + twiceRotatingMatrix[i][j] = rotatingMatrix[mu][lambda] | |
4055 | + *rotatingMatrix[nu][sigma ]; | |
4013 | 4056 | } |
4014 | 4057 | } |
4058 | + ptrDiatomic[i] = const_cast<double*>(&diatomicTwoElecTwoCore[mu][nu][0][0]); | |
4015 | 4059 | } |
4016 | 4060 | } |
4017 | - } | |
4018 | - | |
4019 | - // rotate (fast algorithm, see also slow algorithm shown later) | |
4020 | - for(int mu=0; mu<dxy; mu++){ | |
4021 | - for(int nu=mu; nu<dxy; nu++){ | |
4022 | - for(int lambda=0; lambda<dxy; lambda++){ | |
4023 | - for(int sigma=lambda; sigma<dxy; sigma++){ | |
4024 | - for(int c=0; c<CartesianType_end; c++){ | |
4025 | - | |
4026 | - double value=0.0; | |
4027 | - for(int i=0; i<dxy; i++){ | |
4028 | - double tempI_1 = 0.0; | |
4029 | - double tempI_2 = 0.0; | |
4030 | - double tempI_3 = 0.0; | |
4031 | - double tempI_4 = 0.0; | |
4032 | - double tempI_5 = 0.0; | |
4033 | - for(int j=0; j<dxy; j++){ | |
4034 | - double tempIJ_1 = 0.0; | |
4035 | - double tempIJ_2 = 0.0; | |
4036 | - double tempIJ_3 = 0.0; | |
4037 | - double tempIJ_4 = 0.0; | |
4038 | - double tempIJ_5 = 0.0; | |
4039 | - for(int k=0; k<dxy; k++){ | |
4040 | - double tempIJK_1 = 0.0; | |
4041 | - double tempIJK_2 = 0.0; | |
4042 | - double tempIJK_3 = 0.0; | |
4043 | - double tempIJK_4 = 0.0; | |
4044 | - double tempIJK_5 = 0.0; | |
4045 | - for(int l=0; l<dxy; l++){ | |
4046 | - tempIJK_1 += oldMatrix[i][j][k][l][c]*rotatingMatrix[sigma][l]; | |
4047 | - tempIJK_2 += diatomicTwoElecTwoCore[i][j][k][l]*rotatingMatrix[sigma][l]; | |
4048 | - tempIJK_3 += diatomicTwoElecTwoCore[i][j][k][l]*rotatingMatrix[sigma][l]; | |
4049 | - tempIJK_4 += diatomicTwoElecTwoCore[i][j][k][l]*rotatingMatrix[sigma][l]; | |
4050 | - tempIJK_5 += diatomicTwoElecTwoCore[i][j][k][l]*rotMat1stDerivatives[sigma][l][c]; | |
4051 | - } | |
4052 | - tempIJ_1 += tempIJK_1*rotatingMatrix[lambda][k]; | |
4053 | - tempIJ_2 += tempIJK_2*rotatingMatrix[lambda][k]; | |
4054 | - tempIJ_3 += tempIJK_3*rotatingMatrix[lambda][k]; | |
4055 | - tempIJ_4 += tempIJK_4*rotMat1stDerivatives[lambda][k][c]; | |
4056 | - tempIJ_5 += tempIJK_5*rotatingMatrix[lambda][k]; | |
4057 | - } | |
4058 | - tempI_1 += tempIJ_1*rotatingMatrix[nu][j]; | |
4059 | - tempI_2 += tempIJ_2*rotatingMatrix[nu][j]; | |
4060 | - tempI_3 += tempIJ_3*rotMat1stDerivatives[nu][j][c]; | |
4061 | - tempI_4 += tempIJ_4*rotatingMatrix[nu][j]; | |
4062 | - tempI_5 += tempIJ_5*rotatingMatrix[nu][j]; | |
4063 | - } | |
4064 | - value += tempI_1*rotatingMatrix[mu][i]; | |
4065 | - value += tempI_2*rotMat1stDerivatives[mu][i][c]; | |
4066 | - value += tempI_3*rotatingMatrix[mu][i]; | |
4067 | - value += tempI_4*rotatingMatrix[mu][i]; | |
4068 | - value += tempI_5*rotatingMatrix[mu][i]; | |
4061 | + for(int axis=0; axis<CartesianType_end; axis++){ | |
4062 | + for(int mu=0; mu<dxy; mu++){ | |
4063 | + for(int nu=0; nu<dxy; nu++){ | |
4064 | + int i=mu*dxy+nu; | |
4065 | + for(int lambda=0; lambda<dxy; lambda++){ | |
4066 | + for(int sigma=0; sigma<dxy; sigma++){ | |
4067 | + int j=lambda*dxy+sigma; | |
4068 | + twiceRotatingMatrixDerivA[i][j] = rotMat1stDerivatives[mu][lambda][axis] | |
4069 | + *rotatingMatrix [nu][sigma ]; | |
4070 | + twiceRotatingMatrixDerivB[i][j] = rotatingMatrix [mu][lambda] | |
4071 | + *rotMat1stDerivatives[nu][sigma ][axis]; | |
4072 | + oldMatrix[i][j] = matrix[mu][nu][lambda][sigma][axis]; | |
4069 | 4073 | } |
4070 | - matrix[mu][nu][lambda][sigma][c] = value; | |
4071 | - matrix[mu][nu][sigma][lambda][c] = value; | |
4072 | - matrix[nu][mu][lambda][sigma][c] = value; | |
4073 | - matrix[nu][mu][sigma][lambda][c] = value; | |
4074 | 4074 | } |
4075 | 4075 | } |
4076 | 4076 | } |
4077 | + alpha = 1.0; | |
4078 | + beta = 0.0; | |
4079 | + MolDS_wrappers::Blas::GetInstance()->Dgemmm(isColumnMajorTwiceRotatingMatrix, | |
4080 | + isColumnMajorOldMatrix, | |
4081 | + !isColumnMajorTwiceRotatingMatrix, | |
4082 | + dxy*dxy, dxy*dxy, dxy*dxy, dxy*dxy, | |
4083 | + alpha, | |
4084 | + twiceRotatingMatrix, | |
4085 | + oldMatrix, | |
4086 | + twiceRotatingMatrix, | |
4087 | + beta, | |
4088 | + tmpMatrix); | |
4089 | + alpha = 1.0; | |
4090 | + beta = 1.0; | |
4091 | + MolDS_wrappers::Blas::GetInstance()->Dgemmm(isColumnMajorTwiceRotatingMatrix, | |
4092 | + isColumnMajorOldMatrix, | |
4093 | + !isColumnMajorTwiceRotatingMatrix, | |
4094 | + dxy*dxy, dxy*dxy, dxy*dxy, dxy*dxy, | |
4095 | + alpha, | |
4096 | + twiceRotatingMatrixDerivA, | |
4097 | + ptrDiatomic, | |
4098 | + twiceRotatingMatrix, | |
4099 | + beta, | |
4100 | + tmpMatrix); | |
4101 | + MolDS_wrappers::Blas::GetInstance()->Dgemmm(isColumnMajorTwiceRotatingMatrix, | |
4102 | + isColumnMajorOldMatrix, | |
4103 | + !isColumnMajorTwiceRotatingMatrix, | |
4104 | + dxy*dxy, dxy*dxy, dxy*dxy, dxy*dxy, | |
4105 | + alpha, | |
4106 | + twiceRotatingMatrixDerivB, | |
4107 | + ptrDiatomic, | |
4108 | + twiceRotatingMatrix, | |
4109 | + beta, | |
4110 | + tmpMatrix); | |
4111 | + MolDS_wrappers::Blas::GetInstance()->Dgemmm(isColumnMajorTwiceRotatingMatrix, | |
4112 | + isColumnMajorOldMatrix, | |
4113 | + !isColumnMajorTwiceRotatingMatrix, | |
4114 | + dxy*dxy, dxy*dxy, dxy*dxy, dxy*dxy, | |
4115 | + alpha, | |
4116 | + twiceRotatingMatrix, | |
4117 | + ptrDiatomic, | |
4118 | + twiceRotatingMatrixDerivA, | |
4119 | + beta, | |
4120 | + tmpMatrix); | |
4121 | + MolDS_wrappers::Blas::GetInstance()->Dgemmm(isColumnMajorTwiceRotatingMatrix, | |
4122 | + isColumnMajorOldMatrix, | |
4123 | + !isColumnMajorTwiceRotatingMatrix, | |
4124 | + dxy*dxy, dxy*dxy, dxy*dxy, dxy*dxy, | |
4125 | + alpha, | |
4126 | + twiceRotatingMatrix, | |
4127 | + ptrDiatomic, | |
4128 | + twiceRotatingMatrixDerivB, | |
4129 | + beta, | |
4130 | + tmpMatrix); | |
4131 | + | |
4132 | + MolDS_wrappers::Blas::GetInstance()->Dcopy(dxy*dxy*dxy*dxy, | |
4133 | + &tmpMatrix[0][0] , incrementOne, | |
4134 | + &matrix[0][0][0][0][axis], CartesianType_end); | |
4077 | 4135 | } |
4078 | 4136 | } |
4137 | + catch(MolDSException ex){ | |
4138 | + this->FreeTempMatricesRotateDiatomicTwoElecTwoCore1stDerivs(&twiceRotatingMatrix, | |
4139 | + &twiceRotatingMatrixDerivA, | |
4140 | + &twiceRotatingMatrixDerivB, | |
4141 | + &oldMatrix, | |
4142 | + &tmpMatrix, | |
4143 | + &ptrDiatomic); | |
4144 | + throw ex; | |
4145 | + } | |
4146 | + this->FreeTempMatricesRotateDiatomicTwoElecTwoCore1stDerivs(&twiceRotatingMatrix, | |
4147 | + &twiceRotatingMatrixDerivA, | |
4148 | + &twiceRotatingMatrixDerivB, | |
4149 | + &oldMatrix, | |
4150 | + &tmpMatrix, | |
4151 | + &ptrDiatomic); | |
4079 | 4152 | |
4080 | 4153 | /* |
4081 | 4154 | // rotate (slow algorithm) |
@@ -4131,6 +4204,34 @@ void Mndo::RotateDiatomicTwoElecTwoCore1stDerivativesToSpaceFrame( | ||
4131 | 4204 | */ |
4132 | 4205 | } |
4133 | 4206 | |
4207 | +void Mndo::MallocTempMatricesRotateDiatomicTwoElecTwoCore1stDerivs(double*** twiceRotatingMatrix, | |
4208 | + double*** twiceRotatingMatrixDerivA, | |
4209 | + double*** twiceRotatingMatrixDerivB, | |
4210 | + double*** oldMatrix, | |
4211 | + double*** tmpMatrix, | |
4212 | + double*** ptrDiatomic) const{ | |
4213 | + MallocerFreer::GetInstance()->Malloc<double>(twiceRotatingMatrix, dxy*dxy, dxy*dxy); | |
4214 | + MallocerFreer::GetInstance()->Malloc<double>(twiceRotatingMatrixDerivA, dxy*dxy, dxy*dxy); | |
4215 | + MallocerFreer::GetInstance()->Malloc<double>(twiceRotatingMatrixDerivB, dxy*dxy, dxy*dxy); | |
4216 | + MallocerFreer::GetInstance()->Malloc<double>(oldMatrix, dxy*dxy, dxy*dxy); | |
4217 | + MallocerFreer::GetInstance()->Malloc<double>(tmpMatrix, dxy*dxy, dxy*dxy); | |
4218 | + MallocerFreer::GetInstance()->Malloc<double*>(ptrDiatomic, dxy*dxy); | |
4219 | +} | |
4220 | + | |
4221 | +void Mndo::FreeTempMatricesRotateDiatomicTwoElecTwoCore1stDerivs(double*** twiceRotatingMatrix, | |
4222 | + double*** twiceRotatingMatrixDerivA, | |
4223 | + double*** twiceRotatingMatrixDerivB, | |
4224 | + double*** oldMatrix, | |
4225 | + double*** tmpMatrix, | |
4226 | + double*** ptrDiatomic) const{ | |
4227 | + MallocerFreer::GetInstance()->Free<double>(twiceRotatingMatrix, dxy*dxy, dxy*dxy); | |
4228 | + MallocerFreer::GetInstance()->Free<double>(twiceRotatingMatrixDerivA, dxy*dxy, dxy*dxy); | |
4229 | + MallocerFreer::GetInstance()->Free<double>(twiceRotatingMatrixDerivB, dxy*dxy, dxy*dxy); | |
4230 | + MallocerFreer::GetInstance()->Free<double>(oldMatrix, dxy*dxy, dxy*dxy); | |
4231 | + MallocerFreer::GetInstance()->Free<double>(tmpMatrix, dxy*dxy, dxy*dxy); | |
4232 | + MallocerFreer::GetInstance()->Free<double*>(ptrDiatomic, dxy*dxy); | |
4233 | +} | |
4234 | + | |
4134 | 4235 | // Rotate 6-dimensional matrix from diatomic frame to space frame |
4135 | 4236 | // Note tha in this method d-orbitals can not be treatable. |
4136 | 4237 | void Mndo::RotateDiatomicTwoElecTwoCore2ndDerivativesToSpaceFrame( |
@@ -360,19 +360,35 @@ private: | ||
360 | 360 | double***** diatomicTwoElecTwoCore, |
361 | 361 | double****** diatomicTwoElecTwoCore1stDerivatives) const; |
362 | 362 | void RotateDiatomicTwoElecTwoCoreToSpaceFrame(double**** matrix, |
363 | - double const* const* rotatingMatrix) const; | |
364 | - void RotateDiatomicTwoElecTwoCore1stDerivativesToSpaceFrame( | |
365 | - double***** matrix, | |
366 | - double const* const* const* const* diatomicTwoElecTwoCore, | |
367 | - double const* const* rotatingMatrix, | |
368 | - double const* const* const* rotMat1stDerivatives) const; | |
369 | - void RotateDiatomicTwoElecTwoCore2ndDerivativesToSpaceFrame( | |
370 | - double****** matrix, | |
371 | - double const* const* const* const* diatomicTwoElecTwoCore, | |
372 | - double const* const* const* const* const* diatomicTwoElecTwoCore1stDerivatives, | |
373 | - double const* const* rotatingMatrix, | |
374 | - double const* const* const* rotMat1stDerivatives, | |
375 | - double const* const* const* const* rotMat2ndDerivatives) const; | |
363 | + double const* const* rotatingMatrix) const; | |
364 | + void RotateDiatomicTwoElecTwoCore1stDerivativesToSpaceFrame(double***** matrix, | |
365 | + double const* const* const* const* diatomicTwoElecTwoCore, | |
366 | + double const* const* rotatingMatrix, | |
367 | + double const* const* const* rotMat1stDerivatives) const; | |
368 | + void RotateDiatomicTwoElecTwoCore2ndDerivativesToSpaceFrame(double****** matrix, | |
369 | + double const* const* const* const* diatomicTwoElecTwoCore, | |
370 | + double const* const* const* const* const* diatomicTwoElecTwoCore1stDerivatives, | |
371 | + double const* const* rotatingMatrix, | |
372 | + double const* const* const* rotMat1stDerivatives, | |
373 | + double const* const* const* const* rotMat2ndDerivatives) const; | |
374 | + void MallocTempMatricesRotateDiatomicTwoElecTwoCore(double*** twiceRotatingMatrix, | |
375 | + double*** ptrOldMatrix, | |
376 | + double*** ptrMatrix) const; | |
377 | + void FreeTempMatricesRotateDiatomicTwoElecTwoCore(double*** twiceRotatingMatrix, | |
378 | + double*** ptrOldMatrix, | |
379 | + double*** ptrMatrix) const; | |
380 | + void MallocTempMatricesRotateDiatomicTwoElecTwoCore1stDerivs(double*** twiceRotatingMatrix, | |
381 | + double*** twiceRotatingMatrixDerivA, | |
382 | + double*** twiceRotatingMatrixDerivB, | |
383 | + double*** oldMatrix, | |
384 | + double*** tmpMatrix, | |
385 | + double*** ptrDiatomic) const; | |
386 | + void FreeTempMatricesRotateDiatomicTwoElecTwoCore1stDerivs(double*** twiceRotatingMatrix, | |
387 | + double*** twiceRotatingMatrixDerivA, | |
388 | + double*** twiceRotatingMatrixDerivB, | |
389 | + double*** oldMatrix, | |
390 | + double*** tmpMatrix, | |
391 | + double*** ptrDiatomic) const; | |
376 | 392 | double GetNddoRepulsionIntegral(const MolDS_base_atoms::Atom& atomA, |
377 | 393 | MolDS_base::OrbitalType mu, |
378 | 394 | MolDS_base::OrbitalType nu, |