Revision | 3a4ea2bbc5b21d2264be15a420010fa15335fe74 (tree) |
---|---|
Time | 2013-10-16 10:34:51 |
Author | Mikiya Fujii <mikiya.fujii@gmai...> |
Committer | Mikiya Fujii |
Refactoring of using DGEMM and DGEMMM to cache temporary memory. #32299
git-svn-id: https://svn.sourceforge.jp/svnroot/molds/trunk@1547 1136aad2-a195-0410-b898-f5ea1d11b9d8
@@ -4034,7 +4034,9 @@ void Cndo2::CalcDiatomicOverlapAOs1stDerivatives(double*** diatomicOverlapAOs1st | ||
4034 | 4034 | double** tmpRotMat1stDeriv, |
4035 | 4035 | double*** tmpRotMat1stDerivs, |
4036 | 4036 | double** tmpRotatedDiatomicOverlap, |
4037 | + double* tmpRotatedDiatomicOverlapVec, | |
4037 | 4038 | double** tmpMatrixBC, |
4039 | + double* tmpVectorBC, | |
4038 | 4040 | const Atom& atomA, |
4039 | 4041 | const Atom& atomB) const{ |
4040 | 4042 | double cartesian[CartesianType_end] = {atomA.GetXyz()[XAxis] - atomB.GetXyz()[XAxis], |
@@ -4069,7 +4071,9 @@ void Cndo2::CalcDiatomicOverlapAOs1stDerivatives(double*** diatomicOverlapAOs1st | ||
4069 | 4071 | tmpRotMat, |
4070 | 4072 | beta, |
4071 | 4073 | tmpRotatedDiatomicOverlap, |
4072 | - tmpMatrixBC); | |
4074 | + tmpRotatedDiatomicOverlapVec, | |
4075 | + tmpMatrixBC, | |
4076 | + tmpVectorBC); | |
4073 | 4077 | alpha = 1.0; |
4074 | 4078 | beta = 1.0; |
4075 | 4079 | MolDS_wrappers::Blas::GetInstance()->Dgemmm(isColumnMajorRotatingMatrix, |
@@ -4082,7 +4086,9 @@ void Cndo2::CalcDiatomicOverlapAOs1stDerivatives(double*** diatomicOverlapAOs1st | ||
4082 | 4086 | tmpRotMat, |
4083 | 4087 | beta, |
4084 | 4088 | tmpRotatedDiatomicOverlap, |
4085 | - tmpMatrixBC); | |
4089 | + tmpRotatedDiatomicOverlapVec, | |
4090 | + tmpMatrixBC, | |
4091 | + tmpVectorBC); | |
4086 | 4092 | MolDS_wrappers::Blas::GetInstance()->Dgemmm(isColumnMajorRotatingMatrix, |
4087 | 4093 | isColumnMajorDiaOverlapAOs, |
4088 | 4094 | !isColumnMajorRotatingMatrix, |
@@ -4093,7 +4099,9 @@ void Cndo2::CalcDiatomicOverlapAOs1stDerivatives(double*** diatomicOverlapAOs1st | ||
4093 | 4099 | tmpRotMat1stDeriv, |
4094 | 4100 | beta, |
4095 | 4101 | tmpRotatedDiatomicOverlap, |
4096 | - tmpMatrixBC); | |
4102 | + tmpRotatedDiatomicOverlapVec, | |
4103 | + tmpMatrixBC, | |
4104 | + tmpVectorBC); | |
4097 | 4105 | MolDS_wrappers::Blas::GetInstance()->Dcopy(OrbitalType_end*OrbitalType_end, |
4098 | 4106 | &tmpRotatedDiatomicOverlap[0][0], incrementOne, |
4099 | 4107 | &diatomicOverlapAOs1stDerivs[0][0][c], CartesianType_end); |
@@ -4137,7 +4145,9 @@ void Cndo2::CalcDiatomicOverlapAOs1stDerivatives(double*** diatomicOverlapAOs1st | ||
4137 | 4145 | double** tmpRotMat1stDeriv, |
4138 | 4146 | double*** tmpRotMat1stDerivs, |
4139 | 4147 | double** tmpRotatedDiatomicOverlap, |
4148 | + double* tmpRotatedDiatomicOverlapVec, | |
4140 | 4149 | double** tmpMatrixBC, |
4150 | + double* tmpVectorBC, | |
4141 | 4151 | int indexAtomA, |
4142 | 4152 | int indexAtomB) const{ |
4143 | 4153 | this->CalcDiatomicOverlapAOs1stDerivatives(diatomicOverlapAOs1stDerivs, |
@@ -4147,7 +4157,9 @@ void Cndo2::CalcDiatomicOverlapAOs1stDerivatives(double*** diatomicOverlapAOs1st | ||
4147 | 4157 | tmpRotMat1stDeriv, |
4148 | 4158 | tmpRotMat1stDerivs, |
4149 | 4159 | tmpRotatedDiatomicOverlap, |
4160 | + tmpRotatedDiatomicOverlapVec, | |
4150 | 4161 | tmpMatrixBC, |
4162 | + tmpVectorBC, | |
4151 | 4163 | *this->molecule->GetAtom(indexAtomA), |
4152 | 4164 | *this->molecule->GetAtom(indexAtomB)); |
4153 | 4165 | } |
@@ -201,7 +201,9 @@ protected: | ||
201 | 201 | double** tmpRotMat1stDeriv, |
202 | 202 | double*** tmpRotMat1stDerivs, |
203 | 203 | double** tmpRotatedDiatomicOverlap, |
204 | + double* tmpRotatedDiatomicOverlapVec, | |
204 | 205 | double** tmpMatrixBC, |
206 | + double* tmpVectorBC, | |
205 | 207 | const MolDS_base_atoms::Atom& atomA, |
206 | 208 | const MolDS_base_atoms::Atom& atomB) const; |
207 | 209 | void CalcDiatomicOverlapAOs1stDerivatives(double*** diatomicOverlapAOs1stDerivs, |
@@ -211,7 +213,9 @@ protected: | ||
211 | 213 | double** tmpRotMat1stDeriv, |
212 | 214 | double*** tmpRotMat1stDerivs, |
213 | 215 | double** tmpRotatedDiatomicOverlap, |
216 | + double* tmpRotatedDiatomicOverlapVec, | |
214 | 217 | double** tmpMatrixBC, |
218 | + double* tmpVectorBC, | |
215 | 219 | int indexAtomA, |
216 | 220 | int indexAtomB) const; |
217 | 221 | void CalcDiatomicOverlapAOs2ndDerivatives(double**** overlapAOs2ndDeri, |
@@ -1006,7 +1006,9 @@ void Mndo::MallocTempMatricesEachThreadCalcHessianSCF(double***** diatomicOve | ||
1006 | 1006 | double**** tmpDiaOverlapAOs1stDerivs, |
1007 | 1007 | double***** tmpDiaOverlapAOs2ndDerivs, |
1008 | 1008 | double*** tmpRotatedDiatomicOverlap, |
1009 | - double*** tmpMatrix) const{ | |
1009 | + double** tmpRotatedDiatomicOverlapVec, | |
1010 | + double*** tmpMatrixBC, | |
1011 | + double** tmpVectorBC) const{ | |
1010 | 1012 | MallocerFreer::GetInstance()->Malloc<double>(diatomicOverlapAOs1stDerivs, |
1011 | 1013 | this->molecule->GetNumberAtoms(), |
1012 | 1014 | OrbitalType_end, |
@@ -1079,9 +1081,13 @@ void Mndo::MallocTempMatricesEachThreadCalcHessianSCF(double***** diatomicOve | ||
1079 | 1081 | CartesianType_end); |
1080 | 1082 | MallocerFreer::GetInstance()->Malloc<double>(tmpRotatedDiatomicOverlap, |
1081 | 1083 | OrbitalType_end, OrbitalType_end); |
1082 | - MallocerFreer::GetInstance()->Malloc<double>(tmpMatrix, | |
1084 | + MallocerFreer::GetInstance()->Malloc<double>(tmpRotatedDiatomicOverlapVec, | |
1085 | + OrbitalType_end*OrbitalType_end); | |
1086 | + MallocerFreer::GetInstance()->Malloc<double>(tmpMatrixBC, | |
1083 | 1087 | OrbitalType_end, |
1084 | 1088 | OrbitalType_end); |
1089 | + MallocerFreer::GetInstance()->Malloc<double>(tmpVectorBC, | |
1090 | + OrbitalType_end*OrbitalType_end); | |
1085 | 1091 | } |
1086 | 1092 | |
1087 | 1093 | void Mndo::FreeTempMatricesEachThreadCalcHessianSCF(double***** diatomicOverlapAOs1stDerivs, |
@@ -1100,7 +1106,9 @@ void Mndo::FreeTempMatricesEachThreadCalcHessianSCF(double***** diatomicOverl | ||
1100 | 1106 | double**** tmpDiaOverlapAOs1stDerivs, |
1101 | 1107 | double***** tmpDiaOverlapAOs2ndDerivs, |
1102 | 1108 | double*** tmpRotatedDiatomicOverlap, |
1103 | - double*** tmpMatrix) const{ | |
1109 | + double** tmpRotatedDiatomicOverlapVec, | |
1110 | + double*** tmpMatrixBC, | |
1111 | + double** tmpVectorBC) const{ | |
1104 | 1112 | MallocerFreer::GetInstance()->Free<double>(diatomicOverlapAOs1stDerivs, |
1105 | 1113 | this->molecule->GetNumberAtoms(), |
1106 | 1114 | OrbitalType_end, |
@@ -1174,9 +1182,13 @@ void Mndo::FreeTempMatricesEachThreadCalcHessianSCF(double***** diatomicOverl | ||
1174 | 1182 | MallocerFreer::GetInstance()->Free<double>(tmpRotatedDiatomicOverlap, |
1175 | 1183 | OrbitalType_end, |
1176 | 1184 | OrbitalType_end); |
1177 | - MallocerFreer::GetInstance()->Free<double>(tmpMatrix, | |
1185 | + MallocerFreer::GetInstance()->Free<double>(tmpRotatedDiatomicOverlapVec, | |
1186 | + OrbitalType_end*OrbitalType_end); | |
1187 | + MallocerFreer::GetInstance()->Free<double>(tmpMatrixBC, | |
1178 | 1188 | OrbitalType_end, |
1179 | 1189 | OrbitalType_end); |
1190 | + MallocerFreer::GetInstance()->Free<double>(tmpVectorBC, | |
1191 | + OrbitalType_end*OrbitalType_end); | |
1180 | 1192 | } |
1181 | 1193 | |
1182 | 1194 | // mu and nu is included in atomA' AO. |
@@ -1734,7 +1746,9 @@ void Mndo::CalcHessianSCF(double** hessianSCF, bool isMassWeighted) const{ | ||
1734 | 1746 | double**** tmpDiaOverlapAOs2ndDerivs = NULL; //sedond derivatives of the diaOverlapAOs. This derivatives are related to the all Cartesian coordinates. |
1735 | 1747 | double** tmpRotMat1stDeriv = NULL; |
1736 | 1748 | double** tmpRotatedDiatomicOverlap = NULL; |
1737 | - double** tmpMatrixBC = NULL; | |
1749 | + double* tmpRotatedDiatomicOverlapVec = NULL; // used in dgemmm | |
1750 | + double** tmpMatrixBC = NULL; // used in dgemmm | |
1751 | + double* tmpVectorBC = NULL; // used in dgemmm | |
1738 | 1752 | |
1739 | 1753 | try{ |
1740 | 1754 | this->MallocTempMatricesEachThreadCalcHessianSCF(&diatomicOverlapAOs1stDerivs, |
@@ -1753,7 +1767,9 @@ void Mndo::CalcHessianSCF(double** hessianSCF, bool isMassWeighted) const{ | ||
1753 | 1767 | &tmpDiaOverlapAOs1stDerivs, |
1754 | 1768 | &tmpDiaOverlapAOs2ndDerivs, |
1755 | 1769 | &tmpRotatedDiatomicOverlap, |
1756 | - &tmpMatrixBC); | |
1770 | + &tmpRotatedDiatomicOverlapVec, | |
1771 | + &tmpMatrixBC, | |
1772 | + &tmpVectorBC); | |
1757 | 1773 | #pragma omp for schedule(auto) |
1758 | 1774 | for(int indexAtomA=0; indexAtomA<this->molecule->GetNumberAtoms(); indexAtomA++){ |
1759 | 1775 | const Atom& atomA = *this->molecule->GetAtom(indexAtomA); |
@@ -1771,7 +1787,9 @@ void Mndo::CalcHessianSCF(double** hessianSCF, bool isMassWeighted) const{ | ||
1771 | 1787 | tmpRotMat1stDeriv, |
1772 | 1788 | tmpRotMat1stDerivs, |
1773 | 1789 | tmpRotatedDiatomicOverlap, |
1790 | + tmpRotatedDiatomicOverlapVec, | |
1774 | 1791 | tmpMatrixBC, |
1792 | + tmpVectorBC, | |
1775 | 1793 | indexAtomA, |
1776 | 1794 | indexAtomB); |
1777 | 1795 | this->CalcDiatomicOverlapAOs2ndDerivatives(diatomicOverlapAOs2ndDerivs[indexAtomB], |
@@ -1868,7 +1886,9 @@ void Mndo::CalcHessianSCF(double** hessianSCF, bool isMassWeighted) const{ | ||
1868 | 1886 | &tmpDiaOverlapAOs1stDerivs, |
1869 | 1887 | &tmpDiaOverlapAOs2ndDerivs, |
1870 | 1888 | &tmpRotatedDiatomicOverlap, |
1871 | - &tmpMatrixBC); | |
1889 | + &tmpRotatedDiatomicOverlapVec, | |
1890 | + &tmpMatrixBC, | |
1891 | + &tmpVectorBC); | |
1872 | 1892 | }// end of omp-region |
1873 | 1893 | // Exception throwing for omp-region |
1874 | 1894 | if(!ompErrors.str().empty()){ |
@@ -2059,7 +2079,9 @@ void Mndo::CalcStaticFirstOrderFock(double* staticFirstOrderFock, | ||
2059 | 2079 | double** tmpDiaOverlapAOs1stDerivInDiaFrame = NULL; // first derivative of the diaOverlapAOs. This derivative is related to the distance between two atoms. |
2060 | 2080 | double** tmpRotMat1stDeriv = NULL; |
2061 | 2081 | double** tmpRotatedDiatomicOverlap = NULL; |
2082 | + double* tmpRotatedDiatomicOverlapVec = NULL; | |
2062 | 2083 | double** tmpMatrixBC = NULL; |
2084 | + double* tmpVectorBC = NULL; | |
2063 | 2085 | try{ |
2064 | 2086 | this->MallocTempMatricesStaticFirstOrderFock(&diatomicTwoElecTwoCore1stDerivs, |
2065 | 2087 | &diatomicOverlapAOs1stDerivs, |
@@ -2070,7 +2092,9 @@ void Mndo::CalcStaticFirstOrderFock(double* staticFirstOrderFock, | ||
2070 | 2092 | MallocerFreer::GetInstance()->Malloc<double>(&tmpDiaOverlapAOs1stDerivInDiaFrame, OrbitalType_end, OrbitalType_end); |
2071 | 2093 | MallocerFreer::GetInstance()->Malloc<double>(&tmpRotMat1stDeriv, OrbitalType_end, OrbitalType_end); |
2072 | 2094 | MallocerFreer::GetInstance()->Malloc<double>(&tmpRotatedDiatomicOverlap, OrbitalType_end, OrbitalType_end); |
2095 | + MallocerFreer::GetInstance()->Malloc<double>(&tmpRotatedDiatomicOverlapVec, OrbitalType_end*OrbitalType_end); | |
2073 | 2096 | MallocerFreer::GetInstance()->Malloc<double>(&tmpMatrixBC, OrbitalType_end, OrbitalType_end); |
2097 | + MallocerFreer::GetInstance()->Malloc<double>(&tmpVectorBC, OrbitalType_end*OrbitalType_end); | |
2074 | 2098 | const Atom& atomA = *molecule->GetAtom(indexAtomA); |
2075 | 2099 | int firstAOIndexA = atomA.GetFirstAOIndex(); |
2076 | 2100 | int lastAOIndexA = atomA.GetLastAOIndex(); |
@@ -2096,7 +2120,9 @@ void Mndo::CalcStaticFirstOrderFock(double* staticFirstOrderFock, | ||
2096 | 2120 | tmpRotMat1stDeriv, |
2097 | 2121 | tmpRotMat1stDerivs, |
2098 | 2122 | tmpRotatedDiatomicOverlap, |
2123 | + tmpRotatedDiatomicOverlapVec, | |
2099 | 2124 | tmpMatrixBC, |
2125 | + tmpVectorBC, | |
2100 | 2126 | atomA, |
2101 | 2127 | atomB); |
2102 | 2128 |
@@ -2200,7 +2226,9 @@ void Mndo::CalcStaticFirstOrderFock(double* staticFirstOrderFock, | ||
2200 | 2226 | MallocerFreer::GetInstance()->Free<double>(&tmpRotMat1stDeriv, OrbitalType_end, OrbitalType_end); |
2201 | 2227 | //MallocerFreer::GetInstance()->Free<double>(&tmpRotMat1stDerivs, OrbitalType_end, OrbitalType_end, CartesianType_end); |
2202 | 2228 | MallocerFreer::GetInstance()->Free<double>(&tmpRotatedDiatomicOverlap, OrbitalType_end, OrbitalType_end); |
2229 | + MallocerFreer::GetInstance()->Free<double>(&tmpRotatedDiatomicOverlapVec, OrbitalType_end*OrbitalType_end); | |
2203 | 2230 | MallocerFreer::GetInstance()->Free<double>(&tmpMatrixBC, OrbitalType_end, OrbitalType_end); |
2231 | + MallocerFreer::GetInstance()->Free<double>(&tmpVectorBC, OrbitalType_end*OrbitalType_end); | |
2204 | 2232 | throw ex; |
2205 | 2233 | } |
2206 | 2234 | this->FreeTempMatricesStaticFirstOrderFock(&diatomicTwoElecTwoCore1stDerivs, |
@@ -2214,7 +2242,9 @@ void Mndo::CalcStaticFirstOrderFock(double* staticFirstOrderFock, | ||
2214 | 2242 | MallocerFreer::GetInstance()->Free<double>(&tmpRotMat1stDeriv, OrbitalType_end, OrbitalType_end); |
2215 | 2243 | //MallocerFreer::GetInstance()->Free<double>(&tmpRotMat1stDerivs, OrbitalType_end, OrbitalType_end, CartesianType_end); |
2216 | 2244 | MallocerFreer::GetInstance()->Free<double>(&tmpRotatedDiatomicOverlap, OrbitalType_end, OrbitalType_end); |
2245 | + MallocerFreer::GetInstance()->Free<double>(&tmpRotatedDiatomicOverlapVec, OrbitalType_end*OrbitalType_end); | |
2217 | 2246 | MallocerFreer::GetInstance()->Free<double>(&tmpMatrixBC, OrbitalType_end, OrbitalType_end); |
2247 | + MallocerFreer::GetInstance()->Free<double>(&tmpVectorBC, OrbitalType_end*OrbitalType_end); | |
2218 | 2248 | |
2219 | 2249 | /* |
2220 | 2250 | printf("staticFirstOrderFock(atomA:%d axis:%s)\n",indexAtomA,CartesianTypeStr(axisA)); |
@@ -2585,8 +2615,10 @@ void Mndo::CalcForce(const vector<int>& elecStates){ | ||
2585 | 2615 | double** tmpDiaOverlapAOsInDiaFrame = NULL; // diatomic overlapAOs in diatomic frame |
2586 | 2616 | double** tmpDiaOverlapAOs1stDerivInDiaFrame = NULL; // first derivative of the diaOverlapAOs. This derivative is related to the distance between two atoms. |
2587 | 2617 | double** tmpRotMat1stDeriv = NULL; |
2588 | - double** tmpRotatedDiatomicOverlap = NULL; | |
2589 | - double** tmpMatrixBC = NULL; | |
2618 | + double** tmpRotatedDiatomicOverlap = NULL; // used in dgemmm | |
2619 | + double* tmpRotatedDiatomicOverlapVec = NULL; // used in dgemmm | |
2620 | + double** tmpMatrixBC = NULL; // used in dgemmm | |
2621 | + double* tmpVectorBC = NULL; // used in dgemmm | |
2590 | 2622 | try{ |
2591 | 2623 | this->MallocTempMatricesCalcForce(&diatomicOverlapAOs1stDerivs, |
2592 | 2624 | &diatomicTwoElecTwoCore1stDerivs, |
@@ -2596,7 +2628,9 @@ void Mndo::CalcForce(const vector<int>& elecStates){ | ||
2596 | 2628 | &tmpRotMat1stDeriv, |
2597 | 2629 | &tmpRotMat1stDerivs, |
2598 | 2630 | &tmpRotatedDiatomicOverlap, |
2631 | + &tmpRotatedDiatomicOverlapVec, | |
2599 | 2632 | &tmpMatrixBC, |
2633 | + &tmpVectorBC, | |
2600 | 2634 | &tmpDiatomicTwoElecTwoCore); |
2601 | 2635 | |
2602 | 2636 | #pragma omp for schedule(auto) |
@@ -2614,7 +2648,9 @@ void Mndo::CalcForce(const vector<int>& elecStates){ | ||
2614 | 2648 | tmpRotMat1stDeriv, |
2615 | 2649 | tmpRotMat1stDerivs, |
2616 | 2650 | tmpRotatedDiatomicOverlap, |
2651 | + tmpRotatedDiatomicOverlapVec, | |
2617 | 2652 | tmpMatrixBC, |
2653 | + tmpVectorBC, | |
2618 | 2654 | atomA, |
2619 | 2655 | atomB); |
2620 | 2656 | // calc. first derivative of two elec two core interaction |
@@ -2737,7 +2773,9 @@ void Mndo::CalcForce(const vector<int>& elecStates){ | ||
2737 | 2773 | &tmpRotMat1stDeriv, |
2738 | 2774 | &tmpRotMat1stDerivs, |
2739 | 2775 | &tmpRotatedDiatomicOverlap, |
2776 | + &tmpRotatedDiatomicOverlapVec, | |
2740 | 2777 | &tmpMatrixBC, |
2778 | + &tmpVectorBC, | |
2741 | 2779 | &tmpDiatomicTwoElecTwoCore); |
2742 | 2780 | } // end of omp-parallelized region |
2743 | 2781 | // Exception throwing for omp-region |
@@ -2759,7 +2797,9 @@ void Mndo::MallocTempMatricesCalcForce(double**** diatomicOverlapAOs1stDerivs, | ||
2759 | 2797 | double*** tmpRotMat1stDeriv, |
2760 | 2798 | double**** tmpRotMat1stDerivs, |
2761 | 2799 | double*** tmpRotatedDiatomicOverlap, |
2800 | + double** tmpRotatedDiatomicOverlapVec, | |
2762 | 2801 | double*** tmpMatrixBC, |
2802 | + double** tmpVectorBC, | |
2763 | 2803 | double***** tmpDiatomicTwoElecTwoCore) const{ |
2764 | 2804 | MallocerFreer::GetInstance()->Malloc<double>(diatomicOverlapAOs1stDerivs, |
2765 | 2805 | OrbitalType_end, |
@@ -2790,9 +2830,13 @@ void Mndo::MallocTempMatricesCalcForce(double**** diatomicOverlapAOs1stDerivs, | ||
2790 | 2830 | MallocerFreer::GetInstance()->Malloc<double>(tmpRotatedDiatomicOverlap, |
2791 | 2831 | OrbitalType_end, |
2792 | 2832 | OrbitalType_end); |
2833 | + MallocerFreer::GetInstance()->Malloc<double>(tmpRotatedDiatomicOverlapVec, | |
2834 | + OrbitalType_end*OrbitalType_end); | |
2793 | 2835 | MallocerFreer::GetInstance()->Malloc<double>(tmpMatrixBC, |
2794 | 2836 | OrbitalType_end, |
2795 | 2837 | OrbitalType_end); |
2838 | + MallocerFreer::GetInstance()->Malloc<double>(tmpVectorBC, | |
2839 | + OrbitalType_end*OrbitalType_end); | |
2796 | 2840 | MallocerFreer::GetInstance()->Malloc<double>(tmpDiatomicTwoElecTwoCore, |
2797 | 2841 | dxy, |
2798 | 2842 | dxy, |
@@ -2808,7 +2852,9 @@ void Mndo::FreeTempMatricesCalcForce(double**** diatomicOverlapAOs1stDerivs, | ||
2808 | 2852 | double*** tmpRotMat1stDeriv, |
2809 | 2853 | double**** tmpRotMat1stDerivs, |
2810 | 2854 | double*** tmpRotatedDiatomicOverlap, |
2855 | + double** tmpRotatedDiatomicOverlapVec, | |
2811 | 2856 | double*** tmpMatrixBC, |
2857 | + double** tmpVectorBC, | |
2812 | 2858 | double***** tmpDiatomicTwoElecTwoCore) const{ |
2813 | 2859 | MallocerFreer::GetInstance()->Free<double>(diatomicOverlapAOs1stDerivs, |
2814 | 2860 | OrbitalType_end, |
@@ -2839,9 +2885,13 @@ void Mndo::FreeTempMatricesCalcForce(double**** diatomicOverlapAOs1stDerivs, | ||
2839 | 2885 | MallocerFreer::GetInstance()->Free<double>(tmpRotatedDiatomicOverlap, |
2840 | 2886 | OrbitalType_end, |
2841 | 2887 | OrbitalType_end); |
2888 | + MallocerFreer::GetInstance()->Free<double>(tmpRotatedDiatomicOverlapVec, | |
2889 | + OrbitalType_end*OrbitalType_end); | |
2842 | 2890 | MallocerFreer::GetInstance()->Free<double>(tmpMatrixBC, |
2843 | 2891 | OrbitalType_end, |
2844 | 2892 | OrbitalType_end); |
2893 | + MallocerFreer::GetInstance()->Free<double>(tmpVectorBC, | |
2894 | + OrbitalType_end*OrbitalType_end); | |
2845 | 2895 | MallocerFreer::GetInstance()->Free<double>(tmpDiatomicTwoElecTwoCore, |
2846 | 2896 | dxy, |
2847 | 2897 | dxy, |
@@ -164,7 +164,9 @@ private: | ||
164 | 164 | double**** tmpDiaOverlapAOs1stDerivs, |
165 | 165 | double***** tmpDiaOverlapAOs2ndDerivs, |
166 | 166 | double*** tmpRotatedDiatomicOverlap, |
167 | - double*** tmpMatrix) const; | |
167 | + double** tmpRotatedDiatomicOverlapVec, | |
168 | + double*** tmpMatrixBC, | |
169 | + double** tmpVectorBC) const; | |
168 | 170 | void FreeTempMatricesEachThreadCalcHessianSCF(double***** diatomicOverlapAOs1stDerivs, |
169 | 171 | double****** diatomicOverlapAOs2ndDerivs, |
170 | 172 | double******* diatomicTwoElecTwoCore1stDerivs, |
@@ -181,7 +183,9 @@ private: | ||
181 | 183 | double**** tmpDiaOverlapAOs1stDerivs, |
182 | 184 | double***** tmpDiaOverlapAOs2ndDerivs, |
183 | 185 | double*** tmpRotatedDiatomicOverlap, |
184 | - double*** tmpMatrix) const; | |
186 | + double** tmpRotatedDiatomicOverlapVec, | |
187 | + double*** tmpMatrixBC, | |
188 | + double** tmpVectorBC) const; | |
185 | 189 | double GetAuxiliaryHessianElement1(int mu, |
186 | 190 | int nu, |
187 | 191 | int indexAtomA, |
@@ -395,7 +399,9 @@ private: | ||
395 | 399 | double*** tmpRotMat1stDeriv, |
396 | 400 | double**** tmpRotMat1stDerivs, |
397 | 401 | double*** tmpRotatedDiatomicOverlap, |
402 | + double** tmpRotatedDiatomicOverlapVec, | |
398 | 403 | double*** tmpMatrixBC, |
404 | + double** tmpVectorBC, | |
399 | 405 | double***** tmpDiatomicTwoElecTwoCore) const; |
400 | 406 | void FreeTempMatricesCalcForce(double**** diatomicOverlapAOs1stDerivs, |
401 | 407 | double****** diatomicTwoElecTwoCore1stDerivs, |
@@ -405,7 +411,9 @@ private: | ||
405 | 411 | double*** tmpRotMat1stDeriv, |
406 | 412 | double**** tmpRotMat1stDerivs, |
407 | 413 | double*** tmpRotatedDiatomicOverlap, |
414 | + double** tmpRotatedDiatomicOverlapVec, | |
408 | 415 | double*** tmpMatrixBC, |
416 | + double** tmpVectorBC, | |
409 | 417 | double***** tmpDiatomicTwoElecTwoCore) const; |
410 | 418 | void CalcForceSCFElecCoreAttractionPart(double* force, |
411 | 419 | int indexAtomA, |
@@ -3677,8 +3677,10 @@ void ZindoS::CalcForce(const vector<int>& elecStates){ | ||
3677 | 3677 | double** tmpRotMat = NULL; // rotating Matrix from the diatomic frame to space fixed frame. |
3678 | 3678 | double** tmpRotMat1stDeriv = NULL; |
3679 | 3679 | double*** tmpRotMat1stDerivs = NULL; // first derivatives of the rotMat. |
3680 | - double** tmpRotatedDiatomicOverlap = NULL; | |
3681 | - double** tmpMatrixBC = NULL; | |
3680 | + double** tmpRotatedDiatomicOverlap = NULL; // used in dgemmm | |
3681 | + double* tmpRotatedDiatomicOverlapVec = NULL; // used in dgemmm | |
3682 | + double** tmpMatrixBC = NULL; // used in dgemmm | |
3683 | + double* tmpVectorBC = NULL; // used in dgemmm | |
3682 | 3684 | try{ |
3683 | 3685 | MallocTempMatricesCalcForce(&diatomicOverlapAOs1stDerivs, |
3684 | 3686 | &diatomicTwoElecTwoCore1stDerivs, |
@@ -3688,7 +3690,9 @@ void ZindoS::CalcForce(const vector<int>& elecStates){ | ||
3688 | 3690 | &tmpRotMat1stDeriv, |
3689 | 3691 | &tmpRotMat1stDerivs, |
3690 | 3692 | &tmpRotatedDiatomicOverlap, |
3691 | - &tmpMatrixBC); | |
3693 | + &tmpRotatedDiatomicOverlapVec, | |
3694 | + &tmpMatrixBC, | |
3695 | + &tmpVectorBC); | |
3692 | 3696 | #pragma omp for schedule(auto) |
3693 | 3697 | for(int b=0; b<this->molecule->GetNumberAtoms(); b++){ |
3694 | 3698 | if(a == b){continue;} |
@@ -3705,7 +3709,9 @@ void ZindoS::CalcForce(const vector<int>& elecStates){ | ||
3705 | 3709 | tmpRotMat1stDeriv, |
3706 | 3710 | tmpRotMat1stDerivs, |
3707 | 3711 | tmpRotatedDiatomicOverlap, |
3712 | + tmpRotatedDiatomicOverlapVec, | |
3708 | 3713 | tmpMatrixBC, |
3714 | + tmpVectorBC, | |
3709 | 3715 | atomA, |
3710 | 3716 | atomB); |
3711 | 3717 |
@@ -3827,7 +3833,9 @@ void ZindoS::CalcForce(const vector<int>& elecStates){ | ||
3827 | 3833 | &tmpRotMat1stDeriv, |
3828 | 3834 | &tmpRotMat1stDerivs, |
3829 | 3835 | &tmpRotatedDiatomicOverlap, |
3830 | - &tmpMatrixBC); | |
3836 | + &tmpRotatedDiatomicOverlapVec, | |
3837 | + &tmpMatrixBC, | |
3838 | + &tmpVectorBC); | |
3831 | 3839 | } //end of omp-parallelized region |
3832 | 3840 | // Exception throwing for omp-region |
3833 | 3841 | if(!ompErrors.str().empty()){ |
@@ -3930,7 +3938,9 @@ void ZindoS::MallocTempMatricesCalcForce(double**** diatomicOverlapAOs1stDerivs, | ||
3930 | 3938 | double*** tmpRotMat1stDeriv, |
3931 | 3939 | double**** tmpRotMat1stDerivs, |
3932 | 3940 | double*** tmpRotatedDiatomicOverlap, |
3933 | - double*** tmpMatrixBC) const{ | |
3941 | + double** tmpRotatedDiatomicOverlapVec, | |
3942 | + double*** tmpMatrixBC, | |
3943 | + double** tmpVectorBC) const{ | |
3934 | 3944 | MallocerFreer::GetInstance()->Malloc<double>(diatomicOverlapAOs1stDerivs, OrbitalType_end, OrbitalType_end, CartesianType_end); |
3935 | 3945 | MallocerFreer::GetInstance()->Malloc<double>(diatomicTwoElecTwoCore1stDerivs, OrbitalType_end, OrbitalType_end, CartesianType_end); |
3936 | 3946 | MallocerFreer::GetInstance()->Malloc<double>(tmpDiaOverlapAOsInDiaFrame, OrbitalType_end, OrbitalType_end); |
@@ -3939,7 +3949,9 @@ void ZindoS::MallocTempMatricesCalcForce(double**** diatomicOverlapAOs1stDerivs, | ||
3939 | 3949 | MallocerFreer::GetInstance()->Malloc<double>(tmpRotMat1stDeriv, OrbitalType_end, OrbitalType_end); |
3940 | 3950 | MallocerFreer::GetInstance()->Malloc<double>(tmpRotMat1stDerivs, OrbitalType_end, OrbitalType_end, CartesianType_end); |
3941 | 3951 | MallocerFreer::GetInstance()->Malloc<double>(tmpRotatedDiatomicOverlap, OrbitalType_end, OrbitalType_end); |
3952 | + MallocerFreer::GetInstance()->Malloc<double>(tmpRotatedDiatomicOverlapVec, OrbitalType_end*OrbitalType_end); | |
3942 | 3953 | MallocerFreer::GetInstance()->Malloc<double>(tmpMatrixBC, OrbitalType_end, OrbitalType_end); |
3954 | + MallocerFreer::GetInstance()->Malloc<double>(tmpVectorBC, OrbitalType_end*OrbitalType_end); | |
3943 | 3955 | } |
3944 | 3956 | |
3945 | 3957 | void ZindoS::FreeTempMatricesCalcForce(double**** diatomicOverlapAOs1stDerivs, |
@@ -3950,7 +3962,9 @@ void ZindoS::FreeTempMatricesCalcForce(double**** diatomicOverlapAOs1stDerivs, | ||
3950 | 3962 | double*** tmpRotMat1stDeriv, |
3951 | 3963 | double**** tmpRotMat1stDerivs, |
3952 | 3964 | double*** tmpRotatedDiatomicOverlap, |
3953 | - double*** tmpMatrixBC) const{ | |
3965 | + double** tmpRotatedDiatomicOverlapVec, | |
3966 | + double*** tmpMatrixBC, | |
3967 | + double** tmpVectorBC) const{ | |
3954 | 3968 | MallocerFreer::GetInstance()->Free<double>(diatomicOverlapAOs1stDerivs, OrbitalType_end, OrbitalType_end, CartesianType_end); |
3955 | 3969 | MallocerFreer::GetInstance()->Free<double>(diatomicTwoElecTwoCore1stDerivs, OrbitalType_end, OrbitalType_end, CartesianType_end); |
3956 | 3970 | MallocerFreer::GetInstance()->Free<double>(tmpDiaOverlapAOsInDiaFrame, OrbitalType_end, OrbitalType_end); |
@@ -3959,7 +3973,9 @@ void ZindoS::FreeTempMatricesCalcForce(double**** diatomicOverlapAOs1stDerivs, | ||
3959 | 3973 | MallocerFreer::GetInstance()->Free<double>(tmpRotMat1stDeriv, OrbitalType_end, OrbitalType_end); |
3960 | 3974 | MallocerFreer::GetInstance()->Free<double>(tmpRotMat1stDerivs, OrbitalType_end, OrbitalType_end, CartesianType_end); |
3961 | 3975 | MallocerFreer::GetInstance()->Free<double>(tmpRotatedDiatomicOverlap, OrbitalType_end, OrbitalType_end); |
3976 | + MallocerFreer::GetInstance()->Free<double>(tmpRotatedDiatomicOverlapVec, OrbitalType_end*OrbitalType_end); | |
3962 | 3977 | MallocerFreer::GetInstance()->Free<double>(tmpMatrixBC, OrbitalType_end, OrbitalType_end); |
3978 | + MallocerFreer::GetInstance()->Free<double>(tmpVectorBC, OrbitalType_end*OrbitalType_end); | |
3963 | 3979 | } |
3964 | 3980 | |
3965 | 3981 | void ZindoS::CalcForceExcitedStaticPart(double* force, |
@@ -279,7 +279,9 @@ private: | ||
279 | 279 | double*** tmpRotMat1stDeriv, |
280 | 280 | double**** tmpRotMat1stDerivs, |
281 | 281 | double*** tmpRotatedDiatomicOverlap, |
282 | - double*** tmpMatrixBC) const; | |
282 | + double** tmpRotatedDiatomicOverlapVec, | |
283 | + double*** tmpMatrixBC, | |
284 | + double** tmpVectorBC) const; | |
283 | 285 | void FreeTempMatricesCalcForce(double**** diatomicOverlapAOs1stDerivs, |
284 | 286 | double**** diatomicTwoElecTwoCore1stDerivs, |
285 | 287 | double*** tmpDiaOverlapAOsInDiaFrame, |
@@ -288,7 +290,9 @@ private: | ||
288 | 290 | double*** tmpRotMat1stDeriv, |
289 | 291 | double**** tmpRotMat1stDerivs, |
290 | 292 | double*** tmpRotatedDiatomicOverlap, |
291 | - double*** tmpMatrixBC) const; | |
293 | + double** tmpRotatedDiatomicOverlapVec, | |
294 | + double*** tmpMatrixBC, | |
295 | + double** tmpVectorBC) const; | |
292 | 296 | void CalcForceExcitedStaticPart(double* force, |
293 | 297 | int elecStateIndex, |
294 | 298 | int indexAtomA, |