• R/O
  • HTTP
  • SSH
  • HTTPS

Commit

Tags
No Tags

Frequently used words (click to add to your profile)

java, c++, android, linux, c#, windows, objective-c, cocoa, 誰得, qt, python, php, ruby, game, gui, bathyscaphe, c, 計画中(planning stage), 翻訳, omegat, framework, twitter, dom, test, vb.net, directx, ゲームエンジン, btron, arduino, previewer

Commit MetaInfo

Revision: 59d1e5e4eee8487da6fdbe5f274800ca75711421 (tree)
Time: 2013-08-08 14:29:13
Author: Mikiya Fujii <mikiya.fujii@gmai...>
Committer: Mikiya Fujii

Log Message

Construction of the Fock matrix is MPI-parallelized. #31851

git-svn-id: https://svn.sourceforge.jp/svnroot/molds/trunk@1461 1136aad2-a195-0410-b898-f5ea1d11b9d8

Change Summary

Incremental Difference

--- a/src/cndo/Cndo2.cpp
+++ b/src/cndo/Cndo2.cpp
@@ -1374,22 +1374,27 @@ void Cndo2::CalcFockMatrix(double** fockMatrix,
13741374 double const* atomicElectronPopulation,
13751375 double const* const* const* const* const* const* twoElecTwoCore,
13761376 bool isGuess) const{
1377+ int mpiRank = MolDS_mpi::MpiProcess::GetInstance()->GetRank();
1378+ int mpiSize = MolDS_mpi::MpiProcess::GetInstance()->GetSize();
1379+ int totalNumberAOs = molecule.GetTotalNumberAOs();
1380+ int totalNumberAtoms = molecule.GetNumberAtoms();
13771381 MallocerFreer::GetInstance()->Initialize<double>(fockMatrix,
1378- molecule.GetTotalNumberAOs(),
1379- molecule.GetTotalNumberAOs());
1380- int totalNumberAtoms=molecule.GetNumberAtoms();
1381- stringstream ompErrors;
1382-#pragma omp parallel for schedule(auto)
1382+ totalNumberAOs,
1383+ totalNumberAOs);
13831384 for(int A=0; A<totalNumberAtoms; A++){
1384- try{
1385- const Atom& atomA = *molecule.GetAtom(A);
1386- int firstAOIndexA = atomA.GetFirstAOIndex();
1387- int lastAOIndexA = atomA.GetLastAOIndex();
1385+ const Atom& atomA = *molecule.GetAtom(A);
1386+ int firstAOIndexA = atomA.GetFirstAOIndex();
1387+ int lastAOIndexA = atomA.GetLastAOIndex();
1388+ for(int mu=firstAOIndexA; mu<=lastAOIndexA; mu++){
1389+ if(mu%mpiSize != mpiRank){continue;}
1390+
1391+ stringstream ompErrors;
1392+#pragma omp parallel for schedule(auto)
13881393 for(int B=A; B<totalNumberAtoms; B++){
1389- const Atom& atomB = *molecule.GetAtom(B);
1390- int firstAOIndexB = atomB.GetFirstAOIndex();
1391- int lastAOIndexB = atomB.GetLastAOIndex();
1392- for(int mu=firstAOIndexA; mu<=lastAOIndexA; mu++){
1394+ try{
1395+ const Atom& atomB = *molecule.GetAtom(B);
1396+ int firstAOIndexB = atomB.GetFirstAOIndex();
1397+ int lastAOIndexB = atomB.GetLastAOIndex();
13931398 for(int nu=firstAOIndexB; nu<=lastAOIndexB; nu++){
13941399 if(mu == nu){
13951400 // diagonal part
@@ -1421,20 +1426,44 @@ void Cndo2::CalcFockMatrix(double** fockMatrix,
14211426 else{
14221427 // lower left part (not calculated)
14231428 }
1424-
1425- }
1429+ } // end of loop nu
1430+ } // end of try
1431+ catch(MolDSException ex){
1432+#pragma omp critical
1433+ ex.Serialize(ompErrors);
14261434 }
1435+ } // end of loop B parallelized with openMP
1436+ // Exception throwing for omp-region
1437+ if(!ompErrors.str().empty()){
1438+ throw MolDSException::Deserialize(ompErrors);
14271439 }
1428- }
1429- catch(MolDSException ex){
1430-#pragma omp critical
1431- ex.Serialize(ompErrors);
1440+ } // end of loop mu parallelized with MPI
1441+ } // end of loop A
1442+
1443+ // communication to collect all matrix data on head-rank
1444+ int mpiHeadRank = MolDS_mpi::MpiProcess::GetInstance()->GetHeadRank();
1445+ if(mpiRank == mpiHeadRank){
1446+ // receive the matrix data from other ranks
1447+ for(int mu=0; mu<totalNumberAOs; mu++){
1448+ if(mu%mpiSize == mpiHeadRank){continue;}
1449+ int source = mu%mpiSize;
1450+ int tag = mu;
1451+ MolDS_mpi::MpiProcess::GetInstance()->Recv(source, tag, fockMatrix[mu], totalNumberAOs);
14321452 }
14331453 }
1434- // Exception throwing for omp-region
1435- if(!ompErrors.str().empty()){
1436- throw MolDSException::Deserialize(ompErrors);
1454+ else{
1455+ // send the matrix data to head-rank
1456+ for(int mu=0; mu<totalNumberAOs; mu++){
1457+ if(mu%mpiSize != mpiRank){continue;}
1458+ int dest = mpiHeadRank;
1459+ int tag = mu;
1460+ MolDS_mpi::MpiProcess::GetInstance()->Send(dest, tag, fockMatrix[mu], totalNumberAOs);
1461+ }
14371462 }
1463+ // broadcast all matrix data to all rank
1464+ int root=mpiHeadRank;
1465+ MolDS_mpi::MpiProcess::GetInstance()->Broadcast(&fockMatrix[0][0], totalNumberAOs*totalNumberAOs, root);
1466+
14381467 /*
14391468 this->OutputLog("fock matrix\n");
14401469 for(int o=0; o<this->molecule.GetTotalNumberAOs(); o++){
@@ -1561,16 +1590,21 @@ void Cndo2::CalcAtomicElectronPopulation(double* atomicElectronPopulation,
15611590
15621591 // calculate gammaAB matrix. (B.56) and (B.62) in J. A. Pople book.
15631592 void Cndo2::CalcGammaAB(double** gammaAB, const Molecule& molecule) const{
1593+ int mpiRank = MolDS_mpi::MpiProcess::GetInstance()->GetRank();
1594+ int mpiSize = MolDS_mpi::MpiProcess::GetInstance()->GetSize();
15641595 int totalAtomNumber = molecule.GetNumberAtoms();
1565- stringstream ompErrors;
1566-#pragma omp parallel for schedule(auto)
1596+
1597+ // This loop (A) is parallelized by MPI
15671598 for(int A=0; A<totalAtomNumber; A++){
1568- try{
1569- const Atom& atomA = *molecule.GetAtom(A);
1570- int na = atomA.GetValenceShellType() + 1;
1571- double orbitalExponentA = atomA.GetOrbitalExponent(
1572- atomA.GetValenceShellType(), s, this->theory);
1573- for(int B=A; B<totalAtomNumber; B++){
1599+ if(A%mpiSize != mpiRank){continue;}
1600+ const Atom& atomA = *molecule.GetAtom(A);
1601+ int na = atomA.GetValenceShellType() + 1;
1602+ double orbitalExponentA = atomA.GetOrbitalExponent(
1603+ atomA.GetValenceShellType(), s, this->theory);
1604+ stringstream ompErrors;
1605+#pragma omp parallel for schedule(auto)
1606+ for(int B=A; B<totalAtomNumber; B++){
1607+ try{
15741608 const Atom& atomB = *molecule.GetAtom(B);
15751609 int nb = atomB.GetValenceShellType() + 1;
15761610 double orbitalExponentB = atomB.GetOrbitalExponent(
@@ -1619,16 +1653,41 @@ void Cndo2::CalcGammaAB(double** gammaAB, const Molecule& molecule) const{
16191653 }
16201654 gammaAB[A][B] = value;
16211655 }
1656+ catch(MolDSException ex){
1657+ #pragma omp critical
1658+ ex.Serialize(ompErrors);
1659+ }
1660+ } // end of loop B parallelized by openMP
1661+ // Exception throwing for omp-region
1662+ if(!ompErrors.str().empty()){
1663+ throw MolDSException::Deserialize(ompErrors);
16221664 }
1623- catch(MolDSException ex){
1624-#pragma omp critical
1625- ex.Serialize(ompErrors);
1665+ } // end of loop A prallelized by MPI
1666+
1667+ // communication to collect all matrix data on head-rank
1668+ int mpiHeadRank = MolDS_mpi::MpiProcess::GetInstance()->GetHeadRank();
1669+ if(mpiRank == mpiHeadRank){
1670+ // receive the matrix data from other ranks
1671+ for(int A=0; A<totalAtomNumber; A++){
1672+ if(A%mpiSize == mpiHeadRank){continue;}
1673+ int source = A%mpiSize;
1674+ int tag = A;
1675+ MolDS_mpi::MpiProcess::GetInstance()->Recv(source, tag, &gammaAB[A][A], totalAtomNumber-A);
16261676 }
16271677 }
1628- // Exception throwing for omp-region
1629- if(!ompErrors.str().empty()){
1630- throw MolDSException::Deserialize(ompErrors);
1678+ else{
1679+ // send the matrix data to head-rank
1680+ for(int A=0; A<totalAtomNumber; A++){
1681+ if(A%mpiSize != mpiRank){continue;}
1682+ int dest = mpiHeadRank;
1683+ int tag = A;
1684+ MolDS_mpi::MpiProcess::GetInstance()->Send(dest, tag, &gammaAB[A][A], totalAtomNumber-A);
1685+ }
16311686 }
1687+ // broadcast all matrix data to all rank
1688+ int root=mpiHeadRank;
1689+ MolDS_mpi::MpiProcess::GetInstance()->Broadcast(&gammaAB[0][0], totalAtomNumber*totalAtomNumber, root);
1690+
16321691
16331692 #pragma omp parallel for schedule(auto)
16341693 for(int A=0; A<totalAtomNumber; A++){
@@ -1727,21 +1786,25 @@ void Cndo2::CalcElectronicTransitionDipoleMoment(double* transitionDipoleMoment,
17271786 void Cndo2::CalcCartesianMatrixByGTOExpansion(double*** cartesianMatrix,
17281787 const Molecule& molecule,
17291788 STOnGType stonG) const{
1730- int totalAONumber = molecule.GetTotalNumberAOs();
1789+ int mpiRank = MolDS_mpi::MpiProcess::GetInstance()->GetRank();
1790+ int mpiSize = MolDS_mpi::MpiProcess::GetInstance()->GetSize();
1791+ int totalAONumber = molecule.GetTotalNumberAOs();
17311792 int totalAtomNumber = molecule.GetNumberAtoms();
17321793
1733- stringstream ompErrors;
1734-#pragma omp parallel for schedule(auto)
1794+ // This loop (A and mu) is parallelized by MPI
17351795 for(int A=0; A<totalAtomNumber; A++){
1736- try{
1737- const Atom& atomA = *molecule.GetAtom(A);
1738- int firstAOIndexAtomA = atomA.GetFirstAOIndex();
1796+ const Atom& atomA = *molecule.GetAtom(A);
1797+ int firstAOIndexAtomA = atomA.GetFirstAOIndex();
1798+ for(int a=0; a<atomA.GetValenceSize(); a++){
1799+ int mu = firstAOIndexAtomA + a;
1800+ if(mu%mpiSize != mpiRank){continue;}
1801+ stringstream ompErrors;
1802+ #pragma omp parallel for schedule(auto)
17391803 for(int B=0; B<totalAtomNumber; B++){
1740- const Atom& atomB = *molecule.GetAtom(B);
1741- int firstAOIndexAtomB = atomB.GetFirstAOIndex();
1742- for(int a=0; a<atomA.GetValenceSize(); a++){
1804+ try{
1805+ const Atom& atomB = *molecule.GetAtom(B);
1806+ int firstAOIndexAtomB = atomB.GetFirstAOIndex();
17431807 for(int b=0; b<atomB.GetValenceSize(); b++){
1744- int mu = firstAOIndexAtomA + a;
17451808 int nu = firstAOIndexAtomB + b;
17461809 this->CalcCartesianMatrixElementsByGTOExpansion(cartesianMatrix[XAxis][mu][nu],
17471810 cartesianMatrix[YAxis][mu][nu],
@@ -1749,18 +1812,51 @@ void Cndo2::CalcCartesianMatrixByGTOExpansion(double*** cartesianMatrix,
17491812 atomA, a, atomB, b, stonG);
17501813 }
17511814 }
1752-
1815+ catch(MolDSException ex){
1816+ #pragma omp critical
1817+ ex.Serialize(ompErrors);
1818+ }
1819+ }// end of loop for int B with openMP
1820+ // Exception throwing for omp-region
1821+ if(!ompErrors.str().empty()){
1822+ throw MolDSException::Deserialize(ompErrors);
17531823 }
1754- }
1755- catch(MolDSException ex){
1756-#pragma omp critical
1757- ex.Serialize(ompErrors);
1824+ }
1825+ } // end of loop for int A with openMP
1826+
1827+ // communication to collect all matrix data on head-rank
1828+ int mpiHeadRank = MolDS_mpi::MpiProcess::GetInstance()->GetHeadRank();
1829+ if(mpiRank == mpiHeadRank){
1830+ // receive the matrix data from other ranks
1831+ for(int mu=0; mu<totalAONumber; mu++){
1832+ if(mu%mpiSize == mpiHeadRank){continue;}
1833+ int source = mu%mpiSize;
1834+ int tagBase = 3*mu;
1835+ int tagX = tagBase + XAxis;
1836+ int tagY = tagBase + YAxis;
1837+ int tagZ = tagBase + ZAxis;
1838+ MolDS_mpi::MpiProcess::GetInstance()->Recv(source, tagX, cartesianMatrix[XAxis][mu], totalAONumber);
1839+ MolDS_mpi::MpiProcess::GetInstance()->Recv(source, tagY, cartesianMatrix[YAxis][mu], totalAONumber);
1840+ MolDS_mpi::MpiProcess::GetInstance()->Recv(source, tagZ, cartesianMatrix[ZAxis][mu], totalAONumber);
17581841 }
17591842 }
1760- // Exception throwing for omp-region
1761- if(!ompErrors.str().empty()){
1762- throw MolDSException::Deserialize(ompErrors);
1763- }
1843+ else{
1844+ // send the matrix data to head-rank
1845+ for(int mu=0; mu<totalAONumber; mu++){
1846+ if(mu%mpiSize != mpiRank){continue;}
1847+ int dest = mpiHeadRank;
1848+ int tagBase = 3*mu;
1849+ int tagX = tagBase + XAxis;
1850+ int tagY = tagBase + YAxis;
1851+ int tagZ = tagBase + ZAxis;
1852+ MolDS_mpi::MpiProcess::GetInstance()->Send(dest, tagX, cartesianMatrix[XAxis][mu], totalAONumber);
1853+ MolDS_mpi::MpiProcess::GetInstance()->Send(dest, tagY, cartesianMatrix[YAxis][mu], totalAONumber);
1854+ MolDS_mpi::MpiProcess::GetInstance()->Send(dest, tagZ, cartesianMatrix[ZAxis][mu], totalAONumber);
1855+ }
1856+ }
1857+ // broadcast all matrix data to all rank
1858+ int root=mpiHeadRank;
1859+ MolDS_mpi::MpiProcess::GetInstance()->Broadcast(&cartesianMatrix[0][0][0], CartesianType_end*totalAONumber*totalAONumber, root);
17641860 }
17651861
17661862 // Calculate elements of Cartesian matrix between atomic orbitals.
@@ -3721,49 +3817,64 @@ void Cndo2::CalcOverlapESsWithAnotherElectronicStructure(double** overlapESs,
37213817
37223818 // calculate OverlapAOs matrix. E.g. S_{\mu\nu} in (3.74) in J. A. Pople book.
37233819 void Cndo2::CalcOverlapAOs(double** overlapAOs, const Molecule& molecule) const{
3820+ int mpiRank = MolDS_mpi::MpiProcess::GetInstance()->GetRank();
3821+ int mpiSize = MolDS_mpi::MpiProcess::GetInstance()->GetSize();
37243822 int totalAONumber = molecule.GetTotalNumberAOs();
37253823 int totalAtomNumber = molecule.GetNumberAtoms();
3824+ MallocerFreer::GetInstance()->Initialize<double>(overlapAOs,
3825+ totalAONumber,
3826+ totalAONumber);
37263827
3727- stringstream ompErrors;
3828+ // This loop A is parallelized with MPI
3829+ for(int A=0; A<totalAtomNumber; A++){
3830+ const Atom& atomA = *molecule.GetAtom(A);
3831+ if(A%mpiSize != mpiRank){continue;}
3832+
3833+ stringstream ompErrors;
37283834 #pragma omp parallel
3729- {
3730- double** diatomicOverlapAOs = NULL;
3731- double** rotatingMatrix = NULL;
3732- try{
3733- // malloc
3734- MallocerFreer::GetInstance()->Malloc<double>(&diatomicOverlapAOs,
3735- OrbitalType_end,
3736- OrbitalType_end);
3737- MallocerFreer::GetInstance()->Malloc<double>(&rotatingMatrix,
3738- OrbitalType_end,
3739- OrbitalType_end);
3740- // calculation overlapAOs matrix
3741- for(int mu=0; mu<totalAONumber; mu++){
3742- overlapAOs[mu][mu] = 1.0;
3743- }
3835+ {
3836+ double** diatomicOverlapAOs = NULL;
3837+ double** rotatingMatrix = NULL;
3838+ try{
3839+ // malloc
3840+ MallocerFreer::GetInstance()->Malloc<double>(&diatomicOverlapAOs,
3841+ OrbitalType_end,
3842+ OrbitalType_end);
3843+ MallocerFreer::GetInstance()->Malloc<double>(&rotatingMatrix,
3844+ OrbitalType_end,
3845+ OrbitalType_end);
37443846
37453847 #pragma omp for schedule(auto)
3746- for(int A=0; A<totalAtomNumber; A++){
3747- const Atom& atomA = *molecule.GetAtom(A);
37483848 for(int B=A+1; B<totalAtomNumber; B++){
37493849 const Atom& atomB = *molecule.GetAtom(B);
37503850 this->CalcDiatomicOverlapAOsInDiatomicFrame(diatomicOverlapAOs, atomA, atomB);
37513851 this->CalcRotatingMatrix(rotatingMatrix, atomA, atomB);
37523852 this->RotateDiatmicOverlapAOsToSpaceFrame(diatomicOverlapAOs, rotatingMatrix);
37533853 this->SetOverlapAOsElement(overlapAOs, diatomicOverlapAOs, atomA, atomB);
3754- }
3755- }
3756- }
3757- catch(MolDSException ex){
3854+ } // end of loop B parallelized with openMP
3855+
3856+ } // end of try
3857+ catch(MolDSException ex){
37583858 #pragma omp critical
3759- ex.Serialize(ompErrors);
3859+ ex.Serialize(ompErrors);
3860+ }
3861+ this->FreeDiatomicOverlapAOsAndRotatingMatrix(&diatomicOverlapAOs, &rotatingMatrix);
3862+ } // end of omp-parallelized region
3863+ // Exception throwing for omp-region
3864+ if(!ompErrors.str().empty()){
3865+ throw MolDSException::Deserialize(ompErrors);
37603866 }
3761- this->FreeDiatomicOverlapAOsAndRotatingMatrix(&diatomicOverlapAOs, &rotatingMatrix);
3762- }
3763- // Exception throwing for omp-region
3764- if(!ompErrors.str().empty()){
3765- throw MolDSException::Deserialize(ompErrors);
3867+ } // end of loop A parallelized with MPI
3868+
3869+ // communication to reduce thsi->matrixForce on all node (namely, all_reduce)
3870+ int numTransported = totalAONumber*totalAONumber;
3871+ MolDS_mpi::MpiProcess::GetInstance()->AllReduce(&overlapAOs[0][0], numTransported, std::plus<double>());
3872+
3873+ #pragma omp parallel for schedule(auto)
3874+ for(int mu=0; mu<totalAONumber; mu++){
3875+ overlapAOs[mu][mu] = 1.0;
37663876 }
3877+
37673878 /*
37683879 this->OutputLog("overlapAOs matrix\n");
37693880 for(int o=0; o<molecule.GetTotalNumberAOs(); o++){
--- a/src/mndo/Mndo.cpp
+++ b/src/mndo/Mndo.cpp
@@ -3416,29 +3416,37 @@ double Mndo::GetAuxiliaryKNRKRElement(int moI, int moJ, int moK, int moL) const{
34163416
34173417 void Mndo::CalcTwoElecTwoCore(double****** twoElecTwoCore,
34183418 const Molecule& molecule) const{
3419+ int mpiRank = MolDS_mpi::MpiProcess::GetInstance()->GetRank();
3420+ int mpiSize = MolDS_mpi::MpiProcess::GetInstance()->GetSize();
3421+ int totalAtomNumber = molecule.GetNumberAtoms();
3422+
34193423 #ifdef MOLDS_DBG
34203424 if(twoElecTwoCore == NULL){
34213425 throw MolDSException(this->errorMessageCalcTwoElecTwoCoreNullMatrix);
34223426 }
34233427 #endif
34243428 MallocerFreer::GetInstance()->Initialize<double>(twoElecTwoCore,
3425- molecule.GetNumberAtoms(),
3426- molecule.GetNumberAtoms(),
3429+ totalAtomNumber,
3430+ totalAtomNumber,
34273431 dxy, dxy, dxy, dxy);
34283432
3429- stringstream ompErrors;
3433+
3434+ // this loop-a is MPI-parallelized
3435+ for(int a=0; a<totalAtomNumber; a++){
3436+ if(a%mpiSize != mpiRank){continue;}
3437+ stringstream ompErrors;
34303438 #pragma omp parallel
3431- {
3432- double**** diatomicTwoElecTwoCore = NULL;
3433- double** tmpRotMat = NULL;
3434- try{
3435- MallocerFreer::GetInstance()->Malloc<double>(&diatomicTwoElecTwoCore, dxy, dxy, dxy, dxy);
3436- MallocerFreer::GetInstance()->Malloc<double>(&tmpRotMat, OrbitalType_end, OrbitalType_end);
3437- // note that terms with condition a==b are not needed to calculate.
3439+ {
3440+ double**** diatomicTwoElecTwoCore = NULL;
3441+ double** tmpRotMat = NULL;
3442+ try{
3443+ MallocerFreer::GetInstance()->Malloc<double>(&diatomicTwoElecTwoCore, dxy, dxy, dxy, dxy);
3444+ MallocerFreer::GetInstance()->Malloc<double>(&tmpRotMat, OrbitalType_end, OrbitalType_end);
3445+ // note that terms with condition a==b are not needed to calculate.
34383446 #pragma omp for schedule(auto)
3439- for(int a=0; a<molecule.GetNumberAtoms(); a++){
3440- for(int b=a+1; b<molecule.GetNumberAtoms(); b++){
3447+ for(int b=a+1; b<totalAtomNumber; b++){
34413448 this->CalcDiatomicTwoElecTwoCore(diatomicTwoElecTwoCore, tmpRotMat, a, b);
3449+
34423450 for(int mu=0; mu<dxy; mu++){
34433451 for(int nu=mu; nu<dxy; nu++){
34443452 for(int lambda=0; lambda<dxy; lambda++){
@@ -3456,20 +3464,26 @@ void Mndo::CalcTwoElecTwoCore(double****** twoElecTwoCore,
34563464 }
34573465 }
34583466 }
3459- }
3460- }
3461- }
3462- catch(MolDSException ex){
3467+
3468+ } // end of loop b parallelized with MPI
3469+
3470+ } // end of try
3471+ catch(MolDSException ex){
34633472 #pragma omp critical
3464- ex.Serialize(ompErrors);
3473+ ex.Serialize(ompErrors);
3474+ }
3475+ MallocerFreer::GetInstance()->Free<double>(&diatomicTwoElecTwoCore, dxy, dxy, dxy, dxy);
3476+ MallocerFreer::GetInstance()->Free<double>(&tmpRotMat, OrbitalType_end, OrbitalType_end);
3477+ } // end of omp-parallelized region
3478+ // Exception throwing for omp-region
3479+ if(!ompErrors.str().empty()){
3480+ throw MolDSException::Deserialize(ompErrors);
34653481 }
3466- MallocerFreer::GetInstance()->Free<double>(&diatomicTwoElecTwoCore, dxy, dxy, dxy, dxy);
3467- MallocerFreer::GetInstance()->Free<double>(&tmpRotMat, OrbitalType_end, OrbitalType_end);
3468- }
3469- // Exception throwing for omp-region
3470- if(!ompErrors.str().empty()){
3471- throw MolDSException::Deserialize(ompErrors);
3472- }
3482+ } // end of loop a parallelized with MPI
3483+
3484+ // communication to reduce thsi->matrixForce on all node (namely, all_reduce)
3485+ int numTransported = totalAtomNumber*totalAtomNumber*dxy*dxy*dxy*dxy;
3486+ MolDS_mpi::MpiProcess::GetInstance()->AllReduce(&twoElecTwoCore[0][0][0][0][0][0], numTransported, std::plus<double>());
34733487 }
34743488
34753489 // Calculation of two electrons two cores integral (mu, nu | lambda, sigma) in space fixed frame,