• R/O
  • HTTP
  • SSH
  • HTTPS

Commit

Tags
No Tags

Frequently used words (click to add to your profile)

java, c++, android, linux, c#, windows, objective-c, cocoa, 誰得, qt, python, php, ruby, game, gui, bathyscaphe, c, 計画中(planning stage), 翻訳, omegat, framework, twitter, dom, test, vb.net, directx, ゲームエンジン, btron, arduino, previewer

Commit MetaInfo

Revision: 5613cccc631750dc5dfa929dce1de388f9e39eca (tree)
Time: 2013-08-19 19:05:47
Author: Mikiya Fujii <mikiya.fujii@gmai...>
Committer: Mikiya Fujii

Log Message

Asynchronous MPI communication is implemented. #31814

git-svn-id: https://svn.sourceforge.jp/svnroot/molds/trunk@1474 1136aad2-a195-0410-b898-f5ea1d11b9d8

Change Summary

Incremental Difference

--- a/src/Makefile
+++ b/src/Makefile
@@ -26,7 +26,7 @@ endif
2626 BOOST_TOP_DIR = /usr/local/boost/
2727 BOOST_INC_DIR = $(BOOST_TOP_DIR)include/
2828 BOOST_LIB_DIR = $(BOOST_TOP_DIR)lib/
29-BOOST_LIBS = -lboost_serialization -lboost_mpi
29+BOOST_LIBS = -lboost_serialization -lboost_mpi -lboost_thread
3030 LIBSBASE = -lmkl_intel_thread -lmkl_core -liomp5 -lpthread
3131 ifeq ($(INTEL), 64)
3232 LIBS = -lmkl_intel_ilp64 $(LIBSBASE) $(BOOST_LIBS)
@@ -37,9 +37,9 @@ EXENAME = MolDS.out
3737 DEPFILE = obj/objfile.dep
3838 LDFLAGS =
3939
40-ALL_CPP_FILES = base/Enums.cpp base/PrintController.cpp base/MolDSException.cpp base/MallocerFreer.cpp mpi/MpiProcess.cpp wrappers/Blas.cpp wrappers/Lapack.cpp base/Utilities.cpp base/MathUtilities.cpp base/EularAngle.cpp base/Parameters.cpp base/atoms/Atom.cpp base/atoms/Hatom.cpp base/atoms/Liatom.cpp base/atoms/Catom.cpp base/atoms/Natom.cpp base/atoms/Oatom.cpp base/atoms/Satom.cpp base/factories/AtomFactory.cpp base/Molecule.cpp base/InputParser.cpp base/GTOExpansionSTO.cpp base/RealSphericalHarmonicsIndex.cpp base/loggers/MOLogger.cpp base/loggers/DensityLogger.cpp base/loggers/HoleDensityLogger.cpp base/loggers/ParticleDensityLogger.cpp cndo/Cndo2.cpp indo/Indo.cpp zindo/ZindoS.cpp mndo/Mndo.cpp am1/Am1.cpp am1/Am1D.cpp pm3/Pm3.cpp pm3/Pm3D.cpp pm3/Pm3Pddg.cpp base/factories/ElectronicStructureFactory.cpp md/MD.cpp mc/MC.cpp rpmd/RPMD.cpp nasco/NASCO.cpp optimization/Optimizer.cpp optimization/ConjugateGradient.cpp optimization/SteepestDescent.cpp optimization/BFGS.cpp base/factories/OptimizerFactory.cpp base/MolDS.cpp Main.cpp
41-ALL_HEAD_FILES = base/Enums.h base/Uncopyable.h base/PrintController.h base/MolDSException.h base/MallocerFreer.h mpi/MpiProcess.h wrappers/Blas.h wrappers/Lapack.h base/Utilities.h base/MathUtilities.h base/EularAngle.h base/Parameters.h base/atoms/Atom.h base/atoms/Hatom.h base/atoms/Liatom.h base/atoms/Catom.h base/atoms/Natom.h base/atoms/Oatom.h base/atoms/Satom.h base/factories/AtomFactory.h base/Molecule.h base/InputParser.h base/GTOExpansionSTO.h base/RealSphericalHarmonicsIndex.h base/loggers/MOLogger.h base/loggers/DensityLogger.h base/loggers/HoleDensityLogger.h base/loggers/ParticleDensityLogger.h base/ElectronicStructure.h cndo/Cndo2.h cndo/ReducedOverlapAOsParameters.h indo/Indo.h zindo/ZindoS.h mndo/Mndo.h am1/Am1.h am1/Am1D.h pm3/Pm3.h pm3/Pm3D.h pm3/Pm3Pddg.h base/factories/ElectronicStructureFactory.h md/MD.h mc/MC.h rpmd/RPMD.h nasco/NASCO.h optimization/Optimizer.h optimization/ConjugateGradient.h optimization/SteepestDescent.h optimization/BFGS.h base/factories/OptimizerFactory.h base/MolDS.h
42-ALL_OBJ_FILES = obj/Enums.o obj/PrintController.o obj/MolDSException.o obj/MallocerFreer.o obj/MpiProcess.o obj/Blas.o obj/Lapack.o obj/Utilities.o obj/MathUtilities.o obj/EularAngle.o obj/Parameters.o obj/Atom.o obj/Hatom.o obj/Liatom.o obj/Catom.o obj/Natom.o obj/Oatom.o obj/Satom.o obj/AtomFactory.o obj/Molecule.o obj/InputParser.o obj/GTOExpansionSTO.o obj/RealSphericalHarmonicsIndex.o obj/MOLogger.o obj/DensityLogger.o obj/HoleDensityLogger.o obj/ParticleDensityLogger.o obj/Cndo2.o obj/Indo.o obj/ZindoS.o obj/Mndo.o obj/Am1.o obj/Am1D.o obj/Pm3.o obj/Pm3D.o obj/Pm3Pddg.o obj/ElectronicStructureFactory.o obj/MD.o obj/MC.o obj/RPMD.o obj/NASCO.o obj/Optimizer.o obj/ConjugateGradient.o obj/SteepestDescent.o obj/BFGS.o obj/OptimizerFactory.o obj/MolDS.o obj/Main.o
40+ALL_CPP_FILES = base/Enums.cpp base/PrintController.cpp base/MolDSException.cpp base/MallocerFreer.cpp mpi/MpiProcess.cpp mpi/AsyncCommunicator.cpp wrappers/Blas.cpp wrappers/Lapack.cpp base/Utilities.cpp base/MathUtilities.cpp base/EularAngle.cpp base/Parameters.cpp base/atoms/Atom.cpp base/atoms/Hatom.cpp base/atoms/Liatom.cpp base/atoms/Catom.cpp base/atoms/Natom.cpp base/atoms/Oatom.cpp base/atoms/Satom.cpp base/factories/AtomFactory.cpp base/Molecule.cpp base/InputParser.cpp base/GTOExpansionSTO.cpp base/RealSphericalHarmonicsIndex.cpp base/loggers/MOLogger.cpp base/loggers/DensityLogger.cpp base/loggers/HoleDensityLogger.cpp base/loggers/ParticleDensityLogger.cpp cndo/Cndo2.cpp indo/Indo.cpp zindo/ZindoS.cpp mndo/Mndo.cpp am1/Am1.cpp am1/Am1D.cpp pm3/Pm3.cpp pm3/Pm3D.cpp pm3/Pm3Pddg.cpp base/factories/ElectronicStructureFactory.cpp md/MD.cpp mc/MC.cpp rpmd/RPMD.cpp nasco/NASCO.cpp optimization/Optimizer.cpp optimization/ConjugateGradient.cpp optimization/SteepestDescent.cpp optimization/BFGS.cpp base/factories/OptimizerFactory.cpp base/MolDS.cpp Main.cpp
41+ALL_HEAD_FILES = base/Enums.h base/Uncopyable.h base/PrintController.h base/MolDSException.h base/containers/ThreadSafeQueue.h base/MallocerFreer.h mpi/MpiProcess.h mpi/AsyncCommunicator.h wrappers/Blas.h wrappers/Lapack.h base/Utilities.h base/MathUtilities.h base/EularAngle.h base/Parameters.h base/atoms/Atom.h base/atoms/Hatom.h base/atoms/Liatom.h base/atoms/Catom.h base/atoms/Natom.h base/atoms/Oatom.h base/atoms/Satom.h base/factories/AtomFactory.h base/Molecule.h base/InputParser.h base/GTOExpansionSTO.h base/RealSphericalHarmonicsIndex.h base/loggers/MOLogger.h base/loggers/DensityLogger.h base/loggers/HoleDensityLogger.h base/loggers/ParticleDensityLogger.h base/ElectronicStructure.h cndo/Cndo2.h cndo/ReducedOverlapAOsParameters.h indo/Indo.h zindo/ZindoS.h mndo/Mndo.h am1/Am1.h am1/Am1D.h pm3/Pm3.h pm3/Pm3D.h pm3/Pm3Pddg.h base/factories/ElectronicStructureFactory.h md/MD.h mc/MC.h rpmd/RPMD.h nasco/NASCO.h optimization/Optimizer.h optimization/ConjugateGradient.h optimization/SteepestDescent.h optimization/BFGS.h base/factories/OptimizerFactory.h base/MolDS.h
42+ALL_OBJ_FILES = obj/Enums.o obj/PrintController.o obj/MolDSException.o obj/MallocerFreer.o obj/MpiProcess.o obj/AsyncCommunicator.o obj/Blas.o obj/Lapack.o obj/Utilities.o obj/MathUtilities.o obj/EularAngle.o obj/Parameters.o obj/Atom.o obj/Hatom.o obj/Liatom.o obj/Catom.o obj/Natom.o obj/Oatom.o obj/Satom.o obj/AtomFactory.o obj/Molecule.o obj/InputParser.o obj/GTOExpansionSTO.o obj/RealSphericalHarmonicsIndex.o obj/MOLogger.o obj/DensityLogger.o obj/HoleDensityLogger.o obj/ParticleDensityLogger.o obj/Cndo2.o obj/Indo.o obj/ZindoS.o obj/Mndo.o obj/Am1.o obj/Am1D.o obj/Pm3.o obj/Pm3D.o obj/Pm3Pddg.o obj/ElectronicStructureFactory.o obj/MD.o obj/MC.o obj/RPMD.o obj/NASCO.o obj/Optimizer.o obj/ConjugateGradient.o obj/SteepestDescent.o obj/BFGS.o obj/OptimizerFactory.o obj/MolDS.o obj/Main.o
4343
4444 $(EXENAME): $(ALL_OBJ_FILES)
4545 $(CC) -o $@ -Wl,-rpath=$(BOOST_LIB_DIR) -L$(BOOST_LIB_DIR) $(LDFLAGS) $(ALL_OBJ_FILES) $(LIBS)
--- a/src/Makefile_GNU
+++ b/src/Makefile_GNU
@@ -23,7 +23,7 @@ override CFLAGS += -fopenmp
2323 BOOST_TOP_DIR = /usr/local/boost/
2424 BOOST_INC_DIR = $(BOOST_TOP_DIR)include/
2525 BOOST_LIB_DIR = $(BOOST_TOP_DIR)lib/
26-BOOST_LIBS = -lboost_serialization -lboost_mpi
26+BOOST_LIBS = -lboost_serialization -lboost_mpi -lboost_thread
2727 OPENBLAS_TOP_DIR = /usr/local/openblas/
2828 OPENBLAS_INC_DIR = $(OPENBLAS_TOP_DIR)include/
2929 OPENBLAS_LIB_DIR = $(OPENBLAS_TOP_DIR)lib/
@@ -34,9 +34,9 @@ EXENAME = MolDS.out
3434 DEPFILE = obj/objfile.dep
3535 LDFLAGS =
3636
37-ALL_CPP_FILES = base/Enums.cpp base/PrintController.cpp base/MolDSException.cpp base/MallocerFreer.cpp mpi/MpiProcess.cpp wrappers/Blas.cpp wrappers/Lapack.cpp base/Utilities.cpp base/MathUtilities.cpp base/EularAngle.cpp base/Parameters.cpp base/atoms/Atom.cpp base/atoms/Hatom.cpp base/atoms/Liatom.cpp base/atoms/Catom.cpp base/atoms/Natom.cpp base/atoms/Oatom.cpp base/atoms/Satom.cpp base/factories/AtomFactory.cpp base/Molecule.cpp base/InputParser.cpp base/GTOExpansionSTO.cpp base/RealSphericalHarmonicsIndex.cpp base/loggers/MOLogger.cpp base/loggers/DensityLogger.cpp base/loggers/HoleDensityLogger.cpp base/loggers/ParticleDensityLogger.cpp cndo/Cndo2.cpp indo/Indo.cpp zindo/ZindoS.cpp mndo/Mndo.cpp am1/Am1.cpp am1/Am1D.cpp pm3/Pm3.cpp pm3/Pm3D.cpp pm3/Pm3Pddg.cpp base/factories/ElectronicStructureFactory.cpp md/MD.cpp mc/MC.cpp rpmd/RPMD.cpp nasco/NASCO.cpp optimization/Optimizer.cpp optimization/ConjugateGradient.cpp optimization/SteepestDescent.cpp optimization/BFGS.cpp base/factories/OptimizerFactory.cpp base/MolDS.cpp Main.cpp
38-ALL_HEAD_FILES = base/Enums.h base/Uncopyable.h base/PrintController.h base/MolDSException.h base/MallocerFreer.h mpi/MpiProcess.h wrappers/Blas.h wrappers/Lapack.h base/Utilities.h base/MathUtilities.h base/EularAngle.h base/Parameters.h base/atoms/Atom.h base/atoms/Hatom.h base/atoms/Liatom.h base/atoms/Catom.h base/atoms/Natom.h base/atoms/Oatom.h base/atoms/Satom.h base/factories/AtomFactory.h base/Molecule.h base/InputParser.h base/GTOExpansionSTO.h base/RealSphericalHarmonicsIndex.h base/loggers/MOLogger.h base/loggers/DensityLogger.h base/loggers/HoleDensityLogger.h base/loggers/ParticleDensityLogger.h base/ElectronicStructure.h cndo/Cndo2.h cndo/ReducedOverlapAOsParameters.h indo/Indo.h zindo/ZindoS.h mndo/Mndo.h am1/Am1.h am1/Am1D.h pm3/Pm3.h pm3/Pm3D.h pm3/Pm3Pddg.h base/factories/ElectronicStructureFactory.h md/MD.h mc/MC.h rpmd/RPMD.h nasco/NASCO.h optimization/Optimizer.h optimization/ConjugateGradient.h optimization/SteepestDescent.h optimization/BFGS.h base/factories/OptimizerFactory.h base/MolDS.h
39-ALL_OBJ_FILES = obj/Enums.o obj/PrintController.o obj/MolDSException.o obj/MallocerFreer.o obj/MpiProcess.o obj/Blas.o obj/Lapack.o obj/Utilities.o obj/MathUtilities.o obj/EularAngle.o obj/Parameters.o obj/Atom.o obj/Hatom.o obj/Liatom.o obj/Catom.o obj/Natom.o obj/Oatom.o obj/Satom.o obj/AtomFactory.o obj/Molecule.o obj/InputParser.o obj/GTOExpansionSTO.o obj/RealSphericalHarmonicsIndex.o obj/MOLogger.o obj/DensityLogger.o obj/HoleDensityLogger.o obj/ParticleDensityLogger.o obj/Cndo2.o obj/Indo.o obj/ZindoS.o obj/Mndo.o obj/Am1.o obj/Am1D.o obj/Pm3.o obj/Pm3D.o obj/Pm3Pddg.o obj/ElectronicStructureFactory.o obj/MD.o obj/MC.o obj/RPMD.o obj/NASCO.o obj/Optimizer.o obj/ConjugateGradient.o obj/SteepestDescent.o obj/BFGS.o obj/OptimizerFactory.o obj/MolDS.o obj/Main.o
37+ALL_CPP_FILES = base/Enums.cpp base/PrintController.cpp base/MolDSException.cpp base/MallocerFreer.cpp mpi/MpiProcess.cpp mpi/AsyncCommunicator.cpp wrappers/Blas.cpp wrappers/Lapack.cpp base/Utilities.cpp base/MathUtilities.cpp base/EularAngle.cpp base/Parameters.cpp base/atoms/Atom.cpp base/atoms/Hatom.cpp base/atoms/Liatom.cpp base/atoms/Catom.cpp base/atoms/Natom.cpp base/atoms/Oatom.cpp base/atoms/Satom.cpp base/factories/AtomFactory.cpp base/Molecule.cpp base/InputParser.cpp base/GTOExpansionSTO.cpp base/RealSphericalHarmonicsIndex.cpp base/loggers/MOLogger.cpp base/loggers/DensityLogger.cpp base/loggers/HoleDensityLogger.cpp base/loggers/ParticleDensityLogger.cpp cndo/Cndo2.cpp indo/Indo.cpp zindo/ZindoS.cpp mndo/Mndo.cpp am1/Am1.cpp am1/Am1D.cpp pm3/Pm3.cpp pm3/Pm3D.cpp pm3/Pm3Pddg.cpp base/factories/ElectronicStructureFactory.cpp md/MD.cpp mc/MC.cpp rpmd/RPMD.cpp nasco/NASCO.cpp optimization/Optimizer.cpp optimization/ConjugateGradient.cpp optimization/SteepestDescent.cpp optimization/BFGS.cpp base/factories/OptimizerFactory.cpp base/MolDS.cpp Main.cpp
38+ALL_HEAD_FILES = base/Enums.h base/Uncopyable.h base/PrintController.h base/MolDSException.h base/containers/ThreadSafeQueue.h base/MallocerFreer.h mpi/MpiProcess.h mpi/AsyncCommunicator.h wrappers/Blas.h wrappers/Lapack.h base/Utilities.h base/MathUtilities.h base/EularAngle.h base/Parameters.h base/atoms/Atom.h base/atoms/Hatom.h base/atoms/Liatom.h base/atoms/Catom.h base/atoms/Natom.h base/atoms/Oatom.h base/atoms/Satom.h base/factories/AtomFactory.h base/Molecule.h base/InputParser.h base/GTOExpansionSTO.h base/RealSphericalHarmonicsIndex.h base/loggers/MOLogger.h base/loggers/DensityLogger.h base/loggers/HoleDensityLogger.h base/loggers/ParticleDensityLogger.h base/ElectronicStructure.h cndo/Cndo2.h cndo/ReducedOverlapAOsParameters.h indo/Indo.h zindo/ZindoS.h mndo/Mndo.h am1/Am1.h am1/Am1D.h pm3/Pm3.h pm3/Pm3D.h pm3/Pm3Pddg.h base/factories/ElectronicStructureFactory.h md/MD.h mc/MC.h rpmd/RPMD.h nasco/NASCO.h optimization/Optimizer.h optimization/ConjugateGradient.h optimization/SteepestDescent.h optimization/BFGS.h base/factories/OptimizerFactory.h base/MolDS.h
39+ALL_OBJ_FILES = obj/Enums.o obj/PrintController.o obj/MolDSException.o obj/MallocerFreer.o obj/MpiProcess.o obj/AsyncCommunicator.o obj/Blas.o obj/Lapack.o obj/Utilities.o obj/MathUtilities.o obj/EularAngle.o obj/Parameters.o obj/Atom.o obj/Hatom.o obj/Liatom.o obj/Catom.o obj/Natom.o obj/Oatom.o obj/Satom.o obj/AtomFactory.o obj/Molecule.o obj/InputParser.o obj/GTOExpansionSTO.o obj/RealSphericalHarmonicsIndex.o obj/MOLogger.o obj/DensityLogger.o obj/HoleDensityLogger.o obj/ParticleDensityLogger.o obj/Cndo2.o obj/Indo.o obj/ZindoS.o obj/Mndo.o obj/Am1.o obj/Am1D.o obj/Pm3.o obj/Pm3D.o obj/Pm3Pddg.o obj/ElectronicStructureFactory.o obj/MD.o obj/MC.o obj/RPMD.o obj/NASCO.o obj/Optimizer.o obj/ConjugateGradient.o obj/SteepestDescent.o obj/BFGS.o obj/OptimizerFactory.o obj/MolDS.o obj/Main.o
4040
4141 $(EXENAME): $(ALL_OBJ_FILES)
4242 $(CC) -o $@ $(LDFLAGS) -Wl,-rpath=$(BOOST_LIB_DIR) -Wl,-rpath=$(OPENBLAS_LIB_DIR) $(LDFLAGS) $(ALL_OBJ_FILES) -L$(BOOST_LIB_DIR) -L$(OPENBLAS_LIB_DIR) $(LIBS)
--- a/src/base/Enums.h
+++ b/src/base/Enums.h
@@ -154,9 +154,17 @@ RENUMSTR_END()
154154
155155 RENUMSTR_BEGIN( ExceptionKey, ExceptionKeyStr )
156156 RENUMSTR( LapackInfo, "LapackInfo" )
157+ RENUMSTR( EmptyQueue, "EmptyQueue" )
157158 RENUMSTR( ExceptionKey_end, "ExceptionKey_end" )
158159 RENUMSTR_END()
159160
161+RENUMSTR_BEGIN( MpiFunctionType, MpiFunctionTypeStr )
162+ RENUMSTR( Send, "Send" )
163+ RENUMSTR( Recv, "Recv" )
164+ RENUMSTR( Broadcast, "Broadcast" )
165+ RENUMSTR( MpiFunctionType_end, "MpiFunctionType_end" )
166+RENUMSTR_END()
167+
160168 }
161169 #endif
162170
--- a/src/base/Molecule.cpp
+++ b/src/base/Molecule.cpp
@@ -24,6 +24,7 @@
2424 #include<string>
2525 #include<vector>
2626 #include<stdexcept>
27+#include<omp.h>
2728 #include<boost/format.hpp>
2829 #include"Enums.h"
2930 #include"Uncopyable.h"
--- /dev/null
+++ b/src/base/containers/ThreadSafeQueue.cpp
@@ -0,0 +1,41 @@
1+//************************************************************************//
2+// Copyright (C) 2011-2013 Mikiya Fujii //
3+// //
4+// This file is part of MolDS. //
5+// //
6+// MolDS is free software: you can redistribute it and/or modify //
7+// it under the terms of the GNU General Public License as published by //
8+// the Free Software Foundation, either version 3 of the License, or //
9+// (at your option) any later version. //
10+// //
11+// MolDS is distributed in the hope that it will be useful, //
12+// but WITHOUT ANY WARRANTY; without even the implied warranty of //
13+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the //
14+// GNU General Public License for more details. //
15+// //
16+// You should have received a copy of the GNU General Public License //
17+// along with MolDS. If not, see <http://www.gnu.org/licenses/>. //
18+//************************************************************************//
19+#include<stdio.h>
20+#include<sstream>
21+#include<queue>
22+#include<boost/format.hpp>
23+#include"../Enums.h"
24+#include"../Uncopyable.h"
25+#include"../PrintController.h"
26+#include"../MolDSException.h"
27+#include"ThreadSafeQueue.h"
28+using namespace std;
29+//using namespace MolDS_base;
30+
31+namespace MolDS_base_containers{
32+ int ThreadSafeQueue::Size(){
33+ boost::mutex::scoped_lock lk(this->stateGuard);
34+ return this->stdQueue.size();
35+ }
36+
37+ bool ThreadSafeQueue::Empty(){
38+ boost::mutex::scoped_lock lk(this->stateGuard);
39+ return this->stdQueue.empty();
40+ }
41+};
--- /dev/null
+++ b/src/base/containers/ThreadSafeQueue.h
@@ -0,0 +1,75 @@
1+//************************************************************************//
2+// Copyright (C) 2011-2013 Mikiya Fujii //
3+// Copyright (C) 2012-2012 Katushiko Nishimra //
4+// //
5+// This file is part of MolDS. //
6+// //
7+// MolDS is free software: you can redistribute it and/or modify //
8+// it under the terms of the GNU General Public License as published by //
9+// the Free Software Foundation, either version 3 of the License, or //
10+// (at your option) any later version. //
11+// //
12+// MolDS is distributed in the hope that it will be useful, //
13+// but WITHOUT ANY WARRANTY; without even the implied warranty of //
14+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the //
15+// GNU General Public License for more details. //
16+// //
17+// You should have received a copy of the GNU General Public License //
18+// along with MolDS. If not, see <http://www.gnu.org/licenses/>. //
19+//************************************************************************//
20+#ifndef INCLUDED_THREADSAFEQUEQUE
21+#define INCLUDED_THREADSAFEQUEQUE
22+#include<queue>
23+#include<boost/shared_ptr.hpp>
24+#include<boost/thread.hpp>
25+#include<boost/thread/condition.hpp>
26+namespace MolDS_base_containers{
27+
28+// This Queue class is thread-safe
29+
30+template <typename T>
31+class ThreadSafeQueue
32+{
33+public:
34+ ThreadSafeQueue(){}
35+ ~ThreadSafeQueue(){}
36+
37+ void Push(const T& data){
38+ boost::mutex::scoped_lock lk(this->stateGuard);
39+ this->stdQueue.push(data);
40+ this->stateChange.notify_all();
41+ }
42+
43+ T FrontPop(){
44+ boost::mutex::scoped_lock lk(this->stateGuard);
45+ if(this->stdQueue.empty()){
46+ std::stringstream ss;
47+ ss << "naitive queue has no member\n";
48+ MolDS_base::MolDSException ex(ss.str());
49+ int info = 0;
50+ ex.SetKeyValue<int>(MolDS_base::EmptyQueue, info);
51+ throw ex;
52+ }
53+ T ret = this->stdQueue.front();
54+ this->stdQueue.pop();
55+ this->stateChange.notify_all();
56+ return ret;
57+ }
58+
59+ int Size(){
60+ boost::mutex::scoped_lock lk(this->stateGuard);
61+ return this->stdQueue.size();
62+ }
63+
64+ bool Empty(){
65+ boost::mutex::scoped_lock lk(this->stateGuard);
66+ return this->stdQueue.empty();
67+ }
68+private:
69+ std::queue<T> stdQueue;
70+ boost::mutex stateGuard;
71+ boost::condition_variable stateChange;
72+};
73+
74+}
75+#endif
--- a/src/cndo/Cndo2.cpp
+++ b/src/cndo/Cndo2.cpp
@@ -33,7 +33,9 @@
3333 #include"../base/PrintController.h"
3434 #include"../base/MolDSException.h"
3535 #include"../base/MallocerFreer.h"
36+#include"../base/containers/ThreadSafeQueue.h"
3637 #include"../mpi/MpiProcess.h"
38+#include"../mpi/AsyncCommunicator.h"
3739 #include"../wrappers/Blas.h"
3840 #include"../wrappers/Lapack.h"
3941 #include"../base/MathUtilities.h"
@@ -1374,95 +1376,100 @@ void Cndo2::CalcFockMatrix(double** fockMatrix,
13741376 double const* atomicElectronPopulation,
13751377 double const* const* const* const* const* const* twoElecTwoCore,
13761378 bool isGuess) const{
1377- int mpiRank = MolDS_mpi::MpiProcess::GetInstance()->GetRank();
1378- int mpiSize = MolDS_mpi::MpiProcess::GetInstance()->GetSize();
13791379 int totalNumberAOs = molecule.GetTotalNumberAOs();
13801380 int totalNumberAtoms = molecule.GetNumberAtoms();
1381- MallocerFreer::GetInstance()->Initialize<double>(fockMatrix,
1382- totalNumberAOs,
1383- totalNumberAOs);
1381+
1382+ // MPI setting of each rank
1383+ int mpiRank = MolDS_mpi::MpiProcess::GetInstance()->GetRank();
1384+ int mpiSize = MolDS_mpi::MpiProcess::GetInstance()->GetSize();
1385+ int mpiHeadRank = MolDS_mpi::MpiProcess::GetInstance()->GetHeadRank();
1386+ int mPassingTimes = MolDS_mpi::MpiProcess::GetInstance()->GetMessagePassingTimes(totalNumberAOs);
1387+ MolDS_mpi::AsyncCommunicator asyncCommunicator;
1388+ boost::thread communicationThread( boost::bind(&MolDS_mpi::AsyncCommunicator::Run<double>,
1389+ &asyncCommunicator,
1390+ mPassingTimes) );
1391+
1392+ MallocerFreer::GetInstance()->Initialize<double>(fockMatrix, totalNumberAOs, totalNumberAOs);
13841393 for(int A=0; A<totalNumberAtoms; A++){
13851394 const Atom& atomA = *molecule.GetAtom(A);
13861395 int firstAOIndexA = atomA.GetFirstAOIndex();
13871396 int lastAOIndexA = atomA.GetLastAOIndex();
13881397 for(int mu=firstAOIndexA; mu<=lastAOIndexA; mu++){
1389- if(mu%mpiSize != mpiRank){continue;}
1390-
1391- stringstream ompErrors;
1392-#pragma omp parallel for schedule(auto)
1393- for(int B=A; B<totalNumberAtoms; B++){
1394- try{
1395- const Atom& atomB = *molecule.GetAtom(B);
1396- int firstAOIndexB = atomB.GetFirstAOIndex();
1397- int lastAOIndexB = atomB.GetLastAOIndex();
1398- for(int nu=firstAOIndexB; nu<=lastAOIndexB; nu++){
1399- if(mu == nu){
1400- // diagonal part
1401- fockMatrix[mu][mu] = this->GetFockDiagElement(atomA,
1402- A,
1403- mu,
1404- molecule,
1405- gammaAB,
1406- orbitalElectronPopulation,
1407- atomicElectronPopulation,
1408- twoElecTwoCore,
1409- isGuess);
1410- }
1411- else if(mu < nu){
1412- // upper right part
1413- fockMatrix[mu][nu] = this->GetFockOffDiagElement(atomA,
1414- atomB,
1398+ int calcRank = mu%mpiSize;
1399+ if(mpiRank == calcRank){
1400+ int maxThreads = omp_get_max_threads();
1401+ stringstream ompErrors;
1402+#pragma omp parallel for schedule(auto) num_threads(maxThreads-1)
1403+ for(int B=A; B<totalNumberAtoms; B++){
1404+ try{
1405+ const Atom& atomB = *molecule.GetAtom(B);
1406+ int firstAOIndexB = atomB.GetFirstAOIndex();
1407+ int lastAOIndexB = atomB.GetLastAOIndex();
1408+ for(int nu=firstAOIndexB; nu<=lastAOIndexB; nu++){
1409+ if(mu == nu){
1410+ // diagonal part
1411+ fockMatrix[mu][mu] = this->GetFockDiagElement(atomA,
14151412 A,
1416- B,
14171413 mu,
1418- nu,
14191414 molecule,
14201415 gammaAB,
1421- overlapAOs,
14221416 orbitalElectronPopulation,
1417+ atomicElectronPopulation,
14231418 twoElecTwoCore,
14241419 isGuess);
1425- }
1426- else{
1427- // lower left part (not calculated)
1428- }
1429- } // end of loop nu
1430- } // end of try
1431- catch(MolDSException ex){
1420+ }
1421+ else if(mu < nu){
1422+ // upper right part
1423+ fockMatrix[mu][nu] = this->GetFockOffDiagElement(atomA,
1424+ atomB,
1425+ A,
1426+ B,
1427+ mu,
1428+ nu,
1429+ molecule,
1430+ gammaAB,
1431+ overlapAOs,
1432+ orbitalElectronPopulation,
1433+ twoElecTwoCore,
1434+ isGuess);
1435+ }
1436+ else{
1437+ // lower left part (not calculated)
1438+ }
1439+ } // end of loop nu
1440+ } // end of try
1441+ catch(MolDSException ex){
14321442 #pragma omp critical
1433- ex.Serialize(ompErrors);
1443+ ex.Serialize(ompErrors);
1444+ }
1445+ } // end of loop B parallelized with openMP
1446+ // Exception throwing for omp-region
1447+ if(!ompErrors.str().empty()){
1448+ throw MolDSException::Deserialize(ompErrors);
14341449 }
1435- } // end of loop B parallelized with openMP
1436- // Exception throwing for omp-region
1437- if(!ompErrors.str().empty()){
1438- throw MolDSException::Deserialize(ompErrors);
1450+ } // end of if(mpiRank == calcRank)
1451+
1452+ // set data to gather in mpiHeadRank with asynchronous MPI
1453+ int tag = mu;
1454+ int source = calcRank;
1455+ int dest = mpiHeadRank;
1456+ if(mpiRank == mpiHeadRank && mpiRank != calcRank){
1457+ asyncCommunicator.SetRecvedVector(&fockMatrix[mu][mu],
1458+ totalNumberAOs-mu,
1459+ source,
1460+ tag);
1461+ }
1462+ if(mpiRank != mpiHeadRank && mpiRank == calcRank){
1463+ asyncCommunicator.SetSentVector(&fockMatrix[mu][mu],
1464+ totalNumberAOs-mu,
1465+ dest,
1466+ tag);
14391467 }
14401468 } // end of loop mu parallelized with MPI
14411469 } // end of loop A
1442-
1443- // communication to collect all matrix data on head-rank
1444- int mpiHeadRank = MolDS_mpi::MpiProcess::GetInstance()->GetHeadRank();
1445- if(mpiRank == mpiHeadRank){
1446- // receive the matrix data from other ranks
1447- for(int mu=0; mu<totalNumberAOs; mu++){
1448- if(mu%mpiSize == mpiHeadRank){continue;}
1449- int source = mu%mpiSize;
1450- int tag = mu;
1451- MolDS_mpi::MpiProcess::GetInstance()->Recv(source, tag, &fockMatrix[mu][mu], totalNumberAOs-mu);
1452- }
1453- }
1454- else{
1455- // send the matrix data to head-rank
1456- for(int mu=0; mu<totalNumberAOs; mu++){
1457- if(mu%mpiSize != mpiRank){continue;}
1458- int dest = mpiHeadRank;
1459- int tag = mu;
1460- MolDS_mpi::MpiProcess::GetInstance()->Send(dest, tag, &fockMatrix[mu][mu], totalNumberAOs-mu);
1461- }
1462- }
1463- // broadcast all matrix data to all rank
1464- int root=mpiHeadRank;
1465- MolDS_mpi::MpiProcess::GetInstance()->Broadcast(&fockMatrix[0][0], totalNumberAOs*totalNumberAOs, root);
1470+ // Delete the communication thread.
1471+ communicationThread.join();
1472+ MolDS_mpi::MpiProcess::GetInstance()->Broadcast(&fockMatrix[0][0], totalNumberAOs*totalNumberAOs, mpiHeadRank);
14661473
14671474 /*
14681475 this->OutputLog("fock matrix\n");
@@ -1590,104 +1597,109 @@ void Cndo2::CalcAtomicElectronPopulation(double* atomicElectronPopulation,
15901597
15911598 // calculate gammaAB matrix. (B.56) and (B.62) in J. A. Pople book.
15921599 void Cndo2::CalcGammaAB(double** gammaAB, const Molecule& molecule) const{
1593- int mpiRank = MolDS_mpi::MpiProcess::GetInstance()->GetRank();
1594- int mpiSize = MolDS_mpi::MpiProcess::GetInstance()->GetSize();
15951600 int totalAtomNumber = molecule.GetNumberAtoms();
15961601
1602+ // MPI setting of each rank
1603+ int mpiRank = MolDS_mpi::MpiProcess::GetInstance()->GetRank();
1604+ int mpiSize = MolDS_mpi::MpiProcess::GetInstance()->GetSize();
1605+ int mpiHeadRank = MolDS_mpi::MpiProcess::GetInstance()->GetHeadRank();
1606+ int mPassingTimes = MolDS_mpi::MpiProcess::GetInstance()->GetMessagePassingTimes(totalAtomNumber);
1607+ MolDS_mpi::AsyncCommunicator asyncCommunicator;
1608+ boost::thread communicationThread( boost::bind(&MolDS_mpi::AsyncCommunicator::Run<double>,
1609+ &asyncCommunicator,
1610+ mPassingTimes) );
1611+
15971612 // This loop (A) is parallelized by MPI
15981613 for(int A=0; A<totalAtomNumber; A++){
1599- if(A%mpiSize != mpiRank){continue;}
1600- const Atom& atomA = *molecule.GetAtom(A);
1601- int na = atomA.GetValenceShellType() + 1;
1602- double orbitalExponentA = atomA.GetOrbitalExponent(
1603- atomA.GetValenceShellType(), s, this->theory);
1604- stringstream ompErrors;
1605-#pragma omp parallel for schedule(auto)
1606- for(int B=A; B<totalAtomNumber; B++){
1607- try{
1608- const Atom& atomB = *molecule.GetAtom(B);
1609- int nb = atomB.GetValenceShellType() + 1;
1610- double orbitalExponentB = atomB.GetOrbitalExponent(
1611- atomB.GetValenceShellType(), s, this->theory);
1612-
1613- double value = 0.0;
1614- double R = molecule.GetDistanceAtoms(A, B);
1615- double temp = 0.0;
1616- if(R>0.0){
1617- // (B.56)
1618- value = pow(0.5*R, 2.0*na);
1619- value *= this->GetReducedOverlapAOs(2*na-1, 0, 2.0*orbitalExponentA*R, 0);
1620-
1621- for(int l=1; l<=2*nb; l++){
1622- temp = 0.0;
1623- temp = l;
1624- temp *= pow(2.0*orbitalExponentB, 2*nb-l);
1625- temp /= Factorial(2*nb-l)*2.0*nb;
1626- temp *= pow(0.5*R, 2.0*nb-l+2.0*na);
1627- temp *= this->GetReducedOverlapAOs(2*na-1,
1628- 2*nb-l,
1629- 2.0*orbitalExponentA*R,
1630- 2.0*orbitalExponentB*R);
1631- value -= temp;
1632- }
1614+ int calcRank = A%mpiSize;
1615+ if(mpiRank == calcRank){
1616+ const Atom& atomA = *molecule.GetAtom(A);
1617+ int na = atomA.GetValenceShellType() + 1;
1618+ double orbitalExponentA = atomA.GetOrbitalExponent(
1619+ atomA.GetValenceShellType(), s, this->theory);
1620+ int maxThreads = omp_get_max_threads();
1621+ stringstream ompErrors;
1622+#pragma omp parallel for schedule(auto) num_threads(maxThreads-1)
1623+ for(int B=A; B<totalAtomNumber; B++){
1624+ try{
1625+ const Atom& atomB = *molecule.GetAtom(B);
1626+ int nb = atomB.GetValenceShellType() + 1;
1627+ double orbitalExponentB = atomB.GetOrbitalExponent(
1628+ atomB.GetValenceShellType(), s, this->theory);
1629+
1630+ double value = 0.0;
1631+ double R = molecule.GetDistanceAtoms(A, B);
1632+ double temp = 0.0;
1633+ if(R>0.0){
1634+ // (B.56)
1635+ value = pow(0.5*R, 2.0*na);
1636+ value *= this->GetReducedOverlapAOs(2*na-1, 0, 2.0*orbitalExponentA*R, 0);
1637+
1638+ for(int l=1; l<=2*nb; l++){
1639+ temp = 0.0;
1640+ temp = l;
1641+ temp *= pow(2.0*orbitalExponentB, 2*nb-l);
1642+ temp /= Factorial(2*nb-l)*2.0*nb;
1643+ temp *= pow(0.5*R, 2.0*nb-l+2.0*na);
1644+ temp *= this->GetReducedOverlapAOs(2*na-1,
1645+ 2*nb-l,
1646+ 2.0*orbitalExponentA*R,
1647+ 2.0*orbitalExponentB*R);
1648+ value -= temp;
1649+ }
16331650
1634- value *= pow(2.0*orbitalExponentA, 2.0*na+1.0);
1635- value /= Factorial(2*na);
1636- }
1637- else{
1638- // (B.62)
1639- value = Factorial(2*na-1);
1640- value /= pow(2.0*orbitalExponentA, 2.0*na);
1641-
1642- for(int l=1; l<=2*nb; l++){
1643- temp = l;
1644- temp *= pow(2.0*orbitalExponentB, 2*nb-l);
1645- temp *= Factorial(2*na+2*nb-l-1);
1646- temp /= Factorial(2*nb-l);
1647- temp /= 2.0*nb;
1648- temp /= pow( 2.0*orbitalExponentA + 2.0*orbitalExponentB, 2.0*(na+nb)-l );
1649- value -= temp;
1651+ value *= pow(2.0*orbitalExponentA, 2.0*na+1.0);
1652+ value /= Factorial(2*na);
1653+ }
1654+ else{
1655+ // (B.62)
1656+ value = Factorial(2*na-1);
1657+ value /= pow(2.0*orbitalExponentA, 2.0*na);
1658+
1659+ for(int l=1; l<=2*nb; l++){
1660+ temp = l;
1661+ temp *= pow(2.0*orbitalExponentB, 2*nb-l);
1662+ temp *= Factorial(2*na+2*nb-l-1);
1663+ temp /= Factorial(2*nb-l);
1664+ temp /= 2.0*nb;
1665+ temp /= pow( 2.0*orbitalExponentA + 2.0*orbitalExponentB, 2.0*(na+nb)-l );
1666+ value -= temp;
1667+ }
1668+ value *= pow(2.0*orbitalExponentA, 2.0*na+1);
1669+ value /= Factorial(2*na);
16501670 }
1651- value *= pow(2.0*orbitalExponentA, 2.0*na+1);
1652- value /= Factorial(2*na);
1671+ gammaAB[A][B] = value;
16531672 }
1654- gammaAB[A][B] = value;
1655- }
1656- catch(MolDSException ex){
1657- #pragma omp critical
1658- ex.Serialize(ompErrors);
1673+ catch(MolDSException ex){
1674+ #pragma omp critical
1675+ ex.Serialize(ompErrors);
1676+ }
1677+ } // end of loop B parallelized by openMP
1678+ // Exception throwing for omp-region
1679+ if(!ompErrors.str().empty()){
1680+ throw MolDSException::Deserialize(ompErrors);
16591681 }
1660- } // end of loop B parallelized by openMP
1661- // Exception throwing for omp-region
1662- if(!ompErrors.str().empty()){
1663- throw MolDSException::Deserialize(ompErrors);
1682+ } // end of if(mpiRank==calcRank)
1683+
1684+ // set data to gather in mpiHeadRank with asynchronous MPI
1685+ int tag = A;
1686+ int source = calcRank;
1687+ int dest = mpiHeadRank;
1688+ if(mpiRank == mpiHeadRank && mpiRank != calcRank){
1689+ asyncCommunicator.SetRecvedVector(&gammaAB[A][A],
1690+ totalAtomNumber-A,
1691+ source,
1692+ tag);
1693+ }
1694+ if(mpiRank != mpiHeadRank && mpiRank == calcRank){
1695+ asyncCommunicator.SetSentVector(&gammaAB[A][A],
1696+ totalAtomNumber-A,
1697+ dest,
1698+ tag);
16641699 }
16651700 } // end of loop A prallelized by MPI
1666-
1667- // communication to collect all matrix data on head-rank
1668- int mpiHeadRank = MolDS_mpi::MpiProcess::GetInstance()->GetHeadRank();
1669- if(mpiRank == mpiHeadRank){
1670- // receive the matrix data from other ranks
1671- for(int A=0; A<totalAtomNumber; A++){
1672- if(A%mpiSize == mpiHeadRank){continue;}
1673- int source = A%mpiSize;
1674- int tag = A;
1675- MolDS_mpi::MpiProcess::GetInstance()->Recv(source, tag, &gammaAB[A][A], totalAtomNumber-A);
1676- }
1677- }
1678- else{
1679- // send the matrix data to head-rank
1680- for(int A=0; A<totalAtomNumber; A++){
1681- if(A%mpiSize != mpiRank){continue;}
1682- int dest = mpiHeadRank;
1683- int tag = A;
1684- MolDS_mpi::MpiProcess::GetInstance()->Send(dest, tag, &gammaAB[A][A], totalAtomNumber-A);
1685- }
1686- }
1687- // broadcast all matrix data to all rank
1688- int root=mpiHeadRank;
1689- MolDS_mpi::MpiProcess::GetInstance()->Broadcast(&gammaAB[0][0], totalAtomNumber*totalAtomNumber, root);
1690-
1701+ communicationThread.join();
1702+ MolDS_mpi::MpiProcess::GetInstance()->Broadcast(&gammaAB[0][0], totalAtomNumber*totalAtomNumber, mpiHeadRank);
16911703
16921704 #pragma omp parallel for schedule(auto)
16931705 for(int A=0; A<totalAtomNumber; A++){
@@ -1786,44 +1798,97 @@ void Cndo2::CalcElectronicTransitionDipoleMoment(double* transitionDipoleMoment,
17861798 void Cndo2::CalcCartesianMatrixByGTOExpansion(double*** cartesianMatrix,
17871799 const Molecule& molecule,
17881800 STOnGType stonG) const{
1789- int mpiRank = MolDS_mpi::MpiProcess::GetInstance()->GetRank();
1790- int mpiSize = MolDS_mpi::MpiProcess::GetInstance()->GetSize();
17911801 int totalAONumber = molecule.GetTotalNumberAOs();
17921802 int totalAtomNumber = molecule.GetNumberAtoms();
17931803
1804+ // MPI setting of each rank
1805+ int mpiRank = MolDS_mpi::MpiProcess::GetInstance()->GetRank();
1806+ int mpiSize = MolDS_mpi::MpiProcess::GetInstance()->GetSize();
1807+ int mpiHeadRank = MolDS_mpi::MpiProcess::GetInstance()->GetHeadRank();
1808+ int mPassingTimes = MolDS_mpi::MpiProcess::GetInstance()->GetMessagePassingTimes(totalAtomNumber);
1809+ mPassingTimes *= CartesianType_end;
1810+ MolDS_mpi::AsyncCommunicator asyncCommunicator;
1811+ boost::thread communicationThread( boost::bind(&MolDS_mpi::AsyncCommunicator::Run<double>,
1812+ &asyncCommunicator,
1813+ mPassingTimes) );
1814+
17941815 // This loop (A and mu) is parallelized by MPI
17951816 for(int A=0; A<totalAtomNumber; A++){
1796- const Atom& atomA = *molecule.GetAtom(A);
1797- int firstAOIndexAtomA = atomA.GetFirstAOIndex();
1798- for(int a=0; a<atomA.GetValenceSize(); a++){
1799- int mu = firstAOIndexAtomA + a;
1800- if(mu%mpiSize != mpiRank){continue;}
1801- stringstream ompErrors;
1802- #pragma omp parallel for schedule(auto)
1803- for(int B=0; B<totalAtomNumber; B++){
1804- try{
1805- const Atom& atomB = *molecule.GetAtom(B);
1806- int firstAOIndexAtomB = atomB.GetFirstAOIndex();
1807- for(int b=0; b<atomB.GetValenceSize(); b++){
1808- int nu = firstAOIndexAtomB + b;
1809- this->CalcCartesianMatrixElementsByGTOExpansion(cartesianMatrix[XAxis][mu][nu],
1810- cartesianMatrix[YAxis][mu][nu],
1811- cartesianMatrix[ZAxis][mu][nu],
1812- atomA, a, atomB, b, stonG);
1817+ const Atom& atomA = *molecule.GetAtom(A);
1818+ int firstAOIndexA = atomA.GetFirstAOIndex();
1819+ int numValenceAOsA = atomA.GetValenceSize();
1820+ int calcRank = A%mpiSize;
1821+ if(mpiRank == calcRank){
1822+ for(int a=0; a<numValenceAOsA; a++){
1823+ int mu = firstAOIndexA + a;
1824+ int maxThreads = omp_get_max_threads();
1825+ stringstream ompErrors;
1826+#pragma omp parallel for schedule(auto) num_threads(maxThreads-1)
1827+ for(int B=0; B<totalAtomNumber; B++){
1828+ try{
1829+ const Atom& atomB = *molecule.GetAtom(B);
1830+ int firstAOIndexB = atomB.GetFirstAOIndex();
1831+ int numValenceAOsB = atomB.GetValenceSize();
1832+ for(int b=0; b<numValenceAOsB; b++){
1833+ int nu = firstAOIndexB + b;
1834+ this->CalcCartesianMatrixElementsByGTOExpansion(cartesianMatrix[XAxis][mu][nu],
1835+ cartesianMatrix[YAxis][mu][nu],
1836+ cartesianMatrix[ZAxis][mu][nu],
1837+ atomA, a, atomB, b, stonG);
1838+ }
18131839 }
1840+ catch(MolDSException ex){
1841+#pragma omp critical
1842+ ex.Serialize(ompErrors);
1843+ }
1844+ }// end of loop for int B with openMP
1845+ // Exception throwing for omp-region
1846+ if(!ompErrors.str().empty()){
1847+ throw MolDSException::Deserialize(ompErrors);
18141848 }
1815- catch(MolDSException ex){
1816- #pragma omp critical
1817- ex.Serialize(ompErrors);
1818- }
1819- }// end of loop for int B with openMP
1820- // Exception throwing for omp-region
1821- if(!ompErrors.str().empty()){
1822- throw MolDSException::Deserialize(ompErrors);
1823- }
1824- }
1825- } // end of loop for int A with openMP
1826-
1849+ }
1850+ } // end of if(mpiRank == calcRank)
1851+
1852+ // set data to gather in mpiHeadRank with asynchronous MPI
1853+ int tagX = A* CartesianType_end + XAxis;
1854+ int tagY = A* CartesianType_end + YAxis;
1855+ int tagZ = A* CartesianType_end + ZAxis;
1856+ int source = calcRank;
1857+ int dest = mpiHeadRank;
1858+ if(mpiRank == mpiHeadRank && mpiRank != calcRank){
1859+ asyncCommunicator.SetRecvedVector(&cartesianMatrix[XAxis][firstAOIndexA][0],
1860+ numValenceAOsA*totalAONumber,
1861+ source,
1862+ tagX);
1863+ asyncCommunicator.SetRecvedVector(&cartesianMatrix[YAxis][firstAOIndexA][0],
1864+ numValenceAOsA*totalAONumber,
1865+ source,
1866+ tagY);
1867+ asyncCommunicator.SetRecvedVector(&cartesianMatrix[ZAxis][firstAOIndexA][0],
1868+ numValenceAOsA*totalAONumber,
1869+ source,
1870+ tagZ);
1871+ }
1872+ if(mpiRank != mpiHeadRank && mpiRank == calcRank){
1873+ asyncCommunicator.SetSentVector(&cartesianMatrix[XAxis][firstAOIndexA][0],
1874+ numValenceAOsA*totalAONumber,
1875+ dest,
1876+ tagX);
1877+ asyncCommunicator.SetSentVector(&cartesianMatrix[YAxis][firstAOIndexA][0],
1878+ numValenceAOsA*totalAONumber,
1879+ dest,
1880+ tagY);
1881+ asyncCommunicator.SetSentVector(&cartesianMatrix[ZAxis][firstAOIndexA][0],
1882+ numValenceAOsA*totalAONumber,
1883+ dest,
1884+ tagZ);
1885+ }
1886+ } // end of loop for int A with MPI
1887+ // Wait for the communication thread to finish.
1888+ communicationThread.join();
1889+ MolDS_mpi::MpiProcess::GetInstance()->Broadcast(&cartesianMatrix[0][0][0], CartesianType_end*totalAONumber*totalAONumber, mpiHeadRank);
1890+
1891+/*
18271892 // communication to collect all matrix data on head-rank
18281893 int mpiHeadRank = MolDS_mpi::MpiProcess::GetInstance()->GetHeadRank();
18291894 if(mpiRank == mpiHeadRank){
@@ -1857,6 +1922,7 @@ void Cndo2::CalcCartesianMatrixByGTOExpansion(double*** cartesianMatrix,
18571922 // broadcast all matrix data to all rank
18581923 int root=mpiHeadRank;
18591924 MolDS_mpi::MpiProcess::GetInstance()->Broadcast(&cartesianMatrix[0][0][0], CartesianType_end*totalAONumber*totalAONumber, root);
1925+ */
18601926 }
18611927
18621928 // Calculate elements of Cartesian matrix between atomic orbitals.
@@ -3817,10 +3883,19 @@ void Cndo2::CalcOverlapESsWithAnotherElectronicStructure(double** overlapESs,
38173883
38183884 // calculate OverlapAOs matrix. E.g. S_{\mu\nu} in (3.74) in J. A. Pople book.
38193885 void Cndo2::CalcOverlapAOs(double** overlapAOs, const Molecule& molecule) const{
3820- int mpiRank = MolDS_mpi::MpiProcess::GetInstance()->GetRank();
3821- int mpiSize = MolDS_mpi::MpiProcess::GetInstance()->GetSize();
38223886 int totalAONumber = molecule.GetTotalNumberAOs();
38233887 int totalAtomNumber = molecule.GetNumberAtoms();
3888+
3889+ // MPI setting of each rank
3890+ int mpiRank = MolDS_mpi::MpiProcess::GetInstance()->GetRank();
3891+ int mpiSize = MolDS_mpi::MpiProcess::GetInstance()->GetSize();
3892+ int mpiHeadRank = MolDS_mpi::MpiProcess::GetInstance()->GetHeadRank();
3893+ int mPassingTimes = MolDS_mpi::MpiProcess::GetInstance()->GetMessagePassingTimes(totalAtomNumber);
3894+ MolDS_mpi::AsyncCommunicator asyncCommunicator;
3895+ boost::thread communicationThread( boost::bind(&MolDS_mpi::AsyncCommunicator::Run<double>,
3896+ &asyncCommunicator,
3897+ mPassingTimes) );
3898+
38243899 MallocerFreer::GetInstance()->Initialize<double>(overlapAOs,
38253900 totalAONumber,
38263901 totalAONumber);
@@ -3828,51 +3903,73 @@ void Cndo2::CalcOverlapAOs(double** overlapAOs, const Molecule& molecule) const{
38283903 // This loop A is parallelized with MPI
38293904 for(int A=0; A<totalAtomNumber; A++){
38303905 const Atom& atomA = *molecule.GetAtom(A);
3831- if(A%mpiSize != mpiRank){continue;}
3832-
3833- stringstream ompErrors;
3834-#pragma omp parallel
3835- {
3836- double** diatomicOverlapAOs = NULL;
3837- double** rotatingMatrix = NULL;
3838- try{
3839- // malloc
3840- MallocerFreer::GetInstance()->Malloc<double>(&diatomicOverlapAOs,
3841- OrbitalType_end,
3842- OrbitalType_end);
3843- MallocerFreer::GetInstance()->Malloc<double>(&rotatingMatrix,
3844- OrbitalType_end,
3845- OrbitalType_end);
3846-
3906+ int firstAOIndexA = atomA.GetFirstAOIndex();
3907+ int numValenceAOs = atomA.GetValenceSize();
3908+ int calcRank = A%mpiSize;
3909+ if(mpiRank == calcRank){
3910+ int maxThreads = omp_get_max_threads();
3911+ stringstream ompErrors;
3912+#pragma omp parallel num_threads(maxThreads-1)
3913+ {
3914+ double** diatomicOverlapAOs = NULL;
3915+ double** rotatingMatrix = NULL;
3916+ try{
3917+ // malloc
3918+ MallocerFreer::GetInstance()->Malloc<double>(&diatomicOverlapAOs,
3919+ OrbitalType_end,
3920+ OrbitalType_end);
3921+ MallocerFreer::GetInstance()->Malloc<double>(&rotatingMatrix,
3922+ OrbitalType_end,
3923+ OrbitalType_end);
3924+ bool symmetrize = false;
38473925 #pragma omp for schedule(auto)
3848- for(int B=A+1; B<totalAtomNumber; B++){
3849- const Atom& atomB = *molecule.GetAtom(B);
3850- this->CalcDiatomicOverlapAOsInDiatomicFrame(diatomicOverlapAOs, atomA, atomB);
3851- this->CalcRotatingMatrix(rotatingMatrix, atomA, atomB);
3852- this->RotateDiatmicOverlapAOsToSpaceFrame(diatomicOverlapAOs, rotatingMatrix);
3853- this->SetOverlapAOsElement(overlapAOs, diatomicOverlapAOs, atomA, atomB);
3854- } // end of loop B parallelized with openMP
3926+ for(int B=A+1; B<totalAtomNumber; B++){
3927+ const Atom& atomB = *molecule.GetAtom(B);
3928+ this->CalcDiatomicOverlapAOsInDiatomicFrame(diatomicOverlapAOs, atomA, atomB);
3929+ this->CalcRotatingMatrix(rotatingMatrix, atomA, atomB);
3930+ this->RotateDiatmicOverlapAOsToSpaceFrame(diatomicOverlapAOs, rotatingMatrix);
3931+ this->SetOverlapAOsElement(overlapAOs, diatomicOverlapAOs, atomA, atomB, symmetrize);
3932+ } // end of loop B parallelized with openMP
38553933
3856- } // end of try
3857- catch(MolDSException ex){
3934+ } // end of try
3935+ catch(MolDSException ex){
38583936 #pragma omp critical
3859- ex.Serialize(ompErrors);
3937+ ex.Serialize(ompErrors);
3938+ }
3939+ this->FreeDiatomicOverlapAOsAndRotatingMatrix(&diatomicOverlapAOs, &rotatingMatrix);
3940+ } // end of omp-parallelized region
3941+ // Exception throwing for omp-region
3942+ if(!ompErrors.str().empty()){
3943+ throw MolDSException::Deserialize(ompErrors);
38603944 }
3861- this->FreeDiatomicOverlapAOsAndRotatingMatrix(&diatomicOverlapAOs, &rotatingMatrix);
3862- } // end of omp-parallelized region
3863- // Exception throwing for omp-region
3864- if(!ompErrors.str().empty()){
3865- throw MolDSException::Deserialize(ompErrors);
3945+ } // end of if(mpiRank == calcRank)
3946+
3947+ // set data to gather in mpiHeadRank with asynchronous MPI
3948+ int tag = A;
3949+ int source = calcRank;
3950+ int dest = mpiHeadRank;
3951+ if(mpiRank == mpiHeadRank && mpiRank != calcRank){
3952+ asyncCommunicator.SetRecvedVector(overlapAOs[firstAOIndexA],
3953+ totalAONumber*numValenceAOs,
3954+ source,
3955+ tag);
3956+ }
3957+ if(mpiRank != mpiHeadRank && mpiRank == calcRank){
3958+ asyncCommunicator.SetSentVector(overlapAOs[firstAOIndexA],
3959+ totalAONumber*numValenceAOs,
3960+ dest,
3961+ tag);
38663962 }
38673963 } // end of loop A parallelized with MPI
3868-
3869- // communication to reduce thsi->matrixForce on all node (namely, all_reduce)
3870- int numTransported = totalAONumber*totalAONumber;
3871- MolDS_mpi::MpiProcess::GetInstance()->AllReduce(&overlapAOs[0][0], numTransported, std::plus<double>());
3964+ communicationThread.join();
3965+ MolDS_mpi::MpiProcess::GetInstance()->Broadcast(&overlapAOs[0][0], totalAONumber*totalAONumber, mpiHeadRank);
38723966
38733967 #pragma omp parallel for schedule(auto)
38743968 for(int mu=0; mu<totalAONumber; mu++){
38753969 overlapAOs[mu][mu] = 1.0;
3970+ for(int nu=mu+1; nu<totalAONumber; nu++){
3971+ overlapAOs[nu][mu] = overlapAOs[mu][nu];
3972+ }
38763973 }
38773974
38783975 /*
--- a/src/mndo/Mndo.cpp
+++ b/src/mndo/Mndo.cpp
@@ -31,7 +31,9 @@
3131 #include"../base/PrintController.h"
3232 #include"../base/MolDSException.h"
3333 #include"../base/MallocerFreer.h"
34+#include"../base/containers/ThreadSafeQueue.h"
3435 #include"../mpi/MpiProcess.h"
36+#include"../mpi/AsyncCommunicator.h"
3537 #include"../wrappers/Blas.h"
3638 #include"../wrappers/Lapack.h"
3739 #include"../base/MallocerFreer.h"
@@ -739,8 +741,9 @@ void Mndo::CalcCISMatrix(double** matrixCIS) const{
739741 // single excitation from I-th (occupied)MO to A-th (virtual)MO
740742 int moI = this->GetActiveOccIndex(*this->molecule, k);
741743 int moA = this->GetActiveVirIndex(*this->molecule, k);
744+ int maxThreads = omp_get_max_threads();
742745 stringstream ompErrors;
743-#pragma omp parallel for schedule(auto)
746+#pragma omp parallel for schedule(auto) num_threads(maxThreads-1)
744747 for(int l=k; l<this->matrixCISdimension; l++){
745748 try{
746749 // single excitation from J-th (occupied)MO to B-th (virtual)MO
@@ -2552,8 +2555,9 @@ void Mndo::CalcForce(const vector<int>& elecStates){
25522555 const Atom& atomA = *molecule->GetAtom(a);
25532556 int firstAOIndexA = atomA.GetFirstAOIndex();
25542557 int lastAOIndexA = atomA.GetLastAOIndex();
2558+ int maxThreads = omp_get_max_threads();
25552559 stringstream ompErrors;
2556-#pragma omp parallel
2560+#pragma omp parallel num_threads(maxThreads-1)
25572561 {
25582562 double*** diatomicOverlapAOs1stDerivs = NULL;
25592563 double***** diatomicTwoElecTwoCore1stDerivs = NULL;
@@ -3416,74 +3420,113 @@ double Mndo::GetAuxiliaryKNRKRElement(int moI, int moJ, int moK, int moL) const{
34163420
34173421 void Mndo::CalcTwoElecTwoCore(double****** twoElecTwoCore,
34183422 const Molecule& molecule) const{
3419- int mpiRank = MolDS_mpi::MpiProcess::GetInstance()->GetRank();
3420- int mpiSize = MolDS_mpi::MpiProcess::GetInstance()->GetSize();
3421- int totalAtomNumber = molecule.GetNumberAtoms();
3422-
3423+ int totalNumberAtoms = molecule.GetNumberAtoms();
3424+
3425+ // MPI setting of each rank
3426+ int mpiRank = MolDS_mpi::MpiProcess::GetInstance()->GetRank();
3427+ int mpiSize = MolDS_mpi::MpiProcess::GetInstance()->GetSize();
3428+ int mpiHeadRank = MolDS_mpi::MpiProcess::GetInstance()->GetHeadRank();
3429+ int mPassingTimes = MolDS_mpi::MpiProcess::GetInstance()->GetMessagePassingTimes(totalNumberAtoms);
3430+ MolDS_mpi::AsyncCommunicator asyncCommunicator;
3431+ boost::thread communicationThread( boost::bind(&MolDS_mpi::AsyncCommunicator::Run<double>,
3432+ &asyncCommunicator,
3433+ mPassingTimes) );
34233434 #ifdef MOLDS_DBG
34243435 if(twoElecTwoCore == NULL){
34253436 throw MolDSException(this->errorMessageCalcTwoElecTwoCoreNullMatrix);
34263437 }
34273438 #endif
34283439 MallocerFreer::GetInstance()->Initialize<double>(twoElecTwoCore,
3429- totalAtomNumber,
3430- totalAtomNumber,
3440+ totalNumberAtoms,
3441+ totalNumberAtoms,
34313442 dxy, dxy, dxy, dxy);
34323443
3433-
34343444 // this loop-a is MPI-parallelized
3435- for(int a=0; a<totalAtomNumber; a++){
3436- if(a%mpiSize != mpiRank){continue;}
3437- stringstream ompErrors;
3438-#pragma omp parallel
3439- {
3440- double**** diatomicTwoElecTwoCore = NULL;
3441- double** tmpRotMat = NULL;
3442- try{
3443- MallocerFreer::GetInstance()->Malloc<double>(&diatomicTwoElecTwoCore, dxy, dxy, dxy, dxy);
3444- MallocerFreer::GetInstance()->Malloc<double>(&tmpRotMat, OrbitalType_end, OrbitalType_end);
3445- // note that terms with condition a==b are not needed to calculate.
3445+ for(int a=0; a<totalNumberAtoms; a++){
3446+ int calcRank = a%mpiSize;
3447+ if(mpiRank == calcRank){
3448+ int maxThreads = omp_get_max_threads();
3449+ stringstream ompErrors;
3450+#pragma omp parallel num_threads(maxThreads-1)
3451+ {
3452+ double**** diatomicTwoElecTwoCore = NULL;
3453+ double** tmpRotMat = NULL;
3454+ try{
3455+ MallocerFreer::GetInstance()->Malloc<double>(&diatomicTwoElecTwoCore, dxy, dxy, dxy, dxy);
3456+ MallocerFreer::GetInstance()->Malloc<double>(&tmpRotMat, OrbitalType_end, OrbitalType_end);
3457+ // note that terms with condition a==b are not needed to calculate.
34463458 #pragma omp for schedule(auto)
3447- for(int b=a+1; b<totalAtomNumber; b++){
3448- this->CalcDiatomicTwoElecTwoCore(diatomicTwoElecTwoCore, tmpRotMat, a, b);
3449-
3450- for(int mu=0; mu<dxy; mu++){
3451- for(int nu=mu; nu<dxy; nu++){
3452- for(int lambda=0; lambda<dxy; lambda++){
3453- for(int sigma=lambda; sigma<dxy; sigma++){
3454- double value = diatomicTwoElecTwoCore[mu][nu][lambda][sigma];
3455- twoElecTwoCore[a][b][mu][nu][lambda][sigma] = value;
3456- twoElecTwoCore[a][b][mu][nu][sigma][lambda] = value;
3457- twoElecTwoCore[a][b][nu][mu][lambda][sigma] = value;
3458- twoElecTwoCore[a][b][nu][mu][sigma][lambda] = value;
3459- twoElecTwoCore[b][a][lambda][sigma][mu][nu] = value;
3460- twoElecTwoCore[b][a][lambda][sigma][nu][mu] = value;
3461- twoElecTwoCore[b][a][sigma][lambda][mu][nu] = value;
3462- twoElecTwoCore[b][a][sigma][lambda][nu][mu] = value;
3459+ for(int b=a+1; b<totalNumberAtoms; b++){
3460+ this->CalcDiatomicTwoElecTwoCore(diatomicTwoElecTwoCore, tmpRotMat, a, b);
3461+
3462+ for(int mu=0; mu<dxy; mu++){
3463+ for(int nu=mu; nu<dxy; nu++){
3464+ for(int lambda=0; lambda<dxy; lambda++){
3465+ for(int sigma=lambda; sigma<dxy; sigma++){
3466+ double value = diatomicTwoElecTwoCore[mu][nu][lambda][sigma];
3467+ twoElecTwoCore[a][b][mu][nu][lambda][sigma] = value;
3468+ twoElecTwoCore[a][b][mu][nu][sigma][lambda] = value;
3469+ twoElecTwoCore[a][b][nu][mu][lambda][sigma] = value;
3470+ twoElecTwoCore[a][b][nu][mu][sigma][lambda] = value;
3471+ }
34633472 }
34643473 }
34653474 }
3466- }
34673475
3468- } // end of loop b parallelized with MPI
3476+ } // end of loop b parallelized with openMP
34693477
3470- } // end of try
3471- catch(MolDSException ex){
3478+ } // end of try
3479+ catch(MolDSException ex){
34723480 #pragma omp critical
3473- ex.Serialize(ompErrors);
3481+ ex.Serialize(ompErrors);
3482+ }
3483+ MallocerFreer::GetInstance()->Free<double>(&diatomicTwoElecTwoCore, dxy, dxy, dxy, dxy);
3484+ MallocerFreer::GetInstance()->Free<double>(&tmpRotMat, OrbitalType_end, OrbitalType_end);
3485+ } // end of omp-parallelized region
3486+ // Exception throwing for omp-region
3487+ if(!ompErrors.str().empty()){
3488+ throw MolDSException::Deserialize(ompErrors);
34743489 }
3475- MallocerFreer::GetInstance()->Free<double>(&diatomicTwoElecTwoCore, dxy, dxy, dxy, dxy);
3476- MallocerFreer::GetInstance()->Free<double>(&tmpRotMat, OrbitalType_end, OrbitalType_end);
3477- } // end of omp-parallelized region
3478- // Exception throwing for omp-region
3479- if(!ompErrors.str().empty()){
3480- throw MolDSException::Deserialize(ompErrors);
3490+ } // end of if(mpiRank == calcRank)
3491+ // set data to gather in mpiHeadRank with asynchronous MPI
3492+ int tag = a;
3493+ int source = calcRank;
3494+ int dest = mpiHeadRank;
3495+ int numTransported = totalNumberAtoms*dxy*dxy*dxy*dxy;
3496+ if(mpiRank == mpiHeadRank && mpiRank != calcRank){
3497+ asyncCommunicator.SetRecvedVector(&twoElecTwoCore[a][0][0][0][0][0],
3498+ numTransported,
3499+ source,
3500+ tag);
3501+ }
3502+ if(mpiRank != mpiHeadRank && mpiRank == calcRank){
3503+ asyncCommunicator.SetSentVector(&twoElecTwoCore[a][0][0][0][0][0],
3504+ numTransported,
3505+ dest,
3506+ tag);
34813507 }
34823508 } // end of loop a parallelized with MPI
3509+ communicationThread.join();
3510+ int numTransported = totalNumberAtoms*totalNumberAtoms*dxy*dxy*dxy*dxy;
3511+ MolDS_mpi::MpiProcess::GetInstance()->Broadcast(&twoElecTwoCore[0][0][0][0][0][0], numTransported, mpiHeadRank);
34833512
3484- // communication to reduce thsi->matrixForce on all node (namely, all_reduce)
3485- int numTransported = totalAtomNumber*totalAtomNumber*dxy*dxy*dxy*dxy;
3486- MolDS_mpi::MpiProcess::GetInstance()->AllReduce(&twoElecTwoCore[0][0][0][0][0][0], numTransported, std::plus<double>());
3513+ for(int a=0; a<totalNumberAtoms; a++){
3514+ for(int b=a+1; b<totalNumberAtoms; b++){
3515+ for(int mu=0; mu<dxy; mu++){
3516+ for(int nu=mu; nu<dxy; nu++){
3517+ for(int lambda=0; lambda<dxy; lambda++){
3518+ for(int sigma=lambda; sigma<dxy; sigma++){
3519+ double value = twoElecTwoCore[a][b][mu][nu][lambda][sigma];
3520+ twoElecTwoCore[b][a][lambda][sigma][mu][nu] = value;
3521+ twoElecTwoCore[b][a][lambda][sigma][nu][mu] = value;
3522+ twoElecTwoCore[b][a][sigma][lambda][mu][nu] = value;
3523+ twoElecTwoCore[b][a][sigma][lambda][nu][mu] = value;
3524+ }
3525+ }
3526+ }
3527+ }
3528+ }
3529+ }
34873530 }
34883531
34893532 // Calculation of two electrons two cores integral (mu, nu | lambda, sigma) in space fixed frame,
--- /dev/null
+++ b/src/mpi/AsyncCommunicator.cpp
@@ -0,0 +1,43 @@
1+//************************************************************************//
2+// Copyright (C) 2011-2012 Mikiya Fujii //
3+// //
4+// This file is part of MolDS. //
5+// //
6+// MolDS is free software: you can redistribute it and/or modify //
7+// it under the terms of the GNU General Public License as published by //
8+// the Free Software Foundation, either version 3 of the License, or //
9+// (at your option) any later version. //
10+// //
11+// MolDS is distributed in the hope that it will be useful, //
12+// but WITHOUT ANY WARRANTY; without even the implied warranty of //
13+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the //
14+// GNU General Public License for more details. //
15+// //
16+// You should have received a copy of the GNU General Public License //
17+// along with MolDS. If not, see <http://www.gnu.org/licenses/>. //
18+//************************************************************************//
19+#include<stdio.h>
20+#include<stdlib.h>
21+#include<iostream>
22+#include<sstream>
23+#include<math.h>
24+#include<string>
25+#include<stdexcept>
26+#include<boost/format.hpp>
27+#include"../base/Enums.h"
28+#include"../base/Uncopyable.h"
29+#include"../base/PrintController.h"
30+#include"../base/MolDSException.h"
31+#include"../base/containers/ThreadSafeQueue.h"
32+#include"../base/MallocerFreer.h"
33+#include"MpiProcess.h"
34+#include"AsyncCommunicator.h"
35+using namespace std;
36+namespace MolDS_mpi{
37+AsyncCommunicator::AsyncCommunicator(){}
38+AsyncCommunicator::~AsyncCommunicator(){}
39+}
40+
41+
42+
43+
--- /dev/null
+++ b/src/mpi/AsyncCommunicator.h
@@ -0,0 +1,127 @@
1+//************************************************************************//
2+// Copyright (C) 2011-2013 Mikiya Fujii //
3+// //
4+// This file is part of MolDS. //
5+// //
6+// MolDS is free software: you can redistribute it and/or modify //
7+// it under the terms of the GNU General Public License as published by //
8+// the Free Software Foundation, either version 3 of the License, or //
9+// (at your option) any later version. //
10+// //
11+// MolDS is distributed in the hope that it will be useful, //
12+// but WITHOUT ANY WARRANTY; without even the implied warranty of //
13+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the //
14+// GNU General Public License for more details. //
15+// //
16+// You should have received a copy of the GNU General Public License //
17+// along with MolDS. If not, see <http://www.gnu.org/licenses/>. //
18+//************************************************************************//
19+#ifndef INCLUDED_ASYNCCOMMUNICATOR
20+#define INCLUDED_ASYNCCOMMUNICATOR
21+#include<boost/thread.hpp>
22+#include<boost/thread/condition.hpp>
23+#include<boost/bind.hpp>
24+#define NON_USED 0
25+namespace MolDS_mpi{
26+
27+class AsyncCommunicator{
28+public:
29+ AsyncCommunicator();
30+ ~AsyncCommunicator();
31+ template<typename T> void Run(int passingTimes){
32+ int mpiRank = MolDS_mpi::MpiProcess::GetInstance()->GetRank();
33+ while(0<passingTimes){
34+ sleep(0.1);
35+ boost::mutex::scoped_lock lk(this->stateGuard);
36+ try{
37+ DataInfo dInfo = this->dataQueue.FrontPop();
38+ if(dInfo.mpiFuncType == MolDS_base::Send){
39+ MolDS_mpi::MpiProcess::GetInstance()->Send(dInfo.dest,
40+ dInfo.tag,
41+ reinterpret_cast<T*>(dInfo.vectorPtr),
42+ dInfo.num);
43+ }
44+ else if(dInfo.mpiFuncType == MolDS_base::Recv){
45+ MolDS_mpi::MpiProcess::GetInstance()->Recv(dInfo.source,
46+ dInfo.tag,
47+ reinterpret_cast<T*>(dInfo.vectorPtr),
48+ dInfo.num);
49+ }
50+ else if(dInfo.mpiFuncType == MolDS_base::Broadcast){
51+ MolDS_mpi::MpiProcess::GetInstance()->Broadcast(reinterpret_cast<T*>(dInfo.vectorPtr),
52+ dInfo.num,
53+ dInfo.source);
54+ }
55+ else{
56+ std::stringstream ss;
57+ ss << "non valid mpi function type\n";
58+ MolDS_base::MolDSException ex(ss.str());
59+ throw ex;
60+ }
61+ this->stateChange.notify_all();
62+ passingTimes--;
63+ }
64+ catch(MolDS_base::MolDSException ex){
65+ if(ex.HasKey(MolDS_base::EmptyQueue)){
66+ this->stateChange.wait(lk);
67+ continue;
68+ }
69+ else{
70+ throw ex;
71+ }
72+ }
73+ }
74+ }
75+
76+ template<typename T> void SetSentVector(T* vector,
77+ intptr_t num,
78+ int dest,
79+ int tag){
80+ int source = NON_USED;
81+ MolDS_base::MpiFunctionType mpiFuncType = MolDS_base::Send;
82+ this->SetVector(vector, num, source, dest, tag, mpiFuncType);
83+ }
84+
85+ template<typename T> void SetRecvedVector(T* vector,
86+ intptr_t num,
87+ int source,
88+ int tag){
89+ int dest = NON_USED;
90+ MolDS_base::MpiFunctionType mpiFuncType = MolDS_base::Recv;
91+ this->SetVector(vector, num, source, dest, tag, mpiFuncType);
92+ }
93+
94+ template<typename T> void SetBroadcastedVector(T* vector, intptr_t num, int root){
95+ int source = root;
96+ int dest = NON_USED;
97+ int tag = NON_USED;
98+ MolDS_base::MpiFunctionType mpiFuncType = MolDS_base::Broadcast;
99+ this->SetVector(vector, num, source, dest, tag, mpiFuncType);
100+ }
101+
102+private:
103+ struct DataInfo{intptr_t vectorPtr;
104+ intptr_t num;
105+ int source;
106+ int dest;
107+ int tag;
108+ MolDS_base::MpiFunctionType mpiFuncType;};
109+ boost::mutex stateGuard;
110+ boost::condition stateChange;
111+ MolDS_base_containers::ThreadSafeQueue<DataInfo> dataQueue;
112+ template<typename T> void SetVector(T* vector,
113+ intptr_t num,
114+ int source,
115+ int dest,
116+ int tag,
117+ MolDS_base::MpiFunctionType mpiFuncType){
118+ boost::mutex::scoped_lock lk(this->stateGuard);
119+ DataInfo dInfo = {reinterpret_cast<intptr_t>(vector), num, source, dest, tag, mpiFuncType};
120+ this->dataQueue.Push(dInfo);
121+ this->stateChange.notify_all();
122+ }
123+};
124+
125+}
126+#endif
127+
--- a/src/mpi/MpiProcess.cpp
+++ b/src/mpi/MpiProcess.cpp
@@ -23,6 +23,7 @@
2323 #include<math.h>
2424 #include<string>
2525 #include<stdexcept>
26+#include<omp.h>
2627 #include<boost/format.hpp>
2728 #include"../base/Uncopyable.h"
2829 #include"../base/PrintController.h"
@@ -41,9 +42,22 @@ MpiProcess::MpiProcess(int argc, char *argv[]){
4142 this->environment = new boost::mpi::environment(argc, argv);
4243 this->communicator = new boost::mpi::communicator();
4344 this->messageLimit = INT_MAX;
45+ this->mpiConsumingTime=0.0;
46+ this->mpiConsumingTimeSend=0.0;
47+ this->mpiConsumingTimeRecv=0.0;
48+ this->mpiConsumingTimeBrodCast=0.0;
49+ this->mpiConsumingTimeAllReduce=0.0;
4450 }
4551
4652 MpiProcess::~MpiProcess(){
53+ /*
54+ int rank = this->GetRank();
55+ printf("\nrnk:%d mpiconsumingtime = %e [s]\n",rank, this->mpiConsumingTime);
56+ printf("\nrnk:%d mpiconsumingtimeSend = %e [s]\n",rank, this->mpiConsumingTimeSend);
57+ printf("\nrnk:%d mpiconsumingtimeRecv = %e [s]\n",rank, this->mpiConsumingTimeRecv);
58+ printf("\nrnk:%d mpiconsumingtimeBroadcast = %e [s]\n",rank, this->mpiConsumingTimeBrodCast);
59+ printf("\nrnk:%d mpiconsumingtimeAllReduce = %e [s]\n",rank, this->mpiConsumingTimeAllReduce);
60+ */
4761 delete this->environment;
4862 delete this->communicator;
4963 }
@@ -70,6 +84,25 @@ MpiProcess* MpiProcess::GetInstance(){
7084 return mpiProcess;
7185 }
7286
87+void MpiProcess::Barrier(){this->communicator->barrier();}
88+
89+int MpiProcess::GetMessagePassingTimes(intptr_t num)const{
90+ int mpiRank = MolDS_mpi::MpiProcess::GetInstance()->GetRank();
91+ int mpiSize = MolDS_mpi::MpiProcess::GetInstance()->GetSize();
92+ int mpiHeadRank = MolDS_mpi::MpiProcess::GetInstance()->GetHeadRank();
93+ int calcTimes = num/mpiSize;
94+ if(mpiRank < num%mpiSize){calcTimes+=1;}
95+ int mpiPassingTimes;
96+ if(mpiRank == mpiHeadRank){
97+ mpiPassingTimes = num - calcTimes;
98+ }
99+ else{
100+ mpiPassingTimes = calcTimes;
101+ }
102+ return mpiPassingTimes;
103+}
104+
105+
73106 }
74107
75108
--- a/src/mpi/MpiProcess.h
+++ b/src/mpi/MpiProcess.h
@@ -19,6 +19,7 @@
1919 #ifndef INCLUDED_MPIPROCESS
2020 #define INCLUDED_MPIPROCESS
2121 #include<limits.h>
22+#include<omp.h>
2223 #include<boost/mpi.hpp>
2324 namespace MolDS_mpi{
2425 // MpiProcess is singleton
@@ -30,26 +31,47 @@ public:
3031 int GetHeadRank() const{return 0;}
3132 int GetRank() const{return this->communicator->rank();}
3233 int GetSize() const{return this->communicator->size();}
33- template<typename T> void Send(int dest, int tag, const T* values, intptr_t num) const{
34+ //template<typename T> void Send(int dest, int tag, const T* values, intptr_t num) const{
35+ template<typename T> void Send(int dest, int tag, const T* values, intptr_t num) {
36+ double startTime=0.0;
37+ double endTime=0.0;
3438 std::vector<Chunk> chunks;
3539 this->SplitMessage2Chunks(chunks, tag, values, num);
3640 for(intptr_t i=0; i<chunks.size(); i++){
41+ startTime = omp_get_wtime();
3742 this->communicator->send(dest, chunks[i].tag, &values[chunks[i].first], chunks[i].num);
43+ endTime = omp_get_wtime();
44+ this->mpiConsumingTime += endTime - startTime;
45+ this->mpiConsumingTimeSend += endTime - startTime;
3846 }
3947 }
40- template<typename T> void Recv(int source, int tag, T* values, intptr_t num) const{
48+ //template<typename T> void Recv(int source, int tag, T* values, intptr_t num) const{
49+ template<typename T> void Recv(int source, int tag, T* values, intptr_t num) {
50+ double startTime=0.0;
51+ double endTime=0.0;
4152 std::vector<Chunk> chunks;
4253 this->SplitMessage2Chunks(chunks, tag, values, num);
4354 for(intptr_t i=0; i<chunks.size(); i++){
55+ startTime = omp_get_wtime();
4456 this->communicator->recv(source, chunks[i].tag, &values[chunks[i].first], chunks[i].num);
57+ endTime = omp_get_wtime();
58+ this->mpiConsumingTime += endTime - startTime;
59+ this->mpiConsumingTimeRecv += endTime - startTime;
4560 }
4661 }
47- template<typename T> void Broadcast(T* values, intptr_t num, int root) const{
62+ //template<typename T> void Broadcast(T* values, intptr_t num, int root) const{
63+ template<typename T> void Broadcast(T* values, intptr_t num, int root){
64+ double startTime=0.0;
65+ double endTime=0.0;
4866 std::vector<Chunk> chunks;
4967 intptr_t tag=0;
5068 this->SplitMessage2Chunks(chunks, tag, values, num);
5169 for(intptr_t i=0; i<chunks.size(); i++){
70+ startTime = omp_get_wtime();
5271 broadcast(*this->communicator, &values[chunks[i].first], chunks[i].num, root);
72+ endTime = omp_get_wtime();
73+ this->mpiConsumingTime += endTime - startTime;
74+ this->mpiConsumingTimeBrodCast += endTime - startTime;
5375 }
5476 }
5577 template<typename T, typename Op> void Reduce(const T* inValues, intptr_t num, T* outValues, Op op, int root) const{
@@ -60,15 +82,23 @@ public:
6082 reduce(*this->communicator, &inValues[chunks[i].first], chunks[i].num, &outValues[chunks[i].first], op, root);
6183 }
6284 }
63- template<typename T, typename Op> void AllReduce(const T* inValues, intptr_t num, T* outValues, Op op) const{
85+ //template<typename T, typename Op> void AllReduce(const T* inValues, intptr_t num, T* outValues, Op op) const{
86+ template<typename T, typename Op> void AllReduce(const T* inValues, intptr_t num, T* outValues, Op op){
87+ double startTime=0.0;
88+ double endTime=0.0;
6489 std::vector<Chunk> chunks;
6590 intptr_t tag=0;
6691 this->SplitMessage2Chunks(chunks, tag, inValues, num);
6792 for(intptr_t i=0; i<chunks.size(); i++){
93+ startTime = omp_get_wtime();
6894 all_reduce(*this->communicator, &inValues[chunks[i].first], chunks[i].num, &outValues[chunks[i].first], op);
95+ endTime = omp_get_wtime();
96+ this->mpiConsumingTime += endTime - startTime;
97+ this->mpiConsumingTimeAllReduce += endTime - startTime;
6998 }
7099 }
71- template<typename T, typename Op> void AllReduce(T* values, intptr_t num, Op op) const{
100+ //template<typename T, typename Op> void AllReduce(T* values, intptr_t num, Op op) const{
101+ template<typename T, typename Op> void AllReduce(T* values, intptr_t num, Op op){
72102 double* tmpValues=NULL;
73103 try{
74104 MolDS_base::MallocerFreer::GetInstance()->Malloc<double>(&tmpValues, num);
@@ -83,7 +113,8 @@ public:
83113 }
84114 MolDS_base::MallocerFreer::GetInstance()->Free<double>(&tmpValues, num);
85115 }
86-
116+ void Barrier();
117+ int GetMessagePassingTimes(intptr_t num) const;
87118 private:
88119 static MpiProcess* mpiProcess;
89120 MpiProcess();
@@ -116,6 +147,11 @@ private:
116147 chunks.push_back(chunk);
117148 }
118149 }
150+ double mpiConsumingTime;
151+ double mpiConsumingTimeSend;
152+ double mpiConsumingTimeRecv;
153+ double mpiConsumingTimeBrodCast;
154+ double mpiConsumingTimeAllReduce;
119155 };
120156
121157 }