OSDN > Developer > ktns > Chamber > MolDS > Commit

MolDS
Fork

(Original repository, No fork origin)

R/O
HTTP
SSH
HTTPS

Commit

Commit MetaInfo

Revision	5613cccc631750dc5dfa929dce1de388f9e39eca (tree)
Time	2013-08-19 19:05:47
Author	Mikiya Fujii <mikiya.fujii@gmai...>
Commiter	Mikiya Fujii

Log Message

Asynchronous MPI communication is implemented. #31814

git-svn-id: https://svn.sourceforge.jp/svnroot/molds/trunk@1474 1136aad2-a195-0410-b898-f5ea1d11b9d8

Change Summary

modified: src/Makefile (diff)
modified: src/Makefile_GNU (diff)
modified: src/base/Enums.h (diff)
modified: src/base/Molecule.cpp (diff)
add: src/base/containers/ThreadSafeQueue.cpp (diff)
add: src/base/containers/ThreadSafeQueue.h (diff)
modified: src/cndo/Cndo2.cpp (diff)
modified: src/mndo/Mndo.cpp (diff)
add: src/mpi/AsyncCommunicator.cpp (diff)
add: src/mpi/AsyncCommunicator.h (diff)
modified: src/mpi/MpiProcess.cpp (diff)
modified: src/mpi/MpiProcess.h (diff)

Incremental Difference

--- a/src/Makefile

+++ b/src/Makefile

		@@ -26,7 +26,7 @@ endif
26	26	BOOST_TOP_DIR = /usr/local/boost/
27	27	BOOST_INC_DIR = $(BOOST_TOP_DIR)include/
28	28	BOOST_LIB_DIR = $(BOOST_TOP_DIR)lib/
29		-BOOST_LIBS = -lboost_serialization -lboost_mpi
	29	+BOOST_LIBS = -lboost_serialization -lboost_mpi -lboost_thread
30	30	LIBSBASE = -lmkl_intel_thread -lmkl_core -liomp5 -lpthread
31	31	ifeq ($(INTEL), 64)
32	32	LIBS = -lmkl_intel_ilp64 $(LIBSBASE) $(BOOST_LIBS)

		@@ -37,9 +37,9 @@ EXENAME = MolDS.out
37	37	DEPFILE = obj/objfile.dep
38	38	LDFLAGS =
39	39
40		-ALL_CPP_FILES = base/Enums.cpp base/PrintController.cpp base/MolDSException.cpp base/MallocerFreer.cpp mpi/MpiProcess.cpp wrappers/Blas.cpp wrappers/Lapack.cpp base/Utilities.cpp base/MathUtilities.cpp base/EularAngle.cpp base/Parameters.cpp base/atoms/Atom.cpp base/atoms/Hatom.cpp base/atoms/Liatom.cpp base/atoms/Catom.cpp base/atoms/Natom.cpp base/atoms/Oatom.cpp base/atoms/Satom.cpp base/factories/AtomFactory.cpp base/Molecule.cpp base/InputParser.cpp base/GTOExpansionSTO.cpp base/RealSphericalHarmonicsIndex.cpp base/loggers/MOLogger.cpp base/loggers/DensityLogger.cpp base/loggers/HoleDensityLogger.cpp base/loggers/ParticleDensityLogger.cpp cndo/Cndo2.cpp indo/Indo.cpp zindo/ZindoS.cpp mndo/Mndo.cpp am1/Am1.cpp am1/Am1D.cpp pm3/Pm3.cpp pm3/Pm3D.cpp pm3/Pm3Pddg.cpp base/factories/ElectronicStructureFactory.cpp md/MD.cpp mc/MC.cpp rpmd/RPMD.cpp nasco/NASCO.cpp optimization/Optimizer.cpp optimization/ConjugateGradient.cpp optimization/SteepestDescent.cpp optimization/BFGS.cpp base/factories/OptimizerFactory.cpp base/MolDS.cpp Main.cpp
41		-ALL_HEAD_FILES = base/Enums.h base/Uncopyable.h base/PrintController.h base/MolDSException.h base/MallocerFreer.h mpi/MpiProcess.h wrappers/Blas.h wrappers/Lapack.h base/Utilities.h base/MathUtilities.h base/EularAngle.h base/Parameters.h base/atoms/Atom.h base/atoms/Hatom.h base/atoms/Liatom.h base/atoms/Catom.h base/atoms/Natom.h base/atoms/Oatom.h base/atoms/Satom.h base/factories/AtomFactory.h base/Molecule.h base/InputParser.h base/GTOExpansionSTO.h base/RealSphericalHarmonicsIndex.h base/loggers/MOLogger.h base/loggers/DensityLogger.h base/loggers/HoleDensityLogger.h base/loggers/ParticleDensityLogger.h base/ElectronicStructure.h cndo/Cndo2.h cndo/ReducedOverlapAOsParameters.h indo/Indo.h zindo/ZindoS.h mndo/Mndo.h am1/Am1.h am1/Am1D.h pm3/Pm3.h pm3/Pm3D.h pm3/Pm3Pddg.h base/factories/ElectronicStructureFactory.h md/MD.h mc/MC.h rpmd/RPMD.h nasco/NASCO.h optimization/Optimizer.h optimization/ConjugateGradient.h optimization/SteepestDescent.h optimization/BFGS.h base/factories/OptimizerFactory.h base/MolDS.h
42		-ALL_OBJ_FILES = obj/Enums.o obj/PrintController.o obj/MolDSException.o obj/MallocerFreer.o obj/MpiProcess.o obj/Blas.o obj/Lapack.o obj/Utilities.o obj/MathUtilities.o obj/EularAngle.o obj/Parameters.o obj/Atom.o obj/Hatom.o obj/Liatom.o obj/Catom.o obj/Natom.o obj/Oatom.o obj/Satom.o obj/AtomFactory.o obj/Molecule.o obj/InputParser.o obj/GTOExpansionSTO.o obj/RealSphericalHarmonicsIndex.o obj/MOLogger.o obj/DensityLogger.o obj/HoleDensityLogger.o obj/ParticleDensityLogger.o obj/Cndo2.o obj/Indo.o obj/ZindoS.o obj/Mndo.o obj/Am1.o obj/Am1D.o obj/Pm3.o obj/Pm3D.o obj/Pm3Pddg.o obj/ElectronicStructureFactory.o obj/MD.o obj/MC.o obj/RPMD.o obj/NASCO.o obj/Optimizer.o obj/ConjugateGradient.o obj/SteepestDescent.o obj/BFGS.o obj/OptimizerFactory.o obj/MolDS.o obj/Main.o
	40	+ALL_CPP_FILES = base/Enums.cpp base/PrintController.cpp base/MolDSException.cpp base/MallocerFreer.cpp mpi/MpiProcess.cpp mpi/AsyncCommunicator.cpp wrappers/Blas.cpp wrappers/Lapack.cpp base/Utilities.cpp base/MathUtilities.cpp base/EularAngle.cpp base/Parameters.cpp base/atoms/Atom.cpp base/atoms/Hatom.cpp base/atoms/Liatom.cpp base/atoms/Catom.cpp base/atoms/Natom.cpp base/atoms/Oatom.cpp base/atoms/Satom.cpp base/factories/AtomFactory.cpp base/Molecule.cpp base/InputParser.cpp base/GTOExpansionSTO.cpp base/RealSphericalHarmonicsIndex.cpp base/loggers/MOLogger.cpp base/loggers/DensityLogger.cpp base/loggers/HoleDensityLogger.cpp base/loggers/ParticleDensityLogger.cpp cndo/Cndo2.cpp indo/Indo.cpp zindo/ZindoS.cpp mndo/Mndo.cpp am1/Am1.cpp am1/Am1D.cpp pm3/Pm3.cpp pm3/Pm3D.cpp pm3/Pm3Pddg.cpp base/factories/ElectronicStructureFactory.cpp md/MD.cpp mc/MC.cpp rpmd/RPMD.cpp nasco/NASCO.cpp optimization/Optimizer.cpp optimization/ConjugateGradient.cpp optimization/SteepestDescent.cpp optimization/BFGS.cpp base/factories/OptimizerFactory.cpp base/MolDS.cpp Main.cpp
	41	+ALL_HEAD_FILES = base/Enums.h base/Uncopyable.h base/PrintController.h base/MolDSException.h base/containers/ThreadSafeQueue.h base/MallocerFreer.h mpi/MpiProcess.h mpi/AsyncCommunicator.h wrappers/Blas.h wrappers/Lapack.h base/Utilities.h base/MathUtilities.h base/EularAngle.h base/Parameters.h base/atoms/Atom.h base/atoms/Hatom.h base/atoms/Liatom.h base/atoms/Catom.h base/atoms/Natom.h base/atoms/Oatom.h base/atoms/Satom.h base/factories/AtomFactory.h base/Molecule.h base/InputParser.h base/GTOExpansionSTO.h base/RealSphericalHarmonicsIndex.h base/loggers/MOLogger.h base/loggers/DensityLogger.h base/loggers/HoleDensityLogger.h base/loggers/ParticleDensityLogger.h base/ElectronicStructure.h cndo/Cndo2.h cndo/ReducedOverlapAOsParameters.h indo/Indo.h zindo/ZindoS.h mndo/Mndo.h am1/Am1.h am1/Am1D.h pm3/Pm3.h pm3/Pm3D.h pm3/Pm3Pddg.h base/factories/ElectronicStructureFactory.h md/MD.h mc/MC.h rpmd/RPMD.h nasco/NASCO.h optimization/Optimizer.h optimization/ConjugateGradient.h optimization/SteepestDescent.h optimization/BFGS.h base/factories/OptimizerFactory.h base/MolDS.h
	42	+ALL_OBJ_FILES = obj/Enums.o obj/PrintController.o obj/MolDSException.o obj/MallocerFreer.o obj/MpiProcess.o obj/AsyncCommunicator.o obj/Blas.o obj/Lapack.o obj/Utilities.o obj/MathUtilities.o obj/EularAngle.o obj/Parameters.o obj/Atom.o obj/Hatom.o obj/Liatom.o obj/Catom.o obj/Natom.o obj/Oatom.o obj/Satom.o obj/AtomFactory.o obj/Molecule.o obj/InputParser.o obj/GTOExpansionSTO.o obj/RealSphericalHarmonicsIndex.o obj/MOLogger.o obj/DensityLogger.o obj/HoleDensityLogger.o obj/ParticleDensityLogger.o obj/Cndo2.o obj/Indo.o obj/ZindoS.o obj/Mndo.o obj/Am1.o obj/Am1D.o obj/Pm3.o obj/Pm3D.o obj/Pm3Pddg.o obj/ElectronicStructureFactory.o obj/MD.o obj/MC.o obj/RPMD.o obj/NASCO.o obj/Optimizer.o obj/ConjugateGradient.o obj/SteepestDescent.o obj/BFGS.o obj/OptimizerFactory.o obj/MolDS.o obj/Main.o
43	43
44	44	$(EXENAME): $(ALL_OBJ_FILES)
45	45	$(CC) -o $@ -Wl,-rpath=$(BOOST_LIB_DIR) -L$(BOOST_LIB_DIR) $(LDFLAGS) $(ALL_OBJ_FILES) $(LIBS)

--- a/src/Makefile_GNU

+++ b/src/Makefile_GNU

		@@ -23,7 +23,7 @@ override CFLAGS += -fopenmp
23	23	BOOST_TOP_DIR = /usr/local/boost/
24	24	BOOST_INC_DIR = $(BOOST_TOP_DIR)include/
25	25	BOOST_LIB_DIR = $(BOOST_TOP_DIR)lib/
26		-BOOST_LIBS = -lboost_serialization -lboost_mpi
	26	+BOOST_LIBS = -lboost_serialization -lboost_mpi -lboost_thread
27	27	OPENBLAS_TOP_DIR = /usr/local/openblas/
28	28	OPENBLAS_INC_DIR = $(OPENBLAS_TOP_DIR)include/
29	29	OPENBLAS_LIB_DIR = $(OPENBLAS_TOP_DIR)lib/

		@@ -34,9 +34,9 @@ EXENAME = MolDS.out
34	34	DEPFILE = obj/objfile.dep
35	35	LDFLAGS =
36	36
37		-ALL_CPP_FILES = base/Enums.cpp base/PrintController.cpp base/MolDSException.cpp base/MallocerFreer.cpp mpi/MpiProcess.cpp wrappers/Blas.cpp wrappers/Lapack.cpp base/Utilities.cpp base/MathUtilities.cpp base/EularAngle.cpp base/Parameters.cpp base/atoms/Atom.cpp base/atoms/Hatom.cpp base/atoms/Liatom.cpp base/atoms/Catom.cpp base/atoms/Natom.cpp base/atoms/Oatom.cpp base/atoms/Satom.cpp base/factories/AtomFactory.cpp base/Molecule.cpp base/InputParser.cpp base/GTOExpansionSTO.cpp base/RealSphericalHarmonicsIndex.cpp base/loggers/MOLogger.cpp base/loggers/DensityLogger.cpp base/loggers/HoleDensityLogger.cpp base/loggers/ParticleDensityLogger.cpp cndo/Cndo2.cpp indo/Indo.cpp zindo/ZindoS.cpp mndo/Mndo.cpp am1/Am1.cpp am1/Am1D.cpp pm3/Pm3.cpp pm3/Pm3D.cpp pm3/Pm3Pddg.cpp base/factories/ElectronicStructureFactory.cpp md/MD.cpp mc/MC.cpp rpmd/RPMD.cpp nasco/NASCO.cpp optimization/Optimizer.cpp optimization/ConjugateGradient.cpp optimization/SteepestDescent.cpp optimization/BFGS.cpp base/factories/OptimizerFactory.cpp base/MolDS.cpp Main.cpp
38		-ALL_HEAD_FILES = base/Enums.h base/Uncopyable.h base/PrintController.h base/MolDSException.h base/MallocerFreer.h mpi/MpiProcess.h wrappers/Blas.h wrappers/Lapack.h base/Utilities.h base/MathUtilities.h base/EularAngle.h base/Parameters.h base/atoms/Atom.h base/atoms/Hatom.h base/atoms/Liatom.h base/atoms/Catom.h base/atoms/Natom.h base/atoms/Oatom.h base/atoms/Satom.h base/factories/AtomFactory.h base/Molecule.h base/InputParser.h base/GTOExpansionSTO.h base/RealSphericalHarmonicsIndex.h base/loggers/MOLogger.h base/loggers/DensityLogger.h base/loggers/HoleDensityLogger.h base/loggers/ParticleDensityLogger.h base/ElectronicStructure.h cndo/Cndo2.h cndo/ReducedOverlapAOsParameters.h indo/Indo.h zindo/ZindoS.h mndo/Mndo.h am1/Am1.h am1/Am1D.h pm3/Pm3.h pm3/Pm3D.h pm3/Pm3Pddg.h base/factories/ElectronicStructureFactory.h md/MD.h mc/MC.h rpmd/RPMD.h nasco/NASCO.h optimization/Optimizer.h optimization/ConjugateGradient.h optimization/SteepestDescent.h optimization/BFGS.h base/factories/OptimizerFactory.h base/MolDS.h
39		-ALL_OBJ_FILES = obj/Enums.o obj/PrintController.o obj/MolDSException.o obj/MallocerFreer.o obj/MpiProcess.o obj/Blas.o obj/Lapack.o obj/Utilities.o obj/MathUtilities.o obj/EularAngle.o obj/Parameters.o obj/Atom.o obj/Hatom.o obj/Liatom.o obj/Catom.o obj/Natom.o obj/Oatom.o obj/Satom.o obj/AtomFactory.o obj/Molecule.o obj/InputParser.o obj/GTOExpansionSTO.o obj/RealSphericalHarmonicsIndex.o obj/MOLogger.o obj/DensityLogger.o obj/HoleDensityLogger.o obj/ParticleDensityLogger.o obj/Cndo2.o obj/Indo.o obj/ZindoS.o obj/Mndo.o obj/Am1.o obj/Am1D.o obj/Pm3.o obj/Pm3D.o obj/Pm3Pddg.o obj/ElectronicStructureFactory.o obj/MD.o obj/MC.o obj/RPMD.o obj/NASCO.o obj/Optimizer.o obj/ConjugateGradient.o obj/SteepestDescent.o obj/BFGS.o obj/OptimizerFactory.o obj/MolDS.o obj/Main.o
	37	+ALL_CPP_FILES = base/Enums.cpp base/PrintController.cpp base/MolDSException.cpp base/MallocerFreer.cpp mpi/MpiProcess.cpp mpi/AsyncCommunicator.cpp wrappers/Blas.cpp wrappers/Lapack.cpp base/Utilities.cpp base/MathUtilities.cpp base/EularAngle.cpp base/Parameters.cpp base/atoms/Atom.cpp base/atoms/Hatom.cpp base/atoms/Liatom.cpp base/atoms/Catom.cpp base/atoms/Natom.cpp base/atoms/Oatom.cpp base/atoms/Satom.cpp base/factories/AtomFactory.cpp base/Molecule.cpp base/InputParser.cpp base/GTOExpansionSTO.cpp base/RealSphericalHarmonicsIndex.cpp base/loggers/MOLogger.cpp base/loggers/DensityLogger.cpp base/loggers/HoleDensityLogger.cpp base/loggers/ParticleDensityLogger.cpp cndo/Cndo2.cpp indo/Indo.cpp zindo/ZindoS.cpp mndo/Mndo.cpp am1/Am1.cpp am1/Am1D.cpp pm3/Pm3.cpp pm3/Pm3D.cpp pm3/Pm3Pddg.cpp base/factories/ElectronicStructureFactory.cpp md/MD.cpp mc/MC.cpp rpmd/RPMD.cpp nasco/NASCO.cpp optimization/Optimizer.cpp optimization/ConjugateGradient.cpp optimization/SteepestDescent.cpp optimization/BFGS.cpp base/factories/OptimizerFactory.cpp base/MolDS.cpp Main.cpp
	38	+ALL_HEAD_FILES = base/Enums.h base/Uncopyable.h base/PrintController.h base/MolDSException.h base/containers/ThreadSafeQueue.h base/MallocerFreer.h mpi/MpiProcess.h mpi/AsyncCommunicator.h wrappers/Blas.h wrappers/Lapack.h base/Utilities.h base/MathUtilities.h base/EularAngle.h base/Parameters.h base/atoms/Atom.h base/atoms/Hatom.h base/atoms/Liatom.h base/atoms/Catom.h base/atoms/Natom.h base/atoms/Oatom.h base/atoms/Satom.h base/factories/AtomFactory.h base/Molecule.h base/InputParser.h base/GTOExpansionSTO.h base/RealSphericalHarmonicsIndex.h base/loggers/MOLogger.h base/loggers/DensityLogger.h base/loggers/HoleDensityLogger.h base/loggers/ParticleDensityLogger.h base/ElectronicStructure.h cndo/Cndo2.h cndo/ReducedOverlapAOsParameters.h indo/Indo.h zindo/ZindoS.h mndo/Mndo.h am1/Am1.h am1/Am1D.h pm3/Pm3.h pm3/Pm3D.h pm3/Pm3Pddg.h base/factories/ElectronicStructureFactory.h md/MD.h mc/MC.h rpmd/RPMD.h nasco/NASCO.h optimization/Optimizer.h optimization/ConjugateGradient.h optimization/SteepestDescent.h optimization/BFGS.h base/factories/OptimizerFactory.h base/MolDS.h
	39	+ALL_OBJ_FILES = obj/Enums.o obj/PrintController.o obj/MolDSException.o obj/MallocerFreer.o obj/MpiProcess.o obj/AsyncCommunicator.o obj/Blas.o obj/Lapack.o obj/Utilities.o obj/MathUtilities.o obj/EularAngle.o obj/Parameters.o obj/Atom.o obj/Hatom.o obj/Liatom.o obj/Catom.o obj/Natom.o obj/Oatom.o obj/Satom.o obj/AtomFactory.o obj/Molecule.o obj/InputParser.o obj/GTOExpansionSTO.o obj/RealSphericalHarmonicsIndex.o obj/MOLogger.o obj/DensityLogger.o obj/HoleDensityLogger.o obj/ParticleDensityLogger.o obj/Cndo2.o obj/Indo.o obj/ZindoS.o obj/Mndo.o obj/Am1.o obj/Am1D.o obj/Pm3.o obj/Pm3D.o obj/Pm3Pddg.o obj/ElectronicStructureFactory.o obj/MD.o obj/MC.o obj/RPMD.o obj/NASCO.o obj/Optimizer.o obj/ConjugateGradient.o obj/SteepestDescent.o obj/BFGS.o obj/OptimizerFactory.o obj/MolDS.o obj/Main.o
40	40
41	41	$(EXENAME): $(ALL_OBJ_FILES)
42	42	$(CC) -o $@ $(LDFLAGS) -Wl,-rpath=$(BOOST_LIB_DIR) -Wl,-rpath=$(OPENBLAS_LIB_DIR) $(LDFLAGS) $(ALL_OBJ_FILES) -L$(BOOST_LIB_DIR) -L$(OPENBLAS_LIB_DIR) $(LIBS)

--- a/src/base/Enums.h

+++ b/src/base/Enums.h

		@@ -154,9 +154,17 @@ RENUMSTR_END()
154	154
155	155	RENUMSTR_BEGIN( ExceptionKey, ExceptionKeyStr )
156	156	RENUMSTR( LapackInfo, "LapackInfo" )
	157	+ RENUMSTR( EmptyQueue, "EmptyQueue" )
157	158	RENUMSTR( ExceptionKey_end, "ExceptionKey_end" )
158	159	RENUMSTR_END()
159	160
	161	+RENUMSTR_BEGIN( MpiFunctionType, MpiFunctionTypeStr )
	162	+ RENUMSTR( Send, "Send" )
	163	+ RENUMSTR( Recv, "Recv" )
	164	+ RENUMSTR( Broadcast, "Broadcast" )
	165	+ RENUMSTR( MpiFunctionType_end, "MpiFunctionType_end" )
	166	+RENUMSTR_END()
	167	+
160	168	}
161	169	#endif
162	170

--- a/src/base/Molecule.cpp

+++ b/src/base/Molecule.cpp

		@@ -24,6 +24,7 @@
24	24	#include<string>
25	25	#include<vector>
26	26	#include<stdexcept>
	27	+#include<omp.h>
27	28	#include<boost/format.hpp>
28	29	#include"Enums.h"
29	30	#include"Uncopyable.h"

--- /dev/null

+++ b/src/base/containers/ThreadSafeQueue.cpp

		@@ -0,0 +1,41 @@
	1	+//************************************************************************//
	2	+// Copyright (C) 2011-2013 Mikiya Fujii //
	3	+// //
	4	+// This file is part of MolDS. //
	5	+// //
	6	+// MolDS is free software: you can redistribute it and/or modify //
	7	+// it under the terms of the GNU General Public License as published by //
	8	+// the Free Software Foundation, either version 3 of the License, or //
	9	+// (at your option) any later version. //
	10	+// //
	11	+// MolDS is distributed in the hope that it will be useful, //
	12	+// but WITHOUT ANY WARRANTY; without even the implied warranty of //
	13	+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the //
	14	+// GNU General Public License for more details. //
	15	+// //
	16	+// You should have received a copy of the GNU General Public License //
	17	+// along with MolDS. If not, see <http://www.gnu.org/licenses/>. //
	18	+//************************************************************************//
	19	+#include<stdio.h>
	20	+#include<sstream>
	21	+#include<queue>
	22	+#include<boost/format.hpp>
	23	+#include"../Enums.h"
	24	+#include"../Uncopyable.h"
	25	+#include"../PrintController.h"
	26	+#include"../MolDSException.h"
	27	+#include"ThreadSafeQueue.h"
	28	+using namespace std;
	29	+//using namespace MolDS_base;
	30	+
	31	+namespace MolDS_base_containers{
	32	+ int ThreadSafeQueue::Size(){
	33	+ boost::mutex::scoped_lock lk(this->stateGuard);
	34	+ return this->stdQueue.size();
	35	+ }
	36	+
	37	+ bool ThreadSafeQueue::Empty(){
	38	+ boost::mutex::scoped_lock lk(this->stateGuard);
	39	+ return this->stdQueue.empty();
	40	+ }
	41	+};

--- /dev/null

+++ b/src/base/containers/ThreadSafeQueue.h

		@@ -0,0 +1,75 @@
	1	+//************************************************************************//
	2	+// Copyright (C) 2011-2013 Mikiya Fujii //
	3	+// Copyright (C) 2012-2012 Katushiko Nishimra //
	4	+// //
	5	+// This file is part of MolDS. //
	6	+// //
	7	+// MolDS is free software: you can redistribute it and/or modify //
	8	+// it under the terms of the GNU General Public License as published by //
	9	+// the Free Software Foundation, either version 3 of the License, or //
	10	+// (at your option) any later version. //
	11	+// //
	12	+// MolDS is distributed in the hope that it will be useful, //
	13	+// but WITHOUT ANY WARRANTY; without even the implied warranty of //
	14	+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the //
	15	+// GNU General Public License for more details. //
	16	+// //
	17	+// You should have received a copy of the GNU General Public License //
	18	+// along with MolDS. If not, see <http://www.gnu.org/licenses/>. //
	19	+//************************************************************************//
	20	+#ifndef INCLUDED_THREADSAFEQUEQUE
	21	+#define INCLUDED_THREADSAFEQUEQUE
	22	+#include<queue>
	23	+#include<boost/shared_ptr.hpp>
	24	+#include<boost/thread.hpp>
	25	+#include<boost/thread/condition.hpp>
	26	+namespace MolDS_base_containers{
	27	+
	28	+// This Queue class is thread-safe
	29	+
	30	+template <typename T>
	31	+class ThreadSafeQueue
	32	+{
	33	+public:
	34	+ ThreadSafeQueue(){}
	35	+ ~ThreadSafeQueue(){}
	36	+
	37	+ void Push(const T& data){
	38	+ boost::mutex::scoped_lock lk(this->stateGuard);
	39	+ this->stdQueue.push(data);
	40	+ this->stateChange.notify_all();
	41	+ }
	42	+
	43	+ T FrontPop(){
	44	+ boost::mutex::scoped_lock lk(this->stateGuard);
	45	+ if(this->stdQueue.empty()){
	46	+ std::stringstream ss;
	47	+ ss << "naitive queue has no member\n";
	48	+ MolDS_base::MolDSException ex(ss.str());
	49	+ int info = 0;
	50	+ ex.SetKeyValue<int>(MolDS_base::EmptyQueue, info);
	51	+ throw ex;
	52	+ }
	53	+ T ret = this->stdQueue.front();
	54	+ this->stdQueue.pop();
	55	+ this->stateChange.notify_all();
	56	+ return ret;
	57	+ }
	58	+
	59	+ int Size(){
	60	+ boost::mutex::scoped_lock lk(this->stateGuard);
	61	+ return this->stdQueue.size();
	62	+ }
	63	+
	64	+ bool Empty(){
	65	+ boost::mutex::scoped_lock lk(this->stateGuard);
	66	+ return this->stdQueue.empty();
	67	+ }
	68	+private:
	69	+ std::queue<T> stdQueue;
	70	+ boost::mutex stateGuard;
	71	+ boost::condition_variable stateChange;
	72	+};
	73	+
	74	+}
	75	+#endif

--- a/src/cndo/Cndo2.cpp

+++ b/src/cndo/Cndo2.cpp

		@@ -33,7 +33,9 @@
33	33	#include"../base/PrintController.h"
34	34	#include"../base/MolDSException.h"
35	35	#include"../base/MallocerFreer.h"
	36	+#include"../base/containers/ThreadSafeQueue.h"
36	37	#include"../mpi/MpiProcess.h"
	38	+#include"../mpi/AsyncCommunicator.h"
37	39	#include"../wrappers/Blas.h"
38	40	#include"../wrappers/Lapack.h"
39	41	#include"../base/MathUtilities.h"

		@@ -1374,95 +1376,100 @@ void Cndo2::CalcFockMatrix(double** fockMatrix,
1374	1376	double const* atomicElectronPopulation,
1375	1377	double const* const* const* const* const* const* twoElecTwoCore,
1376	1378	bool isGuess) const{
1377		- int mpiRank = MolDS_mpi::MpiProcess::GetInstance()->GetRank();
1378		- int mpiSize = MolDS_mpi::MpiProcess::GetInstance()->GetSize();
1379	1379	int totalNumberAOs = molecule.GetTotalNumberAOs();
1380	1380	int totalNumberAtoms = molecule.GetNumberAtoms();
1381		- MallocerFreer::GetInstance()->Initialize<double>(fockMatrix,
1382		- totalNumberAOs,
1383		- totalNumberAOs);
	1381	+
	1382	+ // MPI setting of each rank
	1383	+ int mpiRank = MolDS_mpi::MpiProcess::GetInstance()->GetRank();
	1384	+ int mpiSize = MolDS_mpi::MpiProcess::GetInstance()->GetSize();
	1385	+ int mpiHeadRank = MolDS_mpi::MpiProcess::GetInstance()->GetHeadRank();
	1386	+ int mPassingTimes = MolDS_mpi::MpiProcess::GetInstance()->GetMessagePassingTimes(totalNumberAOs);
	1387	+ MolDS_mpi::AsyncCommunicator asyncCommunicator;
	1388	+ boost::thread communicationThread( boost::bind(&MolDS_mpi::AsyncCommunicator::Run<double>,
	1389	+ &asyncCommunicator,
	1390	+ mPassingTimes) );
	1391	+
	1392	+ MallocerFreer::GetInstance()->Initialize<double>(fockMatrix, totalNumberAOs, totalNumberAOs);
1384	1393	for(int A=0; A<totalNumberAtoms; A++){
1385	1394	const Atom& atomA = *molecule.GetAtom(A);
1386	1395	int firstAOIndexA = atomA.GetFirstAOIndex();
1387	1396	int lastAOIndexA = atomA.GetLastAOIndex();
1388	1397	for(int mu=firstAOIndexA; mu<=lastAOIndexA; mu++){
1389		- if(mu%mpiSize != mpiRank){continue;}
1390		-
1391		- stringstream ompErrors;
1392		-#pragma omp parallel for schedule(auto)
1393		- for(int B=A; B<totalNumberAtoms; B++){
1394		- try{
1395		- const Atom& atomB = *molecule.GetAtom(B);
1396		- int firstAOIndexB = atomB.GetFirstAOIndex();
1397		- int lastAOIndexB = atomB.GetLastAOIndex();
1398		- for(int nu=firstAOIndexB; nu<=lastAOIndexB; nu++){
1399		- if(mu == nu){
1400		- // diagonal part
1401		- fockMatrix[mu][mu] = this->GetFockDiagElement(atomA,
1402		- A,
1403		- mu,
1404		- molecule,
1405		- gammaAB,
1406		- orbitalElectronPopulation,
1407		- atomicElectronPopulation,
1408		- twoElecTwoCore,
1409		- isGuess);
1410		- }
1411		- else if(mu < nu){
1412		- // upper right part
1413		- fockMatrix[mu][nu] = this->GetFockOffDiagElement(atomA,
1414		- atomB,
	1398	+ int calcRank = mu%mpiSize;
	1399	+ if(mpiRank == calcRank){
	1400	+ int maxThreads = omp_get_max_threads();
	1401	+ stringstream ompErrors;
	1402	+#pragma omp parallel for schedule(auto) num_threads(maxThreads-1)
	1403	+ for(int B=A; B<totalNumberAtoms; B++){
	1404	+ try{
	1405	+ const Atom& atomB = *molecule.GetAtom(B);
	1406	+ int firstAOIndexB = atomB.GetFirstAOIndex();
	1407	+ int lastAOIndexB = atomB.GetLastAOIndex();
	1408	+ for(int nu=firstAOIndexB; nu<=lastAOIndexB; nu++){
	1409	+ if(mu == nu){
	1410	+ // diagonal part
	1411	+ fockMatrix[mu][mu] = this->GetFockDiagElement(atomA,
1415	1412	A,
1416		- B,
1417	1413	mu,
1418		- nu,
1419	1414	molecule,
1420	1415	gammaAB,
1421		- overlapAOs,
1422	1416	orbitalElectronPopulation,
	1417	+ atomicElectronPopulation,
1423	1418	twoElecTwoCore,
1424	1419	isGuess);
1425		- }
1426		- else{
1427		- // lower left part (not calculated)
1428		- }
1429		- } // end of loop nu
1430		- } // end of try
1431		- catch(MolDSException ex){
	1420	+ }
	1421	+ else if(mu < nu){
	1422	+ // upper right part
	1423	+ fockMatrix[mu][nu] = this->GetFockOffDiagElement(atomA,
	1424	+ atomB,
	1425	+ A,
	1426	+ B,
	1427	+ mu,
	1428	+ nu,
	1429	+ molecule,
	1430	+ gammaAB,
	1431	+ overlapAOs,
	1432	+ orbitalElectronPopulation,
	1433	+ twoElecTwoCore,
	1434	+ isGuess);
	1435	+ }
	1436	+ else{
	1437	+ // lower left part (not calculated)
	1438	+ }
	1439	+ } // end of loop nu
	1440	+ } // end of try
	1441	+ catch(MolDSException ex){
1432	1442	#pragma omp critical
1433		- ex.Serialize(ompErrors);
	1443	+ ex.Serialize(ompErrors);
	1444	+ }
	1445	+ } // end of loop B parallelized with openMP
	1446	+ // Exception throwing for omp-region
	1447	+ if(!ompErrors.str().empty()){
	1448	+ throw MolDSException::Deserialize(ompErrors);
1434	1449	}
1435		- } // end of loop B parallelized with openMP
1436		- // Exception throwing for omp-region
1437		- if(!ompErrors.str().empty()){
1438		- throw MolDSException::Deserialize(ompErrors);
	1450	+ } // end of if(mpiRank == calcRank)
	1451	+
	1452	+ // set data to gater in mpiHeadRank with asynchronous MPI
	1453	+ int tag = mu;
	1454	+ int source = calcRank;
	1455	+ int dest = mpiHeadRank;
	1456	+ if(mpiRank == mpiHeadRank && mpiRank != calcRank){
	1457	+ asyncCommunicator.SetRecvedVector(&fockMatrix[mu][mu],
	1458	+ totalNumberAOs-mu,
	1459	+ source,
	1460	+ tag);
	1461	+ }
	1462	+ if(mpiRank != mpiHeadRank && mpiRank == calcRank){
	1463	+ asyncCommunicator.SetSentVector(&fockMatrix[mu][mu],
	1464	+ totalNumberAOs-mu,
	1465	+ dest,
	1466	+ tag);
1439	1467	}
1440	1468	} // end of loop mu parallelized with MPI
1441	1469	} // end of loop A
1442		-
1443		- // communication to collect all matrix data on head-rank
1444		- int mpiHeadRank = MolDS_mpi::MpiProcess::GetInstance()->GetHeadRank();
1445		- if(mpiRank == mpiHeadRank){
1446		- // receive the matrix data from other ranks
1447		- for(int mu=0; mu<totalNumberAOs; mu++){
1448		- if(mu%mpiSize == mpiHeadRank){continue;}
1449		- int source = mu%mpiSize;
1450		- int tag = mu;
1451		- MolDS_mpi::MpiProcess::GetInstance()->Recv(source, tag, &fockMatrix[mu][mu], totalNumberAOs-mu);
1452		- }
1453		- }
1454		- else{
1455		- // send the matrix data to head-rank
1456		- for(int mu=0; mu<totalNumberAOs; mu++){
1457		- if(mu%mpiSize != mpiRank){continue;}
1458		- int dest = mpiHeadRank;
1459		- int tag = mu;
1460		- MolDS_mpi::MpiProcess::GetInstance()->Send(dest, tag, &fockMatrix[mu][mu], totalNumberAOs-mu);
1461		- }
1462		- }
1463		- // broadcast all matrix data to all rank
1464		- int root=mpiHeadRank;
1465		- MolDS_mpi::MpiProcess::GetInstance()->Broadcast(&fockMatrix[0][0], totalNumberAOs*totalNumberAOs, root);
	1470	+ // Delete the communication thread.
	1471	+ communicationThread.join();
	1472	+ MolDS_mpi::MpiProcess::GetInstance()->Broadcast(&fockMatrix[0][0], totalNumberAOs*totalNumberAOs, mpiHeadRank);
1466	1473
1467	1474	/*
1468	1475	this->OutputLog("fock matrix\n");

		@@ -1590,104 +1597,109 @@ void Cndo2::CalcAtomicElectronPopulation(double* atomicElectronPopulation,
1590	1597
1591	1598	// calculate gammaAB matrix. (B.56) and (B.62) in J. A. Pople book.
1592	1599	void Cndo2::CalcGammaAB(double** gammaAB, const Molecule& molecule) const{
1593		- int mpiRank = MolDS_mpi::MpiProcess::GetInstance()->GetRank();
1594		- int mpiSize = MolDS_mpi::MpiProcess::GetInstance()->GetSize();
1595	1600	int totalAtomNumber = molecule.GetNumberAtoms();
1596	1601
	1602	+ // MPI setting of each rank
	1603	+ int mpiRank = MolDS_mpi::MpiProcess::GetInstance()->GetRank();
	1604	+ int mpiSize = MolDS_mpi::MpiProcess::GetInstance()->GetSize();
	1605	+ int mpiHeadRank = MolDS_mpi::MpiProcess::GetInstance()->GetHeadRank();
	1606	+ int mPassingTimes = MolDS_mpi::MpiProcess::GetInstance()->GetMessagePassingTimes(totalAtomNumber);
	1607	+ MolDS_mpi::AsyncCommunicator asyncCommunicator;
	1608	+ boost::thread communicationThread( boost::bind(&MolDS_mpi::AsyncCommunicator::Run<double>,
	1609	+ &asyncCommunicator,
	1610	+ mPassingTimes) );
	1611	+
1597	1612	// This loop (A) is parallelized by MPI
1598	1613	for(int A=0; A<totalAtomNumber; A++){
1599		- if(A%mpiSize != mpiRank){continue;}
1600		- const Atom& atomA = *molecule.GetAtom(A);
1601		- int na = atomA.GetValenceShellType() + 1;
1602		- double orbitalExponentA = atomA.GetOrbitalExponent(
1603		- atomA.GetValenceShellType(), s, this->theory);
1604		- stringstream ompErrors;
1605		-#pragma omp parallel for schedule(auto)
1606		- for(int B=A; B<totalAtomNumber; B++){
1607		- try{
1608		- const Atom& atomB = *molecule.GetAtom(B);
1609		- int nb = atomB.GetValenceShellType() + 1;
1610		- double orbitalExponentB = atomB.GetOrbitalExponent(
1611		- atomB.GetValenceShellType(), s, this->theory);
1612		-
1613		- double value = 0.0;
1614		- double R = molecule.GetDistanceAtoms(A, B);
1615		- double temp = 0.0;
1616		- if(R>0.0){
1617		- // (B.56)
1618		- value = pow(0.5R, 2.0na);
1619		- value = this->GetReducedOverlapAOs(2na-1, 0, 2.0orbitalExponentAR, 0);
1620		-
1621		- for(int l=1; l<=2*nb; l++){
1622		- temp = 0.0;
1623		- temp = l;
1624		- temp = pow(2.0orbitalExponentB, 2*nb-l);
1625		- temp /= Factorial(2nb-l)2.0*nb;
1626		- temp = pow(0.5R, 2.0nb-l+2.0na);
1627		- temp = this->GetReducedOverlapAOs(2na-1,
1628		- 2*nb-l,
1629		- 2.0orbitalExponentAR,
1630		- 2.0orbitalExponentBR);
1631		- value -= temp;
1632		- }
	1614	+ int calcRank = A%mpiSize;
	1615	+ if(mpiRank == calcRank){
	1616	+ const Atom& atomA = *molecule.GetAtom(A);
	1617	+ int na = atomA.GetValenceShellType() + 1;
	1618	+ double orbitalExponentA = atomA.GetOrbitalExponent(
	1619	+ atomA.GetValenceShellType(), s, this->theory);
	1620	+ int maxThreads = omp_get_max_threads();
	1621	+ stringstream ompErrors;
	1622	+#pragma omp parallel for schedule(auto) num_threads(maxThreads-1)
	1623	+ for(int B=A; B<totalAtomNumber; B++){
	1624	+ try{
	1625	+ const Atom& atomB = *molecule.GetAtom(B);
	1626	+ int nb = atomB.GetValenceShellType() + 1;
	1627	+ double orbitalExponentB = atomB.GetOrbitalExponent(
	1628	+ atomB.GetValenceShellType(), s, this->theory);
	1629	+
	1630	+ double value = 0.0;
	1631	+ double R = molecule.GetDistanceAtoms(A, B);
	1632	+ double temp = 0.0;
	1633	+ if(R>0.0){
	1634	+ // (B.56)
	1635	+ value = pow(0.5R, 2.0na);
	1636	+ value = this->GetReducedOverlapAOs(2na-1, 0, 2.0orbitalExponentAR, 0);
	1637	+
	1638	+ for(int l=1; l<=2*nb; l++){
	1639	+ temp = 0.0;
	1640	+ temp = l;
	1641	+ temp = pow(2.0orbitalExponentB, 2*nb-l);
	1642	+ temp /= Factorial(2nb-l)2.0*nb;
	1643	+ temp = pow(0.5R, 2.0nb-l+2.0na);
	1644	+ temp = this->GetReducedOverlapAOs(2na-1,
	1645	+ 2*nb-l,
	1646	+ 2.0orbitalExponentAR,
	1647	+ 2.0orbitalExponentBR);
	1648	+ value -= temp;
	1649	+ }
1633	1650
1634		- value = pow(2.0orbitalExponentA, 2.0*na+1.0);
1635		- value /= Factorial(2*na);
1636		- }
1637		- else{
1638		- // (B.62)
1639		- value = Factorial(2*na-1);
1640		- value /= pow(2.0orbitalExponentA, 2.0na);
1641		-
1642		- for(int l=1; l<=2*nb; l++){
1643		- temp = l;
1644		- temp = pow(2.0orbitalExponentB, 2*nb-l);
1645		- temp = Factorial(2na+2*nb-l-1);
1646		- temp /= Factorial(2*nb-l);
1647		- temp /= 2.0*nb;
1648		- temp /= pow( 2.0orbitalExponentA + 2.0orbitalExponentB, 2.0*(na+nb)-l );
1649		- value -= temp;
	1651	+ value = pow(2.0orbitalExponentA, 2.0*na+1.0);
	1652	+ value /= Factorial(2*na);
	1653	+ }
	1654	+ else{
	1655	+ // (B.62)
	1656	+ value = Factorial(2*na-1);
	1657	+ value /= pow(2.0orbitalExponentA, 2.0na);
	1658	+
	1659	+ for(int l=1; l<=2*nb; l++){
	1660	+ temp = l;
	1661	+ temp = pow(2.0orbitalExponentB, 2*nb-l);
	1662	+ temp = Factorial(2na+2*nb-l-1);
	1663	+ temp /= Factorial(2*nb-l);
	1664	+ temp /= 2.0*nb;
	1665	+ temp /= pow( 2.0orbitalExponentA + 2.0orbitalExponentB, 2.0*(na+nb)-l );
	1666	+ value -= temp;
	1667	+ }
	1668	+ value = pow(2.0orbitalExponentA, 2.0*na+1);
	1669	+ value /= Factorial(2*na);
1650	1670	}
1651		- value = pow(2.0orbitalExponentA, 2.0*na+1);
1652		- value /= Factorial(2*na);
	1671	+ gammaAB[A][B] = value;
1653	1672	}
1654		- gammaAB[A][B] = value;
1655		- }
1656		- catch(MolDSException ex){
1657		- #pragma omp critical
1658		- ex.Serialize(ompErrors);
	1673	+ catch(MolDSException ex){
	1674	+ #pragma omp critical
	1675	+ ex.Serialize(ompErrors);
	1676	+ }
	1677	+ } // end of loop B parallelized by openMP
	1678	+ // Exception throwing for omp-region
	1679	+ if(!ompErrors.str().empty()){
	1680	+ throw MolDSException::Deserialize(ompErrors);
1659	1681	}
1660		- } // end of loop B parallelized by openMP
1661		- // Exception throwing for omp-region
1662		- if(!ompErrors.str().empty()){
1663		- throw MolDSException::Deserialize(ompErrors);
	1682	+ } // end of if(mpiRank==calcRank)
	1683	+
	1684	+ // set data to gater in mpiHeadRank with asynchronous MPI
	1685	+ int tag = A;
	1686	+ int source = calcRank;
	1687	+ int dest = mpiHeadRank;
	1688	+ if(mpiRank == mpiHeadRank && mpiRank != calcRank){
	1689	+ asyncCommunicator.SetRecvedVector(&gammaAB[A][A],
	1690	+ totalAtomNumber-A,
	1691	+ source,
	1692	+ tag);
	1693	+ }
	1694	+ if(mpiRank != mpiHeadRank && mpiRank == calcRank){
	1695	+ asyncCommunicator.SetSentVector(&gammaAB[A][A],
	1696	+ totalAtomNumber-A,
	1697	+ dest,
	1698	+ tag);
1664	1699	}
1665	1700	} // end of loop A prallelized by MPI
1666		-
1667		- // communication to collect all matrix data on head-rank
1668		- int mpiHeadRank = MolDS_mpi::MpiProcess::GetInstance()->GetHeadRank();
1669		- if(mpiRank == mpiHeadRank){
1670		- // receive the matrix data from other ranks
1671		- for(int A=0; A<totalAtomNumber; A++){
1672		- if(A%mpiSize == mpiHeadRank){continue;}
1673		- int source = A%mpiSize;
1674		- int tag = A;
1675		- MolDS_mpi::MpiProcess::GetInstance()->Recv(source, tag, &gammaAB[A][A], totalAtomNumber-A);
1676		- }
1677		- }
1678		- else{
1679		- // send the matrix data to head-rank
1680		- for(int A=0; A<totalAtomNumber; A++){
1681		- if(A%mpiSize != mpiRank){continue;}
1682		- int dest = mpiHeadRank;
1683		- int tag = A;
1684		- MolDS_mpi::MpiProcess::GetInstance()->Send(dest, tag, &gammaAB[A][A], totalAtomNumber-A);
1685		- }
1686		- }
1687		- // broadcast all matrix data to all rank
1688		- int root=mpiHeadRank;
1689		- MolDS_mpi::MpiProcess::GetInstance()->Broadcast(&gammaAB[0][0], totalAtomNumber*totalAtomNumber, root);
1690		-
	1701	+ communicationThread.join();
	1702	+ MolDS_mpi::MpiProcess::GetInstance()->Broadcast(&gammaAB[0][0], totalAtomNumber*totalAtomNumber, mpiHeadRank);
1691	1703
1692	1704	#pragma omp parallel for schedule(auto)
1693	1705	for(int A=0; A<totalAtomNumber; A++){

		@@ -1786,44 +1798,97 @@ void Cndo2::CalcElectronicTransitionDipoleMoment(double* transitionDipoleMoment,
1786	1798	void Cndo2::CalcCartesianMatrixByGTOExpansion(double*** cartesianMatrix,
1787	1799	const Molecule& molecule,
1788	1800	STOnGType stonG) const{
1789		- int mpiRank = MolDS_mpi::MpiProcess::GetInstance()->GetRank();
1790		- int mpiSize = MolDS_mpi::MpiProcess::GetInstance()->GetSize();
1791	1801	int totalAONumber = molecule.GetTotalNumberAOs();
1792	1802	int totalAtomNumber = molecule.GetNumberAtoms();
1793	1803
	1804	+ // MPI setting of each rank
	1805	+ int mpiRank = MolDS_mpi::MpiProcess::GetInstance()->GetRank();
	1806	+ int mpiSize = MolDS_mpi::MpiProcess::GetInstance()->GetSize();
	1807	+ int mpiHeadRank = MolDS_mpi::MpiProcess::GetInstance()->GetHeadRank();
	1808	+ int mPassingTimes = MolDS_mpi::MpiProcess::GetInstance()->GetMessagePassingTimes(totalAtomNumber);
	1809	+ mPassingTimes *= CartesianType_end;
	1810	+ MolDS_mpi::AsyncCommunicator asyncCommunicator;
	1811	+ boost::thread communicationThread( boost::bind(&MolDS_mpi::AsyncCommunicator::Run<double>,
	1812	+ &asyncCommunicator,
	1813	+ mPassingTimes) );
	1814	+
1794	1815	// This loop (A and mu) is parallelized by MPI
1795	1816	for(int A=0; A<totalAtomNumber; A++){
1796		- const Atom& atomA = *molecule.GetAtom(A);
1797		- int firstAOIndexAtomA = atomA.GetFirstAOIndex();
1798		- for(int a=0; a<atomA.GetValenceSize(); a++){
1799		- int mu = firstAOIndexAtomA + a;
1800		- if(mu%mpiSize != mpiRank){continue;}
1801		- stringstream ompErrors;
1802		- #pragma omp parallel for schedule(auto)
1803		- for(int B=0; B<totalAtomNumber; B++){
1804		- try{
1805		- const Atom& atomB = *molecule.GetAtom(B);
1806		- int firstAOIndexAtomB = atomB.GetFirstAOIndex();
1807		- for(int b=0; b<atomB.GetValenceSize(); b++){
1808		- int nu = firstAOIndexAtomB + b;
1809		- this->CalcCartesianMatrixElementsByGTOExpansion(cartesianMatrix[XAxis][mu][nu],
1810		- cartesianMatrix[YAxis][mu][nu],
1811		- cartesianMatrix[ZAxis][mu][nu],
1812		- atomA, a, atomB, b, stonG);
	1817	+ const Atom& atomA = *molecule.GetAtom(A);
	1818	+ int firstAOIndexA = atomA.GetFirstAOIndex();
	1819	+ int numValenceAOsA = atomA.GetValenceSize();
	1820	+ int calcRank = A%mpiSize;
	1821	+ if(mpiRank == calcRank){
	1822	+ for(int a=0; a<numValenceAOsA; a++){
	1823	+ int mu = firstAOIndexA + a;
	1824	+ int maxThreads = omp_get_max_threads();
	1825	+ stringstream ompErrors;
	1826	+#pragma omp parallel for schedule(auto) num_threads(maxThreads-1)
	1827	+ for(int B=0; B<totalAtomNumber; B++){
	1828	+ try{
	1829	+ const Atom& atomB = *molecule.GetAtom(B);
	1830	+ int firstAOIndexB = atomB.GetFirstAOIndex();
	1831	+ int numValenceAOsB = atomB.GetValenceSize();
	1832	+ for(int b=0; b<numValenceAOsB; b++){
	1833	+ int nu = firstAOIndexB + b;
	1834	+ this->CalcCartesianMatrixElementsByGTOExpansion(cartesianMatrix[XAxis][mu][nu],
	1835	+ cartesianMatrix[YAxis][mu][nu],
	1836	+ cartesianMatrix[ZAxis][mu][nu],
	1837	+ atomA, a, atomB, b, stonG);
	1838	+ }
1813	1839	}
	1840	+ catch(MolDSException ex){
	1841	+#pragma omp critical
	1842	+ ex.Serialize(ompErrors);
	1843	+ }
	1844	+ }// end of loop for int B with openMP
	1845	+ // Exception throwing for omp-region
	1846	+ if(!ompErrors.str().empty()){
	1847	+ throw MolDSException::Deserialize(ompErrors);
1814	1848	}
1815		- catch(MolDSException ex){
1816		- #pragma omp critical
1817		- ex.Serialize(ompErrors);
1818		- }
1819		- }// end of loop for int B with openMP
1820		- // Exception throwing for omp-region
1821		- if(!ompErrors.str().empty()){
1822		- throw MolDSException::Deserialize(ompErrors);
1823		- }
1824		- }
1825		- } // end of loop for int A with openMP
1826		-
	1849	+ }
	1850	+ } // end lof if(mpiRank == calcRank)
	1851	+
	1852	+ // set data to gater in mpiHeadRank with asynchronous MPI
	1853	+ int tagX = A* CartesianType_end + XAxis;
	1854	+ int tagY = A* CartesianType_end + YAxis;
	1855	+ int tagZ = A* CartesianType_end + ZAxis;
	1856	+ int source = calcRank;
	1857	+ int dest = mpiHeadRank;
	1858	+ if(mpiRank == mpiHeadRank && mpiRank != calcRank){
	1859	+ asyncCommunicator.SetRecvedVector(&cartesianMatrix[XAxis][firstAOIndexA][0],
	1860	+ numValenceAOsA*totalAONumber,
	1861	+ source,
	1862	+ tagX);
	1863	+ asyncCommunicator.SetRecvedVector(&cartesianMatrix[YAxis][firstAOIndexA][0],
	1864	+ numValenceAOsA*totalAONumber,
	1865	+ source,
	1866	+ tagY);
	1867	+ asyncCommunicator.SetRecvedVector(&cartesianMatrix[ZAxis][firstAOIndexA][0],
	1868	+ numValenceAOsA*totalAONumber,
	1869	+ source,
	1870	+ tagZ);
	1871	+ }
	1872	+ if(mpiRank != mpiHeadRank && mpiRank == calcRank){
	1873	+ asyncCommunicator.SetSentVector(&cartesianMatrix[XAxis][firstAOIndexA][0],
	1874	+ numValenceAOsA*totalAONumber,
	1875	+ dest,
	1876	+ tagX);
	1877	+ asyncCommunicator.SetSentVector(&cartesianMatrix[YAxis][firstAOIndexA][0],
	1878	+ numValenceAOsA*totalAONumber,
	1879	+ dest,
	1880	+ tagY);
	1881	+ asyncCommunicator.SetSentVector(&cartesianMatrix[ZAxis][firstAOIndexA][0],
	1882	+ numValenceAOsA*totalAONumber,
	1883	+ dest,
	1884	+ tagZ);
	1885	+ }
	1886	+ } // end of loop for int A with MPI
	1887	+ // Delete the communication thread.
	1888	+ communicationThread.join();
	1889	+ MolDS_mpi::MpiProcess::GetInstance()->Broadcast(&cartesianMatrix[0][0][0], CartesianType_endtotalAONumbertotalAONumber, mpiHeadRank);
	1890	+
	1891	+/*
1827	1892	// communication to collect all matrix data on head-rank
1828	1893	int mpiHeadRank = MolDS_mpi::MpiProcess::GetInstance()->GetHeadRank();
1829	1894	if(mpiRank == mpiHeadRank){

		@@ -1857,6 +1922,7 @@ void Cndo2::CalcCartesianMatrixByGTOExpansion(double*** cartesianMatrix,
1857	1922	// broadcast all matrix data to all rank
1858	1923	int root=mpiHeadRank;
1859	1924	MolDS_mpi::MpiProcess::GetInstance()->Broadcast(&cartesianMatrix[0][0][0], CartesianType_endtotalAONumbertotalAONumber, root);
	1925	+ */
1860	1926	}
1861	1927
1862	1928	// Calculate elements of Cartesian matrix between atomic orbitals.

		@@ -3817,10 +3883,19 @@ void Cndo2::CalcOverlapESsWithAnotherElectronicStructure(double** overlapESs,
3817	3883
3818	3884	// calculate OverlapAOs matrix. E.g. S_{\mu\nu} in (3.74) in J. A. Pople book.
3819	3885	void Cndo2::CalcOverlapAOs(double** overlapAOs, const Molecule& molecule) const{
3820		- int mpiRank = MolDS_mpi::MpiProcess::GetInstance()->GetRank();
3821		- int mpiSize = MolDS_mpi::MpiProcess::GetInstance()->GetSize();
3822	3886	int totalAONumber = molecule.GetTotalNumberAOs();
3823	3887	int totalAtomNumber = molecule.GetNumberAtoms();
	3888	+
	3889	+ // MPI setting of each rank
	3890	+ int mpiRank = MolDS_mpi::MpiProcess::GetInstance()->GetRank();
	3891	+ int mpiSize = MolDS_mpi::MpiProcess::GetInstance()->GetSize();
	3892	+ int mpiHeadRank = MolDS_mpi::MpiProcess::GetInstance()->GetHeadRank();
	3893	+ int mPassingTimes = MolDS_mpi::MpiProcess::GetInstance()->GetMessagePassingTimes(totalAtomNumber);
	3894	+ MolDS_mpi::AsyncCommunicator asyncCommunicator;
	3895	+ boost::thread communicationThread( boost::bind(&MolDS_mpi::AsyncCommunicator::Run<double>,
	3896	+ &asyncCommunicator,
	3897	+ mPassingTimes) );
	3898	+
3824	3899	MallocerFreer::GetInstance()->Initialize<double>(overlapAOs,
3825	3900	totalAONumber,
3826	3901	totalAONumber);

		@@ -3828,51 +3903,73 @@ void Cndo2::CalcOverlapAOs(double** overlapAOs, const Molecule& molecule) const{
3828	3903	// This loop A is parallelized with MPI
3829	3904	for(int A=0; A<totalAtomNumber; A++){
3830	3905	const Atom& atomA = *molecule.GetAtom(A);
3831		- if(A%mpiSize != mpiRank){continue;}
3832		-
3833		- stringstream ompErrors;
3834		-#pragma omp parallel
3835		- {
3836		- double** diatomicOverlapAOs = NULL;
3837		- double** rotatingMatrix = NULL;
3838		- try{
3839		- // malloc
3840		- MallocerFreer::GetInstance()->Malloc<double>(&diatomicOverlapAOs,
3841		- OrbitalType_end,
3842		- OrbitalType_end);
3843		- MallocerFreer::GetInstance()->Malloc<double>(&rotatingMatrix,
3844		- OrbitalType_end,
3845		- OrbitalType_end);
3846		-
	3906	+ int firstAOIndexA = atomA.GetFirstAOIndex();
	3907	+ int numValenceAOs = atomA.GetValenceSize();
	3908	+ int calcRank = A%mpiSize;
	3909	+ if(mpiRank == calcRank){
	3910	+ int maxThreads = omp_get_max_threads();
	3911	+ stringstream ompErrors;
	3912	+#pragma omp parallel num_threads(maxThreads-1)
	3913	+ {
	3914	+ double** diatomicOverlapAOs = NULL;
	3915	+ double** rotatingMatrix = NULL;
	3916	+ try{
	3917	+ // malloc
	3918	+ MallocerFreer::GetInstance()->Malloc<double>(&diatomicOverlapAOs,
	3919	+ OrbitalType_end,
	3920	+ OrbitalType_end);
	3921	+ MallocerFreer::GetInstance()->Malloc<double>(&rotatingMatrix,
	3922	+ OrbitalType_end,
	3923	+ OrbitalType_end);
	3924	+ bool symmetrize = false;
3847	3925	#pragma omp for schedule(auto)
3848		- for(int B=A+1; B<totalAtomNumber; B++){
3849		- const Atom& atomB = *molecule.GetAtom(B);
3850		- this->CalcDiatomicOverlapAOsInDiatomicFrame(diatomicOverlapAOs, atomA, atomB);
3851		- this->CalcRotatingMatrix(rotatingMatrix, atomA, atomB);
3852		- this->RotateDiatmicOverlapAOsToSpaceFrame(diatomicOverlapAOs, rotatingMatrix);
3853		- this->SetOverlapAOsElement(overlapAOs, diatomicOverlapAOs, atomA, atomB);
3854		- } // end of loop B parallelized with openMP
	3926	+ for(int B=A+1; B<totalAtomNumber; B++){
	3927	+ const Atom& atomB = *molecule.GetAtom(B);
	3928	+ this->CalcDiatomicOverlapAOsInDiatomicFrame(diatomicOverlapAOs, atomA, atomB);
	3929	+ this->CalcRotatingMatrix(rotatingMatrix, atomA, atomB);
	3930	+ this->RotateDiatmicOverlapAOsToSpaceFrame(diatomicOverlapAOs, rotatingMatrix);
	3931	+ this->SetOverlapAOsElement(overlapAOs, diatomicOverlapAOs, atomA, atomB, symmetrize);
	3932	+ } // end of loop B parallelized with openMP
3855	3933
3856		- } // end of try
3857		- catch(MolDSException ex){
	3934	+ } // end of try
	3935	+ catch(MolDSException ex){
3858	3936	#pragma omp critical
3859		- ex.Serialize(ompErrors);
	3937	+ ex.Serialize(ompErrors);
	3938	+ }
	3939	+ this->FreeDiatomicOverlapAOsAndRotatingMatrix(&diatomicOverlapAOs, &rotatingMatrix);
	3940	+ } // end of omp-parallelized region
	3941	+ // Exception throwing for omp-region
	3942	+ if(!ompErrors.str().empty()){
	3943	+ throw MolDSException::Deserialize(ompErrors);
3860	3944	}
3861		- this->FreeDiatomicOverlapAOsAndRotatingMatrix(&diatomicOverlapAOs, &rotatingMatrix);
3862		- } // end of omp-parallelized region
3863		- // Exception throwing for omp-region
3864		- if(!ompErrors.str().empty()){
3865		- throw MolDSException::Deserialize(ompErrors);
	3945	+ } // end of if(mpiRank == calcRnak)
	3946	+
	3947	+ // set data to gater in mpiHeadRank with asynchronous MPI
	3948	+ int tag = A;
	3949	+ int source = calcRank;
	3950	+ int dest = mpiHeadRank;
	3951	+ if(mpiRank == mpiHeadRank && mpiRank != calcRank){
	3952	+ asyncCommunicator.SetRecvedVector(overlapAOs[firstAOIndexA],
	3953	+ totalAONumber*numValenceAOs,
	3954	+ source,
	3955	+ tag);
	3956	+ }
	3957	+ if(mpiRank != mpiHeadRank && mpiRank == calcRank){
	3958	+ asyncCommunicator.SetSentVector(overlapAOs[firstAOIndexA],
	3959	+ totalAONumber*numValenceAOs,
	3960	+ dest,
	3961	+ tag);
3866	3962	}
3867	3963	} // end of loop A parallelized with MPI
3868		-
3869		- // communication to reduce thsi->matrixForce on all node (namely, all_reduce)
3870		- int numTransported = totalAONumber*totalAONumber;
3871		- MolDS_mpi::MpiProcess::GetInstance()->AllReduce(&overlapAOs[0][0], numTransported, std::plus<double>());
	3964	+ communicationThread.join();
	3965	+ MolDS_mpi::MpiProcess::GetInstance()->Broadcast(&overlapAOs[0][0], totalAONumber*totalAONumber, mpiHeadRank);
3872	3966
3873	3967	#pragma omp parallel for schedule(auto)
3874	3968	for(int mu=0; mu<totalAONumber; mu++){
3875	3969	overlapAOs[mu][mu] = 1.0;
	3970	+ for(int nu=mu+1; nu<totalAONumber; nu++){
	3971	+ overlapAOs[nu][mu] = overlapAOs[mu][nu];
	3972	+ }
3876	3973	}
3877	3974
3878	3975	/*

--- a/src/mndo/Mndo.cpp

+++ b/src/mndo/Mndo.cpp

		@@ -31,7 +31,9 @@
31	31	#include"../base/PrintController.h"
32	32	#include"../base/MolDSException.h"
33	33	#include"../base/MallocerFreer.h"
	34	+#include"../base/containers/ThreadSafeQueue.h"
34	35	#include"../mpi/MpiProcess.h"
	36	+#include"../mpi/AsyncCommunicator.h"
35	37	#include"../wrappers/Blas.h"
36	38	#include"../wrappers/Lapack.h"
37	39	#include"../base/MallocerFreer.h"

		@@ -739,8 +741,9 @@ void Mndo::CalcCISMatrix(double** matrixCIS) const{
739	741	// single excitation from I-th (occupied)MO to A-th (virtual)MO
740	742	int moI = this->GetActiveOccIndex(*this->molecule, k);
741	743	int moA = this->GetActiveVirIndex(*this->molecule, k);
	744	+ int maxThreads = omp_get_max_threads();
742	745	stringstream ompErrors;
743		-#pragma omp parallel for schedule(auto)
	746	+#pragma omp parallel for schedule(auto) num_threads(maxThreads-1)
744	747	for(int l=k; l<this->matrixCISdimension; l++){
745	748	try{
746	749	// single excitation from J-th (occupied)MO to B-th (virtual)MO

		@@ -2552,8 +2555,9 @@ void Mndo::CalcForce(const vector<int>& elecStates){
2552	2555	const Atom& atomA = *molecule->GetAtom(a);
2553	2556	int firstAOIndexA = atomA.GetFirstAOIndex();
2554	2557	int lastAOIndexA = atomA.GetLastAOIndex();
	2558	+ int maxThreads = omp_get_max_threads();
2555	2559	stringstream ompErrors;
2556		-#pragma omp parallel
	2560	+#pragma omp parallel num_threads(maxThreads-1)
2557	2561	{
2558	2562	double*** diatomicOverlapAOs1stDerivs = NULL;
2559	2563	double***** diatomicTwoElecTwoCore1stDerivs = NULL;

		@@ -3416,74 +3420,113 @@ double Mndo::GetAuxiliaryKNRKRElement(int moI, int moJ, int moK, int moL) const{
3416	3420
3417	3421	void Mndo::CalcTwoElecTwoCore(double****** twoElecTwoCore,
3418	3422	const Molecule& molecule) const{
3419		- int mpiRank = MolDS_mpi::MpiProcess::GetInstance()->GetRank();
3420		- int mpiSize = MolDS_mpi::MpiProcess::GetInstance()->GetSize();
3421		- int totalAtomNumber = molecule.GetNumberAtoms();
3422		-
	3423	+ int totalNumberAtoms = molecule.GetNumberAtoms();
	3424	+
	3425	+ // MPI setting of each rank
	3426	+ int mpiRank = MolDS_mpi::MpiProcess::GetInstance()->GetRank();
	3427	+ int mpiSize = MolDS_mpi::MpiProcess::GetInstance()->GetSize();
	3428	+ int mpiHeadRank = MolDS_mpi::MpiProcess::GetInstance()->GetHeadRank();
	3429	+ int mPassingTimes = MolDS_mpi::MpiProcess::GetInstance()->GetMessagePassingTimes(totalNumberAtoms);
	3430	+ MolDS_mpi::AsyncCommunicator asyncCommunicator;
	3431	+ boost::thread communicationThread( boost::bind(&MolDS_mpi::AsyncCommunicator::Run<double>,
	3432	+ &asyncCommunicator,
	3433	+ mPassingTimes) );
3423	3434	#ifdef MOLDS_DBG
3424	3435	if(twoElecTwoCore == NULL){
3425	3436	throw MolDSException(this->errorMessageCalcTwoElecTwoCoreNullMatrix);
3426	3437	}
3427	3438	#endif
3428	3439	MallocerFreer::GetInstance()->Initialize<double>(twoElecTwoCore,
3429		- totalAtomNumber,
3430		- totalAtomNumber,
	3440	+ totalNumberAtoms,
	3441	+ totalNumberAtoms,
3431	3442	dxy, dxy, dxy, dxy);
3432	3443
3433		-
3434	3444	// this loop-a is MPI-parallelized
3435		- for(int a=0; a<totalAtomNumber; a++){
3436		- if(a%mpiSize != mpiRank){continue;}
3437		- stringstream ompErrors;
3438		-#pragma omp parallel
3439		- {
3440		- double**** diatomicTwoElecTwoCore = NULL;
3441		- double** tmpRotMat = NULL;
3442		- try{
3443		- MallocerFreer::GetInstance()->Malloc<double>(&diatomicTwoElecTwoCore, dxy, dxy, dxy, dxy);
3444		- MallocerFreer::GetInstance()->Malloc<double>(&tmpRotMat, OrbitalType_end, OrbitalType_end);
3445		- // note that terms with condition a==b are not needed to calculate.
	3445	+ for(int a=0; a<totalNumberAtoms; a++){
	3446	+ int calcRank = a%mpiSize;
	3447	+ if(mpiRank == calcRank){
	3448	+ int maxThreads = omp_get_max_threads();
	3449	+ stringstream ompErrors;
	3450	+#pragma omp parallel num_threads(maxThreads-1)
	3451	+ {
	3452	+ double**** diatomicTwoElecTwoCore = NULL;
	3453	+ double** tmpRotMat = NULL;
	3454	+ try{
	3455	+ MallocerFreer::GetInstance()->Malloc<double>(&diatomicTwoElecTwoCore, dxy, dxy, dxy, dxy);
	3456	+ MallocerFreer::GetInstance()->Malloc<double>(&tmpRotMat, OrbitalType_end, OrbitalType_end);
	3457	+ // note that terms with condition a==b are not needed to calculate.
3446	3458	#pragma omp for schedule(auto)
3447		- for(int b=a+1; b<totalAtomNumber; b++){
3448		- this->CalcDiatomicTwoElecTwoCore(diatomicTwoElecTwoCore, tmpRotMat, a, b);
3449		-
3450		- for(int mu=0; mu<dxy; mu++){
3451		- for(int nu=mu; nu<dxy; nu++){
3452		- for(int lambda=0; lambda<dxy; lambda++){
3453		- for(int sigma=lambda; sigma<dxy; sigma++){
3454		- double value = diatomicTwoElecTwoCore[mu][nu][lambda][sigma];
3455		- twoElecTwoCore[a][b][mu][nu][lambda][sigma] = value;
3456		- twoElecTwoCore[a][b][mu][nu][sigma][lambda] = value;
3457		- twoElecTwoCore[a][b][nu][mu][lambda][sigma] = value;
3458		- twoElecTwoCore[a][b][nu][mu][sigma][lambda] = value;
3459		- twoElecTwoCore[b][a][lambda][sigma][mu][nu] = value;
3460		- twoElecTwoCore[b][a][lambda][sigma][nu][mu] = value;
3461		- twoElecTwoCore[b][a][sigma][lambda][mu][nu] = value;
3462		- twoElecTwoCore[b][a][sigma][lambda][nu][mu] = value;
	3459	+ for(int b=a+1; b<totalNumberAtoms; b++){
	3460	+ this->CalcDiatomicTwoElecTwoCore(diatomicTwoElecTwoCore, tmpRotMat, a, b);
	3461	+
	3462	+ for(int mu=0; mu<dxy; mu++){
	3463	+ for(int nu=mu; nu<dxy; nu++){
	3464	+ for(int lambda=0; lambda<dxy; lambda++){
	3465	+ for(int sigma=lambda; sigma<dxy; sigma++){
	3466	+ double value = diatomicTwoElecTwoCore[mu][nu][lambda][sigma];
	3467	+ twoElecTwoCore[a][b][mu][nu][lambda][sigma] = value;
	3468	+ twoElecTwoCore[a][b][mu][nu][sigma][lambda] = value;
	3469	+ twoElecTwoCore[a][b][nu][mu][lambda][sigma] = value;
	3470	+ twoElecTwoCore[a][b][nu][mu][sigma][lambda] = value;
	3471	+ }
3463	3472	}
3464	3473	}
3465	3474	}
3466		- }
3467	3475
3468		- } // end of loop b parallelized with MPI
	3476	+ } // end of loop b parallelized with MPI
3469	3477
3470		- } // end of try
3471		- catch(MolDSException ex){
	3478	+ } // end of try
	3479	+ catch(MolDSException ex){
3472	3480	#pragma omp critical
3473		- ex.Serialize(ompErrors);
	3481	+ ex.Serialize(ompErrors);
	3482	+ }
	3483	+ MallocerFreer::GetInstance()->Free<double>(&diatomicTwoElecTwoCore, dxy, dxy, dxy, dxy);
	3484	+ MallocerFreer::GetInstance()->Free<double>(&tmpRotMat, OrbitalType_end, OrbitalType_end);
	3485	+ } // end of omp-parallelized region
	3486	+ // Exception throwing for omp-region
	3487	+ if(!ompErrors.str().empty()){
	3488	+ throw MolDSException::Deserialize(ompErrors);
3474	3489	}
3475		- MallocerFreer::GetInstance()->Free<double>(&diatomicTwoElecTwoCore, dxy, dxy, dxy, dxy);
3476		- MallocerFreer::GetInstance()->Free<double>(&tmpRotMat, OrbitalType_end, OrbitalType_end);
3477		- } // end of omp-parallelized region
3478		- // Exception throwing for omp-region
3479		- if(!ompErrors.str().empty()){
3480		- throw MolDSException::Deserialize(ompErrors);
	3490	+ } // end of if(mpiRnak == calcRank)
	3491	+ // set data to gater in mpiHeadRank with asynchronous MPI
	3492	+ int tag = a;
	3493	+ int source = calcRank;
	3494	+ int dest = mpiHeadRank;
	3495	+ int numTransported = totalNumberAtomsdxydxydxydxy;
	3496	+ if(mpiRank == mpiHeadRank && mpiRank != calcRank){
	3497	+ asyncCommunicator.SetRecvedVector(&twoElecTwoCore[a][0][0][0][0][0],
	3498	+ numTransported,
	3499	+ source,
	3500	+ tag);
	3501	+ }
	3502	+ if(mpiRank != mpiHeadRank && mpiRank == calcRank){
	3503	+ asyncCommunicator.SetSentVector(&twoElecTwoCore[a][0][0][0][0][0],
	3504	+ numTransported,
	3505	+ dest,
	3506	+ tag);
3481	3507	}
3482	3508	} // end of loop a parallelized with MPI
	3509	+ communicationThread.join();
	3510	+ int numTransported = totalNumberAtomstotalNumberAtomsdxydxydxy*dxy;
	3511	+ MolDS_mpi::MpiProcess::GetInstance()->Broadcast(&twoElecTwoCore[0][0][0][0][0][0], numTransported, mpiHeadRank);
3483	3512
3484		- // communication to reduce thsi->matrixForce on all node (namely, all_reduce)
3485		- int numTransported = totalAtomNumbertotalAtomNumberdxydxydxy*dxy;
3486		- MolDS_mpi::MpiProcess::GetInstance()->AllReduce(&twoElecTwoCore[0][0][0][0][0][0], numTransported, std::plus<double>());
	3513	+ for(int a=0; a<totalNumberAtoms; a++){
	3514	+ for(int b=a+1; b<totalNumberAtoms; b++){
	3515	+ for(int mu=0; mu<dxy; mu++){
	3516	+ for(int nu=mu; nu<dxy; nu++){
	3517	+ for(int lambda=0; lambda<dxy; lambda++){
	3518	+ for(int sigma=lambda; sigma<dxy; sigma++){
	3519	+ double value = twoElecTwoCore[a][b][mu][nu][lambda][sigma];
	3520	+ twoElecTwoCore[b][a][lambda][sigma][mu][nu] = value;
	3521	+ twoElecTwoCore[b][a][lambda][sigma][nu][mu] = value;
	3522	+ twoElecTwoCore[b][a][sigma][lambda][mu][nu] = value;
	3523	+ twoElecTwoCore[b][a][sigma][lambda][nu][mu] = value;
	3524	+ }
	3525	+ }
	3526	+ }
	3527	+ }
	3528	+ }
	3529	+ }
3487	3530	}
3488	3531
3489	3532	// Calculation of two electrons two cores integral (mu, nu \| lambda, sigma) in space fixed frame,

--- /dev/null

+++ b/src/mpi/AsyncCommunicator.cpp

		@@ -0,0 +1,43 @@
	1	+//************************************************************************//
	2	+// Copyright (C) 2011-2012 Mikiya Fujii //
	3	+// //
	4	+// This file is part of MolDS. //
	5	+// //
	6	+// MolDS is free software: you can redistribute it and/or modify //
	7	+// it under the terms of the GNU General Public License as published by //
	8	+// the Free Software Foundation, either version 3 of the License, or //
	9	+// (at your option) any later version. //
	10	+// //
	11	+// MolDS is distributed in the hope that it will be useful, //
	12	+// but WITHOUT ANY WARRANTY; without even the implied warranty of //
	13	+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the //
	14	+// GNU General Public License for more details. //
	15	+// //
	16	+// You should have received a copy of the GNU General Public License //
	17	+// along with MolDS. If not, see <http://www.gnu.org/licenses/>. //
	18	+//************************************************************************//
	19	+#include<stdio.h>
	20	+#include<stdlib.h>
	21	+#include<iostream>
	22	+#include<sstream>
	23	+#include<math.h>
	24	+#include<string>
	25	+#include<stdexcept>
	26	+#include<boost/format.hpp>
	27	+#include"../base/Enums.h"
	28	+#include"../base/Uncopyable.h"
	29	+#include"../base/PrintController.h"
	30	+#include"../base/MolDSException.h"
	31	+#include"../base/containers/ThreadSafeQueue.h"
	32	+#include"../base/MallocerFreer.h"
	33	+#include"MpiProcess.h"
	34	+#include"AsyncCommunicator.h"
	35	+using namespace std;
	36	+namespace MolDS_mpi{
	37	+AsyncCommunicator::AsyncCommunicator(){}
	38	+AsyncCommunicator::~AsyncCommunicator(){}
	39	+}
	40	+
	41	+
	42	+
	43	+

--- /dev/null

+++ b/src/mpi/AsyncCommunicator.h

		@@ -0,0 +1,127 @@
	1	+//************************************************************************//
	2	+// Copyright (C) 2011-2013 Mikiya Fujii //
	3	+// //
	4	+// This file is part of MolDS. //
	5	+// //
	6	+// MolDS is free software: you can redistribute it and/or modify //
	7	+// it under the terms of the GNU General Public License as published by //
	8	+// the Free Software Foundation, either version 3 of the License, or //
	9	+// (at your option) any later version. //
	10	+// //
	11	+// MolDS is distributed in the hope that it will be useful, //
	12	+// but WITHOUT ANY WARRANTY; without even the implied warranty of //
	13	+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the //
	14	+// GNU General Public License for more details. //
	15	+// //
	16	+// You should have received a copy of the GNU General Public License //
	17	+// along with MolDS. If not, see <http://www.gnu.org/licenses/>. //
	18	+//************************************************************************//
	19	+#ifndef INCLUDED_ASYNCCOMMUNICATOR
	20	+#define INCLUDED_ASYNCCOMMUNICATOR
	21	+#include<boost/thread.hpp>
	22	+#include<boost/thread/condition.hpp>
	23	+#include<boost/bind.hpp>
	24	+#define NON_USED 0
	25	+namespace MolDS_mpi{
	26	+
	27	+class AsyncCommunicator{
	28	+public:
	29	+ AsyncCommunicator();
	30	+ ~AsyncCommunicator();
	31	+ template<typename T> void Run(int passingTimes){
	32	+ int mpiRank = MolDS_mpi::MpiProcess::GetInstance()->GetRank();
	33	+ while(0<passingTimes){
	34	+ sleep(0.1);
	35	+ boost::mutex::scoped_lock lk(this->stateGuard);
	36	+ try{
	37	+ DataInfo dInfo = this->dataQueue.FrontPop();
	38	+ if(dInfo.mpiFuncType == MolDS_base::Send){
	39	+ MolDS_mpi::MpiProcess::GetInstance()->Send(dInfo.dest,
	40	+ dInfo.tag,
	41	+ reinterpret_cast<T*>(dInfo.vectorPtr),
	42	+ dInfo.num);
	43	+ }
	44	+ else if(dInfo.mpiFuncType == MolDS_base::Recv){
	45	+ MolDS_mpi::MpiProcess::GetInstance()->Recv(dInfo.source,
	46	+ dInfo.tag,
	47	+ reinterpret_cast<T*>(dInfo.vectorPtr),
	48	+ dInfo.num);
	49	+ }
	50	+ else if(dInfo.mpiFuncType == MolDS_base::Broadcast){
	51	+ MolDS_mpi::MpiProcess::GetInstance()->Broadcast(reinterpret_cast<T*>(dInfo.vectorPtr),
	52	+ dInfo.num,
	53	+ dInfo.source);
	54	+ }
	55	+ else{
	56	+ std::stringstream ss;
	57	+ ss << "non valid mpi function type\n";
	58	+ MolDS_base::MolDSException ex(ss.str());
	59	+ throw ex;
	60	+ }
	61	+ this->stateChange.notify_all();
	62	+ passingTimes--;
	63	+ }
	64	+ catch(MolDS_base::MolDSException ex){
	65	+ if(ex.HasKey(MolDS_base::EmptyQueue)){
	66	+ this->stateChange.wait(lk);
	67	+ continue;
	68	+ }
	69	+ else{
	70	+ throw ex;
	71	+ }
	72	+ }
	73	+ }
	74	+ }
	75	+
	76	+ template<typename T> void SetSentVector(T* vector,
	77	+ intptr_t num,
	78	+ int dest,
	79	+ int tag){
	80	+ int source = NON_USED;
	81	+ MolDS_base::MpiFunctionType mpiFuncType = MolDS_base::Send;
	82	+ this->SetVector(vector, num, source, dest, tag, mpiFuncType);
	83	+ }
	84	+
	85	+ template<typename T> void SetRecvedVector(T* vector,
	86	+ intptr_t num,
	87	+ int source,
	88	+ int tag){
	89	+ int dest = NON_USED;
	90	+ MolDS_base::MpiFunctionType mpiFuncType = MolDS_base::Recv;
	91	+ this->SetVector(vector, num, source, dest, tag, mpiFuncType);
	92	+ }
	93	+
	94	+ template<typename T> void SetBroadcastedVector(T* vector, intptr_t num, int root){
	95	+ int source = root;
	96	+ int dest = NON_USED;
	97	+ int tag = NON_USED;
	98	+ MolDS_base::MpiFunctionType mpiFuncType = MolDS_base::Broadcast;
	99	+ this->SetVector(vector, num, source, dest, tag, mpiFuncType);
	100	+ }
	101	+
	102	+private:
	103	+ struct DataInfo{intptr_t vectorPtr;
	104	+ intptr_t num;
	105	+ int source;
	106	+ int dest;
	107	+ int tag;
	108	+ MolDS_base::MpiFunctionType mpiFuncType;};
	109	+ boost::mutex stateGuard;
	110	+ boost::condition stateChange;
	111	+ MolDS_base_containers::ThreadSafeQueue<DataInfo> dataQueue;
	112	+ template<typename T> void SetVector(T* vector,
	113	+ intptr_t num,
	114	+ int source,
	115	+ int dest,
	116	+ int tag,
	117	+ MolDS_base::MpiFunctionType mpiFuncType){
	118	+ boost::mutex::scoped_lock lk(this->stateGuard);
	119	+ DataInfo dInfo = {reinterpret_cast<intptr_t>(vector), num, source, dest, tag, mpiFuncType};
	120	+ this->dataQueue.Push(dInfo);
	121	+ this->stateChange.notify_all();
	122	+ }
	123	+};
	124	+
	125	+}
	126	+#endif
	127	+

--- a/src/mpi/MpiProcess.cpp

+++ b/src/mpi/MpiProcess.cpp

		@@ -23,6 +23,7 @@
23	23	#include<math.h>
24	24	#include<string>
25	25	#include<stdexcept>
	26	+#include<omp.h>
26	27	#include<boost/format.hpp>
27	28	#include"../base/Uncopyable.h"
28	29	#include"../base/PrintController.h"

		@@ -41,9 +42,22 @@ MpiProcess::MpiProcess(int argc, char *argv[]){
41	42	this->environment = new boost::mpi::environment(argc, argv);
42	43	this->communicator = new boost::mpi::communicator();
43	44	this->messageLimit = INT_MAX;
	45	+ this->mpiConsumingTime=0.0;
	46	+ this->mpiConsumingTimeSend=0.0;
	47	+ this->mpiConsumingTimeRecv=0.0;
	48	+ this->mpiConsumingTimeBrodCast=0.0;
	49	+ this->mpiConsumingTimeAllReduce=0.0;
44	50	}
45	51
46	52	MpiProcess::~MpiProcess(){
	53	+ /*
	54	+ int rank = this->GetRank();
	55	+ printf("\nrnk:%d mpiconsumingtime = %e [s]\n",rank, this->mpiConsumingTime);
	56	+ printf("\nrnk:%d mpiconsumingtimeSend = %e [s]\n",rank, this->mpiConsumingTimeSend);
	57	+ printf("\nrnk:%d mpiconsumingtimeRecv = %e [s]\n",rank, this->mpiConsumingTimeRecv);
	58	+ printf("\nrnk:%d mpiconsumingtimeBroadcast = %e [s]\n",rank, this->mpiConsumingTimeBrodCast);
	59	+ printf("\nrnk:%d mpiconsumingtimeAllReduce = %e [s]\n",rank, this->mpiConsumingTimeAllReduce);
	60	+ */
47	61	delete this->environment;
48	62	delete this->communicator;
49	63	}

		@@ -70,6 +84,25 @@ MpiProcess* MpiProcess::GetInstance(){
70	84	return mpiProcess;
71	85	}
72	86
	87	+void MpiProcess::Barrier(){this->communicator->barrier();}
	88	+
	89	+int MpiProcess::GetMessagePassingTimes(intptr_t num)const{
	90	+ int mpiRank = MolDS_mpi::MpiProcess::GetInstance()->GetRank();
	91	+ int mpiSize = MolDS_mpi::MpiProcess::GetInstance()->GetSize();
	92	+ int mpiHeadRank = MolDS_mpi::MpiProcess::GetInstance()->GetHeadRank();
	93	+ int calcTimes = num/mpiSize;
	94	+ if(mpiRank < num%mpiSize){calcTimes+=1;}
	95	+ int mpiPassingTimes;
	96	+ if(mpiRank == mpiHeadRank){
	97	+ mpiPassingTimes = num - calcTimes;
	98	+ }
	99	+ else{
	100	+ mpiPassingTimes = calcTimes;
	101	+ }
	102	+ return mpiPassingTimes;
	103	+}
	104	+
	105	+
73	106	}
74	107
75	108

--- a/src/mpi/MpiProcess.h

+++ b/src/mpi/MpiProcess.h

		@@ -19,6 +19,7 @@
19	19	#ifndef INCLUDED_MPIPROCESS
20	20	#define INCLUDED_MPIPROCESS
21	21	#include<limits.h>
	22	+#include<omp.h>
22	23	#include<boost/mpi.hpp>
23	24	namespace MolDS_mpi{
24	25	// MpiProcess is singleton

		@@ -30,26 +31,47 @@ public:
30	31	int GetHeadRank() const{return 0;}
31	32	int GetRank() const{return this->communicator->rank();}
32	33	int GetSize() const{return this->communicator->size();}
33		- template<typename T> void Send(int dest, int tag, const T* values, intptr_t num) const{
	34	+ //template<typename T> void Send(int dest, int tag, const T* values, intptr_t num) const{
	35	+ template<typename T> void Send(int dest, int tag, const T* values, intptr_t num) {
	36	+ double startTime=0.0;
	37	+ double endTime=0.0;
34	38	std::vector<Chunk> chunks;
35	39	this->SplitMessage2Chunks(chunks, tag, values, num);
36	40	for(intptr_t i=0; i<chunks.size(); i++){
	41	+ startTime = omp_get_wtime();
37	42	this->communicator->send(dest, chunks[i].tag, &values[chunks[i].first], chunks[i].num);
	43	+ endTime = omp_get_wtime();
	44	+ this->mpiConsumingTime += endTime - startTime;
	45	+ this->mpiConsumingTimeSend += endTime - startTime;
38	46	}
39	47	}
40		- template<typename T> void Recv(int source, int tag, T* values, intptr_t num) const{
	48	+ //template<typename T> void Recv(int source, int tag, T* values, intptr_t num) const{
	49	+ template<typename T> void Recv(int source, int tag, T* values, intptr_t num) {
	50	+ double startTime=0.0;
	51	+ double endTime=0.0;
41	52	std::vector<Chunk> chunks;
42	53	this->SplitMessage2Chunks(chunks, tag, values, num);
43	54	for(intptr_t i=0; i<chunks.size(); i++){
	55	+ startTime = omp_get_wtime();
44	56	this->communicator->recv(source, chunks[i].tag, &values[chunks[i].first], chunks[i].num);
	57	+ endTime = omp_get_wtime();
	58	+ this->mpiConsumingTime += endTime - startTime;
	59	+ this->mpiConsumingTimeRecv += endTime - startTime;
45	60	}
46	61	}
47		- template<typename T> void Broadcast(T* values, intptr_t num, int root) const{
	62	+ //template<typename T> void Broadcast(T* values, intptr_t num, int root) const{
	63	+ template<typename T> void Broadcast(T* values, intptr_t num, int root){
	64	+ double startTime=0.0;
	65	+ double endTime=0.0;
48	66	std::vector<Chunk> chunks;
49	67	intptr_t tag=0;
50	68	this->SplitMessage2Chunks(chunks, tag, values, num);
51	69	for(intptr_t i=0; i<chunks.size(); i++){
	70	+ startTime = omp_get_wtime();
52	71	broadcast(*this->communicator, &values[chunks[i].first], chunks[i].num, root);
	72	+ endTime = omp_get_wtime();
	73	+ this->mpiConsumingTime += endTime - startTime;
	74	+ this->mpiConsumingTimeBrodCast += endTime - startTime;
53	75	}
54	76	}
55	77	template<typename T, typename Op> void Reduce(const T* inValues, intptr_t num, T* outValues, Op op, int root) const{

		@@ -60,15 +82,23 @@ public:
60	82	reduce(*this->communicator, &inValues[chunks[i].first], chunks[i].num, &outValues[chunks[i].first], op, root);
61	83	}
62	84	}
63		- template<typename T, typename Op> void AllReduce(const T* inValues, intptr_t num, T* outValues, Op op) const{
	85	+ //template<typename T, typename Op> void AllReduce(const T* inValues, intptr_t num, T* outValues, Op op) const{
	86	+ template<typename T, typename Op> void AllReduce(const T* inValues, intptr_t num, T* outValues, Op op){
	87	+ double startTime=0.0;
	88	+ double endTime=0.0;
64	89	std::vector<Chunk> chunks;
65	90	intptr_t tag=0;
66	91	this->SplitMessage2Chunks(chunks, tag, inValues, num);
67	92	for(intptr_t i=0; i<chunks.size(); i++){
	93	+ startTime = omp_get_wtime();
68	94	all_reduce(*this->communicator, &inValues[chunks[i].first], chunks[i].num, &outValues[chunks[i].first], op);
	95	+ endTime = omp_get_wtime();
	96	+ this->mpiConsumingTime += endTime - startTime;
	97	+ this->mpiConsumingTimeAllReduce += endTime - startTime;
69	98	}
70	99	}
71		- template<typename T, typename Op> void AllReduce(T* values, intptr_t num, Op op) const{
	100	+ //template<typename T, typename Op> void AllReduce(T* values, intptr_t num, Op op) const{
	101	+ template<typename T, typename Op> void AllReduce(T* values, intptr_t num, Op op){
72	102	double* tmpValues=NULL;
73	103	try{
74	104	MolDS_base::MallocerFreer::GetInstance()->Malloc<double>(&tmpValues, num);

		@@ -83,7 +113,8 @@ public:
83	113	}
84	114	MolDS_base::MallocerFreer::GetInstance()->Free<double>(&tmpValues, num);
85	115	}
86		-
	116	+ void Barrier();
	117	+ int GetMessagePassingTimes(intptr_t num) const;
87	118	private:
88	119	static MpiProcess* mpiProcess;
89	120	MpiProcess();

		@@ -116,6 +147,11 @@ private:
116	147	chunks.push_back(chunk);
117	148	}
118	149	}
	150	+ double mpiConsumingTime;
	151	+ double mpiConsumingTimeSend;
	152	+ double mpiConsumingTimeRecv;
	153	+ double mpiConsumingTimeBrodCast;
	154	+ double mpiConsumingTimeAllReduce;
119	155	};
120	156
121	157	}

MolDS Fork

Commit

Tags

Frequently used words (click to add to your profile)

Commit MetaInfo

Log Message

Change Summary

Incremental Difference

MolDS
Fork