24 #include <sys/param.h> 25 #include <unordered_map> 27 #ifdef FLOW123D_HAVE_PYTHON 29 #endif // FLOW123D_HAVE_PYTHON 35 #include <boost/format.hpp> 100 #ifdef FLOW123D_DEBUG_PROFILER 105 const int timer_no_child=-1;
107 Timer::Timer(
const CodePoint &cp,
int parent)
113 full_hash_(cp.hash_),
114 hash_idx_(cp.hash_idx_),
115 parent_timer(parent),
117 total_deallocated_(0),
119 current_allocated_(0),
122 #ifdef FLOW123D_HAVE_PETSC 123 , petsc_start_memory(0),
124 petsc_end_memory (0),
125 petsc_memory_difference(0),
126 petsc_peak_memory(0),
127 petsc_local_peak_memory(0)
128 #endif // FLOW123D_HAVE_PETSC 130 for(
unsigned int i=0; i< max_n_childs ;i++) child_timers[i]=timer_no_child;
138 os <<
" Timer: " << timer.tag() << endl;
139 os <<
" malloc: " << timer.total_allocated_ << endl;
140 os <<
" dalloc: " << timer.total_deallocated_ << endl;
141 #ifdef FLOW123D_HAVE_PETSC 142 os <<
" start: " << timer.petsc_start_memory << endl;
143 os <<
" stop : " << timer.petsc_end_memory << endl;
144 os <<
" diff : " << timer.petsc_memory_difference <<
" (" << timer.petsc_end_memory - timer.petsc_start_memory <<
")" << endl;
145 os <<
" peak : " << timer.petsc_peak_memory <<
" (" << timer.petsc_local_peak_memory <<
")" << endl;
146 #endif // FLOW123D_HAVE_PETSC 152 double Timer::cumulative_time()
const {
156 void Profiler::accept_from_child(
Timer &parent,
Timer &child) {
158 for (
unsigned int i = 0; i < Timer::max_n_childs; i++) {
159 child_timer = child.child_timers[i];
160 if (child_timer != timer_no_child) {
162 accept_from_child(child, timers_[child_timer]);
166 parent.total_allocated_ += child.total_allocated_;
167 parent.total_deallocated_ += child.total_deallocated_;
168 parent.alloc_called += child.alloc_called;
169 parent.dealloc_called += child.dealloc_called;
171 #ifdef FLOW123D_HAVE_PETSC 172 if (petsc_monitor_memory) {
174 parent.petsc_memory_difference += child.petsc_memory_difference;
175 parent.current_allocated_ += child.current_allocated_;
179 parent.petsc_peak_memory =
max(parent.petsc_peak_memory, child.petsc_peak_memory);
181 #endif // FLOW123D_HAVE_PETSC 183 parent.max_allocated_ =
max(parent.max_allocated_, child.max_allocated_);
187 void Timer::pause() {
188 #ifdef FLOW123D_HAVE_PETSC 189 if (Profiler::get_petsc_memory_monitoring()) {
191 PetscMemoryGetMaximumUsage(&petsc_local_peak_memory);
192 if (petsc_peak_memory < petsc_local_peak_memory)
193 petsc_peak_memory = petsc_local_peak_memory;
195 #endif // FLOW123D_HAVE_PETSC 198 void Timer::resume() {
199 #ifdef FLOW123D_HAVE_PETSC 200 if (Profiler::get_petsc_memory_monitoring()) {
203 PetscMemorySetGetMaximumUsage();
205 #endif // FLOW123D_HAVE_PETSC 209 #ifdef FLOW123D_HAVE_PETSC 210 if (Profiler::get_petsc_memory_monitoring()) {
213 PetscMemorySetGetMaximumUsage();
214 PetscMemoryGetCurrentUsage (&petsc_start_memory);
216 #endif // FLOW123D_HAVE_PETSC 218 if (start_count == 0) {
228 #ifdef FLOW123D_HAVE_PETSC 229 if (Profiler::get_petsc_memory_monitoring()) {
231 PetscMemoryGetCurrentUsage (&petsc_end_memory);
232 petsc_memory_difference += petsc_end_memory - petsc_start_memory;
235 PetscMemoryGetMaximumUsage(&petsc_local_peak_memory);
236 if (petsc_peak_memory < petsc_local_peak_memory)
237 petsc_peak_memory = petsc_local_peak_memory;
239 #endif // FLOW123D_HAVE_PETSC 241 if (forced) start_count=1;
243 if (start_count == 1) {
244 cumul_time += (
TimePoint() - start_time);
255 void Timer::add_child(
int child_index,
const Timer &child)
257 unsigned int idx = child.hash_idx_;
258 if (child_timers[idx] != timer_no_child) {
262 i=( i < max_n_childs ? i+1 : 0);
263 }
while (i!=idx && child_timers[i] != timer_no_child);
264 ASSERT(i!=idx)(tag()).error(
"Too many children of the timer");
268 child_timers[idx] = child_index;
273 string Timer::code_point_str()
const {
275 code_point_->file_, code_point_->line_, code_point_->func_ );
288 if (_instance != NULL) {
295 if (_instance == NULL) {
296 MemoryAlloc::malloc_map().reserve(Profiler::malloc_map_reserve);
305 const long Profiler::malloc_map_reserve = 100 * 1000;
310 start_time( time(NULL) ),
314 static CONSTEXPR_ CodePoint main_cp = CODE_POINT(
"Whole Program");
315 set_memory_monitoring(
true,
true);
316 #ifdef FLOW123D_DEBUG_PROFILER 317 MemoryAlloc::malloc_map().reserve(Profiler::malloc_map_reserve);
318 timers_.push_back(
Timer(main_cp, 0) );
325 void Profiler::propagate_timers() {
326 for (
unsigned int i = 0; i < Timer::max_n_childs; i++) {
327 unsigned int child_timer = timers_[0].child_timers[i];
328 if ((
signed int)child_timer != timer_no_child) {
330 accept_from_child(timers_[0], timers_[child_timer]);
338 task_description_ = description;
345 flow_name_ = program_name;
346 flow_version_ = program_version;
347 flow_branch_ = branch;
348 flow_revision_ = revision;
354 int Profiler::start_timer(
const CodePoint &cp) {
355 unsigned int parent_node = actual_node;
357 int child_idx = find_child(cp);
361 child_idx=timers_.size();
362 timers_.push_back(
Timer(cp, actual_node) );
363 timers_[actual_node].add_child(child_idx , timers_.back() );
365 actual_node=child_idx;
368 timers_[parent_node].pause();
370 timers_[actual_node].start();
377 int Profiler::find_child(
const CodePoint &cp) {
378 Timer &timer =timers_[actual_node];
379 unsigned int idx = cp.hash_idx_;
380 unsigned int child_idx;
382 if (timer.child_timers[idx] == timer_no_child)
break;
384 child_idx=timer.child_timers[idx];
385 ASSERT_LT(child_idx, timers_.size()).error();
386 if (timers_[child_idx].full_hash_ == cp.hash_)
return child_idx;
387 idx = ( (
unsigned int)(idx)==(Timer::max_n_childs - 1) ? 0 : idx+1 );
388 }
while ( (
unsigned int)(idx) != cp.hash_idx_ );
394 void Profiler::stop_timer(
const CodePoint &cp) {
395 #ifdef FLOW123D_DEBUG 397 Timer &timer=timers_[actual_node];
398 for(
unsigned int i=0; i < Timer::max_n_childs; i++)
399 if (timer.child_timers[i] != timer_no_child)
400 ASSERT(! timers_[timer.child_timers[i]].running())(timers_[timer.child_timers[i]].tag())(timer.tag())
401 .error(
"Child timer running while closing timer.");
403 unsigned int child_timer = actual_node;
404 if ( cp.hash_ != timers_[actual_node].full_hash_) {
406 for(
unsigned int node=actual_node; node != 0; node=timers_[node].parent_timer) {
407 if ( cp.hash_ == timers_[node].full_hash_) {
409 for(; (
unsigned int)(actual_node) != node; actual_node=timers_[actual_node].parent_timer) {
410 WarningOut() <<
"Timer to close '" << cp.tag_ <<
"' do not match actual timer '" 411 << timers_[actual_node].tag() <<
"'. Force closing actual." << std::endl;
412 timers_[actual_node].stop(
true);
415 timers_[actual_node].stop(
false);
416 actual_node = timers_[actual_node].parent_timer;
419 if (actual_node == child_timer)
423 timers_[actual_node].resume();
431 timers_[actual_node].stop(
false);
432 actual_node = timers_[actual_node].parent_timer;
435 if (actual_node == child_timer)
439 timers_[actual_node].resume();
444 void Profiler::stop_timer(
int timer_index) {
448 if (timers_[timer_index].running()) {
449 ASSERT_EQ(timer_index, (
int)actual_node).error();
450 stop_timer(*timers_[timer_index].code_point_);
457 void Profiler::add_calls(
unsigned int n_calls) {
458 timers_[actual_node].call_count += n_calls-1;
464 if (!global_monitor_memory)
467 MemoryAlloc::malloc_map()[p] =
static_cast<int>(size);
468 timers_[actual_node].total_allocated_ += size;
469 timers_[actual_node].current_allocated_ += size;
470 timers_[actual_node].alloc_called++;
472 if (timers_[actual_node].current_allocated_ > timers_[actual_node].max_allocated_)
473 timers_[actual_node].max_allocated_ = timers_[actual_node].current_allocated_;
479 if (!global_monitor_memory)
482 int size =
sizeof(p);
483 if (MemoryAlloc::malloc_map()[(long)p] > 0) {
484 size = MemoryAlloc::malloc_map()[(long)p];
485 MemoryAlloc::malloc_map().erase((
long)p);
487 timers_[actual_node].total_deallocated_ += size;
488 timers_[actual_node].current_allocated_ -= size;
489 timers_[actual_node].dealloc_called++;
494 const int measurements = 100;
498 for (
unsigned int i = 1; i < measurements; i++) {
503 while ((t2 - t1) == 0) t2 =
TimePoint ();
509 return (result / measurements) * 1000;
513 std::string common_prefix( std::string a, std::string b ) {
514 if( a.size() > b.size() )
std::swap(a,b) ;
515 return std::string( a.begin(), std::mismatch( a.begin(), a.end(), b.begin() ).first ) ;
520 template<
typename ReduceFunctor>
521 void Profiler::add_timer_info(ReduceFunctor reduce,
nlohmann::json* holder,
int timer_idx,
double parent_time) {
524 Timer &timer = timers_[timer_idx];
525 ASSERT(timer_idx >=0)(timer_idx).error(
"Wrong timer index.");
526 ASSERT(timer.parent_timer >=0).error(
"Inconsistent tree.");
529 string filepath = timer.code_point_->file_;
532 #ifdef FLOW123D_SOURCE_DIR 533 string common_path = common_prefix (
string(FLOW123D_SOURCE_DIR), filepath);
534 filepath.erase (0, common_path.size());
541 double cumul_time_sum;
542 node[
"tag"] = timer.tag();
543 node[
"file-path"] = filepath;
544 node[
"file-line"] = timer.code_point_->line_;
545 node[
"function"] = timer.code_point_->func_;
546 cumul_time_sum = reduce(timer, node);
550 if (timer_idx == 0) parent_time = cumul_time_sum;
551 double percent = parent_time > 1.0e-10 ? cumul_time_sum / parent_time * 100.0 : 0.0;
552 node[
"percent"] = percent;
556 bool has_children =
false;
557 for (
unsigned int i = 0; i < Timer::max_n_childs; i++) {
558 if (timer.child_timers[i] != timer_no_child) {
559 add_timer_info (reduce, &children, timer.child_timers[i], cumul_time_sum);
569 node[
"children"] = children;
578 void save_nonmpi_metric (
nlohmann::json &node, T * ptr,
string name) {
579 node[name +
"-min"] = *ptr;
580 node[name +
"-max"] = *ptr;
581 node[name +
"-sum"] = *ptr;
584 std::shared_ptr<std::ostream> Profiler::get_default_output_stream() {
588 return make_shared<ofstream>(json_filepath.c_str());
592 #ifdef FLOW123D_HAVE_MPI 601 int mpi_rank, mpi_size;
608 bool temp_memory_monitoring = global_monitor_memory;
609 set_memory_monitoring(
false, petsc_monitor_memory);
621 int call_count = timer.call_count;
622 double cumul_time = timer.cumulative_time ();
624 long memory_allocated = (long)timer.total_allocated_;
625 long memory_deallocated = (
long)timer.total_deallocated_;
626 long memory_peak = (long)timer.max_allocated_;
628 int alloc_called = timer.alloc_called;
629 int dealloc_called = timer.dealloc_called;
632 save_mpi_metric<double>(node, comm, &cumul_time,
"cumul-time");
633 save_mpi_metric<int>(node, comm, &call_count,
"call-count");
635 save_mpi_metric<long>(node, comm, &memory_allocated,
"memory-alloc");
636 save_mpi_metric<long>(node, comm, &memory_deallocated,
"memory-dealloc");
637 save_mpi_metric<long>(node, comm, &memory_peak,
"memory-peak");
639 save_mpi_metric<int>(node, comm, &alloc_called,
"memory-alloc-called");
640 save_mpi_metric<int>(node, comm, &dealloc_called,
"memory-dealloc-called");
642 #ifdef FLOW123D_HAVE_PETSC 643 long petsc_memory_difference = (long)timer.petsc_memory_difference;
644 long petsc_peak_memory = (
long)timer.petsc_peak_memory;
645 save_mpi_metric<long>(node, comm, &petsc_memory_difference,
"memory-petsc-diff");
646 save_mpi_metric<long>(node, comm, &petsc_peak_memory,
"memory-petsc-peak");
647 #endif // FLOW123D_HAVE_PETSC 652 add_timer_info (reduce, &jsonChildren, 0, 0.0);
653 jsonRoot[
"children"] = jsonChildren;
654 output_header(jsonRoot, mpi_size);
665 const int FLOW123D_JSON_HUMAN_READABLE = 2;
667 os << jsonRoot.
dump(FLOW123D_JSON_HUMAN_READABLE) << endl;
669 }
catch (exception & e) {
671 ss <<
"nlohmann::json::dump error: " << e.what() <<
"\n";
672 THROW( ExcMessage() << EI_Message(ss.str()) );
676 set_memory_monitoring(temp_memory_monitoring, petsc_monitor_memory);
685 if (profiler_path ==
"") {
686 output(comm, *get_default_output_stream());
688 json_filepath = profiler_path;
689 std::shared_ptr<std::ostream> os = make_shared<ofstream>(profiler_path.c_str());
711 const int FLOW123D_MPI_SINGLE_PROCESS = 1;
712 output_header(jsonRoot, FLOW123D_MPI_SINGLE_PROCESS);
719 int call_count = timer.call_count;
720 double cumul_time = timer.cumulative_time ();
722 long memory_allocated = (long)timer.total_allocated_;
723 long memory_deallocated = (
long)timer.total_deallocated_;
724 long memory_peak = (long)timer.max_allocated_;
726 int alloc_called = timer.alloc_called;
727 int dealloc_called = timer.dealloc_called;
729 save_nonmpi_metric<double>(node, &cumul_time,
"cumul-time");
730 save_nonmpi_metric<int>(node, &call_count,
"call-count");
732 save_nonmpi_metric<long>(node, &memory_allocated,
"memory-alloc");
733 save_nonmpi_metric<long>(node, &memory_deallocated,
"memory-dealloc");
734 save_nonmpi_metric<long>(node, &memory_peak,
"memory-peak");
736 save_nonmpi_metric<int>(node, &alloc_called,
"memory-alloc-called");
737 save_nonmpi_metric<int>(node, &dealloc_called,
"memory-dealloc-called");
739 #ifdef FLOW123D_HAVE_PETSC 740 long petsc_memory_difference = (long)timer.petsc_memory_difference;
741 long petsc_peak_memory = (
long)timer.petsc_peak_memory;
742 save_nonmpi_metric<long>(node, &petsc_memory_difference,
"memory-petsc-diff");
743 save_nonmpi_metric<long>(node, &petsc_peak_memory,
"memory-petsc-peak");
744 #endif // FLOW123D_HAVE_PETSC 749 add_timer_info(reduce, &jsonChildren, 0, 0.0);
750 jsonRoot[
"children"] = jsonChildren;
757 const int FLOW123D_JSON_HUMAN_READABLE = 2;
759 os << jsonRoot.
dump(FLOW123D_JSON_HUMAN_READABLE) << endl;
761 }
catch (exception & e) {
763 ss <<
"nlohmann::json::dump error: " << e.what() <<
"\n";
764 THROW( ExcMessage() << EI_Message(ss.str()) );
770 if(profiler_path ==
"") {
771 output(*get_default_output_stream());
773 json_filepath = profiler_path;
774 std::shared_ptr<std::ostream> os = make_shared<ofstream>(profiler_path.c_str());
779 void Profiler::output_header (
nlohmann::json &root,
int mpi_size) {
780 time_t end_time = time(NULL);
782 const char format[] =
"%x %X";
783 char start_time_string[BUFSIZ] = {0};
784 strftime(start_time_string,
sizeof (start_time_string) - 1, format, localtime(&start_time));
786 char end_time_string[BUFSIZ] = {0};
787 strftime(end_time_string,
sizeof (end_time_string) - 1, format, localtime(&end_time));
790 root[
"program-name"] = flow_name_;
791 root[
"program-version"] = flow_version_;
792 root[
"program-branch"] = flow_branch_;
793 root[
"program-revision"] = flow_revision_;
794 root[
"program-build"] = flow_build_;
798 root[
"task-description"] = task_description_;
799 root[
"task-size"] = task_size_;
802 root[
"run-process-count"] = mpi_size;
803 root[
"run-started-at"] = start_time_string;
804 root[
"run-finished-at"] = end_time_string;
807 #ifdef FLOW123D_HAVE_PYTHON 810 if (json_filepath ==
"")
return;
819 #ifndef FLOW123D_HAVE_CYGWIN 821 PyObject * python_module = PythonLoader::load_module_by_name (
"profiler.profiler_formatter_module");
825 PyObject * convert_method = PythonLoader::get_callable (python_module,
"convert" );
827 int argument_index = 0;
828 PyObject * arguments = PyTuple_New (3);
831 PyObject * tmp = PyUnicode_FromString (json_filepath.c_str());
832 PyTuple_SetItem (arguments, argument_index++, tmp);
835 tmp = PyUnicode_FromString ((json_filepath + output_file_suffix).c_str());
836 PyTuple_SetItem (arguments, argument_index++, tmp);
839 tmp = PyUnicode_FromString (formatter.c_str());
840 PyTuple_SetItem (arguments, argument_index++, tmp);
843 PyObject_CallObject (convert_method, arguments);
845 PythonLoader::check_error();
850 MessageOut() <<
"# Note: converting json profiler reports is not" 851 <<
" supported under Windows or Cygwin environment for now.\n" 852 <<
"# You can use python script located in bin/python folder" 853 <<
" in order to convert json report to txt or csv format.\n" 854 <<
"python profiler_formatter_script.py --input \"" << json_filepath
855 <<
"\" --output \"profiler.txt\"" << std::endl;
856 #endif // FLOW123D_HAVE_CYGWIN 862 #endif // FLOW123D_HAVE_PYTHON 868 .error(
"Forbidden to uninitialize the Profiler when actual timer is not zero.");
870 set_memory_monitoring(
false,
false);
874 bool Profiler::global_monitor_memory =
false;
875 bool Profiler::petsc_monitor_memory =
true;
876 void Profiler::set_memory_monitoring(
const bool global_monitor,
const bool petsc_monitor) {
877 global_monitor_memory = global_monitor;
878 petsc_monitor_memory = petsc_monitor;
881 bool Profiler::get_global_memory_monitoring() {
882 return global_monitor_memory;
885 bool Profiler::get_petsc_memory_monitoring() {
886 return petsc_monitor_memory;
889 unordered_map_with_alloc & MemoryAlloc::malloc_map() {
890 static unordered_map_with_alloc static_malloc_map;
891 return static_malloc_map;
894 void * Profiler::operator
new (
size_t size) {
895 return malloc (size);
898 void Profiler::operator
delete (
void* p) {
903 void * p = malloc(size);
909 void * p = malloc(size);
914 void *
operator new[] (std::size_t size,
const std::nothrow_t&)
throw() {
915 void * p = malloc(size);
920 void operator delete(
void *p)
throw() {
925 void operator delete(
void *p, std::size_t)
throw() {
930 void operator delete[](
void *p)
throw() {
935 void operator delete[](
void *p, std::size_t)
throw() {
940 #else // def FLOW123D_DEBUG_PROFILER 958 .error(
"Forbidden to uninitialize the Profiler when actual timer is not zero.");
959 set_memory_monitoring(
false,
false);
967 #endif // def FLOW123D_DEBUG_PROFILER
void set_task_info(string, int)
static int min(int *val, MPI_Comm comm)
#define OPERATOR_NEW_THROW_EXCEPTION
static int sum(int *val, MPI_Comm comm)
a class to store JSON values
#define MessageOut()
Macro defining 'message' record of log.
double get_resolution() const
std::string format(CStringRef format_str, ArgList args)
void chkerr(unsigned int ierr)
Replacement of new/delete operator in the spirit of xmalloc.
#define ASSERT(expr)
Allow use shorter versions of macro names if these names is not used with external library...
void set_program_info(string, string, string, string, string)
#define MPI_Reduce(sendbuf, recvbuf, count, datatype, op, root, comm)
static void uninitialize()
void output(MPI_Comm, ostream &)
void notify_malloc(const size_t)
static long min(long *val, MPI_Comm comm)
static int max(int *val, MPI_Comm comm)
void swap(nlohmann::json &j1, nlohmann::json &j2) noexcept(is_nothrow_move_constructible< nlohmann::json >::value andis_nothrow_move_assignable< nlohmann::json >::value)
exchanges the values of two JSON objects
STREAM & operator<<(STREAM &s, UpdateFlags u)
void push_back(basic_json &&val)
add an object to an array
Dedicated class for storing path to input and output files.
void notify_free(const size_t)
#define ASSERT_LT(a, b)
Definition of comparative assert macro (Less Than)
string_t dump(const int indent=-1) const
serialization
#define WarningOut()
Macro defining 'warning' record of log.
static Profiler * instance(bool clear=false)
#define MPI_Barrier(comm)
#define THROW(whole_exception_expr)
Wrapper for throw. Saves the throwing point.
#define ASSERT_EQ(a, b)
Definition of comparative assert macro (EQual)
void transform_profiler_data(const string &, const string &)