24 #include <sys/param.h>
25 #include <unordered_map>
27 #ifdef FLOW123D_HAVE_PYTHON
29 #endif // FLOW123D_HAVE_PYTHON
35 #include <boost/format.hpp>
99 #ifdef FLOW123D_DEBUG_PROFILER
104 const int timer_no_child=-1;
106 Timer::Timer(
const CodePoint &cp,
int parent)
112 full_hash_(cp.hash_),
113 hash_idx_(cp.hash_idx_),
114 parent_timer(parent),
116 total_deallocated_(0),
118 current_allocated_(0),
121 #ifdef FLOW123D_HAVE_PETSC
122 , petsc_start_memory(0),
123 petsc_end_memory (0),
124 petsc_memory_difference(0),
125 petsc_peak_memory(0),
126 petsc_local_peak_memory(0)
129 for(
unsigned int i=0; i< max_n_childs ;i++) child_timers[i]=timer_no_child;
137 os <<
" Timer: " << timer.tag() << endl;
138 os <<
" malloc: " << timer.total_allocated_ << endl;
139 os <<
" dalloc: " << timer.total_deallocated_ << endl;
140 #ifdef FLOW123D_HAVE_PETSC
141 os <<
" start: " << timer.petsc_start_memory << endl;
142 os <<
" stop : " << timer.petsc_end_memory << endl;
143 os <<
" diff : " << timer.petsc_memory_difference <<
" (" << timer.petsc_end_memory - timer.petsc_start_memory <<
")" << endl;
144 os <<
" peak : " << timer.petsc_peak_memory <<
" (" << timer.petsc_local_peak_memory <<
")" << endl;
145 #endif // FLOW123D_HAVE_PETSC
151 double Timer::cumulative_time()
const {
155 void Profiler::accept_from_child(
Timer &parent,
Timer &child) {
157 for (
unsigned int i = 0; i < Timer::max_n_childs; i++) {
158 child_timer = child.child_timers[i];
159 if (child_timer != timer_no_child) {
161 accept_from_child(child, timers_[child_timer]);
165 parent.total_allocated_ += child.total_allocated_;
166 parent.total_deallocated_ += child.total_deallocated_;
167 parent.alloc_called += child.alloc_called;
168 parent.dealloc_called += child.dealloc_called;
170 #ifdef FLOW123D_HAVE_PETSC
171 if (petsc_monitor_memory) {
173 parent.petsc_memory_difference += child.petsc_memory_difference;
174 parent.current_allocated_ += child.current_allocated_;
178 parent.petsc_peak_memory = max(parent.petsc_peak_memory, child.petsc_peak_memory);
180 #endif // FLOW123D_HAVE_PETSC
182 parent.max_allocated_ = max(parent.max_allocated_, child.max_allocated_);
186 void Timer::pause() {
187 #ifdef FLOW123D_HAVE_PETSC
188 if (Profiler::get_petsc_memory_monitoring()) {
190 PetscMemoryGetMaximumUsage(&petsc_local_peak_memory);
191 if (petsc_peak_memory < petsc_local_peak_memory)
192 petsc_peak_memory = petsc_local_peak_memory;
194 #endif // FLOW123D_HAVE_PETSC
197 void Timer::resume() {
198 #ifdef FLOW123D_HAVE_PETSC
199 if (Profiler::get_petsc_memory_monitoring()) {
202 PetscMemorySetGetMaximumUsage();
204 #endif // FLOW123D_HAVE_PETSC
208 #ifdef FLOW123D_HAVE_PETSC
209 if (Profiler::get_petsc_memory_monitoring()) {
212 PetscMemorySetGetMaximumUsage();
213 PetscMemoryGetCurrentUsage (&petsc_start_memory);
215 #endif // FLOW123D_HAVE_PETSC
217 if (start_count == 0) {
227 #ifdef FLOW123D_HAVE_PETSC
228 if (Profiler::get_petsc_memory_monitoring()) {
230 PetscMemoryGetCurrentUsage (&petsc_end_memory);
231 petsc_memory_difference += petsc_end_memory - petsc_start_memory;
234 PetscMemoryGetMaximumUsage(&petsc_local_peak_memory);
235 if (petsc_peak_memory < petsc_local_peak_memory)
236 petsc_peak_memory = petsc_local_peak_memory;
238 #endif // FLOW123D_HAVE_PETSC
240 if (forced) start_count=1;
242 if (start_count == 1) {
243 cumul_time += (
TimePoint() - start_time);
254 void Timer::add_child(
int child_index,
const Timer &child)
256 unsigned int idx = child.hash_idx_;
257 if (child_timers[idx] != timer_no_child) {
261 i=( i < max_n_childs ? i+1 : 0);
262 }
while (i!=idx && child_timers[i] != timer_no_child);
267 child_timers[idx] = child_index;
272 string Timer::code_point_str()
const {
274 code_point_->file_, code_point_->line_, code_point_->func_ );
287 if (_instance != NULL) {
294 if (_instance == NULL) {
295 MemoryAlloc::malloc_map().reserve(Profiler::malloc_map_reserve);
304 const long Profiler::malloc_map_reserve = 100 * 1000;
309 start_time( time(NULL) ),
311 none_timer_(CODE_POINT(
"NONE TIMER"), 0),
312 calibration_time_(-1)
315 static CONSTEXPR_ CodePoint main_cp = CODE_POINT(
"Whole Program");
316 set_memory_monitoring(
true,
true);
317 #ifdef FLOW123D_DEBUG_PROFILER
318 MemoryAlloc::malloc_map().reserve(Profiler::malloc_map_reserve);
319 timers_.push_back(
Timer(main_cp, 0) );
327 uint SIZE = 64 * 1024;
328 uint HALF = SIZE / 2;
330 Timer &timer = timers_[actual_node];
333 while(
TimePoint() - timer.start_time < 0.1) {
334 double * block =
new double[SIZE];
335 for(
uint i=0; i<HALF; i++) {
336 block[HALF+i] = block[i]*block[i] + i;
346 const double reference_count = 7730;
350 calibration_time_ = 10 * timer.cumul_time * reference_count / count;
351 LogOut() <<
"Profiler calibration count: " << count << std::endl;
352 LogOut() <<
"Profiler calibration time: " << calibration_time_ << std::endl;
358 void Profiler::propagate_timers() {
359 for (
unsigned int i = 0; i < Timer::max_n_childs; i++) {
360 unsigned int child_timer = timers_[0].child_timers[i];
361 if ((
signed int)child_timer != timer_no_child) {
363 accept_from_child(timers_[0], timers_[child_timer]);
371 task_description_ = description;
378 flow_name_ = program_name;
379 flow_version_ = program_version;
380 flow_branch_ = branch;
381 flow_revision_ = revision;
387 int Profiler::start_timer(
const CodePoint &cp) {
388 unsigned int parent_node = actual_node;
390 int child_idx = find_child(cp);
394 child_idx=timers_.size();
395 timers_.push_back(
Timer(cp, actual_node) );
396 timers_[actual_node].add_child(child_idx , timers_.back() );
398 actual_node=child_idx;
401 timers_[parent_node].pause();
403 timers_[actual_node].start();
410 int Profiler::find_child(
const CodePoint &cp) {
411 Timer &timer =timers_[actual_node];
412 unsigned int idx = cp.hash_idx_;
413 unsigned int child_idx;
415 if (timer.child_timers[idx] == timer_no_child)
break;
417 child_idx=timer.child_timers[idx];
419 if (timers_[child_idx].full_hash_ == cp.hash_)
return child_idx;
420 idx = ( (
unsigned int)(idx)==(Timer::max_n_childs - 1) ? 0 : idx+1 );
421 }
while ( (
unsigned int)(idx) != cp.hash_idx_ );
427 void Profiler::stop_timer(
const CodePoint &cp) {
428 #ifdef FLOW123D_DEBUG
430 Timer &timer=timers_[actual_node];
431 for(
unsigned int i=0; i < Timer::max_n_childs; i++)
432 if (timer.child_timers[i] != timer_no_child)
433 ASSERT_PERMANENT(! timers_[timer.child_timers[i]].running())(timers_[timer.child_timers[i]].tag())(timer.tag())
434 .error(
"Child timer running while closing timer.");
436 unsigned int child_timer = actual_node;
437 if ( cp.hash_ != timers_[actual_node].full_hash_) {
439 for(
unsigned int node=actual_node; node != 0; node=timers_[node].parent_timer) {
440 if ( cp.hash_ == timers_[node].full_hash_) {
442 for(; (
unsigned int)(actual_node) != node; actual_node=timers_[actual_node].parent_timer) {
443 WarningOut() <<
"Timer to close '" << cp.tag_ <<
"' do not match actual timer '"
444 << timers_[actual_node].tag() <<
"'. Force closing actual." << std::endl;
445 timers_[actual_node].stop(
true);
448 timers_[actual_node].stop(
false);
449 actual_node = timers_[actual_node].parent_timer;
452 if (actual_node == child_timer)
456 timers_[actual_node].resume();
464 timers_[actual_node].stop(
false);
465 actual_node = timers_[actual_node].parent_timer;
468 if (actual_node == child_timer)
472 timers_[actual_node].resume();
477 void Profiler::stop_timer(
int timer_index) {
481 if (timers_[timer_index].running()) {
483 stop_timer(*timers_[timer_index].code_point_);
490 void Profiler::add_calls(
unsigned int n_calls) {
491 timers_[actual_node].call_count += n_calls-1;
497 if (!global_monitor_memory)
500 MemoryAlloc::malloc_map()[p] =
static_cast<int>(size);
501 timers_[actual_node].total_allocated_ += size;
502 timers_[actual_node].current_allocated_ += size;
503 timers_[actual_node].alloc_called++;
505 if (timers_[actual_node].current_allocated_ > timers_[actual_node].max_allocated_)
506 timers_[actual_node].max_allocated_ = timers_[actual_node].current_allocated_;
512 if (!global_monitor_memory)
515 int size =
sizeof(p);
516 if (MemoryAlloc::malloc_map()[(long)p] > 0) {
517 size = MemoryAlloc::malloc_map()[(long)p];
518 MemoryAlloc::malloc_map().erase((
long)p);
520 timers_[actual_node].total_deallocated_ += size;
521 timers_[actual_node].current_allocated_ -= size;
522 timers_[actual_node].dealloc_called++;
527 const int measurements = 100;
531 for (
unsigned int i = 1; i < measurements; i++) {
536 while ((t2 - t1) == 0) t2 =
TimePoint ();
542 return (result / measurements) * 1000;
546 Timer Profiler::find_timer(
string tag) {
547 for(
auto t : timers_) {
548 if (t.tag() == tag)
return t;
554 std::string common_prefix( std::string a, std::string b ) {
555 if( a.size() > b.size() )
std::swap(a,b) ;
556 return std::string( a.begin(), std::mismatch( a.begin(), a.end(), b.begin() ).first ) ;
561 template<
typename ReduceFunctor>
562 void Profiler::add_timer_info(ReduceFunctor reduce,
nlohmann::json* holder,
int timer_idx,
double parent_time) {
565 Timer &timer = timers_[timer_idx];
570 string filepath = timer.code_point_->file_;
573 #ifdef FLOW123D_SOURCE_DIR
574 string common_path = common_prefix (
string(FLOW123D_SOURCE_DIR), filepath);
575 filepath.erase (0, common_path.size());
582 double cumul_time_sum;
583 node[
"tag"] = timer.tag();
584 node[
"file-path"] = filepath;
585 node[
"file-line"] = timer.code_point_->line_;
586 node[
"function"] = timer.code_point_->func_;
587 cumul_time_sum = reduce(timer, node);
591 if (timer_idx == 0) parent_time = cumul_time_sum;
592 double percent = parent_time > 1.0e-10 ? cumul_time_sum / parent_time * 100.0 : 0.0;
593 node[
"percent"] = percent;
600 for (
unsigned int i = 0; i < Timer::max_n_childs; i++) {
601 if (timer.child_timers[i] != timer_no_child)
602 child_timers_values.push_back(timer.child_timers[i]);
604 std::sort(child_timers_values.begin(), child_timers_values.end());
606 for(
int idx : child_timers_values)
607 add_timer_info(reduce, &children, idx, cumul_time_sum);
610 if (child_timers_values.size() > 0) {
611 node[
"children"] = children;
620 void save_nonmpi_metric (
nlohmann::json &node, T * ptr,
string name) {
621 node[name +
"-min"] = *ptr;
622 node[name +
"-max"] = *ptr;
623 node[name +
"-sum"] = *ptr;
626 std::shared_ptr<std::ostream> Profiler::get_default_output_stream() {
630 return make_shared<ofstream>(json_filepath.c_str());
634 #ifdef FLOW123D_HAVE_MPI
643 int mpi_rank, mpi_size;
650 bool temp_memory_monitoring = global_monitor_memory;
651 set_memory_monitoring(
false, petsc_monitor_memory);
663 int call_count = timer.call_count;
664 double cumul_time = timer.cumulative_time ();
666 long memory_allocated = (long)timer.total_allocated_;
667 long memory_deallocated = (
long)timer.total_deallocated_;
668 long memory_peak = (long)timer.max_allocated_;
670 int alloc_called = timer.alloc_called;
671 int dealloc_called = timer.dealloc_called;
674 save_mpi_metric<double>(node, comm, &cumul_time,
"cumul-time");
675 save_mpi_metric<int>(node, comm, &call_count,
"call-count");
677 save_mpi_metric<long>(node, comm, &memory_allocated,
"memory-alloc");
678 save_mpi_metric<long>(node, comm, &memory_deallocated,
"memory-dealloc");
679 save_mpi_metric<long>(node, comm, &memory_peak,
"memory-peak");
681 save_mpi_metric<int>(node, comm, &alloc_called,
"memory-alloc-called");
682 save_mpi_metric<int>(node, comm, &dealloc_called,
"memory-dealloc-called");
684 #ifdef FLOW123D_HAVE_PETSC
685 long petsc_memory_difference = (long)timer.petsc_memory_difference;
686 long petsc_peak_memory = (
long)timer.petsc_peak_memory;
687 save_mpi_metric<long>(node, comm, &petsc_memory_difference,
"memory-petsc-diff");
688 save_mpi_metric<long>(node, comm, &petsc_peak_memory,
"memory-petsc-peak");
689 #endif // FLOW123D_HAVE_PETSC
694 add_timer_info (reduce, &jsonChildren, 0, 0.0);
695 jsonRoot[
"children"] = jsonChildren;
696 output_header(jsonRoot, mpi_size);
707 const int FLOW123D_JSON_HUMAN_READABLE = 2;
709 os << jsonRoot.
dump(FLOW123D_JSON_HUMAN_READABLE) << endl;
711 }
catch (exception & e) {
713 ss <<
"nlohmann::json::dump error: " << e.what() <<
"\n";
714 THROW( ExcMessage() << EI_Message(ss.str()) );
718 set_memory_monitoring(temp_memory_monitoring, petsc_monitor_memory);
727 if (profiler_path ==
"") {
728 output(comm, *get_default_output_stream());
730 json_filepath = profiler_path;
731 std::shared_ptr<std::ostream> os = make_shared<ofstream>(profiler_path.c_str());
753 const int FLOW123D_MPI_SINGLE_PROCESS = 1;
754 output_header(jsonRoot, FLOW123D_MPI_SINGLE_PROCESS);
761 int call_count = timer.call_count;
762 double cumul_time = timer.cumulative_time ();
764 long memory_allocated = (long)timer.total_allocated_;
765 long memory_deallocated = (
long)timer.total_deallocated_;
766 long memory_peak = (long)timer.max_allocated_;
768 int alloc_called = timer.alloc_called;
769 int dealloc_called = timer.dealloc_called;
771 save_nonmpi_metric<double>(node, &cumul_time,
"cumul-time");
772 save_nonmpi_metric<int>(node, &call_count,
"call-count");
774 save_nonmpi_metric<long>(node, &memory_allocated,
"memory-alloc");
775 save_nonmpi_metric<long>(node, &memory_deallocated,
"memory-dealloc");
776 save_nonmpi_metric<long>(node, &memory_peak,
"memory-peak");
778 save_nonmpi_metric<int>(node, &alloc_called,
"memory-alloc-called");
779 save_nonmpi_metric<int>(node, &dealloc_called,
"memory-dealloc-called");
781 #ifdef FLOW123D_HAVE_PETSC
782 long petsc_memory_difference = (long)timer.petsc_memory_difference;
783 long petsc_peak_memory = (
long)timer.petsc_peak_memory;
784 save_nonmpi_metric<long>(node, &petsc_memory_difference,
"memory-petsc-diff");
785 save_nonmpi_metric<long>(node, &petsc_peak_memory,
"memory-petsc-peak");
786 #endif // FLOW123D_HAVE_PETSC
791 add_timer_info(reduce, &jsonChildren, 0, 0.0);
792 jsonRoot[
"children"] = jsonChildren;
799 const int FLOW123D_JSON_HUMAN_READABLE = 2;
801 os << jsonRoot.
dump(FLOW123D_JSON_HUMAN_READABLE) << endl;
803 }
catch (exception & e) {
805 ss <<
"nlohmann::json::dump error: " << e.what() <<
"\n";
806 THROW( ExcMessage() << EI_Message(ss.str()) );
812 if(profiler_path ==
"") {
813 output(*get_default_output_stream());
815 json_filepath = profiler_path;
816 std::shared_ptr<std::ostream> os = make_shared<ofstream>(profiler_path.c_str());
821 void Profiler::output_header (
nlohmann::json &root,
int mpi_size) {
822 time_t end_time = time(NULL);
824 const char format[] =
"%x %X";
825 char start_time_string[BUFSIZ] = {0};
826 strftime(start_time_string,
sizeof (start_time_string) - 1,
format, localtime(&start_time));
828 char end_time_string[BUFSIZ] = {0};
829 strftime(end_time_string,
sizeof (end_time_string) - 1,
format, localtime(&end_time));
831 if (timers_[0].cumul_time > 60) {
835 root[
"program-name"] = flow_name_;
836 root[
"program-version"] = flow_version_;
837 root[
"program-branch"] = flow_branch_;
838 root[
"program-revision"] = flow_revision_;
839 root[
"program-build"] = flow_build_;
841 root[
"timer-calibration"] = calibration_time_;
844 root[
"task-description"] = task_description_;
845 root[
"task-size"] = task_size_;
848 root[
"run-process-count"] = mpi_size;
849 root[
"run-started-at"] = start_time_string;
850 root[
"run-finished-at"] = end_time_string;
853 #ifdef FLOW123D_HAVE_PYTHON
856 if (json_filepath ==
"")
return;
865 #ifndef FLOW123D_HAVE_CYGWIN
867 PyObject * python_module = PythonLoader::load_module_by_name (
"profiler.profiler_formatter_module");
871 PyObject * convert_method = PythonLoader::get_callable (python_module,
"convert" );
873 int argument_index = 0;
874 PyObject * arguments = PyTuple_New (3);
877 PyObject * tmp = PyUnicode_FromString (json_filepath.c_str());
878 PyTuple_SetItem (arguments, argument_index++, tmp);
881 tmp = PyUnicode_FromString ((json_filepath + output_file_suffix).c_str());
882 PyTuple_SetItem (arguments, argument_index++, tmp);
885 tmp = PyUnicode_FromString (formatter.c_str());
886 PyTuple_SetItem (arguments, argument_index++, tmp);
889 PyObject_CallObject (convert_method, arguments);
891 PythonLoader::check_error();
896 MessageOut() <<
"# Note: converting json profiler reports is not"
897 <<
" supported under Windows or Cygwin environment for now.\n"
898 <<
"# You can use python script located in bin/python folder"
899 <<
" in order to convert json report to txt or csv format.\n"
900 <<
"python profiler_formatter_script.py --input \"" << json_filepath
901 <<
"\" --output \"profiler.txt\"" << std::endl;
902 #endif // FLOW123D_HAVE_CYGWIN
908 #endif // FLOW123D_HAVE_PYTHON
914 .error(
"Forbidden to uninitialize the Profiler when actual timer is not zero.");
916 set_memory_monitoring(
false,
false);
920 bool Profiler::global_monitor_memory =
false;
921 bool Profiler::petsc_monitor_memory =
true;
922 void Profiler::set_memory_monitoring(
const bool global_monitor,
const bool petsc_monitor) {
923 global_monitor_memory = global_monitor;
924 petsc_monitor_memory = petsc_monitor;
927 bool Profiler::get_global_memory_monitoring() {
928 return global_monitor_memory;
931 bool Profiler::get_petsc_memory_monitoring() {
932 return petsc_monitor_memory;
935 unordered_map_with_alloc & MemoryAlloc::malloc_map() {
936 static unordered_map_with_alloc static_malloc_map;
937 return static_malloc_map;
940 void * Profiler::operator
new (
size_t size) {
941 return malloc (size);
944 void Profiler::operator
delete (
void* p) {
949 void * p = malloc(size);
955 void * p = malloc(size);
960 void *
operator new[] (std::size_t size,
const std::nothrow_t&)
throw() {
961 void * p = malloc(size);
966 void operator delete(
void *p)
throw() {
971 void operator delete(
void *p, std::size_t)
throw() {
976 void operator delete[](
void *p)
throw() {
981 void operator delete[](
void *p, std::size_t)
throw() {
986 #else // def FLOW123D_DEBUG_PROFILER
1004 #endif // def FLOW123D_DEBUG_PROFILER