24 #include <sys/param.h>
25 #include <unordered_map>
27 #ifdef FLOW123D_HAVE_PYTHON
29 #endif // FLOW123D_HAVE_PYTHON
35 #include <boost/format.hpp>
36 #include <boost/property_tree/ptree.hpp>
37 #include <boost/property_tree/json_parser.hpp>
45 namespace property_tree = boost::property_tree;
105 #ifdef FLOW123D_DEBUG_PROFILER
110 const int timer_no_child=-1;
112 Timer::Timer(
const CodePoint &cp,
int parent)
118 full_hash_(cp.hash_),
119 hash_idx_(cp.hash_idx_),
120 parent_timer(parent),
122 total_deallocated_(0),
124 current_allocated_(0),
127 #ifdef FLOW123D_HAVE_PETSC
128 , petsc_start_memory(0),
129 petsc_end_memory (0),
130 petsc_memory_difference(0),
131 petsc_peak_memory(0),
132 petsc_local_peak_memory(0)
135 for(
unsigned int i=0; i< max_n_childs ;i++) child_timers[i]=timer_no_child;
143 os <<
" Timer: " << timer.tag() << endl;
144 os <<
" malloc: " << timer.total_allocated_ << endl;
145 os <<
" dalloc: " << timer.total_deallocated_ << endl;
146 #ifdef FLOW123D_HAVE_PETSC
147 os <<
" start: " << timer.petsc_start_memory << endl;
148 os <<
" stop : " << timer.petsc_end_memory << endl;
149 os <<
" diff : " << timer.petsc_memory_difference <<
" (" << timer.petsc_end_memory - timer.petsc_start_memory <<
")" << endl;
150 os <<
" peak : " << timer.petsc_peak_memory <<
" (" << timer.petsc_local_peak_memory <<
")" << endl;
151 #endif // FLOW123D_HAVE_PETSC
157 double Timer::cumulative_time()
const {
161 void Profiler::accept_from_child(
Timer &parent,
Timer &child) {
163 for (
unsigned int i = 0; i < Timer::max_n_childs; i++) {
164 child_timer = child.child_timers[i];
165 if (child_timer != timer_no_child) {
167 accept_from_child(child, timers_[child_timer]);
171 parent.total_allocated_ += child.total_allocated_;
172 parent.total_deallocated_ += child.total_deallocated_;
173 parent.alloc_called += child.alloc_called;
174 parent.dealloc_called += child.dealloc_called;
176 #ifdef FLOW123D_HAVE_PETSC
177 if (petsc_monitor_memory) {
179 parent.petsc_memory_difference += child.petsc_memory_difference;
180 parent.current_allocated_ += child.current_allocated_;
184 parent.petsc_peak_memory = max(parent.petsc_peak_memory, child.petsc_peak_memory);
186 #endif // FLOW123D_HAVE_PETSC
188 parent.max_allocated_ = max(parent.max_allocated_, child.max_allocated_);
192 void Timer::pause() {
193 #ifdef FLOW123D_HAVE_PETSC
194 if (Profiler::get_petsc_memory_monitoring()) {
196 PetscMemoryGetMaximumUsage(&petsc_local_peak_memory);
197 if (petsc_peak_memory < petsc_local_peak_memory)
198 petsc_peak_memory = petsc_local_peak_memory;
200 #endif // FLOW123D_HAVE_PETSC
203 void Timer::resume() {
204 #ifdef FLOW123D_HAVE_PETSC
205 if (Profiler::get_petsc_memory_monitoring()) {
208 PetscMemorySetGetMaximumUsage();
210 #endif // FLOW123D_HAVE_PETSC
214 #ifdef FLOW123D_HAVE_PETSC
215 if (Profiler::get_petsc_memory_monitoring()) {
218 PetscMemorySetGetMaximumUsage();
219 PetscMemoryGetCurrentUsage (&petsc_start_memory);
221 #endif // FLOW123D_HAVE_PETSC
223 if (start_count == 0) {
233 #ifdef FLOW123D_HAVE_PETSC
234 if (Profiler::get_petsc_memory_monitoring()) {
236 PetscMemoryGetCurrentUsage (&petsc_end_memory);
237 petsc_memory_difference += petsc_end_memory - petsc_start_memory;
240 PetscMemoryGetMaximumUsage(&petsc_local_peak_memory);
241 if (petsc_peak_memory < petsc_local_peak_memory)
242 petsc_peak_memory = petsc_local_peak_memory;
244 #endif // FLOW123D_HAVE_PETSC
246 if (forced) start_count=1;
248 if (start_count == 1) {
249 cumul_time += (
TimePoint() - start_time);
260 void Timer::add_child(
int child_index,
const Timer &child)
262 unsigned int idx = child.hash_idx_;
263 if (child_timers[idx] != timer_no_child) {
267 i=( i < max_n_childs ? i+1 : 0);
268 }
while (i!=idx && child_timers[i] != timer_no_child);
269 ASSERT(i!=idx)(tag()).error(
"Too many children of the timer");
273 child_timers[idx] = child_index;
278 string Timer::code_point_str()
const {
280 code_point_->file_, code_point_->line_, code_point_->func_ );
291 MemoryAlloc::malloc_map().reserve(Profiler::malloc_map_reserve);
298 static CONSTEXPR_ CodePoint main_cp = CODE_POINT(
"Whole Program");
300 const long Profiler::malloc_map_reserve = 100 * 1000;
301 CodePoint Profiler::null_code_point = CodePoint(
"__no_tag__",
"__no_file__",
"__no_func__", 0);
305 set_memory_monitoring(
true,
true);
312 start_time( time(NULL) ),
316 #ifdef FLOW123D_DEBUG_PROFILER
317 timers_.push_back(
Timer(main_cp, 0) );
324 void Profiler::propagate_timers() {
325 for (
unsigned int i = 0; i < Timer::max_n_childs; i++) {
326 unsigned int child_timer = timers_[0].child_timers[i];
327 if ((
signed int)child_timer != timer_no_child) {
329 accept_from_child(timers_[0], timers_[child_timer]);
337 task_description_ = description;
344 flow_name_ = program_name;
345 flow_version_ = program_version;
346 flow_branch_ = branch;
347 flow_revision_ = revision;
353 int Profiler::start_timer(
const CodePoint &cp) {
354 unsigned int parent_node = actual_node;
356 int child_idx = find_child(cp);
360 child_idx=timers_.size();
361 timers_.push_back(
Timer(cp, actual_node) );
362 timers_[actual_node].add_child(child_idx , timers_.back() );
364 actual_node=child_idx;
367 timers_[parent_node].pause();
369 timers_[actual_node].start();
376 int Profiler::find_child(
const CodePoint &cp) {
377 Timer &timer =timers_[actual_node];
378 unsigned int idx = cp.hash_idx_;
379 unsigned int child_idx;
381 if (timer.child_timers[idx] == timer_no_child)
break;
383 child_idx=timer.child_timers[idx];
384 ASSERT_LT(child_idx, timers_.size()).error();
385 if (timers_[child_idx].full_hash_ == cp.hash_)
return child_idx;
386 idx = ( (
unsigned int)(idx)==(Timer::max_n_childs - 1) ? 0 : idx+1 );
387 }
while ( (
unsigned int)(idx) != cp.hash_idx_ );
393 void Profiler::stop_timer(
const CodePoint &cp) {
394 #ifdef FLOW123D_DEBUG
396 Timer &timer=timers_[actual_node];
397 for(
unsigned int i=0; i < Timer::max_n_childs; i++)
398 if (timer.child_timers[i] != timer_no_child)
399 ASSERT(! timers_[timer.child_timers[i]].running())(timers_[timer.child_timers[i]].tag())(timer.tag())
400 .error(
"Child timer running while closing timer.");
402 unsigned int child_timer = actual_node;
403 if ( cp.hash_ != timers_[actual_node].full_hash_) {
405 for(
unsigned int node=actual_node; node != 0; node=timers_[node].parent_timer) {
406 if ( cp.hash_ == timers_[node].full_hash_) {
408 for(; (
unsigned int)(actual_node) != node; actual_node=timers_[actual_node].parent_timer) {
409 WarningOut() <<
"Timer to close '" << cp.tag_ <<
"' do not match actual timer '"
410 << timers_[actual_node].tag() <<
"'. Force closing actual." << std::endl;
411 timers_[actual_node].stop(
true);
414 timers_[actual_node].stop(
false);
415 actual_node = timers_[actual_node].parent_timer;
418 if (actual_node == child_timer)
422 timers_[actual_node].resume();
430 timers_[actual_node].stop(
false);
431 actual_node = timers_[actual_node].parent_timer;
434 if (actual_node == child_timer)
438 timers_[actual_node].resume();
443 void Profiler::stop_timer(
int timer_index) {
447 if (timers_[timer_index].running()) {
448 ASSERT_EQ(timer_index, (
int)actual_node).error();
449 stop_timer(*timers_[timer_index].code_point_);
456 void Profiler::add_calls(
unsigned int n_calls) {
457 timers_[actual_node].call_count += n_calls-1;
463 if (!global_monitor_memory)
466 MemoryAlloc::malloc_map()[p] =
static_cast<int>(size);
467 timers_[actual_node].total_allocated_ += size;
468 timers_[actual_node].current_allocated_ += size;
469 timers_[actual_node].alloc_called++;
471 if (timers_[actual_node].current_allocated_ > timers_[actual_node].max_allocated_)
472 timers_[actual_node].max_allocated_ = timers_[actual_node].current_allocated_;
478 if (!global_monitor_memory)
481 int size =
sizeof(p);
482 if (MemoryAlloc::malloc_map()[(long)p] > 0) {
483 size = MemoryAlloc::malloc_map()[(long)p];
484 MemoryAlloc::malloc_map().erase((
long)p);
486 timers_[actual_node].total_deallocated_ += size;
487 timers_[actual_node].current_allocated_ -= size;
488 timers_[actual_node].dealloc_called++;
493 const int measurements = 100;
497 for (
unsigned int i = 1; i < measurements; i++) {
502 while ((t2 - t1) == 0) t2 =
TimePoint ();
508 return (result / measurements) * 1000;
512 std::string common_prefix( std::string a, std::string b ) {
513 if( a.size() > b.size() )
std::swap(a,b) ;
514 return std::string( a.begin(), std::mismatch( a.begin(), a.end(), b.begin() ).first ) ;
519 template<
typename ReduceFunctor>
520 void Profiler::add_timer_info(ReduceFunctor reduce, property_tree::ptree* holder,
int timer_idx,
double parent_time) {
523 Timer &timer = timers_[timer_idx];
524 ASSERT(timer_idx >=0)(timer_idx).error(
"Wrong timer index.");
525 ASSERT(timer.parent_timer >=0).error(
"Inconsistent tree.");
528 string filepath = timer.code_point_->file_;
531 #ifdef FLOW123D_SOURCE_DIR
532 string common_path = common_prefix (
string(FLOW123D_SOURCE_DIR), filepath);
533 filepath.erase (0, common_path.size());
539 property_tree::ptree node;
540 double cumul_time_sum;
541 node.put (
"tag", (timer.tag()) );
542 node.put (
"file-path", (filepath) );
543 node.put (
"file-line", (timer.code_point_->line_) );
544 node.put (
"function", (timer.code_point_->func_) );
545 cumul_time_sum = reduce (timer, node);
549 if (timer_idx == 0) parent_time = cumul_time_sum;
550 double percent = parent_time > 1.0e-10 ? cumul_time_sum / parent_time * 100.0 : 0.0;
551 node.put<
double> (
"percent", percent);
582 property_tree::ptree children;
583 bool has_children =
false;
584 for (
unsigned int i = 0; i < Timer::max_n_childs; i++) {
585 if (timer.child_timers[i] != timer_no_child) {
586 add_timer_info (reduce, &children, timer.child_timers[i], cumul_time_sum);
596 node.add_child (
"children", children);
599 holder->push_back (std::make_pair (
"", node));
604 void save_nonmpi_metric (property_tree::ptree &node, T * ptr,
string name) {
605 node.put (name+
"-min", *ptr);
606 node.put (name+
"-max", *ptr);
607 node.put (name+
"-sum", *ptr);
610 std::shared_ptr<std::ostream> Profiler::get_default_output_stream() {
611 char filename[PATH_MAX];
612 strftime(filename,
sizeof (filename) - 1,
"profiler_info_%y.%m.%d_%H-%M-%S.log.json", localtime(&start_time));
616 return make_shared<ofstream>(json_filepath.c_str());
620 #ifdef FLOW123D_HAVE_MPI
622 void save_mpi_metric (property_tree::ptree &node,
MPI_Comm comm, T * ptr,
string name) {
629 int mpi_rank, mpi_size;
636 bool temp_memory_monitoring = global_monitor_memory;
637 set_memory_monitoring(
false, petsc_monitor_memory);
643 property_tree::ptree root, children;
644 output_header (root, mpi_size);
649 auto reduce = [=] (
Timer &timer, property_tree::ptree &node) ->
double {
650 int call_count = timer.call_count;
651 double cumul_time = timer.cumulative_time ();
653 long memory_allocated = (long)timer.total_allocated_;
654 long memory_deallocated = (
long)timer.total_deallocated_;
655 long memory_peak = (long)timer.max_allocated_;
657 int alloc_called = timer.alloc_called;
658 int dealloc_called = timer.dealloc_called;
661 save_mpi_metric<double>(node, comm, &cumul_time,
"cumul-time");
662 save_mpi_metric<int>(node, comm, &call_count,
"call-count");
664 save_mpi_metric<long>(node, comm, &memory_allocated,
"memory-alloc");
665 save_mpi_metric<long>(node, comm, &memory_deallocated,
"memory-dealloc");
666 save_mpi_metric<long>(node, comm, &memory_peak,
"memory-peak");
668 save_mpi_metric<int>(node, comm, &alloc_called,
"memory-alloc-called");
669 save_mpi_metric<int>(node, comm, &dealloc_called,
"memory-dealloc-called");
671 #ifdef FLOW123D_HAVE_PETSC
672 long petsc_memory_difference = (long)timer.petsc_memory_difference;
673 long petsc_peak_memory = (
long)timer.petsc_peak_memory;
674 save_mpi_metric<long>(node, comm, &petsc_memory_difference,
"memory-petsc-diff");
675 save_mpi_metric<long>(node, comm, &petsc_peak_memory,
"memory-petsc-peak");
676 #endif // FLOW123D_HAVE_PETSC
681 add_timer_info (reduce, &children, 0, 0.0);
682 root.add_child (
"children", children);
693 const int FLOW123D_JSON_HUMAN_READABLE = 1;
695 property_tree::write_json (os, root, FLOW123D_JSON_HUMAN_READABLE);
696 }
catch (property_tree::json_parser::json_parser_error & e) {
698 ss <<
"Throw json_parser_error: " << e.message() <<
"\nIn file: " << e.filename() <<
", at line " << e.line() <<
"\n";
699 THROW( ExcMessage() << EI_Message(ss.str()) );
703 set_memory_monitoring(temp_memory_monitoring, petsc_monitor_memory);
711 output(comm, *get_default_output_stream());
726 property_tree::ptree root, children;
731 const int FLOW123D_MPI_SINGLE_PROCESS = 1;
732 output_header (root, FLOW123D_MPI_SINGLE_PROCESS);
738 auto reduce = [=] (
Timer &timer, property_tree::ptree &node) ->
double {
739 int call_count = timer.call_count;
740 double cumul_time = timer.cumulative_time ();
742 long memory_allocated = (long)timer.total_allocated_;
743 long memory_deallocated = (
long)timer.total_deallocated_;
744 long memory_peak = (long)timer.max_allocated_;
746 int alloc_called = timer.alloc_called;
747 int dealloc_called = timer.dealloc_called;
749 save_nonmpi_metric<double>(node, &cumul_time,
"cumul-time");
750 save_nonmpi_metric<int>(node, &call_count,
"call-count");
752 save_nonmpi_metric<long>(node, &memory_allocated,
"memory-alloc");
753 save_nonmpi_metric<long>(node, &memory_deallocated,
"memory-dealloc");
754 save_nonmpi_metric<long>(node, &memory_peak,
"memory-peak");
756 save_nonmpi_metric<int>(node, &alloc_called,
"memory-alloc-called");
757 save_nonmpi_metric<int>(node, &dealloc_called,
"memory-dealloc-called");
759 #ifdef FLOW123D_HAVE_PETSC
760 long petsc_memory_difference = (long)timer.petsc_memory_difference;
761 long petsc_peak_memory = (
long)timer.petsc_peak_memory;
762 save_nonmpi_metric<long>(node, &petsc_memory_difference,
"memory-petsc-diff");
763 save_nonmpi_metric<long>(node, &petsc_peak_memory,
"memory-petsc-peak");
764 #endif // FLOW123D_HAVE_PETSC
769 add_timer_info (reduce, &children, 0, 0.0);
770 root.add_child (
"children", children);
778 const int FLOW123D_JSON_HUMAN_READABLE = 1;
780 property_tree::write_json (os, root, FLOW123D_JSON_HUMAN_READABLE);
781 }
catch (property_tree::json_parser::json_parser_error & e) {
783 ss <<
"Throw json_parser_error: " << e.message() <<
"\nIn file: " << e.filename() <<
", at line " << e.line() <<
"\n";
784 THROW( ExcMessage() << EI_Message(ss.str()) );
790 output(*get_default_output_stream());
793 void Profiler::output_header (property_tree::ptree &root,
int mpi_size) {
794 time_t end_time = time(NULL);
796 const char format[] =
"%x %X";
797 char start_time_string[BUFSIZ] = {0};
798 strftime(start_time_string,
sizeof (start_time_string) - 1,
format, localtime(&start_time));
800 char end_time_string[BUFSIZ] = {0};
801 strftime(end_time_string,
sizeof (end_time_string) - 1,
format, localtime(&end_time));
805 root.put (
"program-name", flow_name_);
806 root.put (
"program-version", flow_version_);
807 root.put (
"program-branch", flow_branch_);
808 root.put (
"program-revision", flow_revision_);
809 root.put (
"program-build", flow_build_);
812 #ifdef FLOW123D_SOURCE_DIR
816 root.put (
"task-description", task_description_);
817 root.put (
"task-size", task_size_);
820 root.put (
"run-process-count", mpi_size);
821 root.put (
"run-started-at", start_time_string);
822 root.put (
"run-finished-at", end_time_string);
825 #ifdef FLOW123D_HAVE_PYTHON
828 if (json_filepath==
"")
return;
837 #ifndef FLOW123D_HAVE_CYGWIN
839 PyObject * python_module = PythonLoader::load_module_by_name (
"profiler.profiler_formatter_module");
843 PyObject * convert_method = PythonLoader::get_callable (python_module,
"convert" );
845 int argument_index = 0;
846 PyObject * arguments = PyTuple_New (3);
849 PyObject * tmp = PyUnicode_FromString (json_filepath.c_str());
850 PyTuple_SetItem (arguments, argument_index++, tmp);
853 tmp = PyUnicode_FromString ((json_filepath + output_file_suffix).c_str());
854 PyTuple_SetItem (arguments, argument_index++, tmp);
857 tmp = PyUnicode_FromString (formatter.c_str());
858 PyTuple_SetItem (arguments, argument_index++, tmp);
861 PyObject_CallObject (convert_method, arguments);
863 PythonLoader::check_error();
868 MessageOut() <<
"# Note: converting json profiler reports is not"
869 <<
" supported under Windows or Cygwin environment for now.\n"
870 <<
"# You can use python script located in bin/python folder"
871 <<
" in order to convert json report to txt or csv format.\n"
872 <<
"python profiler_formatter_script.py --input \"" << json_filepath
873 <<
"\" --output \"profiler.txt\"" << std::endl;
874 #endif // FLOW123D_HAVE_CYGWIN
880 #endif // FLOW123D_HAVE_PYTHON
888 set_memory_monitoring(
false,
false);
893 bool Profiler::global_monitor_memory =
false;
894 bool Profiler::petsc_monitor_memory =
true;
895 void Profiler::set_memory_monitoring(
const bool global_monitor,
const bool petsc_monitor) {
896 global_monitor_memory = global_monitor;
897 petsc_monitor_memory = petsc_monitor;
900 bool Profiler::get_global_memory_monitoring() {
901 return global_monitor_memory;
904 bool Profiler::get_petsc_memory_monitoring() {
905 return petsc_monitor_memory;
908 unordered_map_with_alloc & MemoryAlloc::malloc_map() {
909 static unordered_map_with_alloc static_malloc_map;
910 return static_malloc_map;
913 void * Profiler::operator
new (
size_t size) {
914 return malloc (size);
917 void Profiler::operator
delete (
void* p) {
922 void * p = malloc(size);
928 void * p = malloc(size);
933 void *
operator new[] (std::size_t size,
const std::nothrow_t&)
throw() {
934 void * p = malloc(size);
939 void operator delete(
void *p)
throw() {
944 void operator delete[](
void *p)
throw() {
949 #else // def FLOW123D_DEBUG_PROFILER
977 #endif // def FLOW123D_DEBUG_PROFILER