Flow123d  release_3.0.0-973-g92f55e826
sys_profiler.cc
Go to the documentation of this file.
1 /*!
2  *
3  * Copyright (C) 2015 Technical University of Liberec. All rights reserved.
4  *
5  * This program is free software; you can redistribute it and/or modify it under
6  * the terms of the GNU General Public License version 3 as published by the
7  * Free Software Foundation. (http://www.gnu.org/licenses/gpl-3.0.en.html)
8  *
9  * This program is distributed in the hope that it will be useful, but WITHOUT
10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
11  * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
12  *
13  *
14  * @file sys_profiler.cc
15  * @ingroup system
16  * @brief Profiler
17  */
18 
19 
20 // Fat header
21 
22 #include <fstream>
23 #include <iomanip>
24 #include <sys/param.h>
25 #include <unordered_map>
26 
27 #ifdef FLOW123D_HAVE_PYTHON
28  #include "Python.h"
29 #endif // FLOW123D_HAVE_PYTHON
30 
31 #include "sys_profiler.hh"
32 #include "system/system.hh"
33 #include "system/python_loader.hh"
34 #include <iostream>
35 #include <boost/format.hpp>
36 #include <boost/property_tree/ptree.hpp>
37 #include <boost/property_tree/json_parser.hpp>
38 
39 #include "system/file_path.hh"
40 #include "system/python_loader.hh"
41 #include "mpi.h"
42 #include "time_point.hh"
43 
44 // namespace alias
45 namespace property_tree = boost::property_tree;
46 
47 /*
48  * These should be replaced by using boost MPI interface
49  */
50 int MPI_Functions::sum(int* val, MPI_Comm comm) {
51  int total = 0;
52  MPI_Reduce(val, &total, 1, MPI_INT, MPI_SUM, 0, comm);
53  return total;
54  }
55 
56 double MPI_Functions::sum(double* val, MPI_Comm comm) {
57  double total = 0;
58  MPI_Reduce(val, &total, 1, MPI_DOUBLE, MPI_SUM, 0, comm);
59  return total;
60  }
61 
62 long MPI_Functions::sum(long* val, MPI_Comm comm) {
63  long total = 0;
64  MPI_Reduce(val, &total, 1, MPI_LONG, MPI_SUM, 0, comm);
65  return total;
66  }
67 
68 int MPI_Functions::min(int* val, MPI_Comm comm) {
69  int min = 0;
70  MPI_Reduce(val, &min, 1, MPI_INT, MPI_MIN, 0, comm);
71  return min;
72  }
73 
74 double MPI_Functions::min(double* val, MPI_Comm comm) {
75  double min = 0;
76  MPI_Reduce(val, &min, 1, MPI_DOUBLE, MPI_MIN, 0, comm);
77  return min;
78  }
79 
80 long MPI_Functions::min(long* val, MPI_Comm comm) {
81  long min = 0;
82  MPI_Reduce(val, &min, 1, MPI_LONG, MPI_MIN, 0, comm);
83  return min;
84  }
85 
86 int MPI_Functions::max(int* val, MPI_Comm comm) {
87  int max = 0;
88  MPI_Reduce(val, &max, 1, MPI_INT, MPI_MAX, 0, comm);
89  return max;
90  }
91 
92 double MPI_Functions::max(double* val, MPI_Comm comm) {
93  double max = 0;
94  MPI_Reduce(val, &max, 1, MPI_DOUBLE, MPI_MAX, 0, comm);
95  return max;
96  }
97 
98 long MPI_Functions::max(long* val, MPI_Comm comm) {
99  long max = 0;
100  MPI_Reduce(val, &max, 1, MPI_LONG, MPI_MAX, 0, comm);
101  return max;
102  }
103 
104 
105 #ifdef FLOW123D_DEBUG_PROFILER
106 /*********************************************************************************************
107  * Implementation of class Timer
108  */
109 
110 const int timer_no_child=-1;
111 
112 Timer::Timer(const CodePoint &cp, int parent)
113 : start_time(TimePoint()),
114  cumul_time(0.0),
115  call_count(0),
116  start_count(0),
117  code_point_(&cp),
118  full_hash_(cp.hash_),
119  hash_idx_(cp.hash_idx_),
120  parent_timer(parent),
121  total_allocated_(0),
122  total_deallocated_(0),
123  max_allocated_(0),
124  current_allocated_(0),
125  alloc_called(0),
126  dealloc_called(0)
127 #ifdef FLOW123D_HAVE_PETSC
128 , petsc_start_memory(0),
129  petsc_end_memory (0),
130  petsc_memory_difference(0),
131  petsc_peak_memory(0),
132  petsc_local_peak_memory(0)
133 #endif // FLOW123D_HAVE_PETSC
134 {
135  for(unsigned int i=0; i< max_n_childs ;i++) child_timers[i]=timer_no_child;
136 }
137 
138 
139 /**
140  * Debug information of the timer
141  */
142 ostream & operator <<(ostream& os, const Timer& timer) {
143  os << " Timer: " << timer.tag() << endl;
144  os << " malloc: " << timer.total_allocated_ << endl;
145  os << " dalloc: " << timer.total_deallocated_ << endl;
146  #ifdef FLOW123D_HAVE_PETSC
147  os << " start: " << timer.petsc_start_memory << endl;
148  os << " stop : " << timer.petsc_end_memory << endl;
149  os << " diff : " << timer.petsc_memory_difference << " (" << timer.petsc_end_memory - timer.petsc_start_memory << ")" << endl;
150  os << " peak : " << timer.petsc_peak_memory << " (" << timer.petsc_local_peak_memory << ")" << endl;
151  #endif // FLOW123D_HAVE_PETSC
152  os << endl;
153  return os;
154 }
155 
156 
157 double Timer::cumulative_time() const {
158  return cumul_time;
159 }
160 
161 void Profiler::accept_from_child(Timer &parent, Timer &child) {
162  int child_timer = 0;
163  for (unsigned int i = 0; i < Timer::max_n_childs; i++) {
164  child_timer = child.child_timers[i];
165  if (child_timer != timer_no_child) {
166  // propagate metrics from child to parent
167  accept_from_child(child, timers_[child_timer]);
168  }
169  }
170  // compute totals by adding values from child
171  parent.total_allocated_ += child.total_allocated_;
172  parent.total_deallocated_ += child.total_deallocated_;
173  parent.alloc_called += child.alloc_called;
174  parent.dealloc_called += child.dealloc_called;
175 
176 #ifdef FLOW123D_HAVE_PETSC
177  if (petsc_monitor_memory) {
178  // add differences from child
179  parent.petsc_memory_difference += child.petsc_memory_difference;
180  parent.current_allocated_ += child.current_allocated_;
181 
182  // when computing maximum, we take greater value from parent and child
183  // PetscMemoryGetCurrentUsage always return absolute (not relative) value
184  parent.petsc_peak_memory = max(parent.petsc_peak_memory, child.petsc_peak_memory);
185  }
186 #endif // FLOW123D_HAVE_PETSC
187 
188  parent.max_allocated_ = max(parent.max_allocated_, child.max_allocated_);
189 }
190 
191 
192 void Timer::pause() {
193 #ifdef FLOW123D_HAVE_PETSC
194  if (Profiler::get_petsc_memory_monitoring()) {
195  // get the maximum resident set size (memory used) for the program.
196  PetscMemoryGetMaximumUsage(&petsc_local_peak_memory);
197  if (petsc_peak_memory < petsc_local_peak_memory)
198  petsc_peak_memory = petsc_local_peak_memory;
199  }
200 #endif // FLOW123D_HAVE_PETSC
201 }
202 
203 void Timer::resume() {
204 #ifdef FLOW123D_HAVE_PETSC
205  if (Profiler::get_petsc_memory_monitoring()) {
206  // tell PETSc to monitor the maximum memory usage so
207  // that PetscMemoryGetMaximumUsage() will work.
208  PetscMemorySetGetMaximumUsage();
209  }
210 #endif // FLOW123D_HAVE_PETSC
211 }
212 
213 void Timer::start() {
214 #ifdef FLOW123D_HAVE_PETSC
215  if (Profiler::get_petsc_memory_monitoring()) {
216  // Tell PETSc to monitor the maximum memory usage so
217  // that PetscMemoryGetMaximumUsage() will work.
218  PetscMemorySetGetMaximumUsage();
219  PetscMemoryGetCurrentUsage (&petsc_start_memory);
220  }
221 #endif // FLOW123D_HAVE_PETSC
222 
223  if (start_count == 0) {
224  start_time = TimePoint();
225  }
226  call_count++;
227  start_count++;
228 }
229 
230 
231 
232 bool Timer::stop(bool forced) {
233 #ifdef FLOW123D_HAVE_PETSC
234  if (Profiler::get_petsc_memory_monitoring()) {
235  // get current memory usage
236  PetscMemoryGetCurrentUsage (&petsc_end_memory);
237  petsc_memory_difference += petsc_end_memory - petsc_start_memory;
238 
239  // get the maximum resident set size (memory used) for the program.
240  PetscMemoryGetMaximumUsage(&petsc_local_peak_memory);
241  if (petsc_peak_memory < petsc_local_peak_memory)
242  petsc_peak_memory = petsc_local_peak_memory;
243  }
244 #endif // FLOW123D_HAVE_PETSC
245 
246  if (forced) start_count=1;
247 
248  if (start_count == 1) {
249  cumul_time += (TimePoint() - start_time);
250  start_count--;
251  return true;
252  } else {
253  start_count--;
254  }
255  return false;
256 }
257 
258 
259 
260 void Timer::add_child(int child_index, const Timer &child)
261 {
262  unsigned int idx = child.hash_idx_;
263  if (child_timers[idx] != timer_no_child) {
264  // hash collision, find first empty place
265  unsigned int i=idx;
266  do {
267  i=( i < max_n_childs ? i+1 : 0);
268  } while (i!=idx && child_timers[i] != timer_no_child);
269  ASSERT(i!=idx)(tag()).error("Too many children of the timer");
270  idx=i;
271  }
272  //DebugOut().fmt("Adding child {} at index: {}\n", child_index, idx);
273  child_timers[idx] = child_index;
274 }
275 
276 
277 
278 string Timer::code_point_str() const {
279  return fmt::format("{%s}:{%d}, {%s}()",
280  code_point_->file_, code_point_->line_, code_point_->func_ );
281 }
282 
283 
284 /***********************************************************************************************
285  * Implementation of Profiler
286  */
287 
288 
290  if (_instance == NULL) {
291  MemoryAlloc::malloc_map().reserve(Profiler::malloc_map_reserve);
292  _instance = new Profiler();
293  }
294  return _instance;
295  }
296 
297 
298 static CONSTEXPR_ CodePoint main_cp = CODE_POINT("Whole Program");
300 const long Profiler::malloc_map_reserve = 100 * 1000;
301 CodePoint Profiler::null_code_point = CodePoint("__no_tag__", "__no_file__", "__no_func__", 0);
302 
303 void Profiler::initialize() {
304  instance();
305  set_memory_monitoring(true, true);
306 }
307 
308 
310 : actual_node(0),
311  task_size_(1),
312  start_time( time(NULL) ),
313  json_filepath("")
314 
315 {
316 #ifdef FLOW123D_DEBUG_PROFILER
317  timers_.push_back( Timer(main_cp, 0) );
318  timers_[0].start();
319 #endif
320 }
321 
322 
323 
324 void Profiler::propagate_timers() {
325  for (unsigned int i = 0; i < Timer::max_n_childs; i++) {
326  unsigned int child_timer = timers_[0].child_timers[i];
327  if ((signed int)child_timer != timer_no_child) {
328  // propagate metrics from child to Whole-Program time-frame
329  accept_from_child(timers_[0], timers_[child_timer]);
330  }
331  }
332 }
333 
334 
335 
336 void Profiler::set_task_info(string description, int size) {
337  task_description_ = description;
338  task_size_ = size;
339 }
340 
341 
342 
343 void Profiler::set_program_info(string program_name, string program_version, string branch, string revision, string build) {
344  flow_name_ = program_name;
345  flow_version_ = program_version;
346  flow_branch_ = branch;
347  flow_revision_ = revision;
348  flow_build_ = build;
349 }
350 
351 
352 
353 int Profiler::start_timer(const CodePoint &cp) {
354  unsigned int parent_node = actual_node;
355  //DebugOut().fmt("Start timer: {}\n", cp.tag_);
356  int child_idx = find_child(cp);
357  if (child_idx < 0) {
358  //DebugOut().fmt("Adding timer: {}\n", cp.tag_);
359  // tag not present - create new timer
360  child_idx=timers_.size();
361  timers_.push_back( Timer(cp, actual_node) );
362  timers_[actual_node].add_child(child_idx , timers_.back() );
363  }
364  actual_node=child_idx;
365 
366  // pause current timer
367  timers_[parent_node].pause();
368 
369  timers_[actual_node].start();
370 
371  return actual_node;
372 }
373 
374 
375 
376 int Profiler::find_child(const CodePoint &cp) {
377  Timer &timer =timers_[actual_node];
378  unsigned int idx = cp.hash_idx_;
379  unsigned int child_idx;
380  do {
381  if (timer.child_timers[idx] == timer_no_child) break; // tag is not there
382 
383  child_idx=timer.child_timers[idx];
384  ASSERT_LT(child_idx, timers_.size()).error();
385  if (timers_[child_idx].full_hash_ == cp.hash_) return child_idx;
386  idx = ( (unsigned int)(idx)==(Timer::max_n_childs - 1) ? 0 : idx+1 );
387  } while ( (unsigned int)(idx) != cp.hash_idx_ ); // passed through whole array
388  return -1;
389 }
390 
391 
392 
393 void Profiler::stop_timer(const CodePoint &cp) {
394 #ifdef FLOW123D_DEBUG
395  // check that all childrens are closed
396  Timer &timer=timers_[actual_node];
397  for(unsigned int i=0; i < Timer::max_n_childs; i++)
398  if (timer.child_timers[i] != timer_no_child)
399  ASSERT(! timers_[timer.child_timers[i]].running())(timers_[timer.child_timers[i]].tag())(timer.tag())
400  .error("Child timer running while closing timer.");
401 #endif
402  unsigned int child_timer = actual_node;
403  if ( cp.hash_ != timers_[actual_node].full_hash_) {
404  // timer to close is not actual - we search for it above actual
405  for(unsigned int node=actual_node; node != 0; node=timers_[node].parent_timer) {
406  if ( cp.hash_ == timers_[node].full_hash_) {
407  // found above - close all nodes between
408  for(; (unsigned int)(actual_node) != node; actual_node=timers_[actual_node].parent_timer) {
409  WarningOut() << "Timer to close '" << cp.tag_ << "' do not match actual timer '"
410  << timers_[actual_node].tag() << "'. Force closing actual." << std::endl;
411  timers_[actual_node].stop(true);
412  }
413  // close 'node' itself
414  timers_[actual_node].stop(false);
415  actual_node = timers_[actual_node].parent_timer;
416 
417  // actual_node == child_timer indicates this is root
418  if (actual_node == child_timer)
419  return;
420 
421  // resume current timer
422  timers_[actual_node].resume();
423  return;
424  }
425  }
426  // node not found - do nothing
427  return;
428  }
429  // node to close match the actual
430  timers_[actual_node].stop(false);
431  actual_node = timers_[actual_node].parent_timer;
432 
433  // actual_node == child_timer indicates this is root
434  if (actual_node == child_timer)
435  return;
436 
437  // resume current timer
438  timers_[actual_node].resume();
439 }
440 
441 
442 
443 void Profiler::stop_timer(int timer_index) {
444  // stop_timer with CodePoint type
445  // timer which is still running MUST be the same as actual_node index
446  // if timer is not running index will differ
447  if (timers_[timer_index].running()) {
448  ASSERT_EQ(timer_index, (int)actual_node).error();
449  stop_timer(*timers_[timer_index].code_point_);
450  }
451 
452 }
453 
454 
455 
456 void Profiler::add_calls(unsigned int n_calls) {
457  timers_[actual_node].call_count += n_calls-1;
458 }
459 
460 
461 
462 void Profiler::notify_malloc(const size_t size, const long p) {
463  if (!global_monitor_memory)
464  return;
465 
466  MemoryAlloc::malloc_map()[p] = static_cast<int>(size);
467  timers_[actual_node].total_allocated_ += size;
468  timers_[actual_node].current_allocated_ += size;
469  timers_[actual_node].alloc_called++;
470 
471  if (timers_[actual_node].current_allocated_ > timers_[actual_node].max_allocated_)
472  timers_[actual_node].max_allocated_ = timers_[actual_node].current_allocated_;
473 }
474 
475 
476 
477 void Profiler::notify_free(const long p) {
478  if (!global_monitor_memory)
479  return;
480 
481  int size = sizeof(p);
482  if (MemoryAlloc::malloc_map()[(long)p] > 0) {
483  size = MemoryAlloc::malloc_map()[(long)p];
484  MemoryAlloc::malloc_map().erase((long)p);
485  }
486  timers_[actual_node].total_deallocated_ += size;
487  timers_[actual_node].current_allocated_ -= size;
488  timers_[actual_node].dealloc_called++;
489 }
490 
491 
492 double Profiler::get_resolution () {
493  const int measurements = 100;
494  double result = 0;
495 
496  // perform 100 measurements
497  for (unsigned int i = 1; i < measurements; i++) {
498  TimePoint t1 = TimePoint ();
499  TimePoint t2 = TimePoint ();
500 
501  // double comparison should be avoided
502  while ((t2 - t1) == 0) t2 = TimePoint ();
503  // while ((t2.ticks - t1.ticks) == 0) t2 = TimePoint ();
504 
505  result += t2 - t1;
506  }
507 
508  return (result / measurements) * 1000; // ticks to seconds to microseconds conversion
509 }
510 
511 
512 std::string common_prefix( std::string a, std::string b ) {
513  if( a.size() > b.size() ) std::swap(a,b) ;
514  return std::string( a.begin(), std::mismatch( a.begin(), a.end(), b.begin() ).first ) ;
515 }
516 
517 
518 
519 template<typename ReduceFunctor>
520 void Profiler::add_timer_info(ReduceFunctor reduce, property_tree::ptree* holder, int timer_idx, double parent_time) {
521 
522  // get timer and check preconditions
523  Timer &timer = timers_[timer_idx];
524  ASSERT(timer_idx >=0)(timer_idx).error("Wrong timer index.");
525  ASSERT(timer.parent_timer >=0).error("Inconsistent tree.");
526 
527  // fix path
528  string filepath = timer.code_point_->file_;
529 
530  // if constant FLOW123D_SOURCE_DIR is defined, we try to erase it from beginning of each CodePoint's filepath
531  #ifdef FLOW123D_SOURCE_DIR
532  string common_path = common_prefix (string(FLOW123D_SOURCE_DIR), filepath);
533  filepath.erase (0, common_path.size());
534  #endif
535 
536 
537  // generate node representing this timer
538  // add basic information
539  property_tree::ptree node;
540  double cumul_time_sum;
541  node.put ("tag", (timer.tag()) );
542  node.put ("file-path", (filepath) );
543  node.put ("file-line", (timer.code_point_->line_) );
544  node.put ("function", (timer.code_point_->func_) );
545  cumul_time_sum = reduce (timer, node);
546 
547 
548  // statistical info
549  if (timer_idx == 0) parent_time = cumul_time_sum;
550  double percent = parent_time > 1.0e-10 ? cumul_time_sum / parent_time * 100.0 : 0.0;
551  node.put<double> ("percent", percent);
552 
553  // when collecting timer info, we need to make sure each processor contains
554  // the same time-frames, for now we simply reduce info to current timer
555  // using operation MPI_MIN, so if some processor does not contain some
556  // time-frame other processors will forget this time-frame (information lost)
557  /*
558  int child_timers[ Timer::max_n_childs];
559  MPI_Allreduce(&timer.child_timers, &child_timers, Timer::max_n_childs, MPI_INT, MPI_MIN, MPI_COMM_WORLD);
560  #ifdef FLOW123D_DEBUG
561  int mpi_rank = MPI_Comm_rank(MPI_COMM_WORLD, &mpi_rank);
562  for (int j = 0; j < Timer::max_n_childs; j++) {
563  if (child_timers[j] != timer.child_timers[j]) {
564  // this is severe error, timers indicies are different
565  ASSERT(child_timers[j] == timer_no_child).error("Severe error: timers indicies are different.");
566 
567  // explore the difference in more depth
568  if (timer.child_timers[j] == timer_no_child) {
569  // this processor does not have child timer
570  WarningOut() << "Inconsistent tree in '" << timer.tag() << "': this processor[" << mpi_rank
571  << "] does not have child-timer[" << child_timers[j] << "]" << std::endl;
572  } else {
573  // other processors do not have child timer
574  WarningOut() << "Inconsistent tree in '" << timer.tag() << "': this processor[" << mpi_rank
575  << "] contains child-timer[" << timer.child_timers[j] << "] which others do not" << std::endl;
576  }
577  }
578  }
579  #endif // FLOW123D_DEBUG
580 */
581  // write times children timers using secured child_timers array
582  property_tree::ptree children;
583  bool has_children = false;
584  for (unsigned int i = 0; i < Timer::max_n_childs; i++) {
585  if (timer.child_timers[i] != timer_no_child) {
586  add_timer_info (reduce, &children, timer.child_timers[i], cumul_time_sum);
587  /*
588  if (child_timers[i] != timer_no_child) {
589  add_timer_info (reduce, &children, child_timers[i], cumul_time_sum); */
590  has_children = true;
591  }
592  }
593 
594  // add children tag and other info if present
595  if (has_children)
596  node.add_child ("children", children);
597 
598  // push itself to root ptree 'array'
599  holder->push_back (std::make_pair ("", node));
600 }
601 
602 
603 template <class T>
604 void save_nonmpi_metric (property_tree::ptree &node, T * ptr, string name) {
605  node.put (name+"-min", *ptr);
606  node.put (name+"-max", *ptr);
607  node.put (name+"-sum", *ptr);
608 }
609 
610 std::shared_ptr<std::ostream> Profiler::get_default_output_stream() {
611  char filename[PATH_MAX];
612  strftime(filename, sizeof (filename) - 1, "profiler_info_%y.%m.%d_%H-%M-%S.log.json", localtime(&start_time));
613  json_filepath = FilePath(string(filename), FilePath::output_file);
614 
615  //LogOut() << "output into: " << json_filepath << std::endl;
616  return make_shared<ofstream>(json_filepath.c_str());
617 }
618 
619 
620 #ifdef FLOW123D_HAVE_MPI
621 template <class T>
622 void save_mpi_metric (property_tree::ptree &node, MPI_Comm comm, T * ptr, string name) {
623  node.put (name+"-min", MPI_Functions::min(ptr, comm));
624  node.put (name+"-max", MPI_Functions::max(ptr, comm));
625  node.put (name+"-sum", MPI_Functions::sum(ptr, comm));
626 }
627 
628 void Profiler::output(MPI_Comm comm, ostream &os) {
629  int mpi_rank, mpi_size;
630  //wait until profiling on all processors is finished
631  MPI_Barrier(comm);
632  stop_timer(0);
633  propagate_timers();
634 
635  // stop monitoring memory
636  bool temp_memory_monitoring = global_monitor_memory;
637  set_memory_monitoring(false, petsc_monitor_memory);
638 
639  chkerr( MPI_Comm_rank(MPI_COMM_WORLD, &mpi_rank) );
640  MPI_Comm_size(comm, &mpi_size);
641 
642  // output header
643  property_tree::ptree root, children;
644  output_header (root, mpi_size);
645 
646  // recursively add all timers info
647  // define lambda function which reduces timer from multiple processors
648  // MPI implementation uses MPI call to reduce values
649  auto reduce = [=] (Timer &timer, property_tree::ptree &node) -> double {
650  int call_count = timer.call_count;
651  double cumul_time = timer.cumulative_time ();
652 
653  long memory_allocated = (long)timer.total_allocated_;
654  long memory_deallocated = (long)timer.total_deallocated_;
655  long memory_peak = (long)timer.max_allocated_;
656 
657  int alloc_called = timer.alloc_called;
658  int dealloc_called = timer.dealloc_called;
659 
660 
661  save_mpi_metric<double>(node, comm, &cumul_time, "cumul-time");
662  save_mpi_metric<int>(node, comm, &call_count, "call-count");
663 
664  save_mpi_metric<long>(node, comm, &memory_allocated, "memory-alloc");
665  save_mpi_metric<long>(node, comm, &memory_deallocated, "memory-dealloc");
666  save_mpi_metric<long>(node, comm, &memory_peak, "memory-peak");
667  //
668  save_mpi_metric<int>(node, comm, &alloc_called, "memory-alloc-called");
669  save_mpi_metric<int>(node, comm, &dealloc_called, "memory-dealloc-called");
670 
671 #ifdef FLOW123D_HAVE_PETSC
672  long petsc_memory_difference = (long)timer.petsc_memory_difference;
673  long petsc_peak_memory = (long)timer.petsc_peak_memory;
674  save_mpi_metric<long>(node, comm, &petsc_memory_difference, "memory-petsc-diff");
675  save_mpi_metric<long>(node, comm, &petsc_peak_memory, "memory-petsc-peak");
676 #endif // FLOW123D_HAVE_PETSC
677 
678  return MPI_Functions::sum(&cumul_time, comm);
679  };
680 
681  add_timer_info (reduce, &children, 0, 0.0);
682  root.add_child ("children", children);
683 
684 
685  // create profiler output only once (on the first processor)
686  // only active communicator should be the one with mpi_rank 0
687  if (mpi_rank == 0) {
688  try {
689  /**
690  * Flag to property_tree::write_json method
691  * resulting in json human readable format (indents, newlines)
692  */
693  const int FLOW123D_JSON_HUMAN_READABLE = 1;
694  // write result to stream
695  property_tree::write_json (os, root, FLOW123D_JSON_HUMAN_READABLE);
696  } catch (property_tree::json_parser::json_parser_error & e) {
697  stringstream ss;
698  ss << "Throw json_parser_error: " << e.message() << "\nIn file: " << e.filename() << ", at line " << e.line() << "\n";
699  THROW( ExcMessage() << EI_Message(ss.str()) );
700  }
701  }
702  // restore memory monitoring
703  set_memory_monitoring(temp_memory_monitoring, petsc_monitor_memory);
704 }
705 
706 
707 void Profiler::output(MPI_Comm comm) {
708  int mpi_rank;
709  chkerr( MPI_Comm_rank(MPI_COMM_WORLD, &mpi_rank) );
710  if (mpi_rank == 0) {
711  output(comm, *get_default_output_stream());
712  } else {
713  ostringstream os;
714  output(comm, os );
715  }
716 }
717 
718 #endif /* FLOW123D_HAVE_MPI */
719 
720 void Profiler::output(ostream &os) {
721  // last update
722  stop_timer(0);
723  propagate_timers();
724 
725  // output header
726  property_tree::ptree root, children;
727  /**
728  * Constant representing number of MPI processes
729  * where there is no MPI to work with (so 1 process)
730  */
731  const int FLOW123D_MPI_SINGLE_PROCESS = 1;
732  output_header (root, FLOW123D_MPI_SINGLE_PROCESS);
733 
734 
735  // recursively add all timers info
736  // define lambda function which reduces timer from multiple processors
737  // non-MPI implementation is just dummy repetition of initial value
738  auto reduce = [=] (Timer &timer, property_tree::ptree &node) -> double {
739  int call_count = timer.call_count;
740  double cumul_time = timer.cumulative_time ();
741 
742  long memory_allocated = (long)timer.total_allocated_;
743  long memory_deallocated = (long)timer.total_deallocated_;
744  long memory_peak = (long)timer.max_allocated_;
745 
746  int alloc_called = timer.alloc_called;
747  int dealloc_called = timer.dealloc_called;
748 
749  save_nonmpi_metric<double>(node, &cumul_time, "cumul-time");
750  save_nonmpi_metric<int>(node, &call_count, "call-count");
751 
752  save_nonmpi_metric<long>(node, &memory_allocated, "memory-alloc");
753  save_nonmpi_metric<long>(node, &memory_deallocated, "memory-dealloc");
754  save_nonmpi_metric<long>(node, &memory_peak, "memory-peak");
755 
756  save_nonmpi_metric<int>(node, &alloc_called, "memory-alloc-called");
757  save_nonmpi_metric<int>(node, &dealloc_called, "memory-dealloc-called");
758 
759 #ifdef FLOW123D_HAVE_PETSC
760  long petsc_memory_difference = (long)timer.petsc_memory_difference;
761  long petsc_peak_memory = (long)timer.petsc_peak_memory;
762  save_nonmpi_metric<long>(node, &petsc_memory_difference, "memory-petsc-diff");
763  save_nonmpi_metric<long>(node, &petsc_peak_memory, "memory-petsc-peak");
764 #endif // FLOW123D_HAVE_PETSC
765 
766  return cumul_time;
767  };
768 
769  add_timer_info (reduce, &children, 0, 0.0);
770  root.add_child ("children", children);
771 
772 
773  try {
774  /**
775  * Flag to property_tree::write_json method
776  * resulting in json human readable format (indents, newlines)
777  */
778  const int FLOW123D_JSON_HUMAN_READABLE = 1;
779  // write result to stream
780  property_tree::write_json (os, root, FLOW123D_JSON_HUMAN_READABLE);
781  } catch (property_tree::json_parser::json_parser_error & e) {
782  stringstream ss;
783  ss << "Throw json_parser_error: " << e.message() << "\nIn file: " << e.filename() << ", at line " << e.line() << "\n";
784  THROW( ExcMessage() << EI_Message(ss.str()) );
785  }
786 }
787 
788 
789 void Profiler::output() {
790  output(*get_default_output_stream());
791 }
792 
793 void Profiler::output_header (property_tree::ptree &root, int mpi_size) {
794  time_t end_time = time(NULL);
795 
796  const char format[] = "%x %X";
797  char start_time_string[BUFSIZ] = {0};
798  strftime(start_time_string, sizeof (start_time_string) - 1, format, localtime(&start_time));
799 
800  char end_time_string[BUFSIZ] = {0};
801  strftime(end_time_string, sizeof (end_time_string) - 1, format, localtime(&end_time));
802 
803  // generate current run details
804 
805  root.put ("program-name", flow_name_);
806  root.put ("program-version", flow_version_);
807  root.put ("program-branch", flow_branch_);
808  root.put ("program-revision", flow_revision_);
809  root.put ("program-build", flow_build_);
810  root.put ("timer-resolution", boost::format("%1.9f") % Profiler::get_resolution());
811  // if constant FLOW123D_SOURCE_DIR is defined, we add this information to profiler (later purposes)
812  #ifdef FLOW123D_SOURCE_DIR
813  #endif
814 
815  // print some information about the task at the beginning
816  root.put ("task-description", task_description_);
817  root.put ("task-size", task_size_);
818 
819  //print some information about the task at the beginning
820  root.put ("run-process-count", mpi_size);
821  root.put ("run-started-at", start_time_string);
822  root.put ("run-finished-at", end_time_string);
823 }
824 
825 #ifdef FLOW123D_HAVE_PYTHON
826 void Profiler::transform_profiler_data (const string &output_file_suffix, const string &formatter) {
827 
828  if (json_filepath=="") return;
829 
830  // error under CYGWIN environment : more details in this repo
831  // https://github.com/x3mSpeedy/cygwin-python-issue
832  //
833  // For now we only support profiler report conversion in UNIX environment
834  // Windows users will have to use a python script located in bin folder
835  //
836 
837  #ifndef FLOW123D_HAVE_CYGWIN
838  // grab module and function by importing module profiler_formatter_module.py
839  PyObject * python_module = PythonLoader::load_module_by_name ("profiler.profiler_formatter_module");
840  //
841  // def convert (json_location, output_file, formatter):
842  //
843  PyObject * convert_method = PythonLoader::get_callable (python_module, "convert" );
844 
845  int argument_index = 0;
846  PyObject * arguments = PyTuple_New (3);
847 
848  // set json path location as first argument
849  PyObject * tmp = PyUnicode_FromString (json_filepath.c_str());
850  PyTuple_SetItem (arguments, argument_index++, tmp);
851 
852  // set output path location as second argument
853  tmp = PyUnicode_FromString ((json_filepath + output_file_suffix).c_str());
854  PyTuple_SetItem (arguments, argument_index++, tmp);
855 
856  // set Formatter class as third value
857  tmp = PyUnicode_FromString (formatter.c_str());
858  PyTuple_SetItem (arguments, argument_index++, tmp);
859 
860  // execute method with arguments
861  PyObject_CallObject (convert_method, arguments);
862 
863  PythonLoader::check_error();
864 
865  #else
866 
867  // print information about windows-cygwin issue and offer manual solution
868  MessageOut() << "# Note: converting json profiler reports is not"
869  << " supported under Windows or Cygwin environment for now.\n"
870  << "# You can use python script located in bin/python folder"
871  << " in order to convert json report to txt or csv format.\n"
872  << "python profiler_formatter_script.py --input \"" << json_filepath
873  << "\" --output \"profiler.txt\"" << std::endl;
874  #endif // FLOW123D_HAVE_CYGWIN
875 }
876 #else
877 void Profiler::transform_profiler_data (const string &output_file_suffix, const string &formatter) {
878 }
879 
880 #endif // FLOW123D_HAVE_PYTHON
881 
882 
883 void Profiler::uninitialize() {
884  if (_instance) {
885  // ASSERT(_instance->actual_node==0)(_instance->timers_[_instance->actual_node].tag())
886  // .error("Forbidden to uninitialize the Profiler when actual timer is not zero.");
887  _instance->stop_timer(0);
888  set_memory_monitoring(false, false);
889  delete _instance;
890  _instance = NULL;
891  }
892 }
893 bool Profiler::global_monitor_memory = false;
894 bool Profiler::petsc_monitor_memory = true;
895 void Profiler::set_memory_monitoring(const bool global_monitor, const bool petsc_monitor) {
896  global_monitor_memory = global_monitor;
897  petsc_monitor_memory = petsc_monitor;
898 }
899 
900 bool Profiler::get_global_memory_monitoring() {
901  return global_monitor_memory;
902 }
903 
904 bool Profiler::get_petsc_memory_monitoring() {
905  return petsc_monitor_memory;
906 }
907 
908 unordered_map_with_alloc & MemoryAlloc::malloc_map() {
909  static unordered_map_with_alloc static_malloc_map;
910  return static_malloc_map;
911 }
912 
913 void * Profiler::operator new (size_t size) {
914  return malloc (size);
915 }
916 
917 void Profiler::operator delete (void* p) {
918  free(p);
919 }
920 
921 void *operator new (std::size_t size) OPERATOR_NEW_THROW_EXCEPTION {
922  void * p = malloc(size);
923  Profiler::instance()->notify_malloc(size, (long)p);
924  return p;
925 }
926 
927 void *operator new[] (std::size_t size) OPERATOR_NEW_THROW_EXCEPTION {
928  void * p = malloc(size);
929  Profiler::instance()->notify_malloc(size, (long)p);
930  return p;
931 }
932 
933 void *operator new[] (std::size_t size, const std::nothrow_t&) throw() {
934  void * p = malloc(size);
935  Profiler::instance()->notify_malloc(size, (long)p);
936  return p;
937 }
938 
939 void operator delete( void *p) throw() {
940  Profiler::instance()->notify_free((long)p);
941  free(p);
942 }
943 
944 void operator delete[]( void *p) throw() {
945  Profiler::instance()->notify_free((long)p);
946  free(p);
947 }
948 
949 #else // def FLOW123D_DEBUG_PROFILER
950 
952  initialize();
953  return _instance;
954  }
955 
957 
959  if (_instance == NULL) {
960  _instance = new Profiler();
961  // set_memory_monitoring(true, true);
962  }
963 }
964 
966  if (_instance) {
967  // ASSERT(_instance->actual_node==0)(_instance->timers_[_instance->actual_node].tag())
968  // .error("Forbidden to uninitialize the Profiler when actual timer is not zero.");
969  // set_memory_monitoring(false, false);
970  // _instance->stop_timer(0);
971  delete _instance;
972  _instance = NULL;
973  }
974 }
975 
976 
977 #endif // def FLOW123D_DEBUG_PROFILER
MPI_MIN
#define MPI_MIN
Definition: mpi.h:198
python_loader.hh
std::swap
void swap(nlohmann::json &j1, nlohmann::json &j2) noexcept(is_nothrow_move_constructible< nlohmann::json >::value and is_nothrow_move_assignable< nlohmann::json >::value)
exchanges the values of two JSON objects
Definition: json.hpp:8688
ASSERT
#define ASSERT(expr)
Allow use shorter versions of macro names if these names is not used with external library.
Definition: asserts.hh:346
OPERATOR_NEW_THROW_EXCEPTION
#define OPERATOR_NEW_THROW_EXCEPTION
Definition: system.hh:53
file_path.hh
Profiler::initialize
static void initialize()
Definition: sys_profiler.cc:958
FilePath
Dedicated class for storing path to input and output files.
Definition: file_path.hh:54
MPI_MAX
#define MPI_MAX
Definition: mpi.h:197
chkerr
void chkerr(unsigned int ierr)
Replacement of new/delete operator in the spirit of xmalloc.
Definition: system.hh:147
Profiler::set_task_info
void set_task_info(string description, int size)
Definition: sys_profiler.hh:853
MPI_DOUBLE
#define MPI_DOUBLE
Definition: mpi.h:156
THROW
#define THROW(whole_exception_expr)
Wrapper for throw. Saves the throwing point.
Definition: exceptions.hh:53
system.hh
Profiler::uninitialize
static void uninitialize()
Definition: sys_profiler.cc:965
CONSTEXPR_
#define CONSTEXPR_
Definition: sys_profiler.hh:90
fmt::format
std::string format(CStringRef format_str, ArgList args)
Definition: format.h:3141
Profiler::get_resolution
double get_resolution() const
Definition: sys_profiler.hh:869
ASSERT_LT
#define ASSERT_LT(a, b)
Definition of comparative assert macro (Less Than)
Definition: asserts.hh:295
MPI_Comm_rank
#define MPI_Comm_rank
Definition: mpi.h:236
Timer
Definition: memory.cc:33
MPI_Comm_size
#define MPI_Comm_size
Definition: mpi.h:235
TimePoint
Definition: time_point.hh:70
Profiler
Definition: sys_profiler.hh:848
sys_profiler.hh
Timer::start
void start()
Definition: memory.cc:39
mpi.h
operator<<
STREAM & operator<<(STREAM &s, UpdateFlags u)
Definition: update_flags.hh:164
MPI_Functions::sum
static int sum(int *val, MPI_Comm comm)
Definition: sys_profiler.cc:50
Profiler::notify_free
void notify_free(const size_t size)
Definition: sys_profiler.hh:859
ASSERT_EQ
#define ASSERT_EQ(a, b)
Definition of comparative assert macro (EQual)
Definition: asserts.hh:327
MPI_Reduce
#define MPI_Reduce(sendbuf, recvbuf, count, datatype, op, root, comm)
Definition: mpi.h:608
MPI_INT
#define MPI_INT
Definition: mpi.h:160
MPI_SUM
#define MPI_SUM
Definition: mpi.h:196
MPI_Comm
int MPI_Comm
Definition: mpi.h:141
time_point.hh
Profiler::notify_malloc
void notify_malloc(const size_t size)
Definition: sys_profiler.hh:857
Profiler::Profiler
Profiler()
Definition: sys_profiler.hh:880
Profiler::transform_profiler_data
void transform_profiler_data(const string &output_file_suffix, const string &formatter)
Definition: sys_profiler.hh:867
WarningOut
#define WarningOut()
Macro defining 'warning' record of log.
Definition: logger.hh:246
MPI_COMM_WORLD
#define MPI_COMM_WORLD
Definition: mpi.h:123
Profiler::instance
static Profiler * instance()
Definition: sys_profiler.cc:951
MPI_Functions::max
static int max(int *val, MPI_Comm comm)
Definition: sys_profiler.cc:86
FilePath::output_file
@ output_file
Definition: file_path.hh:69
Profiler::output
void output()
Definition: sys_profiler.hh:865
Profiler::set_program_info
void set_program_info(string program_name, string program_version, string branch, string revision, string build)
Definition: sys_profiler.hh:855
Profiler::_instance
static Profiler * _instance
Definition: sys_profiler.hh:879
MPI_LONG
#define MPI_LONG
Definition: mpi.h:161
MPI_Barrier
#define MPI_Barrier(comm)
Definition: mpi.h:531
Timer::stop
void stop()
Definition: memory.cc:42
MPI_Functions::min
static int min(int *val, MPI_Comm comm)
Definition: sys_profiler.cc:68
MessageOut
#define MessageOut()
Macro defining 'message' record of log.
Definition: logger.hh:243