Flow123d  JS_before_hm-981-g3f34df1
sys_profiler.cc
Go to the documentation of this file.
1 /*!
2  *
3  * Copyright (C) 2015 Technical University of Liberec. All rights reserved.
4  *
5  * This program is free software; you can redistribute it and/or modify it under
6  * the terms of the GNU General Public License version 3 as published by the
7  * Free Software Foundation. (http://www.gnu.org/licenses/gpl-3.0.en.html)
8  *
9  * This program is distributed in the hope that it will be useful, but WITHOUT
10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
11  * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
12  *
13  *
14  * @file sys_profiler.cc
15  * @ingroup system
16  * @brief Profiler
17  */
18 
19 
20 // Fat header
21 
22 #include <fstream>
23 #include <iomanip>
24 #include <sys/param.h>
25 #include <unordered_map>
26 
27 #ifdef FLOW123D_HAVE_PYTHON
28  #include "Python.h"
29 #endif // FLOW123D_HAVE_PYTHON
30 
31 #include "sys_profiler.hh"
32 #include "system/system.hh"
33 #include "system/python_loader.hh"
34 #include <iostream>
35 #include <boost/format.hpp>
36 
37 #include "system/file_path.hh"
38 #include "system/python_loader.hh"
39 #include "mpi.h"
40 #include "time_point.hh"
41 
42 /*
43  * These should be replaced by using boost MPI interface
44  */
45 int MPI_Functions::sum(int* val, MPI_Comm comm) {
46  int total = 0;
47  MPI_Reduce(val, &total, 1, MPI_INT, MPI_SUM, 0, comm);
48  return total;
49  }
50 
51 double MPI_Functions::sum(double* val, MPI_Comm comm) {
52  double total = 0;
53  MPI_Reduce(val, &total, 1, MPI_DOUBLE, MPI_SUM, 0, comm);
54  return total;
55  }
56 
57 long MPI_Functions::sum(long* val, MPI_Comm comm) {
58  long total = 0;
59  MPI_Reduce(val, &total, 1, MPI_LONG, MPI_SUM, 0, comm);
60  return total;
61  }
62 
63 int MPI_Functions::min(int* val, MPI_Comm comm) {
64  int min = 0;
65  MPI_Reduce(val, &min, 1, MPI_INT, MPI_MIN, 0, comm);
66  return min;
67  }
68 
69 double MPI_Functions::min(double* val, MPI_Comm comm) {
70  double min = 0;
71  MPI_Reduce(val, &min, 1, MPI_DOUBLE, MPI_MIN, 0, comm);
72  return min;
73  }
74 
75 long MPI_Functions::min(long* val, MPI_Comm comm) {
76  long min = 0;
77  MPI_Reduce(val, &min, 1, MPI_LONG, MPI_MIN, 0, comm);
78  return min;
79  }
80 
81 int MPI_Functions::max(int* val, MPI_Comm comm) {
82  int max = 0;
83  MPI_Reduce(val, &max, 1, MPI_INT, MPI_MAX, 0, comm);
84  return max;
85  }
86 
87 double MPI_Functions::max(double* val, MPI_Comm comm) {
88  double max = 0;
89  MPI_Reduce(val, &max, 1, MPI_DOUBLE, MPI_MAX, 0, comm);
90  return max;
91  }
92 
93 long MPI_Functions::max(long* val, MPI_Comm comm) {
94  long max = 0;
95  MPI_Reduce(val, &max, 1, MPI_LONG, MPI_MAX, 0, comm);
96  return max;
97  }
98 
99 
100 #ifdef FLOW123D_DEBUG_PROFILER
101 /*********************************************************************************************
102  * Implementation of class Timer
103  */
104 
105 const int timer_no_child=-1;
106 
107 Timer::Timer(const CodePoint &cp, int parent)
108 : start_time(TimePoint()),
109  cumul_time(0.0),
110  call_count(0),
111  start_count(0),
112  code_point_(&cp),
113  full_hash_(cp.hash_),
114  hash_idx_(cp.hash_idx_),
115  parent_timer(parent),
116  total_allocated_(0),
117  total_deallocated_(0),
118  max_allocated_(0),
119  current_allocated_(0),
120  alloc_called(0),
121  dealloc_called(0)
122 #ifdef FLOW123D_HAVE_PETSC
123 , petsc_start_memory(0),
124  petsc_end_memory (0),
125  petsc_memory_difference(0),
126  petsc_peak_memory(0),
127  petsc_local_peak_memory(0)
128 #endif // FLOW123D_HAVE_PETSC
129 {
130  for(unsigned int i=0; i< max_n_childs ;i++) child_timers[i]=timer_no_child;
131 }
132 
133 
134 /**
135  * Debug information of the timer
136  */
137 ostream & operator <<(ostream& os, const Timer& timer) {
138  os << " Timer: " << timer.tag() << endl;
139  os << " malloc: " << timer.total_allocated_ << endl;
140  os << " dalloc: " << timer.total_deallocated_ << endl;
141  #ifdef FLOW123D_HAVE_PETSC
142  os << " start: " << timer.petsc_start_memory << endl;
143  os << " stop : " << timer.petsc_end_memory << endl;
144  os << " diff : " << timer.petsc_memory_difference << " (" << timer.petsc_end_memory - timer.petsc_start_memory << ")" << endl;
145  os << " peak : " << timer.petsc_peak_memory << " (" << timer.petsc_local_peak_memory << ")" << endl;
146  #endif // FLOW123D_HAVE_PETSC
147  os << endl;
148  return os;
149 }
150 
151 
152 double Timer::cumulative_time() const {
153  return cumul_time;
154 }
155 
156 void Profiler::accept_from_child(Timer &parent, Timer &child) {
157  int child_timer = 0;
158  for (unsigned int i = 0; i < Timer::max_n_childs; i++) {
159  child_timer = child.child_timers[i];
160  if (child_timer != timer_no_child) {
161  // propagate metrics from child to parent
162  accept_from_child(child, timers_[child_timer]);
163  }
164  }
165  // compute totals by adding values from child
166  parent.total_allocated_ += child.total_allocated_;
167  parent.total_deallocated_ += child.total_deallocated_;
168  parent.alloc_called += child.alloc_called;
169  parent.dealloc_called += child.dealloc_called;
170 
171 #ifdef FLOW123D_HAVE_PETSC
172  if (petsc_monitor_memory) {
173  // add differences from child
174  parent.petsc_memory_difference += child.petsc_memory_difference;
175  parent.current_allocated_ += child.current_allocated_;
176 
177  // when computing maximum, we take greater value from parent and child
178  // PetscMemoryGetCurrentUsage always return absolute (not relative) value
179  parent.petsc_peak_memory = max(parent.petsc_peak_memory, child.petsc_peak_memory);
180  }
181 #endif // FLOW123D_HAVE_PETSC
182 
183  parent.max_allocated_ = max(parent.max_allocated_, child.max_allocated_);
184 }
185 
186 
187 void Timer::pause() {
188 #ifdef FLOW123D_HAVE_PETSC
189  if (Profiler::get_petsc_memory_monitoring()) {
190  // get the maximum resident set size (memory used) for the program.
191  PetscMemoryGetMaximumUsage(&petsc_local_peak_memory);
192  if (petsc_peak_memory < petsc_local_peak_memory)
193  petsc_peak_memory = petsc_local_peak_memory;
194  }
195 #endif // FLOW123D_HAVE_PETSC
196 }
197 
198 void Timer::resume() {
199 #ifdef FLOW123D_HAVE_PETSC
200  if (Profiler::get_petsc_memory_monitoring()) {
201  // tell PETSc to monitor the maximum memory usage so
202  // that PetscMemoryGetMaximumUsage() will work.
203  PetscMemorySetGetMaximumUsage();
204  }
205 #endif // FLOW123D_HAVE_PETSC
206 }
207 
208 void Timer::start() {
209 #ifdef FLOW123D_HAVE_PETSC
210  if (Profiler::get_petsc_memory_monitoring()) {
211  // Tell PETSc to monitor the maximum memory usage so
212  // that PetscMemoryGetMaximumUsage() will work.
213  PetscMemorySetGetMaximumUsage();
214  PetscMemoryGetCurrentUsage (&petsc_start_memory);
215  }
216 #endif // FLOW123D_HAVE_PETSC
217 
218  if (start_count == 0) {
219  start_time = TimePoint();
220  }
221  call_count++;
222  start_count++;
223 }
224 
225 
226 
227 bool Timer::stop(bool forced) {
228 #ifdef FLOW123D_HAVE_PETSC
229  if (Profiler::get_petsc_memory_monitoring()) {
230  // get current memory usage
231  PetscMemoryGetCurrentUsage (&petsc_end_memory);
232  petsc_memory_difference += petsc_end_memory - petsc_start_memory;
233 
234  // get the maximum resident set size (memory used) for the program.
235  PetscMemoryGetMaximumUsage(&petsc_local_peak_memory);
236  if (petsc_peak_memory < petsc_local_peak_memory)
237  petsc_peak_memory = petsc_local_peak_memory;
238  }
239 #endif // FLOW123D_HAVE_PETSC
240 
241  if (forced) start_count=1;
242 
243  if (start_count == 1) {
244  cumul_time += (TimePoint() - start_time);
245  start_count--;
246  return true;
247  } else {
248  start_count--;
249  }
250  return false;
251 }
252 
253 
254 
255 void Timer::add_child(int child_index, const Timer &child)
256 {
257  unsigned int idx = child.hash_idx_;
258  if (child_timers[idx] != timer_no_child) {
259  // hash collision, find first empty place
260  unsigned int i=idx;
261  do {
262  i=( i < max_n_childs ? i+1 : 0);
263  } while (i!=idx && child_timers[i] != timer_no_child);
264  ASSERT(i!=idx)(tag()).error("Too many children of the timer");
265  idx=i;
266  }
267  //DebugOut().fmt("Adding child {} at index: {}\n", child_index, idx);
268  child_timers[idx] = child_index;
269 }
270 
271 
272 
273 string Timer::code_point_str() const {
274  return fmt::format("{%s}:{%d}, {%s}()",
275  code_point_->file_, code_point_->line_, code_point_->func_ );
276 }
277 
278 
279 /***********************************************************************************************
280  * Implementation of Profiler
281  */
282 
283 
284 Profiler * Profiler::instance(bool clear) {
285  static Profiler * _instance = NULL;
286 
287  if (clear) {
288  if (_instance != NULL) {
289  delete _instance;
290  _instance = NULL;
291  }
292  return _instance;
293  }
294 
295  if (_instance == NULL) {
296  MemoryAlloc::malloc_map().reserve(Profiler::malloc_map_reserve);
297  _instance = new Profiler();
298  }
299 
300  return _instance;
301  }
302 
303 
304 // static CONSTEXPR_ CodePoint main_cp = CODE_POINT("Whole Program");
305 const long Profiler::malloc_map_reserve = 100 * 1000;
306 
308 : actual_node(0),
309  task_size_(1),
310  start_time( time(NULL) ),
311  json_filepath("")
312 
313 {
314  static CONSTEXPR_ CodePoint main_cp = CODE_POINT("Whole Program");
315  set_memory_monitoring(true, true);
316 #ifdef FLOW123D_DEBUG_PROFILER
317  MemoryAlloc::malloc_map().reserve(Profiler::malloc_map_reserve);
318  timers_.push_back( Timer(main_cp, 0) );
319  timers_[0].start();
320 #endif
321 }
322 
323 
324 
325 void Profiler::propagate_timers() {
326  for (unsigned int i = 0; i < Timer::max_n_childs; i++) {
327  unsigned int child_timer = timers_[0].child_timers[i];
328  if ((signed int)child_timer != timer_no_child) {
329  // propagate metrics from child to Whole-Program time-frame
330  accept_from_child(timers_[0], timers_[child_timer]);
331  }
332  }
333 }
334 
335 
336 
337 void Profiler::set_task_info(string description, int size) {
338  task_description_ = description;
339  task_size_ = size;
340 }
341 
342 
343 
344 void Profiler::set_program_info(string program_name, string program_version, string branch, string revision, string build) {
345  flow_name_ = program_name;
346  flow_version_ = program_version;
347  flow_branch_ = branch;
348  flow_revision_ = revision;
349  flow_build_ = build;
350 }
351 
352 
353 
354 int Profiler::start_timer(const CodePoint &cp) {
355  unsigned int parent_node = actual_node;
356  //DebugOut().fmt("Start timer: {}\n", cp.tag_);
357  int child_idx = find_child(cp);
358  if (child_idx < 0) {
359  //DebugOut().fmt("Adding timer: {}\n", cp.tag_);
360  // tag not present - create new timer
361  child_idx=timers_.size();
362  timers_.push_back( Timer(cp, actual_node) );
363  timers_[actual_node].add_child(child_idx , timers_.back() );
364  }
365  actual_node=child_idx;
366 
367  // pause current timer
368  timers_[parent_node].pause();
369 
370  timers_[actual_node].start();
371 
372  return actual_node;
373 }
374 
375 
376 
377 int Profiler::find_child(const CodePoint &cp) {
378  Timer &timer =timers_[actual_node];
379  unsigned int idx = cp.hash_idx_;
380  unsigned int child_idx;
381  do {
382  if (timer.child_timers[idx] == timer_no_child) break; // tag is not there
383 
384  child_idx=timer.child_timers[idx];
385  ASSERT_LT(child_idx, timers_.size()).error();
386  if (timers_[child_idx].full_hash_ == cp.hash_) return child_idx;
387  idx = ( (unsigned int)(idx)==(Timer::max_n_childs - 1) ? 0 : idx+1 );
388  } while ( (unsigned int)(idx) != cp.hash_idx_ ); // passed through whole array
389  return -1;
390 }
391 
392 
393 
394 void Profiler::stop_timer(const CodePoint &cp) {
395 #ifdef FLOW123D_DEBUG
396  // check that all childrens are closed
397  Timer &timer=timers_[actual_node];
398  for(unsigned int i=0; i < Timer::max_n_childs; i++)
399  if (timer.child_timers[i] != timer_no_child)
400  ASSERT(! timers_[timer.child_timers[i]].running())(timers_[timer.child_timers[i]].tag())(timer.tag())
401  .error("Child timer running while closing timer.");
402 #endif
403  unsigned int child_timer = actual_node;
404  if ( cp.hash_ != timers_[actual_node].full_hash_) {
405  // timer to close is not actual - we search for it above actual
406  for(unsigned int node=actual_node; node != 0; node=timers_[node].parent_timer) {
407  if ( cp.hash_ == timers_[node].full_hash_) {
408  // found above - close all nodes between
409  for(; (unsigned int)(actual_node) != node; actual_node=timers_[actual_node].parent_timer) {
410  WarningOut() << "Timer to close '" << cp.tag_ << "' do not match actual timer '"
411  << timers_[actual_node].tag() << "'. Force closing actual." << std::endl;
412  timers_[actual_node].stop(true);
413  }
414  // close 'node' itself
415  timers_[actual_node].stop(false);
416  actual_node = timers_[actual_node].parent_timer;
417 
418  // actual_node == child_timer indicates this is root
419  if (actual_node == child_timer)
420  return;
421 
422  // resume current timer
423  timers_[actual_node].resume();
424  return;
425  }
426  }
427  // node not found - do nothing
428  return;
429  }
430  // node to close match the actual
431  timers_[actual_node].stop(false);
432  actual_node = timers_[actual_node].parent_timer;
433 
434  // actual_node == child_timer indicates this is root
435  if (actual_node == child_timer)
436  return;
437 
438  // resume current timer
439  timers_[actual_node].resume();
440 }
441 
442 
443 
444 void Profiler::stop_timer(int timer_index) {
445  // stop_timer with CodePoint type
446  // timer which is still running MUST be the same as actual_node index
447  // if timer is not running index will differ
448  if (timers_[timer_index].running()) {
449  ASSERT_EQ(timer_index, (int)actual_node).error();
450  stop_timer(*timers_[timer_index].code_point_);
451  }
452 
453 }
454 
455 
456 
457 void Profiler::add_calls(unsigned int n_calls) {
458  timers_[actual_node].call_count += n_calls-1;
459 }
460 
461 
462 
463 void Profiler::notify_malloc(const size_t size, const long p) {
464  if (!global_monitor_memory)
465  return;
466 
467  MemoryAlloc::malloc_map()[p] = static_cast<int>(size);
468  timers_[actual_node].total_allocated_ += size;
469  timers_[actual_node].current_allocated_ += size;
470  timers_[actual_node].alloc_called++;
471 
472  if (timers_[actual_node].current_allocated_ > timers_[actual_node].max_allocated_)
473  timers_[actual_node].max_allocated_ = timers_[actual_node].current_allocated_;
474 }
475 
476 
477 
478 void Profiler::notify_free(const long p) {
479  if (!global_monitor_memory)
480  return;
481 
482  int size = sizeof(p);
483  if (MemoryAlloc::malloc_map()[(long)p] > 0) {
484  size = MemoryAlloc::malloc_map()[(long)p];
485  MemoryAlloc::malloc_map().erase((long)p);
486  }
487  timers_[actual_node].total_deallocated_ += size;
488  timers_[actual_node].current_allocated_ -= size;
489  timers_[actual_node].dealloc_called++;
490 }
491 
492 
493 double Profiler::get_resolution () {
494  const int measurements = 100;
495  double result = 0;
496 
497  // perform 100 measurements
498  for (unsigned int i = 1; i < measurements; i++) {
499  TimePoint t1 = TimePoint ();
500  TimePoint t2 = TimePoint ();
501 
502  // double comparison should be avoided
503  while ((t2 - t1) == 0) t2 = TimePoint ();
504  // while ((t2.ticks - t1.ticks) == 0) t2 = TimePoint ();
505 
506  result += t2 - t1;
507  }
508 
509  return (result / measurements) * 1000; // ticks to seconds to microseconds conversion
510 }
511 
512 
513 std::string common_prefix( std::string a, std::string b ) {
514  if( a.size() > b.size() ) std::swap(a,b) ;
515  return std::string( a.begin(), std::mismatch( a.begin(), a.end(), b.begin() ).first ) ;
516 }
517 
518 
519 
520 template<typename ReduceFunctor>
521 void Profiler::add_timer_info(ReduceFunctor reduce, nlohmann::json* holder, int timer_idx, double parent_time) {
522 
523  // get timer and check preconditions
524  Timer &timer = timers_[timer_idx];
525  ASSERT(timer_idx >=0)(timer_idx).error("Wrong timer index.");
526  ASSERT(timer.parent_timer >=0).error("Inconsistent tree.");
527 
528  // fix path
529  string filepath = timer.code_point_->file_;
530 
531  // if constant FLOW123D_SOURCE_DIR is defined, we try to erase it from beginning of each CodePoint's filepath
532  #ifdef FLOW123D_SOURCE_DIR
533  string common_path = common_prefix (string(FLOW123D_SOURCE_DIR), filepath);
534  filepath.erase (0, common_path.size());
535  #endif
536 
537 
538  // generate node representing this timer
539  // add basic information
540  nlohmann::json node;
541  double cumul_time_sum;
542  node["tag"] = timer.tag();
543  node["file-path"] = filepath;
544  node["file-line"] = timer.code_point_->line_;
545  node["function"] = timer.code_point_->func_;
546  cumul_time_sum = reduce(timer, node);
547 
548 
549  // statistical info
550  if (timer_idx == 0) parent_time = cumul_time_sum;
551  double percent = parent_time > 1.0e-10 ? cumul_time_sum / parent_time * 100.0 : 0.0;
552  node["percent"] = percent;
553 
554  // write times children timers using secured child_timers array
555  nlohmann::json children;
556  bool has_children = false;
557  for (unsigned int i = 0; i < Timer::max_n_childs; i++) {
558  if (timer.child_timers[i] != timer_no_child) {
559  add_timer_info (reduce, &children, timer.child_timers[i], cumul_time_sum);
560  /*
561  if (child_timers[i] != timer_no_child) {
562  add_timer_info (reduce, &children, child_timers[i], cumul_time_sum); */
563  has_children = true;
564  }
565  }
566 
567  // add children tag and other info if present
568  if (has_children) {
569  node["children"] = children;
570  }
571 
572  // add to the array
573  holder->push_back(node);
574 }
575 
576 
577 template <class T>
578 void save_nonmpi_metric (nlohmann::json &node, T * ptr, string name) {
579  node[name + "-min"] = *ptr;
580  node[name + "-max"] = *ptr;
581  node[name + "-sum"] = *ptr;
582 }
583 
584 std::shared_ptr<std::ostream> Profiler::get_default_output_stream() {
585  json_filepath = FilePath("profiler_info.log.json", FilePath::output_file);
586 
587  //LogOut() << "output into: " << json_filepath << std::endl;
588  return make_shared<ofstream>(json_filepath.c_str());
589 }
590 
591 
592 #ifdef FLOW123D_HAVE_MPI
593 template <class T>
594 void save_mpi_metric (nlohmann::json &node, MPI_Comm comm, T * ptr, string name) {
595  node[name + "-min"] = MPI_Functions::min(ptr, comm);
596  node[name + "-max"] = MPI_Functions::max(ptr, comm);
597  node[name + "-sum"] = MPI_Functions::sum(ptr, comm);
598 }
599 
600 void Profiler::output(MPI_Comm comm, ostream &os) {
601  int mpi_rank, mpi_size;
602  //wait until profiling on all processors is finished
603  MPI_Barrier(comm);
604  stop_timer(0);
605  propagate_timers();
606 
607  // stop monitoring memory
608  bool temp_memory_monitoring = global_monitor_memory;
609  set_memory_monitoring(false, petsc_monitor_memory);
610 
611  chkerr( MPI_Comm_rank(MPI_COMM_WORLD, &mpi_rank) );
612  MPI_Comm_size(comm, &mpi_size);
613 
614  // output header
615  nlohmann::json jsonRoot, jsonChildren;
616 
617  // recursively add all timers info
618  // define lambda function which reduces timer from multiple processors
619  // MPI implementation uses MPI call to reduce values
620  auto reduce = [=] (Timer &timer, nlohmann::json &node) -> double {
621  int call_count = timer.call_count;
622  double cumul_time = timer.cumulative_time ();
623 
624  long memory_allocated = (long)timer.total_allocated_;
625  long memory_deallocated = (long)timer.total_deallocated_;
626  long memory_peak = (long)timer.max_allocated_;
627 
628  int alloc_called = timer.alloc_called;
629  int dealloc_called = timer.dealloc_called;
630 
631 
632  save_mpi_metric<double>(node, comm, &cumul_time, "cumul-time");
633  save_mpi_metric<int>(node, comm, &call_count, "call-count");
634 
635  save_mpi_metric<long>(node, comm, &memory_allocated, "memory-alloc");
636  save_mpi_metric<long>(node, comm, &memory_deallocated, "memory-dealloc");
637  save_mpi_metric<long>(node, comm, &memory_peak, "memory-peak");
638  //
639  save_mpi_metric<int>(node, comm, &alloc_called, "memory-alloc-called");
640  save_mpi_metric<int>(node, comm, &dealloc_called, "memory-dealloc-called");
641 
642 #ifdef FLOW123D_HAVE_PETSC
643  long petsc_memory_difference = (long)timer.petsc_memory_difference;
644  long petsc_peak_memory = (long)timer.petsc_peak_memory;
645  save_mpi_metric<long>(node, comm, &petsc_memory_difference, "memory-petsc-diff");
646  save_mpi_metric<long>(node, comm, &petsc_peak_memory, "memory-petsc-peak");
647 #endif // FLOW123D_HAVE_PETSC
648 
649  return MPI_Functions::sum(&cumul_time, comm);
650  };
651 
652  add_timer_info (reduce, &jsonChildren, 0, 0.0);
653  jsonRoot["children"] = jsonChildren;
654  output_header(jsonRoot, mpi_size);
655 
656 
657  // create profiler output only once (on the first processor)
658  // only active communicator should be the one with mpi_rank 0
659  if (mpi_rank == 0) {
660  try {
661  /**
662  * indent size
663  * results in json human readable format (indents, newlines)
664  */
665  const int FLOW123D_JSON_HUMAN_READABLE = 2;
666  // write result to stream
667  os << jsonRoot.dump(FLOW123D_JSON_HUMAN_READABLE) << endl;
668 
669  } catch (exception & e) {
670  stringstream ss;
671  ss << "nlohmann::json::dump error: " << e.what() << "\n";
672  THROW( ExcMessage() << EI_Message(ss.str()) );
673  }
674  }
675  // restore memory monitoring
676  set_memory_monitoring(temp_memory_monitoring, petsc_monitor_memory);
677 }
678 
679 
680 void Profiler::output(MPI_Comm comm, string profiler_path /* = "" */) {
681  int mpi_rank;
682  chkerr(MPI_Comm_rank(MPI_COMM_WORLD, &mpi_rank));
683 
684  if (mpi_rank == 0) {
685  if (profiler_path == "") {
686  output(comm, *get_default_output_stream());
687  } else {
688  json_filepath = profiler_path;
689  std::shared_ptr<std::ostream> os = make_shared<ofstream>(profiler_path.c_str());
690  output(comm, *os);
691  }
692  } else {
693  ostringstream os;
694  output(comm, os);
695  }
696 }
697 
698 #endif /* FLOW123D_HAVE_MPI */
699 
700 void Profiler::output(ostream &os) {
701  // last update
702  stop_timer(0);
703  propagate_timers();
704 
705  // output header
706  nlohmann::json jsonRoot, jsonChildren;
707  /**
708  * Constant representing number of MPI processes
709  * where there is no MPI to work with (so 1 process)
710  */
711  const int FLOW123D_MPI_SINGLE_PROCESS = 1;
712  output_header(jsonRoot, FLOW123D_MPI_SINGLE_PROCESS);
713 
714 
715  // recursively add all timers info
716  // define lambda function which reduces timer from multiple processors
717  // non-MPI implementation is just dummy repetition of initial value
718  auto reduce = [=] (Timer &timer, nlohmann::json &node) -> double {
719  int call_count = timer.call_count;
720  double cumul_time = timer.cumulative_time ();
721 
722  long memory_allocated = (long)timer.total_allocated_;
723  long memory_deallocated = (long)timer.total_deallocated_;
724  long memory_peak = (long)timer.max_allocated_;
725 
726  int alloc_called = timer.alloc_called;
727  int dealloc_called = timer.dealloc_called;
728 
729  save_nonmpi_metric<double>(node, &cumul_time, "cumul-time");
730  save_nonmpi_metric<int>(node, &call_count, "call-count");
731 
732  save_nonmpi_metric<long>(node, &memory_allocated, "memory-alloc");
733  save_nonmpi_metric<long>(node, &memory_deallocated, "memory-dealloc");
734  save_nonmpi_metric<long>(node, &memory_peak, "memory-peak");
735 
736  save_nonmpi_metric<int>(node, &alloc_called, "memory-alloc-called");
737  save_nonmpi_metric<int>(node, &dealloc_called, "memory-dealloc-called");
738 
739 #ifdef FLOW123D_HAVE_PETSC
740  long petsc_memory_difference = (long)timer.petsc_memory_difference;
741  long petsc_peak_memory = (long)timer.petsc_peak_memory;
742  save_nonmpi_metric<long>(node, &petsc_memory_difference, "memory-petsc-diff");
743  save_nonmpi_metric<long>(node, &petsc_peak_memory, "memory-petsc-peak");
744 #endif // FLOW123D_HAVE_PETSC
745 
746  return cumul_time;
747  };
748 
749  add_timer_info(reduce, &jsonChildren, 0, 0.0);
750  jsonRoot["children"] = jsonChildren;
751 
752  try {
753  /**
754  * indent size
755  * results in json human readable format (indents, newlines)
756  */
757  const int FLOW123D_JSON_HUMAN_READABLE = 2;
758  // write result to stream
759  os << jsonRoot.dump(FLOW123D_JSON_HUMAN_READABLE) << endl;
760 
761  } catch (exception & e) {
762  stringstream ss;
763  ss << "nlohmann::json::dump error: " << e.what() << "\n";
764  THROW( ExcMessage() << EI_Message(ss.str()) );
765  }
766 }
767 
768 
769 void Profiler::output(string profiler_path /* = "" */) {
770  if(profiler_path == "") {
771  output(*get_default_output_stream());
772  } else {
773  json_filepath = profiler_path;
774  std::shared_ptr<std::ostream> os = make_shared<ofstream>(profiler_path.c_str());
775  output(*os);
776  }
777 }
778 
779 void Profiler::output_header (nlohmann::json &root, int mpi_size) {
780  time_t end_time = time(NULL);
781 
782  const char format[] = "%x %X";
783  char start_time_string[BUFSIZ] = {0};
784  strftime(start_time_string, sizeof (start_time_string) - 1, format, localtime(&start_time));
785 
786  char end_time_string[BUFSIZ] = {0};
787  strftime(end_time_string, sizeof (end_time_string) - 1, format, localtime(&end_time));
788 
789  // generate current run details
790  root["program-name"] = flow_name_;
791  root["program-version"] = flow_version_;
792  root["program-branch"] = flow_branch_;
793  root["program-revision"] = flow_revision_;
794  root["program-build"] = flow_build_;
795  root["timer-resolution"] = Profiler::get_resolution();
796 
797  // print some information about the task at the beginning
798  root["task-description"] = task_description_;
799  root["task-size"] = task_size_;
800 
801  //print some information about the task at the beginning
802  root["run-process-count"] = mpi_size;
803  root["run-started-at"] = start_time_string;
804  root["run-finished-at"] = end_time_string;
805 }
806 
807 #ifdef FLOW123D_HAVE_PYTHON
808 void Profiler::transform_profiler_data (const string &output_file_suffix, const string &formatter) {
809 
810  if (json_filepath == "") return;
811 
812  // error under CYGWIN environment : more details in this repo
813  // https://github.com/x3mSpeedy/cygwin-python-issue
814  //
815  // For now we only support profiler report conversion in UNIX environment
816  // Windows users will have to use a python script located in bin folder
817  //
818 
819  #ifndef FLOW123D_HAVE_CYGWIN
820  // grab module and function by importing module profiler_formatter_module.py
821  PyObject * python_module = PythonLoader::load_module_by_name ("profiler.profiler_formatter_module");
822  //
823  // def convert (json_location, output_file, formatter):
824  //
825  PyObject * convert_method = PythonLoader::get_callable (python_module, "convert" );
826 
827  int argument_index = 0;
828  PyObject * arguments = PyTuple_New (3);
829 
830  // set json path location as first argument
831  PyObject * tmp = PyUnicode_FromString (json_filepath.c_str());
832  PyTuple_SetItem (arguments, argument_index++, tmp);
833 
834  // set output path location as second argument
835  tmp = PyUnicode_FromString ((json_filepath + output_file_suffix).c_str());
836  PyTuple_SetItem (arguments, argument_index++, tmp);
837 
838  // set Formatter class as third value
839  tmp = PyUnicode_FromString (formatter.c_str());
840  PyTuple_SetItem (arguments, argument_index++, tmp);
841 
842  // execute method with arguments
843  PyObject_CallObject (convert_method, arguments);
844 
845  PythonLoader::check_error();
846 
847  #else
848 
849  // print information about windows-cygwin issue and offer manual solution
850  MessageOut() << "# Note: converting json profiler reports is not"
851  << " supported under Windows or Cygwin environment for now.\n"
852  << "# You can use python script located in bin/python folder"
853  << " in order to convert json report to txt or csv format.\n"
854  << "python profiler_formatter_script.py --input \"" << json_filepath
855  << "\" --output \"profiler.txt\"" << std::endl;
856  #endif // FLOW123D_HAVE_CYGWIN
857 }
858 #else
859 void Profiler::transform_profiler_data (const string &, const string &) {
860 }
861 
862 #endif // FLOW123D_HAVE_PYTHON
863 
864 
865 void Profiler::uninitialize() {
866  if (Profiler::instance()) {
867  ASSERT(Profiler::instance()->actual_node==0)(Profiler::instance()->timers_[Profiler::instance()->actual_node].tag())
868  .error("Forbidden to uninitialize the Profiler when actual timer is not zero.");
869  Profiler::instance()->stop_timer(0);
870  set_memory_monitoring(false, false);
871  Profiler::instance(true);
872  }
873 }
874 bool Profiler::global_monitor_memory = false;
875 bool Profiler::petsc_monitor_memory = true;
876 void Profiler::set_memory_monitoring(const bool global_monitor, const bool petsc_monitor) {
877  global_monitor_memory = global_monitor;
878  petsc_monitor_memory = petsc_monitor;
879 }
880 
881 bool Profiler::get_global_memory_monitoring() {
882  return global_monitor_memory;
883 }
884 
885 bool Profiler::get_petsc_memory_monitoring() {
886  return petsc_monitor_memory;
887 }
888 
889 unordered_map_with_alloc & MemoryAlloc::malloc_map() {
890  static unordered_map_with_alloc static_malloc_map;
891  return static_malloc_map;
892 }
893 
894 void * Profiler::operator new (size_t size) {
895  return malloc (size);
896 }
897 
898 void Profiler::operator delete (void* p) {
899  free(p);
900 }
901 
902 void *operator new (std::size_t size) OPERATOR_NEW_THROW_EXCEPTION {
903  void * p = malloc(size);
904  Profiler::instance()->notify_malloc(size, (long)p);
905  return p;
906 }
907 
908 void *operator new[] (std::size_t size) OPERATOR_NEW_THROW_EXCEPTION {
909  void * p = malloc(size);
910  Profiler::instance()->notify_malloc(size, (long)p);
911  return p;
912 }
913 
914 void *operator new[] (std::size_t size, const std::nothrow_t&) throw() {
915  void * p = malloc(size);
916  Profiler::instance()->notify_malloc(size, (long)p);
917  return p;
918 }
919 
920 void operator delete( void *p) throw() {
921  Profiler::instance()->notify_free((long)p);
922  free(p);
923 }
924 
925 void operator delete( void *p, std::size_t) throw() {
926  Profiler::instance()->notify_free((long)p);
927  free(p);
928 }
929 
930 void operator delete[]( void *p) throw() {
931  Profiler::instance()->notify_free((long)p);
932  free(p);
933 }
934 
935 void operator delete[]( void *p, std::size_t) throw() {
936  Profiler::instance()->notify_free((long)p);
937  free(p);
938 }
939 
940 #else // def FLOW123D_DEBUG_PROFILER
941 
942 Profiler * Profiler::instance(bool clear) {
943  static Profiler * _instance = new Profiler();
944  return _instance;
945 }
946 
947 
948 // void Profiler::initialize() {
949 // if (_instance == NULL) {
950 // _instance = new Profiler();
951 // set_memory_monitoring(true, true);
952 // }
953 // }
954 
956  if (Profiler::instance()) {
957  ASSERT(Profiler::instance()->actual_node==0)(Profiler::instance()->timers_[Profiler::instance()->actual_node].tag())
958  .error("Forbidden to uninitialize the Profiler when actual timer is not zero.");
959  set_memory_monitoring(false, false);
960  Profiler::instance()->stop_timer(0);
961  // delete _instance;
962  // _instance = NULL;
963  }
964 }
965 
966 
967 #endif // def FLOW123D_DEBUG_PROFILER
#define MPI_LONG
Definition: mpi.h:161
void set_task_info(string, int)
static int min(int *val, MPI_Comm comm)
Definition: sys_profiler.cc:63
#define CONSTEXPR_
Definition: sys_profiler.hh:86
int MPI_Comm
Definition: mpi.h:141
#define OPERATOR_NEW_THROW_EXCEPTION
Definition: system.hh:54
static int sum(int *val, MPI_Comm comm)
Definition: sys_profiler.cc:45
a class to store JSON values
Definition: json.hpp:173
#define MessageOut()
Macro defining &#39;message&#39; record of log.
Definition: logger.hh:255
double get_resolution() const
std::string format(CStringRef format_str, ArgList args)
Definition: format.h:3141
#define MPI_SUM
Definition: mpi.h:196
void chkerr(unsigned int ierr)
Replacement of new/delete operator in the spirit of xmalloc.
Definition: system.hh:148
#define ASSERT(expr)
Allow use shorter versions of macro names if these names is not used with external library...
Definition: asserts.hh:347
void set_program_info(string, string, string, string, string)
#define MPI_Reduce(sendbuf, recvbuf, count, datatype, op, root, comm)
Definition: mpi.h:608
static void uninitialize()
void output(MPI_Comm, ostream &)
void notify_malloc(const size_t)
#define MPI_MIN
Definition: mpi.h:198
static long min(long *val, MPI_Comm comm)
Definition: sys_profiler.cc:75
static int max(int *val, MPI_Comm comm)
Definition: sys_profiler.cc:81
void start()
Definition: memory.cc:39
void swap(nlohmann::json &j1, nlohmann::json &j2) noexcept(is_nothrow_move_constructible< nlohmann::json >::value andis_nothrow_move_assignable< nlohmann::json >::value)
exchanges the values of two JSON objects
Definition: json.hpp:8688
#define MPI_Comm_size
Definition: mpi.h:235
#define MPI_DOUBLE
Definition: mpi.h:156
STREAM & operator<<(STREAM &s, UpdateFlags u)
void push_back(basic_json &&val)
add an object to an array
Definition: json.hpp:4686
#define MPI_Comm_rank
Definition: mpi.h:236
Dedicated class for storing path to input and output files.
Definition: file_path.hh:54
void notify_free(const size_t)
#define MPI_INT
Definition: mpi.h:160
#define ASSERT_LT(a, b)
Definition of comparative assert macro (Less Than)
Definition: asserts.hh:296
string_t dump(const int indent=-1) const
serialization
Definition: json.hpp:2079
#define WarningOut()
Macro defining &#39;warning&#39; record of log.
Definition: logger.hh:258
#define MPI_COMM_WORLD
Definition: mpi.h:123
#define MPI_MAX
Definition: mpi.h:197
static Profiler * instance(bool clear=false)
Definition: memory.cc:33
#define MPI_Barrier(comm)
Definition: mpi.h:531
void stop()
Definition: memory.cc:42
#define THROW(whole_exception_expr)
Wrapper for throw. Saves the throwing point.
Definition: exceptions.hh:53
#define ASSERT_EQ(a, b)
Definition of comparative assert macro (EQual)
Definition: asserts.hh:328
void transform_profiler_data(const string &, const string &)