Flow123d  last_with_con_2.0.0-4-g42e6930
sys_profiler.cc
Go to the documentation of this file.
1 /*!
2  *
3  * Copyright (C) 2015 Technical University of Liberec. All rights reserved.
4  *
5  * This program is free software; you can redistribute it and/or modify it under
6  * the terms of the GNU General Public License version 3 as published by the
7  * Free Software Foundation. (http://www.gnu.org/licenses/gpl-3.0.en.html)
8  *
9  * This program is distributed in the hope that it will be useful, but WITHOUT
10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
11  * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
12  *
13  *
14  * @file sys_profiler.cc
15  * @ingroup system
16  * @brief Profiler
17  */
18 
19 #include <fstream>
20 #include <iomanip>
21 #include <sys/param.h>
22 
23 #ifdef FLOW123D_HAVE_PYTHON
24  #include "Python.h"
25 #endif // FLOW123D_HAVE_PYTHON
26 
27 #include "sys_profiler.hh"
28 #include "system/system.hh"
29 #include "system/python_loader.hh"
30 #include <iostream>
31 #include <boost/format.hpp>
32 #include <boost/property_tree/ptree.hpp>
33 #include <boost/property_tree/json_parser.hpp>
34 #include <boost/unordered_map.hpp>
35 
36 #include "system/file_path.hh"
37 #include "system/python_loader.hh"
38 #include "mpi.h"
39 #include "time_point.hh"
40 
41 // namespace alias
42 namespace property_tree = boost::property_tree;
43 
44 /*
45  * These should be replaced by using boost MPI interface
46  */
47 int MPI_Functions::sum(int* val, MPI_Comm comm) {
48  int total = 0;
49  MPI_Reduce(val, &total, 1, MPI_INT, MPI_SUM, 0, comm);
50  return total;
51  }
52 
53 double MPI_Functions::sum(double* val, MPI_Comm comm) {
54  double total = 0;
55  MPI_Reduce(val, &total, 1, MPI_DOUBLE, MPI_SUM, 0, comm);
56  return total;
57  }
58 
59 long MPI_Functions::sum(long* val, MPI_Comm comm) {
60  long total = 0;
61  MPI_Reduce(val, &total, 1, MPI_LONG, MPI_SUM, 0, comm);
62  return total;
63  }
64 
65 int MPI_Functions::min(int* val, MPI_Comm comm) {
66  int min = 0;
67  MPI_Reduce(val, &min, 1, MPI_INT, MPI_MIN, 0, comm);
68  return min;
69  }
70 
71 double MPI_Functions::min(double* val, MPI_Comm comm) {
72  double min = 0;
73  MPI_Reduce(val, &min, 1, MPI_DOUBLE, MPI_MIN, 0, comm);
74  return min;
75  }
76 
77 long MPI_Functions::min(long* val, MPI_Comm comm) {
78  long min = 0;
79  MPI_Reduce(val, &min, 1, MPI_LONG, MPI_MIN, 0, comm);
80  return min;
81  }
82 
83 int MPI_Functions::max(int* val, MPI_Comm comm) {
84  int max = 0;
85  MPI_Reduce(val, &max, 1, MPI_INT, MPI_MAX, 0, comm);
86  return max;
87  }
88 
89 double MPI_Functions::max(double* val, MPI_Comm comm) {
90  double max = 0;
91  MPI_Reduce(val, &max, 1, MPI_DOUBLE, MPI_MAX, 0, comm);
92  return max;
93  }
94 
95 long MPI_Functions::max(long* val, MPI_Comm comm) {
96  long max = 0;
97  MPI_Reduce(val, &max, 1, MPI_LONG, MPI_MAX, 0, comm);
98  return max;
99  }
100 
101 
102 #ifdef FLOW123D_DEBUG_PROFILER
103 /*********************************************************************************************
104  * Implementation of class Timer
105  */
106 
107 const int timer_no_child=-1;
108 
109 Timer::Timer(const CodePoint &cp, int parent)
110 : start_time(TimePoint()),
111  cumul_time(0.0),
112  call_count(0),
113  start_count(0),
114  code_point_(&cp),
115  full_hash_(cp.hash_),
116  hash_idx_(cp.hash_idx_),
117  parent_timer(parent),
118  total_allocated_(0),
119  total_deallocated_(0),
120  max_allocated_(0),
121  current_allocated_(0),
122  alloc_called(0),
123  dealloc_called(0)
124 #ifdef FLOW123D_HAVE_PETSC
125 , petsc_start_memory(0),
126  petsc_end_memory (0),
127  petsc_memory_difference(0),
128  petsc_peak_memory(0),
129  petsc_local_peak_memory(0)
130 #endif // FLOW123D_HAVE_PETSC
131 {
132  for(unsigned int i=0; i< max_n_childs ;i++) child_timers[i]=timer_no_child;
133 }
134 
135 
136 /**
137  * Debug information of the timer
138  */
139 ostream & operator <<(ostream& os, const Timer& timer) {
140  os << " Timer: " << timer.tag() << endl;
141  os << " malloc: " << timer.total_allocated_ << endl;
142  os << " dalloc: " << timer.total_deallocated_ << endl;
143  #ifdef FLOW123D_HAVE_PETSC
144  os << " start: " << timer.petsc_start_memory << endl;
145  os << " stop : " << timer.petsc_end_memory << endl;
146  os << " diff : " << timer.petsc_memory_difference << " (" << timer.petsc_end_memory - timer.petsc_start_memory << ")" << endl;
147  os << " peak : " << timer.petsc_peak_memory << " (" << timer.petsc_local_peak_memory << ")" << endl;
148  #endif // FLOW123D_HAVE_PETSC
149  os << endl;
150  return os;
151 }
152 
153 
154 double Timer::cumulative_time() const {
155  return cumul_time;
156 }
157 
158 void Profiler::accept_from_child(Timer &parent, Timer &child) {
159  int child_timer = 0;
160  for (unsigned int i = 0; i < Timer::max_n_childs; i++) {
161  child_timer = child.child_timers[i];
162  if (child_timer != timer_no_child) {
163  // propagate metrics from child to parent
164  accept_from_child(child, timers_[child_timer]);
165  }
166  }
167  // compute totals by adding values from child
168  parent.total_allocated_ += child.total_allocated_;
169  parent.total_deallocated_ += child.total_deallocated_;
170  parent.alloc_called += child.alloc_called;
171  parent.dealloc_called += child.dealloc_called;
172 
173 #ifdef FLOW123D_HAVE_PETSC
174  if (petsc_monitor_memory) {
175  // add differences from child
176  parent.petsc_memory_difference += child.petsc_memory_difference;
177  parent.current_allocated_ += child.current_allocated_;
178 
179  // when computing maximum, we take greater value from parent and child
180  // PetscMemoryGetCurrentUsage always return absolute (not relative) value
181  parent.petsc_peak_memory = max(parent.petsc_peak_memory, child.petsc_peak_memory);
182  }
183 #endif // FLOW123D_HAVE_PETSC
184 
185  parent.max_allocated_ = max(parent.max_allocated_, child.max_allocated_);
186 }
187 
188 
189 void Timer::pause() {
190 #ifdef FLOW123D_HAVE_PETSC
191  if (Profiler::get_petsc_memory_monitoring()) {
192  // get the maximum resident set size (memory used) for the program.
193  PetscMemoryGetMaximumUsage(&petsc_local_peak_memory);
194  if (petsc_peak_memory < petsc_local_peak_memory)
195  petsc_peak_memory = petsc_local_peak_memory;
196  }
197 #endif // FLOW123D_HAVE_PETSC
198 }
199 
200 void Timer::resume() {
201 #ifdef FLOW123D_HAVE_PETSC
202  if (Profiler::get_petsc_memory_monitoring()) {
203  // tell PETSc to monitor the maximum memory usage so
204  // that PetscMemoryGetMaximumUsage() will work.
205  PetscMemorySetGetMaximumUsage();
206  }
207 #endif // FLOW123D_HAVE_PETSC
208 }
209 
210 void Timer::start() {
211 #ifdef FLOW123D_HAVE_PETSC
212  if (Profiler::get_petsc_memory_monitoring()) {
213  // Tell PETSc to monitor the maximum memory usage so
214  // that PetscMemoryGetMaximumUsage() will work.
215  PetscMemorySetGetMaximumUsage();
216  PetscMemoryGetCurrentUsage (&petsc_start_memory);
217  }
218 #endif // FLOW123D_HAVE_PETSC
219 
220  if (start_count == 0) {
221  start_time = TimePoint();
222  }
223  call_count++;
224  start_count++;
225 }
226 
227 
228 
229 bool Timer::stop(bool forced) {
230 #ifdef FLOW123D_HAVE_PETSC
231  if (Profiler::get_petsc_memory_monitoring()) {
232  // get current memory usage
233  PetscMemoryGetCurrentUsage (&petsc_end_memory);
234  petsc_memory_difference += petsc_end_memory - petsc_start_memory;
235 
236  // get the maximum resident set size (memory used) for the program.
237  PetscMemoryGetMaximumUsage(&petsc_local_peak_memory);
238  if (petsc_peak_memory < petsc_local_peak_memory)
239  petsc_peak_memory = petsc_local_peak_memory;
240  }
241 #endif // FLOW123D_HAVE_PETSC
242 
243  if (forced) start_count=1;
244 
245  if (start_count == 1) {
246  cumul_time += (TimePoint() - start_time);
247  start_count--;
248  return true;
249  } else {
250  start_count--;
251  }
252  return false;
253 }
254 
255 
256 
257 void Timer::add_child(int child_index, const Timer &child)
258 {
259  unsigned int idx = child.hash_idx_;
260  if (child_timers[idx] != timer_no_child) {
261  // hash collision, find first empty place
262  unsigned int i=idx;
263  do {
264  i=( i < max_n_childs ? i+1 : 0);
265  } while (i!=idx && child_timers[i] != timer_no_child);
266  ASSERT(i!=idx)(tag()).error("Too many children of the timer");
267  idx=i;
268  }
269  //DebugOut().fmt("Adding child {} at index: {}\n", child_index, idx);
270  child_timers[idx] = child_index;
271 }
272 
273 
274 
275 string Timer::code_point_str() const {
276  return boost::str( boost::format("%s:%d, %s()") % code_point_->file_ % code_point_->line_ % code_point_->func_ );
277 }
278 
279 
280 /***********************************************************************************************
281  * Implementation of Profiler
282  */
283 
284 
286  if (_instance == NULL) {
287  MemoryAlloc::malloc_map().reserve(Profiler::malloc_map_reserve);
288  _instance = new Profiler();
289  }
290  return _instance;
291  }
292 
293 
294 static CONSTEXPR_ CodePoint main_cp = CODE_POINT("Whole Program");
296 const long Profiler::malloc_map_reserve = 100 * 1000;
297 CodePoint Profiler::null_code_point = CodePoint("__no_tag__", "__no_file__", "__no_func__", 0);
298 
299 void Profiler::initialize() {
300  instance();
301  set_memory_monitoring(true, true);
302 }
303 
304 
306 : actual_node(0),
307  task_size_(1),
308  start_time( time(NULL) ),
309  json_filepath("")
310 
311 {
312 #ifdef FLOW123D_DEBUG_PROFILER
313  timers_.push_back( Timer(main_cp, 0) );
314  timers_[0].start();
315 #endif
316 }
317 
318 
319 
320 void Profiler::propagate_timers() {
321  for (unsigned int i = 0; i < Timer::max_n_childs; i++) {
322  unsigned int child_timer = timers_[0].child_timers[i];
323  if ((signed int)child_timer != timer_no_child) {
324  // propagate metrics from child to Whole-Program time-frame
325  accept_from_child(timers_[0], timers_[child_timer]);
326  }
327  }
328 }
329 
330 
331 
332 void Profiler::set_task_info(string description, int size) {
333  task_description_ = description;
334  task_size_ = size;
335 }
336 
337 
338 
339 void Profiler::set_program_info(string program_name, string program_version, string branch, string revision, string build) {
340  flow_name_ = program_name;
341  flow_version_ = program_version;
342  flow_branch_ = branch;
343  flow_revision_ = revision;
344  flow_build_ = build;
345 }
346 
347 
348 
349 int Profiler::start_timer(const CodePoint &cp) {
350  unsigned int parent_node = actual_node;
351  //DebugOut().fmt("Start timer: {}\n", cp.tag_);
352  int child_idx = find_child(cp);
353  if (child_idx < 0) {
354  //DebugOut().fmt("Adding timer: {}\n", cp.tag_);
355  // tag not present - create new timer
356  child_idx=timers_.size();
357  timers_.push_back( Timer(cp, actual_node) );
358  timers_[actual_node].add_child(child_idx , timers_.back() );
359  }
360  actual_node=child_idx;
361 
362  // pause current timer
363  timers_[parent_node].pause();
364 
365  timers_[actual_node].start();
366 
367  return actual_node;
368 }
369 
370 
371 
372 int Profiler::find_child(const CodePoint &cp) {
373  Timer &timer =timers_[actual_node];
374  unsigned int idx = cp.hash_idx_;
375  unsigned int child_idx;
376  do {
377  if (timer.child_timers[idx] == timer_no_child) break; // tag is not there
378 
379  child_idx=timer.child_timers[idx];
380  ASSERT_LT(child_idx, timers_.size()).error();
381  if (timers_[child_idx].full_hash_ == cp.hash_) return child_idx;
382  idx = ( (unsigned int)(idx)==(Timer::max_n_childs - 1) ? 0 : idx+1 );
383  } while ( (unsigned int)(idx) != cp.hash_idx_ ); // passed through whole array
384  return -1;
385 }
386 
387 
388 
389 void Profiler::stop_timer(const CodePoint &cp) {
390 #ifdef FLOW123D_DEBUG
391  // check that all childrens are closed
392  Timer &timer=timers_[actual_node];
393  for(unsigned int i=0; i < Timer::max_n_childs; i++)
394  if (timer.child_timers[i] != timer_no_child)
395  ASSERT(! timers_[timer.child_timers[i]].running())(timers_[timer.child_timers[i]].tag())(timer.tag())
396  .error("Child timer running while closing timer.");
397 #endif
398  unsigned int child_timer = actual_node;
399  if ( cp.hash_ != timers_[actual_node].full_hash_) {
400  // timer to close is not actual - we search for it above actual
401  for(unsigned int node=actual_node; node != 0; node=timers_[node].parent_timer) {
402  if ( cp.hash_ == timers_[node].full_hash_) {
403  // found above - close all nodes between
404  for(; (unsigned int)(actual_node) != node; actual_node=timers_[actual_node].parent_timer) {
405  WarningOut() << "Timer to close '" << cp.tag_ << "' do not match actual timer '"
406  << timers_[actual_node].tag() << "'. Force closing actual." << std::endl;
407  timers_[actual_node].stop(true);
408  }
409  // close 'node' itself
410  timers_[actual_node].stop(false);
411  actual_node = timers_[actual_node].parent_timer;
412 
413  // actual_node == child_timer indicates this is root
414  if (actual_node == child_timer)
415  return;
416 
417  // resume current timer
418  timers_[actual_node].resume();
419  return;
420  }
421  }
422  // node not found - do nothing
423  return;
424  }
425  // node to close match the actual
426  timers_[actual_node].stop(false);
427  actual_node = timers_[actual_node].parent_timer;
428 
429  // actual_node == child_timer indicates this is root
430  if (actual_node == child_timer)
431  return;
432 
433  // resume current timer
434  timers_[actual_node].resume();
435 }
436 
437 
438 
439 void Profiler::stop_timer(int timer_index) {
440  // stop_timer with CodePoint type
441  // timer which is still running MUST be the same as actual_node index
442  // if timer is not running index will differ
443  if (timers_[timer_index].running()) {
444  ASSERT_EQ(timer_index, (int)actual_node).error();
445  stop_timer(*timers_[timer_index].code_point_);
446  }
447 
448 }
449 
450 
451 
452 void Profiler::add_calls(unsigned int n_calls) {
453  timers_[actual_node].call_count += n_calls-1;
454 }
455 
456 
457 
458 void Profiler::notify_malloc(const size_t size, const long p) {
459  if (!global_monitor_memory)
460  return;
461 
462  MemoryAlloc::malloc_map()[p] = static_cast<int>(size);
463  timers_[actual_node].total_allocated_ += size;
464  timers_[actual_node].current_allocated_ += size;
465  timers_[actual_node].alloc_called++;
466 
467  if (timers_[actual_node].current_allocated_ > timers_[actual_node].max_allocated_)
468  timers_[actual_node].max_allocated_ = timers_[actual_node].current_allocated_;
469 }
470 
471 
472 
473 void Profiler::notify_free(const long p) {
474  if (!global_monitor_memory)
475  return;
476 
477  int size = sizeof(p);
478  if (MemoryAlloc::malloc_map()[(long)p] > 0) {
479  size = MemoryAlloc::malloc_map()[(long)p];
480  MemoryAlloc::malloc_map().erase((long)p);
481  }
482  timers_[actual_node].total_deallocated_ += size;
483  timers_[actual_node].current_allocated_ -= size;
484  timers_[actual_node].dealloc_called++;
485 }
486 
487 
488 double Profiler::get_resolution () {
489  const int measurements = 100;
490  double result = 0;
491 
492  // perform 100 measurements
493  for (unsigned int i = 1; i < measurements; i++) {
494  TimePoint t1 = TimePoint ();
495  TimePoint t2 = TimePoint ();
496 
497  // double comparison should be avoided
498  while ((t2 - t1) == 0) t2 = TimePoint ();
499  // while ((t2.ticks - t1.ticks) == 0) t2 = TimePoint ();
500 
501  result += t2 - t1;
502  }
503 
504  return (result / measurements) * 1000; // ticks to seconds to microseconds conversion
505 }
506 
507 
508 std::string common_prefix( std::string a, std::string b ) {
509  if( a.size() > b.size() ) std::swap(a,b) ;
510  return std::string( a.begin(), std::mismatch( a.begin(), a.end(), b.begin() ).first ) ;
511 }
512 
513 
514 
515 template<typename ReduceFunctor>
516 void Profiler::add_timer_info(ReduceFunctor reduce, property_tree::ptree* holder, int timer_idx, double parent_time) {
517 
518  // get timer and check preconditions
519  Timer &timer = timers_[timer_idx];
520  ASSERT(timer_idx >=0)(timer_idx).error("Wrong timer index.");
521  ASSERT(timer.parent_timer >=0).error("Inconsistent tree.");
522 
523  // fix path
524  string filepath = timer.code_point_->file_;
525 
526  // if constant FLOW123D_SOURCE_DIR is defined, we try to erase it from beginning of each CodePoint's filepath
527  #ifdef FLOW123D_SOURCE_DIR
528  string common_path = common_prefix (string(FLOW123D_SOURCE_DIR), filepath);
529  filepath.erase (0, common_path.size());
530  #endif
531 
532 
533  // generate node representing this timer
534  // add basic information
535  property_tree::ptree node;
536  double cumul_time_sum;
537  node.put ("tag", (timer.tag()) );
538  node.put ("file-path", (filepath) );
539  node.put ("file-line", (timer.code_point_->line_) );
540  node.put ("function", (timer.code_point_->func_) );
541  cumul_time_sum = reduce (timer, node);
542 
543 
544  // statistical info
545  if (timer_idx == 0) parent_time = cumul_time_sum;
546  double percent = parent_time > 1.0e-10 ? cumul_time_sum / parent_time * 100.0 : 0.0;
547  node.put<double> ("percent", percent);
548 
549  // when collecting timer info, we need to make sure each processor contains
550  // the same time-frames, for now we simply reduce info to current timer
551  // using operation MPI_MIN, so if some processor does not contain some
552  // time-frame other processors will forget this time-frame (information lost)
553  /*
554  int child_timers[ Timer::max_n_childs];
555  MPI_Allreduce(&timer.child_timers, &child_timers, Timer::max_n_childs, MPI_INT, MPI_MIN, MPI_COMM_WORLD);
556  #ifdef FLOW123D_DEBUG
557  int mpi_rank = MPI_Comm_rank(MPI_COMM_WORLD, &mpi_rank);
558  for (int j = 0; j < Timer::max_n_childs; j++) {
559  if (child_timers[j] != timer.child_timers[j]) {
560  // this is severe error, timers indicies are different
561  ASSERT(child_timers[j] == timer_no_child).error("Severe error: timers indicies are different.");
562 
563  // explore the difference in more depth
564  if (timer.child_timers[j] == timer_no_child) {
565  // this processor does not have child timer
566  WarningOut() << "Inconsistent tree in '" << timer.tag() << "': this processor[" << mpi_rank
567  << "] does not have child-timer[" << child_timers[j] << "]" << std::endl;
568  } else {
569  // other processors do not have child timer
570  WarningOut() << "Inconsistent tree in '" << timer.tag() << "': this processor[" << mpi_rank
571  << "] contains child-timer[" << timer.child_timers[j] << "] which others do not" << std::endl;
572  }
573  }
574  }
575  #endif // FLOW123D_DEBUG
576 */
577  // write times children timers using secured child_timers array
578  property_tree::ptree children;
579  bool has_children = false;
580  for (unsigned int i = 0; i < Timer::max_n_childs; i++) {
581  if (timer.child_timers[i] != timer_no_child) {
582  add_timer_info (reduce, &children, timer.child_timers[i], cumul_time_sum);
583  /*
584  if (child_timers[i] != timer_no_child) {
585  add_timer_info (reduce, &children, child_timers[i], cumul_time_sum); */
586  has_children = true;
587  }
588  }
589 
590  // add children tag and other info if present
591  if (has_children)
592  node.add_child ("children", children);
593 
594  // push itself to root ptree 'array'
595  holder->push_back (std::make_pair ("", node));
596 }
597 
598 
599 template <class T>
600 void save_nonmpi_metric (property_tree::ptree &node, T * ptr, string name) {
601  node.put (name+"-min", *ptr);
602  node.put (name+"-max", *ptr);
603  node.put (name+"-sum", *ptr);
604 }
605 
606 std::shared_ptr<std::ostream> Profiler::get_default_output_stream() {
607  char filename[PATH_MAX];
608  strftime(filename, sizeof (filename) - 1, "profiler_info_%y.%m.%d_%H-%M-%S.log.json", localtime(&start_time));
609  json_filepath = FilePath(string(filename), FilePath::output_file);
610 
611  //LogOut() << "output into: " << json_filepath << std::endl;
612  return make_shared<ofstream>(json_filepath.c_str());
613 }
614 
615 
616 #ifdef FLOW123D_HAVE_MPI
617 template <class T>
618 void save_mpi_metric (property_tree::ptree &node, MPI_Comm comm, T * ptr, string name) {
619  node.put (name+"-min", MPI_Functions::min(ptr, comm));
620  node.put (name+"-max", MPI_Functions::max(ptr, comm));
621  node.put (name+"-sum", MPI_Functions::sum(ptr, comm));
622 }
623 
624 void Profiler::output(MPI_Comm comm, ostream &os) {
625  int mpi_rank, mpi_size;
626  //wait until profiling on all processors is finished
627  MPI_Barrier(comm);
628  stop_timer(0);
629  propagate_timers();
630 
631  // stop monitoring memory
632  bool temp_memory_monitoring = global_monitor_memory;
633  set_memory_monitoring(false, petsc_monitor_memory);
634 
635  chkerr( MPI_Comm_rank(MPI_COMM_WORLD, &mpi_rank) );
636  MPI_Comm_size(comm, &mpi_size);
637 
638  // output header
639  property_tree::ptree root, children;
640  output_header (root, mpi_size);
641 
642  // recursively add all timers info
643  // define lambda function which reduces timer from multiple processors
644  // MPI implementation uses MPI call to reduce values
645  auto reduce = [=] (Timer &timer, property_tree::ptree &node) -> double {
646  int call_count = timer.call_count;
647  double cumul_time = timer.cumulative_time ();
648 
649  long memory_allocated = (long)timer.total_allocated_;
650  long memory_deallocated = (long)timer.total_deallocated_;
651  long memory_peak = (long)timer.max_allocated_;
652 
653  int alloc_called = timer.alloc_called;
654  int dealloc_called = timer.dealloc_called;
655 
656 
657  save_mpi_metric<double>(node, comm, &cumul_time, "cumul-time");
658  save_mpi_metric<int>(node, comm, &call_count, "call-count");
659 
660  save_mpi_metric<long>(node, comm, &memory_allocated, "memory-alloc");
661  save_mpi_metric<long>(node, comm, &memory_deallocated, "memory-dealloc");
662  save_mpi_metric<long>(node, comm, &memory_peak, "memory-peak");
663  //
664  save_mpi_metric<int>(node, comm, &alloc_called, "memory-alloc-called");
665  save_mpi_metric<int>(node, comm, &dealloc_called, "memory-dealloc-called");
666 
667 #ifdef FLOW123D_HAVE_PETSC
668  long petsc_memory_difference = (long)timer.petsc_memory_difference;
669  long petsc_peak_memory = (long)timer.petsc_peak_memory;
670  save_mpi_metric<long>(node, comm, &petsc_memory_difference, "memory-petsc-diff");
671  save_mpi_metric<long>(node, comm, &petsc_peak_memory, "memory-petsc-peak");
672 #endif // FLOW123D_HAVE_PETSC
673 
674  return MPI_Functions::sum(&cumul_time, comm);
675  };
676 
677  add_timer_info (reduce, &children, 0, 0.0);
678  root.add_child ("children", children);
679 
680 
681  // create profiler output only once (on the first processor)
682  // only active communicator should be the one with mpi_rank 0
683  if (mpi_rank == 0) {
684  /**
685  * Flag to property_tree::write_json method
686  * resulting in json human readable format (indents, newlines)
687  */
688  const int FLOW123D_JSON_HUMAN_READABLE = 1;
689  // write result to stream
690  property_tree::write_json (os, root, FLOW123D_JSON_HUMAN_READABLE);
691  }
692  // restore memory monitoring
693  set_memory_monitoring(temp_memory_monitoring, petsc_monitor_memory);
694 }
695 
696 
697 void Profiler::output(MPI_Comm comm) {
698  int mpi_rank;
699  chkerr( MPI_Comm_rank(MPI_COMM_WORLD, &mpi_rank) );
700  if (mpi_rank == 0) {
701  output(comm, *get_default_output_stream());
702  } else {
703  ostringstream os;
704  output(comm, os );
705  }
706 }
707 
708 #endif /* FLOW123D_HAVE_MPI */
709 
710 void Profiler::output(ostream &os) {
711  // last update
712  stop_timer(0);
713  propagate_timers();
714 
715  // output header
716  property_tree::ptree root, children;
717  /**
718  * Constant representing number of MPI processes
719  * where there is no MPI to work with (so 1 process)
720  */
721  const int FLOW123D_MPI_SINGLE_PROCESS = 1;
722  output_header (root, FLOW123D_MPI_SINGLE_PROCESS);
723 
724 
725  // recursively add all timers info
726  // define lambda function which reduces timer from multiple processors
727  // non-MPI implementation is just dummy repetition of initial value
728  auto reduce = [=] (Timer &timer, property_tree::ptree &node) -> double {
729  int call_count = timer.call_count;
730  double cumul_time = timer.cumulative_time ();
731 
732  long memory_allocated = (long)timer.total_allocated_;
733  long memory_deallocated = (long)timer.total_deallocated_;
734  long memory_peak = (long)timer.max_allocated_;
735 
736  int alloc_called = timer.alloc_called;
737  int dealloc_called = timer.dealloc_called;
738 
739  save_nonmpi_metric<double>(node, &cumul_time, "cumul-time");
740  save_nonmpi_metric<int>(node, &call_count, "call-count");
741 
742  save_nonmpi_metric<long>(node, &memory_allocated, "memory-alloc");
743  save_nonmpi_metric<long>(node, &memory_deallocated, "memory-dealloc");
744  save_nonmpi_metric<long>(node, &memory_peak, "memory-peak");
745 
746  save_nonmpi_metric<int>(node, &alloc_called, "memory-alloc-called");
747  save_nonmpi_metric<int>(node, &dealloc_called, "memory-dealloc-called");
748 
749 #ifdef FLOW123D_HAVE_PETSC
750  long petsc_memory_difference = (long)timer.petsc_memory_difference;
751  long petsc_peak_memory = (long)timer.petsc_peak_memory;
752  save_nonmpi_metric<long>(node, &petsc_memory_difference, "memory-petsc-diff");
753  save_nonmpi_metric<long>(node, &petsc_peak_memory, "memory-petsc-peak");
754 #endif // FLOW123D_HAVE_PETSC
755 
756  return cumul_time;
757  };
758 
759  add_timer_info (reduce, &children, 0, 0.0);
760  root.add_child ("children", children);
761 
762 
763  /**
764  * Flag to property_tree::write_json method
765  * resulting in json human readable format (indents, newlines)
766  */
767  const int FLOW123D_JSON_HUMAN_READABLE = 1;
768  // write result to stream
769  property_tree::write_json (os, root, FLOW123D_JSON_HUMAN_READABLE);
770 }
771 
772 
773 void Profiler::output() {
774  output(*get_default_output_stream());
775 }
776 
777 void Profiler::output_header (property_tree::ptree &root, int mpi_size) {
778  time_t end_time = time(NULL);
779 
780  const char format[] = "%x %X";
781  char start_time_string[BUFSIZ] = {0};
782  strftime(start_time_string, sizeof (start_time_string) - 1, format, localtime(&start_time));
783 
784  char end_time_string[BUFSIZ] = {0};
785  strftime(end_time_string, sizeof (end_time_string) - 1, format, localtime(&end_time));
786 
787  // generate current run details
788 
789  root.put ("program-name", flow_name_);
790  root.put ("program-version", flow_version_);
791  root.put ("program-branch", flow_branch_);
792  root.put ("program-revision", flow_revision_);
793  root.put ("program-build", flow_build_);
794  root.put ("timer-resolution", boost::format("%1.9f") % Profiler::get_resolution());
795  // if constant FLOW123D_SOURCE_DIR is defined, we add this information to profiler (later purposes)
796  #ifdef FLOW123D_SOURCE_DIR
797  #endif
798 
799  // print some information about the task at the beginning
800  root.put ("task-description", task_description_);
801  root.put ("task-size", task_size_);
802 
803  //print some information about the task at the beginning
804  root.put ("run-process-count", mpi_size);
805  root.put ("run-started-at", start_time_string);
806  root.put ("run-finished-at", end_time_string);
807 }
808 
809 #ifdef FLOW123D_HAVE_PYTHON
810 void Profiler::transform_profiler_data (const string &output_file_suffix, const string &formatter) {
811 
812  if (json_filepath=="") return;
813 
814  // error under CYGWIN environment : more details in this repo
815  // https://github.com/x3mSpeedy/cygwin-python-issue
816  //
817  // For now we only support profiler report conversion in UNIX environment
818  // Windows users will have to use a python script located in bin folder
819  //
820 
821  #ifndef FLOW123D_HAVE_CYGWIN
822  // grab module and function by importing module profiler_formatter_module.py
823  PyObject * python_module = PythonLoader::load_module_by_name ("profiler.profiler_formatter_module");
824  //
825  // def convert (json_location, output_file, formatter):
826  //
827  PyObject * convert_method = PythonLoader::get_callable (python_module, "convert" );
828 
829  int argument_index = 0;
830  PyObject * arguments = PyTuple_New (3);
831 
832  // set json path location as first argument
833  PyObject * tmp = PyString_FromString (json_filepath.c_str());
834  PyTuple_SetItem (arguments, argument_index++, tmp);
835 
836  // set output path location as second argument
837  tmp = PyString_FromString ((json_filepath + output_file_suffix).c_str());
838  PyTuple_SetItem (arguments, argument_index++, tmp);
839 
840  // set Formatter class as third value
841  tmp = PyString_FromString (formatter.c_str());
842  PyTuple_SetItem (arguments, argument_index++, tmp);
843 
844  // execute method with arguments
845  PyObject_CallObject (convert_method, arguments);
846 
847  PythonLoader::check_error();
848 
849  #else
850 
851  // print information about windows-cygwin issue and offer manual solution
852  MessageOut() << "# Note: converting json profiler reports is not"
853  << " supported under Windows or Cygwin environment for now.\n"
854  << "# You can use python script located in bin/python folder"
855  << " in order to convert json report to txt or csv format.\n"
856  << "python profiler_formatter_script.py --input \"" << json_filepath
857  << "\" --output \"profiler.txt\"" << std::endl;
858  #endif // FLOW123D_HAVE_CYGWIN
859 }
860 #else
861 void Profiler::transform_profiler_data (const string &output_file_suffix, const string &formatter) {
862 }
863 
864 #endif // FLOW123D_HAVE_PYTHON
865 
866 
867 void Profiler::uninitialize() {
868  if (_instance) {
869  ASSERT(_instance->actual_node==0)(_instance->timers_[_instance->actual_node].tag())
870  .error("Forbidden to uninitialize the Profiler when actual timer is not zero.");
871  _instance->stop_timer(0);
872  set_memory_monitoring(false, false);
873  delete _instance;
874  _instance = NULL;
875  }
876 }
877 bool Profiler::global_monitor_memory = false;
878 bool Profiler::petsc_monitor_memory = true;
879 void Profiler::set_memory_monitoring(const bool global_monitor, const bool petsc_monitor) {
880  global_monitor_memory = global_monitor;
881  petsc_monitor_memory = petsc_monitor;
882 }
883 
884 bool Profiler::get_global_memory_monitoring() {
885  return global_monitor_memory;
886 }
887 
888 bool Profiler::get_petsc_memory_monitoring() {
889  return petsc_monitor_memory;
890 }
891 
892 unordered_map_with_alloc & MemoryAlloc::malloc_map() {
893  static unordered_map_with_alloc static_malloc_map;
894  return static_malloc_map;
895 }
896 
897 void * Profiler::operator new (size_t size) {
898  return malloc (size);
899 }
900 
901 void Profiler::operator delete (void* p) {
902  free(p);
903 }
904 
905 void *operator new (std::size_t size) OPERATOR_NEW_THROW_EXCEPTION {
906  void * p = malloc(size);
907  Profiler::instance()->notify_malloc(size, (long)p);
908  return p;
909 }
910 
911 void *operator new[] (std::size_t size) OPERATOR_NEW_THROW_EXCEPTION {
912  void * p = malloc(size);
913  Profiler::instance()->notify_malloc(size, (long)p);
914  return p;
915 }
916 
917 void *operator new[] (std::size_t size, const std::nothrow_t&) throw() {
918  void * p = malloc(size);
919  Profiler::instance()->notify_malloc(size, (long)p);
920  return p;
921 }
922 
923 void operator delete( void *p) throw() {
924  Profiler::instance()->notify_free((long)p);
925  free(p);
926 }
927 
928 void operator delete[]( void *p) throw() {
929  Profiler::instance()->notify_free((long)p);
930  free(p);
931 }
932 
933 #else // def FLOW123D_DEBUG_PROFILER
934 
936  initialize();
937  return _instance;
938  }
939 
940 Profiler* Profiler::_instance = NULL;
941 
943  if (_instance == NULL) {
944  _instance = new Profiler();
945  set_memory_monitoring(true, true);
946  }
947 }
948 
950  if (_instance) {
951  ASSERT(_instance->actual_node==0)(_instance->timers_[_instance->actual_node].tag())
952  .error("Forbidden to uninitialize the Profiler when actual timer is not zero.");
953  set_memory_monitoring(false, false);
954  _instance->stop_timer(0);
955  delete _instance;
956  _instance = NULL;
957  }
958 }
959 
960 
961 #endif // def FLOW123D_DEBUG_PROFILER
#define MPI_LONG
Definition: mpi.h:161
static int min(int *val, MPI_Comm comm)
Definition: sys_profiler.cc:65
#define CONSTEXPR_
Definition: sys_profiler.hh:83
int MPI_Comm
Definition: mpi.h:141
#define OPERATOR_NEW_THROW_EXCEPTION
Definition: system.hh:48
static int sum(int *val, MPI_Comm comm)
Definition: sys_profiler.cc:47
#define MessageOut()
Macro defining &#39;message&#39; record of log.
Definition: logger.hh:231
double get_resolution() const
std::string format(CStringRef format_str, ArgList args)
Definition: format.h:3141
#define MPI_SUM
Definition: mpi.h:196
static double min(double *val, MPI_Comm comm)
Definition: sys_profiler.cc:71
void chkerr(unsigned int ierr)
Replacement of new/delete operator in the spirit of xmalloc.
Definition: system.hh:142
#define ASSERT(expr)
Allow use shorter versions of macro names if these names is not used with external library...
Definition: asserts.hh:347
void notify_free(const size_t size)
#define MPI_Reduce(sendbuf, recvbuf, count, datatype, op, root, comm)
Definition: mpi.h:608
static void uninitialize()
#define MPI_MIN
Definition: mpi.h:198
static int max(int *val, MPI_Comm comm)
Definition: sys_profiler.cc:83
void set_program_info(string program_name, string program_version, string branch, string revision, string build)
#define MPI_Comm_size
Definition: mpi.h:235
#define MPI_DOUBLE
Definition: mpi.h:156
STREAM & operator<<(STREAM &s, UpdateFlags u)
void transform_profiler_data(const string &output_file_suffix, const string &formatter)
#define MPI_Comm_rank
Definition: mpi.h:236
Dedicated class for storing path to input and output files.
Definition: file_path.hh:48
#define MPI_INT
Definition: mpi.h:160
static void initialize()
#define ASSERT_LT(a, b)
Definition of comparative assert macro (Less Than)
Definition: asserts.hh:296
static Profiler * instance()
#define WarningOut()
Macro defining &#39;warning&#39; record of log.
Definition: logger.hh:234
#define MPI_COMM_WORLD
Definition: mpi.h:123
#define MPI_MAX
Definition: mpi.h:197
static Profiler * _instance
#define MPI_Barrier(comm)
Definition: mpi.h:531
void output(MPI_Comm comm, ostream &os)
#define ASSERT_EQ(a, b)
Definition of comparative assert macro (EQual)
Definition: asserts.hh:328
void set_task_info(string description, int size)
void notify_malloc(const size_t size)