Flow123d  DF_profiler_memory_monitor-0108f36
sys_profiler.hh
Go to the documentation of this file.
1 /*!
2  *
3  * Copyright (C) 2015 Technical University of Liberec. All rights reserved.
4  *
5  * This program is free software; you can redistribute it and/or modify it under
6  * the terms of the GNU General Public License version 3 as published by the
7  * Free Software Foundation. (http://www.gnu.org/licenses/gpl-3.0.en.html)
8  *
9  * This program is distributed in the hope that it will be useful, but WITHOUT
10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
11  * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
12  *
13  *
14  * @file sys_profiler.hh
15  * @brief
16  * @todo
17  * - START_GLOBAL_TIMER(tag) - this calls the start_timer, which creates local timer on the correct place in the hierarchy,
18  * further this timer is added to the list of global timers, this contains groups of timers with same tag, and
19  * collect/sum data from these timers in the report.
20  *
21  * - Allow output even during calculation (not complete, but at least some thing)
22  * Report should contain time of start as well as time of creation of the report or time from start of the program.
23  *
24  * - When generating the report we have to deal with possibly different trees at every MPI process.
25  *
26  * - test memory profiling
27  * in our own new and xmalloc functions - register allocated and deallocated memory to the active Profiler frame.
28  *
29  * - test in parallel
30  * - extended output:
31  * time per single call (a different reduction than for the cumulative time, if the number of calls differs)
32  * percentage relative to the parent
33  * code point (somewhere near the end of the line)
34  *
35  *
36  * !!! Unfortunately using constexpr is worse (without optimization).
37  * This is probably due to use of static variable for
38  * CodePoint, the access could be slow, and computation of hash is done only once. Actually timing results
39  * are:
40  *
41  * OPTIONS OVERHEAD (compared to call 2x clock())
42  * -g, no c++11 : 18%
43  * -g, c++11 : 60%
44  * -O3,no c++11 : 6%
45  * -O3, c++11 : 6%
46  */
47 
48 #ifndef PROFILER_H
49 #define PROFILER_H
50 
51 #include "global_defs.h"
52 
53 #include <mpi.h>
54 #include <ostream>
55 #include <unordered_map>
56 
57 namespace boost { template <class T> struct hash; }
58 #include <boost/functional/hash/hash.hpp> // for hash
59 #include <boost/ref.hpp>
60 #include <boost/tuple/detail/tuple_basic.hpp> // for get
61 
62 #include <nlohmann/json.hpp>
63 
64 #include "time_point.hh"
65 #include "petscsys.h"
66 #include "simple_allocator.hh"
67 
68 //instead of #include "mpi.h"
69 //mpi declarations follows:
71 public:
72  static int sum(int* val, MPI_Comm comm);
73  static double sum(double* val, MPI_Comm comm);
74  static long sum(long* val, MPI_Comm comm);
75 
76  static int min(int* val, MPI_Comm comm);
77  static double min(double* val, MPI_Comm comm);
78  static long min(long* val, MPI_Comm comm);
79 
80  static int max(int* val, MPI_Comm comm);
81  static double max(double* val, MPI_Comm comm);
82  static long max(long* val, MPI_Comm comm);
83 };
84 
85 // Assuming all compilers support constexpr
86 #define CONSTEXPR_ constexpr
87 
88 using namespace std;
89 
90 
91 // Two-level token pasting: PASTE first expands its arguments (e.g. __LINE__) and only then
// _PASTE concatenates them with ##. START_TIMER and the related macros use it to build
// identifiers such as cp_<line> and timer_<line>.
92 #define _PASTE(a,b) a ## b
93 #define PASTE(a,b) _PASTE(a, b)
94 
95 
96 
97 /**
98  * \def START_TIMER(tag)
99  *
100  * @brief Starts a timer with specified tag.
101  *
102  * The macro defines a static constant-expression CodePoint that identifies the point in the code and
103  * holds the tag of the involved timer and its hash. Then it defines a local TimerFrame variable that
104  * calls @p Profiler::start_timer() in its constructor and @p Profiler::stop_timer() in its destructor.
105  * This way the timer is automatically closed at the end of the current block.
106  *
107  * ATTENTION: This macro expands to two statements, so the following code is illegal:
108  * @code
109  * if (some_condition) START_TIMER(tag);
110  * @endcode
 *
 * Both generated identifiers are suffixed with __LINE__, so two uses of the macro must not share
 * one source line within the same scope.
 * When FLOW123D_DEBUG_PROFILER is not defined the macro expands to nothing.
111  */
112 #ifdef FLOW123D_DEBUG_PROFILER
113 #define START_TIMER(tag) static CONSTEXPR_ CodePoint PASTE(cp_,__LINE__) = CODE_POINT(tag); TimerFrame PASTE(timer_,__LINE__) = TimerFrame( PASTE(cp_,__LINE__) )
114 #else
115 #define START_TIMER(tag)
116 #endif
117 
118 /**
119  * \def START_TIMER_EXT (tag, subtag)
120  *
121  * @brief Starts a timer with specified tag and subtag.
122  *
123  * The macro defines a static constant-expression CodePoint that identifies the point in the code and
124  * holds the tag and subtag of the involved timer and its hash. Then it defines a local TimerFrame variable
125  * that calls @p Profiler::start_timer() in its constructor and @p Profiler::stop_timer() in its destructor.
126  * This way the timer is automatically closed at the end of the current block.
127  *
128  * ATTENTION: This macro expands to two statements, so the following code is illegal:
129  * @code
130  * if (some_condition) START_TIMER_EXT(tag, subtag);
131  * @endcode
 *
 * When FLOW123D_DEBUG_PROFILER is not defined the macro expands to nothing.
132  */
133 #ifdef FLOW123D_DEBUG_PROFILER
134 #define START_TIMER_EXT(tag, subtag) static CONSTEXPR_ CodePoint PASTE(cp_,__LINE__) = CODE_POINT_EXT(tag, subtag); TimerFrame PASTE(timer_,__LINE__) = TimerFrame( PASTE(cp_,__LINE__) )
135 #else
136 #define START_TIMER_EXT(tag, subtag)
137 #endif
138 
139 /**
140  * \def END_TIMER(tag)
141  *
142  * @brief Ends a timer with specified tag.
143  *
144  * Use only if you want to end the timer before the end of the block. The macro expands into two statements
 * (a static CodePoint built from @p tag and a call of @p Profiler::stop_timer() with that code point),
 * so it must not be used as the single statement of an unbraced if/else/loop body.
 * When FLOW123D_DEBUG_PROFILER is not defined the macro expands to nothing.
145  */
146 #ifdef FLOW123D_DEBUG_PROFILER
147 #define END_TIMER(tag) static CONSTEXPR_ CodePoint PASTE(cp_,__LINE__) = CODE_POINT(tag); Profiler::instance()->stop_timer( PASTE(cp_,__LINE__) )
148 #else
149 #define END_TIMER(tag)
150 #endif
151 
152 /**
153  * \def START_MEMORY_MONITORING
154  *
155  * @brief Allows memory monitoring to be used only in a part of the code.
 *
 * Expands to a call of @p Profiler::start_memory_monitoring() on the profiler instance.
 * When FLOW123D_DEBUG_PROFILER is not defined the macro expands to nothing.
156  */
157 #ifdef FLOW123D_DEBUG_PROFILER
158 #define START_MEMORY_MONITORING Profiler::instance()->start_memory_monitoring()
159 #else
160 #define START_MEMORY_MONITORING
161 #endif
162 
163 /**
164  * \def END_START_TIMER(tag)
165  *
166  * Ends the current timer (via @p Profiler::stop_timer() without arguments) and starts a new one with the given tag.
 * The macro expands into several statements, so it must not be used as the single statement of an unbraced
 * if/else/loop body. Note that, unlike START_TIMER, the expansion already ends with a semicolon.
 * When FLOW123D_DEBUG_PROFILER is not defined the macro expands to nothing.
167  */
168 #ifdef FLOW123D_DEBUG_PROFILER
169 #define END_START_TIMER(tag) Profiler::instance()->stop_timer(); START_TIMER(tag);
170 #else
171 #define END_START_TIMER(tag)
172 #endif
173 
174 
175 /**
176  * \def ADD_CALLS(n_calls)
177  *
178  * @brief Increases the number of calls of the actual timer.
179  *
180  * Sometimes you want to measure the performance of a cycle whose body is below the resolution of the Timer implementation.
181  * If you know the number of cycles, you can use this macro in the following way:
182  *
183  * @code
184  * START_TIMER("cycle");
185  * unsigned int i;
186  * for(i =0; i<1000000; i++) i*i*i;
187  * ADD_CALLS(i);
188  * END_TIMER("cycle");
189  * @endcode
190  *
191  * In the profiler report you get the total time spent in the cycle, and the time per one call, which will be the average
192  * time spent in the body of the cycle.
 *
 * Expands to a call of @p Profiler::add_calls(); when FLOW123D_DEBUG_PROFILER is not defined it expands to nothing.
193  */
194 #ifdef FLOW123D_DEBUG_PROFILER
195 #define ADD_CALLS(n_calls) Profiler::instance()->add_calls(n_calls)
196 #else
197 #define ADD_CALLS(n_calls)
198 #endif
199 
200 
/**
 * \def CUMUL_TIMER(tag)
 *
 * Expression giving the cumulative time of the timer found by @p Profiler::find_timer(tag).
 * When FLOW123D_DEBUG_PROFILER is not defined the macro expands to the constant 0.
 */
201 #ifdef FLOW123D_DEBUG_PROFILER
202 #define CUMUL_TIMER(tag) Profiler::instance()->find_timer(tag).cumulative_time()
203 #else
204 #define CUMUL_TIMER(tag) 0
205 #endif
206 
207 //////////////////////////////////////////////////////////////////////////////////////////////
208 #ifdef FLOW123D_DEBUG_PROFILER
209 
/**
 * Subtag value stored in a CodePoint when no subtag was specified.
 */
#define PROFILER_EMPTY_SUBTAG ""

/**
 * Default seed value passed to str_hash().
 */
#define PROFILER_HASH_DEFAULT 0

/**
 * Hash of the empty string (for the default seed). Currently zero for simpler testing.
 */
#define SALT 0 //0xef50e38f

/**
 * @brief Function for compile-time hash computation. (Needs the C++11 standard.)
 *
 * The function has to be recursive with a single return statement, since C++11 requires
 * that the body of a constexpr function consists only of the return statement.
 *
 * @param str            Constant null-terminated string to hash.
 * @param default_value  Seed added to the hash of @p str; pass PROFILER_HASH_DEFAULT for a plain
 *                       hash, or the hash of another string to combine two strings (tag + subtag).
 * @return Hash as unsigned int (usually 4 bytes); arithmetic wraps around modulo 2^N.
 *
 * The seed is applied to the empty string as well, so that
 * str_hash("", str_hash(tag, PROFILER_HASH_DEFAULT)) == str_hash(tag, PROFILER_HASH_DEFAULT),
 * i.e. an empty subtag does not collapse the hashes of all tags to SALT.
 */
inline constexpr unsigned int str_hash(const char * str, unsigned int default_value) {
    return (*str == 0)
        ? SALT + default_value
        : default_value + str_hash(str + 1, PROFILER_HASH_DEFAULT) * 101 + static_cast<unsigned int>(*str);
}
231 
232 /**
233  * Macro to generate a constexpr CodePoint object for the given @p tag (without subtag),
 * recording __FILE__, __func__ and __LINE__ of the place where the macro is expanded.
234  */
235 #define CODE_POINT(tag) CodePoint(tag, __FILE__, __func__, __LINE__)
236 
237 /**
238  * Macro to generate a constexpr CodePoint object for the given @p tag and @p subtag,
 * recording __FILE__, __func__ and __LINE__ of the place where the macro is expanded.
239  */
240 #define CODE_POINT_EXT(tag, subtag) CodePoint(tag, subtag, __FILE__, __func__, __LINE__)
241 
242 
243 
244 
245 /**
246  * @brief Class that represents point in the code.
247  *
248  * This class allow construction at compile time. And includes the information about the code point as well
249  * as the 'tag' of the timer and cimpile-time computed hashes of this 'tag'. The @p hash_ is long one with
250  * very small probability of collisions - this we use for comparison of tags. The @p hash_idx_ is the long hash modulo
251  * length of the array of Timer's children, this is used for fast loop up into this array that servers as a simple hash table.
252  */
253 class CodePoint {
254 public:
255  CONSTEXPR_ CodePoint(const char *tag, const char * file, const char * func, const unsigned int line)
256  : tag_(tag), subtag_(PROFILER_EMPTY_SUBTAG), file_(file), func_(func), line_(line),
257  hash_(str_hash(tag, PROFILER_HASH_DEFAULT)),
258  hash_idx_( str_hash(tag, PROFILER_HASH_DEFAULT)%max_n_timer_childs )
259  {};
260  CONSTEXPR_ CodePoint(const char *tag, const char *subtag, const char * file, const char * func, const unsigned int line)
261  : tag_(tag), subtag_(subtag), file_(file), func_(func), line_(line),
262  hash_(str_hash(subtag, str_hash(tag, PROFILER_HASH_DEFAULT))),
263  hash_idx_( str_hash(subtag, str_hash(tag, PROFILER_HASH_DEFAULT))%max_n_timer_childs )
264  {};
265 
266  /// Size of child arrays in timer nodes.
267  static const unsigned int max_n_timer_childs=13;
268 
269  /// Tag of the code point.
270  const char * const tag_;
271 
272  /// Subtag of the code point.
273  const char * const subtag_;
274 
275  /// file name of the code point
276  const char * const file_;
277 
278  /// file name of the code point
279  const char * const func_;
280 
281  /// file name of the code point
282  const unsigned int line_;
283 
284  /// Full 32-bit hash of the tag ( practically no chance of collision)
285  unsigned int hash_;
286 
287  /// Hash modulo size of array of timer childs ( we have to check full hash to prevent collision)
288  unsigned int hash_idx_;
289 };
290 
291 
292 
293 /**
294  * @brief Class for profiling tree nodes.
295  *
296  * One Timer represents one particular time frame in the execution tree.
297  * It collects information about total time, number of calls, allocated and deallocated memory.
298  *
299  * It should be accessed only through Profiler, which is its friend class.
300  *
301  * TODO: for better performance: move copy hash_ and hash_idx_ into Timer since CodePoint are in static
302  * variables, that may be slower to acces.
303  *
304  */
305 class Timer {
306 
307 
308 public:
309  /// Size of array @p child_timers, the hash table containing descendants in the call tree.
310  static const unsigned int max_n_childs=CodePoint::max_n_timer_childs;
311 
312  /**
313  * Creates the timer node object. Should not be called directly, but through the START_TIMER macro.
314  */
315  Timer(const CodePoint &cp, int parent);
316 
317 
318  /**
319  * Start the timer. If it is already started, just increase number of starts (recursions) and calls.
320  */
321  void start();
322 
323  /**
324  * If number of starts (recursions) drop back to zero, we stop the timer and add the period to the cumulative time.
325  * This method do not take care of its childs (it has no access to the other timers).
326  * When the parameter 2p forced is 'true', we stop the timer immediately regardless the number of recursions.
327  * Returns true if the timer is not closed (recursions didn't drop to zero yet).
328  */
329  bool stop(bool forced = false);
330 
331 
332  /// Getter for the 'tag'.
333  inline string tag() const {
334  string buf(code_point_->tag_);
335  buf.append(code_point_->subtag_);
336  return buf;
337  }
338 
339  /// Returns true if the timer is open, number of starts (recursions) is nonzero.
340  inline bool running() const
341  { return start_count >0; }
342 
343  /// Returns string with description of the code point where the timer was first started.
344  std::string code_point_str() const;
345 
346  /**
347  * Returns cumulative time of the timer in seconds.
348  */
349  double cumulative_time() const;
350 
351  /*
352  * Adds given index @p child_index of the timer @p child to the correct place in the hash table.
353  */
354  void add_child(int child_index, const Timer &child);
355 
356  /**
357  * Set memory monitoring flag to true
358  */
359  inline void set_turn_off_memory_monitoring() {
360  turn_off_memory_monitoring_ = true;
361  }
362 
363 
364 protected:
365 
366  /**
367  * Pauses current timer, save measured petsc memory information util resume.
368  * We get Petsc maximum memory usage via PetscMemoryGetMaximumUsage call
369  * and save this value into temp value. (we override local maximum if temp
370  * value is greater)
371  */
372  void pause();
373  /**
374  * Resume current timer. e tell Petsc to monitor the maximum memory
375  * usage once again. We call PetscMemorySetGetMaximumUsage so later in
376  * resume() method will PetscMemoryGetMaximumUsage method work.
377  */
378  void resume();
379 
380  /**
381  * Start time when frame opens.
382  */
383  TimePoint start_time;
384  /**
385  * Cumulative time spent in the frame.
386  */
387  double cumul_time;
388  /**
389  * Total number of opening of the frame.
390  */
391  unsigned int call_count;
392  /**
393  * Number of recursive openings.
394  */
395  unsigned int start_count;
396 
397 
398  /**
399  * Code point of the first START_TIMER for the particular tag. The 'tag' identifies timer
400  * and is used in reported profiler table.
401  */
402  const CodePoint *code_point_;
403  /// Full tag hash. Copy from code_point_.
404  unsigned int full_hash_;
405  /// Hash modulo size of array of timer childs. Copy from code_point_.
406  unsigned int hash_idx_;
407 
408  /**
409  * Index of the parent timer node in the tree. Negative value means 'not set'.
410  */
411  int parent_timer;
412  /**
413  * Indices of the child timers in the Profiler::timers_ vector. Negative values means 'not set'.
414  */
415  int child_timers[max_n_childs];
416 
417  /**
418  * Total number of bytes allocated in this frame. After
419  * Profiler::propagate_timers call will also contain values from children.
420  */
421  size_t total_allocated_;
422  /**
423  * Total number of bytes deallocated in this frame. After
424  * Profiler::propagate_timers call, will also contain values from children.
425  */
426  size_t total_deallocated_;
427  /**
428  * Maximum number of bytes allocated at one time in this frame. After
429  * Profiler::propagate_timers call, maximum value will be taken from this
430  * Timer and also from all children Timers.
431  */
432  size_t max_allocated_;
433  /**
434  * Current number of bytes allocated in this frame at the same time.
435  * This value is used to monitor maximum bytes allocated. When notify_free
436  * and notify_malloc is called this values is changed and new maximum
437  * is tested.
438  */
439  size_t current_allocated_;
440 
441  /**
442  * Number of times new/new[] operator was used in this scope
443  */
444  int alloc_called;
445  /**
446  * Number of times delete/delete[] operator was used in this scope
447  */
448  int dealloc_called;
449  /**
450  * True if memory monitoring was switched on by macro START_MEMORY_MONITORING.
451  */
452  bool turn_off_memory_monitoring_;
453 
454  #ifdef FLOW123D_HAVE_PETSC
455  /**
456  * Number of bytes used by Petsc at the start of time-frame
457  */
458  PetscLogDouble petsc_start_memory;
459  /**
460  * Number of bytes used by Petsc at the end of time-frame
461  */
462  PetscLogDouble petsc_end_memory;
463  /**
464  * Difference between start and end of a petsc memory usage
465  */
466  PetscLogDouble petsc_memory_difference;
467  /**
468  * Maximum amount of memory used that was PetscMalloc()ed at any time
469  * during this run.
470  *
471  * The memory usage reported here includes all Fortran arrays (that may be
472  * used in application-defined sections of code).
473  */
474  PetscLogDouble petsc_peak_memory;
475  /**
476  * Local maximum amount of memory used that was PetscMalloc()ed
477  * used during time-frame pause/resume. Auxilary variable for storing
478  * local memory used when pause is called.
479  */
480  PetscLogDouble petsc_local_peak_memory;
481  #endif // FLOW123D_HAVE_PETSC
482 
483  friend class Profiler;
484  friend std::ostream & operator <<(std::ostream&, const Timer&);
485 
486  /**
487  * if under unit testing, specify friend so protected members can be tested
488  */
489  #ifdef __UNIT_TEST__
490  friend ProfilerTest;
491  #endif /* __UNIT_TEST__ */
492 
493 };
494 
495 /*
496 struct SimpleTranslator {
497  typedef std::string internal_type;
498  typedef int external_type;
499 
500  // Converts a string to int
501  boost::optional<external_type> get_value(const internal_type& str) {
502  return boost::optional<external_type>(std::stoi(str));
503  }
504 
505  // Converts a bool to string
506  boost::optional<internal_type> put_value(const external_type& i){
507  return boost::optional<internal_type>(std::to_string(i));
508  }
509 };
510 
511 namespace boost {
512 namespace property_tree {
513 
514 template<typename Ch, typename Traits, typename Alloc>
515 struct translator_between<std::basic_string< Ch, Traits, Alloc >, int> {
516  typedef SimpleTranslator type;
517 };
518 
519 
520 } // namespace property_tree
521 } // namespace boost
522 */
523 /**
524  *
525  * @brief Main class for profiling by measuring time intervals.
526  *
527  * These time intervals form a tree structure where each interval is represented
528  * by a Timer object. The root node of the tree is automatically created and
529  * started after creating the Profiler object and cannot be stopped manually.
530  *
531  * The class implements a singleton pattern and all the functions are accessible trough
532  * Profiler::instance(), but in most cases the programmer will access the profiler
533  * functions via the #START_TIMER and #END_TIMER macros. The #START_TIMER macro
534  * is responsible for the fact that we don't have to call #END_TIMER macro to stop the timer and
535  * the timer will be stopped at the end of the block in which #START_TIMER was used.
536  * These macros internally use the TimerFrame objects and the programmer should
537  * not use the TimerFrame objects directly.
538  *
539  * By using #SET_TIMER_SUBFRAMES macro, the programmer can specify the number of subframes (eg. iterations)
540  * for the currently active timer.
541  *
542  *
543  * Currently the Profiler system is not thread safe. No idea how to do this.
544  *
545  */
546 class Profiler {
547 public:
548  /**
549  * Returns unique Profiler object.
550  * If the clear flag is set, the profiler instance is deleted.
551  */
552  static Profiler* instance(bool clear = false);
553  /**
554  * Sets task specific information. The string @p description with textual description of the task and the
555  * number of elements of the mesh (parameter @p size). This is used for weak scaling graphs so it should
556  * measure size of the task of the same type (same description).
557  *
558  */
559  void set_task_info(string description, int size);
560  /**
561  * Sets informations about program version. This consists of @p program_version (includes program name), @p branch in the repository or rather full URL of the branch,
562  * and SVN @p revision (or hash for GIT).
563  *
564  */
565  void set_program_info(string program_name, string program_version, string branch, string revision, string build);
566 
567 
568  /**
569  * Starts a timer with code point, tag and hashes specified by CodePoint object @p cp.
570  * If the timer is not already created, it creates a new one. It returns index of
571  * the actual timer.
572  */
573  int start_timer(const CodePoint &cp);
574  /**
575  * Stops actual timer. It check if the hash of given code point match hash of the
576  * tag of actual timer node. If not we print out warning and try to find the correct tag
577  * towards the tree root closing all nodes we pass through.
578  *
579  * If FLOW123D_DEBUG is set, we check that all children are closed.
580  */
581  void stop_timer(const CodePoint &cp);
582 
583  /**
584  * Stop timer with index given by @p timer_index. If this is not equal to @p actual_node, we
585  * traverse the tree towards root while force closing nodes by the way.
586  *
587  * Negative @p timer_index means close @p actual_node
588  */
589  void stop_timer(int timer_index = -1);
590 
591  /**
592  * Start memory monitoring if it is switched off.
593  *
594  * Memory monitoring is automatically turn off at the end of actual tag.
595  */
596  void start_memory_monitoring();
597 
598  /**
599  * Adds @p n_calls - 1 to the total number of calls of the current timer. Minus one, since one call is counted when
600  * timer was started. You should use macro ADD_CALLS above.
601  */
602  void add_calls(unsigned int n_calls);
603  /**
604  * Notification about allocation of given size.
605  * Increase total allocated memory in current profiler frame.
606  */
607  void notify_malloc(const size_t size, const long p);
608  /**
609  * Notification about freeing memory of given size.
610  * Increase total deallocated memory in current profiler frame.
611  */
612  void notify_free(const long p);
613 
614  /**
615  * Return average profiler timer resolution in seconds
616  * based on 100 measurements
617  */
618  static double get_resolution ();
619 
620  /**
621  * Find a first timer matching the tag.
622  * O(n) complexity.
623  */
624  Timer find_timer(string tag);
625 
626 
627 
628 
629 #ifdef FLOW123D_HAVE_MPI
630  /**
631  * @brief Output current timing information into the given stream.
632  *
633  * COLLECTIVE - all processes in the communicator have to call this
634  * method. All timers are finished, all processes are synchronized, and the
635  * profiling information is collected and written to the given stream.
636  *
637  * Pass through the profiling tree (collective over processors)
638  * Print cumulative times average, balance (max/min), count (denote differences)
639  *
640  */
641  void output(MPI_Comm comm, std::ostream &os);
642 
643  /**
644  * Same as previous, but output to the file with default name: "profiler_info_YYMMDD_HH::MM:SS.log".
645  * Empty body if macro FLOW123D_DEBUG_PROFILER is not defined.
646  */
647  string output(MPI_Comm comm, string profiler_path = "");
648 
649 #endif /* FLOW123D_HAVE_MPI */
650  /**
651  * @brief Output current timing information into the given stream.
652  *
653  * It temporally stops all timers, synchronize all processes, collect
654  * profiling informations and write it to the given stream.
655  *
656  * Pass through the profiling tree (collective over processors)
657  * Print cumulative times average, balance (max/min), count (denote differences)
658  *
659  */
660  void output(std::ostream &os);
661 
662  /**
663  * Same as previous, but output to the file with default name: "profiler_info_YYMMDD_HH::MM:SS.log".
664  * Empty body if macro FLOW123D_DEBUG_PROFILER is not defined.
665  */
666  string output(string profiler_path = "");
667 
668  /**
669  * Method will transform last profiler json file to desired format
670  */
671 // void transform_profiler_data (const string &output_file_suffix, const string &formatter);
672  /**
673  * Stop all timers and destroys the Profiler object.
674  * If you want some output call @p output method just before.
675  */
676  static void uninitialize();
677 
678  /**
679  * Class-specific allocation function new. Called by the usual
680  * single-object new-expressions if allocating an object of type Profiler.
681  */
682  static void* operator new (size_t sz);
683  /**
684  * Class-specific allocation function delete. Deallocates storage
685  * previously allocated by a matching operator new. These deallocation
686  * functions are called by delete-expressions.
687  */
688  static void operator delete (void* p);
689  /// Sized deallocator, does the same as operator delete (void* p)
690  static void operator delete (void* p, std::size_t);
691 
692  /**
693  * Public setter to turn on/off memory monitoring
694  * @param global_monitor whether to turn global monitoring on or off
695  * @param petsc_monitor petsc monitoring
696  */
697  void static set_memory_monitoring(const bool global_monitor);
698 
699  /**
700  * Public getter to memory monitoring
701  * @return memory monitoring status
702  */
703  inline bool static get_global_memory_monitoring() {
704  return global_monitor_memory;
705  }
706 
707  /**
708  * Public getter to petsc memory monitoring
709  * @return memory monitoring status
710  */
711  inline bool static get_petsc_memory_monitoring() {
712  return petsc_monitor_memory;
713  }
714 
715  /**
716  * Run calibration frame "UNIT PAYLOAD".
717  * That should be about 100x timer resolution.
718  */
719  void calibrate();
720 
721  /**
722  * Time of a unit payload, result of a single measurement. Can be used for raw calibration.
723  */
724  double calibration_time() {
725  if (calibration_time_ < 0) calibrate();
726  return calibration_time_;
727  }
728  /**
729  * if under unit testing, specify friend so protected members can be tested
730  */
731  #ifdef __UNIT_TEST__
732  friend ProfilerTest;
733  #endif /* __UNIT_TEST__ */
734 
735 
736 protected:
737 
738  /**
739  * Whether to monitor operator 'new/delete'
740  */
741  static bool global_monitor_memory;
742 
743  /**
744  * Whether to monitor petsc memory usage
745  */
746  static bool petsc_monitor_memory;
747 
748  /**
749  * When creating Profiler also reserve some bytes in malloc_map so overhead
750  * of creating single items is lowered. This value is passed as parameter in
751  * map.reserve() method so it indicates how many objects (pointers) are
752  * allocated at first.
753  */
754  static const long malloc_map_reserve;
755 
756 
757  /**
758  * Method will propagate values from children timers to its parents
759  */
760  void propagate_timers ();
761 
762  /**
763  * Method for exchanging metrics from child timer to its parent timer
764  */
765  void accept_from_child (Timer &parent, Timer &child);
766 
767  /**
768  * Try to find timer with tag (in fact only its 32-bit hash) from given code point @p cp.
769  * Returns -1 if it is not found otherwise it returns its index.
770  */
771  int find_child(const CodePoint &cp);
772 
773 
774  /**
775  * Method will prepare construct specific details about the run (time start and time end)
776  * and write them along with basic informations about the run (name, description, ...)
777  * into nlohmann:json object
778  */
779  void output_header (nlohmann::json &root, int mpi_size);
780 
781  /**
782  * Open a new file for profiler output with default name based on the
783  * actual time and date. Returns a pointer to the stream of the output file.
784  */
785  //std::shared_ptr<std::ostream> get_output_stream(string path);
786 
787  /// Vector of all timers. Whole tree is stored in this array.
789 
790  /// Index of the actual timer node. Negative value means 'unset'.
791  unsigned int actual_node;
792 
793  /// MPI communicator used for final reduce of the timer node tree.
794  //MPI_Comm communicator_;
795  /// MPI_rank
796  //int mpi_rank_;
797 
798  /**
799  * flag indicating that collection of timer details will be
800  * using MPI
801  bool mpi_used;
802  */
803  // header informations
804 
805  /// Some measure of the size of the task in the set of the tasks that differs
806  /// only by size - used for scaling tests.
807  int task_size_;
808  /// Task description and identifier in possible database of all Profiler results.
809  string task_description_;
810  /// Time and date of the start of the task solution. In fact start of the Profiler.
811  time_t start_time;
812 
813  /// Name of the program.
814  string flow_name_;
815  /// Version of the program.
816  string flow_version_;
817  /// Http address of the branch in a repository.
818  string flow_branch_;
819  /// Revision or GIT hash.
820  string flow_revision_;
821  /// Build date and time.
822  string flow_build_;
823  /// Variable which stores last json log filepath
824  //string json_filepath;
825 
826  Timer none_timer_;
827 
828  /// Time of a unit payload, result of single measurement. Can be used for raw calibration.
829  double calibration_time_;
830 
831 protected:
832  /**
833  * Use DFS to pass through the tree and collect information about all timers reduced from the processes in the communicator.
834  * For every timer the information strings are stored in the struct TimerInfo in order to pad fields correctly
835  * to have alligned columns on the output. The alligning is performed in the output() method.
836  */
837  template<typename ReduceFunctor>
838  void add_timer_info(ReduceFunctor reduce, nlohmann::json* node, int timer_idx, double parent_time);
839 
840  //Profiler(MPI_Comm comm); // private constructor
841  Profiler(); // private constructor
842  Profiler(Profiler const&); // copy constructor is private
843  Profiler & operator=(Profiler const&); // assignment operator is private
844 
845 };
846 
847 
848 
849 
850 
851 
852 /**
853  *
854  * @brief Class for automatic timer closing. This class is used by #START_TIMER macro
855  * and is responsible for the fact that we don't have to call #END_TIMER macro to stop the timer,
856  * the timer will be stopped at the end of the block in which #START_TIMER was used.
857  *
858  * The main idea of the approach described is that the TimerFrame variable will be destroyed
859  * at the end of the block where #START_TIMER macro was used. In order to work properly
860  * in situations where #END_TIMER was used to stop the timer manually before (but there is still the
861  * variable which will be later destroyed), we have to store references to these variables and
862  * destroy them on-demand.
863  *
864  * TODO:
865  * Should only contain pointer to the Timer. And destructor, that close the timer.
866  */
867 class TimerFrame {
868 private:
869  int const timer_index_;
870 public:
871  inline TimerFrame(const CodePoint &cp)
872  : timer_index_( Profiler::instance()->start_timer(cp) )
873  {}
874 
875  ~TimerFrame() {
876  Profiler::instance()->stop_timer(timer_index_);
877  }
878 };
879 
880 
881 /**
882  * Simple class providing static map variable storing address and alloc size
883  */
884 // gcc version 4.9 and lower has following bug: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=59751
885 // fix in version 4.9: https://gcc.gnu.org/gcc-4.9/changes.html#cxx
886 // typedef unordered_map<long, int, hash<long>, equal_to<long>, internal::SimpleAllocator<pair<const long, int>>> unordered_map_with_alloc;
887 typedef std::unordered_map<long, int, boost::hash<long>, equal_to<long>, internal::SimpleAllocator<std::pair<const long, int>>> unordered_map_with_alloc;
888 class MemoryAlloc {
889 public:
890  /**
891  * Create static map containing <allocation address, allocation size> pairs
892  * map is used for storing allocations and deallocations of all object not
893  * related to profiler after profiler initialization phase
894  */
895  static unordered_map_with_alloc & malloc_map();
896 };
897 
898 
899 
900 
901 #else // FLOW123D_DEBUG_PROFILER
902 
903 
904 // dummy declaration of Profiler class
905 class Profiler {
906 public:
907  static Profiler* instance(bool clear = false);
908 
909  void set_task_info(string, int)
910  {}
911  void set_program_info(string, string, string, string, string)
912  {}
913  void notify_malloc(const size_t )
914  {}
915  void notify_free(const size_t )
916  {}
917  void output(MPI_Comm, ostream &)
918  {}
919  string output(MPI_Comm, string)
920  {return "";}
921  void output(std::ostream &)
922  {}
923  string output(string)
924  {return "";}
925 // void output(MPI_Comm)
926 // {}
927 // string output()
928 // {}
929 // void transform_profiler_data(const string &, const string &)
930 // {}
931  double get_resolution () const
932  { return 0.0; }
933  const char *actual_tag() const
934  { return NULL; }
935  inline unsigned int actual_count() const
936  { return 0; }
937  inline double actual_cumulative_time() const
938  { return 0.0; }
939  static void uninitialize();
940  void calibrate();
941  double calibration_time() {
942  return -2;
943  }
944  static void set_memory_monitoring(bool)
945  {}
946 private:
947  Profiler() {}
948 };
949 
950 
951 
952 
953 #endif
954 
955 
956 #endif
957 
static int sum(int *val, MPI_Comm comm)
Definition: sys_profiler.cc:42
static int min(int *val, MPI_Comm comm)
Definition: sys_profiler.cc:60
static int max(int *val, MPI_Comm comm)
Definition: sys_profiler.cc:78
double calibration_time()
void notify_malloc(const size_t)
void output(MPI_Comm, ostream &)
void notify_free(const size_t)
static void set_memory_monitoring(bool)
static Profiler * instance(bool clear=false)
double actual_cumulative_time() const
void output(std::ostream &)
string output(string)
unsigned int actual_count() const
void set_program_info(string, string, string, string, string)
const char * actual_tag() const
string output(MPI_Comm, string)
void set_task_info(string, int)
void calibrate()
double get_resolution() const
Definition: memory.cc:33
a class to store JSON values
Definition: json.hpp:174
Global macros to enhance readability and debugging, general constants.
int MPI_Comm
Definition: mpi.h:141
#define CONSTEXPR_
Definition: sys_profiler.hh:86
STREAM & operator<<(STREAM &s, UpdateFlags u)