Flow123d  release_3.0.0-1210-g1d9e2bd
sys_profiler.hh
Go to the documentation of this file.
1 /*!
2  *
3  * Copyright (C) 2015 Technical University of Liberec. All rights reserved.
4  *
5  * This program is free software; you can redistribute it and/or modify it under
6  * the terms of the GNU General Public License version 3 as published by the
7  * Free Software Foundation. (http://www.gnu.org/licenses/gpl-3.0.en.html)
8  *
9  * This program is distributed in the hope that it will be useful, but WITHOUT
10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
11  * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
12  *
13  *
14  * @file sys_profiler.hh
15  * @brief
16  * @todo
17  * - START_GLOBAL_TIMER(tag) - this calls the start_timer, which creates local timer on the correct place in the hierarchy,
18  * further this timer is added to the list of global timers, this contains groups of timers with same tag, and
19  * collect/sum data from these timers in the report.
20  *
21  * - Allow output even during calculation (not complete, but at least some thing)
22  * Report should contain time of start as well as time of creation of the report or time from start of the program.
23  *
24  * - When generating report we has to deal with possibly different trees at every MPI process.
25  *
26  * - test memory profiling
27  * in our own new and xmalloc functions - register allocatied and deallocated memory to active Profiler frame.
28  *
29  * - test in parallel
30  * - extended output:
31  * cas na jedno volani (jina redukce nez pro kumulativni cas, pokud je pocet volani ruzny)
32  * procenta vuci predkovi
33  * code point (az nekde na konci radky)
34  *
35  *
36  * !!! Unfortunately using constexpr is worse (without optimization).
37  * This is probably due to use of static variable for
38  * CodePoint, the access could be slow, and computation of hash is done only once. Actually timing results
39  * are:
40  *
41  * OPTIONS OVERHEAD (compared to call 2x clock())
42  * -g, no c++11 : 18%
43  * -g, c++11 : 60%
44  * -O3,no c++11 : 6%
45  * -O3, c++11 : 6%
46  */
47 
48 #ifndef PROFILER_H
49 #define PROFILER_H
50 
51 #include "global_defs.h"
52 
53 #include <mpi.h>
54 #include <ostream>
55 namespace boost { template <class T> struct hash; }
56 #include <boost/functional/hash/hash.hpp> // for hash
57 #include <boost/ref.hpp>
58 #include <boost/tuple/detail/tuple_basic.hpp> // for get
59 #include <boost/unordered/unordered_map.hpp> // for unordered_map
60 #include <nlohmann/json.hpp>
61 
62 #include "time_point.hh"
63 #include "petscsys.h"
64 #include "simple_allocator.hh"
65 
66 //instead of #include "mpi.h"
67 //mpi declarations follows:
69 public:
70  static int sum(int* val, MPI_Comm comm);
71  static double sum(double* val, MPI_Comm comm);
72  static long sum(long* val, MPI_Comm comm);
73 
74  static int min(int* val, MPI_Comm comm);
75  static double min(double* val, MPI_Comm comm);
76  static long min(long* val, MPI_Comm comm);
77 
78  static int max(int* val, MPI_Comm comm);
79  static double max(double* val, MPI_Comm comm);
80  static long max(long* val, MPI_Comm comm);
81 };
82 
83 // Assuming all compilers support constexpr
84 #define CONSTEXPR_ constexpr
85 
86 using namespace std;
87 
88 
89 // These helper macros are necessary due to use of _LINE_ variable in START_TIMER macro.
90 #define _PASTE(a,b) a ## b
91 #define PASTE(a,b) _PASTE(a, b)
92 
93 
94 
95 /**
96  * \def START_TIMER(tag)
97  *
98  * @brief Starts a timer with specified tag.
99  *
100  * In fact it creates an static constant expression that identifies the point in the code and
101  * contains tag of the involved timer and its hash. Then it creates local variable that
102  * calls @p Profiler::start_timer() in constructor and @p Profiler::stop_timer() in destructor.
103  * This way the timer is automatically closed at the end of current block.
104  *
105  * ATTENTION: This macro expands to two statements so following code is illegal:
106  * @code
107  * if (some_condition) START_TIMER(tag);
108  * @endcode
109  */
110 #ifdef FLOW123D_DEBUG_PROFILER
111 #define START_TIMER(tag) static CONSTEXPR_ CodePoint PASTE(cp_,__LINE__) = CODE_POINT(tag); TimerFrame PASTE(timer_,__LINE__) = TimerFrame( PASTE(cp_,__LINE__) )
112 #else
113 #define START_TIMER(tag)
114 #endif
115 
116 /**
117  * \def START_TIMER_EXT (tag, subtag)
118  *
119  * @brief Starts a timer with specified tag and subtag.
120  *
121  * In fact it creates an static constant expression that identifies the point in the code and
122  * contains tag and subtag of the involved timer and its hash. Then it creates local variable that
123  * calls @p Profiler::start_timer() in constructor and @p Profiler::stop_timer() in destructor.
124  * This way the timer is automatically closed at the end of current block.
125  *
126  * ATTENTION: This macro expands to two statements so following code is illegal:
127  * @code
128  * if (some_condition) START_TIMER_EXT(tag, subtag);
129  * @endcode
130  */
131 #ifdef FLOW123D_DEBUG_PROFILER
132 #define START_TIMER_EXT(tag, subtag) static CONSTEXPR_ CodePoint PASTE(cp_,__LINE__) = CODE_POINT_EXT(tag, subtag); TimerFrame PASTE(timer_,__LINE__) = TimerFrame( PASTE(cp_,__LINE__) )
133 #else
134 #define START_TIMER_EXT(tag, subtag)
135 #endif
136 
137 /**
138  * \def END_TIMER(tag)
139  *
140  * @brief Ends a timer with specified tag.
141  *
142  * Use only if you want to end timer before the end of block. Again this expands into two lines, see ATTENTION in previous macro.
143  */
144 #ifdef FLOW123D_DEBUG_PROFILER
145 #define END_TIMER(tag) static CONSTEXPR_ CodePoint PASTE(cp_,__LINE__) = CODE_POINT(tag); Profiler::instance()->stop_timer( PASTE(cp_,__LINE__) )
146 #else
147 #define END_TIMER(tag)
148 #endif
149 
150 /**
151  * \def END_START_TIMER(tag)
152  *
153  * Ends current timer and starts the new one with given tag. Again this expands into two lines, see ATTENTION in previous macro.
154  */
155 #ifdef FLOW123D_DEBUG_PROFILER
156 #define END_START_TIMER(tag) Profiler::instance()->stop_timer(); START_TIMER(tag);
157 #else
158 #define END_START_TIMER(tag)
159 #endif
160 
161 
162 /**
163  * \def ADD_CALLS(n_calls)
164  *
165  * @brief Increase number of calls in actual timer.
166  *
167  * Some time you want to measure a performance of a cycle with body that is below resolution of the Timer implementation.
168  * If you know number of cycles, you can use this macro in following way:
169  *
170  * @code
171  * START_TIMER("cycle");
172  * unsigned int i;
173  * for(i =0; i<1000000; i++) i*i*i;
174  * ADD_CALLS(i);
175  * END_TIMER("cycle");
176  * @endcode
177  *
178  * In the profiler report you get the total time spent in the cycle, and time per one call which will be average
179  * time spent in the body of the cycle.
180  */
181 #ifdef FLOW123D_DEBUG_PROFILER
182 #define ADD_CALLS(n_calls) Profiler::instance()->add_calls(n_calls)
183 #else
184 #define ADD_CALLS(n_calls)
185 #endif
186 
187 
188 
189 
190 //////////////////////////////////////////////////////////////////////////////////////////////
191 #ifdef FLOW123D_DEBUG_PROFILER
192 
193 /**
194  * Variable which represents value when no subtag was specified in CodePoint class
195  */
196 #define PROFILER_EMPTY_SUBTAG ""
197 
198 /**
199  * Variable used for default value in hash process
200  */
201 #define PROFILER_HASH_DEFAULT 0
202 
203 /**
204  * @brief Function for compile-time hash computation. (Needs C++x11 standard.)
205  * Input, @p str, is constant null terminated string, result is unsigned int (usually 4 bytes).
206  * Function has to be recursive, since standard requires that the body consists only from the return statement.
207  *
208  * SALT is hash for the empty string. Currently zero for simpler testing.
209  */
210 inline CONSTEXPR_ unsigned int str_hash(const char * str, unsigned int default_value) {
211  #define SALT 0 //0xef50e38f
212  return (*str == 0 ? SALT : default_value + str_hash(str+1, PROFILER_HASH_DEFAULT) * 101 + (unsigned int)(*str) );
213 }
214 
215 /**
216  * Macro to generate constexpr CodePoint object.
217  */
218 #define CODE_POINT(tag) CodePoint(tag, __FILE__, __func__, __LINE__)
219 
220 /**
221  * Macro to generate constexpr CodePoint object.
222  */
223 #define CODE_POINT_EXT(tag, subtag) CodePoint(tag, subtag, __FILE__, __func__, __LINE__)
224 
225 
226 
227 
228 /**
229  * @brief Class that represents point in the code.
230  *
231  * This class allow construction at compile time. And includes the information about the code point as well
232  * as the 'tag' of the timer and cimpile-time computed hashes of this 'tag'. The @p hash_ is long one with
233  * very small probability of collisions - this we use for comparison of tags. The @p hash_idx_ is the long hash modulo
234  * length of the array of Timer's children, this is used for fast loop up into this array that servers as a simple hash table.
235  */
236 class CodePoint {
237 public:
238  CONSTEXPR_ CodePoint(const char *tag, const char * file, const char * func, const unsigned int line)
239  : tag_(tag), subtag_(PROFILER_EMPTY_SUBTAG), file_(file), func_(func), line_(line),
240  hash_(str_hash(tag, PROFILER_HASH_DEFAULT)),
241  hash_idx_( str_hash(tag, PROFILER_HASH_DEFAULT)%max_n_timer_childs )
242  {};
243  CONSTEXPR_ CodePoint(const char *tag, const char *subtag, const char * file, const char * func, const unsigned int line)
244  : tag_(tag), subtag_(subtag), file_(file), func_(func), line_(line),
245  hash_(str_hash(subtag, str_hash(tag, PROFILER_HASH_DEFAULT))),
246  hash_idx_( str_hash(subtag, str_hash(tag, PROFILER_HASH_DEFAULT))%max_n_timer_childs )
247  {};
248 
249  /// Size of child arrays in timer nodes.
250  static const unsigned int max_n_timer_childs=13;
251 
252  /// Tag of the code point.
253  const char * const tag_;
254 
255  /// Subtag of the code point.
256  const char * const subtag_;
257 
258  /// file name of the code point
259  const char * const file_;
260 
261  /// file name of the code point
262  const char * const func_;
263 
264  /// file name of the code point
265  const unsigned int line_;
266 
267  /// Full 32-bit hash of the tag ( practically no chance of collision)
268  unsigned int hash_;
269 
270  /// Hash modulo size of array of timer childs ( we have to check full hash to prevent collision)
271  unsigned int hash_idx_;
272 };
273 
274 
275 
276 /**
277  * @brief Class for profiling tree nodes.
278  *
279  * One Timer represents one particular time frame in the execution tree.
280  * It collects information about total time, number of calls, allocated and deallocated memory.
281  *
282  * It should be accessed only through Profiler, which is its friend class.
283  *
284  * TODO: for better performance: move copy hash_ and hash_idx_ into Timer since CodePoint are in static
285  * variables, that may be slower to acces.
286  *
287  */
288 class Timer {
289 
290 
291 public:
292  /// Size of array @p child_timers, the hash table containing descendants in the call tree.
293  static const unsigned int max_n_childs=CodePoint::max_n_timer_childs;
294 
295  /**
296  * Creates the timer node object. Should not be called directly, but through the START_TIMER macro.
297  */
298  Timer(const CodePoint &cp, int parent);
299 
300 
301  /**
302  * Start the timer. If it is already started, just increase number of starts (recursions) and calls.
303  */
304  void start();
305 
306  /**
307  * If number of starts (recursions) drop back to zero, we stop the timer and add the period to the cumulative time.
308  * This method do not take care of its childs (it has no access to the other timers).
309  * When the parameter 2p forced is 'true', we stop the timer immediately regardless the number of recursions.
310  * Returns true if the timer is not closed (recursions didn't drop to zero yet).
311  */
312  bool stop(bool forced = false);
313 
314 
315  /// Getter for the 'tag'.
316  inline string tag() const {
317  string buf(code_point_->tag_);
318  buf.append(code_point_->subtag_);
319  return buf;
320  }
321 
322  /// Returns true if the timer is open, number of starts (recursions) is nonzero.
323  inline bool running() const
324  { return start_count >0; }
325 
326  /// Returns string with description of the code point where the timer was first started.
327  std::string code_point_str() const;
328 
329  /**
330  * Returns cumulative time of the timer in seconds.
331  */
332  double cumulative_time() const;
333 
334  /*
335  * Adds given index @p child_index of the timer @p child to the correct place in the hash table.
336  */
337  void add_child(int child_index, const Timer &child);
338 
339 
340 protected:
341 
342  /**
343  * Pauses current timer, save measured petsc memory information util resume.
344  * We get Petsc maximum memory usage via PetscMemoryGetMaximumUsage call
345  * and save this value into temp value. (we override local maximum if temp
346  * value is greater)
347  */
348  void pause();
349  /**
350  * Resume current timer. e tell Petsc to monitor the maximum memory
351  * usage once again. We call PetscMemorySetGetMaximumUsage so later in
352  * resume() method will PetscMemoryGetMaximumUsage method work.
353  */
354  void resume();
355 
356  /**
357  * Start time when frame opens.
358  */
359  TimePoint start_time;
360  /**
361  * Cumulative time spent in the frame.
362  */
363  double cumul_time;
364  /**
365  * Total number of opening of the frame.
366  */
367  unsigned int call_count;
368  /**
369  * Number of recursive openings.
370  */
371  unsigned int start_count;
372 
373 
374  /**
375  * Code point of the first START_TIMER for the particular tag. The 'tag' identifies timer
376  * and is used in reported profiler table.
377  */
378  const CodePoint *code_point_;
379  /// Full tag hash. Copy from code_point_.
380  unsigned int full_hash_;
381  /// Hash modulo size of array of timer childs. Copy from code_point_.
382  unsigned int hash_idx_;
383 
384  /**
385  * Index of the parent timer node in the tree. Negative value means 'not set'.
386  */
387  int parent_timer;
388  /**
389  * Indices of the child timers in the Profiler::timers_ vector. Negative values means 'not set'.
390  */
391  int child_timers[max_n_childs];
392 
393  /**
394  * Total number of bytes allocated in this frame. After
395  * Profiler::propagate_timers call will also contain values from children.
396  */
397  size_t total_allocated_;
398  /**
399  * Total number of bytes deallocated in this frame. After
400  * Profiler::propagate_timers call, will also contain values from children.
401  */
402  size_t total_deallocated_;
403  /**
404  * Maximum number of bytes allocated at one time in this frame. After
405  * Profiler::propagate_timers call, maximum value will be taken from this
406  * Timer and also from all children Timers.
407  */
408  size_t max_allocated_;
409  /**
410  * Current number of bytes allocated in this frame at the same time.
411  * This value is used to monitor maximum bytes allocated. When notify_free
412  * and notify_malloc is called this values is changed and new maximum
413  * is tested.
414  */
415  size_t current_allocated_;
416 
417  /**
418  * Number of times new/new[] operator was used in this scope
419  */
420  int alloc_called;
421  /**
422  * Number of times delete/delete[] operator was used in this scope
423  */
424  int dealloc_called;
425 
426  #ifdef FLOW123D_HAVE_PETSC
427  /**
428  * Number of bytes used by Petsc at the start of time-frame
429  */
430  PetscLogDouble petsc_start_memory;
431  /**
432  * Number of bytes used by Petsc at the end of time-frame
433  */
434  PetscLogDouble petsc_end_memory;
435  /**
436  * Difference between start and end of a petsc memory usage
437  */
438  PetscLogDouble petsc_memory_difference;
439  /**
440  * Maximum amount of memory used that was PetscMalloc()ed at any time
441  * during this run.
442  *
443  * The memory usage reported here includes all Fortran arrays (that may be
444  * used in application-defined sections of code).
445  */
446  PetscLogDouble petsc_peak_memory;
447  /**
448  * Local maximum amount of memory used that was PetscMalloc()ed
449  * used during time-frame pause/resume. Auxilary variable for storing
450  * local memory used when pause is called.
451  */
452  PetscLogDouble petsc_local_peak_memory;
453  #endif // FLOW123D_HAVE_PETSC
454 
455  friend class Profiler;
456  friend std::ostream & operator <<(std::ostream&, const Timer&);
457 
458  /**
459  * if under unit testing, specify friend so protected members can be tested
460  */
461  #ifdef __UNIT_TEST__
462  friend ProfilerTest;
463  #endif /* __UNIT_TEST__ */
464 
465 };
466 
467 /*
468 struct SimpleTranslator {
469  typedef std::string internal_type;
470  typedef int external_type;
471 
472  // Converts a string to int
473  boost::optional<external_type> get_value(const internal_type& str) {
474  return boost::optional<external_type>(std::stoi(str));
475  }
476 
477  // Converts a bool to string
478  boost::optional<internal_type> put_value(const external_type& i){
479  return boost::optional<internal_type>(std::to_string(i));
480  }
481 };
482 
483 namespace boost {
484 namespace property_tree {
485 
486 template<typename Ch, typename Traits, typename Alloc>
487 struct translator_between<std::basic_string< Ch, Traits, Alloc >, int> {
488  typedef SimpleTranslator type;
489 };
490 
491 
492 } // namespace property_tree
493 } // namespace boost
494 */
495 /**
496  *
497  * @brief Main class for profiling by measuring time intervals.
498  *
499  * These time intervals form a tree structure where each interval is represented
500  * by a Timer object. The root node of the tree is automatically created and
501  * started after creating the Profiler object and cannot be stopped manually.
502  *
503  * The class implements a singleton pattern and all the functions are accessible trough
504  * Profiler::instance(), but in most cases the programmer will access the profiler
505  * functions via the #START_TIMER and #END_TIMER macros. The #START_TIMER macro
506  * is responsible for the fact that we don't have to call #END_TIMER macro to stop the timer and
507  * the timer will be stopped at the end of the block in which #START_TIMER was used.
508  * These macros internally use the TimerFrame objects and the programmer should
509  * not use the TimerFrame objects directly.
510  *
511  * By using #SET_TIMER_SUBFRAMES macro, the programmer can specify the number of subframes (eg. iterations)
512  * for the currently active timer.
513  *
514  *
515  * Currently the Profiler system is not thread safe. No idea how to do this.
516  *
517  */
518 class Profiler {
519 public:
520 
521  /**
522  * Initializes the Profiler with specific MPI communicator object
523  */
524  //static void initialize(MPI_Comm communicator = MPI_COMM_WORLD);
525  static void initialize();
526  /**
527  * Returns unique Profiler object.
528  */
529  static Profiler* instance();
530  /**
531  * Sets task specific information. The string @p description with textual description of the task and the
532  * number of elements of the mesh (parameter @p size). This is used for weak scaling graphs so it should
533  * measure size of the task of the same type (same description).
534  *
535  */
536  void set_task_info(string description, int size);
537  /**
538  * Sets informations about program version. This consists of @p program_version (includes program name), @p branch in the repository or rather full URL of the branch,
539  * and SVN @p revision (or hash for GIT).
540  *
541  */
542  void set_program_info(string program_name, string program_version, string branch, string revision, string build);
543 
544 
545  /**
546  * Starts a timer with code point, tag and hashes specified by CodePoint object @p cp.
547  * If the timer is not already created, it creates a new one. It returns index of
548  * the actual timer.
549  */
550  int start_timer(const CodePoint &cp);
551  /**
552  * Stops actual timer. It check if the hash of given code point match hash of the
553  * tag of actual timer node. If not we print out warning and try to find the correct tag
554  * towards the tree root closing all nodes we pass through.
555  *
556  * If FLOW123D_DEBUG is set, we check that all children are closed.
557  */
558  void stop_timer(const CodePoint &cp);
559 
560  /**
561  * Stop timer with index given by @p timer_index. If this is not equal to @p actual_node, we
562  * traverse the tree towards root while force closing nodes by the way.
563  *
564  * Negative @p timer_index means close @p actual_node
565  */
566  void stop_timer(int timer_index = -1);
567 
568  /**
569  * Adds @p n_calls - 1 to the total number of calls of the current timer. Minus one, since one call is counted when
570  * timer was started. You should use macro ADD_CALLS above.
571  */
572  void add_calls(unsigned int n_calls);
573  /**
574  * Notification about allocation of given size.
575  * Increase total allocated memory in current profiler frame.
576  */
577  void notify_malloc(const size_t size, const long p);
578  /**
579  * Notification about freeing memory of given size.
580  * Increase total deallocated memory in current profiler frame.
581  */
582  void notify_free(const long p);
583 
584  /**
585  * Return average profiler timer resolution in seconds
586  * based on 100 measurements
587  */
588  static double get_resolution ();
589 
590 
591 #ifdef FLOW123D_HAVE_MPI
592  /**
593  * @brief Output current timing information into the given stream.
594  *
595  * COLECTIVE - all processes in the communicator have to call this
596  * method. All timers are finished, all processes are synchronized, collect
597  * profiling informations are collected and written to the given stream.
598  *
599  * Pass through the profiling tree (collective over processors)
600  * Print cumulative times average, balance (max/min), count (denote differences)
601  *
602  */
603  void output(MPI_Comm comm, std::ostream &os);
604 
605  /**
606  * Same as previous, but output to the file with default name: "profiler_info_YYMMDD_HH::MM:SS.log".
607  * Empty body if macro FLOW123D_DEBUG_PROFILER is not defined.
608  */
609  void output(MPI_Comm comm, string profiler_path = "");
610 
611 #endif /* FLOW123D_HAVE_MPI */
612  /**
613  * @brief Output current timing information into the given stream.
614  *
615  * It temporally stops all timers, synchronize all processes, collect
616  * profiling informations and write it to the given stream.
617  *
618  * Pass through the profiling tree (collective over processors)
619  * Print cumulative times average, balance (max/min), count (denote differences)
620  *
621  */
622  void output(std::ostream &os);
623 
624  /**
625  * Same as previous, but output to the file with default name: "profiler_info_YYMMDD_HH::MM:SS.log".
626  * Empty body if macro FLOW123D_DEBUG_PROFILER is not defined.
627  */
628  void output(string profiler_path = "");
629 
630  /**
631  * Method will transform last profiler json file to desired format
632  */
633  void transform_profiler_data (const string &output_file_suffix, const string &formatter);
634  /**
635  * Stop all timers and destroys the Profiler object.
636  * If you want some output call @p output method just before.
637  */
638  static void uninitialize();
639 
640  /**
641  * Class-specific allocation function new. Called by the usual
642  * single-object new-expressions if allocating an object of type Profiler.
643  */
644  static void* operator new (size_t sz);
645  /**
646  * Class-specific allocation function delete. Deallocates storage
647  * previously allocated by a matching operator new. These deallocation
648  * functions are called by delete-expressions.
649  */
650  static void operator delete (void* p);
651 
652  /**
653  * Public setter to turn on/off memory monitoring
654  * @param global_monitor whether to turn global monitoring on or off
655  * @param petsc_monitor petsc monitoring
656  */
657  void static set_memory_monitoring(const bool global_monitor, const bool petsc_monitor);
658 
659  /**
660  * Public getter to memory monitoring
661  * @return memory monitoring status
662  */
663  bool static get_global_memory_monitoring();
664 
665  /**
666  * Public getter to petsc memory monitoring
667  * @return memory monitoring status
668  */
669  bool static get_petsc_memory_monitoring();
670 
671  /**
672  * if under unit testing, specify friend so protected members can be tested
673  */
674  #ifdef __UNIT_TEST__
675  friend ProfilerTest;
676  #endif /* __UNIT_TEST__ */
677 
678 protected:
679 
680  /**
681  * Whether to monitor operator 'new/delete'
682  */
683  static bool global_monitor_memory;
684 
685  /**
686  * Whether to monitor petsc memory usage
687  */
688  static bool petsc_monitor_memory;
689 
690  /**
691  * When creating Profiler also reserve some bytes in malloc_map so overhead
692  * of creating single items is lowered. This value is passed as parameter in
693  * map.reserve() method so it indicates how many objects (pointers) are
694  * allocated at first.
695  */
696  static const long malloc_map_reserve;
697 
698  /**
699  * Method will propagate values from children timers to its parents
700  */
701  void propagate_timers ();
702 
703  /**
704  * Method for exchanging metrics from child timer to its parent timer
705  */
706  void accept_from_child (Timer &parent, Timer &child);
707 
708  /**
709  * Try to find timer with tag (in fact only its 32-bit hash) from given code point @p cp.
710  * Returns -1 if it is not found otherwise it returns its index.
711  */
712  int find_child(const CodePoint &cp);
713 
714 
715  /**
716  * Method will prepare construct specific details about the run (time start and time end)
717  * and write them along with basic informations about the run (name, description, ...)
718  * into nlohmann:json object
719  */
720  void output_header (nlohmann::json &root, int mpi_size);
721 
722  /**
723  * Open a new file for profiler output with default name based on the
724  * actual time and date. Returns a pointer to the stream of the output file.
725  */
726  std::shared_ptr<std::ostream> get_default_output_stream();
727 
728  /// Default code point.
729  static CodePoint null_code_point;
730 
731  /// Pointer to the unique instance of singleton Profiler class.
732  static Profiler* _instance;
733 
734  /// Vector of all timers. Whole tree is stored in this array.
736 
737  /// Index of the actual timer node. Negative value means 'unset'.
738  unsigned int actual_node;
739 
740  /// MPI communicator used for final reduce of the timer node tree.
741  //MPI_Comm communicator_;
742  /// MPI_rank
743  //int mpi_rank_;
744 
745  /**
746  * flag indicating that collection of timer details will be
747  * using MPI
748  bool mpi_used;
749  */
750  // header informations
751 
752  /// Some measure of the size of the task in the set of the tasks that differs
753  /// only by size - used for scaling tests.
754  int task_size_;
755  /// Task description and identifier in possible database of all Profiler results.
756  string task_description_;
757  /// Time and date of the start of the task solution. In fact start of the Profiler.
758  time_t start_time;
759 
760  /// Name of the program.
761  string flow_name_;
762  /// Version of the program.
763  string flow_version_;
764  /// Http address of the branch in a repository.
765  string flow_branch_;
766  /// Revision or GIT hash.
767  string flow_revision_;
768  /// Build date and time.
769  string flow_build_;
770  /// Variable which stores last json log filepath
771  string json_filepath;
772 
773 
774  /**
775  * Use DFS to pass through the tree and collect information about all timers reduced from the processes in the communicator.
776  * For every timer the information strings are stored in the struct TimerInfo in order to pad fields correctly
777  * to have alligned columns on the output. The alligning is performed in the output() method.
778  */
779  template<typename ReduceFunctor>
780  void add_timer_info(ReduceFunctor reduce, nlohmann::json* node, int timer_idx, double parent_time);
781 
782  //Profiler(MPI_Comm comm); // private constructor
783  Profiler(); // private constructor
784  Profiler(Profiler const&); // copy constructor is private
785  Profiler & operator=(Profiler const&); // assignment operator is private
786 };
787 
788 
789 
790 
791 
792 
793 /**
794  *
795  * @brief Class for automatic timer closing. This class is used by #START_TIMER macro
796  * and is responsible for the fact that we don't have to call #END_TIMER macro to stop the timer,
797  * the timer will be stopped at the end of the block in which #START_TIMER was used.
798  *
799  * The main idea of the approach described is that the TimerFrame variable will be destroyed
800  * at the end of the block where #START_TIMER macro was used. In order to work properly
801  * in situations where #END_TIMER was used to stop the timer manually before (but there is still the
802  * variable which will be later destroyed), we have to store references to these variables and
803  * destroy them on-demand.
804  *
805  * TODO:
806  * Should only contain pointer to the Timer. And destructor, that close the timer.
807  */
808 class TimerFrame {
809 private:
810  int const timer_index_;
811 public:
812  inline TimerFrame(const CodePoint &cp)
813  : timer_index_( Profiler::instance()->start_timer(cp) )
814  {}
815 
816  ~TimerFrame() {
817  Profiler::instance()->stop_timer(timer_index_);
818  }
819 };
820 
821 
822 /**
823  * Simple class providing static map variable storing address and alloc size
824  */
825 // gcc version 4.9 and lower has following bug: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=59751
826 // fix in version 4.9: https://gcc.gnu.org/gcc-4.9/changes.html#cxx
827 // typedef unordered_map<long, int, hash<long>, equal_to<long>, internal::SimpleAllocator<pair<const long, int>>> unordered_map_with_alloc;
828 typedef boost::unordered_map<long, int, boost::hash<long>, equal_to<long>, internal::SimpleAllocator<std::pair<const long, int>>> unordered_map_with_alloc;
829 class MemoryAlloc {
830 public:
831  /**
832  * Create static map containing <allocation address, allocation size> pairs
833  * map is used for storing allocations and deallocations of all object not
834  * related to profiler after profiler initialization phase
835  */
836  static unordered_map_with_alloc & malloc_map();
837 };
838 
839 
840 
841 
842 #else // FLOW123D_DEBUG_PROFILER
843 
844 
845 // dummy declaration of Profiler class
846 class Profiler {
847 public:
848  static void initialize();
849  static Profiler* instance();
850 
851  void set_task_info(string description, int size)
852  {}
853  void set_program_info(string program_name, string program_version, string branch, string revision, string build)
854  {}
855  void notify_malloc(const size_t size )
856  {}
857  void notify_free(const size_t size )
858  {}
859  void output(MPI_Comm comm, ostream &os)
860  {}
861  void output(MPI_Comm comm)
862  {}
863  void transform_profiler_data(const string &output_file_suffix, const string &formatter)
864  {}
865  double get_resolution () const
866  { return 0.0; }
867  const char *actual_tag() const
868  { return NULL; }
869  inline unsigned int actual_count() const
870  { return 0; }
871  inline double actual_cumulative_time() const
872  { return 0.0; }
873  static void uninitialize();
874 private:
876  Profiler() {}
877 };
878 
879 
880 
881 
882 #endif
883 
884 
885 #endif
#define CONSTEXPR_
Definition: sys_profiler.hh:84
int MPI_Comm
Definition: mpi.h:141
a class to store JSON values
Definition: json.hpp:173
double get_resolution() const
unsigned int actual_count() const
double actual_cumulative_time() const
void notify_free(const size_t size)
void output(MPI_Comm comm)
Global macros to enhance readability and debugging, general constants.
void set_program_info(string program_name, string program_version, string branch, string revision, string build)
STREAM & operator<<(STREAM &s, UpdateFlags u)
void transform_profiler_data(const string &output_file_suffix, const string &formatter)
static Profiler * instance()
static Profiler * _instance
Definition: memory.cc:33
void output(MPI_Comm comm, ostream &os)
void set_task_info(string description, int size)
void notify_malloc(const size_t size)
const char * actual_tag() const