Flow123d  release_3.0.0-968-gc87a28e79
sys_profiler.hh
Go to the documentation of this file.
1 /*!
2  *
3  * Copyright (C) 2015 Technical University of Liberec. All rights reserved.
4  *
5  * This program is free software; you can redistribute it and/or modify it under
6  * the terms of the GNU General Public License version 3 as published by the
7  * Free Software Foundation. (http://www.gnu.org/licenses/gpl-3.0.en.html)
8  *
9  * This program is distributed in the hope that it will be useful, but WITHOUT
10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
11  * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
12  *
13  *
14  * @file sys_profiler.hh
15  * @brief
16  * @todo
17  * - START_GLOBAL_TIMER(tag) - this calls the start_timer, which creates local timer on the correct place in the hierarchy,
18  * further this timer is added to the list of global timers, this contains groups of timers with same tag, and
19  * collect/sum data from these timers in the report.
20  *
21  * - Allow output even during calculation (not complete, but at least some thing)
22  * Report should contain time of start as well as time of creation of the report or time from start of the program.
23  *
24  * - When generating the report we have to deal with possibly different trees on every MPI process.
25  *
26  * - test memory profiling
27  * in our own new and xmalloc functions - register allocated and deallocated memory to the active Profiler frame.
28  *
29  * - test in parallel
30  * - extended output:
31  * time per single call (a different reduction than for the cumulative time, if the number of calls differs)
32  * percentage relative to the parent
33  * code point (somewhere near the end of the line)
34  *
35  *
36  * !!! Unfortunately using constexpr is worse (without optimization).
37  * This is probably due to use of static variable for
38  * CodePoint, the access could be slow, and computation of hash is done only once. Actually timing results
39  * are:
40  *
41  * OPTIONS OVERHEAD (compared to call 2x clock())
42  * -g, no c++11 : 18%
43  * -g, c++11 : 60%
44  * -O3,no c++11 : 6%
45  * -O3, c++11 : 6%
46  */
47 
48 #ifndef PROFILER_H
49 #define PROFILER_H
50 
#include "global_defs.h"

#include <mpi.h>
#include <ostream>
#include <unordered_map>
#include <vector>

namespace boost { template <class T> struct hash; }
#include <boost/functional/hash/hash.hpp> // for hash
#include <boost/property_tree/ptree_fwd.hpp> // for ptree, property_tree
#include <boost/ref.hpp>
#include <boost/tuple/detail/tuple_basic.hpp> // for get


#include "time_point.hh"
#include "petscsys.h"
#include "simple_allocator.hh"
67 
68 
69 // namespace alias
70 namespace property_tree = boost::property_tree;
71 
72 //instead of #include "mpi.h"
73 //mpi declarations follows:
75 public:
76  static int sum(int* val, MPI_Comm comm);
77  static double sum(double* val, MPI_Comm comm);
78  static long sum(long* val, MPI_Comm comm);
79 
80  static int min(int* val, MPI_Comm comm);
81  static double min(double* val, MPI_Comm comm);
82  static long min(long* val, MPI_Comm comm);
83 
84  static int max(int* val, MPI_Comm comm);
85  static double max(double* val, MPI_Comm comm);
86  static long max(long* val, MPI_Comm comm);
87 };
88 
// Every supported compiler is assumed to understand constexpr.
#define CONSTEXPR_ constexpr

using namespace std;


// Two-level token pasting. PASTE lets its arguments be macro-expanded first
// (this is what turns __LINE__ into a number) and only then hands them to
// _PASTE, which glues them together. The timer macros below depend on it.
#define _PASTE(a, b) a##b
#define PASTE(a, b) _PASTE(a, b)
98 
99 
100 
/**
 * \def START_TIMER(tag)
 *
 * @brief Opens a timer identified by @p tag.
 *
 * The expansion defines a static constant CodePoint (built by CODE_POINT, so it
 * holds the tag, its hash and the source location) followed by a local TimerFrame
 * constructed from it. The TimerFrame calls @p Profiler::start_timer() in its
 * constructor and @p Profiler::stop_timer() in its destructor, hence the timer
 * is closed automatically when the current block ends.
 *
 * Both generated names contain __LINE__, so use at most one such macro per source line.
 *
 * ATTENTION: The macro expands to two statements, therefore the following code is illegal:
 * @code
 * if (some_condition) START_TIMER(tag);
 * @endcode
 *
 * Without FLOW123D_DEBUG_PROFILER the macro expands to nothing.
 */
#ifdef FLOW123D_DEBUG_PROFILER
#define START_TIMER(tag)                                                 \
    static CONSTEXPR_ CodePoint PASTE(cp_,__LINE__) = CODE_POINT(tag);   \
    TimerFrame PASTE(timer_,__LINE__)( PASTE(cp_,__LINE__) )
#else
#define START_TIMER(tag)
#endif
121 
/**
 * \def START_TIMER_EXT (tag, subtag)
 *
 * @brief Opens a timer identified by @p tag together with @p subtag.
 *
 * The expansion defines a static constant CodePoint (built by CODE_POINT_EXT, so it
 * holds the tag, the subtag, their combined hash and the source location) followed by
 * a local TimerFrame constructed from it. The TimerFrame calls
 * @p Profiler::start_timer() in its constructor and @p Profiler::stop_timer() in its
 * destructor, hence the timer is closed automatically when the current block ends.
 *
 * ATTENTION: The macro expands to two statements, therefore the following code is illegal:
 * @code
 * if (some_condition) START_TIMER_EXT(tag, subtag);
 * @endcode
 *
 * Without FLOW123D_DEBUG_PROFILER the macro expands to nothing.
 */
#ifdef FLOW123D_DEBUG_PROFILER
#define START_TIMER_EXT(tag, subtag)                                                 \
    static CONSTEXPR_ CodePoint PASTE(cp_,__LINE__) = CODE_POINT_EXT(tag, subtag);   \
    TimerFrame PASTE(timer_,__LINE__)( PASTE(cp_,__LINE__) )
#else
#define START_TIMER_EXT(tag, subtag)
#endif
142 
/**
 * \def END_TIMER(tag)
 *
 * @brief Closes the timer with the given tag.
 *
 * Needed only when a timer has to be closed before the end of its block.
 * The expansion defines a static constant CodePoint for @p tag and passes it to
 * @p Profiler::stop_timer(). Like START_TIMER it consists of two statements, so it
 * must not be used as the unbraced body of an if/for/while.
 *
 * Without FLOW123D_DEBUG_PROFILER the macro expands to nothing.
 */
#ifdef FLOW123D_DEBUG_PROFILER
#define END_TIMER(tag)                                                   \
    static CONSTEXPR_ CodePoint PASTE(cp_,__LINE__) = CODE_POINT(tag);   \
    Profiler::instance()->stop_timer( PASTE(cp_,__LINE__) )
#else
#define END_TIMER(tag)
#endif
155 
/**
 * \def END_START_TIMER(tag)
 *
 * Closes the current timer (Profiler::stop_timer() without arguments) and then opens
 * a new one with the given tag through START_TIMER. The expansion consists of several
 * statements and already ends with a semicolon, so it must not be used as the
 * unbraced body of an if/for/while.
 *
 * Without FLOW123D_DEBUG_PROFILER the macro expands to nothing.
 */
#ifdef FLOW123D_DEBUG_PROFILER
#define END_START_TIMER(tag)              \
    Profiler::instance()->stop_timer();   \
    START_TIMER(tag);
#else
#define END_START_TIMER(tag)
#endif
166 
167 
/**
 * \def ADD_CALLS(n_calls)
 *
 * @brief Increases the number of calls recorded for the current timer.
 *
 * Useful for measuring a loop whose body is shorter than the resolution of the
 * Timer implementation. When the number of iterations is known, forward it to
 * Profiler::add_calls() like this:
 *
 * @code
 * START_TIMER("cycle");
 * unsigned int i;
 * for(i =0; i<1000000; i++) i*i*i;
 * ADD_CALLS(i);
 * END_TIMER("cycle");
 * @endcode
 *
 * The profiler report then shows the total time spent in the loop and the time per
 * call, i.e. the average time of one pass through the loop body.
 *
 * Without FLOW123D_DEBUG_PROFILER the macro expands to nothing.
 */
#ifdef FLOW123D_DEBUG_PROFILER
#define ADD_CALLS(n_calls)   Profiler::instance()->add_calls(n_calls)
#else
#define ADD_CALLS(n_calls)
#endif
192 
193 
194 
195 
196 //////////////////////////////////////////////////////////////////////////////////////////////
197 #ifdef FLOW123D_DEBUG_PROFILER
198 
/**
 * Subtag stored in a CodePoint that was created without an explicit subtag.
 */
#define PROFILER_EMPTY_SUBTAG   ""

/**
 * Seed passed to str_hash() when no previous hash is chained in.
 */
#define PROFILER_HASH_DEFAULT   0
208 
209 /**
210  * @brief Function for compile-time hash computation. (Needs C++x11 standard.)
211  * Input, @p str, is constant null terminated string, result is unsigned int (usually 4 bytes).
212  * Function has to be recursive, since standard requires that the body consists only from the return statement.
213  *
214  * SALT is hash for the empty string. Currently zero for simpler testing.
215  */
216 inline CONSTEXPR_ unsigned int str_hash(const char * str, unsigned int default_value) {
217  #define SALT 0 //0xef50e38f
218  return (*str == 0 ? SALT : default_value + str_hash(str+1, PROFILER_HASH_DEFAULT) * 101 + (unsigned int)(*str) );
219 }
220 
/**
 * Builds a constexpr CodePoint for @p tag at the place of use
 * (current file, function and line).
 */
#define CODE_POINT(tag)   CodePoint(tag, __FILE__, __func__, __LINE__)

/**
 * Builds a constexpr CodePoint for @p tag and @p subtag at the place of use
 * (current file, function and line).
 */
#define CODE_POINT_EXT(tag, subtag)   CodePoint(tag, subtag, __FILE__, __func__, __LINE__)
230 
231 
232 
233 
234 /**
235  * @brief Class that represents point in the code.
236  *
237  * This class allow construction at compile time. And includes the information about the code point as well
238  * as the 'tag' of the timer and cimpile-time computed hashes of this 'tag'. The @p hash_ is long one with
239  * very small probability of collisions - this we use for comparison of tags. The @p hash_idx_ is the long hash modulo
240  * length of the array of Timer's children, this is used for fast loop up into this array that servers as a simple hash table.
241  */
242 class CodePoint {
243 public:
244  CONSTEXPR_ CodePoint(const char *tag, const char * file, const char * func, const unsigned int line)
245  : tag_(tag), subtag_(PROFILER_EMPTY_SUBTAG), file_(file), func_(func), line_(line),
246  hash_(str_hash(tag, PROFILER_HASH_DEFAULT)),
247  hash_idx_( str_hash(tag, PROFILER_HASH_DEFAULT)%max_n_timer_childs )
248  {};
249  CONSTEXPR_ CodePoint(const char *tag, const char *subtag, const char * file, const char * func, const unsigned int line)
250  : tag_(tag), subtag_(subtag), file_(file), func_(func), line_(line),
251  hash_(str_hash(subtag, str_hash(tag, PROFILER_HASH_DEFAULT))),
252  hash_idx_( str_hash(subtag, str_hash(tag, PROFILER_HASH_DEFAULT))%max_n_timer_childs )
253  {};
254 
255  /// Size of child arrays in timer nodes.
256  static const unsigned int max_n_timer_childs=13;
257 
258  /// Tag of the code point.
259  const char * const tag_;
260 
261  /// Subtag of the code point.
262  const char * const subtag_;
263 
264  /// file name of the code point
265  const char * const file_;
266 
267  /// file name of the code point
268  const char * const func_;
269 
270  /// file name of the code point
271  const unsigned int line_;
272 
273  /// Full 32-bit hash of the tag ( practically no chance of collision)
274  unsigned int hash_;
275 
276  /// Hash modulo size of array of timer childs ( we have to check full hash to prevent collision)
277  unsigned int hash_idx_;
278 };
279 
280 
281 
282 /**
283  * @brief Class for profiling tree nodes.
284  *
285  * One Timer represents one particular time frame in the execution tree.
286  * It collects information about total time, number of calls, allocated and deallocated memory.
287  *
288  * It should be accessed only through Profiler, which is its friend class.
289  *
290  * TODO: for better performance: move copy hash_ and hash_idx_ into Timer since CodePoint are in static
291  * variables, that may be slower to acces.
292  *
293  */
294 class Timer {
295 
296 
297 public:
298  /// Size of array @p child_timers, the hash table containing descendants in the call tree.
299  static const unsigned int max_n_childs=CodePoint::max_n_timer_childs;
300 
301  /**
302  * Creates the timer node object. Should not be called directly, but through the START_TIMER macro.
303  */
304  Timer(const CodePoint &cp, int parent);
305 
306 
307  /**
308  * Start the timer. If it is already started, just increase number of starts (recursions) and calls.
309  */
310  void start();
311 
312  /**
313  * If number of starts (recursions) drop back to zero, we stop the timer and add the period to the cumulative time.
314  * This method do not take care of its childs (it has no access to the other timers).
315  * When the parameter 2p forced is 'true', we stop the timer immediately regardless the number of recursions.
316  * Returns true if the timer is not closed (recursions didn't drop to zero yet).
317  */
318  bool stop(bool forced = false);
319 
320 
321  /// Getter for the 'tag'.
322  inline string tag() const {
323  string buf(code_point_->tag_);
324  buf.append(code_point_->subtag_);
325  return buf;
326  }
327 
328  /// Returns true if the timer is open, number of starts (recursions) is nonzero.
329  inline bool running() const
330  { return start_count >0; }
331 
332  /// Returns string with description of the code point where the timer was first started.
333  std::string code_point_str() const;
334 
335  /**
336  * Returns cumulative time of the timer in seconds.
337  */
338  double cumulative_time() const;
339 
340  /*
341  * Adds given index @p child_index of the timer @p child to the correct place in the hash table.
342  */
343  void add_child(int child_index, const Timer &child);
344 
345 
346 protected:
347 
348  /**
349  * Pauses current timer, save measured petsc memory information util resume.
350  * We get Petsc maximum memory usage via PetscMemoryGetMaximumUsage call
351  * and save this value into temp value. (we override local maximum if temp
352  * value is greater)
353  */
354  void pause();
355  /**
356  * Resume current timer. e tell Petsc to monitor the maximum memory
357  * usage once again. We call PetscMemorySetGetMaximumUsage so later in
358  * resume() method will PetscMemoryGetMaximumUsage method work.
359  */
360  void resume();
361 
362  /**
363  * Start time when frame opens.
364  */
365  TimePoint start_time;
366  /**
367  * Cumulative time spent in the frame.
368  */
369  double cumul_time;
370  /**
371  * Total number of opening of the frame.
372  */
373  unsigned int call_count;
374  /**
375  * Number of recursive openings.
376  */
377  unsigned int start_count;
378 
379 
380  /**
381  * Code point of the first START_TIMER for the particular tag. The 'tag' identifies timer
382  * and is used in reported profiler table.
383  */
384  const CodePoint *code_point_;
385  /// Full tag hash. Copy from code_point_.
386  unsigned int full_hash_;
387  /// Hash modulo size of array of timer childs. Copy from code_point_.
388  unsigned int hash_idx_;
389 
390  /**
391  * Index of the parent timer node in the tree. Negative value means 'not set'.
392  */
393  int parent_timer;
394  /**
395  * Indices of the child timers in the Profiler::timers_ vector. Negative values means 'not set'.
396  */
397  int child_timers[max_n_childs];
398 
399  /**
400  * Total number of bytes allocated in this frame. After
401  * Profiler::propagate_timers call will also contain values from children.
402  */
403  size_t total_allocated_;
404  /**
405  * Total number of bytes deallocated in this frame. After
406  * Profiler::propagate_timers call, will also contain values from children.
407  */
408  size_t total_deallocated_;
409  /**
410  * Maximum number of bytes allocated at one time in this frame. After
411  * Profiler::propagate_timers call, maximum value will be taken from this
412  * Timer and also from all children Timers.
413  */
414  size_t max_allocated_;
415  /**
416  * Current number of bytes allocated in this frame at the same time.
417  * This value is used to monitor maximum bytes allocated. When notify_free
418  * and notify_malloc is called this values is changed and new maximum
419  * is tested.
420  */
421  size_t current_allocated_;
422 
423  /**
424  * Number of times new/new[] operator was used in this scope
425  */
426  int alloc_called;
427  /**
428  * Number of times delete/delete[] operator was used in this scope
429  */
430  int dealloc_called;
431 
432  #ifdef FLOW123D_HAVE_PETSC
433  /**
434  * Number of bytes used by Petsc at the start of time-frame
435  */
436  PetscLogDouble petsc_start_memory;
437  /**
438  * Number of bytes used by Petsc at the end of time-frame
439  */
440  PetscLogDouble petsc_end_memory;
441  /**
442  * Difference between start and end of a petsc memory usage
443  */
444  PetscLogDouble petsc_memory_difference;
445  /**
446  * Maximum amount of memory used that was PetscMalloc()ed at any time
447  * during this run.
448  *
449  * The memory usage reported here includes all Fortran arrays (that may be
450  * used in application-defined sections of code).
451  */
452  PetscLogDouble petsc_peak_memory;
453  /**
454  * Local maximum amount of memory used that was PetscMalloc()ed
455  * used during time-frame pause/resume. Auxilary variable for storing
456  * local memory used when pause is called.
457  */
458  PetscLogDouble petsc_local_peak_memory;
459  #endif // FLOW123D_HAVE_PETSC
460 
461  friend class Profiler;
462  friend std::ostream & operator <<(std::ostream&, const Timer&);
463 
464  /**
465  * if under unit testing, specify friend so protected members can be tested
466  */
467  #ifdef __UNIT_TEST__
468  friend ProfilerTest;
469  #endif /* __UNIT_TEST__ */
470 
471 };
472 
473 /*
474 struct SimpleTranslator {
475  typedef std::string internal_type;
476  typedef int external_type;
477 
478  // Converts a string to int
479  boost::optional<external_type> get_value(const internal_type& str) {
480  return boost::optional<external_type>(std::stoi(str));
481  }
482 
483  // Converts a bool to string
484  boost::optional<internal_type> put_value(const external_type& i){
485  return boost::optional<internal_type>(std::to_string(i));
486  }
487 };
488 
489 namespace boost {
490 namespace property_tree {
491 
492 template<typename Ch, typename Traits, typename Alloc>
493 struct translator_between<std::basic_string< Ch, Traits, Alloc >, int> {
494  typedef SimpleTranslator type;
495 };
496 
497 
498 } // namespace property_tree
499 } // namespace boost
500 */
501 /**
502  *
503  * @brief Main class for profiling by measuring time intervals.
504  *
505  * These time intervals form a tree structure where each interval is represented
506  * by a Timer object. The root node of the tree is automatically created and
507  * started after creating the Profiler object and cannot be stopped manually.
508  *
509  * The class implements a singleton pattern and all the functions are accessible trough
510  * Profiler::instance(), but in most cases the programmer will access the profiler
511  * functions via the #START_TIMER and #END_TIMER macros. The #START_TIMER macro
512  * is responsible for the fact that we don't have to call #END_TIMER macro to stop the timer and
513  * the timer will be stopped at the end of the block in which #START_TIMER was used.
514  * These macros internally use the TimerFrame objects and the programmer should
515  * not use the TimerFrame objects directly.
516  *
517  * By using #SET_TIMER_SUBFRAMES macro, the programmer can specify the number of subframes (eg. iterations)
518  * for the currently active timer.
519  *
520  *
521  * Currently the Profiler system is not thread safe. No idea how to do this.
522  *
523  */
524 class Profiler {
525 public:
526 
527  /**
528  * Initializes the Profiler with specific MPI communicator object
529  */
530  //static void initialize(MPI_Comm communicator = MPI_COMM_WORLD);
531  static void initialize();
532  /**
533  * Returns unique Profiler object.
534  */
535  static Profiler* instance();
536  /**
537  * Sets task specific information. The string @p description with textual description of the task and the
538  * number of elements of the mesh (parameter @p size). This is used for weak scaling graphs so it should
539  * measure size of the task of the same type (same description).
540  *
541  */
542  void set_task_info(string description, int size);
543  /**
544  * Sets informations about program version. This consists of @p program_version (includes program name), @p branch in the repository or rather full URL of the branch,
545  * and SVN @p revision (or hash for GIT).
546  *
547  */
548  void set_program_info(string program_name, string program_version, string branch, string revision, string build);
549 
550 
551  /**
552  * Starts a timer with code point, tag and hashes specified by CodePoint object @p cp.
553  * If the timer is not already created, it creates a new one. It returns index of
554  * the actual timer.
555  */
556  int start_timer(const CodePoint &cp);
557  /**
558  * Stops actual timer. It check if the hash of given code point match hash of the
559  * tag of actual timer node. If not we print out warning and try to find the correct tag
560  * towards the tree root closing all nodes we pass through.
561  *
562  * If FLOW123D_DEBUG is set, we check that all children are closed.
563  */
564  void stop_timer(const CodePoint &cp);
565 
566  /**
567  * Stop timer with index given by @p timer_index. If this is not equal to @p actual_node, we
568  * traverse the tree towards root while force closing nodes by the way.
569  *
570  * Negative @p timer_index means close @p actual_node
571  */
572  void stop_timer(int timer_index = -1);
573 
574  /**
575  * Adds @p n_calls - 1 to the total number of calls of the current timer. Minus one, since one call is counted when
576  * timer was started. You should use macro ADD_CALLS above.
577  */
578  void add_calls(unsigned int n_calls);
579  /**
580  * Notification about allocation of given size.
581  * Increase total allocated memory in current profiler frame.
582  */
583  void notify_malloc(const size_t size, const long p);
584  /**
585  * Notification about freeing memory of given size.
586  * Increase total deallocated memory in current profiler frame.
587  */
588  void notify_free(const long p);
589 
590  /**
591  * Return average profiler timer resolution in seconds
592  * based on 100 measurements
593  */
594  static double get_resolution ();
595 
596 
597 #ifdef FLOW123D_HAVE_MPI
598  /**
599  * @brief Output current timing information into the given stream.
600  *
601  * COLECTIVE - all processes in the communicator have to call this
602  * method. All timers are finished, all processes are synchronized, collect
603  * profiling informations are collected and written to the given stream.
604  *
605  * Pass through the profiling tree (collective over processors)
606  * Print cumulative times average, balance (max/min), count (denote differences)
607  *
608  */
609  void output(MPI_Comm comm, std::ostream &os);
610  /**
611  * Same as previous, but output to the file with default name: "profiler_info_YYMMDD_HH::MM:SS.log".
612  * Empty body if macro FLOW123D_DEBUG_PROFILER is not defined.
613  */
614  void output(MPI_Comm comm);
615 #endif /* FLOW123D_HAVE_MPI */
616  /**
617  * @brief Output current timing information into the given stream.
618  *
619  * It temporally stops all timers, synchronize all processes, collect
620  * profiling informations and write it to the given stream.
621  *
622  * Pass through the profiling tree (collective over processors)
623  * Print cumulative times average, balance (max/min), count (denote differences)
624  *
625  */
626  void output(std::ostream &os);
627  /**
628  * Same as previous, but output to the file with default name: "profiler_info_YYMMDD_HH::MM:SS.log".
629  * Empty body if macro FLOW123D_DEBUG_PROFILER is not defined.
630  */
631  void output();
632  /**
633  * Method will transform last profiler json file to desired format
634  */
635  void transform_profiler_data (const string &output_file_suffix, const string &formatter);
636  /**
637  * Stop all timers and destroys the Profiler object.
638  * If you want some output call @p output method just before.
639  */
640  static void uninitialize();
641 
642  /**
643  * Class-specific allocation function new. Called by the usual
644  * single-object new-expressions if allocating an object of type Profiler.
645  */
646  static void* operator new (size_t sz);
647  /**
648  * Class-specific allocation function delete. Deallocates storage
649  * previously allocated by a matching operator new. These deallocation
650  * functions are called by delete-expressions.
651  */
652  static void operator delete (void* p);
653 
654  /**
655  * Public setter to turn on/off memory monitoring
656  * @param global_monitor whether to turn global monitoring on or off
657  * @param petsc_monitor petsc monitoring
658  */
659  void static set_memory_monitoring(const bool global_monitor, const bool petsc_monitor);
660 
661  /**
662  * Public getter to memory monitoring
663  * @return memory monitoring status
664  */
665  bool static get_global_memory_monitoring();
666 
667  /**
668  * Public getter to petsc memory monitoring
669  * @return memory monitoring status
670  */
671  bool static get_petsc_memory_monitoring();
672 
673  /**
674  * if under unit testing, specify friend so protected members can be tested
675  */
676  #ifdef __UNIT_TEST__
677  friend ProfilerTest;
678  #endif /* __UNIT_TEST__ */
679 
680 protected:
681 
682  /**
683  * Whether to monitor operator 'new/delete'
684  */
685  static bool global_monitor_memory;
686 
687  /**
688  * Whether to monitor petsc memory usage
689  */
690  static bool petsc_monitor_memory;
691 
692  /**
693  * When creating Profiler also reserve some bytes in malloc_map so overhead
694  * of creating single items is lowered. This value is passed as parameter in
695  * map.reserve() method so it indicates how many objects (pointers) are
696  * allocated at first.
697  */
698  static const long malloc_map_reserve;
699 
700  /**
701  * Method will propagate values from children timers to its parents
702  */
703  void propagate_timers ();
704 
705  /**
706  * Method for exchanging metrics from child timer to its parent timer
707  */
708  void accept_from_child (Timer &parent, Timer &child);
709 
710  /**
711  * Try to find timer with tag (in fact only its 32-bit hash) from given code point @p cp.
712  * Returns -1 if it is not found otherwise it returns its index.
713  */
714  int find_child(const CodePoint &cp);
715 
716 
717  /**
718  * Method will prepare construct specific details about the run (time start and time end)
719  * and write them along with basic informations about the run (name, description, ...)
720  * into ptree object
721  */
722  void output_header (property_tree::ptree &root, int mpi_size);
723 
724  /**
725  * Open a new file for profiler output with default name based on the
726  * actual time and date. Returns a pointer to the stream of the output file.
727  */
728  std::shared_ptr<std::ostream> get_default_output_stream();
729 
730  /// Default code point.
731  static CodePoint null_code_point;
732 
733  /// Pointer to the unique instance of singleton Profiler class.
734  static Profiler* _instance;
735 
736  /// Vector of all timers. Whole tree is stored in this array.
738 
739  /// Index of the actual timer node. Negative value means 'unset'.
740  unsigned int actual_node;
741 
742  /// MPI communicator used for final reduce of the timer node tree.
743  //MPI_Comm communicator_;
744  /// MPI_rank
745  //int mpi_rank_;
746 
747  /**
748  * flag indicating that collection of timer details will be
749  * using MPI
750  bool mpi_used;
751  */
752  // header informations
753 
754  /// Some measure of the size of the task in the set of the tasks that differs
755  /// only by size - used for scaling tests.
756  int task_size_;
757  /// Task description and identifier in possible database of all Profiler results.
758  string task_description_;
759  /// Time and date of the start of the task solution. In fact start of the Profiler.
760  time_t start_time;
761 
762  /// Name of the program.
763  string flow_name_;
764  /// Version of the program.
765  string flow_version_;
766  /// Http address of the branch in a repository.
767  string flow_branch_;
768  /// Revision or GIT hash.
769  string flow_revision_;
770  /// Build date and time.
771  string flow_build_;
772  /// Variable which stores last json log filepath
773  string json_filepath;
774 
775 
776  /**
777  * Use DFS to pass through the tree and collect information about all timers reduced from the processes in the communicator.
778  * For every timer the information strings are stored in the struct TimerInfo in order to pad fields correctly
779  * to have alligned columns on the output. The alligning is performed in the output() method.
780  */
781  template<typename ReduceFunctor>
782  void add_timer_info(ReduceFunctor reduce, property_tree::ptree* node, int timer_idx, double parent_time);
783 
784  //Profiler(MPI_Comm comm); // private constructor
785  Profiler(); // private constructor
786  Profiler(Profiler const&); // copy constructor is private
787  Profiler & operator=(Profiler const&); // assignment operator is private
788 };
789 
790 
791 
792 
793 
794 
795 /**
796  *
797  * @brief Class for automatic timer closing. This class is used by #START_TIMER macro
798  * and is responsible for the fact that we don't have to call #END_TIMER macro to stop the timer,
799  * the timer will be stopped at the end of the block in which #START_TIMER was used.
800  *
801  * The main idea of the approach described is that the TimerFrame variable will be destroyed
802  * at the end of the block where #START_TIMER macro was used. In order to work properly
803  * in situations where #END_TIMER was used to stop the timer manually before (but there is still the
804  * variable which will be later destroyed), we have to store references to these variables and
805  * destroy them on-demand.
806  *
807  * TODO:
808  * Should only contain pointer to the Timer. And destructor, that close the timer.
809  */
810 class TimerFrame {
811 private:
812  int const timer_index_;
813 public:
814  inline TimerFrame(const CodePoint &cp)
815  : timer_index_( Profiler::instance()->start_timer(cp) )
816  {}
817 
818  ~TimerFrame() {
819  Profiler::instance()->stop_timer(timer_index_);
820  }
821 };
822 
823 
824 /**
825  * Simple class providing static map variable storing address and alloc size
826  */
827 // gcc version 4.9 and lower has following bug: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=59751
828 // fix in version 4.9: https://gcc.gnu.org/gcc-4.9/changes.html#cxx
829 // typedef unordered_map<long, int, hash<long>, equal_to<long>, internal::SimpleAllocator<pair<const long, int>>> unordered_map_with_alloc;
830 typedef std::unordered_map<long, int, boost::hash<long>, equal_to<long>, internal::SimpleAllocator<std::pair<const long, int>>> unordered_map_with_alloc;
831 class MemoryAlloc {
832 public:
833  /**
834  * Create static map containing <allocation address, allocation size> pairs
835  * map is used for storing allocations and deallocations of all object not
836  * related to profiler after profiler initialization phase
837  */
838  static unordered_map_with_alloc & malloc_map();
839 };
840 
841 
842 
843 
844 #else // FLOW123D_DEBUG_PROFILER
845 
846 
847 // dummy declaration of Profiler class
848 class Profiler {
849 public:
850  static void initialize();
851  static Profiler* instance();
852 
853  void set_task_info(string description, int size)
854  {}
855  void set_program_info(string program_name, string program_version, string branch, string revision, string build)
856  {}
857  void notify_malloc(const size_t size )
858  {}
859  void notify_free(const size_t size )
860  {}
861  void output(MPI_Comm comm, ostream &os)
862  {}
863  void output(MPI_Comm comm)
864  {}
865  void output()
866  {}
867  void transform_profiler_data(const string &output_file_suffix, const string &formatter)
868  {}
869  double get_resolution () const
870  { return 0.0; }
871  const char *actual_tag() const
872  { return NULL; }
873  inline unsigned int actual_count() const
874  { return 0; }
875  inline double actual_cumulative_time() const
876  { return 0.0; }
877  static void uninitialize();
878 private:
880  Profiler() {}
881 };
882 
883 
884 
885 
886 #endif
887 
888 
889 #endif
Profiler::output
void output(MPI_Comm comm)
Definition: sys_profiler.hh:863
Profiler::set_task_info
void set_task_info(string description, int size)
Definition: sys_profiler.hh:853
std::vector
Definition: doxy_dummy_defs.hh:7
internal::SimpleAllocator
Definition: simple_allocator.hh:28
boost
Definition: finite_state_filter.hpp:34
CONSTEXPR_
#define CONSTEXPR_
Definition: sys_profiler.hh:90
Profiler::get_resolution
double get_resolution() const
Definition: sys_profiler.hh:869
Timer
Definition: memory.cc:33
TimePoint
Definition: time_point.hh:70
Profiler
Definition: sys_profiler.hh:848
mpi.h
operator<<
STREAM & operator<<(STREAM &s, UpdateFlags u)
Definition: update_flags.hh:164
MPI_Functions::sum
static int sum(int *val, MPI_Comm comm)
Definition: sys_profiler.cc:50
Profiler::notify_free
void notify_free(const size_t size)
Definition: sys_profiler.hh:859
Profiler::output
void output(MPI_Comm comm, ostream &os)
Definition: sys_profiler.hh:861
Profiler::actual_tag
const char * actual_tag() const
Definition: sys_profiler.hh:871
MPI_Functions
Definition: sys_profiler.hh:74
Profiler::actual_count
unsigned int actual_count() const
Definition: sys_profiler.hh:873
MPI_Comm
int MPI_Comm
Definition: mpi.h:141
time_point.hh
Profiler::notify_malloc
void notify_malloc(const size_t size)
Definition: sys_profiler.hh:857
Profiler::Profiler
Profiler()
Definition: sys_profiler.hh:880
Profiler::transform_profiler_data
void transform_profiler_data(const string &output_file_suffix, const string &formatter)
Definition: sys_profiler.hh:867
global_defs.h
Global macros to enhance readability and debugging, general constants.
std
Definition: doxy_dummy_defs.hh:5
boost::hash
Definition: sys_profiler.hh:57
Profiler::instance
static Profiler * instance()
Definition: sys_profiler.cc:951
MPI_Functions::max
static int max(int *val, MPI_Comm comm)
Definition: sys_profiler.cc:86
Profiler::actual_cumulative_time
double actual_cumulative_time() const
Definition: sys_profiler.hh:875
Profiler::output
void output()
Definition: sys_profiler.hh:865
Profiler::set_program_info
void set_program_info(string program_name, string program_version, string branch, string revision, string build)
Definition: sys_profiler.hh:855
Profiler::_instance
static Profiler * _instance
Definition: sys_profiler.hh:879
simple_allocator.hh
MPI_Functions::min
static int min(int *val, MPI_Comm comm)
Definition: sys_profiler.cc:68