Flow123d  release_2.1.0-84-g6a13a75
sys_profiler.hh
Go to the documentation of this file.
1 /*!
2  *
3  * Copyright (C) 2015 Technical University of Liberec. All rights reserved.
4  *
5  * This program is free software; you can redistribute it and/or modify it under
6  * the terms of the GNU General Public License version 3 as published by the
7  * Free Software Foundation. (http://www.gnu.org/licenses/gpl-3.0.en.html)
8  *
9  * This program is distributed in the hope that it will be useful, but WITHOUT
10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
11  * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
12  *
13  *
14  * @file sys_profiler.hh
15  * @brief
16  * @todo
17  * - START_GLOBAL_TIMER(tag) - this calls the start_timer, which creates local timer on the correct place in the hierarchy,
18  * further this timer is added to the list of global timers, this contains groups of timers with same tag, and
19  * collect/sum data from these timers in the report.
20  *
21  * - Allow output even during calculation (not complete, but at least something)
22  * Report should contain time of start as well as time of creation of the report or time from start of the program.
23  *
24  * - When generating the report we have to deal with possibly different trees at every MPI process.
25  *
26  * - test memory profiling
27  * in our own new and xmalloc functions - register allocated and deallocated memory to the active Profiler frame.
28  *
29  * - test in parallel
30  * - extended output:
31  * time per single call (needs a different reduction than the cumulative time when the number of calls differs)
32  * percentage relative to the parent
33  * code point (somewhere at the end of the line)
34  *
35  *
36  * !!! Unfortunately using constexpr is worse (without optimization).
37  * This is probably due to use of static variable for
38  * CodePoint, the access could be slow, and computation of hash is done only once. Actually timing results
39  * are:
40  *
41  * OPTIONS OVERHEAD (compared to call 2x clock())
42  * -g, no c++11 : 18%
43  * -g, c++11 : 60%
44  * -O3,no c++11 : 6%
45  * -O3, c++11 : 6%
46  */
47 
48 #ifndef PROFILER_H
49 #define PROFILER_H
50 
51 #include "global_defs.h"
52 #include "system/system.hh"
53 #include <mpi.h>
54 #include <ostream>
55 #include <boost/property_tree/ptree.hpp>
56 #include <boost/unordered_map.hpp>
57 #include "time_point.hh"
58 #include "petscsys.h"
59 #include "simple_allocator.hh"
60 
61 
62 // namespace alias
63 namespace property_tree = boost::property_tree;
64 
65 //instead of #include "mpi.h"
66 //mpi declarations follows:
68 public:
69  static int sum(int* val, MPI_Comm comm);
70  static double sum(double* val, MPI_Comm comm);
71  static long sum(long* val, MPI_Comm comm);
72 
73  static int min(int* val, MPI_Comm comm);
74  static double min(double* val, MPI_Comm comm);
75  static long min(long* val, MPI_Comm comm);
76 
77  static int max(int* val, MPI_Comm comm);
78  static double max(double* val, MPI_Comm comm);
79  static long max(long* val, MPI_Comm comm);
80 };
81 
82 // Assuming all compilers support constexpr
83 #define CONSTEXPR_ constexpr
84 
85 using namespace std;
86 
87 
88 // These helper macros are necessary due to the use of the __LINE__ macro in the START_TIMER macro.
89 #define _PASTE(a,b) a ## b
90 #define PASTE(a,b) _PASTE(a, b)
91 
92 
93 
/**
 * \def START_TIMER(tag)
 *
 * @brief Starts a timer with the specified tag.
 *
 * The expansion first defines a static constant-expression CodePoint (built by
 * CODE_POINT(tag)) and then a local TimerFrame variable constructed from that
 * code point. TimerFrame calls @p Profiler::start_timer() in its constructor and
 * @p Profiler::stop_timer() in its destructor, so the timer is closed
 * automatically at the end of the current block.
 *
 * Both variable names are derived from __LINE__, hence the PASTE helper.
 *
 * ATTENTION: the macro expands to two statements, so it must not be used as the
 * single statement of a control structure. The following code is illegal:
 * @code
 *      if (some_condition) START_TIMER(tag);
 * @endcode
 *
 * Expands to nothing when FLOW123D_DEBUG_PROFILER is not defined.
 */
#if defined(FLOW123D_DEBUG_PROFILER)
#define START_TIMER(tag) \
    static CONSTEXPR_ CodePoint PASTE(cp_,__LINE__) = CODE_POINT(tag); \
    TimerFrame PASTE(timer_,__LINE__) = TimerFrame( PASTE(cp_,__LINE__) )
#else
#define START_TIMER(tag)
#endif
114 
/**
 * \def START_TIMER_EXT (tag, subtag)
 *
 * @brief Starts a timer with the specified tag and subtag.
 *
 * The expansion first defines a static constant-expression CodePoint (built by
 * CODE_POINT_EXT(tag, subtag)) and then a local TimerFrame variable constructed
 * from that code point. TimerFrame calls @p Profiler::start_timer() in its
 * constructor and @p Profiler::stop_timer() in its destructor, so the timer is
 * closed automatically at the end of the current block.
 *
 * ATTENTION: the macro expands to two statements, so it must not be used as the
 * single statement of a control structure. The following code is illegal:
 * @code
 *      if (some_condition) START_TIMER_EXT(tag, subtag);
 * @endcode
 *
 * Expands to nothing when FLOW123D_DEBUG_PROFILER is not defined.
 */
#if defined(FLOW123D_DEBUG_PROFILER)
#define START_TIMER_EXT(tag, subtag) \
    static CONSTEXPR_ CodePoint PASTE(cp_,__LINE__) = CODE_POINT_EXT(tag, subtag); \
    TimerFrame PASTE(timer_,__LINE__) = TimerFrame( PASTE(cp_,__LINE__) )
#else
#define START_TIMER_EXT(tag, subtag)
#endif
135 
/**
 * \def END_TIMER(tag)
 *
 * @brief Ends a timer with the specified tag.
 *
 * Needed only when the timer should be closed before the end of its block.
 * The expansion defines a static constant-expression CodePoint for @p tag and
 * passes it to @p Profiler::stop_timer().
 *
 * ATTENTION: like START_TIMER, this macro expands to two statements and must not
 * be used as the single statement of a control structure.
 *
 * Expands to nothing when FLOW123D_DEBUG_PROFILER is not defined.
 */
#if defined(FLOW123D_DEBUG_PROFILER)
#define END_TIMER(tag) \
    static CONSTEXPR_ CodePoint PASTE(cp_,__LINE__) = CODE_POINT(tag); \
    Profiler::instance()->stop_timer( PASTE(cp_,__LINE__) )
#else
#define END_TIMER(tag)
#endif
148 
/**
 * \def END_START_TIMER(tag)
 *
 * @brief Ends the current timer and starts a new one with the given tag.
 *
 * The expansion calls @p Profiler::stop_timer() without arguments and then
 * expands START_TIMER(tag). It therefore consists of several statements and
 * must not be used as the single statement of a control structure.
 * Note that the expansion itself already ends with a semicolon.
 *
 * Expands to nothing when FLOW123D_DEBUG_PROFILER is not defined.
 */
#if defined(FLOW123D_DEBUG_PROFILER)
#define END_START_TIMER(tag) \
    Profiler::instance()->stop_timer(); \
    START_TIMER(tag);
#else
#define END_START_TIMER(tag)
#endif
159 
160 
/**
 * \def ADD_CALLS(n_calls)
 *
 * @brief Increases the number of calls of the actual timer.
 *
 * Useful for measuring a loop whose body is shorter than the resolution of the
 * Timer implementation. When the number of iterations is known, use the macro
 * as follows:
 *
 * @code
 *      START_TIMER("cycle");
 *      unsigned int i;
 *      for(i =0; i<1000000; i++) i*i*i;
 *      ADD_CALLS(i);
 *      END_TIMER("cycle");
 * @endcode
 *
 * The profiler report then shows the total time spent in the loop together with
 * the time per call, i.e. the average time spent in the loop body.
 *
 * The macro forwards @p n_calls to @p Profiler::add_calls() and expands to
 * nothing when FLOW123D_DEBUG_PROFILER is not defined.
 */
#if defined(FLOW123D_DEBUG_PROFILER)
#define ADD_CALLS(n_calls) \
    Profiler::instance()->add_calls(n_calls)
#else
#define ADD_CALLS(n_calls)
#endif
185 
186 
187 
188 
189 //////////////////////////////////////////////////////////////////////////////////////////////
190 #ifdef FLOW123D_DEBUG_PROFILER
191 
192 /**
193  * Macro holding the subtag value used when no subtag was specified for a CodePoint
194  */
195 #define PROFILER_EMPTY_SUBTAG ""
196 
197 /**
198  * Macro holding the default (initial) value passed to str_hash when hashing starts
199  */
200 #define PROFILER_HASH_DEFAULT 0
201 
202 /**
203  * @brief Function for compile-time hash computation. (Needs C++x11 standard.)
204  * Input, @p str, is constant null terminated string, result is unsigned int (usually 4 bytes).
205  * Function has to be recursive, since standard requires that the body consists only from the return statement.
206  *
207  * SALT is hash for the empty string. Currently zero for simpler testing.
208  */
209 inline CONSTEXPR_ unsigned int str_hash(const char * str, unsigned int default_value) {
210  #define SALT 0 //0xef50e38f
211  return (*str == 0 ? SALT : default_value + str_hash(str+1, PROFILER_HASH_DEFAULT) * 101 + (unsigned int)(*str) );
212 }
213 
214 /**
215  * Macro to generate constexpr CodePoint object.
216  */
217 #define CODE_POINT(tag) CodePoint(tag, __FILE__, __func__, __LINE__)
218 
219 /**
220  * Macro to generate constexpr CodePoint object with both a tag and a subtag.
221  */
222 #define CODE_POINT_EXT(tag, subtag) CodePoint(tag, subtag, __FILE__, __func__, __LINE__)
223 
224 
225 
226 
227 /**
228  * @brief Class that represents point in the code.
229  *
230  * This class allow construction at compile time. And includes the information about the code point as well
231  * as the 'tag' of the timer and cimpile-time computed hashes of this 'tag'. The @p hash_ is long one with
232  * very small probability of collisions - this we use for comparison of tags. The @p hash_idx_ is the long hash modulo
233  * length of the array of Timer's children, this is used for fast loop up into this array that servers as a simple hash table.
234  */
235 class CodePoint {
236 public:
237  CONSTEXPR_ CodePoint(const char *tag, const char * file, const char * func, const unsigned int line)
238  : tag_(tag), subtag_(PROFILER_EMPTY_SUBTAG), file_(file), func_(func), line_(line),
239  hash_(str_hash(tag, PROFILER_HASH_DEFAULT)),
240  hash_idx_( str_hash(tag, PROFILER_HASH_DEFAULT)%max_n_timer_childs )
241  {};
242  CONSTEXPR_ CodePoint(const char *tag, const char *subtag, const char * file, const char * func, const unsigned int line)
243  : tag_(tag), subtag_(subtag), file_(file), func_(func), line_(line),
244  hash_(str_hash(subtag, str_hash(tag, PROFILER_HASH_DEFAULT))),
245  hash_idx_( str_hash(subtag, str_hash(tag, PROFILER_HASH_DEFAULT))%max_n_timer_childs )
246  {};
247 
248  /// Size of child arrays in timer nodes.
249  static const unsigned int max_n_timer_childs=13;
250 
251  /// Tag of the code point.
252  const char * const tag_;
253 
254  /// Subtag of the code point.
255  const char * const subtag_;
256 
257  /// file name of the code point
258  const char * const file_;
259 
260  /// file name of the code point
261  const char * const func_;
262 
263  /// file name of the code point
264  const unsigned int line_;
265 
266  /// Full 32-bit hash of the tag ( practically no chance of collision)
267  unsigned int hash_;
268 
269  /// Hash modulo size of array of timer childs ( we have to check full hash to prevent collision)
270  unsigned int hash_idx_;
271 };
272 
273 
274 
275 /**
276  * @brief Class for profiling tree nodes.
277  *
278  * One Timer represents one particular time frame in the execution tree.
279  * It collects information about total time, number of calls, allocated and deallocated memory.
280  *
281  * It should be accessed only through Profiler, which is its friend class.
282  *
283  * TODO: for better performance: move copy hash_ and hash_idx_ into Timer since CodePoint are in static
284  * variables, that may be slower to acces.
285  *
286  */
287 class Timer {
288 
289 
290 public:
291  /// Size of array @p child_timers, the hash table containing descendants in the call tree.
292  static const unsigned int max_n_childs=CodePoint::max_n_timer_childs;
293 
294  /**
295  * Creates the timer node object. Should not be called directly, but through the START_TIMER macro.
296  */
297  Timer(const CodePoint &cp, int parent);
298 
299 
300  /**
301  * Start the timer. If it is already started, just increase number of starts (recursions) and calls.
302  */
303  void start();
304 
305  /**
306  * If number of starts (recursions) drop back to zero, we stop the timer and add the period to the cumulative time.
307  * This method do not take care of its childs (it has no access to the other timers).
308  * When the parameter 2p forced is 'true', we stop the timer immediately regardless the number of recursions.
309  * Returns true if the timer is not closed (recursions didn't drop to zero yet).
310  */
311  bool stop(bool forced = false);
312 
313 
314  /// Getter for the 'tag'.
315  inline string tag() const {
316  string buf(code_point_->tag_);
317  buf.append(code_point_->subtag_);
318  return buf;
319  }
320 
321  /// Returns true if the timer is open, number of starts (recursions) is nonzero.
322  inline bool running() const
323  { return start_count >0; }
324 
325  /// Returns string with description of the code point where the timer was first started.
326  std::string code_point_str() const;
327 
328  /**
329  * Returns cumulative time of the timer in seconds.
330  */
331  double cumulative_time() const;
332 
333  /*
334  * Adds given index @p child_index of the timer @p child to the correct place in the hash table.
335  */
336  void add_child(int child_index, const Timer &child);
337 
338 
339 protected:
340 
341  /**
342  * Pauses current timer, save measured petsc memory information util resume.
343  * We get Petsc maximum memory usage via PetscMemoryGetMaximumUsage call
344  * and save this value into temp value. (we override local maximum if temp
345  * value is greater)
346  */
347  void pause();
348  /**
349  * Resume current timer. e tell Petsc to monitor the maximum memory
350  * usage once again. We call PetscMemorySetGetMaximumUsage so later in
351  * resume() method will PetscMemoryGetMaximumUsage method work.
352  */
353  void resume();
354 
355  /**
356  * Start time when frame opens.
357  */
358  TimePoint start_time;
359  /**
360  * Cumulative time spent in the frame.
361  */
362  double cumul_time;
363  /**
364  * Total number of opening of the frame.
365  */
366  unsigned int call_count;
367  /**
368  * Number of recursive openings.
369  */
370  unsigned int start_count;
371 
372 
373  /**
374  * Code point of the first START_TIMER for the particular tag. The 'tag' identifies timer
375  * and is used in reported profiler table.
376  */
377  const CodePoint *code_point_;
378  /// Full tag hash. Copy from code_point_.
379  unsigned int full_hash_;
380  /// Hash modulo size of array of timer childs. Copy from code_point_.
381  unsigned int hash_idx_;
382 
383  /**
384  * Index of the parent timer node in the tree. Negative value means 'not set'.
385  */
386  int parent_timer;
387  /**
388  * Indices of the child timers in the Profiler::timers_ vector. Negative values means 'not set'.
389  */
390  int child_timers[max_n_childs];
391 
392  /**
393  * Total number of bytes allocated in this frame. After
394  * Profiler::propagate_timers call will also contain values from children.
395  */
396  size_t total_allocated_;
397  /**
398  * Total number of bytes deallocated in this frame. After
399  * Profiler::propagate_timers call, will also contain values from children.
400  */
401  size_t total_deallocated_;
402  /**
403  * Maximum number of bytes allocated at one time in this frame. After
404  * Profiler::propagate_timers call, maximum value will be taken from this
405  * Timer and also from all children Timers.
406  */
407  size_t max_allocated_;
408  /**
409  * Current number of bytes allocated in this frame at the same time.
410  * This value is used to monitor maximum bytes allocated. When notify_free
411  * and notify_malloc is called this values is changed and new maximum
412  * is tested.
413  */
414  size_t current_allocated_;
415 
416  /**
417  * Number of times new/new[] operator was used in this scope
418  */
419  int alloc_called;
420  /**
421  * Number of times delete/delete[] operator was used in this scope
422  */
423  int dealloc_called;
424 
425  #ifdef FLOW123D_HAVE_PETSC
426  /**
427  * Number of bytes used by Petsc at the start of time-frame
428  */
429  PetscLogDouble petsc_start_memory;
430  /**
431  * Number of bytes used by Petsc at the end of time-frame
432  */
433  PetscLogDouble petsc_end_memory;
434  /**
435  * Difference between start and end of a petsc memory usage
436  */
437  PetscLogDouble petsc_memory_difference;
438  /**
439  * Maximum amount of memory used that was PetscMalloc()ed at any time
440  * during this run.
441  *
442  * The memory usage reported here includes all Fortran arrays (that may be
443  * used in application-defined sections of code).
444  */
445  PetscLogDouble petsc_peak_memory;
446  /**
447  * Local maximum amount of memory used that was PetscMalloc()ed
448  * used during time-frame pause/resume. Auxilary variable for storing
449  * local memory used when pause is called.
450  */
451  PetscLogDouble petsc_local_peak_memory;
452  #endif // FLOW123D_HAVE_PETSC
453 
454  friend class Profiler;
455  friend std::ostream & operator <<(std::ostream&, const Timer&);
456 
457  /**
458  * if under unit testing, specify friend so protected members can be tested
459  */
460  #ifdef __UNIT_TEST__
461  friend ProfilerTest;
462  #endif /* __UNIT_TEST__ */
463 
464 };
465 
466 /*
467 struct SimpleTranslator {
468  typedef std::string internal_type;
469  typedef int external_type;
470 
471  // Converts a string to int
472  boost::optional<external_type> get_value(const internal_type& str) {
473  return boost::optional<external_type>(std::stoi(str));
474  }
475 
476  // Converts a bool to string
477  boost::optional<internal_type> put_value(const external_type& i){
478  return boost::optional<internal_type>(std::to_string(i));
479  }
480 };
481 
482 namespace boost {
483 namespace property_tree {
484 
485 template<typename Ch, typename Traits, typename Alloc>
486 struct translator_between<std::basic_string< Ch, Traits, Alloc >, int> {
487  typedef SimpleTranslator type;
488 };
489 
490 
491 } // namespace property_tree
492 } // namespace boost
493 */
494 /**
495  *
496  * @brief Main class for profiling by measuring time intervals.
497  *
498  * These time intervals form a tree structure where each interval is represented
499  * by a Timer object. The root node of the tree is automatically created and
500  * started after creating the Profiler object and cannot be stopped manually.
501  *
502  * The class implements a singleton pattern and all the functions are accessible trough
503  * Profiler::instance(), but in most cases the programmer will access the profiler
504  * functions via the #START_TIMER and #END_TIMER macros. The #START_TIMER macro
505  * is responsible for the fact that we don't have to call #END_TIMER macro to stop the timer and
506  * the timer will be stopped at the end of the block in which #START_TIMER was used.
507  * These macros internally use the TimerFrame objects and the programmer should
508  * not use the TimerFrame objects directly.
509  *
510  * By using #SET_TIMER_SUBFRAMES macro, the programmer can specify the number of subframes (eg. iterations)
511  * for the currently active timer.
512  *
513  *
514  * Currently the Profiler system is not thread safe. No idea how to do this.
515  *
516  */
517 class Profiler {
518 public:
519 
520  /**
521  * Initializes the Profiler with specific MPI communicator object
522  */
523  //static void initialize(MPI_Comm communicator = MPI_COMM_WORLD);
524  static void initialize();
525  /**
526  * Returns unique Profiler object.
527  */
528  static Profiler* instance();
529  /**
530  * Sets task specific information. The string @p description with textual description of the task and the
531  * number of elements of the mesh (parameter @p size). This is used for weak scaling graphs so it should
532  * measure size of the task of the same type (same description).
533  *
534  */
535  void set_task_info(string description, int size);
536  /**
537  * Sets informations about program version. This consists of @p program_version (includes program name), @p branch in the repository or rather full URL of the branch,
538  * and SVN @p revision (or hash for GIT).
539  *
540  */
541  void set_program_info(string program_name, string program_version, string branch, string revision, string build);
542 
543 
544  /**
545  * Starts a timer with code point, tag and hashes specified by CodePoint object @p cp.
546  * If the timer is not already created, it creates a new one. It returns index of
547  * the actual timer.
548  */
549  int start_timer(const CodePoint &cp);
550  /**
551  * Stops actual timer. It check if the hash of given code point match hash of the
552  * tag of actual timer node. If not we print out warning and try to find the correct tag
553  * towards the tree root closing all nodes we pass through.
554  *
555  * If FLOW123D_DEBUG is set, we check that all children are closed.
556  */
557  void stop_timer(const CodePoint &cp);
558 
559  /**
560  * Stop timer with index given by @p timer_index. If this is not equal to @p actual_node, we
561  * traverse the tree towards root while force closing nodes by the way.
562  *
563  * Negative @p timer_index means close @p actual_node
564  */
565  void stop_timer(int timer_index = -1);
566 
567  /**
568  * Adds @p n_calls - 1 to the total number of calls of the current timer. Minus one, since one call is counted when
569  * timer was started. You should use macro ADD_CALLS above.
570  */
571  void add_calls(unsigned int n_calls);
572  /**
573  * Notification about allocation of given size.
574  * Increase total allocated memory in current profiler frame.
575  */
576  void notify_malloc(const size_t size, const long p);
577  /**
578  * Notification about freeing memory of given size.
579  * Increase total deallocated memory in current profiler frame.
580  */
581  void notify_free(const long p);
582 
583  /**
584  * Return average profiler timer resolution in seconds
585  * based on 100 measurements
586  */
587  static double get_resolution ();
588 
589 
590 #ifdef FLOW123D_HAVE_MPI
591  /**
592  * @brief Output current timing information into the given stream.
593  *
594  * COLECTIVE - all processes in the communicator have to call this
595  * method. All timers are finished, all processes are synchronized, collect
596  * profiling informations are collected and written to the given stream.
597  *
598  * Pass through the profiling tree (collective over processors)
599  * Print cumulative times average, balance (max/min), count (denote differences)
600  *
601  */
602  void output(MPI_Comm comm, std::ostream &os);
603  /**
604  * Same as previous, but output to the file with default name: "profiler_info_YYMMDD_HH::MM:SS.log".
605  * Empty body if macro FLOW123D_DEBUG_PROFILER is not defined.
606  */
607  void output(MPI_Comm comm);
608 #endif /* FLOW123D_HAVE_MPI */
609  /**
610  * @brief Output current timing information into the given stream.
611  *
612  * It temporally stops all timers, synchronize all processes, collect
613  * profiling informations and write it to the given stream.
614  *
615  * Pass through the profiling tree (collective over processors)
616  * Print cumulative times average, balance (max/min), count (denote differences)
617  *
618  */
619  void output(std::ostream &os);
620  /**
621  * Same as previous, but output to the file with default name: "profiler_info_YYMMDD_HH::MM:SS.log".
622  * Empty body if macro FLOW123D_DEBUG_PROFILER is not defined.
623  */
624  void output();
625  /**
626  * Method will transform last profiler json file to desired format
627  */
628  void transform_profiler_data (const string &output_file_suffix, const string &formatter);
629  /**
630  * Stop all timers and destroys the Profiler object.
631  * If you want some output call @p output method just before.
632  */
633  static void uninitialize();
634 
635  /**
636  * Class-specific allocation function new. Called by the usual
637  * single-object new-expressions if allocating an object of type Profiler.
638  */
639  static void* operator new (size_t sz);
640  /**
641  * Class-specific allocation function delete. Deallocates storage
642  * previously allocated by a matching operator new. These deallocation
643  * functions are called by delete-expressions.
644  */
645  static void operator delete (void* p);
646 
647  /**
648  * Public setter to turn on/off memory monitoring
649  * @param global_monitor whether to turn global monitoring on or off
650  * @param petsc_monitor petsc monitoring
651  */
652  void static set_memory_monitoring(const bool global_monitor, const bool petsc_monitor);
653 
654  /**
655  * Public getter to memory monitoring
656  * @return memory monitoring status
657  */
658  bool static get_global_memory_monitoring();
659 
660  /**
661  * Public getter to petsc memory monitoring
662  * @return memory monitoring status
663  */
664  bool static get_petsc_memory_monitoring();
665 
666  /**
667  * if under unit testing, specify friend so protected members can be tested
668  */
669  #ifdef __UNIT_TEST__
670  friend ProfilerTest;
671  #endif /* __UNIT_TEST__ */
672 
673 protected:
674 
675  /**
676  * Whether to monitor operator 'new/delete'
677  */
678  static bool global_monitor_memory;
679 
680  /**
681  * Whether to monitor petsc memory usage
682  */
683  static bool petsc_monitor_memory;
684 
685  /**
686  * When creating Profiler also reserve some bytes in malloc_map so overhead
687  * of creating single items is lowered. This value is passed as parameter in
688  * map.reserve() method so it indicates how many objects (pointers) are
689  * allocated at first.
690  */
691  static const long malloc_map_reserve;
692 
693  /**
694  * Method will propagate values from children timers to its parents
695  */
696  void propagate_timers ();
697 
698  /**
699  * Method for exchanging metrics from child timer to its parent timer
700  */
701  void accept_from_child (Timer &parent, Timer &child);
702 
703  /**
704  * Try to find timer with tag (in fact only its 32-bit hash) from given code point @p cp.
705  * Returns -1 if it is not found otherwise it returns its index.
706  */
707  int find_child(const CodePoint &cp);
708 
709 
710  /**
711  * Method will prepare construct specific details about the run (time start and time end)
712  * and write them along with basic informations about the run (name, description, ...)
713  * into ptree object
714  */
715  void output_header (property_tree::ptree &root, int mpi_size);
716 
717  /**
718  * Open a new file for profiler output with default name based on the
719  * actual time and date. Returns a pointer to the stream of the output file.
720  */
721  std::shared_ptr<std::ostream> get_default_output_stream();
722 
723  /// Default code point.
724  static CodePoint null_code_point;
725 
726  /// Pointer to the unique instance of singleton Profiler class.
727  static Profiler* _instance;
728 
729  /// Vector of all timers. Whole tree is stored in this array.
731 
732  /// Index of the actual timer node. Negative value means 'unset'.
733  unsigned int actual_node;
734 
735  /// MPI communicator used for final reduce of the timer node tree.
736  //MPI_Comm communicator_;
737  /// MPI_rank
738  //int mpi_rank_;
739 
740  /**
741  * flag indicating that collection of timer details will be
742  * using MPI
743  bool mpi_used;
744  */
745  // header informations
746 
747  /// Some measure of the size of the task in the set of the tasks that differs
748  /// only by size - used for scaling tests.
749  int task_size_;
750  /// Task description and identifier in possible database of all Profiler results.
751  string task_description_;
752  /// Time and date of the start of the task solution. In fact start of the Profiler.
753  time_t start_time;
754 
755  /// Name of the program.
756  string flow_name_;
757  /// Version of the program.
758  string flow_version_;
759  /// Http address of the branch in a repository.
760  string flow_branch_;
761  /// Revision or GIT hash.
762  string flow_revision_;
763  /// Build date and time.
764  string flow_build_;
765  /// Variable which stores last json log filepath
766  string json_filepath;
767 
768 
769  /**
770  * Use DFS to pass through the tree and collect information about all timers reduced from the processes in the communicator.
771  * For every timer the information strings are stored in the struct TimerInfo in order to pad fields correctly
772  * to have alligned columns on the output. The alligning is performed in the output() method.
773  */
774  template<typename ReduceFunctor>
775  void add_timer_info(ReduceFunctor reduce, property_tree::ptree* node, int timer_idx, double parent_time);
776 
777  //Profiler(MPI_Comm comm); // private constructor
778  Profiler(); // private constructor
779  Profiler(Profiler const&); // copy constructor is private
780  Profiler & operator=(Profiler const&); // assignment operator is private
781 };
782 
783 
784 
785 
786 
787 
788 /**
789  *
790  * @brief Class for automatic timer closing. This class is used by #START_TIMER macro
791  * and is responsible for the fact that we don't have to call #END_TIMER macro to stop the timer,
792  * the timer will be stopped at the end of the block in which #START_TIMER was used.
793  *
794  * The main idea of the approach described is that the TimerFrame variable will be destroyed
795  * at the end of the block where #START_TIMER macro was used. In order to work properly
796  * in situations where #END_TIMER was used to stop the timer manually before (but there is still the
797  * variable which will be later destroyed), we have to store references to these variables and
798  * destroy them on-demand.
799  *
800  * TODO:
801  * Should only contain pointer to the Timer. And destructor, that close the timer.
802  */
803 class TimerFrame {
804 private:
805  int const timer_index_;
806 public:
807  inline TimerFrame(const CodePoint &cp)
808  : timer_index_( Profiler::instance()->start_timer(cp) )
809  {}
810 
811  ~TimerFrame() {
812  Profiler::instance()->stop_timer(timer_index_);
813  }
814 };
815 
816 
817 /**
818  * Simple class providing static map variable storing address and alloc size
819  */
820 // gcc version 4.9 and lower has following bug: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=59751
821 // fix in version 4.9: https://gcc.gnu.org/gcc-4.9/changes.html#cxx
822 // typedef unordered_map<long, int, hash<long>, equal_to<long>, internal::SimpleAllocator<pair<const long, int>>> unordered_map_with_alloc;
823 typedef boost::unordered_map<long, int, boost::hash<long>, equal_to<long>, internal::SimpleAllocator<std::pair<const long, int>>> unordered_map_with_alloc;
824 class MemoryAlloc {
825 public:
826  /**
827  * Create static map containing <allocation address, allocation size> pairs
828  * map is used for storing allocations and deallocations of all object not
829  * related to profiler after profiler initialization phase
830  */
831  static unordered_map_with_alloc & malloc_map();
832 };
833 
834 
835 
836 
837 #else // FLOW123D_DEBUG_PROFILER
838 
839 
840 // dummy declaration of Profiler class
841 class Profiler {
842 public:
843  static void initialize();
844  static Profiler* instance();
845 
846  void set_task_info(string description, int size)
847  {}
848  void set_program_info(string program_name, string program_version, string branch, string revision, string build)
849  {}
850  void notify_malloc(const size_t size )
851  {}
852  void notify_free(const size_t size )
853  {}
854  void output(MPI_Comm comm, ostream &os)
855  {}
856  void output(MPI_Comm comm)
857  {}
858  void transform_profiler_data(const string &output_file_suffix, const string &formatter)
859  {}
860  double get_resolution () const
861  { return 0.0; }
862  const char *actual_tag() const
863  { return NULL; }
864  inline unsigned int actual_count() const
865  { return 0; }
866  inline double actual_cumulative_time() const
867  { return 0.0; }
868  static void uninitialize();
869 private:
871  Profiler() {}
872 };
873 
874 
875 
876 
877 #endif
878 
879 
880 #endif
static int min(int *val, MPI_Comm comm)
Definition: sys_profiler.cc:65
#define CONSTEXPR_
Definition: sys_profiler.hh:83
int MPI_Comm
Definition: mpi.h:141
static int sum(int *val, MPI_Comm comm)
Definition: sys_profiler.cc:47
double get_resolution() const
unsigned int actual_count() const
double actual_cumulative_time() const
void notify_free(const size_t size)
void output(MPI_Comm comm)
Global macros to enhance readability and debugging, general constants.
static int max(int *val, MPI_Comm comm)
Definition: sys_profiler.cc:83
void set_program_info(string program_name, string program_version, string branch, string revision, string build)
STREAM & operator<<(STREAM &s, UpdateFlags u)
void transform_profiler_data(const string &output_file_suffix, const string &formatter)
static Profiler * instance()
static Profiler * _instance
Definition: memory.cc:33
void output(MPI_Comm comm, ostream &os)
void set_task_info(string description, int size)
void notify_malloc(const size_t size)
const char * actual_tag() const