Flow123d  jenkins-Flow123d-linux-release-multijob-282
sys_profiler.cc
Go to the documentation of this file.
1 /*!
2  *
3  * Copyright (C) 2007 Technical University of Liberec. All rights reserved.
4  *
5  * Please make a following refer to Flow123d on your project site if you use the program for any purpose,
6  * especially for academic research:
7  * Flow123d, Research Centre: Advanced Remedial Technologies, Technical University of Liberec, Czech Republic
8  *
9  * This program is free software; you can redistribute it and/or modify it under the terms
10  * of the GNU General Public License version 3 as published by the Free Software Foundation.
11  *
12  * This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
13  * without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
14  * See the GNU General Public License for more details.
15  *
16  * You should have received a copy of the GNU General Public License along with this program; if not,
17  * write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 021110-1307, USA.
18  *
19  *
20  * $Id: profiler.cc 842 2011-01-08 17:58:15Z tomas.bambuch $
21  * $Revision: 842 $
22  * $LastChangedBy: tomas.bambuch $
23  * $LastChangedDate: 2011-01-08 18:58:15 +0100 (So, 08 led 2011) $
24  *
25  * @file
26  * @ingroup system
27  * @brief Profiler
28  *
29  */
30 
31 #include <fstream>
32 #include <iomanip>
33 
34 #include <sys/param.h>
35 
36 #include "sys_profiler.hh"
37 #include "system/system.hh"
38 #include <boost/format.hpp>
39 
40 #include "system/file_path.hh"
41 
42 
43 #include "mpi.h"
44 /*
45  * These should be replaced by using boost MPI interface
46  */
47 int MPI_Functions::sum(int* val, MPI_Comm comm) {
48  int total = 0;
49  MPI_Reduce(val, &total, 1, MPI_INT, MPI_SUM, 0, comm);
50  return total;
51  }
52 
53 double MPI_Functions::sum(double* val, MPI_Comm comm) {
54  double total = 0;
55  MPI_Reduce(val, &total, 1, MPI_DOUBLE, MPI_SUM, 0, comm);
56  return total;
57  }
58 
59 int MPI_Functions::min(int* val, MPI_Comm comm) {
60  int min = 0;
61  MPI_Reduce(val, &min, 1, MPI_INT, MPI_MIN, 0, comm);
62  return min;
63  }
64 
65 double MPI_Functions::min(double* val, MPI_Comm comm) {
66  double min = 0;
67  MPI_Reduce(val, &min, 1, MPI_DOUBLE, MPI_MIN, 0, comm);
68  return min;
69  }
70 
71 int MPI_Functions::max(int* val, MPI_Comm comm) {
72  int max = 0;
73  MPI_Reduce(val, &max, 1, MPI_INT, MPI_MAX, 0, comm);
74  return max;
75  }
76 
77 double MPI_Functions::max(double* val, MPI_Comm comm) {
78  double max = 0;
79  MPI_Reduce(val, &max, 1, MPI_DOUBLE, MPI_MAX, 0, comm);
80  return max;
81  }
82 
83 
84 #ifdef DEBUG_PROFILER
85 /*********************************************************************************************
86  * Implementation of class Timer
87  */
88 
89 Timer::Timer(const CodePoint &cp, int parent)
90 : start_time(0),
91  cumul_time(0),
92  call_count(0),
93  start_count(0),
94  code_point_(&cp),
95  full_hash_(cp.hash_),
96  hash_idx_(cp.hash_idx_),
97  parent_timer(parent),
98  total_allocated_(0),
99  total_deallocated_(0)
100 {
101  for(unsigned int i=0; i< max_n_childs ;i++) child_timers[i]=-1;
102 }
103 
104 
105 /*
106  * Standard C++ clock() function measure the time spent in calling process, not the wall time,
107  * that is exactly what I want. Disadvantage is that it has small resolution about 10 ms.
108  *
109  * For more precise wall clock timer one can use:
110  * time(&begin);
111  * time(&end);
112  * cout << "Time elapsed: " << difftime(end, begin) << " seconds"<< endl;
113  */
114 Timer::TimeData Timer::get_time() {
115  return clock();
116 }
117 
118 
119 
120 double Timer::cumulative_time() const
121 {
122  return 1000.0 * double(cumul_time) / CLOCKS_PER_SEC;
123 }
124 
125 
126 
127 void Timer::start() {
128  if (start_count == 0) start_time = get_time();
129  call_count++;
130  start_count++;
131 }
132 
133 
134 
135 void Timer::update() {
136  TimeData time = get_time();
137  cumul_time += time - start_time;
138 }
139 
140 
141 
142 bool Timer::stop(bool forced) {
143 
144  if (forced) start_count=1;
145 
146  if (start_count == 1) {
147  TimeData time = get_time();
148  cumul_time += time - start_time;
149  start_count--;
150  return true;
151  } else {
152  start_count--;
153  }
154  return false;
155 }
156 
157 
158 
159 void Timer::add_child(int child_index, const Timer &child)
160 {
161  unsigned int idx = child.hash_idx_;
162  if (child_timers[idx] >0) {
163  unsigned int i=idx;
164  do {
165  i=( i < max_n_childs ? i+1 : 0);
166  } while (i!=idx && child_timers[i] >0);
167  ASSERT(i!=idx, "Too many children of the timer with tag '%s'\n", tag() );
168  idx=i;
169  }
170  child_timers[idx] = child_index;
171 }
172 
173 
174 
175 string Timer::code_point_str() const {
176  return boost::str( boost::format("%s:%d, %s()") % code_point_->file_ % code_point_->line_ % code_point_->func_ );
177 }
178 
179 
180 /***********************************************************************************************
181  * Implementation of Profiler
182  */
183 
184 
185 
186 
187 
188 
190 CodePoint Profiler::null_code_point = CodePoint("__no_tag__", "__no_file__", "__no_func__", 0);
191 
192 
193 
195 {
196 
197  if (!_instance)
198  _instance = new Profiler();
199 
200 }
201 
202 
204 : actual_node(0),
205  task_size_(1),
206  start_time( time(NULL) )
207 
208 {
209 #ifdef DEBUG_PROFILER
210  static CONSTEXPR_ CodePoint main_cp = CODE_POINT("Whole Program");
211  timers_.push_back( Timer(main_cp, 0) );
212  timers_[0].start();
213 #endif
214 }
215 
216 
217 
218 
219 void Profiler::set_task_info(string description, int size) {
220  task_description_ = description;
221  task_size_ = size;
222 }
223 
224 
225 
226 void Profiler::set_program_info(string program_name, string program_version, string branch, string revision, string build) {
227  flow_name_ = program_name;
228  flow_version_ = program_version;
229  flow_branch_ = branch;
230  flow_revision_ = revision;
231  flow_build_ = build;
232 }
233 
234 
235 
236 int Profiler::start_timer(const CodePoint &cp) {
237 
238  int child_idx = find_child(cp);
239  if (child_idx < 0) {
240  // tag not present - create new timer
241  child_idx=timers_.size();
242  timers_.push_back( Timer(cp, actual_node) );
243  timers_[actual_node].add_child(child_idx , timers_.back() );
244  }
245 
246  timers_[child_idx].start();
247  actual_node=child_idx;
248 
249  return actual_node;
250 }
251 
252 
253 
254 int Profiler::find_child(const CodePoint &cp) {
255  Timer &timer =timers_[actual_node];
256  int idx = cp.hash_idx_;
257  int child_idx;
258  do {
259  child_idx=timer.child_timers[idx];
260 
261  if (child_idx < 0) break; // tag is not there
262 
263  ASSERT_LESS( child_idx, timers_.size());
264  if (timers_[child_idx].full_hash_ == cp.hash_) return child_idx;
265  idx = ( (unsigned int)(idx)==Timer::max_n_childs ? 0 : idx+1 );
266  } while ( (unsigned int)(idx) != cp.hash_idx_ ); // passed through whole array
267  return -1;
268 }
269 
270 
271 
272 void Profiler::stop_timer(const CodePoint &cp) {
273 #ifdef Flow123d_DEBUG
274  // check that all childrens are closed
275  Timer &timer=timers_[actual_node];
276  for(unsigned int i=0; i < Timer::max_n_childs; i++)
277  if (timer.child_timers[i] >0)
278  ASSERT( ! timers_[timer.child_timers[i]].running() , "Child timer '%s' running while closing timer '%s'.\n", timers_[timer.child_timers[i]].tag(), timer.tag() );
279 #endif
280  if ( cp.hash_ != timers_[actual_node].full_hash_) {
281  // timer to close is not actual - we search for it above actual
282  for(unsigned int node=actual_node; node != 0; node=timers_[node].parent_timer) {
283  if ( cp.hash_ == timers_[node].full_hash_) {
284  // found above - close all nodes between
285  for(; (unsigned int)(actual_node) != node; actual_node=timers_[actual_node].parent_timer) {
286  xprintf(Warn, "Timer to close '%s' do not match actual timer '%s'. Force closing actual.\n", cp.tag_, timers_[actual_node].tag());
287  timers_[actual_node].stop(true);
288  }
289  // close 'node' itself
290  timers_[actual_node].stop(false);
291  actual_node=timers_[actual_node].parent_timer;
292  return;
293  }
294  }
295  // node not found - do nothing
296  return;
297  }
298  // node to close match the actual
299  timers_[actual_node].stop(false);
300  actual_node=timers_[actual_node].parent_timer;
301 }
302 
303 
304 
305 void Profiler::stop_timer(int timer_index) {
306  if (timer_index <0) timer_index=actual_node;
307  ASSERT_LESS( timer_index, timers_.size() );
308 
309  if (! timers_[timer_index].running() ) return;
310 
311  if ( timer_index != actual_node ) {
312  // timer to close is not actual - we search for it above actual
313  for(unsigned int node=actual_node; node != 0; node=timers_[node].parent_timer)
314  if ( (unsigned int)(timer_index) == node) {
315  // found above - close all nodes between
316  for(; (unsigned int)(actual_node) != node; actual_node=timers_[actual_node].parent_timer) {
317  xprintf(Warn, "Timer to close '%s' do not match actual timer '%s'. Force closing actual.\n", timers_[timer_index].tag(), timers_[actual_node].tag());
318  timers_[actual_node].stop(true);
319  }
320  // close 'node' itself
321  timers_[actual_node].stop(false);
322  actual_node=timers_[actual_node].parent_timer;
323  return;
324  }
325  // node not found - do nothing
326  return;
327  }
328 
329  // node to close match the actual
330  timers_[actual_node].stop(false);
331  actual_node=timers_[actual_node].parent_timer;
332 }
333 
334 
335 
336 void Profiler::add_calls(unsigned int n_calls) {
337  timers_[actual_node].call_count += n_calls-1;
338 }
339 
340 
341 
342 void Profiler::notify_malloc(const size_t size) {
343  timers_[actual_node].total_allocated_ += size;
344 }
345 
346 
347 
348 void Profiler::notify_free(const size_t size) {
349  timers_[actual_node].total_deallocated_ += size;
350 }
351 
352 
353 
354 
355 
356 
357 void Profiler::add_timer_info(MPI_Comm comm, vector<vector<string> > &timers_info, int timer_idx, int indent, double parent_time) {
358 
359  Timer &timer = timers_[timer_idx];
360 
361  ASSERT( timer_idx >=0, "Wrong timer index %d.\n", timer_idx);
362  ASSERT( timer.parent_timer >=0 , "Inconsistent tree.\n");
363 
364  int numproc;
365  MPI_Comm_size(comm, &numproc);
366 
367  int call_count = timer.call_count;
368  int call_count_min = MPI_Functions::min(&call_count, comm);
369  int call_count_max = MPI_Functions::max(&call_count, comm);
370  int call_count_sum = MPI_Functions::sum(&call_count, comm);
371 
372  double cumul_time = timer.cumulative_time() / 1000; // in seconds
373  double cumul_time_min = MPI_Functions::min(&cumul_time, comm);
374  double cumul_time_max = MPI_Functions::max(&cumul_time, comm);
375  double cumul_time_sum = MPI_Functions::sum(&cumul_time, comm);
376 
377  if (timer_idx == 0) parent_time = cumul_time_sum;
378 
379  vector<string> info;
380  double percent = parent_time > 1.0e-10 ? cumul_time_sum / parent_time * 100.0 : 0.0;
381  string tree_info = string(2*indent, ' ') +
382  boost::str( boost::format("[%.1f] ") % percent )+
383  timer.tag();
384  info.push_back( tree_info );
385 
386  info.push_back( boost::str(boost::format("%i%s") % call_count % (call_count_min != call_count_max ? "*" : " ")) );
387  info.push_back( boost::str( boost::format("%.2f") % (cumul_time_max) ) );
388  info.push_back( boost::str(boost::format("%.2f") % (cumul_time_min > 1.0e-10 ? cumul_time_max / cumul_time_min : 1)) );
389  info.push_back( boost::str( boost::format("%.2f") % (cumul_time_sum / call_count_sum) ) );
390  info.push_back( boost::str( boost::format("%.2f") % (cumul_time_sum) ) );
391  info.push_back( timer.code_point_str() );
392 
393  timers_info.push_back(info);
394 
395  for (unsigned int i = 0; i < Timer::max_n_childs; i++)
396  if (timer.child_timers[i] > 0)
397  add_timer_info(comm, timers_info, timer.child_timers[i], indent + 1, cumul_time_sum);
398 }
399 
400 
401 
402 void Profiler::update_running_timers() {
403  for(int node=actual_node; node !=0; node = timers_[node].parent_timer)
404  timers_[node].update();
405  timers_[0].update();
406 }
407 
408 
409 
410 void Profiler::output(MPI_Comm comm, ostream &os) {
411 
412  const int column_space = 3;
413 
414  //wait until profiling on all processors is finished
415  MPI_Barrier(comm);
416  update_running_timers();
417 
418  int ierr, mpi_rank;
419  ierr = MPI_Comm_rank(MPI_COMM_WORLD, &mpi_rank);
420  ASSERT(ierr == 0, "Error in MPI test of rank.");
421 
422  vector < vector<string> > timers_info(1);
423 
424  // add header into timers_info table !!
425  timers_info[0].push_back( "tag tree");
426  timers_info[0].push_back( "calls");
427  timers_info[0].push_back( "Tmax");
428  timers_info[0].push_back( "max/min");
429  timers_info[0].push_back( "T/calls");
430  timers_info[0].push_back( "Ttotal");
431  timers_info[0].push_back( "code_point");
432 
433  add_timer_info(comm, timers_info, 0, 0, 0.0);
434 
435  //create profiler output only once (on the first processor)
436  if (mpi_rank == 0) {
437 
438  // compute with of columns
439  vector<unsigned int> width(timers_info[0].size(),0);
440  for (unsigned int i = 0; i < timers_info.size(); i++)
441  for (unsigned int j = 0; j < timers_info[i].size(); j++) width[j] = max( width[j] , (unsigned int)timers_info[i][j].size() );
442  // detect common path of code points
443  unsigned int common_length=timers_info[1].back().size();
444  for (unsigned int i = 2; i < timers_info.size(); i++) {
445  common_length = min( common_length, (unsigned int) timers_info[i].back().size() );
446  for (unsigned int j = 0; j < common_length; j++ ) {
447  if (timers_info[1].back().at(j) != timers_info[i].back().at(j)) {
448  common_length = j;
449  break;
450  }
451  }
452  }
453  // remove common path
454  for (unsigned int i = 1; i < timers_info.size(); i++) timers_info[i].back().erase(0, common_length);
455 
456 
457  int mpi_size;
458  MPI_Comm_size(comm, &mpi_size);
459 
460  time_t end_time = time(NULL);
461 
462  const char format[] = "%x %X";
463  char start_time_string[BUFSIZ] = {0};
464  strftime(start_time_string, sizeof (start_time_string) - 1, format, localtime(&start_time));
465 
466  char end_time_string[BUFSIZ] = {0};
467  strftime(end_time_string, sizeof (end_time_string) - 1, format, localtime(&end_time));
468 
469  //create a file where the output will be written to
470 
471  os << "Program name: " << flow_name_ << endl
472  << "Program version: " << flow_version_ << endl
473  << "Program branch: " << flow_branch_ << endl
474  << "Program revision: " << flow_revision_ << endl
475  << "Program build: " << flow_build_ << endl << endl;
476 
477 
478  os << "Task description: " << task_description_ << endl
479  << "Task size: " << task_size_ << endl << endl;
480 
481  //print some information about the task at the beginning
482  os << "Run processes count: " << mpi_size << endl;
483  os << "Run started at: " << start_time_string << endl;
484  os << "Run finished at: " << end_time_string << endl;
485 
486  os << setfill ('-') << setw (80) << "" << endl;
487  os.fill(' ');
488 
489  // print header
490  for(unsigned int j=0; j< timers_info[0].size(); j++)
491  os << left << setw(width[j]) << timers_info[0][j] << setw(column_space) << "";
492  os << endl;
493 
494  os << setfill ('-') << setw (80) << "" << endl;
495  os.fill(' ');
496 
497  for (unsigned int i = 1; i < timers_info.size(); i++) {
498  for(unsigned int j=0; j< timers_info[i].size(); j++) {
499  // first and last item are left aligned
500  if (j==0 || j==timers_info[i].size()-1 ) os << left; else os<<right;
501  os << setw(width[j]) << timers_info[i][j] << setw(column_space) << "";
502  }
503 
504  os << endl;
505  }
506 
507  }
508 }
509 
510 
511 
512 void Profiler::output(MPI_Comm comm) {
513  char filename[PATH_MAX];
514  strftime(filename, sizeof (filename) - 1, "profiler_info_%y.%m.%d_%H-%M-%S.log", localtime(&start_time));
515  string full_fname = FilePath(string(filename), FilePath::output_file);
516 
517  xprintf(MsgLog, "output into: %s\n", full_fname.c_str());
518  ofstream os(full_fname.c_str());
519  output(comm, os);
520  os.close();
521 }
522 
523 
524 
526 {
527  if (_instance) {
528  ASSERT( _instance->actual_node==0 , "Forbidden to uninitialize the Profiler when actual timer is not zero (but '%s').\n",
529  _instance->timers_[_instance->actual_node].tag());
530  _instance->stop_timer(0);
531  delete _instance;
532  _instance = NULL;
533  }
534 }
535 
536 #else // def DEBUG_PROFILER
537 
538 Profiler* Profiler::_instance = NULL;
539 
541  if (!_instance) _instance = new Profiler();
542 }
543 
545  if (_instance) {
546  delete _instance;
547  _instance = NULL;
548  }
549 }
550 
551 
552 #endif // def DEBUG_PROFILER
553 
554 
static int min(int *val, MPI_Comm comm)
Definition: sys_profiler.cc:59
#define CONSTEXPR_
Definition: sys_profiler.hh:83
int MPI_Comm
Definition: mpi.h:141
static int sum(int *val, MPI_Comm comm)
Definition: sys_profiler.cc:47
#define MPI_SUM
Definition: mpi.h:196
void notify_free(const size_t size)
#define MPI_Reduce(sendbuf, recvbuf, count, datatype, op, root, comm)
Definition: mpi.h:608
Definition: system.hh:72
static void uninitialize()
#define ASSERT(...)
Definition: global_defs.h:121
Definition: system.hh:72
#define MPI_MIN
Definition: mpi.h:198
#define xprintf(...)
Definition: system.hh:100
#define ASSERT_LESS(a, b)
Definition: global_defs.h:164
static int max(int *val, MPI_Comm comm)
Definition: sys_profiler.cc:71
void set_program_info(string program_name, string program_version, string branch, string revision, string build)
#define MPI_Comm_size
Definition: mpi.h:235
#define MPI_DOUBLE
Definition: mpi.h:156
#define MPI_Comm_rank
Definition: mpi.h:236
Dedicated class for storing path to input and output files.
Definition: file_path.hh:32
#define MPI_INT
Definition: mpi.h:160
static void initialize()
#define MPI_COMM_WORLD
Definition: mpi.h:123
#define MPI_MAX
Definition: mpi.h:197
static Profiler * _instance
#define MPI_Barrier(comm)
Definition: mpi.h:531
void output(MPI_Comm comm, ostream &os)
void set_task_info(string description, int size)
void notify_malloc(const size_t size)