Flow123d
sys_profiler.cc
Go to the documentation of this file.
1 /*!
2  *
3  * Copyright (C) 2007 Technical University of Liberec. All rights reserved.
4  *
5  * Please make a following refer to Flow123d on your project site if you use the program for any purpose,
6  * especially for academic research:
7  * Flow123d, Research Centre: Advanced Remedial Technologies, Technical University of Liberec, Czech Republic
8  *
9  * This program is free software; you can redistribute it and/or modify it under the terms
10  * of the GNU General Public License version 3 as published by the Free Software Foundation.
11  *
12  * This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
13  * without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
14  * See the GNU General Public License for more details.
15  *
16  * You should have received a copy of the GNU General Public License along with this program; if not,
17  * write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 021110-1307, USA.
18  *
19  *
20  * $Id: profiler.cc 842 2011-01-08 17:58:15Z tomas.bambuch $
21  * $Revision: 842 $
22  * $LastChangedBy: tomas.bambuch $
23  * $LastChangedDate: 2011-01-08 18:58:15 +0100 (So, 08 led 2011) $
24  *
25  * @file
26  * @ingroup system
27  * @brief Profiler
28  *
29  */
30 
31 #include <fstream>
32 #include <iomanip>
33 
34 #include <sys/param.h>
35 
36 #include "sys_profiler.hh"
37 #include "system/system.hh"
38 #include <boost/format.hpp>
39 
40 #include "system/file_path.hh"
41 
42 
43 #include "mpi.h"
44 /*
45  * These should be replaced by using boost MPI interface
46  */
47 int MPI_Functions::sum(int* val, MPI_Comm comm) {
48  int total = 0;
49  MPI_Reduce(val, &total, 1, MPI_INT, MPI_SUM, 0, comm);
50  return total;
51  }
52 
53 double MPI_Functions::sum(double* val, MPI_Comm comm) {
54  double total = 0;
55  MPI_Reduce(val, &total, 1, MPI_DOUBLE, MPI_SUM, 0, comm);
56  return total;
57  }
58 
59 int MPI_Functions::min(int* val, MPI_Comm comm) {
60  int min = 0;
61  MPI_Reduce(val, &min, 1, MPI_INT, MPI_MIN, 0, comm);
62  return min;
63  }
64 
65 double MPI_Functions::min(double* val, MPI_Comm comm) {
66  double min = 0;
67  MPI_Reduce(val, &min, 1, MPI_DOUBLE, MPI_MIN, 0, comm);
68  return min;
69  }
70 
71 int MPI_Functions::max(int* val, MPI_Comm comm) {
72  int max = 0;
73  MPI_Reduce(val, &max, 1, MPI_INT, MPI_MAX, 0, comm);
74  return max;
75  }
76 
77 double MPI_Functions::max(double* val, MPI_Comm comm) {
78  double max = 0;
79  MPI_Reduce(val, &max, 1, MPI_DOUBLE, MPI_MAX, 0, comm);
80  return max;
81  }
82 
83 
84 #ifdef DEBUG_PROFILER
85 /*********************************************************************************************
86  * Implementation of class Timer
87  */
88 
89 Timer::Timer(const CodePoint &cp, int parent)
90 : start_time(0),
91  cumul_time(0),
92  call_count(0),
93  start_count(0),
94  code_point_(&cp),
95  full_hash_(cp.hash_),
96  hash_idx_(cp.hash_idx_),
97  parent_timer(parent),
98  total_allocated_(0),
99  total_deallocated_(0)
100 {
101  for(unsigned int i=0; i< max_n_childs ;i++) child_timers[i]=-1;
102 }
103 
104 
105 /*
106  * Standard C++ clock() function measure the time spent in calling process, not the wall time,
107  * that is exactly what I want. Disadvantage is that it has small resolution about 10 ms.
108  *
109  * For more precise wall clock timer one can use:
110  * time(&begin);
111  * time(&end);
112  * cout << "Time elapsed: " << difftime(end, begin) << " seconds"<< endl;
113  */
114 Timer::TimeData Timer::get_time() {
115  return clock();
116 }
117 
118 
119 
120 double Timer::cumulative_time() const
121 {
122  return 1000.0 * double(cumul_time) / CLOCKS_PER_SEC;
123 }
124 
125 
126 
127 void Timer::start() {
128  if (start_count == 0) start_time = get_time();
129  call_count++;
130  start_count++;
131 }
132 
133 
134 
135 void Timer::update() {
136  TimeData time = get_time();
137  cumul_time += time - start_time;
138 }
139 
140 
141 
142 bool Timer::stop(bool forced) {
143 
144  if (forced) start_count=1;
145 
146  if (start_count == 1) {
147  TimeData time = get_time();
148  cumul_time += time - start_time;
149  start_count--;
150  return true;
151  } else {
152  start_count--;
153  }
154  return false;
155 }
156 
157 
158 
159 void Timer::add_child(int child_index, const Timer &child)
160 {
161  unsigned int idx = child.hash_idx_;
162  if (child_timers[idx] >0) {
163  unsigned int i=idx;
164  do {
165  i=( i < max_n_childs ? i+1 : 0);
166  } while (i!=idx && child_timers[i] >0);
167  ASSERT(i!=idx, "Too many children of the timer with tag '%s'\n", tag() );
168  idx=i;
169  }
170  child_timers[idx] = child_index;
171 }
172 
173 
174 
175 string Timer::code_point_str() const {
176  return boost::str( boost::format("%s:%d, %s()") % code_point_->file_ % code_point_->line_ % code_point_->func_ );
177 }
178 
179 
180 /***********************************************************************************************
181  * Implementation of Profiler
182  */
183 
184 
185 
186 
187 
188 
190 CodePoint Profiler::null_code_point = CodePoint("__no_tag__", "__no_file__", "__no_func__", 0);
191 
192 
193 
195 {
196 
197  if (!_instance)
198  _instance = new Profiler();
199  //else
200  // xprintf(Warn, "The profiler already initialized.\n");
201 
202 }
203 
204 
206 : actual_node(0),
207  task_size_(1),
208  start_time( time(NULL) )
209 
210 {
211 #ifdef DEBUG_PROFILER
212  static CONSTEXPR_ CodePoint main_cp = CODE_POINT("Whole Program");
213  timers_.push_back( Timer(main_cp, 0) );
214  timers_[0].start();
215 #endif
216 }
217 
218 
219 
220 
221 void Profiler::set_task_info(string description, int size) {
222  task_description_ = description;
223  task_size_ = size;
224 }
225 
226 
227 
228 void Profiler::set_program_info(string program_name, string program_version, string branch, string revision, string build) {
229  flow_name_ = program_name;
230  flow_version_ = program_version;
231  flow_branch_ = branch;
232  flow_revision_ = revision;
233  flow_build_ = build;
234 }
235 
236 
237 
238 int Profiler::start_timer(const CodePoint &cp) {
239 
240  int child_idx = find_child(cp);
241  if (child_idx < 0) {
242  // tag not present - create new timer
243  child_idx=timers_.size();
244  timers_.push_back( Timer(cp, actual_node) );
245  timers_[actual_node].add_child(child_idx , timers_.back() );
246  }
247 
248  timers_[child_idx].start();
249  actual_node=child_idx;
250 
251  return actual_node;
252 }
253 
254 
255 
256 int Profiler::find_child(const CodePoint &cp) {
257  Timer &timer =timers_[actual_node];
258  int idx = cp.hash_idx_;
259  int child_idx;
260  do {
261  child_idx=timer.child_timers[idx];
262 
263  if (child_idx < 0) break; // tag is not there
264 
265  ASSERT_LESS( child_idx, timers_.size());
266  if (timers_[child_idx].full_hash_ == cp.hash_) return child_idx;
267  idx = ( (unsigned int)(idx)==Timer::max_n_childs ? 0 : idx+1 );
268  } while ( (unsigned int)(idx) != cp.hash_idx_ ); // passed through whole array
269  return -1;
270 }
271 
272 
273 
274 void Profiler::stop_timer(const CodePoint &cp) {
275 #ifdef Flow123d_DEBUG
276  // check that all childrens are closed
277  Timer &timer=timers_[actual_node];
278  for(unsigned int i=0; i < Timer::max_n_childs; i++)
279  if (timer.child_timers[i] >0)
280  ASSERT( ! timers_[timer.child_timers[i]].running() , "Child timer '%s' running while closing timer '%s'.\n", timers_[timer.child_timers[i]].tag(), timer.tag() );
281 #endif
282  if ( cp.hash_ != timers_[actual_node].full_hash_) {
283  // timer to close is not actual - we search for it above actual
284  for(unsigned int node=actual_node; node != 0; node=timers_[node].parent_timer) {
285  if ( cp.hash_ == timers_[node].full_hash_) {
286  // found above - close all nodes between
287  for(; (unsigned int)(actual_node) != node; actual_node=timers_[actual_node].parent_timer) {
288  xprintf(Warn, "Timer to close '%s' do not match actual timer '%s'. Force closing actual.\n", cp.tag_, timers_[actual_node].tag());
289  timers_[actual_node].stop(true);
290  }
291  // close 'node' itself
292  timers_[actual_node].stop(false);
293  actual_node=timers_[actual_node].parent_timer;
294  return;
295  }
296  }
297  // node not found - do nothing
298  return;
299  }
300  // node to close match the actual
301  timers_[actual_node].stop(false);
302  actual_node=timers_[actual_node].parent_timer;
303 }
304 
305 
306 
307 void Profiler::stop_timer(int timer_index) {
308  if (timer_index <0) timer_index=actual_node;
309  ASSERT_LESS( timer_index, timers_.size() );
310 
311  if (! timers_[timer_index].running() ) return;
312 
313  if ( timer_index != actual_node ) {
314  // timer to close is not actual - we search for it above actual
315  for(unsigned int node=actual_node; node != 0; node=timers_[node].parent_timer)
316  if ( (unsigned int)(timer_index) == node) {
317  // found above - close all nodes between
318  for(; (unsigned int)(actual_node) != node; actual_node=timers_[actual_node].parent_timer) {
319  xprintf(Warn, "Timer to close '%s' do not match actual timer '%s'. Force closing actual.\n", timers_[timer_index].tag(), timers_[actual_node].tag());
320  timers_[actual_node].stop(true);
321  }
322  // close 'node' itself
323  timers_[actual_node].stop(false);
324  actual_node=timers_[actual_node].parent_timer;
325  return;
326  }
327  // node not found - do nothing
328  return;
329  }
330 
331  // node to close match the actual
332  timers_[actual_node].stop(false);
333  actual_node=timers_[actual_node].parent_timer;
334 }
335 
336 
337 
338 void Profiler::add_calls(unsigned int n_calls) {
339  timers_[actual_node].call_count += n_calls-1;
340 }
341 
342 
343 
344 void Profiler::notify_malloc(const size_t size) {
345  timers_[actual_node].total_allocated_ += size;
346 }
347 
348 
349 
350 void Profiler::notify_free(const size_t size) {
351  timers_[actual_node].total_deallocated_ += size;
352 }
353 
354 
355 
356 
357 
358 
359 void Profiler::add_timer_info(MPI_Comm comm, vector<vector<string> > &timers_info, int timer_idx, int indent, double parent_time) {
360 
361  Timer &timer = timers_[timer_idx];
362 
363  ASSERT( timer_idx >=0, "Wrong timer index %d.\n", timer_idx);
364  ASSERT( timer.parent_timer >=0 , "Inconsistent tree.\n");
365 
366  int numproc;
367  MPI_Comm_size(comm, &numproc);
368 
369  int call_count = timer.call_count;
370  int call_count_min = MPI_Functions::min(&call_count, comm);
371  int call_count_max = MPI_Functions::max(&call_count, comm);
372  int call_count_sum = MPI_Functions::sum(&call_count, comm);
373 
374  double cumul_time = timer.cumulative_time() / 1000; // in seconds
375  double cumul_time_min = MPI_Functions::min(&cumul_time, comm);
376  double cumul_time_max = MPI_Functions::max(&cumul_time, comm);
377  double cumul_time_sum = MPI_Functions::sum(&cumul_time, comm);
378 
379  if (timer_idx == 0) parent_time = cumul_time_sum;
380 
381  vector<string> info;
382  double percent = parent_time > 1.0e-10 ? cumul_time_sum / parent_time * 100.0 : 0.0;
383  string tree_info = string(2*indent, ' ') +
384  boost::str( boost::format("[%.1f] ") % percent )+
385  timer.tag();
386  info.push_back( tree_info );
387 
388  info.push_back( boost::str(boost::format("%i%s") % call_count % (call_count_min != call_count_max ? "*" : " ")) );
389  info.push_back( boost::str( boost::format("%.2f") % (cumul_time_max) ) );
390  info.push_back( boost::str(boost::format("%.2f") % (cumul_time_min > 1.0e-10 ? cumul_time_max / cumul_time_min : 1)) );
391  info.push_back( boost::str( boost::format("%.2f") % (cumul_time_sum / call_count_sum) ) );
392  info.push_back( boost::str( boost::format("%.2f") % (cumul_time_sum) ) );
393  info.push_back( timer.code_point_str() );
394 
395  timers_info.push_back(info);
396 
397  for (unsigned int i = 0; i < Timer::max_n_childs; i++)
398  if (timer.child_timers[i] > 0)
399  add_timer_info(comm, timers_info, timer.child_timers[i], indent + 1, cumul_time_sum);
400 }
401 
402 
403 
404 void Profiler::update_running_timers() {
405  for(int node=actual_node; node !=0; node = timers_[node].parent_timer)
406  timers_[node].update();
407  timers_[0].update();
408 }
409 
410 
411 
412 void Profiler::output(MPI_Comm comm, ostream &os) {
413 
414  const int column_space = 3;
415 
416  //wait until profiling on all processors is finished
417  MPI_Barrier(comm);
418  update_running_timers();
419 
420  int ierr, mpi_rank;
421  ierr = MPI_Comm_rank(MPI_COMM_WORLD, &mpi_rank);
422  ASSERT(ierr == 0, "Error in MPI test of rank.");
423 
424  vector < vector<string> > timers_info(1);
425 
426  // add header into timers_info table !!
427  timers_info[0].push_back( "tag tree");
428  timers_info[0].push_back( "calls");
429  timers_info[0].push_back( "Tmax");
430  timers_info[0].push_back( "max/min");
431  timers_info[0].push_back( "T/calls");
432  timers_info[0].push_back( "Ttotal");
433  timers_info[0].push_back( "code_point");
434 
435  add_timer_info(comm, timers_info, 0, 0, 0.0);
436 
437  //create profiler output only once (on the first processor)
438  if (mpi_rank == 0) {
439 
440  // compute with of columns
441  vector<unsigned int> width(timers_info[0].size(),0);
442  for (unsigned int i = 0; i < timers_info.size(); i++)
443  for (unsigned int j = 0; j < timers_info[i].size(); j++) width[j] = max( width[j] , (unsigned int)timers_info[i][j].size() );
444  // detect common path of code points
445  unsigned int common_length=timers_info[1].back().size();
446  for (unsigned int i = 2; i < timers_info.size(); i++) {
447  common_length = min( common_length, (unsigned int) timers_info[i].back().size() );
448  for (unsigned int j = 0; j < common_length; j++ ) {
449  if (timers_info[1].back().at(j) != timers_info[i].back().at(j)) {
450  common_length = j;
451  break;
452  }
453  }
454  }
455  // remove common path
456  for (unsigned int i = 1; i < timers_info.size(); i++) timers_info[i].back().erase(0, common_length);
457 
458 
459  int mpi_size;
460  MPI_Comm_size(comm, &mpi_size);
461 
462  time_t end_time = time(NULL);
463 
464  const char format[] = "%x %X";
465  char start_time_string[BUFSIZ] = {0};
466  strftime(start_time_string, sizeof (start_time_string) - 1, format, localtime(&start_time));
467 
468  char end_time_string[BUFSIZ] = {0};
469  strftime(end_time_string, sizeof (end_time_string) - 1, format, localtime(&end_time));
470 
471  //create a file where the output will be written to
472 
473  os << "Program name: " << flow_name_ << endl
474  << "Program version: " << flow_version_ << endl
475  << "Program branch: " << flow_branch_ << endl
476  << "Program revision: " << flow_revision_ << endl
477  << "Program build: " << flow_build_ << endl << endl;
478 
479 
480  os << "Task description: " << task_description_ << endl
481  << "Task size: " << task_size_ << endl << endl;
482 
483  //print some information about the task at the beginning
484  os << "Run processes count: " << mpi_size << endl;
485  os << "Run started at: " << start_time_string << endl;
486  os << "Run finished at: " << end_time_string << endl;
487 
488  os << setfill ('-') << setw (80) << "" << endl;
489  os.fill(' ');
490 
491  // print header
492  for(unsigned int j=0; j< timers_info[0].size(); j++)
493  os << left << setw(width[j]) << timers_info[0][j] << setw(column_space) << "";
494  os << endl;
495 
496  os << setfill ('-') << setw (80) << "" << endl;
497  os.fill(' ');
498 
499  for (unsigned int i = 1; i < timers_info.size(); i++) {
500  for(unsigned int j=0; j< timers_info[i].size(); j++) {
501  // first and last item are left aligned
502  if (j==0 || j==timers_info[i].size()-1 ) os << left; else os<<right;
503  os << setw(width[j]) << timers_info[i][j] << setw(column_space) << "";
504  }
505 
506  os << endl;
507  }
508 
509  }
510 }
511 
512 
513 
514 void Profiler::output(MPI_Comm comm) {
515  char filename[PATH_MAX];
516  strftime(filename, sizeof (filename) - 1, "profiler_info_%y.%m.%d_%H-%M-%S.log", localtime(&start_time));
517  string full_fname = FilePath(string(filename), FilePath::output_file);
518 
519  xprintf(MsgLog, "output into: %s\n", full_fname.c_str());
520  ofstream os(full_fname.c_str());
521  output(comm, os);
522  os.close();
523 }
524 
525 
526 
528 {
529  if (_instance) {
530  ASSERT( _instance->actual_node==0 , "Forbidden to uninitialize the Profiler when actual timer is not zero (but '%s').\n",
531  _instance->timers_[_instance->actual_node].tag());
532  _instance->stop_timer(0);
533  delete _instance;
534  _instance = NULL;
535  }
536 }
537 
538 #else // def DEBUG_PROFILER
539 
540 Profiler* Profiler::_instance = NULL;
541 
543  if (!_instance) _instance = new Profiler();
544 }
545 
547  if (_instance) {
548  delete _instance;
549  _instance = NULL;
550  }
551 }
552 
553 
554 #endif // def DEBUG_PROFILER
555 
556