24 #include <sys/param.h>
25 #include <unordered_map>
33 #include <boost/format.hpp>
97 #ifdef FLOW123D_DEBUG_PROFILER
102 const int timer_no_child=-1;
104 Timer::Timer(
const CodePoint &cp,
int parent)
110 full_hash_(cp.hash_),
111 hash_idx_(cp.hash_idx_),
112 parent_timer(parent),
114 total_deallocated_(0),
116 current_allocated_(0),
119 turn_off_memory_monitoring_(false)
120 #ifdef FLOW123D_HAVE_PETSC
121 , petsc_start_memory(0),
122 petsc_end_memory (0),
123 petsc_memory_difference(0),
124 petsc_peak_memory(0),
125 petsc_local_peak_memory(0)
128 for(
unsigned int i=0; i< max_n_childs ;i++) child_timers[i]=timer_no_child;
136 os <<
" Timer: " << timer.tag() << endl;
137 os <<
" malloc: " << timer.total_allocated_ << endl;
138 os <<
" dalloc: " << timer.total_deallocated_ << endl;
139 #ifdef FLOW123D_HAVE_PETSC
140 os <<
" start: " << timer.petsc_start_memory << endl;
141 os <<
" stop : " << timer.petsc_end_memory << endl;
142 os <<
" diff : " << timer.petsc_memory_difference <<
" (" << timer.petsc_end_memory - timer.petsc_start_memory <<
")" << endl;
143 os <<
" peak : " << timer.petsc_peak_memory <<
" (" << timer.petsc_local_peak_memory <<
")" << endl;
150 double Timer::cumulative_time()
const {
154 void Profiler::accept_from_child(
Timer &parent,
Timer &child) {
156 for (
unsigned int i = 0; i < Timer::max_n_childs; i++) {
157 child_timer = child.child_timers[i];
158 if (child_timer != timer_no_child) {
160 accept_from_child(child, timers_[child_timer]);
164 parent.total_allocated_ += child.total_allocated_;
165 parent.total_deallocated_ += child.total_deallocated_;
166 parent.alloc_called += child.alloc_called;
167 parent.dealloc_called += child.dealloc_called;
169 #ifdef FLOW123D_HAVE_PETSC
170 if (petsc_monitor_memory) {
172 parent.petsc_memory_difference += child.petsc_memory_difference;
173 parent.current_allocated_ += child.current_allocated_;
177 parent.petsc_peak_memory = max(parent.petsc_peak_memory, child.petsc_peak_memory);
181 parent.max_allocated_ = max(parent.max_allocated_, child.max_allocated_);
185 void Timer::pause() {
186 #ifdef FLOW123D_HAVE_PETSC
187 if (Profiler::get_petsc_memory_monitoring()) {
189 PetscMemoryGetMaximumUsage(&petsc_local_peak_memory);
190 if (petsc_peak_memory < petsc_local_peak_memory)
191 petsc_peak_memory = petsc_local_peak_memory;
196 void Timer::resume() {
197 #ifdef FLOW123D_HAVE_PETSC
198 if (Profiler::get_petsc_memory_monitoring()) {
201 PetscMemorySetGetMaximumUsage();
207 #ifdef FLOW123D_HAVE_PETSC
208 if (Profiler::get_petsc_memory_monitoring()) {
211 PetscMemorySetGetMaximumUsage();
212 PetscMemoryGetCurrentUsage (&petsc_start_memory);
216 if (start_count == 0) {
226 #ifdef FLOW123D_HAVE_PETSC
227 if (Profiler::get_petsc_memory_monitoring()) {
229 PetscMemoryGetCurrentUsage (&petsc_end_memory);
230 petsc_memory_difference += petsc_end_memory - petsc_start_memory;
233 PetscMemoryGetMaximumUsage(&petsc_local_peak_memory);
234 if (petsc_peak_memory < petsc_local_peak_memory)
235 petsc_peak_memory = petsc_local_peak_memory;
239 if (turn_off_memory_monitoring_) {
243 if (forced) start_count=1;
245 if (start_count == 1) {
246 cumul_time += (
TimePoint() - start_time);
257 void Timer::add_child(
int child_index,
const Timer &child)
259 unsigned int idx = child.hash_idx_;
260 if (child_timers[idx] != timer_no_child) {
264 i=( i < max_n_childs ? i+1 : 0);
265 }
while (i!=idx && child_timers[i] != timer_no_child);
270 child_timers[idx] = child_index;
275 string Timer::code_point_str()
const {
277 code_point_->file_, code_point_->line_, code_point_->func_ );
290 if (_instance != NULL) {
297 if (_instance == NULL) {
298 MemoryAlloc::malloc_map().reserve(Profiler::malloc_map_reserve);
307 const long Profiler::malloc_map_reserve = 100 * 1000;
312 start_time( time(NULL) ),
314 none_timer_(CODE_POINT(
"NONE TIMER"), 0),
315 calibration_time_(-1)
318 static CONSTEXPR_ CodePoint main_cp = CODE_POINT(
"Whole Program");
319 set_memory_monitoring(
false);
320 #ifdef FLOW123D_DEBUG_PROFILER
321 MemoryAlloc::malloc_map().reserve(Profiler::malloc_map_reserve);
322 timers_.push_back(
Timer(main_cp, 0) );
330 uint SIZE = 64 * 1024;
331 uint HALF = SIZE / 2;
333 Timer &timer = timers_[actual_node];
336 while(
TimePoint() - timer.start_time < 0.1) {
337 double * block =
new double[SIZE];
338 for(
uint i=0; i<HALF; i++) {
339 block[HALF+i] = block[i]*block[i] + i;
349 const double reference_count = 7730;
353 calibration_time_ = 10 * timer.cumul_time * reference_count / count;
354 LogOut() <<
"Profiler calibration count: " << count << std::endl;
355 LogOut() <<
"Profiler calibration time: " << calibration_time_ << std::endl;
361 void Profiler::propagate_timers() {
362 for (
unsigned int i = 0; i < Timer::max_n_childs; i++) {
363 unsigned int child_timer = timers_[0].child_timers[i];
364 if ((
signed int)child_timer != timer_no_child) {
366 accept_from_child(timers_[0], timers_[child_timer]);
374 task_description_ = description;
381 flow_name_ = program_name;
382 flow_version_ = program_version;
383 flow_branch_ = branch;
384 flow_revision_ = revision;
390 int Profiler::start_timer(
const CodePoint &cp) {
391 unsigned int parent_node = actual_node;
393 int child_idx = find_child(cp);
397 child_idx=timers_.size();
398 timers_.push_back(
Timer(cp, actual_node) );
399 timers_[actual_node].add_child(child_idx , timers_.back() );
401 actual_node=child_idx;
404 timers_[parent_node].pause();
406 timers_[actual_node].start();
413 int Profiler::find_child(
const CodePoint &cp) {
414 Timer &timer =timers_[actual_node];
415 unsigned int idx = cp.hash_idx_;
416 unsigned int child_idx;
418 if (timer.child_timers[idx] == timer_no_child)
break;
420 child_idx=timer.child_timers[idx];
422 if (timers_[child_idx].full_hash_ == cp.hash_)
return child_idx;
423 idx = ( (
unsigned int)(idx)==(Timer::max_n_childs - 1) ? 0 : idx+1 );
424 }
while ( (
unsigned int)(idx) != cp.hash_idx_ );
430 void Profiler::stop_timer(
const CodePoint &cp) {
431 #ifdef FLOW123D_DEBUG_ASSERTS
433 Timer &timer=timers_[actual_node];
434 for(
unsigned int i=0; i < Timer::max_n_childs; i++)
435 if (timer.child_timers[i] != timer_no_child)
436 ASSERT_PERMANENT(! timers_[timer.child_timers[i]].running())(timers_[timer.child_timers[i]].tag())(timer.tag())
437 .error(
"Child timer running while closing timer.");
439 unsigned int child_timer = actual_node;
440 if ( cp.hash_ != timers_[actual_node].full_hash_) {
442 for(
unsigned int node=actual_node; node != 0; node=timers_[node].parent_timer) {
443 if ( cp.hash_ == timers_[node].full_hash_) {
445 for(; (
unsigned int)(actual_node) != node; actual_node=timers_[actual_node].parent_timer) {
446 WarningOut() <<
"Timer to close '" << cp.tag_ <<
"' do not match actual timer '"
447 << timers_[actual_node].tag() <<
"'. Force closing actual." << std::endl;
448 timers_[actual_node].stop(
true);
451 timers_[actual_node].stop(
false);
452 actual_node = timers_[actual_node].parent_timer;
455 if (actual_node == child_timer)
459 timers_[actual_node].resume();
467 timers_[actual_node].stop(
false);
468 actual_node = timers_[actual_node].parent_timer;
471 if (actual_node == child_timer)
475 timers_[actual_node].resume();
480 void Profiler::stop_timer(
int timer_index) {
484 if (timers_[timer_index].running()) {
486 stop_timer(*timers_[timer_index].code_point_);
492 void Profiler::start_memory_monitoring() {
493 if ( !get_global_memory_monitoring() ) {
495 timers_[actual_node].set_turn_off_memory_monitoring();
501 void Profiler::add_calls(
unsigned int n_calls) {
502 timers_[actual_node].call_count += n_calls-1;
508 MemoryAlloc::malloc_map()[p] =
static_cast<int>(size);
509 timers_[actual_node].total_allocated_ += size;
510 timers_[actual_node].current_allocated_ += size;
511 timers_[actual_node].alloc_called++;
513 if (timers_[actual_node].current_allocated_ > timers_[actual_node].max_allocated_)
514 timers_[actual_node].max_allocated_ = timers_[actual_node].current_allocated_;
520 int size =
sizeof(p);
521 if (MemoryAlloc::malloc_map()[(long)p] > 0) {
522 size = MemoryAlloc::malloc_map()[(long)p];
523 MemoryAlloc::malloc_map().erase((
long)p);
525 timers_[actual_node].total_deallocated_ += size;
526 timers_[actual_node].current_allocated_ -= size;
527 timers_[actual_node].dealloc_called++;
532 const int measurements = 100;
536 for (
unsigned int i = 1; i < measurements; i++) {
541 while ((t2 - t1) == 0) t2 =
TimePoint ();
547 return (result / measurements) * 1000;
551 Timer Profiler::find_timer(
string tag) {
552 for(
auto t : timers_) {
553 if (t.tag() == tag)
return t;
559 std::string common_prefix( std::string a, std::string b ) {
560 if( a.size() > b.size() )
std::swap(a,b) ;
561 return std::string( a.begin(), std::mismatch( a.begin(), a.end(), b.begin() ).first ) ;
566 template<
typename ReduceFunctor>
567 void Profiler::add_timer_info(ReduceFunctor reduce,
nlohmann::json* holder,
int timer_idx,
double parent_time) {
570 Timer &timer = timers_[timer_idx];
575 string filepath = timer.code_point_->file_;
578 #ifdef FLOW123D_SOURCE_DIR
579 string common_path = common_prefix (
string(FLOW123D_SOURCE_DIR), filepath);
580 filepath.erase (0, common_path.size());
587 double cumul_time_sum;
588 node[
"tag"] = timer.tag();
589 node[
"file-path"] = filepath;
590 node[
"file-line"] = timer.code_point_->line_;
591 node[
"function"] = timer.code_point_->func_;
592 cumul_time_sum = reduce(timer, node);
596 if (timer_idx == 0) parent_time = cumul_time_sum;
597 double percent = parent_time > 1.0e-10 ? cumul_time_sum / parent_time * 100.0 : 0.0;
598 node[
"percent"] = percent;
605 for (
unsigned int i = 0; i < Timer::max_n_childs; i++) {
606 if (timer.child_timers[i] != timer_no_child)
607 child_timers_values.push_back(timer.child_timers[i]);
609 std::sort(child_timers_values.begin(), child_timers_values.end());
611 for(
int idx : child_timers_values)
612 add_timer_info(reduce, &children, idx, cumul_time_sum);
615 if (child_timers_values.size() > 0) {
616 node[
"children"] = children;
625 void save_nonmpi_metric (
nlohmann::json &node, T * ptr,
string name) {
626 node[name +
"-min"] = *ptr;
627 node[name +
"-max"] = *ptr;
628 node[name +
"-sum"] = *ptr;
633 string _profiler_output_path(
string path) {
634 if (path ==
"") path =
"profiler_info.log.json";
638 std::shared_ptr<std::ostream> _profiler_output_stream(
const string &path) {
639 return make_shared<ofstream>(path.c_str());
643 #ifdef FLOW123D_HAVE_MPI
652 int mpi_rank, mpi_size;
659 bool temp_memory_monitoring = global_monitor_memory;
672 int call_count = timer.call_count;
673 double cumul_time = timer.cumulative_time ();
675 long memory_allocated = (long)timer.total_allocated_;
676 long memory_deallocated = (
long)timer.total_deallocated_;
677 long memory_peak = (long)timer.max_allocated_;
679 int alloc_called = timer.alloc_called;
680 int dealloc_called = timer.dealloc_called;
683 save_mpi_metric<double>(node, comm, &cumul_time,
"cumul-time");
684 save_mpi_metric<int>(node, comm, &call_count,
"call-count");
686 save_mpi_metric<long>(node, comm, &memory_allocated,
"memory-alloc");
687 save_mpi_metric<long>(node, comm, &memory_deallocated,
"memory-dealloc");
688 save_mpi_metric<long>(node, comm, &memory_peak,
"memory-peak");
690 save_mpi_metric<int>(node, comm, &alloc_called,
"memory-alloc-called");
691 save_mpi_metric<int>(node, comm, &dealloc_called,
"memory-dealloc-called");
693 #ifdef FLOW123D_HAVE_PETSC
694 long petsc_memory_difference = (long)timer.petsc_memory_difference;
695 long petsc_peak_memory = (
long)timer.petsc_peak_memory;
696 save_mpi_metric<long>(node, comm, &petsc_memory_difference,
"memory-petsc-diff");
697 save_mpi_metric<long>(node, comm, &petsc_peak_memory,
"memory-petsc-peak");
703 add_timer_info (reduce, &jsonChildren, 0, 0.0);
704 jsonRoot[
"children"] = jsonChildren;
705 output_header(jsonRoot, mpi_size);
716 const int FLOW123D_JSON_HUMAN_READABLE = 2;
718 os << jsonRoot.
dump(FLOW123D_JSON_HUMAN_READABLE) << endl;
720 }
catch (exception & e) {
722 ss <<
"nlohmann::json::dump error: " << e.what() <<
"\n";
723 THROW( ExcMessage() << EI_Message(ss.str()) );
737 string out_path = _profiler_output_path(profiler_path);
738 output(comm, *_profiler_output_stream(out_path));
760 const int FLOW123D_MPI_SINGLE_PROCESS = 1;
761 output_header(jsonRoot, FLOW123D_MPI_SINGLE_PROCESS);
768 int call_count = timer.call_count;
769 double cumul_time = timer.cumulative_time ();
771 long memory_allocated = (long)timer.total_allocated_;
772 long memory_deallocated = (
long)timer.total_deallocated_;
773 long memory_peak = (long)timer.max_allocated_;
775 int alloc_called = timer.alloc_called;
776 int dealloc_called = timer.dealloc_called;
778 save_nonmpi_metric<double>(node, &cumul_time,
"cumul-time");
779 save_nonmpi_metric<int>(node, &call_count,
"call-count");
781 save_nonmpi_metric<long>(node, &memory_allocated,
"memory-alloc");
782 save_nonmpi_metric<long>(node, &memory_deallocated,
"memory-dealloc");
783 save_nonmpi_metric<long>(node, &memory_peak,
"memory-peak");
785 save_nonmpi_metric<int>(node, &alloc_called,
"memory-alloc-called");
786 save_nonmpi_metric<int>(node, &dealloc_called,
"memory-dealloc-called");
788 #ifdef FLOW123D_HAVE_PETSC
789 long petsc_memory_difference = (long)timer.petsc_memory_difference;
790 long petsc_peak_memory = (
long)timer.petsc_peak_memory;
791 save_nonmpi_metric<long>(node, &petsc_memory_difference,
"memory-petsc-diff");
792 save_nonmpi_metric<long>(node, &petsc_peak_memory,
"memory-petsc-peak");
798 add_timer_info(reduce, &jsonChildren, 0, 0.0);
799 jsonRoot[
"children"] = jsonChildren;
806 const int FLOW123D_JSON_HUMAN_READABLE = 2;
808 os << jsonRoot.
dump(FLOW123D_JSON_HUMAN_READABLE) << endl;
810 }
catch (exception & e) {
812 ss <<
"nlohmann::json::dump error: " << e.what() <<
"\n";
813 THROW( ExcMessage() << EI_Message(ss.str()) );
819 string out_path = _profiler_output_path(profiler_path);
820 output(*_profiler_output_stream(out_path));
824 void Profiler::output_header (
nlohmann::json &root,
int mpi_size) {
825 time_t end_time = time(NULL);
827 const char format[] =
"%x %X";
828 char start_time_string[BUFSIZ] = {0};
829 strftime(start_time_string,
sizeof (start_time_string) - 1,
format, localtime(&start_time));
831 char end_time_string[BUFSIZ] = {0};
832 strftime(end_time_string,
sizeof (end_time_string) - 1,
format, localtime(&end_time));
834 if (timers_[0].cumul_time > 60) {
838 root[
"program-name"] = flow_name_;
839 root[
"program-version"] = flow_version_;
840 root[
"program-branch"] = flow_branch_;
841 root[
"program-revision"] = flow_revision_;
842 root[
"program-build"] = flow_build_;
844 root[
"timer-calibration"] = calibration_time_;
847 root[
"task-description"] = task_description_;
848 root[
"task-size"] = task_size_;
851 root[
"run-process-count"] = mpi_size;
852 root[
"run-started-at"] = start_time_string;
853 root[
"run-finished-at"] = end_time_string;
862 .error(
"Forbidden to uninitialize the Profiler when actual timer is not zero.");
868 bool Profiler::global_monitor_memory =
false;
869 bool Profiler::petsc_monitor_memory =
false;
871 global_monitor_memory = global_monitor;
872 petsc_monitor_memory = global_monitor;
875 unordered_map_with_alloc & MemoryAlloc::malloc_map() {
876 static unordered_map_with_alloc static_malloc_map;
877 return static_malloc_map;
880 void * Profiler::operator
new (
size_t size) {
881 return malloc (size);
884 void Profiler::operator
delete (
void* p) {
889 void * p = malloc(size);
890 if (Profiler::get_global_memory_monitoring())
896 void * p = malloc(size);
897 if (Profiler::get_global_memory_monitoring())
902 void *
operator new[] (std::size_t size,
const std::nothrow_t&)
throw() {
903 void * p = malloc(size);
904 if (Profiler::get_global_memory_monitoring())
909 void operator delete(
void *p)
throw() {
910 if (Profiler::get_global_memory_monitoring())
915 void operator delete(
void *p, std::size_t)
throw() {
916 if (Profiler::get_global_memory_monitoring())
921 void operator delete[](
void *p)
throw() {
922 if (Profiler::get_global_memory_monitoring())
927 void operator delete[](
void *p, std::size_t)
throw() {
928 if (Profiler::get_global_memory_monitoring())
#define ASSERT_PERMANENT_LT(a, b)
Definition of comparative assert macro (Less Than)
#define ASSERT_PERMANENT(expr)
Allow use shorter versions of macro names if these names is not used with external library.
#define ASSERT_PERMANENT_EQ(a, b)
Definition of comparative assert macro (EQual)
Dedicated class for storing path to input and output files.
static int sum(int *val, MPI_Comm comm)
static int min(int *val, MPI_Comm comm)
static int max(int *val, MPI_Comm comm)
static void uninitialize()
void notify_malloc(const size_t)
void output(MPI_Comm, ostream &)
void notify_free(const size_t)
static void set_memory_monitoring(bool)
static Profiler * instance(bool clear=false)
void set_program_info(string, string, string, string, string)
void set_task_info(string, int)
double get_resolution() const
a class to store JSON values
void push_back(basic_json &&val)
add an object to an array
string_t dump(const int indent=-1) const
serialization
#define THROW(whole_exception_expr)
Wrapper for throw. Saves the throwing point.
#define WarningOut()
Macro defining 'warning' record of log.
#define LogOut()
Macro defining 'log' record of log.
manipulators::Array< T, Delim > format(T const &deduce, Delim delim=", ")
#define MPI_Barrier(comm)
#define MPI_Reduce(sendbuf, recvbuf, count, datatype, op, root, comm)
std::string format(CStringRef format_str, ArgList args)
void swap(nlohmann::json &j1, nlohmann::json &j2) noexcept(is_nothrow_move_constructible< nlohmann::json >::value and is_nothrow_move_assignable< nlohmann::json >::value)
exchanges the values of two JSON objects
#define END_TIMER(tag)
Ends a timer with specified tag.
#define START_TIMER(tag)
Starts a timer with specified tag.
#define OPERATOR_NEW_THROW_EXCEPTION
void chkerr(unsigned int ierr)
Replacement of new/delete operator in the spirit of xmalloc.
STREAM & operator<<(STREAM &s, UpdateFlags u)