#pragma once
#include
#include
#include
#include
#include
#include
#include
#include
#include
#if defined(OS_LINUX)
#include
#else
struct taskstats {};
#endif
/** Implement ProfileEvents with statistics about resource consumption of the current thread.
*/
namespace ProfileEvents
{
extern const Event RealTimeMicroseconds;
extern const Event UserTimeMicroseconds;
extern const Event SystemTimeMicroseconds;
extern const Event SoftPageFaults;
extern const Event HardPageFaults;
}
namespace DB
{
/// Handles overflow
template
inline TUInt safeDiff(TUInt prev, TUInt curr)
{
return curr >= prev ? curr - prev : 0;
}
struct RUsageCounters
{
/// In nanoseconds
UInt64 real_time = 0;
UInt64 user_time = 0;
UInt64 sys_time = 0;
UInt64 soft_page_faults = 0;
UInt64 hard_page_faults = 0;
UInt64 thread_id = 0;
RUsageCounters() = default;
RUsageCounters(const ::rusage & rusage_, UInt64 real_time_)
{
set(rusage_, real_time_);
}
void set(const ::rusage & rusage, UInt64 real_time_)
{
real_time = real_time_;
user_time = rusage.ru_utime.tv_sec * 1000000000UL + rusage.ru_utime.tv_usec * 1000UL;
sys_time = rusage.ru_stime.tv_sec * 1000000000UL + rusage.ru_stime.tv_usec * 1000UL;
soft_page_faults = static_cast(rusage.ru_minflt);
hard_page_faults = static_cast(rusage.ru_majflt);
thread_id = getThreadId();
}
static RUsageCounters current()
{
::rusage rusage {};
#if !defined(OS_DARWIN)
#if defined(OS_SUNOS)
::getrusage(RUSAGE_LWP, &rusage);
#else
::getrusage(RUSAGE_THREAD, &rusage);
#endif // OS_SUNOS
#endif // __APPLE
return RUsageCounters(rusage, getClockMonotonic());
}
static void incrementProfileEvents(const RUsageCounters & prev, const RUsageCounters & curr, ProfileEvents::Counters & profile_events)
{
chassert(prev.thread_id == curr.thread_id);
/// LONG_MAX is ~106751 days
chassert(curr.real_time - prev.real_time < LONG_MAX);
chassert(curr.user_time - prev.user_time < LONG_MAX);
chassert(curr.sys_time - prev.sys_time < LONG_MAX);
profile_events.increment(ProfileEvents::RealTimeMicroseconds, (curr.real_time - prev.real_time) / 1000U);
profile_events.increment(ProfileEvents::UserTimeMicroseconds, (curr.user_time - prev.user_time) / 1000U);
profile_events.increment(ProfileEvents::SystemTimeMicroseconds, (curr.sys_time - prev.sys_time) / 1000U);
profile_events.increment(ProfileEvents::SoftPageFaults, curr.soft_page_faults - prev.soft_page_faults);
profile_events.increment(ProfileEvents::HardPageFaults, curr.hard_page_faults - prev.hard_page_faults);
}
static void updateProfileEvents(RUsageCounters & last_counters, ProfileEvents::Counters & profile_events)
{
auto current_counters = current();
incrementProfileEvents(last_counters, current_counters, profile_events);
last_counters = current_counters;
}
private:
static UInt64 getClockMonotonic()
{
struct timespec ts;
if (0 != clock_gettime(CLOCK_MONOTONIC, &ts))
throw std::system_error(std::error_code(errno, std::system_category()));
return ts.tv_sec * 1000000000ULL + ts.tv_nsec;
}
};
#if defined(OS_LINUX)
struct PerfEventInfo
{
// see perf_event.h/perf_type_id enum
int event_type;
// see configs in perf_event.h
int event_config;
ProfileEvents::Event profile_event;
std::string settings_name;
};
struct PerfEventValue
{
UInt64 value = 0;
UInt64 time_enabled = 0;
UInt64 time_running = 0;
};
static constexpr size_t NUMBER_OF_RAW_EVENTS = 22;
struct PerfDescriptorsHolder : boost::noncopyable
{
int descriptors[NUMBER_OF_RAW_EVENTS]{};
PerfDescriptorsHolder();
~PerfDescriptorsHolder();
void releaseResources();
};
struct PerfEventsCounters
{
PerfDescriptorsHolder thread_events_descriptors_holder;
// time_enabled and time_running can't be reset, so we have to store the
// data from the previous profiling period and calculate deltas to them,
// to be able to properly account for counter multiplexing.
PerfEventValue previous_values[NUMBER_OF_RAW_EVENTS]{};
void initializeProfileEvents(const std::string & events_list);
void finalizeProfileEvents(ProfileEvents::Counters & profile_events);
void closeEventDescriptors();
bool processThreadLocalChanges(const std::string & needed_events_list);
static std::vector eventIndicesFromString(const std::string & events_list);
};
// Perf event creation is moderately heavy, so we create them once per thread and
// then reuse.
extern thread_local PerfEventsCounters current_thread_counters;
#else
// the functionality is disabled when we are not running on Linux.
struct PerfEventsCounters
{
void initializeProfileEvents(const std::string & /* events_list */) {}
void finalizeProfileEvents(ProfileEvents::Counters & /* profile_events */) {}
void closeEventDescriptors() {}
};
extern PerfEventsCounters current_thread_counters;
#endif
#if defined(OS_LINUX)
class TasksStatsCounters
{
public:
enum class MetricsProvider : uint8_t
{
None,
Procfs,
Netlink,
};
static const char * metricsProviderString(MetricsProvider provider);
static bool checkIfAvailable();
static MetricsProvider findBestAvailableProvider();
static std::unique_ptr create(UInt64 tid);
void reset();
void updateCounters(ProfileEvents::Counters & profile_events);
private:
::taskstats stats;
std::function<::taskstats()> stats_getter;
explicit TasksStatsCounters(UInt64 tid, MetricsProvider provider);
static void incrementProfileEvents(const ::taskstats & prev, const ::taskstats & curr, ProfileEvents::Counters & profile_events);
};
#else
class TasksStatsCounters
{
public:
static bool checkIfAvailable() { return false; }
static std::unique_ptr create(const UInt64 /*tid*/) { return {}; }
void reset() {}
void updateCounters(ProfileEvents::Counters &) {}
private:
TasksStatsCounters(const UInt64 /*tid*/) {}
};
#endif
}