#pragma once #include #include #include #include #include namespace DB { /** * A DDSketch is a fully-mergeable quantile sketch with relative-error guarantees. That is, for any value x, * the value returned by the sketch is guaranteed to be in the (1 +- epsilon) * x range. The sketch is * parameterized by a relative accuracy epsilon, which is the maximum relative error of any quantile estimate. * * The sketch is implemented as a set of logarithmically-spaced bins. Each bin is a pair of a value and a count. * * The sketch is fully mergeable, meaning that the merge of two sketches is equivalent to the sketch of the * union of the input datasets. The memory size of the sketch depends on the range that is covered by * the input values: the larger that range, the more bins are needed to keep track of the input values. * As a rough estimate, if working on durations using DDSketches.unboundedDense(0.02) (relative accuracy of 2%), * about 2kB (275 bins) are needed to cover values between 1 millisecond and 1 minute, and about 6kB (802 bins) * to cover values between 1 nanosecond and 1 day. * * This implementation maintains the binary compatibility with the DDSketch ProtoBuf format * https://github.com/DataDog/sketches-java/blob/master/src/protobuf/proto/DDSketch.proto. * Which enables sending the pre-aggregated sketches to the ClickHouse server and calculating the quantiles * during the query time. See DDSketchEncoding.h for byte-level details. * */ template class QuantileDD { public: using Weight = UInt64; QuantileDD() = default; explicit QuantileDD(Float64 relative_accuracy) : data(relative_accuracy) { } void add(const Value & x) { add(x, 1); } void add(const Value & x, Weight w) { if (!isNaN(x)) data.add(x, w); } void merge(const QuantileDD &other) { data.merge(other.data); } void serialize(WriteBuffer & buf) const { data.serialize(buf); } void deserialize(ReadBuffer & buf) { data.deserialize(buf); } Value get(Float64 level) const { return getImpl(level); } void getMany(const Float64 * levels, const size_t * indices, size_t size, Value * result) const { getManyImpl(levels, indices, size, result); } Float64 getFloat(Float64 level) const { return getImpl(level); } void getManyFloat(const Float64 * levels, const size_t * indices, size_t size, Float64 * result) const { getManyImpl(levels, indices, size, result); } private: DDSketchDenseLogarithmic data; template T getImpl(Float64 level) const { return static_cast(data.get(level)); } template void getManyImpl(const Float64 * levels, const size_t *, size_t num_levels, T * result) const { for (size_t i = 0; i < num_levels; ++i) result[i] = getImpl(levels[i]); } }; }