#pragma once #include #include #include #include #include #include #include #include namespace DB { struct Settings; namespace ErrorCodes { extern const int BAD_ARGUMENTS; } /// Because ranks are adjusted, we have to store each of them in Float type. using RanksArray = std::vector; template std::pair computeRanksAndTieCorrection(const Values & values) { const size_t size = values.size(); /// Save initial positions, than sort indices according to the values. std::vector indexes(size); iota(indexes.data(), indexes.size(), size_t(0)); std::sort(indexes.begin(), indexes.end(), [&] (size_t lhs, size_t rhs) { return values[lhs] < values[rhs]; }); size_t left = 0; Float64 tie_numenator = 0; RanksArray out(size); while (left < size) { size_t right = left; while (right < size && values[indexes[left]] == values[indexes[right]]) ++right; auto adjusted = (left + right + 1.) / 2.; auto count_equal = right - left; /// Scipy implementation throws exception in this case too. if (count_equal == size) throw Exception(ErrorCodes::BAD_ARGUMENTS, "All numbers in both samples are identical"); tie_numenator += std::pow(count_equal, 3) - count_equal; for (size_t iter = left; iter < right; ++iter) out[indexes[iter]] = adjusted; left = right; } return {out, 1 - (tie_numenator / (std::pow(size, 3) - size))}; } template struct StatisticalSample { using AllocatorXSample = MixedAlignedArenaAllocator; using SampleX = PODArray; using AllocatorYSample = MixedAlignedArenaAllocator; using SampleY = PODArray; SampleX x{}; SampleY y{}; size_t size_x{0}; size_t size_y{0}; void addX(X value, Arena * arena) { if (isNaN(value)) return; ++size_x; x.push_back(value, arena); } void addY(Y value, Arena * arena) { if (isNaN(value)) return; ++size_y; y.push_back(value, arena); } void merge(const StatisticalSample & rhs, Arena * arena) { size_x += rhs.size_x; size_y += rhs.size_y; x.insert(rhs.x.begin(), rhs.x.end(), arena); y.insert(rhs.y.begin(), rhs.y.end(), arena); } void write(WriteBuffer & buf) const { writeVarUInt(size_x, buf); writeVarUInt(size_y, buf); buf.write(reinterpret_cast(x.data()), size_x * sizeof(x[0])); buf.write(reinterpret_cast(y.data()), size_y * sizeof(y[0])); } void read(ReadBuffer & buf, Arena * arena) { readVarUInt(size_x, buf); readVarUInt(size_y, buf); x.resize(size_x, arena); y.resize(size_y, arena); buf.readStrict(reinterpret_cast(x.data()), size_x * sizeof(x[0])); buf.readStrict(reinterpret_cast(y.data()), size_y * sizeof(y[0])); } }; }