/*========================================================================= Program: Visualization Toolkit Module: vtkReservoirSampler.h Copyright (c) Ken Martin, Will Schroeder, Bill Lorensen All rights reserved. See Copyright.txt or http://www.kitware.com/Copyright.htm for details. This software is distributed WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the above copyright notice for more information. =========================================================================*/ /** * @class vtkReservoirSampler * @brief Generate a monotonic sequence of integers that randomly kk-sample * a range without substitution. * * Given a sequence of size nn, we wish to choose kk random values from the array. * This class returns kk (or fewer, if nn < kk) indices in the range [0, nn-1[ that * are ordered from smallest to largest. * * The algorithm is an implementation of Kim-Hung Li's approach, known as * "Algorithm L" and documented in the article "Reservoir-Sampling Algorithms of * Time Complexity O(n(1+log(N/n)))". ACM Transactions on Mathematical Software. * 20 (4): 481–493. doi:10.1145/198429.198435. */ #ifndef vtkReservoirSampler_h #define vtkReservoirSampler_h #include "vtkAbstractArray.h" #include "vtkCommonMathModule.h" #include #include #include #include #include class VTKCOMMONMATH_EXPORT vtkReservoirSamplerBase { protected: using SeedType = typename std::random_device::result_type; static SeedType RandomSeed(); }; template class vtkReservoirSampler : public vtkReservoirSamplerBase { public: /// Choose kk items from a sequence of (0, nn - 1). /// /// This will throw an exception if kk <= 0. const std::vector& operator()(Integer kk, Integer nn) const { VTK_THREAD_LOCAL static std::vector data; this->GenerateSample(kk, nn, data); return data; } /// Choose kk items from a sequence of (0, \a array->GetNumberOfTuples() - 1). /// /// This will throw an exception if kk <= 0. const std::vector& operator()(Integer kk, vtkAbstractArray* array) const { VTK_THREAD_LOCAL static std::vector data; if (!array) { throw std::invalid_argument("Null arrays are disallowed."); } if (array->GetNumberOfTuples() > std::numeric_limits::max()) { throw std::invalid_argument("Array size would overflow integer type."); } this->GenerateSample(kk, array->GetNumberOfTuples(), data); return data; } protected: void GenerateSample(Integer kk, Integer nn, std::vector& data) const { if (nn < kk) { kk = nn; } if (kk < 0) { throw std::invalid_argument( "You must choose a non-negative number of values from a proper sequence."); } data.resize(kk); if (kk == 0) { return; } // I. Fill the output with the first kk values. Integer ii; for (ii = 0; ii < kk; ++ii) { data[ii] = ii; } if (kk == nn) { return; } std::mt19937 generator(vtkReservoirSampler::RandomSeed()); std::uniform_real_distribution<> unitUniform(0., 1.); std::uniform_int_distribution randomIndex(0, kk - 1); double w = exp(log(unitUniform(generator)) / kk); while (true) { Integer delta = static_cast(floor(log(unitUniform(generator)) / log(1.0 - w)) + 1); if (delta < 0) { // If delta overflows the size of the integer, we are done. break; } // Be careful here since delta may be large and nn may be // at or near numeric_limits::max(). if (nn - ii > delta) { Integer jj = randomIndex(generator); #if 0 std::cout << " i " << ii << " δ " << delta << " w " << w << " → j " << jj << "\n"; #endif ii += delta; data[jj] = ii; w *= exp(log(unitUniform(generator)) / kk); } else { // Adding delta to ii would step beyond our sequence size, // so we are done. break; } } if (Monotonic) { std::sort(data.begin(), data.end()); } } }; #endif // vtkReservoirSampler_h // VTK-HeaderTest-Exclude: vtkReservoirSampler.h