163template <
typename Container>
165 -> std::decay_t<
decltype(*std::begin(data))>
167 using T = std::decay_t<
decltype(*std::begin(data))>;
169 for (
const auto & x : data)
182template <
typename Container>
184 -> std::decay_t<
decltype(*std::begin(data))>
186 using T = std::decay_t<
decltype(*std::begin(data))>;
189 for (
const auto & x : data)
195 throw std::invalid_argument(
"mean: empty container");
196 return s /
static_cast<T>(n);
214template <
typename Container>
216 -> std::decay_t<
decltype(*std::begin(data))>
218 using T = std::decay_t<
decltype(*std::begin(data))>;
225 for (
const auto & x : data)
229 m += delta /
static_cast<T>(n);
237 throw std::invalid_argument(
"variance: empty container");
238 return m2 /
static_cast<T>(n);
243 throw std::invalid_argument(
"variance: need at least 2 elements for sample variance");
244 return m2 /
static_cast<T>(n - 1);
256template <
typename Container>
258 -> std::decay_t<
decltype(*std::begin(data))>
260 return std::sqrt(
variance(data, population));
271template <
typename Container>
273 -> std::decay_t<
decltype(*std::begin(data))>
275 auto it = std::begin(data);
276 auto end = std::end(data);
278 throw std::invalid_argument(
"min_value: empty container");
281 for (++it; it != end; ++it)
295template <
typename Container>
297 -> std::decay_t<
decltype(*std::begin(data))>
299 auto it = std::begin(data);
300 auto end = std::end(data);
302 throw std::invalid_argument(
"max_value: empty container");
305 for (++it; it != end; ++it)
319template <
typename Container>
321 -> std::pair<std::decay_t<
decltype(*std::begin(data))>,
322 std::decay_t<
decltype(*std::begin(data))>>
324 using T = std::decay_t<
decltype(*std::begin(data))>;
325 auto it = std::begin(data);
326 auto end = std::end(data);
328 throw std::invalid_argument(
"min_max: empty container");
332 for (++it; it != end; ++it)
357template <
typename Container>
359 -> std::decay_t<
decltype(*std::begin(data))>
361 using T = std::decay_t<
decltype(*std::begin(data))>;
364 throw std::invalid_argument(
"percentile: p must be in [0, 100]");
367 std::vector<T>
sorted(std::begin(data), std::end(data));
369 throw std::invalid_argument(
"percentile: empty container");
379 double index = (p / 100.0) * (
sorted.
size() - 1);
380 size_t lower =
static_cast<size_t>(std::floor(index));
381 size_t upper =
static_cast<size_t>(std::ceil(index));
399template <
typename Container>
401 -> std::decay_t<
decltype(*std::begin(data))>
413template <
typename Container>
415 -> std::tuple<std::decay_t<
decltype(*std::begin(data))>,
416 std::decay_t<
decltype(*std::begin(data))>,
417 std::decay_t<
decltype(*std::begin(data))>>
419 using T = std::decay_t<
decltype(*std::begin(data))>;
433template <
typename Container>
435 -> std::decay_t<
decltype(*std::begin(data))>
455template <
typename Container>
457 -> std::decay_t<
decltype(*std::begin(data))>
459 using T = std::decay_t<
decltype(*std::begin(data))>;
461 auto it = std::begin(data);
462 auto end = std::end(data);
464 throw std::invalid_argument(
"mode: empty container");
466 std::map<T, size_t>
freq;
467 for (
const auto & x : data)
492template <
typename Container>
495 using T = std::decay_t<
decltype(*std::begin(data))>;
497 std::map<T, size_t>
freq;
498 for (
const auto & x : data)
535template <
typename Container>
537 -> std::decay_t<
decltype(*std::begin(data))>
539 using T = std::decay_t<
decltype(*std::begin(data))>;
549 for (
const auto & x : data)
551 T diff = (x - m) / s;
557 throw std::invalid_argument(
"skewness: need at least 3 elements");
560 T factor =
static_cast<T>(n) / ((n - 1) * (n - 2));
561 return factor *
sum3;
578template <
typename Container>
580 -> std::decay_t<
decltype(*std::begin(data))>
582 using T = std::decay_t<
decltype(*std::begin(data))>;
592 for (
const auto & x : data)
594 T diff = (x - m) / s;
601 throw std::invalid_argument(
"kurtosis: need at least 4 elements");
604 T n_t =
static_cast<T>(n);
621template <
typename Container>
623 -> std::decay_t<
decltype(*std::begin(data))>
625 using T = std::decay_t<
decltype(*std::begin(data))>;
628 throw std::invalid_argument(
"coefficient_of_variation: mean is zero");
629 return stddev(data) / std::abs(m);
647template <
typename Container1,
typename Container2>
649 bool population =
false)
650 -> std::decay_t<
decltype(*std::begin(x))>
652 using T = std::decay_t<
decltype(*std::begin(x))>;
654 auto it_x = std::begin(x);
655 auto it_y = std::begin(
y);
656 auto end_x = std::end(x);
679 throw std::invalid_argument(
"covariance: containers have different sizes");
684 throw std::invalid_argument(
"covariance: empty containers");
685 return c /
static_cast<T>(n);
690 throw std::invalid_argument(
"covariance: need at least 2 elements");
691 return c /
static_cast<T>(n - 1);
710template <
typename Container1,
typename Container2>
712 -> std::decay_t<
decltype(*std::begin(x))>
714 using T = std::decay_t<
decltype(*std::begin(x))>;
721 throw std::invalid_argument(
"correlation: one or both datasets have zero variance");
739template <
typename Container>
741 -> std::vector<std::pair<std::decay_t<
decltype(*std::begin(data))>,
744 using T = std::decay_t<
decltype(*std::begin(data))>;
747 throw std::invalid_argument(
"histogram: num_bins must be > 0");
755 return {{
min_val, std::distance(std::begin(data), std::end(data))}};
762 for (
const auto & x : data)
770 std::vector<std::pair<T, size_t>> result;
773 for (
size_t i = 0; i <
num_bins; ++i)
776 result.emplace_back(center,
counts[i]);
793template <
typename Container>
795 ->
Stats<std::decay_t<
decltype(*std::begin(data))>>
797 using T = std::decay_t<
decltype(*std::begin(data))>;
801 for (
const auto & x : data)
810 s.mean = s.sum /
static_cast<T>(s.count);
821 s.stddev = std::sqrt(s.variance);
825 s.coef_variation = s.stddev / std::abs(s.mean);
873 T & avg,
T & var,
T &
med,
883 std::sort(data +
l, data + r + 1);
899 for (
int i =
l; i <= r; ++i)
902 T delta = data[i] - m;
903 m += delta /
static_cast<T>(
k);
909 var = (n > 1) ?
m2 /
static_cast<T>(n - 1) :
T();
925 template <
typename Container>
927 std::decay_t<
decltype(*std::begin(data))> & avg,
928 std::decay_t<
decltype(*std::begin(data))> & var,
929 std::decay_t<
decltype(*std::begin(data))> &
med,
930 std::decay_t<
decltype(*std::begin(data))> &
_min,
931 std::decay_t<
decltype(*std::begin(data))> &
_max)
933 using T = std::decay_t<
decltype(*std::begin(data))>;
935 std::vector<T>
sorted(std::begin(data), std::end(data));
948 const size_t mid = n / 2;
958 for (
const auto & x :
sorted)
962 m += delta /
static_cast<T>(
k);
968 var = (n > 1) ?
m2 /
static_cast<T>(n - 1) :
T();
void empty() noexcept
empty the list
size_t size() const noexcept
Count the number of elements of the list.
iterator end() noexcept
Return an STL-compatible end iterator.
iterator begin() noexcept
Return an STL-compatible iterator to the first element.
Main namespace for Aleph-w library functions.
auto percentile(const Container &data, double p) -> std::decay_t< decltype(*std::begin(data))>
Compute a percentile value.
auto histogram(const Container &data, size_t num_bins) -> std::vector< std::pair< std::decay_t< decltype(*std::begin(data))>, size_t > >
Compute a histogram of the data.
auto variance(const Container &data, bool population=false) -> std::decay_t< decltype(*std::begin(data))>
Compute variance using Welford's numerically stable algorithm.
auto kurtosis(const Container &data) -> std::decay_t< decltype(*std::begin(data))>
Compute excess kurtosis (measure of tailedness).
const T * median(const T &a, const T &b, const T &c, const Compare &cmp=Compare())
Return a pointer to the median value among three elements.
std::decay_t< typename HeadC::Item_Type > T
auto covariance(const Container1 &x, const Container2 &y, bool population=false) -> std::decay_t< decltype(*std::begin(x))>
Compute covariance between two datasets.
auto stddev(const Container &data, bool population=false) -> std::decay_t< decltype(*std::begin(data))>
Compute standard deviation.
auto mean(const Container &data) -> std::decay_t< decltype(*std::begin(data))>
Compute the arithmetic mean.
bool diff(const C1 &c1, const C2 &c2, Eq e=Eq())
Check if two containers differ.
auto min_max(const Container &data) -> std::pair< std::decay_t< decltype(*std::begin(data))>, std::decay_t< decltype(*std::begin(data))> >
Compute minimum and maximum values in one pass.
auto min_value(const Container &data) -> std::decay_t< decltype(*std::begin(data))>
Compute minimum value.
auto skewness(const Container &data) -> std::decay_t< decltype(*std::begin(data))>
Compute skewness (measure of asymmetry).
auto iqr(const Container &data) -> std::decay_t< decltype(*std::begin(data))>
Compute the interquartile range (IQR = Q3 - Q1).
auto compute_all_stats(const Container &data) -> Stats< std::decay_t< decltype(*std::begin(data))> >
Compute all statistics for a dataset.
auto correlation(const Container1 &x, const Container2 &y) -> std::decay_t< decltype(*std::begin(x))>
Compute Pearson correlation coefficient.
Container< T > range(const T start, const T end, const T step=1)
Generate a range of values [start, end] with a given step.
bool is_multimodal(const Container &data)
Check if data is multimodal.
auto coefficient_of_variation(const Container &data) -> std::decay_t< decltype(*std::begin(data))>
Compute coefficient of variation (CV = stddev / mean).
auto mode(const Container &data) -> std::decay_t< decltype(*std::begin(data))>
Compute the mode (most frequent value).
auto quartiles(const Container &data) -> std::tuple< std::decay_t< decltype(*std::begin(data))>, std::decay_t< decltype(*std::begin(data))>, std::decay_t< decltype(*std::begin(data))> >
Compute quartiles (Q1, Q2, Q3).
auto max_value(const Container &data) -> std::decay_t< decltype(*std::begin(data))>
Compute maximum value.
void compute_stats(T *data, int l, int r, T &avg, T &var, T &med, T &_min, T &_max)
Compute basic descriptive statistics for an array range.
DynList< T > maps(const C &c, Op op)
Classic map operation.
Itor::difference_type count(const Itor &beg, const Itor &end, const T &value)
Count elements equal to a value.
T sum(const Container &container, const T &init=T{})
Compute sum of all elements.
Container for comprehensive statistical results.
T q1
First quartile (25th percentile)
size_t count
Number of elements.
T skewness
Skewness (asymmetry)
T q3
Third quartile (75th percentile)
T variance
Sample variance.
T coef_variation
Coefficient of variation (stddev/mean)
T median
Median (50th percentile)
bool is_valid() const noexcept
Check if statistics are valid.
T iqr
Interquartile range (Q3 - Q1)
T range() const noexcept
Get the range (max - min).
T kurtosis
Excess kurtosis (tailedness)
T stddev
Standard deviation.
Lazy and scalable dynamic array implementation.
Comprehensive sorting algorithms and search utilities for Aleph-w.