Darwin  1.10(beta)
Macros | Functions
drwnStatsUtils.h File Reference

Generic statistical utilities. More...

Go to the source code of this file.

Macros

#define EXP_A   (1048576.0 / M_LN2)
 
#define EXP_C   60801
 
#define M_LN2   0.69314718055994530942
 

Functions

void drwnInitializeRand ()
 initialize the standard C library random number generator with a time-of-day seed
 
template<typename T >
drwn::minElem (const vector< T > &v)
 returns the minimum element in a vector of objects
 
template<typename T >
drwn::maxElem (const vector< T > &v)
 returns the maximum element in a vector of objects
 
template<typename T >
drwn::mean (const vector< T > &v)
 returns the mean of all elements in a vector of objects
 
template<typename T >
drwn::median (const vector< T > &v)
 returns the median element in a vector of objects
 
template<typename T >
drwn::destructive_median (vector< T > &w)
 returns the median element in a vector of objects (but may modify the vector's contents)
 
template<typename T >
drwn::mode (const vector< T > &v)
 returns the most frequent element in a vector of objects
 
template<typename T >
drwn::variance (const vector< T > &v)
 returns the variance (second moment about the mean) of all elements in a vector of objects
 
template<typename T >
drwn::stdev (const vector< T > &v)
 returns the standard deviation of all elements in a vector of objects
 
template<typename T >
int drwn::argmin (const vector< T > &v)
 returns the index of the smallest element in a vector of objects
 
int drwn::argmin (const VectorXd &v)
 returns the index of the smallest element in a vector of objects
 
template<typename T >
vector< int > drwn::argmins (const vector< vector< T > > &v)
 returns the index of the smallest element in each vector within a vector of vectors of objects
 
template<typename T >
int drwn::argmax (const vector< T > &v)
 returns the index of the largest element in a vector of objects
 
int drwn::argmax (const VectorXd &v)
 returns the index of the largest element in a vector of objects
 
template<typename T >
vector< int > drwn::argmaxs (const vector< vector< T > > &v)
 returns the index of the largest element in each vector within a vector of vectors of objects
 
int drwn::argrand (const vector< double > &v)
 returns the index for a random element sampled in proportion to the size of the element from a vector of positive entries
 
int drwn::argrand (const VectorXd &v)
 returns the index for a random element sampled in proportion to the size of the element from a vector of positive entries
 
template<typename T >
drwn::excessKurtosis (const vector< T > &v)
 returns the excess kurtosis for a vector of objects
 
template<typename T >
vector< float > drwn::percentiles (const vector< T > &v)
 
template<typename T >
pair< T, T > drwn::range (const vector< T > &v)
 returns the minimum and maximum values in a vector of objects
 
template<typename T >
pair< T, T > drwn::range (const vector< vector< T > > &v)
 returns the minimum and maximum values in a vector of vector of objects
 
template<typename T >
vector< T > drwn::extractSubVector (const vector< T > &v, const vector< int > &indx)
 select an ordered subvector from a vector
 
template<typename T >
vector< T > drwn::removeOutliers (const vector< T > &v, const vector< double > &scores, int keepSize)
 removes (v.size() - keepSize)/2 minimum and maximum entries
 
template<typename T >
set< set< T > > drwn::powerset (const set< T > &s)
 generate powerset of a set
 
int drwn::roundUp (int n, int d)
 rounds (away from zero) to nearest discretization
 
bool drwn::containsInvalidEntries (const vector< double > &v)
 returns true if the vector contains NaN or Inf values
 
double drwn::logistic (const vector< double > &theta, const vector< double > &data)
 logistic function $y = \frac{1}{1 + \exp\left\{- \theta^T x\right\}}$
 
double drwn::logistic (const double *theta, const double *data, int n)
 logistic function $y = \frac{1}{1 + \exp\left\{- \theta^T x\right\}}$
 
double drwn::entropy (const std::vector< double > &p)
 computes the entropy of a possibly unnormalized distribution
 
double drwn::entropy (const std::vector< int > &counts)
 computes the entropy of a frequency histogram
 
double drwn::gini (const std::vector< double > &p)
 computes the gini impurity of a possibly unnormalized distribution
 
double drwn::gini (const std::vector< int > &p)
 computes the gini impurity of a frequency histogram
 
double drwn::expAndNormalize (std::vector< double > &v)
 exponentiates and normalizes a vector in-place; returns log of the normalization constant
 
double drwn::expAndNormalize (VectorXd &v)
 exponentiates and normalizes a vector in-place; returns log of the normalization constant
 
double drwn::fastexp (double x)
 fast exponentiation
 
vector< int > drwn::randomPermutation (int n)
 compute a random permutation of the numbers [0..n-1]
 
template<typename T >
void drwn::shuffle (vector< T > &v)
 randomly permutes the entries of a vector in place
 
template<typename T >
vector< T > drwn::subSample (const vector< T > &v, size_t n)
 extract a subsample of size n from a vector
 
vector< double > drwn::linSpaceVector (double startValue, double endValue, unsigned n=10)
 generate a vector of linearly-spaced values from startValue to endValue
 
vector< double > drwn::logSpaceVector (double startValue, double endValue, unsigned n=10)
 generate a vector of logarithmically-spaced values from startValue to endValue
 
void drwn::predecessor (std::vector< int > &array, int limit)
 Computes the predecessor of a discrete vector, for example, predecessor([1 0 0], 2) produces [0 0 0]. Each position must have the same cardinality.
 
void drwn::successor (std::vector< int > &array, int limit)
 Computes the successor of a discrete vector, for example, successor([1 0 0], 2) produces [0 1 0]. Each position must have the same cardinality.
 
void drwn::predecessor (std::vector< int > &array, const std::vector< int > &limits)
 Computes the predecessor of a discrete vector, for example, predecessor([1 0 0], [2 2 2]) produces [0 0 0]. Each position can have different cardinality.
 
void drwn::successor (std::vector< int > &array, const std::vector< int > &limits)
 Computes the successor of a discrete vector, for example, successor([1 0 0], [2 2 2]) produces [0 1 0]. Each position can have different cardinality.
 
double drwn::huberFunction (double x, double m=1.0)
 huber penalty function, $y = x^2$ for $|x| \leq m$ and $y = m (2|x| - m)$ otherwise
 
double drwn::huberDerivative (double x, double m=1.0)
 derivative of huberFunction at x
 
double drwn::huberFunctionAndDerivative (double x, double *df, double m=1.0)
 huber penalty function and derivative at x
 
double drwn::bhattacharyyaDistance (std::vector< double > &p, std::vector< double > &q)
 Computes the Bhattacharyya distance between two discrete probability distributions. The distributions do not need to be normalized.
 
double drwn::euclideanDistanceSq (std::vector< double > &p, std::vector< double > &q)
 Computes the squared Euclidean distance between two discrete probability distributions. The distributions do not need to be normalized.
 
double drwn::sum (const vector< double > &v)
 sum the elements in a vector
 
double drwn::sum (const double *v, size_t length)
 sum the elements in a vector
 
double drwn::dot (const double *x, const double *y, size_t length)
 dot product between elements in two vectors
 
double drwn::dot (const vector< double > &x, const vector< double > &y)
 dot product between elements in two vectors
 
bool drwn::eq (const double x, const double y)
 whether two numbers are equal
 
bool drwn::lt (const double x, const double y)
 whether first number is less than second number
 

Detailed Description

Generic statistical utilities.

Function Documentation

◆ percentiles()

template<typename T >
vector< float > drwn::percentiles ( const vector< T > &  v)
Todo:
can change from O(n^2) to O(n log n) by using a sorting implementation