30 using namespace Eigen;
32 #if defined(_WIN32)||defined(WIN32)||defined(__WIN32__) 47 inline T
minElem(
const vector<T>& v);
51 inline T
maxElem(
const vector<T>& v);
55 T
mean(
const vector<T>& v);
59 T
median(
const vector<T>& v);
68 T
mode(
const vector<T>& v);
76 T
stdev(
const vector<T>& v);
80 int argmin(
const vector<T>& v);
//! Returns the index of the smallest element in the Eigen vector \p v.
//! NOTE(review): behaviour for an empty vector is not visible in this header; confirm in the .cpp.
83 int argmin(
const VectorXd &v);
87 vector<int>
argmins(
const vector<vector<T> >& v);
91 int argmax(
const vector<T>& v);
//! Returns the index of the largest element in the Eigen vector \p v.
//! NOTE(review): behaviour for an empty vector is not visible in this header; confirm in the .cpp.
94 int argmax(
const VectorXd &v);
98 vector<int>
argmaxs(
const vector<vector<T> >& v);
//! Returns the index of a random element of \p v, sampled in proportion to
//! the size of each element.
//! NOTE(review): presumably entries must be non-negative weights -- confirm against the .cpp.
102 int argrand(
const vector<double>& v);
//! Eigen-vector overload of argrand.
//! NOTE(review): assumed to follow the same sampling rule as the std::vector overload -- confirm.
105 int argrand(
const VectorXd &v);
108 template <
typename T>
111 template <
typename T>
//! Returns the minimum and maximum values in a vector of objects as a
//! (min, max) pair. The definition asserts that \p v is non-empty.
115 template <
typename T>
116 pair<T, T>
range(
const vector<T>& v);
//! Returns the minimum and maximum values over all inner vectors of a vector
//! of vectors of objects as a (min, max) pair. The definition asserts that
//! the outer vector is non-empty.
119 template <
typename T>
120 pair<T, T>
range(
const vector<vector<T> >& v);
123 template <
typename T>
127 template <
typename T>
129 const vector<double>& scores,
int keepSize);
//! Generates the powerset (the set of all subsets) of the set \p s.
//! The definition works recursively on smaller sets and always includes the empty set.
132 template <
typename T>
133 set<set<T> >
powerset(
const set<T>& s);
//! Logistic function of parameters \p theta and input \p data.
//! NOTE(review): presumably evaluated on the inner product of theta and data,
//! with both vectors the same length -- confirm against the .cpp.
142 double logistic(
const vector<double>& theta,
const vector<double>& data);
//! Raw-pointer overload of the logistic function.
//! NOTE(review): \p n is presumably the number of entries in both arrays -- confirm.
144 double logistic(
const double *theta,
const double *data,
int n);
//! Computes the entropy of a possibly unnormalized distribution \p p.
//! NOTE(review): the logarithm base is not visible in this header; confirm in the .cpp.
147 double entropy(
const std::vector<double>& p);
//! Integer-count overload of entropy.
//! NOTE(review): presumably treats \p counts as an unnormalized histogram -- confirm.
149 double entropy(
const std::vector<int>& counts);
//! Computes the gini impurity of a possibly unnormalized distribution \p p.
152 double gini(
const std::vector<double>& p);
//! Integer overload of gini.
//! NOTE(review): presumably treats \p p as unnormalized counts -- confirm.
154 double gini(
const std::vector<int>& p);
//! Fast exponentiation of \p x.
//! NOTE(review): the definition later in this header appears to be an
//! approximation that returns 0.0 for arguments below -700; confirm the
//! accuracy is acceptable before using it in place of std::exp.
162 inline double fastexp(
double x);
168 template <
typename T>
//! Extracts a subsample of \p n elements from the vector \p v.
//! The definition returns \p v unchanged when n >= v.size() and an empty
//! vector when n == 0; otherwise elements are chosen using rand().
172 template <
typename T>
173 vector<T>
subSample(
const vector<T>& v,
size_t n);
//! Generates a vector of linearly-spaced values from \p startValue to
//! \p endValue. \p n defaults to 10.
//! NOTE(review): \p n is presumably the number of values generated, endpoints included -- confirm.
176 vector<double>
linSpaceVector(
double startValue,
double endValue,
unsigned n = 10);
//! Generates a vector of logarithmically-spaced values from \p startValue to
//! \p endValue. \p n defaults to 10.
//! NOTE(review): \p n is presumably the number of values generated, and both
//! endpoints presumably need to be positive -- confirm against the .cpp.
178 vector<double>
logSpaceVector(
double startValue,
double endValue,
unsigned n = 10);
//! Computes, in place, the predecessor of a discrete vector; for example,
//! predecessor([1 0 0], 2) produces [0 0 0].
//! NOTE(review): \p limit is presumably the exclusive upper bound on every entry -- confirm.
183 void predecessor(std::vector<int>& array,
int limit);
//! Computes, in place, the successor of a discrete vector; for example,
//! successor([1 0 0], 2) produces [0 1 0].
//! NOTE(review): \p limit is presumably the exclusive upper bound on every entry -- confirm.
187 void successor(std::vector<int>& array,
int limit);
//! Overload of predecessor taking a vector of limits.
//! NOTE(review): presumably limits[i] bounds array[i] and both vectors must
//! have the same length -- confirm against the .cpp.
191 void predecessor(std::vector<int>& array,
const std::vector<int>& limits);
//! Overload of successor taking a vector of limits.
//! NOTE(review): presumably limits[i] bounds array[i] and both vectors must
//! have the same length -- confirm against the .cpp.
195 void successor(std::vector<int>& array,
const std::vector<int>& limits);
//! Sums the elements in a vector.
213 double sum(
const vector<double> &v);
//! Raw-pointer overload of sum.
//! NOTE(review): presumably sums the first \p length entries of \p v -- confirm.
215 double sum(
const double *v,
size_t length);
//! Dot product between elements in two vectors, given as raw arrays.
//! NOTE(review): \p length is presumably the number of entries in both arrays -- confirm.
218 double dot(
const double *x,
const double *y,
size_t length);
//! std::vector overload of the dot product.
//! NOTE(review): presumably requires x and y to have equal size -- confirm.
220 double dot(
const vector<double>& x,
const vector<double>& y);
//! Returns whether two numbers are equal.
//! NOTE(review): whether a tolerance is applied is not visible in this header; confirm in the .cpp.
223 bool eq(
const double x,
const double y);
//! Returns whether the first number is less than the second number.
//! NOTE(review): whether a tolerance is applied is not visible in this header; confirm in the .cpp.
225 bool lt(
const double x,
const double y);
230 template <
typename T>
234 case 0: DRWN_LOG_FATAL(
"invalid size");
break;
235 case 1:
return v.front();
break;
236 case 2:
return std::min(v.front(), v.back());
break;
240 for (
typename vector<T>::const_iterator i = v.begin() + 1; i != v.end(); ++i) {
241 minObj = std::min(minObj, *i);
247 template <
typename T>
251 case 0: DRWN_LOG_FATAL(
"invalid size");
break;
252 case 1:
return v.front();
break;
253 case 2:
return std::max(v.front(), v.back());
break;
257 for (
typename vector<T>::const_iterator i = v.begin() + 1; i != v.end(); ++i) {
258 maxObj = std::max(maxObj, *i);
264 template <
typename T>
267 DRWN_ASSERT(v.size() > 0);
271 for (
typename vector<T>::const_iterator i = v.begin(); i != v.end(); ++i) {
275 return sum / T(v.size());
278 template <
typename T>
281 DRWN_ASSERT(v.size() > 0);
284 if (w.size() % 2 == 1) {
285 int ix = w.size() / 2;
286 nth_element(w.begin(), w.begin()+ix, w.end());
290 int ix_sup = w.size()/2;
291 nth_element(w.begin(), w.begin() + ix_sup, w.end());
292 nth_element(w.begin(), w.begin() + ix_sup - 1, w.begin()+ ix_sup);
293 return T(0.5 * ( w[ix_sup] + w[ix_sup-1] ));
297 template <
typename T>
300 DRWN_ASSERT(w.size() > 0);
301 if (w.size() % 2 == 1) {
302 int ix = w.size() / 2;
303 nth_element(w.begin(), w.begin()+ix, w.end());
307 int ix_sup = w.size()/2;
308 nth_element(w.begin(), w.begin() + ix_sup, w.end());
309 nth_element(w.begin(), w.begin() + ix_sup - 1, w.begin()+ ix_sup);
310 return T(0.5 * ( w[ix_sup] + w[ix_sup-1] ));
314 template <
typename T>
317 DRWN_ASSERT(v.size() > 0);
320 typename vector<T>::const_iterator modeElement = v.begin();
321 for (
typename vector<T>::const_iterator it = v.begin(); it != v.end(); it++) {
322 typename map<T, int>::iterator jt = w.find(*it);
324 jt = w.insert(w.end(), make_pair(*it, 0));
329 if (jt->second > maxCount) {
337 template <
typename T>
340 DRWN_ASSERT(v.size() > 0);
345 for (
typename vector<T>::const_iterator i = v.begin(), last = v.end(); i != last; ++i) {
346 double dev = *i - mu;
350 return sum / T(v.size());
353 template <
typename T>
357 return (std2 > 0.0 ? sqrt(std2) : 0.0);
360 template <
typename T>
366 case 0: minIndx = -1;
break;
367 case 1: minIndx = 0;
break;
368 case 2: minIndx = (v[0] <= v[1]) ? 0 : 1;
break;
372 for (
int i = 1; i < (int)v.size(); i++) {
373 if (v[i] < v[minIndx]) {
383 template <
typename T>
386 vector<int> minIndx(v.size(), -1);
387 for (
int i = 0; i < (int)v.size(); i++) {
388 minIndx[i] =
argmin(v[i]);
394 template <
typename T>
400 case 0: maxIndx = -1;
break;
401 case 1: maxIndx = 0;
break;
402 case 2: maxIndx = (v[0] >= v[1]) ? 0 : 1;
break;
406 for (
int i = 1; i < (int)v.size(); i++) {
407 if (v[i] > v[maxIndx]) {
417 template <
typename T>
420 vector<int> maxIndx(v.size(), -1);
421 for (
int i = 0; i < (int)v.size(); i++) {
422 maxIndx[i] =
argmax(v[i]);
428 template <
typename T>
431 DRWN_ASSERT(!v.empty());
437 for (
typename vector<T>::const_iterator i = v.begin(), last = v.end(); i != last; ++i) {
438 double dev = *i - mu;
439 double sqDev = dev * dev;
440 sum += sqDev * sqDev;
443 return sum / ( T(v.size() * sigma_squared * sigma_squared)) - 3.0;
446 template <
typename T>
451 for (
int i = 0; i < v.size(); i++) {
453 for (
int j = 0; j < v.size(); j++) {
457 rval.push_back(
float(sum)/
float(v.size()));
462 template <
typename T>
465 DRWN_ASSERT(v.size() > 0);
467 typename vector<T>::const_iterator minObj(v.begin());
468 typename vector<T>::const_iterator maxObj(v.begin());
469 for (
typename vector<T>::const_iterator i = v.begin() + 1;
471 if (*i < *minObj) minObj = i;
472 if (*i > *maxObj) maxObj = i;
475 return make_pair(*minObj, *maxObj);
478 template <
typename T>
481 DRWN_ASSERT(v.size() > 0);
483 pair<T, T> r =
range(*v.begin());
484 for (
typename vector<vector<T> >::const_iterator i = v.begin() + 1;
486 pair<T, T> ri =
range(*i);
487 if (ri.first < r.first)
489 if (ri.second > r.second)
490 r.second = ri.second;
496 template <
typename T>
501 w.reserve(indx.size());
502 for (vector<int>::const_iterator it = indx.begin(); it != indx.end(); ++it) {
509 template <
typename T>
511 const vector<double>& scores,
int keepSize)
513 DRWN_ASSERT(scores.size() == v.size());
514 if (keepSize >= (
int)v.size()) {
519 vector<pair<double, int> > indx(v.size());
520 for (
unsigned i = 0; i < v.size(); i++) {
521 indx[i] = make_pair(scores[i], i);
523 sort(indx.begin(), indx.end());
525 vector<T> w(keepSize);
526 unsigned startIndx = (v.size() - keepSize) / 2;
527 unsigned endIndx = startIndx + keepSize;
528 for (
unsigned i = startIndx; i < endIndx; i++) {
529 w[i - startIndx] = v[indx[i].second];
535 template <
typename T>
541 result.insert(set<T>());
543 for (
typename set<T>::const_iterator it = s.begin(); it != s.end(); ++it) {
551 set<set<T> > smallP =
powerset(smallS);
552 result.insert(smallP.begin(), smallP.end());
556 for (
typename set<set<T> >::const_iterator jt = smallP.begin();
557 jt != smallP.end(); ++jt) {
572 #define EXP_A (1048576.0 / M_LN2) 576 #define M_LN2 0.69314718055994530942 581 if (y < -700.0)
return 0.0;
587 struct {
int j, i; } n;
589 struct {
int i, j; } n;
592 _eco.n.i = (int)(EXP_A * (y)) + (1072693248 - EXP_C);
597 template <
typename T>
600 const size_t n = v.size();
602 for (
size_t i = 0; i < n - 1; i++) {
603 size_t j = rand() % (n - i);
604 std::swap(v[i], v[i + j]);
608 template <
typename T>
611 if (n >= v.size())
return v;
612 if (n == 0)
return vector<T>();
618 for (
size_t i = 0; i < n; i++) {
619 size_t j = rand() % (w.size() - i);
620 std::swap(w[i], w[i + j]);
630 if (x < -m)
return (m * (-2.0 * x - m));
631 if (x > m)
return (m * (2.0 * x - m));
638 if (x < -m)
return -2.0 * m;
639 if (x > m)
return 2.0 * m;
648 return (m * (-2.0 * x - m));
651 return (m * (2.0 * x - m));
659 return (n % d == 0) ? n : n + d - (n % d);
double logistic(const vector< double > &theta, const vector< double > &data)
logistic function
Definition: drwnStatsUtils.cpp:61
vector< T > removeOutliers(const vector< T > &v, const vector< double > &scores, int keepSize)
removes (v.size() - keepSize)/2 minimum and maximum entries
Definition: drwnStatsUtils.h:510
int argmin(const VectorXd &v)
returns the index of the smallest element in a vector of objects
Definition: drwnStatsUtils.cpp:399
T stdev(const vector< T > &v)
returns the standard deviation of all elements in a vector of objects
Definition: drwnStatsUtils.h:354
pair< T, T > range(const vector< T > &v)
returns the minimum and maximum values in a vector of objects
Definition: drwnStatsUtils.h:463
vector< T > extractSubVector(const vector< T > &v, const vector< int > &indx)
select an ordered subvector from a vector
Definition: drwnStatsUtils.h:497
T destructive_median(vector< T > &w)
returns the median element in a vector of objects (but may modify the vector's contents) ...
Definition: drwnStatsUtils.h:298
T mean(const vector< T > &v)
returns the mean of all elements in a vector of objects
Definition: drwnStatsUtils.h:265
vector< int > argmaxs(const vector< vector< T > > &v)
returns the index of the largest element in each inner vector of a vector of vectors of objects
Definition: drwnStatsUtils.h:418
set< set< T > > powerset(const set< T > &s)
generate powerset of a set
Definition: drwnStatsUtils.h:536
pair< T, T > range(const vector< vector< T > > &v)
returns the minimum and maximum values in a vector of vector of objects
Definition: drwnStatsUtils.h:479
Definition: acknowledgments.dox:1
vector< float > percentiles(const vector< T > &v)
Definition: drwnStatsUtils.h:447
int argmax(const VectorXd &v)
returns the index of the largest element in a vector of objects
Definition: drwnStatsUtils.cpp:413
vector< double > linSpaceVector(double startValue, double endValue, unsigned n=10)
generate a vector of linearly-spaced values from startValue to endValue
Definition: drwnStatsUtils.cpp:209
T excessKurtosis(const vector< T > &v)
returns the excess kurtosis (kurtosis minus 3) for a vector of objects
Definition: drwnStatsUtils.h:429
int argmax(const vector< T > &v)
returns the index of the largest element in a vector of objects
Definition: drwnStatsUtils.h:395
double euclideanDistanceSq(std::vector< double > &p, std::vector< double > &q)
Computes the Euclidean norm between two discrete probability distributions. The distributions do not ...
Definition: drwnStatsUtils.cpp:320
double huberFunction(double x, double m=1.0)
huber penalty function, $x^2$ for $|x| \leq m$ and $m(2|x| - m)$ otherwise
Definition: drwnStatsUtils.h:628
double expAndNormalize(std::vector< double > &v)
exponentiates and normalizes a vector in-place; returns log of the normalization constant ...
Definition: drwnStatsUtils.cpp:159
T variance(const vector< T > &v)
returns the variance (second moment about the mean) of all elements in a vector of objects ...
Definition: drwnStatsUtils.h:338
vector< int > argmins(const vector< vector< T > > &v)
returns the index of the smallest element in each inner vector of a vector of vectors of objects ...
Definition: drwnStatsUtils.h:384
vector< double > logSpaceVector(double startValue, double endValue, unsigned n=10)
generate a vector of logarithmically-spaced values from startValue to endValue
Definition: drwnStatsUtils.cpp:227
double dot(const double *x, const double *y, size_t length)
dot product between elements in two vectors
Definition: drwnStatsUtils.cpp:346
void successor(std::vector< int > &array, int limit)
Computes the successor of a discrete vector, for example, successor([1 0 0], 2) produces [0 1 0]...
Definition: drwnStatsUtils.cpp:260
T minElem(const vector< T > &v)
returns the minimum element in a vector of objects
Definition: drwnStatsUtils.h:231
vector< T > subSample(const vector< T > &v, size_t n)
extract a random subsample of size n from a vector
Definition: drwnStatsUtils.h:609
vector< int > randomPermutation(int n)
compute a random permutation of the numbers [0..n-1]
Definition: drwnStatsUtils.cpp:188
double sum(const vector< double > &v)
sum the elements in a vector
Definition: drwnStatsUtils.cpp:330
void predecessor(std::vector< int > &array, int limit)
Computes the predecessor of a discrete vector, for example, predecessor([1 0 0], 2) produces [0 0 0]...
Definition: drwnStatsUtils.cpp:246
bool eq(const double x, const double y)
whether two numbers are equal
Definition: drwnStatsUtils.cpp:447
double huberFunctionAndDerivative(double x, double *df, double m=1.0)
huber penalty function and derivative at x
Definition: drwnStatsUtils.h:644
double bhattacharyyaDistance(std::vector< double > &p, std::vector< double > &q)
Computes the Bhattacharyya distance between two discrete probability distributions. The distributions do not need to be normalized.
Definition: drwnStatsUtils.cpp:302
double fastexp(double x)
fast exponentiation
Definition: drwnStatsUtils.h:579
double huberDerivative(double x, double m=1.0)
derivative of huberFunction at x
Definition: drwnStatsUtils.h:636
void shuffle(vector< T > &v)
randomly permutes the entries of a vector in place
Definition: drwnStatsUtils.h:598
bool lt(const double x, const double y)
whether first number is less than second number
Definition: drwnStatsUtils.cpp:452
T mode(const vector< T > &v)
returns the most frequent element in a vector of objects
Definition: drwnStatsUtils.h:315
T maxElem(const vector< T > &v)
returns the maximum element in a vector of objects
Definition: drwnStatsUtils.h:248
void drwnInitializeRand()
initialize the standard C library random number generator with a time-of-day seed ...
Definition: drwnStatsUtils.cpp:34
int argmin(const vector< T > &v)
returns the index of the smallest element in a vector of objects
Definition: drwnStatsUtils.h:361
bool containsInvalidEntries(const vector< double > &v)
returns true if the vector contains NaN or Inf values
Definition: drwnStatsUtils.cpp:45
T median(const vector< T > &v)
returns the median element in a vector of objects
Definition: drwnStatsUtils.h:279
int roundUp(int n, int d)
rounds (away from zero) to nearest discretization
Definition: drwnStatsUtils.h:658
double entropy(const std::vector< double > &p)
computes the entropy of a possibly unnormalized distribution
Definition: drwnStatsUtils.cpp:86
int argrand(const vector< double > &v)
returns the index for a random element sampled in proportion to the size of the element from a vector...
Definition: drwnStatsUtils.cpp:427
double gini(const std::vector< double > &p)
computes the gini impurity of a possibly unnormalized distribution
Definition: drwnStatsUtils.cpp:115