Disabled external gits

2022-04-07 18:46:57 +02:00
parent 88cb3426ad
commit 15e7120d6d
5316 changed files with 4563444 additions and 6 deletions
--- a/cs440-acg/ext/hypothesis/LICENSE
+++ b/cs440-acg/ext/hypothesis/LICENSE
@@ -0,0 +1,21 @@
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+    * Neither the name of the <organization> nor the
+      names of its contributors may be used to endorse or promote products
+      derived from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL <COPYRIGHT HOLDER> BE LIABLE FOR ANY
+DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
--- a/cs440-acg/ext/hypothesis/README.md
+++ b/cs440-acg/ext/hypothesis/README.md
@@ -0,0 +1,7 @@
+# hypothesis.h
+## A collection of quantiles and utility functions for running Z, Chi^2, and Student's T hypothesis tests
+
+A variety of quantile functions are needed to perform statistical hypothesis
+tests, but these are missing from the C++ standard library. This compact,
+header-only library contains the most important quantiles; it is mostly a
+wrapper around a C++ port of the relevant functions from the Cephes math library.
--- a/cs440-acg/ext/hypothesis/cephes.h
+++ b/cs440-acg/ext/hypothesis/cephes.h
@@ -0,0 +1,404 @@
+/*
+    cephes.h: A subset of cephes math routines used by hypothesis.h
+
+    Redistributed under the BSD license with permission of the author, see
+    https://github.com/deepmind/torch-cephes/blob/master/LICENSE.txt
+
+    Redistribution and use in source and binary forms, with or without
+    modification, are permitted provided that the following conditions are met:
+        * Redistributions of source code must retain the above copyright
+          notice, this list of conditions and the following disclaimer.
+        * Redistributions in binary form must reproduce the above copyright
+          notice, this list of conditions and the following disclaimer in the
+          documentation and/or other materials provided with the distribution.
+        * Neither the name of the <organization> nor the
+          names of its contributors may be used to endorse or promote products
+          derived from this software without specific prior written permission.
+
+    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+    ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+    WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+    DISCLAIMED. IN NO EVENT SHALL <COPYRIGHT HOLDER> BE LIABLE FOR ANY
+    DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+    (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+    LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+    ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+    (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+    SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#pragma once
+
+#include <cmath>
+#include <stdexcept>
+
+namespace cephes {
+    /* Scaling factors and numeric limits shared by the routines in this namespace */
+
+    /// Roughly 2^-52; applied to the continued-fraction terms once they exceed \c big
+    static const double biginv =  2.22044604925031308085e-16;
+    /// Roughly 2^52; threshold and scale factor used when renormalizing continued-fraction terms
+    static const double big = 4.503599627370496e15;
+    /// The gamma function is only evaluated directly (std::tgamma) while a + b stays below this bound
+    static const double MAXGAM = 171.624376956302725;
+    /// Roughly 2^-53; unit in which the convergence thresholds of the series are expressed
+    static const double MACHEP = 1.11022302462515654042E-16;
+    /// Logarithms must stay below this magnitude for the direct power formula to be used
+    static const double MAXLOG = 7.09782712893383996843E2;
+    /// Results whose logarithm falls below this value are returned as zero
+    static const double MINLOG = -7.08396418532264106224E2;
+
+    /* Forward declarations of the helpers used by incbet(); they are defined further below */
+    static double pseries(double a, double b, double x);
+    static double incbd(double a, double b, double x);
+    static double incbcf(double a, double b, double x);
+
+    /**
+     * Incomplete beta integral, normalized so that the result is 0 at
+     * \c xx = 0 and 1 at \c xx = 1.
+     *
+     * Depending on the arguments, the value is obtained from the power series
+     * (\c pseries) or from one of two continued fraction expansions
+     * (\c incbcf, \c incbd). When \c xx lies above aa / (aa + bb), the
+     * parameters are swapped, the integral is evaluated at 1 - xx, and the
+     * result is reflected as 1 - t.
+     *
+     * \param aa  First shape parameter, must be positive
+     * \param bb  Second shape parameter, must be positive
+     * \param xx  Upper integration limit, must lie in [0, 1]
+     *
+     * \throws std::runtime_error
+     *   when \c aa <= 0, \c bb <= 0, or \c xx lies outside of [0, 1]
+     */
+    inline double incbet(double aa, double bb, double xx) {
+        if (aa <= 0.0 || bb <= 0.0)
+            throw std::runtime_error("incbet: domain error!");
+
+        if (xx <= 0.0 || xx >= 1.0) {
+            if (xx == 0.0)
+                return 0.0;
+            if (xx == 1.0)
+                return 1.0;
+            throw std::runtime_error("incbet: domain error!");
+        }
+
+        /* Small b*x: the power series applies directly, no reflection needed */
+        if (bb * xx <= 1.0 && xx <= 0.95)
+            return pseries(aa, bb, xx);
+
+        /* Reverse a and b if x is greater than the mean. */
+        const bool swapped = xx > aa / (aa + bb);
+        const double a  = swapped ? bb : aa;
+        const double b  = swapped ? aa : bb;
+        const double x  = swapped ? 1.0 - xx : xx;
+        const double xc = swapped ? xx : 1.0 - xx;
+
+        /* Undo the swap: the swapped evaluation yields the complementary
+           integral. Tiny values are clamped so the result stays below 1. */
+        const auto finish = [swapped](double t) {
+            if (!swapped)
+                return t;
+            return t <= MACHEP ? 1.0 - MACHEP : 1.0 - t;
+        };
+
+        if (swapped && b * x <= 1.0 && x <= 0.95)
+            return finish(pseries(a, b, x));
+
+        /* Choose expansion for better convergence. */
+        double w;
+        if (x * (a + b - 2.0) - (a - 1.0) < 0.0)
+            w = incbcf(a, b, x);
+        else
+            w = incbd(a, b, x) / xc;
+
+        /* Multiply w by the factor
+           x^a (1-x)^b Gamma(a+b) / (a Gamma(a) Gamma(b)) */
+        double y = a * std::log(x);
+        double t = b * std::log(xc);
+        if ((a + b) < MAXGAM && std::abs(y) < MAXLOG && std::abs(t) < MAXLOG) {
+            /* std::pow rather than the unqualified name: <cmath> is not
+               required to declare pow in the global namespace */
+            t = std::pow(xc, b);
+            t *= std::pow(x, a);
+            t /= a;
+            t *= w;
+            t *= std::tgamma(a + b) / (std::tgamma(a) * std::tgamma(b));
+            return finish(t);
+        }
+
+        /* Resort to logarithms. */
+        y += t + std::lgamma(a + b) - std::lgamma(a) - std::lgamma(b);
+        y += std::log(w / a);
+        return finish(y < MINLOG ? 0.0 : std::exp(y));
+    }
+
+    /* Continued fraction expansion #1
+     * for incomplete beta integral
+     *
+     * The fraction is evaluated with a forward recurrence on the
+     * numerator/denominator pair (pk, qk); every pass of the loop applies
+     * two partial steps and then compares the new ratio pk/qk against the
+     * previous one. The loop ends once the relative change drops below
+     * 3 * MACHEP or after 300 passes, whichever happens first. In the
+     * latter case the latest estimate is returned without an error.
+     */
+    inline static double incbcf(double a, double b, double x) {
+        double xk, pk, pkm1, pkm2, qk, qkm1, qkm2;
+        double k1, k2, k3, k4, k5, k6, k7, k8;
+        double r, t, ans, thresh;
+        int n;
+
+        /* k1..k4 build the coefficient of the first partial step,
+           k5..k8 the coefficient of the second one */
+        k1 = a;
+        k2 = a + b;
+        k3 = a;
+        k4 = a + 1.0;
+        k5 = 1.0;
+        k6 = b - 1.0;
+        k7 = k4;
+        k8 = a + 2.0;
+
+        /* Initial values of the recurrence */
+        pkm2 = 0.0;
+        qkm2 = 1.0;
+        pkm1 = 1.0;
+        qkm1 = 1.0;
+        ans = 1.0;
+        r = 1.0;
+        n = 0;
+        thresh = 3.0 * MACHEP;
+        do {
+
+            /* First partial step */
+            xk = -(x * k1 * k2) / (k3 * k4);
+            pk = pkm1 + pkm2 * xk;
+            qk = qkm1 + qkm2 * xk;
+            pkm2 = pkm1;
+            pkm1 = pk;
+            qkm2 = qkm1;
+            qkm1 = qk;
+
+            /* Second partial step */
+            xk = (x * k5 * k6) / (k7 * k8);
+            pk = pkm1 + pkm2 * xk;
+            qk = qkm1 + qkm2 * xk;
+            pkm2 = pkm1;
+            pkm1 = pk;
+            qkm2 = qkm1;
+            qkm1 = qk;
+
+            /* New estimate; when qk is zero the previous ratio r is reused */
+            if (qk != 0)
+                r = pk / qk;
+            /* Relative change with respect to the previous estimate */
+            if (r != 0) {
+                t = std::abs((ans - r) / r);
+                ans = r;
+            } else
+                t = 1.0;
+
+            if (t < thresh)
+                goto cdone;
+
+            /* Advance the coefficients to the next pair of partial steps */
+            k1 += 1.0;
+            k2 += 1.0;
+            k3 += 2.0;
+            k4 += 2.0;
+            k5 += 1.0;
+            k6 -= 1.0;
+            k7 += 2.0;
+            k8 += 2.0;
+
+            /* Rescale when the terms become very large or very small. All
+               four stored terms are multiplied by the same factor. */
+            if ((std::abs(qk) + std::abs(pk)) > big) {
+                pkm2 *= biginv;
+                pkm1 *= biginv;
+                qkm2 *= biginv;
+                qkm1 *= biginv;
+            }
+            if ((std::abs(qk) < biginv) || (std::abs(pk) < biginv)) {
+                pkm2 *= big;
+                pkm1 *= big;
+                qkm2 *= big;
+                qkm1 *= big;
+            }
+        } while (++n < 300);
+
+    cdone:
+        return (ans);
+    }
+
+    /* Continued fraction expansion #2
+     * for incomplete beta integral
+     *
+     * Same recurrence scheme as expansion #1, but written in terms of
+     * z = x / (1 - x) and with a different coefficient sequence. Every
+     * pass of the loop applies two partial steps; the loop ends once the
+     * relative change of the ratio pk/qk drops below 3 * MACHEP or after
+     * 300 passes, whichever happens first. In the latter case the latest
+     * estimate is returned without an error.
+     */
+    inline static double incbd(double a, double b, double x) {
+        double xk, pk, pkm1, pkm2, qk, qkm1, qkm2;
+        double k1, k2, k3, k4, k5, k6, k7, k8;
+        double r, t, ans, z, thresh;
+        int n;
+
+        /* k1..k4 build the coefficient of the first partial step,
+           k5..k8 the coefficient of the second one */
+        k1 = a;
+        k2 = b - 1.0;
+        k3 = a;
+        k4 = a + 1.0;
+        k5 = 1.0;
+        k6 = a + b;
+        k7 = a + 1.0;
+        k8 = a + 2.0;
+
+        /* Initial values of the recurrence */
+        pkm2 = 0.0;
+        qkm2 = 1.0;
+        pkm1 = 1.0;
+        qkm1 = 1.0;
+        z = x / (1.0 - x);
+        ans = 1.0;
+        r = 1.0;
+        n = 0;
+        thresh = 3.0 * MACHEP;
+        do {
+
+            /* First partial step */
+            xk = -(z * k1 * k2) / (k3 * k4);
+            pk = pkm1 + pkm2 * xk;
+            qk = qkm1 + qkm2 * xk;
+            pkm2 = pkm1;
+            pkm1 = pk;
+            qkm2 = qkm1;
+            qkm1 = qk;
+
+            /* Second partial step */
+            xk = (z * k5 * k6) / (k7 * k8);
+            pk = pkm1 + pkm2 * xk;
+            qk = qkm1 + qkm2 * xk;
+            pkm2 = pkm1;
+            pkm1 = pk;
+            qkm2 = qkm1;
+            qkm1 = qk;
+
+            /* New estimate; when qk is zero the previous ratio r is reused */
+            if (qk != 0)
+                r = pk / qk;
+            /* Relative change with respect to the previous estimate */
+            if (r != 0) {
+                t = std::abs((ans - r) / r);
+                ans = r;
+            } else
+                t = 1.0;
+
+            if (t < thresh)
+                goto cdone;
+
+            /* Advance the coefficients to the next pair of partial steps */
+            k1 += 1.0;
+            k2 -= 1.0;
+            k3 += 2.0;
+            k4 += 2.0;
+            k5 += 1.0;
+            k6 += 1.0;
+            k7 += 2.0;
+            k8 += 2.0;
+
+            /* Rescale when the terms become very large or very small. All
+               four stored terms are multiplied by the same factor. */
+            if ((std::abs(qk) + std::abs(pk)) > big) {
+                pkm2 *= biginv;
+                pkm1 *= biginv;
+                qkm2 *= biginv;
+                qkm1 *= biginv;
+            }
+            if ((std::abs(qk) < biginv) || (std::abs(pk) < biginv)) {
+                pkm2 *= big;
+                pkm1 *= big;
+                qkm2 *= big;
+                qkm1 *= big;
+            }
+        } while (++n < 300);
+    cdone:
+        return (ans);
+    }
+
+    /**
+     * Power series for incomplete beta integral.
+     * Use when b*x is small and x not too close to 1.
+     *
+     * Terms are accumulated until one of them drops to MACHEP / a or below in
+     * magnitude. The sum is then scaled by x^a Gamma(a+b) / (Gamma(a) Gamma(b)),
+     * either directly or, when a + b reaches MAXGAM or |a log(x)| reaches
+     * MAXLOG, via logarithms (returning 0 if the logarithm falls below MINLOG).
+     *
+     * \param a  First shape parameter
+     * \param b  Second shape parameter
+     * \param x  Upper integration limit
+     */
+    inline static double pseries(double a, double b, double x) {
+        const double ai = 1.0 / a;
+        double u = (1.0 - b) * x;
+        double v = u / (a + 1.0);
+        const double t1 = v;          /* first term; added after the loop */
+        double t = u;
+        double n = 2.0;
+        double s = 0.0;
+        const double z = MACHEP * ai; /* termination threshold for a single term */
+        while (std::abs(v) > z) {
+            u = (n - b) * x / n;
+            t *= u;
+            v = t / (a + n);
+            s += v;
+            n += 1.0;
+        }
+        s += t1;
+        s += ai;
+
+        u = a * std::log(x);
+        if ((a + b) < MAXGAM && std::abs(u) < MAXLOG) {
+            t = std::tgamma(a + b) / (std::tgamma(a) * std::tgamma(b));
+            /* std::pow rather than the unqualified name: <cmath> is not
+               required to declare pow in the global namespace */
+            s = s * t * std::pow(x, a);
+        } else {
+            t = std::lgamma(a + b) - std::lgamma(a) - std::lgamma(b) + u + std::log(s);
+            if (t < MINLOG)
+                s = 0.0;
+            else
+                s = std::exp(t);
+        }
+        return s;
+    }
+
+    /**
+     * Regularized lower incomplete gamma function
+     *
+     * Uses a power series when x <= 1 or x <= a and a continued fraction
+     * (whose result is subtracted from one) otherwise.
+     *
+     * \throws std::runtime_error when \c a or \c x is negative
+     */
+    inline double rlgamma(double a, double x) {
+        const double tolerance = 0.000000000000001;
+
+        if (a < 0 || x < 0)
+            throw std::runtime_error("LLGamma: invalid arguments range!");
+
+        if (x == 0)
+            return 0.0;
+
+        /* Logarithm of the prefactor shared by both expansions */
+        const double logPrefactor = (a * std::log(x)) - x - std::lgamma(a);
+        if (logPrefactor < -709.78271289338399)
+            return a < x ? 1.0 : 0.0;
+
+        if (x <= 1 || x <= a) {
+            /* Power series: accumulate terms until they no longer matter */
+            double denom = a;
+            double term = 1;
+            double sum = 1;
+
+            do {
+                denom = denom + 1;
+                term = term * x / denom;
+                sum += term;
+            } while ((term / sum) > tolerance);
+
+            return std::exp(logPrefactor) * sum / a;
+        }
+
+        /* Continued fraction, evaluated through a two-term recurrence on
+           numerator (num*) and denominator (den*) */
+        double y = 1 - a;
+        double z = x + y + 1;
+        double numPrev = 1;
+        double denPrev = x;
+        double numCur = x + 1;
+        double denCur = z * x;
+        double estimate = numCur / denCur;
+
+        for (int step = 1; ; ++step) {
+            y += 1;
+            z += 2;
+            const double yStep = y * step;
+            const double numNext = (numCur * z) - (numPrev * yStep);
+            const double denNext = (denCur * z) - (denPrev * yStep);
+
+            /* A zero denominator is skipped and counts as "not converged" */
+            double relChange = 1;
+            if (denNext != 0) {
+                const double refined = numNext / denNext;
+                relChange = std::abs((estimate - refined) / refined);
+                estimate = refined;
+            }
+
+            numPrev = numCur;
+            numCur = numNext;
+            denPrev = denCur;
+            denCur = denNext;
+
+            /* normalize fraction when the numerator becomes large */
+            if (std::abs(numNext) > big) {
+                numPrev *= biginv;
+                numCur *= biginv;
+                denPrev *= biginv;
+                denCur *= biginv;
+            }
+
+            if (!(relChange > tolerance))
+                break;
+        }
+
+        return 1.0 - (std::exp(logPrefactor) * estimate);
+    }
+};
--- a/cs440-acg/ext/hypothesis/hypothesis.h
+++ b/cs440-acg/ext/hypothesis/hypothesis.h
@@ -0,0 +1,355 @@
+/*
+    hypothesis.h: A collection of quantile and quadrature routines
+    for Z, Chi^2, and Student's T hypothesis tests.
+
+    Redistribution and use in source and binary forms, with or without
+    modification, are permitted provided that the following conditions are met:
+        * Redistributions of source code must retain the above copyright
+          notice, this list of conditions and the following disclaimer.
+        * Redistributions in binary form must reproduce the above copyright
+          notice, this list of conditions and the following disclaimer in the
+          documentation and/or other materials provided with the distribution.
+        * Neither the name of the <organization> nor the
+          names of its contributors may be used to endorse or promote products
+          derived from this software without specific prior written permission.
+
+    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+    ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+    WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+    DISCLAIMED. IN NO EVENT SHALL <COPYRIGHT HOLDER> BE LIABLE FOR ANY
+    DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+    (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+    LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+    ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+    (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+    SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#pragma once
+
+#include <algorithm>
+#include <cmath>
+#include <fstream>
+#include <functional>
+#include <sstream>
+#include <stdexcept>
+#include <vector>
+#include "cephes.h"
+
+namespace hypothesis {
+    /// Standard normal CDF, evaluated through the complementary error function
+    inline double stdnormal_cdf(double x) {
+        const double scaled = -x/std::sqrt(2.0);
+        return 0.5*std::erfc(scaled);
+    }
+
+    /**
+     * CDF of the Chi^2 distribution with \c dof degrees of freedom
+     *
+     * Returns 0 when \c dof < 1 or \c x < 0. The case \c dof == 2 is evaluated
+     * in closed form; everything else is forwarded to cephes::rlgamma.
+     */
+    inline double chi2_cdf(double x, int dof) {
+        if (dof < 1 || x < 0)
+            return 0.0;
+
+        if (dof == 2)
+            return 1.0 - std::exp(-0.5*x);
+
+        return cephes::rlgamma(0.5 * dof, 0.5 * x);
+    }
+
+    /// CDF of Student's T distribution with \c dof degrees of freedom
+    inline double students_t_cdf(double x, int dof) {
+        /* Half of the incomplete beta integral: returned as-is for x <= 0,
+           and as its complement for x > 0 */
+        const double tail = 0.5*cephes::incbet(dof * 0.5, 0.5, dof/(x*x+dof));
+        return x > 0 ? 1-tail : tail;
+    }
+
+    /**
+     * Adaptive Simpson integration over a 1D interval
+     *
+     * \param f
+     *   Integrand
+     *
+     * \param x0
+     *   Lower integration limit
+     *
+     * \param x1
+     *   Upper integration limit
+     *
+     * \param eps
+     *   Error tolerance for the whole interval; it is halved on every
+     *   subdivision
+     *
+     * \param depth
+     *   Maximum number of recursive subdivisions
+     *
+     * \return
+     *   The estimated integral of \c f over [x0, x1]
+     */
+    inline double adaptiveSimpson(const std::function<double (double)> &f, double x0, double x1, double eps = 1e-6, int depth = 6) {
+        /* Define a recursive lambda function for integration over subintervals.
+           (a, b, c) are the endpoints and midpoint of the subinterval, (fa, fb, fc)
+           the matching function values, and I the coarse Simpson estimate. */
+        std::function<double (double, double, double, double, double, double, double, double, int)> integrate =
+            [&](double a, double b, double c, double fa, double fb, double fc, double I, double tol, int level) {
+            /* Evaluate the function at two intermediate points */
+            double d = 0.5 * (a + b), e = 0.5 * (b + c), fd = f(d), fe = f(e);
+
+            /* Simpson integration over each subinterval */
+            double h = c-a,
+                  I0 = (1.0/12.0) * h * (fa + 4.0*fd + fb),
+                  I1 = (1.0/12.0) * h * (fb + 4.0*fe + fc),
+                  Ip = I0+I1;
+
+            /* Stopping criterion from J.N. Lyness (1969)
+              "Notes on the adaptive Simpson quadrature routine" */
+            if (level <= 0 || std::abs(Ip-I) < 15.0*tol) {
+                // Richardson extrapolation
+                return Ip + (1.0/15.0) * (Ip-I);
+            }
+
+            /* Not converged yet: refine both halves with half the tolerance each */
+            return integrate(a, d, b, fa, fd, fb, I0, 0.5*tol, level-1) +
+                   integrate(b, e, c, fb, fe, fc, I1, 0.5*tol, level-1);
+        };
+        double a = x0, b = 0.5 * (x0+x1), c = x1;
+        double fa = f(a), fb = f(b), fc = f(c);
+        double I = (c-a) * (1.0/6.0) * (fa+4.0*fb+fc);
+        return integrate(a, b, c, fa, fb, fc, I, eps, depth);
+    }
+
+    /**
+     * Nested adaptive Simpson integration over the 2D rectangle
+     * [x0, x1] x [y0, y1]
+     *
+     * The outer integration runs over Y; for every Y value it requests, the
+     * inner integration runs over X. Both use the same \c eps and \c depth.
+     */
+    inline double adaptiveSimpson2D(const std::function<double (double, double)> &f, double x0, double y0,
+            double x1, double y1, double eps = 1e-6, int depth = 6) {
+        /* Integral of f(., y) over the X axis for a fixed y */
+        const std::function<double (double)> alongX = [&](double y) {
+            const std::function<double (double)> slice = [&f, y](double x) {
+                return f(x, y);
+            };
+            return adaptiveSimpson(slice, x0, x1, eps, depth);
+        };
+        return adaptiveSimpson(alongX, y0, y1, eps, depth);
+    }
+
+    /**
+     * Perform a Chi^2 test based on the given frequency tables
+     *
+     * \param nCells
+     *   Total number of table cells, i.e. the number of entries read from
+     *   \c obsFrequencies and \c expFrequencies
+     *
+     * \param obsFrequencies
+     *   Observed frequency of each cell
+     *
+     * \param expFrequencies
+     *   Expected frequency of each cell (i.e. the noise-free reference)
+     *
+     * \param sampleCount
+     *   Total observed sample count. It scales the tolerance for samples that
+     *   land in cells whose expected frequency is zero.
+     *
+     * \param minExpFrequency
+     *   Minimum expected cell frequency. The chi^2 test does not work reliably
+     *   when the expected frequency in a cell is low (e.g. less than 5), because
+     *   normality assumptions break down in this case. Cells below this
+     *   threshold are therefore merged into a single pooled cell.
+     *
+     * \param significanceLevel
+     *   The null hypothesis is rejected when the p-value falls below this level
+     *
+     * \param numTests
+     *   Total number of tests that will be executed. The Sidak correction is
+     *   applied to the significance level accordingly, because the probability
+     *   of a failure grows when several independent tests run in sequence.
+     *
+     * \return
+     *   A pair of values containing the test result (success: \c true and failure: \c false)
+     *   and a descriptive string
+     */
+    inline std::pair<bool, std::string> chi2_test(
+            int nCells, const double *obsFrequencies, const double *expFrequencies,
+            int sampleCount, double minExpFrequency, double significanceLevel, int numTests = 1) {
+
+        struct Cell {
+            double expFrequency;
+            size_t index;
+        };
+
+        /* Visit the cells in order of increasing expected frequency, so that
+           the low-frequency ones are encountered (and pooled) first */
+        std::vector<Cell> cells(nCells);
+        for (size_t idx = 0; idx < cells.size(); ++idx) {
+            cells[idx].expFrequency = expFrequencies[idx];
+            cells[idx].index = idx;
+        }
+        std::sort(cells.begin(), cells.end(), [](const Cell &lhs, const Cell &rhs) {
+            return lhs.expFrequency < rhs.expFrequency;
+        });
+
+        /* Accumulators for the statistic and for the pooled cell */
+        double pooledObserved = 0, pooledExpected = 0, statistic = 0;
+        int pooledCells = 0, dof = 0;
+
+        std::ostringstream oss;
+        for (const Cell &cell : cells) {
+            const double expected = expFrequencies[cell.index];
+
+            if (expected < 0) {
+                oss << "Encountered a negative expected number of samples ("
+                    << expected
+                    << "). Rejecting the null hypothesis!" << std::endl;
+                return std::make_pair(false, oss.str());
+            }
+
+            const double observed = obsFrequencies[cell.index];
+
+            if (expected == 0) {
+                /* Samples in a cell that should be completely empty would
+                   ordinarily require immediate rejection. Finite-precision
+                   rounding can cause this without an actual bug, hence the
+                   more lenient threshold relative to the sample count. */
+                if (observed > sampleCount * 1e-5) {
+                    oss << "Encountered " << observed << " samples in a cell "
+                        << "with expected frequency 0. Rejecting the null hypothesis!" << std::endl;
+                    return std::make_pair(false, oss.str());
+                }
+                continue;
+            }
+
+            /* Pool cells with low expected frequencies, and keep pooling
+               until the pooled cell itself is sufficiently large */
+            const bool lowFrequency = expected < minExpFrequency;
+            if (lowFrequency || (pooledExpected > 0 && pooledExpected < minExpFrequency)) {
+                pooledObserved += observed;
+                pooledExpected += expected;
+                pooledCells++;
+                continue;
+            }
+
+            const double diff = observed - expected;
+            statistic += (diff*diff) / expected;
+            ++dof;
+        }
+
+        /* The pooled cell contributes like any regular cell */
+        if (pooledExpected > 0 || pooledObserved > 0) {
+            oss << "Pooled " << pooledCells << " to ensure sufficiently high expected "
+                   "cell frequencies (>" << minExpFrequency << ")" << std::endl;
+            const double diff = pooledObserved - pooledExpected;
+            statistic += (diff*diff) / pooledExpected;
+            ++dof;
+        }
+
+        /* All parameters are assumed to be known, so there is no
+           additional DF reduction due to model parameters */
+        dof -= 1;
+
+        if (dof <= 0) {
+            oss << "The number of degrees of freedom (" << dof << ") is too low!" << std::endl;
+            return std::make_pair(false, oss.str());
+        }
+
+        oss << "Chi^2 statistic = " << statistic << " (d.o.f. = " << dof << ")" << std::endl;
+
+        /* Probability of obtaining a test statistic at least as extreme as
+           the one observed, assuming that the distributions match */
+        const double pval = 1 - chi2_cdf(statistic, dof);
+
+        /* Sidak-corrected significance level for numTests independent tests */
+        const double alpha = 1.0 - std::pow(1.0 - significanceLevel, 1.0 / numTests);
+
+        /* A non-finite p-value counts as a rejection */
+        const bool accepted = std::isfinite(pval) && !(pval < alpha);
+        oss << (accepted ? "Accepted" : "***** Rejected *****")
+            << " the null hypothesis (p-value = " << pval << ", "
+               "significance level = " << alpha << ")" << std::endl;
+        return std::make_pair(accepted, oss.str());
+    }
+
+    /**
+     * Write two \c res1 x \c res2 Chi^2 frequency tables to \c filename as a
+     * script that defines both matrices and plots them (Octave / MATLAB syntax)
+     *
+     * Both tables are read in row-major order, i.e. entry (i, j) is taken from
+     * position i*res2+j.
+     */
+    inline void chi2_dump(int res1, int res2, const double *obsFrequencies, const double *expFrequencies, const std::string &filename) {
+        std::ofstream out(filename);
+
+        /* Emits "<name> = [ a, b; c, d ];" followed by a line break */
+        auto writeTable = [&](const char *name, const double *table) {
+            out << name << " = [ ";
+            for (int row = 0; row < res1; ++row) {
+                for (int col = 0; col < res2; ++col) {
+                    out << table[row*res2+col];
+                    if (col+1 < res2)
+                        out << ", ";
+                }
+                if (row+1 < res1)
+                    out << "; ";
+            }
+            out << " ];" << std::endl;
+        };
+
+        writeTable("obsFrequencies", obsFrequencies);
+        writeTable("expFrequencies", expFrequencies);
+
+        /* Plotting commands: one image per table, stacked vertically */
+        out << "colormap(jet);" << std::endl
+            << "clf; subplot(2,1,1);" << std::endl
+            << "imagesc(obsFrequencies);" << std::endl
+            << "title('Observed frequencies');" << std::endl
+            << "axis equal;" << std::endl
+            << "subplot(2,1,2);" << std::endl
+            << "imagesc(expFrequencies);" << std::endl
+            << "axis equal;" << std::endl
+            << "title('Expected frequencies');" << std::endl;
+        out.close();
+    }
+
+    /**
+     * Perform a two-sided t-test based on the given mean, variance and reference value
+     *
+     * This test analyzes whether the expected value of a random variable matches a
+     * certain known value. When there is significant statistical "evidence"
+     * against this hypothesis, the test fails.
+     *
+     * This is useful in checking whether a Monte Carlo method converges
+     * against the right value. Because statistical tests are able to handle the
+     * inherent noise of these methods, they can be used to construct statistical
+     * test suites not unlike the traditional unit tests used in software engineering.
+     *
+     * The test also fails when fewer than two samples are available (no degrees
+     * of freedom are left) and when the p-value is not a finite number, e.g.
+     * because \c mean or \c reference is NaN.
+     *
+     * \param mean
+     *   Estimated mean of the statistical estimator
+     *
+     * \param variance
+     *   Estimated variance of the statistical estimator. Values below 1e-5 are
+     *   clamped to 1e-5 when computing the t-statistic.
+     *
+     * \param reference
+     *   A known reference value ("true mean")
+     *
+     * \param sampleCount
+     *   Number of samples used to estimate \c mean and \c variance
+     *
+     * \param significanceLevel
+     *   The null hypothesis will be rejected when the associated
+     *   p-value is below the significance level specified here.
+     *
+     * \param numTests
+     *   Specifies the total number of tests that will be executed. If greater than one,
+     *   the Sidak correction will be applied to the significance level. This is because
+     *   by conducting multiple independent hypothesis tests in sequence, the probability
+     *   of a failure increases accordingly.
+     *
+     * \return
+     *   A pair of values containing the test result (success: \c true and failure: \c false)
+     *   and a descriptive string
+     */
+    inline std::pair<bool, std::string>
+    students_t_test(double mean, double variance, double reference,
+                    int sampleCount, double significanceLevel, int numTests = 1) {
+        std::ostringstream oss;
+
+        /* Determine the degrees of freedom. Without any, the distribution is
+           undefined; report a failure in the same way chi2_test() does. */
+        int dof = sampleCount - 1;
+        if (dof <= 0) {
+            oss << "The number of degrees of freedom (" << dof << ") is too low!" << std::endl;
+            return std::make_pair(false, oss.str());
+        }
+
+        /* Compute the t statistic; the variance is clamped to avoid a division by zero */
+        double t = std::abs(mean - reference) * std::sqrt(sampleCount / std::max(variance, 1e-5));
+
+        oss << "Sample mean = " << mean << " (reference value = " << reference << ")" << std::endl;
+        oss << "Sample variance = " << variance << std::endl;
+        oss << "t-statistic = " << t << " (d.o.f. = " << dof << ")" << std::endl;
+
+        /* Compute the (two-sided) p-value */
+        double pval = 2 * (1 - students_t_cdf(t, dof));
+
+        /* Apply the Sidak correction term, since we'll be conducting multiple independent
+           hypothesis tests. This accounts for the fact that the probability of a failure
+           increases quickly when several hypothesis tests are run in sequence. */
+        double alpha = 1.0 - std::pow(1.0 - significanceLevel, 1.0 / numTests);
+
+        /* A NaN p-value compares false against alpha, so it must be rejected
+           explicitly; otherwise invalid input would pass the test. */
+        bool result = false;
+        if (pval < alpha || !std::isfinite(pval)) {
+            oss << "***** Rejected ***** the null hypothesis (p-value = " << pval << ", "
+                   "significance level = " << alpha << ")" << std::endl;
+        } else {
+            oss << "Accepted the null hypothesis (p-value = " << pval << ", "
+                   "significance level = " << alpha << ")" << std::endl;
+            result = true;
+        }
+        return std::make_pair(result, oss.str());
+    }
+}; /* namespace hypothesis */