diff --git a/libraries/Gauss/CHANGELOG.md b/libraries/Gauss/CHANGELOG.md index 9a16e96c..3634037e 100644 --- a/libraries/Gauss/CHANGELOG.md +++ b/libraries/Gauss/CHANGELOG.md @@ -6,6 +6,21 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/) and this project adheres to [Semantic Versioning](http://semver.org/). +## [0.1.1] - 2023-07-07 +- improve performance => reciprokeSD = 1.0/stddev +- update readme.md + - add performance section (UNO / ESP32) + - elaborated future section +- generated a more precise lookup table (8 decimals) +- update unit tests +- add default parameters to **bool begin(float mean = 0, float stddev = 1)** + - allow negative **stddev** but return false if stddev <= 0. +- add **float getMean()** convenience function. +- add **float getStdDev()** convenience function. +- clean up a bit + + + ## [0.1.0] - 2023-07-06 - initial version diff --git a/libraries/Gauss/Gauss.cpp b/libraries/Gauss/Gauss.cpp index 81ca0a6e..0bff0615 100644 --- a/libraries/Gauss/Gauss.cpp +++ b/libraries/Gauss/Gauss.cpp @@ -1,7 +1,7 @@ // // FILE: Gauss.cpp // AUTHOR: Rob Tillaart -// VERSION: 0.1.0 +// VERSION: 0.1.1 // PURPOSE: Library for the Gauss probability math. // DATE: 2023-07-06 diff --git a/libraries/Gauss/Gauss.h b/libraries/Gauss/Gauss.h index 5e0e6bcb..c66c990f 100644 --- a/libraries/Gauss/Gauss.h +++ b/libraries/Gauss/Gauss.h @@ -2,7 +2,7 @@ // // FILE: Gauss.h // AUTHOR: Rob Tillaart -// VERSION: 0.1.0 +// VERSION: 0.1.1 // PURPOSE: Library for the Gauss probability math. // DATE: 2023-07-06 @@ -10,7 +10,7 @@ #include "Arduino.h" #include "MultiMap.h" -#define GAUSS_LIB_VERSION (F("0.1.0")) +#define GAUSS_LIB_VERSION (F("0.1.1")) class Gauss @@ -20,34 +20,48 @@ public: { _mean = 0; _stddev = 1; + _reciprokeSD = 1; } - bool begin(float mean, float stddev) + // stddev should be positive. + bool begin(float mean = 0, float stddev = 1) { _mean = mean; - _stddev = abs(stddev); - return true; + _stddev = stddev; // should be positive + _reciprokeSD = 1.0 / _stddev; + return (stddev > 0); + } + + + float getMean() + { + return _mean; + } + + + float getStdDev() + { + return _stddev; } float P_smaller(float value) { if (_stddev == 0) return NAN; - return _P_smaller((value - _mean) / _stddev); + // normalize(value) + return _P_smaller((value - _mean) * _reciprokeSD); } float P_larger(float value) { return 1.0 - P_smaller(value); - // if (_stddev == 0) return NAN; - // optimize math division? - // return _P_larger((value - _mean) / _stddev); } float P_between(float p, float q) { + if (_stddev == 0) return NAN; if (p >= q) return 0; return P_smaller(q) - P_smaller(p); } @@ -56,8 +70,8 @@ public: float P_equal(float value) { if (_stddev == 0) return NAN; - float n = (value - _mean)/_stddev; - float c = 1.0 / (_stddev * sqrt(TWO_PI)); + float n = (value - _mean) * _reciprokeSD; + float c = _reciprokeSD * (1.0 / sqrt(TWO_PI)); return c * exp(-0.5 * n * n); } @@ -70,7 +84,7 @@ public: float normalize(float value) { - return (value - _mean)/_stddev; + return (value - _mean) * _reciprokeSD; } @@ -85,27 +99,40 @@ private: float _P_smaller(float x) { - // TODO improve accuracy or reduce points. + // NORM.DIST(mean, stddev, x, true) float __gauss[] = { - 0.5000, 0.5398, 0.5793, 0.6179, 0.6554, 0.6915, 0.7257, 0.7580, - 0.7881, 0.8159, 0.8413, 0.8643, 0.8849, 0.9032, 0.9192, 0.9332, - 0.9452, 0.9554, 0.9641, 0.9713, 0.9772, 0.9821, 0.9861, 0.9893, - 0.9918, 0.9938, 0.9953, 0.9965, 0.9974, 0.9981, 0.9987, 1.0000 + 0.50000000, 0.53982784, 0.57925971, 0.61791142, + 0.65542174, 0.69146246, 0.72574688, 0.75803635, + 0.78814460, 0.81593987, 0.84134475, 0.86433394, + 0.88493033, 0.90319952, 0.91924334, 0.93319280, + 0.94520071, 0.95543454, 0.96406968, 0.97128344, + 0.97724987, 0.98213558, 0.98609655, 0.98927589, + 0.99180246, 0.99379033, 0.99533881, 0.99653303, + 0.99744487, 0.99813419, 0.99865010, 0.99996833, + 0.99999971, 1.00000000 }; + + // 0..60000 uint16_t = 68 bytes less float __z[] = { 0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 1.1, 1.2, 1.3, 1.4, 1.5, 1.6, 1.7, 1.8, 1.9, 2.0, 2.1, 2.2, 2.3, - 2.4, 2.5, 2.6, 2.7, 2.8, 2.9, 3.0, 10.0 + 2.4, 2.5, 2.6, 2.7, 2.8, 2.9, 3.0, 4,0, + 5.0, 6.0 }; - if (x < 0) return 1.0 - multiMap(-x, __z, __gauss, 32); - return multiMap(x, __z, __gauss, 32); + // a dedicated MultiMap could exploit the fact that + // the __z[] array is largely equidistant. + // that could remove the __z[] array (almost) completely. + if (x < 0) return 1.0 - multiMap(-x, __z, __gauss, 34); + return multiMap(x, __z, __gauss, 34); } float _mean = 0; - float _stddev = 1; + float _stddev = 1; // not needed as _reciprokeSD holds same info? + float _reciprokeSD = 1; }; // -- END OF FILE -- + diff --git a/libraries/Gauss/README.md b/libraries/Gauss/README.md index 94ac1f62..17f94eb2 100644 --- a/libraries/Gauss/README.md +++ b/libraries/Gauss/README.md @@ -16,7 +16,7 @@ Library for the Gauss probability math. Gauss is an experimental Arduino library to approximate the probability that a value is smaller or larger than a given value. These under the premises of a Gaussian distribution with parameters **mean** and **stddev** -(a.k.a. average / mu and standard deviation / sigma). +(a.k.a. average / mu / µ and standard deviation / sigma / σ). If these parameters are not given, 0 and 1 are used by default (normalized Gaussian distribution). The values are approximated with **MultiMap()** using a 32 points interpolated lookup. @@ -28,12 +28,14 @@ Return values are given as floats, if one needs percentages, just multiply by 10 #### Accuracy -The lookup table used has 32 points with 4 significant digits. -Do not expect a higher accuracy / precision. +The lookup table has 34 points with 8 decimals. +This matches the precision of float data type. +Do not expect a very high accuracy / precision as interpolation is linear. For many applications this accuracy is sufficient. -(links to a table with more significant digits is welcome). +Values of the table are calculated with ```NORM.DIST(mean, stddev, x, true)```. +Note: 0.1.0 was 32 points 4 decimals. Need to investigate reduction of points. #### Applications @@ -42,9 +44,20 @@ For many applications this accuracy is sufficient. - compare population data with individual +#### Character + +| parameter | name | ALT-code | char | +|:-----------:|:------:|:----------:|:-----:| +| mean | mu | ALT-230 | µ | +| stddev | sigma | ALT-229 | σ | + +- https://altcodesguru.com/greek-alt-codes.html + + #### Related - https://en.wikipedia.org/wiki/Normal_distribution +- https://sphweb.bumc.bu.edu/otlt/mph-modules/bs/bs704_probability/bs704_probability9.html - https://github.com/RobTillaart/Multimap - https://github.com/RobTillaart/Statistic (more stat links there). @@ -59,19 +72,32 @@ For many applications this accuracy is sufficient. #### Base - **Gauss()** constructor. Uses mean = 0 and stddev = 1 by default. -- **bool begin(float mean, float stddev)** set the mean and stddev. -Returns true on success. If needed stddev is made positive. +- **bool begin(float mean = 0, float stddev = 1)** set the mean and stddev. +Returns true if stddev > 0 which should be so. +Returns false if stddev <= 0, which could be a user choice. +Note that if ```stddev == 0```, probabilities cannot be calculated +as the distribution is not Gaussian. +The default values (0,1) gives the normalized Gaussian distribution. +**begin()** can be called at any time to change the mean or stddev. +- **float getMean()** returns current mean. +- **float getStddev()** returns current stddev. + #### Probability +Probability functions return NAN if stddev == 0. + - **float P_smaller(float f)** returns probability **P(x < f)**. Multiply by 100.0 to get the value as a percentage. +A.k.a. **CDF()** Cumulative Distribution Function. - **float P_larger(float f)** returns probability **P(x > f)**. Multiply by 100.0 to get the value as a percentage. +As the distribution is continuous **P_larger(f) == 1 - P_smaller(f)**. - **float P_between(float f, float g)** returns probability **P(f < x < g)**. Multiply by 100.0 to get the value as a percentage. - **float P_equal(float f)** returns probability **P(x == f)**. -This is the bell curve formula. +This uses the bell curve formula. + #### Other @@ -82,43 +108,74 @@ E.g if mean == 50 and stddev == 14, then 71 ==> +1.5 sigma. - **float bellCurve(float f)** returns probability **P(x == f)**. +## Performance + +Indicative numbers for 1000 calls, timing in micros. + +Arduino UNO, 16 MHz, IDE 1.8.19 + +| function | 0.1.0 | 0.1.1 | notes | +|:--------------|:--------:|:--------:|:--------| +| P_smaller | 375396 | 365964 | +| P_larger | 384368 | 375032 | +| P_between | 265624 | 269176 | +| normalize | 44172 | 23024 | +| bellCurve | 255728 | 205460 | +| approx.bell | 764028 | 719184 | see examples + + +ESP32, 240 MHz, IDE 1.8.19 + +| function | 0.1.0 | 0.1.1 | notes | +|:--------------|:--------:|:--------:|:--------| +| P_smaller | - | 4046 | +| P_larger | - | 4043 | +| P_between | - | 3023 | +| normalize | - | 592 | +| bellCurve | - | 13522 | +| approx.bell | - | 7300 | + + ## Future #### Must - documentation - - mu + sigma character -- unit tests + #### Should -- optimize performance - - remove division by stddev - optimize accuracy - revisit lookup of MultiMap - (-10 .. 0) might be more accurate (significant digits)? - double instead of floats? (good table?) - + - make use of equidistant \_\_z\[] table #### Could -- **void setMean(float f)** -- **float getMean()** -- **void setStddev(float f)** -- **float getStddev()** -- default values for **begin(0,1)** - add examples - e.g. temperature (DS18B20 or DHT22) - e.g. loadcell (HX711) -- does the stddev needs to be positive, - - what happens if negative values are allowed? -- equality test Gauss objects -- move code to .cpp file? (rather small lib). - embed MultiMap hardcoded instead of library dependency -- **bellCurve()** => **Z()**? +- add unit tests +- remove **\_stddev** as **\_reciprokeSD** holds same information. +- reverse normalization + - G(100,25) which value has stddev 0.735? +- **VAL(probability = 0.75)** ==> 134 whatever + - Returns the value of the distribution for which the **CDF()** is at least probability. + - Inverse of **P_smaller()** +- **float P_outside(float f, float g)** returns probability **P(x < f) + P(g < x)**. + - assuming no overlap. Use **P_outside() = 1 - P_between()** #### Won't (unless requested) +- equality test Gauss objects +- does the stddev needs to be positive? Yes. + - what happens if negative values are allowed? P curve is reversed. +- move code to .cpp file? (rather small lib). +- **void setMean(float f)** can be done with begin() +- **void setStddev(float f)** can be done with begin() + diff --git a/libraries/Gauss/examples/Gauss_performance/Gauss_performance.ino b/libraries/Gauss/examples/Gauss_performance/Gauss_performance.ino index 73a48933..31aa37b4 100644 --- a/libraries/Gauss/examples/Gauss_performance/Gauss_performance.ino +++ b/libraries/Gauss/examples/Gauss_performance/Gauss_performance.ino @@ -20,7 +20,7 @@ void setup(void) Serial.print("GAUSS_LIB_VERSION: "); Serial.println(GAUSS_LIB_VERSION); Serial.println(); - Serial.println("Timing in micros"); + Serial.println("Timing in micros (1000 calls)"); Serial.println(); test_1(); diff --git a/libraries/Gauss/examples/Gauss_performance/performance_0.1.1.txt b/libraries/Gauss/examples/Gauss_performance/performance_0.1.1.txt new file mode 100644 index 00000000..0d5b600a --- /dev/null +++ b/libraries/Gauss/examples/Gauss_performance/performance_0.1.1.txt @@ -0,0 +1,16 @@ +Arduino UNO +IDE 1.8.19 + +Gauss_performance.ino +GAUSS_LIB_VERSION: 0.1.1 + +Timing in micros (1000 calls) + +P_smaller: 365964 +P_larger: 375032 +P_between: 269176 +normalize: 23024 +bellCurve: 205460 +approx.bell: 719184 + +done... diff --git a/libraries/Gauss/examples/Gauss_test/Gauss_test.ino b/libraries/Gauss/examples/Gauss_test/Gauss_test.ino index e93279a5..80217334 100644 --- a/libraries/Gauss/examples/Gauss_test/Gauss_test.ino +++ b/libraries/Gauss/examples/Gauss_test/Gauss_test.ino @@ -17,9 +17,9 @@ void setup(void) Serial.println(); test_1(); - test_2(); - test_3(); - test_4(); +// test_2(); +// test_3(); +// test_4(); Serial.println("\ndone..."); } diff --git a/libraries/Gauss/keywords.txt b/libraries/Gauss/keywords.txt index 7f758703..5a834b2b 100644 --- a/libraries/Gauss/keywords.txt +++ b/libraries/Gauss/keywords.txt @@ -6,6 +6,8 @@ Gauss KEYWORD1 # Methods and Functions (KEYWORD2) begin KEYWORD2 +getMean KEYWORD2 +getStdDev KEYWORD2 P_smaller KEYWORD2 P_larger KEYWORD2 diff --git a/libraries/Gauss/library.json b/libraries/Gauss/library.json index e03ce840..5ae12fce 100644 --- a/libraries/Gauss/library.json +++ b/libraries/Gauss/library.json @@ -23,7 +23,7 @@ "version": "^0.1.7" } ], - "version": "0.1.0", + "version": "0.1.1", "license": "MIT", "frameworks": "arduino", "platforms": "*", diff --git a/libraries/Gauss/library.properties b/libraries/Gauss/library.properties index 4a8fa032..7473aff7 100644 --- a/libraries/Gauss/library.properties +++ b/libraries/Gauss/library.properties @@ -1,9 +1,9 @@ name=Gauss -version=0.1.0 +version=0.1.1 author=Rob Tillaart maintainer=Rob Tillaart sentence=Library for the Gauss probability math. -paragraph= +paragraph=normal distribution. category=Data Processing url=https://github.com/RobTillaart/Gauss architectures=* diff --git a/libraries/Gauss/test/unit_test_001.cpp b/libraries/Gauss/test/unit_test_001.cpp index 78323b0c..528d0fe5 100644 --- a/libraries/Gauss/test/unit_test_001.cpp +++ b/libraries/Gauss/test/unit_test_001.cpp @@ -40,10 +40,14 @@ unittest_teardown() unittest(test_constructor) { Gauss G; + + assertEqualFloat(0.0, G.getMean(), 0.0001); + assertEqualFloat(1.0, G.getStdDev(), 0.0001); + assertEqualFloat(0.5, G.P_smaller(0), 0.0001); - G.begin(0, 1); - - assertEqualFloat(0.5, G.P_smaller(0), 0.001); + G.begin(10, 3); + assertEqualFloat(10.0, G.getMean(), 0.0001); + assertEqualFloat(3.0, G.getStdDev(), 0.0001); } @@ -53,13 +57,17 @@ unittest(test_P_smaller) G.begin(0, 1); - assertEqualFloat(0.0013, G.P_smaller(-3.0), 0.001); - assertEqualFloat(0.0228, G.P_smaller(-2.0), 0.001); - assertEqualFloat(0.1587, G.P_smaller(-1.0), 0.001); - assertEqualFloat(0.5000, G.P_smaller(0.0), 0.001); - assertEqualFloat(0.8413, G.P_smaller(1.0), 0.001); - assertEqualFloat(0.9772, G.P_smaller(2.0), 0.001); - assertEqualFloat(0.9987, G.P_smaller(3.0), 0.001); + assertEqualFloat(0.0000, G.P_smaller(-6.0), 0.0001); + assertEqualFloat(0.0001, G.P_smaller(-4.0), 0.0001); + assertEqualFloat(0.0013, G.P_smaller(-3.0), 0.0001); + assertEqualFloat(0.0228, G.P_smaller(-2.0), 0.0001); + assertEqualFloat(0.1587, G.P_smaller(-1.0), 0.0001); + assertEqualFloat(0.5000, G.P_smaller(0.0), 0.0001); + assertEqualFloat(0.8413, G.P_smaller(1.0), 0.0001); + assertEqualFloat(0.9772, G.P_smaller(2.0), 0.0001); + assertEqualFloat(0.9987, G.P_smaller(3.0), 0.0001); + assertEqualFloat(0.9999, G.P_smaller(4.0), 0.0001); + assertEqualFloat(1.0000, G.P_smaller(6.0), 0.0001); } @@ -69,13 +77,13 @@ unittest(test_P_larger) G.begin(0, 1); - assertEqualFloat(0.9987, G.P_larger(-3.0), 0.001); - assertEqualFloat(0.9772, G.P_larger(-2.0), 0.001); - assertEqualFloat(0.8413, G.P_larger(-1.0), 0.001); - assertEqualFloat(0.5000, G.P_larger(0.0), 0.001); - assertEqualFloat(0.1587, G.P_larger(1.0), 0.001); - assertEqualFloat(0.0228, G.P_larger(2.0), 0.001); - assertEqualFloat(0.0013, G.P_larger(3.0), 0.001); + assertEqualFloat(0.9987, G.P_larger(-3.0), 0.0001); + assertEqualFloat(0.9772, G.P_larger(-2.0), 0.0001); + assertEqualFloat(0.8413, G.P_larger(-1.0), 0.0001); + assertEqualFloat(0.5000, G.P_larger(0.0), 0.0001); + assertEqualFloat(0.1587, G.P_larger(1.0), 0.0001); + assertEqualFloat(0.0228, G.P_larger(2.0), 0.0001); + assertEqualFloat(0.0013, G.P_larger(3.0), 0.0001); } @@ -85,13 +93,13 @@ unittest(test_P_between) G.begin(0, 1); - assertEqualFloat(0.4987, G.P_between(-3.0, 0.0), 0.001); - assertEqualFloat(0.4772, G.P_between(-2.0, 0.0), 0.001); - assertEqualFloat(0.3413, G.P_between(-1.0, 0.0), 0.001); - assertEqualFloat(0.0000, G.P_between(0.0, 0.0), 0.001); - assertEqualFloat(0.3413, G.P_between(0.0, 1.0), 0.001); - assertEqualFloat(0.4772, G.P_between(0.0, 2.0), 0.001); - assertEqualFloat(0.4987, G.P_between(0.0, 3.0), 0.001); + assertEqualFloat(0.4987, G.P_between(-3.0, 0.0), 0.0001); + assertEqualFloat(0.4772, G.P_between(-2.0, 0.0), 0.0001); + assertEqualFloat(0.3413, G.P_between(-1.0, 0.0), 0.0001); + assertEqualFloat(0.0000, G.P_between(0.0, 0.0), 0.0001); + assertEqualFloat(0.3413, G.P_between(0.0, 1.0), 0.0001); + assertEqualFloat(0.4772, G.P_between(0.0, 2.0), 0.0001); + assertEqualFloat(0.4987, G.P_between(0.0, 3.0), 0.0001); } @@ -101,13 +109,29 @@ unittest(test_P_equal) G.begin(0, 1); - assertEqualFloat(0.004432, G.P_equal(-3.0), 0.001); - assertEqualFloat(0.053991, G.P_equal(-2.0), 0.001); - assertEqualFloat(0.241971, G.P_equal(-1.0), 0.001); - assertEqualFloat(0.398942, G.P_equal(0.0), 0.001); - assertEqualFloat(0.241971, G.P_equal(1.0), 0.001); - assertEqualFloat(0.053991, G.P_equal(2.0), 0.001); - assertEqualFloat(0.004432, G.P_equal(3.0), 0.001); + assertEqualFloat(0.004432, G.P_equal(-3.0), 0.0001); + assertEqualFloat(0.053991, G.P_equal(-2.0), 0.0001); + assertEqualFloat(0.241971, G.P_equal(-1.0), 0.0001); + assertEqualFloat(0.398942, G.P_equal(0.0), 0.0001); + assertEqualFloat(0.241971, G.P_equal(1.0), 0.0001); + assertEqualFloat(0.053991, G.P_equal(2.0), 0.0001); + assertEqualFloat(0.004432, G.P_equal(3.0), 0.0001); +} + + +unittest(test_normailze) +{ + Gauss G; + + G.begin(100, 25); + + assertEqualFloat(-3.0, G.normalize(25), 0.0001); + assertEqualFloat(-2.0, G.normalize(50), 0.0001); + assertEqualFloat(-1.0, G.normalize(75), 0.0001); + assertEqualFloat(0.0, G.normalize(100), 0.0001); + assertEqualFloat(1.0, G.normalize(125), 0.0001); + assertEqualFloat(2.0, G.normalize(150), 0.0001); + assertEqualFloat(3.0, G.normalize(175), 0.0001); } @@ -115,3 +139,4 @@ unittest_main() // -- END OF FILE -- +