0.1.1 Gauss

2024-10-03 18:09:02 -04:00 · 2023-07-08 14:23:27 +02:00 · 2023-07-08 14:23:27 +02:00 · 33e70b036d
commit 33e70b036d
parent 8270df9d5a
11 changed files with 224 additions and 82 deletions
--- a/libraries/Gauss/CHANGELOG.md
+++ b/libraries/Gauss/CHANGELOG.md
@ -6,6 +6,21 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/)
 and this project adheres to [Semantic Versioning](http://semver.org/).
 ## [0.1.1] - 2023-07-07
 - improve performance => reciprokeSD = 1.0/stddev
 - update readme.md
  - add performance section (UNO / ESP32)
  - elaborated future section
 - generated a more precise lookup table (8 decimals)
 - update unit tests
 - add default parameters to **bool begin(float mean = 0, float stddev = 1)**
  - allow negative **stddev** but return false if stddev <= 0.
 - add **float getMean()** convenience function.
 - add **float getStdDev()** convenience function.
 - clean up a bit
 ## [0.1.0] - 2023-07-06
 - initial version
--- a/libraries/Gauss/Gauss.cpp
+++ b/libraries/Gauss/Gauss.cpp
@ -1,7 +1,7 @@
 //
 //    FILE: Gauss.cpp
 //  AUTHOR: Rob Tillaart
-// VERSION: 0.1.0
+// VERSION: 0.1.1
 // PURPOSE: Library for the Gauss probability math.
 //    DATE: 2023-07-06
--- a/libraries/Gauss/Gauss.h
+++ b/libraries/Gauss/Gauss.h
@ -2,7 +2,7 @@
 //
 //    FILE: Gauss.h
 //  AUTHOR: Rob Tillaart
-// VERSION: 0.1.0
+// VERSION: 0.1.1
 // PURPOSE: Library for the Gauss probability math.
 //    DATE: 2023-07-06
@ -10,7 +10,7 @@
 #include "Arduino.h"
 #include "MultiMap.h"
-#define GAUSS_LIB_VERSION       (F("0.1.0"))
+#define GAUSS_LIB_VERSION       (F("0.1.1"))
 class Gauss
@ -20,34 +20,48 @@ public:
  {
    _mean = 0;
    _stddev = 1;
    _reciprokeSD = 1;
  }
-  bool begin(float mean, float stddev)
+  //  stddev should be positive.
  bool begin(float mean = 0, float stddev = 1)
  {
    _mean = mean;
-    _stddev = abs(stddev);
+    _stddev = stddev;  //  should be positive
-    return true;
+    _reciprokeSD = 1.0 / _stddev;
    return (stddev > 0);
  }
  float getMean()
  {
    return _mean;
  }
  float getStdDev()
  {
    return _stddev;
  }
  float P_smaller(float value)
  {
    if (_stddev == 0) return NAN;
-    return _P_smaller((value - _mean) / _stddev);
+    //  normalize(value)
    return _P_smaller((value - _mean) * _reciprokeSD);
  }
  float P_larger(float value)
  {
    return 1.0 - P_smaller(value);
    //  if (_stddev == 0) return NAN;
    //  optimize math division?
    //  return _P_larger((value - _mean) / _stddev);
  }
  float P_between(float p, float q)
  {
    if (_stddev == 0) return NAN;
    if (p >= q) return 0;
    return P_smaller(q) - P_smaller(p);
  }
@ -56,8 +70,8 @@ public:
  float P_equal(float value)
  {
    if (_stddev == 0) return NAN;
-    float n = (value - _mean)/_stddev;
+    float n = (value - _mean) * _reciprokeSD;
-    float c = 1.0 / (_stddev * sqrt(TWO_PI));
+    float c = _reciprokeSD * (1.0 / sqrt(TWO_PI));
    return c * exp(-0.5 * n * n);
  }
@ -70,7 +84,7 @@ public:
  float normalize(float value)
  {
-    return (value - _mean)/_stddev;
+    return (value - _mean) * _reciprokeSD;
  }
@ -85,27 +99,40 @@ private:
  float _P_smaller(float x)
  {
-    //  TODO improve accuracy or reduce points.
+    //  NORM.DIST(mean, stddev, x, true)
    float __gauss[] = {
-      0.5000, 0.5398, 0.5793, 0.6179, 0.6554, 0.6915, 0.7257, 0.7580,
+      0.50000000, 0.53982784, 0.57925971, 0.61791142,
-      0.7881, 0.8159, 0.8413, 0.8643, 0.8849, 0.9032, 0.9192, 0.9332,
+      0.65542174, 0.69146246, 0.72574688, 0.75803635,
-      0.9452, 0.9554, 0.9641, 0.9713, 0.9772, 0.9821, 0.9861, 0.9893,
+      0.78814460, 0.81593987, 0.84134475, 0.86433394,
-      0.9918, 0.9938, 0.9953, 0.9965, 0.9974, 0.9981, 0.9987, 1.0000
+      0.88493033, 0.90319952, 0.91924334, 0.93319280,
      0.94520071, 0.95543454, 0.96406968, 0.97128344,
      0.97724987, 0.98213558, 0.98609655, 0.98927589,
      0.99180246, 0.99379033, 0.99533881, 0.99653303,
      0.99744487, 0.99813419, 0.99865010, 0.99996833,
      0.99999971, 1.00000000
    };
    //  0..60000  uint16_t = 68 bytes less
    float __z[] = {
      0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7,
      0.8, 0.9, 1.0, 1.1, 1.2, 1.3, 1.4, 1.5,
      1.6, 1.7, 1.8, 1.9, 2.0, 2.1, 2.2, 2.3,
-      2.4, 2.5, 2.6, 2.7, 2.8, 2.9, 3.0, 10.0
+      2.4, 2.5, 2.6, 2.7, 2.8, 2.9, 3.0, 4,0,
      5.0, 6.0
    };
-    if (x < 0) return 1.0 - multiMap<float>(-x, __z, __gauss, 32);
+    //  a dedicated MultiMap could exploit the fact that
-    return multiMap<float>(x, __z, __gauss, 32);
+    //  the __z[] array is largely equidistant.
    //  that could remove the __z[] array (almost) completely.
    if (x < 0) return 1.0 - multiMap<float>(-x, __z, __gauss, 34);
    return multiMap<float>(x, __z, __gauss, 34);
  }
  float _mean = 0;
-  float _stddev = 1;
+  float _stddev = 1;       //  not needed as _reciprokeSD holds same info?
  float _reciprokeSD = 1;
 };
 //  -- END OF FILE --
--- a/libraries/Gauss/README.md
+++ b/libraries/Gauss/README.md
@ -16,7 +16,7 @@ Library for the Gauss probability math.
 Gauss is an experimental Arduino library to approximate the probability that a value is 
 smaller or larger than a given value.
 These under the premises of a Gaussian distribution with parameters **mean** and **stddev** 
-(a.k.a. average / mu and standard deviation / sigma).
+(a.k.a. average / mu / µ and standard deviation / sigma / σ).
 If these parameters are not given, 0 and 1 are used by default (normalized Gaussian distribution).
 The values are approximated with **MultiMap()** using a 32 points interpolated lookup.
@ -28,12 +28,14 @@ Return values are given as floats, if one needs percentages, just multiply by 10
 #### Accuracy
-The lookup table used has 32 points with 4 significant digits.
+The lookup table has 34 points with 8 decimals.  
-Do not expect a higher accuracy / precision.
+This matches the precision of float data type.
 Do not expect a very high accuracy / precision as interpolation is linear.
 For many applications this accuracy is sufficient.
-(links to a table with more significant digits is welcome).
+Values of the table are calculated with ```NORM.DIST(mean, stddev, x, true)```.
 Note: 0.1.0 was 32 points 4 decimals. Need to investigate reduction of points.
 #### Applications
@ -42,9 +44,20 @@ For many applications this accuracy is sufficient.
 - compare population data with individual
 #### Character
 |  parameter  |  name  |  ALT-code  |  char |
 |:-----------:|:------:|:----------:|:-----:|
 |  mean       |  mu    |  ALT-230   |   µ   |
 |  stddev     | sigma  |  ALT-229   |   σ   |
 - https://altcodesguru.com/greek-alt-codes.html
 #### Related
 - https://en.wikipedia.org/wiki/Normal_distribution
 - https://sphweb.bumc.bu.edu/otlt/mph-modules/bs/bs704_probability/bs704_probability9.html
 - https://github.com/RobTillaart/Multimap
 - https://github.com/RobTillaart/Statistic  (more stat links there).
@ -59,19 +72,32 @@ For many applications this accuracy is sufficient.
 #### Base
 - **Gauss()** constructor. Uses mean = 0 and stddev = 1 by default.
- **bool begin(float mean, float stddev)** set the mean and stddev.
+- **bool begin(float mean = 0, float stddev = 1)** set the mean and stddev.
-Returns true on success. If needed stddev is made positive.
+Returns true if stddev > 0 which should be so.
 Returns false if stddev <= 0, which could be a user choice.
 Note that if ```stddev == 0```, probabilities cannot be calculated 
 as the distribution is not Gaussian.
 The default values (0,1) gives the normalized Gaussian distribution.
 **begin()** can be called at any time to change the mean or stddev.
 - **float getMean()** returns current mean.
 - **float getStddev()** returns current stddev.
 #### Probability
 Probability functions return NAN if stddev == 0.
 - **float P_smaller(float f)** returns probability **P(x < f)**.
 Multiply by 100.0 to get the value as a percentage.
 A.k.a. **CDF()** Cumulative Distribution Function.
 - **float P_larger(float f)** returns probability **P(x > f)**.
 Multiply by 100.0 to get the value as a percentage.
 As the distribution is continuous **P_larger(f) == 1 - P_smaller(f)**.
 - **float P_between(float f, float g)** returns probability **P(f < x < g)**.
 Multiply by 100.0 to get the value as a percentage.
 - **float P_equal(float f)** returns probability **P(x == f)**.
-This is the bell curve formula.
+This uses the bell curve formula.
 #### Other
@ -82,43 +108,74 @@ E.g if mean == 50 and stddev == 14, then 71 ==> +1.5 sigma.
 - **float bellCurve(float f)** returns probability **P(x == f)**.
 ## Performance
 Indicative numbers for 1000 calls, timing in micros.
 Arduino UNO, 16 MHz, IDE 1.8.19
 |  function     |  0.1.0   |  0.1.1   |  notes  |
 |:--------------|:--------:|:--------:|:--------|
 |  P_smaller    |  375396  |  365964  |
 |  P_larger     |  384368  |  375032  |
 |  P_between    |  265624  |  269176  |
 |  normalize    |   44172  |   23024  |
 |  bellCurve    |  255728  |  205460  |
 |  approx.bell  |  764028  |  719184  |  see examples 
 ESP32, 240 MHz, IDE 1.8.19
 |  function     |  0.1.0   |  0.1.1   |  notes  |
 |:--------------|:--------:|:--------:|:--------|
 |  P_smaller    |    -     |    4046  |
 |  P_larger     |    -     |    4043  |
 |  P_between    |    -     |    3023  |
 |  normalize    |    -     |     592  |
 |  bellCurve    |    -     |   13522  |
 |  approx.bell  |    -     |    7300  | 
 ## Future
 #### Must
 - documentation
-  - mu + sigma character
+
 - unit tests
 #### Should
 - optimize performance
  - remove division by stddev
 - optimize accuracy
  - revisit lookup of MultiMap
  - (-10 .. 0) might be more accurate (significant digits)?
  - double instead of floats? (good table?)
-
+  - make use of equidistant \_\_z\[] table
 #### Could
 - **void setMean(float f)**
 - **float getMean()**
 - **void setStddev(float f)**
 - **float getStddev()**
 - default values for **begin(0,1)**
 - add examples
  - e.g. temperature (DS18B20 or DHT22)
  - e.g. loadcell (HX711)
 - does the stddev needs to be positive, 
  - what happens if negative values are allowed?
 - equality test Gauss objects
 - move code to .cpp file? (rather small lib).
 - embed MultiMap hardcoded instead of library dependency
- **bellCurve()** => **Z()**?
+- add unit tests
 - remove **\_stddev** as **\_reciprokeSD** holds same information.
 - reverse normalization
  - G(100,25) which value has stddev 0.735?
 - **VAL(probability = 0.75)** ==>  134 whatever
  - Returns the value of the distribution for which the **CDF()** is at least probability.
  - Inverse of **P_smaller()**
 - **float P_outside(float f, float g)** returns probability **P(x < f) + P(g < x)**.
  - assuming no overlap. Use **P_outside() = 1 - P_between()**
 #### Won't (unless requested)
 - equality test Gauss objects
 - does the stddev needs to be positive? Yes.
  - what happens if negative values are allowed? P curve is reversed.
 - move code to .cpp file? (rather small lib).
 - **void setMean(float f)** can be done with begin()
 - **void setStddev(float f)** can be done with begin()
--- a/libraries/Gauss/examples/Gauss_performance/Gauss_performance.ino
+++ b/libraries/Gauss/examples/Gauss_performance/Gauss_performance.ino
@ -20,7 +20,7 @@ void setup(void)
  Serial.print("GAUSS_LIB_VERSION: ");
  Serial.println(GAUSS_LIB_VERSION);
  Serial.println();
-  Serial.println("Timing in micros");
+  Serial.println("Timing in micros (1000 calls)");
  Serial.println();
  test_1();
--- a/libraries/Gauss/examples/Gauss_performance/performance_0.1.1.txt
+++ b/libraries/Gauss/examples/Gauss_performance/performance_0.1.1.txt
@ -0,0 +1,16 @@
 Arduino UNO
 IDE 1.8.19
 Gauss_performance.ino
 GAUSS_LIB_VERSION: 0.1.1
 Timing in micros (1000 calls)
 P_smaller:	365964
 P_larger:	375032
 P_between:	269176
 normalize:	23024
 bellCurve:	205460
 approx.bell:	719184
 done...
--- a/libraries/Gauss/examples/Gauss_test/Gauss_test.ino
+++ b/libraries/Gauss/examples/Gauss_test/Gauss_test.ino
@ -17,9 +17,9 @@ void setup(void)
  Serial.println();
  test_1();
-  test_2();
+//  test_2();
-  test_3();
+//  test_3();
-  test_4();
+//  test_4();
  Serial.println("\ndone...");
 }
--- a/libraries/Gauss/keywords.txt
+++ b/libraries/Gauss/keywords.txt
@ -6,6 +6,8 @@ Gauss	KEYWORD1
 # Methods and Functions (KEYWORD2)
 begin	KEYWORD2
 getMean	KEYWORD2
 getStdDev	KEYWORD2
 P_smaller	KEYWORD2
 P_larger	KEYWORD2
--- a/libraries/Gauss/library.json
+++ b/libraries/Gauss/library.json
@ -23,7 +23,7 @@
      "version": "^0.1.7"
    }
  ],
-  "version": "0.1.0",
+  "version": "0.1.1",
  "license": "MIT",
  "frameworks": "arduino",
  "platforms": "*",
--- a/libraries/Gauss/library.properties
+++ b/libraries/Gauss/library.properties
@ -1,9 +1,9 @@
 name=Gauss
-version=0.1.0
+version=0.1.1
 author=Rob Tillaart <rob.tillaart@gmail.com>
 maintainer=Rob Tillaart <rob.tillaart@gmail.com>
 sentence=Library for the Gauss probability math.
-paragraph=
+paragraph=normal distribution.
 category=Data Processing
 url=https://github.com/RobTillaart/Gauss
 architectures=*
--- a/libraries/Gauss/test/unit_test_001.cpp
+++ b/libraries/Gauss/test/unit_test_001.cpp
@ -41,9 +41,13 @@ unittest(test_constructor)
 {
  Gauss G;
-  G.begin(0, 1);
+  assertEqualFloat(0.0, G.getMean(), 0.0001);
  assertEqualFloat(1.0, G.getStdDev(), 0.0001);
  assertEqualFloat(0.5, G.P_smaller(0), 0.0001);
-  assertEqualFloat(0.5, G.P_smaller(0), 0.001);
+  G.begin(10, 3);
  assertEqualFloat(10.0, G.getMean(), 0.0001);
  assertEqualFloat(3.0,  G.getStdDev(), 0.0001);
 }
@ -53,13 +57,17 @@ unittest(test_P_smaller)
  G.begin(0, 1);
-  assertEqualFloat(0.0013, G.P_smaller(-3.0), 0.001);
+  assertEqualFloat(0.0000, G.P_smaller(-6.0), 0.0001);
-  assertEqualFloat(0.0228, G.P_smaller(-2.0), 0.001);
+  assertEqualFloat(0.0001, G.P_smaller(-4.0), 0.0001);
-  assertEqualFloat(0.1587, G.P_smaller(-1.0), 0.001);
+  assertEqualFloat(0.0013, G.P_smaller(-3.0), 0.0001);
-  assertEqualFloat(0.5000, G.P_smaller(0.0), 0.001);
+  assertEqualFloat(0.0228, G.P_smaller(-2.0), 0.0001);
-  assertEqualFloat(0.8413, G.P_smaller(1.0), 0.001);
+  assertEqualFloat(0.1587, G.P_smaller(-1.0), 0.0001);
-  assertEqualFloat(0.9772, G.P_smaller(2.0), 0.001);
+  assertEqualFloat(0.5000, G.P_smaller(0.0), 0.0001);
-  assertEqualFloat(0.9987, G.P_smaller(3.0), 0.001);
+  assertEqualFloat(0.8413, G.P_smaller(1.0), 0.0001);
  assertEqualFloat(0.9772, G.P_smaller(2.0), 0.0001);
  assertEqualFloat(0.9987, G.P_smaller(3.0), 0.0001);
  assertEqualFloat(0.9999, G.P_smaller(4.0), 0.0001);
  assertEqualFloat(1.0000, G.P_smaller(6.0), 0.0001);
 }
@ -69,13 +77,13 @@ unittest(test_P_larger)
  G.begin(0, 1);
-  assertEqualFloat(0.9987, G.P_larger(-3.0), 0.001);
+  assertEqualFloat(0.9987, G.P_larger(-3.0), 0.0001);
-  assertEqualFloat(0.9772, G.P_larger(-2.0), 0.001);
+  assertEqualFloat(0.9772, G.P_larger(-2.0), 0.0001);
-  assertEqualFloat(0.8413, G.P_larger(-1.0), 0.001);
+  assertEqualFloat(0.8413, G.P_larger(-1.0), 0.0001);
-  assertEqualFloat(0.5000, G.P_larger(0.0), 0.001);
+  assertEqualFloat(0.5000, G.P_larger(0.0), 0.0001);
-  assertEqualFloat(0.1587, G.P_larger(1.0), 0.001);
+  assertEqualFloat(0.1587, G.P_larger(1.0), 0.0001);
-  assertEqualFloat(0.0228, G.P_larger(2.0), 0.001);
+  assertEqualFloat(0.0228, G.P_larger(2.0), 0.0001);
-  assertEqualFloat(0.0013, G.P_larger(3.0), 0.001);
+  assertEqualFloat(0.0013, G.P_larger(3.0), 0.0001);
 }
@ -85,13 +93,13 @@ unittest(test_P_between)
  G.begin(0, 1);
-  assertEqualFloat(0.4987, G.P_between(-3.0, 0.0), 0.001);
+  assertEqualFloat(0.4987, G.P_between(-3.0, 0.0), 0.0001);
-  assertEqualFloat(0.4772, G.P_between(-2.0, 0.0), 0.001);
+  assertEqualFloat(0.4772, G.P_between(-2.0, 0.0), 0.0001);
-  assertEqualFloat(0.3413, G.P_between(-1.0, 0.0), 0.001);
+  assertEqualFloat(0.3413, G.P_between(-1.0, 0.0), 0.0001);
-  assertEqualFloat(0.0000, G.P_between(0.0, 0.0), 0.001);
+  assertEqualFloat(0.0000, G.P_between(0.0, 0.0), 0.0001);
-  assertEqualFloat(0.3413, G.P_between(0.0, 1.0), 0.001);
+  assertEqualFloat(0.3413, G.P_between(0.0, 1.0), 0.0001);
-  assertEqualFloat(0.4772, G.P_between(0.0, 2.0), 0.001);
+  assertEqualFloat(0.4772, G.P_between(0.0, 2.0), 0.0001);
-  assertEqualFloat(0.4987, G.P_between(0.0, 3.0), 0.001);
+  assertEqualFloat(0.4987, G.P_between(0.0, 3.0), 0.0001);
 }
@ -101,13 +109,29 @@ unittest(test_P_equal)
  G.begin(0, 1);
-  assertEqualFloat(0.004432, G.P_equal(-3.0), 0.001);
+  assertEqualFloat(0.004432, G.P_equal(-3.0), 0.0001);
-  assertEqualFloat(0.053991, G.P_equal(-2.0), 0.001);
+  assertEqualFloat(0.053991, G.P_equal(-2.0), 0.0001);
-  assertEqualFloat(0.241971, G.P_equal(-1.0), 0.001);
+  assertEqualFloat(0.241971, G.P_equal(-1.0), 0.0001);
-  assertEqualFloat(0.398942, G.P_equal(0.0), 0.001);
+  assertEqualFloat(0.398942, G.P_equal(0.0), 0.0001);
-  assertEqualFloat(0.241971, G.P_equal(1.0), 0.001);
+  assertEqualFloat(0.241971, G.P_equal(1.0), 0.0001);
-  assertEqualFloat(0.053991, G.P_equal(2.0), 0.001);
+  assertEqualFloat(0.053991, G.P_equal(2.0), 0.0001);
-  assertEqualFloat(0.004432, G.P_equal(3.0), 0.001);
+  assertEqualFloat(0.004432, G.P_equal(3.0), 0.0001);
 }
 unittest(test_normailze)
 {
  Gauss G;
  G.begin(100, 25);
  assertEqualFloat(-3.0, G.normalize(25), 0.0001);
  assertEqualFloat(-2.0, G.normalize(50), 0.0001);
  assertEqualFloat(-1.0, G.normalize(75), 0.0001);
  assertEqualFloat(0.0, G.normalize(100), 0.0001);
  assertEqualFloat(1.0, G.normalize(125), 0.0001);
  assertEqualFloat(2.0, G.normalize(150), 0.0001);
  assertEqualFloat(3.0, G.normalize(175), 0.0001);
 }
@ -115,3 +139,4 @@ unittest_main()
 //  -- END OF FILE --