0.1.1 Gauss

2024-10-03 18:09:02 -04:00 · 2023-07-08 14:23:27 +02:00 · 2023-07-08 14:23:27 +02:00 · 33e70b036d
commit 33e70b036d
parent 8270df9d5a
11 changed files with 224 additions and 82 deletions
--- a/libraries/Gauss/CHANGELOG.md
+++ b/libraries/Gauss/CHANGELOG.md
@ -6,6 +6,21 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/)
 and this project adheres to [Semantic Versioning](http://semver.org/).


+## [0.1.1] - 2023-07-07
+- improve performance => reciprokeSD = 1.0/stddev
+- update readme.md
+  - add performance section (UNO / ESP32)
+  - elaborated future section
+- generated a more precise lookup table (8 decimals)
+- update unit tests
+- add default parameters to **bool begin(float mean = 0, float stddev = 1)**
+  - allow negative **stddev** but return false if stddev <= 0.
+- add **float getMean()** convenience function.
+- add **float getStdDev()** convenience function.
+- clean up a bit
+
+
+
 ## [0.1.0] - 2023-07-06
 - initial version

--- a/libraries/Gauss/Gauss.cpp
+++ b/libraries/Gauss/Gauss.cpp
@ -1,7 +1,7 @@
 //
 //    FILE: Gauss.cpp
 //  AUTHOR: Rob Tillaart
-// VERSION: 0.1.0
+// VERSION: 0.1.1
 // PURPOSE: Library for the Gauss probability math.
 //    DATE: 2023-07-06

--- a/libraries/Gauss/Gauss.h
+++ b/libraries/Gauss/Gauss.h
@ -2,7 +2,7 @@
 //
 //    FILE: Gauss.h
 //  AUTHOR: Rob Tillaart
-// VERSION: 0.1.0
+// VERSION: 0.1.1
 // PURPOSE: Library for the Gauss probability math.
 //    DATE: 2023-07-06

@ -10,7 +10,7 @@
 #include "Arduino.h"
 #include "MultiMap.h"

-#define GAUSS_LIB_VERSION       (F("0.1.0"))
+#define GAUSS_LIB_VERSION       (F("0.1.1"))


 class Gauss
@ -20,34 +20,48 @@ public:
  {
    _mean = 0;
    _stddev = 1;
+    _reciprokeSD = 1;
  }

-  bool begin(float mean, float stddev)
+  //  stddev should be positive.
+  bool begin(float mean = 0, float stddev = 1)
  {
    _mean = mean;
-    _stddev = abs(stddev);
-    return true;
+    _stddev = stddev;  //  should be positive
+    _reciprokeSD = 1.0 / _stddev;
+    return (stddev > 0);
+  }
+
+
+  float getMean()
+  {
+    return _mean;
+  }
+
+
+  float getStdDev()
+  {
+    return _stddev;
  }


  float P_smaller(float value)
  {
    if (_stddev == 0) return NAN;
-    return _P_smaller((value - _mean) / _stddev);
+    //  normalize(value)
+    return _P_smaller((value - _mean) * _reciprokeSD);
  }


  float P_larger(float value)
  {
    return 1.0 - P_smaller(value);
-    //  if (_stddev == 0) return NAN;
-    //  optimize math division?
-    //  return _P_larger((value - _mean) / _stddev);
  }


  float P_between(float p, float q)
  {
+    if (_stddev == 0) return NAN;
    if (p >= q) return 0;
    return P_smaller(q) - P_smaller(p);
  }
@ -56,8 +70,8 @@ public:
  float P_equal(float value)
  {
    if (_stddev == 0) return NAN;
-    float n = (value - _mean)/_stddev;
-    float c = 1.0 / (_stddev * sqrt(TWO_PI));
+    float n = (value - _mean) * _reciprokeSD;
+    float c = _reciprokeSD * (1.0 / sqrt(TWO_PI));
    return c * exp(-0.5 * n * n);
  }

@ -70,7 +84,7 @@ public:

  float normalize(float value)
  {
-    return (value - _mean)/_stddev;
+    return (value - _mean) * _reciprokeSD;
  }


@ -85,27 +99,40 @@ private:

  float _P_smaller(float x)
  {
-    //  TODO improve accuracy or reduce points.
+    //  NORM.DIST(mean, stddev, x, true)
    float __gauss[] = {
-      0.5000, 0.5398, 0.5793, 0.6179, 0.6554, 0.6915, 0.7257, 0.7580,
-      0.7881, 0.8159, 0.8413, 0.8643, 0.8849, 0.9032, 0.9192, 0.9332,
-      0.9452, 0.9554, 0.9641, 0.9713, 0.9772, 0.9821, 0.9861, 0.9893,
-      0.9918, 0.9938, 0.9953, 0.9965, 0.9974, 0.9981, 0.9987, 1.0000
+      0.50000000, 0.53982784, 0.57925971, 0.61791142,
+      0.65542174, 0.69146246, 0.72574688, 0.75803635,
+      0.78814460, 0.81593987, 0.84134475, 0.86433394,
+      0.88493033, 0.90319952, 0.91924334, 0.93319280,
+      0.94520071, 0.95543454, 0.96406968, 0.97128344,
+      0.97724987, 0.98213558, 0.98609655, 0.98927589,
+      0.99180246, 0.99379033, 0.99533881, 0.99653303,
+      0.99744487, 0.99813419, 0.99865010, 0.99996833,
+      0.99999971, 1.00000000
    };
+
+    //  0..60000  uint16_t = 68 bytes less
    float __z[] = {
      0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7,
      0.8, 0.9, 1.0, 1.1, 1.2, 1.3, 1.4, 1.5,
      1.6, 1.7, 1.8, 1.9, 2.0, 2.1, 2.2, 2.3,
-      2.4, 2.5, 2.6, 2.7, 2.8, 2.9, 3.0, 10.0
+      2.4, 2.5, 2.6, 2.7, 2.8, 2.9, 3.0, 4,0,
+      5.0, 6.0
    };

-    if (x < 0) return 1.0 - multiMap<float>(-x, __z, __gauss, 32);
-    return multiMap<float>(x, __z, __gauss, 32);
+    //  a dedicated MultiMap could exploit the fact that
+    //  the __z[] array is largely equidistant.
+    //  that could remove the __z[] array (almost) completely.
+    if (x < 0) return 1.0 - multiMap<float>(-x, __z, __gauss, 34);
+    return multiMap<float>(x, __z, __gauss, 34);
  }

  float _mean = 0;
-  float _stddev = 1;
+  float _stddev = 1;       //  not needed as _reciprokeSD holds same info?
+  float _reciprokeSD = 1;
 };


 //  -- END OF FILE --
+
--- a/libraries/Gauss/README.md
+++ b/libraries/Gauss/README.md
@ -16,7 +16,7 @@ Library for the Gauss probability math.
 Gauss is an experimental Arduino library to approximate the probability that a value is 
 smaller or larger than a given value.
 These under the premises of a Gaussian distribution with parameters **mean** and **stddev** 
-(a.k.a. average / mu and standard deviation / sigma).
+(a.k.a. average / mu / µ and standard deviation / sigma / σ).
 If these parameters are not given, 0 and 1 are used by default (normalized Gaussian distribution).

 The values are approximated with **MultiMap()** using a 32 points interpolated lookup.
@ -28,12 +28,14 @@ Return values are given as floats, if one needs percentages, just multiply by 10

 #### Accuracy

-The lookup table used has 32 points with 4 significant digits.
-Do not expect a higher accuracy / precision.
+The lookup table has 34 points with 8 decimals.  
+This matches the precision of float data type.
+Do not expect a very high accuracy / precision as interpolation is linear.
 For many applications this accuracy is sufficient.

-(links to a table with more significant digits is welcome).
+Values of the table are calculated with ```NORM.DIST(mean, stddev, x, true)```.

+Note: 0.1.0 was 32 points 4 decimals. Need to investigate reduction of points.

 #### Applications

@ -42,9 +44,20 @@ For many applications this accuracy is sufficient.
 - compare population data with individual


+#### Character
+
+|  parameter  |  name  |  ALT-code  |  char |
+|:-----------:|:------:|:----------:|:-----:|
+|  mean       |  mu    |  ALT-230   |   µ   |
+|  stddev     | sigma  |  ALT-229   |   σ   |
+
+- https://altcodesguru.com/greek-alt-codes.html
+
+
 #### Related

 - https://en.wikipedia.org/wiki/Normal_distribution
+- https://sphweb.bumc.bu.edu/otlt/mph-modules/bs/bs704_probability/bs704_probability9.html
 - https://github.com/RobTillaart/Multimap
 - https://github.com/RobTillaart/Statistic  (more stat links there).

@ -59,19 +72,32 @@ For many applications this accuracy is sufficient.
 #### Base

 - **Gauss()** constructor. Uses mean = 0 and stddev = 1 by default.
- **bool begin(float mean, float stddev)** set the mean and stddev.
-Returns true on success. If needed stddev is made positive.
+- **bool begin(float mean = 0, float stddev = 1)** set the mean and stddev.
+Returns true if stddev > 0 which should be so.
+Returns false if stddev <= 0, which could be a user choice.
+Note that if ```stddev == 0```, probabilities cannot be calculated 
+as the distribution is not Gaussian.
+The default values (0,1) gives the normalized Gaussian distribution.
+**begin()** can be called at any time to change the mean or stddev.
+- **float getMean()** returns current mean.
+- **float getStddev()** returns current stddev.
+

 #### Probability

+Probability functions return NAN if stddev == 0.
+
 - **float P_smaller(float f)** returns probability **P(x < f)**.
 Multiply by 100.0 to get the value as a percentage.
+A.k.a. **CDF()** Cumulative Distribution Function.
 - **float P_larger(float f)** returns probability **P(x > f)**.
 Multiply by 100.0 to get the value as a percentage.
+As the distribution is continuous **P_larger(f) == 1 - P_smaller(f)**.
 - **float P_between(float f, float g)** returns probability **P(f < x < g)**.
 Multiply by 100.0 to get the value as a percentage.
 - **float P_equal(float f)** returns probability **P(x == f)**.
-This is the bell curve formula.
+This uses the bell curve formula.
+

 #### Other

@ -82,43 +108,74 @@ E.g if mean == 50 and stddev == 14, then 71 ==> +1.5 sigma.
 - **float bellCurve(float f)** returns probability **P(x == f)**.


+## Performance
+
+Indicative numbers for 1000 calls, timing in micros.
+
+Arduino UNO, 16 MHz, IDE 1.8.19
+
+|  function     |  0.1.0   |  0.1.1   |  notes  |
+|:--------------|:--------:|:--------:|:--------|
+|  P_smaller    |  375396  |  365964  |
+|  P_larger     |  384368  |  375032  |
+|  P_between    |  265624  |  269176  |
+|  normalize    |   44172  |   23024  |
+|  bellCurve    |  255728  |  205460  |
+|  approx.bell  |  764028  |  719184  |  see examples 
+
+
+ESP32, 240 MHz, IDE 1.8.19
+
+|  function     |  0.1.0   |  0.1.1   |  notes  |
+|:--------------|:--------:|:--------:|:--------|
+|  P_smaller    |    -     |    4046  |
+|  P_larger     |    -     |    4043  |
+|  P_between    |    -     |    3023  |
+|  normalize    |    -     |     592  |
+|  bellCurve    |    -     |   13522  |
+|  approx.bell  |    -     |    7300  | 
+
+
 ## Future

 #### Must

 - documentation
-  - mu + sigma character
- unit tests
+

 #### Should

- optimize performance
-  - remove division by stddev
 - optimize accuracy
  - revisit lookup of MultiMap
  - (-10 .. 0) might be more accurate (significant digits)?
  - double instead of floats? (good table?)
-
+  - make use of equidistant \_\_z\[] table


 #### Could

- **void setMean(float f)**
- **float getMean()**
- **void setStddev(float f)**
- **float getStddev()**
- default values for **begin(0,1)**
 - add examples
  - e.g. temperature (DS18B20 or DHT22)
  - e.g. loadcell (HX711)
- does the stddev needs to be positive, 
-  - what happens if negative values are allowed?
- equality test Gauss objects
- move code to .cpp file? (rather small lib).
 - embed MultiMap hardcoded instead of library dependency
- **bellCurve()** => **Z()**?
+- add unit tests
+- remove **\_stddev** as **\_reciprokeSD** holds same information.
+- reverse normalization
+  - G(100,25) which value has stddev 0.735?
+- **VAL(probability = 0.75)** ==>  134 whatever
+  - Returns the value of the distribution for which the **CDF()** is at least probability.
+  - Inverse of **P_smaller()**
+- **float P_outside(float f, float g)** returns probability **P(x < f) + P(g < x)**.
+  - assuming no overlap. Use **P_outside() = 1 - P_between()**


 #### Won't (unless requested)

+- equality test Gauss objects
+- does the stddev needs to be positive? Yes.
+  - what happens if negative values are allowed? P curve is reversed.
+- move code to .cpp file? (rather small lib).
+- **void setMean(float f)** can be done with begin()
+- **void setStddev(float f)** can be done with begin()
+

--- a/libraries/Gauss/examples/Gauss_performance/Gauss_performance.ino
+++ b/libraries/Gauss/examples/Gauss_performance/Gauss_performance.ino
@ -20,7 +20,7 @@ void setup(void)
  Serial.print("GAUSS_LIB_VERSION: ");
  Serial.println(GAUSS_LIB_VERSION);
  Serial.println();
-  Serial.println("Timing in micros");
+  Serial.println("Timing in micros (1000 calls)");
  Serial.println();
  
  test_1();
--- a/libraries/Gauss/examples/Gauss_performance/performance_0.1.1.txt
+++ b/libraries/Gauss/examples/Gauss_performance/performance_0.1.1.txt
@ -0,0 +1,16 @@
+Arduino UNO
+IDE 1.8.19
+
+Gauss_performance.ino
+GAUSS_LIB_VERSION: 0.1.1
+
+Timing in micros (1000 calls)
+
+P_smaller:	365964
+P_larger:	375032
+P_between:	269176
+normalize:	23024
+bellCurve:	205460
+approx.bell:	719184
+
+done...
--- a/libraries/Gauss/examples/Gauss_test/Gauss_test.ino
+++ b/libraries/Gauss/examples/Gauss_test/Gauss_test.ino
@ -17,9 +17,9 @@ void setup(void)
  Serial.println();

  test_1();
-  test_2();
-  test_3();
-  test_4();
+//  test_2();
+//  test_3();
+//  test_4();

  Serial.println("\ndone...");
 }
--- a/libraries/Gauss/keywords.txt
+++ b/libraries/Gauss/keywords.txt
@ -6,6 +6,8 @@ Gauss	KEYWORD1
 # Methods and Functions (KEYWORD2)

 begin	KEYWORD2
+getMean	KEYWORD2
+getStdDev	KEYWORD2

 P_smaller	KEYWORD2
 P_larger	KEYWORD2
--- a/libraries/Gauss/library.json
+++ b/libraries/Gauss/library.json
@ -23,7 +23,7 @@
      "version": "^0.1.7"
    }
  ],
-  "version": "0.1.0",
+  "version": "0.1.1",
  "license": "MIT",
  "frameworks": "arduino",
  "platforms": "*",
--- a/libraries/Gauss/library.properties
+++ b/libraries/Gauss/library.properties
@ -1,9 +1,9 @@
 name=Gauss
-version=0.1.0
+version=0.1.1
 author=Rob Tillaart <rob.tillaart@gmail.com>
 maintainer=Rob Tillaart <rob.tillaart@gmail.com>
 sentence=Library for the Gauss probability math.
-paragraph=
+paragraph=normal distribution.
 category=Data Processing
 url=https://github.com/RobTillaart/Gauss
 architectures=*
--- a/libraries/Gauss/test/unit_test_001.cpp
+++ b/libraries/Gauss/test/unit_test_001.cpp
@ -41,9 +41,13 @@ unittest(test_constructor)
 {
  Gauss G;
 
-  G.begin(0, 1);
+  assertEqualFloat(0.0, G.getMean(), 0.0001);
+  assertEqualFloat(1.0, G.getStdDev(), 0.0001);
+  assertEqualFloat(0.5, G.P_smaller(0), 0.0001);

-  assertEqualFloat(0.5, G.P_smaller(0), 0.001);
+  G.begin(10, 3);
+  assertEqualFloat(10.0, G.getMean(), 0.0001);
+  assertEqualFloat(3.0,  G.getStdDev(), 0.0001);
 }


@ -53,13 +57,17 @@ unittest(test_P_smaller)

  G.begin(0, 1);

-  assertEqualFloat(0.0013, G.P_smaller(-3.0), 0.001);
-  assertEqualFloat(0.0228, G.P_smaller(-2.0), 0.001);
-  assertEqualFloat(0.1587, G.P_smaller(-1.0), 0.001);
-  assertEqualFloat(0.5000, G.P_smaller(0.0), 0.001);
-  assertEqualFloat(0.8413, G.P_smaller(1.0), 0.001);
-  assertEqualFloat(0.9772, G.P_smaller(2.0), 0.001);
-  assertEqualFloat(0.9987, G.P_smaller(3.0), 0.001);
+  assertEqualFloat(0.0000, G.P_smaller(-6.0), 0.0001);
+  assertEqualFloat(0.0001, G.P_smaller(-4.0), 0.0001);
+  assertEqualFloat(0.0013, G.P_smaller(-3.0), 0.0001);
+  assertEqualFloat(0.0228, G.P_smaller(-2.0), 0.0001);
+  assertEqualFloat(0.1587, G.P_smaller(-1.0), 0.0001);
+  assertEqualFloat(0.5000, G.P_smaller(0.0), 0.0001);
+  assertEqualFloat(0.8413, G.P_smaller(1.0), 0.0001);
+  assertEqualFloat(0.9772, G.P_smaller(2.0), 0.0001);
+  assertEqualFloat(0.9987, G.P_smaller(3.0), 0.0001);
+  assertEqualFloat(0.9999, G.P_smaller(4.0), 0.0001);
+  assertEqualFloat(1.0000, G.P_smaller(6.0), 0.0001);
 }


@ -69,13 +77,13 @@ unittest(test_P_larger)

  G.begin(0, 1);

-  assertEqualFloat(0.9987, G.P_larger(-3.0), 0.001);
-  assertEqualFloat(0.9772, G.P_larger(-2.0), 0.001);
-  assertEqualFloat(0.8413, G.P_larger(-1.0), 0.001);
-  assertEqualFloat(0.5000, G.P_larger(0.0), 0.001);
-  assertEqualFloat(0.1587, G.P_larger(1.0), 0.001);
-  assertEqualFloat(0.0228, G.P_larger(2.0), 0.001);
-  assertEqualFloat(0.0013, G.P_larger(3.0), 0.001);
+  assertEqualFloat(0.9987, G.P_larger(-3.0), 0.0001);
+  assertEqualFloat(0.9772, G.P_larger(-2.0), 0.0001);
+  assertEqualFloat(0.8413, G.P_larger(-1.0), 0.0001);
+  assertEqualFloat(0.5000, G.P_larger(0.0), 0.0001);
+  assertEqualFloat(0.1587, G.P_larger(1.0), 0.0001);
+  assertEqualFloat(0.0228, G.P_larger(2.0), 0.0001);
+  assertEqualFloat(0.0013, G.P_larger(3.0), 0.0001);
 }


@ -85,13 +93,13 @@ unittest(test_P_between)

  G.begin(0, 1);

-  assertEqualFloat(0.4987, G.P_between(-3.0, 0.0), 0.001);
-  assertEqualFloat(0.4772, G.P_between(-2.0, 0.0), 0.001);
-  assertEqualFloat(0.3413, G.P_between(-1.0, 0.0), 0.001);
-  assertEqualFloat(0.0000, G.P_between(0.0, 0.0), 0.001);
-  assertEqualFloat(0.3413, G.P_between(0.0, 1.0), 0.001);
-  assertEqualFloat(0.4772, G.P_between(0.0, 2.0), 0.001);
-  assertEqualFloat(0.4987, G.P_between(0.0, 3.0), 0.001);
+  assertEqualFloat(0.4987, G.P_between(-3.0, 0.0), 0.0001);
+  assertEqualFloat(0.4772, G.P_between(-2.0, 0.0), 0.0001);
+  assertEqualFloat(0.3413, G.P_between(-1.0, 0.0), 0.0001);
+  assertEqualFloat(0.0000, G.P_between(0.0, 0.0), 0.0001);
+  assertEqualFloat(0.3413, G.P_between(0.0, 1.0), 0.0001);
+  assertEqualFloat(0.4772, G.P_between(0.0, 2.0), 0.0001);
+  assertEqualFloat(0.4987, G.P_between(0.0, 3.0), 0.0001);
 }


@ -101,13 +109,29 @@ unittest(test_P_equal)

  G.begin(0, 1);

-  assertEqualFloat(0.004432, G.P_equal(-3.0), 0.001);
-  assertEqualFloat(0.053991, G.P_equal(-2.0), 0.001);
-  assertEqualFloat(0.241971, G.P_equal(-1.0), 0.001);
-  assertEqualFloat(0.398942, G.P_equal(0.0), 0.001);
-  assertEqualFloat(0.241971, G.P_equal(1.0), 0.001);
-  assertEqualFloat(0.053991, G.P_equal(2.0), 0.001);
-  assertEqualFloat(0.004432, G.P_equal(3.0), 0.001);
+  assertEqualFloat(0.004432, G.P_equal(-3.0), 0.0001);
+  assertEqualFloat(0.053991, G.P_equal(-2.0), 0.0001);
+  assertEqualFloat(0.241971, G.P_equal(-1.0), 0.0001);
+  assertEqualFloat(0.398942, G.P_equal(0.0), 0.0001);
+  assertEqualFloat(0.241971, G.P_equal(1.0), 0.0001);
+  assertEqualFloat(0.053991, G.P_equal(2.0), 0.0001);
+  assertEqualFloat(0.004432, G.P_equal(3.0), 0.0001);
+}
+
+
+unittest(test_normailze)
+{
+  Gauss G;
+
+  G.begin(100, 25);
+
+  assertEqualFloat(-3.0, G.normalize(25), 0.0001);
+  assertEqualFloat(-2.0, G.normalize(50), 0.0001);
+  assertEqualFloat(-1.0, G.normalize(75), 0.0001);
+  assertEqualFloat(0.0, G.normalize(100), 0.0001);
+  assertEqualFloat(1.0, G.normalize(125), 0.0001);
+  assertEqualFloat(2.0, G.normalize(150), 0.0001);
+  assertEqualFloat(3.0, G.normalize(175), 0.0001);
 }


@ -115,3 +139,4 @@ unittest_main()


 //  -- END OF FILE --
+