0.1.1 Gauss

This commit is contained in:
Rob Tillaart 2023-07-08 14:23:27 +02:00
parent 8270df9d5a
commit 33e70b036d
11 changed files with 224 additions and 82 deletions

View File

@ -6,6 +6,21 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/)
and this project adheres to [Semantic Versioning](http://semver.org/). and this project adheres to [Semantic Versioning](http://semver.org/).
## [0.1.1] - 2023-07-07
- improve performance => reciprokeSD = 1.0/stddev
- update readme.md
- add performance section (UNO / ESP32)
- elaborated future section
- generated a more precise lookup table (8 decimals)
- update unit tests
- add default parameters to **bool begin(float mean = 0, float stddev = 1)**
- allow negative **stddev** but return false if stddev <= 0.
- add **float getMean()** convenience function.
- add **float getStdDev()** convenience function.
- clean up a bit
## [0.1.0] - 2023-07-06 ## [0.1.0] - 2023-07-06
- initial version - initial version

View File

@ -1,7 +1,7 @@
// //
// FILE: Gauss.cpp // FILE: Gauss.cpp
// AUTHOR: Rob Tillaart // AUTHOR: Rob Tillaart
// VERSION: 0.1.0 // VERSION: 0.1.1
// PURPOSE: Library for the Gauss probability math. // PURPOSE: Library for the Gauss probability math.
// DATE: 2023-07-06 // DATE: 2023-07-06

View File

@ -2,7 +2,7 @@
// //
// FILE: Gauss.h // FILE: Gauss.h
// AUTHOR: Rob Tillaart // AUTHOR: Rob Tillaart
// VERSION: 0.1.0 // VERSION: 0.1.1
// PURPOSE: Library for the Gauss probability math. // PURPOSE: Library for the Gauss probability math.
// DATE: 2023-07-06 // DATE: 2023-07-06
@ -10,7 +10,7 @@
#include "Arduino.h" #include "Arduino.h"
#include "MultiMap.h" #include "MultiMap.h"
#define GAUSS_LIB_VERSION (F("0.1.0")) #define GAUSS_LIB_VERSION (F("0.1.1"))
class Gauss class Gauss
@ -20,34 +20,48 @@ public:
{ {
_mean = 0; _mean = 0;
_stddev = 1; _stddev = 1;
_reciprokeSD = 1;
} }
bool begin(float mean, float stddev) // stddev should be positive.
bool begin(float mean = 0, float stddev = 1)
{ {
_mean = mean; _mean = mean;
_stddev = abs(stddev); _stddev = stddev; // should be positive
return true; _reciprokeSD = 1.0 / _stddev;
return (stddev > 0);
}
float getMean()
{
return _mean;
}
float getStdDev()
{
return _stddev;
} }
float P_smaller(float value) float P_smaller(float value)
{ {
if (_stddev == 0) return NAN; if (_stddev == 0) return NAN;
return _P_smaller((value - _mean) / _stddev); // normalize(value)
return _P_smaller((value - _mean) * _reciprokeSD);
} }
float P_larger(float value) float P_larger(float value)
{ {
return 1.0 - P_smaller(value); return 1.0 - P_smaller(value);
// if (_stddev == 0) return NAN;
// optimize math division?
// return _P_larger((value - _mean) / _stddev);
} }
float P_between(float p, float q) float P_between(float p, float q)
{ {
if (_stddev == 0) return NAN;
if (p >= q) return 0; if (p >= q) return 0;
return P_smaller(q) - P_smaller(p); return P_smaller(q) - P_smaller(p);
} }
@ -56,8 +70,8 @@ public:
float P_equal(float value) float P_equal(float value)
{ {
if (_stddev == 0) return NAN; if (_stddev == 0) return NAN;
float n = (value - _mean)/_stddev; float n = (value - _mean) * _reciprokeSD;
float c = 1.0 / (_stddev * sqrt(TWO_PI)); float c = _reciprokeSD * (1.0 / sqrt(TWO_PI));
return c * exp(-0.5 * n * n); return c * exp(-0.5 * n * n);
} }
@ -70,7 +84,7 @@ public:
float normalize(float value) float normalize(float value)
{ {
return (value - _mean)/_stddev; return (value - _mean) * _reciprokeSD;
} }
@ -85,27 +99,40 @@ private:
float _P_smaller(float x) float _P_smaller(float x)
{ {
// TODO improve accuracy or reduce points. // NORM.DIST(mean, stddev, x, true)
float __gauss[] = { float __gauss[] = {
0.5000, 0.5398, 0.5793, 0.6179, 0.6554, 0.6915, 0.7257, 0.7580, 0.50000000, 0.53982784, 0.57925971, 0.61791142,
0.7881, 0.8159, 0.8413, 0.8643, 0.8849, 0.9032, 0.9192, 0.9332, 0.65542174, 0.69146246, 0.72574688, 0.75803635,
0.9452, 0.9554, 0.9641, 0.9713, 0.9772, 0.9821, 0.9861, 0.9893, 0.78814460, 0.81593987, 0.84134475, 0.86433394,
0.9918, 0.9938, 0.9953, 0.9965, 0.9974, 0.9981, 0.9987, 1.0000 0.88493033, 0.90319952, 0.91924334, 0.93319280,
0.94520071, 0.95543454, 0.96406968, 0.97128344,
0.97724987, 0.98213558, 0.98609655, 0.98927589,
0.99180246, 0.99379033, 0.99533881, 0.99653303,
0.99744487, 0.99813419, 0.99865010, 0.99996833,
0.99999971, 1.00000000
}; };
// 0..60000 uint16_t = 68 bytes less
float __z[] = { float __z[] = {
0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7,
0.8, 0.9, 1.0, 1.1, 1.2, 1.3, 1.4, 1.5, 0.8, 0.9, 1.0, 1.1, 1.2, 1.3, 1.4, 1.5,
1.6, 1.7, 1.8, 1.9, 2.0, 2.1, 2.2, 2.3, 1.6, 1.7, 1.8, 1.9, 2.0, 2.1, 2.2, 2.3,
2.4, 2.5, 2.6, 2.7, 2.8, 2.9, 3.0, 10.0 2.4, 2.5, 2.6, 2.7, 2.8, 2.9, 3.0, 4,0,
5.0, 6.0
}; };
if (x < 0) return 1.0 - multiMap<float>(-x, __z, __gauss, 32); // a dedicated MultiMap could exploit the fact that
return multiMap<float>(x, __z, __gauss, 32); // the __z[] array is largely equidistant.
// that could remove the __z[] array (almost) completely.
if (x < 0) return 1.0 - multiMap<float>(-x, __z, __gauss, 34);
return multiMap<float>(x, __z, __gauss, 34);
} }
float _mean = 0; float _mean = 0;
float _stddev = 1; float _stddev = 1; // not needed as _reciprokeSD holds same info?
float _reciprokeSD = 1;
}; };
// -- END OF FILE -- // -- END OF FILE --

View File

@ -16,7 +16,7 @@ Library for the Gauss probability math.
Gauss is an experimental Arduino library to approximate the probability that a value is Gauss is an experimental Arduino library to approximate the probability that a value is
smaller or larger than a given value. smaller or larger than a given value.
These under the premises of a Gaussian distribution with parameters **mean** and **stddev** These under the premises of a Gaussian distribution with parameters **mean** and **stddev**
(a.k.a. average / mu and standard deviation / sigma). (a.k.a. average / mu / µ and standard deviation / sigma / σ).
If these parameters are not given, 0 and 1 are used by default (normalized Gaussian distribution). If these parameters are not given, 0 and 1 are used by default (normalized Gaussian distribution).
The values are approximated with **MultiMap()** using a 32 points interpolated lookup. The values are approximated with **MultiMap()** using a 32 points interpolated lookup.
@ -28,12 +28,14 @@ Return values are given as floats, if one needs percentages, just multiply by 10
#### Accuracy #### Accuracy
The lookup table used has 32 points with 4 significant digits. The lookup table has 34 points with 8 decimals.
Do not expect a higher accuracy / precision. This matches the precision of float data type.
Do not expect a very high accuracy / precision as interpolation is linear.
For many applications this accuracy is sufficient. For many applications this accuracy is sufficient.
(links to a table with more significant digits is welcome). Values of the table are calculated with ```NORM.DIST(mean, stddev, x, true)```.
Note: 0.1.0 was 32 points 4 decimals. Need to investigate reduction of points.
#### Applications #### Applications
@ -42,9 +44,20 @@ For many applications this accuracy is sufficient.
- compare population data with individual - compare population data with individual
#### Character
| parameter | name | ALT-code | char |
|:-----------:|:------:|:----------:|:-----:|
| mean | mu | ALT-230 | µ |
| stddev | sigma | ALT-229 | σ |
- https://altcodesguru.com/greek-alt-codes.html
#### Related #### Related
- https://en.wikipedia.org/wiki/Normal_distribution - https://en.wikipedia.org/wiki/Normal_distribution
- https://sphweb.bumc.bu.edu/otlt/mph-modules/bs/bs704_probability/bs704_probability9.html
- https://github.com/RobTillaart/Multimap - https://github.com/RobTillaart/Multimap
- https://github.com/RobTillaart/Statistic (more stat links there). - https://github.com/RobTillaart/Statistic (more stat links there).
@ -59,19 +72,32 @@ For many applications this accuracy is sufficient.
#### Base #### Base
- **Gauss()** constructor. Uses mean = 0 and stddev = 1 by default. - **Gauss()** constructor. Uses mean = 0 and stddev = 1 by default.
- **bool begin(float mean, float stddev)** set the mean and stddev. - **bool begin(float mean = 0, float stddev = 1)** set the mean and stddev.
Returns true on success. If needed stddev is made positive. Returns true if stddev > 0 which should be so.
Returns false if stddev <= 0, which could be a user choice.
Note that if ```stddev == 0```, probabilities cannot be calculated
as the distribution is not Gaussian.
The default values (0,1) gives the normalized Gaussian distribution.
**begin()** can be called at any time to change the mean or stddev.
- **float getMean()** returns current mean.
- **float getStddev()** returns current stddev.
#### Probability #### Probability
Probability functions return NAN if stddev == 0.
- **float P_smaller(float f)** returns probability **P(x < f)**. - **float P_smaller(float f)** returns probability **P(x < f)**.
Multiply by 100.0 to get the value as a percentage. Multiply by 100.0 to get the value as a percentage.
A.k.a. **CDF()** Cumulative Distribution Function.
- **float P_larger(float f)** returns probability **P(x > f)**. - **float P_larger(float f)** returns probability **P(x > f)**.
Multiply by 100.0 to get the value as a percentage. Multiply by 100.0 to get the value as a percentage.
As the distribution is continuous **P_larger(f) == 1 - P_smaller(f)**.
- **float P_between(float f, float g)** returns probability **P(f < x < g)**. - **float P_between(float f, float g)** returns probability **P(f < x < g)**.
Multiply by 100.0 to get the value as a percentage. Multiply by 100.0 to get the value as a percentage.
- **float P_equal(float f)** returns probability **P(x == f)**. - **float P_equal(float f)** returns probability **P(x == f)**.
This is the bell curve formula. This uses the bell curve formula.
#### Other #### Other
@ -82,43 +108,74 @@ E.g if mean == 50 and stddev == 14, then 71 ==> +1.5 sigma.
- **float bellCurve(float f)** returns probability **P(x == f)**. - **float bellCurve(float f)** returns probability **P(x == f)**.
## Performance
Indicative numbers for 1000 calls, timing in micros.
Arduino UNO, 16 MHz, IDE 1.8.19
| function | 0.1.0 | 0.1.1 | notes |
|:--------------|:--------:|:--------:|:--------|
| P_smaller | 375396 | 365964 |
| P_larger | 384368 | 375032 |
| P_between | 265624 | 269176 |
| normalize | 44172 | 23024 |
| bellCurve | 255728 | 205460 |
| approx.bell | 764028 | 719184 | see examples
ESP32, 240 MHz, IDE 1.8.19
| function | 0.1.0 | 0.1.1 | notes |
|:--------------|:--------:|:--------:|:--------|
| P_smaller | - | 4046 |
| P_larger | - | 4043 |
| P_between | - | 3023 |
| normalize | - | 592 |
| bellCurve | - | 13522 |
| approx.bell | - | 7300 |
## Future ## Future
#### Must #### Must
- documentation - documentation
- mu + sigma character
- unit tests
#### Should #### Should
- optimize performance
- remove division by stddev
- optimize accuracy - optimize accuracy
- revisit lookup of MultiMap - revisit lookup of MultiMap
- (-10 .. 0) might be more accurate (significant digits)? - (-10 .. 0) might be more accurate (significant digits)?
- double instead of floats? (good table?) - double instead of floats? (good table?)
- make use of equidistant \_\_z\[] table
#### Could #### Could
- **void setMean(float f)**
- **float getMean()**
- **void setStddev(float f)**
- **float getStddev()**
- default values for **begin(0,1)**
- add examples - add examples
- e.g. temperature (DS18B20 or DHT22) - e.g. temperature (DS18B20 or DHT22)
- e.g. loadcell (HX711) - e.g. loadcell (HX711)
- does the stddev needs to be positive,
- what happens if negative values are allowed?
- equality test Gauss objects
- move code to .cpp file? (rather small lib).
- embed MultiMap hardcoded instead of library dependency - embed MultiMap hardcoded instead of library dependency
- **bellCurve()** => **Z()**? - add unit tests
- remove **\_stddev** as **\_reciprokeSD** holds same information.
- reverse normalization
- G(100,25) which value has stddev 0.735?
- **VAL(probability = 0.75)** ==> 134 whatever
- Returns the value of the distribution for which the **CDF()** is at least probability.
- Inverse of **P_smaller()**
- **float P_outside(float f, float g)** returns probability **P(x < f) + P(g < x)**.
- assuming no overlap. Use **P_outside() = 1 - P_between()**
#### Won't (unless requested) #### Won't (unless requested)
- equality test Gauss objects
- does the stddev needs to be positive? Yes.
- what happens if negative values are allowed? P curve is reversed.
- move code to .cpp file? (rather small lib).
- **void setMean(float f)** can be done with begin()
- **void setStddev(float f)** can be done with begin()

View File

@ -20,7 +20,7 @@ void setup(void)
Serial.print("GAUSS_LIB_VERSION: "); Serial.print("GAUSS_LIB_VERSION: ");
Serial.println(GAUSS_LIB_VERSION); Serial.println(GAUSS_LIB_VERSION);
Serial.println(); Serial.println();
Serial.println("Timing in micros"); Serial.println("Timing in micros (1000 calls)");
Serial.println(); Serial.println();
test_1(); test_1();

View File

@ -0,0 +1,16 @@
Arduino UNO
IDE 1.8.19
Gauss_performance.ino
GAUSS_LIB_VERSION: 0.1.1
Timing in micros (1000 calls)
P_smaller: 365964
P_larger: 375032
P_between: 269176
normalize: 23024
bellCurve: 205460
approx.bell: 719184
done...

View File

@ -17,9 +17,9 @@ void setup(void)
Serial.println(); Serial.println();
test_1(); test_1();
test_2(); // test_2();
test_3(); // test_3();
test_4(); // test_4();
Serial.println("\ndone..."); Serial.println("\ndone...");
} }

View File

@ -6,6 +6,8 @@ Gauss KEYWORD1
# Methods and Functions (KEYWORD2) # Methods and Functions (KEYWORD2)
begin KEYWORD2 begin KEYWORD2
getMean KEYWORD2
getStdDev KEYWORD2
P_smaller KEYWORD2 P_smaller KEYWORD2
P_larger KEYWORD2 P_larger KEYWORD2

View File

@ -23,7 +23,7 @@
"version": "^0.1.7" "version": "^0.1.7"
} }
], ],
"version": "0.1.0", "version": "0.1.1",
"license": "MIT", "license": "MIT",
"frameworks": "arduino", "frameworks": "arduino",
"platforms": "*", "platforms": "*",

View File

@ -1,9 +1,9 @@
name=Gauss name=Gauss
version=0.1.0 version=0.1.1
author=Rob Tillaart <rob.tillaart@gmail.com> author=Rob Tillaart <rob.tillaart@gmail.com>
maintainer=Rob Tillaart <rob.tillaart@gmail.com> maintainer=Rob Tillaart <rob.tillaart@gmail.com>
sentence=Library for the Gauss probability math. sentence=Library for the Gauss probability math.
paragraph= paragraph=normal distribution.
category=Data Processing category=Data Processing
url=https://github.com/RobTillaart/Gauss url=https://github.com/RobTillaart/Gauss
architectures=* architectures=*

View File

@ -41,9 +41,13 @@ unittest(test_constructor)
{ {
Gauss G; Gauss G;
G.begin(0, 1); assertEqualFloat(0.0, G.getMean(), 0.0001);
assertEqualFloat(1.0, G.getStdDev(), 0.0001);
assertEqualFloat(0.5, G.P_smaller(0), 0.0001);
assertEqualFloat(0.5, G.P_smaller(0), 0.001); G.begin(10, 3);
assertEqualFloat(10.0, G.getMean(), 0.0001);
assertEqualFloat(3.0, G.getStdDev(), 0.0001);
} }
@ -53,13 +57,17 @@ unittest(test_P_smaller)
G.begin(0, 1); G.begin(0, 1);
assertEqualFloat(0.0013, G.P_smaller(-3.0), 0.001); assertEqualFloat(0.0000, G.P_smaller(-6.0), 0.0001);
assertEqualFloat(0.0228, G.P_smaller(-2.0), 0.001); assertEqualFloat(0.0001, G.P_smaller(-4.0), 0.0001);
assertEqualFloat(0.1587, G.P_smaller(-1.0), 0.001); assertEqualFloat(0.0013, G.P_smaller(-3.0), 0.0001);
assertEqualFloat(0.5000, G.P_smaller(0.0), 0.001); assertEqualFloat(0.0228, G.P_smaller(-2.0), 0.0001);
assertEqualFloat(0.8413, G.P_smaller(1.0), 0.001); assertEqualFloat(0.1587, G.P_smaller(-1.0), 0.0001);
assertEqualFloat(0.9772, G.P_smaller(2.0), 0.001); assertEqualFloat(0.5000, G.P_smaller(0.0), 0.0001);
assertEqualFloat(0.9987, G.P_smaller(3.0), 0.001); assertEqualFloat(0.8413, G.P_smaller(1.0), 0.0001);
assertEqualFloat(0.9772, G.P_smaller(2.0), 0.0001);
assertEqualFloat(0.9987, G.P_smaller(3.0), 0.0001);
assertEqualFloat(0.9999, G.P_smaller(4.0), 0.0001);
assertEqualFloat(1.0000, G.P_smaller(6.0), 0.0001);
} }
@ -69,13 +77,13 @@ unittest(test_P_larger)
G.begin(0, 1); G.begin(0, 1);
assertEqualFloat(0.9987, G.P_larger(-3.0), 0.001); assertEqualFloat(0.9987, G.P_larger(-3.0), 0.0001);
assertEqualFloat(0.9772, G.P_larger(-2.0), 0.001); assertEqualFloat(0.9772, G.P_larger(-2.0), 0.0001);
assertEqualFloat(0.8413, G.P_larger(-1.0), 0.001); assertEqualFloat(0.8413, G.P_larger(-1.0), 0.0001);
assertEqualFloat(0.5000, G.P_larger(0.0), 0.001); assertEqualFloat(0.5000, G.P_larger(0.0), 0.0001);
assertEqualFloat(0.1587, G.P_larger(1.0), 0.001); assertEqualFloat(0.1587, G.P_larger(1.0), 0.0001);
assertEqualFloat(0.0228, G.P_larger(2.0), 0.001); assertEqualFloat(0.0228, G.P_larger(2.0), 0.0001);
assertEqualFloat(0.0013, G.P_larger(3.0), 0.001); assertEqualFloat(0.0013, G.P_larger(3.0), 0.0001);
} }
@ -85,13 +93,13 @@ unittest(test_P_between)
G.begin(0, 1); G.begin(0, 1);
assertEqualFloat(0.4987, G.P_between(-3.0, 0.0), 0.001); assertEqualFloat(0.4987, G.P_between(-3.0, 0.0), 0.0001);
assertEqualFloat(0.4772, G.P_between(-2.0, 0.0), 0.001); assertEqualFloat(0.4772, G.P_between(-2.0, 0.0), 0.0001);
assertEqualFloat(0.3413, G.P_between(-1.0, 0.0), 0.001); assertEqualFloat(0.3413, G.P_between(-1.0, 0.0), 0.0001);
assertEqualFloat(0.0000, G.P_between(0.0, 0.0), 0.001); assertEqualFloat(0.0000, G.P_between(0.0, 0.0), 0.0001);
assertEqualFloat(0.3413, G.P_between(0.0, 1.0), 0.001); assertEqualFloat(0.3413, G.P_between(0.0, 1.0), 0.0001);
assertEqualFloat(0.4772, G.P_between(0.0, 2.0), 0.001); assertEqualFloat(0.4772, G.P_between(0.0, 2.0), 0.0001);
assertEqualFloat(0.4987, G.P_between(0.0, 3.0), 0.001); assertEqualFloat(0.4987, G.P_between(0.0, 3.0), 0.0001);
} }
@ -101,13 +109,29 @@ unittest(test_P_equal)
G.begin(0, 1); G.begin(0, 1);
assertEqualFloat(0.004432, G.P_equal(-3.0), 0.001); assertEqualFloat(0.004432, G.P_equal(-3.0), 0.0001);
assertEqualFloat(0.053991, G.P_equal(-2.0), 0.001); assertEqualFloat(0.053991, G.P_equal(-2.0), 0.0001);
assertEqualFloat(0.241971, G.P_equal(-1.0), 0.001); assertEqualFloat(0.241971, G.P_equal(-1.0), 0.0001);
assertEqualFloat(0.398942, G.P_equal(0.0), 0.001); assertEqualFloat(0.398942, G.P_equal(0.0), 0.0001);
assertEqualFloat(0.241971, G.P_equal(1.0), 0.001); assertEqualFloat(0.241971, G.P_equal(1.0), 0.0001);
assertEqualFloat(0.053991, G.P_equal(2.0), 0.001); assertEqualFloat(0.053991, G.P_equal(2.0), 0.0001);
assertEqualFloat(0.004432, G.P_equal(3.0), 0.001); assertEqualFloat(0.004432, G.P_equal(3.0), 0.0001);
}
unittest(test_normailze)
{
Gauss G;
G.begin(100, 25);
assertEqualFloat(-3.0, G.normalize(25), 0.0001);
assertEqualFloat(-2.0, G.normalize(50), 0.0001);
assertEqualFloat(-1.0, G.normalize(75), 0.0001);
assertEqualFloat(0.0, G.normalize(100), 0.0001);
assertEqualFloat(1.0, G.normalize(125), 0.0001);
assertEqualFloat(2.0, G.normalize(150), 0.0001);
assertEqualFloat(3.0, G.normalize(175), 0.0001);
} }
@ -115,3 +139,4 @@ unittest_main()
// -- END OF FILE -- // -- END OF FILE --