0.1.1 Gauss

This commit is contained in:
Rob Tillaart 2023-07-08 14:23:27 +02:00
parent 8270df9d5a
commit 33e70b036d
11 changed files with 224 additions and 82 deletions

View File

@ -6,6 +6,21 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/)
and this project adheres to [Semantic Versioning](http://semver.org/).
## [0.1.1] - 2023-07-07
- improve performance => reciprokeSD = 1.0/stddev
- update readme.md
- add performance section (UNO / ESP32)
- elaborated future section
- generated a more precise lookup table (8 decimals)
- update unit tests
- add default parameters to **bool begin(float mean = 0, float stddev = 1)**
- allow negative **stddev** but return false if stddev <= 0.
- add **float getMean()** convenience function.
- add **float getStdDev()** convenience function.
- clean up a bit
## [0.1.0] - 2023-07-06
- initial version

View File

@ -1,7 +1,7 @@
//
// FILE: Gauss.cpp
// AUTHOR: Rob Tillaart
// VERSION: 0.1.0
// VERSION: 0.1.1
// PURPOSE: Library for the Gauss probability math.
// DATE: 2023-07-06

View File

@ -2,7 +2,7 @@
//
// FILE: Gauss.h
// AUTHOR: Rob Tillaart
// VERSION: 0.1.0
// VERSION: 0.1.1
// PURPOSE: Library for the Gauss probability math.
// DATE: 2023-07-06
@ -10,7 +10,7 @@
#include "Arduino.h"
#include "MultiMap.h"
#define GAUSS_LIB_VERSION (F("0.1.0"))
#define GAUSS_LIB_VERSION (F("0.1.1"))
class Gauss
@ -20,34 +20,48 @@ public:
{
_mean = 0;
_stddev = 1;
_reciprokeSD = 1;
}
bool begin(float mean, float stddev)
// stddev should be positive.
bool begin(float mean = 0, float stddev = 1)
{
_mean = mean;
_stddev = abs(stddev);
return true;
_stddev = stddev; // should be positive
_reciprokeSD = 1.0 / _stddev;
return (stddev > 0);
}
float getMean()
{
return _mean;
}
float getStdDev()
{
return _stddev;
}
float P_smaller(float value)
{
if (_stddev == 0) return NAN;
return _P_smaller((value - _mean) / _stddev);
// normalize(value)
return _P_smaller((value - _mean) * _reciprokeSD);
}
float P_larger(float value)
{
return 1.0 - P_smaller(value);
// if (_stddev == 0) return NAN;
// optimize math division?
// return _P_larger((value - _mean) / _stddev);
}
float P_between(float p, float q)
{
if (_stddev == 0) return NAN;
if (p >= q) return 0;
return P_smaller(q) - P_smaller(p);
}
@ -56,8 +70,8 @@ public:
float P_equal(float value)
{
if (_stddev == 0) return NAN;
float n = (value - _mean)/_stddev;
float c = 1.0 / (_stddev * sqrt(TWO_PI));
float n = (value - _mean) * _reciprokeSD;
float c = _reciprokeSD * (1.0 / sqrt(TWO_PI));
return c * exp(-0.5 * n * n);
}
@ -70,7 +84,7 @@ public:
float normalize(float value)
{
return (value - _mean)/_stddev;
return (value - _mean) * _reciprokeSD;
}
@ -85,27 +99,40 @@ private:
float _P_smaller(float x)
{
// TODO improve accuracy or reduce points.
// NORM.DIST(mean, stddev, x, true)
float __gauss[] = {
0.5000, 0.5398, 0.5793, 0.6179, 0.6554, 0.6915, 0.7257, 0.7580,
0.7881, 0.8159, 0.8413, 0.8643, 0.8849, 0.9032, 0.9192, 0.9332,
0.9452, 0.9554, 0.9641, 0.9713, 0.9772, 0.9821, 0.9861, 0.9893,
0.9918, 0.9938, 0.9953, 0.9965, 0.9974, 0.9981, 0.9987, 1.0000
0.50000000, 0.53982784, 0.57925971, 0.61791142,
0.65542174, 0.69146246, 0.72574688, 0.75803635,
0.78814460, 0.81593987, 0.84134475, 0.86433394,
0.88493033, 0.90319952, 0.91924334, 0.93319280,
0.94520071, 0.95543454, 0.96406968, 0.97128344,
0.97724987, 0.98213558, 0.98609655, 0.98927589,
0.99180246, 0.99379033, 0.99533881, 0.99653303,
0.99744487, 0.99813419, 0.99865010, 0.99996833,
0.99999971, 1.00000000
};
// 0..60000 uint16_t = 68 bytes less
float __z[] = {
0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7,
0.8, 0.9, 1.0, 1.1, 1.2, 1.3, 1.4, 1.5,
1.6, 1.7, 1.8, 1.9, 2.0, 2.1, 2.2, 2.3,
2.4, 2.5, 2.6, 2.7, 2.8, 2.9, 3.0, 10.0
2.4, 2.5, 2.6, 2.7, 2.8, 2.9, 3.0, 4,0,
5.0, 6.0
};
if (x < 0) return 1.0 - multiMap<float>(-x, __z, __gauss, 32);
return multiMap<float>(x, __z, __gauss, 32);
// a dedicated MultiMap could exploit the fact that
// the __z[] array is largely equidistant.
// that could remove the __z[] array (almost) completely.
if (x < 0) return 1.0 - multiMap<float>(-x, __z, __gauss, 34);
return multiMap<float>(x, __z, __gauss, 34);
}
float _mean = 0;
float _stddev = 1;
float _stddev = 1; // not needed as _reciprokeSD holds same info?
float _reciprokeSD = 1;
};
// -- END OF FILE --

View File

@ -16,7 +16,7 @@ Library for the Gauss probability math.
Gauss is an experimental Arduino library to approximate the probability that a value is
smaller or larger than a given value.
These under the premises of a Gaussian distribution with parameters **mean** and **stddev**
(a.k.a. average / mu and standard deviation / sigma).
(a.k.a. average / mu / µ and standard deviation / sigma / σ).
If these parameters are not given, 0 and 1 are used by default (normalized Gaussian distribution).
The values are approximated with **MultiMap()** using a 32 points interpolated lookup.
@ -28,12 +28,14 @@ Return values are given as floats, if one needs percentages, just multiply by 10
#### Accuracy
The lookup table used has 32 points with 4 significant digits.
Do not expect a higher accuracy / precision.
The lookup table has 34 points with 8 decimals.
This matches the precision of float data type.
Do not expect a very high accuracy / precision as interpolation is linear.
For many applications this accuracy is sufficient.
(links to a table with more significant digits is welcome).
Values of the table are calculated with ```NORM.DIST(mean, stddev, x, true)```.
Note: 0.1.0 was 32 points 4 decimals. Need to investigate reduction of points.
#### Applications
@ -42,9 +44,20 @@ For many applications this accuracy is sufficient.
- compare population data with individual
#### Character
| parameter | name | ALT-code | char |
|:-----------:|:------:|:----------:|:-----:|
| mean | mu | ALT-230 | µ |
| stddev | sigma | ALT-229 | σ |
- https://altcodesguru.com/greek-alt-codes.html
#### Related
- https://en.wikipedia.org/wiki/Normal_distribution
- https://sphweb.bumc.bu.edu/otlt/mph-modules/bs/bs704_probability/bs704_probability9.html
- https://github.com/RobTillaart/Multimap
- https://github.com/RobTillaart/Statistic (more stat links there).
@ -59,19 +72,32 @@ For many applications this accuracy is sufficient.
#### Base
- **Gauss()** constructor. Uses mean = 0 and stddev = 1 by default.
- **bool begin(float mean, float stddev)** set the mean and stddev.
Returns true on success. If needed stddev is made positive.
- **bool begin(float mean = 0, float stddev = 1)** set the mean and stddev.
Returns true if stddev > 0 which should be so.
Returns false if stddev <= 0, which could be a user choice.
Note that if ```stddev == 0```, probabilities cannot be calculated
as the distribution is not Gaussian.
The default values (0,1) gives the normalized Gaussian distribution.
**begin()** can be called at any time to change the mean or stddev.
- **float getMean()** returns current mean.
- **float getStddev()** returns current stddev.
#### Probability
Probability functions return NAN if stddev == 0.
- **float P_smaller(float f)** returns probability **P(x < f)**.
Multiply by 100.0 to get the value as a percentage.
A.k.a. **CDF()** Cumulative Distribution Function.
- **float P_larger(float f)** returns probability **P(x > f)**.
Multiply by 100.0 to get the value as a percentage.
As the distribution is continuous **P_larger(f) == 1 - P_smaller(f)**.
- **float P_between(float f, float g)** returns probability **P(f < x < g)**.
Multiply by 100.0 to get the value as a percentage.
- **float P_equal(float f)** returns probability **P(x == f)**.
This is the bell curve formula.
This uses the bell curve formula.
#### Other
@ -82,43 +108,74 @@ E.g if mean == 50 and stddev == 14, then 71 ==> +1.5 sigma.
- **float bellCurve(float f)** returns probability **P(x == f)**.
## Performance
Indicative numbers for 1000 calls, timing in micros.
Arduino UNO, 16 MHz, IDE 1.8.19
| function | 0.1.0 | 0.1.1 | notes |
|:--------------|:--------:|:--------:|:--------|
| P_smaller | 375396 | 365964 |
| P_larger | 384368 | 375032 |
| P_between | 265624 | 269176 |
| normalize | 44172 | 23024 |
| bellCurve | 255728 | 205460 |
| approx.bell | 764028 | 719184 | see examples
ESP32, 240 MHz, IDE 1.8.19
| function | 0.1.0 | 0.1.1 | notes |
|:--------------|:--------:|:--------:|:--------|
| P_smaller | - | 4046 |
| P_larger | - | 4043 |
| P_between | - | 3023 |
| normalize | - | 592 |
| bellCurve | - | 13522 |
| approx.bell | - | 7300 |
## Future
#### Must
- documentation
- mu + sigma character
- unit tests
#### Should
- optimize performance
- remove division by stddev
- optimize accuracy
- revisit lookup of MultiMap
- (-10 .. 0) might be more accurate (significant digits)?
- double instead of floats? (good table?)
- make use of equidistant \_\_z\[] table
#### Could
- **void setMean(float f)**
- **float getMean()**
- **void setStddev(float f)**
- **float getStddev()**
- default values for **begin(0,1)**
- add examples
- e.g. temperature (DS18B20 or DHT22)
- e.g. loadcell (HX711)
- does the stddev needs to be positive,
- what happens if negative values are allowed?
- equality test Gauss objects
- move code to .cpp file? (rather small lib).
- embed MultiMap hardcoded instead of library dependency
- **bellCurve()** => **Z()**?
- add unit tests
- remove **\_stddev** as **\_reciprokeSD** holds same information.
- reverse normalization
- G(100,25) which value has stddev 0.735?
- **VAL(probability = 0.75)** ==> 134 whatever
- Returns the value of the distribution for which the **CDF()** is at least probability.
- Inverse of **P_smaller()**
- **float P_outside(float f, float g)** returns probability **P(x < f) + P(g < x)**.
- assuming no overlap. Use **P_outside() = 1 - P_between()**
#### Won't (unless requested)
- equality test Gauss objects
- does the stddev needs to be positive? Yes.
- what happens if negative values are allowed? P curve is reversed.
- move code to .cpp file? (rather small lib).
- **void setMean(float f)** can be done with begin()
- **void setStddev(float f)** can be done with begin()

View File

@ -20,7 +20,7 @@ void setup(void)
Serial.print("GAUSS_LIB_VERSION: ");
Serial.println(GAUSS_LIB_VERSION);
Serial.println();
Serial.println("Timing in micros");
Serial.println("Timing in micros (1000 calls)");
Serial.println();
test_1();

View File

@ -0,0 +1,16 @@
Arduino UNO
IDE 1.8.19
Gauss_performance.ino
GAUSS_LIB_VERSION: 0.1.1
Timing in micros (1000 calls)
P_smaller: 365964
P_larger: 375032
P_between: 269176
normalize: 23024
bellCurve: 205460
approx.bell: 719184
done...

View File

@ -17,9 +17,9 @@ void setup(void)
Serial.println();
test_1();
test_2();
test_3();
test_4();
// test_2();
// test_3();
// test_4();
Serial.println("\ndone...");
}

View File

@ -6,6 +6,8 @@ Gauss KEYWORD1
# Methods and Functions (KEYWORD2)
begin KEYWORD2
getMean KEYWORD2
getStdDev KEYWORD2
P_smaller KEYWORD2
P_larger KEYWORD2

View File

@ -23,7 +23,7 @@
"version": "^0.1.7"
}
],
"version": "0.1.0",
"version": "0.1.1",
"license": "MIT",
"frameworks": "arduino",
"platforms": "*",

View File

@ -1,9 +1,9 @@
name=Gauss
version=0.1.0
version=0.1.1
author=Rob Tillaart <rob.tillaart@gmail.com>
maintainer=Rob Tillaart <rob.tillaart@gmail.com>
sentence=Library for the Gauss probability math.
paragraph=
paragraph=normal distribution.
category=Data Processing
url=https://github.com/RobTillaart/Gauss
architectures=*

View File

@ -41,9 +41,13 @@ unittest(test_constructor)
{
Gauss G;
G.begin(0, 1);
assertEqualFloat(0.0, G.getMean(), 0.0001);
assertEqualFloat(1.0, G.getStdDev(), 0.0001);
assertEqualFloat(0.5, G.P_smaller(0), 0.0001);
assertEqualFloat(0.5, G.P_smaller(0), 0.001);
G.begin(10, 3);
assertEqualFloat(10.0, G.getMean(), 0.0001);
assertEqualFloat(3.0, G.getStdDev(), 0.0001);
}
@ -53,13 +57,17 @@ unittest(test_P_smaller)
G.begin(0, 1);
assertEqualFloat(0.0013, G.P_smaller(-3.0), 0.001);
assertEqualFloat(0.0228, G.P_smaller(-2.0), 0.001);
assertEqualFloat(0.1587, G.P_smaller(-1.0), 0.001);
assertEqualFloat(0.5000, G.P_smaller(0.0), 0.001);
assertEqualFloat(0.8413, G.P_smaller(1.0), 0.001);
assertEqualFloat(0.9772, G.P_smaller(2.0), 0.001);
assertEqualFloat(0.9987, G.P_smaller(3.0), 0.001);
assertEqualFloat(0.0000, G.P_smaller(-6.0), 0.0001);
assertEqualFloat(0.0001, G.P_smaller(-4.0), 0.0001);
assertEqualFloat(0.0013, G.P_smaller(-3.0), 0.0001);
assertEqualFloat(0.0228, G.P_smaller(-2.0), 0.0001);
assertEqualFloat(0.1587, G.P_smaller(-1.0), 0.0001);
assertEqualFloat(0.5000, G.P_smaller(0.0), 0.0001);
assertEqualFloat(0.8413, G.P_smaller(1.0), 0.0001);
assertEqualFloat(0.9772, G.P_smaller(2.0), 0.0001);
assertEqualFloat(0.9987, G.P_smaller(3.0), 0.0001);
assertEqualFloat(0.9999, G.P_smaller(4.0), 0.0001);
assertEqualFloat(1.0000, G.P_smaller(6.0), 0.0001);
}
@ -69,13 +77,13 @@ unittest(test_P_larger)
G.begin(0, 1);
assertEqualFloat(0.9987, G.P_larger(-3.0), 0.001);
assertEqualFloat(0.9772, G.P_larger(-2.0), 0.001);
assertEqualFloat(0.8413, G.P_larger(-1.0), 0.001);
assertEqualFloat(0.5000, G.P_larger(0.0), 0.001);
assertEqualFloat(0.1587, G.P_larger(1.0), 0.001);
assertEqualFloat(0.0228, G.P_larger(2.0), 0.001);
assertEqualFloat(0.0013, G.P_larger(3.0), 0.001);
assertEqualFloat(0.9987, G.P_larger(-3.0), 0.0001);
assertEqualFloat(0.9772, G.P_larger(-2.0), 0.0001);
assertEqualFloat(0.8413, G.P_larger(-1.0), 0.0001);
assertEqualFloat(0.5000, G.P_larger(0.0), 0.0001);
assertEqualFloat(0.1587, G.P_larger(1.0), 0.0001);
assertEqualFloat(0.0228, G.P_larger(2.0), 0.0001);
assertEqualFloat(0.0013, G.P_larger(3.0), 0.0001);
}
@ -85,13 +93,13 @@ unittest(test_P_between)
G.begin(0, 1);
assertEqualFloat(0.4987, G.P_between(-3.0, 0.0), 0.001);
assertEqualFloat(0.4772, G.P_between(-2.0, 0.0), 0.001);
assertEqualFloat(0.3413, G.P_between(-1.0, 0.0), 0.001);
assertEqualFloat(0.0000, G.P_between(0.0, 0.0), 0.001);
assertEqualFloat(0.3413, G.P_between(0.0, 1.0), 0.001);
assertEqualFloat(0.4772, G.P_between(0.0, 2.0), 0.001);
assertEqualFloat(0.4987, G.P_between(0.0, 3.0), 0.001);
assertEqualFloat(0.4987, G.P_between(-3.0, 0.0), 0.0001);
assertEqualFloat(0.4772, G.P_between(-2.0, 0.0), 0.0001);
assertEqualFloat(0.3413, G.P_between(-1.0, 0.0), 0.0001);
assertEqualFloat(0.0000, G.P_between(0.0, 0.0), 0.0001);
assertEqualFloat(0.3413, G.P_between(0.0, 1.0), 0.0001);
assertEqualFloat(0.4772, G.P_between(0.0, 2.0), 0.0001);
assertEqualFloat(0.4987, G.P_between(0.0, 3.0), 0.0001);
}
@ -101,13 +109,29 @@ unittest(test_P_equal)
G.begin(0, 1);
assertEqualFloat(0.004432, G.P_equal(-3.0), 0.001);
assertEqualFloat(0.053991, G.P_equal(-2.0), 0.001);
assertEqualFloat(0.241971, G.P_equal(-1.0), 0.001);
assertEqualFloat(0.398942, G.P_equal(0.0), 0.001);
assertEqualFloat(0.241971, G.P_equal(1.0), 0.001);
assertEqualFloat(0.053991, G.P_equal(2.0), 0.001);
assertEqualFloat(0.004432, G.P_equal(3.0), 0.001);
assertEqualFloat(0.004432, G.P_equal(-3.0), 0.0001);
assertEqualFloat(0.053991, G.P_equal(-2.0), 0.0001);
assertEqualFloat(0.241971, G.P_equal(-1.0), 0.0001);
assertEqualFloat(0.398942, G.P_equal(0.0), 0.0001);
assertEqualFloat(0.241971, G.P_equal(1.0), 0.0001);
assertEqualFloat(0.053991, G.P_equal(2.0), 0.0001);
assertEqualFloat(0.004432, G.P_equal(3.0), 0.0001);
}
unittest(test_normailze)
{
Gauss G;
G.begin(100, 25);
assertEqualFloat(-3.0, G.normalize(25), 0.0001);
assertEqualFloat(-2.0, G.normalize(50), 0.0001);
assertEqualFloat(-1.0, G.normalize(75), 0.0001);
assertEqualFloat(0.0, G.normalize(100), 0.0001);
assertEqualFloat(1.0, G.normalize(125), 0.0001);
assertEqualFloat(2.0, G.normalize(150), 0.0001);
assertEqualFloat(3.0, G.normalize(175), 0.0001);
}
@ -115,3 +139,4 @@ unittest_main()
// -- END OF FILE --