0.3.4 Histogram

This commit is contained in:
Rob Tillaart 2023-07-24 12:51:25 +02:00
parent 4734e016e2
commit 3a488920d6
8 changed files with 279 additions and 85 deletions

View File

@ -6,6 +6,18 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/)
and this project adheres to [Semantic Versioning](http://semver.org/).
## [0.3.4] - 2023-06-30
- change return type **uint8_t clear()**, return status bucket
- change return type **uint8_t setBucket(value)**, return status bucket
- change return type **uint8_t add(value)**, return status bucket
- change return type **uint8_t sub(value)**, return status bucket
- add **uint8_t status()** last known status.
- add **int32_t sum()**, adds up all buckets.
- made **setBucket()** virtual.
- update / clean up readme.md
- minor improvements derived classes - still experimental
## [0.3.3] - 2023-02-21
- optimize loops
- update readme.md
@ -13,7 +25,6 @@ and this project adheres to [Semantic Versioning](http://semver.org/).
- update license 2023
- minor edits
## [0.3.2] - 2022-11-09
- add changelog.md
- add rp2040 to build-CI

View File

@ -29,14 +29,14 @@ void setup()
Serial.print("# buckets: ");
Serial.println(hist.size());
for (uint16_t i = 0; i < hist.size()-1; i++)
for (uint16_t i = 0; i < hist.size() - 1; i++)
{
Serial.print("\t");
Serial.print(b[i], 2);
}
Serial.println();
for (uint16_t i = 0; i < hist.size()-1; i++)
for (uint16_t i = 0; i < hist.size() - 1; i++)
{
Serial.print("\t");
Serial.print(hist.find(b[i]));
@ -59,7 +59,13 @@ void loop()
// Serial.print(x);
// Serial.print("\t");
// Serial.println(hist.find(x));
hist.add(x);
if (hist.add(x) == false)
{
Serial.print("ERR: \t");
Serial.print(x);
Serial.print("\t");
Serial.println(hist.find(x));
}
// update output
uint32_t now = millis();

View File

@ -14,7 +14,7 @@ float bounds[] = { 0, 100, 200, 300, 325, 350, 375, 400, 500, 600, 700, 800, 900
Histogram hist(14, bounds);
uint32_t lastTime = 0;
const uint32_t threshold = 1000; // milliseconds, for updating display
const uint32_t threshold = 5000; // milliseconds, for updating display
void setup()
@ -43,7 +43,7 @@ void loop()
{
lastTime = now;
for (int i = 0; i < hist.size(); i++)
for (uint16_t i = 0; i < hist.size(); i++)
{
Serial.print(i);
Serial.print("\t");

View File

@ -1,7 +1,7 @@
//
// FILE: Histogram.cpp
// AUTHOR: Rob Tillaart
// VERSION: 0.3.3
// VERSION: 0.3.4
// PURPOSE: Histogram library for Arduino
// DATE: 2012-11-10
@ -23,6 +23,8 @@ Histogram::Histogram(const uint16_t length, float *bounds)
_length = 0;
}
_count = 0;
_status = HISTO_OK;
_maxBucket = 2147483647;
}
@ -36,45 +38,75 @@ Histogram::~Histogram()
// resets all counters to value (default 0)
// Writes value into every bucket and resets the sample count to zero.
// Returns the status, which is also stored in _status:
//   HISTO_FULL  when value equals _maxBucket
//   HISTO_OK    otherwise
// NOTE(review): the void signature is the pre-0.3.4 form shown by the diff;
// the uint8_t signature directly below it is the current one.
void Histogram::clear(int32_t value)
uint8_t Histogram::clear(int32_t value)
{
for (uint16_t i = 0; i < _length; i++)
{
_data[i] = value;
}
_count = 0;
_status = HISTO_OK;
if (value == _maxBucket) _status = HISTO_FULL;
return _status;
}
// Overwrites the counter of bucket index with value.
// index is not range-checked here, so the caller must pass a valid bucket index.
// Returns HISTO_FULL when value equals _maxBucket, otherwise HISTO_OK;
// the same value is stored in _status.
// NOTE(review): the void signature is the pre-0.3.4 form shown by the diff;
// the uint8_t signature directly below it is the current one.
void Histogram::setBucket(const uint16_t index, int32_t value)
uint8_t Histogram::setBucket(const uint16_t index, int32_t value)
{
_data[index] = value;
_status = HISTO_OK;
if (value == _maxBucket) _status = HISTO_FULL;
return _status;
}
// adds a new value to the histogram - increasing
// Counts value: looks up its bucket with find() and increments that bucket.
// Status returned (and stored in _status) by the uint8_t version:
//   HISTO_ERR_LENGTH  _length is 0, nothing is counted
//   HISTO_ERR_FULL    the bucket already equals _maxBucket, nothing is counted
//   HISTO_FULL        the increment made the bucket reach _maxBucket
//   HISTO_OK          otherwise
// NOTE(review): this listing interleaves the removed (void) and added (uint8_t)
// lines of the 0.3.4 change, so some statements below appear twice.
void Histogram::add(const float value)
uint8_t Histogram::add(const float value)
{
if (_length > 0)
if (_length == 0)
{
uint16_t index = find(value);
_data[index]++;
_count++;
_status = HISTO_ERR_LENGTH;
return _status;
}
// return index or count.
uint16_t index = find(value);
// refuse to increment a bucket that already sits at the configured maximum
if (_data[index] == _maxBucket)
{
_status = HISTO_ERR_FULL;
return _status;
}
_data[index]++;
_count++;
_status = HISTO_OK;
// flag the call on which the bucket reaches the maximum
if (_data[index] == _maxBucket) _status = HISTO_FULL;
return _status;
}
// adds a new value to the histogram - decreasing
void Histogram::sub(const float value)
uint8_t Histogram::sub(const float value)
{
if (_length > 0)
if (_length == 0)
{
uint16_t index = find(value);
_data[index]--;
_count++;
_status = HISTO_ERR_LENGTH;
return _status;
}
// return index or count.
uint16_t index = find(value);
if (_data[index] == -_maxBucket)
{
_status = HISTO_ERR_FULL;
return _status;
}
_data[index]--;
_count++;
_status = HISTO_OK;
if (_data[index] == _maxBucket) _status = HISTO_FULL;
return _status;
}
uint8_t Histogram::status()
{
return _status;
}
@ -165,6 +197,17 @@ float Histogram::VAL(const float probability)
}
// Adds up the counters of all _length buckets and returns the total.
// The accumulator is an int32_t, the same width as the return type.
int32_t Histogram::sum()
{
  int32_t total = 0;
  uint16_t bucket = 0;
  while (bucket < _length)
  {
    total += _data[bucket];
    bucket++;
  }
  return total;
}
// returns the bucket number for value
// - binary search, more memory ; faster for #buckets > 20 ?
// uint16_t Histogram::find(const float value)
@ -285,18 +328,93 @@ uint16_t Histogram::countBelow(const int32_t level)
}
//////////////////////////////////////////////////////////////
//
// DERIVED CLASS
//
Histogram16::Histogram16(const uint16_t length, float *bounds) : Histogram(length, bounds)
// experimental use with care
int32_t Histogram::getMaxBucket()
{
return _maxBucket;
}
void Histogram::setMaxBucket(int32_t value)
{
_maxBucket = value;
}
//////////////////////////////////////////////////////////////
//
// DERIVED CLASS - HISTOGRAM16
//
// Builds a histogram with length + 1 buckets after the base constructor ran.
// Allocates an int16_t buffer for this class's own _data member; when the
// allocation fails _length is forced to 0. The bucket limit is set to 32767.
// NOTE(review): clear() is the inherited Histogram::clear(), which writes
// through the base-class _data pointer, not the buffer allocated here -
// confirm this is intended for the experimental class.
Histogram16::Histogram16(const uint16_t length, float *bounds) : Histogram(length, bounds)
{
  _bounds = bounds;
  _length = length + 1;
  _data = static_cast<int16_t *>(malloc(_length * sizeof(int16_t)));
  if (_data == NULL)
  {
    //  allocation failed: report an empty histogram
    _length = 0;
  }
  else
  {
    clear();
  }
  _count = 0;
  _status = HISTO_OK;
  _maxBucket = 32767;
}
// Releases the int16_t buffer obtained in the constructor, if there is one.
Histogram16::~Histogram16()
{
  if (_data != NULL)
  {
    free(_data);
  }
}
// Stores value in bucket index without range-checking index.
// Returns (and records in _status) HISTO_FULL when value equals _maxBucket,
// otherwise HISTO_OK.
uint8_t Histogram16::setBucket(const uint16_t index, int16_t value)
{
  _data[index] = value;
  _status = (value == _maxBucket) ? HISTO_FULL : HISTO_OK;
  return _status;
}
//////////////////////////////////////////////////////////////
//
// DERIVED CLASS - HISTOGRAM8
//
// Builds a histogram with length + 1 buckets after the base constructor ran.
// Allocates an int8_t buffer for this class's own _data member; when the
// allocation fails _length is forced to 0. The bucket limit is set to 127.
// NOTE(review): clear() is the inherited Histogram::clear(), which writes
// through the base-class _data pointer, not the buffer allocated here -
// confirm this is intended for the experimental class.
Histogram8::Histogram8(const uint16_t length, float *bounds) : Histogram(length, bounds)
{
  _bounds = bounds;
  _length = length + 1;
  _data = static_cast<int8_t *>(malloc(_length * sizeof(int8_t)));
  if (_data == NULL)
  {
    //  allocation failed: report an empty histogram
    _length = 0;
  }
  else
  {
    clear();
  }
  _count = 0;
  _status = HISTO_OK;
  _maxBucket = 127;
}
// Releases the int8_t buffer obtained in the constructor, if there is one.
Histogram8::~Histogram8()
{
  if (_data != NULL)
  {
    free(_data);
  }
}
// Stores value in bucket index without range-checking index.
// Returns (and records in _status) HISTO_FULL when value equals _maxBucket,
// otherwise HISTO_OK.
uint8_t Histogram8::setBucket(const uint16_t index, int8_t value)
{
  _data[index] = value;
  _status = (value == _maxBucket) ? HISTO_FULL : HISTO_OK;
  return _status;
}

View File

@ -2,26 +2,34 @@
//
// FILE: Histogram.h
// AUTHOR: Rob Tillaart
// VERSION: 0.3.3
// VERSION: 0.3.4
// PURPOSE: Histogram library for Arduino
// DATE: 2012-11-10
#include "Arduino.h"
#define HISTOGRAM_LIB_VERSION (F("0.3.3"))
#define HISTOGRAM_LIB_VERSION (F("0.3.4"))
// return STATUS add(), sub(), clear(), setBucket();
#define HISTO_OK 0x00 // idem
#define HISTO_FULL 0x01 // just got full
#define HISTO_ERR_FULL 0xFF // over- underflow
#define HISTO_ERR_LENGTH 0xFE // constructor issue.
class Histogram
{
public:
Histogram(const uint16_t length, float *bounds);
Histogram(const uint16_t length, float * bounds);
~Histogram();
void clear(int32_t value = 0);
void setBucket(const uint16_t index, int32_t value = 0);
void add(const float value);
void sub(const float value);
uint8_t clear(int32_t value = 0);
uint8_t add(const float value);
uint8_t sub(const float value);
virtual uint8_t setBucket(const uint16_t index, int32_t value = 0);
// returns last known status
uint8_t status();
// number of buckets
uint16_t size();
@ -36,6 +44,7 @@ public:
float PMF(const float value);
float CDF(const float value);
float VAL(const float probability);
int32_t sum();
uint16_t find(const float value);
uint16_t findMin();
@ -44,12 +53,18 @@ public:
uint16_t countAbove(const int32_t level);
uint16_t countBelow(const int32_t level);
// use with care
int32_t getMaxBucket();
void setMaxBucket(int32_t value);
protected:
float * _bounds;
int32_t * _data;
uint16_t _length;
uint32_t _count;
int32_t _maxBucket;
uint8_t _status;
};
@ -60,8 +75,11 @@ protected:
// Experimental Histogram variant that declares 16-bit bucket storage.
// The two constructor lines below are the removed and added forms of the
// same declaration as rendered by the diff (whitespace change only).
class Histogram16 : public Histogram
{
public:
Histogram16(const uint16_t length, float *bounds);
Histogram16(const uint16_t length, float * bounds);
~Histogram16();
// NOTE(review): value is int16_t whereas the base class declares
// setBucket(const uint16_t, int32_t); with a different parameter type this
// declaration hides the virtual base function rather than overriding it.
uint8_t setBucket(const uint16_t index, int16_t value = 0);
protected:
// NOTE(review): a second _data member; it shadows the int32_t * _data
// declared in Histogram.
int16_t * _data;
};
@ -70,8 +88,11 @@ protected:
// Experimental Histogram variant that declares 8-bit bucket storage.
// The two constructor lines below are the removed and added forms of the
// same declaration as rendered by the diff (whitespace change only).
class Histogram8 : public Histogram
{
public:
Histogram8(const uint16_t length, float *bounds);
Histogram8(const uint16_t length, float * bounds);
~Histogram8();
// NOTE(review): value is int8_t whereas the base class declares
// setBucket(const uint16_t, int32_t); with a different parameter type this
// declaration hides the virtual base function rather than overriding it.
uint8_t setBucket(const uint16_t index, int8_t value = 0);
protected:
// NOTE(review): a second _data member; it shadows the int32_t * _data
// declared in Histogram.
int8_t * _data;
};

View File

@ -15,7 +15,7 @@
"type": "git",
"url": "https://github.com/RobTillaart/Histogram.git"
},
"version": "0.3.3",
"version": "0.3.4",
"license": "MIT",
"frameworks": "arduino",
"platforms": "*",

View File

@ -1,5 +1,5 @@
name=Histogram
version=0.3.3
version=0.3.4
author=Rob Tillaart <rob.tillaart@gmail.com>
maintainer=Rob Tillaart <rob.tillaart@gmail.com>
sentence=Arduino library for creating histograms math.

View File

@ -23,6 +23,18 @@ If you need more quantitative analysis, you might need the statistics library,
- https://github.com/RobTillaart/Statistic
#### Related
- https://github.com/RobTillaart/Correlation
- https://github.com/RobTillaart/GST - Golden standard test metrics
- https://github.com/RobTillaart/Histogram
- https://github.com/RobTillaart/RunningAngle
- https://github.com/RobTillaart/RunningAverage
- https://github.com/RobTillaart/RunningMedian
- https://github.com/RobTillaart/statHelpers - combinations & permutations
- https://github.com/RobTillaart/Statistic
#### Working
When the class is initialized an array of the boundaries to define the borders of the
@ -34,12 +46,13 @@ The values in the boundary array do not need to be equidistant (equal in size)
but they need to be in ascending order.
Internally the library does not record the individual values, only the count per bucket.
If a new value is added - **add()** or **sub()** - the class checks in which bucket it
If a new value is added - **add(value)** - the class checks in which bucket it
belongs and the buckets counter is increased.
The **sub()** function is used to decrease the count of a bucket and it can cause the
count to become below zero. Although seldom used but still depending on the application
it can be useful. E.g. when you want to compare two value generating streams, you let
The **sub(value)** function is used to decrease the count of a bucket and it can
cause the count to become below zero.
Although seldom used, it can be useful depending on the application.
E.g. when you want to compare two value generating streams, you let
one stream **add()** and the other **sub()**. If the histogram of both streams is
similar they should cancel each other out (more or less), and the value of all buckets
should be around 0. \[not tried\].
@ -75,7 +88,7 @@ the experimental version.
#include "histogram.h"
```
### Constructor
#### Constructor
- **Histogram(uint16_t length, float \*bounds)** constructor, get an array of boundary values and array length.
Length should be less than 65534.
@ -86,19 +99,58 @@ Length should be less than 65534.
- **~Histogram16()** destructor.
### Base
#### maxBucket
By default the maxBucket size is defined as 127 (8 bit), 32767 (16 bit) or
2147483647 (32 bit) depending on the class used.
The functions below allow to set and get the maxBucket so the **add()** and
**sub()** function will reach **FULL** faster.
Useful in some applications e.g. games.
- **void setMaxBucket(int32_t value)** to have a user defined maxBucket level, e.g. 25
- **int32_t getMaxBucket()** returns the current maxBucket.
Please note it makes no sense to set maxBucket to a value larger than
the histogram type can handle.
Setting maxBucket to 300 for **Histogram8** will always fail as data can only
handle values between -128 .. 127.
#### Base
- **uint8_t clear(int32_t value = 0)** reset all bucket counters to value (default 0).
Returns status, see below.
- **uint8_t setBucket(const uint16_t index, int32_t value = 0)** store / overwrite a value of bucket.
Returns status, see below.
- **uint8_t add(float value)** add a value, increase count of bucket.
Returns status, see below.
- **uint8_t sub(float value)** 'add' a value, decrease (subtract) count of bucket.
This is less used and has some side effects, see **frequency()**.
Returns status, see below.
| Status | Value | Description |
|:------------------:|:-------:|:------------:|
| HISTO_OK | 0x00 | all is well
| HISTO_FULL | 0x01 | add() / sub() caused bucket full ( + or - )
| HISTO_ERR_FULL | 0xFF | cannot add() / sub(), overflow / underflow
| HISTO_ERR_LENGTH | 0xFE | length = 0 error (constructor)
- **void clear(float value = 0)** reset all bucket counters to value (default 0).
- **void setBucket(const uint16_t index, int32_t value = 0)** store / overwrite a value of bucket.
- **void add(float value)** add a value, increase count of bucket.
- **void sub(float value)** 'add' a value, but decrease count (subtract).
- **uint16_t size()** returns number of buckets.
- **uint32_t count()** returns total number of values added (or subtracted).
- **int32_t bucket(uint16_t index)** returns the count of single bucket, can be negative due to **sub()**
- **float frequency(uint16_t index)** returns the relative frequency of a bucket, always between 0.0 and 1.0.
- **int32_t bucket(uint16_t index)** returns the count of single bucket.
Can be negative if one uses **sub()**
- **float frequency(uint16_t index)** returns the relative frequency of a bucket.
This is always between -1.0 and 1.0.
Some notes about **frequency()**
- can return a negative value if an application uses **sub()**
- sum of all buckets will not add up to 1.0 if one uses **sub()**
- value (and thus sum) will deviate if **HISTO_ERR_FULL** has occurred.
### Helper functions
#### Helper functions
- **uint16_t find(float value)** returns the index of the bucket for value.
- **uint16_t findMin()** returns the (first) index of the bucket with the minimum value.
@ -108,9 +160,9 @@ Length should be less than 65534.
- **uint16_t countBelow(int32_t level)** returns the number of buckets below level.
### Probability Distribution Functions
#### Probability Distribution Functions
There are three functions:
There are three experimental functions:
- **float PMF(float value)** Probability Mass Function.
Quite similar to **frequency()**, but uses a value as parameter.
@ -118,20 +170,21 @@ Quite similar to **frequency()**, but uses a value as parameter.
Returns the sum of frequencies <= value. Always between 0.0 and 1.0.
- **float VAL(float probability)** Value Function, is **CDF()** inverted.
Returns the value of the original array for which the CDF is at least probability.
- **int32_t sum()** returns the sum of all buckets. (not experimental).
Just as with **frequency()** it is affected by the use of **sub()**,
including returning a negative value.
As the Arduino typical uses a small number of buckets these functions are quite
coarse and/or inaccurate (linear interpolation within bucket is still to be investigated)
As most Arduino sketches typically use a small number of buckets, these functions
are quite coarse and/or inaccurate, so indicative at best.
Linear interpolation within "last" bucket needs to be investigated, however it
introduces its own uncertainty. Alternative is to add last box for 50%.
Note **PDF()** is a continuous function and therefore not applicable in discrete histogram.
Note **PDF()** is a continuous function and therefore not applicable in a discrete histogram.
- https://en.wikipedia.org/wiki/Probability_mass_function PMF()
- https://en.wikipedia.org/wiki/Cumulative_distribution_function CDF() + VAL()
## Operation
See examples
- https://en.wikipedia.org/wiki/Probability_density_function PDF()
## Future
@ -140,45 +193,30 @@ See examples
#### Must
- improve documentation
- explain **PMF()**, **CDF()** and **VAL()** functions.
- or a link to a good site?
#### Should
- investigate performance - **find()** the right bucket.
- Binary search is faster
- Binary search is faster (above 20)
- need testing.
- mixed search, last part (< 20) linear?
- improve accuracy - linear interpolation for **PMF()**, **CDF()** and **VAL()**
- performance - merge loops in **PMF()**
- performance - reverse loops - compare to zero.
#### Could
- bucket full / overflow warning.
- The **add()** **sub()** could return a bool to indicate that a bucket is (almost) full.
- return index ...
- keep track of max filled histogram.
- max value per bin
- saturation indication of the whole histogram
- 2D histograms ? e.g. positions on a grid.
- see SparseMatrix
- **saturation()** indication of the whole histogram
- count / nr of bins?
- percentage readOut == frequency()
- int32_t total() = 100%
- getBucketPercent(idx)..
- clear selected bins?
- **float getBucketPercent(idx)**
- template class <bucketsizeType>.
#### Wont
- merge bins
#### expensive ideas
Expensive ideas in terms of memory or performance
- Additional values per bucket.
- Sum, Min, Max, (average can be derived)
- separate bucket-array for sub()
- Copy the boundaries array?
- 2D histograms ? e.g. positions on a grid.
- see SparseMatrix