0.3.0 Histogram

This commit is contained in:
rob tillaart 2021-11-04 12:32:04 +01:00
parent 258ef3276d
commit 8d8eda436c
17 changed files with 656 additions and 150 deletions

View File

@ -2,6 +2,10 @@ compile:
# Choosing to run compilation tests on 2 different Arduino platforms
platforms:
- uno
- leonardo
- due
- zero
# - due
# - zero
# - leonardo
- m4
- esp32
# - esp8266
# - mega2560

View File

@ -4,10 +4,14 @@ name: Arduino CI
on: [push, pull_request]
jobs:
arduino_ci:
runTest:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v2
- uses: Arduino-CI/action@master
# Arduino-CI/action@v0.1.1
- uses: ruby/setup-ruby@v1
with:
ruby-version: 2.6
- run: |
gem install arduino_ci
arduino_ci.rb

View File

@ -0,0 +1,81 @@
//
// FILE: hist_find_performance.ino
// AUTHOR: Rob Tillaart
// VERSION: 0.1.0
// DATE: 2021-11-03
//
// PURPOSE: indication of histogram find() performance
//
#include "histogram.h"
float b[100];
Histogram hist(100, b);
uint32_t start, duration;
uint32_t lastTime = 0;
void setup()
{
  Serial.begin(115200);
  Serial.println(__FILE__);
  Serial.print("\nHistogram version: ");
  Serial.println(HISTOGRAM_LIB_VERSION);

  //  boundaries are 0, 10, 20, ... 990
  int slot = 0;
  while (slot < 100)
  {
    b[slot] = slot * 10.0;
    slot++;
  }

  Serial.print("# buckets: ");
  Serial.println(hist.size());

  //  Probe values spread over the boundary range; the last one (7654)
  //  lies beyond the highest boundary (990).
  const int probes[4] = { 4, 54, 654, 7654 };
  for (uint8_t p = 0; p < 4; p++)
  {
    int x = probes[p];

    //  time a single find() call
    start = micros();
    int y = hist.find(x);
    duration = micros() - start;

    Serial.print("Duration: ");
    Serial.println(duration);
    Serial.print(" Bucket: ");
    Serial.println(y);
    delay(10);
  }

  Serial.println();
}
// Intentionally empty: this sketch performs all its measurements once in setup().
void loop()
{
}
// -- END OF FILE --

View File

@ -91,4 +91,5 @@ void loop()
}
}
// END OF FILE
// -- END OF FILE --

View File

@ -0,0 +1,66 @@
//
// FILE: hist_test_big.ino
// AUTHOR: Rob Tillaart
// VERSION: 0.1.1
// DATE: 2012-12-23
//
// PURPOSE: demo histogram frequency
// run on ESP32 -
// view in Serial plotter to graph distribution
#include "histogram.h"
float b[200]; // MIGHT NOT WORK ON AVR !!
Histogram hist(200, b);
uint32_t lastTime = 0;
const uint32_t threshold = 1000; // milliseconds, for updating display
void setup()
{
  Serial.begin(115200);
  Serial.println(__FILE__);
  Serial.print("\nHistogram version: ");
  Serial.println(HISTOGRAM_LIB_VERSION);

  //  The histogram only keeps a pointer to b[], so the boundaries
  //  (0, 1, 2, ... 199) are filled in here before any sample is added.
  int k = 0;
  while (k < 200)
  {
    b[k] = k;
    k++;
  }
}
void loop()
{
  //  Sample generator: the average of n uniform random draws.
  //    n = 1   flat line = uniform distribution
  //    n = 2   "triangles"
  //    n >= 3  some normal distribution; higher n gives smaller peaks
  //  Alternative generators that were tried:
  //    int x = 180 * sin(random(10000) * PI / 20000);
  //    float x = 180 * sqrt(random(1000) * 0.001);
  const int n = 5;
  int total = 0;
  int draw = 0;
  while (draw < n)
  {
    total += random(180);
    draw++;
  }
  int x = total / n;
  hist.add(x);

  //  Dump the buckets only when more than threshold milliseconds
  //  have passed since the previous dump.
  uint32_t now = millis();
  if (now - lastTime <= threshold)
  {
    return;
  }
  lastTime = now;

  //  one bucket count per line, suitable for the Serial plotter
  for (int slot = 0; slot < hist.size() - 1; slot++)
  {
    Serial.println(hist.bucket(slot));
  }
}
// -- END OF FILE --

View File

@ -7,6 +7,7 @@
// PURPOSE: test histogram library
//
#include "histogram.h"
float b[] = {
@ -14,6 +15,7 @@ float b[] = {
Histogram hist(7, b);
void setup()
{
Serial.begin(115200);
@ -22,6 +24,7 @@ void setup()
Serial.println(HISTOGRAM_LIB_VERSION);
}
void loop()
{
int x = random(1000);
@ -29,7 +32,7 @@ void loop()
Serial.print(hist.count());
Serial.print("\t");
for (int i = 0; i < (hist.size() -1); i++)
for (int i = 0; i < (hist.size() - 1); i++)
{
Serial.print(hist.CDF(b[i]), 2);
Serial.print("\t");
@ -41,3 +44,5 @@ void loop()
delay(10);
}
// -- END OF FILE --

View File

@ -7,8 +7,10 @@
// PURPOSE: test histogram frequency
//
#include "histogram.h"
// boundaries array does not need to be equally distributed.
float bounds[] = { 0, 100, 200, 300, 325, 350, 375, 400, 500, 600, 700, 800, 900, 1000 };
@ -17,6 +19,7 @@ Histogram hist(14, bounds);
uint32_t lastTime = 0;
const uint32_t threshold = 1000; // milliseconds, for updating display
void setup()
{
Serial.begin(115200);
@ -29,6 +32,7 @@ void setup()
Serial.println();
}
void loop()
{
// "generator" for histogram data
@ -61,3 +65,5 @@ void loop()
}
}
// -- END OF FILE --

View File

@ -0,0 +1,102 @@
//
// FILE: hist_test_level.ino
// AUTHOR: Rob Tillaart
// VERSION: 0.1.0
// DATE: 2021-11-04
//
// PURPOSE: test histogram
//
#include "histogram.h"
// boundaries does not need to be equally distributed.
float b[100];
Histogram hist(100, b);
uint32_t lastTime = 0;
const uint32_t threshold = 25; // milliseconds, for updating display
void setup()
{
  //  Prints how many buckets are below / at / above level 0,
  //  followed by the total number of add() and sub() calls.
  auto report = []()
  {
    Serial.print("BELOW 0: ");
    Serial.println(hist.countBelow(0));
    Serial.print("LEVEL 0: ");
    Serial.println(hist.countLevel(0));
    Serial.print("ABOVE 0: ");
    Serial.println(hist.countAbove(0));
    Serial.print("COUNT : ");
    Serial.println(hist.count());
    Serial.println();
  };

  Serial.begin(115200);
  Serial.println();
  Serial.println(__FILE__);
  Serial.print("\nHistogram version: ");
  Serial.println(HISTOGRAM_LIB_VERSION);
  Serial.println();

  //  boundaries are 0, 10, 20, ... 990
  int k = 0;
  while (k < 100)
  {
    b[k] = k * 10.0;
    k++;
  }

  Serial.print("# buckets: ");
  Serial.println(hist.size());

  //  state of the empty histogram
  report();

  Serial.println("add 100 random numbers");
  for (int n = 100; n > 0; n--)
  {
    int sample = random(1000);
    hist.add(sample);
  }
  report();

  Serial.println("sub 100 random numbers");
  for (int n = 100; n > 0; n--)
  {
    int sample = random(1000);
    hist.sub(sample);
  }
  report();

  //  bucket with the lowest and the highest counter, index and value
  int minidx = hist.findMin();
  int maxidx = hist.findMax();

  Serial.print("MIN INDEX: ");
  Serial.print(minidx);
  Serial.print("\t");
  Serial.println(hist.bucket(minidx));

  Serial.print("MAX INDEX: ");
  Serial.print(maxidx);
  Serial.print("\t");
  Serial.println(hist.bucket(maxidx));
  Serial.println();

  Serial.println("done...");
}
// Intentionally empty: the whole test runs once in setup().
void loop()
{
}
// -- END OF FILE --

View File

@ -16,6 +16,7 @@ float b[] = {
Histogram hist(16, b);
void setup()
{
Serial.begin(115200);
@ -24,6 +25,7 @@ void setup()
Serial.println(HISTOGRAM_LIB_VERSION);
}
void loop()
{
int x = random(1000);
@ -43,4 +45,5 @@ void loop()
delay(10);
}
// END OF FILE
// -- END OF FILE --

View File

@ -16,6 +16,7 @@ float b[] = {
Histogram hist(16, b);
void setup()
{
Serial.begin(115200);
@ -24,6 +25,7 @@ void setup()
Serial.println(HISTOGRAM_LIB_VERSION);
}
void loop()
{
int x = random(800);
@ -39,8 +41,8 @@ void loop()
Serial.print("\t");
float sum = 0;
int i = 0;
for (i = 0; i< hist.size(); i++)
uint16_t i = 0;
for (i = 0; i < hist.size(); i++)
{
sum += hist.frequency(i);
Serial.print(sum, 2);
@ -55,4 +57,5 @@ void loop()
delay(10);
}
// END OF FILE
// -- END OF FILE --

View File

@ -1,7 +1,7 @@
//
// FILE: Histogram.cpp
// AUTHOR: Rob Tillaart
// VERSION: 0.2.1
// VERSION: 0.3.0
// PURPOSE: Histogram library for Arduino
// DATE: 2012-11-10
//
@ -11,131 +11,268 @@
// 0.1.2 2012-12-23 changed float to double; some comments
// 0.1.3 2013-09-29 testing a lot & refactoring
// 0.1.4 2015-03-06 stricter interface
// 0.1.5 2017-07-16 refactor, support for > 256 buckets; prevent alloc errors
// 0.1.5 2017-07-16 refactor, support for > 256 buckets; prevent alloc() errors
// 0.1.6 2017-07-27 revert double to float (issue #33)
// 0.2.0 2020-06-12 #pragma once, removed pre 1.0 support
// 0.2.1 2020-12-24 arduino-ci + unit tests
// 0.2.1 2020-12-24 Arduino-CI + unit tests
// 0.3.0 2021-11-02 update build-CI, add badges
// refactor readability
// add parameter for clear(value = 0)
// add findMin(), findMax()
// add countAbove(), countLevel(), countBelow().
// add setBucket(),
// change length to uint16_t ==> 65534
#include "histogram.h"
Histogram::Histogram(const int16_t len, float *bounds)
Histogram::Histogram(const uint16_t length, float *bounds)
{
_bounds = bounds;
_len = len + 1;
_data = (int32_t *) malloc((_len) * sizeof(int32_t));
_length = length + 1;
_data = (int32_t *) malloc((_length) * sizeof(int32_t));
if (_data) clear();
else _len = 0;
_cnt = 0;
else _length = 0;
_count = 0;
}
Histogram::~Histogram()
{
if (_data) free(_data);
}
// resets all counters
void Histogram::clear()
// resets all counters to value (default 0)
void Histogram::clear(int32_t value)
{
for (int16_t i = 0; i < _len; i++) _data[i] = 0;
_cnt = 0;
for (uint16_t i = 0; i < _length; i++) _data[i] = value;
_count = 0;
}
// adds a new value to the histogram - increasing
void Histogram::add(const float f)
void Histogram::add(const float value)
{
if (_len > 0)
if (_length > 0)
{
_data[find(f)]++;
_cnt++;
uint16_t index = find(value);
_data[index]++;
_count++;
}
// could return index or count.
}
// adds a new value to the histogram - decreasing
void Histogram::sub(const float f)
void Histogram::sub(const float value)
{
if (_len > 0)
if (_length > 0)
{
_data[find(f)]--;
_cnt++;
uint16_t index = find(value);
_data[index]--;
_count++;
}
// could return index or count.
}
// returns the count of a bucket
int32_t Histogram::bucket(const int16_t idx)
int32_t Histogram::bucket(const uint16_t index)
{
if (idx > _len) return 0;
return _data[idx];
if (index > _length) return 0;
return _data[index];
}
// returns the relative frequency of a bucket
float Histogram::frequency(const int16_t idx)
float Histogram::frequency(const uint16_t index)
{
if (_cnt == 0 || _len == 0) return NAN;
if ((_count == 0) || (_length == 0)) return NAN;
if (idx > _len) return 0; // diff with PMF
return (1.0 * _data[idx]) / _cnt;
if (index > _length) return 0; // differs from PMF()
return (1.0 * _data[index]) / _count;
}
// EXPERIMENTAL
// returns the probability of the bucket of a value
float Histogram::PMF(const float val)
float Histogram::PMF(const float value)
{
if (_cnt == 0 || _len == 0) return NAN;
if ((_count == 0) || (_length == 0)) return NAN;
int16_t idx = find(val);
return (1.0 * _data[idx]) / _cnt;
uint16_t index = find(value);
return (1.0 * _data[index]) / _count;
}
// EXPERIMENTAL
// returns the cummulative probability of
// values <= value
float Histogram::CDF(const float val)
{
if (_cnt == 0 || _len == 0) return NAN;
int16_t idx = find(val);
int32_t sum = 0;
for (int16_t i = 0; i <= idx; i++)
// EXPERIMENTAL
// returns the cumulative probability of
// values <= value
float Histogram::CDF(const float value)
{
if ((_count == 0) || (_length == 0)) return NAN;
// TODO: could be done in one loop?
uint16_t index = find(value);
int32_t sum = 0;
for (uint16_t i = 0; i <= index; i++)
{
sum += _data[i];
}
return (1.0 * sum) / _cnt;
return (1.0 * sum) / _count;
}
// EXPERIMENTAL
// returns the value of the original array for
// which the CDF is at least prob.
// which the CDF is at least prob(ability).
float Histogram::VAL(const float prob)
{
if (_cnt == 0 || _len == 0) return NAN;
if ((_count == 0) || (_length == 0)) return NAN;
float p = prob;
if (p < 0.0) p = 0.0;
if (p > 1.0) p = 1.0;
float probability = p * _cnt;
float probability = p * _count;
int32_t sum = 0;
for (int16_t i = 0; i < _len; i++)
for (uint16_t i = 0; i < _length; i++)
{
sum += _data[i];
if (sum >= probability && (i <(_len-1)) ) return _bounds[i];
if (sum >= probability && (i < (_length - 1)) )
{
return _bounds[i];
}
}
return INFINITY;
}
// returns the bucket number for value val
int16_t Histogram::find(const float val)
{
if (_len <= 0) return -1;
for (int16_t i = 0; i < (_len-1); i++)
// returns the bucket number for value
// - binary search, more memory ; faster for #buckets > 20 ?
// uint16_t Histogram::find(const float value)
// {
// if (_length <= 0) return -1;
// uint16_t low = 0, high = _length;
// uint16_t mid;
// while (high - low > 1)
// {
// mid = (low + high)/2;
// if (_bounds[mid] > value)
// {
// high = mid;
// }
// else
// {
// low = mid;
// }
// }
// if (_bounds[mid] > value) return mid;
// return _length - 1;
// }
// returns the bucket number for value
uint16_t Histogram::find(const float value)
{
if (_length <= 0) return -1;
for (uint16_t i = 0; i < (_length - 1); i++)
{
if (_bounds[i] >= val) return i;
if (_bounds[i] >= value)
{
return i;
}
}
return _len-1; // len?
// int16_t i = 0;
// while ((i < (_len-1)) && (_bounds[i] < val)) i++;
// return i;
return _length - 1;
}
// returns the index of the bucket holding the lowest counter;
// on a tie the lowest index wins.
// without buckets the -1 below is converted to 65535 by the uint16_t return type.
uint16_t Histogram::findMin()
{
  if (_length <= 0)
  {
    return -1;
  }

  uint16_t lowest = 0;
  uint16_t candidate = 1;
  while (candidate < _length)
  {
    //  strict comparison keeps the first of equal minima
    if (_data[candidate] < _data[lowest])
    {
      lowest = candidate;
    }
    candidate++;
  }
  return lowest;
}
// returns the index of the bucket holding the highest counter;
// on a tie the lowest index wins.
// without buckets the -1 below is converted to 65535 by the uint16_t return type.
uint16_t Histogram::findMax()
{
  if (_length <= 0)
  {
    return -1;
  }

  uint16_t highest = 0;
  uint16_t candidate = 1;
  while (candidate < _length)
  {
    //  strict comparison keeps the first of equal maxima
    if (_data[candidate] > _data[highest])
    {
      highest = candidate;
    }
    candidate++;
  }
  return highest;
}
// returns the number of buckets whose counter equals level exactly.
// without buckets the -1 below is converted to 65535 by the uint16_t return type.
uint16_t Histogram::countLevel(const int32_t level)
{
  if (_length <= 0) return -1;
  uint16_t buckets = 0;
  for (uint16_t i = 0; i < _length; i++)
  {
    if (_data[i] == level) buckets++;
  }
  return buckets;
}
// returns how many buckets hold a counter strictly greater than level.
// without buckets the -1 below is converted to 65535 by the uint16_t return type.
uint16_t Histogram::countAbove(const int32_t level)
{
  if (_length <= 0)
  {
    return -1;
  }

  uint16_t matches = 0;
  uint16_t slot = 0;
  while (slot < _length)
  {
    if (_data[slot] > level)
    {
      matches++;
    }
    slot++;
  }
  return matches;
}
// returns how many buckets hold a counter strictly smaller than level.
// without buckets the -1 below is converted to 65535 by the uint16_t return type.
uint16_t Histogram::countBelow(const int32_t level)
{
  if (_length <= 0)
  {
    return -1;
  }

  uint16_t matches = 0;
  uint16_t slot = 0;
  while (slot < _length)
  {
    if (_data[slot] < level)
    {
      matches++;
    }
    slot++;
  }
  return matches;
}
//////////////////////////////////////////////////////////////
//
// DERIVED CLASS
//
// Construction is forwarded unchanged to the Histogram base class.
Histogram16::Histogram16(const uint16_t length, float *bounds) : Histogram(length, bounds)
{
}

// histogram.h declares ~Histogram16(); without this definition every
// Histogram16 object fails at link time with an undefined reference.
// Nothing to release here: ~Histogram() frees the bucket array.
Histogram16::~Histogram16()
{
}
// Construction is forwarded unchanged to the Histogram base class.
Histogram8::Histogram8(const uint16_t length, float *bounds) : Histogram(length, bounds)
{
}

// histogram.h declares ~Histogram8(); without this definition every
// Histogram8 object fails at link time with an undefined reference.
// Nothing to release here: ~Histogram() frees the bucket array.
Histogram8::~Histogram8()
{
}
// -- END OF FILE --

View File

@ -2,44 +2,82 @@
//
// FILE: Histogram.h
// AUTHOR: Rob Tillaart
// VERSION: 0.2.1
// VERSION: 0.3.0
// PURPOSE: Histogram library for Arduino
// DATE: 2012-11-10
//
#include "Arduino.h"
#define HISTOGRAM_LIB_VERSION "0.2.1"
#define HISTOGRAM_LIB_VERSION (F("0.3.0"))
// Histogram distributes values over buckets and keeps a counter per bucket.
// The boundaries array is NOT copied, only the pointer is stored, so the
// array must stay alive as long as the histogram is used.
class Histogram
{
public:
  //  length = number of boundaries; the histogram gets length + 1 buckets.
  Histogram(const uint16_t length, float *bounds);
  ~Histogram();

  //  set all bucket counters to value and reset count() to 0
  void clear(int32_t value = 0);
  //  set a single bucket counter; an out-of-range index is ignored
  //  so that no memory outside the bucket array is written.
  void setBucket(const uint16_t index, int32_t value = 0) { if (index < _length) _data[index] = value; };

  //  increase / decrease the counter of the bucket value falls in;
  //  both increase count().
  void add(const float value);
  void sub(const float value);

  //  number of buckets
  inline uint16_t size() { return _length; };
  //  number of values added to all buckets
  inline uint32_t count() { return _count; };
  //  number of values added to single bucket
  int32_t bucket(const uint16_t index);

  float frequency(const uint16_t index);
  float PMF(const float value);
  float CDF(const float value);
  float VAL(const float prob);

  uint16_t find(const float value);
  uint16_t findMin();
  uint16_t findMax();
  uint16_t countLevel(const int32_t level);
  uint16_t countAbove(const int32_t level);
  uint16_t countBelow(const int32_t level);

protected:
  float * _bounds;    //  caller owned boundaries array
  int32_t * _data;    //  bucket counters, allocated in the constructor
  uint16_t _length;   //  number of buckets, 0 if the allocation failed
  uint32_t _count;    //  number of add() + sub() calls since clear()
};
//////////////////////////////////////////////////////////////
//
// DERIVED CLASS
//
// Experimental variant of Histogram; the README describes it as the
// same interface with smaller (16 bit) bucket counters.
class Histogram16 : public Histogram
{
public:
Histogram16(const uint16_t length, float *bounds);
~Histogram16();
protected:
// NOTE(review): this member hides Histogram::_data (int32_t *). The
// constructor in Histogram.cpp only forwards to the base class, so this
// pointer looks unallocated and unused - confirm before relying on the
// memory saving.
int16_t * _data;
};
// Experimental variant of Histogram; the README describes it as the
// same interface with smaller (8 bit) bucket counters.
class Histogram8 : public Histogram
{
public:
Histogram8(const uint16_t length, float *bounds);
~Histogram8();
protected:
// NOTE(review): this member hides Histogram::_data (int32_t *). The
// constructor in Histogram.cpp only forwards to the base class, so this
// pointer looks unallocated and unused - confirm before relying on the
// memory saving.
int8_t * _data;
};
// -- END OF FILE --

View File

@ -1,20 +1,33 @@
# Syntax Coloring Map For Histogram
# Syntax Colouring Map For Histogram
# Datatypes (KEYWORD1)
# Data types (KEYWORD1)
Histogram KEYWORD1
Histogram8 KEYWORD1
Histogram16 KEYWORD1
# Methods and Functions (KEYWORD2)
clear KEYWORD2
setBucket KEYWORD2
add KEYWORD2
sub KEYWORD2
size KEYWORD2
count KEYWORD2
bucket KEYWORD2
frequency KEYWORD2
PMF KEYWORD2
CDF KEYWORD2
VAL KEYWORD2
find KEYWORD2
findMin KEYWORD2
findMax KEYWORD2
countLevel KEYWORD2
countAbove KEYWORD2
countBelow KEYWORD2
# Constants (LITERAL1)
HISTOGRAM_LIB_VERSION LITERAL1

View File

@ -15,7 +15,7 @@
"type": "git",
"url": "https://github.com/RobTillaart/Histogram.git"
},
"version": "0.2.1",
"version": "0.3.0",
"license": "MIT",
"frameworks": "arduino",
"platforms": "*"

View File

@ -1,5 +1,5 @@
name=Histogram
version=0.2.1
version=0.3.0
author=Rob Tillaart <rob.tillaart@gmail.com>
maintainer=Rob Tillaart <rob.tillaart@gmail.com>
sentence=Arduino library for creating histograms math.

View File

@ -1,12 +1,16 @@
[![Arduino CI](https://github.com/RobTillaart/Histogram/workflows/Arduino%20CI/badge.svg)](https://github.com/marketplace/actions/arduino_ci)
[![Arduino-lint](https://github.com/RobTillaart/Histogram/actions/workflows/arduino-lint.yml/badge.svg)](https://github.com/RobTillaart/Histogram/actions/workflows/arduino-lint.yml)
[![JSON check](https://github.com/RobTillaart/Histogram/actions/workflows/jsoncheck.yml/badge.svg)](https://github.com/RobTillaart/Histogram/actions/workflows/jsoncheck.yml)
[![License: MIT](https://img.shields.io/badge/license-MIT-green.svg)](https://github.com/RobTillaart/Histogram/blob/master/LICENSE)
[![GitHub release](https://img.shields.io/github/release/RobTillaart/Histogram.svg?maxAge=3600)](https://github.com/RobTillaart/Histogram/releases)
# Histogram
Arduino library for creating histograms math.
## Description
One of the main applications for the Arduino board is reading and logging of sensor data.
@ -16,75 +20,124 @@ measurements. This is where this Histogram library comes in.
The Histogram distributes the values added to it into buckets and keeps count.
If you need more quantitative analysis, you might need the statistics library,
a- https://github.com/RobTillaart/Statistic
- https://github.com/RobTillaart/Statistic
## Interface
### Constructor
- **Histogram(uint8_t len, float \*bounds)** constructor, get an array of boundary values and array length
- **~Histogram()** destructor
### Base
- **void clear()** reset all counters
- **void add(float val)** add a value, increase count of bucket
- **void sub(float val)** 'add' a value, but decrease count
- **uint8_t size()** number of buckets
- **unsigned long count()** total number of values added
- **long bucket(uint8_t idx)** count of single bucket, can be negative due to **sub()**
- **float frequency(uint8_t idx)** the relative frequency of a bucket
- **uint8_t find(float f)** find the bucket for value f
#### working
When the class is initialized an array of the boundaries to define the borders of the
buckets is passed to the constructor. This array should be declared global as the
Histogram class does not copy the values to keep memory usage low. This allows to change
the boundaries runtime, so after a **clear()**, a new Histogram can be created.
The values in the boundary array do not need to be equidistant (equal in size).
The values in the boundary array do not need to be equidistant (equal in size)
but they need to be in ascending order.
Internally the library does not record the individual values, only the count per bucket.
If a new value is added - **add()** or **sub()** - the class checks in which bucket it belongs
and the buckets counter is increased.
If a new value is added - **add()** or **sub()** - the class checks in which bucket it
belongs and the buckets counter is increased.
The **sub()** function is used to decrease the count of a bucket and it can cause the count
to become below zero. ALthough seldom used but still depending on the application it can
be useful. E.g. when you want to compare two value generating streams, you let one stream
**add()** and the other **sub()**. If the histogram of both streams is similar they should cancel
each other out (more or less), and the value of all buckets should be around 0. \[not tried\].
The **sub()** function is used to decrease the count of a bucket, which can cause the
count to drop below zero. Although seldom used, this can be useful depending on the
application. E.g. when you want to compare two value generating streams, you let
one stream **add()** and the other **sub()**. If the histogram of both streams is
similar they should cancel each other out (more or less), and the value of all buckets
should be around 0. \[not tried\].
The **frequency()** function may be removed to reduce footprint as it can be calculated with
the formula **(1.0 \* bucket(i))/count()**.
The **frequency()** function may be removed to reduce footprint as it can be calculated
with the formula **(1.0 \* bucket(i))/count()**.
### Experimental
- **float PMF(float val)** Probability Mass Function
- **float CDF(float val)** Cumulative Distribution Function
- **float VAL(float prob)** Value Function
#### experimental: Histogram8 Histogram16
There are three experimental functions:
- **PMF()** is quite similar to frequency, but uses a value as parameter.
- **CDF()** gives the sum of frequencies <= value.
- **VAL()** is **CDF()** inverted.
Histogram8 and Histogram16 are classes with same interface but smaller buckets. Histogram can count to ±2^31 while often ±2^15 or even ±2^7 is sufficient. Saves memory.
| class name | length | count/bucket | maxmem |
|:------------|-------:|-------------:|-------:|
| Histogram | 65534 | ±2147483647 | 260 KB |
| Histogram8 | 65534 | ±127 | 65 KB |
| Histogram16 | 65534 | ±32767 | 130 KB |
The difference is the **\_data** array, to reduce the memory footprint.
Note: Maxmem is without the boundary array.
Performance optimizations are possible too however not essential for
the experimental version.
## Interface
### Constructor
- **Histogram(uint16_t length, float \*bounds)** constructor, get an array of boundary values and array length. Length should be less than 65534.
- **~Histogram()** destructor.
### Base
- **void clear(int32_t value = 0)** reset all bucket counters to value (default 0).
- **void add(float value)** add a value, increase count of bucket.
- **void sub(float value)** 'add' a value, but decrease count (subtract).
- **uint16_t size()** returns number of buckets.
- **uint32_t count()** returns total number of values added (or subtracted).
- **int32_t bucket(uint16_t index)** returns the count of single bucket, can be negative due to **sub()**
- **float frequency(uint16_t index)** returns the relative frequency of a bucket, always between 0.0 and 1.0.
### Helper functions
- **uint16_t find(float value)** returns the index of the bucket for value.
- **uint16_t findMin()** returns the (first) index of the bucket with the minimum value.
- **uint16_t findMax()** returns the (first) index of the bucket with the maximum value.
- **uint16_t countLevel(int32_t level)** returns the number of buckets with exactly that level (count).
- **uint16_t countAbove(int32_t level)** returns the number of buckets above level.
- **uint16_t countBelow(int32_t level)** returns the number of buckets below level.
### Probability Distribution Functions
There are three functions:
- **float PMF(float value)** Probability Mass Function. Quite similar to **frequency()**,
but uses a value as parameter.
- **float CDF(float value)** Cumulative Distribution Function.
Returns the sum of frequencies <= value. Always between 0.0 and 1.0.
- **float VAL(float prob)** Value Function, is **CDF()** inverted.
Returns the value of the original array for which the CDF is at least probability.
As the Arduino typically uses a small number of buckets these functions are quite
coarse/inaccurate (linear interpolation within bucket is still to be investigated)
coarse and/or inaccurate (linear interpolation within bucket is still to be investigated)
## Todo list
Note **PDF()** is a continuous function and therefore not applicable in discrete histogram.
- Copy the boundaries array?
- Additional values per bucket.
- Sum, Min, Max, (average can be derived)
- separate bucket-array for sub()
- improve strategy for **find()** the right bucket..
- investigate linear interpolation for **PMF()**, **CDF()** and **VAL()** functions to improve accuracy.
- explain **PMF()**, **CDF()** and **VAL()** functions
- clear individual buckets
- merge buckets
- bucket full / overflow warning.
- make github issues of the above...
## Operation
See examples
## Future
- performance - **find()** the right bucket. Binary search is faster - need testing.
- improve accuracy - linear interpolation for **PMF()**, **CDF()** and **VAL()**
- performance - merge loops in **CDF()** (the **find()** loop and the summing loop).
- performance - reverse loops - compare to zero.
- improve documentation
- explain **PMF()**, **CDF()** and **VAL()** functions.
- bucket full / overflow warning. The **add()** **sub()** should
return a bool to indicate that a bucket is (almost) full.
- 2D histograms ? e.g. positions on a grid.
#### expensive ideas
Expensive ideas in terms of memory or performance
- Additional values per bucket.
- Sum, Min, Max, (average can be derived)
- separate bucket-array for sub()
- Copy the boundaries array?

View File

@ -37,18 +37,6 @@ unittest_teardown()
{
}
/*
unittest(test_new_operator)
{
assertEqualINF(exp(800));
assertEqualINF(0.0/0.0);
assertEqualINF(42);
assertEqualNAN(INFINITY - INFINITY);
assertEqualNAN(0.0/0.0);
assertEqualNAN(42);
}
*/
unittest(test_constructor)
{
@ -66,6 +54,7 @@ unittest(test_constructor)
}
}
unittest(test_dice)
{
float diceValues[] = { 0.5, 1.5, 2.5, 3.5, 4.5, 5.5 };
@ -98,6 +87,7 @@ unittest(test_dice)
}
}
unittest_main()
// --------