0.3.0 Correlation

This commit is contained in:
rob tillaart 2023-01-22 15:55:51 +01:00
parent 4c90b3e839
commit f8c8180349
20 changed files with 211 additions and 130 deletions

View File

@ -6,7 +6,7 @@ jobs:
lint:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v2
- uses: actions/checkout@v3
- uses: arduino/arduino-lint-action@v1
with:
library-manager: update

View File

@ -8,7 +8,7 @@ jobs:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v2
- uses: actions/checkout@v3
- uses: ruby/setup-ruby@v1
with:
ruby-version: 2.6

View File

@ -10,7 +10,7 @@ jobs:
test:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v2
- uses: actions/checkout@v3
- name: json-syntax-check
uses: limitusus/json-syntax-check@v1
with:

View File

@ -6,12 +6,20 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/)
and this project adheres to [Semantic Versioning](http://semver.org/).
## [0.3.0] - 2023-01-22
- update GitHub actions
- update license 2023
- commented obsolete functions (not removed yet)
- update readme.md
- update keywords.txt
- fix unit test
## [0.2.3] - 2022-10-30
- add changelog.md
- add rp2040 to build-CI
- minor edit unit test
## [0.2.2] - 2022-06-20
- optimize getEstimateX() to match getEstimateY();
- optimize averaging within calculate.

View File

@ -1,10 +1,9 @@
//
// FILE: Correlation.cpp
// AUTHOR: Rob Tillaart
// VERSION: 0.2.3
// VERSION: 0.3.0
// PURPOSE: Arduino Library to determine correlation between X and Y dataset
//
// HISTORY: see changelog.md
// URL: https://github.com/RobTillaart/Correlation
#include "Correlation.h"
@ -243,5 +242,23 @@ float Correlation::getY(uint8_t index)
}
// -- END OF FILE --
// Debugging accessor: returns the internal _sumXiYi member.
// Replaces the older getSumXiYi() accessor.
float Correlation::getSumXY()
{
return _sumXiYi;
}
// Debugging accessor: returns the internal _sumXi2 member.
// Replaces the older getSumXi2() accessor.
float Correlation::getSumX2()
{
return _sumXi2;
}
// Debugging accessor: returns the internal _sumYi2 member.
// Replaces the older getSumYi2() accessor.
float Correlation::getSumY2()
{
return _sumYi2;
}
// -- END OF FILE --

View File

@ -2,16 +2,15 @@
//
// FILE: Correlation.h
// AUTHOR: Rob Tillaart
// VERSION: 0.2.3
// VERSION: 0.3.0
// PURPOSE: Calculate Correlation from a small dataset.
// HISTORY: See Correlation.cpp
//
// URL: https://github.com/RobTillaart/Correlation
#include "Arduino.h"
#define CORRELATION_LIB_VERSION (F("0.2.3"))
#define CORRELATION_LIB_VERSION (F("0.3.0"))
class Correlation
@ -72,10 +71,10 @@ public:
// get the average values of the datasets (if count > 0)
float getAverageX(){ return _avgX; }; // will replace getAvgX() in time
float getAverageY(){ return _avgY; }; // will replace getAvgY() in time
float getAvgX() { return _avgX; }; // will be obsolete in future
float getAvgY() { return _avgY; }; // will be obsolete in future
float getAverageX() { return _avgX; }; // will replace getAvgX() in time
float getAverageY() { return _avgY; }; // will replace getAvgY() in time
// float getAvgX() { return _avgX; }; // obsolete in 0.3.0
// float getAvgY() { return _avgY; }; // obsolete in 0.3.0
// based on the dataset get the estimated values for X and Y
@ -86,25 +85,26 @@ public:
// STATISTICAL
float getMinX(); // idem
float getMaxX(); // idem
float getMinY(); // idem
float getMaxY(); // idem
// to get bounding box of all x,y pairs.
float getMinX(); // idem
float getMaxX(); // idem
float getMinY(); // idem
float getMaxY(); // idem
// DEBUGGING - access to internal arrays.
bool setXY(uint8_t index, float x, float y); // returns true if succeeded
bool setX(uint8_t index, float x); // returns true if succeeded
bool setY(uint8_t index, float y); // ss returns true if succeeded
bool setY(uint8_t index, float y); // returns true if succeeded
float getX(uint8_t index); // idem
float getY(uint8_t index); // idem
float getSumXY() { return _sumXiYi; }; // replaces getSumXiYi()
float getSumX2() { return _sumXi2; }; // replaces getSumXi2()
float getSumY2() { return _sumYi2; }; // replaces getSumYi2()
float getSumXiYi() { return _sumXiYi; }; // obsolete in version 0.3.0
float getSumXi2() { return _sumXi2; }; // obsolete in version 0.3.0
float getSumYi2() { return _sumYi2; }; // obsolete in version 0.3.0
float getSumXY(); // replaces getSumXiYi()
float getSumX2(); // replaces getSumXi2()
float getSumY2(); // replaces getSumYi2()
// float getSumXiYi() { return _sumXiYi; }; // obsolete in version 0.3.0
// float getSumXi2() { return _sumXi2; }; // obsolete in version 0.3.0
// float getSumYi2() { return _sumYi2; }; // obsolete in version 0.3.0
private:
@ -132,5 +132,5 @@ private:
};
// -- END OF FILE --
// -- END OF FILE --

View File

@ -1,6 +1,6 @@
MIT License
Copyright (c) 2020-2022 Rob Tillaart
Copyright (c) 2020-2023 Rob Tillaart
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal

View File

@ -13,9 +13,9 @@ Arduino Library to determine linear correlation between X and Y datasets.
## Description
This library calculates the coefficients of the linear correlation
between two (relatively small) datasets. The size of these datasets is
20 by default. The size can be set in the constructor.
This library calculates the coefficients of the linear correlation
between two (relatively small) datasets. The size of these datasets is
20 by default. The size can be set in the constructor.
Please note that the correlation uses about ~50 bytes per instance,
and 2 floats == 8 bytes per pair of elements.
@ -24,34 +24,48 @@ So ~120 elements will use up 50% of the RAM of an UNO.
The formula of the correlation is expressed as **Y = A + B \* X**.
If all points are on a vertical line, the parameter B will be NAN.
This will happen if the **sumXi2** is zero or very small.
This will happen if the **sumXi2** is zero or very small.
Use with care.
#### Related
- https://github.com/RobTillaart/Correlation
- https://github.com/RobTillaart/GST - Golden standard test metrics
- https://github.com/RobTillaart/RunningAngle
- https://github.com/RobTillaart/RunningAverage
- https://github.com/RobTillaart/RunningMedian
- https://github.com/RobTillaart/statHelpers - combinations & permutations
- https://github.com/RobTillaart/Statistic
## Interface
```cpp
#include "Correlation.h"
```
### Constructor
#### Constructor
- **Correlation(uint8_t size = 20)** allocates the array needed and resets internal admin.
- **Correlation(uint8_t size = 20)** allocates the array needed and resets internal admin.
Size should be between 1 and 255. Size = 0 will set the size to 20.
- **~Correlation()** frees the allocated arrays.
### Base functions
#### Base functions
- **bool add(float x, float y)** adds a pair of **floats** to the internal storage arrays.
Returns true if the value is added, returns false when internal array is full.
When running correlation is set, **add()** will replace the oldest element and return true.
Warning: **add()** does not check if the floats are NAN or INFINITE.
- **uint8_t count()** returns the number of items in the internal arrays.
- **uint8_t count()** returns the number of items in the internal arrays.
This number is always between 0 ..**size()**
- **uint8_t size()** returns the size of the internal arrays.
- **void clear()** resets the data structures to the start condition (zero elements added).
- **bool calculate()** does the math to calculate the correlation parameters A, B and R.
- **bool calculate()** does the math to calculate the correlation parameters A, B and R.
This function will be called automatically when needed.
You can call it at a more convenient time.
You can call it at a more convenient time.
Returns false if nothing to calculate **count == 0**
- **void setR2Calculation(bool)** enables / disables the calculation of Rsquared.
- **bool getR2Calculation()** returns the flag set.
@ -62,8 +76,8 @@ After the calculation the following functions can be called to return the core v
- **float getA()** returns the A parameter of formula **Y = A + B \* X**
- **float getB()** returns the B parameter of formula **Y = A + B \* X**
- **float getR()** returns the correlation coefficient R which is always between -1 .. 1
The closer to 0 the less correlation there is between X and Y.
Correlation can be positive or negative.
The closer to 0 the less correlation there is between X and Y.
Correlation can be positive or negative.
Most often the Rsquared **R x R** is used.
- **float getRsquare()** returns **R x R** which is always between 0.. 1.
- **float getEsquare()** returns the error squared to get an indication of the
@ -76,37 +90,37 @@ quality of the correlation.
#### Correlation Coefficient R
Indicative description of the correlation
Indicative description of the correlation value.
| R | correlation |
|:-------------:|:--------------|
| +1.0 | Perfect |
| +0.8 to +1.0 | Very strong |
| +0.6 to +0.8 | Strong |
| +0.4 to +0.6 | Moderate |
| +0.2 to +0.4 | Weak |
| 0.0 to +0.2 | Very weak |
| 0.0 to -0.2 | Very weak |
| -0.2 to -0.4 | Weak |
| -0.4 to -0.6 | Moderate |
| -0.6 to -0.8 | Strong |
| -0.8 to -1.0 | Very strong |
| -1.0 | Perfect |
| R | correlation |
|:--------------:|:--------------|
| +1.0 | Perfect |
| +0.8 to +1.0 | Very strong |
| +0.6 to +0.8 | Strong |
| +0.4 to +0.6 | Moderate |
| +0.2 to +0.4 | Weak |
| 0.0 to +0.2 | Very weak |
| 0.0 to -0.2 | Very weak |
| -0.2 to -0.4 | Weak |
| -0.4 to -0.6 | Moderate |
| -0.6 to -0.8 | Strong |
| -0.8 to -1.0 | Very strong |
| -1.0 | Perfect |
### Running correlation
#### Running correlation
- **void setRunningCorrelation(bool rc)** sets the internal variable runningMode
which allows **add()** to overwrite old elements in the internal arrays.
- **void setRunningCorrelation(bool rc)** sets the internal variable runningMode
which allows **add()** to overwrite old elements in the internal arrays.
- **bool getRunningCorrelation()** returns the runningMode flag.
The running correlation will be calculated over the last **count** elements.
The running correlation will be calculated over the last **count** elements.
If the array is full, count will be size.
This running correlation allows for more adaptive formula finding e.g. find the
This running correlation allows for more adaptive formula finding e.g. find the
relation between temperature and humidity per hour, and how it changes over time.
### Statistical
#### Statistical
These functions give an indication of the "trusted interval" for estimations.
The idea is that for **getEstimateX()** the further outside the range defined
@ -119,7 +133,7 @@ It also depends on **R** of course. Idem for **getEstimateY()**
- **float getMaxY()** idem
### Debugging / educational
#### Debugging / educational
Normally not used. For all these functions index should be < count!
@ -134,33 +148,35 @@ Returns true if succeeded.
- **float getSumY2()** returns sum(Yi \* Yi).
### Obsolete in 0.3.0
#### Obsolete since 0.3.0
To improve readability the following functions are replaced
To improve readability the following functions are replaced.
- **float getAvgX()** returns average X.
- **float getAvgY()** returns average Y.
- **float getSumXiYi()** returns sum(Xi \* Yi).
- **float getSumXi2()** returns sum(Xi \* Xi).
- **float getSumYi2()** returns sum(Yi \* Yi).
- **float getAvgX()** ==> **getAverageX()**
- **float getAvgY()** ==> **getAverageY()**
- **float getSumXiYi()** ==> **getSumXY()**
- **float getSumXi2()** ==> **getSumX2()**
- **float getSumYi2()** ==> **getSumY2()**
## Future
- Template version?
The constructor should get a TYPE parameter, as this
allows smaller data types to be analysed taking less memory.
#### Must
- improve documentation
#### Should
- examples
- real world if possible.
#### Could
### 0.3.0
- Template version?
The constructor should get a TYPE parameter, as this
allows smaller data types to be analysed taking less memory.
- move code from .h to .cpp
- fix naming in examples as some function names are replaced.
- are the getSumXiYi indeed worse?
#### Wont
## Operation
See example

View File

@ -3,7 +3,7 @@
// AUTHOR: Rob Tillaart
// DATE: 2020-05-17
// PURPOSE: demo of the Correlation Library
//
// URL: https://github.com/RobTillaart/Correlation
#include "Correlation.h"
@ -49,11 +49,11 @@ void setup()
Serial.print("Esq:\t");
Serial.println(C.getEsquare(), 3);
Serial.print("SumXiYi:\t");
Serial.println(C.getSumXiYi(), 1);
Serial.println(C.getSumXY(), 1);
Serial.print("SumXi2:\t\t");
Serial.println(C.getSumXi2(), 1);
Serial.println(C.getSumX2(), 1);
Serial.print("SumYi2:\t\t");
Serial.println(C.getSumYi2(), 1);
Serial.println(C.getSumY2(), 1);
Serial.println();
Serial.println();
@ -149,4 +149,4 @@ void loop()
}
// -- END OF FILE --
// -- END OF FILE --

View File

@ -3,7 +3,7 @@
// AUTHOR: Rob Tillaart
// DATE: 2020-05-17
// PURPOSE: demo of the Correlation Library
//
// URL: https://github.com/RobTillaart/Correlation
/*
@ -65,5 +65,5 @@ void loop()
}
// -- END OF FILE --
// -- END OF FILE --

View File

@ -3,7 +3,7 @@
// AUTHOR: Rob Tillaart
// DATE: 2020-05-17
// PURPOSE: demo of the Correlation Library
//
// URL: https://github.com/RobTillaart/Correlation
#include "Correlation.h"

View File

@ -3,9 +3,10 @@
// AUTHOR: Rob Tillaart
// DATE: 2020-05-18
// PURPOSE: demo of the Correlation Library
// URL: https://github.com/RobTillaart/Correlation
// performance test: only ADD and CALCULATE as these are the most used
// and could be optimized in the future
// performance test: only ADD and CALCULATE as these are the most used
// and could be optimized in the future
#include "Correlation.h"

View File

@ -0,0 +1,43 @@
correlation_performance.ino
CORRELATION_LIB_VERSION: 0.3.0
ADD
12.20
CALCULATE - needed
2768
CALCULATE - no new values added
12
getEstimateX
24
getEstimateY
20
getMaxX
84
getMinX
84
===================================
ADD - fill arrays again
11.80
disable R2 and E2 math from calculate
CALCULATE - needed
1776
CALCULATE - no new values added
8
CALCULATE - no new values added but forced
1780
Done...

View File

@ -3,7 +3,7 @@
// AUTHOR: Rob Tillaart
// DATE: 2020-05-17
// PURPOSE: demo of the Correlation Library
//
// URL: https://github.com/RobTillaart/Correlation
#include "Correlation.h"
@ -41,5 +41,5 @@ void loop()
}
// -- END OF FILE --
// -- END OF FILE --

View File

@ -3,9 +3,11 @@
// AUTHOR: Rob Tillaart
// DATE: 2020-05-18
// PURPOSE: demo of the Correlation Library
// URL: https://github.com/RobTillaart/Correlation
// performance test: only ADD and CALCULATE as these are the most used
// and could be optimized in the future
// performance test:
// only ADD and CALCULATE as these are the most used
// and could be optimized in the future
#include "Correlation.h"
@ -49,9 +51,9 @@ void setup()
Serial.println(stop - start);
Serial.println();
Serial.println(C.getSumXiYi(), 6);
Serial.println(C.getSumXi2(), 6);
Serial.println(C.getSumYi2(), 6);
Serial.println(C.getSumXY(), 6);
Serial.println(C.getSumX2(), 6);
Serial.println(C.getSumY2(), 6);
Serial.println(C.getRsquare(), 6);
Serial.println(C.getR(), 6);
Serial.print("A: ");
@ -62,8 +64,8 @@ void setup()
start = micros();
float COV = C.getSumXiYi() / ((C.count() - 1));
float R = C.getSumXiYi() / sqrt(C.getSumXi2() * C.getSumYi2());
float COV = C.getSumXY() / ((C.count() - 1));
float R = C.getSumXY() / sqrt(C.getSumX2() * C.getSumY2());
stop = micros();
Serial.println(stop - start);
Serial.print("COV: ");
@ -94,9 +96,9 @@ void test()
C.calculate();
Serial.println("---------------");
Serial.println(C.getSumXiYi(), 6);
Serial.println(C.getSumXi2(), 6);
Serial.println(C.getSumYi2(), 6);
Serial.println(C.getSumXY(), 6);
Serial.println(C.getSumX2(), 6);
Serial.println(C.getSumY2(), 6);
Serial.println(C.getRsquare(), 6);
Serial.println(C.getR(), 6);
Serial.print("A: ");
@ -113,5 +115,5 @@ void test()
}
// -- END OF FILE --
// -- END OF FILE --

View File

@ -3,7 +3,7 @@
// AUTHOR: Rob Tillaart
// DATE: 2020-05-18
// PURPOSE: demo of the Correlation Library
//
// URL: https://github.com/RobTillaart/Correlation
#include "Correlation.h"

View File

@ -46,15 +46,6 @@ getSumX2 KEYWORD2
getSumY2 KEYWORD2
### remove 0.3.0
getAvgX KEYWORD2
getAvgY KEYWORD2
getSumXiYi KEYWORD2
getSumXi2 KEYWORD2
getSumYi2 KEYWORD2
# Constants (LITERAL1)
CORRELATION_LIB_VERSION LITERAL1

View File

@ -15,7 +15,7 @@
"type": "git",
"url": "https://github.com/RobTillaart/Correlation.git"
},
"version": "0.2.3",
"version": "0.3.0",
"license": "MIT",
"frameworks": "arduino",
"platforms": "*",

View File

@ -1,9 +1,9 @@
name=Correlation
version=0.2.3
version=0.3.0
author=Rob Tillaart <rob.tillaart@gmail.com>
maintainer=Rob Tillaart <rob.tillaart@gmail.com>
sentence=Arduino Library to determine correlation between X and Y dataset
paragraph=linear Correlation
paragraph=linear Correlation
category=Data Processing
url=https://github.com/RobTillaart/Correlation
architectures=*

View File

@ -3,7 +3,7 @@
// AUTHOR: Rob Tillaart
// DATE: 2020-12-03
// PURPOSE: unit tests for the Correlation library
// https://github.com/RobTillaart/
// https://github.com/RobTillaart/Correlation
// https://github.com/Arduino-CI/arduino_ci/blob/master/REFERENCE.md
//
@ -27,12 +27,12 @@
#include "Correlation.h"
unittest_setup()
{
fprintf(stderr, "CORRELATION_LIB_VERSION: %s\n", (char *) CORRELATION_LIB_VERSION);
}
unittest_teardown()
{
}
@ -43,7 +43,7 @@ unittest(test_constructor)
Correlation C;
assertEqual(0, C.count());
assertEqual(20, C.size());
Correlation D(100);
assertEqual(0, D.count());
assertEqual(100, D.size());
@ -76,7 +76,7 @@ unittest(test_add_clear)
C.clear();
assertEqual(0, C.count());
assertEqual(20, C.size());
for (int i = 0; i < C.size(); i++)
{
assertTrue(C.add(i, i*i));
@ -96,11 +96,11 @@ unittest(test_get_coefficients)
C.add(6, 15);
C.calculate();
assertEqualFloat(2.6, C.getA(), 0.0001);
assertEqualFloat(2.1, C.getB(), 0.0001);
assertEqualFloat(0.97913, C.getR(), 0.0001);
assertEqualFloat(2.6, C.getA(), 0.0001);
assertEqualFloat(2.1, C.getB(), 0.0001);
assertEqualFloat(0.97913, C.getR(), 0.0001);
assertEqualFloat(0.958696, C.getRsquare(), 0.0001);
assertEqualFloat(1.9, C.getEsquare(), 0.0001);
assertEqualFloat(1.9, C.getEsquare(), 0.0001);
}
@ -115,8 +115,8 @@ unittest(test_get_statistics)
C.add(6, 15);
C.calculate();
assertEqualFloat(4, C.getAvgX(), 0.0001);
assertEqualFloat(11, C.getAvgY(), 0.0001);
assertEqualFloat(4, C.getAverageX(), 0.0001);
assertEqualFloat(11, C.getAverageY(), 0.0001);
}
@ -130,19 +130,19 @@ unittest(test_estimate)
C.add(5, 14);
C.add(6, 15);
C.calculate();
fprintf(stderr, "estimate X\n");
for (int i = 4; i < 8; i++)
{
fprintf(stderr, "%d\t%f\n", i, C.getEstimateX(i));
}
fprintf(stderr, "estimate X\n");
fprintf(stderr, "estimate Y\n");
for (int i = 15; i < 20; i++)
{
fprintf(stderr, "%d\t%f\n", i, C.getEstimateY(i));
}
assertEqualFloat(-1.2381, C.getEstimateX(0), 0.0001);
assertEqualFloat(2.6, C.getEstimateY(0), 0.0001);
assertEqualFloat(2.6, C.getEstimateY(0), 0.0001);
}
@ -152,11 +152,13 @@ unittest(test_calculate)
assertFalse(C.calculate());
assertFalse(C.calculate(true));
C.add(2, 7);
C.add(3, 9);
C.add(4, 10);
C.add(5, 14);
C.add(6, 15);
assertTrue(C.calculate());
assertTrue(C.calculate(true));
}
@ -182,4 +184,5 @@ unittest(test_calculate_flags)
unittest_main()
// --------
// --END OF FILE --