0.2.2 Correlation

This commit is contained in:
rob tillaart 2022-06-21 08:22:05 +02:00
parent 0208c0e5d5
commit 1f2f8cef25
14 changed files with 146 additions and 70 deletions

View File

@ -1,10 +1,14 @@
//
// FILE: Correlation.cpp
// AUTHOR: Rob Tillaart
// VERSION: 0.2.1
// VERSION: 0.2.2
// PURPOSE: Arduino Library to determine correlation between X and Y dataset
//
// HISTORY:
//
// 0.2.2 2022-06-20 optimize getEstimateX() to match getEstimateY();
// optimize averaging within calculate.
// prepare renaming (5) functions in 0.3.0
// 0.2.1 2021-12-14 update library.json, license, minor edits
// 0.2.0 2021-08-26 Add flags to skip Rsquared and Esquared calculation
// will improve performance calculate
@ -50,6 +54,7 @@ void Correlation::clear()
_avgY = 0;
_a = 0;
_b = 0;
_div_b = -1; // as 1/_b is undefined
_r = 0;
_sumErrorSquare = 0;
_sumXiYi = 0;
@ -81,21 +86,22 @@ bool Correlation::calculate(bool forced)
if (_count == 0) return false;
if (! (_needRecalculate || forced)) return true;
// CALC AVERAGE X, AVERAGE Y
// CALC AVERAGE X, AVERAGE Y
float avgx = 0;
float avgy = 0;
float div_count = 1.0 / _count; // speed up averaging
for (uint8_t i = 0; i < _count; i++)
{
avgx += _x[i];
avgy += _y[i];
}
avgx /= _count;
avgy /= _count;
avgx *= div_count;
avgy *= div_count;
_avgX = avgx;
_avgY = avgy;
// CALC A and B ==> formula Y = A + B*X
// CALC A and B ==> formula Y = A + B * X
float sumXiYi = 0;
float sumXi2 = 0;
float sumYi2 = 0;
@ -112,14 +118,15 @@ bool Correlation::calculate(bool forced)
_a = a;
_b = b;
_div_b = 1.0 / b;
_sumXiYi = sumXiYi;
_sumXi2 = sumXi2;
_sumYi2 = sumYi2;
if (_doR2 == true)
{
// R is calculated instead of rSquared so we do not loose the sign.
// Rsquared from R is much faster than R from Rsquared.
// R is calculated instead of rSquared so we do not lose the sign.
// Rsquared from R is much faster than R from Rsquared.
_r = sumXiYi / sqrt(sumXi2 * sumYi2);
}
@ -151,13 +158,13 @@ float Correlation::getEstimateX(float y)
{
if (_count == 0) return NAN;
if (_needRecalculate) calculate();
return (y - _a) / _b;
return (y - _a) * _div_b;
}
//////////////////////////////////////////////////////
//
// STATISTICAL
// STATISTICAL
//
float Correlation::getMaxX()
{
@ -209,7 +216,7 @@ float Correlation::getMinY()
//////////////////////////////////////////////////////
//
// DEBUGGING - access to internal arrays.
// DEBUGGING - access to internal arrays.
//
bool Correlation::setXY(uint8_t index, float x, float y)
{

View File

@ -2,7 +2,7 @@
//
// FILE: Correlation.h
// AUTHOR: Rob Tillaart
// VERSION: 0.2.1
// VERSION: 0.2.2
// PURPOSE: Calculate Correlation from a small dataset.
// HISTORY: See Correlation.cpp
//
@ -11,7 +11,7 @@
#include "Arduino.h"
#define CORRELATION_LIB_VERSION (F("0.2.1"))
#define CORRELATION_LIB_VERSION (F("0.2.2"))
class Correlation
@ -72,8 +72,10 @@ public:
// get the average values of the datasets (if count > 0)
float getAvgX() { return _avgX; };
float getAvgY() { return _avgY; };
float getAverageX(){ return _avgX; }; // will replace getAvgX() in time
float getAverageY(){ return _avgY; }; // will replace getAvgY() in time
float getAvgX() { return _avgX; }; // will be obsolete in future
float getAvgY() { return _avgY; }; // will be obsolete in future
// based on the dataset get the estimated values for X and Y
@ -97,9 +99,12 @@ public:
float getX(uint8_t index); // idem
float getY(uint8_t index); // idem
float getSumXiYi() { return _sumXiYi; };
float getSumXi2() { return _sumXi2; };
float getSumYi2() { return _sumYi2; };
float getSumXY() { return _sumXiYi; }; // replaces getSumXiYi()
float getSumX2() { return _sumXi2; }; // replaces getSumXi2()
float getSumY2() { return _sumYi2; }; // replaces getSumYi2()
float getSumXiYi() { return _sumXiYi; }; // obsolete in version 0.3.0
float getSumXi2() { return _sumXi2; }; // obsolete in version 0.3.0
float getSumYi2() { return _sumYi2; }; // obsolete in version 0.3.0
private:
@ -118,6 +123,7 @@ private:
float _avgY;
float _a;
float _b;
float _div_b;
float _r;
float _sumErrorSquare;
float _sumXiYi;

View File

@ -68,8 +68,8 @@ Most often the Rsquared **R x R** is used.
- **float getRsquare()** returns **R x R** which is always between 0 and 1.
- **float getEsquare()** returns the error squared to get an indication of the
quality of the correlation.
- **float getAvgX()** returns the average of all elements in the X dataset.
- **float getAvgY()** returns the average of all elements in the Y dataset.
- **float getAverageX()** returns the average of all elements in the X dataset.
- **float getAverageY()** returns the average of all elements in the Y dataset.
- **float getEstimateX(float y)** use to calculate the estimated X for a given Y.
- **float getEstimateY(float x)** use to calculate the estimated Y for a given X.
@ -129,6 +129,17 @@ Returns true if succeeded.
- **bool setY(uint8_t index, float y)** overwrites single Y.
- **float getX(uint8_t index)** returns single value.
- **float getY(uint8_t index)** returns single value.
- **float getSumXY()** returns sum(Xi \* Yi).
- **float getSumX2()** returns sum(Xi \* Xi).
- **float getSumY2()** returns sum(Yi \* Yi).
### Obsolete in 0.3.0
To improve readability the following functions are replaced:
- **float getAvgX()** returns average X.
- **float getAvgY()** returns average Y.
- **float getSumXiYi()** returns sum(Xi \* Yi).
- **float getSumXi2()** returns sum(Xi \* Xi).
- **float getSumYi2()** returns sum(Yi \* Yi).
@ -136,18 +147,20 @@ Returns true if succeeded.
## Future
- Template version
- Template version?
The constructor should get a TYPE parameter, as this
allows smaller data types to be analysed taking less memory.
- naming of some functions - 0.3.0
- **getAverageX()** instead of **getAvgX()** less cryptic
- **getSumXY() getSumX2() getSumY2()** the i is not functional
- **getMinimumX()** ???
- **getRsquared()** with d ???
- examples
- real world if possible.
### 0.3.0
- fix naming in examples as some function names are replaced.
- are the getSumXiYi() names indeed worse?
## Operation
See example

View File

@ -2,7 +2,6 @@
// FILE: correlation_debugging.ino
// AUTHOR: Rob Tillaart
// DATE: 2020-05-17
// VERSION: 0.1.0
// PURPOSE: demo of the Correlation Library
//
@ -151,4 +150,3 @@ void loop()
// -- END OF FILE --

View File

@ -2,7 +2,6 @@
// FILE: demo0.ino
// AUTHOR: Rob Tillaart
// DATE: 2020-05-17
// VERSION: 0.1.0
// PURPOSE: demo of the Correlation Library
//

View File

@ -2,7 +2,6 @@
// FILE: correlation_demo1.ino
// AUTHOR: Rob Tillaart
// DATE: 2020-05-17
// VERSION: 0.1.0
// PURPOSE: demo of the Correlation Library
//

View File

@ -2,7 +2,6 @@
// FILE: correlation_performance.ino
// AUTHOR: Rob Tillaart
// DATE: 2020-05-18
// VERSION: 0.1.1
// PURPOSE: demo of the Correlation Library
// performance test: only ADD and CALCULATE as these are the most used
@ -27,8 +26,9 @@ void setup()
Serial.println("ADD");
delay(10);
delay(100);
C.clear();
sum = 0;
for (int i = 0; i < 20; i++)
{
f = i * 40 + 0.1 * random(10);
@ -41,7 +41,7 @@ void setup()
Serial.println("\nCALCULATE - needed ");
delay(10);
delay(100);
start = micros();
C.calculate();
stop = micros();
@ -49,7 +49,7 @@ void setup()
Serial.println("\nCALCULATE - no new values added");
delay(10);
delay(100);
start = micros();
C.calculate();
stop = micros();
@ -57,7 +57,7 @@ void setup()
Serial.println("\ngetEstimateX");
delay(10);
delay(100);
start = micros();
f = C.getEstimateX(42);
stop = micros();
@ -65,7 +65,7 @@ void setup()
Serial.println("\ngetEstimateY");
delay(10);
delay(100);
start = micros();
f = C.getEstimateY(42);
stop = micros();
@ -73,17 +73,25 @@ void setup()
Serial.println("\ngetMaxX");
delay(10);
delay(100);
start = micros();
f = C.getMaxX();
stop = micros();
Serial.println(stop - start);
Serial.println("\n===================================\n0.2.0 \n");
Serial.println("\ngetMinX");
delay(100);
start = micros();
f = C.getMinX();
stop = micros();
Serial.println(stop - start);
Serial.println("ADD");
delay(10);
Serial.println("\n===================================\n\n");
Serial.println("ADD - fill arrays again");
delay(100);
C.clear();
sum = 0;
for (int i = 0; i < 20; i++)
{
f = i * 40 + 0.1 * random(10);
@ -100,7 +108,7 @@ void setup()
C.setE2Calculation(false);
Serial.println("\nCALCULATE - needed ");
delay(10);
delay(100);
start = micros();
C.calculate();
stop = micros();
@ -108,7 +116,7 @@ void setup()
Serial.println("\nCALCULATE - no new values added");
delay(10);
delay(100);
start = micros();
C.calculate();
stop = micros();
@ -116,7 +124,7 @@ void setup()
Serial.println("\nCALCULATE - no new values added but forced");
delay(10);
delay(100);
start = micros();
C.calculate(true);
stop = micros();

View File

@ -0,0 +1,40 @@
CORRELATION_LIB_VERSION: 0.2.2
ADD
11.60
CALCULATE - needed
2764
CALCULATE - no new values added
8
getEstimateX
20
getEstimateY
20
getMaxX
84
getMinX
84
===================================
ADD - fill arrays again
11.60
disable R2 and E2 math from calculate
CALCULATE - needed
1780
CALCULATE - no new values added
12
CALCULATE - no new values added but forced
1780
Done...

View File

@ -2,7 +2,6 @@
// FILE: correlation_statistical.ino
// AUTHOR: Rob Tillaart
// DATE: 2020-05-17
// VERSION: 0.1.0
// PURPOSE: demo of the Correlation Library
//

View File

@ -2,7 +2,6 @@
// FILE: correlation_test.ino
// AUTHOR: Rob Tillaart
// DATE: 2020-05-18
// VERSION: 0.1.1
// PURPOSE: demo of the Correlation Library
// performance test: only ADD and CALCULATE as these are the most used
@ -84,27 +83,27 @@ void loop()
void test()
{
C.clear();
C.add(2, 7);
C.add(3, 9);
C.add(4, 10);
C.add(5, 14);
C.add(6, 15);
C.calculate();
Serial.println("---------------");
Serial.println(C.getSumXiYi(), 6);
Serial.println(C.getSumXi2(), 6);
Serial.println(C.getSumYi2(), 6);
Serial.println(C.getRsquare(), 6);
Serial.println(C.getR(), 6);
Serial.print("A: ");
Serial.println(C.getA());
Serial.print("B: ");
Serial.println(C.getB());
Serial.println();
C.clear();
C.add(2, 7);
C.add(3, 9);
C.add(4, 10);
C.add(5, 14);
C.add(6, 15);
C.calculate();
Serial.println("---------------");
Serial.println(C.getSumXiYi(), 6);
Serial.println(C.getSumXi2(), 6);
Serial.println(C.getSumYi2(), 6);
Serial.println(C.getRsquare(), 6);
Serial.println(C.getR(), 6);
Serial.print("A: ");
Serial.println(C.getA());
Serial.print("B: ");
Serial.println(C.getB());
Serial.println();
// assertEqualFloat(2.6, C.getA(), 0.0001);
// assertEqualFloat(2.1, C.getB(), 0.0001);

View File

@ -2,7 +2,6 @@
// FILE: demo_running_correlation.ino.ino
// AUTHOR: Rob Tillaart
// DATE: 2020-05-18
// VERSION: 0.1.0
// PURPOSE: demo of the Correlation Library
//

View File

@ -25,8 +25,8 @@ getR KEYWORD2
getRsquare KEYWORD2
getEsquare KEYWORD2
getAvgX KEYWORD2
getAvgY KEYWORD2
getAverageX KEYWORD2
getAverageY KEYWORD2
getEstimateY KEYWORD2
getEstimateX KEYWORD2
@ -41,6 +41,15 @@ setY KEYWORD2
getX KEYWORD2
getY KEYWORD2
getSumXY KEYWORD2
getSumX2 KEYWORD2
getSumY2 KEYWORD2
### remove 0.3.0
getAvgX KEYWORD2
getAvgY KEYWORD2
getSumXiYi KEYWORD2
getSumXi2 KEYWORD2
getSumYi2 KEYWORD2

View File

@ -15,7 +15,7 @@
"type": "git",
"url": "https://github.com/RobTillaart/Correlation.git"
},
"version": "0.2.1",
"version": "0.2.2",
"license": "MIT",
"frameworks": "arduino",
"platforms": "*",

View File

@ -1,5 +1,5 @@
name=Correlation
version=0.2.1
version=0.2.2
author=Rob Tillaart <rob.tillaart@gmail.com>
maintainer=Rob Tillaart <rob.tillaart@gmail.com>
sentence=Arduino Library to determine correlation between X and Y dataset