434 lines
8.5 KiB
C
Raw Normal View History

2020-11-27 11:16:22 +01:00
#pragma once
2013-09-30 17:16:02 +02:00
//
// FILE: IEEE754tools.h
// AUTHOR: Rob Tillaart
2021-12-20 08:09:14 +01:00
// VERSION: 0.2.3
2020-11-27 11:16:22 +01:00
// PURPOSE: manipulate IEEE754 float numbers fast
// URL: https://github.com/RobTillaart/IEEE754tools.git
2013-09-30 17:16:02 +02:00
//
2021-01-29 12:31:58 +01:00
// EXPERIMENTAL ==> USE WITH CARE
// not tested extensively.
//
// 0.1.00 2013-09-08 initial version
// 0.1.01 2013-09-08 added IEEE_NAN, IEEE_INF tests + version string
// 0.1.02 2013-09-08 added SHIFT_POW2
// 0.1.03 2013-09-10 renamed IEEE_Sign IEEE_Exponent
// 0.2.0 2020-06-30 own repo + some refactor...
2021-12-20 08:09:14 +01:00
// 0.2.1 2020-12-30 Arduino-CI
2021-11-05 17:24:10 +01:00
// 0.2.2 2021-11-05 update Arduino-CI, badges, readme.md
// some testing on ESP32 (no fixing)
2021-12-20 08:09:14 +01:00
// 0.2.3 2021-12-20 update library.json, license, minor edits
2021-11-05 17:24:10 +01:00
2013-09-30 17:16:02 +02:00
#include "Arduino.h"
2021-12-20 08:09:14 +01:00
#define IEEE754_VERSION (F("0.2.3"))
2021-11-05 17:24:10 +01:00
2013-09-30 17:16:02 +02:00
// (un)comment lines to configure functionality / size
//#define IEEE754_ENABLE_MSB // +78 bytes
2020-11-27 11:16:22 +01:00
2013-09-30 17:16:02 +02:00
// IEEE754 float layout;
struct IEEEfloat
{
  // All three fields use the same base type on purpose. With mixed base
  // types (uint32_t / uint8_t) some ABIs do not allow the 8 bit exponent to
  // straddle a byte boundary and move it to bits 24..31, which shifts the
  // exponent by one bit (seen as "x16 => x256" and "divides by 4" on ESP32).
  // Bit-field allocation order is still implementation defined; this layout
  // assumes the first field occupies the least significant bits.
  uint32_t m:23;   // mantissa (fraction), bits 0..22
  uint32_t e:8;    // biased exponent,     bits 23..30
  uint32_t s:1;    // sign,                bit 31
};
2021-11-05 17:24:10 +01:00
2013-09-30 17:16:02 +02:00
// IEEE754 double layout;
// Bit-field view of a 64 bit IEEE754 double: 52 + 11 + 1 = 64 bits.
// NOTE(review): the fields use three different base types; how such
// bit-fields are packed is implementation defined, so the resulting layout
// should be verified per platform before relying on it.
// Its only use visible in this file is commented out (see _DBLCONV).
struct IEEEdouble
{
uint64_t m:52;  // mantissa (fraction)
uint16_t e:11;  // biased exponent
uint8_t s:1;    // sign
};
2021-11-05 17:24:10 +01:00
2013-09-30 17:16:02 +02:00
// Arduino UNO double layout:
// the UNO has no 64 bit double; it can only map 23 bits of the mantissa.
// A filler is added for the remaining bits. These might be useful in the future?
2013-09-30 17:16:02 +02:00
// Bit-field view of a 64 bit double in which the 52 mantissa bits are split
// into a 29 bit filler followed by the 23 bits that fit a float mantissa.
// NOTE(review): the fields use different base types and m would straddle a
// 32 bit boundary; whether the compiler packs them into exactly 64 bits is
// implementation defined - confirm on every target that is not an UNO.
struct _DBL
{
uint32_t filler:29;  // mantissa bits that do not fit in a float
uint32_t m:23;       // mantissa bits shared with the float mantissa
uint16_t e:11;       // biased exponent
uint8_t s:1;         // sign
};
2021-11-05 17:24:10 +01:00
2013-09-30 17:16:02 +02:00
// for packing and unpacking a float
union _FLOATCONV
2013-09-30 17:16:02 +02:00
{
IEEEfloat p;
float f;
byte b[4];
};
2013-09-30 17:16:02 +02:00
2021-11-05 17:24:10 +01:00
2013-09-30 17:16:02 +02:00
// for packing and unpacking a double
union _DBLCONV
2013-09-30 17:16:02 +02:00
{
// IEEEdouble p;
_DBL p;
double d; // !! is a 32bit float for UNO.
byte b[8];
};
2013-09-30 17:16:02 +02:00
2020-11-27 11:16:22 +01:00
//
// DEBUG FUNCTIONS
//
2013-09-30 17:16:02 +02:00
// print float components
void dumpFloat(float number)
{
IEEEfloat* x = (IEEEfloat*) ((void*)&number);
Serial.print(x->s, HEX);
Serial.print("\t");
Serial.print(x->e, HEX);
Serial.print("\t");
Serial.println(x->m, HEX);
// Serial.print(" sign: "); Serial.print(x->s);
// Serial.print(" exp: "); Serial.print(x->e);
// Serial.print(" mant: "); Serial.println(x->m);
}
2021-11-05 17:24:10 +01:00
2013-09-30 17:16:02 +02:00
// print "double" components
// Writes one line to Serial with the raw fields of dbl in hexadecimal,
// separated by tabs, in the order: sign, exponent, mantissa.
// The filler field is not printed.
void dumpDBL(struct _DBL dbl)
{
  uint8_t  sign     = dbl.s;
  uint16_t exponent = dbl.e;
  uint32_t mantissa = dbl.m;

  Serial.print(sign, HEX);
  Serial.print("\t");
  Serial.print(exponent, HEX);
  Serial.print("\t");
  Serial.println(mantissa, HEX);
}
2021-11-05 17:24:10 +01:00
2013-09-30 17:16:02 +02:00
//
// mapping to/from 64bit double - best effort
//
2013-09-30 17:16:02 +02:00
// converts a float to a packed array of 8 bytes representing a 64 bit double
2021-11-05 17:24:10 +01:00
// restricted to the exponent range and the 23 bit mantissa of a float.
2013-09-30 17:16:02 +02:00
// float; array of 8 bytes; LSBFIRST; MSBFIRST
2020-11-27 11:16:22 +01:00
void float2DoublePacked(float number, byte* bar, int byteOrder = LSBFIRST)
2013-09-30 17:16:02 +02:00
{
_FLOATCONV fl;
fl.f = number;
_DBLCONV dbl;
2020-11-27 11:16:22 +01:00
dbl.p.filler = 0;
2013-09-30 17:16:02 +02:00
dbl.p.s = fl.p.s;
2021-01-29 12:31:58 +01:00
dbl.p.e = fl.p.e - 127 + 1023; // exponent adjust
2013-09-30 17:16:02 +02:00
dbl.p.m = fl.p.m;
#ifdef IEEE754_ENABLE_MSB
if (byteOrder == LSBFIRST)
{
#endif
2020-11-27 11:16:22 +01:00
for (int i = 0; i < 8; i++)
2013-09-30 17:16:02 +02:00
{
bar[i] = dbl.b[i];
}
#ifdef IEEE754_ENABLE_MSB
}
else
{
2020-11-27 11:16:22 +01:00
for (int i = 0; i < 8; i++)
2013-09-30 17:16:02 +02:00
{
bar[i] = dbl.b[7-i];
}
}
#endif
}
2021-11-05 17:24:10 +01:00
2013-09-30 17:16:02 +02:00
// converts a packed array of bytes into a 32bit float.
// there can be an exponent overflow
2021-11-05 17:24:10 +01:00
// the mantissa is truncated to 23 bits.
2020-11-27 11:16:22 +01:00
// Converts the 8 bytes of a 64 bit IEEE754 double into a float.
//
//   bar        8 bytes holding the double
//   byteOrder  LSBFIRST (default) or MSBFIRST; only honoured when
//              IEEE754_ENABLE_MSB is defined, otherwise always LSBFIRST.
//
// Returns the value with the mantissa truncated to 23 bits.
//   - INF and NaN are preserved.
//   - values too large for a float return +INFINITY / -INFINITY.
//   - values too small for a float return a subnormal or a signed zero.
// The range test used to be (e >= 0 || e <= 255), which is always true, so
// out of range exponents were silently wrapped into the 8 bit field.
// Relies on _DBL being packed into exactly 64 bits.
float doublePacked2Float(byte* bar, int byteOrder = LSBFIRST)
{
  _FLOATCONV fl;
  _DBLCONV dbl;

  for (int i = 0; i < 8; i++)
  {
#ifdef IEEE754_ENABLE_MSB
    dbl.b[i] = (byteOrder == LSBFIRST) ? bar[i] : bar[7 - i];
#else
    dbl.b[i] = bar[i];
#endif
  }
#ifndef IEEE754_ENABLE_MSB
  (void) byteOrder;                     // unused in this configuration
#endif

  fl.f = 0;
  fl.p.s = dbl.p.s;

  uint16_t de = dbl.p.e;
  if (de == 0x7FF)                      // INF or NaN
  {
    fl.p.e = 0xFF;
    fl.p.m = dbl.p.m;                   // note this one clips the mantissa
    if ((dbl.p.m == 0) && (dbl.p.filler != 0))
    {
      fl.p.m = 0x00400000UL;            // payload was clipped away; stay NaN
    }
    return fl.f;
  }

  int e = (int) de - 1023 + 127;        // exponent adjust
  if (e >= 255)                         // too large for a float
  {
    return (dbl.p.s) ? -INFINITY : INFINITY;
  }
  if (e > 0)                            // normal float
  {
    fl.p.e = e;
    fl.p.m = dbl.p.m;                   // note this one clips the mantissa
    return fl.f;
  }

  // e <= 0: zero, or too small for a normal float.
  // fl is a signed zero here; make it subnormal when bits survive the shift.
  if ((de != 0) && (e >= -22))
  {
    fl.p.m = (0x00800000UL | dbl.p.m) >> (1 - e);
  }
  return fl.f;
}
2021-11-05 17:24:10 +01:00
//
// TEST FUNCTIONS
//
// ~1.7x faster
// Returns 1 if number is a NaN, 0 otherwise.
// A NaN has all exponent bits set and a non-zero mantissa, whatever the sign.
// The previous test compared the upper 16 bits with 0x7FC0 only, so NaNs with
// the sign bit set or with another payload were not recognised.
int IEEE_NAN(float number)
{
  union { float f; uint32_t u; } bits;
  bits.f = number;
  // with the sign masked off, every NaN is above the +INF pattern
  return (bits.u & 0x7FFFFFFFUL) > 0x7F800000UL;
}
2021-11-05 17:24:10 +01:00
// ~3.4x faster
// Returns 1 for +INF, -1 for -INF and 0 for everything else.
// All 32 bits are compared; the previous test skipped the two low mantissa
// bytes and therefore reported some NaN patterns (e.g. 0x7F800001) as INF.
int IEEE_INF(float number)
{
  union { float f; uint32_t u; } bits;
  bits.f = number;
  if (bits.u == 0x7F800000UL) return 1;
  if (bits.u == 0xFF800000UL) return -1;
  return 0;
}
2021-11-05 17:24:10 +01:00
// for the real speed freaks, the next
// Returns true only if number is exactly +INF.
// All 32 bits are compared; testing the upper 16 bits only would also
// accept NaN patterns such as 0x7F800001.
boolean IEEE_PosINF(float number)
{
  union { float f; uint32_t u; } bits;
  bits.f = number;
  return bits.u == 0x7F800000UL;
}
// Returns true only if number is exactly -INF.
// All 32 bits are compared; testing the upper 16 bits only would also
// accept NaN patterns such as 0xFF800001.
boolean IEEE_NegINF(float number)
{
  union { float f; uint32_t u; } bits;
  bits.f = number;
  return bits.u == 0xFF800000UL;
}
//
// PROPERTIES
//
// Returns the sign bit of number: 1 for negative (including -0.0), else 0.
// Uses a 32 bit mask instead of a pointer cast to a bit-field struct, so the
// result does not depend on how the compiler lays out bit-fields.
uint8_t IEEE_Sign(float number)
{
  union { float f; uint32_t u; } bits;
  bits.f = number;
  return (uint8_t) (bits.u >> 31);
}
// Returns the unbiased exponent of number: the 8 bit exponent field - 127.
// Zero and subnormals therefore return -127, INF and NaN return 128.
// Uses a 32 bit mask instead of a pointer cast to a bit-field struct, so the
// result does not depend on how the compiler lays out bit-fields.
int IEEE_Exponent(float number)
{
  union { float f; uint32_t u; } bits;
  bits.f = number;
  return (int) ((bits.u >> 23) & 0xFF) - 127;
}
// Returns the 23 bit mantissa field of number (without the hidden bit).
// Uses a 32 bit mask instead of a pointer cast to a bit-field struct, so the
// result does not depend on how the compiler lays out bit-fields.
uint32_t IEEE_Mantisse(float number)
{
  union { float f; uint32_t u; } bits;
  bits.f = number;
  return bits.u & 0x007FFFFFUL;
}
2021-11-05 17:24:10 +01:00
//
// MATH FUNCTIONS
//
2021-11-05 17:24:10 +01:00
// f = f * 2^n
// factor ~2.7; (tested with *16) more correct than the faster one
2013-09-30 17:17:46 +02:00
// Returns number * 2^n.
// Fast path: when both number and the result are normal floats only the
// exponent field is adjusted.
// Everything else (zero, subnormal, INF, NaN input; overflow or underflow of
// the result) is delegated to ldexpf(). The former code returned INF on
// underflow and turned 0.0, INF and NaN into unrelated values.
float IEEE_POW2(float number, int n)
{
  union { float f; uint32_t u; } fl;
  fl.f = number;

  int e = (int) ((fl.u >> 23) & 0xFF);
  // written as bounds on n so that e + n cannot overflow a 16 bit int
  if ((e > 0) && (e < 255) && (n > -e) && (n < 255 - e))
  {
    fl.u = (fl.u & 0x807FFFFFUL) | ((uint32_t) (e + n) << 23);
    return fl.f;
  }
  return ldexpf(number, n);
}
2021-11-05 17:24:10 +01:00
// WARNING no overflow detection in the SHIFT (factor ~3.5)
2013-09-30 17:17:46 +02:00
// Returns number * 2^n by adding n to the exponent field.
// WARNING: no range checks at all - the 8 bit exponent simply wraps around,
// and zero, subnormal, INF and NaN inputs give meaningless results.
// The exponent is updated with 32 bit masks, so the result does not depend
// on how the compiler lays out bit-fields.
float IEEE_POW2fast(float number, int n)
{
  union { float f; uint32_t u; } fl;
  fl.f = number;

  // n is shifted as unsigned; the mask keeps the sum inside the 8 exponent bits
  uint32_t exponent = (fl.u + ((uint32_t) n << 23)) & 0x7F800000UL;
  fl.u = (fl.u & 0x807FFFFFUL) | exponent;
  return fl.f;
}
2021-11-05 17:24:10 +01:00
// - FAILS ON ESP32 (x16 => x256 strange)
// Returns number * 2^n by adding n to the exponent field.
// WARNING: no range checks at all - the 8 bit exponent simply wraps around,
// and zero, subnormal, INF and NaN inputs give meaningless results.
// The bit-field pointer version depended on the compiler's bit-field layout
// (x16 became x256 on ESP32); this one uses 32 bit masks.
float IEEE_FLOAT_POW2fast(float number, int n)
{
  union { float f; uint32_t u; } fl;
  fl.f = number;

  uint32_t exponent = (fl.u + ((uint32_t) n << 23)) & 0x7F800000UL;
  fl.u = (fl.u & 0x807FFFFFUL) | exponent;
  return fl.f;
}
2021-11-05 17:24:10 +01:00
// - NOT FASTER
// - FAILS ON ESP32 (==> divides by 4)
// Returns number / 2 by decrementing the exponent field.
// The bit-field pointer version depended on the compiler's bit-field layout
// (it divided by 4 on ESP32) and turned 0.0 into INF; this one uses 32 bit
// masks and falls back to a plain multiplication whenever the exponent
// trick does not apply.
float IEEE_FLOAT_DIV2(float number)
{
  union { float f; uint32_t u; } fl;
  fl.f = number;

  uint32_t exponent = fl.u & 0x7F800000UL;
  // exponent field 0 (zero / subnormal), 1 (result would be subnormal)
  // or 255 (INF / NaN)
  if ((exponent <= 0x00800000UL) || (exponent == 0x7F800000UL))
  {
    return number * 0.5f;
  }
  fl.u -= 0x00800000UL;                 // exponent - 1
  return fl.f;
}
// Bitwise equality of two floats, compared as two 16 bit halves.
// Returns true only if all 32 bits of f and g are identical.
bool IEEE_FLOAT_EQ(float &f, float &g)
{
  uint16_t *lhs = (uint16_t *) &f;
  uint16_t *rhs = (uint16_t *) &g;

  // first half differs: no need to look at the second half
  if (lhs[0] != rhs[0])
  {
    return false;
  }
  return lhs[1] == rhs[1];
}
// Bitwise inequality of two floats, compared as two 16 bit halves.
// Returns true if at least one of the 32 bits of f and g differs.
bool IEEE_FLOAT_NEQ(float &f, float &g)
{
  uint16_t *lhs = (uint16_t *) &f;
  uint16_t *rhs = (uint16_t *) &g;

  // first half differs: no need to look at the second half
  if (lhs[0] != rhs[0])
  {
    return true;
  }
  return lhs[1] != rhs[1];
}
////////////////////////////////////////////////////////////////////////////////
2013-09-30 17:16:02 +02:00
//
// NOT TESTED FUNCTIONS
//
//
// get truncated part as separate float.
//
// Converts the 8 bytes of a 64 bit IEEE754 double into a float and also
// reports the part that was lost by truncating the mantissa.
//
//   bar        8 bytes holding the double
//   byteOrder  LSBFIRST or MSBFIRST; only honoured when IEEE754_ENABLE_MSB
//              is defined, otherwise always LSBFIRST.
//   value      receives the double with its mantissa truncated to 23 bits
//   error      receives the truncated remainder, same sign as value,
//              so value + error approximates the original double.
//
// Special cases (error is 0 in all of them):
//   - NaN gives NAN, INF and too large values give +INFINITY / -INFINITY.
//   - zero and values too small for a normal float give a signed zero.
//
// Fixes: the range test (e >= 0 || e <= 255) was always true and there was
// no return after the conversion, so the result was always overwritten with
// +-INFINITY / 0. The error term was built as a float with an implicit
// leading 1 and a clipped filler; it is now filler * 2^(exponent - 52).
// Relies on _DBL being packed into exactly 64 bits.
void doublePacked2Float2(byte* bar, int byteOrder, float* value, float* error)
{
  _FLOATCONV fl;
  _DBLCONV dbl;

  for (int i = 0; i < 8; i++)
  {
#ifdef IEEE754_ENABLE_MSB
    dbl.b[i] = (byteOrder == LSBFIRST) ? bar[i] : bar[7 - i];
#else
    dbl.b[i] = bar[i];
#endif
  }
#ifndef IEEE754_ENABLE_MSB
  (void) byteOrder;                     // unused in this configuration
#endif

  *error = 0;

  uint16_t de = dbl.p.e;
  int e = (int) de - 1023 + 127;        // exponent adjust

  if ((de == 0x7FF) && ((dbl.p.m != 0) || (dbl.p.filler != 0)))
  {
    *value = NAN;
    return;
  }
  if (e >= 255)                         // INF or too large for a float
  {
    *value = (dbl.p.s) ? -INFINITY : INFINITY;
    return;
  }

  fl.f = 0;
  fl.p.s = dbl.p.s;
  if (e <= 0)                           // zero or too small: signed zero
  {
    *value = fl.f;
    return;
  }

  fl.p.e = e;
  fl.p.m = dbl.p.m;                     // note this one clips the mantissa
  *value = fl.f;

  // the filler holds the 29 mantissa bits below the 23 that were kept
  float rest = ldexpf((float) dbl.p.filler, e - 127 - 52);
  *error = (dbl.p.s) ? -rest : rest;
}
2013-09-30 17:17:46 +02:00
// what is this???
float IEEE_FLIP(float number)
{
_FLOATCONV fl;
fl.f = number;
fl.p.e = -fl.p.e;
fl.p.m = (0x007FFFFF - fl.p.m);
return fl.f;
}
2013-09-30 17:16:02 +02:00
/*
// ONELINERS to speed up some specific 32 bit float math
// *(((byte*) &number)+3) &= 0x7F; // number = fabs(number);
// x = *(((byte*) &number)+3) & 0x7F; // x = fabs(number);
// GAIN = factor 2
2021-11-05 17:24:10 +01:00
2013-09-30 17:22:41 +02:00
// *(((byte*) &number)+3) |= 0x80; // number = -fabs(number);
2013-09-30 17:16:02 +02:00
// x = *(((byte*) &number)+3) | 0x80; // x == -fabs(number);
// GAIN = factor 2
2021-11-05 17:24:10 +01:00
2013-09-30 17:16:02 +02:00
// *(((byte*) &number)+3) ^= 0x80; // number = -number;
// x = *(((byte*) &number)+3) ^ 0x80; // x = -number;
// GAIN = factor 2
2021-11-05 17:24:10 +01:00
2013-09-30 17:16:02 +02:00
// s = *(((uint8_t*) &number)+3) & 0x80; // s = sign(number);
// if ( *(((byte*) &number)+3) & 0x80) x=2; // if (number < 0) x=2;
// GAIN = factor 5
// no speed optimize found for
boolean IEEE_ZERO(float number)
{
return (* ((uint32_t*) &number) ) & 0x7FFFFFFF;
}
bool IEEE_LESS(float f, float g)
{
IEEEfloat* x = (IEEEfloat*) ((void*)&f);
IEEEfloat* y = (IEEEfloat*) ((void*)&g);
if (x->s > y->s) return 1;
if (x->s < y->s) return 0;
if (x->e < y->e) return 1;
if (x->e > y->e) return 0;
if (x->m < y->m) return 1;
return 0;
}
2021-11-05 17:24:10 +01:00
bool IEEE_FLOAT_EQ(float &f, float &g)
2013-09-30 17:16:02 +02:00
{
2021-11-05 17:24:10 +01:00
not fast enough
return (memcmp(&f, &g, 4) == 0);
return (* ((uint32_t *) &f) - * ((uint32_t *) &g)) == 0 ;
2013-09-30 17:16:02 +02:00
}
*/
2020-11-27 11:16:22 +01:00
2021-11-05 17:24:10 +01:00
2020-11-27 11:16:22 +01:00
// -- END OF FILE --
2021-11-05 17:24:10 +01:00