2022-11-13 09:47:33 +01:00

430 lines
8.2 KiB
C

#pragma once
//
// FILE: IEEE754tools.h
// AUTHOR: Rob Tillaart
// VERSION: 0.2.4
// PURPOSE: manipulate IEEE754 float numbers fast
// URL: https://github.com/RobTillaart/IEEE754tools.git
//
// HISTORY: see changelog.md
//
// EXPERIMENTAL ==> USE WITH CARE
// not tested extensively,
#include "Arduino.h"
#define IEEE754_VERSION (F("0.2.4"))
// (un)comment lines to configure functionality / size
//#define IEEE754_ENABLE_MSB // +78 bytes
//  IEEE754 single precision (binary32) layout, least significant bits first:
//    m : 23 bit mantissa (fraction)
//    e :  8 bit biased exponent (bias 127)
//    s :  1 bit sign
//
//  All three fields use the same base type (uint32_t) so they are allocated
//  in one 32 bit storage unit. With mixed base types (uint32_t + uint8_t) the
//  position of e and s is ABI dependent: on ABIs where a bit-field may not
//  straddle a storage unit of its declared type, the 8 bit exponent is moved
//  from bit 23 to bit 24 and the struct grows beyond 4 bytes.
//  NOTE(review): this is presumably the cause of the "FAILS ON ESP32"
//  remarks further down in this file - confirm on target.
struct IEEEfloat
{
  uint32_t m:23;
  uint32_t e:8;
  uint32_t s:1;
};
// IEEE754 double precision (binary64) layout, least significant bits first.
// The only reference to this type in the visible code is the commented out
// member in union _DBLCONV, which uses struct _DBL instead.
struct IEEEdouble
{
uint64_t m:52;  // 52 bit mantissa (fraction)
uint16_t e:11;  // 11 bit biased exponent (the conversion code below uses bias 1023)
uint8_t s:1;  // sign bit
};
//  64 bit IEEE754 double layout as used for the packed 8 byte representation.
//  The UNO has no 64 bit double, only the upper 23 bits of the 52 bit
//  mantissa can be mapped onto a float. The lower 29 mantissa bits are kept
//  in a filler field. These might be useful in future?
//
//  Bit positions (least significant first):
//    filler :  0..28
//    m      : 29..51
//    e      : 52..62
//    s      : 63
//
//  m straddles a 32 bit boundary and e straddles a 16 bit boundary. Whether
//  a bit-field may do that is ABI dependent; on ABIs that do not allow it the
//  fields are moved to the next storage unit and the struct is no longer
//  64 bits. The packed attribute (GCC / clang) forces bit granular packing
//  so the layout above holds on every target.
struct _DBL
{
  uint32_t filler:29;
  uint32_t m:23;
  uint16_t e:11;
  uint8_t  s:1;
} __attribute__((packed));
// for packing and unpacking a float
// All members overlay the same storage, so a value written through one
// member can be inspected or modified through another.
union _FLOATCONV
{
IEEEfloat p;  // bit-field view: mantissa / exponent / sign
float f;  // the value as a float
byte b[4];  // raw byte view
};
// for packing and unpacking a double
// All members overlay the same storage; the conversion functions in this
// file fill the bit-fields of p and copy the result out through b (or the
// other way around).
union _DBLCONV
{
// IEEEdouble p;
_DBL p;  // bit-field view: filler / mantissa / exponent / sign
double d; // !! is a 32bit float for UNO, so d does not cover all 8 bytes there.
byte b[8];  // raw byte view of the 64 bit pattern
};
//
// DEBUG FUNCTIONS
//
//  print float components
//  Prints sign, biased exponent and mantissa of number in HEX, separated by
//  tabs and terminated by a newline.
//  The fields are taken from the raw 32 bit pattern with shifts and masks.
//  This avoids reading the float through an IEEEfloat pointer (a strict
//  aliasing violation) and makes the output independent of how the compiler
//  lays out bit-fields.
void dumpFloat(float number)
{
  union { float f; uint32_t u; } raw;
  raw.f = number;

  const uint8_t  sign     = (uint8_t) (raw.u >> 31);
  const uint8_t  exponent = (uint8_t) ((raw.u >> 23) & 0xFF);
  const uint32_t mantissa = raw.u & 0x007FFFFFUL;

  Serial.print(sign, HEX);
  Serial.print("\t");
  Serial.print(exponent, HEX);
  Serial.print("\t");
  Serial.println(mantissa, HEX);
}
//  print "double" components
//  Output: sign, exponent and mantissa field of dbl in HEX on one line,
//  separated by tabs. The filler field is not printed.
void dumpDBL(struct _DBL dbl)
{
  const char * const separator = "\t";

  Serial.print(dbl.s, HEX);
  Serial.print(separator);
  Serial.print(dbl.e, HEX);
  Serial.print(separator);
  Serial.println(dbl.m, HEX);
}
//
// mapping to/from 64bit double - best effort
//
//  converts a float to a packed array of 8 bytes representing a 64 bit double
//
//  number    : the float to convert
//  bar       : destination, must have room for 8 bytes
//  byteOrder : LSBFIRST (default) or MSBFIRST; MSBFIRST is only honoured
//              when IEEE754_ENABLE_MSB is defined, otherwise the bytes are
//              always written least significant byte first.
//
//  The 23 mantissa bits of the float become the upper 23 mantissa bits of
//  the double, the lower 29 bits are zero. The exponent is re-biased from
//  127 to 1023, with the special exponent values handled separately:
//    - zero stays zero (exponent field 0),
//    - INF and NaN keep an all ones exponent (0x7FF),
//    - subnormal floats are normalised, as they are normal numbers in
//      double precision.
void float2DoublePacked(float number, byte* bar, int byteOrder = LSBFIRST)
{
  _FLOATCONV fl;
  //  prevent warning/error on ESP build
  fl.p.s = 0;
  fl.p.e = 0;
  fl.p.m = 0;
  fl.f = number;

  uint32_t mantissa = fl.p.m;
  int exponent = fl.p.e;
  if (exponent == 0xFF)
  {
    //  INF or NaN
    exponent = 0x7FF;
  }
  else if (exponent != 0)
  {
    //  normal number: exponent adjust
    exponent = exponent - 127 + 1023;
  }
  else if (mantissa != 0)
  {
    //  subnormal float: value = mantissa * 2^-149.
    //  shift until the implicit leading one sits on bit 23, then drop it.
    exponent = 1 - 127 + 1023;
    while ((mantissa & 0x00800000UL) == 0)
    {
      mantissa <<= 1;
      exponent--;
    }
    mantissa &= 0x007FFFFFUL;
  }
  //  else: +0 / -0, exponent and mantissa stay zero.

  _DBLCONV dbl;
  dbl.p.filler = 0;
  dbl.p.s = fl.p.s;
  dbl.p.e = exponent;
  dbl.p.m = mantissa;

#ifdef IEEE754_ENABLE_MSB
  if (byteOrder != LSBFIRST)
  {
    for (int i = 0; i < 8; i++)
    {
      bar[i] = dbl.b[7 - i];
    }
    return;
  }
#else
  (void) byteOrder;  //  only used when MSB support is compiled in
#endif

  for (int i = 0; i < 8; i++)
  {
    bar[i] = dbl.b[i];
  }
}
//  converts a packed array of 8 bytes (a 64 bit double) into a 32 bit float.
//
//  bar       : source, 8 bytes
//  byteOrder : LSBFIRST (default) or MSBFIRST; MSBFIRST is only honoured
//              when IEEE754_ENABLE_MSB is defined.
//
//  The mantissa is truncated to 23 bits (no rounding).
//  The exponent is re-biased from 1023 to 127. Values that do not fit:
//    - double INF / NaN      => float INF / NaN
//    - exponent too large    => +-INF
//    - exponent too small    => +-0 (this includes double zero and double
//                               subnormals; float subnormals are not produced)
//
//  The previous range test used "e >= 0 || e <= 255", which is always true,
//  so out of range exponents silently wrapped into the 8 bit field.
float doublePacked2Float(byte* bar, int byteOrder = LSBFIRST)
{
  _FLOATCONV fl;
  _DBLCONV dbl;

#ifdef IEEE754_ENABLE_MSB
  if (byteOrder == LSBFIRST)
  {
#else
  (void) byteOrder;  //  only used when MSB support is compiled in
#endif
    for (int i = 0; i < 8; i++)
    {
      dbl.b[i] = bar[i];
    }
#ifdef IEEE754_ENABLE_MSB
  }
  else
  {
    for (int i = 0; i < 8; i++)
    {
      dbl.b[i] = bar[7 - i];
    }
  }
#endif

  fl.f = 0;           //  defined start value, all fields are set below.
  fl.p.s = dbl.p.s;

  const int dblExponent = dbl.p.e;
  if (dblExponent == 0x7FF)
  {
    //  INF or NaN
    fl.p.e = 0xFF;
    fl.p.m = dbl.p.m;
    if ((dbl.p.m == 0) && (dbl.p.filler != 0))
    {
      //  NaN whose payload sits only in the truncated bits: keep it a NaN.
      fl.p.m = 0x00400000UL;
    }
    return fl.f;
  }

  const int e = dblExponent - 1023 + 127;  //  exponent adjust
  if (e >= 255)
  {
    //  overflow => +-INF
    fl.p.e = 0xFF;
    fl.p.m = 0;
    return fl.f;
  }
  if (e <= 0)
  {
    //  zero or underflow => +-0
    fl.p.e = 0;
    fl.p.m = 0;
    return fl.f;
  }

  fl.p.e = e;
  fl.p.m = dbl.p.m;   //  the lower 29 mantissa bits (filler) are dropped
  return fl.f;
}
//
// TEST FUNCTIONS
//
//  returns 1 if number is a NaN, 0 otherwise.
//  A float is NaN when all exponent bits are set and the mantissa is not
//  zero; the sign bit is ignored. The previous test compared the upper 16
//  bits with 0x7FC0 only and therefore missed negative NaNs and NaNs with
//  other mantissa bits set.
int IEEE_NAN(float number)
{
  union { float f; uint32_t u; } raw;
  raw.f = number;
  //  without the sign bit: INF is exactly 0x7F800000, every NaN is larger.
  return (raw.u & 0x7FFFFFFFUL) > 0x7F800000UL;
}
//  returns  1 if number is +INF,
//          -1 if number is -INF,
//           0 otherwise.
//  The complete 32 bit pattern is compared. The previous version looked at
//  the two most significant bytes only, so a NaN whose upper 7 mantissa bits
//  are zero (e.g. 0x7F800001) was reported as INF. It also assumed a little
//  endian byte order.
int IEEE_INF(float number)
{
  union { float f; uint32_t u; } raw;
  raw.f = number;
  if (raw.u == 0x7F800000UL) return 1;
  if (raw.u == 0xFF800000UL) return -1;
  return 0;
}
//  dedicated test for +INF, see also IEEE_NegINF.
//  returns true only for the exact +INF pattern 0x7F800000.
//  The previous version compared the upper 16 bits only, so a NaN with only
//  lower mantissa bits set (e.g. 0x7F800001) was reported as +INF.
boolean IEEE_PosINF(float number)
{
  union { float f; uint32_t u; } raw;
  raw.f = number;
  return raw.u == 0x7F800000UL;
}
//  dedicated test for -INF, see also IEEE_PosINF.
//  returns true only for the exact -INF pattern 0xFF800000.
//  The previous version compared the upper 16 bits only, so a NaN with only
//  lower mantissa bits set (e.g. 0xFF800001) was reported as -INF.
boolean IEEE_NegINF(float number)
{
  union { float f; uint32_t u; } raw;
  raw.f = number;
  return raw.u == 0xFF800000UL;
}
//
// PROPERTIES
//
//  returns the sign bit of number: 0 = positive, 1 = negative.
//  -0.0 and negative NaNs return 1 too.
//  Taken from bit 31 of the raw pattern, so the result does not depend on
//  the compiler's bit-field layout and no float is read through a pointer
//  of another type.
uint8_t IEEE_Sign(float number)
{
  union { float f; uint32_t u; } raw;
  raw.f = number;
  return (uint8_t) (raw.u >> 31);
}
//  returns the unbiased exponent of number (exponent field minus 127).
//  Range -127 .. 128; -127 means zero or subnormal, 128 means INF or NaN.
//  Taken from bits 23..30 of the raw pattern, so the result does not depend
//  on the compiler's bit-field layout.
int IEEE_Exponent(float number)
{
  union { float f; uint32_t u; } raw;
  raw.f = number;
  return (int) ((raw.u >> 23) & 0xFF) - 127;
}
//  returns the 23 bit mantissa (fraction) field of number, without the
//  implicit leading one.
//  Taken from bits 0..22 of the raw pattern, so no float is read through a
//  pointer of another type.
uint32_t IEEE_Mantisse(float number)
{
  union { float f; uint32_t u; } raw;
  raw.f = number;
  return raw.u & 0x007FFFFFUL;
}
//
// MATH FUNCTIONS
//
//  f = f * 2^n
//  Scales number by a power of two by adjusting the exponent field only.
//  Slower than IEEE_POW2fast but checks the range:
//    - zero, INF and NaN are returned unchanged,
//    - overflow  returns +-INF,
//    - underflow returns +-0 (results are flushed to zero, no subnormals
//      are produced).
//  Limitation: subnormal input is returned unchanged (not scaled).
//
//  Previously underflow returned +-INF, zero was turned into a non zero
//  value and an exponent of exactly 255 produced a NaN.
float IEEE_POW2(float number, int n)
{
  union { float f; uint32_t u; } raw;
  raw.f = number;

  const int exponent = (int) ((raw.u >> 23) & 0xFF);
  //  field 0 (zero / subnormal) and 0xFF (INF / NaN) are no plain powers of 2
  if ((exponent == 0) || (exponent == 0xFF))
  {
    return number;
  }

  const uint32_t sign = raw.u & 0x80000000UL;
  //  compare against n without adding first, so a huge n cannot overflow.
  if (n >= 0xFF - exponent)
  {
    raw.u = sign | 0x7F800000UL;   //  +-INF
    return raw.f;
  }
  if (n <= -exponent)
  {
    raw.u = sign;                  //  +-0
    return raw.f;
  }

  //  new exponent is in 1..254, sign and mantissa are kept.
  raw.u = (raw.u & 0x807FFFFFUL) | ((uint32_t) (exponent + n) << 23);
  return raw.f;
}
//  f = f * 2^n, fast version.
//  WARNING no overflow detection: n is added to the 8 bit exponent field and
//  the sum wraps modulo 256. Zero, subnormals, INF and NaN are not treated
//  specially. Use IEEE_POW2 when the result may leave the float range.
//  The exponent is updated on the raw bit pattern (bits 23..30), so the
//  result does not depend on the compiler's bit-field layout.
float IEEE_POW2fast(float number, int n)
{
  union { float f; uint32_t u; } raw;
  raw.f = number;

  const uint32_t exponent = ((raw.u >> 23) + (uint32_t) n) & 0xFFUL;
  raw.u = (raw.u & 0x807FFFFFUL) | (exponent << 23);
  return raw.f;
}
//  f = f * 2^n, same contract as IEEE_POW2fast:
//  no overflow detection, the 8 bit exponent field wraps modulo 256.
//  The exponent is updated on the raw bit pattern (bits 23..30). The former
//  implementation incremented the e bit-field through an IEEEfloat pointer
//  and was reported to fail on ESP32 (x16 => x256).
//  NOTE(review): that failure is presumably caused by the ABI dependent
//  position of the bit-field - confirm on target.
float IEEE_FLOAT_POW2fast(float number, int n)
{
  union { float f; uint32_t u; } raw;
  raw.f = number;

  const uint32_t exponent = ((raw.u >> 23) + (uint32_t) n) & 0xFFUL;
  raw.u = (raw.u & 0x807FFFFFUL) | (exponent << 23);
  return raw.f;
}
//  f = f / 2 by decrementing the exponent field.
//  - NOT FASTER than a normal division by 2.
//  - no underflow detection: the 8 bit exponent field wraps modulo 256, so
//    zero and subnormal input give a wrong result.
//  The exponent is updated on the raw bit pattern (bits 23..30). The former
//  implementation decremented the e bit-field through an IEEEfloat pointer
//  and was reported to fail on ESP32 (==> divides by 4).
//  NOTE(review): that failure is presumably caused by the ABI dependent
//  position of the bit-field - confirm on target.
float IEEE_FLOAT_DIV2(float number)
{
  union { float f; uint32_t u; } raw;
  raw.f = number;

  const uint32_t exponent = ((raw.u >> 23) - 1UL) & 0xFFUL;
  raw.u = (raw.u & 0x807FFFFFUL) | (exponent << 23);
  return raw.f;
}
//  bitwise equality of two floats, compared as two 16 bit halves.
//  returns true when both halves of f and g hold the same bits.
//  As this is a comparison of bit patterns, +0 and -0 are different and two
//  NaNs with the same pattern are equal.
bool IEEE_FLOAT_EQ(float &f, float &g)
{
  const uint16_t *lhs = (const uint16_t *) &f;
  const uint16_t *rhs = (const uint16_t *) &g;

  //  first half differs => done, the second half is not looked at.
  if (lhs[0] != rhs[0])
  {
    return false;
  }
  return lhs[1] == rhs[1];
}
//  bitwise inequality of two floats, compared as two 16 bit halves.
//  returns true when at least one half of f and g differs.
//  As this is a comparison of bit patterns, +0 and -0 are different and two
//  NaNs with the same pattern are not reported as different.
bool IEEE_FLOAT_NEQ(float &f, float &g)
{
  const uint16_t *lhs = (const uint16_t *) &f;
  const uint16_t *rhs = (const uint16_t *) &g;

  //  first half differs => done, the second half is not looked at.
  if (lhs[0] != rhs[0])
  {
    return true;
  }
  return lhs[1] != rhs[1];
}
////////////////////////////////////////////////////////////////////////////////
//
// NOT TESTED FUNCTIONS
//
//
//  converts a packed 64 bit double into a float and returns the part that
//  was lost by truncating the mantissa as a separate float.
//
//  bar       : source, 8 bytes
//  byteOrder : LSBFIRST or MSBFIRST; MSBFIRST is only honoured when
//              IEEE754_ENABLE_MSB is defined.
//  value     : out, the double truncated to float (23 bit mantissa)
//  error     : out, the truncated remainder, so that value + error
//              approximates the original double. It has the same sign as
//              value and is smaller than one unit in the last place of value.
//
//  Special cases (error is 0 in all of them):
//    - double INF / NaN      => value = INF / NaN
//    - exponent too large    => value = +-INF
//    - exponent too small    => value = +-0
//  error is also 0 when the remainder itself would be a subnormal float.
//
//  Fixes compared to the previous version: the range test was always true
//  ("||"), the result was always overwritten with +-INF and 0 because the
//  normal path did not return, and the remainder was packed as a mantissa
//  with an implicit leading one, which gave a non zero error for filler == 0.
void doublePacked2Float2(byte* bar, int byteOrder, float* value, float* error)
{
  _FLOATCONV fl;
  _DBLCONV dbl;

#ifdef IEEE754_ENABLE_MSB
  if (byteOrder == LSBFIRST)
  {
#else
  (void) byteOrder;  //  only used when MSB support is compiled in
#endif
    for (int i = 0; i < 8; i++)
    {
      dbl.b[i] = bar[i];
    }
#ifdef IEEE754_ENABLE_MSB
  }
  else
  {
    for (int i = 0; i < 8; i++)
    {
      dbl.b[i] = bar[7 - i];
    }
  }
#endif

  *error = 0;
  fl.f = 0;           //  defined start value, all fields are set below.
  fl.p.s = dbl.p.s;

  const int dblExponent = dbl.p.e;
  if (dblExponent == 0x7FF)
  {
    //  INF or NaN
    fl.p.e = 0xFF;
    fl.p.m = dbl.p.m;
    if ((dbl.p.m == 0) && (dbl.p.filler != 0))
    {
      //  NaN whose payload sits only in the truncated bits: keep it a NaN.
      fl.p.m = 0x00400000UL;
    }
    *value = fl.f;
    return;
  }

  const int e = dblExponent - 1023 + 127;  //  exponent adjust
  if (e >= 255)
  {
    //  overflow => +-INF
    fl.p.e = 0xFF;
    fl.p.m = 0;
    *value = fl.f;
    return;
  }
  if (e <= 0)
  {
    //  zero or underflow => +-0
    fl.p.e = 0;
    fl.p.m = 0;
    *value = fl.f;
    return;
  }

  fl.p.e = e;
  fl.p.m = dbl.p.m;   //  the lower 29 mantissa bits (filler) are dropped
  *value = fl.f;

  //  The dropped 29 bits are a fraction (filler / 2^29) of one unit in the
  //  last place of value. One such unit is 2^(e - 127 - 23), i.e. a float
  //  with exponent field e - 23 and mantissa 0; the sign is still in fl.
  const uint32_t filler = dbl.p.filler;
  if ((filler != 0) && (e > 23))
  {
    fl.p.e = e - 23;
    fl.p.m = 0;
    *error = fl.f * ((float) filler / 536870912.0f);   //  536870912 = 2^29
  }
}
// what is this???
float IEEE_FLIP(float number)
{
_FLOATCONV fl;
fl.f = number;
fl.p.e = -fl.p.e;
fl.p.m = (0x007FFFFF - fl.p.m);
return fl.f;
}
/*
// ONELINERS to speed up some specific 32 bit float math
// *(((byte*) &number)+3) &= 0x7F; // number = fabs(number);
// x = *(((byte*) &number)+3) & 0x7F; // x = fabs(number);
// GAIN = factor 2
// *(((byte*) &number)+3) |= 0x80; // number = -fabs(number);
// x = *(((byte*) &number)+3) | 0x80; // x = -fabs(number);
// GAIN = factor 2
// *(((byte*) &number)+3) ^= 0x80; // number = -number;
// x = *(((byte*) &number)+3) ^ 0x80; // x = -number;
// GAIN = factor 2
// s = *(((uint8_t*) &number)+3) & 0x80; // s = sign(number);
// if ( *(((byte*) &number)+3) & 0x80) x=2; // if (number < 0) x=2;
// GAIN = factor 5
// no speed optimize found for
boolean IEEE_ZERO(float number)
{
return (* ((uint32_t*) &number) ) & 0x7FFFFFFF;
}
bool IEEE_LESS(float f, float g)
{
IEEEfloat* x = (IEEEfloat*) ((void*)&f);
IEEEfloat* y = (IEEEfloat*) ((void*)&g);
if (x->s > y->s) return 1;
if (x->s < y->s) return 0;
if (x->e < y->e) return 1;
if (x->e > y->e) return 0;
if (x->m < y->m) return 1;
return 0;
}
bool IEEE_FLOAT_EQ(float &f, float &g)
{
not fast enough
return (memcmp(&f, &g, 4) == 0);
return (* ((uint32_t *) &f) - * ((uint32_t *) &g)) == 0 ;
}
*/
// -- END OF FILE --