mirror of
https://github.com/RobTillaart/Arduino.git
synced 2024-10-03 18:09:02 -04:00
0.2.0 float16ext
This commit is contained in:
parent
c6851e9ae3
commit
82f164b6bb
@ -6,6 +6,12 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/)
|
||||
and this project adheres to [Semantic Versioning](http://semver.org/).
|
||||
|
||||
|
||||
## [0.2.0] - 2024-04-18
|
||||
- sync with float16 version 0.3.0
|
||||
- remove printable interface to fix issue #12 float 16 (footprint).
|
||||
|
||||
----
|
||||
|
||||
## [0.1.0] - 2024-03-06
|
||||
- initial version based upon float16
|
||||
|
||||
|
@ -19,17 +19,33 @@ This **experimental** library defines the float16ext (2 byte) data type, includi
|
||||
function to and from float32 type. It is an extension to the float16 library.
|
||||
Reference -https://en.wikipedia.org/wiki/Half-precision_floating-point_format#ARM_alternative_half-precision
|
||||
|
||||
The library implements the **Printable** interface so one can directly print the
|
||||
float16ext values in any stream e.g. Serial.
|
||||
|
||||
The primary usage of the float16ext data type is to efficiently store and transport
|
||||
a floating point number. As it uses only 2 bytes where float and double have typical
|
||||
4 and 8 bytes, gains can be made at the price of range and precision.
|
||||
|
||||
Note that float16ext only has ~3 significant digits.
|
||||
|
||||
To print a float16ext, one needs to convert it with toFloat(), toDouble() or toString(decimals).
|
||||
The latter allows concatenation and further conversion to a char array.
|
||||
|
||||
#### Difference with float16
|
||||
In pre 0.3.0 version the Printable interface was implemented, but it has been removed
|
||||
as it caused excessive memory usage when declaring arrays of float16.
|
||||
|
||||
|
||||
#### ARM alternative half-precision
|
||||
|
||||
- https://en.wikipedia.org/wiki/Half-precision_floating-point_format#ARM_alternative_half-precision
|
||||
|
||||
_ARM processors support (via a floating point control register bit)
|
||||
an "alternative half-precision" format, which does away with the
|
||||
special case for an exponent value of 31 (11111 in binary). It is almost
|
||||
identical to the IEEE format, but there is no encoding for infinity or NaNs;
|
||||
instead, an exponent of 31 encodes normalized numbers in the range 65536 to 131008._
|
||||
|
||||
Implemented in https://github.com/RobTillaart/float16ext class.
|
||||
|
||||
|
||||
#### Difference with float16 and float16ext
|
||||
|
||||
The float16ext library has an extended range as it supports values from +- 65504
|
||||
to +- 131008.
|
||||
@ -44,25 +60,73 @@ Although they share a lot of code float16 and float16ext should not be mixed.
|
||||
In the future these libraries might merge / derive one from the other.
|
||||
|
||||
|
||||
#### Breaking change 0.2.0
|
||||
|
||||
Version 0.2.0 has a breaking change. The **Printable** interface is removed as
|
||||
it causes larger than expected arrays of float 16 (See #16). On ESP8266 every
|
||||
float16 object was 8 bytes and on AVR it was 5 bytes instead of the expected 2 bytes.
|
||||
|
||||
To support printing the class added two new conversion functions:
|
||||
```cpp
|
||||
f16.toFloat();
|
||||
f16.toString(decimals);
|
||||
|
||||
Serial.println(f16.toFloat(), 4);
|
||||
Serial.println(f16.toString(4));
|
||||
```
|
||||
This keeps printing relatively easy.
|
||||
|
||||
The footprint of the library is now smaller and one can now create compact arrays
|
||||
of float16 elements using only 2 bytes per element.
|
||||
|
||||
## Specifications
|
||||
|
||||
layout is same as float16.
|
||||
layout is same as float16, however the range is different.
|
||||
|
||||
| attribute | value | notes |
|
||||
|:----------|:-------------|:--------|
|
||||
| size | 2 bytes | layout s eeeee mmmmmmmmmm (1, 5, 10)
|
||||
| sign | 1 bit |
|
||||
| exponent | 5 bit |
|
||||
| mantissa | 10 bit | ~ 3 digits
|
||||
| minimum | 5.96046 E−8 | smallest positive number.
|
||||
| | 1.0009765625 | 1 + 2^−10 = smallest number larger than 1.
|
||||
| maximum | 131008 |
|
||||
| | |
|
||||
| Attribute | Value | Notes |
|
||||
|:------------|:----------------|:--------|
|
||||
| size | 2 bytes | layout s eeeee mmmmmmmmmm (1, 5, 10)
|
||||
| sign | 1 bit |
|
||||
| exponent | 5 bit |
|
||||
| mantissa | 10 bit | 3 - 4 digits
|
||||
| minimum | ±5.96046 E−8 | smallest number.
|
||||
| | ±1.0009765625 | 1 + 2^−10 = smallest number larger than 1.
|
||||
| maximum | ±131008 |
|
||||
| | |
|
||||
|
||||
± = ALT 0177
|
||||
|
||||
|
||||
#### Example values
|
||||
|
||||
Source: https://en.wikipedia.org/wiki/Half-precision_floating-point_format
|
||||
|
||||
```cpp
|
||||
/*
|
||||
SIGN EXP MANTISSA
|
||||
0 01111 0000000000 = 1
|
||||
0 01111 0000000001 = 1 + 2^−10 = 1.0009765625 (next smallest float after 1)
|
||||
1 10000 0000000000 = −2
|
||||
|
||||
0 11110 1111111111 = 65504 (max half precision)
|
||||
|
||||
0 00001 0000000000 = 2^−14 ≈ 6.10352 × 10^−5 (minimum positive normal)
|
||||
0 00000 1111111111 = 2^−14 - 2^−24 ≈ 6.09756 × 10^−5 (maximum subnormal)
|
||||
0 00000 0000000001 = 2^−24 ≈ 5.96046 × 10^−8 (minimum positive subnormal)
|
||||
|
||||
0 00000 0000000000 = 0
|
||||
1 00000 0000000000 = −0
|
||||
|
||||
0 01101 0101010101 = 0.333251953125 ≈ 1/3
|
||||
*/
|
||||
```
|
||||
|
||||
|
||||
#### Related
|
||||
|
||||
- https://github.com/RobTillaart/float16
|
||||
- https://github.com/RobTillaart/float16ext
|
||||
- https://github.com/RobTillaart/fraction
|
||||
- https://en.wikipedia.org/wiki/Half-precision_floating-point_format
|
||||
|
||||
|
||||
@ -74,28 +138,35 @@ layout is same as float16.
|
||||
|
||||
#### Constructors
|
||||
|
||||
- **float16ext(void)** defaults to zero.
|
||||
- **float16ext(void)** defaults value to zero.
|
||||
- **float16ext(double f)** constructor.
|
||||
- **float16ext(const float16ext &f)** copy constructor.
|
||||
|
||||
|
||||
#### Conversion
|
||||
|
||||
- **double toDouble(void)** convert to double (or float).
|
||||
- **double toDouble(void)** convert value to double or float (if the same e.g. UNO).
|
||||
- **float toFloat(void)** convert value to float.
|
||||
- **String toString(unsigned int decimals = 2)** convert value to a String with decimals.
|
||||
Please note that the accuracy is only 3-4 digits for the whole number so use decimals
|
||||
with care.
|
||||
|
||||
|
||||
#### Export and store
|
||||
|
||||
To serialize the internal format e.g. to disk, two helper functions are available.
|
||||
|
||||
- **uint16_t getBinary()** get the 2 byte binary representation.
|
||||
- **void setBinary(uint16_t u)** set the 2 bytes binary representation.
|
||||
- **size_t printTo(Print& p) const** Printable interface.
|
||||
- **void setDecimals(uint8_t d)** idem, used for printTo.
|
||||
- **uint8_t getDecimals()** idem.
|
||||
|
||||
Note the setDecimals takes one byte per object which is not efficient for arrays of float16ext.
|
||||
See array example for efficient storage using set/getBinary() functions.
|
||||
|
||||
|
||||
#### Compare
|
||||
|
||||
Standard compare functions. Since 0.1.5 these are quite optimized,
|
||||
so it is fast to compare e.g. 2 measurements.
|
||||
The library implements the standard compare functions.
|
||||
These are optimized, so it is fast to compare 2 float16ext values.
|
||||
|
||||
Note: comparison with a float or double always includes a conversion.
|
||||
You can improve performance by converting e.g. a threshold only once before comparison.
|
||||
|
||||
- **bool operator == (const float16ext& f)**
|
||||
- **bool operator != (const float16ext& f)**
|
||||
@ -120,14 +191,14 @@ Not planned to optimize these.
|
||||
- **float16ext& operator \*= (const float16ext& f)**
|
||||
- **float16ext& operator /= (const float16ext& f)**
|
||||
|
||||
negation operator.
|
||||
Negation operator.
|
||||
- **float16ext operator - ()** fast negation.
|
||||
|
||||
Math helpers.
|
||||
- **int sign()** returns 1 == positive, 0 == zero, -1 == negative.
|
||||
- **bool isZero()** returns true if zero. slightly faster than **sign()**.
|
||||
|
||||
|
||||
## Notes
|
||||
The float16ext does not support INF or NAN.
|
||||
|
||||
|
||||
## Future
|
||||
@ -135,7 +206,7 @@ negation operator.
|
||||
#### Must
|
||||
|
||||
- update documentation.
|
||||
- keep in sync with float16 lib
|
||||
- keep in sync with float16 lib where possible.
|
||||
|
||||
#### Should
|
||||
|
||||
|
@ -21,8 +21,6 @@ void setup()
|
||||
// Serial.print("FLOAT16EXT_LIB_VERSION: ");
|
||||
// Serial.println(FLOAT16EXT_LIB_VERSION);
|
||||
|
||||
f16.setDecimals(10);
|
||||
|
||||
print_all();
|
||||
|
||||
Serial.println("\ndone");
|
||||
@ -43,7 +41,7 @@ void print_all()
|
||||
f16.setBinary(x);
|
||||
Serial.print(x);
|
||||
Serial.print("\t");
|
||||
Serial.print(f16);
|
||||
Serial.print(f16.toString(4));
|
||||
Serial.println();
|
||||
}
|
||||
}
|
||||
|
@ -21,8 +21,6 @@ void setup()
|
||||
Serial.print("FLOAT16EXT_LIB_VERSION: ");
|
||||
Serial.println(FLOAT16EXT_LIB_VERSION);
|
||||
|
||||
f16.setDecimals(10);
|
||||
|
||||
print_all();
|
||||
|
||||
Serial.println("\ndone");
|
||||
@ -47,7 +45,7 @@ void print_all()
|
||||
Serial.print("\t");
|
||||
Serial.print(f16.toDouble(), 10);
|
||||
Serial.print("\t");
|
||||
Serial.print(f16);
|
||||
Serial.print(f16.toString(10));
|
||||
Serial.println();
|
||||
}
|
||||
|
||||
@ -59,7 +57,7 @@ void print_all()
|
||||
Serial.print("\t");
|
||||
Serial.print(f16.toDouble(), 10);
|
||||
Serial.print("\t");
|
||||
Serial.print(f16);
|
||||
Serial.print(f16.toString(10));
|
||||
Serial.println();
|
||||
}
|
||||
}
|
||||
|
@ -21,8 +21,6 @@ void setup()
|
||||
Serial.print("FLOAT16EXT_LIB_VERSION: ");
|
||||
Serial.println(FLOAT16EXT_LIB_VERSION);
|
||||
|
||||
f16.setDecimals(6);
|
||||
|
||||
test_all();
|
||||
|
||||
Serial.println("\ndone");
|
||||
|
@ -1,7 +1,7 @@
|
||||
//
|
||||
// FILE: float16ext.cpp
|
||||
// AUTHOR: Rob Tillaart
|
||||
// VERSION: 0.1.0
|
||||
// VERSION: 0.2.0
|
||||
// PURPOSE: library for Float16s for Arduino
|
||||
// URL: http://en.wikipedia.org/wiki/Half-precision_floating-point_format
|
||||
|
||||
@ -15,18 +15,26 @@ float16ext::float16ext(double f)
|
||||
_value = f32tof16(f);
|
||||
}
|
||||
|
||||
// PRINTING
|
||||
size_t float16ext::printTo(Print& p) const
|
||||
{
|
||||
double d = this->f16tof32(_value);
|
||||
return p.print(d, _decimals);
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////////
|
||||
//
|
||||
// CONVERTING & PRINTING
|
||||
//
|
||||
double float16ext::toDouble() const
|
||||
{
|
||||
return f16tof32(_value);
|
||||
}
|
||||
|
||||
float float16ext::toFloat() const
|
||||
{
|
||||
return f16tof32(_value);
|
||||
}
|
||||
|
||||
String float16ext::toString(unsigned int decimals) const
|
||||
{
|
||||
return String((double)f16tof32(_value), decimals);
|
||||
}
|
||||
|
||||
|
||||
//////////////////////////////////////////////////////////
|
||||
//
|
||||
@ -172,12 +180,7 @@ float float16ext::f16tof32(uint16_t _value) const
|
||||
{
|
||||
return sgn ? -0 : 0;
|
||||
}
|
||||
// NAN & INF
|
||||
// if (exp == 0x001F)
|
||||
// {
|
||||
// if (man == 0) return sgn ? -INFINITY : INFINITY;
|
||||
// else return NAN;
|
||||
// }
|
||||
// NAN & INF not supported
|
||||
|
||||
// NORMAL
|
||||
if (exp > 0)
|
||||
|
@ -2,7 +2,7 @@
|
||||
//
|
||||
// FILE: float16ext.h
|
||||
// AUTHOR: Rob Tillaart
|
||||
// VERSION: 0.1.0
|
||||
// VERSION: 0.2.0
|
||||
// PURPOSE: Arduino library to implement float16ext data type.
|
||||
// half-precision floating point format,
|
||||
// used for efficient storage and transport.
|
||||
@ -11,10 +11,10 @@
|
||||
|
||||
#include "Arduino.h"
|
||||
|
||||
#define FLOAT16EXT_LIB_VERSION (F("0.1.0"))
|
||||
#define FLOAT16EXT_LIB_VERSION (F("0.2.0"))
|
||||
|
||||
|
||||
class float16ext: public Printable
|
||||
class float16ext
|
||||
{
|
||||
public:
|
||||
// Constructors
|
||||
@ -22,16 +22,14 @@ public:
|
||||
float16ext(double f);
|
||||
float16ext(const float16ext &f) { _value = f._value; };
|
||||
|
||||
// Conversion
|
||||
// Conversion and printing
|
||||
double toDouble(void) const;
|
||||
// access the 2 byte representation.
|
||||
uint16_t getBinary() { return _value; };
|
||||
void setBinary(uint16_t u) { _value = u; };
|
||||
float toFloat() const;
|
||||
String toString(unsigned int decimals = 2) const; // keep esp32 happy.
|
||||
|
||||
// Printable
|
||||
size_t printTo(Print& p) const;
|
||||
void setDecimals(uint8_t d) { _decimals = d; };
|
||||
uint8_t getDecimals() { return _decimals; };
|
||||
// access the 2 byte representation.
|
||||
uint16_t getBinary() { return _value; };
|
||||
void setBinary(uint16_t u) { _value = u; };
|
||||
|
||||
// equalities
|
||||
bool operator == (const float16ext& f);
|
||||
@ -62,13 +60,12 @@ public:
|
||||
|
||||
|
||||
// CORE CONVERSION
|
||||
// should be private but for testing...
|
||||
// should be private, needed for testing.
|
||||
float f16tof32(uint16_t) const;
|
||||
uint16_t f32tof16(float) const;
|
||||
|
||||
|
||||
private:
|
||||
uint8_t _decimals = 4;
|
||||
uint16_t _value;
|
||||
|
||||
};
|
||||
|
@ -1,4 +1,4 @@
|
||||
# Syntax Colouring Map For float16
|
||||
# Syntax Colouring Map For float16ext
|
||||
|
||||
|
||||
# Data types (KEYWORD1)
|
||||
@ -7,11 +7,11 @@ float16ext KEYWORD1
|
||||
|
||||
# Methods and Functions (KEYWORD2)
|
||||
toDouble KEYWORD2
|
||||
toFloat KEYWORD2
|
||||
toString KEYWORD2
|
||||
getBinary KEYWORD2
|
||||
setBinary KEYWORD2
|
||||
|
||||
setDecimals KEYWORD2
|
||||
getDecimals KEYWORD2
|
||||
|
||||
sign KEYWORD2
|
||||
isZero KEYWORD2
|
||||
|
@ -15,7 +15,7 @@
|
||||
"type": "git",
|
||||
"url": "https://github.com/RobTillaart/float16ext.git"
|
||||
},
|
||||
"version": "0.1.0",
|
||||
"version": "0.2.0",
|
||||
"license": "MIT",
|
||||
"frameworks": "*",
|
||||
"platforms": "*",
|
||||
|
@ -1,5 +1,5 @@
|
||||
name=float16ext
|
||||
version=0.1.0
|
||||
version=0.2.0
|
||||
author=Rob Tillaart <rob.tillaart@gmail.com>
|
||||
maintainer=Rob Tillaart <rob.tillaart@gmail.com>
|
||||
sentence=Arduino library to implement float16ext data type.
|
||||
|
@ -85,6 +85,16 @@ unittest(test_constructor)
|
||||
}
|
||||
|
||||
|
||||
unittest(test_sizeof)
|
||||
{
|
||||
float16ext value(1);
|
||||
float16ext arr[10];
|
||||
|
||||
assertEqual(2, sizeof(value));
|
||||
assertEqual(20, sizeof(arr));
|
||||
}
|
||||
|
||||
|
||||
unittest(test_compare_equal)
|
||||
{
|
||||
float16ext a(1);
|
||||
@ -153,19 +163,12 @@ unittest(test_conversion)
|
||||
}
|
||||
|
||||
|
||||
unittest(test_printable)
|
||||
unittest(test_toString)
|
||||
{
|
||||
float16ext f16(123.456);
|
||||
// test default value.
|
||||
assertEqual(4, f16.getDecimals());
|
||||
for (int i = 0; i < 6; i++)
|
||||
{
|
||||
f16.setDecimals(i);
|
||||
assertEqual(i, f16.getDecimals());
|
||||
}
|
||||
|
||||
// TODO
|
||||
// printable? how to test?
|
||||
float16ext f16(-123.456);
|
||||
fprintf(stderr, "note the limited accuracy (~4 digits).\n");
|
||||
assertNotEqual("-123.456", f16.toString(3));
|
||||
assertEqual("-123.4", f16.toString(1));
|
||||
}
|
||||
|
||||
|
||||
@ -184,17 +187,19 @@ unittest(test_all_values)
|
||||
b = a.toDouble();
|
||||
if (x != b.getBinary()) // assert would give 65K lines output!
|
||||
{
|
||||
fprintf(stderr, "fail at %d\n", x);
|
||||
fprintf(stderr, "fail at %d != %d\n", x, b.getBinary());
|
||||
}
|
||||
}
|
||||
|
||||
fprintf(stderr, "test all negative patterns\n");
|
||||
fprintf(stderr, "only fails -0\n");
|
||||
for (uint32_t x = 0x8000; x < 0xFFFF; x++)
|
||||
{
|
||||
a.setBinary(x);
|
||||
b = a.toDouble();
|
||||
if (x != b.getBinary())
|
||||
{
|
||||
fprintf(stderr, "fail at %d\n", x);
|
||||
fprintf(stderr, "fail at %d != %d\n", x, b.getBinary());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user