mirror of
https://github.com/RobTillaart/Arduino.git
synced 2024-10-03 18:09:02 -04:00
0.1.5 float16
This commit is contained in:
parent
3598a42f1d
commit
2464abf608
@ -21,22 +21,23 @@ The library implements the **Printable** interface so one can directly print the
|
||||
float16 values in any stream e.g. Serial.
|
||||
|
||||
The primary usage of the float16 data type is to efficiently store and transport
|
||||
a floating point number. As it is only 2 bytes where float and double have typical
|
||||
4 and 8, gains can be made at the price of range and precision.
|
||||
a floating point number. As it uses only 2 bytes where float and double typically use
|
||||
4 and 8 bytes, gains can be made at the price of range and precision.
|
||||
|
||||
|
||||
## Specifications
|
||||
|
||||
|
||||
| attribute | value | notes |
|
||||
|:----------|:-------------|:-------------|
|
||||
| Size | 2 bytes | |
|
||||
| sign | 1 bit | |
|
||||
| mantissa | 11 bit | ~ 3 digits |
|
||||
| exponent | 4 bit | |
|
||||
| minimum | 1.0009765625 | 1 + 2^−10 |
|
||||
| maximum | 65504 | |
|
||||
| | | |
|
||||
| attribute | value | notes |
|
||||
|:----------|:-------------|:--------|
|
||||
| size | 2 bytes | layout s eeeee mmmmmmmmmm
|
||||
| sign | 1 bit |
|
||||
| exponent | 5 bit |
|
||||
| mantissa | 10 bit | 11 bit precision incl. the implicit leading 1, ~ 3 digits
|
||||
| minimum | 5.96046 E−8 | smallest positive number.
|
||||
| | 1.0009765625 | 1 + 2^−10 = smallest number larger than 1.
|
||||
| maximum | 65504 |
|
||||
| | |
|
||||
|
||||
|
||||
## Interface
|
||||
@ -52,72 +53,73 @@ to elaborate
|
||||
|
||||
#### Conversion
|
||||
|
||||
- **double toDouble(void)** convert to double (or float)
|
||||
- **double toDouble(void)** convert to double (or float).
|
||||
- **uint16_t getBinary()** get the 2 byte binary representation.
|
||||
- **void setBinary(uint16_t u)** set the 2 bytes binary representation.
|
||||
- **size_t printTo(Print& p) const** Printable interface.
|
||||
- **void setDecimals(uint8_t d)** sets the number of decimals used by printTo().
|
||||
- **uint8_t getDecimals()** returns the number of decimals used by printTo().
|
||||
|
||||
Note the setDecimals takes one byte per object which is not efficient for arrays.
|
||||
Note that the decimals setting takes one extra byte per object, which is not efficient for arrays of float16.
|
||||
See array example for efficient storage using set/getBinary() functions.
|
||||
|
||||
|
||||
#### Compare
|
||||
|
||||
to elaborate
|
||||
Standard compare functions. Since 0.1.5 these are quite optimized,
|
||||
so it is fast to compare e.g. 2 measurements.
|
||||
|
||||
- **bool operator == (const float16& f)**
|
||||
- **bool operator != (const float16& f)**
|
||||
- **bool operator > (const float16& f)**
|
||||
- **bool operator >= (const float16& f)**
|
||||
- **bool operator < (const float16& f)**
|
||||
- **bool operator <= (const float16& f)**
|
||||
|
||||
|
||||
#### Math (basic)
|
||||
|
||||
Math is done by converting to double, doing the math and converting the result back.
|
||||
These operators are added for convenience only.
|
||||
Not planned to optimize these.
|
||||
|
||||
- **float16 operator + (const float16& f)**
|
||||
- **float16 operator - (const float16& f)**
|
||||
- **float16 operator \* (const float16& f)**
|
||||
- **float16 operator / (const float16& f)**
|
||||
- **float16& operator += (const float16& f)**
|
||||
- **float16& operator -= (const float16& f)**
|
||||
- **float16& operator \*= (const float16& f)**
|
||||
- **float16& operator /= (const float16& f)**
|
||||
|
||||
negation operator.
|
||||
- **float16 operator - ()** fast negation.
|
||||
|
||||
- **int sign()** returns 1 == positive, 0 == zero, -1 == negative.
|
||||
- **bool isZero()** returns true if zero. Slightly faster than **sign()**.
|
||||
- **bool isInf()** returns true if value is (-)infinite.
|
||||
|
||||
|
||||
## Notes
|
||||
|
||||
|
||||
#### comparison functions
|
||||
|
||||
The first version of the inequality operators was implemented by converting the data to double and comparing those.
|
||||
The strategy is to get these working first and optionally optimize them later.
|
||||
|
||||
|
||||
## TODO (future)
|
||||
|
||||
to get focus on getting things done...
|
||||
|
||||
|
||||
#### 0.1.4
|
||||
|
||||
the following should work:
|
||||
|
||||
- update documentation
|
||||
- positive numbers
|
||||
- negative numbers
|
||||
- infinity
|
||||
- rounding to zero (e.g. 1e-30)
|
||||
- array of numbers.
|
||||
- unit tests of the above..
|
||||
|
||||
|
||||
#### 0.1.5
|
||||
|
||||
- update documentation
|
||||
- comparison operators
|
||||
- unit tests of the above..
|
||||
## Future
|
||||
|
||||
|
||||
#### 0.1.6
|
||||
|
||||
- update documentation
|
||||
- get basic math working (+-*/)
|
||||
- isNan()
|
||||
- isINF()
|
||||
- abs()
|
||||
- sgn()
|
||||
- unit tests of the above..
|
||||
- update documentation.
|
||||
- unit tests of the above.
|
||||
- isNan().
|
||||
|
||||
|
||||
#### later
|
||||
|
||||
- update documentation
|
||||
- get basic math II working += -= *= /=
|
||||
- divide by zero errors.
|
||||
- f16tof32() + f32tof16()
|
||||
- rewrite toDouble with bit magic
|
||||
- ...
|
||||
|
||||
- update documentation.
|
||||
- error handling.
|
||||
- divide by zero errors.
|
||||
- look for optimizations.
|
||||
- rewrite **f16tof32()** with bit magic.
|
||||
- add storage example - with SD card, FRAM or EEPROM
|
||||
- add communication example - serial or Ethernet?
|
||||
|
||||
|
@ -6,27 +6,27 @@
|
||||
// DATE: 2015-03-11
|
||||
// URL: https://github.com/RobTillaart/float16
|
||||
//
|
||||
// Released to the public domain
|
||||
//
|
||||
|
||||
|
||||
/*
|
||||
0 01111 0000000000 = 1
|
||||
0 01111 0000000001 = 1 + 2−10 = 1.0009765625 (next smallest float after 1)
|
||||
1 10000 0000000000 = −2
|
||||
SIGN EXP MANTISSA
|
||||
0 01111 0000000000 = 1
|
||||
0 01111 0000000001 = 1 + 2^−10 = 1.0009765625 (smallest float larger than 1)
|
||||
1 10000 0000000000 = −2
|
||||
|
||||
0 11110 1111111111 = 65504 (max half precision)
|
||||
0 11110 1111111111 = 65504 (max half precision)
|
||||
|
||||
0 00001 0000000000 = 2−14 ≈ 6.10352 × 10−5 (minimum positive normal)
|
||||
0 00000 1111111111 = 2−14 - 2−24 ≈ 6.09756 × 10−5 (maximum subnormal)
|
||||
0 00000 0000000001 = 2−24 ≈ 5.96046 × 10−8 (minimum positive subnormal)
|
||||
0 00001 0000000000 = 2^−14 ≈ 6.10352 × 10^−5 (minimum positive normal)
|
||||
0 00000 1111111111 = 2^−14 − 2^−24 ≈ 6.09756 × 10^−5 (maximum subnormal)
|
||||
0 00000 0000000001 = 2^−24 ≈ 5.96046 × 10^−8 (minimum positive subnormal)
|
||||
|
||||
0 00000 0000000000 = 0
|
||||
1 00000 0000000000 = −0
|
||||
0 00000 0000000000 = 0
|
||||
1 00000 0000000000 = −0
|
||||
|
||||
0 11111 0000000000 = infinity
|
||||
1 11111 0000000000 = −infinity
|
||||
0 11111 0000000000 = infinity
|
||||
1 11111 0000000000 = −infinity
|
||||
|
||||
0 01101 0101010101 = 0.333251953125 ≈ 1/3
|
||||
0 01101 0101010101 = 0.333251953125 ≈ 1/3
|
||||
*/
|
||||
|
||||
#include "float16.h"
|
||||
@ -62,8 +62,8 @@ void test_constructors()
|
||||
Serial.println("\ntest_constructors:");
|
||||
float16 a;
|
||||
Serial.println(a.toDouble(), 9);
|
||||
Serial.println(a.getBinary(), HEX);
|
||||
|
||||
Serial.println(a.getBinary(), HEX);
|
||||
|
||||
float16 b = 6;
|
||||
Serial.println(b.toDouble(), 9);
|
||||
Serial.println(b.getBinary(), HEX);
|
||||
@ -144,7 +144,7 @@ void test_numbers()
|
||||
Serial.println("** OVERFLOW **");
|
||||
float16 f(1000000.0);
|
||||
Serial.println(f.toDouble(), 9);
|
||||
|
||||
|
||||
Serial.println("** UNDERFLOW **");
|
||||
float16 g(1 / 1000000.0);
|
||||
Serial.println(g.toDouble(), 9);
|
||||
|
@ -7,6 +7,7 @@
|
||||
// URL: https://github.com/RobTillaart/float16
|
||||
//
|
||||
|
||||
|
||||
#include "float16.h"
|
||||
|
||||
float16 X;
|
||||
@ -19,11 +20,11 @@ void setup()
|
||||
Serial.println(__FILE__);
|
||||
Serial.print("FLOAT16_LIB_VERSION: ");
|
||||
Serial.println(FLOAT16_LIB_VERSION);
|
||||
Serial.println("\nStart ");
|
||||
|
||||
float f;
|
||||
|
||||
for (uint16_t n = 0; n < 65535; n++)
|
||||
// dump all possible values
|
||||
for (uint16_t n = 0; n < 65535; n++)
|
||||
{
|
||||
f = X.f16tof32(n);
|
||||
Serial.print(n);
|
||||
|
@ -77,7 +77,7 @@ void test_1()
|
||||
Serial.print('\t');
|
||||
float current = f16.toDouble();
|
||||
Serial.print(current, 8);
|
||||
if (prev > current)
|
||||
if (prev > current) // numbers should be increasing.
|
||||
{
|
||||
Serial.print("\t\tERROR");
|
||||
errors++;
|
||||
@ -107,7 +107,7 @@ void test_1()
|
||||
Serial.print('\t');
|
||||
float current = f16.toDouble();
|
||||
Serial.print(current, 8);
|
||||
if (prev < current)
|
||||
if (prev < current) // negative numbers should be decreasing.
|
||||
{
|
||||
Serial.print("\t\tERROR");
|
||||
errors++;
|
||||
|
@ -22,9 +22,10 @@ void setup()
|
||||
Serial.println(__FILE__);
|
||||
Serial.print("FLOAT16_LIB_VERSION: ");
|
||||
Serial.println(FLOAT16_LIB_VERSION);
|
||||
Serial.println("\nStart ");
|
||||
Serial.println();
|
||||
|
||||
// simulate temperature with random numbers
|
||||
|
||||
// simulate temperature sensor with random numbers
|
||||
for (uint32_t n = 0; n < 10; n++)
|
||||
{
|
||||
temperature[n] = (random(1000) - 300) * 0.01;
|
||||
|
@ -17,7 +17,7 @@ void setup()
|
||||
Serial.println(__FILE__);
|
||||
Serial.print("FLOAT16_LIB_VERSION: ");
|
||||
Serial.println(FLOAT16_LIB_VERSION);
|
||||
Serial.println("\nStart ");
|
||||
Serial.println();
|
||||
|
||||
|
||||
for( int i = -10; i < 2; i++)
|
||||
|
@ -24,9 +24,9 @@ void setup()
|
||||
Serial.println(FLOAT16_LIB_VERSION);
|
||||
Serial.println();
|
||||
|
||||
f = random(1000000) * 0.001;
|
||||
|
||||
// CONSTRUCTORS
|
||||
Serial.println("CONSTRUCTORS");
|
||||
f = random(1000000) * 0.001;
|
||||
start = micros();
|
||||
float16 f16(f);
|
||||
stop = micros();
|
||||
@ -41,8 +41,10 @@ void setup()
|
||||
Serial.print("a = b: \t");
|
||||
Serial.println(stop - start);
|
||||
delay(10);
|
||||
Serial.println();
|
||||
|
||||
// CONVERSION
|
||||
|
||||
Serial.println("CONVERSION");
|
||||
start = micros();
|
||||
f = f16.toDouble();
|
||||
stop = micros();
|
||||
@ -52,7 +54,7 @@ void setup()
|
||||
Serial.println();
|
||||
|
||||
|
||||
// COMPARE
|
||||
Serial.println("COMPARE");
|
||||
f17 = f16.toDouble() + 1;
|
||||
|
||||
start = micros();
|
||||
@ -96,6 +98,85 @@ void setup()
|
||||
Serial.print("compare > : \t");
|
||||
Serial.println(stop - start);
|
||||
delay(10);
|
||||
Serial.println();
|
||||
|
||||
|
||||
Serial.println("MATH I");
|
||||
float16 f18;
|
||||
start = micros();
|
||||
f18 = f16 + f17;
|
||||
stop = micros();
|
||||
Serial.print("math + : \t");
|
||||
Serial.println(stop - start);
|
||||
delay(10);
|
||||
// Serial.println(f16);
|
||||
// Serial.println(f17);
|
||||
// Serial.println(f18);
|
||||
|
||||
start = micros();
|
||||
f18 = f16 - f17;
|
||||
stop = micros();
|
||||
Serial.print("math - : \t");
|
||||
Serial.println(stop - start);
|
||||
delay(10);
|
||||
|
||||
start = micros();
|
||||
f18 = f16 * f17;
|
||||
stop = micros();
|
||||
Serial.print("math * : \t");
|
||||
Serial.println(stop - start);
|
||||
delay(10);
|
||||
|
||||
start = micros();
|
||||
f18 = f16 + f17;
|
||||
stop = micros();
|
||||
Serial.print("math / : \t");
|
||||
Serial.println(stop - start);
|
||||
delay(10);
|
||||
Serial.println();
|
||||
|
||||
Serial.println("MATH II");
|
||||
start = micros();
|
||||
f18 += f16;
|
||||
stop = micros();
|
||||
Serial.print("math += : \t");
|
||||
Serial.println(stop - start);
|
||||
delay(10);
|
||||
|
||||
start = micros();
|
||||
f18 -= f16;
|
||||
stop = micros();
|
||||
Serial.print("math -= : \t");
|
||||
Serial.println(stop - start);
|
||||
delay(10);
|
||||
|
||||
start = micros();
|
||||
f18 *= f16;
|
||||
stop = micros();
|
||||
Serial.print("math *= : \t");
|
||||
Serial.println(stop - start);
|
||||
delay(10);
|
||||
|
||||
start = micros();
|
||||
f18 /= f16;
|
||||
stop = micros();
|
||||
Serial.print("math /= : \t");
|
||||
Serial.println(stop - start);
|
||||
delay(10);
|
||||
Serial.println();
|
||||
|
||||
Serial.println(f16);
|
||||
|
||||
Serial.println("MATH III - negation");
|
||||
start = micros();
|
||||
f18 = -f16;
|
||||
stop = micros();
|
||||
Serial.print("negation : \t");
|
||||
Serial.println(stop - start);
|
||||
delay(10);
|
||||
Serial.println();
|
||||
|
||||
Serial.println(f18);
|
||||
|
||||
Serial.println("\ndone");
|
||||
}
|
||||
|
@ -18,7 +18,6 @@ void setup()
|
||||
Serial.println(__FILE__);
|
||||
Serial.print("FLOAT16_LIB_VERSION: ");
|
||||
Serial.println(FLOAT16_LIB_VERSION);
|
||||
Serial.println("\nStart ");
|
||||
|
||||
for (uint32_t n = 1; n < 65536; n *= 2)
|
||||
{
|
||||
|
@ -31,7 +31,6 @@ void setup()
|
||||
Serial.println(__FILE__);
|
||||
Serial.print("FLOAT16_LIB_VERSION: ");
|
||||
Serial.println(FLOAT16_LIB_VERSION);
|
||||
Serial.println("\nStart ");
|
||||
|
||||
f16.setDecimals(6);
|
||||
|
||||
@ -46,7 +45,6 @@ void setup()
|
||||
Serial.print("\t");
|
||||
Serial.println();
|
||||
}
|
||||
|
||||
Serial.println();
|
||||
Serial.println();
|
||||
|
||||
|
@ -22,146 +22,191 @@
|
||||
// CONSTRUCTOR
|
||||
// Construct from a floating point number.
// The argument is handed to f32tof16(), which takes a float, and the
// resulting 16 bit pattern becomes the stored value.
float16::float16(double f)
{
  const uint16_t bits = f32tof16(f);
  _value = bits;
}
|
||||
|
||||
|
||||
// PRINTING
|
||||
// Printable interface.
// Decodes the stored bit pattern and prints it on p with _decimals
// decimals; returns whatever p.print() returns.
size_t float16::printTo(Print& p) const
{
  const double decoded = f16tof32(_value);
  return p.print(decoded, _decimals);
}
|
||||
|
||||
|
||||
double float16::toDouble() const
|
||||
{
|
||||
return f16tof32(n);
|
||||
return f16tof32(_value);
|
||||
}
|
||||
|
||||
|
||||
// NEGATE
|
||||
float16 float16::operator - ()
|
||||
{
|
||||
return float16( -f16tof32(n) );
|
||||
}
|
||||
|
||||
// bool float16::isNaN();
|
||||
// bool float16::isInf();
|
||||
|
||||
|
||||
//////////////////////////////////////////////////////////
|
||||
//
|
||||
// EQUALITIES
|
||||
//
|
||||
bool float16::operator == (const float16 &f)
|
||||
{
|
||||
return (n == f.n);
|
||||
return (_value == f._value);
|
||||
}
|
||||
|
||||
|
||||
bool float16::operator != (const float16 &f)
|
||||
{
|
||||
return (n != f.n);
|
||||
return (_value != f._value);
|
||||
}
|
||||
|
||||
|
||||
bool float16::operator > (const float16 &c)
|
||||
bool float16::operator > (const float16 &f)
|
||||
{
|
||||
return this->toDouble() > c.toDouble();
|
||||
if ((_value & 0x8000) && ( f._value & 0x8000)) return _value < f._value;
|
||||
if (_value & 0x8000) return false;
|
||||
if (f._value & 0x8000) return true;
|
||||
return _value > f._value;
|
||||
}
|
||||
|
||||
|
||||
bool float16::operator >= (const float16 &c)
|
||||
bool float16::operator >= (const float16 &f)
|
||||
{
|
||||
return this->toDouble() >= c.toDouble();
|
||||
if ((_value & 0x8000) && (f._value & 0x8000)) return _value <= f._value;
|
||||
if (_value & 0x8000) return false;
|
||||
if (f._value & 0x8000) return true;
|
||||
return _value >= f._value;
|
||||
}
|
||||
|
||||
|
||||
bool float16::operator < (const float16 &c)
|
||||
bool float16::operator < (const float16 &f)
|
||||
{
|
||||
return this->toDouble() < c.toDouble();
|
||||
if ((_value & 0x8000) && (f._value & 0x8000)) return _value > f._value;
|
||||
if (_value & 0x8000) return true;
|
||||
if (f._value & 0x8000) return false;
|
||||
return _value < f._value;
|
||||
}
|
||||
|
||||
|
||||
bool float16::operator <= (const float16 &c)
|
||||
bool float16::operator <= (const float16 &f)
|
||||
{
|
||||
return this->toDouble() <= c.toDouble();
|
||||
if ((_value & 0x8000) && (f._value & 0x8000)) return _value >= f._value;
|
||||
if (_value & 0x8000) return true;
|
||||
if (f._value & 0x8000) return false;
|
||||
return _value <= f._value;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
// BASIC MATH I
|
||||
float16 float16::operator + (const float16 &c)
|
||||
//////////////////////////////////////////////////////////
|
||||
//
|
||||
// NEGATION
|
||||
//
|
||||
// Unary minus.
// Returns a new float16 whose bit pattern is this one with the sign bit
// (0x8000) toggled; no conversion to float is involved.
float16 float16::operator - ()
{
  const uint16_t flipped = _value ^ 0x8000;
  float16 result;
  result.setBinary(flipped);
  return result;
}
|
||||
|
||||
float16 float16::operator - (const float16 &c)
|
||||
|
||||
//////////////////////////////////////////////////////////
|
||||
//
|
||||
// MATH
|
||||
//
|
||||
// Addition: both operands are decoded with toDouble(), added as doubles
// and the sum is encoded into a new float16.
float16 float16::operator + (const float16 &f)
{
  const double sum = toDouble() + f.toDouble();
  return float16(sum);
}
|
||||
|
||||
float16 float16::operator * (const float16 &c)
|
||||
|
||||
// Subtraction: both operands are decoded with toDouble(), subtracted as
// doubles and the difference is encoded into a new float16.
float16 float16::operator - (const float16 &f)
{
  const double difference = toDouble() - f.toDouble();
  return float16(difference);
}
|
||||
|
||||
float16 float16::operator / (const float16 &c)
|
||||
|
||||
// Multiplication: both operands are decoded with toDouble(), multiplied
// as doubles and the product is encoded into a new float16.
float16 float16::operator * (const float16 &f)
{
  const double product = toDouble() * f.toDouble();
  return float16(product);
}
|
||||
*/
|
||||
|
||||
|
||||
/*
|
||||
// BASIC MATH II
|
||||
float16& float16::operator += (const float16 &c)
|
||||
// Division: both operands are decoded with toDouble(), divided as doubles
// and the quotient is encoded into a new float16.
// No check is made for a zero divisor.
float16 float16::operator / (const float16 &f)
{
  const double quotient = toDouble() / f.toDouble();
  return float16(quotient);
}
|
||||
|
||||
float16& float16::operator -= (const float16 &c)
|
||||
|
||||
// Compound addition: decodes both operands with toDouble(), adds them as
// doubles and stores the encoded sum in this object.
// Only _value is written. Assigning a temporary float16 to *this would go
// through the class's assignment (no user-defined operator= is visible in
// the header), which also replaces _decimals with the temporary's default
// and so silently resets a setDecimals() call on the left operand.
float16& float16::operator += (const float16 &f)
{
  _value = f32tof16(this->toDouble() + f.toDouble());
  return *this;
}
|
||||
|
||||
float16& float16::operator *= (const float16 &c)
|
||||
|
||||
// Compound subtraction: decodes both operands with toDouble(), subtracts
// them as doubles and stores the encoded difference in this object.
// Only _value is written. Assigning a temporary float16 to *this would go
// through the class's assignment (no user-defined operator= is visible in
// the header), which also replaces _decimals with the temporary's default
// and so silently resets a setDecimals() call on the left operand.
float16& float16::operator -= (const float16 &f)
{
  _value = f32tof16(this->toDouble() - f.toDouble());
  return *this;
}
|
||||
|
||||
float16& float16::operator /= (const float16 &c)
|
||||
|
||||
// Compound multiplication: decodes both operands with toDouble(),
// multiplies them as doubles and stores the encoded product in this object.
// Only _value is written. Assigning a temporary float16 to *this would go
// through the class's assignment (no user-defined operator= is visible in
// the header), which also replaces _decimals with the temporary's default
// and so silently resets a setDecimals() call on the left operand.
float16& float16::operator *= (const float16 &f)
{
  _value = f32tof16(this->toDouble() * f.toDouble());
  return *this;
}
|
||||
|
||||
*/
|
||||
|
||||
// Compound division: decodes both operands with toDouble(), divides them
// as doubles and stores the encoded quotient in this object.
// No check is made for a zero divisor.
// Only _value is written. Assigning a temporary float16 to *this would go
// through the class's assignment (no user-defined operator= is visible in
// the header), which also replaces _decimals with the temporary's default
// and so silently resets a setDecimals() call on the left operand.
float16& float16::operator /= (const float16 &f)
{
  _value = f32tof16(this->toDouble() / f.toDouble());
  return *this;
}
|
||||
|
||||
|
||||
float float16::f16tof32(uint16_t n) const
|
||||
//////////////////////////////////////////////////////////
|
||||
//
|
||||
// MATH HELPER FUNCTIONS
|
||||
//
|
||||
|
||||
int float16::sign()
|
||||
{
|
||||
if (_value & 0x8000) return -1;
|
||||
if (_value & 0xFFFF) return 1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
bool float16::isZero()
|
||||
{
|
||||
return ((_value & 0x7FFF) == 0x0000);
|
||||
}
|
||||
|
||||
// bool float16::isNaN()
|
||||
// {
|
||||
// return ((_value & 0x7FFF) == 0x0000);
|
||||
// }
|
||||
|
||||
bool float16::isInf()
|
||||
{
|
||||
return ((_value == 0x7C00) || (_value == 0xFC00));
|
||||
}
|
||||
|
||||
|
||||
//////////////////////////////////////////////////////////
|
||||
//
|
||||
// CORE CONVERSION
|
||||
//
|
||||
float float16::f16tof32(uint16_t _value) const
|
||||
{
|
||||
uint16_t sgn, man;
|
||||
int exp;
|
||||
double f;
|
||||
|
||||
sgn = (n & 0x8000) > 0;
|
||||
exp = (n & 0x7C00) >> 10;
|
||||
man = (n & 0x03FF);
|
||||
|
||||
#ifdef DEBUG
|
||||
Serial.println(sgn, BIN);
|
||||
Serial.println(exp, BIN);
|
||||
Serial.println(man, BIN);
|
||||
#endif
|
||||
sgn = (_value & 0x8000) > 0;
|
||||
exp = (_value & 0x7C00) >> 10;
|
||||
man = (_value & 0x03FF);
|
||||
|
||||
// ZERO
|
||||
if ((n & 0x7FFF) == 0)
|
||||
if ((_value & 0x7FFF) == 0)
|
||||
{
|
||||
#ifdef DEBUG
|
||||
Serial.println("ZERO");
|
||||
#endif
|
||||
return sgn ? -0 : 0;
|
||||
}
|
||||
// NAN & INF
|
||||
if (exp == 0x001F)
|
||||
{
|
||||
#ifdef DEBUG
|
||||
Serial.println("INFINITY");
|
||||
#endif
|
||||
if (man == 0) return sgn ? -INFINITY : INFINITY;
|
||||
else return NAN;
|
||||
}
|
||||
@ -193,10 +238,6 @@ uint16_t float16::f32tof16(float f) const
|
||||
int16_t exp = (t & 0x7F800000) >> 23;
|
||||
bool sgn = (t & 0x80000000);
|
||||
|
||||
// Serial.print("SGN: "); Serial.println(sgn, BIN);
|
||||
// Serial.print("EXP: "); Serial.println(exp, BIN);
|
||||
// Serial.print("MAN: "); Serial.println(man, BIN);
|
||||
|
||||
// handle 0
|
||||
if ((t & 0x7FFFFFFF) == 0)
|
||||
{
|
||||
@ -241,9 +282,6 @@ uint16_t float16::f32tof16(float f) const
|
||||
exp <<= 10;
|
||||
man++;
|
||||
man >>= 1;
|
||||
// Serial.print("SGN: "); Serial.println(sgn, BIN);
|
||||
// Serial.print("EXP: "); Serial.println(exp, BIN);
|
||||
// Serial.print("MAN: "); Serial.println(man, BIN);
|
||||
if (sgn) return 0x8000 | exp | man;
|
||||
return exp | man;
|
||||
}
|
||||
|
@ -2,7 +2,7 @@
|
||||
//
|
||||
// FILE: float16.h
|
||||
// AUTHOR: Rob Tillaart
|
||||
// VERSION: 0.1.4
|
||||
// VERSION: 0.1.5
|
||||
// PURPOSE: Arduino library to implement float16 data type.
|
||||
// half-precision floating point format,
|
||||
// used for efficient storage and transport.
|
||||
@ -12,61 +12,59 @@
|
||||
|
||||
#include "Arduino.h"
|
||||
|
||||
#define FLOAT16_LIB_VERSION "0.1.4"
|
||||
#define FLOAT16_LIB_VERSION (F("0.1.5"))
|
||||
|
||||
|
||||
class float16: public Printable
|
||||
{
|
||||
public:
|
||||
// Constructors
|
||||
float16(void) { n = 0; };
|
||||
float16(void) { _value = 0x0000; };
|
||||
float16(double f);
|
||||
float16(const float16 &f) { n = f.n; };
|
||||
float16(const float16 &f) { _value = f._value; };
|
||||
|
||||
// Conversion
|
||||
double toDouble(void) const;
|
||||
// access the 2 byte representation.
|
||||
uint16_t getBinary() { return n; };
|
||||
void setBinary(uint16_t u) { n = u; };
|
||||
uint16_t getBinary() { return _value; };
|
||||
void setBinary(uint16_t u) { _value = u; };
|
||||
|
||||
// Printable
|
||||
size_t printTo(Print& p) const;
|
||||
void setDecimals(uint8_t d) { _decimals = d; };
|
||||
uint8_t getDecimals() { return _decimals; };
|
||||
|
||||
// equalities
|
||||
bool operator == (const float16& f);
|
||||
bool operator != (const float16& f);
|
||||
|
||||
|
||||
// bool isNaN();
|
||||
// bool isInf();
|
||||
|
||||
bool operator > (const float16& f);
|
||||
bool operator >= (const float16& f);
|
||||
bool operator < (const float16& f);
|
||||
bool operator <= (const float16& f);
|
||||
|
||||
// negation
|
||||
float16 operator - ();
|
||||
|
||||
// equalities
|
||||
bool operator == (const float16&);
|
||||
bool operator != (const float16&);
|
||||
|
||||
bool operator > (const float16&);
|
||||
bool operator >= (const float16&);
|
||||
bool operator < (const float16&);
|
||||
bool operator <= (const float16&);
|
||||
|
||||
/*
|
||||
// basic math
|
||||
float16 operator + (const float16&);
|
||||
float16 operator - (const float16&);
|
||||
float16 operator * (const float16&);
|
||||
float16 operator / (const float16&);
|
||||
float16 operator + (const float16& f);
|
||||
float16 operator - (const float16& f);
|
||||
float16 operator * (const float16& f);
|
||||
float16 operator / (const float16& f);
|
||||
|
||||
float16& operator += (const float16&);
|
||||
float16& operator -= (const float16&);
|
||||
float16& operator *= (const float16&);
|
||||
float16& operator /= (const float16&);
|
||||
*/
|
||||
float16& operator += (const float16& f);
|
||||
float16& operator -= (const float16& f);
|
||||
float16& operator *= (const float16& f);
|
||||
float16& operator /= (const float16& f);
|
||||
|
||||
// math helper functions
|
||||
int sign(); // 1 = positive 0 = zero -1 = negative.
|
||||
bool isZero();
|
||||
// bool isNaN();
|
||||
bool isInf();
|
||||
|
||||
|
||||
// DEBUGGING
|
||||
// CORE CONVERSION
|
||||
// should be private but for testing...
|
||||
float f16tof32(uint16_t) const;
|
||||
uint16_t f32tof16(float) const;
|
||||
@ -74,10 +72,7 @@ class float16: public Printable
|
||||
|
||||
private:
|
||||
uint8_t _decimals = 4;
|
||||
// TODO
|
||||
// n is not descriptive,
|
||||
// should be _n at least;
|
||||
uint16_t n;
|
||||
uint16_t _value;
|
||||
|
||||
};
|
||||
|
||||
|
@ -15,7 +15,7 @@
|
||||
"type": "git",
|
||||
"url": "https://github.com/RobTillaart/float16.git"
|
||||
},
|
||||
"version": "0.1.4",
|
||||
"version": "0.1.5",
|
||||
"license": "MIT",
|
||||
"frameworks": "arduino",
|
||||
"platforms": "*",
|
||||
|
@ -1,5 +1,5 @@
|
||||
name=float16
|
||||
version=0.1.4
|
||||
version=0.1.5
|
||||
author=Rob Tillaart <rob.tillaart@gmail.com>
|
||||
maintainer=Rob Tillaart <rob.tillaart@gmail.com>
|
||||
sentence=Arduino library to implement float16 data type.
|
||||
|
Loading…
x
Reference in New Issue
Block a user