0.1.8 float16

2024-09-19 16:46:11 -04:00 · 2023-11-02 15:12:29 +01:00 · 2023-11-02 15:12:29 +01:00 · 7d0ed91f95
commit 7d0ed91f95
parent 299170332e
8 changed files with 298 additions and 181 deletions
--- a/libraries/float16/CHANGELOG.md
+++ b/libraries/float16/CHANGELOG.md
@ -6,13 +6,18 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/)
 and this project adheres to [Semantic Versioning](http://semver.org/).


+## [0.1.8] - 2023-11-02
+- update readme.md
+- add **isNaN()** (experimental).
+- minor edits.
+
+
 ## [0.1.7] - 2022-11-07
 - add changelog.md
 - add rp2040 to build-CI
 - update readme.md
 - update keywords.txt

-
 ## [0.1.6] - 2021-12-18
 - update library.json
 - update license
--- a/libraries/float16/README.md
+++ b/libraries/float16/README.md
@ -1,11 +1,12 @@

-
 [![Arduino CI](https://github.com/RobTillaart/float16/workflows/Arduino%20CI/badge.svg)](https://github.com/marketplace/actions/arduino_ci)
 [![Arduino-lint](https://github.com/RobTillaart/float16/actions/workflows/arduino-lint.yml/badge.svg)](https://github.com/RobTillaart/float16/actions/workflows/arduino-lint.yml)
 [![JSON check](https://github.com/RobTillaart/float16/actions/workflows/jsoncheck.yml/badge.svg)](https://github.com/RobTillaart/float16/actions/workflows/jsoncheck.yml)
-[![License: MIT](https://img.shields.io/badge/license-MIT-green.svg)](https://github.com/RobTillaart/float16/blob/master/LICENSE)
-[![GitHub release](https://img.shields.io/github/release/RobTillaart/float16.svg?maxAge=3600)](https://github.com/RobTillaart/Complex/releases)
+[![GitHub issues](https://img.shields.io/github/issues/RobTillaart/float16.svg)](https://github.com/RobTillaart/float16/issues)

+[![License: MIT](https://img.shields.io/badge/license-MIT-green.svg)](https://github.com/RobTillaart/float16/blob/master/LICENSE)
+[![GitHub release](https://img.shields.io/github/release/RobTillaart/float16.svg?maxAge=3600)](https://github.com/RobTillaart/float16/releases)
+[![PlatformIO Registry](https://badges.registry.platformio.org/packages/robtillaart/library/float16.svg)](https://registry.platformio.org/libraries/robtillaart/float16)

 # float16

@ -30,7 +31,7 @@ a floating point number. As it uses only 2 bytes where float and double have typ

 | attribute | value        |  notes  |
 |:----------|:-------------|:--------|
-| size      | 2 bytes      | layout s  eeeee  mmmmmmmmmm
+| size      | 2 bytes      | layout s  eeeee  mmmmmmmmmm  (1,5,10)
 | sign      | 1 bit        |
 | exponent  | 5 bit        |
 | mantissa  | 11 bit       | ~ 3 digits
@ -66,9 +67,16 @@ a floating point number. As it uses only 2 bytes where float and double have typ
 ```


+#### Related
+
+- https://wokwi.com/projects/376313228108456961  (demo of its usage)
+
+
 ## Interface

-to elaborate
+```cpp
+#include "float16.h"
+```

 #### Constructors

@ -126,20 +134,25 @@ negation operator.
 - **bool isInf()** returns true if value is (-)infinite.


+#### Experimental 0.1.8
+
+- **bool isNaN()** returns true if value is not a number.
+
+
 ## Notes


 ## Future

-
-#### 0.1.x
+#### Must

 - update documentation.
+
+#### Should
+
 - unit tests of the above.
- isNan().

-
-#### later
+#### Could

 - update documentation.
 - error handling.
@ -149,3 +162,14 @@ negation operator.
 - add storage example - with SD card, FRAM or EEPROM
 - add communication example - serial or Ethernet?

+#### Won't
+
+
+## Support
+
+If you appreciate my libraries, you can support the development and maintenance.
+Improve the quality of the libraries by providing issues and Pull Requests, or
+donate through PayPal or GitHub sponsors.
+
+Thank you,
+
--- a/libraries/float16/examples/float16_test_performance/performance_0.1.7.txt
+++ b/libraries/float16/examples/float16_test_performance/performance_0.1.7.txt
@ -0,0 +1,88 @@
+
+//
+// test: UNO 
+//  IDE: 1.8.13
+//
+
+FLOAT16_LIB_VERSION: 0.1.7
+
+CONSTRUCTORS
+Constructor: 	28
+a = b: 	4
+
+CONVERSION
+toDouble(): 	428
+
+COMPARE
+compare == : 	4
+compare != : 	4
+compare <  : 	4
+compare <= : 	4
+compare >= : 	4
+compare >  : 	8
+
+MATH I
+math +  : 	864
+math -  : 	812
+math *  : 	812
+math /  : 	812
+
+MATH II
+math += : 	872
+math -= : 	820
+math *= : 	804
+math /= : 	804
+
+17.8125
+MATH III - negation
+negation : 	4
+
+-17.8125
+
+done
+
+------------------------------------------
+
+//
+// test: RP2040 
+//  IDE: 1.8.13
+//
+
+FLOAT16_LIB_VERSION: 0.1.7
+
+CONSTRUCTORS
+Constructor:    19
+a = b:  2
+
+CONVERSION
+toDouble():     96
+
+COMPARE
+compare == :    4
+compare != :    3
+compare <  :    7
+compare <= :    6
+compare >= :    6
+compare >  :    6
+
+MATH I
+math +  :       67
+math -  :       60
+math *  :       58
+math /  :       54
+
+MATH II
+math += :       62
+math -= :       61
+math *= :       61
+math /= :       45
+
+767.0000
+MATH III - negation
+negation :      7
+
+-767.0000
+
+done
+
+-------------------------------------------
--- a/libraries/float16/float16.cpp
+++ b/libraries/float16/float16.cpp
@ -1,30 +1,26 @@
 //
 //    FILE: float16.cpp
 //  AUTHOR: Rob Tillaart
-// VERSION: 0.1.7
+// VERSION: 0.1.8
 // PURPOSE: library for Float16s for Arduino
 //     URL: http://en.wikipedia.org/wiki/Half-precision_floating-point_format
-//
-// HISTORY: see changelog.md


 #include "float16.h"

-// #define DEBUG

-
-// CONSTRUCTOR
+//  CONSTRUCTOR
 float16::float16(double f)
 {
  _value = f32tof16(f);
 }

-// PRINTING
+//  PRINTING
 size_t float16::printTo(Print& p) const
 {
  double d = this->f16tof32(_value);
  return p.print(d, _decimals);
-};
+}

 double float16::toDouble() const
 {
@ -34,54 +30,54 @@ double float16::toDouble() const

 //////////////////////////////////////////////////////////
 //
-// EQUALITIES
+//  EQUALITIES
 //
 bool float16::operator == (const float16 &f)
 {
-    return (_value == f._value);
+  return (_value == f._value);
 }

 bool float16::operator != (const float16 &f)
 {
-    return (_value != f._value);
+  return (_value != f._value);
 }

 bool float16::operator > (const float16 &f)
 {
-    if ((_value & 0x8000) && ( f._value & 0x8000)) return _value < f._value;
-    if (_value & 0x8000) return false;
-    if (f._value & 0x8000) return true;
-    return _value > f._value;
+  if ((_value & 0x8000) && ( f._value & 0x8000)) return _value < f._value;
+  if (_value & 0x8000) return false;
+  if (f._value & 0x8000) return true;
+  return _value > f._value;
 }

 bool float16::operator >= (const float16 &f)
 {
-    if ((_value & 0x8000) && (f._value & 0x8000)) return _value <= f._value;
-    if (_value & 0x8000) return false;
-    if (f._value & 0x8000) return true;
-    return _value >= f._value;
+  if ((_value & 0x8000) && (f._value & 0x8000)) return _value <= f._value;
+  if (_value & 0x8000) return false;
+  if (f._value & 0x8000) return true;
+  return _value >= f._value;
 }

 bool float16::operator < (const float16 &f)
 {
-    if ((_value & 0x8000) && (f._value & 0x8000)) return _value > f._value;
-    if (_value & 0x8000) return true;
-    if (f._value & 0x8000) return false;
-    return _value < f._value;
+  if ((_value & 0x8000) && (f._value & 0x8000)) return _value > f._value;
+  if (_value & 0x8000) return true;
+  if (f._value & 0x8000) return false;
+  return _value < f._value;
 }

 bool float16::operator <= (const float16 &f)
 {
-    if ((_value & 0x8000) && (f._value & 0x8000)) return _value >= f._value;
-    if (_value   & 0x8000) return true;
-    if (f._value & 0x8000) return false;
-    return _value <= f._value;
+  if ((_value & 0x8000) && (f._value & 0x8000)) return _value >= f._value;
+  if (_value   & 0x8000) return true;
+  if (f._value & 0x8000) return false;
+  return _value <= f._value;
 }


 //////////////////////////////////////////////////////////
 //
-// NEGATION
+//  NEGATION
 //
 float16 float16::operator - ()
 {
@ -93,7 +89,7 @@ float16 float16::operator - ()

 //////////////////////////////////////////////////////////
 //
-// MATH
+//  MATH
 //
 float16 float16::operator + (const float16 &f)
 {
@ -142,9 +138,8 @@ float16& float16::operator /= (const float16 &f)

 //////////////////////////////////////////////////////////
 //
-// MATH HELPER FUNCTIONS
+//  MATH HELPER FUNCTIONS
 //
-
 int float16::sign()
 {
  if (_value & 0x8000) return -1;
@ -157,10 +152,12 @@ bool float16::isZero()
  return ((_value & 0x7FFF) == 0x0000);
 }

-// bool float16::isNaN()
-// {
-  // return ((_value & 0x7FFF) == 0x0000);
-// }
+bool float16::isNaN()
+{
+  if ((_value & 0x7C00) != 0x7C00) return false;
+  if ((_value & 0x03FF) == 0x0000) return false;
+  return true;
+}

 bool float16::isInf()
 {
@ -170,104 +167,104 @@ bool float16::isInf()

 //////////////////////////////////////////////////////////
 //
-// CORE CONVERSION
+//  CORE CONVERSION
 //
 float float16::f16tof32(uint16_t _value) const
 {
-    uint16_t sgn, man;
-    int exp;
-    double f;
+  uint16_t sgn, man;
+  int exp;
+  double f;

-    sgn = (_value & 0x8000) > 0;
-    exp = (_value & 0x7C00) >> 10;
-    man = (_value & 0x03FF);
+  sgn = (_value & 0x8000) > 0;
+  exp = (_value & 0x7C00) >> 10;
+  man = (_value & 0x03FF);

-    // ZERO
-    if ((_value & 0x7FFF) == 0)
-    {
-        return sgn ? -0 : 0;
-    }
-    // NAN & INF
-    if (exp == 0x001F)
-    {
-        if (man == 0) return sgn ? -INFINITY : INFINITY;
-        else return NAN;
-    }
+  //  ZERO
+  if ((_value & 0x7FFF) == 0)
+  {
+    return sgn ? -0 : 0;
+  }
+  //  NAN & INF
+  if (exp == 0x001F)
+  {
+    if (man == 0) return sgn ? -INFINITY : INFINITY;
+    else return NAN;
+  }

-    // SUBNORMAL/NORMAL
-    if (exp == 0)  f = 0;
-    else           f = 1;
+  //  SUBNORMAL/NORMAL
+  if (exp == 0)  f = 0;
+  else           f = 1;

-    // PROCESS MANTISSE
-    for (int i = 9; i >= 0; i--)
-    {
-        f *= 2;
-        if (man & (1 << i)) f = f + 1;
-    }
-    f = f * pow(2.0, exp - 25);
-    if (exp == 0)
-    {
-        f = f * pow(2.0, -13);    // 5.96046447754e-8;
-    }
-    return sgn ? -f : f;
+  //  PROCESS MANTISSA
+  for (int i = 9; i >= 0; i--)
+  {
+    f *= 2;
+    if (man & (1 << i)) f = f + 1;
+  }
+  f = f * pow(2.0, exp - 25);
+  if (exp == 0)
+  {
+    f = f * pow(2.0, -13);    // 5.96046447754e-8;
+  }
+  return sgn ? -f : f;
 }

 uint16_t float16::f32tof16(float f) const
 {
-    uint32_t t = *(uint32_t *) &f;
-    // man bits = 10; but we keep 11 for rounding
-    uint16_t man = (t & 0x007FFFFF) >> 12;
-    int16_t  exp = (t & 0x7F800000) >> 23;
-    bool     sgn = (t & 0x80000000);
+  uint32_t t = *(uint32_t *) &f;
+  //  man bits = 10; but we keep 11 for rounding
+  uint16_t man = (t & 0x007FFFFF) >> 12;
+  int16_t  exp = (t & 0x7F800000) >> 23;
+  bool     sgn = (t & 0x80000000);

-    // handle 0
-    if ((t & 0x7FFFFFFF) == 0)
-    {
-        return sgn ? 0x8000 : 0x0000;
-    }
-    // denormalized float32 does not fit in float16
-    if (exp == 0x00)
-    {
-        return sgn ? 0x8000 : 0x0000;
-    }
-    // handle infinity & NAN
-    if (exp == 0x00FF)
-    {
-        if (man) return 0xFE00;         //  NAN
-        return sgn ? 0xFC00 : 0x7C00;   // -INF : INF
-    }
+  //  handle 0
+  if ((t & 0x7FFFFFFF) == 0)
+  {
+    return sgn ? 0x8000 : 0x0000;
+  }
+  //  denormalized float32 does not fit in float16
+  if (exp == 0x00)
+  {
+    return sgn ? 0x8000 : 0x0000;
+  }
+  //  handle infinity & NAN
+  if (exp == 0x00FF)
+  {
+    if (man) return 0xFE00;         //  NAN
+    return sgn ? 0xFC00 : 0x7C00;   //  -INF : INF
+  }

-    // normal numbers
-    exp = exp - 127 + 15;
-    // overflow does not fit => INF
-    if (exp > 30)
-    {
-        return sgn ? 0xFC00 : 0x7C00;   // -INF : INF
-    }
-    //  subnormal numbers
-    if (exp < -38)
-    {
-        return sgn ? 0x8000 : 0x0000;  // -0 or 0  ?   just 0 ?
-    }
-    if (exp <= 0) // subnormal
-    {
-        man >>= (exp + 14);
-        // rounding
-        man++;
-        man >>= 1;
-        if (sgn) return 0x8000 | man;
-        return man;
-    }
-
-    // normal
-    // TODO rounding
-    exp <<= 10;
+  //  normal numbers
+  exp = exp - 127 + 15;
+  //  overflow does not fit => INF
+  if (exp > 30)
+  {
+    return sgn ? 0xFC00 : 0x7C00;   //  -INF : INF
+  }
+  //  subnormal numbers
+  if (exp < -38)
+  {
+    return sgn ? 0x8000 : 0x0000;   //  -0 or 0  ?   just 0 ?
+  }
+  if (exp <= 0) //  subnormal
+  {
+    man >>= (exp + 14);
+    //  rounding
    man++;
    man >>= 1;
-    if (sgn) return 0x8000 | exp | man;
-    return exp | man;
+    if (sgn) return 0x8000 | man;
+    return man;
+  }
+
+  //  normal
+  //  TODO rounding
+  exp <<= 10;
+  man++;
+  man >>= 1;
+  if (sgn) return 0x8000 | exp | man;
+  return exp | man;
 }


-// -- END OF FILE --
+//  -- END OF FILE --

--- a/libraries/float16/float16.h
+++ b/libraries/float16/float16.h
@ -2,80 +2,79 @@
 //
 //    FILE: float16.h
 //  AUTHOR: Rob Tillaart
-// VERSION: 0.1.7
+// VERSION: 0.1.8
 // PURPOSE: Arduino library to implement float16 data type.
-//          half-precision floating point format, 
+//          half-precision floating point format,
 //          used for efficient storage and transport.
 //     URL: https://github.com/RobTillaart/float16
-//


 #include "Arduino.h"

-#define FLOAT16_LIB_VERSION                 (F("0.1.7"))
+#define FLOAT16_LIB_VERSION                 (F("0.1.8"))


 class float16: public Printable
 {
-  public:
-    // Constructors
-    float16(void)               { _value = 0x0000; };
-    float16(double f);
-    float16(const float16 &f)   { _value = f._value; };
+public:
+  //  Constructors
+  float16(void)               { _value = 0x0000; };
+  float16(double f);
+  float16(const float16 &f)   { _value = f._value; };

-    // Conversion
-    double   toDouble(void) const;
-    //  access the 2 byte representation.
-    uint16_t getBinary()           { return _value; };
-    void     setBinary(uint16_t u) { _value = u; };
-    
-    // Printable
-    size_t   printTo(Print& p) const;
-    void     setDecimals(uint8_t d) { _decimals = d; };
-    uint8_t  getDecimals()          { return _decimals; };
+  //  Conversion
+  double   toDouble(void) const;
+  //  access the 2 byte representation.
+  uint16_t getBinary()           { return _value; };
+  void     setBinary(uint16_t u) { _value = u; };

-    // equalities
-    bool operator == (const float16& f);
-    bool operator != (const float16& f);
+  //  Printable
+  size_t   printTo(Print& p) const;
+  void     setDecimals(uint8_t d) { _decimals = d; };
+  uint8_t  getDecimals()          { return _decimals; };

-    bool operator >  (const float16& f);
-    bool operator >= (const float16& f);
-    bool operator <  (const float16& f);
-    bool operator <= (const float16& f);
+  //  equalities
+  bool operator == (const float16& f);
+  bool operator != (const float16& f);

-    // negation
-    float16 operator - ();
+  bool operator >  (const float16& f);
+  bool operator >= (const float16& f);
+  bool operator <  (const float16& f);
+  bool operator <= (const float16& f);

-    // basic math
-    float16 operator + (const float16& f);
-    float16 operator - (const float16& f);
-    float16 operator * (const float16& f);
-    float16 operator / (const float16& f);
+  //  negation
+  float16 operator - ();

-    float16& operator += (const float16& f);
-    float16& operator -= (const float16& f);
-    float16& operator *= (const float16& f);
-    float16& operator /= (const float16& f);
+  //  basic math
+  float16 operator + (const float16& f);
+  float16 operator - (const float16& f);
+  float16 operator * (const float16& f);
+  float16 operator / (const float16& f);

-    // math helper functions
-    int   sign();       // 1 = positive   0 = zero   -1 = negative.
-    bool  isZero();
-//    bool isNaN();
-    bool isInf();
+  float16& operator += (const float16& f);
+  float16& operator -= (const float16& f);
+  float16& operator *= (const float16& f);
+  float16& operator /= (const float16& f);
+
+  //  math helper functions
+  int   sign();       //  1 = positive   0 = zero   -1 = negative.
+  bool  isZero();
+  bool isNaN();
+  bool isInf();


-    // CORE CONVERSION
-    // should be private but for testing...
-    float    f16tof32(uint16_t) const;
-    uint16_t f32tof16(float) const;
+  //  CORE CONVERSION
+  //  should be private but for testing...
+  float    f16tof32(uint16_t) const;
+  uint16_t f32tof16(float) const;


-  private:
-    uint8_t  _decimals = 4;
-    uint16_t _value;
+private:
+  uint8_t  _decimals = 4;
+  uint16_t _value;

 };


-// -- END OF FILE --
+//  -- END OF FILE --

--- a/libraries/float16/keywords.txt
+++ b/libraries/float16/keywords.txt
@ -13,6 +13,10 @@ setBinary	KEYWORD2
 setDecimals	KEYWORD2
 getDecimals	KEYWORD2

+sign	KEYWORD2
+isZero	KEYWORD2
+isNaN	KEYWORD2
+isInf	KEYWORD2

 # Constants (LITERAL1)
 FLOAT16_LIB_VERSION	LITERAL1
--- a/libraries/float16/library.json
+++ b/libraries/float16/library.json
@ -15,9 +15,9 @@
    "type": "git",
    "url": "https://github.com/RobTillaart/float16.git"
  },
-  "version": "0.1.7",
+  "version": "0.1.8",
  "license": "MIT",
-  "frameworks": "arduino",
+  "frameworks": "*",
  "platforms": "*",
  "headers": "float16.h"
 }
--- a/libraries/float16/library.properties
+++ b/libraries/float16/library.properties
@ -1,5 +1,5 @@
 name=float16
-version=0.1.7
+version=0.1.8
 author=Rob Tillaart <rob.tillaart@gmail.com>
 maintainer=Rob Tillaart <rob.tillaart@gmail.com>
 sentence=Arduino library to implement float16 data type.