0.4.0 FastShiftIn

This commit is contained in:
Rob Tillaart 2024-09-19 14:18:47 +02:00
parent 6ce71e3145
commit 4bb93dad77
8 changed files with 288 additions and 72 deletions

View File

@ -6,6 +6,14 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/)
and this project adheres to [Semantic Versioning](http://semver.org/).
## [0.4.0] - 2024-09-10
- fix #17, loop unroll option, improving performance, kudos to nt314p
- added flag to select LOOP UNROLL (is optional as it gives larger code size)
- update readme.md
- minor edits
----
## [0.3.4] - 2024-07-22
- add **void read(uint8_t \*array, uint8_t size)** (experimental)
- update readme.md

View File

@ -1,7 +1,7 @@
//
// FILE: FastShiftIn.cpp
// AUTHOR: Rob Tillaart
// VERSION: 0.3.4
// VERSION: 0.4.0
// PURPOSE: Fast ShiftIn for 74HC165 register, AVR optimized
// DATE: 2013-09-29
// URL: https://github.com/RobTillaart/FastShiftIn
@ -148,36 +148,98 @@ uint8_t FastShiftIn::readLSBFIRST()
{
// Shifts in one byte, least significant bit first, stores it in _lastValue
// and returns _lastValue.
// On AVR / megaAVR the clock and data-in port registers are accessed directly;
// FASTSHIFTIN_AVR_LOOP_UNROLLED selects the unrolled variant (faster, larger code).
// All other platforms fall back to the Arduino shiftIn() reference implementation.
#if defined(ARDUINO_ARCH_AVR) || defined(ARDUINO_ARCH_MEGAAVR)
#if defined(FASTSHIFTIN_AVR_LOOP_UNROLLED) // AVR SPEED OPTIMIZED #17
uint8_t rv = 0;
uint8_t cbmask1 = _clockBit;
uint8_t inmask1 = _dataInBit;
// local copies of the register pointers used for all eight bits
volatile uint8_t* localDataInRegister = _dataInRegister;
volatile uint8_t* localClockRegister = _clockRegister;
// disable interrupts (for all bits)
uint8_t oldSREG = SREG;
noInterrupts();
// snapshot of the clock port register before the first pulse
// NOTE(review): writing r back restores the whole port register; this yields
// clock LOW only if the clock pin was LOW on entry - confirm.
uint8_t r = *localClockRegister;
*localClockRegister |= cbmask1; // clock pulse HIGH
if ((*localDataInRegister & inmask1) > 0) rv |= 0x01; // read one bit
*localClockRegister = r; // clock pulse LOW
*localClockRegister |= cbmask1; // clock pulse HIGH
if ((*localDataInRegister & inmask1) > 0) rv |= 0x02; // read one bit
*localClockRegister = r; // clock pulse LOW
*localClockRegister |= cbmask1; // clock pulse HIGH
if ((*localDataInRegister & inmask1) > 0) rv |= 0x04; // read one bit
*localClockRegister = r; // clock pulse LOW
*localClockRegister |= cbmask1; // clock pulse HIGH
if ((*localDataInRegister & inmask1) > 0) rv |= 0x08; // read one bit
*localClockRegister = r; // clock pulse LOW
*localClockRegister |= cbmask1; // clock pulse HIGH
if ((*localDataInRegister & inmask1) > 0) rv |= 0x10; // read one bit
*localClockRegister = r; // clock pulse LOW
*localClockRegister |= cbmask1; // clock pulse HIGH
if ((*localDataInRegister & inmask1) > 0) rv |= 0x20; // read one bit
*localClockRegister = r; // clock pulse LOW
*localClockRegister |= cbmask1; // clock pulse HIGH
if ((*localDataInRegister & inmask1) > 0) rv |= 0x40; // read one bit
*localClockRegister = r; // clock pulse LOW
*localClockRegister |= cbmask1; // clock pulse HIGH
if ((*localDataInRegister & inmask1) > 0) rv |= 0x80; // read one bit
*localClockRegister = r; // clock pulse LOW
// restore interrupt state
SREG = oldSREG;
_lastValue = rv;
#else // AVR SIZE OPTIMIZED
uint8_t rv = 0;
uint8_t cbmask1 = _clockBit;
uint8_t cbmask2 = ~_clockBit;
uint8_t inmask1 = _dataInBit;
volatile uint8_t* localDataInRegister = _dataInRegister;
volatile uint8_t* localClockRegister = _clockRegister;
// disable interrupts (for all bits)
uint8_t oldSREG = SREG;
noInterrupts();
// snapshot of the clock port register before the first pulse
uint8_t r = *localClockRegister;
// m walks the bit mask from 0x01 up to 0x80; the loop ends when the shift overflows to 0
for (uint8_t m = 0x01; m > 0; m <<= 1)
{
// remember state register
uint8_t oldSREG = SREG;
// disable interrupts
noInterrupts();
// clock pulse HIGH
*_clockRegister |= cbmask1;
*localClockRegister |= cbmask1;
// read one bit
if ((*_dataInRegister & inmask1) > 0) rv |= m;
if ((*localDataInRegister & inmask1) > 0) rv |= m;
// clock pulse LOW
*_clockRegister &= cbmask2;
// reset interrupts flag to previous state
SREG = oldSREG;
*localClockRegister = r;
}
_lastValue = rv;
return rv;
#else
// reset interrupts flag to previous state
SREG = oldSREG;
_lastValue = rv;
#endif // FASTSHIFTIN_AVR_LOOP_UNROLLED
#else // other platforms: reference shiftIn()
// reference implementation
_lastValue = shiftIn(_dataPinIn, _clockPin, LSBFIRST);
return _lastValue;
#endif
// all paths will return _lastValue.
return _lastValue;
}
@ -185,37 +247,97 @@ uint8_t FastShiftIn::readMSBFIRST()
{
// Shifts in one byte, most significant bit first, stores it in _lastValue
// and returns _lastValue.
// On AVR / megaAVR the clock and data-in port registers are accessed directly;
// FASTSHIFTIN_AVR_LOOP_UNROLLED selects the unrolled variant (faster, larger code).
// All other platforms fall back to the Arduino shiftIn() reference implementation.
#if defined(ARDUINO_ARCH_AVR) || defined(ARDUINO_ARCH_MEGAAVR)
uint8_t rv = 0;
uint8_t cbmask1 = _clockBit;
uint8_t cbmask2 = ~_clockBit;
uint8_t inmask1 = _dataInBit;
#if defined(FASTSHIFTIN_AVR_LOOP_UNROLLED) // AVR SPEED OPTIMIZED
uint8_t rv = 0;
uint8_t cbmask1 = _clockBit;
uint8_t inmask1 = _dataInBit;
// local copies of the register pointers used for all eight bits
volatile uint8_t* localDataInRegister = _dataInRegister;
volatile uint8_t* localClockRegister = _clockRegister;
// disable interrupts (for all bits)
uint8_t oldSREG = SREG;
noInterrupts();
// snapshot of the clock port register before the first pulse
// NOTE(review): writing r back restores the whole port register; this yields
// clock LOW only if the clock pin was LOW on entry - confirm.
uint8_t r = *localClockRegister;
*localClockRegister |= cbmask1; // clock pulse HIGH
if ((*localDataInRegister & inmask1) > 0) rv |= 0x80; // read one bit
*localClockRegister = r; // clock pulse LOW
*localClockRegister |= cbmask1; // clock pulse HIGH
if ((*localDataInRegister & inmask1) > 0) rv |= 0x40; // read one bit
*localClockRegister = r; // clock pulse LOW
*localClockRegister |= cbmask1; // clock pulse HIGH
if ((*localDataInRegister & inmask1) > 0) rv |= 0x20; // read one bit
*localClockRegister = r; // clock pulse LOW
*localClockRegister |= cbmask1; // clock pulse HIGH
if ((*localDataInRegister & inmask1) > 0) rv |= 0x10; // read one bit
*localClockRegister = r; // clock pulse LOW
*localClockRegister |= cbmask1; // clock pulse HIGH
if ((*localDataInRegister & inmask1) > 0) rv |= 0x08; // read one bit
*localClockRegister = r; // clock pulse LOW
*localClockRegister |= cbmask1; // clock pulse HIGH
if ((*localDataInRegister & inmask1) > 0) rv |= 0x04; // read one bit
*localClockRegister = r; // clock pulse LOW
*localClockRegister |= cbmask1; // clock pulse HIGH
if ((*localDataInRegister & inmask1) > 0) rv |= 0x02; // read one bit
*localClockRegister = r; // clock pulse LOW
*localClockRegister |= cbmask1; // clock pulse HIGH
if ((*localDataInRegister & inmask1) > 0) rv |= 0x01; // read one bit
*localClockRegister = r; // clock pulse LOW
// restore interrupt state
SREG = oldSREG;
_lastValue = rv;
#else // AVR SIZE OPTIMIZED
uint8_t rv = 0;
uint8_t cbmask1 = _clockBit;
uint8_t inmask1 = _dataInBit;
volatile uint8_t* localDataInRegister = _dataInRegister;
volatile uint8_t* localClockRegister = _clockRegister;
// disable interrupts (for all bits)
uint8_t oldSREG = SREG;
noInterrupts();
// snapshot of the clock port register before the first pulse
uint8_t r = *localClockRegister;
// m walks the bit mask from 0x80 down to 0x01; the loop ends when the shift reaches 0
for (uint8_t m = 0x80; m > 0; m >>= 1)
{
// remember state register
uint8_t oldSREG = SREG;
// disable interrupts
noInterrupts();
// clock pulse HIGH
*_clockRegister |= cbmask1;
*localClockRegister |= cbmask1;
// read one bit
if ((*_dataInRegister & inmask1) > 0) rv |= m;
if ((*localDataInRegister & inmask1) > 0) rv |= m;
// clock pulse LOW
*_clockRegister &= cbmask2;
// reset interrupts flag to previous state
SREG = oldSREG;
*localClockRegister = r;
}
_lastValue = rv;
return rv;
#else
// reset interrupts flag to previous state
SREG = oldSREG;
_lastValue = rv;
#endif // FASTSHIFTIN_AVR_LOOP_UNROLLED
#else // other platforms: reference shiftIn()
// reference implementation
_lastValue = shiftIn(_dataPinIn, _clockPin, MSBFIRST);
return _lastValue;
#endif
// all paths will return _lastValue.
return _lastValue;
}

View File

@ -2,7 +2,7 @@
//
// FILE: FastShiftIn.h
// AUTHOR: Rob Tillaart
// VERSION: 0.3.4
// VERSION: 0.4.0
// PURPOSE: Fast ShiftIn for 74HC165 register, AVR optimized
// DATE: 2013-09-29
// URL: https://github.com/RobTillaart/FastShiftIn
@ -11,8 +11,10 @@
#include "Arduino.h"
#define FASTSHIFTIN_LIB_VERSION (F("0.3.4"))
#define FASTSHIFTIN_LIB_VERSION (F("0.4.0"))
// uncomment next line to get SPEED OPTIMIZED CODE
// #define FASTSHIFTIN_AVR_LOOP_UNROLLED 1
class FastShiftIn
{

View File

@ -19,7 +19,7 @@ void setup()
Serial.println(__FILE__);
Serial.println(FASTSHIFTIN_LIB_VERSION);
FSI.setBitOrder(LSBFIRST);
FSI.setBitOrder(MSBFIRST);
digitalWrite(12, HIGH);
Serial.println("\n 8 bits HIGH\n");
@ -59,6 +59,7 @@ void test_read()
duration1 = micros() - start;
Serial.print("FastShiftIn1: ");
Serial.println(duration1 * 0.001);
delay(100);
start = micros();
for (int i = 0; i < 1000; i++)
@ -86,7 +87,8 @@ void test_read16()
duration1 = micros() - start;
Serial.print("FastShiftIn1: ");
Serial.println(duration1 * 0.001);
delay(100);
start = micros();
for (int i = 0; i < 1000; i++)
{
@ -113,7 +115,8 @@ void test_read24()
duration1 = micros() - start;
Serial.print("FastShiftIn1: ");
Serial.println(duration1 * 0.001);
delay(100);
start = micros();
for (int i = 0; i < 1000; i++)
{
@ -140,7 +143,8 @@ void test_read32()
duration1 = micros() - start;
Serial.print("FastShiftIn1: ");
Serial.println(duration1 * 0.001);
delay(100);
start = micros();
for (int i = 0; i < 1000; i++)
{
@ -167,7 +171,8 @@ void test_readLSBFIRST()
duration1 = micros() - start;
Serial.print("FastShiftIn1: ");
Serial.println(duration1 * 0.001);
delay(100);
start = micros();
for (int i = 0; i < 1000; i++)
{
@ -194,7 +199,8 @@ void test_readMSBFIRST()
duration1 = micros() - start;
Serial.print("FastShiftIn1: ");
Serial.println(duration1 * 0.001);
delay(100);
start = micros();
for (int i = 0; i < 1000; i++)
{
@ -221,7 +227,8 @@ void test_reference()
duration1 = micros() - start;
Serial.print("Standard shiftIn1: ");
Serial.println(duration1 * 0.001);
delay(100);
start = micros();
for (int i = 0; i < 1000; i++)
{
@ -243,4 +250,4 @@ void loop()
}
// -- END OF FILE --
// -- END OF FILE --

View File

@ -0,0 +1,51 @@
Arduino UNO
IDE 1.8.19
fastShiftIn_test.ino
0.4.0
8 bits HIGH
Performance - time in us : read()
FastShiftIn1: 12.51
FastShiftIn2: 23.77
Delta: 11.26
Performance - time in us : read16()
FastShiftIn1: 23.28
FastShiftIn2: 45.78
Delta: 22.50
Performance - time in us : read24()
FastShiftIn1: 35.54
FastShiftIn2: 70.30
Delta: 34.76
Performance - time in us : read32()
FastShiftIn1: 46.41
FastShiftIn2: 92.05
Delta: 45.64
Performance - time in us : readLSBFIRST()
FastShiftIn1: 12.83
FastShiftIn2: 24.77
Delta: 11.94
Performance - time in us : readMSBFIRST()
FastShiftIn1: 11.38
FastShiftIn2: 21.88
Delta: 10.50
Performance - time in us : reference shiftIn()
Standard shiftIn1: 108.99
Standard shiftIn2: 217.04
Delta: 108.05
done...

View File

@ -15,7 +15,7 @@
"type": "git",
"url": "https://github.com/RobTillaart/FastShiftIn.git"
},
"version": "0.3.4",
"version": "0.4.0",
"license": "MIT",
"frameworks": "*",
"platforms": "*",

View File

@ -1,5 +1,5 @@
name=FastShiftIn
version=0.3.4
version=0.4.0
author=Rob Tillaart <rob.tillaart@gmail.com>
maintainer=Rob Tillaart <rob.tillaart@gmail.com>
sentence=Arduino library for (AVR) optimized shiftIn - e.g. for 74HC165

View File

@ -17,7 +17,7 @@ Arduino library for **AVR** optimized shiftIn - e.g. for 74HC165.
## Description
FastShiftIn is a class that has optimized code (AVR only) to shift in data faster
than the normal **shiftIn()** function.
than the default provided **shiftIn()** function.
It speeds up the shift using low level ports and masks. These are predetermined
in the constructor of the FastShiftIn object.
@ -26,13 +26,22 @@ to the default **shiftIn()** implementation.
The library allows setting (and getting) the bitOrder and applying it to multiple read()
calls. It also provides access to **readLSBFIRST()** and **readMSBFIRST()** which
are the low level workers and most optimized code (so far).
are the low level workers and most optimized code (so far).
The library provides wrapper functions to read multi-byte variables.
These are read16(), read24(), read32() and read(array, size).
The latter is used to shift in any size object.
### 0.4.0 breaking changes
The 0.4.0 version has a flag to unroll the inner loop in **readLSBFIRST()**
and **readMSBFIRST()**. The AVR optimized code now disables interrupts for a whole byte at a time (previously per bit).
Note: this optimization is new and thus experimental.
Feedback, including improvements, is welcome.
### Performance
The performance of **read()** is substantially faster for **AVR** than the default
@ -40,28 +49,36 @@ Arduino **shiftIn()**, but not as fast as HW SPI.
Exactly how large the performance gain is can be seen with the example sketch.
It does a comparison and shows how the class is to be used.
Time in microseconds, Arduino UNO
| function | 0.2.3 | 0.3.2 |
|:---------------------|---------:|---------:|
| read() | 19.30 | 20.49 |
| read16() | | 41.04 |
| read24() | | 62.91 |
| read32() | | 83.95 |
| readLSBFIRST() | 19.04 | 19.92 |
| readMSBFIRST() | 19.04 | 19.92 |
| reference shiftIn() | 107.82 | 108.20 |
#### Measurements
Numbers may vary depending on bit-order flag.
Indicative time in microseconds, Arduino UNO, IDE 1.8.19, measured over 1000 calls.
| function | 0.2.3 | 0.3.2 | 0.4.0 | 0.4.0L |
|:---------------------|---------:|---------:|---------:|---------:|
| read() | 19.30 | 20.49 | 12.71 | 9.51 |
| read16() | | 41.04 | 25.39 | 18.98 |
| read24() | | 62.91 | 39.10 | 29.48 |
| read32() | | 83.95 | 51.42 | 38.60 |
| readLSBFIRST() | 19.04 | 19.92 | 11.96 | 8.81 |
| readMSBFIRST() | 19.04 | 19.92 | 11.94 | 8.75 |
| reference shiftIn() | 107.82 | 108.20 | 108.05 | 108.05 |
0.3.2 is a bit slower (incl. reference) than 0.2.3 but still much
faster than the reference.
- Note: 0.3.2 is a bit slower (incl. reference) than 0.2.3 but still much
faster than the reference. (Older IDE?)
- Note: 0.4.0 improved test sketch.
- Note: 0.4.0 measured with loop unroll flag disabled.
- Note: 0.4.0L measured with loop unroll flag enabled.
### Related libraries
- https://github.com/RobTillaart/FastShiftIn
- https://github.com/RobTillaart/FastShiftOut
- https://github.com/RobTillaart/FastShiftInOut
- https://github.com/RobTillaart/FastShiftOut
- https://github.com/RobTillaart/ShiftInSlow
- https://github.com/RobTillaart/ShiftOutSlow
@ -74,6 +91,8 @@ faster than the reference.
### Constructor
bitOrder = { LSBFIRST, MSBFIRST };
- **FastShiftIn(uint8_t dataIn, uint8_t clockPin, uint8_t bitOrder = LSBFIRST)** Constructor
### Functions
@ -83,16 +102,18 @@ faster than the reference.
- **uint32_t read24(void)** reads a new value, 24 bit.
- **uint32_t read32(void)** reads a new value, 32 bit.
- **uint32_t lastRead()** returns last value read.
### Meta
- **bool setBitOrder(uint8_t bitOrder)** set LSBFIRST or MSBFIRST.
Returns false for other values.
- **uint8_t getBitOrder(void)** returns LSBFIRST or MSBFIRST.
- **uint8_t readLSBFIRST(void)** optimized LSB read(), 8 bit.
- **uint8_t readMSBFIRST(void)** optimized MSB read(), 8 bit.
### BitOrder
- **bool setBitOrder(uint8_t bitOrder)** set LSBFIRST or MSBFIRST.
Returns false for other values ==> no change.
- **uint8_t getBitOrder(void)** returns LSBFIRST or MSBFIRST as set in the constructor
or latest set from **setBitOrder()**.
### Experimental
- **void read(uint8_t \*array, uint8_t size)** read an array of values.
@ -116,6 +137,8 @@ If the BIT-order is not the BYTE-order, the user has two options
- call **read()** multiple times and merge the bytes in the order needed.
- call **read32()** (a.o) and reorder the bytes in a separate function.
The library will not support such functionality.
## Notes
@ -128,6 +151,7 @@ pull up resistors, especially if wires are exceeding 10 cm (4").
#### Must
- keep in sync with FastShiftOut()
#### Should
@ -135,18 +159,20 @@ pull up resistors, especially if wires are exceeding 10 cm (4").
#### Could
- investigate ESP32 optimization readLSBFIRST readMSBFIRST
- performance ESP32
- example schema
- add invert flag?
- would it be interesting to make a fastShiftIn16() etc?
- squeeze performance but more maintenance.?
#### Wont
- investigate separate **BYTE**-order,
- only MSBFirst and LSBFirst
- **void setByteOrder()** + **uint8_t getByteOrder()**
- other option is add parameters / overload to make byte order explicit
- **read32(1,0,3,2)** performance penalty + invalid combination.
- investigate ESP32 optimization readLSBFIRST readMSBFIRST
- example schemas
- would it be interesting to make a fastShiftIn16() etc?
- squeeze performance but more maintenance.?
#### Wont
## Support