0.4.0 FastShiftIn

This commit is contained in:
Rob Tillaart 2024-09-19 14:18:47 +02:00
parent 6ce71e3145
commit 4bb93dad77
8 changed files with 288 additions and 72 deletions

View File

@ -6,6 +6,14 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/)
and this project adheres to [Semantic Versioning](http://semver.org/).
## [0.4.0] - 2024-09-10
- fix #17, loop unroll option, improving performance, kudos to nt314p
- add flag **FASTSHIFTIN_AVR_LOOP_UNROLLED** to select loop unrolling (optional, as it gives larger code size)
- update readme.md
- minor edits
----
## [0.3.4] - 2024-07-22
- add **void read(uint8_t \*array, uint8_t size)** (experimental)
- update readme.md

View File

@ -1,7 +1,7 @@
//
// FILE: FastShiftIn.cpp
// AUTHOR: Rob Tillaart
// VERSION: 0.3.4
// VERSION: 0.4.0
// PURPOSE: Fast ShiftIn for 74HC165 register, AVR optimized
// DATE: 2013-09-29
// URL: https://github.com/RobTillaart/FastShiftIn
@ -148,36 +148,98 @@ uint8_t FastShiftIn::readLSBFIRST()
{
// Reads 8 bits and assembles them least-significant bit first
// (bit masks run 0x01 .. 0x80). The result is stored in _lastValue.
#if defined(ARDUINO_ARCH_AVR) || defined(ARDUINO_ARCH_MEGAAVR)
#if defined(FASTSHIFTIN_AVR_LOOP_UNROLLED) // AVR SPEED OPTIMIZED #17
uint8_t rv = 0;
// local copies of the masks and register pointers held in the object
uint8_t cbmask1 = _clockBit;
uint8_t inmask1 = _dataInBit;
volatile uint8_t* localDataInRegister = _dataInRegister;
volatile uint8_t* localClockRegister = _clockRegister;
// disable interrupts (for all bits)
uint8_t oldSREG = SREG;
noInterrupts();
// snapshot of the clock port; written back after every bit to end the pulse.
// NOTE(review): this restores the whole port byte, not only the clock bit.
uint8_t r = *localClockRegister;
// unrolled: one HIGH / sample / LOW sequence per bit, masks 0x01 up to 0x80
*localClockRegister |= cbmask1; // clock pulse HIGH
if ((*localDataInRegister & inmask1) > 0) rv |= 0x01; // read one bit
*localClockRegister = r; // clock pulse LOW
*localClockRegister |= cbmask1; // clock pulse HIGH
if ((*localDataInRegister & inmask1) > 0) rv |= 0x02; // read one bit
*localClockRegister = r; // clock pulse LOW
*localClockRegister |= cbmask1; // clock pulse HIGH
if ((*localDataInRegister & inmask1) > 0) rv |= 0x04; // read one bit
*localClockRegister = r; // clock pulse LOW
*localClockRegister |= cbmask1; // clock pulse HIGH
if ((*localDataInRegister & inmask1) > 0) rv |= 0x08; // read one bit
*localClockRegister = r; // clock pulse LOW
*localClockRegister |= cbmask1; // clock pulse HIGH
if ((*localDataInRegister & inmask1) > 0) rv |= 0x10; // read one bit
*localClockRegister = r; // clock pulse LOW
*localClockRegister |= cbmask1; // clock pulse HIGH
if ((*localDataInRegister & inmask1) > 0) rv |= 0x20; // read one bit
*localClockRegister = r; // clock pulse LOW
*localClockRegister |= cbmask1; // clock pulse HIGH
if ((*localDataInRegister & inmask1) > 0) rv |= 0x40; // read one bit
*localClockRegister = r; // clock pulse LOW
*localClockRegister |= cbmask1; // clock pulse HIGH
if ((*localDataInRegister & inmask1) > 0) rv |= 0x80; // read one bit
*localClockRegister = r; // clock pulse LOW
// restore interrupt state
SREG = oldSREG;
_lastValue = rv;
#else // AVR SIZE OPTIMIZED
uint8_t rv = 0;
uint8_t cbmask1 = _clockBit;
uint8_t cbmask2 = ~_clockBit;
uint8_t inmask1 = _dataInBit;
volatile uint8_t* localDataInRegister = _dataInRegister;
volatile uint8_t* localClockRegister = _clockRegister;
// disable interrupts (for all bits)
uint8_t oldSREG = SREG;
noInterrupts();
// snapshot of the clock port; written back after every bit to end the pulse
uint8_t r = *localClockRegister;
// m is the bit mask of the bit being read; the loop ends when it shifts out to 0
for (uint8_t m = 0x01; m > 0; m <<= 1)
{
// remember state register
uint8_t oldSREG = SREG;
// disable interrupts
noInterrupts();
// clock pulse HIGH
*_clockRegister |= cbmask1;
*localClockRegister |= cbmask1;
// read one bit
if ((*_dataInRegister & inmask1) > 0) rv |= m;
if ((*localDataInRegister & inmask1) > 0) rv |= m;
// clock pulse LOW
*_clockRegister &= cbmask2;
// reset interrupts flag to previous state
SREG = oldSREG;
*localClockRegister = r;
}
_lastValue = rv;
return rv;
#else
// reset interrupts flag to previous state
SREG = oldSREG;
_lastValue = rv;
#endif // FASTSHIFTIN_AVR_LOOP_UNROLLED
#else // other platforms use the reference shiftIn()
// reference implementation
_lastValue = shiftIn(_dataPinIn, _clockPin, LSBFIRST);
return _lastValue;
#endif
// all paths will return _lastValue.
return _lastValue;
}
@ -185,37 +247,97 @@ uint8_t FastShiftIn::readMSBFIRST()
{
// Reads 8 bits and assembles them most-significant bit first
// (bit masks run 0x80 .. 0x01). The result is stored in _lastValue.
#if defined(ARDUINO_ARCH_AVR) || defined(ARDUINO_ARCH_MEGAAVR)
uint8_t rv = 0;
uint8_t cbmask1 = _clockBit;
uint8_t cbmask2 = ~_clockBit;
uint8_t inmask1 = _dataInBit;
#if defined(FASTSHIFTIN_AVR_LOOP_UNROLLED) // AVR SPEED OPTIMIZED
uint8_t rv = 0;
// local copies of the masks and register pointers held in the object
uint8_t cbmask1 = _clockBit;
uint8_t inmask1 = _dataInBit;
volatile uint8_t* localDataInRegister = _dataInRegister;
volatile uint8_t* localClockRegister = _clockRegister;
// disable interrupts (for all bits)
uint8_t oldSREG = SREG;
noInterrupts();
// snapshot of the clock port; written back after every bit to end the pulse.
// NOTE(review): this restores the whole port byte, not only the clock bit.
uint8_t r = *localClockRegister;
// unrolled: one HIGH / sample / LOW sequence per bit, masks 0x80 down to 0x01
*localClockRegister |= cbmask1; // clock pulse HIGH
if ((*localDataInRegister & inmask1) > 0) rv |= 0x80; // read one bit
*localClockRegister = r; // clock pulse LOW
*localClockRegister |= cbmask1; // clock pulse HIGH
if ((*localDataInRegister & inmask1) > 0) rv |= 0x40; // read one bit
*localClockRegister = r; // clock pulse LOW
*localClockRegister |= cbmask1; // clock pulse HIGH
if ((*localDataInRegister & inmask1) > 0) rv |= 0x20; // read one bit
*localClockRegister = r; // clock pulse LOW
*localClockRegister |= cbmask1; // clock pulse HIGH
if ((*localDataInRegister & inmask1) > 0) rv |= 0x10; // read one bit
*localClockRegister = r; // clock pulse LOW
*localClockRegister |= cbmask1; // clock pulse HIGH
if ((*localDataInRegister & inmask1) > 0) rv |= 0x08; // read one bit
*localClockRegister = r; // clock pulse LOW
*localClockRegister |= cbmask1; // clock pulse HIGH
if ((*localDataInRegister & inmask1) > 0) rv |= 0x04; // read one bit
*localClockRegister = r; // clock pulse LOW
*localClockRegister |= cbmask1; // clock pulse HIGH
if ((*localDataInRegister & inmask1) > 0) rv |= 0x02; // read one bit
*localClockRegister = r; // clock pulse LOW
*localClockRegister |= cbmask1; // clock pulse HIGH
if ((*localDataInRegister & inmask1) > 0) rv |= 0x01; // read one bit
*localClockRegister = r; // clock pulse LOW
// restore interrupt state
SREG = oldSREG;
_lastValue = rv;
#else // AVR SIZE OPTIMIZED
uint8_t rv = 0;
uint8_t cbmask1 = _clockBit;
uint8_t inmask1 = _dataInBit;
volatile uint8_t* localDataInRegister = _dataInRegister;
volatile uint8_t* localClockRegister = _clockRegister;
// disable interrupts (for all bits)
uint8_t oldSREG = SREG;
noInterrupts();
// snapshot of the clock port; written back after every bit to end the pulse
uint8_t r = *localClockRegister;
// m is the bit mask of the bit being read; the loop ends when it shifts out to 0
for (uint8_t m = 0x80; m > 0; m >>= 1)
{
// remember state register
uint8_t oldSREG = SREG;
// disable interrupts
noInterrupts();
// clock pulse HIGH
*_clockRegister |= cbmask1;
*localClockRegister |= cbmask1;
// read one bit
if ((*_dataInRegister & inmask1) > 0) rv |= m;
if ((*localDataInRegister & inmask1) > 0) rv |= m;
// clock pulse LOW
*_clockRegister &= cbmask2;
// reset interrupts flag to previous state
SREG = oldSREG;
*localClockRegister = r;
}
_lastValue = rv;
return rv;
#else
// reset interrupts flag to previous state
SREG = oldSREG;
_lastValue = rv;
#endif // FASTSHIFTIN_AVR_LOOP_UNROLLED
#else // other platforms use the reference shiftIn()
// reference implementation
_lastValue = shiftIn(_dataPinIn, _clockPin, MSBFIRST);
return _lastValue;
#endif
// all paths will return _lastValue.
return _lastValue;
}

View File

@ -2,7 +2,7 @@
//
// FILE: FastShiftIn.h
// AUTHOR: Rob Tillaart
// VERSION: 0.3.4
// VERSION: 0.4.0
// PURPOSE: Fast ShiftIn for 74HC165 register, AVR optimized
// DATE: 2013-09-29
// URL: https://github.com/RobTillaart/FastShiftIn
@ -11,8 +11,10 @@
#include "Arduino.h"
#define FASTSHIFTIN_LIB_VERSION (F("0.3.4"))
#define FASTSHIFTIN_LIB_VERSION (F("0.4.0"))
// uncomment next line to get SPEED OPTIMIZED CODE
// #define FASTSHIFTIN_AVR_LOOP_UNROLLED 1
class FastShiftIn
{

View File

@ -19,7 +19,7 @@ void setup()
Serial.println(__FILE__);
Serial.println(FASTSHIFTIN_LIB_VERSION);
FSI.setBitOrder(LSBFIRST);
FSI.setBitOrder(MSBFIRST);
digitalWrite(12, HIGH);
Serial.println("\n 8 bits HIGH\n");
@ -59,6 +59,7 @@ void test_read()
duration1 = micros() - start;
Serial.print("FastShiftIn1: ");
Serial.println(duration1 * 0.001);
delay(100);
start = micros();
for (int i = 0; i < 1000; i++)
@ -86,6 +87,7 @@ void test_read16()
duration1 = micros() - start;
Serial.print("FastShiftIn1: ");
Serial.println(duration1 * 0.001);
delay(100);
start = micros();
for (int i = 0; i < 1000; i++)
@ -113,6 +115,7 @@ void test_read24()
duration1 = micros() - start;
Serial.print("FastShiftIn1: ");
Serial.println(duration1 * 0.001);
delay(100);
start = micros();
for (int i = 0; i < 1000; i++)
@ -140,6 +143,7 @@ void test_read32()
duration1 = micros() - start;
Serial.print("FastShiftIn1: ");
Serial.println(duration1 * 0.001);
delay(100);
start = micros();
for (int i = 0; i < 1000; i++)
@ -167,6 +171,7 @@ void test_readLSBFIRST()
duration1 = micros() - start;
Serial.print("FastShiftIn1: ");
Serial.println(duration1 * 0.001);
delay(100);
start = micros();
for (int i = 0; i < 1000; i++)
@ -194,6 +199,7 @@ void test_readMSBFIRST()
duration1 = micros() - start;
Serial.print("FastShiftIn1: ");
Serial.println(duration1 * 0.001);
delay(100);
start = micros();
for (int i = 0; i < 1000; i++)
@ -221,6 +227,7 @@ void test_reference()
duration1 = micros() - start;
Serial.print("Standard shiftIn1: ");
Serial.println(duration1 * 0.001);
delay(100);
start = micros();
for (int i = 0; i < 1000; i++)
@ -243,4 +250,4 @@ void loop()
}
// -- END OF FILE --
// -- END OF FILE --

View File

@ -0,0 +1,51 @@
Arduino UNO
IDE 1.8.19
fastShiftIn_test.ino
0.4.0
8 bits HIGH
Performance - time in us : read()
FastShiftIn1: 12.51
FastShiftIn2: 23.77
Delta: 11.26
Performance - time in us : read16()
FastShiftIn1: 23.28
FastShiftIn2: 45.78
Delta: 22.50
Performance - time in us : read24()
FastShiftIn1: 35.54
FastShiftIn2: 70.30
Delta: 34.76
Performance - time in us : read32()
FastShiftIn1: 46.41
FastShiftIn2: 92.05
Delta: 45.64
Performance - time in us : readLSBFIRST()
FastShiftIn1: 12.83
FastShiftIn2: 24.77
Delta: 11.94
Performance - time in us : readMSBFIRST()
FastShiftIn1: 11.38
FastShiftIn2: 21.88
Delta: 10.50
Performance - time in us : reference shiftIn()
Standard shiftIn1: 108.99
Standard shiftIn2: 217.04
Delta: 108.05
done...

View File

@ -15,7 +15,7 @@
"type": "git",
"url": "https://github.com/RobTillaart/FastShiftIn.git"
},
"version": "0.3.4",
"version": "0.4.0",
"license": "MIT",
"frameworks": "*",
"platforms": "*",

View File

@ -1,5 +1,5 @@
name=FastShiftIn
version=0.3.4
version=0.4.0
author=Rob Tillaart <rob.tillaart@gmail.com>
maintainer=Rob Tillaart <rob.tillaart@gmail.com>
sentence=Arduino library for (AVR) optimized shiftIn - e.g. for 74HC165

View File

@ -17,7 +17,7 @@ Arduino library for **AVR** optimized shiftIn - e.g. for 74HC165.
## Description
FastShiftIn is a class that has optimized code (AVR only) to shift in data faster
than the normal **shiftIn()** function.
than the default provided **shiftIn()** function.
It speeds up the shift using low level ports and masks. These are predetermined
in the constructor of the FastShiftIn object.
@ -33,6 +33,15 @@ These are read16(), read24(), read32() and read(array, size).
The latter is used to shift in any size object.
### 0.4.0 breaking changes
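The 0.4.0 version adds a compile-time flag, **FASTSHIFTIN_AVR_LOOP_UNROLLED** (see FastShiftIn.h),
to unroll the inner loop in **readLSBFIRST()** and **readMSBFIRST()**.
The AVR optimized code now blocks interrupts for a whole byte at a time instead of per bit.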
Note: this optimization is new and thus experimental.
Feedback, including improvements, is welcome.
### Performance
The performance of **read()** is substantially faster for **AVR** than the default
@ -40,28 +49,36 @@ Arduino **shiftIn()**, but not as fast as HW SPI.
Exactly how large the performance gain is can be seen by running the example sketch.
It does a comparison and shows how the class is to be used.
Time in microseconds, Arduino UNO
| function | 0.2.3 | 0.3.2 |
|:---------------------|---------:|---------:|
| read() | 19.30 | 20.49 |
| read16() | | 41.04 |
| read24() | | 62.91 |
| read32() | | 83.95 |
| readLSBFIRST() | 19.04 | 19.92 |
| readMSBFIRST() | 19.04 | 19.92 |
| reference shiftIn() | 107.82 | 108.20 |
#### Measurements
Numbers may vary depending on bit-order flag.
Indicative time in microseconds, Arduino UNO, IDE 1.8.19, measured over 1000 calls.
| function | 0.2.3 | 0.3.2 | 0.4.0 | 0.4.0L |
|:---------------------|---------:|---------:|---------:|---------:|
| read() | 19.30 | 20.49 | 12.71 | 9.51 |
| read16() | | 41.04 | 25.39 | 18.98 |
| read24() | | 62.91 | 39.10 | 29.48 |
| read32() | | 83.95 | 51.42 | 38.60 |
| readLSBFIRST() | 19.04 | 19.92 | 11.96 | 8.81 |
| readMSBFIRST() | 19.04 | 19.92 | 11.94 | 8.75 |
| reference shiftIn() | 107.82 | 108.20 | 108.05 | 108.05 |
0.3.2 is a bit slower (incl. reference) than 0.2.3 but still much
faster than the reference.
- Note: 0.3.2 is a bit slower (incl. reference) than 0.2.3 but still much
faster than the reference. (Older IDE?)
- Note: 0.4.0 uses an improved test sketch.
- Note: 0.4.0 is measured with the loop unroll flag disabled.
- Note: 0.4.0L is measured with the loop unroll flag enabled.
### Related libraries
- https://github.com/RobTillaart/FastShiftIn
- https://github.com/RobTillaart/FastShiftOut
- https://github.com/RobTillaart/FastShiftInOut
- https://github.com/RobTillaart/FastShiftOut
- https://github.com/RobTillaart/ShiftInSlow
- https://github.com/RobTillaart/ShiftOutSlow
@ -74,6 +91,8 @@ faster than the reference.
### Constructor
bitOrder = { LSBFIRST, MSBFIRST };
- **FastShiftIn(uint8_t dataIn, uint8_t clockPin, uint8_t bitOrder = LSBFIRST)** Constructor
### Functions
@ -83,16 +102,18 @@ faster than the reference.
- **uint32_t read24(void)** reads a new value, 24 bit.
- **uint32_t read32(void)** reads a new value, 32 bit.
- **uint32_t lastRead()** returns last value read.
### Meta
- **bool setBitOrder(uint8_t bitOrder)** set LSBFIRST or MSBFIRST.
Returns false for other values.
- **uint8_t getBitOrder(void)** returns LSBFIRST or MSBFIRST.
- **uint8_t readLSBFIRST(void)** optimized LSB read(), 8 bit.
- **uint8_t readMSBFIRST(void)** optimized MSB read(), 8 bit.
### BitOrder
- **bool setBitOrder(uint8_t bitOrder)** set LSBFIRST or MSBFIRST.
Returns false for other values ==> no change.
- **uint8_t getBitOrder(void)** returns LSBFIRST or MSBFIRST as set in the constructor
or latest set from **setBitOrder()**.
### Experimental
- **void read(uint8_t \*array, uint8_t size)** read an array of values.
@ -116,6 +137,8 @@ If the BIT-order is not the BYTE-order, the user has two options
- call **read()** multiple times and merge the bytes in the order needed.
- call **read32()** (among others) and reorder the bytes in a separate function.
The library will not support such functionality.
## Notes
@ -128,6 +151,7 @@ pull up resistors, especially if wires are exceeding 10 cm (4").
#### Must
- keep in sync with FastShiftOut()
#### Should
@ -135,18 +159,20 @@ pull up resistors, especially if wires are exceeding 10 cm (4").
#### Could
- investigate separate **BYTE**-order,
- only MSBFirst and LSBFirst
- **void setByteOrder()** + **uint8_t getByteOrder()**
- other option is add parameters / overload to make byte order explicit
- **read32(1,0,3,2)** performance penalty + invalid combination.
- investigate ESP32 optimization readLSBFIRST readMSBFIRST
- example schemas
- performance ESP32
- example schema
- add invert flag?
- would it be interesting to make a fastShiftIn16() etc.?
- squeezes out more performance, but adds maintenance.
#### Won't
- investigate separate **BYTE**-order,
- only MSBFirst and LSBFirst
- **void setByteOrder()** + **uint8_t getByteOrder()**
- other option is add parameters / overload to make byte order explicit
- **read32(1,0,3,2)** performance penalty + invalid combination.
## Support