0.2.0 FastShiftInOut

This commit is contained in:
Rob Tillaart 2024-09-19 14:49:14 +02:00
parent 4bb93dad77
commit 188019a4fb
8 changed files with 419 additions and 137 deletions

View File

@ -6,11 +6,18 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/)
and this project adheres to [Semantic Versioning](http://semver.org/).
## [0.1.3] - 2023-02-20
## [0.2.0] - 2024-09-10
- fix #7, loop unroll option, improving performance, kudos to nt314p
- added flag to select LOOP UNROLL (is optional as it gives larger code size)
- update readme.md
- minor edits
----
## [0.1.4] - 2023-02-20
- update readme.md
- update keywords.txt
## [0.1.3] - 2023-02-20
- optimized noInterrupts
- add lastRead()
@ -20,7 +27,7 @@ and this project adheres to [Semantic Versioning](http://semver.org/).
- minor edits
## [0.1.2] - 2022-11-06
- redo clock pulse to match fastShiftOut
- redo clock pulse to match fastShiftOut
(after write and before read)
## [0.1.1] - 2022-11-05

View File

@ -1,7 +1,7 @@
//
// FILE: FastShiftInOut.cpp
// AUTHOR: Rob Tillaart
// VERSION: 0.1.4
// VERSION: 0.2.0
// PURPOSE: Arduino library for (AVR) optimized shiftInOut (simultaneously)
// URL: https://github.com/RobTillaart/FastShiftInOut
@ -40,6 +40,7 @@ FastShiftInOut::FastShiftInOut(uint8_t dataIn, uint8_t dataOut, uint8_t clockPin
_clockPin = clockPin;
#endif
_lastValue = 0;
_lastRead = 0;
}
@ -55,111 +56,6 @@ uint8_t FastShiftInOut::write(uint8_t data)
}
//  Shifts one byte out, least significant bit first, and reads one byte in
//  at the same time.
//  Per bit: set the data-out pin, raise the clock, sample the data-in pin,
//  lower the clock.
//  The byte written is kept in _lastValue, the byte read in _lastRead.
//
//  data:    byte to shift out (bit 0 goes first).
//  returns: byte read from the data-in pin (first bit read lands in bit 0).
uint8_t FastShiftInOut::writeLSBFIRST(uint8_t data)
{
  uint8_t received = 0;
  uint8_t pending  = data;
  _lastValue = pending;

#if defined(ARDUINO_ARCH_AVR) || defined(ARDUINO_ARCH_MEGAAVR)

  //  local copies of the pin masks
  const uint8_t clockSet   = _clockBit;
  const uint8_t clockClear = ~_clockBit;
  const uint8_t inputMask  = _dataInBit;
  const uint8_t dataSet    = _dataOutBit;
  const uint8_t dataClear  = ~_dataOutBit;

  //  interrupts stay disabled for the whole byte
  const uint8_t savedSREG = SREG;
  noInterrupts();

  //  the mask walks from bit 0 up to bit 7 and becomes 0 after the last shift
  uint8_t bit = 0x01;
  do
  {
    //  write one bit
    if ((pending & bit) != 0) *_dataOutRegister |= dataSet;
    else                      *_dataOutRegister &= dataClear;
    //  clock pulse HIGH
    *_clockRegister |= clockSet;
    //  read one bit
    if ((*_dataInRegister & inputMask) != 0) received |= bit;
    //  clock pulse LOW
    *_clockRegister &= clockClear;
    bit <<= 1;
  }
  while (bit != 0);

  //  restore the interrupt state that was active on entry
  SREG = savedSREG;

#else

  //  portable reference implementation
  for (uint8_t remaining = 8; remaining > 0; remaining--)
  {
    //  write one bit
    digitalWrite(_dataPinOut, pending & 0x01);
    pending >>= 1;
    //  clock pulse
    digitalWrite(_clockPin, HIGH);
    //  read one bit, newest bit enters at the top and shifts down
    received >>= 1;
    if (digitalRead(_dataPinIn) == HIGH)
    {
      received |= 0x80;
    }
    //  clock pulse
    digitalWrite(_clockPin, LOW);
  }

#endif

  _lastRead = received;
  return received;
}
//  Shifts one byte out, most significant bit first, and reads one byte in
//  at the same time.
//  Per bit: set the data-out pin, raise the clock, sample the data-in pin,
//  lower the clock.
//  The byte written is kept in _lastValue, the byte read in _lastRead.
//
//  data:    byte to shift out (bit 7 goes first).
//  returns: byte read from the data-in pin (first bit read lands in bit 7).
uint8_t FastShiftInOut::writeMSBFIRST(uint8_t data)
{
  uint8_t received = 0;
  uint8_t pending  = data;
  _lastValue = pending;

#if defined(ARDUINO_ARCH_AVR) || defined(ARDUINO_ARCH_MEGAAVR)

  //  local copies of the pin masks
  const uint8_t clockSet   = _clockBit;
  const uint8_t clockClear = ~_clockBit;
  const uint8_t inputMask  = _dataInBit;
  const uint8_t dataSet    = _dataOutBit;
  const uint8_t dataClear  = ~_dataOutBit;

  //  interrupts stay disabled for the whole byte
  const uint8_t savedSREG = SREG;
  noInterrupts();

  //  the mask walks from bit 7 down to bit 0 and becomes 0 after the last shift
  uint8_t bit = 0x80;
  do
  {
    //  write one bit
    if ((pending & bit) != 0) *_dataOutRegister |= dataSet;
    else                      *_dataOutRegister &= dataClear;
    //  clock pulse HIGH
    *_clockRegister |= clockSet;
    //  read one bit
    if ((*_dataInRegister & inputMask) != 0) received |= bit;
    //  clock pulse LOW
    *_clockRegister &= clockClear;
    bit >>= 1;
  }
  while (bit != 0);

  //  restore the interrupt state that was active on entry
  SREG = savedSREG;

#else

  //  portable reference implementation
  for (uint8_t remaining = 8; remaining > 0; remaining--)
  {
    //  write one bit
    digitalWrite(_dataPinOut, pending & 0x80);
    pending <<= 1;
    //  clock pulse
    digitalWrite(_clockPin, HIGH);
    //  read one bit, newest bit enters at the bottom and shifts up
    received <<= 1;
    if (digitalRead(_dataPinIn) == HIGH)
    {
      received |= 1;
    }
    //  clock pulse
    digitalWrite(_clockPin, LOW);
  }

#endif

  _lastRead = received;
  return received;
}
uint8_t FastShiftInOut::lastWritten(void)
{
@ -190,5 +86,287 @@ uint8_t FastShiftInOut::getBitOrder(void)
};
//  Shifts one byte out, least significant bit first, while simultaneously
//  shifting one byte in.
//
//  For every bit: the data-out pin is set, the clock pin is raised, the
//  data-in pin is sampled and the clock register is restored.
//  The byte written is stored in _lastValue, the byte read in _lastRead.
//
//  Three implementations are selected at compile time:
//  - AVR, FASTSHIFTINOUT_AVR_LOOP_UNROLLED defined: eight unrolled bit steps.
//  - AVR, flag not defined: the same bit step in a loop (smaller code).
//  - other platforms: reference implementation using digitalWrite/digitalRead.
//
//  data:    byte to shift out (bit 0 goes first).
//  returns: byte read from the data-in pin (first bit read lands in bit 0).
uint8_t FastShiftInOut::writeLSBFIRST(uint8_t data)
{
  uint8_t rv    = 0;
  uint8_t value = data;
  _lastValue = value;

#if defined(ARDUINO_ARCH_AVR) || defined(ARDUINO_ARCH_MEGAAVR)

  //  local copies of the masks and register pointers, shared by both AVR variants
  uint8_t cbmask1  = _clockBit;
  uint8_t inmask1  = _dataInBit;
  uint8_t outmask1 = _dataOutBit;
  uint8_t outmask2 = ~_dataOutBit;

  volatile uint8_t* localDataInRegister  = _dataInRegister;
  volatile uint8_t* localDataOutRegister = _dataOutRegister;
  volatile uint8_t* localClockRegister   = _clockRegister;

  //  disable interrupts (for all bits)
  uint8_t oldSREG = SREG;
  noInterrupts();

  //  Clock pulse technique used below:
  //    r = *clockRegister;  *clockRegister = r | cbmask1;  ...  *clockRegister = r;
  //  This is allowed as interrupts are disabled, so the register can not
  //  change between the read and the write back.
  //  r is sampled AFTER the data bit has been written, so writing r back
  //  does not undo the data bit when data-out and clock share one register.
  //  NOTE: if the clock bit is already set on entry, r | cbmask1 equals r
  //  and no clock edge is generated.
  uint8_t r;

#if defined(FASTSHIFTINOUT_AVR_LOOP_UNROLLED)   //  AVR SPEED OPTIMIZED

  //  bit 0
  if ((value & 0x01) == 0) *localDataOutRegister &= outmask2;
  else                     *localDataOutRegister |= outmask1;
  r = *localClockRegister;
  *localClockRegister = r | cbmask1;  //  set one bit
  if ((*localDataInRegister & inmask1) > 0) rv |= 0x01;
  *localClockRegister = r;            //  reset it

  //  bit 1
  if ((value & 0x02) == 0) *localDataOutRegister &= outmask2;
  else                     *localDataOutRegister |= outmask1;
  r = *localClockRegister;
  *localClockRegister = r | cbmask1;  //  set one bit
  if ((*localDataInRegister & inmask1) > 0) rv |= 0x02;
  *localClockRegister = r;            //  reset it

  //  bit 2
  if ((value & 0x04) == 0) *localDataOutRegister &= outmask2;
  else                     *localDataOutRegister |= outmask1;
  r = *localClockRegister;
  *localClockRegister = r | cbmask1;  //  set one bit
  if ((*localDataInRegister & inmask1) > 0) rv |= 0x04;
  *localClockRegister = r;            //  reset it

  //  bit 3
  if ((value & 0x08) == 0) *localDataOutRegister &= outmask2;
  else                     *localDataOutRegister |= outmask1;
  r = *localClockRegister;
  *localClockRegister = r | cbmask1;  //  set one bit
  if ((*localDataInRegister & inmask1) > 0) rv |= 0x08;
  *localClockRegister = r;            //  reset it

  //  bit 4
  if ((value & 0x10) == 0) *localDataOutRegister &= outmask2;
  else                     *localDataOutRegister |= outmask1;
  r = *localClockRegister;
  *localClockRegister = r | cbmask1;  //  set one bit
  if ((*localDataInRegister & inmask1) > 0) rv |= 0x10;
  *localClockRegister = r;            //  reset it

  //  bit 5
  if ((value & 0x20) == 0) *localDataOutRegister &= outmask2;
  else                     *localDataOutRegister |= outmask1;
  r = *localClockRegister;
  *localClockRegister = r | cbmask1;  //  set one bit
  if ((*localDataInRegister & inmask1) > 0) rv |= 0x20;
  *localClockRegister = r;            //  reset it

  //  bit 6
  if ((value & 0x40) == 0) *localDataOutRegister &= outmask2;
  else                     *localDataOutRegister |= outmask1;
  r = *localClockRegister;
  *localClockRegister = r | cbmask1;  //  set one bit
  if ((*localDataInRegister & inmask1) > 0) rv |= 0x40;
  *localClockRegister = r;            //  reset it

  //  bit 7
  if ((value & 0x80) == 0) *localDataOutRegister &= outmask2;
  else                     *localDataOutRegister |= outmask1;
  r = *localClockRegister;
  *localClockRegister = r | cbmask1;  //  set one bit
  if ((*localDataInRegister & inmask1) > 0) rv |= 0x80;
  *localClockRegister = r;            //  reset it

#else   //  AVR SIZE OPTIMIZED

  //  m walks from bit 0 up to bit 7 and overflows to 0 after the last bit
  for (uint8_t m = 1; m > 0; m <<= 1)
  {
    //  write one bit
    if ((value & m) == 0) *localDataOutRegister &= outmask2;
    else                  *localDataOutRegister |= outmask1;
    //  sample the clock register per bit, after the data write (see above);
    //  a value sampled once before the loop would revert the data bit
    //  on write back when both pins share a register.
    r = *localClockRegister;
    //  clock pulse HIGH
    *localClockRegister = r | cbmask1;
    //  read one bit
    if ((*localDataInRegister & inmask1) > 0) rv |= m;
    //  clock pulse LOW
    *localClockRegister = r;
  }

#endif  //  FASTSHIFTINOUT_AVR_LOOP_UNROLLED

  //  reset interrupts flag to previous state
  SREG = oldSREG;

#else   //  other platforms reference implementation

  for (uint8_t i = 0; i < 8; i++)
  {
    //  write one bit
    digitalWrite(_dataPinOut, value & 0x01);
    value >>= 1;
    //  clock pulse
    digitalWrite(_clockPin, HIGH);
    //  read one bit, newest bit enters at bit 7 and shifts down
    rv >>= 1;
    if (digitalRead(_dataPinIn) == HIGH) rv |= 0x80;
    //  clock pulse
    digitalWrite(_clockPin, LOW);
  }

#endif

  _lastRead = rv;
  return rv;
}
//  Shifts one byte out, most significant bit first, while simultaneously
//  shifting one byte in.
//
//  For every bit: the data-out pin is set, the clock pin is raised, the
//  data-in pin is sampled and the clock register is restored.
//  The byte written is stored in _lastValue, the byte read in _lastRead.
//
//  Three implementations are selected at compile time:
//  - AVR, FASTSHIFTINOUT_AVR_LOOP_UNROLLED defined: eight unrolled bit steps.
//  - AVR, flag not defined: the same bit step in a loop (smaller code).
//  - other platforms: reference implementation using digitalWrite/digitalRead.
//
//  data:    byte to shift out (bit 7 goes first).
//  returns: byte read from the data-in pin (first bit read lands in bit 7).
uint8_t FastShiftInOut::writeMSBFIRST(uint8_t data)
{
  uint8_t rv    = 0;
  uint8_t value = data;
  _lastValue = value;

#if defined(ARDUINO_ARCH_AVR) || defined(ARDUINO_ARCH_MEGAAVR)

  //  local copies of the masks and register pointers, shared by both AVR variants
  uint8_t cbmask1  = _clockBit;
  uint8_t inmask1  = _dataInBit;
  uint8_t outmask1 = _dataOutBit;
  uint8_t outmask2 = ~_dataOutBit;

  volatile uint8_t* localDataInRegister  = _dataInRegister;
  volatile uint8_t* localDataOutRegister = _dataOutRegister;
  volatile uint8_t* localClockRegister   = _clockRegister;

  //  disable interrupts (for all bits)
  uint8_t oldSREG = SREG;
  noInterrupts();

  //  Clock pulse technique used below:
  //    r = *clockRegister;  *clockRegister = r | cbmask1;  ...  *clockRegister = r;
  //  This is allowed as interrupts are disabled, so the register can not
  //  change between the read and the write back.
  //  r is sampled AFTER the data bit has been written, so writing r back
  //  does not undo the data bit when data-out and clock share one register.
  //  NOTE: if the clock bit is already set on entry, r | cbmask1 equals r
  //  and no clock edge is generated.
  uint8_t r;

#if defined(FASTSHIFTINOUT_AVR_LOOP_UNROLLED)   //  AVR SPEED OPTIMIZED

  //  bit 7
  if ((value & 0x80) == 0) *localDataOutRegister &= outmask2;
  else                     *localDataOutRegister |= outmask1;
  r = *localClockRegister;
  *localClockRegister = r | cbmask1;  //  set one bit
  if ((*localDataInRegister & inmask1) > 0) rv |= 0x80;
  *localClockRegister = r;            //  reset it

  //  bit 6
  if ((value & 0x40) == 0) *localDataOutRegister &= outmask2;
  else                     *localDataOutRegister |= outmask1;
  r = *localClockRegister;
  *localClockRegister = r | cbmask1;  //  set one bit
  if ((*localDataInRegister & inmask1) > 0) rv |= 0x40;
  *localClockRegister = r;            //  reset it

  //  bit 5
  if ((value & 0x20) == 0) *localDataOutRegister &= outmask2;
  else                     *localDataOutRegister |= outmask1;
  r = *localClockRegister;
  *localClockRegister = r | cbmask1;  //  set one bit
  if ((*localDataInRegister & inmask1) > 0) rv |= 0x20;
  *localClockRegister = r;            //  reset it

  //  bit 4
  if ((value & 0x10) == 0) *localDataOutRegister &= outmask2;
  else                     *localDataOutRegister |= outmask1;
  r = *localClockRegister;
  *localClockRegister = r | cbmask1;  //  set one bit
  if ((*localDataInRegister & inmask1) > 0) rv |= 0x10;
  *localClockRegister = r;            //  reset it

  //  bit 3
  if ((value & 0x08) == 0) *localDataOutRegister &= outmask2;
  else                     *localDataOutRegister |= outmask1;
  r = *localClockRegister;
  *localClockRegister = r | cbmask1;  //  set one bit
  if ((*localDataInRegister & inmask1) > 0) rv |= 0x08;
  *localClockRegister = r;            //  reset it

  //  bit 2
  if ((value & 0x04) == 0) *localDataOutRegister &= outmask2;
  else                     *localDataOutRegister |= outmask1;
  r = *localClockRegister;
  *localClockRegister = r | cbmask1;  //  set one bit
  if ((*localDataInRegister & inmask1) > 0) rv |= 0x04;
  *localClockRegister = r;            //  reset it

  //  bit 1
  if ((value & 0x02) == 0) *localDataOutRegister &= outmask2;
  else                     *localDataOutRegister |= outmask1;
  r = *localClockRegister;
  *localClockRegister = r | cbmask1;  //  set one bit
  if ((*localDataInRegister & inmask1) > 0) rv |= 0x02;
  *localClockRegister = r;            //  reset it

  //  bit 0
  if ((value & 0x01) == 0) *localDataOutRegister &= outmask2;
  else                     *localDataOutRegister |= outmask1;
  r = *localClockRegister;
  *localClockRegister = r | cbmask1;  //  set one bit
  if ((*localDataInRegister & inmask1) > 0) rv |= 0x01;
  *localClockRegister = r;            //  reset it

#else   //  AVR SIZE OPTIMIZED

  //  m walks from bit 7 down to bit 0 and becomes 0 after the last bit
  for (uint8_t m = 0x80; m > 0; m >>= 1)
  {
    //  write one bit
    if ((value & m) == 0) *localDataOutRegister &= outmask2;
    else                  *localDataOutRegister |= outmask1;
    //  sample the clock register per bit, after the data write (see above);
    //  a value sampled once before the loop would revert the data bit
    //  on write back when both pins share a register.
    r = *localClockRegister;
    //  clock pulse HIGH
    *localClockRegister = r | cbmask1;
    //  read one bit
    if ((*localDataInRegister & inmask1) > 0) rv |= m;
    //  clock pulse LOW
    *localClockRegister = r;
  }

#endif  //  FASTSHIFTINOUT_AVR_LOOP_UNROLLED

  //  reset interrupts flag to previous state
  SREG = oldSREG;

#else   //  other platforms reference implementation

  for (uint8_t i = 0; i < 8; i++)
  {
    //  write one bit
    digitalWrite(_dataPinOut, value & 0x80);
    value <<= 1;
    //  clock pulse
    digitalWrite(_clockPin, HIGH);
    //  read one bit, newest bit enters at bit 0 and shifts up
    rv <<= 1;
    if (digitalRead(_dataPinIn) == HIGH) rv |= 1;
    //  clock pulse
    digitalWrite(_clockPin, LOW);
  }

#endif

  _lastRead = rv;
  return rv;
}
// -- END OF FILE --

View File

@ -2,7 +2,7 @@
//
// FILE: FastShiftInOut.h
// AUTHOR: Rob Tillaart
// VERSION: 0.1.4
// VERSION: 0.2.0
// PURPOSE: Arduino library for (AVR) optimized shiftInOut (simultaneously)
// URL: https://github.com/RobTillaart/FastShiftInOut
@ -10,7 +10,10 @@
#include "Arduino.h"
#define FASTSHIFTINOUT_LIB_VERSION (F("0.1.4"))
#define FASTSHIFTINOUT_LIB_VERSION (F("0.2.0"))
// SPEED OPTIMIZED CODE (unrolled loop) is enabled by the define on the next line;
// comment that line out to get the smaller, SIZE OPTIMIZED code.
#define FASTSHIFTINOUT_AVR_LOOP_UNROLLED 1
class FastShiftInOut
@ -28,7 +31,7 @@ public:
bool setBitOrder(uint8_t bitOrder);
uint8_t getBitOrder(void);
// overrule bitOrder (most optimized).
// overrule bitOrder (most optimized).
uint8_t writeLSBFIRST(uint8_t data);
uint8_t writeMSBFIRST(uint8_t data);

View File

@ -13,33 +13,78 @@
Arduino library for **AVR** optimized shiftInOut (simultaneously).
Related libraries
- https://github.com/RobTillaart/FastShiftIn
- https://github.com/RobTillaart/FastShiftOut
- https://github.com/RobTillaart/ShiftInSlow
- https://github.com/RobTillaart/ShiftOutSlow
## Description
FastShiftInOut is a class that can send and receive bytes simultaneously.
In that sense it mimics a SPI bus.
**Experimental**
Experimental.
FastShiftInOut is a class that has optimized code (**AVR** only) to send and receive
bytes simultaneously. In that sense it mimics a SPI bus.
It speeds up the shift using low level ports and masks. These are predetermined
in the constructor of the FastShiftInOut object.
If not an **ARDUINO_ARCH_AVR** or **ARDUINO_ARCH_MEGAAVR** the class falls back
to a default non optimized implementation.
The library allows you to set (and get) the bitOrder and apply this to multiple write()
calls. It also provides access to **writeLSBFIRST()** and **writeMSBFIRST()** which
are the low level workers and most optimized code (so far).
Note: the bitOrder of the byte read and the byte written are the same.
## Performance
### 0.2.0 breaking changes
performance of **write()**
The 0.2.0 version has a flag to unroll the inner loop in **writeLSBFIRST()**
and **writeMSBFIRST()**. The unrolled loop blocks the interrupts per byte.
Note: this optimization is new and thus experimental.
Feedback, including improvements, is welcome.
### Performance
#### Measurements (pre 0.2.0)
Performance of **write()**
| version | UNO (us) | ESP32 (us) |
|:---------:|-----------:|-------------:|
| 0.1.0 | 181.08 | 4.32 |
| 0.1.1 | 26.84 | 4.32 |
| 0.1.2 | 26.84 | no data |
| 0.1.2 | 26.84 | no data |
| 0.1.3 | 25.52 | 4.32 |
#### Measurements
(0.2.0)
Indicative time in microseconds, Arduino UNO, IDE 1.8.19, measured over 1000 calls.
(delta between 2 calls and 1 call to eliminate overhead)
| function | 0.1.3 | 0.2.0 | 0.2.0L |
|:-------------------------|---------:|---------:|----------:|
| write() (reference) | no data | 158.24 | no data |
| write() | 25.52 | 17.61 | 12.26 |
| writeLSBFIRST() | 25.52 | 17.61 | 12.26 |
| writeMSBFIRST() | 25.52 | 17.60 | 12.20 |
- Note: 0.1.3 added from old table.
- Note: reference run on AVR by commenting all optimizations.
- Note: 0.2.0 measured with loop unroll flag disabled.
- Note: 0.2.0L measured with loop unrolled flag enabled.
### Related
- https://github.com/RobTillaart/FastShiftIn
- https://github.com/RobTillaart/FastShiftInOut
- https://github.com/RobTillaart/FastShiftOut
- https://github.com/RobTillaart/ShiftInSlow
- https://github.com/RobTillaart/ShiftOutSlow
## Interface
```cpp
@ -54,29 +99,34 @@ bitOrder = { LSBFIRST, MSBFIRST };
- **uint8_t write(uint8_t data)** reads and writes simultaneously.
- **uint8_t lastWritten(void)** returns last byte written.
- **uint8_t lastRead(void)** returns last byte read.
- **uint8_t writeLSBFIRST(uint8_t data)** lowest level function, optimized for LSB.
- **uint8_t writeMSBFIRST(uint8_t data)** lowest level function, optimized for MSB.
### BitOrder
- **bool setBitOrder(uint8_t bitOrder)** bitOrder must be LSBFIRST or MSBFIRST.
- **uint8_t getBitOrder(void)** idem.
- **uint8_t writeLSBFIRST(uint8_t data)** optimized version, in practice almost no difference.
- **uint8_t writeMSBFIRST(uint8_t data)** optimized version, in practice almost no difference.
## Future
#### Must
- documentation
- follow FastShiftIn and FastShiftOut
- Update documentation
- Follow FastShiftIn and FastShiftOut
#### Should
#### Could
- **void ignoreRead()**
- add Print interface?
#### Wont
- **void ignoreRead()** => would in effect be FastShiftIn()
- add Print interface?
- meaning of the return value is not defined.
## Support

View File

@ -37,7 +37,8 @@ void test1()
duration1 = micros() - start;
Serial.print(" write: ");
Serial.println(duration1 * 0.001);
delay(100);
start = micros();
for (int i = 0; i < 1000; i++)
{
@ -64,7 +65,8 @@ void test2()
duration1 = micros() - start;
Serial.print("writeLSBFIRST: ");
Serial.println(duration1 * 0.001);
delay(100);
start = micros();
for (int i = 0; i < 1000; i++)
{
@ -91,7 +93,8 @@ void test3()
duration1 = micros() - start;
Serial.print("writeMSBFIRST: ");
Serial.println(duration1 * 0.001);
delay(100);
start = micros();
for (int i = 0; i < 1000; i++)
{
@ -113,4 +116,4 @@ void loop()
}
// -- END OF FILE --
// -- END OF FILE --

View File

@ -0,0 +1,41 @@
IDE: 1.8.19
Board: UNO
FASTSHIFTINOUT_LIB_VERSION: 0.2.0
Performance - time in us
write: 18.74
write: 36.35
Delta: 17.61
writeLSBFIRST: 17.86
writeLSBFIRST: 35.46
Delta: 17.61
writeMSBFIRST: 17.86
writeMSBFIRST: 35.46
Delta: 17.60
done ...
# loop unrolled.
FASTSHIFTINOUT_LIB_VERSION: 0.2.0
Performance - time in us
write: 13.40
write: 25.65
Delta: 12.26
writeLSBFIRST: 12.52
writeLSBFIRST: 24.77
Delta: 12.26
writeMSBFIRST: 12.45
writeMSBFIRST: 24.65
Delta: 12.20
done ...

View File

@ -15,7 +15,7 @@
"type": "git",
"url": "https://github.com/RobTillaart/FastShiftInOut.git"
},
"version": "0.1.4",
"version": "0.2.0",
"license": "MIT",
"frameworks": "*",
"platforms": "*",

View File

@ -1,5 +1,5 @@
name=FastShiftInOut
version=0.1.4
version=0.2.0
author=Rob Tillaart <rob.tillaart@gmail.com>
maintainer=Rob Tillaart <rob.tillaart@gmail.com>
sentence=Arduino library for (AVR) optimized shiftInOut (simultaneously)