Skip to content

Commit e668c02

Browse files
committed
0.2.1 FastShiftInOut
1 parent c10a901 commit e668c02

File tree

7 files changed

+114
-55
lines changed

7 files changed

+114
-55
lines changed

libraries/FastShiftInOut/CHANGELOG.md

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,9 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/)
66
and this project adheres to [Semantic Versioning](http://semver.org/).
77

88

9+
## [0.2.1] - 2024-10-31
10+
- fix #9, more optimizations
11+
912
## [0.2.0] - 2024-09-10
1013
- fix #7, loop unroll option, improving performance, kudos to nt314p
1114
- added flag to select LOOP UNROLL (is optional as it gives larger code size)

libraries/FastShiftInOut/FastShiftInOut.cpp

Lines changed: 58 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
//
22
// FILE: FastShiftInOut.cpp
33
// AUTHOR: Rob Tillaart
4-
// VERSION: 0.2.0
4+
// VERSION: 0.2.1
55
// PURPOSE: Arduino library for (AVR) optimized shiftInOut (simultaneously)
66
// URL: https://github.com/RobTillaart/FastShiftInOut
77

@@ -109,8 +109,12 @@ uint8_t FastShiftInOut::writeLSBFIRST(uint8_t data)
109109
uint8_t oldSREG = SREG;
110110
noInterrupts();
111111

112-
if ((value & 0x01) == 0) *localDataOutRegister &= outmask2;
113-
else *localDataOutRegister |= outmask1;
112+
// See discussion #17 FastShiftOut
113+
uint8_t d0 = *localDataOutRegister & outmask2; // cache 0
114+
uint8_t d1 = d0 | outmask1; // cache 1
115+
116+
if ((value & 0x01) == 0) *localDataOutRegister = d0;
117+
else *localDataOutRegister = d1;
114118
// *localClockRegister |= cbmask1;
115119
// if ((*localDataInRegister & inmask1) > 0) rv |= 0x01;
116120
// *localClockRegister &= cbmask2; // ~_clockBit;
@@ -121,50 +125,50 @@ uint8_t FastShiftInOut::writeLSBFIRST(uint8_t data)
121125
if ((*localDataInRegister & inmask1) > 0) rv |= 0x01;
122126
*localClockRegister = r; // reset it
123127

124-
if ((value & 0x02) == 0) *localDataOutRegister &= outmask2;
125-
else *localDataOutRegister |= outmask1;
128+
if ((value & 0x02) == 0) *localDataOutRegister = d0;
129+
else *localDataOutRegister = d1;
126130
r = *localClockRegister;
127131
*localClockRegister = r | cbmask1; // set one bit
128132
if ((*localDataInRegister & inmask1) > 0) rv |= 0x02;
129133
*localClockRegister = r; // reset it
130134

131-
if ((value & 0x04) == 0) *localDataOutRegister &= outmask2;
132-
else *localDataOutRegister |= outmask1;
135+
if ((value & 0x04) == 0) *localDataOutRegister = d0;
136+
else *localDataOutRegister = d1;
133137
r = *localClockRegister;
134138
*localClockRegister = r | cbmask1; // set one bit
135139
if ((*localDataInRegister & inmask1) > 0) rv |= 0x04;
136140
*localClockRegister = r; // reset it
137141

138-
if ((value & 0x08) == 0) *localDataOutRegister &= outmask2;
139-
else *localDataOutRegister |= outmask1;
142+
if ((value & 0x08) == 0) *localDataOutRegister = d0;
143+
else *localDataOutRegister = d1;
140144
r = *localClockRegister;
141145
*localClockRegister = r | cbmask1; // set one bit
142146
if ((*localDataInRegister & inmask1) > 0) rv |= 0x08;
143147
*localClockRegister = r; // reset it
144148

145-
if ((value & 0x10) == 0) *localDataOutRegister &= outmask2;
146-
else *localDataOutRegister |= outmask1;
149+
if ((value & 0x10) == 0) *localDataOutRegister = d0;
150+
else *localDataOutRegister = d1;
147151
r = *localClockRegister;
148152
*localClockRegister = r | cbmask1; // set one bit
149153
if ((*localDataInRegister & inmask1) > 0) rv |= 0x10;
150154
*localClockRegister = r; // reset it
151155

152-
if ((value & 0x20) == 0) *localDataOutRegister &= outmask2;
153-
else *localDataOutRegister |= outmask1;
156+
if ((value & 0x20) == 0) *localDataOutRegister = d0;
157+
else *localDataOutRegister = d1;
154158
r = *localClockRegister;
155159
*localClockRegister = r | cbmask1; // set one bit
156160
if ((*localDataInRegister & inmask1) > 0) rv |= 0x20;
157161
*localClockRegister = r; // reset it
158162

159-
if ((value & 0x40) == 0) *localDataOutRegister &= outmask2;
160-
else *localDataOutRegister |= outmask1;
163+
if ((value & 0x40) == 0) *localDataOutRegister = d0;
164+
else *localDataOutRegister = d1;
161165
r = *localClockRegister;
162166
*localClockRegister = r | cbmask1; // set one bit
163167
if ((*localDataInRegister & inmask1) > 0) rv |= 0x40;
164168
*localClockRegister = r; // reset it
165169

166-
if ((value & 0x80) == 0) *localDataOutRegister &= outmask2;
167-
else *localDataOutRegister |= outmask1;
170+
if ((value & 0x80) == 0) *localDataOutRegister = d0;
171+
else *localDataOutRegister = d1;
168172
r = *localClockRegister;
169173
*localClockRegister = r | cbmask1; // set one bit
170174
if ((*localDataInRegister & inmask1) > 0) rv |= 0x80;
@@ -185,16 +189,19 @@ uint8_t FastShiftInOut::writeLSBFIRST(uint8_t data)
185189

186190
uint8_t oldSREG = SREG;
187191
noInterrupts();
188-
189-
uint8_t r = *localClockRegister;
190-
192+
193+
uint8_t d0 = *localDataOutRegister & outmask2; // cache 0
194+
uint8_t d1 = d0 | outmask1; // cache 1
195+
191196
for (uint8_t m = 1; m > 0; m <<= 1)
192197
{
193198
// write one bit
194-
if ((value & m) == 0) *localDataOutRegister &= outmask2;
195-
else *localDataOutRegister |= outmask1;
199+
if ((value & m) == 0) *localDataOutRegister = d0;
200+
else *localDataOutRegister = d1;
201+
uint8_t r = *localClockRegister;
202+
196203
// clock pulse HIGH
197-
*localClockRegister |= cbmask1;
204+
*localClockRegister = r | cbmask1;
198205
// read one bit
199206
if ((*localDataInRegister & inmask1) > 0) rv |= m;
200207
// clock pulse LOW
@@ -249,8 +256,12 @@ uint8_t FastShiftInOut::writeMSBFIRST(uint8_t data)
249256
uint8_t oldSREG = SREG;
250257
noInterrupts();
251258

252-
if ((value & 0x80) == 0) *localDataOutRegister &= outmask2;
253-
else *localDataOutRegister |= outmask1;
259+
// See discussion #17 FastShiftOut
260+
uint8_t d0 = *localDataOutRegister & outmask2; // cache 0
261+
uint8_t d1 = d0 | outmask1; // cache 1
262+
263+
if ((value & 0x80) == 0) *localDataOutRegister = d0;
264+
else *localDataOutRegister = d1;
254265
// *localClockRegister |= cbmask1;
255266
// if ((*localDataInRegister & inmask1) > 0) rv |= 0x80;
256267
// *localClockRegister &= cbmask2; // ~_clockBit;
@@ -261,50 +272,50 @@ uint8_t FastShiftInOut::writeMSBFIRST(uint8_t data)
261272
if ((*localDataInRegister & inmask1) > 0) rv |= 0x80;
262273
*localClockRegister = r; // reset it
263274

264-
if ((value & 0x40) == 0) *localDataOutRegister &= outmask2;
265-
else *localDataOutRegister |= outmask1;
275+
if ((value & 0x40) == 0) *localDataOutRegister = d0;
276+
else *localDataOutRegister = d1;
266277
r = *localClockRegister;
267278
*localClockRegister = r | cbmask1; // set one bit
268279
if ((*localDataInRegister & inmask1) > 0) rv |= 0x40;
269280
*localClockRegister = r; // reset it
270281

271-
if ((value & 0x20) == 0) *localDataOutRegister &= outmask2;
272-
else *localDataOutRegister |= outmask1;
282+
if ((value & 0x20) == 0) *localDataOutRegister = d0;
283+
else *localDataOutRegister = d1;
273284
r = *localClockRegister;
274285
*localClockRegister = r | cbmask1; // set one bit
275286
if ((*localDataInRegister & inmask1) > 0) rv |= 0x20;
276287
*localClockRegister = r; // reset it
277288

278-
if ((value & 0x10) == 0) *localDataOutRegister &= outmask2;
279-
else *localDataOutRegister |= outmask1;
289+
if ((value & 0x10) == 0) *localDataOutRegister = d0;
290+
else *localDataOutRegister = d1;
280291
r = *localClockRegister;
281292
*localClockRegister = r | cbmask1; // set one bit
282293
if ((*localDataInRegister & inmask1) > 0) rv |= 0x10;
283294
*localClockRegister = r; // reset it
284295

285-
if ((value & 0x08) == 0) *localDataOutRegister &= outmask2;
286-
else *localDataOutRegister |= outmask1;
296+
if ((value & 0x08) == 0) *localDataOutRegister = d0;
297+
else *localDataOutRegister = d1;
287298
r = *localClockRegister;
288299
*localClockRegister = r | cbmask1; // set one bit
289300
if ((*localDataInRegister & inmask1) > 0) rv |= 0x08;
290301
*localClockRegister = r; // reset it
291302

292-
if ((value & 0x04) == 0) *localDataOutRegister &= outmask2;
293-
else *localDataOutRegister |= outmask1;
303+
if ((value & 0x04) == 0) *localDataOutRegister = d0;
304+
else *localDataOutRegister = d1;
294305
r = *localClockRegister;
295306
*localClockRegister = r | cbmask1; // set one bit
296307
if ((*localDataInRegister & inmask1) > 0) rv |= 0x04;
297308
*localClockRegister = r; // reset it
298309

299-
if ((value & 0x02) == 0) *localDataOutRegister &= outmask2;
300-
else *localDataOutRegister |= outmask1;
310+
if ((value & 0x02) == 0) *localDataOutRegister = d0;
311+
else *localDataOutRegister = d1;
301312
r = *localClockRegister;
302313
*localClockRegister = r | cbmask1; // set one bit
303314
if ((*localDataInRegister & inmask1) > 0) rv |= 0x02;
304315
*localClockRegister = r; // reset it
305316

306-
if ((value & 0x01) == 0) *localDataOutRegister &= outmask2;
307-
else *localDataOutRegister |= outmask1;
317+
if ((value & 0x01) == 0) *localDataOutRegister = d0;
318+
else *localDataOutRegister = d1;
308319
r = *localClockRegister;
309320
*localClockRegister = r | cbmask1; // set one bit
310321
if ((*localDataInRegister & inmask1) > 0) rv |= 0x01;
@@ -326,14 +337,18 @@ uint8_t FastShiftInOut::writeMSBFIRST(uint8_t data)
326337
uint8_t oldSREG = SREG;
327338
noInterrupts();
328339

329-
uint8_t r = *localClockRegister;
340+
// See discussion #17 FastShiftOut
341+
uint8_t d0 = *localDataOutRegister & outmask2; // cache 0
342+
uint8_t d1 = d0 | outmask1; // cache 1
343+
330344
for (uint8_t m = 0x80; m > 0; m >>= 1)
331345
{
332346
// write one bit
333-
if ((value & m) == 0) *localDataOutRegister &= outmask2;
334-
else *localDataOutRegister |= outmask1;
347+
if ((value & m) == 0) *localDataOutRegister = d0;
348+
else *localDataOutRegister = d1;
349+
uint8_t r = *localClockRegister;
335350
// clock pulse HIGH
336-
*localClockRegister |= cbmask1;
351+
*localClockRegister = r | cbmask1;
337352
// read one bit
338353
if ((*localDataInRegister & inmask1) > 0) rv |= m;
339354
// clock pulse LOW

libraries/FastShiftInOut/FastShiftInOut.h

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2,18 +2,18 @@
22
//
33
// FILE: FastShiftInOut.h
44
// AUTHOR: Rob Tillaart
5-
// VERSION: 0.2.0
5+
// VERSION: 0.2.1
66
// PURPOSE: Arduino library for (AVR) optimized shiftInOut (simultaneously)
77
// URL: https://github.com/RobTillaart/FastShiftInOut
88

99

1010
#include "Arduino.h"
1111

1212

13-
#define FASTSHIFTINOUT_LIB_VERSION (F("0.2.0"))
13+
#define FASTSHIFTINOUT_LIB_VERSION (F("0.2.1"))
1414

1515
// uncomment next line to get SPEED OPTIMIZED CODE
16-
#define FASTSHIFTINOUT_AVR_LOOP_UNROLLED 1
16+
// #define FASTSHIFTINOUT_AVR_LOOP_UNROLLED 1
1717

1818

1919
class FastShiftInOut

libraries/FastShiftInOut/README.md

Lines changed: 9 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -58,22 +58,23 @@ Performance of **write()**
5858

5959
#### Measurements
6060

61-
(0.2.0)
61+
(0.2.1)
6262
Indicative time in microseconds, Arduino UNO, IDE 1.8.19, measured over 1000 calls.
6363
(delta between 2 calls and 1 call to eliminate overhead)
6464

65-
| function | 0.1.3 | 0.2.0 | 0.2.0L |
66-
|:-------------------------|---------:|---------:|----------:|
67-
| write() (reference) | no data | 158.24 | no data |
68-
| write() | 25.52 | 17.61 | 12.26 |
69-
| writeLSBFIRST() | 25.52 | 17.61 | 12.26 |
70-
| writeMSBFIRST() | 25.52 | 17.60 | 12.20 |
65+
| function | 0.1.3 | 0.2.0 | 0.2.0L | 0.2.1 | 0.2.1L |
66+
|:-------------------------|---------:|---------:|----------:|---------:|----------:|
67+
| write() (reference) | no data | 158.24 | no data | 158.24 | no data |
68+
| write() | 25.52 | 17.61 | 12.26 | 16.72 | 11.00 |
69+
| writeLSBFIRST() | 25.52 | 17.61 | 12.26 | 16.72 | 11.00 |
70+
| writeMSBFIRST() | 25.52 | 17.60 | 12.20 | 16.72 | 10.94 |
7171

7272

7373
- Note: 0.1.3 added from old table.
7474
- Note: reference run on AVR by commenting all optimizations.
7575
- Note: 0.2.0 measured with loop unroll flag disabled.
7676
- Note: 0.2.0L measured with loop unrolled flag enabled.
77+
- Note: 0.2.1 / 0.2.1L measured the same way as 0.2.0 / 0.2.0L (loop unroll flag disabled / enabled).
7778

7879

7980
### Related
@@ -83,6 +84,7 @@ Indicative time in microseconds, Arduino UNO, IDE 1.8.19, measured over 1000 cal
8384
- https://github.com/RobTillaart/FastShiftOut
8485
- https://github.com/RobTillaart/ShiftInSlow
8586
- https://github.com/RobTillaart/ShiftOutSlow
87+
- https://github.com/RobTillaart/SWSPI (experimental)
8688

8789

8890
## Interface
Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
IDE: 1.8.19
2+
Board: UNO
3+
4+
FASTSHIFTINOUT_LIB_VERSION: 0.2.1
5+
6+
Performance - time in us
7+
write: 17.86
8+
write: 34.58
9+
Delta: 16.72
10+
11+
writeLSBFIRST: 16.98
12+
writeLSBFIRST: 33.70
13+
Delta: 16.72
14+
15+
writeMSBFIRST: 16.98
16+
writeMSBFIRST: 33.70
17+
Delta: 16.72
18+
19+
20+
# loop unrolled.
21+
22+
FASTSHIFTINOUT_LIB_VERSION: 0.2.1
23+
24+
Performance - time in us
25+
write: 12.14
26+
write: 23.14
27+
Delta: 11.00
28+
29+
writeLSBFIRST: 11.26
30+
writeLSBFIRST: 22.26
31+
Delta: 11.00
32+
33+
writeMSBFIRST: 11.19
34+
writeMSBFIRST: 22.13
35+
Delta: 10.94
36+
37+
38+
done ...
39+

libraries/FastShiftInOut/library.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@
1515
"type": "git",
1616
"url": "https://github.com/RobTillaart/FastShiftInOut.git"
1717
},
18-
"version": "0.2.0",
18+
"version": "0.2.1",
1919
"license": "MIT",
2020
"frameworks": "*",
2121
"platforms": "*",

libraries/FastShiftInOut/library.properties

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
name=FastShiftInOut
2-
version=0.2.0
2+
version=0.2.1
33
author=Rob Tillaart <[email protected]>
44
maintainer=Rob Tillaart <[email protected]>
55
sentence=Arduino library for (AVR) optimized shiftInOut (simultaneously)

0 commit comments

Comments
 (0)