Skip to content

Commit c10a901

Browse files
committed
0.4.1 FastShiftOut
1 parent 75cf304 commit c10a901

File tree

9 files changed

+200
-56
lines changed

9 files changed

+200
-56
lines changed

libraries/FastShiftOut/CHANGELOG.md

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,9 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/)
66
and this project adheres to [Semantic Versioning](http://semver.org/).
77

88

9+
## [0.4.1] - 2024-10-31
10+
- fix #17, add more optimizations, kudos to nt314p
11+
912
## [0.4.0] - 2024-09-03
1013
- fix #15, loop unroll option, improving performance, kudos to nt314p
1114
- fixed bug in test program (see #15)

libraries/FastShiftOut/Examples/FastShiftOut_scope_test/FastShiftOut_scope_test.ino

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -62,8 +62,8 @@ void loop()
6262
// shiftOut(12, 13, MSBFIRST, 0x55);
6363

6464
FSO.write(0x55);
65-
delayMicroseconds(100);
65+
delayMicroseconds(50);
6666
}
6767

6868

69-
// -- END OF FILE --
69+
// -- END OF FILE --

libraries/FastShiftOut/Examples/FastShiftOut_test/performance_0.4.0.txt

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,3 +43,41 @@ println(3.14159265, 4): 629.96
4343

4444
done ...
4545

46+
47+
no loop unroll version
48+
49+
Performance - time in us
50+
write: 15.34
51+
write: 29.43
52+
Delta: 14.10
53+
54+
writeLSBFIRST: 14.34
55+
writeLSBFIRST: 28.42
56+
Delta: 14.09
57+
58+
writeMSBFIRST: 14.34
59+
writeMSBFIRST: 28.42
60+
Delta: 14.08
61+
62+
Standard shiftOut1: 89.85
63+
Standard shiftOut2: 179.44
64+
Delta: 89.60
65+
66+
write16: 29.31
67+
write16: 58.35
68+
Delta: 29.04
69+
70+
write24: 43.38
71+
write24: 86.51
72+
Delta: 43.13
73+
74+
write32: 57.47
75+
write32: 114.68
76+
Delta: 57.22
77+
78+
79+
Test print interface
80+
println("Hello world"): 222.68
81+
println(1357): 262.60
82+
println(3.14159265, 4): 650.68
83+
Lines changed: 85 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,85 @@
1+
IDE: 1.8.19
2+
Board: UNO
3+
4+
loop unrolled version
5+
6+
FASTSHIFTOUT_LIB_VERSION: 0.4.1
7+
8+
Performance - time in us
9+
write: 10.37
10+
write: 19.49
11+
Delta: 9.12
12+
13+
writeLSBFIRST: 9.37
14+
writeLSBFIRST: 18.49
15+
Delta: 9.12
16+
17+
writeMSBFIRST: 9.37
18+
writeMSBFIRST: 18.49
19+
Delta: 9.12
20+
21+
Standard shiftOut1: 89.85
22+
Standard shiftOut2: 179.45
23+
Delta: 89.60
24+
25+
write16: 19.37
26+
write16: 38.48
27+
Delta: 19.11
28+
29+
write24: 28.48
30+
write24: 56.72
31+
Delta: 28.23
32+
33+
write32: 37.60
34+
write32: 74.95
35+
Delta: 37.34
36+
37+
38+
Test print interface
39+
println("Hello world"): 158.12
40+
println(1357): 232.80
41+
println(3.14159265, 4): 610.92
42+
43+
44+
done ...
45+
46+
47+
no loop unroll version
48+
49+
Performance - time in us
50+
write: 14.08
51+
write: 26.91
52+
Delta: 12.83
53+
54+
writeLSBFIRST: 13.08
55+
writeLSBFIRST: 25.90
56+
Delta: 12.82
57+
58+
writeMSBFIRST: 13.08
59+
writeMSBFIRST: 25.90
60+
Delta: 12.82
61+
62+
Standard shiftOut1: 89.85
63+
Standard shiftOut2: 179.44
64+
Delta: 89.59
65+
66+
write16: 26.78
67+
write16: 53.32
68+
Delta: 26.54
69+
70+
write24: 39.62
71+
write24: 78.98
72+
Delta: 39.36
73+
74+
write32: 52.44
75+
write32: 104.62
76+
Delta: 52.18
77+
78+
79+
Test print interface
80+
println("Hello world"): 206.32
81+
println(1357): 255.04
82+
println(3.14159265, 4): 640.52
83+
84+
85+
done ...

libraries/FastShiftOut/FastShiftOut.cpp

Lines changed: 52 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
//
22
// FILE: FastShiftOut.cpp
33
// AUTHOR: Rob Tillaart
4-
// VERSION: 0.4.0
4+
// VERSION: 0.4.1
55
// PURPOSE: ShiftOut that implements the Print interface
66
// DATE: 2013-08-22
77
// URL: https://github.com/RobTillaart/FastShiftOut
@@ -170,8 +170,12 @@ size_t FastShiftOut::writeLSBFIRST(uint8_t data)
170170
uint8_t oldSREG = SREG;
171171
noInterrupts();
172172

173-
if ((value & 0x01) == 0) *localDataOutRegister &= outmask2;
174-
else *localDataOutRegister |= outmask1;
173+
// See discussion #17
174+
uint8_t d0 = *localDataOutRegister & outmask2; // cached register value to write for a 0 data bit
175+
uint8_t d1 = d0 | outmask1; // cached register value to write for a 1 data bit
176+
177+
if ((value & 0x01) == 0) *localDataOutRegister = d0;
178+
else *localDataOutRegister = d1;
175179
// *_clockRegister |= cbmask1;
176180
// *_clockRegister &= cbmask2;
177181
// following code is allowed as interrupts are disabled.
@@ -180,44 +184,44 @@ size_t FastShiftOut::writeLSBFIRST(uint8_t data)
180184
*localClockRegister = r | cbmask1; // set one bit
181185
*localClockRegister = r; // reset bit
182186

183-
if ((value & 0x02) == 0) *localDataOutRegister &= outmask2;
184-
else *localDataOutRegister |= outmask1;
187+
if ((value & 0x02) == 0) *localDataOutRegister = d0;
188+
else *localDataOutRegister = d1;
185189
r = *localClockRegister;
186190
*localClockRegister = r | cbmask1; // set one bit
187191
*localClockRegister = r; // reset it
188192

189-
if ((value & 0x04) == 0) *localDataOutRegister &= outmask2;
190-
else *localDataOutRegister |= outmask1;
193+
if ((value & 0x04) == 0) *localDataOutRegister = d0;
194+
else *localDataOutRegister = d1;
191195
r = *localClockRegister;
192196
*localClockRegister = r | cbmask1; // set one bit
193197
*localClockRegister = r; // reset it
194198

195-
if ((value & 0x08) == 0) *localDataOutRegister &= outmask2;
196-
else *localDataOutRegister |= outmask1;
199+
if ((value & 0x08) == 0) *localDataOutRegister = d0;
200+
else *localDataOutRegister = d1;
197201
r = *localClockRegister;
198202
*localClockRegister = r | cbmask1; // set one bit
199203
*localClockRegister = r; // reset it
200204

201-
if ((value & 0x10) == 0) *localDataOutRegister &= outmask2;
202-
else *localDataOutRegister |= outmask1;
205+
if ((value & 0x10) == 0) *localDataOutRegister = d0;
206+
else *localDataOutRegister = d1;
203207
r = *localClockRegister;
204208
*localClockRegister = r | cbmask1; // set one bit
205209
*localClockRegister = r; // reset it
206210

207-
if ((value & 0x20) == 0) *localDataOutRegister &= outmask2;
208-
else *localDataOutRegister |= outmask1;
211+
if ((value & 0x20) == 0) *localDataOutRegister = d0;
212+
else *localDataOutRegister = d1;
209213
r = *localClockRegister;
210214
*localClockRegister = r | cbmask1; // set one bit
211215
*localClockRegister = r; // reset it
212216

213-
if ((value & 0x40) == 0) *localDataOutRegister &= outmask2;
214-
else *localDataOutRegister |= outmask1;
217+
if ((value & 0x40) == 0) *localDataOutRegister = d0;
218+
else *localDataOutRegister = d1;
215219
r = *localClockRegister;
216220
*localClockRegister = r | cbmask1; // set one bit
217221
*localClockRegister = r; // reset it
218222

219-
if ((value & 0x80) == 0) *localDataOutRegister &= outmask2;
220-
else *localDataOutRegister |= outmask1;
223+
if ((value & 0x80) == 0) *localDataOutRegister = d0;
224+
else *localDataOutRegister = d1;
221225
r = *localClockRegister;
222226
*localClockRegister = r | cbmask1; // set one bit
223227
*localClockRegister = r; // reset it
@@ -238,11 +242,14 @@ size_t FastShiftOut::writeLSBFIRST(uint8_t data)
238242
uint8_t oldSREG = SREG;
239243
noInterrupts();
240244

245+
// See discussion #17
246+
uint8_t d0 = *localDataOutRegister & outmask2; // cached register value to write for a 0 data bit
247+
uint8_t d1 = d0 | outmask1; // cached register value to write for a 1 data bit
241248
for (uint8_t m = 1; m > 0; m <<= 1)
242249
{
243250
// process one bit
244-
if ((value & m) == 0) *localDataOutRegister &= outmask2;
245-
else *localDataOutRegister |= outmask1;
251+
if ((value & m) == 0) *localDataOutRegister = d0;
252+
else *localDataOutRegister = d1;
246253
uint8_t r = *localClockRegister;
247254
*localClockRegister = r | cbmask1; // set one bit
248255
*localClockRegister = r; // reset it
@@ -284,8 +291,12 @@ size_t FastShiftOut::writeMSBFIRST(uint8_t data)
284291
uint8_t oldSREG = SREG;
285292
noInterrupts();
286293

287-
if ((value & 0x80) == 0) *localDataOutRegister &= outmask2;
288-
else *localDataOutRegister |= outmask1;
294+
// See discussion #17
295+
uint8_t d0 = *localDataOutRegister & outmask2; // cached register value to write for a 0 data bit
296+
uint8_t d1 = d0 | outmask1; // cached register value to write for a 1 data bit
297+
298+
if ((value & 0x80) == 0) *localDataOutRegister = d0;
299+
else *localDataOutRegister = d1;
289300
// *localClockRegister |= cbmask1;
290301
// *localClockRegister &= cbmask2;
291302
// following code is allowed as interrupts are disabled.
@@ -294,44 +305,44 @@ size_t FastShiftOut::writeMSBFIRST(uint8_t data)
294305
*localClockRegister = r | cbmask1; // set one bit
295306
*localClockRegister = r; // reset it
296307

297-
if ((value & 0x40) == 0) *localDataOutRegister &= outmask2;
298-
else *localDataOutRegister |= outmask1;
308+
if ((value & 0x40) == 0) *localDataOutRegister = d0;
309+
else *localDataOutRegister = d1;
299310
r = *localClockRegister;
300311
*localClockRegister = r | cbmask1; // set one bit
301312
*localClockRegister = r; // reset it
302313

303-
if ((value & 0x20) == 0) *localDataOutRegister &= outmask2;
304-
else *localDataOutRegister |= outmask1;
314+
if ((value & 0x20) == 0) *localDataOutRegister = d0;
315+
else *localDataOutRegister = d1;
305316
r = *localClockRegister;
306317
*localClockRegister = r | cbmask1; // set one bit
307318
*localClockRegister = r; // reset it
308319

309-
if ((value & 0x10) == 0) *localDataOutRegister &= outmask2;
310-
else *localDataOutRegister |= outmask1;
320+
if ((value & 0x10) == 0) *localDataOutRegister = d0;
321+
else *localDataOutRegister = d1;
311322
r = *localClockRegister;
312323
*localClockRegister = r | cbmask1; // set one bit
313324
*localClockRegister = r; // reset it
314325

315-
if ((value & 0x08) == 0) *localDataOutRegister &= outmask2;
316-
else *localDataOutRegister |= outmask1;
326+
if ((value & 0x08) == 0) *localDataOutRegister = d0;
327+
else *localDataOutRegister = d1;
317328
r = *localClockRegister;
318329
*localClockRegister = r | cbmask1; // set one bit
319330
*localClockRegister = r; // reset it
320331

321-
if ((value & 0x04) == 0) *localDataOutRegister &= outmask2;
322-
else *localDataOutRegister |= outmask1;
332+
if ((value & 0x04) == 0) *localDataOutRegister = d0;
333+
else *localDataOutRegister = d1;
323334
r = *localClockRegister;
324335
*localClockRegister = r | cbmask1; // set one bit
325336
*localClockRegister = r; // reset it
326337

327-
if ((value & 0x02) == 0) *localDataOutRegister &= outmask2;
328-
else *localDataOutRegister |= outmask1;
338+
if ((value & 0x02) == 0) *localDataOutRegister = d0;
339+
else *localDataOutRegister = d1;
329340
r = *localClockRegister;
330341
*localClockRegister = r | cbmask1; // set one bit
331342
*localClockRegister = r; // reset it
332343

333-
if ((value & 0x01) == 0) *localDataOutRegister &= outmask2;
334-
else *localDataOutRegister |= outmask1;
344+
if ((value & 0x01) == 0) *localDataOutRegister = d0;
345+
else *localDataOutRegister = d1;
335346
r = *localClockRegister;
336347
*localClockRegister = r | cbmask1; // set one bit
337348
*localClockRegister = r; // reset it
@@ -352,16 +363,20 @@ size_t FastShiftOut::writeMSBFIRST(uint8_t data)
352363
uint8_t oldSREG = SREG;
353364
noInterrupts();
354365

366+
// See discussion #17
367+
uint8_t d0 = *localDataOutRegister & outmask2; // cached register value to write for a 0 data bit
368+
uint8_t d1 = d0 | outmask1; // cached register value to write for a 1 data bit
355369
for (uint8_t m = 0x80; m > 0; m >>= 1)
356370
{
357371
// process one bit
358-
if ((value & m) == 0) *localDataOutRegister &= outmask2;
359-
else *localDataOutRegister |= outmask1;
372+
if ((value & m) == 0) *localDataOutRegister = d0;
373+
else *localDataOutRegister = d1;
360374
uint8_t r = *localClockRegister;
361375
*localClockRegister = r | cbmask1; // set one bit
362376
*localClockRegister = r; // reset it
363377
}
364378

379+
365380
// restore interrupt state
366381
SREG = oldSREG;
367382

libraries/FastShiftOut/FastShiftOut.h

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
//
33
// FILE: FastShiftOut.h
44
// AUTHOR: Rob Tillaart
5-
// VERSION: 0.4.0
5+
// VERSION: 0.4.1
66
// PURPOSE: shiftOut class that implements the Print interface
77
// DATE: 2013-08-22
88
// URL: https://github.com/RobTillaart/FastShiftOut
@@ -11,10 +11,10 @@
1111
#include "Arduino.h"
1212
#include "Print.h"
1313

14-
#define FASTSHIFTOUT_LIB_VERSION (F("0.4.0"))
14+
#define FASTSHIFTOUT_LIB_VERSION (F("0.4.1"))
1515

1616
// uncomment next line to get SPEED OPTIMIZED CODE
17-
#define FASTSHIFTOUT_AVR_LOOP_UNROLLED 1
17+
// #define FASTSHIFTOUT_AVR_LOOP_UNROLLED 1
1818

1919

2020
class FastShiftOut : public Print

0 commit comments

Comments
 (0)