Skip to content

Commit 1cf34c8

Browse files
committed
nimble/phy/nrf5x: fix race conditions in TIMER0 setup
In the nRF5x LL PHY, we use TIMER0 to trigger various Tx/Rx sequencing tasks. The PHY attempted to recover from some cases in which it was 'late' setting up TIMER0; in theory, these ought not have happened, but if interrupt priorities do not provide absolute priority for the LL (for instance, in RTOS systems that have other priorities, or scheduling systems that have incompatible interrupt priority in order to call into RTOS queue management routines), then the PHY can indeed end up failing to schedule a transmit / receive window in time. In some cases, the recovery process for these had race conditions, which could result in the LL state machine locking up (the end-of-tx interrupt would never fire). One race condition was extremely short (a few instructions wide; if the timer was set, but the timer triggered before the appropriate PPI could be configured); the other race condition was a microsecond wide (comparison to see if the timer had already triggered needed the current time to be greater in order to fail, but if it triggered within that microsecond, the current time would be equal). There are also other potential issues around timer event reset and check that we did not replicate, but did notice by inspection. These cases appeared in a few different forms, so we factor out a "safe reset" routine to ensure that a timer will not trigger (to be used before setting up a PPI), and then we also factor out a routine to check whether a timer has been missed. This change is verified to fix some observed stability issues in PebbleOS under heavy load. Signed-off-by: Joshua Wise <[email protected]>
1 parent 75a3ec3 commit 1cf34c8

File tree

1 file changed

+45
-24
lines changed

1 file changed

+45
-24
lines changed

nimble/drivers/nrf5x/src/ble_phy.c

Lines changed: 45 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -583,6 +583,31 @@ ble_phy_rxpdu_copy(uint8_t *dptr, struct os_mbuf *rxpdu)
583583
sizeof(struct ble_mbuf_hdr));
584584
}
585585

586+
/**
587+
* There is no way on nRF5 to disable a COMPARE event entirely. So we fake
588+
* it, by setting it as far in the future as possible (by triggering a
589+
* capture to it), and then clearing the event if it was set.
590+
*/
591+
static void
592+
timer0_safely_reset(int timer)
593+
{
594+
nrf_timer_task_trigger(NRF_TIMER0, nrf_timer_capture_task_get(timer));
595+
NRF_TIMER0->EVENTS_COMPARE[timer] = 0;
596+
}
597+
598+
/**
599+
* It is possible that we set up a timer, but we were too late -- it didn't
600+
* fire. Detect when this happens, reliably, by capturing the current time
601+
* to a scratch counter and comparing with wraparound.
602+
*/
603+
static bool
604+
timer0_did_miss(int timer, int scratch)
605+
{
606+
nrf_timer_task_trigger(NRF_TIMER0, nrf_timer_capture_task_get(scratch));
607+
uint32_t ticks_rem = NRF_TIMER0->CC[timer] - NRF_TIMER0->CC[scratch];
608+
return (ticks_rem >= 0x80000000) && !NRF_TIMER0->EVENTS_COMPARE[timer];
609+
}
610+
586611
/**
587612
* Called when we want to wait if the radio is in either the rx or tx
588613
* disable states. We want to wait until that state is over before doing
@@ -885,13 +910,13 @@ ble_phy_wfr_enable(int txrx, uint8_t tx_phy_mode, uint32_t wfr_usecs)
885910
/* Adjust for delay between actual access address RX and EVENT_ADDRESS */
886911
end_time += g_ble_phy_t_rxaddrdelay[phy];
887912

888-
/* wfr_secs is the time from rxen until timeout */
889-
nrf_timer_cc_set(NRF_TIMER0, 3, end_time);
890-
NRF_TIMER0->EVENTS_COMPARE[3] = 0;
891-
892913
/* Enable wait for response PPI */
914+
timer0_safely_reset(3);
893915
phy_ppi_wfr_enable();
894916

917+
/* wfr_secs is the time from rxen until timeout */
918+
nrf_timer_cc_set(NRF_TIMER0, 3, end_time);
919+
895920
/*
896921
* It may happen that if CPU is halted for a brief moment (e.g. during flash
897922
* erase or write), TIMER0 already counted past CC[3] and thus wfr will not
@@ -904,8 +929,7 @@ ble_phy_wfr_enable(int txrx, uint8_t tx_phy_mode, uint32_t wfr_usecs)
904929
* CC[1] is only used as a reference on RX start, we do not need it here so
905930
* it can be used to read TIMER0 counter.
906931
*/
907-
nrf_timer_task_trigger(NRF_TIMER0, NRF_TIMER_TASK_CAPTURE1);
908-
if (NRF_TIMER0->CC[1] > NRF_TIMER0->CC[3]) {
932+
if (timer0_did_miss(3, 1 /* scratch */)) {
909933
phy_ppi_wfr_disable();
910934
nrf_radio_task_trigger(NRF_RADIO, NRF_RADIO_TASK_DISABLE);
911935
}
@@ -1096,15 +1120,15 @@ ble_phy_tx_end_isr(void)
10961120

10971121
#if PHY_USE_FEM_LNA
10981122
fem_time = rx_time - MYNEWT_VAL(BLE_FEM_LNA_TURN_ON_US);
1099-
nrf_timer_cc_set(NRF_TIMER0, 2, fem_time);
1100-
NRF_TIMER0->EVENTS_COMPARE[2] = 0;
1123+
timer0_safely_reset(2);
11011124
phy_fem_enable_lna();
1125+
nrf_timer_cc_set(NRF_TIMER0, 2, fem_time);
11021126
#endif
11031127

11041128
radio_time = rx_time - BLE_PHY_T_RXENFAST;
1105-
nrf_timer_cc_set(NRF_TIMER0, 0, radio_time);
1106-
NRF_TIMER0->EVENTS_COMPARE[0] = 0;
1129+
timer0_safely_reset(0);
11071130
phy_ppi_timer0_compare0_to_radio_rxen_enable();
1131+
nrf_timer_cc_set(NRF_TIMER0, 0, radio_time);
11081132

11091133
/* In case TIMER0 did already count past CC[0] and/or CC[2], radio
11101134
* and/or LNA may not be enabled. In any case we won't be stuck since
@@ -1142,18 +1166,17 @@ ble_phy_tx_end_isr(void)
11421166
tx_time -= g_ble_phy_t_txdelay[g_ble_phy_data.phy_cur_phy_mode];
11431167

11441168
radio_time = tx_time - BLE_PHY_T_TXENFAST;
1145-
nrf_timer_cc_set(NRF_TIMER0, 0, radio_time);
1146-
NRF_TIMER0->EVENTS_COMPARE[0] = 0;
1169+
timer0_safely_reset(0);
11471170
phy_ppi_timer0_compare0_to_radio_txen_enable();
1171+
nrf_timer_cc_set(NRF_TIMER0, 0, radio_time);
11481172

11491173
#if PHY_USE_FEM_PA
1150-
nrf_timer_cc_set(NRF_TIMER0, 2, fem_time);
1151-
NRF_TIMER0->EVENTS_COMPARE[2] = 0;
1174+
timer0_safely_reset(2);
11521175
phy_fem_enable_pa();
1176+
nrf_timer_cc_set(NRF_TIMER0, 2, fem_time);
11531177
#endif
11541178

1155-
nrf_timer_task_trigger(NRF_TIMER0, NRF_TIMER_TASK_CAPTURE3);
1156-
if (NRF_TIMER0->CC[3] > NRF_TIMER0->CC[0]) {
1179+
if (timer0_did_miss(0, 3 /* scratch */)) {
11571180
phy_ppi_timer0_compare0_to_radio_txen_disable();
11581181
g_ble_phy_data.phy_transition_late = 1;
11591182
}
@@ -1292,14 +1315,14 @@ ble_phy_rx_end_isr(void)
12921315
tx_time -= g_ble_phy_t_txdelay[g_ble_phy_data.phy_cur_phy_mode];
12931316

12941317
radio_time = tx_time - BLE_PHY_T_TXENFAST;
1295-
nrf_timer_cc_set(NRF_TIMER0, 0, radio_time);
1296-
NRF_TIMER0->EVENTS_COMPARE[0] = 0;
1318+
timer0_safely_reset(0);
12971319
phy_ppi_timer0_compare0_to_radio_txen_enable();
1320+
nrf_timer_cc_set(NRF_TIMER0, 0, radio_time);
12981321

12991322
#if PHY_USE_FEM_PA
1300-
nrf_timer_cc_set(NRF_TIMER0, 2, fem_time);
1301-
NRF_TIMER0->EVENTS_COMPARE[2] = 0;
1323+
timer0_safely_reset(2);
13021324
phy_fem_enable_pa();
1325+
nrf_timer_cc_set(NRF_TIMER0, 2, fem_time);
13031326
#endif
13041327

13051328
/* Need to check if TIMER0 did not already count past CC[0] and/or CC[2], so
@@ -1308,11 +1331,9 @@ ble_phy_rx_end_isr(void)
13081331
*
13091332
* Note: CC[3] is used only for wfr which we do not need here.
13101333
*/
1311-
nrf_timer_task_trigger(NRF_TIMER0, NRF_TIMER_TASK_CAPTURE3);
1312-
is_late = (NRF_TIMER0->CC[3] > radio_time) && !NRF_TIMER0->EVENTS_COMPARE[0];
1334+
is_late = timer0_did_miss(0, 3 /* scratch */);
13131335
#if PHY_USE_FEM_PA
1314-
is_late = is_late ||
1315-
((NRF_TIMER0->CC[3] > fem_time) && !NRF_TIMER0->EVENTS_COMPARE[2]);
1336+
is_late = is_late || timer0_did_miss(2, 3 /* scratch */);
13161337
#endif
13171338
if (is_late) {
13181339
phy_ppi_timer0_compare0_to_radio_txen_disable();

0 commit comments

Comments
 (0)