diff mbox series

[wpa_suppplicant,2/2] driver_nl82011: wait for rtnetlink event with carrier_up_count

Message ID 20231201114952.420b40a5f188.I75677b755f36ca63f8289d84de29b212f4c37ec0@changeid
State New
Headers show
Series wpa_supplicant: wait for carrier race | expand

Commit Message

Johannes Berg Dec. 1, 2023, 10:49 a.m. UTC
From: Johannes Berg <johannes.berg@intel.com>

There's a race (see the comment in the code) between the
kernel and userspace that can lead to dropping TX packets
after the associated event was already received.

If the kernel indicates the carrier_up_count, wait for
the corresponding rtnetlink event to reach that count.
This fixes the race since the rtnetlink event is sent
after the async processing that's needed in the kernel
before a frame can be transmitted.

Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 src/drivers/driver_nl80211.c       |  7 ++++
 src/drivers/driver_nl80211.h       |  2 ++
 src/drivers/driver_nl80211_event.c | 52 ++++++++++++++++++++++++++++++
 3 files changed, 61 insertions(+)
diff mbox series

Patch

diff --git a/src/drivers/driver_nl80211.c b/src/drivers/driver_nl80211.c
index 03d54222bb52..b442dee1e710 100644
--- a/src/drivers/driver_nl80211.c
+++ b/src/drivers/driver_nl80211.c
@@ -1365,6 +1365,7 @@  static void wpa_driver_nl80211_event_rtm_newlink(void *ctx,
 	char ifname[IFNAMSIZ + 1];
 	char extra[100], *pos, *end;
 	int init_failed;
+	u32 carrier_up_count = 0;
 
 	extra[0] = '\0';
 	pos = extra;
@@ -1396,6 +1397,9 @@  static void wpa_driver_nl80211_event_rtm_newlink(void *ctx,
 			pos += os_snprintf(pos, end - pos, " linkmode=%u",
 					   nla_get_u32((struct nlattr *) attr));
 			break;
+		case IFLA_CARRIER_UP_COUNT:
+			carrier_up_count = nla_get_u32((struct nlattr *) attr);
+			break;
 		}
 		attr = RTA_NEXT(attr, attrlen);
 	}
@@ -1415,6 +1419,9 @@  static void wpa_driver_nl80211_event_rtm_newlink(void *ctx,
 	if (init_failed)
 		return; /* do not update interface state */
 
+	if (carrier_up_count)
+		drv->carrier_up_count = carrier_up_count;
+
 	if (!drv->if_disabled && !(ifi->ifi_flags & IFF_UP)) {
 		namebuf[0] = '\0';
 		if (if_indextoname(ifi->ifi_index, namebuf) &&
diff --git a/src/drivers/driver_nl80211.h b/src/drivers/driver_nl80211.h
index f82f604e9017..874988715b21 100644
--- a/src/drivers/driver_nl80211.h
+++ b/src/drivers/driver_nl80211.h
@@ -201,6 +201,8 @@  struct wpa_driver_nl80211_data {
 	unsigned int puncturing:1;
 	unsigned int qca_ap_allowed_freqs:1;
 
+	u32 carrier_up_count;
+
 	u32 ignore_next_local_disconnect;
 	u32 ignore_next_local_deauth;
 
diff --git a/src/drivers/driver_nl80211_event.c b/src/drivers/driver_nl80211_event.c
index 60b4fb51fcd8..4ff25b57ceeb 100644
--- a/src/drivers/driver_nl80211_event.c
+++ b/src/drivers/driver_nl80211_event.c
@@ -19,6 +19,7 @@ 
 #include "common/ieee802_11_defs.h"
 #include "common/ieee802_11_common.h"
 #include "driver_nl80211.h"
+#include "netlink.h"
 
 
 static void
@@ -256,6 +257,49 @@  static void nl80211_parse_wmm_params(struct nlattr *wmm_attr,
 }
 
 
+/*
+ * Wait for an RTM newlink event with corresponding carrier up count
+ *
+ * There's a race condition with mac80211 and the network stack, which
+ * mostly hits depending on scheduling in time-simulation (tests),
+ * which is that mac80211 will both indicate carrier on to the network
+ * stack and send the associated/... event to userspace. Now, the
+ * internal kernel function netif_carrier_ok() immediately returns
+ * true after this, however, the linkwatch work still needs to run
+ * and change the TX queue qdisc away from noop (which drops all TX
+ * packets).
+ *
+ * When the race happens, userspace (and in particular tests) can see
+ * the associated/... event and immediately try to send a frame, at a
+ * time that the linkwatch work hasn't run yet, causing the frame to
+ * be dropped.
+ *
+ * Thus, if the kernel indicated the current carrier_up_count in an
+ * event, wait here for an RTM newlink event for our interface, so in
+ * in addition to seeing the associated/... event, we also know the
+ * carrier state has actually changed sufficiently to send packets,
+ * if it was meant to change.
+ *
+ * This works because the event to userspace is also sent from the
+ * asynchronous linkwatch work.
+ */
+static void
+nl80211_wait_for_carrier_up_count(struct wpa_driver_nl80211_data *drv,
+				  u32 carrier_up_count)
+{
+#define WRAPPED_U32_LESS(x, y)	((s32)(y) - (s32)(x) < 0)
+
+	if (WRAPPED_U32_LESS(drv->carrier_up_count, carrier_up_count))
+		netlink_process_one_event(drv->global->netlink, 100);
+
+	if (WRAPPED_U32_LESS(drv->carrier_up_count, carrier_up_count))
+		wpa_printf(MSG_ERROR,
+			   "nl80211: %s: carrier up count %u not seen (got %u)\n",
+			   drv->first_bss->ifname, carrier_up_count,
+			   drv->carrier_up_count);
+}
+
+
 static void mlme_event_assoc(struct wpa_driver_nl80211_data *drv,
 			     const u8 *frame, size_t len, struct nlattr *wmm,
 			     struct nlattr *req_ie)
@@ -3845,6 +3889,14 @@  static void do_process_drv_event(struct i802_bss *bss, int cmd,
 	     cmd == NL80211_CMD_SCAN_ABORTED))
 		nl80211_restore_ap_mode(bss);
 
+	/* see comment above wpa_driver_nl80211_own_ifname() */
+	if (tb[NL80211_ATTR_CARRIER_UP_COUNT]) {
+		u32 carrier_up_count =
+			nla_get_u32(tb[NL80211_ATTR_CARRIER_UP_COUNT]);
+
+		nl80211_wait_for_carrier_up_count(drv, carrier_up_count);
+	}
+
 	switch (cmd) {
 	case NL80211_CMD_TRIGGER_SCAN:
 		wpa_dbg(drv->ctx, MSG_DEBUG, "nl80211: Scan trigger");