diff mbox series

[net-next,04/16] net/mlx5e: Use netdev events to set/del egress acl forward-to-vport rule

Message ID 20200527014924.278327-5-saeedm@mellanox.com
State New
Headers show
Series [net-next,01/16] net/mlx5: E-Switch, Refactor eswitch egress acl codes | expand

Commit Message

Saeed Mahameed May 27, 2020, 1:49 a.m. UTC
From: Or Gerlitz <ogerlitz@mellanox.com>

Register a notifier block to handle netdev events for bond device
of non-uplink representors to support eswitch vports bonding.

When a non-uplink representor is a lower dev (slave) of bond and
becomes active, adding egress acl forward-to-vport rule of all slave
netdevs (active + standby) to forward to this representor's vport. Use
change lower netdev event to do this.

Use change upper event to detect slave representor unslaved from lag
device to delete its vport egress acl forward rule if any.

Signed-off-by: Or Gerlitz <ogerlitz@mellanox.com>
Signed-off-by: Vu Pham <vuhuong@mellanox.com>
Reviewed-by: Parav Pandit <parav@mellanox.com>
Reviewed-by: Roi Dayan <roid@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 .../net/ethernet/mellanox/mlx5/core/Makefile  |   3 +-
 .../ethernet/mellanox/mlx5/core/en/rep/bond.c | 161 ++++++++++++++++++
 .../net/ethernet/mellanox/mlx5/core/en_rep.c  |   8 +-
 .../net/ethernet/mellanox/mlx5/core/en_rep.h  |   7 +
 4 files changed, 175 insertions(+), 4 deletions(-)
 create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/en/rep/bond.c
diff mbox series

Patch

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/Makefile
index 3934dc258041..b61e47bc16e8 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/Makefile
+++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile
@@ -34,7 +34,8 @@  mlx5_core-$(CONFIG_MLX5_EN_ARFS)     += en_arfs.o
 mlx5_core-$(CONFIG_MLX5_EN_RXNFC)    += en_fs_ethtool.o
 mlx5_core-$(CONFIG_MLX5_CORE_EN_DCB) += en_dcbnl.o en/port_buffer.o
 mlx5_core-$(CONFIG_PCI_HYPERV_INTERFACE) += en/hv_vhca_stats.o
-mlx5_core-$(CONFIG_MLX5_ESWITCH)     += en_rep.o lib/geneve.o lib/port_tun.o lag_mp.o
+mlx5_core-$(CONFIG_MLX5_ESWITCH)     += lag_mp.o lib/geneve.o lib/port_tun.o \
+					en_rep.o en/rep/bond.o
 mlx5_core-$(CONFIG_MLX5_CLS_ACT)     += en_tc.o en/rep/tc.o en/rep/neigh.o \
 					en/mapping.o esw/chains.o en/tc_tun.o \
 					en/tc_tun_vxlan.o en/tc_tun_gre.o en/tc_tun_geneve.o \
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bond.c b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bond.c
new file mode 100644
index 000000000000..d0aab36f1947
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bond.c
@@ -0,0 +1,161 @@ 
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2020 Mellanox Technologies Inc. All rights reserved. */
+
+#include <net/lag.h>
+
+#include "mlx5_core.h"
+#include "eswitch.h"
+#include "esw/acl/ofld.h"
+#include "en_rep.h"
+
+struct mlx5e_rep_bond {
+	struct notifier_block nb;
+	struct netdev_net_notifier nn;
+};
+
+static bool mlx5e_rep_is_lag_netdev(struct net_device *netdev)
+{
+	struct mlx5e_priv *priv = netdev_priv(netdev);
+	struct mlx5e_rep_priv *rpriv = priv->ppriv;
+
+	/* A given netdev is not a representor or not a slave of LAG configuration */
+	if (!mlx5e_eswitch_rep(netdev) || !bond_slave_get_rtnl(netdev))
+		return false;
+
+	/* Egress acl forward to vport is supported only non-uplink representor */
+	return rpriv->rep->vport != MLX5_VPORT_UPLINK;
+}
+
+static void mlx5e_rep_changelowerstate_event(struct net_device *netdev, void *ptr)
+{
+	struct netdev_notifier_changelowerstate_info *info;
+	struct netdev_lag_lower_state_info *lag_info;
+	struct mlx5e_rep_priv *rpriv;
+	struct net_device *lag_dev;
+	struct mlx5e_priv *priv;
+	struct list_head *iter;
+	struct net_device *dev;
+	u16 acl_vport_num;
+	u16 fwd_vport_num;
+
+	if (!mlx5e_rep_is_lag_netdev(netdev))
+		return;
+
+	info = ptr;
+	lag_info = info->lower_state_info;
+	/* This is not an event of a representor becoming active slave */
+	if (!lag_info->tx_enabled)
+		return;
+
+	priv = netdev_priv(netdev);
+	rpriv = priv->ppriv;
+	fwd_vport_num = rpriv->rep->vport;
+	lag_dev = netdev_master_upper_dev_get(netdev);
+
+	netdev_dbg(netdev, "lag_dev(%s)'s slave vport(%d) is txable(%d)\n",
+		   lag_dev->name, fwd_vport_num, net_lag_port_dev_txable(netdev));
+
+	/* Point everyone's egress acl to the vport of the active representor */
+	netdev_for_each_lower_dev(lag_dev, dev, iter) {
+		priv = netdev_priv(dev);
+		rpriv = priv->ppriv;
+		acl_vport_num = rpriv->rep->vport;
+		if (acl_vport_num != fwd_vport_num) {
+			mlx5_esw_acl_egress_vport_bond(priv->mdev->priv.eswitch,
+						       fwd_vport_num,
+						       acl_vport_num);
+		}
+	}
+}
+
+static void mlx5e_rep_changeupper_event(struct net_device *netdev, void *ptr)
+{
+	struct netdev_notifier_changeupper_info *info = ptr;
+	struct mlx5e_rep_priv *rpriv;
+	struct mlx5e_priv *priv;
+
+	if (!mlx5e_rep_is_lag_netdev(netdev))
+		return;
+
+	/* Nothing to setup for new enslaved representor */
+	if (info->linking)
+		return;
+
+	priv = netdev_priv(netdev);
+	rpriv = priv->ppriv;
+	netdev_dbg(netdev, "Unslave, reset vport(%d) egress acl\n", rpriv->rep->vport);
+
+	/* Reset all egress acl rules of unslave representor's vport */
+	mlx5_esw_acl_egress_vport_unbond(priv->mdev->priv.eswitch,
+					 rpriv->rep->vport);
+}
+
+/* Bond device of representors and netdev events are used here in specific way
+ * to support eswitch vports bonding and to perform failover of eswitch vport
+ * by modifying the vport's egress acl of lower dev representors. Thus this
+ * also change the traditional behavior of lower dev under bond device.
+ * All non-representor netdevs or representors of other vendors as lower dev
+ * of bond device are not supported.
+ */
+static int mlx5e_rep_esw_bond_netevent(struct notifier_block *nb,
+				       unsigned long event, void *ptr)
+{
+	struct net_device *netdev = netdev_notifier_info_to_dev(ptr);
+
+	switch (event) {
+	case NETDEV_CHANGELOWERSTATE:
+		mlx5e_rep_changelowerstate_event(netdev, ptr);
+		break;
+	case NETDEV_CHANGEUPPER:
+		mlx5e_rep_changeupper_event(netdev, ptr);
+		break;
+	}
+	return NOTIFY_DONE;
+}
+
+/* If HW support eswitch vports bonding, register a specific notifier to
+ * handle it when two or more representors are bonded
+ */
+int mlx5e_rep_bond_init(struct mlx5e_rep_priv *rpriv)
+{
+	struct mlx5_rep_uplink_priv *uplink_priv = &rpriv->uplink_priv;
+	struct net_device *netdev = rpriv->netdev;
+	struct mlx5e_priv *priv;
+	int ret = 0;
+
+	priv = netdev_priv(netdev);
+	if (!mlx5_esw_acl_egress_fwd2vport_supported(priv->mdev->priv.eswitch))
+		goto out;
+
+	uplink_priv->bond = kvzalloc(sizeof(*uplink_priv->bond), GFP_KERNEL);
+	if (!uplink_priv->bond) {
+		ret = -ENOMEM;
+		goto out;
+	}
+
+	uplink_priv->bond->nb.notifier_call = mlx5e_rep_esw_bond_netevent;
+	ret = register_netdevice_notifier_dev_net(netdev,
+						  &uplink_priv->bond->nb,
+						  &uplink_priv->bond->nn);
+	if (ret) {
+		netdev_err(netdev, "register bonding netevent notifier, err(%d)\n", ret);
+		kvfree(uplink_priv->bond);
+		uplink_priv->bond = NULL;
+	}
+out:
+	return ret;
+}
+
+void mlx5e_rep_bond_cleanup(struct mlx5e_rep_priv *rpriv)
+{
+	struct mlx5e_priv *priv = netdev_priv(rpriv->netdev);
+
+	if (!mlx5_esw_acl_egress_fwd2vport_supported(priv->mdev->priv.eswitch) ||
+	    !rpriv->uplink_priv.bond)
+		return;
+
+	unregister_netdevice_notifier_dev_net(rpriv->netdev,
+					      &rpriv->uplink_priv.bond->nb,
+					      &rpriv->uplink_priv.bond->nn);
+	kvfree(rpriv->uplink_priv.bond);
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
index 4e13e37a9ecd..12593d75e885 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
@@ -959,16 +959,18 @@  static int mlx5e_init_uplink_rep_tx(struct mlx5e_rep_priv *rpriv)
 
 	mlx5_init_port_tun_entropy(&uplink_priv->tun_entropy, priv->mdev);
 
+	mlx5e_rep_bond_init(rpriv);
 	err = mlx5e_rep_tc_netdevice_event_register(rpriv);
 	if (err) {
 		mlx5_core_err(priv->mdev, "Failed to register netdev notifier, err: %d\n",
 			      err);
-		goto tc_rep_cleanup;
+		goto err_event_reg;
 	}
 
 	return 0;
 
-tc_rep_cleanup:
+err_event_reg:
+	mlx5e_rep_bond_cleanup(rpriv);
 	mlx5e_rep_tc_cleanup(rpriv);
 	return err;
 }
@@ -1001,7 +1003,7 @@  static void mlx5e_cleanup_uplink_rep_tx(struct mlx5e_rep_priv *rpriv)
 {
 	mlx5e_rep_tc_netdevice_event_unregister(rpriv);
 	mlx5e_rep_indr_clean_block_privs(rpriv);
-
+	mlx5e_rep_bond_cleanup(rpriv);
 	mlx5e_rep_tc_cleanup(rpriv);
 }
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h
index 1c4af8522467..7e56787aa224 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h
@@ -56,6 +56,7 @@  struct mlx5e_neigh_update_table {
 };
 
 struct mlx5_tc_ct_priv;
+struct mlx5e_rep_bond;
 struct mlx5_rep_uplink_priv {
 	/* Filters DB - instantiated by the uplink representor and shared by
 	 * the uplink's VFs
@@ -89,6 +90,9 @@  struct mlx5_rep_uplink_priv {
 	struct mapping_ctx *tunnel_enc_opts_mapping;
 
 	struct mlx5_tc_ct_priv *ct_priv;
+
+	/* support eswitch vports bonding */
+	struct mlx5e_rep_bond *bond;
 };
 
 struct mlx5e_rep_priv {
@@ -211,6 +215,9 @@  struct mlx5e_rep_sq {
 
 void mlx5e_rep_register_vport_reps(struct mlx5_core_dev *mdev);
 void mlx5e_rep_unregister_vport_reps(struct mlx5_core_dev *mdev);
+int mlx5e_rep_bond_init(struct mlx5e_rep_priv *rpriv);
+void mlx5e_rep_bond_cleanup(struct mlx5e_rep_priv *rpriv);
+
 bool mlx5e_is_uplink_rep(struct mlx5e_priv *priv);
 int mlx5e_add_sqs_fwd_rules(struct mlx5e_priv *priv);
 void mlx5e_remove_sqs_fwd_rules(struct mlx5e_priv *priv);