@@ -158,6 +158,22 @@ struct mlx5e_neigh_hash_entry {
enum {
/* set when the encap entry is successfully offloaded into HW */
MLX5_ENCAP_ENTRY_VALID = BIT(0),
+ MLX5_REFORMAT_DECAP = BIT(1),
+};
+
+struct mlx5e_decap_key {
+ struct ethhdr key;
+};
+
+struct mlx5e_decap_entry {
+ struct mlx5e_decap_key key;
+ struct list_head flows;
+ struct hlist_node hlist;
+ refcount_t refcnt;
+ struct completion res_ready;
+ int compl_result;
+ struct mlx5_pkt_reformat *pkt_reformat;
+ struct rcu_head rcu;
};
struct mlx5e_encap_entry {
@@ -46,6 +46,7 @@
#include <net/tc_act/tc_tunnel_key.h>
#include <net/tc_act/tc_pedit.h>
#include <net/tc_act/tc_csum.h>
+#include <net/tc_act/tc_mpls.h>
#include <net/arp.h>
#include <net/ipv6_stubs.h>
#include <net/bareudp.h>
@@ -93,6 +94,7 @@ enum {
MLX5E_TC_FLOW_FLAG_NOT_READY = MLX5E_TC_FLOW_BASE + 5,
MLX5E_TC_FLOW_FLAG_DELETED = MLX5E_TC_FLOW_BASE + 6,
MLX5E_TC_FLOW_FLAG_CT = MLX5E_TC_FLOW_BASE + 7,
+ MLX5E_TC_FLOW_FLAG_L3_TO_L2_DECAP = MLX5E_TC_FLOW_BASE + 8,
};
#define MLX5E_TC_MAX_SPLITS 1
@@ -126,6 +128,11 @@ struct mlx5e_tc_flow {
u64 cookie;
unsigned long flags;
struct mlx5_flow_handle *rule[MLX5E_TC_MAX_SPLITS + 1];
+
+ /* flows sharing the same reformat object - currently mpls decap */
+ struct list_head l3_to_l2_reformat;
+ struct mlx5e_decap_entry *decap_reformat;
+
/* Flow can be associated with multiple encap IDs.
* The number of encaps is bounded by the number of supported
* destinations.
@@ -157,6 +164,7 @@ struct mlx5e_tc_flow_parse_attr {
struct mlx5_flow_spec spec;
struct mlx5e_tc_mod_hdr_acts mod_hdr_acts;
int mirred_ifindex[MLX5_MAX_FLOW_FWD_VPORTS];
+ struct ethhdr eth;
};
#define MLX5E_TC_TABLE_NUM_GROUPS 4
@@ -1124,6 +1132,11 @@ static int mlx5e_attach_encap(struct mlx5e_priv *priv,
struct netlink_ext_ack *extack,
struct net_device **encap_dev,
bool *encap_valid);
+static int mlx5e_attach_decap(struct mlx5e_priv *priv,
+ struct mlx5e_tc_flow *flow,
+ struct netlink_ext_ack *extack);
+static void mlx5e_detach_decap(struct mlx5e_priv *priv,
+ struct mlx5e_tc_flow *flow);
static struct mlx5_flow_handle *
mlx5e_tc_offload_fdb_rules(struct mlx5_eswitch *esw,
@@ -1299,6 +1312,12 @@ mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv,
return -EOPNOTSUPP;
}
+ if (flow_flag_test(flow, L3_TO_L2_DECAP)) {
+ err = mlx5e_attach_decap(priv, flow, extack);
+ if (err)
+ return err;
+ }
+
for (out_index = 0; out_index < MLX5_MAX_FLOW_FWD_VPORTS; out_index++) {
int mirred_ifindex;
@@ -1408,6 +1427,9 @@ static void mlx5e_tc_del_fdb_flow(struct mlx5e_priv *priv,
if (attr->action & MLX5_FLOW_CONTEXT_ACTION_COUNT)
mlx5_fc_destroy(attr->counter_dev, attr->counter);
+
+ if (flow_flag_test(flow, L3_TO_L2_DECAP))
+ mlx5e_detach_decap(priv, flow);
}
void mlx5e_tc_encap_flows_add(struct mlx5e_priv *priv,
@@ -1684,6 +1706,17 @@ static void mlx5e_encap_dealloc(struct mlx5e_priv *priv, struct mlx5e_encap_entr
kfree_rcu(e, rcu);
}
+static void mlx5e_decap_dealloc(struct mlx5e_priv *priv,
+ struct mlx5e_decap_entry *d)
+{
+ WARN_ON(!list_empty(&d->flows));
+
+ if (!d->compl_result)
+ mlx5_packet_reformat_dealloc(priv->mdev, d->pkt_reformat);
+
+ kfree_rcu(d, rcu);
+}
+
void mlx5e_encap_put(struct mlx5e_priv *priv, struct mlx5e_encap_entry *e)
{
struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
@@ -1696,6 +1729,18 @@ void mlx5e_encap_put(struct mlx5e_priv *priv, struct mlx5e_encap_entry *e)
mlx5e_encap_dealloc(priv, e);
}
+static void mlx5e_decap_put(struct mlx5e_priv *priv, struct mlx5e_decap_entry *d)
+{
+ struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+
+ if (!refcount_dec_and_mutex_lock(&d->refcnt, &esw->offloads.decap_tbl_lock))
+ return;
+ hash_del_rcu(&d->hlist);
+ mutex_unlock(&esw->offloads.decap_tbl_lock);
+
+ mlx5e_decap_dealloc(priv, d);
+}
+
static void mlx5e_detach_encap(struct mlx5e_priv *priv,
struct mlx5e_tc_flow *flow, int out_index)
{
@@ -1719,6 +1764,29 @@ static void mlx5e_detach_encap(struct mlx5e_priv *priv,
mlx5e_encap_dealloc(priv, e);
}
+static void mlx5e_detach_decap(struct mlx5e_priv *priv,
+ struct mlx5e_tc_flow *flow)
+{
+ struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+ struct mlx5e_decap_entry *d = flow->decap_reformat;
+
+ if (!d)
+ return;
+
+ mutex_lock(&esw->offloads.decap_tbl_lock);
+ list_del(&flow->l3_to_l2_reformat);
+ flow->decap_reformat = NULL;
+
+ if (!refcount_dec_and_test(&d->refcnt)) {
+ mutex_unlock(&esw->offloads.decap_tbl_lock);
+ return;
+ }
+ hash_del_rcu(&d->hlist);
+ mutex_unlock(&esw->offloads.decap_tbl_lock);
+
+ mlx5e_decap_dealloc(priv, d);
+}
+
static void __mlx5e_tc_del_fdb_peer_flow(struct mlx5e_tc_flow *flow)
{
struct mlx5_eswitch *esw = flow->priv->mdev->priv.eswitch;
@@ -1990,7 +2058,11 @@ static int parse_tunnel_attr(struct mlx5e_priv *priv,
return err;
}
- flow->esw_attr->action |= MLX5_FLOW_CONTEXT_ACTION_DECAP;
+ /* With mpls over udp we decapsulate using packet reformat
+ * object
+ */
+ if (!netif_is_bareudp(filter_dev))
+ flow->esw_attr->action |= MLX5_FLOW_CONTEXT_ACTION_DECAP;
}
if (!needs_mapping && !sets_mapping)
@@ -3285,12 +3357,22 @@ static inline int cmp_encap_info(struct encap_key *a,
a->tc_tunnel->tunnel_type != b->tc_tunnel->tunnel_type;
}
+static inline int cmp_decap_info(struct mlx5e_decap_key *a,
+ struct mlx5e_decap_key *b)
+{
+ return memcmp(&a->key, &b->key, sizeof(b->key));
+}
+
static inline int hash_encap_info(struct encap_key *key)
{
return jhash(key->ip_tun_key, sizeof(*key->ip_tun_key),
key->tc_tunnel->tunnel_type);
}
+static inline int hash_decap_info(struct mlx5e_decap_key *key)
+{
+ return jhash(&key->key, sizeof(key->key), 0);
+}
static bool is_merged_eswitch_dev(struct mlx5e_priv *priv,
struct net_device *peer_netdev)
@@ -3305,13 +3387,16 @@ static bool is_merged_eswitch_dev(struct mlx5e_priv *priv,
same_hw_devs(priv, peer_priv));
}
-
-
bool mlx5e_encap_take(struct mlx5e_encap_entry *e)
{
return refcount_inc_not_zero(&e->refcnt);
}
+static bool mlx5e_decap_take(struct mlx5e_decap_entry *e)
+{
+ return refcount_inc_not_zero(&e->refcnt);
+}
+
static struct mlx5e_encap_entry *
mlx5e_encap_get(struct mlx5e_priv *priv, struct encap_key *key,
uintptr_t hash_key)
@@ -3332,6 +3417,24 @@ mlx5e_encap_get(struct mlx5e_priv *priv, struct encap_key *key,
return NULL;
}
+static struct mlx5e_decap_entry *
+mlx5e_decap_get(struct mlx5e_priv *priv, struct mlx5e_decap_key *key,
+ uintptr_t hash_key)
+{
+ struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+ struct mlx5e_decap_key r_key;
+ struct mlx5e_decap_entry *e;
+
+ hash_for_each_possible_rcu(esw->offloads.decap_tbl, e,
+ hlist, hash_key) {
+ r_key = e->key;
+ if (!cmp_decap_info(&r_key, key) &&
+ mlx5e_decap_take(e))
+ return e;
+ }
+ return NULL;
+}
+
static struct ip_tunnel_info *dup_tun_info(const struct ip_tunnel_info *tun_info)
{
size_t tun_size = sizeof(*tun_info) + tun_info->options_len;
@@ -3477,6 +3580,84 @@ static int mlx5e_attach_encap(struct mlx5e_priv *priv,
return err;
}
+static int mlx5e_attach_decap(struct mlx5e_priv *priv,
+ struct mlx5e_tc_flow *flow,
+ struct netlink_ext_ack *extack)
+{
+ struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+ struct mlx5_esw_flow_attr *attr = flow->esw_attr;
+ struct mlx5e_tc_flow_parse_attr *parse_attr;
+ struct mlx5e_decap_entry *d;
+ struct mlx5e_decap_key key;
+ uintptr_t hash_key;
+ int err;
+
+ parse_attr = attr->parse_attr;
+ if (sizeof(parse_attr->eth) > MLX5_CAP_ESW(priv->mdev, max_encap_header_size)) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "encap header larger than max supported");
+ return -EOPNOTSUPP;
+ }
+
+ key.key = parse_attr->eth;
+ hash_key = hash_decap_info(&key);
+ mutex_lock(&esw->offloads.decap_tbl_lock);
+ d = mlx5e_decap_get(priv, &key, hash_key);
+ if (d) {
+ mutex_unlock(&esw->offloads.decap_tbl_lock);
+ wait_for_completion(&d->res_ready);
+ mutex_lock(&esw->offloads.decap_tbl_lock);
+ if (d->compl_result) {
+ err = -EREMOTEIO;
+ goto out_free;
+ }
+ goto found;
+ }
+
+ d = kzalloc(sizeof(*d), GFP_KERNEL);
+ if (!d) {
+ err = -ENOMEM;
+ goto out_err;
+ }
+
+ d->key = key;
+ refcount_set(&d->refcnt, 1);
+ init_completion(&d->res_ready);
+ INIT_LIST_HEAD(&d->flows);
+ hash_add_rcu(esw->offloads.decap_tbl, &d->hlist, hash_key);
+ mutex_unlock(&esw->offloads.decap_tbl_lock);
+
+ d->pkt_reformat = mlx5_packet_reformat_alloc(priv->mdev,
+ MLX5_REFORMAT_TYPE_L3_TUNNEL_TO_L2,
+ sizeof(parse_attr->eth),
+ &parse_attr->eth,
+ MLX5_FLOW_NAMESPACE_FDB);
+ if (IS_ERR(d->pkt_reformat)) {
+ err = PTR_ERR(d->pkt_reformat);
+ d->compl_result = err;
+ }
+ mutex_lock(&esw->offloads.decap_tbl_lock);
+ complete_all(&d->res_ready);
+ if (err)
+ goto out_free;
+
+found:
+ flow->decap_reformat = d;
+ attr->decap_pkt_reformat = d->pkt_reformat;
+ list_add(&flow->l3_to_l2_reformat, &d->flows);
+ mutex_unlock(&esw->offloads.decap_tbl_lock);
+ return 0;
+
+out_free:
+ mutex_unlock(&esw->offloads.decap_tbl_lock);
+ mlx5e_decap_put(priv, d);
+ return err;
+
+out_err:
+ mutex_unlock(&esw->offloads.decap_tbl_lock);
+ return err;
+}
+
static int parse_tc_vlan_action(struct mlx5e_priv *priv,
const struct flow_action_entry *act,
struct mlx5_esw_flow_attr *attr,
@@ -3688,7 +3869,8 @@ static int verify_uplink_forwarding(struct mlx5e_priv *priv,
static int parse_tc_fdb_actions(struct mlx5e_priv *priv,
struct flow_action *flow_action,
struct mlx5e_tc_flow *flow,
- struct netlink_ext_ack *extack)
+ struct netlink_ext_ack *extack,
+ struct net_device *filter_dev)
{
struct pedit_headers_action hdrs[2] = {};
struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
@@ -3727,8 +3909,32 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv,
}
mpls_push = true;
break;
+ case FLOW_ACTION_MPLS_POP:
+ /* we only support mpls pop if it is the first action
+ * and the filter net device is bareudp. Subsequent
+ * actions can be pedit and the last can be mirred
+ * egress redirect.
+ */
+ if (i) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "mpls pop supported only as first action");
+ return -EOPNOTSUPP;
+ }
+ if (!netif_is_bareudp(filter_dev)) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "mpls pop supported only on bareudp devices");
+ return -EOPNOTSUPP;
+ }
+
+ parse_attr->eth.h_proto = act->mpls_pop.proto;
+ action |= MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT;
+ flow_flag_set(flow, L3_TO_L2_DECAP);
+ break;
case FLOW_ACTION_MANGLE:
case FLOW_ACTION_ADD:
+ if (flow_flag_test(flow, L3_TO_L2_DECAP))
+ return -EOPNOTSUPP;
+
err = parse_tc_pedit_action(priv, act, MLX5_FLOW_NAMESPACE_FDB,
hdrs, extack);
if (err)
@@ -4093,6 +4299,7 @@ mlx5e_alloc_flow(struct mlx5e_priv *priv, int attr_size,
INIT_LIST_HEAD(&flow->encaps[out_index].list);
INIT_LIST_HEAD(&flow->mod_hdr);
INIT_LIST_HEAD(&flow->hairpin);
+ INIT_LIST_HEAD(&flow->l3_to_l2_reformat);
refcount_set(&flow->refcnt, 1);
init_completion(&flow->init_done);
@@ -4162,7 +4369,7 @@ __mlx5e_add_fdb_flow(struct mlx5e_priv *priv,
if (err)
goto err_free;
- err = parse_tc_fdb_actions(priv, &rule->action, flow, extack);
+ err = parse_tc_fdb_actions(priv, &rule->action, flow, extack, filter_dev);
if (err)
goto err_free;
@@ -2262,6 +2262,8 @@ int mlx5_eswitch_init(struct mlx5_core_dev *dev)
hash_init(esw->offloads.encap_tbl);
mutex_init(&esw->offloads.mod_hdr.lock);
hash_init(esw->offloads.mod_hdr.hlist);
+ mutex_init(&esw->offloads.decap_tbl_lock);
+ hash_init(esw->offloads.decap_tbl);
atomic64_set(&esw->offloads.num_flows, 0);
mutex_init(&esw->state_lock);
mutex_init(&esw->mode_lock);
@@ -2303,6 +2305,7 @@ void mlx5_eswitch_cleanup(struct mlx5_eswitch *esw)
mutex_destroy(&esw->state_lock);
mutex_destroy(&esw->offloads.mod_hdr.lock);
mutex_destroy(&esw->offloads.encap_tbl_lock);
+ mutex_destroy(&esw->offloads.decap_tbl_lock);
kfree(esw->vports);
kfree(esw);
}
@@ -209,6 +209,8 @@ struct mlx5_esw_offload {
struct mutex peer_mutex;
struct mutex encap_tbl_lock; /* protects encap_tbl */
DECLARE_HASHTABLE(encap_tbl, 8);
+ struct mutex decap_tbl_lock; /* protects decap_tbl */
+ DECLARE_HASHTABLE(decap_tbl, 8);
struct mod_hdr_tbl mod_hdr;
DECLARE_HASHTABLE(termtbl_tbl, 8);
struct mutex termtbl_mutex; /* protects termtbl hash */
@@ -432,6 +434,7 @@ struct mlx5_esw_flow_attr {
struct mlx5_flow_table *fdb;
struct mlx5_flow_table *dest_ft;
struct mlx5_ct_attr ct_attr;
+ struct mlx5_pkt_reformat *decap_pkt_reformat;
struct mlx5e_tc_flow_parse_attr *parse_attr;
};
@@ -366,6 +366,10 @@ mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw,
}
}
}
+
+ if (attr->decap_pkt_reformat)
+ flow_act.pkt_reformat = attr->decap_pkt_reformat;
+
if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_COUNT) {
dest[i].type = MLX5_FLOW_DESTINATION_TYPE_COUNTER;
dest[i].counter_id = mlx5_fc_id(attr->counter);