diff mbox series

[net-next,v2,11/14] nexthop: Add netlink handlers for bucket dump

Message ID 52363e987d4412512cfef60114953c0af9f02085.1615485052.git.petrm@nvidia.com
State New
Headers show
Series nexthop: Resilient next-hop groups | expand

Commit Message

Petr Machata March 11, 2021, 6:03 p.m. UTC
Add a dump handler for resilient next hop buckets. When next-hop group ID
is given, it walks buckets of that group, otherwise it walks buckets of all
groups. It then dumps the buckets whose next hops match the given filtering
criteria.

Signed-off-by: Petr Machata <petrm@nvidia.com>
Reviewed-by: Ido Schimmel <idosch@nvidia.com>
Reviewed-by: David Ahern <dsahern@kernel.org>
---

Notes:
    v1 (changes since RFC):
    - u32 -> u16 for bucket counts / indices

 net/ipv4/nexthop.c | 283 +++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 283 insertions(+)
diff mbox series

Patch

diff --git a/net/ipv4/nexthop.c b/net/ipv4/nexthop.c
index 439bf3b7ced5..ed2745708f9d 100644
--- a/net/ipv4/nexthop.c
+++ b/net/ipv4/nexthop.c
@@ -55,6 +55,17 @@  static const struct nla_policy rtm_nh_res_policy_new[] = {
 	[NHA_RES_GROUP_UNBALANCED_TIMER]	= { .type = NLA_U32 },
 };
 
+static const struct nla_policy rtm_nh_policy_dump_bucket[] = {
+	[NHA_ID]		= { .type = NLA_U32 },
+	[NHA_OIF]		= { .type = NLA_U32 },
+	[NHA_MASTER]		= { .type = NLA_U32 },
+	[NHA_RES_BUCKET]	= { .type = NLA_NESTED },
+};
+
+static const struct nla_policy rtm_nh_res_bucket_policy_dump[] = {
+	[NHA_RES_BUCKET_NH_ID]	= { .type = NLA_U32 },
+};
+
 static bool nexthop_notifiers_is_empty(struct net *net)
 {
 	return !net->nexthop.notifier_chain.head;
@@ -883,6 +894,60 @@  static void nh_res_bucket_set_busy(struct nh_res_bucket *bucket)
 	atomic_long_set(&bucket->used_time, (long)jiffies);
 }
 
+static clock_t nh_res_bucket_idle_time(const struct nh_res_bucket *bucket)
+{
+	unsigned long used_time = nh_res_bucket_used_time(bucket);
+
+	return jiffies_delta_to_clock_t(jiffies - used_time);
+}
+
+static int nh_fill_res_bucket(struct sk_buff *skb, struct nexthop *nh,
+			      struct nh_res_bucket *bucket, u16 bucket_index,
+			      int event, u32 portid, u32 seq,
+			      unsigned int nlflags,
+			      struct netlink_ext_ack *extack)
+{
+	struct nh_grp_entry *nhge = nh_res_dereference(bucket->nh_entry);
+	struct nlmsghdr *nlh;
+	struct nlattr *nest;
+	struct nhmsg *nhm;
+
+	nlh = nlmsg_put(skb, portid, seq, event, sizeof(*nhm), nlflags);
+	if (!nlh)
+		return -EMSGSIZE;
+
+	nhm = nlmsg_data(nlh);
+	nhm->nh_family = AF_UNSPEC;
+	nhm->nh_flags = bucket->nh_flags;
+	nhm->nh_protocol = nh->protocol;
+	nhm->nh_scope = 0;
+	nhm->resvd = 0;
+
+	if (nla_put_u32(skb, NHA_ID, nh->id))
+		goto nla_put_failure;
+
+	nest = nla_nest_start(skb, NHA_RES_BUCKET);
+	if (!nest)
+		goto nla_put_failure;
+
+	if (nla_put_u16(skb, NHA_RES_BUCKET_INDEX, bucket_index) ||
+	    nla_put_u32(skb, NHA_RES_BUCKET_NH_ID, nhge->nh->id) ||
+	    nla_put_u64_64bit(skb, NHA_RES_BUCKET_IDLE_TIME,
+			      nh_res_bucket_idle_time(bucket),
+			      NHA_RES_BUCKET_PAD))
+		goto nla_put_failure_nest;
+
+	nla_nest_end(skb, nest);
+	nlmsg_end(skb, nlh);
+	return 0;
+
+nla_put_failure_nest:
+	nla_nest_cancel(skb, nest);
+nla_put_failure:
+	nlmsg_cancel(skb, nlh);
+	return -EMSGSIZE;
+}
+
 static bool valid_group_nh(struct nexthop *nh, unsigned int npaths,
 			   bool *is_fdb, struct netlink_ext_ack *extack)
 {
@@ -2918,10 +2983,12 @@  static int rtm_get_nexthop(struct sk_buff *in_skb, struct nlmsghdr *nlh,
 }
 
 struct nh_dump_filter {
+	u32 nh_id;
 	int dev_idx;
 	int master_idx;
 	bool group_filter;
 	bool fdb_filter;
+	u32 res_bucket_nh_id;
 };
 
 static bool nh_dump_filtered(struct nexthop *nh,
@@ -3101,6 +3168,219 @@  static int rtm_dump_nexthop(struct sk_buff *skb, struct netlink_callback *cb)
 	return err;
 }
 
+static struct nexthop *
+nexthop_find_group_resilient(struct net *net, u32 id,
+			     struct netlink_ext_ack *extack)
+{
+	struct nh_group *nhg;
+	struct nexthop *nh;
+
+	nh = nexthop_find_by_id(net, id);
+	if (!nh)
+		return ERR_PTR(-ENOENT);
+
+	if (!nh->is_group) {
+		NL_SET_ERR_MSG(extack, "Not a nexthop group");
+		return ERR_PTR(-EINVAL);
+	}
+
+	nhg = rtnl_dereference(nh->nh_grp);
+	if (!nhg->resilient) {
+		NL_SET_ERR_MSG(extack, "Nexthop group not of type resilient");
+		return ERR_PTR(-EINVAL);
+	}
+
+	return nh;
+}
+
+static int nh_valid_dump_nhid(struct nlattr *attr, u32 *nh_id_p,
+			      struct netlink_ext_ack *extack)
+{
+	u32 idx;
+
+	if (attr) {
+		idx = nla_get_u32(attr);
+		if (!idx) {
+			NL_SET_ERR_MSG(extack, "Invalid nexthop id");
+			return -EINVAL;
+		}
+		*nh_id_p = idx;
+	} else {
+		*nh_id_p = 0;
+	}
+
+	return 0;
+}
+
+static int nh_valid_dump_bucket_req(const struct nlmsghdr *nlh,
+				    struct nh_dump_filter *filter,
+				    struct netlink_callback *cb)
+{
+	struct nlattr *res_tb[ARRAY_SIZE(rtm_nh_res_bucket_policy_dump)];
+	struct nlattr *tb[ARRAY_SIZE(rtm_nh_policy_dump_bucket)];
+	int err;
+
+	err = nlmsg_parse(nlh, sizeof(struct nhmsg), tb,
+			  ARRAY_SIZE(rtm_nh_policy_dump_bucket) - 1,
+			  rtm_nh_policy_dump_bucket, NULL);
+	if (err < 0)
+		return err;
+
+	err = nh_valid_dump_nhid(tb[NHA_ID], &filter->nh_id, cb->extack);
+	if (err)
+		return err;
+
+	if (tb[NHA_RES_BUCKET]) {
+		size_t max = ARRAY_SIZE(rtm_nh_res_bucket_policy_dump) - 1;
+
+		err = nla_parse_nested(res_tb, max,
+				       tb[NHA_RES_BUCKET],
+				       rtm_nh_res_bucket_policy_dump,
+				       cb->extack);
+		if (err < 0)
+			return err;
+
+		err = nh_valid_dump_nhid(res_tb[NHA_RES_BUCKET_NH_ID],
+					 &filter->res_bucket_nh_id,
+					 cb->extack);
+		if (err)
+			return err;
+	}
+
+	return __nh_valid_dump_req(nlh, tb, filter, cb->extack);
+}
+
+struct rtm_dump_res_bucket_ctx {
+	struct rtm_dump_nh_ctx nh;
+	u16 bucket_index;
+	u32 done_nh_idx; /* 1 + the index of the last fully processed NH. */
+};
+
+static struct rtm_dump_res_bucket_ctx *
+rtm_dump_res_bucket_ctx(struct netlink_callback *cb)
+{
+	struct rtm_dump_res_bucket_ctx *ctx = (void *)cb->ctx;
+
+	BUILD_BUG_ON(sizeof(*ctx) > sizeof(cb->ctx));
+	return ctx;
+}
+
+struct rtm_dump_nexthop_bucket_data {
+	struct rtm_dump_res_bucket_ctx *ctx;
+	struct nh_dump_filter filter;
+};
+
+static int rtm_dump_nexthop_bucket_nh(struct sk_buff *skb,
+				      struct netlink_callback *cb,
+				      struct nexthop *nh,
+				      struct rtm_dump_nexthop_bucket_data *dd)
+{
+	u32 portid = NETLINK_CB(cb->skb).portid;
+	struct nhmsg *nhm = nlmsg_data(cb->nlh);
+	struct nh_res_table *res_table;
+	struct nh_group *nhg;
+	u16 bucket_index;
+	int err;
+
+	if (dd->ctx->nh.idx < dd->ctx->done_nh_idx)
+		return 0;
+
+	nhg = rtnl_dereference(nh->nh_grp);
+	res_table = rtnl_dereference(nhg->res_table);
+	for (bucket_index = dd->ctx->bucket_index;
+	     bucket_index < res_table->num_nh_buckets;
+	     bucket_index++) {
+		struct nh_res_bucket *bucket;
+		struct nh_grp_entry *nhge;
+
+		bucket = &res_table->nh_buckets[bucket_index];
+		nhge = rtnl_dereference(bucket->nh_entry);
+		if (nh_dump_filtered(nhge->nh, &dd->filter, nhm->nh_family))
+			continue;
+
+		if (dd->filter.res_bucket_nh_id &&
+		    dd->filter.res_bucket_nh_id != nhge->nh->id)
+			continue;
+
+		err = nh_fill_res_bucket(skb, nh, bucket, bucket_index,
+					 RTM_NEWNEXTHOPBUCKET, portid,
+					 cb->nlh->nlmsg_seq, NLM_F_MULTI,
+					 cb->extack);
+		if (err < 0) {
+			if (likely(skb->len))
+				goto out;
+			goto out_err;
+		}
+	}
+
+	dd->ctx->done_nh_idx = dd->ctx->nh.idx + 1;
+	bucket_index = 0;
+
+out:
+	err = skb->len;
+out_err:
+	dd->ctx->bucket_index = bucket_index;
+	return err;
+}
+
+static int rtm_dump_nexthop_bucket_cb(struct sk_buff *skb,
+				      struct netlink_callback *cb,
+				      struct nexthop *nh, void *data)
+{
+	struct rtm_dump_nexthop_bucket_data *dd = data;
+	struct nh_group *nhg;
+
+	if (!nh->is_group)
+		return 0;
+
+	nhg = rtnl_dereference(nh->nh_grp);
+	if (!nhg->resilient)
+		return 0;
+
+	return rtm_dump_nexthop_bucket_nh(skb, cb, nh, dd);
+}
+
+/* rtnl */
+static int rtm_dump_nexthop_bucket(struct sk_buff *skb,
+				   struct netlink_callback *cb)
+{
+	struct rtm_dump_res_bucket_ctx *ctx = rtm_dump_res_bucket_ctx(cb);
+	struct rtm_dump_nexthop_bucket_data dd = { .ctx = ctx };
+	struct net *net = sock_net(skb->sk);
+	struct nexthop *nh;
+	int err;
+
+	err = nh_valid_dump_bucket_req(cb->nlh, &dd.filter, cb);
+	if (err)
+		return err;
+
+	if (dd.filter.nh_id) {
+		nh = nexthop_find_group_resilient(net, dd.filter.nh_id,
+						  cb->extack);
+		if (IS_ERR(nh))
+			return PTR_ERR(nh);
+		err = rtm_dump_nexthop_bucket_nh(skb, cb, nh, &dd);
+	} else {
+		struct rb_root *root = &net->nexthop.rb_root;
+
+		err = rtm_dump_walk_nexthops(skb, cb, root, &ctx->nh,
+					     &rtm_dump_nexthop_bucket_cb, &dd);
+	}
+
+	if (err < 0) {
+		if (likely(skb->len))
+			goto out;
+		goto out_err;
+	}
+
+out:
+	err = skb->len;
+out_err:
+	cb->seq = net->nexthop.seq;
+	nl_dump_check_consistent(cb, nlmsg_hdr(skb));
+	return err;
+}
+
 static void nexthop_sync_mtu(struct net_device *dev, u32 orig_mtu)
 {
 	unsigned int hash = nh_dev_hashfn(dev->ifindex);
@@ -3324,6 +3604,9 @@  static int __init nexthop_init(void)
 	rtnl_register(PF_INET6, RTM_NEWNEXTHOP, rtm_new_nexthop, NULL, 0);
 	rtnl_register(PF_INET6, RTM_GETNEXTHOP, NULL, rtm_dump_nexthop, 0);
 
+	rtnl_register(PF_UNSPEC, RTM_GETNEXTHOPBUCKET, NULL,
+		      rtm_dump_nexthop_bucket, 0);
+
 	return 0;
 }
 subsys_initcall(nexthop_init);