@@ -4137,7 +4137,7 @@ void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev);
extern int netdev_budget;
extern unsigned int netdev_budget_usecs;
-
+extern unsigned int rps_pv_send_ipi __read_mostly;
/* Called by rtnetlink.c:rtnl_unlock() */
void netdev_run_todo(void);
@@ -203,6 +203,8 @@ static unsigned int napi_gen_id = NR_CPUS;
static DEFINE_READ_MOSTLY_HASHTABLE(napi_hash, 8);
static DECLARE_RWSEM(devnet_rename_sem);
+unsigned int rps_pv_send_ipi __read_mostly;
+static DEFINE_PER_CPU(cpumask_var_t, rps_ipi_mask);
static inline void dev_base_seq_inc(struct net *net)
{
@@ -4529,9 +4531,9 @@ EXPORT_SYMBOL(rps_may_expire_flow);
#endif /* CONFIG_RFS_ACCEL */
/* Called from hardirq (IPI) context */
-static void rps_trigger_softirq(void *data)
+static void rps_trigger_softirq(void *data __maybe_unused)
{
-	struct softnet_data *sd = data;
+	struct softnet_data *sd = this_cpu_ptr(&softnet_data);

	____napi_schedule(sd, &sd->backlog);
	sd->received_rps++;
@@ -6364,12 +6366,26 @@ EXPORT_SYMBOL(__skb_gro_checksum_complete);
static void net_rps_send_ipi(struct softnet_data *remsd)
{
#ifdef CONFIG_RPS
-	while (remsd) {
-		struct softnet_data *next = remsd->rps_ipi_next;
+	if (!rps_pv_send_ipi) {
+		while (remsd) {
+			struct softnet_data *next = remsd->rps_ipi_next;
+
+			if (cpu_online(remsd->cpu))
+				smp_call_function_single_async(remsd->cpu, &remsd->csd);
+			remsd = next;
+		}
+	} else {
+		struct cpumask *tmpmask = this_cpu_cpumask_var_ptr(rps_ipi_mask);
+
+		cpumask_clear(tmpmask);
+		while (remsd) {
+			struct softnet_data *next = remsd->rps_ipi_next;

-		if (cpu_online(remsd->cpu))
-			smp_call_function_single_async(remsd->cpu, &remsd->csd);
-		remsd = next;
+			if (cpu_online(remsd->cpu))
+				cpumask_set_cpu(remsd->cpu, tmpmask);
+			remsd = next;
+		}
+		smp_call_function_many(tmpmask, rps_trigger_softirq, NULL, false);
	}
#endif
}
@@ -11627,6 +11643,8 @@ static int __init net_dev_init(void)
#ifdef CONFIG_RPS
INIT_CSD(&sd->csd, rps_trigger_softirq, sd);
sd->cpu = i;
+ zalloc_cpumask_var_node(&per_cpu(rps_ipi_mask, i),
+ GFP_KERNEL, cpu_to_node(i));
#endif
init_gro_hash(&sd->backlog);
@@ -377,6 +377,15 @@ static struct ctl_table net_core_table[] = {
.mode = 0444,
.proc_handler = proc_do_rss_key,
},
+ {
+ .procname = "rps_pv_send_ipi",
+ .data = &rps_pv_send_ipi,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = SYSCTL_ONE,
+ },
#ifdef CONFIG_BPF_JIT
{
.procname = "bpf_jit_enable",
In a virtualization setup, sending an IPI causes a vmexit and is therefore expensive, so at the highest throughput IPIs should not be sent to remote CPUs one by one. smp_call_function_many() may use PV IPIs to kick many CPUs with a single request.

Add a sysctl, rps_pv_send_ipi, that lets net_rps_send_ipi() collect the remote CPUs into a per-CPU cpumask and trigger their backlogs with one smp_call_function_many() call instead of per-CPU smp_call_function_single_async() calls.

Signed-off-by: Li RongQing <lirongqing@baidu.com>
---
 include/linux/netdevice.h  |  2 +-
 net/core/dev.c             | 32 +++++++++++++++++++++++++-------
 net/core/sysctl_net_core.c |  9 +++++++++
 3 files changed, 35 insertions(+), 8 deletions(-)
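
For reference, a minimal usage sketch (not part of the patch): the knob is zero-initialized, so batching stays off by default, and on a guest where PV IPIs are available it can be flipped at runtime. This assumes the net_core_table entry above shows up as /proc/sys/net/core/rps_pv_send_ipi, i.e. the same effect as "sysctl -w net.core.rps_pv_send_ipi=1".

#include <stdio.h>
#include <stdlib.h>

int main(void)
{
	/*
	 * Write "1" to switch net_rps_send_ipi() from per-CPU
	 * smp_call_function_single_async() calls to one batched
	 * smp_call_function_many() call; write "0" to restore the default.
	 */
	FILE *f = fopen("/proc/sys/net/core/rps_pv_send_ipi", "w");

	if (!f) {
		perror("rps_pv_send_ipi");
		return EXIT_FAILURE;
	}
	fputs("1\n", f);
	fclose(f);
	return EXIT_SUCCESS;
}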