@@ -2,7 +2,7 @@
obj-y := core.o
CFLAGS_core.o += $(call cc-disable-warning, override-init)
-obj-$(CONFIG_BPF_SYSCALL) += syscall.o verifier.o inode.o helpers.o tnum.o bpf_iter.o map_iter.o
+obj-$(CONFIG_BPF_SYSCALL) += syscall.o verifier.o inode.o helpers.o tnum.o bpf_iter.o map_iter.o task_iter.o
obj-$(CONFIG_BPF_SYSCALL) += hashtab.o arraymap.o percpu_freelist.o bpf_lru_list.o lpm_trie.o map_in_map.o
obj-$(CONFIG_BPF_SYSCALL) += local_storage.o queue_stack_maps.o
obj-$(CONFIG_BPF_SYSCALL) += disasm.o
new file mode 100644
@@ -0,0 +1,336 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright (c) 2020 Facebook */
+
+#include <linux/init.h>
+#include <linux/namei.h>
+#include <linux/pid_namespace.h>
+#include <linux/fs.h>
+#include <linux/fdtable.h>
+#include <linux/filter.h>
+
+struct bpf_iter_seq_task_common {
+ struct pid_namespace *ns;
+};
+
+struct bpf_iter_seq_task_info {
+ struct bpf_iter_seq_task_common common;
+ struct task_struct *task;
+ u32 id;
+};
+
+static struct task_struct *task_seq_get_next(struct pid_namespace *ns, u32 *id)
+{
+ struct task_struct *task = NULL;
+ struct pid *pid;
+
+ rcu_read_lock();
+ pid = idr_get_next(&ns->idr, id);
+ if (pid)
+ task = get_pid_task(pid, PIDTYPE_PID);
+ rcu_read_unlock();
+
+ return task;
+}
+
+static void *task_seq_start(struct seq_file *seq, loff_t *pos)
+{
+ struct bpf_iter_seq_task_info *info = seq->private;
+ struct task_struct *task;
+ u32 id = info->id;
+
+ task = task_seq_get_next(info->common.ns, &id);
+ if (!task)
+ return NULL;
+
+ ++*pos;
+ info->task = task;
+ info->id = id;
+
+ return task;
+}
+
+static void *task_seq_next(struct seq_file *seq, void *v, loff_t *pos)
+{
+ struct bpf_iter_seq_task_info *info = seq->private;
+ struct task_struct *task;
+
+ ++*pos;
+ ++info->id;
+ task = task_seq_get_next(info->common.ns, &info->id);
+ if (!task)
+ return NULL;
+
+ put_task_struct(info->task);
+ info->task = task;
+ return task;
+}
+
+struct bpf_iter__task {
+ __bpf_md_ptr(struct bpf_iter_meta *, meta);
+ __bpf_md_ptr(struct task_struct *, task);
+};
+
+DEFINE_BPF_ITER_FUNC(task, struct bpf_iter_meta *meta, struct task_struct *task)
+
+static int __task_seq_show(struct seq_file *seq, void *v, bool in_stop)
+{
+ struct bpf_iter_meta meta;
+ struct bpf_iter__task ctx;
+ struct bpf_prog *prog;
+ int ret = 0;
+
+ meta.seq = seq;
+ prog = bpf_iter_get_info(&meta, in_stop);
+ if (prog) {
+ meta.seq = seq;
+ ctx.meta = &meta;
+ ctx.task = v;
+ ret = bpf_iter_run_prog(prog, &ctx);
+ }
+
+ return 0;
+}
+
+static int task_seq_show(struct seq_file *seq, void *v)
+{
+ return __task_seq_show(seq, v, false);
+}
+
+static void task_seq_stop(struct seq_file *seq, void *v)
+{
+ struct bpf_iter_seq_task_info *info = seq->private;
+
+ if (!v)
+ __task_seq_show(seq, v, true);
+
+ if (info->task) {
+ put_task_struct(info->task);
+ info->task = NULL;
+ }
+}
+
+static const struct seq_operations task_seq_ops = {
+ .start = task_seq_start,
+ .next = task_seq_next,
+ .stop = task_seq_stop,
+ .show = task_seq_show,
+};
+
+struct bpf_iter_seq_task_file_info {
+ struct bpf_iter_seq_task_common common;
+ struct task_struct *task;
+ struct files_struct *files;
+ u32 id;
+ u32 fd;
+};
+
+static struct file *task_file_seq_get_next(struct pid_namespace *ns, u32 *id,
+ int *fd, struct task_struct **task,
+ struct files_struct **fstruct)
+{
+ struct files_struct *files;
+ struct task_struct *tk;
+ u32 sid = *id;
+ int sfd;
+
+ /* If this function returns a non-NULL file object,
+ * it held a reference to the files_struct and file.
+ * Otherwise, it does not hold any reference.
+ */
+again:
+ if (*fstruct) {
+ files = *fstruct;
+ sfd = *fd;
+ } else {
+ tk = task_seq_get_next(ns, &sid);
+ if (!tk)
+ return NULL;
+
+ files = get_files_struct(tk);
+ put_task_struct(tk);
+ if (!files) {
+ sid = ++(*id);
+ *fd = 0;
+ goto again;
+ }
+ *fstruct = files;
+ *task = tk;
+ if (sid == *id) {
+ sfd = *fd;
+ } else {
+ *id = sid;
+ sfd = 0;
+ }
+ }
+
+ rcu_read_lock();
+ for (; sfd < files_fdtable(files)->max_fds; sfd++) {
+ struct file *f;
+
+ f = fcheck_files(files, sfd);
+ if (!f)
+ continue;
+ *fd = sfd;
+ get_file(f);
+ rcu_read_unlock();
+ return f;
+ }
+
+ /* the current task is done, go to the next task */
+ rcu_read_unlock();
+ put_files_struct(files);
+ *fstruct = NULL;
+ sid = ++(*id);
+ *fd = 0;
+ goto again;
+}
+
+static void *task_file_seq_start(struct seq_file *seq, loff_t *pos)
+{
+ struct bpf_iter_seq_task_file_info *info = seq->private;
+ struct files_struct *files = NULL;
+ struct task_struct *task = NULL;
+ struct file *file;
+ u32 id = info->id;
+ int fd = info->fd;
+
+ file = task_file_seq_get_next(info->common.ns, &id, &fd, &task, &files);
+ if (!file) {
+ info->files = NULL;
+ return NULL;
+ }
+
+ ++*pos;
+ info->id = id;
+ info->fd = fd;
+ info->task = task;
+ info->files = files;
+
+ return file;
+}
+
+static void *task_file_seq_next(struct seq_file *seq, void *v, loff_t *pos)
+{
+ struct bpf_iter_seq_task_file_info *info = seq->private;
+ struct files_struct *files = info->files;
+ struct task_struct *task = info->task;
+ struct file *file;
+
+ ++*pos;
+ ++info->fd;
+ fput((struct file *)v);
+ file = task_file_seq_get_next(info->common.ns, &info->id, &info->fd,
+ &task, &files);
+ if (!file) {
+ info->files = NULL;
+ return NULL;
+ }
+
+ info->task = task;
+ info->files = files;
+
+ return file;
+}
+
+struct bpf_iter__task_file {
+ __bpf_md_ptr(struct bpf_iter_meta *, meta);
+ __bpf_md_ptr(struct task_struct *, task);
+ u32 fd;
+ __bpf_md_ptr(struct file *, file);
+};
+
+DEFINE_BPF_ITER_FUNC(task_file, struct bpf_iter_meta *meta,
+ struct task_struct *task, u32 fd,
+ struct file *file)
+
+static int __task_file_seq_show(struct seq_file *seq, void *v, bool in_stop)
+{
+ struct bpf_iter_seq_task_file_info *info = seq->private;
+ struct bpf_iter__task_file ctx;
+ struct bpf_iter_meta meta;
+ struct bpf_prog *prog;
+ int ret = 0;
+
+ meta.seq = seq;
+ prog = bpf_iter_get_info(&meta, in_stop);
+ if (prog) {
+ ctx.meta = &meta;
+ ctx.task = info->task;
+ ctx.fd = info->fd;
+ ctx.file = v;
+ ret = bpf_iter_run_prog(prog, &ctx);
+ }
+
+ return ret;
+}
+
+static int task_file_seq_show(struct seq_file *seq, void *v)
+{
+ return __task_file_seq_show(seq, v, false);
+}
+
+static void task_file_seq_stop(struct seq_file *seq, void *v)
+{
+ struct bpf_iter_seq_task_file_info *info = seq->private;
+
+ if (!v)
+ __task_file_seq_show(seq, v, true);
+ else if (!IS_ERR(v))
+ fput((struct file *)v);
+
+ if (info->files) {
+ put_files_struct(info->files);
+ info->files = NULL;
+ }
+}
+
+/* The first field of task/task_file private data is
+ * struct bpf_iter_seq_task_common.
+ */
+static int init_seq_pidns(void *priv_data)
+{
+ struct bpf_iter_seq_task_common *common = priv_data;
+
+ common->ns = get_pid_ns(task_active_pid_ns(current));
+ return 0;
+}
+
+static void fini_seq_pidns(void *priv_data)
+{
+ struct bpf_iter_seq_task_common *common = priv_data;
+
+ put_pid_ns(common->ns);
+}
+
+static const struct seq_operations task_file_seq_ops = {
+ .start = task_file_seq_start,
+ .next = task_file_seq_next,
+ .stop = task_file_seq_stop,
+ .show = task_file_seq_show,
+};
+
+static int __init task_iter_init(void)
+{
+ struct bpf_iter_reg task_file_reg_info = {
+ .target = "task_file",
+ .seq_ops = &task_file_seq_ops,
+ .init_seq_private = init_seq_pidns,
+ .fini_seq_private = fini_seq_pidns,
+ .seq_priv_size = sizeof(struct bpf_iter_seq_task_file_info),
+ };
+ struct bpf_iter_reg task_reg_info = {
+ .target = "task",
+ .seq_ops = &task_seq_ops,
+ .init_seq_private = init_seq_pidns,
+ .fini_seq_private = fini_seq_pidns,
+ .seq_priv_size = sizeof(struct bpf_iter_seq_task_info),
+ };
+ int ret;
+
+ ret = bpf_iter_reg_target(&task_reg_info);
+ if (ret)
+ return ret;
+
+ return bpf_iter_reg_target(&task_file_reg_info);
+}
+late_initcall(task_iter_init);
Only the tasks belonging to "current" pid namespace are enumerated. For task/file target, the bpf program will have access to struct task_struct *task u32 fd struct file *file where fd/file is an open file for the task. Signed-off-by: Yonghong Song <yhs@fb.com> --- kernel/bpf/Makefile | 2 +- kernel/bpf/task_iter.c | 336 +++++++++++++++++++++++++++++++++++++++++ 2 files changed, 337 insertions(+), 1 deletion(-) create mode 100644 kernel/bpf/task_iter.c