#include #include #include #include #include "lve_debug.h" #include "lve_kmod_c.h" #include "kernel_exp.h" #include "cgroup_lib.h" #include "lsm.h" void (*_lve_set_fs_pwd)(struct fs_struct *, const struct path *); void (*_lve_set_fs_root)(struct fs_struct *, const struct path *); void lve_set_fs_root_pwd(struct fs_struct *f, const struct path *p) { _lve_set_fs_root(f, p); _lve_set_fs_pwd(f, p); } #ifdef HAVE_2ARGS_CGROUP_ATTACH_TASK int (*_lve_cgroup_kernel_attach)(struct cgroup *cgrp, struct task_struct *tsk); int lve_cgroup_kernel_attach(struct cgroup *cgrp, struct task_struct *tsk) { if (cgrp == NULL) return 0; return _lve_cgroup_kernel_attach(cgrp, tsk); } #else struct mutex *lve_cgroup_mutex; int (*lve_cgroup_attach_task)(struct cgroup *, struct task_struct *, bool); int lve_cgroup_kernel_attach(struct cgroup *cgrp, struct task_struct *tsk) { int ret; if (cgrp == NULL) return 0; mutex_lock(lve_cgroup_mutex); ret = lve_cgroup_attach_task(cgrp, tsk, false); mutex_unlock(lve_cgroup_mutex); return ret; } #endif #if OPENVZ_VERSION > 0 #include #ifndef HAVE_UB_ATTACH_TASK #ifdef HAVE_UB_ATTACH int (*_lve_ub_attach)(struct user_beancounter *bc); #endif int lve_ub_attach_task(struct user_beancounter *bc, struct task_struct *task) { #ifndef HAVE_UB_ATTACH return set_task_exec_ub(task, bc); #else return (task == current) ? _lve_ub_attach(bc) : -EINVAL; #endif } #else int (*_lve_ub_attach_task)(struct user_beancounter *bc, struct task_struct *task); int lve_ub_attach_task(struct user_beancounter *bc, struct task_struct *task) { return _lve_ub_attach_task(bc, task); } #endif /* HAVE_UB_ATTACH_TASK */ #include #include struct cgroup *lve_ub_cgroup_root; struct vfsmount *lve_ub_cgroup_mnt; struct dentry *lve_get_ub_cgroup_root(void) { struct dentry *_lve_ub_cgroup_root = NULL; #ifdef HAVE_UB_CGROUP_ROOT _lve_ub_cgroup_root = lve_ub_cgroup_root->dentry; #else #ifdef HAVE_UB_CGROUP_MNT _lve_ub_cgroup_root = lve_cgroup_get_root(lve_ub_cgroup_mnt); #endif #endif /* HAVE_UB_CGROUP_ROOT */ return _lve_ub_cgroup_root; } #endif /* OpenVZ */ #if defined(HAVE_COPY_NS_WITH_2ARGS) int (*_lve_copy_namespaces)(unsigned long flags, struct task_struct *tsk); int lve_copy_namespaces(unsigned long flags, struct task_struct *tsk) { return _lve_copy_namespaces(CLONE_NEWNS, tsk); } #elif defined(HAVE_COPY_NS_WITH_3ARGS) int (*_lve_copy_namespaces)(unsigned long flags, struct task_struct *tsk, int fa); int lve_copy_namespaces(unsigned long flags, struct task_struct *tsk) { return _lve_copy_namespaces(CLONE_NEWNS, tsk, 0); } #else #error "Unsupported copy_namespaces() prototype" #endif struct fs_struct * (*lve_copy_fs_struct)(struct fs_struct *); void (*lve_free_fs_struct)(struct fs_struct *fs_struct); void (*lve_ub_precharge_snapshot)(struct user_beancounter *ub, int *precharge); #ifdef UBC_CL_API long (*lve_setublimit)(struct user_beancounter *ub, unsigned long resource, unsigned long *new_limits); #else long (*lve_setublimit)(uid_t ub, unsigned long resource, unsigned long *new_limits); #endif rwlock_t *lve_css_set_lock; int (*lve_freezer_change_state)(struct cgroup *c, enum freezer_state s); int (*lve_try_to_free_gang_pages)(struct gang_set *gs, gfp_t gfp); unsigned long (*lve_try_to_free_mem_cgroup_pages)(struct mem_cgroup *mem, unsigned long nr_pages, gfp_t gfp_mask, bool noswap); struct mem_cgroup * (*lve_mem_cgroup_from_cont)(struct cgroup *cont); #ifdef HAVE_TASKLIST_QRWLOCK qrwlock_t *lve_tasklist_lock_ptr; #else rwlock_t *lve_tasklist_lock_ptr; #endif void (*lve_switch_task_namespaces)(struct task_struct *tsk, struct nsproxy *new); void (*lve_free_nsproxy)(struct nsproxy *p); #ifdef HAVE___FPUT void (*lve__fput)(struct file *file); #endif struct task_struct *(*lve_find_task_by_vpid)(pid_t nr); struct cgroup_subsys_state *(*lve_ub_get_css)(struct user_beancounter *ub, int idx); #ifdef HAVE_GET_BEANCOUNTER_BYUID struct user_beancounter *(*lve_get_beancounter_byuid)(uid_t uid, int create); #endif #ifdef HAVE_GET_BEANCOUNTER_BYNAME struct user_beancounter *(*lve_get_beancounter_by_name)(const char *name, int create); #endif void (*lve_ub_sync_memcg)(struct user_beancounter *ub); #ifdef HAVE_LOADAVG_PTR void (*lve_cgroup_iter_start)(struct cgroup *cg, struct cgroup_iter *iter); void (*lve_cgroup_iter_end)(struct cgroup *cg, struct cgroup_iter *iter); struct task_struct * (*lve_cgroup_iter_next)(struct cgroup *cg, struct cgroup_iter *iter); #endif void *lve_iolimits_rq_issue_ptr; void *lve_iolimits_wb_dirty_ptr; #ifndef HAVE_IOMAP_DIO_RW void *lve_iolimits_ext4_direct_IO_enter_ptr; void *lve_iolimits_xfs_file_direct_write_ptr; #endif void (*lve_put_filesystem)(struct file_system_type *fs); int (*lve_task_work_add)(struct task_struct *task, struct callback_head *twork, bool); void *(*lve_task_work_cancel)(struct task_struct *task, void *func); struct filename * (*lve_getname)(const char __user * filename); struct filename * (*lve_getname_kernel)(const char * filename); void (*lve_putname)(struct filename * filename); int (*lve_cgroupstats_build)(struct cgroupstats *stats, struct dentry *dentry); #ifdef HAVE_CGOUPFS struct cgroup *(*lve_task_cgroup_from_root)(struct task_struct *task, struct cgroupfs_root *root); #else struct cgroup *(*lve_task_cgroup_from_root)(struct task_struct *task, struct cgroup_root *root); #endif #if OPENVZ_VERSION == 0 struct cred *lve_init_cred; #endif int (*lve_do_readlinkat)(int dfd, const char __user *pathname, char __user *buf, int bufsiz); struct dentry_operations *lve_tid_fd_dentry_operations; #if FEAT_LINK_PROT == 1 #ifdef IMPL_LINK_PROT_OLD #ifndef IMPL_LINK_PROT_EXPERIMENTAL int (*lve_lookup_slow)(struct nameidata *nd, struct path *path); #endif struct dentry * (*lve__lookup_hash)(struct qstr *, struct dentry *, unsigned int); int (*lve_follow_managed)(struct path *path, unsigned flags); int (*lve_filename_lookup)(int dfd, struct filename *name, unsigned int flags, struct nameidata *nd); #else struct dentry * (*lve_lookup_slow)(const struct qstr *name, struct dentry *dir, unsigned int flags); struct dentry * (*lve__lookup_slow)(const struct qstr *name, struct dentry *dir, unsigned int flags); #endif #ifdef IMPL_LINK_PROT_EXPERIMENTAL int (*lve_filename_lookup)(int dfd, struct filename *name, unsigned int flags, struct path *path, struct path *root); void (*lve_terminate_walk)(struct nameidata *nd); #ifdef HAVE_OPEN_LAST struct open_flags; char *(*lve_do_last)(struct nameidata *nd, struct file *file, const struct open_flags *op); char *(*lve_walk_component)(struct nameidata *nd, int flags); #else int (*lve_do_last)(struct nameidata *nd, struct file *file, const void *op); int (*lve_walk_component)(struct nameidata *nd, int flags); #endif #if RHEL_MAJOR<9 int (*lve_do_renameat2)(int olddfd, const char __user *oldname, int newdfd, const char __user *newname, unsigned int flags); #else int (*lve_do_renameat2)(int olddfd, struct filename *oldname, int newdfd, struct filename *newname, unsigned int flags); #endif #if RHEL_MAJOR<9 int (*lve_do_symlinkat)(const char __user *oldname, int newdfd, const char __user *newname); #else int (*lve_do_symlinkat)(struct filename *oldname, int newdfd, struct filename *newname); #endif #endif #ifdef HAVE_TRY_TO_UNLAZY bool (*lve_try_to_unlazy)(struct nameidata *nd); #else int (*lve_unlazy_walk)(struct nameidata *nd); #endif struct file *(*lve_get_empty_filp)(void); struct file * (*lve_alloc_empty_file_noaccount)(int flags, const struct cred *cred); #endif void (*lve_proc_exit_connector)(struct task_struct *task); int (*lve_xfs_trans_dqresv)(void *tp, void *mp, void *dqp, long nblks, long ninos, uint flags); void (*lve_cgroup_post_fork)(struct task_struct *child, void *old_ss_priv); void (*lve_proc_sys_poll_notify)(struct ctl_table_poll *poll); long (*lve_tty_ioctl)(struct file *file, unsigned int cmd, unsigned long arg); #if RHEL_MAJOR == 0 int (*lve_de_thread)(struct task_struct *tsk); #endif #ifdef HAVE_RENAMEDATA int lve_vfs_rename(struct inode *olddir, struct dentry *olddentry, struct inode *newdir, struct dentry *newdentry) { struct renamedata rd = { .old_mnt_userns = &init_user_ns, .old_dir = olddir, .old_dentry = olddentry, .new_mnt_userns = &init_user_ns, .new_dir = newdir, .new_dentry = newdentry, .flags = 0, }; return vfs_rename(&rd); } #endif int (*lve_search_binary_handler)(struct linux_binprm *bprm); static struct lve_sym_s { const char *symname; void **symaddr; } lve_syms[] = { #ifdef HAVE___FPUT { "__fput", (void **)&lve__fput }, #endif { "__tracepoint_block_rq_issue",(void **)&lve_iolimits_rq_issue_ptr }, #ifdef HAVE_WRITEBACK_DIRTY_FOLIO { "__tracepoint_writeback_dirty_folio",(void **)&lve_iolimits_wb_dirty_ptr }, #else { "__tracepoint_writeback_dirty_page",(void **)&lve_iolimits_wb_dirty_ptr }, #endif #ifndef HAVE_IOMAP_DIO_RW { "__tracepoint_xfs_file_direct_write", (void**)&lve_iolimits_xfs_file_direct_write_ptr }, { "__tracepoint_ext4_direct_IO_enter",(void **)&lve_iolimits_ext4_direct_IO_enter_ptr }, #endif #ifndef HAVE_2ARGS_CGROUP_ATTACH_TASK { "cgroup_attach_task", (void **)&lve_cgroup_attach_task }, #endif #ifdef HAVE_2ARGS_CGROUP_ATTACH_TASK { "cgroup_kernel_attach", (void **)&_lve_cgroup_kernel_attach }, #endif #ifdef HAVE_LOADAVG_PTR { "cgroup_iter_start", (void **)&lve_cgroup_iter_start }, { "cgroup_iter_next", (void **)&lve_cgroup_iter_next }, { "cgroup_iter_end", (void **)&lve_cgroup_iter_end }, #endif #ifndef HAVE_2ARGS_CGROUP_ATTACH_TASK { "cgroup_mutex", (void **)&lve_cgroup_mutex }, #endif { "copy_fs_struct", (void **)&lve_copy_fs_struct }, { "copy_namespaces", (void **)&_lve_copy_namespaces }, { "css_set_lock", (void **)&lve_css_set_lock }, { "find_task_by_vpid", (void **)&lve_find_task_by_vpid }, { "freezer_change_state", (void **)&lve_freezer_change_state }, { "free_fs_struct", (void **)&lve_free_fs_struct }, { "free_nsproxy", (void **)&lve_free_nsproxy }, #ifdef HAVE_MEM_CGROUP_FROM_CONT { "mem_cgroup_from_cont", (void **)&lve_mem_cgroup_from_cont }, #endif { "set_fs_pwd", (void **)&_lve_set_fs_pwd }, { "set_fs_root" , (void **)&_lve_set_fs_root }, { "switch_task_namespaces", (void **)&lve_switch_task_namespaces }, { "task_work_add", (void **)&lve_task_work_add }, { "task_work_cancel", (void **)&lve_task_work_cancel }, #if OPENVZ_VERSION == 0 { "init_cred", (void **)&lve_init_cred }, #endif { "tasklist_lock", (void **)&lve_tasklist_lock_ptr }, #ifdef HAVE_TRY_TO_FREE_MEM_CGROUP_PAGES { "try_to_free_mem_cgroup_pages", (void **)&lve_try_to_free_mem_cgroup_pages }, #endif { "put_filesystem", (void **)&lve_put_filesystem }, #if OPENVZ_VERSION > 0 #ifdef HAVE_UB_GET_CSS { "__ub_get_css", (void **)&lve_ub_get_css }, #endif { "do_setublimit", (void **)&lve_setublimit }, #ifdef HAVE_GET_BEANCOUNTER_BYUID { "get_beancounter_byuid", (void **)&lve_get_beancounter_byuid }, #endif #ifdef HAVE_GET_BEANCOUNTER_BYNAME { "get_beancounter_by_name", (void **)&lve_get_beancounter_by_name }, #endif #ifdef HAVE_TRY_TO_FREE_GANG_PAGES { "try_to_free_gang_pages", (void **)&lve_try_to_free_gang_pages }, #endif #ifndef HAVE_UB_ATTACH_TASK #ifdef HAVE_UB_ATTACH { "ub_attach", (void **)&_lve_ub_attach }, #endif #endif #ifdef HAVE_UB_ATTACH_TASK { "ub_attach_task", (void **)&_lve_ub_attach_task }, #endif #ifdef HAVE_UB_CGROUP_ROOT { "ub_cgroup_root", (void **)&lve_ub_cgroup_root }, #endif #ifdef HAVE_UB_CGROUP_MNT { "ub_cgroup_mnt", (void **)&lve_ub_cgroup_mnt }, #endif { "ub_precharge_snapshot", (void **)&lve_ub_precharge_snapshot }, #ifdef HAVE_UB_SYNC_MEMCG { "ub_sync_memcg", (void **)&lve_ub_sync_memcg }, #endif #endif { "getname", (void **)&lve_getname }, { "getname_kernel", (void **)&lve_getname_kernel }, { "putname", (void **)&lve_putname }, { "cgroupstats_build", (void **)&lve_cgroupstats_build }, { "task_cgroup_from_root", (void **)&lve_task_cgroup_from_root }, #if FEAT_LINK_PROT == 1 #ifdef IMPL_LINK_PROT_OLD { "get_empty_filp", (void **)&lve_get_empty_filp }, { "filename_lookup", (void **)&lve_filename_lookup }, { "__lookup_hash", (void **)&lve__lookup_hash }, { "follow_managed", (void **)&lve_follow_managed }, #elif defined(IMPL_LINK_PROT_NEW) { "alloc_empty_file_noaccount", (void **)&lve_alloc_empty_file_noaccount }, { "__lookup_slow", (void **)&lve__lookup_slow }, #ifdef HAVE_TRY_TO_UNLAZY { "try_to_unlazy", (void **)&lve_try_to_unlazy}, #else { "unlazy_walk", (void **)&lve_unlazy_walk }, #endif #elif defined(IMPL_LINK_PROT_EXPERIMENTAL) { "filename_lookup", (void **)&lve_filename_lookup }, { "alloc_empty_file_noaccount", (void **)&lve_alloc_empty_file_noaccount }, { "walk_component", (void **)&lve_walk_component }, { "terminate_walk", (void **)&lve_terminate_walk }, { LVE_DO_LAST_SYM, (void **)&lve_do_last }, { "do_renameat2", (void **)&lve_do_renameat2 }, { "do_symlinkat", (void **)&lve_do_symlinkat }, #ifdef HAVE_TRY_TO_UNLAZY { "try_to_unlazy", (void **)&lve_try_to_unlazy}, #else { "unlazy_walk", (void **)&lve_unlazy_walk }, #endif #endif { "do_readlinkat", (void **)&lve_do_readlinkat }, #ifndef IMPL_LINK_PROT_EXPERIMENTAL { "lookup_slow", (void **)&lve_lookup_slow }, #endif #if KMOD_LSM == 1 { "security_inode_follow_link", (void **)&lve_security_inode_follow_link }, #endif #endif { "proc_exit_connector", (void **)&lve_proc_exit_connector }, { "xfs_trans_dqresv", (void **)&lve_xfs_trans_dqresv }, { "cgroup_post_fork", (void **)&lve_cgroup_post_fork }, { "proc_sys_poll_notify", (void **)&lve_proc_sys_poll_notify }, { "tid_fd_dentry_operations", (void **)&lve_tid_fd_dentry_operations }, { "tty_ioctl", (void **)&lve_tty_ioctl}, #if RHEL_MAJOR == 0 { "de_thread", (void **)&lve_de_thread}, #endif #if KMOD_LSM == 1 { "security_inode_permission", (void **)&lve_security_inode_permission }, { "__ptrace_may_access", (void **)&lve___ptrace_may_access }, { "security_ptrace_traceme", (void **)&lve_security_ptrace_traceme }, #if defined(CONFIG_SECURITY_PATH) && (FEAT_LINK_PROT == 1) { "security_path_symlink", (void **)&lve_security_path_symlink }, #endif { "security_task_fix_setuid", (void **)&lve_security_task_fix_setuid }, { "security_socket_bind", (void **)&lve_security_socket_bind }, { "security_d_instantiate", (void **)&lve_security_d_instantiate }, #endif /* KMOD_LSM == 1 */ { "search_binary_handler", (void **)&lve_search_binary_handler }, }; #ifndef HAVE_KGDB_GETSYMVAL static int lve_lookup_name_s(void *data, const char *name, struct module *mod, unsigned long addr) { struct lve_sym_s *ls = data; if (!strcmp(name, ls->symname)) { *(ls->symaddr) = (void *)addr; return 1; } return 0; } #define lve_lookup_name(ls) kallsyms_on_each_symbol(lve_lookup_name_s, ls) int lve_resolve_symbols(void) { int i; mutex_lock(&module_mutex); for (i = 0; i < ARRAY_SIZE(lve_syms); i++) { if (!lve_lookup_name(&lve_syms[i])) { mutex_unlock(&module_mutex); LVE_ERR("cannot resolve %s\n", lve_syms[i].symname); return -ESRCH; } LVE_DBG("\t[%s]=%px\n", lve_syms[i].symname, *(lve_syms[i].symaddr)); } mutex_unlock(&module_mutex); return 0; } #else extern int kdbgetsymval(const char *, void *); int lve_resolve_symbols(void) { char *buff; int i; int ret = -EINVAL; int off = 0; unsigned long addr; buff = kmalloc(4096, GFP_KERNEL | __GFP_ZERO); if (buff == NULL) return -ENOMEM; if (!kdbgetsymval("try_module_get", (void *)buff)) { LVE_ERR("cannot resolve %s\n", "try_module_get"); goto out_err; } LVE_DBG("try_module_get %px\n", try_module_get); for(i = 0; i < 1024; i++) { memcpy(&addr, buff+i, sizeof(void *)); LVE_DBG("aa %lx\n", addr); if (addr == (unsigned long)try_module_get) { off = i; break; } } if (off == 0) { LVE_ERR("can't find right offset %px\n", buff); BUG_ON(1); } for (i = 0; i < ARRAY_SIZE(lve_syms); i++) { memset(buff, 0, 4096); if (!kdbgetsymval(lve_syms[i].symname, (void *)buff)) { LVE_ERR("cannot resolve %s\n", lve_syms[i].symname); goto out_err; } memcpy(&addr, buff+off, sizeof(void *)); *lve_syms[i].symaddr = (void *)addr; LVE_DBG("\t[%s]=%px\n", lve_syms[i].symname, *(lve_syms[i].symaddr)); } ret = 0; out_err: kfree(buff); if (ret) LVE_ERR("can't init symbol resolving\n"); return ret; } #endif