#include #include #include #include #include #include #include #include #include #include #include "lve_internal.h" #include "lve_debug.h" #include "resource.h" #include "cgroup_lib.h" #include "cgroup_generic.h" struct cgrp_mount cmnt[NR_SUBSYS]; u64 ioprio_weight[] = {320, 365, 410, 460, 500, 550, 600, 640}; #ifdef HAVE_DENTRY_OPEN_PATH struct file *cgrp_dentry_open(struct dentry *d, struct vfsmount *m) { struct path path; path.mnt = m; path.dentry = d; return dentry_open(&path, O_RDWR, current_cred()); } #else struct file *cgrp_dentry_open(struct dentry *d, struct vfsmount *m) { dget(d); mntget(m); return dentry_open(d, m, O_RDWR, current_cred()); } #endif struct file *cgrp_param_open(struct vfsmount *v, struct dentry *grp, const char *param) { struct file *filp; struct dentry *p; LVE_ENTER("open %s - %px %px\n", param, v, grp); LOCK_INODE(grp->d_inode); p = lve_call(lookup_one_len(param, grp, strlen(param)), LVE_FAIL_CGRP_PARAM_GET, ERR_PTR(-ENOMEM)); UNLOCK_INODE(grp->d_inode); if (IS_ERR(p)) { LVE_ERR("cgrp_param_lookup %s failed with %ld\n", param, PTR_ERR(p)); filp = (void *)p; goto out; } if (!p->d_inode) { filp = ERR_PTR(-ENOENT); goto out_neg; } filp = cgrp_dentry_open(p, v); if (IS_ERR(filp)) LVE_ERR("cgrp_param_open failed %ld\n", PTR_ERR(filp)); /* * cgrp_dentry_open() has taken the required references to * mnt and dentry, so now drop the dentry reference from * lookup_one_len(). */ out_neg: dput(p); out: LVE_DBG("return %px\n", filp); return filp; } void cgrp_param_release(struct file *filp) { LVE_ENTER("close %px\n", filp); #ifdef HAVE___FPUT /* avoid async release when possible */ if (atomic_long_dec_and_test(&filp->f_count)) lve__fput(filp); #else filp_close(filp, NULL); #endif } int cgrp_param_set_string(struct file *filp, const char *buf, size_t nbytes) { int rc = -EBADFD; if (filp == NULL) return 0; LVE_ENTER("write %px %s - %s\n", filp, filp->f_path.dentry->d_name.name, buf); rc = lve_kernel_write(filp, buf, nbytes); if (rc > 0) rc = 0; LVE_DBG("write return %d\n", rc); return rc; } int cgrp_param_get_string(struct file *filp, char *buf, size_t nbytes) { int rc = -EBADFD; if (filp == NULL) return 0; LVE_ENTER("read %px %s\n", filp, filp->f_path.dentry->d_name.name); rc = lve_kernel_read(filp, buf, nbytes); if (rc > 0) rc = 0; LVE_DBG("read return %d - %s\n", rc, rc == 0 ? buf : NULL); return rc; } int cgrp_param_set_u64(struct file *filp, __u64 data) { char sval[22] = {0}; snprintf(sval, sizeof(sval), "%llu", data); return cgrp_param_set_string(filp, sval, sizeof(sval)); } int cgrp_param_set_s64(struct file *filp, s64 data) { char sval[22] = {0}; snprintf(sval, sizeof(sval), "%lld", data); return cgrp_param_set_string(filp, sval, sizeof(sval)); } /* return <0 if error */ __s64 cgrp_param_get(struct file *filp) { int rc; /* * NULL-terminated string with possible newline at the end */ char sval[23] = {0}; s64 sval_res; rc = cgrp_param_get_string(filp, sval, sizeof(sval)); if (rc < 0) goto out; rc = kstrtos64(sval, 0, &sval_res); if (rc == 0) return sval_res; out: return (__s64)rc; } int cgrp_param_open_write_string(struct vfsmount *mnt, struct dentry *cgrp, const char *param, const char *buf, unsigned count) { struct file *filp; int rc; if (!mnt || !cgrp) return -ENOENT; filp = cgrp_param_open(mnt, cgrp, param); if (IS_ERR(filp)) { LVE_ERR("can't open %s, rc=%ld\n", param, PTR_ERR(filp)); return PTR_ERR(filp); } rc = cgrp_param_set_string(filp, buf, count); if (rc != 0) LVE_ERR("failed to write %.*s to %s, rc=%d\n", count, buf, param, rc); cgrp_param_release(filp); return rc; } int cgrp_param_open_read_string(struct vfsmount *mnt, struct dentry *cgrp, const char *param, char *buf, unsigned count) { struct file *filp; int rc; if (!mnt || !cgrp) return -ENOENT; filp = cgrp_param_open(mnt, cgrp, param); if (IS_ERR(filp)) { LVE_ERR("can't open %s, rc=%ld\n", param, PTR_ERR(filp)); return PTR_ERR(filp); } rc = cgrp_param_get_string(filp, buf, count); if (rc != 0) LVE_ERR("failed to read %.*s from %s, rc=%d\n", count, buf, param, rc); cgrp_param_release(filp); return rc; } int cgrp_populate_dir(struct dentry **cgrp, struct file **filp, const struct params *p, int nr_params) { int i, rc = 0; const char *p_name; struct vfsmount *p_mnt; for (i = 0; i < nr_params; i++) { p_name = p[i].p_name; if (!p_name) continue; p_mnt = cmnt[p[i].p_subsys].mnt_root; filp[i] = cgrp_param_open(p_mnt, cgrp[p[i].p_subsys], p_name); if (IS_ERR(filp[i])) { /* Ok, in RHEL8 pids.max and pids.current are not * available in /, ignore that for now */ if (p[i].p_subsys != PIDS_SUBSYS) { rc = PTR_ERR(filp[i]); LVE_ERR("can't setup param %s <> %d\n", p_name, rc); filp[i] = NULL; break; } filp[i] = NULL; } } return rc; } unsigned long get_unlink_id(void) { static atomic_t counter = ATOMIC_INIT(0); return atomic_inc_return(&counter); } int cgrp_obfuscate(struct dentry *de, char *newname) { struct inode *pinode; struct dentry *new; int rc; BUG_ON(de == NULL); pinode = de->d_parent->d_inode; BUG_ON(pinode == NULL); LVE_DBG("obfuscating %.*s\n", de->d_name.len, de->d_name.name); LVE_DBG("cgrp = %px\n", de); LOCK_INODE(pinode); new = lookup_one_len(newname, de->d_parent, strlen(newname)); if (IS_ERR(new)) { UNLOCK_INODE(pinode); rc = PTR_ERR(new); goto out; } rc = lve_vfs_rename(pinode, de, pinode, new); UNLOCK_INODE(pinode); dput(new); out: if (rc != 0) LVE_ERR("failed to rename cgrp %.*s -> %s, rc %d\n", de->d_name.len, de->d_name.name, newname, rc); else LVE_DBG("cgrp_obfuscate newname=%s rc=%d\n", newname, rc); return rc; } struct dentry *lve_cgroup_get_root(struct vfsmount *mnt) { return mnt->mnt_root; } char *mnt_opts[] = { [MEM_SUBSYS] = "memory", [BLK_SUBSYS] = "blkio", #ifdef CONFIG_VZ_FAIRSCHED [CPU_SUBSYS] = "name=fairsched,cpu,cpuacct,cpuset", #else [CPU_SUBSYS] = "cpu,cpuacct", #endif [FREEZER_SUBSYS] = "freezer", [PIDS_SUBSYS] = "pids", [NETMARK_SUBSYS] = "net_cls,net_prio", #if OPENVZ_VERSION > 0 [UB_SUBSYS] = "beancounter" #endif }; int mount_cgroup_root_fs(struct cgrp_mount *cmnt, u64 mask) { int i, n, rc; struct file_system_type *cgroup_fs_type; cgroup_fs_type = get_fs_type("cgroup"); if (IS_ERR(cgroup_fs_type)) { LVE_ERR("cgroup filesystem type %ld", PTR_ERR(cgroup_fs_type)); return PTR_ERR(cgroup_fs_type); } for (i = 0; i < NR_SUBSYS; i++) { char opt[256]; if (((1 << i) & mask) == 0) continue; strcpy(opt, mnt_opts[i]); LVE_DBG("mount %d - %s\n", i, mnt_opts[i]); cmnt[i].mnt_root = lve_call(vfs_kern_mount(cgroup_fs_type, 0, cgroup_fs_type->name, opt), LVE_FAIL_MOUNT_CGROUP_ROOTFS, ERR_PTR(-ENOMEM)); if (IS_ERR(cmnt[i].mnt_root)) { rc = PTR_ERR(cmnt[i].mnt_root); LVE_ERR("failed to mount cgroup(%s), rc=%d\n", mnt_opts[i], rc); goto err; } cmnt[i].cgrp_root = lve_cgroup_get_root(cmnt[i].mnt_root); } lve_put_filesystem(cgroup_fs_type); return 0; err: for (n = 0; n < i; n++) { mntput(cmnt[n].mnt_root); cmnt[n].mnt_root = NULL; } lve_put_filesystem(cgroup_fs_type); return rc; } void umount_cgroup_root_fs(struct cgrp_mount *cmnt) { int i; for (i = 0; i < NR_SUBSYS; i++) { if (cmnt[i].mnt_root != NULL) mntput(cmnt[i].mnt_root); } } struct dentry *lve_cgroup_kernel_open(struct dentry *parent, enum lve_cgroup_open_flags flags, const char *name) { struct dentry *dentry; int ret = 0; LOCK_INODE_NESTED(parent->d_inode, I_MUTEX_PARENT); #ifdef CONFIG_KERNFS retry: #endif dentry = lookup_one_len(name, parent, strlen(name)); if (IS_ERR(dentry)) goto out; if (flags & LVE_CGRP_CREAT) { if ((flags & LVE_CGRP_EXCL) && dentry->d_inode) { ret = -EEXIST; } else if (!dentry->d_inode) { ret = vfs_mkdir( #ifdef HAVE_VFS_RMDIR_3ARG &init_user_ns, #endif parent->d_inode, dentry, 0755); /* * Under kernfs cgroups, you cannot keep using * the dentry used for mkdir, it becomes invalid */ #ifdef CONFIG_KERNFS if (!ret) { dput(dentry); flags &= ~LVE_CGRP_CREAT; goto retry; } #endif } } if (ret || !dentry->d_inode) { dput(dentry); dentry = ERR_PTR(ret); } out: UNLOCK_INODE(parent->d_inode); return dentry; } int lve_cgroup_kernel_remove(struct dentry *cgrp) { struct dentry *parent = cgrp->d_parent; int ret = 0; LOCK_INODE_NESTED(parent->d_inode, I_MUTEX_PARENT); if (cgrp->d_inode == NULL) goto unlock; ret = vfs_rmdir( #ifdef HAVE_VFS_RMDIR_3ARG &init_user_ns, #endif parent->d_inode, cgrp); if (ret < 0) LVE_ERR("failed to rmdir %s %px, rc %d\n", cgrp->d_name.name, cgrp, ret); unlock: UNLOCK_INODE(parent->d_inode); return ret; } void lve_cgroup_kernel_close(struct dentry *cgrp) { dput(cgrp); }