#include #include #include #include #include #include /* for mem cg */ #include #if LINUX_VERSION_CODE < KERNEL_VERSION(3,10,0) #error "please update a kernel" #endif #include "lve-api.h" #include "lve_internal.h" #include "cgroup_lib.h" #include "resource.h" #include "lve_debug.h" #include "cgroup_generic.h" #include "lve_global_params.h" unsigned int os_context_private_sz(void) { return sizeof(struct c_private); } unsigned int os_lvp_private_sz(void) { return sizeof(struct lvp_private); } #define PARSE_STAT_BUF 8192 static __s64 parse_stat(struct file *f, const char *statname) { char *buf, *s; __s64 rc, data; if (!f || !statname) return -1; s = buf = kmalloc(PARSE_STAT_BUF, GFP_KERNEL | __GFP_ZERO); if (!buf) return -1; rc = cgrp_param_get_string(f, buf, PARSE_STAT_BUF - 1); if (rc) goto out; /* Look up for the stat name, it should start from a * new line and be followed by a space */ while ((s = strstr(s, statname))) { if ((s == buf || s[-1] == '\n') && isspace(s[strlen(statname)])) break; s++; } if (!s) { rc = -1; goto out; } s += strlen(statname); /* skip spaces before the number */ while (isspace(*s)) s++; sscanf(s, "%lld", &data); if (data < 0) { rc = -1; goto out; } rc = data; out: kfree(buf); return rc; } void os_resource_usage(struct c_private *private, struct lve_usage *buf) { __s64 data, data_ext; struct light_ve *lve = os_lve(private); LVE_ENTER("getusage for %u\n", lve->lve_id); data = cgrp_param_get(private->filps[PARAM_CPU_STAT]); if(data > 0) { LVE_DBG("cpu usage "LPU64"\n", data); buf->data[RES_CPU].data = data; } data = cgrp_param_get(private->filps[PARAM_PIDS_CURRENT]); if (data > 0) { LVE_DBG("pids usage "LPU64"\n", data); buf->data[RES_NPROC].data = data; } data = parse_stat(private->filps[PARAM_PIDS_EVENTS], "max"); if (data > 0) { buf->data[RES_NPROC].fail = data; LVE_DBG("pids failcnt "LPU64"\n", buf->data[RES_NPROC].fail); } buf->data[RES_MEM].data = 0; if (param_is_enabled(LVE_MEMSTAT_NO_CACHE)) { data = data_ext = 0; /* do not account cached memory */ data = parse_stat(private->filps[PARAM_MEM_ANON_STAT], "total_active_anon"); LVE_DBG("active anon mem usage "LPU64"\n", data); data_ext = parse_stat(private->filps[PARAM_MEM_ANON_STAT], "total_inactive_anon"); if (data_ext > 0) { LVE_DBG("inactive anon mem usage "LPU64"\n", data_ext); data += data_ext; } } else { /* account a page cache */ data = cgrp_param_get(private->filps[PARAM_MEM_STAT]); if (data > 0) LVE_DBG("mem usage "LPU64"\n", data); } buf->data[RES_MEM_PHY].data = data >> PAGE_SHIFT; data = parse_stat(private->filps[PARAM_MEM_FAILCNT], "oom_kill"); if (data > 0) { LVE_DBG("mem failcnt "LPU64"\n", data); buf->data[RES_MEM_PHY].fail = data; } buf->data[RES_IO].data = os_lve(private)->lve_iolimit.total_io >> 10; buf->data[RES_IOPS].data = os_lve(private)->lve_iolimit.total_iops; } int os_resource_usage_clear(struct c_private *private) { return cgrp_param_set_u64(private->filps[PARAM_CPU_STAT], 0); } int os_resource_init(struct light_ve *ve __attribute__((unused))) { char name[MAX_GRP_NAMESZ]; generic_lve_path(name, ve->lve_id); return generic_lve_init(ve, name); } /* XXX TODO move to generic */ void os_resource_unlink(uint32_t id, struct c_private *lcontext) { char name[MAX_GRP_NAMESZ] = {0}; generic_resource_unlink(id, lcontext, name); } void os_resource_fini(struct light_ve *ve) { generic_lve_fini(ve); } static int os_force_pmem_limit(struct c_private *lcontext, int32_t new) { struct light_ve *lve = os_lve(lcontext); int rc; if (lve != lve->lve_lvp->lvp_default) { /* For LVE pmem limit update we just kill all threads */ if (!lve_kill_on_shrink) return -EBUSY; LVE_WARN("lve %u threads will be killed to reduce physmem" " usage below the new limit\n", lve->lve_id); lve_kill_all_threads(0, lve->lve_id); schedule_timeout_killable(msecs_to_jiffies(10)); rc = cgrp_param_set_u64(lcontext->filps[PARAM_MEM_LIMIT], (uint64_t)new << PAGE_SHIFT); } else { /* For LVP pmem limit update things are more complicated */ uint64_t lim; lim = cgrp_param_get(lcontext->filps[PARAM_MEM_STAT]); if (lim < 0) return lim; lim >>= PAGE_SHIFT; LVE_DBG("mem usage "LPU64"\n", lim); if (new > lim) lim = new; rc = cgrp_param_set_u64(lcontext->filps[PARAM_MEM_LIMIT], lim << PAGE_SHIFT); LVE_DBG("attempt to shrink memory to %llu (target %d), rc=%d\n", lim, new, rc); if (!rc && lim == new) { /* OK, we've finally managed to set the limit */ lve->lve_lvp->lvp_pmem_pending = 0; } else if (!rc || rc == -EBUSY) { /* Managed to set an intermediate value or * maybe raced and failed, schedule a new * update, but don't return an error to * the caller */ lve->lve_lvp->lvp_pmem_pending = 1; rc = 0; } else { /* Unexpected error, don't schedule an * update and try to return error */ lve->lve_lvp->lvp_pmem_pending = 0; } } return rc; } int os_resource_setup(struct c_private *lcontext, int32_t new, enum lve_limits custom) { int rc = 0; struct light_ve *lve = os_lve(lcontext); struct light_ve *reseller = NULL; /* Check if we are changing limits for a reseller */ if (lve != lve->lve_lvp->lvp_default) reseller = lve->lve_lvp->lvp_default; switch (custom) { case LIM_CPU: rc = lve_cgroup_cpu_set(lcontext, reseller ? reseller->lve_limits : lve->lve_limits, lve->lve_limits, new); if (rc < 0 && reseller) { LVE_ERR("failed to set lve(%d) cpu lim, rc %d, lvp(%d) limit %d," " reseller quota %lld\n", lve->lve_id, rc, lve->lve_lvp->lvp_id, reseller->lve_limits[LIM_CPU], cgrp_param_get(lve_private(reseller)->filps[PARAM_CPU_LIMIT])); } break; case LIM_CPUS: rc = lve_cgroup_cpus_set(lcontext, reseller ? reseller->lve_limits : lve->lve_limits, lve->lve_limits, new); break; case LIM_CPU_WEIGHT: rc = lve_cgroup_cpuw_set(lcontext, new); break; #ifdef HAVE_PIDS_CGRP case LIM_NPROC: if (new == 0) rc = lve_call(cgrp_param_set_string(lcontext->filps[PARAM_PIDS_LIMIT], "max", sizeof("max")), LVE_FAIL_SET_LIM_NPROC, -EINVAL); else rc = lve_call(cgrp_param_set_u64(lcontext->filps[PARAM_PIDS_LIMIT], new), LVE_FAIL_SET_LIM_NPROC, -EINVAL); break; #endif case LIM_MEMORY: LVE_DBG("LVE VMEM limit isn't supported\n"); break; case LIM_MEMORY_PHY: { uint64_t limit = new; LVE_DBG("set phy mem to %u\n", new); rc = lve_call(cgrp_param_set_s64(lcontext->filps[PARAM_MEM_LIMIT], limit ? (limit << PAGE_SHIFT) : -1), LVE_FAIL_SET_LIM_NPROC, -EINVAL); if (rc == -EBUSY) { LVE_DBG("memcg set phys memory limit %d\n", rc); } else if (rc) { LVE_ERR("memcg set phys memory limit %d\n", rc); } else if (lve == lve->lve_lvp->lvp_default) { lve->lve_lvp->lvp_pmem_pending = 0; } if (rc != -EBUSY) break; rc = os_force_pmem_limit(lcontext, new); break; } default: break; } return rc; } /* enter to memory / io control usage */ int os_resource_push(struct task_struct *task, struct c_private *lcontext) { struct lvp_ve_private *lvp; lvp = os_lve(lcontext)->lve_lvp; if (lvp->lvp_pmem_pending) os_force_pmem_limit(lve_private(lvp->lvp_default), lvp->lvp_default->lve_limits[LIM_MEMORY_PHY]); return generic_cgroup_enter(task, lcontext, RESOURCE_SUBSYS_MASK); } int os_cpu_enter(struct task_struct *task, struct c_private *lcontext) { return generic_cgroup_enter(task, lcontext, 1 << CPU_SUBSYS); } void os_lvp_fini(struct lvp_ve_private *lvp) { generic_lvp_fini(lvp); } int os_lvp_init(struct lvp_ve_private *lvp, void *data) { char name[MAX_GRP_NAMESZ]; int rc; generic_lvp_path(name, lvp->lvp_id); rc = generic_lvp_init(lvp, name); return rc; } int os_freezer_enter(struct task_struct *task, struct c_private *lcontext) { return generic_cgroup_enter(task, lcontext, 1 << FREEZER_SUBSYS); } int os_freezer_freeze(struct light_ve *ve) { return -ENOSYS; } int os_freezer_thaw(struct light_ve *ve) { return -ENOSYS; } int freezer_change_state(struct cgroup *c, int s) { /* XXX: FIXME */ return -ENOSYS; } int os_global_init(void) { int rc; LVE_ENTER("os_global_init\n"); memset(cmnt, 0, sizeof(*cmnt) * NR_SUBSYS); rc = mount_cgroup_root_fs(cmnt, CGROUPS_SUPPORTED); if (rc) return rc; return 0; } void os_global_fini(void) { umount_cgroup_root_fs(cmnt); }