android_kernel_oneplus_msm8998/include/linux/memcontrol.h
KAMEZAWA Hiroyuki 569b846df5 memcg: coalesce uncharge during unmap/truncate
In massive parallel enviroment, res_counter can be a performance
bottleneck.  One strong techinque to reduce lock contention is reducing
calls by coalescing some amount of calls into one.

Considering charge/uncharge chatacteristic,
	- charge is done one by one via demand-paging.
	- uncharge is done by
		- in chunk at munmap, truncate, exit, execve...
		- one by one via vmscan/paging.

It seems we have a chance to coalesce uncharges for improving scalability
at unmap/truncation.

This patch is a for coalescing uncharge.  For avoiding scattering memcg's
structure to functions under /mm, this patch adds memcg batch uncharge
information to the task.  A reason for per-task batching is for making use
of caller's context information.  We do batched uncharge (deleyed
uncharge) when truncation/unmap occurs but do direct uncharge when
uncharge is called by memory reclaim (vmscan.c).

The degree of coalescing depends on callers
  - at invalidate/trucate... pagevec size
  - at unmap ....ZAP_BLOCK_SIZE
(memory itself will be freed in this degree.)
Then, we'll not coalescing too much.

On x86-64 8cpu server, I tested overheads of memcg at page fault by
running a program which does map/fault/unmap in a loop. Running
a task per a cpu by taskset and see sum of the number of page faults
in 60secs.

[without memcg config]
  40156968  page-faults              #      0.085 M/sec   ( +-   0.046% )
  27.67 cache-miss/faults
[root cgroup]
  36659599  page-faults              #      0.077 M/sec   ( +-   0.247% )
  31.58 miss/faults
[in a child cgroup]
  18444157  page-faults              #      0.039 M/sec   ( +-   0.133% )
  69.96 miss/faults
[child with this patch]
  27133719  page-faults              #      0.057 M/sec   ( +-   0.155% )
  47.16 miss/faults

We can see some amounts of improvement.
(root cgroup doesn't affected by this patch)
Another patch for "charge" will follow this and above will be improved more.

Changelog(since 2009/10/02):
 - renamed filed of memcg_batch (as pages to bytes, memsw to memsw_bytes)
 - some clean up and commentary/description updates.
 - added initialize code to copy_process(). (possible bug fix)

Changelog(old):
 - fixed !CONFIG_MEM_CGROUP case.
 - rebased onto the latest mmotm + softlimit fix patches.
 - unified patch for callers
 - added commetns.
 - make ->do_batch as bool.
 - removed css_get() at el. We don't need it.

Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Balbir Singh <balbir@in.ibm.com>
Cc: Daisuke Nishimura <nishimura@mxp.nes.nec.co.jp>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2009-12-16 07:20:07 -08:00

305 lines
8 KiB
C

/* memcontrol.h - Memory Controller
*
* Copyright IBM Corporation, 2007
* Author Balbir Singh <balbir@linux.vnet.ibm.com>
*
* Copyright 2007 OpenVZ SWsoft Inc
* Author: Pavel Emelianov <xemul@openvz.org>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*/
#ifndef _LINUX_MEMCONTROL_H
#define _LINUX_MEMCONTROL_H
#include <linux/cgroup.h>
struct mem_cgroup;
struct page_cgroup;
struct page;
struct mm_struct;
#ifdef CONFIG_CGROUP_MEM_RES_CTLR
/*
* All "charge" functions with gfp_mask should use GFP_KERNEL or
* (gfp_mask & GFP_RECLAIM_MASK). In current implementatin, memcg doesn't
* alloc memory but reclaims memory from all available zones. So, "where I want
* memory from" bits of gfp_mask has no meaning. So any bits of that field is
* available but adding a rule is better. charge functions' gfp_mask should
* be set to GFP_KERNEL or gfp_mask & GFP_RECLAIM_MASK for avoiding ambiguous
* codes.
* (Of course, if memcg does memory allocation in future, GFP_KERNEL is sane.)
*/
extern int mem_cgroup_newpage_charge(struct page *page, struct mm_struct *mm,
gfp_t gfp_mask);
/* for swap handling */
extern int mem_cgroup_try_charge_swapin(struct mm_struct *mm,
struct page *page, gfp_t mask, struct mem_cgroup **ptr);
extern void mem_cgroup_commit_charge_swapin(struct page *page,
struct mem_cgroup *ptr);
extern void mem_cgroup_cancel_charge_swapin(struct mem_cgroup *ptr);
extern int mem_cgroup_cache_charge(struct page *page, struct mm_struct *mm,
gfp_t gfp_mask);
extern void mem_cgroup_add_lru_list(struct page *page, enum lru_list lru);
extern void mem_cgroup_del_lru_list(struct page *page, enum lru_list lru);
extern void mem_cgroup_rotate_lru_list(struct page *page, enum lru_list lru);
extern void mem_cgroup_del_lru(struct page *page);
extern void mem_cgroup_move_lists(struct page *page,
enum lru_list from, enum lru_list to);
/* For coalescing uncharge for reducing memcg' overhead*/
extern void mem_cgroup_uncharge_start(void);
extern void mem_cgroup_uncharge_end(void);
extern void mem_cgroup_uncharge_page(struct page *page);
extern void mem_cgroup_uncharge_cache_page(struct page *page);
extern int mem_cgroup_shmem_charge_fallback(struct page *page,
struct mm_struct *mm, gfp_t gfp_mask);
extern unsigned long mem_cgroup_isolate_pages(unsigned long nr_to_scan,
struct list_head *dst,
unsigned long *scanned, int order,
int mode, struct zone *z,
struct mem_cgroup *mem_cont,
int active, int file);
extern void mem_cgroup_out_of_memory(struct mem_cgroup *mem, gfp_t gfp_mask);
int task_in_mem_cgroup(struct task_struct *task, const struct mem_cgroup *mem);
extern struct mem_cgroup *mem_cgroup_from_task(struct task_struct *p);
static inline
int mm_match_cgroup(const struct mm_struct *mm, const struct mem_cgroup *cgroup)
{
struct mem_cgroup *mem;
rcu_read_lock();
mem = mem_cgroup_from_task(rcu_dereference((mm)->owner));
rcu_read_unlock();
return cgroup == mem;
}
extern int
mem_cgroup_prepare_migration(struct page *page, struct mem_cgroup **ptr);
extern void mem_cgroup_end_migration(struct mem_cgroup *mem,
struct page *oldpage, struct page *newpage);
/*
* For memory reclaim.
*/
extern int mem_cgroup_get_reclaim_priority(struct mem_cgroup *mem);
extern void mem_cgroup_note_reclaim_priority(struct mem_cgroup *mem,
int priority);
extern void mem_cgroup_record_reclaim_priority(struct mem_cgroup *mem,
int priority);
int mem_cgroup_inactive_anon_is_low(struct mem_cgroup *memcg);
int mem_cgroup_inactive_file_is_low(struct mem_cgroup *memcg);
unsigned long mem_cgroup_zone_nr_pages(struct mem_cgroup *memcg,
struct zone *zone,
enum lru_list lru);
struct zone_reclaim_stat *mem_cgroup_get_reclaim_stat(struct mem_cgroup *memcg,
struct zone *zone);
struct zone_reclaim_stat*
mem_cgroup_get_reclaim_stat_from_page(struct page *page);
extern void mem_cgroup_print_oom_info(struct mem_cgroup *memcg,
struct task_struct *p);
#ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP
extern int do_swap_account;
#endif
static inline bool mem_cgroup_disabled(void)
{
if (mem_cgroup_subsys.disabled)
return true;
return false;
}
extern bool mem_cgroup_oom_called(struct task_struct *task);
void mem_cgroup_update_mapped_file_stat(struct page *page, int val);
unsigned long mem_cgroup_soft_limit_reclaim(struct zone *zone, int order,
gfp_t gfp_mask, int nid,
int zid);
#else /* CONFIG_CGROUP_MEM_RES_CTLR */
struct mem_cgroup;
static inline int mem_cgroup_newpage_charge(struct page *page,
struct mm_struct *mm, gfp_t gfp_mask)
{
return 0;
}
static inline int mem_cgroup_cache_charge(struct page *page,
struct mm_struct *mm, gfp_t gfp_mask)
{
return 0;
}
static inline int mem_cgroup_try_charge_swapin(struct mm_struct *mm,
struct page *page, gfp_t gfp_mask, struct mem_cgroup **ptr)
{
return 0;
}
static inline void mem_cgroup_commit_charge_swapin(struct page *page,
struct mem_cgroup *ptr)
{
}
static inline void mem_cgroup_cancel_charge_swapin(struct mem_cgroup *ptr)
{
}
static inline void mem_cgroup_uncharge_start(void)
{
}
static inline void mem_cgroup_uncharge_end(void)
{
}
static inline void mem_cgroup_uncharge_page(struct page *page)
{
}
static inline void mem_cgroup_uncharge_cache_page(struct page *page)
{
}
static inline int mem_cgroup_shmem_charge_fallback(struct page *page,
struct mm_struct *mm, gfp_t gfp_mask)
{
return 0;
}
static inline void mem_cgroup_add_lru_list(struct page *page, int lru)
{
}
static inline void mem_cgroup_del_lru_list(struct page *page, int lru)
{
return ;
}
static inline void mem_cgroup_rotate_lru_list(struct page *page, int lru)
{
return ;
}
static inline void mem_cgroup_del_lru(struct page *page)
{
return ;
}
static inline void
mem_cgroup_move_lists(struct page *page, enum lru_list from, enum lru_list to)
{
}
static inline int mm_match_cgroup(struct mm_struct *mm, struct mem_cgroup *mem)
{
return 1;
}
static inline int task_in_mem_cgroup(struct task_struct *task,
const struct mem_cgroup *mem)
{
return 1;
}
static inline int
mem_cgroup_prepare_migration(struct page *page, struct mem_cgroup **ptr)
{
return 0;
}
static inline void mem_cgroup_end_migration(struct mem_cgroup *mem,
struct page *oldpage,
struct page *newpage)
{
}
static inline int mem_cgroup_get_reclaim_priority(struct mem_cgroup *mem)
{
return 0;
}
static inline void mem_cgroup_note_reclaim_priority(struct mem_cgroup *mem,
int priority)
{
}
static inline void mem_cgroup_record_reclaim_priority(struct mem_cgroup *mem,
int priority)
{
}
static inline bool mem_cgroup_disabled(void)
{
return true;
}
static inline bool mem_cgroup_oom_called(struct task_struct *task)
{
return false;
}
static inline int
mem_cgroup_inactive_anon_is_low(struct mem_cgroup *memcg)
{
return 1;
}
static inline int
mem_cgroup_inactive_file_is_low(struct mem_cgroup *memcg)
{
return 1;
}
static inline unsigned long
mem_cgroup_zone_nr_pages(struct mem_cgroup *memcg, struct zone *zone,
enum lru_list lru)
{
return 0;
}
static inline struct zone_reclaim_stat*
mem_cgroup_get_reclaim_stat(struct mem_cgroup *memcg, struct zone *zone)
{
return NULL;
}
static inline struct zone_reclaim_stat*
mem_cgroup_get_reclaim_stat_from_page(struct page *page)
{
return NULL;
}
static inline void
mem_cgroup_print_oom_info(struct mem_cgroup *memcg, struct task_struct *p)
{
}
static inline void mem_cgroup_update_mapped_file_stat(struct page *page,
int val)
{
}
static inline
unsigned long mem_cgroup_soft_limit_reclaim(struct zone *zone, int order,
gfp_t gfp_mask, int nid, int zid)
{
return 0;
}
#endif /* CONFIG_CGROUP_MEM_CONT */
#endif /* _LINUX_MEMCONTROL_H */