There is a small race in do_swap_page(). When the page swapped-in is charged, the mapcount can be greater than 0. But, at the same time some process (shares it ) call unmap and make mapcount 1->0 and the page is uncharged. CPUA CPUB mapcount == 1. (1) charge if mapcount==0 zap_pte_range() (2) mapcount 1 => 0. (3) uncharge(). (success) (4) set page's rmap() mapcount 0=>1 Then, this swap page's account is leaked. For fixing this, I added a new interface. - charge account to res_counter by PAGE_SIZE and try to free pages if necessary. - commit register page_cgroup and add to LRU if necessary. - cancel uncharge PAGE_SIZE because of do_swap_page failure. CPUA (1) charge (always) (2) set page's rmap (mapcount > 0) (3) commit charge was necessary or not after set_pte(). This protocol uses PCG_USED bit on page_cgroup for avoiding over accounting. Usual mem_cgroup_charge_common() does charge -> commit at a time. And this patch also adds following function to clarify all charges. - mem_cgroup_newpage_charge() ....replacement for mem_cgroup_charge() called against newly allocated anon pages. - mem_cgroup_charge_migrate_fixup() called only from remove_migration_ptes(). we'll have to rewrite this later.(this patch just keeps old behavior) This function will be removed by additional patch to make migration clearer. Good for clarifying "what we do" Then, we have 4 following charge points. - newpage - swap-in - add-to-cache. - migration. [akpm@linux-foundation.org: add missing inline directives to stubs] Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com> Reviewed-by: Daisuke Nishimura <nishimura@mxp.nes.nec.co.jp> Cc: Balbir Singh <balbir@in.ibm.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
190 lines
5 KiB
C
190 lines
5 KiB
C
/* memcontrol.h - Memory Controller
|
|
*
|
|
* Copyright IBM Corporation, 2007
|
|
* Author Balbir Singh <balbir@linux.vnet.ibm.com>
|
|
*
|
|
* Copyright 2007 OpenVZ SWsoft Inc
|
|
* Author: Pavel Emelianov <xemul@openvz.org>
|
|
*
|
|
* This program is free software; you can redistribute it and/or modify
|
|
* it under the terms of the GNU General Public License as published by
|
|
* the Free Software Foundation; either version 2 of the License, or
|
|
* (at your option) any later version.
|
|
*
|
|
* This program is distributed in the hope that it will be useful,
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
* GNU General Public License for more details.
|
|
*/
|
|
|
|
#ifndef _LINUX_MEMCONTROL_H
|
|
#define _LINUX_MEMCONTROL_H
|
|
|
|
struct mem_cgroup;
|
|
struct page_cgroup;
|
|
struct page;
|
|
struct mm_struct;
|
|
|
|
#ifdef CONFIG_CGROUP_MEM_RES_CTLR
|
|
|
|
extern int mem_cgroup_newpage_charge(struct page *page, struct mm_struct *mm,
|
|
gfp_t gfp_mask);
|
|
extern int mem_cgroup_charge_migrate_fixup(struct page *page,
|
|
struct mm_struct *mm, gfp_t gfp_mask);
|
|
/* for swap handling */
|
|
extern int mem_cgroup_try_charge(struct mm_struct *mm,
|
|
gfp_t gfp_mask, struct mem_cgroup **ptr);
|
|
extern void mem_cgroup_commit_charge_swapin(struct page *page,
|
|
struct mem_cgroup *ptr);
|
|
extern void mem_cgroup_cancel_charge_swapin(struct mem_cgroup *ptr);
|
|
|
|
extern int mem_cgroup_cache_charge(struct page *page, struct mm_struct *mm,
|
|
gfp_t gfp_mask);
|
|
extern void mem_cgroup_move_lists(struct page *page, enum lru_list lru);
|
|
extern void mem_cgroup_uncharge_page(struct page *page);
|
|
extern void mem_cgroup_uncharge_cache_page(struct page *page);
|
|
extern int mem_cgroup_shrink_usage(struct mm_struct *mm, gfp_t gfp_mask);
|
|
|
|
extern unsigned long mem_cgroup_isolate_pages(unsigned long nr_to_scan,
|
|
struct list_head *dst,
|
|
unsigned long *scanned, int order,
|
|
int mode, struct zone *z,
|
|
struct mem_cgroup *mem_cont,
|
|
int active, int file);
|
|
extern void mem_cgroup_out_of_memory(struct mem_cgroup *mem, gfp_t gfp_mask);
|
|
int task_in_mem_cgroup(struct task_struct *task, const struct mem_cgroup *mem);
|
|
|
|
extern struct mem_cgroup *mem_cgroup_from_task(struct task_struct *p);
|
|
|
|
#define mm_match_cgroup(mm, cgroup) \
|
|
((cgroup) == mem_cgroup_from_task((mm)->owner))
|
|
|
|
extern int
|
|
mem_cgroup_prepare_migration(struct page *page, struct page *newpage);
|
|
extern void mem_cgroup_end_migration(struct page *page);
|
|
|
|
/*
|
|
* For memory reclaim.
|
|
*/
|
|
extern int mem_cgroup_calc_mapped_ratio(struct mem_cgroup *mem);
|
|
extern long mem_cgroup_reclaim_imbalance(struct mem_cgroup *mem);
|
|
|
|
extern int mem_cgroup_get_reclaim_priority(struct mem_cgroup *mem);
|
|
extern void mem_cgroup_note_reclaim_priority(struct mem_cgroup *mem,
|
|
int priority);
|
|
extern void mem_cgroup_record_reclaim_priority(struct mem_cgroup *mem,
|
|
int priority);
|
|
|
|
extern long mem_cgroup_calc_reclaim(struct mem_cgroup *mem, struct zone *zone,
|
|
int priority, enum lru_list lru);
|
|
|
|
|
|
#else /* CONFIG_CGROUP_MEM_RES_CTLR */
|
|
struct mem_cgroup;
|
|
|
|
static inline int mem_cgroup_newpage_charge(struct page *page,
|
|
struct mm_struct *mm, gfp_t gfp_mask)
|
|
{
|
|
return 0;
|
|
}
|
|
|
|
static inline int mem_cgroup_cache_charge(struct page *page,
|
|
struct mm_struct *mm, gfp_t gfp_mask)
|
|
{
|
|
return 0;
|
|
}
|
|
|
|
static inline int mem_cgroup_charge_migrate_fixup(struct page *page,
|
|
struct mm_struct *mm, gfp_t gfp_mask)
|
|
{
|
|
return 0;
|
|
}
|
|
|
|
static inline int mem_cgroup_try_charge(struct mm_struct *mm,
|
|
gfp_t gfp_mask, struct mem_cgroup **ptr)
|
|
{
|
|
return 0;
|
|
}
|
|
|
|
static inline void mem_cgroup_commit_charge_swapin(struct page *page,
|
|
struct mem_cgroup *ptr)
|
|
{
|
|
}
|
|
|
|
static inline void mem_cgroup_cancel_charge_swapin(struct mem_cgroup *ptr)
|
|
{
|
|
}
|
|
|
|
static inline void mem_cgroup_uncharge_page(struct page *page)
|
|
{
|
|
}
|
|
|
|
static inline void mem_cgroup_uncharge_cache_page(struct page *page)
|
|
{
|
|
}
|
|
|
|
static inline int mem_cgroup_shrink_usage(struct mm_struct *mm, gfp_t gfp_mask)
|
|
{
|
|
return 0;
|
|
}
|
|
|
|
static inline void mem_cgroup_move_lists(struct page *page, bool active)
|
|
{
|
|
}
|
|
|
|
static inline int mm_match_cgroup(struct mm_struct *mm, struct mem_cgroup *mem)
|
|
{
|
|
return 1;
|
|
}
|
|
|
|
static inline int task_in_mem_cgroup(struct task_struct *task,
|
|
const struct mem_cgroup *mem)
|
|
{
|
|
return 1;
|
|
}
|
|
|
|
static inline int
|
|
mem_cgroup_prepare_migration(struct page *page, struct page *newpage)
|
|
{
|
|
return 0;
|
|
}
|
|
|
|
static inline void mem_cgroup_end_migration(struct page *page)
|
|
{
|
|
}
|
|
|
|
static inline int mem_cgroup_calc_mapped_ratio(struct mem_cgroup *mem)
|
|
{
|
|
return 0;
|
|
}
|
|
|
|
static inline int mem_cgroup_reclaim_imbalance(struct mem_cgroup *mem)
|
|
{
|
|
return 0;
|
|
}
|
|
|
|
static inline int mem_cgroup_get_reclaim_priority(struct mem_cgroup *mem)
|
|
{
|
|
return 0;
|
|
}
|
|
|
|
static inline void mem_cgroup_note_reclaim_priority(struct mem_cgroup *mem,
|
|
int priority)
|
|
{
|
|
}
|
|
|
|
static inline void mem_cgroup_record_reclaim_priority(struct mem_cgroup *mem,
|
|
int priority)
|
|
{
|
|
}
|
|
|
|
static inline long mem_cgroup_calc_reclaim(struct mem_cgroup *mem,
|
|
struct zone *zone, int priority,
|
|
enum lru_list lru)
|
|
{
|
|
return 0;
|
|
}
|
|
#endif /* CONFIG_CGROUP_MEM_CONT */
|
|
|
|
#endif /* _LINUX_MEMCONTROL_H */
|
|
|