android_kernel_oneplus_msm8998/include/linux/vm_event_item.h

97 lines
2.5 KiB
C
Raw Normal View History

#ifndef VM_EVENT_ITEM_H_INCLUDED
#define VM_EVENT_ITEM_H_INCLUDED
#ifdef CONFIG_ZONE_DMA
#define DMA_ZONE(xx) xx##_DMA,
#else
#define DMA_ZONE(xx)
#endif
#ifdef CONFIG_ZONE_DMA32
#define DMA32_ZONE(xx) xx##_DMA32,
#else
#define DMA32_ZONE(xx)
#endif
#ifdef CONFIG_HIGHMEM
#define HIGHMEM_ZONE(xx) xx##_HIGH,
#else
#define HIGHMEM_ZONE(xx)
#endif
#define FOR_ALL_ZONES(xx) DMA_ZONE(xx) DMA32_ZONE(xx) xx##_NORMAL, HIGHMEM_ZONE(xx) xx##_MOVABLE
enum vm_event_item { PGPGIN, PGPGOUT, PGPGOUTCLEAN, PSWPIN, PSWPOUT,
FOR_ALL_ZONES(PGALLOC),
PGFREE, PGACTIVATE, PGDEACTIVATE,
PGFAULT, PGMAJFAULT,
FOR_ALL_ZONES(PGREFILL),
FOR_ALL_ZONES(PGSTEAL_KSWAPD),
FOR_ALL_ZONES(PGSTEAL_DIRECT),
FOR_ALL_ZONES(PGSCAN_KSWAPD),
FOR_ALL_ZONES(PGSCAN_DIRECT),
PGSCAN_DIRECT_THROTTLE,
#ifdef CONFIG_NUMA
PGSCAN_ZONE_RECLAIM_FAILED,
#endif
PGINODESTEAL, SLABS_SCANNED, KSWAPD_INODESTEAL,
KSWAPD_LOW_WMARK_HIT_QUICKLY, KSWAPD_HIGH_WMARK_HIT_QUICKLY,
PAGEOUTRUN, ALLOCSTALL, PGROTATED,
2014-04-03 14:48:19 -07:00
DROP_PAGECACHE, DROP_SLAB,
mm: numa: Add pte updates, hinting and migration stats It is tricky to quantify the basic cost of automatic NUMA placement in a meaningful manner. This patch adds some vmstats that can be used as part of a basic costing model. u = basic unit = sizeof(void *) Ca = cost of struct page access = sizeof(struct page) / u Cpte = Cost PTE access = Ca Cupdate = Cost PTE update = (2 * Cpte) + (2 * Wlock) where Cpte is incurred twice for a read and a write and Wlock is a constant representing the cost of taking or releasing a lock Cnumahint = Cost of a minor page fault = some high constant e.g. 1000 Cpagerw = Cost to read or write a full page = Ca + PAGE_SIZE/u Ci = Cost of page isolation = Ca + Wi where Wi is a constant that should reflect the approximate cost of the locking operation Cpagecopy = Cpagerw + (Cpagerw * Wnuma) + Ci + (Ci * Wnuma) where Wnuma is the approximate NUMA factor. 1 is local. 1.2 would imply that remote accesses are 20% more expensive Balancing cost = Cpte * numa_pte_updates + Cnumahint * numa_hint_faults + Ci * numa_pages_migrated + Cpagecopy * numa_pages_migrated Note that numa_pages_migrated is used as a measure of how many pages were isolated even though it would miss pages that failed to migrate. A vmstat counter could have been added for it but the isolation cost is pretty marginal in comparison to the overall cost so it seemed overkill. The ideal way to measure automatic placement benefit would be to count the number of remote accesses versus local accesses and do something like benefit = (remote_accesses_before - remove_access_after) * Wnuma but the information is not readily available. As a workload converges, the expection would be that the number of remote numa hints would reduce to 0. convergence = numa_hint_faults_local / numa_hint_faults where this is measured for the last N number of numa hints recorded. When the workload is fully converged the value is 1. This can measure if the placement policy is converging and how fast it is doing it. Signed-off-by: Mel Gorman <mgorman@suse.de> Acked-by: Rik van Riel <riel@redhat.com>
2012-11-02 14:52:48 +00:00
#ifdef CONFIG_NUMA_BALANCING
NUMA_PTE_UPDATES,
mm: numa: return the number of base pages altered by protection changes Commit 0255d4918480 ("mm: Account for a THP NUMA hinting update as one PTE update") was added to account for the number of PTE updates when marking pages prot_numa. task_numa_work was using the old return value to track how much address space had been updated. Altering the return value causes the scanner to do more work than it is configured or documented to in a single unit of work. This patch reverts that commit and accounts for the number of THP updates separately in vmstat. It is up to the administrator to interpret the pair of values correctly. This is a straight-forward operation and likely to only be of interest when actively debugging NUMA balancing problems. The impact of this patch is that the NUMA PTE scanner will scan slower when THP is enabled and workloads may converge slower as a result. On the flip size system CPU usage should be lower than recent tests reported. This is an illustrative example of a short single JVM specjbb test specjbb 3.12.0 3.12.0 vanilla acctupdates TPut 1 26143.00 ( 0.00%) 25747.00 ( -1.51%) TPut 7 185257.00 ( 0.00%) 183202.00 ( -1.11%) TPut 13 329760.00 ( 0.00%) 346577.00 ( 5.10%) TPut 19 442502.00 ( 0.00%) 460146.00 ( 3.99%) TPut 25 540634.00 ( 0.00%) 549053.00 ( 1.56%) TPut 31 512098.00 ( 0.00%) 519611.00 ( 1.47%) TPut 37 461276.00 ( 0.00%) 474973.00 ( 2.97%) TPut 43 403089.00 ( 0.00%) 414172.00 ( 2.75%) 3.12.0 3.12.0 vanillaacctupdates User 5169.64 5184.14 System 100.45 80.02 Elapsed 252.75 251.85 Performance is similar but note the reduction in system CPU time. While this showed a performance gain, it will not be universal but at least it'll be behaving as documented. The vmstats are obviously different but here is an obvious interpretation of them from mmtests. 3.12.0 3.12.0 vanillaacctupdates NUMA page range updates 1408326 11043064 NUMA huge PMD updates 0 21040 NUMA PTE updates 1408326 291624 "NUMA page range updates" == nr_pte_updates and is the value returned to the NUMA pte scanner. NUMA huge PMD updates were the number of THP updates which in combination can be used to calculate how many ptes were updated from userspace. Signed-off-by: Mel Gorman <mgorman@suse.de> Reported-by: Alex Thorlton <athorlton@sgi.com> Reviewed-by: Rik van Riel <riel@redhat.com> Cc: <stable@vger.kernel.org> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2013-11-12 15:08:32 -08:00
NUMA_HUGE_PTE_UPDATES,
mm: numa: Add pte updates, hinting and migration stats It is tricky to quantify the basic cost of automatic NUMA placement in a meaningful manner. This patch adds some vmstats that can be used as part of a basic costing model. u = basic unit = sizeof(void *) Ca = cost of struct page access = sizeof(struct page) / u Cpte = Cost PTE access = Ca Cupdate = Cost PTE update = (2 * Cpte) + (2 * Wlock) where Cpte is incurred twice for a read and a write and Wlock is a constant representing the cost of taking or releasing a lock Cnumahint = Cost of a minor page fault = some high constant e.g. 1000 Cpagerw = Cost to read or write a full page = Ca + PAGE_SIZE/u Ci = Cost of page isolation = Ca + Wi where Wi is a constant that should reflect the approximate cost of the locking operation Cpagecopy = Cpagerw + (Cpagerw * Wnuma) + Ci + (Ci * Wnuma) where Wnuma is the approximate NUMA factor. 1 is local. 1.2 would imply that remote accesses are 20% more expensive Balancing cost = Cpte * numa_pte_updates + Cnumahint * numa_hint_faults + Ci * numa_pages_migrated + Cpagecopy * numa_pages_migrated Note that numa_pages_migrated is used as a measure of how many pages were isolated even though it would miss pages that failed to migrate. A vmstat counter could have been added for it but the isolation cost is pretty marginal in comparison to the overall cost so it seemed overkill. The ideal way to measure automatic placement benefit would be to count the number of remote accesses versus local accesses and do something like benefit = (remote_accesses_before - remove_access_after) * Wnuma but the information is not readily available. As a workload converges, the expection would be that the number of remote numa hints would reduce to 0. convergence = numa_hint_faults_local / numa_hint_faults where this is measured for the last N number of numa hints recorded. When the workload is fully converged the value is 1. This can measure if the placement policy is converging and how fast it is doing it. Signed-off-by: Mel Gorman <mgorman@suse.de> Acked-by: Rik van Riel <riel@redhat.com>
2012-11-02 14:52:48 +00:00
NUMA_HINT_FAULTS,
NUMA_HINT_FAULTS_LOCAL,
NUMA_PAGE_MIGRATE,
#endif
#ifdef CONFIG_MIGRATION
PGMIGRATE_SUCCESS, PGMIGRATE_FAIL,
#endif
#ifdef CONFIG_COMPACTION
mm: compaction: Add scanned and isolated counters for compaction Compaction already has tracepoints to count scanned and isolated pages but it requires that ftrace be enabled and if that information has to be written to disk then it can be disruptive. This patch adds vmstat counters for compaction called compact_migrate_scanned, compact_free_scanned and compact_isolated. With these counters, it is possible to define a basic cost model for compaction. This approximates of how much work compaction is doing and can be compared that with an oprofile showing TLB misses and see if the cost of compaction is being offset by THP for example. Minimally a compaction patch can be evaluated in terms of whether it increases or decreases cost. The basic cost model looks like this Fundamental unit u: a word sizeof(void *) Ca = cost of struct page access = sizeof(struct page) / u Cmc = Cost migrate page copy = (Ca + PAGE_SIZE/u) * 2 Cmf = Cost migrate failure = Ca * 2 Ci = Cost page isolation = (Ca + Wi) where Wi is a constant that should reflect the approximate cost of the locking operation. Csm = Cost migrate scanning = Ca Csf = Cost free scanning = Ca Overall cost = (Csm * compact_migrate_scanned) + (Csf * compact_free_scanned) + (Ci * compact_isolated) + (Cmc * pgmigrate_success) + (Cmf * pgmigrate_failed) Where the values are read from /proc/vmstat. This is very basic and ignores certain costs such as the allocation cost to do a migrate page copy but any improvement to the model would still use the same vmstat counters. Signed-off-by: Mel Gorman <mgorman@suse.de> Reviewed-by: Rik van Riel <riel@redhat.com>
2012-10-19 12:00:10 +01:00
COMPACTMIGRATE_SCANNED, COMPACTFREE_SCANNED,
COMPACTISOLATED,
COMPACTSTALL, COMPACTFAIL, COMPACTSUCCESS,
mm, compaction: introduce kcompactd Memory compaction can be currently performed in several contexts: - kswapd balancing a zone after a high-order allocation failure - direct compaction to satisfy a high-order allocation, including THP page fault attemps - khugepaged trying to collapse a hugepage - manually from /proc The purpose of compaction is two-fold. The obvious purpose is to satisfy a (pending or future) high-order allocation, and is easy to evaluate. The other purpose is to keep overal memory fragmentation low and help the anti-fragmentation mechanism. The success wrt the latter purpose is more The current situation wrt the purposes has a few drawbacks: - compaction is invoked only when a high-order page or hugepage is not available (or manually). This might be too late for the purposes of keeping memory fragmentation low. - direct compaction increases latency of allocations. Again, it would be better if compaction was performed asynchronously to keep fragmentation low, before the allocation itself comes. - (a special case of the previous) the cost of compaction during THP page faults can easily offset the benefits of THP. - kswapd compaction appears to be complex, fragile and not working in some scenarios. It could also end up compacting for a high-order allocation request when it should be reclaiming memory for a later order-0 request. To improve the situation, we should be able to benefit from an equivalent of kswapd, but for compaction - i.e. a background thread which responds to fragmentation and the need for high-order allocations (including hugepages) somewhat proactively. One possibility is to extend the responsibilities of kswapd, which could however complicate its design too much. It should be better to let kswapd handle reclaim, as order-0 allocations are often more critical than high-order ones. Another possibility is to extend khugepaged, but this kthread is a single instance and tied to THP configs. This patch goes with the option of a new set of per-node kthreads called kcompactd, and lays the foundations, without introducing any new tunables. The lifecycle mimics kswapd kthreads, including the memory hotplug hooks. For compaction, kcompactd uses the standard compaction_suitable() and ompact_finished() criteria and the deferred compaction functionality. Unlike direct compaction, it uses only sync compaction, as there's no allocation latency to minimize. This patch doesn't yet add a call to wakeup_kcompactd. The kswapd compact/reclaim loop for high-order pages will be replaced by waking up kcompactd in the next patch with the description of what's wrong with the old approach. Waking up of the kcompactd threads is also tied to kswapd activity and follows these rules: - we don't want to affect any fastpaths, so wake up kcompactd only from the slowpath, as it's done for kswapd - if kswapd is doing reclaim, it's more important than compaction, so don't invoke kcompactd until kswapd goes to sleep - the target order used for kswapd is passed to kcompactd Future possible future uses for kcompactd include the ability to wake up kcompactd on demand in special situations, such as when hugepages are not available (currently not done due to __GFP_NO_KSWAPD) or when a fragmentation event (i.e. __rmqueue_fallback()) occurs. It's also possible to perform periodic compaction with kcompactd. [arnd@arndb.de: fix build errors with kcompactd] [paul.gortmaker@windriver.com: don't use modular references for non modular code] Signed-off-by: Vlastimil Babka <vbabka@suse.cz> Cc: Andrea Arcangeli <aarcange@redhat.com> Cc: "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com> Cc: Rik van Riel <riel@redhat.com> Cc: Joonsoo Kim <iamjoonsoo.kim@lge.com> Cc: Mel Gorman <mgorman@techsingularity.net> Cc: David Rientjes <rientjes@google.com> Cc: Michal Hocko <mhocko@suse.com> Cc: Johannes Weiner <hannes@cmpxchg.org> Signed-off-by: Arnd Bergmann <arnd@arndb.de> Signed-off-by: Paul Gortmaker <paul.gortmaker@windriver.com> Cc: Hugh Dickins <hughd@google.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> Git-commit: 698b1b30642f1ff0ea10ef1de9745ab633031377 Git-repo: git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git Change-Id: I987ae548cba936987b8479dc02de67d0f88b9cb6 Signed-off-by: Vinayak Menon <vinmenon@codeaurora.org>
2016-03-17 14:18:08 -07:00
KCOMPACTD_WAKE,
#endif
#ifdef CONFIG_HUGETLB_PAGE
HTLB_BUDDY_PGALLOC, HTLB_BUDDY_PGALLOC_FAIL,
#endif
UNEVICTABLE_PGCULLED, /* culled to noreclaim list */
UNEVICTABLE_PGSCANNED, /* scanned for reclaimability */
UNEVICTABLE_PGRESCUED, /* rescued from noreclaim list */
UNEVICTABLE_PGMLOCKED,
UNEVICTABLE_PGMUNLOCKED,
UNEVICTABLE_PGCLEARED, /* on COW, page truncate */
UNEVICTABLE_PGSTRANDED, /* unable to isolate on unlock */
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
THP_FAULT_ALLOC,
THP_FAULT_FALLBACK,
THP_COLLAPSE_ALLOC,
THP_COLLAPSE_ALLOC_FAILED,
THP_SPLIT,
THP_ZERO_PAGE_ALLOC,
THP_ZERO_PAGE_ALLOC_FAILED,
#endif
#ifdef CONFIG_MEMORY_BALLOON
BALLOON_INFLATE,
BALLOON_DEFLATE,
#ifdef CONFIG_BALLOON_COMPACTION
BALLOON_MIGRATE,
#endif
#endif
#ifdef CONFIG_DEBUG_TLBFLUSH
NR_TLB_REMOTE_FLUSH, /* cpu tried to flush others' tlbs */
NR_TLB_REMOTE_FLUSH_RECEIVED,/* cpu received ipi for flush */
NR_TLB_LOCAL_FLUSH_ALL,
NR_TLB_LOCAL_FLUSH_ONE,
#endif /* CONFIG_DEBUG_TLBFLUSH */
#ifdef CONFIG_DEBUG_VM_VMACACHE
VMACACHE_FIND_CALLS,
VMACACHE_FIND_HITS,
#endif
NR_VM_EVENT_ITEMS
};
#endif /* VM_EVENT_ITEM_H_INCLUDED */