mm: zbud: fix the locking scenarios with zcache
With zcache using zbud, strange locking scenarios are observed.

The first problem seen is:

Core 2 waiting on mapping->tree_lock which is taken by core 6
 do_raw_spin_lock
 raw_spin_lock_irq
 atomic_cmpxchg
 page_freeze_refs
 __remove_mapping
 shrink_page_list

Core 6, after taking mapping->tree_lock, is waiting on the zbud pool lock
which is held by core 5
 zbud_alloc
 zcache_store_page
 __cleancache_put_page
 cleancache_put_page
 __delete_from_page_cache
 spin_unlock_irq
 __remove_mapping
 shrink_page_list
 shrink_inactive_list

Core 5, after taking the zbud pool lock in zbud_free, received an IRQ, and
after IRQ exit softirqs were scheduled; end_page_writeback then tried to take
mapping->tree_lock, which is already held by core 6. Deadlock.
 do_raw_spin_lock
 raw_spin_lock_irqsave
 test_clear_page_writeba
 end_page_writeback
 ext4_finish_bio
 ext4_end_bio
 bio_endio
 blk_update_request
 end_clone_bio
 bio_endio
 blk_update_request
 blk_update_bidi_request
 blk_end_bidi_request
 blk_end_request
 mmc_blk_cmdq_complete_r
 mmc_cmdq_softirq_done
 blk_done_softirq
 static_key_count
 static_key_false
 trace_softirq_exit
 __do_softirq()
 tick_irq_exit
 irq_exit()
 set_irq_regs
 __handle_domain_irq
 gic_handle_irq
 el1_irq
 exception
 __list_del_entry
 list_del
 zbud_free
 zcache_load_page
 __cleancache_get_page(?

This shows that allowing softirqs while holding the zbud pool lock can result
in deadlocks.

To fix this, commit 6a1fdaa36272 ("mm: zbud: prevent softirq during zbud
alloc, free and reclaim") decided to take spin_lock_bh during zbud_free,
zbud_alloc and zbud_reclaim. But this resulted in another deadlock.

 spin_bug()
 do_raw_spin_lock()
 _raw_spin_lock_irqsave()
 test_clear_page_writeback()
 end_page_writeback()
 ext4_finish_bio()
 ext4_end_bio()
 bio_endio()
 blk_update_request()
 end_clone_bio()
 bio_endio()
 blk_update_request()
 blk_update_bidi_request()
 blk_end_request()
 mmc_blk_cmdq_complete_rq()
 mmc_cmdq_softirq_done()
 blk_done_softirq()
 __do_softirq()
 do_softirq()
 __local_bh_enable_ip()
 _raw_spin_unlock_bh()
 zbud_alloc()
 zcache_store_page()
 __cleancache_put_page()
 __delete_from_page_cache()
 __remove_mapping()
 shrink_page_list()

Here, spin_unlock_bh resulted in an explicit invocation of do_softirq, which
in turn tried to acquire mapping->tree_lock, already taken by
__remove_mapping.

The new fix considers the following facts:

1) zcache_store_page is always called from __delete_from_page_cache with
   mapping->tree_lock held and interrupts disabled. Thus zbud_alloc, which is
   called only from zcache_store_page, is always called with interrupts
   disabled.

2) zbud_free and zbud_reclaim_page can be called from zcache with or without
   interrupts disabled. So an interrupt taken while holding the zbud pool
   lock can result in do_softirq and acquisition of mapping->tree_lock.

(1) implies zbud_alloc need not explicitly disable bottom halves, but
interrupts are disabled anyway to make sure zbud_alloc stays safe with
zcache, irrespective of future changes. This fixes the second scenario.

(2) implies zbud_free and zbud_reclaim_page should use spin_lock_irqsave, so
that interrupts, and in turn softirqs, are not triggered while the pool lock
is held. spin_lock_bh cannot be used because spin_unlock_bh can trigger a
softirq even in interrupt context. This fixes the first scenario.

Change-Id: Ibc810525dddf97614db41643642fec7472bd6a2c
Signed-off-by: Vinayak Menon <vinmenon@codeaurora.org>
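For context, the idiom the patch below switches to is the standard
spin_lock_irqsave()/spin_unlock_irqrestore() pairing. A minimal sketch of the
pattern follows; struct demo_pool and demo_pool_op() are hypothetical
stand-ins for illustration, not zbud code:

#include <linux/spinlock.h>

/* Hypothetical stand-in for struct zbud_pool, for illustration only. */
struct demo_pool {
        spinlock_t lock;
};

/*
 * spin_lock_irqsave() disables local interrupts (and so keeps softirqs
 * from running) while the pool lock is held, and it is safe regardless
 * of whether the caller already has interrupts disabled (as zbud_alloc
 * does when called under mapping->tree_lock) or not (as zbud_free and
 * zbud_reclaim_page may be).
 *
 * spin_lock_bh() is not usable here: spin_unlock_bh() can invoke
 * do_softirq() directly, which is exactly what attempted to take
 * mapping->tree_lock in the second deadlock above.
 */
static void demo_pool_op(struct demo_pool *pool)
{
        unsigned long flags;

        spin_lock_irqsave(&pool->lock, flags);
        /* ... touch pool state that softirq paths may also reach ... */
        spin_unlock_irqrestore(&pool->lock, flags);
}

The diff below applies this pairing at every point where zbud previously used
the _bh lock variants.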
This commit is contained in:
parent e97b6a0e02
commit 47b41f43ee
1 changed file with 16 additions and 13 deletions
mm/zbud.c

@@ -357,6 +357,7 @@ int zbud_alloc(struct zbud_pool *pool, size_t size, gfp_t gfp,
         struct zbud_header *zhdr = NULL;
         enum buddy bud;
         struct page *page;
+        unsigned long flags;
         int found = 0;
 
         if (!size || (gfp & __GFP_HIGHMEM))
@@ -364,7 +365,7 @@ int zbud_alloc(struct zbud_pool *pool, size_t size, gfp_t gfp,
         if (size > PAGE_SIZE - ZHDR_SIZE_ALIGNED - CHUNK_SIZE)
                 return -ENOSPC;
         chunks = size_to_chunks(size);
-        spin_lock_bh(&pool->lock);
+        spin_lock_irqsave(&pool->lock, flags);
 
         /* First, try to find an unbuddied zbud page. */
         zhdr = NULL;
@@ -383,11 +384,11 @@ int zbud_alloc(struct zbud_pool *pool, size_t size, gfp_t gfp,
         }
 
         /* Couldn't find unbuddied zbud page, create new one */
-        spin_unlock_bh(&pool->lock);
+        spin_unlock_irqrestore(&pool->lock, flags);
         page = alloc_page(gfp);
         if (!page)
                 return -ENOMEM;
-        spin_lock_bh(&pool->lock);
+        spin_lock_irqsave(&pool->lock, flags);
         pool->pages_nr++;
         zhdr = init_zbud_page(page);
         bud = FIRST;
@@ -415,7 +416,7 @@ found:
         *handle = encode_handle(zhdr, bud);
         if ((gfp & __GFP_ZERO) && found)
                 memset((void *)*handle, 0, size);
-        spin_unlock_bh(&pool->lock);
+        spin_unlock_irqrestore(&pool->lock, flags);
 
         return 0;
 }
@@ -434,8 +435,9 @@ void zbud_free(struct zbud_pool *pool, unsigned long handle)
 {
         struct zbud_header *zhdr;
         int freechunks;
+        unsigned long flags;
 
-        spin_lock_bh(&pool->lock);
+        spin_lock_irqsave(&pool->lock, flags);
         zhdr = handle_to_zbud_header(handle);
 
         /* If first buddy, handle will be page aligned */
@@ -446,7 +448,7 @@ void zbud_free(struct zbud_pool *pool, unsigned long handle)
 
         if (zhdr->under_reclaim) {
                 /* zbud page is under reclaim, reclaim will free */
-                spin_unlock_bh(&pool->lock);
+                spin_unlock_irqrestore(&pool->lock, flags);
                 return;
         }
 
@@ -464,7 +466,7 @@ void zbud_free(struct zbud_pool *pool, unsigned long handle)
                 list_add(&zhdr->buddy, &pool->unbuddied[freechunks]);
         }
 
-        spin_unlock_bh(&pool->lock);
+        spin_unlock_irqrestore(&pool->lock, flags);
 }
 
 #define list_tail_entry(ptr, type, member) \
@@ -509,12 +511,13 @@ int zbud_reclaim_page(struct zbud_pool *pool, unsigned int retries)
 {
         int i, ret, freechunks;
         struct zbud_header *zhdr;
+        unsigned long flags;
         unsigned long first_handle = 0, last_handle = 0;
 
-        spin_lock_bh(&pool->lock);
+        spin_lock_irqsave(&pool->lock, flags);
         if (!pool->ops || !pool->ops->evict || list_empty(&pool->lru) ||
                         retries == 0) {
-                spin_unlock_bh(&pool->lock);
+                spin_unlock_irqrestore(&pool->lock, flags);
                 return -EINVAL;
         }
         for (i = 0; i < retries; i++) {
@@ -533,7 +536,7 @@ int zbud_reclaim_page(struct zbud_pool *pool, unsigned int retries)
                         first_handle = encode_handle(zhdr, FIRST);
                 if (zhdr->last_chunks)
                         last_handle = encode_handle(zhdr, LAST);
-                spin_unlock_bh(&pool->lock);
+                spin_unlock_irqrestore(&pool->lock, flags);
 
                 /* Issue the eviction callback(s) */
                 if (first_handle) {
@@ -547,7 +550,7 @@ int zbud_reclaim_page(struct zbud_pool *pool, unsigned int retries)
                                 goto next;
                 }
 next:
-                spin_lock_bh(&pool->lock);
+                spin_lock_irqsave(&pool->lock, flags);
                 zhdr->under_reclaim = false;
                 if (zhdr->first_chunks == 0 && zhdr->last_chunks == 0) {
                         /*
@@ -556,7 +559,7 @@ next:
                          */
                         free_zbud_page(zhdr);
                         pool->pages_nr--;
-                        spin_unlock_bh(&pool->lock);
+                        spin_unlock_irqrestore(&pool->lock, flags);
                         return 0;
                 } else if (zhdr->first_chunks == 0 ||
                                 zhdr->last_chunks == 0) {
@@ -571,7 +574,7 @@ next:
                 /* add to beginning of LRU */
                 list_add(&zhdr->lru, &pool->lru);
         }
-        spin_unlock_bh(&pool->lock);
+        spin_unlock_irqrestore(&pool->lock, flags);
         return -EAGAIN;
 }