mm: zbud: fix the locking scenarios with zcache

With zcache using zbud, strange locking scenarios are
observed. The first problem seen is:

Core 2 is waiting on mapping->tree_lock, which is taken by core 6:
do_raw_spin_lock

raw_spin_lock_irq
atomic_cmpxchg
page_freeze_refs
__remove_mapping
shrink_page_list

Core 6, after taking mapping->tree_lock, is waiting on the zbud pool
lock, which is held by core 5:
zbud_alloc
zcache_store_page
__cleancache_put_page
cleancache_put_page
__delete_from_page_cache
spin_unlock_irq
__remove_mapping
shrink_page_list
shrink_inactive_list

Core 5, after taking the zbud pool lock in zbud_free, received an IRQ.
On IRQ exit, softirqs were run, and end_page_writeback tried to take
mapping->tree_lock, which was already held by core 6. Deadlock:
do_raw_spin_lock
raw_spin_lock_irqsave
test_clear_page_writeback
end_page_writeback
ext4_finish_bio
ext4_end_bio
bio_endio
blk_update_request
end_clone_bio
bio_endio
blk_update_request
blk_update_bidi_request
blk_end_bidi_request
blk_end_request
mmc_blk_cmdq_complete_rq
mmc_cmdq_softirq_done
blk_done_softirq
static_key_count
static_key_false
trace_softirq_exit
__do_softirq()
tick_irq_exit
irq_exit()
set_irq_regs
__handle_domain_irq
gic_handle_irq
el1_irq
exception
__list_del_entry
list_del
zbud_free
zcache_load_page
__cleancache_get_page
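
Condensed, the cores interleave roughly as follows (an illustrative
reconstruction of the traces above, not an actual log):

core 2: spin_lock_irq(&mapping->tree_lock)      <- spins; held by core 6
core 6: holds mapping->tree_lock,
        spin_lock(&pool->lock)                  <- spins; held by core 5
core 5: holds pool->lock, takes an IRQ; on irq_exit() pending softirqs
        run: end_page_writeback() ->
        spin_lock_irqsave(&mapping->tree_lock)  <- spins; held by core 6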

This shows that allowing softirqs to run while holding the zbud pool
lock can result in deadlocks. To fix this, commit 6a1fdaa36272
("mm: zbud: prevent softirq during zbud alloc, free and reclaim")
took spin_lock_bh in zbud_alloc, zbud_free and zbud_reclaim_page.
But this resulted in another deadlock:

spin_bug()
do_raw_spin_lock()
_raw_spin_lock_irqsave()
test_clear_page_writeback()
end_page_writeback()
ext4_finish_bio()
ext4_end_bio()
bio_endio()
blk_update_request()
end_clone_bio()
bio_endio()
blk_update_request()
blk_update_bidi_request()
blk_end_request()
mmc_blk_cmdq_complete_rq()
mmc_cmdq_softirq_done()
blk_done_softirq()
__do_softirq()
do_softirq()
__local_bh_enable_ip()
_raw_spin_unlock_bh()
zbud_alloc()
zcache_store_page()
__cleancache_put_page()
__delete_from_page_cache()
__remove_mapping()
shrink_page_list()

Here, spin_unlock_bh explicitly invoked do_softirq, and the softirq
then tried to acquire mapping->tree_lock, which had already been taken
by __remove_mapping.
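
A minimal sketch of why this pattern deadlocks (illustrative only;
tree_lock and pool_lock here are hypothetical stand-ins for
mapping->tree_lock and the zbud pool lock):

#include <linux/spinlock.h>

static DEFINE_SPINLOCK(tree_lock);	/* stand-in for mapping->tree_lock */
static DEFINE_SPINLOCK(pool_lock);	/* stand-in for pool->lock */

static void bad_pattern_sketch(void)
{
	spin_lock_irq(&tree_lock);	/* as in __remove_mapping() */

	spin_lock_bh(&pool_lock);	/* as in the old zbud_alloc() */
	/* ... */
	spin_unlock_bh(&pool_lock);
	/*
	 * __local_bh_enable_ip() sees a pending softirq and runs it
	 * synchronously right here, even though interrupts are still
	 * disabled; blk_done_softirq -> end_page_writeback then tries
	 * to take tree_lock, which this context already holds.
	 * Deadlock.
	 */

	spin_unlock_irq(&tree_lock);
}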

The new fix considers the following facts.
1) zcache_store_page is always called from __delete_from_page_cache
with mapping->tree_lock held and interrupts disabled. Thus zbud_alloc,
which is called only from zcache_store_page, always runs with
interrupts disabled.
2) zbud_free and zbud_reclaim_page can be called from zcache with or
without interrupts disabled. So an interrupt received while holding the
zbud pool lock can result in do_softirq on IRQ exit and an attempt to
acquire mapping->tree_lock.

(1) implies that zbud_alloc need not explicitly disable bottom halves.
But disable interrupts anyway, to make sure zbud_alloc remains safe
with zcache irrespective of future changes. This fixes the second
scenario.
(2) implies that zbud_free and zbud_reclaim_page should use
spin_lock_irqsave, so that interrupts, and in turn softirqs, cannot be
triggered while the pool lock is held. spin_lock_bh can't be used
because spin_unlock_bh can trigger a softirq even in a context where
interrupts are disabled. This fixes the first scenario.
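
Below is a minimal sketch of the resulting locking pattern
(illustrative only; pool_lock is a hypothetical stand-in for the zbud
pool lock, and the real changes are in the diff below):

#include <linux/spinlock.h>

static DEFINE_SPINLOCK(pool_lock);	/* stand-in for pool->lock */

static void pool_op_sketch(void)
{
	unsigned long flags;

	/*
	 * Interrupts are disabled on this CPU while the lock is held,
	 * so no IRQ, and hence no softirq on irq_exit, can run on top
	 * of the critical section.
	 */
	spin_lock_irqsave(&pool_lock, flags);
	/* ... alloc/free/reclaim work ... */
	spin_unlock_irqrestore(&pool_lock, flags);
	/*
	 * Unlike spin_unlock_bh(), this only restores the previous irq
	 * state; it never runs pending softirqs itself.
	 */
}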

Change-Id: Ibc810525dddf97614db41643642fec7472bd6a2c
Signed-off-by: Vinayak Menon <vinmenon@codeaurora.org>

--- a/mm/zbud.c
+++ b/mm/zbud.c
@@ -357,6 +357,7 @@ int zbud_alloc(struct zbud_pool *pool, size_t size, gfp_t gfp,
 	struct zbud_header *zhdr = NULL;
 	enum buddy bud;
 	struct page *page;
+	unsigned long flags;
 	int found = 0;
 
 	if (!size || (gfp & __GFP_HIGHMEM))
@@ -364,7 +365,7 @@ int zbud_alloc(struct zbud_pool *pool, size_t size, gfp_t gfp,
 	if (size > PAGE_SIZE - ZHDR_SIZE_ALIGNED - CHUNK_SIZE)
 		return -ENOSPC;
 	chunks = size_to_chunks(size);
-	spin_lock_bh(&pool->lock);
+	spin_lock_irqsave(&pool->lock, flags);
 
 	/* First, try to find an unbuddied zbud page. */
 	zhdr = NULL;
@@ -383,11 +384,11 @@ int zbud_alloc(struct zbud_pool *pool, size_t size, gfp_t gfp,
 	}
 
 	/* Couldn't find unbuddied zbud page, create new one */
-	spin_unlock_bh(&pool->lock);
+	spin_unlock_irqrestore(&pool->lock, flags);
 	page = alloc_page(gfp);
 	if (!page)
 		return -ENOMEM;
-	spin_lock_bh(&pool->lock);
+	spin_lock_irqsave(&pool->lock, flags);
 	pool->pages_nr++;
 	zhdr = init_zbud_page(page);
 	bud = FIRST;
@@ -415,7 +416,7 @@ found:
 	*handle = encode_handle(zhdr, bud);
 	if ((gfp & __GFP_ZERO) && found)
 		memset((void *)*handle, 0, size);
-	spin_unlock_bh(&pool->lock);
+	spin_unlock_irqrestore(&pool->lock, flags);
 
 	return 0;
 }
@@ -434,8 +435,9 @@ void zbud_free(struct zbud_pool *pool, unsigned long handle)
 {
 	struct zbud_header *zhdr;
 	int freechunks;
+	unsigned long flags;
 
-	spin_lock_bh(&pool->lock);
+	spin_lock_irqsave(&pool->lock, flags);
 	zhdr = handle_to_zbud_header(handle);
 
 	/* If first buddy, handle will be page aligned */
@@ -446,7 +448,7 @@ void zbud_free(struct zbud_pool *pool, unsigned long handle)
 
 	if (zhdr->under_reclaim) {
 		/* zbud page is under reclaim, reclaim will free */
-		spin_unlock_bh(&pool->lock);
+		spin_unlock_irqrestore(&pool->lock, flags);
 		return;
 	}
 
@@ -464,7 +466,7 @@ void zbud_free(struct zbud_pool *pool, unsigned long handle)
 		list_add(&zhdr->buddy, &pool->unbuddied[freechunks]);
 	}
 
-	spin_unlock_bh(&pool->lock);
+	spin_unlock_irqrestore(&pool->lock, flags);
 }
 
 #define list_tail_entry(ptr, type, member) \
@@ -509,12 +511,13 @@ int zbud_reclaim_page(struct zbud_pool *pool, unsigned int retries)
 {
 	int i, ret, freechunks;
 	struct zbud_header *zhdr;
+	unsigned long flags;
 	unsigned long first_handle = 0, last_handle = 0;
 
-	spin_lock_bh(&pool->lock);
+	spin_lock_irqsave(&pool->lock, flags);
 	if (!pool->ops || !pool->ops->evict || list_empty(&pool->lru) ||
 			retries == 0) {
-		spin_unlock_bh(&pool->lock);
+		spin_unlock_irqrestore(&pool->lock, flags);
 		return -EINVAL;
 	}
 	for (i = 0; i < retries; i++) {
@@ -533,7 +536,7 @@ int zbud_reclaim_page(struct zbud_pool *pool, unsigned int retries)
 			first_handle = encode_handle(zhdr, FIRST);
 		if (zhdr->last_chunks)
 			last_handle = encode_handle(zhdr, LAST);
-		spin_unlock_bh(&pool->lock);
+		spin_unlock_irqrestore(&pool->lock, flags);
 
 		/* Issue the eviction callback(s) */
 		if (first_handle) {
@@ -547,7 +550,7 @@ int zbud_reclaim_page(struct zbud_pool *pool, unsigned int retries)
 				goto next;
 		}
 next:
-		spin_lock_bh(&pool->lock);
+		spin_lock_irqsave(&pool->lock, flags);
 		zhdr->under_reclaim = false;
 		if (zhdr->first_chunks == 0 && zhdr->last_chunks == 0) {
 			/*
@@ -556,7 +559,7 @@ next:
 			 */
 			free_zbud_page(zhdr);
 			pool->pages_nr--;
-			spin_unlock_bh(&pool->lock);
+			spin_unlock_irqrestore(&pool->lock, flags);
 			return 0;
 		} else if (zhdr->first_chunks == 0 ||
 				zhdr->last_chunks == 0) {
@@ -571,7 +574,7 @@ next:
 
 		/* add to beginning of LRU */
 		list_add(&zhdr->lru, &pool->lru);
 	}
-	spin_unlock_bh(&pool->lock);
+	spin_unlock_irqrestore(&pool->lock, flags);
 	return -EAGAIN;
 }