futex: Make lookup_pi_state more robust
The current implementation of lookup_pi_state has ambigous handling of the TID value 0 in the user space futex. We can get into the kernel even if the TID value is 0, because either there is a stale waiters bit or the owner died bit is set or we are called from the requeue_pi path or from user space just for fun. The current code avoids an explicit sanity check for pid = 0 in case that kernel internal state (waiters) are found for the user space address. This can lead to state leakage and worse under some circumstances. Handle the cases explicit: Waiter | pi_state | pi->owner | uTID | uODIED | ? [1] NULL | --- | --- | 0 | 0/1 | Valid [2] NULL | --- | --- | >0 | 0/1 | Valid [3] Found | NULL | -- | Any | 0/1 | Invalid [4] Found | Found | NULL | 0 | 1 | Valid [5] Found | Found | NULL | >0 | 1 | Invalid [6] Found | Found | task | 0 | 1 | Valid [7] Found | Found | NULL | Any | 0 | Invalid [8] Found | Found | task | ==taskTID | 0/1 | Valid [9] Found | Found | task | 0 | 0 | Invalid [10] Found | Found | task | !=taskTID | 0/1 | Invalid [1] Indicates that the kernel can acquire the futex atomically. We came came here due to a stale FUTEX_WAITERS/FUTEX_OWNER_DIED bit. [2] Valid, if TID does not belong to a kernel thread. If no matching thread is found then it indicates that the owner TID has died. [3] Invalid. The waiter is queued on a non PI futex [4] Valid state after exit_robust_list(), which sets the user space value to FUTEX_WAITERS | FUTEX_OWNER_DIED. [5] The user space value got manipulated between exit_robust_list() and exit_pi_state_list() [6] Valid state after exit_pi_state_list() which sets the new owner in the pi_state but cannot access the user space value. [7] pi_state->owner can only be NULL when the OWNER_DIED bit is set. [8] Owner and user space value match [9] There is no transient state which sets the user space TID to 0 except exit_robust_list(), but this is indicated by the FUTEX_OWNER_DIED bit. See [4] [10] There is no transient state which leaves owner and user space TID out of sync. Signed-off-by: Thomas Gleixner <tglx@linutronix.de> Cc: Kees Cook <keescook@chromium.org> Cc: Will Drewry <wad@chromium.org> Cc: Darren Hart <dvhart@linux.intel.com> Cc: stable@vger.kernel.org Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
This commit is contained in:
parent
13fbca4c6e
commit
54a217887a
1 changed files with 106 additions and 28 deletions
134
kernel/futex.c
134
kernel/futex.c
|
@ -743,10 +743,58 @@ void exit_pi_state_list(struct task_struct *curr)
|
||||||
raw_spin_unlock_irq(&curr->pi_lock);
|
raw_spin_unlock_irq(&curr->pi_lock);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* We need to check the following states:
|
||||||
|
*
|
||||||
|
* Waiter | pi_state | pi->owner | uTID | uODIED | ?
|
||||||
|
*
|
||||||
|
* [1] NULL | --- | --- | 0 | 0/1 | Valid
|
||||||
|
* [2] NULL | --- | --- | >0 | 0/1 | Valid
|
||||||
|
*
|
||||||
|
* [3] Found | NULL | -- | Any | 0/1 | Invalid
|
||||||
|
*
|
||||||
|
* [4] Found | Found | NULL | 0 | 1 | Valid
|
||||||
|
* [5] Found | Found | NULL | >0 | 1 | Invalid
|
||||||
|
*
|
||||||
|
* [6] Found | Found | task | 0 | 1 | Valid
|
||||||
|
*
|
||||||
|
* [7] Found | Found | NULL | Any | 0 | Invalid
|
||||||
|
*
|
||||||
|
* [8] Found | Found | task | ==taskTID | 0/1 | Valid
|
||||||
|
* [9] Found | Found | task | 0 | 0 | Invalid
|
||||||
|
* [10] Found | Found | task | !=taskTID | 0/1 | Invalid
|
||||||
|
*
|
||||||
|
* [1] Indicates that the kernel can acquire the futex atomically. We
|
||||||
|
* came came here due to a stale FUTEX_WAITERS/FUTEX_OWNER_DIED bit.
|
||||||
|
*
|
||||||
|
* [2] Valid, if TID does not belong to a kernel thread. If no matching
|
||||||
|
* thread is found then it indicates that the owner TID has died.
|
||||||
|
*
|
||||||
|
* [3] Invalid. The waiter is queued on a non PI futex
|
||||||
|
*
|
||||||
|
* [4] Valid state after exit_robust_list(), which sets the user space
|
||||||
|
* value to FUTEX_WAITERS | FUTEX_OWNER_DIED.
|
||||||
|
*
|
||||||
|
* [5] The user space value got manipulated between exit_robust_list()
|
||||||
|
* and exit_pi_state_list()
|
||||||
|
*
|
||||||
|
* [6] Valid state after exit_pi_state_list() which sets the new owner in
|
||||||
|
* the pi_state but cannot access the user space value.
|
||||||
|
*
|
||||||
|
* [7] pi_state->owner can only be NULL when the OWNER_DIED bit is set.
|
||||||
|
*
|
||||||
|
* [8] Owner and user space value match
|
||||||
|
*
|
||||||
|
* [9] There is no transient state which sets the user space TID to 0
|
||||||
|
* except exit_robust_list(), but this is indicated by the
|
||||||
|
* FUTEX_OWNER_DIED bit. See [4]
|
||||||
|
*
|
||||||
|
* [10] There is no transient state which leaves owner and user space
|
||||||
|
* TID out of sync.
|
||||||
|
*/
|
||||||
static int
|
static int
|
||||||
lookup_pi_state(u32 uval, struct futex_hash_bucket *hb,
|
lookup_pi_state(u32 uval, struct futex_hash_bucket *hb,
|
||||||
union futex_key *key, struct futex_pi_state **ps,
|
union futex_key *key, struct futex_pi_state **ps)
|
||||||
struct task_struct *task)
|
|
||||||
{
|
{
|
||||||
struct futex_pi_state *pi_state = NULL;
|
struct futex_pi_state *pi_state = NULL;
|
||||||
struct futex_q *this, *next;
|
struct futex_q *this, *next;
|
||||||
|
@ -756,12 +804,13 @@ lookup_pi_state(u32 uval, struct futex_hash_bucket *hb,
|
||||||
plist_for_each_entry_safe(this, next, &hb->chain, list) {
|
plist_for_each_entry_safe(this, next, &hb->chain, list) {
|
||||||
if (match_futex(&this->key, key)) {
|
if (match_futex(&this->key, key)) {
|
||||||
/*
|
/*
|
||||||
* Another waiter already exists - bump up
|
* Sanity check the waiter before increasing
|
||||||
* the refcount and return its pi_state:
|
* the refcount and attaching to it.
|
||||||
*/
|
*/
|
||||||
pi_state = this->pi_state;
|
pi_state = this->pi_state;
|
||||||
/*
|
/*
|
||||||
* Userspace might have messed up non-PI and PI futexes
|
* Userspace might have messed up non-PI and
|
||||||
|
* PI futexes [3]
|
||||||
*/
|
*/
|
||||||
if (unlikely(!pi_state))
|
if (unlikely(!pi_state))
|
||||||
return -EINVAL;
|
return -EINVAL;
|
||||||
|
@ -769,44 +818,70 @@ lookup_pi_state(u32 uval, struct futex_hash_bucket *hb,
|
||||||
WARN_ON(!atomic_read(&pi_state->refcount));
|
WARN_ON(!atomic_read(&pi_state->refcount));
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* When pi_state->owner is NULL then the owner died
|
* Handle the owner died case:
|
||||||
* and another waiter is on the fly. pi_state->owner
|
|
||||||
* is fixed up by the task which acquires
|
|
||||||
* pi_state->rt_mutex.
|
|
||||||
*
|
|
||||||
* We do not check for pid == 0 which can happen when
|
|
||||||
* the owner died and robust_list_exit() cleared the
|
|
||||||
* TID.
|
|
||||||
*/
|
*/
|
||||||
if (pid && pi_state->owner) {
|
if (uval & FUTEX_OWNER_DIED) {
|
||||||
/*
|
/*
|
||||||
* Bail out if user space manipulated the
|
* exit_pi_state_list sets owner to NULL and
|
||||||
* futex value.
|
* wakes the topmost waiter. The task which
|
||||||
|
* acquires the pi_state->rt_mutex will fixup
|
||||||
|
* owner.
|
||||||
*/
|
*/
|
||||||
if (pid != task_pid_vnr(pi_state->owner))
|
if (!pi_state->owner) {
|
||||||
|
/*
|
||||||
|
* No pi state owner, but the user
|
||||||
|
* space TID is not 0. Inconsistent
|
||||||
|
* state. [5]
|
||||||
|
*/
|
||||||
|
if (pid)
|
||||||
|
return -EINVAL;
|
||||||
|
/*
|
||||||
|
* Take a ref on the state and
|
||||||
|
* return. [4]
|
||||||
|
*/
|
||||||
|
goto out_state;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* If TID is 0, then either the dying owner
|
||||||
|
* has not yet executed exit_pi_state_list()
|
||||||
|
* or some waiter acquired the rtmutex in the
|
||||||
|
* pi state, but did not yet fixup the TID in
|
||||||
|
* user space.
|
||||||
|
*
|
||||||
|
* Take a ref on the state and return. [6]
|
||||||
|
*/
|
||||||
|
if (!pid)
|
||||||
|
goto out_state;
|
||||||
|
} else {
|
||||||
|
/*
|
||||||
|
* If the owner died bit is not set,
|
||||||
|
* then the pi_state must have an
|
||||||
|
* owner. [7]
|
||||||
|
*/
|
||||||
|
if (!pi_state->owner)
|
||||||
return -EINVAL;
|
return -EINVAL;
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Protect against a corrupted uval. If uval
|
* Bail out if user space manipulated the
|
||||||
* is 0x80000000 then pid is 0 and the waiter
|
* futex value. If pi state exists then the
|
||||||
* bit is set. So the deadlock check in the
|
* owner TID must be the same as the user
|
||||||
* calling code has failed and we did not fall
|
* space TID. [9/10]
|
||||||
* into the check above due to !pid.
|
|
||||||
*/
|
*/
|
||||||
if (task && pi_state->owner == task)
|
if (pid != task_pid_vnr(pi_state->owner))
|
||||||
return -EDEADLK;
|
return -EINVAL;
|
||||||
|
|
||||||
|
out_state:
|
||||||
atomic_inc(&pi_state->refcount);
|
atomic_inc(&pi_state->refcount);
|
||||||
*ps = pi_state;
|
*ps = pi_state;
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* We are the first waiter - try to look up the real owner and attach
|
* We are the first waiter - try to look up the real owner and attach
|
||||||
* the new pi_state to it, but bail out when TID = 0
|
* the new pi_state to it, but bail out when TID = 0 [1]
|
||||||
*/
|
*/
|
||||||
if (!pid)
|
if (!pid)
|
||||||
return -ESRCH;
|
return -ESRCH;
|
||||||
|
@ -839,6 +914,9 @@ lookup_pi_state(u32 uval, struct futex_hash_bucket *hb,
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* No existing pi state. First waiter. [2]
|
||||||
|
*/
|
||||||
pi_state = alloc_pi_state();
|
pi_state = alloc_pi_state();
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@ -959,7 +1037,7 @@ retry:
|
||||||
* We dont have the lock. Look up the PI state (or create it if
|
* We dont have the lock. Look up the PI state (or create it if
|
||||||
* we are the first waiter):
|
* we are the first waiter):
|
||||||
*/
|
*/
|
||||||
ret = lookup_pi_state(uval, hb, key, ps, task);
|
ret = lookup_pi_state(uval, hb, key, ps);
|
||||||
|
|
||||||
if (unlikely(ret)) {
|
if (unlikely(ret)) {
|
||||||
switch (ret) {
|
switch (ret) {
|
||||||
|
@ -1565,7 +1643,7 @@ retry_private:
|
||||||
* rereading and handing potential crap to
|
* rereading and handing potential crap to
|
||||||
* lookup_pi_state.
|
* lookup_pi_state.
|
||||||
*/
|
*/
|
||||||
ret = lookup_pi_state(ret, hb2, &key2, &pi_state, NULL);
|
ret = lookup_pi_state(ret, hb2, &key2, &pi_state);
|
||||||
}
|
}
|
||||||
|
|
||||||
switch (ret) {
|
switch (ret) {
|
||||||
|
|
Loading…
Add table
Reference in a new issue