diff --git a/arch/s390/include/asm/lowcore.h b/arch/s390/include/asm/lowcore.h
index b8624d53c379..228cf0b295db 100644
--- a/arch/s390/include/asm/lowcore.h
+++ b/arch/s390/include/asm/lowcore.h
@@ -124,7 +124,7 @@ struct _lowcore {
 	/* Address space pointer. */
 	__u32	kernel_asce;			/* 0x02ac */
 	__u32	user_asce;			/* 0x02b0 */
-	__u8	pad_0x02b4[0x02b8-0x02b4];	/* 0x02b4 */
+	__u32	current_pid;			/* 0x02b4 */
 
 	/* SMP info area */
 	__u32	cpu_nr;				/* 0x02b8 */
@@ -255,7 +255,7 @@ struct _lowcore {
 	/* Address space pointer. */
 	__u64	kernel_asce;			/* 0x0310 */
 	__u64	user_asce;			/* 0x0318 */
-	__u8	pad_0x0320[0x0328-0x0320];	/* 0x0320 */
+	__u64	current_pid;			/* 0x0320 */
 
 	/* SMP info area */
 	__u32	cpu_nr;				/* 0x0328 */
diff --git a/arch/s390/include/asm/processor.h b/arch/s390/include/asm/processor.h
index 2c79b6416271..1300c3025334 100644
--- a/arch/s390/include/asm/processor.h
+++ b/arch/s390/include/asm/processor.h
@@ -84,6 +84,7 @@ struct thread_struct {
 	struct per_event per_event;	/* Cause of the last PER trap */
         /* pfault_wait is used to block the process on a pfault event */
 	unsigned long pfault_wait;
+	struct list_head list;
 };
 
 typedef struct thread_struct thread_struct;
diff --git a/arch/s390/kernel/asm-offsets.c b/arch/s390/kernel/asm-offsets.c
index ef4555611013..edfbd17d7082 100644
--- a/arch/s390/kernel/asm-offsets.c
+++ b/arch/s390/kernel/asm-offsets.c
@@ -124,6 +124,7 @@ int main(void)
 	DEFINE(__LC_LAST_UPDATE_TIMER, offsetof(struct _lowcore, last_update_timer));
 	DEFINE(__LC_LAST_UPDATE_CLOCK, offsetof(struct _lowcore, last_update_clock));
 	DEFINE(__LC_CURRENT, offsetof(struct _lowcore, current_task));
+	DEFINE(__LC_CURRENT_PID, offsetof(struct _lowcore, current_pid));
 	DEFINE(__LC_THREAD_INFO, offsetof(struct _lowcore, thread_info));
 	DEFINE(__LC_KERNEL_STACK, offsetof(struct _lowcore, kernel_stack));
 	DEFINE(__LC_ASYNC_STACK, offsetof(struct _lowcore, async_stack));
diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S
index 1b67fc6ebdc2..0476174dfff5 100644
--- a/arch/s390/kernel/entry.S
+++ b/arch/s390/kernel/entry.S
@@ -212,6 +212,7 @@ __switch_to:
 	lctl	%c4,%c4,__TASK_pid(%r3)		# load pid to control reg. 4
 	lm	%r6,%r15,__SF_GPRS(%r15)	# load gprs of next task
 	st	%r3,__LC_CURRENT		# store task struct of next
+	mvc	__LC_CURRENT_PID(4,%r0),__TASK_pid(%r3)	# store pid of next
 	st	%r5,__LC_THREAD_INFO		# store thread info of next
 	ahi	%r5,STACK_SIZE			# end of kernel stack of next
 	st	%r5,__LC_KERNEL_STACK		# store end of kernel stack
diff --git a/arch/s390/kernel/entry64.S b/arch/s390/kernel/entry64.S
index 9fd864563499..d61967e2eab0 100644
--- a/arch/s390/kernel/entry64.S
+++ b/arch/s390/kernel/entry64.S
@@ -220,6 +220,7 @@ __switch_to:
 	lctl	%c4,%c4,__TASK_pid(%r3)		# load pid to control reg. 4
 	lmg	%r6,%r15,__SF_GPRS(%r15)	# load gprs of next task
 	stg	%r3,__LC_CURRENT		# store task struct of next
+	mvc	__LC_CURRENT_PID+4(4,%r0),__TASK_pid(%r3) # store pid of next
 	stg	%r5,__LC_THREAD_INFO		# store thread info of next
 	aghi	%r5,STACK_SIZE			# end of kernel stack of next
 	stg	%r5,__LC_KERNEL_STACK		# store end of kernel stack
diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c
index 177745c520ca..1ca656478326 100644
--- a/arch/s390/mm/fault.c
+++ b/arch/s390/mm/fault.c
@@ -466,7 +466,7 @@ typedef struct {
 int pfault_init(void)
 {
 	pfault_refbk_t refbk =
-		{ 0x258, 0, 5, 2, __LC_CURRENT, 1ULL << 48, 1ULL << 48,
+		{ 0x258, 0, 5, 2, __LC_CURRENT_PID, 1ULL << 48, 1ULL << 48,
 		  __PF_RES_FIELD };
         int rc;
 
@@ -498,11 +498,15 @@ void pfault_fini(void)
 		: : "a" (&refbk), "m" (refbk) : "cc");
 }
 
+static DEFINE_SPINLOCK(pfault_lock);
+static LIST_HEAD(pfault_list);
+
 static void pfault_interrupt(unsigned int ext_int_code,
 			     unsigned int param32, unsigned long param64)
 {
 	struct task_struct *tsk;
 	__u16 subcode;
+	pid_t pid;
 
 	/*
 	 * Get the external interruption subcode & pfault
@@ -514,44 +518,79 @@ static void pfault_interrupt(unsigned int ext_int_code,
 	if ((subcode & 0xff00) != __SUBCODE_MASK)
 		return;
 	kstat_cpu(smp_processor_id()).irqs[EXTINT_PFL]++;
-
-	/*
-	 * Get the token (= address of the task structure of the affected task).
-	 */
-#ifdef CONFIG_64BIT
-	tsk = (struct task_struct *) param64;
-#else
-	tsk = (struct task_struct *) param32;
-#endif
-
+	if (subcode & 0x0080) {
+		/* Get the token (= pid of the affected task). */
+		pid = sizeof(void *) == 4 ? param32 : param64;
+		rcu_read_lock();
+		tsk = find_task_by_pid_ns(pid, &init_pid_ns);
+		if (tsk)
+			get_task_struct(tsk);
+		rcu_read_unlock();
+		if (!tsk)
+			return;
+	} else {
+		tsk = current;
+	}
+	spin_lock(&pfault_lock);
 	if (subcode & 0x0080) {
 		/* signal bit is set -> a page has been swapped in by VM */
-		if (xchg(&tsk->thread.pfault_wait, -1) != 0) {
+		if (tsk->thread.pfault_wait == 1) {
 			/* Initial interrupt was faster than the completion
 			 * interrupt. pfault_wait is valid. Set pfault_wait
 			 * back to zero and wake up the process. This can
 			 * safely be done because the task is still sleeping
 			 * and can't produce new pfaults. */
 			tsk->thread.pfault_wait = 0;
+			list_del(&tsk->thread.list);
 			wake_up_process(tsk);
-			put_task_struct(tsk);
+		} else {
+			/* Completion interrupt was faster than initial
+			 * interrupt. Set pfault_wait to -1 so the initial
+			 * interrupt doesn't put the task to sleep. */
+			tsk->thread.pfault_wait = -1;
 		}
+		put_task_struct(tsk);
 	} else {
 		/* signal bit not set -> a real page is missing. */
-		get_task_struct(tsk);
-		set_task_state(tsk, TASK_UNINTERRUPTIBLE);
-		if (xchg(&tsk->thread.pfault_wait, 1) != 0) {
+		if (tsk->thread.pfault_wait == -1) {
 			/* Completion interrupt was faster than the initial
-			 * interrupt (swapped in a -1 for pfault_wait). Set
-			 * pfault_wait back to zero and exit. This can be
-			 * done safely because tsk is running in kernel 
-			 * mode and can't produce new pfaults. */
+			 * interrupt (pfault_wait == -1). Set pfault_wait
+			 * back to zero and exit. */
 			tsk->thread.pfault_wait = 0;
-			set_task_state(tsk, TASK_RUNNING);
-			put_task_struct(tsk);
-		} else
+		} else {
+			/* Initial interrupt arrived before completion
+			 * interrupt. Let the task sleep. */
+			tsk->thread.pfault_wait = 1;
+			list_add(&tsk->thread.list, &pfault_list);
+			set_task_state(tsk, TASK_UNINTERRUPTIBLE);
 			set_tsk_need_resched(tsk);
+		}
 	}
+	spin_unlock(&pfault_lock);
+}
+
+static int __cpuinit pfault_cpu_notify(struct notifier_block *self,
+				       unsigned long action, void *hcpu)
+{
+	struct thread_struct *thread, *next;
+	struct task_struct *tsk;
+
+	switch (action) {
+	case CPU_DEAD:
+	case CPU_DEAD_FROZEN:
+		spin_lock_irq(&pfault_lock);
+		list_for_each_entry_safe(thread, next, &pfault_list, list) {
+			thread->pfault_wait = 0;
+			list_del(&thread->list);
+			tsk = container_of(thread, struct task_struct, thread);
+			wake_up_process(tsk);
+		}
+		spin_unlock_irq(&pfault_lock);
+		break;
+	default:
+		break;
+	}
+	return NOTIFY_OK;
 }
 
 static int __init pfault_irq_init(void)
@@ -568,8 +607,10 @@ static int __init pfault_irq_init(void)
 		pfault_disable = 1;
 		return rc;
 	}
-	if (pfault_init() == 0)
+	if (pfault_init() == 0) {
+		hotcpu_notifier(pfault_cpu_notify, 0);
 		return 0;
+	}
 
 	/* Tough luck, no pfault. */
 	pfault_disable = 1;