diff --git a/arch/i386/kernel/crash.c b/arch/i386/kernel/crash.c
index fa27a6c2abb6..882779c07874 100644
--- a/arch/i386/kernel/crash.c
+++ b/arch/i386/kernel/crash.c
@@ -23,12 +23,65 @@
 #include <asm/hardirq.h>
 #include <asm/nmi.h>
 #include <asm/hw_irq.h>
+#include <mach_ipi.h>
 
 #define MAX_NOTE_BYTES 1024
 typedef u32 note_buf_t[MAX_NOTE_BYTES/4];
 
 note_buf_t crash_notes[NR_CPUS];
 
+#ifdef CONFIG_SMP
+static atomic_t waiting_for_crash_ipi;
+
+static int crash_nmi_callback(struct pt_regs *regs, int cpu)
+{
+	local_irq_disable();
+	atomic_dec(&waiting_for_crash_ipi);
+	/* Assume hlt works */
+	__asm__("hlt");
+	for(;;);
+	return 1;
+}
+
+/*
+ * By using the NMI code instead of a vector we just sneak thru the
+ * word generator coming out with just what we want.  AND it does
+ * not matter if clustered_apic_mode is set or not.
+ */
+static void smp_send_nmi_allbutself(void)
+{
+	send_IPI_allbutself(APIC_DM_NMI);
+}
+
+static void nmi_shootdown_cpus(void)
+{
+	unsigned long msecs;
+	atomic_set(&waiting_for_crash_ipi, num_online_cpus() - 1);
+
+	/* Would it be better to replace the trap vector here? */
+	set_nmi_callback(crash_nmi_callback);
+	/* Ensure the new callback function is set before sending
+	 * out the NMI
+	 */
+	wmb();
+
+	smp_send_nmi_allbutself();
+
+	msecs = 1000; /* Wait at most a second for the other cpus to stop */
+	while ((atomic_read(&waiting_for_crash_ipi) > 0) && msecs) {
+		mdelay(1);
+		msecs--;
+	}
+
+	/* Leave the nmi callback set */
+}
+#else
+static void nmi_shootdown_cpus(void)
+{
+	/* There are no cpus to shootdown */
+}
+#endif
+
 void machine_crash_shutdown(void)
 {
 	/* This function is only called after the system
@@ -39,4 +92,7 @@ void machine_crash_shutdown(void)
 	 * In practice this means shooting down the other cpus in
 	 * an SMP system.
 	 */
+	/* The kernel is broken so disable interrupts */
+	local_irq_disable();
+	nmi_shootdown_cpus();
 }