diff --git a/CREDITS b/CREDITS
index c62dcb3b7e26..2358846f06be 100644
--- a/CREDITS
+++ b/CREDITS
@@ -1653,14 +1653,14 @@ S: Chapel Hill, North Carolina 27514-4818
S: USA
N: Dave Jones
-E: davej@codemonkey.org.uk
+E: davej@redhat.com
W: http://www.codemonkey.org.uk
-D: x86 errata/setup maintenance.
-D: AGPGART driver.
+D: Assorted VIA x86 support.
+D: 2.5 AGPGART overhaul.
D: CPUFREQ maintenance.
-D: Backport/Forwardport merge monkey.
-D: Various Janitor work.
-S: United Kingdom
+D: Fedora kernel maintainence.
+D: Misc/Other.
+S: 314 Littleton Rd, Westford, MA 01886, USA
N: Martin Josfsson
E: gandalf@wlug.westbo.se
diff --git a/Documentation/DocBook/kernel-hacking.tmpl b/Documentation/DocBook/kernel-hacking.tmpl
index 4c63e5864160..ae15d55350ec 100644
--- a/Documentation/DocBook/kernel-hacking.tmpl
+++ b/Documentation/DocBook/kernel-hacking.tmpl
@@ -1105,7 +1105,7 @@ static struct block_device_operations opt_fops = {
- Function names as strings (__FUNCTION__).
+ Function names as strings (__func__).
diff --git a/Documentation/MSI-HOWTO.txt b/Documentation/MSI-HOWTO.txt
index a51f693c1541..256defd7e174 100644
--- a/Documentation/MSI-HOWTO.txt
+++ b/Documentation/MSI-HOWTO.txt
@@ -236,10 +236,8 @@ software system can set different pages for controlling accesses to the
MSI-X structure. The implementation of MSI support requires the PCI
subsystem, not a device driver, to maintain full control of the MSI-X
table/MSI-X PBA (Pending Bit Array) and MMIO address space of the MSI-X
-table/MSI-X PBA. A device driver is prohibited from requesting the MMIO
-address space of the MSI-X table/MSI-X PBA. Otherwise, the PCI subsystem
-will fail enabling MSI-X on its hardware device when it calls the function
-pci_enable_msix().
+table/MSI-X PBA. A device driver should not access the MMIO address
+space of the MSI-X table/MSI-X PBA.
5.3.2 API pci_enable_msix
diff --git a/Documentation/PCI/pci.txt b/Documentation/PCI/pci.txt
index 8d4dc6250c58..fd4907a2968c 100644
--- a/Documentation/PCI/pci.txt
+++ b/Documentation/PCI/pci.txt
@@ -163,6 +163,10 @@ need pass only as many optional fields as necessary:
o class and classmask fields default to 0
o driver_data defaults to 0UL.
+Note that driver_data must match the value used by any of the pci_device_id
+entries defined in the driver. This makes the driver_data field mandatory
+if all the pci_device_id entries have a non-zero driver_data value.
+
Once added, the driver probe routine will be invoked for any unclaimed
PCI devices listed in its (newly updated) pci_ids list.
diff --git a/Documentation/PCI/pcieaer-howto.txt b/Documentation/PCI/pcieaer-howto.txt
index 16c251230c82..ddeb14beacc8 100644
--- a/Documentation/PCI/pcieaer-howto.txt
+++ b/Documentation/PCI/pcieaer-howto.txt
@@ -203,22 +203,17 @@ to mmio_enabled.
3.3 helper functions
-3.3.1 int pci_find_aer_capability(struct pci_dev *dev);
-pci_find_aer_capability locates the PCI Express AER capability
-in the device configuration space. If the device doesn't support
-PCI-Express AER, the function returns 0.
-
-3.3.2 int pci_enable_pcie_error_reporting(struct pci_dev *dev);
+3.3.1 int pci_enable_pcie_error_reporting(struct pci_dev *dev);
pci_enable_pcie_error_reporting enables the device to send error
messages to root port when an error is detected. Note that devices
don't enable the error reporting by default, so device drivers need
call this function to enable it.
-3.3.3 int pci_disable_pcie_error_reporting(struct pci_dev *dev);
+3.3.2 int pci_disable_pcie_error_reporting(struct pci_dev *dev);
pci_disable_pcie_error_reporting disables the device to send error
messages to root port when an error is detected.
-3.3.4 int pci_cleanup_aer_uncorrect_error_status(struct pci_dev *dev);
+3.3.3 int pci_cleanup_aer_uncorrect_error_status(struct pci_dev *dev);
pci_cleanup_aer_uncorrect_error_status cleanups the uncorrectable
error status register.
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index 0f1544f67400..53ba7c7d82b3 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -101,6 +101,7 @@ parameter is applicable:
X86-64 X86-64 architecture is enabled.
More X86-64 boot options can be found in
Documentation/x86_64/boot-options.txt .
+ X86 Either 32bit or 64bit x86 (same as X86-32+X86-64)
In addition, the following text indicates that the option:
@@ -1588,7 +1589,7 @@ and is between 256 and 4096 characters. It is defined in the file
See also Documentation/paride.txt.
pci=option[,option...] [PCI] various PCI subsystem options:
- off [X86-32] don't probe for the PCI bus
+ off [X86] don't probe for the PCI bus
bios [X86-32] force use of PCI BIOS, don't access
the hardware directly. Use this if your machine
has a non-standard PCI host bridge.
@@ -1596,9 +1597,9 @@ and is between 256 and 4096 characters. It is defined in the file
hardware access methods are allowed. Use this
if you experience crashes upon bootup and you
suspect they are caused by the BIOS.
- conf1 [X86-32] Force use of PCI Configuration
+ conf1 [X86] Force use of PCI Configuration
Mechanism 1.
- conf2 [X86-32] Force use of PCI Configuration
+ conf2 [X86] Force use of PCI Configuration
Mechanism 2.
noaer [PCIE] If the PCIEAER kernel config parameter is
enabled, this kernel boot option can be used to
@@ -1618,37 +1619,37 @@ and is between 256 and 4096 characters. It is defined in the file
this option if the kernel is unable to allocate
IRQs or discover secondary PCI buses on your
motherboard.
- rom [X86-32] Assign address space to expansion ROMs.
+ rom [X86] Assign address space to expansion ROMs.
Use with caution as certain devices share
address decoders between ROMs and other
resources.
- norom [X86-32,X86_64] Do not assign address space to
+ norom [X86] Do not assign address space to
expansion ROMs that do not already have
BIOS assigned address ranges.
- irqmask=0xMMMM [X86-32] Set a bit mask of IRQs allowed to be
+ irqmask=0xMMMM [X86] Set a bit mask of IRQs allowed to be
assigned automatically to PCI devices. You can
make the kernel exclude IRQs of your ISA cards
this way.
- pirqaddr=0xAAAAA [X86-32] Specify the physical address
+ pirqaddr=0xAAAAA [X86] Specify the physical address
of the PIRQ table (normally generated
by the BIOS) if it is outside the
F0000h-100000h range.
- lastbus=N [X86-32] Scan all buses thru bus #N. Can be
+ lastbus=N [X86] Scan all buses thru bus #N. Can be
useful if the kernel is unable to find your
secondary buses and you want to tell it
explicitly which ones they are.
- assign-busses [X86-32] Always assign all PCI bus
+ assign-busses [X86] Always assign all PCI bus
numbers ourselves, overriding
whatever the firmware may have done.
- usepirqmask [X86-32] Honor the possible IRQ mask stored
+ usepirqmask [X86] Honor the possible IRQ mask stored
in the BIOS $PIR table. This is needed on
some systems with broken BIOSes, notably
some HP Pavilion N5400 and Omnibook XE3
notebooks. This will have no effect if ACPI
IRQ routing is enabled.
- noacpi [X86-32] Do not use ACPI for IRQ routing
+ noacpi [X86] Do not use ACPI for IRQ routing
or for PCI scanning.
- use_crs [X86-32] Use _CRS for PCI resource
+ use_crs [X86] Use _CRS for PCI resource
allocation.
routeirq Do IRQ routing for all PCI devices.
This is normally done in pci_enable_device(),
@@ -1677,6 +1678,12 @@ and is between 256 and 4096 characters. It is defined in the file
reserved for the CardBus bridge's memory
window. The default value is 64 megabytes.
+ pcie_aspm= [PCIE] Forcibly enable or disable PCIe Active State Power
+ Management.
+ off Disable ASPM.
+ force Enable ASPM even on devices that claim not to support it.
+ WARNING: Forcing ASPM on may cause system lockups.
+
pcmv= [HW,PCMCIA] BadgePAD 4
pd. [PARIDE]
diff --git a/Documentation/markers.txt b/Documentation/markers.txt
index d9f50a19fa0c..089f6138fcd9 100644
--- a/Documentation/markers.txt
+++ b/Documentation/markers.txt
@@ -50,10 +50,12 @@ Connecting a function (probe) to a marker is done by providing a probe (function
to call) for the specific marker through marker_probe_register() and can be
activated by calling marker_arm(). Marker deactivation can be done by calling
marker_disarm() as many times as marker_arm() has been called. Removing a probe
-is done through marker_probe_unregister(); it will disarm the probe and make
-sure there is no caller left using the probe when it returns. Probe removal is
-preempt-safe because preemption is disabled around the probe call. See the
-"Probe example" section below for a sample probe module.
+is done through marker_probe_unregister(); it will disarm the probe.
+marker_synchronize_unregister() must be called before the end of the module exit
+function to make sure there is no caller left using the probe. This, and the
+fact that preemption is disabled around the probe call, make sure that probe
+removal and module unload are safe. See the "Probe example" section below for a
+sample probe module.
The marker mechanism supports inserting multiple instances of the same marker.
Markers can be put in inline functions, inlined static functions, and
diff --git a/Documentation/sysrq.txt b/Documentation/sysrq.txt
index 49378a9f2b5f..10a0263ebb3f 100644
--- a/Documentation/sysrq.txt
+++ b/Documentation/sysrq.txt
@@ -95,8 +95,9 @@ On all - write a character to /proc/sysrq-trigger. e.g.:
'p' - Will dump the current registers and flags to your console.
-'q' - Will dump a list of all running hrtimers.
- WARNING: Does not cover any other timers
+'q' - Will dump per CPU lists of all armed hrtimers (but NOT regular
+ timer_list timers) and detailed information about all
+ clockevent devices.
'r' - Turns off keyboard raw mode and sets it to XLATE.
diff --git a/Documentation/tracepoints.txt b/Documentation/tracepoints.txt
new file mode 100644
index 000000000000..5d354e167494
--- /dev/null
+++ b/Documentation/tracepoints.txt
@@ -0,0 +1,101 @@
+ Using the Linux Kernel Tracepoints
+
+ Mathieu Desnoyers
+
+
+This document introduces Linux Kernel Tracepoints and their use. It provides
+examples of how to insert tracepoints in the kernel and connect probe functions
+to them and provides some examples of probe functions.
+
+
+* Purpose of tracepoints
+
+A tracepoint placed in code provides a hook to call a function (probe) that you
+can provide at runtime. A tracepoint can be "on" (a probe is connected to it) or
+"off" (no probe is attached). When a tracepoint is "off" it has no effect,
+except for adding a tiny time penalty (checking a condition for a branch) and
+space penalty (adding a few bytes for the function call at the end of the
+instrumented function and adds a data structure in a separate section). When a
+tracepoint is "on", the function you provide is called each time the tracepoint
+is executed, in the execution context of the caller. When the function provided
+ends its execution, it returns to the caller (continuing from the tracepoint
+site).
+
+You can put tracepoints at important locations in the code. They are
+lightweight hooks that can pass an arbitrary number of parameters,
+which prototypes are described in a tracepoint declaration placed in a header
+file.
+
+They can be used for tracing and performance accounting.
+
+
+* Usage
+
+Two elements are required for tracepoints :
+
+- A tracepoint definition, placed in a header file.
+- The tracepoint statement, in C code.
+
+In order to use tracepoints, you should include linux/tracepoint.h.
+
+In include/trace/subsys.h :
+
+#include
+
+DEFINE_TRACE(subsys_eventname,
+ TPPTOTO(int firstarg, struct task_struct *p),
+ TPARGS(firstarg, p));
+
+In subsys/file.c (where the tracing statement must be added) :
+
+#include
+
+void somefct(void)
+{
+ ...
+ trace_subsys_eventname(arg, task);
+ ...
+}
+
+Where :
+- subsys_eventname is an identifier unique to your event
+ - subsys is the name of your subsystem.
+ - eventname is the name of the event to trace.
+- TPPTOTO(int firstarg, struct task_struct *p) is the prototype of the function
+ called by this tracepoint.
+- TPARGS(firstarg, p) are the parameters names, same as found in the prototype.
+
+Connecting a function (probe) to a tracepoint is done by providing a probe
+(function to call) for the specific tracepoint through
+register_trace_subsys_eventname(). Removing a probe is done through
+unregister_trace_subsys_eventname(); it will remove the probe sure there is no
+caller left using the probe when it returns. Probe removal is preempt-safe
+because preemption is disabled around the probe call. See the "Probe example"
+section below for a sample probe module.
+
+The tracepoint mechanism supports inserting multiple instances of the same
+tracepoint, but a single definition must be made of a given tracepoint name over
+all the kernel to make sure no type conflict will occur. Name mangling of the
+tracepoints is done using the prototypes to make sure typing is correct.
+Verification of probe type correctness is done at the registration site by the
+compiler. Tracepoints can be put in inline functions, inlined static functions,
+and unrolled loops as well as regular functions.
+
+The naming scheme "subsys_event" is suggested here as a convention intended
+to limit collisions. Tracepoint names are global to the kernel: they are
+considered as being the same whether they are in the core kernel image or in
+modules.
+
+
+* Probe / tracepoint example
+
+See the example provided in samples/tracepoints/src
+
+Compile them with your kernel.
+
+Run, as root :
+modprobe tracepoint-example (insmod order is not important)
+modprobe tracepoint-probe-example
+cat /proc/tracepoint-example (returns an expected error)
+rmmod tracepoint-example tracepoint-probe-example
+dmesg
diff --git a/Documentation/tracers/mmiotrace.txt b/Documentation/tracers/mmiotrace.txt
index a4afb560a45b..5bbbe2096223 100644
--- a/Documentation/tracers/mmiotrace.txt
+++ b/Documentation/tracers/mmiotrace.txt
@@ -36,7 +36,7 @@ $ mount -t debugfs debugfs /debug
$ echo mmiotrace > /debug/tracing/current_tracer
$ cat /debug/tracing/trace_pipe > mydump.txt &
Start X or whatever.
-$ echo "X is up" > /debug/tracing/marker
+$ echo "X is up" > /debug/tracing/trace_marker
$ echo none > /debug/tracing/current_tracer
Check for lost events.
@@ -59,9 +59,8 @@ The 'cat' process should stay running (sleeping) in the background.
Load the driver you want to trace and use it. Mmiotrace will only catch MMIO
accesses to areas that are ioremapped while mmiotrace is active.
-[Unimplemented feature:]
During tracing you can place comments (markers) into the trace by
-$ echo "X is up" > /debug/tracing/marker
+$ echo "X is up" > /debug/tracing/trace_marker
This makes it easier to see which part of the (huge) trace corresponds to
which action. It is recommended to place descriptive markers about what you
do.
diff --git a/MAINTAINERS b/MAINTAINERS
index 22303e5fe4ce..6d51f00dcdc0 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -1198,7 +1198,7 @@ S: Maintained
CPU FREQUENCY DRIVERS
P: Dave Jones
-M: davej@codemonkey.org.uk
+M: davej@redhat.com
L: cpufreq@vger.kernel.org
W: http://www.codemonkey.org.uk/projects/cpufreq/
T: git kernel.org/pub/scm/linux/kernel/git/davej/cpufreq.git
diff --git a/arch/alpha/kernel/sys_sable.c b/arch/alpha/kernel/sys_sable.c
index 99a7f19da13a..a4555f497639 100644
--- a/arch/alpha/kernel/sys_sable.c
+++ b/arch/alpha/kernel/sys_sable.c
@@ -47,7 +47,7 @@ typedef struct irq_swizzle_struct
static irq_swizzle_t *sable_lynx_irq_swizzle;
-static void sable_lynx_init_irq(int nr_irqs);
+static void sable_lynx_init_irq(int nr_of_irqs);
#if defined(CONFIG_ALPHA_GENERIC) || defined(CONFIG_ALPHA_SABLE)
@@ -530,11 +530,11 @@ sable_lynx_srm_device_interrupt(unsigned long vector)
}
static void __init
-sable_lynx_init_irq(int nr_irqs)
+sable_lynx_init_irq(int nr_of_irqs)
{
long i;
- for (i = 0; i < nr_irqs; ++i) {
+ for (i = 0; i < nr_of_irqs; ++i) {
irq_desc[i].status = IRQ_DISABLED | IRQ_LEVEL;
irq_desc[i].chip = &sable_lynx_irq_type;
}
diff --git a/arch/arm/mach-iop13xx/include/mach/time.h b/arch/arm/mach-iop13xx/include/mach/time.h
index 49213d9d7cad..d6d52527589d 100644
--- a/arch/arm/mach-iop13xx/include/mach/time.h
+++ b/arch/arm/mach-iop13xx/include/mach/time.h
@@ -41,7 +41,7 @@ static inline unsigned long iop13xx_core_freq(void)
return 1200000000;
default:
printk("%s: warning unknown frequency, defaulting to 800Mhz\n",
- __FUNCTION__);
+ __func__);
}
return 800000000;
@@ -60,7 +60,7 @@ static inline unsigned long iop13xx_xsi_bus_ratio(void)
return 4;
default:
printk("%s: warning unknown ratio, defaulting to 2\n",
- __FUNCTION__);
+ __func__);
}
return 2;
diff --git a/arch/arm/mach-ixp2000/ixdp2x00.c b/arch/arm/mach-ixp2000/ixdp2x00.c
index b0653a87159a..30451300751b 100644
--- a/arch/arm/mach-ixp2000/ixdp2x00.c
+++ b/arch/arm/mach-ixp2000/ixdp2x00.c
@@ -143,7 +143,7 @@ static struct irq_chip ixdp2x00_cpld_irq_chip = {
.unmask = ixdp2x00_irq_unmask
};
-void __init ixdp2x00_init_irq(volatile unsigned long *stat_reg, volatile unsigned long *mask_reg, unsigned long nr_irqs)
+void __init ixdp2x00_init_irq(volatile unsigned long *stat_reg, volatile unsigned long *mask_reg, unsigned long nr_of_irqs)
{
unsigned int irq;
@@ -154,7 +154,7 @@ void __init ixdp2x00_init_irq(volatile unsigned long *stat_reg, volatile unsigne
board_irq_stat = stat_reg;
board_irq_mask = mask_reg;
- board_irq_count = nr_irqs;
+ board_irq_count = nr_of_irqs;
*board_irq_mask = 0xffffffff;
diff --git a/arch/arm/mach-omap2/irq.c b/arch/arm/mach-omap2/irq.c
index d354e0fe4477..c40fc378a251 100644
--- a/arch/arm/mach-omap2/irq.c
+++ b/arch/arm/mach-omap2/irq.c
@@ -119,7 +119,7 @@ static void __init omap_irq_bank_init_one(struct omap_irq_bank *bank)
void __init omap_init_irq(void)
{
- unsigned long nr_irqs = 0;
+ unsigned long nr_of_irqs = 0;
unsigned int nr_banks = 0;
int i;
@@ -133,14 +133,14 @@ void __init omap_init_irq(void)
omap_irq_bank_init_one(bank);
- nr_irqs += bank->nr_irqs;
+ nr_of_irqs += bank->nr_irqs;
nr_banks++;
}
printk(KERN_INFO "Total of %ld interrupts on %d active controller%s\n",
- nr_irqs, nr_banks, nr_banks > 1 ? "s" : "");
+ nr_of_irqs, nr_banks, nr_banks > 1 ? "s" : "");
- for (i = 0; i < nr_irqs; i++) {
+ for (i = 0; i < nr_of_irqs; i++) {
set_irq_chip(i, &omap_irq_chip);
set_irq_handler(i, handle_level_irq);
set_irq_flags(i, IRQF_VALID);
diff --git a/arch/arm/mach-pxa/include/mach/zylonite.h b/arch/arm/mach-pxa/include/mach/zylonite.h
index 0d35ca04731e..bf6785adccf4 100644
--- a/arch/arm/mach-pxa/include/mach/zylonite.h
+++ b/arch/arm/mach-pxa/include/mach/zylonite.h
@@ -30,7 +30,7 @@ extern void zylonite_pxa300_init(void);
static inline void zylonite_pxa300_init(void)
{
if (cpu_is_pxa300() || cpu_is_pxa310())
- panic("%s: PXA300/PXA310 not supported\n", __FUNCTION__);
+ panic("%s: PXA300/PXA310 not supported\n", __func__);
}
#endif
@@ -40,7 +40,7 @@ extern void zylonite_pxa320_init(void);
static inline void zylonite_pxa320_init(void)
{
if (cpu_is_pxa320())
- panic("%s: PXA320 not supported\n", __FUNCTION__);
+ panic("%s: PXA320 not supported\n", __func__);
}
#endif
diff --git a/arch/arm/mach-sa1100/include/mach/ide.h b/arch/arm/mach-sa1100/include/mach/ide.h
deleted file mode 100644
index 4c99c8f5e617..000000000000
--- a/arch/arm/mach-sa1100/include/mach/ide.h
+++ /dev/null
@@ -1,75 +0,0 @@
-/*
- * arch/arm/mach-sa1100/include/mach/ide.h
- *
- * Copyright (c) 1998 Hugo Fiennes & Nicolas Pitre
- *
- * 18-aug-2000: Cleanup by Erik Mouw (J.A.K.Mouw@its.tudelft.nl)
- * Get rid of the special ide_init_hwif_ports() functions
- * and make a generalised function that can be used by all
- * architectures.
- */
-
-#include
-#include
-#include
-
-#error "This code is broken and needs update to match with current ide support"
-
-
-/*
- * Set up a hw structure for a specified data port, control port and IRQ.
- * This should follow whatever the default interface uses.
- */
-static inline void ide_init_hwif_ports(hw_regs_t *hw, unsigned long data_port,
- unsigned long ctrl_port, int *irq)
-{
- unsigned long reg = data_port;
- int i;
- int regincr = 1;
-
- /* The Empeg board has the first two address lines unused */
- if (machine_is_empeg())
- regincr = 1 << 2;
-
- /* The LART doesn't use A0 for IDE */
- if (machine_is_lart())
- regincr = 1 << 1;
-
- memset(hw, 0, sizeof(*hw));
-
- for (i = 0; i <= 7; i++) {
- hw->io_ports_array[i] = reg;
- reg += regincr;
- }
-
- hw->io_ports.ctl_addr = ctrl_port;
-
- if (irq)
- *irq = 0;
-}
-
-/*
- * This registers the standard ports for this architecture with the IDE
- * driver.
- */
-static __inline__ void
-ide_init_default_hwifs(void)
-{
- if (machine_is_lart()) {
-#ifdef CONFIG_SA1100_LART
- hw_regs_t hw;
-
- /* Enable GPIO as interrupt line */
- GPDR &= ~LART_GPIO_IDE;
- set_irq_type(LART_IRQ_IDE, IRQ_TYPE_EDGE_RISING);
-
- /* set PCMCIA interface timing */
- MECR = 0x00060006;
-
- /* init the interface */
- ide_init_hwif_ports(&hw, PCMCIA_IO_0_BASE + 0x0000, PCMCIA_IO_0_BASE + 0x1000, NULL);
- hw.irq = LART_IRQ_IDE;
- ide_register_hw(&hw);
-#endif
- }
-}
diff --git a/arch/avr32/mach-at32ap/extint.c b/arch/avr32/mach-at32ap/extint.c
index c36a6d59d6f0..310477ba1bbf 100644
--- a/arch/avr32/mach-at32ap/extint.c
+++ b/arch/avr32/mach-at32ap/extint.c
@@ -191,7 +191,7 @@ static int __init eic_probe(struct platform_device *pdev)
struct eic *eic;
struct resource *regs;
unsigned int i;
- unsigned int nr_irqs;
+ unsigned int nr_of_irqs;
unsigned int int_irq;
int ret;
u32 pattern;
@@ -224,7 +224,7 @@ static int __init eic_probe(struct platform_device *pdev)
eic_writel(eic, IDR, ~0UL);
eic_writel(eic, MODE, ~0UL);
pattern = eic_readl(eic, MODE);
- nr_irqs = fls(pattern);
+ nr_of_irqs = fls(pattern);
/* Trigger on low level unless overridden by driver */
eic_writel(eic, EDGE, 0UL);
@@ -232,7 +232,7 @@ static int __init eic_probe(struct platform_device *pdev)
eic->chip = &eic_chip;
- for (i = 0; i < nr_irqs; i++) {
+ for (i = 0; i < nr_of_irqs; i++) {
set_irq_chip_and_handler(eic->first_irq + i, &eic_chip,
handle_level_irq);
set_irq_chip_data(eic->first_irq + i, eic);
@@ -256,7 +256,7 @@ static int __init eic_probe(struct platform_device *pdev)
eic->regs, int_irq);
dev_info(&pdev->dev,
"Handling %u external IRQs, starting with IRQ %u\n",
- nr_irqs, eic->first_irq);
+ nr_of_irqs, eic->first_irq);
return 0;
diff --git a/arch/ia64/include/asm/pci.h b/arch/ia64/include/asm/pci.h
index 0149097b736d..ce342fb74246 100644
--- a/arch/ia64/include/asm/pci.h
+++ b/arch/ia64/include/asm/pci.h
@@ -95,16 +95,8 @@ extern int pci_mmap_page_range (struct pci_dev *dev, struct vm_area_struct *vma,
enum pci_mmap_state mmap_state, int write_combine);
#define HAVE_PCI_LEGACY
extern int pci_mmap_legacy_page_range(struct pci_bus *bus,
- struct vm_area_struct *vma);
-extern ssize_t pci_read_legacy_io(struct kobject *kobj,
- struct bin_attribute *bin_attr,
- char *buf, loff_t off, size_t count);
-extern ssize_t pci_write_legacy_io(struct kobject *kobj,
- struct bin_attribute *bin_attr,
- char *buf, loff_t off, size_t count);
-extern int pci_mmap_legacy_mem(struct kobject *kobj,
- struct bin_attribute *attr,
- struct vm_area_struct *vma);
+ struct vm_area_struct *vma,
+ enum pci_mmap_state mmap_state);
#define pci_get_legacy_mem platform_pci_get_legacy_mem
#define pci_legacy_read platform_pci_legacy_read
diff --git a/arch/ia64/pci/pci.c b/arch/ia64/pci/pci.c
index 7545037a8625..211fcfd115f9 100644
--- a/arch/ia64/pci/pci.c
+++ b/arch/ia64/pci/pci.c
@@ -614,12 +614,17 @@ char *ia64_pci_get_legacy_mem(struct pci_bus *bus)
* vector to get the base address.
*/
int
-pci_mmap_legacy_page_range(struct pci_bus *bus, struct vm_area_struct *vma)
+pci_mmap_legacy_page_range(struct pci_bus *bus, struct vm_area_struct *vma,
+ enum pci_mmap_state mmap_state)
{
unsigned long size = vma->vm_end - vma->vm_start;
pgprot_t prot;
char *addr;
+ /* We only support mmap'ing of legacy memory space */
+ if (mmap_state != pci_mmap_mem)
+ return -ENOSYS;
+
/*
* Avoid attribute aliasing. See Documentation/ia64/aliasing.txt
* for more details.
diff --git a/arch/m32r/kernel/smpboot.c b/arch/m32r/kernel/smpboot.c
index fc2994811f15..39cb6da72dcb 100644
--- a/arch/m32r/kernel/smpboot.c
+++ b/arch/m32r/kernel/smpboot.c
@@ -40,6 +40,7 @@
*/
#include
+#include
#include
#include
#include
diff --git a/arch/parisc/Kconfig b/arch/parisc/Kconfig
index 2bd1f6ef5db0..644a70b1b04e 100644
--- a/arch/parisc/Kconfig
+++ b/arch/parisc/Kconfig
@@ -9,6 +9,8 @@ config PARISC
def_bool y
select HAVE_IDE
select HAVE_OPROFILE
+ select RTC_CLASS
+ select RTC_DRV_PARISC
help
The PA-RISC microprocessor is designed by Hewlett-Packard and used
in many of their workstations & servers (HP9000 700 and 800 series,
diff --git a/include/asm-parisc/Kbuild b/arch/parisc/include/asm/Kbuild
similarity index 100%
rename from include/asm-parisc/Kbuild
rename to arch/parisc/include/asm/Kbuild
diff --git a/include/asm-parisc/agp.h b/arch/parisc/include/asm/agp.h
similarity index 100%
rename from include/asm-parisc/agp.h
rename to arch/parisc/include/asm/agp.h
diff --git a/include/asm-parisc/asmregs.h b/arch/parisc/include/asm/asmregs.h
similarity index 100%
rename from include/asm-parisc/asmregs.h
rename to arch/parisc/include/asm/asmregs.h
diff --git a/include/asm-parisc/assembly.h b/arch/parisc/include/asm/assembly.h
similarity index 100%
rename from include/asm-parisc/assembly.h
rename to arch/parisc/include/asm/assembly.h
diff --git a/include/asm-parisc/atomic.h b/arch/parisc/include/asm/atomic.h
similarity index 100%
rename from include/asm-parisc/atomic.h
rename to arch/parisc/include/asm/atomic.h
diff --git a/include/asm-parisc/auxvec.h b/arch/parisc/include/asm/auxvec.h
similarity index 100%
rename from include/asm-parisc/auxvec.h
rename to arch/parisc/include/asm/auxvec.h
diff --git a/include/asm-parisc/bitops.h b/arch/parisc/include/asm/bitops.h
similarity index 100%
rename from include/asm-parisc/bitops.h
rename to arch/parisc/include/asm/bitops.h
diff --git a/include/asm-parisc/bug.h b/arch/parisc/include/asm/bug.h
similarity index 100%
rename from include/asm-parisc/bug.h
rename to arch/parisc/include/asm/bug.h
diff --git a/include/asm-parisc/bugs.h b/arch/parisc/include/asm/bugs.h
similarity index 100%
rename from include/asm-parisc/bugs.h
rename to arch/parisc/include/asm/bugs.h
diff --git a/include/asm-parisc/byteorder.h b/arch/parisc/include/asm/byteorder.h
similarity index 100%
rename from include/asm-parisc/byteorder.h
rename to arch/parisc/include/asm/byteorder.h
diff --git a/include/asm-parisc/cache.h b/arch/parisc/include/asm/cache.h
similarity index 100%
rename from include/asm-parisc/cache.h
rename to arch/parisc/include/asm/cache.h
diff --git a/include/asm-parisc/cacheflush.h b/arch/parisc/include/asm/cacheflush.h
similarity index 100%
rename from include/asm-parisc/cacheflush.h
rename to arch/parisc/include/asm/cacheflush.h
diff --git a/include/asm-parisc/checksum.h b/arch/parisc/include/asm/checksum.h
similarity index 100%
rename from include/asm-parisc/checksum.h
rename to arch/parisc/include/asm/checksum.h
diff --git a/include/asm-parisc/compat.h b/arch/parisc/include/asm/compat.h
similarity index 100%
rename from include/asm-parisc/compat.h
rename to arch/parisc/include/asm/compat.h
diff --git a/include/asm-parisc/compat_rt_sigframe.h b/arch/parisc/include/asm/compat_rt_sigframe.h
similarity index 100%
rename from include/asm-parisc/compat_rt_sigframe.h
rename to arch/parisc/include/asm/compat_rt_sigframe.h
diff --git a/include/asm-parisc/compat_signal.h b/arch/parisc/include/asm/compat_signal.h
similarity index 100%
rename from include/asm-parisc/compat_signal.h
rename to arch/parisc/include/asm/compat_signal.h
diff --git a/include/asm-parisc/compat_ucontext.h b/arch/parisc/include/asm/compat_ucontext.h
similarity index 100%
rename from include/asm-parisc/compat_ucontext.h
rename to arch/parisc/include/asm/compat_ucontext.h
diff --git a/include/asm-parisc/cputime.h b/arch/parisc/include/asm/cputime.h
similarity index 100%
rename from include/asm-parisc/cputime.h
rename to arch/parisc/include/asm/cputime.h
diff --git a/include/asm-parisc/current.h b/arch/parisc/include/asm/current.h
similarity index 100%
rename from include/asm-parisc/current.h
rename to arch/parisc/include/asm/current.h
diff --git a/include/asm-parisc/delay.h b/arch/parisc/include/asm/delay.h
similarity index 100%
rename from include/asm-parisc/delay.h
rename to arch/parisc/include/asm/delay.h
diff --git a/include/asm-parisc/device.h b/arch/parisc/include/asm/device.h
similarity index 100%
rename from include/asm-parisc/device.h
rename to arch/parisc/include/asm/device.h
diff --git a/include/asm-parisc/div64.h b/arch/parisc/include/asm/div64.h
similarity index 100%
rename from include/asm-parisc/div64.h
rename to arch/parisc/include/asm/div64.h
diff --git a/include/asm-parisc/dma-mapping.h b/arch/parisc/include/asm/dma-mapping.h
similarity index 100%
rename from include/asm-parisc/dma-mapping.h
rename to arch/parisc/include/asm/dma-mapping.h
diff --git a/include/asm-parisc/dma.h b/arch/parisc/include/asm/dma.h
similarity index 100%
rename from include/asm-parisc/dma.h
rename to arch/parisc/include/asm/dma.h
diff --git a/include/asm-parisc/eisa_bus.h b/arch/parisc/include/asm/eisa_bus.h
similarity index 100%
rename from include/asm-parisc/eisa_bus.h
rename to arch/parisc/include/asm/eisa_bus.h
diff --git a/include/asm-parisc/eisa_eeprom.h b/arch/parisc/include/asm/eisa_eeprom.h
similarity index 100%
rename from include/asm-parisc/eisa_eeprom.h
rename to arch/parisc/include/asm/eisa_eeprom.h
diff --git a/include/asm-parisc/elf.h b/arch/parisc/include/asm/elf.h
similarity index 100%
rename from include/asm-parisc/elf.h
rename to arch/parisc/include/asm/elf.h
diff --git a/include/asm-parisc/emergency-restart.h b/arch/parisc/include/asm/emergency-restart.h
similarity index 100%
rename from include/asm-parisc/emergency-restart.h
rename to arch/parisc/include/asm/emergency-restart.h
diff --git a/include/asm-parisc/errno.h b/arch/parisc/include/asm/errno.h
similarity index 100%
rename from include/asm-parisc/errno.h
rename to arch/parisc/include/asm/errno.h
diff --git a/include/asm-parisc/fb.h b/arch/parisc/include/asm/fb.h
similarity index 100%
rename from include/asm-parisc/fb.h
rename to arch/parisc/include/asm/fb.h
diff --git a/include/asm-parisc/fcntl.h b/arch/parisc/include/asm/fcntl.h
similarity index 100%
rename from include/asm-parisc/fcntl.h
rename to arch/parisc/include/asm/fcntl.h
diff --git a/include/asm-parisc/fixmap.h b/arch/parisc/include/asm/fixmap.h
similarity index 100%
rename from include/asm-parisc/fixmap.h
rename to arch/parisc/include/asm/fixmap.h
diff --git a/include/asm-parisc/floppy.h b/arch/parisc/include/asm/floppy.h
similarity index 100%
rename from include/asm-parisc/floppy.h
rename to arch/parisc/include/asm/floppy.h
diff --git a/include/asm-parisc/futex.h b/arch/parisc/include/asm/futex.h
similarity index 100%
rename from include/asm-parisc/futex.h
rename to arch/parisc/include/asm/futex.h
diff --git a/include/asm-parisc/grfioctl.h b/arch/parisc/include/asm/grfioctl.h
similarity index 100%
rename from include/asm-parisc/grfioctl.h
rename to arch/parisc/include/asm/grfioctl.h
diff --git a/include/asm-parisc/hardirq.h b/arch/parisc/include/asm/hardirq.h
similarity index 100%
rename from include/asm-parisc/hardirq.h
rename to arch/parisc/include/asm/hardirq.h
diff --git a/include/asm-parisc/hardware.h b/arch/parisc/include/asm/hardware.h
similarity index 100%
rename from include/asm-parisc/hardware.h
rename to arch/parisc/include/asm/hardware.h
diff --git a/include/asm-parisc/hw_irq.h b/arch/parisc/include/asm/hw_irq.h
similarity index 100%
rename from include/asm-parisc/hw_irq.h
rename to arch/parisc/include/asm/hw_irq.h
diff --git a/include/asm-parisc/ide.h b/arch/parisc/include/asm/ide.h
similarity index 77%
rename from include/asm-parisc/ide.h
rename to arch/parisc/include/asm/ide.h
index c246ef75017d..81700a2321cf 100644
--- a/include/asm-parisc/ide.h
+++ b/arch/parisc/include/asm/ide.h
@@ -13,10 +13,6 @@
#ifdef __KERNEL__
-#define ide_request_irq(irq,hand,flg,dev,id) request_irq((irq),(hand),(flg),(dev),(id))
-#define ide_free_irq(irq,dev_id) free_irq((irq), (dev_id))
-#define ide_request_region(from,extent,name) request_region((from), (extent), (name))
-#define ide_release_region(from,extent) release_region((from), (extent))
/* Generic I/O and MEMIO string operations. */
#define __ide_insw insw
diff --git a/include/asm-parisc/io.h b/arch/parisc/include/asm/io.h
similarity index 100%
rename from include/asm-parisc/io.h
rename to arch/parisc/include/asm/io.h
diff --git a/include/asm-parisc/ioctl.h b/arch/parisc/include/asm/ioctl.h
similarity index 100%
rename from include/asm-parisc/ioctl.h
rename to arch/parisc/include/asm/ioctl.h
diff --git a/include/asm-parisc/ioctls.h b/arch/parisc/include/asm/ioctls.h
similarity index 100%
rename from include/asm-parisc/ioctls.h
rename to arch/parisc/include/asm/ioctls.h
diff --git a/include/asm-parisc/ipcbuf.h b/arch/parisc/include/asm/ipcbuf.h
similarity index 100%
rename from include/asm-parisc/ipcbuf.h
rename to arch/parisc/include/asm/ipcbuf.h
diff --git a/include/asm-parisc/irq.h b/arch/parisc/include/asm/irq.h
similarity index 100%
rename from include/asm-parisc/irq.h
rename to arch/parisc/include/asm/irq.h
diff --git a/include/asm-parisc/irq_regs.h b/arch/parisc/include/asm/irq_regs.h
similarity index 100%
rename from include/asm-parisc/irq_regs.h
rename to arch/parisc/include/asm/irq_regs.h
diff --git a/include/asm-parisc/kdebug.h b/arch/parisc/include/asm/kdebug.h
similarity index 100%
rename from include/asm-parisc/kdebug.h
rename to arch/parisc/include/asm/kdebug.h
diff --git a/include/asm-parisc/kmap_types.h b/arch/parisc/include/asm/kmap_types.h
similarity index 100%
rename from include/asm-parisc/kmap_types.h
rename to arch/parisc/include/asm/kmap_types.h
diff --git a/include/asm-parisc/led.h b/arch/parisc/include/asm/led.h
similarity index 100%
rename from include/asm-parisc/led.h
rename to arch/parisc/include/asm/led.h
diff --git a/include/asm-parisc/linkage.h b/arch/parisc/include/asm/linkage.h
similarity index 100%
rename from include/asm-parisc/linkage.h
rename to arch/parisc/include/asm/linkage.h
diff --git a/include/asm-parisc/local.h b/arch/parisc/include/asm/local.h
similarity index 100%
rename from include/asm-parisc/local.h
rename to arch/parisc/include/asm/local.h
diff --git a/include/asm-parisc/machdep.h b/arch/parisc/include/asm/machdep.h
similarity index 100%
rename from include/asm-parisc/machdep.h
rename to arch/parisc/include/asm/machdep.h
diff --git a/include/asm-parisc/mc146818rtc.h b/arch/parisc/include/asm/mc146818rtc.h
similarity index 100%
rename from include/asm-parisc/mc146818rtc.h
rename to arch/parisc/include/asm/mc146818rtc.h
diff --git a/include/asm-parisc/mckinley.h b/arch/parisc/include/asm/mckinley.h
similarity index 100%
rename from include/asm-parisc/mckinley.h
rename to arch/parisc/include/asm/mckinley.h
diff --git a/include/asm-parisc/mman.h b/arch/parisc/include/asm/mman.h
similarity index 100%
rename from include/asm-parisc/mman.h
rename to arch/parisc/include/asm/mman.h
diff --git a/include/asm-parisc/mmu.h b/arch/parisc/include/asm/mmu.h
similarity index 100%
rename from include/asm-parisc/mmu.h
rename to arch/parisc/include/asm/mmu.h
diff --git a/include/asm-parisc/mmu_context.h b/arch/parisc/include/asm/mmu_context.h
similarity index 100%
rename from include/asm-parisc/mmu_context.h
rename to arch/parisc/include/asm/mmu_context.h
diff --git a/include/asm-parisc/mmzone.h b/arch/parisc/include/asm/mmzone.h
similarity index 100%
rename from include/asm-parisc/mmzone.h
rename to arch/parisc/include/asm/mmzone.h
diff --git a/include/asm-parisc/module.h b/arch/parisc/include/asm/module.h
similarity index 100%
rename from include/asm-parisc/module.h
rename to arch/parisc/include/asm/module.h
diff --git a/include/asm-parisc/msgbuf.h b/arch/parisc/include/asm/msgbuf.h
similarity index 100%
rename from include/asm-parisc/msgbuf.h
rename to arch/parisc/include/asm/msgbuf.h
diff --git a/include/asm-parisc/mutex.h b/arch/parisc/include/asm/mutex.h
similarity index 100%
rename from include/asm-parisc/mutex.h
rename to arch/parisc/include/asm/mutex.h
diff --git a/include/asm-parisc/page.h b/arch/parisc/include/asm/page.h
similarity index 100%
rename from include/asm-parisc/page.h
rename to arch/parisc/include/asm/page.h
diff --git a/include/asm-parisc/param.h b/arch/parisc/include/asm/param.h
similarity index 100%
rename from include/asm-parisc/param.h
rename to arch/parisc/include/asm/param.h
diff --git a/include/asm-parisc/parisc-device.h b/arch/parisc/include/asm/parisc-device.h
similarity index 100%
rename from include/asm-parisc/parisc-device.h
rename to arch/parisc/include/asm/parisc-device.h
diff --git a/include/asm-parisc/parport.h b/arch/parisc/include/asm/parport.h
similarity index 100%
rename from include/asm-parisc/parport.h
rename to arch/parisc/include/asm/parport.h
diff --git a/include/asm-parisc/pci.h b/arch/parisc/include/asm/pci.h
similarity index 100%
rename from include/asm-parisc/pci.h
rename to arch/parisc/include/asm/pci.h
diff --git a/include/asm-parisc/pdc.h b/arch/parisc/include/asm/pdc.h
similarity index 99%
rename from include/asm-parisc/pdc.h
rename to arch/parisc/include/asm/pdc.h
index 9eaa794c3e4a..c584b00c6074 100644
--- a/include/asm-parisc/pdc.h
+++ b/arch/parisc/include/asm/pdc.h
@@ -332,6 +332,9 @@
#define BOOT_CONSOLE_SPA_OFFSET 0x3c4
#define BOOT_CONSOLE_PATH_OFFSET 0x3a8
+/* size of the pdc_result buffer for firmware.c */
+#define NUM_PDC_RESULT 32
+
#if !defined(__ASSEMBLY__)
#ifdef __KERNEL__
@@ -600,6 +603,7 @@ int pdc_chassis_info(struct pdc_chassis_info *chassis_info, void *led_info, unsi
int pdc_chassis_disp(unsigned long disp);
int pdc_chassis_warn(unsigned long *warn);
int pdc_coproc_cfg(struct pdc_coproc_cfg *pdc_coproc_info);
+int pdc_coproc_cfg_unlocked(struct pdc_coproc_cfg *pdc_coproc_info);
int pdc_iodc_read(unsigned long *actcnt, unsigned long hpa, unsigned int index,
void *iodc_data, unsigned int iodc_data_size);
int pdc_system_map_find_mods(struct pdc_system_map_mod_info *pdc_mod_info,
@@ -638,6 +642,7 @@ int pdc_mem_mem_table(struct pdc_memory_table_raddr *r_addr,
#endif
void set_firmware_width(void);
+void set_firmware_width_unlocked(void);
int pdc_do_firm_test_reset(unsigned long ftc_bitmap);
int pdc_do_reset(void);
int pdc_soft_power_info(unsigned long *power_reg);
diff --git a/include/asm-parisc/pdc_chassis.h b/arch/parisc/include/asm/pdc_chassis.h
similarity index 100%
rename from include/asm-parisc/pdc_chassis.h
rename to arch/parisc/include/asm/pdc_chassis.h
diff --git a/include/asm-parisc/pdcpat.h b/arch/parisc/include/asm/pdcpat.h
similarity index 100%
rename from include/asm-parisc/pdcpat.h
rename to arch/parisc/include/asm/pdcpat.h
diff --git a/include/asm-parisc/percpu.h b/arch/parisc/include/asm/percpu.h
similarity index 100%
rename from include/asm-parisc/percpu.h
rename to arch/parisc/include/asm/percpu.h
diff --git a/include/asm-parisc/perf.h b/arch/parisc/include/asm/perf.h
similarity index 100%
rename from include/asm-parisc/perf.h
rename to arch/parisc/include/asm/perf.h
diff --git a/include/asm-parisc/pgalloc.h b/arch/parisc/include/asm/pgalloc.h
similarity index 100%
rename from include/asm-parisc/pgalloc.h
rename to arch/parisc/include/asm/pgalloc.h
diff --git a/include/asm-parisc/pgtable.h b/arch/parisc/include/asm/pgtable.h
similarity index 100%
rename from include/asm-parisc/pgtable.h
rename to arch/parisc/include/asm/pgtable.h
diff --git a/include/asm-parisc/poll.h b/arch/parisc/include/asm/poll.h
similarity index 100%
rename from include/asm-parisc/poll.h
rename to arch/parisc/include/asm/poll.h
diff --git a/include/asm-parisc/posix_types.h b/arch/parisc/include/asm/posix_types.h
similarity index 100%
rename from include/asm-parisc/posix_types.h
rename to arch/parisc/include/asm/posix_types.h
diff --git a/include/asm-parisc/prefetch.h b/arch/parisc/include/asm/prefetch.h
similarity index 100%
rename from include/asm-parisc/prefetch.h
rename to arch/parisc/include/asm/prefetch.h
diff --git a/include/asm-parisc/processor.h b/arch/parisc/include/asm/processor.h
similarity index 100%
rename from include/asm-parisc/processor.h
rename to arch/parisc/include/asm/processor.h
diff --git a/include/asm-parisc/psw.h b/arch/parisc/include/asm/psw.h
similarity index 100%
rename from include/asm-parisc/psw.h
rename to arch/parisc/include/asm/psw.h
diff --git a/include/asm-parisc/ptrace.h b/arch/parisc/include/asm/ptrace.h
similarity index 85%
rename from include/asm-parisc/ptrace.h
rename to arch/parisc/include/asm/ptrace.h
index 3e94c5d85ff5..afa5333187b4 100644
--- a/include/asm-parisc/ptrace.h
+++ b/arch/parisc/include/asm/ptrace.h
@@ -47,6 +47,16 @@ struct pt_regs {
#define task_regs(task) ((struct pt_regs *) ((char *)(task) + TASK_REGS))
+#define __ARCH_WANT_COMPAT_SYS_PTRACE
+
+struct task_struct;
+#define arch_has_single_step() 1
+void user_disable_single_step(struct task_struct *task);
+void user_enable_single_step(struct task_struct *task);
+
+#define arch_has_block_step() 1
+void user_enable_block_step(struct task_struct *task);
+
/* XXX should we use iaoq[1] or iaoq[0] ? */
#define user_mode(regs) (((regs)->iaoq[0] & 3) ? 1 : 0)
#define user_space(regs) (((regs)->iasq[1] != 0) ? 1 : 0)
diff --git a/include/asm-parisc/real.h b/arch/parisc/include/asm/real.h
similarity index 100%
rename from include/asm-parisc/real.h
rename to arch/parisc/include/asm/real.h
diff --git a/include/asm-parisc/resource.h b/arch/parisc/include/asm/resource.h
similarity index 100%
rename from include/asm-parisc/resource.h
rename to arch/parisc/include/asm/resource.h
diff --git a/include/asm-parisc/ropes.h b/arch/parisc/include/asm/ropes.h
similarity index 99%
rename from include/asm-parisc/ropes.h
rename to arch/parisc/include/asm/ropes.h
index 007a880615eb..09f51d5ab57c 100644
--- a/include/asm-parisc/ropes.h
+++ b/arch/parisc/include/asm/ropes.h
@@ -1,7 +1,7 @@
#ifndef _ASM_PARISC_ROPES_H_
#define _ASM_PARISC_ROPES_H_
-#include
+#include
#ifdef CONFIG_64BIT
/* "low end" PA8800 machines use ZX1 chipset: PAT PDC and only run 64-bit */
diff --git a/include/asm-parisc/rt_sigframe.h b/arch/parisc/include/asm/rt_sigframe.h
similarity index 100%
rename from include/asm-parisc/rt_sigframe.h
rename to arch/parisc/include/asm/rt_sigframe.h
diff --git a/include/asm-parisc/rtc.h b/arch/parisc/include/asm/rtc.h
similarity index 100%
rename from include/asm-parisc/rtc.h
rename to arch/parisc/include/asm/rtc.h
diff --git a/include/asm-parisc/runway.h b/arch/parisc/include/asm/runway.h
similarity index 100%
rename from include/asm-parisc/runway.h
rename to arch/parisc/include/asm/runway.h
diff --git a/include/asm-parisc/scatterlist.h b/arch/parisc/include/asm/scatterlist.h
similarity index 100%
rename from include/asm-parisc/scatterlist.h
rename to arch/parisc/include/asm/scatterlist.h
diff --git a/include/asm-parisc/sections.h b/arch/parisc/include/asm/sections.h
similarity index 100%
rename from include/asm-parisc/sections.h
rename to arch/parisc/include/asm/sections.h
diff --git a/include/asm-parisc/segment.h b/arch/parisc/include/asm/segment.h
similarity index 100%
rename from include/asm-parisc/segment.h
rename to arch/parisc/include/asm/segment.h
diff --git a/include/asm-parisc/sembuf.h b/arch/parisc/include/asm/sembuf.h
similarity index 100%
rename from include/asm-parisc/sembuf.h
rename to arch/parisc/include/asm/sembuf.h
diff --git a/include/asm-parisc/serial.h b/arch/parisc/include/asm/serial.h
similarity index 100%
rename from include/asm-parisc/serial.h
rename to arch/parisc/include/asm/serial.h
diff --git a/include/asm-parisc/setup.h b/arch/parisc/include/asm/setup.h
similarity index 100%
rename from include/asm-parisc/setup.h
rename to arch/parisc/include/asm/setup.h
diff --git a/include/asm-parisc/shmbuf.h b/arch/parisc/include/asm/shmbuf.h
similarity index 100%
rename from include/asm-parisc/shmbuf.h
rename to arch/parisc/include/asm/shmbuf.h
diff --git a/include/asm-parisc/shmparam.h b/arch/parisc/include/asm/shmparam.h
similarity index 100%
rename from include/asm-parisc/shmparam.h
rename to arch/parisc/include/asm/shmparam.h
diff --git a/include/asm-parisc/sigcontext.h b/arch/parisc/include/asm/sigcontext.h
similarity index 100%
rename from include/asm-parisc/sigcontext.h
rename to arch/parisc/include/asm/sigcontext.h
diff --git a/include/asm-parisc/siginfo.h b/arch/parisc/include/asm/siginfo.h
similarity index 100%
rename from include/asm-parisc/siginfo.h
rename to arch/parisc/include/asm/siginfo.h
diff --git a/include/asm-parisc/signal.h b/arch/parisc/include/asm/signal.h
similarity index 100%
rename from include/asm-parisc/signal.h
rename to arch/parisc/include/asm/signal.h
diff --git a/include/asm-parisc/smp.h b/arch/parisc/include/asm/smp.h
similarity index 100%
rename from include/asm-parisc/smp.h
rename to arch/parisc/include/asm/smp.h
diff --git a/include/asm-parisc/socket.h b/arch/parisc/include/asm/socket.h
similarity index 100%
rename from include/asm-parisc/socket.h
rename to arch/parisc/include/asm/socket.h
diff --git a/include/asm-parisc/sockios.h b/arch/parisc/include/asm/sockios.h
similarity index 100%
rename from include/asm-parisc/sockios.h
rename to arch/parisc/include/asm/sockios.h
diff --git a/include/asm-parisc/spinlock.h b/arch/parisc/include/asm/spinlock.h
similarity index 100%
rename from include/asm-parisc/spinlock.h
rename to arch/parisc/include/asm/spinlock.h
diff --git a/include/asm-parisc/spinlock_types.h b/arch/parisc/include/asm/spinlock_types.h
similarity index 100%
rename from include/asm-parisc/spinlock_types.h
rename to arch/parisc/include/asm/spinlock_types.h
diff --git a/include/asm-parisc/stat.h b/arch/parisc/include/asm/stat.h
similarity index 100%
rename from include/asm-parisc/stat.h
rename to arch/parisc/include/asm/stat.h
diff --git a/include/asm-parisc/statfs.h b/arch/parisc/include/asm/statfs.h
similarity index 100%
rename from include/asm-parisc/statfs.h
rename to arch/parisc/include/asm/statfs.h
diff --git a/include/asm-parisc/string.h b/arch/parisc/include/asm/string.h
similarity index 100%
rename from include/asm-parisc/string.h
rename to arch/parisc/include/asm/string.h
diff --git a/include/asm-parisc/superio.h b/arch/parisc/include/asm/superio.h
similarity index 100%
rename from include/asm-parisc/superio.h
rename to arch/parisc/include/asm/superio.h
diff --git a/include/asm-parisc/system.h b/arch/parisc/include/asm/system.h
similarity index 100%
rename from include/asm-parisc/system.h
rename to arch/parisc/include/asm/system.h
diff --git a/include/asm-parisc/termbits.h b/arch/parisc/include/asm/termbits.h
similarity index 100%
rename from include/asm-parisc/termbits.h
rename to arch/parisc/include/asm/termbits.h
diff --git a/include/asm-parisc/termios.h b/arch/parisc/include/asm/termios.h
similarity index 100%
rename from include/asm-parisc/termios.h
rename to arch/parisc/include/asm/termios.h
diff --git a/include/asm-parisc/thread_info.h b/arch/parisc/include/asm/thread_info.h
similarity index 100%
rename from include/asm-parisc/thread_info.h
rename to arch/parisc/include/asm/thread_info.h
diff --git a/include/asm-parisc/timex.h b/arch/parisc/include/asm/timex.h
similarity index 100%
rename from include/asm-parisc/timex.h
rename to arch/parisc/include/asm/timex.h
diff --git a/include/asm-parisc/tlb.h b/arch/parisc/include/asm/tlb.h
similarity index 100%
rename from include/asm-parisc/tlb.h
rename to arch/parisc/include/asm/tlb.h
diff --git a/include/asm-parisc/tlbflush.h b/arch/parisc/include/asm/tlbflush.h
similarity index 100%
rename from include/asm-parisc/tlbflush.h
rename to arch/parisc/include/asm/tlbflush.h
diff --git a/include/asm-parisc/topology.h b/arch/parisc/include/asm/topology.h
similarity index 100%
rename from include/asm-parisc/topology.h
rename to arch/parisc/include/asm/topology.h
diff --git a/include/asm-parisc/traps.h b/arch/parisc/include/asm/traps.h
similarity index 100%
rename from include/asm-parisc/traps.h
rename to arch/parisc/include/asm/traps.h
diff --git a/include/asm-parisc/types.h b/arch/parisc/include/asm/types.h
similarity index 100%
rename from include/asm-parisc/types.h
rename to arch/parisc/include/asm/types.h
diff --git a/include/asm-parisc/uaccess.h b/arch/parisc/include/asm/uaccess.h
similarity index 100%
rename from include/asm-parisc/uaccess.h
rename to arch/parisc/include/asm/uaccess.h
diff --git a/include/asm-parisc/ucontext.h b/arch/parisc/include/asm/ucontext.h
similarity index 100%
rename from include/asm-parisc/ucontext.h
rename to arch/parisc/include/asm/ucontext.h
diff --git a/include/asm-parisc/unaligned.h b/arch/parisc/include/asm/unaligned.h
similarity index 100%
rename from include/asm-parisc/unaligned.h
rename to arch/parisc/include/asm/unaligned.h
diff --git a/include/asm-parisc/unistd.h b/arch/parisc/include/asm/unistd.h
similarity index 99%
rename from include/asm-parisc/unistd.h
rename to arch/parisc/include/asm/unistd.h
index a7d857f0e4f4..ef26b009dc5d 100644
--- a/include/asm-parisc/unistd.h
+++ b/arch/parisc/include/asm/unistd.h
@@ -801,8 +801,14 @@
#define __NR_timerfd_create (__NR_Linux + 306)
#define __NR_timerfd_settime (__NR_Linux + 307)
#define __NR_timerfd_gettime (__NR_Linux + 308)
+#define __NR_signalfd4 (__NR_Linux + 309)
+#define __NR_eventfd2 (__NR_Linux + 310)
+#define __NR_epoll_create1 (__NR_Linux + 311)
+#define __NR_dup3 (__NR_Linux + 312)
+#define __NR_pipe2 (__NR_Linux + 313)
+#define __NR_inotify_init1 (__NR_Linux + 314)
-#define __NR_Linux_syscalls (__NR_timerfd_gettime + 1)
+#define __NR_Linux_syscalls (__NR_inotify_init1 + 1)
#define __IGNORE_select /* newselect */
diff --git a/include/asm-parisc/unwind.h b/arch/parisc/include/asm/unwind.h
similarity index 99%
rename from include/asm-parisc/unwind.h
rename to arch/parisc/include/asm/unwind.h
index 2f7e6e50a158..52482e4fc20d 100644
--- a/include/asm-parisc/unwind.h
+++ b/arch/parisc/include/asm/unwind.h
@@ -74,4 +74,6 @@ void unwind_frame_init_running(struct unwind_frame_info *info, struct pt_regs *r
int unwind_once(struct unwind_frame_info *info);
int unwind_to_user(struct unwind_frame_info *info);
+int unwind_init(void);
+
#endif
diff --git a/include/asm-parisc/user.h b/arch/parisc/include/asm/user.h
similarity index 100%
rename from include/asm-parisc/user.h
rename to arch/parisc/include/asm/user.h
diff --git a/include/asm-parisc/vga.h b/arch/parisc/include/asm/vga.h
similarity index 100%
rename from include/asm-parisc/vga.h
rename to arch/parisc/include/asm/vga.h
diff --git a/include/asm-parisc/xor.h b/arch/parisc/include/asm/xor.h
similarity index 100%
rename from include/asm-parisc/xor.h
rename to arch/parisc/include/asm/xor.h
diff --git a/arch/parisc/kernel/.gitignore b/arch/parisc/kernel/.gitignore
new file mode 100644
index 000000000000..c5f676c3c224
--- /dev/null
+++ b/arch/parisc/kernel/.gitignore
@@ -0,0 +1 @@
+vmlinux.lds
diff --git a/arch/parisc/kernel/asm-offsets.c b/arch/parisc/kernel/asm-offsets.c
index 3efc0b73e4ff..699cf8ef2118 100644
--- a/arch/parisc/kernel/asm-offsets.c
+++ b/arch/parisc/kernel/asm-offsets.c
@@ -290,5 +290,8 @@ int main(void)
DEFINE(EXCDATA_IP, offsetof(struct exception_data, fault_ip));
DEFINE(EXCDATA_SPACE, offsetof(struct exception_data, fault_space));
DEFINE(EXCDATA_ADDR, offsetof(struct exception_data, fault_addr));
+ BLANK();
+ DEFINE(ASM_PDC_RESULT_SIZE, NUM_PDC_RESULT * sizeof(unsigned long));
+ BLANK();
return 0;
}
diff --git a/arch/parisc/kernel/firmware.c b/arch/parisc/kernel/firmware.c
index 7177a6cd1b7f..03f26bd75bd8 100644
--- a/arch/parisc/kernel/firmware.c
+++ b/arch/parisc/kernel/firmware.c
@@ -71,8 +71,8 @@
#include /* for boot_cpu_data */
static DEFINE_SPINLOCK(pdc_lock);
-static unsigned long pdc_result[32] __attribute__ ((aligned (8)));
-static unsigned long pdc_result2[32] __attribute__ ((aligned (8)));
+extern unsigned long pdc_result[NUM_PDC_RESULT];
+extern unsigned long pdc_result2[NUM_PDC_RESULT];
#ifdef CONFIG_64BIT
#define WIDE_FIRMWARE 0x1
@@ -150,26 +150,40 @@ static void convert_to_wide(unsigned long *addr)
#endif
}
+#ifdef CONFIG_64BIT
+void __init set_firmware_width_unlocked(void)
+{
+ int ret;
+
+ ret = mem_pdc_call(PDC_MODEL, PDC_MODEL_CAPABILITIES,
+ __pa(pdc_result), 0);
+ convert_to_wide(pdc_result);
+ if (pdc_result[0] != NARROW_FIRMWARE)
+ parisc_narrow_firmware = 0;
+}
+
/**
* set_firmware_width - Determine if the firmware is wide or narrow.
*
- * This function must be called before any pdc_* function that uses the convert_to_wide
- * function.
+ * This function must be called before any pdc_* function that uses the
+ * convert_to_wide function.
*/
void __init set_firmware_width(void)
{
-#ifdef CONFIG_64BIT
- int retval;
unsigned long flags;
-
- spin_lock_irqsave(&pdc_lock, flags);
- retval = mem_pdc_call(PDC_MODEL, PDC_MODEL_CAPABILITIES, __pa(pdc_result), 0);
- convert_to_wide(pdc_result);
- if(pdc_result[0] != NARROW_FIRMWARE)
- parisc_narrow_firmware = 0;
- spin_unlock_irqrestore(&pdc_lock, flags);
-#endif
+ spin_lock_irqsave(&pdc_lock, flags);
+ set_firmware_width_unlocked();
+ spin_unlock_irqrestore(&pdc_lock, flags);
}
+#else
+void __init set_firmware_width_unlocked(void) {
+ return;
+}
+
+void __init set_firmware_width(void) {
+ return;
+}
+#endif /*CONFIG_64BIT*/
/**
* pdc_emergency_unlock - Unlock the linux pdc lock
@@ -288,6 +302,20 @@ int pdc_chassis_warn(unsigned long *warn)
return retval;
}
+int __init pdc_coproc_cfg_unlocked(struct pdc_coproc_cfg *pdc_coproc_info)
+{
+ int ret;
+
+ ret = mem_pdc_call(PDC_COPROC, PDC_COPROC_CFG, __pa(pdc_result));
+ convert_to_wide(pdc_result);
+ pdc_coproc_info->ccr_functional = pdc_result[0];
+ pdc_coproc_info->ccr_present = pdc_result[1];
+ pdc_coproc_info->revision = pdc_result[17];
+ pdc_coproc_info->model = pdc_result[18];
+
+ return ret;
+}
+
/**
* pdc_coproc_cfg - To identify coprocessors attached to the processor.
* @pdc_coproc_info: Return buffer address.
@@ -297,19 +325,14 @@ int pdc_chassis_warn(unsigned long *warn)
*/
int __init pdc_coproc_cfg(struct pdc_coproc_cfg *pdc_coproc_info)
{
- int retval;
+ int ret;
unsigned long flags;
- spin_lock_irqsave(&pdc_lock, flags);
- retval = mem_pdc_call(PDC_COPROC, PDC_COPROC_CFG, __pa(pdc_result));
- convert_to_wide(pdc_result);
- pdc_coproc_info->ccr_functional = pdc_result[0];
- pdc_coproc_info->ccr_present = pdc_result[1];
- pdc_coproc_info->revision = pdc_result[17];
- pdc_coproc_info->model = pdc_result[18];
- spin_unlock_irqrestore(&pdc_lock, flags);
+ spin_lock_irqsave(&pdc_lock, flags);
+ ret = pdc_coproc_cfg_unlocked(pdc_coproc_info);
+ spin_unlock_irqrestore(&pdc_lock, flags);
- return retval;
+ return ret;
}
/**
diff --git a/arch/parisc/kernel/head.S b/arch/parisc/kernel/head.S
index a84e31e82876..0e3d9f9b9e33 100644
--- a/arch/parisc/kernel/head.S
+++ b/arch/parisc/kernel/head.S
@@ -121,7 +121,7 @@ $pgt_fill_loop:
copy %r0,%r2
/* And the RFI Target address too */
- load32 start_kernel,%r11
+ load32 start_parisc,%r11
/* And the initial task pointer */
load32 init_thread_union,%r6
diff --git a/arch/parisc/kernel/ptrace.c b/arch/parisc/kernel/ptrace.c
index 49c637970789..90904f9dfc50 100644
--- a/arch/parisc/kernel/ptrace.c
+++ b/arch/parisc/kernel/ptrace.c
@@ -4,6 +4,7 @@
* Copyright (C) 2000 Hewlett-Packard Co, Linuxcare Inc.
* Copyright (C) 2000 Matthew Wilcox
* Copyright (C) 2000 David Huggins-Daines
+ * Copyright (C) 2008 Helge Deller
*/
#include
@@ -27,15 +28,149 @@
/* PSW bits we allow the debugger to modify */
#define USER_PSW_BITS (PSW_N | PSW_V | PSW_CB)
-#undef DEBUG_PTRACE
+/*
+ * Called by kernel/ptrace.c when detaching..
+ *
+ * Make sure single step bits etc are not set.
+ */
+void ptrace_disable(struct task_struct *task)
+{
+ task->ptrace &= ~(PT_SINGLESTEP|PT_BLOCKSTEP);
-#ifdef DEBUG_PTRACE
-#define DBG(x...) printk(x)
-#else
-#define DBG(x...)
-#endif
+ /* make sure the trap bits are not set */
+ pa_psw(task)->r = 0;
+ pa_psw(task)->t = 0;
+ pa_psw(task)->h = 0;
+ pa_psw(task)->l = 0;
+}
-#ifdef CONFIG_64BIT
+/*
+ * The following functions are called by ptrace_resume() when
+ * enabling or disabling single/block tracing.
+ */
+void user_disable_single_step(struct task_struct *task)
+{
+ ptrace_disable(task);
+}
+
+void user_enable_single_step(struct task_struct *task)
+{
+ task->ptrace &= ~PT_BLOCKSTEP;
+ task->ptrace |= PT_SINGLESTEP;
+
+ if (pa_psw(task)->n) {
+ struct siginfo si;
+
+ /* Nullified, just crank over the queue. */
+ task_regs(task)->iaoq[0] = task_regs(task)->iaoq[1];
+ task_regs(task)->iasq[0] = task_regs(task)->iasq[1];
+ task_regs(task)->iaoq[1] = task_regs(task)->iaoq[0] + 4;
+ pa_psw(task)->n = 0;
+ pa_psw(task)->x = 0;
+ pa_psw(task)->y = 0;
+ pa_psw(task)->z = 0;
+ pa_psw(task)->b = 0;
+ ptrace_disable(task);
+ /* Don't wake up the task, but let the
+ parent know something happened. */
+ si.si_code = TRAP_TRACE;
+ si.si_addr = (void __user *) (task_regs(task)->iaoq[0] & ~3);
+ si.si_signo = SIGTRAP;
+ si.si_errno = 0;
+ force_sig_info(SIGTRAP, &si, task);
+ /* notify_parent(task, SIGCHLD); */
+ return;
+ }
+
+ /* Enable recovery counter traps. The recovery counter
+ * itself will be set to zero on a task switch. If the
+ * task is suspended on a syscall then the syscall return
+ * path will overwrite the recovery counter with a suitable
+ * value such that it traps once back in user space. We
+ * disable interrupts in the tasks PSW here also, to avoid
+ * interrupts while the recovery counter is decrementing.
+ */
+ pa_psw(task)->r = 1;
+ pa_psw(task)->t = 0;
+ pa_psw(task)->h = 0;
+ pa_psw(task)->l = 0;
+}
+
+void user_enable_block_step(struct task_struct *task)
+{
+ task->ptrace &= ~PT_SINGLESTEP;
+ task->ptrace |= PT_BLOCKSTEP;
+
+ /* Enable taken branch trap. */
+ pa_psw(task)->r = 0;
+ pa_psw(task)->t = 1;
+ pa_psw(task)->h = 0;
+ pa_psw(task)->l = 0;
+}
+
+long arch_ptrace(struct task_struct *child, long request, long addr, long data)
+{
+ unsigned long tmp;
+ long ret = -EIO;
+
+ switch (request) {
+
+ /* Read the word at location addr in the USER area. For ptraced
+ processes, the kernel saves all regs on a syscall. */
+ case PTRACE_PEEKUSR:
+ if ((addr & (sizeof(long)-1)) ||
+ (unsigned long) addr >= sizeof(struct pt_regs))
+ break;
+ tmp = *(unsigned long *) ((char *) task_regs(child) + addr);
+ ret = put_user(tmp, (unsigned long *) data);
+ break;
+
+ /* Write the word at location addr in the USER area. This will need
+ to change when the kernel no longer saves all regs on a syscall.
+ FIXME. There is a problem at the moment in that r3-r18 are only
+ saved if the process is ptraced on syscall entry, and even then
+ those values are overwritten by actual register values on syscall
+ exit. */
+ case PTRACE_POKEUSR:
+ /* Some register values written here may be ignored in
+ * entry.S:syscall_restore_rfi; e.g. iaoq is written with
+ * r31/r31+4, and not with the values in pt_regs.
+ */
+ if (addr == PT_PSW) {
+ /* Allow writing to Nullify, Divide-step-correction,
+ * and carry/borrow bits.
+ * BEWARE, if you set N, and then single step, it won't
+ * stop on the nullified instruction.
+ */
+ data &= USER_PSW_BITS;
+ task_regs(child)->gr[0] &= ~USER_PSW_BITS;
+ task_regs(child)->gr[0] |= data;
+ ret = 0;
+ break;
+ }
+
+ if ((addr & (sizeof(long)-1)) ||
+ (unsigned long) addr >= sizeof(struct pt_regs))
+ break;
+ if ((addr >= PT_GR1 && addr <= PT_GR31) ||
+ addr == PT_IAOQ0 || addr == PT_IAOQ1 ||
+ (addr >= PT_FR0 && addr <= PT_FR31 + 4) ||
+ addr == PT_SAR) {
+ *(unsigned long *) ((char *) task_regs(child) + addr) = data;
+ ret = 0;
+ }
+ break;
+
+ default:
+ ret = ptrace_request(child, request, addr, data);
+ break;
+ }
+
+ return ret;
+}
+
+
+#ifdef CONFIG_COMPAT
/* This function is needed to translate 32 bit pt_regs offsets in to
* 64 bit pt_regs offsets. For example, a 32 bit gdb under a 64 bit kernel
@@ -61,106 +196,25 @@ static long translate_usr_offset(long offset)
else
return -1;
}
-#endif
-/*
- * Called by kernel/ptrace.c when detaching..
- *
- * Make sure single step bits etc are not set.
- */
-void ptrace_disable(struct task_struct *child)
+long compat_arch_ptrace(struct task_struct *child, compat_long_t request,
+ compat_ulong_t addr, compat_ulong_t data)
{
- /* make sure the trap bits are not set */
- pa_psw(child)->r = 0;
- pa_psw(child)->t = 0;
- pa_psw(child)->h = 0;
- pa_psw(child)->l = 0;
-}
-
-long arch_ptrace(struct task_struct *child, long request, long addr, long data)
-{
- long ret;
-#ifdef DEBUG_PTRACE
- long oaddr=addr, odata=data;
-#endif
+ compat_uint_t tmp;
+ long ret = -EIO;
switch (request) {
- case PTRACE_PEEKTEXT: /* read word at location addr. */
- case PTRACE_PEEKDATA: {
-#ifdef CONFIG_64BIT
- if (__is_compat_task(child)) {
- int copied;
- unsigned int tmp;
- addr &= 0xffffffffL;
- copied = access_process_vm(child, addr, &tmp, sizeof(tmp), 0);
- ret = -EIO;
- if (copied != sizeof(tmp))
- goto out_tsk;
- ret = put_user(tmp,(unsigned int *) data);
- DBG("sys_ptrace(PEEK%s, %d, %lx, %lx) returning %ld, data %x\n",
- request == PTRACE_PEEKTEXT ? "TEXT" : "DATA",
- pid, oaddr, odata, ret, tmp);
- }
- else
-#endif
- ret = generic_ptrace_peekdata(child, addr, data);
- goto out_tsk;
- }
+ case PTRACE_PEEKUSR:
+ if (addr & (sizeof(compat_uint_t)-1))
+ break;
+ addr = translate_usr_offset(addr);
+ if (addr < 0)
+ break;
- /* when I and D space are separate, this will have to be fixed. */
- case PTRACE_POKETEXT: /* write the word at location addr. */
- case PTRACE_POKEDATA:
- ret = 0;
-#ifdef CONFIG_64BIT
- if (__is_compat_task(child)) {
- unsigned int tmp = (unsigned int)data;
- DBG("sys_ptrace(POKE%s, %d, %lx, %lx)\n",
- request == PTRACE_POKETEXT ? "TEXT" : "DATA",
- pid, oaddr, odata);
- addr &= 0xffffffffL;
- if (access_process_vm(child, addr, &tmp, sizeof(tmp), 1) == sizeof(tmp))
- goto out_tsk;
- }
- else
-#endif
- {
- if (access_process_vm(child, addr, &data, sizeof(data), 1) == sizeof(data))
- goto out_tsk;
- }
- ret = -EIO;
- goto out_tsk;
-
- /* Read the word at location addr in the USER area. For ptraced
- processes, the kernel saves all regs on a syscall. */
- case PTRACE_PEEKUSR: {
- ret = -EIO;
-#ifdef CONFIG_64BIT
- if (__is_compat_task(child)) {
- unsigned int tmp;
-
- if (addr & (sizeof(int)-1))
- goto out_tsk;
- if ((addr = translate_usr_offset(addr)) < 0)
- goto out_tsk;
-
- tmp = *(unsigned int *) ((char *) task_regs(child) + addr);
- ret = put_user(tmp, (unsigned int *) data);
- DBG("sys_ptrace(PEEKUSR, %d, %lx, %lx) returning %ld, addr %lx, data %x\n",
- pid, oaddr, odata, ret, addr, tmp);
- }
- else
-#endif
- {
- unsigned long tmp;
-
- if ((addr & (sizeof(long)-1)) || (unsigned long) addr >= sizeof(struct pt_regs))
- goto out_tsk;
- tmp = *(unsigned long *) ((char *) task_regs(child) + addr);
- ret = put_user(tmp, (unsigned long *) data);
- }
- goto out_tsk;
- }
+ tmp = *(compat_uint_t *) ((char *) task_regs(child) + addr);
+ ret = put_user(tmp, (compat_uint_t *) (unsigned long) data);
+ break;
/* Write the word at location addr in the USER area. This will need
to change when the kernel no longer saves all regs on a syscall.
@@ -169,185 +223,46 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data)
those values are overwritten by actual register values on syscall
exit. */
case PTRACE_POKEUSR:
- ret = -EIO;
/* Some register values written here may be ignored in
* entry.S:syscall_restore_rfi; e.g. iaoq is written with
* r31/r31+4, and not with the values in pt_regs.
*/
- /* PT_PSW=0, so this is valid for 32 bit processes under 64
- * bit kernels.
- */
if (addr == PT_PSW) {
- /* PT_PSW=0, so this is valid for 32 bit processes
- * under 64 bit kernels.
- *
- * Allow writing to Nullify, Divide-step-correction,
- * and carry/borrow bits.
- * BEWARE, if you set N, and then single step, it won't
- * stop on the nullified instruction.
+ /* Since PT_PSW==0, it is valid for 32 bit processes
+ * under 64 bit kernels as well.
*/
- DBG("sys_ptrace(POKEUSR, %d, %lx, %lx)\n",
- pid, oaddr, odata);
- data &= USER_PSW_BITS;
- task_regs(child)->gr[0] &= ~USER_PSW_BITS;
- task_regs(child)->gr[0] |= data;
- ret = 0;
- goto out_tsk;
- }
-#ifdef CONFIG_64BIT
- if (__is_compat_task(child)) {
- if (addr & (sizeof(int)-1))
- goto out_tsk;
- if ((addr = translate_usr_offset(addr)) < 0)
- goto out_tsk;
- DBG("sys_ptrace(POKEUSR, %d, %lx, %lx) addr %lx\n",
- pid, oaddr, odata, addr);
+ ret = arch_ptrace(child, request, addr, data);
+ } else {
+ if (addr & (sizeof(compat_uint_t)-1))
+ break;
+ addr = translate_usr_offset(addr);
+ if (addr < 0)
+ break;
if (addr >= PT_FR0 && addr <= PT_FR31 + 4) {
/* Special case, fp regs are 64 bits anyway */
- *(unsigned int *) ((char *) task_regs(child) + addr) = data;
+ *(__u64 *) ((char *) task_regs(child) + addr) = data;
ret = 0;
}
else if ((addr >= PT_GR1+4 && addr <= PT_GR31+4) ||
addr == PT_IAOQ0+4 || addr == PT_IAOQ1+4 ||
addr == PT_SAR+4) {
/* Zero the top 32 bits */
- *(unsigned int *) ((char *) task_regs(child) + addr - 4) = 0;
- *(unsigned int *) ((char *) task_regs(child) + addr) = data;
+ *(__u32 *) ((char *) task_regs(child) + addr - 4) = 0;
+ *(__u32 *) ((char *) task_regs(child) + addr) = data;
ret = 0;
}
- goto out_tsk;
}
- else
-#endif
- {
- if ((addr & (sizeof(long)-1)) || (unsigned long) addr >= sizeof(struct pt_regs))
- goto out_tsk;
- if ((addr >= PT_GR1 && addr <= PT_GR31) ||
- addr == PT_IAOQ0 || addr == PT_IAOQ1 ||
- (addr >= PT_FR0 && addr <= PT_FR31 + 4) ||
- addr == PT_SAR) {
- *(unsigned long *) ((char *) task_regs(child) + addr) = data;
- ret = 0;
- }
- goto out_tsk;
- }
-
- case PTRACE_SYSCALL: /* continue and stop at next (return from) syscall */
- case PTRACE_CONT:
- ret = -EIO;
- DBG("sys_ptrace(%s)\n",
- request == PTRACE_SYSCALL ? "SYSCALL" : "CONT");
- if (!valid_signal(data))
- goto out_tsk;
- child->ptrace &= ~(PT_SINGLESTEP|PT_BLOCKSTEP);
- if (request == PTRACE_SYSCALL) {
- set_tsk_thread_flag(child, TIF_SYSCALL_TRACE);
- } else {
- clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE);
- }
- child->exit_code = data;
- goto out_wake_notrap;
-
- case PTRACE_KILL:
- /*
- * make the child exit. Best I can do is send it a
- * sigkill. perhaps it should be put in the status
- * that it wants to exit.
- */
- ret = 0;
- DBG("sys_ptrace(KILL)\n");
- if (child->exit_state == EXIT_ZOMBIE) /* already dead */
- goto out_tsk;
- child->exit_code = SIGKILL;
- goto out_wake_notrap;
-
- case PTRACE_SINGLEBLOCK:
- DBG("sys_ptrace(SINGLEBLOCK)\n");
- ret = -EIO;
- if (!valid_signal(data))
- goto out_tsk;
- clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE);
- child->ptrace &= ~PT_SINGLESTEP;
- child->ptrace |= PT_BLOCKSTEP;
- child->exit_code = data;
-
- /* Enable taken branch trap. */
- pa_psw(child)->r = 0;
- pa_psw(child)->t = 1;
- pa_psw(child)->h = 0;
- pa_psw(child)->l = 0;
- goto out_wake;
-
- case PTRACE_SINGLESTEP:
- DBG("sys_ptrace(SINGLESTEP)\n");
- ret = -EIO;
- if (!valid_signal(data))
- goto out_tsk;
-
- clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE);
- child->ptrace &= ~PT_BLOCKSTEP;
- child->ptrace |= PT_SINGLESTEP;
- child->exit_code = data;
-
- if (pa_psw(child)->n) {
- struct siginfo si;
-
- /* Nullified, just crank over the queue. */
- task_regs(child)->iaoq[0] = task_regs(child)->iaoq[1];
- task_regs(child)->iasq[0] = task_regs(child)->iasq[1];
- task_regs(child)->iaoq[1] = task_regs(child)->iaoq[0] + 4;
- pa_psw(child)->n = 0;
- pa_psw(child)->x = 0;
- pa_psw(child)->y = 0;
- pa_psw(child)->z = 0;
- pa_psw(child)->b = 0;
- ptrace_disable(child);
- /* Don't wake up the child, but let the
- parent know something happened. */
- si.si_code = TRAP_TRACE;
- si.si_addr = (void __user *) (task_regs(child)->iaoq[0] & ~3);
- si.si_signo = SIGTRAP;
- si.si_errno = 0;
- force_sig_info(SIGTRAP, &si, child);
- //notify_parent(child, SIGCHLD);
- //ret = 0;
- goto out_wake;
- }
-
- /* Enable recovery counter traps. The recovery counter
- * itself will be set to zero on a task switch. If the
- * task is suspended on a syscall then the syscall return
- * path will overwrite the recovery counter with a suitable
- * value such that it traps once back in user space. We
- * disable interrupts in the childs PSW here also, to avoid
- * interrupts while the recovery counter is decrementing.
- */
- pa_psw(child)->r = 1;
- pa_psw(child)->t = 0;
- pa_psw(child)->h = 0;
- pa_psw(child)->l = 0;
- /* give it a chance to run. */
- goto out_wake;
-
- case PTRACE_GETEVENTMSG:
- ret = put_user(child->ptrace_message, (unsigned int __user *) data);
- goto out_tsk;
+ break;
default:
- ret = ptrace_request(child, request, addr, data);
- goto out_tsk;
+ ret = compat_ptrace_request(child, request, addr, data);
+ break;
}
-out_wake_notrap:
- ptrace_disable(child);
-out_wake:
- wake_up_process(child);
- ret = 0;
-out_tsk:
- DBG("arch_ptrace(%ld, %d, %lx, %lx) returning %ld\n",
- request, pid, oaddr, odata, ret);
return ret;
}
+#endif
+
void syscall_trace(void)
{
diff --git a/arch/parisc/kernel/real2.S b/arch/parisc/kernel/real2.S
index 7a92695d95a6..5f3d3a1f9037 100644
--- a/arch/parisc/kernel/real2.S
+++ b/arch/parisc/kernel/real2.S
@@ -8,12 +8,24 @@
*
*/
+#include
#include
#include
+#include
#include
+
.section .bss
+
+ .export pdc_result
+ .export pdc_result2
+ .align 8
+pdc_result:
+ .block ASM_PDC_RESULT_SIZE
+pdc_result2:
+ .block ASM_PDC_RESULT_SIZE
+
.export real_stack
.export real32_stack
.export real64_stack
diff --git a/arch/parisc/kernel/setup.c b/arch/parisc/kernel/setup.c
index 39e7c5a5946a..7d27853ff8c8 100644
--- a/arch/parisc/kernel/setup.c
+++ b/arch/parisc/kernel/setup.c
@@ -44,6 +44,7 @@
#include
#include
#include
+#include
static char __initdata command_line[COMMAND_LINE_SIZE];
@@ -123,6 +124,7 @@ void __init setup_arch(char **cmdline_p)
#ifdef CONFIG_64BIT
extern int parisc_narrow_firmware;
#endif
+ unwind_init();
init_per_cpu(smp_processor_id()); /* Set Modes & Enable FP */
@@ -368,6 +370,31 @@ static int __init parisc_init(void)
return 0;
}
-
arch_initcall(parisc_init);
+void start_parisc(void)
+{
+ extern void start_kernel(void);
+
+ int ret, cpunum;
+ struct pdc_coproc_cfg coproc_cfg;
+
+ cpunum = smp_processor_id();
+
+ set_firmware_width_unlocked();
+
+ ret = pdc_coproc_cfg_unlocked(&coproc_cfg);
+ if (ret >= 0 && coproc_cfg.ccr_functional) {
+ mtctl(coproc_cfg.ccr_functional, 10);
+
+ cpu_data[cpunum].fp_rev = coproc_cfg.revision;
+ cpu_data[cpunum].fp_model = coproc_cfg.model;
+
+ asm volatile ("fstd %fr0,8(%sp)");
+ } else {
+ panic("must have an fpu to boot linux");
+ }
+
+ start_kernel();
+ // not reached
+}
diff --git a/arch/parisc/kernel/syscall_table.S b/arch/parisc/kernel/syscall_table.S
index c7e59f548817..303d2b647e41 100644
--- a/arch/parisc/kernel/syscall_table.S
+++ b/arch/parisc/kernel/syscall_table.S
@@ -87,7 +87,7 @@
ENTRY_SAME(setuid)
ENTRY_SAME(getuid)
ENTRY_COMP(stime) /* 25 */
- ENTRY_SAME(ptrace)
+ ENTRY_COMP(ptrace)
ENTRY_SAME(alarm)
/* see stat comment */
ENTRY_COMP(newfstat)
@@ -407,6 +407,12 @@
ENTRY_SAME(timerfd_create)
ENTRY_COMP(timerfd_settime)
ENTRY_COMP(timerfd_gettime)
+ ENTRY_COMP(signalfd4)
+ ENTRY_SAME(eventfd2) /* 310 */
+ ENTRY_SAME(epoll_create1)
+ ENTRY_SAME(dup3)
+ ENTRY_SAME(pipe2)
+ ENTRY_SAME(inotify_init1)
/* Nothing yet */
diff --git a/arch/parisc/kernel/time.c b/arch/parisc/kernel/time.c
index 24be86bba94d..4d09203bc693 100644
--- a/arch/parisc/kernel/time.c
+++ b/arch/parisc/kernel/time.c
@@ -23,6 +23,7 @@
#include
#include
#include
+#include
#include
#include
@@ -215,6 +216,24 @@ void __init start_cpu_itimer(void)
cpu_data[cpu].it_value = next_tick;
}
+struct platform_device rtc_parisc_dev = {
+ .name = "rtc-parisc",
+ .id = -1,
+};
+
+static int __init rtc_init(void)
+{
+ int ret;
+
+ ret = platform_device_register(&rtc_parisc_dev);
+ if (ret < 0)
+ printk(KERN_ERR "unable to register rtc device...\n");
+
+ /* not necessarily an error */
+ return 0;
+}
+module_init(rtc_init);
+
void __init time_init(void)
{
static struct pdc_tod tod_data;
@@ -245,4 +264,3 @@ void __init time_init(void)
xtime.tv_nsec = 0;
}
}
-
diff --git a/arch/parisc/kernel/unwind.c b/arch/parisc/kernel/unwind.c
index 701b2d2d8882..6773c582e457 100644
--- a/arch/parisc/kernel/unwind.c
+++ b/arch/parisc/kernel/unwind.c
@@ -170,7 +170,7 @@ void unwind_table_remove(struct unwind_table *table)
}
/* Called from setup_arch to import the kernel unwind info */
-static int unwind_init(void)
+int unwind_init(void)
{
long start, stop;
register unsigned long gp __asm__ ("r27");
@@ -417,5 +417,3 @@ int unwind_to_user(struct unwind_frame_info *info)
return ret;
}
-
-module_init(unwind_init);
diff --git a/arch/powerpc/include/asm/page.h b/arch/powerpc/include/asm/page.h
index 64e144505f65..5ac51e6efc1d 100644
--- a/arch/powerpc/include/asm/page.h
+++ b/arch/powerpc/include/asm/page.h
@@ -10,9 +10,13 @@
* 2 of the License, or (at your option) any later version.
*/
+#ifndef __ASSEMBLY__
+#include
+#else
+#include
+#endif
#include
#include
-#include
/*
* On PPC32 page size is 4K. For PPC64 we support either 4K or 64K software
diff --git a/arch/powerpc/include/asm/pci-bridge.h b/arch/powerpc/include/asm/pci-bridge.h
index ae2ea803a0f2..9047af7baa69 100644
--- a/arch/powerpc/include/asm/pci-bridge.h
+++ b/arch/powerpc/include/asm/pci-bridge.h
@@ -74,6 +74,13 @@ struct pci_controller {
unsigned long pci_io_size;
#endif
+ /* Some machines have a special region to forward the ISA
+ * "memory" cycles such as VGA memory regions. Left to 0
+ * if unsupported
+ */
+ resource_size_t isa_mem_phys;
+ resource_size_t isa_mem_size;
+
struct pci_ops *ops;
unsigned int __iomem *cfg_addr;
void __iomem *cfg_data;
diff --git a/arch/powerpc/include/asm/pci.h b/arch/powerpc/include/asm/pci.h
index 0e52c7828ea4..39d547fde956 100644
--- a/arch/powerpc/include/asm/pci.h
+++ b/arch/powerpc/include/asm/pci.h
@@ -123,6 +123,16 @@ int pci_mmap_page_range(struct pci_dev *pdev, struct vm_area_struct *vma,
/* Tell drivers/pci/proc.c that we have pci_mmap_page_range() */
#define HAVE_PCI_MMAP 1
+extern int pci_legacy_read(struct pci_bus *bus, loff_t port, u32 *val,
+ size_t count);
+extern int pci_legacy_write(struct pci_bus *bus, loff_t port, u32 val,
+ size_t count);
+extern int pci_mmap_legacy_page_range(struct pci_bus *bus,
+ struct vm_area_struct *vma,
+ enum pci_mmap_state mmap_state);
+
+#define HAVE_PCI_LEGACY 1
+
#if defined(CONFIG_PPC64) || defined(CONFIG_NOT_COHERENT_CACHE)
/*
* For 64-bit kernels, pci_unmap_{single,page} is not a nop.
@@ -226,5 +236,6 @@ extern void pci_resource_to_user(const struct pci_dev *dev, int bar,
extern void pcibios_do_bus_setup(struct pci_bus *bus);
extern void pcibios_fixup_of_probed_bus(struct pci_bus *bus);
+
#endif /* __KERNEL__ */
#endif /* __ASM_POWERPC_PCI_H */
diff --git a/arch/powerpc/include/asm/ptrace.h b/arch/powerpc/include/asm/ptrace.h
index 734e0754fb9b..280a90cc9894 100644
--- a/arch/powerpc/include/asm/ptrace.h
+++ b/arch/powerpc/include/asm/ptrace.h
@@ -129,7 +129,7 @@ extern int ptrace_put_reg(struct task_struct *task, int regno,
#define CHECK_FULL_REGS(regs) \
do { \
if ((regs)->trap & 1) \
- printk(KERN_CRIT "%s: partial register set\n", __FUNCTION__); \
+ printk(KERN_CRIT "%s: partial register set\n", __func__); \
} while (0)
#endif /* __powerpc64__ */
diff --git a/arch/powerpc/kernel/pci-common.c b/arch/powerpc/kernel/pci-common.c
index 01ce8c38bae6..3815d84a1ef4 100644
--- a/arch/powerpc/kernel/pci-common.c
+++ b/arch/powerpc/kernel/pci-common.c
@@ -451,7 +451,8 @@ pgprot_t pci_phys_mem_access_prot(struct file *file,
pci_dev_put(pdev);
}
- DBG("non-PCI map for %lx, prot: %lx\n", offset, prot);
+ DBG("non-PCI map for %llx, prot: %lx\n",
+ (unsigned long long)offset, prot);
return __pgprot(prot);
}
@@ -490,6 +491,131 @@ int pci_mmap_page_range(struct pci_dev *dev, struct vm_area_struct *vma,
return ret;
}
+/* This provides legacy IO read access on a bus */
+int pci_legacy_read(struct pci_bus *bus, loff_t port, u32 *val, size_t size)
+{
+ unsigned long offset;
+ struct pci_controller *hose = pci_bus_to_host(bus);
+ struct resource *rp = &hose->io_resource;
+ void __iomem *addr;
+
+ /* Check if port can be supported by that bus. We only check
+ * the ranges of the PHB though, not the bus itself as the rules
+ * for forwarding legacy cycles down bridges are not our problem
+ * here. So if the host bridge supports it, we do it.
+ */
+ offset = (unsigned long)hose->io_base_virt - _IO_BASE;
+ offset += port;
+
+ if (!(rp->flags & IORESOURCE_IO))
+ return -ENXIO;
+ if (offset < rp->start || (offset + size) > rp->end)
+ return -ENXIO;
+ addr = hose->io_base_virt + port;
+
+ switch(size) {
+ case 1:
+ *((u8 *)val) = in_8(addr);
+ return 1;
+ case 2:
+ if (port & 1)
+ return -EINVAL;
+ *((u16 *)val) = in_le16(addr);
+ return 2;
+ case 4:
+ if (port & 3)
+ return -EINVAL;
+ *((u32 *)val) = in_le32(addr);
+ return 4;
+ }
+ return -EINVAL;
+}
+
+/* This provides legacy IO write access on a bus */
+int pci_legacy_write(struct pci_bus *bus, loff_t port, u32 val, size_t size)
+{
+ unsigned long offset;
+ struct pci_controller *hose = pci_bus_to_host(bus);
+ struct resource *rp = &hose->io_resource;
+ void __iomem *addr;
+
+ /* Check if port can be supported by that bus. We only check
+ * the ranges of the PHB though, not the bus itself as the rules
+ * for forwarding legacy cycles down bridges are not our problem
+ * here. So if the host bridge supports it, we do it.
+ */
+ offset = (unsigned long)hose->io_base_virt - _IO_BASE;
+ offset += port;
+
+ if (!(rp->flags & IORESOURCE_IO))
+ return -ENXIO;
+ if (offset < rp->start || (offset + size) > rp->end)
+ return -ENXIO;
+ addr = hose->io_base_virt + port;
+
+ /* WARNING: The generic code is idiotic. It gets passed a pointer
+ * to what can be a 1, 2 or 4 byte quantity and always reads that
+ * as a u32, which means that we have to correct the location of
+ * the data read within those 32 bits for size 1 and 2
+ */
+ switch(size) {
+ case 1:
+ out_8(addr, val >> 24);
+ return 1;
+ case 2:
+ if (port & 1)
+ return -EINVAL;
+ out_le16(addr, val >> 16);
+ return 2;
+ case 4:
+ if (port & 3)
+ return -EINVAL;
+ out_le32(addr, val);
+ return 4;
+ }
+ return -EINVAL;
+}
+
+/* This provides legacy IO or memory mmap access on a bus */
+int pci_mmap_legacy_page_range(struct pci_bus *bus,
+ struct vm_area_struct *vma,
+ enum pci_mmap_state mmap_state)
+{
+ struct pci_controller *hose = pci_bus_to_host(bus);
+ resource_size_t offset =
+ ((resource_size_t)vma->vm_pgoff) << PAGE_SHIFT;
+ resource_size_t size = vma->vm_end - vma->vm_start;
+ struct resource *rp;
+
+ pr_debug("pci_mmap_legacy_page_range(%04x:%02x, %s @%llx..%llx)\n",
+ pci_domain_nr(bus), bus->number,
+ mmap_state == pci_mmap_mem ? "MEM" : "IO",
+ (unsigned long long)offset,
+ (unsigned long long)(offset + size - 1));
+
+ if (mmap_state == pci_mmap_mem) {
+ if ((offset + size) > hose->isa_mem_size)
+ return -ENXIO;
+ offset += hose->isa_mem_phys;
+ } else {
+ unsigned long io_offset = (unsigned long)hose->io_base_virt - _IO_BASE;
+ unsigned long roffset = offset + io_offset;
+ rp = &hose->io_resource;
+ if (!(rp->flags & IORESOURCE_IO))
+ return -ENXIO;
+ if (roffset < rp->start || (roffset + size) > rp->end)
+ return -ENXIO;
+ offset += hose->io_base_phys;
+ }
+ pr_debug(" -> mapping phys %llx\n", (unsigned long long)offset);
+
+ vma->vm_pgoff = offset >> PAGE_SHIFT;
+ vma->vm_page_prot |= _PAGE_NO_CACHE | _PAGE_GUARDED;
+ return remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff,
+ vma->vm_end - vma->vm_start,
+ vma->vm_page_prot);
+}
+
void pci_resource_to_user(const struct pci_dev *dev, int bar,
const struct resource *rsrc,
resource_size_t *start, resource_size_t *end)
@@ -592,6 +718,12 @@ void __devinit pci_process_bridge_OF_ranges(struct pci_controller *hose,
cpu_addr = of_translate_address(dev, ranges + 3);
size = of_read_number(ranges + pna + 3, 2);
ranges += np;
+
+ /* If we failed translation or got a zero-sized region
+ * (some FW try to feed us with non sensical zero sized regions
+ * such as power3 which look like some kind of attempt at exposing
+ * the VGA memory hole)
+ */
if (cpu_addr == OF_BAD_ADDR || size == 0)
continue;
@@ -665,6 +797,8 @@ void __devinit pci_process_bridge_OF_ranges(struct pci_controller *hose,
isa_hole = memno;
if (primary || isa_mem_base == 0)
isa_mem_base = cpu_addr;
+ hose->isa_mem_phys = cpu_addr;
+ hose->isa_mem_size = size;
}
/* We get the PCI/Mem offset from the first range or
diff --git a/arch/powerpc/platforms/cell/spufs/sputrace.c b/arch/powerpc/platforms/cell/spufs/sputrace.c
index 92d20e993ede..2ece399f2862 100644
--- a/arch/powerpc/platforms/cell/spufs/sputrace.c
+++ b/arch/powerpc/platforms/cell/spufs/sputrace.c
@@ -232,6 +232,7 @@ static void __exit sputrace_exit(void)
remove_proc_entry("sputrace", NULL);
kfree(sputrace_log);
+ marker_synchronize_unregister();
}
module_init(sputrace_init);
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 49349ba77d80..5b9b12321ad1 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -26,6 +26,7 @@ config X86
select HAVE_KPROBES
select ARCH_WANT_OPTIONAL_GPIOLIB
select HAVE_KRETPROBES
+ select HAVE_FTRACE_MCOUNT_RECORD
select HAVE_DYNAMIC_FTRACE
select HAVE_FTRACE
select HAVE_KVM if ((X86_32 && !X86_VOYAGER && !X86_VISWS && !X86_NUMAQ) || X86_64)
@@ -1242,14 +1243,6 @@ config EFI
resultant kernel should continue to boot on existing non-EFI
platforms.
-config IRQBALANCE
- def_bool y
- prompt "Enable kernel irq balancing"
- depends on X86_32 && SMP && X86_IO_APIC
- help
- The default yes will allow the kernel to do irq load balancing.
- Saying no will keep the kernel from doing irq load balancing.
-
config SECCOMP
def_bool y
prompt "Enable seccomp to safely compute untrusted bytecode"
diff --git a/arch/x86/configs/i386_defconfig b/arch/x86/configs/i386_defconfig
index 52d0359719d7..13b8c86ae985 100644
--- a/arch/x86/configs/i386_defconfig
+++ b/arch/x86/configs/i386_defconfig
@@ -287,7 +287,6 @@ CONFIG_MTRR=y
# CONFIG_MTRR_SANITIZER is not set
CONFIG_X86_PAT=y
CONFIG_EFI=y
-# CONFIG_IRQBALANCE is not set
CONFIG_SECCOMP=y
# CONFIG_HZ_100 is not set
# CONFIG_HZ_250 is not set
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 0d41f0343dc0..d7e5a58ee22f 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -23,7 +23,7 @@ CFLAGS_hpet.o := $(nostackp)
CFLAGS_tsc.o := $(nostackp)
obj-y := process_$(BITS).o signal_$(BITS).o entry_$(BITS).o
-obj-y += traps.o irq_$(BITS).o dumpstack_$(BITS).o
+obj-y += traps.o irq.o irq_$(BITS).o dumpstack_$(BITS).o
obj-y += time_$(BITS).o ioport.o ldt.o
obj-y += setup.o i8259.o irqinit_$(BITS).o setup_percpu.o
obj-$(CONFIG_X86_VISWS) += visws_quirks.o
@@ -60,8 +60,8 @@ obj-$(CONFIG_X86_32_SMP) += smpcommon.o
obj-$(CONFIG_X86_64_SMP) += tsc_sync.o smpcommon.o
obj-$(CONFIG_X86_TRAMPOLINE) += trampoline_$(BITS).o
obj-$(CONFIG_X86_MPPARSE) += mpparse.o
-obj-$(CONFIG_X86_LOCAL_APIC) += apic_$(BITS).o nmi.o
-obj-$(CONFIG_X86_IO_APIC) += io_apic_$(BITS).o
+obj-$(CONFIG_X86_LOCAL_APIC) += apic.o nmi.o
+obj-$(CONFIG_X86_IO_APIC) += io_apic.o
obj-$(CONFIG_X86_REBOOTFIXUPS) += reboot_fixups_32.o
obj-$(CONFIG_DYNAMIC_FTRACE) += ftrace.o
obj-$(CONFIG_KEXEC) += machine_kexec_$(BITS).o
@@ -108,7 +108,7 @@ obj-$(CONFIG_MICROCODE) += microcode.o
# 64 bit specific files
ifeq ($(CONFIG_X86_64),y)
obj-y += genapic_64.o genapic_flat_64.o genx2apic_uv_x.o tlb_uv.o
- obj-y += bios_uv.o
+ obj-y += bios_uv.o uv_irq.o uv_sysfs.o
obj-y += genx2apic_cluster.o
obj-y += genx2apic_phys.o
obj-$(CONFIG_X86_PM_TIMER) += pmtimer_64.o
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index eb875cdc7367..0d1c26a583c5 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -1256,7 +1256,7 @@ static int __init acpi_parse_madt_ioapic_entries(void)
count =
acpi_table_parse_madt(ACPI_MADT_TYPE_INTERRUPT_OVERRIDE, acpi_parse_int_src_ovr,
- NR_IRQ_VECTORS);
+ nr_irqs);
if (count < 0) {
printk(KERN_ERR PREFIX
"Error parsing interrupt source overrides entry\n");
@@ -1276,7 +1276,7 @@ static int __init acpi_parse_madt_ioapic_entries(void)
count =
acpi_table_parse_madt(ACPI_MADT_TYPE_NMI_SOURCE, acpi_parse_nmi_src,
- NR_IRQ_VECTORS);
+ nr_irqs);
if (count < 0) {
printk(KERN_ERR PREFIX "Error parsing NMI SRC entry\n");
/* TBD: Cleanup to allow fallback to MPS */
diff --git a/arch/x86/kernel/acpi/sleep.c b/arch/x86/kernel/acpi/sleep.c
index 426e5d91b63a..c44cd6dbfa14 100644
--- a/arch/x86/kernel/acpi/sleep.c
+++ b/arch/x86/kernel/acpi/sleep.c
@@ -10,6 +10,7 @@
#include
#include
#include
+#include
#include "realmode/wakeup.h"
#include "sleep.h"
@@ -98,6 +99,8 @@ int acpi_save_state_mem(void)
header->trampoline_segment = setup_trampoline() >> 4;
#ifdef CONFIG_SMP
stack_start.sp = temp_stack + 4096;
+ early_gdt_descr.address =
+ (unsigned long)get_cpu_gdt_table(smp_processor_id());
#endif
initial_code = (unsigned long)wakeup_long64;
saved_magic = 0x123456789abcdef0;
diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic.c
similarity index 79%
rename from arch/x86/kernel/apic_32.c
rename to arch/x86/kernel/apic.c
index 21c831d96af3..04a7f960bbc0 100644
--- a/arch/x86/kernel/apic_32.c
+++ b/arch/x86/kernel/apic.c
@@ -23,11 +23,13 @@
#include
#include
#include
+#include
#include
#include
#include
#include
#include
+#include
#include
#include
@@ -36,8 +38,14 @@
#include
#include
#include
+#include
#include
#include
+#include
+#include
+#include
+#include
+#include
#include
#include
@@ -50,16 +58,58 @@
# error SPURIOUS_APIC_VECTOR definition error
#endif
-unsigned long mp_lapic_addr;
-
+#ifdef CONFIG_X86_32
/*
* Knob to control our willingness to enable the local APIC.
*
* +1=force-enable
*/
static int force_enable_local_apic;
-int disable_apic;
+/*
+ * APIC command line parameters
+ */
+static int __init parse_lapic(char *arg)
+{
+ force_enable_local_apic = 1;
+ return 0;
+}
+early_param("lapic", parse_lapic);
+/* Local APIC was disabled by the BIOS and enabled by the kernel */
+static int enabled_via_apicbase;
+#endif
+
+#ifdef CONFIG_X86_64
+static int apic_calibrate_pmtmr __initdata;
+static __init int setup_apicpmtimer(char *s)
+{
+ apic_calibrate_pmtmr = 1;
+ notsc_setup(NULL);
+ return 0;
+}
+__setup("apicpmtimer", setup_apicpmtimer);
+#endif
+
+#ifdef CONFIG_X86_64
+#define HAVE_X2APIC
+#endif
+
+#ifdef HAVE_X2APIC
+int x2apic;
+/* x2apic enabled before OS handover */
+int x2apic_preenabled;
+int disable_x2apic;
+static __init int setup_nox2apic(char *str)
+{
+ disable_x2apic = 1;
+ setup_clear_cpu_cap(X86_FEATURE_X2APIC);
+ return 0;
+}
+early_param("nox2apic", setup_nox2apic);
+#endif
+
+unsigned long mp_lapic_addr;
+int disable_apic;
/* Disable local APIC timer from the kernel commandline or via dmi quirk */
static int disable_apic_timer __cpuinitdata;
/* Local APIC timer works in C2 */
@@ -110,9 +160,6 @@ static struct clock_event_device lapic_clockevent = {
};
static DEFINE_PER_CPU(struct clock_event_device, lapic_events);
-/* Local APIC was disabled by the BIOS and enabled by the kernel */
-static int enabled_via_apicbase;
-
static unsigned long apic_phys;
/*
@@ -202,6 +249,42 @@ static struct apic_ops xapic_ops = {
struct apic_ops __read_mostly *apic_ops = &xapic_ops;
EXPORT_SYMBOL_GPL(apic_ops);
+#ifdef HAVE_X2APIC
+static void x2apic_wait_icr_idle(void)
+{
+ /* no need to wait for icr idle in x2apic */
+ return;
+}
+
+static u32 safe_x2apic_wait_icr_idle(void)
+{
+ /* no need to wait for icr idle in x2apic */
+ return 0;
+}
+
+void x2apic_icr_write(u32 low, u32 id)
+{
+ wrmsrl(APIC_BASE_MSR + (APIC_ICR >> 4), ((__u64) id) << 32 | low);
+}
+
+u64 x2apic_icr_read(void)
+{
+ unsigned long val;
+
+ rdmsrl(APIC_BASE_MSR + (APIC_ICR >> 4), val);
+ return val;
+}
+
+static struct apic_ops x2apic_ops = {
+ .read = native_apic_msr_read,
+ .write = native_apic_msr_write,
+ .icr_read = x2apic_icr_read,
+ .icr_write = x2apic_icr_write,
+ .wait_icr_idle = x2apic_wait_icr_idle,
+ .safe_wait_icr_idle = safe_x2apic_wait_icr_idle,
+};
+#endif
+
/**
* enable_NMI_through_LVT0 - enable NMI through local vector table 0
*/
@@ -219,6 +302,7 @@ void __cpuinit enable_NMI_through_LVT0(void)
apic_write(APIC_LVT0, v);
}
+#ifdef CONFIG_X86_32
/**
* get_physical_broadcast - Get number of physical broadcast IDs
*/
@@ -226,6 +310,7 @@ int get_physical_broadcast(void)
{
return modern_apic() ? 0xff : 0xf;
}
+#endif
/**
* lapic_get_maxlvt - get the maximum number of local vector table entries
@@ -247,11 +332,7 @@ int lapic_get_maxlvt(void)
*/
/* Clock divisor */
-#ifdef CONFG_X86_64
-#define APIC_DIVISOR 1
-#else
#define APIC_DIVISOR 16
-#endif
/*
* This function sets up the local APIC timer, with a timeout of
@@ -383,7 +464,7 @@ static void lapic_timer_broadcast(cpumask_t mask)
* Setup the local APIC timer for this CPU. Copy the initilized values
* of the boot CPU and register the clock event in the framework.
*/
-static void __devinit setup_APIC_timer(void)
+static void __cpuinit setup_APIC_timer(void)
{
struct clock_event_device *levt = &__get_cpu_var(lapic_events);
@@ -453,14 +534,51 @@ static void __init lapic_cal_handler(struct clock_event_device *dev)
}
}
+static int __init calibrate_by_pmtimer(long deltapm, long *delta)
+{
+ const long pm_100ms = PMTMR_TICKS_PER_SEC / 10;
+ const long pm_thresh = pm_100ms / 100;
+ unsigned long mult;
+ u64 res;
+
+#ifndef CONFIG_X86_PM_TIMER
+ return -1;
+#endif
+
+ apic_printk(APIC_VERBOSE, "... PM timer delta = %ld\n", deltapm);
+
+ /* Check, if the PM timer is available */
+ if (!deltapm)
+ return -1;
+
+ mult = clocksource_hz2mult(PMTMR_TICKS_PER_SEC, 22);
+
+ if (deltapm > (pm_100ms - pm_thresh) &&
+ deltapm < (pm_100ms + pm_thresh)) {
+ apic_printk(APIC_VERBOSE, "... PM timer result ok\n");
+ } else {
+ res = (((u64)deltapm) * mult) >> 22;
+ do_div(res, 1000000);
+ printk(KERN_WARNING "APIC calibration not consistent "
+ "with PM Timer: %ldms instead of 100ms\n",
+ (long)res);
+ /* Correct the lapic counter value */
+ res = (((u64)(*delta)) * pm_100ms);
+ do_div(res, deltapm);
+ printk(KERN_INFO "APIC delta adjusted to PM-Timer: "
+ "%lu (%ld)\n", (unsigned long)res, *delta);
+ *delta = (long)res;
+ }
+
+ return 0;
+}
+
static int __init calibrate_APIC_clock(void)
{
struct clock_event_device *levt = &__get_cpu_var(lapic_events);
- const long pm_100ms = PMTMR_TICKS_PER_SEC/10;
- const long pm_thresh = pm_100ms/100;
void (*real_handler)(struct clock_event_device *dev);
unsigned long deltaj;
- long delta, deltapm;
+ long delta;
int pm_referenced = 0;
local_irq_disable();
@@ -470,10 +588,10 @@ static int __init calibrate_APIC_clock(void)
global_clock_event->event_handler = lapic_cal_handler;
/*
- * Setup the APIC counter to 1e9. There is no way the lapic
+ * Setup the APIC counter to maximum. There is no way the lapic
* can underflow in the 100ms detection time frame
*/
- __setup_APIC_LVTT(1000000000, 0, 0);
+ __setup_APIC_LVTT(0xffffffff, 0, 0);
/* Let the interrupts run */
local_irq_enable();
@@ -490,34 +608,9 @@ static int __init calibrate_APIC_clock(void)
delta = lapic_cal_t1 - lapic_cal_t2;
apic_printk(APIC_VERBOSE, "... lapic delta = %ld\n", delta);
- /* Check, if the PM timer is available */
- deltapm = lapic_cal_pm2 - lapic_cal_pm1;
- apic_printk(APIC_VERBOSE, "... PM timer delta = %ld\n", deltapm);
-
- if (deltapm) {
- unsigned long mult;
- u64 res;
-
- mult = clocksource_hz2mult(PMTMR_TICKS_PER_SEC, 22);
-
- if (deltapm > (pm_100ms - pm_thresh) &&
- deltapm < (pm_100ms + pm_thresh)) {
- apic_printk(APIC_VERBOSE, "... PM timer result ok\n");
- } else {
- res = (((u64) deltapm) * mult) >> 22;
- do_div(res, 1000000);
- printk(KERN_WARNING "APIC calibration not consistent "
- "with PM Timer: %ldms instead of 100ms\n",
- (long)res);
- /* Correct the lapic counter value */
- res = (((u64) delta) * pm_100ms);
- do_div(res, deltapm);
- printk(KERN_INFO "APIC delta adjusted to PM-Timer: "
- "%lu (%ld)\n", (unsigned long) res, delta);
- delta = (long) res;
- }
- pm_referenced = 1;
- }
+ /* we trust the PM based calibration if possible */
+ pm_referenced = !calibrate_by_pmtimer(lapic_cal_pm2 - lapic_cal_pm1,
+ &delta);
/* Calculate the scaled math multiplication factor */
lapic_clockevent.mult = div_sc(delta, TICK_NSEC * LAPIC_CAL_LOOPS,
@@ -559,7 +652,10 @@ static int __init calibrate_APIC_clock(void)
levt->features &= ~CLOCK_EVT_FEAT_DUMMY;
- /* We trust the pm timer based calibration */
+ /*
+ * PM timer calibration failed or not turned on
+ * so lets try APIC timer based calibration
+ */
if (!pm_referenced) {
apic_printk(APIC_VERBOSE, "... verify APIC timer\n");
@@ -652,7 +748,7 @@ void __init setup_boot_APIC_clock(void)
setup_APIC_timer();
}
-void __devinit setup_secondary_APIC_clock(void)
+void __cpuinit setup_secondary_APIC_clock(void)
{
setup_APIC_timer();
}
@@ -718,6 +814,9 @@ void smp_apic_timer_interrupt(struct pt_regs *regs)
* Besides, if we don't timer interrupts ignore the global
* interrupt lock, which is the WrongThing (tm) to do.
*/
+#ifdef CONFIG_X86_64
+ exit_idle();
+#endif
irq_enter();
local_apic_timer_interrupt();
irq_exit();
@@ -991,40 +1090,43 @@ void __init init_bsp_APIC(void)
static void __cpuinit lapic_setup_esr(void)
{
- unsigned long oldvalue, value, maxlvt;
- if (lapic_is_integrated() && !esr_disable) {
- if (esr_disable) {
- /*
- * Something untraceable is creating bad interrupts on
- * secondary quads ... for the moment, just leave the
- * ESR disabled - we can't do anything useful with the
- * errors anyway - mbligh
- */
- printk(KERN_INFO "Leaving ESR disabled.\n");
- return;
- }
- /* !82489DX */
- maxlvt = lapic_get_maxlvt();
- if (maxlvt > 3) /* Due to the Pentium erratum 3AP. */
- apic_write(APIC_ESR, 0);
- oldvalue = apic_read(APIC_ESR);
+ unsigned int oldvalue, value, maxlvt;
- /* enables sending errors */
- value = ERROR_APIC_VECTOR;
- apic_write(APIC_LVTERR, value);
- /*
- * spec says clear errors after enabling vector.
- */
- if (maxlvt > 3)
- apic_write(APIC_ESR, 0);
- value = apic_read(APIC_ESR);
- if (value != oldvalue)
- apic_printk(APIC_VERBOSE, "ESR value before enabling "
- "vector: 0x%08lx after: 0x%08lx\n",
- oldvalue, value);
- } else {
+ if (!lapic_is_integrated()) {
printk(KERN_INFO "No ESR for 82489DX.\n");
+ return;
}
+
+ if (esr_disable) {
+ /*
+ * Something untraceable is creating bad interrupts on
+ * secondary quads ... for the moment, just leave the
+ * ESR disabled - we can't do anything useful with the
+ * errors anyway - mbligh
+ */
+ printk(KERN_INFO "Leaving ESR disabled.\n");
+ return;
+ }
+
+ maxlvt = lapic_get_maxlvt();
+ if (maxlvt > 3) /* Due to the Pentium erratum 3AP. */
+ apic_write(APIC_ESR, 0);
+ oldvalue = apic_read(APIC_ESR);
+
+ /* enables sending errors */
+ value = ERROR_APIC_VECTOR;
+ apic_write(APIC_LVTERR, value);
+
+ /*
+ * spec says clear errors after enabling vector.
+ */
+ if (maxlvt > 3)
+ apic_write(APIC_ESR, 0);
+ value = apic_read(APIC_ESR);
+ if (value != oldvalue)
+ apic_printk(APIC_VERBOSE, "ESR value before enabling "
+ "vector: 0x%08x after: 0x%08x\n",
+ oldvalue, value);
}
@@ -1033,24 +1135,27 @@ static void __cpuinit lapic_setup_esr(void)
*/
void __cpuinit setup_local_APIC(void)
{
- unsigned long value, integrated;
+ unsigned int value;
int i, j;
+#ifdef CONFIG_X86_32
/* Pound the ESR really hard over the head with a big hammer - mbligh */
- if (esr_disable) {
+ if (lapic_is_integrated() && esr_disable) {
apic_write(APIC_ESR, 0);
apic_write(APIC_ESR, 0);
apic_write(APIC_ESR, 0);
apic_write(APIC_ESR, 0);
}
+#endif
- integrated = lapic_is_integrated();
+ preempt_disable();
/*
* Double-check whether this APIC is really registered.
+ * This is meaningless in clustered apic mode, so we skip it.
*/
if (!apic_id_registered())
- WARN_ON_ONCE(1);
+ BUG();
/*
* Intel recommends to set DFR, LDR and TPR before enabling
@@ -1096,6 +1201,7 @@ void __cpuinit setup_local_APIC(void)
*/
value |= APIC_SPIV_APIC_ENABLED;
+#ifdef CONFIG_X86_32
/*
* Some unknown Intel IO/APIC (or APIC) errata is biting us with
* certain networking cards. If high frequency interrupts are
@@ -1116,8 +1222,13 @@ void __cpuinit setup_local_APIC(void)
* See also the comment in end_level_ioapic_irq(). --macro
*/
- /* Enable focus processor (bit==0) */
+ /*
+ * - enable focus processor (bit==0)
+ * - 64bit mode always use processor focus
+ * so no need to set it
+ */
value &= ~APIC_SPIV_FOCUS_DISABLED;
+#endif
/*
* Set spurious IRQ vector
@@ -1154,9 +1265,11 @@ void __cpuinit setup_local_APIC(void)
value = APIC_DM_NMI;
else
value = APIC_DM_NMI | APIC_LVT_MASKED;
- if (!integrated) /* 82489DX */
+ if (!lapic_is_integrated()) /* 82489DX */
value |= APIC_LVT_LEVEL_TRIGGER;
apic_write(APIC_LVT1, value);
+
+ preempt_enable();
}
void __cpuinit end_local_APIC_setup(void)
@@ -1177,6 +1290,153 @@ void __cpuinit end_local_APIC_setup(void)
apic_pm_activate();
}
+#ifdef HAVE_X2APIC
+void check_x2apic(void)
+{
+ int msr, msr2;
+
+ rdmsr(MSR_IA32_APICBASE, msr, msr2);
+
+ if (msr & X2APIC_ENABLE) {
+ printk("x2apic enabled by BIOS, switching to x2apic ops\n");
+ x2apic_preenabled = x2apic = 1;
+ apic_ops = &x2apic_ops;
+ }
+}
+
+void enable_x2apic(void)
+{
+ int msr, msr2;
+
+ rdmsr(MSR_IA32_APICBASE, msr, msr2);
+ if (!(msr & X2APIC_ENABLE)) {
+ printk("Enabling x2apic\n");
+ wrmsr(MSR_IA32_APICBASE, msr | X2APIC_ENABLE, 0);
+ }
+}
+
+void enable_IR_x2apic(void)
+{
+#ifdef CONFIG_INTR_REMAP
+ int ret;
+ unsigned long flags;
+
+ if (!cpu_has_x2apic)
+ return;
+
+ if (!x2apic_preenabled && disable_x2apic) {
+ printk(KERN_INFO
+ "Skipped enabling x2apic and Interrupt-remapping "
+ "because of nox2apic\n");
+ return;
+ }
+
+ if (x2apic_preenabled && disable_x2apic)
+ panic("Bios already enabled x2apic, can't enforce nox2apic");
+
+ if (!x2apic_preenabled && skip_ioapic_setup) {
+ printk(KERN_INFO
+ "Skipped enabling x2apic and Interrupt-remapping "
+ "because of skipping io-apic setup\n");
+ return;
+ }
+
+ ret = dmar_table_init();
+ if (ret) {
+ printk(KERN_INFO
+ "dmar_table_init() failed with %d:\n", ret);
+
+ if (x2apic_preenabled)
+ panic("x2apic enabled by bios. But IR enabling failed");
+ else
+ printk(KERN_INFO
+ "Not enabling x2apic,Intr-remapping\n");
+ return;
+ }
+
+ local_irq_save(flags);
+ mask_8259A();
+
+ ret = save_mask_IO_APIC_setup();
+ if (ret) {
+ printk(KERN_INFO "Saving IO-APIC state failed: %d\n", ret);
+ goto end;
+ }
+
+ ret = enable_intr_remapping(1);
+
+ if (ret && x2apic_preenabled) {
+ local_irq_restore(flags);
+ panic("x2apic enabled by bios. But IR enabling failed");
+ }
+
+ if (ret)
+ goto end_restore;
+
+ if (!x2apic) {
+ x2apic = 1;
+ apic_ops = &x2apic_ops;
+ enable_x2apic();
+ }
+
+end_restore:
+ if (ret)
+ /*
+ * IR enabling failed
+ */
+ restore_IO_APIC_setup();
+ else
+ reinit_intr_remapped_IO_APIC(x2apic_preenabled);
+
+end:
+ unmask_8259A();
+ local_irq_restore(flags);
+
+ if (!ret) {
+ if (!x2apic_preenabled)
+ printk(KERN_INFO
+ "Enabled x2apic and interrupt-remapping\n");
+ else
+ printk(KERN_INFO
+ "Enabled Interrupt-remapping\n");
+ } else
+ printk(KERN_ERR
+ "Failed to enable Interrupt-remapping and x2apic\n");
+#else
+ if (!cpu_has_x2apic)
+ return;
+
+ if (x2apic_preenabled)
+ panic("x2apic enabled prior OS handover,"
+ " enable CONFIG_INTR_REMAP");
+
+ printk(KERN_INFO "Enable CONFIG_INTR_REMAP for enabling intr-remapping "
+ " and x2apic\n");
+#endif
+
+ return;
+}
+#endif /* HAVE_X2APIC */
+
+#ifdef CONFIG_X86_64
+/*
+ * Detect and enable local APICs on non-SMP boards.
+ * Original code written by Keir Fraser.
+ * On AMD64 we trust the BIOS - if it says no APIC it is likely
+ * not correctly set up (usually the APIC timer won't work etc.)
+ */
+static int __init detect_init_APIC(void)
+{
+ if (!cpu_has_apic) {
+ printk(KERN_INFO "No local APIC present\n");
+ return -1;
+ }
+
+ mp_lapic_addr = APIC_DEFAULT_PHYS_BASE;
+ boot_cpu_physical_apicid = 0;
+ return 0;
+}
+#else
/*
* Detect and initialize APIC
*/
@@ -1255,12 +1515,46 @@ no_apic:
printk(KERN_INFO "No local APIC present or hardware disabled\n");
return -1;
}
+#endif
+
+#ifdef CONFIG_X86_64
+void __init early_init_lapic_mapping(void)
+{
+ unsigned long phys_addr;
+
+ /*
+ * If no local APIC can be found then go out
+ * : it means there is no mpatable and MADT
+ */
+ if (!smp_found_config)
+ return;
+
+ phys_addr = mp_lapic_addr;
+
+ set_fixmap_nocache(FIX_APIC_BASE, phys_addr);
+ apic_printk(APIC_VERBOSE, "mapped APIC to %16lx (%16lx)\n",
+ APIC_BASE, phys_addr);
+
+ /*
+ * Fetch the APIC ID of the BSP in case we have a
+ * default configuration (or the MP table is broken).
+ */
+ boot_cpu_physical_apicid = read_apic_id();
+}
+#endif
/**
* init_apic_mappings - initialize APIC mappings
*/
void __init init_apic_mappings(void)
{
+#ifdef HAVE_X2APIC
+ if (x2apic) {
+ boot_cpu_physical_apicid = read_apic_id();
+ return;
+ }
+#endif
+
/*
* If no local APIC can be found then set up a fake all
* zeroes page to simulate the local APIC and another
@@ -1273,8 +1567,8 @@ void __init init_apic_mappings(void)
apic_phys = mp_lapic_addr;
set_fixmap_nocache(FIX_APIC_BASE, apic_phys);
- printk(KERN_DEBUG "mapped APIC to %08lx (%08lx)\n", APIC_BASE,
- apic_phys);
+ apic_printk(APIC_VERBOSE, "mapped APIC to %08lx (%08lx)\n",
+ APIC_BASE, apic_phys);
/*
* Fetch the APIC ID of the BSP in case we have a
@@ -1282,18 +1576,27 @@ void __init init_apic_mappings(void)
*/
if (boot_cpu_physical_apicid == -1U)
boot_cpu_physical_apicid = read_apic_id();
-
}
/*
* This initializes the IO-APIC and APIC hardware if this is
* a UP kernel.
*/
-
int apic_version[MAX_APICS];
int __init APIC_init_uniprocessor(void)
{
+#ifdef CONFIG_X86_64
+ if (disable_apic) {
+ printk(KERN_INFO "Apic disabled\n");
+ return -1;
+ }
+ if (!cpu_has_apic) {
+ disable_apic = 1;
+ printk(KERN_INFO "Apic disabled by BIOS\n");
+ return -1;
+ }
+#else
if (!smp_found_config && !cpu_has_apic)
return -1;
@@ -1302,39 +1605,68 @@ int __init APIC_init_uniprocessor(void)
*/
if (!cpu_has_apic &&
APIC_INTEGRATED(apic_version[boot_cpu_physical_apicid])) {
- printk(KERN_ERR "BIOS bug, local APIC #%d not detected!...\n",
+ printk(KERN_ERR "BIOS bug, local APIC 0x%x not detected!...\n",
boot_cpu_physical_apicid);
clear_cpu_cap(&boot_cpu_data, X86_FEATURE_APIC);
return -1;
}
+#endif
+
+#ifdef HAVE_X2APIC
+ enable_IR_x2apic();
+#endif
+#ifdef CONFIG_X86_64
+ setup_apic_routing();
+#endif
verify_local_APIC();
-
connect_bsp_APIC();
+#ifdef CONFIG_X86_64
+ apic_write(APIC_ID, SET_APIC_ID(boot_cpu_physical_apicid));
+#else
/*
* Hack: In case of kdump, after a crash, kernel might be booting
* on a cpu with non-zero lapic id. But boot_cpu_physical_apicid
* might be zero if read from MP tables. Get it from LAPIC.
*/
-#ifdef CONFIG_CRASH_DUMP
+# ifdef CONFIG_CRASH_DUMP
boot_cpu_physical_apicid = read_apic_id();
+# endif
#endif
physid_set_mask_of_physid(boot_cpu_physical_apicid, &phys_cpu_present_map);
-
setup_local_APIC();
+#ifdef CONFIG_X86_64
+ /*
+ * Now enable IO-APICs, actually call clear_IO_APIC
+ * We need clear_IO_APIC before enabling vector on BP
+ */
+ if (!skip_ioapic_setup && nr_ioapics)
+ enable_IO_APIC();
+#endif
+
#ifdef CONFIG_X86_IO_APIC
if (!smp_found_config || skip_ioapic_setup || !nr_ioapics)
#endif
localise_nmi_watchdog();
end_local_APIC_setup();
+
#ifdef CONFIG_X86_IO_APIC
- if (smp_found_config)
- if (!skip_ioapic_setup && nr_ioapics)
- setup_IO_APIC();
+ if (smp_found_config && !skip_ioapic_setup && nr_ioapics)
+ setup_IO_APIC();
+# ifdef CONFIG_X86_64
+ else
+ nr_ioapics = 0;
+# endif
#endif
+
+#ifdef CONFIG_X86_64
+ setup_boot_APIC_clock();
+ check_nmi_watchdog();
+#else
setup_boot_clock();
+#endif
return 0;
}
@@ -1348,8 +1680,11 @@ int __init APIC_init_uniprocessor(void)
*/
void smp_spurious_interrupt(struct pt_regs *regs)
{
- unsigned long v;
+ u32 v;
+#ifdef CONFIG_X86_64
+ exit_idle();
+#endif
irq_enter();
/*
* Check if this really is a spurious interrupt and ACK it
@@ -1360,10 +1695,14 @@ void smp_spurious_interrupt(struct pt_regs *regs)
if (v & (1 << (SPURIOUS_APIC_VECTOR & 0x1f)))
ack_APIC_irq();
+#ifdef CONFIG_X86_64
+ add_pda(irq_spurious_count, 1);
+#else
/* see sw-dev-man vol 3, chapter 7.4.13.5 */
printk(KERN_INFO "spurious APIC interrupt on CPU#%d, "
"should never happen.\n", smp_processor_id());
__get_cpu_var(irq_stat).irq_spurious_count++;
+#endif
irq_exit();
}
@@ -1372,8 +1711,11 @@ void smp_spurious_interrupt(struct pt_regs *regs)
*/
void smp_error_interrupt(struct pt_regs *regs)
{
- unsigned long v, v1;
+ u32 v, v1;
+#ifdef CONFIG_X86_64
+ exit_idle();
+#endif
irq_enter();
/* First tickle the hardware, only then report what went on. -- REW */
v = apic_read(APIC_ESR);
@@ -1392,7 +1734,7 @@ void smp_error_interrupt(struct pt_regs *regs)
6: Received illegal vector
7: Illegal register address
*/
- printk(KERN_DEBUG "APIC error on CPU%d: %02lx(%02lx)\n",
+ printk(KERN_DEBUG "APIC error on CPU%d: %02x(%02x)\n",
smp_processor_id(), v , v1);
irq_exit();
}
@@ -1565,6 +1907,13 @@ void __cpuinit generic_processor_info(int apicid, int version)
cpu_set(cpu, cpu_present_map);
}
+#ifdef CONFIG_X86_64
+int hard_smp_processor_id(void)
+{
+ return read_apic_id();
+}
+#endif
+
/*
* Power management
*/
@@ -1640,7 +1989,7 @@ static int lapic_resume(struct sys_device *dev)
local_irq_save(flags);
-#ifdef CONFIG_X86_64
+#ifdef HAVE_X2APIC
if (x2apic)
enable_x2apic();
else
@@ -1702,7 +2051,7 @@ static struct sys_device device_lapic = {
.cls = &lapic_sysclass,
};
-static void __devinit apic_pm_activate(void)
+static void __cpuinit apic_pm_activate(void)
{
apic_pm_state.active = 1;
}
@@ -1728,16 +2077,87 @@ static void apic_pm_activate(void) { }
#endif /* CONFIG_PM */
+#ifdef CONFIG_X86_64
+/*
+ * apic_is_clustered_box() -- Check if we can expect good TSC
+ *
+ * Thus far, the major user of this is IBM's Summit2 series:
+ *
+ * Clustered boxes may have unsynced TSC problems if they are
+ * multi-chassis. Use available data to take a good guess.
+ * If in doubt, go HPET.
+ */
+__cpuinit int apic_is_clustered_box(void)
+{
+ int i, clusters, zeros;
+ unsigned id;
+ u16 *bios_cpu_apicid;
+ DECLARE_BITMAP(clustermap, NUM_APIC_CLUSTERS);
+
+ /*
+ * there is not this kind of box with AMD CPU yet.
+ * Some AMD box with quadcore cpu and 8 sockets apicid
+ * will be [4, 0x23] or [8, 0x27] could be thought to
+ * vsmp box still need checking...
+ */
+ if ((boot_cpu_data.x86_vendor == X86_VENDOR_AMD) && !is_vsmp_box())
+ return 0;
+
+ bios_cpu_apicid = early_per_cpu_ptr(x86_bios_cpu_apicid);
+ bitmap_zero(clustermap, NUM_APIC_CLUSTERS);
+
+ for (i = 0; i < NR_CPUS; i++) {
+ /* are we being called early in kernel startup? */
+ if (bios_cpu_apicid) {
+ id = bios_cpu_apicid[i];
+ }
+ else if (i < nr_cpu_ids) {
+ if (cpu_present(i))
+ id = per_cpu(x86_bios_cpu_apicid, i);
+ else
+ continue;
+ }
+ else
+ break;
+
+ if (id != BAD_APICID)
+ __set_bit(APIC_CLUSTERID(id), clustermap);
+ }
+
+ /* Problem: Partially populated chassis may not have CPUs in some of
+ * the APIC clusters they have been allocated. Only present CPUs have
+ * x86_bios_cpu_apicid entries, thus causing zeroes in the bitmap.
+ * Since clusters are allocated sequentially, count zeros only if
+ * they are bounded by ones.
+ */
+ clusters = 0;
+ zeros = 0;
+ for (i = 0; i < NUM_APIC_CLUSTERS; i++) {
+ if (test_bit(i, clustermap)) {
+ clusters += 1 + zeros;
+ zeros = 0;
+ } else
+ ++zeros;
+ }
+
+ /* ScaleMP vSMPowered boxes have one cluster per board and TSCs are
+ * not guaranteed to be synced between boards
+ */
+ if (is_vsmp_box() && clusters > 1)
+ return 1;
+
+ /*
+ * If clusters > 2, then should be multi-chassis.
+ * May have to revisit this when multi-core + hyperthreaded CPUs come
+ * out, but AFAIK this will work even for them.
+ */
+ return (clusters > 2);
+}
+#endif
+
/*
* APIC command line parameters
*/
-static int __init parse_lapic(char *arg)
-{
- force_enable_local_apic = 1;
- return 0;
-}
-early_param("lapic", parse_lapic);
-
static int __init setup_disableapic(char *arg)
{
disable_apic = 1;
@@ -1779,7 +2199,6 @@ static int __init apic_set_verbosity(char *arg)
if (!arg) {
#ifdef CONFIG_X86_64
skip_ioapic_setup = 0;
- ioapic_force = 1;
return 0;
#endif
return -EINVAL;
diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c
deleted file mode 100644
index 94ddb69ae15e..000000000000
--- a/arch/x86/kernel/apic_64.c
+++ /dev/null
@@ -1,1848 +0,0 @@
-/*
- * Local APIC handling, local APIC timers
- *
- * (c) 1999, 2000 Ingo Molnar
- *
- * Fixes
- * Maciej W. Rozycki : Bits for genuine 82489DX APICs;
- * thanks to Eric Gilmore
- * and Rolf G. Tews
- * for testing these extensively.
- * Maciej W. Rozycki : Various updates and fixes.
- * Mikael Pettersson : Power Management for UP-APIC.
- * Pavel Machek and
- * Mikael Pettersson : PM converted to driver model.
- */
-
-#include
-
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-
-#include
-#include
-
-/* Disable local APIC timer from the kernel commandline or via dmi quirk */
-static int disable_apic_timer __cpuinitdata;
-static int apic_calibrate_pmtmr __initdata;
-int disable_apic;
-int disable_x2apic;
-int x2apic;
-
-/* x2apic enabled before OS handover */
-int x2apic_preenabled;
-
-/* Local APIC timer works in C2 */
-int local_apic_timer_c2_ok;
-EXPORT_SYMBOL_GPL(local_apic_timer_c2_ok);
-
-/*
- * Debug level, exported for io_apic.c
- */
-unsigned int apic_verbosity;
-
-/* Have we found an MP table */
-int smp_found_config;
-
-static struct resource lapic_resource = {
- .name = "Local APIC",
- .flags = IORESOURCE_MEM | IORESOURCE_BUSY,
-};
-
-static unsigned int calibration_result;
-
-static int lapic_next_event(unsigned long delta,
- struct clock_event_device *evt);
-static void lapic_timer_setup(enum clock_event_mode mode,
- struct clock_event_device *evt);
-static void lapic_timer_broadcast(cpumask_t mask);
-static void apic_pm_activate(void);
-
-/*
- * The local apic timer can be used for any function which is CPU local.
- */
-static struct clock_event_device lapic_clockevent = {
- .name = "lapic",
- .features = CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT
- | CLOCK_EVT_FEAT_C3STOP | CLOCK_EVT_FEAT_DUMMY,
- .shift = 32,
- .set_mode = lapic_timer_setup,
- .set_next_event = lapic_next_event,
- .broadcast = lapic_timer_broadcast,
- .rating = 100,
- .irq = -1,
-};
-static DEFINE_PER_CPU(struct clock_event_device, lapic_events);
-
-static unsigned long apic_phys;
-
-unsigned long mp_lapic_addr;
-
-/*
- * Get the LAPIC version
- */
-static inline int lapic_get_version(void)
-{
- return GET_APIC_VERSION(apic_read(APIC_LVR));
-}
-
-/*
- * Check, if the APIC is integrated or a separate chip
- */
-static inline int lapic_is_integrated(void)
-{
-#ifdef CONFIG_X86_64
- return 1;
-#else
- return APIC_INTEGRATED(lapic_get_version());
-#endif
-}
-
-/*
- * Check, whether this is a modern or a first generation APIC
- */
-static int modern_apic(void)
-{
- /* AMD systems use old APIC versions, so check the CPU */
- if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD &&
- boot_cpu_data.x86 >= 0xf)
- return 1;
- return lapic_get_version() >= 0x14;
-}
-
-/*
- * Paravirt kernels also might be using these below ops. So we still
- * use generic apic_read()/apic_write(), which might be pointing to different
- * ops in PARAVIRT case.
- */
-void xapic_wait_icr_idle(void)
-{
- while (apic_read(APIC_ICR) & APIC_ICR_BUSY)
- cpu_relax();
-}
-
-u32 safe_xapic_wait_icr_idle(void)
-{
- u32 send_status;
- int timeout;
-
- timeout = 0;
- do {
- send_status = apic_read(APIC_ICR) & APIC_ICR_BUSY;
- if (!send_status)
- break;
- udelay(100);
- } while (timeout++ < 1000);
-
- return send_status;
-}
-
-void xapic_icr_write(u32 low, u32 id)
-{
- apic_write(APIC_ICR2, SET_APIC_DEST_FIELD(id));
- apic_write(APIC_ICR, low);
-}
-
-u64 xapic_icr_read(void)
-{
- u32 icr1, icr2;
-
- icr2 = apic_read(APIC_ICR2);
- icr1 = apic_read(APIC_ICR);
-
- return icr1 | ((u64)icr2 << 32);
-}
-
-static struct apic_ops xapic_ops = {
- .read = native_apic_mem_read,
- .write = native_apic_mem_write,
- .icr_read = xapic_icr_read,
- .icr_write = xapic_icr_write,
- .wait_icr_idle = xapic_wait_icr_idle,
- .safe_wait_icr_idle = safe_xapic_wait_icr_idle,
-};
-
-struct apic_ops __read_mostly *apic_ops = &xapic_ops;
-EXPORT_SYMBOL_GPL(apic_ops);
-
-static void x2apic_wait_icr_idle(void)
-{
- /* no need to wait for icr idle in x2apic */
- return;
-}
-
-static u32 safe_x2apic_wait_icr_idle(void)
-{
- /* no need to wait for icr idle in x2apic */
- return 0;
-}
-
-void x2apic_icr_write(u32 low, u32 id)
-{
- wrmsrl(APIC_BASE_MSR + (APIC_ICR >> 4), ((__u64) id) << 32 | low);
-}
-
-u64 x2apic_icr_read(void)
-{
- unsigned long val;
-
- rdmsrl(APIC_BASE_MSR + (APIC_ICR >> 4), val);
- return val;
-}
-
-static struct apic_ops x2apic_ops = {
- .read = native_apic_msr_read,
- .write = native_apic_msr_write,
- .icr_read = x2apic_icr_read,
- .icr_write = x2apic_icr_write,
- .wait_icr_idle = x2apic_wait_icr_idle,
- .safe_wait_icr_idle = safe_x2apic_wait_icr_idle,
-};
-
-/**
- * enable_NMI_through_LVT0 - enable NMI through local vector table 0
- */
-void __cpuinit enable_NMI_through_LVT0(void)
-{
- unsigned int v;
-
- /* unmask and set to NMI */
- v = APIC_DM_NMI;
-
- /* Level triggered for 82489DX (32bit mode) */
- if (!lapic_is_integrated())
- v |= APIC_LVT_LEVEL_TRIGGER;
-
- apic_write(APIC_LVT0, v);
-}
-
-/**
- * lapic_get_maxlvt - get the maximum number of local vector table entries
- */
-int lapic_get_maxlvt(void)
-{
- unsigned int v;
-
- v = apic_read(APIC_LVR);
- /*
- * - we always have APIC integrated on 64bit mode
- * - 82489DXs do not report # of LVT entries
- */
- return APIC_INTEGRATED(GET_APIC_VERSION(v)) ? GET_APIC_MAXLVT(v) : 2;
-}
-
-/*
- * Local APIC timer
- */
-
-/* Clock divisor */
-#ifdef CONFG_X86_64
-#define APIC_DIVISOR 1
-#else
-#define APIC_DIVISOR 16
-#endif
-
-/*
- * This function sets up the local APIC timer, with a timeout of
- * 'clocks' APIC bus clock. During calibration we actually call
- * this function twice on the boot CPU, once with a bogus timeout
- * value, second time for real. The other (noncalibrating) CPUs
- * call this function only once, with the real, calibrated value.
- *
- * We do reads before writes even if unnecessary, to get around the
- * P5 APIC double write bug.
- */
-static void __setup_APIC_LVTT(unsigned int clocks, int oneshot, int irqen)
-{
- unsigned int lvtt_value, tmp_value;
-
- lvtt_value = LOCAL_TIMER_VECTOR;
- if (!oneshot)
- lvtt_value |= APIC_LVT_TIMER_PERIODIC;
- if (!lapic_is_integrated())
- lvtt_value |= SET_APIC_TIMER_BASE(APIC_TIMER_BASE_DIV);
-
- if (!irqen)
- lvtt_value |= APIC_LVT_MASKED;
-
- apic_write(APIC_LVTT, lvtt_value);
-
- /*
- * Divide PICLK by 16
- */
- tmp_value = apic_read(APIC_TDCR);
- apic_write(APIC_TDCR,
- (tmp_value & ~(APIC_TDR_DIV_1 | APIC_TDR_DIV_TMBASE)) |
- APIC_TDR_DIV_16);
-
- if (!oneshot)
- apic_write(APIC_TMICT, clocks / APIC_DIVISOR);
-}
-
-/*
- * Setup extended LVT, AMD specific (K8, family 10h)
- *
- * Vector mappings are hard coded. On K8 only offset 0 (APIC500) and
- * MCE interrupts are supported. Thus MCE offset must be set to 0.
- *
- * If mask=1, the LVT entry does not generate interrupts while mask=0
- * enables the vector. See also the BKDGs.
- */
-
-#define APIC_EILVT_LVTOFF_MCE 0
-#define APIC_EILVT_LVTOFF_IBS 1
-
-static void setup_APIC_eilvt(u8 lvt_off, u8 vector, u8 msg_type, u8 mask)
-{
- unsigned long reg = (lvt_off << 4) + APIC_EILVT0;
- unsigned int v = (mask << 16) | (msg_type << 8) | vector;
-
- apic_write(reg, v);
-}
-
-u8 setup_APIC_eilvt_mce(u8 vector, u8 msg_type, u8 mask)
-{
- setup_APIC_eilvt(APIC_EILVT_LVTOFF_MCE, vector, msg_type, mask);
- return APIC_EILVT_LVTOFF_MCE;
-}
-
-u8 setup_APIC_eilvt_ibs(u8 vector, u8 msg_type, u8 mask)
-{
- setup_APIC_eilvt(APIC_EILVT_LVTOFF_IBS, vector, msg_type, mask);
- return APIC_EILVT_LVTOFF_IBS;
-}
-EXPORT_SYMBOL_GPL(setup_APIC_eilvt_ibs);
-
-/*
- * Program the next event, relative to now
- */
-static int lapic_next_event(unsigned long delta,
- struct clock_event_device *evt)
-{
- apic_write(APIC_TMICT, delta);
- return 0;
-}
-
-/*
- * Setup the lapic timer in periodic or oneshot mode
- */
-static void lapic_timer_setup(enum clock_event_mode mode,
- struct clock_event_device *evt)
-{
- unsigned long flags;
- unsigned int v;
-
- /* Lapic used as dummy for broadcast ? */
- if (evt->features & CLOCK_EVT_FEAT_DUMMY)
- return;
-
- local_irq_save(flags);
-
- switch (mode) {
- case CLOCK_EVT_MODE_PERIODIC:
- case CLOCK_EVT_MODE_ONESHOT:
- __setup_APIC_LVTT(calibration_result,
- mode != CLOCK_EVT_MODE_PERIODIC, 1);
- break;
- case CLOCK_EVT_MODE_UNUSED:
- case CLOCK_EVT_MODE_SHUTDOWN:
- v = apic_read(APIC_LVTT);
- v |= (APIC_LVT_MASKED | LOCAL_TIMER_VECTOR);
- apic_write(APIC_LVTT, v);
- break;
- case CLOCK_EVT_MODE_RESUME:
- /* Nothing to do here */
- break;
- }
-
- local_irq_restore(flags);
-}
-
-/*
- * Local APIC timer broadcast function
- */
-static void lapic_timer_broadcast(cpumask_t mask)
-{
-#ifdef CONFIG_SMP
- send_IPI_mask(mask, LOCAL_TIMER_VECTOR);
-#endif
-}
-
-/*
- * Setup the local APIC timer for this CPU. Copy the initilized values
- * of the boot CPU and register the clock event in the framework.
- */
-static void setup_APIC_timer(void)
-{
- struct clock_event_device *levt = &__get_cpu_var(lapic_events);
-
- memcpy(levt, &lapic_clockevent, sizeof(*levt));
- levt->cpumask = cpumask_of_cpu(smp_processor_id());
-
- clockevents_register_device(levt);
-}
-
-/*
- * In this function we calibrate APIC bus clocks to the external
- * timer. Unfortunately we cannot use jiffies and the timer irq
- * to calibrate, since some later bootup code depends on getting
- * the first irq? Ugh.
- *
- * We want to do the calibration only once since we
- * want to have local timer irqs syncron. CPUs connected
- * by the same APIC bus have the very same bus frequency.
- * And we want to have irqs off anyways, no accidental
- * APIC irq that way.
- */
-
-#define TICK_COUNT 100000000
-
-static int __init calibrate_APIC_clock(void)
-{
- unsigned apic, apic_start;
- unsigned long tsc, tsc_start;
- int result;
-
- local_irq_disable();
-
- /*
- * Put whatever arbitrary (but long enough) timeout
- * value into the APIC clock, we just want to get the
- * counter running for calibration.
- *
- * No interrupt enable !
- */
- __setup_APIC_LVTT(250000000, 0, 0);
-
- apic_start = apic_read(APIC_TMCCT);
-#ifdef CONFIG_X86_PM_TIMER
- if (apic_calibrate_pmtmr && pmtmr_ioport) {
- pmtimer_wait(5000); /* 5ms wait */
- apic = apic_read(APIC_TMCCT);
- result = (apic_start - apic) * 1000L / 5;
- } else
-#endif
- {
- rdtscll(tsc_start);
-
- do {
- apic = apic_read(APIC_TMCCT);
- rdtscll(tsc);
- } while ((tsc - tsc_start) < TICK_COUNT &&
- (apic_start - apic) < TICK_COUNT);
-
- result = (apic_start - apic) * 1000L * tsc_khz /
- (tsc - tsc_start);
- }
-
- local_irq_enable();
-
- printk(KERN_DEBUG "APIC timer calibration result %d\n", result);
-
- printk(KERN_INFO "Detected %d.%03d MHz APIC timer.\n",
- result / 1000 / 1000, result / 1000 % 1000);
-
- /* Calculate the scaled math multiplication factor */
- lapic_clockevent.mult = div_sc(result, NSEC_PER_SEC,
- lapic_clockevent.shift);
- lapic_clockevent.max_delta_ns =
- clockevent_delta2ns(0x7FFFFF, &lapic_clockevent);
- lapic_clockevent.min_delta_ns =
- clockevent_delta2ns(0xF, &lapic_clockevent);
-
- calibration_result = (result * APIC_DIVISOR) / HZ;
-
- /*
- * Do a sanity check on the APIC calibration result
- */
- if (calibration_result < (1000000 / HZ)) {
- printk(KERN_WARNING
- "APIC frequency too slow, disabling apic timer\n");
- return -1;
- }
-
- return 0;
-}
-
-/*
- * Setup the boot APIC
- *
- * Calibrate and verify the result.
- */
-void __init setup_boot_APIC_clock(void)
-{
- /*
- * The local apic timer can be disabled via the kernel
- * commandline or from the CPU detection code. Register the lapic
- * timer as a dummy clock event source on SMP systems, so the
- * broadcast mechanism is used. On UP systems simply ignore it.
- */
- if (disable_apic_timer) {
- printk(KERN_INFO "Disabling APIC timer\n");
- /* No broadcast on UP ! */
- if (num_possible_cpus() > 1) {
- lapic_clockevent.mult = 1;
- setup_APIC_timer();
- }
- return;
- }
-
- apic_printk(APIC_VERBOSE, "Using local APIC timer interrupts.\n"
- "calibrating APIC timer ...\n");
-
- if (calibrate_APIC_clock()) {
- /* No broadcast on UP ! */
- if (num_possible_cpus() > 1)
- setup_APIC_timer();
- return;
- }
-
- /*
- * If nmi_watchdog is set to IO_APIC, we need the
- * PIT/HPET going. Otherwise register lapic as a dummy
- * device.
- */
- if (nmi_watchdog != NMI_IO_APIC)
- lapic_clockevent.features &= ~CLOCK_EVT_FEAT_DUMMY;
- else
- printk(KERN_WARNING "APIC timer registered as dummy,"
- " due to nmi_watchdog=%d!\n", nmi_watchdog);
-
- /* Setup the lapic or request the broadcast */
- setup_APIC_timer();
-}
-
-void __cpuinit setup_secondary_APIC_clock(void)
-{
- setup_APIC_timer();
-}
-
-/*
- * The guts of the apic timer interrupt
- */
-static void local_apic_timer_interrupt(void)
-{
- int cpu = smp_processor_id();
- struct clock_event_device *evt = &per_cpu(lapic_events, cpu);
-
- /*
- * Normally we should not be here till LAPIC has been initialized but
- * in some cases like kdump, its possible that there is a pending LAPIC
- * timer interrupt from previous kernel's context and is delivered in
- * new kernel the moment interrupts are enabled.
- *
- * Interrupts are enabled early and LAPIC is setup much later, hence
- * its possible that when we get here evt->event_handler is NULL.
- * Check for event_handler being NULL and discard the interrupt as
- * spurious.
- */
- if (!evt->event_handler) {
- printk(KERN_WARNING
- "Spurious LAPIC timer interrupt on cpu %d\n", cpu);
- /* Switch it off */
- lapic_timer_setup(CLOCK_EVT_MODE_SHUTDOWN, evt);
- return;
- }
-
- /*
- * the NMI deadlock-detector uses this.
- */
-#ifdef CONFIG_X86_64
- add_pda(apic_timer_irqs, 1);
-#else
- per_cpu(irq_stat, cpu).apic_timer_irqs++;
-#endif
-
- evt->event_handler(evt);
-}
-
-/*
- * Local APIC timer interrupt. This is the most natural way for doing
- * local interrupts, but local timer interrupts can be emulated by
- * broadcast interrupts too. [in case the hw doesn't support APIC timers]
- *
- * [ if a single-CPU system runs an SMP kernel then we call the local
- * interrupt as well. Thus we cannot inline the local irq ... ]
- */
-void smp_apic_timer_interrupt(struct pt_regs *regs)
-{
- struct pt_regs *old_regs = set_irq_regs(regs);
-
- /*
- * NOTE! We'd better ACK the irq immediately,
- * because timer handling can be slow.
- */
- ack_APIC_irq();
- /*
- * update_process_times() expects us to have done irq_enter().
- * Besides, if we don't timer interrupts ignore the global
- * interrupt lock, which is the WrongThing (tm) to do.
- */
- exit_idle();
- irq_enter();
- local_apic_timer_interrupt();
- irq_exit();
-
- set_irq_regs(old_regs);
-}
-
-int setup_profiling_timer(unsigned int multiplier)
-{
- return -EINVAL;
-}
-
-
-/*
- * Local APIC start and shutdown
- */
-
-/**
- * clear_local_APIC - shutdown the local APIC
- *
- * This is called, when a CPU is disabled and before rebooting, so the state of
- * the local APIC has no dangling leftovers. Also used to cleanout any BIOS
- * leftovers during boot.
- */
-void clear_local_APIC(void)
-{
- int maxlvt;
- u32 v;
-
- /* APIC hasn't been mapped yet */
- if (!apic_phys)
- return;
-
- maxlvt = lapic_get_maxlvt();
- /*
- * Masking an LVT entry can trigger a local APIC error
- * if the vector is zero. Mask LVTERR first to prevent this.
- */
- if (maxlvt >= 3) {
- v = ERROR_APIC_VECTOR; /* any non-zero vector will do */
- apic_write(APIC_LVTERR, v | APIC_LVT_MASKED);
- }
- /*
- * Careful: we have to set masks only first to deassert
- * any level-triggered sources.
- */
- v = apic_read(APIC_LVTT);
- apic_write(APIC_LVTT, v | APIC_LVT_MASKED);
- v = apic_read(APIC_LVT0);
- apic_write(APIC_LVT0, v | APIC_LVT_MASKED);
- v = apic_read(APIC_LVT1);
- apic_write(APIC_LVT1, v | APIC_LVT_MASKED);
- if (maxlvt >= 4) {
- v = apic_read(APIC_LVTPC);
- apic_write(APIC_LVTPC, v | APIC_LVT_MASKED);
- }
-
- /* lets not touch this if we didn't frob it */
-#if defined(CONFIG_X86_MCE_P4THERMAL) || defined(X86_MCE_INTEL)
- if (maxlvt >= 5) {
- v = apic_read(APIC_LVTTHMR);
- apic_write(APIC_LVTTHMR, v | APIC_LVT_MASKED);
- }
-#endif
- /*
- * Clean APIC state for other OSs:
- */
- apic_write(APIC_LVTT, APIC_LVT_MASKED);
- apic_write(APIC_LVT0, APIC_LVT_MASKED);
- apic_write(APIC_LVT1, APIC_LVT_MASKED);
- if (maxlvt >= 3)
- apic_write(APIC_LVTERR, APIC_LVT_MASKED);
- if (maxlvt >= 4)
- apic_write(APIC_LVTPC, APIC_LVT_MASKED);
-
- /* Integrated APIC (!82489DX) ? */
- if (lapic_is_integrated()) {
- if (maxlvt > 3)
- /* Clear ESR due to Pentium errata 3AP and 11AP */
- apic_write(APIC_ESR, 0);
- apic_read(APIC_ESR);
- }
-}
-
-/**
- * disable_local_APIC - clear and disable the local APIC
- */
-void disable_local_APIC(void)
-{
- unsigned int value;
-
- clear_local_APIC();
-
- /*
- * Disable APIC (implies clearing of registers
- * for 82489DX!).
- */
- value = apic_read(APIC_SPIV);
- value &= ~APIC_SPIV_APIC_ENABLED;
- apic_write(APIC_SPIV, value);
-
-#ifdef CONFIG_X86_32
- /*
- * When LAPIC was disabled by the BIOS and enabled by the kernel,
- * restore the disabled state.
- */
- if (enabled_via_apicbase) {
- unsigned int l, h;
-
- rdmsr(MSR_IA32_APICBASE, l, h);
- l &= ~MSR_IA32_APICBASE_ENABLE;
- wrmsr(MSR_IA32_APICBASE, l, h);
- }
-#endif
-}
-
-/*
- * If Linux enabled the LAPIC against the BIOS default disable it down before
- * re-entering the BIOS on shutdown. Otherwise the BIOS may get confused and
- * not power-off. Additionally clear all LVT entries before disable_local_APIC
- * for the case where Linux didn't enable the LAPIC.
- */
-void lapic_shutdown(void)
-{
- unsigned long flags;
-
- if (!cpu_has_apic)
- return;
-
- local_irq_save(flags);
-
-#ifdef CONFIG_X86_32
- if (!enabled_via_apicbase)
- clear_local_APIC();
- else
-#endif
- disable_local_APIC();
-
-
- local_irq_restore(flags);
-}
-
-/*
- * This is to verify that we're looking at a real local APIC.
- * Check these against your board if the CPUs aren't getting
- * started for no apparent reason.
- */
-int __init verify_local_APIC(void)
-{
- unsigned int reg0, reg1;
-
- /*
- * The version register is read-only in a real APIC.
- */
- reg0 = apic_read(APIC_LVR);
- apic_printk(APIC_DEBUG, "Getting VERSION: %x\n", reg0);
- apic_write(APIC_LVR, reg0 ^ APIC_LVR_MASK);
- reg1 = apic_read(APIC_LVR);
- apic_printk(APIC_DEBUG, "Getting VERSION: %x\n", reg1);
-
- /*
- * The two version reads above should print the same
- * numbers. If the second one is different, then we
- * poke at a non-APIC.
- */
- if (reg1 != reg0)
- return 0;
-
- /*
- * Check if the version looks reasonably.
- */
- reg1 = GET_APIC_VERSION(reg0);
- if (reg1 == 0x00 || reg1 == 0xff)
- return 0;
- reg1 = lapic_get_maxlvt();
- if (reg1 < 0x02 || reg1 == 0xff)
- return 0;
-
- /*
- * The ID register is read/write in a real APIC.
- */
- reg0 = apic_read(APIC_ID);
- apic_printk(APIC_DEBUG, "Getting ID: %x\n", reg0);
- apic_write(APIC_ID, reg0 ^ APIC_ID_MASK);
- reg1 = apic_read(APIC_ID);
- apic_printk(APIC_DEBUG, "Getting ID: %x\n", reg1);
- apic_write(APIC_ID, reg0);
- if (reg1 != (reg0 ^ APIC_ID_MASK))
- return 0;
-
- /*
- * The next two are just to see if we have sane values.
- * They're only really relevant if we're in Virtual Wire
- * compatibility mode, but most boxes are anymore.
- */
- reg0 = apic_read(APIC_LVT0);
- apic_printk(APIC_DEBUG, "Getting LVT0: %x\n", reg0);
- reg1 = apic_read(APIC_LVT1);
- apic_printk(APIC_DEBUG, "Getting LVT1: %x\n", reg1);
-
- return 1;
-}
-
-/**
- * sync_Arb_IDs - synchronize APIC bus arbitration IDs
- */
-void __init sync_Arb_IDs(void)
-{
- /*
- * Unsupported on P4 - see Intel Dev. Manual Vol. 3, Ch. 8.6.1 And not
- * needed on AMD.
- */
- if (modern_apic() || boot_cpu_data.x86_vendor == X86_VENDOR_AMD)
- return;
-
- /*
- * Wait for idle.
- */
- apic_wait_icr_idle();
-
- apic_printk(APIC_DEBUG, "Synchronizing Arb IDs.\n");
- apic_write(APIC_ICR, APIC_DEST_ALLINC |
- APIC_INT_LEVELTRIG | APIC_DM_INIT);
-}
-
-/*
- * An initial setup of the virtual wire mode.
- */
-void __init init_bsp_APIC(void)
-{
- unsigned int value;
-
- /*
- * Don't do the setup now if we have a SMP BIOS as the
- * through-I/O-APIC virtual wire mode might be active.
- */
- if (smp_found_config || !cpu_has_apic)
- return;
-
- /*
- * Do not trust the local APIC being empty at bootup.
- */
- clear_local_APIC();
-
- /*
- * Enable APIC.
- */
- value = apic_read(APIC_SPIV);
- value &= ~APIC_VECTOR_MASK;
- value |= APIC_SPIV_APIC_ENABLED;
-
-#ifdef CONFIG_X86_32
- /* This bit is reserved on P4/Xeon and should be cleared */
- if ((boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) &&
- (boot_cpu_data.x86 == 15))
- value &= ~APIC_SPIV_FOCUS_DISABLED;
- else
-#endif
- value |= APIC_SPIV_FOCUS_DISABLED;
- value |= SPURIOUS_APIC_VECTOR;
- apic_write(APIC_SPIV, value);
-
- /*
- * Set up the virtual wire mode.
- */
- apic_write(APIC_LVT0, APIC_DM_EXTINT);
- value = APIC_DM_NMI;
- if (!lapic_is_integrated()) /* 82489DX */
- value |= APIC_LVT_LEVEL_TRIGGER;
- apic_write(APIC_LVT1, value);
-}
-
-static void __cpuinit lapic_setup_esr(void)
-{
- unsigned long oldvalue, value, maxlvt;
- if (lapic_is_integrated() && !esr_disable) {
- if (esr_disable) {
- /*
- * Something untraceable is creating bad interrupts on
- * secondary quads ... for the moment, just leave the
- * ESR disabled - we can't do anything useful with the
- * errors anyway - mbligh
- */
- printk(KERN_INFO "Leaving ESR disabled.\n");
- return;
- }
- /* !82489DX */
- maxlvt = lapic_get_maxlvt();
- if (maxlvt > 3) /* Due to the Pentium erratum 3AP. */
- apic_write(APIC_ESR, 0);
- oldvalue = apic_read(APIC_ESR);
-
- /* enables sending errors */
- value = ERROR_APIC_VECTOR;
- apic_write(APIC_LVTERR, value);
- /*
- * spec says clear errors after enabling vector.
- */
- if (maxlvt > 3)
- apic_write(APIC_ESR, 0);
- value = apic_read(APIC_ESR);
- if (value != oldvalue)
- apic_printk(APIC_VERBOSE, "ESR value before enabling "
- "vector: 0x%08lx after: 0x%08lx\n",
- oldvalue, value);
- } else {
- printk(KERN_INFO "No ESR for 82489DX.\n");
- }
-}
-
-
-/**
- * setup_local_APIC - setup the local APIC
- */
-void __cpuinit setup_local_APIC(void)
-{
- unsigned int value;
- int i, j;
-
- preempt_disable();
- value = apic_read(APIC_LVR);
-
- BUILD_BUG_ON((SPURIOUS_APIC_VECTOR & 0x0f) != 0x0f);
-
- /*
- * Double-check whether this APIC is really registered.
- * This is meaningless in clustered apic mode, so we skip it.
- */
- if (!apic_id_registered())
- BUG();
-
- /*
- * Intel recommends to set DFR, LDR and TPR before enabling
- * an APIC. See e.g. "AP-388 82489DX User's Manual" (Intel
- * document number 292116). So here it goes...
- */
- init_apic_ldr();
-
- /*
- * Set Task Priority to 'accept all'. We never change this
- * later on.
- */
- value = apic_read(APIC_TASKPRI);
- value &= ~APIC_TPRI_MASK;
- apic_write(APIC_TASKPRI, value);
-
- /*
- * After a crash, we no longer service the interrupts and a pending
- * interrupt from previous kernel might still have ISR bit set.
- *
- * Most probably by now CPU has serviced that pending interrupt and
- * it might not have done the ack_APIC_irq() because it thought,
- * interrupt came from i8259 as ExtInt. LAPIC did not get EOI so it
- * does not clear the ISR bit and cpu thinks it has already serivced
- * the interrupt. Hence a vector might get locked. It was noticed
- * for timer irq (vector 0x31). Issue an extra EOI to clear ISR.
- */
- for (i = APIC_ISR_NR - 1; i >= 0; i--) {
- value = apic_read(APIC_ISR + i*0x10);
- for (j = 31; j >= 0; j--) {
- if (value & (1<> 1));
- if (v & (1 << (SPURIOUS_APIC_VECTOR & 0x1f)))
- ack_APIC_irq();
-
- add_pda(irq_spurious_count, 1);
- irq_exit();
-}
-
-/*
- * This interrupt should never happen with our APIC/SMP architecture
- */
-asmlinkage void smp_error_interrupt(void)
-{
- unsigned int v, v1;
-
- exit_idle();
- irq_enter();
- /* First tickle the hardware, only then report what went on. -- REW */
- v = apic_read(APIC_ESR);
- apic_write(APIC_ESR, 0);
- v1 = apic_read(APIC_ESR);
- ack_APIC_irq();
- atomic_inc(&irq_err_count);
-
- /* Here is what the APIC error bits mean:
- 0: Send CS error
- 1: Receive CS error
- 2: Send accept error
- 3: Receive accept error
- 4: Reserved
- 5: Send illegal vector
- 6: Received illegal vector
- 7: Illegal register address
- */
- printk(KERN_DEBUG "APIC error on CPU%d: %02x(%02x)\n",
- smp_processor_id(), v , v1);
- irq_exit();
-}
-
-/**
- * connect_bsp_APIC - attach the APIC to the interrupt system
- */
-void __init connect_bsp_APIC(void)
-{
-#ifdef CONFIG_X86_32
- if (pic_mode) {
- /*
- * Do not trust the local APIC being empty at bootup.
- */
- clear_local_APIC();
- /*
- * PIC mode, enable APIC mode in the IMCR, i.e. connect BSP's
- * local APIC to INT and NMI lines.
- */
- apic_printk(APIC_VERBOSE, "leaving PIC mode, "
- "enabling APIC mode.\n");
- outb(0x70, 0x22);
- outb(0x01, 0x23);
- }
-#endif
- enable_apic_mode();
-}
-
-/**
- * disconnect_bsp_APIC - detach the APIC from the interrupt system
- * @virt_wire_setup: indicates, whether virtual wire mode is selected
- *
- * Virtual wire mode is necessary to deliver legacy interrupts even when the
- * APIC is disabled.
- */
-void disconnect_bsp_APIC(int virt_wire_setup)
-{
- unsigned int value;
-
-#ifdef CONFIG_X86_32
- if (pic_mode) {
- /*
- * Put the board back into PIC mode (has an effect only on
- * certain older boards). Note that APIC interrupts, including
- * IPIs, won't work beyond this point! The only exception are
- * INIT IPIs.
- */
- apic_printk(APIC_VERBOSE, "disabling APIC mode, "
- "entering PIC mode.\n");
- outb(0x70, 0x22);
- outb(0x00, 0x23);
- return;
- }
-#endif
-
- /* Go back to Virtual Wire compatibility mode */
-
- /* For the spurious interrupt use vector F, and enable it */
- value = apic_read(APIC_SPIV);
- value &= ~APIC_VECTOR_MASK;
- value |= APIC_SPIV_APIC_ENABLED;
- value |= 0xf;
- apic_write(APIC_SPIV, value);
-
- if (!virt_wire_setup) {
- /*
- * For LVT0 make it edge triggered, active high,
- * external and enabled
- */
- value = apic_read(APIC_LVT0);
- value &= ~(APIC_MODE_MASK | APIC_SEND_PENDING |
- APIC_INPUT_POLARITY | APIC_LVT_REMOTE_IRR |
- APIC_LVT_LEVEL_TRIGGER | APIC_LVT_MASKED);
- value |= APIC_LVT_REMOTE_IRR | APIC_SEND_PENDING;
- value = SET_APIC_DELIVERY_MODE(value, APIC_MODE_EXTINT);
- apic_write(APIC_LVT0, value);
- } else {
- /* Disable LVT0 */
- apic_write(APIC_LVT0, APIC_LVT_MASKED);
- }
-
- /*
- * For LVT1 make it edge triggered, active high,
- * nmi and enabled
- */
- value = apic_read(APIC_LVT1);
- value &= ~(APIC_MODE_MASK | APIC_SEND_PENDING |
- APIC_INPUT_POLARITY | APIC_LVT_REMOTE_IRR |
- APIC_LVT_LEVEL_TRIGGER | APIC_LVT_MASKED);
- value |= APIC_LVT_REMOTE_IRR | APIC_SEND_PENDING;
- value = SET_APIC_DELIVERY_MODE(value, APIC_MODE_NMI);
- apic_write(APIC_LVT1, value);
-}
-
-void __cpuinit generic_processor_info(int apicid, int version)
-{
- int cpu;
- cpumask_t tmp_map;
-
- /*
- * Validate version
- */
- if (version == 0x0) {
- printk(KERN_WARNING "BIOS bug, APIC version is 0 for CPU#%d! "
- "fixing up to 0x10. (tell your hw vendor)\n",
- version);
- version = 0x10;
- }
- apic_version[apicid] = version;
-
- if (num_processors >= NR_CPUS) {
- printk(KERN_WARNING "WARNING: NR_CPUS limit of %i reached."
- " Processor ignored.\n", NR_CPUS);
- return;
- }
-
- num_processors++;
- cpus_complement(tmp_map, cpu_present_map);
- cpu = first_cpu(tmp_map);
-
- physid_set(apicid, phys_cpu_present_map);
- if (apicid == boot_cpu_physical_apicid) {
- /*
- * x86_bios_cpu_apicid is required to have processors listed
- * in same order as logical cpu numbers. Hence the first
- * entry is BSP, and so on.
- */
- cpu = 0;
- }
- if (apicid > max_physical_apicid)
- max_physical_apicid = apicid;
-
-#ifdef CONFIG_X86_32
- /*
- * Would be preferable to switch to bigsmp when CONFIG_HOTPLUG_CPU=y
- * but we need to work other dependencies like SMP_SUSPEND etc
- * before this can be done without some confusion.
- * if (CPU_HOTPLUG_ENABLED || num_processors > 8)
- * - Ashok Raj
- */
- if (max_physical_apicid >= 8) {
- switch (boot_cpu_data.x86_vendor) {
- case X86_VENDOR_INTEL:
- if (!APIC_XAPIC(version)) {
- def_to_bigsmp = 0;
- break;
- }
- /* If P4 and above fall through */
- case X86_VENDOR_AMD:
- def_to_bigsmp = 1;
- }
- }
-#endif
-
-#if defined(CONFIG_X86_SMP) || defined(CONFIG_X86_64)
- /* are we being called early in kernel startup? */
- if (early_per_cpu_ptr(x86_cpu_to_apicid)) {
- u16 *cpu_to_apicid = early_per_cpu_ptr(x86_cpu_to_apicid);
- u16 *bios_cpu_apicid = early_per_cpu_ptr(x86_bios_cpu_apicid);
-
- cpu_to_apicid[cpu] = apicid;
- bios_cpu_apicid[cpu] = apicid;
- } else {
- per_cpu(x86_cpu_to_apicid, cpu) = apicid;
- per_cpu(x86_bios_cpu_apicid, cpu) = apicid;
- }
-#endif
-
- cpu_set(cpu, cpu_possible_map);
- cpu_set(cpu, cpu_present_map);
-}
-
-int hard_smp_processor_id(void)
-{
- return read_apic_id();
-}
-
-/*
- * Power management
- */
-#ifdef CONFIG_PM
-
-static struct {
- /*
- * 'active' is true if the local APIC was enabled by us and
- * not the BIOS; this signifies that we are also responsible
- * for disabling it before entering apm/acpi suspend
- */
- int active;
- /* r/w apic fields */
- unsigned int apic_id;
- unsigned int apic_taskpri;
- unsigned int apic_ldr;
- unsigned int apic_dfr;
- unsigned int apic_spiv;
- unsigned int apic_lvtt;
- unsigned int apic_lvtpc;
- unsigned int apic_lvt0;
- unsigned int apic_lvt1;
- unsigned int apic_lvterr;
- unsigned int apic_tmict;
- unsigned int apic_tdcr;
- unsigned int apic_thmr;
-} apic_pm_state;
-
-static int lapic_suspend(struct sys_device *dev, pm_message_t state)
-{
- unsigned long flags;
- int maxlvt;
-
- if (!apic_pm_state.active)
- return 0;
-
- maxlvt = lapic_get_maxlvt();
-
- apic_pm_state.apic_id = apic_read(APIC_ID);
- apic_pm_state.apic_taskpri = apic_read(APIC_TASKPRI);
- apic_pm_state.apic_ldr = apic_read(APIC_LDR);
- apic_pm_state.apic_dfr = apic_read(APIC_DFR);
- apic_pm_state.apic_spiv = apic_read(APIC_SPIV);
- apic_pm_state.apic_lvtt = apic_read(APIC_LVTT);
- if (maxlvt >= 4)
- apic_pm_state.apic_lvtpc = apic_read(APIC_LVTPC);
- apic_pm_state.apic_lvt0 = apic_read(APIC_LVT0);
- apic_pm_state.apic_lvt1 = apic_read(APIC_LVT1);
- apic_pm_state.apic_lvterr = apic_read(APIC_LVTERR);
- apic_pm_state.apic_tmict = apic_read(APIC_TMICT);
- apic_pm_state.apic_tdcr = apic_read(APIC_TDCR);
-#if defined(CONFIG_X86_MCE_P4THERMAL) || defined(CONFIG_X86_MCE_INTEL)
- if (maxlvt >= 5)
- apic_pm_state.apic_thmr = apic_read(APIC_LVTTHMR);
-#endif
-
- local_irq_save(flags);
- disable_local_APIC();
- local_irq_restore(flags);
- return 0;
-}
-
-static int lapic_resume(struct sys_device *dev)
-{
- unsigned int l, h;
- unsigned long flags;
- int maxlvt;
-
- if (!apic_pm_state.active)
- return 0;
-
- maxlvt = lapic_get_maxlvt();
-
- local_irq_save(flags);
-
-#ifdef CONFIG_X86_64
- if (x2apic)
- enable_x2apic();
- else
-#endif
- {
- /*
- * Make sure the APICBASE points to the right address
- *
- * FIXME! This will be wrong if we ever support suspend on
- * SMP! We'll need to do this as part of the CPU restore!
- */
- rdmsr(MSR_IA32_APICBASE, l, h);
- l &= ~MSR_IA32_APICBASE_BASE;
- l |= MSR_IA32_APICBASE_ENABLE | mp_lapic_addr;
- wrmsr(MSR_IA32_APICBASE, l, h);
- }
-
- apic_write(APIC_LVTERR, ERROR_APIC_VECTOR | APIC_LVT_MASKED);
- apic_write(APIC_ID, apic_pm_state.apic_id);
- apic_write(APIC_DFR, apic_pm_state.apic_dfr);
- apic_write(APIC_LDR, apic_pm_state.apic_ldr);
- apic_write(APIC_TASKPRI, apic_pm_state.apic_taskpri);
- apic_write(APIC_SPIV, apic_pm_state.apic_spiv);
- apic_write(APIC_LVT0, apic_pm_state.apic_lvt0);
- apic_write(APIC_LVT1, apic_pm_state.apic_lvt1);
-#if defined(CONFIG_X86_MCE_P4THERMAL) || defined(CONFIG_X86_MCE_INTEL)
- if (maxlvt >= 5)
- apic_write(APIC_LVTTHMR, apic_pm_state.apic_thmr);
-#endif
- if (maxlvt >= 4)
- apic_write(APIC_LVTPC, apic_pm_state.apic_lvtpc);
- apic_write(APIC_LVTT, apic_pm_state.apic_lvtt);
- apic_write(APIC_TDCR, apic_pm_state.apic_tdcr);
- apic_write(APIC_TMICT, apic_pm_state.apic_tmict);
- apic_write(APIC_ESR, 0);
- apic_read(APIC_ESR);
- apic_write(APIC_LVTERR, apic_pm_state.apic_lvterr);
- apic_write(APIC_ESR, 0);
- apic_read(APIC_ESR);
-
- local_irq_restore(flags);
-
- return 0;
-}
-
-/*
- * This device has no shutdown method - fully functioning local APICs
- * are needed on every CPU up until machine_halt/restart/poweroff.
- */
-
-static struct sysdev_class lapic_sysclass = {
- .name = "lapic",
- .resume = lapic_resume,
- .suspend = lapic_suspend,
-};
-
-static struct sys_device device_lapic = {
- .id = 0,
- .cls = &lapic_sysclass,
-};
-
-static void __cpuinit apic_pm_activate(void)
-{
- apic_pm_state.active = 1;
-}
-
-static int __init init_lapic_sysfs(void)
-{
- int error;
-
- if (!cpu_has_apic)
- return 0;
- /* XXX: remove suspend/resume procs if !apic_pm_state.active? */
-
- error = sysdev_class_register(&lapic_sysclass);
- if (!error)
- error = sysdev_register(&device_lapic);
- return error;
-}
-device_initcall(init_lapic_sysfs);
-
-#else /* CONFIG_PM */
-
-static void apic_pm_activate(void) { }
-
-#endif /* CONFIG_PM */
-
-/*
- * apic_is_clustered_box() -- Check if we can expect good TSC
- *
- * Thus far, the major user of this is IBM's Summit2 series:
- *
- * Clustered boxes may have unsynced TSC problems if they are
- * multi-chassis. Use available data to take a good guess.
- * If in doubt, go HPET.
- */
-__cpuinit int apic_is_clustered_box(void)
-{
- int i, clusters, zeros;
- unsigned id;
- u16 *bios_cpu_apicid;
- DECLARE_BITMAP(clustermap, NUM_APIC_CLUSTERS);
-
- /*
- * there is not this kind of box with AMD CPU yet.
- * Some AMD box with quadcore cpu and 8 sockets apicid
- * will be [4, 0x23] or [8, 0x27] could be thought to
- * vsmp box still need checking...
- */
- if ((boot_cpu_data.x86_vendor == X86_VENDOR_AMD) && !is_vsmp_box())
- return 0;
-
- bios_cpu_apicid = early_per_cpu_ptr(x86_bios_cpu_apicid);
- bitmap_zero(clustermap, NUM_APIC_CLUSTERS);
-
- for (i = 0; i < NR_CPUS; i++) {
- /* are we being called early in kernel startup? */
- if (bios_cpu_apicid) {
- id = bios_cpu_apicid[i];
- }
- else if (i < nr_cpu_ids) {
- if (cpu_present(i))
- id = per_cpu(x86_bios_cpu_apicid, i);
- else
- continue;
- }
- else
- break;
-
- if (id != BAD_APICID)
- __set_bit(APIC_CLUSTERID(id), clustermap);
- }
-
- /* Problem: Partially populated chassis may not have CPUs in some of
- * the APIC clusters they have been allocated. Only present CPUs have
- * x86_bios_cpu_apicid entries, thus causing zeroes in the bitmap.
- * Since clusters are allocated sequentially, count zeros only if
- * they are bounded by ones.
- */
- clusters = 0;
- zeros = 0;
- for (i = 0; i < NUM_APIC_CLUSTERS; i++) {
- if (test_bit(i, clustermap)) {
- clusters += 1 + zeros;
- zeros = 0;
- } else
- ++zeros;
- }
-
- /* ScaleMP vSMPowered boxes have one cluster per board and TSCs are
- * not guaranteed to be synced between boards
- */
- if (is_vsmp_box() && clusters > 1)
- return 1;
-
- /*
- * If clusters > 2, then should be multi-chassis.
- * May have to revisit this when multi-core + hyperthreaded CPUs come
- * out, but AFAIK this will work even for them.
- */
- return (clusters > 2);
-}
-
-static __init int setup_nox2apic(char *str)
-{
- disable_x2apic = 1;
- clear_cpu_cap(&boot_cpu_data, X86_FEATURE_X2APIC);
- return 0;
-}
-early_param("nox2apic", setup_nox2apic);
-
-
-/*
- * APIC command line parameters
- */
-static int __init setup_disableapic(char *arg)
-{
- disable_apic = 1;
- setup_clear_cpu_cap(X86_FEATURE_APIC);
- return 0;
-}
-early_param("disableapic", setup_disableapic);
-
-/* same as disableapic, for compatibility */
-static int __init setup_nolapic(char *arg)
-{
- return setup_disableapic(arg);
-}
-early_param("nolapic", setup_nolapic);
-
-static int __init parse_lapic_timer_c2_ok(char *arg)
-{
- local_apic_timer_c2_ok = 1;
- return 0;
-}
-early_param("lapic_timer_c2_ok", parse_lapic_timer_c2_ok);
-
-static int __init parse_disable_apic_timer(char *arg)
-{
- disable_apic_timer = 1;
- return 0;
-}
-early_param("noapictimer", parse_disable_apic_timer);
-
-static int __init parse_nolapic_timer(char *arg)
-{
- disable_apic_timer = 1;
- return 0;
-}
-early_param("nolapic_timer", parse_nolapic_timer);
-
-static __init int setup_apicpmtimer(char *s)
-{
- apic_calibrate_pmtmr = 1;
- notsc_setup(NULL);
- return 0;
-}
-__setup("apicpmtimer", setup_apicpmtimer);
-
-static int __init apic_set_verbosity(char *arg)
-{
- if (!arg) {
-#ifdef CONFIG_X86_64
- skip_ioapic_setup = 0;
- ioapic_force = 1;
- return 0;
-#endif
- return -EINVAL;
- }
-
- if (strcmp("debug", arg) == 0)
- apic_verbosity = APIC_DEBUG;
- else if (strcmp("verbose", arg) == 0)
- apic_verbosity = APIC_VERBOSE;
- else {
- printk(KERN_WARNING "APIC Verbosity level %s not recognised"
- " use apic=verbose or apic=debug\n", arg);
- return -EINVAL;
- }
-
- return 0;
-}
-early_param("apic", apic_set_verbosity);
-
-static int __init lapic_insert_resource(void)
-{
- if (!apic_phys)
- return -1;
-
- /* Put local APIC into the resource map. */
- lapic_resource.start = apic_phys;
- lapic_resource.end = lapic_resource.start + PAGE_SIZE - 1;
- insert_resource(&iomem_resource, &lapic_resource);
-
- return 0;
-}
-
-/*
- * need call insert after e820_reserve_resources()
- * that is using request_resource
- */
-late_initcall(lapic_insert_resource);
diff --git a/arch/x86/kernel/bios_uv.c b/arch/x86/kernel/bios_uv.c
index fdd585f9c53d..f0dfe6f17e7e 100644
--- a/arch/x86/kernel/bios_uv.c
+++ b/arch/x86/kernel/bios_uv.c
@@ -1,8 +1,6 @@
/*
* BIOS run time interface routines.
*
- * Copyright (c) 2008 Silicon Graphics, Inc. All Rights Reserved.
- *
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
@@ -16,33 +14,128 @@
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ * Copyright (c) 2008 Silicon Graphics, Inc. All Rights Reserved.
+ * Copyright (c) Russ Anderson
*/
+#include
+#include
+#include
#include
+#include
-const char *
-x86_bios_strerror(long status)
+struct uv_systab uv_systab;
+
+s64 uv_bios_call(enum uv_bios_cmd which, u64 a1, u64 a2, u64 a3, u64 a4, u64 a5)
{
- const char *str;
- switch (status) {
- case 0: str = "Call completed without error"; break;
- case -1: str = "Not implemented"; break;
- case -2: str = "Invalid argument"; break;
- case -3: str = "Call completed with error"; break;
- default: str = "Unknown BIOS status code"; break;
+ struct uv_systab *tab = &uv_systab;
+
+ if (!tab->function)
+ /*
+ * BIOS does not support UV systab
+ */
+ return BIOS_STATUS_UNIMPLEMENTED;
+
+ return efi_call6((void *)__va(tab->function),
+ (u64)which, a1, a2, a3, a4, a5);
+}
+
+s64 uv_bios_call_irqsave(enum uv_bios_cmd which, u64 a1, u64 a2, u64 a3,
+ u64 a4, u64 a5)
+{
+ unsigned long bios_flags;
+ s64 ret;
+
+ local_irq_save(bios_flags);
+ ret = uv_bios_call(which, a1, a2, a3, a4, a5);
+ local_irq_restore(bios_flags);
+
+ return ret;
+}
+
+s64 uv_bios_call_reentrant(enum uv_bios_cmd which, u64 a1, u64 a2, u64 a3,
+ u64 a4, u64 a5)
+{
+ s64 ret;
+
+ preempt_disable();
+ ret = uv_bios_call(which, a1, a2, a3, a4, a5);
+ preempt_enable();
+
+ return ret;
+}
+
+
+long sn_partition_id;
+EXPORT_SYMBOL_GPL(sn_partition_id);
+long uv_coherency_id;
+EXPORT_SYMBOL_GPL(uv_coherency_id);
+long uv_region_size;
+EXPORT_SYMBOL_GPL(uv_region_size);
+int uv_type;
+
+
+s64 uv_bios_get_sn_info(int fc, int *uvtype, long *partid, long *coher,
+ long *region)
+{
+ s64 ret;
+ u64 v0, v1;
+ union partition_info_u part;
+
+ ret = uv_bios_call_irqsave(UV_BIOS_GET_SN_INFO, fc,
+ (u64)(&v0), (u64)(&v1), 0, 0);
+ if (ret != BIOS_STATUS_SUCCESS)
+ return ret;
+
+ part.val = v0;
+ if (uvtype)
+ *uvtype = part.hub_version;
+ if (partid)
+ *partid = part.partition_id;
+ if (coher)
+ *coher = part.coherence_id;
+ if (region)
+ *region = part.region_size;
+ return ret;
+}
+
+
+s64 uv_bios_freq_base(u64 clock_type, u64 *ticks_per_second)
+{
+ return uv_bios_call(UV_BIOS_FREQ_BASE, clock_type,
+ (u64)ticks_per_second, 0, 0, 0);
+}
+EXPORT_SYMBOL_GPL(uv_bios_freq_base);
+
+
+#ifdef CONFIG_EFI
+void uv_bios_init(void)
+{
+ struct uv_systab *tab;
+
+ if ((efi.uv_systab == EFI_INVALID_TABLE_ADDR) ||
+ (efi.uv_systab == (unsigned long)NULL)) {
+ printk(KERN_CRIT "No EFI UV System Table.\n");
+ uv_systab.function = (unsigned long)NULL;
+ return;
}
- return str;
-}
-long
-x86_bios_freq_base(unsigned long which, unsigned long *ticks_per_second,
- unsigned long *drift_info)
-{
- struct uv_bios_retval isrv;
+ tab = (struct uv_systab *)ioremap(efi.uv_systab,
+ sizeof(struct uv_systab));
+ if (strncmp(tab->signature, "UVST", 4) != 0)
+ printk(KERN_ERR "bad signature in UV system table!");
- BIOS_CALL(isrv, BIOS_FREQ_BASE, which, 0, 0, 0, 0, 0, 0);
- *ticks_per_second = isrv.v0;
- *drift_info = isrv.v1;
- return isrv.status;
+ /*
+ * Copy table to permanent spot for later use.
+ */
+ memcpy(&uv_systab, tab, sizeof(struct uv_systab));
+ iounmap(tab);
+
+ printk(KERN_INFO "EFI UV System Table Revision %d\n", tab->revision);
}
-EXPORT_SYMBOL_GPL(x86_bios_freq_base);
+#else /* !CONFIG_EFI */
+
+void uv_bios_init(void) { }
+#endif
+
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index 32e73520adf7..8f1e31db2ad5 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -249,7 +249,7 @@ static void __cpuinit srat_detect_node(struct cpuinfo_x86 *c)
}
numa_set_node(cpu, node);
- printk(KERN_INFO "CPU %d/%x -> Node %d\n", cpu, apicid, node);
+ printk(KERN_INFO "CPU %d/0x%x -> Node %d\n", cpu, apicid, node);
#endif
}
diff --git a/arch/x86/kernel/cpu/cpufreq/longhaul.c b/arch/x86/kernel/cpu/cpufreq/longhaul.c
index 06fcce516d51..b0461856acfb 100644
--- a/arch/x86/kernel/cpu/cpufreq/longhaul.c
+++ b/arch/x86/kernel/cpu/cpufreq/longhaul.c
@@ -1,5 +1,5 @@
/*
- * (C) 2001-2004 Dave Jones.
+ * (C) 2001-2004 Dave Jones.
* (C) 2002 Padraig Brady.
*
* Licensed under the terms of the GNU GPL License version 2.
@@ -1019,7 +1019,7 @@ MODULE_PARM_DESC(scale_voltage, "Scale voltage of processor");
module_param(revid_errata, int, 0644);
MODULE_PARM_DESC(revid_errata, "Ignore CPU Revision ID");
-MODULE_AUTHOR ("Dave Jones ");
+MODULE_AUTHOR ("Dave Jones ");
MODULE_DESCRIPTION ("Longhaul driver for VIA Cyrix processors.");
MODULE_LICENSE ("GPL");
diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k6.c b/arch/x86/kernel/cpu/cpufreq/powernow-k6.c
index b5ced806a316..c1ac5790c63e 100644
--- a/arch/x86/kernel/cpu/cpufreq/powernow-k6.c
+++ b/arch/x86/kernel/cpu/cpufreq/powernow-k6.c
@@ -246,7 +246,7 @@ static void __exit powernow_k6_exit(void)
}
-MODULE_AUTHOR("Arjan van de Ven , Dave Jones , Dominik Brodowski ");
+MODULE_AUTHOR("Arjan van de Ven, Dave Jones , Dominik Brodowski ");
MODULE_DESCRIPTION("PowerNow! driver for AMD K6-2+ / K6-3+ processors.");
MODULE_LICENSE("GPL");
diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k7.c b/arch/x86/kernel/cpu/cpufreq/powernow-k7.c
index 0a61159d7b71..7c7d56b43136 100644
--- a/arch/x86/kernel/cpu/cpufreq/powernow-k7.c
+++ b/arch/x86/kernel/cpu/cpufreq/powernow-k7.c
@@ -1,6 +1,6 @@
/*
* AMD K7 Powernow driver.
- * (C) 2003 Dave Jones on behalf of SuSE Labs.
+ * (C) 2003 Dave Jones on behalf of SuSE Labs.
* (C) 2003-2004 Dave Jones
*
* Licensed under the terms of the GNU GPL License version 2.
@@ -692,7 +692,7 @@ static void __exit powernow_exit (void)
module_param(acpi_force, int, 0444);
MODULE_PARM_DESC(acpi_force, "Force ACPI to be used.");
-MODULE_AUTHOR ("Dave Jones ");
+MODULE_AUTHOR ("Dave Jones ");
MODULE_DESCRIPTION ("Powernow driver for AMD K7 processors.");
MODULE_LICENSE ("GPL");
diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
index 84bb395038d8..008d23ba491b 100644
--- a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
+++ b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
@@ -7,7 +7,7 @@
* Support : mark.langsdorf@amd.com
*
* Based on the powernow-k7.c module written by Dave Jones.
- * (C) 2003 Dave Jones on behalf of SuSE Labs
+ * (C) 2003 Dave Jones on behalf of SuSE Labs
* (C) 2004 Dominik Brodowski
* (C) 2004 Pavel Machek
* Licensed under the terms of the GNU GPL License version 2.
diff --git a/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c b/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c
index 191f7263c61d..04d0376b64b0 100644
--- a/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c
+++ b/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c
@@ -431,7 +431,7 @@ static void __exit speedstep_exit(void)
}
-MODULE_AUTHOR ("Dave Jones , Dominik Brodowski ");
+MODULE_AUTHOR ("Dave Jones , Dominik Brodowski ");
MODULE_DESCRIPTION ("Speedstep driver for Intel mobile processors on chipsets with ICH-M southbridges.");
MODULE_LICENSE ("GPL");
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index 99468dbd08da..cce0b6118d55 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -174,7 +174,7 @@ static void __cpuinit srat_detect_node(void)
node = first_node(node_online_map);
numa_set_node(cpu, node);
- printk(KERN_INFO "CPU %d/%x -> Node %d\n", cpu, apicid, node);
+ printk(KERN_INFO "CPU %d/0x%x -> Node %d\n", cpu, apicid, node);
#endif
}
diff --git a/arch/x86/kernel/cpu/mcheck/k7.c b/arch/x86/kernel/cpu/mcheck/k7.c
index f390c9f66351..dd3af6e7b39a 100644
--- a/arch/x86/kernel/cpu/mcheck/k7.c
+++ b/arch/x86/kernel/cpu/mcheck/k7.c
@@ -1,6 +1,6 @@
/*
- * Athlon/Hammer specific Machine Check Exception Reporting
- * (C) Copyright 2002 Dave Jones
+ * Athlon specific Machine Check Exception Reporting
+ * (C) Copyright 2002 Dave Jones
*/
#include
diff --git a/arch/x86/kernel/cpu/mcheck/mce_32.c b/arch/x86/kernel/cpu/mcheck/mce_32.c
index 774d87cfd8cd..0ebf3fc6a610 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_32.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_32.c
@@ -1,6 +1,6 @@
/*
* mce.c - x86 Machine Check Exception Reporting
- * (c) 2002 Alan Cox , Dave Jones
+ * (c) 2002 Alan Cox , Dave Jones
*/
#include
diff --git a/arch/x86/kernel/cpu/mcheck/non-fatal.c b/arch/x86/kernel/cpu/mcheck/non-fatal.c
index cc1fccdd31e0..a74af128efc9 100644
--- a/arch/x86/kernel/cpu/mcheck/non-fatal.c
+++ b/arch/x86/kernel/cpu/mcheck/non-fatal.c
@@ -1,7 +1,7 @@
/*
* Non Fatal Machine Check Exception Reporting
*
- * (C) Copyright 2002 Dave Jones.
+ * (C) Copyright 2002 Dave Jones.
*
* This file contains routines to check for non-fatal MCEs every 15s
*
diff --git a/arch/x86/kernel/cpu/perfctr-watchdog.c b/arch/x86/kernel/cpu/perfctr-watchdog.c
index 6bff382094f5..9abd48b22674 100644
--- a/arch/x86/kernel/cpu/perfctr-watchdog.c
+++ b/arch/x86/kernel/cpu/perfctr-watchdog.c
@@ -17,6 +17,8 @@
#include
#include
#include
+#include
+
#include
#include
@@ -336,7 +338,8 @@ static void single_msr_unreserve(void)
release_perfctr_nmi(wd_ops->perfctr);
}
-static void single_msr_rearm(struct nmi_watchdog_ctlblk *wd, unsigned nmi_hz)
+static void __kprobes
+single_msr_rearm(struct nmi_watchdog_ctlblk *wd, unsigned nmi_hz)
{
/* start the cycle over again */
write_watchdog_counter(wd->perfctr_msr, NULL, nmi_hz);
@@ -401,7 +404,7 @@ static int setup_p6_watchdog(unsigned nmi_hz)
return 1;
}
-static void p6_rearm(struct nmi_watchdog_ctlblk *wd, unsigned nmi_hz)
+static void __kprobes p6_rearm(struct nmi_watchdog_ctlblk *wd, unsigned nmi_hz)
{
/*
* P6 based Pentium M need to re-unmask
@@ -605,7 +608,7 @@ static void p4_unreserve(void)
release_perfctr_nmi(MSR_P4_IQ_PERFCTR0);
}
-static void p4_rearm(struct nmi_watchdog_ctlblk *wd, unsigned nmi_hz)
+static void __kprobes p4_rearm(struct nmi_watchdog_ctlblk *wd, unsigned nmi_hz)
{
unsigned dummy;
/*
@@ -784,7 +787,7 @@ unsigned lapic_adjust_nmi_hz(unsigned hz)
return hz;
}
-int lapic_wd_event(unsigned nmi_hz)
+int __kprobes lapic_wd_event(unsigned nmi_hz)
{
struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
u64 ctr;
diff --git a/arch/x86/kernel/efi.c b/arch/x86/kernel/efi.c
index 945a31cdd81f..1119d247fe11 100644
--- a/arch/x86/kernel/efi.c
+++ b/arch/x86/kernel/efi.c
@@ -366,6 +366,10 @@ void __init efi_init(void)
SMBIOS_TABLE_GUID)) {
efi.smbios = config_tables[i].table;
printk(" SMBIOS=0x%lx ", config_tables[i].table);
+ } else if (!efi_guidcmp(config_tables[i].guid,
+ UV_SYSTEM_TABLE_GUID)) {
+ efi.uv_systab = config_tables[i].table;
+ printk(" UVsystab=0x%lx ", config_tables[i].table);
} else if (!efi_guidcmp(config_tables[i].guid,
HCDP_TABLE_GUID)) {
efi.hcdp = config_tables[i].table;
diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S
index b21fbfaffe39..c356423a6026 100644
--- a/arch/x86/kernel/entry_32.S
+++ b/arch/x86/kernel/entry_32.S
@@ -629,7 +629,7 @@ ENTRY(interrupt)
ENTRY(irq_entries_start)
RING0_INT_FRAME
vector=0
-.rept NR_IRQS
+.rept NR_VECTORS
ALIGN
.if vector
CFI_ADJUST_CFA_OFFSET -4
@@ -1153,20 +1153,6 @@ ENDPROC(xen_failsafe_callback)
#ifdef CONFIG_DYNAMIC_FTRACE
ENTRY(mcount)
- pushl %eax
- pushl %ecx
- pushl %edx
- movl 0xc(%esp), %eax
- subl $MCOUNT_INSN_SIZE, %eax
-
-.globl mcount_call
-mcount_call:
- call ftrace_stub
-
- popl %edx
- popl %ecx
- popl %eax
-
ret
END(mcount)
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index 1db6ce4314e1..09e7145484c5 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -64,32 +64,6 @@
#ifdef CONFIG_FTRACE
#ifdef CONFIG_DYNAMIC_FTRACE
ENTRY(mcount)
-
- subq $0x38, %rsp
- movq %rax, (%rsp)
- movq %rcx, 8(%rsp)
- movq %rdx, 16(%rsp)
- movq %rsi, 24(%rsp)
- movq %rdi, 32(%rsp)
- movq %r8, 40(%rsp)
- movq %r9, 48(%rsp)
-
- movq 0x38(%rsp), %rdi
- subq $MCOUNT_INSN_SIZE, %rdi
-
-.globl mcount_call
-mcount_call:
- call ftrace_stub
-
- movq 48(%rsp), %r9
- movq 40(%rsp), %r8
- movq 32(%rsp), %rdi
- movq 24(%rsp), %rsi
- movq 16(%rsp), %rdx
- movq 8(%rsp), %rcx
- movq (%rsp), %rax
- addq $0x38, %rsp
-
retq
END(mcount)
diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c
index ab115cd15fdf..d073d981a730 100644
--- a/arch/x86/kernel/ftrace.c
+++ b/arch/x86/kernel/ftrace.c
@@ -11,17 +11,18 @@
#include
#include
+#include
#include
#include
#include
#include
-#include
#include
+#include
/* Long is fine, even if it is only 4 bytes ;-) */
-static long *ftrace_nop;
+static unsigned long *ftrace_nop;
union ftrace_code_union {
char code[MCOUNT_INSN_SIZE];
@@ -60,11 +61,7 @@ notrace int
ftrace_modify_code(unsigned long ip, unsigned char *old_code,
unsigned char *new_code)
{
- unsigned replaced;
- unsigned old = *(unsigned *)old_code; /* 4 bytes */
- unsigned new = *(unsigned *)new_code; /* 4 bytes */
- unsigned char newch = new_code[4];
- int faulted = 0;
+ unsigned char replaced[MCOUNT_INSN_SIZE];
/*
* Note: Due to modules and __init, code can
@@ -72,29 +69,20 @@ ftrace_modify_code(unsigned long ip, unsigned char *old_code,
* as well as code changing.
*
* No real locking needed, this code is run through
- * kstop_machine.
+ * kstop_machine, or before SMP starts.
*/
- asm volatile (
- "1: lock\n"
- " cmpxchg %3, (%2)\n"
- " jnz 2f\n"
- " movb %b4, 4(%2)\n"
- "2:\n"
- ".section .fixup, \"ax\"\n"
- "3: movl $1, %0\n"
- " jmp 2b\n"
- ".previous\n"
- _ASM_EXTABLE(1b, 3b)
- : "=r"(faulted), "=a"(replaced)
- : "r"(ip), "r"(new), "c"(newch),
- "0"(faulted), "a"(old)
- : "memory");
+ if (__copy_from_user_inatomic(replaced, (char __user *)ip, MCOUNT_INSN_SIZE))
+ return 1;
+
+ if (memcmp(replaced, old_code, MCOUNT_INSN_SIZE) != 0)
+ return 2;
+
+ WARN_ON_ONCE(__copy_to_user_inatomic((char __user *)ip, new_code,
+ MCOUNT_INSN_SIZE));
+
sync_core();
- if (replaced != old && replaced != new)
- faulted = 2;
-
- return faulted;
+ return 0;
}
notrace int ftrace_update_ftrace_func(ftrace_func_t func)
@@ -112,30 +100,76 @@ notrace int ftrace_update_ftrace_func(ftrace_func_t func)
notrace int ftrace_mcount_set(unsigned long *data)
{
- unsigned long ip = (long)(&mcount_call);
- unsigned long *addr = data;
- unsigned char old[MCOUNT_INSN_SIZE], *new;
-
- /*
- * Replace the mcount stub with a pointer to the
- * ip recorder function.
- */
- memcpy(old, &mcount_call, MCOUNT_INSN_SIZE);
- new = ftrace_call_replace(ip, *addr);
- *addr = ftrace_modify_code(ip, old, new);
-
+ /* mcount is initialized as a nop */
+ *data = 0;
return 0;
}
int __init ftrace_dyn_arch_init(void *data)
{
- const unsigned char *const *noptable = find_nop_table();
+ extern const unsigned char ftrace_test_p6nop[];
+ extern const unsigned char ftrace_test_nop5[];
+ extern const unsigned char ftrace_test_jmp[];
+ int faulted = 0;
- /* This is running in kstop_machine */
+ /*
+ * There is no good nop for all x86 archs.
+ * We will default to using the P6_NOP5, but first we
+ * will test to make sure that the nop will actually
+ * work on this CPU. If it faults, we will then
+ * go to a lesser efficient 5 byte nop. If that fails
+ * we then just use a jmp as our nop. This isn't the most
+ * efficient nop, but we can not use a multi part nop
+ * since we would then risk being preempted in the middle
+ * of that nop, and if we enabled tracing then, it might
+ * cause a system crash.
+ *
+ * TODO: check the cpuid to determine the best nop.
+ */
+ asm volatile (
+ "jmp ftrace_test_jmp\n"
+ /* This code needs to stay around */
+ ".section .text, \"ax\"\n"
+ "ftrace_test_jmp:"
+ "jmp ftrace_test_p6nop\n"
+ "nop\n"
+ "nop\n"
+ "nop\n" /* 2 byte jmp + 3 bytes */
+ "ftrace_test_p6nop:"
+ P6_NOP5
+ "jmp 1f\n"
+ "ftrace_test_nop5:"
+ ".byte 0x66,0x66,0x66,0x66,0x90\n"
+ "jmp 1f\n"
+ ".previous\n"
+ "1:"
+ ".section .fixup, \"ax\"\n"
+ "2: movl $1, %0\n"
+ " jmp ftrace_test_nop5\n"
+ "3: movl $2, %0\n"
+ " jmp 1b\n"
+ ".previous\n"
+ _ASM_EXTABLE(ftrace_test_p6nop, 2b)
+ _ASM_EXTABLE(ftrace_test_nop5, 3b)
+ : "=r"(faulted) : "0" (faulted));
- ftrace_mcount_set(data);
+ switch (faulted) {
+ case 0:
+ pr_info("ftrace: converting mcount calls to 0f 1f 44 00 00\n");
+ ftrace_nop = (unsigned long *)ftrace_test_p6nop;
+ break;
+ case 1:
+ pr_info("ftrace: converting mcount calls to 66 66 66 66 90\n");
+ ftrace_nop = (unsigned long *)ftrace_test_nop5;
+ break;
+ case 2:
+ pr_info("ftrace: converting mcount calls to jmp . + 5\n");
+ ftrace_nop = (unsigned long *)ftrace_test_jmp;
+ break;
+ }
- ftrace_nop = (unsigned long *)noptable[MCOUNT_INSN_SIZE];
+ /* The return code is retured via data */
+ *(unsigned long *)data = 0;
return 0;
}
diff --git a/arch/x86/kernel/genapic_flat_64.c b/arch/x86/kernel/genapic_flat_64.c
index 9eca5ba7a6b1..2ec2de8d8c46 100644
--- a/arch/x86/kernel/genapic_flat_64.c
+++ b/arch/x86/kernel/genapic_flat_64.c
@@ -179,8 +179,10 @@ static int __init physflat_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
* is an example).
*/
if (acpi_gbl_FADT.header.revision > FADT2_REVISION_ID &&
- (acpi_gbl_FADT.flags & ACPI_FADT_APIC_PHYSICAL))
+ (acpi_gbl_FADT.flags & ACPI_FADT_APIC_PHYSICAL)) {
+ printk(KERN_DEBUG "system APIC only can use physical flat");
return 1;
+ }
#endif
return 0;
diff --git a/arch/x86/kernel/genx2apic_uv_x.c b/arch/x86/kernel/genx2apic_uv_x.c
index 33581d94a90e..bfd532843df6 100644
--- a/arch/x86/kernel/genx2apic_uv_x.c
+++ b/arch/x86/kernel/genx2apic_uv_x.c
@@ -341,12 +341,12 @@ static __init void map_mmioh_high(int max_pnode)
static __init void uv_rtc_init(void)
{
- long status, ticks_per_sec, drift;
+ long status;
+ u64 ticks_per_sec;
- status =
- x86_bios_freq_base(BIOS_FREQ_BASE_REALTIME_CLOCK, &ticks_per_sec,
- &drift);
- if (status != 0 || ticks_per_sec < 100000) {
+ status = uv_bios_freq_base(BIOS_FREQ_BASE_REALTIME_CLOCK,
+ &ticks_per_sec);
+ if (status != BIOS_STATUS_SUCCESS || ticks_per_sec < 100000) {
printk(KERN_WARNING
"unable to determine platform RTC clock frequency, "
"guessing.\n");
@@ -356,7 +356,22 @@ static __init void uv_rtc_init(void)
sn_rtc_cycles_per_second = ticks_per_sec;
}
-static bool uv_system_inited;
+/*
+ * Called on each cpu to initialize the per_cpu UV data area.
+ * ZZZ hotplug not supported yet
+ */
+void __cpuinit uv_cpu_init(void)
+{
+ /* CPU 0 initilization will be done via uv_system_init. */
+ if (!uv_blade_info)
+ return;
+
+ uv_blade_info[uv_numa_blade_id()].nr_online_cpus++;
+
+ if (get_uv_system_type() == UV_NON_UNIQUE_APIC)
+ set_x2apic_extra_bits(uv_hub_info->pnode);
+}
+
void __init uv_system_init(void)
{
@@ -412,6 +427,9 @@ void __init uv_system_init(void)
gnode_upper = (((unsigned long)node_id.s.node_id) &
~((1 << n_val) - 1)) << m_val;
+ uv_bios_init();
+ uv_bios_get_sn_info(0, &uv_type, &sn_partition_id,
+ &uv_coherency_id, &uv_region_size);
uv_rtc_init();
for_each_present_cpu(cpu) {
@@ -433,7 +451,7 @@ void __init uv_system_init(void)
uv_cpu_hub_info(cpu)->gpa_mask = (1 << (m_val + n_val)) - 1;
uv_cpu_hub_info(cpu)->gnode_upper = gnode_upper;
uv_cpu_hub_info(cpu)->global_mmr_base = mmr_base;
- uv_cpu_hub_info(cpu)->coherency_domain_number = 0;/* ZZZ */
+ uv_cpu_hub_info(cpu)->coherency_domain_number = uv_coherency_id;
uv_node_to_blade[nid] = blade;
uv_cpu_to_blade[cpu] = blade;
max_pnode = max(pnode, max_pnode);
@@ -448,21 +466,6 @@ void __init uv_system_init(void)
map_mmr_high(max_pnode);
map_config_high(max_pnode);
map_mmioh_high(max_pnode);
- uv_system_inited = true;
+
+ uv_cpu_init();
}
-
-/*
- * Called on each cpu to initialize the per_cpu UV data area.
- * ZZZ hotplug not supported yet
- */
-void __cpuinit uv_cpu_init(void)
-{
- BUG_ON(!uv_system_inited);
-
- uv_blade_info[uv_numa_blade_id()].nr_online_cpus++;
-
- if (get_uv_system_type() == UV_NON_UNIQUE_APIC)
- set_x2apic_extra_bits(uv_hub_info->pnode);
-}
-
-
diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c
index acf62fc233da..77017e834cf7 100644
--- a/arch/x86/kernel/hpet.c
+++ b/arch/x86/kernel/hpet.c
@@ -1,29 +1,49 @@
#include
#include
+#include
+#include
#include
#include
#include
#include
-#include
+#include
#include
+#include
#include
-#include
#include
-#include
+#include
-#define HPET_MASK CLOCKSOURCE_MASK(32)
-#define HPET_SHIFT 22
+#define HPET_MASK CLOCKSOURCE_MASK(32)
+#define HPET_SHIFT 22
/* FSEC = 10^-15
NSEC = 10^-9 */
-#define FSEC_PER_NSEC 1000000L
+#define FSEC_PER_NSEC 1000000L
+
+#define HPET_DEV_USED_BIT 2
+#define HPET_DEV_USED (1 << HPET_DEV_USED_BIT)
+#define HPET_DEV_VALID 0x8
+#define HPET_DEV_FSB_CAP 0x1000
+#define HPET_DEV_PERI_CAP 0x2000
+
+#define EVT_TO_HPET_DEV(evt) container_of(evt, struct hpet_dev, evt)
/*
* HPET address is set in acpi/boot.c, when an ACPI entry exists
*/
-unsigned long hpet_address;
-static void __iomem *hpet_virt_address;
+unsigned long hpet_address;
+unsigned long hpet_num_timers;
+static void __iomem *hpet_virt_address;
+
+struct hpet_dev {
+ struct clock_event_device evt;
+ unsigned int num;
+ int cpu;
+ unsigned int irq;
+ unsigned int flags;
+ char name[10];
+};
unsigned long hpet_readl(unsigned long a)
{
@@ -59,7 +79,7 @@ static inline void hpet_clear_mapping(void)
static int boot_hpet_disable;
int hpet_force_user;
-static int __init hpet_setup(char* str)
+static int __init hpet_setup(char *str)
{
if (str) {
if (!strncmp("disable", str, 7))
@@ -80,7 +100,7 @@ __setup("nohpet", disable_hpet);
static inline int is_hpet_capable(void)
{
- return (!boot_hpet_disable && hpet_address);
+ return !boot_hpet_disable && hpet_address;
}
/*
@@ -102,6 +122,9 @@ EXPORT_SYMBOL_GPL(is_hpet_enabled);
* timer 0 and timer 1 in case of RTC emulation.
*/
#ifdef CONFIG_HPET
+
+static void hpet_reserve_msi_timers(struct hpet_data *hd);
+
static void hpet_reserve_platform_timers(unsigned long id)
{
struct hpet __iomem *hpet = hpet_virt_address;
@@ -111,10 +134,10 @@ static void hpet_reserve_platform_timers(unsigned long id)
nrtimers = ((id & HPET_ID_NUMBER) >> HPET_ID_NUMBER_SHIFT) + 1;
- memset(&hd, 0, sizeof (hd));
- hd.hd_phys_address = hpet_address;
- hd.hd_address = hpet;
- hd.hd_nirqs = nrtimers;
+ memset(&hd, 0, sizeof(hd));
+ hd.hd_phys_address = hpet_address;
+ hd.hd_address = hpet;
+ hd.hd_nirqs = nrtimers;
hpet_reserve_timer(&hd, 0);
#ifdef CONFIG_HPET_EMULATE_RTC
@@ -130,10 +153,12 @@ static void hpet_reserve_platform_timers(unsigned long id)
hd.hd_irq[1] = HPET_LEGACY_RTC;
for (i = 2; i < nrtimers; timer++, i++) {
- hd.hd_irq[i] = (readl(&timer->hpet_config) & Tn_INT_ROUTE_CNF_MASK) >>
- Tn_INT_ROUTE_CNF_SHIFT;
+ hd.hd_irq[i] = (readl(&timer->hpet_config) &
+ Tn_INT_ROUTE_CNF_MASK) >> Tn_INT_ROUTE_CNF_SHIFT;
}
+ hpet_reserve_msi_timers(&hd);
+
hpet_alloc(&hd);
}
@@ -227,60 +252,70 @@ static void hpet_legacy_clockevent_register(void)
printk(KERN_DEBUG "hpet clockevent registered\n");
}
-static void hpet_legacy_set_mode(enum clock_event_mode mode,
- struct clock_event_device *evt)
+static int hpet_setup_msi_irq(unsigned int irq);
+
+static void hpet_set_mode(enum clock_event_mode mode,
+ struct clock_event_device *evt, int timer)
{
unsigned long cfg, cmp, now;
uint64_t delta;
- switch(mode) {
+ switch (mode) {
case CLOCK_EVT_MODE_PERIODIC:
- delta = ((uint64_t)(NSEC_PER_SEC/HZ)) * hpet_clockevent.mult;
- delta >>= hpet_clockevent.shift;
+ delta = ((uint64_t)(NSEC_PER_SEC/HZ)) * evt->mult;
+ delta >>= evt->shift;
now = hpet_readl(HPET_COUNTER);
cmp = now + (unsigned long) delta;
- cfg = hpet_readl(HPET_T0_CFG);
+ cfg = hpet_readl(HPET_Tn_CFG(timer));
cfg |= HPET_TN_ENABLE | HPET_TN_PERIODIC |
HPET_TN_SETVAL | HPET_TN_32BIT;
- hpet_writel(cfg, HPET_T0_CFG);
+ hpet_writel(cfg, HPET_Tn_CFG(timer));
/*
* The first write after writing TN_SETVAL to the
* config register sets the counter value, the second
* write sets the period.
*/
- hpet_writel(cmp, HPET_T0_CMP);
+ hpet_writel(cmp, HPET_Tn_CMP(timer));
udelay(1);
- hpet_writel((unsigned long) delta, HPET_T0_CMP);
+ hpet_writel((unsigned long) delta, HPET_Tn_CMP(timer));
break;
case CLOCK_EVT_MODE_ONESHOT:
- cfg = hpet_readl(HPET_T0_CFG);
+ cfg = hpet_readl(HPET_Tn_CFG(timer));
cfg &= ~HPET_TN_PERIODIC;
cfg |= HPET_TN_ENABLE | HPET_TN_32BIT;
- hpet_writel(cfg, HPET_T0_CFG);
+ hpet_writel(cfg, HPET_Tn_CFG(timer));
break;
case CLOCK_EVT_MODE_UNUSED:
case CLOCK_EVT_MODE_SHUTDOWN:
- cfg = hpet_readl(HPET_T0_CFG);
+ cfg = hpet_readl(HPET_Tn_CFG(timer));
cfg &= ~HPET_TN_ENABLE;
- hpet_writel(cfg, HPET_T0_CFG);
+ hpet_writel(cfg, HPET_Tn_CFG(timer));
break;
case CLOCK_EVT_MODE_RESUME:
- hpet_enable_legacy_int();
+ if (timer == 0) {
+ hpet_enable_legacy_int();
+ } else {
+ struct hpet_dev *hdev = EVT_TO_HPET_DEV(evt);
+ hpet_setup_msi_irq(hdev->irq);
+ disable_irq(hdev->irq);
+ irq_set_affinity(hdev->irq, cpumask_of_cpu(hdev->cpu));
+ enable_irq(hdev->irq);
+ }
break;
}
}
-static int hpet_legacy_next_event(unsigned long delta,
- struct clock_event_device *evt)
+static int hpet_next_event(unsigned long delta,
+ struct clock_event_device *evt, int timer)
{
u32 cnt;
cnt = hpet_readl(HPET_COUNTER);
cnt += (u32) delta;
- hpet_writel(cnt, HPET_T0_CMP);
+ hpet_writel(cnt, HPET_Tn_CMP(timer));
/*
* We need to read back the CMP register to make sure that
@@ -292,6 +327,347 @@ static int hpet_legacy_next_event(unsigned long delta,
return (s32)((u32)hpet_readl(HPET_COUNTER) - cnt) >= 0 ? -ETIME : 0;
}
+static void hpet_legacy_set_mode(enum clock_event_mode mode,
+ struct clock_event_device *evt)
+{
+ hpet_set_mode(mode, evt, 0);
+}
+
+static int hpet_legacy_next_event(unsigned long delta,
+ struct clock_event_device *evt)
+{
+ return hpet_next_event(delta, evt, 0);
+}
+
+/*
+ * HPET MSI Support
+ */
+#ifdef CONFIG_PCI_MSI
+
+static DEFINE_PER_CPU(struct hpet_dev *, cpu_hpet_dev);
+static struct hpet_dev *hpet_devs;
+
+void hpet_msi_unmask(unsigned int irq)
+{
+ struct hpet_dev *hdev = get_irq_data(irq);
+ unsigned long cfg;
+
+ /* unmask it */
+ cfg = hpet_readl(HPET_Tn_CFG(hdev->num));
+ cfg |= HPET_TN_FSB;
+ hpet_writel(cfg, HPET_Tn_CFG(hdev->num));
+}
+
+void hpet_msi_mask(unsigned int irq)
+{
+ unsigned long cfg;
+ struct hpet_dev *hdev = get_irq_data(irq);
+
+ /* mask it */
+ cfg = hpet_readl(HPET_Tn_CFG(hdev->num));
+ cfg &= ~HPET_TN_FSB;
+ hpet_writel(cfg, HPET_Tn_CFG(hdev->num));
+}
+
+void hpet_msi_write(unsigned int irq, struct msi_msg *msg)
+{
+ struct hpet_dev *hdev = get_irq_data(irq);
+
+ hpet_writel(msg->data, HPET_Tn_ROUTE(hdev->num));
+ hpet_writel(msg->address_lo, HPET_Tn_ROUTE(hdev->num) + 4);
+}
+
+void hpet_msi_read(unsigned int irq, struct msi_msg *msg)
+{
+ struct hpet_dev *hdev = get_irq_data(irq);
+
+ msg->data = hpet_readl(HPET_Tn_ROUTE(hdev->num));
+ msg->address_lo = hpet_readl(HPET_Tn_ROUTE(hdev->num) + 4);
+ msg->address_hi = 0;
+}
+
+static void hpet_msi_set_mode(enum clock_event_mode mode,
+ struct clock_event_device *evt)
+{
+ struct hpet_dev *hdev = EVT_TO_HPET_DEV(evt);
+ hpet_set_mode(mode, evt, hdev->num);
+}
+
+static int hpet_msi_next_event(unsigned long delta,
+ struct clock_event_device *evt)
+{
+ struct hpet_dev *hdev = EVT_TO_HPET_DEV(evt);
+ return hpet_next_event(delta, evt, hdev->num);
+}
+
+static int hpet_setup_msi_irq(unsigned int irq)
+{
+ if (arch_setup_hpet_msi(irq)) {
+ destroy_irq(irq);
+ return -EINVAL;
+ }
+ return 0;
+}
+
+static int hpet_assign_irq(struct hpet_dev *dev)
+{
+ unsigned int irq;
+
+ irq = create_irq();
+ if (!irq)
+ return -EINVAL;
+
+ set_irq_data(irq, dev);
+
+ if (hpet_setup_msi_irq(irq))
+ return -EINVAL;
+
+ dev->irq = irq;
+ return 0;
+}
+
+static irqreturn_t hpet_interrupt_handler(int irq, void *data)
+{
+ struct hpet_dev *dev = (struct hpet_dev *)data;
+ struct clock_event_device *hevt = &dev->evt;
+
+ if (!hevt->event_handler) {
+ printk(KERN_INFO "Spurious HPET timer interrupt on HPET timer %d\n",
+ dev->num);
+ return IRQ_HANDLED;
+ }
+
+ hevt->event_handler(hevt);
+ return IRQ_HANDLED;
+}
+
+static int hpet_setup_irq(struct hpet_dev *dev)
+{
+
+ if (request_irq(dev->irq, hpet_interrupt_handler,
+ IRQF_SHARED|IRQF_NOBALANCING, dev->name, dev))
+ return -1;
+
+ disable_irq(dev->irq);
+ irq_set_affinity(dev->irq, cpumask_of_cpu(dev->cpu));
+ enable_irq(dev->irq);
+
+ printk(KERN_DEBUG "hpet: %s irq %d for MSI\n",
+ dev->name, dev->irq);
+
+ return 0;
+}
+
+/* This should be called in specific @cpu */
+static void init_one_hpet_msi_clockevent(struct hpet_dev *hdev, int cpu)
+{
+ struct clock_event_device *evt = &hdev->evt;
+ uint64_t hpet_freq;
+
+ WARN_ON(cpu != smp_processor_id());
+ if (!(hdev->flags & HPET_DEV_VALID))
+ return;
+
+ if (hpet_setup_msi_irq(hdev->irq))
+ return;
+
+ hdev->cpu = cpu;
+ per_cpu(cpu_hpet_dev, cpu) = hdev;
+ evt->name = hdev->name;
+ hpet_setup_irq(hdev);
+ evt->irq = hdev->irq;
+
+ evt->rating = 110;
+ evt->features = CLOCK_EVT_FEAT_ONESHOT;
+ if (hdev->flags & HPET_DEV_PERI_CAP)
+ evt->features |= CLOCK_EVT_FEAT_PERIODIC;
+
+ evt->set_mode = hpet_msi_set_mode;
+ evt->set_next_event = hpet_msi_next_event;
+ evt->shift = 32;
+
+ /*
+ * The period is a femto seconds value. We need to calculate the
+ * scaled math multiplication factor for nanosecond to hpet tick
+ * conversion.
+ */
+ hpet_freq = 1000000000000000ULL;
+ do_div(hpet_freq, hpet_period);
+ evt->mult = div_sc((unsigned long) hpet_freq,
+ NSEC_PER_SEC, evt->shift);
+ /* Calculate the max delta */
+ evt->max_delta_ns = clockevent_delta2ns(0x7FFFFFFF, evt);
+ /* 5 usec minimum reprogramming delta. */
+ evt->min_delta_ns = 5000;
+
+ evt->cpumask = cpumask_of_cpu(hdev->cpu);
+ clockevents_register_device(evt);
+}
+
+#ifdef CONFIG_HPET
+/* Reserve at least one timer for userspace (/dev/hpet) */
+#define RESERVE_TIMERS 1
+#else
+#define RESERVE_TIMERS 0
+#endif
+
+static void hpet_msi_capability_lookup(unsigned int start_timer)
+{
+ unsigned int id;
+ unsigned int num_timers;
+ unsigned int num_timers_used = 0;
+ int i;
+
+ id = hpet_readl(HPET_ID);
+
+ num_timers = ((id & HPET_ID_NUMBER) >> HPET_ID_NUMBER_SHIFT);
+ num_timers++; /* Value read out starts from 0 */
+
+ hpet_devs = kzalloc(sizeof(struct hpet_dev) * num_timers, GFP_KERNEL);
+ if (!hpet_devs)
+ return;
+
+ hpet_num_timers = num_timers;
+
+ for (i = start_timer; i < num_timers - RESERVE_TIMERS; i++) {
+ struct hpet_dev *hdev = &hpet_devs[num_timers_used];
+ unsigned long cfg = hpet_readl(HPET_Tn_CFG(i));
+
+ /* Only consider HPET timer with MSI support */
+ if (!(cfg & HPET_TN_FSB_CAP))
+ continue;
+
+ hdev->flags = 0;
+ if (cfg & HPET_TN_PERIODIC_CAP)
+ hdev->flags |= HPET_DEV_PERI_CAP;
+ hdev->num = i;
+
+ sprintf(hdev->name, "hpet%d", i);
+ if (hpet_assign_irq(hdev))
+ continue;
+
+ hdev->flags |= HPET_DEV_FSB_CAP;
+ hdev->flags |= HPET_DEV_VALID;
+ num_timers_used++;
+ if (num_timers_used == num_possible_cpus())
+ break;
+ }
+
+ printk(KERN_INFO "HPET: %d timers in total, %d timers will be used for per-cpu timer\n",
+ num_timers, num_timers_used);
+}
+
+#ifdef CONFIG_HPET
+static void hpet_reserve_msi_timers(struct hpet_data *hd)
+{
+ int i;
+
+ if (!hpet_devs)
+ return;
+
+ for (i = 0; i < hpet_num_timers; i++) {
+ struct hpet_dev *hdev = &hpet_devs[i];
+
+ if (!(hdev->flags & HPET_DEV_VALID))
+ continue;
+
+ hd->hd_irq[hdev->num] = hdev->irq;
+ hpet_reserve_timer(hd, hdev->num);
+ }
+}
+#endif
+
+static struct hpet_dev *hpet_get_unused_timer(void)
+{
+ int i;
+
+ if (!hpet_devs)
+ return NULL;
+
+ for (i = 0; i < hpet_num_timers; i++) {
+ struct hpet_dev *hdev = &hpet_devs[i];
+
+ if (!(hdev->flags & HPET_DEV_VALID))
+ continue;
+ if (test_and_set_bit(HPET_DEV_USED_BIT,
+ (unsigned long *)&hdev->flags))
+ continue;
+ return hdev;
+ }
+ return NULL;
+}
+
+struct hpet_work_struct {
+ struct delayed_work work;
+ struct completion complete;
+};
+
+static void hpet_work(struct work_struct *w)
+{
+ struct hpet_dev *hdev;
+ int cpu = smp_processor_id();
+ struct hpet_work_struct *hpet_work;
+
+ hpet_work = container_of(w, struct hpet_work_struct, work.work);
+
+ hdev = hpet_get_unused_timer();
+ if (hdev)
+ init_one_hpet_msi_clockevent(hdev, cpu);
+
+ complete(&hpet_work->complete);
+}
+
+static int hpet_cpuhp_notify(struct notifier_block *n,
+ unsigned long action, void *hcpu)
+{
+ unsigned long cpu = (unsigned long)hcpu;
+ struct hpet_work_struct work;
+ struct hpet_dev *hdev = per_cpu(cpu_hpet_dev, cpu);
+
+ switch (action & 0xf) {
+ case CPU_ONLINE:
+ INIT_DELAYED_WORK(&work.work, hpet_work);
+ init_completion(&work.complete);
+ /* FIXME: add schedule_work_on() */
+ schedule_delayed_work_on(cpu, &work.work, 0);
+ wait_for_completion(&work.complete);
+ break;
+ case CPU_DEAD:
+ if (hdev) {
+ free_irq(hdev->irq, hdev);
+ hdev->flags &= ~HPET_DEV_USED;
+ per_cpu(cpu_hpet_dev, cpu) = NULL;
+ }
+ break;
+ }
+ return NOTIFY_OK;
+}
+#else
+
+static int hpet_setup_msi_irq(unsigned int irq)
+{
+ return 0;
+}
+static void hpet_msi_capability_lookup(unsigned int start_timer)
+{
+ return;
+}
+
+#ifdef CONFIG_HPET
+static void hpet_reserve_msi_timers(struct hpet_data *hd)
+{
+ return;
+}
+#endif
+
+static int hpet_cpuhp_notify(struct notifier_block *n,
+ unsigned long action, void *hcpu)
+{
+ return NOTIFY_OK;
+}
+
+#endif
+
/*
* Clock source related code
*/
@@ -427,8 +803,10 @@ int __init hpet_enable(void)
if (id & HPET_ID_LEGSUP) {
hpet_legacy_clockevent_register();
+ hpet_msi_capability_lookup(2);
return 1;
}
+ hpet_msi_capability_lookup(0);
return 0;
out_nohpet:
@@ -445,6 +823,8 @@ out_nohpet:
*/
static __init int hpet_late_init(void)
{
+ int cpu;
+
if (boot_hpet_disable)
return -ENODEV;
@@ -460,6 +840,13 @@ static __init int hpet_late_init(void)
hpet_reserve_platform_timers(hpet_readl(HPET_ID));
+ for_each_online_cpu(cpu) {
+ hpet_cpuhp_notify(NULL, CPU_ONLINE, (void *)(long)cpu);
+ }
+
+ /* This notifier should be called after workqueue is ready */
+ hotcpu_notifier(hpet_cpuhp_notify, -20);
+
return 0;
}
fs_initcall(hpet_late_init);
diff --git a/arch/x86/kernel/io_apic_64.c b/arch/x86/kernel/io_apic.c
similarity index 68%
rename from arch/x86/kernel/io_apic_64.c
rename to arch/x86/kernel/io_apic.c
index 02063ae042f7..b764d7429c61 100644
--- a/arch/x86/kernel/io_apic_64.c
+++ b/arch/x86/kernel/io_apic.c
@@ -27,17 +27,21 @@
#include
#include
#include
+#include
#include
+#include
#include
#include
#include
-#include
-#include
+#include
+#include
+#include /* time_after() */
#ifdef CONFIG_ACPI
#include
#endif
#include
#include
+#include
#include
#include
@@ -46,61 +50,28 @@
#include
#include
#include
+#include
#include
#include
#include
#include
+#include
#include
+#include
+#include
+#include
#include
#include
+#include
#define __apicdebuginit(type) static type __init
-struct irq_cfg {
- cpumask_t domain;
- cpumask_t old_domain;
- unsigned move_cleanup_count;
- u8 vector;
- u8 move_in_progress : 1;
-};
-
-/* irq_cfg is indexed by the sum of all RTEs in all I/O APICs. */
-static struct irq_cfg irq_cfg[NR_IRQS] __read_mostly = {
- [0] = { .domain = CPU_MASK_ALL, .vector = IRQ0_VECTOR, },
- [1] = { .domain = CPU_MASK_ALL, .vector = IRQ1_VECTOR, },
- [2] = { .domain = CPU_MASK_ALL, .vector = IRQ2_VECTOR, },
- [3] = { .domain = CPU_MASK_ALL, .vector = IRQ3_VECTOR, },
- [4] = { .domain = CPU_MASK_ALL, .vector = IRQ4_VECTOR, },
- [5] = { .domain = CPU_MASK_ALL, .vector = IRQ5_VECTOR, },
- [6] = { .domain = CPU_MASK_ALL, .vector = IRQ6_VECTOR, },
- [7] = { .domain = CPU_MASK_ALL, .vector = IRQ7_VECTOR, },
- [8] = { .domain = CPU_MASK_ALL, .vector = IRQ8_VECTOR, },
- [9] = { .domain = CPU_MASK_ALL, .vector = IRQ9_VECTOR, },
- [10] = { .domain = CPU_MASK_ALL, .vector = IRQ10_VECTOR, },
- [11] = { .domain = CPU_MASK_ALL, .vector = IRQ11_VECTOR, },
- [12] = { .domain = CPU_MASK_ALL, .vector = IRQ12_VECTOR, },
- [13] = { .domain = CPU_MASK_ALL, .vector = IRQ13_VECTOR, },
- [14] = { .domain = CPU_MASK_ALL, .vector = IRQ14_VECTOR, },
- [15] = { .domain = CPU_MASK_ALL, .vector = IRQ15_VECTOR, },
-};
-
-static int assign_irq_vector(int irq, cpumask_t mask);
-
-int first_system_vector = 0xfe;
-
-char system_vectors[NR_VECTORS] = { [0 ... NR_VECTORS-1] = SYS_VECTOR_FREE};
-
-int sis_apic_bug; /* not actually supported, dummy for compile */
-
-static int no_timer_check;
-
-static int disable_timer_pin_1 __initdata;
-
-int timer_through_8259 __initdata;
-
-/* Where if anywhere is the i8259 connect in external int mode */
-static struct { int pin, apic; } ioapic_i8259 = { -1, -1 };
+/*
+ * Is the SiS APIC rmw bug present ?
+ * -1 = don't know, 0 = no, 1 = yes
+ */
+int sis_apic_bug = -1;
static DEFINE_SPINLOCK(ioapic_lock);
static DEFINE_SPINLOCK(vector_lock);
@@ -110,9 +81,6 @@ static DEFINE_SPINLOCK(vector_lock);
*/
int nr_ioapic_registers[MAX_IO_APICS];
-/* I/O APIC RTE contents at the OS boot up */
-struct IO_APIC_route_entry *early_ioapic_entries[MAX_IO_APICS];
-
/* I/O APIC entries */
struct mp_config_ioapic mp_ioapics[MAX_IO_APICS];
int nr_ioapics;
@@ -123,11 +91,69 @@ struct mp_config_intsrc mp_irqs[MAX_IRQ_SOURCES];
/* # of MP IRQ source entries */
int mp_irq_entries;
+#if defined (CONFIG_MCA) || defined (CONFIG_EISA)
+int mp_bus_id_to_type[MAX_MP_BUSSES];
+#endif
+
DECLARE_BITMAP(mp_bus_not_pci, MAX_MP_BUSSES);
+int skip_ioapic_setup;
+
+static int __init parse_noapic(char *str)
+{
+ /* disable IO-APIC */
+ disable_ioapic_setup();
+ return 0;
+}
+early_param("noapic", parse_noapic);
+
+struct irq_pin_list;
+struct irq_cfg {
+ unsigned int irq;
+ struct irq_pin_list *irq_2_pin;
+ cpumask_t domain;
+ cpumask_t old_domain;
+ unsigned move_cleanup_count;
+ u8 vector;
+ u8 move_in_progress : 1;
+};
+
+/* irq_cfg is indexed by the sum of all RTEs in all I/O APICs. */
+static struct irq_cfg irq_cfgx[NR_IRQS] = {
+ [0] = { .irq = 0, .domain = CPU_MASK_ALL, .vector = IRQ0_VECTOR, },
+ [1] = { .irq = 1, .domain = CPU_MASK_ALL, .vector = IRQ1_VECTOR, },
+ [2] = { .irq = 2, .domain = CPU_MASK_ALL, .vector = IRQ2_VECTOR, },
+ [3] = { .irq = 3, .domain = CPU_MASK_ALL, .vector = IRQ3_VECTOR, },
+ [4] = { .irq = 4, .domain = CPU_MASK_ALL, .vector = IRQ4_VECTOR, },
+ [5] = { .irq = 5, .domain = CPU_MASK_ALL, .vector = IRQ5_VECTOR, },
+ [6] = { .irq = 6, .domain = CPU_MASK_ALL, .vector = IRQ6_VECTOR, },
+ [7] = { .irq = 7, .domain = CPU_MASK_ALL, .vector = IRQ7_VECTOR, },
+ [8] = { .irq = 8, .domain = CPU_MASK_ALL, .vector = IRQ8_VECTOR, },
+ [9] = { .irq = 9, .domain = CPU_MASK_ALL, .vector = IRQ9_VECTOR, },
+ [10] = { .irq = 10, .domain = CPU_MASK_ALL, .vector = IRQ10_VECTOR, },
+ [11] = { .irq = 11, .domain = CPU_MASK_ALL, .vector = IRQ11_VECTOR, },
+ [12] = { .irq = 12, .domain = CPU_MASK_ALL, .vector = IRQ12_VECTOR, },
+ [13] = { .irq = 13, .domain = CPU_MASK_ALL, .vector = IRQ13_VECTOR, },
+ [14] = { .irq = 14, .domain = CPU_MASK_ALL, .vector = IRQ14_VECTOR, },
+ [15] = { .irq = 15, .domain = CPU_MASK_ALL, .vector = IRQ15_VECTOR, },
+};
+
+#define for_each_irq_cfg(irq, cfg) \
+ for (irq = 0, cfg = irq_cfgx; irq < nr_irqs; irq++, cfg++)
+
+static struct irq_cfg *irq_cfg(unsigned int irq)
+{
+ return irq < nr_irqs ? irq_cfgx + irq : NULL;
+}
+
+static struct irq_cfg *irq_cfg_alloc(unsigned int irq)
+{
+ return irq_cfg(irq);
+}
+
/*
- * Rough estimation of how many shared IRQs there are, can
- * be changed anytime.
+ * Rough estimation of how many shared IRQs there are, can be changed
+ * anytime.
*/
#define MAX_PLUS_SHARED_IRQS NR_IRQS
#define PIN_MAP_SIZE (MAX_PLUS_SHARED_IRQS + NR_IRQS)
@@ -139,9 +165,36 @@ DECLARE_BITMAP(mp_bus_not_pci, MAX_MP_BUSSES);
* between pins and IRQs.
*/
-static struct irq_pin_list {
- short apic, pin, next;
-} irq_2_pin[PIN_MAP_SIZE];
+struct irq_pin_list {
+ int apic, pin;
+ struct irq_pin_list *next;
+};
+
+static struct irq_pin_list irq_2_pin_head[PIN_MAP_SIZE];
+static struct irq_pin_list *irq_2_pin_ptr;
+
+static void __init irq_2_pin_init(void)
+{
+ struct irq_pin_list *pin = irq_2_pin_head;
+ int i;
+
+ for (i = 1; i < PIN_MAP_SIZE; i++)
+ pin[i-1].next = &pin[i];
+
+ irq_2_pin_ptr = &pin[0];
+}
+
+static struct irq_pin_list *get_one_free_irq_2_pin(void)
+{
+ struct irq_pin_list *pin = irq_2_pin_ptr;
+
+ if (!pin)
+ panic("can not get more irq_2_pin\n");
+
+ irq_2_pin_ptr = pin->next;
+ pin->next = NULL;
+ return pin;
+}
struct io_apic {
unsigned int index;
@@ -172,10 +225,15 @@ static inline void io_apic_write(unsigned int apic, unsigned int reg, unsigned i
/*
* Re-write a value: to be used for read-modify-write
* cycles where the read already set up the index register.
+ *
+ * Older SiS APIC requires we rewrite the index register
*/
-static inline void io_apic_modify(unsigned int apic, unsigned int value)
+static inline void io_apic_modify(unsigned int apic, unsigned int reg, unsigned int value)
{
struct io_apic __iomem *io_apic = io_apic_base(apic);
+
+ if (sis_apic_bug)
+ writel(reg, &io_apic->index);
writel(value, &io_apic->data);
}
@@ -183,16 +241,17 @@ static bool io_apic_level_ack_pending(unsigned int irq)
{
struct irq_pin_list *entry;
unsigned long flags;
+ struct irq_cfg *cfg = irq_cfg(irq);
spin_lock_irqsave(&ioapic_lock, flags);
- entry = irq_2_pin + irq;
+ entry = cfg->irq_2_pin;
for (;;) {
unsigned int reg;
int pin;
- pin = entry->pin;
- if (pin == -1)
+ if (!entry)
break;
+ pin = entry->pin;
reg = io_apic_read(entry->apic, 0x10 + pin*2);
/* Is the remote IRR bit set? */
if (reg & IO_APIC_REDIR_REMOTE_IRR) {
@@ -201,45 +260,13 @@ static bool io_apic_level_ack_pending(unsigned int irq)
}
if (!entry->next)
break;
- entry = irq_2_pin + entry->next;
+ entry = entry->next;
}
spin_unlock_irqrestore(&ioapic_lock, flags);
return false;
}
-/*
- * Synchronize the IO-APIC and the CPU by doing
- * a dummy read from the IO-APIC
- */
-static inline void io_apic_sync(unsigned int apic)
-{
- struct io_apic __iomem *io_apic = io_apic_base(apic);
- readl(&io_apic->data);
-}
-
-#define __DO_ACTION(R, ACTION, FINAL) \
- \
-{ \
- int pin; \
- struct irq_pin_list *entry = irq_2_pin + irq; \
- \
- BUG_ON(irq >= NR_IRQS); \
- for (;;) { \
- unsigned int reg; \
- pin = entry->pin; \
- if (pin == -1) \
- break; \
- reg = io_apic_read(entry->apic, 0x10 + R + pin*2); \
- reg ACTION; \
- io_apic_modify(entry->apic, reg); \
- FINAL; \
- if (!entry->next) \
- break; \
- entry = irq_2_pin + entry->next; \
- } \
-}
-
union entry_union {
struct { u32 w1, w2; };
struct IO_APIC_route_entry entry;
@@ -299,59 +326,71 @@ static void ioapic_mask_entry(int apic, int pin)
static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, u8 vector)
{
int apic, pin;
- struct irq_pin_list *entry = irq_2_pin + irq;
+ struct irq_cfg *cfg;
+ struct irq_pin_list *entry;
- BUG_ON(irq >= NR_IRQS);
+ cfg = irq_cfg(irq);
+ entry = cfg->irq_2_pin;
for (;;) {
unsigned int reg;
+
+ if (!entry)
+ break;
+
apic = entry->apic;
pin = entry->pin;
- if (pin == -1)
- break;
+#ifdef CONFIG_INTR_REMAP
/*
* With interrupt-remapping, destination information comes
* from interrupt-remapping table entry.
*/
if (!irq_remapped(irq))
io_apic_write(apic, 0x11 + pin*2, dest);
+#else
+ io_apic_write(apic, 0x11 + pin*2, dest);
+#endif
reg = io_apic_read(apic, 0x10 + pin*2);
reg &= ~IO_APIC_REDIR_VECTOR_MASK;
reg |= vector;
- io_apic_modify(apic, reg);
+ io_apic_modify(apic, 0x10 + pin*2, reg);
if (!entry->next)
break;
- entry = irq_2_pin + entry->next;
+ entry = entry->next;
}
}
+static int assign_irq_vector(int irq, cpumask_t mask);
+
static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t mask)
{
- struct irq_cfg *cfg = irq_cfg + irq;
+ struct irq_cfg *cfg;
unsigned long flags;
unsigned int dest;
cpumask_t tmp;
+ struct irq_desc *desc;
cpus_and(tmp, mask, cpu_online_map);
if (cpus_empty(tmp))
return;
+ cfg = irq_cfg(irq);
if (assign_irq_vector(irq, mask))
return;
cpus_and(tmp, cfg->domain, mask);
dest = cpu_mask_to_apicid(tmp);
-
/*
* Only the high 8 bits are valid.
*/
dest = SET_APIC_LOGICAL_ID(dest);
+ desc = irq_to_desc(irq);
spin_lock_irqsave(&ioapic_lock, flags);
__target_IO_APIC_irq(irq, dest, cfg->vector);
- irq_desc[irq].affinity = mask;
+ desc->affinity = mask;
spin_unlock_irqrestore(&ioapic_lock, flags);
}
-#endif
+#endif /* CONFIG_SMP */
/*
* The common case is 1:1 IRQ<->pin mappings. Sometimes there are
@@ -360,19 +399,30 @@ static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t mask)
*/
static void add_pin_to_irq(unsigned int irq, int apic, int pin)
{
- static int first_free_entry = NR_IRQS;
- struct irq_pin_list *entry = irq_2_pin + irq;
+ struct irq_cfg *cfg;
+ struct irq_pin_list *entry;
- BUG_ON(irq >= NR_IRQS);
- while (entry->next)
- entry = irq_2_pin + entry->next;
-
- if (entry->pin != -1) {
- entry->next = first_free_entry;
- entry = irq_2_pin + entry->next;
- if (++first_free_entry >= PIN_MAP_SIZE)
- panic("io_apic.c: ran out of irq_2_pin entries!");
+ /* first time to refer irq_cfg, so with new */
+ cfg = irq_cfg_alloc(irq);
+ entry = cfg->irq_2_pin;
+ if (!entry) {
+ entry = get_one_free_irq_2_pin();
+ cfg->irq_2_pin = entry;
+ entry->apic = apic;
+ entry->pin = pin;
+ return;
}
+
+ while (entry->next) {
+ /* not again, please */
+ if (entry->apic == apic && entry->pin == pin)
+ return;
+
+ entry = entry->next;
+ }
+
+ entry->next = get_one_free_irq_2_pin();
+ entry = entry->next;
entry->apic = apic;
entry->pin = pin;
}
@@ -384,30 +434,86 @@ static void __init replace_pin_at_irq(unsigned int irq,
int oldapic, int oldpin,
int newapic, int newpin)
{
- struct irq_pin_list *entry = irq_2_pin + irq;
+ struct irq_cfg *cfg = irq_cfg(irq);
+ struct irq_pin_list *entry = cfg->irq_2_pin;
+ int replaced = 0;
- while (1) {
+ while (entry) {
if (entry->apic == oldapic && entry->pin == oldpin) {
entry->apic = newapic;
entry->pin = newpin;
- }
- if (!entry->next)
+ replaced = 1;
+ /* every one is different, right? */
break;
- entry = irq_2_pin + entry->next;
+ }
+ entry = entry->next;
+ }
+
+ /* why? call replace before add? */
+ if (!replaced)
+ add_pin_to_irq(irq, newapic, newpin);
+}
+
+static inline void io_apic_modify_irq(unsigned int irq,
+ int mask_and, int mask_or,
+ void (*final)(struct irq_pin_list *entry))
+{
+ int pin;
+ struct irq_cfg *cfg;
+ struct irq_pin_list *entry;
+
+ cfg = irq_cfg(irq);
+ for (entry = cfg->irq_2_pin; entry != NULL; entry = entry->next) {
+ unsigned int reg;
+ pin = entry->pin;
+ reg = io_apic_read(entry->apic, 0x10 + pin * 2);
+ reg &= mask_and;
+ reg |= mask_or;
+ io_apic_modify(entry->apic, 0x10 + pin * 2, reg);
+ if (final)
+ final(entry);
}
}
+static void __unmask_IO_APIC_irq(unsigned int irq)
+{
+ io_apic_modify_irq(irq, ~IO_APIC_REDIR_MASKED, 0, NULL);
+}
-#define DO_ACTION(name,R,ACTION, FINAL) \
- \
- static void name##_IO_APIC_irq (unsigned int irq) \
- __DO_ACTION(R, ACTION, FINAL)
+#ifdef CONFIG_X86_64
+void io_apic_sync(struct irq_pin_list *entry)
+{
+ /*
+ * Synchronize the IO-APIC and the CPU by doing
+ * a dummy read from the IO-APIC
+ */
+ struct io_apic __iomem *io_apic;
+ io_apic = io_apic_base(entry->apic);
+ readl(&io_apic->data);
+}
-/* mask = 1 */
-DO_ACTION(__mask, 0, |= IO_APIC_REDIR_MASKED, io_apic_sync(entry->apic))
+static void __mask_IO_APIC_irq(unsigned int irq)
+{
+ io_apic_modify_irq(irq, ~0, IO_APIC_REDIR_MASKED, &io_apic_sync);
+}
+#else /* CONFIG_X86_32 */
+static void __mask_IO_APIC_irq(unsigned int irq)
+{
+ io_apic_modify_irq(irq, ~0, IO_APIC_REDIR_MASKED, NULL);
+}
-/* mask = 0 */
-DO_ACTION(__unmask, 0, &= ~IO_APIC_REDIR_MASKED, )
+static void __mask_and_edge_IO_APIC_irq(unsigned int irq)
+{
+ io_apic_modify_irq(irq, ~IO_APIC_REDIR_LEVEL_TRIGGER,
+ IO_APIC_REDIR_MASKED, NULL);
+}
+
+static void __unmask_and_level_IO_APIC_irq(unsigned int irq)
+{
+ io_apic_modify_irq(irq, ~IO_APIC_REDIR_MASKED,
+ IO_APIC_REDIR_LEVEL_TRIGGER, NULL);
+}
+#endif /* CONFIG_X86_32 */
static void mask_IO_APIC_irq (unsigned int irq)
{
@@ -450,6 +556,68 @@ static void clear_IO_APIC (void)
clear_IO_APIC_pin(apic, pin);
}
+#if !defined(CONFIG_SMP) && defined(CONFIG_X86_32)
+void send_IPI_self(int vector)
+{
+ unsigned int cfg;
+
+ /*
+ * Wait for idle.
+ */
+ apic_wait_icr_idle();
+ cfg = APIC_DM_FIXED | APIC_DEST_SELF | vector | APIC_DEST_LOGICAL;
+ /*
+ * Send the IPI. The write to APIC_ICR fires this off.
+ */
+ apic_write(APIC_ICR, cfg);
+}
+#endif /* !CONFIG_SMP && CONFIG_X86_32*/
+
+#ifdef CONFIG_X86_32
+/*
+ * support for broken MP BIOSs, enables hand-redirection of PIRQ0-7 to
+ * specific CPU-side IRQs.
+ */
+
+#define MAX_PIRQS 8
+static int pirq_entries [MAX_PIRQS];
+static int pirqs_enabled;
+
+static int __init ioapic_pirq_setup(char *str)
+{
+ int i, max;
+ int ints[MAX_PIRQS+1];
+
+ get_options(str, ARRAY_SIZE(ints), ints);
+
+ for (i = 0; i < MAX_PIRQS; i++)
+ pirq_entries[i] = -1;
+
+ pirqs_enabled = 1;
+ apic_printk(APIC_VERBOSE, KERN_INFO
+ "PIRQ redirection, working around broken MP-BIOS.\n");
+ max = MAX_PIRQS;
+ if (ints[0] < MAX_PIRQS)
+ max = ints[0];
+
+ for (i = 0; i < max; i++) {
+ apic_printk(APIC_VERBOSE, KERN_DEBUG
+ "... PIRQ%d -> IRQ %d\n", i, ints[i+1]);
+ /*
+ * PIRQs are mapped upside down, usually.
+ */
+ pirq_entries[MAX_PIRQS-i-1] = ints[i+1];
+ }
+ return 1;
+}
+
+__setup("pirq=", ioapic_pirq_setup);
+#endif /* CONFIG_X86_32 */
+
+#ifdef CONFIG_INTR_REMAP
+/* I/O APIC RTE contents at the OS boot up */
+static struct IO_APIC_route_entry *early_ioapic_entries[MAX_IO_APICS];
+
/*
* Saves and masks all the unmasked IO-APIC RTE's
*/
@@ -474,7 +642,7 @@ int save_mask_IO_APIC_setup(void)
kzalloc(sizeof(struct IO_APIC_route_entry) *
nr_ioapic_registers[apic], GFP_KERNEL);
if (!early_ioapic_entries[apic])
- return -ENOMEM;
+ goto nomem;
}
for (apic = 0; apic < nr_ioapics; apic++)
@@ -488,17 +656,31 @@ int save_mask_IO_APIC_setup(void)
ioapic_write_entry(apic, pin, entry);
}
}
+
return 0;
+
+nomem:
+ while (apic >= 0)
+ kfree(early_ioapic_entries[apic--]);
+ memset(early_ioapic_entries, 0,
+ ARRAY_SIZE(early_ioapic_entries));
+
+ return -ENOMEM;
}
void restore_IO_APIC_setup(void)
{
int apic, pin;
- for (apic = 0; apic < nr_ioapics; apic++)
+ for (apic = 0; apic < nr_ioapics; apic++) {
+ if (!early_ioapic_entries[apic])
+ break;
for (pin = 0; pin < nr_ioapic_registers[apic]; pin++)
ioapic_write_entry(apic, pin,
early_ioapic_entries[apic][pin]);
+ kfree(early_ioapic_entries[apic]);
+ early_ioapic_entries[apic] = NULL;
+ }
}
void reinit_intr_remapped_IO_APIC(int intr_remapping)
@@ -512,25 +694,7 @@ void reinit_intr_remapped_IO_APIC(int intr_remapping)
*/
restore_IO_APIC_setup();
}
-
-int skip_ioapic_setup;
-int ioapic_force;
-
-static int __init parse_noapic(char *str)
-{
- disable_ioapic_setup();
- return 0;
-}
-early_param("noapic", parse_noapic);
-
-/* Actually the next is obsolete, but keep it for paranoid reasons -AK */
-static int __init disable_timer_pin_setup(char *arg)
-{
- disable_timer_pin_1 = 1;
- return 1;
-}
-__setup("disable_timer_pin_1", disable_timer_pin_setup);
-
+#endif
/*
* Find the IRQ entry number of a certain pin.
@@ -634,22 +798,54 @@ int IO_APIC_get_PCI_irq_vector(int bus, int slot, int pin)
best_guess = irq;
}
}
- BUG_ON(best_guess >= NR_IRQS);
return best_guess;
}
+EXPORT_SYMBOL(IO_APIC_get_PCI_irq_vector);
+
+#if defined(CONFIG_EISA) || defined(CONFIG_MCA)
+/*
+ * EISA Edge/Level control register, ELCR
+ */
+static int EISA_ELCR(unsigned int irq)
+{
+ if (irq < 16) {
+ unsigned int port = 0x4d0 + (irq >> 3);
+ return (inb(port) >> (irq & 7)) & 1;
+ }
+ apic_printk(APIC_VERBOSE, KERN_INFO
+ "Broken MPtable reports ISA irq %d\n", irq);
+ return 0;
+}
+
+#endif
+
/* ISA interrupts are always polarity zero edge triggered,
* when listed as conforming in the MP table. */
#define default_ISA_trigger(idx) (0)
#define default_ISA_polarity(idx) (0)
+/* EISA interrupts are always polarity zero and can be edge or level
+ * trigger depending on the ELCR value. If an interrupt is listed as
+ * EISA conforming in the MP table, that means its trigger type must
+ * be read in from the ELCR */
+
+#define default_EISA_trigger(idx) (EISA_ELCR(mp_irqs[idx].mp_srcbusirq))
+#define default_EISA_polarity(idx) default_ISA_polarity(idx)
+
/* PCI interrupts are always polarity one level triggered,
* when listed as conforming in the MP table. */
#define default_PCI_trigger(idx) (1)
#define default_PCI_polarity(idx) (1)
+/* MCA interrupts are always polarity zero level triggered,
+ * when listed as conforming in the MP table. */
+
+#define default_MCA_trigger(idx) (1)
+#define default_MCA_polarity(idx) default_ISA_polarity(idx)
+
static int MPBIOS_polarity(int idx)
{
int bus = mp_irqs[idx].mp_srcbus;
@@ -707,6 +903,36 @@ static int MPBIOS_trigger(int idx)
trigger = default_ISA_trigger(idx);
else
trigger = default_PCI_trigger(idx);
+#if defined(CONFIG_EISA) || defined(CONFIG_MCA)
+ switch (mp_bus_id_to_type[bus]) {
+ case MP_BUS_ISA: /* ISA pin */
+ {
+ /* set before the switch */
+ break;
+ }
+ case MP_BUS_EISA: /* EISA pin */
+ {
+ trigger = default_EISA_trigger(idx);
+ break;
+ }
+ case MP_BUS_PCI: /* PCI pin */
+ {
+ /* set before the switch */
+ break;
+ }
+ case MP_BUS_MCA: /* MCA pin */
+ {
+ trigger = default_MCA_trigger(idx);
+ break;
+ }
+ default:
+ {
+ printk(KERN_WARNING "broken BIOS!!\n");
+ trigger = 1;
+ break;
+ }
+ }
+#endif
break;
case 1: /* edge */
{
@@ -744,6 +970,7 @@ static inline int irq_trigger(int idx)
return MPBIOS_trigger(idx);
}
+int (*ioapic_renumber_irq)(int ioapic, int irq);
static int pin_2_irq(int idx, int apic, int pin)
{
int irq, i;
@@ -765,8 +992,32 @@ static int pin_2_irq(int idx, int apic, int pin)
while (i < apic)
irq += nr_ioapic_registers[i++];
irq += pin;
+ /*
+ * For MPS mode, so far only needed by ES7000 platform
+ */
+ if (ioapic_renumber_irq)
+ irq = ioapic_renumber_irq(apic, irq);
}
- BUG_ON(irq >= NR_IRQS);
+
+#ifdef CONFIG_X86_32
+ /*
+ * PCI IRQ command line redirection. Yes, limits are hardcoded.
+ */
+ if ((pin >= 16) && (pin <= 23)) {
+ if (pirq_entries[pin-16] != -1) {
+ if (!pirq_entries[pin-16]) {
+ apic_printk(APIC_VERBOSE, KERN_DEBUG
+ "disabling PIRQ%d\n", pin-16);
+ } else {
+ irq = pirq_entries[pin-16];
+ apic_printk(APIC_VERBOSE, KERN_DEBUG
+ "using PIRQ%d -> IRQ %d\n",
+ pin-16, irq);
+ }
+ }
+ }
+#endif
+
return irq;
}
@@ -801,8 +1052,7 @@ static int __assign_irq_vector(int irq, cpumask_t mask)
int cpu;
struct irq_cfg *cfg;
- BUG_ON((unsigned)irq >= NR_IRQS);
- cfg = &irq_cfg[irq];
+ cfg = irq_cfg(irq);
/* Only try and allocate irqs on cpus that are present */
cpus_and(mask, mask, cpu_online_map);
@@ -837,8 +1087,13 @@ next:
}
if (unlikely(current_vector == vector))
continue;
+#ifdef CONFIG_X86_64
if (vector == IA32_SYSCALL_VECTOR)
goto next;
+#else
+ if (vector == SYSCALL_VECTOR)
+ goto next;
+#endif
for_each_cpu_mask_nr(new_cpu, new_mask)
if (per_cpu(vector_irq, new_cpu)[vector] != -1)
goto next;
@@ -875,8 +1130,7 @@ static void __clear_irq_vector(int irq)
cpumask_t mask;
int cpu, vector;
- BUG_ON((unsigned)irq >= NR_IRQS);
- cfg = &irq_cfg[irq];
+ cfg = irq_cfg(irq);
BUG_ON(!cfg->vector);
vector = cfg->vector;
@@ -893,12 +1147,13 @@ void __setup_vector_irq(int cpu)
/* Initialize vector_irq on a new cpu */
/* This function must be called with vector_lock held */
int irq, vector;
+ struct irq_cfg *cfg;
/* Mark the inuse vectors */
- for (irq = 0; irq < NR_IRQS; ++irq) {
- if (!cpu_isset(cpu, irq_cfg[irq].domain))
+ for_each_irq_cfg(irq, cfg) {
+ if (!cpu_isset(cpu, cfg->domain))
continue;
- vector = irq_cfg[irq].vector;
+ vector = cfg->vector;
per_cpu(vector_irq, cpu)[vector] = irq;
}
/* Mark the free vectors */
@@ -906,7 +1161,9 @@ void __setup_vector_irq(int cpu)
irq = per_cpu(vector_irq, cpu)[vector];
if (irq < 0)
continue;
- if (!cpu_isset(cpu, irq_cfg[irq].domain))
+
+ cfg = irq_cfg(irq);
+ if (!cpu_isset(cpu, cfg->domain))
per_cpu(vector_irq, cpu)[vector] = -1;
}
}
@@ -916,16 +1173,49 @@ static struct irq_chip ioapic_chip;
static struct irq_chip ir_ioapic_chip;
#endif
+#define IOAPIC_AUTO -1
+#define IOAPIC_EDGE 0
+#define IOAPIC_LEVEL 1
+
+#ifdef CONFIG_X86_32
+static inline int IO_APIC_irq_trigger(int irq)
+{
+ int apic, idx, pin;
+
+ for (apic = 0; apic < nr_ioapics; apic++) {
+ for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) {
+ idx = find_irq_entry(apic, pin, mp_INT);
+ if ((idx != -1) && (irq == pin_2_irq(idx, apic, pin)))
+ return irq_trigger(idx);
+ }
+ }
+ /*
+ * nonexistent IRQs are edge default
+ */
+ return 0;
+}
+#else
+static inline int IO_APIC_irq_trigger(int irq)
+{
+ return 1;
+}
+#endif
+
static void ioapic_register_intr(int irq, unsigned long trigger)
{
- if (trigger)
- irq_desc[irq].status |= IRQ_LEVEL;
+ struct irq_desc *desc;
+
+ desc = irq_to_desc(irq);
+
+ if ((trigger == IOAPIC_AUTO && IO_APIC_irq_trigger(irq)) ||
+ trigger == IOAPIC_LEVEL)
+ desc->status |= IRQ_LEVEL;
else
- irq_desc[irq].status &= ~IRQ_LEVEL;
+ desc->status &= ~IRQ_LEVEL;
#ifdef CONFIG_INTR_REMAP
if (irq_remapped(irq)) {
- irq_desc[irq].status |= IRQ_MOVE_PCNTXT;
+ desc->status |= IRQ_MOVE_PCNTXT;
if (trigger)
set_irq_chip_and_handler_name(irq, &ir_ioapic_chip,
handle_fasteoi_irq,
@@ -936,7 +1226,8 @@ static void ioapic_register_intr(int irq, unsigned long trigger)
return;
}
#endif
- if (trigger)
+ if ((trigger == IOAPIC_AUTO && IO_APIC_irq_trigger(irq)) ||
+ trigger == IOAPIC_LEVEL)
set_irq_chip_and_handler_name(irq, &ioapic_chip,
handle_fasteoi_irq,
"fasteoi");
@@ -1009,13 +1300,15 @@ static int setup_ioapic_entry(int apic, int irq,
static void setup_IO_APIC_irq(int apic, int pin, unsigned int irq,
int trigger, int polarity)
{
- struct irq_cfg *cfg = irq_cfg + irq;
+ struct irq_cfg *cfg;
struct IO_APIC_route_entry entry;
cpumask_t mask;
if (!IO_APIC_IRQ(irq))
return;
+ cfg = irq_cfg(irq);
+
mask = TARGET_CPUS;
if (assign_irq_vector(irq, mask))
return;
@@ -1047,37 +1340,49 @@ static void setup_IO_APIC_irq(int apic, int pin, unsigned int irq,
static void __init setup_IO_APIC_irqs(void)
{
- int apic, pin, idx, irq, first_notcon = 1;
+ int apic, pin, idx, irq;
+ int notcon = 0;
apic_printk(APIC_VERBOSE, KERN_DEBUG "init IO_APIC IRQs\n");
for (apic = 0; apic < nr_ioapics; apic++) {
- for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) {
+ for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) {
- idx = find_irq_entry(apic,pin,mp_INT);
- if (idx == -1) {
- if (first_notcon) {
- apic_printk(APIC_VERBOSE, KERN_DEBUG " IO-APIC (apicid-pin) %d-%d", mp_ioapics[apic].mp_apicid, pin);
- first_notcon = 0;
- } else
- apic_printk(APIC_VERBOSE, ", %d-%d", mp_ioapics[apic].mp_apicid, pin);
- continue;
+ idx = find_irq_entry(apic, pin, mp_INT);
+ if (idx == -1) {
+ if (!notcon) {
+ notcon = 1;
+ apic_printk(APIC_VERBOSE,
+ KERN_DEBUG " %d-%d",
+ mp_ioapics[apic].mp_apicid,
+ pin);
+ } else
+ apic_printk(APIC_VERBOSE, " %d-%d",
+ mp_ioapics[apic].mp_apicid,
+ pin);
+ continue;
+ }
+ if (notcon) {
+ apic_printk(APIC_VERBOSE,
+ " (apicid-pin) not connected\n");
+ notcon = 0;
+ }
+
+ irq = pin_2_irq(idx, apic, pin);
+#ifdef CONFIG_X86_32
+ if (multi_timer_check(apic, irq))
+ continue;
+#endif
+ add_pin_to_irq(irq, apic, pin);
+
+ setup_IO_APIC_irq(apic, pin, irq,
+ irq_trigger(idx), irq_polarity(idx));
}
- if (!first_notcon) {
- apic_printk(APIC_VERBOSE, " not connected.\n");
- first_notcon = 1;
- }
-
- irq = pin_2_irq(idx, apic, pin);
- add_pin_to_irq(irq, apic, pin);
-
- setup_IO_APIC_irq(apic, pin, irq,
- irq_trigger(idx), irq_polarity(idx));
- }
}
- if (!first_notcon)
- apic_printk(APIC_VERBOSE, " not connected.\n");
+ if (notcon)
+ apic_printk(APIC_VERBOSE,
+ " (apicid-pin) not connected\n");
}
/*
@@ -1088,8 +1393,10 @@ static void __init setup_timer_IRQ0_pin(unsigned int apic, unsigned int pin,
{
struct IO_APIC_route_entry entry;
+#ifdef CONFIG_INTR_REMAP
if (intr_remapping_enabled)
return;
+#endif
memset(&entry, 0, sizeof(entry));
@@ -1124,7 +1431,10 @@ __apicdebuginit(void) print_IO_APIC(void)
union IO_APIC_reg_00 reg_00;
union IO_APIC_reg_01 reg_01;
union IO_APIC_reg_02 reg_02;
+ union IO_APIC_reg_03 reg_03;
unsigned long flags;
+ struct irq_cfg *cfg;
+ unsigned int irq;
if (apic_verbosity == APIC_QUIET)
return;
@@ -1147,12 +1457,16 @@ __apicdebuginit(void) print_IO_APIC(void)
reg_01.raw = io_apic_read(apic, 1);
if (reg_01.bits.version >= 0x10)
reg_02.raw = io_apic_read(apic, 2);
+ if (reg_01.bits.version >= 0x20)
+ reg_03.raw = io_apic_read(apic, 3);
spin_unlock_irqrestore(&ioapic_lock, flags);
printk("\n");
printk(KERN_DEBUG "IO APIC #%d......\n", mp_ioapics[apic].mp_apicid);
printk(KERN_DEBUG ".... register #00: %08X\n", reg_00.raw);
printk(KERN_DEBUG "....... : physical APIC id: %02X\n", reg_00.bits.ID);
+ printk(KERN_DEBUG "....... : Delivery Type: %X\n", reg_00.bits.delivery_type);
+ printk(KERN_DEBUG "....... : LTS : %X\n", reg_00.bits.LTS);
printk(KERN_DEBUG ".... register #01: %08X\n", *(int *)®_01);
printk(KERN_DEBUG "....... : max redirection entries: %04X\n", reg_01.bits.entries);
@@ -1160,11 +1474,27 @@ __apicdebuginit(void) print_IO_APIC(void)
printk(KERN_DEBUG "....... : PRQ implemented: %X\n", reg_01.bits.PRQ);
printk(KERN_DEBUG "....... : IO APIC version: %04X\n", reg_01.bits.version);
- if (reg_01.bits.version >= 0x10) {
+ /*
+ * Some Intel chipsets with IO APIC VERSION of 0x1? don't have reg_02,
+ * but the value of reg_02 is read as the previous read register
+ * value, so ignore it if reg_02 == reg_01.
+ */
+ if (reg_01.bits.version >= 0x10 && reg_02.raw != reg_01.raw) {
printk(KERN_DEBUG ".... register #02: %08X\n", reg_02.raw);
printk(KERN_DEBUG "....... : arbitration: %02X\n", reg_02.bits.arbitration);
}
+ /*
+ * Some Intel chipsets with IO APIC VERSION of 0x2? don't have reg_02
+ * or reg_03, but the value of reg_0[23] is read as the previous read
+ * register value, so ignore it if reg_03 == reg_0[12].
+ */
+ if (reg_01.bits.version >= 0x20 && reg_03.raw != reg_02.raw &&
+ reg_03.raw != reg_01.raw) {
+ printk(KERN_DEBUG ".... register #03: %08X\n", reg_03.raw);
+ printk(KERN_DEBUG "....... : Boot DT : %X\n", reg_03.bits.boot_DT);
+ }
+
printk(KERN_DEBUG ".... IRQ redirection table:\n");
printk(KERN_DEBUG " NR Dst Mask Trig IRR Pol"
@@ -1193,16 +1523,16 @@ __apicdebuginit(void) print_IO_APIC(void)
}
}
printk(KERN_DEBUG "IRQ to pin mappings:\n");
- for (i = 0; i < NR_IRQS; i++) {
- struct irq_pin_list *entry = irq_2_pin + i;
- if (entry->pin < 0)
+ for_each_irq_cfg(irq, cfg) {
+ struct irq_pin_list *entry = cfg->irq_2_pin;
+ if (!entry)
continue;
- printk(KERN_DEBUG "IRQ%d ", i);
+ printk(KERN_DEBUG "IRQ%d ", irq);
for (;;) {
printk("-> %d:%d", entry->apic, entry->pin);
if (!entry->next)
break;
- entry = irq_2_pin + entry->next;
+ entry = entry->next;
}
printk("\n");
}
@@ -1236,7 +1566,7 @@ __apicdebuginit(void) print_APIC_bitfield(int base)
__apicdebuginit(void) print_local_APIC(void *dummy)
{
unsigned int v, ver, maxlvt;
- unsigned long icr;
+ u64 icr;
if (apic_verbosity == APIC_QUIET)
return;
@@ -1253,20 +1583,31 @@ __apicdebuginit(void) print_local_APIC(void *dummy)
v = apic_read(APIC_TASKPRI);
printk(KERN_DEBUG "... APIC TASKPRI: %08x (%02x)\n", v, v & APIC_TPRI_MASK);
- v = apic_read(APIC_ARBPRI);
- printk(KERN_DEBUG "... APIC ARBPRI: %08x (%02x)\n", v,
- v & APIC_ARBPRI_MASK);
- v = apic_read(APIC_PROCPRI);
- printk(KERN_DEBUG "... APIC PROCPRI: %08x\n", v);
+ if (APIC_INTEGRATED(ver)) { /* !82489DX */
+ if (!APIC_XAPIC(ver)) {
+ v = apic_read(APIC_ARBPRI);
+ printk(KERN_DEBUG "... APIC ARBPRI: %08x (%02x)\n", v,
+ v & APIC_ARBPRI_MASK);
+ }
+ v = apic_read(APIC_PROCPRI);
+ printk(KERN_DEBUG "... APIC PROCPRI: %08x\n", v);
+ }
+
+ /*
+ * Remote read supported only in the 82489DX and local APIC for
+ * Pentium processors.
+ */
+ if (!APIC_INTEGRATED(ver) || maxlvt == 3) {
+ v = apic_read(APIC_RRR);
+ printk(KERN_DEBUG "... APIC RRR: %08x\n", v);
+ }
- v = apic_read(APIC_EOI);
- printk(KERN_DEBUG "... APIC EOI: %08x\n", v);
- v = apic_read(APIC_RRR);
- printk(KERN_DEBUG "... APIC RRR: %08x\n", v);
v = apic_read(APIC_LDR);
printk(KERN_DEBUG "... APIC LDR: %08x\n", v);
- v = apic_read(APIC_DFR);
- printk(KERN_DEBUG "... APIC DFR: %08x\n", v);
+ if (!x2apic_enabled()) {
+ v = apic_read(APIC_DFR);
+ printk(KERN_DEBUG "... APIC DFR: %08x\n", v);
+ }
v = apic_read(APIC_SPIV);
printk(KERN_DEBUG "... APIC SPIV: %08x\n", v);
@@ -1277,8 +1618,13 @@ __apicdebuginit(void) print_local_APIC(void *dummy)
printk(KERN_DEBUG "... APIC IRR field:\n");
print_APIC_bitfield(APIC_IRR);
- v = apic_read(APIC_ESR);
- printk(KERN_DEBUG "... APIC ESR: %08x\n", v);
+ if (APIC_INTEGRATED(ver)) { /* !82489DX */
+ if (maxlvt > 3) /* Due to the Pentium erratum 3AP. */
+ apic_write(APIC_ESR, 0);
+
+ v = apic_read(APIC_ESR);
+ printk(KERN_DEBUG "... APIC ESR: %08x\n", v);
+ }
icr = apic_icr_read();
printk(KERN_DEBUG "... APIC ICR: %08x\n", (u32)icr);
@@ -1312,7 +1658,12 @@ __apicdebuginit(void) print_local_APIC(void *dummy)
__apicdebuginit(void) print_all_local_APICs(void)
{
- on_each_cpu(print_local_APIC, NULL, 1);
+ int cpu;
+
+ preempt_disable();
+ for_each_online_cpu(cpu)
+ smp_call_function_single(cpu, print_local_APIC, NULL, 1);
+ preempt_enable();
}
__apicdebuginit(void) print_PIC(void)
@@ -1359,17 +1710,22 @@ __apicdebuginit(int) print_all_ICs(void)
fs_initcall(print_all_ICs);
+/* Where if anywhere is the i8259 connect in external int mode */
+static struct { int pin, apic; } ioapic_i8259 = { -1, -1 };
+
void __init enable_IO_APIC(void)
{
union IO_APIC_reg_01 reg_01;
int i8259_apic, i8259_pin;
- int i, apic;
+ int apic;
unsigned long flags;
- for (i = 0; i < PIN_MAP_SIZE; i++) {
- irq_2_pin[i].pin = -1;
- irq_2_pin[i].next = 0;
- }
+#ifdef CONFIG_X86_32
+ int i;
+ if (!pirqs_enabled)
+ for (i = 0; i < MAX_PIRQS; i++)
+ pirq_entries[i] = -1;
+#endif
/*
* The number of IO-APIC IRQ registers (== #pins):
@@ -1399,6 +1755,10 @@ void __init enable_IO_APIC(void)
}
found_i8259:
/* Look to see what if the MP table has reported the ExtINT */
+ /* If we could not find the appropriate pin by looking at the ioapic
+ * the i8259 probably is not connected the ioapic but give the
+ * mptable a chance anyway.
+ */
i8259_pin = find_isa_irq_pin(0, mp_ExtINT);
i8259_apic = find_isa_irq_apic(0, mp_ExtINT);
/* Trust the MP table if nothing is setup in the hardware */
@@ -1458,6 +1818,133 @@ void disable_IO_APIC(void)
disconnect_bsp_APIC(ioapic_i8259.pin != -1);
}
+#ifdef CONFIG_X86_32
+/*
+ * function to set the IO-APIC physical IDs based on the
+ * values stored in the MPC table.
+ *
+ * by Matt Domsch Tue Dec 21 12:25:05 CST 1999
+ */
+
+static void __init setup_ioapic_ids_from_mpc(void)
+{
+ union IO_APIC_reg_00 reg_00;
+ physid_mask_t phys_id_present_map;
+ int apic;
+ int i;
+ unsigned char old_id;
+ unsigned long flags;
+
+ if (x86_quirks->setup_ioapic_ids && x86_quirks->setup_ioapic_ids())
+ return;
+
+ /*
+ * Don't check I/O APIC IDs for xAPIC systems. They have
+ * no meaning without the serial APIC bus.
+ */
+ if (!(boot_cpu_data.x86_vendor == X86_VENDOR_INTEL)
+ || APIC_XAPIC(apic_version[boot_cpu_physical_apicid]))
+ return;
+ /*
+ * This is broken; anything with a real cpu count has to
+ * circumvent this idiocy regardless.
+ */
+ phys_id_present_map = ioapic_phys_id_map(phys_cpu_present_map);
+
+ /*
+ * Set the IOAPIC ID to the value stored in the MPC table.
+ */
+ for (apic = 0; apic < nr_ioapics; apic++) {
+
+ /* Read the register 0 value */
+ spin_lock_irqsave(&ioapic_lock, flags);
+ reg_00.raw = io_apic_read(apic, 0);
+ spin_unlock_irqrestore(&ioapic_lock, flags);
+
+ old_id = mp_ioapics[apic].mp_apicid;
+
+ if (mp_ioapics[apic].mp_apicid >= get_physical_broadcast()) {
+ printk(KERN_ERR "BIOS bug, IO-APIC#%d ID is %d in the MPC table!...\n",
+ apic, mp_ioapics[apic].mp_apicid);
+ printk(KERN_ERR "... fixing up to %d. (tell your hw vendor)\n",
+ reg_00.bits.ID);
+ mp_ioapics[apic].mp_apicid = reg_00.bits.ID;
+ }
+
+ /*
+ * Sanity check, is the ID really free? Every APIC in a
+ * system must have a unique ID or we get lots of nice
+ * 'stuck on smp_invalidate_needed IPI wait' messages.
+ */
+ if (check_apicid_used(phys_id_present_map,
+ mp_ioapics[apic].mp_apicid)) {
+ printk(KERN_ERR "BIOS bug, IO-APIC#%d ID %d is already used!...\n",
+ apic, mp_ioapics[apic].mp_apicid);
+ for (i = 0; i < get_physical_broadcast(); i++)
+ if (!physid_isset(i, phys_id_present_map))
+ break;
+ if (i >= get_physical_broadcast())
+ panic("Max APIC ID exceeded!\n");
+ printk(KERN_ERR "... fixing up to %d. (tell your hw vendor)\n",
+ i);
+ physid_set(i, phys_id_present_map);
+ mp_ioapics[apic].mp_apicid = i;
+ } else {
+ physid_mask_t tmp;
+ tmp = apicid_to_cpu_present(mp_ioapics[apic].mp_apicid);
+ apic_printk(APIC_VERBOSE, "Setting %d in the "
+ "phys_id_present_map\n",
+ mp_ioapics[apic].mp_apicid);
+ physids_or(phys_id_present_map, phys_id_present_map, tmp);
+ }
+
+
+ /*
+ * We need to adjust the IRQ routing table
+ * if the ID changed.
+ */
+ if (old_id != mp_ioapics[apic].mp_apicid)
+ for (i = 0; i < mp_irq_entries; i++)
+ if (mp_irqs[i].mp_dstapic == old_id)
+ mp_irqs[i].mp_dstapic
+ = mp_ioapics[apic].mp_apicid;
+
+ /*
+ * Read the right value from the MPC table and
+ * write it into the ID register.
+ */
+ apic_printk(APIC_VERBOSE, KERN_INFO
+ "...changing IO-APIC physical APIC ID to %d ...",
+ mp_ioapics[apic].mp_apicid);
+
+ reg_00.bits.ID = mp_ioapics[apic].mp_apicid;
+ spin_lock_irqsave(&ioapic_lock, flags);
+ io_apic_write(apic, 0, reg_00.raw);
+ spin_unlock_irqrestore(&ioapic_lock, flags);
+
+ /*
+ * Sanity check
+ */
+ spin_lock_irqsave(&ioapic_lock, flags);
+ reg_00.raw = io_apic_read(apic, 0);
+ spin_unlock_irqrestore(&ioapic_lock, flags);
+ if (reg_00.bits.ID != mp_ioapics[apic].mp_apicid)
+ printk("could not set ID!\n");
+ else
+ apic_printk(APIC_VERBOSE, " ok.\n");
+ }
+}
+#endif
+
+int no_timer_check __initdata;
+
+static int __init notimercheck(char *s)
+{
+ no_timer_check = 1;
+ return 1;
+}
+__setup("no_timer_check", notimercheck);
+
/*
* There is a nasty bug in some older SMP boards, their mptable lies
* about the timer IRQ. We do the following to work around the situation:
@@ -1471,6 +1958,9 @@ static int __init timer_irq_works(void)
unsigned long t1 = jiffies;
unsigned long flags;
+ if (no_timer_check)
+ return 1;
+
local_save_flags(flags);
local_irq_enable();
/* Let ten ticks pass... */
@@ -1531,9 +2021,11 @@ static unsigned int startup_ioapic_irq(unsigned int irq)
return was_pending;
}
+#ifdef CONFIG_X86_64
static int ioapic_retrigger_irq(unsigned int irq)
{
- struct irq_cfg *cfg = &irq_cfg[irq];
+
+ struct irq_cfg *cfg = irq_cfg(irq);
unsigned long flags;
spin_lock_irqsave(&vector_lock, flags);
@@ -1542,6 +2034,14 @@ static int ioapic_retrigger_irq(unsigned int irq)
return 1;
}
+#else
+static int ioapic_retrigger_irq(unsigned int irq)
+{
+ send_IPI_self(irq_cfg(irq)->vector);
+
+ return 1;
+}
+#endif
/*
* Level and edge triggered IO-APIC interrupts need different handling,
@@ -1580,11 +2080,11 @@ static DECLARE_DELAYED_WORK(ir_migration_work, ir_irq_migration);
*/
static void migrate_ioapic_irq(int irq, cpumask_t mask)
{
- struct irq_cfg *cfg = irq_cfg + irq;
- struct irq_desc *desc = irq_desc + irq;
+ struct irq_cfg *cfg;
+ struct irq_desc *desc;
cpumask_t tmp, cleanup_mask;
struct irte irte;
- int modify_ioapic_rte = desc->status & IRQ_LEVEL;
+ int modify_ioapic_rte;
unsigned int dest;
unsigned long flags;
@@ -1598,9 +2098,12 @@ static void migrate_ioapic_irq(int irq, cpumask_t mask)
if (assign_irq_vector(irq, mask))
return;
+ cfg = irq_cfg(irq);
cpus_and(tmp, cfg->domain, mask);
dest = cpu_mask_to_apicid(tmp);
+ desc = irq_to_desc(irq);
+ modify_ioapic_rte = desc->status & IRQ_LEVEL;
if (modify_ioapic_rte) {
spin_lock_irqsave(&ioapic_lock, flags);
__target_IO_APIC_irq(irq, dest, cfg->vector);
@@ -1622,18 +2125,19 @@ static void migrate_ioapic_irq(int irq, cpumask_t mask)
cfg->move_in_progress = 0;
}
- irq_desc[irq].affinity = mask;
+ desc->affinity = mask;
}
static int migrate_irq_remapped_level(int irq)
{
int ret = -1;
+ struct irq_desc *desc = irq_to_desc(irq);
mask_IO_APIC_irq(irq);
if (io_apic_level_ack_pending(irq)) {
/*
- * Interrupt in progress. Migrating irq now will change the
+ * Interrupt in progress. Migrating irq now will change the
* vector information in the IO-APIC RTE and that will confuse
* the EOI broadcast performed by cpu.
* So, delay the irq migration to the next instance.
@@ -1643,11 +2147,11 @@ static int migrate_irq_remapped_level(int irq)
}
/* everthing is clear. we have right of way */
- migrate_ioapic_irq(irq, irq_desc[irq].pending_mask);
+ migrate_ioapic_irq(irq, desc->pending_mask);
ret = 0;
- irq_desc[irq].status &= ~IRQ_MOVE_PENDING;
- cpus_clear(irq_desc[irq].pending_mask);
+ desc->status &= ~IRQ_MOVE_PENDING;
+ cpus_clear(desc->pending_mask);
unmask:
unmask_IO_APIC_irq(irq);
@@ -1656,10 +2160,10 @@ unmask:
static void ir_irq_migration(struct work_struct *work)
{
- int irq;
+ unsigned int irq;
+ struct irq_desc *desc;
- for (irq = 0; irq < NR_IRQS; irq++) {
- struct irq_desc *desc = irq_desc + irq;
+ for_each_irq_desc(irq, desc) {
if (desc->status & IRQ_MOVE_PENDING) {
unsigned long flags;
@@ -1671,8 +2175,7 @@ static void ir_irq_migration(struct work_struct *work)
continue;
}
- desc->chip->set_affinity(irq,
- irq_desc[irq].pending_mask);
+ desc->chip->set_affinity(irq, desc->pending_mask);
spin_unlock_irqrestore(&desc->lock, flags);
}
}
@@ -1683,9 +2186,11 @@ static void ir_irq_migration(struct work_struct *work)
*/
static void set_ir_ioapic_affinity_irq(unsigned int irq, cpumask_t mask)
{
- if (irq_desc[irq].status & IRQ_LEVEL) {
- irq_desc[irq].status |= IRQ_MOVE_PENDING;
- irq_desc[irq].pending_mask = mask;
+ struct irq_desc *desc = irq_to_desc(irq);
+
+ if (desc->status & IRQ_LEVEL) {
+ desc->status |= IRQ_MOVE_PENDING;
+ desc->pending_mask = mask;
migrate_irq_remapped_level(irq);
return;
}
@@ -1698,7 +2203,9 @@ asmlinkage void smp_irq_move_cleanup_interrupt(void)
{
unsigned vector, me;
ack_APIC_irq();
+#ifdef CONFIG_X86_64
exit_idle();
+#endif
irq_enter();
me = smp_processor_id();
@@ -1707,11 +2214,12 @@ asmlinkage void smp_irq_move_cleanup_interrupt(void)
struct irq_desc *desc;
struct irq_cfg *cfg;
irq = __get_cpu_var(vector_irq)[vector];
- if (irq >= NR_IRQS)
+
+ desc = irq_to_desc(irq);
+ if (!desc)
continue;
- desc = irq_desc + irq;
- cfg = irq_cfg + irq;
+ cfg = irq_cfg(irq);
spin_lock(&desc->lock);
if (!cfg->move_cleanup_count)
goto unlock;
@@ -1730,7 +2238,7 @@ unlock:
static void irq_complete_move(unsigned int irq)
{
- struct irq_cfg *cfg = irq_cfg + irq;
+ struct irq_cfg *cfg = irq_cfg(irq);
unsigned vector, me;
if (likely(!cfg->move_in_progress))
@@ -1769,19 +2277,50 @@ static void ack_apic_edge(unsigned int irq)
ack_APIC_irq();
}
+atomic_t irq_mis_count;
+
static void ack_apic_level(unsigned int irq)
{
+#ifdef CONFIG_X86_32
+ unsigned long v;
+ int i;
+#endif
int do_unmask_irq = 0;
irq_complete_move(irq);
#ifdef CONFIG_GENERIC_PENDING_IRQ
/* If we are moving the irq we need to mask it */
- if (unlikely(irq_desc[irq].status & IRQ_MOVE_PENDING)) {
+ if (unlikely(irq_to_desc(irq)->status & IRQ_MOVE_PENDING)) {
do_unmask_irq = 1;
mask_IO_APIC_irq(irq);
}
#endif
+#ifdef CONFIG_X86_32
+ /*
+ * It appears there is an erratum which affects at least version 0x11
+ * of I/O APIC (that's the 82093AA and cores integrated into various
+ * chipsets). Under certain conditions a level-triggered interrupt is
+ * erroneously delivered as edge-triggered one but the respective IRR
+ * bit gets set nevertheless. As a result the I/O unit expects an EOI
+ * message but it will never arrive and further interrupts are blocked
+ * from the source. The exact reason is so far unknown, but the
+ * phenomenon was observed when two consecutive interrupt requests
+ * from a given source get delivered to the same CPU and the source is
+ * temporarily disabled in between.
+ *
+ * A workaround is to simulate an EOI message manually. We achieve it
+ * by setting the trigger mode to edge and then to level when the edge
+ * trigger mode gets detected in the TMR of a local APIC for a
+ * level-triggered interrupt. We mask the source for the time of the
+ * operation to prevent an edge-triggered interrupt escaping meanwhile.
+ * The idea is from Manfred Spraul. --macro
+ */
+ i = irq_cfg(irq)->vector;
+
+ v = apic_read(APIC_TMR + ((i & ~0x1f) >> 1));
+#endif
+
/*
* We must acknowledge the irq before we move it or the acknowledge will
* not propagate properly.
@@ -1820,31 +2359,41 @@ static void ack_apic_level(unsigned int irq)
move_masked_irq(irq);
unmask_IO_APIC_irq(irq);
}
+
+#ifdef CONFIG_X86_32
+ if (!(v & (1 << (i & 0x1f)))) {
+ atomic_inc(&irq_mis_count);
+ spin_lock(&ioapic_lock);
+ __mask_and_edge_IO_APIC_irq(irq);
+ __unmask_and_level_IO_APIC_irq(irq);
+ spin_unlock(&ioapic_lock);
+ }
+#endif
}
static struct irq_chip ioapic_chip __read_mostly = {
- .name = "IO-APIC",
- .startup = startup_ioapic_irq,
- .mask = mask_IO_APIC_irq,
- .unmask = unmask_IO_APIC_irq,
- .ack = ack_apic_edge,
- .eoi = ack_apic_level,
+ .name = "IO-APIC",
+ .startup = startup_ioapic_irq,
+ .mask = mask_IO_APIC_irq,
+ .unmask = unmask_IO_APIC_irq,
+ .ack = ack_apic_edge,
+ .eoi = ack_apic_level,
#ifdef CONFIG_SMP
- .set_affinity = set_ioapic_affinity_irq,
+ .set_affinity = set_ioapic_affinity_irq,
#endif
.retrigger = ioapic_retrigger_irq,
};
#ifdef CONFIG_INTR_REMAP
static struct irq_chip ir_ioapic_chip __read_mostly = {
- .name = "IR-IO-APIC",
- .startup = startup_ioapic_irq,
- .mask = mask_IO_APIC_irq,
- .unmask = unmask_IO_APIC_irq,
- .ack = ack_x2apic_edge,
- .eoi = ack_x2apic_level,
+ .name = "IR-IO-APIC",
+ .startup = startup_ioapic_irq,
+ .mask = mask_IO_APIC_irq,
+ .unmask = unmask_IO_APIC_irq,
+ .ack = ack_x2apic_edge,
+ .eoi = ack_x2apic_level,
#ifdef CONFIG_SMP
- .set_affinity = set_ir_ioapic_affinity_irq,
+ .set_affinity = set_ir_ioapic_affinity_irq,
#endif
.retrigger = ioapic_retrigger_irq,
};
@@ -1853,6 +2402,8 @@ static struct irq_chip ir_ioapic_chip __read_mostly = {
static inline void init_IO_APIC_traps(void)
{
int irq;
+ struct irq_desc *desc;
+ struct irq_cfg *cfg;
/*
* NOTE! The local APIC isn't very good at handling
@@ -1865,8 +2416,8 @@ static inline void init_IO_APIC_traps(void)
* Also, we've got to be careful not to trash gate
* 0x80, because int 0x80 is hm, kind of importantish. ;)
*/
- for (irq = 0; irq < NR_IRQS ; irq++) {
- if (IO_APIC_IRQ(irq) && !irq_cfg[irq].vector) {
+ for_each_irq_cfg(irq, cfg) {
+ if (IO_APIC_IRQ(irq) && !cfg->vector) {
/*
* Hmm.. We don't have an entry for this,
* so default to an old-fashioned 8259
@@ -1874,13 +2425,27 @@ static inline void init_IO_APIC_traps(void)
*/
if (irq < 16)
make_8259A_irq(irq);
- else
+ else {
+ desc = irq_to_desc(irq);
/* Strange. Oh, well.. */
- irq_desc[irq].chip = &no_irq_chip;
+ desc->chip = &no_irq_chip;
+ }
}
}
}
+/*
+ * The local APIC irq-chip implementation:
+ */
+
+static void mask_lapic_irq(unsigned int irq)
+{
+ unsigned long v;
+
+ v = apic_read(APIC_LVT0);
+ apic_write(APIC_LVT0, v | APIC_LVT_MASKED);
+}
+
static void unmask_lapic_irq(unsigned int irq)
{
unsigned long v;
@@ -1889,14 +2454,6 @@ static void unmask_lapic_irq(unsigned int irq)
apic_write(APIC_LVT0, v & ~APIC_LVT_MASKED);
}
-static void mask_lapic_irq(unsigned int irq)
-{
- unsigned long v;
-
- v = apic_read(APIC_LVT0);
- apic_write(APIC_LVT0, v | APIC_LVT_MASKED);
-}
-
static void ack_lapic_irq (unsigned int irq)
{
ack_APIC_irq();
@@ -1911,7 +2468,10 @@ static struct irq_chip lapic_chip __read_mostly = {
static void lapic_register_intr(int irq)
{
- irq_desc[irq].status &= ~IRQ_LEVEL;
+ struct irq_desc *desc;
+
+ desc = irq_to_desc(irq);
+ desc->status &= ~IRQ_LEVEL;
set_irq_chip_and_handler_name(irq, &lapic_chip, handle_edge_irq,
"edge");
}
@@ -1919,19 +2479,19 @@ static void lapic_register_intr(int irq)
static void __init setup_nmi(void)
{
/*
- * Dirty trick to enable the NMI watchdog ...
+ * Dirty trick to enable the NMI watchdog ...
* We put the 8259A master into AEOI mode and
* unmask on all local APICs LVT0 as NMI.
*
* The idea to use the 8259A in AEOI mode ('8259A Virtual Wire')
* is from Maciej W. Rozycki - so we do not have to EOI from
* the NMI handler or the timer interrupt.
- */
- printk(KERN_INFO "activating NMI Watchdog ...");
+ */
+ apic_printk(APIC_VERBOSE, KERN_INFO "activating NMI Watchdog ...");
enable_NMI_through_LVT0();
- printk(" done.\n");
+ apic_printk(APIC_VERBOSE, " done.\n");
}
/*
@@ -1948,12 +2508,17 @@ static inline void __init unlock_ExtINT_logic(void)
unsigned char save_control, save_freq_select;
pin = find_isa_irq_pin(8, mp_INT);
- apic = find_isa_irq_apic(8, mp_INT);
- if (pin == -1)
+ if (pin == -1) {
+ WARN_ON_ONCE(1);
return;
+ }
+ apic = find_isa_irq_apic(8, mp_INT);
+ if (apic == -1) {
+ WARN_ON_ONCE(1);
+ return;
+ }
entry0 = ioapic_read_entry(apic, pin);
-
clear_IO_APIC_pin(apic, pin);
memset(&entry1, 0, sizeof(entry1));
@@ -1988,23 +2553,38 @@ static inline void __init unlock_ExtINT_logic(void)
ioapic_write_entry(apic, pin, entry0);
}
+static int disable_timer_pin_1 __initdata;
+/* Actually the next is obsolete, but keep it for paranoid reasons -AK */
+static int __init disable_timer_pin_setup(char *arg)
+{
+ disable_timer_pin_1 = 1;
+ return 0;
+}
+early_param("disable_timer_pin_1", disable_timer_pin_setup);
+
+int timer_through_8259 __initdata;
+
/*
* This code may look a bit paranoid, but it's supposed to cooperate with
* a wide range of boards and BIOS bugs. Fortunately only the timer IRQ
* is so screwy. Thanks to Brian Perkins for testing/hacking this beast
* fanatically on his truly buggy board.
*
- * FIXME: really need to revamp this for modern platforms only.
+ * FIXME: really need to revamp this for all platforms.
*/
static inline void __init check_timer(void)
{
- struct irq_cfg *cfg = irq_cfg + 0;
+ struct irq_cfg *cfg = irq_cfg(0);
int apic1, pin1, apic2, pin2;
unsigned long flags;
+ unsigned int ver;
int no_pin1 = 0;
local_irq_save(flags);
+ ver = apic_read(APIC_LVR);
+ ver = GET_APIC_VERSION(ver);
+
/*
* get/set the timer IRQ vector:
*/
@@ -2013,10 +2593,18 @@ static inline void __init check_timer(void)
/*
* As IRQ0 is to be enabled in the 8259A, the virtual
- * wire has to be disabled in the local APIC.
+ * wire has to be disabled in the local APIC. Also
+ * timer interrupts need to be acknowledged manually in
+ * the 8259A for the i82489DX when using the NMI
+ * watchdog as that APIC treats NMIs as level-triggered.
+ * The AEOI mode will finish them in the 8259A
+ * automatically.
*/
apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_EXTINT);
init_8259A(1);
+#ifdef CONFIG_X86_32
+ timer_ack = (nmi_watchdog == NMI_IO_APIC && !APIC_INTEGRATED(ver));
+#endif
pin1 = find_isa_irq_pin(0, mp_INT);
apic1 = find_isa_irq_apic(0, mp_INT);
@@ -2035,8 +2623,10 @@ static inline void __init check_timer(void)
* 8259A.
*/
if (pin1 == -1) {
+#ifdef CONFIG_INTR_REMAP
if (intr_remapping_enabled)
panic("BIOS bug: timer not connected to IO-APIC");
+#endif
pin1 = pin2;
apic1 = apic2;
no_pin1 = 1;
@@ -2054,7 +2644,7 @@ static inline void __init check_timer(void)
setup_timer_IRQ0_pin(apic1, pin1, cfg->vector);
}
unmask_IO_APIC_irq(0);
- if (!no_timer_check && timer_irq_works()) {
+ if (timer_irq_works()) {
if (nmi_watchdog == NMI_IO_APIC) {
setup_nmi();
enable_8259A_irq(0);
@@ -2063,8 +2653,10 @@ static inline void __init check_timer(void)
clear_IO_APIC_pin(0, pin1);
goto out;
}
+#ifdef CONFIG_INTR_REMAP
if (intr_remapping_enabled)
panic("timer doesn't work through Interrupt-remapped IO-APIC");
+#endif
clear_IO_APIC_pin(apic1, pin1);
if (!no_pin1)
apic_printk(APIC_QUIET, KERN_ERR "..MP-BIOS bug: "
@@ -2104,6 +2696,9 @@ static inline void __init check_timer(void)
"through the IO-APIC - disabling NMI Watchdog!\n");
nmi_watchdog = NMI_NONE;
}
+#ifdef CONFIG_X86_32
+ timer_ack = 0;
+#endif
apic_printk(APIC_QUIET, KERN_INFO
"...trying to set up timer as Virtual Wire IRQ...\n");
@@ -2140,13 +2735,6 @@ out:
local_irq_restore(flags);
}
-static int __init notimercheck(char *s)
-{
- no_timer_check = 1;
- return 1;
-}
-__setup("no_timer_check", notimercheck);
-
/*
* Traditionally ISA IRQ2 is the cascade IRQ, and is not available
* to devices. However there may be an I/O APIC pin available for
@@ -2164,25 +2752,49 @@ __setup("no_timer_check", notimercheck);
* the I/O APIC in all cases now. No actual device should request
* it anyway. --macro
*/
-#define PIC_IRQS (1<<2)
+#define PIC_IRQS (1 << PIC_CASCADE_IR)
void __init setup_IO_APIC(void)
{
+#ifdef CONFIG_X86_32
+ enable_IO_APIC();
+#else
/*
* calling enable_IO_APIC() is moved to setup_local_APIC for BP
*/
+#endif
io_apic_irqs = ~PIC_IRQS;
apic_printk(APIC_VERBOSE, "ENABLING IO-APIC IRQs\n");
-
+ /*
+ * Set up IO-APIC IRQ routing.
+ */
+#ifdef CONFIG_X86_32
+ if (!acpi_ioapic)
+ setup_ioapic_ids_from_mpc();
+#endif
sync_Arb_IDs();
setup_IO_APIC_irqs();
init_IO_APIC_traps();
check_timer();
}
+/*
+ * Called after all the initialization is done. If we didnt find any
+ * APIC bugs then we can allow the modify fast path
+ */
+
+static int __init io_apic_bug_finalize(void)
+{
+ if (sis_apic_bug == -1)
+ sis_apic_bug = 0;
+ return 0;
+}
+
+late_initcall(io_apic_bug_finalize);
+
struct sysfs_ioapic_data {
struct sys_device dev;
struct IO_APIC_route_entry entry[0];
@@ -2270,32 +2882,51 @@ device_initcall(ioapic_init_sysfs);
/*
* Dynamic irq allocate and deallocation
*/
-int create_irq(void)
+unsigned int create_irq_nr(unsigned int irq_want)
{
/* Allocate an unused irq */
- int irq;
- int new;
+ unsigned int irq;
+ unsigned int new;
unsigned long flags;
+ struct irq_cfg *cfg_new;
- irq = -ENOSPC;
+ irq_want = nr_irqs - 1;
+
+ irq = 0;
spin_lock_irqsave(&vector_lock, flags);
- for (new = (NR_IRQS - 1); new >= 0; new--) {
+ for (new = irq_want; new > 0; new--) {
if (platform_legacy_irq(new))
continue;
- if (irq_cfg[new].vector != 0)
+ cfg_new = irq_cfg(new);
+ if (cfg_new && cfg_new->vector != 0)
continue;
+ /* check if need to create one */
+ if (!cfg_new)
+ cfg_new = irq_cfg_alloc(new);
if (__assign_irq_vector(new, TARGET_CPUS) == 0)
irq = new;
break;
}
spin_unlock_irqrestore(&vector_lock, flags);
- if (irq >= 0) {
+ if (irq > 0) {
dynamic_irq_init(irq);
}
return irq;
}
+int create_irq(void)
+{
+ int irq;
+
+ irq = create_irq_nr(nr_irqs - 1);
+
+ if (irq == 0)
+ irq = -1;
+
+ return irq;
+}
+
void destroy_irq(unsigned int irq)
{
unsigned long flags;
@@ -2316,7 +2947,7 @@ void destroy_irq(unsigned int irq)
#ifdef CONFIG_PCI_MSI
static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_msg *msg)
{
- struct irq_cfg *cfg = irq_cfg + irq;
+ struct irq_cfg *cfg;
int err;
unsigned dest;
cpumask_t tmp;
@@ -2326,6 +2957,7 @@ static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_ms
if (err)
return err;
+ cfg = irq_cfg(irq);
cpus_and(tmp, cfg->domain, tmp);
dest = cpu_mask_to_apicid(tmp);
@@ -2383,10 +3015,11 @@ static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_ms
#ifdef CONFIG_SMP
static void set_msi_irq_affinity(unsigned int irq, cpumask_t mask)
{
- struct irq_cfg *cfg = irq_cfg + irq;
+ struct irq_cfg *cfg;
struct msi_msg msg;
unsigned int dest;
cpumask_t tmp;
+ struct irq_desc *desc;
cpus_and(tmp, mask, cpu_online_map);
if (cpus_empty(tmp))
@@ -2395,6 +3028,7 @@ static void set_msi_irq_affinity(unsigned int irq, cpumask_t mask)
if (assign_irq_vector(irq, mask))
return;
+ cfg = irq_cfg(irq);
cpus_and(tmp, cfg->domain, mask);
dest = cpu_mask_to_apicid(tmp);
@@ -2406,7 +3040,8 @@ static void set_msi_irq_affinity(unsigned int irq, cpumask_t mask)
msg.address_lo |= MSI_ADDR_DEST_ID(dest);
write_msi_msg(irq, &msg);
- irq_desc[irq].affinity = mask;
+ desc = irq_to_desc(irq);
+ desc->affinity = mask;
}
#ifdef CONFIG_INTR_REMAP
@@ -2416,10 +3051,11 @@ static void set_msi_irq_affinity(unsigned int irq, cpumask_t mask)
*/
static void ir_set_msi_irq_affinity(unsigned int irq, cpumask_t mask)
{
- struct irq_cfg *cfg = irq_cfg + irq;
+ struct irq_cfg *cfg;
unsigned int dest;
cpumask_t tmp, cleanup_mask;
struct irte irte;
+ struct irq_desc *desc;
cpus_and(tmp, mask, cpu_online_map);
if (cpus_empty(tmp))
@@ -2431,6 +3067,7 @@ static void ir_set_msi_irq_affinity(unsigned int irq, cpumask_t mask)
if (assign_irq_vector(irq, mask))
return;
+ cfg = irq_cfg(irq);
cpus_and(tmp, cfg->domain, mask);
dest = cpu_mask_to_apicid(tmp);
@@ -2454,7 +3091,8 @@ static void ir_set_msi_irq_affinity(unsigned int irq, cpumask_t mask)
cfg->move_in_progress = 0;
}
- irq_desc[irq].affinity = mask;
+ desc = irq_to_desc(irq);
+ desc->affinity = mask;
}
#endif
#endif /* CONFIG_SMP */
@@ -2507,7 +3145,7 @@ static int msi_alloc_irte(struct pci_dev *dev, int irq, int nvec)
if (index < 0) {
printk(KERN_ERR
"Unable to allocate %d IRTE for PCI %s\n", nvec,
- pci_name(dev));
+ pci_name(dev));
return -ENOSPC;
}
return index;
@@ -2528,7 +3166,7 @@ static int setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc, int irq)
#ifdef CONFIG_INTR_REMAP
if (irq_remapped(irq)) {
- struct irq_desc *desc = irq_desc + irq;
+ struct irq_desc *desc = irq_to_desc(irq);
/*
* irq migration in process context
*/
@@ -2538,16 +3176,34 @@ static int setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc, int irq)
#endif
set_irq_chip_and_handler_name(irq, &msi_chip, handle_edge_irq, "edge");
+ dev_printk(KERN_DEBUG, &dev->dev, "irq %d for MSI/MSI-X\n", irq);
+
return 0;
}
+static unsigned int build_irq_for_pci_dev(struct pci_dev *dev)
+{
+ unsigned int irq;
+
+ irq = dev->bus->number;
+ irq <<= 8;
+ irq |= dev->devfn;
+ irq <<= 12;
+
+ return irq;
+}
+
int arch_setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc)
{
- int irq, ret;
+ unsigned int irq;
+ int ret;
+ unsigned int irq_want;
- irq = create_irq();
- if (irq < 0)
- return irq;
+ irq_want = build_irq_for_pci_dev(dev) + 0x100;
+
+ irq = create_irq_nr(irq_want);
+ if (irq == 0)
+ return -1;
#ifdef CONFIG_INTR_REMAP
if (!intr_remapping_enabled)
@@ -2574,18 +3230,22 @@ error:
int arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
{
- int irq, ret, sub_handle;
+ unsigned int irq;
+ int ret, sub_handle;
struct msi_desc *desc;
+ unsigned int irq_want;
+
#ifdef CONFIG_INTR_REMAP
struct intel_iommu *iommu = 0;
int index = 0;
#endif
+ irq_want = build_irq_for_pci_dev(dev) + 0x100;
sub_handle = 0;
list_for_each_entry(desc, &dev->msi_list, list) {
- irq = create_irq();
- if (irq < 0)
- return irq;
+ irq = create_irq_nr(irq_want--);
+ if (irq == 0)
+ return -1;
#ifdef CONFIG_INTR_REMAP
if (!intr_remapping_enabled)
goto no_ir;
@@ -2636,10 +3296,11 @@ void arch_teardown_msi_irq(unsigned int irq)
#ifdef CONFIG_SMP
static void dmar_msi_set_affinity(unsigned int irq, cpumask_t mask)
{
- struct irq_cfg *cfg = irq_cfg + irq;
+ struct irq_cfg *cfg;
struct msi_msg msg;
unsigned int dest;
cpumask_t tmp;
+ struct irq_desc *desc;
cpus_and(tmp, mask, cpu_online_map);
if (cpus_empty(tmp))
@@ -2648,6 +3309,7 @@ static void dmar_msi_set_affinity(unsigned int irq, cpumask_t mask)
if (assign_irq_vector(irq, mask))
return;
+ cfg = irq_cfg(irq);
cpus_and(tmp, cfg->domain, mask);
dest = cpu_mask_to_apicid(tmp);
@@ -2659,7 +3321,8 @@ static void dmar_msi_set_affinity(unsigned int irq, cpumask_t mask)
msg.address_lo |= MSI_ADDR_DEST_ID(dest);
dmar_msi_write(irq, &msg);
- irq_desc[irq].affinity = mask;
+ desc = irq_to_desc(irq);
+ desc->affinity = mask;
}
#endif /* CONFIG_SMP */
@@ -2689,6 +3352,69 @@ int arch_setup_dmar_msi(unsigned int irq)
}
#endif
+#ifdef CONFIG_HPET_TIMER
+
+#ifdef CONFIG_SMP
+static void hpet_msi_set_affinity(unsigned int irq, cpumask_t mask)
+{
+ struct irq_cfg *cfg;
+ struct irq_desc *desc;
+ struct msi_msg msg;
+ unsigned int dest;
+ cpumask_t tmp;
+
+ cpus_and(tmp, mask, cpu_online_map);
+ if (cpus_empty(tmp))
+ return;
+
+ if (assign_irq_vector(irq, mask))
+ return;
+
+ cfg = irq_cfg(irq);
+ cpus_and(tmp, cfg->domain, mask);
+ dest = cpu_mask_to_apicid(tmp);
+
+ hpet_msi_read(irq, &msg);
+
+ msg.data &= ~MSI_DATA_VECTOR_MASK;
+ msg.data |= MSI_DATA_VECTOR(cfg->vector);
+ msg.address_lo &= ~MSI_ADDR_DEST_ID_MASK;
+ msg.address_lo |= MSI_ADDR_DEST_ID(dest);
+
+ hpet_msi_write(irq, &msg);
+ desc = irq_to_desc(irq);
+ desc->affinity = mask;
+}
+#endif /* CONFIG_SMP */
+
+struct irq_chip hpet_msi_type = {
+ .name = "HPET_MSI",
+ .unmask = hpet_msi_unmask,
+ .mask = hpet_msi_mask,
+ .ack = ack_apic_edge,
+#ifdef CONFIG_SMP
+ .set_affinity = hpet_msi_set_affinity,
+#endif
+ .retrigger = ioapic_retrigger_irq,
+};
+
+int arch_setup_hpet_msi(unsigned int irq)
+{
+ int ret;
+ struct msi_msg msg;
+
+ ret = msi_compose_msg(NULL, irq, &msg);
+ if (ret < 0)
+ return ret;
+
+ hpet_msi_write(irq, &msg);
+ set_irq_chip_and_handler_name(irq, &hpet_msi_type, handle_edge_irq,
+ "edge");
+
+ return 0;
+}
+#endif
+
#endif /* CONFIG_PCI_MSI */
/*
* Hypertransport interrupt support
@@ -2713,9 +3439,10 @@ static void target_ht_irq(unsigned int irq, unsigned int dest, u8 vector)
static void set_ht_irq_affinity(unsigned int irq, cpumask_t mask)
{
- struct irq_cfg *cfg = irq_cfg + irq;
+ struct irq_cfg *cfg;
unsigned int dest;
cpumask_t tmp;
+ struct irq_desc *desc;
cpus_and(tmp, mask, cpu_online_map);
if (cpus_empty(tmp))
@@ -2724,11 +3451,13 @@ static void set_ht_irq_affinity(unsigned int irq, cpumask_t mask)
if (assign_irq_vector(irq, mask))
return;
+ cfg = irq_cfg(irq);
cpus_and(tmp, cfg->domain, mask);
dest = cpu_mask_to_apicid(tmp);
target_ht_irq(irq, dest, cfg->vector);
- irq_desc[irq].affinity = mask;
+ desc = irq_to_desc(irq);
+ desc->affinity = mask;
}
#endif
@@ -2745,7 +3474,7 @@ static struct irq_chip ht_irq_chip = {
int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev)
{
- struct irq_cfg *cfg = irq_cfg + irq;
+ struct irq_cfg *cfg;
int err;
cpumask_t tmp;
@@ -2755,6 +3484,7 @@ int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev)
struct ht_irq_msg msg;
unsigned dest;
+ cfg = irq_cfg(irq);
cpus_and(tmp, cfg->domain, tmp);
dest = cpu_mask_to_apicid(tmp);
@@ -2777,18 +3507,78 @@ int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev)
set_irq_chip_and_handler_name(irq, &ht_irq_chip,
handle_edge_irq, "edge");
+
+ dev_printk(KERN_DEBUG, &dev->dev, "irq %d for HT\n", irq);
}
return err;
}
#endif /* CONFIG_HT_IRQ */
-/* --------------------------------------------------------------------------
- ACPI-based IOAPIC Configuration
- -------------------------------------------------------------------------- */
+#ifdef CONFIG_X86_64
+/*
+ * Re-target the irq to the specified CPU and enable the specified MMR located
+ * on the specified blade to allow the sending of MSIs to the specified CPU.
+ */
+int arch_enable_uv_irq(char *irq_name, unsigned int irq, int cpu, int mmr_blade,
+ unsigned long mmr_offset)
+{
+ const cpumask_t *eligible_cpu = get_cpu_mask(cpu);
+ struct irq_cfg *cfg;
+ int mmr_pnode;
+ unsigned long mmr_value;
+ struct uv_IO_APIC_route_entry *entry;
+ unsigned long flags;
+ int err;
-#ifdef CONFIG_ACPI
+ err = assign_irq_vector(irq, *eligible_cpu);
+ if (err != 0)
+ return err;
-#define IO_APIC_MAX_ID 0xFE
+ spin_lock_irqsave(&vector_lock, flags);
+ set_irq_chip_and_handler_name(irq, &uv_irq_chip, handle_percpu_irq,
+ irq_name);
+ spin_unlock_irqrestore(&vector_lock, flags);
+
+ cfg = irq_cfg(irq);
+
+ mmr_value = 0;
+ entry = (struct uv_IO_APIC_route_entry *)&mmr_value;
+ BUG_ON(sizeof(struct uv_IO_APIC_route_entry) != sizeof(unsigned long));
+
+ entry->vector = cfg->vector;
+ entry->delivery_mode = INT_DELIVERY_MODE;
+ entry->dest_mode = INT_DEST_MODE;
+ entry->polarity = 0;
+ entry->trigger = 0;
+ entry->mask = 0;
+ entry->dest = cpu_mask_to_apicid(*eligible_cpu);
+
+ mmr_pnode = uv_blade_to_pnode(mmr_blade);
+ uv_write_global_mmr64(mmr_pnode, mmr_offset, mmr_value);
+
+ return irq;
+}
+
+/*
+ * Disable the specified MMR located on the specified blade so that MSIs are
+ * longer allowed to be sent.
+ */
+void arch_disable_uv_irq(int mmr_blade, unsigned long mmr_offset)
+{
+ unsigned long mmr_value;
+ struct uv_IO_APIC_route_entry *entry;
+ int mmr_pnode;
+
+ mmr_value = 0;
+ entry = (struct uv_IO_APIC_route_entry *)&mmr_value;
+ BUG_ON(sizeof(struct uv_IO_APIC_route_entry) != sizeof(unsigned long));
+
+ entry->mask = 1;
+
+ mmr_pnode = uv_blade_to_pnode(mmr_blade);
+ uv_write_global_mmr64(mmr_pnode, mmr_offset, mmr_value);
+}
+#endif /* CONFIG_X86_64 */
int __init io_apic_get_redir_entries (int ioapic)
{
@@ -2802,6 +3592,122 @@ int __init io_apic_get_redir_entries (int ioapic)
return reg_01.bits.entries;
}
+int __init probe_nr_irqs(void)
+{
+ int idx;
+ int nr = 0;
+#ifndef CONFIG_XEN
+ int nr_min = 32;
+#else
+ int nr_min = NR_IRQS;
+#endif
+
+ for (idx = 0; idx < nr_ioapics; idx++)
+ nr += io_apic_get_redir_entries(idx) + 1;
+
+ /* double it for hotplug and msi and nmi */
+ nr <<= 1;
+
+ /* something wrong ? */
+ if (nr < nr_min)
+ nr = nr_min;
+
+ return nr;
+}
+
+/* --------------------------------------------------------------------------
+ ACPI-based IOAPIC Configuration
+ -------------------------------------------------------------------------- */
+
+#ifdef CONFIG_ACPI
+
+#ifdef CONFIG_X86_32
+int __init io_apic_get_unique_id(int ioapic, int apic_id)
+{
+ union IO_APIC_reg_00 reg_00;
+ static physid_mask_t apic_id_map = PHYSID_MASK_NONE;
+ physid_mask_t tmp;
+ unsigned long flags;
+ int i = 0;
+
+ /*
+ * The P4 platform supports up to 256 APIC IDs on two separate APIC
+ * buses (one for LAPICs, one for IOAPICs), where predecessors only
+ * supports up to 16 on one shared APIC bus.
+ *
+ * TBD: Expand LAPIC/IOAPIC support on P4-class systems to take full
+ * advantage of new APIC bus architecture.
+ */
+
+ if (physids_empty(apic_id_map))
+ apic_id_map = ioapic_phys_id_map(phys_cpu_present_map);
+
+ spin_lock_irqsave(&ioapic_lock, flags);
+ reg_00.raw = io_apic_read(ioapic, 0);
+ spin_unlock_irqrestore(&ioapic_lock, flags);
+
+ if (apic_id >= get_physical_broadcast()) {
+ printk(KERN_WARNING "IOAPIC[%d]: Invalid apic_id %d, trying "
+ "%d\n", ioapic, apic_id, reg_00.bits.ID);
+ apic_id = reg_00.bits.ID;
+ }
+
+ /*
+ * Every APIC in a system must have a unique ID or we get lots of nice
+ * 'stuck on smp_invalidate_needed IPI wait' messages.
+ */
+ if (check_apicid_used(apic_id_map, apic_id)) {
+
+ for (i = 0; i < get_physical_broadcast(); i++) {
+ if (!check_apicid_used(apic_id_map, i))
+ break;
+ }
+
+ if (i == get_physical_broadcast())
+ panic("Max apic_id exceeded!\n");
+
+ printk(KERN_WARNING "IOAPIC[%d]: apic_id %d already used, "
+ "trying %d\n", ioapic, apic_id, i);
+
+ apic_id = i;
+ }
+
+ tmp = apicid_to_cpu_present(apic_id);
+ physids_or(apic_id_map, apic_id_map, tmp);
+
+ if (reg_00.bits.ID != apic_id) {
+ reg_00.bits.ID = apic_id;
+
+ spin_lock_irqsave(&ioapic_lock, flags);
+ io_apic_write(ioapic, 0, reg_00.raw);
+ reg_00.raw = io_apic_read(ioapic, 0);
+ spin_unlock_irqrestore(&ioapic_lock, flags);
+
+ /* Sanity check */
+ if (reg_00.bits.ID != apic_id) {
+ printk("IOAPIC[%d]: Unable to change apic_id!\n", ioapic);
+ return -1;
+ }
+ }
+
+ apic_printk(APIC_VERBOSE, KERN_INFO
+ "IOAPIC[%d]: Assigned apic_id %d\n", ioapic, apic_id);
+
+ return apic_id;
+}
+
+int __init io_apic_get_version(int ioapic)
+{
+ union IO_APIC_reg_01 reg_01;
+ unsigned long flags;
+
+ spin_lock_irqsave(&ioapic_lock, flags);
+ reg_01.raw = io_apic_read(ioapic, 1);
+ spin_unlock_irqrestore(&ioapic_lock, flags);
+
+ return reg_01.bits.version;
+}
+#endif
int io_apic_set_pci_routing (int ioapic, int pin, int irq, int triggering, int polarity)
{
@@ -2853,6 +3759,7 @@ int acpi_get_override_irq(int bus_irq, int *trigger, int *polarity)
void __init setup_ioapic_dest(void)
{
int pin, ioapic, irq, irq_entry;
+ struct irq_cfg *cfg;
if (skip_ioapic_setup == 1)
return;
@@ -2868,7 +3775,8 @@ void __init setup_ioapic_dest(void)
* when you have too many devices, because at that time only boot
* cpu is online.
*/
- if (!irq_cfg[irq].vector)
+ cfg = irq_cfg(irq);
+ if (!cfg->vector)
setup_IO_APIC_irq(ioapic, pin, irq,
irq_trigger(irq_entry),
irq_polarity(irq_entry));
@@ -2926,18 +3834,33 @@ void __init ioapic_init_mappings(void)
struct resource *ioapic_res;
int i;
+ irq_2_pin_init();
ioapic_res = ioapic_setup_resources();
for (i = 0; i < nr_ioapics; i++) {
if (smp_found_config) {
ioapic_phys = mp_ioapics[i].mp_apicaddr;
+#ifdef CONFIG_X86_32
+ if (!ioapic_phys) {
+ printk(KERN_ERR
+ "WARNING: bogus zero IO-APIC "
+ "address found in MPTABLE, "
+ "disabling IO/APIC support!\n");
+ smp_found_config = 0;
+ skip_ioapic_setup = 1;
+ goto fake_ioapic_page;
+ }
+#endif
} else {
+#ifdef CONFIG_X86_32
+fake_ioapic_page:
+#endif
ioapic_phys = (unsigned long)
alloc_bootmem_pages(PAGE_SIZE);
ioapic_phys = __pa(ioapic_phys);
}
set_fixmap_nocache(idx, ioapic_phys);
apic_printk(APIC_VERBOSE,
- "mapped IOAPIC to %016lx (%016lx)\n",
+ "mapped IOAPIC to %08lx (%08lx)\n",
__fix_to_virt(idx), ioapic_phys);
idx++;
@@ -2971,4 +3894,3 @@ static int __init ioapic_insert_resources(void)
/* Insert the IO APIC resources after PCI initialization has occured to handle
* IO APICS that are mapped in on a BAR in PCI space. */
late_initcall(ioapic_insert_resources);
-
diff --git a/arch/x86/kernel/io_apic_32.c b/arch/x86/kernel/io_apic_32.c
deleted file mode 100644
index e710289f673e..000000000000
--- a/arch/x86/kernel/io_apic_32.c
+++ /dev/null
@@ -1,2908 +0,0 @@
-/*
- * Intel IO-APIC support for multi-Pentium hosts.
- *
- * Copyright (C) 1997, 1998, 1999, 2000 Ingo Molnar, Hajnalka Szabo
- *
- * Many thanks to Stig Venaas for trying out countless experimental
- * patches and reporting/debugging problems patiently!
- *
- * (c) 1999, Multiple IO-APIC support, developed by
- * Ken-ichi Yaku and
- * Hidemi Kishimoto ,
- * further tested and cleaned up by Zach Brown
- * and Ingo Molnar
- *
- * Fixes
- * Maciej W. Rozycki : Bits for genuine 82489DX APICs;
- * thanks to Eric Gilmore
- * and Rolf G. Tews
- * for testing these extensively
- * Paul Diefenbaugh : Added full ACPI support
- */
-
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include /* time_after() */
-
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-
-#include
-#include
-
-#define __apicdebuginit(type) static type __init
-
-int (*ioapic_renumber_irq)(int ioapic, int irq);
-atomic_t irq_mis_count;
-
-/* Where if anywhere is the i8259 connect in external int mode */
-static struct { int pin, apic; } ioapic_i8259 = { -1, -1 };
-
-static DEFINE_SPINLOCK(ioapic_lock);
-DEFINE_SPINLOCK(vector_lock);
-
-int timer_through_8259 __initdata;
-
-/*
- * Is the SiS APIC rmw bug present ?
- * -1 = don't know, 0 = no, 1 = yes
- */
-int sis_apic_bug = -1;
-
-/*
- * # of IRQ routing registers
- */
-int nr_ioapic_registers[MAX_IO_APICS];
-
-/* I/O APIC entries */
-struct mp_config_ioapic mp_ioapics[MAX_IO_APICS];
-int nr_ioapics;
-
-/* MP IRQ source entries */
-struct mp_config_intsrc mp_irqs[MAX_IRQ_SOURCES];
-
-/* # of MP IRQ source entries */
-int mp_irq_entries;
-
-#if defined (CONFIG_MCA) || defined (CONFIG_EISA)
-int mp_bus_id_to_type[MAX_MP_BUSSES];
-#endif
-
-DECLARE_BITMAP(mp_bus_not_pci, MAX_MP_BUSSES);
-
-static int disable_timer_pin_1 __initdata;
-
-/*
- * Rough estimation of how many shared IRQs there are, can
- * be changed anytime.
- */
-#define MAX_PLUS_SHARED_IRQS NR_IRQS
-#define PIN_MAP_SIZE (MAX_PLUS_SHARED_IRQS + NR_IRQS)
-
-/*
- * This is performance-critical, we want to do it O(1)
- *
- * the indexing order of this array favors 1:1 mappings
- * between pins and IRQs.
- */
-
-static struct irq_pin_list {
- int apic, pin, next;
-} irq_2_pin[PIN_MAP_SIZE];
-
-struct io_apic {
- unsigned int index;
- unsigned int unused[3];
- unsigned int data;
-};
-
-static __attribute_const__ struct io_apic __iomem *io_apic_base(int idx)
-{
- return (void __iomem *) __fix_to_virt(FIX_IO_APIC_BASE_0 + idx)
- + (mp_ioapics[idx].mp_apicaddr & ~PAGE_MASK);
-}
-
-static inline unsigned int io_apic_read(unsigned int apic, unsigned int reg)
-{
- struct io_apic __iomem *io_apic = io_apic_base(apic);
- writel(reg, &io_apic->index);
- return readl(&io_apic->data);
-}
-
-static inline void io_apic_write(unsigned int apic, unsigned int reg, unsigned int value)
-{
- struct io_apic __iomem *io_apic = io_apic_base(apic);
- writel(reg, &io_apic->index);
- writel(value, &io_apic->data);
-}
-
-/*
- * Re-write a value: to be used for read-modify-write
- * cycles where the read already set up the index register.
- *
- * Older SiS APIC requires we rewrite the index register
- */
-static inline void io_apic_modify(unsigned int apic, unsigned int reg, unsigned int value)
-{
- volatile struct io_apic __iomem *io_apic = io_apic_base(apic);
- if (sis_apic_bug)
- writel(reg, &io_apic->index);
- writel(value, &io_apic->data);
-}
-
-union entry_union {
- struct { u32 w1, w2; };
- struct IO_APIC_route_entry entry;
-};
-
-static struct IO_APIC_route_entry ioapic_read_entry(int apic, int pin)
-{
- union entry_union eu;
- unsigned long flags;
- spin_lock_irqsave(&ioapic_lock, flags);
- eu.w1 = io_apic_read(apic, 0x10 + 2 * pin);
- eu.w2 = io_apic_read(apic, 0x11 + 2 * pin);
- spin_unlock_irqrestore(&ioapic_lock, flags);
- return eu.entry;
-}
-
-/*
- * When we write a new IO APIC routing entry, we need to write the high
- * word first! If the mask bit in the low word is clear, we will enable
- * the interrupt, and we need to make sure the entry is fully populated
- * before that happens.
- */
-static void
-__ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e)
-{
- union entry_union eu;
- eu.entry = e;
- io_apic_write(apic, 0x11 + 2*pin, eu.w2);
- io_apic_write(apic, 0x10 + 2*pin, eu.w1);
-}
-
-static void ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e)
-{
- unsigned long flags;
- spin_lock_irqsave(&ioapic_lock, flags);
- __ioapic_write_entry(apic, pin, e);
- spin_unlock_irqrestore(&ioapic_lock, flags);
-}
-
-/*
- * When we mask an IO APIC routing entry, we need to write the low
- * word first, in order to set the mask bit before we change the
- * high bits!
- */
-static void ioapic_mask_entry(int apic, int pin)
-{
- unsigned long flags;
- union entry_union eu = { .entry.mask = 1 };
-
- spin_lock_irqsave(&ioapic_lock, flags);
- io_apic_write(apic, 0x10 + 2*pin, eu.w1);
- io_apic_write(apic, 0x11 + 2*pin, eu.w2);
- spin_unlock_irqrestore(&ioapic_lock, flags);
-}
-
-/*
- * The common case is 1:1 IRQ<->pin mappings. Sometimes there are
- * shared ISA-space IRQs, so we have to support them. We are super
- * fast in the common case, and fast for shared ISA-space IRQs.
- */
-static void add_pin_to_irq(unsigned int irq, int apic, int pin)
-{
- static int first_free_entry = NR_IRQS;
- struct irq_pin_list *entry = irq_2_pin + irq;
-
- while (entry->next)
- entry = irq_2_pin + entry->next;
-
- if (entry->pin != -1) {
- entry->next = first_free_entry;
- entry = irq_2_pin + entry->next;
- if (++first_free_entry >= PIN_MAP_SIZE)
- panic("io_apic.c: whoops");
- }
- entry->apic = apic;
- entry->pin = pin;
-}
-
-/*
- * Reroute an IRQ to a different pin.
- */
-static void __init replace_pin_at_irq(unsigned int irq,
- int oldapic, int oldpin,
- int newapic, int newpin)
-{
- struct irq_pin_list *entry = irq_2_pin + irq;
-
- while (1) {
- if (entry->apic == oldapic && entry->pin == oldpin) {
- entry->apic = newapic;
- entry->pin = newpin;
- }
- if (!entry->next)
- break;
- entry = irq_2_pin + entry->next;
- }
-}
-
-static void __modify_IO_APIC_irq(unsigned int irq, unsigned long enable, unsigned long disable)
-{
- struct irq_pin_list *entry = irq_2_pin + irq;
- unsigned int pin, reg;
-
- for (;;) {
- pin = entry->pin;
- if (pin == -1)
- break;
- reg = io_apic_read(entry->apic, 0x10 + pin*2);
- reg &= ~disable;
- reg |= enable;
- io_apic_modify(entry->apic, 0x10 + pin*2, reg);
- if (!entry->next)
- break;
- entry = irq_2_pin + entry->next;
- }
-}
-
-/* mask = 1 */
-static void __mask_IO_APIC_irq(unsigned int irq)
-{
- __modify_IO_APIC_irq(irq, IO_APIC_REDIR_MASKED, 0);
-}
-
-/* mask = 0 */
-static void __unmask_IO_APIC_irq(unsigned int irq)
-{
- __modify_IO_APIC_irq(irq, 0, IO_APIC_REDIR_MASKED);
-}
-
-/* mask = 1, trigger = 0 */
-static void __mask_and_edge_IO_APIC_irq(unsigned int irq)
-{
- __modify_IO_APIC_irq(irq, IO_APIC_REDIR_MASKED,
- IO_APIC_REDIR_LEVEL_TRIGGER);
-}
-
-/* mask = 0, trigger = 1 */
-static void __unmask_and_level_IO_APIC_irq(unsigned int irq)
-{
- __modify_IO_APIC_irq(irq, IO_APIC_REDIR_LEVEL_TRIGGER,
- IO_APIC_REDIR_MASKED);
-}
-
-static void mask_IO_APIC_irq(unsigned int irq)
-{
- unsigned long flags;
-
- spin_lock_irqsave(&ioapic_lock, flags);
- __mask_IO_APIC_irq(irq);
- spin_unlock_irqrestore(&ioapic_lock, flags);
-}
-
-static void unmask_IO_APIC_irq(unsigned int irq)
-{
- unsigned long flags;
-
- spin_lock_irqsave(&ioapic_lock, flags);
- __unmask_IO_APIC_irq(irq);
- spin_unlock_irqrestore(&ioapic_lock, flags);
-}
-
-static void clear_IO_APIC_pin(unsigned int apic, unsigned int pin)
-{
- struct IO_APIC_route_entry entry;
-
- /* Check delivery_mode to be sure we're not clearing an SMI pin */
- entry = ioapic_read_entry(apic, pin);
- if (entry.delivery_mode == dest_SMI)
- return;
-
- /*
- * Disable it in the IO-APIC irq-routing table:
- */
- ioapic_mask_entry(apic, pin);
-}
-
-static void clear_IO_APIC(void)
-{
- int apic, pin;
-
- for (apic = 0; apic < nr_ioapics; apic++)
- for (pin = 0; pin < nr_ioapic_registers[apic]; pin++)
- clear_IO_APIC_pin(apic, pin);
-}
-
-#ifdef CONFIG_SMP
-static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t cpumask)
-{
- unsigned long flags;
- int pin;
- struct irq_pin_list *entry = irq_2_pin + irq;
- unsigned int apicid_value;
- cpumask_t tmp;
-
- cpus_and(tmp, cpumask, cpu_online_map);
- if (cpus_empty(tmp))
- tmp = TARGET_CPUS;
-
- cpus_and(cpumask, tmp, CPU_MASK_ALL);
-
- apicid_value = cpu_mask_to_apicid(cpumask);
- /* Prepare to do the io_apic_write */
- apicid_value = apicid_value << 24;
- spin_lock_irqsave(&ioapic_lock, flags);
- for (;;) {
- pin = entry->pin;
- if (pin == -1)
- break;
- io_apic_write(entry->apic, 0x10 + 1 + pin*2, apicid_value);
- if (!entry->next)
- break;
- entry = irq_2_pin + entry->next;
- }
- irq_desc[irq].affinity = cpumask;
- spin_unlock_irqrestore(&ioapic_lock, flags);
-}
-
-#if defined(CONFIG_IRQBALANCE)
-# include /* kernel_thread() */
-# include /* kstat */
-# include /* kmalloc() */
-# include
-
-#define IRQBALANCE_CHECK_ARCH -999
-#define MAX_BALANCED_IRQ_INTERVAL (5*HZ)
-#define MIN_BALANCED_IRQ_INTERVAL (HZ/2)
-#define BALANCED_IRQ_MORE_DELTA (HZ/10)
-#define BALANCED_IRQ_LESS_DELTA (HZ)
-
-static int irqbalance_disabled __read_mostly = IRQBALANCE_CHECK_ARCH;
-static int physical_balance __read_mostly;
-static long balanced_irq_interval __read_mostly = MAX_BALANCED_IRQ_INTERVAL;
-
-static struct irq_cpu_info {
- unsigned long *last_irq;
- unsigned long *irq_delta;
- unsigned long irq;
-} irq_cpu_data[NR_CPUS];
-
-#define CPU_IRQ(cpu) (irq_cpu_data[cpu].irq)
-#define LAST_CPU_IRQ(cpu, irq) (irq_cpu_data[cpu].last_irq[irq])
-#define IRQ_DELTA(cpu, irq) (irq_cpu_data[cpu].irq_delta[irq])
-
-#define IDLE_ENOUGH(cpu,now) \
- (idle_cpu(cpu) && ((now) - per_cpu(irq_stat, (cpu)).idle_timestamp > 1))
-
-#define IRQ_ALLOWED(cpu, allowed_mask) cpu_isset(cpu, allowed_mask)
-
-#define CPU_TO_PACKAGEINDEX(i) (first_cpu(per_cpu(cpu_sibling_map, i)))
-
-static cpumask_t balance_irq_affinity[NR_IRQS] = {
- [0 ... NR_IRQS-1] = CPU_MASK_ALL
-};
-
-void set_balance_irq_affinity(unsigned int irq, cpumask_t mask)
-{
- balance_irq_affinity[irq] = mask;
-}
-
-static unsigned long move(int curr_cpu, cpumask_t allowed_mask,
- unsigned long now, int direction)
-{
- int search_idle = 1;
- int cpu = curr_cpu;
-
- goto inside;
-
- do {
- if (unlikely(cpu == curr_cpu))
- search_idle = 0;
-inside:
- if (direction == 1) {
- cpu++;
- if (cpu >= NR_CPUS)
- cpu = 0;
- } else {
- cpu--;
- if (cpu == -1)
- cpu = NR_CPUS-1;
- }
- } while (!cpu_online(cpu) || !IRQ_ALLOWED(cpu, allowed_mask) ||
- (search_idle && !IDLE_ENOUGH(cpu, now)));
-
- return cpu;
-}
-
-static inline void balance_irq(int cpu, int irq)
-{
- unsigned long now = jiffies;
- cpumask_t allowed_mask;
- unsigned int new_cpu;
-
- if (irqbalance_disabled)
- return;
-
- cpus_and(allowed_mask, cpu_online_map, balance_irq_affinity[irq]);
- new_cpu = move(cpu, allowed_mask, now, 1);
- if (cpu != new_cpu)
- set_pending_irq(irq, cpumask_of_cpu(new_cpu));
-}
-
-static inline void rotate_irqs_among_cpus(unsigned long useful_load_threshold)
-{
- int i, j;
-
- for_each_online_cpu(i) {
- for (j = 0; j < NR_IRQS; j++) {
- if (!irq_desc[j].action)
- continue;
- /* Is it a significant load ? */
- if (IRQ_DELTA(CPU_TO_PACKAGEINDEX(i), j) <
- useful_load_threshold)
- continue;
- balance_irq(i, j);
- }
- }
- balanced_irq_interval = max((long)MIN_BALANCED_IRQ_INTERVAL,
- balanced_irq_interval - BALANCED_IRQ_LESS_DELTA);
- return;
-}
-
-static void do_irq_balance(void)
-{
- int i, j;
- unsigned long max_cpu_irq = 0, min_cpu_irq = (~0);
- unsigned long move_this_load = 0;
- int max_loaded = 0, min_loaded = 0;
- int load;
- unsigned long useful_load_threshold = balanced_irq_interval + 10;
- int selected_irq;
- int tmp_loaded, first_attempt = 1;
- unsigned long tmp_cpu_irq;
- unsigned long imbalance = 0;
- cpumask_t allowed_mask, target_cpu_mask, tmp;
-
- for_each_possible_cpu(i) {
- int package_index;
- CPU_IRQ(i) = 0;
- if (!cpu_online(i))
- continue;
- package_index = CPU_TO_PACKAGEINDEX(i);
- for (j = 0; j < NR_IRQS; j++) {
- unsigned long value_now, delta;
- /* Is this an active IRQ or balancing disabled ? */
- if (!irq_desc[j].action || irq_balancing_disabled(j))
- continue;
- if (package_index == i)
- IRQ_DELTA(package_index, j) = 0;
- /* Determine the total count per processor per IRQ */
- value_now = (unsigned long) kstat_cpu(i).irqs[j];
-
- /* Determine the activity per processor per IRQ */
- delta = value_now - LAST_CPU_IRQ(i, j);
-
- /* Update last_cpu_irq[][] for the next time */
- LAST_CPU_IRQ(i, j) = value_now;
-
- /* Ignore IRQs whose rate is less than the clock */
- if (delta < useful_load_threshold)
- continue;
- /* update the load for the processor or package total */
- IRQ_DELTA(package_index, j) += delta;
-
- /* Keep track of the higher numbered sibling as well */
- if (i != package_index)
- CPU_IRQ(i) += delta;
- /*
- * We have sibling A and sibling B in the package
- *
- * cpu_irq[A] = load for cpu A + load for cpu B
- * cpu_irq[B] = load for cpu B
- */
- CPU_IRQ(package_index) += delta;
- }
- }
- /* Find the least loaded processor package */
- for_each_online_cpu(i) {
- if (i != CPU_TO_PACKAGEINDEX(i))
- continue;
- if (min_cpu_irq > CPU_IRQ(i)) {
- min_cpu_irq = CPU_IRQ(i);
- min_loaded = i;
- }
- }
- max_cpu_irq = ULONG_MAX;
-
-tryanothercpu:
- /*
- * Look for heaviest loaded processor.
- * We may come back to get the next heaviest loaded processor.
- * Skip processors with trivial loads.
- */
- tmp_cpu_irq = 0;
- tmp_loaded = -1;
- for_each_online_cpu(i) {
- if (i != CPU_TO_PACKAGEINDEX(i))
- continue;
- if (max_cpu_irq <= CPU_IRQ(i))
- continue;
- if (tmp_cpu_irq < CPU_IRQ(i)) {
- tmp_cpu_irq = CPU_IRQ(i);
- tmp_loaded = i;
- }
- }
-
- if (tmp_loaded == -1) {
- /*
- * In the case of small number of heavy interrupt sources,
- * loading some of the cpus too much. We use Ingo's original
- * approach to rotate them around.
- */
- if (!first_attempt && imbalance >= useful_load_threshold) {
- rotate_irqs_among_cpus(useful_load_threshold);
- return;
- }
- goto not_worth_the_effort;
- }
-
- first_attempt = 0; /* heaviest search */
- max_cpu_irq = tmp_cpu_irq; /* load */
- max_loaded = tmp_loaded; /* processor */
- imbalance = (max_cpu_irq - min_cpu_irq) / 2;
-
- /*
- * if imbalance is less than approx 10% of max load, then
- * observe diminishing returns action. - quit
- */
- if (imbalance < (max_cpu_irq >> 3))
- goto not_worth_the_effort;
-
-tryanotherirq:
- /* if we select an IRQ to move that can't go where we want, then
- * see if there is another one to try.
- */
- move_this_load = 0;
- selected_irq = -1;
- for (j = 0; j < NR_IRQS; j++) {
- /* Is this an active IRQ? */
- if (!irq_desc[j].action)
- continue;
- if (imbalance <= IRQ_DELTA(max_loaded, j))
- continue;
- /* Try to find the IRQ that is closest to the imbalance
- * without going over.
- */
- if (move_this_load < IRQ_DELTA(max_loaded, j)) {
- move_this_load = IRQ_DELTA(max_loaded, j);
- selected_irq = j;
- }
- }
- if (selected_irq == -1)
- goto tryanothercpu;
-
- imbalance = move_this_load;
-
- /* For physical_balance case, we accumulated both load
- * values in the one of the siblings cpu_irq[],
- * to use the same code for physical and logical processors
- * as much as possible.
- *
- * NOTE: the cpu_irq[] array holds the sum of the load for
- * sibling A and sibling B in the slot for the lowest numbered
- * sibling (A), _AND_ the load for sibling B in the slot for
- * the higher numbered sibling.
- *
- * We seek the least loaded sibling by making the comparison
- * (A+B)/2 vs B
- */
- load = CPU_IRQ(min_loaded) >> 1;
- for_each_cpu_mask(j, per_cpu(cpu_sibling_map, min_loaded)) {
- if (load > CPU_IRQ(j)) {
- /* This won't change cpu_sibling_map[min_loaded] */
- load = CPU_IRQ(j);
- min_loaded = j;
- }
- }
-
- cpus_and(allowed_mask,
- cpu_online_map,
- balance_irq_affinity[selected_irq]);
- target_cpu_mask = cpumask_of_cpu(min_loaded);
- cpus_and(tmp, target_cpu_mask, allowed_mask);
-
- if (!cpus_empty(tmp)) {
- /* mark for change destination */
- set_pending_irq(selected_irq, cpumask_of_cpu(min_loaded));
-
- /* Since we made a change, come back sooner to
- * check for more variation.
- */
- balanced_irq_interval = max((long)MIN_BALANCED_IRQ_INTERVAL,
- balanced_irq_interval - BALANCED_IRQ_LESS_DELTA);
- return;
- }
- goto tryanotherirq;
-
-not_worth_the_effort:
- /*
- * if we did not find an IRQ to move, then adjust the time interval
- * upward
- */
- balanced_irq_interval = min((long)MAX_BALANCED_IRQ_INTERVAL,
- balanced_irq_interval + BALANCED_IRQ_MORE_DELTA);
- return;
-}
-
-static int balanced_irq(void *unused)
-{
- int i;
- unsigned long prev_balance_time = jiffies;
- long time_remaining = balanced_irq_interval;
-
- /* push everything to CPU 0 to give us a starting point. */
- for (i = 0 ; i < NR_IRQS ; i++) {
- irq_desc[i].pending_mask = cpumask_of_cpu(0);
- set_pending_irq(i, cpumask_of_cpu(0));
- }
-
- set_freezable();
- for ( ; ; ) {
- time_remaining = schedule_timeout_interruptible(time_remaining);
- try_to_freeze();
- if (time_after(jiffies,
- prev_balance_time+balanced_irq_interval)) {
- preempt_disable();
- do_irq_balance();
- prev_balance_time = jiffies;
- time_remaining = balanced_irq_interval;
- preempt_enable();
- }
- }
- return 0;
-}
-
-static int __init balanced_irq_init(void)
-{
- int i;
- struct cpuinfo_x86 *c;
- cpumask_t tmp;
-
- cpus_shift_right(tmp, cpu_online_map, 2);
- c = &boot_cpu_data;
- /* When not overwritten by the command line ask subarchitecture. */
- if (irqbalance_disabled == IRQBALANCE_CHECK_ARCH)
- irqbalance_disabled = NO_BALANCE_IRQ;
- if (irqbalance_disabled)
- return 0;
-
- /* disable irqbalance completely if there is only one processor online */
- if (num_online_cpus() < 2) {
- irqbalance_disabled = 1;
- return 0;
- }
- /*
- * Enable physical balance only if more than 1 physical processor
- * is present
- */
- if (smp_num_siblings > 1 && !cpus_empty(tmp))
- physical_balance = 1;
-
- for_each_online_cpu(i) {
- irq_cpu_data[i].irq_delta = kzalloc(sizeof(unsigned long) * NR_IRQS, GFP_KERNEL);
- irq_cpu_data[i].last_irq = kzalloc(sizeof(unsigned long) * NR_IRQS, GFP_KERNEL);
- if (irq_cpu_data[i].irq_delta == NULL || irq_cpu_data[i].last_irq == NULL) {
- printk(KERN_ERR "balanced_irq_init: out of memory");
- goto failed;
- }
- }
-
- printk(KERN_INFO "Starting balanced_irq\n");
- if (!IS_ERR(kthread_run(balanced_irq, NULL, "kirqd")))
- return 0;
- printk(KERN_ERR "balanced_irq_init: failed to spawn balanced_irq");
-failed:
- for_each_possible_cpu(i) {
- kfree(irq_cpu_data[i].irq_delta);
- irq_cpu_data[i].irq_delta = NULL;
- kfree(irq_cpu_data[i].last_irq);
- irq_cpu_data[i].last_irq = NULL;
- }
- return 0;
-}
-
-int __devinit irqbalance_disable(char *str)
-{
- irqbalance_disabled = 1;
- return 1;
-}
-
-__setup("noirqbalance", irqbalance_disable);
-
-late_initcall(balanced_irq_init);
-#endif /* CONFIG_IRQBALANCE */
-#endif /* CONFIG_SMP */
-
-#ifndef CONFIG_SMP
-void send_IPI_self(int vector)
-{
- unsigned int cfg;
-
- /*
- * Wait for idle.
- */
- apic_wait_icr_idle();
- cfg = APIC_DM_FIXED | APIC_DEST_SELF | vector | APIC_DEST_LOGICAL;
- /*
- * Send the IPI. The write to APIC_ICR fires this off.
- */
- apic_write(APIC_ICR, cfg);
-}
-#endif /* !CONFIG_SMP */
-
-
-/*
- * support for broken MP BIOSs, enables hand-redirection of PIRQ0-7 to
- * specific CPU-side IRQs.
- */
-
-#define MAX_PIRQS 8
-static int pirq_entries [MAX_PIRQS];
-static int pirqs_enabled;
-int skip_ioapic_setup;
-
-static int __init ioapic_pirq_setup(char *str)
-{
- int i, max;
- int ints[MAX_PIRQS+1];
-
- get_options(str, ARRAY_SIZE(ints), ints);
-
- for (i = 0; i < MAX_PIRQS; i++)
- pirq_entries[i] = -1;
-
- pirqs_enabled = 1;
- apic_printk(APIC_VERBOSE, KERN_INFO
- "PIRQ redirection, working around broken MP-BIOS.\n");
- max = MAX_PIRQS;
- if (ints[0] < MAX_PIRQS)
- max = ints[0];
-
- for (i = 0; i < max; i++) {
- apic_printk(APIC_VERBOSE, KERN_DEBUG
- "... PIRQ%d -> IRQ %d\n", i, ints[i+1]);
- /*
- * PIRQs are mapped upside down, usually.
- */
- pirq_entries[MAX_PIRQS-i-1] = ints[i+1];
- }
- return 1;
-}
-
-__setup("pirq=", ioapic_pirq_setup);
-
-/*
- * Find the IRQ entry number of a certain pin.
- */
-static int find_irq_entry(int apic, int pin, int type)
-{
- int i;
-
- for (i = 0; i < mp_irq_entries; i++)
- if (mp_irqs[i].mp_irqtype == type &&
- (mp_irqs[i].mp_dstapic == mp_ioapics[apic].mp_apicid ||
- mp_irqs[i].mp_dstapic == MP_APIC_ALL) &&
- mp_irqs[i].mp_dstirq == pin)
- return i;
-
- return -1;
-}
-
-/*
- * Find the pin to which IRQ[irq] (ISA) is connected
- */
-static int __init find_isa_irq_pin(int irq, int type)
-{
- int i;
-
- for (i = 0; i < mp_irq_entries; i++) {
- int lbus = mp_irqs[i].mp_srcbus;
-
- if (test_bit(lbus, mp_bus_not_pci) &&
- (mp_irqs[i].mp_irqtype == type) &&
- (mp_irqs[i].mp_srcbusirq == irq))
-
- return mp_irqs[i].mp_dstirq;
- }
- return -1;
-}
-
-static int __init find_isa_irq_apic(int irq, int type)
-{
- int i;
-
- for (i = 0; i < mp_irq_entries; i++) {
- int lbus = mp_irqs[i].mp_srcbus;
-
- if (test_bit(lbus, mp_bus_not_pci) &&
- (mp_irqs[i].mp_irqtype == type) &&
- (mp_irqs[i].mp_srcbusirq == irq))
- break;
- }
- if (i < mp_irq_entries) {
- int apic;
- for (apic = 0; apic < nr_ioapics; apic++) {
- if (mp_ioapics[apic].mp_apicid == mp_irqs[i].mp_dstapic)
- return apic;
- }
- }
-
- return -1;
-}
-
-/*
- * Find a specific PCI IRQ entry.
- * Not an __init, possibly needed by modules
- */
-static int pin_2_irq(int idx, int apic, int pin);
-
-int IO_APIC_get_PCI_irq_vector(int bus, int slot, int pin)
-{
- int apic, i, best_guess = -1;
-
- apic_printk(APIC_DEBUG, "querying PCI -> IRQ mapping bus:%d, "
- "slot:%d, pin:%d.\n", bus, slot, pin);
- if (test_bit(bus, mp_bus_not_pci)) {
- printk(KERN_WARNING "PCI BIOS passed nonexistent PCI bus %d!\n", bus);
- return -1;
- }
- for (i = 0; i < mp_irq_entries; i++) {
- int lbus = mp_irqs[i].mp_srcbus;
-
- for (apic = 0; apic < nr_ioapics; apic++)
- if (mp_ioapics[apic].mp_apicid == mp_irqs[i].mp_dstapic ||
- mp_irqs[i].mp_dstapic == MP_APIC_ALL)
- break;
-
- if (!test_bit(lbus, mp_bus_not_pci) &&
- !mp_irqs[i].mp_irqtype &&
- (bus == lbus) &&
- (slot == ((mp_irqs[i].mp_srcbusirq >> 2) & 0x1f))) {
- int irq = pin_2_irq(i, apic, mp_irqs[i].mp_dstirq);
-
- if (!(apic || IO_APIC_IRQ(irq)))
- continue;
-
- if (pin == (mp_irqs[i].mp_srcbusirq & 3))
- return irq;
- /*
- * Use the first all-but-pin matching entry as a
- * best-guess fuzzy result for broken mptables.
- */
- if (best_guess < 0)
- best_guess = irq;
- }
- }
- return best_guess;
-}
-EXPORT_SYMBOL(IO_APIC_get_PCI_irq_vector);
-
-/*
- * This function currently is only a helper for the i386 smp boot process where
- * we need to reprogram the ioredtbls to cater for the cpus which have come online
- * so mask in all cases should simply be TARGET_CPUS
- */
-#ifdef CONFIG_SMP
-void __init setup_ioapic_dest(void)
-{
- int pin, ioapic, irq, irq_entry;
-
- if (skip_ioapic_setup == 1)
- return;
-
- for (ioapic = 0; ioapic < nr_ioapics; ioapic++) {
- for (pin = 0; pin < nr_ioapic_registers[ioapic]; pin++) {
- irq_entry = find_irq_entry(ioapic, pin, mp_INT);
- if (irq_entry == -1)
- continue;
- irq = pin_2_irq(irq_entry, ioapic, pin);
- set_ioapic_affinity_irq(irq, TARGET_CPUS);
- }
-
- }
-}
-#endif
-
-#if defined(CONFIG_EISA) || defined(CONFIG_MCA)
-/*
- * EISA Edge/Level control register, ELCR
- */
-static int EISA_ELCR(unsigned int irq)
-{
- if (irq < 16) {
- unsigned int port = 0x4d0 + (irq >> 3);
- return (inb(port) >> (irq & 7)) & 1;
- }
- apic_printk(APIC_VERBOSE, KERN_INFO
- "Broken MPtable reports ISA irq %d\n", irq);
- return 0;
-}
-#endif
-
-/* ISA interrupts are always polarity zero edge triggered,
- * when listed as conforming in the MP table. */
-
-#define default_ISA_trigger(idx) (0)
-#define default_ISA_polarity(idx) (0)
-
-/* EISA interrupts are always polarity zero and can be edge or level
- * trigger depending on the ELCR value. If an interrupt is listed as
- * EISA conforming in the MP table, that means its trigger type must
- * be read in from the ELCR */
-
-#define default_EISA_trigger(idx) (EISA_ELCR(mp_irqs[idx].mp_srcbusirq))
-#define default_EISA_polarity(idx) default_ISA_polarity(idx)
-
-/* PCI interrupts are always polarity one level triggered,
- * when listed as conforming in the MP table. */
-
-#define default_PCI_trigger(idx) (1)
-#define default_PCI_polarity(idx) (1)
-
-/* MCA interrupts are always polarity zero level triggered,
- * when listed as conforming in the MP table. */
-
-#define default_MCA_trigger(idx) (1)
-#define default_MCA_polarity(idx) default_ISA_polarity(idx)
-
-static int MPBIOS_polarity(int idx)
-{
- int bus = mp_irqs[idx].mp_srcbus;
- int polarity;
-
- /*
- * Determine IRQ line polarity (high active or low active):
- */
- switch (mp_irqs[idx].mp_irqflag & 3) {
- case 0: /* conforms, ie. bus-type dependent polarity */
- {
- polarity = test_bit(bus, mp_bus_not_pci)?
- default_ISA_polarity(idx):
- default_PCI_polarity(idx);
- break;
- }
- case 1: /* high active */
- {
- polarity = 0;
- break;
- }
- case 2: /* reserved */
- {
- printk(KERN_WARNING "broken BIOS!!\n");
- polarity = 1;
- break;
- }
- case 3: /* low active */
- {
- polarity = 1;
- break;
- }
- default: /* invalid */
- {
- printk(KERN_WARNING "broken BIOS!!\n");
- polarity = 1;
- break;
- }
- }
- return polarity;
-}
-
-static int MPBIOS_trigger(int idx)
-{
- int bus = mp_irqs[idx].mp_srcbus;
- int trigger;
-
- /*
- * Determine IRQ trigger mode (edge or level sensitive):
- */
- switch ((mp_irqs[idx].mp_irqflag>>2) & 3) {
- case 0: /* conforms, ie. bus-type dependent */
- {
- trigger = test_bit(bus, mp_bus_not_pci)?
- default_ISA_trigger(idx):
- default_PCI_trigger(idx);
-#if defined(CONFIG_EISA) || defined(CONFIG_MCA)
- switch (mp_bus_id_to_type[bus]) {
- case MP_BUS_ISA: /* ISA pin */
- {
- /* set before the switch */
- break;
- }
- case MP_BUS_EISA: /* EISA pin */
- {
- trigger = default_EISA_trigger(idx);
- break;
- }
- case MP_BUS_PCI: /* PCI pin */
- {
- /* set before the switch */
- break;
- }
- case MP_BUS_MCA: /* MCA pin */
- {
- trigger = default_MCA_trigger(idx);
- break;
- }
- default:
- {
- printk(KERN_WARNING "broken BIOS!!\n");
- trigger = 1;
- break;
- }
- }
-#endif
- break;
- }
- case 1: /* edge */
- {
- trigger = 0;
- break;
- }
- case 2: /* reserved */
- {
- printk(KERN_WARNING "broken BIOS!!\n");
- trigger = 1;
- break;
- }
- case 3: /* level */
- {
- trigger = 1;
- break;
- }
- default: /* invalid */
- {
- printk(KERN_WARNING "broken BIOS!!\n");
- trigger = 0;
- break;
- }
- }
- return trigger;
-}
-
-static inline int irq_polarity(int idx)
-{
- return MPBIOS_polarity(idx);
-}
-
-static inline int irq_trigger(int idx)
-{
- return MPBIOS_trigger(idx);
-}
-
-static int pin_2_irq(int idx, int apic, int pin)
-{
- int irq, i;
- int bus = mp_irqs[idx].mp_srcbus;
-
- /*
- * Debugging check, we are in big trouble if this message pops up!
- */
- if (mp_irqs[idx].mp_dstirq != pin)
- printk(KERN_ERR "broken BIOS or MPTABLE parser, ayiee!!\n");
-
- if (test_bit(bus, mp_bus_not_pci))
- irq = mp_irqs[idx].mp_srcbusirq;
- else {
- /*
- * PCI IRQs are mapped in order
- */
- i = irq = 0;
- while (i < apic)
- irq += nr_ioapic_registers[i++];
- irq += pin;
-
- /*
- * For MPS mode, so far only needed by ES7000 platform
- */
- if (ioapic_renumber_irq)
- irq = ioapic_renumber_irq(apic, irq);
- }
-
- /*
- * PCI IRQ command line redirection. Yes, limits are hardcoded.
- */
- if ((pin >= 16) && (pin <= 23)) {
- if (pirq_entries[pin-16] != -1) {
- if (!pirq_entries[pin-16]) {
- apic_printk(APIC_VERBOSE, KERN_DEBUG
- "disabling PIRQ%d\n", pin-16);
- } else {
- irq = pirq_entries[pin-16];
- apic_printk(APIC_VERBOSE, KERN_DEBUG
- "using PIRQ%d -> IRQ %d\n",
- pin-16, irq);
- }
- }
- }
- return irq;
-}
-
-static inline int IO_APIC_irq_trigger(int irq)
-{
- int apic, idx, pin;
-
- for (apic = 0; apic < nr_ioapics; apic++) {
- for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) {
- idx = find_irq_entry(apic, pin, mp_INT);
- if ((idx != -1) && (irq == pin_2_irq(idx, apic, pin)))
- return irq_trigger(idx);
- }
- }
- /*
- * nonexistent IRQs are edge default
- */
- return 0;
-}
-
-/* irq_vectors is indexed by the sum of all RTEs in all I/O APICs. */
-static u8 irq_vector[NR_IRQ_VECTORS] __read_mostly = { FIRST_DEVICE_VECTOR , 0 };
-
-static int __assign_irq_vector(int irq)
-{
- static int current_vector = FIRST_DEVICE_VECTOR, current_offset;
- int vector, offset;
-
- BUG_ON((unsigned)irq >= NR_IRQ_VECTORS);
-
- if (irq_vector[irq] > 0)
- return irq_vector[irq];
-
- vector = current_vector;
- offset = current_offset;
-next:
- vector += 8;
- if (vector >= first_system_vector) {
- offset = (offset + 1) % 8;
- vector = FIRST_DEVICE_VECTOR + offset;
- }
- if (vector == current_vector)
- return -ENOSPC;
- if (test_and_set_bit(vector, used_vectors))
- goto next;
-
- current_vector = vector;
- current_offset = offset;
- irq_vector[irq] = vector;
-
- return vector;
-}
-
-static int assign_irq_vector(int irq)
-{
- unsigned long flags;
- int vector;
-
- spin_lock_irqsave(&vector_lock, flags);
- vector = __assign_irq_vector(irq);
- spin_unlock_irqrestore(&vector_lock, flags);
-
- return vector;
-}
-
-static struct irq_chip ioapic_chip;
-
-#define IOAPIC_AUTO -1
-#define IOAPIC_EDGE 0
-#define IOAPIC_LEVEL 1
-
-static void ioapic_register_intr(int irq, int vector, unsigned long trigger)
-{
- if ((trigger == IOAPIC_AUTO && IO_APIC_irq_trigger(irq)) ||
- trigger == IOAPIC_LEVEL) {
- irq_desc[irq].status |= IRQ_LEVEL;
- set_irq_chip_and_handler_name(irq, &ioapic_chip,
- handle_fasteoi_irq, "fasteoi");
- } else {
- irq_desc[irq].status &= ~IRQ_LEVEL;
- set_irq_chip_and_handler_name(irq, &ioapic_chip,
- handle_edge_irq, "edge");
- }
- set_intr_gate(vector, interrupt[irq]);
-}
-
-static void __init setup_IO_APIC_irqs(void)
-{
- struct IO_APIC_route_entry entry;
- int apic, pin, idx, irq, first_notcon = 1, vector;
-
- apic_printk(APIC_VERBOSE, KERN_DEBUG "init IO_APIC IRQs\n");
-
- for (apic = 0; apic < nr_ioapics; apic++) {
- for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) {
-
- /*
- * add it to the IO-APIC irq-routing table:
- */
- memset(&entry, 0, sizeof(entry));
-
- entry.delivery_mode = INT_DELIVERY_MODE;
- entry.dest_mode = INT_DEST_MODE;
- entry.mask = 0; /* enable IRQ */
- entry.dest.logical.logical_dest =
- cpu_mask_to_apicid(TARGET_CPUS);
-
- idx = find_irq_entry(apic, pin, mp_INT);
- if (idx == -1) {
- if (first_notcon) {
- apic_printk(APIC_VERBOSE, KERN_DEBUG
- " IO-APIC (apicid-pin) %d-%d",
- mp_ioapics[apic].mp_apicid,
- pin);
- first_notcon = 0;
- } else
- apic_printk(APIC_VERBOSE, ", %d-%d",
- mp_ioapics[apic].mp_apicid, pin);
- continue;
- }
-
- if (!first_notcon) {
- apic_printk(APIC_VERBOSE, " not connected.\n");
- first_notcon = 1;
- }
-
- entry.trigger = irq_trigger(idx);
- entry.polarity = irq_polarity(idx);
-
- if (irq_trigger(idx)) {
- entry.trigger = 1;
- entry.mask = 1;
- }
-
- irq = pin_2_irq(idx, apic, pin);
- /*
- * skip adding the timer int on secondary nodes, which causes
- * a small but painful rift in the time-space continuum
- */
- if (multi_timer_check(apic, irq))
- continue;
- else
- add_pin_to_irq(irq, apic, pin);
-
- if (!apic && !IO_APIC_IRQ(irq))
- continue;
-
- if (IO_APIC_IRQ(irq)) {
- vector = assign_irq_vector(irq);
- entry.vector = vector;
- ioapic_register_intr(irq, vector, IOAPIC_AUTO);
-
- if (!apic && (irq < 16))
- disable_8259A_irq(irq);
- }
- ioapic_write_entry(apic, pin, entry);
- }
- }
-
- if (!first_notcon)
- apic_printk(APIC_VERBOSE, " not connected.\n");
-}
-
-/*
- * Set up the timer pin, possibly with the 8259A-master behind.
- */
-static void __init setup_timer_IRQ0_pin(unsigned int apic, unsigned int pin,
- int vector)
-{
- struct IO_APIC_route_entry entry;
-
- memset(&entry, 0, sizeof(entry));
-
- /*
- * We use logical delivery to get the timer IRQ
- * to the first CPU.
- */
- entry.dest_mode = INT_DEST_MODE;
- entry.mask = 1; /* mask IRQ now */
- entry.dest.logical.logical_dest = cpu_mask_to_apicid(TARGET_CPUS);
- entry.delivery_mode = INT_DELIVERY_MODE;
- entry.polarity = 0;
- entry.trigger = 0;
- entry.vector = vector;
-
- /*
- * The timer IRQ doesn't have to know that behind the
- * scene we may have a 8259A-master in AEOI mode ...
- */
- ioapic_register_intr(0, vector, IOAPIC_EDGE);
-
- /*
- * Add it to the IO-APIC irq-routing table:
- */
- ioapic_write_entry(apic, pin, entry);
-}
-
-
-__apicdebuginit(void) print_IO_APIC(void)
-{
- int apic, i;
- union IO_APIC_reg_00 reg_00;
- union IO_APIC_reg_01 reg_01;
- union IO_APIC_reg_02 reg_02;
- union IO_APIC_reg_03 reg_03;
- unsigned long flags;
-
- if (apic_verbosity == APIC_QUIET)
- return;
-
- printk(KERN_DEBUG "number of MP IRQ sources: %d.\n", mp_irq_entries);
- for (i = 0; i < nr_ioapics; i++)
- printk(KERN_DEBUG "number of IO-APIC #%d registers: %d.\n",
- mp_ioapics[i].mp_apicid, nr_ioapic_registers[i]);
-
- /*
- * We are a bit conservative about what we expect. We have to
- * know about every hardware change ASAP.
- */
- printk(KERN_INFO "testing the IO APIC.......................\n");
-
- for (apic = 0; apic < nr_ioapics; apic++) {
-
- spin_lock_irqsave(&ioapic_lock, flags);
- reg_00.raw = io_apic_read(apic, 0);
- reg_01.raw = io_apic_read(apic, 1);
- if (reg_01.bits.version >= 0x10)
- reg_02.raw = io_apic_read(apic, 2);
- if (reg_01.bits.version >= 0x20)
- reg_03.raw = io_apic_read(apic, 3);
- spin_unlock_irqrestore(&ioapic_lock, flags);
-
- printk(KERN_DEBUG "IO APIC #%d......\n", mp_ioapics[apic].mp_apicid);
- printk(KERN_DEBUG ".... register #00: %08X\n", reg_00.raw);
- printk(KERN_DEBUG "....... : physical APIC id: %02X\n", reg_00.bits.ID);
- printk(KERN_DEBUG "....... : Delivery Type: %X\n", reg_00.bits.delivery_type);
- printk(KERN_DEBUG "....... : LTS : %X\n", reg_00.bits.LTS);
-
- printk(KERN_DEBUG ".... register #01: %08X\n", reg_01.raw);
- printk(KERN_DEBUG "....... : max redirection entries: %04X\n", reg_01.bits.entries);
-
- printk(KERN_DEBUG "....... : PRQ implemented: %X\n", reg_01.bits.PRQ);
- printk(KERN_DEBUG "....... : IO APIC version: %04X\n", reg_01.bits.version);
-
- /*
- * Some Intel chipsets with IO APIC VERSION of 0x1? don't have reg_02,
- * but the value of reg_02 is read as the previous read register
- * value, so ignore it if reg_02 == reg_01.
- */
- if (reg_01.bits.version >= 0x10 && reg_02.raw != reg_01.raw) {
- printk(KERN_DEBUG ".... register #02: %08X\n", reg_02.raw);
- printk(KERN_DEBUG "....... : arbitration: %02X\n", reg_02.bits.arbitration);
- }
-
- /*
- * Some Intel chipsets with IO APIC VERSION of 0x2? don't have reg_02
- * or reg_03, but the value of reg_0[23] is read as the previous read
- * register value, so ignore it if reg_03 == reg_0[12].
- */
- if (reg_01.bits.version >= 0x20 && reg_03.raw != reg_02.raw &&
- reg_03.raw != reg_01.raw) {
- printk(KERN_DEBUG ".... register #03: %08X\n", reg_03.raw);
- printk(KERN_DEBUG "....... : Boot DT : %X\n", reg_03.bits.boot_DT);
- }
-
- printk(KERN_DEBUG ".... IRQ redirection table:\n");
-
- printk(KERN_DEBUG " NR Log Phy Mask Trig IRR Pol"
- " Stat Dest Deli Vect: \n");
-
- for (i = 0; i <= reg_01.bits.entries; i++) {
- struct IO_APIC_route_entry entry;
-
- entry = ioapic_read_entry(apic, i);
-
- printk(KERN_DEBUG " %02x %03X %02X ",
- i,
- entry.dest.logical.logical_dest,
- entry.dest.physical.physical_dest
- );
-
- printk("%1d %1d %1d %1d %1d %1d %1d %02X\n",
- entry.mask,
- entry.trigger,
- entry.irr,
- entry.polarity,
- entry.delivery_status,
- entry.dest_mode,
- entry.delivery_mode,
- entry.vector
- );
- }
- }
- printk(KERN_DEBUG "IRQ to pin mappings:\n");
- for (i = 0; i < NR_IRQS; i++) {
- struct irq_pin_list *entry = irq_2_pin + i;
- if (entry->pin < 0)
- continue;
- printk(KERN_DEBUG "IRQ%d ", i);
- for (;;) {
- printk("-> %d:%d", entry->apic, entry->pin);
- if (!entry->next)
- break;
- entry = irq_2_pin + entry->next;
- }
- printk("\n");
- }
-
- printk(KERN_INFO ".................................... done.\n");
-
- return;
-}
-
-__apicdebuginit(void) print_APIC_bitfield(int base)
-{
- unsigned int v;
- int i, j;
-
- if (apic_verbosity == APIC_QUIET)
- return;
-
- printk(KERN_DEBUG "0123456789abcdef0123456789abcdef\n" KERN_DEBUG);
- for (i = 0; i < 8; i++) {
- v = apic_read(base + i*0x10);
- for (j = 0; j < 32; j++) {
- if (v & (1< 3) /* Due to the Pentium erratum 3AP. */
- apic_write(APIC_ESR, 0);
- v = apic_read(APIC_ESR);
- printk(KERN_DEBUG "... APIC ESR: %08x\n", v);
- }
-
- icr = apic_icr_read();
- printk(KERN_DEBUG "... APIC ICR: %08x\n", icr);
- printk(KERN_DEBUG "... APIC ICR2: %08x\n", icr >> 32);
-
- v = apic_read(APIC_LVTT);
- printk(KERN_DEBUG "... APIC LVTT: %08x\n", v);
-
- if (maxlvt > 3) { /* PC is LVT#4. */
- v = apic_read(APIC_LVTPC);
- printk(KERN_DEBUG "... APIC LVTPC: %08x\n", v);
- }
- v = apic_read(APIC_LVT0);
- printk(KERN_DEBUG "... APIC LVT0: %08x\n", v);
- v = apic_read(APIC_LVT1);
- printk(KERN_DEBUG "... APIC LVT1: %08x\n", v);
-
- if (maxlvt > 2) { /* ERR is LVT#3. */
- v = apic_read(APIC_LVTERR);
- printk(KERN_DEBUG "... APIC LVTERR: %08x\n", v);
- }
-
- v = apic_read(APIC_TMICT);
- printk(KERN_DEBUG "... APIC TMICT: %08x\n", v);
- v = apic_read(APIC_TMCCT);
- printk(KERN_DEBUG "... APIC TMCCT: %08x\n", v);
- v = apic_read(APIC_TDCR);
- printk(KERN_DEBUG "... APIC TDCR: %08x\n", v);
- printk("\n");
-}
-
-__apicdebuginit(void) print_all_local_APICs(void)
-{
- on_each_cpu(print_local_APIC, NULL, 1);
-}
-
-__apicdebuginit(void) print_PIC(void)
-{
- unsigned int v;
- unsigned long flags;
-
- if (apic_verbosity == APIC_QUIET)
- return;
-
- printk(KERN_DEBUG "\nprinting PIC contents\n");
-
- spin_lock_irqsave(&i8259A_lock, flags);
-
- v = inb(0xa1) << 8 | inb(0x21);
- printk(KERN_DEBUG "... PIC IMR: %04x\n", v);
-
- v = inb(0xa0) << 8 | inb(0x20);
- printk(KERN_DEBUG "... PIC IRR: %04x\n", v);
-
- outb(0x0b, 0xa0);
- outb(0x0b, 0x20);
- v = inb(0xa0) << 8 | inb(0x20);
- outb(0x0a, 0xa0);
- outb(0x0a, 0x20);
-
- spin_unlock_irqrestore(&i8259A_lock, flags);
-
- printk(KERN_DEBUG "... PIC ISR: %04x\n", v);
-
- v = inb(0x4d1) << 8 | inb(0x4d0);
- printk(KERN_DEBUG "... PIC ELCR: %04x\n", v);
-}
-
-__apicdebuginit(int) print_all_ICs(void)
-{
- print_PIC();
- print_all_local_APICs();
- print_IO_APIC();
-
- return 0;
-}
-
-fs_initcall(print_all_ICs);
-
-
-static void __init enable_IO_APIC(void)
-{
- union IO_APIC_reg_01 reg_01;
- int i8259_apic, i8259_pin;
- int i, apic;
- unsigned long flags;
-
- for (i = 0; i < PIN_MAP_SIZE; i++) {
- irq_2_pin[i].pin = -1;
- irq_2_pin[i].next = 0;
- }
- if (!pirqs_enabled)
- for (i = 0; i < MAX_PIRQS; i++)
- pirq_entries[i] = -1;
-
- /*
- * The number of IO-APIC IRQ registers (== #pins):
- */
- for (apic = 0; apic < nr_ioapics; apic++) {
- spin_lock_irqsave(&ioapic_lock, flags);
- reg_01.raw = io_apic_read(apic, 1);
- spin_unlock_irqrestore(&ioapic_lock, flags);
- nr_ioapic_registers[apic] = reg_01.bits.entries+1;
- }
- for (apic = 0; apic < nr_ioapics; apic++) {
- int pin;
- /* See if any of the pins is in ExtINT mode */
- for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) {
- struct IO_APIC_route_entry entry;
- entry = ioapic_read_entry(apic, pin);
-
-
- /* If the interrupt line is enabled and in ExtInt mode
- * I have found the pin where the i8259 is connected.
- */
- if ((entry.mask == 0) && (entry.delivery_mode == dest_ExtINT)) {
- ioapic_i8259.apic = apic;
- ioapic_i8259.pin = pin;
- goto found_i8259;
- }
- }
- }
- found_i8259:
- /* Look to see what if the MP table has reported the ExtINT */
- /* If we could not find the appropriate pin by looking at the ioapic
- * the i8259 probably is not connected the ioapic but give the
- * mptable a chance anyway.
- */
- i8259_pin = find_isa_irq_pin(0, mp_ExtINT);
- i8259_apic = find_isa_irq_apic(0, mp_ExtINT);
- /* Trust the MP table if nothing is setup in the hardware */
- if ((ioapic_i8259.pin == -1) && (i8259_pin >= 0)) {
- printk(KERN_WARNING "ExtINT not setup in hardware but reported by MP table\n");
- ioapic_i8259.pin = i8259_pin;
- ioapic_i8259.apic = i8259_apic;
- }
- /* Complain if the MP table and the hardware disagree */
- if (((ioapic_i8259.apic != i8259_apic) || (ioapic_i8259.pin != i8259_pin)) &&
- (i8259_pin >= 0) && (ioapic_i8259.pin >= 0))
- {
- printk(KERN_WARNING "ExtINT in hardware and MP table differ\n");
- }
-
- /*
- * Do not trust the IO-APIC being empty at bootup
- */
- clear_IO_APIC();
-}
-
-/*
- * Not an __init, needed by the reboot code
- */
-void disable_IO_APIC(void)
-{
- /*
- * Clear the IO-APIC before rebooting:
- */
- clear_IO_APIC();
-
- /*
- * If the i8259 is routed through an IOAPIC
- * Put that IOAPIC in virtual wire mode
- * so legacy interrupts can be delivered.
- */
- if (ioapic_i8259.pin != -1) {
- struct IO_APIC_route_entry entry;
-
- memset(&entry, 0, sizeof(entry));
- entry.mask = 0; /* Enabled */
- entry.trigger = 0; /* Edge */
- entry.irr = 0;
- entry.polarity = 0; /* High */
- entry.delivery_status = 0;
- entry.dest_mode = 0; /* Physical */
- entry.delivery_mode = dest_ExtINT; /* ExtInt */
- entry.vector = 0;
- entry.dest.physical.physical_dest = read_apic_id();
-
- /*
- * Add it to the IO-APIC irq-routing table:
- */
- ioapic_write_entry(ioapic_i8259.apic, ioapic_i8259.pin, entry);
- }
- disconnect_bsp_APIC(ioapic_i8259.pin != -1);
-}
-
-/*
- * function to set the IO-APIC physical IDs based on the
- * values stored in the MPC table.
- *
- * by Matt Domsch Tue Dec 21 12:25:05 CST 1999
- */
-
-static void __init setup_ioapic_ids_from_mpc(void)
-{
- union IO_APIC_reg_00 reg_00;
- physid_mask_t phys_id_present_map;
- int apic;
- int i;
- unsigned char old_id;
- unsigned long flags;
-
- if (x86_quirks->setup_ioapic_ids && x86_quirks->setup_ioapic_ids())
- return;
-
- /*
- * Don't check I/O APIC IDs for xAPIC systems. They have
- * no meaning without the serial APIC bus.
- */
- if (!(boot_cpu_data.x86_vendor == X86_VENDOR_INTEL)
- || APIC_XAPIC(apic_version[boot_cpu_physical_apicid]))
- return;
- /*
- * This is broken; anything with a real cpu count has to
- * circumvent this idiocy regardless.
- */
- phys_id_present_map = ioapic_phys_id_map(phys_cpu_present_map);
-
- /*
- * Set the IOAPIC ID to the value stored in the MPC table.
- */
- for (apic = 0; apic < nr_ioapics; apic++) {
-
- /* Read the register 0 value */
- spin_lock_irqsave(&ioapic_lock, flags);
- reg_00.raw = io_apic_read(apic, 0);
- spin_unlock_irqrestore(&ioapic_lock, flags);
-
- old_id = mp_ioapics[apic].mp_apicid;
-
- if (mp_ioapics[apic].mp_apicid >= get_physical_broadcast()) {
- printk(KERN_ERR "BIOS bug, IO-APIC#%d ID is %d in the MPC table!...\n",
- apic, mp_ioapics[apic].mp_apicid);
- printk(KERN_ERR "... fixing up to %d. (tell your hw vendor)\n",
- reg_00.bits.ID);
- mp_ioapics[apic].mp_apicid = reg_00.bits.ID;
- }
-
- /*
- * Sanity check, is the ID really free? Every APIC in a
- * system must have a unique ID or we get lots of nice
- * 'stuck on smp_invalidate_needed IPI wait' messages.
- */
- if (check_apicid_used(phys_id_present_map,
- mp_ioapics[apic].mp_apicid)) {
- printk(KERN_ERR "BIOS bug, IO-APIC#%d ID %d is already used!...\n",
- apic, mp_ioapics[apic].mp_apicid);
- for (i = 0; i < get_physical_broadcast(); i++)
- if (!physid_isset(i, phys_id_present_map))
- break;
- if (i >= get_physical_broadcast())
- panic("Max APIC ID exceeded!\n");
- printk(KERN_ERR "... fixing up to %d. (tell your hw vendor)\n",
- i);
- physid_set(i, phys_id_present_map);
- mp_ioapics[apic].mp_apicid = i;
- } else {
- physid_mask_t tmp;
- tmp = apicid_to_cpu_present(mp_ioapics[apic].mp_apicid);
- apic_printk(APIC_VERBOSE, "Setting %d in the "
- "phys_id_present_map\n",
- mp_ioapics[apic].mp_apicid);
- physids_or(phys_id_present_map, phys_id_present_map, tmp);
- }
-
-
- /*
- * We need to adjust the IRQ routing table
- * if the ID changed.
- */
- if (old_id != mp_ioapics[apic].mp_apicid)
- for (i = 0; i < mp_irq_entries; i++)
- if (mp_irqs[i].mp_dstapic == old_id)
- mp_irqs[i].mp_dstapic
- = mp_ioapics[apic].mp_apicid;
-
- /*
- * Read the right value from the MPC table and
- * write it into the ID register.
- */
- apic_printk(APIC_VERBOSE, KERN_INFO
- "...changing IO-APIC physical APIC ID to %d ...",
- mp_ioapics[apic].mp_apicid);
-
- reg_00.bits.ID = mp_ioapics[apic].mp_apicid;
- spin_lock_irqsave(&ioapic_lock, flags);
- io_apic_write(apic, 0, reg_00.raw);
- spin_unlock_irqrestore(&ioapic_lock, flags);
-
- /*
- * Sanity check
- */
- spin_lock_irqsave(&ioapic_lock, flags);
- reg_00.raw = io_apic_read(apic, 0);
- spin_unlock_irqrestore(&ioapic_lock, flags);
- if (reg_00.bits.ID != mp_ioapics[apic].mp_apicid)
- printk("could not set ID!\n");
- else
- apic_printk(APIC_VERBOSE, " ok.\n");
- }
-}
-
-int no_timer_check __initdata;
-
-static int __init notimercheck(char *s)
-{
- no_timer_check = 1;
- return 1;
-}
-__setup("no_timer_check", notimercheck);
-
-/*
- * There is a nasty bug in some older SMP boards, their mptable lies
- * about the timer IRQ. We do the following to work around the situation:
- *
- * - timer IRQ defaults to IO-APIC IRQ
- * - if this function detects that timer IRQs are defunct, then we fall
- * back to ISA timer IRQs
- */
-static int __init timer_irq_works(void)
-{
- unsigned long t1 = jiffies;
- unsigned long flags;
-
- if (no_timer_check)
- return 1;
-
- local_save_flags(flags);
- local_irq_enable();
- /* Let ten ticks pass... */
- mdelay((10 * 1000) / HZ);
- local_irq_restore(flags);
-
- /*
- * Expect a few ticks at least, to be sure some possible
- * glue logic does not lock up after one or two first
- * ticks in a non-ExtINT mode. Also the local APIC
- * might have cached one ExtINT interrupt. Finally, at
- * least one tick may be lost due to delays.
- */
- if (time_after(jiffies, t1 + 4))
- return 1;
-
- return 0;
-}
-
-/*
- * In the SMP+IOAPIC case it might happen that there are an unspecified
- * number of pending IRQ events unhandled. These cases are very rare,
- * so we 'resend' these IRQs via IPIs, to the same CPU. It's much
- * better to do it this way as thus we do not have to be aware of
- * 'pending' interrupts in the IRQ path, except at this point.
- */
-/*
- * Edge triggered needs to resend any interrupt
- * that was delayed but this is now handled in the device
- * independent code.
- */
-
-/*
- * Startup quirk:
- *
- * Starting up a edge-triggered IO-APIC interrupt is
- * nasty - we need to make sure that we get the edge.
- * If it is already asserted for some reason, we need
- * return 1 to indicate that is was pending.
- *
- * This is not complete - we should be able to fake
- * an edge even if it isn't on the 8259A...
- *
- * (We do this for level-triggered IRQs too - it cannot hurt.)
- */
-static unsigned int startup_ioapic_irq(unsigned int irq)
-{
- int was_pending = 0;
- unsigned long flags;
-
- spin_lock_irqsave(&ioapic_lock, flags);
- if (irq < 16) {
- disable_8259A_irq(irq);
- if (i8259A_irq_pending(irq))
- was_pending = 1;
- }
- __unmask_IO_APIC_irq(irq);
- spin_unlock_irqrestore(&ioapic_lock, flags);
-
- return was_pending;
-}
-
-static void ack_ioapic_irq(unsigned int irq)
-{
- move_native_irq(irq);
- ack_APIC_irq();
-}
-
-static void ack_ioapic_quirk_irq(unsigned int irq)
-{
- unsigned long v;
- int i;
-
- move_native_irq(irq);
-/*
- * It appears there is an erratum which affects at least version 0x11
- * of I/O APIC (that's the 82093AA and cores integrated into various
- * chipsets). Under certain conditions a level-triggered interrupt is
- * erroneously delivered as edge-triggered one but the respective IRR
- * bit gets set nevertheless. As a result the I/O unit expects an EOI
- * message but it will never arrive and further interrupts are blocked
- * from the source. The exact reason is so far unknown, but the
- * phenomenon was observed when two consecutive interrupt requests
- * from a given source get delivered to the same CPU and the source is
- * temporarily disabled in between.
- *
- * A workaround is to simulate an EOI message manually. We achieve it
- * by setting the trigger mode to edge and then to level when the edge
- * trigger mode gets detected in the TMR of a local APIC for a
- * level-triggered interrupt. We mask the source for the time of the
- * operation to prevent an edge-triggered interrupt escaping meanwhile.
- * The idea is from Manfred Spraul. --macro
- */
- i = irq_vector[irq];
-
- v = apic_read(APIC_TMR + ((i & ~0x1f) >> 1));
-
- ack_APIC_irq();
-
- if (!(v & (1 << (i & 0x1f)))) {
- atomic_inc(&irq_mis_count);
- spin_lock(&ioapic_lock);
- __mask_and_edge_IO_APIC_irq(irq);
- __unmask_and_level_IO_APIC_irq(irq);
- spin_unlock(&ioapic_lock);
- }
-}
-
-static int ioapic_retrigger_irq(unsigned int irq)
-{
- send_IPI_self(irq_vector[irq]);
-
- return 1;
-}
-
-static struct irq_chip ioapic_chip __read_mostly = {
- .name = "IO-APIC",
- .startup = startup_ioapic_irq,
- .mask = mask_IO_APIC_irq,
- .unmask = unmask_IO_APIC_irq,
- .ack = ack_ioapic_irq,
- .eoi = ack_ioapic_quirk_irq,
-#ifdef CONFIG_SMP
- .set_affinity = set_ioapic_affinity_irq,
-#endif
- .retrigger = ioapic_retrigger_irq,
-};
-
-
-static inline void init_IO_APIC_traps(void)
-{
- int irq;
-
- /*
- * NOTE! The local APIC isn't very good at handling
- * multiple interrupts at the same interrupt level.
- * As the interrupt level is determined by taking the
- * vector number and shifting that right by 4, we
- * want to spread these out a bit so that they don't
- * all fall in the same interrupt level.
- *
- * Also, we've got to be careful not to trash gate
- * 0x80, because int 0x80 is hm, kind of importantish. ;)
- */
- for (irq = 0; irq < NR_IRQS ; irq++) {
- if (IO_APIC_IRQ(irq) && !irq_vector[irq]) {
- /*
- * Hmm.. We don't have an entry for this,
- * so default to an old-fashioned 8259
- * interrupt if we can..
- */
- if (irq < 16)
- make_8259A_irq(irq);
- else
- /* Strange. Oh, well.. */
- irq_desc[irq].chip = &no_irq_chip;
- }
- }
-}
-
-/*
- * The local APIC irq-chip implementation:
- */
-
-static void ack_lapic_irq(unsigned int irq)
-{
- ack_APIC_irq();
-}
-
-static void mask_lapic_irq(unsigned int irq)
-{
- unsigned long v;
-
- v = apic_read(APIC_LVT0);
- apic_write(APIC_LVT0, v | APIC_LVT_MASKED);
-}
-
-static void unmask_lapic_irq(unsigned int irq)
-{
- unsigned long v;
-
- v = apic_read(APIC_LVT0);
- apic_write(APIC_LVT0, v & ~APIC_LVT_MASKED);
-}
-
-static struct irq_chip lapic_chip __read_mostly = {
- .name = "local-APIC",
- .mask = mask_lapic_irq,
- .unmask = unmask_lapic_irq,
- .ack = ack_lapic_irq,
-};
-
-static void lapic_register_intr(int irq, int vector)
-{
- irq_desc[irq].status &= ~IRQ_LEVEL;
- set_irq_chip_and_handler_name(irq, &lapic_chip, handle_edge_irq,
- "edge");
- set_intr_gate(vector, interrupt[irq]);
-}
-
-static void __init setup_nmi(void)
-{
- /*
- * Dirty trick to enable the NMI watchdog ...
- * We put the 8259A master into AEOI mode and
- * unmask on all local APICs LVT0 as NMI.
- *
- * The idea to use the 8259A in AEOI mode ('8259A Virtual Wire')
- * is from Maciej W. Rozycki - so we do not have to EOI from
- * the NMI handler or the timer interrupt.
- */
- apic_printk(APIC_VERBOSE, KERN_INFO "activating NMI Watchdog ...");
-
- enable_NMI_through_LVT0();
-
- apic_printk(APIC_VERBOSE, " done.\n");
-}
-
-/*
- * This looks a bit hackish but it's about the only one way of sending
- * a few INTA cycles to 8259As and any associated glue logic. ICR does
- * not support the ExtINT mode, unfortunately. We need to send these
- * cycles as some i82489DX-based boards have glue logic that keeps the
- * 8259A interrupt line asserted until INTA. --macro
- */
-static inline void __init unlock_ExtINT_logic(void)
-{
- int apic, pin, i;
- struct IO_APIC_route_entry entry0, entry1;
- unsigned char save_control, save_freq_select;
-
- pin = find_isa_irq_pin(8, mp_INT);
- if (pin == -1) {
- WARN_ON_ONCE(1);
- return;
- }
- apic = find_isa_irq_apic(8, mp_INT);
- if (apic == -1) {
- WARN_ON_ONCE(1);
- return;
- }
-
- entry0 = ioapic_read_entry(apic, pin);
- clear_IO_APIC_pin(apic, pin);
-
- memset(&entry1, 0, sizeof(entry1));
-
- entry1.dest_mode = 0; /* physical delivery */
- entry1.mask = 0; /* unmask IRQ now */
- entry1.dest.physical.physical_dest = hard_smp_processor_id();
- entry1.delivery_mode = dest_ExtINT;
- entry1.polarity = entry0.polarity;
- entry1.trigger = 0;
- entry1.vector = 0;
-
- ioapic_write_entry(apic, pin, entry1);
-
- save_control = CMOS_READ(RTC_CONTROL);
- save_freq_select = CMOS_READ(RTC_FREQ_SELECT);
- CMOS_WRITE((save_freq_select & ~RTC_RATE_SELECT) | 0x6,
- RTC_FREQ_SELECT);
- CMOS_WRITE(save_control | RTC_PIE, RTC_CONTROL);
-
- i = 100;
- while (i-- > 0) {
- mdelay(10);
- if ((CMOS_READ(RTC_INTR_FLAGS) & RTC_PF) == RTC_PF)
- i -= 10;
- }
-
- CMOS_WRITE(save_control, RTC_CONTROL);
- CMOS_WRITE(save_freq_select, RTC_FREQ_SELECT);
- clear_IO_APIC_pin(apic, pin);
-
- ioapic_write_entry(apic, pin, entry0);
-}
-
-/*
- * This code may look a bit paranoid, but it's supposed to cooperate with
- * a wide range of boards and BIOS bugs. Fortunately only the timer IRQ
- * is so screwy. Thanks to Brian Perkins for testing/hacking this beast
- * fanatically on his truly buggy board.
- */
-static inline void __init check_timer(void)
-{
- int apic1, pin1, apic2, pin2;
- int no_pin1 = 0;
- int vector;
- unsigned int ver;
- unsigned long flags;
-
- local_irq_save(flags);
-
- ver = apic_read(APIC_LVR);
- ver = GET_APIC_VERSION(ver);
-
- /*
- * get/set the timer IRQ vector:
- */
- disable_8259A_irq(0);
- vector = assign_irq_vector(0);
- set_intr_gate(vector, interrupt[0]);
-
- /*
- * As IRQ0 is to be enabled in the 8259A, the virtual
- * wire has to be disabled in the local APIC. Also
- * timer interrupts need to be acknowledged manually in
- * the 8259A for the i82489DX when using the NMI
- * watchdog as that APIC treats NMIs as level-triggered.
- * The AEOI mode will finish them in the 8259A
- * automatically.
- */
- apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_EXTINT);
- init_8259A(1);
- timer_ack = (nmi_watchdog == NMI_IO_APIC && !APIC_INTEGRATED(ver));
-
- pin1 = find_isa_irq_pin(0, mp_INT);
- apic1 = find_isa_irq_apic(0, mp_INT);
- pin2 = ioapic_i8259.pin;
- apic2 = ioapic_i8259.apic;
-
- apic_printk(APIC_QUIET, KERN_INFO "..TIMER: vector=0x%02X "
- "apic1=%d pin1=%d apic2=%d pin2=%d\n",
- vector, apic1, pin1, apic2, pin2);
-
- /*
- * Some BIOS writers are clueless and report the ExtINTA
- * I/O APIC input from the cascaded 8259A as the timer
- * interrupt input. So just in case, if only one pin
- * was found above, try it both directly and through the
- * 8259A.
- */
- if (pin1 == -1) {
- pin1 = pin2;
- apic1 = apic2;
- no_pin1 = 1;
- } else if (pin2 == -1) {
- pin2 = pin1;
- apic2 = apic1;
- }
-
- if (pin1 != -1) {
- /*
- * Ok, does IRQ0 through the IOAPIC work?
- */
- if (no_pin1) {
- add_pin_to_irq(0, apic1, pin1);
- setup_timer_IRQ0_pin(apic1, pin1, vector);
- }
- unmask_IO_APIC_irq(0);
- if (timer_irq_works()) {
- if (nmi_watchdog == NMI_IO_APIC) {
- setup_nmi();
- enable_8259A_irq(0);
- }
- if (disable_timer_pin_1 > 0)
- clear_IO_APIC_pin(0, pin1);
- goto out;
- }
- clear_IO_APIC_pin(apic1, pin1);
- if (!no_pin1)
- apic_printk(APIC_QUIET, KERN_ERR "..MP-BIOS bug: "
- "8254 timer not connected to IO-APIC\n");
-
- apic_printk(APIC_QUIET, KERN_INFO "...trying to set up timer "
- "(IRQ0) through the 8259A ...\n");
- apic_printk(APIC_QUIET, KERN_INFO
- "..... (found apic %d pin %d) ...\n", apic2, pin2);
- /*
- * legacy devices should be connected to IO APIC #0
- */
- replace_pin_at_irq(0, apic1, pin1, apic2, pin2);
- setup_timer_IRQ0_pin(apic2, pin2, vector);
- unmask_IO_APIC_irq(0);
- enable_8259A_irq(0);
- if (timer_irq_works()) {
- apic_printk(APIC_QUIET, KERN_INFO "....... works.\n");
- timer_through_8259 = 1;
- if (nmi_watchdog == NMI_IO_APIC) {
- disable_8259A_irq(0);
- setup_nmi();
- enable_8259A_irq(0);
- }
- goto out;
- }
- /*
- * Cleanup, just in case ...
- */
- disable_8259A_irq(0);
- clear_IO_APIC_pin(apic2, pin2);
- apic_printk(APIC_QUIET, KERN_INFO "....... failed.\n");
- }
-
- if (nmi_watchdog == NMI_IO_APIC) {
- apic_printk(APIC_QUIET, KERN_WARNING "timer doesn't work "
- "through the IO-APIC - disabling NMI Watchdog!\n");
- nmi_watchdog = NMI_NONE;
- }
- timer_ack = 0;
-
- apic_printk(APIC_QUIET, KERN_INFO
- "...trying to set up timer as Virtual Wire IRQ...\n");
-
- lapic_register_intr(0, vector);
- apic_write(APIC_LVT0, APIC_DM_FIXED | vector); /* Fixed mode */
- enable_8259A_irq(0);
-
- if (timer_irq_works()) {
- apic_printk(APIC_QUIET, KERN_INFO "..... works.\n");
- goto out;
- }
- disable_8259A_irq(0);
- apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_FIXED | vector);
- apic_printk(APIC_QUIET, KERN_INFO "..... failed.\n");
-
- apic_printk(APIC_QUIET, KERN_INFO
- "...trying to set up timer as ExtINT IRQ...\n");
-
- init_8259A(0);
- make_8259A_irq(0);
- apic_write(APIC_LVT0, APIC_DM_EXTINT);
-
- unlock_ExtINT_logic();
-
- if (timer_irq_works()) {
- apic_printk(APIC_QUIET, KERN_INFO "..... works.\n");
- goto out;
- }
- apic_printk(APIC_QUIET, KERN_INFO "..... failed :(.\n");
- panic("IO-APIC + timer doesn't work! Boot with apic=debug and send a "
- "report. Then try booting with the 'noapic' option.\n");
-out:
- local_irq_restore(flags);
-}
-
-/*
- * Traditionally ISA IRQ2 is the cascade IRQ, and is not available
- * to devices. However there may be an I/O APIC pin available for
- * this interrupt regardless. The pin may be left unconnected, but
- * typically it will be reused as an ExtINT cascade interrupt for
- * the master 8259A. In the MPS case such a pin will normally be
- * reported as an ExtINT interrupt in the MP table. With ACPI
- * there is no provision for ExtINT interrupts, and in the absence
- * of an override it would be treated as an ordinary ISA I/O APIC
- * interrupt, that is edge-triggered and unmasked by default. We
- * used to do this, but it caused problems on some systems because
- * of the NMI watchdog and sometimes IRQ0 of the 8254 timer using
- * the same ExtINT cascade interrupt to drive the local APIC of the
- * bootstrap processor. Therefore we refrain from routing IRQ2 to
- * the I/O APIC in all cases now. No actual device should request
- * it anyway. --macro
- */
-#define PIC_IRQS (1 << PIC_CASCADE_IR)
-
-void __init setup_IO_APIC(void)
-{
- int i;
-
- /* Reserve all the system vectors. */
- for (i = first_system_vector; i < NR_VECTORS; i++)
- set_bit(i, used_vectors);
-
- enable_IO_APIC();
-
- io_apic_irqs = ~PIC_IRQS;
-
- printk("ENABLING IO-APIC IRQs\n");
-
- /*
- * Set up IO-APIC IRQ routing.
- */
- if (!acpi_ioapic)
- setup_ioapic_ids_from_mpc();
- sync_Arb_IDs();
- setup_IO_APIC_irqs();
- init_IO_APIC_traps();
- check_timer();
-}
-
-/*
- * Called after all the initialization is done. If we didnt find any
- * APIC bugs then we can allow the modify fast path
- */
-
-static int __init io_apic_bug_finalize(void)
-{
- if (sis_apic_bug == -1)
- sis_apic_bug = 0;
- return 0;
-}
-
-late_initcall(io_apic_bug_finalize);
-
-struct sysfs_ioapic_data {
- struct sys_device dev;
- struct IO_APIC_route_entry entry[0];
-};
-static struct sysfs_ioapic_data *mp_ioapic_data[MAX_IO_APICS];
-
-static int ioapic_suspend(struct sys_device *dev, pm_message_t state)
-{
- struct IO_APIC_route_entry *entry;
- struct sysfs_ioapic_data *data;
- int i;
-
- data = container_of(dev, struct sysfs_ioapic_data, dev);
- entry = data->entry;
- for (i = 0; i < nr_ioapic_registers[dev->id]; i++)
- entry[i] = ioapic_read_entry(dev->id, i);
-
- return 0;
-}
-
-static int ioapic_resume(struct sys_device *dev)
-{
- struct IO_APIC_route_entry *entry;
- struct sysfs_ioapic_data *data;
- unsigned long flags;
- union IO_APIC_reg_00 reg_00;
- int i;
-
- data = container_of(dev, struct sysfs_ioapic_data, dev);
- entry = data->entry;
-
- spin_lock_irqsave(&ioapic_lock, flags);
- reg_00.raw = io_apic_read(dev->id, 0);
- if (reg_00.bits.ID != mp_ioapics[dev->id].mp_apicid) {
- reg_00.bits.ID = mp_ioapics[dev->id].mp_apicid;
- io_apic_write(dev->id, 0, reg_00.raw);
- }
- spin_unlock_irqrestore(&ioapic_lock, flags);
- for (i = 0; i < nr_ioapic_registers[dev->id]; i++)
- ioapic_write_entry(dev->id, i, entry[i]);
-
- return 0;
-}
-
-static struct sysdev_class ioapic_sysdev_class = {
- .name = "ioapic",
- .suspend = ioapic_suspend,
- .resume = ioapic_resume,
-};
-
-static int __init ioapic_init_sysfs(void)
-{
- struct sys_device *dev;
- int i, size, error = 0;
-
- error = sysdev_class_register(&ioapic_sysdev_class);
- if (error)
- return error;
-
- for (i = 0; i < nr_ioapics; i++) {
- size = sizeof(struct sys_device) + nr_ioapic_registers[i]
- * sizeof(struct IO_APIC_route_entry);
- mp_ioapic_data[i] = kzalloc(size, GFP_KERNEL);
- if (!mp_ioapic_data[i]) {
- printk(KERN_ERR "Can't suspend/resume IOAPIC %d\n", i);
- continue;
- }
- dev = &mp_ioapic_data[i]->dev;
- dev->id = i;
- dev->cls = &ioapic_sysdev_class;
- error = sysdev_register(dev);
- if (error) {
- kfree(mp_ioapic_data[i]);
- mp_ioapic_data[i] = NULL;
- printk(KERN_ERR "Can't suspend/resume IOAPIC %d\n", i);
- continue;
- }
- }
-
- return 0;
-}
-
-device_initcall(ioapic_init_sysfs);
-
-/*
- * Dynamic irq allocate and deallocation
- */
-int create_irq(void)
-{
- /* Allocate an unused irq */
- int irq, new, vector = 0;
- unsigned long flags;
-
- irq = -ENOSPC;
- spin_lock_irqsave(&vector_lock, flags);
- for (new = (NR_IRQS - 1); new >= 0; new--) {
- if (platform_legacy_irq(new))
- continue;
- if (irq_vector[new] != 0)
- continue;
- vector = __assign_irq_vector(new);
- if (likely(vector > 0))
- irq = new;
- break;
- }
- spin_unlock_irqrestore(&vector_lock, flags);
-
- if (irq >= 0) {
- set_intr_gate(vector, interrupt[irq]);
- dynamic_irq_init(irq);
- }
- return irq;
-}
-
-void destroy_irq(unsigned int irq)
-{
- unsigned long flags;
-
- dynamic_irq_cleanup(irq);
-
- spin_lock_irqsave(&vector_lock, flags);
- clear_bit(irq_vector[irq], used_vectors);
- irq_vector[irq] = 0;
- spin_unlock_irqrestore(&vector_lock, flags);
-}
-
-/*
- * MSI message composition
- */
-#ifdef CONFIG_PCI_MSI
-static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_msg *msg)
-{
- int vector;
- unsigned dest;
-
- vector = assign_irq_vector(irq);
- if (vector >= 0) {
- dest = cpu_mask_to_apicid(TARGET_CPUS);
-
- msg->address_hi = MSI_ADDR_BASE_HI;
- msg->address_lo =
- MSI_ADDR_BASE_LO |
- ((INT_DEST_MODE == 0) ?
-MSI_ADDR_DEST_MODE_PHYSICAL:
- MSI_ADDR_DEST_MODE_LOGICAL) |
- ((INT_DELIVERY_MODE != dest_LowestPrio) ?
- MSI_ADDR_REDIRECTION_CPU:
- MSI_ADDR_REDIRECTION_LOWPRI) |
- MSI_ADDR_DEST_ID(dest);
-
- msg->data =
- MSI_DATA_TRIGGER_EDGE |
- MSI_DATA_LEVEL_ASSERT |
- ((INT_DELIVERY_MODE != dest_LowestPrio) ?
-MSI_DATA_DELIVERY_FIXED:
- MSI_DATA_DELIVERY_LOWPRI) |
- MSI_DATA_VECTOR(vector);
- }
- return vector;
-}
-
-#ifdef CONFIG_SMP
-static void set_msi_irq_affinity(unsigned int irq, cpumask_t mask)
-{
- struct msi_msg msg;
- unsigned int dest;
- cpumask_t tmp;
- int vector;
-
- cpus_and(tmp, mask, cpu_online_map);
- if (cpus_empty(tmp))
- tmp = TARGET_CPUS;
-
- vector = assign_irq_vector(irq);
- if (vector < 0)
- return;
-
- dest = cpu_mask_to_apicid(mask);
-
- read_msi_msg(irq, &msg);
-
- msg.data &= ~MSI_DATA_VECTOR_MASK;
- msg.data |= MSI_DATA_VECTOR(vector);
- msg.address_lo &= ~MSI_ADDR_DEST_ID_MASK;
- msg.address_lo |= MSI_ADDR_DEST_ID(dest);
-
- write_msi_msg(irq, &msg);
- irq_desc[irq].affinity = mask;
-}
-#endif /* CONFIG_SMP */
-
-/*
- * IRQ Chip for MSI PCI/PCI-X/PCI-Express Devices,
- * which implement the MSI or MSI-X Capability Structure.
- */
-static struct irq_chip msi_chip = {
- .name = "PCI-MSI",
- .unmask = unmask_msi_irq,
- .mask = mask_msi_irq,
- .ack = ack_ioapic_irq,
-#ifdef CONFIG_SMP
- .set_affinity = set_msi_irq_affinity,
-#endif
- .retrigger = ioapic_retrigger_irq,
-};
-
-int arch_setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc)
-{
- struct msi_msg msg;
- int irq, ret;
- irq = create_irq();
- if (irq < 0)
- return irq;
-
- ret = msi_compose_msg(dev, irq, &msg);
- if (ret < 0) {
- destroy_irq(irq);
- return ret;
- }
-
- set_irq_msi(irq, desc);
- write_msi_msg(irq, &msg);
-
- set_irq_chip_and_handler_name(irq, &msi_chip, handle_edge_irq,
- "edge");
-
- return 0;
-}
-
-void arch_teardown_msi_irq(unsigned int irq)
-{
- destroy_irq(irq);
-}
-
-#endif /* CONFIG_PCI_MSI */
-
-/*
- * Hypertransport interrupt support
- */
-#ifdef CONFIG_HT_IRQ
-
-#ifdef CONFIG_SMP
-
-static void target_ht_irq(unsigned int irq, unsigned int dest)
-{
- struct ht_irq_msg msg;
- fetch_ht_irq_msg(irq, &msg);
-
- msg.address_lo &= ~(HT_IRQ_LOW_DEST_ID_MASK);
- msg.address_hi &= ~(HT_IRQ_HIGH_DEST_ID_MASK);
-
- msg.address_lo |= HT_IRQ_LOW_DEST_ID(dest);
- msg.address_hi |= HT_IRQ_HIGH_DEST_ID(dest);
-
- write_ht_irq_msg(irq, &msg);
-}
-
-static void set_ht_irq_affinity(unsigned int irq, cpumask_t mask)
-{
- unsigned int dest;
- cpumask_t tmp;
-
- cpus_and(tmp, mask, cpu_online_map);
- if (cpus_empty(tmp))
- tmp = TARGET_CPUS;
-
- cpus_and(mask, tmp, CPU_MASK_ALL);
-
- dest = cpu_mask_to_apicid(mask);
-
- target_ht_irq(irq, dest);
- irq_desc[irq].affinity = mask;
-}
-#endif
-
-static struct irq_chip ht_irq_chip = {
- .name = "PCI-HT",
- .mask = mask_ht_irq,
- .unmask = unmask_ht_irq,
- .ack = ack_ioapic_irq,
-#ifdef CONFIG_SMP
- .set_affinity = set_ht_irq_affinity,
-#endif
- .retrigger = ioapic_retrigger_irq,
-};
-
-int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev)
-{
- int vector;
-
- vector = assign_irq_vector(irq);
- if (vector >= 0) {
- struct ht_irq_msg msg;
- unsigned dest;
- cpumask_t tmp;
-
- cpus_clear(tmp);
- cpu_set(vector >> 8, tmp);
- dest = cpu_mask_to_apicid(tmp);
-
- msg.address_hi = HT_IRQ_HIGH_DEST_ID(dest);
-
- msg.address_lo =
- HT_IRQ_LOW_BASE |
- HT_IRQ_LOW_DEST_ID(dest) |
- HT_IRQ_LOW_VECTOR(vector) |
- ((INT_DEST_MODE == 0) ?
- HT_IRQ_LOW_DM_PHYSICAL :
- HT_IRQ_LOW_DM_LOGICAL) |
- HT_IRQ_LOW_RQEOI_EDGE |
- ((INT_DELIVERY_MODE != dest_LowestPrio) ?
- HT_IRQ_LOW_MT_FIXED :
- HT_IRQ_LOW_MT_ARBITRATED) |
- HT_IRQ_LOW_IRQ_MASKED;
-
- write_ht_irq_msg(irq, &msg);
-
- set_irq_chip_and_handler_name(irq, &ht_irq_chip,
- handle_edge_irq, "edge");
- }
- return vector;
-}
-#endif /* CONFIG_HT_IRQ */
-
-/* --------------------------------------------------------------------------
- ACPI-based IOAPIC Configuration
- -------------------------------------------------------------------------- */
-
-#ifdef CONFIG_ACPI
-
-int __init io_apic_get_unique_id(int ioapic, int apic_id)
-{
- union IO_APIC_reg_00 reg_00;
- static physid_mask_t apic_id_map = PHYSID_MASK_NONE;
- physid_mask_t tmp;
- unsigned long flags;
- int i = 0;
-
- /*
- * The P4 platform supports up to 256 APIC IDs on two separate APIC
- * buses (one for LAPICs, one for IOAPICs), where predecessors only
- * supports up to 16 on one shared APIC bus.
- *
- * TBD: Expand LAPIC/IOAPIC support on P4-class systems to take full
- * advantage of new APIC bus architecture.
- */
-
- if (physids_empty(apic_id_map))
- apic_id_map = ioapic_phys_id_map(phys_cpu_present_map);
-
- spin_lock_irqsave(&ioapic_lock, flags);
- reg_00.raw = io_apic_read(ioapic, 0);
- spin_unlock_irqrestore(&ioapic_lock, flags);
-
- if (apic_id >= get_physical_broadcast()) {
- printk(KERN_WARNING "IOAPIC[%d]: Invalid apic_id %d, trying "
- "%d\n", ioapic, apic_id, reg_00.bits.ID);
- apic_id = reg_00.bits.ID;
- }
-
- /*
- * Every APIC in a system must have a unique ID or we get lots of nice
- * 'stuck on smp_invalidate_needed IPI wait' messages.
- */
- if (check_apicid_used(apic_id_map, apic_id)) {
-
- for (i = 0; i < get_physical_broadcast(); i++) {
- if (!check_apicid_used(apic_id_map, i))
- break;
- }
-
- if (i == get_physical_broadcast())
- panic("Max apic_id exceeded!\n");
-
- printk(KERN_WARNING "IOAPIC[%d]: apic_id %d already used, "
- "trying %d\n", ioapic, apic_id, i);
-
- apic_id = i;
- }
-
- tmp = apicid_to_cpu_present(apic_id);
- physids_or(apic_id_map, apic_id_map, tmp);
-
- if (reg_00.bits.ID != apic_id) {
- reg_00.bits.ID = apic_id;
-
- spin_lock_irqsave(&ioapic_lock, flags);
- io_apic_write(ioapic, 0, reg_00.raw);
- reg_00.raw = io_apic_read(ioapic, 0);
- spin_unlock_irqrestore(&ioapic_lock, flags);
-
- /* Sanity check */
- if (reg_00.bits.ID != apic_id) {
- printk("IOAPIC[%d]: Unable to change apic_id!\n", ioapic);
- return -1;
- }
- }
-
- apic_printk(APIC_VERBOSE, KERN_INFO
- "IOAPIC[%d]: Assigned apic_id %d\n", ioapic, apic_id);
-
- return apic_id;
-}
-
-
-int __init io_apic_get_version(int ioapic)
-{
- union IO_APIC_reg_01 reg_01;
- unsigned long flags;
-
- spin_lock_irqsave(&ioapic_lock, flags);
- reg_01.raw = io_apic_read(ioapic, 1);
- spin_unlock_irqrestore(&ioapic_lock, flags);
-
- return reg_01.bits.version;
-}
-
-
-int __init io_apic_get_redir_entries(int ioapic)
-{
- union IO_APIC_reg_01 reg_01;
- unsigned long flags;
-
- spin_lock_irqsave(&ioapic_lock, flags);
- reg_01.raw = io_apic_read(ioapic, 1);
- spin_unlock_irqrestore(&ioapic_lock, flags);
-
- return reg_01.bits.entries;
-}
-
-
-int io_apic_set_pci_routing(int ioapic, int pin, int irq, int edge_level, int active_high_low)
-{
- struct IO_APIC_route_entry entry;
-
- if (!IO_APIC_IRQ(irq)) {
- printk(KERN_ERR "IOAPIC[%d]: Invalid reference to IRQ 0\n",
- ioapic);
- return -EINVAL;
- }
-
- /*
- * Generate a PCI IRQ routing entry and program the IOAPIC accordingly.
- * Note that we mask (disable) IRQs now -- these get enabled when the
- * corresponding device driver registers for this IRQ.
- */
-
- memset(&entry, 0, sizeof(entry));
-
- entry.delivery_mode = INT_DELIVERY_MODE;
- entry.dest_mode = INT_DEST_MODE;
- entry.dest.logical.logical_dest = cpu_mask_to_apicid(TARGET_CPUS);
- entry.trigger = edge_level;
- entry.polarity = active_high_low;
- entry.mask = 1;
-
- /*
- * IRQs < 16 are already in the irq_2_pin[] map
- */
- if (irq >= 16)
- add_pin_to_irq(irq, ioapic, pin);
-
- entry.vector = assign_irq_vector(irq);
-
- apic_printk(APIC_DEBUG, KERN_DEBUG "IOAPIC[%d]: Set PCI routing entry "
- "(%d-%d -> 0x%x -> IRQ %d Mode:%i Active:%i)\n", ioapic,
- mp_ioapics[ioapic].mp_apicid, pin, entry.vector, irq,
- edge_level, active_high_low);
-
- ioapic_register_intr(irq, entry.vector, edge_level);
-
- if (!ioapic && (irq < 16))
- disable_8259A_irq(irq);
-
- ioapic_write_entry(ioapic, pin, entry);
-
- return 0;
-}
-
-int acpi_get_override_irq(int bus_irq, int *trigger, int *polarity)
-{
- int i;
-
- if (skip_ioapic_setup)
- return -1;
-
- for (i = 0; i < mp_irq_entries; i++)
- if (mp_irqs[i].mp_irqtype == mp_INT &&
- mp_irqs[i].mp_srcbusirq == bus_irq)
- break;
- if (i >= mp_irq_entries)
- return -1;
-
- *trigger = irq_trigger(i);
- *polarity = irq_polarity(i);
- return 0;
-}
-
-#endif /* CONFIG_ACPI */
-
-static int __init parse_disable_timer_pin_1(char *arg)
-{
- disable_timer_pin_1 = 1;
- return 0;
-}
-early_param("disable_timer_pin_1", parse_disable_timer_pin_1);
-
-static int __init parse_enable_timer_pin_1(char *arg)
-{
- disable_timer_pin_1 = -1;
- return 0;
-}
-early_param("enable_timer_pin_1", parse_enable_timer_pin_1);
-
-static int __init parse_noapic(char *arg)
-{
- /* disable IO-APIC */
- disable_ioapic_setup();
- return 0;
-}
-early_param("noapic", parse_noapic);
-
-void __init ioapic_init_mappings(void)
-{
- unsigned long ioapic_phys, idx = FIX_IO_APIC_BASE_0;
- int i;
-
- for (i = 0; i < nr_ioapics; i++) {
- if (smp_found_config) {
- ioapic_phys = mp_ioapics[i].mp_apicaddr;
- if (!ioapic_phys) {
- printk(KERN_ERR
- "WARNING: bogus zero IO-APIC "
- "address found in MPTABLE, "
- "disabling IO/APIC support!\n");
- smp_found_config = 0;
- skip_ioapic_setup = 1;
- goto fake_ioapic_page;
- }
- } else {
-fake_ioapic_page:
- ioapic_phys = (unsigned long)
- alloc_bootmem_pages(PAGE_SIZE);
- ioapic_phys = __pa(ioapic_phys);
- }
- set_fixmap_nocache(idx, ioapic_phys);
- printk(KERN_DEBUG "mapped IOAPIC to %08lx (%08lx)\n",
- __fix_to_virt(idx), ioapic_phys);
- idx++;
- }
-}
-
diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c
new file mode 100644
index 000000000000..ccf6c503fc3b
--- /dev/null
+++ b/arch/x86/kernel/irq.c
@@ -0,0 +1,189 @@
+/*
+ * Common interrupt code for 32 and 64 bit
+ */
+#include
+#include
+#include
+#include
+
+#include
+#include
+#include
+
+atomic_t irq_err_count;
+
+/*
+ * 'what should we do if we get a hw irq event on an illegal vector'.
+ * each architecture has to answer this themselves.
+ */
+void ack_bad_irq(unsigned int irq)
+{
+ printk(KERN_ERR "unexpected IRQ trap at vector %02x\n", irq);
+
+#ifdef CONFIG_X86_LOCAL_APIC
+ /*
+ * Currently unexpected vectors happen only on SMP and APIC.
+ * We _must_ ack these because every local APIC has only N
+ * irq slots per priority level, and a 'hanging, unacked' IRQ
+ * holds up an irq slot - in excessive cases (when multiple
+ * unexpected vectors occur) that might lock up the APIC
+ * completely.
+ * But only ack when the APIC is enabled -AK
+ */
+ if (cpu_has_apic)
+ ack_APIC_irq();
+#endif
+}
+
+#ifdef CONFIG_X86_32
+# define irq_stats(x) (&per_cpu(irq_stat,x))
+#else
+# define irq_stats(x) cpu_pda(x)
+#endif
+/*
+ * /proc/interrupts printing:
+ */
+static int show_other_interrupts(struct seq_file *p)
+{
+ int j;
+
+ seq_printf(p, "NMI: ");
+ for_each_online_cpu(j)
+ seq_printf(p, "%10u ", irq_stats(j)->__nmi_count);
+ seq_printf(p, " Non-maskable interrupts\n");
+#ifdef CONFIG_X86_LOCAL_APIC
+ seq_printf(p, "LOC: ");
+ for_each_online_cpu(j)
+ seq_printf(p, "%10u ", irq_stats(j)->apic_timer_irqs);
+ seq_printf(p, " Local timer interrupts\n");
+#endif
+#ifdef CONFIG_SMP
+ seq_printf(p, "RES: ");
+ for_each_online_cpu(j)
+ seq_printf(p, "%10u ", irq_stats(j)->irq_resched_count);
+ seq_printf(p, " Rescheduling interrupts\n");
+ seq_printf(p, "CAL: ");
+ for_each_online_cpu(j)
+ seq_printf(p, "%10u ", irq_stats(j)->irq_call_count);
+ seq_printf(p, " Function call interrupts\n");
+ seq_printf(p, "TLB: ");
+ for_each_online_cpu(j)
+ seq_printf(p, "%10u ", irq_stats(j)->irq_tlb_count);
+ seq_printf(p, " TLB shootdowns\n");
+#endif
+#ifdef CONFIG_X86_MCE
+ seq_printf(p, "TRM: ");
+ for_each_online_cpu(j)
+ seq_printf(p, "%10u ", irq_stats(j)->irq_thermal_count);
+ seq_printf(p, " Thermal event interrupts\n");
+# ifdef CONFIG_X86_64
+ seq_printf(p, "THR: ");
+ for_each_online_cpu(j)
+ seq_printf(p, "%10u ", irq_stats(j)->irq_threshold_count);
+ seq_printf(p, " Threshold APIC interrupts\n");
+# endif
+#endif
+#ifdef CONFIG_X86_LOCAL_APIC
+ seq_printf(p, "SPU: ");
+ for_each_online_cpu(j)
+ seq_printf(p, "%10u ", irq_stats(j)->irq_spurious_count);
+ seq_printf(p, " Spurious interrupts\n");
+#endif
+ seq_printf(p, "ERR: %10u\n", atomic_read(&irq_err_count));
+#if defined(CONFIG_X86_IO_APIC)
+ seq_printf(p, "MIS: %10u\n", atomic_read(&irq_mis_count));
+#endif
+ return 0;
+}
+
+int show_interrupts(struct seq_file *p, void *v)
+{
+ unsigned long flags, any_count = 0;
+ int i = *(loff_t *) v, j;
+ struct irqaction *action;
+ struct irq_desc *desc;
+
+ if (i > nr_irqs)
+ return 0;
+
+ if (i == nr_irqs)
+ return show_other_interrupts(p);
+
+ /* print header */
+ if (i == 0) {
+ seq_printf(p, " ");
+ for_each_online_cpu(j)
+ seq_printf(p, "CPU%-8d",j);
+ seq_putc(p, '\n');
+ }
+
+ desc = irq_to_desc(i);
+ spin_lock_irqsave(&desc->lock, flags);
+#ifndef CONFIG_SMP
+ any_count = kstat_irqs(i);
+#else
+ for_each_online_cpu(j)
+ any_count |= kstat_irqs_cpu(i, j);
+#endif
+ action = desc->action;
+ if (!action && !any_count)
+ goto out;
+
+ seq_printf(p, "%3d: ", i);
+#ifndef CONFIG_SMP
+ seq_printf(p, "%10u ", kstat_irqs(i));
+#else
+ for_each_online_cpu(j)
+ seq_printf(p, "%10u ", kstat_irqs_cpu(i, j));
+#endif
+ seq_printf(p, " %8s", desc->chip->name);
+ seq_printf(p, "-%-8s", desc->name);
+
+ if (action) {
+ seq_printf(p, " %s", action->name);
+ while ((action = action->next) != NULL)
+ seq_printf(p, ", %s", action->name);
+ }
+
+ seq_putc(p, '\n');
+out:
+ spin_unlock_irqrestore(&desc->lock, flags);
+ return 0;
+}
+
+/*
+ * /proc/stat helpers
+ */
+u64 arch_irq_stat_cpu(unsigned int cpu)
+{
+ u64 sum = irq_stats(cpu)->__nmi_count;
+
+#ifdef CONFIG_X86_LOCAL_APIC
+ sum += irq_stats(cpu)->apic_timer_irqs;
+#endif
+#ifdef CONFIG_SMP
+ sum += irq_stats(cpu)->irq_resched_count;
+ sum += irq_stats(cpu)->irq_call_count;
+ sum += irq_stats(cpu)->irq_tlb_count;
+#endif
+#ifdef CONFIG_X86_MCE
+ sum += irq_stats(cpu)->irq_thermal_count;
+# ifdef CONFIG_X86_64
+ sum += irq_stats(cpu)->irq_threshold_count;
+#endif
+#endif
+#ifdef CONFIG_X86_LOCAL_APIC
+ sum += irq_stats(cpu)->irq_spurious_count;
+#endif
+ return sum;
+}
+
+u64 arch_irq_stat(void)
+{
+ u64 sum = atomic_read(&irq_err_count);
+
+#ifdef CONFIG_X86_IO_APIC
+ sum += atomic_read(&irq_mis_count);
+#endif
+ return sum;
+}
diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c
index b71e02d42f4f..a51382672de0 100644
--- a/arch/x86/kernel/irq_32.c
+++ b/arch/x86/kernel/irq_32.c
@@ -25,29 +25,6 @@ EXPORT_PER_CPU_SYMBOL(irq_stat);
DEFINE_PER_CPU(struct pt_regs *, irq_regs);
EXPORT_PER_CPU_SYMBOL(irq_regs);
-/*
- * 'what should we do if we get a hw irq event on an illegal vector'.
- * each architecture has to answer this themselves.
- */
-void ack_bad_irq(unsigned int irq)
-{
- printk(KERN_ERR "unexpected IRQ trap at vector %02x\n", irq);
-
-#ifdef CONFIG_X86_LOCAL_APIC
- /*
- * Currently unexpected vectors happen only on SMP and APIC.
- * We _must_ ack these because every local APIC has only N
- * irq slots per priority level, and a 'hanging, unacked' IRQ
- * holds up an irq slot - in excessive cases (when multiple
- * unexpected vectors occur) that might lock up the APIC
- * completely.
- * But only ack when the APIC is enabled -AK
- */
- if (cpu_has_apic)
- ack_APIC_irq();
-#endif
-}
-
#ifdef CONFIG_DEBUG_STACKOVERFLOW
/* Debugging check for stack overflow: is there less than 1KB free? */
static int check_stack_overflow(void)
@@ -223,20 +200,25 @@ unsigned int do_IRQ(struct pt_regs *regs)
{
struct pt_regs *old_regs;
/* high bit used in ret_from_ code */
- int overflow, irq = ~regs->orig_ax;
- struct irq_desc *desc = irq_desc + irq;
+ int overflow;
+ unsigned vector = ~regs->orig_ax;
+ struct irq_desc *desc;
+ unsigned irq;
- if (unlikely((unsigned)irq >= NR_IRQS)) {
- printk(KERN_EMERG "%s: cannot handle IRQ %d\n",
- __func__, irq);
- BUG();
- }
old_regs = set_irq_regs(regs);
irq_enter();
+ irq = __get_cpu_var(vector_irq)[vector];
overflow = check_stack_overflow();
+ desc = irq_to_desc(irq);
+ if (unlikely(!desc)) {
+ printk(KERN_EMERG "%s: cannot handle IRQ %d vector %#x cpu %d\n",
+ __func__, irq, vector, smp_processor_id());
+ BUG();
+ }
+
if (!execute_on_irq_stack(overflow, desc, irq)) {
if (unlikely(overflow))
print_stack_overflow();
@@ -248,146 +230,6 @@ unsigned int do_IRQ(struct pt_regs *regs)
return 1;
}
-/*
- * Interrupt statistics:
- */
-
-atomic_t irq_err_count;
-
-/*
- * /proc/interrupts printing:
- */
-
-int show_interrupts(struct seq_file *p, void *v)
-{
- int i = *(loff_t *) v, j;
- struct irqaction * action;
- unsigned long flags;
-
- if (i == 0) {
- seq_printf(p, " ");
- for_each_online_cpu(j)
- seq_printf(p, "CPU%-8d",j);
- seq_putc(p, '\n');
- }
-
- if (i < NR_IRQS) {
- unsigned any_count = 0;
-
- spin_lock_irqsave(&irq_desc[i].lock, flags);
-#ifndef CONFIG_SMP
- any_count = kstat_irqs(i);
-#else
- for_each_online_cpu(j)
- any_count |= kstat_cpu(j).irqs[i];
-#endif
- action = irq_desc[i].action;
- if (!action && !any_count)
- goto skip;
- seq_printf(p, "%3d: ",i);
-#ifndef CONFIG_SMP
- seq_printf(p, "%10u ", kstat_irqs(i));
-#else
- for_each_online_cpu(j)
- seq_printf(p, "%10u ", kstat_cpu(j).irqs[i]);
-#endif
- seq_printf(p, " %8s", irq_desc[i].chip->name);
- seq_printf(p, "-%-8s", irq_desc[i].name);
-
- if (action) {
- seq_printf(p, " %s", action->name);
- while ((action = action->next) != NULL)
- seq_printf(p, ", %s", action->name);
- }
-
- seq_putc(p, '\n');
-skip:
- spin_unlock_irqrestore(&irq_desc[i].lock, flags);
- } else if (i == NR_IRQS) {
- seq_printf(p, "NMI: ");
- for_each_online_cpu(j)
- seq_printf(p, "%10u ", nmi_count(j));
- seq_printf(p, " Non-maskable interrupts\n");
-#ifdef CONFIG_X86_LOCAL_APIC
- seq_printf(p, "LOC: ");
- for_each_online_cpu(j)
- seq_printf(p, "%10u ",
- per_cpu(irq_stat,j).apic_timer_irqs);
- seq_printf(p, " Local timer interrupts\n");
-#endif
-#ifdef CONFIG_SMP
- seq_printf(p, "RES: ");
- for_each_online_cpu(j)
- seq_printf(p, "%10u ",
- per_cpu(irq_stat,j).irq_resched_count);
- seq_printf(p, " Rescheduling interrupts\n");
- seq_printf(p, "CAL: ");
- for_each_online_cpu(j)
- seq_printf(p, "%10u ",
- per_cpu(irq_stat,j).irq_call_count);
- seq_printf(p, " Function call interrupts\n");
- seq_printf(p, "TLB: ");
- for_each_online_cpu(j)
- seq_printf(p, "%10u ",
- per_cpu(irq_stat,j).irq_tlb_count);
- seq_printf(p, " TLB shootdowns\n");
-#endif
-#ifdef CONFIG_X86_MCE
- seq_printf(p, "TRM: ");
- for_each_online_cpu(j)
- seq_printf(p, "%10u ",
- per_cpu(irq_stat,j).irq_thermal_count);
- seq_printf(p, " Thermal event interrupts\n");
-#endif
-#ifdef CONFIG_X86_LOCAL_APIC
- seq_printf(p, "SPU: ");
- for_each_online_cpu(j)
- seq_printf(p, "%10u ",
- per_cpu(irq_stat,j).irq_spurious_count);
- seq_printf(p, " Spurious interrupts\n");
-#endif
- seq_printf(p, "ERR: %10u\n", atomic_read(&irq_err_count));
-#if defined(CONFIG_X86_IO_APIC)
- seq_printf(p, "MIS: %10u\n", atomic_read(&irq_mis_count));
-#endif
- }
- return 0;
-}
-
-/*
- * /proc/stat helpers
- */
-u64 arch_irq_stat_cpu(unsigned int cpu)
-{
- u64 sum = nmi_count(cpu);
-
-#ifdef CONFIG_X86_LOCAL_APIC
- sum += per_cpu(irq_stat, cpu).apic_timer_irqs;
-#endif
-#ifdef CONFIG_SMP
- sum += per_cpu(irq_stat, cpu).irq_resched_count;
- sum += per_cpu(irq_stat, cpu).irq_call_count;
- sum += per_cpu(irq_stat, cpu).irq_tlb_count;
-#endif
-#ifdef CONFIG_X86_MCE
- sum += per_cpu(irq_stat, cpu).irq_thermal_count;
-#endif
-#ifdef CONFIG_X86_LOCAL_APIC
- sum += per_cpu(irq_stat, cpu).irq_spurious_count;
-#endif
- return sum;
-}
-
-u64 arch_irq_stat(void)
-{
- u64 sum = atomic_read(&irq_err_count);
-
-#ifdef CONFIG_X86_IO_APIC
- sum += atomic_read(&irq_mis_count);
-#endif
- return sum;
-}
-
#ifdef CONFIG_HOTPLUG_CPU
#include
@@ -395,20 +237,22 @@ void fixup_irqs(cpumask_t map)
{
unsigned int irq;
static int warned;
+ struct irq_desc *desc;
- for (irq = 0; irq < NR_IRQS; irq++) {
+ for_each_irq_desc(irq, desc) {
cpumask_t mask;
+
if (irq == 2)
continue;
- cpus_and(mask, irq_desc[irq].affinity, map);
+ cpus_and(mask, desc->affinity, map);
if (any_online_cpu(mask) == NR_CPUS) {
printk("Breaking affinity for irq %i\n", irq);
mask = map;
}
- if (irq_desc[irq].chip->set_affinity)
- irq_desc[irq].chip->set_affinity(irq, mask);
- else if (irq_desc[irq].action && !(warned++))
+ if (desc->chip->set_affinity)
+ desc->chip->set_affinity(irq, mask);
+ else if (desc->action && !(warned++))
printk("Cannot set affinity for irq %i\n", irq);
}
diff --git a/arch/x86/kernel/irq_64.c b/arch/x86/kernel/irq_64.c
index f065fe9071b9..60eb84eb77a0 100644
--- a/arch/x86/kernel/irq_64.c
+++ b/arch/x86/kernel/irq_64.c
@@ -18,28 +18,6 @@
#include
#include
-atomic_t irq_err_count;
-
-/*
- * 'what should we do if we get a hw irq event on an illegal vector'.
- * each architecture has to answer this themselves.
- */
-void ack_bad_irq(unsigned int irq)
-{
- printk(KERN_WARNING "unexpected IRQ trap at vector %02x\n", irq);
- /*
- * Currently unexpected vectors happen only on SMP and APIC.
- * We _must_ ack these because every local APIC has only N
- * irq slots per priority level, and a 'hanging, unacked' IRQ
- * holds up an irq slot - in excessive cases (when multiple
- * unexpected vectors occur) that might lock up the APIC
- * completely.
- * But don't ack when the APIC is disabled. -AK
- */
- if (!disable_apic)
- ack_APIC_irq();
-}
-
#ifdef CONFIG_DEBUG_STACKOVERFLOW
/*
* Probabilistic stack overflow check:
@@ -64,122 +42,6 @@ static inline void stack_overflow_check(struct pt_regs *regs)
}
#endif
-/*
- * Generic, controller-independent functions:
- */
-
-int show_interrupts(struct seq_file *p, void *v)
-{
- int i = *(loff_t *) v, j;
- struct irqaction * action;
- unsigned long flags;
-
- if (i == 0) {
- seq_printf(p, " ");
- for_each_online_cpu(j)
- seq_printf(p, "CPU%-8d",j);
- seq_putc(p, '\n');
- }
-
- if (i < NR_IRQS) {
- unsigned any_count = 0;
-
- spin_lock_irqsave(&irq_desc[i].lock, flags);
-#ifndef CONFIG_SMP
- any_count = kstat_irqs(i);
-#else
- for_each_online_cpu(j)
- any_count |= kstat_cpu(j).irqs[i];
-#endif
- action = irq_desc[i].action;
- if (!action && !any_count)
- goto skip;
- seq_printf(p, "%3d: ",i);
-#ifndef CONFIG_SMP
- seq_printf(p, "%10u ", kstat_irqs(i));
-#else
- for_each_online_cpu(j)
- seq_printf(p, "%10u ", kstat_cpu(j).irqs[i]);
-#endif
- seq_printf(p, " %8s", irq_desc[i].chip->name);
- seq_printf(p, "-%-8s", irq_desc[i].name);
-
- if (action) {
- seq_printf(p, " %s", action->name);
- while ((action = action->next) != NULL)
- seq_printf(p, ", %s", action->name);
- }
- seq_putc(p, '\n');
-skip:
- spin_unlock_irqrestore(&irq_desc[i].lock, flags);
- } else if (i == NR_IRQS) {
- seq_printf(p, "NMI: ");
- for_each_online_cpu(j)
- seq_printf(p, "%10u ", cpu_pda(j)->__nmi_count);
- seq_printf(p, " Non-maskable interrupts\n");
- seq_printf(p, "LOC: ");
- for_each_online_cpu(j)
- seq_printf(p, "%10u ", cpu_pda(j)->apic_timer_irqs);
- seq_printf(p, " Local timer interrupts\n");
-#ifdef CONFIG_SMP
- seq_printf(p, "RES: ");
- for_each_online_cpu(j)
- seq_printf(p, "%10u ", cpu_pda(j)->irq_resched_count);
- seq_printf(p, " Rescheduling interrupts\n");
- seq_printf(p, "CAL: ");
- for_each_online_cpu(j)
- seq_printf(p, "%10u ", cpu_pda(j)->irq_call_count);
- seq_printf(p, " Function call interrupts\n");
- seq_printf(p, "TLB: ");
- for_each_online_cpu(j)
- seq_printf(p, "%10u ", cpu_pda(j)->irq_tlb_count);
- seq_printf(p, " TLB shootdowns\n");
-#endif
-#ifdef CONFIG_X86_MCE
- seq_printf(p, "TRM: ");
- for_each_online_cpu(j)
- seq_printf(p, "%10u ", cpu_pda(j)->irq_thermal_count);
- seq_printf(p, " Thermal event interrupts\n");
- seq_printf(p, "THR: ");
- for_each_online_cpu(j)
- seq_printf(p, "%10u ", cpu_pda(j)->irq_threshold_count);
- seq_printf(p, " Threshold APIC interrupts\n");
-#endif
- seq_printf(p, "SPU: ");
- for_each_online_cpu(j)
- seq_printf(p, "%10u ", cpu_pda(j)->irq_spurious_count);
- seq_printf(p, " Spurious interrupts\n");
- seq_printf(p, "ERR: %10u\n", atomic_read(&irq_err_count));
- }
- return 0;
-}
-
-/*
- * /proc/stat helpers
- */
-u64 arch_irq_stat_cpu(unsigned int cpu)
-{
- u64 sum = cpu_pda(cpu)->__nmi_count;
-
- sum += cpu_pda(cpu)->apic_timer_irqs;
-#ifdef CONFIG_SMP
- sum += cpu_pda(cpu)->irq_resched_count;
- sum += cpu_pda(cpu)->irq_call_count;
- sum += cpu_pda(cpu)->irq_tlb_count;
-#endif
-#ifdef CONFIG_X86_MCE
- sum += cpu_pda(cpu)->irq_thermal_count;
- sum += cpu_pda(cpu)->irq_threshold_count;
-#endif
- sum += cpu_pda(cpu)->irq_spurious_count;
- return sum;
-}
-
-u64 arch_irq_stat(void)
-{
- return atomic_read(&irq_err_count);
-}
-
/*
* do_IRQ handles all normal device IRQ's (the special
* SMP cross-CPU interrupts have their own specific
@@ -188,6 +50,7 @@ u64 arch_irq_stat(void)
asmlinkage unsigned int do_IRQ(struct pt_regs *regs)
{
struct pt_regs *old_regs = set_irq_regs(regs);
+ struct irq_desc *desc;
/* high bit used in ret_from_ code */
unsigned vector = ~regs->orig_ax;
@@ -201,8 +64,9 @@ asmlinkage unsigned int do_IRQ(struct pt_regs *regs)
stack_overflow_check(regs);
#endif
- if (likely(irq < NR_IRQS))
- generic_handle_irq(irq);
+ desc = irq_to_desc(irq);
+ if (likely(desc))
+ generic_handle_irq_desc(irq, desc);
else {
if (!disable_apic)
ack_APIC_irq();
@@ -223,8 +87,9 @@ void fixup_irqs(cpumask_t map)
{
unsigned int irq;
static int warned;
+ struct irq_desc *desc;
- for (irq = 0; irq < NR_IRQS; irq++) {
+ for_each_irq_desc(irq, desc) {
cpumask_t mask;
int break_affinity = 0;
int set_affinity = 1;
@@ -233,32 +98,32 @@ void fixup_irqs(cpumask_t map)
continue;
/* interrupt's are disabled at this point */
- spin_lock(&irq_desc[irq].lock);
+ spin_lock(&desc->lock);
if (!irq_has_action(irq) ||
- cpus_equal(irq_desc[irq].affinity, map)) {
- spin_unlock(&irq_desc[irq].lock);
+ cpus_equal(desc->affinity, map)) {
+ spin_unlock(&desc->lock);
continue;
}
- cpus_and(mask, irq_desc[irq].affinity, map);
+ cpus_and(mask, desc->affinity, map);
if (cpus_empty(mask)) {
break_affinity = 1;
mask = map;
}
- if (irq_desc[irq].chip->mask)
- irq_desc[irq].chip->mask(irq);
+ if (desc->chip->mask)
+ desc->chip->mask(irq);
- if (irq_desc[irq].chip->set_affinity)
- irq_desc[irq].chip->set_affinity(irq, mask);
+ if (desc->chip->set_affinity)
+ desc->chip->set_affinity(irq, mask);
else if (!(warned++))
set_affinity = 0;
- if (irq_desc[irq].chip->unmask)
- irq_desc[irq].chip->unmask(irq);
+ if (desc->chip->unmask)
+ desc->chip->unmask(irq);
- spin_unlock(&irq_desc[irq].lock);
+ spin_unlock(&desc->lock);
if (break_affinity && set_affinity)
printk("Broke affinity for irq %i\n", irq);
diff --git a/arch/x86/kernel/irqinit_32.c b/arch/x86/kernel/irqinit_32.c
index 9200a1e2752d..845aa9803e80 100644
--- a/arch/x86/kernel/irqinit_32.c
+++ b/arch/x86/kernel/irqinit_32.c
@@ -69,6 +69,13 @@ void __init init_ISA_irqs (void)
* 16 old-style INTA-cycle interrupts:
*/
for (i = 0; i < 16; i++) {
+ /* first time call this irq_desc */
+ struct irq_desc *desc = irq_to_desc(i);
+
+ desc->status = IRQ_DISABLED;
+ desc->action = NULL;
+ desc->depth = 1;
+
set_irq_chip_and_handler_name(i, &i8259A_chip,
handle_level_irq, "XT");
}
@@ -83,6 +90,27 @@ static struct irqaction irq2 = {
.name = "cascade",
};
+DEFINE_PER_CPU(vector_irq_t, vector_irq) = {
+ [0 ... IRQ0_VECTOR - 1] = -1,
+ [IRQ0_VECTOR] = 0,
+ [IRQ1_VECTOR] = 1,
+ [IRQ2_VECTOR] = 2,
+ [IRQ3_VECTOR] = 3,
+ [IRQ4_VECTOR] = 4,
+ [IRQ5_VECTOR] = 5,
+ [IRQ6_VECTOR] = 6,
+ [IRQ7_VECTOR] = 7,
+ [IRQ8_VECTOR] = 8,
+ [IRQ9_VECTOR] = 9,
+ [IRQ10_VECTOR] = 10,
+ [IRQ11_VECTOR] = 11,
+ [IRQ12_VECTOR] = 12,
+ [IRQ13_VECTOR] = 13,
+ [IRQ14_VECTOR] = 14,
+ [IRQ15_VECTOR] = 15,
+ [IRQ15_VECTOR + 1 ... NR_VECTORS - 1] = -1
+};
+
/* Overridden in paravirt.c */
void init_IRQ(void) __attribute__((weak, alias("native_init_IRQ")));
@@ -98,22 +126,14 @@ void __init native_init_IRQ(void)
* us. (some of these will be overridden and become
* 'special' SMP interrupts)
*/
- for (i = 0; i < (NR_VECTORS - FIRST_EXTERNAL_VECTOR); i++) {
- int vector = FIRST_EXTERNAL_VECTOR + i;
- if (i >= NR_IRQS)
- break;
+ for (i = FIRST_EXTERNAL_VECTOR; i < NR_VECTORS; i++) {
/* SYSCALL_VECTOR was reserved in trap_init. */
- if (!test_bit(vector, used_vectors))
- set_intr_gate(vector, interrupt[i]);
+ if (i != SYSCALL_VECTOR)
+ set_intr_gate(i, interrupt[i]);
}
-#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_SMP)
- /*
- * IRQ0 must be given a fixed assignment and initialized,
- * because it's used before the IO-APIC is set up.
- */
- set_intr_gate(FIRST_DEVICE_VECTOR, interrupt[0]);
+#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_SMP)
/*
* The reschedule interrupt is a CPU-to-CPU reschedule-helper
* IPI, driven by wakeup.
@@ -128,6 +148,9 @@ void __init native_init_IRQ(void)
/* IPI for single call function */
set_intr_gate(CALL_FUNCTION_SINGLE_VECTOR, call_function_single_interrupt);
+
+ /* Low priority IPI to cleanup after moving an irq */
+ set_intr_gate(IRQ_MOVE_CLEANUP_VECTOR, irq_move_cleanup_interrupt);
#endif
#ifdef CONFIG_X86_LOCAL_APIC
diff --git a/arch/x86/kernel/irqinit_64.c b/arch/x86/kernel/irqinit_64.c
index 5b5be9d43c2a..ff0235391285 100644
--- a/arch/x86/kernel/irqinit_64.c
+++ b/arch/x86/kernel/irqinit_64.c
@@ -142,23 +142,19 @@ void __init init_ISA_irqs(void)
init_bsp_APIC();
init_8259A(0);
- for (i = 0; i < NR_IRQS; i++) {
- irq_desc[i].status = IRQ_DISABLED;
- irq_desc[i].action = NULL;
- irq_desc[i].depth = 1;
+ for (i = 0; i < 16; i++) {
+ /* first time call this irq_desc */
+ struct irq_desc *desc = irq_to_desc(i);
- if (i < 16) {
- /*
- * 16 old-style INTA-cycle interrupts:
- */
- set_irq_chip_and_handler_name(i, &i8259A_chip,
+ desc->status = IRQ_DISABLED;
+ desc->action = NULL;
+ desc->depth = 1;
+
+ /*
+ * 16 old-style INTA-cycle interrupts:
+ */
+ set_irq_chip_and_handler_name(i, &i8259A_chip,
handle_level_irq, "XT");
- } else {
- /*
- * 'high' PCI IRQs filled in on demand
- */
- irq_desc[i].chip = &no_irq_chip;
- }
}
}
diff --git a/arch/x86/kernel/quirks.c b/arch/x86/kernel/quirks.c
index f6a11b9b1f98..67465ed89310 100644
--- a/arch/x86/kernel/quirks.c
+++ b/arch/x86/kernel/quirks.c
@@ -35,9 +35,6 @@ static void __devinit quirk_intel_irqbalance(struct pci_dev *dev)
if (!(word & (1 << 13))) {
dev_info(&dev->dev, "Intel E7520/7320/7525 detected; "
"disabling irq balancing and affinity\n");
-#ifdef CONFIG_IRQBALANCE
- irqbalance_disable("");
-#endif
noirqdebug_setup("");
#ifdef CONFIG_PROC_FS
no_irq_affinity = 1;
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index b2c97874ec0f..0fa6790c1dd3 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -1073,6 +1073,7 @@ void __init setup_arch(char **cmdline_p)
#endif
prefill_possible_map();
+
#ifdef CONFIG_X86_64
init_cpu_to_node();
#endif
@@ -1080,6 +1081,9 @@ void __init setup_arch(char **cmdline_p)
init_apic_mappings();
ioapic_init_mappings();
+ /* need to wait for io_apic is mapped */
+ nr_irqs = probe_nr_irqs();
+
kvm_guest_init();
e820_reserve_resources();
diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c
index 0e67f72d9316..410c88f0bfeb 100644
--- a/arch/x86/kernel/setup_percpu.c
+++ b/arch/x86/kernel/setup_percpu.c
@@ -140,25 +140,30 @@ static void __init setup_cpu_pda_map(void)
*/
void __init setup_per_cpu_areas(void)
{
- ssize_t size = PERCPU_ENOUGH_ROOM;
+ ssize_t size, old_size;
char *ptr;
int cpu;
+ unsigned long align = 1;
/* Setup cpu_pda map */
setup_cpu_pda_map();
/* Copy section for each CPU (we discard the original) */
- size = PERCPU_ENOUGH_ROOM;
+ old_size = PERCPU_ENOUGH_ROOM;
+ align = max_t(unsigned long, PAGE_SIZE, align);
+ size = roundup(old_size, align);
printk(KERN_INFO "PERCPU: Allocating %zd bytes of per cpu data\n",
size);
for_each_possible_cpu(cpu) {
#ifndef CONFIG_NEED_MULTIPLE_NODES
- ptr = alloc_bootmem_pages(size);
+ ptr = __alloc_bootmem(size, align,
+ __pa(MAX_DMA_ADDRESS));
#else
int node = early_cpu_to_node(cpu);
if (!node_online(node) || !NODE_DATA(node)) {
- ptr = alloc_bootmem_pages(size);
+ ptr = __alloc_bootmem(size, align,
+ __pa(MAX_DMA_ADDRESS));
printk(KERN_INFO
"cpu %d has no node %d or node-local memory\n",
cpu, node);
@@ -167,7 +172,8 @@ void __init setup_per_cpu_areas(void)
cpu, __pa(ptr));
}
else {
- ptr = alloc_bootmem_pages_node(NODE_DATA(node), size);
+ ptr = __alloc_bootmem_node(NODE_DATA(node), size, align,
+ __pa(MAX_DMA_ADDRESS));
if (ptr)
printk(KERN_DEBUG "per cpu data for cpu%d on node%d at %016lx\n",
cpu, node, __pa(ptr));
@@ -175,7 +181,6 @@ void __init setup_per_cpu_areas(void)
#endif
per_cpu_offset(cpu) = ptr - __per_cpu_start;
memcpy(ptr, __per_cpu_start, __per_cpu_end - __per_cpu_start);
-
}
printk(KERN_DEBUG "NR_CPUS: %d, nr_cpu_ids: %d, nr_node_ids %d\n",
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 7ed9e070a6e9..7ece815ea637 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -543,10 +543,10 @@ static inline void __inquire_remote_apic(int apicid)
int timeout;
u32 status;
- printk(KERN_INFO "Inquiring remote APIC #%d...\n", apicid);
+ printk(KERN_INFO "Inquiring remote APIC 0x%x...\n", apicid);
for (i = 0; i < ARRAY_SIZE(regs); i++) {
- printk(KERN_INFO "... APIC #%d %s: ", apicid, names[i]);
+ printk(KERN_INFO "... APIC 0x%x %s: ", apicid, names[i]);
/*
* Wait for idle.
@@ -874,7 +874,7 @@ do_rest:
start_ip = setup_trampoline();
/* So we see what's up */
- printk(KERN_INFO "Booting processor %d/%d ip %lx\n",
+ printk(KERN_INFO "Booting processor %d APIC 0x%x ip 0x%lx\n",
cpu, apicid, start_ip);
/*
diff --git a/arch/x86/kernel/uv_irq.c b/arch/x86/kernel/uv_irq.c
new file mode 100644
index 000000000000..aeef529917e4
--- /dev/null
+++ b/arch/x86/kernel/uv_irq.c
@@ -0,0 +1,79 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License. See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * SGI UV IRQ functions
+ *
+ * Copyright (C) 2008 Silicon Graphics, Inc. All rights reserved.
+ */
+
+#include
+#include
+
+#include
+#include