Merge commit 'v2.6.29-rc7' into perfcounters/core
Conflicts:
arch/x86/mm/iomap_32.c
diff --git a/Documentation/cputopology.txt b/Documentation/cputopology.txt
index 45932ec..b41f3e5 100644
--- a/Documentation/cputopology.txt
+++ b/Documentation/cputopology.txt
@@ -18,11 +18,11 @@
these macros in include/asm-XXX/topology.h:
#define topology_physical_package_id(cpu)
#define topology_core_id(cpu)
-#define topology_thread_siblings(cpu)
-#define topology_core_siblings(cpu)
+#define topology_thread_cpumask(cpu)
+#define topology_core_cpumask(cpu)
The type of **_id is int.
-The type of siblings is cpumask_t.
+The type of siblings is (const) struct cpumask *.
To be consistent on all architectures, include/linux/topology.h
provides default definitions for any of the above macros that are
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index 54f21a5..28de395 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -1310,8 +1310,13 @@
memtest= [KNL,X86] Enable memtest
Format: <integer>
- range: 0,4 : pattern number
default : 0 <disable>
+ Specifies the number of memtest passes to be
+ performed. Each pass selects another test
+ pattern from a given set of patterns. Memtest
+ fills the memory with this pattern, validates
+ memory contents and reserves bad memory
+ regions that are detected.
meye.*= [HW] Set MotionEye Camera parameters
See Documentation/video4linux/meye.txt.
diff --git a/Documentation/perf-counters.txt b/Documentation/perf-counters.txt
new file mode 100644
index 0000000..fddd321
--- /dev/null
+++ b/Documentation/perf-counters.txt
@@ -0,0 +1,147 @@
+
+Performance Counters for Linux
+------------------------------
+
+Performance counters are special hardware registers available on most modern
+CPUs. These registers count the number of certain types of hw events: such
+as instructions executed, cachemisses suffered, or branches mis-predicted -
+without slowing down the kernel or applications. These registers can also
+trigger interrupts when a threshold number of events have passed - and can
+thus be used to profile the code that runs on that CPU.
+
+The Linux Performance Counter subsystem provides an abstraction of these
+hardware capabilities. It provides per task and per CPU counters, counter
+groups, and it provides event capabilities on top of those.
+
+Performance counters are accessed via special file descriptors.
+There's one file descriptor per virtual counter used.
+
+The special file descriptor is opened via the perf_counter_open()
+system call:
+
+ int sys_perf_counter_open(struct perf_counter_hw_event *hw_event_uptr,
+ pid_t pid, int cpu, int group_fd);
+
+The syscall returns the new fd. The fd can be used via the normal
+VFS system calls: read() can be used to read the counter, fcntl()
+can be used to set the blocking mode, etc.
+
+Multiple counters can be kept open at a time, and the counters
+can be poll()ed.
+
+When creating a new counter fd, 'perf_counter_hw_event' is:
+
+/*
+ * Hardware event to monitor via a performance monitoring counter:
+ */
+struct perf_counter_hw_event {
+ s64 type;
+
+ u64 irq_period;
+ u32 record_type;
+
+ u32 disabled : 1, /* off by default */
+ nmi : 1, /* NMI sampling */
+ raw : 1, /* raw event type */
+ __reserved_1 : 29;
+
+ u64 __reserved_2;
+};
+
+/*
+ * Generalized performance counter event types, used by the hw_event.type
+ * parameter of the sys_perf_counter_open() syscall:
+ */
+enum hw_event_types {
+ /*
+ * Common hardware events, generalized by the kernel:
+ */
+ PERF_COUNT_CYCLES = 0,
+ PERF_COUNT_INSTRUCTIONS = 1,
+ PERF_COUNT_CACHE_REFERENCES = 2,
+ PERF_COUNT_CACHE_MISSES = 3,
+ PERF_COUNT_BRANCH_INSTRUCTIONS = 4,
+ PERF_COUNT_BRANCH_MISSES = 5,
+
+ /*
+ * Special "software" counters provided by the kernel, even if
+ * the hardware does not support performance counters. These
+ * counters measure various physical and sw events of the
+ * kernel (and allow the profiling of them as well):
+ */
+ PERF_COUNT_CPU_CLOCK = -1,
+ PERF_COUNT_TASK_CLOCK = -2,
+ /*
+ * Future software events:
+ */
+ /* PERF_COUNT_PAGE_FAULTS = -3,
+ PERF_COUNT_CONTEXT_SWITCHES = -4, */
+};
+
+These are standardized types of events that work uniformly on all CPUs
+that implements Performance Counters support under Linux. If a CPU is
+not able to count branch-misses, then the system call will return
+-EINVAL.
+
+More hw_event_types are supported as well, but they are CPU
+specific and are enumerated via /sys on a per CPU basis. Raw hw event
+types can be passed in under hw_event.type if hw_event.raw is 1.
+For example, to count "External bus cycles while bus lock signal asserted"
+events on Intel Core CPUs, pass in a 0x4064 event type value and set
+hw_event.raw to 1.
+
+'record_type' is the type of data that a read() will provide for the
+counter, and it can be one of:
+
+/*
+ * IRQ-notification data record type:
+ */
+enum perf_counter_record_type {
+ PERF_RECORD_SIMPLE = 0,
+ PERF_RECORD_IRQ = 1,
+ PERF_RECORD_GROUP = 2,
+};
+
+a "simple" counter is one that counts hardware events and allows
+them to be read out into a u64 count value. (read() returns 8 on
+a successful read of a simple counter.)
+
+An "irq" counter is one that will also provide an IRQ context information:
+the IP of the interrupted context. In this case read() will return
+the 8-byte counter value, plus the Instruction Pointer address of the
+interrupted context.
+
+The parameter 'hw_event_period' is the number of events before waking up
+a read() that is blocked on a counter fd. Zero value means a non-blocking
+counter.
+
+The 'pid' parameter allows the counter to be specific to a task:
+
+ pid == 0: if the pid parameter is zero, the counter is attached to the
+ current task.
+
+ pid > 0: the counter is attached to a specific task (if the current task
+ has sufficient privilege to do so)
+
+ pid < 0: all tasks are counted (per cpu counters)
+
+The 'cpu' parameter allows a counter to be made specific to a full
+CPU:
+
+ cpu >= 0: the counter is restricted to a specific CPU
+ cpu == -1: the counter counts on all CPUs
+
+(Note: the combination of 'pid == -1' and 'cpu == -1' is not valid.)
+
+A 'pid > 0' and 'cpu == -1' counter is a per task counter that counts
+events of that task and 'follows' that task to whatever CPU the task
+gets schedule to. Per task counters can be created by any user, for
+their own tasks.
+
+A 'pid == -1' and 'cpu == x' counter is a per CPU counter that counts
+all events on CPU-x. Per CPU counters need CAP_SYS_ADMIN privilege.
+
+Group counters are created by passing in a group_fd of another counter.
+Groups are scheduled at once and can be used with PERF_RECORD_GROUP
+to record multi-dimensional timestamps.
+
diff --git a/Documentation/x86/boot.txt b/Documentation/x86/boot.txt
index 7b4596a..1229969 100644
--- a/Documentation/x86/boot.txt
+++ b/Documentation/x86/boot.txt
@@ -158,7 +158,7 @@
0202/4 2.00+ header Magic signature "HdrS"
0206/2 2.00+ version Boot protocol version supported
0208/4 2.00+ realmode_swtch Boot loader hook (see below)
-020C/2 2.00+ start_sys The load-low segment (0x1000) (obsolete)
+020C/2 2.00+ start_sys_seg The load-low segment (0x1000) (obsolete)
020E/2 2.00+ kernel_version Pointer to kernel version string
0210/1 2.00+ type_of_loader Boot loader identifier
0211/1 2.00+ loadflags Boot protocol option flags
@@ -170,10 +170,11 @@
0224/2 2.01+ heap_end_ptr Free memory after setup end
0226/2 N/A pad1 Unused
0228/4 2.02+ cmd_line_ptr 32-bit pointer to the kernel command line
-022C/4 2.03+ initrd_addr_max Highest legal initrd address
+022C/4 2.03+ ramdisk_max Highest legal initrd address
0230/4 2.05+ kernel_alignment Physical addr alignment required for kernel
0234/1 2.05+ relocatable_kernel Whether kernel is relocatable or not
-0235/3 N/A pad2 Unused
+0235/1 N/A pad2 Unused
+0236/2 N/A pad3 Unused
0238/4 2.06+ cmdline_size Maximum size of the kernel command line
023C/4 2.07+ hardware_subarch Hardware subarchitecture
0240/8 2.07+ hardware_subarch_data Subarchitecture-specific data
@@ -299,14 +300,14 @@
e.g. 0x0204 for version 2.04, and 0x0a11 for a hypothetical version
10.17.
-Field name: readmode_swtch
+Field name: realmode_swtch
Type: modify (optional)
Offset/size: 0x208/4
Protocol: 2.00+
Boot loader hook (see ADVANCED BOOT LOADER HOOKS below.)
-Field name: start_sys
+Field name: start_sys_seg
Type: read
Offset/size: 0x20c/2
Protocol: 2.00+
@@ -468,7 +469,7 @@
zero, the kernel will assume that your boot loader does not support
the 2.02+ protocol.
-Field name: initrd_addr_max
+Field name: ramdisk_max
Type: read
Offset/size: 0x22c/4
Protocol: 2.03+
diff --git a/Makefile b/Makefile
index d04ee0a..c40d83a 100644
--- a/Makefile
+++ b/Makefile
@@ -533,8 +533,9 @@
endif
# Force gcc to behave correct even for buggy distributions
-# Arch Makefiles may override this setting
+ifndef CONFIG_CC_STACKPROTECTOR
KBUILD_CFLAGS += $(call cc-option, -fno-stack-protector)
+endif
ifdef CONFIG_FRAME_POINTER
KBUILD_CFLAGS += -fno-omit-frame-pointer -fno-optimize-sibling-calls
diff --git a/arch/alpha/include/asm/statfs.h b/arch/alpha/include/asm/statfs.h
index de35cd4..ccd2e18 100644
--- a/arch/alpha/include/asm/statfs.h
+++ b/arch/alpha/include/asm/statfs.h
@@ -1,6 +1,8 @@
#ifndef _ALPHA_STATFS_H
#define _ALPHA_STATFS_H
+#include <linux/types.h>
+
/* Alpha is the only 64-bit platform with 32-bit statfs. And doesn't
even seem to implement statfs64 */
#define __statfs_word __u32
diff --git a/arch/alpha/include/asm/swab.h b/arch/alpha/include/asm/swab.h
index 68e7089..4d682b1 100644
--- a/arch/alpha/include/asm/swab.h
+++ b/arch/alpha/include/asm/swab.h
@@ -1,7 +1,7 @@
#ifndef _ALPHA_SWAB_H
#define _ALPHA_SWAB_H
-#include <asm/types.h>
+#include <linux/types.h>
#include <linux/compiler.h>
#include <asm/compiler.h>
diff --git a/arch/alpha/kernel/irq.c b/arch/alpha/kernel/irq.c
index 703731a..7bc7489 100644
--- a/arch/alpha/kernel/irq.c
+++ b/arch/alpha/kernel/irq.c
@@ -55,7 +55,7 @@
cpu = (cpu < (NR_CPUS-1) ? cpu + 1 : 0);
last_cpu = cpu;
- irq_desc[irq].affinity = cpumask_of_cpu(cpu);
+ cpumask_copy(irq_desc[irq].affinity, cpumask_of(cpu));
irq_desc[irq].chip->set_affinity(irq, cpumask_of(cpu));
return 0;
}
diff --git a/arch/arm/include/asm/a.out.h b/arch/arm/include/asm/a.out.h
index 79489fd..083894b 100644
--- a/arch/arm/include/asm/a.out.h
+++ b/arch/arm/include/asm/a.out.h
@@ -2,7 +2,7 @@
#define __ARM_A_OUT_H__
#include <linux/personality.h>
-#include <asm/types.h>
+#include <linux/types.h>
struct exec
{
diff --git a/arch/arm/include/asm/setup.h b/arch/arm/include/asm/setup.h
index f2cd18a..ee1304f 100644
--- a/arch/arm/include/asm/setup.h
+++ b/arch/arm/include/asm/setup.h
@@ -14,7 +14,7 @@
#ifndef __ASMARM_SETUP_H
#define __ASMARM_SETUP_H
-#include <asm/types.h>
+#include <linux/types.h>
#define COMMAND_LINE_SIZE 1024
diff --git a/arch/arm/include/asm/swab.h b/arch/arm/include/asm/swab.h
index 27a689b..ca2bf2f 100644
--- a/arch/arm/include/asm/swab.h
+++ b/arch/arm/include/asm/swab.h
@@ -16,7 +16,7 @@
#define __ASM_ARM_SWAB_H
#include <linux/compiler.h>
-#include <asm/types.h>
+#include <linux/types.h>
#if !defined(__STRICT_ANSI__) || defined(__KERNEL__)
# define __SWAB_64_THRU_32__
diff --git a/arch/arm/kernel/irq.c b/arch/arm/kernel/irq.c
index 363db18..45eacb5 100644
--- a/arch/arm/kernel/irq.c
+++ b/arch/arm/kernel/irq.c
@@ -104,6 +104,11 @@
.lock = __SPIN_LOCK_UNLOCKED(bad_irq_desc.lock),
};
+#ifdef CONFIG_CPUMASK_OFFSTACK
+/* We are not allocating bad_irq_desc.affinity or .pending_mask */
+#error "ARM architecture does not support CONFIG_CPUMASK_OFFSTACK."
+#endif
+
/*
* do_IRQ handles all hardware IRQ's. Decoded IRQs should not
* come via this function. Instead, they should provide their
@@ -161,7 +166,7 @@
irq_desc[irq].status |= IRQ_NOREQUEST | IRQ_NOPROBE;
#ifdef CONFIG_SMP
- bad_irq_desc.affinity = CPU_MASK_ALL;
+ cpumask_setall(bad_irq_desc.affinity);
bad_irq_desc.cpu = smp_processor_id();
#endif
init_arch_irq();
@@ -191,15 +196,16 @@
struct irq_desc *desc = irq_desc + i;
if (desc->cpu == cpu) {
- unsigned int newcpu = any_online_cpu(desc->affinity);
-
- if (newcpu == NR_CPUS) {
+ unsigned int newcpu = cpumask_any_and(desc->affinity,
+ cpu_online_mask);
+ if (newcpu >= nr_cpu_ids) {
if (printk_ratelimit())
printk(KERN_INFO "IRQ%u no longer affine to CPU%u\n",
i, cpu);
- cpus_setall(desc->affinity);
- newcpu = any_online_cpu(desc->affinity);
+ cpumask_setall(desc->affinity);
+ newcpu = cpumask_any_and(desc->affinity,
+ cpu_online_mask);
}
route_irq(desc, i, newcpu);
diff --git a/arch/arm/kernel/vmlinux.lds.S b/arch/arm/kernel/vmlinux.lds.S
index 0021607..85598f7 100644
--- a/arch/arm/kernel/vmlinux.lds.S
+++ b/arch/arm/kernel/vmlinux.lds.S
@@ -65,6 +65,7 @@
#endif
. = ALIGN(4096);
__per_cpu_start = .;
+ *(.data.percpu.page_aligned)
*(.data.percpu)
*(.data.percpu.shared_aligned)
__per_cpu_end = .;
diff --git a/arch/arm/oprofile/op_model_mpcore.c b/arch/arm/oprofile/op_model_mpcore.c
index 6d6bd58..853d42b 100644
--- a/arch/arm/oprofile/op_model_mpcore.c
+++ b/arch/arm/oprofile/op_model_mpcore.c
@@ -263,7 +263,7 @@
const struct cpumask *mask = cpumask_of(cpu);
spin_lock_irq(&desc->lock);
- desc->affinity = *mask;
+ cpumask_copy(desc->affinity, mask);
desc->chip->set_affinity(irq, mask);
spin_unlock_irq(&desc->lock);
}
diff --git a/arch/avr32/include/asm/swab.h b/arch/avr32/include/asm/swab.h
index a14aa5b..14cc737 100644
--- a/arch/avr32/include/asm/swab.h
+++ b/arch/avr32/include/asm/swab.h
@@ -4,7 +4,7 @@
#ifndef __ASM_AVR32_SWAB_H
#define __ASM_AVR32_SWAB_H
-#include <asm/types.h>
+#include <linux/types.h>
#include <linux/compiler.h>
#define __SWAB_64_THRU_32__
diff --git a/arch/blackfin/include/asm/swab.h b/arch/blackfin/include/asm/swab.h
index 69a051b..6403ad2 100644
--- a/arch/blackfin/include/asm/swab.h
+++ b/arch/blackfin/include/asm/swab.h
@@ -1,7 +1,7 @@
#ifndef _BLACKFIN_SWAB_H
#define _BLACKFIN_SWAB_H
-#include <asm/types.h>
+#include <linux/types.h>
#include <linux/compiler.h>
#if defined(__GNUC__) && !defined(__STRICT_ANSI__) || defined(__KERNEL__)
diff --git a/arch/blackfin/kernel/irqchip.c b/arch/blackfin/kernel/irqchip.c
index 75724ee..23e9aa0 100644
--- a/arch/blackfin/kernel/irqchip.c
+++ b/arch/blackfin/kernel/irqchip.c
@@ -70,6 +70,11 @@
#endif
};
+#ifdef CONFIG_CPUMASK_OFFSTACK
+/* We are not allocating a variable-sized bad_irq_desc.affinity */
+#error "Blackfin architecture does not support CONFIG_CPUMASK_OFFSTACK."
+#endif
+
int show_interrupts(struct seq_file *p, void *v)
{
int i = *(loff_t *) v, j;
diff --git a/arch/h8300/include/asm/swab.h b/arch/h8300/include/asm/swab.h
index c108f39..39abbf5 100644
--- a/arch/h8300/include/asm/swab.h
+++ b/arch/h8300/include/asm/swab.h
@@ -1,7 +1,7 @@
#ifndef _H8300_SWAB_H
#define _H8300_SWAB_H
-#include <asm/types.h>
+#include <linux/types.h>
#if defined(__GNUC__) && !defined(__STRICT_ANSI__) || defined(__KERNEL__)
# define __SWAB_64_THRU_32__
diff --git a/arch/ia64/include/asm/fpu.h b/arch/ia64/include/asm/fpu.h
index 3859558..0c26157 100644
--- a/arch/ia64/include/asm/fpu.h
+++ b/arch/ia64/include/asm/fpu.h
@@ -6,8 +6,6 @@
* David Mosberger-Tang <davidm@hpl.hp.com>
*/
-#include <asm/types.h>
-
/* floating point status register: */
#define FPSR_TRAP_VD (1 << 0) /* invalid op trap disabled */
#define FPSR_TRAP_DD (1 << 1) /* denormal trap disabled */
diff --git a/arch/ia64/include/asm/gcc_intrin.h b/arch/ia64/include/asm/gcc_intrin.h
index 0f5b559..c2c5fd8 100644
--- a/arch/ia64/include/asm/gcc_intrin.h
+++ b/arch/ia64/include/asm/gcc_intrin.h
@@ -6,6 +6,7 @@
* Copyright (C) 2002,2003 Suresh Siddha <suresh.b.siddha@intel.com>
*/
+#include <linux/types.h>
#include <linux/compiler.h>
/* define this macro to get some asm stmts included in 'c' files */
diff --git a/arch/ia64/include/asm/intrinsics.h b/arch/ia64/include/asm/intrinsics.h
index a3e44a5..c47830e 100644
--- a/arch/ia64/include/asm/intrinsics.h
+++ b/arch/ia64/include/asm/intrinsics.h
@@ -10,6 +10,7 @@
#ifndef __ASSEMBLY__
+#include <linux/types.h>
/* include compiler specific intrinsics */
#include <asm/ia64regs.h>
#ifdef __INTEL_COMPILER
diff --git a/arch/ia64/include/asm/kvm.h b/arch/ia64/include/asm/kvm.h
index bfa86b6..2b0a38e 100644
--- a/arch/ia64/include/asm/kvm.h
+++ b/arch/ia64/include/asm/kvm.h
@@ -21,8 +21,7 @@
*
*/
-#include <asm/types.h>
-
+#include <linux/types.h>
#include <linux/ioctl.h>
/* Select x86 specific features in <linux/kvm.h> */
diff --git a/arch/ia64/include/asm/percpu.h b/arch/ia64/include/asm/percpu.h
index 77f30b6..30cf465 100644
--- a/arch/ia64/include/asm/percpu.h
+++ b/arch/ia64/include/asm/percpu.h
@@ -27,12 +27,12 @@
#else /* ! SMP */
-#define PER_CPU_ATTRIBUTES __attribute__((__section__(".data.percpu")))
-
#define per_cpu_init() (__phys_per_cpu_start)
#endif /* SMP */
+#define PER_CPU_BASE_SECTION ".data.percpu"
+
/*
* Be extremely careful when taking the address of this variable! Due to virtual
* remapping, it is different from the canonical address returned by __get_cpu_var(var)!
diff --git a/arch/ia64/include/asm/swab.h b/arch/ia64/include/asm/swab.h
index 6aa58b6..c89a8cb 100644
--- a/arch/ia64/include/asm/swab.h
+++ b/arch/ia64/include/asm/swab.h
@@ -6,7 +6,7 @@
* David Mosberger-Tang <davidm@hpl.hp.com>, Hewlett-Packard Co.
*/
-#include <asm/types.h>
+#include <linux/types.h>
#include <asm/intrinsics.h>
#include <linux/compiler.h>
diff --git a/arch/ia64/include/asm/topology.h b/arch/ia64/include/asm/topology.h
index 32f3af1..3193f44 100644
--- a/arch/ia64/include/asm/topology.h
+++ b/arch/ia64/include/asm/topology.h
@@ -84,7 +84,7 @@
.child = NULL, \
.groups = NULL, \
.min_interval = 8, \
- .max_interval = 8*(min(num_online_cpus(), 32)), \
+ .max_interval = 8*(min(num_online_cpus(), 32U)), \
.busy_factor = 64, \
.imbalance_pct = 125, \
.cache_nice_tries = 2, \
diff --git a/arch/ia64/include/asm/uv/uv.h b/arch/ia64/include/asm/uv/uv.h
new file mode 100644
index 0000000..61b5bdf
--- /dev/null
+++ b/arch/ia64/include/asm/uv/uv.h
@@ -0,0 +1,13 @@
+#ifndef _ASM_IA64_UV_UV_H
+#define _ASM_IA64_UV_UV_H
+
+#include <asm/system.h>
+#include <asm/sn/simulator.h>
+
+static inline int is_uv_system(void)
+{
+ /* temporary support for running on hardware simulator */
+ return IS_MEDUSA() || ia64_platform_is("uv");
+}
+
+#endif /* _ASM_IA64_UV_UV_H */
diff --git a/arch/ia64/kernel/acpi.c b/arch/ia64/kernel/acpi.c
index d541671..bdef2ce 100644
--- a/arch/ia64/kernel/acpi.c
+++ b/arch/ia64/kernel/acpi.c
@@ -199,6 +199,10 @@
return __va(phys_addr);
}
+void __init __acpi_unmap_table(char *map, unsigned long size)
+{
+}
+
/* --------------------------------------------------------------------------
Boot-time Table Parsing
-------------------------------------------------------------------------- */
diff --git a/arch/ia64/kernel/iosapic.c b/arch/ia64/kernel/iosapic.c
index e131250..166e0d8 100644
--- a/arch/ia64/kernel/iosapic.c
+++ b/arch/ia64/kernel/iosapic.c
@@ -880,7 +880,7 @@
if (iosapic_intr_info[irq].count == 0) {
#ifdef CONFIG_SMP
/* Clear affinity */
- cpus_setall(idesc->affinity);
+ cpumask_setall(idesc->affinity);
#endif
/* Clear the interrupt information */
iosapic_intr_info[irq].dest = 0;
diff --git a/arch/ia64/kernel/irq.c b/arch/ia64/kernel/irq.c
index a58f64c..226233a 100644
--- a/arch/ia64/kernel/irq.c
+++ b/arch/ia64/kernel/irq.c
@@ -103,7 +103,7 @@
void set_irq_affinity_info (unsigned int irq, int hwid, int redir)
{
if (irq < NR_IRQS) {
- cpumask_copy(&irq_desc[irq].affinity,
+ cpumask_copy(irq_desc[irq].affinity,
cpumask_of(cpu_logical_id(hwid)));
irq_redir[irq] = (char) (redir & 0xff);
}
@@ -148,7 +148,7 @@
if (desc->status == IRQ_PER_CPU)
continue;
- if (cpumask_any_and(&irq_desc[irq].affinity, cpu_online_mask)
+ if (cpumask_any_and(irq_desc[irq].affinity, cpu_online_mask)
>= nr_cpu_ids) {
/*
* Save it for phase 2 processing
diff --git a/arch/ia64/kernel/irq_ia64.c b/arch/ia64/kernel/irq_ia64.c
index 28d3d48..927ad02 100644
--- a/arch/ia64/kernel/irq_ia64.c
+++ b/arch/ia64/kernel/irq_ia64.c
@@ -493,11 +493,13 @@
saved_tpr = ia64_getreg(_IA64_REG_CR_TPR);
ia64_srlz_d();
while (vector != IA64_SPURIOUS_INT_VECTOR) {
+ struct irq_desc *desc = irq_to_desc(vector);
+
if (unlikely(IS_LOCAL_TLB_FLUSH(vector))) {
smp_local_flush_tlb();
- kstat_this_cpu.irqs[vector]++;
+ kstat_incr_irqs_this_cpu(vector, desc);
} else if (unlikely(IS_RESCHEDULE(vector)))
- kstat_this_cpu.irqs[vector]++;
+ kstat_incr_irqs_this_cpu(vector, desc);
else {
int irq = local_vector_to_irq(vector);
@@ -551,11 +553,13 @@
* Perform normal interrupt style processing
*/
while (vector != IA64_SPURIOUS_INT_VECTOR) {
+ struct irq_desc *desc = irq_to_desc(vector);
+
if (unlikely(IS_LOCAL_TLB_FLUSH(vector))) {
smp_local_flush_tlb();
- kstat_this_cpu.irqs[vector]++;
+ kstat_incr_irqs_this_cpu(vector, desc);
} else if (unlikely(IS_RESCHEDULE(vector)))
- kstat_this_cpu.irqs[vector]++;
+ kstat_incr_irqs_this_cpu(vector, desc);
else {
struct pt_regs *old_regs = set_irq_regs(NULL);
int irq = local_vector_to_irq(vector);
diff --git a/arch/ia64/kernel/msi_ia64.c b/arch/ia64/kernel/msi_ia64.c
index 8903393..dcb6b7c 100644
--- a/arch/ia64/kernel/msi_ia64.c
+++ b/arch/ia64/kernel/msi_ia64.c
@@ -75,7 +75,7 @@
msg.data = data;
write_msi_msg(irq, &msg);
- irq_desc[irq].affinity = cpumask_of_cpu(cpu);
+ cpumask_copy(irq_desc[irq].affinity, cpumask_of(cpu));
}
#endif /* CONFIG_SMP */
@@ -187,7 +187,7 @@
msg.address_lo |= MSI_ADDR_DESTID_CPU(cpu_physical_id(cpu));
dmar_msi_write(irq, &msg);
- irq_desc[irq].affinity = *mask;
+ cpumask_copy(irq_desc[irq].affinity, mask);
}
#endif /* CONFIG_SMP */
diff --git a/arch/ia64/kernel/vmlinux.lds.S b/arch/ia64/kernel/vmlinux.lds.S
index 10a7d47e..f45e4e5 100644
--- a/arch/ia64/kernel/vmlinux.lds.S
+++ b/arch/ia64/kernel/vmlinux.lds.S
@@ -219,6 +219,7 @@
.data.percpu PERCPU_ADDR : AT(__phys_per_cpu_start - LOAD_OFFSET)
{
__per_cpu_start = .;
+ *(.data.percpu.page_aligned)
*(.data.percpu)
*(.data.percpu.shared_aligned)
__per_cpu_end = .;
diff --git a/arch/ia64/sn/kernel/msi_sn.c b/arch/ia64/sn/kernel/msi_sn.c
index ca553b0..81e4289 100644
--- a/arch/ia64/sn/kernel/msi_sn.c
+++ b/arch/ia64/sn/kernel/msi_sn.c
@@ -205,7 +205,7 @@
msg.address_lo = (u32)(bus_addr & 0x00000000ffffffff);
write_msi_msg(irq, &msg);
- irq_desc[irq].affinity = *cpu_mask;
+ cpumask_copy(irq_desc[irq].affinity, cpu_mask);
}
#endif /* CONFIG_SMP */
diff --git a/arch/mips/include/asm/irq.h b/arch/mips/include/asm/irq.h
index abc62aa..3214ade 100644
--- a/arch/mips/include/asm/irq.h
+++ b/arch/mips/include/asm/irq.h
@@ -66,7 +66,7 @@
*/
#define IRQ_AFFINITY_HOOK(irq) \
do { \
- if (!cpu_isset(smp_processor_id(), irq_desc[irq].affinity)) { \
+ if (!cpumask_test_cpu(smp_processor_id(), irq_desc[irq].affinity)) {\
smtc_forward_irq(irq); \
irq_exit(); \
return; \
diff --git a/arch/mips/include/asm/sigcontext.h b/arch/mips/include/asm/sigcontext.h
index 9ce0607..9e89cf9 100644
--- a/arch/mips/include/asm/sigcontext.h
+++ b/arch/mips/include/asm/sigcontext.h
@@ -9,6 +9,7 @@
#ifndef _ASM_SIGCONTEXT_H
#define _ASM_SIGCONTEXT_H
+#include <linux/types.h>
#include <asm/sgidefs.h>
#if _MIPS_SIM == _MIPS_SIM_ABI32
diff --git a/arch/mips/include/asm/swab.h b/arch/mips/include/asm/swab.h
index 88f1f7d..99993c0 100644
--- a/arch/mips/include/asm/swab.h
+++ b/arch/mips/include/asm/swab.h
@@ -9,7 +9,7 @@
#define _ASM_SWAB_H
#include <linux/compiler.h>
-#include <asm/types.h>
+#include <linux/types.h>
#define __SWAB_64_THRU_32__
diff --git a/arch/mips/kernel/irq-gic.c b/arch/mips/kernel/irq-gic.c
index 494a49a..87deb8f 100644
--- a/arch/mips/kernel/irq-gic.c
+++ b/arch/mips/kernel/irq-gic.c
@@ -187,7 +187,7 @@
set_bit(irq, pcpu_masks[first_cpu(tmp)].pcpu_mask);
}
- irq_desc[irq].affinity = *cpumask;
+ cpumask_copy(irq_desc[irq].affinity, cpumask);
spin_unlock_irqrestore(&gic_lock, flags);
}
diff --git a/arch/mips/kernel/smtc.c b/arch/mips/kernel/smtc.c
index b6cca01..5f5af7d 100644
--- a/arch/mips/kernel/smtc.c
+++ b/arch/mips/kernel/smtc.c
@@ -686,7 +686,7 @@
* and efficiency, we just pick the easiest one to find.
*/
- target = first_cpu(irq_desc[irq].affinity);
+ target = cpumask_first(irq_desc[irq].affinity);
/*
* We depend on the platform code to have correctly processed
@@ -921,11 +921,13 @@
struct clock_event_device *cd;
void *arg_copy = pipi->arg;
int type_copy = pipi->type;
+ int irq = MIPS_CPU_IRQ_BASE + 1;
+
smtc_ipi_nq(&freeIPIq, pipi);
switch (type_copy) {
case SMTC_CLOCK_TICK:
irq_enter();
- kstat_this_cpu.irqs[MIPS_CPU_IRQ_BASE + 1]++;
+ kstat_incr_irqs_this_cpu(irq, irq_to_desc(irq));
cd = &per_cpu(mips_clockevent_device, cpu);
cd->event_handler(cd);
irq_exit();
diff --git a/arch/mips/mti-malta/malta-smtc.c b/arch/mips/mti-malta/malta-smtc.c
index aabd727..5ba3188 100644
--- a/arch/mips/mti-malta/malta-smtc.c
+++ b/arch/mips/mti-malta/malta-smtc.c
@@ -116,7 +116,7 @@
void plat_set_irq_affinity(unsigned int irq, const struct cpumask *affinity)
{
- cpumask_t tmask = *affinity;
+ cpumask_t tmask;
int cpu = 0;
void smtc_set_irq_affinity(unsigned int irq, cpumask_t aff);
@@ -139,11 +139,12 @@
* be made to forward to an offline "CPU".
*/
+ cpumask_copy(&tmask, affinity);
for_each_cpu(cpu, affinity) {
if ((cpu_data[cpu].vpe_id != 0) || !cpu_online(cpu))
cpu_clear(cpu, tmask);
}
- irq_desc[irq].affinity = tmask;
+ cpumask_copy(irq_desc[irq].affinity, &tmask);
if (cpus_empty(tmask))
/*
diff --git a/arch/mips/sgi-ip22/ip22-int.c b/arch/mips/sgi-ip22/ip22-int.c
index f8b18af..0ecd5fe 100644
--- a/arch/mips/sgi-ip22/ip22-int.c
+++ b/arch/mips/sgi-ip22/ip22-int.c
@@ -155,7 +155,7 @@
int irq = SGI_BUSERR_IRQ;
irq_enter();
- kstat_this_cpu.irqs[irq]++;
+ kstat_incr_irqs_this_cpu(irq, irq_to_desc(irq));
ip22_be_interrupt(irq);
irq_exit();
}
diff --git a/arch/mips/sgi-ip22/ip22-time.c b/arch/mips/sgi-ip22/ip22-time.c
index 3dcb27e..c8f7d23 100644
--- a/arch/mips/sgi-ip22/ip22-time.c
+++ b/arch/mips/sgi-ip22/ip22-time.c
@@ -122,7 +122,7 @@
char c;
irq_enter();
- kstat_this_cpu.irqs[irq]++;
+ kstat_incr_irqs_this_cpu(irq, irq_to_desc(irq));
printk(KERN_ALERT "Oops, got 8254 interrupt.\n");
ArcRead(0, &c, 1, &cnt);
ArcEnterInteractiveMode();
diff --git a/arch/mips/sibyte/bcm1480/smp.c b/arch/mips/sibyte/bcm1480/smp.c
index dddfda8..3146916 100644
--- a/arch/mips/sibyte/bcm1480/smp.c
+++ b/arch/mips/sibyte/bcm1480/smp.c
@@ -178,9 +178,10 @@
void bcm1480_mailbox_interrupt(void)
{
int cpu = smp_processor_id();
+ int irq = K_BCM1480_INT_MBOX_0_0;
unsigned int action;
- kstat_this_cpu.irqs[K_BCM1480_INT_MBOX_0_0]++;
+ kstat_incr_irqs_this_cpu(irq, irq_to_desc(irq));
/* Load the mailbox register to figure out what we're supposed to do */
action = (__raw_readq(mailbox_0_regs[cpu]) >> 48) & 0xffff;
diff --git a/arch/mips/sibyte/sb1250/smp.c b/arch/mips/sibyte/sb1250/smp.c
index 5950a288..cad1400 100644
--- a/arch/mips/sibyte/sb1250/smp.c
+++ b/arch/mips/sibyte/sb1250/smp.c
@@ -166,9 +166,10 @@
void sb1250_mailbox_interrupt(void)
{
int cpu = smp_processor_id();
+ int irq = K_INT_MBOX_0;
unsigned int action;
- kstat_this_cpu.irqs[K_INT_MBOX_0]++;
+ kstat_incr_irqs_this_cpu(irq, irq_to_desc(irq));
/* Load the mailbox register to figure out what we're supposed to do */
action = (____raw_readq(mailbox_regs[cpu]) >> 48) & 0xffff;
diff --git a/arch/mn10300/kernel/mn10300-watchdog.c b/arch/mn10300/kernel/mn10300-watchdog.c
index 10811e9..2e370d8 100644
--- a/arch/mn10300/kernel/mn10300-watchdog.c
+++ b/arch/mn10300/kernel/mn10300-watchdog.c
@@ -130,6 +130,7 @@
* the stack NMI-atomically, it's safe to use smp_processor_id().
*/
int sum, cpu = smp_processor_id();
+ int irq = NMIIRQ;
u8 wdt, tmp;
wdt = WDCTR & ~WDCTR_WDCNE;
@@ -138,7 +139,7 @@
NMICR = NMICR_WDIF;
nmi_count(cpu)++;
- kstat_this_cpu.irqs[NMIIRQ]++;
+ kstat_incr_irqs_this_cpu(irq, irq_to_desc(irq));
sum = irq_stat[cpu].__irq_count;
if (last_irq_sums[cpu] == sum) {
diff --git a/arch/parisc/include/asm/pdc.h b/arch/parisc/include/asm/pdc.h
index c584b00..430f1ae 100644
--- a/arch/parisc/include/asm/pdc.h
+++ b/arch/parisc/include/asm/pdc.h
@@ -336,10 +336,11 @@
#define NUM_PDC_RESULT 32
#if !defined(__ASSEMBLY__)
-#ifdef __KERNEL__
#include <linux/types.h>
+#ifdef __KERNEL__
+
extern int pdc_type;
/* Values for pdc_type */
diff --git a/arch/parisc/include/asm/swab.h b/arch/parisc/include/asm/swab.h
index 3ff16c5..e78403b 100644
--- a/arch/parisc/include/asm/swab.h
+++ b/arch/parisc/include/asm/swab.h
@@ -1,7 +1,7 @@
#ifndef _PARISC_SWAB_H
#define _PARISC_SWAB_H
-#include <asm/types.h>
+#include <linux/types.h>
#include <linux/compiler.h>
#define __SWAB_64_THRU_32__
diff --git a/arch/parisc/kernel/irq.c b/arch/parisc/kernel/irq.c
index ac2c822..4948280 100644
--- a/arch/parisc/kernel/irq.c
+++ b/arch/parisc/kernel/irq.c
@@ -120,7 +120,7 @@
if (CHECK_IRQ_PER_CPU(irq)) {
/* Bad linux design decision. The mask has already
* been set; we must reset it */
- irq_desc[irq].affinity = CPU_MASK_ALL;
+ cpumask_setall(irq_desc[irq].affinity);
return -EINVAL;
}
@@ -136,7 +136,7 @@
if (cpu_check_affinity(irq, dest))
return;
- irq_desc[irq].affinity = *dest;
+ cpumask_copy(irq_desc[irq].affinity, dest);
}
#endif
@@ -295,7 +295,7 @@
unsigned long txn_affinity_addr(unsigned int irq, int cpu)
{
#ifdef CONFIG_SMP
- irq_desc[irq].affinity = cpumask_of_cpu(cpu);
+ cpumask_copy(irq_desc[irq].affinity, cpumask_of(cpu));
#endif
return per_cpu(cpu_data, cpu).txn_addr;
@@ -352,7 +352,7 @@
irq = eirr_to_irq(eirr_val);
#ifdef CONFIG_SMP
- dest = irq_desc[irq].affinity;
+ cpumask_copy(&dest, irq_desc[irq].affinity);
if (CHECK_IRQ_PER_CPU(irq_desc[irq].status) &&
!cpu_isset(smp_processor_id(), dest)) {
int cpu = first_cpu(dest);
diff --git a/arch/powerpc/include/asm/bootx.h b/arch/powerpc/include/asm/bootx.h
index 57b82e3..60a3c9e 100644
--- a/arch/powerpc/include/asm/bootx.h
+++ b/arch/powerpc/include/asm/bootx.h
@@ -9,7 +9,7 @@
#ifndef __ASM_BOOTX_H__
#define __ASM_BOOTX_H__
-#include <asm/types.h>
+#include <linux/types.h>
#ifdef macintosh
#include <Types.h>
diff --git a/arch/powerpc/include/asm/elf.h b/arch/powerpc/include/asm/elf.h
index cd46f02..b5600ce 100644
--- a/arch/powerpc/include/asm/elf.h
+++ b/arch/powerpc/include/asm/elf.h
@@ -7,7 +7,7 @@
#include <asm/string.h>
#endif
-#include <asm/types.h>
+#include <linux/types.h>
#include <asm/ptrace.h>
#include <asm/cputable.h>
#include <asm/auxvec.h>
diff --git a/arch/powerpc/include/asm/hw_irq.h b/arch/powerpc/include/asm/hw_irq.h
index f75a5fc..e10f151 100644
--- a/arch/powerpc/include/asm/hw_irq.h
+++ b/arch/powerpc/include/asm/hw_irq.h
@@ -131,5 +131,36 @@
*/
struct hw_interrupt_type;
+#ifdef CONFIG_PERF_COUNTERS
+static inline unsigned long get_perf_counter_pending(void)
+{
+ unsigned long x;
+
+ asm volatile("lbz %0,%1(13)"
+ : "=r" (x)
+ : "i" (offsetof(struct paca_struct, perf_counter_pending)));
+ return x;
+}
+
+static inline void set_perf_counter_pending(int x)
+{
+ asm volatile("stb %0,%1(13)" : :
+ "r" (x),
+ "i" (offsetof(struct paca_struct, perf_counter_pending)));
+}
+
+extern void perf_counter_do_pending(void);
+
+#else
+
+static inline unsigned long get_perf_counter_pending(void)
+{
+ return 0;
+}
+
+static inline void set_perf_counter_pending(int x) {}
+static inline void perf_counter_do_pending(void) {}
+#endif /* CONFIG_PERF_COUNTERS */
+
#endif /* __KERNEL__ */
#endif /* _ASM_POWERPC_HW_IRQ_H */
diff --git a/arch/powerpc/include/asm/kvm.h b/arch/powerpc/include/asm/kvm.h
index f993e41..4e0cf65 100644
--- a/arch/powerpc/include/asm/kvm.h
+++ b/arch/powerpc/include/asm/kvm.h
@@ -20,7 +20,7 @@
#ifndef __LINUX_KVM_POWERPC_H
#define __LINUX_KVM_POWERPC_H
-#include <asm/types.h>
+#include <linux/types.h>
struct kvm_regs {
__u64 pc;
diff --git a/arch/powerpc/include/asm/paca.h b/arch/powerpc/include/asm/paca.h
index 082b3ae..6ef0557 100644
--- a/arch/powerpc/include/asm/paca.h
+++ b/arch/powerpc/include/asm/paca.h
@@ -99,6 +99,7 @@
u8 soft_enabled; /* irq soft-enable flag */
u8 hard_enabled; /* set if irqs are enabled in MSR */
u8 io_sync; /* writel() needs spin_unlock sync */
+ u8 perf_counter_pending; /* PM interrupt while soft-disabled */
/* Stuff for accurate time accounting */
u64 user_time; /* accumulated usermode TB ticks */
diff --git a/arch/powerpc/include/asm/perf_counter.h b/arch/powerpc/include/asm/perf_counter.h
new file mode 100644
index 0000000..9d7ff6d
--- /dev/null
+++ b/arch/powerpc/include/asm/perf_counter.h
@@ -0,0 +1,72 @@
+/*
+ * Performance counter support - PowerPC-specific definitions.
+ *
+ * Copyright 2008-2009 Paul Mackerras, IBM Corporation.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+#include <linux/types.h>
+
+#define MAX_HWCOUNTERS 8
+#define MAX_EVENT_ALTERNATIVES 8
+
+/*
+ * This struct provides the constants and functions needed to
+ * describe the PMU on a particular POWER-family CPU.
+ */
+struct power_pmu {
+ int n_counter;
+ int max_alternatives;
+ u64 add_fields;
+ u64 test_adder;
+ int (*compute_mmcr)(unsigned int events[], int n_ev,
+ unsigned int hwc[], u64 mmcr[]);
+ int (*get_constraint)(unsigned int event, u64 *mskp, u64 *valp);
+ int (*get_alternatives)(unsigned int event, unsigned int alt[]);
+ void (*disable_pmc)(unsigned int pmc, u64 mmcr[]);
+ int n_generic;
+ int *generic_events;
+};
+
+extern struct power_pmu *ppmu;
+
+/*
+ * The power_pmu.get_constraint function returns a 64-bit value and
+ * a 64-bit mask that express the constraints between this event and
+ * other events.
+ *
+ * The value and mask are divided up into (non-overlapping) bitfields
+ * of three different types:
+ *
+ * Select field: this expresses the constraint that some set of bits
+ * in MMCR* needs to be set to a specific value for this event. For a
+ * select field, the mask contains 1s in every bit of the field, and
+ * the value contains a unique value for each possible setting of the
+ * MMCR* bits. The constraint checking code will ensure that two events
+ * that set the same field in their masks have the same value in their
+ * value dwords.
+ *
+ * Add field: this expresses the constraint that there can be at most
+ * N events in a particular class. A field of k bits can be used for
+ * N <= 2^(k-1) - 1. The mask has the most significant bit of the field
+ * set (and the other bits 0), and the value has only the least significant
+ * bit of the field set. In addition, the 'add_fields' and 'test_adder'
+ * in the struct power_pmu for this processor come into play. The
+ * add_fields value contains 1 in the LSB of the field, and the
+ * test_adder contains 2^(k-1) - 1 - N in the field.
+ *
+ * NAND field: this expresses the constraint that you may not have events
+ * in all of a set of classes. (For example, on PPC970, you can't select
+ * events from the FPU, ISU and IDU simultaneously, although any two are
+ * possible.) For N classes, the field is N+1 bits wide, and each class
+ * is assigned one bit from the least-significant N bits. The mask has
+ * only the most-significant bit set, and the value has only the bit
+ * for the event's class set. The test_adder has the least significant
+ * bit set in the field.
+ *
+ * If an event is not subject to the constraint expressed by a particular
+ * field, then it will have 0 in both the mask and value for that field.
+ */
diff --git a/arch/powerpc/include/asm/ps3fb.h b/arch/powerpc/include/asm/ps3fb.h
index 3f121fe..e7233a8 100644
--- a/arch/powerpc/include/asm/ps3fb.h
+++ b/arch/powerpc/include/asm/ps3fb.h
@@ -19,6 +19,7 @@
#ifndef _ASM_POWERPC_PS3FB_H_
#define _ASM_POWERPC_PS3FB_H_
+#include <linux/types.h>
#include <linux/ioctl.h>
/* ioctl */
diff --git a/arch/powerpc/include/asm/spu_info.h b/arch/powerpc/include/asm/spu_info.h
index 3545efb..1286c82 100644
--- a/arch/powerpc/include/asm/spu_info.h
+++ b/arch/powerpc/include/asm/spu_info.h
@@ -23,9 +23,10 @@
#ifndef _SPU_INFO_H
#define _SPU_INFO_H
+#include <linux/types.h>
+
#ifdef __KERNEL__
#include <asm/spu.h>
-#include <linux/types.h>
#else
struct mfc_cq_sr {
__u64 mfc_cq_data0_RW;
diff --git a/arch/powerpc/include/asm/swab.h b/arch/powerpc/include/asm/swab.h
index ef824ae..c581e3e 100644
--- a/arch/powerpc/include/asm/swab.h
+++ b/arch/powerpc/include/asm/swab.h
@@ -8,7 +8,7 @@
* 2 of the License, or (at your option) any later version.
*/
-#include <asm/types.h>
+#include <linux/types.h>
#include <linux/compiler.h>
#ifdef __GNUC__
diff --git a/arch/powerpc/include/asm/systbl.h b/arch/powerpc/include/asm/systbl.h
index 72353f6..d312eec 100644
--- a/arch/powerpc/include/asm/systbl.h
+++ b/arch/powerpc/include/asm/systbl.h
@@ -322,3 +322,4 @@
SYSCALL_SPU(dup3)
SYSCALL_SPU(pipe2)
SYSCALL(inotify_init1)
+SYSCALL_SPU(perf_counter_open)
diff --git a/arch/powerpc/include/asm/unistd.h b/arch/powerpc/include/asm/unistd.h
index e07d0c7..7cef5af 100644
--- a/arch/powerpc/include/asm/unistd.h
+++ b/arch/powerpc/include/asm/unistd.h
@@ -341,10 +341,11 @@
#define __NR_dup3 316
#define __NR_pipe2 317
#define __NR_inotify_init1 318
+#define __NR_perf_counter_open 319
#ifdef __KERNEL__
-#define __NR_syscalls 319
+#define __NR_syscalls 320
#define __NR__exit __NR_exit
#define NR_syscalls __NR_syscalls
diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile
index 8d1a419..b4c6f46 100644
--- a/arch/powerpc/kernel/Makefile
+++ b/arch/powerpc/kernel/Makefile
@@ -94,6 +94,8 @@
obj64-$(CONFIG_AUDIT) += compat_audit.o
obj-$(CONFIG_DYNAMIC_FTRACE) += ftrace.o
+obj-$(CONFIG_PERF_COUNTERS) += perf_counter.o ppc970-pmu.o power5-pmu.o \
+ power6-pmu.o
obj-$(CONFIG_8XX_MINIMAL_FPEMU) += softemu8xx.o
diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
index 19ee491..3734973 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -131,6 +131,7 @@
DEFINE(PACAKMSR, offsetof(struct paca_struct, kernel_msr));
DEFINE(PACASOFTIRQEN, offsetof(struct paca_struct, soft_enabled));
DEFINE(PACAHARDIRQEN, offsetof(struct paca_struct, hard_enabled));
+ DEFINE(PACAPERFPEND, offsetof(struct paca_struct, perf_counter_pending));
DEFINE(PACASLBCACHE, offsetof(struct paca_struct, slb_cache));
DEFINE(PACASLBCACHEPTR, offsetof(struct paca_struct, slb_cache_ptr));
DEFINE(PACACONTEXTID, offsetof(struct paca_struct, context.id));
diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S
index 383ed6e..f30b4e5 100644
--- a/arch/powerpc/kernel/entry_64.S
+++ b/arch/powerpc/kernel/entry_64.S
@@ -526,6 +526,15 @@
2:
TRACE_AND_RESTORE_IRQ(r5);
+#ifdef CONFIG_PERF_COUNTERS
+ /* check paca->perf_counter_pending if we're enabling ints */
+ lbz r3,PACAPERFPEND(r13)
+ and. r3,r3,r5
+ beq 27f
+ bl .perf_counter_do_pending
+27:
+#endif /* CONFIG_PERF_COUNTERS */
+
/* extract EE bit and use it to restore paca->hard_enabled */
ld r3,_MSR(r1)
rldicl r4,r3,49,63 /* r0 = (r3 >> 15) & 1 */
diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c
index 23b8b5e..7f8e6a9 100644
--- a/arch/powerpc/kernel/irq.c
+++ b/arch/powerpc/kernel/irq.c
@@ -104,6 +104,13 @@
: : "r" (enable), "i" (offsetof(struct paca_struct, soft_enabled)));
}
+#ifdef CONFIG_PERF_COUNTERS
+notrace void __weak perf_counter_do_pending(void)
+{
+ set_perf_counter_pending(0);
+}
+#endif
+
notrace void raw_local_irq_restore(unsigned long en)
{
/*
@@ -135,6 +142,9 @@
iseries_handle_interrupts();
}
+ if (get_perf_counter_pending())
+ perf_counter_do_pending();
+
/*
* if (get_paca()->hard_enabled) return;
* But again we need to take care that gcc gets hard_enabled directly
@@ -231,7 +241,7 @@
if (irq_desc[irq].status & IRQ_PER_CPU)
continue;
- cpus_and(mask, irq_desc[irq].affinity, map);
+ cpumask_and(&mask, irq_desc[irq].affinity, &map);
if (any_online_cpu(mask) == NR_CPUS) {
printk("Breaking affinity for irq %i\n", irq);
mask = map;
diff --git a/arch/powerpc/kernel/perf_counter.c b/arch/powerpc/kernel/perf_counter.c
new file mode 100644
index 0000000..112332d
--- /dev/null
+++ b/arch/powerpc/kernel/perf_counter.c
@@ -0,0 +1,864 @@
+/*
+ * Performance counter support - powerpc architecture code
+ *
+ * Copyright 2008-2009 Paul Mackerras, IBM Corporation.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/perf_counter.h>
+#include <linux/percpu.h>
+#include <linux/hardirq.h>
+#include <asm/reg.h>
+#include <asm/pmc.h>
+#include <asm/machdep.h>
+#include <asm/firmware.h>
+
+struct cpu_hw_counters {
+ int n_counters;
+ int n_percpu;
+ int disabled;
+ int n_added;
+ struct perf_counter *counter[MAX_HWCOUNTERS];
+ unsigned int events[MAX_HWCOUNTERS];
+ u64 mmcr[3];
+ u8 pmcs_enabled;
+};
+DEFINE_PER_CPU(struct cpu_hw_counters, cpu_hw_counters);
+
+struct power_pmu *ppmu;
+
+/*
+ * Normally, to ignore kernel events we set the FCS (freeze counters
+ * in supervisor mode) bit in MMCR0, but if the kernel runs with the
+ * hypervisor bit set in the MSR, or if we are running on a processor
+ * where the hypervisor bit is forced to 1 (as on Apple G5 processors),
+ * then we need to use the FCHV bit to ignore kernel events.
+ */
+static unsigned int freeze_counters_kernel = MMCR0_FCS;
+
+void perf_counter_print_debug(void)
+{
+}
+
+/*
+ * Read one performance monitor counter (PMC).
+ */
+static unsigned long read_pmc(int idx)
+{
+ unsigned long val;
+
+ switch (idx) {
+ case 1:
+ val = mfspr(SPRN_PMC1);
+ break;
+ case 2:
+ val = mfspr(SPRN_PMC2);
+ break;
+ case 3:
+ val = mfspr(SPRN_PMC3);
+ break;
+ case 4:
+ val = mfspr(SPRN_PMC4);
+ break;
+ case 5:
+ val = mfspr(SPRN_PMC5);
+ break;
+ case 6:
+ val = mfspr(SPRN_PMC6);
+ break;
+ case 7:
+ val = mfspr(SPRN_PMC7);
+ break;
+ case 8:
+ val = mfspr(SPRN_PMC8);
+ break;
+ default:
+ printk(KERN_ERR "oops trying to read PMC%d\n", idx);
+ val = 0;
+ }
+ return val;
+}
+
+/*
+ * Write one PMC.
+ */
+static void write_pmc(int idx, unsigned long val)
+{
+ switch (idx) {
+ case 1:
+ mtspr(SPRN_PMC1, val);
+ break;
+ case 2:
+ mtspr(SPRN_PMC2, val);
+ break;
+ case 3:
+ mtspr(SPRN_PMC3, val);
+ break;
+ case 4:
+ mtspr(SPRN_PMC4, val);
+ break;
+ case 5:
+ mtspr(SPRN_PMC5, val);
+ break;
+ case 6:
+ mtspr(SPRN_PMC6, val);
+ break;
+ case 7:
+ mtspr(SPRN_PMC7, val);
+ break;
+ case 8:
+ mtspr(SPRN_PMC8, val);
+ break;
+ default:
+ printk(KERN_ERR "oops trying to write PMC%d\n", idx);
+ }
+}
+
+/*
+ * Check if a set of events can all go on the PMU at once.
+ * If they can't, this will look at alternative codes for the events
+ * and see if any combination of alternative codes is feasible.
+ * The feasible set is returned in event[].
+ */
+static int power_check_constraints(unsigned int event[], int n_ev)
+{
+ u64 mask, value, nv;
+ unsigned int alternatives[MAX_HWCOUNTERS][MAX_EVENT_ALTERNATIVES];
+ u64 amasks[MAX_HWCOUNTERS][MAX_EVENT_ALTERNATIVES];
+ u64 avalues[MAX_HWCOUNTERS][MAX_EVENT_ALTERNATIVES];
+ u64 smasks[MAX_HWCOUNTERS], svalues[MAX_HWCOUNTERS];
+ int n_alt[MAX_HWCOUNTERS], choice[MAX_HWCOUNTERS];
+ int i, j;
+ u64 addf = ppmu->add_fields;
+ u64 tadd = ppmu->test_adder;
+
+ if (n_ev > ppmu->n_counter)
+ return -1;
+
+ /* First see if the events will go on as-is */
+ for (i = 0; i < n_ev; ++i) {
+ alternatives[i][0] = event[i];
+ if (ppmu->get_constraint(event[i], &amasks[i][0],
+ &avalues[i][0]))
+ return -1;
+ choice[i] = 0;
+ }
+ value = mask = 0;
+ for (i = 0; i < n_ev; ++i) {
+ nv = (value | avalues[i][0]) + (value & avalues[i][0] & addf);
+ if ((((nv + tadd) ^ value) & mask) != 0 ||
+ (((nv + tadd) ^ avalues[i][0]) & amasks[i][0]) != 0)
+ break;
+ value = nv;
+ mask |= amasks[i][0];
+ }
+ if (i == n_ev)
+ return 0; /* all OK */
+
+ /* doesn't work, gather alternatives... */
+ if (!ppmu->get_alternatives)
+ return -1;
+ for (i = 0; i < n_ev; ++i) {
+ n_alt[i] = ppmu->get_alternatives(event[i], alternatives[i]);
+ for (j = 1; j < n_alt[i]; ++j)
+ ppmu->get_constraint(alternatives[i][j],
+ &amasks[i][j], &avalues[i][j]);
+ }
+
+ /* enumerate all possibilities and see if any will work */
+ i = 0;
+ j = -1;
+ value = mask = nv = 0;
+ while (i < n_ev) {
+ if (j >= 0) {
+ /* we're backtracking, restore context */
+ value = svalues[i];
+ mask = smasks[i];
+ j = choice[i];
+ }
+ /*
+ * See if any alternative k for event i,
+ * where k > j, will satisfy the constraints.
+ */
+ while (++j < n_alt[i]) {
+ nv = (value | avalues[i][j]) +
+ (value & avalues[i][j] & addf);
+ if ((((nv + tadd) ^ value) & mask) == 0 &&
+ (((nv + tadd) ^ avalues[i][j])
+ & amasks[i][j]) == 0)
+ break;
+ }
+ if (j >= n_alt[i]) {
+ /*
+ * No feasible alternative, backtrack
+ * to event i-1 and continue enumerating its
+ * alternatives from where we got up to.
+ */
+ if (--i < 0)
+ return -1;
+ } else {
+ /*
+ * Found a feasible alternative for event i,
+ * remember where we got up to with this event,
+ * go on to the next event, and start with
+ * the first alternative for it.
+ */
+ choice[i] = j;
+ svalues[i] = value;
+ smasks[i] = mask;
+ value = nv;
+ mask |= amasks[i][j];
+ ++i;
+ j = -1;
+ }
+ }
+
+ /* OK, we have a feasible combination, tell the caller the solution */
+ for (i = 0; i < n_ev; ++i)
+ event[i] = alternatives[i][choice[i]];
+ return 0;
+}
+
+/*
+ * Check if newly-added counters have consistent settings for
+ * exclude_{user,kernel,hv} with each other and any previously
+ * added counters.
+ */
+static int check_excludes(struct perf_counter **ctrs, int n_prev, int n_new)
+{
+ int eu, ek, eh;
+ int i, n;
+ struct perf_counter *counter;
+
+ n = n_prev + n_new;
+ if (n <= 1)
+ return 0;
+
+ eu = ctrs[0]->hw_event.exclude_user;
+ ek = ctrs[0]->hw_event.exclude_kernel;
+ eh = ctrs[0]->hw_event.exclude_hv;
+ if (n_prev == 0)
+ n_prev = 1;
+ for (i = n_prev; i < n; ++i) {
+ counter = ctrs[i];
+ if (counter->hw_event.exclude_user != eu ||
+ counter->hw_event.exclude_kernel != ek ||
+ counter->hw_event.exclude_hv != eh)
+ return -EAGAIN;
+ }
+ return 0;
+}
+
+static void power_perf_read(struct perf_counter *counter)
+{
+ long val, delta, prev;
+
+ if (!counter->hw.idx)
+ return;
+ /*
+ * Performance monitor interrupts come even when interrupts
+ * are soft-disabled, as long as interrupts are hard-enabled.
+ * Therefore we treat them like NMIs.
+ */
+ do {
+ prev = atomic64_read(&counter->hw.prev_count);
+ barrier();
+ val = read_pmc(counter->hw.idx);
+ } while (atomic64_cmpxchg(&counter->hw.prev_count, prev, val) != prev);
+
+ /* The counters are only 32 bits wide */
+ delta = (val - prev) & 0xfffffffful;
+ atomic64_add(delta, &counter->count);
+ atomic64_sub(delta, &counter->hw.period_left);
+}
+
+/*
+ * Disable all counters to prevent PMU interrupts and to allow
+ * counters to be added or removed.
+ */
+u64 hw_perf_save_disable(void)
+{
+ struct cpu_hw_counters *cpuhw;
+ unsigned long ret;
+ unsigned long flags;
+
+ local_irq_save(flags);
+ cpuhw = &__get_cpu_var(cpu_hw_counters);
+
+ ret = cpuhw->disabled;
+ if (!ret) {
+ cpuhw->disabled = 1;
+ cpuhw->n_added = 0;
+
+ /*
+ * Check if we ever enabled the PMU on this cpu.
+ */
+ if (!cpuhw->pmcs_enabled) {
+ if (ppc_md.enable_pmcs)
+ ppc_md.enable_pmcs();
+ cpuhw->pmcs_enabled = 1;
+ }
+
+ /*
+ * Set the 'freeze counters' bit.
+ * The barrier is to make sure the mtspr has been
+ * executed and the PMU has frozen the counters
+ * before we return.
+ */
+ mtspr(SPRN_MMCR0, mfspr(SPRN_MMCR0) | MMCR0_FC);
+ mb();
+ }
+ local_irq_restore(flags);
+ return ret;
+}
+
+/*
+ * Re-enable all counters if disable == 0.
+ * If we were previously disabled and counters were added, then
+ * put the new config on the PMU.
+ */
+void hw_perf_restore(u64 disable)
+{
+ struct perf_counter *counter;
+ struct cpu_hw_counters *cpuhw;
+ unsigned long flags;
+ long i;
+ unsigned long val;
+ s64 left;
+ unsigned int hwc_index[MAX_HWCOUNTERS];
+
+ if (disable)
+ return;
+ local_irq_save(flags);
+ cpuhw = &__get_cpu_var(cpu_hw_counters);
+ cpuhw->disabled = 0;
+
+ /*
+ * If we didn't change anything, or only removed counters,
+ * no need to recalculate MMCR* settings and reset the PMCs.
+ * Just reenable the PMU with the current MMCR* settings
+ * (possibly updated for removal of counters).
+ */
+ if (!cpuhw->n_added) {
+ mtspr(SPRN_MMCRA, cpuhw->mmcr[2]);
+ mtspr(SPRN_MMCR1, cpuhw->mmcr[1]);
+ mtspr(SPRN_MMCR0, cpuhw->mmcr[0]);
+ if (cpuhw->n_counters == 0)
+ get_lppaca()->pmcregs_in_use = 0;
+ goto out;
+ }
+
+ /*
+ * Compute MMCR* values for the new set of counters
+ */
+ if (ppmu->compute_mmcr(cpuhw->events, cpuhw->n_counters, hwc_index,
+ cpuhw->mmcr)) {
+ /* shouldn't ever get here */
+ printk(KERN_ERR "oops compute_mmcr failed\n");
+ goto out;
+ }
+
+ /*
+ * Add in MMCR0 freeze bits corresponding to the
+ * hw_event.exclude_* bits for the first counter.
+ * We have already checked that all counters have the
+ * same values for these bits as the first counter.
+ */
+ counter = cpuhw->counter[0];
+ if (counter->hw_event.exclude_user)
+ cpuhw->mmcr[0] |= MMCR0_FCP;
+ if (counter->hw_event.exclude_kernel)
+ cpuhw->mmcr[0] |= freeze_counters_kernel;
+ if (counter->hw_event.exclude_hv)
+ cpuhw->mmcr[0] |= MMCR0_FCHV;
+
+ /*
+ * Write the new configuration to MMCR* with the freeze
+ * bit set and set the hardware counters to their initial values.
+ * Then unfreeze the counters.
+ */
+ get_lppaca()->pmcregs_in_use = 1;
+ mtspr(SPRN_MMCRA, cpuhw->mmcr[2]);
+ mtspr(SPRN_MMCR1, cpuhw->mmcr[1]);
+ mtspr(SPRN_MMCR0, (cpuhw->mmcr[0] & ~(MMCR0_PMC1CE | MMCR0_PMCjCE))
+ | MMCR0_FC);
+
+ /*
+ * Read off any pre-existing counters that need to move
+ * to another PMC.
+ */
+ for (i = 0; i < cpuhw->n_counters; ++i) {
+ counter = cpuhw->counter[i];
+ if (counter->hw.idx && counter->hw.idx != hwc_index[i] + 1) {
+ power_perf_read(counter);
+ write_pmc(counter->hw.idx, 0);
+ counter->hw.idx = 0;
+ }
+ }
+
+ /*
+ * Initialize the PMCs for all the new and moved counters.
+ */
+ for (i = 0; i < cpuhw->n_counters; ++i) {
+ counter = cpuhw->counter[i];
+ if (counter->hw.idx)
+ continue;
+ val = 0;
+ if (counter->hw_event.irq_period) {
+ left = atomic64_read(&counter->hw.period_left);
+ if (left < 0x80000000L)
+ val = 0x80000000L - left;
+ }
+ atomic64_set(&counter->hw.prev_count, val);
+ counter->hw.idx = hwc_index[i] + 1;
+ write_pmc(counter->hw.idx, val);
+ }
+ mb();
+ cpuhw->mmcr[0] |= MMCR0_PMXE | MMCR0_FCECE;
+ mtspr(SPRN_MMCR0, cpuhw->mmcr[0]);
+
+ out:
+ local_irq_restore(flags);
+}
+
+static int collect_events(struct perf_counter *group, int max_count,
+ struct perf_counter *ctrs[], unsigned int *events)
+{
+ int n = 0;
+ struct perf_counter *counter;
+
+ if (!is_software_counter(group)) {
+ if (n >= max_count)
+ return -1;
+ ctrs[n] = group;
+ events[n++] = group->hw.config;
+ }
+ list_for_each_entry(counter, &group->sibling_list, list_entry) {
+ if (!is_software_counter(counter) &&
+ counter->state != PERF_COUNTER_STATE_OFF) {
+ if (n >= max_count)
+ return -1;
+ ctrs[n] = counter;
+ events[n++] = counter->hw.config;
+ }
+ }
+ return n;
+}
+
+static void counter_sched_in(struct perf_counter *counter, int cpu)
+{
+ counter->state = PERF_COUNTER_STATE_ACTIVE;
+ counter->oncpu = cpu;
+ if (is_software_counter(counter))
+ counter->hw_ops->enable(counter);
+}
+
+/*
+ * Called to enable a whole group of counters.
+ * Returns 1 if the group was enabled, or -EAGAIN if it could not be.
+ * Assumes the caller has disabled interrupts and has
+ * frozen the PMU with hw_perf_save_disable.
+ */
+int hw_perf_group_sched_in(struct perf_counter *group_leader,
+ struct perf_cpu_context *cpuctx,
+ struct perf_counter_context *ctx, int cpu)
+{
+ struct cpu_hw_counters *cpuhw;
+ long i, n, n0;
+ struct perf_counter *sub;
+
+ cpuhw = &__get_cpu_var(cpu_hw_counters);
+ n0 = cpuhw->n_counters;
+ n = collect_events(group_leader, ppmu->n_counter - n0,
+ &cpuhw->counter[n0], &cpuhw->events[n0]);
+ if (n < 0)
+ return -EAGAIN;
+ if (check_excludes(cpuhw->counter, n0, n))
+ return -EAGAIN;
+ if (power_check_constraints(cpuhw->events, n + n0))
+ return -EAGAIN;
+ cpuhw->n_counters = n0 + n;
+ cpuhw->n_added += n;
+
+ /*
+ * OK, this group can go on; update counter states etc.,
+ * and enable any software counters
+ */
+ for (i = n0; i < n0 + n; ++i)
+ cpuhw->counter[i]->hw.config = cpuhw->events[i];
+ cpuctx->active_oncpu += n;
+ n = 1;
+ counter_sched_in(group_leader, cpu);
+ list_for_each_entry(sub, &group_leader->sibling_list, list_entry) {
+ if (sub->state != PERF_COUNTER_STATE_OFF) {
+ counter_sched_in(sub, cpu);
+ ++n;
+ }
+ }
+ ctx->nr_active += n;
+
+ return 1;
+}
+
+/*
+ * Add a counter to the PMU.
+ * If all counters are not already frozen, then we disable and
+ * re-enable the PMU in order to get hw_perf_restore to do the
+ * actual work of reconfiguring the PMU.
+ */
+static int power_perf_enable(struct perf_counter *counter)
+{
+ struct cpu_hw_counters *cpuhw;
+ unsigned long flags;
+ u64 pmudis;
+ int n0;
+ int ret = -EAGAIN;
+
+ local_irq_save(flags);
+ pmudis = hw_perf_save_disable();
+
+ /*
+ * Add the counter to the list (if there is room)
+ * and check whether the total set is still feasible.
+ */
+ cpuhw = &__get_cpu_var(cpu_hw_counters);
+ n0 = cpuhw->n_counters;
+ if (n0 >= ppmu->n_counter)
+ goto out;
+ cpuhw->counter[n0] = counter;
+ cpuhw->events[n0] = counter->hw.config;
+ if (check_excludes(cpuhw->counter, n0, 1))
+ goto out;
+ if (power_check_constraints(cpuhw->events, n0 + 1))
+ goto out;
+
+ counter->hw.config = cpuhw->events[n0];
+ ++cpuhw->n_counters;
+ ++cpuhw->n_added;
+
+ ret = 0;
+ out:
+ hw_perf_restore(pmudis);
+ local_irq_restore(flags);
+ return ret;
+}
+
+/*
+ * Remove a counter from the PMU.
+ */
+static void power_perf_disable(struct perf_counter *counter)
+{
+ struct cpu_hw_counters *cpuhw;
+ long i;
+ u64 pmudis;
+ unsigned long flags;
+
+ local_irq_save(flags);
+ pmudis = hw_perf_save_disable();
+
+ power_perf_read(counter);
+
+ cpuhw = &__get_cpu_var(cpu_hw_counters);
+ for (i = 0; i < cpuhw->n_counters; ++i) {
+ if (counter == cpuhw->counter[i]) {
+ while (++i < cpuhw->n_counters)
+ cpuhw->counter[i-1] = cpuhw->counter[i];
+ --cpuhw->n_counters;
+ ppmu->disable_pmc(counter->hw.idx - 1, cpuhw->mmcr);
+ write_pmc(counter->hw.idx, 0);
+ counter->hw.idx = 0;
+ break;
+ }
+ }
+ if (cpuhw->n_counters == 0) {
+ /* disable exceptions if no counters are running */
+ cpuhw->mmcr[0] &= ~(MMCR0_PMXE | MMCR0_FCECE);
+ }
+
+ hw_perf_restore(pmudis);
+ local_irq_restore(flags);
+}
+
+struct hw_perf_counter_ops power_perf_ops = {
+ .enable = power_perf_enable,
+ .disable = power_perf_disable,
+ .read = power_perf_read
+};
+
+const struct hw_perf_counter_ops *
+hw_perf_counter_init(struct perf_counter *counter)
+{
+ unsigned long ev;
+ struct perf_counter *ctrs[MAX_HWCOUNTERS];
+ unsigned int events[MAX_HWCOUNTERS];
+ int n;
+
+ if (!ppmu)
+ return NULL;
+ if ((s64)counter->hw_event.irq_period < 0)
+ return NULL;
+ ev = counter->hw_event.type;
+ if (!counter->hw_event.raw) {
+ if (ev >= ppmu->n_generic ||
+ ppmu->generic_events[ev] == 0)
+ return NULL;
+ ev = ppmu->generic_events[ev];
+ }
+ counter->hw.config_base = ev;
+ counter->hw.idx = 0;
+
+ /*
+ * If we are not running on a hypervisor, force the
+ * exclude_hv bit to 0 so that we don't care what
+ * the user set it to.
+ */
+ if (!firmware_has_feature(FW_FEATURE_LPAR))
+ counter->hw_event.exclude_hv = 0;
+
+ /*
+ * If this is in a group, check if it can go on with all the
+ * other hardware counters in the group. We assume the counter
+ * hasn't been linked into its leader's sibling list at this point.
+ */
+ n = 0;
+ if (counter->group_leader != counter) {
+ n = collect_events(counter->group_leader, ppmu->n_counter - 1,
+ ctrs, events);
+ if (n < 0)
+ return NULL;
+ }
+ events[n] = ev;
+ if (check_excludes(ctrs, n, 1))
+ return NULL;
+ if (power_check_constraints(events, n + 1))
+ return NULL;
+
+ counter->hw.config = events[n];
+ atomic64_set(&counter->hw.period_left, counter->hw_event.irq_period);
+ return &power_perf_ops;
+}
+
+/*
+ * Handle wakeups.
+ */
+void perf_counter_do_pending(void)
+{
+ int i;
+ struct cpu_hw_counters *cpuhw = &__get_cpu_var(cpu_hw_counters);
+ struct perf_counter *counter;
+
+ set_perf_counter_pending(0);
+ for (i = 0; i < cpuhw->n_counters; ++i) {
+ counter = cpuhw->counter[i];
+ if (counter && counter->wakeup_pending) {
+ counter->wakeup_pending = 0;
+ wake_up(&counter->waitq);
+ }
+ }
+}
+
+/*
+ * Record data for an irq counter.
+ * This function was lifted from the x86 code; maybe it should
+ * go in the core?
+ */
+static void perf_store_irq_data(struct perf_counter *counter, u64 data)
+{
+ struct perf_data *irqdata = counter->irqdata;
+
+ if (irqdata->len > PERF_DATA_BUFLEN - sizeof(u64)) {
+ irqdata->overrun++;
+ } else {
+ u64 *p = (u64 *) &irqdata->data[irqdata->len];
+
+ *p = data;
+ irqdata->len += sizeof(u64);
+ }
+}
+
+/*
+ * Record all the values of the counters in a group
+ */
+static void perf_handle_group(struct perf_counter *counter)
+{
+ struct perf_counter *leader, *sub;
+
+ leader = counter->group_leader;
+ list_for_each_entry(sub, &leader->sibling_list, list_entry) {
+ if (sub != counter)
+ sub->hw_ops->read(sub);
+ perf_store_irq_data(counter, sub->hw_event.type);
+ perf_store_irq_data(counter, atomic64_read(&sub->count));
+ }
+}
+
+/*
+ * A counter has overflowed; update its count and record
+ * things if requested. Note that interrupts are hard-disabled
+ * here so there is no possibility of being interrupted.
+ */
+static void record_and_restart(struct perf_counter *counter, long val,
+ struct pt_regs *regs)
+{
+ s64 prev, delta, left;
+ int record = 0;
+
+ /* we don't have to worry about interrupts here */
+ prev = atomic64_read(&counter->hw.prev_count);
+ delta = (val - prev) & 0xfffffffful;
+ atomic64_add(delta, &counter->count);
+
+ /*
+ * See if the total period for this counter has expired,
+ * and update for the next period.
+ */
+ val = 0;
+ left = atomic64_read(&counter->hw.period_left) - delta;
+ if (counter->hw_event.irq_period) {
+ if (left <= 0) {
+ left += counter->hw_event.irq_period;
+ if (left <= 0)
+ left = counter->hw_event.irq_period;
+ record = 1;
+ }
+ if (left < 0x80000000L)
+ val = 0x80000000L - left;
+ }
+ write_pmc(counter->hw.idx, val);
+ atomic64_set(&counter->hw.prev_count, val);
+ atomic64_set(&counter->hw.period_left, left);
+
+ /*
+ * Finally record data if requested.
+ */
+ if (record) {
+ switch (counter->hw_event.record_type) {
+ case PERF_RECORD_SIMPLE:
+ break;
+ case PERF_RECORD_IRQ:
+ perf_store_irq_data(counter, instruction_pointer(regs));
+ counter->wakeup_pending = 1;
+ break;
+ case PERF_RECORD_GROUP:
+ perf_handle_group(counter);
+ counter->wakeup_pending = 1;
+ break;
+ }
+ }
+}
+
+/*
+ * Performance monitor interrupt stuff
+ */
+static void perf_counter_interrupt(struct pt_regs *regs)
+{
+ int i;
+ struct cpu_hw_counters *cpuhw = &__get_cpu_var(cpu_hw_counters);
+ struct perf_counter *counter;
+ long val;
+ int need_wakeup = 0, found = 0;
+
+ for (i = 0; i < cpuhw->n_counters; ++i) {
+ counter = cpuhw->counter[i];
+ val = read_pmc(counter->hw.idx);
+ if ((int)val < 0) {
+ /* counter has overflowed */
+ found = 1;
+ record_and_restart(counter, val, regs);
+ if (counter->wakeup_pending)
+ need_wakeup = 1;
+ }
+ }
+
+ /*
+ * In case we didn't find and reset the counter that caused
+ * the interrupt, scan all counters and reset any that are
+ * negative, to avoid getting continual interrupts.
+ * Any that we processed in the previous loop will not be negative.
+ */
+ if (!found) {
+ for (i = 0; i < ppmu->n_counter; ++i) {
+ val = read_pmc(i + 1);
+ if ((int)val < 0)
+ write_pmc(i + 1, 0);
+ }
+ }
+
+ /*
+ * Reset MMCR0 to its normal value. This will set PMXE and
+ * clear FC (freeze counters) and PMAO (perf mon alert occurred)
+ * and thus allow interrupts to occur again.
+ * XXX might want to use MSR.PM to keep the counters frozen until
+ * we get back out of this interrupt.
+ */
+ mtspr(SPRN_MMCR0, cpuhw->mmcr[0]);
+
+ /*
+ * If we need a wakeup, check whether interrupts were soft-enabled
+ * when we took the interrupt. If they were, we can wake stuff up
+ * immediately; otherwise we'll have to set a flag and do the
+ * wakeup when interrupts get soft-enabled.
+ */
+ if (need_wakeup) {
+ if (regs->softe) {
+ irq_enter();
+ perf_counter_do_pending();
+ irq_exit();
+ } else {
+ set_perf_counter_pending(1);
+ }
+ }
+}
+
+void hw_perf_counter_setup(int cpu)
+{
+ struct cpu_hw_counters *cpuhw = &per_cpu(cpu_hw_counters, cpu);
+
+ memset(cpuhw, 0, sizeof(*cpuhw));
+ cpuhw->mmcr[0] = MMCR0_FC;
+}
+
+extern struct power_pmu ppc970_pmu;
+extern struct power_pmu power5_pmu;
+extern struct power_pmu power6_pmu;
+
+static int init_perf_counters(void)
+{
+ unsigned long pvr;
+
+ if (reserve_pmc_hardware(perf_counter_interrupt)) {
+ printk(KERN_ERR "Couldn't init performance monitor subsystem\n");
+ return -EBUSY;
+ }
+
+ /* XXX should get this from cputable */
+ pvr = mfspr(SPRN_PVR);
+ switch (PVR_VER(pvr)) {
+ case PV_970:
+ case PV_970FX:
+ case PV_970MP:
+ ppmu = &ppc970_pmu;
+ break;
+ case PV_POWER5:
+ ppmu = &power5_pmu;
+ break;
+ case 0x3e:
+ ppmu = &power6_pmu;
+ break;
+ }
+
+ /*
+ * Use FCHV to ignore kernel events if MSR.HV is set.
+ */
+ if (mfmsr() & MSR_HV)
+ freeze_counters_kernel = MMCR0_FCHV;
+
+ return 0;
+}
+
+arch_initcall(init_perf_counters);
diff --git a/arch/powerpc/kernel/power5-pmu.c b/arch/powerpc/kernel/power5-pmu.c
new file mode 100644
index 0000000..379ed10
--- /dev/null
+++ b/arch/powerpc/kernel/power5-pmu.c
@@ -0,0 +1,475 @@
+/*
+ * Performance counter support for POWER5 (not POWER5++) processors.
+ *
+ * Copyright 2009 Paul Mackerras, IBM Corporation.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+#include <linux/kernel.h>
+#include <linux/perf_counter.h>
+#include <asm/reg.h>
+
+/*
+ * Bits in event code for POWER5 (not POWER5++)
+ */
+#define PM_PMC_SH 20 /* PMC number (1-based) for direct events */
+#define PM_PMC_MSK 0xf
+#define PM_PMC_MSKS (PM_PMC_MSK << PM_PMC_SH)
+#define PM_UNIT_SH 16 /* TTMMUX number and setting - unit select */
+#define PM_UNIT_MSK 0xf
+#define PM_BYTE_SH 12 /* Byte number of event bus to use */
+#define PM_BYTE_MSK 7
+#define PM_GRS_SH 8 /* Storage subsystem mux select */
+#define PM_GRS_MSK 7
+#define PM_BUSEVENT_MSK 0x80 /* Set if event uses event bus */
+#define PM_PMCSEL_MSK 0x7f
+
+/* Values in PM_UNIT field */
+#define PM_FPU 0
+#define PM_ISU0 1
+#define PM_IFU 2
+#define PM_ISU1 3
+#define PM_IDU 4
+#define PM_ISU0_ALT 6
+#define PM_GRS 7
+#define PM_LSU0 8
+#define PM_LSU1 0xc
+#define PM_LASTUNIT 0xc
+
+/*
+ * Bits in MMCR1 for POWER5
+ */
+#define MMCR1_TTM0SEL_SH 62
+#define MMCR1_TTM1SEL_SH 60
+#define MMCR1_TTM2SEL_SH 58
+#define MMCR1_TTM3SEL_SH 56
+#define MMCR1_TTMSEL_MSK 3
+#define MMCR1_TD_CP_DBG0SEL_SH 54
+#define MMCR1_TD_CP_DBG1SEL_SH 52
+#define MMCR1_TD_CP_DBG2SEL_SH 50
+#define MMCR1_TD_CP_DBG3SEL_SH 48
+#define MMCR1_GRS_L2SEL_SH 46
+#define MMCR1_GRS_L2SEL_MSK 3
+#define MMCR1_GRS_L3SEL_SH 44
+#define MMCR1_GRS_L3SEL_MSK 3
+#define MMCR1_GRS_MCSEL_SH 41
+#define MMCR1_GRS_MCSEL_MSK 7
+#define MMCR1_GRS_FABSEL_SH 39
+#define MMCR1_GRS_FABSEL_MSK 3
+#define MMCR1_PMC1_ADDER_SEL_SH 35
+#define MMCR1_PMC2_ADDER_SEL_SH 34
+#define MMCR1_PMC3_ADDER_SEL_SH 33
+#define MMCR1_PMC4_ADDER_SEL_SH 32
+#define MMCR1_PMC1SEL_SH 25
+#define MMCR1_PMC2SEL_SH 17
+#define MMCR1_PMC3SEL_SH 9
+#define MMCR1_PMC4SEL_SH 1
+#define MMCR1_PMCSEL_SH(n) (MMCR1_PMC1SEL_SH - (n) * 8)
+#define MMCR1_PMCSEL_MSK 0x7f
+
+/*
+ * Bits in MMCRA
+ */
+
+/*
+ * Layout of constraint bits:
+ * 6666555555555544444444443333333333222222222211111111110000000000
+ * 3210987654321098765432109876543210987654321098765432109876543210
+ * <><>[ ><><>< ><> [ >[ >[ >< >< >< >< ><><><><><><>
+ * T0T1 NC G0G1G2 G3 UC PS1PS2 B0 B1 B2 B3 P6P5P4P3P2P1
+ *
+ * T0 - TTM0 constraint
+ * 54-55: TTM0SEL value (0=FPU, 2=IFU, 3=ISU1) 0xc0_0000_0000_0000
+ *
+ * T1 - TTM1 constraint
+ * 52-53: TTM1SEL value (0=IDU, 3=GRS) 0x30_0000_0000_0000
+ *
+ * NC - number of counters
+ * 51: NC error 0x0008_0000_0000_0000
+ * 48-50: number of events needing PMC1-4 0x0007_0000_0000_0000
+ *
+ * G0..G3 - GRS mux constraints
+ * 46-47: GRS_L2SEL value
+ * 44-45: GRS_L3SEL value
+ * 41-44: GRS_MCSEL value
+ * 39-40: GRS_FABSEL value
+ * Note that these match up with their bit positions in MMCR1
+ *
+ * UC - unit constraint: can't have all three of FPU|IFU|ISU1, ISU0, IDU|GRS
+ * 37: UC3 error 0x20_0000_0000
+ * 36: FPU|IFU|ISU1 events needed 0x10_0000_0000
+ * 35: ISU0 events needed 0x08_0000_0000
+ * 34: IDU|GRS events needed 0x04_0000_0000
+ *
+ * PS1
+ * 33: PS1 error 0x2_0000_0000
+ * 31-32: count of events needing PMC1/2 0x1_8000_0000
+ *
+ * PS2
+ * 30: PS2 error 0x4000_0000
+ * 28-29: count of events needing PMC3/4 0x3000_0000
+ *
+ * B0
+ * 24-27: Byte 0 event source 0x0f00_0000
+ * Encoding as for the event code
+ *
+ * B1, B2, B3
+ * 20-23, 16-19, 12-15: Byte 1, 2, 3 event sources
+ *
+ * P1..P6
+ * 0-11: Count of events needing PMC1..PMC6
+ */
+
+static const int grsel_shift[8] = {
+ MMCR1_GRS_L2SEL_SH, MMCR1_GRS_L2SEL_SH, MMCR1_GRS_L2SEL_SH,
+ MMCR1_GRS_L3SEL_SH, MMCR1_GRS_L3SEL_SH, MMCR1_GRS_L3SEL_SH,
+ MMCR1_GRS_MCSEL_SH, MMCR1_GRS_FABSEL_SH
+};
+
+/* Masks and values for using events from the various units */
+static u64 unit_cons[PM_LASTUNIT+1][2] = {
+ [PM_FPU] = { 0xc0002000000000ull, 0x00001000000000ull },
+ [PM_ISU0] = { 0x00002000000000ull, 0x00000800000000ull },
+ [PM_ISU1] = { 0xc0002000000000ull, 0xc0001000000000ull },
+ [PM_IFU] = { 0xc0002000000000ull, 0x80001000000000ull },
+ [PM_IDU] = { 0x30002000000000ull, 0x00000400000000ull },
+ [PM_GRS] = { 0x30002000000000ull, 0x30000400000000ull },
+};
+
+static int power5_get_constraint(unsigned int event, u64 *maskp, u64 *valp)
+{
+ int pmc, byte, unit, sh;
+ int bit, fmask;
+ u64 mask = 0, value = 0;
+ int grp = -1;
+
+ pmc = (event >> PM_PMC_SH) & PM_PMC_MSK;
+ if (pmc) {
+ if (pmc > 6)
+ return -1;
+ sh = (pmc - 1) * 2;
+ mask |= 2 << sh;
+ value |= 1 << sh;
+ if (pmc <= 4)
+ grp = (pmc - 1) >> 1;
+ else if (event != 0x500009 && event != 0x600005)
+ return -1;
+ }
+ if (event & PM_BUSEVENT_MSK) {
+ unit = (event >> PM_UNIT_SH) & PM_UNIT_MSK;
+ if (unit > PM_LASTUNIT)
+ return -1;
+ if (unit == PM_ISU0_ALT)
+ unit = PM_ISU0;
+ mask |= unit_cons[unit][0];
+ value |= unit_cons[unit][1];
+ byte = (event >> PM_BYTE_SH) & PM_BYTE_MSK;
+ if (byte >= 4) {
+ if (unit != PM_LSU1)
+ return -1;
+ /* Map LSU1 low word (bytes 4-7) to unit LSU1+1 */
+ ++unit;
+ byte &= 3;
+ }
+ if (unit == PM_GRS) {
+ bit = event & 7;
+ fmask = (bit == 6)? 7: 3;
+ sh = grsel_shift[bit];
+ mask |= (u64)fmask << sh;
+ value |= (u64)((event >> PM_GRS_SH) & fmask) << sh;
+ }
+ /*
+ * Bus events on bytes 0 and 2 can be counted
+ * on PMC1/2; bytes 1 and 3 on PMC3/4.
+ */
+ if (!pmc)
+ grp = byte & 1;
+ /* Set byte lane select field */
+ mask |= 0xfULL << (24 - 4 * byte);
+ value |= (u64)unit << (24 - 4 * byte);
+ }
+ if (grp == 0) {
+ /* increment PMC1/2 field */
+ mask |= 0x200000000ull;
+ value |= 0x080000000ull;
+ } else if (grp == 1) {
+ /* increment PMC3/4 field */
+ mask |= 0x40000000ull;
+ value |= 0x10000000ull;
+ }
+ if (pmc < 5) {
+ /* need a counter from PMC1-4 set */
+ mask |= 0x8000000000000ull;
+ value |= 0x1000000000000ull;
+ }
+ *maskp = mask;
+ *valp = value;
+ return 0;
+}
+
+#define MAX_ALT 3 /* at most 3 alternatives for any event */
+
+static const unsigned int event_alternatives[][MAX_ALT] = {
+ { 0x120e4, 0x400002 }, /* PM_GRP_DISP_REJECT */
+ { 0x410c7, 0x441084 }, /* PM_THRD_L2MISS_BOTH_CYC */
+ { 0x100005, 0x600005 }, /* PM_RUN_CYC */
+ { 0x100009, 0x200009, 0x500009 }, /* PM_INST_CMPL */
+ { 0x300009, 0x400009 }, /* PM_INST_DISP */
+};
+
+/*
+ * Scan the alternatives table for a match and return the
+ * index into the alternatives table if found, else -1.
+ */
+static int find_alternative(unsigned int event)
+{
+ int i, j;
+
+ for (i = 0; i < ARRAY_SIZE(event_alternatives); ++i) {
+ if (event < event_alternatives[i][0])
+ break;
+ for (j = 0; j < MAX_ALT && event_alternatives[i][j]; ++j)
+ if (event == event_alternatives[i][j])
+ return i;
+ }
+ return -1;
+}
+
+static const unsigned char bytedecode_alternatives[4][4] = {
+ /* PMC 1 */ { 0x21, 0x23, 0x25, 0x27 },
+ /* PMC 2 */ { 0x07, 0x17, 0x0e, 0x1e },
+ /* PMC 3 */ { 0x20, 0x22, 0x24, 0x26 },
+ /* PMC 4 */ { 0x07, 0x17, 0x0e, 0x1e }
+};
+
+/*
+ * Some direct events for decodes of event bus byte 3 have alternative
+ * PMCSEL values on other counters. This returns the alternative
+ * event code for those that do, or -1 otherwise.
+ */
+static int find_alternative_bdecode(unsigned int event)
+{
+ int pmc, altpmc, pp, j;
+
+ pmc = (event >> PM_PMC_SH) & PM_PMC_MSK;
+ if (pmc == 0 || pmc > 4)
+ return -1;
+ altpmc = 5 - pmc; /* 1 <-> 4, 2 <-> 3 */
+ pp = event & PM_PMCSEL_MSK;
+ for (j = 0; j < 4; ++j) {
+ if (bytedecode_alternatives[pmc - 1][j] == pp) {
+ return (event & ~(PM_PMC_MSKS | PM_PMCSEL_MSK)) |
+ (altpmc << PM_PMC_SH) |
+ bytedecode_alternatives[altpmc - 1][j];
+ }
+ }
+ return -1;
+}
+
+static int power5_get_alternatives(unsigned int event, unsigned int alt[])
+{
+ int i, j, ae, nalt = 1;
+
+ alt[0] = event;
+ nalt = 1;
+ i = find_alternative(event);
+ if (i >= 0) {
+ for (j = 0; j < MAX_ALT; ++j) {
+ ae = event_alternatives[i][j];
+ if (ae && ae != event)
+ alt[nalt++] = ae;
+ }
+ } else {
+ ae = find_alternative_bdecode(event);
+ if (ae > 0)
+ alt[nalt++] = ae;
+ }
+ return nalt;
+}
+
+static int power5_compute_mmcr(unsigned int event[], int n_ev,
+ unsigned int hwc[], u64 mmcr[])
+{
+ u64 mmcr1 = 0;
+ unsigned int pmc, unit, byte, psel;
+ unsigned int ttm, grp;
+ int i, isbus, bit, grsel;
+ unsigned int pmc_inuse = 0;
+ unsigned int pmc_grp_use[2];
+ unsigned char busbyte[4];
+ unsigned char unituse[16];
+ int ttmuse;
+
+ if (n_ev > 6)
+ return -1;
+
+ /* First pass to count resource use */
+ pmc_grp_use[0] = pmc_grp_use[1] = 0;
+ memset(busbyte, 0, sizeof(busbyte));
+ memset(unituse, 0, sizeof(unituse));
+ for (i = 0; i < n_ev; ++i) {
+ pmc = (event[i] >> PM_PMC_SH) & PM_PMC_MSK;
+ if (pmc) {
+ if (pmc > 6)
+ return -1;
+ if (pmc_inuse & (1 << (pmc - 1)))
+ return -1;
+ pmc_inuse |= 1 << (pmc - 1);
+ /* count 1/2 vs 3/4 use */
+ if (pmc <= 4)
+ ++pmc_grp_use[(pmc - 1) >> 1];
+ }
+ if (event[i] & PM_BUSEVENT_MSK) {
+ unit = (event[i] >> PM_UNIT_SH) & PM_UNIT_MSK;
+ byte = (event[i] >> PM_BYTE_SH) & PM_BYTE_MSK;
+ if (unit > PM_LASTUNIT)
+ return -1;
+ if (unit == PM_ISU0_ALT)
+ unit = PM_ISU0;
+ if (byte >= 4) {
+ if (unit != PM_LSU1)
+ return -1;
+ ++unit;
+ byte &= 3;
+ }
+ if (!pmc)
+ ++pmc_grp_use[byte & 1];
+ if (busbyte[byte] && busbyte[byte] != unit)
+ return -1;
+ busbyte[byte] = unit;
+ unituse[unit] = 1;
+ }
+ }
+ if (pmc_grp_use[0] > 2 || pmc_grp_use[1] > 2)
+ return -1;
+
+ /*
+ * Assign resources and set multiplexer selects.
+ *
+ * PM_ISU0 can go either on TTM0 or TTM1, but that's the only
+ * choice we have to deal with.
+ */
+ if (unituse[PM_ISU0] &
+ (unituse[PM_FPU] | unituse[PM_IFU] | unituse[PM_ISU1])) {
+ unituse[PM_ISU0_ALT] = 1; /* move ISU to TTM1 */
+ unituse[PM_ISU0] = 0;
+ }
+ /* Set TTM[01]SEL fields. */
+ ttmuse = 0;
+ for (i = PM_FPU; i <= PM_ISU1; ++i) {
+ if (!unituse[i])
+ continue;
+ if (ttmuse++)
+ return -1;
+ mmcr1 |= (u64)i << MMCR1_TTM0SEL_SH;
+ }
+ ttmuse = 0;
+ for (; i <= PM_GRS; ++i) {
+ if (!unituse[i])
+ continue;
+ if (ttmuse++)
+ return -1;
+ mmcr1 |= (u64)(i & 3) << MMCR1_TTM1SEL_SH;
+ }
+ if (ttmuse > 1)
+ return -1;
+
+ /* Set byte lane select fields, TTM[23]SEL and GRS_*SEL. */
+ for (byte = 0; byte < 4; ++byte) {
+ unit = busbyte[byte];
+ if (!unit)
+ continue;
+ if (unit == PM_ISU0 && unituse[PM_ISU0_ALT]) {
+ /* get ISU0 through TTM1 rather than TTM0 */
+ unit = PM_ISU0_ALT;
+ } else if (unit == PM_LSU1 + 1) {
+ /* select lower word of LSU1 for this byte */
+ mmcr1 |= 1ull << (MMCR1_TTM3SEL_SH + 3 - byte);
+ }
+ ttm = unit >> 2;
+ mmcr1 |= (u64)ttm << (MMCR1_TD_CP_DBG0SEL_SH - 2 * byte);
+ }
+
+ /* Second pass: assign PMCs, set PMCxSEL and PMCx_ADDER_SEL fields */
+ for (i = 0; i < n_ev; ++i) {
+ pmc = (event[i] >> PM_PMC_SH) & PM_PMC_MSK;
+ unit = (event[i] >> PM_UNIT_SH) & PM_UNIT_MSK;
+ byte = (event[i] >> PM_BYTE_SH) & PM_BYTE_MSK;
+ psel = event[i] & PM_PMCSEL_MSK;
+ isbus = event[i] & PM_BUSEVENT_MSK;
+ if (!pmc) {
+ /* Bus event or any-PMC direct event */
+ for (pmc = 0; pmc < 4; ++pmc) {
+ if (pmc_inuse & (1 << pmc))
+ continue;
+ grp = (pmc >> 1) & 1;
+ if (isbus) {
+ if (grp == (byte & 1))
+ break;
+ } else if (pmc_grp_use[grp] < 2) {
+ ++pmc_grp_use[grp];
+ break;
+ }
+ }
+ pmc_inuse |= 1 << pmc;
+ } else if (pmc <= 4) {
+ /* Direct event */
+ --pmc;
+ if ((psel == 8 || psel == 0x10) && isbus && (byte & 2))
+ /* add events on higher-numbered bus */
+ mmcr1 |= 1ull << (MMCR1_PMC1_ADDER_SEL_SH - pmc);
+ } else {
+ /* Instructions or run cycles on PMC5/6 */
+ --pmc;
+ }
+ if (isbus && unit == PM_GRS) {
+ bit = psel & 7;
+ grsel = (event[i] >> PM_GRS_SH) & PM_GRS_MSK;
+ mmcr1 |= (u64)grsel << grsel_shift[bit];
+ }
+ if (pmc <= 3)
+ mmcr1 |= psel << MMCR1_PMCSEL_SH(pmc);
+ hwc[i] = pmc;
+ }
+
+ /* Return MMCRx values */
+ mmcr[0] = 0;
+ if (pmc_inuse & 1)
+ mmcr[0] = MMCR0_PMC1CE;
+ if (pmc_inuse & 0x3e)
+ mmcr[0] |= MMCR0_PMCjCE;
+ mmcr[1] = mmcr1;
+ mmcr[2] = 0;
+ return 0;
+}
+
+static void power5_disable_pmc(unsigned int pmc, u64 mmcr[])
+{
+ if (pmc <= 3)
+ mmcr[1] &= ~(0x7fUL << MMCR1_PMCSEL_SH(pmc));
+}
+
+static int power5_generic_events[] = {
+ [PERF_COUNT_CPU_CYCLES] = 0xf,
+ [PERF_COUNT_INSTRUCTIONS] = 0x100009,
+ [PERF_COUNT_CACHE_REFERENCES] = 0x4c1090, /* LD_REF_L1 */
+ [PERF_COUNT_CACHE_MISSES] = 0x3c1088, /* LD_MISS_L1 */
+ [PERF_COUNT_BRANCH_INSTRUCTIONS] = 0x230e4, /* BR_ISSUED */
+ [PERF_COUNT_BRANCH_MISSES] = 0x230e5, /* BR_MPRED_CR */
+};
+
+struct power_pmu power5_pmu = {
+ .n_counter = 6,
+ .max_alternatives = MAX_ALT,
+ .add_fields = 0x7000090000555ull,
+ .test_adder = 0x3000490000000ull,
+ .compute_mmcr = power5_compute_mmcr,
+ .get_constraint = power5_get_constraint,
+ .get_alternatives = power5_get_alternatives,
+ .disable_pmc = power5_disable_pmc,
+ .n_generic = ARRAY_SIZE(power5_generic_events),
+ .generic_events = power5_generic_events,
+};
diff --git a/arch/powerpc/kernel/power6-pmu.c b/arch/powerpc/kernel/power6-pmu.c
new file mode 100644
index 0000000..b1f61f3
--- /dev/null
+++ b/arch/powerpc/kernel/power6-pmu.c
@@ -0,0 +1,283 @@
+/*
+ * Performance counter support for POWER6 processors.
+ *
+ * Copyright 2008-2009 Paul Mackerras, IBM Corporation.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+#include <linux/kernel.h>
+#include <linux/perf_counter.h>
+#include <asm/reg.h>
+
+/*
+ * Bits in event code for POWER6
+ */
+#define PM_PMC_SH 20 /* PMC number (1-based) for direct events */
+#define PM_PMC_MSK 0x7
+#define PM_PMC_MSKS (PM_PMC_MSK << PM_PMC_SH)
+#define PM_UNIT_SH 16 /* Unit event comes (TTMxSEL encoding) */
+#define PM_UNIT_MSK 0xf
+#define PM_UNIT_MSKS (PM_UNIT_MSK << PM_UNIT_SH)
+#define PM_LLAV 0x8000 /* Load lookahead match value */
+#define PM_LLA 0x4000 /* Load lookahead match enable */
+#define PM_BYTE_SH 12 /* Byte of event bus to use */
+#define PM_BYTE_MSK 3
+#define PM_SUBUNIT_SH 8 /* Subunit event comes from (NEST_SEL enc.) */
+#define PM_SUBUNIT_MSK 7
+#define PM_SUBUNIT_MSKS (PM_SUBUNIT_MSK << PM_SUBUNIT_SH)
+#define PM_PMCSEL_MSK 0xff /* PMCxSEL value */
+#define PM_BUSEVENT_MSK 0xf3700
+
+/*
+ * Bits in MMCR1 for POWER6
+ */
+#define MMCR1_TTM0SEL_SH 60
+#define MMCR1_TTMSEL_SH(n) (MMCR1_TTM0SEL_SH - (n) * 4)
+#define MMCR1_TTMSEL_MSK 0xf
+#define MMCR1_TTMSEL(m, n) (((m) >> MMCR1_TTMSEL_SH(n)) & MMCR1_TTMSEL_MSK)
+#define MMCR1_NESTSEL_SH 45
+#define MMCR1_NESTSEL_MSK 0x7
+#define MMCR1_NESTSEL(m) (((m) >> MMCR1_NESTSEL_SH) & MMCR1_NESTSEL_MSK)
+#define MMCR1_PMC1_LLA ((u64)1 << 44)
+#define MMCR1_PMC1_LLA_VALUE ((u64)1 << 39)
+#define MMCR1_PMC1_ADDR_SEL ((u64)1 << 35)
+#define MMCR1_PMC1SEL_SH 24
+#define MMCR1_PMCSEL_SH(n) (MMCR1_PMC1SEL_SH - (n) * 8)
+#define MMCR1_PMCSEL_MSK 0xff
+
+/*
+ * Assign PMC numbers and compute MMCR1 value for a set of events
+ */
+static int p6_compute_mmcr(unsigned int event[], int n_ev,
+ unsigned int hwc[], u64 mmcr[])
+{
+ u64 mmcr1 = 0;
+ int i;
+ unsigned int pmc, ev, b, u, s, psel;
+ unsigned int ttmset = 0;
+ unsigned int pmc_inuse = 0;
+
+ if (n_ev > 4)
+ return -1;
+ for (i = 0; i < n_ev; ++i) {
+ pmc = (event[i] >> PM_PMC_SH) & PM_PMC_MSK;
+ if (pmc) {
+ if (pmc_inuse & (1 << (pmc - 1)))
+ return -1; /* collision! */
+ pmc_inuse |= 1 << (pmc - 1);
+ }
+ }
+ for (i = 0; i < n_ev; ++i) {
+ ev = event[i];
+ pmc = (ev >> PM_PMC_SH) & PM_PMC_MSK;
+ if (pmc) {
+ --pmc;
+ } else {
+ /* can go on any PMC; find a free one */
+ for (pmc = 0; pmc < 4; ++pmc)
+ if (!(pmc_inuse & (1 << pmc)))
+ break;
+ pmc_inuse |= 1 << pmc;
+ }
+ hwc[i] = pmc;
+ psel = ev & PM_PMCSEL_MSK;
+ if (ev & PM_BUSEVENT_MSK) {
+ /* this event uses the event bus */
+ b = (ev >> PM_BYTE_SH) & PM_BYTE_MSK;
+ u = (ev >> PM_UNIT_SH) & PM_UNIT_MSK;
+ /* check for conflict on this byte of event bus */
+ if ((ttmset & (1 << b)) && MMCR1_TTMSEL(mmcr1, b) != u)
+ return -1;
+ mmcr1 |= (u64)u << MMCR1_TTMSEL_SH(b);
+ ttmset |= 1 << b;
+ if (u == 5) {
+ /* Nest events have a further mux */
+ s = (ev >> PM_SUBUNIT_SH) & PM_SUBUNIT_MSK;
+ if ((ttmset & 0x10) &&
+ MMCR1_NESTSEL(mmcr1) != s)
+ return -1;
+ ttmset |= 0x10;
+ mmcr1 |= (u64)s << MMCR1_NESTSEL_SH;
+ }
+ if (0x30 <= psel && psel <= 0x3d) {
+ /* these need the PMCx_ADDR_SEL bits */
+ if (b >= 2)
+ mmcr1 |= MMCR1_PMC1_ADDR_SEL >> pmc;
+ }
+ /* bus select values are different for PMC3/4 */
+ if (pmc >= 2 && (psel & 0x90) == 0x80)
+ psel ^= 0x20;
+ }
+ if (ev & PM_LLA) {
+ mmcr1 |= MMCR1_PMC1_LLA >> pmc;
+ if (ev & PM_LLAV)
+ mmcr1 |= MMCR1_PMC1_LLA_VALUE >> pmc;
+ }
+ mmcr1 |= (u64)psel << MMCR1_PMCSEL_SH(pmc);
+ }
+ mmcr[0] = 0;
+ if (pmc_inuse & 1)
+ mmcr[0] = MMCR0_PMC1CE;
+ if (pmc_inuse & 0xe)
+ mmcr[0] |= MMCR0_PMCjCE;
+ mmcr[1] = mmcr1;
+ mmcr[2] = 0;
+ return 0;
+}
+
+/*
+ * Layout of constraint bits:
+ *
+ * 0-1 add field: number of uses of PMC1 (max 1)
+ * 2-3, 4-5, 6-7: ditto for PMC2, 3, 4
+ * 8-10 select field: nest (subunit) event selector
+ * 16-19 select field: unit on byte 0 of event bus
+ * 20-23, 24-27, 28-31 ditto for bytes 1, 2, 3
+ */
+static int p6_get_constraint(unsigned int event, u64 *maskp, u64 *valp)
+{
+ int pmc, byte, sh;
+ unsigned int mask = 0, value = 0;
+
+ pmc = (event >> PM_PMC_SH) & PM_PMC_MSK;
+ if (pmc) {
+ if (pmc > 4)
+ return -1;
+ sh = (pmc - 1) * 2;
+ mask |= 2 << sh;
+ value |= 1 << sh;
+ }
+ if (event & PM_BUSEVENT_MSK) {
+ byte = (event >> PM_BYTE_SH) & PM_BYTE_MSK;
+ sh = byte * 4;
+ mask |= PM_UNIT_MSKS << sh;
+ value |= (event & PM_UNIT_MSKS) << sh;
+ if ((event & PM_UNIT_MSKS) == (5 << PM_UNIT_SH)) {
+ mask |= PM_SUBUNIT_MSKS;
+ value |= event & PM_SUBUNIT_MSKS;
+ }
+ }
+ *maskp = mask;
+ *valp = value;
+ return 0;
+}
+
+#define MAX_ALT 4 /* at most 4 alternatives for any event */
+
+static const unsigned int event_alternatives[][MAX_ALT] = {
+ { 0x0130e8, 0x2000f6, 0x3000fc }, /* PM_PTEG_RELOAD_VALID */
+ { 0x080080, 0x10000d, 0x30000c, 0x4000f0 }, /* PM_LD_MISS_L1 */
+ { 0x080088, 0x200054, 0x3000f0 }, /* PM_ST_MISS_L1 */
+ { 0x10000a, 0x2000f4 }, /* PM_RUN_CYC */
+ { 0x10000b, 0x2000f5 }, /* PM_RUN_COUNT */
+ { 0x10000e, 0x400010 }, /* PM_PURR */
+ { 0x100010, 0x4000f8 }, /* PM_FLUSH */
+ { 0x10001a, 0x200010 }, /* PM_MRK_INST_DISP */
+ { 0x100026, 0x3000f8 }, /* PM_TB_BIT_TRANS */
+ { 0x100054, 0x2000f0 }, /* PM_ST_FIN */
+ { 0x100056, 0x2000fc }, /* PM_L1_ICACHE_MISS */
+ { 0x1000f0, 0x40000a }, /* PM_INST_IMC_MATCH_CMPL */
+ { 0x1000f8, 0x200008 }, /* PM_GCT_EMPTY_CYC */
+ { 0x1000fc, 0x400006 }, /* PM_LSU_DERAT_MISS_CYC */
+ { 0x20000e, 0x400007 }, /* PM_LSU_DERAT_MISS */
+ { 0x200012, 0x300012 }, /* PM_INST_DISP */
+ { 0x2000f2, 0x3000f2 }, /* PM_INST_DISP */
+ { 0x2000f8, 0x300010 }, /* PM_EXT_INT */
+ { 0x2000fe, 0x300056 }, /* PM_DATA_FROM_L2MISS */
+ { 0x2d0030, 0x30001a }, /* PM_MRK_FPU_FIN */
+ { 0x30000a, 0x400018 }, /* PM_MRK_INST_FIN */
+ { 0x3000f6, 0x40000e }, /* PM_L1_DCACHE_RELOAD_VALID */
+ { 0x3000fe, 0x400056 }, /* PM_DATA_FROM_L3MISS */
+};
+
+/*
+ * This could be made more efficient with a binary search on
+ * a presorted list, if necessary
+ */
+static int find_alternatives_list(unsigned int event)
+{
+ int i, j;
+ unsigned int alt;
+
+ for (i = 0; i < ARRAY_SIZE(event_alternatives); ++i) {
+ if (event < event_alternatives[i][0])
+ return -1;
+ for (j = 0; j < MAX_ALT; ++j) {
+ alt = event_alternatives[i][j];
+ if (!alt || event < alt)
+ break;
+ if (event == alt)
+ return i;
+ }
+ }
+ return -1;
+}
+
+static int p6_get_alternatives(unsigned int event, unsigned int alt[])
+{
+ int i, j;
+ unsigned int aevent, psel, pmc;
+ unsigned int nalt = 1;
+
+ alt[0] = event;
+
+ /* check the alternatives table */
+ i = find_alternatives_list(event);
+ if (i >= 0) {
+ /* copy out alternatives from list */
+ for (j = 0; j < MAX_ALT; ++j) {
+ aevent = event_alternatives[i][j];
+ if (!aevent)
+ break;
+ if (aevent != event)
+ alt[nalt++] = aevent;
+ }
+
+ } else {
+ /* Check for alternative ways of computing sum events */
+ /* PMCSEL 0x32 counter N == PMCSEL 0x34 counter 5-N */
+ psel = event & (PM_PMCSEL_MSK & ~1); /* ignore edge bit */
+ pmc = (event >> PM_PMC_SH) & PM_PMC_MSK;
+ if (pmc && (psel == 0x32 || psel == 0x34))
+ alt[nalt++] = ((event ^ 0x6) & ~PM_PMC_MSKS) |
+ ((5 - pmc) << PM_PMC_SH);
+
+ /* PMCSEL 0x38 counter N == PMCSEL 0x3a counter N+/-2 */
+ if (pmc && (psel == 0x38 || psel == 0x3a))
+ alt[nalt++] = ((event ^ 0x2) & ~PM_PMC_MSKS) |
+ ((pmc > 2? pmc - 2: pmc + 2) << PM_PMC_SH);
+ }
+
+ return nalt;
+}
+
+static void p6_disable_pmc(unsigned int pmc, u64 mmcr[])
+{
+ /* Set PMCxSEL to 0 to disable PMCx */
+ mmcr[1] &= ~(0xffUL << MMCR1_PMCSEL_SH(pmc));
+}
+
+static int power6_generic_events[] = {
+ [PERF_COUNT_CPU_CYCLES] = 0x1e,
+ [PERF_COUNT_INSTRUCTIONS] = 2,
+ [PERF_COUNT_CACHE_REFERENCES] = 0x280030, /* LD_REF_L1 */
+ [PERF_COUNT_CACHE_MISSES] = 0x30000c, /* LD_MISS_L1 */
+ [PERF_COUNT_BRANCH_INSTRUCTIONS] = 0x410a0, /* BR_PRED */
+ [PERF_COUNT_BRANCH_MISSES] = 0x400052, /* BR_MPRED */
+};
+
+struct power_pmu power6_pmu = {
+ .n_counter = 4,
+ .max_alternatives = MAX_ALT,
+ .add_fields = 0x55,
+ .test_adder = 0,
+ .compute_mmcr = p6_compute_mmcr,
+ .get_constraint = p6_get_constraint,
+ .get_alternatives = p6_get_alternatives,
+ .disable_pmc = p6_disable_pmc,
+ .n_generic = ARRAY_SIZE(power6_generic_events),
+ .generic_events = power6_generic_events,
+};
diff --git a/arch/powerpc/kernel/ppc970-pmu.c b/arch/powerpc/kernel/ppc970-pmu.c
new file mode 100644
index 0000000..c325658
--- /dev/null
+++ b/arch/powerpc/kernel/ppc970-pmu.c
@@ -0,0 +1,375 @@
+/*
+ * Performance counter support for PPC970-family processors.
+ *
+ * Copyright 2008-2009 Paul Mackerras, IBM Corporation.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+#include <linux/string.h>
+#include <linux/perf_counter.h>
+#include <asm/reg.h>
+
+/*
+ * Bits in event code for PPC970
+ */
+#define PM_PMC_SH 12 /* PMC number (1-based) for direct events */
+#define PM_PMC_MSK 0xf
+#define PM_UNIT_SH 8 /* TTMMUX number and setting - unit select */
+#define PM_UNIT_MSK 0xf
+#define PM_BYTE_SH 4 /* Byte number of event bus to use */
+#define PM_BYTE_MSK 3
+#define PM_PMCSEL_MSK 0xf
+
+/* Values in PM_UNIT field */
+#define PM_NONE 0
+#define PM_FPU 1
+#define PM_VPU 2
+#define PM_ISU 3
+#define PM_IFU 4
+#define PM_IDU 5
+#define PM_STS 6
+#define PM_LSU0 7
+#define PM_LSU1U 8
+#define PM_LSU1L 9
+#define PM_LASTUNIT 9
+
+/*
+ * Bits in MMCR0 for PPC970
+ */
+#define MMCR0_PMC1SEL_SH 8
+#define MMCR0_PMC2SEL_SH 1
+#define MMCR_PMCSEL_MSK 0x1f
+
+/*
+ * Bits in MMCR1 for PPC970
+ */
+#define MMCR1_TTM0SEL_SH 62
+#define MMCR1_TTM1SEL_SH 59
+#define MMCR1_TTM3SEL_SH 53
+#define MMCR1_TTMSEL_MSK 3
+#define MMCR1_TD_CP_DBG0SEL_SH 50
+#define MMCR1_TD_CP_DBG1SEL_SH 48
+#define MMCR1_TD_CP_DBG2SEL_SH 46
+#define MMCR1_TD_CP_DBG3SEL_SH 44
+#define MMCR1_PMC1_ADDER_SEL_SH 39
+#define MMCR1_PMC2_ADDER_SEL_SH 38
+#define MMCR1_PMC6_ADDER_SEL_SH 37
+#define MMCR1_PMC5_ADDER_SEL_SH 36
+#define MMCR1_PMC8_ADDER_SEL_SH 35
+#define MMCR1_PMC7_ADDER_SEL_SH 34
+#define MMCR1_PMC3_ADDER_SEL_SH 33
+#define MMCR1_PMC4_ADDER_SEL_SH 32
+#define MMCR1_PMC3SEL_SH 27
+#define MMCR1_PMC4SEL_SH 22
+#define MMCR1_PMC5SEL_SH 17
+#define MMCR1_PMC6SEL_SH 12
+#define MMCR1_PMC7SEL_SH 7
+#define MMCR1_PMC8SEL_SH 2
+
+static short mmcr1_adder_bits[8] = {
+ MMCR1_PMC1_ADDER_SEL_SH,
+ MMCR1_PMC2_ADDER_SEL_SH,
+ MMCR1_PMC3_ADDER_SEL_SH,
+ MMCR1_PMC4_ADDER_SEL_SH,
+ MMCR1_PMC5_ADDER_SEL_SH,
+ MMCR1_PMC6_ADDER_SEL_SH,
+ MMCR1_PMC7_ADDER_SEL_SH,
+ MMCR1_PMC8_ADDER_SEL_SH
+};
+
+/*
+ * Bits in MMCRA
+ */
+
+/*
+ * Layout of constraint bits:
+ * 6666555555555544444444443333333333222222222211111111110000000000
+ * 3210987654321098765432109876543210987654321098765432109876543210
+ * <><>[ >[ >[ >< >< >< >< ><><><><><><><><>
+ * T0T1 UC PS1 PS2 B0 B1 B2 B3 P1P2P3P4P5P6P7P8
+ *
+ * T0 - TTM0 constraint
+ * 46-47: TTM0SEL value (0=FPU, 2=IFU, 3=VPU) 0xC000_0000_0000
+ *
+ * T1 - TTM1 constraint
+ * 44-45: TTM1SEL value (0=IDU, 3=STS) 0x3000_0000_0000
+ *
+ * UC - unit constraint: can't have all three of FPU|IFU|VPU, ISU, IDU|STS
+ * 43: UC3 error 0x0800_0000_0000
+ * 42: FPU|IFU|VPU events needed 0x0400_0000_0000
+ * 41: ISU events needed 0x0200_0000_0000
+ * 40: IDU|STS events needed 0x0100_0000_0000
+ *
+ * PS1
+ * 39: PS1 error 0x0080_0000_0000
+ * 36-38: count of events needing PMC1/2/5/6 0x0070_0000_0000
+ *
+ * PS2
+ * 35: PS2 error 0x0008_0000_0000
+ * 32-34: count of events needing PMC3/4/7/8 0x0007_0000_0000
+ *
+ * B0
+ * 28-31: Byte 0 event source 0xf000_0000
+ * Encoding as for the event code
+ *
+ * B1, B2, B3
+ * 24-27, 20-23, 16-19: Byte 1, 2, 3 event sources
+ *
+ * P1
+ * 15: P1 error 0x8000
+ * 14-15: Count of events needing PMC1
+ *
+ * P2..P8
+ * 0-13: Count of events needing PMC2..PMC8
+ */
+
+/* Masks and values for using events from the various units */
+static u64 unit_cons[PM_LASTUNIT+1][2] = {
+ [PM_FPU] = { 0xc80000000000ull, 0x040000000000ull },
+ [PM_VPU] = { 0xc80000000000ull, 0xc40000000000ull },
+ [PM_ISU] = { 0x080000000000ull, 0x020000000000ull },
+ [PM_IFU] = { 0xc80000000000ull, 0x840000000000ull },
+ [PM_IDU] = { 0x380000000000ull, 0x010000000000ull },
+ [PM_STS] = { 0x380000000000ull, 0x310000000000ull },
+};
+
+static int p970_get_constraint(unsigned int event, u64 *maskp, u64 *valp)
+{
+ int pmc, byte, unit, sh;
+ u64 mask = 0, value = 0;
+ int grp = -1;
+
+ pmc = (event >> PM_PMC_SH) & PM_PMC_MSK;
+ if (pmc) {
+ if (pmc > 8)
+ return -1;
+ sh = (pmc - 1) * 2;
+ mask |= 2 << sh;
+ value |= 1 << sh;
+ grp = ((pmc - 1) >> 1) & 1;
+ }
+ unit = (event >> PM_UNIT_SH) & PM_UNIT_MSK;
+ if (unit) {
+ if (unit > PM_LASTUNIT)
+ return -1;
+ mask |= unit_cons[unit][0];
+ value |= unit_cons[unit][1];
+ byte = (event >> PM_BYTE_SH) & PM_BYTE_MSK;
+ /*
+ * Bus events on bytes 0 and 2 can be counted
+ * on PMC1/2/5/6; bytes 1 and 3 on PMC3/4/7/8.
+ */
+ if (!pmc)
+ grp = byte & 1;
+ /* Set byte lane select field */
+ mask |= 0xfULL << (28 - 4 * byte);
+ value |= (u64)unit << (28 - 4 * byte);
+ }
+ if (grp == 0) {
+ /* increment PMC1/2/5/6 field */
+ mask |= 0x8000000000ull;
+ value |= 0x1000000000ull;
+ } else if (grp == 1) {
+ /* increment PMC3/4/7/8 field */
+ mask |= 0x800000000ull;
+ value |= 0x100000000ull;
+ }
+ *maskp = mask;
+ *valp = value;
+ return 0;
+}
+
+static int p970_get_alternatives(unsigned int event, unsigned int alt[])
+{
+ alt[0] = event;
+
+ /* 2 alternatives for LSU empty */
+ if (event == 0x2002 || event == 0x3002) {
+ alt[1] = event ^ 0x1000;
+ return 2;
+ }
+
+ return 1;
+}
+
+static int p970_compute_mmcr(unsigned int event[], int n_ev,
+ unsigned int hwc[], u64 mmcr[])
+{
+ u64 mmcr0 = 0, mmcr1 = 0, mmcra = 0;
+ unsigned int pmc, unit, byte, psel;
+ unsigned int ttm, grp;
+ unsigned int pmc_inuse = 0;
+ unsigned int pmc_grp_use[2];
+ unsigned char busbyte[4];
+ unsigned char unituse[16];
+ unsigned char unitmap[] = { 0, 0<<3, 3<<3, 1<<3, 2<<3, 0|4, 3|4 };
+ unsigned char ttmuse[2];
+ unsigned char pmcsel[8];
+ int i;
+
+ if (n_ev > 8)
+ return -1;
+
+ /* First pass to count resource use */
+ pmc_grp_use[0] = pmc_grp_use[1] = 0;
+ memset(busbyte, 0, sizeof(busbyte));
+ memset(unituse, 0, sizeof(unituse));
+ for (i = 0; i < n_ev; ++i) {
+ pmc = (event[i] >> PM_PMC_SH) & PM_PMC_MSK;
+ if (pmc) {
+ if (pmc_inuse & (1 << (pmc - 1)))
+ return -1;
+ pmc_inuse |= 1 << (pmc - 1);
+ /* count 1/2/5/6 vs 3/4/7/8 use */
+ ++pmc_grp_use[((pmc - 1) >> 1) & 1];
+ }
+ unit = (event[i] >> PM_UNIT_SH) & PM_UNIT_MSK;
+ byte = (event[i] >> PM_BYTE_SH) & PM_BYTE_MSK;
+ if (unit) {
+ if (unit > PM_LASTUNIT)
+ return -1;
+ if (!pmc)
+ ++pmc_grp_use[byte & 1];
+ if (busbyte[byte] && busbyte[byte] != unit)
+ return -1;
+ busbyte[byte] = unit;
+ unituse[unit] = 1;
+ }
+ }
+ if (pmc_grp_use[0] > 4 || pmc_grp_use[1] > 4)
+ return -1;
+
+ /*
+ * Assign resources and set multiplexer selects.
+ *
+ * PM_ISU can go either on TTM0 or TTM1, but that's the only
+ * choice we have to deal with.
+ */
+ if (unituse[PM_ISU] &
+ (unituse[PM_FPU] | unituse[PM_IFU] | unituse[PM_VPU]))
+ unitmap[PM_ISU] = 2 | 4; /* move ISU to TTM1 */
+ /* Set TTM[01]SEL fields. */
+ ttmuse[0] = ttmuse[1] = 0;
+ for (i = PM_FPU; i <= PM_STS; ++i) {
+ if (!unituse[i])
+ continue;
+ ttm = unitmap[i];
+ ++ttmuse[(ttm >> 2) & 1];
+ mmcr1 |= (u64)(ttm & ~4) << MMCR1_TTM1SEL_SH;
+ }
+ /* Check only one unit per TTMx */
+ if (ttmuse[0] > 1 || ttmuse[1] > 1)
+ return -1;
+
+ /* Set byte lane select fields and TTM3SEL. */
+ for (byte = 0; byte < 4; ++byte) {
+ unit = busbyte[byte];
+ if (!unit)
+ continue;
+ if (unit <= PM_STS)
+ ttm = (unitmap[unit] >> 2) & 1;
+ else if (unit == PM_LSU0)
+ ttm = 2;
+ else {
+ ttm = 3;
+ if (unit == PM_LSU1L && byte >= 2)
+ mmcr1 |= 1ull << (MMCR1_TTM3SEL_SH + 3 - byte);
+ }
+ mmcr1 |= (u64)ttm << (MMCR1_TD_CP_DBG0SEL_SH - 2 * byte);
+ }
+
+ /* Second pass: assign PMCs, set PMCxSEL and PMCx_ADDER_SEL fields */
+ memset(pmcsel, 0x8, sizeof(pmcsel)); /* 8 means don't count */
+ for (i = 0; i < n_ev; ++i) {
+ pmc = (event[i] >> PM_PMC_SH) & PM_PMC_MSK;
+ unit = (event[i] >> PM_UNIT_SH) & PM_UNIT_MSK;
+ byte = (event[i] >> PM_BYTE_SH) & PM_BYTE_MSK;
+ psel = event[i] & PM_PMCSEL_MSK;
+ if (!pmc) {
+ /* Bus event or any-PMC direct event */
+ if (unit)
+ psel |= 0x10 | ((byte & 2) << 2);
+ else
+ psel |= 8;
+ for (pmc = 0; pmc < 8; ++pmc) {
+ if (pmc_inuse & (1 << pmc))
+ continue;
+ grp = (pmc >> 1) & 1;
+ if (unit) {
+ if (grp == (byte & 1))
+ break;
+ } else if (pmc_grp_use[grp] < 4) {
+ ++pmc_grp_use[grp];
+ break;
+ }
+ }
+ pmc_inuse |= 1 << pmc;
+ } else {
+ /* Direct event */
+ --pmc;
+ if (psel == 0 && (byte & 2))
+ /* add events on higher-numbered bus */
+ mmcr1 |= 1ull << mmcr1_adder_bits[pmc];
+ }
+ pmcsel[pmc] = psel;
+ hwc[i] = pmc;
+ }
+ for (pmc = 0; pmc < 2; ++pmc)
+ mmcr0 |= pmcsel[pmc] << (MMCR0_PMC1SEL_SH - 7 * pmc);
+ for (; pmc < 8; ++pmc)
+ mmcr1 |= (u64)pmcsel[pmc] << (MMCR1_PMC3SEL_SH - 5 * (pmc - 2));
+ if (pmc_inuse & 1)
+ mmcr0 |= MMCR0_PMC1CE;
+ if (pmc_inuse & 0xfe)
+ mmcr0 |= MMCR0_PMCjCE;
+
+ mmcra |= 0x2000; /* mark only one IOP per PPC instruction */
+
+ /* Return MMCRx values */
+ mmcr[0] = mmcr0;
+ mmcr[1] = mmcr1;
+ mmcr[2] = mmcra;
+ return 0;
+}
+
+static void p970_disable_pmc(unsigned int pmc, u64 mmcr[])
+{
+ int shift, i;
+
+ if (pmc <= 1) {
+ shift = MMCR0_PMC1SEL_SH - 7 * pmc;
+ i = 0;
+ } else {
+ shift = MMCR1_PMC3SEL_SH - 5 * (pmc - 2);
+ i = 1;
+ }
+ /*
+ * Setting the PMCxSEL field to 0x08 disables PMC x.
+ */
+ mmcr[i] = (mmcr[i] & ~(0x1fUL << shift)) | (0x08UL << shift);
+}
+
+static int ppc970_generic_events[] = {
+ [PERF_COUNT_CPU_CYCLES] = 7,
+ [PERF_COUNT_INSTRUCTIONS] = 1,
+ [PERF_COUNT_CACHE_REFERENCES] = 0x8810, /* PM_LD_REF_L1 */
+ [PERF_COUNT_CACHE_MISSES] = 0x3810, /* PM_LD_MISS_L1 */
+ [PERF_COUNT_BRANCH_INSTRUCTIONS] = 0x431, /* PM_BR_ISSUED */
+ [PERF_COUNT_BRANCH_MISSES] = 0x327, /* PM_GRP_BR_MPRED */
+};
+
+struct power_pmu ppc970_pmu = {
+ .n_counter = 8,
+ .max_alternatives = 2,
+ .add_fields = 0x001100005555ull,
+ .test_adder = 0x013300000000ull,
+ .compute_mmcr = p970_compute_mmcr,
+ .get_constraint = p970_get_constraint,
+ .get_alternatives = p970_get_alternatives,
+ .disable_pmc = p970_disable_pmc,
+ .n_generic = ARRAY_SIZE(ppc970_generic_events),
+ .generic_events = ppc970_generic_events,
+};
diff --git a/arch/powerpc/kernel/vmlinux.lds.S b/arch/powerpc/kernel/vmlinux.lds.S
index 161b9b9..295ccc5 100644
--- a/arch/powerpc/kernel/vmlinux.lds.S
+++ b/arch/powerpc/kernel/vmlinux.lds.S
@@ -184,6 +184,7 @@
. = ALIGN(PAGE_SIZE);
.data.percpu : AT(ADDR(.data.percpu) - LOAD_OFFSET) {
__per_cpu_start = .;
+ *(.data.percpu.page_aligned)
*(.data.percpu)
*(.data.percpu.shared_aligned)
__per_cpu_end = .;
diff --git a/arch/powerpc/platforms/Kconfig.cputype b/arch/powerpc/platforms/Kconfig.cputype
index e868b5c..dc0f3c9 100644
--- a/arch/powerpc/platforms/Kconfig.cputype
+++ b/arch/powerpc/platforms/Kconfig.cputype
@@ -1,6 +1,7 @@
config PPC64
bool "64-bit kernel"
default n
+ select HAVE_PERF_COUNTERS
help
This option selects whether a 32-bit or a 64-bit kernel
will be built.
diff --git a/arch/powerpc/platforms/pseries/xics.c b/arch/powerpc/platforms/pseries/xics.c
index 84e058f..80b5134 100644
--- a/arch/powerpc/platforms/pseries/xics.c
+++ b/arch/powerpc/platforms/pseries/xics.c
@@ -153,9 +153,10 @@
{
int server;
/* For the moment only implement delivery to all cpus or one cpu */
- cpumask_t cpumask = irq_desc[virq].affinity;
+ cpumask_t cpumask;
cpumask_t tmp = CPU_MASK_NONE;
+ cpumask_copy(&cpumask, irq_desc[virq].affinity);
if (!distribute_irqs)
return default_server;
@@ -869,7 +870,7 @@
virq, cpu);
/* Reset affinity to all cpus */
- irq_desc[virq].affinity = CPU_MASK_ALL;
+ cpumask_setall(irq_desc[virq].affinity);
desc->chip->set_affinity(virq, cpu_all_mask);
unlock:
spin_unlock_irqrestore(&desc->lock, flags);
diff --git a/arch/powerpc/sysdev/mpic.c b/arch/powerpc/sysdev/mpic.c
index a35297d..532e205 100644
--- a/arch/powerpc/sysdev/mpic.c
+++ b/arch/powerpc/sysdev/mpic.c
@@ -566,9 +566,10 @@
#ifdef CONFIG_SMP
static int irq_choose_cpu(unsigned int virt_irq)
{
- cpumask_t mask = irq_desc[virt_irq].affinity;
+ cpumask_t mask;
int cpuid;
+ cpumask_copy(&mask, irq_desc[virt_irq].affinity);
if (cpus_equal(mask, CPU_MASK_ALL)) {
static int irq_rover;
static DEFINE_SPINLOCK(irq_rover_lock);
diff --git a/arch/sparc/kernel/irq_64.c b/arch/sparc/kernel/irq_64.c
index e289376..3d2c6ba 100644
--- a/arch/sparc/kernel/irq_64.c
+++ b/arch/sparc/kernel/irq_64.c
@@ -252,9 +252,10 @@
#ifdef CONFIG_SMP
static int irq_choose_cpu(unsigned int virt_irq)
{
- cpumask_t mask = irq_desc[virt_irq].affinity;
+ cpumask_t mask;
int cpuid;
+ cpumask_copy(&mask, irq_desc[virt_irq].affinity);
if (cpus_equal(mask, CPU_MASK_ALL)) {
static int irq_rover;
static DEFINE_SPINLOCK(irq_rover_lock);
@@ -796,7 +797,7 @@
!(irq_desc[irq].status & IRQ_PER_CPU)) {
if (irq_desc[irq].chip->set_affinity)
irq_desc[irq].chip->set_affinity(irq,
- &irq_desc[irq].affinity);
+ irq_desc[irq].affinity);
}
spin_unlock_irqrestore(&irq_desc[irq].lock, flags);
}
diff --git a/arch/sparc/kernel/time_64.c b/arch/sparc/kernel/time_64.c
index 2db3c22..db310aa 100644
--- a/arch/sparc/kernel/time_64.c
+++ b/arch/sparc/kernel/time_64.c
@@ -729,7 +729,7 @@
irq_enter();
- kstat_this_cpu.irqs[0]++;
+ kstat_incr_irqs_this_cpu(0, irq_to_desc(0));
if (unlikely(!evt->event_handler)) {
printk(KERN_WARNING
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index bc2fbad..e18dfd3 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -5,7 +5,7 @@
config 64BIT
bool "64-bit kernel" if ARCH = "x86"
default ARCH = "x86_64"
- help
+ ---help---
Say yes to build a 64-bit kernel - formerly known as x86_64
Say no to build a 32-bit kernel - formerly known as i386
@@ -34,8 +34,8 @@
select HAVE_FUNCTION_TRACER
select HAVE_FUNCTION_GRAPH_TRACER
select HAVE_FUNCTION_TRACE_MCOUNT_TEST
- select HAVE_KVM if ((X86_32 && !X86_VOYAGER && !X86_VISWS && !X86_NUMAQ) || X86_64)
- select HAVE_ARCH_KGDB if !X86_VOYAGER
+ select HAVE_KVM
+ select HAVE_ARCH_KGDB
select HAVE_ARCH_TRACEHOOK
select HAVE_GENERIC_DMA_COHERENT if X86_32
select HAVE_EFFICIENT_UNALIGNED_ACCESS
@@ -133,18 +133,16 @@
def_bool y
config HAVE_SETUP_PER_CPU_AREA
- def_bool X86_64_SMP || (X86_SMP && !X86_VOYAGER)
+ def_bool y
config HAVE_CPUMASK_OF_CPU_MAP
def_bool X86_64_SMP
config ARCH_HIBERNATION_POSSIBLE
def_bool y
- depends on !SMP || !X86_VOYAGER
config ARCH_SUSPEND_POSSIBLE
def_bool y
- depends on !X86_VOYAGER
config ZONE_DMA32
bool
@@ -174,11 +172,6 @@
depends on GENERIC_HARDIRQS && SMP
default y
-config X86_SMP
- bool
- depends on SMP && ((X86_32 && !X86_VOYAGER) || X86_64)
- default y
-
config USE_GENERIC_SMP_HELPERS
def_bool y
depends on SMP
@@ -194,19 +187,17 @@
config X86_HT
bool
depends on SMP
- depends on (X86_32 && !X86_VOYAGER) || X86_64
- default y
-
-config X86_BIOS_REBOOT
- bool
- depends on !X86_VOYAGER
default y
config X86_TRAMPOLINE
bool
- depends on X86_SMP || (X86_VOYAGER && SMP) || (64BIT && ACPI_SLEEP)
+ depends on SMP || (64BIT && ACPI_SLEEP)
default y
+config X86_32_LAZY_GS
+ def_bool y
+ depends on X86_32 && !CC_STACKPROTECTOR
+
config KTIME_SCALAR
def_bool X86_32
source "init/Kconfig"
@@ -244,14 +235,24 @@
If you don't know what to do here, say N.
-config X86_HAS_BOOT_CPU_ID
- def_bool y
- depends on X86_VOYAGER
+config X86_X2APIC
+ bool "Support x2apic"
+ depends on X86_LOCAL_APIC && X86_64
+ ---help---
+ This enables x2apic support on CPUs that have this feature.
+
+ This allows 32-bit apic IDs (so it can support very large systems),
+ and accesses the local apic via MSRs not via mmio.
+
+ ( On certain CPU models you may need to enable INTR_REMAP too,
+ to get functional x2apic mode. )
+
+ If you don't know what to do here, say N.
config SPARSE_IRQ
bool "Support sparse irq numbering"
depends on PCI_MSI || HT_IRQ
- help
+ ---help---
This enables support for sparse irqs. This is useful for distro
kernels that want to define a high CONFIG_NR_CPUS value but still
want to have low kernel memory footprint on smaller machines.
@@ -265,114 +266,140 @@
bool "Move irq desc when changing irq smp_affinity"
depends on SPARSE_IRQ && NUMA
default n
- help
+ ---help---
This enables moving irq_desc to cpu/node that irq will use handled.
If you don't know what to do here, say N.
-config X86_FIND_SMP_CONFIG
- def_bool y
- depends on X86_MPPARSE || X86_VOYAGER
-
config X86_MPPARSE
bool "Enable MPS table" if ACPI
default y
depends on X86_LOCAL_APIC
- help
+ ---help---
For old smp systems that do not have proper acpi support. Newer systems
(esp with 64bit cpus) with acpi support, MADT and DSDT will override it
-choice
- prompt "Subarchitecture Type"
- default X86_PC
+config X86_BIGSMP
+ bool "Support for big SMP systems with more than 8 CPUs"
+ depends on X86_32 && SMP
+ ---help---
+ This option is needed for the systems that have more than 8 CPUs
-config X86_PC
- bool "PC-compatible"
- help
- Choose this option if your computer is a standard PC or compatible.
+if X86_32
+config X86_EXTENDED_PLATFORM
+ bool "Support for extended (non-PC) x86 platforms"
+ default y
+ ---help---
+ If you disable this option then the kernel will only support
+ standard PC platforms. (which covers the vast majority of
+ systems out there.)
+
+ If you enable this option then you'll be able to select support
+ for the following (non-PC) 32 bit x86 platforms:
+ AMD Elan
+ NUMAQ (IBM/Sequent)
+ RDC R-321x SoC
+ SGI 320/540 (Visual Workstation)
+ Summit/EXA (IBM x440)
+ Unisys ES7000 IA32 series
+
+ If you have one of these systems, or if you want to build a
+ generic distribution kernel, say Y here - otherwise say N.
+endif
+
+if X86_64
+config X86_EXTENDED_PLATFORM
+ bool "Support for extended (non-PC) x86 platforms"
+ default y
+ ---help---
+ If you disable this option then the kernel will only support
+ standard PC platforms. (which covers the vast majority of
+ systems out there.)
+
+ If you enable this option then you'll be able to select support
+ for the following (non-PC) 64 bit x86 platforms:
+ ScaleMP vSMP
+ SGI Ultraviolet
+
+ If you have one of these systems, or if you want to build a
+ generic distribution kernel, say Y here - otherwise say N.
+endif
+# This is an alphabetically sorted list of 64 bit extended platforms
+# Please maintain the alphabetic order if and when there are additions
+
+config X86_VSMP
+ bool "ScaleMP vSMP"
+ select PARAVIRT
+ depends on X86_64 && PCI
+ depends on X86_EXTENDED_PLATFORM
+ ---help---
+ Support for ScaleMP vSMP systems. Say 'Y' here if this kernel is
+ supposed to run on these EM64T-based machines. Only choose this option
+ if you have one of these machines.
+
+config X86_UV
+ bool "SGI Ultraviolet"
+ depends on X86_64
+ depends on X86_EXTENDED_PLATFORM
+ select X86_X2APIC
+ ---help---
+ This option is needed in order to support SGI Ultraviolet systems.
+ If you don't have one of these, you should say N here.
+
+# Following is an alphabetically sorted list of 32 bit extended platforms
+# Please maintain the alphabetic order if and when there are additions
config X86_ELAN
bool "AMD Elan"
depends on X86_32
- help
+ depends on X86_EXTENDED_PLATFORM
+ ---help---
Select this for an AMD Elan processor.
Do not use this option for K6/Athlon/Opteron processors!
If unsure, choose "PC-compatible" instead.
-config X86_VOYAGER
- bool "Voyager (NCR)"
- depends on X86_32 && (SMP || BROKEN) && !PCI
- help
- Voyager is an MCA-based 32-way capable SMP architecture proprietary
- to NCR Corp. Machine classes 345x/35xx/4100/51xx are Voyager-based.
-
- *** WARNING ***
-
- If you do not specifically know you have a Voyager based machine,
- say N here, otherwise the kernel you build will not be bootable.
-
-config X86_GENERICARCH
- bool "Generic architecture"
+config X86_RDC321X
+ bool "RDC R-321x SoC"
depends on X86_32
- help
- This option compiles in the NUMAQ, Summit, bigsmp, ES7000, default
+ depends on X86_EXTENDED_PLATFORM
+ select M486
+ select X86_REBOOTFIXUPS
+ ---help---
+ This option is needed for RDC R-321x system-on-chip, also known
+ as R-8610-(G).
+ If you don't have one of these chips, you should say N here.
+
+config X86_32_NON_STANDARD
+ bool "Support non-standard 32-bit SMP architectures"
+ depends on X86_32 && SMP
+ depends on X86_EXTENDED_PLATFORM
+ ---help---
+ This option compiles in the NUMAQ, Summit, bigsmp, ES7000, default
subarchitectures. It is intended for a generic binary kernel.
if you select them all, kernel will probe it one by one. and will
fallback to default.
-if X86_GENERICARCH
+# Alphabetically sorted list of Non standard 32 bit platforms
config X86_NUMAQ
bool "NUMAQ (IBM/Sequent)"
- depends on SMP && X86_32 && PCI && X86_MPPARSE
+ depends on X86_32_NON_STANDARD
select NUMA
- help
+ select X86_MPPARSE
+ ---help---
This option is used for getting Linux to run on a NUMAQ (IBM/Sequent)
NUMA multiquad box. This changes the way that processors are
bootstrapped, and uses Clustered Logical APIC addressing mode instead
of Flat Logical. You will need a new lynxer.elf file to flash your
firmware with - send email to <Martin.Bligh@us.ibm.com>.
-config X86_SUMMIT
- bool "Summit/EXA (IBM x440)"
- depends on X86_32 && SMP
- help
- This option is needed for IBM systems that use the Summit/EXA chipset.
- In particular, it is needed for the x440.
-
-config X86_ES7000
- bool "Support for Unisys ES7000 IA32 series"
- depends on X86_32 && SMP
- help
- Support for Unisys ES7000 systems. Say 'Y' here if this kernel is
- supposed to run on an IA32-based Unisys ES7000 system.
-
-config X86_BIGSMP
- bool "Support for big SMP systems with more than 8 CPUs"
- depends on X86_32 && SMP
- help
- This option is needed for the systems that have more than 8 CPUs
- and if the system is not of any sub-arch type above.
-
-endif
-
-config X86_VSMP
- bool "Support for ScaleMP vSMP"
- select PARAVIRT
- depends on X86_64 && PCI
- help
- Support for ScaleMP vSMP systems. Say 'Y' here if this kernel is
- supposed to run on these EM64T-based machines. Only choose this option
- if you have one of these machines.
-
-endchoice
-
config X86_VISWS
bool "SGI 320/540 (Visual Workstation)"
- depends on X86_32 && PCI && !X86_VOYAGER && X86_MPPARSE && PCI_GODIRECT
- help
+ depends on X86_32 && PCI && X86_MPPARSE && PCI_GODIRECT
+ depends on X86_32_NON_STANDARD
+ ---help---
The SGI Visual Workstation series is an IA32-based workstation
based on SGI systems chips with some legacy PC hardware attached.
@@ -381,21 +408,25 @@
A kernel compiled for the Visual Workstation will run on general
PCs as well. See <file:Documentation/sgi-visws.txt> for details.
-config X86_RDC321X
- bool "RDC R-321x SoC"
- depends on X86_32
- select M486
- select X86_REBOOTFIXUPS
- help
- This option is needed for RDC R-321x system-on-chip, also known
- as R-8610-(G).
- If you don't have one of these chips, you should say N here.
+config X86_SUMMIT
+ bool "Summit/EXA (IBM x440)"
+ depends on X86_32_NON_STANDARD
+ ---help---
+ This option is needed for IBM systems that use the Summit/EXA chipset.
+ In particular, it is needed for the x440.
+
+config X86_ES7000
+ bool "Unisys ES7000 IA32 series"
+ depends on X86_32_NON_STANDARD && X86_BIGSMP
+ ---help---
+ Support for Unisys ES7000 systems. Say 'Y' here if this kernel is
+ supposed to run on an IA32-based Unisys ES7000 system.
config SCHED_OMIT_FRAME_POINTER
def_bool y
prompt "Single-depth WCHAN output"
depends on X86
- help
+ ---help---
Calculate simpler /proc/<PID>/wchan values. If this option
is disabled then wchan values will recurse back to the
caller function. This provides more accurate wchan values,
@@ -405,7 +436,7 @@
menuconfig PARAVIRT_GUEST
bool "Paravirtualized guest support"
- help
+ ---help---
Say Y here to get to see options related to running Linux under
various hypervisors. This option alone does not add any kernel code.
@@ -419,8 +450,7 @@
bool "VMI Guest support"
select PARAVIRT
depends on X86_32
- depends on !X86_VOYAGER
- help
+ ---help---
VMI provides a paravirtualized interface to the VMware ESX server
(it could be used by other hypervisors in theory too, but is not
at the moment), by linking the kernel to a GPL-ed ROM module
@@ -430,8 +460,7 @@
bool "KVM paravirtualized clock"
select PARAVIRT
select PARAVIRT_CLOCK
- depends on !X86_VOYAGER
- help
+ ---help---
Turning on this option will allow you to run a paravirtualized clock
when running over the KVM hypervisor. Instead of relying on a PIT
(or probably other) emulation by the underlying device model, the host
@@ -441,17 +470,15 @@
config KVM_GUEST
bool "KVM Guest support"
select PARAVIRT
- depends on !X86_VOYAGER
- help
- This option enables various optimizations for running under the KVM
- hypervisor.
+ ---help---
+ This option enables various optimizations for running under the KVM
+ hypervisor.
source "arch/x86/lguest/Kconfig"
config PARAVIRT
bool "Enable paravirtualization code"
- depends on !X86_VOYAGER
- help
+ ---help---
This changes the kernel so it can modify itself when it is run
under a hypervisor, potentially improving performance significantly
over full virtualization. However, when run without a hypervisor
@@ -464,51 +491,51 @@
endif
config PARAVIRT_DEBUG
- bool "paravirt-ops debugging"
- depends on PARAVIRT && DEBUG_KERNEL
- help
- Enable to debug paravirt_ops internals. Specifically, BUG if
- a paravirt_op is missing when it is called.
+ bool "paravirt-ops debugging"
+ depends on PARAVIRT && DEBUG_KERNEL
+ ---help---
+ Enable to debug paravirt_ops internals. Specifically, BUG if
+ a paravirt_op is missing when it is called.
config MEMTEST
bool "Memtest"
- help
+ ---help---
This option adds a kernel parameter 'memtest', which allows memtest
to be set.
- memtest=0, mean disabled; -- default
- memtest=1, mean do 1 test pattern;
- ...
- memtest=4, mean do 4 test patterns.
+ memtest=0, mean disabled; -- default
+ memtest=1, mean do 1 test pattern;
+ ...
+ memtest=4, mean do 4 test patterns.
If you are unsure how to answer this question, answer N.
config X86_SUMMIT_NUMA
def_bool y
- depends on X86_32 && NUMA && X86_GENERICARCH
+ depends on X86_32 && NUMA && X86_32_NON_STANDARD
config X86_CYCLONE_TIMER
def_bool y
- depends on X86_GENERICARCH
+ depends on X86_32_NON_STANDARD
source "arch/x86/Kconfig.cpu"
config HPET_TIMER
def_bool X86_64
prompt "HPET Timer Support" if X86_32
- help
- Use the IA-PC HPET (High Precision Event Timer) to manage
- time in preference to the PIT and RTC, if a HPET is
- present.
- HPET is the next generation timer replacing legacy 8254s.
- The HPET provides a stable time base on SMP
- systems, unlike the TSC, but it is more expensive to access,
- as it is off-chip. You can find the HPET spec at
- <http://www.intel.com/hardwaredesign/hpetspec_1.pdf>.
+ ---help---
+ Use the IA-PC HPET (High Precision Event Timer) to manage
+ time in preference to the PIT and RTC, if a HPET is
+ present.
+ HPET is the next generation timer replacing legacy 8254s.
+ The HPET provides a stable time base on SMP
+ systems, unlike the TSC, but it is more expensive to access,
+ as it is off-chip. You can find the HPET spec at
+ <http://www.intel.com/hardwaredesign/hpetspec_1.pdf>.
- You can safely choose Y here. However, HPET will only be
- activated if the platform and the BIOS support this feature.
- Otherwise the 8254 will be used for timing services.
+ You can safely choose Y here. However, HPET will only be
+ activated if the platform and the BIOS support this feature.
+ Otherwise the 8254 will be used for timing services.
- Choose N to continue using the legacy 8254 timer.
+ Choose N to continue using the legacy 8254 timer.
config HPET_EMULATE_RTC
def_bool y
@@ -519,7 +546,7 @@
config DMI
default y
bool "Enable DMI scanning" if EMBEDDED
- help
+ ---help---
Enabled scanning of DMI to identify machine quirks. Say Y
here unless you have verified that your setup is not
affected by entries in the DMI blacklist. Required by PNP
@@ -531,7 +558,7 @@
select SWIOTLB
select AGP
depends on X86_64 && PCI
- help
+ ---help---
Support for full DMA access of devices with 32bit memory access only
on systems with more than 3GB. This is usually needed for USB,
sound, many IDE/SATA chipsets and some other devices.
@@ -546,7 +573,7 @@
bool "IBM Calgary IOMMU support"
select SWIOTLB
depends on X86_64 && PCI && EXPERIMENTAL
- help
+ ---help---
Support for hardware IOMMUs in IBM's xSeries x366 and x460
systems. Needed to run systems with more than 3GB of memory
properly with 32-bit PCI devices that do not support DAC
@@ -564,7 +591,7 @@
def_bool y
prompt "Should Calgary be enabled by default?"
depends on CALGARY_IOMMU
- help
+ ---help---
Should Calgary be enabled by default? if you choose 'y', Calgary
will be used (if it exists). If you choose 'n', Calgary will not be
used even if it exists. If you choose 'n' and would like to use
@@ -576,7 +603,7 @@
select SWIOTLB
select PCI_MSI
depends on X86_64 && PCI && ACPI
- help
+ ---help---
With this option you can enable support for AMD IOMMU hardware in
your system. An IOMMU is a hardware component which provides
remapping of DMA memory accesses from devices. With an AMD IOMMU you
@@ -591,7 +618,7 @@
bool "Export AMD IOMMU statistics to debugfs"
depends on AMD_IOMMU
select DEBUG_FS
- help
+ ---help---
This option enables code in the AMD IOMMU driver to collect various
statistics about whats happening in the driver and exports that
information to userspace via debugfs.
@@ -600,7 +627,7 @@
# need this always selected by IOMMU for the VIA workaround
config SWIOTLB
def_bool y if X86_64
- help
+ ---help---
Support for software bounce buffers used on x86-64 systems
which don't have a hardware IOMMU (e.g. the current generation
of Intel's x86-64 CPUs). Using this PCI devices which can only
@@ -618,7 +645,7 @@
depends on X86_64 && SMP && DEBUG_KERNEL && EXPERIMENTAL
select CPUMASK_OFFSTACK
default n
- help
+ ---help---
Configure maximum number of CPUS and NUMA Nodes for this architecture.
If unsure, say N.
@@ -629,7 +656,7 @@
default "4096" if MAXSMP
default "32" if SMP && (X86_NUMAQ || X86_SUMMIT || X86_BIGSMP || X86_ES7000)
default "8" if SMP
- help
+ ---help---
This allows you to specify the maximum number of CPUs which this
kernel will support. The maximum supported value is 512 and the
minimum value which makes sense is 2.
@@ -640,7 +667,7 @@
config SCHED_SMT
bool "SMT (Hyperthreading) scheduler support"
depends on X86_HT
- help
+ ---help---
SMT scheduler support improves the CPU scheduler's decision making
when dealing with Intel Pentium 4 chips with HyperThreading at a
cost of slightly increased overhead in some places. If unsure say
@@ -650,7 +677,7 @@
def_bool y
prompt "Multi-core scheduler support"
depends on X86_HT
- help
+ ---help---
Multi-core scheduler support improves the CPU scheduler's decision
making when dealing with multi-core CPU chips at a cost of slightly
increased overhead in some places. If unsure say N here.
@@ -659,8 +686,8 @@
config X86_UP_APIC
bool "Local APIC support on uniprocessors"
- depends on X86_32 && !SMP && !(X86_VOYAGER || X86_GENERICARCH)
- help
+ depends on X86_32 && !SMP && !X86_32_NON_STANDARD
+ ---help---
A local APIC (Advanced Programmable Interrupt Controller) is an
integrated interrupt controller in the CPU. If you have a single-CPU
system which has a processor with a local APIC, you can say Y here to
@@ -673,7 +700,7 @@
config X86_UP_IOAPIC
bool "IO-APIC support on uniprocessors"
depends on X86_UP_APIC
- help
+ ---help---
An IO-APIC (I/O Advanced Programmable Interrupt Controller) is an
SMP-capable replacement for PC-style interrupt controllers. Most
SMP systems and many recent uniprocessor systems have one.
@@ -684,11 +711,12 @@
config X86_LOCAL_APIC
def_bool y
- depends on X86_64 || (X86_32 && (X86_UP_APIC || (SMP && !X86_VOYAGER) || X86_GENERICARCH))
+ depends on X86_64 || SMP || X86_32_NON_STANDARD || X86_UP_APIC
+ select HAVE_PERF_COUNTERS if (!M386 && !M486)
config X86_IO_APIC
def_bool y
- depends on X86_64 || (X86_32 && (X86_UP_IOAPIC || (SMP && !X86_VOYAGER) || X86_GENERICARCH))
+ depends on X86_64 || SMP || X86_32_NON_STANDARD || X86_UP_APIC
config X86_VISWS_APIC
def_bool y
@@ -698,7 +726,7 @@
bool "Reroute for broken boot IRQs"
default n
depends on X86_IO_APIC
- help
+ ---help---
This option enables a workaround that fixes a source of
spurious interrupts. This is recommended when threaded
interrupt handling is used on systems where the generation of
@@ -720,7 +748,6 @@
config X86_MCE
bool "Machine Check Exception"
- depends on !X86_VOYAGER
---help---
Machine Check Exception support allows the processor to notify the
kernel if it detects a problem (e.g. overheating, component failure).
@@ -739,7 +766,7 @@
def_bool y
prompt "Intel MCE features"
depends on X86_64 && X86_MCE && X86_LOCAL_APIC
- help
+ ---help---
Additional support for intel specific MCE features such as
the thermal monitor.
@@ -747,14 +774,14 @@
def_bool y
prompt "AMD MCE features"
depends on X86_64 && X86_MCE && X86_LOCAL_APIC
- help
+ ---help---
Additional support for AMD specific MCE features such as
the DRAM Error Threshold.
config X86_MCE_NONFATAL
tristate "Check for non-fatal errors on AMD Athlon/Duron / Intel Pentium 4"
depends on X86_32 && X86_MCE
- help
+ ---help---
Enabling this feature starts a timer that triggers every 5 seconds which
will look at the machine check registers to see if anything happened.
Non-fatal problems automatically get corrected (but still logged).
@@ -767,7 +794,7 @@
config X86_MCE_P4THERMAL
bool "check for P4 thermal throttling interrupt."
depends on X86_32 && X86_MCE && (X86_UP_APIC || SMP)
- help
+ ---help---
Enabling this feature will cause a message to be printed when the P4
enters thermal throttling.
@@ -775,11 +802,11 @@
bool "Enable VM86 support" if EMBEDDED
default y
depends on X86_32
- help
- This option is required by programs like DOSEMU to run 16-bit legacy
+ ---help---
+ This option is required by programs like DOSEMU to run 16-bit legacy
code on X86 processors. It also may be needed by software like
- XFree86 to initialize some video cards via BIOS. Disabling this
- option saves about 6k.
+ XFree86 to initialize some video cards via BIOS. Disabling this
+ option saves about 6k.
config TOSHIBA
tristate "Toshiba Laptop support"
@@ -853,33 +880,33 @@
module will be called microcode.
config MICROCODE_INTEL
- bool "Intel microcode patch loading support"
- depends on MICROCODE
- default MICROCODE
- select FW_LOADER
- --help---
- This options enables microcode patch loading support for Intel
- processors.
+ bool "Intel microcode patch loading support"
+ depends on MICROCODE
+ default MICROCODE
+ select FW_LOADER
+ ---help---
+ This options enables microcode patch loading support for Intel
+ processors.
- For latest news and information on obtaining all the required
- Intel ingredients for this driver, check:
- <http://www.urbanmyth.org/microcode/>.
+ For latest news and information on obtaining all the required
+ Intel ingredients for this driver, check:
+ <http://www.urbanmyth.org/microcode/>.
config MICROCODE_AMD
- bool "AMD microcode patch loading support"
- depends on MICROCODE
- select FW_LOADER
- --help---
- If you select this option, microcode patch loading support for AMD
- processors will be enabled.
+ bool "AMD microcode patch loading support"
+ depends on MICROCODE
+ select FW_LOADER
+ ---help---
+ If you select this option, microcode patch loading support for AMD
+ processors will be enabled.
- config MICROCODE_OLD_INTERFACE
+config MICROCODE_OLD_INTERFACE
def_bool y
depends on MICROCODE
config X86_MSR
tristate "/dev/cpu/*/msr - Model-specific register support"
- help
+ ---help---
This device gives privileged processes access to the x86
Model-Specific Registers (MSRs). It is a character device with
major 202 and minors 0 to 31 for /dev/cpu/0/msr to /dev/cpu/31/msr.
@@ -888,7 +915,7 @@
config X86_CPUID
tristate "/dev/cpu/*/cpuid - CPU information support"
- help
+ ---help---
This device gives processes access to the x86 CPUID instruction to
be executed on a specific processor. It is a character device
with major 203 and minors 0 to 31 for /dev/cpu/0/cpuid to
@@ -940,7 +967,7 @@
config HIGHMEM4G
bool "4GB"
depends on !X86_NUMAQ
- help
+ ---help---
Select this if you have a 32-bit processor and between 1 and 4
gigabytes of physical RAM.
@@ -948,7 +975,7 @@
bool "64GB"
depends on !M386 && !M486
select X86_PAE
- help
+ ---help---
Select this if you have a 32-bit processor and more than 4
gigabytes of physical RAM.
@@ -959,7 +986,7 @@
prompt "Memory split" if EMBEDDED
default VMSPLIT_3G
depends on X86_32
- help
+ ---help---
Select the desired split between kernel and user memory.
If the address range available to the kernel is less than the
@@ -1005,20 +1032,20 @@
config X86_PAE
bool "PAE (Physical Address Extension) Support"
depends on X86_32 && !HIGHMEM4G
- help
+ ---help---
PAE is required for NX support, and furthermore enables
larger swapspace support for non-overcommit purposes. It
has the cost of more pagetable lookup overhead, and also
consumes more pagetable space per process.
config ARCH_PHYS_ADDR_T_64BIT
- def_bool X86_64 || X86_PAE
+ def_bool X86_64 || X86_PAE
config DIRECT_GBPAGES
bool "Enable 1GB pages for kernel pagetables" if EMBEDDED
default y
depends on X86_64
- help
+ ---help---
Allow the kernel linear mapping to use 1GB pages on CPUs that
support it. This can improve the kernel's performance a tiny bit by
reducing TLB pressure. If in doubt, say "Y".
@@ -1028,9 +1055,8 @@
bool "Numa Memory Allocation and Scheduler Support"
depends on SMP
depends on X86_64 || (X86_32 && HIGHMEM64G && (X86_NUMAQ || X86_BIGSMP || X86_SUMMIT && ACPI) && EXPERIMENTAL)
- default n if X86_PC
default y if (X86_NUMAQ || X86_SUMMIT || X86_BIGSMP)
- help
+ ---help---
Enable NUMA (Non Uniform Memory Access) support.
The kernel will try to allocate memory used by a CPU on the
@@ -1053,19 +1079,19 @@
def_bool y
prompt "Old style AMD Opteron NUMA detection"
depends on X86_64 && NUMA && PCI
- help
- Enable K8 NUMA node topology detection. You should say Y here if
- you have a multi processor AMD K8 system. This uses an old
- method to read the NUMA configuration directly from the builtin
- Northbridge of Opteron. It is recommended to use X86_64_ACPI_NUMA
- instead, which also takes priority if both are compiled in.
+ ---help---
+ Enable K8 NUMA node topology detection. You should say Y here if
+ you have a multi processor AMD K8 system. This uses an old
+ method to read the NUMA configuration directly from the builtin
+ Northbridge of Opteron. It is recommended to use X86_64_ACPI_NUMA
+ instead, which also takes priority if both are compiled in.
config X86_64_ACPI_NUMA
def_bool y
prompt "ACPI NUMA detection"
depends on X86_64 && NUMA && ACPI && PCI
select ACPI_NUMA
- help
+ ---help---
Enable ACPI SRAT based node topology detection.
# Some NUMA nodes have memory ranges that span
@@ -1080,7 +1106,7 @@
config NUMA_EMU
bool "NUMA emulation"
depends on X86_64 && NUMA
- help
+ ---help---
Enable NUMA emulation. A flat machine will be split
into virtual nodes when booted with "numa=fake=N", where N is the
number of nodes. This is only useful for debugging.
@@ -1093,7 +1119,7 @@
default "4" if X86_NUMAQ
default "3"
depends on NEED_MULTIPLE_NODES
- help
+ ---help---
Specify the maximum number of NUMA Nodes available on the target
system. Increases memory reserved to accomodate various tables.
@@ -1131,7 +1157,7 @@
config ARCH_SPARSEMEM_ENABLE
def_bool y
- depends on X86_64 || NUMA || (EXPERIMENTAL && X86_PC) || X86_GENERICARCH
+ depends on X86_64 || NUMA || (EXPERIMENTAL && X86_32) || X86_32_NON_STANDARD
select SPARSEMEM_STATIC if X86_32
select SPARSEMEM_VMEMMAP_ENABLE if X86_64
@@ -1148,61 +1174,61 @@
config HIGHPTE
bool "Allocate 3rd-level pagetables from highmem"
depends on X86_32 && (HIGHMEM4G || HIGHMEM64G)
- help
+ ---help---
The VM uses one page table entry for each page of physical memory.
For systems with a lot of RAM, this can be wasteful of precious
low memory. Setting this option will put user-space page table
entries in high memory.
config X86_CHECK_BIOS_CORRUPTION
- bool "Check for low memory corruption"
- help
- Periodically check for memory corruption in low memory, which
- is suspected to be caused by BIOS. Even when enabled in the
- configuration, it is disabled at runtime. Enable it by
- setting "memory_corruption_check=1" on the kernel command
- line. By default it scans the low 64k of memory every 60
- seconds; see the memory_corruption_check_size and
- memory_corruption_check_period parameters in
- Documentation/kernel-parameters.txt to adjust this.
+ bool "Check for low memory corruption"
+ ---help---
+ Periodically check for memory corruption in low memory, which
+ is suspected to be caused by BIOS. Even when enabled in the
+ configuration, it is disabled at runtime. Enable it by
+ setting "memory_corruption_check=1" on the kernel command
+ line. By default it scans the low 64k of memory every 60
+ seconds; see the memory_corruption_check_size and
+ memory_corruption_check_period parameters in
+ Documentation/kernel-parameters.txt to adjust this.
- When enabled with the default parameters, this option has
- almost no overhead, as it reserves a relatively small amount
- of memory and scans it infrequently. It both detects corruption
- and prevents it from affecting the running system.
+ When enabled with the default parameters, this option has
+ almost no overhead, as it reserves a relatively small amount
+ of memory and scans it infrequently. It both detects corruption
+ and prevents it from affecting the running system.
- It is, however, intended as a diagnostic tool; if repeatable
- BIOS-originated corruption always affects the same memory,
- you can use memmap= to prevent the kernel from using that
- memory.
+ It is, however, intended as a diagnostic tool; if repeatable
+ BIOS-originated corruption always affects the same memory,
+ you can use memmap= to prevent the kernel from using that
+ memory.
config X86_BOOTPARAM_MEMORY_CORRUPTION_CHECK
- bool "Set the default setting of memory_corruption_check"
+ bool "Set the default setting of memory_corruption_check"
depends on X86_CHECK_BIOS_CORRUPTION
default y
- help
- Set whether the default state of memory_corruption_check is
- on or off.
+ ---help---
+ Set whether the default state of memory_corruption_check is
+ on or off.
config X86_RESERVE_LOW_64K
- bool "Reserve low 64K of RAM on AMI/Phoenix BIOSen"
+ bool "Reserve low 64K of RAM on AMI/Phoenix BIOSen"
default y
- help
- Reserve the first 64K of physical RAM on BIOSes that are known
- to potentially corrupt that memory range. A numbers of BIOSes are
- known to utilize this area during suspend/resume, so it must not
- be used by the kernel.
+ ---help---
+ Reserve the first 64K of physical RAM on BIOSes that are known
+ to potentially corrupt that memory range. A numbers of BIOSes are
+ known to utilize this area during suspend/resume, so it must not
+ be used by the kernel.
- Set this to N if you are absolutely sure that you trust the BIOS
- to get all its memory reservations and usages right.
+ Set this to N if you are absolutely sure that you trust the BIOS
+ to get all its memory reservations and usages right.
- If you have doubts about the BIOS (e.g. suspend/resume does not
- work or there's kernel crashes after certain hardware hotplug
- events) and it's not AMI or Phoenix, then you might want to enable
- X86_CHECK_BIOS_CORRUPTION=y to allow the kernel to check typical
- corruption patterns.
+ If you have doubts about the BIOS (e.g. suspend/resume does not
+ work or there's kernel crashes after certain hardware hotplug
+ events) and it's not AMI or Phoenix, then you might want to enable
+ X86_CHECK_BIOS_CORRUPTION=y to allow the kernel to check typical
+ corruption patterns.
- Say Y if unsure.
+ Say Y if unsure.
config MATH_EMULATION
bool
@@ -1268,7 +1294,7 @@
def_bool y
prompt "MTRR cleanup support"
depends on MTRR
- help
+ ---help---
Convert MTRR layout from continuous to discrete, so X drivers can
add writeback entries.
@@ -1283,7 +1309,7 @@
range 0 1
default "0"
depends on MTRR_SANITIZER
- help
+ ---help---
Enable mtrr cleanup default value
config MTRR_SANITIZER_SPARE_REG_NR_DEFAULT
@@ -1291,7 +1317,7 @@
range 0 7
default "1"
depends on MTRR_SANITIZER
- help
+ ---help---
mtrr cleanup spare entries default, it can be changed via
mtrr_spare_reg_nr=N on the kernel command line.
@@ -1299,7 +1325,7 @@
bool
prompt "x86 PAT support"
depends on MTRR
- help
+ ---help---
Use PAT attributes to setup page level cache control.
PATs are the modern equivalents of MTRRs and are much more
@@ -1314,20 +1340,20 @@
bool "EFI runtime service support"
depends on ACPI
---help---
- This enables the kernel to use EFI runtime services that are
- available (such as the EFI variable services).
+ This enables the kernel to use EFI runtime services that are
+ available (such as the EFI variable services).
- This option is only useful on systems that have EFI firmware.
- In addition, you should use the latest ELILO loader available
- at <http://elilo.sourceforge.net> in order to take advantage
- of EFI runtime services. However, even with this option, the
- resultant kernel should continue to boot on existing non-EFI
- platforms.
+ This option is only useful on systems that have EFI firmware.
+ In addition, you should use the latest ELILO loader available
+ at <http://elilo.sourceforge.net> in order to take advantage
+ of EFI runtime services. However, even with this option, the
+ resultant kernel should continue to boot on existing non-EFI
+ platforms.
config SECCOMP
def_bool y
prompt "Enable seccomp to safely compute untrusted bytecode"
- help
+ ---help---
This kernel feature is useful for number crunching applications
that may need to compute untrusted bytecode during their
execution. By using pipes or other transports made available to
@@ -1340,13 +1366,16 @@
If unsure, say Y. Only embedded should say N here.
+config CC_STACKPROTECTOR_ALL
+ bool
+
config CC_STACKPROTECTOR
bool "Enable -fstack-protector buffer overflow detection (EXPERIMENTAL)"
- depends on X86_64 && EXPERIMENTAL && BROKEN
- help
- This option turns on the -fstack-protector GCC feature. This
- feature puts, at the beginning of critical functions, a canary
- value on the stack just before the return address, and validates
+ select CC_STACKPROTECTOR_ALL
+ ---help---
+ This option turns on the -fstack-protector GCC feature. This
+ feature puts, at the beginning of functions, a canary value on
+ the stack just before the return address, and validates
the value just before actually returning. Stack based buffer
overflows (that need to overwrite this return address) now also
overwrite the canary, which gets detected and the attack is then
@@ -1354,22 +1383,14 @@
This feature requires gcc version 4.2 or above, or a distribution
gcc with the feature backported. Older versions are automatically
- detected and for those versions, this configuration option is ignored.
-
-config CC_STACKPROTECTOR_ALL
- bool "Use stack-protector for all functions"
- depends on CC_STACKPROTECTOR
- help
- Normally, GCC only inserts the canary value protection for
- functions that use large-ish on-stack buffers. By enabling
- this option, GCC will be asked to do this for ALL functions.
+ detected and for those versions, this configuration option is
+ ignored. (and a warning is printed during bootup)
source kernel/Kconfig.hz
config KEXEC
bool "kexec system call"
- depends on X86_BIOS_REBOOT
- help
+ ---help---
kexec is a system call that implements the ability to shutdown your
current kernel, and to start another kernel. It is like a reboot
but it is independent of the system firmware. And like a reboot
@@ -1386,7 +1407,7 @@
config CRASH_DUMP
bool "kernel crash dumps"
depends on X86_64 || (X86_32 && HIGHMEM)
- help
+ ---help---
Generate crash dump after being started by kexec.
This should be normally only set in special crash dump kernels
which are loaded in the main kernel with kexec-tools into
@@ -1401,7 +1422,7 @@
bool "kexec jump (EXPERIMENTAL)"
depends on EXPERIMENTAL
depends on KEXEC && HIBERNATION && X86_32
- help
+ ---help---
Jump between original kernel and kexeced kernel and invoke
code in physical address mode via KEXEC
@@ -1410,7 +1431,7 @@
default "0x1000000" if X86_NUMAQ
default "0x200000" if X86_64
default "0x100000"
- help
+ ---help---
This gives the physical address where the kernel is loaded.
If kernel is a not relocatable (CONFIG_RELOCATABLE=n) then
@@ -1451,7 +1472,7 @@
config RELOCATABLE
bool "Build a relocatable kernel (EXPERIMENTAL)"
depends on EXPERIMENTAL
- help
+ ---help---
This builds a kernel image that retains relocation information
so it can be loaded someplace besides the default 1MB.
The relocations tend to make the kernel binary about 10% larger,
@@ -1471,7 +1492,7 @@
default "0x100000" if X86_32
default "0x200000" if X86_64
range 0x2000 0x400000
- help
+ ---help---
This value puts the alignment restrictions on physical address
where kernel is loaded and run from. Kernel is compiled for an
address which meets above alignment restriction.
@@ -1492,7 +1513,7 @@
config HOTPLUG_CPU
bool "Support for hot-pluggable CPUs"
- depends on SMP && HOTPLUG && !X86_VOYAGER
+ depends on SMP && HOTPLUG
---help---
Say Y here to allow turning CPUs off and on. CPUs can be
controlled through /sys/devices/system/cpu.
@@ -1504,7 +1525,7 @@
def_bool y
prompt "Compat VDSO support"
depends on X86_32 || IA32_EMULATION
- help
+ ---help---
Map the 32-bit VDSO to the predictable old-style address too.
---help---
Say N here if you are running a sufficiently recent glibc
@@ -1516,7 +1537,7 @@
config CMDLINE_BOOL
bool "Built-in kernel command line"
default n
- help
+ ---help---
Allow for specifying boot arguments to the kernel at
build time. On some systems (e.g. embedded ones), it is
necessary or convenient to provide some or all of the
@@ -1534,7 +1555,7 @@
string "Built-in kernel command string"
depends on CMDLINE_BOOL
default ""
- help
+ ---help---
Enter arguments here that should be compiled into the kernel
image and used at boot time. If the boot loader provides a
command line at boot time, it is appended to this string to
@@ -1551,7 +1572,7 @@
bool "Built-in command line overrides boot loader arguments"
default n
depends on CMDLINE_BOOL
- help
+ ---help---
Set this option to 'Y' to have the kernel ignore the boot loader
command line, and use ONLY the built-in command line.
@@ -1573,7 +1594,6 @@
depends on NUMA
menu "Power management and ACPI options"
- depends on !X86_VOYAGER
config ARCH_HIBERNATION_HEADER
def_bool y
@@ -1651,7 +1671,7 @@
config APM_IGNORE_USER_SUSPEND
bool "Ignore USER SUSPEND"
- help
+ ---help---
This option will ignore USER SUSPEND requests. On machines with a
compliant APM BIOS, you want to say N. However, on the NEC Versa M
series notebooks, it is necessary to say Y because of a BIOS bug.
@@ -1675,7 +1695,7 @@
config APM_CPU_IDLE
bool "Make CPU Idle calls when idle"
- help
+ ---help---
Enable calls to APM CPU Idle/CPU Busy inside the kernel's idle loop.
On some machines, this can activate improved power savings, such as
a slowed CPU clock rate, when the machine is idle. These idle calls
@@ -1686,7 +1706,7 @@
config APM_DISPLAY_BLANK
bool "Enable console blanking using APM"
- help
+ ---help---
Enable console blanking using the APM. Some laptops can use this to
turn off the LCD backlight when the screen blanker of the Linux
virtual console blanks the screen. Note that this is only used by
@@ -1699,7 +1719,7 @@
config APM_ALLOW_INTS
bool "Allow interrupts during APM BIOS calls"
- help
+ ---help---
Normally we disable external interrupts while we are making calls to
the APM BIOS as a measure to lessen the effects of a badly behaving
BIOS implementation. The BIOS should reenable interrupts if it
@@ -1724,7 +1744,7 @@
bool "PCI support"
default y
select ARCH_SUPPORTS_MSI if (X86_LOCAL_APIC && X86_IO_APIC)
- help
+ ---help---
Find out whether you have a PCI motherboard. PCI is the name of a
bus system, i.e. the way the CPU talks to the other stuff inside
your box. Other bus systems are ISA, EISA, MicroChannel (MCA) or
@@ -1795,7 +1815,7 @@
config DMAR
bool "Support for DMA Remapping Devices (EXPERIMENTAL)"
depends on X86_64 && PCI_MSI && ACPI && EXPERIMENTAL
- help
+ ---help---
DMA remapping (DMAR) devices support enables independent address
translations for Direct Memory Access (DMA) from devices.
These DMA remapping devices are reported via ACPI tables
@@ -1817,29 +1837,30 @@
def_bool y
prompt "Support for Graphics workaround"
depends on DMAR
- help
- Current Graphics drivers tend to use physical address
- for DMA and avoid using DMA APIs. Setting this config
- option permits the IOMMU driver to set a unity map for
- all the OS-visible memory. Hence the driver can continue
- to use physical addresses for DMA.
+ ---help---
+ Current Graphics drivers tend to use physical address
+ for DMA and avoid using DMA APIs. Setting this config
+ option permits the IOMMU driver to set a unity map for
+ all the OS-visible memory. Hence the driver can continue
+ to use physical addresses for DMA.
config DMAR_FLOPPY_WA
def_bool y
depends on DMAR
- help
- Floppy disk drivers are know to bypass DMA API calls
- thereby failing to work when IOMMU is enabled. This
- workaround will setup a 1:1 mapping for the first
- 16M to make floppy (an ISA device) work.
+ ---help---
+ Floppy disk drivers are know to bypass DMA API calls
+ thereby failing to work when IOMMU is enabled. This
+ workaround will setup a 1:1 mapping for the first
+ 16M to make floppy (an ISA device) work.
config INTR_REMAP
bool "Support for Interrupt Remapping (EXPERIMENTAL)"
depends on X86_64 && X86_IO_APIC && PCI_MSI && ACPI && EXPERIMENTAL
- help
- Supports Interrupt remapping for IO-APIC and MSI devices.
- To use x2apic mode in the CPU's which support x2APIC enhancements or
- to support platforms with CPU's having > 8 bit APIC ID, say Y.
+ select X86_X2APIC
+ ---help---
+ Supports Interrupt remapping for IO-APIC and MSI devices.
+ To use x2apic mode in the CPU's which support x2APIC enhancements or
+ to support platforms with CPU's having > 8 bit APIC ID, say Y.
source "drivers/pci/pcie/Kconfig"
@@ -1853,8 +1874,7 @@
config ISA
bool "ISA support"
- depends on !X86_VOYAGER
- help
+ ---help---
Find out whether you have ISA slots on your motherboard. ISA is the
name of a bus system, i.e. the way the CPU talks to the other stuff
inside your box. Other bus systems are PCI, EISA, MicroChannel
@@ -1880,9 +1900,8 @@
source "drivers/eisa/Kconfig"
config MCA
- bool "MCA support" if !X86_VOYAGER
- default y if X86_VOYAGER
- help
+ bool "MCA support"
+ ---help---
MicroChannel Architecture is found in some IBM PS/2 machines and
laptops. It is a bus system similar to PCI or ISA. See
<file:Documentation/mca.txt> (and especially the web page given
@@ -1892,8 +1911,7 @@
config SCx200
tristate "NatSemi SCx200 support"
- depends on !X86_VOYAGER
- help
+ ---help---
This provides basic support for National Semiconductor's
(now AMD's) Geode processors. The driver probes for the
PCI-IDs of several on-chip devices, so its a good dependency
@@ -1905,7 +1923,7 @@
tristate "NatSemi SCx200 27MHz High-Resolution Timer Support"
depends on SCx200 && GENERIC_TIME
default y
- help
+ ---help---
This driver provides a clocksource built upon the on-chip
27MHz high-resolution timer. Its also a workaround for
NSC Geode SC-1100's buggy TSC, which loses time when the
@@ -1916,7 +1934,7 @@
def_bool y
prompt "Geode Multi-Function General Purpose Timer (MFGPT) events"
depends on MGEODE_LX && GENERIC_TIME && GENERIC_CLOCKEVENTS
- help
+ ---help---
This driver provides a clock event source based on the MFGPT
timer(s) in the CS5535 and CS5536 companion chip for the geode.
MFGPTs have a better resolution and max interval than the
@@ -1925,7 +1943,7 @@
config OLPC
bool "One Laptop Per Child support"
default n
- help
+ ---help---
Add support for detecting the unique features of the OLPC
XO hardware.
@@ -1950,16 +1968,16 @@
bool "IA32 Emulation"
depends on X86_64
select COMPAT_BINFMT_ELF
- help
+ ---help---
Include code to run 32-bit programs under a 64-bit kernel. You should
likely turn this on, unless you're 100% sure that you don't have any
32-bit programs left.
config IA32_AOUT
- tristate "IA32 a.out support"
- depends on IA32_EMULATION
- help
- Support old a.out binaries in the 32bit emulation.
+ tristate "IA32 a.out support"
+ depends on IA32_EMULATION
+ ---help---
+ Support old a.out binaries in the 32bit emulation.
config COMPAT
def_bool y
diff --git a/arch/x86/Kconfig.cpu b/arch/x86/Kconfig.cpu
index c98d52e8..a95eaf0 100644
--- a/arch/x86/Kconfig.cpu
+++ b/arch/x86/Kconfig.cpu
@@ -50,7 +50,7 @@
config M486
bool "486"
depends on X86_32
- help
+ ---help---
Select this for a 486 series processor, either Intel or one of the
compatible processors from AMD, Cyrix, IBM, or Intel. Includes DX,
DX2, and DX4 variants; also SL/SLC/SLC2/SLC3/SX/SX2 and UMC U5D or
@@ -59,7 +59,7 @@
config M586
bool "586/K5/5x86/6x86/6x86MX"
depends on X86_32
- help
+ ---help---
Select this for an 586 or 686 series processor such as the AMD K5,
the Cyrix 5x86, 6x86 and 6x86MX. This choice does not
assume the RDTSC (Read Time Stamp Counter) instruction.
@@ -67,21 +67,21 @@
config M586TSC
bool "Pentium-Classic"
depends on X86_32
- help
+ ---help---
Select this for a Pentium Classic processor with the RDTSC (Read
Time Stamp Counter) instruction for benchmarking.
config M586MMX
bool "Pentium-MMX"
depends on X86_32
- help
+ ---help---
Select this for a Pentium with the MMX graphics/multimedia
extended instructions.
config M686
bool "Pentium-Pro"
depends on X86_32
- help
+ ---help---
Select this for Intel Pentium Pro chips. This enables the use of
Pentium Pro extended instructions, and disables the init-time guard
against the f00f bug found in earlier Pentiums.
@@ -89,7 +89,7 @@
config MPENTIUMII
bool "Pentium-II/Celeron(pre-Coppermine)"
depends on X86_32
- help
+ ---help---
Select this for Intel chips based on the Pentium-II and
pre-Coppermine Celeron core. This option enables an unaligned
copy optimization, compiles the kernel with optimization flags
@@ -99,7 +99,7 @@
config MPENTIUMIII
bool "Pentium-III/Celeron(Coppermine)/Pentium-III Xeon"
depends on X86_32
- help
+ ---help---
Select this for Intel chips based on the Pentium-III and
Celeron-Coppermine core. This option enables use of some
extended prefetch instructions in addition to the Pentium II
@@ -108,14 +108,14 @@
config MPENTIUMM
bool "Pentium M"
depends on X86_32
- help
+ ---help---
Select this for Intel Pentium M (not Pentium-4 M)
notebook chips.
config MPENTIUM4
bool "Pentium-4/Celeron(P4-based)/Pentium-4 M/older Xeon"
depends on X86_32
- help
+ ---help---
Select this for Intel Pentium 4 chips. This includes the
Pentium 4, Pentium D, P4-based Celeron and Xeon, and
Pentium-4 M (not Pentium M) chips. This option enables compile
@@ -151,7 +151,7 @@
config MK6
bool "K6/K6-II/K6-III"
depends on X86_32
- help
+ ---help---
Select this for an AMD K6-family processor. Enables use of
some extended instructions, and passes appropriate optimization
flags to GCC.
@@ -159,14 +159,14 @@
config MK7
bool "Athlon/Duron/K7"
depends on X86_32
- help
+ ---help---
Select this for an AMD Athlon K7-family processor. Enables use of
some extended instructions, and passes appropriate optimization
flags to GCC.
config MK8
bool "Opteron/Athlon64/Hammer/K8"
- help
+ ---help---
Select this for an AMD Opteron or Athlon64 Hammer-family processor.
Enables use of some extended instructions, and passes appropriate
optimization flags to GCC.
@@ -174,7 +174,7 @@
config MCRUSOE
bool "Crusoe"
depends on X86_32
- help
+ ---help---
Select this for a Transmeta Crusoe processor. Treats the processor
like a 586 with TSC, and sets some GCC optimization flags (like a
Pentium Pro with no alignment requirements).
@@ -182,13 +182,13 @@
config MEFFICEON
bool "Efficeon"
depends on X86_32
- help
+ ---help---
Select this for a Transmeta Efficeon processor.
config MWINCHIPC6
bool "Winchip-C6"
depends on X86_32
- help
+ ---help---
Select this for an IDT Winchip C6 chip. Linux and GCC
treat this chip as a 586TSC with some extended instructions
and alignment requirements.
@@ -196,7 +196,7 @@
config MWINCHIP3D
bool "Winchip-2/Winchip-2A/Winchip-3"
depends on X86_32
- help
+ ---help---
Select this for an IDT Winchip-2, 2A or 3. Linux and GCC
treat this chip as a 586TSC with some extended instructions
and alignment requirements. Also enable out of order memory
@@ -206,19 +206,19 @@
config MGEODEGX1
bool "GeodeGX1"
depends on X86_32
- help
+ ---help---
Select this for a Geode GX1 (Cyrix MediaGX) chip.
config MGEODE_LX
bool "Geode GX/LX"
depends on X86_32
- help
+ ---help---
Select this for AMD Geode GX and LX processors.
config MCYRIXIII
bool "CyrixIII/VIA-C3"
depends on X86_32
- help
+ ---help---
Select this for a Cyrix III or C3 chip. Presently Linux and GCC
treat this chip as a generic 586. Whilst the CPU is 686 class,
it lacks the cmov extension which gcc assumes is present when
@@ -230,7 +230,7 @@
config MVIAC3_2
bool "VIA C3-2 (Nehemiah)"
depends on X86_32
- help
+ ---help---
Select this for a VIA C3 "Nehemiah". Selecting this enables usage
of SSE and tells gcc to treat the CPU as a 686.
Note, this kernel will not boot on older (pre model 9) C3s.
@@ -238,14 +238,14 @@
config MVIAC7
bool "VIA C7"
depends on X86_32
- help
+ ---help---
Select this for a VIA C7. Selecting this uses the correct cache
shift and tells gcc to treat the CPU as a 686.
config MPSC
bool "Intel P4 / older Netburst based Xeon"
depends on X86_64
- help
+ ---help---
Optimize for Intel Pentium 4, Pentium D and older Nocona/Dempsey
Xeon CPUs with Intel 64bit which is compatible with x86-64.
Note that the latest Xeons (Xeon 51xx and 53xx) are not based on the
@@ -255,7 +255,7 @@
config MCORE2
bool "Core 2/newer Xeon"
- help
+ ---help---
Select this for Intel Core 2 and newer Core 2 Xeons (Xeon 51xx and
53xx) CPUs. You can distinguish newer from older Xeons by the CPU
@@ -265,7 +265,7 @@
config GENERIC_CPU
bool "Generic-x86-64"
depends on X86_64
- help
+ ---help---
Generic x86-64 CPU.
Run equally well on all x86-64 CPUs.
@@ -274,7 +274,7 @@
config X86_GENERIC
bool "Generic x86 support"
depends on X86_32
- help
+ ---help---
Instead of just including optimizations for the selected
x86 variant (e.g. PII, Crusoe or Athlon), include some more
generic optimizations as well. This will make the kernel
@@ -294,25 +294,23 @@
# Define implied options from the CPU selection here
config X86_L1_CACHE_BYTES
int
- default "128" if GENERIC_CPU || MPSC
- default "64" if MK8 || MCORE2
- depends on X86_64
+ default "128" if MPSC
+ default "64" if GENERIC_CPU || MK8 || MCORE2 || X86_32
config X86_INTERNODE_CACHE_BYTES
int
default "4096" if X86_VSMP
default X86_L1_CACHE_BYTES if !X86_VSMP
- depends on X86_64
config X86_CMPXCHG
def_bool X86_64 || (X86_32 && !M386)
config X86_L1_CACHE_SHIFT
int
- default "7" if MPENTIUM4 || X86_GENERIC || GENERIC_CPU || MPSC
+ default "7" if MPENTIUM4 || MPSC
default "4" if X86_ELAN || M486 || M386 || MGEODEGX1
default "5" if MWINCHIP3D || MWINCHIPC6 || MCRUSOE || MEFFICEON || MCYRIXIII || MK6 || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || M586 || MVIAC3_2 || MGEODE_LX
- default "6" if MK7 || MK8 || MPENTIUMM || MCORE2 || MVIAC7
+ default "6" if MK7 || MK8 || MPENTIUMM || MCORE2 || MVIAC7 || X86_GENERIC || GENERIC_CPU
config X86_XADD
def_bool y
@@ -321,7 +319,7 @@
config X86_PPRO_FENCE
bool "PentiumPro memory ordering errata workaround"
depends on M686 || M586MMX || M586TSC || M586 || M486 || M386 || MGEODEGX1
- help
+ ---help---
Old PentiumPro multiprocessor systems had errata that could cause
memory operations to violate the x86 ordering standard in rare cases.
Enabling this option will attempt to work around some (but not all)
@@ -414,14 +412,14 @@
menuconfig PROCESSOR_SELECT
bool "Supported processor vendors" if EMBEDDED
- help
+ ---help---
This lets you choose what x86 vendor support code your kernel
will include.
config CPU_SUP_INTEL
default y
bool "Support Intel processors" if PROCESSOR_SELECT
- help
+ ---help---
This enables detection, tunings and quirks for Intel processors
You need this enabled if you want your kernel to run on an
@@ -435,7 +433,7 @@
default y
bool "Support Cyrix processors" if PROCESSOR_SELECT
depends on !64BIT
- help
+ ---help---
This enables detection, tunings and quirks for Cyrix processors
You need this enabled if you want your kernel to run on a
@@ -448,7 +446,7 @@
config CPU_SUP_AMD
default y
bool "Support AMD processors" if PROCESSOR_SELECT
- help
+ ---help---
This enables detection, tunings and quirks for AMD processors
You need this enabled if you want your kernel to run on an
@@ -462,7 +460,7 @@
default y
bool "Support Centaur processors" if PROCESSOR_SELECT
depends on !64BIT
- help
+ ---help---
This enables detection, tunings and quirks for Centaur processors
You need this enabled if you want your kernel to run on a
@@ -476,7 +474,7 @@
default y
bool "Support Centaur processors" if PROCESSOR_SELECT
depends on 64BIT
- help
+ ---help---
This enables detection, tunings and quirks for Centaur processors
You need this enabled if you want your kernel to run on a
@@ -490,7 +488,7 @@
default y
bool "Support Transmeta processors" if PROCESSOR_SELECT
depends on !64BIT
- help
+ ---help---
This enables detection, tunings and quirks for Transmeta processors
You need this enabled if you want your kernel to run on a
@@ -504,7 +502,7 @@
default y
bool "Support UMC processors" if PROCESSOR_SELECT
depends on !64BIT
- help
+ ---help---
This enables detection, tunings and quirks for UMC processors
You need this enabled if you want your kernel to run on a
@@ -523,7 +521,7 @@
bool "Branch Trace Store"
default y
depends on X86_DEBUGCTLMSR
- help
+ ---help---
This adds a ptrace interface to the hardware's branch trace store.
Debuggers may use it to collect an execution trace of the debugged
diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug
index e1983fa..fdb45df 100644
--- a/arch/x86/Kconfig.debug
+++ b/arch/x86/Kconfig.debug
@@ -7,7 +7,7 @@
config STRICT_DEVMEM
bool "Filter access to /dev/mem"
- help
+ ---help---
If this option is disabled, you allow userspace (root) access to all
of memory, including kernel and userspace memory. Accidental
access to this is obviously disastrous, but specific access can
@@ -25,7 +25,7 @@
config X86_VERBOSE_BOOTUP
bool "Enable verbose x86 bootup info messages"
default y
- help
+ ---help---
Enables the informational output from the decompression stage
(e.g. bzImage) of the boot. If you disable this you will still
see errors. Disable this if you want silent bootup.
@@ -33,7 +33,7 @@
config EARLY_PRINTK
bool "Early printk" if EMBEDDED
default y
- help
+ ---help---
Write kernel log output directly into the VGA buffer or to a serial
port.
@@ -47,7 +47,7 @@
bool "Early printk via EHCI debug port"
default n
depends on EARLY_PRINTK && PCI
- help
+ ---help---
Write kernel log output directly into the EHCI debug port.
This is useful for kernel debugging when your machine crashes very
@@ -59,14 +59,14 @@
config DEBUG_STACKOVERFLOW
bool "Check for stack overflows"
depends on DEBUG_KERNEL
- help
+ ---help---
This option will cause messages to be printed if free stack space
drops below a certain limit.
config DEBUG_STACK_USAGE
bool "Stack utilization instrumentation"
depends on DEBUG_KERNEL
- help
+ ---help---
Enables the display of the minimum amount of free stack which each
task has ever had available in the sysrq-T and sysrq-P debug output.
@@ -75,7 +75,7 @@
config DEBUG_PAGEALLOC
bool "Debug page memory allocations"
depends on DEBUG_KERNEL
- help
+ ---help---
Unmap pages from the kernel linear mapping after free_pages().
This results in a large slowdown, but helps to find certain types
of memory corruptions.
@@ -83,9 +83,9 @@
config DEBUG_PER_CPU_MAPS
bool "Debug access to per_cpu maps"
depends on DEBUG_KERNEL
- depends on X86_SMP
+ depends on SMP
default n
- help
+ ---help---
Say Y to verify that the per_cpu map being accessed has
been setup. Adds a fair amount of code to kernel memory
and decreases performance.
@@ -96,7 +96,7 @@
bool "Export kernel pagetable layout to userspace via debugfs"
depends on DEBUG_KERNEL
select DEBUG_FS
- help
+ ---help---
Say Y here if you want to show the kernel pagetable layout in a
debugfs file. This information is only useful for kernel developers
who are working in architecture specific areas of the kernel.
@@ -108,7 +108,7 @@
bool "Write protect kernel read-only data structures"
default y
depends on DEBUG_KERNEL
- help
+ ---help---
Mark the kernel read-only data as write-protected in the pagetables,
in order to catch accidental (and incorrect) writes to such const
data. This is recommended so that we can catch kernel bugs sooner.
@@ -117,7 +117,8 @@
config DEBUG_RODATA_TEST
bool "Testcase for the DEBUG_RODATA feature"
depends on DEBUG_RODATA
- help
+ default y
+ ---help---
This option enables a testcase for the DEBUG_RODATA
feature as well as for the change_page_attr() infrastructure.
If in doubt, say "N"
@@ -125,7 +126,7 @@
config DEBUG_NX_TEST
tristate "Testcase for the NX non-executable stack feature"
depends on DEBUG_KERNEL && m
- help
+ ---help---
This option enables a testcase for the CPU NX capability
and the software setup of this feature.
If in doubt, say "N"
@@ -133,7 +134,7 @@
config 4KSTACKS
bool "Use 4Kb for kernel stacks instead of 8Kb"
depends on X86_32
- help
+ ---help---
If you say Y here the kernel will use a 4Kb stacksize for the
kernel stack attached to each process/thread. This facilitates
running more threads on a system and also reduces the pressure
@@ -144,7 +145,7 @@
default y
bool "Enable doublefault exception handler" if EMBEDDED
depends on X86_32
- help
+ ---help---
This option allows trapping of rare doublefault exceptions that
would otherwise cause a system to silently reboot. Disabling this
option saves about 4k and might cause you much additional grey
@@ -154,7 +155,7 @@
bool "Enable IOMMU debugging"
depends on GART_IOMMU && DEBUG_KERNEL
depends on X86_64
- help
+ ---help---
Force the IOMMU to on even when you have less than 4GB of
memory and add debugging code. On overflow always panic. And
allow to enable IOMMU leak tracing. Can be disabled at boot
@@ -170,7 +171,7 @@
bool "IOMMU leak tracing"
depends on DEBUG_KERNEL
depends on IOMMU_DEBUG
- help
+ ---help---
Add a simple leak tracer to the IOMMU code. This is useful when you
are debugging a buggy device driver that leaks IOMMU mappings.
@@ -203,25 +204,25 @@
config IO_DELAY_0X80
bool "port 0x80 based port-IO delay [recommended]"
- help
+ ---help---
This is the traditional Linux IO delay used for in/out_p.
It is the most tested hence safest selection here.
config IO_DELAY_0XED
bool "port 0xed based port-IO delay"
- help
+ ---help---
Use port 0xed as the IO delay. This frees up port 0x80 which is
often used as a hardware-debug port.
config IO_DELAY_UDELAY
bool "udelay based port-IO delay"
- help
+ ---help---
Use udelay(2) as the IO delay method. This provides the delay
while not having any side-effect on the IO port space.
config IO_DELAY_NONE
bool "no port-IO delay"
- help
+ ---help---
No port-IO delay. Will break on old boxes that require port-IO
delay for certain operations. Should work on most new machines.
@@ -255,18 +256,18 @@
bool "Debug boot parameters"
depends on DEBUG_KERNEL
depends on DEBUG_FS
- help
+ ---help---
This option will cause struct boot_params to be exported via debugfs.
config CPA_DEBUG
bool "CPA self-test code"
depends on DEBUG_KERNEL
- help
+ ---help---
Do change_page_attr() self-tests every 30 seconds.
config OPTIMIZE_INLINING
bool "Allow gcc to uninline functions marked 'inline'"
- help
+ ---help---
This option determines if the kernel forces gcc to inline the functions
developers have marked 'inline'. Doing so takes away freedom from gcc to
do what it thinks is best, which is desirable for the gcc 3.x series of
@@ -279,4 +280,3 @@
If unsure, say N.
endmenu
-
diff --git a/arch/x86/Makefile b/arch/x86/Makefile
index d1a47ad..1836191 100644
--- a/arch/x86/Makefile
+++ b/arch/x86/Makefile
@@ -70,14 +70,17 @@
# this works around some issues with generating unwind tables in older gccs
# newer gccs do it by default
KBUILD_CFLAGS += -maccumulate-outgoing-args
+endif
- stackp := $(CONFIG_SHELL) $(srctree)/scripts/gcc-x86_64-has-stack-protector.sh
- stackp-$(CONFIG_CC_STACKPROTECTOR) := $(shell $(stackp) \
- "$(CC)" -fstack-protector )
- stackp-$(CONFIG_CC_STACKPROTECTOR_ALL) += $(shell $(stackp) \
- "$(CC)" -fstack-protector-all )
-
- KBUILD_CFLAGS += $(stackp-y)
+ifdef CONFIG_CC_STACKPROTECTOR
+ cc_has_sp := $(srctree)/scripts/gcc-x86_$(BITS)-has-stack-protector.sh
+ ifeq ($(shell $(CONFIG_SHELL) $(cc_has_sp) $(CC)),y)
+ stackp-y := -fstack-protector
+ stackp-$(CONFIG_CC_STACKPROTECTOR_ALL) += -fstack-protector-all
+ KBUILD_CFLAGS += $(stackp-y)
+ else
+ $(warning stack protector enabled but no compiler support)
+ endif
endif
# Stackpointer is addressed different for 32 bit and 64 bit x86
@@ -102,29 +105,6 @@
# prevent gcc from generating any FP code by mistake
KBUILD_CFLAGS += $(call cc-option,-mno-sse -mno-mmx -mno-sse2 -mno-3dnow,)
-###
-# Sub architecture support
-# fcore-y is linked before mcore-y files.
-
-# Default subarch .c files
-mcore-y := arch/x86/mach-default/
-
-# Voyager subarch support
-mflags-$(CONFIG_X86_VOYAGER) := -Iarch/x86/include/asm/mach-voyager
-mcore-$(CONFIG_X86_VOYAGER) := arch/x86/mach-voyager/
-
-# generic subarchitecture
-mflags-$(CONFIG_X86_GENERICARCH):= -Iarch/x86/include/asm/mach-generic
-fcore-$(CONFIG_X86_GENERICARCH) += arch/x86/mach-generic/
-mcore-$(CONFIG_X86_GENERICARCH) := arch/x86/mach-default/
-
-# default subarch .h files
-mflags-y += -Iarch/x86/include/asm/mach-default
-
-# 64 bit does not support subarch support - clear sub arch variables
-fcore-$(CONFIG_X86_64) :=
-mcore-$(CONFIG_X86_64) :=
-
KBUILD_CFLAGS += $(mflags-y)
KBUILD_AFLAGS += $(mflags-y)
@@ -150,9 +130,6 @@
core-y += arch/x86/kernel/
core-y += arch/x86/mm/
-# Remaining sub architecture files
-core-y += $(mcore-y)
-
core-y += arch/x86/crypto/
core-y += arch/x86/vdso/
core-$(CONFIG_IA32_EMULATION) += arch/x86/ia32/
diff --git a/arch/x86/boot/Makefile b/arch/x86/boot/Makefile
index cd48c72..c70eff6 100644
--- a/arch/x86/boot/Makefile
+++ b/arch/x86/boot/Makefile
@@ -32,7 +32,6 @@
setup-y += header.o main.o mca.o memory.o pm.o pmjump.o
setup-y += printf.o string.o tty.o video.o video-mode.o version.o
setup-$(CONFIG_X86_APM_BOOT) += apm.o
-setup-$(CONFIG_X86_VOYAGER) += voyager.o
# The link order of the video-*.o modules can matter. In particular,
# video-vga.o *must* be listed first, followed by video-vesa.o.
diff --git a/arch/x86/boot/a20.c b/arch/x86/boot/a20.c
index 4063d63..7c19ce8 100644
--- a/arch/x86/boot/a20.c
+++ b/arch/x86/boot/a20.c
@@ -2,6 +2,7 @@
*
* Copyright (C) 1991, 1992 Linus Torvalds
* Copyright 2007-2008 rPath, Inc. - All Rights Reserved
+ * Copyright 2009 Intel Corporation
*
* This file is part of the Linux kernel, and is made available under
* the terms of the GNU General Public License version 2.
@@ -15,16 +16,23 @@
#include "boot.h"
#define MAX_8042_LOOPS 100000
+#define MAX_8042_FF 32
static int empty_8042(void)
{
u8 status;
int loops = MAX_8042_LOOPS;
+ int ffs = MAX_8042_FF;
while (loops--) {
io_delay();
status = inb(0x64);
+ if (status == 0xff) {
+ /* FF is a plausible, but very unlikely status */
+ if (!--ffs)
+ return -1; /* Assume no KBC present */
+ }
if (status & 1) {
/* Read and discard input data */
io_delay();
@@ -118,44 +126,37 @@
int enable_a20(void)
{
-#if defined(CONFIG_X86_ELAN)
- /* Elan croaks if we try to touch the KBC */
- enable_a20_fast();
- while (!a20_test_long())
- ;
- return 0;
-#elif defined(CONFIG_X86_VOYAGER)
- /* On Voyager, a20_test() is unsafe? */
- enable_a20_kbc();
- return 0;
-#else
int loops = A20_ENABLE_LOOPS;
- while (loops--) {
- /* First, check to see if A20 is already enabled
- (legacy free, etc.) */
- if (a20_test_short())
- return 0;
+ int kbc_err;
- /* Next, try the BIOS (INT 0x15, AX=0x2401) */
- enable_a20_bios();
- if (a20_test_short())
- return 0;
+ while (loops--) {
+ /* First, check to see if A20 is already enabled
+ (legacy free, etc.) */
+ if (a20_test_short())
+ return 0;
+
+ /* Next, try the BIOS (INT 0x15, AX=0x2401) */
+ enable_a20_bios();
+ if (a20_test_short())
+ return 0;
+
+ /* Try enabling A20 through the keyboard controller */
+ kbc_err = empty_8042();
- /* Try enabling A20 through the keyboard controller */
- empty_8042();
- if (a20_test_short())
- return 0; /* BIOS worked, but with delayed reaction */
-
- enable_a20_kbc();
- if (a20_test_long())
- return 0;
-
- /* Finally, try enabling the "fast A20 gate" */
- enable_a20_fast();
- if (a20_test_long())
- return 0;
- }
-
- return -1;
-#endif
+ if (a20_test_short())
+ return 0; /* BIOS worked, but with delayed reaction */
+
+ if (!kbc_err) {
+ enable_a20_kbc();
+ if (a20_test_long())
+ return 0;
+ }
+
+ /* Finally, try enabling the "fast A20 gate" */
+ enable_a20_fast();
+ if (a20_test_long())
+ return 0;
+ }
+
+ return -1;
}
diff --git a/arch/x86/boot/boot.h b/arch/x86/boot/boot.h
index cc0ef13..7b2692e 100644
--- a/arch/x86/boot/boot.h
+++ b/arch/x86/boot/boot.h
@@ -302,9 +302,6 @@
/* video-vesa.c */
void vesa_store_edid(void);
-/* voyager.c */
-int query_voyager(void);
-
#endif /* __ASSEMBLY__ */
#endif /* BOOT_BOOT_H */
diff --git a/arch/x86/boot/compressed/head_32.S b/arch/x86/boot/compressed/head_32.S
index 29c5fbf..3a8a866 100644
--- a/arch/x86/boot/compressed/head_32.S
+++ b/arch/x86/boot/compressed/head_32.S
@@ -25,14 +25,12 @@
#include <linux/linkage.h>
#include <asm/segment.h>
-#include <asm/page.h>
+#include <asm/page_types.h>
#include <asm/boot.h>
#include <asm/asm-offsets.h>
.section ".text.head","ax",@progbits
- .globl startup_32
-
-startup_32:
+ENTRY(startup_32)
cld
/* test KEEP_SEGMENTS flag to see if the bootloader is asking
* us to not reload segments */
@@ -113,6 +111,8 @@
*/
leal relocated(%ebx), %eax
jmp *%eax
+ENDPROC(startup_32)
+
.section ".text"
relocated:
diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S
index 1d5dff4..ed4a829 100644
--- a/arch/x86/boot/compressed/head_64.S
+++ b/arch/x86/boot/compressed/head_64.S
@@ -26,8 +26,8 @@
#include <linux/linkage.h>
#include <asm/segment.h>
-#include <asm/pgtable.h>
-#include <asm/page.h>
+#include <asm/pgtable_types.h>
+#include <asm/page_types.h>
#include <asm/boot.h>
#include <asm/msr.h>
#include <asm/processor-flags.h>
@@ -35,9 +35,7 @@
.section ".text.head"
.code32
- .globl startup_32
-
-startup_32:
+ENTRY(startup_32)
cld
/* test KEEP_SEGMENTS flag to see if the bootloader is asking
* us to not reload segments */
@@ -176,6 +174,7 @@
/* Jump from 32bit compatibility mode into 64bit mode. */
lret
+ENDPROC(startup_32)
no_longmode:
/* This isn't an x86-64 CPU so hang */
@@ -295,7 +294,6 @@
call decompress_kernel
popq %rsi
-
/*
* Jump to the decompressed kernel.
*/
diff --git a/arch/x86/boot/copy.S b/arch/x86/boot/copy.S
index ef50c84..11f272c 100644
--- a/arch/x86/boot/copy.S
+++ b/arch/x86/boot/copy.S
@@ -8,6 +8,8 @@
*
* ----------------------------------------------------------------------- */
+#include <linux/linkage.h>
+
/*
* Memory copy routines
*/
@@ -15,9 +17,7 @@
.code16gcc
.text
- .globl memcpy
- .type memcpy, @function
-memcpy:
+GLOBAL(memcpy)
pushw %si
pushw %di
movw %ax, %di
@@ -31,11 +31,9 @@
popw %di
popw %si
ret
- .size memcpy, .-memcpy
+ENDPROC(memcpy)
- .globl memset
- .type memset, @function
-memset:
+GLOBAL(memset)
pushw %di
movw %ax, %di
movzbl %dl, %eax
@@ -48,52 +46,42 @@
rep; stosb
popw %di
ret
- .size memset, .-memset
+ENDPROC(memset)
- .globl copy_from_fs
- .type copy_from_fs, @function
-copy_from_fs:
+GLOBAL(copy_from_fs)
pushw %ds
pushw %fs
popw %ds
call memcpy
popw %ds
ret
- .size copy_from_fs, .-copy_from_fs
+ENDPROC(copy_from_fs)
- .globl copy_to_fs
- .type copy_to_fs, @function
-copy_to_fs:
+GLOBAL(copy_to_fs)
pushw %es
pushw %fs
popw %es
call memcpy
popw %es
ret
- .size copy_to_fs, .-copy_to_fs
+ENDPROC(copy_to_fs)
#if 0 /* Not currently used, but can be enabled as needed */
-
- .globl copy_from_gs
- .type copy_from_gs, @function
-copy_from_gs:
+GLOBAL(copy_from_gs)
pushw %ds
pushw %gs
popw %ds
call memcpy
popw %ds
ret
- .size copy_from_gs, .-copy_from_gs
- .globl copy_to_gs
+ENDPROC(copy_from_gs)
- .type copy_to_gs, @function
-copy_to_gs:
+GLOBAL(copy_to_gs)
pushw %es
pushw %gs
popw %es
call memcpy
popw %es
ret
- .size copy_to_gs, .-copy_to_gs
-
+ENDPROC(copy_to_gs)
#endif
diff --git a/arch/x86/boot/header.S b/arch/x86/boot/header.S
index b993062..7ccff48 100644
--- a/arch/x86/boot/header.S
+++ b/arch/x86/boot/header.S
@@ -19,7 +19,7 @@
#include <linux/utsrelease.h>
#include <asm/boot.h>
#include <asm/e820.h>
-#include <asm/page.h>
+#include <asm/page_types.h>
#include <asm/setup.h>
#include "boot.h"
#include "offsets.h"
diff --git a/arch/x86/boot/main.c b/arch/x86/boot/main.c
index 197421d..58f0415 100644
--- a/arch/x86/boot/main.c
+++ b/arch/x86/boot/main.c
@@ -149,11 +149,6 @@
/* Query MCA information */
query_mca();
- /* Voyager */
-#ifdef CONFIG_X86_VOYAGER
- query_voyager();
-#endif
-
/* Query Intel SpeedStep (IST) information */
query_ist();
diff --git a/arch/x86/boot/pmjump.S b/arch/x86/boot/pmjump.S
index 141b6e2..019c17a 100644
--- a/arch/x86/boot/pmjump.S
+++ b/arch/x86/boot/pmjump.S
@@ -15,18 +15,15 @@
#include <asm/boot.h>
#include <asm/processor-flags.h>
#include <asm/segment.h>
+#include <linux/linkage.h>
.text
-
- .globl protected_mode_jump
- .type protected_mode_jump, @function
-
.code16
/*
* void protected_mode_jump(u32 entrypoint, u32 bootparams);
*/
-protected_mode_jump:
+GLOBAL(protected_mode_jump)
movl %edx, %esi # Pointer to boot_params table
xorl %ebx, %ebx
@@ -47,12 +44,10 @@
.byte 0x66, 0xea # ljmpl opcode
2: .long in_pm32 # offset
.word __BOOT_CS # segment
-
- .size protected_mode_jump, .-protected_mode_jump
+ENDPROC(protected_mode_jump)
.code32
- .type in_pm32, @function
-in_pm32:
+GLOBAL(in_pm32)
# Set up data segments for flat 32-bit mode
movl %ecx, %ds
movl %ecx, %es
@@ -78,5 +73,4 @@
lldt %cx
jmpl *%eax # Jump to the 32-bit entrypoint
-
- .size in_pm32, .-in_pm32
+ENDPROC(in_pm32)
diff --git a/arch/x86/boot/voyager.c b/arch/x86/boot/voyager.c
deleted file mode 100644
index 433909d..0000000
--- a/arch/x86/boot/voyager.c
+++ /dev/null
@@ -1,40 +0,0 @@
-/* -*- linux-c -*- ------------------------------------------------------- *
- *
- * Copyright (C) 1991, 1992 Linus Torvalds
- * Copyright 2007 rPath, Inc. - All Rights Reserved
- *
- * This file is part of the Linux kernel, and is made available under
- * the terms of the GNU General Public License version 2.
- *
- * ----------------------------------------------------------------------- */
-
-/*
- * Get the Voyager config information
- */
-
-#include "boot.h"
-
-int query_voyager(void)
-{
- u8 err;
- u16 es, di;
- /* Abuse the apm_bios_info area for this */
- u8 *data_ptr = (u8 *)&boot_params.apm_bios_info;
-
- data_ptr[0] = 0xff; /* Flag on config not found(?) */
-
- asm("pushw %%es ; "
- "int $0x15 ; "
- "setc %0 ; "
- "movw %%es, %1 ; "
- "popw %%es"
- : "=q" (err), "=r" (es), "=D" (di)
- : "a" (0xffc0));
-
- if (err)
- return -1; /* Not Voyager */
-
- set_fs(es);
- copy_from_fs(data_ptr, di, 7); /* Table is 7 bytes apparently */
- return 0;
-}
diff --git a/arch/x86/configs/i386_defconfig b/arch/x86/configs/i386_defconfig
index edba00d..235b81d 100644
--- a/arch/x86/configs/i386_defconfig
+++ b/arch/x86/configs/i386_defconfig
@@ -1,14 +1,13 @@
#
# Automatically generated make config: don't edit
-# Linux kernel version: 2.6.27-rc5
-# Wed Sep 3 17:23:09 2008
+# Linux kernel version: 2.6.29-rc4
+# Tue Feb 24 15:50:58 2009
#
# CONFIG_64BIT is not set
CONFIG_X86_32=y
# CONFIG_X86_64 is not set
CONFIG_X86=y
CONFIG_ARCH_DEFCONFIG="arch/x86/configs/i386_defconfig"
-# CONFIG_GENERIC_LOCKBREAK is not set
CONFIG_GENERIC_TIME=y
CONFIG_GENERIC_CMOS_UPDATE=y
CONFIG_CLOCKSOURCE_WATCHDOG=y
@@ -24,16 +23,14 @@
CONFIG_GENERIC_IOMAP=y
CONFIG_GENERIC_BUG=y
CONFIG_GENERIC_HWEIGHT=y
-# CONFIG_GENERIC_GPIO is not set
CONFIG_ARCH_MAY_HAVE_PC_FDC=y
# CONFIG_RWSEM_GENERIC_SPINLOCK is not set
CONFIG_RWSEM_XCHGADD_ALGORITHM=y
-# CONFIG_ARCH_HAS_ILOG2_U32 is not set
-# CONFIG_ARCH_HAS_ILOG2_U64 is not set
CONFIG_ARCH_HAS_CPU_IDLE_WAIT=y
CONFIG_GENERIC_CALIBRATE_DELAY=y
# CONFIG_GENERIC_TIME_VSYSCALL is not set
CONFIG_ARCH_HAS_CPU_RELAX=y
+CONFIG_ARCH_HAS_DEFAULT_IDLE=y
CONFIG_ARCH_HAS_CACHE_LINE_SIZE=y
CONFIG_HAVE_SETUP_PER_CPU_AREA=y
# CONFIG_HAVE_CPUMASK_OF_CPU_MAP is not set
@@ -42,12 +39,12 @@
# CONFIG_ZONE_DMA32 is not set
CONFIG_ARCH_POPULATES_NODE_MAP=y
# CONFIG_AUDIT_ARCH is not set
-CONFIG_ARCH_SUPPORTS_AOUT=y
CONFIG_ARCH_SUPPORTS_OPTIMIZED_INLINING=y
CONFIG_GENERIC_HARDIRQS=y
CONFIG_GENERIC_IRQ_PROBE=y
CONFIG_GENERIC_PENDING_IRQ=y
CONFIG_X86_SMP=y
+CONFIG_USE_GENERIC_SMP_HELPERS=y
CONFIG_X86_32_SMP=y
CONFIG_X86_HT=y
CONFIG_X86_BIOS_REBOOT=y
@@ -76,30 +73,44 @@
CONFIG_AUDIT=y
CONFIG_AUDITSYSCALL=y
CONFIG_AUDIT_TREE=y
+
+#
+# RCU Subsystem
+#
+# CONFIG_CLASSIC_RCU is not set
+CONFIG_TREE_RCU=y
+# CONFIG_PREEMPT_RCU is not set
+# CONFIG_RCU_TRACE is not set
+CONFIG_RCU_FANOUT=32
+# CONFIG_RCU_FANOUT_EXACT is not set
+# CONFIG_TREE_RCU_TRACE is not set
+# CONFIG_PREEMPT_RCU_TRACE is not set
# CONFIG_IKCONFIG is not set
CONFIG_LOG_BUF_SHIFT=18
-CONFIG_CGROUPS=y
-# CONFIG_CGROUP_DEBUG is not set
-CONFIG_CGROUP_NS=y
-# CONFIG_CGROUP_DEVICE is not set
-CONFIG_CPUSETS=y
CONFIG_HAVE_UNSTABLE_SCHED_CLOCK=y
CONFIG_GROUP_SCHED=y
CONFIG_FAIR_GROUP_SCHED=y
# CONFIG_RT_GROUP_SCHED is not set
# CONFIG_USER_SCHED is not set
CONFIG_CGROUP_SCHED=y
+CONFIG_CGROUPS=y
+# CONFIG_CGROUP_DEBUG is not set
+CONFIG_CGROUP_NS=y
+CONFIG_CGROUP_FREEZER=y
+# CONFIG_CGROUP_DEVICE is not set
+CONFIG_CPUSETS=y
+CONFIG_PROC_PID_CPUSET=y
CONFIG_CGROUP_CPUACCT=y
CONFIG_RESOURCE_COUNTERS=y
# CONFIG_CGROUP_MEM_RES_CTLR is not set
# CONFIG_SYSFS_DEPRECATED_V2 is not set
-CONFIG_PROC_PID_CPUSET=y
CONFIG_RELAY=y
CONFIG_NAMESPACES=y
CONFIG_UTS_NS=y
CONFIG_IPC_NS=y
CONFIG_USER_NS=y
CONFIG_PID_NS=y
+CONFIG_NET_NS=y
CONFIG_BLK_DEV_INITRD=y
CONFIG_INITRAMFS_SOURCE=""
CONFIG_CC_OPTIMIZE_FOR_SIZE=y
@@ -124,12 +135,15 @@
CONFIG_TIMERFD=y
CONFIG_EVENTFD=y
CONFIG_SHMEM=y
+CONFIG_AIO=y
CONFIG_VM_EVENT_COUNTERS=y
+CONFIG_PCI_QUIRKS=y
CONFIG_SLUB_DEBUG=y
# CONFIG_SLAB is not set
CONFIG_SLUB=y
# CONFIG_SLOB is not set
CONFIG_PROFILING=y
+CONFIG_TRACEPOINTS=y
CONFIG_MARKERS=y
# CONFIG_OPROFILE is not set
CONFIG_HAVE_OPROFILE=y
@@ -139,15 +153,10 @@
CONFIG_HAVE_IOREMAP_PROT=y
CONFIG_HAVE_KPROBES=y
CONFIG_HAVE_KRETPROBES=y
-# CONFIG_HAVE_ARCH_TRACEHOOK is not set
-# CONFIG_HAVE_DMA_ATTRS is not set
-CONFIG_USE_GENERIC_SMP_HELPERS=y
-# CONFIG_HAVE_CLK is not set
-CONFIG_PROC_PAGE_MONITOR=y
+CONFIG_HAVE_ARCH_TRACEHOOK=y
CONFIG_HAVE_GENERIC_DMA_COHERENT=y
CONFIG_SLABINFO=y
CONFIG_RT_MUTEXES=y
-# CONFIG_TINY_SHMEM is not set
CONFIG_BASE_SMALL=0
CONFIG_MODULES=y
# CONFIG_MODULE_FORCE_LOAD is not set
@@ -155,12 +164,10 @@
CONFIG_MODULE_FORCE_UNLOAD=y
# CONFIG_MODVERSIONS is not set
# CONFIG_MODULE_SRCVERSION_ALL is not set
-CONFIG_KMOD=y
CONFIG_STOP_MACHINE=y
CONFIG_BLOCK=y
# CONFIG_LBD is not set
CONFIG_BLK_DEV_IO_TRACE=y
-# CONFIG_LSF is not set
CONFIG_BLK_DEV_BSG=y
# CONFIG_BLK_DEV_INTEGRITY is not set
@@ -176,7 +183,7 @@
CONFIG_DEFAULT_CFQ=y
# CONFIG_DEFAULT_NOOP is not set
CONFIG_DEFAULT_IOSCHED="cfq"
-CONFIG_CLASSIC_RCU=y
+CONFIG_FREEZER=y
#
# Processor type and features
@@ -186,15 +193,14 @@
CONFIG_HIGH_RES_TIMERS=y
CONFIG_GENERIC_CLOCKEVENTS_BUILD=y
CONFIG_SMP=y
+CONFIG_SPARSE_IRQ=y
CONFIG_X86_FIND_SMP_CONFIG=y
CONFIG_X86_MPPARSE=y
-CONFIG_X86_PC=y
# CONFIG_X86_ELAN is not set
-# CONFIG_X86_VOYAGER is not set
# CONFIG_X86_GENERICARCH is not set
# CONFIG_X86_VSMP is not set
# CONFIG_X86_RDC321X is not set
-CONFIG_SCHED_NO_NO_OMIT_FRAME_POINTER=y
+CONFIG_SCHED_OMIT_FRAME_POINTER=y
# CONFIG_PARAVIRT_GUEST is not set
# CONFIG_MEMTEST is not set
# CONFIG_M386 is not set
@@ -238,10 +244,19 @@
CONFIG_X86_CMOV=y
CONFIG_X86_MINIMUM_CPU_FAMILY=4
CONFIG_X86_DEBUGCTLMSR=y
+CONFIG_CPU_SUP_INTEL=y
+CONFIG_CPU_SUP_CYRIX_32=y
+CONFIG_CPU_SUP_AMD=y
+CONFIG_CPU_SUP_CENTAUR_32=y
+CONFIG_CPU_SUP_TRANSMETA_32=y
+CONFIG_CPU_SUP_UMC_32=y
+CONFIG_X86_DS=y
+CONFIG_X86_PTRACE_BTS=y
CONFIG_HPET_TIMER=y
CONFIG_HPET_EMULATE_RTC=y
CONFIG_DMI=y
# CONFIG_IOMMU_HELPER is not set
+# CONFIG_IOMMU_API is not set
CONFIG_NR_CPUS=64
CONFIG_SCHED_SMT=y
CONFIG_SCHED_MC=y
@@ -250,12 +265,17 @@
# CONFIG_PREEMPT is not set
CONFIG_X86_LOCAL_APIC=y
CONFIG_X86_IO_APIC=y
-# CONFIG_X86_MCE is not set
+CONFIG_X86_REROUTE_FOR_BROKEN_BOOT_IRQS=y
+CONFIG_X86_MCE=y
+CONFIG_X86_MCE_NONFATAL=y
+CONFIG_X86_MCE_P4THERMAL=y
CONFIG_VM86=y
# CONFIG_TOSHIBA is not set
# CONFIG_I8K is not set
CONFIG_X86_REBOOTFIXUPS=y
CONFIG_MICROCODE=y
+CONFIG_MICROCODE_INTEL=y
+CONFIG_MICROCODE_AMD=y
CONFIG_MICROCODE_OLD_INTERFACE=y
CONFIG_X86_MSR=y
CONFIG_X86_CPUID=y
@@ -264,6 +284,7 @@
# CONFIG_HIGHMEM64G is not set
CONFIG_PAGE_OFFSET=0xC0000000
CONFIG_HIGHMEM=y
+# CONFIG_ARCH_PHYS_ADDR_T_64BIT is not set
CONFIG_ARCH_FLATMEM_ENABLE=y
CONFIG_ARCH_SPARSEMEM_ENABLE=y
CONFIG_ARCH_SELECT_MEMORY_MODEL=y
@@ -274,14 +295,17 @@
CONFIG_FLATMEM=y
CONFIG_FLAT_NODE_MEM_MAP=y
CONFIG_SPARSEMEM_STATIC=y
-# CONFIG_SPARSEMEM_VMEMMAP_ENABLE is not set
CONFIG_PAGEFLAGS_EXTENDED=y
CONFIG_SPLIT_PTLOCK_CPUS=4
-CONFIG_RESOURCES_64BIT=y
+# CONFIG_PHYS_ADDR_T_64BIT is not set
CONFIG_ZONE_DMA_FLAG=1
CONFIG_BOUNCE=y
CONFIG_VIRT_TO_BUS=y
+CONFIG_UNEVICTABLE_LRU=y
CONFIG_HIGHPTE=y
+CONFIG_X86_CHECK_BIOS_CORRUPTION=y
+CONFIG_X86_BOOTPARAM_MEMORY_CORRUPTION_CHECK=y
+CONFIG_X86_RESERVE_LOW_64K=y
# CONFIG_MATH_EMULATION is not set
CONFIG_MTRR=y
# CONFIG_MTRR_SANITIZER is not set
@@ -302,10 +326,11 @@
CONFIG_PHYSICAL_ALIGN=0x200000
CONFIG_HOTPLUG_CPU=y
# CONFIG_COMPAT_VDSO is not set
+# CONFIG_CMDLINE_BOOL is not set
CONFIG_ARCH_ENABLE_MEMORY_HOTPLUG=y
#
-# Power management options
+# Power management and ACPI options
#
CONFIG_PM=y
CONFIG_PM_DEBUG=y
@@ -331,19 +356,13 @@
CONFIG_ACPI_BUTTON=y
CONFIG_ACPI_FAN=y
CONFIG_ACPI_DOCK=y
-# CONFIG_ACPI_BAY is not set
CONFIG_ACPI_PROCESSOR=y
CONFIG_ACPI_HOTPLUG_CPU=y
CONFIG_ACPI_THERMAL=y
-# CONFIG_ACPI_WMI is not set
-# CONFIG_ACPI_ASUS is not set
-# CONFIG_ACPI_TOSHIBA is not set
# CONFIG_ACPI_CUSTOM_DSDT is not set
CONFIG_ACPI_BLACKLIST_YEAR=0
# CONFIG_ACPI_DEBUG is not set
-CONFIG_ACPI_EC=y
# CONFIG_ACPI_PCI_SLOT is not set
-CONFIG_ACPI_POWER=y
CONFIG_ACPI_SYSTEM=y
CONFIG_X86_PM_TIMER=y
CONFIG_ACPI_CONTAINER=y
@@ -388,7 +407,6 @@
#
# shared options
#
-# CONFIG_X86_ACPI_CPUFREQ_PROC_INTF is not set
# CONFIG_X86_SPEEDSTEP_LIB is not set
CONFIG_CPU_IDLE=y
CONFIG_CPU_IDLE_GOV_LADDER=y
@@ -415,6 +433,7 @@
CONFIG_PCI_MSI=y
# CONFIG_PCI_LEGACY is not set
# CONFIG_PCI_DEBUG is not set
+# CONFIG_PCI_STUB is not set
CONFIG_HT_IRQ=y
CONFIG_ISA_DMA_API=y
# CONFIG_ISA is not set
@@ -452,13 +471,17 @@
# Executable file formats / Emulations
#
CONFIG_BINFMT_ELF=y
+CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS=y
+CONFIG_HAVE_AOUT=y
# CONFIG_BINFMT_AOUT is not set
CONFIG_BINFMT_MISC=y
+CONFIG_HAVE_ATOMIC_IOMAP=y
CONFIG_NET=y
#
# Networking options
#
+CONFIG_COMPAT_NET_DEV_OPS=y
CONFIG_PACKET=y
CONFIG_PACKET_MMAP=y
CONFIG_UNIX=y
@@ -519,7 +542,6 @@
# CONFIG_DEFAULT_RENO is not set
CONFIG_DEFAULT_TCP_CONG="cubic"
CONFIG_TCP_MD5SIG=y
-# CONFIG_IP_VS is not set
CONFIG_IPV6=y
# CONFIG_IPV6_PRIVACY is not set
# CONFIG_IPV6_ROUTER_PREF is not set
@@ -557,19 +579,21 @@
CONFIG_NF_CONNTRACK_SIP=y
CONFIG_NF_CT_NETLINK=y
CONFIG_NETFILTER_XTABLES=y
+CONFIG_NETFILTER_XT_TARGET_CONNSECMARK=y
CONFIG_NETFILTER_XT_TARGET_MARK=y
CONFIG_NETFILTER_XT_TARGET_NFLOG=y
CONFIG_NETFILTER_XT_TARGET_SECMARK=y
-CONFIG_NETFILTER_XT_TARGET_CONNSECMARK=y
CONFIG_NETFILTER_XT_TARGET_TCPMSS=y
CONFIG_NETFILTER_XT_MATCH_CONNTRACK=y
CONFIG_NETFILTER_XT_MATCH_MARK=y
CONFIG_NETFILTER_XT_MATCH_POLICY=y
CONFIG_NETFILTER_XT_MATCH_STATE=y
+# CONFIG_IP_VS is not set
#
# IP: Netfilter Configuration
#
+CONFIG_NF_DEFRAG_IPV4=y
CONFIG_NF_CONNTRACK_IPV4=y
CONFIG_NF_CONNTRACK_PROC_COMPAT=y
CONFIG_IP_NF_IPTABLES=y
@@ -595,8 +619,8 @@
CONFIG_NF_CONNTRACK_IPV6=y
CONFIG_IP6_NF_IPTABLES=y
CONFIG_IP6_NF_MATCH_IPV6HEADER=y
-CONFIG_IP6_NF_FILTER=y
CONFIG_IP6_NF_TARGET_LOG=y
+CONFIG_IP6_NF_FILTER=y
CONFIG_IP6_NF_TARGET_REJECT=y
CONFIG_IP6_NF_MANGLE=y
# CONFIG_IP_DCCP is not set
@@ -604,6 +628,7 @@
# CONFIG_TIPC is not set
# CONFIG_ATM is not set
# CONFIG_BRIDGE is not set
+# CONFIG_NET_DSA is not set
# CONFIG_VLAN_8021Q is not set
# CONFIG_DECNET is not set
CONFIG_LLC=y
@@ -623,6 +648,7 @@
# CONFIG_NET_SCH_HTB is not set
# CONFIG_NET_SCH_HFSC is not set
# CONFIG_NET_SCH_PRIO is not set
+# CONFIG_NET_SCH_MULTIQ is not set
# CONFIG_NET_SCH_RED is not set
# CONFIG_NET_SCH_SFQ is not set
# CONFIG_NET_SCH_TEQL is not set
@@ -630,6 +656,7 @@
# CONFIG_NET_SCH_GRED is not set
# CONFIG_NET_SCH_DSMARK is not set
# CONFIG_NET_SCH_NETEM is not set
+# CONFIG_NET_SCH_DRR is not set
# CONFIG_NET_SCH_INGRESS is not set
#
@@ -644,6 +671,7 @@
# CONFIG_NET_CLS_RSVP is not set
# CONFIG_NET_CLS_RSVP6 is not set
# CONFIG_NET_CLS_FLOW is not set
+# CONFIG_NET_CLS_CGROUP is not set
CONFIG_NET_EMATCH=y
CONFIG_NET_EMATCH_STACK=32
# CONFIG_NET_EMATCH_CMP is not set
@@ -659,7 +687,9 @@
# CONFIG_NET_ACT_NAT is not set
# CONFIG_NET_ACT_PEDIT is not set
# CONFIG_NET_ACT_SIMP is not set
+# CONFIG_NET_ACT_SKBEDIT is not set
CONFIG_NET_SCH_FIFO=y
+# CONFIG_DCB is not set
#
# Network testing
@@ -676,29 +706,33 @@
# CONFIG_IRDA is not set
# CONFIG_BT is not set
# CONFIG_AF_RXRPC is not set
+# CONFIG_PHONET is not set
CONFIG_FIB_RULES=y
-
-#
-# Wireless
-#
+CONFIG_WIRELESS=y
CONFIG_CFG80211=y
+# CONFIG_CFG80211_REG_DEBUG is not set
CONFIG_NL80211=y
+CONFIG_WIRELESS_OLD_REGULATORY=y
CONFIG_WIRELESS_EXT=y
CONFIG_WIRELESS_EXT_SYSFS=y
+# CONFIG_LIB80211 is not set
CONFIG_MAC80211=y
#
# Rate control algorithm selection
#
-CONFIG_MAC80211_RC_PID=y
-CONFIG_MAC80211_RC_DEFAULT_PID=y
-CONFIG_MAC80211_RC_DEFAULT="pid"
+CONFIG_MAC80211_RC_MINSTREL=y
+# CONFIG_MAC80211_RC_DEFAULT_PID is not set
+CONFIG_MAC80211_RC_DEFAULT_MINSTREL=y
+CONFIG_MAC80211_RC_DEFAULT="minstrel"
# CONFIG_MAC80211_MESH is not set
CONFIG_MAC80211_LEDS=y
# CONFIG_MAC80211_DEBUGFS is not set
# CONFIG_MAC80211_DEBUG_MENU is not set
-# CONFIG_IEEE80211 is not set
-# CONFIG_RFKILL is not set
+# CONFIG_WIMAX is not set
+CONFIG_RFKILL=y
+# CONFIG_RFKILL_INPUT is not set
+CONFIG_RFKILL_LEDS=y
# CONFIG_NET_9P is not set
#
@@ -722,7 +756,7 @@
# CONFIG_MTD is not set
# CONFIG_PARPORT is not set
CONFIG_PNP=y
-# CONFIG_PNP_DEBUG is not set
+CONFIG_PNP_DEBUG_MESSAGES=y
#
# Protocols
@@ -750,20 +784,19 @@
CONFIG_MISC_DEVICES=y
# CONFIG_IBM_ASM is not set
# CONFIG_PHANTOM is not set
-# CONFIG_EEPROM_93CX6 is not set
# CONFIG_SGI_IOC4 is not set
# CONFIG_TIFM_CORE is not set
-# CONFIG_ACER_WMI is not set
-# CONFIG_ASUS_LAPTOP is not set
-# CONFIG_FUJITSU_LAPTOP is not set
-# CONFIG_TC1100_WMI is not set
-# CONFIG_MSI_LAPTOP is not set
-# CONFIG_COMPAL_LAPTOP is not set
-# CONFIG_SONY_LAPTOP is not set
-# CONFIG_THINKPAD_ACPI is not set
-# CONFIG_INTEL_MENLOW is not set
+# CONFIG_ICS932S401 is not set
# CONFIG_ENCLOSURE_SERVICES is not set
# CONFIG_HP_ILO is not set
+# CONFIG_C2PORT is not set
+
+#
+# EEPROM support
+#
+# CONFIG_EEPROM_AT24 is not set
+# CONFIG_EEPROM_LEGACY is not set
+# CONFIG_EEPROM_93CX6 is not set
CONFIG_HAVE_IDE=y
# CONFIG_IDE is not set
@@ -802,7 +835,7 @@
#
CONFIG_SCSI_SPI_ATTRS=y
# CONFIG_SCSI_FC_ATTRS is not set
-CONFIG_SCSI_ISCSI_ATTRS=y
+# CONFIG_SCSI_ISCSI_ATTRS is not set
# CONFIG_SCSI_SAS_ATTRS is not set
# CONFIG_SCSI_SAS_LIBSAS is not set
# CONFIG_SCSI_SRP_ATTRS is not set
@@ -875,6 +908,7 @@
CONFIG_PATA_SCH=y
CONFIG_MD=y
CONFIG_BLK_DEV_MD=y
+CONFIG_MD_AUTODETECT=y
# CONFIG_MD_LINEAR is not set
# CONFIG_MD_RAID0 is not set
# CONFIG_MD_RAID1 is not set
@@ -930,6 +964,9 @@
# CONFIG_BROADCOM_PHY is not set
# CONFIG_ICPLUS_PHY is not set
# CONFIG_REALTEK_PHY is not set
+# CONFIG_NATIONAL_PHY is not set
+# CONFIG_STE10XP is not set
+# CONFIG_LSI_ET1011C_PHY is not set
# CONFIG_FIXED_PHY is not set
# CONFIG_MDIO_BITBANG is not set
CONFIG_NET_ETHERNET=y
@@ -953,6 +990,9 @@
# CONFIG_IBM_NEW_EMAC_RGMII is not set
# CONFIG_IBM_NEW_EMAC_TAH is not set
# CONFIG_IBM_NEW_EMAC_EMAC4 is not set
+# CONFIG_IBM_NEW_EMAC_NO_FLOW_CTRL is not set
+# CONFIG_IBM_NEW_EMAC_MAL_CLR_ICINTSTAT is not set
+# CONFIG_IBM_NEW_EMAC_MAL_COMMON_ERR is not set
CONFIG_NET_PCI=y
# CONFIG_PCNET32 is not set
# CONFIG_AMD8111_ETH is not set
@@ -960,7 +1000,6 @@
# CONFIG_B44 is not set
CONFIG_FORCEDETH=y
# CONFIG_FORCEDETH_NAPI is not set
-# CONFIG_EEPRO100 is not set
CONFIG_E100=y
# CONFIG_FEALNX is not set
# CONFIG_NATSEMI is not set
@@ -974,15 +1013,16 @@
# CONFIG_R6040 is not set
# CONFIG_SIS900 is not set
# CONFIG_EPIC100 is not set
+# CONFIG_SMSC9420 is not set
# CONFIG_SUNDANCE is not set
# CONFIG_TLAN is not set
# CONFIG_VIA_RHINE is not set
# CONFIG_SC92031 is not set
+# CONFIG_ATL2 is not set
CONFIG_NETDEV_1000=y
# CONFIG_ACENIC is not set
# CONFIG_DL2K is not set
CONFIG_E1000=y
-# CONFIG_E1000_DISABLE_PACKET_SPLIT is not set
CONFIG_E1000E=y
# CONFIG_IP1000 is not set
# CONFIG_IGB is not set
@@ -1000,18 +1040,23 @@
# CONFIG_QLA3XXX is not set
# CONFIG_ATL1 is not set
# CONFIG_ATL1E is not set
+# CONFIG_JME is not set
CONFIG_NETDEV_10000=y
# CONFIG_CHELSIO_T1 is not set
+CONFIG_CHELSIO_T3_DEPENDS=y
# CONFIG_CHELSIO_T3 is not set
+# CONFIG_ENIC is not set
# CONFIG_IXGBE is not set
# CONFIG_IXGB is not set
# CONFIG_S2IO is not set
# CONFIG_MYRI10GE is not set
# CONFIG_NETXEN_NIC is not set
# CONFIG_NIU is not set
+# CONFIG_MLX4_EN is not set
# CONFIG_MLX4_CORE is not set
# CONFIG_TEHUTI is not set
# CONFIG_BNX2X is not set
+# CONFIG_QLGE is not set
# CONFIG_SFC is not set
CONFIG_TR=y
# CONFIG_IBMOL is not set
@@ -1025,9 +1070,8 @@
# CONFIG_WLAN_PRE80211 is not set
CONFIG_WLAN_80211=y
# CONFIG_PCMCIA_RAYCS is not set
-# CONFIG_IPW2100 is not set
-# CONFIG_IPW2200 is not set
# CONFIG_LIBERTAS is not set
+# CONFIG_LIBERTAS_THINFIRM is not set
# CONFIG_AIRO is not set
# CONFIG_HERMES is not set
# CONFIG_ATMEL is not set
@@ -1044,6 +1088,8 @@
CONFIG_ATH5K=y
# CONFIG_ATH5K_DEBUG is not set
# CONFIG_ATH9K is not set
+# CONFIG_IPW2100 is not set
+# CONFIG_IPW2200 is not set
# CONFIG_IWLCORE is not set
# CONFIG_IWLWIFI_LEDS is not set
# CONFIG_IWLAGN is not set
@@ -1055,6 +1101,10 @@
# CONFIG_RT2X00 is not set
#
+# Enable WiMAX (Networking options) to see the WiMAX drivers
+#
+
+#
# USB Network Adapters
#
# CONFIG_USB_CATC is not set
@@ -1062,6 +1112,7 @@
# CONFIG_USB_PEGASUS is not set
# CONFIG_USB_RTL8150 is not set
# CONFIG_USB_USBNET is not set
+# CONFIG_USB_HSO is not set
CONFIG_NET_PCMCIA=y
# CONFIG_PCMCIA_3C589 is not set
# CONFIG_PCMCIA_3C574 is not set
@@ -1123,6 +1174,7 @@
CONFIG_MOUSE_PS2_SYNAPTICS=y
CONFIG_MOUSE_PS2_LIFEBOOK=y
CONFIG_MOUSE_PS2_TRACKPOINT=y
+# CONFIG_MOUSE_PS2_ELANTECH is not set
# CONFIG_MOUSE_PS2_TOUCHKIT is not set
# CONFIG_MOUSE_SERIAL is not set
# CONFIG_MOUSE_APPLETOUCH is not set
@@ -1160,15 +1212,16 @@
# CONFIG_TOUCHSCREEN_FUJITSU is not set
# CONFIG_TOUCHSCREEN_GUNZE is not set
# CONFIG_TOUCHSCREEN_ELO is not set
+# CONFIG_TOUCHSCREEN_WACOM_W8001 is not set
# CONFIG_TOUCHSCREEN_MTOUCH is not set
# CONFIG_TOUCHSCREEN_INEXIO is not set
# CONFIG_TOUCHSCREEN_MK712 is not set
# CONFIG_TOUCHSCREEN_PENMOUNT is not set
# CONFIG_TOUCHSCREEN_TOUCHRIGHT is not set
# CONFIG_TOUCHSCREEN_TOUCHWIN is not set
-# CONFIG_TOUCHSCREEN_UCB1400 is not set
# CONFIG_TOUCHSCREEN_USB_COMPOSITE is not set
# CONFIG_TOUCHSCREEN_TOUCHIT213 is not set
+# CONFIG_TOUCHSCREEN_TSC2007 is not set
CONFIG_INPUT_MISC=y
# CONFIG_INPUT_PCSPKR is not set
# CONFIG_INPUT_APANEL is not set
@@ -1179,6 +1232,7 @@
# CONFIG_INPUT_KEYSPAN_REMOTE is not set
# CONFIG_INPUT_POWERMATE is not set
# CONFIG_INPUT_YEALINK is not set
+# CONFIG_INPUT_CM109 is not set
# CONFIG_INPUT_UINPUT is not set
#
@@ -1245,6 +1299,7 @@
CONFIG_SERIAL_CORE_CONSOLE=y
# CONFIG_SERIAL_JSM is not set
CONFIG_UNIX98_PTYS=y
+# CONFIG_DEVPTS_MULTIPLE_INSTANCES is not set
# CONFIG_LEGACY_PTYS is not set
# CONFIG_IPMI_HANDLER is not set
CONFIG_HW_RANDOM=y
@@ -1279,6 +1334,7 @@
CONFIG_I2C_BOARDINFO=y
# CONFIG_I2C_CHARDEV is not set
CONFIG_I2C_HELPER_AUTO=y
+CONFIG_I2C_ALGOBIT=y
#
# I2C Hardware Bus support
@@ -1331,8 +1387,6 @@
# Miscellaneous I2C Chip support
#
# CONFIG_DS1682 is not set
-# CONFIG_EEPROM_AT24 is not set
-# CONFIG_EEPROM_LEGACY is not set
# CONFIG_SENSORS_PCF8574 is not set
# CONFIG_PCF8575 is not set
# CONFIG_SENSORS_PCA9539 is not set
@@ -1351,8 +1405,78 @@
# CONFIG_POWER_SUPPLY_DEBUG is not set
# CONFIG_PDA_POWER is not set
# CONFIG_BATTERY_DS2760 is not set
-# CONFIG_HWMON is not set
+# CONFIG_BATTERY_BQ27x00 is not set
+CONFIG_HWMON=y
+# CONFIG_HWMON_VID is not set
+# CONFIG_SENSORS_ABITUGURU is not set
+# CONFIG_SENSORS_ABITUGURU3 is not set
+# CONFIG_SENSORS_AD7414 is not set
+# CONFIG_SENSORS_AD7418 is not set
+# CONFIG_SENSORS_ADM1021 is not set
+# CONFIG_SENSORS_ADM1025 is not set
+# CONFIG_SENSORS_ADM1026 is not set
+# CONFIG_SENSORS_ADM1029 is not set
+# CONFIG_SENSORS_ADM1031 is not set
+# CONFIG_SENSORS_ADM9240 is not set
+# CONFIG_SENSORS_ADT7462 is not set
+# CONFIG_SENSORS_ADT7470 is not set
+# CONFIG_SENSORS_ADT7473 is not set
+# CONFIG_SENSORS_ADT7475 is not set
+# CONFIG_SENSORS_K8TEMP is not set
+# CONFIG_SENSORS_ASB100 is not set
+# CONFIG_SENSORS_ATXP1 is not set
+# CONFIG_SENSORS_DS1621 is not set
+# CONFIG_SENSORS_I5K_AMB is not set
+# CONFIG_SENSORS_F71805F is not set
+# CONFIG_SENSORS_F71882FG is not set
+# CONFIG_SENSORS_F75375S is not set
+# CONFIG_SENSORS_FSCHER is not set
+# CONFIG_SENSORS_FSCPOS is not set
+# CONFIG_SENSORS_FSCHMD is not set
+# CONFIG_SENSORS_GL518SM is not set
+# CONFIG_SENSORS_GL520SM is not set
+# CONFIG_SENSORS_CORETEMP is not set
+# CONFIG_SENSORS_IT87 is not set
+# CONFIG_SENSORS_LM63 is not set
+# CONFIG_SENSORS_LM75 is not set
+# CONFIG_SENSORS_LM77 is not set
+# CONFIG_SENSORS_LM78 is not set
+# CONFIG_SENSORS_LM80 is not set
+# CONFIG_SENSORS_LM83 is not set
+# CONFIG_SENSORS_LM85 is not set
+# CONFIG_SENSORS_LM87 is not set
+# CONFIG_SENSORS_LM90 is not set
+# CONFIG_SENSORS_LM92 is not set
+# CONFIG_SENSORS_LM93 is not set
+# CONFIG_SENSORS_LTC4245 is not set
+# CONFIG_SENSORS_MAX1619 is not set
+# CONFIG_SENSORS_MAX6650 is not set
+# CONFIG_SENSORS_PC87360 is not set
+# CONFIG_SENSORS_PC87427 is not set
+# CONFIG_SENSORS_SIS5595 is not set
+# CONFIG_SENSORS_DME1737 is not set
+# CONFIG_SENSORS_SMSC47M1 is not set
+# CONFIG_SENSORS_SMSC47M192 is not set
+# CONFIG_SENSORS_SMSC47B397 is not set
+# CONFIG_SENSORS_ADS7828 is not set
+# CONFIG_SENSORS_THMC50 is not set
+# CONFIG_SENSORS_VIA686A is not set
+# CONFIG_SENSORS_VT1211 is not set
+# CONFIG_SENSORS_VT8231 is not set
+# CONFIG_SENSORS_W83781D is not set
+# CONFIG_SENSORS_W83791D is not set
+# CONFIG_SENSORS_W83792D is not set
+# CONFIG_SENSORS_W83793 is not set
+# CONFIG_SENSORS_W83L785TS is not set
+# CONFIG_SENSORS_W83L786NG is not set
+# CONFIG_SENSORS_W83627HF is not set
+# CONFIG_SENSORS_W83627EHF is not set
+# CONFIG_SENSORS_HDAPS is not set
+# CONFIG_SENSORS_LIS3LV02D is not set
+# CONFIG_SENSORS_APPLESMC is not set
+# CONFIG_HWMON_DEBUG_CHIP is not set
CONFIG_THERMAL=y
+# CONFIG_THERMAL_HWMON is not set
CONFIG_WATCHDOG=y
# CONFIG_WATCHDOG_NOWAYOUT is not set
@@ -1372,6 +1496,7 @@
# CONFIG_I6300ESB_WDT is not set
# CONFIG_ITCO_WDT is not set
# CONFIG_IT8712F_WDT is not set
+# CONFIG_IT87_WDT is not set
# CONFIG_HP_WATCHDOG is not set
# CONFIG_SC1200_WDT is not set
# CONFIG_PC87413_WDT is not set
@@ -1379,9 +1504,11 @@
# CONFIG_SBC8360_WDT is not set
# CONFIG_SBC7240_WDT is not set
# CONFIG_CPU5_WDT is not set
+# CONFIG_SMSC_SCH311X_WDT is not set
# CONFIG_SMSC37B787_WDT is not set
# CONFIG_W83627HF_WDT is not set
# CONFIG_W83697HF_WDT is not set
+# CONFIG_W83697UG_WDT is not set
# CONFIG_W83877F_WDT is not set
# CONFIG_W83977F_WDT is not set
# CONFIG_MACHZ_WDT is not set
@@ -1397,11 +1524,11 @@
# USB-based Watchdog Cards
#
# CONFIG_USBPCWATCHDOG is not set
+CONFIG_SSB_POSSIBLE=y
#
# Sonics Silicon Backplane
#
-CONFIG_SSB_POSSIBLE=y
# CONFIG_SSB is not set
#
@@ -1410,7 +1537,13 @@
# CONFIG_MFD_CORE is not set
# CONFIG_MFD_SM501 is not set
# CONFIG_HTC_PASIC3 is not set
+# CONFIG_TWL4030_CORE is not set
# CONFIG_MFD_TMIO is not set
+# CONFIG_PMIC_DA903X is not set
+# CONFIG_MFD_WM8400 is not set
+# CONFIG_MFD_WM8350_I2C is not set
+# CONFIG_MFD_PCF50633 is not set
+# CONFIG_REGULATOR is not set
#
# Multimedia devices
@@ -1450,6 +1583,7 @@
# CONFIG_DRM_I810 is not set
# CONFIG_DRM_I830 is not set
CONFIG_DRM_I915=y
+# CONFIG_DRM_I915_KMS is not set
# CONFIG_DRM_MGA is not set
# CONFIG_DRM_SIS is not set
# CONFIG_DRM_VIA is not set
@@ -1459,6 +1593,7 @@
CONFIG_FB=y
# CONFIG_FIRMWARE_EDID is not set
# CONFIG_FB_DDC is not set
+# CONFIG_FB_BOOT_VESA_SUPPORT is not set
CONFIG_FB_CFB_FILLRECT=y
CONFIG_FB_CFB_COPYAREA=y
CONFIG_FB_CFB_IMAGEBLIT=y
@@ -1487,7 +1622,6 @@
# CONFIG_FB_UVESA is not set
# CONFIG_FB_VESA is not set
CONFIG_FB_EFI=y
-# CONFIG_FB_IMAC is not set
# CONFIG_FB_N411 is not set
# CONFIG_FB_HGA is not set
# CONFIG_FB_S1D13XXX is not set
@@ -1503,6 +1637,7 @@
# CONFIG_FB_S3 is not set
# CONFIG_FB_SAVAGE is not set
# CONFIG_FB_SIS is not set
+# CONFIG_FB_VIA is not set
# CONFIG_FB_NEOMAGIC is not set
# CONFIG_FB_KYRO is not set
# CONFIG_FB_3DFX is not set
@@ -1515,12 +1650,15 @@
# CONFIG_FB_CARMINE is not set
# CONFIG_FB_GEODE is not set
# CONFIG_FB_VIRTUAL is not set
+# CONFIG_FB_METRONOME is not set
+# CONFIG_FB_MB862XX is not set
CONFIG_BACKLIGHT_LCD_SUPPORT=y
# CONFIG_LCD_CLASS_DEVICE is not set
CONFIG_BACKLIGHT_CLASS_DEVICE=y
-# CONFIG_BACKLIGHT_CORGI is not set
+CONFIG_BACKLIGHT_GENERIC=y
# CONFIG_BACKLIGHT_PROGEAR is not set
# CONFIG_BACKLIGHT_MBP_NVIDIA is not set
+# CONFIG_BACKLIGHT_SAHARA is not set
#
# Display device support
@@ -1540,10 +1678,12 @@
# CONFIG_LOGO_LINUX_VGA16 is not set
CONFIG_LOGO_LINUX_CLUT224=y
CONFIG_SOUND=y
+CONFIG_SOUND_OSS_CORE=y
CONFIG_SND=y
CONFIG_SND_TIMER=y
CONFIG_SND_PCM=y
CONFIG_SND_HWDEP=y
+CONFIG_SND_JACK=y
CONFIG_SND_SEQUENCER=y
CONFIG_SND_SEQ_DUMMY=y
CONFIG_SND_OSSEMUL=y
@@ -1551,6 +1691,8 @@
CONFIG_SND_PCM_OSS=y
CONFIG_SND_PCM_OSS_PLUGINS=y
CONFIG_SND_SEQUENCER_OSS=y
+CONFIG_SND_HRTIMER=y
+CONFIG_SND_SEQ_HRTIMER_DEFAULT=y
CONFIG_SND_DYNAMIC_MINORS=y
CONFIG_SND_SUPPORT_OLD_API=y
CONFIG_SND_VERBOSE_PROCFS=y
@@ -1605,11 +1747,16 @@
# CONFIG_SND_FM801 is not set
CONFIG_SND_HDA_INTEL=y
CONFIG_SND_HDA_HWDEP=y
+# CONFIG_SND_HDA_RECONFIG is not set
+# CONFIG_SND_HDA_INPUT_BEEP is not set
CONFIG_SND_HDA_CODEC_REALTEK=y
CONFIG_SND_HDA_CODEC_ANALOG=y
CONFIG_SND_HDA_CODEC_SIGMATEL=y
CONFIG_SND_HDA_CODEC_VIA=y
CONFIG_SND_HDA_CODEC_ATIHDMI=y
+CONFIG_SND_HDA_CODEC_NVHDMI=y
+CONFIG_SND_HDA_CODEC_INTELHDMI=y
+CONFIG_SND_HDA_ELD=y
CONFIG_SND_HDA_CODEC_CONEXANT=y
CONFIG_SND_HDA_CODEC_CMEDIA=y
CONFIG_SND_HDA_CODEC_SI3054=y
@@ -1643,6 +1790,7 @@
# CONFIG_SND_USB_AUDIO is not set
# CONFIG_SND_USB_USX2Y is not set
# CONFIG_SND_USB_CAIAQ is not set
+# CONFIG_SND_USB_US122L is not set
CONFIG_SND_PCMCIA=y
# CONFIG_SND_VXPOCKET is not set
# CONFIG_SND_PDAUDIOCF is not set
@@ -1657,15 +1805,37 @@
# USB Input Devices
#
CONFIG_USB_HID=y
-CONFIG_USB_HIDINPUT_POWERBOOK=y
-CONFIG_HID_FF=y
CONFIG_HID_PID=y
+CONFIG_USB_HIDDEV=y
+
+#
+# Special HID drivers
+#
+CONFIG_HID_COMPAT=y
+CONFIG_HID_A4TECH=y
+CONFIG_HID_APPLE=y
+CONFIG_HID_BELKIN=y
+CONFIG_HID_CHERRY=y
+CONFIG_HID_CHICONY=y
+CONFIG_HID_CYPRESS=y
+CONFIG_HID_EZKEY=y
+CONFIG_HID_GYRATION=y
+CONFIG_HID_LOGITECH=y
CONFIG_LOGITECH_FF=y
# CONFIG_LOGIRUMBLEPAD2_FF is not set
+CONFIG_HID_MICROSOFT=y
+CONFIG_HID_MONTEREY=y
+CONFIG_HID_NTRIG=y
+CONFIG_HID_PANTHERLORD=y
CONFIG_PANTHERLORD_FF=y
+CONFIG_HID_PETALYNX=y
+CONFIG_HID_SAMSUNG=y
+CONFIG_HID_SONY=y
+CONFIG_HID_SUNPLUS=y
+# CONFIG_GREENASIA_FF is not set
+CONFIG_HID_TOPSEED=y
CONFIG_THRUSTMASTER_FF=y
CONFIG_ZEROPLUS_FF=y
-CONFIG_USB_HIDDEV=y
CONFIG_USB_SUPPORT=y
CONFIG_USB_ARCH_HAS_HCD=y
CONFIG_USB_ARCH_HAS_OHCI=y
@@ -1683,6 +1853,8 @@
CONFIG_USB_SUSPEND=y
# CONFIG_USB_OTG is not set
CONFIG_USB_MON=y
+# CONFIG_USB_WUSB is not set
+# CONFIG_USB_WUSB_CBAF is not set
#
# USB Host Controller Drivers
@@ -1691,6 +1863,7 @@
CONFIG_USB_EHCI_HCD=y
# CONFIG_USB_EHCI_ROOT_HUB_TT is not set
# CONFIG_USB_EHCI_TT_NEWSCHED is not set
+# CONFIG_USB_OXU210HP_HCD is not set
# CONFIG_USB_ISP116X_HCD is not set
# CONFIG_USB_ISP1760_HCD is not set
CONFIG_USB_OHCI_HCD=y
@@ -1700,6 +1873,8 @@
CONFIG_USB_UHCI_HCD=y
# CONFIG_USB_SL811_HCD is not set
# CONFIG_USB_R8A66597_HCD is not set
+# CONFIG_USB_WHCI_HCD is not set
+# CONFIG_USB_HWA_HCD is not set
#
# USB Device Class drivers
@@ -1707,20 +1882,20 @@
# CONFIG_USB_ACM is not set
CONFIG_USB_PRINTER=y
# CONFIG_USB_WDM is not set
+# CONFIG_USB_TMC is not set
#
-# NOTE: USB_STORAGE enables SCSI, and 'SCSI disk support'
+# NOTE: USB_STORAGE depends on SCSI but BLK_DEV_SD may also be needed;
#
#
-# may also be needed; see USB_STORAGE Help for more information
+# see USB_STORAGE Help for more information
#
CONFIG_USB_STORAGE=y
# CONFIG_USB_STORAGE_DEBUG is not set
# CONFIG_USB_STORAGE_DATAFAB is not set
# CONFIG_USB_STORAGE_FREECOM is not set
# CONFIG_USB_STORAGE_ISD200 is not set
-# CONFIG_USB_STORAGE_DPCM is not set
# CONFIG_USB_STORAGE_USBAT is not set
# CONFIG_USB_STORAGE_SDDR09 is not set
# CONFIG_USB_STORAGE_SDDR55 is not set
@@ -1728,7 +1903,6 @@
# CONFIG_USB_STORAGE_ALAUDA is not set
# CONFIG_USB_STORAGE_ONETOUCH is not set
# CONFIG_USB_STORAGE_KARMA is not set
-# CONFIG_USB_STORAGE_SIERRA is not set
# CONFIG_USB_STORAGE_CYPRESS_ATACB is not set
CONFIG_USB_LIBUSUAL=y
@@ -1749,6 +1923,7 @@
# CONFIG_USB_EMI62 is not set
# CONFIG_USB_EMI26 is not set
# CONFIG_USB_ADUTUX is not set
+# CONFIG_USB_SEVSEG is not set
# CONFIG_USB_RIO500 is not set
# CONFIG_USB_LEGOTOWER is not set
# CONFIG_USB_LCD is not set
@@ -1766,7 +1941,13 @@
# CONFIG_USB_IOWARRIOR is not set
# CONFIG_USB_TEST is not set
# CONFIG_USB_ISIGHTFW is not set
+# CONFIG_USB_VST is not set
# CONFIG_USB_GADGET is not set
+
+#
+# OTG and related infrastructure
+#
+# CONFIG_UWB is not set
# CONFIG_MMC is not set
# CONFIG_MEMSTICK is not set
CONFIG_NEW_LEDS=y
@@ -1775,6 +1956,7 @@
#
# LED drivers
#
+# CONFIG_LEDS_ALIX2 is not set
# CONFIG_LEDS_PCA9532 is not set
# CONFIG_LEDS_CLEVO_MAIL is not set
# CONFIG_LEDS_PCA955X is not set
@@ -1785,6 +1967,7 @@
CONFIG_LEDS_TRIGGERS=y
# CONFIG_LEDS_TRIGGER_TIMER is not set
# CONFIG_LEDS_TRIGGER_HEARTBEAT is not set
+# CONFIG_LEDS_TRIGGER_BACKLIGHT is not set
# CONFIG_LEDS_TRIGGER_DEFAULT_ON is not set
# CONFIG_ACCESSIBILITY is not set
# CONFIG_INFINIBAND is not set
@@ -1824,6 +2007,7 @@
# CONFIG_RTC_DRV_M41T80 is not set
# CONFIG_RTC_DRV_S35390A is not set
# CONFIG_RTC_DRV_FM3130 is not set
+# CONFIG_RTC_DRV_RX8581 is not set
#
# SPI RTC drivers
@@ -1833,12 +2017,15 @@
# Platform RTC drivers
#
CONFIG_RTC_DRV_CMOS=y
+# CONFIG_RTC_DRV_DS1286 is not set
# CONFIG_RTC_DRV_DS1511 is not set
# CONFIG_RTC_DRV_DS1553 is not set
# CONFIG_RTC_DRV_DS1742 is not set
# CONFIG_RTC_DRV_STK17TA8 is not set
# CONFIG_RTC_DRV_M48T86 is not set
+# CONFIG_RTC_DRV_M48T35 is not set
# CONFIG_RTC_DRV_M48T59 is not set
+# CONFIG_RTC_DRV_BQ4802 is not set
# CONFIG_RTC_DRV_V3020 is not set
#
@@ -1851,6 +2038,22 @@
#
# CONFIG_INTEL_IOATDMA is not set
# CONFIG_UIO is not set
+# CONFIG_STAGING is not set
+CONFIG_X86_PLATFORM_DEVICES=y
+# CONFIG_ACER_WMI is not set
+# CONFIG_ASUS_LAPTOP is not set
+# CONFIG_FUJITSU_LAPTOP is not set
+# CONFIG_TC1100_WMI is not set
+# CONFIG_MSI_LAPTOP is not set
+# CONFIG_PANASONIC_LAPTOP is not set
+# CONFIG_COMPAL_LAPTOP is not set
+# CONFIG_SONY_LAPTOP is not set
+# CONFIG_THINKPAD_ACPI is not set
+# CONFIG_INTEL_MENLOW is not set
+CONFIG_EEEPC_LAPTOP=y
+# CONFIG_ACPI_WMI is not set
+# CONFIG_ACPI_ASUS is not set
+# CONFIG_ACPI_TOSHIBA is not set
#
# Firmware Drivers
@@ -1861,8 +2064,7 @@
# CONFIG_DELL_RBU is not set
# CONFIG_DCDBAS is not set
CONFIG_DMIID=y
-CONFIG_ISCSI_IBFT_FIND=y
-CONFIG_ISCSI_IBFT=y
+# CONFIG_ISCSI_IBFT_FIND is not set
#
# File systems
@@ -1872,21 +2074,24 @@
CONFIG_EXT3_FS_XATTR=y
CONFIG_EXT3_FS_POSIX_ACL=y
CONFIG_EXT3_FS_SECURITY=y
-# CONFIG_EXT4DEV_FS is not set
+# CONFIG_EXT4_FS is not set
CONFIG_JBD=y
# CONFIG_JBD_DEBUG is not set
CONFIG_FS_MBCACHE=y
# CONFIG_REISERFS_FS is not set
# CONFIG_JFS_FS is not set
CONFIG_FS_POSIX_ACL=y
+CONFIG_FILE_LOCKING=y
# CONFIG_XFS_FS is not set
# CONFIG_OCFS2_FS is not set
+# CONFIG_BTRFS_FS is not set
CONFIG_DNOTIFY=y
CONFIG_INOTIFY=y
CONFIG_INOTIFY_USER=y
CONFIG_QUOTA=y
CONFIG_QUOTA_NETLINK_INTERFACE=y
# CONFIG_PRINT_QUOTA_WARNING is not set
+CONFIG_QUOTA_TREE=y
# CONFIG_QFMT_V1 is not set
CONFIG_QFMT_V2=y
CONFIG_QUOTACTL=y
@@ -1920,16 +2125,14 @@
CONFIG_PROC_KCORE=y
CONFIG_PROC_VMCORE=y
CONFIG_PROC_SYSCTL=y
+CONFIG_PROC_PAGE_MONITOR=y
CONFIG_SYSFS=y
CONFIG_TMPFS=y
CONFIG_TMPFS_POSIX_ACL=y
CONFIG_HUGETLBFS=y
CONFIG_HUGETLB_PAGE=y
# CONFIG_CONFIGFS_FS is not set
-
-#
-# Miscellaneous filesystems
-#
+CONFIG_MISC_FILESYSTEMS=y
# CONFIG_ADFS_FS is not set
# CONFIG_AFFS_FS is not set
# CONFIG_ECRYPT_FS is not set
@@ -1939,6 +2142,7 @@
# CONFIG_BFS_FS is not set
# CONFIG_EFS_FS is not set
# CONFIG_CRAMFS is not set
+# CONFIG_SQUASHFS is not set
# CONFIG_VXFS_FS is not set
# CONFIG_MINIX_FS is not set
# CONFIG_OMFS_FS is not set
@@ -1960,6 +2164,7 @@
CONFIG_NFS_COMMON=y
CONFIG_SUNRPC=y
CONFIG_SUNRPC_GSS=y
+# CONFIG_SUNRPC_REGISTER_V4 is not set
CONFIG_RPCSEC_GSS_KRB5=y
# CONFIG_RPCSEC_GSS_SPKM3 is not set
# CONFIG_SMB_FS is not set
@@ -2036,7 +2241,7 @@
#
CONFIG_TRACE_IRQFLAGS_SUPPORT=y
CONFIG_PRINTK_TIME=y
-CONFIG_ENABLE_WARN_DEPRECATED=y
+# CONFIG_ENABLE_WARN_DEPRECATED is not set
CONFIG_ENABLE_MUST_CHECK=y
CONFIG_FRAME_WARN=2048
CONFIG_MAGIC_SYSRQ=y
@@ -2066,33 +2271,54 @@
CONFIG_DEBUG_BUGVERBOSE=y
# CONFIG_DEBUG_INFO is not set
# CONFIG_DEBUG_VM is not set
+# CONFIG_DEBUG_VIRTUAL is not set
# CONFIG_DEBUG_WRITECOUNT is not set
CONFIG_DEBUG_MEMORY_INIT=y
# CONFIG_DEBUG_LIST is not set
# CONFIG_DEBUG_SG is not set
+# CONFIG_DEBUG_NOTIFIERS is not set
+CONFIG_ARCH_WANT_FRAME_POINTERS=y
CONFIG_FRAME_POINTER=y
# CONFIG_BOOT_PRINTK_DELAY is not set
# CONFIG_RCU_TORTURE_TEST is not set
+# CONFIG_RCU_CPU_STALL_DETECTOR is not set
# CONFIG_KPROBES_SANITY_TEST is not set
# CONFIG_BACKTRACE_SELF_TEST is not set
+# CONFIG_DEBUG_BLOCK_EXT_DEVT is not set
# CONFIG_LKDTM is not set
# CONFIG_FAULT_INJECTION is not set
# CONFIG_LATENCYTOP is not set
CONFIG_SYSCTL_SYSCALL_CHECK=y
-CONFIG_HAVE_FTRACE=y
+CONFIG_USER_STACKTRACE_SUPPORT=y
+CONFIG_HAVE_FUNCTION_TRACER=y
+CONFIG_HAVE_FUNCTION_GRAPH_TRACER=y
+CONFIG_HAVE_FUNCTION_TRACE_MCOUNT_TEST=y
CONFIG_HAVE_DYNAMIC_FTRACE=y
-# CONFIG_FTRACE is not set
+CONFIG_HAVE_FTRACE_MCOUNT_RECORD=y
+CONFIG_HAVE_HW_BRANCH_TRACER=y
+
+#
+# Tracers
+#
+# CONFIG_FUNCTION_TRACER is not set
# CONFIG_IRQSOFF_TRACER is not set
# CONFIG_SYSPROF_TRACER is not set
# CONFIG_SCHED_TRACER is not set
# CONFIG_CONTEXT_SWITCH_TRACER is not set
+# CONFIG_BOOT_TRACER is not set
+# CONFIG_TRACE_BRANCH_PROFILING is not set
+# CONFIG_POWER_TRACER is not set
+# CONFIG_STACK_TRACER is not set
+# CONFIG_HW_BRANCH_TRACER is not set
CONFIG_PROVIDE_OHCI1394_DMA_INIT=y
+# CONFIG_DYNAMIC_PRINTK_DEBUG is not set
# CONFIG_SAMPLES is not set
CONFIG_HAVE_ARCH_KGDB=y
# CONFIG_KGDB is not set
# CONFIG_STRICT_DEVMEM is not set
CONFIG_X86_VERBOSE_BOOTUP=y
CONFIG_EARLY_PRINTK=y
+CONFIG_EARLY_PRINTK_DBGP=y
CONFIG_DEBUG_STACKOVERFLOW=y
CONFIG_DEBUG_STACK_USAGE=y
# CONFIG_DEBUG_PAGEALLOC is not set
@@ -2123,8 +2349,10 @@
CONFIG_KEYS=y
CONFIG_KEYS_DEBUG_PROC_KEYS=y
CONFIG_SECURITY=y
+# CONFIG_SECURITYFS is not set
CONFIG_SECURITY_NETWORK=y
# CONFIG_SECURITY_NETWORK_XFRM is not set
+# CONFIG_SECURITY_PATH is not set
CONFIG_SECURITY_FILE_CAPABILITIES=y
# CONFIG_SECURITY_ROOTPLUG is not set
CONFIG_SECURITY_DEFAULT_MMAP_MIN_ADDR=65536
@@ -2135,7 +2363,6 @@
CONFIG_SECURITY_SELINUX_DEVELOP=y
CONFIG_SECURITY_SELINUX_AVC_STATS=y
CONFIG_SECURITY_SELINUX_CHECKREQPROT_VALUE=1
-# CONFIG_SECURITY_SELINUX_ENABLE_SECMARK_DEFAULT is not set
# CONFIG_SECURITY_SELINUX_POLICYDB_VERSION_MAX is not set
# CONFIG_SECURITY_SMACK is not set
CONFIG_CRYPTO=y
@@ -2143,11 +2370,18 @@
#
# Crypto core or helper
#
+# CONFIG_CRYPTO_FIPS is not set
CONFIG_CRYPTO_ALGAPI=y
+CONFIG_CRYPTO_ALGAPI2=y
CONFIG_CRYPTO_AEAD=y
+CONFIG_CRYPTO_AEAD2=y
CONFIG_CRYPTO_BLKCIPHER=y
+CONFIG_CRYPTO_BLKCIPHER2=y
CONFIG_CRYPTO_HASH=y
+CONFIG_CRYPTO_HASH2=y
+CONFIG_CRYPTO_RNG2=y
CONFIG_CRYPTO_MANAGER=y
+CONFIG_CRYPTO_MANAGER2=y
# CONFIG_CRYPTO_GF128MUL is not set
# CONFIG_CRYPTO_NULL is not set
# CONFIG_CRYPTO_CRYPTD is not set
@@ -2182,6 +2416,7 @@
# Digest
#
# CONFIG_CRYPTO_CRC32C is not set
+# CONFIG_CRYPTO_CRC32C_INTEL is not set
# CONFIG_CRYPTO_MD4 is not set
CONFIG_CRYPTO_MD5=y
# CONFIG_CRYPTO_MICHAEL_MIC is not set
@@ -2222,6 +2457,11 @@
#
# CONFIG_CRYPTO_DEFLATE is not set
# CONFIG_CRYPTO_LZO is not set
+
+#
+# Random Number Generation
+#
+# CONFIG_CRYPTO_ANSI_CPRNG is not set
CONFIG_CRYPTO_HW=y
# CONFIG_CRYPTO_DEV_PADLOCK is not set
# CONFIG_CRYPTO_DEV_GEODE is not set
@@ -2239,6 +2479,7 @@
CONFIG_BITREVERSE=y
CONFIG_GENERIC_FIND_FIRST_BIT=y
CONFIG_GENERIC_FIND_NEXT_BIT=y
+CONFIG_GENERIC_FIND_LAST_BIT=y
# CONFIG_CRC_CCITT is not set
# CONFIG_CRC16 is not set
CONFIG_CRC_T10DIF=y
diff --git a/arch/x86/configs/x86_64_defconfig b/arch/x86/configs/x86_64_defconfig
index 322dd27..9fe5d21 100644
--- a/arch/x86/configs/x86_64_defconfig
+++ b/arch/x86/configs/x86_64_defconfig
@@ -1,14 +1,13 @@
#
# Automatically generated make config: don't edit
-# Linux kernel version: 2.6.27-rc5
-# Wed Sep 3 17:13:39 2008
+# Linux kernel version: 2.6.29-rc4
+# Tue Feb 24 15:44:16 2009
#
CONFIG_64BIT=y
# CONFIG_X86_32 is not set
CONFIG_X86_64=y
CONFIG_X86=y
CONFIG_ARCH_DEFCONFIG="arch/x86/configs/x86_64_defconfig"
-# CONFIG_GENERIC_LOCKBREAK is not set
CONFIG_GENERIC_TIME=y
CONFIG_GENERIC_CMOS_UPDATE=y
CONFIG_CLOCKSOURCE_WATCHDOG=y
@@ -23,17 +22,16 @@
CONFIG_GENERIC_ISA_DMA=y
CONFIG_GENERIC_IOMAP=y
CONFIG_GENERIC_BUG=y
+CONFIG_GENERIC_BUG_RELATIVE_POINTERS=y
CONFIG_GENERIC_HWEIGHT=y
-# CONFIG_GENERIC_GPIO is not set
CONFIG_ARCH_MAY_HAVE_PC_FDC=y
CONFIG_RWSEM_GENERIC_SPINLOCK=y
# CONFIG_RWSEM_XCHGADD_ALGORITHM is not set
-# CONFIG_ARCH_HAS_ILOG2_U32 is not set
-# CONFIG_ARCH_HAS_ILOG2_U64 is not set
CONFIG_ARCH_HAS_CPU_IDLE_WAIT=y
CONFIG_GENERIC_CALIBRATE_DELAY=y
CONFIG_GENERIC_TIME_VSYSCALL=y
CONFIG_ARCH_HAS_CPU_RELAX=y
+CONFIG_ARCH_HAS_DEFAULT_IDLE=y
CONFIG_ARCH_HAS_CACHE_LINE_SIZE=y
CONFIG_HAVE_SETUP_PER_CPU_AREA=y
CONFIG_HAVE_CPUMASK_OF_CPU_MAP=y
@@ -42,12 +40,12 @@
CONFIG_ZONE_DMA32=y
CONFIG_ARCH_POPULATES_NODE_MAP=y
CONFIG_AUDIT_ARCH=y
-CONFIG_ARCH_SUPPORTS_AOUT=y
CONFIG_ARCH_SUPPORTS_OPTIMIZED_INLINING=y
CONFIG_GENERIC_HARDIRQS=y
CONFIG_GENERIC_IRQ_PROBE=y
CONFIG_GENERIC_PENDING_IRQ=y
CONFIG_X86_SMP=y
+CONFIG_USE_GENERIC_SMP_HELPERS=y
CONFIG_X86_64_SMP=y
CONFIG_X86_HT=y
CONFIG_X86_BIOS_REBOOT=y
@@ -76,30 +74,44 @@
CONFIG_AUDIT=y
CONFIG_AUDITSYSCALL=y
CONFIG_AUDIT_TREE=y
+
+#
+# RCU Subsystem
+#
+# CONFIG_CLASSIC_RCU is not set
+CONFIG_TREE_RCU=y
+# CONFIG_PREEMPT_RCU is not set
+# CONFIG_RCU_TRACE is not set
+CONFIG_RCU_FANOUT=64
+# CONFIG_RCU_FANOUT_EXACT is not set
+# CONFIG_TREE_RCU_TRACE is not set
+# CONFIG_PREEMPT_RCU_TRACE is not set
# CONFIG_IKCONFIG is not set
CONFIG_LOG_BUF_SHIFT=18
-CONFIG_CGROUPS=y
-# CONFIG_CGROUP_DEBUG is not set
-CONFIG_CGROUP_NS=y
-# CONFIG_CGROUP_DEVICE is not set
-CONFIG_CPUSETS=y
CONFIG_HAVE_UNSTABLE_SCHED_CLOCK=y
CONFIG_GROUP_SCHED=y
CONFIG_FAIR_GROUP_SCHED=y
# CONFIG_RT_GROUP_SCHED is not set
# CONFIG_USER_SCHED is not set
CONFIG_CGROUP_SCHED=y
+CONFIG_CGROUPS=y
+# CONFIG_CGROUP_DEBUG is not set
+CONFIG_CGROUP_NS=y
+CONFIG_CGROUP_FREEZER=y
+# CONFIG_CGROUP_DEVICE is not set
+CONFIG_CPUSETS=y
+CONFIG_PROC_PID_CPUSET=y
CONFIG_CGROUP_CPUACCT=y
CONFIG_RESOURCE_COUNTERS=y
# CONFIG_CGROUP_MEM_RES_CTLR is not set
# CONFIG_SYSFS_DEPRECATED_V2 is not set
-CONFIG_PROC_PID_CPUSET=y
CONFIG_RELAY=y
CONFIG_NAMESPACES=y
CONFIG_UTS_NS=y
CONFIG_IPC_NS=y
CONFIG_USER_NS=y
CONFIG_PID_NS=y
+CONFIG_NET_NS=y
CONFIG_BLK_DEV_INITRD=y
CONFIG_INITRAMFS_SOURCE=""
CONFIG_CC_OPTIMIZE_FOR_SIZE=y
@@ -124,12 +136,15 @@
CONFIG_TIMERFD=y
CONFIG_EVENTFD=y
CONFIG_SHMEM=y
+CONFIG_AIO=y
CONFIG_VM_EVENT_COUNTERS=y
+CONFIG_PCI_QUIRKS=y
CONFIG_SLUB_DEBUG=y
# CONFIG_SLAB is not set
CONFIG_SLUB=y
# CONFIG_SLOB is not set
CONFIG_PROFILING=y
+CONFIG_TRACEPOINTS=y
CONFIG_MARKERS=y
# CONFIG_OPROFILE is not set
CONFIG_HAVE_OPROFILE=y
@@ -139,15 +154,10 @@
CONFIG_HAVE_IOREMAP_PROT=y
CONFIG_HAVE_KPROBES=y
CONFIG_HAVE_KRETPROBES=y
-# CONFIG_HAVE_ARCH_TRACEHOOK is not set
-# CONFIG_HAVE_DMA_ATTRS is not set
-CONFIG_USE_GENERIC_SMP_HELPERS=y
-# CONFIG_HAVE_CLK is not set
-CONFIG_PROC_PAGE_MONITOR=y
+CONFIG_HAVE_ARCH_TRACEHOOK=y
# CONFIG_HAVE_GENERIC_DMA_COHERENT is not set
CONFIG_SLABINFO=y
CONFIG_RT_MUTEXES=y
-# CONFIG_TINY_SHMEM is not set
CONFIG_BASE_SMALL=0
CONFIG_MODULES=y
# CONFIG_MODULE_FORCE_LOAD is not set
@@ -155,7 +165,6 @@
CONFIG_MODULE_FORCE_UNLOAD=y
# CONFIG_MODVERSIONS is not set
# CONFIG_MODULE_SRCVERSION_ALL is not set
-CONFIG_KMOD=y
CONFIG_STOP_MACHINE=y
CONFIG_BLOCK=y
CONFIG_BLK_DEV_IO_TRACE=y
@@ -175,7 +184,7 @@
CONFIG_DEFAULT_CFQ=y
# CONFIG_DEFAULT_NOOP is not set
CONFIG_DEFAULT_IOSCHED="cfq"
-CONFIG_CLASSIC_RCU=y
+CONFIG_FREEZER=y
#
# Processor type and features
@@ -185,13 +194,14 @@
CONFIG_HIGH_RES_TIMERS=y
CONFIG_GENERIC_CLOCKEVENTS_BUILD=y
CONFIG_SMP=y
+CONFIG_SPARSE_IRQ=y
+# CONFIG_NUMA_MIGRATE_IRQ_DESC is not set
CONFIG_X86_FIND_SMP_CONFIG=y
CONFIG_X86_MPPARSE=y
-CONFIG_X86_PC=y
# CONFIG_X86_ELAN is not set
-# CONFIG_X86_VOYAGER is not set
# CONFIG_X86_GENERICARCH is not set
# CONFIG_X86_VSMP is not set
+CONFIG_SCHED_OMIT_FRAME_POINTER=y
# CONFIG_PARAVIRT_GUEST is not set
# CONFIG_MEMTEST is not set
# CONFIG_M386 is not set
@@ -230,6 +240,11 @@
CONFIG_X86_CMOV=y
CONFIG_X86_MINIMUM_CPU_FAMILY=64
CONFIG_X86_DEBUGCTLMSR=y
+CONFIG_CPU_SUP_INTEL=y
+CONFIG_CPU_SUP_AMD=y
+CONFIG_CPU_SUP_CENTAUR_64=y
+CONFIG_X86_DS=y
+CONFIG_X86_PTRACE_BTS=y
CONFIG_HPET_TIMER=y
CONFIG_HPET_EMULATE_RTC=y
CONFIG_DMI=y
@@ -237,8 +252,11 @@
CONFIG_CALGARY_IOMMU=y
CONFIG_CALGARY_IOMMU_ENABLED_BY_DEFAULT=y
CONFIG_AMD_IOMMU=y
+CONFIG_AMD_IOMMU_STATS=y
CONFIG_SWIOTLB=y
CONFIG_IOMMU_HELPER=y
+CONFIG_IOMMU_API=y
+# CONFIG_MAXSMP is not set
CONFIG_NR_CPUS=64
CONFIG_SCHED_SMT=y
CONFIG_SCHED_MC=y
@@ -247,12 +265,19 @@
# CONFIG_PREEMPT is not set
CONFIG_X86_LOCAL_APIC=y
CONFIG_X86_IO_APIC=y
-# CONFIG_X86_MCE is not set
+CONFIG_X86_REROUTE_FOR_BROKEN_BOOT_IRQS=y
+CONFIG_X86_MCE=y
+CONFIG_X86_MCE_INTEL=y
+CONFIG_X86_MCE_AMD=y
# CONFIG_I8K is not set
CONFIG_MICROCODE=y
+CONFIG_MICROCODE_INTEL=y
+CONFIG_MICROCODE_AMD=y
CONFIG_MICROCODE_OLD_INTERFACE=y
CONFIG_X86_MSR=y
CONFIG_X86_CPUID=y
+CONFIG_ARCH_PHYS_ADDR_T_64BIT=y
+CONFIG_DIRECT_GBPAGES=y
CONFIG_NUMA=y
CONFIG_K8_NUMA=y
CONFIG_X86_64_ACPI_NUMA=y
@@ -269,7 +294,6 @@
CONFIG_SPARSEMEM=y
CONFIG_NEED_MULTIPLE_NODES=y
CONFIG_HAVE_MEMORY_PRESENT=y
-# CONFIG_SPARSEMEM_STATIC is not set
CONFIG_SPARSEMEM_EXTREME=y
CONFIG_SPARSEMEM_VMEMMAP_ENABLE=y
CONFIG_SPARSEMEM_VMEMMAP=y
@@ -280,10 +304,14 @@
CONFIG_PAGEFLAGS_EXTENDED=y
CONFIG_SPLIT_PTLOCK_CPUS=4
CONFIG_MIGRATION=y
-CONFIG_RESOURCES_64BIT=y
+CONFIG_PHYS_ADDR_T_64BIT=y
CONFIG_ZONE_DMA_FLAG=1
CONFIG_BOUNCE=y
CONFIG_VIRT_TO_BUS=y
+CONFIG_UNEVICTABLE_LRU=y
+CONFIG_X86_CHECK_BIOS_CORRUPTION=y
+CONFIG_X86_BOOTPARAM_MEMORY_CORRUPTION_CHECK=y
+CONFIG_X86_RESERVE_LOW_64K=y
CONFIG_MTRR=y
# CONFIG_MTRR_SANITIZER is not set
CONFIG_X86_PAT=y
@@ -302,11 +330,12 @@
CONFIG_PHYSICAL_ALIGN=0x200000
CONFIG_HOTPLUG_CPU=y
# CONFIG_COMPAT_VDSO is not set
+# CONFIG_CMDLINE_BOOL is not set
CONFIG_ARCH_ENABLE_MEMORY_HOTPLUG=y
CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID=y
#
-# Power management options
+# Power management and ACPI options
#
CONFIG_ARCH_HIBERNATION_HEADER=y
CONFIG_PM=y
@@ -333,20 +362,14 @@
CONFIG_ACPI_BUTTON=y
CONFIG_ACPI_FAN=y
CONFIG_ACPI_DOCK=y
-# CONFIG_ACPI_BAY is not set
CONFIG_ACPI_PROCESSOR=y
CONFIG_ACPI_HOTPLUG_CPU=y
CONFIG_ACPI_THERMAL=y
CONFIG_ACPI_NUMA=y
-# CONFIG_ACPI_WMI is not set
-# CONFIG_ACPI_ASUS is not set
-# CONFIG_ACPI_TOSHIBA is not set
# CONFIG_ACPI_CUSTOM_DSDT is not set
CONFIG_ACPI_BLACKLIST_YEAR=0
# CONFIG_ACPI_DEBUG is not set
-CONFIG_ACPI_EC=y
# CONFIG_ACPI_PCI_SLOT is not set
-CONFIG_ACPI_POWER=y
CONFIG_ACPI_SYSTEM=y
CONFIG_X86_PM_TIMER=y
CONFIG_ACPI_CONTAINER=y
@@ -381,13 +404,17 @@
#
# shared options
#
-# CONFIG_X86_ACPI_CPUFREQ_PROC_INTF is not set
# CONFIG_X86_SPEEDSTEP_LIB is not set
CONFIG_CPU_IDLE=y
CONFIG_CPU_IDLE_GOV_LADDER=y
CONFIG_CPU_IDLE_GOV_MENU=y
#
+# Memory power savings
+#
+# CONFIG_I7300_IDLE is not set
+
+#
# Bus options (PCI etc.)
#
CONFIG_PCI=y
@@ -395,8 +422,10 @@
CONFIG_PCI_MMCONFIG=y
CONFIG_PCI_DOMAINS=y
CONFIG_DMAR=y
+# CONFIG_DMAR_DEFAULT_ON is not set
CONFIG_DMAR_GFX_WA=y
CONFIG_DMAR_FLOPPY_WA=y
+# CONFIG_INTR_REMAP is not set
CONFIG_PCIEPORTBUS=y
# CONFIG_HOTPLUG_PCI_PCIE is not set
CONFIG_PCIEAER=y
@@ -405,6 +434,7 @@
CONFIG_PCI_MSI=y
# CONFIG_PCI_LEGACY is not set
# CONFIG_PCI_DEBUG is not set
+# CONFIG_PCI_STUB is not set
CONFIG_HT_IRQ=y
CONFIG_ISA_DMA_API=y
CONFIG_K8_NB=y
@@ -438,6 +468,8 @@
#
CONFIG_BINFMT_ELF=y
CONFIG_COMPAT_BINFMT_ELF=y
+CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS=y
+# CONFIG_HAVE_AOUT is not set
CONFIG_BINFMT_MISC=y
CONFIG_IA32_EMULATION=y
# CONFIG_IA32_AOUT is not set
@@ -449,6 +481,7 @@
#
# Networking options
#
+CONFIG_COMPAT_NET_DEV_OPS=y
CONFIG_PACKET=y
CONFIG_PACKET_MMAP=y
CONFIG_UNIX=y
@@ -509,7 +542,6 @@
# CONFIG_DEFAULT_RENO is not set
CONFIG_DEFAULT_TCP_CONG="cubic"
CONFIG_TCP_MD5SIG=y
-# CONFIG_IP_VS is not set
CONFIG_IPV6=y
# CONFIG_IPV6_PRIVACY is not set
# CONFIG_IPV6_ROUTER_PREF is not set
@@ -547,19 +579,21 @@
CONFIG_NF_CONNTRACK_SIP=y
CONFIG_NF_CT_NETLINK=y
CONFIG_NETFILTER_XTABLES=y
+CONFIG_NETFILTER_XT_TARGET_CONNSECMARK=y
CONFIG_NETFILTER_XT_TARGET_MARK=y
CONFIG_NETFILTER_XT_TARGET_NFLOG=y
CONFIG_NETFILTER_XT_TARGET_SECMARK=y
-CONFIG_NETFILTER_XT_TARGET_CONNSECMARK=y
CONFIG_NETFILTER_XT_TARGET_TCPMSS=y
CONFIG_NETFILTER_XT_MATCH_CONNTRACK=y
CONFIG_NETFILTER_XT_MATCH_MARK=y
CONFIG_NETFILTER_XT_MATCH_POLICY=y
CONFIG_NETFILTER_XT_MATCH_STATE=y
+# CONFIG_IP_VS is not set
#
# IP: Netfilter Configuration
#
+CONFIG_NF_DEFRAG_IPV4=y
CONFIG_NF_CONNTRACK_IPV4=y
CONFIG_NF_CONNTRACK_PROC_COMPAT=y
CONFIG_IP_NF_IPTABLES=y
@@ -585,8 +619,8 @@
CONFIG_NF_CONNTRACK_IPV6=y
CONFIG_IP6_NF_IPTABLES=y
CONFIG_IP6_NF_MATCH_IPV6HEADER=y
-CONFIG_IP6_NF_FILTER=y
CONFIG_IP6_NF_TARGET_LOG=y
+CONFIG_IP6_NF_FILTER=y
CONFIG_IP6_NF_TARGET_REJECT=y
CONFIG_IP6_NF_MANGLE=y
# CONFIG_IP_DCCP is not set
@@ -594,6 +628,7 @@
# CONFIG_TIPC is not set
# CONFIG_ATM is not set
# CONFIG_BRIDGE is not set
+# CONFIG_NET_DSA is not set
# CONFIG_VLAN_8021Q is not set
# CONFIG_DECNET is not set
CONFIG_LLC=y
@@ -613,6 +648,7 @@
# CONFIG_NET_SCH_HTB is not set
# CONFIG_NET_SCH_HFSC is not set
# CONFIG_NET_SCH_PRIO is not set
+# CONFIG_NET_SCH_MULTIQ is not set
# CONFIG_NET_SCH_RED is not set
# CONFIG_NET_SCH_SFQ is not set
# CONFIG_NET_SCH_TEQL is not set
@@ -620,6 +656,7 @@
# CONFIG_NET_SCH_GRED is not set
# CONFIG_NET_SCH_DSMARK is not set
# CONFIG_NET_SCH_NETEM is not set
+# CONFIG_NET_SCH_DRR is not set
# CONFIG_NET_SCH_INGRESS is not set
#
@@ -634,6 +671,7 @@
# CONFIG_NET_CLS_RSVP is not set
# CONFIG_NET_CLS_RSVP6 is not set
# CONFIG_NET_CLS_FLOW is not set
+# CONFIG_NET_CLS_CGROUP is not set
CONFIG_NET_EMATCH=y
CONFIG_NET_EMATCH_STACK=32
# CONFIG_NET_EMATCH_CMP is not set
@@ -649,7 +687,9 @@
# CONFIG_NET_ACT_NAT is not set
# CONFIG_NET_ACT_PEDIT is not set
# CONFIG_NET_ACT_SIMP is not set
+# CONFIG_NET_ACT_SKBEDIT is not set
CONFIG_NET_SCH_FIFO=y
+# CONFIG_DCB is not set
#
# Network testing
@@ -666,29 +706,33 @@
# CONFIG_IRDA is not set
# CONFIG_BT is not set
# CONFIG_AF_RXRPC is not set
+# CONFIG_PHONET is not set
CONFIG_FIB_RULES=y
-
-#
-# Wireless
-#
+CONFIG_WIRELESS=y
CONFIG_CFG80211=y
+# CONFIG_CFG80211_REG_DEBUG is not set
CONFIG_NL80211=y
+CONFIG_WIRELESS_OLD_REGULATORY=y
CONFIG_WIRELESS_EXT=y
CONFIG_WIRELESS_EXT_SYSFS=y
+# CONFIG_LIB80211 is not set
CONFIG_MAC80211=y
#
# Rate control algorithm selection
#
-CONFIG_MAC80211_RC_PID=y
-CONFIG_MAC80211_RC_DEFAULT_PID=y
-CONFIG_MAC80211_RC_DEFAULT="pid"
+CONFIG_MAC80211_RC_MINSTREL=y
+# CONFIG_MAC80211_RC_DEFAULT_PID is not set
+CONFIG_MAC80211_RC_DEFAULT_MINSTREL=y
+CONFIG_MAC80211_RC_DEFAULT="minstrel"
# CONFIG_MAC80211_MESH is not set
CONFIG_MAC80211_LEDS=y
# CONFIG_MAC80211_DEBUGFS is not set
# CONFIG_MAC80211_DEBUG_MENU is not set
-# CONFIG_IEEE80211 is not set
-# CONFIG_RFKILL is not set
+# CONFIG_WIMAX is not set
+CONFIG_RFKILL=y
+# CONFIG_RFKILL_INPUT is not set
+CONFIG_RFKILL_LEDS=y
# CONFIG_NET_9P is not set
#
@@ -712,7 +756,7 @@
# CONFIG_MTD is not set
# CONFIG_PARPORT is not set
CONFIG_PNP=y
-# CONFIG_PNP_DEBUG is not set
+CONFIG_PNP_DEBUG_MESSAGES=y
#
# Protocols
@@ -740,21 +784,21 @@
CONFIG_MISC_DEVICES=y
# CONFIG_IBM_ASM is not set
# CONFIG_PHANTOM is not set
-# CONFIG_EEPROM_93CX6 is not set
# CONFIG_SGI_IOC4 is not set
# CONFIG_TIFM_CORE is not set
-# CONFIG_ACER_WMI is not set
-# CONFIG_ASUS_LAPTOP is not set
-# CONFIG_FUJITSU_LAPTOP is not set
-# CONFIG_MSI_LAPTOP is not set
-# CONFIG_COMPAL_LAPTOP is not set
-# CONFIG_SONY_LAPTOP is not set
-# CONFIG_THINKPAD_ACPI is not set
-# CONFIG_INTEL_MENLOW is not set
+# CONFIG_ICS932S401 is not set
# CONFIG_ENCLOSURE_SERVICES is not set
# CONFIG_SGI_XP is not set
# CONFIG_HP_ILO is not set
# CONFIG_SGI_GRU is not set
+# CONFIG_C2PORT is not set
+
+#
+# EEPROM support
+#
+# CONFIG_EEPROM_AT24 is not set
+# CONFIG_EEPROM_LEGACY is not set
+# CONFIG_EEPROM_93CX6 is not set
CONFIG_HAVE_IDE=y
# CONFIG_IDE is not set
@@ -793,7 +837,7 @@
#
CONFIG_SCSI_SPI_ATTRS=y
# CONFIG_SCSI_FC_ATTRS is not set
-CONFIG_SCSI_ISCSI_ATTRS=y
+# CONFIG_SCSI_ISCSI_ATTRS is not set
# CONFIG_SCSI_SAS_ATTRS is not set
# CONFIG_SCSI_SAS_LIBSAS is not set
# CONFIG_SCSI_SRP_ATTRS is not set
@@ -864,6 +908,7 @@
CONFIG_PATA_SCH=y
CONFIG_MD=y
CONFIG_BLK_DEV_MD=y
+CONFIG_MD_AUTODETECT=y
# CONFIG_MD_LINEAR is not set
# CONFIG_MD_RAID0 is not set
# CONFIG_MD_RAID1 is not set
@@ -919,6 +964,9 @@
# CONFIG_BROADCOM_PHY is not set
# CONFIG_ICPLUS_PHY is not set
# CONFIG_REALTEK_PHY is not set
+# CONFIG_NATIONAL_PHY is not set
+# CONFIG_STE10XP is not set
+# CONFIG_LSI_ET1011C_PHY is not set
# CONFIG_FIXED_PHY is not set
# CONFIG_MDIO_BITBANG is not set
CONFIG_NET_ETHERNET=y
@@ -942,6 +990,9 @@
# CONFIG_IBM_NEW_EMAC_RGMII is not set
# CONFIG_IBM_NEW_EMAC_TAH is not set
# CONFIG_IBM_NEW_EMAC_EMAC4 is not set
+# CONFIG_IBM_NEW_EMAC_NO_FLOW_CTRL is not set
+# CONFIG_IBM_NEW_EMAC_MAL_CLR_ICINTSTAT is not set
+# CONFIG_IBM_NEW_EMAC_MAL_COMMON_ERR is not set
CONFIG_NET_PCI=y
# CONFIG_PCNET32 is not set
# CONFIG_AMD8111_ETH is not set
@@ -949,7 +1000,6 @@
# CONFIG_B44 is not set
CONFIG_FORCEDETH=y
# CONFIG_FORCEDETH_NAPI is not set
-# CONFIG_EEPRO100 is not set
CONFIG_E100=y
# CONFIG_FEALNX is not set
# CONFIG_NATSEMI is not set
@@ -963,15 +1013,16 @@
# CONFIG_R6040 is not set
# CONFIG_SIS900 is not set
# CONFIG_EPIC100 is not set
+# CONFIG_SMSC9420 is not set
# CONFIG_SUNDANCE is not set
# CONFIG_TLAN is not set
# CONFIG_VIA_RHINE is not set
# CONFIG_SC92031 is not set
+# CONFIG_ATL2 is not set
CONFIG_NETDEV_1000=y
# CONFIG_ACENIC is not set
# CONFIG_DL2K is not set
CONFIG_E1000=y
-# CONFIG_E1000_DISABLE_PACKET_SPLIT is not set
# CONFIG_E1000E is not set
# CONFIG_IP1000 is not set
# CONFIG_IGB is not set
@@ -989,18 +1040,23 @@
# CONFIG_QLA3XXX is not set
# CONFIG_ATL1 is not set
# CONFIG_ATL1E is not set
+# CONFIG_JME is not set
CONFIG_NETDEV_10000=y
# CONFIG_CHELSIO_T1 is not set
+CONFIG_CHELSIO_T3_DEPENDS=y
# CONFIG_CHELSIO_T3 is not set
+# CONFIG_ENIC is not set
# CONFIG_IXGBE is not set
# CONFIG_IXGB is not set
# CONFIG_S2IO is not set
# CONFIG_MYRI10GE is not set
# CONFIG_NETXEN_NIC is not set
# CONFIG_NIU is not set
+# CONFIG_MLX4_EN is not set
# CONFIG_MLX4_CORE is not set
# CONFIG_TEHUTI is not set
# CONFIG_BNX2X is not set
+# CONFIG_QLGE is not set
# CONFIG_SFC is not set
CONFIG_TR=y
# CONFIG_IBMOL is not set
@@ -1013,9 +1069,8 @@
# CONFIG_WLAN_PRE80211 is not set
CONFIG_WLAN_80211=y
# CONFIG_PCMCIA_RAYCS is not set
-# CONFIG_IPW2100 is not set
-# CONFIG_IPW2200 is not set
# CONFIG_LIBERTAS is not set
+# CONFIG_LIBERTAS_THINFIRM is not set
# CONFIG_AIRO is not set
# CONFIG_HERMES is not set
# CONFIG_ATMEL is not set
@@ -1032,6 +1087,8 @@
CONFIG_ATH5K=y
# CONFIG_ATH5K_DEBUG is not set
# CONFIG_ATH9K is not set
+# CONFIG_IPW2100 is not set
+# CONFIG_IPW2200 is not set
# CONFIG_IWLCORE is not set
# CONFIG_IWLWIFI_LEDS is not set
# CONFIG_IWLAGN is not set
@@ -1043,6 +1100,10 @@
# CONFIG_RT2X00 is not set
#
+# Enable WiMAX (Networking options) to see the WiMAX drivers
+#
+
+#
# USB Network Adapters
#
# CONFIG_USB_CATC is not set
@@ -1050,6 +1111,7 @@
# CONFIG_USB_PEGASUS is not set
# CONFIG_USB_RTL8150 is not set
# CONFIG_USB_USBNET is not set
+# CONFIG_USB_HSO is not set
CONFIG_NET_PCMCIA=y
# CONFIG_PCMCIA_3C589 is not set
# CONFIG_PCMCIA_3C574 is not set
@@ -1059,6 +1121,7 @@
# CONFIG_PCMCIA_SMC91C92 is not set
# CONFIG_PCMCIA_XIRC2PS is not set
# CONFIG_PCMCIA_AXNET is not set
+# CONFIG_PCMCIA_IBMTR is not set
# CONFIG_WAN is not set
CONFIG_FDDI=y
# CONFIG_DEFXX is not set
@@ -1110,6 +1173,7 @@
CONFIG_MOUSE_PS2_SYNAPTICS=y
CONFIG_MOUSE_PS2_LIFEBOOK=y
CONFIG_MOUSE_PS2_TRACKPOINT=y
+# CONFIG_MOUSE_PS2_ELANTECH is not set
# CONFIG_MOUSE_PS2_TOUCHKIT is not set
# CONFIG_MOUSE_SERIAL is not set
# CONFIG_MOUSE_APPLETOUCH is not set
@@ -1147,15 +1211,16 @@
# CONFIG_TOUCHSCREEN_FUJITSU is not set
# CONFIG_TOUCHSCREEN_GUNZE is not set
# CONFIG_TOUCHSCREEN_ELO is not set
+# CONFIG_TOUCHSCREEN_WACOM_W8001 is not set
# CONFIG_TOUCHSCREEN_MTOUCH is not set
# CONFIG_TOUCHSCREEN_INEXIO is not set
# CONFIG_TOUCHSCREEN_MK712 is not set
# CONFIG_TOUCHSCREEN_PENMOUNT is not set
# CONFIG_TOUCHSCREEN_TOUCHRIGHT is not set
# CONFIG_TOUCHSCREEN_TOUCHWIN is not set
-# CONFIG_TOUCHSCREEN_UCB1400 is not set
# CONFIG_TOUCHSCREEN_USB_COMPOSITE is not set
# CONFIG_TOUCHSCREEN_TOUCHIT213 is not set
+# CONFIG_TOUCHSCREEN_TSC2007 is not set
CONFIG_INPUT_MISC=y
# CONFIG_INPUT_PCSPKR is not set
# CONFIG_INPUT_APANEL is not set
@@ -1165,6 +1230,7 @@
# CONFIG_INPUT_KEYSPAN_REMOTE is not set
# CONFIG_INPUT_POWERMATE is not set
# CONFIG_INPUT_YEALINK is not set
+# CONFIG_INPUT_CM109 is not set
# CONFIG_INPUT_UINPUT is not set
#
@@ -1231,6 +1297,7 @@
CONFIG_SERIAL_CORE_CONSOLE=y
# CONFIG_SERIAL_JSM is not set
CONFIG_UNIX98_PTYS=y
+# CONFIG_DEVPTS_MULTIPLE_INSTANCES is not set
# CONFIG_LEGACY_PTYS is not set
# CONFIG_IPMI_HANDLER is not set
CONFIG_HW_RANDOM=y
@@ -1260,6 +1327,7 @@
CONFIG_I2C_BOARDINFO=y
# CONFIG_I2C_CHARDEV is not set
CONFIG_I2C_HELPER_AUTO=y
+CONFIG_I2C_ALGOBIT=y
#
# I2C Hardware Bus support
@@ -1311,8 +1379,6 @@
# Miscellaneous I2C Chip support
#
# CONFIG_DS1682 is not set
-# CONFIG_EEPROM_AT24 is not set
-# CONFIG_EEPROM_LEGACY is not set
# CONFIG_SENSORS_PCF8574 is not set
# CONFIG_PCF8575 is not set
# CONFIG_SENSORS_PCA9539 is not set
@@ -1331,8 +1397,78 @@
# CONFIG_POWER_SUPPLY_DEBUG is not set
# CONFIG_PDA_POWER is not set
# CONFIG_BATTERY_DS2760 is not set
-# CONFIG_HWMON is not set
+# CONFIG_BATTERY_BQ27x00 is not set
+CONFIG_HWMON=y
+# CONFIG_HWMON_VID is not set
+# CONFIG_SENSORS_ABITUGURU is not set
+# CONFIG_SENSORS_ABITUGURU3 is not set
+# CONFIG_SENSORS_AD7414 is not set
+# CONFIG_SENSORS_AD7418 is not set
+# CONFIG_SENSORS_ADM1021 is not set
+# CONFIG_SENSORS_ADM1025 is not set
+# CONFIG_SENSORS_ADM1026 is not set
+# CONFIG_SENSORS_ADM1029 is not set
+# CONFIG_SENSORS_ADM1031 is not set
+# CONFIG_SENSORS_ADM9240 is not set
+# CONFIG_SENSORS_ADT7462 is not set
+# CONFIG_SENSORS_ADT7470 is not set
+# CONFIG_SENSORS_ADT7473 is not set
+# CONFIG_SENSORS_ADT7475 is not set
+# CONFIG_SENSORS_K8TEMP is not set
+# CONFIG_SENSORS_ASB100 is not set
+# CONFIG_SENSORS_ATXP1 is not set
+# CONFIG_SENSORS_DS1621 is not set
+# CONFIG_SENSORS_I5K_AMB is not set
+# CONFIG_SENSORS_F71805F is not set
+# CONFIG_SENSORS_F71882FG is not set
+# CONFIG_SENSORS_F75375S is not set
+# CONFIG_SENSORS_FSCHER is not set
+# CONFIG_SENSORS_FSCPOS is not set
+# CONFIG_SENSORS_FSCHMD is not set
+# CONFIG_SENSORS_GL518SM is not set
+# CONFIG_SENSORS_GL520SM is not set
+# CONFIG_SENSORS_CORETEMP is not set
+# CONFIG_SENSORS_IT87 is not set
+# CONFIG_SENSORS_LM63 is not set
+# CONFIG_SENSORS_LM75 is not set
+# CONFIG_SENSORS_LM77 is not set
+# CONFIG_SENSORS_LM78 is not set
+# CONFIG_SENSORS_LM80 is not set
+# CONFIG_SENSORS_LM83 is not set
+# CONFIG_SENSORS_LM85 is not set
+# CONFIG_SENSORS_LM87 is not set
+# CONFIG_SENSORS_LM90 is not set
+# CONFIG_SENSORS_LM92 is not set
+# CONFIG_SENSORS_LM93 is not set
+# CONFIG_SENSORS_LTC4245 is not set
+# CONFIG_SENSORS_MAX1619 is not set
+# CONFIG_SENSORS_MAX6650 is not set
+# CONFIG_SENSORS_PC87360 is not set
+# CONFIG_SENSORS_PC87427 is not set
+# CONFIG_SENSORS_SIS5595 is not set
+# CONFIG_SENSORS_DME1737 is not set
+# CONFIG_SENSORS_SMSC47M1 is not set
+# CONFIG_SENSORS_SMSC47M192 is not set
+# CONFIG_SENSORS_SMSC47B397 is not set
+# CONFIG_SENSORS_ADS7828 is not set
+# CONFIG_SENSORS_THMC50 is not set
+# CONFIG_SENSORS_VIA686A is not set
+# CONFIG_SENSORS_VT1211 is not set
+# CONFIG_SENSORS_VT8231 is not set
+# CONFIG_SENSORS_W83781D is not set
+# CONFIG_SENSORS_W83791D is not set
+# CONFIG_SENSORS_W83792D is not set
+# CONFIG_SENSORS_W83793 is not set
+# CONFIG_SENSORS_W83L785TS is not set
+# CONFIG_SENSORS_W83L786NG is not set
+# CONFIG_SENSORS_W83627HF is not set
+# CONFIG_SENSORS_W83627EHF is not set
+# CONFIG_SENSORS_HDAPS is not set
+# CONFIG_SENSORS_LIS3LV02D is not set
+# CONFIG_SENSORS_APPLESMC is not set
+# CONFIG_HWMON_DEBUG_CHIP is not set
CONFIG_THERMAL=y
+# CONFIG_THERMAL_HWMON is not set
CONFIG_WATCHDOG=y
# CONFIG_WATCHDOG_NOWAYOUT is not set
@@ -1352,15 +1488,18 @@
# CONFIG_I6300ESB_WDT is not set
# CONFIG_ITCO_WDT is not set
# CONFIG_IT8712F_WDT is not set
+# CONFIG_IT87_WDT is not set
# CONFIG_HP_WATCHDOG is not set
# CONFIG_SC1200_WDT is not set
# CONFIG_PC87413_WDT is not set
# CONFIG_60XX_WDT is not set
# CONFIG_SBC8360_WDT is not set
# CONFIG_CPU5_WDT is not set
+# CONFIG_SMSC_SCH311X_WDT is not set
# CONFIG_SMSC37B787_WDT is not set
# CONFIG_W83627HF_WDT is not set
# CONFIG_W83697HF_WDT is not set
+# CONFIG_W83697UG_WDT is not set
# CONFIG_W83877F_WDT is not set
# CONFIG_W83977F_WDT is not set
# CONFIG_MACHZ_WDT is not set
@@ -1376,11 +1515,11 @@
# USB-based Watchdog Cards
#
# CONFIG_USBPCWATCHDOG is not set
+CONFIG_SSB_POSSIBLE=y
#
# Sonics Silicon Backplane
#
-CONFIG_SSB_POSSIBLE=y
# CONFIG_SSB is not set
#
@@ -1389,7 +1528,13 @@
# CONFIG_MFD_CORE is not set
# CONFIG_MFD_SM501 is not set
# CONFIG_HTC_PASIC3 is not set
+# CONFIG_TWL4030_CORE is not set
# CONFIG_MFD_TMIO is not set
+# CONFIG_PMIC_DA903X is not set
+# CONFIG_MFD_WM8400 is not set
+# CONFIG_MFD_WM8350_I2C is not set
+# CONFIG_MFD_PCF50633 is not set
+# CONFIG_REGULATOR is not set
#
# Multimedia devices
@@ -1423,6 +1568,7 @@
# CONFIG_DRM_I810 is not set
# CONFIG_DRM_I830 is not set
CONFIG_DRM_I915=y
+CONFIG_DRM_I915_KMS=y
# CONFIG_DRM_MGA is not set
# CONFIG_DRM_SIS is not set
# CONFIG_DRM_VIA is not set
@@ -1432,6 +1578,7 @@
CONFIG_FB=y
# CONFIG_FIRMWARE_EDID is not set
# CONFIG_FB_DDC is not set
+# CONFIG_FB_BOOT_VESA_SUPPORT is not set
CONFIG_FB_CFB_FILLRECT=y
CONFIG_FB_CFB_COPYAREA=y
CONFIG_FB_CFB_IMAGEBLIT=y
@@ -1460,7 +1607,6 @@
# CONFIG_FB_UVESA is not set
# CONFIG_FB_VESA is not set
CONFIG_FB_EFI=y
-# CONFIG_FB_IMAC is not set
# CONFIG_FB_N411 is not set
# CONFIG_FB_HGA is not set
# CONFIG_FB_S1D13XXX is not set
@@ -1475,6 +1621,7 @@
# CONFIG_FB_S3 is not set
# CONFIG_FB_SAVAGE is not set
# CONFIG_FB_SIS is not set
+# CONFIG_FB_VIA is not set
# CONFIG_FB_NEOMAGIC is not set
# CONFIG_FB_KYRO is not set
# CONFIG_FB_3DFX is not set
@@ -1486,12 +1633,15 @@
# CONFIG_FB_CARMINE is not set
# CONFIG_FB_GEODE is not set
# CONFIG_FB_VIRTUAL is not set
+# CONFIG_FB_METRONOME is not set
+# CONFIG_FB_MB862XX is not set
CONFIG_BACKLIGHT_LCD_SUPPORT=y
# CONFIG_LCD_CLASS_DEVICE is not set
CONFIG_BACKLIGHT_CLASS_DEVICE=y
-# CONFIG_BACKLIGHT_CORGI is not set
+CONFIG_BACKLIGHT_GENERIC=y
# CONFIG_BACKLIGHT_PROGEAR is not set
# CONFIG_BACKLIGHT_MBP_NVIDIA is not set
+# CONFIG_BACKLIGHT_SAHARA is not set
#
# Display device support
@@ -1511,10 +1661,12 @@
# CONFIG_LOGO_LINUX_VGA16 is not set
CONFIG_LOGO_LINUX_CLUT224=y
CONFIG_SOUND=y
+CONFIG_SOUND_OSS_CORE=y
CONFIG_SND=y
CONFIG_SND_TIMER=y
CONFIG_SND_PCM=y
CONFIG_SND_HWDEP=y
+CONFIG_SND_JACK=y
CONFIG_SND_SEQUENCER=y
CONFIG_SND_SEQ_DUMMY=y
CONFIG_SND_OSSEMUL=y
@@ -1522,6 +1674,8 @@
CONFIG_SND_PCM_OSS=y
CONFIG_SND_PCM_OSS_PLUGINS=y
CONFIG_SND_SEQUENCER_OSS=y
+CONFIG_SND_HRTIMER=y
+CONFIG_SND_SEQ_HRTIMER_DEFAULT=y
CONFIG_SND_DYNAMIC_MINORS=y
CONFIG_SND_SUPPORT_OLD_API=y
CONFIG_SND_VERBOSE_PROCFS=y
@@ -1575,11 +1729,16 @@
# CONFIG_SND_FM801 is not set
CONFIG_SND_HDA_INTEL=y
CONFIG_SND_HDA_HWDEP=y
+# CONFIG_SND_HDA_RECONFIG is not set
+# CONFIG_SND_HDA_INPUT_BEEP is not set
CONFIG_SND_HDA_CODEC_REALTEK=y
CONFIG_SND_HDA_CODEC_ANALOG=y
CONFIG_SND_HDA_CODEC_SIGMATEL=y
CONFIG_SND_HDA_CODEC_VIA=y
CONFIG_SND_HDA_CODEC_ATIHDMI=y
+CONFIG_SND_HDA_CODEC_NVHDMI=y
+CONFIG_SND_HDA_CODEC_INTELHDMI=y
+CONFIG_SND_HDA_ELD=y
CONFIG_SND_HDA_CODEC_CONEXANT=y
CONFIG_SND_HDA_CODEC_CMEDIA=y
CONFIG_SND_HDA_CODEC_SI3054=y
@@ -1612,6 +1771,7 @@
# CONFIG_SND_USB_AUDIO is not set
# CONFIG_SND_USB_USX2Y is not set
# CONFIG_SND_USB_CAIAQ is not set
+# CONFIG_SND_USB_US122L is not set
CONFIG_SND_PCMCIA=y
# CONFIG_SND_VXPOCKET is not set
# CONFIG_SND_PDAUDIOCF is not set
@@ -1626,15 +1786,37 @@
# USB Input Devices
#
CONFIG_USB_HID=y
-CONFIG_USB_HIDINPUT_POWERBOOK=y
-CONFIG_HID_FF=y
CONFIG_HID_PID=y
+CONFIG_USB_HIDDEV=y
+
+#
+# Special HID drivers
+#
+CONFIG_HID_COMPAT=y
+CONFIG_HID_A4TECH=y
+CONFIG_HID_APPLE=y
+CONFIG_HID_BELKIN=y
+CONFIG_HID_CHERRY=y
+CONFIG_HID_CHICONY=y
+CONFIG_HID_CYPRESS=y
+CONFIG_HID_EZKEY=y
+CONFIG_HID_GYRATION=y
+CONFIG_HID_LOGITECH=y
CONFIG_LOGITECH_FF=y
# CONFIG_LOGIRUMBLEPAD2_FF is not set
+CONFIG_HID_MICROSOFT=y
+CONFIG_HID_MONTEREY=y
+CONFIG_HID_NTRIG=y
+CONFIG_HID_PANTHERLORD=y
CONFIG_PANTHERLORD_FF=y
+CONFIG_HID_PETALYNX=y
+CONFIG_HID_SAMSUNG=y
+CONFIG_HID_SONY=y
+CONFIG_HID_SUNPLUS=y
+# CONFIG_GREENASIA_FF is not set
+CONFIG_HID_TOPSEED=y
CONFIG_THRUSTMASTER_FF=y
CONFIG_ZEROPLUS_FF=y
-CONFIG_USB_HIDDEV=y
CONFIG_USB_SUPPORT=y
CONFIG_USB_ARCH_HAS_HCD=y
CONFIG_USB_ARCH_HAS_OHCI=y
@@ -1652,6 +1834,8 @@
CONFIG_USB_SUSPEND=y
# CONFIG_USB_OTG is not set
CONFIG_USB_MON=y
+# CONFIG_USB_WUSB is not set
+# CONFIG_USB_WUSB_CBAF is not set
#
# USB Host Controller Drivers
@@ -1660,6 +1844,7 @@
CONFIG_USB_EHCI_HCD=y
# CONFIG_USB_EHCI_ROOT_HUB_TT is not set
# CONFIG_USB_EHCI_TT_NEWSCHED is not set
+# CONFIG_USB_OXU210HP_HCD is not set
# CONFIG_USB_ISP116X_HCD is not set
# CONFIG_USB_ISP1760_HCD is not set
CONFIG_USB_OHCI_HCD=y
@@ -1669,6 +1854,8 @@
CONFIG_USB_UHCI_HCD=y
# CONFIG_USB_SL811_HCD is not set
# CONFIG_USB_R8A66597_HCD is not set
+# CONFIG_USB_WHCI_HCD is not set
+# CONFIG_USB_HWA_HCD is not set
#
# USB Device Class drivers
@@ -1676,20 +1863,20 @@
# CONFIG_USB_ACM is not set
CONFIG_USB_PRINTER=y
# CONFIG_USB_WDM is not set
+# CONFIG_USB_TMC is not set
#
-# NOTE: USB_STORAGE enables SCSI, and 'SCSI disk support'
+# NOTE: USB_STORAGE depends on SCSI but BLK_DEV_SD may also be needed;
#
#
-# may also be needed; see USB_STORAGE Help for more information
+# see USB_STORAGE Help for more information
#
CONFIG_USB_STORAGE=y
# CONFIG_USB_STORAGE_DEBUG is not set
# CONFIG_USB_STORAGE_DATAFAB is not set
# CONFIG_USB_STORAGE_FREECOM is not set
# CONFIG_USB_STORAGE_ISD200 is not set
-# CONFIG_USB_STORAGE_DPCM is not set
# CONFIG_USB_STORAGE_USBAT is not set
# CONFIG_USB_STORAGE_SDDR09 is not set
# CONFIG_USB_STORAGE_SDDR55 is not set
@@ -1697,7 +1884,6 @@
# CONFIG_USB_STORAGE_ALAUDA is not set
# CONFIG_USB_STORAGE_ONETOUCH is not set
# CONFIG_USB_STORAGE_KARMA is not set
-# CONFIG_USB_STORAGE_SIERRA is not set
# CONFIG_USB_STORAGE_CYPRESS_ATACB is not set
CONFIG_USB_LIBUSUAL=y
@@ -1718,6 +1904,7 @@
# CONFIG_USB_EMI62 is not set
# CONFIG_USB_EMI26 is not set
# CONFIG_USB_ADUTUX is not set
+# CONFIG_USB_SEVSEG is not set
# CONFIG_USB_RIO500 is not set
# CONFIG_USB_LEGOTOWER is not set
# CONFIG_USB_LCD is not set
@@ -1735,7 +1922,13 @@
# CONFIG_USB_IOWARRIOR is not set
# CONFIG_USB_TEST is not set
# CONFIG_USB_ISIGHTFW is not set
+# CONFIG_USB_VST is not set
# CONFIG_USB_GADGET is not set
+
+#
+# OTG and related infrastructure
+#
+# CONFIG_UWB is not set
# CONFIG_MMC is not set
# CONFIG_MEMSTICK is not set
CONFIG_NEW_LEDS=y
@@ -1744,6 +1937,7 @@
#
# LED drivers
#
+# CONFIG_LEDS_ALIX2 is not set
# CONFIG_LEDS_PCA9532 is not set
# CONFIG_LEDS_CLEVO_MAIL is not set
# CONFIG_LEDS_PCA955X is not set
@@ -1754,6 +1948,7 @@
CONFIG_LEDS_TRIGGERS=y
# CONFIG_LEDS_TRIGGER_TIMER is not set
# CONFIG_LEDS_TRIGGER_HEARTBEAT is not set
+# CONFIG_LEDS_TRIGGER_BACKLIGHT is not set
# CONFIG_LEDS_TRIGGER_DEFAULT_ON is not set
# CONFIG_ACCESSIBILITY is not set
# CONFIG_INFINIBAND is not set
@@ -1793,6 +1988,7 @@
# CONFIG_RTC_DRV_M41T80 is not set
# CONFIG_RTC_DRV_S35390A is not set
# CONFIG_RTC_DRV_FM3130 is not set
+# CONFIG_RTC_DRV_RX8581 is not set
#
# SPI RTC drivers
@@ -1802,12 +1998,15 @@
# Platform RTC drivers
#
CONFIG_RTC_DRV_CMOS=y
+# CONFIG_RTC_DRV_DS1286 is not set
# CONFIG_RTC_DRV_DS1511 is not set
# CONFIG_RTC_DRV_DS1553 is not set
# CONFIG_RTC_DRV_DS1742 is not set
# CONFIG_RTC_DRV_STK17TA8 is not set
# CONFIG_RTC_DRV_M48T86 is not set
+# CONFIG_RTC_DRV_M48T35 is not set
# CONFIG_RTC_DRV_M48T59 is not set
+# CONFIG_RTC_DRV_BQ4802 is not set
# CONFIG_RTC_DRV_V3020 is not set
#
@@ -1820,6 +2019,21 @@
#
# CONFIG_INTEL_IOATDMA is not set
# CONFIG_UIO is not set
+# CONFIG_STAGING is not set
+CONFIG_X86_PLATFORM_DEVICES=y
+# CONFIG_ACER_WMI is not set
+# CONFIG_ASUS_LAPTOP is not set
+# CONFIG_FUJITSU_LAPTOP is not set
+# CONFIG_MSI_LAPTOP is not set
+# CONFIG_PANASONIC_LAPTOP is not set
+# CONFIG_COMPAL_LAPTOP is not set
+# CONFIG_SONY_LAPTOP is not set
+# CONFIG_THINKPAD_ACPI is not set
+# CONFIG_INTEL_MENLOW is not set
+CONFIG_EEEPC_LAPTOP=y
+# CONFIG_ACPI_WMI is not set
+# CONFIG_ACPI_ASUS is not set
+# CONFIG_ACPI_TOSHIBA is not set
#
# Firmware Drivers
@@ -1830,8 +2044,7 @@
# CONFIG_DELL_RBU is not set
# CONFIG_DCDBAS is not set
CONFIG_DMIID=y
-CONFIG_ISCSI_IBFT_FIND=y
-CONFIG_ISCSI_IBFT=y
+# CONFIG_ISCSI_IBFT_FIND is not set
#
# File systems
@@ -1841,22 +2054,25 @@
CONFIG_EXT3_FS_XATTR=y
CONFIG_EXT3_FS_POSIX_ACL=y
CONFIG_EXT3_FS_SECURITY=y
-# CONFIG_EXT4DEV_FS is not set
+# CONFIG_EXT4_FS is not set
CONFIG_JBD=y
# CONFIG_JBD_DEBUG is not set
CONFIG_FS_MBCACHE=y
# CONFIG_REISERFS_FS is not set
# CONFIG_JFS_FS is not set
CONFIG_FS_POSIX_ACL=y
+CONFIG_FILE_LOCKING=y
# CONFIG_XFS_FS is not set
# CONFIG_GFS2_FS is not set
# CONFIG_OCFS2_FS is not set
+# CONFIG_BTRFS_FS is not set
CONFIG_DNOTIFY=y
CONFIG_INOTIFY=y
CONFIG_INOTIFY_USER=y
CONFIG_QUOTA=y
CONFIG_QUOTA_NETLINK_INTERFACE=y
# CONFIG_PRINT_QUOTA_WARNING is not set
+CONFIG_QUOTA_TREE=y
# CONFIG_QFMT_V1 is not set
CONFIG_QFMT_V2=y
CONFIG_QUOTACTL=y
@@ -1890,16 +2106,14 @@
CONFIG_PROC_KCORE=y
CONFIG_PROC_VMCORE=y
CONFIG_PROC_SYSCTL=y
+CONFIG_PROC_PAGE_MONITOR=y
CONFIG_SYSFS=y
CONFIG_TMPFS=y
CONFIG_TMPFS_POSIX_ACL=y
CONFIG_HUGETLBFS=y
CONFIG_HUGETLB_PAGE=y
# CONFIG_CONFIGFS_FS is not set
-
-#
-# Miscellaneous filesystems
-#
+CONFIG_MISC_FILESYSTEMS=y
# CONFIG_ADFS_FS is not set
# CONFIG_AFFS_FS is not set
# CONFIG_ECRYPT_FS is not set
@@ -1909,6 +2123,7 @@
# CONFIG_BFS_FS is not set
# CONFIG_EFS_FS is not set
# CONFIG_CRAMFS is not set
+# CONFIG_SQUASHFS is not set
# CONFIG_VXFS_FS is not set
# CONFIG_MINIX_FS is not set
# CONFIG_OMFS_FS is not set
@@ -1930,6 +2145,7 @@
CONFIG_NFS_COMMON=y
CONFIG_SUNRPC=y
CONFIG_SUNRPC_GSS=y
+# CONFIG_SUNRPC_REGISTER_V4 is not set
CONFIG_RPCSEC_GSS_KRB5=y
# CONFIG_RPCSEC_GSS_SPKM3 is not set
# CONFIG_SMB_FS is not set
@@ -2006,7 +2222,7 @@
#
CONFIG_TRACE_IRQFLAGS_SUPPORT=y
CONFIG_PRINTK_TIME=y
-CONFIG_ENABLE_WARN_DEPRECATED=y
+# CONFIG_ENABLE_WARN_DEPRECATED is not set
CONFIG_ENABLE_MUST_CHECK=y
CONFIG_FRAME_WARN=2048
CONFIG_MAGIC_SYSRQ=y
@@ -2035,40 +2251,60 @@
CONFIG_DEBUG_BUGVERBOSE=y
# CONFIG_DEBUG_INFO is not set
# CONFIG_DEBUG_VM is not set
+# CONFIG_DEBUG_VIRTUAL is not set
# CONFIG_DEBUG_WRITECOUNT is not set
CONFIG_DEBUG_MEMORY_INIT=y
# CONFIG_DEBUG_LIST is not set
# CONFIG_DEBUG_SG is not set
+# CONFIG_DEBUG_NOTIFIERS is not set
+CONFIG_ARCH_WANT_FRAME_POINTERS=y
CONFIG_FRAME_POINTER=y
# CONFIG_BOOT_PRINTK_DELAY is not set
# CONFIG_RCU_TORTURE_TEST is not set
+# CONFIG_RCU_CPU_STALL_DETECTOR is not set
# CONFIG_KPROBES_SANITY_TEST is not set
# CONFIG_BACKTRACE_SELF_TEST is not set
+# CONFIG_DEBUG_BLOCK_EXT_DEVT is not set
# CONFIG_LKDTM is not set
# CONFIG_FAULT_INJECTION is not set
# CONFIG_LATENCYTOP is not set
CONFIG_SYSCTL_SYSCALL_CHECK=y
-CONFIG_HAVE_FTRACE=y
+CONFIG_USER_STACKTRACE_SUPPORT=y
+CONFIG_HAVE_FUNCTION_TRACER=y
+CONFIG_HAVE_FUNCTION_GRAPH_TRACER=y
+CONFIG_HAVE_FUNCTION_TRACE_MCOUNT_TEST=y
CONFIG_HAVE_DYNAMIC_FTRACE=y
-# CONFIG_FTRACE is not set
+CONFIG_HAVE_FTRACE_MCOUNT_RECORD=y
+CONFIG_HAVE_HW_BRANCH_TRACER=y
+
+#
+# Tracers
+#
+# CONFIG_FUNCTION_TRACER is not set
# CONFIG_IRQSOFF_TRACER is not set
# CONFIG_SYSPROF_TRACER is not set
# CONFIG_SCHED_TRACER is not set
# CONFIG_CONTEXT_SWITCH_TRACER is not set
+# CONFIG_BOOT_TRACER is not set
+# CONFIG_TRACE_BRANCH_PROFILING is not set
+# CONFIG_POWER_TRACER is not set
+# CONFIG_STACK_TRACER is not set
+# CONFIG_HW_BRANCH_TRACER is not set
CONFIG_PROVIDE_OHCI1394_DMA_INIT=y
+# CONFIG_DYNAMIC_PRINTK_DEBUG is not set
# CONFIG_SAMPLES is not set
CONFIG_HAVE_ARCH_KGDB=y
# CONFIG_KGDB is not set
# CONFIG_STRICT_DEVMEM is not set
CONFIG_X86_VERBOSE_BOOTUP=y
CONFIG_EARLY_PRINTK=y
+CONFIG_EARLY_PRINTK_DBGP=y
CONFIG_DEBUG_STACKOVERFLOW=y
CONFIG_DEBUG_STACK_USAGE=y
# CONFIG_DEBUG_PAGEALLOC is not set
# CONFIG_DEBUG_PER_CPU_MAPS is not set
# CONFIG_X86_PTDUMP is not set
CONFIG_DEBUG_RODATA=y
-# CONFIG_DIRECT_GBPAGES is not set
# CONFIG_DEBUG_RODATA_TEST is not set
CONFIG_DEBUG_NX_TEST=m
# CONFIG_IOMMU_DEBUG is not set
@@ -2092,8 +2328,10 @@
CONFIG_KEYS=y
CONFIG_KEYS_DEBUG_PROC_KEYS=y
CONFIG_SECURITY=y
+# CONFIG_SECURITYFS is not set
CONFIG_SECURITY_NETWORK=y
# CONFIG_SECURITY_NETWORK_XFRM is not set
+# CONFIG_SECURITY_PATH is not set
CONFIG_SECURITY_FILE_CAPABILITIES=y
# CONFIG_SECURITY_ROOTPLUG is not set
CONFIG_SECURITY_DEFAULT_MMAP_MIN_ADDR=65536
@@ -2104,7 +2342,6 @@
CONFIG_SECURITY_SELINUX_DEVELOP=y
CONFIG_SECURITY_SELINUX_AVC_STATS=y
CONFIG_SECURITY_SELINUX_CHECKREQPROT_VALUE=1
-# CONFIG_SECURITY_SELINUX_ENABLE_SECMARK_DEFAULT is not set
# CONFIG_SECURITY_SELINUX_POLICYDB_VERSION_MAX is not set
# CONFIG_SECURITY_SMACK is not set
CONFIG_CRYPTO=y
@@ -2112,11 +2349,18 @@
#
# Crypto core or helper
#
+# CONFIG_CRYPTO_FIPS is not set
CONFIG_CRYPTO_ALGAPI=y
+CONFIG_CRYPTO_ALGAPI2=y
CONFIG_CRYPTO_AEAD=y
+CONFIG_CRYPTO_AEAD2=y
CONFIG_CRYPTO_BLKCIPHER=y
+CONFIG_CRYPTO_BLKCIPHER2=y
CONFIG_CRYPTO_HASH=y
+CONFIG_CRYPTO_HASH2=y
+CONFIG_CRYPTO_RNG2=y
CONFIG_CRYPTO_MANAGER=y
+CONFIG_CRYPTO_MANAGER2=y
# CONFIG_CRYPTO_GF128MUL is not set
# CONFIG_CRYPTO_NULL is not set
# CONFIG_CRYPTO_CRYPTD is not set
@@ -2151,6 +2395,7 @@
# Digest
#
# CONFIG_CRYPTO_CRC32C is not set
+# CONFIG_CRYPTO_CRC32C_INTEL is not set
# CONFIG_CRYPTO_MD4 is not set
CONFIG_CRYPTO_MD5=y
# CONFIG_CRYPTO_MICHAEL_MIC is not set
@@ -2191,6 +2436,11 @@
#
# CONFIG_CRYPTO_DEFLATE is not set
# CONFIG_CRYPTO_LZO is not set
+
+#
+# Random Number Generation
+#
+# CONFIG_CRYPTO_ANSI_CPRNG is not set
CONFIG_CRYPTO_HW=y
# CONFIG_CRYPTO_DEV_HIFN_795X is not set
CONFIG_HAVE_KVM=y
@@ -2205,6 +2455,7 @@
CONFIG_BITREVERSE=y
CONFIG_GENERIC_FIND_FIRST_BIT=y
CONFIG_GENERIC_FIND_NEXT_BIT=y
+CONFIG_GENERIC_FIND_LAST_BIT=y
# CONFIG_CRC_CCITT is not set
# CONFIG_CRC16 is not set
CONFIG_CRC_T10DIF=y
diff --git a/arch/x86/ia32/ia32_signal.c b/arch/x86/ia32/ia32_signal.c
index 9dabd00..588a7aa 100644
--- a/arch/x86/ia32/ia32_signal.c
+++ b/arch/x86/ia32/ia32_signal.c
@@ -33,8 +33,6 @@
#include <asm/sigframe.h>
#include <asm/sys_ia32.h>
-#define DEBUG_SIG 0
-
#define _BLOCKABLE (~(sigmask(SIGKILL) | sigmask(SIGSTOP)))
#define FIX_EFLAGS (X86_EFLAGS_AC | X86_EFLAGS_OF | \
@@ -46,78 +44,83 @@
int copy_siginfo_to_user32(compat_siginfo_t __user *to, siginfo_t *from)
{
- int err;
+ int err = 0;
if (!access_ok(VERIFY_WRITE, to, sizeof(compat_siginfo_t)))
return -EFAULT;
- /* If you change siginfo_t structure, please make sure that
- this code is fixed accordingly.
- It should never copy any pad contained in the structure
- to avoid security leaks, but must copy the generic
- 3 ints plus the relevant union member. */
- err = __put_user(from->si_signo, &to->si_signo);
- err |= __put_user(from->si_errno, &to->si_errno);
- err |= __put_user((short)from->si_code, &to->si_code);
+ put_user_try {
+ /* If you change siginfo_t structure, please make sure that
+ this code is fixed accordingly.
+ It should never copy any pad contained in the structure
+ to avoid security leaks, but must copy the generic
+ 3 ints plus the relevant union member. */
+ put_user_ex(from->si_signo, &to->si_signo);
+ put_user_ex(from->si_errno, &to->si_errno);
+ put_user_ex((short)from->si_code, &to->si_code);
- if (from->si_code < 0) {
- err |= __put_user(from->si_pid, &to->si_pid);
- err |= __put_user(from->si_uid, &to->si_uid);
- err |= __put_user(ptr_to_compat(from->si_ptr), &to->si_ptr);
- } else {
- /*
- * First 32bits of unions are always present:
- * si_pid === si_band === si_tid === si_addr(LS half)
- */
- err |= __put_user(from->_sifields._pad[0],
- &to->_sifields._pad[0]);
- switch (from->si_code >> 16) {
- case __SI_FAULT >> 16:
- break;
- case __SI_CHLD >> 16:
- err |= __put_user(from->si_utime, &to->si_utime);
- err |= __put_user(from->si_stime, &to->si_stime);
- err |= __put_user(from->si_status, &to->si_status);
- /* FALL THROUGH */
- default:
- case __SI_KILL >> 16:
- err |= __put_user(from->si_uid, &to->si_uid);
- break;
- case __SI_POLL >> 16:
- err |= __put_user(from->si_fd, &to->si_fd);
- break;
- case __SI_TIMER >> 16:
- err |= __put_user(from->si_overrun, &to->si_overrun);
- err |= __put_user(ptr_to_compat(from->si_ptr),
- &to->si_ptr);
- break;
- /* This is not generated by the kernel as of now. */
- case __SI_RT >> 16:
- case __SI_MESGQ >> 16:
- err |= __put_user(from->si_uid, &to->si_uid);
- err |= __put_user(from->si_int, &to->si_int);
- break;
+ if (from->si_code < 0) {
+ put_user_ex(from->si_pid, &to->si_pid);
+ put_user_ex(from->si_uid, &to->si_uid);
+ put_user_ex(ptr_to_compat(from->si_ptr), &to->si_ptr);
+ } else {
+ /*
+ * First 32bits of unions are always present:
+ * si_pid === si_band === si_tid === si_addr(LS half)
+ */
+ put_user_ex(from->_sifields._pad[0],
+ &to->_sifields._pad[0]);
+ switch (from->si_code >> 16) {
+ case __SI_FAULT >> 16:
+ break;
+ case __SI_CHLD >> 16:
+ put_user_ex(from->si_utime, &to->si_utime);
+ put_user_ex(from->si_stime, &to->si_stime);
+ put_user_ex(from->si_status, &to->si_status);
+ /* FALL THROUGH */
+ default:
+ case __SI_KILL >> 16:
+ put_user_ex(from->si_uid, &to->si_uid);
+ break;
+ case __SI_POLL >> 16:
+ put_user_ex(from->si_fd, &to->si_fd);
+ break;
+ case __SI_TIMER >> 16:
+ put_user_ex(from->si_overrun, &to->si_overrun);
+ put_user_ex(ptr_to_compat(from->si_ptr),
+ &to->si_ptr);
+ break;
+ /* This is not generated by the kernel as of now. */
+ case __SI_RT >> 16:
+ case __SI_MESGQ >> 16:
+ put_user_ex(from->si_uid, &to->si_uid);
+ put_user_ex(from->si_int, &to->si_int);
+ break;
+ }
}
- }
+ } put_user_catch(err);
+
return err;
}
int copy_siginfo_from_user32(siginfo_t *to, compat_siginfo_t __user *from)
{
- int err;
+ int err = 0;
u32 ptr32;
if (!access_ok(VERIFY_READ, from, sizeof(compat_siginfo_t)))
return -EFAULT;
- err = __get_user(to->si_signo, &from->si_signo);
- err |= __get_user(to->si_errno, &from->si_errno);
- err |= __get_user(to->si_code, &from->si_code);
+ get_user_try {
+ get_user_ex(to->si_signo, &from->si_signo);
+ get_user_ex(to->si_errno, &from->si_errno);
+ get_user_ex(to->si_code, &from->si_code);
- err |= __get_user(to->si_pid, &from->si_pid);
- err |= __get_user(to->si_uid, &from->si_uid);
- err |= __get_user(ptr32, &from->si_ptr);
- to->si_ptr = compat_ptr(ptr32);
+ get_user_ex(to->si_pid, &from->si_pid);
+ get_user_ex(to->si_uid, &from->si_uid);
+ get_user_ex(ptr32, &from->si_ptr);
+ to->si_ptr = compat_ptr(ptr32);
+ } get_user_catch(err);
return err;
}
@@ -142,17 +145,23 @@
struct pt_regs *regs)
{
stack_t uss, uoss;
- int ret;
+ int ret, err = 0;
mm_segment_t seg;
if (uss_ptr) {
u32 ptr;
memset(&uss, 0, sizeof(stack_t));
- if (!access_ok(VERIFY_READ, uss_ptr, sizeof(stack_ia32_t)) ||
- __get_user(ptr, &uss_ptr->ss_sp) ||
- __get_user(uss.ss_flags, &uss_ptr->ss_flags) ||
- __get_user(uss.ss_size, &uss_ptr->ss_size))
+ if (!access_ok(VERIFY_READ, uss_ptr, sizeof(stack_ia32_t)))
+ return -EFAULT;
+
+ get_user_try {
+ get_user_ex(ptr, &uss_ptr->ss_sp);
+ get_user_ex(uss.ss_flags, &uss_ptr->ss_flags);
+ get_user_ex(uss.ss_size, &uss_ptr->ss_size);
+ } get_user_catch(err);
+
+ if (err)
return -EFAULT;
uss.ss_sp = compat_ptr(ptr);
}
@@ -161,10 +170,16 @@
ret = do_sigaltstack(uss_ptr ? &uss : NULL, &uoss, regs->sp);
set_fs(seg);
if (ret >= 0 && uoss_ptr) {
- if (!access_ok(VERIFY_WRITE, uoss_ptr, sizeof(stack_ia32_t)) ||
- __put_user(ptr_to_compat(uoss.ss_sp), &uoss_ptr->ss_sp) ||
- __put_user(uoss.ss_flags, &uoss_ptr->ss_flags) ||
- __put_user(uoss.ss_size, &uoss_ptr->ss_size))
+ if (!access_ok(VERIFY_WRITE, uoss_ptr, sizeof(stack_ia32_t)))
+ return -EFAULT;
+
+ put_user_try {
+ put_user_ex(ptr_to_compat(uoss.ss_sp), &uoss_ptr->ss_sp);
+ put_user_ex(uoss.ss_flags, &uoss_ptr->ss_flags);
+ put_user_ex(uoss.ss_size, &uoss_ptr->ss_size);
+ } put_user_catch(err);
+
+ if (err)
ret = -EFAULT;
}
return ret;
@@ -173,75 +188,78 @@
/*
* Do a signal return; undo the signal stack.
*/
+#define loadsegment_gs(v) load_gs_index(v)
+#define loadsegment_fs(v) loadsegment(fs, v)
+#define loadsegment_ds(v) loadsegment(ds, v)
+#define loadsegment_es(v) loadsegment(es, v)
+
+#define get_user_seg(seg) ({ unsigned int v; savesegment(seg, v); v; })
+#define set_user_seg(seg, v) loadsegment_##seg(v)
+
#define COPY(x) { \
- err |= __get_user(regs->x, &sc->x); \
+ get_user_ex(regs->x, &sc->x); \
}
-#define COPY_SEG_CPL3(seg) { \
- unsigned short tmp; \
- err |= __get_user(tmp, &sc->seg); \
- regs->seg = tmp | 3; \
-}
+#define GET_SEG(seg) ({ \
+ unsigned short tmp; \
+ get_user_ex(tmp, &sc->seg); \
+ tmp; \
+})
+
+#define COPY_SEG_CPL3(seg) do { \
+ regs->seg = GET_SEG(seg) | 3; \
+} while (0)
#define RELOAD_SEG(seg) { \
- unsigned int cur, pre; \
- err |= __get_user(pre, &sc->seg); \
- savesegment(seg, cur); \
+ unsigned int pre = GET_SEG(seg); \
+ unsigned int cur = get_user_seg(seg); \
pre |= 3; \
if (pre != cur) \
- loadsegment(seg, pre); \
+ set_user_seg(seg, pre); \
}
static int ia32_restore_sigcontext(struct pt_regs *regs,
struct sigcontext_ia32 __user *sc,
unsigned int *pax)
{
- unsigned int tmpflags, gs, oldgs, err = 0;
+ unsigned int tmpflags, err = 0;
void __user *buf;
u32 tmp;
/* Always make any pending restarted system calls return -EINTR */
current_thread_info()->restart_block.fn = do_no_restart_syscall;
-#if DEBUG_SIG
- printk(KERN_DEBUG "SIG restore_sigcontext: "
- "sc=%p err(%x) eip(%x) cs(%x) flg(%x)\n",
- sc, sc->err, sc->ip, sc->cs, sc->flags);
-#endif
+ get_user_try {
+ /*
+ * Reload fs and gs if they have changed in the signal
+ * handler. This does not handle long fs/gs base changes in
+ * the handler, but does not clobber them at least in the
+ * normal case.
+ */
+ RELOAD_SEG(gs);
+ RELOAD_SEG(fs);
+ RELOAD_SEG(ds);
+ RELOAD_SEG(es);
- /*
- * Reload fs and gs if they have changed in the signal
- * handler. This does not handle long fs/gs base changes in
- * the handler, but does not clobber them at least in the
- * normal case.
- */
- err |= __get_user(gs, &sc->gs);
- gs |= 3;
- savesegment(gs, oldgs);
- if (gs != oldgs)
- load_gs_index(gs);
+ COPY(di); COPY(si); COPY(bp); COPY(sp); COPY(bx);
+ COPY(dx); COPY(cx); COPY(ip);
+ /* Don't touch extended registers */
- RELOAD_SEG(fs);
- RELOAD_SEG(ds);
- RELOAD_SEG(es);
+ COPY_SEG_CPL3(cs);
+ COPY_SEG_CPL3(ss);
- COPY(di); COPY(si); COPY(bp); COPY(sp); COPY(bx);
- COPY(dx); COPY(cx); COPY(ip);
- /* Don't touch extended registers */
+ get_user_ex(tmpflags, &sc->flags);
+ regs->flags = (regs->flags & ~FIX_EFLAGS) | (tmpflags & FIX_EFLAGS);
+ /* disable syscall checks */
+ regs->orig_ax = -1;
- COPY_SEG_CPL3(cs);
- COPY_SEG_CPL3(ss);
+ get_user_ex(tmp, &sc->fpstate);
+ buf = compat_ptr(tmp);
+ err |= restore_i387_xstate_ia32(buf);
- err |= __get_user(tmpflags, &sc->flags);
- regs->flags = (regs->flags & ~FIX_EFLAGS) | (tmpflags & FIX_EFLAGS);
- /* disable syscall checks */
- regs->orig_ax = -1;
+ get_user_ex(*pax, &sc->ax);
+ } get_user_catch(err);
- err |= __get_user(tmp, &sc->fpstate);
- buf = compat_ptr(tmp);
- err |= restore_i387_xstate_ia32(buf);
-
- err |= __get_user(*pax, &sc->ax);
return err;
}
@@ -317,38 +335,36 @@
void __user *fpstate,
struct pt_regs *regs, unsigned int mask)
{
- int tmp, err = 0;
+ int err = 0;
- savesegment(gs, tmp);
- err |= __put_user(tmp, (unsigned int __user *)&sc->gs);
- savesegment(fs, tmp);
- err |= __put_user(tmp, (unsigned int __user *)&sc->fs);
- savesegment(ds, tmp);
- err |= __put_user(tmp, (unsigned int __user *)&sc->ds);
- savesegment(es, tmp);
- err |= __put_user(tmp, (unsigned int __user *)&sc->es);
+ put_user_try {
+ put_user_ex(get_user_seg(gs), (unsigned int __user *)&sc->gs);
+ put_user_ex(get_user_seg(fs), (unsigned int __user *)&sc->fs);
+ put_user_ex(get_user_seg(ds), (unsigned int __user *)&sc->ds);
+ put_user_ex(get_user_seg(es), (unsigned int __user *)&sc->es);
- err |= __put_user(regs->di, &sc->di);
- err |= __put_user(regs->si, &sc->si);
- err |= __put_user(regs->bp, &sc->bp);
- err |= __put_user(regs->sp, &sc->sp);
- err |= __put_user(regs->bx, &sc->bx);
- err |= __put_user(regs->dx, &sc->dx);
- err |= __put_user(regs->cx, &sc->cx);
- err |= __put_user(regs->ax, &sc->ax);
- err |= __put_user(current->thread.trap_no, &sc->trapno);
- err |= __put_user(current->thread.error_code, &sc->err);
- err |= __put_user(regs->ip, &sc->ip);
- err |= __put_user(regs->cs, (unsigned int __user *)&sc->cs);
- err |= __put_user(regs->flags, &sc->flags);
- err |= __put_user(regs->sp, &sc->sp_at_signal);
- err |= __put_user(regs->ss, (unsigned int __user *)&sc->ss);
+ put_user_ex(regs->di, &sc->di);
+ put_user_ex(regs->si, &sc->si);
+ put_user_ex(regs->bp, &sc->bp);
+ put_user_ex(regs->sp, &sc->sp);
+ put_user_ex(regs->bx, &sc->bx);
+ put_user_ex(regs->dx, &sc->dx);
+ put_user_ex(regs->cx, &sc->cx);
+ put_user_ex(regs->ax, &sc->ax);
+ put_user_ex(current->thread.trap_no, &sc->trapno);
+ put_user_ex(current->thread.error_code, &sc->err);
+ put_user_ex(regs->ip, &sc->ip);
+ put_user_ex(regs->cs, (unsigned int __user *)&sc->cs);
+ put_user_ex(regs->flags, &sc->flags);
+ put_user_ex(regs->sp, &sc->sp_at_signal);
+ put_user_ex(regs->ss, (unsigned int __user *)&sc->ss);
- err |= __put_user(ptr_to_compat(fpstate), &sc->fpstate);
+ put_user_ex(ptr_to_compat(fpstate), &sc->fpstate);
- /* non-iBCS2 extensions.. */
- err |= __put_user(mask, &sc->oldmask);
- err |= __put_user(current->thread.cr2, &sc->cr2);
+ /* non-iBCS2 extensions.. */
+ put_user_ex(mask, &sc->oldmask);
+ put_user_ex(current->thread.cr2, &sc->cr2);
+ } put_user_catch(err);
return err;
}
@@ -437,13 +453,17 @@
else
restorer = &frame->retcode;
}
- err |= __put_user(ptr_to_compat(restorer), &frame->pretcode);
- /*
- * These are actually not used anymore, but left because some
- * gdb versions depend on them as a marker.
- */
- err |= __put_user(*((u64 *)&code), (u64 *)frame->retcode);
+ put_user_try {
+ put_user_ex(ptr_to_compat(restorer), &frame->pretcode);
+
+ /*
+ * These are actually not used anymore, but left because some
+ * gdb versions depend on them as a marker.
+ */
+ put_user_ex(*((u64 *)&code), (u64 *)frame->retcode);
+ } put_user_catch(err);
+
if (err)
return -EFAULT;
@@ -462,11 +482,6 @@
regs->cs = __USER32_CS;
regs->ss = __USER32_DS;
-#if DEBUG_SIG
- printk(KERN_DEBUG "SIG deliver (%s:%d): sp=%p pc=%lx ra=%u\n",
- current->comm, current->pid, frame, regs->ip, frame->pretcode);
-#endif
-
return 0;
}
@@ -496,41 +511,40 @@
if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame)))
return -EFAULT;
- err |= __put_user(sig, &frame->sig);
- err |= __put_user(ptr_to_compat(&frame->info), &frame->pinfo);
- err |= __put_user(ptr_to_compat(&frame->uc), &frame->puc);
- err |= copy_siginfo_to_user32(&frame->info, info);
- if (err)
- return -EFAULT;
+ put_user_try {
+ put_user_ex(sig, &frame->sig);
+ put_user_ex(ptr_to_compat(&frame->info), &frame->pinfo);
+ put_user_ex(ptr_to_compat(&frame->uc), &frame->puc);
+ err |= copy_siginfo_to_user32(&frame->info, info);
- /* Create the ucontext. */
- if (cpu_has_xsave)
- err |= __put_user(UC_FP_XSTATE, &frame->uc.uc_flags);
- else
- err |= __put_user(0, &frame->uc.uc_flags);
- err |= __put_user(0, &frame->uc.uc_link);
- err |= __put_user(current->sas_ss_sp, &frame->uc.uc_stack.ss_sp);
- err |= __put_user(sas_ss_flags(regs->sp),
- &frame->uc.uc_stack.ss_flags);
- err |= __put_user(current->sas_ss_size, &frame->uc.uc_stack.ss_size);
- err |= ia32_setup_sigcontext(&frame->uc.uc_mcontext, fpstate,
- regs, set->sig[0]);
- err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set));
- if (err)
- return -EFAULT;
+ /* Create the ucontext. */
+ if (cpu_has_xsave)
+ put_user_ex(UC_FP_XSTATE, &frame->uc.uc_flags);
+ else
+ put_user_ex(0, &frame->uc.uc_flags);
+ put_user_ex(0, &frame->uc.uc_link);
+ put_user_ex(current->sas_ss_sp, &frame->uc.uc_stack.ss_sp);
+ put_user_ex(sas_ss_flags(regs->sp),
+ &frame->uc.uc_stack.ss_flags);
+ put_user_ex(current->sas_ss_size, &frame->uc.uc_stack.ss_size);
+ err |= ia32_setup_sigcontext(&frame->uc.uc_mcontext, fpstate,
+ regs, set->sig[0]);
+ err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set));
- if (ka->sa.sa_flags & SA_RESTORER)
- restorer = ka->sa.sa_restorer;
- else
- restorer = VDSO32_SYMBOL(current->mm->context.vdso,
- rt_sigreturn);
- err |= __put_user(ptr_to_compat(restorer), &frame->pretcode);
+ if (ka->sa.sa_flags & SA_RESTORER)
+ restorer = ka->sa.sa_restorer;
+ else
+ restorer = VDSO32_SYMBOL(current->mm->context.vdso,
+ rt_sigreturn);
+ put_user_ex(ptr_to_compat(restorer), &frame->pretcode);
- /*
- * Not actually used anymore, but left because some gdb
- * versions need it.
- */
- err |= __put_user(*((u64 *)&code), (u64 *)frame->retcode);
+ /*
+ * Not actually used anymore, but left because some gdb
+ * versions need it.
+ */
+ put_user_ex(*((u64 *)&code), (u64 *)frame->retcode);
+ } put_user_catch(err);
+
if (err)
return -EFAULT;
@@ -549,10 +563,5 @@
regs->cs = __USER32_CS;
regs->ss = __USER32_DS;
-#if DEBUG_SIG
- printk(KERN_DEBUG "SIG deliver (%s:%d): sp=%p pc=%lx ra=%u\n",
- current->comm, current->pid, frame, regs->ip, frame->pretcode);
-#endif
-
return 0;
}
diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S
index 5a0d76d..e4baa06 100644
--- a/arch/x86/ia32/ia32entry.S
+++ b/arch/x86/ia32/ia32entry.S
@@ -112,8 +112,8 @@
CFI_DEF_CFA rsp,0
CFI_REGISTER rsp,rbp
SWAPGS_UNSAFE_STACK
- movq %gs:pda_kernelstack, %rsp
- addq $(PDA_STACKOFFSET),%rsp
+ movq PER_CPU_VAR(kernel_stack), %rsp
+ addq $(KERNEL_STACK_OFFSET),%rsp
/*
* No need to follow this irqs on/off section: the syscall
* disabled irqs, here we enable it straight after entry:
@@ -273,13 +273,13 @@
ENTRY(ia32_cstar_target)
CFI_STARTPROC32 simple
CFI_SIGNAL_FRAME
- CFI_DEF_CFA rsp,PDA_STACKOFFSET
+ CFI_DEF_CFA rsp,KERNEL_STACK_OFFSET
CFI_REGISTER rip,rcx
/*CFI_REGISTER rflags,r11*/
SWAPGS_UNSAFE_STACK
movl %esp,%r8d
CFI_REGISTER rsp,r8
- movq %gs:pda_kernelstack,%rsp
+ movq PER_CPU_VAR(kernel_stack),%rsp
/*
* No need to follow this irqs on/off section: the syscall
* disabled irqs and here we enable it straight after entry:
@@ -825,7 +825,8 @@
.quad compat_sys_signalfd4
.quad sys_eventfd2
.quad sys_epoll_create1
- .quad sys_dup3 /* 330 */
+ .quad sys_dup3 /* 330 */
.quad sys_pipe2
.quad sys_inotify_init1
+ .quad sys_perf_counter_open
ia32_syscall_end:
diff --git a/arch/x86/include/asm/a.out-core.h b/arch/x86/include/asm/a.out-core.h
index 3c601f8..bb70e39 100644
--- a/arch/x86/include/asm/a.out-core.h
+++ b/arch/x86/include/asm/a.out-core.h
@@ -55,7 +55,7 @@
dump->regs.ds = (u16)regs->ds;
dump->regs.es = (u16)regs->es;
dump->regs.fs = (u16)regs->fs;
- savesegment(gs, dump->regs.gs);
+ dump->regs.gs = get_user_gs(regs);
dump->regs.orig_ax = regs->orig_ax;
dump->regs.ip = regs->ip;
dump->regs.cs = (u16)regs->cs;
diff --git a/arch/x86/include/asm/acpi.h b/arch/x86/include/asm/acpi.h
index 9830681..4518dc5 100644
--- a/arch/x86/include/asm/acpi.h
+++ b/arch/x86/include/asm/acpi.h
@@ -102,9 +102,6 @@
acpi_noirq = 1;
}
-/* Fixmap pages to reserve for ACPI boot-time tables (see fixmap.h) */
-#define FIX_ACPI_PAGES 4
-
extern int acpi_gsi_to_irq(u32 gsi, unsigned int *irq);
static inline void acpi_noirq_set(void) { acpi_noirq = 1; }
diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h
index ab1d51a..a6208dc 100644
--- a/arch/x86/include/asm/apic.h
+++ b/arch/x86/include/asm/apic.h
@@ -1,15 +1,18 @@
#ifndef _ASM_X86_APIC_H
#define _ASM_X86_APIC_H
-#include <linux/pm.h>
+#include <linux/cpumask.h>
#include <linux/delay.h>
+#include <linux/pm.h>
#include <asm/alternative.h>
-#include <asm/fixmap.h>
-#include <asm/apicdef.h>
-#include <asm/processor.h>
-#include <asm/system.h>
#include <asm/cpufeature.h>
+#include <asm/processor.h>
+#include <asm/apicdef.h>
+#include <asm/atomic.h>
+#include <asm/fixmap.h>
+#include <asm/mpspec.h>
+#include <asm/system.h>
#include <asm/msr.h>
#define ARCH_APICTIMER_STOPS_ON_C3 1
@@ -33,7 +36,13 @@
} while (0)
+#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86_32)
extern void generic_apic_probe(void);
+#else
+static inline void generic_apic_probe(void)
+{
+}
+#endif
#ifdef CONFIG_X86_LOCAL_APIC
@@ -41,6 +50,21 @@
extern int local_apic_timer_c2_ok;
extern int disable_apic;
+
+#ifdef CONFIG_SMP
+extern void __inquire_remote_apic(int apicid);
+#else /* CONFIG_SMP */
+static inline void __inquire_remote_apic(int apicid)
+{
+}
+#endif /* CONFIG_SMP */
+
+static inline void default_inquire_remote_apic(int apicid)
+{
+ if (apic_verbosity >= APIC_DEBUG)
+ __inquire_remote_apic(apicid);
+}
+
/*
* Basic functions accessing APICs.
*/
@@ -71,6 +95,12 @@
return *((volatile u32 *)(APIC_BASE + reg));
}
+extern void native_apic_wait_icr_idle(void);
+extern u32 native_safe_apic_wait_icr_idle(void);
+extern void native_apic_icr_write(u32 low, u32 id);
+extern u64 native_apic_icr_read(void);
+
+#ifdef CONFIG_X86_X2APIC
static inline void native_apic_msr_write(u32 reg, u32 v)
{
if (reg == APIC_DFR || reg == APIC_ID || reg == APIC_LDR ||
@@ -91,8 +121,32 @@
return low;
}
-#ifndef CONFIG_X86_32
-extern int x2apic;
+static inline void native_x2apic_wait_icr_idle(void)
+{
+ /* no need to wait for icr idle in x2apic */
+ return;
+}
+
+static inline u32 native_safe_x2apic_wait_icr_idle(void)
+{
+ /* no need to wait for icr idle in x2apic */
+ return 0;
+}
+
+static inline void native_x2apic_icr_write(u32 low, u32 id)
+{
+ wrmsrl(APIC_BASE_MSR + (APIC_ICR >> 4), ((__u64) id) << 32 | low);
+}
+
+static inline u64 native_x2apic_icr_read(void)
+{
+ unsigned long val;
+
+ rdmsrl(APIC_BASE_MSR + (APIC_ICR >> 4), val);
+ return val;
+}
+
+extern int x2apic, x2apic_phys;
extern void check_x2apic(void);
extern void enable_x2apic(void);
extern void enable_IR_x2apic(void);
@@ -110,30 +164,24 @@
return 0;
}
#else
-#define x2apic_enabled() 0
+static inline void check_x2apic(void)
+{
+}
+static inline void enable_x2apic(void)
+{
+}
+static inline void enable_IR_x2apic(void)
+{
+}
+static inline int x2apic_enabled(void)
+{
+ return 0;
+}
#endif
-struct apic_ops {
- u32 (*read)(u32 reg);
- void (*write)(u32 reg, u32 v);
- u64 (*icr_read)(void);
- void (*icr_write)(u32 low, u32 high);
- void (*wait_icr_idle)(void);
- u32 (*safe_wait_icr_idle)(void);
-};
-
-extern struct apic_ops *apic_ops;
-
-#define apic_read (apic_ops->read)
-#define apic_write (apic_ops->write)
-#define apic_icr_read (apic_ops->icr_read)
-#define apic_icr_write (apic_ops->icr_write)
-#define apic_wait_icr_idle (apic_ops->wait_icr_idle)
-#define safe_apic_wait_icr_idle (apic_ops->safe_wait_icr_idle)
-
extern int get_physical_broadcast(void);
-#ifdef CONFIG_X86_64
+#ifdef CONFIG_X86_X2APIC
static inline void ack_x2APIC_irq(void)
{
/* Docs say use 0 for future compatibility */
@@ -141,18 +189,6 @@
}
#endif
-
-static inline void ack_APIC_irq(void)
-{
- /*
- * ack_APIC_irq() actually gets compiled as a single instruction
- * ... yummie.
- */
-
- /* Docs say use 0 for future compatibility */
- apic_write(APIC_EOI, 0);
-}
-
extern int lapic_get_maxlvt(void);
extern void clear_local_APIC(void);
extern void connect_bsp_APIC(void);
@@ -196,4 +232,316 @@
#endif /* !CONFIG_X86_LOCAL_APIC */
+#ifdef CONFIG_X86_64
+#define SET_APIC_ID(x) (apic->set_apic_id(x))
+#else
+
+#endif
+
+/*
+ * Copyright 2004 James Cleverdon, IBM.
+ * Subject to the GNU Public License, v.2
+ *
+ * Generic APIC sub-arch data struct.
+ *
+ * Hacked for x86-64 by James Cleverdon from i386 architecture code by
+ * Martin Bligh, Andi Kleen, James Bottomley, John Stultz, and
+ * James Cleverdon.
+ */
+struct apic {
+ char *name;
+
+ int (*probe)(void);
+ int (*acpi_madt_oem_check)(char *oem_id, char *oem_table_id);
+ int (*apic_id_registered)(void);
+
+ u32 irq_delivery_mode;
+ u32 irq_dest_mode;
+
+ const struct cpumask *(*target_cpus)(void);
+
+ int disable_esr;
+
+ int dest_logical;
+ unsigned long (*check_apicid_used)(physid_mask_t bitmap, int apicid);
+ unsigned long (*check_apicid_present)(int apicid);
+
+ void (*vector_allocation_domain)(int cpu, struct cpumask *retmask);
+ void (*init_apic_ldr)(void);
+
+ physid_mask_t (*ioapic_phys_id_map)(physid_mask_t map);
+
+ void (*setup_apic_routing)(void);
+ int (*multi_timer_check)(int apic, int irq);
+ int (*apicid_to_node)(int logical_apicid);
+ int (*cpu_to_logical_apicid)(int cpu);
+ int (*cpu_present_to_apicid)(int mps_cpu);
+ physid_mask_t (*apicid_to_cpu_present)(int phys_apicid);
+ void (*setup_portio_remap)(void);
+ int (*check_phys_apicid_present)(int boot_cpu_physical_apicid);
+ void (*enable_apic_mode)(void);
+ int (*phys_pkg_id)(int cpuid_apic, int index_msb);
+
+ /*
+ * When one of the next two hooks returns 1 the apic
+ * is switched to this. Essentially they are additional
+ * probe functions:
+ */
+ int (*mps_oem_check)(struct mpc_table *mpc, char *oem, char *productid);
+
+ unsigned int (*get_apic_id)(unsigned long x);
+ unsigned long (*set_apic_id)(unsigned int id);
+ unsigned long apic_id_mask;
+
+ unsigned int (*cpu_mask_to_apicid)(const struct cpumask *cpumask);
+ unsigned int (*cpu_mask_to_apicid_and)(const struct cpumask *cpumask,
+ const struct cpumask *andmask);
+
+ /* ipi */
+ void (*send_IPI_mask)(const struct cpumask *mask, int vector);
+ void (*send_IPI_mask_allbutself)(const struct cpumask *mask,
+ int vector);
+ void (*send_IPI_allbutself)(int vector);
+ void (*send_IPI_all)(int vector);
+ void (*send_IPI_self)(int vector);
+
+ /* wakeup_secondary_cpu */
+ int (*wakeup_cpu)(int apicid, unsigned long start_eip);
+
+ int trampoline_phys_low;
+ int trampoline_phys_high;
+
+ void (*wait_for_init_deassert)(atomic_t *deassert);
+ void (*smp_callin_clear_local_apic)(void);
+ void (*inquire_remote_apic)(int apicid);
+
+ /* apic ops */
+ u32 (*read)(u32 reg);
+ void (*write)(u32 reg, u32 v);
+ u64 (*icr_read)(void);
+ void (*icr_write)(u32 low, u32 high);
+ void (*wait_icr_idle)(void);
+ u32 (*safe_wait_icr_idle)(void);
+};
+
+extern struct apic *apic;
+
+static inline u32 apic_read(u32 reg)
+{
+ return apic->read(reg);
+}
+
+static inline void apic_write(u32 reg, u32 val)
+{
+ apic->write(reg, val);
+}
+
+static inline u64 apic_icr_read(void)
+{
+ return apic->icr_read();
+}
+
+static inline void apic_icr_write(u32 low, u32 high)
+{
+ apic->icr_write(low, high);
+}
+
+static inline void apic_wait_icr_idle(void)
+{
+ apic->wait_icr_idle();
+}
+
+static inline u32 safe_apic_wait_icr_idle(void)
+{
+ return apic->safe_wait_icr_idle();
+}
+
+
+static inline void ack_APIC_irq(void)
+{
+ /*
+ * ack_APIC_irq() actually gets compiled as a single instruction
+ * ... yummie.
+ */
+
+ /* Docs say use 0 for future compatibility */
+ apic_write(APIC_EOI, 0);
+}
+
+static inline unsigned default_get_apic_id(unsigned long x)
+{
+ unsigned int ver = GET_APIC_VERSION(apic_read(APIC_LVR));
+
+ if (APIC_XAPIC(ver))
+ return (x >> 24) & 0xFF;
+ else
+ return (x >> 24) & 0x0F;
+}
+
+/*
+ * Warm reset vector default position:
+ */
+#define DEFAULT_TRAMPOLINE_PHYS_LOW 0x467
+#define DEFAULT_TRAMPOLINE_PHYS_HIGH 0x469
+
+#ifdef CONFIG_X86_32
+extern void es7000_update_apic_to_cluster(void);
+#else
+extern struct apic apic_flat;
+extern struct apic apic_physflat;
+extern struct apic apic_x2apic_cluster;
+extern struct apic apic_x2apic_phys;
+extern int default_acpi_madt_oem_check(char *, char *);
+
+extern void apic_send_IPI_self(int vector);
+
+extern struct apic apic_x2apic_uv_x;
+DECLARE_PER_CPU(int, x2apic_extra_bits);
+
+extern int default_cpu_present_to_apicid(int mps_cpu);
+extern int default_check_phys_apicid_present(int boot_cpu_physical_apicid);
+#endif
+
+static inline void default_wait_for_init_deassert(atomic_t *deassert)
+{
+ while (!atomic_read(deassert))
+ cpu_relax();
+ return;
+}
+
+extern void generic_bigsmp_probe(void);
+
+
+#ifdef CONFIG_X86_LOCAL_APIC
+
+#include <asm/smp.h>
+
+#define APIC_DFR_VALUE (APIC_DFR_FLAT)
+
+static inline const struct cpumask *default_target_cpus(void)
+{
+#ifdef CONFIG_SMP
+ return cpu_online_mask;
+#else
+ return cpumask_of(0);
+#endif
+}
+
+DECLARE_EARLY_PER_CPU(u16, x86_bios_cpu_apicid);
+
+
+static inline unsigned int read_apic_id(void)
+{
+ unsigned int reg;
+
+ reg = apic_read(APIC_ID);
+
+ return apic->get_apic_id(reg);
+}
+
+extern void default_setup_apic_routing(void);
+
+#ifdef CONFIG_X86_32
+/*
+ * Set up the logical destination ID.
+ *
+ * Intel recommends to set DFR, LDR and TPR before enabling
+ * an APIC. See e.g. "AP-388 82489DX User's Manual" (Intel
+ * document number 292116). So here it goes...
+ */
+extern void default_init_apic_ldr(void);
+
+static inline int default_apic_id_registered(void)
+{
+ return physid_isset(read_apic_id(), phys_cpu_present_map);
+}
+
+static inline unsigned int
+default_cpu_mask_to_apicid(const struct cpumask *cpumask)
+{
+ return cpumask_bits(cpumask)[0];
+}
+
+static inline unsigned int
+default_cpu_mask_to_apicid_and(const struct cpumask *cpumask,
+ const struct cpumask *andmask)
+{
+ unsigned long mask1 = cpumask_bits(cpumask)[0];
+ unsigned long mask2 = cpumask_bits(andmask)[0];
+ unsigned long mask3 = cpumask_bits(cpu_online_mask)[0];
+
+ return (unsigned int)(mask1 & mask2 & mask3);
+}
+
+static inline int default_phys_pkg_id(int cpuid_apic, int index_msb)
+{
+ return cpuid_apic >> index_msb;
+}
+
+extern int default_apicid_to_node(int logical_apicid);
+
+#endif
+
+static inline unsigned long default_check_apicid_used(physid_mask_t bitmap, int apicid)
+{
+ return physid_isset(apicid, bitmap);
+}
+
+static inline unsigned long default_check_apicid_present(int bit)
+{
+ return physid_isset(bit, phys_cpu_present_map);
+}
+
+static inline physid_mask_t default_ioapic_phys_id_map(physid_mask_t phys_map)
+{
+ return phys_map;
+}
+
+/* Mapping from cpu number to logical apicid */
+static inline int default_cpu_to_logical_apicid(int cpu)
+{
+ return 1 << cpu;
+}
+
+static inline int __default_cpu_present_to_apicid(int mps_cpu)
+{
+ if (mps_cpu < nr_cpu_ids && cpu_present(mps_cpu))
+ return (int)per_cpu(x86_bios_cpu_apicid, mps_cpu);
+ else
+ return BAD_APICID;
+}
+
+static inline int
+__default_check_phys_apicid_present(int boot_cpu_physical_apicid)
+{
+ return physid_isset(boot_cpu_physical_apicid, phys_cpu_present_map);
+}
+
+#ifdef CONFIG_X86_32
+static inline int default_cpu_present_to_apicid(int mps_cpu)
+{
+ return __default_cpu_present_to_apicid(mps_cpu);
+}
+
+static inline int
+default_check_phys_apicid_present(int boot_cpu_physical_apicid)
+{
+ return __default_check_phys_apicid_present(boot_cpu_physical_apicid);
+}
+#else
+extern int default_cpu_present_to_apicid(int mps_cpu);
+extern int default_check_phys_apicid_present(int boot_cpu_physical_apicid);
+#endif
+
+static inline physid_mask_t default_apicid_to_cpu_present(int phys_apicid)
+{
+ return physid_mask_of_physid(phys_apicid);
+}
+
+#endif /* CONFIG_X86_LOCAL_APIC */
+
+#ifdef CONFIG_X86_32
+extern u8 cpu_2_logical_apicid[NR_CPUS];
+#endif
+
#endif /* _ASM_X86_APIC_H */
diff --git a/arch/x86/include/asm/apicnum.h b/arch/x86/include/asm/apicnum.h
new file mode 100644
index 0000000..82f613c
--- /dev/null
+++ b/arch/x86/include/asm/apicnum.h
@@ -0,0 +1,12 @@
+#ifndef _ASM_X86_APICNUM_H
+#define _ASM_X86_APICNUM_H
+
+/* define MAX_IO_APICS */
+#ifdef CONFIG_X86_32
+# define MAX_IO_APICS 64
+#else
+# define MAX_IO_APICS 128
+# define MAX_LOCAL_APIC 32768
+#endif
+
+#endif /* _ASM_X86_APICNUM_H */
diff --git a/arch/x86/include/asm/mach-default/apm.h b/arch/x86/include/asm/apm.h
similarity index 100%
rename from arch/x86/include/asm/mach-default/apm.h
rename to arch/x86/include/asm/apm.h
diff --git a/arch/x86/include/asm/arch_hooks.h b/arch/x86/include/asm/arch_hooks.h
deleted file mode 100644
index cbd4957..0000000
--- a/arch/x86/include/asm/arch_hooks.h
+++ /dev/null
@@ -1,26 +0,0 @@
-#ifndef _ASM_X86_ARCH_HOOKS_H
-#define _ASM_X86_ARCH_HOOKS_H
-
-#include <linux/interrupt.h>
-
-/*
- * linux/include/asm/arch_hooks.h
- *
- * define the architecture specific hooks
- */
-
-/* these aren't arch hooks, they are generic routines
- * that can be used by the hooks */
-extern void init_ISA_irqs(void);
-extern irqreturn_t timer_interrupt(int irq, void *dev_id);
-
-/* these are the defined hooks */
-extern void intr_init_hook(void);
-extern void pre_intr_init_hook(void);
-extern void pre_setup_arch_hook(void);
-extern void trap_init_hook(void);
-extern void pre_time_init_hook(void);
-extern void time_init_hook(void);
-extern void mca_nmi_hook(void);
-
-#endif /* _ASM_X86_ARCH_HOOKS_H */
diff --git a/arch/x86/include/asm/atomic_32.h b/arch/x86/include/asm/atomic_32.h
index 85b46fb..977250e 100644
--- a/arch/x86/include/asm/atomic_32.h
+++ b/arch/x86/include/asm/atomic_32.h
@@ -247,5 +247,223 @@
#define smp_mb__before_atomic_inc() barrier()
#define smp_mb__after_atomic_inc() barrier()
+/* An 64bit atomic type */
+
+typedef struct {
+ unsigned long long counter;
+} atomic64_t;
+
+#define ATOMIC64_INIT(val) { (val) }
+
+/**
+ * atomic64_read - read atomic64 variable
+ * @v: pointer of type atomic64_t
+ *
+ * Atomically reads the value of @v.
+ * Doesn't imply a read memory barrier.
+ */
+#define __atomic64_read(ptr) ((ptr)->counter)
+
+static inline unsigned long long
+cmpxchg8b(unsigned long long *ptr, unsigned long long old, unsigned long long new)
+{
+ asm volatile(
+
+ LOCK_PREFIX "cmpxchg8b (%[ptr])\n"
+
+ : "=A" (old)
+
+ : [ptr] "D" (ptr),
+ "A" (old),
+ "b" (ll_low(new)),
+ "c" (ll_high(new))
+
+ : "memory");
+
+ return old;
+}
+
+static inline unsigned long long
+atomic64_cmpxchg(atomic64_t *ptr, unsigned long long old_val,
+ unsigned long long new_val)
+{
+ return cmpxchg8b(&ptr->counter, old_val, new_val);
+}
+
+/**
+ * atomic64_set - set atomic64 variable
+ * @ptr: pointer to type atomic64_t
+ * @new_val: value to assign
+ *
+ * Atomically sets the value of @ptr to @new_val.
+ */
+static inline void atomic64_set(atomic64_t *ptr, unsigned long long new_val)
+{
+ unsigned long long old_val;
+
+ do {
+ old_val = atomic_read(ptr);
+ } while (atomic64_cmpxchg(ptr, old_val, new_val) != old_val);
+}
+
+/**
+ * atomic64_read - read atomic64 variable
+ * @ptr: pointer to type atomic64_t
+ *
+ * Atomically reads the value of @ptr and returns it.
+ */
+static inline unsigned long long atomic64_read(atomic64_t *ptr)
+{
+ unsigned long long curr_val;
+
+ do {
+ curr_val = __atomic64_read(ptr);
+ } while (atomic64_cmpxchg(ptr, curr_val, curr_val) != curr_val);
+
+ return curr_val;
+}
+
+/**
+ * atomic64_add_return - add and return
+ * @delta: integer value to add
+ * @ptr: pointer to type atomic64_t
+ *
+ * Atomically adds @delta to @ptr and returns @delta + *@ptr
+ */
+static inline unsigned long long
+atomic64_add_return(unsigned long long delta, atomic64_t *ptr)
+{
+ unsigned long long old_val, new_val;
+
+ do {
+ old_val = atomic_read(ptr);
+ new_val = old_val + delta;
+
+ } while (atomic64_cmpxchg(ptr, old_val, new_val) != old_val);
+
+ return new_val;
+}
+
+static inline long atomic64_sub_return(unsigned long long delta, atomic64_t *ptr)
+{
+ return atomic64_add_return(-delta, ptr);
+}
+
+static inline long atomic64_inc_return(atomic64_t *ptr)
+{
+ return atomic64_add_return(1, ptr);
+}
+
+static inline long atomic64_dec_return(atomic64_t *ptr)
+{
+ return atomic64_sub_return(1, ptr);
+}
+
+/**
+ * atomic64_add - add integer to atomic64 variable
+ * @delta: integer value to add
+ * @ptr: pointer to type atomic64_t
+ *
+ * Atomically adds @delta to @ptr.
+ */
+static inline void atomic64_add(unsigned long long delta, atomic64_t *ptr)
+{
+ atomic64_add_return(delta, ptr);
+}
+
+/**
+ * atomic64_sub - subtract the atomic64 variable
+ * @delta: integer value to subtract
+ * @ptr: pointer to type atomic64_t
+ *
+ * Atomically subtracts @delta from @ptr.
+ */
+static inline void atomic64_sub(unsigned long long delta, atomic64_t *ptr)
+{
+ atomic64_add(-delta, ptr);
+}
+
+/**
+ * atomic64_sub_and_test - subtract value from variable and test result
+ * @delta: integer value to subtract
+ * @ptr: pointer to type atomic64_t
+ *
+ * Atomically subtracts @delta from @ptr and returns
+ * true if the result is zero, or false for all
+ * other cases.
+ */
+static inline int
+atomic64_sub_and_test(unsigned long long delta, atomic64_t *ptr)
+{
+ unsigned long long old_val = atomic64_sub_return(delta, ptr);
+
+ return old_val == 0;
+}
+
+/**
+ * atomic64_inc - increment atomic64 variable
+ * @ptr: pointer to type atomic64_t
+ *
+ * Atomically increments @ptr by 1.
+ */
+static inline void atomic64_inc(atomic64_t *ptr)
+{
+ atomic64_add(1, ptr);
+}
+
+/**
+ * atomic64_dec - decrement atomic64 variable
+ * @ptr: pointer to type atomic64_t
+ *
+ * Atomically decrements @ptr by 1.
+ */
+static inline void atomic64_dec(atomic64_t *ptr)
+{
+ atomic64_sub(1, ptr);
+}
+
+/**
+ * atomic64_dec_and_test - decrement and test
+ * @ptr: pointer to type atomic64_t
+ *
+ * Atomically decrements @ptr by 1 and
+ * returns true if the result is 0, or false for all other
+ * cases.
+ */
+static inline int atomic64_dec_and_test(atomic64_t *ptr)
+{
+ return atomic64_sub_and_test(1, ptr);
+}
+
+/**
+ * atomic64_inc_and_test - increment and test
+ * @ptr: pointer to type atomic64_t
+ *
+ * Atomically increments @ptr by 1
+ * and returns true if the result is zero, or false for all
+ * other cases.
+ */
+static inline int atomic64_inc_and_test(atomic64_t *ptr)
+{
+ return atomic64_sub_and_test(-1, ptr);
+}
+
+/**
+ * atomic64_add_negative - add and test if negative
+ * @delta: integer value to add
+ * @ptr: pointer to type atomic64_t
+ *
+ * Atomically adds @delta to @ptr and returns true
+ * if the result is negative, or false when
+ * result is greater than or equal to zero.
+ */
+static inline int
+atomic64_add_negative(unsigned long long delta, atomic64_t *ptr)
+{
+ long long old_val = atomic64_add_return(delta, ptr);
+
+ return old_val < 0;
+}
+
#include <asm-generic/atomic.h>
#endif /* _ASM_X86_ATOMIC_32_H */
diff --git a/arch/x86/include/asm/bigsmp/apic.h b/arch/x86/include/asm/bigsmp/apic.h
deleted file mode 100644
index d8dd9f5..0000000
--- a/arch/x86/include/asm/bigsmp/apic.h
+++ /dev/null
@@ -1,155 +0,0 @@
-#ifndef __ASM_MACH_APIC_H
-#define __ASM_MACH_APIC_H
-
-#define xapic_phys_to_log_apicid(cpu) (per_cpu(x86_bios_cpu_apicid, cpu))
-#define esr_disable (1)
-
-static inline int apic_id_registered(void)
-{
- return (1);
-}
-
-static inline const cpumask_t *target_cpus(void)
-{
-#ifdef CONFIG_SMP
- return &cpu_online_map;
-#else
- return &cpumask_of_cpu(0);
-#endif
-}
-
-#undef APIC_DEST_LOGICAL
-#define APIC_DEST_LOGICAL 0
-#define APIC_DFR_VALUE (APIC_DFR_FLAT)
-#define INT_DELIVERY_MODE (dest_Fixed)
-#define INT_DEST_MODE (0) /* phys delivery to target proc */
-#define NO_BALANCE_IRQ (0)
-
-static inline unsigned long check_apicid_used(physid_mask_t bitmap, int apicid)
-{
- return (0);
-}
-
-static inline unsigned long check_apicid_present(int bit)
-{
- return (1);
-}
-
-static inline unsigned long calculate_ldr(int cpu)
-{
- unsigned long val, id;
- val = apic_read(APIC_LDR) & ~APIC_LDR_MASK;
- id = xapic_phys_to_log_apicid(cpu);
- val |= SET_APIC_LOGICAL_ID(id);
- return val;
-}
-
-/*
- * Set up the logical destination ID.
- *
- * Intel recommends to set DFR, LDR and TPR before enabling
- * an APIC. See e.g. "AP-388 82489DX User's Manual" (Intel
- * document number 292116). So here it goes...
- */
-static inline void init_apic_ldr(void)
-{
- unsigned long val;
- int cpu = smp_processor_id();
-
- apic_write(APIC_DFR, APIC_DFR_VALUE);
- val = calculate_ldr(cpu);
- apic_write(APIC_LDR, val);
-}
-
-static inline void setup_apic_routing(void)
-{
- printk("Enabling APIC mode: %s. Using %d I/O APICs\n",
- "Physflat", nr_ioapics);
-}
-
-static inline int multi_timer_check(int apic, int irq)
-{
- return (0);
-}
-
-static inline int apicid_to_node(int logical_apicid)
-{
- return apicid_2_node[hard_smp_processor_id()];
-}
-
-static inline int cpu_present_to_apicid(int mps_cpu)
-{
- if (mps_cpu < nr_cpu_ids)
- return (int) per_cpu(x86_bios_cpu_apicid, mps_cpu);
-
- return BAD_APICID;
-}
-
-static inline physid_mask_t apicid_to_cpu_present(int phys_apicid)
-{
- return physid_mask_of_physid(phys_apicid);
-}
-
-extern u8 cpu_2_logical_apicid[];
-/* Mapping from cpu number to logical apicid */
-static inline int cpu_to_logical_apicid(int cpu)
-{
- if (cpu >= nr_cpu_ids)
- return BAD_APICID;
- return cpu_physical_id(cpu);
-}
-
-static inline physid_mask_t ioapic_phys_id_map(physid_mask_t phys_map)
-{
- /* For clustered we don't have a good way to do this yet - hack */
- return physids_promote(0xFFL);
-}
-
-static inline void setup_portio_remap(void)
-{
-}
-
-static inline void enable_apic_mode(void)
-{
-}
-
-static inline int check_phys_apicid_present(int boot_cpu_physical_apicid)
-{
- return (1);
-}
-
-/* As we are using single CPU as destination, pick only one CPU here */
-static inline unsigned int cpu_mask_to_apicid(const cpumask_t *cpumask)
-{
- int cpu;
- int apicid;
-
- cpu = first_cpu(*cpumask);
- apicid = cpu_to_logical_apicid(cpu);
- return apicid;
-}
-
-static inline unsigned int cpu_mask_to_apicid_and(const struct cpumask *cpumask,
- const struct cpumask *andmask)
-{
- int cpu;
-
- /*
- * We're using fixed IRQ delivery, can only return one phys APIC ID.
- * May as well be the first.
- */
- for_each_cpu_and(cpu, cpumask, andmask)
- if (cpumask_test_cpu(cpu, cpu_online_mask))
- break;
- if (cpu < nr_cpu_ids)
- return cpu_to_logical_apicid(cpu);
-
- return BAD_APICID;
-}
-
-static inline u32 phys_pkg_id(u32 cpuid_apic, int index_msb)
-{
- return cpuid_apic >> index_msb;
-}
-
-#endif /* __ASM_MACH_APIC_H */
diff --git a/arch/x86/include/asm/bigsmp/apicdef.h b/arch/x86/include/asm/bigsmp/apicdef.h
deleted file mode 100644
index 392c3f5..0000000
--- a/arch/x86/include/asm/bigsmp/apicdef.h
+++ /dev/null
@@ -1,13 +0,0 @@
-#ifndef __ASM_MACH_APICDEF_H
-#define __ASM_MACH_APICDEF_H
-
-#define APIC_ID_MASK (0xFF<<24)
-
-static inline unsigned get_apic_id(unsigned long x)
-{
- return (((x)>>24)&0xFF);
-}
-
-#define GET_APIC_ID(x) get_apic_id(x)
-
-#endif
diff --git a/arch/x86/include/asm/bigsmp/ipi.h b/arch/x86/include/asm/bigsmp/ipi.h
deleted file mode 100644
index 27fcd01b..0000000
--- a/arch/x86/include/asm/bigsmp/ipi.h
+++ /dev/null
@@ -1,22 +0,0 @@
-#ifndef __ASM_MACH_IPI_H
-#define __ASM_MACH_IPI_H
-
-void send_IPI_mask_sequence(const struct cpumask *mask, int vector);
-void send_IPI_mask_allbutself(const struct cpumask *mask, int vector);
-
-static inline void send_IPI_mask(const struct cpumask *mask, int vector)
-{
- send_IPI_mask_sequence(mask, vector);
-}
-
-static inline void send_IPI_allbutself(int vector)
-{
- send_IPI_mask_allbutself(cpu_online_mask, vector);
-}
-
-static inline void send_IPI_all(int vector)
-{
- send_IPI_mask(cpu_online_mask, vector);
-}
-
-#endif /* __ASM_MACH_IPI_H */
diff --git a/arch/x86/include/asm/calling.h b/arch/x86/include/asm/calling.h
index 2bc162e..0e63c9a 100644
--- a/arch/x86/include/asm/calling.h
+++ b/arch/x86/include/asm/calling.h
@@ -1,5 +1,55 @@
/*
- * Some macros to handle stack frames in assembly.
+
+ x86 function call convention, 64-bit:
+ -------------------------------------
+ arguments | callee-saved | extra caller-saved | return
+ [callee-clobbered] | | [callee-clobbered] |
+ ---------------------------------------------------------------------------
+ rdi rsi rdx rcx r8-9 | rbx rbp [*] r12-15 | r10-11 | rax, rdx [**]
+
+ ( rsp is obviously invariant across normal function calls. (gcc can 'merge'
+ functions when it sees tail-call optimization possibilities) rflags is
+ clobbered. Leftover arguments are passed over the stack frame.)
+
+ [*] In the frame-pointers case rbp is fixed to the stack frame.
+
+ [**] for struct return values wider than 64 bits the return convention is a
+ bit more complex: up to 128 bits width we return small structures
+ straight in rax, rdx. For structures larger than that (3 words or
+ larger) the caller puts a pointer to an on-stack return struct
+ [allocated in the caller's stack frame] into the first argument - i.e.
+ into rdi. All other arguments shift up by one in this case.
+ Fortunately this case is rare in the kernel.
+
+For 32-bit we have the following conventions - kernel is built with
+-mregparm=3 and -freg-struct-return:
+
+ x86 function calling convention, 32-bit:
+ ----------------------------------------
+ arguments | callee-saved | extra caller-saved | return
+ [callee-clobbered] | | [callee-clobbered] |
+ -------------------------------------------------------------------------
+ eax edx ecx | ebx edi esi ebp [*] | <none> | eax, edx [**]
+
+ ( here too esp is obviously invariant across normal function calls. eflags
+ is clobbered. Leftover arguments are passed over the stack frame. )
+
+ [*] In the frame-pointers case ebp is fixed to the stack frame.
+
+ [**] We build with -freg-struct-return, which on 32-bit means similar
+ semantics as on 64-bit: edx can be used for a second return value
+ (i.e. covering integer and structure sizes up to 64 bits) - after that
+ it gets more complex and more expensive: 3-word or larger struct returns
+ get done in the caller's frame and the pointer to the return struct goes
+ into regparm0, i.e. eax - the other arguments shift up and the
+ function's register parameters degenerate to regparm=2 in essence.
+
+*/
+
+
+/*
+ * 64-bit system call stack frame layout defines and helpers,
+ * for assembly code:
*/
#define R15 0
@@ -9,7 +59,7 @@
#define RBP 32
#define RBX 40
-/* arguments: interrupts/non tracing syscalls only save upto here*/
+/* arguments: interrupts/non tracing syscalls only save up to here: */
#define R11 48
#define R10 56
#define R9 64
@@ -22,7 +72,7 @@
#define ORIG_RAX 120 /* + error_code */
/* end of arguments */
-/* cpu exception frame or undefined in case of fast syscall. */
+/* cpu exception frame or undefined in case of fast syscall: */
#define RIP 128
#define CS 136
#define EFLAGS 144
diff --git a/arch/x86/include/asm/cpu.h b/arch/x86/include/asm/cpu.h
index bae482d..b185091 100644
--- a/arch/x86/include/asm/cpu.h
+++ b/arch/x86/include/asm/cpu.h
@@ -7,6 +7,20 @@
#include <linux/nodemask.h>
#include <linux/percpu.h>
+#ifdef CONFIG_SMP
+
+extern void prefill_possible_map(void);
+
+#else /* CONFIG_SMP */
+
+static inline void prefill_possible_map(void) {}
+
+#define cpu_physical_id(cpu) boot_cpu_physical_apicid
+#define safe_smp_processor_id() 0
+#define stack_smp_processor_id() 0
+
+#endif /* CONFIG_SMP */
+
struct x86_cpu {
struct cpu cpu;
};
@@ -17,4 +31,7 @@
#endif
DECLARE_PER_CPU(int, cpu_state);
+
+extern unsigned int boot_cpu_id;
+
#endif /* _ASM_X86_CPU_H */
diff --git a/arch/x86/include/asm/cpumask.h b/arch/x86/include/asm/cpumask.h
new file mode 100644
index 0000000..a7f3c75
--- /dev/null
+++ b/arch/x86/include/asm/cpumask.h
@@ -0,0 +1,32 @@
+#ifndef _ASM_X86_CPUMASK_H
+#define _ASM_X86_CPUMASK_H
+#ifndef __ASSEMBLY__
+#include <linux/cpumask.h>
+
+#ifdef CONFIG_X86_64
+
+extern cpumask_var_t cpu_callin_mask;
+extern cpumask_var_t cpu_callout_mask;
+extern cpumask_var_t cpu_initialized_mask;
+extern cpumask_var_t cpu_sibling_setup_mask;
+
+extern void setup_cpu_local_masks(void);
+
+#else /* CONFIG_X86_32 */
+
+extern cpumask_t cpu_callin_map;
+extern cpumask_t cpu_callout_map;
+extern cpumask_t cpu_initialized;
+extern cpumask_t cpu_sibling_setup_map;
+
+#define cpu_callin_mask ((struct cpumask *)&cpu_callin_map)
+#define cpu_callout_mask ((struct cpumask *)&cpu_callout_map)
+#define cpu_initialized_mask ((struct cpumask *)&cpu_initialized)
+#define cpu_sibling_setup_mask ((struct cpumask *)&cpu_sibling_setup_map)
+
+static inline void setup_cpu_local_masks(void) { }
+
+#endif /* CONFIG_X86_32 */
+
+#endif /* __ASSEMBLY__ */
+#endif /* _ASM_X86_CPUMASK_H */
diff --git a/arch/x86/include/asm/current.h b/arch/x86/include/asm/current.h
index 0930b4f..c68c361 100644
--- a/arch/x86/include/asm/current.h
+++ b/arch/x86/include/asm/current.h
@@ -1,39 +1,21 @@
#ifndef _ASM_X86_CURRENT_H
#define _ASM_X86_CURRENT_H
-#ifdef CONFIG_X86_32
#include <linux/compiler.h>
#include <asm/percpu.h>
+#ifndef __ASSEMBLY__
struct task_struct;
DECLARE_PER_CPU(struct task_struct *, current_task);
-static __always_inline struct task_struct *get_current(void)
-{
- return x86_read_percpu(current_task);
-}
-
-#else /* X86_32 */
-
-#ifndef __ASSEMBLY__
-#include <asm/pda.h>
-
-struct task_struct;
static __always_inline struct task_struct *get_current(void)
{
- return read_pda(pcurrent);
+ return percpu_read(current_task);
}
-#else /* __ASSEMBLY__ */
-
-#include <asm/asm-offsets.h>
-#define GET_CURRENT(reg) movq %gs:(pda_pcurrent),reg
-
-#endif /* __ASSEMBLY__ */
-
-#endif /* X86_32 */
-
#define current get_current()
+#endif /* __ASSEMBLY__ */
+
#endif /* _ASM_X86_CURRENT_H */
diff --git a/arch/x86/include/asm/mach-default/do_timer.h b/arch/x86/include/asm/do_timer.h
similarity index 100%
rename from arch/x86/include/asm/mach-default/do_timer.h
rename to arch/x86/include/asm/do_timer.h
diff --git a/arch/x86/include/asm/elf.h b/arch/x86/include/asm/elf.h
index f51a3dd..83c1bc8 100644
--- a/arch/x86/include/asm/elf.h
+++ b/arch/x86/include/asm/elf.h
@@ -112,7 +112,7 @@
* now struct_user_regs, they are different)
*/
-#define ELF_CORE_COPY_REGS(pr_reg, regs) \
+#define ELF_CORE_COPY_REGS_COMMON(pr_reg, regs) \
do { \
pr_reg[0] = regs->bx; \
pr_reg[1] = regs->cx; \
@@ -124,7 +124,6 @@
pr_reg[7] = regs->ds & 0xffff; \
pr_reg[8] = regs->es & 0xffff; \
pr_reg[9] = regs->fs & 0xffff; \
- savesegment(gs, pr_reg[10]); \
pr_reg[11] = regs->orig_ax; \
pr_reg[12] = regs->ip; \
pr_reg[13] = regs->cs & 0xffff; \
@@ -133,6 +132,18 @@
pr_reg[16] = regs->ss & 0xffff; \
} while (0);
+#define ELF_CORE_COPY_REGS(pr_reg, regs) \
+do { \
+ ELF_CORE_COPY_REGS_COMMON(pr_reg, regs);\
+ pr_reg[10] = get_user_gs(regs); \
+} while (0);
+
+#define ELF_CORE_COPY_KERNEL_REGS(pr_reg, regs) \
+do { \
+ ELF_CORE_COPY_REGS_COMMON(pr_reg, regs);\
+ savesegment(gs, pr_reg[10]); \
+} while (0);
+
#define ELF_PLATFORM (utsname()->machine)
#define set_personality_64bit() do { } while (0)
diff --git a/arch/x86/include/asm/entry_arch.h b/arch/x86/include/asm/entry_arch.h
new file mode 100644
index 0000000..854d538
--- /dev/null
+++ b/arch/x86/include/asm/entry_arch.h
@@ -0,0 +1,57 @@
+/*
+ * This file is designed to contain the BUILD_INTERRUPT specifications for
+ * all of the extra named interrupt vectors used by the architecture.
+ * Usually this is the Inter Process Interrupts (IPIs)
+ */
+
+/*
+ * The following vectors are part of the Linux architecture, there
+ * is no hardware IRQ pin equivalent for them, they are triggered
+ * through the ICC by us (IPIs)
+ */
+#ifdef CONFIG_SMP
+BUILD_INTERRUPT(reschedule_interrupt,RESCHEDULE_VECTOR)
+BUILD_INTERRUPT(call_function_interrupt,CALL_FUNCTION_VECTOR)
+BUILD_INTERRUPT(call_function_single_interrupt,CALL_FUNCTION_SINGLE_VECTOR)
+BUILD_INTERRUPT(irq_move_cleanup_interrupt,IRQ_MOVE_CLEANUP_VECTOR)
+
+BUILD_INTERRUPT3(invalidate_interrupt0,INVALIDATE_TLB_VECTOR_START+0,
+ smp_invalidate_interrupt)
+BUILD_INTERRUPT3(invalidate_interrupt1,INVALIDATE_TLB_VECTOR_START+1,
+ smp_invalidate_interrupt)
+BUILD_INTERRUPT3(invalidate_interrupt2,INVALIDATE_TLB_VECTOR_START+2,
+ smp_invalidate_interrupt)
+BUILD_INTERRUPT3(invalidate_interrupt3,INVALIDATE_TLB_VECTOR_START+3,
+ smp_invalidate_interrupt)
+BUILD_INTERRUPT3(invalidate_interrupt4,INVALIDATE_TLB_VECTOR_START+4,
+ smp_invalidate_interrupt)
+BUILD_INTERRUPT3(invalidate_interrupt5,INVALIDATE_TLB_VECTOR_START+5,
+ smp_invalidate_interrupt)
+BUILD_INTERRUPT3(invalidate_interrupt6,INVALIDATE_TLB_VECTOR_START+6,
+ smp_invalidate_interrupt)
+BUILD_INTERRUPT3(invalidate_interrupt7,INVALIDATE_TLB_VECTOR_START+7,
+ smp_invalidate_interrupt)
+#endif
+
+/*
+ * every pentium local APIC has two 'local interrupts', with a
+ * soft-definable vector attached to both interrupts, one of
+ * which is a timer interrupt, the other one is error counter
+ * overflow. Linux uses the local APIC timer interrupt to get
+ * a much simpler SMP time architecture:
+ */
+#ifdef CONFIG_X86_LOCAL_APIC
+
+BUILD_INTERRUPT(apic_timer_interrupt,LOCAL_TIMER_VECTOR)
+BUILD_INTERRUPT(error_interrupt,ERROR_APIC_VECTOR)
+BUILD_INTERRUPT(spurious_interrupt,SPURIOUS_APIC_VECTOR)
+
+#ifdef CONFIG_PERF_COUNTERS
+BUILD_INTERRUPT(perf_counter_interrupt, LOCAL_PERF_VECTOR)
+#endif
+
+#ifdef CONFIG_X86_MCE_P4THERMAL
+BUILD_INTERRUPT(thermal_interrupt,THERMAL_APIC_VECTOR)
+#endif
+
+#endif
diff --git a/arch/x86/include/asm/es7000/apic.h b/arch/x86/include/asm/es7000/apic.h
deleted file mode 100644
index c58b9cc..0000000
--- a/arch/x86/include/asm/es7000/apic.h
+++ /dev/null
@@ -1,242 +0,0 @@
-#ifndef __ASM_ES7000_APIC_H
-#define __ASM_ES7000_APIC_H
-
-#include <linux/gfp.h>
-
-#define xapic_phys_to_log_apicid(cpu) per_cpu(x86_bios_cpu_apicid, cpu)
-#define esr_disable (1)
-
-static inline int apic_id_registered(void)
-{
- return (1);
-}
-
-static inline const cpumask_t *target_cpus_cluster(void)
-{
- return &CPU_MASK_ALL;
-}
-
-static inline const cpumask_t *target_cpus(void)
-{
- return &cpumask_of_cpu(smp_processor_id());
-}
-
-#define APIC_DFR_VALUE_CLUSTER (APIC_DFR_CLUSTER)
-#define INT_DELIVERY_MODE_CLUSTER (dest_LowestPrio)
-#define INT_DEST_MODE_CLUSTER (1) /* logical delivery broadcast to all procs */
-#define NO_BALANCE_IRQ_CLUSTER (1)
-
-#define APIC_DFR_VALUE (APIC_DFR_FLAT)
-#define INT_DELIVERY_MODE (dest_Fixed)
-#define INT_DEST_MODE (0) /* phys delivery to target procs */
-#define NO_BALANCE_IRQ (0)
-#undef APIC_DEST_LOGICAL
-#define APIC_DEST_LOGICAL 0x0
-
-static inline unsigned long check_apicid_used(physid_mask_t bitmap, int apicid)
-{
- return 0;
-}
-static inline unsigned long check_apicid_present(int bit)
-{
- return physid_isset(bit, phys_cpu_present_map);
-}
-
-#define apicid_cluster(apicid) (apicid & 0xF0)
-
-static inline unsigned long calculate_ldr(int cpu)
-{
- unsigned long id;
- id = xapic_phys_to_log_apicid(cpu);
- return (SET_APIC_LOGICAL_ID(id));
-}
-
-/*
- * Set up the logical destination ID.
- *
- * Intel recommends to set DFR, LdR and TPR before enabling
- * an APIC. See e.g. "AP-388 82489DX User's Manual" (Intel
- * document number 292116). So here it goes...
- */
-static inline void init_apic_ldr_cluster(void)
-{
- unsigned long val;
- int cpu = smp_processor_id();
-
- apic_write(APIC_DFR, APIC_DFR_VALUE_CLUSTER);
- val = calculate_ldr(cpu);
- apic_write(APIC_LDR, val);
-}
-
-static inline void init_apic_ldr(void)
-{
- unsigned long val;
- int cpu = smp_processor_id();
-
- apic_write(APIC_DFR, APIC_DFR_VALUE);
- val = calculate_ldr(cpu);
- apic_write(APIC_LDR, val);
-}
-
-extern int apic_version [MAX_APICS];
-static inline void setup_apic_routing(void)
-{
- int apic = per_cpu(x86_bios_cpu_apicid, smp_processor_id());
- printk("Enabling APIC mode: %s. Using %d I/O APICs, target cpus %lx\n",
- (apic_version[apic] == 0x14) ?
- "Physical Cluster" : "Logical Cluster",
- nr_ioapics, cpus_addr(*target_cpus())[0]);
-}
-
-static inline int multi_timer_check(int apic, int irq)
-{
- return 0;
-}
-
-static inline int apicid_to_node(int logical_apicid)
-{
- return 0;
-}
-
-
-static inline int cpu_present_to_apicid(int mps_cpu)
-{
- if (!mps_cpu)
- return boot_cpu_physical_apicid;
- else if (mps_cpu < nr_cpu_ids)
- return (int) per_cpu(x86_bios_cpu_apicid, mps_cpu);
- else
- return BAD_APICID;
-}
-
-static inline physid_mask_t apicid_to_cpu_present(int phys_apicid)
-{
- static int id = 0;
- physid_mask_t mask;
- mask = physid_mask_of_physid(id);
- ++id;
- return mask;
-}
-
-extern u8 cpu_2_logical_apicid[];
-/* Mapping from cpu number to logical apicid */
-static inline int cpu_to_logical_apicid(int cpu)
-{
-#ifdef CONFIG_SMP
- if (cpu >= nr_cpu_ids)
- return BAD_APICID;
- return (int)cpu_2_logical_apicid[cpu];
-#else
- return logical_smp_processor_id();
-#endif
-}
-
-static inline physid_mask_t ioapic_phys_id_map(physid_mask_t phys_map)
-{
- /* For clustered we don't have a good way to do this yet - hack */
- return physids_promote(0xff);
-}
-
-
-static inline void setup_portio_remap(void)
-{
-}
-
-extern unsigned int boot_cpu_physical_apicid;
-static inline int check_phys_apicid_present(int cpu_physical_apicid)
-{
- boot_cpu_physical_apicid = read_apic_id();
- return (1);
-}
-
-static inline unsigned int
-cpu_mask_to_apicid_cluster(const struct cpumask *cpumask)
-{
- int num_bits_set;
- int cpus_found = 0;
- int cpu;
- int apicid;
-
- num_bits_set = cpumask_weight(cpumask);
- /* Return id to all */
- if (num_bits_set == nr_cpu_ids)
- return 0xFF;
- /*
- * The cpus in the mask must all be on the apic cluster. If are not
- * on the same apicid cluster return default value of TARGET_CPUS.
- */
- cpu = cpumask_first(cpumask);
- apicid = cpu_to_logical_apicid(cpu);
- while (cpus_found < num_bits_set) {
- if (cpumask_test_cpu(cpu, cpumask)) {
- int new_apicid = cpu_to_logical_apicid(cpu);
- if (apicid_cluster(apicid) !=
- apicid_cluster(new_apicid)){
- printk ("%s: Not a valid mask!\n", __func__);
- return 0xFF;
- }
- apicid = new_apicid;
- cpus_found++;
- }
- cpu++;
- }
- return apicid;
-}
-
-static inline unsigned int cpu_mask_to_apicid(const cpumask_t *cpumask)
-{
- int num_bits_set;
- int cpus_found = 0;
- int cpu;
- int apicid;
-
- num_bits_set = cpus_weight(*cpumask);
- /* Return id to all */
- if (num_bits_set == nr_cpu_ids)
- return cpu_to_logical_apicid(0);
- /*
- * The cpus in the mask must all be on the apic cluster. If are not
- * on the same apicid cluster return default value of TARGET_CPUS.
- */
- cpu = first_cpu(*cpumask);
- apicid = cpu_to_logical_apicid(cpu);
- while (cpus_found < num_bits_set) {
- if (cpu_isset(cpu, *cpumask)) {
- int new_apicid = cpu_to_logical_apicid(cpu);
- if (apicid_cluster(apicid) !=
- apicid_cluster(new_apicid)){
- printk ("%s: Not a valid mask!\n", __func__);
- return cpu_to_logical_apicid(0);
- }
- apicid = new_apicid;
- cpus_found++;
- }
- cpu++;
- }
- return apicid;
-}
-
-
-static inline unsigned int cpu_mask_to_apicid_and(const struct cpumask *inmask,
- const struct cpumask *andmask)
-{
- int apicid = cpu_to_logical_apicid(0);
- cpumask_var_t cpumask;
-
- if (!alloc_cpumask_var(&cpumask, GFP_ATOMIC))
- return apicid;
-
- cpumask_and(cpumask, inmask, andmask);
- cpumask_and(cpumask, cpumask, cpu_online_mask);
- apicid = cpu_mask_to_apicid(cpumask);
-
- free_cpumask_var(cpumask);
- return apicid;
-}
-
-static inline u32 phys_pkg_id(u32 cpuid_apic, int index_msb)
-{
- return cpuid_apic >> index_msb;
-}
-
-#endif /* __ASM_ES7000_APIC_H */
diff --git a/arch/x86/include/asm/es7000/apicdef.h b/arch/x86/include/asm/es7000/apicdef.h
deleted file mode 100644
index 8b234a3..0000000
--- a/arch/x86/include/asm/es7000/apicdef.h
+++ /dev/null
@@ -1,13 +0,0 @@
-#ifndef __ASM_ES7000_APICDEF_H
-#define __ASM_ES7000_APICDEF_H
-
-#define APIC_ID_MASK (0xFF<<24)
-
-static inline unsigned get_apic_id(unsigned long x)
-{
- return (((x)>>24)&0xFF);
-}
-
-#define GET_APIC_ID(x) get_apic_id(x)
-
-#endif
diff --git a/arch/x86/include/asm/es7000/ipi.h b/arch/x86/include/asm/es7000/ipi.h
deleted file mode 100644
index 7e8ed24..0000000
--- a/arch/x86/include/asm/es7000/ipi.h
+++ /dev/null
@@ -1,22 +0,0 @@
-#ifndef __ASM_ES7000_IPI_H
-#define __ASM_ES7000_IPI_H
-
-void send_IPI_mask_sequence(const struct cpumask *mask, int vector);
-void send_IPI_mask_allbutself(const struct cpumask *mask, int vector);
-
-static inline void send_IPI_mask(const struct cpumask *mask, int vector)
-{
- send_IPI_mask_sequence(mask, vector);
-}
-
-static inline void send_IPI_allbutself(int vector)
-{
- send_IPI_mask_allbutself(cpu_online_mask, vector);
-}
-
-static inline void send_IPI_all(int vector)
-{
- send_IPI_mask(cpu_online_mask, vector);
-}
-
-#endif /* __ASM_ES7000_IPI_H */
diff --git a/arch/x86/include/asm/es7000/mpparse.h b/arch/x86/include/asm/es7000/mpparse.h
deleted file mode 100644
index c1629b0..0000000
--- a/arch/x86/include/asm/es7000/mpparse.h
+++ /dev/null
@@ -1,29 +0,0 @@
-#ifndef __ASM_ES7000_MPPARSE_H
-#define __ASM_ES7000_MPPARSE_H
-
-#include <linux/acpi.h>
-
-extern int parse_unisys_oem (char *oemptr);
-extern int find_unisys_acpi_oem_table(unsigned long *oem_addr);
-extern void unmap_unisys_acpi_oem_table(unsigned long oem_addr);
-extern void setup_unisys(void);
-
-#ifndef CONFIG_X86_GENERICARCH
-extern int acpi_madt_oem_check(char *oem_id, char *oem_table_id);
-extern int mps_oem_check(struct mpc_table *mpc, char *oem, char *productid);
-#endif
-
-#ifdef CONFIG_ACPI
-
-static inline int es7000_check_dsdt(void)
-{
- struct acpi_table_header header;
-
- if (ACPI_SUCCESS(acpi_get_table_header(ACPI_SIG_DSDT, 0, &header)) &&
- !strncmp(header.oem_id, "UNISYS", 6))
- return 1;
- return 0;
-}
-#endif
-
-#endif /* __ASM_MACH_MPPARSE_H */
diff --git a/arch/x86/include/asm/es7000/wakecpu.h b/arch/x86/include/asm/es7000/wakecpu.h
deleted file mode 100644
index 78f0daa..0000000
--- a/arch/x86/include/asm/es7000/wakecpu.h
+++ /dev/null
@@ -1,37 +0,0 @@
-#ifndef __ASM_ES7000_WAKECPU_H
-#define __ASM_ES7000_WAKECPU_H
-
-#define TRAMPOLINE_PHYS_LOW 0x467
-#define TRAMPOLINE_PHYS_HIGH 0x469
-
-static inline void wait_for_init_deassert(atomic_t *deassert)
-{
-#ifndef CONFIG_ES7000_CLUSTERED_APIC
- while (!atomic_read(deassert))
- cpu_relax();
-#endif
- return;
-}
-
-/* Nothing to do for most platforms, since cleared by the INIT cycle */
-static inline void smp_callin_clear_local_apic(void)
-{
-}
-
-static inline void store_NMI_vector(unsigned short *high, unsigned short *low)
-{
-}
-
-static inline void restore_NMI_vector(unsigned short *high, unsigned short *low)
-{
-}
-
-extern void __inquire_remote_apic(int apicid);
-
-static inline void inquire_remote_apic(int apicid)
-{
- if (apic_verbosity >= APIC_DEBUG)
- __inquire_remote_apic(apicid);
-}
-
-#endif /* __ASM_MACH_WAKECPU_H */
diff --git a/arch/x86/include/asm/fixmap_32.h b/arch/x86/include/asm/fixmap_32.h
index c7115c1..047d9ba 100644
--- a/arch/x86/include/asm/fixmap_32.h
+++ b/arch/x86/include/asm/fixmap_32.h
@@ -95,10 +95,6 @@
(__end_of_permanent_fixed_addresses & 255),
FIX_BTMAP_BEGIN = FIX_BTMAP_END + NR_FIX_BTMAPS*FIX_BTMAPS_SLOTS - 1,
FIX_WP_TEST,
-#ifdef CONFIG_ACPI
- FIX_ACPI_BEGIN,
- FIX_ACPI_END = FIX_ACPI_BEGIN + FIX_ACPI_PAGES - 1,
-#endif
#ifdef CONFIG_PROVIDE_OHCI1394_DMA_INIT
FIX_OHCI1394_BASE,
#endif
diff --git a/arch/x86/include/asm/fixmap_64.h b/arch/x86/include/asm/fixmap_64.h
index 00a30ab9..298d9ba 100644
--- a/arch/x86/include/asm/fixmap_64.h
+++ b/arch/x86/include/asm/fixmap_64.h
@@ -50,10 +50,6 @@
FIX_PARAVIRT_BOOTMAP,
#endif
__end_of_permanent_fixed_addresses,
-#ifdef CONFIG_ACPI
- FIX_ACPI_BEGIN,
- FIX_ACPI_END = FIX_ACPI_BEGIN + FIX_ACPI_PAGES - 1,
-#endif
#ifdef CONFIG_PROVIDE_OHCI1394_DMA_INIT
FIX_OHCI1394_BASE,
#endif
diff --git a/arch/x86/include/asm/genapic.h b/arch/x86/include/asm/genapic.h
index d48bee6..4b8b98f 100644
--- a/arch/x86/include/asm/genapic.h
+++ b/arch/x86/include/asm/genapic.h
@@ -1,5 +1 @@
-#ifdef CONFIG_X86_32
-# include "genapic_32.h"
-#else
-# include "genapic_64.h"
-#endif
+#include <asm/apic.h>
diff --git a/arch/x86/include/asm/genapic_32.h b/arch/x86/include/asm/genapic_32.h
deleted file mode 100644
index 2c05b73..0000000
--- a/arch/x86/include/asm/genapic_32.h
+++ /dev/null
@@ -1,148 +0,0 @@
-#ifndef _ASM_X86_GENAPIC_32_H
-#define _ASM_X86_GENAPIC_32_H
-
-#include <asm/mpspec.h>
-#include <asm/atomic.h>
-
-/*
- * Generic APIC driver interface.
- *
- * An straight forward mapping of the APIC related parts of the
- * x86 subarchitecture interface to a dynamic object.
- *
- * This is used by the "generic" x86 subarchitecture.
- *
- * Copyright 2003 Andi Kleen, SuSE Labs.
- */
-
-struct mpc_bus;
-struct mpc_table;
-struct mpc_cpu;
-
-struct genapic {
- char *name;
- int (*probe)(void);
-
- int (*apic_id_registered)(void);
- const struct cpumask *(*target_cpus)(void);
- int int_delivery_mode;
- int int_dest_mode;
- int ESR_DISABLE;
- int apic_destination_logical;
- unsigned long (*check_apicid_used)(physid_mask_t bitmap, int apicid);
- unsigned long (*check_apicid_present)(int apicid);
- int no_balance_irq;
- int no_ioapic_check;
- void (*init_apic_ldr)(void);
- physid_mask_t (*ioapic_phys_id_map)(physid_mask_t map);
-
- void (*setup_apic_routing)(void);
- int (*multi_timer_check)(int apic, int irq);
- int (*apicid_to_node)(int logical_apicid);
- int (*cpu_to_logical_apicid)(int cpu);
- int (*cpu_present_to_apicid)(int mps_cpu);
- physid_mask_t (*apicid_to_cpu_present)(int phys_apicid);
- void (*setup_portio_remap)(void);
- int (*check_phys_apicid_present)(int boot_cpu_physical_apicid);
- void (*enable_apic_mode)(void);
- u32 (*phys_pkg_id)(u32 cpuid_apic, int index_msb);
-
- /* mpparse */
- /* When one of the next two hooks returns 1 the genapic
- is switched to this. Essentially they are additional probe
- functions. */
- int (*mps_oem_check)(struct mpc_table *mpc, char *oem,
- char *productid);
- int (*acpi_madt_oem_check)(char *oem_id, char *oem_table_id);
-
- unsigned (*get_apic_id)(unsigned long x);
- unsigned long apic_id_mask;
- unsigned int (*cpu_mask_to_apicid)(const struct cpumask *cpumask);
- unsigned int (*cpu_mask_to_apicid_and)(const struct cpumask *cpumask,
- const struct cpumask *andmask);
- void (*vector_allocation_domain)(int cpu, struct cpumask *retmask);
-
-#ifdef CONFIG_SMP
- /* ipi */
- void (*send_IPI_mask)(const struct cpumask *mask, int vector);
- void (*send_IPI_mask_allbutself)(const struct cpumask *mask,
- int vector);
- void (*send_IPI_allbutself)(int vector);
- void (*send_IPI_all)(int vector);
-#endif
- int (*wakeup_cpu)(int apicid, unsigned long start_eip);
- int trampoline_phys_low;
- int trampoline_phys_high;
- void (*wait_for_init_deassert)(atomic_t *deassert);
- void (*smp_callin_clear_local_apic)(void);
- void (*store_NMI_vector)(unsigned short *high, unsigned short *low);
- void (*restore_NMI_vector)(unsigned short *high, unsigned short *low);
- void (*inquire_remote_apic)(int apicid);
-};
-
-#define APICFUNC(x) .x = x,
-
-/* More functions could be probably marked IPIFUNC and save some space
- in UP GENERICARCH kernels, but I don't have the nerve right now
- to untangle this mess. -AK */
-#ifdef CONFIG_SMP
-#define IPIFUNC(x) APICFUNC(x)
-#else
-#define IPIFUNC(x)
-#endif
-
-#define APIC_INIT(aname, aprobe) \
-{ \
- .name = aname, \
- .probe = aprobe, \
- .int_delivery_mode = INT_DELIVERY_MODE, \
- .int_dest_mode = INT_DEST_MODE, \
- .no_balance_irq = NO_BALANCE_IRQ, \
- .ESR_DISABLE = esr_disable, \
- .apic_destination_logical = APIC_DEST_LOGICAL, \
- APICFUNC(apic_id_registered) \
- APICFUNC(target_cpus) \
- APICFUNC(check_apicid_used) \
- APICFUNC(check_apicid_present) \
- APICFUNC(init_apic_ldr) \
- APICFUNC(ioapic_phys_id_map) \
- APICFUNC(setup_apic_routing) \
- APICFUNC(multi_timer_check) \
- APICFUNC(apicid_to_node) \
- APICFUNC(cpu_to_logical_apicid) \
- APICFUNC(cpu_present_to_apicid) \
- APICFUNC(apicid_to_cpu_present) \
- APICFUNC(setup_portio_remap) \
- APICFUNC(check_phys_apicid_present) \
- APICFUNC(mps_oem_check) \
- APICFUNC(get_apic_id) \
- .apic_id_mask = APIC_ID_MASK, \
- APICFUNC(cpu_mask_to_apicid) \
- APICFUNC(cpu_mask_to_apicid_and) \
- APICFUNC(vector_allocation_domain) \
- APICFUNC(acpi_madt_oem_check) \
- IPIFUNC(send_IPI_mask) \
- IPIFUNC(send_IPI_allbutself) \
- IPIFUNC(send_IPI_all) \
- APICFUNC(enable_apic_mode) \
- APICFUNC(phys_pkg_id) \
- .trampoline_phys_low = TRAMPOLINE_PHYS_LOW, \
- .trampoline_phys_high = TRAMPOLINE_PHYS_HIGH, \
- APICFUNC(wait_for_init_deassert) \
- APICFUNC(smp_callin_clear_local_apic) \
- APICFUNC(store_NMI_vector) \
- APICFUNC(restore_NMI_vector) \
- APICFUNC(inquire_remote_apic) \
-}
-
-extern struct genapic *genapic;
-extern void es7000_update_genapic_to_cluster(void);
-
-enum uv_system_type {UV_NONE, UV_LEGACY_APIC, UV_X2APIC, UV_NON_UNIQUE_APIC};
-#define get_uv_system_type() UV_NONE
-#define is_uv_system() 0
-#define uv_wakeup_secondary(a, b) 1
-#define uv_system_init() do {} while (0)
-
-
-#endif /* _ASM_X86_GENAPIC_32_H */
diff --git a/arch/x86/include/asm/genapic_64.h b/arch/x86/include/asm/genapic_64.h
deleted file mode 100644
index adf32fb..0000000
--- a/arch/x86/include/asm/genapic_64.h
+++ /dev/null
@@ -1,66 +0,0 @@
-#ifndef _ASM_X86_GENAPIC_64_H
-#define _ASM_X86_GENAPIC_64_H
-
-#include <linux/cpumask.h>
-
-/*
- * Copyright 2004 James Cleverdon, IBM.
- * Subject to the GNU Public License, v.2
- *
- * Generic APIC sub-arch data struct.
- *
- * Hacked for x86-64 by James Cleverdon from i386 architecture code by
- * Martin Bligh, Andi Kleen, James Bottomley, John Stultz, and
- * James Cleverdon.
- */
-
-struct genapic {
- char *name;
- int (*acpi_madt_oem_check)(char *oem_id, char *oem_table_id);
- u32 int_delivery_mode;
- u32 int_dest_mode;
- int (*apic_id_registered)(void);
- const struct cpumask *(*target_cpus)(void);
- void (*vector_allocation_domain)(int cpu, struct cpumask *retmask);
- void (*init_apic_ldr)(void);
- /* ipi */
- void (*send_IPI_mask)(const struct cpumask *mask, int vector);
- void (*send_IPI_mask_allbutself)(const struct cpumask *mask,
- int vector);
- void (*send_IPI_allbutself)(int vector);
- void (*send_IPI_all)(int vector);
- void (*send_IPI_self)(int vector);
- /* */
- unsigned int (*cpu_mask_to_apicid)(const struct cpumask *cpumask);
- unsigned int (*cpu_mask_to_apicid_and)(const struct cpumask *cpumask,
- const struct cpumask *andmask);
- unsigned int (*phys_pkg_id)(int index_msb);
- unsigned int (*get_apic_id)(unsigned long x);
- unsigned long (*set_apic_id)(unsigned int id);
- unsigned long apic_id_mask;
- /* wakeup_secondary_cpu */
- int (*wakeup_cpu)(int apicid, unsigned long start_eip);
-};
-
-extern struct genapic *genapic;
-
-extern struct genapic apic_flat;
-extern struct genapic apic_physflat;
-extern struct genapic apic_x2apic_cluster;
-extern struct genapic apic_x2apic_phys;
-extern int acpi_madt_oem_check(char *, char *);
-
-extern void apic_send_IPI_self(int vector);
-enum uv_system_type {UV_NONE, UV_LEGACY_APIC, UV_X2APIC, UV_NON_UNIQUE_APIC};
-extern enum uv_system_type get_uv_system_type(void);
-extern int is_uv_system(void);
-
-extern struct genapic apic_x2apic_uv_x;
-DECLARE_PER_CPU(int, x2apic_extra_bits);
-extern void uv_cpu_init(void);
-extern void uv_system_init(void);
-extern int uv_wakeup_secondary(int phys_apicid, unsigned int start_rip);
-
-extern void setup_apic_routing(void);
-
-#endif /* _ASM_X86_GENAPIC_64_H */
diff --git a/arch/x86/include/asm/hardirq.h b/arch/x86/include/asm/hardirq.h
index 000787d..46ebed7 100644
--- a/arch/x86/include/asm/hardirq.h
+++ b/arch/x86/include/asm/hardirq.h
@@ -1,11 +1,53 @@
-#ifdef CONFIG_X86_32
-# include "hardirq_32.h"
-#else
-# include "hardirq_64.h"
+#ifndef _ASM_X86_HARDIRQ_H
+#define _ASM_X86_HARDIRQ_H
+
+#include <linux/threads.h>
+#include <linux/irq.h>
+
+typedef struct {
+ unsigned int __softirq_pending;
+ unsigned int __nmi_count; /* arch dependent */
+ unsigned int irq0_irqs;
+#ifdef CONFIG_X86_LOCAL_APIC
+ unsigned int apic_timer_irqs; /* arch dependent */
+ unsigned int irq_spurious_count;
#endif
+ unsigned int apic_perf_irqs;
+#ifdef CONFIG_SMP
+ unsigned int irq_resched_count;
+ unsigned int irq_call_count;
+ unsigned int irq_tlb_count;
+#endif
+#ifdef CONFIG_X86_MCE
+ unsigned int irq_thermal_count;
+# ifdef CONFIG_X86_64
+ unsigned int irq_threshold_count;
+# endif
+#endif
+} ____cacheline_aligned irq_cpustat_t;
+
+DECLARE_PER_CPU(irq_cpustat_t, irq_stat);
+
+/* We can have at most NR_VECTORS irqs routed to a cpu at a time */
+#define MAX_HARDIRQS_PER_CPU NR_VECTORS
+
+#define __ARCH_IRQ_STAT
+
+#define inc_irq_stat(member) percpu_add(irq_stat.member, 1)
+
+#define local_softirq_pending() percpu_read(irq_stat.__softirq_pending)
+
+#define __ARCH_SET_SOFTIRQ_PENDING
+
+#define set_softirq_pending(x) percpu_write(irq_stat.__softirq_pending, (x))
+#define or_softirq_pending(x) percpu_or(irq_stat.__softirq_pending, (x))
+
+extern void ack_bad_irq(unsigned int irq);
extern u64 arch_irq_stat_cpu(unsigned int cpu);
#define arch_irq_stat_cpu arch_irq_stat_cpu
extern u64 arch_irq_stat(void);
#define arch_irq_stat arch_irq_stat
+
+#endif /* _ASM_X86_HARDIRQ_H */
diff --git a/arch/x86/include/asm/hardirq_32.h b/arch/x86/include/asm/hardirq_32.h
deleted file mode 100644
index cf7954d..0000000
--- a/arch/x86/include/asm/hardirq_32.h
+++ /dev/null
@@ -1,30 +0,0 @@
-#ifndef _ASM_X86_HARDIRQ_32_H
-#define _ASM_X86_HARDIRQ_32_H
-
-#include <linux/threads.h>
-#include <linux/irq.h>
-
-typedef struct {
- unsigned int __softirq_pending;
- unsigned long idle_timestamp;
- unsigned int __nmi_count; /* arch dependent */
- unsigned int apic_timer_irqs; /* arch dependent */
- unsigned int irq0_irqs;
- unsigned int irq_resched_count;
- unsigned int irq_call_count;
- unsigned int irq_tlb_count;
- unsigned int irq_thermal_count;
- unsigned int irq_spurious_count;
-} ____cacheline_aligned irq_cpustat_t;
-
-DECLARE_PER_CPU(irq_cpustat_t, irq_stat);
-
-#define __ARCH_IRQ_STAT
-#define __IRQ_STAT(cpu, member) (per_cpu(irq_stat, cpu).member)
-
-#define inc_irq_stat(member) (__get_cpu_var(irq_stat).member++)
-
-void ack_bad_irq(unsigned int irq);
-#include <linux/irq_cpustat.h>
-
-#endif /* _ASM_X86_HARDIRQ_32_H */
diff --git a/arch/x86/include/asm/hardirq_64.h b/arch/x86/include/asm/hardirq_64.h
deleted file mode 100644
index b5a6b5d..0000000
--- a/arch/x86/include/asm/hardirq_64.h
+++ /dev/null
@@ -1,25 +0,0 @@
-#ifndef _ASM_X86_HARDIRQ_64_H
-#define _ASM_X86_HARDIRQ_64_H
-
-#include <linux/threads.h>
-#include <linux/irq.h>
-#include <asm/pda.h>
-#include <asm/apic.h>
-
-/* We can have at most NR_VECTORS irqs routed to a cpu at a time */
-#define MAX_HARDIRQS_PER_CPU NR_VECTORS
-
-#define __ARCH_IRQ_STAT 1
-
-#define inc_irq_stat(member) add_pda(member, 1)
-
-#define local_softirq_pending() read_pda(__softirq_pending)
-
-#define __ARCH_SET_SOFTIRQ_PENDING 1
-
-#define set_softirq_pending(x) write_pda(__softirq_pending, (x))
-#define or_softirq_pending(x) or_pda(__softirq_pending, (x))
-
-extern void ack_bad_irq(unsigned int irq);
-
-#endif /* _ASM_X86_HARDIRQ_64_H */
diff --git a/arch/x86/include/asm/hw_irq.h b/arch/x86/include/asm/hw_irq.h
index 8de644b6..f39881b 100644
--- a/arch/x86/include/asm/hw_irq.h
+++ b/arch/x86/include/asm/hw_irq.h
@@ -25,11 +25,11 @@
#include <asm/irq.h>
#include <asm/sections.h>
-#define platform_legacy_irq(irq) ((irq) < 16)
-
/* Interrupt handlers registered during init_IRQ */
extern void apic_timer_interrupt(void);
extern void error_interrupt(void);
+extern void perf_counter_interrupt(void);
+
extern void spurious_interrupt(void);
extern void thermal_interrupt(void);
extern void reschedule_interrupt(void);
@@ -58,7 +58,7 @@
extern void init_8259A(int aeoi);
/* IOAPIC */
-#define IO_APIC_IRQ(x) (((x) >= 16) || ((1<<(x)) & io_apic_irqs))
+#define IO_APIC_IRQ(x) (((x) >= NR_IRQS_LEGACY) || ((1<<(x)) & io_apic_irqs))
extern unsigned long io_apic_irqs;
extern void init_VISWS_APIC_irqs(void);
@@ -67,15 +67,7 @@
extern int IO_APIC_get_PCI_irq_vector(int bus, int slot, int fn);
extern void setup_ioapic_dest(void);
-#ifdef CONFIG_X86_64
extern void enable_IO_APIC(void);
-#endif
-
-/* IPI functions */
-#ifdef CONFIG_X86_32
-extern void send_IPI_self(int vector);
-#endif
-extern void send_IPI(int dest, int vector);
/* Statistics */
extern atomic_t irq_err_count;
@@ -84,21 +76,11 @@
/* EISA */
extern void eisa_set_level_irq(unsigned int irq);
-/* Voyager functions */
-extern asmlinkage void vic_cpi_interrupt(void);
-extern asmlinkage void vic_sys_interrupt(void);
-extern asmlinkage void vic_cmn_interrupt(void);
-extern asmlinkage void qic_timer_interrupt(void);
-extern asmlinkage void qic_invalidate_interrupt(void);
-extern asmlinkage void qic_reschedule_interrupt(void);
-extern asmlinkage void qic_enable_irq_interrupt(void);
-extern asmlinkage void qic_call_function_interrupt(void);
-
/* SMP */
extern void smp_apic_timer_interrupt(struct pt_regs *);
extern void smp_spurious_interrupt(struct pt_regs *);
extern void smp_error_interrupt(struct pt_regs *);
-#ifdef CONFIG_X86_SMP
+#ifdef CONFIG_SMP
extern void smp_reschedule_interrupt(struct pt_regs *);
extern void smp_call_function_interrupt(struct pt_regs *);
extern void smp_call_function_single_interrupt(struct pt_regs *);
diff --git a/arch/x86/include/asm/i8259.h b/arch/x86/include/asm/i8259.h
index 58d7091..1a99e6c 100644
--- a/arch/x86/include/asm/i8259.h
+++ b/arch/x86/include/asm/i8259.h
@@ -60,4 +60,8 @@
extern void mask_8259A(void);
extern void unmask_8259A(void);
+#ifdef CONFIG_X86_32
+extern void init_ISA_irqs(void);
+#endif
+
#endif /* _ASM_X86_I8259_H */
diff --git a/arch/x86/include/asm/intel_arch_perfmon.h b/arch/x86/include/asm/intel_arch_perfmon.h
deleted file mode 100644
index fa0fd06..0000000
--- a/arch/x86/include/asm/intel_arch_perfmon.h
+++ /dev/null
@@ -1,31 +0,0 @@
-#ifndef _ASM_X86_INTEL_ARCH_PERFMON_H
-#define _ASM_X86_INTEL_ARCH_PERFMON_H
-
-#define MSR_ARCH_PERFMON_PERFCTR0 0xc1
-#define MSR_ARCH_PERFMON_PERFCTR1 0xc2
-
-#define MSR_ARCH_PERFMON_EVENTSEL0 0x186
-#define MSR_ARCH_PERFMON_EVENTSEL1 0x187
-
-#define ARCH_PERFMON_EVENTSEL0_ENABLE (1 << 22)
-#define ARCH_PERFMON_EVENTSEL_INT (1 << 20)
-#define ARCH_PERFMON_EVENTSEL_OS (1 << 17)
-#define ARCH_PERFMON_EVENTSEL_USR (1 << 16)
-
-#define ARCH_PERFMON_UNHALTED_CORE_CYCLES_SEL (0x3c)
-#define ARCH_PERFMON_UNHALTED_CORE_CYCLES_UMASK (0x00 << 8)
-#define ARCH_PERFMON_UNHALTED_CORE_CYCLES_INDEX (0)
-#define ARCH_PERFMON_UNHALTED_CORE_CYCLES_PRESENT \
- (1 << (ARCH_PERFMON_UNHALTED_CORE_CYCLES_INDEX))
-
-union cpuid10_eax {
- struct {
- unsigned int version_id:8;
- unsigned int num_counters:8;
- unsigned int bit_width:8;
- unsigned int mask_length:8;
- } split;
- unsigned int full;
-};
-
-#endif /* _ASM_X86_INTEL_ARCH_PERFMON_H */
diff --git a/arch/x86/include/asm/io.h b/arch/x86/include/asm/io.h
index 1dbbdf4..683d0b4 100644
--- a/arch/x86/include/asm/io.h
+++ b/arch/x86/include/asm/io.h
@@ -5,6 +5,7 @@
#include <linux/compiler.h>
#include <asm-generic/int-ll64.h>
+#include <asm/page.h>
#define build_mmio_read(name, size, type, reg, barrier) \
static inline type name(const volatile void __iomem *addr) \
@@ -80,6 +81,100 @@
#define readq readq
#define writeq writeq
+/**
+ * virt_to_phys - map virtual addresses to physical
+ * @address: address to remap
+ *
+ * The returned physical address is the physical (CPU) mapping for
+ * the memory address given. It is only valid to use this function on
+ * addresses directly mapped or allocated via kmalloc.
+ *
+ * This function does not give bus mappings for DMA transfers. In
+ * almost all conceivable cases a device driver should not be using
+ * this function
+ */
+
+static inline phys_addr_t virt_to_phys(volatile void *address)
+{
+ return __pa(address);
+}
+
+/**
+ * phys_to_virt - map physical address to virtual
+ * @address: address to remap
+ *
+ * The returned virtual address is a current CPU mapping for
+ * the memory address given. It is only valid to use this function on
+ * addresses that have a kernel mapping
+ *
+ * This function does not handle bus mappings for DMA transfers. In
+ * almost all conceivable cases a device driver should not be using
+ * this function
+ */
+
+static inline void *phys_to_virt(phys_addr_t address)
+{
+ return __va(address);
+}
+
+/*
+ * Change "struct page" to physical address.
+ */
+#define page_to_phys(page) ((dma_addr_t)page_to_pfn(page) << PAGE_SHIFT)
+
+/*
+ * ISA I/O bus memory addresses are 1:1 with the physical address.
+ * However, we truncate the address to unsigned int to avoid undesirable
+ * promitions in legacy drivers.
+ */
+static inline unsigned int isa_virt_to_bus(volatile void *address)
+{
+ return (unsigned int)virt_to_phys(address);
+}
+#define isa_page_to_bus(page) ((unsigned int)page_to_phys(page))
+#define isa_bus_to_virt phys_to_virt
+
+/*
+ * However PCI ones are not necessarily 1:1 and therefore these interfaces
+ * are forbidden in portable PCI drivers.
+ *
+ * Allow them on x86 for legacy drivers, though.
+ */
+#define virt_to_bus virt_to_phys
+#define bus_to_virt phys_to_virt
+
+/**
+ * ioremap - map bus memory into CPU space
+ * @offset: bus address of the memory
+ * @size: size of the resource to map
+ *
+ * ioremap performs a platform specific sequence of operations to
+ * make bus memory CPU accessible via the readb/readw/readl/writeb/
+ * writew/writel functions and the other mmio helpers. The returned
+ * address is not guaranteed to be usable directly as a virtual
+ * address.
+ *
+ * If the area you are trying to map is a PCI BAR you should have a
+ * look at pci_iomap().
+ */
+extern void __iomem *ioremap_nocache(resource_size_t offset, unsigned long size);
+extern void __iomem *ioremap_cache(resource_size_t offset, unsigned long size);
+extern void __iomem *ioremap_prot(resource_size_t offset, unsigned long size,
+ unsigned long prot_val);
+
+/*
+ * The default ioremap() behavior is non-cached:
+ */
+static inline void __iomem *ioremap(resource_size_t offset, unsigned long size)
+{
+ return ioremap_nocache(offset, size);
+}
+
+extern void iounmap(volatile void __iomem *addr);
+
+extern void __iomem *fix_ioremap(unsigned idx, unsigned long phys);
+
+
#ifdef CONFIG_X86_32
# include "io_32.h"
#else
@@ -91,7 +186,7 @@
extern int ioremap_change_attr(unsigned long vaddr, unsigned long size,
unsigned long prot_val);
-extern void __iomem *ioremap_wc(unsigned long offset, unsigned long size);
+extern void __iomem *ioremap_wc(resource_size_t offset, unsigned long size);
/*
* early_ioremap() and early_iounmap() are for temporary early boot-time
@@ -105,5 +200,6 @@
extern void early_iounmap(void __iomem *addr, unsigned long size);
extern void __iomem *fix_ioremap(unsigned idx, unsigned long phys);
+#define IO_SPACE_LIMIT 0xffff
#endif /* _ASM_X86_IO_H */
diff --git a/arch/x86/include/asm/io_32.h b/arch/x86/include/asm/io_32.h
index d8e242e..a299900 100644
--- a/arch/x86/include/asm/io_32.h
+++ b/arch/x86/include/asm/io_32.h
@@ -37,8 +37,6 @@
* - Arnaldo Carvalho de Melo <acme@conectiva.com.br>
*/
-#define IO_SPACE_LIMIT 0xffff
-
#define XQUAD_PORTIO_BASE 0xfe400000
#define XQUAD_PORTIO_QUAD 0x40000 /* 256k per quad. */
@@ -53,92 +51,6 @@
*/
#define xlate_dev_kmem_ptr(p) p
-/**
- * virt_to_phys - map virtual addresses to physical
- * @address: address to remap
- *
- * The returned physical address is the physical (CPU) mapping for
- * the memory address given. It is only valid to use this function on
- * addresses directly mapped or allocated via kmalloc.
- *
- * This function does not give bus mappings for DMA transfers. In
- * almost all conceivable cases a device driver should not be using
- * this function
- */
-
-static inline unsigned long virt_to_phys(volatile void *address)
-{
- return __pa(address);
-}
-
-/**
- * phys_to_virt - map physical address to virtual
- * @address: address to remap
- *
- * The returned virtual address is a current CPU mapping for
- * the memory address given. It is only valid to use this function on
- * addresses that have a kernel mapping
- *
- * This function does not handle bus mappings for DMA transfers. In
- * almost all conceivable cases a device driver should not be using
- * this function
- */
-
-static inline void *phys_to_virt(unsigned long address)
-{
- return __va(address);
-}
-
-/*
- * Change "struct page" to physical address.
- */
-#define page_to_phys(page) ((dma_addr_t)page_to_pfn(page) << PAGE_SHIFT)
-
-/**
- * ioremap - map bus memory into CPU space
- * @offset: bus address of the memory
- * @size: size of the resource to map
- *
- * ioremap performs a platform specific sequence of operations to
- * make bus memory CPU accessible via the readb/readw/readl/writeb/
- * writew/writel functions and the other mmio helpers. The returned
- * address is not guaranteed to be usable directly as a virtual
- * address.
- *
- * If the area you are trying to map is a PCI BAR you should have a
- * look at pci_iomap().
- */
-extern void __iomem *ioremap_nocache(resource_size_t offset, unsigned long size);
-extern void __iomem *ioremap_cache(resource_size_t offset, unsigned long size);
-extern void __iomem *ioremap_prot(resource_size_t offset, unsigned long size,
- unsigned long prot_val);
-
-/*
- * The default ioremap() behavior is non-cached:
- */
-static inline void __iomem *ioremap(resource_size_t offset, unsigned long size)
-{
- return ioremap_nocache(offset, size);
-}
-
-extern void iounmap(volatile void __iomem *addr);
-
-/*
- * ISA I/O bus memory addresses are 1:1 with the physical address.
- */
-#define isa_virt_to_bus virt_to_phys
-#define isa_page_to_bus page_to_phys
-#define isa_bus_to_virt phys_to_virt
-
-/*
- * However PCI ones are not necessarily 1:1 and therefore these interfaces
- * are forbidden in portable PCI drivers.
- *
- * Allow them on x86 for legacy drivers, though.
- */
-#define virt_to_bus virt_to_phys
-#define bus_to_virt phys_to_virt
-
static inline void
memset_io(volatile void __iomem *addr, unsigned char val, int count)
{
diff --git a/arch/x86/include/asm/io_64.h b/arch/x86/include/asm/io_64.h
index 563c162..2440678 100644
--- a/arch/x86/include/asm/io_64.h
+++ b/arch/x86/include/asm/io_64.h
@@ -136,73 +136,12 @@
__OUTS(w)
__OUTS(l)
-#define IO_SPACE_LIMIT 0xffff
-
#if defined(__KERNEL__) && defined(__x86_64__)
#include <linux/vmalloc.h>
-#ifndef __i386__
-/*
- * Change virtual addresses to physical addresses and vv.
- * These are pretty trivial
- */
-static inline unsigned long virt_to_phys(volatile void *address)
-{
- return __pa(address);
-}
-
-static inline void *phys_to_virt(unsigned long address)
-{
- return __va(address);
-}
-#endif
-
-/*
- * Change "struct page" to physical address.
- */
-#define page_to_phys(page) ((dma_addr_t)page_to_pfn(page) << PAGE_SHIFT)
-
#include <asm-generic/iomap.h>
-/*
- * This one maps high address device memory and turns off caching for that area.
- * it's useful if some control registers are in such an area and write combining
- * or read caching is not desirable:
- */
-extern void __iomem *ioremap_nocache(resource_size_t offset, unsigned long size);
-extern void __iomem *ioremap_cache(resource_size_t offset, unsigned long size);
-extern void __iomem *ioremap_prot(resource_size_t offset, unsigned long size,
- unsigned long prot_val);
-
-/*
- * The default ioremap() behavior is non-cached:
- */
-static inline void __iomem *ioremap(resource_size_t offset, unsigned long size)
-{
- return ioremap_nocache(offset, size);
-}
-
-extern void iounmap(volatile void __iomem *addr);
-
-extern void __iomem *fix_ioremap(unsigned idx, unsigned long phys);
-
-/*
- * ISA I/O bus memory addresses are 1:1 with the physical address.
- */
-#define isa_virt_to_bus virt_to_phys
-#define isa_page_to_bus page_to_phys
-#define isa_bus_to_virt phys_to_virt
-
-/*
- * However PCI ones are not necessarily 1:1 and therefore these interfaces
- * are forbidden in portable PCI drivers.
- *
- * Allow them on x86 for legacy drivers, though.
- */
-#define virt_to_bus virt_to_phys
-#define bus_to_virt phys_to_virt
-
void __memcpy_fromio(void *, unsigned long, unsigned);
void __memcpy_toio(unsigned long, const void *, unsigned);
diff --git a/arch/x86/include/asm/io_apic.h b/arch/x86/include/asm/io_apic.h
index 7a1f44a..59cb4a1 100644
--- a/arch/x86/include/asm/io_apic.h
+++ b/arch/x86/include/asm/io_apic.h
@@ -114,38 +114,16 @@
extern int nr_ioapics;
extern int nr_ioapic_registers[MAX_IO_APICS];
-/*
- * MP-BIOS irq configuration table structures:
- */
-
#define MP_MAX_IOAPIC_PIN 127
-struct mp_config_ioapic {
- unsigned long mp_apicaddr;
- unsigned int mp_apicid;
- unsigned char mp_type;
- unsigned char mp_apicver;
- unsigned char mp_flags;
-};
-
-struct mp_config_intsrc {
- unsigned int mp_dstapic;
- unsigned char mp_type;
- unsigned char mp_irqtype;
- unsigned short mp_irqflag;
- unsigned char mp_srcbus;
- unsigned char mp_srcbusirq;
- unsigned char mp_dstirq;
-};
-
/* I/O APIC entries */
-extern struct mp_config_ioapic mp_ioapics[MAX_IO_APICS];
+extern struct mpc_ioapic mp_ioapics[MAX_IO_APICS];
/* # of MP IRQ source entries */
extern int mp_irq_entries;
/* MP IRQ source entries */
-extern struct mp_config_intsrc mp_irqs[MAX_IRQ_SOURCES];
+extern struct mpc_intsrc mp_irqs[MAX_IRQ_SOURCES];
/* non-0 if default (table-less) MP configuration */
extern int mpc_default_type;
@@ -165,15 +143,6 @@
/* 1 if the timer IRQ uses the '8259A Virtual Wire' mode */
extern int timer_through_8259;
-static inline void disable_ioapic_setup(void)
-{
-#ifdef CONFIG_PCI
- noioapicquirk = 1;
- noioapicreroute = -1;
-#endif
- skip_ioapic_setup = 1;
-}
-
/*
* If we use the IO-APIC for IRQ routing, disable automatic
* assignment of PCI IRQ's.
@@ -200,6 +169,12 @@
extern void probe_nr_irqs_gsi(void);
+extern int setup_ioapic_entry(int apic, int irq,
+ struct IO_APIC_route_entry *entry,
+ unsigned int destination, int trigger,
+ int polarity, int vector);
+extern void ioapic_write_entry(int apic, int pin,
+ struct IO_APIC_route_entry e);
#else /* !CONFIG_X86_IO_APIC */
#define io_apic_assign_pci_irqs 0
static const int timer_through_8259 = 0;
diff --git a/arch/x86/include/asm/iomap.h b/arch/x86/include/asm/iomap.h
index 86af260..bd46495 100644
--- a/arch/x86/include/asm/iomap.h
+++ b/arch/x86/include/asm/iomap.h
@@ -24,7 +24,10 @@
#include <asm/tlbflush.h>
int
-is_io_mapping_possible(resource_size_t base, unsigned long size);
+reserve_io_memtype_wc(u64 base, unsigned long size, pgprot_t *prot);
+
+void
+free_io_memtype(u64 base, unsigned long size);
void *
iomap_atomic_prot_pfn(unsigned long pfn, enum km_type type, pgprot_t prot);
diff --git a/arch/x86/include/asm/ipi.h b/arch/x86/include/asm/ipi.h
index c745a30..0b72282 100644
--- a/arch/x86/include/asm/ipi.h
+++ b/arch/x86/include/asm/ipi.h
@@ -1,6 +1,8 @@
#ifndef _ASM_X86_IPI_H
#define _ASM_X86_IPI_H
+#ifdef CONFIG_X86_LOCAL_APIC
+
/*
* Copyright 2004 James Cleverdon, IBM.
* Subject to the GNU Public License, v.2
@@ -55,8 +57,8 @@
cpu_relax();
}
-static inline void __send_IPI_shortcut(unsigned int shortcut, int vector,
- unsigned int dest)
+static inline void
+__default_send_IPI_shortcut(unsigned int shortcut, int vector, unsigned int dest)
{
/*
* Subtle. In the case of the 'never do double writes' workaround
@@ -87,8 +89,8 @@
* This is used to send an IPI with no shorthand notation (the destination is
* specified in bits 56 to 63 of the ICR).
*/
-static inline void __send_IPI_dest_field(unsigned int mask, int vector,
- unsigned int dest)
+static inline void
+ __default_send_IPI_dest_field(unsigned int mask, int vector, unsigned int dest)
{
unsigned long cfg;
@@ -117,41 +119,44 @@
native_apic_mem_write(APIC_ICR, cfg);
}
-static inline void send_IPI_mask_sequence(const struct cpumask *mask,
- int vector)
-{
- unsigned long flags;
- unsigned long query_cpu;
+extern void default_send_IPI_mask_sequence_phys(const struct cpumask *mask,
+ int vector);
+extern void default_send_IPI_mask_allbutself_phys(const struct cpumask *mask,
+ int vector);
+extern void default_send_IPI_mask_sequence_logical(const struct cpumask *mask,
+ int vector);
+extern void default_send_IPI_mask_allbutself_logical(const struct cpumask *mask,
+ int vector);
- /*
- * Hack. The clustered APIC addressing mode doesn't allow us to send
- * to an arbitrary mask, so I do a unicast to each CPU instead.
- * - mbligh
- */
- local_irq_save(flags);
- for_each_cpu(query_cpu, mask) {
- __send_IPI_dest_field(per_cpu(x86_cpu_to_apicid, query_cpu),
- vector, APIC_DEST_PHYSICAL);
- }
- local_irq_restore(flags);
+/* Avoid include hell */
+#define NMI_VECTOR 0x02
+
+extern int no_broadcast;
+
+static inline void __default_local_send_IPI_allbutself(int vector)
+{
+ if (no_broadcast || vector == NMI_VECTOR)
+ apic->send_IPI_mask_allbutself(cpu_online_mask, vector);
+ else
+ __default_send_IPI_shortcut(APIC_DEST_ALLBUT, vector, apic->dest_logical);
}
-static inline void send_IPI_mask_allbutself(const struct cpumask *mask,
- int vector)
+static inline void __default_local_send_IPI_all(int vector)
{
- unsigned long flags;
- unsigned int query_cpu;
- unsigned int this_cpu = smp_processor_id();
-
- /* See Hack comment above */
-
- local_irq_save(flags);
- for_each_cpu(query_cpu, mask)
- if (query_cpu != this_cpu)
- __send_IPI_dest_field(
- per_cpu(x86_cpu_to_apicid, query_cpu),
- vector, APIC_DEST_PHYSICAL);
- local_irq_restore(flags);
+ if (no_broadcast || vector == NMI_VECTOR)
+ apic->send_IPI_mask(cpu_online_mask, vector);
+ else
+ __default_send_IPI_shortcut(APIC_DEST_ALLINC, vector, apic->dest_logical);
}
+#ifdef CONFIG_X86_32
+extern void default_send_IPI_mask_logical(const struct cpumask *mask,
+ int vector);
+extern void default_send_IPI_allbutself(int vector);
+extern void default_send_IPI_all(int vector);
+extern void default_send_IPI_self(int vector);
+#endif
+
+#endif
+
#endif /* _ASM_X86_IPI_H */
diff --git a/arch/x86/include/asm/irq.h b/arch/x86/include/asm/irq.h
index 592688e..107eb21 100644
--- a/arch/x86/include/asm/irq.h
+++ b/arch/x86/include/asm/irq.h
@@ -36,9 +36,11 @@
extern void fixup_irqs(void);
#endif
-extern unsigned int do_IRQ(struct pt_regs *regs);
extern void init_IRQ(void);
extern void native_init_IRQ(void);
+extern bool handle_irq(unsigned irq, struct pt_regs *regs);
+
+extern unsigned int do_IRQ(struct pt_regs *regs);
/* Interrupt vector management */
extern DECLARE_BITMAP(used_vectors, NR_VECTORS);
diff --git a/arch/x86/include/asm/irq_regs.h b/arch/x86/include/asm/irq_regs.h
index 89c898a..7784322 100644
--- a/arch/x86/include/asm/irq_regs.h
+++ b/arch/x86/include/asm/irq_regs.h
@@ -1,5 +1,31 @@
-#ifdef CONFIG_X86_32
-# include "irq_regs_32.h"
-#else
-# include "irq_regs_64.h"
-#endif
+/*
+ * Per-cpu current frame pointer - the location of the last exception frame on
+ * the stack, stored in the per-cpu area.
+ *
+ * Jeremy Fitzhardinge <jeremy@goop.org>
+ */
+#ifndef _ASM_X86_IRQ_REGS_H
+#define _ASM_X86_IRQ_REGS_H
+
+#include <asm/percpu.h>
+
+#define ARCH_HAS_OWN_IRQ_REGS
+
+DECLARE_PER_CPU(struct pt_regs *, irq_regs);
+
+static inline struct pt_regs *get_irq_regs(void)
+{
+ return percpu_read(irq_regs);
+}
+
+static inline struct pt_regs *set_irq_regs(struct pt_regs *new_regs)
+{
+ struct pt_regs *old_regs;
+
+ old_regs = get_irq_regs();
+ percpu_write(irq_regs, new_regs);
+
+ return old_regs;
+}
+
+#endif /* _ASM_X86_IRQ_REGS_32_H */
diff --git a/arch/x86/include/asm/irq_regs_32.h b/arch/x86/include/asm/irq_regs_32.h
deleted file mode 100644
index 86afd74..0000000
--- a/arch/x86/include/asm/irq_regs_32.h
+++ /dev/null
@@ -1,31 +0,0 @@
-/*
- * Per-cpu current frame pointer - the location of the last exception frame on
- * the stack, stored in the per-cpu area.
- *
- * Jeremy Fitzhardinge <jeremy@goop.org>
- */
-#ifndef _ASM_X86_IRQ_REGS_32_H
-#define _ASM_X86_IRQ_REGS_32_H
-
-#include <asm/percpu.h>
-
-#define ARCH_HAS_OWN_IRQ_REGS
-
-DECLARE_PER_CPU(struct pt_regs *, irq_regs);
-
-static inline struct pt_regs *get_irq_regs(void)
-{
- return x86_read_percpu(irq_regs);
-}
-
-static inline struct pt_regs *set_irq_regs(struct pt_regs *new_regs)
-{
- struct pt_regs *old_regs;
-
- old_regs = get_irq_regs();
- x86_write_percpu(irq_regs, new_regs);
-
- return old_regs;
-}
-
-#endif /* _ASM_X86_IRQ_REGS_32_H */
diff --git a/arch/x86/include/asm/irq_regs_64.h b/arch/x86/include/asm/irq_regs_64.h
deleted file mode 100644
index 3dd9c0b..0000000
--- a/arch/x86/include/asm/irq_regs_64.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/irq_regs.h>
diff --git a/arch/x86/include/asm/irq_vectors.h b/arch/x86/include/asm/irq_vectors.h
index f7ff650..8a285f3 100644
--- a/arch/x86/include/asm/irq_vectors.h
+++ b/arch/x86/include/asm/irq_vectors.h
@@ -1,47 +1,69 @@
#ifndef _ASM_X86_IRQ_VECTORS_H
#define _ASM_X86_IRQ_VECTORS_H
-#include <linux/threads.h>
+/*
+ * Linux IRQ vector layout.
+ *
+ * There are 256 IDT entries (per CPU - each entry is 8 bytes) which can
+ * be defined by Linux. They are used as a jump table by the CPU when a
+ * given vector is triggered - by a CPU-external, CPU-internal or
+ * software-triggered event.
+ *
+ * Linux sets the kernel code address each entry jumps to early during
+ * bootup, and never changes them. This is the general layout of the
+ * IDT entries:
+ *
+ * Vectors 0 ... 31 : system traps and exceptions - hardcoded events
+ * Vectors 32 ... 127 : device interrupts
+ * Vector 128 : legacy int80 syscall interface
+ * Vectors 129 ... 237 : device interrupts
+ * Vectors 238 ... 255 : special interrupts
+ *
+ * 64-bit x86 has per CPU IDT tables, 32-bit has one shared IDT table.
+ *
+ * This file enumerates the exact layout of them:
+ */
-#define NMI_VECTOR 0x02
+#define NMI_VECTOR 0x02
/*
* IDT vectors usable for external interrupt sources start
* at 0x20:
*/
-#define FIRST_EXTERNAL_VECTOR 0x20
+#define FIRST_EXTERNAL_VECTOR 0x20
#ifdef CONFIG_X86_32
-# define SYSCALL_VECTOR 0x80
+# define SYSCALL_VECTOR 0x80
#else
-# define IA32_SYSCALL_VECTOR 0x80
+# define IA32_SYSCALL_VECTOR 0x80
#endif
/*
* Reserve the lowest usable priority level 0x20 - 0x2f for triggering
* cleanup after irq migration.
*/
-#define IRQ_MOVE_CLEANUP_VECTOR FIRST_EXTERNAL_VECTOR
+#define IRQ_MOVE_CLEANUP_VECTOR FIRST_EXTERNAL_VECTOR
/*
* Vectors 0x30-0x3f are used for ISA interrupts.
*/
-#define IRQ0_VECTOR (FIRST_EXTERNAL_VECTOR + 0x10)
-#define IRQ1_VECTOR (IRQ0_VECTOR + 1)
-#define IRQ2_VECTOR (IRQ0_VECTOR + 2)
-#define IRQ3_VECTOR (IRQ0_VECTOR + 3)
-#define IRQ4_VECTOR (IRQ0_VECTOR + 4)
-#define IRQ5_VECTOR (IRQ0_VECTOR + 5)
-#define IRQ6_VECTOR (IRQ0_VECTOR + 6)
-#define IRQ7_VECTOR (IRQ0_VECTOR + 7)
-#define IRQ8_VECTOR (IRQ0_VECTOR + 8)
-#define IRQ9_VECTOR (IRQ0_VECTOR + 9)
-#define IRQ10_VECTOR (IRQ0_VECTOR + 10)
-#define IRQ11_VECTOR (IRQ0_VECTOR + 11)
-#define IRQ12_VECTOR (IRQ0_VECTOR + 12)
-#define IRQ13_VECTOR (IRQ0_VECTOR + 13)
-#define IRQ14_VECTOR (IRQ0_VECTOR + 14)
-#define IRQ15_VECTOR (IRQ0_VECTOR + 15)
+#define IRQ0_VECTOR (FIRST_EXTERNAL_VECTOR + 0x10)
+
+#define IRQ1_VECTOR (IRQ0_VECTOR + 1)
+#define IRQ2_VECTOR (IRQ0_VECTOR + 2)
+#define IRQ3_VECTOR (IRQ0_VECTOR + 3)
+#define IRQ4_VECTOR (IRQ0_VECTOR + 4)
+#define IRQ5_VECTOR (IRQ0_VECTOR + 5)
+#define IRQ6_VECTOR (IRQ0_VECTOR + 6)
+#define IRQ7_VECTOR (IRQ0_VECTOR + 7)
+#define IRQ8_VECTOR (IRQ0_VECTOR + 8)
+#define IRQ9_VECTOR (IRQ0_VECTOR + 9)
+#define IRQ10_VECTOR (IRQ0_VECTOR + 10)
+#define IRQ11_VECTOR (IRQ0_VECTOR + 11)
+#define IRQ12_VECTOR (IRQ0_VECTOR + 12)
+#define IRQ13_VECTOR (IRQ0_VECTOR + 13)
+#define IRQ14_VECTOR (IRQ0_VECTOR + 14)
+#define IRQ15_VECTOR (IRQ0_VECTOR + 15)
/*
* Special IRQ vectors used by the SMP architecture, 0xf0-0xff
@@ -49,119 +71,98 @@
* some of the following vectors are 'rare', they are merged
* into a single vector (CALL_FUNCTION_VECTOR) to save vector space.
* TLB, reschedule and local APIC vectors are performance-critical.
- *
- * Vectors 0xf0-0xfa are free (reserved for future Linux use).
*/
-#ifdef CONFIG_X86_32
-
-# define SPURIOUS_APIC_VECTOR 0xff
-# define ERROR_APIC_VECTOR 0xfe
-# define INVALIDATE_TLB_VECTOR 0xfd
-# define RESCHEDULE_VECTOR 0xfc
-# define CALL_FUNCTION_VECTOR 0xfb
-# define CALL_FUNCTION_SINGLE_VECTOR 0xfa
-# define THERMAL_APIC_VECTOR 0xf0
-
-#else
#define SPURIOUS_APIC_VECTOR 0xff
+/*
+ * Sanity check
+ */
+#if ((SPURIOUS_APIC_VECTOR & 0x0F) != 0x0F)
+# error SPURIOUS_APIC_VECTOR definition error
+#endif
+
#define ERROR_APIC_VECTOR 0xfe
#define RESCHEDULE_VECTOR 0xfd
#define CALL_FUNCTION_VECTOR 0xfc
#define CALL_FUNCTION_SINGLE_VECTOR 0xfb
#define THERMAL_APIC_VECTOR 0xfa
-#define THRESHOLD_APIC_VECTOR 0xf9
-#define UV_BAU_MESSAGE 0xf8
-#define INVALIDATE_TLB_VECTOR_END 0xf7
-#define INVALIDATE_TLB_VECTOR_START 0xf0 /* f0-f7 used for TLB flush */
-#define NUM_INVALIDATE_TLB_VECTORS 8
-
+#ifdef CONFIG_X86_32
+/* 0xf8 - 0xf9 : free */
+#else
+# define THRESHOLD_APIC_VECTOR 0xf9
+# define UV_BAU_MESSAGE 0xf8
#endif
+/* f0-f7 used for spreading out TLB flushes: */
+#define INVALIDATE_TLB_VECTOR_END 0xf7
+#define INVALIDATE_TLB_VECTOR_START 0xf0
+#define NUM_INVALIDATE_TLB_VECTORS 8
+
/*
* Local APIC timer IRQ vector is on a different priority level,
* to work around the 'lost local interrupt if more than 2 IRQ
* sources per level' errata.
*/
-#define LOCAL_TIMER_VECTOR 0xef
+#define LOCAL_TIMER_VECTOR 0xef
+
+/*
+ * Performance monitoring interrupt vector:
+ */
+#define LOCAL_PERF_VECTOR 0xee
/*
* First APIC vector available to drivers: (vectors 0x30-0xee) we
* start at 0x31(0x41) to spread out vectors evenly between priority
* levels. (0x80 is the syscall vector)
*/
-#define FIRST_DEVICE_VECTOR (IRQ15_VECTOR + 2)
+#define FIRST_DEVICE_VECTOR (IRQ15_VECTOR + 2)
-#define NR_VECTORS 256
+#define NR_VECTORS 256
-#define FPU_IRQ 13
+#define FPU_IRQ 13
-#define FIRST_VM86_IRQ 3
-#define LAST_VM86_IRQ 15
-#define invalid_vm86_irq(irq) ((irq) < 3 || (irq) > 15)
+#define FIRST_VM86_IRQ 3
+#define LAST_VM86_IRQ 15
-#define NR_IRQS_LEGACY 16
-
-#if defined(CONFIG_X86_IO_APIC) && !defined(CONFIG_X86_VOYAGER)
-
-#ifndef CONFIG_SPARSE_IRQ
-# if NR_CPUS < MAX_IO_APICS
-# define NR_IRQS (NR_VECTORS + (32 * NR_CPUS))
-# else
-# define NR_IRQS (NR_VECTORS + (32 * MAX_IO_APICS))
-# endif
-#else
-# if (8 * NR_CPUS) > (32 * MAX_IO_APICS)
-# define NR_IRQS (NR_VECTORS + (8 * NR_CPUS))
-# else
-# define NR_IRQS (NR_VECTORS + (32 * MAX_IO_APICS))
-# endif
+#ifndef __ASSEMBLY__
+static inline int invalid_vm86_irq(int irq)
+{
+ return irq < FIRST_VM86_IRQ || irq > LAST_VM86_IRQ;
+}
#endif
-#elif defined(CONFIG_X86_VOYAGER)
+/*
+ * Size the maximum number of interrupts.
+ *
+ * If the irq_desc[] array has a sparse layout, we can size things
+ * generously - it scales up linearly with the maximum number of CPUs,
+ * and the maximum number of IO-APICs, whichever is higher.
+ *
+ * In other cases we size more conservatively, to not create too large
+ * static arrays.
+ */
-# define NR_IRQS 224
+#define NR_IRQS_LEGACY 16
-#else /* IO_APIC || VOYAGER */
+#define CPU_VECTOR_LIMIT ( 8 * NR_CPUS )
+#define IO_APIC_VECTOR_LIMIT ( 32 * MAX_IO_APICS )
-# define NR_IRQS 16
-
+#ifdef CONFIG_X86_IO_APIC
+# ifdef CONFIG_SPARSE_IRQ
+# define NR_IRQS \
+ (CPU_VECTOR_LIMIT > IO_APIC_VECTOR_LIMIT ? \
+ (NR_VECTORS + CPU_VECTOR_LIMIT) : \
+ (NR_VECTORS + IO_APIC_VECTOR_LIMIT))
+# else
+# if NR_CPUS < MAX_IO_APICS
+# define NR_IRQS (NR_VECTORS + 4*CPU_VECTOR_LIMIT)
+# else
+# define NR_IRQS (NR_VECTORS + IO_APIC_VECTOR_LIMIT)
+# endif
+# endif
+#else /* !CONFIG_X86_IO_APIC: */
+# define NR_IRQS NR_IRQS_LEGACY
#endif
-/* Voyager specific defines */
-/* These define the CPIs we use in linux */
-#define VIC_CPI_LEVEL0 0
-#define VIC_CPI_LEVEL1 1
-/* now the fake CPIs */
-#define VIC_TIMER_CPI 2
-#define VIC_INVALIDATE_CPI 3
-#define VIC_RESCHEDULE_CPI 4
-#define VIC_ENABLE_IRQ_CPI 5
-#define VIC_CALL_FUNCTION_CPI 6
-#define VIC_CALL_FUNCTION_SINGLE_CPI 7
-
-/* Now the QIC CPIs: Since we don't need the two initial levels,
- * these are 2 less than the VIC CPIs */
-#define QIC_CPI_OFFSET 1
-#define QIC_TIMER_CPI (VIC_TIMER_CPI - QIC_CPI_OFFSET)
-#define QIC_INVALIDATE_CPI (VIC_INVALIDATE_CPI - QIC_CPI_OFFSET)
-#define QIC_RESCHEDULE_CPI (VIC_RESCHEDULE_CPI - QIC_CPI_OFFSET)
-#define QIC_ENABLE_IRQ_CPI (VIC_ENABLE_IRQ_CPI - QIC_CPI_OFFSET)
-#define QIC_CALL_FUNCTION_CPI (VIC_CALL_FUNCTION_CPI - QIC_CPI_OFFSET)
-#define QIC_CALL_FUNCTION_SINGLE_CPI (VIC_CALL_FUNCTION_SINGLE_CPI - QIC_CPI_OFFSET)
-
-#define VIC_START_FAKE_CPI VIC_TIMER_CPI
-#define VIC_END_FAKE_CPI VIC_CALL_FUNCTION_SINGLE_CPI
-
-/* this is the SYS_INT CPI. */
-#define VIC_SYS_INT 8
-#define VIC_CMN_INT 15
-
-/* This is the boot CPI for alternate processors. It gets overwritten
- * by the above once the system has activated all available processors */
-#define VIC_CPU_BOOT_CPI VIC_CPI_LEVEL0
-#define VIC_CPU_BOOT_ERRATA_CPI (VIC_CPI_LEVEL0 + 8)
-
-
#endif /* _ASM_X86_IRQ_VECTORS_H */
diff --git a/arch/x86/include/asm/kexec.h b/arch/x86/include/asm/kexec.h
index c61d8b2..0ceb6d1 100644
--- a/arch/x86/include/asm/kexec.h
+++ b/arch/x86/include/asm/kexec.h
@@ -9,23 +9,8 @@
# define PAGES_NR 4
#else
# define PA_CONTROL_PAGE 0
-# define VA_CONTROL_PAGE 1
-# define PA_PGD 2
-# define VA_PGD 3
-# define PA_PUD_0 4
-# define VA_PUD_0 5
-# define PA_PMD_0 6
-# define VA_PMD_0 7
-# define PA_PTE_0 8
-# define VA_PTE_0 9
-# define PA_PUD_1 10
-# define VA_PUD_1 11
-# define PA_PMD_1 12
-# define VA_PMD_1 13
-# define PA_PTE_1 14
-# define VA_PTE_1 15
-# define PA_TABLE_PAGE 16
-# define PAGES_NR 17
+# define PA_TABLE_PAGE 1
+# define PAGES_NR 2
#endif
#ifdef CONFIG_X86_32
@@ -157,9 +142,9 @@
unsigned long start_address) ATTRIB_NORET;
#endif
-#ifdef CONFIG_X86_32
#define ARCH_HAS_KIMAGE_ARCH
+#ifdef CONFIG_X86_32
struct kimage_arch {
pgd_t *pgd;
#ifdef CONFIG_X86_PAE
@@ -169,6 +154,12 @@
pte_t *pte0;
pte_t *pte1;
};
+#else
+struct kimage_arch {
+ pud_t *pud;
+ pmd_t *pmd;
+ pte_t *pte;
+};
#endif
#endif /* __ASSEMBLY__ */
diff --git a/arch/x86/include/asm/linkage.h b/arch/x86/include/asm/linkage.h
index 5d98d0b6..9320e2a 100644
--- a/arch/x86/include/asm/linkage.h
+++ b/arch/x86/include/asm/linkage.h
@@ -52,70 +52,14 @@
#endif
+#define GLOBAL(name) \
+ .globl name; \
+ name:
+
#ifdef CONFIG_X86_ALIGNMENT_16
#define __ALIGN .align 16,0x90
#define __ALIGN_STR ".align 16,0x90"
#endif
-/*
- * to check ENTRY_X86/END_X86 and
- * KPROBE_ENTRY_X86/KPROBE_END_X86
- * unbalanced-missed-mixed appearance
- */
-#define __set_entry_x86 .set ENTRY_X86_IN, 0
-#define __unset_entry_x86 .set ENTRY_X86_IN, 1
-#define __set_kprobe_x86 .set KPROBE_X86_IN, 0
-#define __unset_kprobe_x86 .set KPROBE_X86_IN, 1
-
-#define __macro_err_x86 .error "ENTRY_X86/KPROBE_X86 unbalanced,missed,mixed"
-
-#define __check_entry_x86 \
- .ifdef ENTRY_X86_IN; \
- .ifeq ENTRY_X86_IN; \
- __macro_err_x86; \
- .abort; \
- .endif; \
- .endif
-
-#define __check_kprobe_x86 \
- .ifdef KPROBE_X86_IN; \
- .ifeq KPROBE_X86_IN; \
- __macro_err_x86; \
- .abort; \
- .endif; \
- .endif
-
-#define __check_entry_kprobe_x86 \
- __check_entry_x86; \
- __check_kprobe_x86
-
-#define ENTRY_KPROBE_FINAL_X86 __check_entry_kprobe_x86
-
-#define ENTRY_X86(name) \
- __check_entry_kprobe_x86; \
- __set_entry_x86; \
- .globl name; \
- __ALIGN; \
- name:
-
-#define END_X86(name) \
- __unset_entry_x86; \
- __check_entry_kprobe_x86; \
- .size name, .-name
-
-#define KPROBE_ENTRY_X86(name) \
- __check_entry_kprobe_x86; \
- __set_kprobe_x86; \
- .pushsection .kprobes.text, "ax"; \
- .globl name; \
- __ALIGN; \
- name:
-
-#define KPROBE_END_X86(name) \
- __unset_kprobe_x86; \
- __check_entry_kprobe_x86; \
- .size name, .-name; \
- .popsection
-
#endif /* _ASM_X86_LINKAGE_H */
diff --git a/arch/x86/include/asm/mach-default/entry_arch.h b/arch/x86/include/asm/mach-default/entry_arch.h
deleted file mode 100644
index 6b1add8..0000000
--- a/arch/x86/include/asm/mach-default/entry_arch.h
+++ /dev/null
@@ -1,36 +0,0 @@
-/*
- * This file is designed to contain the BUILD_INTERRUPT specifications for
- * all of the extra named interrupt vectors used by the architecture.
- * Usually this is the Inter Process Interrupts (IPIs)
- */
-
-/*
- * The following vectors are part of the Linux architecture, there
- * is no hardware IRQ pin equivalent for them, they are triggered
- * through the ICC by us (IPIs)
- */
-#ifdef CONFIG_X86_SMP
-BUILD_INTERRUPT(reschedule_interrupt,RESCHEDULE_VECTOR)
-BUILD_INTERRUPT(invalidate_interrupt,INVALIDATE_TLB_VECTOR)
-BUILD_INTERRUPT(call_function_interrupt,CALL_FUNCTION_VECTOR)
-BUILD_INTERRUPT(call_function_single_interrupt,CALL_FUNCTION_SINGLE_VECTOR)
-BUILD_INTERRUPT(irq_move_cleanup_interrupt,IRQ_MOVE_CLEANUP_VECTOR)
-#endif
-
-/*
- * every pentium local APIC has two 'local interrupts', with a
- * soft-definable vector attached to both interrupts, one of
- * which is a timer interrupt, the other one is error counter
- * overflow. Linux uses the local APIC timer interrupt to get
- * a much simpler SMP time architecture:
- */
-#ifdef CONFIG_X86_LOCAL_APIC
-BUILD_INTERRUPT(apic_timer_interrupt,LOCAL_TIMER_VECTOR)
-BUILD_INTERRUPT(error_interrupt,ERROR_APIC_VECTOR)
-BUILD_INTERRUPT(spurious_interrupt,SPURIOUS_APIC_VECTOR)
-
-#ifdef CONFIG_X86_MCE_P4THERMAL
-BUILD_INTERRUPT(thermal_interrupt,THERMAL_APIC_VECTOR)
-#endif
-
-#endif
diff --git a/arch/x86/include/asm/mach-default/mach_apic.h b/arch/x86/include/asm/mach-default/mach_apic.h
deleted file mode 100644
index cc09cbb..0000000
--- a/arch/x86/include/asm/mach-default/mach_apic.h
+++ /dev/null
@@ -1,168 +0,0 @@
-#ifndef _ASM_X86_MACH_DEFAULT_MACH_APIC_H
-#define _ASM_X86_MACH_DEFAULT_MACH_APIC_H
-
-#ifdef CONFIG_X86_LOCAL_APIC
-
-#include <mach_apicdef.h>
-#include <asm/smp.h>
-
-#define APIC_DFR_VALUE (APIC_DFR_FLAT)
-
-static inline const struct cpumask *target_cpus(void)
-{
-#ifdef CONFIG_SMP
- return cpu_online_mask;
-#else
- return cpumask_of(0);
-#endif
-}
-
-#define NO_BALANCE_IRQ (0)
-#define esr_disable (0)
-
-#ifdef CONFIG_X86_64
-#include <asm/genapic.h>
-#define INT_DELIVERY_MODE (genapic->int_delivery_mode)
-#define INT_DEST_MODE (genapic->int_dest_mode)
-#define TARGET_CPUS (genapic->target_cpus())
-#define apic_id_registered (genapic->apic_id_registered)
-#define init_apic_ldr (genapic->init_apic_ldr)
-#define cpu_mask_to_apicid (genapic->cpu_mask_to_apicid)
-#define cpu_mask_to_apicid_and (genapic->cpu_mask_to_apicid_and)
-#define phys_pkg_id (genapic->phys_pkg_id)
-#define vector_allocation_domain (genapic->vector_allocation_domain)
-#define read_apic_id() (GET_APIC_ID(apic_read(APIC_ID)))
-#define send_IPI_self (genapic->send_IPI_self)
-#define wakeup_secondary_cpu (genapic->wakeup_cpu)
-extern void setup_apic_routing(void);
-#else
-#define INT_DELIVERY_MODE dest_LowestPrio
-#define INT_DEST_MODE 1 /* logical delivery broadcast to all procs */
-#define TARGET_CPUS (target_cpus())
-#define wakeup_secondary_cpu wakeup_secondary_cpu_via_init
-/*
- * Set up the logical destination ID.
- *
- * Intel recommends to set DFR, LDR and TPR before enabling
- * an APIC. See e.g. "AP-388 82489DX User's Manual" (Intel
- * document number 292116). So here it goes...
- */
-static inline void init_apic_ldr(void)
-{
- unsigned long val;
-
- apic_write(APIC_DFR, APIC_DFR_VALUE);
- val = apic_read(APIC_LDR) & ~APIC_LDR_MASK;
- val |= SET_APIC_LOGICAL_ID(1UL << smp_processor_id());
- apic_write(APIC_LDR, val);
-}
-
-static inline int apic_id_registered(void)
-{
- return physid_isset(read_apic_id(), phys_cpu_present_map);
-}
-
-static inline unsigned int cpu_mask_to_apicid(const struct cpumask *cpumask)
-{
- return cpumask_bits(cpumask)[0];
-}
-
-static inline unsigned int cpu_mask_to_apicid_and(const struct cpumask *cpumask,
- const struct cpumask *andmask)
-{
- unsigned long mask1 = cpumask_bits(cpumask)[0];
- unsigned long mask2 = cpumask_bits(andmask)[0];
- unsigned long mask3 = cpumask_bits(cpu_online_mask)[0];
-
- return (unsigned int)(mask1 & mask2 & mask3);
-}
-
-static inline u32 phys_pkg_id(u32 cpuid_apic, int index_msb)
-{
- return cpuid_apic >> index_msb;
-}
-
-static inline void setup_apic_routing(void)
-{
-#ifdef CONFIG_X86_IO_APIC
- printk("Enabling APIC mode: %s. Using %d I/O APICs\n",
- "Flat", nr_ioapics);
-#endif
-}
-
-static inline int apicid_to_node(int logical_apicid)
-{
-#ifdef CONFIG_SMP
- return apicid_2_node[hard_smp_processor_id()];
-#else
- return 0;
-#endif
-}
-
-static inline void vector_allocation_domain(int cpu, struct cpumask *retmask)
-{
- /* Careful. Some cpus do not strictly honor the set of cpus
- * specified in the interrupt destination when using lowest
- * priority interrupt delivery mode.
- *
- * In particular there was a hyperthreading cpu observed to
- * deliver interrupts to the wrong hyperthread when only one
- * hyperthread was specified in the interrupt desitination.
- */
- *retmask = (cpumask_t) { { [0] = APIC_ALL_CPUS } };
-}
-#endif
-
-static inline unsigned long check_apicid_used(physid_mask_t bitmap, int apicid)
-{
- return physid_isset(apicid, bitmap);
-}
-
-static inline unsigned long check_apicid_present(int bit)
-{
- return physid_isset(bit, phys_cpu_present_map);
-}
-
-static inline physid_mask_t ioapic_phys_id_map(physid_mask_t phys_map)
-{
- return phys_map;
-}
-
-static inline int multi_timer_check(int apic, int irq)
-{
- return 0;
-}
-
-/* Mapping from cpu number to logical apicid */
-static inline int cpu_to_logical_apicid(int cpu)
-{
- return 1 << cpu;
-}
-
-static inline int cpu_present_to_apicid(int mps_cpu)
-{
- if (mps_cpu < nr_cpu_ids && cpu_present(mps_cpu))
- return (int)per_cpu(x86_bios_cpu_apicid, mps_cpu);
- else
- return BAD_APICID;
-}
-
-static inline physid_mask_t apicid_to_cpu_present(int phys_apicid)
-{
- return physid_mask_of_physid(phys_apicid);
-}
-
-static inline void setup_portio_remap(void)
-{
-}
-
-static inline int check_phys_apicid_present(int boot_cpu_physical_apicid)
-{
- return physid_isset(boot_cpu_physical_apicid, phys_cpu_present_map);
-}
-
-static inline void enable_apic_mode(void)
-{
-}
-#endif /* CONFIG_X86_LOCAL_APIC */
-#endif /* _ASM_X86_MACH_DEFAULT_MACH_APIC_H */
diff --git a/arch/x86/include/asm/mach-default/mach_apicdef.h b/arch/x86/include/asm/mach-default/mach_apicdef.h
deleted file mode 100644
index 5317993..0000000
--- a/arch/x86/include/asm/mach-default/mach_apicdef.h
+++ /dev/null
@@ -1,24 +0,0 @@
-#ifndef _ASM_X86_MACH_DEFAULT_MACH_APICDEF_H
-#define _ASM_X86_MACH_DEFAULT_MACH_APICDEF_H
-
-#include <asm/apic.h>
-
-#ifdef CONFIG_X86_64
-#define APIC_ID_MASK (genapic->apic_id_mask)
-#define GET_APIC_ID(x) (genapic->get_apic_id(x))
-#define SET_APIC_ID(x) (genapic->set_apic_id(x))
-#else
-#define APIC_ID_MASK (0xF<<24)
-static inline unsigned get_apic_id(unsigned long x)
-{
- unsigned int ver = GET_APIC_VERSION(apic_read(APIC_LVR));
- if (APIC_XAPIC(ver))
- return (((x)>>24)&0xFF);
- else
- return (((x)>>24)&0xF);
-}
-
-#define GET_APIC_ID(x) get_apic_id(x)
-#endif
-
-#endif /* _ASM_X86_MACH_DEFAULT_MACH_APICDEF_H */
diff --git a/arch/x86/include/asm/mach-default/mach_ipi.h b/arch/x86/include/asm/mach-default/mach_ipi.h
deleted file mode 100644
index 191312d..0000000
--- a/arch/x86/include/asm/mach-default/mach_ipi.h
+++ /dev/null
@@ -1,64 +0,0 @@
-#ifndef _ASM_X86_MACH_DEFAULT_MACH_IPI_H
-#define _ASM_X86_MACH_DEFAULT_MACH_IPI_H
-
-/* Avoid include hell */
-#define NMI_VECTOR 0x02
-
-void send_IPI_mask_bitmask(const struct cpumask *mask, int vector);
-void send_IPI_mask_allbutself(const struct cpumask *mask, int vector);
-void __send_IPI_shortcut(unsigned int shortcut, int vector);
-
-extern int no_broadcast;
-
-#ifdef CONFIG_X86_64
-#include <asm/genapic.h>
-#define send_IPI_mask (genapic->send_IPI_mask)
-#define send_IPI_mask_allbutself (genapic->send_IPI_mask_allbutself)
-#else
-static inline void send_IPI_mask(const struct cpumask *mask, int vector)
-{
- send_IPI_mask_bitmask(mask, vector);
-}
-void send_IPI_mask_allbutself(const struct cpumask *mask, int vector);
-#endif
-
-static inline void __local_send_IPI_allbutself(int vector)
-{
- if (no_broadcast || vector == NMI_VECTOR)
- send_IPI_mask_allbutself(cpu_online_mask, vector);
- else
- __send_IPI_shortcut(APIC_DEST_ALLBUT, vector);
-}
-
-static inline void __local_send_IPI_all(int vector)
-{
- if (no_broadcast || vector == NMI_VECTOR)
- send_IPI_mask(cpu_online_mask, vector);
- else
- __send_IPI_shortcut(APIC_DEST_ALLINC, vector);
-}
-
-#ifdef CONFIG_X86_64
-#define send_IPI_allbutself (genapic->send_IPI_allbutself)
-#define send_IPI_all (genapic->send_IPI_all)
-#else
-static inline void send_IPI_allbutself(int vector)
-{
- /*
- * if there are no other CPUs in the system then we get an APIC send
- * error if we try to broadcast, thus avoid sending IPIs in this case.
- */
- if (!(num_online_cpus() > 1))
- return;
-
- __local_send_IPI_allbutself(vector);
- return;
-}
-
-static inline void send_IPI_all(int vector)
-{
- __local_send_IPI_all(vector);
-}
-#endif
-
-#endif /* _ASM_X86_MACH_DEFAULT_MACH_IPI_H */
diff --git a/arch/x86/include/asm/mach-default/mach_mpparse.h b/arch/x86/include/asm/mach-default/mach_mpparse.h
deleted file mode 100644
index c70a263..0000000
--- a/arch/x86/include/asm/mach-default/mach_mpparse.h
+++ /dev/null
@@ -1,17 +0,0 @@
-#ifndef _ASM_X86_MACH_DEFAULT_MACH_MPPARSE_H
-#define _ASM_X86_MACH_DEFAULT_MACH_MPPARSE_H
-
-static inline int
-mps_oem_check(struct mpc_table *mpc, char *oem, char *productid)
-{
- return 0;
-}
-
-/* Hook from generic ACPI tables.c */
-static inline int acpi_madt_oem_check(char *oem_id, char *oem_table_id)
-{
- return 0;
-}
-
-
-#endif /* _ASM_X86_MACH_DEFAULT_MACH_MPPARSE_H */
diff --git a/arch/x86/include/asm/mach-default/mach_mpspec.h b/arch/x86/include/asm/mach-default/mach_mpspec.h
deleted file mode 100644
index e85ede6..0000000
--- a/arch/x86/include/asm/mach-default/mach_mpspec.h
+++ /dev/null
@@ -1,12 +0,0 @@
-#ifndef _ASM_X86_MACH_DEFAULT_MACH_MPSPEC_H
-#define _ASM_X86_MACH_DEFAULT_MACH_MPSPEC_H
-
-#define MAX_IRQ_SOURCES 256
-
-#if CONFIG_BASE_SMALL == 0
-#define MAX_MP_BUSSES 256
-#else
-#define MAX_MP_BUSSES 32
-#endif
-
-#endif /* _ASM_X86_MACH_DEFAULT_MACH_MPSPEC_H */
diff --git a/arch/x86/include/asm/mach-default/mach_wakecpu.h b/arch/x86/include/asm/mach-default/mach_wakecpu.h
deleted file mode 100644
index 89897a6..0000000
--- a/arch/x86/include/asm/mach-default/mach_wakecpu.h
+++ /dev/null
@@ -1,41 +0,0 @@
-#ifndef _ASM_X86_MACH_DEFAULT_MACH_WAKECPU_H
-#define _ASM_X86_MACH_DEFAULT_MACH_WAKECPU_H
-
-#define TRAMPOLINE_PHYS_LOW (0x467)
-#define TRAMPOLINE_PHYS_HIGH (0x469)
-
-static inline void wait_for_init_deassert(atomic_t *deassert)
-{
- while (!atomic_read(deassert))
- cpu_relax();
- return;
-}
-
-/* Nothing to do for most platforms, since cleared by the INIT cycle */
-static inline void smp_callin_clear_local_apic(void)
-{
-}
-
-static inline void store_NMI_vector(unsigned short *high, unsigned short *low)
-{
-}
-
-static inline void restore_NMI_vector(unsigned short *high, unsigned short *low)
-{
-}
-
-#ifdef CONFIG_SMP
-extern void __inquire_remote_apic(int apicid);
-#else /* CONFIG_SMP */
-static inline void __inquire_remote_apic(int apicid)
-{
-}
-#endif /* CONFIG_SMP */
-
-static inline void inquire_remote_apic(int apicid)
-{
- if (apic_verbosity >= APIC_DEBUG)
- __inquire_remote_apic(apicid);
-}
-
-#endif /* _ASM_X86_MACH_DEFAULT_MACH_WAKECPU_H */
diff --git a/arch/x86/include/asm/mach-generic/gpio.h b/arch/x86/include/asm/mach-generic/gpio.h
deleted file mode 100644
index 995c45e..0000000
--- a/arch/x86/include/asm/mach-generic/gpio.h
+++ /dev/null
@@ -1,15 +0,0 @@
-#ifndef _ASM_X86_MACH_GENERIC_GPIO_H
-#define _ASM_X86_MACH_GENERIC_GPIO_H
-
-int gpio_request(unsigned gpio, const char *label);
-void gpio_free(unsigned gpio);
-int gpio_direction_input(unsigned gpio);
-int gpio_direction_output(unsigned gpio, int value);
-int gpio_get_value(unsigned gpio);
-void gpio_set_value(unsigned gpio, int value);
-int gpio_to_irq(unsigned gpio);
-int irq_to_gpio(unsigned irq);
-
-#include <asm-generic/gpio.h> /* cansleep wrappers */
-
-#endif /* _ASM_X86_MACH_GENERIC_GPIO_H */
diff --git a/arch/x86/include/asm/mach-generic/mach_apic.h b/arch/x86/include/asm/mach-generic/mach_apic.h
deleted file mode 100644
index 48553e9..0000000
--- a/arch/x86/include/asm/mach-generic/mach_apic.h
+++ /dev/null
@@ -1,35 +0,0 @@
-#ifndef _ASM_X86_MACH_GENERIC_MACH_APIC_H
-#define _ASM_X86_MACH_GENERIC_MACH_APIC_H
-
-#include <asm/genapic.h>
-
-#define esr_disable (genapic->ESR_DISABLE)
-#define NO_BALANCE_IRQ (genapic->no_balance_irq)
-#define INT_DELIVERY_MODE (genapic->int_delivery_mode)
-#define INT_DEST_MODE (genapic->int_dest_mode)
-#undef APIC_DEST_LOGICAL
-#define APIC_DEST_LOGICAL (genapic->apic_destination_logical)
-#define TARGET_CPUS (genapic->target_cpus())
-#define apic_id_registered (genapic->apic_id_registered)
-#define init_apic_ldr (genapic->init_apic_ldr)
-#define ioapic_phys_id_map (genapic->ioapic_phys_id_map)
-#define setup_apic_routing (genapic->setup_apic_routing)
-#define multi_timer_check (genapic->multi_timer_check)
-#define apicid_to_node (genapic->apicid_to_node)
-#define cpu_to_logical_apicid (genapic->cpu_to_logical_apicid)
-#define cpu_present_to_apicid (genapic->cpu_present_to_apicid)
-#define apicid_to_cpu_present (genapic->apicid_to_cpu_present)
-#define setup_portio_remap (genapic->setup_portio_remap)
-#define check_apicid_present (genapic->check_apicid_present)
-#define check_phys_apicid_present (genapic->check_phys_apicid_present)
-#define check_apicid_used (genapic->check_apicid_used)
-#define cpu_mask_to_apicid (genapic->cpu_mask_to_apicid)
-#define cpu_mask_to_apicid_and (genapic->cpu_mask_to_apicid_and)
-#define vector_allocation_domain (genapic->vector_allocation_domain)
-#define enable_apic_mode (genapic->enable_apic_mode)
-#define phys_pkg_id (genapic->phys_pkg_id)
-#define wakeup_secondary_cpu (genapic->wakeup_cpu)
-
-extern void generic_bigsmp_probe(void);
-
-#endif /* _ASM_X86_MACH_GENERIC_MACH_APIC_H */
diff --git a/arch/x86/include/asm/mach-generic/mach_apicdef.h b/arch/x86/include/asm/mach-generic/mach_apicdef.h
deleted file mode 100644
index 68041f3..0000000
--- a/arch/x86/include/asm/mach-generic/mach_apicdef.h
+++ /dev/null
@@ -1,11 +0,0 @@
-#ifndef _ASM_X86_MACH_GENERIC_MACH_APICDEF_H
-#define _ASM_X86_MACH_GENERIC_MACH_APICDEF_H
-
-#ifndef APIC_DEFINITION
-#include <asm/genapic.h>
-
-#define GET_APIC_ID (genapic->get_apic_id)
-#define APIC_ID_MASK (genapic->apic_id_mask)
-#endif
-
-#endif /* _ASM_X86_MACH_GENERIC_MACH_APICDEF_H */
diff --git a/arch/x86/include/asm/mach-generic/mach_ipi.h b/arch/x86/include/asm/mach-generic/mach_ipi.h
deleted file mode 100644
index ffd637e..0000000
--- a/arch/x86/include/asm/mach-generic/mach_ipi.h
+++ /dev/null
@@ -1,10 +0,0 @@
-#ifndef _ASM_X86_MACH_GENERIC_MACH_IPI_H
-#define _ASM_X86_MACH_GENERIC_MACH_IPI_H
-
-#include <asm/genapic.h>
-
-#define send_IPI_mask (genapic->send_IPI_mask)
-#define send_IPI_allbutself (genapic->send_IPI_allbutself)
-#define send_IPI_all (genapic->send_IPI_all)
-
-#endif /* _ASM_X86_MACH_GENERIC_MACH_IPI_H */
diff --git a/arch/x86/include/asm/mach-generic/mach_mpparse.h b/arch/x86/include/asm/mach-generic/mach_mpparse.h
deleted file mode 100644
index 9444ab8..0000000
--- a/arch/x86/include/asm/mach-generic/mach_mpparse.h
+++ /dev/null
@@ -1,9 +0,0 @@
-#ifndef _ASM_X86_MACH_GENERIC_MACH_MPPARSE_H
-#define _ASM_X86_MACH_GENERIC_MACH_MPPARSE_H
-
-
-extern int mps_oem_check(struct mpc_table *, char *, char *);
-
-extern int acpi_madt_oem_check(char *, char *);
-
-#endif /* _ASM_X86_MACH_GENERIC_MACH_MPPARSE_H */
diff --git a/arch/x86/include/asm/mach-generic/mach_mpspec.h b/arch/x86/include/asm/mach-generic/mach_mpspec.h
deleted file mode 100644
index 3bc4072..0000000
--- a/arch/x86/include/asm/mach-generic/mach_mpspec.h
+++ /dev/null
@@ -1,12 +0,0 @@
-#ifndef _ASM_X86_MACH_GENERIC_MACH_MPSPEC_H
-#define _ASM_X86_MACH_GENERIC_MACH_MPSPEC_H
-
-#define MAX_IRQ_SOURCES 256
-
-/* Summit or generic (i.e. installer) kernels need lots of bus entries. */
-/* Maximum 256 PCI busses, plus 1 ISA bus in each of 4 cabinets. */
-#define MAX_MP_BUSSES 260
-
-extern void numaq_mps_oem_check(struct mpc_table *, char *, char *);
-
-#endif /* _ASM_X86_MACH_GENERIC_MACH_MPSPEC_H */
diff --git a/arch/x86/include/asm/mach-generic/mach_wakecpu.h b/arch/x86/include/asm/mach-generic/mach_wakecpu.h
deleted file mode 100644
index 1ab16b1..0000000
--- a/arch/x86/include/asm/mach-generic/mach_wakecpu.h
+++ /dev/null
@@ -1,12 +0,0 @@
-#ifndef _ASM_X86_MACH_GENERIC_MACH_WAKECPU_H
-#define _ASM_X86_MACH_GENERIC_MACH_WAKECPU_H
-
-#define TRAMPOLINE_PHYS_LOW (genapic->trampoline_phys_low)
-#define TRAMPOLINE_PHYS_HIGH (genapic->trampoline_phys_high)
-#define wait_for_init_deassert (genapic->wait_for_init_deassert)
-#define smp_callin_clear_local_apic (genapic->smp_callin_clear_local_apic)
-#define store_NMI_vector (genapic->store_NMI_vector)
-#define restore_NMI_vector (genapic->restore_NMI_vector)
-#define inquire_remote_apic (genapic->inquire_remote_apic)
-
-#endif /* _ASM_X86_MACH_GENERIC_MACH_APIC_H */
diff --git a/arch/x86/include/asm/mach-rdc321x/gpio.h b/arch/x86/include/asm/mach-rdc321x/gpio.h
deleted file mode 100644
index c210ab5..0000000
--- a/arch/x86/include/asm/mach-rdc321x/gpio.h
+++ /dev/null
@@ -1,60 +0,0 @@
-#ifndef _ASM_X86_MACH_RDC321X_GPIO_H
-#define _ASM_X86_MACH_RDC321X_GPIO_H
-
-#include <linux/kernel.h>
-
-extern int rdc_gpio_get_value(unsigned gpio);
-extern void rdc_gpio_set_value(unsigned gpio, int value);
-extern int rdc_gpio_direction_input(unsigned gpio);
-extern int rdc_gpio_direction_output(unsigned gpio, int value);
-extern int rdc_gpio_request(unsigned gpio, const char *label);
-extern void rdc_gpio_free(unsigned gpio);
-extern void __init rdc321x_gpio_setup(void);
-
-/* Wrappers for the arch-neutral GPIO API */
-
-static inline int gpio_request(unsigned gpio, const char *label)
-{
- return rdc_gpio_request(gpio, label);
-}
-
-static inline void gpio_free(unsigned gpio)
-{
- might_sleep();
- rdc_gpio_free(gpio);
-}
-
-static inline int gpio_direction_input(unsigned gpio)
-{
- return rdc_gpio_direction_input(gpio);
-}
-
-static inline int gpio_direction_output(unsigned gpio, int value)
-{
- return rdc_gpio_direction_output(gpio, value);
-}
-
-static inline int gpio_get_value(unsigned gpio)
-{
- return rdc_gpio_get_value(gpio);
-}
-
-static inline void gpio_set_value(unsigned gpio, int value)
-{
- rdc_gpio_set_value(gpio, value);
-}
-
-static inline int gpio_to_irq(unsigned gpio)
-{
- return gpio;
-}
-
-static inline int irq_to_gpio(unsigned irq)
-{
- return irq;
-}
-
-/* For cansleep */
-#include <asm-generic/gpio.h>
-
-#endif /* _ASM_X86_MACH_RDC321X_GPIO_H */
diff --git a/arch/x86/include/asm/mach-voyager/do_timer.h b/arch/x86/include/asm/mach-voyager/do_timer.h
deleted file mode 100644
index 9e5a459f..0000000
--- a/arch/x86/include/asm/mach-voyager/do_timer.h
+++ /dev/null
@@ -1,17 +0,0 @@
-/* defines for inline arch setup functions */
-#include <linux/clockchips.h>
-
-#include <asm/voyager.h>
-#include <asm/i8253.h>
-
-/**
- * do_timer_interrupt_hook - hook into timer tick
- *
- * Call the pit clock event handler. see asm/i8253.h
- **/
-static inline void do_timer_interrupt_hook(void)
-{
- global_clock_event->event_handler(global_clock_event);
- voyager_timer_interrupt();
-}
-
diff --git a/arch/x86/include/asm/mach-voyager/entry_arch.h b/arch/x86/include/asm/mach-voyager/entry_arch.h
deleted file mode 100644
index ae52624..0000000
--- a/arch/x86/include/asm/mach-voyager/entry_arch.h
+++ /dev/null
@@ -1,26 +0,0 @@
-/* -*- mode: c; c-basic-offset: 8 -*- */
-
-/* Copyright (C) 2002
- *
- * Author: James.Bottomley@HansenPartnership.com
- *
- * linux/arch/i386/voyager/entry_arch.h
- *
- * This file builds the VIC and QIC CPI gates
- */
-
-/* initialise the voyager interrupt gates
- *
- * This uses the macros in irq.h to set up assembly jump gates. The
- * calls are then redirected to the same routine with smp_ prefixed */
-BUILD_INTERRUPT(vic_sys_interrupt, VIC_SYS_INT)
-BUILD_INTERRUPT(vic_cmn_interrupt, VIC_CMN_INT)
-BUILD_INTERRUPT(vic_cpi_interrupt, VIC_CPI_LEVEL0);
-
-/* do all the QIC interrupts */
-BUILD_INTERRUPT(qic_timer_interrupt, QIC_TIMER_CPI);
-BUILD_INTERRUPT(qic_invalidate_interrupt, QIC_INVALIDATE_CPI);
-BUILD_INTERRUPT(qic_reschedule_interrupt, QIC_RESCHEDULE_CPI);
-BUILD_INTERRUPT(qic_enable_irq_interrupt, QIC_ENABLE_IRQ_CPI);
-BUILD_INTERRUPT(qic_call_function_interrupt, QIC_CALL_FUNCTION_CPI);
-BUILD_INTERRUPT(qic_call_function_single_interrupt, QIC_CALL_FUNCTION_SINGLE_CPI);
diff --git a/arch/x86/include/asm/mach-voyager/setup_arch.h b/arch/x86/include/asm/mach-voyager/setup_arch.h
deleted file mode 100644
index 71729ca..0000000
--- a/arch/x86/include/asm/mach-voyager/setup_arch.h
+++ /dev/null
@@ -1,12 +0,0 @@
-#include <asm/voyager.h>
-#include <asm/setup.h>
-#define VOYAGER_BIOS_INFO ((struct voyager_bios_info *) \
- (&boot_params.apm_bios_info))
-
-/* Hook to call BIOS initialisation function */
-
-/* for voyager, pass the voyager BIOS/SUS info area to the detection
- * routines */
-
-#define ARCH_SETUP voyager_detect(VOYAGER_BIOS_INFO);
-
diff --git a/arch/x86/include/asm/mach-default/mach_timer.h b/arch/x86/include/asm/mach_timer.h
similarity index 100%
rename from arch/x86/include/asm/mach-default/mach_timer.h
rename to arch/x86/include/asm/mach_timer.h
diff --git a/arch/x86/include/asm/mach-default/mach_traps.h b/arch/x86/include/asm/mach_traps.h
similarity index 100%
rename from arch/x86/include/asm/mach-default/mach_traps.h
rename to arch/x86/include/asm/mach_traps.h
diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h
index 8aeeb3f..f923203 100644
--- a/arch/x86/include/asm/mmu_context.h
+++ b/arch/x86/include/asm/mmu_context.h
@@ -21,11 +21,54 @@
int init_new_context(struct task_struct *tsk, struct mm_struct *mm);
void destroy_context(struct mm_struct *mm);
-#ifdef CONFIG_X86_32
-# include "mmu_context_32.h"
-#else
-# include "mmu_context_64.h"
+
+static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk)
+{
+#ifdef CONFIG_SMP
+ if (percpu_read(cpu_tlbstate.state) == TLBSTATE_OK)
+ percpu_write(cpu_tlbstate.state, TLBSTATE_LAZY);
#endif
+}
+
+static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next,
+ struct task_struct *tsk)
+{
+ unsigned cpu = smp_processor_id();
+
+ if (likely(prev != next)) {
+ /* stop flush ipis for the previous mm */
+ cpu_clear(cpu, prev->cpu_vm_mask);
+#ifdef CONFIG_SMP
+ percpu_write(cpu_tlbstate.state, TLBSTATE_OK);
+ percpu_write(cpu_tlbstate.active_mm, next);
+#endif
+ cpu_set(cpu, next->cpu_vm_mask);
+
+ /* Re-load page tables */
+ load_cr3(next->pgd);
+
+ /*
+ * load the LDT, if the LDT is different:
+ */
+ if (unlikely(prev->context.ldt != next->context.ldt))
+ load_LDT_nolock(&next->context);
+ }
+#ifdef CONFIG_SMP
+ else {
+ percpu_write(cpu_tlbstate.state, TLBSTATE_OK);
+ BUG_ON(percpu_read(cpu_tlbstate.active_mm) != next);
+
+ if (!cpu_test_and_set(cpu, next->cpu_vm_mask)) {
+ /* We were in lazy tlb mode and leave_mm disabled
+ * tlb flush IPI delivery. We must reload CR3
+ * to make sure to use no freed page tables.
+ */
+ load_cr3(next->pgd);
+ load_LDT_nolock(&next->context);
+ }
+ }
+#endif
+}
#define activate_mm(prev, next) \
do { \
@@ -33,5 +76,17 @@
switch_mm((prev), (next), NULL); \
} while (0);
+#ifdef CONFIG_X86_32
+#define deactivate_mm(tsk, mm) \
+do { \
+ lazy_load_gs(0); \
+} while (0)
+#else
+#define deactivate_mm(tsk, mm) \
+do { \
+ load_gs_index(0); \
+ loadsegment(fs, 0); \
+} while (0)
+#endif
#endif /* _ASM_X86_MMU_CONTEXT_H */
diff --git a/arch/x86/include/asm/mmu_context_32.h b/arch/x86/include/asm/mmu_context_32.h
deleted file mode 100644
index 7e98ce1d..0000000
--- a/arch/x86/include/asm/mmu_context_32.h
+++ /dev/null
@@ -1,55 +0,0 @@
-#ifndef _ASM_X86_MMU_CONTEXT_32_H
-#define _ASM_X86_MMU_CONTEXT_32_H
-
-static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk)
-{
-#ifdef CONFIG_SMP
- if (x86_read_percpu(cpu_tlbstate.state) == TLBSTATE_OK)
- x86_write_percpu(cpu_tlbstate.state, TLBSTATE_LAZY);
-#endif
-}
-
-static inline void switch_mm(struct mm_struct *prev,
- struct mm_struct *next,
- struct task_struct *tsk)
-{
- int cpu = smp_processor_id();
-
- if (likely(prev != next)) {
- /* stop flush ipis for the previous mm */
- cpu_clear(cpu, prev->cpu_vm_mask);
-#ifdef CONFIG_SMP
- x86_write_percpu(cpu_tlbstate.state, TLBSTATE_OK);
- x86_write_percpu(cpu_tlbstate.active_mm, next);
-#endif
- cpu_set(cpu, next->cpu_vm_mask);
-
- /* Re-load page tables */
- load_cr3(next->pgd);
-
- /*
- * load the LDT, if the LDT is different:
- */
- if (unlikely(prev->context.ldt != next->context.ldt))
- load_LDT_nolock(&next->context);
- }
-#ifdef CONFIG_SMP
- else {
- x86_write_percpu(cpu_tlbstate.state, TLBSTATE_OK);
- BUG_ON(x86_read_percpu(cpu_tlbstate.active_mm) != next);
-
- if (!cpu_test_and_set(cpu, next->cpu_vm_mask)) {
- /* We were in lazy tlb mode and leave_mm disabled
- * tlb flush IPI delivery. We must reload %cr3.
- */
- load_cr3(next->pgd);
- load_LDT_nolock(&next->context);
- }
- }
-#endif
-}
-
-#define deactivate_mm(tsk, mm) \
- asm("movl %0,%%gs": :"r" (0));
-
-#endif /* _ASM_X86_MMU_CONTEXT_32_H */
diff --git a/arch/x86/include/asm/mmu_context_64.h b/arch/x86/include/asm/mmu_context_64.h
deleted file mode 100644
index 677d36e..0000000
--- a/arch/x86/include/asm/mmu_context_64.h
+++ /dev/null
@@ -1,54 +0,0 @@
-#ifndef _ASM_X86_MMU_CONTEXT_64_H
-#define _ASM_X86_MMU_CONTEXT_64_H
-
-#include <asm/pda.h>
-
-static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk)
-{
-#ifdef CONFIG_SMP
- if (read_pda(mmu_state) == TLBSTATE_OK)
- write_pda(mmu_state, TLBSTATE_LAZY);
-#endif
-}
-
-static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next,
- struct task_struct *tsk)
-{
- unsigned cpu = smp_processor_id();
- if (likely(prev != next)) {
- /* stop flush ipis for the previous mm */
- cpu_clear(cpu, prev->cpu_vm_mask);
-#ifdef CONFIG_SMP
- write_pda(mmu_state, TLBSTATE_OK);
- write_pda(active_mm, next);
-#endif
- cpu_set(cpu, next->cpu_vm_mask);
- load_cr3(next->pgd);
-
- if (unlikely(next->context.ldt != prev->context.ldt))
- load_LDT_nolock(&next->context);
- }
-#ifdef CONFIG_SMP
- else {
- write_pda(mmu_state, TLBSTATE_OK);
- if (read_pda(active_mm) != next)
- BUG();
- if (!cpu_test_and_set(cpu, next->cpu_vm_mask)) {
- /* We were in lazy tlb mode and leave_mm disabled
- * tlb flush IPI delivery. We must reload CR3
- * to make sure to use no freed page tables.
- */
- load_cr3(next->pgd);
- load_LDT_nolock(&next->context);
- }
- }
-#endif
-}
-
-#define deactivate_mm(tsk, mm) \
-do { \
- load_gs_index(0); \
- asm volatile("movl %0,%%fs"::"r"(0)); \
-} while (0)
-
-#endif /* _ASM_X86_MMU_CONTEXT_64_H */
diff --git a/arch/x86/include/asm/mpspec.h b/arch/x86/include/asm/mpspec.h
index bd22f2a..642fc7f 100644
--- a/arch/x86/include/asm/mpspec.h
+++ b/arch/x86/include/asm/mpspec.h
@@ -9,7 +9,18 @@
extern int pic_mode;
#ifdef CONFIG_X86_32
-#include <mach_mpspec.h>
+
+/*
+ * Summit or generic (i.e. installer) kernels need lots of bus entries.
+ * Maximum 256 PCI busses, plus 1 ISA bus in each of 4 cabinets.
+ */
+#if CONFIG_BASE_SMALL == 0
+# define MAX_MP_BUSSES 260
+#else
+# define MAX_MP_BUSSES 32
+#endif
+
+#define MAX_IRQ_SOURCES 256
extern unsigned int def_to_bigsmp;
extern u8 apicid_2_node[];
@@ -20,15 +31,15 @@
extern int quad_local_to_mp_bus_id [NR_CPUS/4][4];
#endif
-#define MAX_APICID 256
+#define MAX_APICID 256
-#else
+#else /* CONFIG_X86_64: */
-#define MAX_MP_BUSSES 256
+#define MAX_MP_BUSSES 256
/* Each PCI slot may be a combo card with its own bus. 4 IRQ pins per slot. */
-#define MAX_IRQ_SOURCES (MAX_MP_BUSSES * 4)
+#define MAX_IRQ_SOURCES (MAX_MP_BUSSES * 4)
-#endif
+#endif /* CONFIG_X86_64 */
extern void early_find_smp_config(void);
extern void early_get_smp_config(void);
@@ -45,11 +56,13 @@
extern int mpc_default_type;
extern unsigned long mp_lapic_addr;
-extern void find_smp_config(void);
extern void get_smp_config(void);
+
#ifdef CONFIG_X86_MPPARSE
+extern void find_smp_config(void);
extern void early_reserve_e820_mpc_new(void);
#else
+static inline void find_smp_config(void) { }
static inline void early_reserve_e820_mpc_new(void) { }
#endif
@@ -64,6 +77,8 @@
#ifdef CONFIG_X86_IO_APIC
extern int mp_config_acpi_gsi(unsigned char number, unsigned int devfn, u8 pin,
u32 gsi, int triggering, int polarity);
+extern int mp_find_ioapic(int gsi);
+extern int mp_find_ioapic_pin(int ioapic, int gsi);
#else
static inline int
mp_config_acpi_gsi(unsigned char number, unsigned int devfn, u8 pin,
@@ -148,4 +163,8 @@
extern physid_mask_t phys_cpu_present_map;
+extern int generic_mps_oem_check(struct mpc_table *, char *, char *);
+
+extern int default_acpi_madt_oem_check(char *, char *);
+
#endif /* _ASM_X86_MPSPEC_H */
diff --git a/arch/x86/include/asm/mpspec_def.h b/arch/x86/include/asm/mpspec_def.h
index 59568bc..4a7f96d 100644
--- a/arch/x86/include/asm/mpspec_def.h
+++ b/arch/x86/include/asm/mpspec_def.h
@@ -24,17 +24,18 @@
# endif
#endif
-struct intel_mp_floating {
- char mpf_signature[4]; /* "_MP_" */
- unsigned int mpf_physptr; /* Configuration table address */
- unsigned char mpf_length; /* Our length (paragraphs) */
- unsigned char mpf_specification;/* Specification version */
- unsigned char mpf_checksum; /* Checksum (makes sum 0) */
- unsigned char mpf_feature1; /* Standard or configuration ? */
- unsigned char mpf_feature2; /* Bit7 set for IMCR|PIC */
- unsigned char mpf_feature3; /* Unused (0) */
- unsigned char mpf_feature4; /* Unused (0) */
- unsigned char mpf_feature5; /* Unused (0) */
+/* Intel MP Floating Pointer Structure */
+struct mpf_intel {
+ char signature[4]; /* "_MP_" */
+ unsigned int physptr; /* Configuration table address */
+ unsigned char length; /* Our length (paragraphs) */
+ unsigned char specification; /* Specification version */
+ unsigned char checksum; /* Checksum (makes sum 0) */
+ unsigned char feature1; /* Standard or configuration ? */
+ unsigned char feature2; /* Bit7 set for IMCR|PIC */
+ unsigned char feature3; /* Unused (0) */
+ unsigned char feature4; /* Unused (0) */
+ unsigned char feature5; /* Unused (0) */
};
#define MPC_SIGNATURE "PCMP"
diff --git a/arch/x86/include/asm/numaq.h b/arch/x86/include/asm/numaq.h
index 1e8bd30..9f0a5f5 100644
--- a/arch/x86/include/asm/numaq.h
+++ b/arch/x86/include/asm/numaq.h
@@ -31,6 +31,8 @@
extern int found_numaq;
extern int get_memcfg_numaq(void);
+extern void *xquad_portio;
+
/*
* SYS_CFG_DATA_PRIV_ADDR, struct eachquadmem, and struct sys_cfg_data are the
*/
diff --git a/arch/x86/include/asm/numaq/apic.h b/arch/x86/include/asm/numaq/apic.h
deleted file mode 100644
index bf37bc4..0000000
--- a/arch/x86/include/asm/numaq/apic.h
+++ /dev/null
@@ -1,142 +0,0 @@
-#ifndef __ASM_NUMAQ_APIC_H
-#define __ASM_NUMAQ_APIC_H
-
-#include <asm/io.h>
-#include <linux/mmzone.h>
-#include <linux/nodemask.h>
-
-#define APIC_DFR_VALUE (APIC_DFR_CLUSTER)
-
-static inline const cpumask_t *target_cpus(void)
-{
- return &CPU_MASK_ALL;
-}
-
-#define NO_BALANCE_IRQ (1)
-#define esr_disable (1)
-
-#define INT_DELIVERY_MODE dest_LowestPrio
-#define INT_DEST_MODE 0 /* physical delivery on LOCAL quad */
-
-static inline unsigned long check_apicid_used(physid_mask_t bitmap, int apicid)
-{
- return physid_isset(apicid, bitmap);
-}
-static inline unsigned long check_apicid_present(int bit)
-{
- return physid_isset(bit, phys_cpu_present_map);
-}
-#define apicid_cluster(apicid) (apicid & 0xF0)
-
-static inline int apic_id_registered(void)
-{
- return 1;
-}
-
-static inline void init_apic_ldr(void)
-{
- /* Already done in NUMA-Q firmware */
-}
-
-static inline void setup_apic_routing(void)
-{
- printk("Enabling APIC mode: %s. Using %d I/O APICs\n",
- "NUMA-Q", nr_ioapics);
-}
-
-/*
- * Skip adding the timer int on secondary nodes, which causes
- * a small but painful rift in the time-space continuum.
- */
-static inline int multi_timer_check(int apic, int irq)
-{
- return apic != 0 && irq == 0;
-}
-
-static inline physid_mask_t ioapic_phys_id_map(physid_mask_t phys_map)
-{
- /* We don't have a good way to do this yet - hack */
- return physids_promote(0xFUL);
-}
-
-/* Mapping from cpu number to logical apicid */
-extern u8 cpu_2_logical_apicid[];
-static inline int cpu_to_logical_apicid(int cpu)
-{
- if (cpu >= nr_cpu_ids)
- return BAD_APICID;
- return (int)cpu_2_logical_apicid[cpu];
-}
-
-/*
- * Supporting over 60 cpus on NUMA-Q requires a locality-dependent
- * cpu to APIC ID relation to properly interact with the intelligent
- * mode of the cluster controller.
- */
-static inline int cpu_present_to_apicid(int mps_cpu)
-{
- if (mps_cpu < 60)
- return ((mps_cpu >> 2) << 4) | (1 << (mps_cpu & 0x3));
- else
- return BAD_APICID;
-}
-
-static inline int apicid_to_node(int logical_apicid)
-{
- return logical_apicid >> 4;
-}
-
-static inline physid_mask_t apicid_to_cpu_present(int logical_apicid)
-{
- int node = apicid_to_node(logical_apicid);
- int cpu = __ffs(logical_apicid & 0xf);
-
- return physid_mask_of_physid(cpu + 4*node);
-}
-
-extern void *xquad_portio;
-
-static inline void setup_portio_remap(void)
-{
- int num_quads = num_online_nodes();
-
- if (num_quads <= 1)
- return;
-
- printk("Remapping cross-quad port I/O for %d quads\n", num_quads);
- xquad_portio = ioremap(XQUAD_PORTIO_BASE, num_quads*XQUAD_PORTIO_QUAD);
- printk("xquad_portio vaddr 0x%08lx, len %08lx\n",
- (u_long) xquad_portio, (u_long) num_quads*XQUAD_PORTIO_QUAD);
-}
-
-static inline int check_phys_apicid_present(int boot_cpu_physical_apicid)
-{
- return (1);
-}
-
-static inline void enable_apic_mode(void)
-{
-}
-
-/*
- * We use physical apicids here, not logical, so just return the default
- * physical broadcast to stop people from breaking us
- */
-static inline unsigned int cpu_mask_to_apicid(const cpumask_t *cpumask)
-{
- return (int) 0xF;
-}
-
-static inline unsigned int cpu_mask_to_apicid_and(const struct cpumask *cpumask,
- const struct cpumask *andmask)
-{
- return (int) 0xF;
-}
-
-/* No NUMA-Q box has a HT CPU, but it can't hurt to use the default code. */
-static inline u32 phys_pkg_id(u32 cpuid_apic, int index_msb)
-{
- return cpuid_apic >> index_msb;
-}
-
-#endif /* __ASM_NUMAQ_APIC_H */
diff --git a/arch/x86/include/asm/numaq/apicdef.h b/arch/x86/include/asm/numaq/apicdef.h
deleted file mode 100644
index e012a46..0000000
--- a/arch/x86/include/asm/numaq/apicdef.h
+++ /dev/null
@@ -1,14 +0,0 @@
-#ifndef __ASM_NUMAQ_APICDEF_H
-#define __ASM_NUMAQ_APICDEF_H
-
-
-#define APIC_ID_MASK (0xF<<24)
-
-static inline unsigned get_apic_id(unsigned long x)
-{
- return (((x)>>24)&0x0F);
-}
-
-#define GET_APIC_ID(x) get_apic_id(x)
-
-#endif
diff --git a/arch/x86/include/asm/numaq/ipi.h b/arch/x86/include/asm/numaq/ipi.h
deleted file mode 100644
index a8374c6..0000000
--- a/arch/x86/include/asm/numaq/ipi.h
+++ /dev/null
@@ -1,22 +0,0 @@
-#ifndef __ASM_NUMAQ_IPI_H
-#define __ASM_NUMAQ_IPI_H
-
-void send_IPI_mask_sequence(const struct cpumask *mask, int vector);
-void send_IPI_mask_allbutself(const struct cpumask *mask, int vector);
-
-static inline void send_IPI_mask(const struct cpumask *mask, int vector)
-{
- send_IPI_mask_sequence(mask, vector);
-}
-
-static inline void send_IPI_allbutself(int vector)
-{
- send_IPI_mask_allbutself(cpu_online_mask, vector);
-}
-
-static inline void send_IPI_all(int vector)
-{
- send_IPI_mask(cpu_online_mask, vector);
-}
-
-#endif /* __ASM_NUMAQ_IPI_H */
diff --git a/arch/x86/include/asm/numaq/mpparse.h b/arch/x86/include/asm/numaq/mpparse.h
deleted file mode 100644
index a2eeefc..0000000
--- a/arch/x86/include/asm/numaq/mpparse.h
+++ /dev/null
@@ -1,6 +0,0 @@
-#ifndef __ASM_NUMAQ_MPPARSE_H
-#define __ASM_NUMAQ_MPPARSE_H
-
-extern void numaq_mps_oem_check(struct mpc_table *, char *, char *);
-
-#endif /* __ASM_NUMAQ_MPPARSE_H */
diff --git a/arch/x86/include/asm/numaq/wakecpu.h b/arch/x86/include/asm/numaq/wakecpu.h
deleted file mode 100644
index 6f499df..0000000
--- a/arch/x86/include/asm/numaq/wakecpu.h
+++ /dev/null
@@ -1,45 +0,0 @@
-#ifndef __ASM_NUMAQ_WAKECPU_H
-#define __ASM_NUMAQ_WAKECPU_H
-
-/* This file copes with machines that wakeup secondary CPUs by NMIs */
-
-#define TRAMPOLINE_PHYS_LOW (0x8)
-#define TRAMPOLINE_PHYS_HIGH (0xa)
-
-/* We don't do anything here because we use NMI's to boot instead */
-static inline void wait_for_init_deassert(atomic_t *deassert)
-{
-}
-
-/*
- * Because we use NMIs rather than the INIT-STARTUP sequence to
- * bootstrap the CPUs, the APIC may be in a weird state. Kick it.
- */
-static inline void smp_callin_clear_local_apic(void)
-{
- clear_local_APIC();
-}
-
-static inline void store_NMI_vector(unsigned short *high, unsigned short *low)
-{
- printk("Storing NMI vector\n");
- *high =
- *((volatile unsigned short *)phys_to_virt(TRAMPOLINE_PHYS_HIGH));
- *low =
- *((volatile unsigned short *)phys_to_virt(TRAMPOLINE_PHYS_LOW));
-}
-
-static inline void restore_NMI_vector(unsigned short *high, unsigned short *low)
-{
- printk("Restoring NMI vector\n");
- *((volatile unsigned short *)phys_to_virt(TRAMPOLINE_PHYS_HIGH)) =
- *high;
- *((volatile unsigned short *)phys_to_virt(TRAMPOLINE_PHYS_LOW)) =
- *low;
-}
-
-static inline void inquire_remote_apic(int apicid)
-{
-}
-
-#endif /* __ASM_NUMAQ_WAKECPU_H */
diff --git a/arch/x86/include/asm/page.h b/arch/x86/include/asm/page.h
index 7765791..89ed9d7 100644
--- a/arch/x86/include/asm/page.h
+++ b/arch/x86/include/asm/page.h
@@ -1,42 +1,11 @@
#ifndef _ASM_X86_PAGE_H
#define _ASM_X86_PAGE_H
-#include <linux/const.h>
-
-/* PAGE_SHIFT determines the page size */
-#define PAGE_SHIFT 12
-#define PAGE_SIZE (_AC(1,UL) << PAGE_SHIFT)
-#define PAGE_MASK (~(PAGE_SIZE-1))
+#include <linux/types.h>
#ifdef __KERNEL__
-#define __PHYSICAL_MASK ((phys_addr_t)(1ULL << __PHYSICAL_MASK_SHIFT) - 1)
-#define __VIRTUAL_MASK ((1UL << __VIRTUAL_MASK_SHIFT) - 1)
-
-/* Cast PAGE_MASK to a signed type so that it is sign-extended if
- virtual addresses are 32-bits but physical addresses are larger
- (ie, 32-bit PAE). */
-#define PHYSICAL_PAGE_MASK (((signed long)PAGE_MASK) & __PHYSICAL_MASK)
-
-/* PTE_PFN_MASK extracts the PFN from a (pte|pmd|pud|pgd)val_t */
-#define PTE_PFN_MASK ((pteval_t)PHYSICAL_PAGE_MASK)
-
-/* PTE_FLAGS_MASK extracts the flags from a (pte|pmd|pud|pgd)val_t */
-#define PTE_FLAGS_MASK (~PTE_PFN_MASK)
-
-#define PMD_PAGE_SIZE (_AC(1, UL) << PMD_SHIFT)
-#define PMD_PAGE_MASK (~(PMD_PAGE_SIZE-1))
-
-#define HPAGE_SHIFT PMD_SHIFT
-#define HPAGE_SIZE (_AC(1,UL) << HPAGE_SHIFT)
-#define HPAGE_MASK (~(HPAGE_SIZE - 1))
-#define HUGETLB_PAGE_ORDER (HPAGE_SHIFT - PAGE_SHIFT)
-
-#define HUGE_MAX_HSTATE 2
-
-#ifndef __ASSEMBLY__
-#include <linux/types.h>
-#endif
+#include <asm/page_types.h>
#ifdef CONFIG_X86_64
#include <asm/page_64.h>
@@ -44,38 +13,18 @@
#include <asm/page_32.h>
#endif /* CONFIG_X86_64 */
-#define PAGE_OFFSET ((unsigned long)__PAGE_OFFSET)
-
-#define VM_DATA_DEFAULT_FLAGS \
- (((current->personality & READ_IMPLIES_EXEC) ? VM_EXEC : 0 ) | \
- VM_READ | VM_WRITE | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC)
-
-
#ifndef __ASSEMBLY__
-typedef struct { pgdval_t pgd; } pgd_t;
-typedef struct { pgprotval_t pgprot; } pgprot_t;
-
-extern int page_is_ram(unsigned long pagenr);
-extern int devmem_is_allowed(unsigned long pagenr);
-extern void map_devmem(unsigned long pfn, unsigned long size,
- pgprot_t vma_prot);
-extern void unmap_devmem(unsigned long pfn, unsigned long size,
- pgprot_t vma_prot);
-
-extern unsigned long max_low_pfn_mapped;
-extern unsigned long max_pfn_mapped;
-
struct page;
static inline void clear_user_page(void *page, unsigned long vaddr,
- struct page *pg)
+ struct page *pg)
{
clear_page(page);
}
static inline void copy_user_page(void *to, void *from, unsigned long vaddr,
- struct page *topage)
+ struct page *topage)
{
copy_page(to, from);
}
@@ -84,99 +33,6 @@
alloc_page_vma(GFP_HIGHUSER | __GFP_ZERO | movableflags, vma, vaddr)
#define __HAVE_ARCH_ALLOC_ZEROED_USER_HIGHPAGE
-static inline pgd_t native_make_pgd(pgdval_t val)
-{
- return (pgd_t) { val };
-}
-
-static inline pgdval_t native_pgd_val(pgd_t pgd)
-{
- return pgd.pgd;
-}
-
-#if PAGETABLE_LEVELS >= 3
-#if PAGETABLE_LEVELS == 4
-typedef struct { pudval_t pud; } pud_t;
-
-static inline pud_t native_make_pud(pmdval_t val)
-{
- return (pud_t) { val };
-}
-
-static inline pudval_t native_pud_val(pud_t pud)
-{
- return pud.pud;
-}
-#else /* PAGETABLE_LEVELS == 3 */
-#include <asm-generic/pgtable-nopud.h>
-
-static inline pudval_t native_pud_val(pud_t pud)
-{
- return native_pgd_val(pud.pgd);
-}
-#endif /* PAGETABLE_LEVELS == 4 */
-
-typedef struct { pmdval_t pmd; } pmd_t;
-
-static inline pmd_t native_make_pmd(pmdval_t val)
-{
- return (pmd_t) { val };
-}
-
-static inline pmdval_t native_pmd_val(pmd_t pmd)
-{
- return pmd.pmd;
-}
-#else /* PAGETABLE_LEVELS == 2 */
-#include <asm-generic/pgtable-nopmd.h>
-
-static inline pmdval_t native_pmd_val(pmd_t pmd)
-{
- return native_pgd_val(pmd.pud.pgd);
-}
-#endif /* PAGETABLE_LEVELS >= 3 */
-
-static inline pte_t native_make_pte(pteval_t val)
-{
- return (pte_t) { .pte = val };
-}
-
-static inline pteval_t native_pte_val(pte_t pte)
-{
- return pte.pte;
-}
-
-static inline pteval_t native_pte_flags(pte_t pte)
-{
- return native_pte_val(pte) & PTE_FLAGS_MASK;
-}
-
-#define pgprot_val(x) ((x).pgprot)
-#define __pgprot(x) ((pgprot_t) { (x) } )
-
-#ifdef CONFIG_PARAVIRT
-#include <asm/paravirt.h>
-#else /* !CONFIG_PARAVIRT */
-
-#define pgd_val(x) native_pgd_val(x)
-#define __pgd(x) native_make_pgd(x)
-
-#ifndef __PAGETABLE_PUD_FOLDED
-#define pud_val(x) native_pud_val(x)
-#define __pud(x) native_make_pud(x)
-#endif
-
-#ifndef __PAGETABLE_PMD_FOLDED
-#define pmd_val(x) native_pmd_val(x)
-#define __pmd(x) native_make_pmd(x)
-#endif
-
-#define pte_val(x) native_pte_val(x)
-#define pte_flags(x) native_pte_flags(x)
-#define __pte(x) native_make_pte(x)
-
-#endif /* CONFIG_PARAVIRT */
-
#define __pa(x) __phys_addr((unsigned long)(x))
#define __pa_nodebug(x) __phys_addr_nodebug((unsigned long)(x))
/* __pa_symbol should be used for C visible symbols.
diff --git a/arch/x86/include/asm/page_32.h b/arch/x86/include/asm/page_32.h
index bcde0d7..da4e762 100644
--- a/arch/x86/include/asm/page_32.h
+++ b/arch/x86/include/asm/page_32.h
@@ -1,82 +1,14 @@
#ifndef _ASM_X86_PAGE_32_H
#define _ASM_X86_PAGE_32_H
-/*
- * This handles the memory map.
- *
- * A __PAGE_OFFSET of 0xC0000000 means that the kernel has
- * a virtual address space of one gigabyte, which limits the
- * amount of physical memory you can use to about 950MB.
- *
- * If you want more physical memory than this then see the CONFIG_HIGHMEM4G
- * and CONFIG_HIGHMEM64G options in the kernel configuration.
- */
-#define __PAGE_OFFSET _AC(CONFIG_PAGE_OFFSET, UL)
-
-#ifdef CONFIG_4KSTACKS
-#define THREAD_ORDER 0
-#else
-#define THREAD_ORDER 1
-#endif
-#define THREAD_SIZE (PAGE_SIZE << THREAD_ORDER)
-
-#define STACKFAULT_STACK 0
-#define DOUBLEFAULT_STACK 1
-#define NMI_STACK 0
-#define DEBUG_STACK 0
-#define MCE_STACK 0
-#define N_EXCEPTION_STACKS 1
-
-#ifdef CONFIG_X86_PAE
-/* 44=32+12, the limit we can fit into an unsigned long pfn */
-#define __PHYSICAL_MASK_SHIFT 44
-#define __VIRTUAL_MASK_SHIFT 32
-#define PAGETABLE_LEVELS 3
+#include <asm/page_32_types.h>
#ifndef __ASSEMBLY__
-typedef u64 pteval_t;
-typedef u64 pmdval_t;
-typedef u64 pudval_t;
-typedef u64 pgdval_t;
-typedef u64 pgprotval_t;
-
-typedef union {
- struct {
- unsigned long pte_low, pte_high;
- };
- pteval_t pte;
-} pte_t;
-#endif /* __ASSEMBLY__
- */
-#else /* !CONFIG_X86_PAE */
-#define __PHYSICAL_MASK_SHIFT 32
-#define __VIRTUAL_MASK_SHIFT 32
-#define PAGETABLE_LEVELS 2
-
-#ifndef __ASSEMBLY__
-typedef unsigned long pteval_t;
-typedef unsigned long pmdval_t;
-typedef unsigned long pudval_t;
-typedef unsigned long pgdval_t;
-typedef unsigned long pgprotval_t;
-
-typedef union {
- pteval_t pte;
- pteval_t pte_low;
-} pte_t;
-
-#endif /* __ASSEMBLY__ */
-#endif /* CONFIG_X86_PAE */
-
-#ifndef __ASSEMBLY__
-typedef struct page *pgtable_t;
-#endif
#ifdef CONFIG_HUGETLB_PAGE
#define HAVE_ARCH_HUGETLB_UNMAPPED_AREA
#endif
-#ifndef __ASSEMBLY__
#define __phys_addr_nodebug(x) ((x) - PAGE_OFFSET)
#ifdef CONFIG_DEBUG_VIRTUAL
extern unsigned long __phys_addr(unsigned long);
@@ -89,23 +21,6 @@
#define pfn_valid(pfn) ((pfn) < max_mapnr)
#endif /* CONFIG_FLATMEM */
-extern int nx_enabled;
-
-/*
- * This much address space is reserved for vmalloc() and iomap()
- * as well as fixmap mappings.
- */
-extern unsigned int __VMALLOC_RESERVE;
-extern int sysctl_legacy_va_layout;
-
-extern void find_low_pfn_range(void);
-extern unsigned long init_memory_mapping(unsigned long start,
- unsigned long end);
-extern void initmem_init(unsigned long, unsigned long);
-extern void free_initmem(void);
-extern void setup_bootmem_allocator(void);
-
-
#ifdef CONFIG_X86_USE_3DNOW
#include <asm/mmx.h>
diff --git a/arch/x86/include/asm/page_32_types.h b/arch/x86/include/asm/page_32_types.h
new file mode 100644
index 0000000..f1e4a79
--- /dev/null
+++ b/arch/x86/include/asm/page_32_types.h
@@ -0,0 +1,60 @@
+#ifndef _ASM_X86_PAGE_32_DEFS_H
+#define _ASM_X86_PAGE_32_DEFS_H
+
+#include <linux/const.h>
+
+/*
+ * This handles the memory map.
+ *
+ * A __PAGE_OFFSET of 0xC0000000 means that the kernel has
+ * a virtual address space of one gigabyte, which limits the
+ * amount of physical memory you can use to about 950MB.
+ *
+ * If you want more physical memory than this then see the CONFIG_HIGHMEM4G
+ * and CONFIG_HIGHMEM64G options in the kernel configuration.
+ */
+#define __PAGE_OFFSET _AC(CONFIG_PAGE_OFFSET, UL)
+
+#ifdef CONFIG_4KSTACKS
+#define THREAD_ORDER 0
+#else
+#define THREAD_ORDER 1
+#endif
+#define THREAD_SIZE (PAGE_SIZE << THREAD_ORDER)
+
+#define STACKFAULT_STACK 0
+#define DOUBLEFAULT_STACK 1
+#define NMI_STACK 0
+#define DEBUG_STACK 0
+#define MCE_STACK 0
+#define N_EXCEPTION_STACKS 1
+
+#ifdef CONFIG_X86_PAE
+/* 44=32+12, the limit we can fit into an unsigned long pfn */
+#define __PHYSICAL_MASK_SHIFT 44
+#define __VIRTUAL_MASK_SHIFT 32
+
+#else /* !CONFIG_X86_PAE */
+#define __PHYSICAL_MASK_SHIFT 32
+#define __VIRTUAL_MASK_SHIFT 32
+#endif /* CONFIG_X86_PAE */
+
+#ifndef __ASSEMBLY__
+
+/*
+ * This much address space is reserved for vmalloc() and iomap()
+ * as well as fixmap mappings.
+ */
+extern unsigned int __VMALLOC_RESERVE;
+extern int sysctl_legacy_va_layout;
+
+extern void find_low_pfn_range(void);
+extern unsigned long init_memory_mapping(unsigned long start,
+ unsigned long end);
+extern void initmem_init(unsigned long, unsigned long);
+extern void free_initmem(void);
+extern void setup_bootmem_allocator(void);
+
+#endif /* !__ASSEMBLY__ */
+
+#endif /* _ASM_X86_PAGE_32_DEFS_H */
diff --git a/arch/x86/include/asm/page_64.h b/arch/x86/include/asm/page_64.h
index 5ebca29..072694ed 100644
--- a/arch/x86/include/asm/page_64.h
+++ b/arch/x86/include/asm/page_64.h
@@ -1,105 +1,6 @@
#ifndef _ASM_X86_PAGE_64_H
#define _ASM_X86_PAGE_64_H
-#define PAGETABLE_LEVELS 4
-
-#define THREAD_ORDER 1
-#define THREAD_SIZE (PAGE_SIZE << THREAD_ORDER)
-#define CURRENT_MASK (~(THREAD_SIZE - 1))
-
-#define EXCEPTION_STACK_ORDER 0
-#define EXCEPTION_STKSZ (PAGE_SIZE << EXCEPTION_STACK_ORDER)
-
-#define DEBUG_STACK_ORDER (EXCEPTION_STACK_ORDER + 1)
-#define DEBUG_STKSZ (PAGE_SIZE << DEBUG_STACK_ORDER)
-
-#define IRQSTACK_ORDER 2
-#define IRQSTACKSIZE (PAGE_SIZE << IRQSTACK_ORDER)
-
-#define STACKFAULT_STACK 1
-#define DOUBLEFAULT_STACK 2
-#define NMI_STACK 3
-#define DEBUG_STACK 4
-#define MCE_STACK 5
-#define N_EXCEPTION_STACKS 5 /* hw limit: 7 */
-
-#define PUD_PAGE_SIZE (_AC(1, UL) << PUD_SHIFT)
-#define PUD_PAGE_MASK (~(PUD_PAGE_SIZE-1))
-
-/*
- * Set __PAGE_OFFSET to the most negative possible address +
- * PGDIR_SIZE*16 (pgd slot 272). The gap is to allow a space for a
- * hypervisor to fit. Choosing 16 slots here is arbitrary, but it's
- * what Xen requires.
- */
-#define __PAGE_OFFSET _AC(0xffff880000000000, UL)
-
-#define __PHYSICAL_START CONFIG_PHYSICAL_START
-#define __KERNEL_ALIGN 0x200000
-
-/*
- * Make sure kernel is aligned to 2MB address. Catching it at compile
- * time is better. Change your config file and compile the kernel
- * for a 2MB aligned address (CONFIG_PHYSICAL_START)
- */
-#if (CONFIG_PHYSICAL_START % __KERNEL_ALIGN) != 0
-#error "CONFIG_PHYSICAL_START must be a multiple of 2MB"
-#endif
-
-#define __START_KERNEL (__START_KERNEL_map + __PHYSICAL_START)
-#define __START_KERNEL_map _AC(0xffffffff80000000, UL)
-
-/* See Documentation/x86_64/mm.txt for a description of the memory map. */
-#define __PHYSICAL_MASK_SHIFT 46
-#define __VIRTUAL_MASK_SHIFT 48
-
-/*
- * Kernel image size is limited to 512 MB (see level2_kernel_pgt in
- * arch/x86/kernel/head_64.S), and it is mapped here:
- */
-#define KERNEL_IMAGE_SIZE (512 * 1024 * 1024)
-#define KERNEL_IMAGE_START _AC(0xffffffff80000000, UL)
-
-#ifndef __ASSEMBLY__
-void clear_page(void *page);
-void copy_page(void *to, void *from);
-
-/* duplicated to the one in bootmem.h */
-extern unsigned long max_pfn;
-extern unsigned long phys_base;
-
-extern unsigned long __phys_addr(unsigned long);
-#define __phys_reloc_hide(x) (x)
-
-/*
- * These are used to make use of C type-checking..
- */
-typedef unsigned long pteval_t;
-typedef unsigned long pmdval_t;
-typedef unsigned long pudval_t;
-typedef unsigned long pgdval_t;
-typedef unsigned long pgprotval_t;
-
-typedef struct page *pgtable_t;
-
-typedef struct { pteval_t pte; } pte_t;
-
-#define vmemmap ((struct page *)VMEMMAP_START)
-
-extern unsigned long init_memory_mapping(unsigned long start,
- unsigned long end);
-
-extern void initmem_init(unsigned long start_pfn, unsigned long end_pfn);
-extern void free_initmem(void);
-
-extern void init_extra_mapping_uc(unsigned long phys, unsigned long size);
-extern void init_extra_mapping_wb(unsigned long phys, unsigned long size);
-
-#endif /* !__ASSEMBLY__ */
-
-#ifdef CONFIG_FLATMEM
-#define pfn_valid(pfn) ((pfn) < max_pfn)
-#endif
-
+#include <asm/page_64_types.h>
#endif /* _ASM_X86_PAGE_64_H */
diff --git a/arch/x86/include/asm/page_64_types.h b/arch/x86/include/asm/page_64_types.h
new file mode 100644
index 0000000..d38c91b
--- /dev/null
+++ b/arch/x86/include/asm/page_64_types.h
@@ -0,0 +1,89 @@
+#ifndef _ASM_X86_PAGE_64_DEFS_H
+#define _ASM_X86_PAGE_64_DEFS_H
+
+#define THREAD_ORDER 1
+#define THREAD_SIZE (PAGE_SIZE << THREAD_ORDER)
+#define CURRENT_MASK (~(THREAD_SIZE - 1))
+
+#define EXCEPTION_STACK_ORDER 0
+#define EXCEPTION_STKSZ (PAGE_SIZE << EXCEPTION_STACK_ORDER)
+
+#define DEBUG_STACK_ORDER (EXCEPTION_STACK_ORDER + 1)
+#define DEBUG_STKSZ (PAGE_SIZE << DEBUG_STACK_ORDER)
+
+#define IRQ_STACK_ORDER 2
+#define IRQ_STACK_SIZE (PAGE_SIZE << IRQ_STACK_ORDER)
+
+#define STACKFAULT_STACK 1
+#define DOUBLEFAULT_STACK 2
+#define NMI_STACK 3
+#define DEBUG_STACK 4
+#define MCE_STACK 5
+#define N_EXCEPTION_STACKS 5 /* hw limit: 7 */
+
+#define PUD_PAGE_SIZE (_AC(1, UL) << PUD_SHIFT)
+#define PUD_PAGE_MASK (~(PUD_PAGE_SIZE-1))
+
+/*
+ * Set __PAGE_OFFSET to the most negative possible address +
+ * PGDIR_SIZE*16 (pgd slot 272). The gap is to allow a space for a
+ * hypervisor to fit. Choosing 16 slots here is arbitrary, but it's
+ * what Xen requires.
+ */
+#define __PAGE_OFFSET _AC(0xffff880000000000, UL)
+
+#define __PHYSICAL_START CONFIG_PHYSICAL_START
+#define __KERNEL_ALIGN 0x200000
+
+/*
+ * Make sure kernel is aligned to 2MB address. Catching it at compile
+ * time is better. Change your config file and compile the kernel
+ * for a 2MB aligned address (CONFIG_PHYSICAL_START)
+ */
+#if (CONFIG_PHYSICAL_START % __KERNEL_ALIGN) != 0
+#error "CONFIG_PHYSICAL_START must be a multiple of 2MB"
+#endif
+
+#define __START_KERNEL (__START_KERNEL_map + __PHYSICAL_START)
+#define __START_KERNEL_map _AC(0xffffffff80000000, UL)
+
+/* See Documentation/x86_64/mm.txt for a description of the memory map. */
+#define __PHYSICAL_MASK_SHIFT 46
+#define __VIRTUAL_MASK_SHIFT 48
+
+/*
+ * Kernel image size is limited to 512 MB (see level2_kernel_pgt in
+ * arch/x86/kernel/head_64.S), and it is mapped here:
+ */
+#define KERNEL_IMAGE_SIZE (512 * 1024 * 1024)
+#define KERNEL_IMAGE_START _AC(0xffffffff80000000, UL)
+
+#ifndef __ASSEMBLY__
+void clear_page(void *page);
+void copy_page(void *to, void *from);
+
+/* duplicated to the one in bootmem.h */
+extern unsigned long max_pfn;
+extern unsigned long phys_base;
+
+extern unsigned long __phys_addr(unsigned long);
+#define __phys_reloc_hide(x) (x)
+
+#define vmemmap ((struct page *)VMEMMAP_START)
+
+extern unsigned long init_memory_mapping(unsigned long start,
+ unsigned long end);
+
+extern void initmem_init(unsigned long start_pfn, unsigned long end_pfn);
+extern void free_initmem(void);
+
+extern void init_extra_mapping_uc(unsigned long phys, unsigned long size);
+extern void init_extra_mapping_wb(unsigned long phys, unsigned long size);
+
+#endif /* !__ASSEMBLY__ */
+
+#ifdef CONFIG_FLATMEM
+#define pfn_valid(pfn) ((pfn) < max_pfn)
+#endif
+
+#endif /* _ASM_X86_PAGE_64_DEFS_H */
diff --git a/arch/x86/include/asm/page_types.h b/arch/x86/include/asm/page_types.h
new file mode 100644
index 0000000..2d625da
--- /dev/null
+++ b/arch/x86/include/asm/page_types.h
@@ -0,0 +1,57 @@
+#ifndef _ASM_X86_PAGE_DEFS_H
+#define _ASM_X86_PAGE_DEFS_H
+
+#include <linux/const.h>
+
+/* PAGE_SHIFT determines the page size */
+#define PAGE_SHIFT 12
+#define PAGE_SIZE (_AC(1,UL) << PAGE_SHIFT)
+#define PAGE_MASK (~(PAGE_SIZE-1))
+
+#define __PHYSICAL_MASK ((phys_addr_t)(1ULL << __PHYSICAL_MASK_SHIFT) - 1)
+#define __VIRTUAL_MASK ((1UL << __VIRTUAL_MASK_SHIFT) - 1)
+
+/* Cast PAGE_MASK to a signed type so that it is sign-extended if
+ virtual addresses are 32-bits but physical addresses are larger
+ (ie, 32-bit PAE). */
+#define PHYSICAL_PAGE_MASK (((signed long)PAGE_MASK) & __PHYSICAL_MASK)
+
+#define PMD_PAGE_SIZE (_AC(1, UL) << PMD_SHIFT)
+#define PMD_PAGE_MASK (~(PMD_PAGE_SIZE-1))
+
+#define HPAGE_SHIFT PMD_SHIFT
+#define HPAGE_SIZE (_AC(1,UL) << HPAGE_SHIFT)
+#define HPAGE_MASK (~(HPAGE_SIZE - 1))
+#define HUGETLB_PAGE_ORDER (HPAGE_SHIFT - PAGE_SHIFT)
+
+#define HUGE_MAX_HSTATE 2
+
+#define PAGE_OFFSET ((unsigned long)__PAGE_OFFSET)
+
+#define VM_DATA_DEFAULT_FLAGS \
+ (((current->personality & READ_IMPLIES_EXEC) ? VM_EXEC : 0 ) | \
+ VM_READ | VM_WRITE | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC)
+
+#ifdef CONFIG_X86_64
+#include <asm/page_64_types.h>
+#else
+#include <asm/page_32_types.h>
+#endif /* CONFIG_X86_64 */
+
+#ifndef __ASSEMBLY__
+
+struct pgprot;
+
+extern int page_is_ram(unsigned long pagenr);
+extern int devmem_is_allowed(unsigned long pagenr);
+extern void map_devmem(unsigned long pfn, unsigned long size,
+ struct pgprot vma_prot);
+extern void unmap_devmem(unsigned long pfn, unsigned long size,
+ struct pgprot vma_prot);
+
+extern unsigned long max_low_pfn_mapped;
+extern unsigned long max_pfn_mapped;
+
+#endif /* !__ASSEMBLY__ */
+
+#endif /* _ASM_X86_PAGE_DEFS_H */
diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h
index e299287..0617d5c 100644
--- a/arch/x86/include/asm/paravirt.h
+++ b/arch/x86/include/asm/paravirt.h
@@ -4,7 +4,7 @@
* para-virtualization: those hooks are defined here. */
#ifdef CONFIG_PARAVIRT
-#include <asm/page.h>
+#include <asm/pgtable_types.h>
#include <asm/asm.h>
/* Bitmask of what can be clobbered: usually at least eax. */
@@ -12,21 +12,38 @@
#define CLBR_EAX (1 << 0)
#define CLBR_ECX (1 << 1)
#define CLBR_EDX (1 << 2)
+#define CLBR_EDI (1 << 3)
-#ifdef CONFIG_X86_64
-#define CLBR_RSI (1 << 3)
-#define CLBR_RDI (1 << 4)
+#ifdef CONFIG_X86_32
+/* CLBR_ANY should match all regs platform has. For i386, that's just it */
+#define CLBR_ANY ((1 << 4) - 1)
+
+#define CLBR_ARG_REGS (CLBR_EAX | CLBR_EDX | CLBR_ECX)
+#define CLBR_RET_REG (CLBR_EAX | CLBR_EDX)
+#define CLBR_SCRATCH (0)
+#else
+#define CLBR_RAX CLBR_EAX
+#define CLBR_RCX CLBR_ECX
+#define CLBR_RDX CLBR_EDX
+#define CLBR_RDI CLBR_EDI
+#define CLBR_RSI (1 << 4)
#define CLBR_R8 (1 << 5)
#define CLBR_R9 (1 << 6)
#define CLBR_R10 (1 << 7)
#define CLBR_R11 (1 << 8)
+
#define CLBR_ANY ((1 << 9) - 1)
+
+#define CLBR_ARG_REGS (CLBR_RDI | CLBR_RSI | CLBR_RDX | \
+ CLBR_RCX | CLBR_R8 | CLBR_R9)
+#define CLBR_RET_REG (CLBR_RAX)
+#define CLBR_SCRATCH (CLBR_R10 | CLBR_R11)
+
#include <asm/desc_defs.h>
-#else
-/* CLBR_ANY should match all regs platform has. For i386, that's just it */
-#define CLBR_ANY ((1 << 3) - 1)
#endif /* X86_64 */
+#define CLBR_CALLEE_SAVE ((CLBR_ARG_REGS | CLBR_SCRATCH) & ~CLBR_RET_REG)
+
#ifndef __ASSEMBLY__
#include <linux/types.h>
#include <linux/cpumask.h>
@@ -40,6 +57,14 @@
struct mm_struct;
struct desc_struct;
+/*
+ * Wrapper type for pointers to code which uses the non-standard
+ * calling convention. See PV_CALL_SAVE_REGS_THUNK below.
+ */
+struct paravirt_callee_save {
+ void *func;
+};
+
/* general info */
struct pv_info {
unsigned int kernel_rpl;
@@ -189,11 +214,15 @@
* expected to use X86_EFLAGS_IF; all other bits
* returned from save_fl are undefined, and may be ignored by
* restore_fl.
+ *
+ * NOTE: These functions callers expect the callee to preserve
+ * more registers than the standard C calling convention.
*/
- unsigned long (*save_fl)(void);
- void (*restore_fl)(unsigned long);
- void (*irq_disable)(void);
- void (*irq_enable)(void);
+ struct paravirt_callee_save save_fl;
+ struct paravirt_callee_save restore_fl;
+ struct paravirt_callee_save irq_disable;
+ struct paravirt_callee_save irq_enable;
+
void (*safe_halt)(void);
void (*halt)(void);
@@ -244,7 +273,8 @@
void (*flush_tlb_user)(void);
void (*flush_tlb_kernel)(void);
void (*flush_tlb_single)(unsigned long addr);
- void (*flush_tlb_others)(const cpumask_t *cpus, struct mm_struct *mm,
+ void (*flush_tlb_others)(const struct cpumask *cpus,
+ struct mm_struct *mm,
unsigned long va);
/* Hooks for allocating and freeing a pagetable top-level */
@@ -278,12 +308,11 @@
void (*ptep_modify_prot_commit)(struct mm_struct *mm, unsigned long addr,
pte_t *ptep, pte_t pte);
- pteval_t (*pte_val)(pte_t);
- pteval_t (*pte_flags)(pte_t);
- pte_t (*make_pte)(pteval_t pte);
+ struct paravirt_callee_save pte_val;
+ struct paravirt_callee_save make_pte;
- pgdval_t (*pgd_val)(pgd_t);
- pgd_t (*make_pgd)(pgdval_t pgd);
+ struct paravirt_callee_save pgd_val;
+ struct paravirt_callee_save make_pgd;
#if PAGETABLE_LEVELS >= 3
#ifdef CONFIG_X86_PAE
@@ -298,12 +327,12 @@
void (*set_pud)(pud_t *pudp, pud_t pudval);
- pmdval_t (*pmd_val)(pmd_t);
- pmd_t (*make_pmd)(pmdval_t pmd);
+ struct paravirt_callee_save pmd_val;
+ struct paravirt_callee_save make_pmd;
#if PAGETABLE_LEVELS == 4
- pudval_t (*pud_val)(pud_t);
- pud_t (*make_pud)(pudval_t pud);
+ struct paravirt_callee_save pud_val;
+ struct paravirt_callee_save make_pud;
void (*set_pgd)(pgd_t *pudp, pgd_t pgdval);
#endif /* PAGETABLE_LEVELS == 4 */
@@ -388,6 +417,8 @@
asm("start_" #ops "_" #name ": " code "; end_" #ops "_" #name ":")
unsigned paravirt_patch_nop(void);
+unsigned paravirt_patch_ident_32(void *insnbuf, unsigned len);
+unsigned paravirt_patch_ident_64(void *insnbuf, unsigned len);
unsigned paravirt_patch_ignore(unsigned len);
unsigned paravirt_patch_call(void *insnbuf,
const void *target, u16 tgt_clobbers,
@@ -479,25 +510,45 @@
* makes sure the incoming and outgoing types are always correct.
*/
#ifdef CONFIG_X86_32
-#define PVOP_VCALL_ARGS unsigned long __eax, __edx, __ecx
+#define PVOP_VCALL_ARGS \
+ unsigned long __eax = __eax, __edx = __edx, __ecx = __ecx
#define PVOP_CALL_ARGS PVOP_VCALL_ARGS
+
+#define PVOP_CALL_ARG1(x) "a" ((unsigned long)(x))
+#define PVOP_CALL_ARG2(x) "d" ((unsigned long)(x))
+#define PVOP_CALL_ARG3(x) "c" ((unsigned long)(x))
+
#define PVOP_VCALL_CLOBBERS "=a" (__eax), "=d" (__edx), \
"=c" (__ecx)
#define PVOP_CALL_CLOBBERS PVOP_VCALL_CLOBBERS
+
+#define PVOP_VCALLEE_CLOBBERS "=a" (__eax), "=d" (__edx)
+#define PVOP_CALLEE_CLOBBERS PVOP_VCALLEE_CLOBBERS
+
#define EXTRA_CLOBBERS
#define VEXTRA_CLOBBERS
-#else
-#define PVOP_VCALL_ARGS unsigned long __edi, __esi, __edx, __ecx
+#else /* CONFIG_X86_64 */
+#define PVOP_VCALL_ARGS \
+ unsigned long __edi = __edi, __esi = __esi, \
+ __edx = __edx, __ecx = __ecx
#define PVOP_CALL_ARGS PVOP_VCALL_ARGS, __eax
+
+#define PVOP_CALL_ARG1(x) "D" ((unsigned long)(x))
+#define PVOP_CALL_ARG2(x) "S" ((unsigned long)(x))
+#define PVOP_CALL_ARG3(x) "d" ((unsigned long)(x))
+#define PVOP_CALL_ARG4(x) "c" ((unsigned long)(x))
+
#define PVOP_VCALL_CLOBBERS "=D" (__edi), \
"=S" (__esi), "=d" (__edx), \
"=c" (__ecx)
-
#define PVOP_CALL_CLOBBERS PVOP_VCALL_CLOBBERS, "=a" (__eax)
+#define PVOP_VCALLEE_CLOBBERS "=a" (__eax)
+#define PVOP_CALLEE_CLOBBERS PVOP_VCALLEE_CLOBBERS
+
#define EXTRA_CLOBBERS , "r8", "r9", "r10", "r11"
#define VEXTRA_CLOBBERS , "rax", "r8", "r9", "r10", "r11"
-#endif
+#endif /* CONFIG_X86_32 */
#ifdef CONFIG_PARAVIRT_DEBUG
#define PVOP_TEST_NULL(op) BUG_ON(op == NULL)
@@ -505,10 +556,11 @@
#define PVOP_TEST_NULL(op) ((void)op)
#endif
-#define __PVOP_CALL(rettype, op, pre, post, ...) \
+#define ____PVOP_CALL(rettype, op, clbr, call_clbr, extra_clbr, \
+ pre, post, ...) \
({ \
rettype __ret; \
- PVOP_CALL_ARGS; \
+ PVOP_CALL_ARGS; \
PVOP_TEST_NULL(op); \
/* This is 32-bit specific, but is okay in 64-bit */ \
/* since this condition will never hold */ \
@@ -516,70 +568,113 @@
asm volatile(pre \
paravirt_alt(PARAVIRT_CALL) \
post \
- : PVOP_CALL_CLOBBERS \
+ : call_clbr \
: paravirt_type(op), \
- paravirt_clobber(CLBR_ANY), \
+ paravirt_clobber(clbr), \
##__VA_ARGS__ \
- : "memory", "cc" EXTRA_CLOBBERS); \
+ : "memory", "cc" extra_clbr); \
__ret = (rettype)((((u64)__edx) << 32) | __eax); \
} else { \
asm volatile(pre \
paravirt_alt(PARAVIRT_CALL) \
post \
- : PVOP_CALL_CLOBBERS \
+ : call_clbr \
: paravirt_type(op), \
- paravirt_clobber(CLBR_ANY), \
+ paravirt_clobber(clbr), \
##__VA_ARGS__ \
- : "memory", "cc" EXTRA_CLOBBERS); \
+ : "memory", "cc" extra_clbr); \
__ret = (rettype)__eax; \
} \
__ret; \
})
-#define __PVOP_VCALL(op, pre, post, ...) \
+
+#define __PVOP_CALL(rettype, op, pre, post, ...) \
+ ____PVOP_CALL(rettype, op, CLBR_ANY, PVOP_CALL_CLOBBERS, \
+ EXTRA_CLOBBERS, pre, post, ##__VA_ARGS__)
+
+#define __PVOP_CALLEESAVE(rettype, op, pre, post, ...) \
+ ____PVOP_CALL(rettype, op.func, CLBR_RET_REG, \
+ PVOP_CALLEE_CLOBBERS, , \
+ pre, post, ##__VA_ARGS__)
+
+
+#define ____PVOP_VCALL(op, clbr, call_clbr, extra_clbr, pre, post, ...) \
({ \
PVOP_VCALL_ARGS; \
PVOP_TEST_NULL(op); \
asm volatile(pre \
paravirt_alt(PARAVIRT_CALL) \
post \
- : PVOP_VCALL_CLOBBERS \
+ : call_clbr \
: paravirt_type(op), \
- paravirt_clobber(CLBR_ANY), \
+ paravirt_clobber(clbr), \
##__VA_ARGS__ \
- : "memory", "cc" VEXTRA_CLOBBERS); \
+ : "memory", "cc" extra_clbr); \
})
+#define __PVOP_VCALL(op, pre, post, ...) \
+ ____PVOP_VCALL(op, CLBR_ANY, PVOP_VCALL_CLOBBERS, \
+ VEXTRA_CLOBBERS, \
+ pre, post, ##__VA_ARGS__)
+
+#define __PVOP_VCALLEESAVE(rettype, op, pre, post, ...) \
+ ____PVOP_CALL(rettype, op.func, CLBR_RET_REG, \
+ PVOP_VCALLEE_CLOBBERS, , \
+ pre, post, ##__VA_ARGS__)
+
+
+
#define PVOP_CALL0(rettype, op) \
__PVOP_CALL(rettype, op, "", "")
#define PVOP_VCALL0(op) \
__PVOP_VCALL(op, "", "")
+#define PVOP_CALLEE0(rettype, op) \
+ __PVOP_CALLEESAVE(rettype, op, "", "")
+#define PVOP_VCALLEE0(op) \
+ __PVOP_VCALLEESAVE(op, "", "")
+
+
#define PVOP_CALL1(rettype, op, arg1) \
- __PVOP_CALL(rettype, op, "", "", "0" ((unsigned long)(arg1)))
+ __PVOP_CALL(rettype, op, "", "", PVOP_CALL_ARG1(arg1))
#define PVOP_VCALL1(op, arg1) \
- __PVOP_VCALL(op, "", "", "0" ((unsigned long)(arg1)))
+ __PVOP_VCALL(op, "", "", PVOP_CALL_ARG1(arg1))
+
+#define PVOP_CALLEE1(rettype, op, arg1) \
+ __PVOP_CALLEESAVE(rettype, op, "", "", PVOP_CALL_ARG1(arg1))
+#define PVOP_VCALLEE1(op, arg1) \
+ __PVOP_VCALLEESAVE(op, "", "", PVOP_CALL_ARG1(arg1))
+
#define PVOP_CALL2(rettype, op, arg1, arg2) \
- __PVOP_CALL(rettype, op, "", "", "0" ((unsigned long)(arg1)), \
- "1" ((unsigned long)(arg2)))
+ __PVOP_CALL(rettype, op, "", "", PVOP_CALL_ARG1(arg1), \
+ PVOP_CALL_ARG2(arg2))
#define PVOP_VCALL2(op, arg1, arg2) \
- __PVOP_VCALL(op, "", "", "0" ((unsigned long)(arg1)), \
- "1" ((unsigned long)(arg2)))
+ __PVOP_VCALL(op, "", "", PVOP_CALL_ARG1(arg1), \
+ PVOP_CALL_ARG2(arg2))
+
+#define PVOP_CALLEE2(rettype, op, arg1, arg2) \
+ __PVOP_CALLEESAVE(rettype, op, "", "", PVOP_CALL_ARG1(arg1), \
+ PVOP_CALL_ARG2(arg2))
+#define PVOP_VCALLEE2(op, arg1, arg2) \
+ __PVOP_VCALLEESAVE(op, "", "", PVOP_CALL_ARG1(arg1), \
+ PVOP_CALL_ARG2(arg2))
+
#define PVOP_CALL3(rettype, op, arg1, arg2, arg3) \
- __PVOP_CALL(rettype, op, "", "", "0" ((unsigned long)(arg1)), \
- "1"((unsigned long)(arg2)), "2"((unsigned long)(arg3)))
+ __PVOP_CALL(rettype, op, "", "", PVOP_CALL_ARG1(arg1), \
+ PVOP_CALL_ARG2(arg2), PVOP_CALL_ARG3(arg3))
#define PVOP_VCALL3(op, arg1, arg2, arg3) \
- __PVOP_VCALL(op, "", "", "0" ((unsigned long)(arg1)), \
- "1"((unsigned long)(arg2)), "2"((unsigned long)(arg3)))
+ __PVOP_VCALL(op, "", "", PVOP_CALL_ARG1(arg1), \
+ PVOP_CALL_ARG2(arg2), PVOP_CALL_ARG3(arg3))
/* This is the only difference in x86_64. We can make it much simpler */
#ifdef CONFIG_X86_32
#define PVOP_CALL4(rettype, op, arg1, arg2, arg3, arg4) \
__PVOP_CALL(rettype, op, \
"push %[_arg4];", "lea 4(%%esp),%%esp;", \
- "0" ((u32)(arg1)), "1" ((u32)(arg2)), \
- "2" ((u32)(arg3)), [_arg4] "mr" ((u32)(arg4)))
+ PVOP_CALL_ARG1(arg1), PVOP_CALL_ARG2(arg2), \
+ PVOP_CALL_ARG3(arg3), [_arg4] "mr" ((u32)(arg4)))
#define PVOP_VCALL4(op, arg1, arg2, arg3, arg4) \
__PVOP_VCALL(op, \
"push %[_arg4];", "lea 4(%%esp),%%esp;", \
@@ -587,13 +682,13 @@
"2" ((u32)(arg3)), [_arg4] "mr" ((u32)(arg4)))
#else
#define PVOP_CALL4(rettype, op, arg1, arg2, arg3, arg4) \
- __PVOP_CALL(rettype, op, "", "", "0" ((unsigned long)(arg1)), \
- "1"((unsigned long)(arg2)), "2"((unsigned long)(arg3)), \
- "3"((unsigned long)(arg4)))
+ __PVOP_CALL(rettype, op, "", "", \
+ PVOP_CALL_ARG1(arg1), PVOP_CALL_ARG2(arg2), \
+ PVOP_CALL_ARG3(arg3), PVOP_CALL_ARG4(arg4))
#define PVOP_VCALL4(op, arg1, arg2, arg3, arg4) \
- __PVOP_VCALL(op, "", "", "0" ((unsigned long)(arg1)), \
- "1"((unsigned long)(arg2)), "2"((unsigned long)(arg3)), \
- "3"((unsigned long)(arg4)))
+ __PVOP_VCALL(op, "", "", \
+ PVOP_CALL_ARG1(arg1), PVOP_CALL_ARG2(arg2), \
+ PVOP_CALL_ARG3(arg3), PVOP_CALL_ARG4(arg4))
#endif
static inline int paravirt_enabled(void)
@@ -984,10 +1079,11 @@
PVOP_VCALL1(pv_mmu_ops.flush_tlb_single, addr);
}
-static inline void flush_tlb_others(cpumask_t cpumask, struct mm_struct *mm,
+static inline void flush_tlb_others(const struct cpumask *cpumask,
+ struct mm_struct *mm,
unsigned long va)
{
- PVOP_VCALL3(pv_mmu_ops.flush_tlb_others, &cpumask, mm, va);
+ PVOP_VCALL3(pv_mmu_ops.flush_tlb_others, cpumask, mm, va);
}
static inline int paravirt_pgd_alloc(struct mm_struct *mm)
@@ -1059,13 +1155,13 @@
pteval_t ret;
if (sizeof(pteval_t) > sizeof(long))
- ret = PVOP_CALL2(pteval_t,
- pv_mmu_ops.make_pte,
- val, (u64)val >> 32);
+ ret = PVOP_CALLEE2(pteval_t,
+ pv_mmu_ops.make_pte,
+ val, (u64)val >> 32);
else
- ret = PVOP_CALL1(pteval_t,
- pv_mmu_ops.make_pte,
- val);
+ ret = PVOP_CALLEE1(pteval_t,
+ pv_mmu_ops.make_pte,
+ val);
return (pte_t) { .pte = ret };
}
@@ -1075,42 +1171,25 @@
pteval_t ret;
if (sizeof(pteval_t) > sizeof(long))
- ret = PVOP_CALL2(pteval_t, pv_mmu_ops.pte_val,
- pte.pte, (u64)pte.pte >> 32);
+ ret = PVOP_CALLEE2(pteval_t, pv_mmu_ops.pte_val,
+ pte.pte, (u64)pte.pte >> 32);
else
- ret = PVOP_CALL1(pteval_t, pv_mmu_ops.pte_val,
- pte.pte);
+ ret = PVOP_CALLEE1(pteval_t, pv_mmu_ops.pte_val,
+ pte.pte);
return ret;
}
-static inline pteval_t pte_flags(pte_t pte)
-{
- pteval_t ret;
-
- if (sizeof(pteval_t) > sizeof(long))
- ret = PVOP_CALL2(pteval_t, pv_mmu_ops.pte_flags,
- pte.pte, (u64)pte.pte >> 32);
- else
- ret = PVOP_CALL1(pteval_t, pv_mmu_ops.pte_flags,
- pte.pte);
-
-#ifdef CONFIG_PARAVIRT_DEBUG
- BUG_ON(ret & PTE_PFN_MASK);
-#endif
- return ret;
-}
-
static inline pgd_t __pgd(pgdval_t val)
{
pgdval_t ret;
if (sizeof(pgdval_t) > sizeof(long))
- ret = PVOP_CALL2(pgdval_t, pv_mmu_ops.make_pgd,
- val, (u64)val >> 32);
+ ret = PVOP_CALLEE2(pgdval_t, pv_mmu_ops.make_pgd,
+ val, (u64)val >> 32);
else
- ret = PVOP_CALL1(pgdval_t, pv_mmu_ops.make_pgd,
- val);
+ ret = PVOP_CALLEE1(pgdval_t, pv_mmu_ops.make_pgd,
+ val);
return (pgd_t) { ret };
}
@@ -1120,11 +1199,11 @@
pgdval_t ret;
if (sizeof(pgdval_t) > sizeof(long))
- ret = PVOP_CALL2(pgdval_t, pv_mmu_ops.pgd_val,
- pgd.pgd, (u64)pgd.pgd >> 32);
+ ret = PVOP_CALLEE2(pgdval_t, pv_mmu_ops.pgd_val,
+ pgd.pgd, (u64)pgd.pgd >> 32);
else
- ret = PVOP_CALL1(pgdval_t, pv_mmu_ops.pgd_val,
- pgd.pgd);
+ ret = PVOP_CALLEE1(pgdval_t, pv_mmu_ops.pgd_val,
+ pgd.pgd);
return ret;
}
@@ -1188,11 +1267,11 @@
pmdval_t ret;
if (sizeof(pmdval_t) > sizeof(long))
- ret = PVOP_CALL2(pmdval_t, pv_mmu_ops.make_pmd,
- val, (u64)val >> 32);
+ ret = PVOP_CALLEE2(pmdval_t, pv_mmu_ops.make_pmd,
+ val, (u64)val >> 32);
else
- ret = PVOP_CALL1(pmdval_t, pv_mmu_ops.make_pmd,
- val);
+ ret = PVOP_CALLEE1(pmdval_t, pv_mmu_ops.make_pmd,
+ val);
return (pmd_t) { ret };
}
@@ -1202,11 +1281,11 @@
pmdval_t ret;
if (sizeof(pmdval_t) > sizeof(long))
- ret = PVOP_CALL2(pmdval_t, pv_mmu_ops.pmd_val,
- pmd.pmd, (u64)pmd.pmd >> 32);
+ ret = PVOP_CALLEE2(pmdval_t, pv_mmu_ops.pmd_val,
+ pmd.pmd, (u64)pmd.pmd >> 32);
else
- ret = PVOP_CALL1(pmdval_t, pv_mmu_ops.pmd_val,
- pmd.pmd);
+ ret = PVOP_CALLEE1(pmdval_t, pv_mmu_ops.pmd_val,
+ pmd.pmd);
return ret;
}
@@ -1228,11 +1307,11 @@
pudval_t ret;
if (sizeof(pudval_t) > sizeof(long))
- ret = PVOP_CALL2(pudval_t, pv_mmu_ops.make_pud,
- val, (u64)val >> 32);
+ ret = PVOP_CALLEE2(pudval_t, pv_mmu_ops.make_pud,
+ val, (u64)val >> 32);
else
- ret = PVOP_CALL1(pudval_t, pv_mmu_ops.make_pud,
- val);
+ ret = PVOP_CALLEE1(pudval_t, pv_mmu_ops.make_pud,
+ val);
return (pud_t) { ret };
}
@@ -1242,11 +1321,11 @@
pudval_t ret;
if (sizeof(pudval_t) > sizeof(long))
- ret = PVOP_CALL2(pudval_t, pv_mmu_ops.pud_val,
- pud.pud, (u64)pud.pud >> 32);
+ ret = PVOP_CALLEE2(pudval_t, pv_mmu_ops.pud_val,
+ pud.pud, (u64)pud.pud >> 32);
else
- ret = PVOP_CALL1(pudval_t, pv_mmu_ops.pud_val,
- pud.pud);
+ ret = PVOP_CALLEE1(pudval_t, pv_mmu_ops.pud_val,
+ pud.pud);
return ret;
}
@@ -1374,9 +1453,10 @@
}
void _paravirt_nop(void);
-#define paravirt_nop ((void *)_paravirt_nop)
+u32 _paravirt_ident_32(u32);
+u64 _paravirt_ident_64(u64);
-void paravirt_use_bytelocks(void);
+#define paravirt_nop ((void *)_paravirt_nop)
#ifdef CONFIG_SMP
@@ -1426,12 +1506,37 @@
__parainstructions_end[];
#ifdef CONFIG_X86_32
-#define PV_SAVE_REGS "pushl %%ecx; pushl %%edx;"
-#define PV_RESTORE_REGS "popl %%edx; popl %%ecx"
+#define PV_SAVE_REGS "pushl %ecx; pushl %edx;"
+#define PV_RESTORE_REGS "popl %edx; popl %ecx;"
+
+/* save and restore all caller-save registers, except return value */
+#define PV_SAVE_ALL_CALLER_REGS "pushl %ecx;"
+#define PV_RESTORE_ALL_CALLER_REGS "popl %ecx;"
+
#define PV_FLAGS_ARG "0"
#define PV_EXTRA_CLOBBERS
#define PV_VEXTRA_CLOBBERS
#else
+/* save and restore all caller-save registers, except return value */
+#define PV_SAVE_ALL_CALLER_REGS \
+ "push %rcx;" \
+ "push %rdx;" \
+ "push %rsi;" \
+ "push %rdi;" \
+ "push %r8;" \
+ "push %r9;" \
+ "push %r10;" \
+ "push %r11;"
+#define PV_RESTORE_ALL_CALLER_REGS \
+ "pop %r11;" \
+ "pop %r10;" \
+ "pop %r9;" \
+ "pop %r8;" \
+ "pop %rdi;" \
+ "pop %rsi;" \
+ "pop %rdx;" \
+ "pop %rcx;"
+
/* We save some registers, but all of them, that's too much. We clobber all
* caller saved registers but the argument parameter */
#define PV_SAVE_REGS "pushq %%rdi;"
@@ -1441,52 +1546,76 @@
#define PV_FLAGS_ARG "D"
#endif
+/*
+ * Generate a thunk around a function which saves all caller-save
+ * registers except for the return value. This allows C functions to
+ * be called from assembler code where fewer than normal registers are
+ * available. It may also help code generation around calls from C
+ * code if the common case doesn't use many registers.
+ *
+ * When a callee is wrapped in a thunk, the caller can assume that all
+ * arg regs and all scratch registers are preserved across the
+ * call. The return value in rax/eax will not be saved, even for void
+ * functions.
+ */
+#define PV_CALLEE_SAVE_REGS_THUNK(func) \
+ extern typeof(func) __raw_callee_save_##func; \
+ static void *__##func##__ __used = func; \
+ \
+ asm(".pushsection .text;" \
+ "__raw_callee_save_" #func ": " \
+ PV_SAVE_ALL_CALLER_REGS \
+ "call " #func ";" \
+ PV_RESTORE_ALL_CALLER_REGS \
+ "ret;" \
+ ".popsection")
+
+/* Get a reference to a callee-save function */
+#define PV_CALLEE_SAVE(func) \
+ ((struct paravirt_callee_save) { __raw_callee_save_##func })
+
+/* Promise that "func" already uses the right calling convention */
+#define __PV_IS_CALLEE_SAVE(func) \
+ ((struct paravirt_callee_save) { func })
+
static inline unsigned long __raw_local_save_flags(void)
{
unsigned long f;
- asm volatile(paravirt_alt(PV_SAVE_REGS
- PARAVIRT_CALL
- PV_RESTORE_REGS)
+ asm volatile(paravirt_alt(PARAVIRT_CALL)
: "=a"(f)
: paravirt_type(pv_irq_ops.save_fl),
paravirt_clobber(CLBR_EAX)
- : "memory", "cc" PV_VEXTRA_CLOBBERS);
+ : "memory", "cc");
return f;
}
static inline void raw_local_irq_restore(unsigned long f)
{
- asm volatile(paravirt_alt(PV_SAVE_REGS
- PARAVIRT_CALL
- PV_RESTORE_REGS)
+ asm volatile(paravirt_alt(PARAVIRT_CALL)
: "=a"(f)
: PV_FLAGS_ARG(f),
paravirt_type(pv_irq_ops.restore_fl),
paravirt_clobber(CLBR_EAX)
- : "memory", "cc" PV_EXTRA_CLOBBERS);
+ : "memory", "cc");
}
static inline void raw_local_irq_disable(void)
{
- asm volatile(paravirt_alt(PV_SAVE_REGS
- PARAVIRT_CALL
- PV_RESTORE_REGS)
+ asm volatile(paravirt_alt(PARAVIRT_CALL)
:
: paravirt_type(pv_irq_ops.irq_disable),
paravirt_clobber(CLBR_EAX)
- : "memory", "eax", "cc" PV_EXTRA_CLOBBERS);
+ : "memory", "eax", "cc");
}
static inline void raw_local_irq_enable(void)
{
- asm volatile(paravirt_alt(PV_SAVE_REGS
- PARAVIRT_CALL
- PV_RESTORE_REGS)
+ asm volatile(paravirt_alt(PARAVIRT_CALL)
:
: paravirt_type(pv_irq_ops.irq_enable),
paravirt_clobber(CLBR_EAX)
- : "memory", "eax", "cc" PV_EXTRA_CLOBBERS);
+ : "memory", "eax", "cc");
}
static inline unsigned long __raw_local_irq_save(void)
@@ -1529,33 +1658,49 @@
.popsection
+#define COND_PUSH(set, mask, reg) \
+ .if ((~(set)) & mask); push %reg; .endif
+#define COND_POP(set, mask, reg) \
+ .if ((~(set)) & mask); pop %reg; .endif
+
#ifdef CONFIG_X86_64
-#define PV_SAVE_REGS \
- push %rax; \
- push %rcx; \
- push %rdx; \
- push %rsi; \
- push %rdi; \
- push %r8; \
- push %r9; \
- push %r10; \
- push %r11
-#define PV_RESTORE_REGS \
- pop %r11; \
- pop %r10; \
- pop %r9; \
- pop %r8; \
- pop %rdi; \
- pop %rsi; \
- pop %rdx; \
- pop %rcx; \
- pop %rax
+
+#define PV_SAVE_REGS(set) \
+ COND_PUSH(set, CLBR_RAX, rax); \
+ COND_PUSH(set, CLBR_RCX, rcx); \
+ COND_PUSH(set, CLBR_RDX, rdx); \
+ COND_PUSH(set, CLBR_RSI, rsi); \
+ COND_PUSH(set, CLBR_RDI, rdi); \
+ COND_PUSH(set, CLBR_R8, r8); \
+ COND_PUSH(set, CLBR_R9, r9); \
+ COND_PUSH(set, CLBR_R10, r10); \
+ COND_PUSH(set, CLBR_R11, r11)
+#define PV_RESTORE_REGS(set) \
+ COND_POP(set, CLBR_R11, r11); \
+ COND_POP(set, CLBR_R10, r10); \
+ COND_POP(set, CLBR_R9, r9); \
+ COND_POP(set, CLBR_R8, r8); \
+ COND_POP(set, CLBR_RDI, rdi); \
+ COND_POP(set, CLBR_RSI, rsi); \
+ COND_POP(set, CLBR_RDX, rdx); \
+ COND_POP(set, CLBR_RCX, rcx); \
+ COND_POP(set, CLBR_RAX, rax)
+
#define PARA_PATCH(struct, off) ((PARAVIRT_PATCH_##struct + (off)) / 8)
#define PARA_SITE(ptype, clobbers, ops) _PVSITE(ptype, clobbers, ops, .quad, 8)
#define PARA_INDIRECT(addr) *addr(%rip)
#else
-#define PV_SAVE_REGS pushl %eax; pushl %edi; pushl %ecx; pushl %edx
-#define PV_RESTORE_REGS popl %edx; popl %ecx; popl %edi; popl %eax
+#define PV_SAVE_REGS(set) \
+ COND_PUSH(set, CLBR_EAX, eax); \
+ COND_PUSH(set, CLBR_EDI, edi); \
+ COND_PUSH(set, CLBR_ECX, ecx); \
+ COND_PUSH(set, CLBR_EDX, edx)
+#define PV_RESTORE_REGS(set) \
+ COND_POP(set, CLBR_EDX, edx); \
+ COND_POP(set, CLBR_ECX, ecx); \
+ COND_POP(set, CLBR_EDI, edi); \
+ COND_POP(set, CLBR_EAX, eax)
+
#define PARA_PATCH(struct, off) ((PARAVIRT_PATCH_##struct + (off)) / 4)
#define PARA_SITE(ptype, clobbers, ops) _PVSITE(ptype, clobbers, ops, .long, 4)
#define PARA_INDIRECT(addr) *%cs:addr
@@ -1567,15 +1712,15 @@
#define DISABLE_INTERRUPTS(clobbers) \
PARA_SITE(PARA_PATCH(pv_irq_ops, PV_IRQ_irq_disable), clobbers, \
- PV_SAVE_REGS; \
+ PV_SAVE_REGS(clobbers | CLBR_CALLEE_SAVE); \
call PARA_INDIRECT(pv_irq_ops+PV_IRQ_irq_disable); \
- PV_RESTORE_REGS;) \
+ PV_RESTORE_REGS(clobbers | CLBR_CALLEE_SAVE);)
#define ENABLE_INTERRUPTS(clobbers) \
PARA_SITE(PARA_PATCH(pv_irq_ops, PV_IRQ_irq_enable), clobbers, \
- PV_SAVE_REGS; \
+ PV_SAVE_REGS(clobbers | CLBR_CALLEE_SAVE); \
call PARA_INDIRECT(pv_irq_ops+PV_IRQ_irq_enable); \
- PV_RESTORE_REGS;)
+ PV_RESTORE_REGS(clobbers | CLBR_CALLEE_SAVE);)
#define USERGS_SYSRET32 \
PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_usergs_sysret32), \
@@ -1605,11 +1750,15 @@
PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_swapgs), CLBR_NONE, \
swapgs)
+/*
+ * Note: swapgs is very special, and in practise is either going to be
+ * implemented with a single "swapgs" instruction or something very
+ * special. Either way, we don't need to save any registers for
+ * it.
+ */
#define SWAPGS \
PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_swapgs), CLBR_NONE, \
- PV_SAVE_REGS; \
- call PARA_INDIRECT(pv_cpu_ops+PV_CPU_swapgs); \
- PV_RESTORE_REGS \
+ call PARA_INDIRECT(pv_cpu_ops+PV_CPU_swapgs) \
)
#define GET_CR2_INTO_RCX \
diff --git a/arch/x86/include/asm/pat.h b/arch/x86/include/asm/pat.h
index b8493b3..b0e7005 100644
--- a/arch/x86/include/asm/pat.h
+++ b/arch/x86/include/asm/pat.h
@@ -5,10 +5,8 @@
#ifdef CONFIG_X86_PAT
extern int pat_enabled;
-extern void validate_pat_support(struct cpuinfo_x86 *c);
#else
static const int pat_enabled;
-static inline void validate_pat_support(struct cpuinfo_x86 *c) { }
#endif
extern void pat_init(void);
@@ -17,6 +15,7 @@
unsigned long req_type, unsigned long *ret_type);
extern int free_memtype(u64 start, u64 end);
-extern void pat_disable(char *reason);
+extern int kernel_map_sync_memtype(u64 base, unsigned long size,
+ unsigned long flag);
#endif /* _ASM_X86_PAT_H */
diff --git a/arch/x86/include/asm/mach-default/pci-functions.h b/arch/x86/include/asm/pci-functions.h
similarity index 100%
rename from arch/x86/include/asm/mach-default/pci-functions.h
rename to arch/x86/include/asm/pci-functions.h
diff --git a/arch/x86/include/asm/pda.h b/arch/x86/include/asm/pda.h
deleted file mode 100644
index 2fbfff8..0000000
--- a/arch/x86/include/asm/pda.h
+++ /dev/null
@@ -1,137 +0,0 @@
-#ifndef _ASM_X86_PDA_H
-#define _ASM_X86_PDA_H
-
-#ifndef __ASSEMBLY__
-#include <linux/stddef.h>
-#include <linux/types.h>
-#include <linux/cache.h>
-#include <asm/page.h>
-
-/* Per processor datastructure. %gs points to it while the kernel runs */
-struct x8664_pda {
- struct task_struct *pcurrent; /* 0 Current process */
- unsigned long data_offset; /* 8 Per cpu data offset from linker
- address */
- unsigned long kernelstack; /* 16 top of kernel stack for current */
- unsigned long oldrsp; /* 24 user rsp for system call */
- int irqcount; /* 32 Irq nesting counter. Starts -1 */
- unsigned int cpunumber; /* 36 Logical CPU number */
-#ifdef CONFIG_CC_STACKPROTECTOR
- unsigned long stack_canary; /* 40 stack canary value */
- /* gcc-ABI: this canary MUST be at
- offset 40!!! */
-#endif
- char *irqstackptr;
- short nodenumber; /* number of current node (32k max) */
- short in_bootmem; /* pda lives in bootmem */
- unsigned int __softirq_pending;
- unsigned int __nmi_count; /* number of NMI on this CPUs */
- short mmu_state;
- short isidle;
- struct mm_struct *active_mm;
- unsigned apic_timer_irqs;
- unsigned irq0_irqs;
- unsigned irq_resched_count;
- unsigned irq_call_count;
- unsigned irq_tlb_count;
- unsigned irq_thermal_count;
- unsigned irq_threshold_count;
- unsigned irq_spurious_count;
-} ____cacheline_aligned_in_smp;
-
-extern struct x8664_pda **_cpu_pda;
-extern void pda_init(int);
-
-#define cpu_pda(i) (_cpu_pda[i])
-
-/*
- * There is no fast way to get the base address of the PDA, all the accesses
- * have to mention %fs/%gs. So it needs to be done this Torvaldian way.
- */
-extern void __bad_pda_field(void) __attribute__((noreturn));
-
-/*
- * proxy_pda doesn't actually exist, but tell gcc it is accessed for
- * all PDA accesses so it gets read/write dependencies right.
- */
-extern struct x8664_pda _proxy_pda;
-
-#define pda_offset(field) offsetof(struct x8664_pda, field)
-
-#define pda_to_op(op, field, val) \
-do { \
- typedef typeof(_proxy_pda.field) T__; \
- if (0) { T__ tmp__; tmp__ = (val); } /* type checking */ \
- switch (sizeof(_proxy_pda.field)) { \
- case 2: \
- asm(op "w %1,%%gs:%c2" : \
- "+m" (_proxy_pda.field) : \
- "ri" ((T__)val), \
- "i"(pda_offset(field))); \
- break; \
- case 4: \
- asm(op "l %1,%%gs:%c2" : \
- "+m" (_proxy_pda.field) : \
- "ri" ((T__)val), \
- "i" (pda_offset(field))); \
- break; \
- case 8: \
- asm(op "q %1,%%gs:%c2": \
- "+m" (_proxy_pda.field) : \
- "ri" ((T__)val), \
- "i"(pda_offset(field))); \
- break; \
- default: \
- __bad_pda_field(); \
- } \
-} while (0)
-
-#define pda_from_op(op, field) \
-({ \
- typeof(_proxy_pda.field) ret__; \
- switch (sizeof(_proxy_pda.field)) { \
- case 2: \
- asm(op "w %%gs:%c1,%0" : \
- "=r" (ret__) : \
- "i" (pda_offset(field)), \
- "m" (_proxy_pda.field)); \
- break; \
- case 4: \
- asm(op "l %%gs:%c1,%0": \
- "=r" (ret__): \
- "i" (pda_offset(field)), \
- "m" (_proxy_pda.field)); \
- break; \
- case 8: \
- asm(op "q %%gs:%c1,%0": \
- "=r" (ret__) : \
- "i" (pda_offset(field)), \
- "m" (_proxy_pda.field)); \
- break; \
- default: \
- __bad_pda_field(); \
- } \
- ret__; \
-})
-
-#define read_pda(field) pda_from_op("mov", field)
-#define write_pda(field, val) pda_to_op("mov", field, val)
-#define add_pda(field, val) pda_to_op("add", field, val)
-#define sub_pda(field, val) pda_to_op("sub", field, val)
-#define or_pda(field, val) pda_to_op("or", field, val)
-
-/* This is not atomic against other CPUs -- CPU preemption needs to be off */
-#define test_and_clear_bit_pda(bit, field) \
-({ \
- int old__; \
- asm volatile("btr %2,%%gs:%c3\n\tsbbl %0,%0" \
- : "=r" (old__), "+m" (_proxy_pda.field) \
- : "dIr" (bit), "i" (pda_offset(field)) : "memory");\
- old__; \
-})
-
-#endif
-
-#define PDA_STACKOFFSET (5*8)
-
-#endif /* _ASM_X86_PDA_H */
diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h
index ece7205..aee103b 100644
--- a/arch/x86/include/asm/percpu.h
+++ b/arch/x86/include/asm/percpu.h
@@ -2,53 +2,12 @@
#define _ASM_X86_PERCPU_H
#ifdef CONFIG_X86_64
-#include <linux/compiler.h>
-
-/* Same as asm-generic/percpu.h, except that we store the per cpu offset
- in the PDA. Longer term the PDA and every per cpu variable
- should be just put into a single section and referenced directly
- from %gs */
-
-#ifdef CONFIG_SMP
-#include <asm/pda.h>
-
-#define __per_cpu_offset(cpu) (cpu_pda(cpu)->data_offset)
-#define __my_cpu_offset read_pda(data_offset)
-
-#define per_cpu_offset(x) (__per_cpu_offset(x))
-
+#define __percpu_seg gs
+#define __percpu_mov_op movq
+#else
+#define __percpu_seg fs
+#define __percpu_mov_op movl
#endif
-#include <asm-generic/percpu.h>
-
-DECLARE_PER_CPU(struct x8664_pda, pda);
-
-/*
- * These are supposed to be implemented as a single instruction which
- * operates on the per-cpu data base segment. x86-64 doesn't have
- * that yet, so this is a fairly inefficient workaround for the
- * meantime. The single instruction is atomic with respect to
- * preemption and interrupts, so we need to explicitly disable
- * interrupts here to achieve the same effect. However, because it
- * can be used from within interrupt-disable/enable, we can't actually
- * disable interrupts; disabling preemption is enough.
- */
-#define x86_read_percpu(var) \
- ({ \
- typeof(per_cpu_var(var)) __tmp; \
- preempt_disable(); \
- __tmp = __get_cpu_var(var); \
- preempt_enable(); \
- __tmp; \
- })
-
-#define x86_write_percpu(var, val) \
- do { \
- preempt_disable(); \
- __get_cpu_var(var) = (val); \
- preempt_enable(); \
- } while(0)
-
-#else /* CONFIG_X86_64 */
#ifdef __ASSEMBLY__
@@ -65,47 +24,48 @@
* PER_CPU(cpu_gdt_descr, %ebx)
*/
#ifdef CONFIG_SMP
-#define PER_CPU(var, reg) \
- movl %fs:per_cpu__##this_cpu_off, reg; \
+#define PER_CPU(var, reg) \
+ __percpu_mov_op %__percpu_seg:per_cpu__this_cpu_off, reg; \
lea per_cpu__##var(reg), reg
-#define PER_CPU_VAR(var) %fs:per_cpu__##var
+#define PER_CPU_VAR(var) %__percpu_seg:per_cpu__##var
#else /* ! SMP */
-#define PER_CPU(var, reg) \
- movl $per_cpu__##var, reg
+#define PER_CPU(var, reg) \
+ __percpu_mov_op $per_cpu__##var, reg
#define PER_CPU_VAR(var) per_cpu__##var
#endif /* SMP */
+#ifdef CONFIG_X86_64_SMP
+#define INIT_PER_CPU_VAR(var) init_per_cpu__##var
+#else
+#define INIT_PER_CPU_VAR(var) per_cpu__##var
+#endif
+
#else /* ...!ASSEMBLY */
-/*
- * PER_CPU finds an address of a per-cpu variable.
- *
- * Args:
- * var - variable name
- * cpu - 32bit register containing the current CPU number
- *
- * The resulting address is stored in the "cpu" argument.
- *
- * Example:
- * PER_CPU(cpu_gdt_descr, %ebx)
- */
+#include <linux/stringify.h>
+
#ifdef CONFIG_SMP
+#define __percpu_arg(x) "%%"__stringify(__percpu_seg)":%P" #x
+#define __my_cpu_offset percpu_read(this_cpu_off)
+#else
+#define __percpu_arg(x) "%" #x
+#endif
-#define __my_cpu_offset x86_read_percpu(this_cpu_off)
+/*
+ * Initialized pointers to per-cpu variables needed for the boot
+ * processor need to use these macros to get the proper address
+ * offset from __per_cpu_load on SMP.
+ *
+ * There also must be an entry in vmlinux_64.lds.S
+ */
+#define DECLARE_INIT_PER_CPU(var) \
+ extern typeof(per_cpu_var(var)) init_per_cpu_var(var)
-/* fs segment starts at (positive) offset == __per_cpu_offset[cpu] */
-#define __percpu_seg "%%fs:"
-
-#else /* !SMP */
-
-#define __percpu_seg ""
-
-#endif /* SMP */
-
-#include <asm-generic/percpu.h>
-
-/* We can use this directly for local CPU (faster). */
-DECLARE_PER_CPU(unsigned long, this_cpu_off);
+#ifdef CONFIG_X86_64_SMP
+#define init_per_cpu_var(var) init_per_cpu__##var
+#else
+#define init_per_cpu_var(var) per_cpu_var(var)
+#endif
/* For arch-specific code, we can use direct single-insn ops (they
* don't give an lvalue though). */
@@ -120,20 +80,25 @@
} \
switch (sizeof(var)) { \
case 1: \
- asm(op "b %1,"__percpu_seg"%0" \
+ asm(op "b %1,"__percpu_arg(0) \
: "+m" (var) \
: "ri" ((T__)val)); \
break; \
case 2: \
- asm(op "w %1,"__percpu_seg"%0" \
+ asm(op "w %1,"__percpu_arg(0) \
: "+m" (var) \
: "ri" ((T__)val)); \
break; \
case 4: \
- asm(op "l %1,"__percpu_seg"%0" \
+ asm(op "l %1,"__percpu_arg(0) \
: "+m" (var) \
: "ri" ((T__)val)); \
break; \
+ case 8: \
+ asm(op "q %1,"__percpu_arg(0) \
+ : "+m" (var) \
+ : "re" ((T__)val)); \
+ break; \
default: __bad_percpu_size(); \
} \
} while (0)
@@ -143,17 +108,22 @@
typeof(var) ret__; \
switch (sizeof(var)) { \
case 1: \
- asm(op "b "__percpu_seg"%1,%0" \
+ asm(op "b "__percpu_arg(1)",%0" \
: "=r" (ret__) \
: "m" (var)); \
break; \
case 2: \
- asm(op "w "__percpu_seg"%1,%0" \
+ asm(op "w "__percpu_arg(1)",%0" \
: "=r" (ret__) \
: "m" (var)); \
break; \
case 4: \
- asm(op "l "__percpu_seg"%1,%0" \
+ asm(op "l "__percpu_arg(1)",%0" \
+ : "=r" (ret__) \
+ : "m" (var)); \
+ break; \
+ case 8: \
+ asm(op "q "__percpu_arg(1)",%0" \
: "=r" (ret__) \
: "m" (var)); \
break; \
@@ -162,13 +132,30 @@
ret__; \
})
-#define x86_read_percpu(var) percpu_from_op("mov", per_cpu__##var)
-#define x86_write_percpu(var, val) percpu_to_op("mov", per_cpu__##var, val)
-#define x86_add_percpu(var, val) percpu_to_op("add", per_cpu__##var, val)
-#define x86_sub_percpu(var, val) percpu_to_op("sub", per_cpu__##var, val)
-#define x86_or_percpu(var, val) percpu_to_op("or", per_cpu__##var, val)
+#define percpu_read(var) percpu_from_op("mov", per_cpu__##var)
+#define percpu_write(var, val) percpu_to_op("mov", per_cpu__##var, val)
+#define percpu_add(var, val) percpu_to_op("add", per_cpu__##var, val)
+#define percpu_sub(var, val) percpu_to_op("sub", per_cpu__##var, val)
+#define percpu_and(var, val) percpu_to_op("and", per_cpu__##var, val)
+#define percpu_or(var, val) percpu_to_op("or", per_cpu__##var, val)
+#define percpu_xor(var, val) percpu_to_op("xor", per_cpu__##var, val)
+
+/* This is not atomic against other CPUs -- CPU preemption needs to be off */
+#define x86_test_and_clear_bit_percpu(bit, var) \
+({ \
+ int old__; \
+ asm volatile("btr %2,"__percpu_arg(1)"\n\tsbbl %0,%0" \
+ : "=r" (old__), "+m" (per_cpu__##var) \
+ : "dIr" (bit)); \
+ old__; \
+})
+
+#include <asm-generic/percpu.h>
+
+/* We can use this directly for local CPU (faster). */
+DECLARE_PER_CPU(unsigned long, this_cpu_off);
+
#endif /* !__ASSEMBLY__ */
-#endif /* !CONFIG_X86_64 */
#ifdef CONFIG_SMP
@@ -195,9 +182,9 @@
#define early_per_cpu_ptr(_name) (_name##_early_ptr)
#define early_per_cpu_map(_name, _idx) (_name##_early_map[_idx])
#define early_per_cpu(_name, _cpu) \
- (early_per_cpu_ptr(_name) ? \
- early_per_cpu_ptr(_name)[_cpu] : \
- per_cpu(_name, _cpu))
+ *(early_per_cpu_ptr(_name) ? \
+ &early_per_cpu_ptr(_name)[_cpu] : \
+ &per_cpu(_name, _cpu))
#else /* !CONFIG_SMP */
#define DEFINE_EARLY_PER_CPU(_type, _name, _initvalue) \
diff --git a/arch/x86/include/asm/perf_counter.h b/arch/x86/include/asm/perf_counter.h
new file mode 100644
index 0000000..2e08ed7
--- /dev/null
+++ b/arch/x86/include/asm/perf_counter.h
@@ -0,0 +1,95 @@
+#ifndef _ASM_X86_PERF_COUNTER_H
+#define _ASM_X86_PERF_COUNTER_H
+
+/*
+ * Performance counter hw details:
+ */
+
+#define X86_PMC_MAX_GENERIC 8
+#define X86_PMC_MAX_FIXED 3
+
+#define X86_PMC_IDX_GENERIC 0
+#define X86_PMC_IDX_FIXED 32
+#define X86_PMC_IDX_MAX 64
+
+#define MSR_ARCH_PERFMON_PERFCTR0 0xc1
+#define MSR_ARCH_PERFMON_PERFCTR1 0xc2
+
+#define MSR_ARCH_PERFMON_EVENTSEL0 0x186
+#define MSR_ARCH_PERFMON_EVENTSEL1 0x187
+
+#define ARCH_PERFMON_EVENTSEL0_ENABLE (1 << 22)
+#define ARCH_PERFMON_EVENTSEL_INT (1 << 20)
+#define ARCH_PERFMON_EVENTSEL_OS (1 << 17)
+#define ARCH_PERFMON_EVENTSEL_USR (1 << 16)
+
+/*
+ * Includes eventsel and unit mask as well:
+ */
+#define ARCH_PERFMON_EVENT_MASK 0xffff
+
+#define ARCH_PERFMON_UNHALTED_CORE_CYCLES_SEL 0x3c
+#define ARCH_PERFMON_UNHALTED_CORE_CYCLES_UMASK (0x00 << 8)
+#define ARCH_PERFMON_UNHALTED_CORE_CYCLES_INDEX 0
+#define ARCH_PERFMON_UNHALTED_CORE_CYCLES_PRESENT \
+ (1 << (ARCH_PERFMON_UNHALTED_CORE_CYCLES_INDEX))
+
+#define ARCH_PERFMON_BRANCH_MISSES_RETIRED 6
+
+/*
+ * Intel "Architectural Performance Monitoring" CPUID
+ * detection/enumeration details:
+ */
+union cpuid10_eax {
+ struct {
+ unsigned int version_id:8;
+ unsigned int num_counters:8;
+ unsigned int bit_width:8;
+ unsigned int mask_length:8;
+ } split;
+ unsigned int full;
+};
+
+union cpuid10_edx {
+ struct {
+ unsigned int num_counters_fixed:4;
+ unsigned int reserved:28;
+ } split;
+ unsigned int full;
+};
+
+
+/*
+ * Fixed-purpose performance counters:
+ */
+
+/*
+ * All 3 fixed-mode PMCs are configured via this single MSR:
+ */
+#define MSR_ARCH_PERFMON_FIXED_CTR_CTRL 0x38d
+
+/*
+ * The counts are available in three separate MSRs:
+ */
+
+/* Instr_Retired.Any: */
+#define MSR_ARCH_PERFMON_FIXED_CTR0 0x309
+#define X86_PMC_IDX_FIXED_INSTRUCTIONS (X86_PMC_IDX_FIXED + 0)
+
+/* CPU_CLK_Unhalted.Core: */
+#define MSR_ARCH_PERFMON_FIXED_CTR1 0x30a
+#define X86_PMC_IDX_FIXED_CPU_CYCLES (X86_PMC_IDX_FIXED + 1)
+
+/* CPU_CLK_Unhalted.Ref: */
+#define MSR_ARCH_PERFMON_FIXED_CTR2 0x30b
+#define X86_PMC_IDX_FIXED_BUS_CYCLES (X86_PMC_IDX_FIXED + 2)
+
+#ifdef CONFIG_PERF_COUNTERS
+extern void init_hw_perf_counters(void);
+extern void perf_counters_lapic_init(int nmi);
+#else
+static inline void init_hw_perf_counters(void) { }
+static inline void perf_counters_lapic_init(int nmi) { }
+#endif
+
+#endif /* _ASM_X86_PERF_COUNTER_H */
diff --git a/arch/x86/include/asm/pgtable-2level-defs.h b/arch/x86/include/asm/pgtable-2level-defs.h
deleted file mode 100644
index d77db89..0000000
--- a/arch/x86/include/asm/pgtable-2level-defs.h
+++ /dev/null
@@ -1,20 +0,0 @@
-#ifndef _ASM_X86_PGTABLE_2LEVEL_DEFS_H
-#define _ASM_X86_PGTABLE_2LEVEL_DEFS_H
-
-#define SHARED_KERNEL_PMD 0
-
-/*
- * traditional i386 two-level paging structure:
- */
-
-#define PGDIR_SHIFT 22
-#define PTRS_PER_PGD 1024
-
-/*
- * the i386 is two-level, so we don't really have any
- * PMD directory physically.
- */
-
-#define PTRS_PER_PTE 1024
-
-#endif /* _ASM_X86_PGTABLE_2LEVEL_DEFS_H */
diff --git a/arch/x86/include/asm/pgtable-2level.h b/arch/x86/include/asm/pgtable-2level.h
index e0d199f..c1774ac 100644
--- a/arch/x86/include/asm/pgtable-2level.h
+++ b/arch/x86/include/asm/pgtable-2level.h
@@ -53,8 +53,6 @@
#define native_ptep_get_and_clear(xp) native_local_ptep_get_and_clear(xp)
#endif
-#define pte_none(x) (!(x).pte_low)
-
/*
* Bits _PAGE_BIT_PRESENT, _PAGE_BIT_FILE and _PAGE_BIT_PROTNONE are taken,
* split up the 29 bits of offset into this range:
diff --git a/arch/x86/include/asm/pgtable-2level_types.h b/arch/x86/include/asm/pgtable-2level_types.h
new file mode 100644
index 0000000..daacc23
--- /dev/null
+++ b/arch/x86/include/asm/pgtable-2level_types.h
@@ -0,0 +1,37 @@
+#ifndef _ASM_X86_PGTABLE_2LEVEL_DEFS_H
+#define _ASM_X86_PGTABLE_2LEVEL_DEFS_H
+
+#ifndef __ASSEMBLY__
+#include <linux/types.h>
+
+typedef unsigned long pteval_t;
+typedef unsigned long pmdval_t;
+typedef unsigned long pudval_t;
+typedef unsigned long pgdval_t;
+typedef unsigned long pgprotval_t;
+
+typedef union {
+ pteval_t pte;
+ pteval_t pte_low;
+} pte_t;
+#endif /* !__ASSEMBLY__ */
+
+#define SHARED_KERNEL_PMD 0
+#define PAGETABLE_LEVELS 2
+
+/*
+ * traditional i386 two-level paging structure:
+ */
+
+#define PGDIR_SHIFT 22
+#define PTRS_PER_PGD 1024
+
+
+/*
+ * the i386 is two-level, so we don't really have any
+ * PMD directory physically.
+ */
+
+#define PTRS_PER_PTE 1024
+
+#endif /* _ASM_X86_PGTABLE_2LEVEL_DEFS_H */
diff --git a/arch/x86/include/asm/pgtable-3level.h b/arch/x86/include/asm/pgtable-3level.h
index 447da43..3f13cdf 100644
--- a/arch/x86/include/asm/pgtable-3level.h
+++ b/arch/x86/include/asm/pgtable-3level.h
@@ -18,21 +18,6 @@
printk("%s:%d: bad pgd %p(%016Lx).\n", \
__FILE__, __LINE__, &(e), pgd_val(e))
-static inline int pud_none(pud_t pud)
-{
- return pud_val(pud) == 0;
-}
-
-static inline int pud_bad(pud_t pud)
-{
- return (pud_val(pud) & ~(PTE_PFN_MASK | _KERNPG_TABLE | _PAGE_USER)) != 0;
-}
-
-static inline int pud_present(pud_t pud)
-{
- return pud_val(pud) & _PAGE_PRESENT;
-}
-
/* Rules for using set_pte: the pte being assigned *must* be
* either not present or in a state where the hardware will
* not attempt to update the pte. In places where this is
@@ -120,15 +105,6 @@
write_cr3(pgd);
}
-#define pud_page(pud) pfn_to_page(pud_val(pud) >> PAGE_SHIFT)
-
-#define pud_page_vaddr(pud) ((unsigned long) __va(pud_val(pud) & PTE_PFN_MASK))
-
-
-/* Find an entry in the second-level page table.. */
-#define pmd_offset(pud, address) ((pmd_t *)pud_page_vaddr(*(pud)) + \
- pmd_index(address))
-
#ifdef CONFIG_SMP
static inline pte_t native_ptep_get_and_clear(pte_t *ptep)
{
@@ -145,17 +121,6 @@
#define native_ptep_get_and_clear(xp) native_local_ptep_get_and_clear(xp)
#endif
-#define __HAVE_ARCH_PTE_SAME
-static inline int pte_same(pte_t a, pte_t b)
-{
- return a.pte_low == b.pte_low && a.pte_high == b.pte_high;
-}
-
-static inline int pte_none(pte_t pte)
-{
- return !pte.pte_low && !pte.pte_high;
-}
-
/*
* Bits 0, 6 and 7 are taken in the low part of the pte,
* put the 32 bits of offset into the high part.
diff --git a/arch/x86/include/asm/pgtable-3level-defs.h b/arch/x86/include/asm/pgtable-3level_types.h
similarity index 64%
rename from arch/x86/include/asm/pgtable-3level-defs.h
rename to arch/x86/include/asm/pgtable-3level_types.h
index 6256136..1bd5876 100644
--- a/arch/x86/include/asm/pgtable-3level-defs.h
+++ b/arch/x86/include/asm/pgtable-3level_types.h
@@ -1,12 +1,31 @@
#ifndef _ASM_X86_PGTABLE_3LEVEL_DEFS_H
#define _ASM_X86_PGTABLE_3LEVEL_DEFS_H
+#ifndef __ASSEMBLY__
+#include <linux/types.h>
+
+typedef u64 pteval_t;
+typedef u64 pmdval_t;
+typedef u64 pudval_t;
+typedef u64 pgdval_t;
+typedef u64 pgprotval_t;
+
+typedef union {
+ struct {
+ unsigned long pte_low, pte_high;
+ };
+ pteval_t pte;
+} pte_t;
+#endif /* !__ASSEMBLY__ */
+
#ifdef CONFIG_PARAVIRT
#define SHARED_KERNEL_PMD (pv_info.shared_kernel_pmd)
#else
#define SHARED_KERNEL_PMD 1
#endif
+#define PAGETABLE_LEVELS 3
+
/*
* PGDIR_SHIFT determines what a top-level page table entry can map
*/
@@ -25,4 +44,5 @@
*/
#define PTRS_PER_PTE 512
+
#endif /* _ASM_X86_PGTABLE_3LEVEL_DEFS_H */
diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
index 4f5af84..1c097a3 100644
--- a/arch/x86/include/asm/pgtable.h
+++ b/arch/x86/include/asm/pgtable.h
@@ -1,164 +1,9 @@
#ifndef _ASM_X86_PGTABLE_H
#define _ASM_X86_PGTABLE_H
-#define FIRST_USER_ADDRESS 0
+#include <asm/page.h>
-#define _PAGE_BIT_PRESENT 0 /* is present */
-#define _PAGE_BIT_RW 1 /* writeable */
-#define _PAGE_BIT_USER 2 /* userspace addressable */
-#define _PAGE_BIT_PWT 3 /* page write through */
-#define _PAGE_BIT_PCD 4 /* page cache disabled */
-#define _PAGE_BIT_ACCESSED 5 /* was accessed (raised by CPU) */
-#define _PAGE_BIT_DIRTY 6 /* was written to (raised by CPU) */
-#define _PAGE_BIT_PSE 7 /* 4 MB (or 2MB) page */
-#define _PAGE_BIT_PAT 7 /* on 4KB pages */
-#define _PAGE_BIT_GLOBAL 8 /* Global TLB entry PPro+ */
-#define _PAGE_BIT_UNUSED1 9 /* available for programmer */
-#define _PAGE_BIT_IOMAP 10 /* flag used to indicate IO mapping */
-#define _PAGE_BIT_UNUSED3 11
-#define _PAGE_BIT_PAT_LARGE 12 /* On 2MB or 1GB pages */
-#define _PAGE_BIT_SPECIAL _PAGE_BIT_UNUSED1
-#define _PAGE_BIT_CPA_TEST _PAGE_BIT_UNUSED1
-#define _PAGE_BIT_NX 63 /* No execute: only valid after cpuid check */
-
-/* If _PAGE_BIT_PRESENT is clear, we use these: */
-/* - if the user mapped it with PROT_NONE; pte_present gives true */
-#define _PAGE_BIT_PROTNONE _PAGE_BIT_GLOBAL
-/* - set: nonlinear file mapping, saved PTE; unset:swap */
-#define _PAGE_BIT_FILE _PAGE_BIT_DIRTY
-
-#define _PAGE_PRESENT (_AT(pteval_t, 1) << _PAGE_BIT_PRESENT)
-#define _PAGE_RW (_AT(pteval_t, 1) << _PAGE_BIT_RW)
-#define _PAGE_USER (_AT(pteval_t, 1) << _PAGE_BIT_USER)
-#define _PAGE_PWT (_AT(pteval_t, 1) << _PAGE_BIT_PWT)
-#define _PAGE_PCD (_AT(pteval_t, 1) << _PAGE_BIT_PCD)
-#define _PAGE_ACCESSED (_AT(pteval_t, 1) << _PAGE_BIT_ACCESSED)
-#define _PAGE_DIRTY (_AT(pteval_t, 1) << _PAGE_BIT_DIRTY)
-#define _PAGE_PSE (_AT(pteval_t, 1) << _PAGE_BIT_PSE)
-#define _PAGE_GLOBAL (_AT(pteval_t, 1) << _PAGE_BIT_GLOBAL)
-#define _PAGE_UNUSED1 (_AT(pteval_t, 1) << _PAGE_BIT_UNUSED1)
-#define _PAGE_IOMAP (_AT(pteval_t, 1) << _PAGE_BIT_IOMAP)
-#define _PAGE_UNUSED3 (_AT(pteval_t, 1) << _PAGE_BIT_UNUSED3)
-#define _PAGE_PAT (_AT(pteval_t, 1) << _PAGE_BIT_PAT)
-#define _PAGE_PAT_LARGE (_AT(pteval_t, 1) << _PAGE_BIT_PAT_LARGE)
-#define _PAGE_SPECIAL (_AT(pteval_t, 1) << _PAGE_BIT_SPECIAL)
-#define _PAGE_CPA_TEST (_AT(pteval_t, 1) << _PAGE_BIT_CPA_TEST)
-#define __HAVE_ARCH_PTE_SPECIAL
-
-#if defined(CONFIG_X86_64) || defined(CONFIG_X86_PAE)
-#define _PAGE_NX (_AT(pteval_t, 1) << _PAGE_BIT_NX)
-#else
-#define _PAGE_NX (_AT(pteval_t, 0))
-#endif
-
-#define _PAGE_FILE (_AT(pteval_t, 1) << _PAGE_BIT_FILE)
-#define _PAGE_PROTNONE (_AT(pteval_t, 1) << _PAGE_BIT_PROTNONE)
-
-#define _PAGE_TABLE (_PAGE_PRESENT | _PAGE_RW | _PAGE_USER | \
- _PAGE_ACCESSED | _PAGE_DIRTY)
-#define _KERNPG_TABLE (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | \
- _PAGE_DIRTY)
-
-/* Set of bits not changed in pte_modify */
-#define _PAGE_CHG_MASK (PTE_PFN_MASK | _PAGE_PCD | _PAGE_PWT | \
- _PAGE_SPECIAL | _PAGE_ACCESSED | _PAGE_DIRTY)
-
-#define _PAGE_CACHE_MASK (_PAGE_PCD | _PAGE_PWT)
-#define _PAGE_CACHE_WB (0)
-#define _PAGE_CACHE_WC (_PAGE_PWT)
-#define _PAGE_CACHE_UC_MINUS (_PAGE_PCD)
-#define _PAGE_CACHE_UC (_PAGE_PCD | _PAGE_PWT)
-
-#define PAGE_NONE __pgprot(_PAGE_PROTNONE | _PAGE_ACCESSED)
-#define PAGE_SHARED __pgprot(_PAGE_PRESENT | _PAGE_RW | _PAGE_USER | \
- _PAGE_ACCESSED | _PAGE_NX)
-
-#define PAGE_SHARED_EXEC __pgprot(_PAGE_PRESENT | _PAGE_RW | \
- _PAGE_USER | _PAGE_ACCESSED)
-#define PAGE_COPY_NOEXEC __pgprot(_PAGE_PRESENT | _PAGE_USER | \
- _PAGE_ACCESSED | _PAGE_NX)
-#define PAGE_COPY_EXEC __pgprot(_PAGE_PRESENT | _PAGE_USER | \
- _PAGE_ACCESSED)
-#define PAGE_COPY PAGE_COPY_NOEXEC
-#define PAGE_READONLY __pgprot(_PAGE_PRESENT | _PAGE_USER | \
- _PAGE_ACCESSED | _PAGE_NX)
-#define PAGE_READONLY_EXEC __pgprot(_PAGE_PRESENT | _PAGE_USER | \
- _PAGE_ACCESSED)
-
-#define __PAGE_KERNEL_EXEC \
- (_PAGE_PRESENT | _PAGE_RW | _PAGE_DIRTY | _PAGE_ACCESSED | _PAGE_GLOBAL)
-#define __PAGE_KERNEL (__PAGE_KERNEL_EXEC | _PAGE_NX)
-
-#define __PAGE_KERNEL_RO (__PAGE_KERNEL & ~_PAGE_RW)
-#define __PAGE_KERNEL_RX (__PAGE_KERNEL_EXEC & ~_PAGE_RW)
-#define __PAGE_KERNEL_EXEC_NOCACHE (__PAGE_KERNEL_EXEC | _PAGE_PCD | _PAGE_PWT)
-#define __PAGE_KERNEL_WC (__PAGE_KERNEL | _PAGE_CACHE_WC)
-#define __PAGE_KERNEL_NOCACHE (__PAGE_KERNEL | _PAGE_PCD | _PAGE_PWT)
-#define __PAGE_KERNEL_UC_MINUS (__PAGE_KERNEL | _PAGE_PCD)
-#define __PAGE_KERNEL_VSYSCALL (__PAGE_KERNEL_RX | _PAGE_USER)
-#define __PAGE_KERNEL_VSYSCALL_NOCACHE (__PAGE_KERNEL_VSYSCALL | _PAGE_PCD | _PAGE_PWT)
-#define __PAGE_KERNEL_LARGE (__PAGE_KERNEL | _PAGE_PSE)
-#define __PAGE_KERNEL_LARGE_NOCACHE (__PAGE_KERNEL | _PAGE_CACHE_UC | _PAGE_PSE)
-#define __PAGE_KERNEL_LARGE_EXEC (__PAGE_KERNEL_EXEC | _PAGE_PSE)
-
-#define __PAGE_KERNEL_IO (__PAGE_KERNEL | _PAGE_IOMAP)
-#define __PAGE_KERNEL_IO_NOCACHE (__PAGE_KERNEL_NOCACHE | _PAGE_IOMAP)
-#define __PAGE_KERNEL_IO_UC_MINUS (__PAGE_KERNEL_UC_MINUS | _PAGE_IOMAP)
-#define __PAGE_KERNEL_IO_WC (__PAGE_KERNEL_WC | _PAGE_IOMAP)
-
-#define PAGE_KERNEL __pgprot(__PAGE_KERNEL)
-#define PAGE_KERNEL_RO __pgprot(__PAGE_KERNEL_RO)
-#define PAGE_KERNEL_EXEC __pgprot(__PAGE_KERNEL_EXEC)
-#define PAGE_KERNEL_RX __pgprot(__PAGE_KERNEL_RX)
-#define PAGE_KERNEL_WC __pgprot(__PAGE_KERNEL_WC)
-#define PAGE_KERNEL_NOCACHE __pgprot(__PAGE_KERNEL_NOCACHE)
-#define PAGE_KERNEL_UC_MINUS __pgprot(__PAGE_KERNEL_UC_MINUS)
-#define PAGE_KERNEL_EXEC_NOCACHE __pgprot(__PAGE_KERNEL_EXEC_NOCACHE)
-#define PAGE_KERNEL_LARGE __pgprot(__PAGE_KERNEL_LARGE)
-#define PAGE_KERNEL_LARGE_NOCACHE __pgprot(__PAGE_KERNEL_LARGE_NOCACHE)
-#define PAGE_KERNEL_LARGE_EXEC __pgprot(__PAGE_KERNEL_LARGE_EXEC)
-#define PAGE_KERNEL_VSYSCALL __pgprot(__PAGE_KERNEL_VSYSCALL)
-#define PAGE_KERNEL_VSYSCALL_NOCACHE __pgprot(__PAGE_KERNEL_VSYSCALL_NOCACHE)
-
-#define PAGE_KERNEL_IO __pgprot(__PAGE_KERNEL_IO)
-#define PAGE_KERNEL_IO_NOCACHE __pgprot(__PAGE_KERNEL_IO_NOCACHE)
-#define PAGE_KERNEL_IO_UC_MINUS __pgprot(__PAGE_KERNEL_IO_UC_MINUS)
-#define PAGE_KERNEL_IO_WC __pgprot(__PAGE_KERNEL_IO_WC)
-
-/* xwr */
-#define __P000 PAGE_NONE
-#define __P001 PAGE_READONLY
-#define __P010 PAGE_COPY
-#define __P011 PAGE_COPY
-#define __P100 PAGE_READONLY_EXEC
-#define __P101 PAGE_READONLY_EXEC
-#define __P110 PAGE_COPY_EXEC
-#define __P111 PAGE_COPY_EXEC
-
-#define __S000 PAGE_NONE
-#define __S001 PAGE_READONLY
-#define __S010 PAGE_SHARED
-#define __S011 PAGE_SHARED
-#define __S100 PAGE_READONLY_EXEC
-#define __S101 PAGE_READONLY_EXEC
-#define __S110 PAGE_SHARED_EXEC
-#define __S111 PAGE_SHARED_EXEC
-
-/*
- * early identity mapping pte attrib macros.
- */
-#ifdef CONFIG_X86_64
-#define __PAGE_KERNEL_IDENT_LARGE_EXEC __PAGE_KERNEL_LARGE_EXEC
-#else
-/*
- * For PDE_IDENT_ATTR include USER bit. As the PDE and PTE protection
- * bits are combined, this will alow user to access the high address mapped
- * VDSO in the presence of CONFIG_COMPAT_VDSO
- */
-#define PTE_IDENT_ATTR 0x003 /* PRESENT+RW */
-#define PDE_IDENT_ATTR 0x067 /* PRESENT+RW+USER+DIRTY+ACCESSED */
-#define PGD_IDENT_ATTR 0x001 /* PRESENT (no other attributes) */
-#endif
+#include <asm/pgtable_types.h>
/*
* Macro to mark a page protection value as UC-
@@ -170,9 +15,6 @@
#ifndef __ASSEMBLY__
-#define pgprot_writecombine pgprot_writecombine
-extern pgprot_t pgprot_writecombine(pgprot_t prot);
-
/*
* ZERO_PAGE is a global shared page that is always zero: used
* for zero-mapped memory areas etc..
@@ -183,6 +25,66 @@
extern spinlock_t pgd_lock;
extern struct list_head pgd_list;
+#ifdef CONFIG_PARAVIRT
+#include <asm/paravirt.h>
+#else /* !CONFIG_PARAVIRT */
+#define set_pte(ptep, pte) native_set_pte(ptep, pte)
+#define set_pte_at(mm, addr, ptep, pte) native_set_pte_at(mm, addr, ptep, pte)
+
+#define set_pte_present(mm, addr, ptep, pte) \
+ native_set_pte_present(mm, addr, ptep, pte)
+#define set_pte_atomic(ptep, pte) \
+ native_set_pte_atomic(ptep, pte)
+
+#define set_pmd(pmdp, pmd) native_set_pmd(pmdp, pmd)
+
+#ifndef __PAGETABLE_PUD_FOLDED
+#define set_pgd(pgdp, pgd) native_set_pgd(pgdp, pgd)
+#define pgd_clear(pgd) native_pgd_clear(pgd)
+#endif
+
+#ifndef set_pud
+# define set_pud(pudp, pud) native_set_pud(pudp, pud)
+#endif
+
+#ifndef __PAGETABLE_PMD_FOLDED
+#define pud_clear(pud) native_pud_clear(pud)
+#endif
+
+#define pte_clear(mm, addr, ptep) native_pte_clear(mm, addr, ptep)
+#define pmd_clear(pmd) native_pmd_clear(pmd)
+
+#define pte_update(mm, addr, ptep) do { } while (0)
+#define pte_update_defer(mm, addr, ptep) do { } while (0)
+
+static inline void __init paravirt_pagetable_setup_start(pgd_t *base)
+{
+ native_pagetable_setup_start(base);
+}
+
+static inline void __init paravirt_pagetable_setup_done(pgd_t *base)
+{
+ native_pagetable_setup_done(base);
+}
+
+#define pgd_val(x) native_pgd_val(x)
+#define __pgd(x) native_make_pgd(x)
+
+#ifndef __PAGETABLE_PUD_FOLDED
+#define pud_val(x) native_pud_val(x)
+#define __pud(x) native_make_pud(x)
+#endif
+
+#ifndef __PAGETABLE_PMD_FOLDED
+#define pmd_val(x) native_pmd_val(x)
+#define __pmd(x) native_make_pmd(x)
+#endif
+
+#define pte_val(x) native_pte_val(x)
+#define __pte(x) native_make_pte(x)
+
+#endif /* CONFIG_PARAVIRT */
+
/*
* The following only work if pte_present() is true.
* Undefined behaviour if not..
@@ -236,72 +138,84 @@
static inline int pmd_large(pmd_t pte)
{
- return (pmd_val(pte) & (_PAGE_PSE | _PAGE_PRESENT)) ==
+ return (pmd_flags(pte) & (_PAGE_PSE | _PAGE_PRESENT)) ==
(_PAGE_PSE | _PAGE_PRESENT);
}
+static inline pte_t pte_set_flags(pte_t pte, pteval_t set)
+{
+ pteval_t v = native_pte_val(pte);
+
+ return native_make_pte(v | set);
+}
+
+static inline pte_t pte_clear_flags(pte_t pte, pteval_t clear)
+{
+ pteval_t v = native_pte_val(pte);
+
+ return native_make_pte(v & ~clear);
+}
+
static inline pte_t pte_mkclean(pte_t pte)
{
- return __pte(pte_val(pte) & ~_PAGE_DIRTY);
+ return pte_clear_flags(pte, _PAGE_DIRTY);
}
static inline pte_t pte_mkold(pte_t pte)
{
- return __pte(pte_val(pte) & ~_PAGE_ACCESSED);
+ return pte_clear_flags(pte, _PAGE_ACCESSED);
}
static inline pte_t pte_wrprotect(pte_t pte)
{
- return __pte(pte_val(pte) & ~_PAGE_RW);
+ return pte_clear_flags(pte, _PAGE_RW);
}
static inline pte_t pte_mkexec(pte_t pte)
{
- return __pte(pte_val(pte) & ~_PAGE_NX);
+ return pte_clear_flags(pte, _PAGE_NX);
}
static inline pte_t pte_mkdirty(pte_t pte)
{
- return __pte(pte_val(pte) | _PAGE_DIRTY);
+ return pte_set_flags(pte, _PAGE_DIRTY);
}
static inline pte_t pte_mkyoung(pte_t pte)
{
- return __pte(pte_val(pte) | _PAGE_ACCESSED);
+ return pte_set_flags(pte, _PAGE_ACCESSED);
}
static inline pte_t pte_mkwrite(pte_t pte)
{
- return __pte(pte_val(pte) | _PAGE_RW);
+ return pte_set_flags(pte, _PAGE_RW);
}
static inline pte_t pte_mkhuge(pte_t pte)
{
- return __pte(pte_val(pte) | _PAGE_PSE);
+ return pte_set_flags(pte, _PAGE_PSE);
}
static inline pte_t pte_clrhuge(pte_t pte)
{
- return __pte(pte_val(pte) & ~_PAGE_PSE);
+ return pte_clear_flags(pte, _PAGE_PSE);
}
static inline pte_t pte_mkglobal(pte_t pte)
{
- return __pte(pte_val(pte) | _PAGE_GLOBAL);
+ return pte_set_flags(pte, _PAGE_GLOBAL);
}
static inline pte_t pte_clrglobal(pte_t pte)
{
- return __pte(pte_val(pte) & ~_PAGE_GLOBAL);
+ return pte_clear_flags(pte, _PAGE_GLOBAL);
}
static inline pte_t pte_mkspecial(pte_t pte)
{
- return __pte(pte_val(pte) | _PAGE_SPECIAL);
+ return pte_set_flags(pte, _PAGE_SPECIAL);
}
-extern pteval_t __supported_pte_mask;
-
/*
* Mask out unsupported bits in a present pgprot. Non-present pgprots
* can use those bits for other purposes, so leave them be.
@@ -374,75 +288,6 @@
return 1;
}
-#ifndef __ASSEMBLY__
-/* Indicate that x86 has its own track and untrack pfn vma functions */
-#define __HAVE_PFNMAP_TRACKING
-
-#define __HAVE_PHYS_MEM_ACCESS_PROT
-struct file;
-pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn,
- unsigned long size, pgprot_t vma_prot);
-int phys_mem_access_prot_allowed(struct file *file, unsigned long pfn,
- unsigned long size, pgprot_t *vma_prot);
-#endif
-
-/* Install a pte for a particular vaddr in kernel space. */
-void set_pte_vaddr(unsigned long vaddr, pte_t pte);
-
-#ifdef CONFIG_X86_32
-extern void native_pagetable_setup_start(pgd_t *base);
-extern void native_pagetable_setup_done(pgd_t *base);
-#else
-static inline void native_pagetable_setup_start(pgd_t *base) {}
-static inline void native_pagetable_setup_done(pgd_t *base) {}
-#endif
-
-struct seq_file;
-extern void arch_report_meminfo(struct seq_file *m);
-
-#ifdef CONFIG_PARAVIRT
-#include <asm/paravirt.h>
-#else /* !CONFIG_PARAVIRT */
-#define set_pte(ptep, pte) native_set_pte(ptep, pte)
-#define set_pte_at(mm, addr, ptep, pte) native_set_pte_at(mm, addr, ptep, pte)
-
-#define set_pte_present(mm, addr, ptep, pte) \
- native_set_pte_present(mm, addr, ptep, pte)
-#define set_pte_atomic(ptep, pte) \
- native_set_pte_atomic(ptep, pte)
-
-#define set_pmd(pmdp, pmd) native_set_pmd(pmdp, pmd)
-
-#ifndef __PAGETABLE_PUD_FOLDED
-#define set_pgd(pgdp, pgd) native_set_pgd(pgdp, pgd)
-#define pgd_clear(pgd) native_pgd_clear(pgd)
-#endif
-
-#ifndef set_pud
-# define set_pud(pudp, pud) native_set_pud(pudp, pud)
-#endif
-
-#ifndef __PAGETABLE_PMD_FOLDED
-#define pud_clear(pud) native_pud_clear(pud)
-#endif
-
-#define pte_clear(mm, addr, ptep) native_pte_clear(mm, addr, ptep)
-#define pmd_clear(pmd) native_pmd_clear(pmd)
-
-#define pte_update(mm, addr, ptep) do { } while (0)
-#define pte_update_defer(mm, addr, ptep) do { } while (0)
-
-static inline void __init paravirt_pagetable_setup_start(pgd_t *base)
-{
- native_pagetable_setup_start(base);
-}
-
-static inline void __init paravirt_pagetable_setup_done(pgd_t *base)
-{
- native_pagetable_setup_done(base);
-}
-#endif /* CONFIG_PARAVIRT */
-
#endif /* __ASSEMBLY__ */
#ifdef CONFIG_X86_32
@@ -451,6 +296,188 @@
# include "pgtable_64.h"
#endif
+#ifndef __ASSEMBLY__
+#include <linux/mm_types.h>
+
+static inline int pte_none(pte_t pte)
+{
+ return !pte.pte;
+}
+
+#define __HAVE_ARCH_PTE_SAME
+static inline int pte_same(pte_t a, pte_t b)
+{
+ return a.pte == b.pte;
+}
+
+static inline int pte_present(pte_t a)
+{
+ return pte_flags(a) & (_PAGE_PRESENT | _PAGE_PROTNONE);
+}
+
+static inline int pmd_present(pmd_t pmd)
+{
+ return pmd_flags(pmd) & _PAGE_PRESENT;
+}
+
+static inline int pmd_none(pmd_t pmd)
+{
+ /* Only check low word on 32-bit platforms, since it might be
+ out of sync with upper half. */
+ return (unsigned long)native_pmd_val(pmd) == 0;
+}
+
+static inline unsigned long pmd_page_vaddr(pmd_t pmd)
+{
+ return (unsigned long)__va(pmd_val(pmd) & PTE_PFN_MASK);
+}
+
+/*
+ * Currently stuck as a macro due to indirect forward reference to
+ * linux/mmzone.h's __section_mem_map_addr() definition:
+ */
+#define pmd_page(pmd) pfn_to_page(pmd_val(pmd) >> PAGE_SHIFT)
+
+/*
+ * the pmd page can be thought of an array like this: pmd_t[PTRS_PER_PMD]
+ *
+ * this macro returns the index of the entry in the pmd page which would
+ * control the given virtual address
+ */
+static inline unsigned pmd_index(unsigned long address)
+{
+ return (address >> PMD_SHIFT) & (PTRS_PER_PMD - 1);
+}
+
+/*
+ * Conversion functions: convert a page and protection to a page entry,
+ * and a page entry and page directory to the page they refer to.
+ *
+ * (Currently stuck as a macro because of indirect forward reference
+ * to linux/mm.h:page_to_nid())
+ */
+#define mk_pte(page, pgprot) pfn_pte(page_to_pfn(page), (pgprot))
+
+/*
+ * the pte page can be thought of an array like this: pte_t[PTRS_PER_PTE]
+ *
+ * this function returns the index of the entry in the pte page which would
+ * control the given virtual address
+ */
+static inline unsigned pte_index(unsigned long address)
+{
+ return (address >> PAGE_SHIFT) & (PTRS_PER_PTE - 1);
+}
+
+static inline pte_t *pte_offset_kernel(pmd_t *pmd, unsigned long address)
+{
+ return (pte_t *)pmd_page_vaddr(*pmd) + pte_index(address);
+}
+
+static inline int pmd_bad(pmd_t pmd)
+{
+ return (pmd_flags(pmd) & ~_PAGE_USER) != _KERNPG_TABLE;
+}
+
+static inline unsigned long pages_to_mb(unsigned long npg)
+{
+ return npg >> (20 - PAGE_SHIFT);
+}
+
+#define io_remap_pfn_range(vma, vaddr, pfn, size, prot) \
+ remap_pfn_range(vma, vaddr, pfn, size, prot)
+
+#if PAGETABLE_LEVELS > 2
+static inline int pud_none(pud_t pud)
+{
+ return native_pud_val(pud) == 0;
+}
+
+static inline int pud_present(pud_t pud)
+{
+ return pud_flags(pud) & _PAGE_PRESENT;
+}
+
+static inline unsigned long pud_page_vaddr(pud_t pud)
+{
+ return (unsigned long)__va((unsigned long)pud_val(pud) & PTE_PFN_MASK);
+}
+
+/*
+ * Currently stuck as a macro due to indirect forward reference to
+ * linux/mmzone.h's __section_mem_map_addr() definition:
+ */
+#define pud_page(pud) pfn_to_page(pud_val(pud) >> PAGE_SHIFT)
+
+/* Find an entry in the second-level page table.. */
+static inline pmd_t *pmd_offset(pud_t *pud, unsigned long address)
+{
+ return (pmd_t *)pud_page_vaddr(*pud) + pmd_index(address);
+}
+
+static inline unsigned long pmd_pfn(pmd_t pmd)
+{
+ return (pmd_val(pmd) & PTE_PFN_MASK) >> PAGE_SHIFT;
+}
+
+static inline int pud_large(pud_t pud)
+{
+ return (pud_val(pud) & (_PAGE_PSE | _PAGE_PRESENT)) ==
+ (_PAGE_PSE | _PAGE_PRESENT);
+}
+
+static inline int pud_bad(pud_t pud)
+{
+ return (pud_flags(pud) & ~(_KERNPG_TABLE | _PAGE_USER)) != 0;
+}
+#else
+static inline int pud_large(pud_t pud)
+{
+ return 0;
+}
+#endif /* PAGETABLE_LEVELS > 2 */
+
+#if PAGETABLE_LEVELS > 3
+static inline int pgd_present(pgd_t pgd)
+{
+ return pgd_flags(pgd) & _PAGE_PRESENT;
+}
+
+static inline unsigned long pgd_page_vaddr(pgd_t pgd)
+{
+ return (unsigned long)__va((unsigned long)pgd_val(pgd) & PTE_PFN_MASK);
+}
+
+/*
+ * Currently stuck as a macro due to indirect forward reference to
+ * linux/mmzone.h's __section_mem_map_addr() definition:
+ */
+#define pgd_page(pgd) pfn_to_page(pgd_val(pgd) >> PAGE_SHIFT)
+
+/* to find an entry in a page-table-directory. */
+static inline unsigned pud_index(unsigned long address)
+{
+ return (address >> PUD_SHIFT) & (PTRS_PER_PUD - 1);
+}
+
+static inline pud_t *pud_offset(pgd_t *pgd, unsigned long address)
+{
+ return (pud_t *)pgd_page_vaddr(*pgd) + pud_index(address);
+}
+
+static inline int pgd_bad(pgd_t pgd)
+{
+ return (pgd_flags(pgd) & ~_PAGE_USER) != _KERNPG_TABLE;
+}
+
+static inline int pgd_none(pgd_t pgd)
+{
+ return !native_pgd_val(pgd);
+}
+#endif /* PAGETABLE_LEVELS > 3 */
+
+#endif /* __ASSEMBLY__ */
+
/*
* the pgd page can be thought of an array like this: pgd_t[PTRS_PER_PGD]
*
@@ -476,28 +503,6 @@
#ifndef __ASSEMBLY__
-enum {
- PG_LEVEL_NONE,
- PG_LEVEL_4K,
- PG_LEVEL_2M,
- PG_LEVEL_1G,
- PG_LEVEL_NUM
-};
-
-#ifdef CONFIG_PROC_FS
-extern void update_page_count(int level, unsigned long pages);
-#else
-static inline void update_page_count(int level, unsigned long pages) { }
-#endif
-
-/*
- * Helper function that returns the kernel pagetable entry controlling
- * the virtual address 'address'. NULL means no pagetable entry present.
- * NOTE: the return type is pte_t but if the pmd is PSE then we return it
- * as a pte too.
- */
-extern pte_t *lookup_address(unsigned long address, unsigned int *level);
-
/* local pte updates need not use xchg for locking */
static inline pte_t native_local_ptep_get_and_clear(pte_t *ptep)
{
diff --git a/arch/x86/include/asm/pgtable_32.h b/arch/x86/include/asm/pgtable_32.h
index 72b020d..97612fc 100644
--- a/arch/x86/include/asm/pgtable_32.h
+++ b/arch/x86/include/asm/pgtable_32.h
@@ -1,6 +1,7 @@
#ifndef _ASM_X86_PGTABLE_32_H
#define _ASM_X86_PGTABLE_32_H
+#include <asm/pgtable_32_types.h>
/*
* The Linux memory management assumes a three-level page table setup. On
@@ -33,47 +34,6 @@
extern void set_pmd_pfn(unsigned long, unsigned long, pgprot_t);
-/*
- * The Linux x86 paging architecture is 'compile-time dual-mode', it
- * implements both the traditional 2-level x86 page tables and the
- * newer 3-level PAE-mode page tables.
- */
-#ifdef CONFIG_X86_PAE
-# include <asm/pgtable-3level-defs.h>
-# define PMD_SIZE (1UL << PMD_SHIFT)
-# define PMD_MASK (~(PMD_SIZE - 1))
-#else
-# include <asm/pgtable-2level-defs.h>
-#endif
-
-#define PGDIR_SIZE (1UL << PGDIR_SHIFT)
-#define PGDIR_MASK (~(PGDIR_SIZE - 1))
-
-/* Just any arbitrary offset to the start of the vmalloc VM area: the
- * current 8MB value just means that there will be a 8MB "hole" after the
- * physical memory until the kernel virtual memory starts. That means that
- * any out-of-bounds memory accesses will hopefully be caught.
- * The vmalloc() routines leaves a hole of 4kB between each vmalloced
- * area for the same reason. ;)
- */
-#define VMALLOC_OFFSET (8 * 1024 * 1024)
-#define VMALLOC_START ((unsigned long)high_memory + VMALLOC_OFFSET)
-#ifdef CONFIG_X86_PAE
-#define LAST_PKMAP 512
-#else
-#define LAST_PKMAP 1024
-#endif
-
-#define PKMAP_BASE ((FIXADDR_BOOT_START - PAGE_SIZE * (LAST_PKMAP + 1)) \
- & PMD_MASK)
-
-#ifdef CONFIG_HIGHMEM
-# define VMALLOC_END (PKMAP_BASE - 2 * PAGE_SIZE)
-#else
-# define VMALLOC_END (FIXADDR_START - 2 * PAGE_SIZE)
-#endif
-
-#define MAXMEM (VMALLOC_END - PAGE_OFFSET - __VMALLOC_RESERVE)
/*
* Define this if things work differently on an i386 and an i486:
@@ -85,55 +45,12 @@
/* The boot page tables (all created as a single array) */
extern unsigned long pg0[];
-#define pte_present(x) ((x).pte_low & (_PAGE_PRESENT | _PAGE_PROTNONE))
-
-/* To avoid harmful races, pmd_none(x) should check only the lower when PAE */
-#define pmd_none(x) (!(unsigned long)pmd_val((x)))
-#define pmd_present(x) (pmd_val((x)) & _PAGE_PRESENT)
-#define pmd_bad(x) ((pmd_val(x) & (PTE_FLAGS_MASK & ~_PAGE_USER)) != _KERNPG_TABLE)
-
-#define pages_to_mb(x) ((x) >> (20-PAGE_SHIFT))
-
#ifdef CONFIG_X86_PAE
# include <asm/pgtable-3level.h>
#else
# include <asm/pgtable-2level.h>
#endif
-/*
- * Conversion functions: convert a page and protection to a page entry,
- * and a page entry and page directory to the page they refer to.
- */
-#define mk_pte(page, pgprot) pfn_pte(page_to_pfn(page), (pgprot))
-
-
-static inline int pud_large(pud_t pud) { return 0; }
-
-/*
- * the pmd page can be thought of an array like this: pmd_t[PTRS_PER_PMD]
- *
- * this macro returns the index of the entry in the pmd page which would
- * control the given virtual address
- */
-#define pmd_index(address) \
- (((address) >> PMD_SHIFT) & (PTRS_PER_PMD - 1))
-
-/*
- * the pte page can be thought of an array like this: pte_t[PTRS_PER_PTE]
- *
- * this macro returns the index of the entry in the pte page which would
- * control the given virtual address
- */
-#define pte_index(address) \
- (((address) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1))
-#define pte_offset_kernel(dir, address) \
- ((pte_t *)pmd_page_vaddr(*(dir)) + pte_index((address)))
-
-#define pmd_page(pmd) (pfn_to_page(pmd_val((pmd)) >> PAGE_SHIFT))
-
-#define pmd_page_vaddr(pmd) \
- ((unsigned long)__va(pmd_val((pmd)) & PTE_PFN_MASK))
-
#if defined(CONFIG_HIGHPTE)
#define pte_offset_map(dir, address) \
((pte_t *)kmap_atomic_pte(pmd_page(*(dir)), KM_PTE0) + \
@@ -176,7 +93,4 @@
#define kern_addr_valid(kaddr) (0)
#endif
-#define io_remap_pfn_range(vma, vaddr, pfn, size, prot) \
- remap_pfn_range(vma, vaddr, pfn, size, prot)
-
#endif /* _ASM_X86_PGTABLE_32_H */
diff --git a/arch/x86/include/asm/pgtable_32_types.h b/arch/x86/include/asm/pgtable_32_types.h
new file mode 100644
index 0000000..bd8df3b
--- /dev/null
+++ b/arch/x86/include/asm/pgtable_32_types.h
@@ -0,0 +1,46 @@
+#ifndef _ASM_X86_PGTABLE_32_DEFS_H
+#define _ASM_X86_PGTABLE_32_DEFS_H
+
+/*
+ * The Linux x86 paging architecture is 'compile-time dual-mode', it
+ * implements both the traditional 2-level x86 page tables and the
+ * newer 3-level PAE-mode page tables.
+ */
+#ifdef CONFIG_X86_PAE
+# include <asm/pgtable-3level_types.h>
+# define PMD_SIZE (1UL << PMD_SHIFT)
+# define PMD_MASK (~(PMD_SIZE - 1))
+#else
+# include <asm/pgtable-2level_types.h>
+#endif
+
+#define PGDIR_SIZE (1UL << PGDIR_SHIFT)
+#define PGDIR_MASK (~(PGDIR_SIZE - 1))
+
+/* Just any arbitrary offset to the start of the vmalloc VM area: the
+ * current 8MB value just means that there will be a 8MB "hole" after the
+ * physical memory until the kernel virtual memory starts. That means that
+ * any out-of-bounds memory accesses will hopefully be caught.
+ * The vmalloc() routines leaves a hole of 4kB between each vmalloced
+ * area for the same reason. ;)
+ */
+#define VMALLOC_OFFSET (8 * 1024 * 1024)
+#define VMALLOC_START ((unsigned long)high_memory + VMALLOC_OFFSET)
+#ifdef CONFIG_X86_PAE
+#define LAST_PKMAP 512
+#else
+#define LAST_PKMAP 1024
+#endif
+
+#define PKMAP_BASE ((FIXADDR_BOOT_START - PAGE_SIZE * (LAST_PKMAP + 1)) \
+ & PMD_MASK)
+
+#ifdef CONFIG_HIGHMEM
+# define VMALLOC_END (PKMAP_BASE - 2 * PAGE_SIZE)
+#else
+# define VMALLOC_END (FIXADDR_START - 2 * PAGE_SIZE)
+#endif
+
+#define MAXMEM (VMALLOC_END - PAGE_OFFSET - __VMALLOC_RESERVE)
+
+#endif /* _ASM_X86_PGTABLE_32_DEFS_H */
diff --git a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h
index ba09289..6b87bc6 100644
--- a/arch/x86/include/asm/pgtable_64.h
+++ b/arch/x86/include/asm/pgtable_64.h
@@ -2,6 +2,8 @@
#define _ASM_X86_PGTABLE_64_H
#include <linux/const.h>
+#include <asm/pgtable_64_types.h>
+
#ifndef __ASSEMBLY__
/*
@@ -11,7 +13,6 @@
#include <asm/processor.h>
#include <linux/bitops.h>
#include <linux/threads.h>
-#include <asm/pda.h>
extern pud_t level3_kernel_pgt[512];
extern pud_t level3_ident_pgt[512];
@@ -26,32 +27,6 @@
#endif /* !__ASSEMBLY__ */
-#define SHARED_KERNEL_PMD 0
-
-/*
- * PGDIR_SHIFT determines what a top-level page table entry can map
- */
-#define PGDIR_SHIFT 39
-#define PTRS_PER_PGD 512
-
-/*
- * 3rd level page
- */
-#define PUD_SHIFT 30
-#define PTRS_PER_PUD 512
-
-/*
- * PMD_SHIFT determines the size of the area a middle-level
- * page table can map
- */
-#define PMD_SHIFT 21
-#define PTRS_PER_PMD 512
-
-/*
- * entries per page directory level
- */
-#define PTRS_PER_PTE 512
-
#ifndef __ASSEMBLY__
#define pte_ERROR(e) \
@@ -67,9 +42,6 @@
printk("%s:%d: bad pgd %p(%016lx).\n", \
__FILE__, __LINE__, &(e), pgd_val(e))
-#define pgd_none(x) (!pgd_val(x))
-#define pud_none(x) (!pud_val(x))
-
struct mm_struct;
void set_pte_vaddr_pud(pud_t *pud_page, unsigned long vaddr, pte_t new_pte);
@@ -134,48 +106,6 @@
native_set_pgd(pgd, native_make_pgd(0));
}
-#define pte_same(a, b) ((a).pte == (b).pte)
-
-#endif /* !__ASSEMBLY__ */
-
-#define PMD_SIZE (_AC(1, UL) << PMD_SHIFT)
-#define PMD_MASK (~(PMD_SIZE - 1))
-#define PUD_SIZE (_AC(1, UL) << PUD_SHIFT)
-#define PUD_MASK (~(PUD_SIZE - 1))
-#define PGDIR_SIZE (_AC(1, UL) << PGDIR_SHIFT)
-#define PGDIR_MASK (~(PGDIR_SIZE - 1))
-
-
-#define MAXMEM _AC(__AC(1, UL) << MAX_PHYSMEM_BITS, UL)
-#define VMALLOC_START _AC(0xffffc20000000000, UL)
-#define VMALLOC_END _AC(0xffffe1ffffffffff, UL)
-#define VMEMMAP_START _AC(0xffffe20000000000, UL)
-#define MODULES_VADDR _AC(0xffffffffa0000000, UL)
-#define MODULES_END _AC(0xffffffffff000000, UL)
-#define MODULES_LEN (MODULES_END - MODULES_VADDR)
-
-#ifndef __ASSEMBLY__
-
-static inline int pgd_bad(pgd_t pgd)
-{
- return (pgd_val(pgd) & ~(PTE_PFN_MASK | _PAGE_USER)) != _KERNPG_TABLE;
-}
-
-static inline int pud_bad(pud_t pud)
-{
- return (pud_val(pud) & ~(PTE_PFN_MASK | _PAGE_USER)) != _KERNPG_TABLE;
-}
-
-static inline int pmd_bad(pmd_t pmd)
-{
- return (pmd_val(pmd) & ~(PTE_PFN_MASK | _PAGE_USER)) != _KERNPG_TABLE;
-}
-
-#define pte_none(x) (!pte_val((x)))
-#define pte_present(x) (pte_val((x)) & (_PAGE_PRESENT | _PAGE_PROTNONE))
-
-#define pages_to_mb(x) ((x) >> (20 - PAGE_SHIFT)) /* FIXME: is this right? */
-
/*
* Conversion functions: convert a page and protection to a page entry,
* and a page entry and page directory to the page they refer to.
@@ -184,41 +114,12 @@
/*
* Level 4 access.
*/
-#define pgd_page_vaddr(pgd) \
- ((unsigned long)__va((unsigned long)pgd_val((pgd)) & PTE_PFN_MASK))
-#define pgd_page(pgd) (pfn_to_page(pgd_val((pgd)) >> PAGE_SHIFT))
-#define pgd_present(pgd) (pgd_val(pgd) & _PAGE_PRESENT)
static inline int pgd_large(pgd_t pgd) { return 0; }
#define mk_kernel_pgd(address) __pgd((address) | _KERNPG_TABLE)
/* PUD - Level3 access */
-/* to find an entry in a page-table-directory. */
-#define pud_page_vaddr(pud) \
- ((unsigned long)__va(pud_val((pud)) & PHYSICAL_PAGE_MASK))
-#define pud_page(pud) (pfn_to_page(pud_val((pud)) >> PAGE_SHIFT))
-#define pud_index(address) (((address) >> PUD_SHIFT) & (PTRS_PER_PUD - 1))
-#define pud_offset(pgd, address) \
- ((pud_t *)pgd_page_vaddr(*(pgd)) + pud_index((address)))
-#define pud_present(pud) (pud_val((pud)) & _PAGE_PRESENT)
-
-static inline int pud_large(pud_t pte)
-{
- return (pud_val(pte) & (_PAGE_PSE | _PAGE_PRESENT)) ==
- (_PAGE_PSE | _PAGE_PRESENT);
-}
/* PMD - Level 2 access */
-#define pmd_page_vaddr(pmd) ((unsigned long) __va(pmd_val((pmd)) & PTE_PFN_MASK))
-#define pmd_page(pmd) (pfn_to_page(pmd_val((pmd)) >> PAGE_SHIFT))
-
-#define pmd_index(address) (((address) >> PMD_SHIFT) & (PTRS_PER_PMD - 1))
-#define pmd_offset(dir, address) ((pmd_t *)pud_page_vaddr(*(dir)) + \
- pmd_index(address))
-#define pmd_none(x) (!pmd_val((x)))
-#define pmd_present(x) (pmd_val((x)) & _PAGE_PRESENT)
-#define pfn_pmd(nr, prot) (__pmd(((nr) << PAGE_SHIFT) | pgprot_val((prot))))
-#define pmd_pfn(x) ((pmd_val((x)) & __PHYSICAL_MASK) >> PAGE_SHIFT)
-
#define pte_to_pgoff(pte) ((pte_val((pte)) & PHYSICAL_PAGE_MASK) >> PAGE_SHIFT)
#define pgoff_to_pte(off) ((pte_t) { .pte = ((off) << PAGE_SHIFT) | \
_PAGE_FILE })
@@ -226,13 +127,6 @@
/* PTE - Level 1 access. */
-/* page, protection -> pte */
-#define mk_pte(page, pgprot) pfn_pte(page_to_pfn((page)), (pgprot))
-
-#define pte_index(address) (((address) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1))
-#define pte_offset_kernel(dir, address) ((pte_t *) pmd_page_vaddr(*(dir)) + \
- pte_index((address)))
-
/* x86-64 always has all page tables mapped. */
#define pte_offset_map(dir, address) pte_offset_kernel((dir), (address))
#define pte_offset_map_nested(dir, address) pte_offset_kernel((dir), (address))
@@ -266,9 +160,6 @@
extern int kern_addr_valid(unsigned long addr);
extern void cleanup_highmap(void);
-#define io_remap_pfn_range(vma, vaddr, pfn, size, prot) \
- remap_pfn_range(vma, vaddr, pfn, size, prot)
-
#define HAVE_ARCH_UNMAPPED_AREA
#define HAVE_ARCH_UNMAPPED_AREA_TOPDOWN
diff --git a/arch/x86/include/asm/pgtable_64_types.h b/arch/x86/include/asm/pgtable_64_types.h
new file mode 100644
index 0000000..fbf42b8
--- /dev/null
+++ b/arch/x86/include/asm/pgtable_64_types.h
@@ -0,0 +1,63 @@
+#ifndef _ASM_X86_PGTABLE_64_DEFS_H
+#define _ASM_X86_PGTABLE_64_DEFS_H
+
+#ifndef __ASSEMBLY__
+#include <linux/types.h>
+
+/*
+ * These are used to make use of C type-checking..
+ */
+typedef unsigned long pteval_t;
+typedef unsigned long pmdval_t;
+typedef unsigned long pudval_t;
+typedef unsigned long pgdval_t;
+typedef unsigned long pgprotval_t;
+
+typedef struct { pteval_t pte; } pte_t;
+
+#endif /* !__ASSEMBLY__ */
+
+#define SHARED_KERNEL_PMD 0
+#define PAGETABLE_LEVELS 4
+
+/*
+ * PGDIR_SHIFT determines what a top-level page table entry can map
+ */
+#define PGDIR_SHIFT 39
+#define PTRS_PER_PGD 512
+
+/*
+ * 3rd level page
+ */
+#define PUD_SHIFT 30
+#define PTRS_PER_PUD 512
+
+/*
+ * PMD_SHIFT determines the size of the area a middle-level
+ * page table can map
+ */
+#define PMD_SHIFT 21
+#define PTRS_PER_PMD 512
+
+/*
+ * entries per page directory level
+ */
+#define PTRS_PER_PTE 512
+
+#define PMD_SIZE (_AC(1, UL) << PMD_SHIFT)
+#define PMD_MASK (~(PMD_SIZE - 1))
+#define PUD_SIZE (_AC(1, UL) << PUD_SHIFT)
+#define PUD_MASK (~(PUD_SIZE - 1))
+#define PGDIR_SIZE (_AC(1, UL) << PGDIR_SHIFT)
+#define PGDIR_MASK (~(PGDIR_SIZE - 1))
+
+
+#define MAXMEM _AC(__AC(1, UL) << MAX_PHYSMEM_BITS, UL)
+#define VMALLOC_START _AC(0xffffc20000000000, UL)
+#define VMALLOC_END _AC(0xffffe1ffffffffff, UL)
+#define VMEMMAP_START _AC(0xffffe20000000000, UL)
+#define MODULES_VADDR _AC(0xffffffffa0000000, UL)
+#define MODULES_END _AC(0xffffffffff000000, UL)
+#define MODULES_LEN (MODULES_END - MODULES_VADDR)
+
+#endif /* _ASM_X86_PGTABLE_64_DEFS_H */
diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h
new file mode 100644
index 0000000..4d258ad
--- /dev/null
+++ b/arch/x86/include/asm/pgtable_types.h
@@ -0,0 +1,328 @@
+#ifndef _ASM_X86_PGTABLE_DEFS_H
+#define _ASM_X86_PGTABLE_DEFS_H
+
+#include <linux/const.h>
+#include <asm/page_types.h>
+
+#define FIRST_USER_ADDRESS 0
+
+#define _PAGE_BIT_PRESENT 0 /* is present */
+#define _PAGE_BIT_RW 1 /* writeable */
+#define _PAGE_BIT_USER 2 /* userspace addressable */
+#define _PAGE_BIT_PWT 3 /* page write through */
+#define _PAGE_BIT_PCD 4 /* page cache disabled */
+#define _PAGE_BIT_ACCESSED 5 /* was accessed (raised by CPU) */
+#define _PAGE_BIT_DIRTY 6 /* was written to (raised by CPU) */
+#define _PAGE_BIT_PSE 7 /* 4 MB (or 2MB) page */
+#define _PAGE_BIT_PAT 7 /* on 4KB pages */
+#define _PAGE_BIT_GLOBAL 8 /* Global TLB entry PPro+ */
+#define _PAGE_BIT_UNUSED1 9 /* available for programmer */
+#define _PAGE_BIT_IOMAP 10 /* flag used to indicate IO mapping */
+#define _PAGE_BIT_UNUSED3 11
+#define _PAGE_BIT_PAT_LARGE 12 /* On 2MB or 1GB pages */
+#define _PAGE_BIT_SPECIAL _PAGE_BIT_UNUSED1
+#define _PAGE_BIT_CPA_TEST _PAGE_BIT_UNUSED1
+#define _PAGE_BIT_NX 63 /* No execute: only valid after cpuid check */
+
+/* If _PAGE_BIT_PRESENT is clear, we use these: */
+/* - if the user mapped it with PROT_NONE; pte_present gives true */
+#define _PAGE_BIT_PROTNONE _PAGE_BIT_GLOBAL
+/* - set: nonlinear file mapping, saved PTE; unset:swap */
+#define _PAGE_BIT_FILE _PAGE_BIT_DIRTY
+
+#define _PAGE_PRESENT (_AT(pteval_t, 1) << _PAGE_BIT_PRESENT)
+#define _PAGE_RW (_AT(pteval_t, 1) << _PAGE_BIT_RW)
+#define _PAGE_USER (_AT(pteval_t, 1) << _PAGE_BIT_USER)
+#define _PAGE_PWT (_AT(pteval_t, 1) << _PAGE_BIT_PWT)
+#define _PAGE_PCD (_AT(pteval_t, 1) << _PAGE_BIT_PCD)
+#define _PAGE_ACCESSED (_AT(pteval_t, 1) << _PAGE_BIT_ACCESSED)
+#define _PAGE_DIRTY (_AT(pteval_t, 1) << _PAGE_BIT_DIRTY)
+#define _PAGE_PSE (_AT(pteval_t, 1) << _PAGE_BIT_PSE)
+#define _PAGE_GLOBAL (_AT(pteval_t, 1) << _PAGE_BIT_GLOBAL)
+#define _PAGE_UNUSED1 (_AT(pteval_t, 1) << _PAGE_BIT_UNUSED1)
+#define _PAGE_IOMAP (_AT(pteval_t, 1) << _PAGE_BIT_IOMAP)
+#define _PAGE_UNUSED3 (_AT(pteval_t, 1) << _PAGE_BIT_UNUSED3)
+#define _PAGE_PAT (_AT(pteval_t, 1) << _PAGE_BIT_PAT)
+#define _PAGE_PAT_LARGE (_AT(pteval_t, 1) << _PAGE_BIT_PAT_LARGE)
+#define _PAGE_SPECIAL (_AT(pteval_t, 1) << _PAGE_BIT_SPECIAL)
+#define _PAGE_CPA_TEST (_AT(pteval_t, 1) << _PAGE_BIT_CPA_TEST)
+#define __HAVE_ARCH_PTE_SPECIAL
+
+#if defined(CONFIG_X86_64) || defined(CONFIG_X86_PAE)
+#define _PAGE_NX (_AT(pteval_t, 1) << _PAGE_BIT_NX)
+#else
+#define _PAGE_NX (_AT(pteval_t, 0))
+#endif
+
+#define _PAGE_FILE (_AT(pteval_t, 1) << _PAGE_BIT_FILE)
+#define _PAGE_PROTNONE (_AT(pteval_t, 1) << _PAGE_BIT_PROTNONE)
+
+#define _PAGE_TABLE (_PAGE_PRESENT | _PAGE_RW | _PAGE_USER | \
+ _PAGE_ACCESSED | _PAGE_DIRTY)
+#define _KERNPG_TABLE (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | \
+ _PAGE_DIRTY)
+
+/* Set of bits not changed in pte_modify */
+#define _PAGE_CHG_MASK (PTE_PFN_MASK | _PAGE_PCD | _PAGE_PWT | \
+ _PAGE_SPECIAL | _PAGE_ACCESSED | _PAGE_DIRTY)
+
+#define _PAGE_CACHE_MASK (_PAGE_PCD | _PAGE_PWT)
+#define _PAGE_CACHE_WB (0)
+#define _PAGE_CACHE_WC (_PAGE_PWT)
+#define _PAGE_CACHE_UC_MINUS (_PAGE_PCD)
+#define _PAGE_CACHE_UC (_PAGE_PCD | _PAGE_PWT)
+
+#define PAGE_NONE __pgprot(_PAGE_PROTNONE | _PAGE_ACCESSED)
+#define PAGE_SHARED __pgprot(_PAGE_PRESENT | _PAGE_RW | _PAGE_USER | \
+ _PAGE_ACCESSED | _PAGE_NX)
+
+#define PAGE_SHARED_EXEC __pgprot(_PAGE_PRESENT | _PAGE_RW | \
+ _PAGE_USER | _PAGE_ACCESSED)
+#define PAGE_COPY_NOEXEC __pgprot(_PAGE_PRESENT | _PAGE_USER | \
+ _PAGE_ACCESSED | _PAGE_NX)
+#define PAGE_COPY_EXEC __pgprot(_PAGE_PRESENT | _PAGE_USER | \
+ _PAGE_ACCESSED)
+#define PAGE_COPY PAGE_COPY_NOEXEC
+#define PAGE_READONLY __pgprot(_PAGE_PRESENT | _PAGE_USER | \
+ _PAGE_ACCESSED | _PAGE_NX)
+#define PAGE_READONLY_EXEC __pgprot(_PAGE_PRESENT | _PAGE_USER | \
+ _PAGE_ACCESSED)
+
+#define __PAGE_KERNEL_EXEC \
+ (_PAGE_PRESENT | _PAGE_RW | _PAGE_DIRTY | _PAGE_ACCESSED | _PAGE_GLOBAL)
+#define __PAGE_KERNEL (__PAGE_KERNEL_EXEC | _PAGE_NX)
+
+#define __PAGE_KERNEL_RO (__PAGE_KERNEL & ~_PAGE_RW)
+#define __PAGE_KERNEL_RX (__PAGE_KERNEL_EXEC & ~_PAGE_RW)
+#define __PAGE_KERNEL_EXEC_NOCACHE (__PAGE_KERNEL_EXEC | _PAGE_PCD | _PAGE_PWT)
+#define __PAGE_KERNEL_WC (__PAGE_KERNEL | _PAGE_CACHE_WC)
+#define __PAGE_KERNEL_NOCACHE (__PAGE_KERNEL | _PAGE_PCD | _PAGE_PWT)
+#define __PAGE_KERNEL_UC_MINUS (__PAGE_KERNEL | _PAGE_PCD)
+#define __PAGE_KERNEL_VSYSCALL (__PAGE_KERNEL_RX | _PAGE_USER)
+#define __PAGE_KERNEL_VSYSCALL_NOCACHE (__PAGE_KERNEL_VSYSCALL | _PAGE_PCD | _PAGE_PWT)
+#define __PAGE_KERNEL_LARGE (__PAGE_KERNEL | _PAGE_PSE)
+#define __PAGE_KERNEL_LARGE_NOCACHE (__PAGE_KERNEL | _PAGE_CACHE_UC | _PAGE_PSE)
+#define __PAGE_KERNEL_LARGE_EXEC (__PAGE_KERNEL_EXEC | _PAGE_PSE)
+
+#define __PAGE_KERNEL_IO (__PAGE_KERNEL | _PAGE_IOMAP)
+#define __PAGE_KERNEL_IO_NOCACHE (__PAGE_KERNEL_NOCACHE | _PAGE_IOMAP)
+#define __PAGE_KERNEL_IO_UC_MINUS (__PAGE_KERNEL_UC_MINUS | _PAGE_IOMAP)
+#define __PAGE_KERNEL_IO_WC (__PAGE_KERNEL_WC | _PAGE_IOMAP)
+
+#define PAGE_KERNEL __pgprot(__PAGE_KERNEL)
+#define PAGE_KERNEL_RO __pgprot(__PAGE_KERNEL_RO)
+#define PAGE_KERNEL_EXEC __pgprot(__PAGE_KERNEL_EXEC)
+#define PAGE_KERNEL_RX __pgprot(__PAGE_KERNEL_RX)
+#define PAGE_KERNEL_WC __pgprot(__PAGE_KERNEL_WC)
+#define PAGE_KERNEL_NOCACHE __pgprot(__PAGE_KERNEL_NOCACHE)
+#define PAGE_KERNEL_UC_MINUS __pgprot(__PAGE_KERNEL_UC_MINUS)
+#define PAGE_KERNEL_EXEC_NOCACHE __pgprot(__PAGE_KERNEL_EXEC_NOCACHE)
+#define PAGE_KERNEL_LARGE __pgprot(__PAGE_KERNEL_LARGE)
+#define PAGE_KERNEL_LARGE_NOCACHE __pgprot(__PAGE_KERNEL_LARGE_NOCACHE)
+#define PAGE_KERNEL_LARGE_EXEC __pgprot(__PAGE_KERNEL_LARGE_EXEC)
+#define PAGE_KERNEL_VSYSCALL __pgprot(__PAGE_KERNEL_VSYSCALL)
+#define PAGE_KERNEL_VSYSCALL_NOCACHE __pgprot(__PAGE_KERNEL_VSYSCALL_NOCACHE)
+
+#define PAGE_KERNEL_IO __pgprot(__PAGE_KERNEL_IO)
+#define PAGE_KERNEL_IO_NOCACHE __pgprot(__PAGE_KERNEL_IO_NOCACHE)
+#define PAGE_KERNEL_IO_UC_MINUS __pgprot(__PAGE_KERNEL_IO_UC_MINUS)
+#define PAGE_KERNEL_IO_WC __pgprot(__PAGE_KERNEL_IO_WC)
+
+/* xwr */
+#define __P000 PAGE_NONE
+#define __P001 PAGE_READONLY
+#define __P010 PAGE_COPY
+#define __P011 PAGE_COPY
+#define __P100 PAGE_READONLY_EXEC
+#define __P101 PAGE_READONLY_EXEC
+#define __P110 PAGE_COPY_EXEC
+#define __P111 PAGE_COPY_EXEC
+
+#define __S000 PAGE_NONE
+#define __S001 PAGE_READONLY
+#define __S010 PAGE_SHARED
+#define __S011 PAGE_SHARED
+#define __S100 PAGE_READONLY_EXEC
+#define __S101 PAGE_READONLY_EXEC
+#define __S110 PAGE_SHARED_EXEC
+#define __S111 PAGE_SHARED_EXEC
+
+/*
+ * early identity mapping pte attrib macros.
+ */
+#ifdef CONFIG_X86_64
+#define __PAGE_KERNEL_IDENT_LARGE_EXEC __PAGE_KERNEL_LARGE_EXEC
+#else
+/*
+ * For PDE_IDENT_ATTR include USER bit. As the PDE and PTE protection
+ * bits are combined, this will alow user to access the high address mapped
+ * VDSO in the presence of CONFIG_COMPAT_VDSO
+ */
+#define PTE_IDENT_ATTR 0x003 /* PRESENT+RW */
+#define PDE_IDENT_ATTR 0x067 /* PRESENT+RW+USER+DIRTY+ACCESSED */
+#define PGD_IDENT_ATTR 0x001 /* PRESENT (no other attributes) */
+#endif
+
+#ifdef CONFIG_X86_32
+# include "pgtable_32_types.h"
+#else
+# include "pgtable_64_types.h"
+#endif
+
+#ifndef __ASSEMBLY__
+
+#include <linux/types.h>
+
+/* PTE_PFN_MASK extracts the PFN from a (pte|pmd|pud|pgd)val_t */
+#define PTE_PFN_MASK ((pteval_t)PHYSICAL_PAGE_MASK)
+
+/* PTE_FLAGS_MASK extracts the flags from a (pte|pmd|pud|pgd)val_t */
+#define PTE_FLAGS_MASK (~PTE_PFN_MASK)
+
+typedef struct pgprot { pgprotval_t pgprot; } pgprot_t;
+
+typedef struct { pgdval_t pgd; } pgd_t;
+
+static inline pgd_t native_make_pgd(pgdval_t val)
+{
+ return (pgd_t) { val };
+}
+
+static inline pgdval_t native_pgd_val(pgd_t pgd)
+{
+ return pgd.pgd;
+}
+
+static inline pgdval_t pgd_flags(pgd_t pgd)
+{
+ return native_pgd_val(pgd) & PTE_FLAGS_MASK;
+}
+
+#if PAGETABLE_LEVELS > 3
+typedef struct { pudval_t pud; } pud_t;
+
+static inline pud_t native_make_pud(pmdval_t val)
+{
+ return (pud_t) { val };
+}
+
+static inline pudval_t native_pud_val(pud_t pud)
+{
+ return pud.pud;
+}
+#else
+#include <asm-generic/pgtable-nopud.h>
+
+static inline pudval_t native_pud_val(pud_t pud)
+{
+ return native_pgd_val(pud.pgd);
+}
+#endif
+
+#if PAGETABLE_LEVELS > 2
+typedef struct { pmdval_t pmd; } pmd_t;
+
+static inline pmd_t native_make_pmd(pmdval_t val)
+{
+ return (pmd_t) { val };
+}
+
+static inline pmdval_t native_pmd_val(pmd_t pmd)
+{
+ return pmd.pmd;
+}
+#else
+#include <asm-generic/pgtable-nopmd.h>
+
+static inline pmdval_t native_pmd_val(pmd_t pmd)
+{
+ return native_pgd_val(pmd.pud.pgd);
+}
+#endif
+
+static inline pudval_t pud_flags(pud_t pud)
+{
+ return native_pud_val(pud) & PTE_FLAGS_MASK;
+}
+
+static inline pmdval_t pmd_flags(pmd_t pmd)
+{
+ return native_pmd_val(pmd) & PTE_FLAGS_MASK;
+}
+
+static inline pte_t native_make_pte(pteval_t val)
+{
+ return (pte_t) { .pte = val };
+}
+
+static inline pteval_t native_pte_val(pte_t pte)
+{
+ return pte.pte;
+}
+
+static inline pteval_t pte_flags(pte_t pte)
+{
+ return native_pte_val(pte) & PTE_FLAGS_MASK;
+}
+
+#define pgprot_val(x) ((x).pgprot)
+#define __pgprot(x) ((pgprot_t) { (x) } )
+
+
+typedef struct page *pgtable_t;
+
+extern pteval_t __supported_pte_mask;
+extern int nx_enabled;
+
+#define pgprot_writecombine pgprot_writecombine
+extern pgprot_t pgprot_writecombine(pgprot_t prot);
+
+/* Indicate that x86 has its own track and untrack pfn vma functions */
+#define __HAVE_PFNMAP_TRACKING
+
+#define __HAVE_PHYS_MEM_ACCESS_PROT
+struct file;
+pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn,
+ unsigned long size, pgprot_t vma_prot);
+int phys_mem_access_prot_allowed(struct file *file, unsigned long pfn,
+ unsigned long size, pgprot_t *vma_prot);
+
+/* Install a pte for a particular vaddr in kernel space. */
+void set_pte_vaddr(unsigned long vaddr, pte_t pte);
+
+#ifdef CONFIG_X86_32
+extern void native_pagetable_setup_start(pgd_t *base);
+extern void native_pagetable_setup_done(pgd_t *base);
+#else
+static inline void native_pagetable_setup_start(pgd_t *base) {}
+static inline void native_pagetable_setup_done(pgd_t *base) {}
+#endif
+
+struct seq_file;
+extern void arch_report_meminfo(struct seq_file *m);
+
+enum {
+ PG_LEVEL_NONE,
+ PG_LEVEL_4K,
+ PG_LEVEL_2M,
+ PG_LEVEL_1G,
+ PG_LEVEL_NUM
+};
+
+#ifdef CONFIG_PROC_FS
+extern void update_page_count(int level, unsigned long pages);
+#else
+static inline void update_page_count(int level, unsigned long pages) { }
+#endif
+
+/*
+ * Helper function that returns the kernel pagetable entry controlling
+ * the virtual address 'address'. NULL means no pagetable entry present.
+ * NOTE: the return type is pte_t but if the pmd is PSE then we return it
+ * as a pte too.
+ */
+extern pte_t *lookup_address(unsigned long address, unsigned int *level);
+
+#endif /* !__ASSEMBLY__ */
+
+#endif /* _ASM_X86_PGTABLE_DEFS_H */
diff --git a/arch/x86/include/asm/prctl.h b/arch/x86/include/asm/prctl.h
index a889464..3ac5032 100644
--- a/arch/x86/include/asm/prctl.h
+++ b/arch/x86/include/asm/prctl.h
@@ -6,8 +6,4 @@
#define ARCH_GET_FS 0x1003
#define ARCH_GET_GS 0x1004
-#ifdef CONFIG_X86_64
-extern long sys_arch_prctl(int, unsigned long);
-#endif /* CONFIG_X86_64 */
-
#endif /* _ASM_X86_PRCTL_H */
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index 3bfd523..c7a98f7 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -16,6 +16,7 @@
#include <asm/cpufeature.h>
#include <asm/system.h>
#include <asm/page.h>
+#include <asm/pgtable_types.h>
#include <asm/percpu.h>
#include <asm/msr.h>
#include <asm/desc_defs.h>
@@ -73,7 +74,7 @@
char pad0;
#else
/* Number of 4K pages in DTLB/ITLB combined(in pages): */
- int x86_tlbsize;
+ int x86_tlbsize;
__u8 x86_virt_bits;
__u8 x86_phys_bits;
#endif
@@ -378,9 +379,30 @@
#ifdef CONFIG_X86_64
DECLARE_PER_CPU(struct orig_ist, orig_ist);
-#endif
-extern void print_cpu_info(struct cpuinfo_x86 *);
+union irq_stack_union {
+ char irq_stack[IRQ_STACK_SIZE];
+ /*
+ * GCC hardcodes the stack canary as %gs:40. Since the
+ * irq_stack is the object at %gs:0, we reserve the bottom
+ * 48 bytes of the irq stack for the canary.
+ */
+ struct {
+ char gs_base[40];
+ unsigned long stack_canary;
+ };
+};
+
+DECLARE_PER_CPU(union irq_stack_union, irq_stack_union);
+DECLARE_INIT_PER_CPU(irq_stack_union);
+
+DECLARE_PER_CPU(char *, irq_stack_ptr);
+#else /* X86_64 */
+#ifdef CONFIG_CC_STACKPROTECTOR
+DECLARE_PER_CPU(unsigned long, stack_canary);
+#endif
+#endif /* X86_64 */
+
extern unsigned int xstate_size;
extern void free_thread_xstate(struct task_struct *);
extern struct kmem_cache *task_xstate_cachep;
@@ -752,9 +774,9 @@
extern struct desc_ptr early_gdt_descr;
extern void cpu_set_gdt(int);
-extern void switch_to_new_gdt(void);
+extern void switch_to_new_gdt(int);
+extern void load_percpu_segment(int);
extern void cpu_init(void);
-extern void init_gdt(int cpu);
static inline unsigned long get_debugctlmsr(void)
{
@@ -839,6 +861,7 @@
* User space process size: 3GB (default).
*/
#define TASK_SIZE PAGE_OFFSET
+#define TASK_SIZE_MAX TASK_SIZE
#define STACK_TOP TASK_SIZE
#define STACK_TOP_MAX STACK_TOP
@@ -898,7 +921,7 @@
/*
* User space process size. 47bits minus one guard page.
*/
-#define TASK_SIZE64 ((1UL << 47) - PAGE_SIZE)
+#define TASK_SIZE_MAX ((1UL << 47) - PAGE_SIZE)
/* This decides where the kernel will search for a free chunk of vm
* space during mmap's.
@@ -907,12 +930,12 @@
0xc0000000 : 0xFFFFe000)
#define TASK_SIZE (test_thread_flag(TIF_IA32) ? \
- IA32_PAGE_OFFSET : TASK_SIZE64)
+ IA32_PAGE_OFFSET : TASK_SIZE_MAX)
#define TASK_SIZE_OF(child) ((test_tsk_thread_flag(child, TIF_IA32)) ? \
- IA32_PAGE_OFFSET : TASK_SIZE64)
+ IA32_PAGE_OFFSET : TASK_SIZE_MAX)
#define STACK_TOP TASK_SIZE
-#define STACK_TOP_MAX TASK_SIZE64
+#define STACK_TOP_MAX TASK_SIZE_MAX
#define INIT_THREAD { \
.sp0 = (unsigned long)&init_stack + sizeof(init_stack) \
diff --git a/arch/x86/include/asm/proto.h b/arch/x86/include/asm/proto.h
index d6a22f9..49fb3ec 100644
--- a/arch/x86/include/asm/proto.h
+++ b/arch/x86/include/asm/proto.h
@@ -18,11 +18,7 @@
extern void check_efer(void);
-#ifdef CONFIG_X86_BIOS_REBOOT
extern int reboot_force;
-#else
-static const int reboot_force = 0;
-#endif
long do_arch_prctl(struct task_struct *task, int code, unsigned long addr);
diff --git a/arch/x86/include/asm/ptrace.h b/arch/x86/include/asm/ptrace.h
index 6d34d95..e304b66 100644
--- a/arch/x86/include/asm/ptrace.h
+++ b/arch/x86/include/asm/ptrace.h
@@ -28,7 +28,7 @@
int xds;
int xes;
int xfs;
- /* int gs; */
+ int xgs;
long orig_eax;
long eip;
int xcs;
@@ -50,7 +50,7 @@
unsigned long ds;
unsigned long es;
unsigned long fs;
- /* int gs; */
+ unsigned long gs;
unsigned long orig_ax;
unsigned long ip;
unsigned long cs;
diff --git a/arch/x86/include/asm/mach-rdc321x/rdc321x_defs.h b/arch/x86/include/asm/rdc321x_defs.h
similarity index 100%
rename from arch/x86/include/asm/mach-rdc321x/rdc321x_defs.h
rename to arch/x86/include/asm/rdc321x_defs.h
diff --git a/arch/x86/include/asm/segment.h b/arch/x86/include/asm/segment.h
index 1dc1b51..14e0ed8 100644
--- a/arch/x86/include/asm/segment.h
+++ b/arch/x86/include/asm/segment.h
@@ -61,7 +61,7 @@
*
* 26 - ESPFIX small SS
* 27 - per-cpu [ offset to per-cpu data area ]
- * 28 - unused
+ * 28 - stack_canary-20 [ for stack protector ]
* 29 - unused
* 30 - unused
* 31 - TSS for double fault handler
@@ -95,6 +95,13 @@
#define __KERNEL_PERCPU 0
#endif
+#define GDT_ENTRY_STACK_CANARY (GDT_ENTRY_KERNEL_BASE + 16)
+#ifdef CONFIG_CC_STACKPROTECTOR
+#define __KERNEL_STACK_CANARY (GDT_ENTRY_STACK_CANARY * 8)
+#else
+#define __KERNEL_STACK_CANARY 0
+#endif
+
#define GDT_ENTRY_DOUBLEFAULT_TSS 31
/*
diff --git a/arch/x86/include/asm/setup.h b/arch/x86/include/asm/setup.h
index ebe858c..66801cb 100644
--- a/arch/x86/include/asm/setup.h
+++ b/arch/x86/include/asm/setup.h
@@ -1,33 +1,19 @@
#ifndef _ASM_X86_SETUP_H
#define _ASM_X86_SETUP_H
+#ifdef __KERNEL__
+
#define COMMAND_LINE_SIZE 2048
#ifndef __ASSEMBLY__
-/* Interrupt control for vSMPowered x86_64 systems */
-void vsmp_init(void);
-
-
-void setup_bios_corruption_check(void);
-
-
-#ifdef CONFIG_X86_VISWS
-extern void visws_early_detect(void);
-extern int is_visws_box(void);
-#else
-static inline void visws_early_detect(void) { }
-static inline int is_visws_box(void) { return 0; }
-#endif
-
-extern int wakeup_secondary_cpu_via_nmi(int apicid, unsigned long start_eip);
-extern int wakeup_secondary_cpu_via_init(int apicid, unsigned long start_eip);
/*
* Any setup quirks to be performed?
*/
struct mpc_cpu;
struct mpc_bus;
struct mpc_oemtable;
+
struct x86_quirks {
int (*arch_pre_time_init)(void);
int (*arch_time_init)(void);
@@ -43,21 +29,21 @@
void (*mpc_oem_bus_info)(struct mpc_bus *m, char *name);
void (*mpc_oem_pci_bus)(struct mpc_bus *m);
void (*smp_read_mpc_oem)(struct mpc_oemtable *oemtable,
- unsigned short oemsize);
+ unsigned short oemsize);
int (*setup_ioapic_ids)(void);
- int (*update_genapic)(void);
+ int (*update_apic)(void);
};
-extern struct x86_quirks *x86_quirks;
-extern unsigned long saved_video_mode;
+extern void x86_quirk_pre_intr_init(void);
+extern void x86_quirk_intr_init(void);
-#ifndef CONFIG_PARAVIRT
-#define paravirt_post_allocator_init() do {} while (0)
-#endif
+extern void x86_quirk_trap_init(void);
+
+extern void x86_quirk_pre_time_init(void);
+extern void x86_quirk_time_init(void);
+
#endif /* __ASSEMBLY__ */
-#ifdef __KERNEL__
-
#ifdef __i386__
#include <linux/pfn.h>
@@ -78,6 +64,28 @@
#ifndef __ASSEMBLY__
#include <asm/bootparam.h>
+/* Interrupt control for vSMPowered x86_64 systems */
+void vsmp_init(void);
+
+void setup_bios_corruption_check(void);
+
+#ifdef CONFIG_X86_VISWS
+extern void visws_early_detect(void);
+extern int is_visws_box(void);
+#else
+static inline void visws_early_detect(void) { }
+static inline int is_visws_box(void) { return 0; }
+#endif
+
+extern int wakeup_secondary_cpu_via_nmi(int apicid, unsigned long start_eip);
+extern int wakeup_secondary_cpu_via_init(int apicid, unsigned long start_eip);
+extern struct x86_quirks *x86_quirks;
+extern unsigned long saved_video_mode;
+
+#ifndef CONFIG_PARAVIRT
+#define paravirt_post_allocator_init() do {} while (0)
+#endif
+
#ifndef _SETUP
/*
@@ -100,7 +108,6 @@
extern unsigned long init_pg_tables_end;
#else
-void __init x86_64_init_pda(void);
void __init x86_64_start_kernel(char *real_mode);
void __init x86_64_start_reservations(char *real_mode_data);
diff --git a/arch/x86/include/asm/mach-default/setup_arch.h b/arch/x86/include/asm/setup_arch.h
similarity index 100%
rename from arch/x86/include/asm/mach-default/setup_arch.h
rename to arch/x86/include/asm/setup_arch.h
diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h
index 19953df..47d0e21 100644
--- a/arch/x86/include/asm/smp.h
+++ b/arch/x86/include/asm/smp.h
@@ -15,34 +15,8 @@
# include <asm/io_apic.h>
# endif
#endif
-#include <asm/pda.h>
#include <asm/thread_info.h>
-
-#ifdef CONFIG_X86_64
-
-extern cpumask_var_t cpu_callin_mask;
-extern cpumask_var_t cpu_callout_mask;
-extern cpumask_var_t cpu_initialized_mask;
-extern cpumask_var_t cpu_sibling_setup_mask;
-
-#else /* CONFIG_X86_32 */
-
-extern cpumask_t cpu_callin_map;
-extern cpumask_t cpu_callout_map;
-extern cpumask_t cpu_initialized;
-extern cpumask_t cpu_sibling_setup_map;
-
-#define cpu_callin_mask ((struct cpumask *)&cpu_callin_map)
-#define cpu_callout_mask ((struct cpumask *)&cpu_callout_map)
-#define cpu_initialized_mask ((struct cpumask *)&cpu_initialized)
-#define cpu_sibling_setup_mask ((struct cpumask *)&cpu_sibling_setup_map)
-
-#endif /* CONFIG_X86_32 */
-
-extern void (*mtrr_hook)(void);
-extern void zap_low_mappings(void);
-
-extern int __cpuinit get_local_pda(int cpu);
+#include <asm/cpumask.h>
extern int smp_num_siblings;
extern unsigned int num_processors;
@@ -50,9 +24,7 @@
DECLARE_PER_CPU(cpumask_t, cpu_sibling_map);
DECLARE_PER_CPU(cpumask_t, cpu_core_map);
DECLARE_PER_CPU(u16, cpu_llc_id);
-#ifdef CONFIG_X86_32
DECLARE_PER_CPU(int, cpu_number);
-#endif
static inline struct cpumask *cpu_sibling_mask(int cpu)
{
@@ -167,8 +139,6 @@
void native_send_call_func_ipi(const struct cpumask *mask);
void native_send_call_func_single_ipi(int cpu);
-extern void prefill_possible_map(void);
-
void smp_store_cpu_info(int id);
#define cpu_physical_id(cpu) per_cpu(x86_cpu_to_apicid, cpu)
@@ -177,10 +147,6 @@
{
return cpumask_weight(cpu_callout_mask);
}
-#else
-static inline void prefill_possible_map(void)
-{
-}
#endif /* CONFIG_SMP */
extern unsigned disabled_cpus __cpuinitdata;
@@ -191,11 +157,11 @@
* from the initial startup. We map APIC_BASE very early in page_setup(),
* so this is correct in the x86 case.
*/
-#define raw_smp_processor_id() (x86_read_percpu(cpu_number))
+#define raw_smp_processor_id() (percpu_read(cpu_number))
extern int safe_smp_processor_id(void);
#elif defined(CONFIG_X86_64_SMP)
-#define raw_smp_processor_id() read_pda(cpunumber)
+#define raw_smp_processor_id() (percpu_read(cpu_number))
#define stack_smp_processor_id() \
({ \
@@ -205,10 +171,6 @@
})
#define safe_smp_processor_id() smp_processor_id()
-#else /* !CONFIG_X86_32_SMP && !CONFIG_X86_64_SMP */
-#define cpu_physical_id(cpu) boot_cpu_physical_apicid
-#define safe_smp_processor_id() 0
-#define stack_smp_processor_id() 0
#endif
#ifdef CONFIG_X86_LOCAL_APIC
@@ -220,28 +182,9 @@
return GET_APIC_LOGICAL_ID(*(u32 *)(APIC_BASE + APIC_LDR));
}
-#include <mach_apicdef.h>
-static inline unsigned int read_apic_id(void)
-{
- unsigned int reg;
-
- reg = *(u32 *)(APIC_BASE + APIC_ID);
-
- return GET_APIC_ID(reg);
-}
#endif
-
-# if defined(APIC_DEFINITION) || defined(CONFIG_X86_64)
extern int hard_smp_processor_id(void);
-# else
-#include <mach_apicdef.h>
-static inline int hard_smp_processor_id(void)
-{
- /* we don't want to mark this access volatile - bad code generation */
- return read_apic_id();
-}
-# endif /* APIC_DEFINITION */
#else /* CONFIG_X86_LOCAL_APIC */
@@ -251,11 +194,5 @@
#endif /* CONFIG_X86_LOCAL_APIC */
-#ifdef CONFIG_X86_HAS_BOOT_CPU_ID
-extern unsigned char boot_cpu_id;
-#else
-#define boot_cpu_id 0
-#endif
-
#endif /* __ASSEMBLY__ */
#endif /* _ASM_X86_SMP_H */
diff --git a/arch/x86/include/asm/mach-default/smpboot_hooks.h b/arch/x86/include/asm/smpboot_hooks.h
similarity index 83%
rename from arch/x86/include/asm/mach-default/smpboot_hooks.h
rename to arch/x86/include/asm/smpboot_hooks.h
index 23bf5210..1def601 100644
--- a/arch/x86/include/asm/mach-default/smpboot_hooks.h
+++ b/arch/x86/include/asm/smpboot_hooks.h
@@ -13,10 +13,10 @@
CMOS_WRITE(0xa, 0xf);
local_flush_tlb();
pr_debug("1.\n");
- *((volatile unsigned short *)phys_to_virt(TRAMPOLINE_PHYS_HIGH)) =
+ *((volatile unsigned short *)phys_to_virt(apic->trampoline_phys_high)) =
start_eip >> 4;
pr_debug("2.\n");
- *((volatile unsigned short *)phys_to_virt(TRAMPOLINE_PHYS_LOW)) =
+ *((volatile unsigned short *)phys_to_virt(apic->trampoline_phys_low)) =
start_eip & 0xf;
pr_debug("3.\n");
}
@@ -34,7 +34,7 @@
*/
CMOS_WRITE(0, 0xf);
- *((volatile long *)phys_to_virt(TRAMPOLINE_PHYS_LOW)) = 0;
+ *((volatile long *)phys_to_virt(apic->trampoline_phys_low)) = 0;
}
static inline void __init smpboot_setup_io_apic(void)
diff --git a/arch/x86/include/asm/spinlock.h b/arch/x86/include/asm/spinlock.h
index 8247e94..3a56966 100644
--- a/arch/x86/include/asm/spinlock.h
+++ b/arch/x86/include/asm/spinlock.h
@@ -172,70 +172,8 @@
return (((tmp >> TICKET_SHIFT) - tmp) & ((1 << TICKET_SHIFT) - 1)) > 1;
}
-#ifdef CONFIG_PARAVIRT
-/*
- * Define virtualization-friendly old-style lock byte lock, for use in
- * pv_lock_ops if desired.
- *
- * This differs from the pre-2.6.24 spinlock by always using xchgb
- * rather than decb to take the lock; this allows it to use a
- * zero-initialized lock structure. It also maintains a 1-byte
- * contention counter, so that we can implement
- * __byte_spin_is_contended.
- */
-struct __byte_spinlock {
- s8 lock;
- s8 spinners;
-};
+#ifndef CONFIG_PARAVIRT
-static inline int __byte_spin_is_locked(raw_spinlock_t *lock)
-{
- struct __byte_spinlock *bl = (struct __byte_spinlock *)lock;
- return bl->lock != 0;
-}
-
-static inline int __byte_spin_is_contended(raw_spinlock_t *lock)
-{
- struct __byte_spinlock *bl = (struct __byte_spinlock *)lock;
- return bl->spinners != 0;
-}
-
-static inline void __byte_spin_lock(raw_spinlock_t *lock)
-{
- struct __byte_spinlock *bl = (struct __byte_spinlock *)lock;
- s8 val = 1;
-
- asm("1: xchgb %1, %0\n"
- " test %1,%1\n"
- " jz 3f\n"
- " " LOCK_PREFIX "incb %2\n"
- "2: rep;nop\n"
- " cmpb $1, %0\n"
- " je 2b\n"
- " " LOCK_PREFIX "decb %2\n"
- " jmp 1b\n"
- "3:"
- : "+m" (bl->lock), "+q" (val), "+m" (bl->spinners): : "memory");
-}
-
-static inline int __byte_spin_trylock(raw_spinlock_t *lock)
-{
- struct __byte_spinlock *bl = (struct __byte_spinlock *)lock;
- u8 old = 1;
-
- asm("xchgb %1,%0"
- : "+m" (bl->lock), "+q" (old) : : "memory");
-
- return old == 0;
-}
-
-static inline void __byte_spin_unlock(raw_spinlock_t *lock)
-{
- struct __byte_spinlock *bl = (struct __byte_spinlock *)lock;
- smp_wmb();
- bl->lock = 0;
-}
-#else /* !CONFIG_PARAVIRT */
static inline int __raw_spin_is_locked(raw_spinlock_t *lock)
{
return __ticket_spin_is_locked(lock);
@@ -268,7 +206,7 @@
__raw_spin_lock(lock);
}
-#endif /* CONFIG_PARAVIRT */
+#endif
static inline void __raw_spin_unlock_wait(raw_spinlock_t *lock)
{
@@ -330,8 +268,7 @@
{
atomic_t *count = (atomic_t *)lock;
- atomic_dec(count);
- if (atomic_read(count) >= 0)
+ if (atomic_dec_return(count) >= 0)
return 1;
atomic_inc(count);
return 0;
diff --git a/arch/x86/include/asm/stackprotector.h b/arch/x86/include/asm/stackprotector.h
new file mode 100644
index 0000000..c2d742c
--- /dev/null
+++ b/arch/x86/include/asm/stackprotector.h
@@ -0,0 +1,124 @@
+/*
+ * GCC stack protector support.
+ *
+ * Stack protector works by putting predefined pattern at the start of
+ * the stack frame and verifying that it hasn't been overwritten when
+ * returning from the function. The pattern is called stack canary
+ * and unfortunately gcc requires it to be at a fixed offset from %gs.
+ * On x86_64, the offset is 40 bytes and on x86_32 20 bytes. x86_64
+ * and x86_32 use segment registers differently and thus handles this
+ * requirement differently.
+ *
+ * On x86_64, %gs is shared by percpu area and stack canary. All
+ * percpu symbols are zero based and %gs points to the base of percpu
+ * area. The first occupant of the percpu area is always
+ * irq_stack_union which contains stack_canary at offset 40. Userland
+ * %gs is always saved and restored on kernel entry and exit using
+ * swapgs, so stack protector doesn't add any complexity there.
+ *
+ * On x86_32, it's slightly more complicated. As in x86_64, %gs is
+ * used for userland TLS. Unfortunately, some processors are much
+ * slower at loading segment registers with different value when
+ * entering and leaving the kernel, so the kernel uses %fs for percpu
+ * area and manages %gs lazily so that %gs is switched only when
+ * necessary, usually during task switch.
+ *
+ * As gcc requires the stack canary at %gs:20, %gs can't be managed
+ * lazily if stack protector is enabled, so the kernel saves and
+ * restores userland %gs on kernel entry and exit. This behavior is
+ * controlled by CONFIG_X86_32_LAZY_GS and accessors are defined in
+ * system.h to hide the details.
+ */
+
+#ifndef _ASM_STACKPROTECTOR_H
+#define _ASM_STACKPROTECTOR_H 1
+
+#ifdef CONFIG_CC_STACKPROTECTOR
+
+#include <asm/tsc.h>
+#include <asm/processor.h>
+#include <asm/percpu.h>
+#include <asm/system.h>
+#include <asm/desc.h>
+#include <linux/random.h>
+
+/*
+ * 24 byte read-only segment initializer for stack canary. Linker
+ * can't handle the address bit shifting. Address will be set in
+ * head_32 for boot CPU and setup_per_cpu_areas() for others.
+ */
+#define GDT_STACK_CANARY_INIT \
+ [GDT_ENTRY_STACK_CANARY] = { { { 0x00000018, 0x00409000 } } },
+
+/*
+ * Initialize the stackprotector canary value.
+ *
+ * NOTE: this must only be called from functions that never return,
+ * and it must always be inlined.
+ */
+static __always_inline void boot_init_stack_canary(void)
+{
+ u64 canary;
+ u64 tsc;
+
+#ifdef CONFIG_X86_64
+ BUILD_BUG_ON(offsetof(union irq_stack_union, stack_canary) != 40);
+#endif
+ /*
+ * We both use the random pool and the current TSC as a source
+ * of randomness. The TSC only matters for very early init,
+ * there it already has some randomness on most systems. Later
+ * on during the bootup the random pool has true entropy too.
+ */
+ get_random_bytes(&canary, sizeof(canary));
+ tsc = __native_read_tsc();
+ canary += tsc + (tsc << 32UL);
+
+ current->stack_canary = canary;
+#ifdef CONFIG_X86_64
+ percpu_write(irq_stack_union.stack_canary, canary);
+#else
+ percpu_write(stack_canary, canary);
+#endif
+}
+
+static inline void setup_stack_canary_segment(int cpu)
+{
+#ifdef CONFIG_X86_32
+ unsigned long canary = (unsigned long)&per_cpu(stack_canary, cpu) - 20;
+ struct desc_struct *gdt_table = get_cpu_gdt_table(cpu);
+ struct desc_struct desc;
+
+ desc = gdt_table[GDT_ENTRY_STACK_CANARY];
+ desc.base0 = canary & 0xffff;
+ desc.base1 = (canary >> 16) & 0xff;
+ desc.base2 = (canary >> 24) & 0xff;
+ write_gdt_entry(gdt_table, GDT_ENTRY_STACK_CANARY, &desc, DESCTYPE_S);
+#endif
+}
+
+static inline void load_stack_canary_segment(void)
+{
+#ifdef CONFIG_X86_32
+ asm("mov %0, %%gs" : : "r" (__KERNEL_STACK_CANARY) : "memory");
+#endif
+}
+
+#else /* CC_STACKPROTECTOR */
+
+#define GDT_STACK_CANARY_INIT
+
+/* dummy boot_init_stack_canary() is defined in linux/stackprotector.h */
+
+static inline void setup_stack_canary_segment(int cpu)
+{ }
+
+static inline void load_stack_canary_segment(void)
+{
+#ifdef CONFIG_X86_32
+ asm volatile ("mov %0, %%gs" : : "r" (0));
+#endif
+}
+
+#endif /* CC_STACKPROTECTOR */
+#endif /* _ASM_STACKPROTECTOR_H */
diff --git a/arch/x86/include/asm/summit/apic.h b/arch/x86/include/asm/summit/apic.h
deleted file mode 100644
index 93d2c86..0000000
--- a/arch/x86/include/asm/summit/apic.h
+++ /dev/null
@@ -1,202 +0,0 @@
-#ifndef __ASM_SUMMIT_APIC_H
-#define __ASM_SUMMIT_APIC_H
-
-#include <asm/smp.h>
-#include <linux/gfp.h>
-
-#define esr_disable (1)
-#define NO_BALANCE_IRQ (0)
-
-/* In clustered mode, the high nibble of APIC ID is a cluster number.
- * The low nibble is a 4-bit bitmap. */
-#define XAPIC_DEST_CPUS_SHIFT 4
-#define XAPIC_DEST_CPUS_MASK ((1u << XAPIC_DEST_CPUS_SHIFT) - 1)
-#define XAPIC_DEST_CLUSTER_MASK (XAPIC_DEST_CPUS_MASK << XAPIC_DEST_CPUS_SHIFT)
-
-#define APIC_DFR_VALUE (APIC_DFR_CLUSTER)
-
-static inline const cpumask_t *target_cpus(void)
-{
- /* CPU_MASK_ALL (0xff) has undefined behaviour with
- * dest_LowestPrio mode logical clustered apic interrupt routing
- * Just start on cpu 0. IRQ balancing will spread load
- */
- return &cpumask_of_cpu(0);
-}
-
-#define INT_DELIVERY_MODE (dest_LowestPrio)
-#define INT_DEST_MODE 1 /* logical delivery broadcast to all procs */
-
-static inline unsigned long check_apicid_used(physid_mask_t bitmap, int apicid)
-{
- return 0;
-}
-
-/* we don't use the phys_cpu_present_map to indicate apicid presence */
-static inline unsigned long check_apicid_present(int bit)
-{
- return 1;
-}
-
-#define apicid_cluster(apicid) ((apicid) & XAPIC_DEST_CLUSTER_MASK)
-
-extern u8 cpu_2_logical_apicid[];
-
-static inline void init_apic_ldr(void)
-{
- unsigned long val, id;
- int count = 0;
- u8 my_id = (u8)hard_smp_processor_id();
- u8 my_cluster = (u8)apicid_cluster(my_id);
-#ifdef CONFIG_SMP
- u8 lid;
- int i;
-
- /* Create logical APIC IDs by counting CPUs already in cluster. */
- for (count = 0, i = nr_cpu_ids; --i >= 0; ) {
- lid = cpu_2_logical_apicid[i];
- if (lid != BAD_APICID && apicid_cluster(lid) == my_cluster)
- ++count;
- }
-#endif
- /* We only have a 4 wide bitmap in cluster mode. If a deranged
- * BIOS puts 5 CPUs in one APIC cluster, we're hosed. */
- BUG_ON(count >= XAPIC_DEST_CPUS_SHIFT);
- id = my_cluster | (1UL << count);
- apic_write(APIC_DFR, APIC_DFR_VALUE);
- val = apic_read(APIC_LDR) & ~APIC_LDR_MASK;
- val |= SET_APIC_LOGICAL_ID(id);
- apic_write(APIC_LDR, val);
-}
-
-static inline int multi_timer_check(int apic, int irq)
-{
- return 0;
-}
-
-static inline int apic_id_registered(void)
-{
- return 1;
-}
-
-static inline void setup_apic_routing(void)
-{
- printk("Enabling APIC mode: Summit. Using %d I/O APICs\n",
- nr_ioapics);
-}
-
-static inline int apicid_to_node(int logical_apicid)
-{
-#ifdef CONFIG_SMP
- return apicid_2_node[hard_smp_processor_id()];
-#else
- return 0;
-#endif
-}
-
-/* Mapping from cpu number to logical apicid */
-static inline int cpu_to_logical_apicid(int cpu)
-{
-#ifdef CONFIG_SMP
- if (cpu >= nr_cpu_ids)
- return BAD_APICID;
- return (int)cpu_2_logical_apicid[cpu];
-#else
- return logical_smp_processor_id();
-#endif
-}
-
-static inline int cpu_present_to_apicid(int mps_cpu)
-{
- if (mps_cpu < nr_cpu_ids)
- return (int)per_cpu(x86_bios_cpu_apicid, mps_cpu);
- else
- return BAD_APICID;
-}
-
-static inline physid_mask_t ioapic_phys_id_map(physid_mask_t phys_id_map)
-{
- /* For clustered we don't have a good way to do this yet - hack */
- return physids_promote(0x0F);
-}
-
-static inline physid_mask_t apicid_to_cpu_present(int apicid)
-{
- return physid_mask_of_physid(0);
-}
-
-static inline void setup_portio_remap(void)
-{
-}
-
-static inline int check_phys_apicid_present(int boot_cpu_physical_apicid)
-{
- return 1;
-}
-
-static inline void enable_apic_mode(void)
-{
-}
-
-static inline unsigned int cpu_mask_to_apicid(const cpumask_t *cpumask)
-{
- int num_bits_set;
- int cpus_found = 0;
- int cpu;
- int apicid;
-
- num_bits_set = cpus_weight(*cpumask);
- /* Return id to all */
- if (num_bits_set >= nr_cpu_ids)
- return (int) 0xFF;
- /*
- * The cpus in the mask must all be on the apic cluster. If are not
- * on the same apicid cluster return default value of TARGET_CPUS.
- */
- cpu = first_cpu(*cpumask);
- apicid = cpu_to_logical_apicid(cpu);
- while (cpus_found < num_bits_set) {
- if (cpu_isset(cpu, *cpumask)) {
- int new_apicid = cpu_to_logical_apicid(cpu);
- if (apicid_cluster(apicid) !=
- apicid_cluster(new_apicid)){
- printk ("%s: Not a valid mask!\n", __func__);
- return 0xFF;
- }
- apicid = apicid | new_apicid;
- cpus_found++;
- }
- cpu++;
- }
- return apicid;
-}
-
-static inline unsigned int cpu_mask_to_apicid_and(const struct cpumask *inmask,
- const struct cpumask *andmask)
-{
- int apicid = cpu_to_logical_apicid(0);
- cpumask_var_t cpumask;
-
- if (!alloc_cpumask_var(&cpumask, GFP_ATOMIC))
- return apicid;
-
- cpumask_and(cpumask, inmask, andmask);
- cpumask_and(cpumask, cpumask, cpu_online_mask);
- apicid = cpu_mask_to_apicid(cpumask);
-
- free_cpumask_var(cpumask);
- return apicid;
-}
-
-/* cpuid returns the value latched in the HW at reset, not the APIC ID
- * register's value. For any box whose BIOS changes APIC IDs, like
- * clustered APIC systems, we must use hard_smp_processor_id.
- *
- * See Intel's IA-32 SW Dev's Manual Vol2 under CPUID.
- */
-static inline u32 phys_pkg_id(u32 cpuid_apic, int index_msb)
-{
- return hard_smp_processor_id() >> index_msb;
-}
-
-#endif /* __ASM_SUMMIT_APIC_H */
diff --git a/arch/x86/include/asm/summit/apicdef.h b/arch/x86/include/asm/summit/apicdef.h
deleted file mode 100644
index f3fbca1..0000000
--- a/arch/x86/include/asm/summit/apicdef.h
+++ /dev/null
@@ -1,13 +0,0 @@
-#ifndef __ASM_SUMMIT_APICDEF_H
-#define __ASM_SUMMIT_APICDEF_H
-
-#define APIC_ID_MASK (0xFF<<24)
-
-static inline unsigned get_apic_id(unsigned long x)
-{
- return (x>>24)&0xFF;
-}
-
-#define GET_APIC_ID(x) get_apic_id(x)
-
-#endif
diff --git a/arch/x86/include/asm/summit/ipi.h b/arch/x86/include/asm/summit/ipi.h
deleted file mode 100644
index a8a2c24..0000000
--- a/arch/x86/include/asm/summit/ipi.h
+++ /dev/null
@@ -1,26 +0,0 @@
-#ifndef __ASM_SUMMIT_IPI_H
-#define __ASM_SUMMIT_IPI_H
-
-void send_IPI_mask_sequence(const cpumask_t *mask, int vector);
-void send_IPI_mask_allbutself(const cpumask_t *mask, int vector);
-
-static inline void send_IPI_mask(const cpumask_t *mask, int vector)
-{
- send_IPI_mask_sequence(mask, vector);
-}
-
-static inline void send_IPI_allbutself(int vector)
-{
- cpumask_t mask = cpu_online_map;
- cpu_clear(smp_processor_id(), mask);
-
- if (!cpus_empty(mask))
- send_IPI_mask(&mask, vector);
-}
-
-static inline void send_IPI_all(int vector)
-{
- send_IPI_mask(&cpu_online_map, vector);
-}
-
-#endif /* __ASM_SUMMIT_IPI_H */
diff --git a/arch/x86/include/asm/summit/mpparse.h b/arch/x86/include/asm/summit/mpparse.h
deleted file mode 100644
index 380e86c..0000000
--- a/arch/x86/include/asm/summit/mpparse.h
+++ /dev/null
@@ -1,109 +0,0 @@
-#ifndef __ASM_SUMMIT_MPPARSE_H
-#define __ASM_SUMMIT_MPPARSE_H
-
-#include <asm/tsc.h>
-
-extern int use_cyclone;
-
-#ifdef CONFIG_X86_SUMMIT_NUMA
-extern void setup_summit(void);
-#else
-#define setup_summit() {}
-#endif
-
-static inline int mps_oem_check(struct mpc_table *mpc, char *oem,
- char *productid)
-{
- if (!strncmp(oem, "IBM ENSW", 8) &&
- (!strncmp(productid, "VIGIL SMP", 9)
- || !strncmp(productid, "EXA", 3)
- || !strncmp(productid, "RUTHLESS SMP", 12))){
- mark_tsc_unstable("Summit based system");
- use_cyclone = 1; /*enable cyclone-timer*/
- setup_summit();
- return 1;
- }
- return 0;
-}
-
-/* Hook from generic ACPI tables.c */
-static inline int acpi_madt_oem_check(char *oem_id, char *oem_table_id)
-{
- if (!strncmp(oem_id, "IBM", 3) &&
- (!strncmp(oem_table_id, "SERVIGIL", 8)
- || !strncmp(oem_table_id, "EXA", 3))){
- mark_tsc_unstable("Summit based system");
- use_cyclone = 1; /*enable cyclone-timer*/
- setup_summit();
- return 1;
- }
- return 0;
-}
-
-struct rio_table_hdr {
- unsigned char version; /* Version number of this data structure */
- /* Version 3 adds chassis_num & WP_index */
- unsigned char num_scal_dev; /* # of Scalability devices (Twisters for Vigil) */
- unsigned char num_rio_dev; /* # of RIO I/O devices (Cyclones and Winnipegs) */
-} __attribute__((packed));
-
-struct scal_detail {
- unsigned char node_id; /* Scalability Node ID */
- unsigned long CBAR; /* Address of 1MB register space */
- unsigned char port0node; /* Node ID port connected to: 0xFF=None */
- unsigned char port0port; /* Port num port connected to: 0,1,2, or 0xFF=None */
- unsigned char port1node; /* Node ID port connected to: 0xFF = None */
- unsigned char port1port; /* Port num port connected to: 0,1,2, or 0xFF=None */
- unsigned char port2node; /* Node ID port connected to: 0xFF = None */
- unsigned char port2port; /* Port num port connected to: 0,1,2, or 0xFF=None */
- unsigned char chassis_num; /* 1 based Chassis number (1 = boot node) */
-} __attribute__((packed));
-
-struct rio_detail {
- unsigned char node_id; /* RIO Node ID */
- unsigned long BBAR; /* Address of 1MB register space */
- unsigned char type; /* Type of device */
- unsigned char owner_id; /* For WPEG: Node ID of Cyclone that owns this WPEG*/
- /* For CYC: Node ID of Twister that owns this CYC */
- unsigned char port0node; /* Node ID port connected to: 0xFF=None */
- unsigned char port0port; /* Port num port connected to: 0,1,2, or 0xFF=None */
- unsigned char port1node; /* Node ID port connected to: 0xFF=None */
- unsigned char port1port; /* Port num port connected to: 0,1,2, or 0xFF=None */
- unsigned char first_slot; /* For WPEG: Lowest slot number below this WPEG */
- /* For CYC: 0 */
- unsigned char status; /* For WPEG: Bit 0 = 1 : the XAPIC is used */
- /* = 0 : the XAPIC is not used, ie:*/
- /* ints fwded to another XAPIC */
- /* Bits1:7 Reserved */
- /* For CYC: Bits0:7 Reserved */
- unsigned char WP_index; /* For WPEG: WPEG instance index - lower ones have */
- /* lower slot numbers/PCI bus numbers */
- /* For CYC: No meaning */
- unsigned char chassis_num; /* 1 based Chassis number */
- /* For LookOut WPEGs this field indicates the */
- /* Expansion Chassis #, enumerated from Boot */
- /* Node WPEG external port, then Boot Node CYC */
- /* external port, then Next Vigil chassis WPEG */
- /* external port, etc. */
- /* Shared Lookouts have only 1 chassis number (the */
- /* first one assigned) */
-} __attribute__((packed));
-
-
-typedef enum {
- CompatTwister = 0, /* Compatibility Twister */
- AltTwister = 1, /* Alternate Twister of internal 8-way */
- CompatCyclone = 2, /* Compatibility Cyclone */
- AltCyclone = 3, /* Alternate Cyclone of internal 8-way */
- CompatWPEG = 4, /* Compatibility WPEG */
- AltWPEG = 5, /* Second Planar WPEG */
- LookOutAWPEG = 6, /* LookOut WPEG */
- LookOutBWPEG = 7, /* LookOut WPEG */
-} node_type;
-
-static inline int is_WPEG(struct rio_detail *rio){
- return (rio->type == CompatWPEG || rio->type == AltWPEG ||
- rio->type == LookOutAWPEG || rio->type == LookOutBWPEG);
-}
-
-#endif /* __ASM_SUMMIT_MPPARSE_H */
diff --git a/arch/x86/include/asm/syscalls.h b/arch/x86/include/asm/syscalls.h
index c0b0bda..7043408 100644
--- a/arch/x86/include/asm/syscalls.h
+++ b/arch/x86/include/asm/syscalls.h
@@ -29,21 +29,21 @@
/* X86_32 only */
#ifdef CONFIG_X86_32
/* kernel/process_32.c */
-asmlinkage int sys_fork(struct pt_regs);
-asmlinkage int sys_clone(struct pt_regs);
-asmlinkage int sys_vfork(struct pt_regs);
-asmlinkage int sys_execve(struct pt_regs);
+int sys_fork(struct pt_regs *);
+int sys_clone(struct pt_regs *);
+int sys_vfork(struct pt_regs *);
+int sys_execve(struct pt_regs *);
/* kernel/signal_32.c */
asmlinkage int sys_sigsuspend(int, int, old_sigset_t);
asmlinkage int sys_sigaction(int, const struct old_sigaction __user *,
struct old_sigaction __user *);
-asmlinkage int sys_sigaltstack(unsigned long);
-asmlinkage unsigned long sys_sigreturn(unsigned long);
-asmlinkage int sys_rt_sigreturn(unsigned long);
+int sys_sigaltstack(struct pt_regs *);
+unsigned long sys_sigreturn(struct pt_regs *);
+long sys_rt_sigreturn(struct pt_regs *);
/* kernel/ioport.c */
-asmlinkage long sys_iopl(unsigned long);
+long sys_iopl(struct pt_regs *);
/* kernel/sys_i386_32.c */
asmlinkage long sys_mmap2(unsigned long, unsigned long, unsigned long,
@@ -59,8 +59,8 @@
asmlinkage int sys_olduname(struct oldold_utsname __user *);
/* kernel/vm86_32.c */
-asmlinkage int sys_vm86old(struct pt_regs);
-asmlinkage int sys_vm86(struct pt_regs);
+int sys_vm86old(struct pt_regs *);
+int sys_vm86(struct pt_regs *);
#else /* CONFIG_X86_32 */
@@ -74,6 +74,7 @@
asmlinkage long sys_execve(char __user *, char __user * __user *,
char __user * __user *,
struct pt_regs *);
+long sys_arch_prctl(int, unsigned long);
/* kernel/ioport.c */
asmlinkage long sys_iopl(unsigned int, struct pt_regs *);
@@ -81,7 +82,7 @@
/* kernel/signal_64.c */
asmlinkage long sys_sigaltstack(const stack_t __user *, stack_t __user *,
struct pt_regs *);
-asmlinkage long sys_rt_sigreturn(struct pt_regs *);
+long sys_rt_sigreturn(struct pt_regs *);
/* kernel/sys_x86_64.c */
asmlinkage long sys_mmap(unsigned long, unsigned long, unsigned long,
diff --git a/arch/x86/include/asm/system.h b/arch/x86/include/asm/system.h
index 8e626ea..c00bfdb 100644
--- a/arch/x86/include/asm/system.h
+++ b/arch/x86/include/asm/system.h
@@ -23,6 +23,20 @@
#ifdef CONFIG_X86_32
+#ifdef CONFIG_CC_STACKPROTECTOR
+#define __switch_canary \
+ "movl %P[task_canary](%[next]), %%ebx\n\t" \
+ "movl %%ebx, "__percpu_arg([stack_canary])"\n\t"
+#define __switch_canary_oparam \
+ , [stack_canary] "=m" (per_cpu_var(stack_canary))
+#define __switch_canary_iparam \
+ , [task_canary] "i" (offsetof(struct task_struct, stack_canary))
+#else /* CC_STACKPROTECTOR */
+#define __switch_canary
+#define __switch_canary_oparam
+#define __switch_canary_iparam
+#endif /* CC_STACKPROTECTOR */
+
/*
* Saving eflags is important. It switches not only IOPL between tasks,
* it also protects other tasks from NT leaking through sysenter etc.
@@ -44,6 +58,7 @@
"movl %[next_sp],%%esp\n\t" /* restore ESP */ \
"movl $1f,%[prev_ip]\n\t" /* save EIP */ \
"pushl %[next_ip]\n\t" /* restore EIP */ \
+ __switch_canary \
"jmp __switch_to\n" /* regparm call */ \
"1:\t" \
"popl %%ebp\n\t" /* restore EBP */ \
@@ -58,6 +73,8 @@
"=b" (ebx), "=c" (ecx), "=d" (edx), \
"=S" (esi), "=D" (edi) \
\
+ __switch_canary_oparam \
+ \
/* input parameters: */ \
: [next_sp] "m" (next->thread.sp), \
[next_ip] "m" (next->thread.ip), \
@@ -66,6 +83,8 @@
[prev] "a" (prev), \
[next] "d" (next) \
\
+ __switch_canary_iparam \
+ \
: /* reloaded segment registers */ \
"memory"); \
} while (0)
@@ -86,27 +105,44 @@
, "rcx", "rbx", "rdx", "r8", "r9", "r10", "r11", \
"r12", "r13", "r14", "r15"
+#ifdef CONFIG_CC_STACKPROTECTOR
+#define __switch_canary \
+ "movq %P[task_canary](%%rsi),%%r8\n\t" \
+ "movq %%r8,"__percpu_arg([gs_canary])"\n\t"
+#define __switch_canary_oparam \
+ , [gs_canary] "=m" (per_cpu_var(irq_stack_union.stack_canary))
+#define __switch_canary_iparam \
+ , [task_canary] "i" (offsetof(struct task_struct, stack_canary))
+#else /* CC_STACKPROTECTOR */
+#define __switch_canary
+#define __switch_canary_oparam
+#define __switch_canary_iparam
+#endif /* CC_STACKPROTECTOR */
+
/* Save restore flags to clear handle leaking NT */
#define switch_to(prev, next, last) \
- asm volatile(SAVE_CONTEXT \
+ asm volatile(SAVE_CONTEXT \
"movq %%rsp,%P[threadrsp](%[prev])\n\t" /* save RSP */ \
"movq %P[threadrsp](%[next]),%%rsp\n\t" /* restore RSP */ \
"call __switch_to\n\t" \
".globl thread_return\n" \
"thread_return:\n\t" \
- "movq %%gs:%P[pda_pcurrent],%%rsi\n\t" \
+ "movq "__percpu_arg([current_task])",%%rsi\n\t" \
+ __switch_canary \
"movq %P[thread_info](%%rsi),%%r8\n\t" \
- LOCK_PREFIX "btr %[tif_fork],%P[ti_flags](%%r8)\n\t" \
"movq %%rax,%%rdi\n\t" \
- "jc ret_from_fork\n\t" \
+ "testl %[_tif_fork],%P[ti_flags](%%r8)\n\t" \
+ "jnz ret_from_fork\n\t" \
RESTORE_CONTEXT \
: "=a" (last) \
+ __switch_canary_oparam \
: [next] "S" (next), [prev] "D" (prev), \
[threadrsp] "i" (offsetof(struct task_struct, thread.sp)), \
[ti_flags] "i" (offsetof(struct thread_info, flags)), \
- [tif_fork] "i" (TIF_FORK), \
+ [_tif_fork] "i" (_TIF_FORK), \
[thread_info] "i" (offsetof(struct task_struct, stack)), \
- [pda_pcurrent] "i" (offsetof(struct x8664_pda, pcurrent)) \
+ [current_task] "m" (per_cpu_var(current_task)) \
+ __switch_canary_iparam \
: "memory", "cc" __EXTRA_CLOBBER)
#endif
@@ -165,6 +201,25 @@
#define savesegment(seg, value) \
asm("mov %%" #seg ",%0":"=r" (value) : : "memory")
+/*
+ * x86_32 user gs accessors.
+ */
+#ifdef CONFIG_X86_32
+#ifdef CONFIG_X86_32_LAZY_GS
+#define get_user_gs(regs) (u16)({unsigned long v; savesegment(gs, v); v;})
+#define set_user_gs(regs, v) loadsegment(gs, (unsigned long)(v))
+#define task_user_gs(tsk) ((tsk)->thread.gs)
+#define lazy_save_gs(v) savesegment(gs, (v))
+#define lazy_load_gs(v) loadsegment(gs, (v))
+#else /* X86_32_LAZY_GS */
+#define get_user_gs(regs) (u16)((regs)->gs)
+#define set_user_gs(regs, v) do { (regs)->gs = (v); } while (0)
+#define task_user_gs(tsk) (task_pt_regs(tsk)->gs)
+#define lazy_save_gs(v) do { } while (0)
+#define lazy_load_gs(v) do { } while (0)
+#endif /* X86_32_LAZY_GS */
+#endif /* X86_32 */
+
static inline unsigned long get_limit(unsigned long segment)
{
unsigned long __limit;
diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h
index 9878964..ca7310e 100644
--- a/arch/x86/include/asm/thread_info.h
+++ b/arch/x86/include/asm/thread_info.h
@@ -40,6 +40,7 @@
*/
__u8 supervisor_stack[0];
#endif
+ int uaccess_err;
};
#define INIT_THREAD_INFO(tsk) \
@@ -82,6 +83,7 @@
#define TIF_SYSCALL_AUDIT 7 /* syscall auditing active */
#define TIF_SECCOMP 8 /* secure computing */
#define TIF_MCE_NOTIFY 10 /* notify userspace of an MCE */
+#define TIF_PERF_COUNTERS 11 /* notify perf counter work */
#define TIF_NOTSC 16 /* TSC is not accessible in userland */
#define TIF_IA32 17 /* 32bit process */
#define TIF_FORK 18 /* ret_from_fork */
@@ -104,6 +106,7 @@
#define _TIF_SYSCALL_AUDIT (1 << TIF_SYSCALL_AUDIT)
#define _TIF_SECCOMP (1 << TIF_SECCOMP)
#define _TIF_MCE_NOTIFY (1 << TIF_MCE_NOTIFY)
+#define _TIF_PERF_COUNTERS (1 << TIF_PERF_COUNTERS)
#define _TIF_NOTSC (1 << TIF_NOTSC)
#define _TIF_IA32 (1 << TIF_IA32)
#define _TIF_FORK (1 << TIF_FORK)
@@ -135,7 +138,7 @@
/* Only used for 64 bit */
#define _TIF_DO_NOTIFY_MASK \
- (_TIF_SIGPENDING|_TIF_MCE_NOTIFY|_TIF_NOTIFY_RESUME)
+ (_TIF_SIGPENDING|_TIF_MCE_NOTIFY|_TIF_PERF_COUNTERS|_TIF_NOTIFY_RESUME)
/* flags to check in __switch_to() */
#define _TIF_WORK_CTXSW \
@@ -194,25 +197,21 @@
#else /* X86_32 */
-#include <asm/pda.h>
+#include <asm/percpu.h>
+#define KERNEL_STACK_OFFSET (5*8)
/*
* macros/functions for gaining access to the thread information structure
* preempt_count needs to be 1 initially, until the scheduler is functional.
*/
#ifndef __ASSEMBLY__
+DECLARE_PER_CPU(unsigned long, kernel_stack);
+
static inline struct thread_info *current_thread_info(void)
{
struct thread_info *ti;
- ti = (void *)(read_pda(kernelstack) + PDA_STACKOFFSET - THREAD_SIZE);
- return ti;
-}
-
-/* do not use in interrupt context */
-static inline struct thread_info *stack_thread_info(void)
-{
- struct thread_info *ti;
- asm("andq %%rsp,%0; " : "=r" (ti) : "0" (~(THREAD_SIZE - 1)));
+ ti = (void *)(percpu_read(kernel_stack) +
+ KERNEL_STACK_OFFSET - THREAD_SIZE);
return ti;
}
@@ -220,8 +219,8 @@
/* how to get the thread information struct from ASM */
#define GET_THREAD_INFO(reg) \
- movq %gs:pda_kernelstack,reg ; \
- subq $(THREAD_SIZE-PDA_STACKOFFSET),reg
+ movq PER_CPU_VAR(kernel_stack),reg ; \
+ subq $(THREAD_SIZE-KERNEL_STACK_OFFSET),reg
#endif
diff --git a/arch/x86/include/asm/timer.h b/arch/x86/include/asm/timer.h
index 2bb6a83..a81195e 100644
--- a/arch/x86/include/asm/timer.h
+++ b/arch/x86/include/asm/timer.h
@@ -3,6 +3,7 @@
#include <linux/init.h>
#include <linux/pm.h>
#include <linux/percpu.h>
+#include <linux/interrupt.h>
#define TICK_SIZE (tick_nsec / 1000)
@@ -12,6 +13,7 @@
#ifdef CONFIG_X86_32
extern int timer_ack;
extern int recalibrate_cpu_khz(void);
+extern irqreturn_t timer_interrupt(int irq, void *dev_id);
#endif /* CONFIG_X86_32 */
extern int no_timer_check;
diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h
index 0e7bbb5..d3539f9 100644
--- a/arch/x86/include/asm/tlbflush.h
+++ b/arch/x86/include/asm/tlbflush.h
@@ -113,7 +113,7 @@
__flush_tlb();
}
-static inline void native_flush_tlb_others(const cpumask_t *cpumask,
+static inline void native_flush_tlb_others(const struct cpumask *cpumask,
struct mm_struct *mm,
unsigned long va)
{
@@ -142,31 +142,28 @@
flush_tlb_mm(vma->vm_mm);
}
-void native_flush_tlb_others(const cpumask_t *cpumask, struct mm_struct *mm,
- unsigned long va);
+void native_flush_tlb_others(const struct cpumask *cpumask,
+ struct mm_struct *mm, unsigned long va);
#define TLBSTATE_OK 1
#define TLBSTATE_LAZY 2
-#ifdef CONFIG_X86_32
struct tlb_state {
struct mm_struct *active_mm;
int state;
- char __cacheline_padding[L1_CACHE_BYTES-8];
};
DECLARE_PER_CPU(struct tlb_state, cpu_tlbstate);
-void reset_lazy_tlbstate(void);
-#else
static inline void reset_lazy_tlbstate(void)
{
+ percpu_write(cpu_tlbstate.state, 0);
+ percpu_write(cpu_tlbstate.active_mm, &init_mm);
}
-#endif
#endif /* SMP */
#ifndef CONFIG_PARAVIRT
-#define flush_tlb_others(mask, mm, va) native_flush_tlb_others(&mask, mm, va)
+#define flush_tlb_others(mask, mm, va) native_flush_tlb_others(mask, mm, va)
#endif
static inline void flush_tlb_kernel_range(unsigned long start,
@@ -175,4 +172,6 @@
flush_tlb_all();
}
+extern void zap_low_mappings(void);
+
#endif /* _ASM_X86_TLBFLUSH_H */
diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h
index 4e2f2e0..77cfb2c 100644
--- a/arch/x86/include/asm/topology.h
+++ b/arch/x86/include/asm/topology.h
@@ -74,6 +74,8 @@
return &node_to_cpumask_map[node];
}
+static inline void setup_node_to_cpumask_map(void) { }
+
#else /* CONFIG_X86_64 */
/* Mappings between node number and cpus on that node. */
@@ -83,7 +85,8 @@
DECLARE_EARLY_PER_CPU(int, x86_cpu_to_node_map);
/* Returns the number of the current Node. */
-#define numa_node_id() read_pda(nodenumber)
+DECLARE_PER_CPU(int, node_number);
+#define numa_node_id() percpu_read(node_number)
#ifdef CONFIG_DEBUG_PER_CPU_MAPS
extern int cpu_to_node(int cpu);
@@ -102,10 +105,7 @@
/* Same function but used if called before per_cpu areas are setup */
static inline int early_cpu_to_node(int cpu)
{
- if (early_per_cpu_ptr(x86_cpu_to_node_map))
- return early_per_cpu_ptr(x86_cpu_to_node_map)[cpu];
-
- return per_cpu(x86_cpu_to_node_map, cpu);
+ return early_per_cpu(x86_cpu_to_node_map, cpu);
}
/* Returns a pointer to the cpumask of CPUs on Node 'node'. */
@@ -122,6 +122,8 @@
#endif /* !CONFIG_DEBUG_PER_CPU_MAPS */
+extern void setup_node_to_cpumask_map(void);
+
/*
* Replace default node_to_cpumask_ptr with optimized version
* Deprecated: use "const struct cpumask *mask = cpumask_of_node(node)"
@@ -192,9 +194,20 @@
#else /* !CONFIG_NUMA */
-#define numa_node_id() 0
-#define cpu_to_node(cpu) 0
-#define early_cpu_to_node(cpu) 0
+static inline int numa_node_id(void)
+{
+ return 0;
+}
+
+static inline int cpu_to_node(int cpu)
+{
+ return 0;
+}
+
+static inline int early_cpu_to_node(int cpu)
+{
+ return 0;
+}
static inline const cpumask_t *cpumask_of_node(int node)
{
@@ -209,6 +222,8 @@
return first_cpu(cpu_online_map);
}
+static inline void setup_node_to_cpumask_map(void) { }
+
/*
* Replace default node_to_cpumask_ptr with optimized version
* Deprecated: use "const struct cpumask *mask = cpumask_of_node(node)"
diff --git a/arch/x86/include/asm/trampoline.h b/arch/x86/include/asm/trampoline.h
index 780ba0a..90f06c2 100644
--- a/arch/x86/include/asm/trampoline.h
+++ b/arch/x86/include/asm/trampoline.h
@@ -13,6 +13,7 @@
extern unsigned long init_rsp;
extern unsigned long initial_code;
+extern unsigned long initial_gs;
#define TRAMPOLINE_SIZE roundup(trampoline_end - trampoline_data, PAGE_SIZE)
#define TRAMPOLINE_BASE 0x6000
diff --git a/arch/x86/include/asm/traps.h b/arch/x86/include/asm/traps.h
index cf3bb05..0d53425 100644
--- a/arch/x86/include/asm/traps.h
+++ b/arch/x86/include/asm/traps.h
@@ -41,7 +41,7 @@
dotraplinkage void do_overflow(struct pt_regs *, long);
dotraplinkage void do_bounds(struct pt_regs *, long);
dotraplinkage void do_invalid_op(struct pt_regs *, long);
-dotraplinkage void do_device_not_available(struct pt_regs);
+dotraplinkage void do_device_not_available(struct pt_regs *, long);
dotraplinkage void do_coprocessor_segment_overrun(struct pt_regs *, long);
dotraplinkage void do_invalid_TSS(struct pt_regs *, long);
dotraplinkage void do_segment_not_present(struct pt_regs *, long);
diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h
index 4340055..b685ece 100644
--- a/arch/x86/include/asm/uaccess.h
+++ b/arch/x86/include/asm/uaccess.h
@@ -121,7 +121,7 @@
#define __get_user_x(size, ret, x, ptr) \
asm volatile("call __get_user_" #size \
- : "=a" (ret),"=d" (x) \
+ : "=a" (ret), "=d" (x) \
: "0" (ptr)) \
/* Careful: we have to cast the result to the type of the pointer
@@ -181,12 +181,12 @@
#define __put_user_x(size, x, ptr, __ret_pu) \
asm volatile("call __put_user_" #size : "=a" (__ret_pu) \
- :"0" ((typeof(*(ptr)))(x)), "c" (ptr) : "ebx")
+ : "0" ((typeof(*(ptr)))(x)), "c" (ptr) : "ebx")
#ifdef CONFIG_X86_32
-#define __put_user_u64(x, addr, err) \
+#define __put_user_asm_u64(x, addr, err, errret) \
asm volatile("1: movl %%eax,0(%2)\n" \
"2: movl %%edx,4(%2)\n" \
"3:\n" \
@@ -197,14 +197,24 @@
_ASM_EXTABLE(1b, 4b) \
_ASM_EXTABLE(2b, 4b) \
: "=r" (err) \
- : "A" (x), "r" (addr), "i" (-EFAULT), "0" (err))
+ : "A" (x), "r" (addr), "i" (errret), "0" (err))
+
+#define __put_user_asm_ex_u64(x, addr) \
+ asm volatile("1: movl %%eax,0(%1)\n" \
+ "2: movl %%edx,4(%1)\n" \
+ "3:\n" \
+ _ASM_EXTABLE(1b, 2b - 1b) \
+ _ASM_EXTABLE(2b, 3b - 2b) \
+ : : "A" (x), "r" (addr))
#define __put_user_x8(x, ptr, __ret_pu) \
asm volatile("call __put_user_8" : "=a" (__ret_pu) \
: "A" ((typeof(*(ptr)))(x)), "c" (ptr) : "ebx")
#else
-#define __put_user_u64(x, ptr, retval) \
- __put_user_asm(x, ptr, retval, "q", "", "Zr", -EFAULT)
+#define __put_user_asm_u64(x, ptr, retval, errret) \
+ __put_user_asm(x, ptr, retval, "q", "", "Zr", errret)
+#define __put_user_asm_ex_u64(x, addr) \
+ __put_user_asm_ex(x, addr, "q", "", "Zr")
#define __put_user_x8(x, ptr, __ret_pu) __put_user_x(8, x, ptr, __ret_pu)
#endif
@@ -276,10 +286,32 @@
__put_user_asm(x, ptr, retval, "w", "w", "ir", errret); \
break; \
case 4: \
- __put_user_asm(x, ptr, retval, "l", "k", "ir", errret);\
+ __put_user_asm(x, ptr, retval, "l", "k", "ir", errret); \
break; \
case 8: \
- __put_user_u64((__typeof__(*ptr))(x), ptr, retval); \
+ __put_user_asm_u64((__typeof__(*ptr))(x), ptr, retval, \
+ errret); \
+ break; \
+ default: \
+ __put_user_bad(); \
+ } \
+} while (0)
+
+#define __put_user_size_ex(x, ptr, size) \
+do { \
+ __chk_user_ptr(ptr); \
+ switch (size) { \
+ case 1: \
+ __put_user_asm_ex(x, ptr, "b", "b", "iq"); \
+ break; \
+ case 2: \
+ __put_user_asm_ex(x, ptr, "w", "w", "ir"); \
+ break; \
+ case 4: \
+ __put_user_asm_ex(x, ptr, "l", "k", "ir"); \
+ break; \
+ case 8: \
+ __put_user_asm_ex_u64((__typeof__(*ptr))(x), ptr); \
break; \
default: \
__put_user_bad(); \
@@ -311,9 +343,12 @@
#ifdef CONFIG_X86_32
#define __get_user_asm_u64(x, ptr, retval, errret) (x) = __get_user_bad()
+#define __get_user_asm_ex_u64(x, ptr) (x) = __get_user_bad()
#else
#define __get_user_asm_u64(x, ptr, retval, errret) \
__get_user_asm(x, ptr, retval, "q", "", "=r", errret)
+#define __get_user_asm_ex_u64(x, ptr) \
+ __get_user_asm_ex(x, ptr, "q", "", "=r")
#endif
#define __get_user_size(x, ptr, size, retval, errret) \
@@ -350,6 +385,33 @@
: "=r" (err), ltype(x) \
: "m" (__m(addr)), "i" (errret), "0" (err))
+#define __get_user_size_ex(x, ptr, size) \
+do { \
+ __chk_user_ptr(ptr); \
+ switch (size) { \
+ case 1: \
+ __get_user_asm_ex(x, ptr, "b", "b", "=q"); \
+ break; \
+ case 2: \
+ __get_user_asm_ex(x, ptr, "w", "w", "=r"); \
+ break; \
+ case 4: \
+ __get_user_asm_ex(x, ptr, "l", "k", "=r"); \
+ break; \
+ case 8: \
+ __get_user_asm_ex_u64(x, ptr); \
+ break; \
+ default: \
+ (x) = __get_user_bad(); \
+ } \
+} while (0)
+
+#define __get_user_asm_ex(x, addr, itype, rtype, ltype) \
+ asm volatile("1: mov"itype" %1,%"rtype"0\n" \
+ "2:\n" \
+ _ASM_EXTABLE(1b, 2b - 1b) \
+ : ltype(x) : "m" (__m(addr)))
+
#define __put_user_nocheck(x, ptr, size) \
({ \
int __pu_err; \
@@ -385,6 +447,26 @@
_ASM_EXTABLE(1b, 3b) \
: "=r"(err) \
: ltype(x), "m" (__m(addr)), "i" (errret), "0" (err))
+
+#define __put_user_asm_ex(x, addr, itype, rtype, ltype) \
+ asm volatile("1: mov"itype" %"rtype"0,%1\n" \
+ "2:\n" \
+ _ASM_EXTABLE(1b, 2b - 1b) \
+ : : ltype(x), "m" (__m(addr)))
+
+/*
+ * uaccess_try and catch
+ */
+#define uaccess_try do { \
+ int prev_err = current_thread_info()->uaccess_err; \
+ current_thread_info()->uaccess_err = 0; \
+ barrier();
+
+#define uaccess_catch(err) \
+ (err) |= current_thread_info()->uaccess_err; \
+ current_thread_info()->uaccess_err = prev_err; \
+} while (0)
+
/**
* __get_user: - Get a simple variable from user space, with less checking.
* @x: Variable to store result.
@@ -408,6 +490,7 @@
#define __get_user(x, ptr) \
__get_user_nocheck((x), (ptr), sizeof(*(ptr)))
+
/**
* __put_user: - Write a simple value into user space, with less checking.
* @x: Value to copy to user space.
@@ -435,6 +518,45 @@
#define __put_user_unaligned __put_user
/*
+ * {get|put}_user_try and catch
+ *
+ * get_user_try {
+ * get_user_ex(...);
+ * } get_user_catch(err)
+ */
+#define get_user_try uaccess_try
+#define get_user_catch(err) uaccess_catch(err)
+
+#define get_user_ex(x, ptr) do { \
+ unsigned long __gue_val; \
+ __get_user_size_ex((__gue_val), (ptr), (sizeof(*(ptr)))); \
+ (x) = (__force __typeof__(*(ptr)))__gue_val; \
+} while (0)
+
+#ifdef CONFIG_X86_WP_WORKS_OK
+
+#define put_user_try uaccess_try
+#define put_user_catch(err) uaccess_catch(err)
+
+#define put_user_ex(x, ptr) \
+ __put_user_size_ex((__typeof__(*(ptr)))(x), (ptr), sizeof(*(ptr)))
+
+#else /* !CONFIG_X86_WP_WORKS_OK */
+
+#define put_user_try do { \
+ int __uaccess_err = 0;
+
+#define put_user_catch(err) \
+ (err) |= __uaccess_err; \
+} while (0)
+
+#define put_user_ex(x, ptr) do { \
+ __uaccess_err |= __put_user(x, ptr); \
+} while (0)
+
+#endif /* CONFIG_X86_WP_WORKS_OK */
+
+/*
* movsl can be slow when source and dest are not both 8-byte aligned
*/
#ifdef CONFIG_X86_INTEL_USERCOPY
diff --git a/arch/x86/include/asm/uaccess_32.h b/arch/x86/include/asm/uaccess_32.h
index 5e06259..a0ba613 100644
--- a/arch/x86/include/asm/uaccess_32.h
+++ b/arch/x86/include/asm/uaccess_32.h
@@ -157,7 +157,7 @@
}
static __always_inline unsigned long __copy_from_user_nocache(void *to,
- const void __user *from, unsigned long n)
+ const void __user *from, unsigned long n, unsigned long total)
{
might_fault();
if (__builtin_constant_p(n)) {
@@ -180,7 +180,7 @@
static __always_inline unsigned long
__copy_from_user_inatomic_nocache(void *to, const void __user *from,
- unsigned long n)
+ unsigned long n, unsigned long total)
{
return __copy_from_user_ll_nocache_nozero(to, from, n);
}
diff --git a/arch/x86/include/asm/uaccess_64.h b/arch/x86/include/asm/uaccess_64.h
index 84210c4..dcaa0404 100644
--- a/arch/x86/include/asm/uaccess_64.h
+++ b/arch/x86/include/asm/uaccess_64.h
@@ -189,17 +189,28 @@
unsigned size, int zerorest);
static inline int __copy_from_user_nocache(void *dst, const void __user *src,
- unsigned size)
+ unsigned size, unsigned long total)
{
might_sleep();
- return __copy_user_nocache(dst, src, size, 1);
+ /*
+ * In practice this limit means that large file write()s
+ * which get chunked to 4K copies get handled via
+ * non-temporal stores here. Smaller writes get handled
+ * via regular __copy_from_user():
+ */
+ if (likely(total >= PAGE_SIZE))
+ return __copy_user_nocache(dst, src, size, 1);
+ else
+ return __copy_from_user(dst, src, size);
}
static inline int __copy_from_user_inatomic_nocache(void *dst,
- const void __user *src,
- unsigned size)
+ const void __user *src, unsigned size, unsigned total)
{
- return __copy_user_nocache(dst, src, size, 0);
+ if (likely(total >= PAGE_SIZE))
+ return __copy_user_nocache(dst, src, size, 0);
+ else
+ return __copy_from_user_inatomic(dst, src, size);
}
unsigned long
diff --git a/arch/x86/include/asm/unistd_32.h b/arch/x86/include/asm/unistd_32.h
index f2bba78..7e47658 100644
--- a/arch/x86/include/asm/unistd_32.h
+++ b/arch/x86/include/asm/unistd_32.h
@@ -338,6 +338,7 @@
#define __NR_dup3 330
#define __NR_pipe2 331
#define __NR_inotify_init1 332
+#define __NR_perf_counter_open 333
#ifdef __KERNEL__
diff --git a/arch/x86/include/asm/unistd_64.h b/arch/x86/include/asm/unistd_64.h
index d2e415e..53025fe 100644
--- a/arch/x86/include/asm/unistd_64.h
+++ b/arch/x86/include/asm/unistd_64.h
@@ -653,7 +653,8 @@
__SYSCALL(__NR_pipe2, sys_pipe2)
#define __NR_inotify_init1 294
__SYSCALL(__NR_inotify_init1, sys_inotify_init1)
-
+#define __NR_perf_counter_open 295
+__SYSCALL(__NR_perf_counter_open, sys_perf_counter_open)
#ifndef __NO_STUBS
#define __ARCH_WANT_OLD_READDIR
diff --git a/arch/x86/include/asm/uv/uv.h b/arch/x86/include/asm/uv/uv.h
new file mode 100644
index 0000000..8242bf9
--- /dev/null
+++ b/arch/x86/include/asm/uv/uv.h
@@ -0,0 +1,36 @@
+#ifndef _ASM_X86_UV_UV_H
+#define _ASM_X86_UV_UV_H
+
+enum uv_system_type {UV_NONE, UV_LEGACY_APIC, UV_X2APIC, UV_NON_UNIQUE_APIC};
+
+struct cpumask;
+struct mm_struct;
+
+#ifdef CONFIG_X86_UV
+
+extern enum uv_system_type get_uv_system_type(void);
+extern int is_uv_system(void);
+extern void uv_cpu_init(void);
+extern void uv_system_init(void);
+extern int uv_wakeup_secondary(int phys_apicid, unsigned int start_rip);
+extern const struct cpumask *uv_flush_tlb_others(const struct cpumask *cpumask,
+ struct mm_struct *mm,
+ unsigned long va,
+ unsigned int cpu);
+
+#else /* X86_UV */
+
+static inline enum uv_system_type get_uv_system_type(void) { return UV_NONE; }
+static inline int is_uv_system(void) { return 0; }
+static inline void uv_cpu_init(void) { }
+static inline void uv_system_init(void) { }
+static inline int uv_wakeup_secondary(int phys_apicid, unsigned int start_rip)
+{ return 1; }
+static inline const struct cpumask *
+uv_flush_tlb_others(const struct cpumask *cpumask, struct mm_struct *mm,
+ unsigned long va, unsigned int cpu)
+{ return cpumask; }
+
+#endif /* X86_UV */
+
+#endif /* _ASM_X86_UV_UV_H */
diff --git a/arch/x86/include/asm/uv/uv_bau.h b/arch/x86/include/asm/uv/uv_bau.h
index 50423c7..9b0e61b 100644
--- a/arch/x86/include/asm/uv/uv_bau.h
+++ b/arch/x86/include/asm/uv/uv_bau.h
@@ -325,7 +325,6 @@
#define cpubit_isset(cpu, bau_local_cpumask) \
test_bit((cpu), (bau_local_cpumask).bits)
-extern int uv_flush_tlb_others(cpumask_t *, struct mm_struct *, unsigned long);
extern void uv_bau_message_intr1(void);
extern void uv_bau_timeout_intr1(void);
diff --git a/arch/x86/include/asm/vic.h b/arch/x86/include/asm/vic.h
deleted file mode 100644
index 53100f3..0000000
--- a/arch/x86/include/asm/vic.h
+++ /dev/null
@@ -1,61 +0,0 @@
-/* Copyright (C) 1999,2001
- *
- * Author: J.E.J.Bottomley@HansenPartnership.com
- *
- * Standard include definitions for the NCR Voyager Interrupt Controller */
-
-/* The eight CPI vectors. To activate a CPI, you write a bit mask
- * corresponding to the processor set to be interrupted into the
- * relevant register. That set of CPUs will then be interrupted with
- * the CPI */
-static const int VIC_CPI_Registers[] =
- {0xFC00, 0xFC01, 0xFC08, 0xFC09,
- 0xFC10, 0xFC11, 0xFC18, 0xFC19 };
-
-#define VIC_PROC_WHO_AM_I 0xfc29
-# define QUAD_IDENTIFIER 0xC0
-# define EIGHT_SLOT_IDENTIFIER 0xE0
-#define QIC_EXTENDED_PROCESSOR_SELECT 0xFC72
-#define VIC_CPI_BASE_REGISTER 0xFC41
-#define VIC_PROCESSOR_ID 0xFC21
-# define VIC_CPU_MASQUERADE_ENABLE 0x8
-
-#define VIC_CLAIM_REGISTER_0 0xFC38
-#define VIC_CLAIM_REGISTER_1 0xFC39
-#define VIC_REDIRECT_REGISTER_0 0xFC60
-#define VIC_REDIRECT_REGISTER_1 0xFC61
-#define VIC_PRIORITY_REGISTER 0xFC20
-
-#define VIC_PRIMARY_MC_BASE 0xFC48
-#define VIC_SECONDARY_MC_BASE 0xFC49
-
-#define QIC_PROCESSOR_ID 0xFC71
-# define QIC_CPUID_ENABLE 0x08
-
-#define QIC_VIC_CPI_BASE_REGISTER 0xFC79
-#define QIC_CPI_BASE_REGISTER 0xFC7A
-
-#define QIC_MASK_REGISTER0 0xFC80
-/* NOTE: these are masked high, enabled low */
-# define QIC_PERF_TIMER 0x01
-# define QIC_LPE 0x02
-# define QIC_SYS_INT 0x04
-# define QIC_CMN_INT 0x08
-/* at the moment, just enable CMN_INT, disable SYS_INT */
-# define QIC_DEFAULT_MASK0 (~(QIC_CMN_INT /* | VIC_SYS_INT */))
-#define QIC_MASK_REGISTER1 0xFC81
-# define QIC_BOOT_CPI_MASK 0xFE
-/* Enable CPI's 1-6 inclusive */
-# define QIC_CPI_ENABLE 0x81
-
-#define QIC_INTERRUPT_CLEAR0 0xFC8A
-#define QIC_INTERRUPT_CLEAR1 0xFC8B
-
-/* this is where we place the CPI vectors */
-#define VIC_DEFAULT_CPI_BASE 0xC0
-/* this is where we place the QIC CPI vectors */
-#define QIC_DEFAULT_CPI_BASE 0xD0
-
-#define VIC_BOOT_INTERRUPT_MASK 0xfe
-
-extern void smp_vic_timer_interrupt(void);
diff --git a/arch/x86/include/asm/voyager.h b/arch/x86/include/asm/voyager.h
deleted file mode 100644
index b3e6473..0000000
--- a/arch/x86/include/asm/voyager.h
+++ /dev/null
@@ -1,529 +0,0 @@
-/* Copyright (C) 1999,2001
- *
- * Author: J.E.J.Bottomley@HansenPartnership.com
- *
- * Standard include definitions for the NCR Voyager system */
-
-#undef VOYAGER_DEBUG
-#undef VOYAGER_CAT_DEBUG
-
-#ifdef VOYAGER_DEBUG
-#define VDEBUG(x) printk x
-#else
-#define VDEBUG(x)
-#endif
-
-/* There are three levels of voyager machine: 3,4 and 5. The rule is
- * if it's less than 3435 it's a Level 3 except for a 3360 which is
- * a level 4. A 3435 or above is a Level 5 */
-#define VOYAGER_LEVEL5_AND_ABOVE 0x3435
-#define VOYAGER_LEVEL4 0x3360
-
-/* The L4 DINO ASIC */
-#define VOYAGER_DINO 0x43
-
-/* voyager ports in standard I/O space */
-#define VOYAGER_MC_SETUP 0x96
-
-
-#define VOYAGER_CAT_CONFIG_PORT 0x97
-# define VOYAGER_CAT_DESELECT 0xff
-#define VOYAGER_SSPB_RELOCATION_PORT 0x98
-
-/* Valid CAT controller commands */
-/* start instruction register cycle */
-#define VOYAGER_CAT_IRCYC 0x01
-/* start data register cycle */
-#define VOYAGER_CAT_DRCYC 0x02
-/* move to execute state */
-#define VOYAGER_CAT_RUN 0x0F
-/* end operation */
-#define VOYAGER_CAT_END 0x80
-/* hold in idle state */
-#define VOYAGER_CAT_HOLD 0x90
-/* single step an "intest" vector */
-#define VOYAGER_CAT_STEP 0xE0
-/* return cat controller to CLEMSON mode */
-#define VOYAGER_CAT_CLEMSON 0xFF
-
-/* the default cat command header */
-#define VOYAGER_CAT_HEADER 0x7F
-
-/* the range of possible CAT module ids in the system */
-#define VOYAGER_MIN_MODULE 0x10
-#define VOYAGER_MAX_MODULE 0x1f
-
-/* The voyager registers per asic */
-#define VOYAGER_ASIC_ID_REG 0x00
-#define VOYAGER_ASIC_TYPE_REG 0x01
-/* the sub address registers can be made auto incrementing on reads */
-#define VOYAGER_AUTO_INC_REG 0x02
-# define VOYAGER_AUTO_INC 0x04
-# define VOYAGER_NO_AUTO_INC 0xfb
-#define VOYAGER_SUBADDRDATA 0x03
-#define VOYAGER_SCANPATH 0x05
-# define VOYAGER_CONNECT_ASIC 0x01
-# define VOYAGER_DISCONNECT_ASIC 0xfe
-#define VOYAGER_SUBADDRLO 0x06
-#define VOYAGER_SUBADDRHI 0x07
-#define VOYAGER_SUBMODSELECT 0x08
-#define VOYAGER_SUBMODPRESENT 0x09
-
-#define VOYAGER_SUBADDR_LO 0xff
-#define VOYAGER_SUBADDR_HI 0xffff
-
-/* the maximum size of a scan path -- used to form instructions */
-#define VOYAGER_MAX_SCAN_PATH 0x100
-/* the biggest possible register size (in bytes) */
-#define VOYAGER_MAX_REG_SIZE 4
-
-/* Total number of possible modules (including submodules) */
-#define VOYAGER_MAX_MODULES 16
-/* Largest number of asics per module */
-#define VOYAGER_MAX_ASICS_PER_MODULE 7
-
-/* the CAT asic of each module is always the first one */
-#define VOYAGER_CAT_ID 0
-#define VOYAGER_PSI 0x1a
-
-/* voyager instruction operations and registers */
-#define VOYAGER_READ_CONFIG 0x1
-#define VOYAGER_WRITE_CONFIG 0x2
-#define VOYAGER_BYPASS 0xff
-
-typedef struct voyager_asic {
- __u8 asic_addr; /* ASIC address; Level 4 */
- __u8 asic_type; /* ASIC type */
- __u8 asic_id; /* ASIC id */
- __u8 jtag_id[4]; /* JTAG id */
- __u8 asic_location; /* Location within scan path; start w/ 0 */
- __u8 bit_location; /* Location within bit stream; start w/ 0 */
- __u8 ireg_length; /* Instruction register length */
- __u16 subaddr; /* Amount of sub address space */
- struct voyager_asic *next; /* Next asic in linked list */
-} voyager_asic_t;
-
-typedef struct voyager_module {
- __u8 module_addr; /* Module address */
- __u8 scan_path_connected; /* Scan path connected */
- __u16 ee_size; /* Size of the EEPROM */
- __u16 num_asics; /* Number of Asics */
- __u16 inst_bits; /* Instruction bits in the scan path */
- __u16 largest_reg; /* Largest register in the scan path */
- __u16 smallest_reg; /* Smallest register in the scan path */
- voyager_asic_t *asic; /* First ASIC in scan path (CAT_I) */
- struct voyager_module *submodule; /* Submodule pointer */
- struct voyager_module *next; /* Next module in linked list */
-} voyager_module_t;
-
-typedef struct voyager_eeprom_hdr {
- __u8 module_id[4];
- __u8 version_id;
- __u8 config_id;
- __u16 boundry_id; /* boundary scan id */
- __u16 ee_size; /* size of EEPROM */
- __u8 assembly[11]; /* assembly # */
- __u8 assembly_rev; /* assembly rev */
- __u8 tracer[4]; /* tracer number */
- __u16 assembly_cksum; /* asm checksum */
- __u16 power_consump; /* pwr requirements */
- __u16 num_asics; /* number of asics */
- __u16 bist_time; /* min. bist time */
- __u16 err_log_offset; /* error log offset */
- __u16 scan_path_offset;/* scan path offset */
- __u16 cct_offset;
- __u16 log_length; /* length of err log */
- __u16 xsum_end; /* offset to end of
- checksum */
- __u8 reserved[4];
- __u8 sflag; /* starting sentinal */
- __u8 part_number[13]; /* prom part number */
- __u8 version[10]; /* version number */
- __u8 signature[8];
- __u16 eeprom_chksum;
- __u32 data_stamp_offset;
- __u8 eflag ; /* ending sentinal */
-} __attribute__((packed)) voyager_eprom_hdr_t;
-
-
-
-#define VOYAGER_EPROM_SIZE_OFFSET \
- ((__u16)(&(((voyager_eprom_hdr_t *)0)->ee_size)))
-#define VOYAGER_XSUM_END_OFFSET 0x2a
-
-/* the following three definitions are for internal table layouts
- * in the module EPROMs. We really only care about the IDs and
- * offsets */
-typedef struct voyager_sp_table {
- __u8 asic_id;
- __u8 bypass_flag;
- __u16 asic_data_offset;
- __u16 config_data_offset;
-} __attribute__((packed)) voyager_sp_table_t;
-
-typedef struct voyager_jtag_table {
- __u8 icode[4];
- __u8 runbist[4];
- __u8 intest[4];
- __u8 samp_preld[4];
- __u8 ireg_len;
-} __attribute__((packed)) voyager_jtt_t;
-
-typedef struct voyager_asic_data_table {
- __u8 jtag_id[4];
- __u16 length_bsr;
- __u16 length_bist_reg;
- __u32 bist_clk;
- __u16 subaddr_bits;
- __u16 seed_bits;
- __u16 sig_bits;
- __u16 jtag_offset;
-} __attribute__((packed)) voyager_at_t;
-
-/* Voyager Interrupt Controller (VIC) registers */
-
-/* Base to add to Cross Processor Interrupts (CPIs) when triggering
- * the CPU IRQ line */
-/* register defines for the WCBICs (one per processor) */
-#define VOYAGER_WCBIC0 0x41 /* bus A node P1 processor 0 */
-#define VOYAGER_WCBIC1 0x49 /* bus A node P1 processor 1 */
-#define VOYAGER_WCBIC2 0x51 /* bus A node P2 processor 0 */
-#define VOYAGER_WCBIC3 0x59 /* bus A node P2 processor 1 */
-#define VOYAGER_WCBIC4 0x61 /* bus B node P1 processor 0 */
-#define VOYAGER_WCBIC5 0x69 /* bus B node P1 processor 1 */
-#define VOYAGER_WCBIC6 0x71 /* bus B node P2 processor 0 */
-#define VOYAGER_WCBIC7 0x79 /* bus B node P2 processor 1 */
-
-
-/* top of memory registers */
-#define VOYAGER_WCBIC_TOM_L 0x4
-#define VOYAGER_WCBIC_TOM_H 0x5
-
-/* register defines for Voyager Memory Contol (VMC)
- * these are present on L4 machines only */
-#define VOYAGER_VMC1 0x81
-#define VOYAGER_VMC2 0x91
-#define VOYAGER_VMC3 0xa1
-#define VOYAGER_VMC4 0xb1
-
-/* VMC Ports */
-#define VOYAGER_VMC_MEMORY_SETUP 0x9
-# define VMC_Interleaving 0x01
-# define VMC_4Way 0x02
-# define VMC_EvenCacheLines 0x04
-# define VMC_HighLine 0x08
-# define VMC_Start0_Enable 0x20
-# define VMC_Start1_Enable 0x40
-# define VMC_Vremap 0x80
-#define VOYAGER_VMC_BANK_DENSITY 0xa
-# define VMC_BANK_EMPTY 0
-# define VMC_BANK_4MB 1
-# define VMC_BANK_16MB 2
-# define VMC_BANK_64MB 3
-# define VMC_BANK0_MASK 0x03
-# define VMC_BANK1_MASK 0x0C
-# define VMC_BANK2_MASK 0x30
-# define VMC_BANK3_MASK 0xC0
-
-/* Magellan Memory Controller (MMC) defines - present on L5 */
-#define VOYAGER_MMC_ASIC_ID 1
-/* the two memory modules corresponding to memory cards in the system */
-#define VOYAGER_MMC_MEMORY0_MODULE 0x14
-#define VOYAGER_MMC_MEMORY1_MODULE 0x15
-/* the Magellan Memory Address (MMA) defines */
-#define VOYAGER_MMA_ASIC_ID 2
-
-/* Submodule number for the Quad Baseboard */
-#define VOYAGER_QUAD_BASEBOARD 1
-
-/* ASIC defines for the Quad Baseboard */
-#define VOYAGER_QUAD_QDATA0 1
-#define VOYAGER_QUAD_QDATA1 2
-#define VOYAGER_QUAD_QABC 3
-
-/* Useful areas in extended CMOS */
-#define VOYAGER_PROCESSOR_PRESENT_MASK 0x88a
-#define VOYAGER_MEMORY_CLICKMAP 0xa23
-#define VOYAGER_DUMP_LOCATION 0xb1a
-
-/* SUS In Control bit - used to tell SUS that we don't need to be
- * babysat anymore */
-#define VOYAGER_SUS_IN_CONTROL_PORT 0x3ff
-# define VOYAGER_IN_CONTROL_FLAG 0x80
-
-/* Voyager PSI defines */
-#define VOYAGER_PSI_STATUS_REG 0x08
-# define PSI_DC_FAIL 0x01
-# define PSI_MON 0x02
-# define PSI_FAULT 0x04
-# define PSI_ALARM 0x08
-# define PSI_CURRENT 0x10
-# define PSI_DVM 0x20
-# define PSI_PSCFAULT 0x40
-# define PSI_STAT_CHG 0x80
-
-#define VOYAGER_PSI_SUPPLY_REG 0x8000
- /* read */
-# define PSI_FAIL_DC 0x01
-# define PSI_FAIL_AC 0x02
-# define PSI_MON_INT 0x04
-# define PSI_SWITCH_OFF 0x08
-# define PSI_HX_OFF 0x10
-# define PSI_SECURITY 0x20
-# define PSI_CMOS_BATT_LOW 0x40
-# define PSI_CMOS_BATT_FAIL 0x80
- /* write */
-# define PSI_CLR_SWITCH_OFF 0x13
-# define PSI_CLR_HX_OFF 0x14
-# define PSI_CLR_CMOS_BATT_FAIL 0x17
-
-#define VOYAGER_PSI_MASK 0x8001
-# define PSI_MASK_MASK 0x10
-
-#define VOYAGER_PSI_AC_FAIL_REG 0x8004
-#define AC_FAIL_STAT_CHANGE 0x80
-
-#define VOYAGER_PSI_GENERAL_REG 0x8007
- /* read */
-# define PSI_SWITCH_ON 0x01
-# define PSI_SWITCH_ENABLED 0x02
-# define PSI_ALARM_ENABLED 0x08
-# define PSI_SECURE_ENABLED 0x10
-# define PSI_COLD_RESET 0x20
-# define PSI_COLD_START 0x80
- /* write */
-# define PSI_POWER_DOWN 0x10
-# define PSI_SWITCH_DISABLE 0x01
-# define PSI_SWITCH_ENABLE 0x11
-# define PSI_CLEAR 0x12
-# define PSI_ALARM_DISABLE 0x03
-# define PSI_ALARM_ENABLE 0x13
-# define PSI_CLEAR_COLD_RESET 0x05
-# define PSI_SET_COLD_RESET 0x15
-# define PSI_CLEAR_COLD_START 0x07
-# define PSI_SET_COLD_START 0x17
-
-
-
-struct voyager_bios_info {
- __u8 len;
- __u8 major;
- __u8 minor;
- __u8 debug;
- __u8 num_classes;
- __u8 class_1;
- __u8 class_2;
-};
-
-/* The following structures and definitions are for the Kernel/SUS
- * interface these are needed to find out how SUS initialised any Quad
- * boards in the system */
-
-#define NUMBER_OF_MC_BUSSES 2
-#define SLOTS_PER_MC_BUS 8
-#define MAX_CPUS 16 /* 16 way CPU system */
-#define MAX_PROCESSOR_BOARDS 4 /* 4 processor slot system */
-#define MAX_CACHE_LEVELS 4 /* # of cache levels supported */
-#define MAX_SHARED_CPUS 4 /* # of CPUs that can share a LARC */
-#define NUMBER_OF_POS_REGS 8
-
-typedef struct {
- __u8 MC_Slot;
- __u8 POS_Values[NUMBER_OF_POS_REGS];
-} __attribute__((packed)) MC_SlotInformation_t;
-
-struct QuadDescription {
- __u8 Type; /* for type 0 (DYADIC or MONADIC) all fields
- * will be zero except for slot */
- __u8 StructureVersion;
- __u32 CPI_BaseAddress;
- __u32 LARC_BankSize;
- __u32 LocalMemoryStateBits;
- __u8 Slot; /* Processor slots 1 - 4 */
-} __attribute__((packed));
-
-struct ProcBoardInfo {
- __u8 Type;
- __u8 StructureVersion;
- __u8 NumberOfBoards;
- struct QuadDescription QuadData[MAX_PROCESSOR_BOARDS];
-} __attribute__((packed));
-
-struct CacheDescription {
- __u8 Level;
- __u32 TotalSize;
- __u16 LineSize;
- __u8 Associativity;
- __u8 CacheType;
- __u8 WriteType;
- __u8 Number_CPUs_SharedBy;
- __u8 Shared_CPUs_Hardware_IDs[MAX_SHARED_CPUS];
-
-} __attribute__((packed));
-
-struct CPU_Description {
- __u8 CPU_HardwareId;
- char *FRU_String;
- __u8 NumberOfCacheLevels;
- struct CacheDescription CacheLevelData[MAX_CACHE_LEVELS];
-} __attribute__((packed));
-
-struct CPU_Info {
- __u8 Type;
- __u8 StructureVersion;
- __u8 NumberOf_CPUs;
- struct CPU_Description CPU_Data[MAX_CPUS];
-} __attribute__((packed));
-
-
-/*
- * This structure will be used by SUS and the OS.
- * The assumption about this structure is that no blank space is
- * packed in it by our friend the compiler.
- */
-typedef struct {
- __u8 Mailbox_SUS; /* Written to by SUS to give
- commands/response to the OS */
- __u8 Mailbox_OS; /* Written to by the OS to give
- commands/response to SUS */
- __u8 SUS_MailboxVersion; /* Tells the OS which iteration of the
- interface SUS supports */
- __u8 OS_MailboxVersion; /* Tells SUS which iteration of the
- interface the OS supports */
- __u32 OS_Flags; /* Flags set by the OS as info for
- SUS */
- __u32 SUS_Flags; /* Flags set by SUS as info
- for the OS */
- __u32 WatchDogPeriod; /* Watchdog period (in seconds) which
- the DP uses to see if the OS
- is dead */
- __u32 WatchDogCount; /* Updated by the OS on every tic. */
- __u32 MemoryFor_SUS_ErrorLog; /* Flat 32 bit address which tells SUS
- where to stuff the SUS error log
- on a dump */
- MC_SlotInformation_t MC_SlotInfo[NUMBER_OF_MC_BUSSES*SLOTS_PER_MC_BUS];
- /* Storage for MCA POS data */
- /* All new SECOND_PASS_INTERFACE fields added from this point */
- struct ProcBoardInfo *BoardData;
- struct CPU_Info *CPU_Data;
- /* All new fields must be added from this point */
-} Voyager_KernelSUS_Mbox_t;
-
-/* structure for finding the right memory address to send a QIC CPI to */
-struct voyager_qic_cpi {
- /* Each cache line (32 bytes) can trigger a cpi. The cpi
- * read/write may occur anywhere in the cache line---pick the
- * middle to be safe */
- struct {
- __u32 pad1[3];
- __u32 cpi;
- __u32 pad2[4];
- } qic_cpi[8];
-};
-
-struct voyager_status {
- __u32 power_fail:1;
- __u32 switch_off:1;
- __u32 request_from_kernel:1;
-};
-
-struct voyager_psi_regs {
- __u8 cat_id;
- __u8 cat_dev;
- __u8 cat_control;
- __u8 subaddr;
- __u8 dummy4;
- __u8 checkbit;
- __u8 subaddr_low;
- __u8 subaddr_high;
- __u8 intstatus;
- __u8 stat1;
- __u8 stat3;
- __u8 fault;
- __u8 tms;
- __u8 gen;
- __u8 sysconf;
- __u8 dummy15;
-};
-
-struct voyager_psi_subregs {
- __u8 supply;
- __u8 mask;
- __u8 present;
- __u8 DCfail;
- __u8 ACfail;
- __u8 fail;
- __u8 UPSfail;
- __u8 genstatus;
-};
-
-struct voyager_psi {
- struct voyager_psi_regs regs;
- struct voyager_psi_subregs subregs;
-};
-
-struct voyager_SUS {
-#define VOYAGER_DUMP_BUTTON_NMI 0x1
-#define VOYAGER_SUS_VALID 0x2
-#define VOYAGER_SYSINT_COMPLETE 0x3
- __u8 SUS_mbox;
-#define VOYAGER_NO_COMMAND 0x0
-#define VOYAGER_IGNORE_DUMP 0x1
-#define VOYAGER_DO_DUMP 0x2
-#define VOYAGER_SYSINT_HANDSHAKE 0x3
-#define VOYAGER_DO_MEM_DUMP 0x4
-#define VOYAGER_SYSINT_WAS_RECOVERED 0x5
- __u8 kernel_mbox;
-#define VOYAGER_MAILBOX_VERSION 0x10
- __u8 SUS_version;
- __u8 kernel_version;
-#define VOYAGER_OS_HAS_SYSINT 0x1
-#define VOYAGER_OS_IN_PROGRESS 0x2
-#define VOYAGER_UPDATING_WDPERIOD 0x4
- __u32 kernel_flags;
-#define VOYAGER_SUS_BOOTING 0x1
-#define VOYAGER_SUS_IN_PROGRESS 0x2
- __u32 SUS_flags;
- __u32 watchdog_period;
- __u32 watchdog_count;
- __u32 SUS_errorlog;
- /* lots of system configuration stuff under here */
-};
-
-/* Variables exported by voyager_smp */
-extern __u32 voyager_extended_vic_processors;
-extern __u32 voyager_allowed_boot_processors;
-extern __u32 voyager_quad_processors;
-extern struct voyager_qic_cpi *voyager_quad_cpi_addr[NR_CPUS];
-extern struct voyager_SUS *voyager_SUS;
-
-/* variables exported always */
-extern struct task_struct *voyager_thread;
-extern int voyager_level;
-extern struct voyager_status voyager_status;
-
-/* functions exported by the voyager and voyager_smp modules */
-extern int voyager_cat_readb(__u8 module, __u8 asic, int reg);
-extern void voyager_cat_init(void);
-extern void voyager_detect(struct voyager_bios_info *);
-extern void voyager_trap_init(void);
-extern void voyager_setup_irqs(void);
-extern int voyager_memory_detect(int region, __u32 *addr, __u32 *length);
-extern void voyager_smp_intr_init(void);
-extern __u8 voyager_extended_cmos_read(__u16 cmos_address);
-extern void voyager_smp_dump(void);
-extern void voyager_timer_interrupt(void);
-extern void smp_local_timer_interrupt(void);
-extern void voyager_power_off(void);
-extern void smp_voyager_power_off(void *dummy);
-extern void voyager_restart(void);
-extern void voyager_cat_power_off(void);
-extern void voyager_cat_do_common_interrupt(void);
-extern void voyager_handle_nmi(void);
-extern void voyager_smp_intr_init(void);
-/* Commands for the following are */
-#define VOYAGER_PSI_READ 0
-#define VOYAGER_PSI_WRITE 1
-#define VOYAGER_PSI_SUBREAD 2
-#define VOYAGER_PSI_SUBWRITE 3
-extern void voyager_cat_psi(__u8, __u16, __u8 *);
diff --git a/arch/x86/include/asm/xen/events.h b/arch/x86/include/asm/xen/events.h
index 1914418..1df3541 100644
--- a/arch/x86/include/asm/xen/events.h
+++ b/arch/x86/include/asm/xen/events.h
@@ -15,10 +15,4 @@
return raw_irqs_disabled_flags(regs->flags);
}
-static inline void xen_do_IRQ(int irq, struct pt_regs *regs)
-{
- regs->orig_ax = ~irq;
- do_IRQ(regs);
-}
-
#endif /* _ASM_X86_XEN_EVENTS_H */
diff --git a/arch/x86/include/asm/xen/hypervisor.h b/arch/x86/include/asm/xen/hypervisor.h
index 81fbd73..d5b7e90 100644
--- a/arch/x86/include/asm/xen/hypervisor.h
+++ b/arch/x86/include/asm/xen/hypervisor.h
@@ -38,22 +38,30 @@
extern struct start_info *xen_start_info;
enum xen_domain_type {
- XEN_NATIVE,
- XEN_PV_DOMAIN,
- XEN_HVM_DOMAIN,
+ XEN_NATIVE, /* running on bare hardware */
+ XEN_PV_DOMAIN, /* running in a PV domain */
+ XEN_HVM_DOMAIN, /* running in a Xen hvm domain */
};
-extern enum xen_domain_type xen_domain_type;
-
#ifdef CONFIG_XEN
-#define xen_domain() (xen_domain_type != XEN_NATIVE)
+extern enum xen_domain_type xen_domain_type;
#else
-#define xen_domain() (0)
+#define xen_domain_type XEN_NATIVE
#endif
-#define xen_pv_domain() (xen_domain() && xen_domain_type == XEN_PV_DOMAIN)
-#define xen_hvm_domain() (xen_domain() && xen_domain_type == XEN_HVM_DOMAIN)
+#define xen_domain() (xen_domain_type != XEN_NATIVE)
+#define xen_pv_domain() (xen_domain() && \
+ xen_domain_type == XEN_PV_DOMAIN)
+#define xen_hvm_domain() (xen_domain() && \
+ xen_domain_type == XEN_HVM_DOMAIN)
-#define xen_initial_domain() (xen_pv_domain() && xen_start_info->flags & SIF_INITDOMAIN)
+#ifdef CONFIG_XEN_DOM0
+#include <xen/interface/xen.h>
+
+#define xen_initial_domain() (xen_pv_domain() && \
+ xen_start_info->flags & SIF_INITDOMAIN)
+#else /* !CONFIG_XEN_DOM0 */
+#define xen_initial_domain() (0)
+#endif /* CONFIG_XEN_DOM0 */
#endif /* _ASM_X86_XEN_HYPERVISOR_H */
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index d364df0..de5657c 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -23,11 +23,12 @@
CFLAGS_vsyscall_64.o := $(PROFILING) -g0 $(nostackp)
CFLAGS_hpet.o := $(nostackp)
CFLAGS_tsc.o := $(nostackp)
+CFLAGS_paravirt.o := $(nostackp)
obj-y := process_$(BITS).o signal.o entry_$(BITS).o
obj-y += traps.o irq.o irq_$(BITS).o dumpstack_$(BITS).o
obj-y += time_$(BITS).o ioport.o ldt.o dumpstack.o
-obj-y += setup.o i8259.o irqinit_$(BITS).o setup_percpu.o
+obj-y += setup.o i8259.o irqinit_$(BITS).o
obj-$(CONFIG_X86_VISWS) += visws_quirks.o
obj-$(CONFIG_X86_32) += probe_roms_32.o
obj-$(CONFIG_X86_32) += sys_i386_32.o i386_ksyms_32.o
@@ -49,30 +50,26 @@
obj-$(CONFIG_STACKTRACE) += stacktrace.o
obj-y += cpu/
obj-y += acpi/
-obj-$(CONFIG_X86_BIOS_REBOOT) += reboot.o
+obj-y += reboot.o
obj-$(CONFIG_MCA) += mca_32.o
obj-$(CONFIG_X86_MSR) += msr.o
obj-$(CONFIG_X86_CPUID) += cpuid.o
obj-$(CONFIG_PCI) += early-quirks.o
apm-y := apm_32.o
obj-$(CONFIG_APM) += apm.o
-obj-$(CONFIG_X86_SMP) += smp.o
-obj-$(CONFIG_X86_SMP) += smpboot.o tsc_sync.o ipi.o tlb_$(BITS).o
-obj-$(CONFIG_X86_32_SMP) += smpcommon.o
-obj-$(CONFIG_X86_64_SMP) += tsc_sync.o smpcommon.o
+obj-$(CONFIG_SMP) += smp.o
+obj-$(CONFIG_SMP) += smpboot.o tsc_sync.o
+obj-$(CONFIG_SMP) += setup_percpu.o
+obj-$(CONFIG_X86_64_SMP) += tsc_sync.o
obj-$(CONFIG_X86_TRAMPOLINE) += trampoline_$(BITS).o
obj-$(CONFIG_X86_MPPARSE) += mpparse.o
-obj-$(CONFIG_X86_LOCAL_APIC) += apic.o nmi.o
-obj-$(CONFIG_X86_IO_APIC) += io_apic.o
+obj-y += apic/
obj-$(CONFIG_X86_REBOOTFIXUPS) += reboot_fixups_32.o
obj-$(CONFIG_DYNAMIC_FTRACE) += ftrace.o
obj-$(CONFIG_FUNCTION_GRAPH_TRACER) += ftrace.o
obj-$(CONFIG_KEXEC) += machine_kexec_$(BITS).o
obj-$(CONFIG_KEXEC) += relocate_kernel_$(BITS).o crash.o
obj-$(CONFIG_CRASH_DUMP) += crash_dump_$(BITS).o
-obj-$(CONFIG_X86_NUMAQ) += numaq_32.o
-obj-$(CONFIG_X86_ES7000) += es7000_32.o
-obj-$(CONFIG_X86_SUMMIT_NUMA) += summit_32.o
obj-y += vsmp_64.o
obj-$(CONFIG_KPROBES) += kprobes.o
obj-$(CONFIG_MODULES) += module_$(BITS).o
@@ -114,16 +111,13 @@
###
# 64 bit specific files
ifeq ($(CONFIG_X86_64),y)
- obj-y += genapic_64.o genapic_flat_64.o genx2apic_uv_x.o tlb_uv.o
- obj-y += bios_uv.o uv_irq.o uv_sysfs.o
- obj-y += genx2apic_cluster.o
- obj-y += genx2apic_phys.o
- obj-$(CONFIG_X86_PM_TIMER) += pmtimer_64.o
- obj-$(CONFIG_AUDIT) += audit_64.o
+ obj-$(CONFIG_X86_UV) += tlb_uv.o bios_uv.o uv_irq.o uv_sysfs.o
+ obj-$(CONFIG_X86_PM_TIMER) += pmtimer_64.o
+ obj-$(CONFIG_AUDIT) += audit_64.o
- obj-$(CONFIG_GART_IOMMU) += pci-gart_64.o aperture_64.o
- obj-$(CONFIG_CALGARY_IOMMU) += pci-calgary_64.o tce_64.o
- obj-$(CONFIG_AMD_IOMMU) += amd_iommu_init.o amd_iommu.o
+ obj-$(CONFIG_GART_IOMMU) += pci-gart_64.o aperture_64.o
+ obj-$(CONFIG_CALGARY_IOMMU) += pci-calgary_64.o tce_64.o
+ obj-$(CONFIG_AMD_IOMMU) += amd_iommu_init.o amd_iommu.o
- obj-$(CONFIG_PCI_MMCONFIG) += mmconf-fam10h_64.o
+ obj-$(CONFIG_PCI_MMCONFIG) += mmconf-fam10h_64.o
endif
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index 7678f10..a18eb7c 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -37,15 +37,10 @@
#include <asm/pgtable.h>
#include <asm/io_apic.h>
#include <asm/apic.h>
-#include <asm/genapic.h>
#include <asm/io.h>
#include <asm/mpspec.h>
#include <asm/smp.h>
-#ifdef CONFIG_X86_LOCAL_APIC
-# include <mach_apic.h>
-#endif
-
static int __initdata acpi_force = 0;
u32 acpi_rsdt_forced;
#ifdef CONFIG_ACPI
@@ -56,16 +51,7 @@
EXPORT_SYMBOL(acpi_disabled);
#ifdef CONFIG_X86_64
-
-#include <asm/proto.h>
-
-#else /* X86 */
-
-#ifdef CONFIG_X86_LOCAL_APIC
-#include <mach_apic.h>
-#include <mach_mpparse.h>
-#endif /* CONFIG_X86_LOCAL_APIC */
-
+# include <asm/proto.h>
#endif /* X86 */
#define BAD_MADT_ENTRY(entry, end) ( \
@@ -121,35 +107,18 @@
*/
char *__init __acpi_map_table(unsigned long phys, unsigned long size)
{
- unsigned long base, offset, mapped_size;
- int idx;
if (!phys || !size)
return NULL;
- if (phys+size <= (max_low_pfn_mapped << PAGE_SHIFT))
- return __va(phys);
+ return early_ioremap(phys, size);
+}
+void __init __acpi_unmap_table(char *map, unsigned long size)
+{
+ if (!map || !size)
+ return;
- offset = phys & (PAGE_SIZE - 1);
- mapped_size = PAGE_SIZE - offset;
- clear_fixmap(FIX_ACPI_END);
- set_fixmap(FIX_ACPI_END, phys);
- base = fix_to_virt(FIX_ACPI_END);
-
- /*
- * Most cases can be covered by the below.
- */
- idx = FIX_ACPI_END;
- while (mapped_size < size) {
- if (--idx < FIX_ACPI_BEGIN)
- return NULL; /* cannot handle this */
- phys += PAGE_SIZE;
- clear_fixmap(idx);
- set_fixmap(idx, phys);
- mapped_size += PAGE_SIZE;
- }
-
- return ((unsigned char *)base + offset);
+ early_iounmap(map, size);
}
#ifdef CONFIG_PCI_MMCONFIG
@@ -239,7 +208,8 @@
madt->address);
}
- acpi_madt_oem_check(madt->header.oem_id, madt->header.oem_table_id);
+ default_acpi_madt_oem_check(madt->header.oem_id,
+ madt->header.oem_table_id);
return 0;
}
@@ -884,7 +854,7 @@
DECLARE_BITMAP(pin_programmed, MP_MAX_IOAPIC_PIN + 1);
} mp_ioapic_routing[MAX_IO_APICS];
-static int mp_find_ioapic(int gsi)
+int mp_find_ioapic(int gsi)
{
int i = 0;
@@ -899,6 +869,16 @@
return -1;
}
+int mp_find_ioapic_pin(int ioapic, int gsi)
+{
+ if (WARN_ON(ioapic == -1))
+ return -1;
+ if (WARN_ON(gsi > mp_ioapic_routing[ioapic].gsi_end))
+ return -1;
+
+ return gsi - mp_ioapic_routing[ioapic].gsi_base;
+}
+
static u8 __init uniq_ioapic_id(u8 id)
{
#ifdef CONFIG_X86_32
@@ -912,8 +892,8 @@
DECLARE_BITMAP(used, 256);
bitmap_zero(used, 256);
for (i = 0; i < nr_ioapics; i++) {
- struct mp_config_ioapic *ia = &mp_ioapics[i];
- __set_bit(ia->mp_apicid, used);
+ struct mpc_ioapic *ia = &mp_ioapics[i];
+ __set_bit(ia->apicid, used);
}
if (!test_bit(id, used))
return id;
@@ -945,29 +925,29 @@
idx = nr_ioapics;
- mp_ioapics[idx].mp_type = MP_IOAPIC;
- mp_ioapics[idx].mp_flags = MPC_APIC_USABLE;
- mp_ioapics[idx].mp_apicaddr = address;
+ mp_ioapics[idx].type = MP_IOAPIC;
+ mp_ioapics[idx].flags = MPC_APIC_USABLE;
+ mp_ioapics[idx].apicaddr = address;
set_fixmap_nocache(FIX_IO_APIC_BASE_0 + idx, address);
- mp_ioapics[idx].mp_apicid = uniq_ioapic_id(id);
+ mp_ioapics[idx].apicid = uniq_ioapic_id(id);
#ifdef CONFIG_X86_32
- mp_ioapics[idx].mp_apicver = io_apic_get_version(idx);
+ mp_ioapics[idx].apicver = io_apic_get_version(idx);
#else
- mp_ioapics[idx].mp_apicver = 0;
+ mp_ioapics[idx].apicver = 0;
#endif
/*
* Build basic GSI lookup table to facilitate gsi->io_apic lookups
* and to prevent reprogramming of IOAPIC pins (PCI GSIs).
*/
- mp_ioapic_routing[idx].apic_id = mp_ioapics[idx].mp_apicid;
+ mp_ioapic_routing[idx].apic_id = mp_ioapics[idx].apicid;
mp_ioapic_routing[idx].gsi_base = gsi_base;
mp_ioapic_routing[idx].gsi_end = gsi_base +
io_apic_get_redir_entries(idx);
- printk(KERN_INFO "IOAPIC[%d]: apic_id %d, version %d, address 0x%lx, "
- "GSI %d-%d\n", idx, mp_ioapics[idx].mp_apicid,
- mp_ioapics[idx].mp_apicver, mp_ioapics[idx].mp_apicaddr,
+ printk(KERN_INFO "IOAPIC[%d]: apic_id %d, version %d, address 0x%x, "
+ "GSI %d-%d\n", idx, mp_ioapics[idx].apicid,
+ mp_ioapics[idx].apicver, mp_ioapics[idx].apicaddr,
mp_ioapic_routing[idx].gsi_base, mp_ioapic_routing[idx].gsi_end);
nr_ioapics++;
@@ -996,19 +976,19 @@
return max_gsi + 1;
}
-static void assign_to_mp_irq(struct mp_config_intsrc *m,
- struct mp_config_intsrc *mp_irq)
+static void assign_to_mp_irq(struct mpc_intsrc *m,
+ struct mpc_intsrc *mp_irq)
{
- memcpy(mp_irq, m, sizeof(struct mp_config_intsrc));
+ memcpy(mp_irq, m, sizeof(struct mpc_intsrc));
}
-static int mp_irq_cmp(struct mp_config_intsrc *mp_irq,
- struct mp_config_intsrc *m)
+static int mp_irq_cmp(struct mpc_intsrc *mp_irq,
+ struct mpc_intsrc *m)
{
- return memcmp(mp_irq, m, sizeof(struct mp_config_intsrc));
+ return memcmp(mp_irq, m, sizeof(struct mpc_intsrc));
}
-static void save_mp_irq(struct mp_config_intsrc *m)
+static void save_mp_irq(struct mpc_intsrc *m)
{
int i;
@@ -1026,7 +1006,7 @@
{
int ioapic;
int pin;
- struct mp_config_intsrc mp_irq;
+ struct mpc_intsrc mp_irq;
/*
* Convert 'gsi' to 'ioapic.pin'.
@@ -1034,7 +1014,7 @@
ioapic = mp_find_ioapic(gsi);
if (ioapic < 0)
return;
- pin = gsi - mp_ioapic_routing[ioapic].gsi_base;
+ pin = mp_find_ioapic_pin(ioapic, gsi);
/*
* TBD: This check is for faulty timer entries, where the override
@@ -1044,13 +1024,13 @@
if ((bus_irq == 0) && (trigger == 3))
trigger = 1;
- mp_irq.mp_type = MP_INTSRC;
- mp_irq.mp_irqtype = mp_INT;
- mp_irq.mp_irqflag = (trigger << 2) | polarity;
- mp_irq.mp_srcbus = MP_ISA_BUS;
- mp_irq.mp_srcbusirq = bus_irq; /* IRQ */
- mp_irq.mp_dstapic = mp_ioapics[ioapic].mp_apicid; /* APIC ID */
- mp_irq.mp_dstirq = pin; /* INTIN# */
+ mp_irq.type = MP_INTSRC;
+ mp_irq.irqtype = mp_INT;
+ mp_irq.irqflag = (trigger << 2) | polarity;
+ mp_irq.srcbus = MP_ISA_BUS;
+ mp_irq.srcbusirq = bus_irq; /* IRQ */
+ mp_irq.dstapic = mp_ioapics[ioapic].apicid; /* APIC ID */
+ mp_irq.dstirq = pin; /* INTIN# */
save_mp_irq(&mp_irq);
}
@@ -1060,7 +1040,7 @@
int i;
int ioapic;
unsigned int dstapic;
- struct mp_config_intsrc mp_irq;
+ struct mpc_intsrc mp_irq;
#if defined (CONFIG_MCA) || defined (CONFIG_EISA)
/*
@@ -1085,7 +1065,7 @@
ioapic = mp_find_ioapic(0);
if (ioapic < 0)
return;
- dstapic = mp_ioapics[ioapic].mp_apicid;
+ dstapic = mp_ioapics[ioapic].apicid;
/*
* Use the default configuration for the IRQs 0-15. Unless
@@ -1095,16 +1075,14 @@
int idx;
for (idx = 0; idx < mp_irq_entries; idx++) {
- struct mp_config_intsrc *irq = mp_irqs + idx;
+ struct mpc_intsrc *irq = mp_irqs + idx;
/* Do we already have a mapping for this ISA IRQ? */
- if (irq->mp_srcbus == MP_ISA_BUS
- && irq->mp_srcbusirq == i)
+ if (irq->srcbus == MP_ISA_BUS && irq->srcbusirq == i)
break;
/* Do we already have a mapping for this IOAPIC pin */
- if (irq->mp_dstapic == dstapic &&
- irq->mp_dstirq == i)
+ if (irq->dstapic == dstapic && irq->dstirq == i)
break;
}
@@ -1113,13 +1091,13 @@
continue; /* IRQ already used */
}
- mp_irq.mp_type = MP_INTSRC;
- mp_irq.mp_irqflag = 0; /* Conforming */
- mp_irq.mp_srcbus = MP_ISA_BUS;
- mp_irq.mp_dstapic = dstapic;
- mp_irq.mp_irqtype = mp_INT;
- mp_irq.mp_srcbusirq = i; /* Identity mapped */
- mp_irq.mp_dstirq = i;
+ mp_irq.type = MP_INTSRC;
+ mp_irq.irqflag = 0; /* Conforming */
+ mp_irq.srcbus = MP_ISA_BUS;
+ mp_irq.dstapic = dstapic;
+ mp_irq.irqtype = mp_INT;
+ mp_irq.srcbusirq = i; /* Identity mapped */
+ mp_irq.dstirq = i;
save_mp_irq(&mp_irq);
}
@@ -1156,7 +1134,7 @@
return gsi;
}
- ioapic_pin = gsi - mp_ioapic_routing[ioapic].gsi_base;
+ ioapic_pin = mp_find_ioapic_pin(ioapic, gsi);
#ifdef CONFIG_X86_32
if (ioapic_renumber_irq)
@@ -1230,22 +1208,22 @@
u32 gsi, int triggering, int polarity)
{
#ifdef CONFIG_X86_MPPARSE
- struct mp_config_intsrc mp_irq;
+ struct mpc_intsrc mp_irq;
int ioapic;
if (!acpi_ioapic)
return 0;
/* print the entry should happen on mptable identically */
- mp_irq.mp_type = MP_INTSRC;
- mp_irq.mp_irqtype = mp_INT;
- mp_irq.mp_irqflag = (triggering == ACPI_EDGE_SENSITIVE ? 4 : 0x0c) |
+ mp_irq.type = MP_INTSRC;
+ mp_irq.irqtype = mp_INT;
+ mp_irq.irqflag = (triggering == ACPI_EDGE_SENSITIVE ? 4 : 0x0c) |
(polarity == ACPI_ACTIVE_HIGH ? 1 : 3);
- mp_irq.mp_srcbus = number;
- mp_irq.mp_srcbusirq = (((devfn >> 3) & 0x1f) << 2) | ((pin - 1) & 3);
+ mp_irq.srcbus = number;
+ mp_irq.srcbusirq = (((devfn >> 3) & 0x1f) << 2) | ((pin - 1) & 3);
ioapic = mp_find_ioapic(gsi);
- mp_irq.mp_dstapic = mp_ioapic_routing[ioapic].apic_id;
- mp_irq.mp_dstirq = gsi - mp_ioapic_routing[ioapic].gsi_base;
+ mp_irq.dstapic = mp_ioapic_routing[ioapic].apic_id;
+ mp_irq.dstirq = mp_find_ioapic_pin(ioapic, gsi);
save_mp_irq(&mp_irq);
#endif
@@ -1372,7 +1350,7 @@
if (!error) {
acpi_lapic = 1;
-#ifdef CONFIG_X86_GENERICARCH
+#ifdef CONFIG_X86_BIGSMP
generic_bigsmp_probe();
#endif
/*
@@ -1384,9 +1362,8 @@
acpi_ioapic = 1;
smp_found_config = 1;
-#ifdef CONFIG_X86_32
- setup_apic_routing();
-#endif
+ if (apic->setup_apic_routing)
+ apic->setup_apic_routing();
}
}
if (error == -EINVAL) {
diff --git a/arch/x86/kernel/acpi/realmode/wakeup.S b/arch/x86/kernel/acpi/realmode/wakeup.S
index 3355973..580b4e2 100644
--- a/arch/x86/kernel/acpi/realmode/wakeup.S
+++ b/arch/x86/kernel/acpi/realmode/wakeup.S
@@ -3,8 +3,8 @@
*/
#include <asm/segment.h>
#include <asm/msr-index.h>
-#include <asm/page.h>
-#include <asm/pgtable.h>
+#include <asm/page_types.h>
+#include <asm/pgtable_types.h>
#include <asm/processor-flags.h>
.code16
diff --git a/arch/x86/kernel/acpi/sleep.c b/arch/x86/kernel/acpi/sleep.c
index a60c1f3..7c243a2 100644
--- a/arch/x86/kernel/acpi/sleep.c
+++ b/arch/x86/kernel/acpi/sleep.c
@@ -101,6 +101,7 @@
stack_start.sp = temp_stack + sizeof(temp_stack);
early_gdt_descr.address =
(unsigned long)get_cpu_gdt_table(smp_processor_id());
+ initial_gs = per_cpu_offset(smp_processor_id());
#endif
initial_code = (unsigned long)wakeup_long64;
saved_magic = 0x123456789abcdef0;
diff --git a/arch/x86/kernel/acpi/wakeup_32.S b/arch/x86/kernel/acpi/wakeup_32.S
index a12e6a9..8ded418 100644
--- a/arch/x86/kernel/acpi/wakeup_32.S
+++ b/arch/x86/kernel/acpi/wakeup_32.S
@@ -1,7 +1,7 @@
.section .text.page_aligned
#include <linux/linkage.h>
#include <asm/segment.h>
-#include <asm/page.h>
+#include <asm/page_types.h>
# Copyright 2003, 2008 Pavel Machek <pavel@suse.cz>, distribute under GPLv2
diff --git a/arch/x86/kernel/acpi/wakeup_64.S b/arch/x86/kernel/acpi/wakeup_64.S
index 96258d9..8ea5164 100644
--- a/arch/x86/kernel/acpi/wakeup_64.S
+++ b/arch/x86/kernel/acpi/wakeup_64.S
@@ -1,8 +1,8 @@
.text
#include <linux/linkage.h>
#include <asm/segment.h>
-#include <asm/pgtable.h>
-#include <asm/page.h>
+#include <asm/pgtable_types.h>
+#include <asm/page_types.h>
#include <asm/msr.h>
#include <asm/asm-offsets.h>
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index a84ac7b..6907b8e 100644
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -498,12 +498,12 @@
*/
void *__kprobes text_poke(void *addr, const void *opcode, size_t len)
{
- unsigned long flags;
char *vaddr;
int nr_pages = 2;
struct page *pages[2];
int i;
+ might_sleep();
if (!core_kernel_text((unsigned long)addr)) {
pages[0] = vmalloc_to_page(addr);
pages[1] = vmalloc_to_page(addr + PAGE_SIZE);
@@ -517,9 +517,9 @@
nr_pages = 1;
vaddr = vmap(pages, nr_pages, VM_MAP, PAGE_KERNEL);
BUG_ON(!vaddr);
- local_irq_save(flags);
+ local_irq_disable();
memcpy(&vaddr[(unsigned long)addr & ~PAGE_MASK], opcode, len);
- local_irq_restore(flags);
+ local_irq_enable();
vunmap(vaddr);
sync_core();
/* Could also do a CLFLUSH here to speed up CPU recovery; but
diff --git a/arch/x86/kernel/apic/Makefile b/arch/x86/kernel/apic/Makefile
new file mode 100644
index 0000000..da7b7b9
--- /dev/null
+++ b/arch/x86/kernel/apic/Makefile
@@ -0,0 +1,19 @@
+#
+# Makefile for local APIC drivers and for the IO-APIC code
+#
+
+obj-$(CONFIG_X86_LOCAL_APIC) += apic.o probe_$(BITS).o ipi.o nmi.o
+obj-$(CONFIG_X86_IO_APIC) += io_apic.o
+obj-$(CONFIG_SMP) += ipi.o
+
+ifeq ($(CONFIG_X86_64),y)
+obj-y += apic_flat_64.o
+obj-$(CONFIG_X86_X2APIC) += x2apic_cluster.o
+obj-$(CONFIG_X86_X2APIC) += x2apic_phys.o
+obj-$(CONFIG_X86_UV) += x2apic_uv_x.o
+endif
+
+obj-$(CONFIG_X86_BIGSMP) += bigsmp_32.o
+obj-$(CONFIG_X86_NUMAQ) += numaq_32.o
+obj-$(CONFIG_X86_ES7000) += es7000_32.o
+obj-$(CONFIG_X86_SUMMIT) += summit_32.o
diff --git a/arch/x86/kernel/apic.c b/arch/x86/kernel/apic/apic.c
similarity index 93%
rename from arch/x86/kernel/apic.c
rename to arch/x86/kernel/apic/apic.c
index 570f36e..4732768 100644
--- a/arch/x86/kernel/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -1,7 +1,7 @@
/*
* Local APIC handling, local APIC timers
*
- * (c) 1999, 2000 Ingo Molnar <mingo@redhat.com>
+ * (c) 1999, 2000, 2009 Ingo Molnar <mingo@redhat.com>
*
* Fixes
* Maciej W. Rozycki : Bits for genuine 82489DX APICs;
@@ -14,51 +14,70 @@
* Mikael Pettersson : PM converted to driver model.
*/
-#include <linux/init.h>
-
-#include <linux/mm.h>
-#include <linux/delay.h>
-#include <linux/bootmem.h>
-#include <linux/interrupt.h>
-#include <linux/mc146818rtc.h>
#include <linux/kernel_stat.h>
-#include <linux/sysdev.h>
-#include <linux/ioport.h>
-#include <linux/cpu.h>
-#include <linux/clockchips.h>
+#include <linux/mc146818rtc.h>
#include <linux/acpi_pmtmr.h>
-#include <linux/module.h>
-#include <linux/dmi.h>
-#include <linux/dmar.h>
+#include <linux/clockchips.h>
+#include <linux/interrupt.h>
+#include <linux/bootmem.h>
#include <linux/ftrace.h>
-#include <linux/smp.h>
-#include <linux/nmi.h>
+#include <linux/ioport.h>
+#include <linux/module.h>
+#include <linux/sysdev.h>
+#include <linux/delay.h>
#include <linux/timex.h>
+#include <linux/dmar.h>
+#include <linux/init.h>
+#include <linux/cpu.h>
+#include <linux/dmi.h>
+#include <linux/nmi.h>
+#include <linux/smp.h>
+#include <linux/mm.h>
-#include <asm/atomic.h>
-#include <asm/mtrr.h>
-#include <asm/mpspec.h>
-#include <asm/desc.h>
-#include <asm/arch_hooks.h>
-#include <asm/hpet.h>
+#include <asm/perf_counter.h>
#include <asm/pgalloc.h>
+#include <asm/atomic.h>
+#include <asm/mpspec.h>
#include <asm/i8253.h>
-#include <asm/idle.h>
+#include <asm/i8259.h>
#include <asm/proto.h>
#include <asm/apic.h>
-#include <asm/i8259.h>
+#include <asm/desc.h>
+#include <asm/hpet.h>
+#include <asm/idle.h>
+#include <asm/mtrr.h>
#include <asm/smp.h>
-#include <mach_apic.h>
-#include <mach_apicdef.h>
-#include <mach_ipi.h>
+unsigned int num_processors;
+
+unsigned disabled_cpus __cpuinitdata;
+
+/* Processor that is doing the boot up */
+unsigned int boot_cpu_physical_apicid = -1U;
/*
- * Sanity check
+ * The highest APIC ID seen during enumeration.
+ *
+ * This determines the messaging protocol we can use: if all APIC IDs
+ * are in the 0 ... 7 range, then we can use logical addressing which
+ * has some performance advantages (better broadcasting).
+ *
+ * If there's an APIC ID above 8, we use physical addressing.
*/
-#if ((SPURIOUS_APIC_VECTOR & 0x0F) != 0x0F)
-# error SPURIOUS_APIC_VECTOR definition error
-#endif
+unsigned int max_physical_apicid;
+
+/*
+ * Bitmask of physically existing CPUs:
+ */
+physid_mask_t phys_cpu_present_map;
+
+/*
+ * Map cpu index to physical APIC ID
+ */
+DEFINE_EARLY_PER_CPU(u16, x86_cpu_to_apicid, BAD_APICID);
+DEFINE_EARLY_PER_CPU(u16, x86_bios_cpu_apicid, BAD_APICID);
+EXPORT_EARLY_PER_CPU_SYMBOL(x86_cpu_to_apicid);
+EXPORT_EARLY_PER_CPU_SYMBOL(x86_bios_cpu_apicid);
#ifdef CONFIG_X86_32
/*
@@ -92,11 +111,7 @@
__setup("apicpmtimer", setup_apicpmtimer);
#endif
-#ifdef CONFIG_X86_64
-#define HAVE_X2APIC
-#endif
-
-#ifdef HAVE_X2APIC
+#ifdef CONFIG_X86_X2APIC
int x2apic;
/* x2apic enabled before OS handover */
static int x2apic_preenabled;
@@ -194,18 +209,13 @@
return lapic_get_version() >= 0x14;
}
-/*
- * Paravirt kernels also might be using these below ops. So we still
- * use generic apic_read()/apic_write(), which might be pointing to different
- * ops in PARAVIRT case.
- */
-void xapic_wait_icr_idle(void)
+void native_apic_wait_icr_idle(void)
{
while (apic_read(APIC_ICR) & APIC_ICR_BUSY)
cpu_relax();
}
-u32 safe_xapic_wait_icr_idle(void)
+u32 native_safe_apic_wait_icr_idle(void)
{
u32 send_status;
int timeout;
@@ -221,13 +231,13 @@
return send_status;
}
-void xapic_icr_write(u32 low, u32 id)
+void native_apic_icr_write(u32 low, u32 id)
{
apic_write(APIC_ICR2, SET_APIC_DEST_FIELD(id));
apic_write(APIC_ICR, low);
}
-static u64 xapic_icr_read(void)
+u64 native_apic_icr_read(void)
{
u32 icr1, icr2;
@@ -237,54 +247,6 @@
return icr1 | ((u64)icr2 << 32);
}
-static struct apic_ops xapic_ops = {
- .read = native_apic_mem_read,
- .write = native_apic_mem_write,
- .icr_read = xapic_icr_read,
- .icr_write = xapic_icr_write,
- .wait_icr_idle = xapic_wait_icr_idle,
- .safe_wait_icr_idle = safe_xapic_wait_icr_idle,
-};
-
-struct apic_ops __read_mostly *apic_ops = &xapic_ops;
-EXPORT_SYMBOL_GPL(apic_ops);
-
-#ifdef HAVE_X2APIC
-static void x2apic_wait_icr_idle(void)
-{
- /* no need to wait for icr idle in x2apic */
- return;
-}
-
-static u32 safe_x2apic_wait_icr_idle(void)
-{
- /* no need to wait for icr idle in x2apic */
- return 0;
-}
-
-void x2apic_icr_write(u32 low, u32 id)
-{
- wrmsrl(APIC_BASE_MSR + (APIC_ICR >> 4), ((__u64) id) << 32 | low);
-}
-
-static u64 x2apic_icr_read(void)
-{
- unsigned long val;
-
- rdmsrl(APIC_BASE_MSR + (APIC_ICR >> 4), val);
- return val;
-}
-
-static struct apic_ops x2apic_ops = {
- .read = native_apic_msr_read,
- .write = native_apic_msr_write,
- .icr_read = x2apic_icr_read,
- .icr_write = x2apic_icr_write,
- .wait_icr_idle = x2apic_wait_icr_idle,
- .safe_wait_icr_idle = safe_x2apic_wait_icr_idle,
-};
-#endif
-
/**
* enable_NMI_through_LVT0 - enable NMI through local vector table 0
*/
@@ -457,7 +419,7 @@
static void lapic_timer_broadcast(const struct cpumask *mask)
{
#ifdef CONFIG_SMP
- send_IPI_mask(mask, LOCAL_TIMER_VECTOR);
+ apic->send_IPI_mask(mask, LOCAL_TIMER_VECTOR);
#endif
}
@@ -535,7 +497,8 @@
}
}
-static int __init calibrate_by_pmtimer(long deltapm, long *delta)
+static int __init
+calibrate_by_pmtimer(long deltapm, long *delta, long *deltatsc)
{
const long pm_100ms = PMTMR_TICKS_PER_SEC / 10;
const long pm_thresh = pm_100ms / 100;
@@ -546,7 +509,7 @@
return -1;
#endif
- apic_printk(APIC_VERBOSE, "... PM timer delta = %ld\n", deltapm);
+ apic_printk(APIC_VERBOSE, "... PM-Timer delta = %ld\n", deltapm);
/* Check, if the PM timer is available */
if (!deltapm)
@@ -556,19 +519,30 @@
if (deltapm > (pm_100ms - pm_thresh) &&
deltapm < (pm_100ms + pm_thresh)) {
- apic_printk(APIC_VERBOSE, "... PM timer result ok\n");
- } else {
- res = (((u64)deltapm) * mult) >> 22;
- do_div(res, 1000000);
- pr_warning("APIC calibration not consistent "
- "with PM Timer: %ldms instead of 100ms\n",
- (long)res);
- /* Correct the lapic counter value */
- res = (((u64)(*delta)) * pm_100ms);
+ apic_printk(APIC_VERBOSE, "... PM-Timer result ok\n");
+ return 0;
+ }
+
+ res = (((u64)deltapm) * mult) >> 22;
+ do_div(res, 1000000);
+ pr_warning("APIC calibration not consistent "
+ "with PM-Timer: %ldms instead of 100ms\n",(long)res);
+
+ /* Correct the lapic counter value */
+ res = (((u64)(*delta)) * pm_100ms);
+ do_div(res, deltapm);
+ pr_info("APIC delta adjusted to PM-Timer: "
+ "%lu (%ld)\n", (unsigned long)res, *delta);
+ *delta = (long)res;
+
+ /* Correct the tsc counter value */
+ if (cpu_has_tsc) {
+ res = (((u64)(*deltatsc)) * pm_100ms);
do_div(res, deltapm);
- pr_info("APIC delta adjusted to PM-Timer: "
- "%lu (%ld)\n", (unsigned long)res, *delta);
- *delta = (long)res;
+ apic_printk(APIC_VERBOSE, "TSC delta adjusted to "
+ "PM-Timer: %lu (%ld) \n",
+ (unsigned long)res, *deltatsc);
+ *deltatsc = (long)res;
}
return 0;
@@ -579,7 +553,7 @@
struct clock_event_device *levt = &__get_cpu_var(lapic_events);
void (*real_handler)(struct clock_event_device *dev);
unsigned long deltaj;
- long delta;
+ long delta, deltatsc;
int pm_referenced = 0;
local_irq_disable();
@@ -609,9 +583,11 @@
delta = lapic_cal_t1 - lapic_cal_t2;
apic_printk(APIC_VERBOSE, "... lapic delta = %ld\n", delta);
+ deltatsc = (long)(lapic_cal_tsc2 - lapic_cal_tsc1);
+
/* we trust the PM based calibration if possible */
pm_referenced = !calibrate_by_pmtimer(lapic_cal_pm2 - lapic_cal_pm1,
- &delta);
+ &delta, &deltatsc);
/* Calculate the scaled math multiplication factor */
lapic_clockevent.mult = div_sc(delta, TICK_NSEC * LAPIC_CAL_LOOPS,
@@ -629,11 +605,10 @@
calibration_result);
if (cpu_has_tsc) {
- delta = (long)(lapic_cal_tsc2 - lapic_cal_tsc1);
apic_printk(APIC_VERBOSE, "..... CPU clock speed is "
"%ld.%04ld MHz.\n",
- (delta / LAPIC_CAL_LOOPS) / (1000000 / HZ),
- (delta / LAPIC_CAL_LOOPS) % (1000000 / HZ));
+ (deltatsc / LAPIC_CAL_LOOPS) / (1000000 / HZ),
+ (deltatsc / LAPIC_CAL_LOOPS) % (1000000 / HZ));
}
apic_printk(APIC_VERBOSE, "..... host bus clock speed is "
@@ -780,6 +755,8 @@
inc_irq_stat(apic_timer_irqs);
evt->event_handler(evt);
+
+ perf_counter_unthrottle();
}
/*
@@ -991,11 +968,11 @@
*/
reg0 = apic_read(APIC_ID);
apic_printk(APIC_DEBUG, "Getting ID: %x\n", reg0);
- apic_write(APIC_ID, reg0 ^ APIC_ID_MASK);
+ apic_write(APIC_ID, reg0 ^ apic->apic_id_mask);
reg1 = apic_read(APIC_ID);
apic_printk(APIC_DEBUG, "Getting ID: %x\n", reg1);
apic_write(APIC_ID, reg0);
- if (reg1 != (reg0 ^ APIC_ID_MASK))
+ if (reg1 != (reg0 ^ apic->apic_id_mask))
return 0;
/*
@@ -1089,7 +1066,7 @@
return;
}
- if (esr_disable) {
+ if (apic->disable_esr) {
/*
* Something untraceable is creating bad interrupts on
* secondary quads ... for the moment, just leave the
@@ -1130,15 +1107,21 @@
unsigned int value;
int i, j;
+ if (disable_apic) {
+ arch_disable_smp_support();
+ return;
+ }
+
#ifdef CONFIG_X86_32
/* Pound the ESR really hard over the head with a big hammer - mbligh */
- if (lapic_is_integrated() && esr_disable) {
+ if (lapic_is_integrated() && apic->disable_esr) {
apic_write(APIC_ESR, 0);
apic_write(APIC_ESR, 0);
apic_write(APIC_ESR, 0);
apic_write(APIC_ESR, 0);
}
#endif
+ perf_counters_lapic_init(0);
preempt_disable();
@@ -1146,7 +1129,7 @@
* Double-check whether this APIC is really registered.
* This is meaningless in clustered apic mode, so we skip it.
*/
- if (!apic_id_registered())
+ if (!apic->apic_id_registered())
BUG();
/*
@@ -1154,7 +1137,7 @@
* an APIC. See e.g. "AP-388 82489DX User's Manual" (Intel
* document number 292116). So here it goes...
*/
- init_apic_ldr();
+ apic->init_apic_ldr();
/*
* Set Task Priority to 'accept all'. We never change this
@@ -1282,17 +1265,12 @@
apic_pm_activate();
}
-#ifdef HAVE_X2APIC
+#ifdef CONFIG_X86_X2APIC
void check_x2apic(void)
{
- int msr, msr2;
-
- rdmsr(MSR_IA32_APICBASE, msr, msr2);
-
- if (msr & X2APIC_ENABLE) {
+ if (x2apic_enabled()) {
pr_info("x2apic enabled by BIOS, switching to x2apic ops\n");
x2apic_preenabled = x2apic = 1;
- apic_ops = &x2apic_ops;
}
}
@@ -1300,6 +1278,9 @@
{
int msr, msr2;
+ if (!x2apic)
+ return;
+
rdmsr(MSR_IA32_APICBASE, msr, msr2);
if (!(msr & X2APIC_ENABLE)) {
pr_info("Enabling x2apic\n");
@@ -1363,7 +1344,6 @@
if (!x2apic) {
x2apic = 1;
- apic_ops = &x2apic_ops;
enable_x2apic();
}
@@ -1401,7 +1381,7 @@
return;
}
-#endif /* HAVE_X2APIC */
+#endif /* CONFIG_X86_X2APIC */
#ifdef CONFIG_X86_64
/*
@@ -1532,7 +1512,7 @@
*/
void __init init_apic_mappings(void)
{
-#ifdef HAVE_X2APIC
+#ifdef CONFIG_X86_X2APIC
if (x2apic) {
boot_cpu_physical_apicid = read_apic_id();
return;
@@ -1570,11 +1550,11 @@
int __init APIC_init_uniprocessor(void)
{
-#ifdef CONFIG_X86_64
if (disable_apic) {
pr_info("Apic disabled\n");
return -1;
}
+#ifdef CONFIG_X86_64
if (!cpu_has_apic) {
disable_apic = 1;
pr_info("Apic disabled by BIOS\n");
@@ -1596,11 +1576,9 @@
}
#endif
-#ifdef HAVE_X2APIC
enable_IR_x2apic();
-#endif
#ifdef CONFIG_X86_64
- setup_apic_routing();
+ default_setup_apic_routing();
#endif
verify_local_APIC();
@@ -1621,35 +1599,31 @@
physid_set_mask_of_physid(boot_cpu_physical_apicid, &phys_cpu_present_map);
setup_local_APIC();
-#ifdef CONFIG_X86_64
+#ifdef CONFIG_X86_IO_APIC
/*
* Now enable IO-APICs, actually call clear_IO_APIC
- * We need clear_IO_APIC before enabling vector on BP
+ * We need clear_IO_APIC before enabling error vector
*/
if (!skip_ioapic_setup && nr_ioapics)
enable_IO_APIC();
#endif
-#ifdef CONFIG_X86_IO_APIC
- if (!smp_found_config || skip_ioapic_setup || !nr_ioapics)
-#endif
- localise_nmi_watchdog();
end_local_APIC_setup();
#ifdef CONFIG_X86_IO_APIC
if (smp_found_config && !skip_ioapic_setup && nr_ioapics)
setup_IO_APIC();
-# ifdef CONFIG_X86_64
- else
+ else {
nr_ioapics = 0;
-# endif
+ localise_nmi_watchdog();
+ }
+#else
+ localise_nmi_watchdog();
#endif
-#ifdef CONFIG_X86_64
- setup_boot_APIC_clock();
- check_nmi_watchdog();
-#else
setup_boot_clock();
+#ifdef CONFIG_X86_64
+ check_nmi_watchdog();
#endif
return 0;
@@ -1738,7 +1712,8 @@
outb(0x01, 0x23);
}
#endif
- enable_apic_mode();
+ if (apic->enable_apic_mode)
+ apic->enable_apic_mode();
}
/**
@@ -1876,29 +1851,39 @@
}
#endif
-#if defined(CONFIG_X86_SMP) || defined(CONFIG_X86_64)
- /* are we being called early in kernel startup? */
- if (early_per_cpu_ptr(x86_cpu_to_apicid)) {
- u16 *cpu_to_apicid = early_per_cpu_ptr(x86_cpu_to_apicid);
- u16 *bios_cpu_apicid = early_per_cpu_ptr(x86_bios_cpu_apicid);
-
- cpu_to_apicid[cpu] = apicid;
- bios_cpu_apicid[cpu] = apicid;
- } else {
- per_cpu(x86_cpu_to_apicid, cpu) = apicid;
- per_cpu(x86_bios_cpu_apicid, cpu) = apicid;
- }
+#if defined(CONFIG_SMP) || defined(CONFIG_X86_64)
+ early_per_cpu(x86_cpu_to_apicid, cpu) = apicid;
+ early_per_cpu(x86_bios_cpu_apicid, cpu) = apicid;
#endif
set_cpu_possible(cpu, true);
set_cpu_present(cpu, true);
}
-#ifdef CONFIG_X86_64
int hard_smp_processor_id(void)
{
return read_apic_id();
}
+
+void default_init_apic_ldr(void)
+{
+ unsigned long val;
+
+ apic_write(APIC_DFR, APIC_DFR_VALUE);
+ val = apic_read(APIC_LDR) & ~APIC_LDR_MASK;
+ val |= SET_APIC_LOGICAL_ID(1UL << smp_processor_id());
+ apic_write(APIC_LDR, val);
+}
+
+#ifdef CONFIG_X86_32
+int default_apicid_to_node(int logical_apicid)
+{
+#ifdef CONFIG_SMP
+ return apicid_2_node[hard_smp_processor_id()];
+#else
+ return 0;
+#endif
+}
#endif
/*
@@ -1976,7 +1961,7 @@
local_irq_save(flags);
-#ifdef HAVE_X2APIC
+#ifdef CONFIG_X86_X2APIC
if (x2apic)
enable_x2apic();
else
diff --git a/arch/x86/kernel/apic/apic_flat_64.c b/arch/x86/kernel/apic/apic_flat_64.c
new file mode 100644
index 0000000..3b00299
--- /dev/null
+++ b/arch/x86/kernel/apic/apic_flat_64.c
@@ -0,0 +1,389 @@
+/*
+ * Copyright 2004 James Cleverdon, IBM.
+ * Subject to the GNU Public License, v.2
+ *
+ * Flat APIC subarch code.
+ *
+ * Hacked for x86-64 by James Cleverdon from i386 architecture code by
+ * Martin Bligh, Andi Kleen, James Bottomley, John Stultz, and
+ * James Cleverdon.
+ */
+#include <linux/errno.h>
+#include <linux/threads.h>
+#include <linux/cpumask.h>
+#include <linux/string.h>
+#include <linux/kernel.h>
+#include <linux/ctype.h>
+#include <linux/init.h>
+#include <linux/hardirq.h>
+#include <asm/smp.h>
+#include <asm/apic.h>
+#include <asm/ipi.h>
+
+#ifdef CONFIG_ACPI
+#include <acpi/acpi_bus.h>
+#endif
+
+static int flat_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
+{
+ return 1;
+}
+
+static const struct cpumask *flat_target_cpus(void)
+{
+ return cpu_online_mask;
+}
+
+static void flat_vector_allocation_domain(int cpu, struct cpumask *retmask)
+{
+ /* Careful. Some cpus do not strictly honor the set of cpus
+ * specified in the interrupt destination when using lowest
+ * priority interrupt delivery mode.
+ *
+ * In particular there was a hyperthreading cpu observed to
+ * deliver interrupts to the wrong hyperthread when only one
+ * hyperthread was specified in the interrupt desitination.
+ */
+ cpumask_clear(retmask);
+ cpumask_bits(retmask)[0] = APIC_ALL_CPUS;
+}
+
+/*
+ * Set up the logical destination ID.
+ *
+ * Intel recommends to set DFR, LDR and TPR before enabling
+ * an APIC. See e.g. "AP-388 82489DX User's Manual" (Intel
+ * document number 292116). So here it goes...
+ */
+static void flat_init_apic_ldr(void)
+{
+ unsigned long val;
+ unsigned long num, id;
+
+ num = smp_processor_id();
+ id = 1UL << num;
+ apic_write(APIC_DFR, APIC_DFR_FLAT);
+ val = apic_read(APIC_LDR) & ~APIC_LDR_MASK;
+ val |= SET_APIC_LOGICAL_ID(id);
+ apic_write(APIC_LDR, val);
+}
+
+static inline void _flat_send_IPI_mask(unsigned long mask, int vector)
+{
+ unsigned long flags;
+
+ local_irq_save(flags);
+ __default_send_IPI_dest_field(mask, vector, apic->dest_logical);
+ local_irq_restore(flags);
+}
+
+static void flat_send_IPI_mask(const struct cpumask *cpumask, int vector)
+{
+ unsigned long mask = cpumask_bits(cpumask)[0];
+
+ _flat_send_IPI_mask(mask, vector);
+}
+
+static void
+ flat_send_IPI_mask_allbutself(const struct cpumask *cpumask, int vector)
+{
+ unsigned long mask = cpumask_bits(cpumask)[0];
+ int cpu = smp_processor_id();
+
+ if (cpu < BITS_PER_LONG)
+ clear_bit(cpu, &mask);
+
+ _flat_send_IPI_mask(mask, vector);
+}
+
+static void flat_send_IPI_allbutself(int vector)
+{
+ int cpu = smp_processor_id();
+#ifdef CONFIG_HOTPLUG_CPU
+ int hotplug = 1;
+#else
+ int hotplug = 0;
+#endif
+ if (hotplug || vector == NMI_VECTOR) {
+ if (!cpumask_equal(cpu_online_mask, cpumask_of(cpu))) {
+ unsigned long mask = cpumask_bits(cpu_online_mask)[0];
+
+ if (cpu < BITS_PER_LONG)
+ clear_bit(cpu, &mask);
+
+ _flat_send_IPI_mask(mask, vector);
+ }
+ } else if (num_online_cpus() > 1) {
+ __default_send_IPI_shortcut(APIC_DEST_ALLBUT,
+ vector, apic->dest_logical);
+ }
+}
+
+static void flat_send_IPI_all(int vector)
+{
+ if (vector == NMI_VECTOR) {
+ flat_send_IPI_mask(cpu_online_mask, vector);
+ } else {
+ __default_send_IPI_shortcut(APIC_DEST_ALLINC,
+ vector, apic->dest_logical);
+ }
+}
+
+static unsigned int flat_get_apic_id(unsigned long x)
+{
+ unsigned int id;
+
+ id = (((x)>>24) & 0xFFu);
+
+ return id;
+}
+
+static unsigned long set_apic_id(unsigned int id)
+{
+ unsigned long x;
+
+ x = ((id & 0xFFu)<<24);
+ return x;
+}
+
+static unsigned int read_xapic_id(void)
+{
+ unsigned int id;
+
+ id = flat_get_apic_id(apic_read(APIC_ID));
+ return id;
+}
+
+static int flat_apic_id_registered(void)
+{
+ return physid_isset(read_xapic_id(), phys_cpu_present_map);
+}
+
+static unsigned int flat_cpu_mask_to_apicid(const struct cpumask *cpumask)
+{
+ return cpumask_bits(cpumask)[0] & APIC_ALL_CPUS;
+}
+
+static unsigned int flat_cpu_mask_to_apicid_and(const struct cpumask *cpumask,
+ const struct cpumask *andmask)
+{
+ unsigned long mask1 = cpumask_bits(cpumask)[0] & APIC_ALL_CPUS;
+ unsigned long mask2 = cpumask_bits(andmask)[0] & APIC_ALL_CPUS;
+
+ return mask1 & mask2;
+}
+
+static int flat_phys_pkg_id(int initial_apic_id, int index_msb)
+{
+ return hard_smp_processor_id() >> index_msb;
+}
+
+struct apic apic_flat = {
+ .name = "flat",
+ .probe = NULL,
+ .acpi_madt_oem_check = flat_acpi_madt_oem_check,
+ .apic_id_registered = flat_apic_id_registered,
+
+ .irq_delivery_mode = dest_LowestPrio,
+ .irq_dest_mode = 1, /* logical */
+
+ .target_cpus = flat_target_cpus,
+ .disable_esr = 0,
+ .dest_logical = APIC_DEST_LOGICAL,
+ .check_apicid_used = NULL,
+ .check_apicid_present = NULL,
+
+ .vector_allocation_domain = flat_vector_allocation_domain,
+ .init_apic_ldr = flat_init_apic_ldr,
+
+ .ioapic_phys_id_map = NULL,
+ .setup_apic_routing = NULL,
+ .multi_timer_check = NULL,
+ .apicid_to_node = NULL,
+ .cpu_to_logical_apicid = NULL,
+ .cpu_present_to_apicid = default_cpu_present_to_apicid,
+ .apicid_to_cpu_present = NULL,
+ .setup_portio_remap = NULL,
+ .check_phys_apicid_present = default_check_phys_apicid_present,
+ .enable_apic_mode = NULL,
+ .phys_pkg_id = flat_phys_pkg_id,
+ .mps_oem_check = NULL,
+
+ .get_apic_id = flat_get_apic_id,
+ .set_apic_id = set_apic_id,
+ .apic_id_mask = 0xFFu << 24,
+
+ .cpu_mask_to_apicid = flat_cpu_mask_to_apicid,
+ .cpu_mask_to_apicid_and = flat_cpu_mask_to_apicid_and,
+
+ .send_IPI_mask = flat_send_IPI_mask,
+ .send_IPI_mask_allbutself = flat_send_IPI_mask_allbutself,
+ .send_IPI_allbutself = flat_send_IPI_allbutself,
+ .send_IPI_all = flat_send_IPI_all,
+ .send_IPI_self = apic_send_IPI_self,
+
+ .wakeup_cpu = NULL,
+ .trampoline_phys_low = DEFAULT_TRAMPOLINE_PHYS_LOW,
+ .trampoline_phys_high = DEFAULT_TRAMPOLINE_PHYS_HIGH,
+ .wait_for_init_deassert = NULL,
+ .smp_callin_clear_local_apic = NULL,
+ .inquire_remote_apic = NULL,
+
+ .read = native_apic_mem_read,
+ .write = native_apic_mem_write,
+ .icr_read = native_apic_icr_read,
+ .icr_write = native_apic_icr_write,
+ .wait_icr_idle = native_apic_wait_icr_idle,
+ .safe_wait_icr_idle = native_safe_apic_wait_icr_idle,
+};
+
+/*
+ * Physflat mode is used when there are more than 8 CPUs on a AMD system.
+ * We cannot use logical delivery in this case because the mask
+ * overflows, so use physical mode.
+ */
+static int physflat_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
+{
+#ifdef CONFIG_ACPI
+ /*
+ * Quirk: some x86_64 machines can only use physical APIC mode
+ * regardless of how many processors are present (x86_64 ES7000
+ * is an example).
+ */
+ if (acpi_gbl_FADT.header.revision > FADT2_REVISION_ID &&
+ (acpi_gbl_FADT.flags & ACPI_FADT_APIC_PHYSICAL)) {
+ printk(KERN_DEBUG "system APIC only can use physical flat");
+ return 1;
+ }
+#endif
+
+ return 0;
+}
+
+static const struct cpumask *physflat_target_cpus(void)
+{
+ return cpu_online_mask;
+}
+
+static void physflat_vector_allocation_domain(int cpu, struct cpumask *retmask)
+{
+ cpumask_clear(retmask);
+ cpumask_set_cpu(cpu, retmask);
+}
+
+static void physflat_send_IPI_mask(const struct cpumask *cpumask, int vector)
+{
+ default_send_IPI_mask_sequence_phys(cpumask, vector);
+}
+
+static void physflat_send_IPI_mask_allbutself(const struct cpumask *cpumask,
+ int vector)
+{
+ default_send_IPI_mask_allbutself_phys(cpumask, vector);
+}
+
+static void physflat_send_IPI_allbutself(int vector)
+{
+ default_send_IPI_mask_allbutself_phys(cpu_online_mask, vector);
+}
+
+static void physflat_send_IPI_all(int vector)
+{
+ physflat_send_IPI_mask(cpu_online_mask, vector);
+}
+
+static unsigned int physflat_cpu_mask_to_apicid(const struct cpumask *cpumask)
+{
+ int cpu;
+
+ /*
+ * We're using fixed IRQ delivery, can only return one phys APIC ID.
+ * May as well be the first.
+ */
+ cpu = cpumask_first(cpumask);
+ if ((unsigned)cpu < nr_cpu_ids)
+ return per_cpu(x86_cpu_to_apicid, cpu);
+ else
+ return BAD_APICID;
+}
+
+static unsigned int
+physflat_cpu_mask_to_apicid_and(const struct cpumask *cpumask,
+ const struct cpumask *andmask)
+{
+ int cpu;
+
+ /*
+ * We're using fixed IRQ delivery, can only return one phys APIC ID.
+ * May as well be the first.
+ */
+ for_each_cpu_and(cpu, cpumask, andmask) {
+ if (cpumask_test_cpu(cpu, cpu_online_mask))
+ break;
+ }
+ if (cpu < nr_cpu_ids)
+ return per_cpu(x86_cpu_to_apicid, cpu);
+
+ return BAD_APICID;
+}
+
+struct apic apic_physflat = {
+
+ .name = "physical flat",
+ .probe = NULL,
+ .acpi_madt_oem_check = physflat_acpi_madt_oem_check,
+ .apic_id_registered = flat_apic_id_registered,
+
+ .irq_delivery_mode = dest_Fixed,
+ .irq_dest_mode = 0, /* physical */
+
+ .target_cpus = physflat_target_cpus,
+ .disable_esr = 0,
+ .dest_logical = 0,
+ .check_apicid_used = NULL,
+ .check_apicid_present = NULL,
+
+ .vector_allocation_domain = physflat_vector_allocation_domain,
+ /* not needed, but shouldn't hurt: */
+ .init_apic_ldr = flat_init_apic_ldr,
+
+ .ioapic_phys_id_map = NULL,
+ .setup_apic_routing = NULL,
+ .multi_timer_check = NULL,
+ .apicid_to_node = NULL,
+ .cpu_to_logical_apicid = NULL,
+ .cpu_present_to_apicid = default_cpu_present_to_apicid,
+ .apicid_to_cpu_present = NULL,
+ .setup_portio_remap = NULL,
+ .check_phys_apicid_present = default_check_phys_apicid_present,
+ .enable_apic_mode = NULL,
+ .phys_pkg_id = flat_phys_pkg_id,
+ .mps_oem_check = NULL,
+
+ .get_apic_id = flat_get_apic_id,
+ .set_apic_id = set_apic_id,
+ .apic_id_mask = 0xFFu << 24,
+
+ .cpu_mask_to_apicid = physflat_cpu_mask_to_apicid,
+ .cpu_mask_to_apicid_and = physflat_cpu_mask_to_apicid_and,
+
+ .send_IPI_mask = physflat_send_IPI_mask,
+ .send_IPI_mask_allbutself = physflat_send_IPI_mask_allbutself,
+ .send_IPI_allbutself = physflat_send_IPI_allbutself,
+ .send_IPI_all = physflat_send_IPI_all,
+ .send_IPI_self = apic_send_IPI_self,
+
+ .wakeup_cpu = NULL,
+ .trampoline_phys_low = DEFAULT_TRAMPOLINE_PHYS_LOW,
+ .trampoline_phys_high = DEFAULT_TRAMPOLINE_PHYS_HIGH,
+ .wait_for_init_deassert = NULL,
+ .smp_callin_clear_local_apic = NULL,
+ .inquire_remote_apic = NULL,
+
+ .read = native_apic_mem_read,
+ .write = native_apic_mem_write,
+ .icr_read = native_apic_icr_read,
+ .icr_write = native_apic_icr_write,
+ .wait_icr_idle = native_apic_wait_icr_idle,
+ .safe_wait_icr_idle = native_safe_apic_wait_icr_idle,
+};
diff --git a/arch/x86/kernel/apic/bigsmp_32.c b/arch/x86/kernel/apic/bigsmp_32.c
new file mode 100644
index 0000000..0b10933
--- /dev/null
+++ b/arch/x86/kernel/apic/bigsmp_32.c
@@ -0,0 +1,274 @@
+/*
+ * APIC driver for "bigsmp" xAPIC machines with more than 8 virtual CPUs.
+ *
+ * Drives the local APIC in "clustered mode".
+ */
+#include <linux/threads.h>
+#include <linux/cpumask.h>
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/dmi.h>
+#include <linux/smp.h>
+
+#include <asm/apicdef.h>
+#include <asm/fixmap.h>
+#include <asm/mpspec.h>
+#include <asm/apic.h>
+#include <asm/ipi.h>
+
+static inline unsigned bigsmp_get_apic_id(unsigned long x)
+{
+ return (x >> 24) & 0xFF;
+}
+
+static inline int bigsmp_apic_id_registered(void)
+{
+ return 1;
+}
+
+static inline const cpumask_t *bigsmp_target_cpus(void)
+{
+#ifdef CONFIG_SMP
+ return &cpu_online_map;
+#else
+ return &cpumask_of_cpu(0);
+#endif
+}
+
+static inline unsigned long
+bigsmp_check_apicid_used(physid_mask_t bitmap, int apicid)
+{
+ return 0;
+}
+
+static inline unsigned long bigsmp_check_apicid_present(int bit)
+{
+ return 1;
+}
+
+static inline unsigned long calculate_ldr(int cpu)
+{
+ unsigned long val, id;
+
+ val = apic_read(APIC_LDR) & ~APIC_LDR_MASK;
+ id = per_cpu(x86_bios_cpu_apicid, cpu);
+ val |= SET_APIC_LOGICAL_ID(id);
+
+ return val;
+}
+
+/*
+ * Set up the logical destination ID.
+ *
+ * Intel recommends to set DFR, LDR and TPR before enabling
+ * an APIC. See e.g. "AP-388 82489DX User's Manual" (Intel
+ * document number 292116). So here it goes...
+ */
+static inline void bigsmp_init_apic_ldr(void)
+{
+ unsigned long val;
+ int cpu = smp_processor_id();
+
+ apic_write(APIC_DFR, APIC_DFR_FLAT);
+ val = calculate_ldr(cpu);
+ apic_write(APIC_LDR, val);
+}
+
+static inline void bigsmp_setup_apic_routing(void)
+{
+ printk(KERN_INFO
+ "Enabling APIC mode: Physflat. Using %d I/O APICs\n",
+ nr_ioapics);
+}
+
+static inline int bigsmp_apicid_to_node(int logical_apicid)
+{
+ return apicid_2_node[hard_smp_processor_id()];
+}
+
+static inline int bigsmp_cpu_present_to_apicid(int mps_cpu)
+{
+ if (mps_cpu < nr_cpu_ids)
+ return (int) per_cpu(x86_bios_cpu_apicid, mps_cpu);
+
+ return BAD_APICID;
+}
+
+static inline physid_mask_t bigsmp_apicid_to_cpu_present(int phys_apicid)
+{
+ return physid_mask_of_physid(phys_apicid);
+}
+
+/* Mapping from cpu number to logical apicid */
+static inline int bigsmp_cpu_to_logical_apicid(int cpu)
+{
+ if (cpu >= nr_cpu_ids)
+ return BAD_APICID;
+ return cpu_physical_id(cpu);
+}
+
+static inline physid_mask_t bigsmp_ioapic_phys_id_map(physid_mask_t phys_map)
+{
+ /* For clustered we don't have a good way to do this yet - hack */
+ return physids_promote(0xFFL);
+}
+
+static inline void bigsmp_setup_portio_remap(void)
+{
+}
+
+static inline int bigsmp_check_phys_apicid_present(int boot_cpu_physical_apicid)
+{
+ return 1;
+}
+
+/* As we are using single CPU as destination, pick only one CPU here */
+static inline unsigned int bigsmp_cpu_mask_to_apicid(const cpumask_t *cpumask)
+{
+ return bigsmp_cpu_to_logical_apicid(first_cpu(*cpumask));
+}
+
+static inline unsigned int
+bigsmp_cpu_mask_to_apicid_and(const struct cpumask *cpumask,
+ const struct cpumask *andmask)
+{
+ int cpu;
+
+ /*
+ * We're using fixed IRQ delivery, can only return one phys APIC ID.
+ * May as well be the first.
+ */
+ for_each_cpu_and(cpu, cpumask, andmask) {
+ if (cpumask_test_cpu(cpu, cpu_online_mask))
+ break;
+ }
+ if (cpu < nr_cpu_ids)
+ return bigsmp_cpu_to_logical_apicid(cpu);
+
+ return BAD_APICID;
+}
+
+static inline int bigsmp_phys_pkg_id(int cpuid_apic, int index_msb)
+{
+ return cpuid_apic >> index_msb;
+}
+
+static inline void bigsmp_send_IPI_mask(const struct cpumask *mask, int vector)
+{
+ default_send_IPI_mask_sequence_phys(mask, vector);
+}
+
+static inline void bigsmp_send_IPI_allbutself(int vector)
+{
+ default_send_IPI_mask_allbutself_phys(cpu_online_mask, vector);
+}
+
+static inline void bigsmp_send_IPI_all(int vector)
+{
+ bigsmp_send_IPI_mask(cpu_online_mask, vector);
+}
+
+static int dmi_bigsmp; /* can be set by dmi scanners */
+
+static int hp_ht_bigsmp(const struct dmi_system_id *d)
+{
+ printk(KERN_NOTICE "%s detected: force use of apic=bigsmp\n", d->ident);
+ dmi_bigsmp = 1;
+
+ return 0;
+}
+
+
+static const struct dmi_system_id bigsmp_dmi_table[] = {
+ { hp_ht_bigsmp, "HP ProLiant DL760 G2",
+ { DMI_MATCH(DMI_BIOS_VENDOR, "HP"),
+ DMI_MATCH(DMI_BIOS_VERSION, "P44-"),
+ }
+ },
+
+ { hp_ht_bigsmp, "HP ProLiant DL740",
+ { DMI_MATCH(DMI_BIOS_VENDOR, "HP"),
+ DMI_MATCH(DMI_BIOS_VERSION, "P47-"),
+ }
+ },
+ { } /* NULL entry stops DMI scanning */
+};
+
+static void bigsmp_vector_allocation_domain(int cpu, cpumask_t *retmask)
+{
+ cpus_clear(*retmask);
+ cpu_set(cpu, *retmask);
+}
+
+static int probe_bigsmp(void)
+{
+ if (def_to_bigsmp)
+ dmi_bigsmp = 1;
+ else
+ dmi_check_system(bigsmp_dmi_table);
+
+ return dmi_bigsmp;
+}
+
+struct apic apic_bigsmp = {
+
+ .name = "bigsmp",
+ .probe = probe_bigsmp,
+ .acpi_madt_oem_check = NULL,
+ .apic_id_registered = bigsmp_apic_id_registered,
+
+ .irq_delivery_mode = dest_Fixed,
+ /* phys delivery to target CPU: */
+ .irq_dest_mode = 0,
+
+ .target_cpus = bigsmp_target_cpus,
+ .disable_esr = 1,
+ .dest_logical = 0,
+ .check_apicid_used = bigsmp_check_apicid_used,
+ .check_apicid_present = bigsmp_check_apicid_present,
+
+ .vector_allocation_domain = bigsmp_vector_allocation_domain,
+ .init_apic_ldr = bigsmp_init_apic_ldr,
+
+ .ioapic_phys_id_map = bigsmp_ioapic_phys_id_map,
+ .setup_apic_routing = bigsmp_setup_apic_routing,
+ .multi_timer_check = NULL,
+ .apicid_to_node = bigsmp_apicid_to_node,
+ .cpu_to_logical_apicid = bigsmp_cpu_to_logical_apicid,
+ .cpu_present_to_apicid = bigsmp_cpu_present_to_apicid,
+ .apicid_to_cpu_present = bigsmp_apicid_to_cpu_present,
+ .setup_portio_remap = NULL,
+ .check_phys_apicid_present = bigsmp_check_phys_apicid_present,
+ .enable_apic_mode = NULL,
+ .phys_pkg_id = bigsmp_phys_pkg_id,
+ .mps_oem_check = NULL,
+
+ .get_apic_id = bigsmp_get_apic_id,
+ .set_apic_id = NULL,
+ .apic_id_mask = 0xFF << 24,
+
+ .cpu_mask_to_apicid = bigsmp_cpu_mask_to_apicid,
+ .cpu_mask_to_apicid_and = bigsmp_cpu_mask_to_apicid_and,
+
+ .send_IPI_mask = bigsmp_send_IPI_mask,
+ .send_IPI_mask_allbutself = NULL,
+ .send_IPI_allbutself = bigsmp_send_IPI_allbutself,
+ .send_IPI_all = bigsmp_send_IPI_all,
+ .send_IPI_self = default_send_IPI_self,
+
+ .wakeup_cpu = NULL,
+ .trampoline_phys_low = DEFAULT_TRAMPOLINE_PHYS_LOW,
+ .trampoline_phys_high = DEFAULT_TRAMPOLINE_PHYS_HIGH,
+
+ .wait_for_init_deassert = default_wait_for_init_deassert,
+
+ .smp_callin_clear_local_apic = NULL,
+ .inquire_remote_apic = default_inquire_remote_apic,
+
+ .read = native_apic_mem_read,
+ .write = native_apic_mem_write,
+ .icr_read = native_apic_icr_read,
+ .icr_write = native_apic_icr_write,
+ .wait_icr_idle = native_apic_wait_icr_idle,
+ .safe_wait_icr_idle = native_safe_apic_wait_icr_idle,
+};
diff --git a/arch/x86/kernel/apic/es7000_32.c b/arch/x86/kernel/apic/es7000_32.c
new file mode 100644
index 0000000..320f2d2
--- /dev/null
+++ b/arch/x86/kernel/apic/es7000_32.c
@@ -0,0 +1,757 @@
+/*
+ * Written by: Garry Forsgren, Unisys Corporation
+ * Natalie Protasevich, Unisys Corporation
+ *
+ * This file contains the code to configure and interface
+ * with Unisys ES7000 series hardware system manager.
+ *
+ * Copyright (c) 2003 Unisys Corporation.
+ * Copyright (C) 2009, Red Hat, Inc., Ingo Molnar
+ *
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ *
+ * Contact information: Unisys Corporation, Township Line & Union Meeting
+ * Roads-A, Unisys Way, Blue Bell, Pennsylvania, 19424, or:
+ *
+ * http://www.unisys.com
+ */
+#include <linux/notifier.h>
+#include <linux/spinlock.h>
+#include <linux/cpumask.h>
+#include <linux/threads.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/reboot.h>
+#include <linux/string.h>
+#include <linux/types.h>
+#include <linux/errno.h>
+#include <linux/acpi.h>
+#include <linux/init.h>
+#include <linux/nmi.h>
+#include <linux/smp.h>
+#include <linux/io.h>
+
+#include <asm/apicdef.h>
+#include <asm/atomic.h>
+#include <asm/fixmap.h>
+#include <asm/mpspec.h>
+#include <asm/setup.h>
+#include <asm/apic.h>
+#include <asm/ipi.h>
+
+/*
+ * ES7000 chipsets
+ */
+
+#define NON_UNISYS 0
+#define ES7000_CLASSIC 1
+#define ES7000_ZORRO 2
+
+#define MIP_REG 1
+#define MIP_PSAI_REG 4
+
+#define MIP_BUSY 1
+#define MIP_SPIN 0xf0000
+#define MIP_VALID 0x0100000000000000ULL
+#define MIP_SW_APIC 0x1020b
+
+#define MIP_PORT(val) ((val >> 32) & 0xffff)
+
+#define MIP_RD_LO(val) (val & 0xffffffff)
+
+struct mip_reg {
+ unsigned long long off_0x00;
+ unsigned long long off_0x08;
+ unsigned long long off_0x10;
+ unsigned long long off_0x18;
+ unsigned long long off_0x20;
+ unsigned long long off_0x28;
+ unsigned long long off_0x30;
+ unsigned long long off_0x38;
+};
+
+struct mip_reg_info {
+ unsigned long long mip_info;
+ unsigned long long delivery_info;
+ unsigned long long host_reg;
+ unsigned long long mip_reg;
+};
+
+struct psai {
+ unsigned long long entry_type;
+ unsigned long long addr;
+ unsigned long long bep_addr;
+};
+
+#ifdef CONFIG_ACPI
+
+struct es7000_oem_table {
+ struct acpi_table_header Header;
+ u32 OEMTableAddr;
+ u32 OEMTableSize;
+};
+
+static unsigned long oem_addrX;
+static unsigned long oem_size;
+
+#endif
+
+/*
+ * ES7000 Globals
+ */
+
+static volatile unsigned long *psai;
+static struct mip_reg *mip_reg;
+static struct mip_reg *host_reg;
+static int mip_port;
+static unsigned long mip_addr;
+static unsigned long host_addr;
+
+int es7000_plat;
+
+/*
+ * GSI override for ES7000 platforms.
+ */
+
+static unsigned int base;
+
+static int
+es7000_rename_gsi(int ioapic, int gsi)
+{
+ if (es7000_plat == ES7000_ZORRO)
+ return gsi;
+
+ if (!base) {
+ int i;
+ for (i = 0; i < nr_ioapics; i++)
+ base += nr_ioapic_registers[i];
+ }
+
+ if (!ioapic && (gsi < 16))
+ gsi += base;
+
+ return gsi;
+}
+
+static int wakeup_secondary_cpu_via_mip(int cpu, unsigned long eip)
+{
+ unsigned long vect = 0, psaival = 0;
+
+ if (psai == NULL)
+ return -1;
+
+ vect = ((unsigned long)__pa(eip)/0x1000) << 16;
+ psaival = (0x1000000 | vect | cpu);
+
+ while (*psai & 0x1000000)
+ ;
+
+ *psai = psaival;
+
+ return 0;
+}
+
+static int __init es7000_update_apic(void)
+{
+ apic->wakeup_cpu = wakeup_secondary_cpu_via_mip;
+
+ /* MPENTIUMIII */
+ if (boot_cpu_data.x86 == 6 &&
+ (boot_cpu_data.x86_model >= 7 || boot_cpu_data.x86_model <= 11)) {
+ es7000_update_apic_to_cluster();
+ apic->wait_for_init_deassert = NULL;
+ apic->wakeup_cpu = wakeup_secondary_cpu_via_mip;
+ }
+
+ return 0;
+}
+
+static void __init setup_unisys(void)
+{
+ /*
+ * Determine the generation of the ES7000 currently running.
+ *
+ * es7000_plat = 1 if the machine is a 5xx ES7000 box
+ * es7000_plat = 2 if the machine is a x86_64 ES7000 box
+ *
+ */
+ if (!(boot_cpu_data.x86 <= 15 && boot_cpu_data.x86_model <= 2))
+ es7000_plat = ES7000_ZORRO;
+ else
+ es7000_plat = ES7000_CLASSIC;
+ ioapic_renumber_irq = es7000_rename_gsi;
+
+ x86_quirks->update_apic = es7000_update_apic;
+}
+
+/*
+ * Parse the OEM Table:
+ */
+static int __init parse_unisys_oem(char *oemptr)
+{
+ int i;
+ int success = 0;
+ unsigned char type, size;
+ unsigned long val;
+ char *tp = NULL;
+ struct psai *psaip = NULL;
+ struct mip_reg_info *mi;
+ struct mip_reg *host, *mip;
+
+ tp = oemptr;
+
+ tp += 8;
+
+ for (i = 0; i <= 6; i++) {
+ type = *tp++;
+ size = *tp++;
+ tp -= 2;
+ switch (type) {
+ case MIP_REG:
+ mi = (struct mip_reg_info *)tp;
+ val = MIP_RD_LO(mi->host_reg);
+ host_addr = val;
+ host = (struct mip_reg *)val;
+ host_reg = __va(host);
+ val = MIP_RD_LO(mi->mip_reg);
+ mip_port = MIP_PORT(mi->mip_info);
+ mip_addr = val;
+ mip = (struct mip_reg *)val;
+ mip_reg = __va(mip);
+ pr_debug("es7000_mipcfg: host_reg = 0x%lx \n",
+ (unsigned long)host_reg);
+ pr_debug("es7000_mipcfg: mip_reg = 0x%lx \n",
+ (unsigned long)mip_reg);
+ success++;
+ break;
+ case MIP_PSAI_REG:
+ psaip = (struct psai *)tp;
+ if (tp != NULL) {
+ if (psaip->addr)
+ psai = __va(psaip->addr);
+ else
+ psai = NULL;
+ success++;
+ }
+ break;
+ default:
+ break;
+ }
+ tp += size;
+ }
+
+ if (success < 2)
+ es7000_plat = NON_UNISYS;
+ else
+ setup_unisys();
+
+ return es7000_plat;
+}
+
+#ifdef CONFIG_ACPI
+static int __init find_unisys_acpi_oem_table(unsigned long *oem_addr)
+{
+ struct acpi_table_header *header = NULL;
+ struct es7000_oem_table *table;
+ acpi_size tbl_size;
+ acpi_status ret;
+ int i = 0;
+
+ for (;;) {
+ ret = acpi_get_table_with_size("OEM1", i++, &header, &tbl_size);
+ if (!ACPI_SUCCESS(ret))
+ return -1;
+
+ if (!memcmp((char *) &header->oem_id, "UNISYS", 6))
+ break;
+
+ early_acpi_os_unmap_memory(header, tbl_size);
+ }
+
+ table = (void *)header;
+
+ oem_addrX = table->OEMTableAddr;
+ oem_size = table->OEMTableSize;
+
+ early_acpi_os_unmap_memory(header, tbl_size);
+
+ *oem_addr = (unsigned long)__acpi_map_table(oem_addrX, oem_size);
+
+ return 0;
+}
+
+static void __init unmap_unisys_acpi_oem_table(unsigned long oem_addr)
+{
+ if (!oem_addr)
+ return;
+
+ __acpi_unmap_table((char *)oem_addr, oem_size);
+}
+
+static int es7000_check_dsdt(void)
+{
+ struct acpi_table_header header;
+
+ if (ACPI_SUCCESS(acpi_get_table_header(ACPI_SIG_DSDT, 0, &header)) &&
+ !strncmp(header.oem_id, "UNISYS", 6))
+ return 1;
+ return 0;
+}
+
+/* Hook from generic ACPI tables.c */
+static int __init es7000_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
+{
+ unsigned long oem_addr = 0;
+ int check_dsdt;
+ int ret = 0;
+
+ /* check dsdt at first to avoid clear fix_map for oem_addr */
+ check_dsdt = es7000_check_dsdt();
+
+ if (!find_unisys_acpi_oem_table(&oem_addr)) {
+ if (check_dsdt) {
+ ret = parse_unisys_oem((char *)oem_addr);
+ } else {
+ setup_unisys();
+ ret = 1;
+ }
+ /*
+ * we need to unmap it
+ */
+ unmap_unisys_acpi_oem_table(oem_addr);
+ }
+ return ret;
+}
+#else /* !CONFIG_ACPI: */
+static int __init es7000_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
+{
+ return 0;
+}
+#endif /* !CONFIG_ACPI */
+
+static void es7000_spin(int n)
+{
+ int i = 0;
+
+ while (i++ < n)
+ rep_nop();
+}
+
+static int __init
+es7000_mip_write(struct mip_reg *mip_reg)
+{
+ int status = 0;
+ int spin;
+
+ spin = MIP_SPIN;
+ while ((host_reg->off_0x38 & MIP_VALID) != 0) {
+ if (--spin <= 0) {
+ WARN(1, "Timeout waiting for Host Valid Flag\n");
+ return -1;
+ }
+ es7000_spin(MIP_SPIN);
+ }
+
+ memcpy(host_reg, mip_reg, sizeof(struct mip_reg));
+ outb(1, mip_port);
+
+ spin = MIP_SPIN;
+
+ while ((mip_reg->off_0x38 & MIP_VALID) == 0) {
+ if (--spin <= 0) {
+ WARN(1, "Timeout waiting for MIP Valid Flag\n");
+ return -1;
+ }
+ es7000_spin(MIP_SPIN);
+ }
+
+ status = (mip_reg->off_0x00 & 0xffff0000000000ULL) >> 48;
+ mip_reg->off_0x38 &= ~MIP_VALID;
+
+ return status;
+}
+
+static void __init es7000_enable_apic_mode(void)
+{
+ struct mip_reg es7000_mip_reg;
+ int mip_status;
+
+ if (!es7000_plat)
+ return;
+
+ printk(KERN_INFO "ES7000: Enabling APIC mode.\n");
+ memset(&es7000_mip_reg, 0, sizeof(struct mip_reg));
+ es7000_mip_reg.off_0x00 = MIP_SW_APIC;
+ es7000_mip_reg.off_0x38 = MIP_VALID;
+
+ while ((mip_status = es7000_mip_write(&es7000_mip_reg)) != 0)
+ WARN(1, "Command failed, status = %x\n", mip_status);
+}
+
+static void es7000_vector_allocation_domain(int cpu, cpumask_t *retmask)
+{
+ /* Careful. Some cpus do not strictly honor the set of cpus
+ * specified in the interrupt destination when using lowest
+ * priority interrupt delivery mode.
+ *
+ * In particular there was a hyperthreading cpu observed to
+ * deliver interrupts to the wrong hyperthread when only one
+ * hyperthread was specified in the interrupt desitination.
+ */
+ *retmask = (cpumask_t){ { [0] = APIC_ALL_CPUS, } };
+}
+
+
+static void es7000_wait_for_init_deassert(atomic_t *deassert)
+{
+#ifndef CONFIG_ES7000_CLUSTERED_APIC
+ while (!atomic_read(deassert))
+ cpu_relax();
+#endif
+ return;
+}
+
+static unsigned int es7000_get_apic_id(unsigned long x)
+{
+ return (x >> 24) & 0xFF;
+}
+
+static void es7000_send_IPI_mask(const struct cpumask *mask, int vector)
+{
+ default_send_IPI_mask_sequence_phys(mask, vector);
+}
+
+static void es7000_send_IPI_allbutself(int vector)
+{
+ default_send_IPI_mask_allbutself_phys(cpu_online_mask, vector);
+}
+
+static void es7000_send_IPI_all(int vector)
+{
+ es7000_send_IPI_mask(cpu_online_mask, vector);
+}
+
+static int es7000_apic_id_registered(void)
+{
+ return 1;
+}
+
+static const cpumask_t *target_cpus_cluster(void)
+{
+ return &CPU_MASK_ALL;
+}
+
+static const cpumask_t *es7000_target_cpus(void)
+{
+ return &cpumask_of_cpu(smp_processor_id());
+}
+
+static unsigned long
+es7000_check_apicid_used(physid_mask_t bitmap, int apicid)
+{
+ return 0;
+}
+static unsigned long es7000_check_apicid_present(int bit)
+{
+ return physid_isset(bit, phys_cpu_present_map);
+}
+
+static unsigned long calculate_ldr(int cpu)
+{
+ unsigned long id = per_cpu(x86_bios_cpu_apicid, cpu);
+
+ return SET_APIC_LOGICAL_ID(id);
+}
+
+/*
+ * Set up the logical destination ID.
+ *
+ * Intel recommends to set DFR, LdR and TPR before enabling
+ * an APIC. See e.g. "AP-388 82489DX User's Manual" (Intel
+ * document number 292116). So here it goes...
+ */
+static void es7000_init_apic_ldr_cluster(void)
+{
+ unsigned long val;
+ int cpu = smp_processor_id();
+
+ apic_write(APIC_DFR, APIC_DFR_CLUSTER);
+ val = calculate_ldr(cpu);
+ apic_write(APIC_LDR, val);
+}
+
+static void es7000_init_apic_ldr(void)
+{
+ unsigned long val;
+ int cpu = smp_processor_id();
+
+ apic_write(APIC_DFR, APIC_DFR_FLAT);
+ val = calculate_ldr(cpu);
+ apic_write(APIC_LDR, val);
+}
+
+static void es7000_setup_apic_routing(void)
+{
+ int apic = per_cpu(x86_bios_cpu_apicid, smp_processor_id());
+
+ printk(KERN_INFO
+ "Enabling APIC mode: %s. Using %d I/O APICs, target cpus %lx\n",
+ (apic_version[apic] == 0x14) ?
+ "Physical Cluster" : "Logical Cluster",
+ nr_ioapics, cpus_addr(*es7000_target_cpus())[0]);
+}
+
+static int es7000_apicid_to_node(int logical_apicid)
+{
+ return 0;
+}
+
+
+static int es7000_cpu_present_to_apicid(int mps_cpu)
+{
+ if (!mps_cpu)
+ return boot_cpu_physical_apicid;
+ else if (mps_cpu < nr_cpu_ids)
+ return per_cpu(x86_bios_cpu_apicid, mps_cpu);
+ else
+ return BAD_APICID;
+}
+
+static int cpu_id;
+
+static physid_mask_t es7000_apicid_to_cpu_present(int phys_apicid)
+{
+ physid_mask_t mask;
+
+ mask = physid_mask_of_physid(cpu_id);
+ ++cpu_id;
+
+ return mask;
+}
+
+/* Mapping from cpu number to logical apicid */
+static int es7000_cpu_to_logical_apicid(int cpu)
+{
+#ifdef CONFIG_SMP
+ if (cpu >= nr_cpu_ids)
+ return BAD_APICID;
+ return cpu_2_logical_apicid[cpu];
+#else
+ return logical_smp_processor_id();
+#endif
+}
+
+static physid_mask_t es7000_ioapic_phys_id_map(physid_mask_t phys_map)
+{
+ /* For clustered we don't have a good way to do this yet - hack */
+ return physids_promote(0xff);
+}
+
+static int es7000_check_phys_apicid_present(int cpu_physical_apicid)
+{
+ boot_cpu_physical_apicid = read_apic_id();
+ return 1;
+}
+
+static unsigned int
+es7000_cpu_mask_to_apicid_cluster(const struct cpumask *cpumask)
+{
+ int cpus_found = 0;
+ int num_bits_set;
+ int apicid;
+ int cpu;
+
+ num_bits_set = cpumask_weight(cpumask);
+ /* Return id to all */
+ if (num_bits_set == nr_cpu_ids)
+ return 0xFF;
+ /*
+ * The cpus in the mask must all be on the apic cluster. If are not
+ * on the same apicid cluster return default value of target_cpus():
+ */
+ cpu = cpumask_first(cpumask);
+ apicid = es7000_cpu_to_logical_apicid(cpu);
+
+ while (cpus_found < num_bits_set) {
+ if (cpumask_test_cpu(cpu, cpumask)) {
+ int new_apicid = es7000_cpu_to_logical_apicid(cpu);
+
+ if (APIC_CLUSTER(apicid) != APIC_CLUSTER(new_apicid)) {
+ WARN(1, "Not a valid mask!");
+
+ return 0xFF;
+ }
+ apicid = new_apicid;
+ cpus_found++;
+ }
+ cpu++;
+ }
+ return apicid;
+}
+
+static unsigned int es7000_cpu_mask_to_apicid(const cpumask_t *cpumask)
+{
+ int cpus_found = 0;
+ int num_bits_set;
+ int apicid;
+ int cpu;
+
+ num_bits_set = cpus_weight(*cpumask);
+ /* Return id to all */
+ if (num_bits_set == nr_cpu_ids)
+ return es7000_cpu_to_logical_apicid(0);
+ /*
+ * The cpus in the mask must all be on the apic cluster. If are not
+ * on the same apicid cluster return default value of target_cpus():
+ */
+ cpu = first_cpu(*cpumask);
+ apicid = es7000_cpu_to_logical_apicid(cpu);
+ while (cpus_found < num_bits_set) {
+ if (cpu_isset(cpu, *cpumask)) {
+ int new_apicid = es7000_cpu_to_logical_apicid(cpu);
+
+ if (APIC_CLUSTER(apicid) != APIC_CLUSTER(new_apicid)) {
+ printk("%s: Not a valid mask!\n", __func__);
+
+ return es7000_cpu_to_logical_apicid(0);
+ }
+ apicid = new_apicid;
+ cpus_found++;
+ }
+ cpu++;
+ }
+ return apicid;
+}
+
+static unsigned int
+es7000_cpu_mask_to_apicid_and(const struct cpumask *inmask,
+ const struct cpumask *andmask)
+{
+ int apicid = es7000_cpu_to_logical_apicid(0);
+ cpumask_var_t cpumask;
+
+ if (!alloc_cpumask_var(&cpumask, GFP_ATOMIC))
+ return apicid;
+
+ cpumask_and(cpumask, inmask, andmask);
+ cpumask_and(cpumask, cpumask, cpu_online_mask);
+ apicid = es7000_cpu_mask_to_apicid(cpumask);
+
+ free_cpumask_var(cpumask);
+
+ return apicid;
+}
+
+static int es7000_phys_pkg_id(int cpuid_apic, int index_msb)
+{
+ return cpuid_apic >> index_msb;
+}
+
+void __init es7000_update_apic_to_cluster(void)
+{
+ apic->target_cpus = target_cpus_cluster;
+ apic->irq_delivery_mode = dest_LowestPrio;
+ /* logical delivery broadcast to all procs: */
+ apic->irq_dest_mode = 1;
+
+ apic->init_apic_ldr = es7000_init_apic_ldr_cluster;
+
+ apic->cpu_mask_to_apicid = es7000_cpu_mask_to_apicid_cluster;
+}
+
+static int probe_es7000(void)
+{
+ /* probed later in mptable/ACPI hooks */
+ return 0;
+}
+
+static __init int
+es7000_mps_oem_check(struct mpc_table *mpc, char *oem, char *productid)
+{
+ if (mpc->oemptr) {
+ struct mpc_oemtable *oem_table =
+ (struct mpc_oemtable *)mpc->oemptr;
+
+ if (!strncmp(oem, "UNISYS", 6))
+ return parse_unisys_oem((char *)oem_table);
+ }
+ return 0;
+}
+
+
+struct apic apic_es7000 = {
+
+ .name = "es7000",
+ .probe = probe_es7000,
+ .acpi_madt_oem_check = es7000_acpi_madt_oem_check,
+ .apic_id_registered = es7000_apic_id_registered,
+
+ .irq_delivery_mode = dest_Fixed,
+ /* phys delivery to target CPUs: */
+ .irq_dest_mode = 0,
+
+ .target_cpus = es7000_target_cpus,
+ .disable_esr = 1,
+ .dest_logical = 0,
+ .check_apicid_used = es7000_check_apicid_used,
+ .check_apicid_present = es7000_check_apicid_present,
+
+ .vector_allocation_domain = es7000_vector_allocation_domain,
+ .init_apic_ldr = es7000_init_apic_ldr,
+
+ .ioapic_phys_id_map = es7000_ioapic_phys_id_map,
+ .setup_apic_routing = es7000_setup_apic_routing,
+ .multi_timer_check = NULL,
+ .apicid_to_node = es7000_apicid_to_node,
+ .cpu_to_logical_apicid = es7000_cpu_to_logical_apicid,
+ .cpu_present_to_apicid = es7000_cpu_present_to_apicid,
+ .apicid_to_cpu_present = es7000_apicid_to_cpu_present,
+ .setup_portio_remap = NULL,
+ .check_phys_apicid_present = es7000_check_phys_apicid_present,
+ .enable_apic_mode = es7000_enable_apic_mode,
+ .phys_pkg_id = es7000_phys_pkg_id,
+ .mps_oem_check = es7000_mps_oem_check,
+
+ .get_apic_id = es7000_get_apic_id,
+ .set_apic_id = NULL,
+ .apic_id_mask = 0xFF << 24,
+
+ .cpu_mask_to_apicid = es7000_cpu_mask_to_apicid,
+ .cpu_mask_to_apicid_and = es7000_cpu_mask_to_apicid_and,
+
+ .send_IPI_mask = es7000_send_IPI_mask,
+ .send_IPI_mask_allbutself = NULL,
+ .send_IPI_allbutself = es7000_send_IPI_allbutself,
+ .send_IPI_all = es7000_send_IPI_all,
+ .send_IPI_self = default_send_IPI_self,
+
+ .wakeup_cpu = NULL,
+
+ .trampoline_phys_low = 0x467,
+ .trampoline_phys_high = 0x469,
+
+ .wait_for_init_deassert = es7000_wait_for_init_deassert,
+
+ /* Nothing to do for most platforms, since cleared by the INIT cycle: */
+ .smp_callin_clear_local_apic = NULL,
+ .inquire_remote_apic = default_inquire_remote_apic,
+
+ .read = native_apic_mem_read,
+ .write = native_apic_mem_write,
+ .icr_read = native_apic_icr_read,
+ .icr_write = native_apic_icr_write,
+ .wait_icr_idle = native_apic_wait_icr_idle,
+ .safe_wait_icr_idle = native_safe_apic_wait_icr_idle,
+};
diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/apic/io_apic.c
similarity index 91%
rename from arch/x86/kernel/io_apic.c
rename to arch/x86/kernel/apic/io_apic.c
index bc7ac4d..00e6071 100644
--- a/arch/x86/kernel/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -1,7 +1,7 @@
/*
* Intel IO-APIC support for multi-Pentium hosts.
*
- * Copyright (C) 1997, 1998, 1999, 2000 Ingo Molnar, Hajnalka Szabo
+ * Copyright (C) 1997, 1998, 1999, 2000, 2009 Ingo Molnar, Hajnalka Szabo
*
* Many thanks to Stig Venaas for trying out countless experimental
* patches and reporting/debugging problems patiently!
@@ -46,6 +46,7 @@
#include <asm/idle.h>
#include <asm/io.h>
#include <asm/smp.h>
+#include <asm/cpu.h>
#include <asm/desc.h>
#include <asm/proto.h>
#include <asm/acpi.h>
@@ -61,9 +62,7 @@
#include <asm/uv/uv_hub.h>
#include <asm/uv/uv_irq.h>
-#include <mach_ipi.h>
-#include <mach_apic.h>
-#include <mach_apicdef.h>
+#include <asm/apic.h>
#define __apicdebuginit(type) static type __init
@@ -82,11 +81,11 @@
int nr_ioapic_registers[MAX_IO_APICS];
/* I/O APIC entries */
-struct mp_config_ioapic mp_ioapics[MAX_IO_APICS];
+struct mpc_ioapic mp_ioapics[MAX_IO_APICS];
int nr_ioapics;
/* MP IRQ source entries */
-struct mp_config_intsrc mp_irqs[MAX_IRQ_SOURCES];
+struct mpc_intsrc mp_irqs[MAX_IRQ_SOURCES];
/* # of MP IRQ source entries */
int mp_irq_entries;
@@ -99,10 +98,19 @@
int skip_ioapic_setup;
+void arch_disable_smp_support(void)
+{
+#ifdef CONFIG_PCI
+ noioapicquirk = 1;
+ noioapicreroute = -1;
+#endif
+ skip_ioapic_setup = 1;
+}
+
static int __init parse_noapic(char *str)
{
/* disable IO-APIC */
- disable_ioapic_setup();
+ arch_disable_smp_support();
return 0;
}
early_param("noapic", parse_noapic);
@@ -356,7 +364,7 @@
if (!cfg->move_in_progress) {
/* it means that domain is not changed */
- if (!cpumask_intersects(&desc->affinity, mask))
+ if (!cpumask_intersects(desc->affinity, mask))
cfg->move_desc_pending = 1;
}
}
@@ -386,7 +394,7 @@
static __attribute_const__ struct io_apic __iomem *io_apic_base(int idx)
{
return (void __iomem *) __fix_to_virt(FIX_IO_APIC_BASE_0 + idx)
- + (mp_ioapics[idx].mp_apicaddr & ~PAGE_MASK);
+ + (mp_ioapics[idx].apicaddr & ~PAGE_MASK);
}
static inline unsigned int io_apic_read(unsigned int apic, unsigned int reg)
@@ -478,7 +486,7 @@
io_apic_write(apic, 0x10 + 2*pin, eu.w1);
}
-static void ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e)
+void ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e)
{
unsigned long flags;
spin_lock_irqsave(&ioapic_lock, flags);
@@ -513,11 +521,11 @@
for_each_cpu_and(i, cfg->old_domain, cpu_online_mask)
cfg->move_cleanup_count++;
for_each_cpu_and(i, cfg->old_domain, cpu_online_mask)
- send_IPI_mask(cpumask_of(i), IRQ_MOVE_CLEANUP_VECTOR);
+ apic->send_IPI_mask(cpumask_of(i), IRQ_MOVE_CLEANUP_VECTOR);
} else {
cpumask_and(cleanup_mask, cfg->old_domain, cpu_online_mask);
cfg->move_cleanup_count = cpumask_weight(cleanup_mask);
- send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR);
+ apic->send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR);
free_cpumask_var(cleanup_mask);
}
cfg->move_in_progress = 0;
@@ -562,8 +570,9 @@
assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask);
/*
- * Either sets desc->affinity to a valid value, and returns cpu_mask_to_apicid
- * of that, or returns BAD_APICID and leaves desc->affinity untouched.
+ * Either sets desc->affinity to a valid value, and returns
+ * ->cpu_mask_to_apicid of that, or returns BAD_APICID and
+ * leaves desc->affinity untouched.
*/
static unsigned int
set_desc_affinity(struct irq_desc *desc, const struct cpumask *mask)
@@ -579,9 +588,10 @@
if (assign_irq_vector(irq, cfg, mask))
return BAD_APICID;
- cpumask_and(&desc->affinity, cfg->domain, mask);
+ cpumask_and(desc->affinity, cfg->domain, mask);
set_extra_move_desc(desc, mask);
- return cpu_mask_to_apicid_and(&desc->affinity, cpu_online_mask);
+
+ return apic->cpu_mask_to_apicid_and(desc->affinity, cpu_online_mask);
}
static void
@@ -796,23 +806,6 @@
clear_IO_APIC_pin(apic, pin);
}
-#if !defined(CONFIG_SMP) && defined(CONFIG_X86_32)
-void send_IPI_self(int vector)
-{
- unsigned int cfg;
-
- /*
- * Wait for idle.
- */
- apic_wait_icr_idle();
- cfg = APIC_DM_FIXED | APIC_DEST_SELF | vector | APIC_DEST_LOGICAL;
- /*
- * Send the IPI. The write to APIC_ICR fires this off.
- */
- apic_write(APIC_ICR, cfg);
-}
-#endif /* !CONFIG_SMP && CONFIG_X86_32*/
-
#ifdef CONFIG_X86_32
/*
* support for broken MP BIOSs, enables hand-redirection of PIRQ0-7 to
@@ -820,8 +813,9 @@
*/
#define MAX_PIRQS 8
-static int pirq_entries [MAX_PIRQS];
-static int pirqs_enabled;
+static int pirq_entries[MAX_PIRQS] = {
+ [0 ... MAX_PIRQS - 1] = -1
+};
static int __init ioapic_pirq_setup(char *str)
{
@@ -830,10 +824,6 @@
get_options(str, ARRAY_SIZE(ints), ints);
- for (i = 0; i < MAX_PIRQS; i++)
- pirq_entries[i] = -1;
-
- pirqs_enabled = 1;
apic_printk(APIC_VERBOSE, KERN_INFO
"PIRQ redirection, working around broken MP-BIOS.\n");
max = MAX_PIRQS;
@@ -944,10 +934,10 @@
int i;
for (i = 0; i < mp_irq_entries; i++)
- if (mp_irqs[i].mp_irqtype == type &&
- (mp_irqs[i].mp_dstapic == mp_ioapics[apic].mp_apicid ||
- mp_irqs[i].mp_dstapic == MP_APIC_ALL) &&
- mp_irqs[i].mp_dstirq == pin)
+ if (mp_irqs[i].irqtype == type &&
+ (mp_irqs[i].dstapic == mp_ioapics[apic].apicid ||
+ mp_irqs[i].dstapic == MP_APIC_ALL) &&
+ mp_irqs[i].dstirq == pin)
return i;
return -1;
@@ -961,13 +951,13 @@
int i;
for (i = 0; i < mp_irq_entries; i++) {
- int lbus = mp_irqs[i].mp_srcbus;
+ int lbus = mp_irqs[i].srcbus;
if (test_bit(lbus, mp_bus_not_pci) &&
- (mp_irqs[i].mp_irqtype == type) &&
- (mp_irqs[i].mp_srcbusirq == irq))
+ (mp_irqs[i].irqtype == type) &&
+ (mp_irqs[i].srcbusirq == irq))
- return mp_irqs[i].mp_dstirq;
+ return mp_irqs[i].dstirq;
}
return -1;
}
@@ -977,17 +967,17 @@
int i;
for (i = 0; i < mp_irq_entries; i++) {
- int lbus = mp_irqs[i].mp_srcbus;
+ int lbus = mp_irqs[i].srcbus;
if (test_bit(lbus, mp_bus_not_pci) &&
- (mp_irqs[i].mp_irqtype == type) &&
- (mp_irqs[i].mp_srcbusirq == irq))
+ (mp_irqs[i].irqtype == type) &&
+ (mp_irqs[i].srcbusirq == irq))
break;
}
if (i < mp_irq_entries) {
int apic;
for(apic = 0; apic < nr_ioapics; apic++) {
- if (mp_ioapics[apic].mp_apicid == mp_irqs[i].mp_dstapic)
+ if (mp_ioapics[apic].apicid == mp_irqs[i].dstapic)
return apic;
}
}
@@ -1012,23 +1002,23 @@
return -1;
}
for (i = 0; i < mp_irq_entries; i++) {
- int lbus = mp_irqs[i].mp_srcbus;
+ int lbus = mp_irqs[i].srcbus;
for (apic = 0; apic < nr_ioapics; apic++)
- if (mp_ioapics[apic].mp_apicid == mp_irqs[i].mp_dstapic ||
- mp_irqs[i].mp_dstapic == MP_APIC_ALL)
+ if (mp_ioapics[apic].apicid == mp_irqs[i].dstapic ||
+ mp_irqs[i].dstapic == MP_APIC_ALL)
break;
if (!test_bit(lbus, mp_bus_not_pci) &&
- !mp_irqs[i].mp_irqtype &&
+ !mp_irqs[i].irqtype &&
(bus == lbus) &&
- (slot == ((mp_irqs[i].mp_srcbusirq >> 2) & 0x1f))) {
- int irq = pin_2_irq(i,apic,mp_irqs[i].mp_dstirq);
+ (slot == ((mp_irqs[i].srcbusirq >> 2) & 0x1f))) {
+ int irq = pin_2_irq(i, apic, mp_irqs[i].dstirq);
if (!(apic || IO_APIC_IRQ(irq)))
continue;
- if (pin == (mp_irqs[i].mp_srcbusirq & 3))
+ if (pin == (mp_irqs[i].srcbusirq & 3))
return irq;
/*
* Use the first all-but-pin matching entry as a
@@ -1071,7 +1061,7 @@
* EISA conforming in the MP table, that means its trigger type must
* be read in from the ELCR */
-#define default_EISA_trigger(idx) (EISA_ELCR(mp_irqs[idx].mp_srcbusirq))
+#define default_EISA_trigger(idx) (EISA_ELCR(mp_irqs[idx].srcbusirq))
#define default_EISA_polarity(idx) default_ISA_polarity(idx)
/* PCI interrupts are always polarity one level triggered,
@@ -1088,13 +1078,13 @@
static int MPBIOS_polarity(int idx)
{
- int bus = mp_irqs[idx].mp_srcbus;
+ int bus = mp_irqs[idx].srcbus;
int polarity;
/*
* Determine IRQ line polarity (high active or low active):
*/
- switch (mp_irqs[idx].mp_irqflag & 3)
+ switch (mp_irqs[idx].irqflag & 3)
{
case 0: /* conforms, ie. bus-type dependent polarity */
if (test_bit(bus, mp_bus_not_pci))
@@ -1130,13 +1120,13 @@
static int MPBIOS_trigger(int idx)
{
- int bus = mp_irqs[idx].mp_srcbus;
+ int bus = mp_irqs[idx].srcbus;
int trigger;
/*
* Determine IRQ trigger mode (edge or level sensitive):
*/
- switch ((mp_irqs[idx].mp_irqflag>>2) & 3)
+ switch ((mp_irqs[idx].irqflag>>2) & 3)
{
case 0: /* conforms, ie. bus-type dependent */
if (test_bit(bus, mp_bus_not_pci))
@@ -1214,16 +1204,16 @@
static int pin_2_irq(int idx, int apic, int pin)
{
int irq, i;
- int bus = mp_irqs[idx].mp_srcbus;
+ int bus = mp_irqs[idx].srcbus;
/*
* Debugging check, we are in big trouble if this message pops up!
*/
- if (mp_irqs[idx].mp_dstirq != pin)
+ if (mp_irqs[idx].dstirq != pin)
printk(KERN_ERR "broken BIOS or MPTABLE parser, ayiee!!\n");
if (test_bit(bus, mp_bus_not_pci)) {
- irq = mp_irqs[idx].mp_srcbusirq;
+ irq = mp_irqs[idx].srcbusirq;
} else {
/*
* PCI IRQs are mapped in order
@@ -1315,7 +1305,7 @@
int new_cpu;
int vector, offset;
- vector_allocation_domain(cpu, tmp_mask);
+ apic->vector_allocation_domain(cpu, tmp_mask);
vector = current_vector;
offset = current_offset;
@@ -1485,10 +1475,10 @@
handle_edge_irq, "edge");
}
-static int setup_ioapic_entry(int apic, int irq,
- struct IO_APIC_route_entry *entry,
- unsigned int destination, int trigger,
- int polarity, int vector)
+int setup_ioapic_entry(int apic_id, int irq,
+ struct IO_APIC_route_entry *entry,
+ unsigned int destination, int trigger,
+ int polarity, int vector)
{
/*
* add it to the IO-APIC irq-routing table:
@@ -1497,25 +1487,25 @@
#ifdef CONFIG_INTR_REMAP
if (intr_remapping_enabled) {
- struct intel_iommu *iommu = map_ioapic_to_ir(apic);
+ struct intel_iommu *iommu = map_ioapic_to_ir(apic_id);
struct irte irte;
struct IR_IO_APIC_route_entry *ir_entry =
(struct IR_IO_APIC_route_entry *) entry;
int index;
if (!iommu)
- panic("No mapping iommu for ioapic %d\n", apic);
+ panic("No mapping iommu for ioapic %d\n", apic_id);
index = alloc_irte(iommu, irq, 1);
if (index < 0)
- panic("Failed to allocate IRTE for ioapic %d\n", apic);
+ panic("Failed to allocate IRTE for ioapic %d\n", apic_id);
memset(&irte, 0, sizeof(irte));
irte.present = 1;
- irte.dst_mode = INT_DEST_MODE;
+ irte.dst_mode = apic->irq_dest_mode;
irte.trigger_mode = trigger;
- irte.dlvry_mode = INT_DELIVERY_MODE;
+ irte.dlvry_mode = apic->irq_delivery_mode;
irte.vector = vector;
irte.dest_id = IRTE_DEST(destination);
@@ -1528,8 +1518,8 @@
} else
#endif
{
- entry->delivery_mode = INT_DELIVERY_MODE;
- entry->dest_mode = INT_DEST_MODE;
+ entry->delivery_mode = apic->irq_delivery_mode;
+ entry->dest_mode = apic->irq_dest_mode;
entry->dest = destination;
}
@@ -1546,7 +1536,7 @@
return 0;
}
-static void setup_IO_APIC_irq(int apic, int pin, unsigned int irq, struct irq_desc *desc,
+static void setup_IO_APIC_irq(int apic_id, int pin, unsigned int irq, struct irq_desc *desc,
int trigger, int polarity)
{
struct irq_cfg *cfg;
@@ -1558,22 +1548,22 @@
cfg = desc->chip_data;
- if (assign_irq_vector(irq, cfg, TARGET_CPUS))
+ if (assign_irq_vector(irq, cfg, apic->target_cpus()))
return;
- dest = cpu_mask_to_apicid_and(cfg->domain, TARGET_CPUS);
+ dest = apic->cpu_mask_to_apicid_and(cfg->domain, apic->target_cpus());
apic_printk(APIC_VERBOSE,KERN_DEBUG
"IOAPIC[%d]: Set routing entry (%d-%d -> 0x%x -> "
"IRQ %d Mode:%i Active:%i)\n",
- apic, mp_ioapics[apic].mp_apicid, pin, cfg->vector,
+ apic_id, mp_ioapics[apic_id].apicid, pin, cfg->vector,
irq, trigger, polarity);
- if (setup_ioapic_entry(mp_ioapics[apic].mp_apicid, irq, &entry,
+ if (setup_ioapic_entry(mp_ioapics[apic_id].apicid, irq, &entry,
dest, trigger, polarity, cfg->vector)) {
printk("Failed to setup ioapic entry for ioapic %d, pin %d\n",
- mp_ioapics[apic].mp_apicid, pin);
+ mp_ioapics[apic_id].apicid, pin);
__clear_irq_vector(irq, cfg);
return;
}
@@ -1582,12 +1572,12 @@
if (irq < NR_IRQS_LEGACY)
disable_8259A_irq(irq);
- ioapic_write_entry(apic, pin, entry);
+ ioapic_write_entry(apic_id, pin, entry);
}
static void __init setup_IO_APIC_irqs(void)
{
- int apic, pin, idx, irq;
+ int apic_id, pin, idx, irq;
int notcon = 0;
struct irq_desc *desc;
struct irq_cfg *cfg;
@@ -1595,21 +1585,19 @@
apic_printk(APIC_VERBOSE, KERN_DEBUG "init IO_APIC IRQs\n");
- for (apic = 0; apic < nr_ioapics; apic++) {
- for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) {
+ for (apic_id = 0; apic_id < nr_ioapics; apic_id++) {
+ for (pin = 0; pin < nr_ioapic_registers[apic_id]; pin++) {
- idx = find_irq_entry(apic, pin, mp_INT);
+ idx = find_irq_entry(apic_id, pin, mp_INT);
if (idx == -1) {
if (!notcon) {
notcon = 1;
apic_printk(APIC_VERBOSE,
KERN_DEBUG " %d-%d",
- mp_ioapics[apic].mp_apicid,
- pin);
+ mp_ioapics[apic_id].apicid, pin);
} else
apic_printk(APIC_VERBOSE, " %d-%d",
- mp_ioapics[apic].mp_apicid,
- pin);
+ mp_ioapics[apic_id].apicid, pin);
continue;
}
if (notcon) {
@@ -1618,20 +1606,25 @@
notcon = 0;
}
- irq = pin_2_irq(idx, apic, pin);
-#ifdef CONFIG_X86_32
- if (multi_timer_check(apic, irq))
+ irq = pin_2_irq(idx, apic_id, pin);
+
+ /*
+ * Skip the timer IRQ if there's a quirk handler
+ * installed and if it returns 1:
+ */
+ if (apic->multi_timer_check &&
+ apic->multi_timer_check(apic_id, irq))
continue;
-#endif
+
desc = irq_to_desc_alloc_cpu(irq, cpu);
if (!desc) {
printk(KERN_INFO "can not get irq_desc for %d\n", irq);
continue;
}
cfg = desc->chip_data;
- add_pin_to_irq_cpu(cfg, cpu, apic, pin);
+ add_pin_to_irq_cpu(cfg, cpu, apic_id, pin);
- setup_IO_APIC_irq(apic, pin, irq, desc,
+ setup_IO_APIC_irq(apic_id, pin, irq, desc,
irq_trigger(idx), irq_polarity(idx));
}
}
@@ -1644,7 +1637,7 @@
/*
* Set up the timer pin, possibly with the 8259A-master behind.
*/
-static void __init setup_timer_IRQ0_pin(unsigned int apic, unsigned int pin,
+static void __init setup_timer_IRQ0_pin(unsigned int apic_id, unsigned int pin,
int vector)
{
struct IO_APIC_route_entry entry;
@@ -1660,10 +1653,10 @@
* We use logical delivery to get the timer IRQ
* to the first CPU.
*/
- entry.dest_mode = INT_DEST_MODE;
- entry.mask = 1; /* mask IRQ now */
- entry.dest = cpu_mask_to_apicid(TARGET_CPUS);
- entry.delivery_mode = INT_DELIVERY_MODE;
+ entry.dest_mode = apic->irq_dest_mode;
+ entry.mask = 0; /* don't mask IRQ for edge */
+ entry.dest = apic->cpu_mask_to_apicid(apic->target_cpus());
+ entry.delivery_mode = apic->irq_delivery_mode;
entry.polarity = 0;
entry.trigger = 0;
entry.vector = vector;
@@ -1677,7 +1670,7 @@
/*
* Add it to the IO-APIC irq-routing table:
*/
- ioapic_write_entry(apic, pin, entry);
+ ioapic_write_entry(apic_id, pin, entry);
}
@@ -1699,7 +1692,7 @@
printk(KERN_DEBUG "number of MP IRQ sources: %d.\n", mp_irq_entries);
for (i = 0; i < nr_ioapics; i++)
printk(KERN_DEBUG "number of IO-APIC #%d registers: %d.\n",
- mp_ioapics[i].mp_apicid, nr_ioapic_registers[i]);
+ mp_ioapics[i].apicid, nr_ioapic_registers[i]);
/*
* We are a bit conservative about what we expect. We have to
@@ -1719,7 +1712,7 @@
spin_unlock_irqrestore(&ioapic_lock, flags);
printk("\n");
- printk(KERN_DEBUG "IO APIC #%d......\n", mp_ioapics[apic].mp_apicid);
+ printk(KERN_DEBUG "IO APIC #%d......\n", mp_ioapics[apic].apicid);
printk(KERN_DEBUG ".... register #00: %08X\n", reg_00.raw);
printk(KERN_DEBUG "....... : physical APIC id: %02X\n", reg_00.bits.ID);
printk(KERN_DEBUG "....... : Delivery Type: %X\n", reg_00.bits.delivery_type);
@@ -1980,13 +1973,6 @@
int apic;
unsigned long flags;
-#ifdef CONFIG_X86_32
- int i;
- if (!pirqs_enabled)
- for (i = 0; i < MAX_PIRQS; i++)
- pirq_entries[i] = -1;
-#endif
-
/*
* The number of IO-APIC IRQ registers (== #pins):
*/
@@ -2090,7 +2076,7 @@
{
union IO_APIC_reg_00 reg_00;
physid_mask_t phys_id_present_map;
- int apic;
+ int apic_id;
int i;
unsigned char old_id;
unsigned long flags;
@@ -2109,26 +2095,26 @@
* This is broken; anything with a real cpu count has to
* circumvent this idiocy regardless.
*/
- phys_id_present_map = ioapic_phys_id_map(phys_cpu_present_map);
+ phys_id_present_map = apic->ioapic_phys_id_map(phys_cpu_present_map);
/*
* Set the IOAPIC ID to the value stored in the MPC table.
*/
- for (apic = 0; apic < nr_ioapics; apic++) {
+ for (apic_id = 0; apic_id < nr_ioapics; apic_id++) {
/* Read the register 0 value */
spin_lock_irqsave(&ioapic_lock, flags);
- reg_00.raw = io_apic_read(apic, 0);
+ reg_00.raw = io_apic_read(apic_id, 0);
spin_unlock_irqrestore(&ioapic_lock, flags);
- old_id = mp_ioapics[apic].mp_apicid;
+ old_id = mp_ioapics[apic_id].apicid;
- if (mp_ioapics[apic].mp_apicid >= get_physical_broadcast()) {
+ if (mp_ioapics[apic_id].apicid >= get_physical_broadcast()) {
printk(KERN_ERR "BIOS bug, IO-APIC#%d ID is %d in the MPC table!...\n",
- apic, mp_ioapics[apic].mp_apicid);
+ apic_id, mp_ioapics[apic_id].apicid);
printk(KERN_ERR "... fixing up to %d. (tell your hw vendor)\n",
reg_00.bits.ID);
- mp_ioapics[apic].mp_apicid = reg_00.bits.ID;
+ mp_ioapics[apic_id].apicid = reg_00.bits.ID;
}
/*
@@ -2136,10 +2122,10 @@
* system must have a unique ID or we get lots of nice
* 'stuck on smp_invalidate_needed IPI wait' messages.
*/
- if (check_apicid_used(phys_id_present_map,
- mp_ioapics[apic].mp_apicid)) {
+ if (apic->check_apicid_used(phys_id_present_map,
+ mp_ioapics[apic_id].apicid)) {
printk(KERN_ERR "BIOS bug, IO-APIC#%d ID %d is already used!...\n",
- apic, mp_ioapics[apic].mp_apicid);
+ apic_id, mp_ioapics[apic_id].apicid);
for (i = 0; i < get_physical_broadcast(); i++)
if (!physid_isset(i, phys_id_present_map))
break;
@@ -2148,13 +2134,13 @@
printk(KERN_ERR "... fixing up to %d. (tell your hw vendor)\n",
i);
physid_set(i, phys_id_present_map);
- mp_ioapics[apic].mp_apicid = i;
+ mp_ioapics[apic_id].apicid = i;
} else {
physid_mask_t tmp;
- tmp = apicid_to_cpu_present(mp_ioapics[apic].mp_apicid);
+ tmp = apic->apicid_to_cpu_present(mp_ioapics[apic_id].apicid);
apic_printk(APIC_VERBOSE, "Setting %d in the "
"phys_id_present_map\n",
- mp_ioapics[apic].mp_apicid);
+ mp_ioapics[apic_id].apicid);
physids_or(phys_id_present_map, phys_id_present_map, tmp);
}
@@ -2163,11 +2149,11 @@
* We need to adjust the IRQ routing table
* if the ID changed.
*/
- if (old_id != mp_ioapics[apic].mp_apicid)
+ if (old_id != mp_ioapics[apic_id].apicid)
for (i = 0; i < mp_irq_entries; i++)
- if (mp_irqs[i].mp_dstapic == old_id)
- mp_irqs[i].mp_dstapic
- = mp_ioapics[apic].mp_apicid;
+ if (mp_irqs[i].dstapic == old_id)
+ mp_irqs[i].dstapic
+ = mp_ioapics[apic_id].apicid;
/*
* Read the right value from the MPC table and
@@ -2175,20 +2161,20 @@
*/
apic_printk(APIC_VERBOSE, KERN_INFO
"...changing IO-APIC physical APIC ID to %d ...",
- mp_ioapics[apic].mp_apicid);
+ mp_ioapics[apic_id].apicid);
- reg_00.bits.ID = mp_ioapics[apic].mp_apicid;
+ reg_00.bits.ID = mp_ioapics[apic_id].apicid;
spin_lock_irqsave(&ioapic_lock, flags);
- io_apic_write(apic, 0, reg_00.raw);
+ io_apic_write(apic_id, 0, reg_00.raw);
spin_unlock_irqrestore(&ioapic_lock, flags);
/*
* Sanity check
*/
spin_lock_irqsave(&ioapic_lock, flags);
- reg_00.raw = io_apic_read(apic, 0);
+ reg_00.raw = io_apic_read(apic_id, 0);
spin_unlock_irqrestore(&ioapic_lock, flags);
- if (reg_00.bits.ID != mp_ioapics[apic].mp_apicid)
+ if (reg_00.bits.ID != mp_ioapics[apic_id].apicid)
printk("could not set ID!\n");
else
apic_printk(APIC_VERBOSE, " ok.\n");
@@ -2291,7 +2277,7 @@
unsigned long flags;
spin_lock_irqsave(&vector_lock, flags);
- send_IPI_mask(cpumask_of(cpumask_first(cfg->domain)), cfg->vector);
+ apic->send_IPI_mask(cpumask_of(cpumask_first(cfg->domain)), cfg->vector);
spin_unlock_irqrestore(&vector_lock, flags);
return 1;
@@ -2299,7 +2285,7 @@
#else
static int ioapic_retrigger_irq(unsigned int irq)
{
- send_IPI_self(irq_cfg(irq)->vector);
+ apic->send_IPI_self(irq_cfg(irq)->vector);
return 1;
}
@@ -2363,7 +2349,7 @@
set_extra_move_desc(desc, mask);
- dest = cpu_mask_to_apicid_and(cfg->domain, mask);
+ dest = apic->cpu_mask_to_apicid_and(cfg->domain, mask);
modify_ioapic_rte = desc->status & IRQ_LEVEL;
if (modify_ioapic_rte) {
@@ -2383,7 +2369,7 @@
if (cfg->move_in_progress)
send_cleanup_vector(cfg);
- cpumask_copy(&desc->affinity, mask);
+ cpumask_copy(desc->affinity, mask);
}
static int migrate_irq_remapped_level_desc(struct irq_desc *desc)
@@ -2405,11 +2391,11 @@
}
/* everthing is clear. we have right of way */
- migrate_ioapic_irq_desc(desc, &desc->pending_mask);
+ migrate_ioapic_irq_desc(desc, desc->pending_mask);
ret = 0;
desc->status &= ~IRQ_MOVE_PENDING;
- cpumask_clear(&desc->pending_mask);
+ cpumask_clear(desc->pending_mask);
unmask:
unmask_IO_APIC_irq_desc(desc);
@@ -2434,7 +2420,7 @@
continue;
}
- desc->chip->set_affinity(irq, &desc->pending_mask);
+ desc->chip->set_affinity(irq, desc->pending_mask);
spin_unlock_irqrestore(&desc->lock, flags);
}
}
@@ -2448,7 +2434,7 @@
{
if (desc->status & IRQ_LEVEL) {
desc->status |= IRQ_MOVE_PENDING;
- cpumask_copy(&desc->pending_mask, mask);
+ cpumask_copy(desc->pending_mask, mask);
migrate_irq_remapped_level_desc(desc);
return;
}
@@ -2516,7 +2502,7 @@
/* domain has not changed, but affinity did */
me = smp_processor_id();
- if (cpu_isset(me, desc->affinity)) {
+ if (cpumask_test_cpu(me, desc->affinity)) {
*descp = desc = move_irq_desc(desc, me);
/* get the new one */
cfg = desc->chip_data;
@@ -2867,19 +2853,15 @@
int cpu = boot_cpu_id;
int apic1, pin1, apic2, pin2;
unsigned long flags;
- unsigned int ver;
int no_pin1 = 0;
local_irq_save(flags);
- ver = apic_read(APIC_LVR);
- ver = GET_APIC_VERSION(ver);
-
/*
* get/set the timer IRQ vector:
*/
disable_8259A_irq(0);
- assign_irq_vector(0, cfg, TARGET_CPUS);
+ assign_irq_vector(0, cfg, apic->target_cpus());
/*
* As IRQ0 is to be enabled in the 8259A, the virtual
@@ -2893,7 +2875,13 @@
apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_EXTINT);
init_8259A(1);
#ifdef CONFIG_X86_32
- timer_ack = (nmi_watchdog == NMI_IO_APIC && !APIC_INTEGRATED(ver));
+ {
+ unsigned int ver;
+
+ ver = apic_read(APIC_LVR);
+ ver = GET_APIC_VERSION(ver);
+ timer_ack = (nmi_watchdog == NMI_IO_APIC && !APIC_INTEGRATED(ver));
+ }
#endif
pin1 = find_isa_irq_pin(0, mp_INT);
@@ -2932,8 +2920,17 @@
if (no_pin1) {
add_pin_to_irq_cpu(cfg, cpu, apic1, pin1);
setup_timer_IRQ0_pin(apic1, pin1, cfg->vector);
+ } else {
+ /* for edge trigger, setup_IO_APIC_irq already
+ * leave it unmasked.
+ * so only need to unmask if it is level-trigger
+ * do we really have level trigger timer?
+ */
+ int idx;
+ idx = find_irq_entry(apic1, pin1, mp_INT);
+ if (idx != -1 && irq_trigger(idx))
+ unmask_IO_APIC_irq_desc(desc);
}
- unmask_IO_APIC_irq_desc(desc);
if (timer_irq_works()) {
if (nmi_watchdog == NMI_IO_APIC) {
setup_nmi();
@@ -2947,6 +2944,7 @@
if (intr_remapping_enabled)
panic("timer doesn't work through Interrupt-remapped IO-APIC");
#endif
+ local_irq_disable();
clear_IO_APIC_pin(apic1, pin1);
if (!no_pin1)
apic_printk(APIC_QUIET, KERN_ERR "..MP-BIOS bug: "
@@ -2961,7 +2959,6 @@
*/
replace_pin_at_irq_cpu(cfg, cpu, apic1, pin1, apic2, pin2);
setup_timer_IRQ0_pin(apic2, pin2, cfg->vector);
- unmask_IO_APIC_irq_desc(desc);
enable_8259A_irq(0);
if (timer_irq_works()) {
apic_printk(APIC_QUIET, KERN_INFO "....... works.\n");
@@ -2976,6 +2973,7 @@
/*
* Cleanup, just in case ...
*/
+ local_irq_disable();
disable_8259A_irq(0);
clear_IO_APIC_pin(apic2, pin2);
apic_printk(APIC_QUIET, KERN_INFO "....... failed.\n");
@@ -3001,6 +2999,7 @@
apic_printk(APIC_QUIET, KERN_INFO "..... works.\n");
goto out;
}
+ local_irq_disable();
disable_8259A_irq(0);
apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_FIXED | cfg->vector);
apic_printk(APIC_QUIET, KERN_INFO "..... failed.\n");
@@ -3018,6 +3017,7 @@
apic_printk(APIC_QUIET, KERN_INFO "..... works.\n");
goto out;
}
+ local_irq_disable();
apic_printk(APIC_QUIET, KERN_INFO "..... failed :(.\n");
panic("IO-APIC + timer doesn't work! Boot with apic=debug and send a "
"report. Then try booting with the 'noapic' option.\n");
@@ -3047,13 +3047,9 @@
void __init setup_IO_APIC(void)
{
-#ifdef CONFIG_X86_32
- enable_IO_APIC();
-#else
/*
* calling enable_IO_APIC() is moved to setup_local_APIC for BP
*/
-#endif
io_apic_irqs = ~PIC_IRQS;
@@ -3118,8 +3114,8 @@
spin_lock_irqsave(&ioapic_lock, flags);
reg_00.raw = io_apic_read(dev->id, 0);
- if (reg_00.bits.ID != mp_ioapics[dev->id].mp_apicid) {
- reg_00.bits.ID = mp_ioapics[dev->id].mp_apicid;
+ if (reg_00.bits.ID != mp_ioapics[dev->id].apicid) {
+ reg_00.bits.ID = mp_ioapics[dev->id].apicid;
io_apic_write(dev->id, 0, reg_00.raw);
}
spin_unlock_irqrestore(&ioapic_lock, flags);
@@ -3169,6 +3165,7 @@
device_initcall(ioapic_init_sysfs);
+static int nr_irqs_gsi = NR_IRQS_LEGACY;
/*
* Dynamic irq allocate and deallocation
*/
@@ -3183,11 +3180,11 @@
struct irq_desc *desc_new = NULL;
irq = 0;
- spin_lock_irqsave(&vector_lock, flags);
- for (new = irq_want; new < NR_IRQS; new++) {
- if (platform_legacy_irq(new))
- continue;
+ if (irq_want < nr_irqs_gsi)
+ irq_want = nr_irqs_gsi;
+ spin_lock_irqsave(&vector_lock, flags);
+ for (new = irq_want; new < nr_irqs; new++) {
desc_new = irq_to_desc_alloc_cpu(new, cpu);
if (!desc_new) {
printk(KERN_INFO "can not get irq_desc for %d\n", new);
@@ -3197,7 +3194,7 @@
if (cfg_new->vector != 0)
continue;
- if (__assign_irq_vector(new, cfg_new, TARGET_CPUS) == 0)
+ if (__assign_irq_vector(new, cfg_new, apic->target_cpus()) == 0)
irq = new;
break;
}
@@ -3212,7 +3209,6 @@
return irq;
}
-static int nr_irqs_gsi = NR_IRQS_LEGACY;
int create_irq(void)
{
unsigned int irq_want;
@@ -3259,12 +3255,15 @@
int err;
unsigned dest;
+ if (disable_apic)
+ return -ENXIO;
+
cfg = irq_cfg(irq);
- err = assign_irq_vector(irq, cfg, TARGET_CPUS);
+ err = assign_irq_vector(irq, cfg, apic->target_cpus());
if (err)
return err;
- dest = cpu_mask_to_apicid_and(cfg->domain, TARGET_CPUS);
+ dest = apic->cpu_mask_to_apicid_and(cfg->domain, apic->target_cpus());
#ifdef CONFIG_INTR_REMAP
if (irq_remapped(irq)) {
@@ -3278,9 +3277,9 @@
memset (&irte, 0, sizeof(irte));
irte.present = 1;
- irte.dst_mode = INT_DEST_MODE;
+ irte.dst_mode = apic->irq_dest_mode;
irte.trigger_mode = 0; /* edge */
- irte.dlvry_mode = INT_DELIVERY_MODE;
+ irte.dlvry_mode = apic->irq_delivery_mode;
irte.vector = cfg->vector;
irte.dest_id = IRTE_DEST(dest);
@@ -3298,10 +3297,10 @@
msg->address_hi = MSI_ADDR_BASE_HI;
msg->address_lo =
MSI_ADDR_BASE_LO |
- ((INT_DEST_MODE == 0) ?
+ ((apic->irq_dest_mode == 0) ?
MSI_ADDR_DEST_MODE_PHYSICAL:
MSI_ADDR_DEST_MODE_LOGICAL) |
- ((INT_DELIVERY_MODE != dest_LowestPrio) ?
+ ((apic->irq_delivery_mode != dest_LowestPrio) ?
MSI_ADDR_REDIRECTION_CPU:
MSI_ADDR_REDIRECTION_LOWPRI) |
MSI_ADDR_DEST_ID(dest);
@@ -3309,7 +3308,7 @@
msg->data =
MSI_DATA_TRIGGER_EDGE |
MSI_DATA_LEVEL_ASSERT |
- ((INT_DELIVERY_MODE != dest_LowestPrio) ?
+ ((apic->irq_delivery_mode != dest_LowestPrio) ?
MSI_DATA_DELIVERY_FIXED:
MSI_DATA_DELIVERY_LOWPRI) |
MSI_DATA_VECTOR(cfg->vector);
@@ -3464,40 +3463,6 @@
return 0;
}
-int arch_setup_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc)
-{
- unsigned int irq;
- int ret;
- unsigned int irq_want;
-
- irq_want = nr_irqs_gsi;
- irq = create_irq_nr(irq_want);
- if (irq == 0)
- return -1;
-
-#ifdef CONFIG_INTR_REMAP
- if (!intr_remapping_enabled)
- goto no_ir;
-
- ret = msi_alloc_irte(dev, irq, 1);
- if (ret < 0)
- goto error;
-no_ir:
-#endif
- ret = setup_msi_irq(dev, msidesc, irq);
- if (ret < 0) {
- destroy_irq(irq);
- return ret;
- }
- return 0;
-
-#ifdef CONFIG_INTR_REMAP
-error:
- destroy_irq(irq);
- return ret;
-#endif
-}
-
int arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
{
unsigned int irq;
@@ -3514,9 +3479,9 @@
sub_handle = 0;
list_for_each_entry(msidesc, &dev->msi_list, list) {
irq = create_irq_nr(irq_want);
- irq_want++;
if (irq == 0)
return -1;
+ irq_want = irq + 1;
#ifdef CONFIG_INTR_REMAP
if (!intr_remapping_enabled)
goto no_ir;
@@ -3727,13 +3692,17 @@
struct irq_cfg *cfg;
int err;
+ if (disable_apic)
+ return -ENXIO;
+
cfg = irq_cfg(irq);
- err = assign_irq_vector(irq, cfg, TARGET_CPUS);
+ err = assign_irq_vector(irq, cfg, apic->target_cpus());
if (!err) {
struct ht_irq_msg msg;
unsigned dest;
- dest = cpu_mask_to_apicid_and(cfg->domain, TARGET_CPUS);
+ dest = apic->cpu_mask_to_apicid_and(cfg->domain,
+ apic->target_cpus());
msg.address_hi = HT_IRQ_HIGH_DEST_ID(dest);
@@ -3741,11 +3710,11 @@
HT_IRQ_LOW_BASE |
HT_IRQ_LOW_DEST_ID(dest) |
HT_IRQ_LOW_VECTOR(cfg->vector) |
- ((INT_DEST_MODE == 0) ?
+ ((apic->irq_dest_mode == 0) ?
HT_IRQ_LOW_DM_PHYSICAL :
HT_IRQ_LOW_DM_LOGICAL) |
HT_IRQ_LOW_RQEOI_EDGE |
- ((INT_DELIVERY_MODE != dest_LowestPrio) ?
+ ((apic->irq_delivery_mode != dest_LowestPrio) ?
HT_IRQ_LOW_MT_FIXED :
HT_IRQ_LOW_MT_ARBITRATED) |
HT_IRQ_LOW_IRQ_MASKED;
@@ -3761,7 +3730,7 @@
}
#endif /* CONFIG_HT_IRQ */
-#ifdef CONFIG_X86_64
+#ifdef CONFIG_X86_UV
/*
* Re-target the irq to the specified CPU and enable the specified MMR located
* on the specified blade to allow the sending of MSIs to the specified CPU.
@@ -3793,12 +3762,12 @@
BUG_ON(sizeof(struct uv_IO_APIC_route_entry) != sizeof(unsigned long));
entry->vector = cfg->vector;
- entry->delivery_mode = INT_DELIVERY_MODE;
- entry->dest_mode = INT_DEST_MODE;
+ entry->delivery_mode = apic->irq_delivery_mode;
+ entry->dest_mode = apic->irq_dest_mode;
entry->polarity = 0;
entry->trigger = 0;
entry->mask = 0;
- entry->dest = cpu_mask_to_apicid(eligible_cpu);
+ entry->dest = apic->cpu_mask_to_apicid(eligible_cpu);
mmr_pnode = uv_blade_to_pnode(mmr_blade);
uv_write_global_mmr64(mmr_pnode, mmr_offset, mmr_value);
@@ -3861,6 +3830,28 @@
printk(KERN_DEBUG "nr_irqs_gsi: %d\n", nr_irqs_gsi);
}
+#ifdef CONFIG_SPARSE_IRQ
+int __init arch_probe_nr_irqs(void)
+{
+ int nr;
+
+ if (nr_irqs > (NR_VECTORS * nr_cpu_ids))
+ nr_irqs = NR_VECTORS * nr_cpu_ids;
+
+ nr = nr_irqs_gsi + 8 * nr_cpu_ids;
+#if defined(CONFIG_PCI_MSI) || defined(CONFIG_HT_IRQ)
+ /*
+ * for MSI and HT dyn irq
+ */
+ nr += nr_irqs_gsi * 16;
+#endif
+ if (nr < nr_irqs)
+ nr_irqs = nr;
+
+ return 0;
+}
+#endif
+
/* --------------------------------------------------------------------------
ACPI-based IOAPIC Configuration
-------------------------------------------------------------------------- */
@@ -3886,7 +3877,7 @@
*/
if (physids_empty(apic_id_map))
- apic_id_map = ioapic_phys_id_map(phys_cpu_present_map);
+ apic_id_map = apic->ioapic_phys_id_map(phys_cpu_present_map);
spin_lock_irqsave(&ioapic_lock, flags);
reg_00.raw = io_apic_read(ioapic, 0);
@@ -3902,10 +3893,10 @@
* Every APIC in a system must have a unique ID or we get lots of nice
* 'stuck on smp_invalidate_needed IPI wait' messages.
*/
- if (check_apicid_used(apic_id_map, apic_id)) {
+ if (apic->check_apicid_used(apic_id_map, apic_id)) {
for (i = 0; i < get_physical_broadcast(); i++) {
- if (!check_apicid_used(apic_id_map, i))
+ if (!apic->check_apicid_used(apic_id_map, i))
break;
}
@@ -3918,7 +3909,7 @@
apic_id = i;
}
- tmp = apicid_to_cpu_present(apic_id);
+ tmp = apic->apicid_to_cpu_present(apic_id);
physids_or(apic_id_map, apic_id_map, tmp);
if (reg_00.bits.ID != apic_id) {
@@ -3995,8 +3986,8 @@
return -1;
for (i = 0; i < mp_irq_entries; i++)
- if (mp_irqs[i].mp_irqtype == mp_INT &&
- mp_irqs[i].mp_srcbusirq == bus_irq)
+ if (mp_irqs[i].irqtype == mp_INT &&
+ mp_irqs[i].srcbusirq == bus_irq)
break;
if (i >= mp_irq_entries)
return -1;
@@ -4011,7 +4002,7 @@
/*
* This function currently is only a helper for the i386 smp boot process where
* we need to reprogram the ioredtbls to cater for the cpus which have come online
- * so mask in all cases should simply be TARGET_CPUS
+ * so mask in all cases should simply be apic->target_cpus()
*/
#ifdef CONFIG_SMP
void __init setup_ioapic_dest(void)
@@ -4050,9 +4041,9 @@
*/
if (desc->status &
(IRQ_NO_BALANCING | IRQ_AFFINITY_SET))
- mask = &desc->affinity;
+ mask = desc->affinity;
else
- mask = TARGET_CPUS;
+ mask = apic->target_cpus();
#ifdef CONFIG_INTR_REMAP
if (intr_remapping_enabled)
@@ -4111,7 +4102,7 @@
ioapic_res = ioapic_setup_resources();
for (i = 0; i < nr_ioapics; i++) {
if (smp_found_config) {
- ioapic_phys = mp_ioapics[i].mp_apicaddr;
+ ioapic_phys = mp_ioapics[i].apicaddr;
#ifdef CONFIG_X86_32
if (!ioapic_phys) {
printk(KERN_ERR
diff --git a/arch/x86/kernel/apic/ipi.c b/arch/x86/kernel/apic/ipi.c
new file mode 100644
index 0000000..dbf5445
--- /dev/null
+++ b/arch/x86/kernel/apic/ipi.c
@@ -0,0 +1,164 @@
+#include <linux/cpumask.h>
+#include <linux/interrupt.h>
+#include <linux/init.h>
+
+#include <linux/mm.h>
+#include <linux/delay.h>
+#include <linux/spinlock.h>
+#include <linux/kernel_stat.h>
+#include <linux/mc146818rtc.h>
+#include <linux/cache.h>
+#include <linux/cpu.h>
+#include <linux/module.h>
+
+#include <asm/smp.h>
+#include <asm/mtrr.h>
+#include <asm/tlbflush.h>
+#include <asm/mmu_context.h>
+#include <asm/apic.h>
+#include <asm/proto.h>
+#include <asm/ipi.h>
+
+void default_send_IPI_mask_sequence_phys(const struct cpumask *mask, int vector)
+{
+ unsigned long query_cpu;
+ unsigned long flags;
+
+ /*
+ * Hack. The clustered APIC addressing mode doesn't allow us to send
+ * to an arbitrary mask, so I do a unicast to each CPU instead.
+ * - mbligh
+ */
+ local_irq_save(flags);
+ for_each_cpu(query_cpu, mask) {
+ __default_send_IPI_dest_field(per_cpu(x86_cpu_to_apicid,
+ query_cpu), vector, APIC_DEST_PHYSICAL);
+ }
+ local_irq_restore(flags);
+}
+
+void default_send_IPI_mask_allbutself_phys(const struct cpumask *mask,
+ int vector)
+{
+ unsigned int this_cpu = smp_processor_id();
+ unsigned int query_cpu;
+ unsigned long flags;
+
+ /* See Hack comment above */
+
+ local_irq_save(flags);
+ for_each_cpu(query_cpu, mask) {
+ if (query_cpu == this_cpu)
+ continue;
+ __default_send_IPI_dest_field(per_cpu(x86_cpu_to_apicid,
+ query_cpu), vector, APIC_DEST_PHYSICAL);
+ }
+ local_irq_restore(flags);
+}
+
+void default_send_IPI_mask_sequence_logical(const struct cpumask *mask,
+ int vector)
+{
+ unsigned long flags;
+ unsigned int query_cpu;
+
+ /*
+ * Hack. The clustered APIC addressing mode doesn't allow us to send
+ * to an arbitrary mask, so I do a unicasts to each CPU instead. This
+ * should be modified to do 1 message per cluster ID - mbligh
+ */
+
+ local_irq_save(flags);
+ for_each_cpu(query_cpu, mask)
+ __default_send_IPI_dest_field(
+ apic->cpu_to_logical_apicid(query_cpu), vector,
+ apic->dest_logical);
+ local_irq_restore(flags);
+}
+
+void default_send_IPI_mask_allbutself_logical(const struct cpumask *mask,
+ int vector)
+{
+ unsigned long flags;
+ unsigned int query_cpu;
+ unsigned int this_cpu = smp_processor_id();
+
+ /* See Hack comment above */
+
+ local_irq_save(flags);
+ for_each_cpu(query_cpu, mask) {
+ if (query_cpu == this_cpu)
+ continue;
+ __default_send_IPI_dest_field(
+ apic->cpu_to_logical_apicid(query_cpu), vector,
+ apic->dest_logical);
+ }
+ local_irq_restore(flags);
+}
+
+#ifdef CONFIG_X86_32
+
+/*
+ * This is only used on smaller machines.
+ */
+void default_send_IPI_mask_logical(const struct cpumask *cpumask, int vector)
+{
+ unsigned long mask = cpumask_bits(cpumask)[0];
+ unsigned long flags;
+
+ local_irq_save(flags);
+ WARN_ON(mask & ~cpumask_bits(cpu_online_mask)[0]);
+ __default_send_IPI_dest_field(mask, vector, apic->dest_logical);
+ local_irq_restore(flags);
+}
+
+void default_send_IPI_allbutself(int vector)
+{
+ /*
+ * if there are no other CPUs in the system then we get an APIC send
+ * error if we try to broadcast, thus avoid sending IPIs in this case.
+ */
+ if (!(num_online_cpus() > 1))
+ return;
+
+ __default_local_send_IPI_allbutself(vector);
+}
+
+void default_send_IPI_all(int vector)
+{
+ __default_local_send_IPI_all(vector);
+}
+
+void default_send_IPI_self(int vector)
+{
+ __default_send_IPI_shortcut(APIC_DEST_SELF, vector, apic->dest_logical);
+}
+
+/* must come after the send_IPI functions above for inlining */
+static int convert_apicid_to_cpu(int apic_id)
+{
+ int i;
+
+ for_each_possible_cpu(i) {
+ if (per_cpu(x86_cpu_to_apicid, i) == apic_id)
+ return i;
+ }
+ return -1;
+}
+
+int safe_smp_processor_id(void)
+{
+ int apicid, cpuid;
+
+ if (!boot_cpu_has(X86_FEATURE_APIC))
+ return 0;
+
+ apicid = hard_smp_processor_id();
+ if (apicid == BAD_APICID)
+ return 0;
+
+ cpuid = convert_apicid_to_cpu(apicid);
+
+ return cpuid >= 0 ? cpuid : 0;
+}
+#endif
diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/apic/nmi.c
similarity index 98%
rename from arch/x86/kernel/nmi.c
rename to arch/x86/kernel/apic/nmi.c
index 7228979..bdfad80 100644
--- a/arch/x86/kernel/nmi.c
+++ b/arch/x86/kernel/apic/nmi.c
@@ -34,7 +34,7 @@
#include <asm/mce.h>
-#include <mach_traps.h>
+#include <asm/mach_traps.h>
int unknown_nmi_panic;
int nmi_watchdog_enabled;
@@ -61,11 +61,7 @@
static inline unsigned int get_nmi_count(int cpu)
{
-#ifdef CONFIG_X86_64
- return cpu_pda(cpu)->__nmi_count;
-#else
- return nmi_count(cpu);
-#endif
+ return per_cpu(irq_stat, cpu).__nmi_count;
}
static inline int mce_in_progress(void)
@@ -82,12 +78,8 @@
*/
static inline unsigned int get_timer_irqs(int cpu)
{
-#ifdef CONFIG_X86_64
- return read_pda(apic_timer_irqs) + read_pda(irq0_irqs);
-#else
return per_cpu(irq_stat, cpu).apic_timer_irqs +
per_cpu(irq_stat, cpu).irq0_irqs;
-#endif
}
#ifdef CONFIG_SMP
diff --git a/arch/x86/kernel/apic/numaq_32.c b/arch/x86/kernel/apic/numaq_32.c
new file mode 100644
index 0000000..d9d6d61
--- /dev/null
+++ b/arch/x86/kernel/apic/numaq_32.c
@@ -0,0 +1,565 @@
+/*
+ * Written by: Patricia Gaughen, IBM Corporation
+ *
+ * Copyright (C) 2002, IBM Corp.
+ * Copyright (C) 2009, Red Hat, Inc., Ingo Molnar
+ *
+ * All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for more
+ * details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ * Send feedback to <gone@us.ibm.com>
+ */
+#include <linux/nodemask.h>
+#include <linux/topology.h>
+#include <linux/bootmem.h>
+#include <linux/threads.h>
+#include <linux/cpumask.h>
+#include <linux/kernel.h>
+#include <linux/mmzone.h>
+#include <linux/module.h>
+#include <linux/string.h>
+#include <linux/init.h>
+#include <linux/numa.h>
+#include <linux/smp.h>
+#include <linux/io.h>
+#include <linux/mm.h>
+
+#include <asm/processor.h>
+#include <asm/fixmap.h>
+#include <asm/mpspec.h>
+#include <asm/numaq.h>
+#include <asm/setup.h>
+#include <asm/apic.h>
+#include <asm/e820.h>
+#include <asm/ipi.h>
+
+#define MB_TO_PAGES(addr) ((addr) << (20 - PAGE_SHIFT))
+
+int found_numaq;
+
+/*
+ * Have to match translation table entries to main table entries by counter
+ * hence the mpc_record variable .... can't see a less disgusting way of
+ * doing this ....
+ */
+struct mpc_trans {
+ unsigned char mpc_type;
+ unsigned char trans_len;
+ unsigned char trans_type;
+ unsigned char trans_quad;
+ unsigned char trans_global;
+ unsigned char trans_local;
+ unsigned short trans_reserved;
+};
+
+/* x86_quirks member */
+static int mpc_record;
+
+static __cpuinitdata struct mpc_trans *translation_table[MAX_MPC_ENTRY];
+
+int mp_bus_id_to_node[MAX_MP_BUSSES];
+int mp_bus_id_to_local[MAX_MP_BUSSES];
+int quad_local_to_mp_bus_id[NR_CPUS/4][4];
+
+
+static inline void numaq_register_node(int node, struct sys_cfg_data *scd)
+{
+ struct eachquadmem *eq = scd->eq + node;
+
+ node_set_online(node);
+
+ /* Convert to pages */
+ node_start_pfn[node] =
+ MB_TO_PAGES(eq->hi_shrd_mem_start - eq->priv_mem_size);
+
+ node_end_pfn[node] =
+ MB_TO_PAGES(eq->hi_shrd_mem_start + eq->hi_shrd_mem_size);
+
+ e820_register_active_regions(node, node_start_pfn[node],
+ node_end_pfn[node]);
+
+ memory_present(node, node_start_pfn[node], node_end_pfn[node]);
+
+ node_remap_size[node] = node_memmap_size_bytes(node,
+ node_start_pfn[node],
+ node_end_pfn[node]);
+}
+
+/*
+ * Function: smp_dump_qct()
+ *
+ * Description: gets memory layout from the quad config table. This
+ * function also updates node_online_map with the nodes (quads) present.
+ */
+static void __init smp_dump_qct(void)
+{
+ struct sys_cfg_data *scd;
+ int node;
+
+ scd = (void *)__va(SYS_CFG_DATA_PRIV_ADDR);
+
+ nodes_clear(node_online_map);
+ for_each_node(node) {
+ if (scd->quads_present31_0 & (1 << node))
+ numaq_register_node(node, scd);
+ }
+}
+
+void __cpuinit numaq_tsc_disable(void)
+{
+ if (!found_numaq)
+ return;
+
+ if (num_online_nodes() > 1) {
+ printk(KERN_DEBUG "NUMAQ: disabling TSC\n");
+ setup_clear_cpu_cap(X86_FEATURE_TSC);
+ }
+}
+
+static int __init numaq_pre_time_init(void)
+{
+ numaq_tsc_disable();
+ return 0;
+}
+
+static inline int generate_logical_apicid(int quad, int phys_apicid)
+{
+ return (quad << 4) + (phys_apicid ? phys_apicid << 1 : 1);
+}
+
+/* x86_quirks member */
+static int mpc_apic_id(struct mpc_cpu *m)
+{
+ int quad = translation_table[mpc_record]->trans_quad;
+ int logical_apicid = generate_logical_apicid(quad, m->apicid);
+
+ printk(KERN_DEBUG
+ "Processor #%d %u:%u APIC version %d (quad %d, apic %d)\n",
+ m->apicid, (m->cpufeature & CPU_FAMILY_MASK) >> 8,
+ (m->cpufeature & CPU_MODEL_MASK) >> 4,
+ m->apicver, quad, logical_apicid);
+
+ return logical_apicid;
+}
+
+/* x86_quirks member */
+static void mpc_oem_bus_info(struct mpc_bus *m, char *name)
+{
+ int quad = translation_table[mpc_record]->trans_quad;
+ int local = translation_table[mpc_record]->trans_local;
+
+ mp_bus_id_to_node[m->busid] = quad;
+ mp_bus_id_to_local[m->busid] = local;
+
+ printk(KERN_INFO "Bus #%d is %s (node %d)\n", m->busid, name, quad);
+}
+
+/* x86_quirks member */
+static void mpc_oem_pci_bus(struct mpc_bus *m)
+{
+ int quad = translation_table[mpc_record]->trans_quad;
+ int local = translation_table[mpc_record]->trans_local;
+
+ quad_local_to_mp_bus_id[quad][local] = m->busid;
+}
+
+static void __init MP_translation_info(struct mpc_trans *m)
+{
+ printk(KERN_INFO
+ "Translation: record %d, type %d, quad %d, global %d, local %d\n",
+ mpc_record, m->trans_type, m->trans_quad, m->trans_global,
+ m->trans_local);
+
+ if (mpc_record >= MAX_MPC_ENTRY)
+ printk(KERN_ERR "MAX_MPC_ENTRY exceeded!\n");
+ else
+ translation_table[mpc_record] = m; /* stash this for later */
+
+ if (m->trans_quad < MAX_NUMNODES && !node_online(m->trans_quad))
+ node_set_online(m->trans_quad);
+}
+
+static int __init mpf_checksum(unsigned char *mp, int len)
+{
+ int sum = 0;
+
+ while (len--)
+ sum += *mp++;
+
+ return sum & 0xFF;
+}
+
+/*
+ * Read/parse the MPC oem tables
+ */
+static void __init
+ smp_read_mpc_oem(struct mpc_oemtable *oemtable, unsigned short oemsize)
+{
+ int count = sizeof(*oemtable); /* the header size */
+ unsigned char *oemptr = ((unsigned char *)oemtable) + count;
+
+ mpc_record = 0;
+ printk(KERN_INFO
+ "Found an OEM MPC table at %8p - parsing it ... \n", oemtable);
+
+ if (memcmp(oemtable->signature, MPC_OEM_SIGNATURE, 4)) {
+ printk(KERN_WARNING
+ "SMP mpc oemtable: bad signature [%c%c%c%c]!\n",
+ oemtable->signature[0], oemtable->signature[1],
+ oemtable->signature[2], oemtable->signature[3]);
+ return;
+ }
+
+ if (mpf_checksum((unsigned char *)oemtable, oemtable->length)) {
+ printk(KERN_WARNING "SMP oem mptable: checksum error!\n");
+ return;
+ }
+
+ while (count < oemtable->length) {
+ switch (*oemptr) {
+ case MP_TRANSLATION:
+ {
+ struct mpc_trans *m = (void *)oemptr;
+
+ MP_translation_info(m);
+ oemptr += sizeof(*m);
+ count += sizeof(*m);
+ ++mpc_record;
+ break;
+ }
+ default:
+ printk(KERN_WARNING
+ "Unrecognised OEM table entry type! - %d\n",
+ (int)*oemptr);
+ return;
+ }
+ }
+}
+
+static int __init numaq_setup_ioapic_ids(void)
+{
+ /* so can skip it */
+ return 1;
+}
+
+static int __init numaq_update_apic(void)
+{
+ apic->wakeup_cpu = wakeup_secondary_cpu_via_nmi;
+
+ return 0;
+}
+
+static struct x86_quirks numaq_x86_quirks __initdata = {
+ .arch_pre_time_init = numaq_pre_time_init,
+ .arch_time_init = NULL,
+ .arch_pre_intr_init = NULL,
+ .arch_memory_setup = NULL,
+ .arch_intr_init = NULL,
+ .arch_trap_init = NULL,
+ .mach_get_smp_config = NULL,
+ .mach_find_smp_config = NULL,
+ .mpc_record = &mpc_record,
+ .mpc_apic_id = mpc_apic_id,
+ .mpc_oem_bus_info = mpc_oem_bus_info,
+ .mpc_oem_pci_bus = mpc_oem_pci_bus,
+ .smp_read_mpc_oem = smp_read_mpc_oem,
+ .setup_ioapic_ids = numaq_setup_ioapic_ids,
+ .update_apic = numaq_update_apic,
+};
+
+static __init void early_check_numaq(void)
+{
+ /*
+ * Find possible boot-time SMP configuration:
+ */
+ early_find_smp_config();
+
+ /*
+ * get boot-time SMP configuration:
+ */
+ if (smp_found_config)
+ early_get_smp_config();
+
+ if (found_numaq)
+ x86_quirks = &numaq_x86_quirks;
+}
+
+int __init get_memcfg_numaq(void)
+{
+ early_check_numaq();
+ if (!found_numaq)
+ return 0;
+ smp_dump_qct();
+
+ return 1;
+}
+
+#define NUMAQ_APIC_DFR_VALUE (APIC_DFR_CLUSTER)
+
+static inline unsigned int numaq_get_apic_id(unsigned long x)
+{
+ return (x >> 24) & 0x0F;
+}
+
+static inline void numaq_send_IPI_mask(const struct cpumask *mask, int vector)
+{
+ default_send_IPI_mask_sequence_logical(mask, vector);
+}
+
+static inline void numaq_send_IPI_allbutself(int vector)
+{
+ default_send_IPI_mask_allbutself_logical(cpu_online_mask, vector);
+}
+
+static inline void numaq_send_IPI_all(int vector)
+{
+ numaq_send_IPI_mask(cpu_online_mask, vector);
+}
+
+#define NUMAQ_TRAMPOLINE_PHYS_LOW (0x8)
+#define NUMAQ_TRAMPOLINE_PHYS_HIGH (0xa)
+
+/*
+ * Because we use NMIs rather than the INIT-STARTUP sequence to
+ * bootstrap the CPUs, the APIC may be in a weird state. Kick it:
+ */
+static inline void numaq_smp_callin_clear_local_apic(void)
+{
+ clear_local_APIC();
+}
+
+static inline const cpumask_t *numaq_target_cpus(void)
+{
+ return &CPU_MASK_ALL;
+}
+
+static inline unsigned long
+numaq_check_apicid_used(physid_mask_t bitmap, int apicid)
+{
+ return physid_isset(apicid, bitmap);
+}
+
+static inline unsigned long numaq_check_apicid_present(int bit)
+{
+ return physid_isset(bit, phys_cpu_present_map);
+}
+
+static inline int numaq_apic_id_registered(void)
+{
+ return 1;
+}
+
+static inline void numaq_init_apic_ldr(void)
+{
+ /* Already done in NUMA-Q firmware */
+}
+
+static inline void numaq_setup_apic_routing(void)
+{
+ printk(KERN_INFO
+ "Enabling APIC mode: NUMA-Q. Using %d I/O APICs\n",
+ nr_ioapics);
+}
+
+/*
+ * Skip adding the timer int on secondary nodes, which causes
+ * a small but painful rift in the time-space continuum.
+ */
+static inline int numaq_multi_timer_check(int apic, int irq)
+{
+ return apic != 0 && irq == 0;
+}
+
+static inline physid_mask_t numaq_ioapic_phys_id_map(physid_mask_t phys_map)
+{
+ /* We don't have a good way to do this yet - hack */
+ return physids_promote(0xFUL);
+}
+
+static inline int numaq_cpu_to_logical_apicid(int cpu)
+{
+ if (cpu >= nr_cpu_ids)
+ return BAD_APICID;
+ return cpu_2_logical_apicid[cpu];
+}
+
+/*
+ * Supporting over 60 cpus on NUMA-Q requires a locality-dependent
+ * cpu to APIC ID relation to properly interact with the intelligent
+ * mode of the cluster controller.
+ */
+static inline int numaq_cpu_present_to_apicid(int mps_cpu)
+{
+ if (mps_cpu < 60)
+ return ((mps_cpu >> 2) << 4) | (1 << (mps_cpu & 0x3));
+ else
+ return BAD_APICID;
+}
+
+static inline int numaq_apicid_to_node(int logical_apicid)
+{
+ return logical_apicid >> 4;
+}
+
+static inline physid_mask_t numaq_apicid_to_cpu_present(int logical_apicid)
+{
+ int node = numaq_apicid_to_node(logical_apicid);
+ int cpu = __ffs(logical_apicid & 0xf);
+
+ return physid_mask_of_physid(cpu + 4*node);
+}
+
+/* Where the IO area was mapped on multiquad, always 0 otherwise */
+void *xquad_portio;
+
+static inline int numaq_check_phys_apicid_present(int boot_cpu_physical_apicid)
+{
+ return 1;
+}
+
+/*
+ * We use physical apicids here, not logical, so just return the default
+ * physical broadcast to stop people from breaking us
+ */
+static inline unsigned int numaq_cpu_mask_to_apicid(const cpumask_t *cpumask)
+{
+ return 0x0F;
+}
+
+static inline unsigned int
+numaq_cpu_mask_to_apicid_and(const struct cpumask *cpumask,
+ const struct cpumask *andmask)
+{
+ return 0x0F;
+}
+
+/* No NUMA-Q box has a HT CPU, but it can't hurt to use the default code. */
+static inline int numaq_phys_pkg_id(int cpuid_apic, int index_msb)
+{
+ return cpuid_apic >> index_msb;
+}
+
+static int
+numaq_mps_oem_check(struct mpc_table *mpc, char *oem, char *productid)
+{
+ if (strncmp(oem, "IBM NUMA", 8))
+ printk(KERN_ERR "Warning! Not a NUMA-Q system!\n");
+ else
+ found_numaq = 1;
+
+ return found_numaq;
+}
+
+static int probe_numaq(void)
+{
+ /* already know from get_memcfg_numaq() */
+ return found_numaq;
+}
+
+static void numaq_vector_allocation_domain(int cpu, cpumask_t *retmask)
+{
+ /* Careful. Some cpus do not strictly honor the set of cpus
+ * specified in the interrupt destination when using lowest
+ * priority interrupt delivery mode.
+ *
+ * In particular there was a hyperthreading cpu observed to
+ * deliver interrupts to the wrong hyperthread when only one
+ * hyperthread was specified in the interrupt desitination.
+ */
+ *retmask = (cpumask_t){ { [0] = APIC_ALL_CPUS, } };
+}
+
+static void numaq_setup_portio_remap(void)
+{
+ int num_quads = num_online_nodes();
+
+ if (num_quads <= 1)
+ return;
+
+ printk(KERN_INFO
+ "Remapping cross-quad port I/O for %d quads\n", num_quads);
+
+ xquad_portio = ioremap(XQUAD_PORTIO_BASE, num_quads*XQUAD_PORTIO_QUAD);
+
+ printk(KERN_INFO
+ "xquad_portio vaddr 0x%08lx, len %08lx\n",
+ (u_long) xquad_portio, (u_long) num_quads*XQUAD_PORTIO_QUAD);
+}
+
+struct apic apic_numaq = {
+
+ .name = "NUMAQ",
+ .probe = probe_numaq,
+ .acpi_madt_oem_check = NULL,
+ .apic_id_registered = numaq_apic_id_registered,
+
+ .irq_delivery_mode = dest_LowestPrio,
+ /* physical delivery on LOCAL quad: */
+ .irq_dest_mode = 0,
+
+ .target_cpus = numaq_target_cpus,
+ .disable_esr = 1,
+ .dest_logical = APIC_DEST_LOGICAL,
+ .check_apicid_used = numaq_check_apicid_used,
+ .check_apicid_present = numaq_check_apicid_present,
+
+ .vector_allocation_domain = numaq_vector_allocation_domain,
+ .init_apic_ldr = numaq_init_apic_ldr,
+
+ .ioapic_phys_id_map = numaq_ioapic_phys_id_map,
+ .setup_apic_routing = numaq_setup_apic_routing,
+ .multi_timer_check = numaq_multi_timer_check,
+ .apicid_to_node = numaq_apicid_to_node,
+ .cpu_to_logical_apicid = numaq_cpu_to_logical_apicid,
+ .cpu_present_to_apicid = numaq_cpu_present_to_apicid,
+ .apicid_to_cpu_present = numaq_apicid_to_cpu_present,
+ .setup_portio_remap = numaq_setup_portio_remap,
+ .check_phys_apicid_present = numaq_check_phys_apicid_present,
+ .enable_apic_mode = NULL,
+ .phys_pkg_id = numaq_phys_pkg_id,
+ .mps_oem_check = numaq_mps_oem_check,
+
+ .get_apic_id = numaq_get_apic_id,
+ .set_apic_id = NULL,
+ .apic_id_mask = 0x0F << 24,
+
+ .cpu_mask_to_apicid = numaq_cpu_mask_to_apicid,
+ .cpu_mask_to_apicid_and = numaq_cpu_mask_to_apicid_and,
+
+ .send_IPI_mask = numaq_send_IPI_mask,
+ .send_IPI_mask_allbutself = NULL,
+ .send_IPI_allbutself = numaq_send_IPI_allbutself,
+ .send_IPI_all = numaq_send_IPI_all,
+ .send_IPI_self = default_send_IPI_self,
+
+ .wakeup_cpu = NULL,
+ .trampoline_phys_low = NUMAQ_TRAMPOLINE_PHYS_LOW,
+ .trampoline_phys_high = NUMAQ_TRAMPOLINE_PHYS_HIGH,
+
+ /* We don't do anything here because we use NMI's to boot instead */
+ .wait_for_init_deassert = NULL,
+
+ .smp_callin_clear_local_apic = numaq_smp_callin_clear_local_apic,
+ .inquire_remote_apic = NULL,
+
+ .read = native_apic_mem_read,
+ .write = native_apic_mem_write,
+ .icr_read = native_apic_icr_read,
+ .icr_write = native_apic_icr_write,
+ .wait_icr_idle = native_apic_wait_icr_idle,
+ .safe_wait_icr_idle = native_safe_apic_wait_icr_idle,
+};
diff --git a/arch/x86/kernel/apic/probe_32.c b/arch/x86/kernel/apic/probe_32.c
new file mode 100644
index 0000000..3a730fa
--- /dev/null
+++ b/arch/x86/kernel/apic/probe_32.c
@@ -0,0 +1,295 @@
+/*
+ * Default generic APIC driver. This handles up to 8 CPUs.
+ *
+ * Copyright 2003 Andi Kleen, SuSE Labs.
+ * Subject to the GNU Public License, v.2
+ *
+ * Generic x86 APIC driver probe layer.
+ */
+#include <linux/threads.h>
+#include <linux/cpumask.h>
+#include <linux/module.h>
+#include <linux/string.h>
+#include <linux/kernel.h>
+#include <linux/ctype.h>
+#include <linux/init.h>
+#include <linux/errno.h>
+#include <asm/fixmap.h>
+#include <asm/mpspec.h>
+#include <asm/apicdef.h>
+#include <asm/apic.h>
+#include <asm/setup.h>
+
+#include <linux/threads.h>
+#include <linux/cpumask.h>
+#include <asm/mpspec.h>
+#include <asm/fixmap.h>
+#include <asm/apicdef.h>
+#include <linux/kernel.h>
+#include <linux/string.h>
+#include <linux/smp.h>
+#include <linux/init.h>
+#include <asm/ipi.h>
+
+#include <linux/smp.h>
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <asm/acpi.h>
+#include <asm/e820.h>
+#include <asm/setup.h>
+
+#ifdef CONFIG_HOTPLUG_CPU
+#define DEFAULT_SEND_IPI (1)
+#else
+#define DEFAULT_SEND_IPI (0)
+#endif
+
+int no_broadcast = DEFAULT_SEND_IPI;
+
+static __init int no_ipi_broadcast(char *str)
+{
+ get_option(&str, &no_broadcast);
+ pr_info("Using %s mode\n",
+ no_broadcast ? "No IPI Broadcast" : "IPI Broadcast");
+ return 1;
+}
+__setup("no_ipi_broadcast=", no_ipi_broadcast);
+
+static int __init print_ipi_mode(void)
+{
+ pr_info("Using IPI %s mode\n",
+ no_broadcast ? "No-Shortcut" : "Shortcut");
+ return 0;
+}
+late_initcall(print_ipi_mode);
+
+void default_setup_apic_routing(void)
+{
+#ifdef CONFIG_X86_IO_APIC
+ printk(KERN_INFO
+ "Enabling APIC mode: Flat. Using %d I/O APICs\n",
+ nr_ioapics);
+#endif
+}
+
+static void default_vector_allocation_domain(int cpu, struct cpumask *retmask)
+{
+ /*
+ * Careful. Some cpus do not strictly honor the set of cpus
+ * specified in the interrupt destination when using lowest
+ * priority interrupt delivery mode.
+ *
+ * In particular there was a hyperthreading cpu observed to
+ * deliver interrupts to the wrong hyperthread when only one
+ * hyperthread was specified in the interrupt desitination.
+ */
+ *retmask = (cpumask_t) { { [0] = APIC_ALL_CPUS } };
+}
+
+/* should be called last. */
+static int probe_default(void)
+{
+ return 1;
+}
+
+struct apic apic_default = {
+
+ .name = "default",
+ .probe = probe_default,
+ .acpi_madt_oem_check = NULL,
+ .apic_id_registered = default_apic_id_registered,
+
+ .irq_delivery_mode = dest_LowestPrio,
+ /* logical delivery broadcast to all CPUs: */
+ .irq_dest_mode = 1,
+
+ .target_cpus = default_target_cpus,
+ .disable_esr = 0,
+ .dest_logical = APIC_DEST_LOGICAL,
+ .check_apicid_used = default_check_apicid_used,
+ .check_apicid_present = default_check_apicid_present,
+
+ .vector_allocation_domain = default_vector_allocation_domain,
+ .init_apic_ldr = default_init_apic_ldr,
+
+ .ioapic_phys_id_map = default_ioapic_phys_id_map,
+ .setup_apic_routing = default_setup_apic_routing,
+ .multi_timer_check = NULL,
+ .apicid_to_node = default_apicid_to_node,
+ .cpu_to_logical_apicid = default_cpu_to_logical_apicid,
+ .cpu_present_to_apicid = default_cpu_present_to_apicid,
+ .apicid_to_cpu_present = default_apicid_to_cpu_present,
+ .setup_portio_remap = NULL,
+ .check_phys_apicid_present = default_check_phys_apicid_present,
+ .enable_apic_mode = NULL,
+ .phys_pkg_id = default_phys_pkg_id,
+ .mps_oem_check = NULL,
+
+ .get_apic_id = default_get_apic_id,
+ .set_apic_id = NULL,
+ .apic_id_mask = 0x0F << 24,
+
+ .cpu_mask_to_apicid = default_cpu_mask_to_apicid,
+ .cpu_mask_to_apicid_and = default_cpu_mask_to_apicid_and,
+
+ .send_IPI_mask = default_send_IPI_mask_logical,
+ .send_IPI_mask_allbutself = default_send_IPI_mask_allbutself_logical,
+ .send_IPI_allbutself = default_send_IPI_allbutself,
+ .send_IPI_all = default_send_IPI_all,
+ .send_IPI_self = default_send_IPI_self,
+
+ .wakeup_cpu = NULL,
+ .trampoline_phys_low = DEFAULT_TRAMPOLINE_PHYS_LOW,
+ .trampoline_phys_high = DEFAULT_TRAMPOLINE_PHYS_HIGH,
+
+ .wait_for_init_deassert = default_wait_for_init_deassert,
+
+ .smp_callin_clear_local_apic = NULL,
+ .inquire_remote_apic = default_inquire_remote_apic,
+
+ .read = native_apic_mem_read,
+ .write = native_apic_mem_write,
+ .icr_read = native_apic_icr_read,
+ .icr_write = native_apic_icr_write,
+ .wait_icr_idle = native_apic_wait_icr_idle,
+ .safe_wait_icr_idle = native_safe_apic_wait_icr_idle,
+};
+
+extern struct apic apic_numaq;
+extern struct apic apic_summit;
+extern struct apic apic_bigsmp;
+extern struct apic apic_es7000;
+extern struct apic apic_default;
+
+struct apic *apic = &apic_default;
+EXPORT_SYMBOL_GPL(apic);
+
+static struct apic *apic_probe[] __initdata = {
+#ifdef CONFIG_X86_NUMAQ
+ &apic_numaq,
+#endif
+#ifdef CONFIG_X86_SUMMIT
+ &apic_summit,
+#endif
+#ifdef CONFIG_X86_BIGSMP
+ &apic_bigsmp,
+#endif
+#ifdef CONFIG_X86_ES7000
+ &apic_es7000,
+#endif
+ &apic_default, /* must be last */
+ NULL,
+};
+
+static int cmdline_apic __initdata;
+static int __init parse_apic(char *arg)
+{
+ int i;
+
+ if (!arg)
+ return -EINVAL;
+
+ for (i = 0; apic_probe[i]; i++) {
+ if (!strcmp(apic_probe[i]->name, arg)) {
+ apic = apic_probe[i];
+ cmdline_apic = 1;
+ return 0;
+ }
+ }
+
+ if (x86_quirks->update_apic)
+ x86_quirks->update_apic();
+
+ /* Parsed again by __setup for debug/verbose */
+ return 0;
+}
+early_param("apic", parse_apic);
+
+void __init generic_bigsmp_probe(void)
+{
+#ifdef CONFIG_X86_BIGSMP
+ /*
+ * This routine is used to switch to bigsmp mode when
+ * - There is no apic= option specified by the user
+ * - generic_apic_probe() has chosen apic_default as the sub_arch
+ * - we find more than 8 CPUs in acpi LAPIC listing with xAPIC support
+ */
+
+ if (!cmdline_apic && apic == &apic_default) {
+ if (apic_bigsmp.probe()) {
+ apic = &apic_bigsmp;
+ if (x86_quirks->update_apic)
+ x86_quirks->update_apic();
+ printk(KERN_INFO "Overriding APIC driver with %s\n",
+ apic->name);
+ }
+ }
+#endif
+}
+
+void __init generic_apic_probe(void)
+{
+ if (!cmdline_apic) {
+ int i;
+ for (i = 0; apic_probe[i]; i++) {
+ if (apic_probe[i]->probe()) {
+ apic = apic_probe[i];
+ break;
+ }
+ }
+ /* Not visible without early console */
+ if (!apic_probe[i])
+ panic("Didn't find an APIC driver");
+
+ if (x86_quirks->update_apic)
+ x86_quirks->update_apic();
+ }
+ printk(KERN_INFO "Using APIC driver %s\n", apic->name);
+}
+
+/* These functions can switch the APIC even after the initial ->probe() */
+
+int __init
+generic_mps_oem_check(struct mpc_table *mpc, char *oem, char *productid)
+{
+ int i;
+
+ for (i = 0; apic_probe[i]; ++i) {
+ if (!apic_probe[i]->mps_oem_check)
+ continue;
+ if (!apic_probe[i]->mps_oem_check(mpc, oem, productid))
+ continue;
+
+ if (!cmdline_apic) {
+ apic = apic_probe[i];
+ if (x86_quirks->update_apic)
+ x86_quirks->update_apic();
+ printk(KERN_INFO "Switched to APIC driver `%s'.\n",
+ apic->name);
+ }
+ return 1;
+ }
+ return 0;
+}
+
+int __init default_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
+{
+ int i;
+
+ for (i = 0; apic_probe[i]; ++i) {
+ if (!apic_probe[i]->acpi_madt_oem_check)
+ continue;
+ if (!apic_probe[i]->acpi_madt_oem_check(oem_id, oem_table_id))
+ continue;
+
+ if (!cmdline_apic) {
+ apic = apic_probe[i];
+ if (x86_quirks->update_apic)
+ x86_quirks->update_apic();
+ printk(KERN_INFO "Switched to APIC driver `%s'.\n",
+ apic->name);
+ }
+ return 1;
+ }
+ return 0;
+}
diff --git a/arch/x86/kernel/apic/probe_64.c b/arch/x86/kernel/apic/probe_64.c
new file mode 100644
index 0000000..e7c1636
--- /dev/null
+++ b/arch/x86/kernel/apic/probe_64.c
@@ -0,0 +1,96 @@
+/*
+ * Copyright 2004 James Cleverdon, IBM.
+ * Subject to the GNU Public License, v.2
+ *
+ * Generic APIC sub-arch probe layer.
+ *
+ * Hacked for x86-64 by James Cleverdon from i386 architecture code by
+ * Martin Bligh, Andi Kleen, James Bottomley, John Stultz, and
+ * James Cleverdon.
+ */
+#include <linux/threads.h>
+#include <linux/cpumask.h>
+#include <linux/string.h>
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/ctype.h>
+#include <linux/init.h>
+#include <linux/hardirq.h>
+#include <linux/dmar.h>
+
+#include <asm/smp.h>
+#include <asm/apic.h>
+#include <asm/ipi.h>
+#include <asm/setup.h>
+
+extern struct apic apic_flat;
+extern struct apic apic_physflat;
+extern struct apic apic_x2xpic_uv_x;
+extern struct apic apic_x2apic_phys;
+extern struct apic apic_x2apic_cluster;
+
+struct apic __read_mostly *apic = &apic_flat;
+EXPORT_SYMBOL_GPL(apic);
+
+static struct apic *apic_probe[] __initdata = {
+#ifdef CONFIG_X86_UV
+ &apic_x2apic_uv_x,
+#endif
+#ifdef CONFIG_X86_X2APIC
+ &apic_x2apic_phys,
+ &apic_x2apic_cluster,
+#endif
+ &apic_physflat,
+ NULL,
+};
+
+/*
+ * Check the APIC IDs in bios_cpu_apicid and choose the APIC mode.
+ */
+void __init default_setup_apic_routing(void)
+{
+#ifdef CONFIG_X86_X2APIC
+ if (x2apic && (apic != &apic_x2apic_phys &&
+#ifdef CONFIG_X86_UV
+ apic != &apic_x2apic_uv_x &&
+#endif
+ apic != &apic_x2apic_cluster)) {
+ if (x2apic_phys)
+ apic = &apic_x2apic_phys;
+ else
+ apic = &apic_x2apic_cluster;
+ printk(KERN_INFO "Setting APIC routing to %s\n", apic->name);
+ }
+#endif
+
+ if (apic == &apic_flat) {
+ if (max_physical_apicid >= 8)
+ apic = &apic_physflat;
+ printk(KERN_INFO "Setting APIC routing to %s\n", apic->name);
+ }
+
+ if (x86_quirks->update_apic)
+ x86_quirks->update_apic();
+}
+
+/* Same for both flat and physical. */
+
+void apic_send_IPI_self(int vector)
+{
+ __default_send_IPI_shortcut(APIC_DEST_SELF, vector, APIC_DEST_PHYSICAL);
+}
+
+int __init default_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
+{
+ int i;
+
+ for (i = 0; apic_probe[i]; ++i) {
+ if (apic_probe[i]->acpi_madt_oem_check(oem_id, oem_table_id)) {
+ apic = apic_probe[i];
+ printk(KERN_INFO "Setting APIC routing to %s.\n",
+ apic->name);
+ return 1;
+ }
+ }
+ return 0;
+}
diff --git a/arch/x86/kernel/apic/summit_32.c b/arch/x86/kernel/apic/summit_32.c
new file mode 100644
index 0000000..32838b5
--- /dev/null
+++ b/arch/x86/kernel/apic/summit_32.c
@@ -0,0 +1,592 @@
+/*
+ * IBM Summit-Specific Code
+ *
+ * Written By: Matthew Dobson, IBM Corporation
+ *
+ * Copyright (c) 2003 IBM Corp.
+ *
+ * All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or (at
+ * your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for more
+ * details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ * Send feedback to <colpatch@us.ibm.com>
+ *
+ */
+
+#include <linux/mm.h>
+#include <linux/init.h>
+#include <asm/io.h>
+#include <asm/bios_ebda.h>
+
+/*
+ * APIC driver for the IBM "Summit" chipset.
+ */
+#include <linux/threads.h>
+#include <linux/cpumask.h>
+#include <asm/mpspec.h>
+#include <asm/apic.h>
+#include <asm/smp.h>
+#include <asm/fixmap.h>
+#include <asm/apicdef.h>
+#include <asm/ipi.h>
+#include <linux/kernel.h>
+#include <linux/string.h>
+#include <linux/init.h>
+#include <linux/gfp.h>
+#include <linux/smp.h>
+
+static unsigned summit_get_apic_id(unsigned long x)
+{
+ return (x >> 24) & 0xFF;
+}
+
+static inline void summit_send_IPI_mask(const cpumask_t *mask, int vector)
+{
+ default_send_IPI_mask_sequence_logical(mask, vector);
+}
+
+static void summit_send_IPI_allbutself(int vector)
+{
+ cpumask_t mask = cpu_online_map;
+ cpu_clear(smp_processor_id(), mask);
+
+ if (!cpus_empty(mask))
+ summit_send_IPI_mask(&mask, vector);
+}
+
+static void summit_send_IPI_all(int vector)
+{
+ summit_send_IPI_mask(&cpu_online_map, vector);
+}
+
+#include <asm/tsc.h>
+
+extern int use_cyclone;
+
+#ifdef CONFIG_X86_SUMMIT_NUMA
+extern void setup_summit(void);
+#else
+#define setup_summit() {}
+#endif
+
+static int summit_mps_oem_check(struct mpc_table *mpc, char *oem,
+ char *productid)
+{
+ if (!strncmp(oem, "IBM ENSW", 8) &&
+ (!strncmp(productid, "VIGIL SMP", 9)
+ || !strncmp(productid, "EXA", 3)
+ || !strncmp(productid, "RUTHLESS SMP", 12))){
+ mark_tsc_unstable("Summit based system");
+ use_cyclone = 1; /*enable cyclone-timer*/
+ setup_summit();
+ return 1;
+ }
+ return 0;
+}
+
+/* Hook from generic ACPI tables.c */
+static int summit_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
+{
+ if (!strncmp(oem_id, "IBM", 3) &&
+ (!strncmp(oem_table_id, "SERVIGIL", 8)
+ || !strncmp(oem_table_id, "EXA", 3))){
+ mark_tsc_unstable("Summit based system");
+ use_cyclone = 1; /*enable cyclone-timer*/
+ setup_summit();
+ return 1;
+ }
+ return 0;
+}
+
+struct rio_table_hdr {
+ unsigned char version; /* Version number of this data structure */
+ /* Version 3 adds chassis_num & WP_index */
+ unsigned char num_scal_dev; /* # of Scalability devices (Twisters for Vigil) */
+ unsigned char num_rio_dev; /* # of RIO I/O devices (Cyclones and Winnipegs) */
+} __attribute__((packed));
+
+struct scal_detail {
+ unsigned char node_id; /* Scalability Node ID */
+ unsigned long CBAR; /* Address of 1MB register space */
+ unsigned char port0node; /* Node ID port connected to: 0xFF=None */
+ unsigned char port0port; /* Port num port connected to: 0,1,2, or 0xFF=None */
+ unsigned char port1node; /* Node ID port connected to: 0xFF = None */
+ unsigned char port1port; /* Port num port connected to: 0,1,2, or 0xFF=None */
+ unsigned char port2node; /* Node ID port connected to: 0xFF = None */
+ unsigned char port2port; /* Port num port connected to: 0,1,2, or 0xFF=None */
+ unsigned char chassis_num; /* 1 based Chassis number (1 = boot node) */
+} __attribute__((packed));
+
+struct rio_detail {
+ unsigned char node_id; /* RIO Node ID */
+ unsigned long BBAR; /* Address of 1MB register space */
+ unsigned char type; /* Type of device */
+ unsigned char owner_id; /* For WPEG: Node ID of Cyclone that owns this WPEG*/
+ /* For CYC: Node ID of Twister that owns this CYC */
+ unsigned char port0node; /* Node ID port connected to: 0xFF=None */
+ unsigned char port0port; /* Port num port connected to: 0,1,2, or 0xFF=None */
+ unsigned char port1node; /* Node ID port connected to: 0xFF=None */
+ unsigned char port1port; /* Port num port connected to: 0,1,2, or 0xFF=None */
+ unsigned char first_slot; /* For WPEG: Lowest slot number below this WPEG */
+ /* For CYC: 0 */
+ unsigned char status; /* For WPEG: Bit 0 = 1 : the XAPIC is used */
+ /* = 0 : the XAPIC is not used, ie:*/
+ /* ints fwded to another XAPIC */
+ /* Bits1:7 Reserved */
+ /* For CYC: Bits0:7 Reserved */
+ unsigned char WP_index; /* For WPEG: WPEG instance index - lower ones have */
+ /* lower slot numbers/PCI bus numbers */
+ /* For CYC: No meaning */
+ unsigned char chassis_num; /* 1 based Chassis number */
+ /* For LookOut WPEGs this field indicates the */
+ /* Expansion Chassis #, enumerated from Boot */
+ /* Node WPEG external port, then Boot Node CYC */
+ /* external port, then Next Vigil chassis WPEG */
+ /* external port, etc. */
+ /* Shared Lookouts have only 1 chassis number (the */
+ /* first one assigned) */
+} __attribute__((packed));
+
+
+typedef enum {
+ CompatTwister = 0, /* Compatibility Twister */
+ AltTwister = 1, /* Alternate Twister of internal 8-way */
+ CompatCyclone = 2, /* Compatibility Cyclone */
+ AltCyclone = 3, /* Alternate Cyclone of internal 8-way */
+ CompatWPEG = 4, /* Compatibility WPEG */
+ AltWPEG = 5, /* Second Planar WPEG */
+ LookOutAWPEG = 6, /* LookOut WPEG */
+ LookOutBWPEG = 7, /* LookOut WPEG */
+} node_type;
+
+static inline int is_WPEG(struct rio_detail *rio){
+ return (rio->type == CompatWPEG || rio->type == AltWPEG ||
+ rio->type == LookOutAWPEG || rio->type == LookOutBWPEG);
+}
+
+
+/* In clustered mode, the high nibble of APIC ID is a cluster number.
+ * The low nibble is a 4-bit bitmap. */
+#define XAPIC_DEST_CPUS_SHIFT 4
+#define XAPIC_DEST_CPUS_MASK ((1u << XAPIC_DEST_CPUS_SHIFT) - 1)
+#define XAPIC_DEST_CLUSTER_MASK (XAPIC_DEST_CPUS_MASK << XAPIC_DEST_CPUS_SHIFT)
+
+#define SUMMIT_APIC_DFR_VALUE (APIC_DFR_CLUSTER)
+
+static const cpumask_t *summit_target_cpus(void)
+{
+ /* CPU_MASK_ALL (0xff) has undefined behaviour with
+ * dest_LowestPrio mode logical clustered apic interrupt routing
+ * Just start on cpu 0. IRQ balancing will spread load
+ */
+ return &cpumask_of_cpu(0);
+}
+
+static unsigned long summit_check_apicid_used(physid_mask_t bitmap, int apicid)
+{
+ return 0;
+}
+
+/* we don't use the phys_cpu_present_map to indicate apicid presence */
+static unsigned long summit_check_apicid_present(int bit)
+{
+ return 1;
+}
+
+static void summit_init_apic_ldr(void)
+{
+ unsigned long val, id;
+ int count = 0;
+ u8 my_id = (u8)hard_smp_processor_id();
+ u8 my_cluster = APIC_CLUSTER(my_id);
+#ifdef CONFIG_SMP
+ u8 lid;
+ int i;
+
+ /* Create logical APIC IDs by counting CPUs already in cluster. */
+ for (count = 0, i = nr_cpu_ids; --i >= 0; ) {
+ lid = cpu_2_logical_apicid[i];
+ if (lid != BAD_APICID && APIC_CLUSTER(lid) == my_cluster)
+ ++count;
+ }
+#endif
+ /* We only have a 4 wide bitmap in cluster mode. If a deranged
+ * BIOS puts 5 CPUs in one APIC cluster, we're hosed. */
+ BUG_ON(count >= XAPIC_DEST_CPUS_SHIFT);
+ id = my_cluster | (1UL << count);
+ apic_write(APIC_DFR, SUMMIT_APIC_DFR_VALUE);
+ val = apic_read(APIC_LDR) & ~APIC_LDR_MASK;
+ val |= SET_APIC_LOGICAL_ID(id);
+ apic_write(APIC_LDR, val);
+}
+
+static int summit_apic_id_registered(void)
+{
+ return 1;
+}
+
+static void summit_setup_apic_routing(void)
+{
+ printk("Enabling APIC mode: Summit. Using %d I/O APICs\n",
+ nr_ioapics);
+}
+
+static int summit_apicid_to_node(int logical_apicid)
+{
+#ifdef CONFIG_SMP
+ return apicid_2_node[hard_smp_processor_id()];
+#else
+ return 0;
+#endif
+}
+
+/* Mapping from cpu number to logical apicid */
+static inline int summit_cpu_to_logical_apicid(int cpu)
+{
+#ifdef CONFIG_SMP
+ if (cpu >= nr_cpu_ids)
+ return BAD_APICID;
+ return cpu_2_logical_apicid[cpu];
+#else
+ return logical_smp_processor_id();
+#endif
+}
+
+static int summit_cpu_present_to_apicid(int mps_cpu)
+{
+ if (mps_cpu < nr_cpu_ids)
+ return (int)per_cpu(x86_bios_cpu_apicid, mps_cpu);
+ else
+ return BAD_APICID;
+}
+
+static physid_mask_t summit_ioapic_phys_id_map(physid_mask_t phys_id_map)
+{
+ /* For clustered we don't have a good way to do this yet - hack */
+ return physids_promote(0x0F);
+}
+
+static physid_mask_t summit_apicid_to_cpu_present(int apicid)
+{
+ return physid_mask_of_physid(0);
+}
+
+static int summit_check_phys_apicid_present(int boot_cpu_physical_apicid)
+{
+ return 1;
+}
+
+static unsigned int summit_cpu_mask_to_apicid(const cpumask_t *cpumask)
+{
+ int cpus_found = 0;
+ int num_bits_set;
+ int apicid;
+ int cpu;
+
+ num_bits_set = cpus_weight(*cpumask);
+ if (num_bits_set >= nr_cpu_ids)
+ return BAD_APICID;
+ /*
+ * The cpus in the mask must all be on the apic cluster.
+ */
+ cpu = first_cpu(*cpumask);
+ apicid = summit_cpu_to_logical_apicid(cpu);
+
+ while (cpus_found < num_bits_set) {
+ if (cpu_isset(cpu, *cpumask)) {
+ int new_apicid = summit_cpu_to_logical_apicid(cpu);
+
+ if (APIC_CLUSTER(apicid) != APIC_CLUSTER(new_apicid)) {
+ printk("%s: Not a valid mask!\n", __func__);
+
+ return BAD_APICID;
+ }
+ apicid = apicid | new_apicid;
+ cpus_found++;
+ }
+ cpu++;
+ }
+ return apicid;
+}
+
+static unsigned int summit_cpu_mask_to_apicid_and(const struct cpumask *inmask,
+ const struct cpumask *andmask)
+{
+ int apicid = summit_cpu_to_logical_apicid(0);
+ cpumask_var_t cpumask;
+
+ if (!alloc_cpumask_var(&cpumask, GFP_ATOMIC))
+ return apicid;
+
+ cpumask_and(cpumask, inmask, andmask);
+ cpumask_and(cpumask, cpumask, cpu_online_mask);
+ apicid = summit_cpu_mask_to_apicid(cpumask);
+
+ free_cpumask_var(cpumask);
+
+ return apicid;
+}
+
+/*
+ * cpuid returns the value latched in the HW at reset, not the APIC ID
+ * register's value. For any box whose BIOS changes APIC IDs, like
+ * clustered APIC systems, we must use hard_smp_processor_id.
+ *
+ * See Intel's IA-32 SW Dev's Manual Vol2 under CPUID.
+ */
+static int summit_phys_pkg_id(int cpuid_apic, int index_msb)
+{
+ return hard_smp_processor_id() >> index_msb;
+}
+
+static int probe_summit(void)
+{
+ /* probed later in mptable/ACPI hooks */
+ return 0;
+}
+
+static void summit_vector_allocation_domain(int cpu, cpumask_t *retmask)
+{
+ /* Careful. Some cpus do not strictly honor the set of cpus
+ * specified in the interrupt destination when using lowest
+ * priority interrupt delivery mode.
+ *
+ * In particular there was a hyperthreading cpu observed to
+ * deliver interrupts to the wrong hyperthread when only one
+ * hyperthread was specified in the interrupt desitination.
+ */
+ *retmask = (cpumask_t){ { [0] = APIC_ALL_CPUS, } };
+}
+
+#ifdef CONFIG_X86_SUMMIT_NUMA
+static struct rio_table_hdr *rio_table_hdr __initdata;
+static struct scal_detail *scal_devs[MAX_NUMNODES] __initdata;
+static struct rio_detail *rio_devs[MAX_NUMNODES*4] __initdata;
+
+#ifndef CONFIG_X86_NUMAQ
+static int mp_bus_id_to_node[MAX_MP_BUSSES] __initdata;
+#endif
+
+static int __init setup_pci_node_map_for_wpeg(int wpeg_num, int last_bus)
+{
+ int twister = 0, node = 0;
+ int i, bus, num_buses;
+
+ for (i = 0; i < rio_table_hdr->num_rio_dev; i++) {
+ if (rio_devs[i]->node_id == rio_devs[wpeg_num]->owner_id) {
+ twister = rio_devs[i]->owner_id;
+ break;
+ }
+ }
+ if (i == rio_table_hdr->num_rio_dev) {
+ printk(KERN_ERR "%s: Couldn't find owner Cyclone for Winnipeg!\n", __func__);
+ return last_bus;
+ }
+
+ for (i = 0; i < rio_table_hdr->num_scal_dev; i++) {
+ if (scal_devs[i]->node_id == twister) {
+ node = scal_devs[i]->node_id;
+ break;
+ }
+ }
+ if (i == rio_table_hdr->num_scal_dev) {
+ printk(KERN_ERR "%s: Couldn't find owner Twister for Cyclone!\n", __func__);
+ return last_bus;
+ }
+
+ switch (rio_devs[wpeg_num]->type) {
+ case CompatWPEG:
+ /*
+ * The Compatibility Winnipeg controls the 2 legacy buses,
+ * the 66MHz PCI bus [2 slots] and the 2 "extra" buses in case
+ * a PCI-PCI bridge card is used in either slot: total 5 buses.
+ */
+ num_buses = 5;
+ break;
+ case AltWPEG:
+ /*
+ * The Alternate Winnipeg controls the 2 133MHz buses [1 slot
+ * each], their 2 "extra" buses, the 100MHz bus [2 slots] and
+ * the "extra" buses for each of those slots: total 7 buses.
+ */
+ num_buses = 7;
+ break;
+ case LookOutAWPEG:
+ case LookOutBWPEG:
+ /*
+ * A Lookout Winnipeg controls 3 100MHz buses [2 slots each]
+ * & the "extra" buses for each of those slots: total 9 buses.
+ */
+ num_buses = 9;
+ break;
+ default:
+ printk(KERN_INFO "%s: Unsupported Winnipeg type!\n", __func__);
+ return last_bus;
+ }
+
+ for (bus = last_bus; bus < last_bus + num_buses; bus++)
+ mp_bus_id_to_node[bus] = node;
+ return bus;
+}
+
+static int __init build_detail_arrays(void)
+{
+ unsigned long ptr;
+ int i, scal_detail_size, rio_detail_size;
+
+ if (rio_table_hdr->num_scal_dev > MAX_NUMNODES) {
+ printk(KERN_WARNING "%s: MAX_NUMNODES too low! Defined as %d, but system has %d nodes.\n", __func__, MAX_NUMNODES, rio_table_hdr->num_scal_dev);
+ return 0;
+ }
+
+ switch (rio_table_hdr->version) {
+ default:
+ printk(KERN_WARNING "%s: Invalid Rio Grande Table Version: %d\n", __func__, rio_table_hdr->version);
+ return 0;
+ case 2:
+ scal_detail_size = 11;
+ rio_detail_size = 13;
+ break;
+ case 3:
+ scal_detail_size = 12;
+ rio_detail_size = 15;
+ break;
+ }
+
+ ptr = (unsigned long)rio_table_hdr + 3;
+ for (i = 0; i < rio_table_hdr->num_scal_dev; i++, ptr += scal_detail_size)
+ scal_devs[i] = (struct scal_detail *)ptr;
+
+ for (i = 0; i < rio_table_hdr->num_rio_dev; i++, ptr += rio_detail_size)
+ rio_devs[i] = (struct rio_detail *)ptr;
+
+ return 1;
+}
+
+void __init setup_summit(void)
+{
+ unsigned long ptr;
+ unsigned short offset;
+ int i, next_wpeg, next_bus = 0;
+
+ /* The pointer to the EBDA is stored in the word @ phys 0x40E(40:0E) */
+ ptr = get_bios_ebda();
+ ptr = (unsigned long)phys_to_virt(ptr);
+
+ rio_table_hdr = NULL;
+ offset = 0x180;
+ while (offset) {
+ /* The block id is stored in the 2nd word */
+ if (*((unsigned short *)(ptr + offset + 2)) == 0x4752) {
+ /* set the pointer past the offset & block id */
+ rio_table_hdr = (struct rio_table_hdr *)(ptr + offset + 4);
+ break;
+ }
+ /* The next offset is stored in the 1st word. 0 means no more */
+ offset = *((unsigned short *)(ptr + offset));
+ }
+ if (!rio_table_hdr) {
+ printk(KERN_ERR "%s: Unable to locate Rio Grande Table in EBDA - bailing!\n", __func__);
+ return;
+ }
+
+ if (!build_detail_arrays())
+ return;
+
+ /* The first Winnipeg we're looking for has an index of 0 */
+ next_wpeg = 0;
+ do {
+ for (i = 0; i < rio_table_hdr->num_rio_dev; i++) {
+ if (is_WPEG(rio_devs[i]) && rio_devs[i]->WP_index == next_wpeg) {
+ /* It's the Winnipeg we're looking for! */
+ next_bus = setup_pci_node_map_for_wpeg(i, next_bus);
+ next_wpeg++;
+ break;
+ }
+ }
+ /*
+ * If we go through all Rio devices and don't find one with
+ * the next index, it means we've found all the Winnipegs,
+ * and thus all the PCI buses.
+ */
+ if (i == rio_table_hdr->num_rio_dev)
+ next_wpeg = 0;
+ } while (next_wpeg != 0);
+}
+#endif
+
+struct apic apic_summit = {
+
+ .name = "summit",
+ .probe = probe_summit,
+ .acpi_madt_oem_check = summit_acpi_madt_oem_check,
+ .apic_id_registered = summit_apic_id_registered,
+
+ .irq_delivery_mode = dest_LowestPrio,
+ /* logical delivery broadcast to all CPUs: */
+ .irq_dest_mode = 1,
+
+ .target_cpus = summit_target_cpus,
+ .disable_esr = 1,
+ .dest_logical = APIC_DEST_LOGICAL,
+ .check_apicid_used = summit_check_apicid_used,
+ .check_apicid_present = summit_check_apicid_present,
+
+ .vector_allocation_domain = summit_vector_allocation_domain,
+ .init_apic_ldr = summit_init_apic_ldr,
+
+ .ioapic_phys_id_map = summit_ioapic_phys_id_map,
+ .setup_apic_routing = summit_setup_apic_routing,
+ .multi_timer_check = NULL,
+ .apicid_to_node = summit_apicid_to_node,
+ .cpu_to_logical_apicid = summit_cpu_to_logical_apicid,
+ .cpu_present_to_apicid = summit_cpu_present_to_apicid,
+ .apicid_to_cpu_present = summit_apicid_to_cpu_present,
+ .setup_portio_remap = NULL,
+ .check_phys_apicid_present = summit_check_phys_apicid_present,
+ .enable_apic_mode = NULL,
+ .phys_pkg_id = summit_phys_pkg_id,
+ .mps_oem_check = summit_mps_oem_check,
+
+ .get_apic_id = summit_get_apic_id,
+ .set_apic_id = NULL,
+ .apic_id_mask = 0xFF << 24,
+
+ .cpu_mask_to_apicid = summit_cpu_mask_to_apicid,
+ .cpu_mask_to_apicid_and = summit_cpu_mask_to_apicid_and,
+
+ .send_IPI_mask = summit_send_IPI_mask,
+ .send_IPI_mask_allbutself = NULL,
+ .send_IPI_allbutself = summit_send_IPI_allbutself,
+ .send_IPI_all = summit_send_IPI_all,
+ .send_IPI_self = default_send_IPI_self,
+
+ .wakeup_cpu = NULL,
+ .trampoline_phys_low = DEFAULT_TRAMPOLINE_PHYS_LOW,
+ .trampoline_phys_high = DEFAULT_TRAMPOLINE_PHYS_HIGH,
+
+ .wait_for_init_deassert = default_wait_for_init_deassert,
+
+ .smp_callin_clear_local_apic = NULL,
+ .inquire_remote_apic = default_inquire_remote_apic,
+
+ .read = native_apic_mem_read,
+ .write = native_apic_mem_write,
+ .icr_read = native_apic_icr_read,
+ .icr_write = native_apic_icr_write,
+ .wait_icr_idle = native_apic_wait_icr_idle,
+ .safe_wait_icr_idle = native_safe_apic_wait_icr_idle,
+};
diff --git a/arch/x86/kernel/apic/x2apic_cluster.c b/arch/x86/kernel/apic/x2apic_cluster.c
new file mode 100644
index 0000000..354b9c4
--- /dev/null
+++ b/arch/x86/kernel/apic/x2apic_cluster.c
@@ -0,0 +1,240 @@
+#include <linux/threads.h>
+#include <linux/cpumask.h>
+#include <linux/string.h>
+#include <linux/kernel.h>
+#include <linux/ctype.h>
+#include <linux/init.h>
+#include <linux/dmar.h>
+
+#include <asm/smp.h>
+#include <asm/apic.h>
+#include <asm/ipi.h>
+
+DEFINE_PER_CPU(u32, x86_cpu_to_logical_apicid);
+
+static int x2apic_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
+{
+ return x2apic_enabled();
+}
+
+/* Start with all IRQs pointing to boot CPU. IRQ balancing will shift them. */
+
+static const struct cpumask *x2apic_target_cpus(void)
+{
+ return cpumask_of(0);
+}
+
+/*
+ * for now each logical cpu is in its own vector allocation domain.
+ */
+static void x2apic_vector_allocation_domain(int cpu, struct cpumask *retmask)
+{
+ cpumask_clear(retmask);
+ cpumask_set_cpu(cpu, retmask);
+}
+
+static void
+ __x2apic_send_IPI_dest(unsigned int apicid, int vector, unsigned int dest)
+{
+ unsigned long cfg;
+
+ cfg = __prepare_ICR(0, vector, dest);
+
+ /*
+ * send the IPI.
+ */
+ native_x2apic_icr_write(cfg, apicid);
+}
+
+/*
+ * for now, we send the IPI's one by one in the cpumask.
+ * TBD: Based on the cpu mask, we can send the IPI's to the cluster group
+ * at once. We have 16 cpu's in a cluster. This will minimize IPI register
+ * writes.
+ */
+static void x2apic_send_IPI_mask(const struct cpumask *mask, int vector)
+{
+ unsigned long query_cpu;
+ unsigned long flags;
+
+ local_irq_save(flags);
+ for_each_cpu(query_cpu, mask) {
+ __x2apic_send_IPI_dest(
+ per_cpu(x86_cpu_to_logical_apicid, query_cpu),
+ vector, apic->dest_logical);
+ }
+ local_irq_restore(flags);
+}
+
+static void
+ x2apic_send_IPI_mask_allbutself(const struct cpumask *mask, int vector)
+{
+ unsigned long this_cpu = smp_processor_id();
+ unsigned long query_cpu;
+ unsigned long flags;
+
+ local_irq_save(flags);
+ for_each_cpu(query_cpu, mask) {
+ if (query_cpu == this_cpu)
+ continue;
+ __x2apic_send_IPI_dest(
+ per_cpu(x86_cpu_to_logical_apicid, query_cpu),
+ vector, apic->dest_logical);
+ }
+ local_irq_restore(flags);
+}
+
+static void x2apic_send_IPI_allbutself(int vector)
+{
+ unsigned long this_cpu = smp_processor_id();
+ unsigned long query_cpu;
+ unsigned long flags;
+
+ local_irq_save(flags);
+ for_each_online_cpu(query_cpu) {
+ if (query_cpu == this_cpu)
+ continue;
+ __x2apic_send_IPI_dest(
+ per_cpu(x86_cpu_to_logical_apicid, query_cpu),
+ vector, apic->dest_logical);
+ }
+ local_irq_restore(flags);
+}
+
+static void x2apic_send_IPI_all(int vector)
+{
+ x2apic_send_IPI_mask(cpu_online_mask, vector);
+}
+
+static int x2apic_apic_id_registered(void)
+{
+ return 1;
+}
+
+static unsigned int x2apic_cpu_mask_to_apicid(const struct cpumask *cpumask)
+{
+ /*
+ * We're using fixed IRQ delivery, can only return one logical APIC ID.
+ * May as well be the first.
+ */
+ int cpu = cpumask_first(cpumask);
+
+ if ((unsigned)cpu < nr_cpu_ids)
+ return per_cpu(x86_cpu_to_logical_apicid, cpu);
+ else
+ return BAD_APICID;
+}
+
+static unsigned int
+x2apic_cpu_mask_to_apicid_and(const struct cpumask *cpumask,
+ const struct cpumask *andmask)
+{
+ int cpu;
+
+ /*
+ * We're using fixed IRQ delivery, can only return one logical APIC ID.
+ * May as well be the first.
+ */
+ for_each_cpu_and(cpu, cpumask, andmask) {
+ if (cpumask_test_cpu(cpu, cpu_online_mask))
+ break;
+ }
+
+ if (cpu < nr_cpu_ids)
+ return per_cpu(x86_cpu_to_logical_apicid, cpu);
+
+ return BAD_APICID;
+}
+
+static unsigned int x2apic_cluster_phys_get_apic_id(unsigned long x)
+{
+ unsigned int id;
+
+ id = x;
+ return id;
+}
+
+static unsigned long set_apic_id(unsigned int id)
+{
+ unsigned long x;
+
+ x = id;
+ return x;
+}
+
+static int x2apic_cluster_phys_pkg_id(int initial_apicid, int index_msb)
+{
+ return current_cpu_data.initial_apicid >> index_msb;
+}
+
+static void x2apic_send_IPI_self(int vector)
+{
+ apic_write(APIC_SELF_IPI, vector);
+}
+
+static void init_x2apic_ldr(void)
+{
+ int cpu = smp_processor_id();
+
+ per_cpu(x86_cpu_to_logical_apicid, cpu) = apic_read(APIC_LDR);
+}
+
+struct apic apic_x2apic_cluster = {
+
+ .name = "cluster x2apic",
+ .probe = NULL,
+ .acpi_madt_oem_check = x2apic_acpi_madt_oem_check,
+ .apic_id_registered = x2apic_apic_id_registered,
+
+ .irq_delivery_mode = dest_LowestPrio,
+ .irq_dest_mode = 1, /* logical */
+
+ .target_cpus = x2apic_target_cpus,
+ .disable_esr = 0,
+ .dest_logical = APIC_DEST_LOGICAL,
+ .check_apicid_used = NULL,
+ .check_apicid_present = NULL,
+
+ .vector_allocation_domain = x2apic_vector_allocation_domain,
+ .init_apic_ldr = init_x2apic_ldr,
+
+ .ioapic_phys_id_map = NULL,
+ .setup_apic_routing = NULL,
+ .multi_timer_check = NULL,
+ .apicid_to_node = NULL,
+ .cpu_to_logical_apicid = NULL,
+ .cpu_present_to_apicid = default_cpu_present_to_apicid,
+ .apicid_to_cpu_present = NULL,
+ .setup_portio_remap = NULL,
+ .check_phys_apicid_present = default_check_phys_apicid_present,
+ .enable_apic_mode = NULL,
+ .phys_pkg_id = x2apic_cluster_phys_pkg_id,
+ .mps_oem_check = NULL,
+
+ .get_apic_id = x2apic_cluster_phys_get_apic_id,
+ .set_apic_id = set_apic_id,
+ .apic_id_mask = 0xFFFFFFFFu,
+
+ .cpu_mask_to_apicid = x2apic_cpu_mask_to_apicid,
+ .cpu_mask_to_apicid_and = x2apic_cpu_mask_to_apicid_and,
+
+ .send_IPI_mask = x2apic_send_IPI_mask,
+ .send_IPI_mask_allbutself = x2apic_send_IPI_mask_allbutself,
+ .send_IPI_allbutself = x2apic_send_IPI_allbutself,
+ .send_IPI_all = x2apic_send_IPI_all,
+ .send_IPI_self = x2apic_send_IPI_self,
+
+ .wakeup_cpu = NULL,
+ .trampoline_phys_low = DEFAULT_TRAMPOLINE_PHYS_LOW,
+ .trampoline_phys_high = DEFAULT_TRAMPOLINE_PHYS_HIGH,
+ .wait_for_init_deassert = NULL,
+ .smp_callin_clear_local_apic = NULL,
+ .inquire_remote_apic = NULL,
+
+ .read = native_apic_msr_read,
+ .write = native_apic_msr_write,
+ .icr_read = native_x2apic_icr_read,
+ .icr_write = native_x2apic_icr_write,
+ .wait_icr_idle = native_x2apic_wait_icr_idle,
+ .safe_wait_icr_idle = native_safe_x2apic_wait_icr_idle,
+};
diff --git a/arch/x86/kernel/apic/x2apic_phys.c b/arch/x86/kernel/apic/x2apic_phys.c
new file mode 100644
index 0000000..5bcb174
--- /dev/null
+++ b/arch/x86/kernel/apic/x2apic_phys.c
@@ -0,0 +1,229 @@
+#include <linux/threads.h>
+#include <linux/cpumask.h>
+#include <linux/string.h>
+#include <linux/kernel.h>
+#include <linux/ctype.h>
+#include <linux/init.h>
+#include <linux/dmar.h>
+
+#include <asm/smp.h>
+#include <asm/apic.h>
+#include <asm/ipi.h>
+
+int x2apic_phys;
+
+static int set_x2apic_phys_mode(char *arg)
+{
+ x2apic_phys = 1;
+ return 0;
+}
+early_param("x2apic_phys", set_x2apic_phys_mode);
+
+static int x2apic_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
+{
+ if (x2apic_phys)
+ return x2apic_enabled();
+ else
+ return 0;
+}
+
+/* Start with all IRQs pointing to boot CPU. IRQ balancing will shift them. */
+
+static const struct cpumask *x2apic_target_cpus(void)
+{
+ return cpumask_of(0);
+}
+
+static void x2apic_vector_allocation_domain(int cpu, struct cpumask *retmask)
+{
+ cpumask_clear(retmask);
+ cpumask_set_cpu(cpu, retmask);
+}
+
+static void __x2apic_send_IPI_dest(unsigned int apicid, int vector,
+ unsigned int dest)
+{
+ unsigned long cfg;
+
+ cfg = __prepare_ICR(0, vector, dest);
+
+ /*
+ * send the IPI.
+ */
+ native_x2apic_icr_write(cfg, apicid);
+}
+
+static void x2apic_send_IPI_mask(const struct cpumask *mask, int vector)
+{
+ unsigned long query_cpu;
+ unsigned long flags;
+
+ local_irq_save(flags);
+ for_each_cpu(query_cpu, mask) {
+ __x2apic_send_IPI_dest(per_cpu(x86_cpu_to_apicid, query_cpu),
+ vector, APIC_DEST_PHYSICAL);
+ }
+ local_irq_restore(flags);
+}
+
+static void
+ x2apic_send_IPI_mask_allbutself(const struct cpumask *mask, int vector)
+{
+ unsigned long this_cpu = smp_processor_id();
+ unsigned long query_cpu;
+ unsigned long flags;
+
+ local_irq_save(flags);
+ for_each_cpu(query_cpu, mask) {
+ if (query_cpu != this_cpu)
+ __x2apic_send_IPI_dest(
+ per_cpu(x86_cpu_to_apicid, query_cpu),
+ vector, APIC_DEST_PHYSICAL);
+ }
+ local_irq_restore(flags);
+}
+
+static void x2apic_send_IPI_allbutself(int vector)
+{
+ unsigned long this_cpu = smp_processor_id();
+ unsigned long query_cpu;
+ unsigned long flags;
+
+ local_irq_save(flags);
+ for_each_online_cpu(query_cpu) {
+ if (query_cpu == this_cpu)
+ continue;
+ __x2apic_send_IPI_dest(per_cpu(x86_cpu_to_apicid, query_cpu),
+ vector, APIC_DEST_PHYSICAL);
+ }
+ local_irq_restore(flags);
+}
+
+static void x2apic_send_IPI_all(int vector)
+{
+ x2apic_send_IPI_mask(cpu_online_mask, vector);
+}
+
+static int x2apic_apic_id_registered(void)
+{
+ return 1;
+}
+
+static unsigned int x2apic_cpu_mask_to_apicid(const struct cpumask *cpumask)
+{
+ /*
+ * We're using fixed IRQ delivery, can only return one phys APIC ID.
+ * May as well be the first.
+ */
+ int cpu = cpumask_first(cpumask);
+
+ if ((unsigned)cpu < nr_cpu_ids)
+ return per_cpu(x86_cpu_to_apicid, cpu);
+ else
+ return BAD_APICID;
+}
+
+static unsigned int
+x2apic_cpu_mask_to_apicid_and(const struct cpumask *cpumask,
+ const struct cpumask *andmask)
+{
+ int cpu;
+
+ /*
+ * We're using fixed IRQ delivery, can only return one phys APIC ID.
+ * May as well be the first.
+ */
+ for_each_cpu_and(cpu, cpumask, andmask) {
+ if (cpumask_test_cpu(cpu, cpu_online_mask))
+ break;
+ }
+
+ if (cpu < nr_cpu_ids)
+ return per_cpu(x86_cpu_to_apicid, cpu);
+
+ return BAD_APICID;
+}
+
+static unsigned int x2apic_phys_get_apic_id(unsigned long x)
+{
+ return x;
+}
+
+static unsigned long set_apic_id(unsigned int id)
+{
+ return id;
+}
+
+static int x2apic_phys_pkg_id(int initial_apicid, int index_msb)
+{
+ return current_cpu_data.initial_apicid >> index_msb;
+}
+
+static void x2apic_send_IPI_self(int vector)
+{
+ apic_write(APIC_SELF_IPI, vector);
+}
+
+static void init_x2apic_ldr(void)
+{
+}
+
+struct apic apic_x2apic_phys = {
+
+ .name = "physical x2apic",
+ .probe = NULL,
+ .acpi_madt_oem_check = x2apic_acpi_madt_oem_check,
+ .apic_id_registered = x2apic_apic_id_registered,
+
+ .irq_delivery_mode = dest_Fixed,
+ .irq_dest_mode = 0, /* physical */
+
+ .target_cpus = x2apic_target_cpus,
+ .disable_esr = 0,
+ .dest_logical = 0,
+ .check_apicid_used = NULL,
+ .check_apicid_present = NULL,
+
+ .vector_allocation_domain = x2apic_vector_allocation_domain,
+ .init_apic_ldr = init_x2apic_ldr,
+
+ .ioapic_phys_id_map = NULL,
+ .setup_apic_routing = NULL,
+ .multi_timer_check = NULL,
+ .apicid_to_node = NULL,
+ .cpu_to_logical_apicid = NULL,
+ .cpu_present_to_apicid = default_cpu_present_to_apicid,
+ .apicid_to_cpu_present = NULL,
+ .setup_portio_remap = NULL,
+ .check_phys_apicid_present = default_check_phys_apicid_present,
+ .enable_apic_mode = NULL,
+ .phys_pkg_id = x2apic_phys_pkg_id,
+ .mps_oem_check = NULL,
+
+ .get_apic_id = x2apic_phys_get_apic_id,
+ .set_apic_id = set_apic_id,
+ .apic_id_mask = 0xFFFFFFFFu,
+
+ .cpu_mask_to_apicid = x2apic_cpu_mask_to_apicid,
+ .cpu_mask_to_apicid_and = x2apic_cpu_mask_to_apicid_and,
+
+ .send_IPI_mask = x2apic_send_IPI_mask,
+ .send_IPI_mask_allbutself = x2apic_send_IPI_mask_allbutself,
+ .send_IPI_allbutself = x2apic_send_IPI_allbutself,
+ .send_IPI_all = x2apic_send_IPI_all,
+ .send_IPI_self = x2apic_send_IPI_self,
+
+ .wakeup_cpu = NULL,
+ .trampoline_phys_low = DEFAULT_TRAMPOLINE_PHYS_LOW,
+ .trampoline_phys_high = DEFAULT_TRAMPOLINE_PHYS_HIGH,
+ .wait_for_init_deassert = NULL,
+ .smp_callin_clear_local_apic = NULL,
+ .inquire_remote_apic = NULL,
+
+ .read = native_apic_msr_read,
+ .write = native_apic_msr_write,
+ .icr_read = native_x2apic_icr_read,
+ .icr_write = native_x2apic_icr_write,
+ .wait_icr_idle = native_x2apic_wait_icr_idle,
+ .safe_wait_icr_idle = native_safe_x2apic_wait_icr_idle,
+};
diff --git a/arch/x86/kernel/genx2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c
similarity index 84%
rename from arch/x86/kernel/genx2apic_uv_x.c
rename to arch/x86/kernel/apic/x2apic_uv_x.c
index b193e08..20b4ad0 100644
--- a/arch/x86/kernel/genx2apic_uv_x.c
+++ b/arch/x86/kernel/apic/x2apic_uv_x.c
@@ -22,9 +22,10 @@
#include <linux/proc_fs.h>
#include <asm/current.h>
#include <asm/smp.h>
+#include <asm/apic.h>
#include <asm/ipi.h>
-#include <asm/genapic.h>
#include <asm/pgtable.h>
+#include <asm/uv/uv.h>
#include <asm/uv/uv_mmrs.h>
#include <asm/uv/uv_hub.h>
#include <asm/uv/bios.h>
@@ -113,16 +114,16 @@
static void uv_send_IPI_one(int cpu, int vector)
{
- unsigned long val, apicid, lapicid;
+ unsigned long val, apicid;
int pnode;
apicid = per_cpu(x86_cpu_to_apicid, cpu);
- lapicid = apicid & 0x3f; /* ZZZ macro needed */
pnode = uv_apicid_to_pnode(apicid);
- val =
- (1UL << UVH_IPI_INT_SEND_SHFT) | (lapicid <<
- UVH_IPI_INT_APIC_ID_SHFT) |
- (vector << UVH_IPI_INT_VECTOR_SHFT);
+
+ val = (1UL << UVH_IPI_INT_SEND_SHFT) |
+ (apicid << UVH_IPI_INT_APIC_ID_SHFT) |
+ (vector << UVH_IPI_INT_VECTOR_SHFT);
+
uv_write_global_mmr64(pnode, UVH_IPI_INT, val);
}
@@ -136,22 +137,24 @@
static void uv_send_IPI_mask_allbutself(const struct cpumask *mask, int vector)
{
- unsigned int cpu;
unsigned int this_cpu = smp_processor_id();
+ unsigned int cpu;
- for_each_cpu(cpu, mask)
+ for_each_cpu(cpu, mask) {
if (cpu != this_cpu)
uv_send_IPI_one(cpu, vector);
+ }
}
static void uv_send_IPI_allbutself(int vector)
{
- unsigned int cpu;
unsigned int this_cpu = smp_processor_id();
+ unsigned int cpu;
- for_each_online_cpu(cpu)
+ for_each_online_cpu(cpu) {
if (cpu != this_cpu)
uv_send_IPI_one(cpu, vector);
+ }
}
static void uv_send_IPI_all(int vector)
@@ -170,21 +173,21 @@
static unsigned int uv_cpu_mask_to_apicid(const struct cpumask *cpumask)
{
- int cpu;
-
/*
* We're using fixed IRQ delivery, can only return one phys APIC ID.
* May as well be the first.
*/
- cpu = cpumask_first(cpumask);
+ int cpu = cpumask_first(cpumask);
+
if ((unsigned)cpu < nr_cpu_ids)
return per_cpu(x86_cpu_to_apicid, cpu);
else
return BAD_APICID;
}
-static unsigned int uv_cpu_mask_to_apicid_and(const struct cpumask *cpumask,
- const struct cpumask *andmask)
+static unsigned int
+uv_cpu_mask_to_apicid_and(const struct cpumask *cpumask,
+ const struct cpumask *andmask)
{
int cpu;
@@ -192,15 +195,17 @@
* We're using fixed IRQ delivery, can only return one phys APIC ID.
* May as well be the first.
*/
- for_each_cpu_and(cpu, cpumask, andmask)
+ for_each_cpu_and(cpu, cpumask, andmask) {
if (cpumask_test_cpu(cpu, cpu_online_mask))
break;
+ }
if (cpu < nr_cpu_ids)
return per_cpu(x86_cpu_to_apicid, cpu);
+
return BAD_APICID;
}
-static unsigned int get_apic_id(unsigned long x)
+static unsigned int x2apic_get_apic_id(unsigned long x)
{
unsigned int id;
@@ -222,10 +227,10 @@
static unsigned int uv_read_apic_id(void)
{
- return get_apic_id(apic_read(APIC_ID));
+ return x2apic_get_apic_id(apic_read(APIC_ID));
}
-static unsigned int phys_pkg_id(int index_msb)
+static int uv_phys_pkg_id(int initial_apicid, int index_msb)
{
return uv_read_apic_id() >> index_msb;
}
@@ -235,26 +240,64 @@
apic_write(APIC_SELF_IPI, vector);
}
-struct genapic apic_x2apic_uv_x = {
- .name = "UV large system",
- .acpi_madt_oem_check = uv_acpi_madt_oem_check,
- .int_delivery_mode = dest_Fixed,
- .int_dest_mode = (APIC_DEST_PHYSICAL != 0),
- .target_cpus = uv_target_cpus,
- .vector_allocation_domain = uv_vector_allocation_domain,
- .apic_id_registered = uv_apic_id_registered,
- .init_apic_ldr = uv_init_apic_ldr,
- .send_IPI_all = uv_send_IPI_all,
- .send_IPI_allbutself = uv_send_IPI_allbutself,
- .send_IPI_mask = uv_send_IPI_mask,
- .send_IPI_mask_allbutself = uv_send_IPI_mask_allbutself,
- .send_IPI_self = uv_send_IPI_self,
- .cpu_mask_to_apicid = uv_cpu_mask_to_apicid,
- .cpu_mask_to_apicid_and = uv_cpu_mask_to_apicid_and,
- .phys_pkg_id = phys_pkg_id,
- .get_apic_id = get_apic_id,
- .set_apic_id = set_apic_id,
- .apic_id_mask = (0xFFFFFFFFu),
+struct apic apic_x2apic_uv_x = {
+
+ .name = "UV large system",
+ .probe = NULL,
+ .acpi_madt_oem_check = uv_acpi_madt_oem_check,
+ .apic_id_registered = uv_apic_id_registered,
+
+ .irq_delivery_mode = dest_Fixed,
+ .irq_dest_mode = 1, /* logical */
+
+ .target_cpus = uv_target_cpus,
+ .disable_esr = 0,
+ .dest_logical = APIC_DEST_LOGICAL,
+ .check_apicid_used = NULL,
+ .check_apicid_present = NULL,
+
+ .vector_allocation_domain = uv_vector_allocation_domain,
+ .init_apic_ldr = uv_init_apic_ldr,
+
+ .ioapic_phys_id_map = NULL,
+ .setup_apic_routing = NULL,
+ .multi_timer_check = NULL,
+ .apicid_to_node = NULL,
+ .cpu_to_logical_apicid = NULL,
+ .cpu_present_to_apicid = default_cpu_present_to_apicid,
+ .apicid_to_cpu_present = NULL,
+ .setup_portio_remap = NULL,
+ .check_phys_apicid_present = default_check_phys_apicid_present,
+ .enable_apic_mode = NULL,
+ .phys_pkg_id = uv_phys_pkg_id,
+ .mps_oem_check = NULL,
+
+ .get_apic_id = x2apic_get_apic_id,
+ .set_apic_id = set_apic_id,
+ .apic_id_mask = 0xFFFFFFFFu,
+
+ .cpu_mask_to_apicid = uv_cpu_mask_to_apicid,
+ .cpu_mask_to_apicid_and = uv_cpu_mask_to_apicid_and,
+
+ .send_IPI_mask = uv_send_IPI_mask,
+ .send_IPI_mask_allbutself = uv_send_IPI_mask_allbutself,
+ .send_IPI_allbutself = uv_send_IPI_allbutself,
+ .send_IPI_all = uv_send_IPI_all,
+ .send_IPI_self = uv_send_IPI_self,
+
+ .wakeup_cpu = NULL,
+ .trampoline_phys_low = DEFAULT_TRAMPOLINE_PHYS_LOW,
+ .trampoline_phys_high = DEFAULT_TRAMPOLINE_PHYS_HIGH,
+ .wait_for_init_deassert = NULL,
+ .smp_callin_clear_local_apic = NULL,
+ .inquire_remote_apic = NULL,
+
+ .read = native_apic_msr_read,
+ .write = native_apic_msr_write,
+ .icr_read = native_x2apic_icr_read,
+ .icr_write = native_x2apic_icr_write,
+ .wait_icr_idle = native_x2apic_wait_icr_idle,
+ .safe_wait_icr_idle = native_safe_x2apic_wait_icr_idle,
};
static __cpuinit void set_x2apic_extra_bits(int pnode)
diff --git a/arch/x86/kernel/apm_32.c b/arch/x86/kernel/apm_32.c
index 266ec6c..10033fe 100644
--- a/arch/x86/kernel/apm_32.c
+++ b/arch/x86/kernel/apm_32.c
@@ -301,7 +301,7 @@
*/
#define APM_ZERO_SEGS
-#include "apm.h"
+#include <asm/apm.h>
/*
* Define to re-initialize the interrupt 0 timer to 100 Hz after a suspend.
diff --git a/arch/x86/kernel/asm-offsets_32.c b/arch/x86/kernel/asm-offsets_32.c
index ee4df08..fbf2f33 100644
--- a/arch/x86/kernel/asm-offsets_32.c
+++ b/arch/x86/kernel/asm-offsets_32.c
@@ -75,6 +75,7 @@
OFFSET(PT_DS, pt_regs, ds);
OFFSET(PT_ES, pt_regs, es);
OFFSET(PT_FS, pt_regs, fs);
+ OFFSET(PT_GS, pt_regs, gs);
OFFSET(PT_ORIG_EAX, pt_regs, orig_ax);
OFFSET(PT_EIP, pt_regs, ip);
OFFSET(PT_CS, pt_regs, cs);
diff --git a/arch/x86/kernel/asm-offsets_64.c b/arch/x86/kernel/asm-offsets_64.c
index 1d41d3f..8793ab3 100644
--- a/arch/x86/kernel/asm-offsets_64.c
+++ b/arch/x86/kernel/asm-offsets_64.c
@@ -11,7 +11,6 @@
#include <linux/hardirq.h>
#include <linux/suspend.h>
#include <linux/kbuild.h>
-#include <asm/pda.h>
#include <asm/processor.h>
#include <asm/segment.h>
#include <asm/thread_info.h>
@@ -48,16 +47,6 @@
#endif
BLANK();
#undef ENTRY
-#define ENTRY(entry) DEFINE(pda_ ## entry, offsetof(struct x8664_pda, entry))
- ENTRY(kernelstack);
- ENTRY(oldrsp);
- ENTRY(pcurrent);
- ENTRY(irqcount);
- ENTRY(cpunumber);
- ENTRY(irqstackptr);
- ENTRY(data_offset);
- BLANK();
-#undef ENTRY
#ifdef CONFIG_PARAVIRT
BLANK();
OFFSET(PARAVIRT_enabled, pv_info, paravirt_enabled);
diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile
index 82db7f4..c381330 100644
--- a/arch/x86/kernel/cpu/Makefile
+++ b/arch/x86/kernel/cpu/Makefile
@@ -1,5 +1,5 @@
#
-# Makefile for x86-compatible CPU details and quirks
+# Makefile for x86-compatible CPU details, features and quirks
#
# Don't trace early stages of a secondary CPU boot
@@ -22,11 +22,13 @@
obj-$(CONFIG_CPU_SUP_TRANSMETA_32) += transmeta.o
obj-$(CONFIG_CPU_SUP_UMC_32) += umc.o
-obj-$(CONFIG_X86_MCE) += mcheck/
-obj-$(CONFIG_MTRR) += mtrr/
-obj-$(CONFIG_CPU_FREQ) += cpufreq/
+obj-$(CONFIG_PERF_COUNTERS) += perf_counter.o
-obj-$(CONFIG_X86_LOCAL_APIC) += perfctr-watchdog.o
+obj-$(CONFIG_X86_MCE) += mcheck/
+obj-$(CONFIG_MTRR) += mtrr/
+obj-$(CONFIG_CPU_FREQ) += cpufreq/
+
+obj-$(CONFIG_X86_LOCAL_APIC) += perfctr-watchdog.o
quiet_cmd_mkcapflags = MKCAP $@
cmd_mkcapflags = $(PERL) $(srctree)/$(src)/mkcapflags.pl $< $@
diff --git a/arch/x86/kernel/cpu/addon_cpuid_features.c b/arch/x86/kernel/cpu/addon_cpuid_features.c
index 2cf2363..6882a73 100644
--- a/arch/x86/kernel/cpu/addon_cpuid_features.c
+++ b/arch/x86/kernel/cpu/addon_cpuid_features.c
@@ -7,7 +7,7 @@
#include <asm/pat.h>
#include <asm/processor.h>
-#include <mach_apic.h>
+#include <asm/apic.h>
struct cpuid_bit {
u16 feature;
@@ -69,7 +69,7 @@
*/
void __cpuinit detect_extended_topology(struct cpuinfo_x86 *c)
{
-#ifdef CONFIG_X86_SMP
+#ifdef CONFIG_SMP
unsigned int eax, ebx, ecx, edx, sub_index;
unsigned int ht_mask_width, core_plus_mask_width;
unsigned int core_select_mask, core_level_siblings;
@@ -116,22 +116,14 @@
core_select_mask = (~(-1 << core_plus_mask_width)) >> ht_mask_width;
-#ifdef CONFIG_X86_32
- c->cpu_core_id = phys_pkg_id(c->initial_apicid, ht_mask_width)
+ c->cpu_core_id = apic->phys_pkg_id(c->initial_apicid, ht_mask_width)
& core_select_mask;
- c->phys_proc_id = phys_pkg_id(c->initial_apicid, core_plus_mask_width);
+ c->phys_proc_id = apic->phys_pkg_id(c->initial_apicid, core_plus_mask_width);
/*
* Reinit the apicid, now that we have extended initial_apicid.
*/
- c->apicid = phys_pkg_id(c->initial_apicid, 0);
-#else
- c->cpu_core_id = phys_pkg_id(ht_mask_width) & core_select_mask;
- c->phys_proc_id = phys_pkg_id(core_plus_mask_width);
- /*
- * Reinit the apicid, now that we have extended initial_apicid.
- */
- c->apicid = phys_pkg_id(0);
-#endif
+ c->apicid = apic->phys_pkg_id(c->initial_apicid, 0);
+
c->x86_max_cores = (core_level_siblings / smp_num_siblings);
@@ -143,37 +135,3 @@
return;
#endif
}
-
-#ifdef CONFIG_X86_PAT
-void __cpuinit validate_pat_support(struct cpuinfo_x86 *c)
-{
- if (!cpu_has_pat)
- pat_disable("PAT not supported by CPU.");
-
- switch (c->x86_vendor) {
- case X86_VENDOR_INTEL:
- /*
- * There is a known erratum on Pentium III and Core Solo
- * and Core Duo CPUs.
- * " Page with PAT set to WC while associated MTRR is UC
- * may consolidate to UC "
- * Because of this erratum, it is better to stick with
- * setting WC in MTRR rather than using PAT on these CPUs.
- *
- * Enable PAT WC only on P4, Core 2 or later CPUs.
- */
- if (c->x86 > 0x6 || (c->x86 == 6 && c->x86_model >= 15))
- return;
-
- pat_disable("PAT WC disabled due to known CPU erratum.");
- return;
-
- case X86_VENDOR_AMD:
- case X86_VENDOR_CENTAUR:
- case X86_VENDOR_TRANSMETA:
- return;
- }
-
- pat_disable("PAT disabled. Not yet verified on this CPU type.");
-}
-#endif
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index 7c878f6..edcde52 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -12,8 +12,6 @@
# include <asm/cacheflush.h>
#endif
-#include <mach_apic.h>
-
#include "cpu.h"
#ifdef CONFIG_X86_32
@@ -370,6 +368,10 @@
if (c->x86 >= 6)
set_cpu_cap(c, X86_FEATURE_FXSAVE_LEAK);
+ /* Enable Performance counter for K7 and later */
+ if (c->x86 > 6 && c->x86 <= 0x11)
+ set_cpu_cap(c, X86_FEATURE_ARCH_PERFMON);
+
if (!c->x86_model_id[0]) {
switch (c->x86) {
case 0xf:
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 83492b1..b66af09 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -17,18 +17,19 @@
#include <asm/mmu_context.h>
#include <asm/mtrr.h>
#include <asm/mce.h>
+#include <asm/perf_counter.h>
#include <asm/pat.h>
#include <asm/asm.h>
#include <asm/numa.h>
#include <asm/smp.h>
-#ifdef CONFIG_X86_LOCAL_APIC
-#include <asm/mpspec.h>
+#include <asm/cpu.h>
+#include <asm/cpumask.h>
#include <asm/apic.h>
-#include <mach_apic.h>
-#include <asm/genapic.h>
+
+#ifdef CONFIG_X86_LOCAL_APIC
+#include <asm/uv/uv.h>
#endif
-#include <asm/pda.h>
#include <asm/pgtable.h>
#include <asm/processor.h>
#include <asm/desc.h>
@@ -37,6 +38,7 @@
#include <asm/sections.h>
#include <asm/setup.h>
#include <asm/hypervisor.h>
+#include <asm/stackprotector.h>
#include "cpu.h"
@@ -50,6 +52,15 @@
/* representing cpus for which sibling maps can be computed */
cpumask_var_t cpu_sibling_setup_mask;
+/* correctly size the local cpu masks */
+void __init setup_cpu_local_masks(void)
+{
+ alloc_bootmem_cpumask_var(&cpu_initialized_mask);
+ alloc_bootmem_cpumask_var(&cpu_callin_mask);
+ alloc_bootmem_cpumask_var(&cpu_callout_mask);
+ alloc_bootmem_cpumask_var(&cpu_sibling_setup_mask);
+}
+
#else /* CONFIG_X86_32 */
cpumask_t cpu_callin_map;
@@ -62,23 +73,23 @@
static struct cpu_dev *this_cpu __cpuinitdata;
+DEFINE_PER_CPU_PAGE_ALIGNED(struct gdt_page, gdt_page) = { .gdt = {
#ifdef CONFIG_X86_64
-/* We need valid kernel segments for data and code in long mode too
- * IRET will check the segment types kkeil 2000/10/28
- * Also sysret mandates a special GDT layout
- */
-/* The TLS descriptors are currently at a different place compared to i386.
- Hopefully nobody expects them at a fixed place (Wine?) */
-DEFINE_PER_CPU(struct gdt_page, gdt_page) = { .gdt = {
+ /*
+ * We need valid kernel segments for data and code in long mode too
+ * IRET will check the segment types kkeil 2000/10/28
+ * Also sysret mandates a special GDT layout
+ *
+ * The TLS descriptors are currently at a different place compared to i386.
+ * Hopefully nobody expects them at a fixed place (Wine?)
+ */
[GDT_ENTRY_KERNEL32_CS] = { { { 0x0000ffff, 0x00cf9b00 } } },
[GDT_ENTRY_KERNEL_CS] = { { { 0x0000ffff, 0x00af9b00 } } },
[GDT_ENTRY_KERNEL_DS] = { { { 0x0000ffff, 0x00cf9300 } } },
[GDT_ENTRY_DEFAULT_USER32_CS] = { { { 0x0000ffff, 0x00cffb00 } } },
[GDT_ENTRY_DEFAULT_USER_DS] = { { { 0x0000ffff, 0x00cff300 } } },
[GDT_ENTRY_DEFAULT_USER_CS] = { { { 0x0000ffff, 0x00affb00 } } },
-} };
#else
-DEFINE_PER_CPU_PAGE_ALIGNED(struct gdt_page, gdt_page) = { .gdt = {
[GDT_ENTRY_KERNEL_CS] = { { { 0x0000ffff, 0x00cf9a00 } } },
[GDT_ENTRY_KERNEL_DS] = { { { 0x0000ffff, 0x00cf9200 } } },
[GDT_ENTRY_DEFAULT_USER_CS] = { { { 0x0000ffff, 0x00cffa00 } } },
@@ -110,9 +121,10 @@
[GDT_ENTRY_APMBIOS_BASE+2] = { { { 0x0000ffff, 0x00409200 } } },
[GDT_ENTRY_ESPFIX_SS] = { { { 0x00000000, 0x00c09200 } } },
- [GDT_ENTRY_PERCPU] = { { { 0x00000000, 0x00000000 } } },
-} };
+ [GDT_ENTRY_PERCPU] = { { { 0x0000ffff, 0x00cf9200 } } },
+ GDT_STACK_CANARY_INIT
#endif
+} };
EXPORT_PER_CPU_SYMBOL_GPL(gdt_page);
#ifdef CONFIG_X86_32
@@ -213,6 +225,49 @@
#endif
/*
+ * Some CPU features depend on higher CPUID levels, which may not always
+ * be available due to CPUID level capping or broken virtualization
+ * software. Add those features to this table to auto-disable them.
+ */
+struct cpuid_dependent_feature {
+ u32 feature;
+ u32 level;
+};
+static const struct cpuid_dependent_feature __cpuinitconst
+cpuid_dependent_features[] = {
+ { X86_FEATURE_MWAIT, 0x00000005 },
+ { X86_FEATURE_DCA, 0x00000009 },
+ { X86_FEATURE_XSAVE, 0x0000000d },
+ { 0, 0 }
+};
+
+static void __cpuinit filter_cpuid_features(struct cpuinfo_x86 *c, bool warn)
+{
+ const struct cpuid_dependent_feature *df;
+ for (df = cpuid_dependent_features; df->feature; df++) {
+ /*
+ * Note: cpuid_level is set to -1 if unavailable, but
+ * extended_extended_level is set to 0 if unavailable
+ * and the legitimate extended levels are all negative
+ * when signed; hence the weird messing around with
+ * signs here...
+ */
+ if (cpu_has(c, df->feature) &&
+ ((s32)df->level < 0 ?
+ (u32)df->level > (u32)c->extended_cpuid_level :
+ (s32)df->level > (s32)c->cpuid_level)) {
+ clear_cpu_cap(c, df->feature);
+ if (warn)
+ printk(KERN_WARNING
+ "CPU: CPU feature %s disabled "
+ "due to lack of CPUID level 0x%x\n",
+ x86_cap_flags[df->feature],
+ df->level);
+ }
+ }
+}
+
+/*
* Naming convention should be: <Name> [(<Codename>)]
* This table only is used unless init_<vendor>() below doesn't set it;
* in particular, if CPUID levels 0x80000002..4 are supported, this isn't used
@@ -242,18 +297,29 @@
__u32 cleared_cpu_caps[NCAPINTS] __cpuinitdata;
+void load_percpu_segment(int cpu)
+{
+#ifdef CONFIG_X86_32
+ loadsegment(fs, __KERNEL_PERCPU);
+#else
+ loadsegment(gs, 0);
+ wrmsrl(MSR_GS_BASE, (unsigned long)per_cpu(irq_stack_union.gs_base, cpu));
+#endif
+ load_stack_canary_segment();
+}
+
/* Current gdt points %fs at the "master" per-cpu area: after this,
* it's on the real one. */
-void switch_to_new_gdt(void)
+void switch_to_new_gdt(int cpu)
{
struct desc_ptr gdt_descr;
- gdt_descr.address = (long)get_cpu_gdt_table(smp_processor_id());
+ gdt_descr.address = (long)get_cpu_gdt_table(cpu);
gdt_descr.size = GDT_SIZE - 1;
load_gdt(&gdt_descr);
-#ifdef CONFIG_X86_32
- asm("mov %0, %%fs" : : "r" (__KERNEL_PERCPU) : "memory");
-#endif
+ /* Reload the per-cpu base */
+
+ load_percpu_segment(cpu);
}
static struct cpu_dev *cpu_devs[X86_VENDOR_NUM] = {};
@@ -383,11 +449,7 @@
}
index_msb = get_count_order(smp_num_siblings);
-#ifdef CONFIG_X86_64
- c->phys_proc_id = phys_pkg_id(index_msb);
-#else
- c->phys_proc_id = phys_pkg_id(c->initial_apicid, index_msb);
-#endif
+ c->phys_proc_id = apic->phys_pkg_id(c->initial_apicid, index_msb);
smp_num_siblings = smp_num_siblings / c->x86_max_cores;
@@ -395,13 +457,8 @@
core_bits = get_count_order(c->x86_max_cores);
-#ifdef CONFIG_X86_64
- c->cpu_core_id = phys_pkg_id(index_msb) &
+ c->cpu_core_id = apic->phys_pkg_id(c->initial_apicid, index_msb) &
((1 << core_bits) - 1);
-#else
- c->cpu_core_id = phys_pkg_id(c->initial_apicid, index_msb) &
- ((1 << core_bits) - 1);
-#endif
}
out:
@@ -570,11 +627,10 @@
if (this_cpu->c_early_init)
this_cpu->c_early_init(c);
- validate_pat_support(c);
-
#ifdef CONFIG_SMP
c->cpu_index = boot_cpu_id;
#endif
+ filter_cpuid_features(c, false);
}
void __init early_cpu_init(void)
@@ -637,7 +693,7 @@
c->initial_apicid = (cpuid_ebx(1) >> 24) & 0xFF;
#ifdef CONFIG_X86_32
# ifdef CONFIG_X86_HT
- c->apicid = phys_pkg_id(c->initial_apicid, 0);
+ c->apicid = apic->phys_pkg_id(c->initial_apicid, 0);
# else
c->apicid = c->initial_apicid;
# endif
@@ -684,7 +740,7 @@
this_cpu->c_identify(c);
#ifdef CONFIG_X86_64
- c->apicid = phys_pkg_id(0);
+ c->apicid = apic->phys_pkg_id(c->initial_apicid, 0);
#endif
/*
@@ -708,6 +764,9 @@
* we do "generic changes."
*/
+ /* Filter out anything that depends on CPUID levels we don't have */
+ filter_cpuid_features(c, true);
+
/* If the model name is still unset, do table lookup. */
if (!c->x86_model_id[0]) {
char *p;
@@ -772,6 +831,7 @@
#else
vgetcpu_set_mode();
#endif
+ init_hw_perf_counters();
}
void __cpuinit identify_secondary_cpu(struct cpuinfo_x86 *c)
@@ -877,54 +937,22 @@
__setup("clearcpuid=", setup_disablecpuid);
#ifdef CONFIG_X86_64
-struct x8664_pda **_cpu_pda __read_mostly;
-EXPORT_SYMBOL(_cpu_pda);
-
struct desc_ptr idt_descr = { 256 * 16 - 1, (unsigned long) idt_table };
-static char boot_cpu_stack[IRQSTACKSIZE] __page_aligned_bss;
+DEFINE_PER_CPU_FIRST(union irq_stack_union,
+ irq_stack_union) __aligned(PAGE_SIZE);
+DEFINE_PER_CPU(char *, irq_stack_ptr) =
+ init_per_cpu_var(irq_stack_union.irq_stack) + IRQ_STACK_SIZE - 64;
-void __cpuinit pda_init(int cpu)
-{
- struct x8664_pda *pda = cpu_pda(cpu);
+DEFINE_PER_CPU(unsigned long, kernel_stack) =
+ (unsigned long)&init_thread_union - KERNEL_STACK_OFFSET + THREAD_SIZE;
+EXPORT_PER_CPU_SYMBOL(kernel_stack);
- /* Setup up data that may be needed in __get_free_pages early */
- loadsegment(fs, 0);
- loadsegment(gs, 0);
- /* Memory clobbers used to order PDA accessed */
- mb();
- wrmsrl(MSR_GS_BASE, pda);
- mb();
+DEFINE_PER_CPU(unsigned int, irq_count) = -1;
- pda->cpunumber = cpu;
- pda->irqcount = -1;
- pda->kernelstack = (unsigned long)stack_thread_info() -
- PDA_STACKOFFSET + THREAD_SIZE;
- pda->active_mm = &init_mm;
- pda->mmu_state = 0;
-
- if (cpu == 0) {
- /* others are initialized in smpboot.c */
- pda->pcurrent = &init_task;
- pda->irqstackptr = boot_cpu_stack;
- pda->irqstackptr += IRQSTACKSIZE - 64;
- } else {
- if (!pda->irqstackptr) {
- pda->irqstackptr = (char *)
- __get_free_pages(GFP_ATOMIC, IRQSTACK_ORDER);
- if (!pda->irqstackptr)
- panic("cannot allocate irqstack for cpu %d",
- cpu);
- pda->irqstackptr += IRQSTACKSIZE - 64;
- }
-
- if (pda->nodenumber == 0 && cpu_to_node(cpu) != NUMA_NO_NODE)
- pda->nodenumber = cpu_to_node(cpu);
- }
-}
-
-static char boot_exception_stacks[(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ +
- DEBUG_STKSZ] __page_aligned_bss;
+static DEFINE_PER_CPU_PAGE_ALIGNED(char, exception_stacks
+ [(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + DEBUG_STKSZ])
+ __aligned(PAGE_SIZE);
extern asmlinkage void ignore_sysret(void);
@@ -957,16 +985,21 @@
*/
DEFINE_PER_CPU(struct orig_ist, orig_ist);
-#else
+#else /* x86_64 */
-/* Make sure %fs is initialized properly in idle threads */
+#ifdef CONFIG_CC_STACKPROTECTOR
+DEFINE_PER_CPU(unsigned long, stack_canary);
+#endif
+
+/* Make sure %fs and %gs are initialized properly in idle threads */
struct pt_regs * __cpuinit idle_regs(struct pt_regs *regs)
{
memset(regs, 0, sizeof(struct pt_regs));
regs->fs = __KERNEL_PERCPU;
+ regs->gs = __KERNEL_STACK_CANARY;
return regs;
}
-#endif
+#endif /* x86_64 */
/*
* cpu_init() initializes state that is per-CPU. Some data is already
@@ -982,15 +1015,14 @@
struct tss_struct *t = &per_cpu(init_tss, cpu);
struct orig_ist *orig_ist = &per_cpu(orig_ist, cpu);
unsigned long v;
- char *estacks = NULL;
struct task_struct *me;
int i;
- /* CPU 0 is initialised in head64.c */
- if (cpu != 0)
- pda_init(cpu);
- else
- estacks = boot_exception_stacks;
+#ifdef CONFIG_NUMA
+ if (cpu != 0 && percpu_read(node_number) == 0 &&
+ cpu_to_node(cpu) != NUMA_NO_NODE)
+ percpu_write(node_number, cpu_to_node(cpu));
+#endif
me = current;
@@ -1006,7 +1038,9 @@
* and set up the GDT descriptor:
*/
- switch_to_new_gdt();
+ switch_to_new_gdt(cpu);
+ loadsegment(fs, 0);
+
load_idt((const struct desc_ptr *)&idt_descr);
memset(me->thread.tls_array, 0, GDT_ENTRY_TLS_ENTRIES * 8);
@@ -1017,25 +1051,20 @@
barrier();
check_efer();
- if (cpu != 0 && x2apic)
+ if (cpu != 0)
enable_x2apic();
/*
* set up and load the per-CPU TSS
*/
if (!orig_ist->ist[0]) {
- static const unsigned int order[N_EXCEPTION_STACKS] = {
- [0 ... N_EXCEPTION_STACKS - 1] = EXCEPTION_STACK_ORDER,
- [DEBUG_STACK - 1] = DEBUG_STACK_ORDER
+ static const unsigned int sizes[N_EXCEPTION_STACKS] = {
+ [0 ... N_EXCEPTION_STACKS - 1] = EXCEPTION_STKSZ,
+ [DEBUG_STACK - 1] = DEBUG_STKSZ
};
+ char *estacks = per_cpu(exception_stacks, cpu);
for (v = 0; v < N_EXCEPTION_STACKS; v++) {
- if (cpu) {
- estacks = (char *)__get_free_pages(GFP_ATOMIC, order[v]);
- if (!estacks)
- panic("Cannot allocate exception "
- "stack %ld %d\n", v, cpu);
- }
- estacks += PAGE_SIZE << order[v];
+ estacks += sizes[v];
orig_ist->ist[v] = t->x86_tss.ist[v] =
(unsigned long)estacks;
}
@@ -1069,22 +1098,19 @@
*/
if (kgdb_connected && arch_kgdb_ops.correct_hw_break)
arch_kgdb_ops.correct_hw_break();
- else {
+ else
#endif
- /*
- * Clear all 6 debug registers:
- */
-
- set_debugreg(0UL, 0);
- set_debugreg(0UL, 1);
- set_debugreg(0UL, 2);
- set_debugreg(0UL, 3);
- set_debugreg(0UL, 6);
- set_debugreg(0UL, 7);
-#ifdef CONFIG_KGDB
- /* If the kgdb is connected no debug regs should be altered. */
+ {
+ /*
+ * Clear all 6 debug registers:
+ */
+ set_debugreg(0UL, 0);
+ set_debugreg(0UL, 1);
+ set_debugreg(0UL, 2);
+ set_debugreg(0UL, 3);
+ set_debugreg(0UL, 6);
+ set_debugreg(0UL, 7);
}
-#endif
fpu_init();
@@ -1114,7 +1140,7 @@
clear_in_cr4(X86_CR4_VME|X86_CR4_PVI|X86_CR4_TSD|X86_CR4_DE);
load_idt(&idt_descr);
- switch_to_new_gdt();
+ switch_to_new_gdt(cpu);
/*
* Set up and load the per-CPU TSS and LDT
@@ -1135,9 +1161,6 @@
__set_tss_desc(cpu, GDT_ENTRY_DOUBLEFAULT_TSS, &doublefault_tss);
#endif
- /* Clear %gs. */
- asm volatile ("mov %0, %%gs" : : "r" (0));
-
/* Clear all 6 debug registers: */
set_debugreg(0, 0);
set_debugreg(0, 1);
diff --git a/arch/x86/kernel/cpu/cpufreq/e_powersaver.c b/arch/x86/kernel/cpu/cpufreq/e_powersaver.c
index c2f930d..41ab3f0 100644
--- a/arch/x86/kernel/cpu/cpufreq/e_powersaver.c
+++ b/arch/x86/kernel/cpu/cpufreq/e_powersaver.c
@@ -204,12 +204,12 @@
}
/* Enable Enhanced PowerSaver */
rdmsrl(MSR_IA32_MISC_ENABLE, val);
- if (!(val & 1 << 16)) {
- val |= 1 << 16;
+ if (!(val & MSR_IA32_MISC_ENABLE_ENHANCED_SPEEDSTEP)) {
+ val |= MSR_IA32_MISC_ENABLE_ENHANCED_SPEEDSTEP;
wrmsrl(MSR_IA32_MISC_ENABLE, val);
/* Can be locked at 0 */
rdmsrl(MSR_IA32_MISC_ENABLE, val);
- if (!(val & 1 << 16)) {
+ if (!(val & MSR_IA32_MISC_ENABLE_ENHANCED_SPEEDSTEP)) {
printk(KERN_INFO "eps: Can't enable Enhanced PowerSaver\n");
return -ENODEV;
}
diff --git a/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c b/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c
index f089982..c9f1fdc 100644
--- a/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c
+++ b/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c
@@ -390,14 +390,14 @@
enable it if not. */
rdmsr(MSR_IA32_MISC_ENABLE, l, h);
- if (!(l & (1<<16))) {
- l |= (1<<16);
+ if (!(l & MSR_IA32_MISC_ENABLE_ENHANCED_SPEEDSTEP)) {
+ l |= MSR_IA32_MISC_ENABLE_ENHANCED_SPEEDSTEP;
dprintk("trying to enable Enhanced SpeedStep (%x)\n", l);
wrmsr(MSR_IA32_MISC_ENABLE, l, h);
/* check to see if it stuck */
rdmsr(MSR_IA32_MISC_ENABLE, l, h);
- if (!(l & (1<<16))) {
+ if (!(l & MSR_IA32_MISC_ENABLE_ENHANCED_SPEEDSTEP)) {
printk(KERN_INFO PFX
"couldn't enable Enhanced SpeedStep\n");
return -ENODEV;
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index 24ff26a..25c559b 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -24,7 +24,6 @@
#ifdef CONFIG_X86_LOCAL_APIC
#include <asm/mpspec.h>
#include <asm/apic.h>
-#include <mach_apic.h>
#endif
static void __cpuinit early_init_intel(struct cpuinfo_x86 *c)
@@ -63,6 +62,18 @@
set_cpu_cap(c, X86_FEATURE_NONSTOP_TSC);
}
+ /*
+ * There is a known erratum on Pentium III and Core Solo
+ * and Core Duo CPUs.
+ * " Page with PAT set to WC while associated MTRR is UC
+ * may consolidate to UC "
+ * Because of this erratum, it is better to stick with
+ * setting WC in MTRR rather than using PAT on these CPUs.
+ *
+ * Enable PAT WC only on P4, Core 2 or later CPUs.
+ */
+ if (c->x86 == 6 && c->x86_model < 15)
+ clear_cpu_cap(c, X86_FEATURE_PAT);
}
#ifdef CONFIG_X86_32
@@ -135,10 +146,10 @@
*/
if ((c->x86 == 15) && (c->x86_model == 1) && (c->x86_mask == 1)) {
rdmsr(MSR_IA32_MISC_ENABLE, lo, hi);
- if ((lo & (1<<9)) == 0) {
+ if ((lo & MSR_IA32_MISC_ENABLE_PREFETCH_DISABLE) == 0) {
printk (KERN_INFO "CPU: C0 stepping P4 Xeon detected.\n");
printk (KERN_INFO "CPU: Disabling hardware prefetching (Errata 037)\n");
- lo |= (1<<9); /* Disable hw prefetching */
+ lo |= MSR_IA32_MISC_ENABLE_PREFETCH_DISABLE;
wrmsr (MSR_IA32_MISC_ENABLE, lo, hi);
}
}
diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c
index da299eb..7293508 100644
--- a/arch/x86/kernel/cpu/intel_cacheinfo.c
+++ b/arch/x86/kernel/cpu/intel_cacheinfo.c
@@ -147,7 +147,16 @@
union _cpuid4_leaf_ecx ecx;
unsigned long size;
unsigned long can_disable;
- cpumask_t shared_cpu_map; /* future?: only cpus/node is needed */
+ DECLARE_BITMAP(shared_cpu_map, NR_CPUS);
+};
+
+/* subset of above _cpuid4_info w/o shared_cpu_map */
+struct _cpuid4_info_regs {
+ union _cpuid4_leaf_eax eax;
+ union _cpuid4_leaf_ebx ebx;
+ union _cpuid4_leaf_ecx ecx;
+ unsigned long size;
+ unsigned long can_disable;
};
#ifdef CONFIG_PCI
@@ -278,7 +287,7 @@
}
static void __cpuinit
-amd_check_l3_disable(int index, struct _cpuid4_info *this_leaf)
+amd_check_l3_disable(int index, struct _cpuid4_info_regs *this_leaf)
{
if (index < 3)
return;
@@ -286,7 +295,8 @@
}
static int
-__cpuinit cpuid4_cache_lookup(int index, struct _cpuid4_info *this_leaf)
+__cpuinit cpuid4_cache_lookup_regs(int index,
+ struct _cpuid4_info_regs *this_leaf)
{
union _cpuid4_leaf_eax eax;
union _cpuid4_leaf_ebx ebx;
@@ -314,6 +324,15 @@
return 0;
}
+static int
+__cpuinit cpuid4_cache_lookup(int index, struct _cpuid4_info *this_leaf)
+{
+ struct _cpuid4_info_regs *leaf_regs =
+ (struct _cpuid4_info_regs *)this_leaf;
+
+ return cpuid4_cache_lookup_regs(index, leaf_regs);
+}
+
static int __cpuinit find_num_cache_leaves(void)
{
unsigned int eax, ebx, ecx, edx;
@@ -353,11 +372,10 @@
* parameters cpuid leaf to find the cache details
*/
for (i = 0; i < num_cache_leaves; i++) {
- struct _cpuid4_info this_leaf;
-
+ struct _cpuid4_info_regs this_leaf;
int retval;
- retval = cpuid4_cache_lookup(i, &this_leaf);
+ retval = cpuid4_cache_lookup_regs(i, &this_leaf);
if (retval >= 0) {
switch(this_leaf.eax.split.level) {
case 1:
@@ -506,17 +524,20 @@
num_threads_sharing = 1 + this_leaf->eax.split.num_threads_sharing;
if (num_threads_sharing == 1)
- cpu_set(cpu, this_leaf->shared_cpu_map);
+ cpumask_set_cpu(cpu, to_cpumask(this_leaf->shared_cpu_map));
else {
index_msb = get_count_order(num_threads_sharing);
for_each_online_cpu(i) {
if (cpu_data(i).apicid >> index_msb ==
c->apicid >> index_msb) {
- cpu_set(i, this_leaf->shared_cpu_map);
+ cpumask_set_cpu(i,
+ to_cpumask(this_leaf->shared_cpu_map));
if (i != cpu && per_cpu(cpuid4_info, i)) {
- sibling_leaf = CPUID4_INFO_IDX(i, index);
- cpu_set(cpu, sibling_leaf->shared_cpu_map);
+ sibling_leaf =
+ CPUID4_INFO_IDX(i, index);
+ cpumask_set_cpu(cpu, to_cpumask(
+ sibling_leaf->shared_cpu_map));
}
}
}
@@ -528,9 +549,10 @@
int sibling;
this_leaf = CPUID4_INFO_IDX(cpu, index);
- for_each_cpu_mask_nr(sibling, this_leaf->shared_cpu_map) {
+ for_each_cpu(sibling, to_cpumask(this_leaf->shared_cpu_map)) {
sibling_leaf = CPUID4_INFO_IDX(sibling, index);
- cpu_clear(cpu, sibling_leaf->shared_cpu_map);
+ cpumask_clear_cpu(cpu,
+ to_cpumask(sibling_leaf->shared_cpu_map));
}
}
#else
@@ -635,8 +657,9 @@
int n = 0;
if (len > 1) {
- cpumask_t *mask = &this_leaf->shared_cpu_map;
+ const struct cpumask *mask;
+ mask = to_cpumask(this_leaf->shared_cpu_map);
n = type?
cpulist_scnprintf(buf, len-2, mask) :
cpumask_scnprintf(buf, len-2, mask);
@@ -699,7 +722,8 @@
static ssize_t show_cache_disable(struct _cpuid4_info *this_leaf, char *buf)
{
- int node = cpu_to_node(first_cpu(this_leaf->shared_cpu_map));
+ const struct cpumask *mask = to_cpumask(this_leaf->shared_cpu_map);
+ int node = cpu_to_node(cpumask_first(mask));
struct pci_dev *dev = NULL;
ssize_t ret = 0;
int i;
@@ -733,7 +757,8 @@
store_cache_disable(struct _cpuid4_info *this_leaf, const char *buf,
size_t count)
{
- int node = cpu_to_node(first_cpu(this_leaf->shared_cpu_map));
+ const struct cpumask *mask = to_cpumask(this_leaf->shared_cpu_map);
+ int node = cpu_to_node(cpumask_first(mask));
struct pci_dev *dev = NULL;
unsigned int ret, index, val;
@@ -878,7 +903,7 @@
return -ENOMEM;
}
-static cpumask_t cache_dev_map = CPU_MASK_NONE;
+static DECLARE_BITMAP(cache_dev_map, NR_CPUS);
/* Add/Remove cache interface for CPU device */
static int __cpuinit cache_add_dev(struct sys_device * sys_dev)
@@ -918,7 +943,7 @@
}
kobject_uevent(&(this_object->kobj), KOBJ_ADD);
}
- cpu_set(cpu, cache_dev_map);
+ cpumask_set_cpu(cpu, to_cpumask(cache_dev_map));
kobject_uevent(per_cpu(cache_kobject, cpu), KOBJ_ADD);
return 0;
@@ -931,9 +956,9 @@
if (per_cpu(cpuid4_info, cpu) == NULL)
return;
- if (!cpu_isset(cpu, cache_dev_map))
+ if (!cpumask_test_cpu(cpu, to_cpumask(cache_dev_map)))
return;
- cpu_clear(cpu, cache_dev_map);
+ cpumask_clear_cpu(cpu, to_cpumask(cache_dev_map));
for (i = 0; i < num_cache_leaves; i++)
kobject_put(&(INDEX_KOBJECT_PTR(cpu,i)->kobj));
diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd_64.c b/arch/x86/kernel/cpu/mcheck/mce_amd_64.c
index f2ee0ae..9817506 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_amd_64.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_amd_64.c
@@ -67,7 +67,7 @@
struct threshold_bank {
struct kobject *kobj;
struct threshold_block *blocks;
- cpumask_t cpus;
+ cpumask_var_t cpus;
};
static DEFINE_PER_CPU(struct threshold_bank *, threshold_banks[NR_BANKS]);
@@ -481,7 +481,7 @@
#ifdef CONFIG_SMP
if (cpu_data(cpu).cpu_core_id && shared_bank[bank]) { /* symlink */
- i = first_cpu(per_cpu(cpu_core_map, cpu));
+ i = cpumask_first(&per_cpu(cpu_core_map, cpu));
/* first core not up yet */
if (cpu_data(i).cpu_core_id)
@@ -501,7 +501,7 @@
if (err)
goto out;
- b->cpus = per_cpu(cpu_core_map, cpu);
+ cpumask_copy(b->cpus, &per_cpu(cpu_core_map, cpu));
per_cpu(threshold_banks, cpu)[bank] = b;
goto out;
}
@@ -512,15 +512,20 @@
err = -ENOMEM;
goto out;
}
+ if (!alloc_cpumask_var(&b->cpus, GFP_KERNEL)) {
+ kfree(b);
+ err = -ENOMEM;
+ goto out;
+ }
b->kobj = kobject_create_and_add(name, &per_cpu(device_mce, cpu).kobj);
if (!b->kobj)
goto out_free;
#ifndef CONFIG_SMP
- b->cpus = CPU_MASK_ALL;
+ cpumask_setall(b->cpus);
#else
- b->cpus = per_cpu(cpu_core_map, cpu);
+ cpumask_copy(b->cpus, &per_cpu(cpu_core_map, cpu));
#endif
per_cpu(threshold_banks, cpu)[bank] = b;
@@ -529,7 +534,7 @@
if (err)
goto out_free;
- for_each_cpu_mask_nr(i, b->cpus) {
+ for_each_cpu(i, b->cpus) {
if (i == cpu)
continue;
@@ -545,6 +550,7 @@
out_free:
per_cpu(threshold_banks, cpu)[bank] = NULL;
+ free_cpumask_var(b->cpus);
kfree(b);
out:
return err;
@@ -619,7 +625,7 @@
#endif
/* remove all sibling symlinks before unregistering */
- for_each_cpu_mask_nr(i, b->cpus) {
+ for_each_cpu(i, b->cpus) {
if (i == cpu)
continue;
@@ -632,6 +638,7 @@
free_out:
kobject_del(b->kobj);
kobject_put(b->kobj);
+ free_cpumask_var(b->cpus);
kfree(b);
per_cpu(threshold_banks, cpu)[bank] = NULL;
}
diff --git a/arch/x86/kernel/cpu/mcheck/mce_intel_64.c b/arch/x86/kernel/cpu/mcheck/mce_intel_64.c
index f44c366..aa5e287 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_intel_64.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_intel_64.c
@@ -7,6 +7,7 @@
#include <linux/interrupt.h>
#include <linux/percpu.h>
#include <asm/processor.h>
+#include <asm/apic.h>
#include <asm/msr.h>
#include <asm/mce.h>
#include <asm/hw_irq.h>
@@ -48,13 +49,13 @@
*/
rdmsr(MSR_IA32_MISC_ENABLE, l, h);
h = apic_read(APIC_LVTTHMR);
- if ((l & (1 << 3)) && (h & APIC_DM_SMI)) {
+ if ((l & MSR_IA32_MISC_ENABLE_TM1) && (h & APIC_DM_SMI)) {
printk(KERN_DEBUG
"CPU%d: Thermal monitoring handled by SMI\n", cpu);
return;
}
- if (cpu_has(c, X86_FEATURE_TM2) && (l & (1 << 13)))
+ if (cpu_has(c, X86_FEATURE_TM2) && (l & MSR_IA32_MISC_ENABLE_TM2))
tm2 = 1;
if (h & APIC_VECTOR_MASK) {
@@ -72,7 +73,7 @@
wrmsr(MSR_IA32_THERM_INTERRUPT, l | 0x03, h);
rdmsr(MSR_IA32_MISC_ENABLE, l, h);
- wrmsr(MSR_IA32_MISC_ENABLE, l | (1 << 3), h);
+ wrmsr(MSR_IA32_MISC_ENABLE, l | MSR_IA32_MISC_ENABLE_TM1, h);
l = apic_read(APIC_LVTTHMR);
apic_write(APIC_LVTTHMR, l & ~APIC_LVT_MASKED);
diff --git a/arch/x86/kernel/cpu/mcheck/p4.c b/arch/x86/kernel/cpu/mcheck/p4.c
index 9b60fce..f53bdcb 100644
--- a/arch/x86/kernel/cpu/mcheck/p4.c
+++ b/arch/x86/kernel/cpu/mcheck/p4.c
@@ -85,7 +85,7 @@
*/
rdmsr(MSR_IA32_MISC_ENABLE, l, h);
h = apic_read(APIC_LVTTHMR);
- if ((l & (1<<3)) && (h & APIC_DM_SMI)) {
+ if ((l & MSR_IA32_MISC_ENABLE_TM1) && (h & APIC_DM_SMI)) {
printk(KERN_DEBUG "CPU%d: Thermal monitoring handled by SMI\n",
cpu);
return; /* -EBUSY */
@@ -111,7 +111,7 @@
vendor_thermal_interrupt = intel_thermal_interrupt;
rdmsr(MSR_IA32_MISC_ENABLE, l, h);
- wrmsr(MSR_IA32_MISC_ENABLE, l | (1<<3), h);
+ wrmsr(MSR_IA32_MISC_ENABLE, l | MSR_IA32_MISC_ENABLE_TM1, h);
l = apic_read(APIC_LVTTHMR);
apic_write(APIC_LVTTHMR, l & ~APIC_LVT_MASKED);
diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c
new file mode 100644
index 0000000..3b65f19
--- /dev/null
+++ b/arch/x86/kernel/cpu/perf_counter.c
@@ -0,0 +1,862 @@
+/*
+ * Performance counter x86 architecture code
+ *
+ * Copyright(C) 2008 Thomas Gleixner <tglx@linutronix.de>
+ * Copyright(C) 2008 Red Hat, Inc., Ingo Molnar
+ * Copyright(C) 2009 Jaswinder Singh Rajput
+ *
+ * For licencing details see kernel-base/COPYING
+ */
+
+#include <linux/perf_counter.h>
+#include <linux/capability.h>
+#include <linux/notifier.h>
+#include <linux/hardirq.h>
+#include <linux/kprobes.h>
+#include <linux/module.h>
+#include <linux/kdebug.h>
+#include <linux/sched.h>
+
+#include <asm/perf_counter.h>
+#include <asm/apic.h>
+
+static bool perf_counters_initialized __read_mostly;
+
+/*
+ * Number of (generic) HW counters:
+ */
+static int nr_counters_generic __read_mostly;
+static u64 perf_counter_mask __read_mostly;
+static u64 counter_value_mask __read_mostly;
+
+static int nr_counters_fixed __read_mostly;
+
+struct cpu_hw_counters {
+ struct perf_counter *counters[X86_PMC_IDX_MAX];
+ unsigned long used[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
+ unsigned long interrupts;
+ u64 global_enable;
+};
+
+/*
+ * struct pmc_x86_ops - performance counter x86 ops
+ */
+struct pmc_x86_ops {
+ u64 (*save_disable_all)(void);
+ void (*restore_all)(u64 ctrl);
+ unsigned eventsel;
+ unsigned perfctr;
+ int (*event_map)(int event);
+ int max_events;
+};
+
+static struct pmc_x86_ops *pmc_ops;
+
+static DEFINE_PER_CPU(struct cpu_hw_counters, cpu_hw_counters);
+
+/*
+ * Intel PerfMon v3. Used on Core2 and later.
+ */
+static const int intel_perfmon_event_map[] =
+{
+ [PERF_COUNT_CPU_CYCLES] = 0x003c,
+ [PERF_COUNT_INSTRUCTIONS] = 0x00c0,
+ [PERF_COUNT_CACHE_REFERENCES] = 0x4f2e,
+ [PERF_COUNT_CACHE_MISSES] = 0x412e,
+ [PERF_COUNT_BRANCH_INSTRUCTIONS] = 0x00c4,
+ [PERF_COUNT_BRANCH_MISSES] = 0x00c5,
+ [PERF_COUNT_BUS_CYCLES] = 0x013c,
+};
+
+static int pmc_intel_event_map(int event)
+{
+ return intel_perfmon_event_map[event];
+}
+
+/*
+ * AMD Performance Monitor K7 and later.
+ */
+static const int amd_perfmon_event_map[] =
+{
+ [PERF_COUNT_CPU_CYCLES] = 0x0076,
+ [PERF_COUNT_INSTRUCTIONS] = 0x00c0,
+ [PERF_COUNT_CACHE_REFERENCES] = 0x0080,
+ [PERF_COUNT_CACHE_MISSES] = 0x0081,
+ [PERF_COUNT_BRANCH_INSTRUCTIONS] = 0x00c4,
+ [PERF_COUNT_BRANCH_MISSES] = 0x00c5,
+};
+
+static int pmc_amd_event_map(int event)
+{
+ return amd_perfmon_event_map[event];
+}
+
+/*
+ * Propagate counter elapsed time into the generic counter.
+ * Can only be executed on the CPU where the counter is active.
+ * Returns the delta events processed.
+ */
+static void
+x86_perf_counter_update(struct perf_counter *counter,
+ struct hw_perf_counter *hwc, int idx)
+{
+ u64 prev_raw_count, new_raw_count, delta;
+
+ /*
+ * Careful: an NMI might modify the previous counter value.
+ *
+ * Our tactic to handle this is to first atomically read and
+ * exchange a new raw count - then add that new-prev delta
+ * count to the generic counter atomically:
+ */
+again:
+ prev_raw_count = atomic64_read(&hwc->prev_count);
+ rdmsrl(hwc->counter_base + idx, new_raw_count);
+
+ if (atomic64_cmpxchg(&hwc->prev_count, prev_raw_count,
+ new_raw_count) != prev_raw_count)
+ goto again;
+
+ /*
+ * Now we have the new raw value and have updated the prev
+ * timestamp already. We can now calculate the elapsed delta
+ * (counter-)time and add that to the generic counter.
+ *
+ * Careful, not all hw sign-extends above the physical width
+ * of the count, so we do that by clipping the delta to 32 bits:
+ */
+ delta = (u64)(u32)((s32)new_raw_count - (s32)prev_raw_count);
+
+ atomic64_add(delta, &counter->count);
+ atomic64_sub(delta, &hwc->period_left);
+}
+
+/*
+ * Setup the hardware configuration for a given hw_event_type
+ */
+static int __hw_perf_counter_init(struct perf_counter *counter)
+{
+ struct perf_counter_hw_event *hw_event = &counter->hw_event;
+ struct hw_perf_counter *hwc = &counter->hw;
+
+ if (unlikely(!perf_counters_initialized))
+ return -EINVAL;
+
+ /*
+ * Generate PMC IRQs:
+ * (keep 'enabled' bit clear for now)
+ */
+ hwc->config = ARCH_PERFMON_EVENTSEL_INT;
+
+ /*
+ * Count user and OS events unless requested not to.
+ */
+ if (!hw_event->exclude_user)
+ hwc->config |= ARCH_PERFMON_EVENTSEL_USR;
+ if (!hw_event->exclude_kernel)
+ hwc->config |= ARCH_PERFMON_EVENTSEL_OS;
+
+ /*
+ * If privileged enough, allow NMI events:
+ */
+ hwc->nmi = 0;
+ if (capable(CAP_SYS_ADMIN) && hw_event->nmi)
+ hwc->nmi = 1;
+
+ hwc->irq_period = hw_event->irq_period;
+ /*
+ * Intel PMCs cannot be accessed sanely above 32 bit width,
+ * so we install an artificial 1<<31 period regardless of
+ * the generic counter period:
+ */
+ if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL)
+ if ((s64)hwc->irq_period <= 0 || hwc->irq_period > 0x7FFFFFFF)
+ hwc->irq_period = 0x7FFFFFFF;
+
+ atomic64_set(&hwc->period_left, hwc->irq_period);
+
+ /*
+ * Raw event type provide the config in the event structure
+ */
+ if (hw_event->raw) {
+ hwc->config |= hw_event->type;
+ } else {
+ if (hw_event->type >= pmc_ops->max_events)
+ return -EINVAL;
+ /*
+ * The generic map:
+ */
+ hwc->config |= pmc_ops->event_map(hw_event->type);
+ }
+ counter->wakeup_pending = 0;
+
+ return 0;
+}
+
+static u64 pmc_intel_save_disable_all(void)
+{
+ u64 ctrl;
+
+ rdmsrl(MSR_CORE_PERF_GLOBAL_CTRL, ctrl);
+ wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0);
+
+ return ctrl;
+}
+
+static u64 pmc_amd_save_disable_all(void)
+{
+ int idx;
+ u64 val, ctrl = 0;
+
+ for (idx = 0; idx < nr_counters_generic; idx++) {
+ rdmsrl(MSR_K7_EVNTSEL0 + idx, val);
+ if (val & ARCH_PERFMON_EVENTSEL0_ENABLE)
+ ctrl |= (1 << idx);
+ val &= ~ARCH_PERFMON_EVENTSEL0_ENABLE;
+ wrmsrl(MSR_K7_EVNTSEL0 + idx, val);
+ }
+
+ return ctrl;
+}
+
+u64 hw_perf_save_disable(void)
+{
+ if (unlikely(!perf_counters_initialized))
+ return 0;
+
+ return pmc_ops->save_disable_all();
+}
+EXPORT_SYMBOL_GPL(hw_perf_save_disable);
+
+static void pmc_intel_restore_all(u64 ctrl)
+{
+ wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, ctrl);
+}
+
+static void pmc_amd_restore_all(u64 ctrl)
+{
+ u64 val;
+ int idx;
+
+ for (idx = 0; idx < nr_counters_generic; idx++) {
+ if (ctrl & (1 << idx)) {
+ rdmsrl(MSR_K7_EVNTSEL0 + idx, val);
+ val |= ARCH_PERFMON_EVENTSEL0_ENABLE;
+ wrmsrl(MSR_K7_EVNTSEL0 + idx, val);
+ }
+ }
+}
+
+void hw_perf_restore(u64 ctrl)
+{
+ if (unlikely(!perf_counters_initialized))
+ return;
+
+ pmc_ops->restore_all(ctrl);
+}
+EXPORT_SYMBOL_GPL(hw_perf_restore);
+
+static inline void
+__pmc_fixed_disable(struct perf_counter *counter,
+ struct hw_perf_counter *hwc, unsigned int __idx)
+{
+ int idx = __idx - X86_PMC_IDX_FIXED;
+ u64 ctrl_val, mask;
+ int err;
+
+ mask = 0xfULL << (idx * 4);
+
+ rdmsrl(hwc->config_base, ctrl_val);
+ ctrl_val &= ~mask;
+ err = checking_wrmsrl(hwc->config_base, ctrl_val);
+}
+
+static inline void
+__pmc_generic_disable(struct perf_counter *counter,
+ struct hw_perf_counter *hwc, unsigned int idx)
+{
+ if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL))
+ __pmc_fixed_disable(counter, hwc, idx);
+ else
+ wrmsr_safe(hwc->config_base + idx, hwc->config, 0);
+}
+
+static DEFINE_PER_CPU(u64, prev_left[X86_PMC_IDX_MAX]);
+
+/*
+ * Set the next IRQ period, based on the hwc->period_left value.
+ * To be called with the counter disabled in hw:
+ */
+static void
+__hw_perf_counter_set_period(struct perf_counter *counter,
+ struct hw_perf_counter *hwc, int idx)
+{
+ s64 left = atomic64_read(&hwc->period_left);
+ s32 period = hwc->irq_period;
+ int err;
+
+ /*
+ * If we are way outside a reasoable range then just skip forward:
+ */
+ if (unlikely(left <= -period)) {
+ left = period;
+ atomic64_set(&hwc->period_left, left);
+ }
+
+ if (unlikely(left <= 0)) {
+ left += period;
+ atomic64_set(&hwc->period_left, left);
+ }
+
+ per_cpu(prev_left[idx], smp_processor_id()) = left;
+
+ /*
+ * The hw counter starts counting from this counter offset,
+ * mark it to be able to extra future deltas:
+ */
+ atomic64_set(&hwc->prev_count, (u64)-left);
+
+ err = checking_wrmsrl(hwc->counter_base + idx,
+ (u64)(-left) & counter_value_mask);
+}
+
+static inline void
+__pmc_fixed_enable(struct perf_counter *counter,
+ struct hw_perf_counter *hwc, unsigned int __idx)
+{
+ int idx = __idx - X86_PMC_IDX_FIXED;
+ u64 ctrl_val, bits, mask;
+ int err;
+
+ /*
+ * Enable IRQ generation (0x8),
+ * and enable ring-3 counting (0x2) and ring-0 counting (0x1)
+ * if requested:
+ */
+ bits = 0x8ULL;
+ if (hwc->config & ARCH_PERFMON_EVENTSEL_USR)
+ bits |= 0x2;
+ if (hwc->config & ARCH_PERFMON_EVENTSEL_OS)
+ bits |= 0x1;
+ bits <<= (idx * 4);
+ mask = 0xfULL << (idx * 4);
+
+ rdmsrl(hwc->config_base, ctrl_val);
+ ctrl_val &= ~mask;
+ ctrl_val |= bits;
+ err = checking_wrmsrl(hwc->config_base, ctrl_val);
+}
+
+static void
+__pmc_generic_enable(struct perf_counter *counter,
+ struct hw_perf_counter *hwc, int idx)
+{
+ if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL))
+ __pmc_fixed_enable(counter, hwc, idx);
+ else
+ wrmsr(hwc->config_base + idx,
+ hwc->config | ARCH_PERFMON_EVENTSEL0_ENABLE, 0);
+}
+
+static int
+fixed_mode_idx(struct perf_counter *counter, struct hw_perf_counter *hwc)
+{
+ unsigned int event;
+
+ if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD)
+ return -1;
+
+ if (unlikely(hwc->nmi))
+ return -1;
+
+ event = hwc->config & ARCH_PERFMON_EVENT_MASK;
+
+ if (unlikely(event == pmc_ops->event_map(PERF_COUNT_INSTRUCTIONS)))
+ return X86_PMC_IDX_FIXED_INSTRUCTIONS;
+ if (unlikely(event == pmc_ops->event_map(PERF_COUNT_CPU_CYCLES)))
+ return X86_PMC_IDX_FIXED_CPU_CYCLES;
+ if (unlikely(event == pmc_ops->event_map(PERF_COUNT_BUS_CYCLES)))
+ return X86_PMC_IDX_FIXED_BUS_CYCLES;
+
+ return -1;
+}
+
+/*
+ * Find a PMC slot for the freshly enabled / scheduled in counter:
+ */
+static int pmc_generic_enable(struct perf_counter *counter)
+{
+ struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters);
+ struct hw_perf_counter *hwc = &counter->hw;
+ int idx;
+
+ idx = fixed_mode_idx(counter, hwc);
+ if (idx >= 0) {
+ /*
+ * Try to get the fixed counter, if that is already taken
+ * then try to get a generic counter:
+ */
+ if (test_and_set_bit(idx, cpuc->used))
+ goto try_generic;
+
+ hwc->config_base = MSR_ARCH_PERFMON_FIXED_CTR_CTRL;
+ /*
+ * We set it so that counter_base + idx in wrmsr/rdmsr maps to
+ * MSR_ARCH_PERFMON_FIXED_CTR0 ... CTR2:
+ */
+ hwc->counter_base =
+ MSR_ARCH_PERFMON_FIXED_CTR0 - X86_PMC_IDX_FIXED;
+ hwc->idx = idx;
+ } else {
+ idx = hwc->idx;
+ /* Try to get the previous generic counter again */
+ if (test_and_set_bit(idx, cpuc->used)) {
+try_generic:
+ idx = find_first_zero_bit(cpuc->used, nr_counters_generic);
+ if (idx == nr_counters_generic)
+ return -EAGAIN;
+
+ set_bit(idx, cpuc->used);
+ hwc->idx = idx;
+ }
+ hwc->config_base = pmc_ops->eventsel;
+ hwc->counter_base = pmc_ops->perfctr;
+ }
+
+ perf_counters_lapic_init(hwc->nmi);
+
+ __pmc_generic_disable(counter, hwc, idx);
+
+ cpuc->counters[idx] = counter;
+ /*
+ * Make it visible before enabling the hw:
+ */
+ smp_wmb();
+
+ __hw_perf_counter_set_period(counter, hwc, idx);
+ __pmc_generic_enable(counter, hwc, idx);
+
+ return 0;
+}
+
+void perf_counter_print_debug(void)
+{
+ u64 ctrl, status, overflow, pmc_ctrl, pmc_count, prev_left, fixed;
+ struct cpu_hw_counters *cpuc;
+ int cpu, idx;
+
+ if (!nr_counters_generic)
+ return;
+
+ local_irq_disable();
+
+ cpu = smp_processor_id();
+ cpuc = &per_cpu(cpu_hw_counters, cpu);
+
+ if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) {
+ rdmsrl(MSR_CORE_PERF_GLOBAL_CTRL, ctrl);
+ rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, status);
+ rdmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, overflow);
+ rdmsrl(MSR_ARCH_PERFMON_FIXED_CTR_CTRL, fixed);
+
+ pr_info("\n");
+ pr_info("CPU#%d: ctrl: %016llx\n", cpu, ctrl);
+ pr_info("CPU#%d: status: %016llx\n", cpu, status);
+ pr_info("CPU#%d: overflow: %016llx\n", cpu, overflow);
+ pr_info("CPU#%d: fixed: %016llx\n", cpu, fixed);
+ }
+ pr_info("CPU#%d: used: %016llx\n", cpu, *(u64 *)cpuc->used);
+
+ for (idx = 0; idx < nr_counters_generic; idx++) {
+ rdmsrl(pmc_ops->eventsel + idx, pmc_ctrl);
+ rdmsrl(pmc_ops->perfctr + idx, pmc_count);
+
+ prev_left = per_cpu(prev_left[idx], cpu);
+
+ pr_info("CPU#%d: gen-PMC%d ctrl: %016llx\n",
+ cpu, idx, pmc_ctrl);
+ pr_info("CPU#%d: gen-PMC%d count: %016llx\n",
+ cpu, idx, pmc_count);
+ pr_info("CPU#%d: gen-PMC%d left: %016llx\n",
+ cpu, idx, prev_left);
+ }
+ for (idx = 0; idx < nr_counters_fixed; idx++) {
+ rdmsrl(MSR_ARCH_PERFMON_FIXED_CTR0 + idx, pmc_count);
+
+ pr_info("CPU#%d: fixed-PMC%d count: %016llx\n",
+ cpu, idx, pmc_count);
+ }
+ local_irq_enable();
+}
+
+static void pmc_generic_disable(struct perf_counter *counter)
+{
+ struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters);
+ struct hw_perf_counter *hwc = &counter->hw;
+ unsigned int idx = hwc->idx;
+
+ __pmc_generic_disable(counter, hwc, idx);
+
+ clear_bit(idx, cpuc->used);
+ cpuc->counters[idx] = NULL;
+ /*
+ * Make sure the cleared pointer becomes visible before we
+ * (potentially) free the counter:
+ */
+ smp_wmb();
+
+ /*
+ * Drain the remaining delta count out of a counter
+ * that we are disabling:
+ */
+ x86_perf_counter_update(counter, hwc, idx);
+}
+
+static void perf_store_irq_data(struct perf_counter *counter, u64 data)
+{
+ struct perf_data *irqdata = counter->irqdata;
+
+ if (irqdata->len > PERF_DATA_BUFLEN - sizeof(u64)) {
+ irqdata->overrun++;
+ } else {
+ u64 *p = (u64 *) &irqdata->data[irqdata->len];
+
+ *p = data;
+ irqdata->len += sizeof(u64);
+ }
+}
+
+/*
+ * Save and restart an expired counter. Called by NMI contexts,
+ * so it has to be careful about preempting normal counter ops:
+ */
+static void perf_save_and_restart(struct perf_counter *counter)
+{
+ struct hw_perf_counter *hwc = &counter->hw;
+ int idx = hwc->idx;
+
+ x86_perf_counter_update(counter, hwc, idx);
+ __hw_perf_counter_set_period(counter, hwc, idx);
+
+ if (counter->state == PERF_COUNTER_STATE_ACTIVE)
+ __pmc_generic_enable(counter, hwc, idx);
+}
+
+static void
+perf_handle_group(struct perf_counter *sibling, u64 *status, u64 *overflown)
+{
+ struct perf_counter *counter, *group_leader = sibling->group_leader;
+
+ /*
+ * Store sibling timestamps (if any):
+ */
+ list_for_each_entry(counter, &group_leader->sibling_list, list_entry) {
+
+ x86_perf_counter_update(counter, &counter->hw, counter->hw.idx);
+ perf_store_irq_data(sibling, counter->hw_event.type);
+ perf_store_irq_data(sibling, atomic64_read(&counter->count));
+ }
+}
+
+/*
+ * Maximum interrupt frequency of 100KHz per CPU
+ */
+#define PERFMON_MAX_INTERRUPTS (100000/HZ)
+
+/*
+ * This handler is triggered by the local APIC, so the APIC IRQ handling
+ * rules apply:
+ */
+static void __smp_perf_counter_interrupt(struct pt_regs *regs, int nmi)
+{
+ int bit, cpu = smp_processor_id();
+ u64 ack, status;
+ struct cpu_hw_counters *cpuc = &per_cpu(cpu_hw_counters, cpu);
+
+ rdmsrl(MSR_CORE_PERF_GLOBAL_CTRL, cpuc->global_enable);
+
+ /* Disable counters globally */
+ wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0);
+ ack_APIC_irq();
+
+ rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, status);
+ if (!status)
+ goto out;
+
+again:
+ inc_irq_stat(apic_perf_irqs);
+ ack = status;
+ for_each_bit(bit, (unsigned long *)&status, X86_PMC_IDX_MAX) {
+ struct perf_counter *counter = cpuc->counters[bit];
+
+ clear_bit(bit, (unsigned long *) &status);
+ if (!counter)
+ continue;
+
+ perf_save_and_restart(counter);
+
+ switch (counter->hw_event.record_type) {
+ case PERF_RECORD_SIMPLE:
+ continue;
+ case PERF_RECORD_IRQ:
+ perf_store_irq_data(counter, instruction_pointer(regs));
+ break;
+ case PERF_RECORD_GROUP:
+ perf_handle_group(counter, &status, &ack);
+ break;
+ }
+ /*
+ * From NMI context we cannot call into the scheduler to
+ * do a task wakeup - but we mark these generic as
+ * wakeup_pending and initate a wakeup callback:
+ */
+ if (nmi) {
+ counter->wakeup_pending = 1;
+ set_tsk_thread_flag(current, TIF_PERF_COUNTERS);
+ } else {
+ wake_up(&counter->waitq);
+ }
+ }
+
+ wrmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, ack);
+
+ /*
+ * Repeat if there is more work to be done:
+ */
+ rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, status);
+ if (status)
+ goto again;
+out:
+ /*
+ * Restore - do not reenable when global enable is off or throttled:
+ */
+ if (++cpuc->interrupts < PERFMON_MAX_INTERRUPTS)
+ wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, cpuc->global_enable);
+}
+
+void perf_counter_unthrottle(void)
+{
+ struct cpu_hw_counters *cpuc;
+ u64 global_enable;
+
+ if (!cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON))
+ return;
+
+ if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD)
+ return;
+
+ if (unlikely(!perf_counters_initialized))
+ return;
+
+ cpuc = &per_cpu(cpu_hw_counters, smp_processor_id());
+ if (cpuc->interrupts >= PERFMON_MAX_INTERRUPTS) {
+ if (printk_ratelimit())
+ printk(KERN_WARNING "PERFMON: max interrupts exceeded!\n");
+ wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, cpuc->global_enable);
+ }
+ rdmsrl(MSR_CORE_PERF_GLOBAL_CTRL, global_enable);
+ if (unlikely(cpuc->global_enable && !global_enable))
+ wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, cpuc->global_enable);
+ cpuc->interrupts = 0;
+}
+
+void smp_perf_counter_interrupt(struct pt_regs *regs)
+{
+ irq_enter();
+ apic_write(APIC_LVTPC, LOCAL_PERF_VECTOR);
+ __smp_perf_counter_interrupt(regs, 0);
+
+ irq_exit();
+}
+
+/*
+ * This handler is triggered by NMI contexts:
+ */
+void perf_counter_notify(struct pt_regs *regs)
+{
+ struct cpu_hw_counters *cpuc;
+ unsigned long flags;
+ int bit, cpu;
+
+ local_irq_save(flags);
+ cpu = smp_processor_id();
+ cpuc = &per_cpu(cpu_hw_counters, cpu);
+
+ for_each_bit(bit, cpuc->used, X86_PMC_IDX_MAX) {
+ struct perf_counter *counter = cpuc->counters[bit];
+
+ if (!counter)
+ continue;
+
+ if (counter->wakeup_pending) {
+ counter->wakeup_pending = 0;
+ wake_up(&counter->waitq);
+ }
+ }
+
+ local_irq_restore(flags);
+}
+
+void perf_counters_lapic_init(int nmi)
+{
+ u32 apic_val;
+
+ if (!perf_counters_initialized)
+ return;
+ /*
+ * Enable the performance counter vector in the APIC LVT:
+ */
+ apic_val = apic_read(APIC_LVTERR);
+
+ apic_write(APIC_LVTERR, apic_val | APIC_LVT_MASKED);
+ if (nmi)
+ apic_write(APIC_LVTPC, APIC_DM_NMI);
+ else
+ apic_write(APIC_LVTPC, LOCAL_PERF_VECTOR);
+ apic_write(APIC_LVTERR, apic_val);
+}
+
+static int __kprobes
+perf_counter_nmi_handler(struct notifier_block *self,
+ unsigned long cmd, void *__args)
+{
+ struct die_args *args = __args;
+ struct pt_regs *regs;
+
+ if (likely(cmd != DIE_NMI_IPI))
+ return NOTIFY_DONE;
+
+ regs = args->regs;
+
+ apic_write(APIC_LVTPC, APIC_DM_NMI);
+ __smp_perf_counter_interrupt(regs, 1);
+
+ return NOTIFY_STOP;
+}
+
+static __read_mostly struct notifier_block perf_counter_nmi_notifier = {
+ .notifier_call = perf_counter_nmi_handler,
+ .next = NULL,
+ .priority = 1
+};
+
+static struct pmc_x86_ops pmc_intel_ops = {
+ .save_disable_all = pmc_intel_save_disable_all,
+ .restore_all = pmc_intel_restore_all,
+ .eventsel = MSR_ARCH_PERFMON_EVENTSEL0,
+ .perfctr = MSR_ARCH_PERFMON_PERFCTR0,
+ .event_map = pmc_intel_event_map,
+ .max_events = ARRAY_SIZE(intel_perfmon_event_map),
+};
+
+static struct pmc_x86_ops pmc_amd_ops = {
+ .save_disable_all = pmc_amd_save_disable_all,
+ .restore_all = pmc_amd_restore_all,
+ .eventsel = MSR_K7_EVNTSEL0,
+ .perfctr = MSR_K7_PERFCTR0,
+ .event_map = pmc_amd_event_map,
+ .max_events = ARRAY_SIZE(amd_perfmon_event_map),
+};
+
+static struct pmc_x86_ops *pmc_intel_init(void)
+{
+ union cpuid10_eax eax;
+ unsigned int ebx;
+ unsigned int unused;
+ union cpuid10_edx edx;
+
+ /*
+ * Check whether the Architectural PerfMon supports
+ * Branch Misses Retired Event or not.
+ */
+ cpuid(10, &eax.full, &ebx, &unused, &edx.full);
+ if (eax.split.mask_length <= ARCH_PERFMON_BRANCH_MISSES_RETIRED)
+ return NULL;
+
+ pr_info("Intel Performance Monitoring support detected.\n");
+ pr_info("... version: %d\n", eax.split.version_id);
+ pr_info("... bit width: %d\n", eax.split.bit_width);
+ pr_info("... mask length: %d\n", eax.split.mask_length);
+
+ nr_counters_generic = eax.split.num_counters;
+ nr_counters_fixed = edx.split.num_counters_fixed;
+ counter_value_mask = (1ULL << eax.split.bit_width) - 1;
+
+ return &pmc_intel_ops;
+}
+
+static struct pmc_x86_ops *pmc_amd_init(void)
+{
+ nr_counters_generic = 4;
+ nr_counters_fixed = 0;
+
+ pr_info("AMD Performance Monitoring support detected.\n");
+
+ return &pmc_amd_ops;
+}
+
+void __init init_hw_perf_counters(void)
+{
+ if (!cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON))
+ return;
+
+ switch (boot_cpu_data.x86_vendor) {
+ case X86_VENDOR_INTEL:
+ pmc_ops = pmc_intel_init();
+ break;
+ case X86_VENDOR_AMD:
+ pmc_ops = pmc_amd_init();
+ break;
+ }
+ if (!pmc_ops)
+ return;
+
+ pr_info("... num counters: %d\n", nr_counters_generic);
+ if (nr_counters_generic > X86_PMC_MAX_GENERIC) {
+ nr_counters_generic = X86_PMC_MAX_GENERIC;
+ WARN(1, KERN_ERR "hw perf counters %d > max(%d), clipping!",
+ nr_counters_generic, X86_PMC_MAX_GENERIC);
+ }
+ perf_counter_mask = (1 << nr_counters_generic) - 1;
+ perf_max_counters = nr_counters_generic;
+
+ pr_info("... value mask: %016Lx\n", counter_value_mask);
+
+ if (nr_counters_fixed > X86_PMC_MAX_FIXED) {
+ nr_counters_fixed = X86_PMC_MAX_FIXED;
+ WARN(1, KERN_ERR "hw perf counters fixed %d > max(%d), clipping!",
+ nr_counters_fixed, X86_PMC_MAX_FIXED);
+ }
+ pr_info("... fixed counters: %d\n", nr_counters_fixed);
+
+ perf_counter_mask |= ((1LL << nr_counters_fixed)-1) << X86_PMC_IDX_FIXED;
+
+ pr_info("... counter mask: %016Lx\n", perf_counter_mask);
+ perf_counters_initialized = true;
+
+ perf_counters_lapic_init(0);
+ register_die_notifier(&perf_counter_nmi_notifier);
+}
+
+static void pmc_generic_read(struct perf_counter *counter)
+{
+ x86_perf_counter_update(counter, &counter->hw, counter->hw.idx);
+}
+
+static const struct hw_perf_counter_ops x86_perf_counter_ops = {
+ .enable = pmc_generic_enable,
+ .disable = pmc_generic_disable,
+ .read = pmc_generic_read,
+};
+
+const struct hw_perf_counter_ops *
+hw_perf_counter_init(struct perf_counter *counter)
+{
+ int err;
+
+ err = __hw_perf_counter_init(counter);
+ if (err)
+ return NULL;
+
+ return &x86_perf_counter_ops;
+}
diff --git a/arch/x86/kernel/cpu/perfctr-watchdog.c b/arch/x86/kernel/cpu/perfctr-watchdog.c
index 9abd48b..d6f5b9f 100644
--- a/arch/x86/kernel/cpu/perfctr-watchdog.c
+++ b/arch/x86/kernel/cpu/perfctr-watchdog.c
@@ -20,7 +20,7 @@
#include <linux/kprobes.h>
#include <asm/apic.h>
-#include <asm/intel_arch_perfmon.h>
+#include <asm/perf_counter.h>
struct nmi_watchdog_ctlblk {
unsigned int cccr_msr;
diff --git a/arch/x86/kernel/crash.c b/arch/x86/kernel/crash.c
index c689d19..ff95824 100644
--- a/arch/x86/kernel/crash.c
+++ b/arch/x86/kernel/crash.c
@@ -24,12 +24,10 @@
#include <asm/apic.h>
#include <asm/hpet.h>
#include <linux/kdebug.h>
-#include <asm/smp.h>
+#include <asm/cpu.h>
#include <asm/reboot.h>
#include <asm/virtext.h>
-#include <mach_ipi.h>
-
#if defined(CONFIG_SMP) && defined(CONFIG_X86_LOCAL_APIC)
diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c
index 6b1f6f6..87d103d 100644
--- a/arch/x86/kernel/dumpstack.c
+++ b/arch/x86/kernel/dumpstack.c
@@ -99,7 +99,7 @@
frame = frame->next_frame;
bp = (unsigned long) frame;
} else {
- ops->address(data, addr, bp == 0);
+ ops->address(data, addr, 0);
}
print_ftrace_graph_addr(addr, data, ops, tinfo, graph);
}
diff --git a/arch/x86/kernel/dumpstack_64.c b/arch/x86/kernel/dumpstack_64.c
index c302d07..d35db59 100644
--- a/arch/x86/kernel/dumpstack_64.c
+++ b/arch/x86/kernel/dumpstack_64.c
@@ -106,7 +106,8 @@
const struct stacktrace_ops *ops, void *data)
{
const unsigned cpu = get_cpu();
- unsigned long *irqstack_end = (unsigned long *)cpu_pda(cpu)->irqstackptr;
+ unsigned long *irq_stack_end =
+ (unsigned long *)per_cpu(irq_stack_ptr, cpu);
unsigned used = 0;
struct thread_info *tinfo;
int graph = 0;
@@ -160,23 +161,23 @@
stack = (unsigned long *) estack_end[-2];
continue;
}
- if (irqstack_end) {
- unsigned long *irqstack;
- irqstack = irqstack_end -
- (IRQSTACKSIZE - 64) / sizeof(*irqstack);
+ if (irq_stack_end) {
+ unsigned long *irq_stack;
+ irq_stack = irq_stack_end -
+ (IRQ_STACK_SIZE - 64) / sizeof(*irq_stack);
- if (stack >= irqstack && stack < irqstack_end) {
+ if (stack >= irq_stack && stack < irq_stack_end) {
if (ops->stack(data, "IRQ") < 0)
break;
bp = print_context_stack(tinfo, stack, bp,
- ops, data, irqstack_end, &graph);
+ ops, data, irq_stack_end, &graph);
/*
* We link to the next stack (which would be
* the process stack normally) the last
* pointer (index -1 to end) in the IRQ stack:
*/
- stack = (unsigned long *) (irqstack_end[-1]);
- irqstack_end = NULL;
+ stack = (unsigned long *) (irq_stack_end[-1]);
+ irq_stack_end = NULL;
ops->stack(data, "EOI");
continue;
}
@@ -199,10 +200,10 @@
unsigned long *stack;
int i;
const int cpu = smp_processor_id();
- unsigned long *irqstack_end =
- (unsigned long *) (cpu_pda(cpu)->irqstackptr);
- unsigned long *irqstack =
- (unsigned long *) (cpu_pda(cpu)->irqstackptr - IRQSTACKSIZE);
+ unsigned long *irq_stack_end =
+ (unsigned long *)(per_cpu(irq_stack_ptr, cpu));
+ unsigned long *irq_stack =
+ (unsigned long *)(per_cpu(irq_stack_ptr, cpu) - IRQ_STACK_SIZE);
/*
* debugging aid: "show_stack(NULL, NULL);" prints the
@@ -218,9 +219,9 @@
stack = sp;
for (i = 0; i < kstack_depth_to_print; i++) {
- if (stack >= irqstack && stack <= irqstack_end) {
- if (stack == irqstack_end) {
- stack = (unsigned long *) (irqstack_end[-1]);
+ if (stack >= irq_stack && stack <= irq_stack_end) {
+ if (stack == irq_stack_end) {
+ stack = (unsigned long *) (irq_stack_end[-1]);
printk(" <EOI> ");
}
} else {
@@ -241,7 +242,7 @@
int i;
unsigned long sp;
const int cpu = smp_processor_id();
- struct task_struct *cur = cpu_pda(cpu)->pcurrent;
+ struct task_struct *cur = current;
sp = regs->sp;
printk("CPU %d ", cpu);
diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c
index e858268..508bec1 100644
--- a/arch/x86/kernel/e820.c
+++ b/arch/x86/kernel/e820.c
@@ -858,6 +858,9 @@
*/
void __init reserve_early(u64 start, u64 end, char *name)
{
+ if (start >= end)
+ return;
+
drop_overlaps_that_are_ok(start, end);
__reserve_early(start, end, name, 0);
}
diff --git a/arch/x86/kernel/early_printk.c b/arch/x86/kernel/early_printk.c
index 504ad19..639ad98 100644
--- a/arch/x86/kernel/early_printk.c
+++ b/arch/x86/kernel/early_printk.c
@@ -13,8 +13,8 @@
#include <asm/setup.h>
#include <xen/hvc-console.h>
#include <asm/pci-direct.h>
-#include <asm/pgtable.h>
#include <asm/fixmap.h>
+#include <asm/pgtable.h>
#include <linux/usb/ehci_def.h>
/* Simple VGA output */
diff --git a/arch/x86/kernel/efi.c b/arch/x86/kernel/efi.c
index 1119d24..b205272 100644
--- a/arch/x86/kernel/efi.c
+++ b/arch/x86/kernel/efi.c
@@ -366,10 +366,12 @@
SMBIOS_TABLE_GUID)) {
efi.smbios = config_tables[i].table;
printk(" SMBIOS=0x%lx ", config_tables[i].table);
+#ifdef CONFIG_X86_UV
} else if (!efi_guidcmp(config_tables[i].guid,
UV_SYSTEM_TABLE_GUID)) {
efi.uv_systab = config_tables[i].table;
printk(" UVsystab=0x%lx ", config_tables[i].table);
+#endif
} else if (!efi_guidcmp(config_tables[i].guid,
HCDP_TABLE_GUID)) {
efi.hcdp = config_tables[i].table;
diff --git a/arch/x86/kernel/efi_64.c b/arch/x86/kernel/efi_64.c
index 652c528..a4ee291 100644
--- a/arch/x86/kernel/efi_64.c
+++ b/arch/x86/kernel/efi_64.c
@@ -36,6 +36,7 @@
#include <asm/proto.h>
#include <asm/efi.h>
#include <asm/cacheflush.h>
+#include <asm/fixmap.h>
static pgd_t save_pgd __initdata;
static unsigned long efi_flags __initdata;
diff --git a/arch/x86/kernel/efi_stub_32.S b/arch/x86/kernel/efi_stub_32.S
index ef00bb7..fbe66e6 100644
--- a/arch/x86/kernel/efi_stub_32.S
+++ b/arch/x86/kernel/efi_stub_32.S
@@ -6,7 +6,7 @@
*/
#include <linux/linkage.h>
-#include <asm/page.h>
+#include <asm/page_types.h>
/*
* efi_call_phys(void *, ...) is a function with variable parameters.
@@ -113,6 +113,7 @@
movl (%edx), %ecx
pushl %ecx
ret
+ENDPROC(efi_call_phys)
.previous
.data
diff --git a/arch/x86/kernel/efi_stub_64.S b/arch/x86/kernel/efi_stub_64.S
index 99b47d4..4c07cca 100644
--- a/arch/x86/kernel/efi_stub_64.S
+++ b/arch/x86/kernel/efi_stub_64.S
@@ -41,6 +41,7 @@
addq $32, %rsp
RESTORE_XMM
ret
+ENDPROC(efi_call0)
ENTRY(efi_call1)
SAVE_XMM
@@ -50,6 +51,7 @@
addq $32, %rsp
RESTORE_XMM
ret
+ENDPROC(efi_call1)
ENTRY(efi_call2)
SAVE_XMM
@@ -59,6 +61,7 @@
addq $32, %rsp
RESTORE_XMM
ret
+ENDPROC(efi_call2)
ENTRY(efi_call3)
SAVE_XMM
@@ -69,6 +72,7 @@
addq $32, %rsp
RESTORE_XMM
ret
+ENDPROC(efi_call3)
ENTRY(efi_call4)
SAVE_XMM
@@ -80,6 +84,7 @@
addq $32, %rsp
RESTORE_XMM
ret
+ENDPROC(efi_call4)
ENTRY(efi_call5)
SAVE_XMM
@@ -92,6 +97,7 @@
addq $48, %rsp
RESTORE_XMM
ret
+ENDPROC(efi_call5)
ENTRY(efi_call6)
SAVE_XMM
@@ -107,3 +113,4 @@
addq $48, %rsp
RESTORE_XMM
ret
+ENDPROC(efi_call6)
diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S
index 4646902..899e893 100644
--- a/arch/x86/kernel/entry_32.S
+++ b/arch/x86/kernel/entry_32.S
@@ -30,12 +30,13 @@
* 1C(%esp) - %ds
* 20(%esp) - %es
* 24(%esp) - %fs
- * 28(%esp) - orig_eax
- * 2C(%esp) - %eip
- * 30(%esp) - %cs
- * 34(%esp) - %eflags
- * 38(%esp) - %oldesp
- * 3C(%esp) - %oldss
+ * 28(%esp) - %gs saved iff !CONFIG_X86_32_LAZY_GS
+ * 2C(%esp) - orig_eax
+ * 30(%esp) - %eip
+ * 34(%esp) - %cs
+ * 38(%esp) - %eflags
+ * 3C(%esp) - %oldesp
+ * 40(%esp) - %oldss
*
* "current" is in register %ebx during any slow entries.
*/
@@ -46,7 +47,7 @@
#include <asm/errno.h>
#include <asm/segment.h>
#include <asm/smp.h>
-#include <asm/page.h>
+#include <asm/page_types.h>
#include <asm/desc.h>
#include <asm/percpu.h>
#include <asm/dwarf2.h>
@@ -101,121 +102,221 @@
#define resume_userspace_sig resume_userspace
#endif
-#define SAVE_ALL \
- cld; \
- pushl %fs; \
- CFI_ADJUST_CFA_OFFSET 4;\
- /*CFI_REL_OFFSET fs, 0;*/\
- pushl %es; \
- CFI_ADJUST_CFA_OFFSET 4;\
- /*CFI_REL_OFFSET es, 0;*/\
- pushl %ds; \
- CFI_ADJUST_CFA_OFFSET 4;\
- /*CFI_REL_OFFSET ds, 0;*/\
- pushl %eax; \
- CFI_ADJUST_CFA_OFFSET 4;\
- CFI_REL_OFFSET eax, 0;\
- pushl %ebp; \
- CFI_ADJUST_CFA_OFFSET 4;\
- CFI_REL_OFFSET ebp, 0;\
- pushl %edi; \
- CFI_ADJUST_CFA_OFFSET 4;\
- CFI_REL_OFFSET edi, 0;\
- pushl %esi; \
- CFI_ADJUST_CFA_OFFSET 4;\
- CFI_REL_OFFSET esi, 0;\
- pushl %edx; \
- CFI_ADJUST_CFA_OFFSET 4;\
- CFI_REL_OFFSET edx, 0;\
- pushl %ecx; \
- CFI_ADJUST_CFA_OFFSET 4;\
- CFI_REL_OFFSET ecx, 0;\
- pushl %ebx; \
- CFI_ADJUST_CFA_OFFSET 4;\
- CFI_REL_OFFSET ebx, 0;\
- movl $(__USER_DS), %edx; \
- movl %edx, %ds; \
- movl %edx, %es; \
- movl $(__KERNEL_PERCPU), %edx; \
- movl %edx, %fs
+/*
+ * User gs save/restore
+ *
+ * %gs is used for userland TLS and kernel only uses it for stack
+ * canary which is required to be at %gs:20 by gcc. Read the comment
+ * at the top of stackprotector.h for more info.
+ *
+ * Local labels 98 and 99 are used.
+ */
+#ifdef CONFIG_X86_32_LAZY_GS
-#define RESTORE_INT_REGS \
- popl %ebx; \
- CFI_ADJUST_CFA_OFFSET -4;\
- CFI_RESTORE ebx;\
- popl %ecx; \
- CFI_ADJUST_CFA_OFFSET -4;\
- CFI_RESTORE ecx;\
- popl %edx; \
- CFI_ADJUST_CFA_OFFSET -4;\
- CFI_RESTORE edx;\
- popl %esi; \
- CFI_ADJUST_CFA_OFFSET -4;\
- CFI_RESTORE esi;\
- popl %edi; \
- CFI_ADJUST_CFA_OFFSET -4;\
- CFI_RESTORE edi;\
- popl %ebp; \
- CFI_ADJUST_CFA_OFFSET -4;\
- CFI_RESTORE ebp;\
- popl %eax; \
- CFI_ADJUST_CFA_OFFSET -4;\
- CFI_RESTORE eax
+ /* unfortunately push/pop can't be no-op */
+.macro PUSH_GS
+ pushl $0
+ CFI_ADJUST_CFA_OFFSET 4
+.endm
+.macro POP_GS pop=0
+ addl $(4 + \pop), %esp
+ CFI_ADJUST_CFA_OFFSET -(4 + \pop)
+.endm
+.macro POP_GS_EX
+.endm
-#define RESTORE_REGS \
- RESTORE_INT_REGS; \
-1: popl %ds; \
- CFI_ADJUST_CFA_OFFSET -4;\
- /*CFI_RESTORE ds;*/\
-2: popl %es; \
- CFI_ADJUST_CFA_OFFSET -4;\
- /*CFI_RESTORE es;*/\
-3: popl %fs; \
- CFI_ADJUST_CFA_OFFSET -4;\
- /*CFI_RESTORE fs;*/\
-.pushsection .fixup,"ax"; \
-4: movl $0,(%esp); \
- jmp 1b; \
-5: movl $0,(%esp); \
- jmp 2b; \
-6: movl $0,(%esp); \
- jmp 3b; \
-.section __ex_table,"a";\
- .align 4; \
- .long 1b,4b; \
- .long 2b,5b; \
- .long 3b,6b; \
+ /* all the rest are no-op */
+.macro PTGS_TO_GS
+.endm
+.macro PTGS_TO_GS_EX
+.endm
+.macro GS_TO_REG reg
+.endm
+.macro REG_TO_PTGS reg
+.endm
+.macro SET_KERNEL_GS reg
+.endm
+
+#else /* CONFIG_X86_32_LAZY_GS */
+
+.macro PUSH_GS
+ pushl %gs
+ CFI_ADJUST_CFA_OFFSET 4
+ /*CFI_REL_OFFSET gs, 0*/
+.endm
+
+.macro POP_GS pop=0
+98: popl %gs
+ CFI_ADJUST_CFA_OFFSET -4
+ /*CFI_RESTORE gs*/
+ .if \pop <> 0
+ add $\pop, %esp
+ CFI_ADJUST_CFA_OFFSET -\pop
+ .endif
+.endm
+.macro POP_GS_EX
+.pushsection .fixup, "ax"
+99: movl $0, (%esp)
+ jmp 98b
+.section __ex_table, "a"
+ .align 4
+ .long 98b, 99b
.popsection
+.endm
-#define RING0_INT_FRAME \
- CFI_STARTPROC simple;\
- CFI_SIGNAL_FRAME;\
- CFI_DEF_CFA esp, 3*4;\
- /*CFI_OFFSET cs, -2*4;*/\
+.macro PTGS_TO_GS
+98: mov PT_GS(%esp), %gs
+.endm
+.macro PTGS_TO_GS_EX
+.pushsection .fixup, "ax"
+99: movl $0, PT_GS(%esp)
+ jmp 98b
+.section __ex_table, "a"
+ .align 4
+ .long 98b, 99b
+.popsection
+.endm
+
+.macro GS_TO_REG reg
+ movl %gs, \reg
+ /*CFI_REGISTER gs, \reg*/
+.endm
+.macro REG_TO_PTGS reg
+ movl \reg, PT_GS(%esp)
+ /*CFI_REL_OFFSET gs, PT_GS*/
+.endm
+.macro SET_KERNEL_GS reg
+ movl $(__KERNEL_STACK_CANARY), \reg
+ movl \reg, %gs
+.endm
+
+#endif /* CONFIG_X86_32_LAZY_GS */
+
+.macro SAVE_ALL
+ cld
+ PUSH_GS
+ pushl %fs
+ CFI_ADJUST_CFA_OFFSET 4
+ /*CFI_REL_OFFSET fs, 0;*/
+ pushl %es
+ CFI_ADJUST_CFA_OFFSET 4
+ /*CFI_REL_OFFSET es, 0;*/
+ pushl %ds
+ CFI_ADJUST_CFA_OFFSET 4
+ /*CFI_REL_OFFSET ds, 0;*/
+ pushl %eax
+ CFI_ADJUST_CFA_OFFSET 4
+ CFI_REL_OFFSET eax, 0
+ pushl %ebp
+ CFI_ADJUST_CFA_OFFSET 4
+ CFI_REL_OFFSET ebp, 0
+ pushl %edi
+ CFI_ADJUST_CFA_OFFSET 4
+ CFI_REL_OFFSET edi, 0
+ pushl %esi
+ CFI_ADJUST_CFA_OFFSET 4
+ CFI_REL_OFFSET esi, 0
+ pushl %edx
+ CFI_ADJUST_CFA_OFFSET 4
+ CFI_REL_OFFSET edx, 0
+ pushl %ecx
+ CFI_ADJUST_CFA_OFFSET 4
+ CFI_REL_OFFSET ecx, 0
+ pushl %ebx
+ CFI_ADJUST_CFA_OFFSET 4
+ CFI_REL_OFFSET ebx, 0
+ movl $(__USER_DS), %edx
+ movl %edx, %ds
+ movl %edx, %es
+ movl $(__KERNEL_PERCPU), %edx
+ movl %edx, %fs
+ SET_KERNEL_GS %edx
+.endm
+
+.macro RESTORE_INT_REGS
+ popl %ebx
+ CFI_ADJUST_CFA_OFFSET -4
+ CFI_RESTORE ebx
+ popl %ecx
+ CFI_ADJUST_CFA_OFFSET -4
+ CFI_RESTORE ecx
+ popl %edx
+ CFI_ADJUST_CFA_OFFSET -4
+ CFI_RESTORE edx
+ popl %esi
+ CFI_ADJUST_CFA_OFFSET -4
+ CFI_RESTORE esi
+ popl %edi
+ CFI_ADJUST_CFA_OFFSET -4
+ CFI_RESTORE edi
+ popl %ebp
+ CFI_ADJUST_CFA_OFFSET -4
+ CFI_RESTORE ebp
+ popl %eax
+ CFI_ADJUST_CFA_OFFSET -4
+ CFI_RESTORE eax
+.endm
+
+.macro RESTORE_REGS pop=0
+ RESTORE_INT_REGS
+1: popl %ds
+ CFI_ADJUST_CFA_OFFSET -4
+ /*CFI_RESTORE ds;*/
+2: popl %es
+ CFI_ADJUST_CFA_OFFSET -4
+ /*CFI_RESTORE es;*/
+3: popl %fs
+ CFI_ADJUST_CFA_OFFSET -4
+ /*CFI_RESTORE fs;*/
+ POP_GS \pop
+.pushsection .fixup, "ax"
+4: movl $0, (%esp)
+ jmp 1b
+5: movl $0, (%esp)
+ jmp 2b
+6: movl $0, (%esp)
+ jmp 3b
+.section __ex_table, "a"
+ .align 4
+ .long 1b, 4b
+ .long 2b, 5b
+ .long 3b, 6b
+.popsection
+ POP_GS_EX
+.endm
+
+.macro RING0_INT_FRAME
+ CFI_STARTPROC simple
+ CFI_SIGNAL_FRAME
+ CFI_DEF_CFA esp, 3*4
+ /*CFI_OFFSET cs, -2*4;*/
CFI_OFFSET eip, -3*4
+.endm
-#define RING0_EC_FRAME \
- CFI_STARTPROC simple;\
- CFI_SIGNAL_FRAME;\
- CFI_DEF_CFA esp, 4*4;\
- /*CFI_OFFSET cs, -2*4;*/\
+.macro RING0_EC_FRAME
+ CFI_STARTPROC simple
+ CFI_SIGNAL_FRAME
+ CFI_DEF_CFA esp, 4*4
+ /*CFI_OFFSET cs, -2*4;*/
CFI_OFFSET eip, -3*4
+.endm
-#define RING0_PTREGS_FRAME \
- CFI_STARTPROC simple;\
- CFI_SIGNAL_FRAME;\
- CFI_DEF_CFA esp, PT_OLDESP-PT_EBX;\
- /*CFI_OFFSET cs, PT_CS-PT_OLDESP;*/\
- CFI_OFFSET eip, PT_EIP-PT_OLDESP;\
- /*CFI_OFFSET es, PT_ES-PT_OLDESP;*/\
- /*CFI_OFFSET ds, PT_DS-PT_OLDESP;*/\
- CFI_OFFSET eax, PT_EAX-PT_OLDESP;\
- CFI_OFFSET ebp, PT_EBP-PT_OLDESP;\
- CFI_OFFSET edi, PT_EDI-PT_OLDESP;\
- CFI_OFFSET esi, PT_ESI-PT_OLDESP;\
- CFI_OFFSET edx, PT_EDX-PT_OLDESP;\
- CFI_OFFSET ecx, PT_ECX-PT_OLDESP;\
+.macro RING0_PTREGS_FRAME
+ CFI_STARTPROC simple
+ CFI_SIGNAL_FRAME
+ CFI_DEF_CFA esp, PT_OLDESP-PT_EBX
+ /*CFI_OFFSET cs, PT_CS-PT_OLDESP;*/
+ CFI_OFFSET eip, PT_EIP-PT_OLDESP
+ /*CFI_OFFSET es, PT_ES-PT_OLDESP;*/
+ /*CFI_OFFSET ds, PT_DS-PT_OLDESP;*/
+ CFI_OFFSET eax, PT_EAX-PT_OLDESP
+ CFI_OFFSET ebp, PT_EBP-PT_OLDESP
+ CFI_OFFSET edi, PT_EDI-PT_OLDESP
+ CFI_OFFSET esi, PT_ESI-PT_OLDESP
+ CFI_OFFSET edx, PT_EDX-PT_OLDESP
+ CFI_OFFSET ecx, PT_ECX-PT_OLDESP
CFI_OFFSET ebx, PT_EBX-PT_OLDESP
+.endm
ENTRY(ret_from_fork)
CFI_STARTPROC
@@ -362,6 +463,7 @@
xorl %ebp,%ebp
TRACE_IRQS_ON
1: mov PT_FS(%esp), %fs
+ PTGS_TO_GS
ENABLE_INTERRUPTS_SYSEXIT
#ifdef CONFIG_AUDITSYSCALL
@@ -410,6 +512,7 @@
.align 4
.long 1b,2b
.popsection
+ PTGS_TO_GS_EX
ENDPROC(ia32_sysenter_target)
# system call handler stub
@@ -452,8 +555,7 @@
restore_nocheck:
TRACE_IRQS_IRET
restore_nocheck_notrace:
- RESTORE_REGS
- addl $4, %esp # skip orig_eax/error_code
+ RESTORE_REGS 4 # skip orig_eax/error_code
CFI_ADJUST_CFA_OFFSET -4
irq_return:
INTERRUPT_RETURN
@@ -595,28 +697,50 @@
END(syscall_badsys)
CFI_ENDPROC
-#define FIXUP_ESPFIX_STACK \
- /* since we are on a wrong stack, we cant make it a C code :( */ \
- PER_CPU(gdt_page, %ebx); \
- GET_DESC_BASE(GDT_ENTRY_ESPFIX_SS, %ebx, %eax, %ax, %al, %ah); \
- addl %esp, %eax; \
- pushl $__KERNEL_DS; \
- CFI_ADJUST_CFA_OFFSET 4; \
- pushl %eax; \
- CFI_ADJUST_CFA_OFFSET 4; \
- lss (%esp), %esp; \
- CFI_ADJUST_CFA_OFFSET -8;
-#define UNWIND_ESPFIX_STACK \
- movl %ss, %eax; \
- /* see if on espfix stack */ \
- cmpw $__ESPFIX_SS, %ax; \
- jne 27f; \
- movl $__KERNEL_DS, %eax; \
- movl %eax, %ds; \
- movl %eax, %es; \
- /* switch to normal stack */ \
- FIXUP_ESPFIX_STACK; \
-27:;
+/*
+ * System calls that need a pt_regs pointer.
+ */
+#define PTREGSCALL(name) \
+ ALIGN; \
+ptregs_##name: \
+ leal 4(%esp),%eax; \
+ jmp sys_##name;
+
+PTREGSCALL(iopl)
+PTREGSCALL(fork)
+PTREGSCALL(clone)
+PTREGSCALL(vfork)
+PTREGSCALL(execve)
+PTREGSCALL(sigaltstack)
+PTREGSCALL(sigreturn)
+PTREGSCALL(rt_sigreturn)
+PTREGSCALL(vm86)
+PTREGSCALL(vm86old)
+
+.macro FIXUP_ESPFIX_STACK
+ /* since we are on a wrong stack, we cant make it a C code :( */
+ PER_CPU(gdt_page, %ebx)
+ GET_DESC_BASE(GDT_ENTRY_ESPFIX_SS, %ebx, %eax, %ax, %al, %ah)
+ addl %esp, %eax
+ pushl $__KERNEL_DS
+ CFI_ADJUST_CFA_OFFSET 4
+ pushl %eax
+ CFI_ADJUST_CFA_OFFSET 4
+ lss (%esp), %esp
+ CFI_ADJUST_CFA_OFFSET -8
+.endm
+.macro UNWIND_ESPFIX_STACK
+ movl %ss, %eax
+ /* see if on espfix stack */
+ cmpw $__ESPFIX_SS, %ax
+ jne 27f
+ movl $__KERNEL_DS, %eax
+ movl %eax, %ds
+ movl %eax, %es
+ /* switch to normal stack */
+ FIXUP_ESPFIX_STACK
+27:
+.endm
/*
* Build the entry stubs and pointer table with some assembler magic.
@@ -672,7 +796,7 @@
ENDPROC(common_interrupt)
CFI_ENDPROC
-#define BUILD_INTERRUPT(name, nr) \
+#define BUILD_INTERRUPT3(name, nr, fn) \
ENTRY(name) \
RING0_INT_FRAME; \
pushl $~(nr); \
@@ -680,13 +804,15 @@
SAVE_ALL; \
TRACE_IRQS_OFF \
movl %esp,%eax; \
- call smp_##name; \
+ call fn; \
jmp ret_from_intr; \
CFI_ENDPROC; \
ENDPROC(name)
+#define BUILD_INTERRUPT(name, nr) BUILD_INTERRUPT3(name, nr, smp_##name)
+
/* The include is where all of the SMP etc. interrupts come from */
-#include "entry_arch.h"
+#include <asm/entry_arch.h>
ENTRY(coprocessor_error)
RING0_INT_FRAME
@@ -1068,7 +1194,10 @@
CFI_ADJUST_CFA_OFFSET 4
ALIGN
error_code:
- /* the function address is in %fs's slot on the stack */
+ /* the function address is in %gs's slot on the stack */
+ pushl %fs
+ CFI_ADJUST_CFA_OFFSET 4
+ /*CFI_REL_OFFSET fs, 0*/
pushl %es
CFI_ADJUST_CFA_OFFSET 4
/*CFI_REL_OFFSET es, 0*/
@@ -1097,20 +1226,15 @@
CFI_ADJUST_CFA_OFFSET 4
CFI_REL_OFFSET ebx, 0
cld
- pushl %fs
- CFI_ADJUST_CFA_OFFSET 4
- /*CFI_REL_OFFSET fs, 0*/
movl $(__KERNEL_PERCPU), %ecx
movl %ecx, %fs
UNWIND_ESPFIX_STACK
- popl %ecx
- CFI_ADJUST_CFA_OFFSET -4
- /*CFI_REGISTER es, ecx*/
- movl PT_FS(%esp), %edi # get the function address
+ GS_TO_REG %ecx
+ movl PT_GS(%esp), %edi # get the function address
movl PT_ORIG_EAX(%esp), %edx # get the error code
movl $-1, PT_ORIG_EAX(%esp) # no syscall to restart
- mov %ecx, PT_FS(%esp)
- /*CFI_REL_OFFSET fs, ES*/
+ REG_TO_PTGS %ecx
+ SET_KERNEL_GS %ecx
movl $(__USER_DS), %ecx
movl %ecx, %ds
movl %ecx, %es
@@ -1134,26 +1258,27 @@
* by hand onto the new stack - while updating the return eip past
* the instruction that would have done it for sysenter.
*/
-#define FIX_STACK(offset, ok, label) \
- cmpw $__KERNEL_CS,4(%esp); \
- jne ok; \
-label: \
- movl TSS_sysenter_sp0+offset(%esp),%esp; \
- CFI_DEF_CFA esp, 0; \
- CFI_UNDEFINED eip; \
- pushfl; \
- CFI_ADJUST_CFA_OFFSET 4; \
- pushl $__KERNEL_CS; \
- CFI_ADJUST_CFA_OFFSET 4; \
- pushl $sysenter_past_esp; \
- CFI_ADJUST_CFA_OFFSET 4; \
+.macro FIX_STACK offset ok label
+ cmpw $__KERNEL_CS, 4(%esp)
+ jne \ok
+\label:
+ movl TSS_sysenter_sp0 + \offset(%esp), %esp
+ CFI_DEF_CFA esp, 0
+ CFI_UNDEFINED eip
+ pushfl
+ CFI_ADJUST_CFA_OFFSET 4
+ pushl $__KERNEL_CS
+ CFI_ADJUST_CFA_OFFSET 4
+ pushl $sysenter_past_esp
+ CFI_ADJUST_CFA_OFFSET 4
CFI_REL_OFFSET eip, 0
+.endm
ENTRY(debug)
RING0_INT_FRAME
cmpl $ia32_sysenter_target,(%esp)
jne debug_stack_correct
- FIX_STACK(12, debug_stack_correct, debug_esp_fix_insn)
+ FIX_STACK 12, debug_stack_correct, debug_esp_fix_insn
debug_stack_correct:
pushl $-1 # mark this as an int
CFI_ADJUST_CFA_OFFSET 4
@@ -1211,7 +1336,7 @@
nmi_stack_fixup:
RING0_INT_FRAME
- FIX_STACK(12,nmi_stack_correct, 1)
+ FIX_STACK 12, nmi_stack_correct, 1
jmp nmi_stack_correct
nmi_debug_stack_check:
@@ -1222,7 +1347,7 @@
jb nmi_stack_correct
cmpl $debug_esp_fix_insn,(%esp)
ja nmi_stack_correct
- FIX_STACK(24,nmi_stack_correct, 1)
+ FIX_STACK 24, nmi_stack_correct, 1
jmp nmi_stack_correct
nmi_espfix_stack:
@@ -1234,7 +1359,7 @@
CFI_ADJUST_CFA_OFFSET 4
pushl %esp
CFI_ADJUST_CFA_OFFSET 4
- addw $4, (%esp)
+ addl $4, (%esp)
/* copy the iret frame of 12 bytes */
.rept 3
pushl 16(%esp)
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index a134621..24c7031 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -48,10 +48,11 @@
#include <asm/unistd.h>
#include <asm/thread_info.h>
#include <asm/hw_irq.h>
-#include <asm/page.h>
+#include <asm/page_types.h>
#include <asm/irqflags.h>
#include <asm/paravirt.h>
#include <asm/ftrace.h>
+#include <asm/percpu.h>
/* Avoid __ASSEMBLER__'ifying <linux/audit.h> just for this. */
#include <linux/elf-em.h>
@@ -76,20 +77,17 @@
movq 8(%rbp), %rsi
subq $MCOUNT_INSN_SIZE, %rdi
-.globl ftrace_call
-ftrace_call:
+GLOBAL(ftrace_call)
call ftrace_stub
MCOUNT_RESTORE_FRAME
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
-.globl ftrace_graph_call
-ftrace_graph_call:
+GLOBAL(ftrace_graph_call)
jmp ftrace_stub
#endif
-.globl ftrace_stub
-ftrace_stub:
+GLOBAL(ftrace_stub)
retq
END(ftrace_caller)
@@ -109,8 +107,7 @@
jnz ftrace_graph_caller
#endif
-.globl ftrace_stub
-ftrace_stub:
+GLOBAL(ftrace_stub)
retq
trace:
@@ -147,9 +144,7 @@
retq
END(ftrace_graph_caller)
-
-.globl return_to_handler
-return_to_handler:
+GLOBAL(return_to_handler)
subq $80, %rsp
movq %rax, (%rsp)
@@ -187,6 +182,7 @@
ENTRY(native_usergs_sysret64)
swapgs
sysretq
+ENDPROC(native_usergs_sysret64)
#endif /* CONFIG_PARAVIRT */
@@ -209,7 +205,7 @@
/* %rsp:at FRAMEEND */
.macro FIXUP_TOP_OF_STACK tmp offset=0
- movq %gs:pda_oldrsp,\tmp
+ movq PER_CPU_VAR(old_rsp),\tmp
movq \tmp,RSP+\offset(%rsp)
movq $__USER_DS,SS+\offset(%rsp)
movq $__USER_CS,CS+\offset(%rsp)
@@ -220,7 +216,7 @@
.macro RESTORE_TOP_OF_STACK tmp offset=0
movq RSP+\offset(%rsp),\tmp
- movq \tmp,%gs:pda_oldrsp
+ movq \tmp,PER_CPU_VAR(old_rsp)
movq EFLAGS+\offset(%rsp),\tmp
movq \tmp,R11+\offset(%rsp)
.endm
@@ -336,15 +332,15 @@
je 1f
SWAPGS
/*
- * irqcount is used to check if a CPU is already on an interrupt stack
+ * irq_count is used to check if a CPU is already on an interrupt stack
* or not. While this is essentially redundant with preempt_count it is
* a little cheaper to use a separate counter in the PDA (short of
* moving irq_enter into assembly, which would be too much work)
*/
-1: incl %gs:pda_irqcount
+1: incl PER_CPU_VAR(irq_count)
jne 2f
popq_cfi %rax /* move return address... */
- mov %gs:pda_irqstackptr,%rsp
+ mov PER_CPU_VAR(irq_stack_ptr),%rsp
EMPTY_FRAME 0
pushq_cfi %rbp /* backlink for unwinder */
pushq_cfi %rax /* ... to the new stack */
@@ -409,6 +405,8 @@
ENTRY(ret_from_fork)
DEFAULT_FRAME
+ LOCK ; btr $TIF_FORK,TI_flags(%r8)
+
push kernel_eflags(%rip)
CFI_ADJUST_CFA_OFFSET 8
popf # reset kernel eflags
@@ -468,7 +466,7 @@
ENTRY(system_call)
CFI_STARTPROC simple
CFI_SIGNAL_FRAME
- CFI_DEF_CFA rsp,PDA_STACKOFFSET
+ CFI_DEF_CFA rsp,KERNEL_STACK_OFFSET
CFI_REGISTER rip,rcx
/*CFI_REGISTER rflags,r11*/
SWAPGS_UNSAFE_STACK
@@ -479,8 +477,8 @@
*/
ENTRY(system_call_after_swapgs)
- movq %rsp,%gs:pda_oldrsp
- movq %gs:pda_kernelstack,%rsp
+ movq %rsp,PER_CPU_VAR(old_rsp)
+ movq PER_CPU_VAR(kernel_stack),%rsp
/*
* No need to follow this irqs off/on section - it's straight
* and short:
@@ -523,7 +521,7 @@
CFI_REGISTER rip,rcx
RESTORE_ARGS 0,-ARG_SKIP,1
/*CFI_REGISTER rflags,r11*/
- movq %gs:pda_oldrsp, %rsp
+ movq PER_CPU_VAR(old_rsp), %rsp
USERGS_SYSRET64
CFI_RESTORE_STATE
@@ -630,16 +628,14 @@
* Syscall return path ending with IRET.
* Has correct top of stack, but partial stack frame.
*/
- .globl int_ret_from_sys_call
- .globl int_with_check
-int_ret_from_sys_call:
+GLOBAL(int_ret_from_sys_call)
DISABLE_INTERRUPTS(CLBR_NONE)
TRACE_IRQS_OFF
testl $3,CS-ARGOFFSET(%rsp)
je retint_restore_args
movl $_TIF_ALLWORK_MASK,%edi
/* edi: mask to check */
-int_with_check:
+GLOBAL(int_with_check)
LOCKDEP_SYS_EXIT_IRQ
GET_THREAD_INFO(%rcx)
movl TI_flags(%rcx),%edx
@@ -833,11 +829,11 @@
XCPT_FRAME
addq $-0x80,(%rsp) /* Adjust vector to [-256,-1] range */
interrupt do_IRQ
- /* 0(%rsp): oldrsp-ARGOFFSET */
+ /* 0(%rsp): old_rsp-ARGOFFSET */
ret_from_intr:
DISABLE_INTERRUPTS(CLBR_NONE)
TRACE_IRQS_OFF
- decl %gs:pda_irqcount
+ decl PER_CPU_VAR(irq_count)
leaveq
CFI_DEF_CFA_REGISTER rsp
CFI_ADJUST_CFA_OFFSET -8
@@ -982,8 +978,10 @@
irq_move_cleanup_interrupt smp_irq_move_cleanup_interrupt
#endif
+#ifdef CONFIG_X86_UV
apicinterrupt UV_BAU_MESSAGE \
uv_bau_message_intr1 uv_bau_message_interrupt
+#endif
apicinterrupt LOCAL_TIMER_VECTOR \
apic_timer_interrupt smp_apic_timer_interrupt
@@ -1025,6 +1023,11 @@
apicinterrupt SPURIOUS_APIC_VECTOR \
spurious_interrupt smp_spurious_interrupt
+#ifdef CONFIG_PERF_COUNTERS
+apicinterrupt LOCAL_PERF_VECTOR \
+ perf_counter_interrupt smp_perf_counter_interrupt
+#endif
+
/*
* Exception entry points.
*/
@@ -1073,10 +1076,10 @@
TRACE_IRQS_OFF
movq %rsp,%rdi /* pt_regs pointer */
xorl %esi,%esi /* no error code */
- movq %gs:pda_data_offset, %rbp
- subq $EXCEPTION_STKSZ, per_cpu__init_tss + TSS_ist + (\ist - 1) * 8(%rbp)
+ PER_CPU(init_tss, %rbp)
+ subq $EXCEPTION_STKSZ, TSS_ist + (\ist - 1) * 8(%rbp)
call \do_sym
- addq $EXCEPTION_STKSZ, per_cpu__init_tss + TSS_ist + (\ist - 1) * 8(%rbp)
+ addq $EXCEPTION_STKSZ, TSS_ist + (\ist - 1) * 8(%rbp)
jmp paranoid_exit /* %ebx: no swapgs flag */
CFI_ENDPROC
END(\sym)
@@ -1138,7 +1141,7 @@
CFI_STARTPROC
pushf
CFI_ADJUST_CFA_OFFSET 8
- DISABLE_INTERRUPTS(CLBR_ANY | ~(CLBR_RDI))
+ DISABLE_INTERRUPTS(CLBR_ANY & ~CLBR_RDI)
SWAPGS
gs_change:
movl %edi,%gs
@@ -1260,14 +1263,14 @@
CFI_REL_OFFSET rbp,0
mov %rsp,%rbp
CFI_DEF_CFA_REGISTER rbp
- incl %gs:pda_irqcount
- cmove %gs:pda_irqstackptr,%rsp
+ incl PER_CPU_VAR(irq_count)
+ cmove PER_CPU_VAR(irq_stack_ptr),%rsp
push %rbp # backlink for old unwinder
call __do_softirq
leaveq
CFI_DEF_CFA_REGISTER rsp
CFI_ADJUST_CFA_OFFSET -8
- decl %gs:pda_irqcount
+ decl PER_CPU_VAR(irq_count)
ret
CFI_ENDPROC
END(call_softirq)
@@ -1297,15 +1300,15 @@
movq %rdi, %rsp # we don't return, adjust the stack frame
CFI_ENDPROC
DEFAULT_FRAME
-11: incl %gs:pda_irqcount
+11: incl PER_CPU_VAR(irq_count)
movq %rsp,%rbp
CFI_DEF_CFA_REGISTER rbp
- cmovzq %gs:pda_irqstackptr,%rsp
+ cmovzq PER_CPU_VAR(irq_stack_ptr),%rsp
pushq %rbp # backlink for old unwinder
call xen_evtchn_do_upcall
popq %rsp
CFI_DEF_CFA_REGISTER rsp
- decl %gs:pda_irqcount
+ decl PER_CPU_VAR(irq_count)
jmp error_exit
CFI_ENDPROC
END(do_hypervisor_callback)
diff --git a/arch/x86/kernel/es7000_32.c b/arch/x86/kernel/es7000_32.c
deleted file mode 100644
index 53699c9..0000000
--- a/arch/x86/kernel/es7000_32.c
+++ /dev/null
@@ -1,378 +0,0 @@
-/*
- * Written by: Garry Forsgren, Unisys Corporation
- * Natalie Protasevich, Unisys Corporation
- * This file contains the code to configure and interface
- * with Unisys ES7000 series hardware system manager.
- *
- * Copyright (c) 2003 Unisys Corporation. All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write the Free Software Foundation, Inc., 59
- * Temple Place - Suite 330, Boston MA 02111-1307, USA.
- *
- * Contact information: Unisys Corporation, Township Line & Union Meeting
- * Roads-A, Unisys Way, Blue Bell, Pennsylvania, 19424, or:
- *
- * http://www.unisys.com
- */
-
-#include <linux/module.h>
-#include <linux/types.h>
-#include <linux/kernel.h>
-#include <linux/smp.h>
-#include <linux/string.h>
-#include <linux/spinlock.h>
-#include <linux/errno.h>
-#include <linux/notifier.h>
-#include <linux/reboot.h>
-#include <linux/init.h>
-#include <linux/acpi.h>
-#include <asm/io.h>
-#include <asm/nmi.h>
-#include <asm/smp.h>
-#include <asm/atomic.h>
-#include <asm/apicdef.h>
-#include <mach_mpparse.h>
-#include <asm/genapic.h>
-#include <asm/setup.h>
-
-/*
- * ES7000 chipsets
- */
-
-#define NON_UNISYS 0
-#define ES7000_CLASSIC 1
-#define ES7000_ZORRO 2
-
-
-#define MIP_REG 1
-#define MIP_PSAI_REG 4
-
-#define MIP_BUSY 1
-#define MIP_SPIN 0xf0000
-#define MIP_VALID 0x0100000000000000ULL
-#define MIP_PORT(VALUE) ((VALUE >> 32) & 0xffff)
-
-#define MIP_RD_LO(VALUE) (VALUE & 0xffffffff)
-
-struct mip_reg_info {
- unsigned long long mip_info;
- unsigned long long delivery_info;
- unsigned long long host_reg;
- unsigned long long mip_reg;
-};
-
-struct part_info {
- unsigned char type;
- unsigned char length;
- unsigned char part_id;
- unsigned char apic_mode;
- unsigned long snum;
- char ptype[16];
- char sname[64];
- char pname[64];
-};
-
-struct psai {
- unsigned long long entry_type;
- unsigned long long addr;
- unsigned long long bep_addr;
-};
-
-struct es7000_mem_info {
- unsigned char type;
- unsigned char length;
- unsigned char resv[6];
- unsigned long long start;
- unsigned long long size;
-};
-
-struct es7000_oem_table {
- unsigned long long hdr;
- struct mip_reg_info mip;
- struct part_info pif;
- struct es7000_mem_info shm;
- struct psai psai;
-};
-
-#ifdef CONFIG_ACPI
-
-struct oem_table {
- struct acpi_table_header Header;
- u32 OEMTableAddr;
- u32 OEMTableSize;
-};
-
-extern int find_unisys_acpi_oem_table(unsigned long *oem_addr);
-extern void unmap_unisys_acpi_oem_table(unsigned long oem_addr);
-#endif
-
-struct mip_reg {
- unsigned long long off_0;
- unsigned long long off_8;
- unsigned long long off_10;
- unsigned long long off_18;
- unsigned long long off_20;
- unsigned long long off_28;
- unsigned long long off_30;
- unsigned long long off_38;
-};
-
-#define MIP_SW_APIC 0x1020b
-#define MIP_FUNC(VALUE) (VALUE & 0xff)
-
-/*
- * ES7000 Globals
- */
-
-static volatile unsigned long *psai = NULL;
-static struct mip_reg *mip_reg;
-static struct mip_reg *host_reg;
-static int mip_port;
-static unsigned long mip_addr, host_addr;
-
-int es7000_plat;
-
-/*
- * GSI override for ES7000 platforms.
- */
-
-static unsigned int base;
-
-static int
-es7000_rename_gsi(int ioapic, int gsi)
-{
- if (es7000_plat == ES7000_ZORRO)
- return gsi;
-
- if (!base) {
- int i;
- for (i = 0; i < nr_ioapics; i++)
- base += nr_ioapic_registers[i];
- }
-
- if (!ioapic && (gsi < 16))
- gsi += base;
- return gsi;
-}
-
-static int wakeup_secondary_cpu_via_mip(int cpu, unsigned long eip)
-{
- unsigned long vect = 0, psaival = 0;
-
- if (psai == NULL)
- return -1;
-
- vect = ((unsigned long)__pa(eip)/0x1000) << 16;
- psaival = (0x1000000 | vect | cpu);
-
- while (*psai & 0x1000000)
- ;
-
- *psai = psaival;
-
- return 0;
-}
-
-static void noop_wait_for_deassert(atomic_t *deassert_not_used)
-{
-}
-
-static int __init es7000_update_genapic(void)
-{
- genapic->wakeup_cpu = wakeup_secondary_cpu_via_mip;
-
- /* MPENTIUMIII */
- if (boot_cpu_data.x86 == 6 &&
- (boot_cpu_data.x86_model >= 7 || boot_cpu_data.x86_model <= 11)) {
- es7000_update_genapic_to_cluster();
- genapic->wait_for_init_deassert = noop_wait_for_deassert;
- genapic->wakeup_cpu = wakeup_secondary_cpu_via_mip;
- }
-
- return 0;
-}
-
-void __init
-setup_unisys(void)
-{
- /*
- * Determine the generation of the ES7000 currently running.
- *
- * es7000_plat = 1 if the machine is a 5xx ES7000 box
- * es7000_plat = 2 if the machine is a x86_64 ES7000 box
- *
- */
- if (!(boot_cpu_data.x86 <= 15 && boot_cpu_data.x86_model <= 2))
- es7000_plat = ES7000_ZORRO;
- else
- es7000_plat = ES7000_CLASSIC;
- ioapic_renumber_irq = es7000_rename_gsi;
-
- x86_quirks->update_genapic = es7000_update_genapic;
-}
-
-/*
- * Parse the OEM Table
- */
-
-int __init
-parse_unisys_oem (char *oemptr)
-{
- int i;
- int success = 0;
- unsigned char type, size;
- unsigned long val;
- char *tp = NULL;
- struct psai *psaip = NULL;
- struct mip_reg_info *mi;
- struct mip_reg *host, *mip;
-
- tp = oemptr;
-
- tp += 8;
-
- for (i=0; i <= 6; i++) {
- type = *tp++;
- size = *tp++;
- tp -= 2;
- switch (type) {
- case MIP_REG:
- mi = (struct mip_reg_info *)tp;
- val = MIP_RD_LO(mi->host_reg);
- host_addr = val;
- host = (struct mip_reg *)val;
- host_reg = __va(host);
- val = MIP_RD_LO(mi->mip_reg);
- mip_port = MIP_PORT(mi->mip_info);
- mip_addr = val;
- mip = (struct mip_reg *)val;
- mip_reg = __va(mip);
- pr_debug("es7000_mipcfg: host_reg = 0x%lx \n",
- (unsigned long)host_reg);
- pr_debug("es7000_mipcfg: mip_reg = 0x%lx \n",
- (unsigned long)mip_reg);
- success++;
- break;
- case MIP_PSAI_REG:
- psaip = (struct psai *)tp;
- if (tp != NULL) {
- if (psaip->addr)
- psai = __va(psaip->addr);
- else
- psai = NULL;
- success++;
- }
- break;
- default:
- break;
- }
- tp += size;
- }
-
- if (success < 2) {
- es7000_plat = NON_UNISYS;
- } else
- setup_unisys();
- return es7000_plat;
-}
-
-#ifdef CONFIG_ACPI
-static unsigned long oem_addrX;
-static unsigned long oem_size;
-int __init find_unisys_acpi_oem_table(unsigned long *oem_addr)
-{
- struct acpi_table_header *header = NULL;
- int i = 0;
-
- while (ACPI_SUCCESS(acpi_get_table("OEM1", i++, &header))) {
- if (!memcmp((char *) &header->oem_id, "UNISYS", 6)) {
- struct oem_table *t = (struct oem_table *)header;
-
- oem_addrX = t->OEMTableAddr;
- oem_size = t->OEMTableSize;
-
- *oem_addr = (unsigned long)__acpi_map_table(oem_addrX,
- oem_size);
- return 0;
- }
- }
- return -1;
-}
-
-void __init unmap_unisys_acpi_oem_table(unsigned long oem_addr)
-{
-}
-#endif
-
-static void
-es7000_spin(int n)
-{
- int i = 0;
-
- while (i++ < n)
- rep_nop();
-}
-
-static int __init
-es7000_mip_write(struct mip_reg *mip_reg)
-{
- int status = 0;
- int spin;
-
- spin = MIP_SPIN;
- while (((unsigned long long)host_reg->off_38 &
- (unsigned long long)MIP_VALID) != 0) {
- if (--spin <= 0) {
- printk("es7000_mip_write: Timeout waiting for Host Valid Flag");
- return -1;
- }
- es7000_spin(MIP_SPIN);
- }
-
- memcpy(host_reg, mip_reg, sizeof(struct mip_reg));
- outb(1, mip_port);
-
- spin = MIP_SPIN;
-
- while (((unsigned long long)mip_reg->off_38 &
- (unsigned long long)MIP_VALID) == 0) {
- if (--spin <= 0) {
- printk("es7000_mip_write: Timeout waiting for MIP Valid Flag");
- return -1;
- }
- es7000_spin(MIP_SPIN);
- }
-
- status = ((unsigned long long)mip_reg->off_0 &
- (unsigned long long)0xffff0000000000ULL) >> 48;
- mip_reg->off_38 = ((unsigned long long)mip_reg->off_38 &
- (unsigned long long)~MIP_VALID);
- return status;
-}
-
-void __init
-es7000_sw_apic(void)
-{
- if (es7000_plat) {
- int mip_status;
- struct mip_reg es7000_mip_reg;
-
- printk("ES7000: Enabling APIC mode.\n");
- memset(&es7000_mip_reg, 0, sizeof(struct mip_reg));
- es7000_mip_reg.off_0 = MIP_SW_APIC;
- es7000_mip_reg.off_38 = (MIP_VALID);
- while ((mip_status = es7000_mip_write(&es7000_mip_reg)) != 0)
- printk("es7000_sw_apic: command failed, status = %x\n",
- mip_status);
- return;
- }
-}
diff --git a/arch/x86/kernel/genapic_64.c b/arch/x86/kernel/genapic_64.c
deleted file mode 100644
index 2bced78..0000000
--- a/arch/x86/kernel/genapic_64.c
+++ /dev/null
@@ -1,82 +0,0 @@
-/*
- * Copyright 2004 James Cleverdon, IBM.
- * Subject to the GNU Public License, v.2
- *
- * Generic APIC sub-arch probe layer.
- *
- * Hacked for x86-64 by James Cleverdon from i386 architecture code by
- * Martin Bligh, Andi Kleen, James Bottomley, John Stultz, and
- * James Cleverdon.
- */
-#include <linux/threads.h>
-#include <linux/cpumask.h>
-#include <linux/string.h>
-#include <linux/module.h>
-#include <linux/kernel.h>
-#include <linux/ctype.h>
-#include <linux/init.h>
-#include <linux/hardirq.h>
-#include <linux/dmar.h>
-
-#include <asm/smp.h>
-#include <asm/ipi.h>
-#include <asm/genapic.h>
-#include <asm/setup.h>
-
-extern struct genapic apic_flat;
-extern struct genapic apic_physflat;
-extern struct genapic apic_x2xpic_uv_x;
-extern struct genapic apic_x2apic_phys;
-extern struct genapic apic_x2apic_cluster;
-
-struct genapic __read_mostly *genapic = &apic_flat;
-
-static struct genapic *apic_probe[] __initdata = {
- &apic_x2apic_uv_x,
- &apic_x2apic_phys,
- &apic_x2apic_cluster,
- &apic_physflat,
- NULL,
-};
-
-/*
- * Check the APIC IDs in bios_cpu_apicid and choose the APIC mode.
- */
-void __init setup_apic_routing(void)
-{
- if (genapic == &apic_x2apic_phys || genapic == &apic_x2apic_cluster) {
- if (!intr_remapping_enabled)
- genapic = &apic_flat;
- }
-
- if (genapic == &apic_flat) {
- if (max_physical_apicid >= 8)
- genapic = &apic_physflat;
- printk(KERN_INFO "Setting APIC routing to %s\n", genapic->name);
- }
-
- if (x86_quirks->update_genapic)
- x86_quirks->update_genapic();
-}
-
-/* Same for both flat and physical. */
-
-void apic_send_IPI_self(int vector)
-{
- __send_IPI_shortcut(APIC_DEST_SELF, vector, APIC_DEST_PHYSICAL);
-}
-
-int __init acpi_madt_oem_check(char *oem_id, char *oem_table_id)
-{
- int i;
-
- for (i = 0; apic_probe[i]; ++i) {
- if (apic_probe[i]->acpi_madt_oem_check(oem_id, oem_table_id)) {
- genapic = apic_probe[i];
- printk(KERN_INFO "Setting APIC routing to %s.\n",
- genapic->name);
- return 1;
- }
- }
- return 0;
-}
diff --git a/arch/x86/kernel/genapic_flat_64.c b/arch/x86/kernel/genapic_flat_64.c
deleted file mode 100644
index 3418548..0000000
--- a/arch/x86/kernel/genapic_flat_64.c
+++ /dev/null
@@ -1,307 +0,0 @@
-/*
- * Copyright 2004 James Cleverdon, IBM.
- * Subject to the GNU Public License, v.2
- *
- * Flat APIC subarch code.
- *
- * Hacked for x86-64 by James Cleverdon from i386 architecture code by
- * Martin Bligh, Andi Kleen, James Bottomley, John Stultz, and
- * James Cleverdon.
- */
-#include <linux/errno.h>
-#include <linux/threads.h>
-#include <linux/cpumask.h>
-#include <linux/string.h>
-#include <linux/kernel.h>
-#include <linux/ctype.h>
-#include <linux/init.h>
-#include <linux/hardirq.h>
-#include <asm/smp.h>
-#include <asm/ipi.h>
-#include <asm/genapic.h>
-#include <mach_apicdef.h>
-
-#ifdef CONFIG_ACPI
-#include <acpi/acpi_bus.h>
-#endif
-
-static int flat_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
-{
- return 1;
-}
-
-static const struct cpumask *flat_target_cpus(void)
-{
- return cpu_online_mask;
-}
-
-static void flat_vector_allocation_domain(int cpu, struct cpumask *retmask)
-{
- /* Careful. Some cpus do not strictly honor the set of cpus
- * specified in the interrupt destination when using lowest
- * priority interrupt delivery mode.
- *
- * In particular there was a hyperthreading cpu observed to
- * deliver interrupts to the wrong hyperthread when only one
- * hyperthread was specified in the interrupt desitination.
- */
- cpumask_clear(retmask);
- cpumask_bits(retmask)[0] = APIC_ALL_CPUS;
-}
-
-/*
- * Set up the logical destination ID.
- *
- * Intel recommends to set DFR, LDR and TPR before enabling
- * an APIC. See e.g. "AP-388 82489DX User's Manual" (Intel
- * document number 292116). So here it goes...
- */
-static void flat_init_apic_ldr(void)
-{
- unsigned long val;
- unsigned long num, id;
-
- num = smp_processor_id();
- id = 1UL << num;
- apic_write(APIC_DFR, APIC_DFR_FLAT);
- val = apic_read(APIC_LDR) & ~APIC_LDR_MASK;
- val |= SET_APIC_LOGICAL_ID(id);
- apic_write(APIC_LDR, val);
-}
-
-static inline void _flat_send_IPI_mask(unsigned long mask, int vector)
-{
- unsigned long flags;
-
- local_irq_save(flags);
- __send_IPI_dest_field(mask, vector, APIC_DEST_LOGICAL);
- local_irq_restore(flags);
-}
-
-static void flat_send_IPI_mask(const struct cpumask *cpumask, int vector)
-{
- unsigned long mask = cpumask_bits(cpumask)[0];
-
- _flat_send_IPI_mask(mask, vector);
-}
-
-static void flat_send_IPI_mask_allbutself(const struct cpumask *cpumask,
- int vector)
-{
- unsigned long mask = cpumask_bits(cpumask)[0];
- int cpu = smp_processor_id();
-
- if (cpu < BITS_PER_LONG)
- clear_bit(cpu, &mask);
- _flat_send_IPI_mask(mask, vector);
-}
-
-static void flat_send_IPI_allbutself(int vector)
-{
- int cpu = smp_processor_id();
-#ifdef CONFIG_HOTPLUG_CPU
- int hotplug = 1;
-#else
- int hotplug = 0;
-#endif
- if (hotplug || vector == NMI_VECTOR) {
- if (!cpumask_equal(cpu_online_mask, cpumask_of(cpu))) {
- unsigned long mask = cpumask_bits(cpu_online_mask)[0];
-
- if (cpu < BITS_PER_LONG)
- clear_bit(cpu, &mask);
-
- _flat_send_IPI_mask(mask, vector);
- }
- } else if (num_online_cpus() > 1) {
- __send_IPI_shortcut(APIC_DEST_ALLBUT, vector,APIC_DEST_LOGICAL);
- }
-}
-
-static void flat_send_IPI_all(int vector)
-{
- if (vector == NMI_VECTOR)
- flat_send_IPI_mask(cpu_online_mask, vector);
- else
- __send_IPI_shortcut(APIC_DEST_ALLINC, vector, APIC_DEST_LOGICAL);
-}
-
-static unsigned int get_apic_id(unsigned long x)
-{
- unsigned int id;
-
- id = (((x)>>24) & 0xFFu);
- return id;
-}
-
-static unsigned long set_apic_id(unsigned int id)
-{
- unsigned long x;
-
- x = ((id & 0xFFu)<<24);
- return x;
-}
-
-static unsigned int read_xapic_id(void)
-{
- unsigned int id;
-
- id = get_apic_id(apic_read(APIC_ID));
- return id;
-}
-
-static int flat_apic_id_registered(void)
-{
- return physid_isset(read_xapic_id(), phys_cpu_present_map);
-}
-
-static unsigned int flat_cpu_mask_to_apicid(const struct cpumask *cpumask)
-{
- return cpumask_bits(cpumask)[0] & APIC_ALL_CPUS;
-}
-
-static unsigned int flat_cpu_mask_to_apicid_and(const struct cpumask *cpumask,
- const struct cpumask *andmask)
-{
- unsigned long mask1 = cpumask_bits(cpumask)[0] & APIC_ALL_CPUS;
- unsigned long mask2 = cpumask_bits(andmask)[0] & APIC_ALL_CPUS;
-
- return mask1 & mask2;
-}
-
-static unsigned int phys_pkg_id(int index_msb)
-{
- return hard_smp_processor_id() >> index_msb;
-}
-
-struct genapic apic_flat = {
- .name = "flat",
- .acpi_madt_oem_check = flat_acpi_madt_oem_check,
- .int_delivery_mode = dest_LowestPrio,
- .int_dest_mode = (APIC_DEST_LOGICAL != 0),
- .target_cpus = flat_target_cpus,
- .vector_allocation_domain = flat_vector_allocation_domain,
- .apic_id_registered = flat_apic_id_registered,
- .init_apic_ldr = flat_init_apic_ldr,
- .send_IPI_all = flat_send_IPI_all,
- .send_IPI_allbutself = flat_send_IPI_allbutself,
- .send_IPI_mask = flat_send_IPI_mask,
- .send_IPI_mask_allbutself = flat_send_IPI_mask_allbutself,
- .send_IPI_self = apic_send_IPI_self,
- .cpu_mask_to_apicid = flat_cpu_mask_to_apicid,
- .cpu_mask_to_apicid_and = flat_cpu_mask_to_apicid_and,
- .phys_pkg_id = phys_pkg_id,
- .get_apic_id = get_apic_id,
- .set_apic_id = set_apic_id,
- .apic_id_mask = (0xFFu<<24),
-};
-
-/*
- * Physflat mode is used when there are more than 8 CPUs on a AMD system.
- * We cannot use logical delivery in this case because the mask
- * overflows, so use physical mode.
- */
-static int physflat_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
-{
-#ifdef CONFIG_ACPI
- /*
- * Quirk: some x86_64 machines can only use physical APIC mode
- * regardless of how many processors are present (x86_64 ES7000
- * is an example).
- */
- if (acpi_gbl_FADT.header.revision > FADT2_REVISION_ID &&
- (acpi_gbl_FADT.flags & ACPI_FADT_APIC_PHYSICAL)) {
- printk(KERN_DEBUG "system APIC only can use physical flat");
- return 1;
- }
-#endif
-
- return 0;
-}
-
-static const struct cpumask *physflat_target_cpus(void)
-{
- return cpu_online_mask;
-}
-
-static void physflat_vector_allocation_domain(int cpu, struct cpumask *retmask)
-{
- cpumask_clear(retmask);
- cpumask_set_cpu(cpu, retmask);
-}
-
-static void physflat_send_IPI_mask(const struct cpumask *cpumask, int vector)
-{
- send_IPI_mask_sequence(cpumask, vector);
-}
-
-static void physflat_send_IPI_mask_allbutself(const struct cpumask *cpumask,
- int vector)
-{
- send_IPI_mask_allbutself(cpumask, vector);
-}
-
-static void physflat_send_IPI_allbutself(int vector)
-{
- send_IPI_mask_allbutself(cpu_online_mask, vector);
-}
-
-static void physflat_send_IPI_all(int vector)
-{
- physflat_send_IPI_mask(cpu_online_mask, vector);
-}
-
-static unsigned int physflat_cpu_mask_to_apicid(const struct cpumask *cpumask)
-{
- int cpu;
-
- /*
- * We're using fixed IRQ delivery, can only return one phys APIC ID.
- * May as well be the first.
- */
- cpu = cpumask_first(cpumask);
- if ((unsigned)cpu < nr_cpu_ids)
- return per_cpu(x86_cpu_to_apicid, cpu);
- else
- return BAD_APICID;
-}
-
-static unsigned int
-physflat_cpu_mask_to_apicid_and(const struct cpumask *cpumask,
- const struct cpumask *andmask)
-{
- int cpu;
-
- /*
- * We're using fixed IRQ delivery, can only return one phys APIC ID.
- * May as well be the first.
- */
- for_each_cpu_and(cpu, cpumask, andmask)
- if (cpumask_test_cpu(cpu, cpu_online_mask))
- break;
- if (cpu < nr_cpu_ids)
- return per_cpu(x86_cpu_to_apicid, cpu);
- return BAD_APICID;
-}
-
-struct genapic apic_physflat = {
- .name = "physical flat",
- .acpi_madt_oem_check = physflat_acpi_madt_oem_check,
- .int_delivery_mode = dest_Fixed,
- .int_dest_mode = (APIC_DEST_PHYSICAL != 0),
- .target_cpus = physflat_target_cpus,
- .vector_allocation_domain = physflat_vector_allocation_domain,
- .apic_id_registered = flat_apic_id_registered,
- .init_apic_ldr = flat_init_apic_ldr,/*not needed, but shouldn't hurt*/
- .send_IPI_all = physflat_send_IPI_all,
- .send_IPI_allbutself = physflat_send_IPI_allbutself,
- .send_IPI_mask = physflat_send_IPI_mask,
- .send_IPI_mask_allbutself = physflat_send_IPI_mask_allbutself,
- .send_IPI_self = apic_send_IPI_self,
- .cpu_mask_to_apicid = physflat_cpu_mask_to_apicid,
- .cpu_mask_to_apicid_and = physflat_cpu_mask_to_apicid_and,
- .phys_pkg_id = phys_pkg_id,
- .get_apic_id = get_apic_id,
- .set_apic_id = set_apic_id,
- .apic_id_mask = (0xFFu<<24),
-};
diff --git a/arch/x86/kernel/genx2apic_cluster.c b/arch/x86/kernel/genx2apic_cluster.c
deleted file mode 100644
index 6ce497c..0000000
--- a/arch/x86/kernel/genx2apic_cluster.c
+++ /dev/null
@@ -1,198 +0,0 @@
-#include <linux/threads.h>
-#include <linux/cpumask.h>
-#include <linux/string.h>
-#include <linux/kernel.h>
-#include <linux/ctype.h>
-#include <linux/init.h>
-#include <linux/dmar.h>
-
-#include <asm/smp.h>
-#include <asm/ipi.h>
-#include <asm/genapic.h>
-
-DEFINE_PER_CPU(u32, x86_cpu_to_logical_apicid);
-
-static int x2apic_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
-{
- if (cpu_has_x2apic)
- return 1;
-
- return 0;
-}
-
-/* Start with all IRQs pointing to boot CPU. IRQ balancing will shift them. */
-
-static const struct cpumask *x2apic_target_cpus(void)
-{
- return cpumask_of(0);
-}
-
-/*
- * for now each logical cpu is in its own vector allocation domain.
- */
-static void x2apic_vector_allocation_domain(int cpu, struct cpumask *retmask)
-{
- cpumask_clear(retmask);
- cpumask_set_cpu(cpu, retmask);
-}
-
-static void __x2apic_send_IPI_dest(unsigned int apicid, int vector,
- unsigned int dest)
-{
- unsigned long cfg;
-
- cfg = __prepare_ICR(0, vector, dest);
-
- /*
- * send the IPI.
- */
- x2apic_icr_write(cfg, apicid);
-}
-
-/*
- * for now, we send the IPI's one by one in the cpumask.
- * TBD: Based on the cpu mask, we can send the IPI's to the cluster group
- * at once. We have 16 cpu's in a cluster. This will minimize IPI register
- * writes.
- */
-static void x2apic_send_IPI_mask(const struct cpumask *mask, int vector)
-{
- unsigned long flags;
- unsigned long query_cpu;
-
- local_irq_save(flags);
- for_each_cpu(query_cpu, mask)
- __x2apic_send_IPI_dest(
- per_cpu(x86_cpu_to_logical_apicid, query_cpu),
- vector, APIC_DEST_LOGICAL);
- local_irq_restore(flags);
-}
-
-static void x2apic_send_IPI_mask_allbutself(const struct cpumask *mask,
- int vector)
-{
- unsigned long flags;
- unsigned long query_cpu;
- unsigned long this_cpu = smp_processor_id();
-
- local_irq_save(flags);
- for_each_cpu(query_cpu, mask)
- if (query_cpu != this_cpu)
- __x2apic_send_IPI_dest(
- per_cpu(x86_cpu_to_logical_apicid, query_cpu),
- vector, APIC_DEST_LOGICAL);
- local_irq_restore(flags);
-}
-
-static void x2apic_send_IPI_allbutself(int vector)
-{
- unsigned long flags;
- unsigned long query_cpu;
- unsigned long this_cpu = smp_processor_id();
-
- local_irq_save(flags);
- for_each_online_cpu(query_cpu)
- if (query_cpu != this_cpu)
- __x2apic_send_IPI_dest(
- per_cpu(x86_cpu_to_logical_apicid, query_cpu),
- vector, APIC_DEST_LOGICAL);
- local_irq_restore(flags);
-}
-
-static void x2apic_send_IPI_all(int vector)
-{
- x2apic_send_IPI_mask(cpu_online_mask, vector);
-}
-
-static int x2apic_apic_id_registered(void)
-{
- return 1;
-}
-
-static unsigned int x2apic_cpu_mask_to_apicid(const struct cpumask *cpumask)
-{
- int cpu;
-
- /*
- * We're using fixed IRQ delivery, can only return one logical APIC ID.
- * May as well be the first.
- */
- cpu = cpumask_first(cpumask);
- if ((unsigned)cpu < nr_cpu_ids)
- return per_cpu(x86_cpu_to_logical_apicid, cpu);
- else
- return BAD_APICID;
-}
-
-static unsigned int x2apic_cpu_mask_to_apicid_and(const struct cpumask *cpumask,
- const struct cpumask *andmask)
-{
- int cpu;
-
- /*
- * We're using fixed IRQ delivery, can only return one logical APIC ID.
- * May as well be the first.
- */
- for_each_cpu_and(cpu, cpumask, andmask)
- if (cpumask_test_cpu(cpu, cpu_online_mask))
- break;
- if (cpu < nr_cpu_ids)
- return per_cpu(x86_cpu_to_logical_apicid, cpu);
- return BAD_APICID;
-}
-
-static unsigned int get_apic_id(unsigned long x)
-{
- unsigned int id;
-
- id = x;
- return id;
-}
-
-static unsigned long set_apic_id(unsigned int id)
-{
- unsigned long x;
-
- x = id;
- return x;
-}
-
-static unsigned int phys_pkg_id(int index_msb)
-{
- return current_cpu_data.initial_apicid >> index_msb;
-}
-
-static void x2apic_send_IPI_self(int vector)
-{
- apic_write(APIC_SELF_IPI, vector);
-}
-
-static void init_x2apic_ldr(void)
-{
- int cpu = smp_processor_id();
-
- per_cpu(x86_cpu_to_logical_apicid, cpu) = apic_read(APIC_LDR);
- return;
-}
-
-struct genapic apic_x2apic_cluster = {
- .name = "cluster x2apic",
- .acpi_madt_oem_check = x2apic_acpi_madt_oem_check,
- .int_delivery_mode = dest_LowestPrio,
- .int_dest_mode = (APIC_DEST_LOGICAL != 0),
- .target_cpus = x2apic_target_cpus,
- .vector_allocation_domain = x2apic_vector_allocation_domain,
- .apic_id_registered = x2apic_apic_id_registered,
- .init_apic_ldr = init_x2apic_ldr,
- .send_IPI_all = x2apic_send_IPI_all,
- .send_IPI_allbutself = x2apic_send_IPI_allbutself,
- .send_IPI_mask = x2apic_send_IPI_mask,
- .send_IPI_mask_allbutself = x2apic_send_IPI_mask_allbutself,
- .send_IPI_self = x2apic_send_IPI_self,
- .cpu_mask_to_apicid = x2apic_cpu_mask_to_apicid,
- .cpu_mask_to_apicid_and = x2apic_cpu_mask_to_apicid_and,
- .phys_pkg_id = phys_pkg_id,
- .get_apic_id = get_apic_id,
- .set_apic_id = set_apic_id,
- .apic_id_mask = (0xFFFFFFFFu),
-};
diff --git a/arch/x86/kernel/genx2apic_phys.c b/arch/x86/kernel/genx2apic_phys.c
deleted file mode 100644
index 21bcc0e..0000000
--- a/arch/x86/kernel/genx2apic_phys.c
+++ /dev/null
@@ -1,194 +0,0 @@
-#include <linux/threads.h>
-#include <linux/cpumask.h>
-#include <linux/string.h>
-#include <linux/kernel.h>
-#include <linux/ctype.h>
-#include <linux/init.h>
-#include <linux/dmar.h>
-
-#include <asm/smp.h>
-#include <asm/ipi.h>
-#include <asm/genapic.h>
-
-static int x2apic_phys;
-
-static int set_x2apic_phys_mode(char *arg)
-{
- x2apic_phys = 1;
- return 0;
-}
-early_param("x2apic_phys", set_x2apic_phys_mode);
-
-static int x2apic_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
-{
- if (cpu_has_x2apic && x2apic_phys)
- return 1;
-
- return 0;
-}
-
-/* Start with all IRQs pointing to boot CPU. IRQ balancing will shift them. */
-
-static const struct cpumask *x2apic_target_cpus(void)
-{
- return cpumask_of(0);
-}
-
-static void x2apic_vector_allocation_domain(int cpu, struct cpumask *retmask)
-{
- cpumask_clear(retmask);
- cpumask_set_cpu(cpu, retmask);
-}
-
-static void __x2apic_send_IPI_dest(unsigned int apicid, int vector,
- unsigned int dest)
-{
- unsigned long cfg;
-
- cfg = __prepare_ICR(0, vector, dest);
-
- /*
- * send the IPI.
- */
- x2apic_icr_write(cfg, apicid);
-}
-
-static void x2apic_send_IPI_mask(const struct cpumask *mask, int vector)
-{
- unsigned long flags;
- unsigned long query_cpu;
-
- local_irq_save(flags);
- for_each_cpu(query_cpu, mask) {
- __x2apic_send_IPI_dest(per_cpu(x86_cpu_to_apicid, query_cpu),
- vector, APIC_DEST_PHYSICAL);
- }
- local_irq_restore(flags);
-}
-
-static void x2apic_send_IPI_mask_allbutself(const struct cpumask *mask,
- int vector)
-{
- unsigned long flags;
- unsigned long query_cpu;
- unsigned long this_cpu = smp_processor_id();
-
- local_irq_save(flags);
- for_each_cpu(query_cpu, mask) {
- if (query_cpu != this_cpu)
- __x2apic_send_IPI_dest(
- per_cpu(x86_cpu_to_apicid, query_cpu),
- vector, APIC_DEST_PHYSICAL);
- }
- local_irq_restore(flags);
-}
-
-static void x2apic_send_IPI_allbutself(int vector)
-{
- unsigned long flags;
- unsigned long query_cpu;
- unsigned long this_cpu = smp_processor_id();
-
- local_irq_save(flags);
- for_each_online_cpu(query_cpu)
- if (query_cpu != this_cpu)
- __x2apic_send_IPI_dest(
- per_cpu(x86_cpu_to_apicid, query_cpu),
- vector, APIC_DEST_PHYSICAL);
- local_irq_restore(flags);
-}
-
-static void x2apic_send_IPI_all(int vector)
-{
- x2apic_send_IPI_mask(cpu_online_mask, vector);
-}
-
-static int x2apic_apic_id_registered(void)
-{
- return 1;
-}
-
-static unsigned int x2apic_cpu_mask_to_apicid(const struct cpumask *cpumask)
-{
- int cpu;
-
- /*
- * We're using fixed IRQ delivery, can only return one phys APIC ID.
- * May as well be the first.
- */
- cpu = cpumask_first(cpumask);
- if ((unsigned)cpu < nr_cpu_ids)
- return per_cpu(x86_cpu_to_apicid, cpu);
- else
- return BAD_APICID;
-}
-
-static unsigned int x2apic_cpu_mask_to_apicid_and(const struct cpumask *cpumask,
- const struct cpumask *andmask)
-{
- int cpu;
-
- /*
- * We're using fixed IRQ delivery, can only return one phys APIC ID.
- * May as well be the first.
- */
- for_each_cpu_and(cpu, cpumask, andmask)
- if (cpumask_test_cpu(cpu, cpu_online_mask))
- break;
- if (cpu < nr_cpu_ids)
- return per_cpu(x86_cpu_to_apicid, cpu);
- return BAD_APICID;
-}
-
-static unsigned int get_apic_id(unsigned long x)
-{
- unsigned int id;
-
- id = x;
- return id;
-}
-
-static unsigned long set_apic_id(unsigned int id)
-{
- unsigned long x;
-
- x = id;
- return x;
-}
-
-static unsigned int phys_pkg_id(int index_msb)
-{
- return current_cpu_data.initial_apicid >> index_msb;
-}
-
-static void x2apic_send_IPI_self(int vector)
-{
- apic_write(APIC_SELF_IPI, vector);
-}
-
-static void init_x2apic_ldr(void)
-{
- return;
-}
-
-struct genapic apic_x2apic_phys = {
- .name = "physical x2apic",
- .acpi_madt_oem_check = x2apic_acpi_madt_oem_check,
- .int_delivery_mode = dest_Fixed,
- .int_dest_mode = (APIC_DEST_PHYSICAL != 0),
- .target_cpus = x2apic_target_cpus,
- .vector_allocation_domain = x2apic_vector_allocation_domain,
- .apic_id_registered = x2apic_apic_id_registered,
- .init_apic_ldr = init_x2apic_ldr,
- .send_IPI_all = x2apic_send_IPI_all,
- .send_IPI_allbutself = x2apic_send_IPI_allbutself,
- .send_IPI_mask = x2apic_send_IPI_mask,
- .send_IPI_mask_allbutself = x2apic_send_IPI_mask_allbutself,
- .send_IPI_self = x2apic_send_IPI_self,
- .cpu_mask_to_apicid = x2apic_cpu_mask_to_apicid,
- .cpu_mask_to_apicid_and = x2apic_cpu_mask_to_apicid_and,
- .phys_pkg_id = phys_pkg_id,
- .get_apic_id = get_apic_id,
- .set_apic_id = set_apic_id,
- .apic_id_mask = (0xFFFFFFFFu),
-};
diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c
index b9a4d8c..f5b2722 100644
--- a/arch/x86/kernel/head64.c
+++ b/arch/x86/kernel/head64.c
@@ -26,27 +26,6 @@
#include <asm/bios_ebda.h>
#include <asm/trampoline.h>
-/* boot cpu pda */
-static struct x8664_pda _boot_cpu_pda;
-
-#ifdef CONFIG_SMP
-/*
- * We install an empty cpu_pda pointer table to indicate to early users
- * (numa_set_node) that the cpu_pda pointer table for cpus other than
- * the boot cpu is not yet setup.
- */
-static struct x8664_pda *__cpu_pda[NR_CPUS] __initdata;
-#else
-static struct x8664_pda *__cpu_pda[NR_CPUS] __read_mostly;
-#endif
-
-void __init x86_64_init_pda(void)
-{
- _cpu_pda = __cpu_pda;
- cpu_pda(0) = &_boot_cpu_pda;
- pda_init(0);
-}
-
static void __init zap_identity_mappings(void)
{
pgd_t *pgd = pgd_offset_k(0UL);
@@ -112,8 +91,6 @@
if (console_loglevel == 10)
early_printk("Kernel alive\n");
- x86_64_init_pda();
-
x86_64_start_reservations(real_mode_data);
}
diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S
index e835b4e..c32ca19 100644
--- a/arch/x86/kernel/head_32.S
+++ b/arch/x86/kernel/head_32.S
@@ -11,14 +11,15 @@
#include <linux/init.h>
#include <linux/linkage.h>
#include <asm/segment.h>
-#include <asm/page.h>
-#include <asm/pgtable.h>
+#include <asm/page_types.h>
+#include <asm/pgtable_types.h>
#include <asm/desc.h>
#include <asm/cache.h>
#include <asm/thread_info.h>
#include <asm/asm-offsets.h>
#include <asm/setup.h>
#include <asm/processor-flags.h>
+#include <asm/percpu.h>
/* Physical address */
#define pa(X) ((X) - __PAGE_OFFSET)
@@ -429,14 +430,34 @@
ljmp $(__KERNEL_CS),$1f
1: movl $(__KERNEL_DS),%eax # reload all the segment registers
movl %eax,%ss # after changing gdt.
- movl %eax,%fs # gets reset once there's real percpu
movl $(__USER_DS),%eax # DS/ES contains default USER segment
movl %eax,%ds
movl %eax,%es
- xorl %eax,%eax # Clear GS and LDT
+ movl $(__KERNEL_PERCPU), %eax
+ movl %eax,%fs # set this cpu's percpu
+
+#ifdef CONFIG_CC_STACKPROTECTOR
+ /*
+ * The linker can't handle this by relocation. Manually set
+ * base address in stack canary segment descriptor.
+ */
+ cmpb $0,ready
+ jne 1f
+ movl $per_cpu__gdt_page,%eax
+ movl $per_cpu__stack_canary,%ecx
+ subl $20, %ecx
+ movw %cx, 8 * GDT_ENTRY_STACK_CANARY + 2(%eax)
+ shrl $16, %ecx
+ movb %cl, 8 * GDT_ENTRY_STACK_CANARY + 4(%eax)
+ movb %ch, 8 * GDT_ENTRY_STACK_CANARY + 7(%eax)
+1:
+#endif
+ movl $(__KERNEL_STACK_CANARY),%eax
movl %eax,%gs
+
+ xorl %eax,%eax # Clear LDT
lldt %ax
cld # gcc2 wants the direction flag cleared at all times
@@ -446,8 +467,6 @@
movb $1, ready
cmpb $0,%cl # the first CPU calls start_kernel
je 1f
- movl $(__KERNEL_PERCPU), %eax
- movl %eax,%fs # set this cpu's percpu
movl (stack_start), %esp
1:
#endif /* CONFIG_SMP */
@@ -548,12 +567,8 @@
pushl %eax
pushl %edx /* trapno */
pushl $fault_msg
-#ifdef CONFIG_EARLY_PRINTK
- call early_printk
-#else
call printk
#endif
-#endif
call dump_stack
hlt_loop:
hlt
@@ -580,11 +595,10 @@
pushl 32(%esp)
pushl 40(%esp)
pushl $int_msg
-#ifdef CONFIG_EARLY_PRINTK
- call early_printk
-#else
call printk
-#endif
+
+ call dump_stack
+
addl $(5*4),%esp
popl %ds
popl %es
@@ -660,7 +674,7 @@
.long 0
int_msg:
- .asciz "Unknown interrupt or fault at EIP %p %p %p\n"
+ .asciz "Unknown interrupt or fault at: %p %p %p\n"
fault_msg:
/* fault info: */
diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
index 0e275d4..54b29bb 100644
--- a/arch/x86/kernel/head_64.S
+++ b/arch/x86/kernel/head_64.S
@@ -19,6 +19,7 @@
#include <asm/msr.h>
#include <asm/cache.h>
#include <asm/processor-flags.h>
+#include <asm/percpu.h>
#ifdef CONFIG_PARAVIRT
#include <asm/asm-offsets.h>
@@ -226,12 +227,15 @@
movl %eax,%fs
movl %eax,%gs
- /*
- * Setup up a dummy PDA. this is just for some early bootup code
- * that does in_interrupt()
- */
+ /* Set up %gs.
+ *
+ * The base of %gs always points to the bottom of the irqstack
+ * union. If the stack protector canary is enabled, it is
+ * located at %gs:40. Note that, on SMP, the boot cpu uses
+ * init data section till per cpu areas are set up.
+ */
movl $MSR_GS_BASE,%ecx
- movq $empty_zero_page,%rax
+ movq initial_gs(%rip),%rax
movq %rax,%rdx
shrq $32,%rdx
wrmsr
@@ -257,6 +261,8 @@
.align 8
ENTRY(initial_code)
.quad x86_64_start_kernel
+ ENTRY(initial_gs)
+ .quad INIT_PER_CPU_VAR(irq_stack_union)
__FINITDATA
ENTRY(stack_start)
@@ -323,8 +329,6 @@
#endif /* CONFIG_EARLY_PRINTK */
.previous
-.balign PAGE_SIZE
-
#define NEXT_PAGE(name) \
.balign PAGE_SIZE; \
ENTRY(name)
@@ -401,7 +405,8 @@
.globl early_gdt_descr
early_gdt_descr:
.word GDT_ENTRIES*8-1
- .quad per_cpu__gdt_page
+early_gdt_descr_base:
+ .quad INIT_PER_CPU_VAR(gdt_page)
ENTRY(phys_base)
/* This must match the first entry in level2_kernel_pgt */
@@ -412,7 +417,7 @@
.section .bss, "aw", @nobits
.align L1_CACHE_BYTES
ENTRY(idt_table)
- .skip 256 * 16
+ .skip IDT_ENTRIES * 16
.section .bss.page_aligned, "aw", @nobits
.align PAGE_SIZE
diff --git a/arch/x86/kernel/i8259.c b/arch/x86/kernel/i8259.c
index 11d5093..df89102 100644
--- a/arch/x86/kernel/i8259.c
+++ b/arch/x86/kernel/i8259.c
@@ -22,7 +22,6 @@
#include <asm/pgtable.h>
#include <asm/desc.h>
#include <asm/apic.h>
-#include <asm/arch_hooks.h>
#include <asm/i8259.h>
/*
diff --git a/arch/x86/kernel/ioport.c b/arch/x86/kernel/ioport.c
index b12208f..e41980a 100644
--- a/arch/x86/kernel/ioport.c
+++ b/arch/x86/kernel/ioport.c
@@ -131,9 +131,8 @@
}
#ifdef CONFIG_X86_32
-asmlinkage long sys_iopl(unsigned long regsp)
+long sys_iopl(struct pt_regs *regs)
{
- struct pt_regs *regs = (struct pt_regs *)®sp;
unsigned int level = regs->bx;
struct thread_struct *t = ¤t->thread;
int rc;
diff --git a/arch/x86/kernel/ipi.c b/arch/x86/kernel/ipi.c
deleted file mode 100644
index 285bbf8..0000000
--- a/arch/x86/kernel/ipi.c
+++ /dev/null
@@ -1,190 +0,0 @@
-#include <linux/cpumask.h>
-#include <linux/interrupt.h>
-#include <linux/init.h>
-
-#include <linux/mm.h>
-#include <linux/delay.h>
-#include <linux/spinlock.h>
-#include <linux/kernel_stat.h>
-#include <linux/mc146818rtc.h>
-#include <linux/cache.h>
-#include <linux/cpu.h>
-#include <linux/module.h>
-
-#include <asm/smp.h>
-#include <asm/mtrr.h>
-#include <asm/tlbflush.h>
-#include <asm/mmu_context.h>
-#include <asm/apic.h>
-#include <asm/proto.h>
-
-#ifdef CONFIG_X86_32
-#include <mach_apic.h>
-#include <mach_ipi.h>
-
-/*
- * the following functions deal with sending IPIs between CPUs.
- *
- * We use 'broadcast', CPU->CPU IPIs and self-IPIs too.
- */
-
-static inline int __prepare_ICR(unsigned int shortcut, int vector)
-{
- unsigned int icr = shortcut | APIC_DEST_LOGICAL;
-
- switch (vector) {
- default:
- icr |= APIC_DM_FIXED | vector;
- break;
- case NMI_VECTOR:
- icr |= APIC_DM_NMI;
- break;
- }
- return icr;
-}
-
-static inline int __prepare_ICR2(unsigned int mask)
-{
- return SET_APIC_DEST_FIELD(mask);
-}
-
-void __send_IPI_shortcut(unsigned int shortcut, int vector)
-{
- /*
- * Subtle. In the case of the 'never do double writes' workaround
- * we have to lock out interrupts to be safe. As we don't care
- * of the value read we use an atomic rmw access to avoid costly
- * cli/sti. Otherwise we use an even cheaper single atomic write
- * to the APIC.
- */
- unsigned int cfg;
-
- /*
- * Wait for idle.
- */
- apic_wait_icr_idle();
-
- /*
- * No need to touch the target chip field
- */
- cfg = __prepare_ICR(shortcut, vector);
-
- /*
- * Send the IPI. The write to APIC_ICR fires this off.
- */
- apic_write(APIC_ICR, cfg);
-}
-
-void send_IPI_self(int vector)
-{
- __send_IPI_shortcut(APIC_DEST_SELF, vector);
-}
-
-/*
- * This is used to send an IPI with no shorthand notation (the destination is
- * specified in bits 56 to 63 of the ICR).
- */
-static inline void __send_IPI_dest_field(unsigned long mask, int vector)
-{
- unsigned long cfg;
-
- /*
- * Wait for idle.
- */
- if (unlikely(vector == NMI_VECTOR))
- safe_apic_wait_icr_idle();
- else
- apic_wait_icr_idle();
-
- /*
- * prepare target chip field
- */
- cfg = __prepare_ICR2(mask);
- apic_write(APIC_ICR2, cfg);
-
- /*
- * program the ICR
- */
- cfg = __prepare_ICR(0, vector);
-
- /*
- * Send the IPI. The write to APIC_ICR fires this off.
- */
- apic_write(APIC_ICR, cfg);
-}
-
-/*
- * This is only used on smaller machines.
- */
-void send_IPI_mask_bitmask(const struct cpumask *cpumask, int vector)
-{
- unsigned long mask = cpumask_bits(cpumask)[0];
- unsigned long flags;
-
- local_irq_save(flags);
- WARN_ON(mask & ~cpumask_bits(cpu_online_mask)[0]);
- __send_IPI_dest_field(mask, vector);
- local_irq_restore(flags);
-}
-
-void send_IPI_mask_sequence(const struct cpumask *mask, int vector)
-{
- unsigned long flags;
- unsigned int query_cpu;
-
- /*
- * Hack. The clustered APIC addressing mode doesn't allow us to send
- * to an arbitrary mask, so I do a unicasts to each CPU instead. This
- * should be modified to do 1 message per cluster ID - mbligh
- */
-
- local_irq_save(flags);
- for_each_cpu(query_cpu, mask)
- __send_IPI_dest_field(cpu_to_logical_apicid(query_cpu), vector);
- local_irq_restore(flags);
-}
-
-void send_IPI_mask_allbutself(const struct cpumask *mask, int vector)
-{
- unsigned long flags;
- unsigned int query_cpu;
- unsigned int this_cpu = smp_processor_id();
-
- /* See Hack comment above */
-
- local_irq_save(flags);
- for_each_cpu(query_cpu, mask)
- if (query_cpu != this_cpu)
- __send_IPI_dest_field(cpu_to_logical_apicid(query_cpu),
- vector);
- local_irq_restore(flags);
-}
-
-/* must come after the send_IPI functions above for inlining */
-static int convert_apicid_to_cpu(int apic_id)
-{
- int i;
-
- for_each_possible_cpu(i) {
- if (per_cpu(x86_cpu_to_apicid, i) == apic_id)
- return i;
- }
- return -1;
-}
-
-int safe_smp_processor_id(void)
-{
- int apicid, cpuid;
-
- if (!boot_cpu_has(X86_FEATURE_APIC))
- return 0;
-
- apicid = hard_smp_processor_id();
- if (apicid == BAD_APICID)
- return 0;
-
- cpuid = convert_apicid_to_cpu(apicid);
-
- return cpuid >= 0 ? cpuid : 0;
-}
-#endif
diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c
index 3973e2d..7c95c89 100644
--- a/arch/x86/kernel/irq.c
+++ b/arch/x86/kernel/irq.c
@@ -6,10 +6,12 @@
#include <linux/kernel_stat.h>
#include <linux/seq_file.h>
#include <linux/smp.h>
+#include <linux/ftrace.h>
#include <asm/apic.h>
#include <asm/io_apic.h>
#include <asm/irq.h>
+#include <asm/idle.h>
atomic_t irq_err_count;
@@ -36,11 +38,7 @@
#endif
}
-#ifdef CONFIG_X86_32
-# define irq_stats(x) (&per_cpu(irq_stat, x))
-#else
-# define irq_stats(x) cpu_pda(x)
-#endif
+#define irq_stats(x) (&per_cpu(irq_stat, x))
/*
* /proc/interrupts printing:
*/
@@ -57,6 +55,10 @@
for_each_online_cpu(j)
seq_printf(p, "%10u ", irq_stats(j)->apic_timer_irqs);
seq_printf(p, " Local timer interrupts\n");
+ seq_printf(p, "CNT: ");
+ for_each_online_cpu(j)
+ seq_printf(p, "%10u ", irq_stats(j)->apic_perf_irqs);
+ seq_printf(p, " Performance counter interrupts\n");
#endif
#ifdef CONFIG_SMP
seq_printf(p, "RES: ");
@@ -164,6 +166,7 @@
#ifdef CONFIG_X86_LOCAL_APIC
sum += irq_stats(cpu)->apic_timer_irqs;
+ sum += irq_stats(cpu)->apic_perf_irqs;
#endif
#ifdef CONFIG_SMP
sum += irq_stats(cpu)->irq_resched_count;
@@ -192,4 +195,40 @@
return sum;
}
+
+/*
+ * do_IRQ handles all normal device IRQ's (the special
+ * SMP cross-CPU interrupts have their own specific
+ * handlers).
+ */
+unsigned int __irq_entry do_IRQ(struct pt_regs *regs)
+{
+ struct pt_regs *old_regs = set_irq_regs(regs);
+
+ /* high bit used in ret_from_ code */
+ unsigned vector = ~regs->orig_ax;
+ unsigned irq;
+
+ exit_idle();
+ irq_enter();
+
+ irq = __get_cpu_var(vector_irq)[vector];
+
+ if (!handle_irq(irq, regs)) {
+#ifdef CONFIG_X86_64
+ if (!disable_apic)
+ ack_APIC_irq();
+#endif
+
+ if (printk_ratelimit())
+ printk(KERN_EMERG "%s: %d.%d No irq handler for vector (irq %d)\n",
+ __func__, smp_processor_id(), vector, irq);
+ }
+
+ irq_exit();
+
+ set_irq_regs(old_regs);
+ return 1;
+}
+
EXPORT_SYMBOL_GPL(vector_used_by_percpu_irq);
diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c
index 74b9ff7..9dc6b2b 100644
--- a/arch/x86/kernel/irq_32.c
+++ b/arch/x86/kernel/irq_32.c
@@ -191,33 +191,16 @@
execute_on_irq_stack(int overflow, struct irq_desc *desc, int irq) { return 0; }
#endif
-/*
- * do_IRQ handles all normal device IRQ's (the special
- * SMP cross-CPU interrupts have their own specific
- * handlers).
- */
-unsigned int do_IRQ(struct pt_regs *regs)
+bool handle_irq(unsigned irq, struct pt_regs *regs)
{
- struct pt_regs *old_regs;
- /* high bit used in ret_from_ code */
- int overflow;
- unsigned vector = ~regs->orig_ax;
struct irq_desc *desc;
- unsigned irq;
-
-
- old_regs = set_irq_regs(regs);
- irq_enter();
- irq = __get_cpu_var(vector_irq)[vector];
+ int overflow;
overflow = check_stack_overflow();
desc = irq_to_desc(irq);
- if (unlikely(!desc)) {
- printk(KERN_EMERG "%s: cannot handle IRQ %d vector %#x cpu %d\n",
- __func__, irq, vector, smp_processor_id());
- BUG();
- }
+ if (unlikely(!desc))
+ return false;
if (!execute_on_irq_stack(overflow, desc, irq)) {
if (unlikely(overflow))
@@ -225,13 +208,10 @@
desc->handle_irq(irq, desc);
}
- irq_exit();
- set_irq_regs(old_regs);
- return 1;
+ return true;
}
#ifdef CONFIG_HOTPLUG_CPU
-#include <mach_apic.h>
/* A cpu has been removed from cpu_online_mask. Reset irq affinities. */
void fixup_irqs(void)
@@ -248,7 +228,7 @@
if (irq == 2)
continue;
- affinity = &desc->affinity;
+ affinity = desc->affinity;
if (cpumask_any_and(affinity, cpu_online_mask) >= nr_cpu_ids) {
printk("Breaking affinity for irq %i\n", irq);
affinity = cpu_all_mask;
diff --git a/arch/x86/kernel/irq_64.c b/arch/x86/kernel/irq_64.c
index 63c88e6..977d8b4 100644
--- a/arch/x86/kernel/irq_64.c
+++ b/arch/x86/kernel/irq_64.c
@@ -18,6 +18,13 @@
#include <linux/smp.h>
#include <asm/io_apic.h>
#include <asm/idle.h>
+#include <asm/apic.h>
+
+DEFINE_PER_CPU_SHARED_ALIGNED(irq_cpustat_t, irq_stat);
+EXPORT_PER_CPU_SYMBOL(irq_stat);
+
+DEFINE_PER_CPU(struct pt_regs *, irq_regs);
+EXPORT_PER_CPU_SYMBOL(irq_regs);
/*
* Probabilistic stack overflow check:
@@ -41,42 +48,18 @@
#endif
}
-/*
- * do_IRQ handles all normal device IRQ's (the special
- * SMP cross-CPU interrupts have their own specific
- * handlers).
- */
-asmlinkage unsigned int __irq_entry do_IRQ(struct pt_regs *regs)
+bool handle_irq(unsigned irq, struct pt_regs *regs)
{
- struct pt_regs *old_regs = set_irq_regs(regs);
struct irq_desc *desc;
- /* high bit used in ret_from_ code */
- unsigned vector = ~regs->orig_ax;
- unsigned irq;
-
- exit_idle();
- irq_enter();
- irq = __get_cpu_var(vector_irq)[vector];
-
stack_overflow_check(regs);
desc = irq_to_desc(irq);
- if (likely(desc))
- generic_handle_irq_desc(irq, desc);
- else {
- if (!disable_apic)
- ack_APIC_irq();
+ if (unlikely(!desc))
+ return false;
- if (printk_ratelimit())
- printk(KERN_EMERG "%s: %d.%d No irq handler for vector\n",
- __func__, smp_processor_id(), vector);
- }
-
- irq_exit();
-
- set_irq_regs(old_regs);
- return 1;
+ generic_handle_irq_desc(irq, desc);
+ return true;
}
#ifdef CONFIG_HOTPLUG_CPU
@@ -100,7 +83,7 @@
/* interrupt's are disabled at this point */
spin_lock(&desc->lock);
- affinity = &desc->affinity;
+ affinity = desc->affinity;
if (!irq_has_action(irq) ||
cpumask_equal(affinity, cpu_online_mask)) {
spin_unlock(&desc->lock);
diff --git a/arch/x86/kernel/irqinit_32.c b/arch/x86/kernel/irqinit_32.c
index 10a09c2..f3e11cb2 100644
--- a/arch/x86/kernel/irqinit_32.c
+++ b/arch/x86/kernel/irqinit_32.c
@@ -18,7 +18,7 @@
#include <asm/pgtable.h>
#include <asm/desc.h>
#include <asm/apic.h>
-#include <asm/arch_hooks.h>
+#include <asm/setup.h>
#include <asm/i8259.h>
#include <asm/traps.h>
@@ -78,6 +78,15 @@
}
}
+/*
+ * IRQ2 is cascade interrupt to second interrupt controller
+ */
+static struct irqaction irq2 = {
+ .handler = no_action,
+ .mask = CPU_MASK_NONE,
+ .name = "cascade",
+};
+
DEFINE_PER_CPU(vector_irq_t, vector_irq) = {
[0 ... IRQ0_VECTOR - 1] = -1,
[IRQ0_VECTOR] = 0,
@@ -111,28 +120,8 @@
return 0;
}
-/* Overridden in paravirt.c */
-void init_IRQ(void) __attribute__((weak, alias("native_init_IRQ")));
-
-void __init native_init_IRQ(void)
+static void __init smp_intr_init(void)
{
- int i;
-
- /* all the set up before the call gates are initialised */
- pre_intr_init_hook();
-
- /*
- * Cover the whole vector space, no vector can escape
- * us. (some of these will be overridden and become
- * 'special' SMP interrupts)
- */
- for (i = FIRST_EXTERNAL_VECTOR; i < NR_VECTORS; i++) {
- /* SYSCALL_VECTOR was reserved in trap_init. */
- if (i != SYSCALL_VECTOR)
- set_intr_gate(i, interrupt[i-FIRST_EXTERNAL_VECTOR]);
- }
-
-
#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_SMP)
/*
* The reschedule interrupt is a CPU-to-CPU reschedule-helper
@@ -140,8 +129,15 @@
*/
alloc_intr_gate(RESCHEDULE_VECTOR, reschedule_interrupt);
- /* IPI for invalidation */
- alloc_intr_gate(INVALIDATE_TLB_VECTOR, invalidate_interrupt);
+ /* IPIs for invalidation */
+ alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+0, invalidate_interrupt0);
+ alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+1, invalidate_interrupt1);
+ alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+2, invalidate_interrupt2);
+ alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+3, invalidate_interrupt3);
+ alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+4, invalidate_interrupt4);
+ alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+5, invalidate_interrupt5);
+ alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+6, invalidate_interrupt6);
+ alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+7, invalidate_interrupt7);
/* IPI for generic function call */
alloc_intr_gate(CALL_FUNCTION_VECTOR, call_function_interrupt);
@@ -154,6 +150,11 @@
set_intr_gate(IRQ_MOVE_CLEANUP_VECTOR, irq_move_cleanup_interrupt);
set_bit(IRQ_MOVE_CLEANUP_VECTOR, used_vectors);
#endif
+}
+
+static void __init apic_intr_init(void)
+{
+ smp_intr_init();
#ifdef CONFIG_X86_LOCAL_APIC
/* self generated IPI for local APIC timer */
@@ -162,17 +163,49 @@
/* IPI vectors for APIC spurious and error interrupts */
alloc_intr_gate(SPURIOUS_APIC_VECTOR, spurious_interrupt);
alloc_intr_gate(ERROR_APIC_VECTOR, error_interrupt);
-#endif
+# ifdef CONFIG_PERF_COUNTERS
+ alloc_intr_gate(LOCAL_PERF_VECTOR, perf_counter_interrupt);
+# endif
-#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86_MCE_P4THERMAL)
+# ifdef CONFIG_X86_MCE_P4THERMAL
/* thermal monitor LVT interrupt */
alloc_intr_gate(THERMAL_APIC_VECTOR, thermal_interrupt);
+# endif
#endif
+}
- /* setup after call gates are initialised (usually add in
- * the architecture specific gates)
+/* Overridden in paravirt.c */
+void init_IRQ(void) __attribute__((weak, alias("native_init_IRQ")));
+
+void __init native_init_IRQ(void)
+{
+ int i;
+
+ /* Execute any quirks before the call gates are initialised: */
+ x86_quirk_pre_intr_init();
+
+ apic_intr_init();
+
+ /*
+ * Cover the whole vector space, no vector can escape
+ * us. (some of these will be overridden and become
+ * 'special' SMP interrupts)
*/
- intr_init_hook();
+ for (i = 0; i < (NR_VECTORS - FIRST_EXTERNAL_VECTOR); i++) {
+ int vector = FIRST_EXTERNAL_VECTOR + i;
+ /* SYSCALL_VECTOR was reserved in trap_init. */
+ if (!test_bit(vector, used_vectors))
+ set_intr_gate(vector, interrupt[i]);
+ }
+
+ if (!acpi_ioapic)
+ setup_irq(2, &irq2);
+
+ /*
+ * Call quirks after call gates are initialised (usually add in
+ * the architecture specific gates):
+ */
+ x86_quirk_intr_init();
/*
* External FPU? Set up irq13 if so, for
diff --git a/arch/x86/kernel/irqinit_64.c b/arch/x86/kernel/irqinit_64.c
index da481a1..16e1fc6 100644
--- a/arch/x86/kernel/irqinit_64.c
+++ b/arch/x86/kernel/irqinit_64.c
@@ -150,6 +150,11 @@
/* IPI vectors for APIC spurious and error interrupts */
alloc_intr_gate(SPURIOUS_APIC_VECTOR, spurious_interrupt);
alloc_intr_gate(ERROR_APIC_VECTOR, error_interrupt);
+
+ /* Performance monitoring interrupt: */
+#ifdef CONFIG_PERF_COUNTERS
+ alloc_intr_gate(LOCAL_PERF_VECTOR, perf_counter_interrupt);
+#endif
}
void __init native_init_IRQ(void)
@@ -157,6 +162,9 @@
int i;
init_ISA_irqs();
+
+ apic_intr_init();
+
/*
* Cover the whole vector space, no vector can escape
* us. (some of these will be overridden and become
@@ -164,12 +172,10 @@
*/
for (i = 0; i < (NR_VECTORS - FIRST_EXTERNAL_VECTOR); i++) {
int vector = FIRST_EXTERNAL_VECTOR + i;
- if (vector != IA32_SYSCALL_VECTOR)
+ if (!test_bit(vector, used_vectors))
set_intr_gate(vector, interrupt[i]);
}
- apic_intr_init();
-
if (!acpi_ioapic)
setup_irq(2, &irq2);
}
diff --git a/arch/x86/kernel/kgdb.c b/arch/x86/kernel/kgdb.c
index 10435a1..eedfaeb 100644
--- a/arch/x86/kernel/kgdb.c
+++ b/arch/x86/kernel/kgdb.c
@@ -46,7 +46,7 @@
#include <asm/apicdef.h>
#include <asm/system.h>
-#include <mach_ipi.h>
+#include <asm/apic.h>
/*
* Put the error code here just in case the user cares:
@@ -347,7 +347,7 @@
*/
void kgdb_roundup_cpus(unsigned long flags)
{
- send_IPI_allbutself(APIC_DM_NMI);
+ apic->send_IPI_allbutself(APIC_DM_NMI);
}
#endif
diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c
index 652fce6d..137f2e8 100644
--- a/arch/x86/kernel/kvmclock.c
+++ b/arch/x86/kernel/kvmclock.c
@@ -19,7 +19,6 @@
#include <linux/clocksource.h>
#include <linux/kvm_para.h>
#include <asm/pvclock.h>
-#include <asm/arch_hooks.h>
#include <asm/msr.h>
#include <asm/apic.h>
#include <linux/percpu.h>
diff --git a/arch/x86/kernel/machine_kexec_32.c b/arch/x86/kernel/machine_kexec_32.c
index 37f4200..f5fc8c7 100644
--- a/arch/x86/kernel/machine_kexec_32.c
+++ b/arch/x86/kernel/machine_kexec_32.c
@@ -121,7 +121,7 @@
static void machine_kexec_prepare_page_tables(struct kimage *image)
{
void *control_page;
- pmd_t *pmd = 0;
+ pmd_t *pmd = NULL;
control_page = page_address(image->control_code_page);
#ifdef CONFIG_X86_PAE
diff --git a/arch/x86/kernel/machine_kexec_64.c b/arch/x86/kernel/machine_kexec_64.c
index c43caa3..6993d51 100644
--- a/arch/x86/kernel/machine_kexec_64.c
+++ b/arch/x86/kernel/machine_kexec_64.c
@@ -18,15 +18,6 @@
#include <asm/mmu_context.h>
#include <asm/io.h>
-#define PAGE_ALIGNED __attribute__ ((__aligned__(PAGE_SIZE)))
-static u64 kexec_pgd[512] PAGE_ALIGNED;
-static u64 kexec_pud0[512] PAGE_ALIGNED;
-static u64 kexec_pmd0[512] PAGE_ALIGNED;
-static u64 kexec_pte0[512] PAGE_ALIGNED;
-static u64 kexec_pud1[512] PAGE_ALIGNED;
-static u64 kexec_pmd1[512] PAGE_ALIGNED;
-static u64 kexec_pte1[512] PAGE_ALIGNED;
-
static void init_level2_page(pmd_t *level2p, unsigned long addr)
{
unsigned long end_addr;
@@ -107,12 +98,65 @@
return result;
}
+static void free_transition_pgtable(struct kimage *image)
+{
+ free_page((unsigned long)image->arch.pud);
+ free_page((unsigned long)image->arch.pmd);
+ free_page((unsigned long)image->arch.pte);
+}
+
+static int init_transition_pgtable(struct kimage *image, pgd_t *pgd)
+{
+ pud_t *pud;
+ pmd_t *pmd;
+ pte_t *pte;
+ unsigned long vaddr, paddr;
+ int result = -ENOMEM;
+
+ vaddr = (unsigned long)relocate_kernel;
+ paddr = __pa(page_address(image->control_code_page)+PAGE_SIZE);
+ pgd += pgd_index(vaddr);
+ if (!pgd_present(*pgd)) {
+ pud = (pud_t *)get_zeroed_page(GFP_KERNEL);
+ if (!pud)
+ goto err;
+ image->arch.pud = pud;
+ set_pgd(pgd, __pgd(__pa(pud) | _KERNPG_TABLE));
+ }
+ pud = pud_offset(pgd, vaddr);
+ if (!pud_present(*pud)) {
+ pmd = (pmd_t *)get_zeroed_page(GFP_KERNEL);
+ if (!pmd)
+ goto err;
+ image->arch.pmd = pmd;
+ set_pud(pud, __pud(__pa(pmd) | _KERNPG_TABLE));
+ }
+ pmd = pmd_offset(pud, vaddr);
+ if (!pmd_present(*pmd)) {
+ pte = (pte_t *)get_zeroed_page(GFP_KERNEL);
+ if (!pte)
+ goto err;
+ image->arch.pte = pte;
+ set_pmd(pmd, __pmd(__pa(pte) | _KERNPG_TABLE));
+ }
+ pte = pte_offset_kernel(pmd, vaddr);
+ set_pte(pte, pfn_pte(paddr >> PAGE_SHIFT, PAGE_KERNEL_EXEC));
+ return 0;
+err:
+ free_transition_pgtable(image);
+ return result;
+}
+
static int init_pgtable(struct kimage *image, unsigned long start_pgtable)
{
pgd_t *level4p;
+ int result;
level4p = (pgd_t *)__va(start_pgtable);
- return init_level4_page(image, level4p, 0, max_pfn << PAGE_SHIFT);
+ result = init_level4_page(image, level4p, 0, max_pfn << PAGE_SHIFT);
+ if (result)
+ return result;
+ return init_transition_pgtable(image, level4p);
}
static void set_idt(void *newidt, u16 limit)
@@ -174,7 +218,7 @@
void machine_kexec_cleanup(struct kimage *image)
{
- return;
+ free_transition_pgtable(image);
}
/*
@@ -195,22 +239,6 @@
memcpy(control_page, relocate_kernel, PAGE_SIZE);
page_list[PA_CONTROL_PAGE] = virt_to_phys(control_page);
- page_list[VA_CONTROL_PAGE] = (unsigned long)relocate_kernel;
- page_list[PA_PGD] = virt_to_phys(&kexec_pgd);
- page_list[VA_PGD] = (unsigned long)kexec_pgd;
- page_list[PA_PUD_0] = virt_to_phys(&kexec_pud0);
- page_list[VA_PUD_0] = (unsigned long)kexec_pud0;
- page_list[PA_PMD_0] = virt_to_phys(&kexec_pmd0);
- page_list[VA_PMD_0] = (unsigned long)kexec_pmd0;
- page_list[PA_PTE_0] = virt_to_phys(&kexec_pte0);
- page_list[VA_PTE_0] = (unsigned long)kexec_pte0;
- page_list[PA_PUD_1] = virt_to_phys(&kexec_pud1);
- page_list[VA_PUD_1] = (unsigned long)kexec_pud1;
- page_list[PA_PMD_1] = virt_to_phys(&kexec_pmd1);
- page_list[VA_PMD_1] = (unsigned long)kexec_pmd1;
- page_list[PA_PTE_1] = virt_to_phys(&kexec_pte1);
- page_list[VA_PTE_1] = (unsigned long)kexec_pte1;
-
page_list[PA_TABLE_PAGE] =
(unsigned long)__pa(page_address(image->control_code_page));
diff --git a/arch/x86/kernel/mca_32.c b/arch/x86/kernel/mca_32.c
index 2dc1837..845d80c 100644
--- a/arch/x86/kernel/mca_32.c
+++ b/arch/x86/kernel/mca_32.c
@@ -51,7 +51,6 @@
#include <linux/ioport.h>
#include <asm/uaccess.h>
#include <linux/init.h>
-#include <asm/arch_hooks.h>
static unsigned char which_scsi;
@@ -474,6 +473,4 @@
* adapter was responsible for the error.
*/
bus_for_each_dev(&mca_bus_type, NULL, NULL, mca_handle_nmi_callback);
-
- mca_nmi_hook();
-} /* mca_handle_nmi */
+}
diff --git a/arch/x86/kernel/microcode_intel.c b/arch/x86/kernel/microcode_intel.c
index b7f4c92..5e9f4fc 100644
--- a/arch/x86/kernel/microcode_intel.c
+++ b/arch/x86/kernel/microcode_intel.c
@@ -87,9 +87,9 @@
#include <linux/cpu.h>
#include <linux/firmware.h>
#include <linux/platform_device.h>
+#include <linux/uaccess.h>
#include <asm/msr.h>
-#include <asm/uaccess.h>
#include <asm/processor.h>
#include <asm/microcode.h>
@@ -196,7 +196,7 @@
return (!sigmatch(sig, csig->sig, pf, csig->pf)) ? 0 : 1;
}
-static inline int
+static inline int
update_match_revision(struct microcode_header_intel *mc_header, int rev)
{
return (mc_header->rev <= rev) ? 0 : 1;
@@ -442,8 +442,8 @@
return ret;
}
- ret = generic_load_microcode(cpu, (void*)firmware->data, firmware->size,
- &get_ucode_fw);
+ ret = generic_load_microcode(cpu, (void *)firmware->data,
+ firmware->size, &get_ucode_fw);
release_firmware(firmware);
@@ -460,7 +460,7 @@
/* We should bind the task to the CPU */
BUG_ON(cpu != raw_smp_processor_id());
- return generic_load_microcode(cpu, (void*)buf, size, &get_ucode_user);
+ return generic_load_microcode(cpu, (void *)buf, size, &get_ucode_user);
}
static void microcode_fini_cpu(int cpu)
diff --git a/arch/x86/kernel/module_32.c b/arch/x86/kernel/module_32.c
index 3db0a544..0edd819 100644
--- a/arch/x86/kernel/module_32.c
+++ b/arch/x86/kernel/module_32.c
@@ -42,7 +42,7 @@
{
vfree(module_region);
/* FIXME: If module_region == mod->init_region, trim exception
- table entries. */
+ table entries. */
}
/* We don't need anything special. */
@@ -113,13 +113,13 @@
*para = NULL;
char *secstrings = (void *)hdr + sechdrs[hdr->e_shstrndx].sh_offset;
- for (s = sechdrs; s < sechdrs + hdr->e_shnum; s++) {
+ for (s = sechdrs; s < sechdrs + hdr->e_shnum; s++) {
if (!strcmp(".text", secstrings + s->sh_name))
text = s;
if (!strcmp(".altinstructions", secstrings + s->sh_name))
alt = s;
if (!strcmp(".smp_locks", secstrings + s->sh_name))
- locks= s;
+ locks = s;
if (!strcmp(".parainstructions", secstrings + s->sh_name))
para = s;
}
diff --git a/arch/x86/kernel/module_64.c b/arch/x86/kernel/module_64.c
index 6ba8783..c23880b 100644
--- a/arch/x86/kernel/module_64.c
+++ b/arch/x86/kernel/module_64.c
@@ -30,14 +30,14 @@
#include <asm/page.h>
#include <asm/pgtable.h>
-#define DEBUGP(fmt...)
+#define DEBUGP(fmt...)
#ifndef CONFIG_UML
void module_free(struct module *mod, void *module_region)
{
vfree(module_region);
/* FIXME: If module_region == mod->init_region, trim exception
- table entries. */
+ table entries. */
}
void *module_alloc(unsigned long size)
@@ -77,7 +77,7 @@
Elf64_Rela *rel = (void *)sechdrs[relsec].sh_addr;
Elf64_Sym *sym;
void *loc;
- u64 val;
+ u64 val;
DEBUGP("Applying relocate section %u to %u\n", relsec,
sechdrs[relsec].sh_info);
@@ -91,11 +91,11 @@
sym = (Elf64_Sym *)sechdrs[symindex].sh_addr
+ ELF64_R_SYM(rel[i].r_info);
- DEBUGP("type %d st_value %Lx r_addend %Lx loc %Lx\n",
- (int)ELF64_R_TYPE(rel[i].r_info),
- sym->st_value, rel[i].r_addend, (u64)loc);
+ DEBUGP("type %d st_value %Lx r_addend %Lx loc %Lx\n",
+ (int)ELF64_R_TYPE(rel[i].r_info),
+ sym->st_value, rel[i].r_addend, (u64)loc);
- val = sym->st_value + rel[i].r_addend;
+ val = sym->st_value + rel[i].r_addend;
switch (ELF64_R_TYPE(rel[i].r_info)) {
case R_X86_64_NONE:
@@ -113,16 +113,16 @@
if ((s64)val != *(s32 *)loc)
goto overflow;
break;
- case R_X86_64_PC32:
+ case R_X86_64_PC32:
val -= (u64)loc;
*(u32 *)loc = val;
#if 0
if ((s64)val != *(s32 *)loc)
- goto overflow;
+ goto overflow;
#endif
break;
default:
- printk(KERN_ERR "module %s: Unknown rela relocation: %Lu\n",
+ printk(KERN_ERR "module %s: Unknown rela relocation: %llu\n",
me->name, ELF64_R_TYPE(rel[i].r_info));
return -ENOEXEC;
}
@@ -130,7 +130,7 @@
return 0;
overflow:
- printk(KERN_ERR "overflow in relocation type %d val %Lx\n",
+ printk(KERN_ERR "overflow in relocation type %d val %Lx\n",
(int)ELF64_R_TYPE(rel[i].r_info), val);
printk(KERN_ERR "`%s' likely not compiled with -mcmodel=kernel\n",
me->name);
@@ -143,13 +143,13 @@
unsigned int relsec,
struct module *me)
{
- printk("non add relocation not supported\n");
+ printk(KERN_ERR "non add relocation not supported\n");
return -ENOSYS;
-}
+}
int module_finalize(const Elf_Ehdr *hdr,
- const Elf_Shdr *sechdrs,
- struct module *me)
+ const Elf_Shdr *sechdrs,
+ struct module *me)
{
const Elf_Shdr *s, *text = NULL, *alt = NULL, *locks = NULL,
*para = NULL;
@@ -161,7 +161,7 @@
if (!strcmp(".altinstructions", secstrings + s->sh_name))
alt = s;
if (!strcmp(".smp_locks", secstrings + s->sh_name))
- locks= s;
+ locks = s;
if (!strcmp(".parainstructions", secstrings + s->sh_name))
para = s;
}
diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c
index a649a4c..37cb1bd 100644
--- a/arch/x86/kernel/mpparse.c
+++ b/arch/x86/kernel/mpparse.c
@@ -3,7 +3,7 @@
* compliant MP-table parsing routines.
*
* (c) 1995 Alan Cox, Building #3 <alan@lxorguk.ukuu.org.uk>
- * (c) 1998, 1999, 2000 Ingo Molnar <mingo@redhat.com>
+ * (c) 1998, 1999, 2000, 2009 Ingo Molnar <mingo@redhat.com>
* (c) 2008 Alexey Starikovskiy <astarikovskiy@suse.de>
*/
@@ -29,12 +29,7 @@
#include <asm/setup.h>
#include <asm/smp.h>
-#include <mach_apic.h>
-#ifdef CONFIG_X86_32
-#include <mach_apicdef.h>
-#include <mach_mpparse.h>
-#endif
-
+#include <asm/apic.h>
/*
* Checksum an MP configuration block.
*/
@@ -144,11 +139,11 @@
if (bad_ioapic(m->apicaddr))
return;
- mp_ioapics[nr_ioapics].mp_apicaddr = m->apicaddr;
- mp_ioapics[nr_ioapics].mp_apicid = m->apicid;
- mp_ioapics[nr_ioapics].mp_type = m->type;
- mp_ioapics[nr_ioapics].mp_apicver = m->apicver;
- mp_ioapics[nr_ioapics].mp_flags = m->flags;
+ mp_ioapics[nr_ioapics].apicaddr = m->apicaddr;
+ mp_ioapics[nr_ioapics].apicid = m->apicid;
+ mp_ioapics[nr_ioapics].type = m->type;
+ mp_ioapics[nr_ioapics].apicver = m->apicver;
+ mp_ioapics[nr_ioapics].flags = m->flags;
nr_ioapics++;
}
@@ -160,55 +155,55 @@
m->srcbusirq, m->dstapic, m->dstirq);
}
-static void __init print_mp_irq_info(struct mp_config_intsrc *mp_irq)
+static void __init print_mp_irq_info(struct mpc_intsrc *mp_irq)
{
apic_printk(APIC_VERBOSE, "Int: type %d, pol %d, trig %d, bus %02x,"
" IRQ %02x, APIC ID %x, APIC INT %02x\n",
- mp_irq->mp_irqtype, mp_irq->mp_irqflag & 3,
- (mp_irq->mp_irqflag >> 2) & 3, mp_irq->mp_srcbus,
- mp_irq->mp_srcbusirq, mp_irq->mp_dstapic, mp_irq->mp_dstirq);
+ mp_irq->irqtype, mp_irq->irqflag & 3,
+ (mp_irq->irqflag >> 2) & 3, mp_irq->srcbus,
+ mp_irq->srcbusirq, mp_irq->dstapic, mp_irq->dstirq);
}
static void __init assign_to_mp_irq(struct mpc_intsrc *m,
- struct mp_config_intsrc *mp_irq)
+ struct mpc_intsrc *mp_irq)
{
- mp_irq->mp_dstapic = m->dstapic;
- mp_irq->mp_type = m->type;
- mp_irq->mp_irqtype = m->irqtype;
- mp_irq->mp_irqflag = m->irqflag;
- mp_irq->mp_srcbus = m->srcbus;
- mp_irq->mp_srcbusirq = m->srcbusirq;
- mp_irq->mp_dstirq = m->dstirq;
+ mp_irq->dstapic = m->dstapic;
+ mp_irq->type = m->type;
+ mp_irq->irqtype = m->irqtype;
+ mp_irq->irqflag = m->irqflag;
+ mp_irq->srcbus = m->srcbus;
+ mp_irq->srcbusirq = m->srcbusirq;
+ mp_irq->dstirq = m->dstirq;
}
-static void __init assign_to_mpc_intsrc(struct mp_config_intsrc *mp_irq,
+static void __init assign_to_mpc_intsrc(struct mpc_intsrc *mp_irq,
struct mpc_intsrc *m)
{
- m->dstapic = mp_irq->mp_dstapic;
- m->type = mp_irq->mp_type;
- m->irqtype = mp_irq->mp_irqtype;
- m->irqflag = mp_irq->mp_irqflag;
- m->srcbus = mp_irq->mp_srcbus;
- m->srcbusirq = mp_irq->mp_srcbusirq;
- m->dstirq = mp_irq->mp_dstirq;
+ m->dstapic = mp_irq->dstapic;
+ m->type = mp_irq->type;
+ m->irqtype = mp_irq->irqtype;
+ m->irqflag = mp_irq->irqflag;
+ m->srcbus = mp_irq->srcbus;
+ m->srcbusirq = mp_irq->srcbusirq;
+ m->dstirq = mp_irq->dstirq;
}
-static int __init mp_irq_mpc_intsrc_cmp(struct mp_config_intsrc *mp_irq,
+static int __init mp_irq_mpc_intsrc_cmp(struct mpc_intsrc *mp_irq,
struct mpc_intsrc *m)
{
- if (mp_irq->mp_dstapic != m->dstapic)
+ if (mp_irq->dstapic != m->dstapic)
return 1;
- if (mp_irq->mp_type != m->type)
+ if (mp_irq->type != m->type)
return 2;
- if (mp_irq->mp_irqtype != m->irqtype)
+ if (mp_irq->irqtype != m->irqtype)
return 3;
- if (mp_irq->mp_irqflag != m->irqflag)
+ if (mp_irq->irqflag != m->irqflag)
return 4;
- if (mp_irq->mp_srcbus != m->srcbus)
+ if (mp_irq->srcbus != m->srcbus)
return 5;
- if (mp_irq->mp_srcbusirq != m->srcbusirq)
+ if (mp_irq->srcbusirq != m->srcbusirq)
return 6;
- if (mp_irq->mp_dstirq != m->dstirq)
+ if (mp_irq->dstirq != m->dstirq)
return 7;
return 0;
@@ -292,16 +287,7 @@
return 0;
#ifdef CONFIG_X86_32
- /*
- * need to make sure summit and es7000's mps_oem_check is safe to be
- * called early via genericarch 's mps_oem_check
- */
- if (early) {
-#ifdef CONFIG_X86_NUMAQ
- numaq_mps_oem_check(mpc, oem, str);
-#endif
- } else
- mps_oem_check(mpc, oem, str);
+ generic_mps_oem_check(mpc, oem, str);
#endif
/* save the local APIC address, it might be non-default */
if (!acpi_lapic)
@@ -386,13 +372,13 @@
(*x86_quirks->mpc_record)++;
}
-#ifdef CONFIG_X86_GENERICARCH
- generic_bigsmp_probe();
+#ifdef CONFIG_X86_BIGSMP
+ generic_bigsmp_probe();
#endif
-#ifdef CONFIG_X86_32
- setup_apic_routing();
-#endif
+ if (apic->setup_apic_routing)
+ apic->setup_apic_routing();
+
if (!num_processors)
printk(KERN_ERR "MPTABLE: no processors registered!\n");
return num_processors;
@@ -417,7 +403,7 @@
intsrc.type = MP_INTSRC;
intsrc.irqflag = 0; /* conforming */
intsrc.srcbus = 0;
- intsrc.dstapic = mp_ioapics[0].mp_apicid;
+ intsrc.dstapic = mp_ioapics[0].apicid;
intsrc.irqtype = mp_INT;
@@ -570,14 +556,14 @@
}
}
-static struct intel_mp_floating *mpf_found;
+static struct mpf_intel *mpf_found;
/*
* Scan the memory blocks for an SMP configuration block.
*/
static void __init __get_smp_config(unsigned int early)
{
- struct intel_mp_floating *mpf = mpf_found;
+ struct mpf_intel *mpf = mpf_found;
if (!mpf)
return;
@@ -598,9 +584,9 @@
}
printk(KERN_INFO "Intel MultiProcessor Specification v1.%d\n",
- mpf->mpf_specification);
+ mpf->specification);
#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86_32)
- if (mpf->mpf_feature2 & (1 << 7)) {
+ if (mpf->feature2 & (1 << 7)) {
printk(KERN_INFO " IMCR and PIC compatibility mode.\n");
pic_mode = 1;
} else {
@@ -611,7 +597,7 @@
/*
* Now see if we need to read further.
*/
- if (mpf->mpf_feature1 != 0) {
+ if (mpf->feature1 != 0) {
if (early) {
/*
* local APIC has default address
@@ -621,16 +607,16 @@
}
printk(KERN_INFO "Default MP configuration #%d\n",
- mpf->mpf_feature1);
- construct_default_ISA_mptable(mpf->mpf_feature1);
+ mpf->feature1);
+ construct_default_ISA_mptable(mpf->feature1);
- } else if (mpf->mpf_physptr) {
+ } else if (mpf->physptr) {
/*
* Read the physical hardware table. Anything here will
* override the defaults.
*/
- if (!smp_read_mpc(phys_to_virt(mpf->mpf_physptr), early)) {
+ if (!smp_read_mpc(phys_to_virt(mpf->physptr), early)) {
#ifdef CONFIG_X86_LOCAL_APIC
smp_found_config = 0;
#endif
@@ -688,32 +674,32 @@
unsigned reserve)
{
unsigned int *bp = phys_to_virt(base);
- struct intel_mp_floating *mpf;
+ struct mpf_intel *mpf;
apic_printk(APIC_VERBOSE, "Scan SMP from %p for %ld bytes.\n",
bp, length);
BUILD_BUG_ON(sizeof(*mpf) != 16);
while (length > 0) {
- mpf = (struct intel_mp_floating *)bp;
+ mpf = (struct mpf_intel *)bp;
if ((*bp == SMP_MAGIC_IDENT) &&
- (mpf->mpf_length == 1) &&
+ (mpf->length == 1) &&
!mpf_checksum((unsigned char *)bp, 16) &&
- ((mpf->mpf_specification == 1)
- || (mpf->mpf_specification == 4))) {
+ ((mpf->specification == 1)
+ || (mpf->specification == 4))) {
#ifdef CONFIG_X86_LOCAL_APIC
smp_found_config = 1;
#endif
mpf_found = mpf;
- printk(KERN_INFO "found SMP MP-table at [%p] %08lx\n",
- mpf, virt_to_phys(mpf));
+ printk(KERN_INFO "found SMP MP-table at [%p] %llx\n",
+ mpf, (u64)virt_to_phys(mpf));
if (!reserve)
return 1;
reserve_bootmem_generic(virt_to_phys(mpf), PAGE_SIZE,
BOOTMEM_DEFAULT);
- if (mpf->mpf_physptr) {
+ if (mpf->physptr) {
unsigned long size = PAGE_SIZE;
#ifdef CONFIG_X86_32
/*
@@ -722,15 +708,24 @@
* the bottom is mapped now.
* PC-9800's MPC table places on the very last
* of physical memory; so that simply reserving
- * PAGE_SIZE from mpg->mpf_physptr yields BUG()
+ * PAGE_SIZE from mpf->physptr yields BUG()
* in reserve_bootmem.
+ * also need to make sure physptr is below than
+ * max_low_pfn
+ * we don't need reserve the area above max_low_pfn
*/
unsigned long end = max_low_pfn * PAGE_SIZE;
- if (mpf->mpf_physptr + size > end)
- size = end - mpf->mpf_physptr;
-#endif
- reserve_bootmem_generic(mpf->mpf_physptr, size,
+
+ if (mpf->physptr < end) {
+ if (mpf->physptr + size > end)
+ size = end - mpf->physptr;
+ reserve_bootmem_generic(mpf->physptr, size,
+ BOOTMEM_DEFAULT);
+ }
+#else
+ reserve_bootmem_generic(mpf->physptr, size,
BOOTMEM_DEFAULT);
+#endif
}
return 1;
@@ -809,15 +804,15 @@
/* not legacy */
for (i = 0; i < mp_irq_entries; i++) {
- if (mp_irqs[i].mp_irqtype != mp_INT)
+ if (mp_irqs[i].irqtype != mp_INT)
continue;
- if (mp_irqs[i].mp_irqflag != 0x0f)
+ if (mp_irqs[i].irqflag != 0x0f)
continue;
- if (mp_irqs[i].mp_srcbus != m->srcbus)
+ if (mp_irqs[i].srcbus != m->srcbus)
continue;
- if (mp_irqs[i].mp_srcbusirq != m->srcbusirq)
+ if (mp_irqs[i].srcbusirq != m->srcbusirq)
continue;
if (irq_used[i]) {
/* already claimed */
@@ -922,10 +917,10 @@
if (irq_used[i])
continue;
- if (mp_irqs[i].mp_irqtype != mp_INT)
+ if (mp_irqs[i].irqtype != mp_INT)
continue;
- if (mp_irqs[i].mp_irqflag != 0x0f)
+ if (mp_irqs[i].irqflag != 0x0f)
continue;
if (nr_m_spare > 0) {
@@ -1001,7 +996,7 @@
{
char str[16];
char oem[10];
- struct intel_mp_floating *mpf;
+ struct mpf_intel *mpf;
struct mpc_table *mpc, *mpc_new;
if (!enable_update_mptable)
@@ -1014,19 +1009,19 @@
/*
* Now see if we need to go further.
*/
- if (mpf->mpf_feature1 != 0)
+ if (mpf->feature1 != 0)
return 0;
- if (!mpf->mpf_physptr)
+ if (!mpf->physptr)
return 0;
- mpc = phys_to_virt(mpf->mpf_physptr);
+ mpc = phys_to_virt(mpf->physptr);
if (!smp_check_mpc(mpc, oem, str))
return 0;
- printk(KERN_INFO "mpf: %lx\n", virt_to_phys(mpf));
- printk(KERN_INFO "mpf_physptr: %x\n", mpf->mpf_physptr);
+ printk(KERN_INFO "mpf: %llx\n", (u64)virt_to_phys(mpf));
+ printk(KERN_INFO "physptr: %x\n", mpf->physptr);
if (mpc_new_phys && mpc->length > mpc_new_length) {
mpc_new_phys = 0;
@@ -1047,23 +1042,23 @@
}
printk(KERN_INFO "use in-positon replacing\n");
} else {
- mpf->mpf_physptr = mpc_new_phys;
+ mpf->physptr = mpc_new_phys;
mpc_new = phys_to_virt(mpc_new_phys);
memcpy(mpc_new, mpc, mpc->length);
mpc = mpc_new;
/* check if we can modify that */
- if (mpc_new_phys - mpf->mpf_physptr) {
- struct intel_mp_floating *mpf_new;
+ if (mpc_new_phys - mpf->physptr) {
+ struct mpf_intel *mpf_new;
/* steal 16 bytes from [0, 1k) */
printk(KERN_INFO "mpf new: %x\n", 0x400 - 16);
mpf_new = phys_to_virt(0x400 - 16);
memcpy(mpf_new, mpf, 16);
mpf = mpf_new;
- mpf->mpf_physptr = mpc_new_phys;
+ mpf->physptr = mpc_new_phys;
}
- mpf->mpf_checksum = 0;
- mpf->mpf_checksum -= mpf_checksum((unsigned char *)mpf, 16);
- printk(KERN_INFO "mpf_physptr new: %x\n", mpf->mpf_physptr);
+ mpf->checksum = 0;
+ mpf->checksum -= mpf_checksum((unsigned char *)mpf, 16);
+ printk(KERN_INFO "physptr new: %x\n", mpf->physptr);
}
/*
diff --git a/arch/x86/kernel/msr.c b/arch/x86/kernel/msr.c
index 7262666..3cf3413 100644
--- a/arch/x86/kernel/msr.c
+++ b/arch/x86/kernel/msr.c
@@ -35,10 +35,10 @@
#include <linux/device.h>
#include <linux/cpu.h>
#include <linux/notifier.h>
+#include <linux/uaccess.h>
#include <asm/processor.h>
#include <asm/msr.h>
-#include <asm/uaccess.h>
#include <asm/system.h>
static struct class *msr_class;
diff --git a/arch/x86/kernel/numaq_32.c b/arch/x86/kernel/numaq_32.c
deleted file mode 100644
index f2191d4..0000000
--- a/arch/x86/kernel/numaq_32.c
+++ /dev/null
@@ -1,293 +0,0 @@
-/*
- * Written by: Patricia Gaughen, IBM Corporation
- *
- * Copyright (C) 2002, IBM Corp.
- *
- * All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
- * NON INFRINGEMENT. See the GNU General Public License for more
- * details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- *
- * Send feedback to <gone@us.ibm.com>
- */
-
-#include <linux/mm.h>
-#include <linux/bootmem.h>
-#include <linux/mmzone.h>
-#include <linux/module.h>
-#include <linux/nodemask.h>
-#include <asm/numaq.h>
-#include <asm/topology.h>
-#include <asm/processor.h>
-#include <asm/genapic.h>
-#include <asm/e820.h>
-#include <asm/setup.h>
-
-#define MB_TO_PAGES(addr) ((addr) << (20 - PAGE_SHIFT))
-
-/*
- * Function: smp_dump_qct()
- *
- * Description: gets memory layout from the quad config table. This
- * function also updates node_online_map with the nodes (quads) present.
- */
-static void __init smp_dump_qct(void)
-{
- int node;
- struct eachquadmem *eq;
- struct sys_cfg_data *scd =
- (struct sys_cfg_data *)__va(SYS_CFG_DATA_PRIV_ADDR);
-
- nodes_clear(node_online_map);
- for_each_node(node) {
- if (scd->quads_present31_0 & (1 << node)) {
- node_set_online(node);
- eq = &scd->eq[node];
- /* Convert to pages */
- node_start_pfn[node] = MB_TO_PAGES(
- eq->hi_shrd_mem_start - eq->priv_mem_size);
- node_end_pfn[node] = MB_TO_PAGES(
- eq->hi_shrd_mem_start + eq->hi_shrd_mem_size);
-
- e820_register_active_regions(node, node_start_pfn[node],
- node_end_pfn[node]);
- memory_present(node,
- node_start_pfn[node], node_end_pfn[node]);
- node_remap_size[node] = node_memmap_size_bytes(node,
- node_start_pfn[node],
- node_end_pfn[node]);
- }
- }
-}
-
-
-void __cpuinit numaq_tsc_disable(void)
-{
- if (!found_numaq)
- return;
-
- if (num_online_nodes() > 1) {
- printk(KERN_DEBUG "NUMAQ: disabling TSC\n");
- setup_clear_cpu_cap(X86_FEATURE_TSC);
- }
-}
-
-static int __init numaq_pre_time_init(void)
-{
- numaq_tsc_disable();
- return 0;
-}
-
-int found_numaq;
-/*
- * Have to match translation table entries to main table entries by counter
- * hence the mpc_record variable .... can't see a less disgusting way of
- * doing this ....
- */
-struct mpc_config_translation {
- unsigned char mpc_type;
- unsigned char trans_len;
- unsigned char trans_type;
- unsigned char trans_quad;
- unsigned char trans_global;
- unsigned char trans_local;
- unsigned short trans_reserved;
-};
-
-/* x86_quirks member */
-static int mpc_record;
-static struct mpc_config_translation *translation_table[MAX_MPC_ENTRY]
- __cpuinitdata;
-
-static inline int generate_logical_apicid(int quad, int phys_apicid)
-{
- return (quad << 4) + (phys_apicid ? phys_apicid << 1 : 1);
-}
-
-/* x86_quirks member */
-static int mpc_apic_id(struct mpc_cpu *m)
-{
- int quad = translation_table[mpc_record]->trans_quad;
- int logical_apicid = generate_logical_apicid(quad, m->apicid);
-
- printk(KERN_DEBUG "Processor #%d %u:%u APIC version %d (quad %d, apic %d)\n",
- m->apicid, (m->cpufeature & CPU_FAMILY_MASK) >> 8,
- (m->cpufeature & CPU_MODEL_MASK) >> 4,
- m->apicver, quad, logical_apicid);
- return logical_apicid;
-}
-
-int mp_bus_id_to_node[MAX_MP_BUSSES];
-
-int mp_bus_id_to_local[MAX_MP_BUSSES];
-
-/* x86_quirks member */
-static void mpc_oem_bus_info(struct mpc_bus *m, char *name)
-{
- int quad = translation_table[mpc_record]->trans_quad;
- int local = translation_table[mpc_record]->trans_local;
-
- mp_bus_id_to_node[m->busid] = quad;
- mp_bus_id_to_local[m->busid] = local;
- printk(KERN_INFO "Bus #%d is %s (node %d)\n",
- m->busid, name, quad);
-}
-
-int quad_local_to_mp_bus_id [NR_CPUS/4][4];
-
-/* x86_quirks member */
-static void mpc_oem_pci_bus(struct mpc_bus *m)
-{
- int quad = translation_table[mpc_record]->trans_quad;
- int local = translation_table[mpc_record]->trans_local;
-
- quad_local_to_mp_bus_id[quad][local] = m->busid;
-}
-
-static void __init MP_translation_info(struct mpc_config_translation *m)
-{
- printk(KERN_INFO
- "Translation: record %d, type %d, quad %d, global %d, local %d\n",
- mpc_record, m->trans_type, m->trans_quad, m->trans_global,
- m->trans_local);
-
- if (mpc_record >= MAX_MPC_ENTRY)
- printk(KERN_ERR "MAX_MPC_ENTRY exceeded!\n");
- else
- translation_table[mpc_record] = m; /* stash this for later */
- if (m->trans_quad < MAX_NUMNODES && !node_online(m->trans_quad))
- node_set_online(m->trans_quad);
-}
-
-static int __init mpf_checksum(unsigned char *mp, int len)
-{
- int sum = 0;
-
- while (len--)
- sum += *mp++;
-
- return sum & 0xFF;
-}
-
-/*
- * Read/parse the MPC oem tables
- */
-
-static void __init smp_read_mpc_oem(struct mpc_oemtable *oemtable,
- unsigned short oemsize)
-{
- int count = sizeof(*oemtable); /* the header size */
- unsigned char *oemptr = ((unsigned char *)oemtable) + count;
-
- mpc_record = 0;
- printk(KERN_INFO "Found an OEM MPC table at %8p - parsing it ... \n",
- oemtable);
- if (memcmp(oemtable->signature, MPC_OEM_SIGNATURE, 4)) {
- printk(KERN_WARNING
- "SMP mpc oemtable: bad signature [%c%c%c%c]!\n",
- oemtable->signature[0], oemtable->signature[1],
- oemtable->signature[2], oemtable->signature[3]);
- return;
- }
- if (mpf_checksum((unsigned char *)oemtable, oemtable->length)) {
- printk(KERN_WARNING "SMP oem mptable: checksum error!\n");
- return;
- }
- while (count < oemtable->length) {
- switch (*oemptr) {
- case MP_TRANSLATION:
- {
- struct mpc_config_translation *m =
- (struct mpc_config_translation *)oemptr;
- MP_translation_info(m);
- oemptr += sizeof(*m);
- count += sizeof(*m);
- ++mpc_record;
- break;
- }
- default:
- {
- printk(KERN_WARNING
- "Unrecognised OEM table entry type! - %d\n",
- (int)*oemptr);
- return;
- }
- }
- }
-}
-
-static int __init numaq_setup_ioapic_ids(void)
-{
- /* so can skip it */
- return 1;
-}
-
-static int __init numaq_update_genapic(void)
-{
- genapic->wakeup_cpu = wakeup_secondary_cpu_via_nmi;
-
- return 0;
-}
-
-static struct x86_quirks numaq_x86_quirks __initdata = {
- .arch_pre_time_init = numaq_pre_time_init,
- .arch_time_init = NULL,
- .arch_pre_intr_init = NULL,
- .arch_memory_setup = NULL,
- .arch_intr_init = NULL,
- .arch_trap_init = NULL,
- .mach_get_smp_config = NULL,
- .mach_find_smp_config = NULL,
- .mpc_record = &mpc_record,
- .mpc_apic_id = mpc_apic_id,
- .mpc_oem_bus_info = mpc_oem_bus_info,
- .mpc_oem_pci_bus = mpc_oem_pci_bus,
- .smp_read_mpc_oem = smp_read_mpc_oem,
- .setup_ioapic_ids = numaq_setup_ioapic_ids,
- .update_genapic = numaq_update_genapic,
-};
-
-void numaq_mps_oem_check(struct mpc_table *mpc, char *oem, char *productid)
-{
- if (strncmp(oem, "IBM NUMA", 8))
- printk("Warning! Not a NUMA-Q system!\n");
- else
- found_numaq = 1;
-}
-
-static __init void early_check_numaq(void)
-{
- /*
- * Find possible boot-time SMP configuration:
- */
- early_find_smp_config();
- /*
- * get boot-time SMP configuration:
- */
- if (smp_found_config)
- early_get_smp_config();
-
- if (found_numaq)
- x86_quirks = &numaq_x86_quirks;
-}
-
-int __init get_memcfg_numaq(void)
-{
- early_check_numaq();
- if (!found_numaq)
- return 0;
- smp_dump_qct();
- return 1;
-}
diff --git a/arch/x86/kernel/paravirt-spinlocks.c b/arch/x86/kernel/paravirt-spinlocks.c
index 95777b0..3a7c5a4 100644
--- a/arch/x86/kernel/paravirt-spinlocks.c
+++ b/arch/x86/kernel/paravirt-spinlocks.c
@@ -26,13 +26,3 @@
};
EXPORT_SYMBOL(pv_lock_ops);
-void __init paravirt_use_bytelocks(void)
-{
-#ifdef CONFIG_SMP
- pv_lock_ops.spin_is_locked = __byte_spin_is_locked;
- pv_lock_ops.spin_is_contended = __byte_spin_is_contended;
- pv_lock_ops.spin_lock = __byte_spin_lock;
- pv_lock_ops.spin_trylock = __byte_spin_trylock;
- pv_lock_ops.spin_unlock = __byte_spin_unlock;
-#endif
-}
diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c
index c6520a4..63dd358 100644
--- a/arch/x86/kernel/paravirt.c
+++ b/arch/x86/kernel/paravirt.c
@@ -28,7 +28,6 @@
#include <asm/paravirt.h>
#include <asm/desc.h>
#include <asm/setup.h>
-#include <asm/arch_hooks.h>
#include <asm/pgtable.h>
#include <asm/time.h>
#include <asm/pgalloc.h>
@@ -44,6 +43,17 @@
{
}
+/* identity function, which can be inlined */
+u32 _paravirt_ident_32(u32 x)
+{
+ return x;
+}
+
+u64 _paravirt_ident_64(u64 x)
+{
+ return x;
+}
+
static void __init default_banner(void)
{
printk(KERN_INFO "Booting paravirtualized kernel on %s\n",
@@ -138,9 +148,16 @@
if (opfunc == NULL)
/* If there's no function, patch it with a ud2a (BUG) */
ret = paravirt_patch_insns(insnbuf, len, ud2a, ud2a+sizeof(ud2a));
- else if (opfunc == paravirt_nop)
+ else if (opfunc == _paravirt_nop)
/* If the operation is a nop, then nop the callsite */
ret = paravirt_patch_nop();
+
+ /* identity functions just return their single argument */
+ else if (opfunc == _paravirt_ident_32)
+ ret = paravirt_patch_ident_32(insnbuf, len);
+ else if (opfunc == _paravirt_ident_64)
+ ret = paravirt_patch_ident_64(insnbuf, len);
+
else if (type == PARAVIRT_PATCH(pv_cpu_ops.iret) ||
type == PARAVIRT_PATCH(pv_cpu_ops.irq_enable_sysexit) ||
type == PARAVIRT_PATCH(pv_cpu_ops.usergs_sysret32) ||
@@ -318,10 +335,10 @@
struct pv_irq_ops pv_irq_ops = {
.init_IRQ = native_init_IRQ,
- .save_fl = native_save_fl,
- .restore_fl = native_restore_fl,
- .irq_disable = native_irq_disable,
- .irq_enable = native_irq_enable,
+ .save_fl = __PV_IS_CALLEE_SAVE(native_save_fl),
+ .restore_fl = __PV_IS_CALLEE_SAVE(native_restore_fl),
+ .irq_disable = __PV_IS_CALLEE_SAVE(native_irq_disable),
+ .irq_enable = __PV_IS_CALLEE_SAVE(native_irq_enable),
.safe_halt = native_safe_halt,
.halt = native_halt,
#ifdef CONFIG_X86_64
@@ -399,6 +416,14 @@
#endif
};
+#if defined(CONFIG_X86_32) && !defined(CONFIG_X86_PAE)
+/* 32-bit pagetable entries */
+#define PTE_IDENT __PV_IS_CALLEE_SAVE(_paravirt_ident_32)
+#else
+/* 64-bit pagetable entries */
+#define PTE_IDENT __PV_IS_CALLEE_SAVE(_paravirt_ident_64)
+#endif
+
struct pv_mmu_ops pv_mmu_ops = {
#ifndef CONFIG_X86_64
.pagetable_setup_start = native_pagetable_setup_start,
@@ -450,22 +475,23 @@
.pmd_clear = native_pmd_clear,
#endif
.set_pud = native_set_pud,
- .pmd_val = native_pmd_val,
- .make_pmd = native_make_pmd,
+
+ .pmd_val = PTE_IDENT,
+ .make_pmd = PTE_IDENT,
#if PAGETABLE_LEVELS == 4
- .pud_val = native_pud_val,
- .make_pud = native_make_pud,
+ .pud_val = PTE_IDENT,
+ .make_pud = PTE_IDENT,
+
.set_pgd = native_set_pgd,
#endif
#endif /* PAGETABLE_LEVELS >= 3 */
- .pte_val = native_pte_val,
- .pte_flags = native_pte_flags,
- .pgd_val = native_pgd_val,
+ .pte_val = PTE_IDENT,
+ .pgd_val = PTE_IDENT,
- .make_pte = native_make_pte,
- .make_pgd = native_make_pgd,
+ .make_pte = PTE_IDENT,
+ .make_pgd = PTE_IDENT,
.dup_mmap = paravirt_nop,
.exit_mmap = paravirt_nop,
diff --git a/arch/x86/kernel/paravirt_patch_32.c b/arch/x86/kernel/paravirt_patch_32.c
index 9fe644f..d9f32e6 100644
--- a/arch/x86/kernel/paravirt_patch_32.c
+++ b/arch/x86/kernel/paravirt_patch_32.c
@@ -12,6 +12,18 @@
DEF_NATIVE(pv_cpu_ops, clts, "clts");
DEF_NATIVE(pv_cpu_ops, read_tsc, "rdtsc");
+unsigned paravirt_patch_ident_32(void *insnbuf, unsigned len)
+{
+ /* arg in %eax, return in %eax */
+ return 0;
+}
+
+unsigned paravirt_patch_ident_64(void *insnbuf, unsigned len)
+{
+ /* arg in %edx:%eax, return in %edx:%eax */
+ return 0;
+}
+
unsigned native_patch(u8 type, u16 clobbers, void *ibuf,
unsigned long addr, unsigned len)
{
diff --git a/arch/x86/kernel/paravirt_patch_64.c b/arch/x86/kernel/paravirt_patch_64.c
index 061d01d..3f08f34 100644
--- a/arch/x86/kernel/paravirt_patch_64.c
+++ b/arch/x86/kernel/paravirt_patch_64.c
@@ -19,6 +19,21 @@
DEF_NATIVE(pv_cpu_ops, usergs_sysret32, "swapgs; sysretl");
DEF_NATIVE(pv_cpu_ops, swapgs, "swapgs");
+DEF_NATIVE(, mov32, "mov %edi, %eax");
+DEF_NATIVE(, mov64, "mov %rdi, %rax");
+
+unsigned paravirt_patch_ident_32(void *insnbuf, unsigned len)
+{
+ return paravirt_patch_insns(insnbuf, len,
+ start__mov32, end__mov32);
+}
+
+unsigned paravirt_patch_ident_64(void *insnbuf, unsigned len)
+{
+ return paravirt_patch_insns(insnbuf, len,
+ start__mov64, end__mov64);
+}
+
unsigned native_patch(u8 type, u16 clobbers, void *ibuf,
unsigned long addr, unsigned len)
{
diff --git a/arch/x86/kernel/probe_roms_32.c b/arch/x86/kernel/probe_roms_32.c
index 675a48c..071e7fe 100644
--- a/arch/x86/kernel/probe_roms_32.c
+++ b/arch/x86/kernel/probe_roms_32.c
@@ -18,7 +18,7 @@
#include <asm/setup.h>
#include <asm/sections.h>
#include <asm/io.h>
-#include <setup_arch.h>
+#include <asm/setup_arch.h>
static struct resource system_rom_resource = {
.name = "System ROM",
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index 6d12f7e..87b69d4 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -350,7 +350,7 @@
void __cpuinit select_idle_routine(const struct cpuinfo_x86 *c)
{
-#ifdef CONFIG_X86_SMP
+#ifdef CONFIG_SMP
if (pm_idle == poll_idle && smp_num_siblings > 1) {
printk(KERN_WARNING "WARNING: polling idle and HT enabled,"
" performance may degrade.\n");
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index bd4da2a..646da41 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -11,6 +11,7 @@
#include <stdarg.h>
+#include <linux/stackprotector.h>
#include <linux/cpu.h>
#include <linux/errno.h>
#include <linux/sched.h>
@@ -66,9 +67,6 @@
DEFINE_PER_CPU(struct task_struct *, current_task) = &init_task;
EXPORT_PER_CPU_SYMBOL(current_task);
-DEFINE_PER_CPU(int, cpu_number);
-EXPORT_PER_CPU_SYMBOL(cpu_number);
-
/*
* Return saved PC of a blocked thread.
*/
@@ -94,6 +92,15 @@
{
int cpu = smp_processor_id();
+ /*
+ * If we're the non-boot CPU, nothing set the stack canary up
+ * for us. CPU0 already has it initialized but no harm in
+ * doing it again. This is a good place for updating it, as
+ * we wont ever return from this function (so the invalid
+ * canaries already on the stack wont ever trigger).
+ */
+ boot_init_stack_canary();
+
current_thread_info()->status |= TS_POLLING;
/* endless idle loop with no priority at all */
@@ -108,7 +115,6 @@
play_dead();
local_irq_disable();
- __get_cpu_var(irq_stat).idle_timestamp = jiffies;
/* Don't trace irqs off for idle */
stop_critical_timings();
pm_idle();
@@ -132,7 +138,7 @@
if (user_mode_vm(regs)) {
sp = regs->sp;
ss = regs->ss & 0xffff;
- savesegment(gs, gs);
+ gs = get_user_gs(regs);
} else {
sp = (unsigned long) (®s->sp);
savesegment(ss, ss);
@@ -213,6 +219,7 @@
regs.ds = __USER_DS;
regs.es = __USER_DS;
regs.fs = __KERNEL_PERCPU;
+ regs.gs = __KERNEL_STACK_CANARY;
regs.orig_ax = -1;
regs.ip = (unsigned long) kernel_thread_helper;
regs.cs = __KERNEL_CS | get_kernel_rpl();
@@ -305,7 +312,7 @@
p->thread.ip = (unsigned long) ret_from_fork;
- savesegment(gs, p->thread.gs);
+ task_user_gs(p) = get_user_gs(regs);
tsk = current;
if (unlikely(test_tsk_thread_flag(tsk, TIF_IO_BITMAP))) {
@@ -343,7 +350,7 @@
void
start_thread(struct pt_regs *regs, unsigned long new_ip, unsigned long new_sp)
{
- __asm__("movl %0, %%gs" : : "r"(0));
+ set_user_gs(regs, 0);
regs->fs = 0;
set_fs(USER_DS);
regs->ds = __USER_DS;
@@ -540,7 +547,7 @@
* used %fs or %gs (it does not today), or if the kernel is
* running inside of a hypervisor layer.
*/
- savesegment(gs, prev->gs);
+ lazy_save_gs(prev->gs);
/*
* Load the per-thread Thread-Local Storage descriptor.
@@ -586,31 +593,31 @@
* Restore %gs if needed (which is common)
*/
if (prev->gs | next->gs)
- loadsegment(gs, next->gs);
+ lazy_load_gs(next->gs);
- x86_write_percpu(current_task, next_p);
+ percpu_write(current_task, next_p);
return prev_p;
}
-asmlinkage int sys_fork(struct pt_regs regs)
+int sys_fork(struct pt_regs *regs)
{
- return do_fork(SIGCHLD, regs.sp, ®s, 0, NULL, NULL);
+ return do_fork(SIGCHLD, regs->sp, regs, 0, NULL, NULL);
}
-asmlinkage int sys_clone(struct pt_regs regs)
+int sys_clone(struct pt_regs *regs)
{
unsigned long clone_flags;
unsigned long newsp;
int __user *parent_tidptr, *child_tidptr;
- clone_flags = regs.bx;
- newsp = regs.cx;
- parent_tidptr = (int __user *)regs.dx;
- child_tidptr = (int __user *)regs.di;
+ clone_flags = regs->bx;
+ newsp = regs->cx;
+ parent_tidptr = (int __user *)regs->dx;
+ child_tidptr = (int __user *)regs->di;
if (!newsp)
- newsp = regs.sp;
- return do_fork(clone_flags, newsp, ®s, 0, parent_tidptr, child_tidptr);
+ newsp = regs->sp;
+ return do_fork(clone_flags, newsp, regs, 0, parent_tidptr, child_tidptr);
}
/*
@@ -623,27 +630,27 @@
* do not have enough call-clobbered registers to hold all
* the information you need.
*/
-asmlinkage int sys_vfork(struct pt_regs regs)
+int sys_vfork(struct pt_regs *regs)
{
- return do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, regs.sp, ®s, 0, NULL, NULL);
+ return do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, regs->sp, regs, 0, NULL, NULL);
}
/*
* sys_execve() executes a new program.
*/
-asmlinkage int sys_execve(struct pt_regs regs)
+int sys_execve(struct pt_regs *regs)
{
int error;
char *filename;
- filename = getname((char __user *) regs.bx);
+ filename = getname((char __user *) regs->bx);
error = PTR_ERR(filename);
if (IS_ERR(filename))
goto out;
error = do_execve(filename,
- (char __user * __user *) regs.cx,
- (char __user * __user *) regs.dx,
- ®s);
+ (char __user * __user *) regs->cx,
+ (char __user * __user *) regs->dx,
+ regs);
if (error == 0) {
/* Make sure we don't return using sysenter.. */
set_thread_flag(TIF_IRET);
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index 85b4cb5..836ef65 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -16,6 +16,7 @@
#include <stdarg.h>
+#include <linux/stackprotector.h>
#include <linux/cpu.h>
#include <linux/errno.h>
#include <linux/sched.h>
@@ -47,7 +48,6 @@
#include <asm/processor.h>
#include <asm/i387.h>
#include <asm/mmu_context.h>
-#include <asm/pda.h>
#include <asm/prctl.h>
#include <asm/desc.h>
#include <asm/proto.h>
@@ -58,6 +58,12 @@
asmlinkage extern void ret_from_fork(void);
+DEFINE_PER_CPU(struct task_struct *, current_task) = &init_task;
+EXPORT_PER_CPU_SYMBOL(current_task);
+
+DEFINE_PER_CPU(unsigned long, old_rsp);
+static DEFINE_PER_CPU(unsigned char, is_idle);
+
unsigned long kernel_thread_flags = CLONE_VM | CLONE_UNTRACED;
static ATOMIC_NOTIFIER_HEAD(idle_notifier);
@@ -76,13 +82,13 @@
void enter_idle(void)
{
- write_pda(isidle, 1);
+ percpu_write(is_idle, 1);
atomic_notifier_call_chain(&idle_notifier, IDLE_START, NULL);
}
static void __exit_idle(void)
{
- if (test_and_clear_bit_pda(0, isidle) == 0)
+ if (x86_test_and_clear_bit_percpu(0, is_idle) == 0)
return;
atomic_notifier_call_chain(&idle_notifier, IDLE_END, NULL);
}
@@ -112,6 +118,16 @@
void cpu_idle(void)
{
current_thread_info()->status |= TS_POLLING;
+
+ /*
+ * If we're the non-boot CPU, nothing set the stack canary up
+ * for us. CPU0 already has it initialized but no harm in
+ * doing it again. This is a good place for updating it, as
+ * we wont ever return from this function (so the invalid
+ * canaries already on the stack wont ever trigger).
+ */
+ boot_init_stack_canary();
+
/* endless idle loop with no priority at all */
while (1) {
tick_nohz_stop_sched_tick(1);
@@ -397,7 +413,7 @@
load_gs_index(0);
regs->ip = new_ip;
regs->sp = new_sp;
- write_pda(oldrsp, new_sp);
+ percpu_write(old_rsp, new_sp);
regs->cs = __USER_CS;
regs->ss = __USER_DS;
regs->flags = 0x200;
@@ -618,21 +634,13 @@
/*
* Switch the PDA and FPU contexts.
*/
- prev->usersp = read_pda(oldrsp);
- write_pda(oldrsp, next->usersp);
- write_pda(pcurrent, next_p);
+ prev->usersp = percpu_read(old_rsp);
+ percpu_write(old_rsp, next->usersp);
+ percpu_write(current_task, next_p);
- write_pda(kernelstack,
+ percpu_write(kernel_stack,
(unsigned long)task_stack_page(next_p) +
- THREAD_SIZE - PDA_STACKOFFSET);
-#ifdef CONFIG_CC_STACKPROTECTOR
- write_pda(stack_canary, next_p->stack_canary);
- /*
- * Build time only check to make sure the stack_canary is at
- * offset 40 in the pda; this is a gcc ABI requirement
- */
- BUILD_BUG_ON(offsetof(struct x8664_pda, stack_canary) != 40);
-#endif
+ THREAD_SIZE - KERNEL_STACK_OFFSET);
/*
* Now maybe reload the debug registers and handle I/O bitmaps
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
index 06ca07f..3d9672e 100644
--- a/arch/x86/kernel/ptrace.c
+++ b/arch/x86/kernel/ptrace.c
@@ -75,10 +75,7 @@
static unsigned long *pt_regs_access(struct pt_regs *regs, unsigned long regno)
{
BUILD_BUG_ON(offsetof(struct pt_regs, bx) != 0);
- regno >>= 2;
- if (regno > FS)
- --regno;
- return ®s->bx + regno;
+ return ®s->bx + (regno >> 2);
}
static u16 get_segment_reg(struct task_struct *task, unsigned long offset)
@@ -90,9 +87,10 @@
if (offset != offsetof(struct user_regs_struct, gs))
retval = *pt_regs_access(task_pt_regs(task), offset);
else {
- retval = task->thread.gs;
if (task == current)
- savesegment(gs, retval);
+ retval = get_user_gs(task_pt_regs(task));
+ else
+ retval = task_user_gs(task);
}
return retval;
}
@@ -126,13 +124,10 @@
break;
case offsetof(struct user_regs_struct, gs):
- task->thread.gs = value;
if (task == current)
- /*
- * The user-mode %gs is not affected by
- * kernel entry, so we must update the CPU.
- */
- loadsegment(gs, value);
+ set_user_gs(task_pt_regs(task), value);
+ else
+ task_user_gs(task) = value;
}
return 0;
@@ -273,7 +268,7 @@
if (test_tsk_thread_flag(task, TIF_IA32))
return IA32_PAGE_OFFSET - 3;
#endif
- return TASK_SIZE64 - 7;
+ return TASK_SIZE_MAX - 7;
}
#endif /* CONFIG_X86_32 */
diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c
index 2b46eb4..1cc18d4 100644
--- a/arch/x86/kernel/reboot.c
+++ b/arch/x86/kernel/reboot.c
@@ -14,6 +14,7 @@
#include <asm/reboot.h>
#include <asm/pci_x86.h>
#include <asm/virtext.h>
+#include <asm/cpu.h>
#ifdef CONFIG_X86_32
# include <linux/dmi.h>
@@ -23,8 +24,6 @@
# include <asm/iommu.h>
#endif
-#include <mach_ipi.h>
-
/*
* Power off function, if any
*/
@@ -650,7 +649,7 @@
static void smp_send_nmi_allbutself(void)
{
- send_IPI_allbutself(NMI_VECTOR);
+ apic->send_IPI_allbutself(NMI_VECTOR);
}
static struct notifier_block crash_nmi_nb = {
diff --git a/arch/x86/kernel/relocate_kernel_32.S b/arch/x86/kernel/relocate_kernel_32.S
index a160f31..2064d0a 100644
--- a/arch/x86/kernel/relocate_kernel_32.S
+++ b/arch/x86/kernel/relocate_kernel_32.S
@@ -7,7 +7,7 @@
*/
#include <linux/linkage.h>
-#include <asm/page.h>
+#include <asm/page_types.h>
#include <asm/kexec.h>
#include <asm/processor-flags.h>
diff --git a/arch/x86/kernel/relocate_kernel_64.S b/arch/x86/kernel/relocate_kernel_64.S
index f5afe66..d32cfb2 100644
--- a/arch/x86/kernel/relocate_kernel_64.S
+++ b/arch/x86/kernel/relocate_kernel_64.S
@@ -7,10 +7,10 @@
*/
#include <linux/linkage.h>
-#include <asm/page.h>
+#include <asm/page_types.h>
#include <asm/kexec.h>
#include <asm/processor-flags.h>
-#include <asm/pgtable.h>
+#include <asm/pgtable_types.h>
/*
* Must be relocatable PIC code callable as a C function
@@ -29,122 +29,6 @@
* %rdx start address
*/
- /* map the control page at its virtual address */
-
- movq $0x0000ff8000000000, %r10 /* mask */
- mov $(39 - 3), %cl /* bits to shift */
- movq PTR(VA_CONTROL_PAGE)(%rsi), %r11 /* address to map */
-
- movq %r11, %r9
- andq %r10, %r9
- shrq %cl, %r9
-
- movq PTR(VA_PGD)(%rsi), %r8
- addq %r8, %r9
- movq PTR(PA_PUD_0)(%rsi), %r8
- orq $PAGE_ATTR, %r8
- movq %r8, (%r9)
-
- shrq $9, %r10
- sub $9, %cl
-
- movq %r11, %r9
- andq %r10, %r9
- shrq %cl, %r9
-
- movq PTR(VA_PUD_0)(%rsi), %r8
- addq %r8, %r9
- movq PTR(PA_PMD_0)(%rsi), %r8
- orq $PAGE_ATTR, %r8
- movq %r8, (%r9)
-
- shrq $9, %r10
- sub $9, %cl
-
- movq %r11, %r9
- andq %r10, %r9
- shrq %cl, %r9
-
- movq PTR(VA_PMD_0)(%rsi), %r8
- addq %r8, %r9
- movq PTR(PA_PTE_0)(%rsi), %r8
- orq $PAGE_ATTR, %r8
- movq %r8, (%r9)
-
- shrq $9, %r10
- sub $9, %cl
-
- movq %r11, %r9
- andq %r10, %r9
- shrq %cl, %r9
-
- movq PTR(VA_PTE_0)(%rsi), %r8
- addq %r8, %r9
- movq PTR(PA_CONTROL_PAGE)(%rsi), %r8
- orq $PAGE_ATTR, %r8
- movq %r8, (%r9)
-
- /* identity map the control page at its physical address */
-
- movq $0x0000ff8000000000, %r10 /* mask */
- mov $(39 - 3), %cl /* bits to shift */
- movq PTR(PA_CONTROL_PAGE)(%rsi), %r11 /* address to map */
-
- movq %r11, %r9
- andq %r10, %r9
- shrq %cl, %r9
-
- movq PTR(VA_PGD)(%rsi), %r8
- addq %r8, %r9
- movq PTR(PA_PUD_1)(%rsi), %r8
- orq $PAGE_ATTR, %r8
- movq %r8, (%r9)
-
- shrq $9, %r10
- sub $9, %cl
-
- movq %r11, %r9
- andq %r10, %r9
- shrq %cl, %r9
-
- movq PTR(VA_PUD_1)(%rsi), %r8
- addq %r8, %r9
- movq PTR(PA_PMD_1)(%rsi), %r8
- orq $PAGE_ATTR, %r8
- movq %r8, (%r9)
-
- shrq $9, %r10
- sub $9, %cl
-
- movq %r11, %r9
- andq %r10, %r9
- shrq %cl, %r9
-
- movq PTR(VA_PMD_1)(%rsi), %r8
- addq %r8, %r9
- movq PTR(PA_PTE_1)(%rsi), %r8
- orq $PAGE_ATTR, %r8
- movq %r8, (%r9)
-
- shrq $9, %r10
- sub $9, %cl
-
- movq %r11, %r9
- andq %r10, %r9
- shrq %cl, %r9
-
- movq PTR(VA_PTE_1)(%rsi), %r8
- addq %r8, %r9
- movq PTR(PA_CONTROL_PAGE)(%rsi), %r8
- orq $PAGE_ATTR, %r8
- movq %r8, (%r9)
-
-relocate_new_kernel:
- /* %rdi indirection_page
- * %rsi page_list
- * %rdx start address
- */
-
/* zero out flags, and disable interrupts */
pushq $0
popfq
@@ -156,9 +40,8 @@
/* get physical address of page table now too */
movq PTR(PA_TABLE_PAGE)(%rsi), %rcx
- /* switch to new set of page tables */
- movq PTR(PA_PGD)(%rsi), %r9
- movq %r9, %cr3
+ /* Switch to the identity mapped page tables */
+ movq %rcx, %cr3
/* setup a new stack at the end of the physical control page */
lea PAGE_SIZE(%r8), %rsp
@@ -194,9 +77,7 @@
jmp 1f
1:
- /* Switch to the identity mapped page tables,
- * and flush the TLB.
- */
+ /* Flush the TLB (needed?) */
movq %rcx, %cr3
/* Do the copies */
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index c461f6d..5b85759 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -74,14 +74,15 @@
#include <asm/e820.h>
#include <asm/mpspec.h>
#include <asm/setup.h>
-#include <asm/arch_hooks.h>
#include <asm/efi.h>
+#include <asm/timer.h>
+#include <asm/i8259.h>
#include <asm/sections.h>
#include <asm/dmi.h>
#include <asm/io_apic.h>
#include <asm/ist.h>
#include <asm/vmi.h>
-#include <setup_arch.h>
+#include <asm/setup_arch.h>
#include <asm/bios_ebda.h>
#include <asm/cacheflush.h>
#include <asm/processor.h>
@@ -89,7 +90,7 @@
#include <asm/system.h>
#include <asm/vsyscall.h>
-#include <asm/smp.h>
+#include <asm/cpu.h>
#include <asm/desc.h>
#include <asm/dma.h>
#include <asm/iommu.h>
@@ -97,7 +98,6 @@
#include <asm/mmu_context.h>
#include <asm/proto.h>
-#include <mach_apic.h>
#include <asm/paravirt.h>
#include <asm/hypervisor.h>
@@ -112,6 +112,20 @@
#define ARCH_SETUP
#endif
+unsigned int boot_cpu_id __read_mostly;
+
+#ifdef CONFIG_X86_64
+int default_cpu_present_to_apicid(int mps_cpu)
+{
+ return __default_cpu_present_to_apicid(mps_cpu);
+}
+
+int default_check_phys_apicid_present(int boot_cpu_physical_apicid)
+{
+ return __default_check_phys_apicid_present(boot_cpu_physical_apicid);
+}
+#endif
+
#ifndef CONFIG_DEBUG_BOOT_PARAMS
struct boot_params __initdata boot_params;
#else
@@ -586,19 +600,18 @@
early_param("elfcorehdr", setup_elfcorehdr);
#endif
-static int __init default_update_genapic(void)
+static int __init default_update_apic(void)
{
-#ifdef CONFIG_X86_SMP
-# if defined(CONFIG_X86_GENERICARCH) || defined(CONFIG_X86_64)
- genapic->wakeup_cpu = wakeup_secondary_cpu_via_init;
-# endif
+#ifdef CONFIG_SMP
+ if (!apic->wakeup_cpu)
+ apic->wakeup_cpu = wakeup_secondary_cpu_via_init;
#endif
return 0;
}
static struct x86_quirks default_x86_quirks __initdata = {
- .update_genapic = default_update_genapic,
+ .update_apic = default_update_apic,
};
struct x86_quirks *x86_quirks __initdata = &default_x86_quirks;
@@ -656,7 +669,6 @@
#ifdef CONFIG_X86_32
memcpy(&boot_cpu_data, &new_cpu_data, sizeof(new_cpu_data));
visws_early_detect();
- pre_setup_arch_hook();
#else
printk(KERN_INFO "Command line: %s\n", boot_command_line);
#endif
@@ -823,8 +835,7 @@
#else
num_physpages = max_pfn;
- if (cpu_has_x2apic)
- check_x2apic();
+ check_x2apic();
/* How many end-of-memory variables you have, grandma! */
/* need this before calling reserve_initrd */
@@ -892,12 +903,11 @@
*/
acpi_reserve_bootmem();
#endif
-#ifdef CONFIG_X86_FIND_SMP_CONFIG
/*
* Find and reserve possible boot-time SMP configuration:
*/
find_smp_config();
-#endif
+
reserve_crashkernel();
#ifdef CONFIG_X86_64
@@ -924,9 +934,7 @@
map_vsyscall();
#endif
-#ifdef CONFIG_X86_GENERICARCH
generic_apic_probe();
-#endif
early_quirks();
@@ -977,4 +985,95 @@
#endif
}
+#ifdef CONFIG_X86_32
+/**
+ * x86_quirk_pre_intr_init - initialisation prior to setting up interrupt vectors
+ *
+ * Description:
+ * Perform any necessary interrupt initialisation prior to setting up
+ * the "ordinary" interrupt call gates. For legacy reasons, the ISA
+ * interrupts should be initialised here if the machine emulates a PC
+ * in any way.
+ **/
+void __init x86_quirk_pre_intr_init(void)
+{
+ if (x86_quirks->arch_pre_intr_init) {
+ if (x86_quirks->arch_pre_intr_init())
+ return;
+ }
+ init_ISA_irqs();
+}
+
+/**
+ * x86_quirk_intr_init - post gate setup interrupt initialisation
+ *
+ * Description:
+ * Fill in any interrupts that may have been left out by the general
+ * init_IRQ() routine. interrupts having to do with the machine rather
+ * than the devices on the I/O bus (like APIC interrupts in intel MP
+ * systems) are started here.
+ **/
+void __init x86_quirk_intr_init(void)
+{
+ if (x86_quirks->arch_intr_init) {
+ if (x86_quirks->arch_intr_init())
+ return;
+ }
+}
+
+/**
+ * x86_quirk_trap_init - initialise system specific traps
+ *
+ * Description:
+ * Called as the final act of trap_init(). Used in VISWS to initialise
+ * the various board specific APIC traps.
+ **/
+void __init x86_quirk_trap_init(void)
+{
+ if (x86_quirks->arch_trap_init) {
+ if (x86_quirks->arch_trap_init())
+ return;
+ }
+}
+
+static struct irqaction irq0 = {
+ .handler = timer_interrupt,
+ .flags = IRQF_DISABLED | IRQF_NOBALANCING | IRQF_IRQPOLL | IRQF_TIMER,
+ .mask = CPU_MASK_NONE,
+ .name = "timer"
+};
+
+/**
+ * x86_quirk_pre_time_init - do any specific initialisations before.
+ *
+ **/
+void __init x86_quirk_pre_time_init(void)
+{
+ if (x86_quirks->arch_pre_time_init)
+ x86_quirks->arch_pre_time_init();
+}
+
+/**
+ * x86_quirk_time_init - do any specific initialisations for the system timer.
+ *
+ * Description:
+ * Must plug the system timer interrupt source at HZ into the IRQ listed
+ * in irq_vectors.h:TIMER_IRQ
+ **/
+void __init x86_quirk_time_init(void)
+{
+ if (x86_quirks->arch_time_init) {
+ /*
+ * A nonzero return code does not mean failure, it means
+ * that the architecture quirk does not want any
+ * generic (timer) setup to be performed after this:
+ */
+ if (x86_quirks->arch_time_init())
+ return;
+ }
+
+ irq0.mask = cpumask_of_cpu(0);
+ setup_irq(0, &irq0);
+}
+#endif /* CONFIG_X86_32 */
diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c
index 0116107..d992e6c 100644
--- a/arch/x86/kernel/setup_percpu.c
+++ b/arch/x86/kernel/setup_percpu.c
@@ -13,145 +13,47 @@
#include <asm/mpspec.h>
#include <asm/apicdef.h>
#include <asm/highmem.h>
+#include <asm/proto.h>
+#include <asm/cpumask.h>
+#include <asm/cpu.h>
+#include <asm/stackprotector.h>
-#ifdef CONFIG_X86_LOCAL_APIC
-unsigned int num_processors;
-unsigned disabled_cpus __cpuinitdata;
-/* Processor that is doing the boot up */
-unsigned int boot_cpu_physical_apicid = -1U;
-EXPORT_SYMBOL(boot_cpu_physical_apicid);
-unsigned int max_physical_apicid;
-
-/* Bitmask of physically existing CPUs */
-physid_mask_t phys_cpu_present_map;
-#endif
-
-/* map cpu index to physical APIC ID */
-DEFINE_EARLY_PER_CPU(u16, x86_cpu_to_apicid, BAD_APICID);
-DEFINE_EARLY_PER_CPU(u16, x86_bios_cpu_apicid, BAD_APICID);
-EXPORT_EARLY_PER_CPU_SYMBOL(x86_cpu_to_apicid);
-EXPORT_EARLY_PER_CPU_SYMBOL(x86_bios_cpu_apicid);
-
-#if defined(CONFIG_NUMA) && defined(CONFIG_X86_64)
-#define X86_64_NUMA 1
-
-/* map cpu index to node index */
-DEFINE_EARLY_PER_CPU(int, x86_cpu_to_node_map, NUMA_NO_NODE);
-EXPORT_EARLY_PER_CPU_SYMBOL(x86_cpu_to_node_map);
-
-/* which logical CPUs are on which nodes */
-cpumask_t *node_to_cpumask_map;
-EXPORT_SYMBOL(node_to_cpumask_map);
-
-/* setup node_to_cpumask_map */
-static void __init setup_node_to_cpumask_map(void);
-
+#ifdef CONFIG_DEBUG_PER_CPU_MAPS
+# define DBG(x...) printk(KERN_DEBUG x)
#else
-static inline void setup_node_to_cpumask_map(void) { }
+# define DBG(x...)
#endif
-#if defined(CONFIG_HAVE_SETUP_PER_CPU_AREA) && defined(CONFIG_X86_SMP)
-/*
- * Copy data used in early init routines from the initial arrays to the
- * per cpu data areas. These arrays then become expendable and the
- * *_early_ptr's are zeroed indicating that the static arrays are gone.
- */
-static void __init setup_per_cpu_maps(void)
-{
- int cpu;
-
- for_each_possible_cpu(cpu) {
- per_cpu(x86_cpu_to_apicid, cpu) =
- early_per_cpu_map(x86_cpu_to_apicid, cpu);
- per_cpu(x86_bios_cpu_apicid, cpu) =
- early_per_cpu_map(x86_bios_cpu_apicid, cpu);
-#ifdef X86_64_NUMA
- per_cpu(x86_cpu_to_node_map, cpu) =
- early_per_cpu_map(x86_cpu_to_node_map, cpu);
-#endif
- }
-
- /* indicate the early static arrays will soon be gone */
- early_per_cpu_ptr(x86_cpu_to_apicid) = NULL;
- early_per_cpu_ptr(x86_bios_cpu_apicid) = NULL;
-#ifdef X86_64_NUMA
- early_per_cpu_ptr(x86_cpu_to_node_map) = NULL;
-#endif
-}
-
-#ifdef CONFIG_X86_32
-/*
- * Great future not-so-futuristic plan: make i386 and x86_64 do it
- * the same way
- */
-unsigned long __per_cpu_offset[NR_CPUS] __read_mostly;
-EXPORT_SYMBOL(__per_cpu_offset);
-static inline void setup_cpu_pda_map(void) { }
-
-#elif !defined(CONFIG_SMP)
-static inline void setup_cpu_pda_map(void) { }
-
-#else /* CONFIG_SMP && CONFIG_X86_64 */
-
-/*
- * Allocate cpu_pda pointer table and array via alloc_bootmem.
- */
-static void __init setup_cpu_pda_map(void)
-{
- char *pda;
- struct x8664_pda **new_cpu_pda;
- unsigned long size;
- int cpu;
-
- size = roundup(sizeof(struct x8664_pda), cache_line_size());
-
- /* allocate cpu_pda array and pointer table */
- {
- unsigned long tsize = nr_cpu_ids * sizeof(void *);
- unsigned long asize = size * (nr_cpu_ids - 1);
-
- tsize = roundup(tsize, cache_line_size());
- new_cpu_pda = alloc_bootmem(tsize + asize);
- pda = (char *)new_cpu_pda + tsize;
- }
-
- /* initialize pointer table to static pda's */
- for_each_possible_cpu(cpu) {
- if (cpu == 0) {
- /* leave boot cpu pda in place */
- new_cpu_pda[0] = cpu_pda(0);
- continue;
- }
- new_cpu_pda[cpu] = (struct x8664_pda *)pda;
- new_cpu_pda[cpu]->in_bootmem = 1;
- pda += size;
- }
-
- /* point to new pointer table */
- _cpu_pda = new_cpu_pda;
-}
-
-#endif /* CONFIG_SMP && CONFIG_X86_64 */
+DEFINE_PER_CPU(int, cpu_number);
+EXPORT_PER_CPU_SYMBOL(cpu_number);
#ifdef CONFIG_X86_64
+#define BOOT_PERCPU_OFFSET ((unsigned long)__per_cpu_load)
+#else
+#define BOOT_PERCPU_OFFSET 0
+#endif
-/* correctly size the local cpu masks */
-static void __init setup_cpu_local_masks(void)
+DEFINE_PER_CPU(unsigned long, this_cpu_off) = BOOT_PERCPU_OFFSET;
+EXPORT_PER_CPU_SYMBOL(this_cpu_off);
+
+unsigned long __per_cpu_offset[NR_CPUS] __read_mostly = {
+ [0 ... NR_CPUS-1] = BOOT_PERCPU_OFFSET,
+};
+EXPORT_SYMBOL(__per_cpu_offset);
+
+static inline void setup_percpu_segment(int cpu)
{
- alloc_bootmem_cpumask_var(&cpu_initialized_mask);
- alloc_bootmem_cpumask_var(&cpu_callin_mask);
- alloc_bootmem_cpumask_var(&cpu_callout_mask);
- alloc_bootmem_cpumask_var(&cpu_sibling_setup_mask);
+#ifdef CONFIG_X86_32
+ struct desc_struct gdt;
+
+ pack_descriptor(&gdt, per_cpu_offset(cpu), 0xFFFFF,
+ 0x2 | DESCTYPE_S, 0x8);
+ gdt.s = 1;
+ write_gdt_entry(get_cpu_gdt_table(cpu),
+ GDT_ENTRY_PERCPU, &gdt, DESCTYPE_S);
+#endif
}
-#else /* CONFIG_X86_32 */
-
-static inline void setup_cpu_local_masks(void)
-{
-}
-
-#endif /* CONFIG_X86_32 */
-
/*
* Great future plan:
* Declare PDA itself and support (irqstack,tss,pgd) as per cpu data.
@@ -159,18 +61,12 @@
*/
void __init setup_per_cpu_areas(void)
{
- ssize_t size, old_size;
+ ssize_t size;
char *ptr;
int cpu;
- unsigned long align = 1;
-
- /* Setup cpu_pda map */
- setup_cpu_pda_map();
/* Copy section for each CPU (we discard the original) */
- old_size = PERCPU_ENOUGH_ROOM;
- align = max_t(unsigned long, PAGE_SIZE, align);
- size = roundup(old_size, align);
+ size = roundup(PERCPU_ENOUGH_ROOM, PAGE_SIZE);
pr_info("NR_CPUS:%d nr_cpumask_bits:%d nr_cpu_ids:%d nr_node_ids:%d\n",
NR_CPUS, nr_cpumask_bits, nr_cpu_ids, nr_node_ids);
@@ -179,30 +75,68 @@
for_each_possible_cpu(cpu) {
#ifndef CONFIG_NEED_MULTIPLE_NODES
- ptr = __alloc_bootmem(size, align,
- __pa(MAX_DMA_ADDRESS));
+ ptr = alloc_bootmem_pages(size);
#else
int node = early_cpu_to_node(cpu);
if (!node_online(node) || !NODE_DATA(node)) {
- ptr = __alloc_bootmem(size, align,
- __pa(MAX_DMA_ADDRESS));
+ ptr = alloc_bootmem_pages(size);
pr_info("cpu %d has no node %d or node-local memory\n",
cpu, node);
pr_debug("per cpu data for cpu%d at %016lx\n",
cpu, __pa(ptr));
} else {
- ptr = __alloc_bootmem_node(NODE_DATA(node), size, align,
- __pa(MAX_DMA_ADDRESS));
+ ptr = alloc_bootmem_pages_node(NODE_DATA(node), size);
pr_debug("per cpu data for cpu%d on node%d at %016lx\n",
cpu, node, __pa(ptr));
}
#endif
+
+ memcpy(ptr, __per_cpu_load, __per_cpu_end - __per_cpu_start);
per_cpu_offset(cpu) = ptr - __per_cpu_start;
- memcpy(ptr, __per_cpu_start, __per_cpu_end - __per_cpu_start);
+ per_cpu(this_cpu_off, cpu) = per_cpu_offset(cpu);
+ per_cpu(cpu_number, cpu) = cpu;
+ setup_percpu_segment(cpu);
+ setup_stack_canary_segment(cpu);
+ /*
+ * Copy data used in early init routines from the
+ * initial arrays to the per cpu data areas. These
+ * arrays then become expendable and the *_early_ptr's
+ * are zeroed indicating that the static arrays are
+ * gone.
+ */
+#ifdef CONFIG_X86_LOCAL_APIC
+ per_cpu(x86_cpu_to_apicid, cpu) =
+ early_per_cpu_map(x86_cpu_to_apicid, cpu);
+ per_cpu(x86_bios_cpu_apicid, cpu) =
+ early_per_cpu_map(x86_bios_cpu_apicid, cpu);
+#endif
+#ifdef CONFIG_X86_64
+ per_cpu(irq_stack_ptr, cpu) =
+ per_cpu(irq_stack_union.irq_stack, cpu) +
+ IRQ_STACK_SIZE - 64;
+#ifdef CONFIG_NUMA
+ per_cpu(x86_cpu_to_node_map, cpu) =
+ early_per_cpu_map(x86_cpu_to_node_map, cpu);
+#endif
+#endif
+ /*
+ * Up to this point, the boot CPU has been using .data.init
+ * area. Reload any changed state for the boot CPU.
+ */
+ if (cpu == boot_cpu_id)
+ switch_to_new_gdt(cpu);
+
+ DBG("PERCPU: cpu %4d %p\n", cpu, ptr);
}
- /* Setup percpu data maps */
- setup_per_cpu_maps();
+ /* indicate the early static arrays will soon be gone */
+#ifdef CONFIG_X86_LOCAL_APIC
+ early_per_cpu_ptr(x86_cpu_to_apicid) = NULL;
+ early_per_cpu_ptr(x86_bios_cpu_apicid) = NULL;
+#endif
+#if defined(CONFIG_X86_64) && defined(CONFIG_NUMA)
+ early_per_cpu_ptr(x86_cpu_to_node_map) = NULL;
+#endif
/* Setup node to cpumask map */
setup_node_to_cpumask_map();
@@ -210,199 +144,3 @@
/* Setup cpu initialized, callin, callout masks */
setup_cpu_local_masks();
}
-
-#endif
-
-#ifdef X86_64_NUMA
-
-/*
- * Allocate node_to_cpumask_map based on number of available nodes
- * Requires node_possible_map to be valid.
- *
- * Note: node_to_cpumask() is not valid until after this is done.
- */
-static void __init setup_node_to_cpumask_map(void)
-{
- unsigned int node, num = 0;
- cpumask_t *map;
-
- /* setup nr_node_ids if not done yet */
- if (nr_node_ids == MAX_NUMNODES) {
- for_each_node_mask(node, node_possible_map)
- num = node;
- nr_node_ids = num + 1;
- }
-
- /* allocate the map */
- map = alloc_bootmem_low(nr_node_ids * sizeof(cpumask_t));
-
- pr_debug("Node to cpumask map at %p for %d nodes\n",
- map, nr_node_ids);
-
- /* node_to_cpumask() will now work */
- node_to_cpumask_map = map;
-}
-
-void __cpuinit numa_set_node(int cpu, int node)
-{
- int *cpu_to_node_map = early_per_cpu_ptr(x86_cpu_to_node_map);
-
- if (cpu_pda(cpu) && node != NUMA_NO_NODE)
- cpu_pda(cpu)->nodenumber = node;
-
- if (cpu_to_node_map)
- cpu_to_node_map[cpu] = node;
-
- else if (per_cpu_offset(cpu))
- per_cpu(x86_cpu_to_node_map, cpu) = node;
-
- else
- pr_debug("Setting node for non-present cpu %d\n", cpu);
-}
-
-void __cpuinit numa_clear_node(int cpu)
-{
- numa_set_node(cpu, NUMA_NO_NODE);
-}
-
-#ifndef CONFIG_DEBUG_PER_CPU_MAPS
-
-void __cpuinit numa_add_cpu(int cpu)
-{
- cpu_set(cpu, node_to_cpumask_map[early_cpu_to_node(cpu)]);
-}
-
-void __cpuinit numa_remove_cpu(int cpu)
-{
- cpu_clear(cpu, node_to_cpumask_map[cpu_to_node(cpu)]);
-}
-
-#else /* CONFIG_DEBUG_PER_CPU_MAPS */
-
-/*
- * --------- debug versions of the numa functions ---------
- */
-static void __cpuinit numa_set_cpumask(int cpu, int enable)
-{
- int node = cpu_to_node(cpu);
- cpumask_t *mask;
- char buf[64];
-
- if (node_to_cpumask_map == NULL) {
- printk(KERN_ERR "node_to_cpumask_map NULL\n");
- dump_stack();
- return;
- }
-
- mask = &node_to_cpumask_map[node];
- if (enable)
- cpu_set(cpu, *mask);
- else
- cpu_clear(cpu, *mask);
-
- cpulist_scnprintf(buf, sizeof(buf), mask);
- printk(KERN_DEBUG "%s cpu %d node %d: mask now %s\n",
- enable ? "numa_add_cpu" : "numa_remove_cpu", cpu, node, buf);
-}
-
-void __cpuinit numa_add_cpu(int cpu)
-{
- numa_set_cpumask(cpu, 1);
-}
-
-void __cpuinit numa_remove_cpu(int cpu)
-{
- numa_set_cpumask(cpu, 0);
-}
-
-int cpu_to_node(int cpu)
-{
- if (early_per_cpu_ptr(x86_cpu_to_node_map)) {
- printk(KERN_WARNING
- "cpu_to_node(%d): usage too early!\n", cpu);
- dump_stack();
- return early_per_cpu_ptr(x86_cpu_to_node_map)[cpu];
- }
- return per_cpu(x86_cpu_to_node_map, cpu);
-}
-EXPORT_SYMBOL(cpu_to_node);
-
-/*
- * Same function as cpu_to_node() but used if called before the
- * per_cpu areas are setup.
- */
-int early_cpu_to_node(int cpu)
-{
- if (early_per_cpu_ptr(x86_cpu_to_node_map))
- return early_per_cpu_ptr(x86_cpu_to_node_map)[cpu];
-
- if (!per_cpu_offset(cpu)) {
- printk(KERN_WARNING
- "early_cpu_to_node(%d): no per_cpu area!\n", cpu);
- dump_stack();
- return NUMA_NO_NODE;
- }
- return per_cpu(x86_cpu_to_node_map, cpu);
-}
-
-
-/* empty cpumask */
-static const cpumask_t cpu_mask_none;
-
-/*
- * Returns a pointer to the bitmask of CPUs on Node 'node'.
- */
-const cpumask_t *cpumask_of_node(int node)
-{
- if (node_to_cpumask_map == NULL) {
- printk(KERN_WARNING
- "cpumask_of_node(%d): no node_to_cpumask_map!\n",
- node);
- dump_stack();
- return (const cpumask_t *)&cpu_online_map;
- }
- if (node >= nr_node_ids) {
- printk(KERN_WARNING
- "cpumask_of_node(%d): node > nr_node_ids(%d)\n",
- node, nr_node_ids);
- dump_stack();
- return &cpu_mask_none;
- }
- return &node_to_cpumask_map[node];
-}
-EXPORT_SYMBOL(cpumask_of_node);
-
-/*
- * Returns a bitmask of CPUs on Node 'node'.
- *
- * Side note: this function creates the returned cpumask on the stack
- * so with a high NR_CPUS count, excessive stack space is used. The
- * node_to_cpumask_ptr function should be used whenever possible.
- */
-cpumask_t node_to_cpumask(int node)
-{
- if (node_to_cpumask_map == NULL) {
- printk(KERN_WARNING
- "node_to_cpumask(%d): no node_to_cpumask_map!\n", node);
- dump_stack();
- return cpu_online_map;
- }
- if (node >= nr_node_ids) {
- printk(KERN_WARNING
- "node_to_cpumask(%d): node > nr_node_ids(%d)\n",
- node, nr_node_ids);
- dump_stack();
- return cpu_mask_none;
- }
- return node_to_cpumask_map[node];
-}
-EXPORT_SYMBOL(node_to_cpumask);
-
-/*
- * --------- end of debug versions of the numa functions ---------
- */
-
-#endif /* CONFIG_DEBUG_PER_CPU_MAPS */
-
-#endif /* X86_64_NUMA */
-
diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c
index df0587f..4d34410 100644
--- a/arch/x86/kernel/signal.c
+++ b/arch/x86/kernel/signal.c
@@ -6,7 +6,7 @@
* 2000-06-20 Pentium III FXSR, SSE support by Gareth Hughes
* 2000-2002 x86-64 support by Andi Kleen
*/
-
+#include <linux/perf_counter.h>
#include <linux/sched.h>
#include <linux/mm.h>
#include <linux/smp.h>
@@ -50,27 +50,23 @@
# define FIX_EFLAGS __FIX_EFLAGS
#endif
-#define COPY(x) { \
- err |= __get_user(regs->x, &sc->x); \
-}
+#define COPY(x) do { \
+ get_user_ex(regs->x, &sc->x); \
+} while (0)
-#define COPY_SEG(seg) { \
- unsigned short tmp; \
- err |= __get_user(tmp, &sc->seg); \
- regs->seg = tmp; \
-}
+#define GET_SEG(seg) ({ \
+ unsigned short tmp; \
+ get_user_ex(tmp, &sc->seg); \
+ tmp; \
+})
-#define COPY_SEG_CPL3(seg) { \
- unsigned short tmp; \
- err |= __get_user(tmp, &sc->seg); \
- regs->seg = tmp | 3; \
-}
+#define COPY_SEG(seg) do { \
+ regs->seg = GET_SEG(seg); \
+} while (0)
-#define GET_SEG(seg) { \
- unsigned short tmp; \
- err |= __get_user(tmp, &sc->seg); \
- loadsegment(seg, tmp); \
-}
+#define COPY_SEG_CPL3(seg) do { \
+ regs->seg = GET_SEG(seg) | 3; \
+} while (0)
static int
restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc,
@@ -83,45 +79,49 @@
/* Always make any pending restarted system calls return -EINTR */
current_thread_info()->restart_block.fn = do_no_restart_syscall;
+ get_user_try {
+
#ifdef CONFIG_X86_32
- GET_SEG(gs);
- COPY_SEG(fs);
- COPY_SEG(es);
- COPY_SEG(ds);
+ set_user_gs(regs, GET_SEG(gs));
+ COPY_SEG(fs);
+ COPY_SEG(es);
+ COPY_SEG(ds);
#endif /* CONFIG_X86_32 */
- COPY(di); COPY(si); COPY(bp); COPY(sp); COPY(bx);
- COPY(dx); COPY(cx); COPY(ip);
+ COPY(di); COPY(si); COPY(bp); COPY(sp); COPY(bx);
+ COPY(dx); COPY(cx); COPY(ip);
#ifdef CONFIG_X86_64
- COPY(r8);
- COPY(r9);
- COPY(r10);
- COPY(r11);
- COPY(r12);
- COPY(r13);
- COPY(r14);
- COPY(r15);
+ COPY(r8);
+ COPY(r9);
+ COPY(r10);
+ COPY(r11);
+ COPY(r12);
+ COPY(r13);
+ COPY(r14);
+ COPY(r15);
#endif /* CONFIG_X86_64 */
#ifdef CONFIG_X86_32
- COPY_SEG_CPL3(cs);
- COPY_SEG_CPL3(ss);
+ COPY_SEG_CPL3(cs);
+ COPY_SEG_CPL3(ss);
#else /* !CONFIG_X86_32 */
- /* Kernel saves and restores only the CS segment register on signals,
- * which is the bare minimum needed to allow mixed 32/64-bit code.
- * App's signal handler can save/restore other segments if needed. */
- COPY_SEG_CPL3(cs);
+ /* Kernel saves and restores only the CS segment register on signals,
+ * which is the bare minimum needed to allow mixed 32/64-bit code.
+ * App's signal handler can save/restore other segments if needed. */
+ COPY_SEG_CPL3(cs);
#endif /* CONFIG_X86_32 */
- err |= __get_user(tmpflags, &sc->flags);
- regs->flags = (regs->flags & ~FIX_EFLAGS) | (tmpflags & FIX_EFLAGS);
- regs->orig_ax = -1; /* disable syscall checks */
+ get_user_ex(tmpflags, &sc->flags);
+ regs->flags = (regs->flags & ~FIX_EFLAGS) | (tmpflags & FIX_EFLAGS);
+ regs->orig_ax = -1; /* disable syscall checks */
- err |= __get_user(buf, &sc->fpstate);
- err |= restore_i387_xstate(buf);
+ get_user_ex(buf, &sc->fpstate);
+ err |= restore_i387_xstate(buf);
- err |= __get_user(*pax, &sc->ax);
+ get_user_ex(*pax, &sc->ax);
+ } get_user_catch(err);
+
return err;
}
@@ -131,57 +131,55 @@
{
int err = 0;
-#ifdef CONFIG_X86_32
- {
- unsigned int tmp;
+ put_user_try {
- savesegment(gs, tmp);
- err |= __put_user(tmp, (unsigned int __user *)&sc->gs);
- }
- err |= __put_user(regs->fs, (unsigned int __user *)&sc->fs);
- err |= __put_user(regs->es, (unsigned int __user *)&sc->es);
- err |= __put_user(regs->ds, (unsigned int __user *)&sc->ds);
+#ifdef CONFIG_X86_32
+ put_user_ex(get_user_gs(regs), (unsigned int __user *)&sc->gs);
+ put_user_ex(regs->fs, (unsigned int __user *)&sc->fs);
+ put_user_ex(regs->es, (unsigned int __user *)&sc->es);
+ put_user_ex(regs->ds, (unsigned int __user *)&sc->ds);
#endif /* CONFIG_X86_32 */
- err |= __put_user(regs->di, &sc->di);
- err |= __put_user(regs->si, &sc->si);
- err |= __put_user(regs->bp, &sc->bp);
- err |= __put_user(regs->sp, &sc->sp);
- err |= __put_user(regs->bx, &sc->bx);
- err |= __put_user(regs->dx, &sc->dx);
- err |= __put_user(regs->cx, &sc->cx);
- err |= __put_user(regs->ax, &sc->ax);
+ put_user_ex(regs->di, &sc->di);
+ put_user_ex(regs->si, &sc->si);
+ put_user_ex(regs->bp, &sc->bp);
+ put_user_ex(regs->sp, &sc->sp);
+ put_user_ex(regs->bx, &sc->bx);
+ put_user_ex(regs->dx, &sc->dx);
+ put_user_ex(regs->cx, &sc->cx);
+ put_user_ex(regs->ax, &sc->ax);
#ifdef CONFIG_X86_64
- err |= __put_user(regs->r8, &sc->r8);
- err |= __put_user(regs->r9, &sc->r9);
- err |= __put_user(regs->r10, &sc->r10);
- err |= __put_user(regs->r11, &sc->r11);
- err |= __put_user(regs->r12, &sc->r12);
- err |= __put_user(regs->r13, &sc->r13);
- err |= __put_user(regs->r14, &sc->r14);
- err |= __put_user(regs->r15, &sc->r15);
+ put_user_ex(regs->r8, &sc->r8);
+ put_user_ex(regs->r9, &sc->r9);
+ put_user_ex(regs->r10, &sc->r10);
+ put_user_ex(regs->r11, &sc->r11);
+ put_user_ex(regs->r12, &sc->r12);
+ put_user_ex(regs->r13, &sc->r13);
+ put_user_ex(regs->r14, &sc->r14);
+ put_user_ex(regs->r15, &sc->r15);
#endif /* CONFIG_X86_64 */
- err |= __put_user(current->thread.trap_no, &sc->trapno);
- err |= __put_user(current->thread.error_code, &sc->err);
- err |= __put_user(regs->ip, &sc->ip);
+ put_user_ex(current->thread.trap_no, &sc->trapno);
+ put_user_ex(current->thread.error_code, &sc->err);
+ put_user_ex(regs->ip, &sc->ip);
#ifdef CONFIG_X86_32
- err |= __put_user(regs->cs, (unsigned int __user *)&sc->cs);
- err |= __put_user(regs->flags, &sc->flags);
- err |= __put_user(regs->sp, &sc->sp_at_signal);
- err |= __put_user(regs->ss, (unsigned int __user *)&sc->ss);
+ put_user_ex(regs->cs, (unsigned int __user *)&sc->cs);
+ put_user_ex(regs->flags, &sc->flags);
+ put_user_ex(regs->sp, &sc->sp_at_signal);
+ put_user_ex(regs->ss, (unsigned int __user *)&sc->ss);
#else /* !CONFIG_X86_32 */
- err |= __put_user(regs->flags, &sc->flags);
- err |= __put_user(regs->cs, &sc->cs);
- err |= __put_user(0, &sc->gs);
- err |= __put_user(0, &sc->fs);
+ put_user_ex(regs->flags, &sc->flags);
+ put_user_ex(regs->cs, &sc->cs);
+ put_user_ex(0, &sc->gs);
+ put_user_ex(0, &sc->fs);
#endif /* CONFIG_X86_32 */
- err |= __put_user(fpstate, &sc->fpstate);
+ put_user_ex(fpstate, &sc->fpstate);
- /* non-iBCS2 extensions.. */
- err |= __put_user(mask, &sc->oldmask);
- err |= __put_user(current->thread.cr2, &sc->cr2);
+ /* non-iBCS2 extensions.. */
+ put_user_ex(mask, &sc->oldmask);
+ put_user_ex(current->thread.cr2, &sc->cr2);
+ } put_user_catch(err);
return err;
}
@@ -336,43 +334,41 @@
if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame)))
return -EFAULT;
- err |= __put_user(sig, &frame->sig);
- err |= __put_user(&frame->info, &frame->pinfo);
- err |= __put_user(&frame->uc, &frame->puc);
- err |= copy_siginfo_to_user(&frame->info, info);
- if (err)
- return -EFAULT;
+ put_user_try {
+ put_user_ex(sig, &frame->sig);
+ put_user_ex(&frame->info, &frame->pinfo);
+ put_user_ex(&frame->uc, &frame->puc);
+ err |= copy_siginfo_to_user(&frame->info, info);
- /* Create the ucontext. */
- if (cpu_has_xsave)
- err |= __put_user(UC_FP_XSTATE, &frame->uc.uc_flags);
- else
- err |= __put_user(0, &frame->uc.uc_flags);
- err |= __put_user(0, &frame->uc.uc_link);
- err |= __put_user(current->sas_ss_sp, &frame->uc.uc_stack.ss_sp);
- err |= __put_user(sas_ss_flags(regs->sp),
- &frame->uc.uc_stack.ss_flags);
- err |= __put_user(current->sas_ss_size, &frame->uc.uc_stack.ss_size);
- err |= setup_sigcontext(&frame->uc.uc_mcontext, fpstate,
- regs, set->sig[0]);
- err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set));
- if (err)
- return -EFAULT;
+ /* Create the ucontext. */
+ if (cpu_has_xsave)
+ put_user_ex(UC_FP_XSTATE, &frame->uc.uc_flags);
+ else
+ put_user_ex(0, &frame->uc.uc_flags);
+ put_user_ex(0, &frame->uc.uc_link);
+ put_user_ex(current->sas_ss_sp, &frame->uc.uc_stack.ss_sp);
+ put_user_ex(sas_ss_flags(regs->sp),
+ &frame->uc.uc_stack.ss_flags);
+ put_user_ex(current->sas_ss_size, &frame->uc.uc_stack.ss_size);
+ err |= setup_sigcontext(&frame->uc.uc_mcontext, fpstate,
+ regs, set->sig[0]);
+ err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set));
- /* Set up to return from userspace. */
- restorer = VDSO32_SYMBOL(current->mm->context.vdso, rt_sigreturn);
- if (ka->sa.sa_flags & SA_RESTORER)
- restorer = ka->sa.sa_restorer;
- err |= __put_user(restorer, &frame->pretcode);
+ /* Set up to return from userspace. */
+ restorer = VDSO32_SYMBOL(current->mm->context.vdso, rt_sigreturn);
+ if (ka->sa.sa_flags & SA_RESTORER)
+ restorer = ka->sa.sa_restorer;
+ put_user_ex(restorer, &frame->pretcode);
- /*
- * This is movl $__NR_rt_sigreturn, %ax ; int $0x80
- *
- * WE DO NOT USE IT ANY MORE! It's only left here for historical
- * reasons and because gdb uses it as a signature to notice
- * signal handler stack frames.
- */
- err |= __put_user(*((u64 *)&rt_retcode), (u64 *)frame->retcode);
+ /*
+ * This is movl $__NR_rt_sigreturn, %ax ; int $0x80
+ *
+ * WE DO NOT USE IT ANY MORE! It's only left here for historical
+ * reasons and because gdb uses it as a signature to notice
+ * signal handler stack frames.
+ */
+ put_user_ex(*((u64 *)&rt_retcode), (u64 *)frame->retcode);
+ } put_user_catch(err);
if (err)
return -EFAULT;
@@ -436,28 +432,30 @@
return -EFAULT;
}
- /* Create the ucontext. */
- if (cpu_has_xsave)
- err |= __put_user(UC_FP_XSTATE, &frame->uc.uc_flags);
- else
- err |= __put_user(0, &frame->uc.uc_flags);
- err |= __put_user(0, &frame->uc.uc_link);
- err |= __put_user(me->sas_ss_sp, &frame->uc.uc_stack.ss_sp);
- err |= __put_user(sas_ss_flags(regs->sp),
- &frame->uc.uc_stack.ss_flags);
- err |= __put_user(me->sas_ss_size, &frame->uc.uc_stack.ss_size);
- err |= setup_sigcontext(&frame->uc.uc_mcontext, fp, regs, set->sig[0]);
- err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set));
+ put_user_try {
+ /* Create the ucontext. */
+ if (cpu_has_xsave)
+ put_user_ex(UC_FP_XSTATE, &frame->uc.uc_flags);
+ else
+ put_user_ex(0, &frame->uc.uc_flags);
+ put_user_ex(0, &frame->uc.uc_link);
+ put_user_ex(me->sas_ss_sp, &frame->uc.uc_stack.ss_sp);
+ put_user_ex(sas_ss_flags(regs->sp),
+ &frame->uc.uc_stack.ss_flags);
+ put_user_ex(me->sas_ss_size, &frame->uc.uc_stack.ss_size);
+ err |= setup_sigcontext(&frame->uc.uc_mcontext, fp, regs, set->sig[0]);
+ err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set));
- /* Set up to return from userspace. If provided, use a stub
- already in userspace. */
- /* x86-64 should always use SA_RESTORER. */
- if (ka->sa.sa_flags & SA_RESTORER) {
- err |= __put_user(ka->sa.sa_restorer, &frame->pretcode);
- } else {
- /* could use a vstub here */
- return -EFAULT;
- }
+ /* Set up to return from userspace. If provided, use a stub
+ already in userspace. */
+ /* x86-64 should always use SA_RESTORER. */
+ if (ka->sa.sa_flags & SA_RESTORER) {
+ put_user_ex(ka->sa.sa_restorer, &frame->pretcode);
+ } else {
+ /* could use a vstub here */
+ err |= -EFAULT;
+ }
+ } put_user_catch(err);
if (err)
return -EFAULT;
@@ -509,31 +507,41 @@
struct old_sigaction __user *oact)
{
struct k_sigaction new_ka, old_ka;
- int ret;
+ int ret = 0;
if (act) {
old_sigset_t mask;
- if (!access_ok(VERIFY_READ, act, sizeof(*act)) ||
- __get_user(new_ka.sa.sa_handler, &act->sa_handler) ||
- __get_user(new_ka.sa.sa_restorer, &act->sa_restorer))
+ if (!access_ok(VERIFY_READ, act, sizeof(*act)))
return -EFAULT;
- __get_user(new_ka.sa.sa_flags, &act->sa_flags);
- __get_user(mask, &act->sa_mask);
+ get_user_try {
+ get_user_ex(new_ka.sa.sa_handler, &act->sa_handler);
+ get_user_ex(new_ka.sa.sa_flags, &act->sa_flags);
+ get_user_ex(mask, &act->sa_mask);
+ get_user_ex(new_ka.sa.sa_restorer, &act->sa_restorer);
+ } get_user_catch(ret);
+
+ if (ret)
+ return -EFAULT;
siginitset(&new_ka.sa.sa_mask, mask);
}
ret = do_sigaction(sig, act ? &new_ka : NULL, oact ? &old_ka : NULL);
if (!ret && oact) {
- if (!access_ok(VERIFY_WRITE, oact, sizeof(*oact)) ||
- __put_user(old_ka.sa.sa_handler, &oact->sa_handler) ||
- __put_user(old_ka.sa.sa_restorer, &oact->sa_restorer))
+ if (!access_ok(VERIFY_WRITE, oact, sizeof(*oact)))
return -EFAULT;
- __put_user(old_ka.sa.sa_flags, &oact->sa_flags);
- __put_user(old_ka.sa.sa_mask.sig[0], &oact->sa_mask);
+ put_user_try {
+ put_user_ex(old_ka.sa.sa_handler, &oact->sa_handler);
+ put_user_ex(old_ka.sa.sa_flags, &oact->sa_flags);
+ put_user_ex(old_ka.sa.sa_mask.sig[0], &oact->sa_mask);
+ put_user_ex(old_ka.sa.sa_restorer, &oact->sa_restorer);
+ } put_user_catch(ret);
+
+ if (ret)
+ return -EFAULT;
}
return ret;
@@ -541,14 +549,9 @@
#endif /* CONFIG_X86_32 */
#ifdef CONFIG_X86_32
-asmlinkage int sys_sigaltstack(unsigned long bx)
+int sys_sigaltstack(struct pt_regs *regs)
{
- /*
- * This is needed to make gcc realize it doesn't own the
- * "struct pt_regs"
- */
- struct pt_regs *regs = (struct pt_regs *)&bx;
- const stack_t __user *uss = (const stack_t __user *)bx;
+ const stack_t __user *uss = (const stack_t __user *)regs->bx;
stack_t __user *uoss = (stack_t __user *)regs->cx;
return do_sigaltstack(uss, uoss, regs->sp);
@@ -566,14 +569,12 @@
* Do a signal return; undo the signal stack.
*/
#ifdef CONFIG_X86_32
-asmlinkage unsigned long sys_sigreturn(unsigned long __unused)
+unsigned long sys_sigreturn(struct pt_regs *regs)
{
struct sigframe __user *frame;
- struct pt_regs *regs;
unsigned long ax;
sigset_t set;
- regs = (struct pt_regs *) &__unused;
frame = (struct sigframe __user *)(regs->sp - 8);
if (!access_ok(VERIFY_READ, frame, sizeof(*frame)))
@@ -600,7 +601,7 @@
}
#endif /* CONFIG_X86_32 */
-static long do_rt_sigreturn(struct pt_regs *regs)
+long sys_rt_sigreturn(struct pt_regs *regs)
{
struct rt_sigframe __user *frame;
unsigned long ax;
@@ -631,25 +632,6 @@
return 0;
}
-#ifdef CONFIG_X86_32
-/*
- * Note: do not pass in pt_regs directly as with tail-call optimization
- * GCC will incorrectly stomp on the caller's frame and corrupt user-space
- * register state:
- */
-asmlinkage int sys_rt_sigreturn(unsigned long __unused)
-{
- struct pt_regs *regs = (struct pt_regs *)&__unused;
-
- return do_rt_sigreturn(regs);
-}
-#else /* !CONFIG_X86_32 */
-asmlinkage long sys_rt_sigreturn(struct pt_regs *regs)
-{
- return do_rt_sigreturn(regs);
-}
-#endif /* CONFIG_X86_32 */
-
/*
* OK, we're invoking a handler:
*/
@@ -893,6 +875,11 @@
tracehook_notify_resume(regs);
}
+ if (thread_info_flags & _TIF_PERF_COUNTERS) {
+ clear_thread_flag(TIF_PERF_COUNTERS);
+ perf_counter_notify(regs);
+ }
+
#ifdef CONFIG_X86_32
clear_thread_flag(TIF_IRET);
#endif /* CONFIG_X86_32 */
diff --git a/arch/x86/kernel/smp.c b/arch/x86/kernel/smp.c
index e6faa33..13f33ea 100644
--- a/arch/x86/kernel/smp.c
+++ b/arch/x86/kernel/smp.c
@@ -2,7 +2,7 @@
* Intel SMP support routines.
*
* (c) 1995 Alan Cox, Building #3 <alan@lxorguk.ukuu.org.uk>
- * (c) 1998-99, 2000 Ingo Molnar <mingo@redhat.com>
+ * (c) 1998-99, 2000, 2009 Ingo Molnar <mingo@redhat.com>
* (c) 2002,2003 Andi Kleen, SuSE Labs.
*
* i386 and x86_64 integration by Glauber Costa <gcosta@redhat.com>
@@ -26,8 +26,7 @@
#include <asm/tlbflush.h>
#include <asm/mmu_context.h>
#include <asm/proto.h>
-#include <mach_ipi.h>
-#include <mach_apic.h>
+#include <asm/apic.h>
/*
* Some notes on x86 processor bugs affecting SMP operation:
*
@@ -118,12 +117,12 @@
WARN_ON(1);
return;
}
- send_IPI_mask(cpumask_of(cpu), RESCHEDULE_VECTOR);
+ apic->send_IPI_mask(cpumask_of(cpu), RESCHEDULE_VECTOR);
}
void native_send_call_func_single_ipi(int cpu)
{
- send_IPI_mask(cpumask_of(cpu), CALL_FUNCTION_SINGLE_VECTOR);
+ apic->send_IPI_mask(cpumask_of(cpu), CALL_FUNCTION_SINGLE_VECTOR);
}
void native_send_call_func_ipi(const struct cpumask *mask)
@@ -131,7 +130,7 @@
cpumask_var_t allbutself;
if (!alloc_cpumask_var(&allbutself, GFP_ATOMIC)) {
- send_IPI_mask(mask, CALL_FUNCTION_VECTOR);
+ apic->send_IPI_mask(mask, CALL_FUNCTION_VECTOR);
return;
}
@@ -140,9 +139,9 @@
if (cpumask_equal(mask, allbutself) &&
cpumask_equal(cpu_online_mask, cpu_callout_mask))
- send_IPI_allbutself(CALL_FUNCTION_VECTOR);
+ apic->send_IPI_allbutself(CALL_FUNCTION_VECTOR);
else
- send_IPI_mask(mask, CALL_FUNCTION_VECTOR);
+ apic->send_IPI_mask(mask, CALL_FUNCTION_VECTOR);
free_cpumask_var(allbutself);
}
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index bb1a3b1..9ce6663 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -2,7 +2,7 @@
* x86 SMP booting functions
*
* (c) 1995 Alan Cox, Building #3 <alan@lxorguk.ukuu.org.uk>
- * (c) 1998, 1999, 2000 Ingo Molnar <mingo@redhat.com>
+ * (c) 1998, 1999, 2000, 2009 Ingo Molnar <mingo@redhat.com>
* Copyright 2001 Andi Kleen, SuSE Labs.
*
* Much of the core SMP work is based on previous work by Thomas Radke, to
@@ -53,7 +53,6 @@
#include <asm/nmi.h>
#include <asm/irq.h>
#include <asm/idle.h>
-#include <asm/smp.h>
#include <asm/trampoline.h>
#include <asm/cpu.h>
#include <asm/numa.h>
@@ -61,13 +60,12 @@
#include <asm/tlbflush.h>
#include <asm/mtrr.h>
#include <asm/vmi.h>
-#include <asm/genapic.h>
+#include <asm/apic.h>
#include <asm/setup.h>
+#include <asm/uv/uv.h>
#include <linux/mc146818rtc.h>
-#include <mach_apic.h>
-#include <mach_wakecpu.h>
-#include <smpboot_hooks.h>
+#include <asm/smpboot_hooks.h>
#ifdef CONFIG_X86_32
u8 apicid_2_node[MAX_APICID];
@@ -163,7 +161,7 @@
{
int cpu = smp_processor_id();
int apicid = logical_smp_processor_id();
- int node = apicid_to_node(apicid);
+ int node = apic->apicid_to_node(apicid);
if (!node_online(node))
node = first_online_node;
@@ -196,7 +194,8 @@
* our local APIC. We have to wait for the IPI or we'll
* lock up on an APIC access.
*/
- wait_for_init_deassert(&init_deasserted);
+ if (apic->wait_for_init_deassert)
+ apic->wait_for_init_deassert(&init_deasserted);
/*
* (This works even if the APIC is not enabled.)
@@ -243,7 +242,8 @@
*/
pr_debug("CALLIN, before setup_local_APIC().\n");
- smp_callin_clear_local_apic();
+ if (apic->smp_callin_clear_local_apic)
+ apic->smp_callin_clear_local_apic();
setup_local_APIC();
end_local_APIC_setup();
map_cpu_to_logical_apicid();
@@ -583,7 +583,7 @@
/* Target chip */
/* Boot on the stack */
/* Kick the second */
- apic_icr_write(APIC_DM_NMI | APIC_DEST_LOGICAL, logical_apicid);
+ apic_icr_write(APIC_DM_NMI | apic->dest_logical, logical_apicid);
pr_debug("Waiting for send to finish...\n");
send_status = safe_apic_wait_icr_idle();
@@ -745,78 +745,22 @@
complete(&c_idle->done);
}
-#ifdef CONFIG_X86_64
-
-/* __ref because it's safe to call free_bootmem when after_bootmem == 0. */
-static void __ref free_bootmem_pda(struct x8664_pda *oldpda)
-{
- if (!after_bootmem)
- free_bootmem((unsigned long)oldpda, sizeof(*oldpda));
-}
-
-/*
- * Allocate node local memory for the AP pda.
- *
- * Must be called after the _cpu_pda pointer table is initialized.
- */
-int __cpuinit get_local_pda(int cpu)
-{
- struct x8664_pda *oldpda, *newpda;
- unsigned long size = sizeof(struct x8664_pda);
- int node = cpu_to_node(cpu);
-
- if (cpu_pda(cpu) && !cpu_pda(cpu)->in_bootmem)
- return 0;
-
- oldpda = cpu_pda(cpu);
- newpda = kmalloc_node(size, GFP_ATOMIC, node);
- if (!newpda) {
- printk(KERN_ERR "Could not allocate node local PDA "
- "for CPU %d on node %d\n", cpu, node);
-
- if (oldpda)
- return 0; /* have a usable pda */
- else
- return -1;
- }
-
- if (oldpda) {
- memcpy(newpda, oldpda, size);
- free_bootmem_pda(oldpda);
- }
-
- newpda->in_bootmem = 0;
- cpu_pda(cpu) = newpda;
- return 0;
-}
-#endif /* CONFIG_X86_64 */
-
-static int __cpuinit do_boot_cpu(int apicid, int cpu)
/*
* NOTE - on most systems this is a PHYSICAL apic ID, but on multiquad
* (ie clustered apic addressing mode), this is a LOGICAL apic ID.
- * Returns zero if CPU booted OK, else error code from wakeup_secondary_cpu.
+ * Returns zero if CPU booted OK, else error code from ->wakeup_cpu.
*/
+static int __cpuinit do_boot_cpu(int apicid, int cpu)
{
unsigned long boot_error = 0;
- int timeout;
unsigned long start_ip;
- unsigned short nmi_high = 0, nmi_low = 0;
+ int timeout;
struct create_idle c_idle = {
- .cpu = cpu,
- .done = COMPLETION_INITIALIZER_ONSTACK(c_idle.done),
+ .cpu = cpu,
+ .done = COMPLETION_INITIALIZER_ONSTACK(c_idle.done),
};
- INIT_WORK(&c_idle.work, do_fork_idle);
-#ifdef CONFIG_X86_64
- /* Allocate node local memory for AP pdas */
- if (cpu > 0) {
- boot_error = get_local_pda(cpu);
- if (boot_error)
- goto restore_state;
- /* if can't get pda memory, can't start cpu */
- }
-#endif
+ INIT_WORK(&c_idle.work, do_fork_idle);
alternatives_smp_switch(1);
@@ -847,14 +791,16 @@
set_idle_for_cpu(cpu, c_idle.idle);
do_rest:
-#ifdef CONFIG_X86_32
per_cpu(current_task, cpu) = c_idle.idle;
- init_gdt(cpu);
+#ifdef CONFIG_X86_32
/* Stack for startup_32 can be just as for start_secondary onwards */
irq_ctx_init(cpu);
#else
- cpu_pda(cpu)->pcurrent = c_idle.idle;
clear_tsk_thread_flag(c_idle.idle, TIF_FORK);
+ initial_gs = per_cpu_offset(cpu);
+ per_cpu(kernel_stack, cpu) =
+ (unsigned long)task_stack_page(c_idle.idle) -
+ KERNEL_STACK_OFFSET + THREAD_SIZE;
#endif
early_gdt_descr.address = (unsigned long)get_cpu_gdt_table(cpu);
initial_code = (unsigned long)start_secondary;
@@ -878,8 +824,6 @@
pr_debug("Setting warm reset code and vector.\n");
- store_NMI_vector(&nmi_high, &nmi_low);
-
smpboot_setup_warm_reset_vector(start_ip);
/*
* Be paranoid about clearing APIC errors.
@@ -893,7 +837,7 @@
/*
* Starting actual IPI sequence...
*/
- boot_error = wakeup_secondary_cpu(apicid, start_ip);
+ boot_error = apic->wakeup_cpu(apicid, start_ip);
if (!boot_error) {
/*
@@ -927,13 +871,11 @@
else
/* trampoline code not run */
printk(KERN_ERR "Not responding.\n");
- if (get_uv_system_type() != UV_NON_UNIQUE_APIC)
- inquire_remote_apic(apicid);
+ if (apic->inquire_remote_apic)
+ apic->inquire_remote_apic(apicid);
}
}
-#ifdef CONFIG_X86_64
-restore_state:
-#endif
+
if (boot_error) {
/* Try to put things back the way they were before ... */
numa_remove_cpu(cpu); /* was set by numa_add_cpu */
@@ -961,7 +903,7 @@
int __cpuinit native_cpu_up(unsigned int cpu)
{
- int apicid = cpu_present_to_apicid(cpu);
+ int apicid = apic->cpu_present_to_apicid(cpu);
unsigned long flags;
int err;
@@ -1054,14 +996,14 @@
{
preempt_disable();
-#if defined(CONFIG_X86_PC) && defined(CONFIG_X86_32)
+#if !defined(CONFIG_X86_BIGSMP) && defined(CONFIG_X86_32)
if (def_to_bigsmp && nr_cpu_ids > 8) {
unsigned int cpu;
unsigned nr;
printk(KERN_WARNING
"More than 8 CPUs detected - skipping them.\n"
- "Use CONFIG_X86_GENERICARCH and CONFIG_X86_BIGSMP.\n");
+ "Use CONFIG_X86_BIGSMP.\n");
nr = 0;
for_each_present_cpu(cpu) {
@@ -1107,7 +1049,7 @@
* Should not be necessary because the MP table should list the boot
* CPU too, but we do it for the sake of robustness anyway.
*/
- if (!check_phys_apicid_present(boot_cpu_physical_apicid)) {
+ if (!apic->check_phys_apicid_present(boot_cpu_physical_apicid)) {
printk(KERN_NOTICE
"weird, boot CPU (#%d) not listed by the BIOS.\n",
boot_cpu_physical_apicid);
@@ -1125,6 +1067,7 @@
printk(KERN_ERR "... forcing use of dummy APIC emulation."
"(tell your hw vendor)\n");
smpboot_clear_io_apic();
+ arch_disable_smp_support();
return -1;
}
@@ -1181,9 +1124,9 @@
current_thread_info()->cpu = 0; /* needed? */
set_cpu_sibling_map(0);
-#ifdef CONFIG_X86_64
enable_IR_x2apic();
- setup_apic_routing();
+#ifdef CONFIG_X86_64
+ default_setup_apic_routing();
#endif
if (smp_sanity_check(max_cpus) < 0) {
@@ -1207,18 +1150,18 @@
*/
setup_local_APIC();
-#ifdef CONFIG_X86_64
/*
* Enable IO APIC before setting up error vector
*/
if (!skip_ioapic_setup && nr_ioapics)
enable_IO_APIC();
-#endif
+
end_local_APIC_setup();
map_cpu_to_logical_apicid();
- setup_portio_remap();
+ if (apic->setup_portio_remap)
+ apic->setup_portio_remap();
smpboot_setup_io_apic();
/*
@@ -1240,10 +1183,7 @@
void __init native_smp_prepare_boot_cpu(void)
{
int me = smp_processor_id();
-#ifdef CONFIG_X86_32
- init_gdt(me);
-#endif
- switch_to_new_gdt();
+ switch_to_new_gdt(me);
/* already set me in cpu_online_mask in boot_cpu_init() */
cpumask_set_cpu(me, cpu_callout_mask);
per_cpu(cpu_state, me) = CPU_ONLINE;
diff --git a/arch/x86/kernel/smpcommon.c b/arch/x86/kernel/smpcommon.c
deleted file mode 100644
index 397e309..0000000
--- a/arch/x86/kernel/smpcommon.c
+++ /dev/null
@@ -1,30 +0,0 @@
-/*
- * SMP stuff which is common to all sub-architectures.
- */
-#include <linux/module.h>
-#include <asm/smp.h>
-
-#ifdef CONFIG_X86_32
-DEFINE_PER_CPU(unsigned long, this_cpu_off);
-EXPORT_PER_CPU_SYMBOL(this_cpu_off);
-
-/*
- * Initialize the CPU's GDT. This is either the boot CPU doing itself
- * (still using the master per-cpu area), or a CPU doing it for a
- * secondary which will soon come up.
- */
-__cpuinit void init_gdt(int cpu)
-{
- struct desc_struct gdt;
-
- pack_descriptor(&gdt, __per_cpu_offset[cpu], 0xFFFFF,
- 0x2 | DESCTYPE_S, 0x8);
- gdt.s = 1;
-
- write_gdt_entry(get_cpu_gdt_table(cpu),
- GDT_ENTRY_PERCPU, &gdt, DESCTYPE_S);
-
- per_cpu(this_cpu_off, cpu) = __per_cpu_offset[cpu];
- per_cpu(cpu_number, cpu) = cpu;
-}
-#endif
diff --git a/arch/x86/kernel/stacktrace.c b/arch/x86/kernel/stacktrace.c
index 10786af..f7bddc2 100644
--- a/arch/x86/kernel/stacktrace.c
+++ b/arch/x86/kernel/stacktrace.c
@@ -1,7 +1,7 @@
/*
* Stack trace management functions
*
- * Copyright (C) 2006 Red Hat, Inc., Ingo Molnar <mingo@redhat.com>
+ * Copyright (C) 2006-2009 Red Hat, Inc., Ingo Molnar <mingo@redhat.com>
*/
#include <linux/sched.h>
#include <linux/stacktrace.h>
diff --git a/arch/x86/kernel/summit_32.c b/arch/x86/kernel/summit_32.c
deleted file mode 100644
index 7b98785..0000000
--- a/arch/x86/kernel/summit_32.c
+++ /dev/null
@@ -1,188 +0,0 @@
-/*
- * IBM Summit-Specific Code
- *
- * Written By: Matthew Dobson, IBM Corporation
- *
- * Copyright (c) 2003 IBM Corp.
- *
- * All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or (at
- * your option) any later version.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
- * NON INFRINGEMENT. See the GNU General Public License for more
- * details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- *
- * Send feedback to <colpatch@us.ibm.com>
- *
- */
-
-#include <linux/mm.h>
-#include <linux/init.h>
-#include <asm/io.h>
-#include <asm/bios_ebda.h>
-#include <asm/summit/mpparse.h>
-
-static struct rio_table_hdr *rio_table_hdr __initdata;
-static struct scal_detail *scal_devs[MAX_NUMNODES] __initdata;
-static struct rio_detail *rio_devs[MAX_NUMNODES*4] __initdata;
-
-#ifndef CONFIG_X86_NUMAQ
-static int mp_bus_id_to_node[MAX_MP_BUSSES] __initdata;
-#endif
-
-static int __init setup_pci_node_map_for_wpeg(int wpeg_num, int last_bus)
-{
- int twister = 0, node = 0;
- int i, bus, num_buses;
-
- for (i = 0; i < rio_table_hdr->num_rio_dev; i++) {
- if (rio_devs[i]->node_id == rio_devs[wpeg_num]->owner_id) {
- twister = rio_devs[i]->owner_id;
- break;
- }
- }
- if (i == rio_table_hdr->num_rio_dev) {
- printk(KERN_ERR "%s: Couldn't find owner Cyclone for Winnipeg!\n", __func__);
- return last_bus;
- }
-
- for (i = 0; i < rio_table_hdr->num_scal_dev; i++) {
- if (scal_devs[i]->node_id == twister) {
- node = scal_devs[i]->node_id;
- break;
- }
- }
- if (i == rio_table_hdr->num_scal_dev) {
- printk(KERN_ERR "%s: Couldn't find owner Twister for Cyclone!\n", __func__);
- return last_bus;
- }
-
- switch (rio_devs[wpeg_num]->type) {
- case CompatWPEG:
- /*
- * The Compatibility Winnipeg controls the 2 legacy buses,
- * the 66MHz PCI bus [2 slots] and the 2 "extra" buses in case
- * a PCI-PCI bridge card is used in either slot: total 5 buses.
- */
- num_buses = 5;
- break;
- case AltWPEG:
- /*
- * The Alternate Winnipeg controls the 2 133MHz buses [1 slot
- * each], their 2 "extra" buses, the 100MHz bus [2 slots] and
- * the "extra" buses for each of those slots: total 7 buses.
- */
- num_buses = 7;
- break;
- case LookOutAWPEG:
- case LookOutBWPEG:
- /*
- * A Lookout Winnipeg controls 3 100MHz buses [2 slots each]
- * & the "extra" buses for each of those slots: total 9 buses.
- */
- num_buses = 9;
- break;
- default:
- printk(KERN_INFO "%s: Unsupported Winnipeg type!\n", __func__);
- return last_bus;
- }
-
- for (bus = last_bus; bus < last_bus + num_buses; bus++)
- mp_bus_id_to_node[bus] = node;
- return bus;
-}
-
-static int __init build_detail_arrays(void)
-{
- unsigned long ptr;
- int i, scal_detail_size, rio_detail_size;
-
- if (rio_table_hdr->num_scal_dev > MAX_NUMNODES) {
- printk(KERN_WARNING "%s: MAX_NUMNODES too low! Defined as %d, but system has %d nodes.\n", __func__, MAX_NUMNODES, rio_table_hdr->num_scal_dev);
- return 0;
- }
-
- switch (rio_table_hdr->version) {
- default:
- printk(KERN_WARNING "%s: Invalid Rio Grande Table Version: %d\n", __func__, rio_table_hdr->version);
- return 0;
- case 2:
- scal_detail_size = 11;
- rio_detail_size = 13;
- break;
- case 3:
- scal_detail_size = 12;
- rio_detail_size = 15;
- break;
- }
-
- ptr = (unsigned long)rio_table_hdr + 3;
- for (i = 0; i < rio_table_hdr->num_scal_dev; i++, ptr += scal_detail_size)
- scal_devs[i] = (struct scal_detail *)ptr;
-
- for (i = 0; i < rio_table_hdr->num_rio_dev; i++, ptr += rio_detail_size)
- rio_devs[i] = (struct rio_detail *)ptr;
-
- return 1;
-}
-
-void __init setup_summit(void)
-{
- unsigned long ptr;
- unsigned short offset;
- int i, next_wpeg, next_bus = 0;
-
- /* The pointer to the EBDA is stored in the word @ phys 0x40E(40:0E) */
- ptr = get_bios_ebda();
- ptr = (unsigned long)phys_to_virt(ptr);
-
- rio_table_hdr = NULL;
- offset = 0x180;
- while (offset) {
- /* The block id is stored in the 2nd word */
- if (*((unsigned short *)(ptr + offset + 2)) == 0x4752) {
- /* set the pointer past the offset & block id */
- rio_table_hdr = (struct rio_table_hdr *)(ptr + offset + 4);
- break;
- }
- /* The next offset is stored in the 1st word. 0 means no more */
- offset = *((unsigned short *)(ptr + offset));
- }
- if (!rio_table_hdr) {
- printk(KERN_ERR "%s: Unable to locate Rio Grande Table in EBDA - bailing!\n", __func__);
- return;
- }
-
- if (!build_detail_arrays())
- return;
-
- /* The first Winnipeg we're looking for has an index of 0 */
- next_wpeg = 0;
- do {
- for (i = 0; i < rio_table_hdr->num_rio_dev; i++) {
- if (is_WPEG(rio_devs[i]) && rio_devs[i]->WP_index == next_wpeg) {
- /* It's the Winnipeg we're looking for! */
- next_bus = setup_pci_node_map_for_wpeg(i, next_bus);
- next_wpeg++;
- break;
- }
- }
- /*
- * If we go through all Rio devices and don't find one with
- * the next index, it means we've found all the Winnipegs,
- * and thus all the PCI buses.
- */
- if (i == rio_table_hdr->num_rio_dev)
- next_wpeg = 0;
- } while (next_wpeg != 0);
-}
diff --git a/arch/x86/kernel/syscall_table_32.S b/arch/x86/kernel/syscall_table_32.S
index e2e86a0..b7607c4 100644
--- a/arch/x86/kernel/syscall_table_32.S
+++ b/arch/x86/kernel/syscall_table_32.S
@@ -1,7 +1,7 @@
ENTRY(sys_call_table)
.long sys_restart_syscall /* 0 - old "setup()" system call, used for restarting */
.long sys_exit
- .long sys_fork
+ .long ptregs_fork
.long sys_read
.long sys_write
.long sys_open /* 5 */
@@ -10,7 +10,7 @@
.long sys_creat
.long sys_link
.long sys_unlink /* 10 */
- .long sys_execve
+ .long ptregs_execve
.long sys_chdir
.long sys_time
.long sys_mknod
@@ -109,17 +109,17 @@
.long sys_newlstat
.long sys_newfstat
.long sys_uname
- .long sys_iopl /* 110 */
+ .long ptregs_iopl /* 110 */
.long sys_vhangup
.long sys_ni_syscall /* old "idle" system call */
- .long sys_vm86old
+ .long ptregs_vm86old
.long sys_wait4
.long sys_swapoff /* 115 */
.long sys_sysinfo
.long sys_ipc
.long sys_fsync
- .long sys_sigreturn
- .long sys_clone /* 120 */
+ .long ptregs_sigreturn
+ .long ptregs_clone /* 120 */
.long sys_setdomainname
.long sys_newuname
.long sys_modify_ldt
@@ -165,14 +165,14 @@
.long sys_mremap
.long sys_setresuid16
.long sys_getresuid16 /* 165 */
- .long sys_vm86
+ .long ptregs_vm86
.long sys_ni_syscall /* Old sys_query_module */
.long sys_poll
.long sys_nfsservctl
.long sys_setresgid16 /* 170 */
.long sys_getresgid16
.long sys_prctl
- .long sys_rt_sigreturn
+ .long ptregs_rt_sigreturn
.long sys_rt_sigaction
.long sys_rt_sigprocmask /* 175 */
.long sys_rt_sigpending
@@ -185,11 +185,11 @@
.long sys_getcwd
.long sys_capget
.long sys_capset /* 185 */
- .long sys_sigaltstack
+ .long ptregs_sigaltstack
.long sys_sendfile
.long sys_ni_syscall /* reserved for streams1 */
.long sys_ni_syscall /* reserved for streams2 */
- .long sys_vfork /* 190 */
+ .long ptregs_vfork /* 190 */
.long sys_getrlimit
.long sys_mmap2
.long sys_truncate64
@@ -332,3 +332,4 @@
.long sys_dup3 /* 330 */
.long sys_pipe2
.long sys_inotify_init1
+ .long sys_perf_counter_open
diff --git a/arch/x86/kernel/time_32.c b/arch/x86/kernel/time_32.c
index 3985cac..5c5d87f 100644
--- a/arch/x86/kernel/time_32.c
+++ b/arch/x86/kernel/time_32.c
@@ -33,12 +33,12 @@
#include <linux/time.h>
#include <linux/mca.h>
-#include <asm/arch_hooks.h>
+#include <asm/setup.h>
#include <asm/hpet.h>
#include <asm/time.h>
#include <asm/timer.h>
-#include "do_timer.h"
+#include <asm/do_timer.h>
int timer_ack;
@@ -118,7 +118,7 @@
{
if (!hpet_enable())
setup_pit_timer();
- time_init_hook();
+ x86_quirk_time_init();
}
/*
@@ -131,7 +131,7 @@
*/
void __init time_init(void)
{
- pre_time_init_hook();
+ x86_quirk_pre_time_init();
tsc_init();
late_time_init = choose_time_init();
}
diff --git a/arch/x86/kernel/tlb_32.c b/arch/x86/kernel/tlb_32.c
deleted file mode 100644
index ce50546..0000000
--- a/arch/x86/kernel/tlb_32.c
+++ /dev/null
@@ -1,256 +0,0 @@
-#include <linux/spinlock.h>
-#include <linux/cpu.h>
-#include <linux/interrupt.h>
-
-#include <asm/tlbflush.h>
-
-DEFINE_PER_CPU(struct tlb_state, cpu_tlbstate)
- ____cacheline_aligned = { &init_mm, 0, };
-
-/* must come after the send_IPI functions above for inlining */
-#include <mach_ipi.h>
-
-/*
- * Smarter SMP flushing macros.
- * c/o Linus Torvalds.
- *
- * These mean you can really definitely utterly forget about
- * writing to user space from interrupts. (Its not allowed anyway).
- *
- * Optimizations Manfred Spraul <manfred@colorfullife.com>
- */
-
-static cpumask_t flush_cpumask;
-static struct mm_struct *flush_mm;
-static unsigned long flush_va;
-static DEFINE_SPINLOCK(tlbstate_lock);
-
-/*
- * We cannot call mmdrop() because we are in interrupt context,
- * instead update mm->cpu_vm_mask.
- *
- * We need to reload %cr3 since the page tables may be going
- * away from under us..
- */
-void leave_mm(int cpu)
-{
- BUG_ON(x86_read_percpu(cpu_tlbstate.state) == TLBSTATE_OK);
- cpu_clear(cpu, x86_read_percpu(cpu_tlbstate.active_mm)->cpu_vm_mask);
- load_cr3(swapper_pg_dir);
-}
-EXPORT_SYMBOL_GPL(leave_mm);
-
-/*
- *
- * The flush IPI assumes that a thread switch happens in this order:
- * [cpu0: the cpu that switches]
- * 1) switch_mm() either 1a) or 1b)
- * 1a) thread switch to a different mm
- * 1a1) cpu_clear(cpu, old_mm->cpu_vm_mask);
- * Stop ipi delivery for the old mm. This is not synchronized with
- * the other cpus, but smp_invalidate_interrupt ignore flush ipis
- * for the wrong mm, and in the worst case we perform a superfluous
- * tlb flush.
- * 1a2) set cpu_tlbstate to TLBSTATE_OK
- * Now the smp_invalidate_interrupt won't call leave_mm if cpu0
- * was in lazy tlb mode.
- * 1a3) update cpu_tlbstate[].active_mm
- * Now cpu0 accepts tlb flushes for the new mm.
- * 1a4) cpu_set(cpu, new_mm->cpu_vm_mask);
- * Now the other cpus will send tlb flush ipis.
- * 1a4) change cr3.
- * 1b) thread switch without mm change
- * cpu_tlbstate[].active_mm is correct, cpu0 already handles
- * flush ipis.
- * 1b1) set cpu_tlbstate to TLBSTATE_OK
- * 1b2) test_and_set the cpu bit in cpu_vm_mask.
- * Atomically set the bit [other cpus will start sending flush ipis],
- * and test the bit.
- * 1b3) if the bit was 0: leave_mm was called, flush the tlb.
- * 2) switch %%esp, ie current
- *
- * The interrupt must handle 2 special cases:
- * - cr3 is changed before %%esp, ie. it cannot use current->{active_,}mm.
- * - the cpu performs speculative tlb reads, i.e. even if the cpu only
- * runs in kernel space, the cpu could load tlb entries for user space
- * pages.
- *
- * The good news is that cpu_tlbstate is local to each cpu, no
- * write/read ordering problems.
- */
-
-/*
- * TLB flush IPI:
- *
- * 1) Flush the tlb entries if the cpu uses the mm that's being flushed.
- * 2) Leave the mm if we are in the lazy tlb mode.
- */
-
-void smp_invalidate_interrupt(struct pt_regs *regs)
-{
- unsigned long cpu;
-
- cpu = get_cpu();
-
- if (!cpu_isset(cpu, flush_cpumask))
- goto out;
- /*
- * This was a BUG() but until someone can quote me the
- * line from the intel manual that guarantees an IPI to
- * multiple CPUs is retried _only_ on the erroring CPUs
- * its staying as a return
- *
- * BUG();
- */
-
- if (flush_mm == x86_read_percpu(cpu_tlbstate.active_mm)) {
- if (x86_read_percpu(cpu_tlbstate.state) == TLBSTATE_OK) {
- if (flush_va == TLB_FLUSH_ALL)
- local_flush_tlb();
- else
- __flush_tlb_one(flush_va);
- } else
- leave_mm(cpu);
- }
- ack_APIC_irq();
- smp_mb__before_clear_bit();
- cpu_clear(cpu, flush_cpumask);
- smp_mb__after_clear_bit();
-out:
- put_cpu_no_resched();
- inc_irq_stat(irq_tlb_count);
-}
-
-void native_flush_tlb_others(const cpumask_t *cpumaskp, struct mm_struct *mm,
- unsigned long va)
-{
- cpumask_t cpumask = *cpumaskp;
-
- /*
- * A couple of (to be removed) sanity checks:
- *
- * - current CPU must not be in mask
- * - mask must exist :)
- */
- BUG_ON(cpus_empty(cpumask));
- BUG_ON(cpu_isset(smp_processor_id(), cpumask));
- BUG_ON(!mm);
-
-#ifdef CONFIG_HOTPLUG_CPU
- /* If a CPU which we ran on has gone down, OK. */
- cpus_and(cpumask, cpumask, cpu_online_map);
- if (unlikely(cpus_empty(cpumask)))
- return;
-#endif
-
- /*
- * i'm not happy about this global shared spinlock in the
- * MM hot path, but we'll see how contended it is.
- * AK: x86-64 has a faster method that could be ported.
- */
- spin_lock(&tlbstate_lock);
-
- flush_mm = mm;
- flush_va = va;
- cpus_or(flush_cpumask, cpumask, flush_cpumask);
-
- /*
- * Make the above memory operations globally visible before
- * sending the IPI.
- */
- smp_mb();
- /*
- * We have to send the IPI only to
- * CPUs affected.
- */
- send_IPI_mask(&cpumask, INVALIDATE_TLB_VECTOR);
-
- while (!cpus_empty(flush_cpumask))
- /* nothing. lockup detection does not belong here */
- cpu_relax();
-
- flush_mm = NULL;
- flush_va = 0;
- spin_unlock(&tlbstate_lock);
-}
-
-void flush_tlb_current_task(void)
-{
- struct mm_struct *mm = current->mm;
- cpumask_t cpu_mask;
-
- preempt_disable();
- cpu_mask = mm->cpu_vm_mask;
- cpu_clear(smp_processor_id(), cpu_mask);
-
- local_flush_tlb();
- if (!cpus_empty(cpu_mask))
- flush_tlb_others(cpu_mask, mm, TLB_FLUSH_ALL);
- preempt_enable();
-}
-
-void flush_tlb_mm(struct mm_struct *mm)
-{
- cpumask_t cpu_mask;
-
- preempt_disable();
- cpu_mask = mm->cpu_vm_mask;
- cpu_clear(smp_processor_id(), cpu_mask);
-
- if (current->active_mm == mm) {
- if (current->mm)
- local_flush_tlb();
- else
- leave_mm(smp_processor_id());
- }
- if (!cpus_empty(cpu_mask))
- flush_tlb_others(cpu_mask, mm, TLB_FLUSH_ALL);
-
- preempt_enable();
-}
-
-void flush_tlb_page(struct vm_area_struct *vma, unsigned long va)
-{
- struct mm_struct *mm = vma->vm_mm;
- cpumask_t cpu_mask;
-
- preempt_disable();
- cpu_mask = mm->cpu_vm_mask;
- cpu_clear(smp_processor_id(), cpu_mask);
-
- if (current->active_mm == mm) {
- if (current->mm)
- __flush_tlb_one(va);
- else
- leave_mm(smp_processor_id());
- }
-
- if (!cpus_empty(cpu_mask))
- flush_tlb_others(cpu_mask, mm, va);
-
- preempt_enable();
-}
-EXPORT_SYMBOL(flush_tlb_page);
-
-static void do_flush_tlb_all(void *info)
-{
- unsigned long cpu = smp_processor_id();
-
- __flush_tlb_all();
- if (x86_read_percpu(cpu_tlbstate.state) == TLBSTATE_LAZY)
- leave_mm(cpu);
-}
-
-void flush_tlb_all(void)
-{
- on_each_cpu(do_flush_tlb_all, NULL, 1);
-}
-
-void reset_lazy_tlbstate(void)
-{
- int cpu = raw_smp_processor_id();
-
- per_cpu(cpu_tlbstate, cpu).state = 0;
- per_cpu(cpu_tlbstate, cpu).active_mm = &init_mm;
-}
-
diff --git a/arch/x86/kernel/tlb_uv.c b/arch/x86/kernel/tlb_uv.c
index 6812b82..f04549a 100644
--- a/arch/x86/kernel/tlb_uv.c
+++ b/arch/x86/kernel/tlb_uv.c
@@ -11,16 +11,15 @@
#include <linux/kernel.h>
#include <asm/mmu_context.h>
+#include <asm/uv/uv.h>
#include <asm/uv/uv_mmrs.h>
#include <asm/uv/uv_hub.h>
#include <asm/uv/uv_bau.h>
-#include <asm/genapic.h>
+#include <asm/apic.h>
#include <asm/idle.h>
#include <asm/tsc.h>
#include <asm/irq_vectors.h>
-#include <mach_apic.h>
-
static struct bau_control **uv_bau_table_bases __read_mostly;
static int uv_bau_retry_limit __read_mostly;
@@ -210,14 +209,15 @@
*
* Send a broadcast and wait for a broadcast message to complete.
*
- * The cpumaskp mask contains the cpus the broadcast was sent to.
+ * The flush_mask contains the cpus the broadcast was sent to.
*
- * Returns 1 if all remote flushing was done. The mask is zeroed.
- * Returns 0 if some remote flushing remains to be done. The mask is left
- * unchanged.
+ * Returns NULL if all remote flushing was done. The mask is zeroed.
+ * Returns @flush_mask if some remote flushing remains to be done. The
+ * mask will have some bits still set.
*/
-int uv_flush_send_and_wait(int cpu, int this_blade, struct bau_desc *bau_desc,
- cpumask_t *cpumaskp)
+const struct cpumask *uv_flush_send_and_wait(int cpu, int this_blade,
+ struct bau_desc *bau_desc,
+ struct cpumask *flush_mask)
{
int completion_status = 0;
int right_shift;
@@ -257,66 +257,76 @@
* the cpu's, all of which are still in the mask.
*/
__get_cpu_var(ptcstats).ptc_i++;
- return 0;
+ return flush_mask;
}
/*
* Success, so clear the remote cpu's from the mask so we don't
* use the IPI method of shootdown on them.
*/
- for_each_cpu_mask(bit, *cpumaskp) {
+ for_each_cpu(bit, flush_mask) {
blade = uv_cpu_to_blade_id(bit);
if (blade == this_blade)
continue;
- cpu_clear(bit, *cpumaskp);
+ cpumask_clear_cpu(bit, flush_mask);
}
- if (!cpus_empty(*cpumaskp))
- return 0;
- return 1;
+ if (!cpumask_empty(flush_mask))
+ return flush_mask;
+ return NULL;
}
/**
* uv_flush_tlb_others - globally purge translation cache of a virtual
* address or all TLB's
- * @cpumaskp: mask of all cpu's in which the address is to be removed
+ * @cpumask: mask of all cpu's in which the address is to be removed
* @mm: mm_struct containing virtual address range
* @va: virtual address to be removed (or TLB_FLUSH_ALL for all TLB's on cpu)
+ * @cpu: the current cpu
*
* This is the entry point for initiating any UV global TLB shootdown.
*
* Purges the translation caches of all specified processors of the given
* virtual address, or purges all TLB's on specified processors.
*
- * The caller has derived the cpumaskp from the mm_struct and has subtracted
- * the local cpu from the mask. This function is called only if there
- * are bits set in the mask. (e.g. flush_tlb_page())
+ * The caller has derived the cpumask from the mm_struct. This function
+ * is called only if there are bits set in the mask. (e.g. flush_tlb_page())
*
- * The cpumaskp is converted into a nodemask of the nodes containing
+ * The cpumask is converted into a nodemask of the nodes containing
* the cpus.
*
- * Returns 1 if all remote flushing was done.
- * Returns 0 if some remote flushing remains to be done.
+ * Note that this function should be called with preemption disabled.
+ *
+ * Returns NULL if all remote flushing was done.
+ * Returns pointer to cpumask if some remote flushing remains to be
+ * done. The returned pointer is valid till preemption is re-enabled.
*/
-int uv_flush_tlb_others(cpumask_t *cpumaskp, struct mm_struct *mm,
- unsigned long va)
+const struct cpumask *uv_flush_tlb_others(const struct cpumask *cpumask,
+ struct mm_struct *mm,
+ unsigned long va, unsigned int cpu)
{
+ static DEFINE_PER_CPU(cpumask_t, flush_tlb_mask);
+ struct cpumask *flush_mask = &__get_cpu_var(flush_tlb_mask);
int i;
int bit;
int blade;
- int cpu;
+ int uv_cpu;
int this_blade;
int locals = 0;
struct bau_desc *bau_desc;
- cpu = uv_blade_processor_id();
+ WARN_ON(!in_atomic());
+
+ cpumask_andnot(flush_mask, cpumask, cpumask_of(cpu));
+
+ uv_cpu = uv_blade_processor_id();
this_blade = uv_numa_blade_id();
bau_desc = __get_cpu_var(bau_control).descriptor_base;
- bau_desc += UV_ITEMS_PER_DESCRIPTOR * cpu;
+ bau_desc += UV_ITEMS_PER_DESCRIPTOR * uv_cpu;
bau_nodes_clear(&bau_desc->distribution, UV_DISTRIBUTION_SIZE);
i = 0;
- for_each_cpu_mask(bit, *cpumaskp) {
+ for_each_cpu(bit, flush_mask) {
blade = uv_cpu_to_blade_id(bit);
BUG_ON(blade > (UV_DISTRIBUTION_SIZE - 1));
if (blade == this_blade) {
@@ -331,17 +341,17 @@
* no off_node flushing; return status for local node
*/
if (locals)
- return 0;
+ return flush_mask;
else
- return 1;
+ return NULL;
}
__get_cpu_var(ptcstats).requestor++;
__get_cpu_var(ptcstats).ntargeted += i;
bau_desc->payload.address = va;
- bau_desc->payload.sending_cpu = smp_processor_id();
+ bau_desc->payload.sending_cpu = cpu;
- return uv_flush_send_and_wait(cpu, this_blade, bau_desc, cpumaskp);
+ return uv_flush_send_and_wait(uv_cpu, this_blade, bau_desc, flush_mask);
}
/*
diff --git a/arch/x86/kernel/trampoline_32.S b/arch/x86/kernel/trampoline_32.S
index d8ccc3c..66d874e 100644
--- a/arch/x86/kernel/trampoline_32.S
+++ b/arch/x86/kernel/trampoline_32.S
@@ -29,7 +29,7 @@
#include <linux/linkage.h>
#include <asm/segment.h>
-#include <asm/page.h>
+#include <asm/page_types.h>
/* We can free up trampoline after bootup if cpu hotplug is not supported. */
#ifndef CONFIG_HOTPLUG_CPU
diff --git a/arch/x86/kernel/trampoline_64.S b/arch/x86/kernel/trampoline_64.S
index 894293c..cddfb8d 100644
--- a/arch/x86/kernel/trampoline_64.S
+++ b/arch/x86/kernel/trampoline_64.S
@@ -25,10 +25,11 @@
*/
#include <linux/linkage.h>
-#include <asm/pgtable.h>
-#include <asm/page.h>
+#include <asm/pgtable_types.h>
+#include <asm/page_types.h>
#include <asm/msr.h>
#include <asm/segment.h>
+#include <asm/processor-flags.h>
.section .rodata, "a", @progbits
@@ -37,7 +38,7 @@
ENTRY(trampoline_data)
r_base = .
cli # We should be safe anyway
- wbinvd
+ wbinvd
mov %cs, %ax # Code and data in the same place
mov %ax, %ds
mov %ax, %es
@@ -73,9 +74,8 @@
lidtl tidt - r_base # load idt with 0, 0
lgdtl tgdt - r_base # load gdt with whatever is appropriate
- xor %ax, %ax
- inc %ax # protected mode (PE) bit
- lmsw %ax # into protected mode
+ mov $X86_CR0_PE, %ax # protected mode (PE) bit
+ lmsw %ax # into protected mode
# flush prefetch and jump to startup_32
ljmpl *(startup_32_vector - r_base)
@@ -86,9 +86,8 @@
movl $__KERNEL_DS, %eax # Initialize the %ds segment register
movl %eax, %ds
- xorl %eax, %eax
- btsl $5, %eax # Enable PAE mode
- movl %eax, %cr4
+ movl $X86_CR4_PAE, %eax
+ movl %eax, %cr4 # Enable PAE mode
# Setup trampoline 4 level pagetables
leal (trampoline_level4_pgt - r_base)(%esi), %eax
@@ -99,9 +98,9 @@
xorl %edx, %edx
wrmsr
- xorl %eax, %eax
- btsl $31, %eax # Enable paging and in turn activate Long Mode
- btsl $0, %eax # Enable protected mode
+ # Enable paging and in turn activate Long Mode
+ # Enable protected mode
+ movl $(X86_CR0_PG | X86_CR0_PE), %eax
movl %eax, %cr0
/*
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index a9e7548..1dba866 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -54,15 +54,14 @@
#include <asm/desc.h>
#include <asm/i387.h>
-#include <mach_traps.h>
+#include <asm/mach_traps.h>
#ifdef CONFIG_X86_64
#include <asm/pgalloc.h>
#include <asm/proto.h>
-#include <asm/pda.h>
#else
#include <asm/processor-flags.h>
-#include <asm/arch_hooks.h>
+#include <asm/setup.h>
#include <asm/traps.h>
#include "cpu/mcheck/mce.h"
@@ -914,19 +913,20 @@
}
#endif /* CONFIG_MATH_EMULATION */
-dotraplinkage void __kprobes do_device_not_available(struct pt_regs regs)
+dotraplinkage void __kprobes
+do_device_not_available(struct pt_regs *regs, long error_code)
{
#ifdef CONFIG_X86_32
if (read_cr0() & X86_CR0_EM) {
struct math_emu_info info = { };
- conditional_sti(®s);
+ conditional_sti(regs);
- info.regs = ®s;
+ info.regs = regs;
math_emulate(&info);
} else {
math_state_restore(); /* interrupts still off */
- conditional_sti(®s);
+ conditional_sti(regs);
}
#else
math_state_restore();
@@ -942,7 +942,7 @@
info.si_signo = SIGILL;
info.si_errno = 0;
info.si_code = ILL_BADSTK;
- info.si_addr = 0;
+ info.si_addr = NULL;
if (notify_die(DIE_TRAP, "iret exception",
regs, error_code, 32, SIGILL) == NOTIFY_STOP)
return;
@@ -991,8 +991,13 @@
#endif
set_intr_gate(19, &simd_coprocessor_error);
+ /* Reserve all the builtin and the syscall vector: */
+ for (i = 0; i < FIRST_EXTERNAL_VECTOR; i++)
+ set_bit(i, used_vectors);
+
#ifdef CONFIG_IA32_EMULATION
set_system_intr_gate(IA32_SYSCALL_VECTOR, ia32_syscall);
+ set_bit(IA32_SYSCALL_VECTOR, used_vectors);
#endif
#ifdef CONFIG_X86_32
@@ -1009,23 +1014,15 @@
}
set_system_trap_gate(SYSCALL_VECTOR, &system_call);
-#endif
-
- /* Reserve all the builtin and the syscall vector: */
- for (i = 0; i < FIRST_EXTERNAL_VECTOR; i++)
- set_bit(i, used_vectors);
-
-#ifdef CONFIG_X86_64
- set_bit(IA32_SYSCALL_VECTOR, used_vectors);
-#else
set_bit(SYSCALL_VECTOR, used_vectors);
#endif
+
/*
* Should be a barrier for any external CPU state:
*/
cpu_init();
#ifdef CONFIG_X86_32
- trap_init_hook();
+ x86_quirk_trap_init();
#endif
}
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index 599e581..83d53ce 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -773,7 +773,7 @@
if (!cpu_has_tsc || tsc_unstable)
return 1;
-#ifdef CONFIG_X86_SMP
+#ifdef CONFIG_SMP
if (apic_is_clustered_box())
return 1;
#endif
diff --git a/arch/x86/kernel/visws_quirks.c b/arch/x86/kernel/visws_quirks.c
index d801d06..191a876 100644
--- a/arch/x86/kernel/visws_quirks.c
+++ b/arch/x86/kernel/visws_quirks.c
@@ -24,18 +24,14 @@
#include <asm/visws/cobalt.h>
#include <asm/visws/piix4.h>
-#include <asm/arch_hooks.h>
#include <asm/io_apic.h>
#include <asm/fixmap.h>
#include <asm/reboot.h>
#include <asm/setup.h>
+#include <asm/apic.h>
#include <asm/e820.h>
#include <asm/io.h>
-#include <mach_ipi.h>
-
-#include "mach_apic.h"
-
#include <linux/kernel_stat.h>
#include <asm/i8259.h>
@@ -49,8 +45,6 @@
extern int no_broadcast;
-#include <asm/apic.h>
-
char visws_board_type = -1;
char visws_board_rev = -1;
@@ -200,7 +194,7 @@
return;
}
- apic_cpus = apicid_to_cpu_present(m->apicid);
+ apic_cpus = apic->apicid_to_cpu_present(m->apicid);
physids_or(phys_cpu_present_map, phys_cpu_present_map, apic_cpus);
/*
* Validate version
diff --git a/arch/x86/kernel/vm86_32.c b/arch/x86/kernel/vm86_32.c
index 4eeb5cf..d7ac84e 100644
--- a/arch/x86/kernel/vm86_32.c
+++ b/arch/x86/kernel/vm86_32.c
@@ -158,7 +158,7 @@
ret = KVM86->regs32;
ret->fs = current->thread.saved_fs;
- loadsegment(gs, current->thread.saved_gs);
+ set_user_gs(ret, current->thread.saved_gs);
return ret;
}
@@ -197,9 +197,9 @@
static int do_vm86_irq_handling(int subfunction, int irqnumber);
static void do_sys_vm86(struct kernel_vm86_struct *info, struct task_struct *tsk);
-asmlinkage int sys_vm86old(struct pt_regs regs)
+int sys_vm86old(struct pt_regs *regs)
{
- struct vm86_struct __user *v86 = (struct vm86_struct __user *)regs.bx;
+ struct vm86_struct __user *v86 = (struct vm86_struct __user *)regs->bx;
struct kernel_vm86_struct info; /* declare this _on top_,
* this avoids wasting of stack space.
* This remains on the stack until we
@@ -218,7 +218,7 @@
if (tmp)
goto out;
memset(&info.vm86plus, 0, (int)&info.regs32 - (int)&info.vm86plus);
- info.regs32 = ®s;
+ info.regs32 = regs;
tsk->thread.vm86_info = v86;
do_sys_vm86(&info, tsk);
ret = 0; /* we never return here */
@@ -227,7 +227,7 @@
}
-asmlinkage int sys_vm86(struct pt_regs regs)
+int sys_vm86(struct pt_regs *regs)
{
struct kernel_vm86_struct info; /* declare this _on top_,
* this avoids wasting of stack space.
@@ -239,12 +239,12 @@
struct vm86plus_struct __user *v86;
tsk = current;
- switch (regs.bx) {
+ switch (regs->bx) {
case VM86_REQUEST_IRQ:
case VM86_FREE_IRQ:
case VM86_GET_IRQ_BITS:
case VM86_GET_AND_RESET_IRQ:
- ret = do_vm86_irq_handling(regs.bx, (int)regs.cx);
+ ret = do_vm86_irq_handling(regs->bx, (int)regs->cx);
goto out;
case VM86_PLUS_INSTALL_CHECK:
/*
@@ -261,14 +261,14 @@
ret = -EPERM;
if (tsk->thread.saved_sp0)
goto out;
- v86 = (struct vm86plus_struct __user *)regs.cx;
+ v86 = (struct vm86plus_struct __user *)regs->cx;
tmp = copy_vm86_regs_from_user(&info.regs, &v86->regs,
offsetof(struct kernel_vm86_struct, regs32) -
sizeof(info.regs));
ret = -EFAULT;
if (tmp)
goto out;
- info.regs32 = ®s;
+ info.regs32 = regs;
info.vm86plus.is_vm86pus = 1;
tsk->thread.vm86_info = (struct vm86_struct __user *)v86;
do_sys_vm86(&info, tsk);
@@ -323,7 +323,7 @@
info->regs32->ax = 0;
tsk->thread.saved_sp0 = tsk->thread.sp0;
tsk->thread.saved_fs = info->regs32->fs;
- savesegment(gs, tsk->thread.saved_gs);
+ tsk->thread.saved_gs = get_user_gs(info->regs32);
tss = &per_cpu(init_tss, get_cpu());
tsk->thread.sp0 = (unsigned long) &info->VM86_TSS_ESP0;
diff --git a/arch/x86/kernel/vmi_32.c b/arch/x86/kernel/vmi_32.c
index bef58b4..2cc4a90 100644
--- a/arch/x86/kernel/vmi_32.c
+++ b/arch/x86/kernel/vmi_32.c
@@ -680,10 +680,11 @@
para_fill(pv_mmu_ops.write_cr2, SetCR2);
para_fill(pv_mmu_ops.write_cr3, SetCR3);
para_fill(pv_cpu_ops.write_cr4, SetCR4);
- para_fill(pv_irq_ops.save_fl, GetInterruptMask);
- para_fill(pv_irq_ops.restore_fl, SetInterruptMask);
- para_fill(pv_irq_ops.irq_disable, DisableInterrupts);
- para_fill(pv_irq_ops.irq_enable, EnableInterrupts);
+
+ para_fill(pv_irq_ops.save_fl.func, GetInterruptMask);
+ para_fill(pv_irq_ops.restore_fl.func, SetInterruptMask);
+ para_fill(pv_irq_ops.irq_disable.func, DisableInterrupts);
+ para_fill(pv_irq_ops.irq_enable.func, EnableInterrupts);
para_fill(pv_cpu_ops.wbinvd, WBINVD);
para_fill(pv_cpu_ops.read_tsc, RDTSC);
@@ -797,8 +798,8 @@
#endif
#ifdef CONFIG_X86_LOCAL_APIC
- para_fill(apic_ops->read, APICRead);
- para_fill(apic_ops->write, APICWrite);
+ para_fill(apic->read, APICRead);
+ para_fill(apic->write, APICWrite);
#endif
/*
diff --git a/arch/x86/kernel/vmiclock_32.c b/arch/x86/kernel/vmiclock_32.c
index e5b088f..33a788d 100644
--- a/arch/x86/kernel/vmiclock_32.c
+++ b/arch/x86/kernel/vmiclock_32.c
@@ -28,7 +28,6 @@
#include <asm/vmi.h>
#include <asm/vmi_time.h>
-#include <asm/arch_hooks.h>
#include <asm/apicdef.h>
#include <asm/apic.h>
#include <asm/timer.h>
@@ -256,7 +255,7 @@
*/
clockevents_notify(CLOCK_EVT_NOTIFY_SUSPEND, NULL);
local_irq_disable();
-#ifdef CONFIG_X86_SMP
+#ifdef CONFIG_SMP
/*
* XXX handle_percpu_irq only defined for SMP; we need to switch over
* to using it, since this is a local interrupt, which each CPU must
@@ -288,8 +287,7 @@
static cycle_t read_real_cycles(void)
{
cycle_t ret = (cycle_t)vmi_timer_ops.get_cycle_counter(VMI_CYCLES_REAL);
- return ret >= clocksource_vmi.cycle_last ?
- ret : clocksource_vmi.cycle_last;
+ return max(ret, clocksource_vmi.cycle_last);
}
static struct clocksource clocksource_vmi = {
diff --git a/arch/x86/kernel/vmlinux_32.lds.S b/arch/x86/kernel/vmlinux_32.lds.S
index 82c6755..0d86096 100644
--- a/arch/x86/kernel/vmlinux_32.lds.S
+++ b/arch/x86/kernel/vmlinux_32.lds.S
@@ -12,7 +12,7 @@
#include <asm-generic/vmlinux.lds.h>
#include <asm/thread_info.h>
-#include <asm/page.h>
+#include <asm/page_types.h>
#include <asm/cache.h>
#include <asm/boot.h>
@@ -178,14 +178,7 @@
__initramfs_end = .;
}
#endif
- . = ALIGN(PAGE_SIZE);
- .data.percpu : AT(ADDR(.data.percpu) - LOAD_OFFSET) {
- __per_cpu_start = .;
- *(.data.percpu.page_aligned)
- *(.data.percpu)
- *(.data.percpu.shared_aligned)
- __per_cpu_end = .;
- }
+ PERCPU(PAGE_SIZE)
. = ALIGN(PAGE_SIZE);
/* freed after init ends here */
diff --git a/arch/x86/kernel/vmlinux_64.lds.S b/arch/x86/kernel/vmlinux_64.lds.S
index 1a614c0..fbfced6 100644
--- a/arch/x86/kernel/vmlinux_64.lds.S
+++ b/arch/x86/kernel/vmlinux_64.lds.S
@@ -5,7 +5,8 @@
#define LOAD_OFFSET __START_KERNEL_map
#include <asm-generic/vmlinux.lds.h>
-#include <asm/page.h>
+#include <asm/asm-offsets.h>
+#include <asm/page_types.h>
#undef i386 /* in case the preprocessor is a 32bit one */
@@ -13,12 +14,15 @@
OUTPUT_ARCH(i386:x86-64)
ENTRY(phys_startup_64)
jiffies_64 = jiffies;
-_proxy_pda = 1;
PHDRS {
text PT_LOAD FLAGS(5); /* R_E */
data PT_LOAD FLAGS(7); /* RWE */
user PT_LOAD FLAGS(7); /* RWE */
data.init PT_LOAD FLAGS(7); /* RWE */
+#ifdef CONFIG_SMP
+ percpu PT_LOAD FLAGS(7); /* RWE */
+#endif
+ data.init2 PT_LOAD FLAGS(7); /* RWE */
note PT_NOTE FLAGS(0); /* ___ */
}
SECTIONS
@@ -208,14 +212,28 @@
__initramfs_end = .;
#endif
+#ifdef CONFIG_SMP
+ /*
+ * percpu offsets are zero-based on SMP. PERCPU_VADDR() changes the
+ * output PHDR, so the next output section - __data_nosave - should
+ * start another section data.init2. Also, pda should be at the head of
+ * percpu area. Preallocate it and define the percpu offset symbol
+ * so that it can be accessed as a percpu variable.
+ */
+ . = ALIGN(PAGE_SIZE);
+ PERCPU_VADDR(0, :percpu)
+#else
PERCPU(PAGE_SIZE)
+#endif
. = ALIGN(PAGE_SIZE);
__init_end = .;
. = ALIGN(PAGE_SIZE);
__nosave_begin = .;
- .data_nosave : AT(ADDR(.data_nosave) - LOAD_OFFSET) { *(.data.nosave) }
+ .data_nosave : AT(ADDR(.data_nosave) - LOAD_OFFSET) {
+ *(.data.nosave)
+ } :data.init2 /* use another section data.init2, see PERCPU_VADDR() above */
. = ALIGN(PAGE_SIZE);
__nosave_end = .;
@@ -239,8 +257,21 @@
DWARF_DEBUG
}
+ /*
+ * Per-cpu symbols which need to be offset from __per_cpu_load
+ * for the boot processor.
+ */
+#define INIT_PER_CPU(x) init_per_cpu__##x = per_cpu__##x + __per_cpu_load
+INIT_PER_CPU(gdt_page);
+INIT_PER_CPU(irq_stack_union);
+
/*
* Build-time check on the image size:
*/
ASSERT((_end - _text <= KERNEL_IMAGE_SIZE),
"kernel image bigger than KERNEL_IMAGE_SIZE")
+
+#ifdef CONFIG_SMP
+ASSERT((per_cpu__irq_stack_union == 0),
+ "irq_stack_union is not at start of per-cpu area");
+#endif
diff --git a/arch/x86/kernel/vsmp_64.c b/arch/x86/kernel/vsmp_64.c
index a688f3b..c609205 100644
--- a/arch/x86/kernel/vsmp_64.c
+++ b/arch/x86/kernel/vsmp_64.c
@@ -37,6 +37,7 @@
flags &= ~X86_EFLAGS_IF;
return flags;
}
+PV_CALLEE_SAVE_REGS_THUNK(vsmp_save_fl);
static void vsmp_restore_fl(unsigned long flags)
{
@@ -46,6 +47,7 @@
flags |= X86_EFLAGS_AC;
native_restore_fl(flags);
}
+PV_CALLEE_SAVE_REGS_THUNK(vsmp_restore_fl);
static void vsmp_irq_disable(void)
{
@@ -53,6 +55,7 @@
native_restore_fl((flags & ~X86_EFLAGS_IF) | X86_EFLAGS_AC);
}
+PV_CALLEE_SAVE_REGS_THUNK(vsmp_irq_disable);
static void vsmp_irq_enable(void)
{
@@ -60,6 +63,7 @@
native_restore_fl((flags | X86_EFLAGS_IF) & (~X86_EFLAGS_AC));
}
+PV_CALLEE_SAVE_REGS_THUNK(vsmp_irq_enable);
static unsigned __init_or_module vsmp_patch(u8 type, u16 clobbers, void *ibuf,
unsigned long addr, unsigned len)
@@ -90,10 +94,10 @@
cap, ctl);
if (cap & ctl & (1 << 4)) {
/* Setup irq ops and turn on vSMP IRQ fastpath handling */
- pv_irq_ops.irq_disable = vsmp_irq_disable;
- pv_irq_ops.irq_enable = vsmp_irq_enable;
- pv_irq_ops.save_fl = vsmp_save_fl;
- pv_irq_ops.restore_fl = vsmp_restore_fl;
+ pv_irq_ops.irq_disable = PV_CALLEE_SAVE(vsmp_irq_disable);
+ pv_irq_ops.irq_enable = PV_CALLEE_SAVE(vsmp_irq_enable);
+ pv_irq_ops.save_fl = PV_CALLEE_SAVE(vsmp_save_fl);
+ pv_irq_ops.restore_fl = PV_CALLEE_SAVE(vsmp_restore_fl);
pv_init_ops.patch = vsmp_patch;
ctl &= ~(1 << 4);
diff --git a/arch/x86/kernel/x8664_ksyms_64.c b/arch/x86/kernel/x8664_ksyms_64.c
index 695e426..3909e3b 100644
--- a/arch/x86/kernel/x8664_ksyms_64.c
+++ b/arch/x86/kernel/x8664_ksyms_64.c
@@ -58,5 +58,3 @@
EXPORT_SYMBOL(empty_zero_page);
EXPORT_SYMBOL(init_level4_pgt);
EXPORT_SYMBOL(load_gs_index);
-
-EXPORT_SYMBOL(_proxy_pda);
diff --git a/arch/x86/lguest/Kconfig b/arch/x86/lguest/Kconfig
index c70e12b..8dab8f7 100644
--- a/arch/x86/lguest/Kconfig
+++ b/arch/x86/lguest/Kconfig
@@ -3,7 +3,6 @@
select PARAVIRT
depends on X86_32
depends on !X86_PAE
- depends on !X86_VOYAGER
select VIRTIO
select VIRTIO_RING
select VIRTIO_CONSOLE
diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c
index 92f1c6f..f3a5305 100644
--- a/arch/x86/lguest/boot.c
+++ b/arch/x86/lguest/boot.c
@@ -173,24 +173,29 @@
{
return lguest_data.irq_enabled;
}
+PV_CALLEE_SAVE_REGS_THUNK(save_fl);
/* restore_flags() just sets the flags back to the value given. */
static void restore_fl(unsigned long flags)
{
lguest_data.irq_enabled = flags;
}
+PV_CALLEE_SAVE_REGS_THUNK(restore_fl);
/* Interrupts go off... */
static void irq_disable(void)
{
lguest_data.irq_enabled = 0;
}
+PV_CALLEE_SAVE_REGS_THUNK(irq_disable);
/* Interrupts go on... */
static void irq_enable(void)
{
lguest_data.irq_enabled = X86_EFLAGS_IF;
}
+PV_CALLEE_SAVE_REGS_THUNK(irq_enable);
+
/*:*/
/*M:003 Note that we don't check for outstanding interrupts when we re-enable
* them (or when we unmask an interrupt). This seems to work for the moment,
@@ -278,7 +283,7 @@
/* There's one problem which normal hardware doesn't have: the Host
* can't handle us removing entries we're currently using. So we clear
* the GS register here: if it's needed it'll be reloaded anyway. */
- loadsegment(gs, 0);
+ lazy_load_gs(0);
lazy_hcall(LHCALL_LOAD_TLS, __pa(&t->tls_array), cpu, 0);
}
@@ -823,13 +828,14 @@
return 0;
}
-static struct apic_ops lguest_basic_apic_ops = {
- .read = lguest_apic_read,
- .write = lguest_apic_write,
- .icr_read = lguest_apic_icr_read,
- .icr_write = lguest_apic_icr_write,
- .wait_icr_idle = lguest_apic_wait_icr_idle,
- .safe_wait_icr_idle = lguest_apic_safe_wait_icr_idle,
+static void set_lguest_basic_apic_ops(void)
+{
+ apic->read = lguest_apic_read;
+ apic->write = lguest_apic_write;
+ apic->icr_read = lguest_apic_icr_read;
+ apic->icr_write = lguest_apic_icr_write;
+ apic->wait_icr_idle = lguest_apic_wait_icr_idle;
+ apic->safe_wait_icr_idle = lguest_apic_safe_wait_icr_idle;
};
#endif
@@ -984,10 +990,10 @@
/* interrupt-related operations */
pv_irq_ops.init_IRQ = lguest_init_IRQ;
- pv_irq_ops.save_fl = save_fl;
- pv_irq_ops.restore_fl = restore_fl;
- pv_irq_ops.irq_disable = irq_disable;
- pv_irq_ops.irq_enable = irq_enable;
+ pv_irq_ops.save_fl = PV_CALLEE_SAVE(save_fl);
+ pv_irq_ops.restore_fl = PV_CALLEE_SAVE(restore_fl);
+ pv_irq_ops.irq_disable = PV_CALLEE_SAVE(irq_disable);
+ pv_irq_ops.irq_enable = PV_CALLEE_SAVE(irq_enable);
pv_irq_ops.safe_halt = lguest_safe_halt;
/* init-time operations */
@@ -1030,7 +1036,7 @@
#ifdef CONFIG_X86_LOCAL_APIC
/* apic read/write intercepts */
- apic_ops = &lguest_basic_apic_ops;
+ set_lguest_basic_apic_ops();
#endif
/* time operations */
diff --git a/arch/x86/lib/getuser.S b/arch/x86/lib/getuser.S
index ad37400..51f1504 100644
--- a/arch/x86/lib/getuser.S
+++ b/arch/x86/lib/getuser.S
@@ -28,7 +28,7 @@
#include <linux/linkage.h>
#include <asm/dwarf2.h>
-#include <asm/page.h>
+#include <asm/page_types.h>
#include <asm/errno.h>
#include <asm/asm-offsets.h>
#include <asm/thread_info.h>
diff --git a/arch/x86/mach-default/Makefile b/arch/x86/mach-default/Makefile
deleted file mode 100644
index 012fe34..0000000
--- a/arch/x86/mach-default/Makefile
+++ /dev/null
@@ -1,5 +0,0 @@
-#
-# Makefile for the linux kernel.
-#
-
-obj-y := setup.o
diff --git a/arch/x86/mach-default/setup.c b/arch/x86/mach-default/setup.c
deleted file mode 100644
index 50b5918..0000000
--- a/arch/x86/mach-default/setup.c
+++ /dev/null
@@ -1,174 +0,0 @@
-/*
- * Machine specific setup for generic
- */
-
-#include <linux/smp.h>
-#include <linux/init.h>
-#include <linux/interrupt.h>
-#include <asm/acpi.h>
-#include <asm/arch_hooks.h>
-#include <asm/e820.h>
-#include <asm/setup.h>
-
-#include <mach_ipi.h>
-
-#ifdef CONFIG_HOTPLUG_CPU
-#define DEFAULT_SEND_IPI (1)
-#else
-#define DEFAULT_SEND_IPI (0)
-#endif
-
-int no_broadcast = DEFAULT_SEND_IPI;
-
-/**
- * pre_intr_init_hook - initialisation prior to setting up interrupt vectors
- *
- * Description:
- * Perform any necessary interrupt initialisation prior to setting up
- * the "ordinary" interrupt call gates. For legacy reasons, the ISA
- * interrupts should be initialised here if the machine emulates a PC
- * in any way.
- **/
-void __init pre_intr_init_hook(void)
-{
- if (x86_quirks->arch_pre_intr_init) {
- if (x86_quirks->arch_pre_intr_init())
- return;
- }
- init_ISA_irqs();
-}
-
-/*
- * IRQ2 is cascade interrupt to second interrupt controller
- */
-static struct irqaction irq2 = {
- .handler = no_action,
- .mask = CPU_MASK_NONE,
- .name = "cascade",
-};
-
-/**
- * intr_init_hook - post gate setup interrupt initialisation
- *
- * Description:
- * Fill in any interrupts that may have been left out by the general
- * init_IRQ() routine. interrupts having to do with the machine rather
- * than the devices on the I/O bus (like APIC interrupts in intel MP
- * systems) are started here.
- **/
-void __init intr_init_hook(void)
-{
- if (x86_quirks->arch_intr_init) {
- if (x86_quirks->arch_intr_init())
- return;
- }
- if (!acpi_ioapic)
- setup_irq(2, &irq2);
-
-}
-
-/**
- * pre_setup_arch_hook - hook called prior to any setup_arch() execution
- *
- * Description:
- * generally used to activate any machine specific identification
- * routines that may be needed before setup_arch() runs. On Voyager
- * this is used to get the board revision and type.
- **/
-void __init pre_setup_arch_hook(void)
-{
-}
-
-/**
- * trap_init_hook - initialise system specific traps
- *
- * Description:
- * Called as the final act of trap_init(). Used in VISWS to initialise
- * the various board specific APIC traps.
- **/
-void __init trap_init_hook(void)
-{
- if (x86_quirks->arch_trap_init) {
- if (x86_quirks->arch_trap_init())
- return;
- }
-}
-
-static struct irqaction irq0 = {
- .handler = timer_interrupt,
- .flags = IRQF_DISABLED | IRQF_NOBALANCING | IRQF_IRQPOLL | IRQF_TIMER,
- .mask = CPU_MASK_NONE,
- .name = "timer"
-};
-
-/**
- * pre_time_init_hook - do any specific initialisations before.
- *
- **/
-void __init pre_time_init_hook(void)
-{
- if (x86_quirks->arch_pre_time_init)
- x86_quirks->arch_pre_time_init();
-}
-
-/**
- * time_init_hook - do any specific initialisations for the system timer.
- *
- * Description:
- * Must plug the system timer interrupt source at HZ into the IRQ listed
- * in irq_vectors.h:TIMER_IRQ
- **/
-void __init time_init_hook(void)
-{
- if (x86_quirks->arch_time_init) {
- /*
- * A nonzero return code does not mean failure, it means
- * that the architecture quirk does not want any
- * generic (timer) setup to be performed after this:
- */
- if (x86_quirks->arch_time_init())
- return;
- }
-
- irq0.mask = cpumask_of_cpu(0);
- setup_irq(0, &irq0);
-}
-
-#ifdef CONFIG_MCA
-/**
- * mca_nmi_hook - hook into MCA specific NMI chain
- *
- * Description:
- * The MCA (Microchannel Architecture) has an NMI chain for NMI sources
- * along the MCA bus. Use this to hook into that chain if you will need
- * it.
- **/
-void mca_nmi_hook(void)
-{
- /*
- * If I recall correctly, there's a whole bunch of other things that
- * we can do to check for NMI problems, but that's all I know about
- * at the moment.
- */
- pr_warning("NMI generated from unknown source!\n");
-}
-#endif
-
-static __init int no_ipi_broadcast(char *str)
-{
- get_option(&str, &no_broadcast);
- pr_info("Using %s mode\n",
- no_broadcast ? "No IPI Broadcast" : "IPI Broadcast");
- return 1;
-}
-__setup("no_ipi_broadcast=", no_ipi_broadcast);
-
-static int __init print_ipi_mode(void)
-{
- pr_info("Using IPI %s mode\n",
- no_broadcast ? "No-Shortcut" : "Shortcut");
- return 0;
-}
-
-late_initcall(print_ipi_mode);
-
diff --git a/arch/x86/mach-generic/Makefile b/arch/x86/mach-generic/Makefile
deleted file mode 100644
index 6730f4e..0000000
--- a/arch/x86/mach-generic/Makefile
+++ /dev/null
@@ -1,11 +0,0 @@
-#
-# Makefile for the generic architecture
-#
-
-EXTRA_CFLAGS := -Iarch/x86/kernel
-
-obj-y := probe.o default.o
-obj-$(CONFIG_X86_NUMAQ) += numaq.o
-obj-$(CONFIG_X86_SUMMIT) += summit.o
-obj-$(CONFIG_X86_BIGSMP) += bigsmp.o
-obj-$(CONFIG_X86_ES7000) += es7000.o
diff --git a/arch/x86/mach-generic/bigsmp.c b/arch/x86/mach-generic/bigsmp.c
deleted file mode 100644
index bc4c784..0000000
--- a/arch/x86/mach-generic/bigsmp.c
+++ /dev/null
@@ -1,60 +0,0 @@
-/*
- * APIC driver for "bigsmp" XAPIC machines with more than 8 virtual CPUs.
- * Drives the local APIC in "clustered mode".
- */
-#define APIC_DEFINITION 1
-#include <linux/threads.h>
-#include <linux/cpumask.h>
-#include <asm/mpspec.h>
-#include <asm/genapic.h>
-#include <asm/fixmap.h>
-#include <asm/apicdef.h>
-#include <linux/kernel.h>
-#include <linux/init.h>
-#include <linux/dmi.h>
-#include <asm/bigsmp/apicdef.h>
-#include <linux/smp.h>
-#include <asm/bigsmp/apic.h>
-#include <asm/bigsmp/ipi.h>
-#include <asm/mach-default/mach_mpparse.h>
-#include <asm/mach-default/mach_wakecpu.h>
-
-static int dmi_bigsmp; /* can be set by dmi scanners */
-
-static int hp_ht_bigsmp(const struct dmi_system_id *d)
-{
- printk(KERN_NOTICE "%s detected: force use of apic=bigsmp\n", d->ident);
- dmi_bigsmp = 1;
- return 0;
-}
-
-
-static const struct dmi_system_id bigsmp_dmi_table[] = {
- { hp_ht_bigsmp, "HP ProLiant DL760 G2",
- { DMI_MATCH(DMI_BIOS_VENDOR, "HP"),
- DMI_MATCH(DMI_BIOS_VERSION, "P44-"),}
- },
-
- { hp_ht_bigsmp, "HP ProLiant DL740",
- { DMI_MATCH(DMI_BIOS_VENDOR, "HP"),
- DMI_MATCH(DMI_BIOS_VERSION, "P47-"),}
- },
- { }
-};
-
-static void vector_allocation_domain(int cpu, cpumask_t *retmask)
-{
- cpus_clear(*retmask);
- cpu_set(cpu, *retmask);
-}
-
-static int probe_bigsmp(void)
-{
- if (def_to_bigsmp)
- dmi_bigsmp = 1;
- else
- dmi_check_system(bigsmp_dmi_table);
- return dmi_bigsmp;
-}
-
-struct genapic apic_bigsmp = APIC_INIT("bigsmp", probe_bigsmp);
diff --git a/arch/x86/mach-generic/default.c b/arch/x86/mach-generic/default.c
deleted file mode 100644
index e63a4a7..0000000
--- a/arch/x86/mach-generic/default.c
+++ /dev/null
@@ -1,27 +0,0 @@
-/*
- * Default generic APIC driver. This handles up to 8 CPUs.
- */
-#define APIC_DEFINITION 1
-#include <linux/threads.h>
-#include <linux/cpumask.h>
-#include <asm/mpspec.h>
-#include <asm/mach-default/mach_apicdef.h>
-#include <asm/genapic.h>
-#include <asm/fixmap.h>
-#include <asm/apicdef.h>
-#include <linux/kernel.h>
-#include <linux/string.h>
-#include <linux/smp.h>
-#include <linux/init.h>
-#include <asm/mach-default/mach_apic.h>
-#include <asm/mach-default/mach_ipi.h>
-#include <asm/mach-default/mach_mpparse.h>
-#include <asm/mach-default/mach_wakecpu.h>
-
-/* should be called last. */
-static int probe_default(void)
-{
- return 1;
-}
-
-struct genapic apic_default = APIC_INIT("default", probe_default);
diff --git a/arch/x86/mach-generic/es7000.c b/arch/x86/mach-generic/es7000.c
deleted file mode 100644
index c2ded14..0000000
--- a/arch/x86/mach-generic/es7000.c
+++ /dev/null
@@ -1,103 +0,0 @@
-/*
- * APIC driver for the Unisys ES7000 chipset.
- */
-#define APIC_DEFINITION 1
-#include <linux/threads.h>
-#include <linux/cpumask.h>
-#include <asm/mpspec.h>
-#include <asm/genapic.h>
-#include <asm/fixmap.h>
-#include <asm/apicdef.h>
-#include <linux/kernel.h>
-#include <linux/string.h>
-#include <linux/init.h>
-#include <asm/es7000/apicdef.h>
-#include <linux/smp.h>
-#include <asm/es7000/apic.h>
-#include <asm/es7000/ipi.h>
-#include <asm/es7000/mpparse.h>
-#include <asm/mach-default/mach_wakecpu.h>
-
-void __init es7000_update_genapic_to_cluster(void)
-{
- genapic->target_cpus = target_cpus_cluster;
- genapic->int_delivery_mode = INT_DELIVERY_MODE_CLUSTER;
- genapic->int_dest_mode = INT_DEST_MODE_CLUSTER;
- genapic->no_balance_irq = NO_BALANCE_IRQ_CLUSTER;
-
- genapic->init_apic_ldr = init_apic_ldr_cluster;
-
- genapic->cpu_mask_to_apicid = cpu_mask_to_apicid_cluster;
-}
-
-static int probe_es7000(void)
-{
- /* probed later in mptable/ACPI hooks */
- return 0;
-}
-
-extern void es7000_sw_apic(void);
-static void __init enable_apic_mode(void)
-{
- es7000_sw_apic();
- return;
-}
-
-static __init int
-mps_oem_check(struct mpc_table *mpc, char *oem, char *productid)
-{
- if (mpc->oemptr) {
- struct mpc_oemtable *oem_table =
- (struct mpc_oemtable *)mpc->oemptr;
- if (!strncmp(oem, "UNISYS", 6))
- return parse_unisys_oem((char *)oem_table);
- }
- return 0;
-}
-
-#ifdef CONFIG_ACPI
-/* Hook from generic ACPI tables.c */
-static int __init acpi_madt_oem_check(char *oem_id, char *oem_table_id)
-{
- unsigned long oem_addr = 0;
- int check_dsdt;
- int ret = 0;
-
- /* check dsdt at first to avoid clear fix_map for oem_addr */
- check_dsdt = es7000_check_dsdt();
-
- if (!find_unisys_acpi_oem_table(&oem_addr)) {
- if (check_dsdt)
- ret = parse_unisys_oem((char *)oem_addr);
- else {
- setup_unisys();
- ret = 1;
- }
- /*
- * we need to unmap it
- */
- unmap_unisys_acpi_oem_table(oem_addr);
- }
- return ret;
-}
-#else
-static int __init acpi_madt_oem_check(char *oem_id, char *oem_table_id)
-{
- return 0;
-}
-#endif
-
-static void vector_allocation_domain(int cpu, cpumask_t *retmask)
-{
- /* Careful. Some cpus do not strictly honor the set of cpus
- * specified in the interrupt destination when using lowest
- * priority interrupt delivery mode.
- *
- * In particular there was a hyperthreading cpu observed to
- * deliver interrupts to the wrong hyperthread when only one
- * hyperthread was specified in the interrupt desitination.
- */
- *retmask = (cpumask_t){ { [0] = APIC_ALL_CPUS, } };
-}
-
-struct genapic __initdata_refok apic_es7000 = APIC_INIT("es7000", probe_es7000);
diff --git a/arch/x86/mach-generic/numaq.c b/arch/x86/mach-generic/numaq.c
deleted file mode 100644
index 3679e22..0000000
--- a/arch/x86/mach-generic/numaq.c
+++ /dev/null
@@ -1,53 +0,0 @@
-/*
- * APIC driver for the IBM NUMAQ chipset.
- */
-#define APIC_DEFINITION 1
-#include <linux/threads.h>
-#include <linux/cpumask.h>
-#include <asm/mpspec.h>
-#include <asm/genapic.h>
-#include <asm/fixmap.h>
-#include <asm/apicdef.h>
-#include <linux/kernel.h>
-#include <linux/string.h>
-#include <linux/init.h>
-#include <asm/numaq/apicdef.h>
-#include <linux/smp.h>
-#include <asm/numaq/apic.h>
-#include <asm/numaq/ipi.h>
-#include <asm/numaq/mpparse.h>
-#include <asm/numaq/wakecpu.h>
-#include <asm/numaq.h>
-
-static int mps_oem_check(struct mpc_table *mpc, char *oem, char *productid)
-{
- numaq_mps_oem_check(mpc, oem, productid);
- return found_numaq;
-}
-
-static int probe_numaq(void)
-{
- /* already know from get_memcfg_numaq() */
- return found_numaq;
-}
-
-/* Hook from generic ACPI tables.c */
-static int acpi_madt_oem_check(char *oem_id, char *oem_table_id)
-{
- return 0;
-}
-
-static void vector_allocation_domain(int cpu, cpumask_t *retmask)
-{
- /* Careful. Some cpus do not strictly honor the set of cpus
- * specified in the interrupt destination when using lowest
- * priority interrupt delivery mode.
- *
- * In particular there was a hyperthreading cpu observed to
- * deliver interrupts to the wrong hyperthread when only one
- * hyperthread was specified in the interrupt desitination.
- */
- *retmask = (cpumask_t){ { [0] = APIC_ALL_CPUS, } };
-}
-
-struct genapic apic_numaq = APIC_INIT("NUMAQ", probe_numaq);
diff --git a/arch/x86/mach-generic/probe.c b/arch/x86/mach-generic/probe.c
deleted file mode 100644
index 15a38da..0000000
--- a/arch/x86/mach-generic/probe.c
+++ /dev/null
@@ -1,152 +0,0 @@
-/*
- * Copyright 2003 Andi Kleen, SuSE Labs.
- * Subject to the GNU Public License, v.2
- *
- * Generic x86 APIC driver probe layer.
- */
-#include <linux/threads.h>
-#include <linux/cpumask.h>
-#include <linux/string.h>
-#include <linux/kernel.h>
-#include <linux/ctype.h>
-#include <linux/init.h>
-#include <linux/errno.h>
-#include <asm/fixmap.h>
-#include <asm/mpspec.h>
-#include <asm/apicdef.h>
-#include <asm/genapic.h>
-#include <asm/setup.h>
-
-extern struct genapic apic_numaq;
-extern struct genapic apic_summit;
-extern struct genapic apic_bigsmp;
-extern struct genapic apic_es7000;
-extern struct genapic apic_default;
-
-struct genapic *genapic = &apic_default;
-
-static struct genapic *apic_probe[] __initdata = {
-#ifdef CONFIG_X86_NUMAQ
- &apic_numaq,
-#endif
-#ifdef CONFIG_X86_SUMMIT
- &apic_summit,
-#endif
-#ifdef CONFIG_X86_BIGSMP
- &apic_bigsmp,
-#endif
-#ifdef CONFIG_X86_ES7000
- &apic_es7000,
-#endif
- &apic_default, /* must be last */
- NULL,
-};
-
-static int cmdline_apic __initdata;
-static int __init parse_apic(char *arg)
-{
- int i;
-
- if (!arg)
- return -EINVAL;
-
- for (i = 0; apic_probe[i]; i++) {
- if (!strcmp(apic_probe[i]->name, arg)) {
- genapic = apic_probe[i];
- cmdline_apic = 1;
- return 0;
- }
- }
-
- if (x86_quirks->update_genapic)
- x86_quirks->update_genapic();
-
- /* Parsed again by __setup for debug/verbose */
- return 0;
-}
-early_param("apic", parse_apic);
-
-void __init generic_bigsmp_probe(void)
-{
-#ifdef CONFIG_X86_BIGSMP
- /*
- * This routine is used to switch to bigsmp mode when
- * - There is no apic= option specified by the user
- * - generic_apic_probe() has chosen apic_default as the sub_arch
- * - we find more than 8 CPUs in acpi LAPIC listing with xAPIC support
- */
-
- if (!cmdline_apic && genapic == &apic_default) {
- if (apic_bigsmp.probe()) {
- genapic = &apic_bigsmp;
- if (x86_quirks->update_genapic)
- x86_quirks->update_genapic();
- printk(KERN_INFO "Overriding APIC driver with %s\n",
- genapic->name);
- }
- }
-#endif
-}
-
-void __init generic_apic_probe(void)
-{
- if (!cmdline_apic) {
- int i;
- for (i = 0; apic_probe[i]; i++) {
- if (apic_probe[i]->probe()) {
- genapic = apic_probe[i];
- break;
- }
- }
- /* Not visible without early console */
- if (!apic_probe[i])
- panic("Didn't find an APIC driver");
-
- if (x86_quirks->update_genapic)
- x86_quirks->update_genapic();
- }
- printk(KERN_INFO "Using APIC driver %s\n", genapic->name);
-}
-
-/* These functions can switch the APIC even after the initial ->probe() */
-
-int __init mps_oem_check(struct mpc_table *mpc, char *oem, char *productid)
-{
- int i;
- for (i = 0; apic_probe[i]; ++i) {
- if (apic_probe[i]->mps_oem_check(mpc, oem, productid)) {
- if (!cmdline_apic) {
- genapic = apic_probe[i];
- if (x86_quirks->update_genapic)
- x86_quirks->update_genapic();
- printk(KERN_INFO "Switched to APIC driver `%s'.\n",
- genapic->name);
- }
- return 1;
- }
- }
- return 0;
-}
-
-int __init acpi_madt_oem_check(char *oem_id, char *oem_table_id)
-{
- int i;
- for (i = 0; apic_probe[i]; ++i) {
- if (apic_probe[i]->acpi_madt_oem_check(oem_id, oem_table_id)) {
- if (!cmdline_apic) {
- genapic = apic_probe[i];
- if (x86_quirks->update_genapic)
- x86_quirks->update_genapic();
- printk(KERN_INFO "Switched to APIC driver `%s'.\n",
- genapic->name);
- }
- return 1;
- }
- }
- return 0;
-}
-
-int hard_smp_processor_id(void)
-{
- return genapic->get_apic_id(*(unsigned long *)(APIC_BASE+APIC_ID));
-}
diff --git a/arch/x86/mach-generic/summit.c b/arch/x86/mach-generic/summit.c
deleted file mode 100644
index 2821ffc..0000000
--- a/arch/x86/mach-generic/summit.c
+++ /dev/null
@@ -1,40 +0,0 @@
-/*
- * APIC driver for the IBM "Summit" chipset.
- */
-#define APIC_DEFINITION 1
-#include <linux/threads.h>
-#include <linux/cpumask.h>
-#include <asm/mpspec.h>
-#include <asm/genapic.h>
-#include <asm/fixmap.h>
-#include <asm/apicdef.h>
-#include <linux/kernel.h>
-#include <linux/string.h>
-#include <linux/init.h>
-#include <asm/summit/apicdef.h>
-#include <linux/smp.h>
-#include <asm/summit/apic.h>
-#include <asm/summit/ipi.h>
-#include <asm/summit/mpparse.h>
-#include <asm/mach-default/mach_wakecpu.h>
-
-static int probe_summit(void)
-{
- /* probed later in mptable/ACPI hooks */
- return 0;
-}
-
-static void vector_allocation_domain(int cpu, cpumask_t *retmask)
-{
- /* Careful. Some cpus do not strictly honor the set of cpus
- * specified in the interrupt destination when using lowest
- * priority interrupt delivery mode.
- *
- * In particular there was a hyperthreading cpu observed to
- * deliver interrupts to the wrong hyperthread when only one
- * hyperthread was specified in the interrupt desitination.
- */
- *retmask = (cpumask_t){ { [0] = APIC_ALL_CPUS, } };
-}
-
-struct genapic apic_summit = APIC_INIT("summit", probe_summit);
diff --git a/arch/x86/mach-rdc321x/Makefile b/arch/x86/mach-rdc321x/Makefile
deleted file mode 100644
index 8325b4c..0000000
--- a/arch/x86/mach-rdc321x/Makefile
+++ /dev/null
@@ -1,5 +0,0 @@
-#
-# Makefile for the RDC321x specific parts of the kernel
-#
-obj-$(CONFIG_X86_RDC321X) := gpio.o platform.o
-
diff --git a/arch/x86/mach-rdc321x/gpio.c b/arch/x86/mach-rdc321x/gpio.c
deleted file mode 100644
index 247f33d..0000000
--- a/arch/x86/mach-rdc321x/gpio.c
+++ /dev/null
@@ -1,194 +0,0 @@
-/*
- * GPIO support for RDC SoC R3210/R8610
- *
- * Copyright (C) 2007, Florian Fainelli <florian@openwrt.org>
- * Copyright (C) 2008, Volker Weiss <dev@tintuc.de>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- *
- */
-
-
-#include <linux/spinlock.h>
-#include <linux/io.h>
-#include <linux/types.h>
-#include <linux/module.h>
-
-#include <asm/gpio.h>
-#include <asm/mach-rdc321x/rdc321x_defs.h>
-
-
-/* spin lock to protect our private copy of GPIO data register plus
- the access to PCI conf registers. */
-static DEFINE_SPINLOCK(gpio_lock);
-
-/* copy of GPIO data registers */
-static u32 gpio_data_reg1;
-static u32 gpio_data_reg2;
-
-static u32 gpio_request_data[2];
-
-
-static inline void rdc321x_conf_write(unsigned addr, u32 value)
-{
- outl((1 << 31) | (7 << 11) | addr, RDC3210_CFGREG_ADDR);
- outl(value, RDC3210_CFGREG_DATA);
-}
-
-static inline void rdc321x_conf_or(unsigned addr, u32 value)
-{
- outl((1 << 31) | (7 << 11) | addr, RDC3210_CFGREG_ADDR);
- value |= inl(RDC3210_CFGREG_DATA);
- outl(value, RDC3210_CFGREG_DATA);
-}
-
-static inline u32 rdc321x_conf_read(unsigned addr)
-{
- outl((1 << 31) | (7 << 11) | addr, RDC3210_CFGREG_ADDR);
-
- return inl(RDC3210_CFGREG_DATA);
-}
-
-/* configure pin as GPIO */
-static void rdc321x_configure_gpio(unsigned gpio)
-{
- unsigned long flags;
-
- spin_lock_irqsave(&gpio_lock, flags);
- rdc321x_conf_or(gpio < 32
- ? RDC321X_GPIO_CTRL_REG1 : RDC321X_GPIO_CTRL_REG2,
- 1 << (gpio & 0x1f));
- spin_unlock_irqrestore(&gpio_lock, flags);
-}
-
-/* initially setup the 2 copies of the gpio data registers.
- This function must be called by the platform setup code. */
-void __init rdc321x_gpio_setup()
-{
- /* this might not be, what others (BIOS, bootloader, etc.)
- wrote to these registers before, but it's a good guess. Still
- better than just using 0xffffffff. */
-
- gpio_data_reg1 = rdc321x_conf_read(RDC321X_GPIO_DATA_REG1);
- gpio_data_reg2 = rdc321x_conf_read(RDC321X_GPIO_DATA_REG2);
-}
-
-/* determine, if gpio number is valid */
-static inline int rdc321x_is_gpio(unsigned gpio)
-{
- return gpio <= RDC321X_MAX_GPIO;
-}
-
-/* request GPIO */
-int rdc_gpio_request(unsigned gpio, const char *label)
-{
- unsigned long flags;
-
- if (!rdc321x_is_gpio(gpio))
- return -EINVAL;
-
- spin_lock_irqsave(&gpio_lock, flags);
- if (gpio_request_data[(gpio & 0x20) ? 1 : 0] & (1 << (gpio & 0x1f)))
- goto inuse;
- gpio_request_data[(gpio & 0x20) ? 1 : 0] |= (1 << (gpio & 0x1f));
- spin_unlock_irqrestore(&gpio_lock, flags);
-
- return 0;
-inuse:
- spin_unlock_irqrestore(&gpio_lock, flags);
- return -EINVAL;
-}
-EXPORT_SYMBOL(rdc_gpio_request);
-
-/* release previously-claimed GPIO */
-void rdc_gpio_free(unsigned gpio)
-{
- unsigned long flags;
-
- if (!rdc321x_is_gpio(gpio))
- return;
-
- spin_lock_irqsave(&gpio_lock, flags);
- gpio_request_data[(gpio & 0x20) ? 1 : 0] &= ~(1 << (gpio & 0x1f));
- spin_unlock_irqrestore(&gpio_lock, flags);
-}
-EXPORT_SYMBOL(rdc_gpio_free);
-
-/* read GPIO pin */
-int rdc_gpio_get_value(unsigned gpio)
-{
- u32 reg;
- unsigned long flags;
-
- spin_lock_irqsave(&gpio_lock, flags);
- reg = rdc321x_conf_read(gpio < 32
- ? RDC321X_GPIO_DATA_REG1 : RDC321X_GPIO_DATA_REG2);
- spin_unlock_irqrestore(&gpio_lock, flags);
-
- return (1 << (gpio & 0x1f)) & reg ? 1 : 0;
-}
-EXPORT_SYMBOL(rdc_gpio_get_value);
-
-/* set GPIO pin to value */
-void rdc_gpio_set_value(unsigned gpio, int value)
-{
- unsigned long flags;
- u32 reg;
-
- reg = 1 << (gpio & 0x1f);
- if (gpio < 32) {
- spin_lock_irqsave(&gpio_lock, flags);
- if (value)
- gpio_data_reg1 |= reg;
- else
- gpio_data_reg1 &= ~reg;
- rdc321x_conf_write(RDC321X_GPIO_DATA_REG1, gpio_data_reg1);
- spin_unlock_irqrestore(&gpio_lock, flags);
- } else {
- spin_lock_irqsave(&gpio_lock, flags);
- if (value)
- gpio_data_reg2 |= reg;
- else
- gpio_data_reg2 &= ~reg;
- rdc321x_conf_write(RDC321X_GPIO_DATA_REG2, gpio_data_reg2);
- spin_unlock_irqrestore(&gpio_lock, flags);
- }
-}
-EXPORT_SYMBOL(rdc_gpio_set_value);
-
-/* configure GPIO pin as input */
-int rdc_gpio_direction_input(unsigned gpio)
-{
- if (!rdc321x_is_gpio(gpio))
- return -EINVAL;
-
- rdc321x_configure_gpio(gpio);
-
- return 0;
-}
-EXPORT_SYMBOL(rdc_gpio_direction_input);
-
-/* configure GPIO pin as output and set value */
-int rdc_gpio_direction_output(unsigned gpio, int value)
-{
- if (!rdc321x_is_gpio(gpio))
- return -EINVAL;
-
- gpio_set_value(gpio, value);
- rdc321x_configure_gpio(gpio);
-
- return 0;
-}
-EXPORT_SYMBOL(rdc_gpio_direction_output);
diff --git a/arch/x86/mach-rdc321x/platform.c b/arch/x86/mach-rdc321x/platform.c
deleted file mode 100644
index 4f4e50c..0000000
--- a/arch/x86/mach-rdc321x/platform.c
+++ /dev/null
@@ -1,69 +0,0 @@
-/*
- * Generic RDC321x platform devices
- *
- * Copyright (C) 2007 Florian Fainelli <florian@openwrt.org>
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version 2
- * of the License, or (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the
- * Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
- * Boston, MA 02110-1301, USA.
- *
- */
-
-#include <linux/init.h>
-#include <linux/kernel.h>
-#include <linux/list.h>
-#include <linux/device.h>
-#include <linux/platform_device.h>
-#include <linux/leds.h>
-
-#include <asm/gpio.h>
-
-/* LEDS */
-static struct gpio_led default_leds[] = {
- { .name = "rdc:dmz", .gpio = 1, },
-};
-
-static struct gpio_led_platform_data rdc321x_led_data = {
- .num_leds = ARRAY_SIZE(default_leds),
- .leds = default_leds,
-};
-
-static struct platform_device rdc321x_leds = {
- .name = "leds-gpio",
- .id = -1,
- .dev = {
- .platform_data = &rdc321x_led_data,
- }
-};
-
-/* Watchdog */
-static struct platform_device rdc321x_wdt = {
- .name = "rdc321x-wdt",
- .id = -1,
- .num_resources = 0,
-};
-
-static struct platform_device *rdc321x_devs[] = {
- &rdc321x_leds,
- &rdc321x_wdt
-};
-
-static int __init rdc_board_setup(void)
-{
- rdc321x_gpio_setup();
-
- return platform_add_devices(rdc321x_devs, ARRAY_SIZE(rdc321x_devs));
-}
-
-arch_initcall(rdc_board_setup);
diff --git a/arch/x86/mach-voyager/Makefile b/arch/x86/mach-voyager/Makefile
deleted file mode 100644
index 15c250b..0000000
--- a/arch/x86/mach-voyager/Makefile
+++ /dev/null
@@ -1,8 +0,0 @@
-#
-# Makefile for the linux kernel.
-#
-
-EXTRA_CFLAGS := -Iarch/x86/kernel
-obj-y := setup.o voyager_basic.o voyager_thread.o
-
-obj-$(CONFIG_SMP) += voyager_smp.o voyager_cat.o
diff --git a/arch/x86/mach-voyager/setup.c b/arch/x86/mach-voyager/setup.c
deleted file mode 100644
index 8e51183..0000000
--- a/arch/x86/mach-voyager/setup.c
+++ /dev/null
@@ -1,118 +0,0 @@
-/*
- * Machine specific setup for generic
- */
-
-#include <linux/init.h>
-#include <linux/interrupt.h>
-#include <asm/arch_hooks.h>
-#include <asm/voyager.h>
-#include <asm/e820.h>
-#include <asm/io.h>
-#include <asm/setup.h>
-
-void __init pre_intr_init_hook(void)
-{
- init_ISA_irqs();
-}
-
-/*
- * IRQ2 is cascade interrupt to second interrupt controller
- */
-static struct irqaction irq2 = {
- .handler = no_action,
- .mask = CPU_MASK_NONE,
- .name = "cascade",
-};
-
-void __init intr_init_hook(void)
-{
-#ifdef CONFIG_SMP
- voyager_smp_intr_init();
-#endif
-
- setup_irq(2, &irq2);
-}
-
-static void voyager_disable_tsc(void)
-{
- /* Voyagers run their CPUs from independent clocks, so disable
- * the TSC code because we can't sync them */
- setup_clear_cpu_cap(X86_FEATURE_TSC);
-}
-
-void __init pre_setup_arch_hook(void)
-{
- voyager_disable_tsc();
-}
-
-void __init pre_time_init_hook(void)
-{
- voyager_disable_tsc();
-}
-
-void __init trap_init_hook(void)
-{
-}
-
-static struct irqaction irq0 = {
- .handler = timer_interrupt,
- .flags = IRQF_DISABLED | IRQF_NOBALANCING | IRQF_IRQPOLL | IRQF_TIMER,
- .mask = CPU_MASK_NONE,
- .name = "timer"
-};
-
-void __init time_init_hook(void)
-{
- irq0.mask = cpumask_of_cpu(safe_smp_processor_id());
- setup_irq(0, &irq0);
-}
-
-/* Hook for machine specific memory setup. */
-
-char *__init machine_specific_memory_setup(void)
-{
- char *who;
- int new_nr;
-
- who = "NOT VOYAGER";
-
- if (voyager_level == 5) {
- __u32 addr, length;
- int i;
-
- who = "Voyager-SUS";
-
- e820.nr_map = 0;
- for (i = 0; voyager_memory_detect(i, &addr, &length); i++) {
- e820_add_region(addr, length, E820_RAM);
- }
- return who;
- } else if (voyager_level == 4) {
- __u32 tom;
- __u16 catbase = inb(VOYAGER_SSPB_RELOCATION_PORT) << 8;
- /* select the DINO config space */
- outb(VOYAGER_DINO, VOYAGER_CAT_CONFIG_PORT);
- /* Read DINO top of memory register */
- tom = ((inb(catbase + 0x4) & 0xf0) << 16)
- + ((inb(catbase + 0x5) & 0x7f) << 24);
-
- if (inb(catbase) != VOYAGER_DINO) {
- printk(KERN_ERR
- "Voyager: Failed to get DINO for L4, setting tom to EXT_MEM_K\n");
- tom = (boot_params.screen_info.ext_mem_k) << 10;
- }
- who = "Voyager-TOM";
- e820_add_region(0, 0x9f000, E820_RAM);
- /* map from 1M to top of memory */
- e820_add_region(1 * 1024 * 1024, tom - 1 * 1024 * 1024,
- E820_RAM);
- /* FIXME: Should check the ASICs to see if I need to
- * take out the 8M window. Just do it at the moment
- * */
- e820_add_region(8 * 1024 * 1024, 8 * 1024 * 1024,
- E820_RESERVED);
- return who;
- }
-
- return default_machine_specific_memory_setup();
-}
diff --git a/arch/x86/mach-voyager/voyager_basic.c b/arch/x86/mach-voyager/voyager_basic.c
deleted file mode 100644
index 46d6f80..0000000
--- a/arch/x86/mach-voyager/voyager_basic.c
+++ /dev/null
@@ -1,317 +0,0 @@
-/* Copyright (C) 1999,2001
- *
- * Author: J.E.J.Bottomley@HansenPartnership.com
- *
- * This file contains all the voyager specific routines for getting
- * initialisation of the architecture to function. For additional
- * features see:
- *
- * voyager_cat.c - Voyager CAT bus interface
- * voyager_smp.c - Voyager SMP hal (emulates linux smp.c)
- */
-
-#include <linux/module.h>
-#include <linux/types.h>
-#include <linux/sched.h>
-#include <linux/ptrace.h>
-#include <linux/ioport.h>
-#include <linux/interrupt.h>
-#include <linux/init.h>
-#include <linux/delay.h>
-#include <linux/reboot.h>
-#include <linux/sysrq.h>
-#include <linux/smp.h>
-#include <linux/nodemask.h>
-#include <asm/io.h>
-#include <asm/voyager.h>
-#include <asm/vic.h>
-#include <linux/pm.h>
-#include <asm/tlbflush.h>
-#include <asm/arch_hooks.h>
-#include <asm/i8253.h>
-
-/*
- * Power off function, if any
- */
-void (*pm_power_off) (void);
-EXPORT_SYMBOL(pm_power_off);
-
-int voyager_level = 0;
-
-struct voyager_SUS *voyager_SUS = NULL;
-
-#ifdef CONFIG_SMP
-static void voyager_dump(int dummy1, struct tty_struct *dummy3)
-{
- /* get here via a sysrq */
- voyager_smp_dump();
-}
-
-static struct sysrq_key_op sysrq_voyager_dump_op = {
- .handler = voyager_dump,
- .help_msg = "Voyager",
- .action_msg = "Dump Voyager Status",
-};
-#endif
-
-void voyager_detect(struct voyager_bios_info *bios)
-{
- if (bios->len != 0xff) {
- int class = (bios->class_1 << 8)
- | (bios->class_2 & 0xff);
-
- printk("Voyager System detected.\n"
- " Class %x, Revision %d.%d\n",
- class, bios->major, bios->minor);
- if (class == VOYAGER_LEVEL4)
- voyager_level = 4;
- else if (class < VOYAGER_LEVEL5_AND_ABOVE)
- voyager_level = 3;
- else
- voyager_level = 5;
- printk(" Architecture Level %d\n", voyager_level);
- if (voyager_level < 4)
- printk
- ("\n**WARNING**: Voyager HAL only supports Levels 4 and 5 Architectures at the moment\n\n");
- /* install the power off handler */
- pm_power_off = voyager_power_off;
-#ifdef CONFIG_SMP
- register_sysrq_key('v', &sysrq_voyager_dump_op);
-#endif
- } else {
- printk("\n\n**WARNING**: No Voyager Subsystem Found\n");
- }
-}
-
-void voyager_system_interrupt(int cpl, void *dev_id)
-{
- printk("Voyager: detected system interrupt\n");
-}
-
-/* Routine to read information from the extended CMOS area */
-__u8 voyager_extended_cmos_read(__u16 addr)
-{
- outb(addr & 0xff, 0x74);
- outb((addr >> 8) & 0xff, 0x75);
- return inb(0x76);
-}
-
-/* internal definitions for the SUS Click Map of memory */
-
-#define CLICK_ENTRIES 16
-#define CLICK_SIZE 4096 /* click to byte conversion for Length */
-
-typedef struct ClickMap {
- struct Entry {
- __u32 Address;
- __u32 Length;
- } Entry[CLICK_ENTRIES];
-} ClickMap_t;
-
-/* This routine is pretty much an awful hack to read the bios clickmap by
- * mapping it into page 0. There are usually three regions in the map:
- * Base Memory
- * Extended Memory
- * zero length marker for end of map
- *
- * Returns are 0 for failure and 1 for success on extracting region.
- */
-int __init voyager_memory_detect(int region, __u32 * start, __u32 * length)
-{
- int i;
- int retval = 0;
- __u8 cmos[4];
- ClickMap_t *map;
- unsigned long map_addr;
- unsigned long old;
-
- if (region >= CLICK_ENTRIES) {
- printk("Voyager: Illegal ClickMap region %d\n", region);
- return 0;
- }
-
- for (i = 0; i < sizeof(cmos); i++)
- cmos[i] =
- voyager_extended_cmos_read(VOYAGER_MEMORY_CLICKMAP + i);
-
- map_addr = *(unsigned long *)cmos;
-
- /* steal page 0 for this */
- old = pg0[0];
- pg0[0] = ((map_addr & PAGE_MASK) | _PAGE_RW | _PAGE_PRESENT);
- local_flush_tlb();
- /* now clear everything out but page 0 */
- map = (ClickMap_t *) (map_addr & (~PAGE_MASK));
-
- /* zero length is the end of the clickmap */
- if (map->Entry[region].Length != 0) {
- *length = map->Entry[region].Length * CLICK_SIZE;
- *start = map->Entry[region].Address;
- retval = 1;
- }
-
- /* replace the mapping */
- pg0[0] = old;
- local_flush_tlb();
- return retval;
-}
-
-/* voyager specific handling code for timer interrupts. Used to hand
- * off the timer tick to the SMP code, since the VIC doesn't have an
- * internal timer (The QIC does, but that's another story). */
-void voyager_timer_interrupt(void)
-{
- if ((jiffies & 0x3ff) == 0) {
-
- /* There seems to be something flaky in either
- * hardware or software that is resetting the timer 0
- * count to something much higher than it should be
- * This seems to occur in the boot sequence, just
- * before root is mounted. Therefore, every 10
- * seconds or so, we sanity check the timer zero count
- * and kick it back to where it should be.
- *
- * FIXME: This is the most awful hack yet seen. I
- * should work out exactly what is interfering with
- * the timer count settings early in the boot sequence
- * and swiftly introduce it to something sharp and
- * pointy. */
- __u16 val;
-
- spin_lock(&i8253_lock);
-
- outb_p(0x00, 0x43);
- val = inb_p(0x40);
- val |= inb(0x40) << 8;
- spin_unlock(&i8253_lock);
-
- if (val > LATCH) {
- printk
- ("\nVOYAGER: countdown timer value too high (%d), resetting\n\n",
- val);
- spin_lock(&i8253_lock);
- outb(0x34, 0x43);
- outb_p(LATCH & 0xff, 0x40); /* LSB */
- outb(LATCH >> 8, 0x40); /* MSB */
- spin_unlock(&i8253_lock);
- }
- }
-#ifdef CONFIG_SMP
- smp_vic_timer_interrupt();
-#endif
-}
-
-void voyager_power_off(void)
-{
- printk("VOYAGER Power Off\n");
-
- if (voyager_level == 5) {
- voyager_cat_power_off();
- } else if (voyager_level == 4) {
- /* This doesn't apparently work on most L4 machines,
- * but the specs say to do this to get automatic power
- * off. Unfortunately, if it doesn't power off the
- * machine, it ends up doing a cold restart, which
- * isn't really intended, so comment out the code */
-#if 0
- int port;
-
- /* enable the voyager Configuration Space */
- outb((inb(VOYAGER_MC_SETUP) & 0xf0) | 0x8, VOYAGER_MC_SETUP);
- /* the port for the power off flag is an offset from the
- floating base */
- port = (inb(VOYAGER_SSPB_RELOCATION_PORT) << 8) + 0x21;
- /* set the power off flag */
- outb(inb(port) | 0x1, port);
-#endif
- }
- /* and wait for it to happen */
- local_irq_disable();
- for (;;)
- halt();
-}
-
-/* copied from process.c */
-static inline void kb_wait(void)
-{
- int i;
-
- for (i = 0; i < 0x10000; i++)
- if ((inb_p(0x64) & 0x02) == 0)
- break;
-}
-
-void machine_shutdown(void)
-{
- /* Architecture specific shutdown needed before a kexec */
-}
-
-void machine_restart(char *cmd)
-{
- printk("Voyager Warm Restart\n");
- kb_wait();
-
- if (voyager_level == 5) {
- /* write magic values to the RTC to inform system that
- * shutdown is beginning */
- outb(0x8f, 0x70);
- outb(0x5, 0x71);
-
- udelay(50);
- outb(0xfe, 0x64); /* pull reset low */
- } else if (voyager_level == 4) {
- __u16 catbase = inb(VOYAGER_SSPB_RELOCATION_PORT) << 8;
- __u8 basebd = inb(VOYAGER_MC_SETUP);
-
- outb(basebd | 0x08, VOYAGER_MC_SETUP);
- outb(0x02, catbase + 0x21);
- }
- local_irq_disable();
- for (;;)
- halt();
-}
-
-void machine_emergency_restart(void)
-{
- /*for now, just hook this to a warm restart */
- machine_restart(NULL);
-}
-
-void mca_nmi_hook(void)
-{
- __u8 dumpval __maybe_unused = inb(0xf823);
- __u8 swnmi __maybe_unused = inb(0xf813);
-
- /* FIXME: assume dump switch pressed */
- /* check to see if the dump switch was pressed */
- VDEBUG(("VOYAGER: dumpval = 0x%x, swnmi = 0x%x\n", dumpval, swnmi));
- /* clear swnmi */
- outb(0xff, 0xf813);
- /* tell SUS to ignore dump */
- if (voyager_level == 5 && voyager_SUS != NULL) {
- if (voyager_SUS->SUS_mbox == VOYAGER_DUMP_BUTTON_NMI) {
- voyager_SUS->kernel_mbox = VOYAGER_NO_COMMAND;
- voyager_SUS->kernel_flags |= VOYAGER_OS_IN_PROGRESS;
- udelay(1000);
- voyager_SUS->kernel_mbox = VOYAGER_IGNORE_DUMP;
- voyager_SUS->kernel_flags &= ~VOYAGER_OS_IN_PROGRESS;
- }
- }
- printk(KERN_ERR
- "VOYAGER: Dump switch pressed, printing CPU%d tracebacks\n",
- smp_processor_id());
- show_stack(NULL, NULL);
- show_state();
-}
-
-void machine_halt(void)
-{
- /* treat a halt like a power off */
- machine_power_off();
-}
-
-void machine_power_off(void)
-{
- if (pm_power_off)
- pm_power_off();
-}
diff --git a/arch/x86/mach-voyager/voyager_cat.c b/arch/x86/mach-voyager/voyager_cat.c
deleted file mode 100644
index 2ad598c..0000000
--- a/arch/x86/mach-voyager/voyager_cat.c
+++ /dev/null
@@ -1,1197 +0,0 @@
-/* -*- mode: c; c-basic-offset: 8 -*- */
-
-/* Copyright (C) 1999,2001
- *
- * Author: J.E.J.Bottomley@HansenPartnership.com
- *
- * This file contains all the logic for manipulating the CAT bus
- * in a level 5 machine.
- *
- * The CAT bus is a serial configuration and test bus. Its primary
- * uses are to probe the initial configuration of the system and to
- * diagnose error conditions when a system interrupt occurs. The low
- * level interface is fairly primitive, so most of this file consists
- * of bit shift manipulations to send and receive packets on the
- * serial bus */
-
-#include <linux/types.h>
-#include <linux/completion.h>
-#include <linux/sched.h>
-#include <asm/voyager.h>
-#include <asm/vic.h>
-#include <linux/ioport.h>
-#include <linux/init.h>
-#include <linux/slab.h>
-#include <linux/delay.h>
-#include <asm/io.h>
-
-#ifdef VOYAGER_CAT_DEBUG
-#define CDEBUG(x) printk x
-#else
-#define CDEBUG(x)
-#endif
-
-/* the CAT command port */
-#define CAT_CMD (sspb + 0xe)
-/* the CAT data port */
-#define CAT_DATA (sspb + 0xd)
-
-/* the internal cat functions */
-static void cat_pack(__u8 * msg, __u16 start_bit, __u8 * data, __u16 num_bits);
-static void cat_unpack(__u8 * msg, __u16 start_bit, __u8 * data,
- __u16 num_bits);
-static void cat_build_header(__u8 * header, const __u16 len,
- const __u16 smallest_reg_bits,
- const __u16 longest_reg_bits);
-static int cat_sendinst(voyager_module_t * modp, voyager_asic_t * asicp,
- __u8 reg, __u8 op);
-static int cat_getdata(voyager_module_t * modp, voyager_asic_t * asicp,
- __u8 reg, __u8 * value);
-static int cat_shiftout(__u8 * data, __u16 data_bytes, __u16 header_bytes,
- __u8 pad_bits);
-static int cat_write(voyager_module_t * modp, voyager_asic_t * asicp, __u8 reg,
- __u8 value);
-static int cat_read(voyager_module_t * modp, voyager_asic_t * asicp, __u8 reg,
- __u8 * value);
-static int cat_subread(voyager_module_t * modp, voyager_asic_t * asicp,
- __u16 offset, __u16 len, void *buf);
-static int cat_senddata(voyager_module_t * modp, voyager_asic_t * asicp,
- __u8 reg, __u8 value);
-static int cat_disconnect(voyager_module_t * modp, voyager_asic_t * asicp);
-static int cat_connect(voyager_module_t * modp, voyager_asic_t * asicp);
-
-static inline const char *cat_module_name(int module_id)
-{
- switch (module_id) {
- case 0x10:
- return "Processor Slot 0";
- case 0x11:
- return "Processor Slot 1";
- case 0x12:
- return "Processor Slot 2";
- case 0x13:
- return "Processor Slot 4";
- case 0x14:
- return "Memory Slot 0";
- case 0x15:
- return "Memory Slot 1";
- case 0x18:
- return "Primary Microchannel";
- case 0x19:
- return "Secondary Microchannel";
- case 0x1a:
- return "Power Supply Interface";
- case 0x1c:
- return "Processor Slot 5";
- case 0x1d:
- return "Processor Slot 6";
- case 0x1e:
- return "Processor Slot 7";
- case 0x1f:
- return "Processor Slot 8";
- default:
- return "Unknown Module";
- }
-}
-
-static int sspb = 0; /* stores the super port location */
-int voyager_8slot = 0; /* set to true if a 51xx monster */
-
-voyager_module_t *voyager_cat_list;
-
-/* the I/O port assignments for the VIC and QIC */
-static struct resource vic_res = {
- .name = "Voyager Interrupt Controller",
- .start = 0xFC00,
- .end = 0xFC6F
-};
-static struct resource qic_res = {
- .name = "Quad Interrupt Controller",
- .start = 0xFC70,
- .end = 0xFCFF
-};
-
-/* This function is used to pack a data bit stream inside a message.
- * It writes num_bits of the data buffer in msg starting at start_bit.
- * Note: This function assumes that any unused bit in the data stream
- * is set to zero so that the ors will work correctly */
-static void
-cat_pack(__u8 * msg, const __u16 start_bit, __u8 * data, const __u16 num_bits)
-{
- /* compute initial shift needed */
- const __u16 offset = start_bit % BITS_PER_BYTE;
- __u16 len = num_bits / BITS_PER_BYTE;
- __u16 byte = start_bit / BITS_PER_BYTE;
- __u16 residue = (num_bits % BITS_PER_BYTE) + offset;
- int i;
-
- /* adjust if we have more than a byte of residue */
- if (residue >= BITS_PER_BYTE) {
- residue -= BITS_PER_BYTE;
- len++;
- }
-
- /* clear out the bits. We assume here that if len==0 then
- * residue >= offset. This is always true for the catbus
- * operations */
- msg[byte] &= 0xff << (BITS_PER_BYTE - offset);
- msg[byte++] |= data[0] >> offset;
- if (len == 0)
- return;
- for (i = 1; i < len; i++)
- msg[byte++] = (data[i - 1] << (BITS_PER_BYTE - offset))
- | (data[i] >> offset);
- if (residue != 0) {
- __u8 mask = 0xff >> residue;
- __u8 last_byte = data[i - 1] << (BITS_PER_BYTE - offset)
- | (data[i] >> offset);
-
- last_byte &= ~mask;
- msg[byte] &= mask;
- msg[byte] |= last_byte;
- }
- return;
-}
-
-/* unpack the data again (same arguments as cat_pack()). data buffer
- * must be zero populated.
- *
- * Function: given a message string move to start_bit and copy num_bits into
- * data (starting at bit 0 in data).
- */
-static void
-cat_unpack(__u8 * msg, const __u16 start_bit, __u8 * data, const __u16 num_bits)
-{
- /* compute initial shift needed */
- const __u16 offset = start_bit % BITS_PER_BYTE;
- __u16 len = num_bits / BITS_PER_BYTE;
- const __u8 last_bits = num_bits % BITS_PER_BYTE;
- __u16 byte = start_bit / BITS_PER_BYTE;
- int i;
-
- if (last_bits != 0)
- len++;
-
- /* special case: want < 8 bits from msg and we can get it from
- * a single byte of the msg */
- if (len == 0 && BITS_PER_BYTE - offset >= num_bits) {
- data[0] = msg[byte] << offset;
- data[0] &= 0xff >> (BITS_PER_BYTE - num_bits);
- return;
- }
- for (i = 0; i < len; i++) {
- /* this annoying if has to be done just in case a read of
- * msg one beyond the array causes a panic */
- if (offset != 0) {
- data[i] = msg[byte++] << offset;
- data[i] |= msg[byte] >> (BITS_PER_BYTE - offset);
- } else {
- data[i] = msg[byte++];
- }
- }
- /* do we need to truncate the final byte */
- if (last_bits != 0) {
- data[i - 1] &= 0xff << (BITS_PER_BYTE - last_bits);
- }
- return;
-}
-
-static void
-cat_build_header(__u8 * header, const __u16 len, const __u16 smallest_reg_bits,
- const __u16 longest_reg_bits)
-{
- int i;
- __u16 start_bit = (smallest_reg_bits - 1) % BITS_PER_BYTE;
- __u8 *last_byte = &header[len - 1];
-
- if (start_bit == 0)
- start_bit = 1; /* must have at least one bit in the hdr */
-
- for (i = 0; i < len; i++)
- header[i] = 0;
-
- for (i = start_bit; i > 0; i--)
- *last_byte = ((*last_byte) << 1) + 1;
-
-}
-
-static int
-cat_sendinst(voyager_module_t * modp, voyager_asic_t * asicp, __u8 reg, __u8 op)
-{
- __u8 parity, inst, inst_buf[4] = { 0 };
- __u8 iseq[VOYAGER_MAX_SCAN_PATH], hseq[VOYAGER_MAX_REG_SIZE];
- __u16 ibytes, hbytes, padbits;
- int i;
-
- /*
- * Parity is the parity of the register number + 1 (READ_REGISTER
- * and WRITE_REGISTER always add '1' to the number of bits == 1)
- */
- parity = (__u8) (1 + (reg & 0x01) +
- ((__u8) (reg & 0x02) >> 1) +
- ((__u8) (reg & 0x04) >> 2) +
- ((__u8) (reg & 0x08) >> 3)) % 2;
-
- inst = ((parity << 7) | (reg << 2) | op);
-
- outb(VOYAGER_CAT_IRCYC, CAT_CMD);
- if (!modp->scan_path_connected) {
- if (asicp->asic_id != VOYAGER_CAT_ID) {
- printk
- ("**WARNING***: cat_sendinst has disconnected scan path not to CAT asic\n");
- return 1;
- }
- outb(VOYAGER_CAT_HEADER, CAT_DATA);
- outb(inst, CAT_DATA);
- if (inb(CAT_DATA) != VOYAGER_CAT_HEADER) {
- CDEBUG(("VOYAGER CAT: cat_sendinst failed to get CAT_HEADER\n"));
- return 1;
- }
- return 0;
- }
- ibytes = modp->inst_bits / BITS_PER_BYTE;
- if ((padbits = modp->inst_bits % BITS_PER_BYTE) != 0) {
- padbits = BITS_PER_BYTE - padbits;
- ibytes++;
- }
- hbytes = modp->largest_reg / BITS_PER_BYTE;
- if (modp->largest_reg % BITS_PER_BYTE)
- hbytes++;
- CDEBUG(("cat_sendinst: ibytes=%d, hbytes=%d\n", ibytes, hbytes));
- /* initialise the instruction sequence to 0xff */
- for (i = 0; i < ibytes + hbytes; i++)
- iseq[i] = 0xff;
- cat_build_header(hseq, hbytes, modp->smallest_reg, modp->largest_reg);
- cat_pack(iseq, modp->inst_bits, hseq, hbytes * BITS_PER_BYTE);
- inst_buf[0] = inst;
- inst_buf[1] = 0xFF >> (modp->largest_reg % BITS_PER_BYTE);
- cat_pack(iseq, asicp->bit_location, inst_buf, asicp->ireg_length);
-#ifdef VOYAGER_CAT_DEBUG
- printk("ins = 0x%x, iseq: ", inst);
- for (i = 0; i < ibytes + hbytes; i++)
- printk("0x%x ", iseq[i]);
- printk("\n");
-#endif
- if (cat_shiftout(iseq, ibytes, hbytes, padbits)) {
- CDEBUG(("VOYAGER CAT: cat_sendinst: cat_shiftout failed\n"));
- return 1;
- }
- CDEBUG(("CAT SHIFTOUT DONE\n"));
- return 0;
-}
-
-static int
-cat_getdata(voyager_module_t * modp, voyager_asic_t * asicp, __u8 reg,
- __u8 * value)
-{
- if (!modp->scan_path_connected) {
- if (asicp->asic_id != VOYAGER_CAT_ID) {
- CDEBUG(("VOYAGER CAT: ERROR: cat_getdata to CAT asic with scan path connected\n"));
- return 1;
- }
- if (reg > VOYAGER_SUBADDRHI)
- outb(VOYAGER_CAT_RUN, CAT_CMD);
- outb(VOYAGER_CAT_DRCYC, CAT_CMD);
- outb(VOYAGER_CAT_HEADER, CAT_DATA);
- *value = inb(CAT_DATA);
- outb(0xAA, CAT_DATA);
- if (inb(CAT_DATA) != VOYAGER_CAT_HEADER) {
- CDEBUG(("cat_getdata: failed to get VOYAGER_CAT_HEADER\n"));
- return 1;
- }
- return 0;
- } else {
- __u16 sbits = modp->num_asics - 1 + asicp->ireg_length;
- __u16 sbytes = sbits / BITS_PER_BYTE;
- __u16 tbytes;
- __u8 string[VOYAGER_MAX_SCAN_PATH],
- trailer[VOYAGER_MAX_REG_SIZE];
- __u8 padbits;
- int i;
-
- outb(VOYAGER_CAT_DRCYC, CAT_CMD);
-
- if ((padbits = sbits % BITS_PER_BYTE) != 0) {
- padbits = BITS_PER_BYTE - padbits;
- sbytes++;
- }
- tbytes = asicp->ireg_length / BITS_PER_BYTE;
- if (asicp->ireg_length % BITS_PER_BYTE)
- tbytes++;
- CDEBUG(("cat_getdata: tbytes = %d, sbytes = %d, padbits = %d\n",
- tbytes, sbytes, padbits));
- cat_build_header(trailer, tbytes, 1, asicp->ireg_length);
-
- for (i = tbytes - 1; i >= 0; i--) {
- outb(trailer[i], CAT_DATA);
- string[sbytes + i] = inb(CAT_DATA);
- }
-
- for (i = sbytes - 1; i >= 0; i--) {
- outb(0xaa, CAT_DATA);
- string[i] = inb(CAT_DATA);
- }
- *value = 0;
- cat_unpack(string,
- padbits + (tbytes * BITS_PER_BYTE) +
- asicp->asic_location, value, asicp->ireg_length);
-#ifdef VOYAGER_CAT_DEBUG
- printk("value=0x%x, string: ", *value);
- for (i = 0; i < tbytes + sbytes; i++)
- printk("0x%x ", string[i]);
- printk("\n");
-#endif
-
- /* sanity check the rest of the return */
- for (i = 0; i < tbytes; i++) {
- __u8 input = 0;
-
- cat_unpack(string, padbits + (i * BITS_PER_BYTE),
- &input, BITS_PER_BYTE);
- if (trailer[i] != input) {
- CDEBUG(("cat_getdata: failed to sanity check rest of ret(%d) 0x%x != 0x%x\n", i, input, trailer[i]));
- return 1;
- }
- }
- CDEBUG(("cat_getdata DONE\n"));
- return 0;
- }
-}
-
-static int
-cat_shiftout(__u8 * data, __u16 data_bytes, __u16 header_bytes, __u8 pad_bits)
-{
- int i;
-
- for (i = data_bytes + header_bytes - 1; i >= header_bytes; i--)
- outb(data[i], CAT_DATA);
-
- for (i = header_bytes - 1; i >= 0; i--) {
- __u8 header = 0;
- __u8 input;
-
- outb(data[i], CAT_DATA);
- input = inb(CAT_DATA);
- CDEBUG(("cat_shiftout: returned 0x%x\n", input));
- cat_unpack(data, ((data_bytes + i) * BITS_PER_BYTE) - pad_bits,
- &header, BITS_PER_BYTE);
- if (input != header) {
- CDEBUG(("VOYAGER CAT: cat_shiftout failed to return header 0x%x != 0x%x\n", input, header));
- return 1;
- }
- }
- return 0;
-}
-
-static int
-cat_senddata(voyager_module_t * modp, voyager_asic_t * asicp,
- __u8 reg, __u8 value)
-{
- outb(VOYAGER_CAT_DRCYC, CAT_CMD);
- if (!modp->scan_path_connected) {
- if (asicp->asic_id != VOYAGER_CAT_ID) {
- CDEBUG(("VOYAGER CAT: ERROR: scan path disconnected when asic != CAT\n"));
- return 1;
- }
- outb(VOYAGER_CAT_HEADER, CAT_DATA);
- outb(value, CAT_DATA);
- if (inb(CAT_DATA) != VOYAGER_CAT_HEADER) {
- CDEBUG(("cat_senddata: failed to get correct header response to sent data\n"));
- return 1;
- }
- if (reg > VOYAGER_SUBADDRHI) {
- outb(VOYAGER_CAT_RUN, CAT_CMD);
- outb(VOYAGER_CAT_END, CAT_CMD);
- outb(VOYAGER_CAT_RUN, CAT_CMD);
- }
-
- return 0;
- } else {
- __u16 hbytes = asicp->ireg_length / BITS_PER_BYTE;
- __u16 dbytes =
- (modp->num_asics - 1 + asicp->ireg_length) / BITS_PER_BYTE;
- __u8 padbits, dseq[VOYAGER_MAX_SCAN_PATH],
- hseq[VOYAGER_MAX_REG_SIZE];
- int i;
-
- if ((padbits = (modp->num_asics - 1
- + asicp->ireg_length) % BITS_PER_BYTE) != 0) {
- padbits = BITS_PER_BYTE - padbits;
- dbytes++;
- }
- if (asicp->ireg_length % BITS_PER_BYTE)
- hbytes++;
-
- cat_build_header(hseq, hbytes, 1, asicp->ireg_length);
-
- for (i = 0; i < dbytes + hbytes; i++)
- dseq[i] = 0xff;
- CDEBUG(("cat_senddata: dbytes=%d, hbytes=%d, padbits=%d\n",
- dbytes, hbytes, padbits));
- cat_pack(dseq, modp->num_asics - 1 + asicp->ireg_length,
- hseq, hbytes * BITS_PER_BYTE);
- cat_pack(dseq, asicp->asic_location, &value,
- asicp->ireg_length);
-#ifdef VOYAGER_CAT_DEBUG
- printk("dseq ");
- for (i = 0; i < hbytes + dbytes; i++) {
- printk("0x%x ", dseq[i]);
- }
- printk("\n");
-#endif
- return cat_shiftout(dseq, dbytes, hbytes, padbits);
- }
-}
-
-static int
-cat_write(voyager_module_t * modp, voyager_asic_t * asicp, __u8 reg, __u8 value)
-{
- if (cat_sendinst(modp, asicp, reg, VOYAGER_WRITE_CONFIG))
- return 1;
- return cat_senddata(modp, asicp, reg, value);
-}
-
-static int
-cat_read(voyager_module_t * modp, voyager_asic_t * asicp, __u8 reg,
- __u8 * value)
-{
- if (cat_sendinst(modp, asicp, reg, VOYAGER_READ_CONFIG))
- return 1;
- return cat_getdata(modp, asicp, reg, value);
-}
-
-static int
-cat_subaddrsetup(voyager_module_t * modp, voyager_asic_t * asicp, __u16 offset,
- __u16 len)
-{
- __u8 val;
-
- if (len > 1) {
- /* set auto increment */
- __u8 newval;
-
- if (cat_read(modp, asicp, VOYAGER_AUTO_INC_REG, &val)) {
- CDEBUG(("cat_subaddrsetup: read of VOYAGER_AUTO_INC_REG failed\n"));
- return 1;
- }
- CDEBUG(("cat_subaddrsetup: VOYAGER_AUTO_INC_REG = 0x%x\n",
- val));
- newval = val | VOYAGER_AUTO_INC;
- if (newval != val) {
- if (cat_write(modp, asicp, VOYAGER_AUTO_INC_REG, val)) {
- CDEBUG(("cat_subaddrsetup: write to VOYAGER_AUTO_INC_REG failed\n"));
- return 1;
- }
- }
- }
- if (cat_write(modp, asicp, VOYAGER_SUBADDRLO, (__u8) (offset & 0xff))) {
- CDEBUG(("cat_subaddrsetup: write to SUBADDRLO failed\n"));
- return 1;
- }
- if (asicp->subaddr > VOYAGER_SUBADDR_LO) {
- if (cat_write
- (modp, asicp, VOYAGER_SUBADDRHI, (__u8) (offset >> 8))) {
- CDEBUG(("cat_subaddrsetup: write to SUBADDRHI failed\n"));
- return 1;
- }
- cat_read(modp, asicp, VOYAGER_SUBADDRHI, &val);
- CDEBUG(("cat_subaddrsetup: offset = %d, hi = %d\n", offset,
- val));
- }
- cat_read(modp, asicp, VOYAGER_SUBADDRLO, &val);
- CDEBUG(("cat_subaddrsetup: offset = %d, lo = %d\n", offset, val));
- return 0;
-}
-
-static int
-cat_subwrite(voyager_module_t * modp, voyager_asic_t * asicp, __u16 offset,
- __u16 len, void *buf)
-{
- int i, retval;
-
- /* FIXME: need special actions for VOYAGER_CAT_ID here */
- if (asicp->asic_id == VOYAGER_CAT_ID) {
- CDEBUG(("cat_subwrite: ATTEMPT TO WRITE TO CAT ASIC\n"));
- /* FIXME -- This is supposed to be handled better
- * There is a problem writing to the cat asic in the
- * PSI. The 30us delay seems to work, though */
- udelay(30);
- }
-
- if ((retval = cat_subaddrsetup(modp, asicp, offset, len)) != 0) {
- printk("cat_subwrite: cat_subaddrsetup FAILED\n");
- return retval;
- }
-
- if (cat_sendinst
- (modp, asicp, VOYAGER_SUBADDRDATA, VOYAGER_WRITE_CONFIG)) {
- printk("cat_subwrite: cat_sendinst FAILED\n");
- return 1;
- }
- for (i = 0; i < len; i++) {
- if (cat_senddata(modp, asicp, 0xFF, ((__u8 *) buf)[i])) {
- printk
- ("cat_subwrite: cat_sendata element at %d FAILED\n",
- i);
- return 1;
- }
- }
- return 0;
-}
-static int
-cat_subread(voyager_module_t * modp, voyager_asic_t * asicp, __u16 offset,
- __u16 len, void *buf)
-{
- int i, retval;
-
- if ((retval = cat_subaddrsetup(modp, asicp, offset, len)) != 0) {
- CDEBUG(("cat_subread: cat_subaddrsetup FAILED\n"));
- return retval;
- }
-
- if (cat_sendinst(modp, asicp, VOYAGER_SUBADDRDATA, VOYAGER_READ_CONFIG)) {
- CDEBUG(("cat_subread: cat_sendinst failed\n"));
- return 1;
- }
- for (i = 0; i < len; i++) {
- if (cat_getdata(modp, asicp, 0xFF, &((__u8 *) buf)[i])) {
- CDEBUG(("cat_subread: cat_getdata element %d failed\n",
- i));
- return 1;
- }
- }
- return 0;
-}
-
-/* buffer for storing EPROM data read in during initialisation */
-static __initdata __u8 eprom_buf[0xFFFF];
-static voyager_module_t *voyager_initial_module;
-
-/* Initialise the cat bus components. We assume this is called by the
- * boot cpu *after* all memory initialisation has been done (so we can
- * use kmalloc) but before smp initialisation, so we can probe the SMP
- * configuration and pick up necessary information. */
-void __init voyager_cat_init(void)
-{
- voyager_module_t **modpp = &voyager_initial_module;
- voyager_asic_t **asicpp;
- voyager_asic_t *qabc_asic = NULL;
- int i, j;
- unsigned long qic_addr = 0;
- __u8 qabc_data[0x20];
- __u8 num_submodules, val;
- voyager_eprom_hdr_t *eprom_hdr = (voyager_eprom_hdr_t *) & eprom_buf[0];
-
- __u8 cmos[4];
- unsigned long addr;
-
- /* initiallise the SUS mailbox */
- for (i = 0; i < sizeof(cmos); i++)
- cmos[i] = voyager_extended_cmos_read(VOYAGER_DUMP_LOCATION + i);
- addr = *(unsigned long *)cmos;
- if ((addr & 0xff000000) != 0xff000000) {
- printk(KERN_ERR
- "Voyager failed to get SUS mailbox (addr = 0x%lx\n",
- addr);
- } else {
- static struct resource res;
-
- res.name = "voyager SUS";
- res.start = addr;
- res.end = addr + 0x3ff;
-
- request_resource(&iomem_resource, &res);
- voyager_SUS = (struct voyager_SUS *)
- ioremap(addr, 0x400);
- printk(KERN_NOTICE "Voyager SUS mailbox version 0x%x\n",
- voyager_SUS->SUS_version);
- voyager_SUS->kernel_version = VOYAGER_MAILBOX_VERSION;
- voyager_SUS->kernel_flags = VOYAGER_OS_HAS_SYSINT;
- }
-
- /* clear the processor counts */
- voyager_extended_vic_processors = 0;
- voyager_quad_processors = 0;
-
- printk("VOYAGER: beginning CAT bus probe\n");
- /* set up the SuperSet Port Block which tells us where the
- * CAT communication port is */
- sspb = inb(VOYAGER_SSPB_RELOCATION_PORT) * 0x100;
- VDEBUG(("VOYAGER DEBUG: sspb = 0x%x\n", sspb));
-
- /* now find out if were 8 slot or normal */
- if ((inb(VIC_PROC_WHO_AM_I) & EIGHT_SLOT_IDENTIFIER)
- == EIGHT_SLOT_IDENTIFIER) {
- voyager_8slot = 1;
- printk(KERN_NOTICE
- "Voyager: Eight slot 51xx configuration detected\n");
- }
-
- for (i = VOYAGER_MIN_MODULE; i <= VOYAGER_MAX_MODULE; i++) {
- __u8 input;
- int asic;
- __u16 eprom_size;
- __u16 sp_offset;
-
- outb(VOYAGER_CAT_DESELECT, VOYAGER_CAT_CONFIG_PORT);
- outb(i, VOYAGER_CAT_CONFIG_PORT);
-
- /* check the presence of the module */
- outb(VOYAGER_CAT_RUN, CAT_CMD);
- outb(VOYAGER_CAT_IRCYC, CAT_CMD);
- outb(VOYAGER_CAT_HEADER, CAT_DATA);
- /* stream series of alternating 1's and 0's to stimulate
- * response */
- outb(0xAA, CAT_DATA);
- input = inb(CAT_DATA);
- outb(VOYAGER_CAT_END, CAT_CMD);
- if (input != VOYAGER_CAT_HEADER) {
- continue;
- }
- CDEBUG(("VOYAGER DEBUG: found module id 0x%x, %s\n", i,
- cat_module_name(i)));
- *modpp = kmalloc(sizeof(voyager_module_t), GFP_KERNEL); /*&voyager_module_storage[cat_count++]; */
- if (*modpp == NULL) {
- printk("**WARNING** kmalloc failure in cat_init\n");
- continue;
- }
- memset(*modpp, 0, sizeof(voyager_module_t));
- /* need temporary asic for cat_subread. It will be
- * filled in correctly later */
- (*modpp)->asic = kmalloc(sizeof(voyager_asic_t), GFP_KERNEL); /*&voyager_asic_storage[asic_count]; */
- if ((*modpp)->asic == NULL) {
- printk("**WARNING** kmalloc failure in cat_init\n");
- continue;
- }
- memset((*modpp)->asic, 0, sizeof(voyager_asic_t));
- (*modpp)->asic->asic_id = VOYAGER_CAT_ID;
- (*modpp)->asic->subaddr = VOYAGER_SUBADDR_HI;
- (*modpp)->module_addr = i;
- (*modpp)->scan_path_connected = 0;
- if (i == VOYAGER_PSI) {
- /* Exception leg for modules with no EEPROM */
- printk("Module \"%s\"\n", cat_module_name(i));
- continue;
- }
-
- CDEBUG(("cat_init: Reading eeprom for module 0x%x at offset %d\n", i, VOYAGER_XSUM_END_OFFSET));
- outb(VOYAGER_CAT_RUN, CAT_CMD);
- cat_disconnect(*modpp, (*modpp)->asic);
- if (cat_subread(*modpp, (*modpp)->asic,
- VOYAGER_XSUM_END_OFFSET, sizeof(eprom_size),
- &eprom_size)) {
- printk
- ("**WARNING**: Voyager couldn't read EPROM size for module 0x%x\n",
- i);
- outb(VOYAGER_CAT_END, CAT_CMD);
- continue;
- }
- if (eprom_size > sizeof(eprom_buf)) {
- printk
- ("**WARNING**: Voyager insufficient size to read EPROM data, module 0x%x. Need %d\n",
- i, eprom_size);
- outb(VOYAGER_CAT_END, CAT_CMD);
- continue;
- }
- outb(VOYAGER_CAT_END, CAT_CMD);
- outb(VOYAGER_CAT_RUN, CAT_CMD);
- CDEBUG(("cat_init: module 0x%x, eeprom_size %d\n", i,
- eprom_size));
- if (cat_subread
- (*modpp, (*modpp)->asic, 0, eprom_size, eprom_buf)) {
- outb(VOYAGER_CAT_END, CAT_CMD);
- continue;
- }
- outb(VOYAGER_CAT_END, CAT_CMD);
- printk("Module \"%s\", version 0x%x, tracer 0x%x, asics %d\n",
- cat_module_name(i), eprom_hdr->version_id,
- *((__u32 *) eprom_hdr->tracer), eprom_hdr->num_asics);
- (*modpp)->ee_size = eprom_hdr->ee_size;
- (*modpp)->num_asics = eprom_hdr->num_asics;
- asicpp = &((*modpp)->asic);
- sp_offset = eprom_hdr->scan_path_offset;
- /* All we really care about are the Quad cards. We
- * identify them because they are in a processor slot
- * and have only four asics */
- if ((i < 0x10 || (i >= 0x14 && i < 0x1c) || i > 0x1f)) {
- modpp = &((*modpp)->next);
- continue;
- }
- /* Now we know it's in a processor slot, does it have
- * a quad baseboard submodule */
- outb(VOYAGER_CAT_RUN, CAT_CMD);
- cat_read(*modpp, (*modpp)->asic, VOYAGER_SUBMODPRESENT,
- &num_submodules);
- /* lowest two bits, active low */
- num_submodules = ~(0xfc | num_submodules);
- CDEBUG(("VOYAGER CAT: %d submodules present\n",
- num_submodules));
- if (num_submodules == 0) {
- /* fill in the dyadic extended processors */
- __u8 cpu = i & 0x07;
-
- printk("Module \"%s\": Dyadic Processor Card\n",
- cat_module_name(i));
- voyager_extended_vic_processors |= (1 << cpu);
- cpu += 4;
- voyager_extended_vic_processors |= (1 << cpu);
- outb(VOYAGER_CAT_END, CAT_CMD);
- continue;
- }
-
- /* now we want to read the asics on the first submodule,
- * which should be the quad base board */
-
- cat_read(*modpp, (*modpp)->asic, VOYAGER_SUBMODSELECT, &val);
- CDEBUG(("cat_init: SUBMODSELECT value = 0x%x\n", val));
- val = (val & 0x7c) | VOYAGER_QUAD_BASEBOARD;
- cat_write(*modpp, (*modpp)->asic, VOYAGER_SUBMODSELECT, val);
-
- outb(VOYAGER_CAT_END, CAT_CMD);
-
- CDEBUG(("cat_init: Reading eeprom for module 0x%x at offset %d\n", i, VOYAGER_XSUM_END_OFFSET));
- outb(VOYAGER_CAT_RUN, CAT_CMD);
- cat_disconnect(*modpp, (*modpp)->asic);
- if (cat_subread(*modpp, (*modpp)->asic,
- VOYAGER_XSUM_END_OFFSET, sizeof(eprom_size),
- &eprom_size)) {
- printk
- ("**WARNING**: Voyager couldn't read EPROM size for module 0x%x\n",
- i);
- outb(VOYAGER_CAT_END, CAT_CMD);
- continue;
- }
- if (eprom_size > sizeof(eprom_buf)) {
- printk
- ("**WARNING**: Voyager insufficient size to read EPROM data, module 0x%x. Need %d\n",
- i, eprom_size);
- outb(VOYAGER_CAT_END, CAT_CMD);
- continue;
- }
- outb(VOYAGER_CAT_END, CAT_CMD);
- outb(VOYAGER_CAT_RUN, CAT_CMD);
- CDEBUG(("cat_init: module 0x%x, eeprom_size %d\n", i,
- eprom_size));
- if (cat_subread
- (*modpp, (*modpp)->asic, 0, eprom_size, eprom_buf)) {
- outb(VOYAGER_CAT_END, CAT_CMD);
- continue;
- }
- outb(VOYAGER_CAT_END, CAT_CMD);
- /* Now do everything for the QBB submodule 1 */
- (*modpp)->ee_size = eprom_hdr->ee_size;
- (*modpp)->num_asics = eprom_hdr->num_asics;
- asicpp = &((*modpp)->asic);
- sp_offset = eprom_hdr->scan_path_offset;
- /* get rid of the dummy CAT asic and read the real one */
- kfree((*modpp)->asic);
- for (asic = 0; asic < (*modpp)->num_asics; asic++) {
- int j;
- voyager_asic_t *asicp = *asicpp = kzalloc(sizeof(voyager_asic_t), GFP_KERNEL); /*&voyager_asic_storage[asic_count++]; */
- voyager_sp_table_t *sp_table;
- voyager_at_t *asic_table;
- voyager_jtt_t *jtag_table;
-
- if (asicp == NULL) {
- printk
- ("**WARNING** kmalloc failure in cat_init\n");
- continue;
- }
- asicpp = &(asicp->next);
- asicp->asic_location = asic;
- sp_table =
- (voyager_sp_table_t *) (eprom_buf + sp_offset);
- asicp->asic_id = sp_table->asic_id;
- asic_table =
- (voyager_at_t *) (eprom_buf +
- sp_table->asic_data_offset);
- for (j = 0; j < 4; j++)
- asicp->jtag_id[j] = asic_table->jtag_id[j];
- jtag_table =
- (voyager_jtt_t *) (eprom_buf +
- asic_table->jtag_offset);
- asicp->ireg_length = jtag_table->ireg_len;
- asicp->bit_location = (*modpp)->inst_bits;
- (*modpp)->inst_bits += asicp->ireg_length;
- if (asicp->ireg_length > (*modpp)->largest_reg)
- (*modpp)->largest_reg = asicp->ireg_length;
- if (asicp->ireg_length < (*modpp)->smallest_reg ||
- (*modpp)->smallest_reg == 0)
- (*modpp)->smallest_reg = asicp->ireg_length;
- CDEBUG(("asic 0x%x, ireg_length=%d, bit_location=%d\n",
- asicp->asic_id, asicp->ireg_length,
- asicp->bit_location));
- if (asicp->asic_id == VOYAGER_QUAD_QABC) {
- CDEBUG(("VOYAGER CAT: QABC ASIC found\n"));
- qabc_asic = asicp;
- }
- sp_offset += sizeof(voyager_sp_table_t);
- }
- CDEBUG(("Module inst_bits = %d, largest_reg = %d, smallest_reg=%d\n", (*modpp)->inst_bits, (*modpp)->largest_reg, (*modpp)->smallest_reg));
- /* OK, now we have the QUAD ASICs set up, use them.
- * we need to:
- *
- * 1. Find the Memory area for the Quad CPIs.
- * 2. Find the Extended VIC processor
- * 3. Configure a second extended VIC processor (This
- * cannot be done for the 51xx.
- * */
- outb(VOYAGER_CAT_RUN, CAT_CMD);
- cat_connect(*modpp, (*modpp)->asic);
- CDEBUG(("CAT CONNECTED!!\n"));
- cat_subread(*modpp, qabc_asic, 0, sizeof(qabc_data), qabc_data);
- qic_addr = qabc_data[5] << 8;
- qic_addr = (qic_addr | qabc_data[6]) << 8;
- qic_addr = (qic_addr | qabc_data[7]) << 8;
- printk
- ("Module \"%s\": Quad Processor Card; CPI 0x%lx, SET=0x%x\n",
- cat_module_name(i), qic_addr, qabc_data[8]);
-#if 0 /* plumbing fails---FIXME */
- if ((qabc_data[8] & 0xf0) == 0) {
- /* FIXME: 32 way 8 CPU slot monster cannot be
- * plumbed this way---need to check for it */
-
- printk("Plumbing second Extended Quad Processor\n");
- /* second VIC line hardwired to Quad CPU 1 */
- qabc_data[8] |= 0x20;
- cat_subwrite(*modpp, qabc_asic, 8, 1, &qabc_data[8]);
-#ifdef VOYAGER_CAT_DEBUG
- /* verify plumbing */
- cat_subread(*modpp, qabc_asic, 8, 1, &qabc_data[8]);
- if ((qabc_data[8] & 0xf0) == 0) {
- CDEBUG(("PLUMBING FAILED: 0x%x\n",
- qabc_data[8]));
- }
-#endif
- }
-#endif
-
- {
- struct resource *res =
- kzalloc(sizeof(struct resource), GFP_KERNEL);
- res->name = kmalloc(128, GFP_KERNEL);
- sprintf((char *)res->name, "Voyager %s Quad CPI",
- cat_module_name(i));
- res->start = qic_addr;
- res->end = qic_addr + 0x3ff;
- request_resource(&iomem_resource, res);
- }
-
- qic_addr = (unsigned long)ioremap_cache(qic_addr, 0x400);
-
- for (j = 0; j < 4; j++) {
- __u8 cpu;
-
- if (voyager_8slot) {
- /* 8 slot has a different mapping,
- * each slot has only one vic line, so
- * 1 cpu in each slot must be < 8 */
- cpu = (i & 0x07) + j * 8;
- } else {
- cpu = (i & 0x03) + j * 4;
- }
- if ((qabc_data[8] & (1 << j))) {
- voyager_extended_vic_processors |= (1 << cpu);
- }
- if (qabc_data[8] & (1 << (j + 4))) {
- /* Second SET register plumbed: Quad
- * card has two VIC connected CPUs.
- * Secondary cannot be booted as a VIC
- * CPU */
- voyager_extended_vic_processors |= (1 << cpu);
- voyager_allowed_boot_processors &=
- (~(1 << cpu));
- }
-
- voyager_quad_processors |= (1 << cpu);
- voyager_quad_cpi_addr[cpu] = (struct voyager_qic_cpi *)
- (qic_addr + (j << 8));
- CDEBUG(("CPU%d: CPI address 0x%lx\n", cpu,
- (unsigned long)voyager_quad_cpi_addr[cpu]));
- }
- outb(VOYAGER_CAT_END, CAT_CMD);
-
- *asicpp = NULL;
- modpp = &((*modpp)->next);
- }
- *modpp = NULL;
- printk
- ("CAT Bus Initialisation finished: extended procs 0x%x, quad procs 0x%x, allowed vic boot = 0x%x\n",
- voyager_extended_vic_processors, voyager_quad_processors,
- voyager_allowed_boot_processors);
- request_resource(&ioport_resource, &vic_res);
- if (voyager_quad_processors)
- request_resource(&ioport_resource, &qic_res);
- /* set up the front power switch */
-}
-
-int voyager_cat_readb(__u8 module, __u8 asic, int reg)
-{
- return 0;
-}
-
-static int cat_disconnect(voyager_module_t * modp, voyager_asic_t * asicp)
-{
- __u8 val;
- int err = 0;
-
- if (!modp->scan_path_connected)
- return 0;
- if (asicp->asic_id != VOYAGER_CAT_ID) {
- CDEBUG(("cat_disconnect: ASIC is not CAT\n"));
- return 1;
- }
- err = cat_read(modp, asicp, VOYAGER_SCANPATH, &val);
- if (err) {
- CDEBUG(("cat_disconnect: failed to read SCANPATH\n"));
- return err;
- }
- val &= VOYAGER_DISCONNECT_ASIC;
- err = cat_write(modp, asicp, VOYAGER_SCANPATH, val);
- if (err) {
- CDEBUG(("cat_disconnect: failed to write SCANPATH\n"));
- return err;
- }
- outb(VOYAGER_CAT_END, CAT_CMD);
- outb(VOYAGER_CAT_RUN, CAT_CMD);
- modp->scan_path_connected = 0;
-
- return 0;
-}
-
-static int cat_connect(voyager_module_t * modp, voyager_asic_t * asicp)
-{
- __u8 val;
- int err = 0;
-
- if (modp->scan_path_connected)
- return 0;
- if (asicp->asic_id != VOYAGER_CAT_ID) {
- CDEBUG(("cat_connect: ASIC is not CAT\n"));
- return 1;
- }
-
- err = cat_read(modp, asicp, VOYAGER_SCANPATH, &val);
- if (err) {
- CDEBUG(("cat_connect: failed to read SCANPATH\n"));
- return err;
- }
- val |= VOYAGER_CONNECT_ASIC;
- err = cat_write(modp, asicp, VOYAGER_SCANPATH, val);
- if (err) {
- CDEBUG(("cat_connect: failed to write SCANPATH\n"));
- return err;
- }
- outb(VOYAGER_CAT_END, CAT_CMD);
- outb(VOYAGER_CAT_RUN, CAT_CMD);
- modp->scan_path_connected = 1;
-
- return 0;
-}
-
-void voyager_cat_power_off(void)
-{
- /* Power the machine off by writing to the PSI over the CAT
- * bus */
- __u8 data;
- voyager_module_t psi = { 0 };
- voyager_asic_t psi_asic = { 0 };
-
- psi.asic = &psi_asic;
- psi.asic->asic_id = VOYAGER_CAT_ID;
- psi.asic->subaddr = VOYAGER_SUBADDR_HI;
- psi.module_addr = VOYAGER_PSI;
- psi.scan_path_connected = 0;
-
- outb(VOYAGER_CAT_END, CAT_CMD);
- /* Connect the PSI to the CAT Bus */
- outb(VOYAGER_CAT_DESELECT, VOYAGER_CAT_CONFIG_PORT);
- outb(VOYAGER_PSI, VOYAGER_CAT_CONFIG_PORT);
- outb(VOYAGER_CAT_RUN, CAT_CMD);
- cat_disconnect(&psi, &psi_asic);
- /* Read the status */
- cat_subread(&psi, &psi_asic, VOYAGER_PSI_GENERAL_REG, 1, &data);
- outb(VOYAGER_CAT_END, CAT_CMD);
- CDEBUG(("PSI STATUS 0x%x\n", data));
- /* These two writes are power off prep and perform */
- data = PSI_CLEAR;
- outb(VOYAGER_CAT_RUN, CAT_CMD);
- cat_subwrite(&psi, &psi_asic, VOYAGER_PSI_GENERAL_REG, 1, &data);
- outb(VOYAGER_CAT_END, CAT_CMD);
- data = PSI_POWER_DOWN;
- outb(VOYAGER_CAT_RUN, CAT_CMD);
- cat_subwrite(&psi, &psi_asic, VOYAGER_PSI_GENERAL_REG, 1, &data);
- outb(VOYAGER_CAT_END, CAT_CMD);
-}
-
-struct voyager_status voyager_status = { 0 };
-
-void voyager_cat_psi(__u8 cmd, __u16 reg, __u8 * data)
-{
- voyager_module_t psi = { 0 };
- voyager_asic_t psi_asic = { 0 };
-
- psi.asic = &psi_asic;
- psi.asic->asic_id = VOYAGER_CAT_ID;
- psi.asic->subaddr = VOYAGER_SUBADDR_HI;
- psi.module_addr = VOYAGER_PSI;
- psi.scan_path_connected = 0;
-
- outb(VOYAGER_CAT_END, CAT_CMD);
- /* Connect the PSI to the CAT Bus */
- outb(VOYAGER_CAT_DESELECT, VOYAGER_CAT_CONFIG_PORT);
- outb(VOYAGER_PSI, VOYAGER_CAT_CONFIG_PORT);
- outb(VOYAGER_CAT_RUN, CAT_CMD);
- cat_disconnect(&psi, &psi_asic);
- switch (cmd) {
- case VOYAGER_PSI_READ:
- cat_read(&psi, &psi_asic, reg, data);
- break;
- case VOYAGER_PSI_WRITE:
- cat_write(&psi, &psi_asic, reg, *data);
- break;
- case VOYAGER_PSI_SUBREAD:
- cat_subread(&psi, &psi_asic, reg, 1, data);
- break;
- case VOYAGER_PSI_SUBWRITE:
- cat_subwrite(&psi, &psi_asic, reg, 1, data);
- break;
- default:
- printk(KERN_ERR "Voyager PSI, unrecognised command %d\n", cmd);
- break;
- }
- outb(VOYAGER_CAT_END, CAT_CMD);
-}
-
-void voyager_cat_do_common_interrupt(void)
-{
- /* This is caused either by a memory parity error or something
- * in the PSI */
- __u8 data;
- voyager_module_t psi = { 0 };
- voyager_asic_t psi_asic = { 0 };
- struct voyager_psi psi_reg;
- int i;
- re_read:
- psi.asic = &psi_asic;
- psi.asic->asic_id = VOYAGER_CAT_ID;
- psi.asic->subaddr = VOYAGER_SUBADDR_HI;
- psi.module_addr = VOYAGER_PSI;
- psi.scan_path_connected = 0;
-
- outb(VOYAGER_CAT_END, CAT_CMD);
- /* Connect the PSI to the CAT Bus */
- outb(VOYAGER_CAT_DESELECT, VOYAGER_CAT_CONFIG_PORT);
- outb(VOYAGER_PSI, VOYAGER_CAT_CONFIG_PORT);
- outb(VOYAGER_CAT_RUN, CAT_CMD);
- cat_disconnect(&psi, &psi_asic);
- /* Read the status. NOTE: Need to read *all* the PSI regs here
- * otherwise the cmn int will be reasserted */
- for (i = 0; i < sizeof(psi_reg.regs); i++) {
- cat_read(&psi, &psi_asic, i, &((__u8 *) & psi_reg.regs)[i]);
- }
- outb(VOYAGER_CAT_END, CAT_CMD);
- if ((psi_reg.regs.checkbit & 0x02) == 0) {
- psi_reg.regs.checkbit |= 0x02;
- cat_write(&psi, &psi_asic, 5, psi_reg.regs.checkbit);
- printk("VOYAGER RE-READ PSI\n");
- goto re_read;
- }
- outb(VOYAGER_CAT_RUN, CAT_CMD);
- for (i = 0; i < sizeof(psi_reg.subregs); i++) {
- /* This looks strange, but the PSI doesn't do auto increment
- * correctly */
- cat_subread(&psi, &psi_asic, VOYAGER_PSI_SUPPLY_REG + i,
- 1, &((__u8 *) & psi_reg.subregs)[i]);
- }
- outb(VOYAGER_CAT_END, CAT_CMD);
-#ifdef VOYAGER_CAT_DEBUG
- printk("VOYAGER PSI: ");
- for (i = 0; i < sizeof(psi_reg.regs); i++)
- printk("%02x ", ((__u8 *) & psi_reg.regs)[i]);
- printk("\n ");
- for (i = 0; i < sizeof(psi_reg.subregs); i++)
- printk("%02x ", ((__u8 *) & psi_reg.subregs)[i]);
- printk("\n");
-#endif
- if (psi_reg.regs.intstatus & PSI_MON) {
- /* switch off or power fail */
-
- if (psi_reg.subregs.supply & PSI_SWITCH_OFF) {
- if (voyager_status.switch_off) {
- printk(KERN_ERR
- "Voyager front panel switch turned off again---Immediate power off!\n");
- voyager_cat_power_off();
- /* not reached */
- } else {
- printk(KERN_ERR
- "Voyager front panel switch turned off\n");
- voyager_status.switch_off = 1;
- voyager_status.request_from_kernel = 1;
- wake_up_process(voyager_thread);
- }
- /* Tell the hardware we're taking care of the
- * shutdown, otherwise it will power the box off
- * within 3 seconds of the switch being pressed and,
- * which is much more important to us, continue to
- * assert the common interrupt */
- data = PSI_CLR_SWITCH_OFF;
- outb(VOYAGER_CAT_RUN, CAT_CMD);
- cat_subwrite(&psi, &psi_asic, VOYAGER_PSI_SUPPLY_REG,
- 1, &data);
- outb(VOYAGER_CAT_END, CAT_CMD);
- } else {
-
- VDEBUG(("Voyager ac fail reg 0x%x\n",
- psi_reg.subregs.ACfail));
- if ((psi_reg.subregs.ACfail & AC_FAIL_STAT_CHANGE) == 0) {
- /* No further update */
- return;
- }
-#if 0
- /* Don't bother trying to find out who failed.
- * FIXME: This probably makes the code incorrect on
- * anything other than a 345x */
- for (i = 0; i < 5; i++) {
- if (psi_reg.subregs.ACfail & (1 << i)) {
- break;
- }
- }
- printk(KERN_NOTICE "AC FAIL IN SUPPLY %d\n", i);
-#endif
- /* DON'T do this: it shuts down the AC PSI
- outb(VOYAGER_CAT_RUN, CAT_CMD);
- data = PSI_MASK_MASK | i;
- cat_subwrite(&psi, &psi_asic, VOYAGER_PSI_MASK,
- 1, &data);
- outb(VOYAGER_CAT_END, CAT_CMD);
- */
- printk(KERN_ERR "Voyager AC power failure\n");
- outb(VOYAGER_CAT_RUN, CAT_CMD);
- data = PSI_COLD_START;
- cat_subwrite(&psi, &psi_asic, VOYAGER_PSI_GENERAL_REG,
- 1, &data);
- outb(VOYAGER_CAT_END, CAT_CMD);
- voyager_status.power_fail = 1;
- voyager_status.request_from_kernel = 1;
- wake_up_process(voyager_thread);
- }
-
- } else if (psi_reg.regs.intstatus & PSI_FAULT) {
- /* Major fault! */
- printk(KERN_ERR
- "Voyager PSI Detected major fault, immediate power off!\n");
- voyager_cat_power_off();
- /* not reached */
- } else if (psi_reg.regs.intstatus & (PSI_DC_FAIL | PSI_ALARM
- | PSI_CURRENT | PSI_DVM
- | PSI_PSCFAULT | PSI_STAT_CHG)) {
- /* other psi fault */
-
- printk(KERN_WARNING "Voyager PSI status 0x%x\n", data);
- /* clear the PSI fault */
- outb(VOYAGER_CAT_RUN, CAT_CMD);
- cat_write(&psi, &psi_asic, VOYAGER_PSI_STATUS_REG, 0);
- outb(VOYAGER_CAT_END, CAT_CMD);
- }
-}
diff --git a/arch/x86/mach-voyager/voyager_smp.c b/arch/x86/mach-voyager/voyager_smp.c
deleted file mode 100644
index b9cc84a..0000000
--- a/arch/x86/mach-voyager/voyager_smp.c
+++ /dev/null
@@ -1,1807 +0,0 @@
-/* -*- mode: c; c-basic-offset: 8 -*- */
-
-/* Copyright (C) 1999,2001
- *
- * Author: J.E.J.Bottomley@HansenPartnership.com
- *
- * This file provides all the same external entries as smp.c but uses
- * the voyager hal to provide the functionality
- */
-#include <linux/cpu.h>
-#include <linux/module.h>
-#include <linux/mm.h>
-#include <linux/kernel_stat.h>
-#include <linux/delay.h>
-#include <linux/mc146818rtc.h>
-#include <linux/cache.h>
-#include <linux/interrupt.h>
-#include <linux/init.h>
-#include <linux/kernel.h>
-#include <linux/bootmem.h>
-#include <linux/completion.h>
-#include <asm/desc.h>
-#include <asm/voyager.h>
-#include <asm/vic.h>
-#include <asm/mtrr.h>
-#include <asm/pgalloc.h>
-#include <asm/tlbflush.h>
-#include <asm/arch_hooks.h>
-#include <asm/trampoline.h>
-
-/* TLB state -- visible externally, indexed physically */
-DEFINE_PER_CPU_SHARED_ALIGNED(struct tlb_state, cpu_tlbstate) = { &init_mm, 0 };
-
-/* CPU IRQ affinity -- set to all ones initially */
-static unsigned long cpu_irq_affinity[NR_CPUS] __cacheline_aligned =
- {[0 ... NR_CPUS-1] = ~0UL };
-
-/* per CPU data structure (for /proc/cpuinfo et al), visible externally
- * indexed physically */
-DEFINE_PER_CPU_SHARED_ALIGNED(struct cpuinfo_x86, cpu_info);
-EXPORT_PER_CPU_SYMBOL(cpu_info);
-
-/* physical ID of the CPU used to boot the system */
-unsigned char boot_cpu_id;
-
-/* The memory line addresses for the Quad CPIs */
-struct voyager_qic_cpi *voyager_quad_cpi_addr[NR_CPUS] __cacheline_aligned;
-
-/* The masks for the Extended VIC processors, filled in by cat_init */
-__u32 voyager_extended_vic_processors = 0;
-
-/* Masks for the extended Quad processors which cannot be VIC booted */
-__u32 voyager_allowed_boot_processors = 0;
-
-/* The mask for the Quad Processors (both extended and non-extended) */
-__u32 voyager_quad_processors = 0;
-
-/* Total count of live CPUs, used in process.c to display
- * the CPU information and in irq.c for the per CPU irq
- * activity count. Finally exported by i386_ksyms.c */
-static int voyager_extended_cpus = 1;
-
-/* Used for the invalidate map that's also checked in the spinlock */
-static volatile unsigned long smp_invalidate_needed;
-
-/* Bitmask of CPUs present in the system - exported by i386_syms.c, used
- * by scheduler but indexed physically */
-static cpumask_t voyager_phys_cpu_present_map = CPU_MASK_NONE;
-
-/* The internal functions */
-static void send_CPI(__u32 cpuset, __u8 cpi);
-static void ack_CPI(__u8 cpi);
-static int ack_QIC_CPI(__u8 cpi);
-static void ack_special_QIC_CPI(__u8 cpi);
-static void ack_VIC_CPI(__u8 cpi);
-static void send_CPI_allbutself(__u8 cpi);
-static void mask_vic_irq(unsigned int irq);
-static void unmask_vic_irq(unsigned int irq);
-static unsigned int startup_vic_irq(unsigned int irq);
-static void enable_local_vic_irq(unsigned int irq);
-static void disable_local_vic_irq(unsigned int irq);
-static void before_handle_vic_irq(unsigned int irq);
-static void after_handle_vic_irq(unsigned int irq);
-static void set_vic_irq_affinity(unsigned int irq, const struct cpumask *mask);
-static void ack_vic_irq(unsigned int irq);
-static void vic_enable_cpi(void);
-static void do_boot_cpu(__u8 cpuid);
-static void do_quad_bootstrap(void);
-static void initialize_secondary(void);
-
-int hard_smp_processor_id(void);
-int safe_smp_processor_id(void);
-
-/* Inline functions */
-static inline void send_one_QIC_CPI(__u8 cpu, __u8 cpi)
-{
- voyager_quad_cpi_addr[cpu]->qic_cpi[cpi].cpi =
- (smp_processor_id() << 16) + cpi;
-}
-
-static inline void send_QIC_CPI(__u32 cpuset, __u8 cpi)
-{
- int cpu;
-
- for_each_online_cpu(cpu) {
- if (cpuset & (1 << cpu)) {
-#ifdef VOYAGER_DEBUG
- if (!cpu_online(cpu))
- VDEBUG(("CPU%d sending cpi %d to CPU%d not in "
- "cpu_online_map\n",
- hard_smp_processor_id(), cpi, cpu));
-#endif
- send_one_QIC_CPI(cpu, cpi - QIC_CPI_OFFSET);
- }
- }
-}
-
-static inline void wrapper_smp_local_timer_interrupt(void)
-{
- irq_enter();
- smp_local_timer_interrupt();
- irq_exit();
-}
-
-static inline void send_one_CPI(__u8 cpu, __u8 cpi)
-{
- if (voyager_quad_processors & (1 << cpu))
- send_one_QIC_CPI(cpu, cpi - QIC_CPI_OFFSET);
- else
- send_CPI(1 << cpu, cpi);
-}
-
-static inline void send_CPI_allbutself(__u8 cpi)
-{
- __u8 cpu = smp_processor_id();
- __u32 mask = cpus_addr(cpu_online_map)[0] & ~(1 << cpu);
- send_CPI(mask, cpi);
-}
-
-static inline int is_cpu_quad(void)
-{
- __u8 cpumask = inb(VIC_PROC_WHO_AM_I);
- return ((cpumask & QUAD_IDENTIFIER) == QUAD_IDENTIFIER);
-}
-
-static inline int is_cpu_extended(void)
-{
- __u8 cpu = hard_smp_processor_id();
-
- return (voyager_extended_vic_processors & (1 << cpu));
-}
-
-static inline int is_cpu_vic_boot(void)
-{
- __u8 cpu = hard_smp_processor_id();
-
- return (voyager_extended_vic_processors
- & voyager_allowed_boot_processors & (1 << cpu));
-}
-
-static inline void ack_CPI(__u8 cpi)
-{
- switch (cpi) {
- case VIC_CPU_BOOT_CPI:
- if (is_cpu_quad() && !is_cpu_vic_boot())
- ack_QIC_CPI(cpi);
- else
- ack_VIC_CPI(cpi);
- break;
- case VIC_SYS_INT:
- case VIC_CMN_INT:
- /* These are slightly strange. Even on the Quad card,
- * They are vectored as VIC CPIs */
- if (is_cpu_quad())
- ack_special_QIC_CPI(cpi);
- else
- ack_VIC_CPI(cpi);
- break;
- default:
- printk("VOYAGER ERROR: CPI%d is in common CPI code\n", cpi);
- break;
- }
-}
-
-/* local variables */
-
-/* The VIC IRQ descriptors -- these look almost identical to the
- * 8259 IRQs except that masks and things must be kept per processor
- */
-static struct irq_chip vic_chip = {
- .name = "VIC",
- .startup = startup_vic_irq,
- .mask = mask_vic_irq,
- .unmask = unmask_vic_irq,
- .set_affinity = set_vic_irq_affinity,
-};
-
-/* used to count up as CPUs are brought on line (starts at 0) */
-static int cpucount = 0;
-
-/* The per cpu profile stuff - used in smp_local_timer_interrupt */
-static DEFINE_PER_CPU(int, prof_multiplier) = 1;
-static DEFINE_PER_CPU(int, prof_old_multiplier) = 1;
-static DEFINE_PER_CPU(int, prof_counter) = 1;
-
-/* the map used to check if a CPU has booted */
-static __u32 cpu_booted_map;
-
-/* the synchronize flag used to hold all secondary CPUs spinning in
- * a tight loop until the boot sequence is ready for them */
-static cpumask_t smp_commenced_mask = CPU_MASK_NONE;
-
-/* This is for the new dynamic CPU boot code */
-
-/* The per processor IRQ masks (these are usually kept in sync) */
-static __u16 vic_irq_mask[NR_CPUS] __cacheline_aligned;
-
-/* the list of IRQs to be enabled by the VIC_ENABLE_IRQ_CPI */
-static __u16 vic_irq_enable_mask[NR_CPUS] __cacheline_aligned = { 0 };
-
-/* Lock for enable/disable of VIC interrupts */
-static __cacheline_aligned DEFINE_SPINLOCK(vic_irq_lock);
-
-/* The boot processor is correctly set up in PC mode when it
- * comes up, but the secondaries need their master/slave 8259
- * pairs initializing correctly */
-
-/* Interrupt counters (per cpu) and total - used to try to
- * even up the interrupt handling routines */
-static long vic_intr_total = 0;
-static long vic_intr_count[NR_CPUS] __cacheline_aligned = { 0 };
-static unsigned long vic_tick[NR_CPUS] __cacheline_aligned = { 0 };
-
-/* Since we can only use CPI0, we fake all the other CPIs */
-static unsigned long vic_cpi_mailbox[NR_CPUS] __cacheline_aligned;
-
-/* debugging routine to read the isr of the cpu's pic */
-static inline __u16 vic_read_isr(void)
-{
- __u16 isr;
-
- outb(0x0b, 0xa0);
- isr = inb(0xa0) << 8;
- outb(0x0b, 0x20);
- isr |= inb(0x20);
-
- return isr;
-}
-
-static __init void qic_setup(void)
-{
- if (!is_cpu_quad()) {
- /* not a quad, no setup */
- return;
- }
- outb(QIC_DEFAULT_MASK0, QIC_MASK_REGISTER0);
- outb(QIC_CPI_ENABLE, QIC_MASK_REGISTER1);
-
- if (is_cpu_extended()) {
- /* the QIC duplicate of the VIC base register */
- outb(VIC_DEFAULT_CPI_BASE, QIC_VIC_CPI_BASE_REGISTER);
- outb(QIC_DEFAULT_CPI_BASE, QIC_CPI_BASE_REGISTER);
-
- /* FIXME: should set up the QIC timer and memory parity
- * error vectors here */
- }
-}
-
-static __init void vic_setup_pic(void)
-{
- outb(1, VIC_REDIRECT_REGISTER_1);
- /* clear the claim registers for dynamic routing */
- outb(0, VIC_CLAIM_REGISTER_0);
- outb(0, VIC_CLAIM_REGISTER_1);
-
- outb(0, VIC_PRIORITY_REGISTER);
- /* Set the Primary and Secondary Microchannel vector
- * bases to be the same as the ordinary interrupts
- *
- * FIXME: This would be more efficient using separate
- * vectors. */
- outb(FIRST_EXTERNAL_VECTOR, VIC_PRIMARY_MC_BASE);
- outb(FIRST_EXTERNAL_VECTOR, VIC_SECONDARY_MC_BASE);
- /* Now initiallise the master PIC belonging to this CPU by
- * sending the four ICWs */
-
- /* ICW1: level triggered, ICW4 needed */
- outb(0x19, 0x20);
-
- /* ICW2: vector base */
- outb(FIRST_EXTERNAL_VECTOR, 0x21);
-
- /* ICW3: slave at line 2 */
- outb(0x04, 0x21);
-
- /* ICW4: 8086 mode */
- outb(0x01, 0x21);
-
- /* now the same for the slave PIC */
-
- /* ICW1: level trigger, ICW4 needed */
- outb(0x19, 0xA0);
-
- /* ICW2: slave vector base */
- outb(FIRST_EXTERNAL_VECTOR + 8, 0xA1);
-
- /* ICW3: slave ID */
- outb(0x02, 0xA1);
-
- /* ICW4: 8086 mode */
- outb(0x01, 0xA1);
-}
-
-static void do_quad_bootstrap(void)
-{
- if (is_cpu_quad() && is_cpu_vic_boot()) {
- int i;
- unsigned long flags;
- __u8 cpuid = hard_smp_processor_id();
-
- local_irq_save(flags);
-
- for (i = 0; i < 4; i++) {
- /* FIXME: this would be >>3 &0x7 on the 32 way */
- if (((cpuid >> 2) & 0x03) == i)
- /* don't lower our own mask! */
- continue;
-
- /* masquerade as local Quad CPU */
- outb(QIC_CPUID_ENABLE | i, QIC_PROCESSOR_ID);
- /* enable the startup CPI */
- outb(QIC_BOOT_CPI_MASK, QIC_MASK_REGISTER1);
- /* restore cpu id */
- outb(0, QIC_PROCESSOR_ID);
- }
- local_irq_restore(flags);
- }
-}
-
-void prefill_possible_map(void)
-{
- /* This is empty on voyager because we need a much
- * earlier detection which is done in find_smp_config */
-}
-
-/* Set up all the basic stuff: read the SMP config and make all the
- * SMP information reflect only the boot cpu. All others will be
- * brought on-line later. */
-void __init find_smp_config(void)
-{
- int i;
-
- boot_cpu_id = hard_smp_processor_id();
-
- printk("VOYAGER SMP: Boot cpu is %d\n", boot_cpu_id);
-
- /* initialize the CPU structures (moved from smp_boot_cpus) */
- for (i = 0; i < nr_cpu_ids; i++)
- cpu_irq_affinity[i] = ~0;
- cpu_online_map = cpumask_of_cpu(boot_cpu_id);
-
- /* The boot CPU must be extended */
- voyager_extended_vic_processors = 1 << boot_cpu_id;
- /* initially, all of the first 8 CPUs can boot */
- voyager_allowed_boot_processors = 0xff;
- /* set up everything for just this CPU, we can alter
- * this as we start the other CPUs later */
- /* now get the CPU disposition from the extended CMOS */
- cpus_addr(voyager_phys_cpu_present_map)[0] =
- voyager_extended_cmos_read(VOYAGER_PROCESSOR_PRESENT_MASK);
- cpus_addr(voyager_phys_cpu_present_map)[0] |=
- voyager_extended_cmos_read(VOYAGER_PROCESSOR_PRESENT_MASK + 1) << 8;
- cpus_addr(voyager_phys_cpu_present_map)[0] |=
- voyager_extended_cmos_read(VOYAGER_PROCESSOR_PRESENT_MASK +
- 2) << 16;
- cpus_addr(voyager_phys_cpu_present_map)[0] |=
- voyager_extended_cmos_read(VOYAGER_PROCESSOR_PRESENT_MASK +
- 3) << 24;
- init_cpu_possible(&voyager_phys_cpu_present_map);
- printk("VOYAGER SMP: voyager_phys_cpu_present_map = 0x%lx\n",
- cpus_addr(voyager_phys_cpu_present_map)[0]);
- /* Here we set up the VIC to enable SMP */
- /* enable the CPIs by writing the base vector to their register */
- outb(VIC_DEFAULT_CPI_BASE, VIC_CPI_BASE_REGISTER);
- outb(1, VIC_REDIRECT_REGISTER_1);
- /* set the claim registers for static routing --- Boot CPU gets
- * all interrupts untill all other CPUs started */
- outb(0xff, VIC_CLAIM_REGISTER_0);
- outb(0xff, VIC_CLAIM_REGISTER_1);
- /* Set the Primary and Secondary Microchannel vector
- * bases to be the same as the ordinary interrupts
- *
- * FIXME: This would be more efficient using separate
- * vectors. */
- outb(FIRST_EXTERNAL_VECTOR, VIC_PRIMARY_MC_BASE);
- outb(FIRST_EXTERNAL_VECTOR, VIC_SECONDARY_MC_BASE);
-
- /* Finally tell the firmware that we're driving */
- outb(inb(VOYAGER_SUS_IN_CONTROL_PORT) | VOYAGER_IN_CONTROL_FLAG,
- VOYAGER_SUS_IN_CONTROL_PORT);
-
- current_thread_info()->cpu = boot_cpu_id;
- x86_write_percpu(cpu_number, boot_cpu_id);
-}
-
-/*
- * The bootstrap kernel entry code has set these up. Save them
- * for a given CPU, id is physical */
-void __init smp_store_cpu_info(int id)
-{
- struct cpuinfo_x86 *c = &cpu_data(id);
-
- *c = boot_cpu_data;
- c->cpu_index = id;
-
- identify_secondary_cpu(c);
-}
-
-/* Routine initially called when a non-boot CPU is brought online */
-static void __init start_secondary(void *unused)
-{
- __u8 cpuid = hard_smp_processor_id();
-
- cpu_init();
-
- /* OK, we're in the routine */
- ack_CPI(VIC_CPU_BOOT_CPI);
-
- /* setup the 8259 master slave pair belonging to this CPU ---
- * we won't actually receive any until the boot CPU
- * relinquishes it's static routing mask */
- vic_setup_pic();
-
- qic_setup();
-
- if (is_cpu_quad() && !is_cpu_vic_boot()) {
- /* clear the boot CPI */
- __u8 dummy;
-
- dummy =
- voyager_quad_cpi_addr[cpuid]->qic_cpi[VIC_CPU_BOOT_CPI].cpi;
- printk("read dummy %d\n", dummy);
- }
-
- /* lower the mask to receive CPIs */
- vic_enable_cpi();
-
- VDEBUG(("VOYAGER SMP: CPU%d, stack at about %p\n", cpuid, &cpuid));
-
- notify_cpu_starting(cpuid);
-
- /* enable interrupts */
- local_irq_enable();
-
- /* get our bogomips */
- calibrate_delay();
-
- /* save our processor parameters */
- smp_store_cpu_info(cpuid);
-
- /* if we're a quad, we may need to bootstrap other CPUs */
- do_quad_bootstrap();
-
- /* FIXME: this is rather a poor hack to prevent the CPU
- * activating softirqs while it's supposed to be waiting for
- * permission to proceed. Without this, the new per CPU stuff
- * in the softirqs will fail */
- local_irq_disable();
- cpu_set(cpuid, cpu_callin_map);
-
- /* signal that we're done */
- cpu_booted_map = 1;
-
- while (!cpu_isset(cpuid, smp_commenced_mask))
- rep_nop();
- local_irq_enable();
-
- local_flush_tlb();
-
- cpu_set(cpuid, cpu_online_map);
- wmb();
- cpu_idle();
-}
-
-/* Routine to kick start the given CPU and wait for it to report ready
- * (or timeout in startup). When this routine returns, the requested
- * CPU is either fully running and configured or known to be dead.
- *
- * We call this routine sequentially 1 CPU at a time, so no need for
- * locking */
-
-static void __init do_boot_cpu(__u8 cpu)
-{
- struct task_struct *idle;
- int timeout;
- unsigned long flags;
- int quad_boot = (1 << cpu) & voyager_quad_processors
- & ~(voyager_extended_vic_processors
- & voyager_allowed_boot_processors);
-
- /* This is the format of the CPI IDT gate (in real mode) which
- * we're hijacking to boot the CPU */
- union IDTFormat {
- struct seg {
- __u16 Offset;
- __u16 Segment;
- } idt;
- __u32 val;
- } hijack_source;
-
- __u32 *hijack_vector;
- __u32 start_phys_address = setup_trampoline();
-
- /* There's a clever trick to this: The linux trampoline is
- * compiled to begin at absolute location zero, so make the
- * address zero but have the data segment selector compensate
- * for the actual address */
- hijack_source.idt.Offset = start_phys_address & 0x000F;
- hijack_source.idt.Segment = (start_phys_address >> 4) & 0xFFFF;
-
- cpucount++;
- alternatives_smp_switch(1);
-
- idle = fork_idle(cpu);
- if (IS_ERR(idle))
- panic("failed fork for CPU%d", cpu);
- idle->thread.ip = (unsigned long)start_secondary;
- /* init_tasks (in sched.c) is indexed logically */
- stack_start.sp = (void *)idle->thread.sp;
-
- init_gdt(cpu);
- per_cpu(current_task, cpu) = idle;
- early_gdt_descr.address = (unsigned long)get_cpu_gdt_table(cpu);
- irq_ctx_init(cpu);
-
- /* Note: Don't modify initial ss override */
- VDEBUG(("VOYAGER SMP: Booting CPU%d at 0x%lx[%x:%x], stack %p\n", cpu,
- (unsigned long)hijack_source.val, hijack_source.idt.Segment,
- hijack_source.idt.Offset, stack_start.sp));
-
- /* init lowmem identity mapping */
- clone_pgd_range(swapper_pg_dir, swapper_pg_dir + KERNEL_PGD_BOUNDARY,
- min_t(unsigned long, KERNEL_PGD_PTRS, KERNEL_PGD_BOUNDARY));
- flush_tlb_all();
-
- if (quad_boot) {
- printk("CPU %d: non extended Quad boot\n", cpu);
- hijack_vector =
- (__u32 *)
- phys_to_virt((VIC_CPU_BOOT_CPI + QIC_DEFAULT_CPI_BASE) * 4);
- *hijack_vector = hijack_source.val;
- } else {
- printk("CPU%d: extended VIC boot\n", cpu);
- hijack_vector =
- (__u32 *)
- phys_to_virt((VIC_CPU_BOOT_CPI + VIC_DEFAULT_CPI_BASE) * 4);
- *hijack_vector = hijack_source.val;
- /* VIC errata, may also receive interrupt at this address */
- hijack_vector =
- (__u32 *)
- phys_to_virt((VIC_CPU_BOOT_ERRATA_CPI +
- VIC_DEFAULT_CPI_BASE) * 4);
- *hijack_vector = hijack_source.val;
- }
- /* All non-boot CPUs start with interrupts fully masked. Need
- * to lower the mask of the CPI we're about to send. We do
- * this in the VIC by masquerading as the processor we're
- * about to boot and lowering its interrupt mask */
- local_irq_save(flags);
- if (quad_boot) {
- send_one_QIC_CPI(cpu, VIC_CPU_BOOT_CPI);
- } else {
- outb(VIC_CPU_MASQUERADE_ENABLE | cpu, VIC_PROCESSOR_ID);
- /* here we're altering registers belonging to `cpu' */
-
- outb(VIC_BOOT_INTERRUPT_MASK, 0x21);
- /* now go back to our original identity */
- outb(boot_cpu_id, VIC_PROCESSOR_ID);
-
- /* and boot the CPU */
-
- send_CPI((1 << cpu), VIC_CPU_BOOT_CPI);
- }
- cpu_booted_map = 0;
- local_irq_restore(flags);
-
- /* now wait for it to become ready (or timeout) */
- for (timeout = 0; timeout < 50000; timeout++) {
- if (cpu_booted_map)
- break;
- udelay(100);
- }
- /* reset the page table */
- zap_low_mappings();
-
- if (cpu_booted_map) {
- VDEBUG(("CPU%d: Booted successfully, back in CPU %d\n",
- cpu, smp_processor_id()));
-
- printk("CPU%d: ", cpu);
- print_cpu_info(&cpu_data(cpu));
- wmb();
- cpu_set(cpu, cpu_callout_map);
- cpu_set(cpu, cpu_present_map);
- } else {
- printk("CPU%d FAILED TO BOOT: ", cpu);
- if (*
- ((volatile unsigned char *)phys_to_virt(start_phys_address))
- == 0xA5)
- printk("Stuck.\n");
- else
- printk("Not responding.\n");
-
- cpucount--;
- }
-}
-
-void __init smp_boot_cpus(void)
-{
- int i;
-
- /* CAT BUS initialisation must be done after the memory */
- /* FIXME: The L4 has a catbus too, it just needs to be
- * accessed in a totally different way */
- if (voyager_level == 5) {
- voyager_cat_init();
-
- /* now that the cat has probed the Voyager System Bus, sanity
- * check the cpu map */
- if (((voyager_quad_processors | voyager_extended_vic_processors)
- & cpus_addr(voyager_phys_cpu_present_map)[0]) !=
- cpus_addr(voyager_phys_cpu_present_map)[0]) {
- /* should panic */
- printk("\n\n***WARNING*** "
- "Sanity check of CPU present map FAILED\n");
- }
- } else if (voyager_level == 4)
- voyager_extended_vic_processors =
- cpus_addr(voyager_phys_cpu_present_map)[0];
-
- /* this sets up the idle task to run on the current cpu */
- voyager_extended_cpus = 1;
- /* Remove the global_irq_holder setting, it triggers a BUG() on
- * schedule at the moment */
- //global_irq_holder = boot_cpu_id;
-
- /* FIXME: Need to do something about this but currently only works
- * on CPUs with a tsc which none of mine have.
- smp_tune_scheduling();
- */
- smp_store_cpu_info(boot_cpu_id);
- /* setup the jump vector */
- initial_code = (unsigned long)initialize_secondary;
- printk("CPU%d: ", boot_cpu_id);
- print_cpu_info(&cpu_data(boot_cpu_id));
-
- if (is_cpu_quad()) {
- /* booting on a Quad CPU */
- printk("VOYAGER SMP: Boot CPU is Quad\n");
- qic_setup();
- do_quad_bootstrap();
- }
-
- /* enable our own CPIs */
- vic_enable_cpi();
-
- cpu_set(boot_cpu_id, cpu_online_map);
- cpu_set(boot_cpu_id, cpu_callout_map);
-
- /* loop over all the extended VIC CPUs and boot them. The
- * Quad CPUs must be bootstrapped by their extended VIC cpu */
- for (i = 0; i < nr_cpu_ids; i++) {
- if (i == boot_cpu_id || !cpu_isset(i, voyager_phys_cpu_present_map))
- continue;
- do_boot_cpu(i);
- /* This udelay seems to be needed for the Quad boots
- * don't remove unless you know what you're doing */
- udelay(1000);
- }
- /* we could compute the total bogomips here, but why bother?,
- * Code added from smpboot.c */
- {
- unsigned long bogosum = 0;
-
- for_each_online_cpu(i)
- bogosum += cpu_data(i).loops_per_jiffy;
- printk(KERN_INFO "Total of %d processors activated "
- "(%lu.%02lu BogoMIPS).\n",
- cpucount + 1, bogosum / (500000 / HZ),
- (bogosum / (5000 / HZ)) % 100);
- }
- voyager_extended_cpus = hweight32(voyager_extended_vic_processors);
- printk("VOYAGER: Extended (interrupt handling CPUs): "
- "%d, non-extended: %d\n", voyager_extended_cpus,
- num_booting_cpus() - voyager_extended_cpus);
- /* that's it, switch to symmetric mode */
- outb(0, VIC_PRIORITY_REGISTER);
- outb(0, VIC_CLAIM_REGISTER_0);
- outb(0, VIC_CLAIM_REGISTER_1);
-
- VDEBUG(("VOYAGER SMP: Booted with %d CPUs\n", num_booting_cpus()));
-}
-
-/* Reload the secondary CPUs task structure (this function does not
- * return ) */
-static void __init initialize_secondary(void)
-{
-#if 0
- // AC kernels only
- set_current(hard_get_current());
-#endif
-
- /*
- * We don't actually need to load the full TSS,
- * basically just the stack pointer and the eip.
- */
-
- asm volatile ("movl %0,%%esp\n\t"
- "jmp *%1"::"r" (current->thread.sp),
- "r"(current->thread.ip));
-}
-
-/* handle a Voyager SYS_INT -- If we don't, the base board will
- * panic the system.
- *
- * System interrupts occur because some problem was detected on the
- * various busses. To find out what you have to probe all the
- * hardware via the CAT bus. FIXME: At the moment we do nothing. */
-void smp_vic_sys_interrupt(struct pt_regs *regs)
-{
- ack_CPI(VIC_SYS_INT);
- printk("Voyager SYSTEM INTERRUPT\n");
-}
-
-/* Handle a voyager CMN_INT; These interrupts occur either because of
- * a system status change or because a single bit memory error
- * occurred. FIXME: At the moment, ignore all this. */
-void smp_vic_cmn_interrupt(struct pt_regs *regs)
-{
- static __u8 in_cmn_int = 0;
- static DEFINE_SPINLOCK(cmn_int_lock);
-
- /* common ints are broadcast, so make sure we only do this once */
- _raw_spin_lock(&cmn_int_lock);
- if (in_cmn_int)
- goto unlock_end;
-
- in_cmn_int++;
- _raw_spin_unlock(&cmn_int_lock);
-
- VDEBUG(("Voyager COMMON INTERRUPT\n"));
-
- if (voyager_level == 5)
- voyager_cat_do_common_interrupt();
-
- _raw_spin_lock(&cmn_int_lock);
- in_cmn_int = 0;
- unlock_end:
- _raw_spin_unlock(&cmn_int_lock);
- ack_CPI(VIC_CMN_INT);
-}
-
-/*
- * Reschedule call back. Nothing to do, all the work is done
- * automatically when we return from the interrupt. */
-static void smp_reschedule_interrupt(void)
-{
- /* do nothing */
-}
-
-static struct mm_struct *flush_mm;
-static unsigned long flush_va;
-static DEFINE_SPINLOCK(tlbstate_lock);
-
-/*
- * We cannot call mmdrop() because we are in interrupt context,
- * instead update mm->cpu_vm_mask.
- *
- * We need to reload %cr3 since the page tables may be going
- * away from under us..
- */
-static inline void voyager_leave_mm(unsigned long cpu)
-{
- if (per_cpu(cpu_tlbstate, cpu).state == TLBSTATE_OK)
- BUG();
- cpu_clear(cpu, per_cpu(cpu_tlbstate, cpu).active_mm->cpu_vm_mask);
- load_cr3(swapper_pg_dir);
-}
-
-/*
- * Invalidate call-back
- */
-static void smp_invalidate_interrupt(void)
-{
- __u8 cpu = smp_processor_id();
-
- if (!test_bit(cpu, &smp_invalidate_needed))
- return;
- /* This will flood messages. Don't uncomment unless you see
- * Problems with cross cpu invalidation
- VDEBUG(("VOYAGER SMP: CPU%d received INVALIDATE_CPI\n",
- smp_processor_id()));
- */
-
- if (flush_mm == per_cpu(cpu_tlbstate, cpu).active_mm) {
- if (per_cpu(cpu_tlbstate, cpu).state == TLBSTATE_OK) {
- if (flush_va == TLB_FLUSH_ALL)
- local_flush_tlb();
- else
- __flush_tlb_one(flush_va);
- } else
- voyager_leave_mm(cpu);
- }
- smp_mb__before_clear_bit();
- clear_bit(cpu, &smp_invalidate_needed);
- smp_mb__after_clear_bit();
-}
-
-/* All the new flush operations for 2.4 */
-
-/* This routine is called with a physical cpu mask */
-static void
-voyager_flush_tlb_others(unsigned long cpumask, struct mm_struct *mm,
- unsigned long va)
-{
- int stuck = 50000;
-
- if (!cpumask)
- BUG();
- if ((cpumask & cpus_addr(cpu_online_map)[0]) != cpumask)
- BUG();
- if (cpumask & (1 << smp_processor_id()))
- BUG();
- if (!mm)
- BUG();
-
- spin_lock(&tlbstate_lock);
-
- flush_mm = mm;
- flush_va = va;
- atomic_set_mask(cpumask, &smp_invalidate_needed);
- /*
- * We have to send the CPI only to
- * CPUs affected.
- */
- send_CPI(cpumask, VIC_INVALIDATE_CPI);
-
- while (smp_invalidate_needed) {
- mb();
- if (--stuck == 0) {
- printk("***WARNING*** Stuck doing invalidate CPI "
- "(CPU%d)\n", smp_processor_id());
- break;
- }
- }
-
- /* Uncomment only to debug invalidation problems
- VDEBUG(("VOYAGER SMP: Completed invalidate CPI (CPU%d)\n", cpu));
- */
-
- flush_mm = NULL;
- flush_va = 0;
- spin_unlock(&tlbstate_lock);
-}
-
-void flush_tlb_current_task(void)
-{
- struct mm_struct *mm = current->mm;
- unsigned long cpu_mask;
-
- preempt_disable();
-
- cpu_mask = cpus_addr(mm->cpu_vm_mask)[0] & ~(1 << smp_processor_id());
- local_flush_tlb();
- if (cpu_mask)
- voyager_flush_tlb_others(cpu_mask, mm, TLB_FLUSH_ALL);
-
- preempt_enable();
-}
-
-void flush_tlb_mm(struct mm_struct *mm)
-{
- unsigned long cpu_mask;
-
- preempt_disable();
-
- cpu_mask = cpus_addr(mm->cpu_vm_mask)[0] & ~(1 << smp_processor_id());
-
- if (current->active_mm == mm) {
- if (current->mm)
- local_flush_tlb();
- else
- voyager_leave_mm(smp_processor_id());
- }
- if (cpu_mask)
- voyager_flush_tlb_others(cpu_mask, mm, TLB_FLUSH_ALL);
-
- preempt_enable();
-}
-
-void flush_tlb_page(struct vm_area_struct *vma, unsigned long va)
-{
- struct mm_struct *mm = vma->vm_mm;
- unsigned long cpu_mask;
-
- preempt_disable();
-
- cpu_mask = cpus_addr(mm->cpu_vm_mask)[0] & ~(1 << smp_processor_id());
- if (current->active_mm == mm) {
- if (current->mm)
- __flush_tlb_one(va);
- else
- voyager_leave_mm(smp_processor_id());
- }
-
- if (cpu_mask)
- voyager_flush_tlb_others(cpu_mask, mm, va);
-
- preempt_enable();
-}
-
-EXPORT_SYMBOL(flush_tlb_page);
-
-/* enable the requested IRQs */
-static void smp_enable_irq_interrupt(void)
-{
- __u8 irq;
- __u8 cpu = get_cpu();
-
- VDEBUG(("VOYAGER SMP: CPU%d enabling irq mask 0x%x\n", cpu,
- vic_irq_enable_mask[cpu]));
-
- spin_lock(&vic_irq_lock);
- for (irq = 0; irq < 16; irq++) {
- if (vic_irq_enable_mask[cpu] & (1 << irq))
- enable_local_vic_irq(irq);
- }
- vic_irq_enable_mask[cpu] = 0;
- spin_unlock(&vic_irq_lock);
-
- put_cpu_no_resched();
-}
-
-/*
- * CPU halt call-back
- */
-static void smp_stop_cpu_function(void *dummy)
-{
- VDEBUG(("VOYAGER SMP: CPU%d is STOPPING\n", smp_processor_id()));
- cpu_clear(smp_processor_id(), cpu_online_map);
- local_irq_disable();
- for (;;)
- halt();
-}
-
-/* execute a thread on a new CPU. The function to be called must be
- * previously set up. This is used to schedule a function for
- * execution on all CPUs - set up the function then broadcast a
- * function_interrupt CPI to come here on each CPU */
-static void smp_call_function_interrupt(void)
-{
- irq_enter();
- generic_smp_call_function_interrupt();
- __get_cpu_var(irq_stat).irq_call_count++;
- irq_exit();
-}
-
-static void smp_call_function_single_interrupt(void)
-{
- irq_enter();
- generic_smp_call_function_single_interrupt();
- __get_cpu_var(irq_stat).irq_call_count++;
- irq_exit();
-}
-
-/* Sorry about the name. In an APIC based system, the APICs
- * themselves are programmed to send a timer interrupt. This is used
- * by linux to reschedule the processor. Voyager doesn't have this,
- * so we use the system clock to interrupt one processor, which in
- * turn, broadcasts a timer CPI to all the others --- we receive that
- * CPI here. We don't use this actually for counting so losing
- * ticks doesn't matter
- *
- * FIXME: For those CPUs which actually have a local APIC, we could
- * try to use it to trigger this interrupt instead of having to
- * broadcast the timer tick. Unfortunately, all my pentium DYADs have
- * no local APIC, so I can't do this
- *
- * This function is currently a placeholder and is unused in the code */
-void smp_apic_timer_interrupt(struct pt_regs *regs)
-{
- struct pt_regs *old_regs = set_irq_regs(regs);
- wrapper_smp_local_timer_interrupt();
- set_irq_regs(old_regs);
-}
-
-/* All of the QUAD interrupt GATES */
-void smp_qic_timer_interrupt(struct pt_regs *regs)
-{
- struct pt_regs *old_regs = set_irq_regs(regs);
- ack_QIC_CPI(QIC_TIMER_CPI);
- wrapper_smp_local_timer_interrupt();
- set_irq_regs(old_regs);
-}
-
-void smp_qic_invalidate_interrupt(struct pt_regs *regs)
-{
- ack_QIC_CPI(QIC_INVALIDATE_CPI);
- smp_invalidate_interrupt();
-}
-
-void smp_qic_reschedule_interrupt(struct pt_regs *regs)
-{
- ack_QIC_CPI(QIC_RESCHEDULE_CPI);
- smp_reschedule_interrupt();
-}
-
-void smp_qic_enable_irq_interrupt(struct pt_regs *regs)
-{
- ack_QIC_CPI(QIC_ENABLE_IRQ_CPI);
- smp_enable_irq_interrupt();
-}
-
-void smp_qic_call_function_interrupt(struct pt_regs *regs)
-{
- ack_QIC_CPI(QIC_CALL_FUNCTION_CPI);
- smp_call_function_interrupt();
-}
-
-void smp_qic_call_function_single_interrupt(struct pt_regs *regs)
-{
- ack_QIC_CPI(QIC_CALL_FUNCTION_SINGLE_CPI);
- smp_call_function_single_interrupt();
-}
-
-void smp_vic_cpi_interrupt(struct pt_regs *regs)
-{
- struct pt_regs *old_regs = set_irq_regs(regs);
- __u8 cpu = smp_processor_id();
-
- if (is_cpu_quad())
- ack_QIC_CPI(VIC_CPI_LEVEL0);
- else
- ack_VIC_CPI(VIC_CPI_LEVEL0);
-
- if (test_and_clear_bit(VIC_TIMER_CPI, &vic_cpi_mailbox[cpu]))
- wrapper_smp_local_timer_interrupt();
- if (test_and_clear_bit(VIC_INVALIDATE_CPI, &vic_cpi_mailbox[cpu]))
- smp_invalidate_interrupt();
- if (test_and_clear_bit(VIC_RESCHEDULE_CPI, &vic_cpi_mailbox[cpu]))
- smp_reschedule_interrupt();
- if (test_and_clear_bit(VIC_ENABLE_IRQ_CPI, &vic_cpi_mailbox[cpu]))
- smp_enable_irq_interrupt();
- if (test_and_clear_bit(VIC_CALL_FUNCTION_CPI, &vic_cpi_mailbox[cpu]))
- smp_call_function_interrupt();
- if (test_and_clear_bit(VIC_CALL_FUNCTION_SINGLE_CPI, &vic_cpi_mailbox[cpu]))
- smp_call_function_single_interrupt();
- set_irq_regs(old_regs);
-}
-
-static void do_flush_tlb_all(void *info)
-{
- unsigned long cpu = smp_processor_id();
-
- __flush_tlb_all();
- if (per_cpu(cpu_tlbstate, cpu).state == TLBSTATE_LAZY)
- voyager_leave_mm(cpu);
-}
-
-/* flush the TLB of every active CPU in the system */
-void flush_tlb_all(void)
-{
- on_each_cpu(do_flush_tlb_all, 0, 1);
-}
-
-/* send a reschedule CPI to one CPU by physical CPU number*/
-static void voyager_smp_send_reschedule(int cpu)
-{
- send_one_CPI(cpu, VIC_RESCHEDULE_CPI);
-}
-
-int hard_smp_processor_id(void)
-{
- __u8 i;
- __u8 cpumask = inb(VIC_PROC_WHO_AM_I);
- if ((cpumask & QUAD_IDENTIFIER) == QUAD_IDENTIFIER)
- return cpumask & 0x1F;
-
- for (i = 0; i < 8; i++) {
- if (cpumask & (1 << i))
- return i;
- }
- printk("** WARNING ** Illegal cpuid returned by VIC: %d", cpumask);
- return 0;
-}
-
-int safe_smp_processor_id(void)
-{
- return hard_smp_processor_id();
-}
-
-/* broadcast a halt to all other CPUs */
-static void voyager_smp_send_stop(void)
-{
- smp_call_function(smp_stop_cpu_function, NULL, 1);
-}
-
-/* this function is triggered in time.c when a clock tick fires
- * we need to re-broadcast the tick to all CPUs */
-void smp_vic_timer_interrupt(void)
-{
- send_CPI_allbutself(VIC_TIMER_CPI);
- smp_local_timer_interrupt();
-}
-
-/* local (per CPU) timer interrupt. It does both profiling and
- * process statistics/rescheduling.
- *
- * We do profiling in every local tick, statistics/rescheduling
- * happen only every 'profiling multiplier' ticks. The default
- * multiplier is 1 and it can be changed by writing the new multiplier
- * value into /proc/profile.
- */
-void smp_local_timer_interrupt(void)
-{
- int cpu = smp_processor_id();
- long weight;
-
- profile_tick(CPU_PROFILING);
- if (--per_cpu(prof_counter, cpu) <= 0) {
- /*
- * The multiplier may have changed since the last time we got
- * to this point as a result of the user writing to
- * /proc/profile. In this case we need to adjust the APIC
- * timer accordingly.
- *
- * Interrupts are already masked off at this point.
- */
- per_cpu(prof_counter, cpu) = per_cpu(prof_multiplier, cpu);
- if (per_cpu(prof_counter, cpu) !=
- per_cpu(prof_old_multiplier, cpu)) {
- /* FIXME: need to update the vic timer tick here */
- per_cpu(prof_old_multiplier, cpu) =
- per_cpu(prof_counter, cpu);
- }
-
- update_process_times(user_mode_vm(get_irq_regs()));
- }
-
- if (((1 << cpu) & voyager_extended_vic_processors) == 0)
- /* only extended VIC processors participate in
- * interrupt distribution */
- return;
-
- /*
- * We take the 'long' return path, and there every subsystem
- * grabs the appropriate locks (kernel lock/ irq lock).
- *
- * we might want to decouple profiling from the 'long path',
- * and do the profiling totally in assembly.
- *
- * Currently this isn't too much of an issue (performance wise),
- * we can take more than 100K local irqs per second on a 100 MHz P5.
- */
-
- if ((++vic_tick[cpu] & 0x7) != 0)
- return;
- /* get here every 16 ticks (about every 1/6 of a second) */
-
- /* Change our priority to give someone else a chance at getting
- * the IRQ. The algorithm goes like this:
- *
- * In the VIC, the dynamically routed interrupt is always
- * handled by the lowest priority eligible (i.e. receiving
- * interrupts) CPU. If >1 eligible CPUs are equal lowest, the
- * lowest processor number gets it.
- *
- * The priority of a CPU is controlled by a special per-CPU
- * VIC priority register which is 3 bits wide 0 being lowest
- * and 7 highest priority..
- *
- * Therefore we subtract the average number of interrupts from
- * the number we've fielded. If this number is negative, we
- * lower the activity count and if it is positive, we raise
- * it.
- *
- * I'm afraid this still leads to odd looking interrupt counts:
- * the totals are all roughly equal, but the individual ones
- * look rather skewed.
- *
- * FIXME: This algorithm is total crap when mixed with SMP
- * affinity code since we now try to even up the interrupt
- * counts when an affinity binding is keeping them on a
- * particular CPU*/
- weight = (vic_intr_count[cpu] * voyager_extended_cpus
- - vic_intr_total) >> 4;
- weight += 4;
- if (weight > 7)
- weight = 7;
- if (weight < 0)
- weight = 0;
-
- outb((__u8) weight, VIC_PRIORITY_REGISTER);
-
-#ifdef VOYAGER_DEBUG
- if ((vic_tick[cpu] & 0xFFF) == 0) {
- /* print this message roughly every 25 secs */
- printk("VOYAGER SMP: vic_tick[%d] = %lu, weight = %ld\n",
- cpu, vic_tick[cpu], weight);
- }
-#endif
-}
-
-/* setup the profiling timer */
-int setup_profiling_timer(unsigned int multiplier)
-{
- int i;
-
- if ((!multiplier))
- return -EINVAL;
-
- /*
- * Set the new multiplier for each CPU. CPUs don't start using the
- * new values until the next timer interrupt in which they do process
- * accounting.
- */
- for (i = 0; i < nr_cpu_ids; ++i)
- per_cpu(prof_multiplier, i) = multiplier;
-
- return 0;
-}
-
-/* This is a bit of a mess, but forced on us by the genirq changes
- * there's no genirq handler that really does what voyager wants
- * so hack it up with the simple IRQ handler */
-static void handle_vic_irq(unsigned int irq, struct irq_desc *desc)
-{
- before_handle_vic_irq(irq);
- handle_simple_irq(irq, desc);
- after_handle_vic_irq(irq);
-}
-
-/* The CPIs are handled in the per cpu 8259s, so they must be
- * enabled to be received: FIX: enabling the CPIs in the early
- * boot sequence interferes with bug checking; enable them later
- * on in smp_init */
-#define VIC_SET_GATE(cpi, vector) \
- set_intr_gate((cpi) + VIC_DEFAULT_CPI_BASE, (vector))
-#define QIC_SET_GATE(cpi, vector) \
- set_intr_gate((cpi) + QIC_DEFAULT_CPI_BASE, (vector))
-
-void __init voyager_smp_intr_init(void)
-{
- int i;
-
- /* initialize the per cpu irq mask to all disabled */
- for (i = 0; i < nr_cpu_ids; i++)
- vic_irq_mask[i] = 0xFFFF;
-
- VIC_SET_GATE(VIC_CPI_LEVEL0, vic_cpi_interrupt);
-
- VIC_SET_GATE(VIC_SYS_INT, vic_sys_interrupt);
- VIC_SET_GATE(VIC_CMN_INT, vic_cmn_interrupt);
-
- QIC_SET_GATE(QIC_TIMER_CPI, qic_timer_interrupt);
- QIC_SET_GATE(QIC_INVALIDATE_CPI, qic_invalidate_interrupt);
- QIC_SET_GATE(QIC_RESCHEDULE_CPI, qic_reschedule_interrupt);
- QIC_SET_GATE(QIC_ENABLE_IRQ_CPI, qic_enable_irq_interrupt);
- QIC_SET_GATE(QIC_CALL_FUNCTION_CPI, qic_call_function_interrupt);
-
- /* now put the VIC descriptor into the first 48 IRQs
- *
- * This is for later: first 16 correspond to PC IRQs; next 16
- * are Primary MC IRQs and final 16 are Secondary MC IRQs */
- for (i = 0; i < 48; i++)
- set_irq_chip_and_handler(i, &vic_chip, handle_vic_irq);
-}
-
-/* send a CPI at level cpi to a set of cpus in cpuset (set 1 bit per
- * processor to receive CPI */
-static void send_CPI(__u32 cpuset, __u8 cpi)
-{
- int cpu;
- __u32 quad_cpuset = (cpuset & voyager_quad_processors);
-
- if (cpi < VIC_START_FAKE_CPI) {
- /* fake CPI are only used for booting, so send to the
- * extended quads as well---Quads must be VIC booted */
- outb((__u8) (cpuset), VIC_CPI_Registers[cpi]);
- return;
- }
- if (quad_cpuset)
- send_QIC_CPI(quad_cpuset, cpi);
- cpuset &= ~quad_cpuset;
- cpuset &= 0xff; /* only first 8 CPUs vaild for VIC CPI */
- if (cpuset == 0)
- return;
- for_each_online_cpu(cpu) {
- if (cpuset & (1 << cpu))
- set_bit(cpi, &vic_cpi_mailbox[cpu]);
- }
- if (cpuset)
- outb((__u8) cpuset, VIC_CPI_Registers[VIC_CPI_LEVEL0]);
-}
-
-/* Acknowledge receipt of CPI in the QIC, clear in QIC hardware and
- * set the cache line to shared by reading it.
- *
- * DON'T make this inline otherwise the cache line read will be
- * optimised away
- * */
-static int ack_QIC_CPI(__u8 cpi)
-{
- __u8 cpu = hard_smp_processor_id();
-
- cpi &= 7;
-
- outb(1 << cpi, QIC_INTERRUPT_CLEAR1);
- return voyager_quad_cpi_addr[cpu]->qic_cpi[cpi].cpi;
-}
-
-static void ack_special_QIC_CPI(__u8 cpi)
-{
- switch (cpi) {
- case VIC_CMN_INT:
- outb(QIC_CMN_INT, QIC_INTERRUPT_CLEAR0);
- break;
- case VIC_SYS_INT:
- outb(QIC_SYS_INT, QIC_INTERRUPT_CLEAR0);
- break;
- }
- /* also clear at the VIC, just in case (nop for non-extended proc) */
- ack_VIC_CPI(cpi);
-}
-
-/* Acknowledge receipt of CPI in the VIC (essentially an EOI) */
-static void ack_VIC_CPI(__u8 cpi)
-{
-#ifdef VOYAGER_DEBUG
- unsigned long flags;
- __u16 isr;
- __u8 cpu = smp_processor_id();
-
- local_irq_save(flags);
- isr = vic_read_isr();
- if ((isr & (1 << (cpi & 7))) == 0) {
- printk("VOYAGER SMP: CPU%d lost CPI%d\n", cpu, cpi);
- }
-#endif
- /* send specific EOI; the two system interrupts have
- * bit 4 set for a separate vector but behave as the
- * corresponding 3 bit intr */
- outb_p(0x60 | (cpi & 7), 0x20);
-
-#ifdef VOYAGER_DEBUG
- if ((vic_read_isr() & (1 << (cpi & 7))) != 0) {
- printk("VOYAGER SMP: CPU%d still asserting CPI%d\n", cpu, cpi);
- }
- local_irq_restore(flags);
-#endif
-}
-
-/* cribbed with thanks from irq.c */
-#define __byte(x,y) (((unsigned char *)&(y))[x])
-#define cached_21(cpu) (__byte(0,vic_irq_mask[cpu]))
-#define cached_A1(cpu) (__byte(1,vic_irq_mask[cpu]))
-
-static unsigned int startup_vic_irq(unsigned int irq)
-{
- unmask_vic_irq(irq);
-
- return 0;
-}
-
-/* The enable and disable routines. This is where we run into
- * conflicting architectural philosophy. Fundamentally, the voyager
- * architecture does not expect to have to disable interrupts globally
- * (the IRQ controllers belong to each CPU). The processor masquerade
- * which is used to start the system shouldn't be used in a running OS
- * since it will cause great confusion if two separate CPUs drive to
- * the same IRQ controller (I know, I've tried it).
- *
- * The solution is a variant on the NCR lazy SPL design:
- *
- * 1) To disable an interrupt, do nothing (other than set the
- * IRQ_DISABLED flag). This dares the interrupt actually to arrive.
- *
- * 2) If the interrupt dares to come in, raise the local mask against
- * it (this will result in all the CPU masks being raised
- * eventually).
- *
- * 3) To enable the interrupt, lower the mask on the local CPU and
- * broadcast an Interrupt enable CPI which causes all other CPUs to
- * adjust their masks accordingly. */
-
-static void unmask_vic_irq(unsigned int irq)
-{
- /* linux doesn't to processor-irq affinity, so enable on
- * all CPUs we know about */
- int cpu = smp_processor_id(), real_cpu;
- __u16 mask = (1 << irq);
- __u32 processorList = 0;
- unsigned long flags;
-
- VDEBUG(("VOYAGER: unmask_vic_irq(%d) CPU%d affinity 0x%lx\n",
- irq, cpu, cpu_irq_affinity[cpu]));
- spin_lock_irqsave(&vic_irq_lock, flags);
- for_each_online_cpu(real_cpu) {
- if (!(voyager_extended_vic_processors & (1 << real_cpu)))
- continue;
- if (!(cpu_irq_affinity[real_cpu] & mask)) {
- /* irq has no affinity for this CPU, ignore */
- continue;
- }
- if (real_cpu == cpu) {
- enable_local_vic_irq(irq);
- } else if (vic_irq_mask[real_cpu] & mask) {
- vic_irq_enable_mask[real_cpu] |= mask;
- processorList |= (1 << real_cpu);
- }
- }
- spin_unlock_irqrestore(&vic_irq_lock, flags);
- if (processorList)
- send_CPI(processorList, VIC_ENABLE_IRQ_CPI);
-}
-
-static void mask_vic_irq(unsigned int irq)
-{
- /* lazy disable, do nothing */
-}
-
-static void enable_local_vic_irq(unsigned int irq)
-{
- __u8 cpu = smp_processor_id();
- __u16 mask = ~(1 << irq);
- __u16 old_mask = vic_irq_mask[cpu];
-
- vic_irq_mask[cpu] &= mask;
- if (vic_irq_mask[cpu] == old_mask)
- return;
-
- VDEBUG(("VOYAGER DEBUG: Enabling irq %d in hardware on CPU %d\n",
- irq, cpu));
-
- if (irq & 8) {
- outb_p(cached_A1(cpu), 0xA1);
- (void)inb_p(0xA1);
- } else {
- outb_p(cached_21(cpu), 0x21);
- (void)inb_p(0x21);
- }
-}
-
-static void disable_local_vic_irq(unsigned int irq)
-{
- __u8 cpu = smp_processor_id();
- __u16 mask = (1 << irq);
- __u16 old_mask = vic_irq_mask[cpu];
-
- if (irq == 7)
- return;
-
- vic_irq_mask[cpu] |= mask;
- if (old_mask == vic_irq_mask[cpu])
- return;
-
- VDEBUG(("VOYAGER DEBUG: Disabling irq %d in hardware on CPU %d\n",
- irq, cpu));
-
- if (irq & 8) {
- outb_p(cached_A1(cpu), 0xA1);
- (void)inb_p(0xA1);
- } else {
- outb_p(cached_21(cpu), 0x21);
- (void)inb_p(0x21);
- }
-}
-
-/* The VIC is level triggered, so the ack can only be issued after the
- * interrupt completes. However, we do Voyager lazy interrupt
- * handling here: It is an extremely expensive operation to mask an
- * interrupt in the vic, so we merely set a flag (IRQ_DISABLED). If
- * this interrupt actually comes in, then we mask and ack here to push
- * the interrupt off to another CPU */
-static void before_handle_vic_irq(unsigned int irq)
-{
- irq_desc_t *desc = irq_to_desc(irq);
- __u8 cpu = smp_processor_id();
-
- _raw_spin_lock(&vic_irq_lock);
- vic_intr_total++;
- vic_intr_count[cpu]++;
-
- if (!(cpu_irq_affinity[cpu] & (1 << irq))) {
- /* The irq is not in our affinity mask, push it off
- * onto another CPU */
- VDEBUG(("VOYAGER DEBUG: affinity triggered disable of irq %d "
- "on cpu %d\n", irq, cpu));
- disable_local_vic_irq(irq);
- /* set IRQ_INPROGRESS to prevent the handler in irq.c from
- * actually calling the interrupt routine */
- desc->status |= IRQ_REPLAY | IRQ_INPROGRESS;
- } else if (desc->status & IRQ_DISABLED) {
- /* Damn, the interrupt actually arrived, do the lazy
- * disable thing. The interrupt routine in irq.c will
- * not handle a IRQ_DISABLED interrupt, so nothing more
- * need be done here */
- VDEBUG(("VOYAGER DEBUG: lazy disable of irq %d on CPU %d\n",
- irq, cpu));
- disable_local_vic_irq(irq);
- desc->status |= IRQ_REPLAY;
- } else {
- desc->status &= ~IRQ_REPLAY;
- }
-
- _raw_spin_unlock(&vic_irq_lock);
-}
-
-/* Finish the VIC interrupt: basically mask */
-static void after_handle_vic_irq(unsigned int irq)
-{
- irq_desc_t *desc = irq_to_desc(irq);
-
- _raw_spin_lock(&vic_irq_lock);
- {
- unsigned int status = desc->status & ~IRQ_INPROGRESS;
-#ifdef VOYAGER_DEBUG
- __u16 isr;
-#endif
-
- desc->status = status;
- if ((status & IRQ_DISABLED))
- disable_local_vic_irq(irq);
-#ifdef VOYAGER_DEBUG
- /* DEBUG: before we ack, check what's in progress */
- isr = vic_read_isr();
- if ((isr & (1 << irq) && !(status & IRQ_REPLAY)) == 0) {
- int i;
- __u8 cpu = smp_processor_id();
- __u8 real_cpu;
- int mask; /* Um... initialize me??? --RR */
-
- printk("VOYAGER SMP: CPU%d lost interrupt %d\n",
- cpu, irq);
- for_each_possible_cpu(real_cpu, mask) {
-
- outb(VIC_CPU_MASQUERADE_ENABLE | real_cpu,
- VIC_PROCESSOR_ID);
- isr = vic_read_isr();
- if (isr & (1 << irq)) {
- printk
- ("VOYAGER SMP: CPU%d ack irq %d\n",
- real_cpu, irq);
- ack_vic_irq(irq);
- }
- outb(cpu, VIC_PROCESSOR_ID);
- }
- }
-#endif /* VOYAGER_DEBUG */
- /* as soon as we ack, the interrupt is eligible for
- * receipt by another CPU so everything must be in
- * order here */
- ack_vic_irq(irq);
- if (status & IRQ_REPLAY) {
- /* replay is set if we disable the interrupt
- * in the before_handle_vic_irq() routine, so
- * clear the in progress bit here to allow the
- * next CPU to handle this correctly */
- desc->status &= ~(IRQ_REPLAY | IRQ_INPROGRESS);
- }
-#ifdef VOYAGER_DEBUG
- isr = vic_read_isr();
- if ((isr & (1 << irq)) != 0)
- printk("VOYAGER SMP: after_handle_vic_irq() after "
- "ack irq=%d, isr=0x%x\n", irq, isr);
-#endif /* VOYAGER_DEBUG */
- }
- _raw_spin_unlock(&vic_irq_lock);
-
- /* All code after this point is out of the main path - the IRQ
- * may be intercepted by another CPU if reasserted */
-}
-
-/* Linux processor - interrupt affinity manipulations.
- *
- * For each processor, we maintain a 32 bit irq affinity mask.
- * Initially it is set to all 1's so every processor accepts every
- * interrupt. In this call, we change the processor's affinity mask:
- *
- * Change from enable to disable:
- *
- * If the interrupt ever comes in to the processor, we will disable it
- * and ack it to push it off to another CPU, so just accept the mask here.
- *
- * Change from disable to enable:
- *
- * change the mask and then do an interrupt enable CPI to re-enable on
- * the selected processors */
-
-void set_vic_irq_affinity(unsigned int irq, const struct cpumask *mask)
-{
- /* Only extended processors handle interrupts */
- unsigned long real_mask;
- unsigned long irq_mask = 1 << irq;
- int cpu;
-
- real_mask = cpus_addr(*mask)[0] & voyager_extended_vic_processors;
-
- if (cpus_addr(*mask)[0] == 0)
- /* can't have no CPUs to accept the interrupt -- extremely
- * bad things will happen */
- return;
-
- if (irq == 0)
- /* can't change the affinity of the timer IRQ. This
- * is due to the constraint in the voyager
- * architecture that the CPI also comes in on and IRQ
- * line and we have chosen IRQ0 for this. If you
- * raise the mask on this interrupt, the processor
- * will no-longer be able to accept VIC CPIs */
- return;
-
- if (irq >= 32)
- /* You can only have 32 interrupts in a voyager system
- * (and 32 only if you have a secondary microchannel
- * bus) */
- return;
-
- for_each_online_cpu(cpu) {
- unsigned long cpu_mask = 1 << cpu;
-
- if (cpu_mask & real_mask) {
- /* enable the interrupt for this cpu */
- cpu_irq_affinity[cpu] |= irq_mask;
- } else {
- /* disable the interrupt for this cpu */
- cpu_irq_affinity[cpu] &= ~irq_mask;
- }
- }
- /* this is magic, we now have the correct affinity maps, so
- * enable the interrupt. This will send an enable CPI to
- * those CPUs who need to enable it in their local masks,
- * causing them to correct for the new affinity . If the
- * interrupt is currently globally disabled, it will simply be
- * disabled again as it comes in (voyager lazy disable). If
- * the affinity map is tightened to disable the interrupt on a
- * cpu, it will be pushed off when it comes in */
- unmask_vic_irq(irq);
-}
-
-static void ack_vic_irq(unsigned int irq)
-{
- if (irq & 8) {
- outb(0x62, 0x20); /* Specific EOI to cascade */
- outb(0x60 | (irq & 7), 0xA0);
- } else {
- outb(0x60 | (irq & 7), 0x20);
- }
-}
-
-/* enable the CPIs. In the VIC, the CPIs are delivered by the 8259
- * but are not vectored by it. This means that the 8259 mask must be
- * lowered to receive them */
-static __init void vic_enable_cpi(void)
-{
- __u8 cpu = smp_processor_id();
-
- /* just take a copy of the current mask (nop for boot cpu) */
- vic_irq_mask[cpu] = vic_irq_mask[boot_cpu_id];
-
- enable_local_vic_irq(VIC_CPI_LEVEL0);
- enable_local_vic_irq(VIC_CPI_LEVEL1);
- /* for sys int and cmn int */
- enable_local_vic_irq(7);
-
- if (is_cpu_quad()) {
- outb(QIC_DEFAULT_MASK0, QIC_MASK_REGISTER0);
- outb(QIC_CPI_ENABLE, QIC_MASK_REGISTER1);
- VDEBUG(("VOYAGER SMP: QIC ENABLE CPI: CPU%d: MASK 0x%x\n",
- cpu, QIC_CPI_ENABLE));
- }
-
- VDEBUG(("VOYAGER SMP: ENABLE CPI: CPU%d: MASK 0x%x\n",
- cpu, vic_irq_mask[cpu]));
-}
-
-void voyager_smp_dump()
-{
- int old_cpu = smp_processor_id(), cpu;
-
- /* dump the interrupt masks of each processor */
- for_each_online_cpu(cpu) {
- __u16 imr, isr, irr;
- unsigned long flags;
-
- local_irq_save(flags);
- outb(VIC_CPU_MASQUERADE_ENABLE | cpu, VIC_PROCESSOR_ID);
- imr = (inb(0xa1) << 8) | inb(0x21);
- outb(0x0a, 0xa0);
- irr = inb(0xa0) << 8;
- outb(0x0a, 0x20);
- irr |= inb(0x20);
- outb(0x0b, 0xa0);
- isr = inb(0xa0) << 8;
- outb(0x0b, 0x20);
- isr |= inb(0x20);
- outb(old_cpu, VIC_PROCESSOR_ID);
- local_irq_restore(flags);
- printk("\tCPU%d: mask=0x%x, IMR=0x%x, IRR=0x%x, ISR=0x%x\n",
- cpu, vic_irq_mask[cpu], imr, irr, isr);
-#if 0
- /* These lines are put in to try to unstick an un ack'd irq */
- if (isr != 0) {
- int irq;
- for (irq = 0; irq < 16; irq++) {
- if (isr & (1 << irq)) {
- printk("\tCPU%d: ack irq %d\n",
- cpu, irq);
- local_irq_save(flags);
- outb(VIC_CPU_MASQUERADE_ENABLE | cpu,
- VIC_PROCESSOR_ID);
- ack_vic_irq(irq);
- outb(old_cpu, VIC_PROCESSOR_ID);
- local_irq_restore(flags);
- }
- }
- }
-#endif
- }
-}
-
-void smp_voyager_power_off(void *dummy)
-{
- if (smp_processor_id() == boot_cpu_id)
- voyager_power_off();
- else
- smp_stop_cpu_function(NULL);
-}
-
-static void __init voyager_smp_prepare_cpus(unsigned int max_cpus)
-{
- /* FIXME: ignore max_cpus for now */
- smp_boot_cpus();
-}
-
-static void __cpuinit voyager_smp_prepare_boot_cpu(void)
-{
- init_gdt(smp_processor_id());
- switch_to_new_gdt();
-
- cpu_online_map = cpumask_of_cpu(smp_processor_id());
- cpu_callout_map = cpumask_of_cpu(smp_processor_id());
- cpu_callin_map = CPU_MASK_NONE;
- cpu_present_map = cpumask_of_cpu(smp_processor_id());
-
-}
-
-static int __cpuinit voyager_cpu_up(unsigned int cpu)
-{
- /* This only works at boot for x86. See "rewrite" above. */
- if (cpu_isset(cpu, smp_commenced_mask))
- return -ENOSYS;
-
- /* In case one didn't come up */
- if (!cpu_isset(cpu, cpu_callin_map))
- return -EIO;
- /* Unleash the CPU! */
- cpu_set(cpu, smp_commenced_mask);
- while (!cpu_online(cpu))
- mb();
- return 0;
-}
-
-static void __init voyager_smp_cpus_done(unsigned int max_cpus)
-{
- zap_low_mappings();
-}
-
-void __init smp_setup_processor_id(void)
-{
- current_thread_info()->cpu = hard_smp_processor_id();
- x86_write_percpu(cpu_number, hard_smp_processor_id());
-}
-
-static void voyager_send_call_func(const struct cpumask *callmask)
-{
- __u32 mask = cpus_addr(*callmask)[0] & ~(1 << smp_processor_id());
- send_CPI(mask, VIC_CALL_FUNCTION_CPI);
-}
-
-static void voyager_send_call_func_single(int cpu)
-{
- send_CPI(1 << cpu, VIC_CALL_FUNCTION_SINGLE_CPI);
-}
-
-struct smp_ops smp_ops = {
- .smp_prepare_boot_cpu = voyager_smp_prepare_boot_cpu,
- .smp_prepare_cpus = voyager_smp_prepare_cpus,
- .cpu_up = voyager_cpu_up,
- .smp_cpus_done = voyager_smp_cpus_done,
-
- .smp_send_stop = voyager_smp_send_stop,
- .smp_send_reschedule = voyager_smp_send_reschedule,
-
- .send_call_func_ipi = voyager_send_call_func,
- .send_call_func_single_ipi = voyager_send_call_func_single,
-};
diff --git a/arch/x86/mach-voyager/voyager_thread.c b/arch/x86/mach-voyager/voyager_thread.c
deleted file mode 100644
index 15464a2..0000000
--- a/arch/x86/mach-voyager/voyager_thread.c
+++ /dev/null
@@ -1,128 +0,0 @@
-/* -*- mode: c; c-basic-offset: 8 -*- */
-
-/* Copyright (C) 2001
- *
- * Author: J.E.J.Bottomley@HansenPartnership.com
- *
- * This module provides the machine status monitor thread for the
- * voyager architecture. This allows us to monitor the machine
- * environment (temp, voltage, fan function) and the front panel and
- * internal UPS. If a fault is detected, this thread takes corrective
- * action (usually just informing init)
- * */
-
-#include <linux/module.h>
-#include <linux/mm.h>
-#include <linux/kernel_stat.h>
-#include <linux/delay.h>
-#include <linux/mc146818rtc.h>
-#include <linux/init.h>
-#include <linux/bootmem.h>
-#include <linux/kmod.h>
-#include <linux/completion.h>
-#include <linux/sched.h>
-#include <linux/kthread.h>
-#include <asm/desc.h>
-#include <asm/voyager.h>
-#include <asm/vic.h>
-#include <asm/mtrr.h>
-#include <asm/msr.h>
-
-struct task_struct *voyager_thread;
-static __u8 set_timeout;
-
-static int execute(const char *string)
-{
- int ret;
-
- char *envp[] = {
- "HOME=/",
- "TERM=linux",
- "PATH=/sbin:/usr/sbin:/bin:/usr/bin",
- NULL,
- };
- char *argv[] = {
- "/bin/bash",
- "-c",
- (char *)string,
- NULL,
- };
-
- if ((ret =
- call_usermodehelper(argv[0], argv, envp, UMH_WAIT_PROC)) != 0) {
- printk(KERN_ERR "Voyager failed to run \"%s\": %i\n", string,
- ret);
- }
- return ret;
-}
-
-static void check_from_kernel(void)
-{
- if (voyager_status.switch_off) {
-
- /* FIXME: This should be configurable via proc */
- execute("umask 600; echo 0 > /etc/initrunlvl; kill -HUP 1");
- } else if (voyager_status.power_fail) {
- VDEBUG(("Voyager daemon detected AC power failure\n"));
-
- /* FIXME: This should be configureable via proc */
- execute("umask 600; echo F > /etc/powerstatus; kill -PWR 1");
- set_timeout = 1;
- }
-}
-
-static void check_continuing_condition(void)
-{
- if (voyager_status.power_fail) {
- __u8 data;
- voyager_cat_psi(VOYAGER_PSI_SUBREAD,
- VOYAGER_PSI_AC_FAIL_REG, &data);
- if ((data & 0x1f) == 0) {
- /* all power restored */
- printk(KERN_NOTICE
- "VOYAGER AC power restored, cancelling shutdown\n");
- /* FIXME: should be user configureable */
- execute
- ("umask 600; echo O > /etc/powerstatus; kill -PWR 1");
- set_timeout = 0;
- }
- }
-}
-
-static int thread(void *unused)
-{
- printk(KERN_NOTICE "Voyager starting monitor thread\n");
-
- for (;;) {
- set_current_state(TASK_INTERRUPTIBLE);
- schedule_timeout(set_timeout ? HZ : MAX_SCHEDULE_TIMEOUT);
-
- VDEBUG(("Voyager Daemon awoken\n"));
- if (voyager_status.request_from_kernel == 0) {
- /* probably awoken from timeout */
- check_continuing_condition();
- } else {
- check_from_kernel();
- voyager_status.request_from_kernel = 0;
- }
- }
-}
-
-static int __init voyager_thread_start(void)
-{
- voyager_thread = kthread_run(thread, NULL, "kvoyagerd");
- if (IS_ERR(voyager_thread)) {
- printk(KERN_ERR
- "Voyager: Failed to create system monitor thread.\n");
- return PTR_ERR(voyager_thread);
- }
- return 0;
-}
-
-static void __exit voyager_thread_stop(void)
-{
- kthread_stop(voyager_thread);
-}
-
-module_init(voyager_thread_start);
-module_exit(voyager_thread_stop);
diff --git a/arch/x86/math-emu/get_address.c b/arch/x86/math-emu/get_address.c
index 420b3b6..6ef5e99 100644
--- a/arch/x86/math-emu/get_address.c
+++ b/arch/x86/math-emu/get_address.c
@@ -150,11 +150,9 @@
#endif /* PARANOID */
switch (segment) {
- /* gs isn't used by the kernel, so it still has its
- user-space value. */
case PREFIX_GS_ - 1:
- /* N.B. - movl %seg, mem is a 2 byte write regardless of prefix */
- savesegment(gs, addr->selector);
+ /* user gs handling can be lazy, use special accessors */
+ addr->selector = get_user_gs(FPU_info->regs);
break;
default:
addr->selector = PM_REG_(segment);
diff --git a/arch/x86/mm/Makefile b/arch/x86/mm/Makefile
index d8cc96a2..2b938a3 100644
--- a/arch/x86/mm/Makefile
+++ b/arch/x86/mm/Makefile
@@ -1,6 +1,8 @@
obj-y := init_$(BITS).o fault.o ioremap.o extable.o pageattr.o mmap.o \
pat.o pgtable.o gup.o
+obj-$(CONFIG_SMP) += tlb.o
+
obj-$(CONFIG_X86_32) += pgtable_32.o iomap_32.o
obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o
diff --git a/arch/x86/mm/extable.c b/arch/x86/mm/extable.c
index 7e8db53..61b41ca 100644
--- a/arch/x86/mm/extable.c
+++ b/arch/x86/mm/extable.c
@@ -23,6 +23,12 @@
fixup = search_exception_tables(regs->ip);
if (fixup) {
+ /* If fixup is less than 16, it means uaccess error */
+ if (fixup->fixup < 16) {
+ current_thread_info()->uaccess_err = -EFAULT;
+ regs->ip += fixup->fixup;
+ return 1;
+ }
regs->ip = fixup->fixup;
return 1;
}
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index c76ef1d..a03b727 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -1,73 +1,79 @@
/*
* Copyright (C) 1995 Linus Torvalds
- * Copyright (C) 2001,2002 Andi Kleen, SuSE Labs.
+ * Copyright (C) 2001, 2002 Andi Kleen, SuSE Labs.
+ * Copyright (C) 2008-2009, Red Hat Inc., Ingo Molnar
*/
-
-#include <linux/signal.h>
-#include <linux/sched.h>
-#include <linux/kernel.h>
-#include <linux/errno.h>
-#include <linux/string.h>
-#include <linux/types.h>
-#include <linux/ptrace.h>
-#include <linux/mmiotrace.h>
-#include <linux/mman.h>
-#include <linux/mm.h>
-#include <linux/smp.h>
#include <linux/interrupt.h>
-#include <linux/init.h>
-#include <linux/tty.h>
-#include <linux/vt_kern.h> /* For unblank_screen() */
+#include <linux/mmiotrace.h>
+#include <linux/bootmem.h>
#include <linux/compiler.h>
#include <linux/highmem.h>
-#include <linux/bootmem.h> /* for max_low_pfn */
-#include <linux/vmalloc.h>
-#include <linux/module.h>
#include <linux/kprobes.h>
#include <linux/uaccess.h>
+#include <linux/vmalloc.h>
+#include <linux/vt_kern.h>
+#include <linux/signal.h>
+#include <linux/kernel.h>
+#include <linux/ptrace.h>
+#include <linux/string.h>
+#include <linux/module.h>
#include <linux/kdebug.h>
+#include <linux/errno.h>
+#include <linux/magic.h>
+#include <linux/sched.h>
+#include <linux/types.h>
+#include <linux/init.h>
+#include <linux/mman.h>
+#include <linux/tty.h>
+#include <linux/smp.h>
+#include <linux/mm.h>
-#include <asm/system.h>
-#include <asm/desc.h>
-#include <asm/segment.h>
-#include <asm/pgalloc.h>
-#include <asm/smp.h>
-#include <asm/tlbflush.h>
-#include <asm/proto.h>
#include <asm-generic/sections.h>
+
+#include <asm/tlbflush.h>
+#include <asm/pgalloc.h>
+#include <asm/segment.h>
+#include <asm/system.h>
+#include <asm/proto.h>
#include <asm/traps.h>
+#include <asm/desc.h>
/*
- * Page fault error code bits
- * bit 0 == 0 means no page found, 1 means protection fault
- * bit 1 == 0 means read, 1 means write
- * bit 2 == 0 means kernel, 1 means user-mode
- * bit 3 == 1 means use of reserved bit detected
- * bit 4 == 1 means fault was an instruction fetch
+ * Page fault error code bits:
+ *
+ * bit 0 == 0: no page found 1: protection fault
+ * bit 1 == 0: read access 1: write access
+ * bit 2 == 0: kernel-mode access 1: user-mode access
+ * bit 3 == 1: use of reserved bit detected
+ * bit 4 == 1: fault was an instruction fetch
*/
-#define PF_PROT (1<<0)
-#define PF_WRITE (1<<1)
-#define PF_USER (1<<2)
-#define PF_RSVD (1<<3)
-#define PF_INSTR (1<<4)
+enum x86_pf_error_code {
+ PF_PROT = 1 << 0,
+ PF_WRITE = 1 << 1,
+ PF_USER = 1 << 2,
+ PF_RSVD = 1 << 3,
+ PF_INSTR = 1 << 4,
+};
+
+/*
+ * Returns 0 if mmiotrace is disabled, or if the fault is not
+ * handled by mmiotrace:
+ */
static inline int kmmio_fault(struct pt_regs *regs, unsigned long addr)
{
-#ifdef CONFIG_MMIOTRACE
if (unlikely(is_kmmio_active()))
if (kmmio_handler(regs, addr) == 1)
return -1;
-#endif
return 0;
}
static inline int notify_page_fault(struct pt_regs *regs)
{
-#ifdef CONFIG_KPROBES
int ret = 0;
/* kprobe_running() needs smp_processor_id() */
- if (!user_mode_vm(regs)) {
+ if (kprobes_built_in() && !user_mode_vm(regs)) {
preempt_disable();
if (kprobe_running() && kprobe_fault_handler(regs, 14))
ret = 1;
@@ -75,29 +81,76 @@
}
return ret;
-#else
- return 0;
-#endif
}
/*
- * X86_32
- * Sometimes AMD Athlon/Opteron CPUs report invalid exceptions on prefetch.
- * Check that here and ignore it.
+ * Prefetch quirks:
*
- * X86_64
- * Sometimes the CPU reports invalid exceptions on prefetch.
- * Check that here and ignore it.
+ * 32-bit mode:
*
- * Opcode checker based on code by Richard Brunner
+ * Sometimes AMD Athlon/Opteron CPUs report invalid exceptions on prefetch.
+ * Check that here and ignore it.
+ *
+ * 64-bit mode:
+ *
+ * Sometimes the CPU reports invalid exceptions on prefetch.
+ * Check that here and ignore it.
+ *
+ * Opcode checker based on code by Richard Brunner.
*/
-static int is_prefetch(struct pt_regs *regs, unsigned long addr,
- unsigned long error_code)
+static inline int
+check_prefetch_opcode(struct pt_regs *regs, unsigned char *instr,
+ unsigned char opcode, int *prefetch)
{
- unsigned char *instr;
- int scan_more = 1;
- int prefetch = 0;
+ unsigned char instr_hi = opcode & 0xf0;
+ unsigned char instr_lo = opcode & 0x0f;
+
+ switch (instr_hi) {
+ case 0x20:
+ case 0x30:
+ /*
+ * Values 0x26,0x2E,0x36,0x3E are valid x86 prefixes.
+ * In X86_64 long mode, the CPU will signal invalid
+ * opcode if some of these prefixes are present so
+ * X86_64 will never get here anyway
+ */
+ return ((instr_lo & 7) == 0x6);
+#ifdef CONFIG_X86_64
+ case 0x40:
+ /*
+ * In AMD64 long mode 0x40..0x4F are valid REX prefixes
+ * Need to figure out under what instruction mode the
+ * instruction was issued. Could check the LDT for lm,
+ * but for now it's good enough to assume that long
+ * mode only uses well known segments or kernel.
+ */
+ return (!user_mode(regs)) || (regs->cs == __USER_CS);
+#endif
+ case 0x60:
+ /* 0x64 thru 0x67 are valid prefixes in all modes. */
+ return (instr_lo & 0xC) == 0x4;
+ case 0xF0:
+ /* 0xF0, 0xF2, 0xF3 are valid prefixes in all modes. */
+ return !instr_lo || (instr_lo>>1) == 1;
+ case 0x00:
+ /* Prefetch instruction is 0x0F0D or 0x0F18 */
+ if (probe_kernel_address(instr, opcode))
+ return 0;
+
+ *prefetch = (instr_lo == 0xF) &&
+ (opcode == 0x0D || opcode == 0x18);
+ return 0;
+ default:
+ return 0;
+ }
+}
+
+static int
+is_prefetch(struct pt_regs *regs, unsigned long error_code, unsigned long addr)
+{
unsigned char *max_instr;
+ unsigned char *instr;
+ int prefetch = 0;
/*
* If it was a exec (instruction fetch) fault on NX page, then
@@ -106,164 +159,42 @@
if (error_code & PF_INSTR)
return 0;
- instr = (unsigned char *)convert_ip_to_linear(current, regs);
+ instr = (void *)convert_ip_to_linear(current, regs);
max_instr = instr + 15;
if (user_mode(regs) && instr >= (unsigned char *)TASK_SIZE)
return 0;
- while (scan_more && instr < max_instr) {
+ while (instr < max_instr) {
unsigned char opcode;
- unsigned char instr_hi;
- unsigned char instr_lo;
if (probe_kernel_address(instr, opcode))
break;
- instr_hi = opcode & 0xf0;
- instr_lo = opcode & 0x0f;
instr++;
- switch (instr_hi) {
- case 0x20:
- case 0x30:
- /*
- * Values 0x26,0x2E,0x36,0x3E are valid x86 prefixes.
- * In X86_64 long mode, the CPU will signal invalid
- * opcode if some of these prefixes are present so
- * X86_64 will never get here anyway
- */
- scan_more = ((instr_lo & 7) == 0x6);
+ if (!check_prefetch_opcode(regs, instr, opcode, &prefetch))
break;
-#ifdef CONFIG_X86_64
- case 0x40:
- /*
- * In AMD64 long mode 0x40..0x4F are valid REX prefixes
- * Need to figure out under what instruction mode the
- * instruction was issued. Could check the LDT for lm,
- * but for now it's good enough to assume that long
- * mode only uses well known segments or kernel.
- */
- scan_more = (!user_mode(regs)) || (regs->cs == __USER_CS);
- break;
-#endif
- case 0x60:
- /* 0x64 thru 0x67 are valid prefixes in all modes. */
- scan_more = (instr_lo & 0xC) == 0x4;
- break;
- case 0xF0:
- /* 0xF0, 0xF2, 0xF3 are valid prefixes in all modes. */
- scan_more = !instr_lo || (instr_lo>>1) == 1;
- break;
- case 0x00:
- /* Prefetch instruction is 0x0F0D or 0x0F18 */
- scan_more = 0;
-
- if (probe_kernel_address(instr, opcode))
- break;
- prefetch = (instr_lo == 0xF) &&
- (opcode == 0x0D || opcode == 0x18);
- break;
- default:
- scan_more = 0;
- break;
- }
}
return prefetch;
}
-static void force_sig_info_fault(int si_signo, int si_code,
- unsigned long address, struct task_struct *tsk)
+static void
+force_sig_info_fault(int si_signo, int si_code, unsigned long address,
+ struct task_struct *tsk)
{
siginfo_t info;
- info.si_signo = si_signo;
- info.si_errno = 0;
- info.si_code = si_code;
- info.si_addr = (void __user *)address;
+ info.si_signo = si_signo;
+ info.si_errno = 0;
+ info.si_code = si_code;
+ info.si_addr = (void __user *)address;
+
force_sig_info(si_signo, &info, tsk);
}
-#ifdef CONFIG_X86_64
-static int bad_address(void *p)
-{
- unsigned long dummy;
- return probe_kernel_address((unsigned long *)p, dummy);
-}
-#endif
-
-static void dump_pagetable(unsigned long address)
-{
-#ifdef CONFIG_X86_32
- __typeof__(pte_val(__pte(0))) page;
-
- page = read_cr3();
- page = ((__typeof__(page) *) __va(page))[address >> PGDIR_SHIFT];
-#ifdef CONFIG_X86_PAE
- printk("*pdpt = %016Lx ", page);
- if ((page >> PAGE_SHIFT) < max_low_pfn
- && page & _PAGE_PRESENT) {
- page &= PAGE_MASK;
- page = ((__typeof__(page) *) __va(page))[(address >> PMD_SHIFT)
- & (PTRS_PER_PMD - 1)];
- printk(KERN_CONT "*pde = %016Lx ", page);
- page &= ~_PAGE_NX;
- }
-#else
- printk("*pde = %08lx ", page);
-#endif
-
- /*
- * We must not directly access the pte in the highpte
- * case if the page table is located in highmem.
- * And let's rather not kmap-atomic the pte, just in case
- * it's allocated already.
- */
- if ((page >> PAGE_SHIFT) < max_low_pfn
- && (page & _PAGE_PRESENT)
- && !(page & _PAGE_PSE)) {
- page &= PAGE_MASK;
- page = ((__typeof__(page) *) __va(page))[(address >> PAGE_SHIFT)
- & (PTRS_PER_PTE - 1)];
- printk("*pte = %0*Lx ", sizeof(page)*2, (u64)page);
- }
-
- printk("\n");
-#else /* CONFIG_X86_64 */
- pgd_t *pgd;
- pud_t *pud;
- pmd_t *pmd;
- pte_t *pte;
-
- pgd = (pgd_t *)read_cr3();
-
- pgd = __va((unsigned long)pgd & PHYSICAL_PAGE_MASK);
- pgd += pgd_index(address);
- if (bad_address(pgd)) goto bad;
- printk("PGD %lx ", pgd_val(*pgd));
- if (!pgd_present(*pgd)) goto ret;
-
- pud = pud_offset(pgd, address);
- if (bad_address(pud)) goto bad;
- printk("PUD %lx ", pud_val(*pud));
- if (!pud_present(*pud) || pud_large(*pud))
- goto ret;
-
- pmd = pmd_offset(pud, address);
- if (bad_address(pmd)) goto bad;
- printk("PMD %lx ", pmd_val(*pmd));
- if (!pmd_present(*pmd) || pmd_large(*pmd)) goto ret;
-
- pte = pte_offset_kernel(pmd, address);
- if (bad_address(pte)) goto bad;
- printk("PTE %lx", pte_val(*pte));
-ret:
- printk("\n");
- return;
-bad:
- printk("BAD\n");
-#endif
-}
+DEFINE_SPINLOCK(pgd_lock);
+LIST_HEAD(pgd_list);
#ifdef CONFIG_X86_32
static inline pmd_t *vmalloc_sync_one(pgd_t *pgd, unsigned long address)
@@ -284,7 +215,6 @@
* and redundant with the set_pmd() on non-PAE. As would
* set_pud.
*/
-
pud = pud_offset(pgd, address);
pud_k = pud_offset(pgd_k, address);
if (!pud_present(*pud_k))
@@ -294,46 +224,334 @@
pmd_k = pmd_offset(pud_k, address);
if (!pmd_present(*pmd_k))
return NULL;
+
if (!pmd_present(*pmd)) {
set_pmd(pmd, *pmd_k);
arch_flush_lazy_mmu_mode();
- } else
+ } else {
BUG_ON(pmd_page(*pmd) != pmd_page(*pmd_k));
+ }
+
return pmd_k;
}
+
+void vmalloc_sync_all(void)
+{
+ unsigned long address;
+
+ if (SHARED_KERNEL_PMD)
+ return;
+
+ for (address = VMALLOC_START & PMD_MASK;
+ address >= TASK_SIZE && address < FIXADDR_TOP;
+ address += PMD_SIZE) {
+
+ unsigned long flags;
+ struct page *page;
+
+ spin_lock_irqsave(&pgd_lock, flags);
+ list_for_each_entry(page, &pgd_list, lru) {
+ if (!vmalloc_sync_one(page_address(page), address))
+ break;
+ }
+ spin_unlock_irqrestore(&pgd_lock, flags);
+ }
+}
+
+/*
+ * 32-bit:
+ *
+ * Handle a fault on the vmalloc or module mapping area
+ */
+static noinline int vmalloc_fault(unsigned long address)
+{
+ unsigned long pgd_paddr;
+ pmd_t *pmd_k;
+ pte_t *pte_k;
+
+ /* Make sure we are in vmalloc area: */
+ if (!(address >= VMALLOC_START && address < VMALLOC_END))
+ return -1;
+
+ /*
+ * Synchronize this task's top level page-table
+ * with the 'reference' page table.
+ *
+ * Do _not_ use "current" here. We might be inside
+ * an interrupt in the middle of a task switch..
+ */
+ pgd_paddr = read_cr3();
+ pmd_k = vmalloc_sync_one(__va(pgd_paddr), address);
+ if (!pmd_k)
+ return -1;
+
+ pte_k = pte_offset_kernel(pmd_k, address);
+ if (!pte_present(*pte_k))
+ return -1;
+
+ return 0;
+}
+
+/*
+ * Did it hit the DOS screen memory VA from vm86 mode?
+ */
+static inline void
+check_v8086_mode(struct pt_regs *regs, unsigned long address,
+ struct task_struct *tsk)
+{
+ unsigned long bit;
+
+ if (!v8086_mode(regs))
+ return;
+
+ bit = (address - 0xA0000) >> PAGE_SHIFT;
+ if (bit < 32)
+ tsk->thread.screen_bitmap |= 1 << bit;
+}
+
+static void dump_pagetable(unsigned long address)
+{
+ __typeof__(pte_val(__pte(0))) page;
+
+ page = read_cr3();
+ page = ((__typeof__(page) *) __va(page))[address >> PGDIR_SHIFT];
+
+#ifdef CONFIG_X86_PAE
+ printk("*pdpt = %016Lx ", page);
+ if ((page >> PAGE_SHIFT) < max_low_pfn
+ && page & _PAGE_PRESENT) {
+ page &= PAGE_MASK;
+ page = ((__typeof__(page) *) __va(page))[(address >> PMD_SHIFT)
+ & (PTRS_PER_PMD - 1)];
+ printk(KERN_CONT "*pde = %016Lx ", page);
+ page &= ~_PAGE_NX;
+ }
+#else
+ printk("*pde = %08lx ", page);
#endif
-#ifdef CONFIG_X86_64
+ /*
+ * We must not directly access the pte in the highpte
+ * case if the page table is located in highmem.
+ * And let's rather not kmap-atomic the pte, just in case
+ * it's allocated already:
+ */
+ if ((page >> PAGE_SHIFT) < max_low_pfn
+ && (page & _PAGE_PRESENT)
+ && !(page & _PAGE_PSE)) {
+
+ page &= PAGE_MASK;
+ page = ((__typeof__(page) *) __va(page))[(address >> PAGE_SHIFT)
+ & (PTRS_PER_PTE - 1)];
+ printk("*pte = %0*Lx ", sizeof(page)*2, (u64)page);
+ }
+
+ printk("\n");
+}
+
+#else /* CONFIG_X86_64: */
+
+void vmalloc_sync_all(void)
+{
+ unsigned long address;
+
+ for (address = VMALLOC_START & PGDIR_MASK; address <= VMALLOC_END;
+ address += PGDIR_SIZE) {
+
+ const pgd_t *pgd_ref = pgd_offset_k(address);
+ unsigned long flags;
+ struct page *page;
+
+ if (pgd_none(*pgd_ref))
+ continue;
+
+ spin_lock_irqsave(&pgd_lock, flags);
+ list_for_each_entry(page, &pgd_list, lru) {
+ pgd_t *pgd;
+ pgd = (pgd_t *)page_address(page) + pgd_index(address);
+ if (pgd_none(*pgd))
+ set_pgd(pgd, *pgd_ref);
+ else
+ BUG_ON(pgd_page_vaddr(*pgd) != pgd_page_vaddr(*pgd_ref));
+ }
+ spin_unlock_irqrestore(&pgd_lock, flags);
+ }
+}
+
+/*
+ * 64-bit:
+ *
+ * Handle a fault on the vmalloc area
+ *
+ * This assumes no large pages in there.
+ */
+static noinline int vmalloc_fault(unsigned long address)
+{
+ pgd_t *pgd, *pgd_ref;
+ pud_t *pud, *pud_ref;
+ pmd_t *pmd, *pmd_ref;
+ pte_t *pte, *pte_ref;
+
+ /* Make sure we are in vmalloc area: */
+ if (!(address >= VMALLOC_START && address < VMALLOC_END))
+ return -1;
+
+ /*
+ * Copy kernel mappings over when needed. This can also
+ * happen within a race in page table update. In the later
+ * case just flush:
+ */
+ pgd = pgd_offset(current->active_mm, address);
+ pgd_ref = pgd_offset_k(address);
+ if (pgd_none(*pgd_ref))
+ return -1;
+
+ if (pgd_none(*pgd))
+ set_pgd(pgd, *pgd_ref);
+ else
+ BUG_ON(pgd_page_vaddr(*pgd) != pgd_page_vaddr(*pgd_ref));
+
+ /*
+ * Below here mismatches are bugs because these lower tables
+ * are shared:
+ */
+
+ pud = pud_offset(pgd, address);
+ pud_ref = pud_offset(pgd_ref, address);
+ if (pud_none(*pud_ref))
+ return -1;
+
+ if (pud_none(*pud) || pud_page_vaddr(*pud) != pud_page_vaddr(*pud_ref))
+ BUG();
+
+ pmd = pmd_offset(pud, address);
+ pmd_ref = pmd_offset(pud_ref, address);
+ if (pmd_none(*pmd_ref))
+ return -1;
+
+ if (pmd_none(*pmd) || pmd_page(*pmd) != pmd_page(*pmd_ref))
+ BUG();
+
+ pte_ref = pte_offset_kernel(pmd_ref, address);
+ if (!pte_present(*pte_ref))
+ return -1;
+
+ pte = pte_offset_kernel(pmd, address);
+
+ /*
+ * Don't use pte_page here, because the mappings can point
+ * outside mem_map, and the NUMA hash lookup cannot handle
+ * that:
+ */
+ if (!pte_present(*pte) || pte_pfn(*pte) != pte_pfn(*pte_ref))
+ BUG();
+
+ return 0;
+}
+
static const char errata93_warning[] =
KERN_ERR "******* Your BIOS seems to not contain a fix for K8 errata #93\n"
KERN_ERR "******* Working around it, but it may cause SEGVs or burn power.\n"
KERN_ERR "******* Please consider a BIOS update.\n"
KERN_ERR "******* Disabling USB legacy in the BIOS may also help.\n";
-#endif
-/* Workaround for K8 erratum #93 & buggy BIOS.
- BIOS SMM functions are required to use a specific workaround
- to avoid corruption of the 64bit RIP register on C stepping K8.
- A lot of BIOS that didn't get tested properly miss this.
- The OS sees this as a page fault with the upper 32bits of RIP cleared.
- Try to work around it here.
- Note we only handle faults in kernel here.
- Does nothing for X86_32
+/*
+ * No vm86 mode in 64-bit mode:
+ */
+static inline void
+check_v8086_mode(struct pt_regs *regs, unsigned long address,
+ struct task_struct *tsk)
+{
+}
+
+static int bad_address(void *p)
+{
+ unsigned long dummy;
+
+ return probe_kernel_address((unsigned long *)p, dummy);
+}
+
+static void dump_pagetable(unsigned long address)
+{
+ pgd_t *pgd;
+ pud_t *pud;
+ pmd_t *pmd;
+ pte_t *pte;
+
+ pgd = (pgd_t *)read_cr3();
+
+ pgd = __va((unsigned long)pgd & PHYSICAL_PAGE_MASK);
+
+ pgd += pgd_index(address);
+ if (bad_address(pgd))
+ goto bad;
+
+ printk("PGD %lx ", pgd_val(*pgd));
+
+ if (!pgd_present(*pgd))
+ goto out;
+
+ pud = pud_offset(pgd, address);
+ if (bad_address(pud))
+ goto bad;
+
+ printk("PUD %lx ", pud_val(*pud));
+ if (!pud_present(*pud) || pud_large(*pud))
+ goto out;
+
+ pmd = pmd_offset(pud, address);
+ if (bad_address(pmd))
+ goto bad;
+
+ printk("PMD %lx ", pmd_val(*pmd));
+ if (!pmd_present(*pmd) || pmd_large(*pmd))
+ goto out;
+
+ pte = pte_offset_kernel(pmd, address);
+ if (bad_address(pte))
+ goto bad;
+
+ printk("PTE %lx", pte_val(*pte));
+out:
+ printk("\n");
+ return;
+bad:
+ printk("BAD\n");
+}
+
+#endif /* CONFIG_X86_64 */
+
+/*
+ * Workaround for K8 erratum #93 & buggy BIOS.
+ *
+ * BIOS SMM functions are required to use a specific workaround
+ * to avoid corruption of the 64bit RIP register on C stepping K8.
+ *
+ * A lot of BIOS that didn't get tested properly miss this.
+ *
+ * The OS sees this as a page fault with the upper 32bits of RIP cleared.
+ * Try to work around it here.
+ *
+ * Note we only handle faults in kernel here.
+ * Does nothing on 32-bit.
*/
static int is_errata93(struct pt_regs *regs, unsigned long address)
{
#ifdef CONFIG_X86_64
- static int warned;
+ static int once;
+
if (address != regs->ip)
return 0;
+
if ((address >> 32) != 0)
return 0;
+
address |= 0xffffffffUL << 32;
if ((address >= (u64)_stext && address <= (u64)_etext) ||
(address >= MODULES_VADDR && address <= MODULES_END)) {
- if (!warned) {
+ if (!once) {
printk(errata93_warning);
- warned = 1;
+ once = 1;
}
regs->ip = address;
return 1;
@@ -343,16 +561,17 @@
}
/*
- * Work around K8 erratum #100 K8 in compat mode occasionally jumps to illegal
- * addresses >4GB. We catch this in the page fault handler because these
- * addresses are not reachable. Just detect this case and return. Any code
+ * Work around K8 erratum #100 K8 in compat mode occasionally jumps
+ * to illegal addresses >4GB.
+ *
+ * We catch this in the page fault handler because these addresses
+ * are not reachable. Just detect this case and return. Any code
* segment in LDT is compatibility mode.
*/
static int is_errata100(struct pt_regs *regs, unsigned long address)
{
#ifdef CONFIG_X86_64
- if ((regs->cs == __USER32_CS || (regs->cs & (1<<2))) &&
- (address >> 32))
+ if ((regs->cs == __USER32_CS || (regs->cs & (1<<2))) && (address >> 32))
return 1;
#endif
return 0;
@@ -362,8 +581,9 @@
{
#ifdef CONFIG_X86_F00F_BUG
unsigned long nr;
+
/*
- * Pentium F0 0F C7 C8 bug workaround.
+ * Pentium F0 0F C7 C8 bug workaround:
*/
if (boot_cpu_data.f00f_bug) {
nr = (address - idt_descr.address) >> 3;
@@ -377,62 +597,277 @@
return 0;
}
-static void show_fault_oops(struct pt_regs *regs, unsigned long error_code,
- unsigned long address)
+static const char nx_warning[] = KERN_CRIT
+"kernel tried to execute NX-protected page - exploit attempt? (uid: %d)\n";
+
+static void
+show_fault_oops(struct pt_regs *regs, unsigned long error_code,
+ unsigned long address)
{
-#ifdef CONFIG_X86_32
if (!oops_may_print())
return;
-#endif
-#ifdef CONFIG_X86_PAE
if (error_code & PF_INSTR) {
unsigned int level;
+
pte_t *pte = lookup_address(address, &level);
if (pte && pte_present(*pte) && !pte_exec(*pte))
- printk(KERN_CRIT "kernel tried to execute "
- "NX-protected page - exploit attempt? "
- "(uid: %d)\n", current_uid());
+ printk(nx_warning, current_uid());
}
-#endif
printk(KERN_ALERT "BUG: unable to handle kernel ");
if (address < PAGE_SIZE)
printk(KERN_CONT "NULL pointer dereference");
else
printk(KERN_CONT "paging request");
+
printk(KERN_CONT " at %p\n", (void *) address);
printk(KERN_ALERT "IP:");
printk_address(regs->ip, 1);
+
dump_pagetable(address);
}
-#ifdef CONFIG_X86_64
-static noinline void pgtable_bad(unsigned long address, struct pt_regs *regs,
- unsigned long error_code)
+static noinline void
+pgtable_bad(struct pt_regs *regs, unsigned long error_code,
+ unsigned long address)
{
- unsigned long flags = oops_begin();
- int sig = SIGKILL;
struct task_struct *tsk;
+ unsigned long flags;
+ int sig;
+
+ flags = oops_begin();
+ tsk = current;
+ sig = SIGKILL;
printk(KERN_ALERT "%s: Corrupted page table at address %lx\n",
- current->comm, address);
+ tsk->comm, address);
dump_pagetable(address);
- tsk = current;
- tsk->thread.cr2 = address;
- tsk->thread.trap_no = 14;
- tsk->thread.error_code = error_code;
+
+ tsk->thread.cr2 = address;
+ tsk->thread.trap_no = 14;
+ tsk->thread.error_code = error_code;
+
if (__die("Bad pagetable", regs, error_code))
sig = 0;
+
oops_end(flags, regs, sig);
}
-#endif
+
+static noinline void
+no_context(struct pt_regs *regs, unsigned long error_code,
+ unsigned long address)
+{
+ struct task_struct *tsk = current;
+ unsigned long *stackend;
+ unsigned long flags;
+ int sig;
+
+ /* Are we prepared to handle this kernel fault? */
+ if (fixup_exception(regs))
+ return;
+
+ /*
+ * 32-bit:
+ *
+ * Valid to do another page fault here, because if this fault
+ * had been triggered by is_prefetch fixup_exception would have
+ * handled it.
+ *
+ * 64-bit:
+ *
+ * Hall of shame of CPU/BIOS bugs.
+ */
+ if (is_prefetch(regs, error_code, address))
+ return;
+
+ if (is_errata93(regs, address))
+ return;
+
+ /*
+ * Oops. The kernel tried to access some bad page. We'll have to
+ * terminate things with extreme prejudice:
+ */
+ flags = oops_begin();
+
+ show_fault_oops(regs, error_code, address);
+
+ stackend = end_of_stack(tsk);
+ if (*stackend != STACK_END_MAGIC)
+ printk(KERN_ALERT "Thread overran stack, or stack corrupted\n");
+
+ tsk->thread.cr2 = address;
+ tsk->thread.trap_no = 14;
+ tsk->thread.error_code = error_code;
+
+ sig = SIGKILL;
+ if (__die("Oops", regs, error_code))
+ sig = 0;
+
+ /* Executive summary in case the body of the oops scrolled away */
+ printk(KERN_EMERG "CR2: %016lx\n", address);
+
+ oops_end(flags, regs, sig);
+}
+
+/*
+ * Print out info about fatal segfaults, if the show_unhandled_signals
+ * sysctl is set:
+ */
+static inline void
+show_signal_msg(struct pt_regs *regs, unsigned long error_code,
+ unsigned long address, struct task_struct *tsk)
+{
+ if (!unhandled_signal(tsk, SIGSEGV))
+ return;
+
+ if (!printk_ratelimit())
+ return;
+
+ printk(KERN_CONT "%s%s[%d]: segfault at %lx ip %p sp %p error %lx",
+ task_pid_nr(tsk) > 1 ? KERN_INFO : KERN_EMERG,
+ tsk->comm, task_pid_nr(tsk), address,
+ (void *)regs->ip, (void *)regs->sp, error_code);
+
+ print_vma_addr(KERN_CONT " in ", regs->ip);
+
+ printk(KERN_CONT "\n");
+}
+
+static void
+__bad_area_nosemaphore(struct pt_regs *regs, unsigned long error_code,
+ unsigned long address, int si_code)
+{
+ struct task_struct *tsk = current;
+
+ /* User mode accesses just cause a SIGSEGV */
+ if (error_code & PF_USER) {
+ /*
+ * It's possible to have interrupts off here:
+ */
+ local_irq_enable();
+
+ /*
+ * Valid to do another page fault here because this one came
+ * from user space:
+ */
+ if (is_prefetch(regs, error_code, address))
+ return;
+
+ if (is_errata100(regs, address))
+ return;
+
+ if (unlikely(show_unhandled_signals))
+ show_signal_msg(regs, error_code, address, tsk);
+
+ /* Kernel addresses are always protection faults: */
+ tsk->thread.cr2 = address;
+ tsk->thread.error_code = error_code | (address >= TASK_SIZE);
+ tsk->thread.trap_no = 14;
+
+ force_sig_info_fault(SIGSEGV, si_code, address, tsk);
+
+ return;
+ }
+
+ if (is_f00f_bug(regs, address))
+ return;
+
+ no_context(regs, error_code, address);
+}
+
+static noinline void
+bad_area_nosemaphore(struct pt_regs *regs, unsigned long error_code,
+ unsigned long address)
+{
+ __bad_area_nosemaphore(regs, error_code, address, SEGV_MAPERR);
+}
+
+static void
+__bad_area(struct pt_regs *regs, unsigned long error_code,
+ unsigned long address, int si_code)
+{
+ struct mm_struct *mm = current->mm;
+
+ /*
+ * Something tried to access memory that isn't in our memory map..
+ * Fix it, but check if it's kernel or user first..
+ */
+ up_read(&mm->mmap_sem);
+
+ __bad_area_nosemaphore(regs, error_code, address, si_code);
+}
+
+static noinline void
+bad_area(struct pt_regs *regs, unsigned long error_code, unsigned long address)
+{
+ __bad_area(regs, error_code, address, SEGV_MAPERR);
+}
+
+static noinline void
+bad_area_access_error(struct pt_regs *regs, unsigned long error_code,
+ unsigned long address)
+{
+ __bad_area(regs, error_code, address, SEGV_ACCERR);
+}
+
+/* TODO: fixup for "mm-invoke-oom-killer-from-page-fault.patch" */
+static void
+out_of_memory(struct pt_regs *regs, unsigned long error_code,
+ unsigned long address)
+{
+ /*
+ * We ran out of memory, call the OOM killer, and return the userspace
+ * (which will retry the fault, or kill us if we got oom-killed):
+ */
+ up_read(¤t->mm->mmap_sem);
+
+ pagefault_out_of_memory();
+}
+
+static void
+do_sigbus(struct pt_regs *regs, unsigned long error_code, unsigned long address)
+{
+ struct task_struct *tsk = current;
+ struct mm_struct *mm = tsk->mm;
+
+ up_read(&mm->mmap_sem);
+
+ /* Kernel mode? Handle exceptions or die: */
+ if (!(error_code & PF_USER))
+ no_context(regs, error_code, address);
+
+ /* User-space => ok to do another page fault: */
+ if (is_prefetch(regs, error_code, address))
+ return;
+
+ tsk->thread.cr2 = address;
+ tsk->thread.error_code = error_code;
+ tsk->thread.trap_no = 14;
+
+ force_sig_info_fault(SIGBUS, BUS_ADRERR, address, tsk);
+}
+
+static noinline void
+mm_fault_error(struct pt_regs *regs, unsigned long error_code,
+ unsigned long address, unsigned int fault)
+{
+ if (fault & VM_FAULT_OOM) {
+ out_of_memory(regs, error_code, address);
+ } else {
+ if (fault & VM_FAULT_SIGBUS)
+ do_sigbus(regs, error_code, address);
+ else
+ BUG();
+ }
+}
static int spurious_fault_check(unsigned long error_code, pte_t *pte)
{
if ((error_code & PF_WRITE) && !pte_write(*pte))
return 0;
+
if ((error_code & PF_INSTR) && !pte_exec(*pte))
return 0;
@@ -440,21 +875,25 @@
}
/*
- * Handle a spurious fault caused by a stale TLB entry. This allows
- * us to lazily refresh the TLB when increasing the permissions of a
- * kernel page (RO -> RW or NX -> X). Doing it eagerly is very
- * expensive since that implies doing a full cross-processor TLB
- * flush, even if no stale TLB entries exist on other processors.
+ * Handle a spurious fault caused by a stale TLB entry.
+ *
+ * This allows us to lazily refresh the TLB when increasing the
+ * permissions of a kernel page (RO -> RW or NX -> X). Doing it
+ * eagerly is very expensive since that implies doing a full
+ * cross-processor TLB flush, even if no stale TLB entries exist
+ * on other processors.
+ *
* There are no security implications to leaving a stale TLB when
* increasing the permissions on a page.
*/
-static int spurious_fault(unsigned long address,
- unsigned long error_code)
+static noinline int
+spurious_fault(unsigned long error_code, unsigned long address)
{
pgd_t *pgd;
pud_t *pud;
pmd_t *pmd;
pte_t *pte;
+ int ret;
/* Reserved-bit violation or user access to kernel space? */
if (error_code & (PF_USER | PF_RSVD))
@@ -482,127 +921,71 @@
if (!pte_present(*pte))
return 0;
- return spurious_fault_check(error_code, pte);
-}
-
-/*
- * X86_32
- * Handle a fault on the vmalloc or module mapping area
- *
- * X86_64
- * Handle a fault on the vmalloc area
- *
- * This assumes no large pages in there.
- */
-static int vmalloc_fault(unsigned long address)
-{
-#ifdef CONFIG_X86_32
- unsigned long pgd_paddr;
- pmd_t *pmd_k;
- pte_t *pte_k;
-
- /* Make sure we are in vmalloc area */
- if (!(address >= VMALLOC_START && address < VMALLOC_END))
- return -1;
+ ret = spurious_fault_check(error_code, pte);
+ if (!ret)
+ return 0;
/*
- * Synchronize this task's top level page-table
- * with the 'reference' page table.
- *
- * Do _not_ use "current" here. We might be inside
- * an interrupt in the middle of a task switch..
+ * Make sure we have permissions in PMD.
+ * If not, then there's a bug in the page tables:
*/
- pgd_paddr = read_cr3();
- pmd_k = vmalloc_sync_one(__va(pgd_paddr), address);
- if (!pmd_k)
- return -1;
- pte_k = pte_offset_kernel(pmd_k, address);
- if (!pte_present(*pte_k))
- return -1;
- return 0;
-#else
- pgd_t *pgd, *pgd_ref;
- pud_t *pud, *pud_ref;
- pmd_t *pmd, *pmd_ref;
- pte_t *pte, *pte_ref;
+ ret = spurious_fault_check(error_code, (pte_t *) pmd);
+ WARN_ONCE(!ret, "PMD has incorrect permission bits\n");
- /* Make sure we are in vmalloc area */
- if (!(address >= VMALLOC_START && address < VMALLOC_END))
- return -1;
-
- /* Copy kernel mappings over when needed. This can also
- happen within a race in page table update. In the later
- case just flush. */
-
- pgd = pgd_offset(current->active_mm, address);
- pgd_ref = pgd_offset_k(address);
- if (pgd_none(*pgd_ref))
- return -1;
- if (pgd_none(*pgd))
- set_pgd(pgd, *pgd_ref);
- else
- BUG_ON(pgd_page_vaddr(*pgd) != pgd_page_vaddr(*pgd_ref));
-
- /* Below here mismatches are bugs because these lower tables
- are shared */
-
- pud = pud_offset(pgd, address);
- pud_ref = pud_offset(pgd_ref, address);
- if (pud_none(*pud_ref))
- return -1;
- if (pud_none(*pud) || pud_page_vaddr(*pud) != pud_page_vaddr(*pud_ref))
- BUG();
- pmd = pmd_offset(pud, address);
- pmd_ref = pmd_offset(pud_ref, address);
- if (pmd_none(*pmd_ref))
- return -1;
- if (pmd_none(*pmd) || pmd_page(*pmd) != pmd_page(*pmd_ref))
- BUG();
- pte_ref = pte_offset_kernel(pmd_ref, address);
- if (!pte_present(*pte_ref))
- return -1;
- pte = pte_offset_kernel(pmd, address);
- /* Don't use pte_page here, because the mappings can point
- outside mem_map, and the NUMA hash lookup cannot handle
- that. */
- if (!pte_present(*pte) || pte_pfn(*pte) != pte_pfn(*pte_ref))
- BUG();
- return 0;
-#endif
+ return ret;
}
int show_unhandled_signals = 1;
+static inline int
+access_error(unsigned long error_code, int write, struct vm_area_struct *vma)
+{
+ if (write) {
+ /* write, present and write, not present: */
+ if (unlikely(!(vma->vm_flags & VM_WRITE)))
+ return 1;
+ return 0;
+ }
+
+ /* read, present: */
+ if (unlikely(error_code & PF_PROT))
+ return 1;
+
+ /* read, not present: */
+ if (unlikely(!(vma->vm_flags & (VM_READ | VM_EXEC | VM_WRITE))))
+ return 1;
+
+ return 0;
+}
+
+static int fault_in_kernel_space(unsigned long address)
+{
+ return address >= TASK_SIZE_MAX;
+}
+
/*
* This routine handles page faults. It determines the address,
* and the problem, and then passes it off to one of the appropriate
* routines.
*/
-#ifdef CONFIG_X86_64
-asmlinkage
-#endif
-void __kprobes do_page_fault(struct pt_regs *regs, unsigned long error_code)
+dotraplinkage void __kprobes
+do_page_fault(struct pt_regs *regs, unsigned long error_code)
{
- struct task_struct *tsk;
- struct mm_struct *mm;
struct vm_area_struct *vma;
+ struct task_struct *tsk;
unsigned long address;
- int write, si_code;
+ struct mm_struct *mm;
+ int write;
int fault;
-#ifdef CONFIG_X86_64
- unsigned long flags;
- int sig;
-#endif
tsk = current;
mm = tsk->mm;
+
prefetchw(&mm->mmap_sem);
- /* get the address */
+ /* Get the faulting address: */
address = read_cr2();
- si_code = SEGV_MAPERR;
-
if (unlikely(kmmio_fault(regs, address)))
return;
@@ -619,319 +1002,147 @@
* (error_code & 4) == 0, and that the fault was not a
* protection error (error_code & 9) == 0.
*/
-#ifdef CONFIG_X86_32
- if (unlikely(address >= TASK_SIZE)) {
-#else
- if (unlikely(address >= TASK_SIZE64)) {
-#endif
+ if (unlikely(fault_in_kernel_space(address))) {
if (!(error_code & (PF_RSVD|PF_USER|PF_PROT)) &&
vmalloc_fault(address) >= 0)
return;
- /* Can handle a stale RO->RW TLB */
- if (spurious_fault(address, error_code))
+ /* Can handle a stale RO->RW TLB: */
+ if (spurious_fault(error_code, address))
return;
- /* kprobes don't want to hook the spurious faults. */
+ /* kprobes don't want to hook the spurious faults: */
if (notify_page_fault(regs))
return;
/*
* Don't take the mm semaphore here. If we fixup a prefetch
- * fault we could otherwise deadlock.
+ * fault we could otherwise deadlock:
*/
- goto bad_area_nosemaphore;
+ bad_area_nosemaphore(regs, error_code, address);
+
+ return;
}
- /* kprobes don't want to hook the spurious faults. */
- if (notify_page_fault(regs))
+ /* kprobes don't want to hook the spurious faults: */
+ if (unlikely(notify_page_fault(regs)))
return;
-
/*
* It's safe to allow irq's after cr2 has been saved and the
* vmalloc fault has been handled.
*
* User-mode registers count as a user access even for any
- * potential system fault or CPU buglet.
+ * potential system fault or CPU buglet:
*/
if (user_mode_vm(regs)) {
local_irq_enable();
error_code |= PF_USER;
- } else if (regs->flags & X86_EFLAGS_IF)
- local_irq_enable();
+ } else {
+ if (regs->flags & X86_EFLAGS_IF)
+ local_irq_enable();
+ }
-#ifdef CONFIG_X86_64
if (unlikely(error_code & PF_RSVD))
- pgtable_bad(address, regs, error_code);
-#endif
+ pgtable_bad(regs, error_code, address);
/*
- * If we're in an interrupt, have no user context or are running in an
- * atomic region then we must not take the fault.
+ * If we're in an interrupt, have no user context or are running
+ * in an atomic region then we must not take the fault:
*/
- if (unlikely(in_atomic() || !mm))
- goto bad_area_nosemaphore;
+ if (unlikely(in_atomic() || !mm)) {
+ bad_area_nosemaphore(regs, error_code, address);
+ return;
+ }
/*
* When running in the kernel we expect faults to occur only to
- * addresses in user space. All other faults represent errors in the
- * kernel and should generate an OOPS. Unfortunately, in the case of an
- * erroneous fault occurring in a code path which already holds mmap_sem
- * we will deadlock attempting to validate the fault against the
- * address space. Luckily the kernel only validly references user
- * space from well defined areas of code, which are listed in the
- * exceptions table.
+ * addresses in user space. All other faults represent errors in
+ * the kernel and should generate an OOPS. Unfortunately, in the
+ * case of an erroneous fault occurring in a code path which already
+ * holds mmap_sem we will deadlock attempting to validate the fault
+ * against the address space. Luckily the kernel only validly
+ * references user space from well defined areas of code, which are
+ * listed in the exceptions table.
*
* As the vast majority of faults will be valid we will only perform
- * the source reference check when there is a possibility of a deadlock.
- * Attempt to lock the address space, if we cannot we then validate the
- * source. If this is invalid we can skip the address space check,
- * thus avoiding the deadlock.
+ * the source reference check when there is a possibility of a
+ * deadlock. Attempt to lock the address space, if we cannot we then
+ * validate the source. If this is invalid we can skip the address
+ * space check, thus avoiding the deadlock:
*/
- if (!down_read_trylock(&mm->mmap_sem)) {
+ if (unlikely(!down_read_trylock(&mm->mmap_sem))) {
if ((error_code & PF_USER) == 0 &&
- !search_exception_tables(regs->ip))
- goto bad_area_nosemaphore;
+ !search_exception_tables(regs->ip)) {
+ bad_area_nosemaphore(regs, error_code, address);
+ return;
+ }
down_read(&mm->mmap_sem);
+ } else {
+ /*
+ * The above down_read_trylock() might have succeeded in
+ * which case we'll have missed the might_sleep() from
+ * down_read():
+ */
+ might_sleep();
}
vma = find_vma(mm, address);
- if (!vma)
- goto bad_area;
- if (vma->vm_start <= address)
+ if (unlikely(!vma)) {
+ bad_area(regs, error_code, address);
+ return;
+ }
+ if (likely(vma->vm_start <= address))
goto good_area;
- if (!(vma->vm_flags & VM_GROWSDOWN))
- goto bad_area;
+ if (unlikely(!(vma->vm_flags & VM_GROWSDOWN))) {
+ bad_area(regs, error_code, address);
+ return;
+ }
if (error_code & PF_USER) {
/*
* Accessing the stack below %sp is always a bug.
* The large cushion allows instructions like enter
- * and pusha to work. ("enter $65535,$31" pushes
+ * and pusha to work. ("enter $65535, $31" pushes
* 32 pointers and then decrements %sp by 65535.)
*/
- if (address + 65536 + 32 * sizeof(unsigned long) < regs->sp)
- goto bad_area;
+ if (unlikely(address + 65536 + 32 * sizeof(unsigned long) < regs->sp)) {
+ bad_area(regs, error_code, address);
+ return;
+ }
}
- if (expand_stack(vma, address))
- goto bad_area;
-/*
- * Ok, we have a good vm_area for this memory access, so
- * we can handle it..
- */
+ if (unlikely(expand_stack(vma, address))) {
+ bad_area(regs, error_code, address);
+ return;
+ }
+
+ /*
+ * Ok, we have a good vm_area for this memory access, so
+ * we can handle it..
+ */
good_area:
- si_code = SEGV_ACCERR;
- write = 0;
- switch (error_code & (PF_PROT|PF_WRITE)) {
- default: /* 3: write, present */
- /* fall through */
- case PF_WRITE: /* write, not present */
- if (!(vma->vm_flags & VM_WRITE))
- goto bad_area;
- write++;
- break;
- case PF_PROT: /* read, present */
- goto bad_area;
- case 0: /* read, not present */
- if (!(vma->vm_flags & (VM_READ | VM_EXEC | VM_WRITE)))
- goto bad_area;
+ write = error_code & PF_WRITE;
+
+ if (unlikely(access_error(error_code, write, vma))) {
+ bad_area_access_error(regs, error_code, address);
+ return;
}
/*
* If for any reason at all we couldn't handle the fault,
* make sure we exit gracefully rather than endlessly redo
- * the fault.
+ * the fault:
*/
fault = handle_mm_fault(mm, vma, address, write);
+
if (unlikely(fault & VM_FAULT_ERROR)) {
- if (fault & VM_FAULT_OOM)
- goto out_of_memory;
- else if (fault & VM_FAULT_SIGBUS)
- goto do_sigbus;
- BUG();
+ mm_fault_error(regs, error_code, address, fault);
+ return;
}
+
if (fault & VM_FAULT_MAJOR)
tsk->maj_flt++;
else
tsk->min_flt++;
-#ifdef CONFIG_X86_32
- /*
- * Did it hit the DOS screen memory VA from vm86 mode?
- */
- if (v8086_mode(regs)) {
- unsigned long bit = (address - 0xA0000) >> PAGE_SHIFT;
- if (bit < 32)
- tsk->thread.screen_bitmap |= 1 << bit;
- }
-#endif
+ check_v8086_mode(regs, address, tsk);
+
up_read(&mm->mmap_sem);
- return;
-
-/*
- * Something tried to access memory that isn't in our memory map..
- * Fix it, but check if it's kernel or user first..
- */
-bad_area:
- up_read(&mm->mmap_sem);
-
-bad_area_nosemaphore:
- /* User mode accesses just cause a SIGSEGV */
- if (error_code & PF_USER) {
- /*
- * It's possible to have interrupts off here.
- */
- local_irq_enable();
-
- /*
- * Valid to do another page fault here because this one came
- * from user space.
- */
- if (is_prefetch(regs, address, error_code))
- return;
-
- if (is_errata100(regs, address))
- return;
-
- if (show_unhandled_signals && unhandled_signal(tsk, SIGSEGV) &&
- printk_ratelimit()) {
- printk(
- "%s%s[%d]: segfault at %lx ip %p sp %p error %lx",
- task_pid_nr(tsk) > 1 ? KERN_INFO : KERN_EMERG,
- tsk->comm, task_pid_nr(tsk), address,
- (void *) regs->ip, (void *) regs->sp, error_code);
- print_vma_addr(" in ", regs->ip);
- printk("\n");
- }
-
- tsk->thread.cr2 = address;
- /* Kernel addresses are always protection faults */
- tsk->thread.error_code = error_code | (address >= TASK_SIZE);
- tsk->thread.trap_no = 14;
- force_sig_info_fault(SIGSEGV, si_code, address, tsk);
- return;
- }
-
- if (is_f00f_bug(regs, address))
- return;
-
-no_context:
- /* Are we prepared to handle this kernel fault? */
- if (fixup_exception(regs))
- return;
-
- /*
- * X86_32
- * Valid to do another page fault here, because if this fault
- * had been triggered by is_prefetch fixup_exception would have
- * handled it.
- *
- * X86_64
- * Hall of shame of CPU/BIOS bugs.
- */
- if (is_prefetch(regs, address, error_code))
- return;
-
- if (is_errata93(regs, address))
- return;
-
-/*
- * Oops. The kernel tried to access some bad page. We'll have to
- * terminate things with extreme prejudice.
- */
-#ifdef CONFIG_X86_32
- bust_spinlocks(1);
-#else
- flags = oops_begin();
-#endif
-
- show_fault_oops(regs, error_code, address);
-
- tsk->thread.cr2 = address;
- tsk->thread.trap_no = 14;
- tsk->thread.error_code = error_code;
-
-#ifdef CONFIG_X86_32
- die("Oops", regs, error_code);
- bust_spinlocks(0);
- do_exit(SIGKILL);
-#else
- sig = SIGKILL;
- if (__die("Oops", regs, error_code))
- sig = 0;
- /* Executive summary in case the body of the oops scrolled away */
- printk(KERN_EMERG "CR2: %016lx\n", address);
- oops_end(flags, regs, sig);
-#endif
-
-out_of_memory:
- /*
- * We ran out of memory, call the OOM killer, and return the userspace
- * (which will retry the fault, or kill us if we got oom-killed).
- */
- up_read(&mm->mmap_sem);
- pagefault_out_of_memory();
- return;
-
-do_sigbus:
- up_read(&mm->mmap_sem);
-
- /* Kernel mode? Handle exceptions or die */
- if (!(error_code & PF_USER))
- goto no_context;
-#ifdef CONFIG_X86_32
- /* User space => ok to do another page fault */
- if (is_prefetch(regs, address, error_code))
- return;
-#endif
- tsk->thread.cr2 = address;
- tsk->thread.error_code = error_code;
- tsk->thread.trap_no = 14;
- force_sig_info_fault(SIGBUS, BUS_ADRERR, address, tsk);
-}
-
-DEFINE_SPINLOCK(pgd_lock);
-LIST_HEAD(pgd_list);
-
-void vmalloc_sync_all(void)
-{
- unsigned long address;
-
-#ifdef CONFIG_X86_32
- if (SHARED_KERNEL_PMD)
- return;
-
- for (address = VMALLOC_START & PMD_MASK;
- address >= TASK_SIZE && address < FIXADDR_TOP;
- address += PMD_SIZE) {
- unsigned long flags;
- struct page *page;
-
- spin_lock_irqsave(&pgd_lock, flags);
- list_for_each_entry(page, &pgd_list, lru) {
- if (!vmalloc_sync_one(page_address(page),
- address))
- break;
- }
- spin_unlock_irqrestore(&pgd_lock, flags);
- }
-#else /* CONFIG_X86_64 */
- for (address = VMALLOC_START & PGDIR_MASK; address <= VMALLOC_END;
- address += PGDIR_SIZE) {
- const pgd_t *pgd_ref = pgd_offset_k(address);
- unsigned long flags;
- struct page *page;
-
- if (pgd_none(*pgd_ref))
- continue;
- spin_lock_irqsave(&pgd_lock, flags);
- list_for_each_entry(page, &pgd_list, lru) {
- pgd_t *pgd;
- pgd = (pgd_t *)page_address(page) + pgd_index(address);
- if (pgd_none(*pgd))
- set_pgd(pgd, *pgd_ref);
- else
- BUG_ON(pgd_page_vaddr(*pgd) != pgd_page_vaddr(*pgd_ref));
- }
- spin_unlock_irqrestore(&pgd_lock, flags);
- }
-#endif
}
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
index 2cef050..06708ee 100644
--- a/arch/x86/mm/init_32.c
+++ b/arch/x86/mm/init_32.c
@@ -49,7 +49,6 @@
#include <asm/paravirt.h>
#include <asm/setup.h>
#include <asm/cacheflush.h>
-#include <asm/smp.h>
unsigned int __VMALLOC_RESERVE = 128 << 20;
@@ -675,6 +674,86 @@
}
early_param("highmem", parse_highmem);
+#define MSG_HIGHMEM_TOO_BIG \
+ "highmem size (%luMB) is bigger than pages available (%luMB)!\n"
+
+#define MSG_LOWMEM_TOO_SMALL \
+ "highmem size (%luMB) results in <64MB lowmem, ignoring it!\n"
+/*
+ * All of RAM fits into lowmem - but if user wants highmem
+ * artificially via the highmem=x boot parameter then create
+ * it:
+ */
+void __init lowmem_pfn_init(void)
+{
+ /* max_low_pfn is 0, we already have early_res support */
+ max_low_pfn = max_pfn;
+
+ if (highmem_pages == -1)
+ highmem_pages = 0;
+#ifdef CONFIG_HIGHMEM
+ if (highmem_pages >= max_pfn) {
+ printk(KERN_ERR MSG_HIGHMEM_TOO_BIG,
+ pages_to_mb(highmem_pages), pages_to_mb(max_pfn));
+ highmem_pages = 0;
+ }
+ if (highmem_pages) {
+ if (max_low_pfn - highmem_pages < 64*1024*1024/PAGE_SIZE) {
+ printk(KERN_ERR MSG_LOWMEM_TOO_SMALL,
+ pages_to_mb(highmem_pages));
+ highmem_pages = 0;
+ }
+ max_low_pfn -= highmem_pages;
+ }
+#else
+ if (highmem_pages)
+ printk(KERN_ERR "ignoring highmem size on non-highmem kernel!\n");
+#endif
+}
+
+#define MSG_HIGHMEM_TOO_SMALL \
+ "only %luMB highmem pages available, ignoring highmem size of %luMB!\n"
+
+#define MSG_HIGHMEM_TRIMMED \
+ "Warning: only 4GB will be used. Use a HIGHMEM64G enabled kernel!\n"
+/*
+ * We have more RAM than fits into lowmem - we try to put it into
+ * highmem, also taking the highmem=x boot parameter into account:
+ */
+void __init highmem_pfn_init(void)
+{
+ max_low_pfn = MAXMEM_PFN;
+
+ if (highmem_pages == -1)
+ highmem_pages = max_pfn - MAXMEM_PFN;
+
+ if (highmem_pages + MAXMEM_PFN < max_pfn)
+ max_pfn = MAXMEM_PFN + highmem_pages;
+
+ if (highmem_pages + MAXMEM_PFN > max_pfn) {
+ printk(KERN_WARNING MSG_HIGHMEM_TOO_SMALL,
+ pages_to_mb(max_pfn - MAXMEM_PFN),
+ pages_to_mb(highmem_pages));
+ highmem_pages = 0;
+ }
+#ifndef CONFIG_HIGHMEM
+ /* Maximum memory usable is what is directly addressable */
+ printk(KERN_WARNING "Warning only %ldMB will be used.\n", MAXMEM>>20);
+ if (max_pfn > MAX_NONPAE_PFN)
+ printk(KERN_WARNING "Use a HIGHMEM64G enabled kernel.\n");
+ else
+ printk(KERN_WARNING "Use a HIGHMEM enabled kernel.\n");
+ max_pfn = MAXMEM_PFN;
+#else /* !CONFIG_HIGHMEM */
+#ifndef CONFIG_HIGHMEM64G
+ if (max_pfn > MAX_NONPAE_PFN) {
+ max_pfn = MAX_NONPAE_PFN;
+ printk(KERN_WARNING MSG_HIGHMEM_TRIMMED);
+ }
+#endif /* !CONFIG_HIGHMEM64G */
+#endif /* !CONFIG_HIGHMEM */
+}
+
/*
* Determine low and high memory ranges:
*/
@@ -682,68 +761,10 @@
{
/* it could update max_pfn */
- /* max_low_pfn is 0, we already have early_res support */
-
- max_low_pfn = max_pfn;
- if (max_low_pfn > MAXMEM_PFN) {
- if (highmem_pages == -1)
- highmem_pages = max_pfn - MAXMEM_PFN;
- if (highmem_pages + MAXMEM_PFN < max_pfn)
- max_pfn = MAXMEM_PFN + highmem_pages;
- if (highmem_pages + MAXMEM_PFN > max_pfn) {
- printk(KERN_WARNING "only %luMB highmem pages "
- "available, ignoring highmem size of %uMB.\n",
- pages_to_mb(max_pfn - MAXMEM_PFN),
- pages_to_mb(highmem_pages));
- highmem_pages = 0;
- }
- max_low_pfn = MAXMEM_PFN;
-#ifndef CONFIG_HIGHMEM
- /* Maximum memory usable is what is directly addressable */
- printk(KERN_WARNING "Warning only %ldMB will be used.\n",
- MAXMEM>>20);
- if (max_pfn > MAX_NONPAE_PFN)
- printk(KERN_WARNING
- "Use a HIGHMEM64G enabled kernel.\n");
- else
- printk(KERN_WARNING "Use a HIGHMEM enabled kernel.\n");
- max_pfn = MAXMEM_PFN;
-#else /* !CONFIG_HIGHMEM */
-#ifndef CONFIG_HIGHMEM64G
- if (max_pfn > MAX_NONPAE_PFN) {
- max_pfn = MAX_NONPAE_PFN;
- printk(KERN_WARNING "Warning only 4GB will be used."
- "Use a HIGHMEM64G enabled kernel.\n");
- }
-#endif /* !CONFIG_HIGHMEM64G */
-#endif /* !CONFIG_HIGHMEM */
- } else {
- if (highmem_pages == -1)
- highmem_pages = 0;
-#ifdef CONFIG_HIGHMEM
- if (highmem_pages >= max_pfn) {
- printk(KERN_ERR "highmem size specified (%uMB) is "
- "bigger than pages available (%luMB)!.\n",
- pages_to_mb(highmem_pages),
- pages_to_mb(max_pfn));
- highmem_pages = 0;
- }
- if (highmem_pages) {
- if (max_low_pfn - highmem_pages <
- 64*1024*1024/PAGE_SIZE){
- printk(KERN_ERR "highmem size %uMB results in "
- "smaller than 64MB lowmem, ignoring it.\n"
- , pages_to_mb(highmem_pages));
- highmem_pages = 0;
- }
- max_low_pfn -= highmem_pages;
- }
-#else
- if (highmem_pages)
- printk(KERN_ERR "ignoring highmem size on non-highmem"
- " kernel!\n");
-#endif
- }
+ if (max_pfn <= MAXMEM_PFN)
+ lowmem_pfn_init();
+ else
+ highmem_pfn_init();
}
#ifndef CONFIG_NEED_MULTIPLE_NODES
diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c
index f45d5e2..433f7bd 100644
--- a/arch/x86/mm/ioremap.c
+++ b/arch/x86/mm/ioremap.c
@@ -348,7 +348,7 @@
*
* Must be freed with iounmap.
*/
-void __iomem *ioremap_wc(unsigned long phys_addr, unsigned long size)
+void __iomem *ioremap_wc(resource_size_t phys_addr, unsigned long size)
{
if (pat_enabled)
return __ioremap_caller(phys_addr, size, _PAGE_CACHE_WC,
diff --git a/arch/x86/mm/memtest.c b/arch/x86/mm/memtest.c
index 9cab18b..0bcd788 100644
--- a/arch/x86/mm/memtest.c
+++ b/arch/x86/mm/memtest.c
@@ -9,44 +9,44 @@
#include <asm/e820.h>
-static void __init memtest(unsigned long start_phys, unsigned long size,
- unsigned pattern)
+static u64 patterns[] __initdata = {
+ 0,
+ 0xffffffffffffffffULL,
+ 0x5555555555555555ULL,
+ 0xaaaaaaaaaaaaaaaaULL,
+ 0x1111111111111111ULL,
+ 0x2222222222222222ULL,
+ 0x4444444444444444ULL,
+ 0x8888888888888888ULL,
+ 0x3333333333333333ULL,
+ 0x6666666666666666ULL,
+ 0x9999999999999999ULL,
+ 0xccccccccccccccccULL,
+ 0x7777777777777777ULL,
+ 0xbbbbbbbbbbbbbbbbULL,
+ 0xddddddddddddddddULL,
+ 0xeeeeeeeeeeeeeeeeULL,
+ 0x7a6c7258554e494cULL, /* yeah ;-) */
+};
+
+static void __init reserve_bad_mem(u64 pattern, u64 start_bad, u64 end_bad)
{
- unsigned long i;
- unsigned long *start;
- unsigned long start_bad;
- unsigned long last_bad;
- unsigned long val;
- unsigned long start_phys_aligned;
- unsigned long count;
- unsigned long incr;
+ printk(KERN_INFO " %016llx bad mem addr %010llx - %010llx reserved\n",
+ (unsigned long long) pattern,
+ (unsigned long long) start_bad,
+ (unsigned long long) end_bad);
+ reserve_early(start_bad, end_bad, "BAD RAM");
+}
- switch (pattern) {
- case 0:
- val = 0UL;
- break;
- case 1:
- val = -1UL;
- break;
- case 2:
-#ifdef CONFIG_X86_64
- val = 0x5555555555555555UL;
-#else
- val = 0x55555555UL;
-#endif
- break;
- case 3:
-#ifdef CONFIG_X86_64
- val = 0xaaaaaaaaaaaaaaaaUL;
-#else
- val = 0xaaaaaaaaUL;
-#endif
- break;
- default:
- return;
- }
+static void __init memtest(u64 pattern, u64 start_phys, u64 size)
+{
+ u64 i, count;
+ u64 *start;
+ u64 start_bad, last_bad;
+ u64 start_phys_aligned;
+ size_t incr;
- incr = sizeof(unsigned long);
+ incr = sizeof(pattern);
start_phys_aligned = ALIGN(start_phys, incr);
count = (size - (start_phys_aligned - start_phys))/incr;
start = __va(start_phys_aligned);
@@ -54,25 +54,42 @@
last_bad = 0;
for (i = 0; i < count; i++)
- start[i] = val;
+ start[i] = pattern;
for (i = 0; i < count; i++, start++, start_phys_aligned += incr) {
- if (*start != val) {
- if (start_phys_aligned == last_bad + incr) {
- last_bad += incr;
- } else {
- if (start_bad) {
- printk(KERN_CONT "\n %016lx bad mem addr %010lx - %010lx reserved",
- val, start_bad, last_bad + incr);
- reserve_early(start_bad, last_bad + incr, "BAD RAM");
- }
- start_bad = last_bad = start_phys_aligned;
- }
+ if (*start == pattern)
+ continue;
+ if (start_phys_aligned == last_bad + incr) {
+ last_bad += incr;
+ continue;
}
+ if (start_bad)
+ reserve_bad_mem(pattern, start_bad, last_bad + incr);
+ start_bad = last_bad = start_phys_aligned;
}
- if (start_bad) {
- printk(KERN_CONT "\n %016lx bad mem addr %010lx - %010lx reserved",
- val, start_bad, last_bad + incr);
- reserve_early(start_bad, last_bad + incr, "BAD RAM");
+ if (start_bad)
+ reserve_bad_mem(pattern, start_bad, last_bad + incr);
+}
+
+static void __init do_one_pass(u64 pattern, u64 start, u64 end)
+{
+ u64 size = 0;
+
+ while (start < end) {
+ start = find_e820_area_size(start, &size, 1);
+
+ /* done ? */
+ if (start >= end)
+ break;
+ if (start + size > end)
+ size = end - start;
+
+ printk(KERN_INFO " %010llx - %010llx pattern %016llx\n",
+ (unsigned long long) start,
+ (unsigned long long) start + size,
+ (unsigned long long) cpu_to_be64(pattern));
+ memtest(pattern, start, size);
+
+ start += size;
}
}
@@ -90,33 +107,22 @@
void __init early_memtest(unsigned long start, unsigned long end)
{
- u64 t_start, t_size;
- unsigned pattern;
+ unsigned int i;
+ unsigned int idx = 0;
if (!memtest_pattern)
return;
- printk(KERN_INFO "early_memtest: pattern num %d", memtest_pattern);
- for (pattern = 0; pattern < memtest_pattern; pattern++) {
- t_start = start;
- t_size = 0;
- while (t_start < end) {
- t_start = find_e820_area_size(t_start, &t_size, 1);
-
- /* done ? */
- if (t_start >= end)
- break;
- if (t_start + t_size > end)
- t_size = end - t_start;
-
- printk(KERN_CONT "\n %010llx - %010llx pattern %d",
- (unsigned long long)t_start,
- (unsigned long long)t_start + t_size, pattern);
-
- memtest(t_start, t_size, pattern);
-
- t_start += t_size;
- }
+ printk(KERN_INFO "early_memtest: # of tests: %d\n", memtest_pattern);
+ for (i = 0; i < memtest_pattern; i++) {
+ idx = i % ARRAY_SIZE(patterns);
+ do_one_pass(patterns[idx], start, end);
}
- printk(KERN_CONT "\n");
+
+ if (idx > 0) {
+ printk(KERN_INFO "early_memtest: wipe out "
+ "test pattern from memory\n");
+ /* additional test with pattern 0 will do this */
+ do_one_pass(0, start, end);
+ }
}
diff --git a/arch/x86/mm/mmap.c b/arch/x86/mm/mmap.c
index 56fe712..1658296 100644
--- a/arch/x86/mm/mmap.c
+++ b/arch/x86/mm/mmap.c
@@ -4,7 +4,7 @@
* Based on code by Ingo Molnar and Andi Kleen, copyrighted
* as follows:
*
- * Copyright 2003-2004 Red Hat Inc., Durham, North Carolina.
+ * Copyright 2003-2009 Red Hat Inc.
* All Rights Reserved.
* Copyright 2005 Andi Kleen, SUSE Labs.
* Copyright 2007 Jiri Kosina, SUSE Labs.
diff --git a/arch/x86/mm/numa_32.c b/arch/x86/mm/numa_32.c
index d1f7439..3957cd6 100644
--- a/arch/x86/mm/numa_32.c
+++ b/arch/x86/mm/numa_32.c
@@ -194,7 +194,7 @@
size = ALIGN(size, L1_CACHE_BYTES);
if (!allocation || (allocation + size) >= node_remap_end_vaddr[nid])
- return 0;
+ return NULL;
node_remap_alloc_vaddr[nid] += size;
memset(allocation, 0, size);
diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c
index f3516da..64c9cf0 100644
--- a/arch/x86/mm/numa_64.c
+++ b/arch/x86/mm/numa_64.c
@@ -20,6 +20,12 @@
#include <asm/acpi.h>
#include <asm/k8.h>
+#ifdef CONFIG_DEBUG_PER_CPU_MAPS
+# define DBG(x...) printk(KERN_DEBUG x)
+#else
+# define DBG(x...)
+#endif
+
struct pglist_data *node_data[MAX_NUMNODES] __read_mostly;
EXPORT_SYMBOL(node_data);
@@ -33,6 +39,21 @@
static unsigned long __initdata nodemap_addr;
static unsigned long __initdata nodemap_size;
+DEFINE_PER_CPU(int, node_number) = 0;
+EXPORT_PER_CPU_SYMBOL(node_number);
+
+/*
+ * Map cpu index to node index
+ */
+DEFINE_EARLY_PER_CPU(int, x86_cpu_to_node_map, NUMA_NO_NODE);
+EXPORT_EARLY_PER_CPU_SYMBOL(x86_cpu_to_node_map);
+
+/*
+ * Which logical CPUs are on which nodes
+ */
+cpumask_t *node_to_cpumask_map;
+EXPORT_SYMBOL(node_to_cpumask_map);
+
/*
* Given a shift value, try to populate memnodemap[]
* Returns :
@@ -640,3 +661,199 @@
#endif
+/*
+ * Allocate node_to_cpumask_map based on number of available nodes
+ * Requires node_possible_map to be valid.
+ *
+ * Note: node_to_cpumask() is not valid until after this is done.
+ * (Use CONFIG_DEBUG_PER_CPU_MAPS to check this.)
+ */
+void __init setup_node_to_cpumask_map(void)
+{
+ unsigned int node, num = 0;
+ cpumask_t *map;
+
+ /* setup nr_node_ids if not done yet */
+ if (nr_node_ids == MAX_NUMNODES) {
+ for_each_node_mask(node, node_possible_map)
+ num = node;
+ nr_node_ids = num + 1;
+ }
+
+ /* allocate the map */
+ map = alloc_bootmem_low(nr_node_ids * sizeof(cpumask_t));
+ DBG("node_to_cpumask_map at %p for %d nodes\n", map, nr_node_ids);
+
+ pr_debug("Node to cpumask map at %p for %d nodes\n",
+ map, nr_node_ids);
+
+ /* node_to_cpumask() will now work */
+ node_to_cpumask_map = map;
+}
+
+void __cpuinit numa_set_node(int cpu, int node)
+{
+ int *cpu_to_node_map = early_per_cpu_ptr(x86_cpu_to_node_map);
+
+ /* early setting, no percpu area yet */
+ if (cpu_to_node_map) {
+ cpu_to_node_map[cpu] = node;
+ return;
+ }
+
+#ifdef CONFIG_DEBUG_PER_CPU_MAPS
+ if (cpu >= nr_cpu_ids || !cpu_possible(cpu)) {
+ printk(KERN_ERR "numa_set_node: invalid cpu# (%d)\n", cpu);
+ dump_stack();
+ return;
+ }
+#endif
+ per_cpu(x86_cpu_to_node_map, cpu) = node;
+
+ if (node != NUMA_NO_NODE)
+ per_cpu(node_number, cpu) = node;
+}
+
+void __cpuinit numa_clear_node(int cpu)
+{
+ numa_set_node(cpu, NUMA_NO_NODE);
+}
+
+#ifndef CONFIG_DEBUG_PER_CPU_MAPS
+
+void __cpuinit numa_add_cpu(int cpu)
+{
+ cpu_set(cpu, node_to_cpumask_map[early_cpu_to_node(cpu)]);
+}
+
+void __cpuinit numa_remove_cpu(int cpu)
+{
+ cpu_clear(cpu, node_to_cpumask_map[early_cpu_to_node(cpu)]);
+}
+
+#else /* CONFIG_DEBUG_PER_CPU_MAPS */
+
+/*
+ * --------- debug versions of the numa functions ---------
+ */
+static void __cpuinit numa_set_cpumask(int cpu, int enable)
+{
+ int node = early_cpu_to_node(cpu);
+ cpumask_t *mask;
+ char buf[64];
+
+ if (node_to_cpumask_map == NULL) {
+ printk(KERN_ERR "node_to_cpumask_map NULL\n");
+ dump_stack();
+ return;
+ }
+
+ mask = &node_to_cpumask_map[node];
+ if (enable)
+ cpu_set(cpu, *mask);
+ else
+ cpu_clear(cpu, *mask);
+
+ cpulist_scnprintf(buf, sizeof(buf), mask);
+ printk(KERN_DEBUG "%s cpu %d node %d: mask now %s\n",
+ enable ? "numa_add_cpu" : "numa_remove_cpu", cpu, node, buf);
+}
+
+void __cpuinit numa_add_cpu(int cpu)
+{
+ numa_set_cpumask(cpu, 1);
+}
+
+void __cpuinit numa_remove_cpu(int cpu)
+{
+ numa_set_cpumask(cpu, 0);
+}
+
+int cpu_to_node(int cpu)
+{
+ if (early_per_cpu_ptr(x86_cpu_to_node_map)) {
+ printk(KERN_WARNING
+ "cpu_to_node(%d): usage too early!\n", cpu);
+ dump_stack();
+ return early_per_cpu_ptr(x86_cpu_to_node_map)[cpu];
+ }
+ return per_cpu(x86_cpu_to_node_map, cpu);
+}
+EXPORT_SYMBOL(cpu_to_node);
+
+/*
+ * Same function as cpu_to_node() but used if called before the
+ * per_cpu areas are setup.
+ */
+int early_cpu_to_node(int cpu)
+{
+ if (early_per_cpu_ptr(x86_cpu_to_node_map))
+ return early_per_cpu_ptr(x86_cpu_to_node_map)[cpu];
+
+ if (!cpu_possible(cpu)) {
+ printk(KERN_WARNING
+ "early_cpu_to_node(%d): no per_cpu area!\n", cpu);
+ dump_stack();
+ return NUMA_NO_NODE;
+ }
+ return per_cpu(x86_cpu_to_node_map, cpu);
+}
+
+
+/* empty cpumask */
+static const cpumask_t cpu_mask_none;
+
+/*
+ * Returns a pointer to the bitmask of CPUs on Node 'node'.
+ */
+const cpumask_t *cpumask_of_node(int node)
+{
+ if (node_to_cpumask_map == NULL) {
+ printk(KERN_WARNING
+ "cpumask_of_node(%d): no node_to_cpumask_map!\n",
+ node);
+ dump_stack();
+ return (const cpumask_t *)&cpu_online_map;
+ }
+ if (node >= nr_node_ids) {
+ printk(KERN_WARNING
+ "cpumask_of_node(%d): node > nr_node_ids(%d)\n",
+ node, nr_node_ids);
+ dump_stack();
+ return &cpu_mask_none;
+ }
+ return &node_to_cpumask_map[node];
+}
+EXPORT_SYMBOL(cpumask_of_node);
+
+/*
+ * Returns a bitmask of CPUs on Node 'node'.
+ *
+ * Side note: this function creates the returned cpumask on the stack
+ * so with a high NR_CPUS count, excessive stack space is used. The
+ * node_to_cpumask_ptr function should be used whenever possible.
+ */
+cpumask_t node_to_cpumask(int node)
+{
+ if (node_to_cpumask_map == NULL) {
+ printk(KERN_WARNING
+ "node_to_cpumask(%d): no node_to_cpumask_map!\n", node);
+ dump_stack();
+ return cpu_online_map;
+ }
+ if (node >= nr_node_ids) {
+ printk(KERN_WARNING
+ "node_to_cpumask(%d): node > nr_node_ids(%d)\n",
+ node, nr_node_ids);
+ dump_stack();
+ return cpu_mask_none;
+ }
+ return node_to_cpumask_map[node];
+}
+EXPORT_SYMBOL(node_to_cpumask);
+
+/*
+ * --------- end of debug versions of the numa functions ---------
+ */
+
+#endif /* CONFIG_DEBUG_PER_CPU_MAPS */
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index 7be47d1a9..8253bc9 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -482,6 +482,13 @@
pbase = (pte_t *)page_address(base);
paravirt_alloc_pte(&init_mm, page_to_pfn(base));
ref_prot = pte_pgprot(pte_clrhuge(*kpte));
+ /*
+ * If we ever want to utilize the PAT bit, we need to
+ * update this function to make sure it's converted from
+ * bit 12 to bit 7 when we cross from the 2MB level to
+ * the 4K level:
+ */
+ WARN_ON_ONCE(pgprot_val(ref_prot) & _PAGE_PAT_LARGE);
#ifdef CONFIG_X86_64
if (level == PG_LEVEL_1G) {
diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c
index e0ab173..2ed3715 100644
--- a/arch/x86/mm/pat.c
+++ b/arch/x86/mm/pat.c
@@ -31,7 +31,7 @@
#ifdef CONFIG_X86_PAT
int __read_mostly pat_enabled = 1;
-void __cpuinit pat_disable(char *reason)
+void __cpuinit pat_disable(const char *reason)
{
pat_enabled = 0;
printk(KERN_INFO "%s\n", reason);
@@ -43,6 +43,11 @@
return 0;
}
early_param("nopat", nopat);
+#else
+static inline void pat_disable(const char *reason)
+{
+ (void)reason;
+}
#endif
@@ -79,16 +84,20 @@
if (!pat_enabled)
return;
- /* Paranoia check. */
- if (!cpu_has_pat && boot_pat_state) {
- /*
- * If this happens we are on a secondary CPU, but
- * switched to PAT on the boot CPU. We have no way to
- * undo PAT.
- */
- printk(KERN_ERR "PAT enabled, "
- "but not supported by secondary CPU\n");
- BUG();
+ if (!cpu_has_pat) {
+ if (!boot_pat_state) {
+ pat_disable("PAT not supported by CPU.");
+ return;
+ } else {
+ /*
+ * If this happens we are on a secondary CPU, but
+ * switched to PAT on the boot CPU. We have no way to
+ * undo PAT.
+ */
+ printk(KERN_ERR "PAT enabled, "
+ "but not supported by secondary CPU\n");
+ BUG();
+ }
}
/* Set PWT to Write-Combining. All other bits stay the same */
@@ -626,6 +635,33 @@
}
/*
+ * Change the memory type for the physial address range in kernel identity
+ * mapping space if that range is a part of identity map.
+ */
+int kernel_map_sync_memtype(u64 base, unsigned long size, unsigned long flags)
+{
+ unsigned long id_sz;
+
+ if (!pat_enabled || base >= __pa(high_memory))
+ return 0;
+
+ id_sz = (__pa(high_memory) < base + size) ?
+ __pa(high_memory) - base :
+ size;
+
+ if (ioremap_change_attr((unsigned long)__va(base), id_sz, flags) < 0) {
+ printk(KERN_INFO
+ "%s:%d ioremap_change_attr failed %s "
+ "for %Lx-%Lx\n",
+ current->comm, current->pid,
+ cattr_name(flags),
+ base, (unsigned long long)(base + size));
+ return -EINVAL;
+ }
+ return 0;
+}
+
+/*
* Internal interface to reserve a range of physical memory with prot.
* Reserved non RAM regions only and after successful reserve_memtype,
* this func also keeps identity mapping (if any) in sync with this new prot.
@@ -634,7 +670,7 @@
int strict_prot)
{
int is_ram = 0;
- int id_sz, ret;
+ int ret;
unsigned long flags;
unsigned long want_flags = (pgprot_val(*vma_prot) & _PAGE_CACHE_MASK);
@@ -671,23 +707,8 @@
flags);
}
- /* Need to keep identity mapping in sync */
- if (paddr >= __pa(high_memory))
- return 0;
-
- id_sz = (__pa(high_memory) < paddr + size) ?
- __pa(high_memory) - paddr :
- size;
-
- if (ioremap_change_attr((unsigned long)__va(paddr), id_sz, flags) < 0) {
+ if (kernel_map_sync_memtype(paddr, size, flags) < 0) {
free_memtype(paddr, paddr + size);
- printk(KERN_ERR
- "%s:%d reserve_pfn_range ioremap_change_attr failed %s "
- "for %Lx-%Lx\n",
- current->comm, current->pid,
- cattr_name(flags),
- (unsigned long long)paddr,
- (unsigned long long)(paddr + size));
return -EINVAL;
}
return 0;
diff --git a/arch/x86/mm/srat_64.c b/arch/x86/mm/srat_64.c
index 09737c8..574c8bc 100644
--- a/arch/x86/mm/srat_64.c
+++ b/arch/x86/mm/srat_64.c
@@ -20,7 +20,8 @@
#include <asm/proto.h>
#include <asm/numa.h>
#include <asm/e820.h>
-#include <asm/genapic.h>
+#include <asm/apic.h>
+#include <asm/uv/uv.h>
int acpi_numa __initdata;
diff --git a/arch/x86/kernel/tlb_64.c b/arch/x86/mm/tlb.c
similarity index 67%
rename from arch/x86/kernel/tlb_64.c
rename to arch/x86/mm/tlb.c
index f8be6f1..a654d59 100644
--- a/arch/x86/kernel/tlb_64.c
+++ b/arch/x86/mm/tlb.c
@@ -1,24 +1,19 @@
#include <linux/init.h>
#include <linux/mm.h>
-#include <linux/delay.h>
#include <linux/spinlock.h>
#include <linux/smp.h>
-#include <linux/kernel_stat.h>
-#include <linux/mc146818rtc.h>
#include <linux/interrupt.h>
+#include <linux/module.h>
-#include <asm/mtrr.h>
-#include <asm/pgalloc.h>
#include <asm/tlbflush.h>
#include <asm/mmu_context.h>
-#include <asm/proto.h>
-#include <asm/apicdef.h>
-#include <asm/idle.h>
-#include <asm/uv/uv_hub.h>
-#include <asm/uv/uv_bau.h>
+#include <asm/apic.h>
+#include <asm/uv/uv.h>
-#include <mach_ipi.h>
+DEFINE_PER_CPU_SHARED_ALIGNED(struct tlb_state, cpu_tlbstate)
+ = { &init_mm, 0, };
+
/*
* Smarter SMP flushing macros.
* c/o Linus Torvalds.
@@ -33,7 +28,7 @@
* To avoid global state use 8 different call vectors.
* Each CPU uses a specific vector to trigger flushes on other
* CPUs. Depending on the received vector the target CPUs look into
- * the right per cpu variable for the flush data.
+ * the right array slot for the flush data.
*
* With more than 8 CPUs they are hashed to the 8 available
* vectors. The limited global vector space forces us to this right now.
@@ -43,18 +38,18 @@
union smp_flush_state {
struct {
- cpumask_t flush_cpumask;
struct mm_struct *flush_mm;
unsigned long flush_va;
spinlock_t tlbstate_lock;
+ DECLARE_BITMAP(flush_cpumask, NR_CPUS);
};
- char pad[SMP_CACHE_BYTES];
-} ____cacheline_aligned;
+ char pad[CONFIG_X86_INTERNODE_CACHE_BYTES];
+} ____cacheline_internodealigned_in_smp;
/* State is put into the per CPU data section, but padded
to a full cache line because other CPUs can access it and we don't
want false sharing in the per cpu data segment. */
-static DEFINE_PER_CPU(union smp_flush_state, flush_state);
+static union smp_flush_state flush_state[NUM_INVALIDATE_TLB_VECTORS];
/*
* We cannot call mmdrop() because we are in interrupt context,
@@ -62,9 +57,9 @@
*/
void leave_mm(int cpu)
{
- if (read_pda(mmu_state) == TLBSTATE_OK)
+ if (percpu_read(cpu_tlbstate.state) == TLBSTATE_OK)
BUG();
- cpu_clear(cpu, read_pda(active_mm)->cpu_vm_mask);
+ cpu_clear(cpu, percpu_read(cpu_tlbstate.active_mm)->cpu_vm_mask);
load_cr3(swapper_pg_dir);
}
EXPORT_SYMBOL_GPL(leave_mm);
@@ -117,10 +112,20 @@
* Interrupts are disabled.
*/
-asmlinkage void smp_invalidate_interrupt(struct pt_regs *regs)
+/*
+ * FIXME: use of asmlinkage is not consistent. On x86_64 it's noop
+ * but still used for documentation purpose but the usage is slightly
+ * inconsistent. On x86_32, asmlinkage is regparm(0) but interrupt
+ * entry calls in with the first parameter in %eax. Maybe define
+ * intrlinkage?
+ */
+#ifdef CONFIG_X86_64
+asmlinkage
+#endif
+void smp_invalidate_interrupt(struct pt_regs *regs)
{
- int cpu;
- int sender;
+ unsigned int cpu;
+ unsigned int sender;
union smp_flush_state *f;
cpu = smp_processor_id();
@@ -129,9 +134,9 @@
* Use that to determine where the sender put the data.
*/
sender = ~regs->orig_ax - INVALIDATE_TLB_VECTOR_START;
- f = &per_cpu(flush_state, sender);
+ f = &flush_state[sender];
- if (!cpu_isset(cpu, f->flush_cpumask))
+ if (!cpumask_test_cpu(cpu, to_cpumask(f->flush_cpumask)))
goto out;
/*
* This was a BUG() but until someone can quote me the
@@ -142,8 +147,8 @@
* BUG();
*/
- if (f->flush_mm == read_pda(active_mm)) {
- if (read_pda(mmu_state) == TLBSTATE_OK) {
+ if (f->flush_mm == percpu_read(cpu_tlbstate.active_mm)) {
+ if (percpu_read(cpu_tlbstate.state) == TLBSTATE_OK) {
if (f->flush_va == TLB_FLUSH_ALL)
local_flush_tlb();
else
@@ -153,23 +158,21 @@
}
out:
ack_APIC_irq();
- cpu_clear(cpu, f->flush_cpumask);
+ smp_mb__before_clear_bit();
+ cpumask_clear_cpu(cpu, to_cpumask(f->flush_cpumask));
+ smp_mb__after_clear_bit();
inc_irq_stat(irq_tlb_count);
}
-void native_flush_tlb_others(const cpumask_t *cpumaskp, struct mm_struct *mm,
- unsigned long va)
+static void flush_tlb_others_ipi(const struct cpumask *cpumask,
+ struct mm_struct *mm, unsigned long va)
{
- int sender;
+ unsigned int sender;
union smp_flush_state *f;
- cpumask_t cpumask = *cpumaskp;
-
- if (is_uv_system() && uv_flush_tlb_others(&cpumask, mm, va))
- return;
/* Caller has disabled preemption */
sender = smp_processor_id() % NUM_INVALIDATE_TLB_VECTORS;
- f = &per_cpu(flush_state, sender);
+ f = &flush_state[sender];
/*
* Could avoid this lock when
@@ -180,7 +183,8 @@
f->flush_mm = mm;
f->flush_va = va;
- cpus_or(f->flush_cpumask, cpumask, f->flush_cpumask);
+ cpumask_andnot(to_cpumask(f->flush_cpumask),
+ cpumask, cpumask_of(smp_processor_id()));
/*
* Make the above memory operations globally visible before
@@ -191,9 +195,10 @@
* We have to send the IPI only to
* CPUs affected.
*/
- send_IPI_mask(&cpumask, INVALIDATE_TLB_VECTOR_START + sender);
+ apic->send_IPI_mask(to_cpumask(f->flush_cpumask),
+ INVALIDATE_TLB_VECTOR_START + sender);
- while (!cpus_empty(f->flush_cpumask))
+ while (!cpumask_empty(to_cpumask(f->flush_cpumask)))
cpu_relax();
f->flush_mm = NULL;
@@ -201,12 +206,28 @@
spin_unlock(&f->tlbstate_lock);
}
+void native_flush_tlb_others(const struct cpumask *cpumask,
+ struct mm_struct *mm, unsigned long va)
+{
+ if (is_uv_system()) {
+ unsigned int cpu;
+
+ cpu = get_cpu();
+ cpumask = uv_flush_tlb_others(cpumask, mm, va, cpu);
+ if (cpumask)
+ flush_tlb_others_ipi(cpumask, mm, va);
+ put_cpu();
+ return;
+ }
+ flush_tlb_others_ipi(cpumask, mm, va);
+}
+
static int __cpuinit init_smp_flush(void)
{
int i;
- for_each_possible_cpu(i)
- spin_lock_init(&per_cpu(flush_state, i).tlbstate_lock);
+ for (i = 0; i < ARRAY_SIZE(flush_state); i++)
+ spin_lock_init(&flush_state[i].tlbstate_lock);
return 0;
}
@@ -215,25 +236,18 @@
void flush_tlb_current_task(void)
{
struct mm_struct *mm = current->mm;
- cpumask_t cpu_mask;
preempt_disable();
- cpu_mask = mm->cpu_vm_mask;
- cpu_clear(smp_processor_id(), cpu_mask);
local_flush_tlb();
- if (!cpus_empty(cpu_mask))
- flush_tlb_others(cpu_mask, mm, TLB_FLUSH_ALL);
+ if (cpumask_any_but(&mm->cpu_vm_mask, smp_processor_id()) < nr_cpu_ids)
+ flush_tlb_others(&mm->cpu_vm_mask, mm, TLB_FLUSH_ALL);
preempt_enable();
}
void flush_tlb_mm(struct mm_struct *mm)
{
- cpumask_t cpu_mask;
-
preempt_disable();
- cpu_mask = mm->cpu_vm_mask;
- cpu_clear(smp_processor_id(), cpu_mask);
if (current->active_mm == mm) {
if (current->mm)
@@ -241,8 +255,8 @@
else
leave_mm(smp_processor_id());
}
- if (!cpus_empty(cpu_mask))
- flush_tlb_others(cpu_mask, mm, TLB_FLUSH_ALL);
+ if (cpumask_any_but(&mm->cpu_vm_mask, smp_processor_id()) < nr_cpu_ids)
+ flush_tlb_others(&mm->cpu_vm_mask, mm, TLB_FLUSH_ALL);
preempt_enable();
}
@@ -250,11 +264,8 @@
void flush_tlb_page(struct vm_area_struct *vma, unsigned long va)
{
struct mm_struct *mm = vma->vm_mm;
- cpumask_t cpu_mask;
preempt_disable();
- cpu_mask = mm->cpu_vm_mask;
- cpu_clear(smp_processor_id(), cpu_mask);
if (current->active_mm == mm) {
if (current->mm)
@@ -263,8 +274,8 @@
leave_mm(smp_processor_id());
}
- if (!cpus_empty(cpu_mask))
- flush_tlb_others(cpu_mask, mm, va);
+ if (cpumask_any_but(&mm->cpu_vm_mask, smp_processor_id()) < nr_cpu_ids)
+ flush_tlb_others(&mm->cpu_vm_mask, mm, va);
preempt_enable();
}
@@ -274,7 +285,7 @@
unsigned long cpu = smp_processor_id();
__flush_tlb_all();
- if (read_pda(mmu_state) == TLBSTATE_LAZY)
+ if (percpu_read(cpu_tlbstate.state) == TLBSTATE_LAZY)
leave_mm(cpu);
}
diff --git a/arch/x86/oprofile/nmi_int.c b/arch/x86/oprofile/nmi_int.c
index 202864a..c638685 100644
--- a/arch/x86/oprofile/nmi_int.c
+++ b/arch/x86/oprofile/nmi_int.c
@@ -40,8 +40,9 @@
switch (val) {
case DIE_NMI:
- if (model->check_ctrs(args->regs, &per_cpu(cpu_msrs, cpu)))
- ret = NOTIFY_STOP;
+ case DIE_NMI_IPI:
+ model->check_ctrs(args->regs, &per_cpu(cpu_msrs, cpu));
+ ret = NOTIFY_STOP;
break;
default:
break;
@@ -134,7 +135,7 @@
static struct notifier_block profile_exceptions_nb = {
.notifier_call = profile_exceptions_notify,
.next = NULL,
- .priority = 0
+ .priority = 2
};
static int nmi_setup(void)
diff --git a/arch/x86/oprofile/op_model_ppro.c b/arch/x86/oprofile/op_model_ppro.c
index 10131fb..4da7230 100644
--- a/arch/x86/oprofile/op_model_ppro.c
+++ b/arch/x86/oprofile/op_model_ppro.c
@@ -18,7 +18,7 @@
#include <asm/msr.h>
#include <asm/apic.h>
#include <asm/nmi.h>
-#include <asm/intel_arch_perfmon.h>
+#include <asm/perf_counter.h>
#include "op_x86_model.h"
#include "op_counter.h"
@@ -136,6 +136,13 @@
u64 val;
int i;
+ /*
+ * This can happen if perf counters are in use when
+ * we steal the die notifier NMI.
+ */
+ if (unlikely(!reset_value))
+ goto out;
+
for (i = 0 ; i < num_counters; ++i) {
if (!reset_value[i])
continue;
@@ -146,6 +153,7 @@
}
}
+out:
/* Only P6 based Pentium M need to re-unmask the apic vector but it
* doesn't hurt other P6 variant */
apic_write(APIC_LVTPC, apic_read(APIC_LVTPC) & ~APIC_LVT_MASKED);
diff --git a/arch/x86/pci/numaq_32.c b/arch/x86/pci/numaq_32.c
index 2089354..8eb295e 100644
--- a/arch/x86/pci/numaq_32.c
+++ b/arch/x86/pci/numaq_32.c
@@ -5,7 +5,7 @@
#include <linux/pci.h>
#include <linux/init.h>
#include <linux/nodemask.h>
-#include <mach_apic.h>
+#include <asm/apic.h>
#include <asm/mpspec.h>
#include <asm/pci_x86.h>
@@ -18,10 +18,6 @@
#define QUADLOCAL2BUS(quad,local) (quad_local_to_mp_bus_id[quad][local])
-/* Where the IO area was mapped on multiquad, always 0 otherwise */
-void *xquad_portio;
-EXPORT_SYMBOL(xquad_portio);
-
#define XQUAD_PORT_ADDR(port, quad) (xquad_portio + (XQUAD_PORTIO_QUAD*quad) + port)
#define PCI_CONF1_MQ_ADDRESS(bus, devfn, reg) \
diff --git a/arch/x86/pci/pcbios.c b/arch/x86/pci/pcbios.c
index b82cae9..1c975cc 100644
--- a/arch/x86/pci/pcbios.c
+++ b/arch/x86/pci/pcbios.c
@@ -7,7 +7,7 @@
#include <linux/module.h>
#include <linux/uaccess.h>
#include <asm/pci_x86.h>
-#include <asm/mach-default/pci-functions.h>
+#include <asm/pci-functions.h>
/* BIOS32 signature: "_32_" */
#define BIOS32_SIGNATURE (('_' << 0) + ('3' << 8) + ('2' << 16) + ('_' << 24))
diff --git a/arch/x86/power/hibernate_asm_32.S b/arch/x86/power/hibernate_asm_32.S
index d1e9b53..b641388 100644
--- a/arch/x86/power/hibernate_asm_32.S
+++ b/arch/x86/power/hibernate_asm_32.S
@@ -8,7 +8,7 @@
#include <linux/linkage.h>
#include <asm/segment.h>
-#include <asm/page.h>
+#include <asm/page_types.h>
#include <asm/asm-offsets.h>
#include <asm/processor-flags.h>
diff --git a/arch/x86/power/hibernate_asm_64.S b/arch/x86/power/hibernate_asm_64.S
index 0004159..9356547 100644
--- a/arch/x86/power/hibernate_asm_64.S
+++ b/arch/x86/power/hibernate_asm_64.S
@@ -18,7 +18,7 @@
.text
#include <linux/linkage.h>
#include <asm/segment.h>
-#include <asm/page.h>
+#include <asm/page_types.h>
#include <asm/asm-offsets.h>
#include <asm/processor-flags.h>
diff --git a/arch/x86/vdso/Makefile b/arch/x86/vdso/Makefile
index 4d6ef0a..16a9020 100644
--- a/arch/x86/vdso/Makefile
+++ b/arch/x86/vdso/Makefile
@@ -38,7 +38,7 @@
$(call if_changed,objcopy)
CFL := $(PROFILING) -mcmodel=small -fPIC -O2 -fasynchronous-unwind-tables -m64 \
- $(filter -g%,$(KBUILD_CFLAGS))
+ $(filter -g%,$(KBUILD_CFLAGS)) $(call cc-option, -fno-stack-protector)
$(vobjs): KBUILD_CFLAGS += $(CFL)
diff --git a/arch/x86/vdso/vma.c b/arch/x86/vdso/vma.c
index 9c98cc6..7133cdf 100644
--- a/arch/x86/vdso/vma.c
+++ b/arch/x86/vdso/vma.c
@@ -85,8 +85,8 @@
unsigned long addr, end;
unsigned offset;
end = (start + PMD_SIZE - 1) & PMD_MASK;
- if (end >= TASK_SIZE64)
- end = TASK_SIZE64;
+ if (end >= TASK_SIZE_MAX)
+ end = TASK_SIZE_MAX;
end -= len;
/* This loses some more bits than a modulo, but is cheaper */
offset = get_random_int() & (PTRS_PER_PTE - 1);
diff --git a/arch/x86/xen/Kconfig b/arch/x86/xen/Kconfig
index 87b9ab1..b83e119 100644
--- a/arch/x86/xen/Kconfig
+++ b/arch/x86/xen/Kconfig
@@ -6,7 +6,7 @@
bool "Xen guest support"
select PARAVIRT
select PARAVIRT_CLOCK
- depends on X86_64 || (X86_32 && X86_PAE && !(X86_VISWS || X86_VOYAGER))
+ depends on X86_64 || (X86_32 && X86_PAE && !X86_VISWS)
depends on X86_CMPXCHG && X86_TSC
help
This is the Linux Xen port. Enabling this will allow the
diff --git a/arch/x86/xen/Makefile b/arch/x86/xen/Makefile
index 6dcefba..3b767d0 100644
--- a/arch/x86/xen/Makefile
+++ b/arch/x86/xen/Makefile
@@ -6,7 +6,8 @@
endif
obj-y := enlighten.o setup.o multicalls.o mmu.o irq.o \
- time.o xen-asm_$(BITS).o grant-table.o suspend.o
+ time.o xen-asm.o xen-asm_$(BITS).o \
+ grant-table.o suspend.o
obj-$(CONFIG_SMP) += smp.o spinlock.o
obj-$(CONFIG_XEN_DEBUG_FS) += debugfs.o
\ No newline at end of file
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index b58e963..c52f403 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -61,40 +61,13 @@
enum xen_domain_type xen_domain_type = XEN_NATIVE;
EXPORT_SYMBOL_GPL(xen_domain_type);
-/*
- * Identity map, in addition to plain kernel map. This needs to be
- * large enough to allocate page table pages to allocate the rest.
- * Each page can map 2MB.
- */
-static pte_t level1_ident_pgt[PTRS_PER_PTE * 4] __page_aligned_bss;
-
-#ifdef CONFIG_X86_64
-/* l3 pud for userspace vsyscall mapping */
-static pud_t level3_user_vsyscall[PTRS_PER_PUD] __page_aligned_bss;
-#endif /* CONFIG_X86_64 */
-
-/*
- * Note about cr3 (pagetable base) values:
- *
- * xen_cr3 contains the current logical cr3 value; it contains the
- * last set cr3. This may not be the current effective cr3, because
- * its update may be being lazily deferred. However, a vcpu looking
- * at its own cr3 can use this value knowing that it everything will
- * be self-consistent.
- *
- * xen_current_cr3 contains the actual vcpu cr3; it is set once the
- * hypercall to set the vcpu cr3 is complete (so it may be a little
- * out of date, but it will never be set early). If one vcpu is
- * looking at another vcpu's cr3 value, it should use this variable.
- */
-DEFINE_PER_CPU(unsigned long, xen_cr3); /* cr3 stored as physaddr */
-DEFINE_PER_CPU(unsigned long, xen_current_cr3); /* actual vcpu cr3 */
-
struct start_info *xen_start_info;
EXPORT_SYMBOL_GPL(xen_start_info);
struct shared_info xen_dummy_shared_info;
+void *xen_initial_gdt;
+
/*
* Point at some empty memory to start with. We map the real shared_info
* page as soon as fixmap is up and running.
@@ -114,14 +87,7 @@
*
* 0: not available, 1: available
*/
-static int have_vcpu_info_placement =
-#ifdef CONFIG_X86_32
- 1
-#else
- 0
-#endif
- ;
-
+static int have_vcpu_info_placement = 1;
static void xen_vcpu_setup(int cpu)
{
@@ -237,7 +203,7 @@
return HYPERVISOR_get_debugreg(reg);
}
-static void xen_leave_lazy(void)
+void xen_leave_lazy(void)
{
paravirt_leave_lazy(paravirt_get_lazy_mode());
xen_mc_flush();
@@ -357,13 +323,14 @@
static void xen_load_tls(struct thread_struct *t, unsigned int cpu)
{
/*
- * XXX sleazy hack: If we're being called in a lazy-cpu zone,
- * it means we're in a context switch, and %gs has just been
- * saved. This means we can zero it out to prevent faults on
- * exit from the hypervisor if the next process has no %gs.
- * Either way, it has been saved, and the new value will get
- * loaded properly. This will go away as soon as Xen has been
- * modified to not save/restore %gs for normal hypercalls.
+ * XXX sleazy hack: If we're being called in a lazy-cpu zone
+ * and lazy gs handling is enabled, it means we're in a
+ * context switch, and %gs has just been saved. This means we
+ * can zero it out to prevent faults on exit from the
+ * hypervisor if the next process has no %gs. Either way, it
+ * has been saved, and the new value will get loaded properly.
+ * This will go away as soon as Xen has been modified to not
+ * save/restore %gs for normal hypercalls.
*
* On x86_64, this hack is not used for %gs, because gs points
* to KERNEL_GS_BASE (and uses it for PDA references), so we
@@ -375,7 +342,7 @@
*/
if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_CPU) {
#ifdef CONFIG_X86_32
- loadsegment(gs, 0);
+ lazy_load_gs(0);
#else
loadsegment(fs, 0);
#endif
@@ -587,94 +554,18 @@
return 0;
}
-static struct apic_ops xen_basic_apic_ops = {
- .read = xen_apic_read,
- .write = xen_apic_write,
- .icr_read = xen_apic_icr_read,
- .icr_write = xen_apic_icr_write,
- .wait_icr_idle = xen_apic_wait_icr_idle,
- .safe_wait_icr_idle = xen_safe_apic_wait_icr_idle,
-};
+static void set_xen_basic_apic_ops(void)
+{
+ apic->read = xen_apic_read;
+ apic->write = xen_apic_write;
+ apic->icr_read = xen_apic_icr_read;
+ apic->icr_write = xen_apic_icr_write;
+ apic->wait_icr_idle = xen_apic_wait_icr_idle;
+ apic->safe_wait_icr_idle = xen_safe_apic_wait_icr_idle;
+}
#endif
-static void xen_flush_tlb(void)
-{
- struct mmuext_op *op;
- struct multicall_space mcs;
-
- preempt_disable();
-
- mcs = xen_mc_entry(sizeof(*op));
-
- op = mcs.args;
- op->cmd = MMUEXT_TLB_FLUSH_LOCAL;
- MULTI_mmuext_op(mcs.mc, op, 1, NULL, DOMID_SELF);
-
- xen_mc_issue(PARAVIRT_LAZY_MMU);
-
- preempt_enable();
-}
-
-static void xen_flush_tlb_single(unsigned long addr)
-{
- struct mmuext_op *op;
- struct multicall_space mcs;
-
- preempt_disable();
-
- mcs = xen_mc_entry(sizeof(*op));
- op = mcs.args;
- op->cmd = MMUEXT_INVLPG_LOCAL;
- op->arg1.linear_addr = addr & PAGE_MASK;
- MULTI_mmuext_op(mcs.mc, op, 1, NULL, DOMID_SELF);
-
- xen_mc_issue(PARAVIRT_LAZY_MMU);
-
- preempt_enable();
-}
-
-static void xen_flush_tlb_others(const cpumask_t *cpus, struct mm_struct *mm,
- unsigned long va)
-{
- struct {
- struct mmuext_op op;
- cpumask_t mask;
- } *args;
- cpumask_t cpumask = *cpus;
- struct multicall_space mcs;
-
- /*
- * A couple of (to be removed) sanity checks:
- *
- * - current CPU must not be in mask
- * - mask must exist :)
- */
- BUG_ON(cpus_empty(cpumask));
- BUG_ON(cpu_isset(smp_processor_id(), cpumask));
- BUG_ON(!mm);
-
- /* If a CPU which we ran on has gone down, OK. */
- cpus_and(cpumask, cpumask, cpu_online_map);
- if (cpus_empty(cpumask))
- return;
-
- mcs = xen_mc_entry(sizeof(*args));
- args = mcs.args;
- args->mask = cpumask;
- args->op.arg2.vcpumask = &args->mask;
-
- if (va == TLB_FLUSH_ALL) {
- args->op.cmd = MMUEXT_TLB_FLUSH_MULTI;
- } else {
- args->op.cmd = MMUEXT_INVLPG_MULTI;
- args->op.arg1.linear_addr = va;
- }
-
- MULTI_mmuext_op(mcs.mc, &args->op, 1, NULL, DOMID_SELF);
-
- xen_mc_issue(PARAVIRT_LAZY_MMU);
-}
static void xen_clts(void)
{
@@ -700,21 +591,6 @@
xen_mc_issue(PARAVIRT_LAZY_CPU);
}
-static void xen_write_cr2(unsigned long cr2)
-{
- x86_read_percpu(xen_vcpu)->arch.cr2 = cr2;
-}
-
-static unsigned long xen_read_cr2(void)
-{
- return x86_read_percpu(xen_vcpu)->arch.cr2;
-}
-
-static unsigned long xen_read_cr2_direct(void)
-{
- return x86_read_percpu(xen_vcpu_info.arch.cr2);
-}
-
static void xen_write_cr4(unsigned long cr4)
{
cr4 &= ~X86_CR4_PGE;
@@ -723,71 +599,6 @@
native_write_cr4(cr4);
}
-static unsigned long xen_read_cr3(void)
-{
- return x86_read_percpu(xen_cr3);
-}
-
-static void set_current_cr3(void *v)
-{
- x86_write_percpu(xen_current_cr3, (unsigned long)v);
-}
-
-static void __xen_write_cr3(bool kernel, unsigned long cr3)
-{
- struct mmuext_op *op;
- struct multicall_space mcs;
- unsigned long mfn;
-
- if (cr3)
- mfn = pfn_to_mfn(PFN_DOWN(cr3));
- else
- mfn = 0;
-
- WARN_ON(mfn == 0 && kernel);
-
- mcs = __xen_mc_entry(sizeof(*op));
-
- op = mcs.args;
- op->cmd = kernel ? MMUEXT_NEW_BASEPTR : MMUEXT_NEW_USER_BASEPTR;
- op->arg1.mfn = mfn;
-
- MULTI_mmuext_op(mcs.mc, op, 1, NULL, DOMID_SELF);
-
- if (kernel) {
- x86_write_percpu(xen_cr3, cr3);
-
- /* Update xen_current_cr3 once the batch has actually
- been submitted. */
- xen_mc_callback(set_current_cr3, (void *)cr3);
- }
-}
-
-static void xen_write_cr3(unsigned long cr3)
-{
- BUG_ON(preemptible());
-
- xen_mc_batch(); /* disables interrupts */
-
- /* Update while interrupts are disabled, so its atomic with
- respect to ipis */
- x86_write_percpu(xen_cr3, cr3);
-
- __xen_write_cr3(true, cr3);
-
-#ifdef CONFIG_X86_64
- {
- pgd_t *user_pgd = xen_get_user_pgd(__va(cr3));
- if (user_pgd)
- __xen_write_cr3(false, __pa(user_pgd));
- else
- __xen_write_cr3(false, 0);
- }
-#endif
-
- xen_mc_issue(PARAVIRT_LAZY_CPU); /* interrupts restored */
-}
-
static int xen_write_msr_safe(unsigned int msr, unsigned low, unsigned high)
{
int ret;
@@ -829,185 +640,6 @@
return ret;
}
-/* Early in boot, while setting up the initial pagetable, assume
- everything is pinned. */
-static __init void xen_alloc_pte_init(struct mm_struct *mm, unsigned long pfn)
-{
-#ifdef CONFIG_FLATMEM
- BUG_ON(mem_map); /* should only be used early */
-#endif
- make_lowmem_page_readonly(__va(PFN_PHYS(pfn)));
-}
-
-/* Early release_pte assumes that all pts are pinned, since there's
- only init_mm and anything attached to that is pinned. */
-static void xen_release_pte_init(unsigned long pfn)
-{
- make_lowmem_page_readwrite(__va(PFN_PHYS(pfn)));
-}
-
-static void pin_pagetable_pfn(unsigned cmd, unsigned long pfn)
-{
- struct mmuext_op op;
- op.cmd = cmd;
- op.arg1.mfn = pfn_to_mfn(pfn);
- if (HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF))
- BUG();
-}
-
-/* This needs to make sure the new pte page is pinned iff its being
- attached to a pinned pagetable. */
-static void xen_alloc_ptpage(struct mm_struct *mm, unsigned long pfn, unsigned level)
-{
- struct page *page = pfn_to_page(pfn);
-
- if (PagePinned(virt_to_page(mm->pgd))) {
- SetPagePinned(page);
-
- vm_unmap_aliases();
- if (!PageHighMem(page)) {
- make_lowmem_page_readonly(__va(PFN_PHYS((unsigned long)pfn)));
- if (level == PT_PTE && USE_SPLIT_PTLOCKS)
- pin_pagetable_pfn(MMUEXT_PIN_L1_TABLE, pfn);
- } else {
- /* make sure there are no stray mappings of
- this page */
- kmap_flush_unused();
- }
- }
-}
-
-static void xen_alloc_pte(struct mm_struct *mm, unsigned long pfn)
-{
- xen_alloc_ptpage(mm, pfn, PT_PTE);
-}
-
-static void xen_alloc_pmd(struct mm_struct *mm, unsigned long pfn)
-{
- xen_alloc_ptpage(mm, pfn, PT_PMD);
-}
-
-static int xen_pgd_alloc(struct mm_struct *mm)
-{
- pgd_t *pgd = mm->pgd;
- int ret = 0;
-
- BUG_ON(PagePinned(virt_to_page(pgd)));
-
-#ifdef CONFIG_X86_64
- {
- struct page *page = virt_to_page(pgd);
- pgd_t *user_pgd;
-
- BUG_ON(page->private != 0);
-
- ret = -ENOMEM;
-
- user_pgd = (pgd_t *)__get_free_page(GFP_KERNEL | __GFP_ZERO);
- page->private = (unsigned long)user_pgd;
-
- if (user_pgd != NULL) {
- user_pgd[pgd_index(VSYSCALL_START)] =
- __pgd(__pa(level3_user_vsyscall) | _PAGE_TABLE);
- ret = 0;
- }
-
- BUG_ON(PagePinned(virt_to_page(xen_get_user_pgd(pgd))));
- }
-#endif
-
- return ret;
-}
-
-static void xen_pgd_free(struct mm_struct *mm, pgd_t *pgd)
-{
-#ifdef CONFIG_X86_64
- pgd_t *user_pgd = xen_get_user_pgd(pgd);
-
- if (user_pgd)
- free_page((unsigned long)user_pgd);
-#endif
-}
-
-/* This should never happen until we're OK to use struct page */
-static void xen_release_ptpage(unsigned long pfn, unsigned level)
-{
- struct page *page = pfn_to_page(pfn);
-
- if (PagePinned(page)) {
- if (!PageHighMem(page)) {
- if (level == PT_PTE && USE_SPLIT_PTLOCKS)
- pin_pagetable_pfn(MMUEXT_UNPIN_TABLE, pfn);
- make_lowmem_page_readwrite(__va(PFN_PHYS(pfn)));
- }
- ClearPagePinned(page);
- }
-}
-
-static void xen_release_pte(unsigned long pfn)
-{
- xen_release_ptpage(pfn, PT_PTE);
-}
-
-static void xen_release_pmd(unsigned long pfn)
-{
- xen_release_ptpage(pfn, PT_PMD);
-}
-
-#if PAGETABLE_LEVELS == 4
-static void xen_alloc_pud(struct mm_struct *mm, unsigned long pfn)
-{
- xen_alloc_ptpage(mm, pfn, PT_PUD);
-}
-
-static void xen_release_pud(unsigned long pfn)
-{
- xen_release_ptpage(pfn, PT_PUD);
-}
-#endif
-
-#ifdef CONFIG_HIGHPTE
-static void *xen_kmap_atomic_pte(struct page *page, enum km_type type)
-{
- pgprot_t prot = PAGE_KERNEL;
-
- if (PagePinned(page))
- prot = PAGE_KERNEL_RO;
-
- if (0 && PageHighMem(page))
- printk("mapping highpte %lx type %d prot %s\n",
- page_to_pfn(page), type,
- (unsigned long)pgprot_val(prot) & _PAGE_RW ? "WRITE" : "READ");
-
- return kmap_atomic_prot(page, type, prot);
-}
-#endif
-
-#ifdef CONFIG_X86_32
-static __init pte_t mask_rw_pte(pte_t *ptep, pte_t pte)
-{
- /* If there's an existing pte, then don't allow _PAGE_RW to be set */
- if (pte_val_ma(*ptep) & _PAGE_PRESENT)
- pte = __pte_ma(((pte_val_ma(*ptep) & _PAGE_RW) | ~_PAGE_RW) &
- pte_val_ma(pte));
-
- return pte;
-}
-
-/* Init-time set_pte while constructing initial pagetables, which
- doesn't allow RO pagetable pages to be remapped RW */
-static __init void xen_set_pte_init(pte_t *ptep, pte_t pte)
-{
- pte = mask_rw_pte(ptep, pte);
-
- xen_set_pte(ptep, pte);
-}
-#endif
-
-static __init void xen_pagetable_setup_start(pgd_t *base)
-{
-}
-
void xen_setup_shared_info(void)
{
if (!xen_feature(XENFEAT_auto_translated_physmap)) {
@@ -1028,37 +660,6 @@
xen_setup_mfn_list_list();
}
-static __init void xen_pagetable_setup_done(pgd_t *base)
-{
- xen_setup_shared_info();
-}
-
-static __init void xen_post_allocator_init(void)
-{
- pv_mmu_ops.set_pte = xen_set_pte;
- pv_mmu_ops.set_pmd = xen_set_pmd;
- pv_mmu_ops.set_pud = xen_set_pud;
-#if PAGETABLE_LEVELS == 4
- pv_mmu_ops.set_pgd = xen_set_pgd;
-#endif
-
- /* This will work as long as patching hasn't happened yet
- (which it hasn't) */
- pv_mmu_ops.alloc_pte = xen_alloc_pte;
- pv_mmu_ops.alloc_pmd = xen_alloc_pmd;
- pv_mmu_ops.release_pte = xen_release_pte;
- pv_mmu_ops.release_pmd = xen_release_pmd;
-#if PAGETABLE_LEVELS == 4
- pv_mmu_ops.alloc_pud = xen_alloc_pud;
- pv_mmu_ops.release_pud = xen_release_pud;
-#endif
-
-#ifdef CONFIG_X86_64
- SetPagePinned(virt_to_page(level3_user_vsyscall));
-#endif
- xen_mark_init_mm_pinned();
-}
-
/* This is called once we have the cpu_possible_map */
void xen_setup_vcpu_info_placement(void)
{
@@ -1072,10 +673,10 @@
if (have_vcpu_info_placement) {
printk(KERN_INFO "Xen: using vcpu_info placement\n");
- pv_irq_ops.save_fl = xen_save_fl_direct;
- pv_irq_ops.restore_fl = xen_restore_fl_direct;
- pv_irq_ops.irq_disable = xen_irq_disable_direct;
- pv_irq_ops.irq_enable = xen_irq_enable_direct;
+ pv_irq_ops.save_fl = __PV_IS_CALLEE_SAVE(xen_save_fl_direct);
+ pv_irq_ops.restore_fl = __PV_IS_CALLEE_SAVE(xen_restore_fl_direct);
+ pv_irq_ops.irq_disable = __PV_IS_CALLEE_SAVE(xen_irq_disable_direct);
+ pv_irq_ops.irq_enable = __PV_IS_CALLEE_SAVE(xen_irq_enable_direct);
pv_mmu_ops.read_cr2 = xen_read_cr2_direct;
}
}
@@ -1133,49 +734,6 @@
return ret;
}
-static void xen_set_fixmap(unsigned idx, unsigned long phys, pgprot_t prot)
-{
- pte_t pte;
-
- phys >>= PAGE_SHIFT;
-
- switch (idx) {
- case FIX_BTMAP_END ... FIX_BTMAP_BEGIN:
-#ifdef CONFIG_X86_F00F_BUG
- case FIX_F00F_IDT:
-#endif
-#ifdef CONFIG_X86_32
- case FIX_WP_TEST:
- case FIX_VDSO:
-# ifdef CONFIG_HIGHMEM
- case FIX_KMAP_BEGIN ... FIX_KMAP_END:
-# endif
-#else
- case VSYSCALL_LAST_PAGE ... VSYSCALL_FIRST_PAGE:
-#endif
-#ifdef CONFIG_X86_LOCAL_APIC
- case FIX_APIC_BASE: /* maps dummy local APIC */
-#endif
- pte = pfn_pte(phys, prot);
- break;
-
- default:
- pte = mfn_pte(phys, prot);
- break;
- }
-
- __native_set_fixmap(idx, pte);
-
-#ifdef CONFIG_X86_64
- /* Replicate changes to map the vsyscall page into the user
- pagetable vsyscall mapping. */
- if (idx >= VSYSCALL_LAST_PAGE && idx <= VSYSCALL_FIRST_PAGE) {
- unsigned long vaddr = __fix_to_virt(idx);
- set_pte_vaddr_pud(level3_user_vsyscall, vaddr, pte);
- }
-#endif
-}
-
static const struct pv_info xen_info __initdata = {
.paravirt_enabled = 1,
.shared_kernel_pmd = 0,
@@ -1271,87 +829,6 @@
#endif
};
-static const struct pv_mmu_ops xen_mmu_ops __initdata = {
- .pagetable_setup_start = xen_pagetable_setup_start,
- .pagetable_setup_done = xen_pagetable_setup_done,
-
- .read_cr2 = xen_read_cr2,
- .write_cr2 = xen_write_cr2,
-
- .read_cr3 = xen_read_cr3,
- .write_cr3 = xen_write_cr3,
-
- .flush_tlb_user = xen_flush_tlb,
- .flush_tlb_kernel = xen_flush_tlb,
- .flush_tlb_single = xen_flush_tlb_single,
- .flush_tlb_others = xen_flush_tlb_others,
-
- .pte_update = paravirt_nop,
- .pte_update_defer = paravirt_nop,
-
- .pgd_alloc = xen_pgd_alloc,
- .pgd_free = xen_pgd_free,
-
- .alloc_pte = xen_alloc_pte_init,
- .release_pte = xen_release_pte_init,
- .alloc_pmd = xen_alloc_pte_init,
- .alloc_pmd_clone = paravirt_nop,
- .release_pmd = xen_release_pte_init,
-
-#ifdef CONFIG_HIGHPTE
- .kmap_atomic_pte = xen_kmap_atomic_pte,
-#endif
-
-#ifdef CONFIG_X86_64
- .set_pte = xen_set_pte,
-#else
- .set_pte = xen_set_pte_init,
-#endif
- .set_pte_at = xen_set_pte_at,
- .set_pmd = xen_set_pmd_hyper,
-
- .ptep_modify_prot_start = __ptep_modify_prot_start,
- .ptep_modify_prot_commit = __ptep_modify_prot_commit,
-
- .pte_val = xen_pte_val,
- .pte_flags = native_pte_flags,
- .pgd_val = xen_pgd_val,
-
- .make_pte = xen_make_pte,
- .make_pgd = xen_make_pgd,
-
-#ifdef CONFIG_X86_PAE
- .set_pte_atomic = xen_set_pte_atomic,
- .set_pte_present = xen_set_pte_at,
- .pte_clear = xen_pte_clear,
- .pmd_clear = xen_pmd_clear,
-#endif /* CONFIG_X86_PAE */
- .set_pud = xen_set_pud_hyper,
-
- .make_pmd = xen_make_pmd,
- .pmd_val = xen_pmd_val,
-
-#if PAGETABLE_LEVELS == 4
- .pud_val = xen_pud_val,
- .make_pud = xen_make_pud,
- .set_pgd = xen_set_pgd_hyper,
-
- .alloc_pud = xen_alloc_pte_init,
- .release_pud = xen_release_pte_init,
-#endif /* PAGETABLE_LEVELS == 4 */
-
- .activate_mm = xen_activate_mm,
- .dup_mmap = xen_dup_mmap,
- .exit_mmap = xen_exit_mmap,
-
- .lazy_mode = {
- .enter = paravirt_enter_lazy_mmu,
- .leave = xen_leave_lazy,
- },
-
- .set_fixmap = xen_set_fixmap,
-};
-
static void xen_reboot(int reason)
{
struct sched_shutdown r = { .reason = reason };
@@ -1394,223 +871,6 @@
};
-static void __init xen_reserve_top(void)
-{
-#ifdef CONFIG_X86_32
- unsigned long top = HYPERVISOR_VIRT_START;
- struct xen_platform_parameters pp;
-
- if (HYPERVISOR_xen_version(XENVER_platform_parameters, &pp) == 0)
- top = pp.virt_start;
-
- reserve_top_address(-top);
-#endif /* CONFIG_X86_32 */
-}
-
-/*
- * Like __va(), but returns address in the kernel mapping (which is
- * all we have until the physical memory mapping has been set up.
- */
-static void *__ka(phys_addr_t paddr)
-{
-#ifdef CONFIG_X86_64
- return (void *)(paddr + __START_KERNEL_map);
-#else
- return __va(paddr);
-#endif
-}
-
-/* Convert a machine address to physical address */
-static unsigned long m2p(phys_addr_t maddr)
-{
- phys_addr_t paddr;
-
- maddr &= PTE_PFN_MASK;
- paddr = mfn_to_pfn(maddr >> PAGE_SHIFT) << PAGE_SHIFT;
-
- return paddr;
-}
-
-/* Convert a machine address to kernel virtual */
-static void *m2v(phys_addr_t maddr)
-{
- return __ka(m2p(maddr));
-}
-
-static void set_page_prot(void *addr, pgprot_t prot)
-{
- unsigned long pfn = __pa(addr) >> PAGE_SHIFT;
- pte_t pte = pfn_pte(pfn, prot);
-
- if (HYPERVISOR_update_va_mapping((unsigned long)addr, pte, 0))
- BUG();
-}
-
-static __init void xen_map_identity_early(pmd_t *pmd, unsigned long max_pfn)
-{
- unsigned pmdidx, pteidx;
- unsigned ident_pte;
- unsigned long pfn;
-
- ident_pte = 0;
- pfn = 0;
- for (pmdidx = 0; pmdidx < PTRS_PER_PMD && pfn < max_pfn; pmdidx++) {
- pte_t *pte_page;
-
- /* Reuse or allocate a page of ptes */
- if (pmd_present(pmd[pmdidx]))
- pte_page = m2v(pmd[pmdidx].pmd);
- else {
- /* Check for free pte pages */
- if (ident_pte == ARRAY_SIZE(level1_ident_pgt))
- break;
-
- pte_page = &level1_ident_pgt[ident_pte];
- ident_pte += PTRS_PER_PTE;
-
- pmd[pmdidx] = __pmd(__pa(pte_page) | _PAGE_TABLE);
- }
-
- /* Install mappings */
- for (pteidx = 0; pteidx < PTRS_PER_PTE; pteidx++, pfn++) {
- pte_t pte;
-
- if (pfn > max_pfn_mapped)
- max_pfn_mapped = pfn;
-
- if (!pte_none(pte_page[pteidx]))
- continue;
-
- pte = pfn_pte(pfn, PAGE_KERNEL_EXEC);
- pte_page[pteidx] = pte;
- }
- }
-
- for (pteidx = 0; pteidx < ident_pte; pteidx += PTRS_PER_PTE)
- set_page_prot(&level1_ident_pgt[pteidx], PAGE_KERNEL_RO);
-
- set_page_prot(pmd, PAGE_KERNEL_RO);
-}
-
-#ifdef CONFIG_X86_64
-static void convert_pfn_mfn(void *v)
-{
- pte_t *pte = v;
- int i;
-
- /* All levels are converted the same way, so just treat them
- as ptes. */
- for (i = 0; i < PTRS_PER_PTE; i++)
- pte[i] = xen_make_pte(pte[i].pte);
-}
-
-/*
- * Set up the inital kernel pagetable.
- *
- * We can construct this by grafting the Xen provided pagetable into
- * head_64.S's preconstructed pagetables. We copy the Xen L2's into
- * level2_ident_pgt, level2_kernel_pgt and level2_fixmap_pgt. This
- * means that only the kernel has a physical mapping to start with -
- * but that's enough to get __va working. We need to fill in the rest
- * of the physical mapping once some sort of allocator has been set
- * up.
- */
-static __init pgd_t *xen_setup_kernel_pagetable(pgd_t *pgd,
- unsigned long max_pfn)
-{
- pud_t *l3;
- pmd_t *l2;
-
- /* Zap identity mapping */
- init_level4_pgt[0] = __pgd(0);
-
- /* Pre-constructed entries are in pfn, so convert to mfn */
- convert_pfn_mfn(init_level4_pgt);
- convert_pfn_mfn(level3_ident_pgt);
- convert_pfn_mfn(level3_kernel_pgt);
-
- l3 = m2v(pgd[pgd_index(__START_KERNEL_map)].pgd);
- l2 = m2v(l3[pud_index(__START_KERNEL_map)].pud);
-
- memcpy(level2_ident_pgt, l2, sizeof(pmd_t) * PTRS_PER_PMD);
- memcpy(level2_kernel_pgt, l2, sizeof(pmd_t) * PTRS_PER_PMD);
-
- l3 = m2v(pgd[pgd_index(__START_KERNEL_map + PMD_SIZE)].pgd);
- l2 = m2v(l3[pud_index(__START_KERNEL_map + PMD_SIZE)].pud);
- memcpy(level2_fixmap_pgt, l2, sizeof(pmd_t) * PTRS_PER_PMD);
-
- /* Set up identity map */
- xen_map_identity_early(level2_ident_pgt, max_pfn);
-
- /* Make pagetable pieces RO */
- set_page_prot(init_level4_pgt, PAGE_KERNEL_RO);
- set_page_prot(level3_ident_pgt, PAGE_KERNEL_RO);
- set_page_prot(level3_kernel_pgt, PAGE_KERNEL_RO);
- set_page_prot(level3_user_vsyscall, PAGE_KERNEL_RO);
- set_page_prot(level2_kernel_pgt, PAGE_KERNEL_RO);
- set_page_prot(level2_fixmap_pgt, PAGE_KERNEL_RO);
-
- /* Pin down new L4 */
- pin_pagetable_pfn(MMUEXT_PIN_L4_TABLE,
- PFN_DOWN(__pa_symbol(init_level4_pgt)));
-
- /* Unpin Xen-provided one */
- pin_pagetable_pfn(MMUEXT_UNPIN_TABLE, PFN_DOWN(__pa(pgd)));
-
- /* Switch over */
- pgd = init_level4_pgt;
-
- /*
- * At this stage there can be no user pgd, and no page
- * structure to attach it to, so make sure we just set kernel
- * pgd.
- */
- xen_mc_batch();
- __xen_write_cr3(true, __pa(pgd));
- xen_mc_issue(PARAVIRT_LAZY_CPU);
-
- reserve_early(__pa(xen_start_info->pt_base),
- __pa(xen_start_info->pt_base +
- xen_start_info->nr_pt_frames * PAGE_SIZE),
- "XEN PAGETABLES");
-
- return pgd;
-}
-#else /* !CONFIG_X86_64 */
-static pmd_t level2_kernel_pgt[PTRS_PER_PMD] __page_aligned_bss;
-
-static __init pgd_t *xen_setup_kernel_pagetable(pgd_t *pgd,
- unsigned long max_pfn)
-{
- pmd_t *kernel_pmd;
-
- init_pg_tables_start = __pa(pgd);
- init_pg_tables_end = __pa(pgd) + xen_start_info->nr_pt_frames*PAGE_SIZE;
- max_pfn_mapped = PFN_DOWN(init_pg_tables_end + 512*1024);
-
- kernel_pmd = m2v(pgd[KERNEL_PGD_BOUNDARY].pgd);
- memcpy(level2_kernel_pgt, kernel_pmd, sizeof(pmd_t) * PTRS_PER_PMD);
-
- xen_map_identity_early(level2_kernel_pgt, max_pfn);
-
- memcpy(swapper_pg_dir, pgd, sizeof(pgd_t) * PTRS_PER_PGD);
- set_pgd(&swapper_pg_dir[KERNEL_PGD_BOUNDARY],
- __pgd(__pa(level2_kernel_pgt) | _PAGE_PRESENT));
-
- set_page_prot(level2_kernel_pgt, PAGE_KERNEL_RO);
- set_page_prot(swapper_pg_dir, PAGE_KERNEL_RO);
- set_page_prot(empty_zero_page, PAGE_KERNEL_RO);
-
- pin_pagetable_pfn(MMUEXT_UNPIN_TABLE, PFN_DOWN(__pa(pgd)));
-
- xen_write_cr3(__pa(swapper_pg_dir));
-
- pin_pagetable_pfn(MMUEXT_PIN_L3_TABLE, PFN_DOWN(__pa(swapper_pg_dir)));
-
- return swapper_pg_dir;
-}
-#endif /* CONFIG_X86_64 */
-
/* First C function to be called on Xen boot */
asmlinkage void __init xen_start_kernel(void)
{
@@ -1639,7 +899,7 @@
/*
* set up the basic apic ops.
*/
- apic_ops = &xen_basic_apic_ops;
+ set_xen_basic_apic_ops();
#endif
if (xen_feature(XENFEAT_mmu_pt_update_preserve_ad)) {
@@ -1650,10 +910,18 @@
machine_ops = xen_machine_ops;
#ifdef CONFIG_X86_64
- /* Disable until direct per-cpu data access. */
- have_vcpu_info_placement = 0;
- x86_64_init_pda();
+ /*
+ * Setup percpu state. We only need to do this for 64-bit
+ * because 32-bit already has %fs set properly.
+ */
+ load_percpu_segment(0);
#endif
+ /*
+ * The only reliable way to retain the initial address of the
+ * percpu gdt_page is to remember it here, so we can go and
+ * mark it RW later, when the initial percpu area is freed.
+ */
+ xen_initial_gdt = &per_cpu(gdt_page, 0);
xen_smp_init();
diff --git a/arch/x86/xen/irq.c b/arch/x86/xen/irq.c
index bb04260..cfd1779 100644
--- a/arch/x86/xen/irq.c
+++ b/arch/x86/xen/irq.c
@@ -19,27 +19,12 @@
(void)HYPERVISOR_xen_version(0, NULL);
}
-static void __init __xen_init_IRQ(void)
-{
- int i;
-
- /* Create identity vector->irq map */
- for(i = 0; i < NR_VECTORS; i++) {
- int cpu;
-
- for_each_possible_cpu(cpu)
- per_cpu(vector_irq, cpu)[i] = i;
- }
-
- xen_init_IRQ();
-}
-
static unsigned long xen_save_fl(void)
{
struct vcpu_info *vcpu;
unsigned long flags;
- vcpu = x86_read_percpu(xen_vcpu);
+ vcpu = percpu_read(xen_vcpu);
/* flag has opposite sense of mask */
flags = !vcpu->evtchn_upcall_mask;
@@ -50,6 +35,7 @@
*/
return (-flags) & X86_EFLAGS_IF;
}
+PV_CALLEE_SAVE_REGS_THUNK(xen_save_fl);
static void xen_restore_fl(unsigned long flags)
{
@@ -62,7 +48,7 @@
make sure we're don't switch CPUs between getting the vcpu
pointer and updating the mask. */
preempt_disable();
- vcpu = x86_read_percpu(xen_vcpu);
+ vcpu = percpu_read(xen_vcpu);
vcpu->evtchn_upcall_mask = flags;
preempt_enable_no_resched();
@@ -76,6 +62,7 @@
xen_force_evtchn_callback();
}
}
+PV_CALLEE_SAVE_REGS_THUNK(xen_restore_fl);
static void xen_irq_disable(void)
{
@@ -83,9 +70,10 @@
make sure we're don't switch CPUs between getting the vcpu
pointer and updating the mask. */
preempt_disable();
- x86_read_percpu(xen_vcpu)->evtchn_upcall_mask = 1;
+ percpu_read(xen_vcpu)->evtchn_upcall_mask = 1;
preempt_enable_no_resched();
}
+PV_CALLEE_SAVE_REGS_THUNK(xen_irq_disable);
static void xen_irq_enable(void)
{
@@ -96,7 +84,7 @@
the caller is confused and is trying to re-enable interrupts
on an indeterminate processor. */
- vcpu = x86_read_percpu(xen_vcpu);
+ vcpu = percpu_read(xen_vcpu);
vcpu->evtchn_upcall_mask = 0;
/* Doesn't matter if we get preempted here, because any
@@ -106,6 +94,7 @@
if (unlikely(vcpu->evtchn_upcall_pending))
xen_force_evtchn_callback();
}
+PV_CALLEE_SAVE_REGS_THUNK(xen_irq_enable);
static void xen_safe_halt(void)
{
@@ -123,11 +112,13 @@
}
static const struct pv_irq_ops xen_irq_ops __initdata = {
- .init_IRQ = __xen_init_IRQ,
- .save_fl = xen_save_fl,
- .restore_fl = xen_restore_fl,
- .irq_disable = xen_irq_disable,
- .irq_enable = xen_irq_enable,
+ .init_IRQ = xen_init_IRQ,
+
+ .save_fl = PV_CALLEE_SAVE(xen_save_fl),
+ .restore_fl = PV_CALLEE_SAVE(xen_restore_fl),
+ .irq_disable = PV_CALLEE_SAVE(xen_irq_disable),
+ .irq_enable = PV_CALLEE_SAVE(xen_irq_enable),
+
.safe_halt = xen_safe_halt,
.halt = xen_halt,
#ifdef CONFIG_X86_64
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index 503c240..319bd40 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -47,6 +47,7 @@
#include <asm/tlbflush.h>
#include <asm/fixmap.h>
#include <asm/mmu_context.h>
+#include <asm/setup.h>
#include <asm/paravirt.h>
#include <asm/linkage.h>
@@ -55,6 +56,8 @@
#include <xen/page.h>
#include <xen/interface/xen.h>
+#include <xen/interface/version.h>
+#include <xen/hvc-console.h>
#include "multicalls.h"
#include "mmu.h"
@@ -114,6 +117,37 @@
#endif /* CONFIG_XEN_DEBUG_FS */
+
+/*
+ * Identity map, in addition to plain kernel map. This needs to be
+ * large enough to allocate page table pages to allocate the rest.
+ * Each page can map 2MB.
+ */
+static pte_t level1_ident_pgt[PTRS_PER_PTE * 4] __page_aligned_bss;
+
+#ifdef CONFIG_X86_64
+/* l3 pud for userspace vsyscall mapping */
+static pud_t level3_user_vsyscall[PTRS_PER_PUD] __page_aligned_bss;
+#endif /* CONFIG_X86_64 */
+
+/*
+ * Note about cr3 (pagetable base) values:
+ *
+ * xen_cr3 contains the current logical cr3 value; it contains the
+ * last set cr3. This may not be the current effective cr3, because
+ * its update may be being lazily deferred. However, a vcpu looking
+ * at its own cr3 can use this value knowing that it everything will
+ * be self-consistent.
+ *
+ * xen_current_cr3 contains the actual vcpu cr3; it is set once the
+ * hypercall to set the vcpu cr3 is complete (so it may be a little
+ * out of date, but it will never be set early). If one vcpu is
+ * looking at another vcpu's cr3 value, it should use this variable.
+ */
+DEFINE_PER_CPU(unsigned long, xen_cr3); /* cr3 stored as physaddr */
+DEFINE_PER_CPU(unsigned long, xen_current_cr3); /* actual vcpu cr3 */
+
+
/*
* Just beyond the highest usermode address. STACK_TOP_MAX has a
* redzone above it, so round it up to a PGD boundary.
@@ -458,28 +492,33 @@
{
return pte_mfn_to_pfn(pte.pte);
}
+PV_CALLEE_SAVE_REGS_THUNK(xen_pte_val);
pgdval_t xen_pgd_val(pgd_t pgd)
{
return pte_mfn_to_pfn(pgd.pgd);
}
+PV_CALLEE_SAVE_REGS_THUNK(xen_pgd_val);
pte_t xen_make_pte(pteval_t pte)
{
pte = pte_pfn_to_mfn(pte);
return native_make_pte(pte);
}
+PV_CALLEE_SAVE_REGS_THUNK(xen_make_pte);
pgd_t xen_make_pgd(pgdval_t pgd)
{
pgd = pte_pfn_to_mfn(pgd);
return native_make_pgd(pgd);
}
+PV_CALLEE_SAVE_REGS_THUNK(xen_make_pgd);
pmdval_t xen_pmd_val(pmd_t pmd)
{
return pte_mfn_to_pfn(pmd.pmd);
}
+PV_CALLEE_SAVE_REGS_THUNK(xen_pmd_val);
void xen_set_pud_hyper(pud_t *ptr, pud_t val)
{
@@ -556,12 +595,14 @@
pmd = pte_pfn_to_mfn(pmd);
return native_make_pmd(pmd);
}
+PV_CALLEE_SAVE_REGS_THUNK(xen_make_pmd);
#if PAGETABLE_LEVELS == 4
pudval_t xen_pud_val(pud_t pud)
{
return pte_mfn_to_pfn(pud.pud);
}
+PV_CALLEE_SAVE_REGS_THUNK(xen_pud_val);
pud_t xen_make_pud(pudval_t pud)
{
@@ -569,6 +610,7 @@
return native_make_pud(pud);
}
+PV_CALLEE_SAVE_REGS_THUNK(xen_make_pud);
pgd_t *xen_get_user_pgd(pgd_t *pgd)
{
@@ -1063,18 +1105,14 @@
struct mm_struct *mm = info;
struct mm_struct *active_mm;
-#ifdef CONFIG_X86_64
- active_mm = read_pda(active_mm);
-#else
- active_mm = __get_cpu_var(cpu_tlbstate).active_mm;
-#endif
+ active_mm = percpu_read(cpu_tlbstate.active_mm);
if (active_mm == mm)
leave_mm(smp_processor_id());
/* If this cpu still has a stale cr3 reference, then make sure
it has been flushed. */
- if (x86_read_percpu(xen_current_cr3) == __pa(mm->pgd)) {
+ if (percpu_read(xen_current_cr3) == __pa(mm->pgd)) {
load_cr3(swapper_pg_dir);
arch_flush_lazy_cpu_mode();
}
@@ -1156,6 +1194,706 @@
spin_unlock(&mm->page_table_lock);
}
+static __init void xen_pagetable_setup_start(pgd_t *base)
+{
+}
+
+static __init void xen_pagetable_setup_done(pgd_t *base)
+{
+ xen_setup_shared_info();
+}
+
+static void xen_write_cr2(unsigned long cr2)
+{
+ percpu_read(xen_vcpu)->arch.cr2 = cr2;
+}
+
+static unsigned long xen_read_cr2(void)
+{
+ return percpu_read(xen_vcpu)->arch.cr2;
+}
+
+unsigned long xen_read_cr2_direct(void)
+{
+ return percpu_read(xen_vcpu_info.arch.cr2);
+}
+
+static void xen_flush_tlb(void)
+{
+ struct mmuext_op *op;
+ struct multicall_space mcs;
+
+ preempt_disable();
+
+ mcs = xen_mc_entry(sizeof(*op));
+
+ op = mcs.args;
+ op->cmd = MMUEXT_TLB_FLUSH_LOCAL;
+ MULTI_mmuext_op(mcs.mc, op, 1, NULL, DOMID_SELF);
+
+ xen_mc_issue(PARAVIRT_LAZY_MMU);
+
+ preempt_enable();
+}
+
+static void xen_flush_tlb_single(unsigned long addr)
+{
+ struct mmuext_op *op;
+ struct multicall_space mcs;
+
+ preempt_disable();
+
+ mcs = xen_mc_entry(sizeof(*op));
+ op = mcs.args;
+ op->cmd = MMUEXT_INVLPG_LOCAL;
+ op->arg1.linear_addr = addr & PAGE_MASK;
+ MULTI_mmuext_op(mcs.mc, op, 1, NULL, DOMID_SELF);
+
+ xen_mc_issue(PARAVIRT_LAZY_MMU);
+
+ preempt_enable();
+}
+
+static void xen_flush_tlb_others(const struct cpumask *cpus,
+ struct mm_struct *mm, unsigned long va)
+{
+ struct {
+ struct mmuext_op op;
+ DECLARE_BITMAP(mask, NR_CPUS);
+ } *args;
+ struct multicall_space mcs;
+
+ BUG_ON(cpumask_empty(cpus));
+ BUG_ON(!mm);
+
+ mcs = xen_mc_entry(sizeof(*args));
+ args = mcs.args;
+ args->op.arg2.vcpumask = to_cpumask(args->mask);
+
+ /* Remove us, and any offline CPUS. */
+ cpumask_and(to_cpumask(args->mask), cpus, cpu_online_mask);
+ cpumask_clear_cpu(smp_processor_id(), to_cpumask(args->mask));
+
+ if (va == TLB_FLUSH_ALL) {
+ args->op.cmd = MMUEXT_TLB_FLUSH_MULTI;
+ } else {
+ args->op.cmd = MMUEXT_INVLPG_MULTI;
+ args->op.arg1.linear_addr = va;
+ }
+
+ MULTI_mmuext_op(mcs.mc, &args->op, 1, NULL, DOMID_SELF);
+
+ xen_mc_issue(PARAVIRT_LAZY_MMU);
+}
+
+static unsigned long xen_read_cr3(void)
+{
+ return percpu_read(xen_cr3);
+}
+
+static void set_current_cr3(void *v)
+{
+ percpu_write(xen_current_cr3, (unsigned long)v);
+}
+
+static void __xen_write_cr3(bool kernel, unsigned long cr3)
+{
+ struct mmuext_op *op;
+ struct multicall_space mcs;
+ unsigned long mfn;
+
+ if (cr3)
+ mfn = pfn_to_mfn(PFN_DOWN(cr3));
+ else
+ mfn = 0;
+
+ WARN_ON(mfn == 0 && kernel);
+
+ mcs = __xen_mc_entry(sizeof(*op));
+
+ op = mcs.args;
+ op->cmd = kernel ? MMUEXT_NEW_BASEPTR : MMUEXT_NEW_USER_BASEPTR;
+ op->arg1.mfn = mfn;
+
+ MULTI_mmuext_op(mcs.mc, op, 1, NULL, DOMID_SELF);
+
+ if (kernel) {
+ percpu_write(xen_cr3, cr3);
+
+ /* Update xen_current_cr3 once the batch has actually
+ been submitted. */
+ xen_mc_callback(set_current_cr3, (void *)cr3);
+ }
+}
+
+static void xen_write_cr3(unsigned long cr3)
+{
+ BUG_ON(preemptible());
+
+ xen_mc_batch(); /* disables interrupts */
+
+ /* Update while interrupts are disabled, so its atomic with
+ respect to ipis */
+ percpu_write(xen_cr3, cr3);
+
+ __xen_write_cr3(true, cr3);
+
+#ifdef CONFIG_X86_64
+ {
+ pgd_t *user_pgd = xen_get_user_pgd(__va(cr3));
+ if (user_pgd)
+ __xen_write_cr3(false, __pa(user_pgd));
+ else
+ __xen_write_cr3(false, 0);
+ }
+#endif
+
+ xen_mc_issue(PARAVIRT_LAZY_CPU); /* interrupts restored */
+}
+
+static int xen_pgd_alloc(struct mm_struct *mm)
+{
+ pgd_t *pgd = mm->pgd;
+ int ret = 0;
+
+ BUG_ON(PagePinned(virt_to_page(pgd)));
+
+#ifdef CONFIG_X86_64
+ {
+ struct page *page = virt_to_page(pgd);
+ pgd_t *user_pgd;
+
+ BUG_ON(page->private != 0);
+
+ ret = -ENOMEM;
+
+ user_pgd = (pgd_t *)__get_free_page(GFP_KERNEL | __GFP_ZERO);
+ page->private = (unsigned long)user_pgd;
+
+ if (user_pgd != NULL) {
+ user_pgd[pgd_index(VSYSCALL_START)] =
+ __pgd(__pa(level3_user_vsyscall) | _PAGE_TABLE);
+ ret = 0;
+ }
+
+ BUG_ON(PagePinned(virt_to_page(xen_get_user_pgd(pgd))));
+ }
+#endif
+
+ return ret;
+}
+
+static void xen_pgd_free(struct mm_struct *mm, pgd_t *pgd)
+{
+#ifdef CONFIG_X86_64
+ pgd_t *user_pgd = xen_get_user_pgd(pgd);
+
+ if (user_pgd)
+ free_page((unsigned long)user_pgd);
+#endif
+}
+
+#ifdef CONFIG_HIGHPTE
+static void *xen_kmap_atomic_pte(struct page *page, enum km_type type)
+{
+ pgprot_t prot = PAGE_KERNEL;
+
+ if (PagePinned(page))
+ prot = PAGE_KERNEL_RO;
+
+ if (0 && PageHighMem(page))
+ printk("mapping highpte %lx type %d prot %s\n",
+ page_to_pfn(page), type,
+ (unsigned long)pgprot_val(prot) & _PAGE_RW ? "WRITE" : "READ");
+
+ return kmap_atomic_prot(page, type, prot);
+}
+#endif
+
+#ifdef CONFIG_X86_32
+static __init pte_t mask_rw_pte(pte_t *ptep, pte_t pte)
+{
+ /* If there's an existing pte, then don't allow _PAGE_RW to be set */
+ if (pte_val_ma(*ptep) & _PAGE_PRESENT)
+ pte = __pte_ma(((pte_val_ma(*ptep) & _PAGE_RW) | ~_PAGE_RW) &
+ pte_val_ma(pte));
+
+ return pte;
+}
+
+/* Init-time set_pte while constructing initial pagetables, which
+ doesn't allow RO pagetable pages to be remapped RW */
+static __init void xen_set_pte_init(pte_t *ptep, pte_t pte)
+{
+ pte = mask_rw_pte(ptep, pte);
+
+ xen_set_pte(ptep, pte);
+}
+#endif
+
+/* Early in boot, while setting up the initial pagetable, assume
+ everything is pinned. */
+static __init void xen_alloc_pte_init(struct mm_struct *mm, unsigned long pfn)
+{
+#ifdef CONFIG_FLATMEM
+ BUG_ON(mem_map); /* should only be used early */
+#endif
+ make_lowmem_page_readonly(__va(PFN_PHYS(pfn)));
+}
+
+/* Early release_pte assumes that all pts are pinned, since there's
+ only init_mm and anything attached to that is pinned. */
+static void xen_release_pte_init(unsigned long pfn)
+{
+ make_lowmem_page_readwrite(__va(PFN_PHYS(pfn)));
+}
+
+static void pin_pagetable_pfn(unsigned cmd, unsigned long pfn)
+{
+ struct mmuext_op op;
+ op.cmd = cmd;
+ op.arg1.mfn = pfn_to_mfn(pfn);
+ if (HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF))
+ BUG();
+}
+
+/* This needs to make sure the new pte page is pinned iff its being
+ attached to a pinned pagetable. */
+static void xen_alloc_ptpage(struct mm_struct *mm, unsigned long pfn, unsigned level)
+{
+ struct page *page = pfn_to_page(pfn);
+
+ if (PagePinned(virt_to_page(mm->pgd))) {
+ SetPagePinned(page);
+
+ vm_unmap_aliases();
+ if (!PageHighMem(page)) {
+ make_lowmem_page_readonly(__va(PFN_PHYS((unsigned long)pfn)));
+ if (level == PT_PTE && USE_SPLIT_PTLOCKS)
+ pin_pagetable_pfn(MMUEXT_PIN_L1_TABLE, pfn);
+ } else {
+ /* make sure there are no stray mappings of
+ this page */
+ kmap_flush_unused();
+ }
+ }
+}
+
+static void xen_alloc_pte(struct mm_struct *mm, unsigned long pfn)
+{
+ xen_alloc_ptpage(mm, pfn, PT_PTE);
+}
+
+static void xen_alloc_pmd(struct mm_struct *mm, unsigned long pfn)
+{
+ xen_alloc_ptpage(mm, pfn, PT_PMD);
+}
+
+/* This should never happen until we're OK to use struct page */
+static void xen_release_ptpage(unsigned long pfn, unsigned level)
+{
+ struct page *page = pfn_to_page(pfn);
+
+ if (PagePinned(page)) {
+ if (!PageHighMem(page)) {
+ if (level == PT_PTE && USE_SPLIT_PTLOCKS)
+ pin_pagetable_pfn(MMUEXT_UNPIN_TABLE, pfn);
+ make_lowmem_page_readwrite(__va(PFN_PHYS(pfn)));
+ }
+ ClearPagePinned(page);
+ }
+}
+
+static void xen_release_pte(unsigned long pfn)
+{
+ xen_release_ptpage(pfn, PT_PTE);
+}
+
+static void xen_release_pmd(unsigned long pfn)
+{
+ xen_release_ptpage(pfn, PT_PMD);
+}
+
+#if PAGETABLE_LEVELS == 4
+static void xen_alloc_pud(struct mm_struct *mm, unsigned long pfn)
+{
+ xen_alloc_ptpage(mm, pfn, PT_PUD);
+}
+
+static void xen_release_pud(unsigned long pfn)
+{
+ xen_release_ptpage(pfn, PT_PUD);
+}
+#endif
+
+void __init xen_reserve_top(void)
+{
+#ifdef CONFIG_X86_32
+ unsigned long top = HYPERVISOR_VIRT_START;
+ struct xen_platform_parameters pp;
+
+ if (HYPERVISOR_xen_version(XENVER_platform_parameters, &pp) == 0)
+ top = pp.virt_start;
+
+ reserve_top_address(-top);
+#endif /* CONFIG_X86_32 */
+}
+
+/*
+ * Like __va(), but returns address in the kernel mapping (which is
+ * all we have until the physical memory mapping has been set up.
+ */
+static void *__ka(phys_addr_t paddr)
+{
+#ifdef CONFIG_X86_64
+ return (void *)(paddr + __START_KERNEL_map);
+#else
+ return __va(paddr);
+#endif
+}
+
+/* Convert a machine address to physical address */
+static unsigned long m2p(phys_addr_t maddr)
+{
+ phys_addr_t paddr;
+
+ maddr &= PTE_PFN_MASK;
+ paddr = mfn_to_pfn(maddr >> PAGE_SHIFT) << PAGE_SHIFT;
+
+ return paddr;
+}
+
+/* Convert a machine address to kernel virtual */
+static void *m2v(phys_addr_t maddr)
+{
+ return __ka(m2p(maddr));
+}
+
+static void set_page_prot(void *addr, pgprot_t prot)
+{
+ unsigned long pfn = __pa(addr) >> PAGE_SHIFT;
+ pte_t pte = pfn_pte(pfn, prot);
+
+ if (HYPERVISOR_update_va_mapping((unsigned long)addr, pte, 0))
+ BUG();
+}
+
+static __init void xen_map_identity_early(pmd_t *pmd, unsigned long max_pfn)
+{
+ unsigned pmdidx, pteidx;
+ unsigned ident_pte;
+ unsigned long pfn;
+
+ ident_pte = 0;
+ pfn = 0;
+ for (pmdidx = 0; pmdidx < PTRS_PER_PMD && pfn < max_pfn; pmdidx++) {
+ pte_t *pte_page;
+
+ /* Reuse or allocate a page of ptes */
+ if (pmd_present(pmd[pmdidx]))
+ pte_page = m2v(pmd[pmdidx].pmd);
+ else {
+ /* Check for free pte pages */
+ if (ident_pte == ARRAY_SIZE(level1_ident_pgt))
+ break;
+
+ pte_page = &level1_ident_pgt[ident_pte];
+ ident_pte += PTRS_PER_PTE;
+
+ pmd[pmdidx] = __pmd(__pa(pte_page) | _PAGE_TABLE);
+ }
+
+ /* Install mappings */
+ for (pteidx = 0; pteidx < PTRS_PER_PTE; pteidx++, pfn++) {
+ pte_t pte;
+
+ if (pfn > max_pfn_mapped)
+ max_pfn_mapped = pfn;
+
+ if (!pte_none(pte_page[pteidx]))
+ continue;
+
+ pte = pfn_pte(pfn, PAGE_KERNEL_EXEC);
+ pte_page[pteidx] = pte;
+ }
+ }
+
+ for (pteidx = 0; pteidx < ident_pte; pteidx += PTRS_PER_PTE)
+ set_page_prot(&level1_ident_pgt[pteidx], PAGE_KERNEL_RO);
+
+ set_page_prot(pmd, PAGE_KERNEL_RO);
+}
+
+#ifdef CONFIG_X86_64
+static void convert_pfn_mfn(void *v)
+{
+ pte_t *pte = v;
+ int i;
+
+ /* All levels are converted the same way, so just treat them
+ as ptes. */
+ for (i = 0; i < PTRS_PER_PTE; i++)
+ pte[i] = xen_make_pte(pte[i].pte);
+}
+
+/*
+ * Set up the inital kernel pagetable.
+ *
+ * We can construct this by grafting the Xen provided pagetable into
+ * head_64.S's preconstructed pagetables. We copy the Xen L2's into
+ * level2_ident_pgt, level2_kernel_pgt and level2_fixmap_pgt. This
+ * means that only the kernel has a physical mapping to start with -
+ * but that's enough to get __va working. We need to fill in the rest
+ * of the physical mapping once some sort of allocator has been set
+ * up.
+ */
+__init pgd_t *xen_setup_kernel_pagetable(pgd_t *pgd,
+ unsigned long max_pfn)
+{
+ pud_t *l3;
+ pmd_t *l2;
+
+ /* Zap identity mapping */
+ init_level4_pgt[0] = __pgd(0);
+
+ /* Pre-constructed entries are in pfn, so convert to mfn */
+ convert_pfn_mfn(init_level4_pgt);
+ convert_pfn_mfn(level3_ident_pgt);
+ convert_pfn_mfn(level3_kernel_pgt);
+
+ l3 = m2v(pgd[pgd_index(__START_KERNEL_map)].pgd);
+ l2 = m2v(l3[pud_index(__START_KERNEL_map)].pud);
+
+ memcpy(level2_ident_pgt, l2, sizeof(pmd_t) * PTRS_PER_PMD);
+ memcpy(level2_kernel_pgt, l2, sizeof(pmd_t) * PTRS_PER_PMD);
+
+ l3 = m2v(pgd[pgd_index(__START_KERNEL_map + PMD_SIZE)].pgd);
+ l2 = m2v(l3[pud_index(__START_KERNEL_map + PMD_SIZE)].pud);
+ memcpy(level2_fixmap_pgt, l2, sizeof(pmd_t) * PTRS_PER_PMD);
+
+ /* Set up identity map */
+ xen_map_identity_early(level2_ident_pgt, max_pfn);
+
+ /* Make pagetable pieces RO */
+ set_page_prot(init_level4_pgt, PAGE_KERNEL_RO);
+ set_page_prot(level3_ident_pgt, PAGE_KERNEL_RO);
+ set_page_prot(level3_kernel_pgt, PAGE_KERNEL_RO);
+ set_page_prot(level3_user_vsyscall, PAGE_KERNEL_RO);
+ set_page_prot(level2_kernel_pgt, PAGE_KERNEL_RO);
+ set_page_prot(level2_fixmap_pgt, PAGE_KERNEL_RO);
+
+ /* Pin down new L4 */
+ pin_pagetable_pfn(MMUEXT_PIN_L4_TABLE,
+ PFN_DOWN(__pa_symbol(init_level4_pgt)));
+
+ /* Unpin Xen-provided one */
+ pin_pagetable_pfn(MMUEXT_UNPIN_TABLE, PFN_DOWN(__pa(pgd)));
+
+ /* Switch over */
+ pgd = init_level4_pgt;
+
+ /*
+ * At this stage there can be no user pgd, and no page
+ * structure to attach it to, so make sure we just set kernel
+ * pgd.
+ */
+ xen_mc_batch();
+ __xen_write_cr3(true, __pa(pgd));
+ xen_mc_issue(PARAVIRT_LAZY_CPU);
+
+ reserve_early(__pa(xen_start_info->pt_base),
+ __pa(xen_start_info->pt_base +
+ xen_start_info->nr_pt_frames * PAGE_SIZE),
+ "XEN PAGETABLES");
+
+ return pgd;
+}
+#else /* !CONFIG_X86_64 */
+static pmd_t level2_kernel_pgt[PTRS_PER_PMD] __page_aligned_bss;
+
+__init pgd_t *xen_setup_kernel_pagetable(pgd_t *pgd,
+ unsigned long max_pfn)
+{
+ pmd_t *kernel_pmd;
+
+ init_pg_tables_start = __pa(pgd);
+ init_pg_tables_end = __pa(pgd) + xen_start_info->nr_pt_frames*PAGE_SIZE;
+ max_pfn_mapped = PFN_DOWN(init_pg_tables_end + 512*1024);
+
+ kernel_pmd = m2v(pgd[KERNEL_PGD_BOUNDARY].pgd);
+ memcpy(level2_kernel_pgt, kernel_pmd, sizeof(pmd_t) * PTRS_PER_PMD);
+
+ xen_map_identity_early(level2_kernel_pgt, max_pfn);
+
+ memcpy(swapper_pg_dir, pgd, sizeof(pgd_t) * PTRS_PER_PGD);
+ set_pgd(&swapper_pg_dir[KERNEL_PGD_BOUNDARY],
+ __pgd(__pa(level2_kernel_pgt) | _PAGE_PRESENT));
+
+ set_page_prot(level2_kernel_pgt, PAGE_KERNEL_RO);
+ set_page_prot(swapper_pg_dir, PAGE_KERNEL_RO);
+ set_page_prot(empty_zero_page, PAGE_KERNEL_RO);
+
+ pin_pagetable_pfn(MMUEXT_UNPIN_TABLE, PFN_DOWN(__pa(pgd)));
+
+ xen_write_cr3(__pa(swapper_pg_dir));
+
+ pin_pagetable_pfn(MMUEXT_PIN_L3_TABLE, PFN_DOWN(__pa(swapper_pg_dir)));
+
+ return swapper_pg_dir;
+}
+#endif /* CONFIG_X86_64 */
+
+static void xen_set_fixmap(unsigned idx, unsigned long phys, pgprot_t prot)
+{
+ pte_t pte;
+
+ phys >>= PAGE_SHIFT;
+
+ switch (idx) {
+ case FIX_BTMAP_END ... FIX_BTMAP_BEGIN:
+#ifdef CONFIG_X86_F00F_BUG
+ case FIX_F00F_IDT:
+#endif
+#ifdef CONFIG_X86_32
+ case FIX_WP_TEST:
+ case FIX_VDSO:
+# ifdef CONFIG_HIGHMEM
+ case FIX_KMAP_BEGIN ... FIX_KMAP_END:
+# endif
+#else
+ case VSYSCALL_LAST_PAGE ... VSYSCALL_FIRST_PAGE:
+#endif
+#ifdef CONFIG_X86_LOCAL_APIC
+ case FIX_APIC_BASE: /* maps dummy local APIC */
+#endif
+ pte = pfn_pte(phys, prot);
+ break;
+
+ default:
+ pte = mfn_pte(phys, prot);
+ break;
+ }
+
+ __native_set_fixmap(idx, pte);
+
+#ifdef CONFIG_X86_64
+ /* Replicate changes to map the vsyscall page into the user
+ pagetable vsyscall mapping. */
+ if (idx >= VSYSCALL_LAST_PAGE && idx <= VSYSCALL_FIRST_PAGE) {
+ unsigned long vaddr = __fix_to_virt(idx);
+ set_pte_vaddr_pud(level3_user_vsyscall, vaddr, pte);
+ }
+#endif
+}
+
+__init void xen_post_allocator_init(void)
+{
+ pv_mmu_ops.set_pte = xen_set_pte;
+ pv_mmu_ops.set_pmd = xen_set_pmd;
+ pv_mmu_ops.set_pud = xen_set_pud;
+#if PAGETABLE_LEVELS == 4
+ pv_mmu_ops.set_pgd = xen_set_pgd;
+#endif
+
+ /* This will work as long as patching hasn't happened yet
+ (which it hasn't) */
+ pv_mmu_ops.alloc_pte = xen_alloc_pte;
+ pv_mmu_ops.alloc_pmd = xen_alloc_pmd;
+ pv_mmu_ops.release_pte = xen_release_pte;
+ pv_mmu_ops.release_pmd = xen_release_pmd;
+#if PAGETABLE_LEVELS == 4
+ pv_mmu_ops.alloc_pud = xen_alloc_pud;
+ pv_mmu_ops.release_pud = xen_release_pud;
+#endif
+
+#ifdef CONFIG_X86_64
+ SetPagePinned(virt_to_page(level3_user_vsyscall));
+#endif
+ xen_mark_init_mm_pinned();
+}
+
+
+const struct pv_mmu_ops xen_mmu_ops __initdata = {
+ .pagetable_setup_start = xen_pagetable_setup_start,
+ .pagetable_setup_done = xen_pagetable_setup_done,
+
+ .read_cr2 = xen_read_cr2,
+ .write_cr2 = xen_write_cr2,
+
+ .read_cr3 = xen_read_cr3,
+ .write_cr3 = xen_write_cr3,
+
+ .flush_tlb_user = xen_flush_tlb,
+ .flush_tlb_kernel = xen_flush_tlb,
+ .flush_tlb_single = xen_flush_tlb_single,
+ .flush_tlb_others = xen_flush_tlb_others,
+
+ .pte_update = paravirt_nop,
+ .pte_update_defer = paravirt_nop,
+
+ .pgd_alloc = xen_pgd_alloc,
+ .pgd_free = xen_pgd_free,
+
+ .alloc_pte = xen_alloc_pte_init,
+ .release_pte = xen_release_pte_init,
+ .alloc_pmd = xen_alloc_pte_init,
+ .alloc_pmd_clone = paravirt_nop,
+ .release_pmd = xen_release_pte_init,
+
+#ifdef CONFIG_HIGHPTE
+ .kmap_atomic_pte = xen_kmap_atomic_pte,
+#endif
+
+#ifdef CONFIG_X86_64
+ .set_pte = xen_set_pte,
+#else
+ .set_pte = xen_set_pte_init,
+#endif
+ .set_pte_at = xen_set_pte_at,
+ .set_pmd = xen_set_pmd_hyper,
+
+ .ptep_modify_prot_start = __ptep_modify_prot_start,
+ .ptep_modify_prot_commit = __ptep_modify_prot_commit,
+
+ .pte_val = PV_CALLEE_SAVE(xen_pte_val),
+ .pgd_val = PV_CALLEE_SAVE(xen_pgd_val),
+
+ .make_pte = PV_CALLEE_SAVE(xen_make_pte),
+ .make_pgd = PV_CALLEE_SAVE(xen_make_pgd),
+
+#ifdef CONFIG_X86_PAE
+ .set_pte_atomic = xen_set_pte_atomic,
+ .set_pte_present = xen_set_pte_at,
+ .pte_clear = xen_pte_clear,
+ .pmd_clear = xen_pmd_clear,
+#endif /* CONFIG_X86_PAE */
+ .set_pud = xen_set_pud_hyper,
+
+ .make_pmd = PV_CALLEE_SAVE(xen_make_pmd),
+ .pmd_val = PV_CALLEE_SAVE(xen_pmd_val),
+
+#if PAGETABLE_LEVELS == 4
+ .pud_val = PV_CALLEE_SAVE(xen_pud_val),
+ .make_pud = PV_CALLEE_SAVE(xen_make_pud),
+ .set_pgd = xen_set_pgd_hyper,
+
+ .alloc_pud = xen_alloc_pte_init,
+ .release_pud = xen_release_pte_init,
+#endif /* PAGETABLE_LEVELS == 4 */
+
+ .activate_mm = xen_activate_mm,
+ .dup_mmap = xen_dup_mmap,
+ .exit_mmap = xen_exit_mmap,
+
+ .lazy_mode = {
+ .enter = paravirt_enter_lazy_mmu,
+ .leave = xen_leave_lazy,
+ },
+
+ .set_fixmap = xen_set_fixmap,
+};
+
+
#ifdef CONFIG_XEN_DEBUG_FS
static struct dentry *d_mmu_debug;
diff --git a/arch/x86/xen/mmu.h b/arch/x86/xen/mmu.h
index 98d7165..24d1b44 100644
--- a/arch/x86/xen/mmu.h
+++ b/arch/x86/xen/mmu.h
@@ -54,4 +54,7 @@
void xen_ptep_modify_prot_commit(struct mm_struct *mm, unsigned long addr,
pte_t *ptep, pte_t pte);
+unsigned long xen_read_cr2_direct(void);
+
+extern const struct pv_mmu_ops xen_mmu_ops;
#endif /* _XEN_MMU_H */
diff --git a/arch/x86/xen/multicalls.c b/arch/x86/xen/multicalls.c
index c738644..8bff7e7 100644
--- a/arch/x86/xen/multicalls.c
+++ b/arch/x86/xen/multicalls.c
@@ -39,6 +39,7 @@
struct multicall_entry entries[MC_BATCH];
#if MC_DEBUG
struct multicall_entry debug[MC_BATCH];
+ void *caller[MC_BATCH];
#endif
unsigned char args[MC_ARGS];
struct callback {
@@ -154,11 +155,12 @@
ret, smp_processor_id());
dump_stack();
for (i = 0; i < b->mcidx; i++) {
- printk(KERN_DEBUG " call %2d/%d: op=%lu arg=[%lx] result=%ld\n",
+ printk(KERN_DEBUG " call %2d/%d: op=%lu arg=[%lx] result=%ld\t%pF\n",
i+1, b->mcidx,
b->debug[i].op,
b->debug[i].args[0],
- b->entries[i].result);
+ b->entries[i].result,
+ b->caller[i]);
}
}
#endif
@@ -168,8 +170,6 @@
} else
BUG_ON(b->argidx != 0);
- local_irq_restore(flags);
-
for (i = 0; i < b->cbidx; i++) {
struct callback *cb = &b->callbacks[i];
@@ -177,7 +177,9 @@
}
b->cbidx = 0;
- BUG_ON(ret);
+ local_irq_restore(flags);
+
+ WARN_ON(ret);
}
struct multicall_space __xen_mc_entry(size_t args)
@@ -197,6 +199,9 @@
}
ret.mc = &b->entries[b->mcidx];
+#ifdef MC_DEBUG
+ b->caller[b->mcidx] = __builtin_return_address(0);
+#endif
b->mcidx++;
ret.args = &b->args[argidx];
b->argidx = argidx + args;
diff --git a/arch/x86/xen/multicalls.h b/arch/x86/xen/multicalls.h
index fa3e107..9e565da 100644
--- a/arch/x86/xen/multicalls.h
+++ b/arch/x86/xen/multicalls.h
@@ -41,7 +41,7 @@
xen_mc_flush();
/* restore flags saved in xen_mc_batch */
- local_irq_restore(x86_read_percpu(xen_mc_irq_flags));
+ local_irq_restore(percpu_read(xen_mc_irq_flags));
}
/* Set up a callback to be called when the current batch is flushed */
diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c
index c44e206..035582a 100644
--- a/arch/x86/xen/smp.c
+++ b/arch/x86/xen/smp.c
@@ -50,11 +50,7 @@
*/
static irqreturn_t xen_reschedule_interrupt(int irq, void *dev_id)
{
-#ifdef CONFIG_X86_32
- __get_cpu_var(irq_stat).irq_resched_count++;
-#else
- add_pda(irq_resched_count, 1);
-#endif
+ inc_irq_stat(irq_resched_count);
return IRQ_HANDLED;
}
@@ -78,7 +74,7 @@
xen_setup_cpu_clockevents();
cpu_set(cpu, cpu_online_map);
- x86_write_percpu(cpu_state, CPU_ONLINE);
+ percpu_write(cpu_state, CPU_ONLINE);
wmb();
/* We can take interrupts now: we're officially "up". */
@@ -174,7 +170,7 @@
/* We've switched to the "real" per-cpu gdt, so make sure the
old memory can be recycled */
- make_lowmem_page_readwrite(&per_cpu_var(gdt_page));
+ make_lowmem_page_readwrite(xen_initial_gdt);
xen_setup_vcpu_info_placement();
}
@@ -239,6 +235,8 @@
ctxt->user_regs.ss = __KERNEL_DS;
#ifdef CONFIG_X86_32
ctxt->user_regs.fs = __KERNEL_PERCPU;
+#else
+ ctxt->gs_base_kernel = per_cpu_offset(cpu);
#endif
ctxt->user_regs.eip = (unsigned long)cpu_bringup_and_idle;
ctxt->user_regs.eflags = 0x1000; /* IOPL_RING1 */
@@ -283,23 +281,14 @@
struct task_struct *idle = idle_task(cpu);
int rc;
-#ifdef CONFIG_X86_64
- /* Allocate node local memory for AP pdas */
- WARN_ON(cpu == 0);
- if (cpu > 0) {
- rc = get_local_pda(cpu);
- if (rc)
- return rc;
- }
-#endif
-
-#ifdef CONFIG_X86_32
- init_gdt(cpu);
per_cpu(current_task, cpu) = idle;
+#ifdef CONFIG_X86_32
irq_ctx_init(cpu);
#else
- cpu_pda(cpu)->pcurrent = idle;
clear_tsk_thread_flag(idle, TIF_FORK);
+ per_cpu(kernel_stack, cpu) =
+ (unsigned long)task_stack_page(idle) -
+ KERNEL_STACK_OFFSET + THREAD_SIZE;
#endif
xen_setup_timer(cpu);
xen_init_lock_cpu(cpu);
@@ -445,11 +434,7 @@
{
irq_enter();
generic_smp_call_function_interrupt();
-#ifdef CONFIG_X86_32
- __get_cpu_var(irq_stat).irq_call_count++;
-#else
- add_pda(irq_call_count, 1);
-#endif
+ inc_irq_stat(irq_call_count);
irq_exit();
return IRQ_HANDLED;
@@ -459,11 +444,7 @@
{
irq_enter();
generic_smp_call_function_single_interrupt();
-#ifdef CONFIG_X86_32
- __get_cpu_var(irq_stat).irq_call_count++;
-#else
- add_pda(irq_call_count, 1);
-#endif
+ inc_irq_stat(irq_call_count);
irq_exit();
return IRQ_HANDLED;
diff --git a/arch/x86/xen/suspend.c b/arch/x86/xen/suspend.c
index 212ffe0..95be7b4 100644
--- a/arch/x86/xen/suspend.c
+++ b/arch/x86/xen/suspend.c
@@ -6,6 +6,7 @@
#include <asm/xen/hypercall.h>
#include <asm/xen/page.h>
+#include <asm/fixmap.h>
#include "xen-ops.h"
#include "mmu.h"
diff --git a/arch/x86/xen/xen-asm.S b/arch/x86/xen/xen-asm.S
new file mode 100644
index 0000000..79d7362
--- /dev/null
+++ b/arch/x86/xen/xen-asm.S
@@ -0,0 +1,142 @@
+/*
+ * Asm versions of Xen pv-ops, suitable for either direct use or
+ * inlining. The inline versions are the same as the direct-use
+ * versions, with the pre- and post-amble chopped off.
+ *
+ * This code is encoded for size rather than absolute efficiency, with
+ * a view to being able to inline as much as possible.
+ *
+ * We only bother with direct forms (ie, vcpu in percpu data) of the
+ * operations here; the indirect forms are better handled in C, since
+ * they're generally too large to inline anyway.
+ */
+
+#include <asm/asm-offsets.h>
+#include <asm/percpu.h>
+#include <asm/processor-flags.h>
+
+#include "xen-asm.h"
+
+/*
+ * Enable events. This clears the event mask and tests the pending
+ * event status with one and operation. If there are pending events,
+ * then enter the hypervisor to get them handled.
+ */
+ENTRY(xen_irq_enable_direct)
+ /* Unmask events */
+ movb $0, PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_mask
+
+ /*
+ * Preempt here doesn't matter because that will deal with any
+ * pending interrupts. The pending check may end up being run
+ * on the wrong CPU, but that doesn't hurt.
+ */
+
+ /* Test for pending */
+ testb $0xff, PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_pending
+ jz 1f
+
+2: call check_events
+1:
+ENDPATCH(xen_irq_enable_direct)
+ ret
+ ENDPROC(xen_irq_enable_direct)
+ RELOC(xen_irq_enable_direct, 2b+1)
+
+
+/*
+ * Disabling events is simply a matter of making the event mask
+ * non-zero.
+ */
+ENTRY(xen_irq_disable_direct)
+ movb $1, PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_mask
+ENDPATCH(xen_irq_disable_direct)
+ ret
+ ENDPROC(xen_irq_disable_direct)
+ RELOC(xen_irq_disable_direct, 0)
+
+/*
+ * (xen_)save_fl is used to get the current interrupt enable status.
+ * Callers expect the status to be in X86_EFLAGS_IF, and other bits
+ * may be set in the return value. We take advantage of this by
+ * making sure that X86_EFLAGS_IF has the right value (and other bits
+ * in that byte are 0), but other bits in the return value are
+ * undefined. We need to toggle the state of the bit, because Xen and
+ * x86 use opposite senses (mask vs enable).
+ */
+ENTRY(xen_save_fl_direct)
+ testb $0xff, PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_mask
+ setz %ah
+ addb %ah, %ah
+ENDPATCH(xen_save_fl_direct)
+ ret
+ ENDPROC(xen_save_fl_direct)
+ RELOC(xen_save_fl_direct, 0)
+
+
+/*
+ * In principle the caller should be passing us a value return from
+ * xen_save_fl_direct, but for robustness sake we test only the
+ * X86_EFLAGS_IF flag rather than the whole byte. After setting the
+ * interrupt mask state, it checks for unmasked pending events and
+ * enters the hypervisor to get them delivered if so.
+ */
+ENTRY(xen_restore_fl_direct)
+#ifdef CONFIG_X86_64
+ testw $X86_EFLAGS_IF, %di
+#else
+ testb $X86_EFLAGS_IF>>8, %ah
+#endif
+ setz PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_mask
+ /*
+ * Preempt here doesn't matter because that will deal with any
+ * pending interrupts. The pending check may end up being run
+ * on the wrong CPU, but that doesn't hurt.
+ */
+
+ /* check for unmasked and pending */
+ cmpw $0x0001, PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_pending
+ jz 1f
+2: call check_events
+1:
+ENDPATCH(xen_restore_fl_direct)
+ ret
+ ENDPROC(xen_restore_fl_direct)
+ RELOC(xen_restore_fl_direct, 2b+1)
+
+
+/*
+ * Force an event check by making a hypercall, but preserve regs
+ * before making the call.
+ */
+check_events:
+#ifdef CONFIG_X86_32
+ push %eax
+ push %ecx
+ push %edx
+ call xen_force_evtchn_callback
+ pop %edx
+ pop %ecx
+ pop %eax
+#else
+ push %rax
+ push %rcx
+ push %rdx
+ push %rsi
+ push %rdi
+ push %r8
+ push %r9
+ push %r10
+ push %r11
+ call xen_force_evtchn_callback
+ pop %r11
+ pop %r10
+ pop %r9
+ pop %r8
+ pop %rdi
+ pop %rsi
+ pop %rdx
+ pop %rcx
+ pop %rax
+#endif
+ ret
diff --git a/arch/x86/xen/xen-asm.h b/arch/x86/xen/xen-asm.h
new file mode 100644
index 0000000..4652764
--- /dev/null
+++ b/arch/x86/xen/xen-asm.h
@@ -0,0 +1,12 @@
+#ifndef _XEN_XEN_ASM_H
+#define _XEN_XEN_ASM_H
+
+#include <linux/linkage.h>
+
+#define RELOC(x, v) .globl x##_reloc; x##_reloc=v
+#define ENDPATCH(x) .globl x##_end; x##_end=.
+
+/* Pseudo-flag used for virtual NMI, which we don't implement yet */
+#define XEN_EFLAGS_NMI 0x80000000
+
+#endif
diff --git a/arch/x86/xen/xen-asm_32.S b/arch/x86/xen/xen-asm_32.S
index 42786f5..88e15de 100644
--- a/arch/x86/xen/xen-asm_32.S
+++ b/arch/x86/xen/xen-asm_32.S
@@ -1,298 +1,27 @@
/*
- Asm versions of Xen pv-ops, suitable for either direct use or inlining.
- The inline versions are the same as the direct-use versions, with the
- pre- and post-amble chopped off.
-
- This code is encoded for size rather than absolute efficiency,
- with a view to being able to inline as much as possible.
-
- We only bother with direct forms (ie, vcpu in pda) of the operations
- here; the indirect forms are better handled in C, since they're
- generally too large to inline anyway.
+ * Asm versions of Xen pv-ops, suitable for either direct use or
+ * inlining. The inline versions are the same as the direct-use
+ * versions, with the pre- and post-amble chopped off.
+ *
+ * This code is encoded for size rather than absolute efficiency, with
+ * a view to being able to inline as much as possible.
+ *
+ * We only bother with direct forms (ie, vcpu in pda) of the
+ * operations here; the indirect forms are better handled in C, since
+ * they're generally too large to inline anyway.
*/
-#include <linux/linkage.h>
-
-#include <asm/asm-offsets.h>
#include <asm/thread_info.h>
-#include <asm/percpu.h>
#include <asm/processor-flags.h>
#include <asm/segment.h>
#include <xen/interface/xen.h>
-#define RELOC(x, v) .globl x##_reloc; x##_reloc=v
-#define ENDPATCH(x) .globl x##_end; x##_end=.
-
-/* Pseudo-flag used for virtual NMI, which we don't implement yet */
-#define XEN_EFLAGS_NMI 0x80000000
+#include "xen-asm.h"
/*
- Enable events. This clears the event mask and tests the pending
- event status with one and operation. If there are pending
- events, then enter the hypervisor to get them handled.
- */
-ENTRY(xen_irq_enable_direct)
- /* Unmask events */
- movb $0, PER_CPU_VAR(xen_vcpu_info)+XEN_vcpu_info_mask
-
- /* Preempt here doesn't matter because that will deal with
- any pending interrupts. The pending check may end up being
- run on the wrong CPU, but that doesn't hurt. */
-
- /* Test for pending */
- testb $0xff, PER_CPU_VAR(xen_vcpu_info)+XEN_vcpu_info_pending
- jz 1f
-
-2: call check_events
-1:
-ENDPATCH(xen_irq_enable_direct)
- ret
- ENDPROC(xen_irq_enable_direct)
- RELOC(xen_irq_enable_direct, 2b+1)
-
-
-/*
- Disabling events is simply a matter of making the event mask
- non-zero.
- */
-ENTRY(xen_irq_disable_direct)
- movb $1, PER_CPU_VAR(xen_vcpu_info)+XEN_vcpu_info_mask
-ENDPATCH(xen_irq_disable_direct)
- ret
- ENDPROC(xen_irq_disable_direct)
- RELOC(xen_irq_disable_direct, 0)
-
-/*
- (xen_)save_fl is used to get the current interrupt enable status.
- Callers expect the status to be in X86_EFLAGS_IF, and other bits
- may be set in the return value. We take advantage of this by
- making sure that X86_EFLAGS_IF has the right value (and other bits
- in that byte are 0), but other bits in the return value are
- undefined. We need to toggle the state of the bit, because
- Xen and x86 use opposite senses (mask vs enable).
- */
-ENTRY(xen_save_fl_direct)
- testb $0xff, PER_CPU_VAR(xen_vcpu_info)+XEN_vcpu_info_mask
- setz %ah
- addb %ah,%ah
-ENDPATCH(xen_save_fl_direct)
- ret
- ENDPROC(xen_save_fl_direct)
- RELOC(xen_save_fl_direct, 0)
-
-
-/*
- In principle the caller should be passing us a value return
- from xen_save_fl_direct, but for robustness sake we test only
- the X86_EFLAGS_IF flag rather than the whole byte. After
- setting the interrupt mask state, it checks for unmasked
- pending events and enters the hypervisor to get them delivered
- if so.
- */
-ENTRY(xen_restore_fl_direct)
- testb $X86_EFLAGS_IF>>8, %ah
- setz PER_CPU_VAR(xen_vcpu_info)+XEN_vcpu_info_mask
- /* Preempt here doesn't matter because that will deal with
- any pending interrupts. The pending check may end up being
- run on the wrong CPU, but that doesn't hurt. */
-
- /* check for unmasked and pending */
- cmpw $0x0001, PER_CPU_VAR(xen_vcpu_info)+XEN_vcpu_info_pending
- jz 1f
-2: call check_events
-1:
-ENDPATCH(xen_restore_fl_direct)
- ret
- ENDPROC(xen_restore_fl_direct)
- RELOC(xen_restore_fl_direct, 2b+1)
-
-/*
- We can't use sysexit directly, because we're not running in ring0.
- But we can easily fake it up using iret. Assuming xen_sysexit
- is jumped to with a standard stack frame, we can just strip it
- back to a standard iret frame and use iret.
- */
-ENTRY(xen_sysexit)
- movl PT_EAX(%esp), %eax /* Shouldn't be necessary? */
- orl $X86_EFLAGS_IF, PT_EFLAGS(%esp)
- lea PT_EIP(%esp), %esp
-
- jmp xen_iret
-ENDPROC(xen_sysexit)
-
-/*
- This is run where a normal iret would be run, with the same stack setup:
- 8: eflags
- 4: cs
- esp-> 0: eip
-
- This attempts to make sure that any pending events are dealt
- with on return to usermode, but there is a small window in
- which an event can happen just before entering usermode. If
- the nested interrupt ends up setting one of the TIF_WORK_MASK
- pending work flags, they will not be tested again before
- returning to usermode. This means that a process can end up
- with pending work, which will be unprocessed until the process
- enters and leaves the kernel again, which could be an
- unbounded amount of time. This means that a pending signal or
- reschedule event could be indefinitely delayed.
-
- The fix is to notice a nested interrupt in the critical
- window, and if one occurs, then fold the nested interrupt into
- the current interrupt stack frame, and re-process it
- iteratively rather than recursively. This means that it will
- exit via the normal path, and all pending work will be dealt
- with appropriately.
-
- Because the nested interrupt handler needs to deal with the
- current stack state in whatever form its in, we keep things
- simple by only using a single register which is pushed/popped
- on the stack.
- */
-ENTRY(xen_iret)
- /* test eflags for special cases */
- testl $(X86_EFLAGS_VM | XEN_EFLAGS_NMI), 8(%esp)
- jnz hyper_iret
-
- push %eax
- ESP_OFFSET=4 # bytes pushed onto stack
-
- /* Store vcpu_info pointer for easy access. Do it this
- way to avoid having to reload %fs */
-#ifdef CONFIG_SMP
- GET_THREAD_INFO(%eax)
- movl TI_cpu(%eax),%eax
- movl __per_cpu_offset(,%eax,4),%eax
- mov per_cpu__xen_vcpu(%eax),%eax
-#else
- movl per_cpu__xen_vcpu, %eax
-#endif
-
- /* check IF state we're restoring */
- testb $X86_EFLAGS_IF>>8, 8+1+ESP_OFFSET(%esp)
-
- /* Maybe enable events. Once this happens we could get a
- recursive event, so the critical region starts immediately
- afterwards. However, if that happens we don't end up
- resuming the code, so we don't have to be worried about
- being preempted to another CPU. */
- setz XEN_vcpu_info_mask(%eax)
-xen_iret_start_crit:
-
- /* check for unmasked and pending */
- cmpw $0x0001, XEN_vcpu_info_pending(%eax)
-
- /* If there's something pending, mask events again so we
- can jump back into xen_hypervisor_callback */
- sete XEN_vcpu_info_mask(%eax)
-
- popl %eax
-
- /* From this point on the registers are restored and the stack
- updated, so we don't need to worry about it if we're preempted */
-iret_restore_end:
-
- /* Jump to hypervisor_callback after fixing up the stack.
- Events are masked, so jumping out of the critical
- region is OK. */
- je xen_hypervisor_callback
-
-1: iret
-xen_iret_end_crit:
-.section __ex_table,"a"
- .align 4
- .long 1b,iret_exc
-.previous
-
-hyper_iret:
- /* put this out of line since its very rarely used */
- jmp hypercall_page + __HYPERVISOR_iret * 32
-
- .globl xen_iret_start_crit, xen_iret_end_crit
-
-/*
- This is called by xen_hypervisor_callback in entry.S when it sees
- that the EIP at the time of interrupt was between xen_iret_start_crit
- and xen_iret_end_crit. We're passed the EIP in %eax so we can do
- a more refined determination of what to do.
-
- The stack format at this point is:
- ----------------
- ss : (ss/esp may be present if we came from usermode)
- esp :
- eflags } outer exception info
- cs }
- eip }
- ---------------- <- edi (copy dest)
- eax : outer eax if it hasn't been restored
- ----------------
- eflags } nested exception info
- cs } (no ss/esp because we're nested
- eip } from the same ring)
- orig_eax }<- esi (copy src)
- - - - - - - - -
- fs }
- es }
- ds } SAVE_ALL state
- eax }
- : :
- ebx }<- esp
- ----------------
-
- In order to deliver the nested exception properly, we need to shift
- everything from the return addr up to the error code so it
- sits just under the outer exception info. This means that when we
- handle the exception, we do it in the context of the outer exception
- rather than starting a new one.
-
- The only caveat is that if the outer eax hasn't been
- restored yet (ie, it's still on stack), we need to insert
- its value into the SAVE_ALL state before going on, since
- it's usermode state which we eventually need to restore.
- */
-ENTRY(xen_iret_crit_fixup)
- /*
- Paranoia: Make sure we're really coming from kernel space.
- One could imagine a case where userspace jumps into the
- critical range address, but just before the CPU delivers a GP,
- it decides to deliver an interrupt instead. Unlikely?
- Definitely. Easy to avoid? Yes. The Intel documents
- explicitly say that the reported EIP for a bad jump is the
- jump instruction itself, not the destination, but some virtual
- environments get this wrong.
- */
- movl PT_CS(%esp), %ecx
- andl $SEGMENT_RPL_MASK, %ecx
- cmpl $USER_RPL, %ecx
- je 2f
-
- lea PT_ORIG_EAX(%esp), %esi
- lea PT_EFLAGS(%esp), %edi
-
- /* If eip is before iret_restore_end then stack
- hasn't been restored yet. */
- cmp $iret_restore_end, %eax
- jae 1f
-
- movl 0+4(%edi),%eax /* copy EAX (just above top of frame) */
- movl %eax, PT_EAX(%esp)
-
- lea ESP_OFFSET(%edi),%edi /* move dest up over saved regs */
-
- /* set up the copy */
-1: std
- mov $PT_EIP / 4, %ecx /* saved regs up to orig_eax */
- rep movsl
- cld
-
- lea 4(%edi),%esp /* point esp to new frame */
-2: jmp xen_do_upcall
-
-
-/*
- Force an event check by making a hypercall,
- but preserve regs before making the call.
+ * Force an event check by making a hypercall, but preserve regs
+ * before making the call.
*/
check_events:
push %eax
@@ -303,3 +32,197 @@
pop %ecx
pop %eax
ret
+
+/*
+ * We can't use sysexit directly, because we're not running in ring0.
+ * But we can easily fake it up using iret. Assuming xen_sysexit is
+ * jumped to with a standard stack frame, we can just strip it back to
+ * a standard iret frame and use iret.
+ */
+ENTRY(xen_sysexit)
+ movl PT_EAX(%esp), %eax /* Shouldn't be necessary? */
+ orl $X86_EFLAGS_IF, PT_EFLAGS(%esp)
+ lea PT_EIP(%esp), %esp
+
+ jmp xen_iret
+ENDPROC(xen_sysexit)
+
+/*
+ * This is run where a normal iret would be run, with the same stack setup:
+ * 8: eflags
+ * 4: cs
+ * esp-> 0: eip
+ *
+ * This attempts to make sure that any pending events are dealt with
+ * on return to usermode, but there is a small window in which an
+ * event can happen just before entering usermode. If the nested
+ * interrupt ends up setting one of the TIF_WORK_MASK pending work
+ * flags, they will not be tested again before returning to
+ * usermode. This means that a process can end up with pending work,
+ * which will be unprocessed until the process enters and leaves the
+ * kernel again, which could be an unbounded amount of time. This
+ * means that a pending signal or reschedule event could be
+ * indefinitely delayed.
+ *
+ * The fix is to notice a nested interrupt in the critical window, and
+ * if one occurs, then fold the nested interrupt into the current
+ * interrupt stack frame, and re-process it iteratively rather than
+ * recursively. This means that it will exit via the normal path, and
+ * all pending work will be dealt with appropriately.
+ *
+ * Because the nested interrupt handler needs to deal with the current
+ * stack state in whatever form its in, we keep things simple by only
+ * using a single register which is pushed/popped on the stack.
+ */
+ENTRY(xen_iret)
+ /* test eflags for special cases */
+ testl $(X86_EFLAGS_VM | XEN_EFLAGS_NMI), 8(%esp)
+ jnz hyper_iret
+
+ push %eax
+ ESP_OFFSET=4 # bytes pushed onto stack
+
+ /*
+ * Store vcpu_info pointer for easy access. Do it this way to
+ * avoid having to reload %fs
+ */
+#ifdef CONFIG_SMP
+ GET_THREAD_INFO(%eax)
+ movl TI_cpu(%eax), %eax
+ movl __per_cpu_offset(,%eax,4), %eax
+ mov per_cpu__xen_vcpu(%eax), %eax
+#else
+ movl per_cpu__xen_vcpu, %eax
+#endif
+
+ /* check IF state we're restoring */
+ testb $X86_EFLAGS_IF>>8, 8+1+ESP_OFFSET(%esp)
+
+ /*
+ * Maybe enable events. Once this happens we could get a
+ * recursive event, so the critical region starts immediately
+ * afterwards. However, if that happens we don't end up
+ * resuming the code, so we don't have to be worried about
+ * being preempted to another CPU.
+ */
+ setz XEN_vcpu_info_mask(%eax)
+xen_iret_start_crit:
+
+ /* check for unmasked and pending */
+ cmpw $0x0001, XEN_vcpu_info_pending(%eax)
+
+ /*
+ * If there's something pending, mask events again so we can
+ * jump back into xen_hypervisor_callback
+ */
+ sete XEN_vcpu_info_mask(%eax)
+
+ popl %eax
+
+ /*
+ * From this point on the registers are restored and the stack
+ * updated, so we don't need to worry about it if we're
+ * preempted
+ */
+iret_restore_end:
+
+ /*
+ * Jump to hypervisor_callback after fixing up the stack.
+ * Events are masked, so jumping out of the critical region is
+ * OK.
+ */
+ je xen_hypervisor_callback
+
+1: iret
+xen_iret_end_crit:
+.section __ex_table, "a"
+ .align 4
+ .long 1b, iret_exc
+.previous
+
+hyper_iret:
+ /* put this out of line since its very rarely used */
+ jmp hypercall_page + __HYPERVISOR_iret * 32
+
+ .globl xen_iret_start_crit, xen_iret_end_crit
+
+/*
+ * This is called by xen_hypervisor_callback in entry.S when it sees
+ * that the EIP at the time of interrupt was between
+ * xen_iret_start_crit and xen_iret_end_crit. We're passed the EIP in
+ * %eax so we can do a more refined determination of what to do.
+ *
+ * The stack format at this point is:
+ * ----------------
+ * ss : (ss/esp may be present if we came from usermode)
+ * esp :
+ * eflags } outer exception info
+ * cs }
+ * eip }
+ * ---------------- <- edi (copy dest)
+ * eax : outer eax if it hasn't been restored
+ * ----------------
+ * eflags } nested exception info
+ * cs } (no ss/esp because we're nested
+ * eip } from the same ring)
+ * orig_eax }<- esi (copy src)
+ * - - - - - - - -
+ * fs }
+ * es }
+ * ds } SAVE_ALL state
+ * eax }
+ * : :
+ * ebx }<- esp
+ * ----------------
+ *
+ * In order to deliver the nested exception properly, we need to shift
+ * everything from the return addr up to the error code so it sits
+ * just under the outer exception info. This means that when we
+ * handle the exception, we do it in the context of the outer
+ * exception rather than starting a new one.
+ *
+ * The only caveat is that if the outer eax hasn't been restored yet
+ * (ie, it's still on stack), we need to insert its value into the
+ * SAVE_ALL state before going on, since it's usermode state which we
+ * eventually need to restore.
+ */
+ENTRY(xen_iret_crit_fixup)
+ /*
+ * Paranoia: Make sure we're really coming from kernel space.
+ * One could imagine a case where userspace jumps into the
+ * critical range address, but just before the CPU delivers a
+ * GP, it decides to deliver an interrupt instead. Unlikely?
+ * Definitely. Easy to avoid? Yes. The Intel documents
+ * explicitly say that the reported EIP for a bad jump is the
+ * jump instruction itself, not the destination, but some
+ * virtual environments get this wrong.
+ */
+ movl PT_CS(%esp), %ecx
+ andl $SEGMENT_RPL_MASK, %ecx
+ cmpl $USER_RPL, %ecx
+ je 2f
+
+ lea PT_ORIG_EAX(%esp), %esi
+ lea PT_EFLAGS(%esp), %edi
+
+ /*
+ * If eip is before iret_restore_end then stack
+ * hasn't been restored yet.
+ */
+ cmp $iret_restore_end, %eax
+ jae 1f
+
+ movl 0+4(%edi), %eax /* copy EAX (just above top of frame) */
+ movl %eax, PT_EAX(%esp)
+
+ lea ESP_OFFSET(%edi), %edi /* move dest up over saved regs */
+
+ /* set up the copy */
+1: std
+ mov $PT_EIP / 4, %ecx /* saved regs up to orig_eax */
+ rep movsl
+ cld
+
+ lea 4(%edi), %esp /* point esp to new frame */
+2: jmp xen_do_upcall
+
diff --git a/arch/x86/xen/xen-asm_64.S b/arch/x86/xen/xen-asm_64.S
index 05794c5..02f496a 100644
--- a/arch/x86/xen/xen-asm_64.S
+++ b/arch/x86/xen/xen-asm_64.S
@@ -1,174 +1,45 @@
/*
- Asm versions of Xen pv-ops, suitable for either direct use or inlining.
- The inline versions are the same as the direct-use versions, with the
- pre- and post-amble chopped off.
-
- This code is encoded for size rather than absolute efficiency,
- with a view to being able to inline as much as possible.
-
- We only bother with direct forms (ie, vcpu in pda) of the operations
- here; the indirect forms are better handled in C, since they're
- generally too large to inline anyway.
+ * Asm versions of Xen pv-ops, suitable for either direct use or
+ * inlining. The inline versions are the same as the direct-use
+ * versions, with the pre- and post-amble chopped off.
+ *
+ * This code is encoded for size rather than absolute efficiency, with
+ * a view to being able to inline as much as possible.
+ *
+ * We only bother with direct forms (ie, vcpu in pda) of the
+ * operations here; the indirect forms are better handled in C, since
+ * they're generally too large to inline anyway.
*/
-#include <linux/linkage.h>
-
-#include <asm/asm-offsets.h>
-#include <asm/processor-flags.h>
#include <asm/errno.h>
+#include <asm/percpu.h>
+#include <asm/processor-flags.h>
#include <asm/segment.h>
#include <xen/interface/xen.h>
-#define RELOC(x, v) .globl x##_reloc; x##_reloc=v
-#define ENDPATCH(x) .globl x##_end; x##_end=.
-
-/* Pseudo-flag used for virtual NMI, which we don't implement yet */
-#define XEN_EFLAGS_NMI 0x80000000
-
-#if 1
-/*
- x86-64 does not yet support direct access to percpu variables
- via a segment override, so we just need to make sure this code
- never gets used
- */
-#define BUG ud2a
-#define PER_CPU_VAR(var, off) 0xdeadbeef
-#endif
-
-/*
- Enable events. This clears the event mask and tests the pending
- event status with one and operation. If there are pending
- events, then enter the hypervisor to get them handled.
- */
-ENTRY(xen_irq_enable_direct)
- BUG
-
- /* Unmask events */
- movb $0, PER_CPU_VAR(xen_vcpu_info, XEN_vcpu_info_mask)
-
- /* Preempt here doesn't matter because that will deal with
- any pending interrupts. The pending check may end up being
- run on the wrong CPU, but that doesn't hurt. */
-
- /* Test for pending */
- testb $0xff, PER_CPU_VAR(xen_vcpu_info, XEN_vcpu_info_pending)
- jz 1f
-
-2: call check_events
-1:
-ENDPATCH(xen_irq_enable_direct)
- ret
- ENDPROC(xen_irq_enable_direct)
- RELOC(xen_irq_enable_direct, 2b+1)
-
-/*
- Disabling events is simply a matter of making the event mask
- non-zero.
- */
-ENTRY(xen_irq_disable_direct)
- BUG
-
- movb $1, PER_CPU_VAR(xen_vcpu_info, XEN_vcpu_info_mask)
-ENDPATCH(xen_irq_disable_direct)
- ret
- ENDPROC(xen_irq_disable_direct)
- RELOC(xen_irq_disable_direct, 0)
-
-/*
- (xen_)save_fl is used to get the current interrupt enable status.
- Callers expect the status to be in X86_EFLAGS_IF, and other bits
- may be set in the return value. We take advantage of this by
- making sure that X86_EFLAGS_IF has the right value (and other bits
- in that byte are 0), but other bits in the return value are
- undefined. We need to toggle the state of the bit, because
- Xen and x86 use opposite senses (mask vs enable).
- */
-ENTRY(xen_save_fl_direct)
- BUG
-
- testb $0xff, PER_CPU_VAR(xen_vcpu_info, XEN_vcpu_info_mask)
- setz %ah
- addb %ah,%ah
-ENDPATCH(xen_save_fl_direct)
- ret
- ENDPROC(xen_save_fl_direct)
- RELOC(xen_save_fl_direct, 0)
-
-/*
- In principle the caller should be passing us a value return
- from xen_save_fl_direct, but for robustness sake we test only
- the X86_EFLAGS_IF flag rather than the whole byte. After
- setting the interrupt mask state, it checks for unmasked
- pending events and enters the hypervisor to get them delivered
- if so.
- */
-ENTRY(xen_restore_fl_direct)
- BUG
-
- testb $X86_EFLAGS_IF>>8, %ah
- setz PER_CPU_VAR(xen_vcpu_info, XEN_vcpu_info_mask)
- /* Preempt here doesn't matter because that will deal with
- any pending interrupts. The pending check may end up being
- run on the wrong CPU, but that doesn't hurt. */
-
- /* check for unmasked and pending */
- cmpw $0x0001, PER_CPU_VAR(xen_vcpu_info, XEN_vcpu_info_pending)
- jz 1f
-2: call check_events
-1:
-ENDPATCH(xen_restore_fl_direct)
- ret
- ENDPROC(xen_restore_fl_direct)
- RELOC(xen_restore_fl_direct, 2b+1)
-
-
-/*
- Force an event check by making a hypercall,
- but preserve regs before making the call.
- */
-check_events:
- push %rax
- push %rcx
- push %rdx
- push %rsi
- push %rdi
- push %r8
- push %r9
- push %r10
- push %r11
- call xen_force_evtchn_callback
- pop %r11
- pop %r10
- pop %r9
- pop %r8
- pop %rdi
- pop %rsi
- pop %rdx
- pop %rcx
- pop %rax
- ret
+#include "xen-asm.h"
ENTRY(xen_adjust_exception_frame)
- mov 8+0(%rsp),%rcx
- mov 8+8(%rsp),%r11
+ mov 8+0(%rsp), %rcx
+ mov 8+8(%rsp), %r11
ret $16
hypercall_iret = hypercall_page + __HYPERVISOR_iret * 32
/*
- Xen64 iret frame:
-
- ss
- rsp
- rflags
- cs
- rip <-- standard iret frame
-
- flags
-
- rcx }
- r11 }<-- pushed by hypercall page
-rsp -> rax }
+ * Xen64 iret frame:
+ *
+ * ss
+ * rsp
+ * rflags
+ * cs
+ * rip <-- standard iret frame
+ *
+ * flags
+ *
+ * rcx }
+ * r11 }<-- pushed by hypercall page
+ * rsp->rax }
*/
ENTRY(xen_iret)
pushq $0
@@ -177,8 +48,8 @@
RELOC(xen_iret, 1b+1)
/*
- sysexit is not used for 64-bit processes, so it's
- only ever used to return to 32-bit compat userspace.
+ * sysexit is not used for 64-bit processes, so it's only ever used to
+ * return to 32-bit compat userspace.
*/
ENTRY(xen_sysexit)
pushq $__USER32_DS
@@ -193,13 +64,15 @@
RELOC(xen_sysexit, 1b+1)
ENTRY(xen_sysret64)
- /* We're already on the usermode stack at this point, but still
- with the kernel gs, so we can easily switch back */
- movq %rsp, %gs:pda_oldrsp
- movq %gs:pda_kernelstack,%rsp
+ /*
+ * We're already on the usermode stack at this point, but
+ * still with the kernel gs, so we can easily switch back
+ */
+ movq %rsp, PER_CPU_VAR(old_rsp)
+ movq PER_CPU_VAR(kernel_stack), %rsp
pushq $__USER_DS
- pushq %gs:pda_oldrsp
+ pushq PER_CPU_VAR(old_rsp)
pushq %r11
pushq $__USER_CS
pushq %rcx
@@ -210,13 +83,15 @@
RELOC(xen_sysret64, 1b+1)
ENTRY(xen_sysret32)
- /* We're already on the usermode stack at this point, but still
- with the kernel gs, so we can easily switch back */
- movq %rsp, %gs:pda_oldrsp
- movq %gs:pda_kernelstack, %rsp
+ /*
+ * We're already on the usermode stack at this point, but
+ * still with the kernel gs, so we can easily switch back
+ */
+ movq %rsp, PER_CPU_VAR(old_rsp)
+ movq PER_CPU_VAR(kernel_stack), %rsp
pushq $__USER32_DS
- pushq %gs:pda_oldrsp
+ pushq PER_CPU_VAR(old_rsp)
pushq %r11
pushq $__USER32_CS
pushq %rcx
@@ -227,28 +102,27 @@
RELOC(xen_sysret32, 1b+1)
/*
- Xen handles syscall callbacks much like ordinary exceptions,
- which means we have:
- - kernel gs
- - kernel rsp
- - an iret-like stack frame on the stack (including rcx and r11):
- ss
- rsp
- rflags
- cs
- rip
- r11
- rsp-> rcx
-
- In all the entrypoints, we undo all that to make it look
- like a CPU-generated syscall/sysenter and jump to the normal
- entrypoint.
+ * Xen handles syscall callbacks much like ordinary exceptions, which
+ * means we have:
+ * - kernel gs
+ * - kernel rsp
+ * - an iret-like stack frame on the stack (including rcx and r11):
+ * ss
+ * rsp
+ * rflags
+ * cs
+ * rip
+ * r11
+ * rsp->rcx
+ *
+ * In all the entrypoints, we undo all that to make it look like a
+ * CPU-generated syscall/sysenter and jump to the normal entrypoint.
*/
.macro undo_xen_syscall
- mov 0*8(%rsp),%rcx
- mov 1*8(%rsp),%r11
- mov 5*8(%rsp),%rsp
+ mov 0*8(%rsp), %rcx
+ mov 1*8(%rsp), %r11
+ mov 5*8(%rsp), %rsp
.endm
/* Normal 64-bit system call target */
@@ -275,7 +149,7 @@
ENTRY(xen_syscall32_target)
ENTRY(xen_sysenter_target)
- lea 16(%rsp), %rsp /* strip %rcx,%r11 */
+ lea 16(%rsp), %rsp /* strip %rcx, %r11 */
mov $-ENOSYS, %rax
pushq $VGCF_in_syscall
jmp hypercall_iret
diff --git a/arch/x86/xen/xen-head.S b/arch/x86/xen/xen-head.S
index 63d49a5..1a5ff24 100644
--- a/arch/x86/xen/xen-head.S
+++ b/arch/x86/xen/xen-head.S
@@ -8,7 +8,7 @@
#include <asm/boot.h>
#include <asm/asm.h>
-#include <asm/page.h>
+#include <asm/page_types.h>
#include <xen/interface/elfnote.h>
#include <asm/xen/interface.h>
diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h
index c1f8faf..2f5ef26 100644
--- a/arch/x86/xen/xen-ops.h
+++ b/arch/x86/xen/xen-ops.h
@@ -10,9 +10,12 @@
extern const char xen_hypervisor_callback[];
extern const char xen_failsafe_callback[];
+extern void *xen_initial_gdt;
+
struct trap_info;
void xen_copy_trap_info(struct trap_info *traps);
+DECLARE_PER_CPU(struct vcpu_info, xen_vcpu_info);
DECLARE_PER_CPU(unsigned long, xen_cr3);
DECLARE_PER_CPU(unsigned long, xen_current_cr3);
@@ -22,6 +25,13 @@
void xen_setup_mfn_list_list(void);
void xen_setup_shared_info(void);
+void xen_setup_machphys_mapping(void);
+pgd_t *xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn);
+void xen_ident_map_ISA(void);
+void xen_reserve_top(void);
+
+void xen_leave_lazy(void);
+void xen_post_allocator_init(void);
char * __init xen_memory_setup(void);
void __init xen_arch_setup(void);
diff --git a/arch/xtensa/include/asm/swab.h b/arch/xtensa/include/asm/swab.h
index f50b697..226a391 100644
--- a/arch/xtensa/include/asm/swab.h
+++ b/arch/xtensa/include/asm/swab.h
@@ -11,7 +11,7 @@
#ifndef _XTENSA_SWAB_H
#define _XTENSA_SWAB_H
-#include <asm/types.h>
+#include <linux/types.h>
#include <linux/compiler.h>
#define __SWAB_64_THRU_32__
diff --git a/drivers/acpi/acpica/tbxface.c b/drivers/acpi/acpica/tbxface.c
index c3e841f..ab0aff3 100644
--- a/drivers/acpi/acpica/tbxface.c
+++ b/drivers/acpi/acpica/tbxface.c
@@ -365,7 +365,7 @@
/*******************************************************************************
*
- * FUNCTION: acpi_get_table
+ * FUNCTION: acpi_get_table_with_size
*
* PARAMETERS: Signature - ACPI signature of needed table
* Instance - Which instance (for SSDTs)
@@ -377,8 +377,9 @@
*
*****************************************************************************/
acpi_status
-acpi_get_table(char *signature,
- u32 instance, struct acpi_table_header **out_table)
+acpi_get_table_with_size(char *signature,
+ u32 instance, struct acpi_table_header **out_table,
+ acpi_size *tbl_size)
{
u32 i;
u32 j;
@@ -408,6 +409,7 @@
acpi_tb_verify_table(&acpi_gbl_root_table_list.tables[i]);
if (ACPI_SUCCESS(status)) {
*out_table = acpi_gbl_root_table_list.tables[i].pointer;
+ *tbl_size = acpi_gbl_root_table_list.tables[i].length;
}
if (!acpi_gbl_permanent_mmap) {
@@ -420,6 +422,15 @@
return (AE_NOT_FOUND);
}
+acpi_status
+acpi_get_table(char *signature,
+ u32 instance, struct acpi_table_header **out_table)
+{
+ acpi_size tbl_size;
+
+ return acpi_get_table_with_size(signature,
+ instance, out_table, &tbl_size);
+}
ACPI_EXPORT_SYMBOL(acpi_get_table)
/*******************************************************************************
diff --git a/drivers/acpi/osl.c b/drivers/acpi/osl.c
index b3193ec..2b6c590 100644
--- a/drivers/acpi/osl.c
+++ b/drivers/acpi/osl.c
@@ -272,14 +272,21 @@
}
EXPORT_SYMBOL_GPL(acpi_os_map_memory);
-void acpi_os_unmap_memory(void __iomem * virt, acpi_size size)
+void __ref acpi_os_unmap_memory(void __iomem *virt, acpi_size size)
{
- if (acpi_gbl_permanent_mmap) {
+ if (acpi_gbl_permanent_mmap)
iounmap(virt);
- }
+ else
+ __acpi_unmap_table(virt, size);
}
EXPORT_SYMBOL_GPL(acpi_os_unmap_memory);
+void __init early_acpi_os_unmap_memory(void __iomem *virt, acpi_size size)
+{
+ if (!acpi_gbl_permanent_mmap)
+ __acpi_unmap_table(virt, size);
+}
+
#ifdef ACPI_FUTURE_USAGE
acpi_status
acpi_os_get_physical_address(void *virt, acpi_physical_address * phys)
diff --git a/drivers/acpi/processor_idle.c b/drivers/acpi/processor_idle.c
index 7bc22a4..08def2f 100644
--- a/drivers/acpi/processor_idle.c
+++ b/drivers/acpi/processor_idle.c
@@ -824,8 +824,11 @@
*/
static inline void acpi_idle_do_entry(struct acpi_processor_cx *cx)
{
+ u64 perf_flags;
+
/* Don't trace irqs off for idle */
stop_critical_timings();
+ perf_flags = hw_perf_save_disable();
if (cx->entry_method == ACPI_CSTATE_FFH) {
/* Call into architectural FFH based C-state */
acpi_processor_ffh_cstate_enter(cx);
@@ -840,6 +843,7 @@
gets asserted in time to freeze execution properly. */
unused = inl(acpi_gbl_FADT.xpm_timer_block.address);
}
+ hw_perf_restore(perf_flags);
start_critical_timings();
}
diff --git a/drivers/acpi/tables.c b/drivers/acpi/tables.c
index a885295..fec1ae3 100644
--- a/drivers/acpi/tables.c
+++ b/drivers/acpi/tables.c
@@ -181,14 +181,15 @@
struct acpi_subtable_header *entry;
unsigned int count = 0;
unsigned long table_end;
+ acpi_size tbl_size;
if (!handler)
return -EINVAL;
if (strncmp(id, ACPI_SIG_MADT, 4) == 0)
- acpi_get_table(id, acpi_apic_instance, &table_header);
+ acpi_get_table_with_size(id, acpi_apic_instance, &table_header, &tbl_size);
else
- acpi_get_table(id, 0, &table_header);
+ acpi_get_table_with_size(id, 0, &table_header, &tbl_size);
if (!table_header) {
printk(KERN_WARNING PREFIX "%4.4s not present\n", id);
@@ -206,8 +207,10 @@
table_end) {
if (entry->type == entry_id
&& (!max_entries || count++ < max_entries))
- if (handler(entry, table_end))
+ if (handler(entry, table_end)) {
+ early_acpi_os_unmap_memory((char *)table_header, tbl_size);
return -EINVAL;
+ }
entry = (struct acpi_subtable_header *)
((unsigned long)entry + entry->length);
@@ -217,6 +220,7 @@
"%i found\n", id, entry_id, count - max_entries, count);
}
+ early_acpi_os_unmap_memory((char *)table_header, tbl_size);
return count;
}
@@ -241,17 +245,19 @@
int __init acpi_table_parse(char *id, acpi_table_handler handler)
{
struct acpi_table_header *table = NULL;
+ acpi_size tbl_size;
if (!handler)
return -EINVAL;
if (strncmp(id, ACPI_SIG_MADT, 4) == 0)
- acpi_get_table(id, acpi_apic_instance, &table);
+ acpi_get_table_with_size(id, acpi_apic_instance, &table, &tbl_size);
else
- acpi_get_table(id, 0, &table);
+ acpi_get_table_with_size(id, 0, &table, &tbl_size);
if (table) {
handler(table);
+ early_acpi_os_unmap_memory(table, tbl_size);
return 0;
} else
return 1;
@@ -265,8 +271,9 @@
static void __init check_multiple_madt(void)
{
struct acpi_table_header *table = NULL;
+ acpi_size tbl_size;
- acpi_get_table(ACPI_SIG_MADT, 2, &table);
+ acpi_get_table_with_size(ACPI_SIG_MADT, 2, &table, &tbl_size);
if (table) {
printk(KERN_WARNING PREFIX
"BIOS bug: multiple APIC/MADT found,"
@@ -275,6 +282,7 @@
"If \"acpi_apic_instance=%d\" works better, "
"notify linux-acpi@vger.kernel.org\n",
acpi_apic_instance ? 0 : 2);
+ early_acpi_os_unmap_memory(table, tbl_size);
} else
acpi_apic_instance = 0;
diff --git a/drivers/base/cpu.c b/drivers/base/cpu.c
index 719ee5c..5b257a5 100644
--- a/drivers/base/cpu.c
+++ b/drivers/base/cpu.c
@@ -107,7 +107,7 @@
/*
* Print cpu online, possible, present, and system maps
*/
-static ssize_t print_cpus_map(char *buf, cpumask_t *map)
+static ssize_t print_cpus_map(char *buf, const struct cpumask *map)
{
int n = cpulist_scnprintf(buf, PAGE_SIZE-2, map);
diff --git a/drivers/base/topology.c b/drivers/base/topology.c
index a778fb5..bf6b132 100644
--- a/drivers/base/topology.c
+++ b/drivers/base/topology.c
@@ -31,7 +31,10 @@
#include <linux/hardirq.h>
#include <linux/topology.h>
-#define define_one_ro(_name) \
+#define define_one_ro_named(_name, _func) \
+static SYSDEV_ATTR(_name, 0444, _func, NULL)
+
+#define define_one_ro(_name) \
static SYSDEV_ATTR(_name, 0444, show_##_name, NULL)
#define define_id_show_func(name) \
@@ -42,8 +45,8 @@
return sprintf(buf, "%d\n", topology_##name(cpu)); \
}
-#if defined(topology_thread_siblings) || defined(topology_core_siblings)
-static ssize_t show_cpumap(int type, cpumask_t *mask, char *buf)
+#if defined(topology_thread_cpumask) || defined(topology_core_cpumask)
+static ssize_t show_cpumap(int type, const struct cpumask *mask, char *buf)
{
ptrdiff_t len = PTR_ALIGN(buf + PAGE_SIZE - 1, PAGE_SIZE) - buf;
int n = 0;
@@ -65,7 +68,7 @@
struct sysdev_attribute *attr, char *buf) \
{ \
unsigned int cpu = dev->id; \
- return show_cpumap(0, &(topology_##name(cpu)), buf); \
+ return show_cpumap(0, topology_##name(cpu), buf); \
}
#define define_siblings_show_list(name) \
@@ -74,7 +77,7 @@
char *buf) \
{ \
unsigned int cpu = dev->id; \
- return show_cpumap(1, &(topology_##name(cpu)), buf); \
+ return show_cpumap(1, topology_##name(cpu), buf); \
}
#else
@@ -82,9 +85,7 @@
static ssize_t show_##name(struct sys_device *dev, \
struct sysdev_attribute *attr, char *buf) \
{ \
- unsigned int cpu = dev->id; \
- cpumask_t mask = topology_##name(cpu); \
- return show_cpumap(0, &mask, buf); \
+ return show_cpumap(0, topology_##name(dev->id), buf); \
}
#define define_siblings_show_list(name) \
@@ -92,9 +93,7 @@
struct sysdev_attribute *attr, \
char *buf) \
{ \
- unsigned int cpu = dev->id; \
- cpumask_t mask = topology_##name(cpu); \
- return show_cpumap(1, &mask, buf); \
+ return show_cpumap(1, topology_##name(dev->id), buf); \
}
#endif
@@ -107,13 +106,13 @@
define_id_show_func(core_id);
define_one_ro(core_id);
-define_siblings_show_func(thread_siblings);
-define_one_ro(thread_siblings);
-define_one_ro(thread_siblings_list);
+define_siblings_show_func(thread_cpumask);
+define_one_ro_named(thread_siblings, show_thread_cpumask);
+define_one_ro_named(thread_siblings_list, show_thread_cpumask_list);
-define_siblings_show_func(core_siblings);
-define_one_ro(core_siblings);
-define_one_ro(core_siblings_list);
+define_siblings_show_func(core_cpumask);
+define_one_ro_named(core_siblings, show_core_cpumask);
+define_one_ro_named(core_siblings_list, show_core_cpumask_list);
static struct attribute *default_attrs[] = {
&attr_physical_package_id.attr,
diff --git a/drivers/char/sysrq.c b/drivers/char/sysrq.c
index 33a9351..fa71b84 100644
--- a/drivers/char/sysrq.c
+++ b/drivers/char/sysrq.c
@@ -25,6 +25,7 @@
#include <linux/kbd_kern.h>
#include <linux/proc_fs.h>
#include <linux/quotaops.h>
+#include <linux/perf_counter.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/suspend.h>
@@ -244,6 +245,7 @@
struct pt_regs *regs = get_irq_regs();
if (regs)
show_regs(regs);
+ perf_counter_print_debug();
}
static struct sysrq_key_op sysrq_showregs_op = {
.handler = sysrq_handle_showregs,
diff --git a/drivers/clocksource/acpi_pm.c b/drivers/clocksource/acpi_pm.c
index e1129fa..ee19b6e 100644
--- a/drivers/clocksource/acpi_pm.c
+++ b/drivers/clocksource/acpi_pm.c
@@ -143,7 +143,7 @@
#endif
#ifndef CONFIG_X86_64
-#include "mach_timer.h"
+#include <asm/mach_timer.h>
#define PMTMR_EXPECTED_RATE \
((CALIBRATE_LATCH * (PMTMR_TICKS_PER_SEC >> 10)) / (CLOCK_TICK_RATE>>10))
/*
diff --git a/drivers/clocksource/cyclone.c b/drivers/clocksource/cyclone.c
index 1bde303..8615059 100644
--- a/drivers/clocksource/cyclone.c
+++ b/drivers/clocksource/cyclone.c
@@ -7,7 +7,7 @@
#include <asm/pgtable.h>
#include <asm/io.h>
-#include "mach_timer.h"
+#include <asm/mach_timer.h>
#define CYCLONE_CBAR_ADDR 0xFEB00CD0 /* base address ptr */
#define CYCLONE_PMCC_OFFSET 0x51A0 /* offset to control register */
diff --git a/drivers/eisa/Kconfig b/drivers/eisa/Kconfig
index c0646576..2705284 100644
--- a/drivers/eisa/Kconfig
+++ b/drivers/eisa/Kconfig
@@ -3,7 +3,7 @@
#
config EISA_VLB_PRIMING
bool "Vesa Local Bus priming"
- depends on X86_PC && EISA
+ depends on X86 && EISA
default n
---help---
Activate this option if your system contains a Vesa Local
@@ -24,11 +24,11 @@
When in doubt, say Y.
# Using EISA_VIRTUAL_ROOT on something other than an Alpha or
-# an X86_PC may lead to crashes...
+# an X86 may lead to crashes...
config EISA_VIRTUAL_ROOT
bool "EISA virtual root device"
- depends on EISA && (ALPHA || X86_PC)
+ depends on EISA && (ALPHA || X86)
default y
---help---
Activate this option if your system only have EISA bus
diff --git a/drivers/firmware/dcdbas.c b/drivers/firmware/dcdbas.c
index 777fba4..3009e01 100644
--- a/drivers/firmware/dcdbas.c
+++ b/drivers/firmware/dcdbas.c
@@ -244,7 +244,7 @@
*/
int dcdbas_smi_request(struct smi_cmd *smi_cmd)
{
- cpumask_t old_mask;
+ cpumask_var_t old_mask;
int ret = 0;
if (smi_cmd->magic != SMI_CMD_MAGIC) {
@@ -254,8 +254,11 @@
}
/* SMI requires CPU 0 */
- old_mask = current->cpus_allowed;
- set_cpus_allowed_ptr(current, &cpumask_of_cpu(0));
+ if (!alloc_cpumask_var(&old_mask, GFP_KERNEL))
+ return -ENOMEM;
+
+ cpumask_copy(old_mask, ¤t->cpus_allowed);
+ set_cpus_allowed_ptr(current, cpumask_of(0));
if (smp_processor_id() != 0) {
dev_dbg(&dcdbas_pdev->dev, "%s: failed to get CPU 0\n",
__func__);
@@ -275,7 +278,8 @@
);
out:
- set_cpus_allowed_ptr(current, &old_mask);
+ set_cpus_allowed_ptr(current, old_mask);
+ free_cpumask_var(old_mask);
return ret;
}
diff --git a/drivers/firmware/iscsi_ibft.c b/drivers/firmware/iscsi_ibft.c
index 3ab3e4a..7b7ddc2 100644
--- a/drivers/firmware/iscsi_ibft.c
+++ b/drivers/firmware/iscsi_ibft.c
@@ -938,8 +938,8 @@
return -ENOMEM;
if (ibft_addr) {
- printk(KERN_INFO "iBFT detected at 0x%lx.\n",
- virt_to_phys((void *)ibft_addr));
+ printk(KERN_INFO "iBFT detected at 0x%llx.\n",
+ (u64)virt_to_phys((void *)ibft_addr));
rc = ibft_check_device();
if (rc)
diff --git a/drivers/gpu/drm/drm_proc.c b/drivers/gpu/drm/drm_proc.c
index 8df849f..b756f04 100644
--- a/drivers/gpu/drm/drm_proc.c
+++ b/drivers/gpu/drm/drm_proc.c
@@ -678,9 +678,9 @@
*start = &buf[offset];
*eof = 0;
- DRM_PROC_PRINT("vma use count: %d, high_memory = %p, 0x%08lx\n",
+ DRM_PROC_PRINT("vma use count: %d, high_memory = %p, 0x%llx\n",
atomic_read(&dev->vma_count),
- high_memory, virt_to_phys(high_memory));
+ high_memory, (u64)virt_to_phys(high_memory));
list_for_each_entry(pt, &dev->vmalist, head) {
if (!(vma = pt->vma))
continue;
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 85685bf..c29feb9 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -211,7 +211,7 @@
vaddr_atomic = io_mapping_map_atomic_wc(mapping, page_base);
unwritten = __copy_from_user_inatomic_nocache(vaddr_atomic + page_offset,
- user_data, length);
+ user_data, length, length);
io_mapping_unmap_atomic(vaddr_atomic);
if (unwritten)
return -EFAULT;
diff --git a/drivers/input/keyboard/Kconfig b/drivers/input/keyboard/Kconfig
index 3556168..ea2638b 100644
--- a/drivers/input/keyboard/Kconfig
+++ b/drivers/input/keyboard/Kconfig
@@ -13,11 +13,11 @@
if INPUT_KEYBOARD
config KEYBOARD_ATKBD
- tristate "AT keyboard" if EMBEDDED || !X86_PC
+ tristate "AT keyboard" if EMBEDDED || !X86
default y
select SERIO
select SERIO_LIBPS2
- select SERIO_I8042 if X86_PC
+ select SERIO_I8042 if X86
select SERIO_GSCPS2 if GSC
help
Say Y here if you want to use a standard AT or PS/2 keyboard. Usually
diff --git a/drivers/input/mouse/Kconfig b/drivers/input/mouse/Kconfig
index 9705f3a..4f38e6f 100644
--- a/drivers/input/mouse/Kconfig
+++ b/drivers/input/mouse/Kconfig
@@ -17,7 +17,7 @@
default y
select SERIO
select SERIO_LIBPS2
- select SERIO_I8042 if X86_PC
+ select SERIO_I8042 if X86
select SERIO_GSCPS2 if GSC
help
Say Y here if you have a PS/2 mouse connected to your system. This
diff --git a/drivers/lguest/Kconfig b/drivers/lguest/Kconfig
index 76f2b36..a3d3cba 100644
--- a/drivers/lguest/Kconfig
+++ b/drivers/lguest/Kconfig
@@ -1,6 +1,6 @@
config LGUEST
tristate "Linux hypervisor example code"
- depends on X86_32 && EXPERIMENTAL && !X86_PAE && FUTEX && !X86_VOYAGER
+ depends on X86_32 && EXPERIMENTAL && !X86_PAE && FUTEX
select HVC_DRIVER
---help---
This is a very simple module which allows you to run
diff --git a/drivers/misc/Kconfig b/drivers/misc/Kconfig
index c64e679..1c48408 100644
--- a/drivers/misc/Kconfig
+++ b/drivers/misc/Kconfig
@@ -162,7 +162,7 @@
config SGI_XP
tristate "Support communication between SGI SSIs"
depends on NET
- depends on (IA64_GENERIC || IA64_SGI_SN2 || IA64_SGI_UV || X86_64) && SMP
+ depends on (IA64_GENERIC || IA64_SGI_SN2 || IA64_SGI_UV || X86_UV) && SMP
select IA64_UNCACHED_ALLOCATOR if IA64_GENERIC || IA64_SGI_SN2
select GENERIC_ALLOCATOR if IA64_GENERIC || IA64_SGI_SN2
select SGI_GRU if (IA64_GENERIC || IA64_SGI_UV || X86_64) && SMP
@@ -189,7 +189,7 @@
config SGI_GRU
tristate "SGI GRU driver"
- depends on (X86_64 || IA64_SGI_UV || IA64_GENERIC) && SMP
+ depends on (X86_UV || IA64_SGI_UV || IA64_GENERIC) && SMP
default n
select MMU_NOTIFIER
---help---
diff --git a/drivers/misc/sgi-gru/grufile.c b/drivers/misc/sgi-gru/grufile.c
index 6509838..c67e4e8 100644
--- a/drivers/misc/sgi-gru/grufile.c
+++ b/drivers/misc/sgi-gru/grufile.c
@@ -36,23 +36,11 @@
#include <linux/interrupt.h>
#include <linux/proc_fs.h>
#include <linux/uaccess.h>
+#include <asm/uv/uv.h>
#include "gru.h"
#include "grulib.h"
#include "grutables.h"
-#if defined CONFIG_X86_64
-#include <asm/genapic.h>
-#include <asm/irq.h>
-#define IS_UV() is_uv_system()
-#elif defined CONFIG_IA64
-#include <asm/system.h>
-#include <asm/sn/simulator.h>
-/* temp support for running on hardware simulator */
-#define IS_UV() IS_MEDUSA() || ia64_platform_is("uv")
-#else
-#define IS_UV() 0
-#endif
-
#include <asm/uv/uv_hub.h>
#include <asm/uv/uv_mmrs.h>
@@ -381,7 +369,7 @@
char id[10];
void *gru_start_vaddr;
- if (!IS_UV())
+ if (!is_uv_system())
return 0;
#if defined CONFIG_IA64
@@ -451,7 +439,7 @@
int order = get_order(sizeof(struct gru_state) *
GRU_CHIPLETS_PER_BLADE);
- if (!IS_UV())
+ if (!is_uv_system())
return;
for (i = 0; i < GRU_CHIPLETS_PER_BLADE; i++)
diff --git a/drivers/misc/sgi-xp/xp.h b/drivers/misc/sgi-xp/xp.h
index 7b4cbd5..2275126 100644
--- a/drivers/misc/sgi-xp/xp.h
+++ b/drivers/misc/sgi-xp/xp.h
@@ -15,19 +15,19 @@
#include <linux/mutex.h>
-#ifdef CONFIG_IA64
+#if defined CONFIG_X86_UV || defined CONFIG_IA64_SGI_UV
+#include <asm/uv/uv.h>
+#define is_uv() is_uv_system()
+#endif
+
+#ifndef is_uv
+#define is_uv() 0
+#endif
+
+#if defined CONFIG_IA64
#include <asm/system.h>
#include <asm/sn/arch.h> /* defines is_shub1() and is_shub2() */
#define is_shub() ia64_platform_is("sn2")
-#ifdef CONFIG_IA64_SGI_UV
-#define is_uv() ia64_platform_is("uv")
-#else
-#define is_uv() 0
-#endif
-#endif
-#ifdef CONFIG_X86_64
-#include <asm/genapic.h>
-#define is_uv() is_uv_system()
#endif
#ifndef is_shub1
@@ -42,10 +42,6 @@
#define is_shub() 0
#endif
-#ifndef is_uv
-#define is_uv() 0
-#endif
-
#ifdef USE_DBUG_ON
#define DBUG_ON(condition) BUG_ON(condition)
#else
diff --git a/drivers/misc/sgi-xp/xpc_main.c b/drivers/misc/sgi-xp/xpc_main.c
index 89218f7..6576170 100644
--- a/drivers/misc/sgi-xp/xpc_main.c
+++ b/drivers/misc/sgi-xp/xpc_main.c
@@ -318,7 +318,7 @@
/* this thread was marked active by xpc_hb_init() */
- set_cpus_allowed_ptr(current, &cpumask_of_cpu(XPC_HB_CHECK_CPU));
+ set_cpus_allowed_ptr(current, cpumask_of(XPC_HB_CHECK_CPU));
/* set our heartbeating to other partitions into motion */
xpc_hb_check_timeout = jiffies + (xpc_hb_check_interval * HZ);
diff --git a/drivers/mtd/nand/Kconfig b/drivers/mtd/nand/Kconfig
index 8b12e6e..2ff8879 100644
--- a/drivers/mtd/nand/Kconfig
+++ b/drivers/mtd/nand/Kconfig
@@ -273,7 +273,7 @@
config MTD_NAND_CS553X
tristate "NAND support for CS5535/CS5536 (AMD Geode companion chip)"
- depends on X86_32 && (X86_PC || X86_GENERICARCH)
+ depends on X86_32
help
The CS553x companion chips for the AMD Geode processor
include NAND flash controllers with built-in hardware ECC
diff --git a/drivers/net/ne3210.c b/drivers/net/ne3210.c
index fac43fd..6a843f7 100644
--- a/drivers/net/ne3210.c
+++ b/drivers/net/ne3210.c
@@ -150,7 +150,8 @@
if (phys_mem < virt_to_phys(high_memory)) {
printk(KERN_CRIT "ne3210.c: Card RAM overlaps with normal memory!!!\n");
printk(KERN_CRIT "ne3210.c: Use EISA SCU to set card memory below 1MB,\n");
- printk(KERN_CRIT "ne3210.c: or to an address above 0x%lx.\n", virt_to_phys(high_memory));
+ printk(KERN_CRIT "ne3210.c: or to an address above 0x%llx.\n",
+ (u64)virt_to_phys(high_memory));
printk(KERN_CRIT "ne3210.c: Driver NOT installed.\n");
retval = -EINVAL;
goto out3;
diff --git a/drivers/net/sfc/efx.c b/drivers/net/sfc/efx.c
index ab0e09b..847e9bb 100644
--- a/drivers/net/sfc/efx.c
+++ b/drivers/net/sfc/efx.c
@@ -854,20 +854,27 @@
* interrupts across them. */
static int efx_wanted_rx_queues(void)
{
- cpumask_t core_mask;
+ cpumask_var_t core_mask;
int count;
int cpu;
- cpus_clear(core_mask);
+ if (!alloc_cpumask_var(&core_mask, GFP_KERNEL)) {
+ printk(KERN_WARNING
+ "efx.c: allocation failure, irq balancing hobbled\n");
+ return 1;
+ }
+
+ cpumask_clear(core_mask);
count = 0;
for_each_online_cpu(cpu) {
- if (!cpu_isset(cpu, core_mask)) {
+ if (!cpumask_test_cpu(cpu, core_mask)) {
++count;
- cpus_or(core_mask, core_mask,
- topology_core_siblings(cpu));
+ cpumask_or(core_mask, core_mask,
+ topology_core_cpumask(cpu));
}
}
+ free_cpumask_var(core_mask);
return count;
}
diff --git a/drivers/net/sfc/falcon.c b/drivers/net/sfc/falcon.c
index d5378e6..064307c 100644
--- a/drivers/net/sfc/falcon.c
+++ b/drivers/net/sfc/falcon.c
@@ -338,10 +338,10 @@
nic_data->next_buffer_table += buffer->entries;
EFX_LOG(efx, "allocating special buffers %d-%d at %llx+%x "
- "(virt %p phys %lx)\n", buffer->index,
+ "(virt %p phys %llx)\n", buffer->index,
buffer->index + buffer->entries - 1,
- (unsigned long long)buffer->dma_addr, len,
- buffer->addr, virt_to_phys(buffer->addr));
+ (u64)buffer->dma_addr, len,
+ buffer->addr, (u64)virt_to_phys(buffer->addr));
return 0;
}
@@ -353,10 +353,10 @@
return;
EFX_LOG(efx, "deallocating special buffers %d-%d at %llx+%x "
- "(virt %p phys %lx)\n", buffer->index,
+ "(virt %p phys %llx)\n", buffer->index,
buffer->index + buffer->entries - 1,
- (unsigned long long)buffer->dma_addr, buffer->len,
- buffer->addr, virt_to_phys(buffer->addr));
+ (u64)buffer->dma_addr, buffer->len,
+ buffer->addr, (u64)virt_to_phys(buffer->addr));
pci_free_consistent(efx->pci_dev, buffer->len, buffer->addr,
buffer->dma_addr);
@@ -2343,10 +2343,10 @@
FALCON_MAC_STATS_SIZE);
if (rc)
return rc;
- EFX_LOG(efx, "stats buffer at %llx (virt %p phys %lx)\n",
- (unsigned long long)efx->stats_buffer.dma_addr,
+ EFX_LOG(efx, "stats buffer at %llx (virt %p phys %llx)\n",
+ (u64)efx->stats_buffer.dma_addr,
efx->stats_buffer.addr,
- virt_to_phys(efx->stats_buffer.addr));
+ (u64)virt_to_phys(efx->stats_buffer.addr));
return 0;
}
@@ -2921,9 +2921,9 @@
goto fail4;
BUG_ON(efx->irq_status.dma_addr & 0x0f);
- EFX_LOG(efx, "INT_KER at %llx (virt %p phys %lx)\n",
- (unsigned long long)efx->irq_status.dma_addr,
- efx->irq_status.addr, virt_to_phys(efx->irq_status.addr));
+ EFX_LOG(efx, "INT_KER at %llx (virt %p phys %llx)\n",
+ (u64)efx->irq_status.dma_addr,
+ efx->irq_status.addr, (u64)virt_to_phys(efx->irq_status.addr));
falcon_probe_spi_devices(efx);
diff --git a/drivers/net/wireless/arlan-main.c b/drivers/net/wireless/arlan-main.c
index bfca15d..14c1165 100644
--- a/drivers/net/wireless/arlan-main.c
+++ b/drivers/net/wireless/arlan-main.c
@@ -1082,8 +1082,8 @@
if (arlan_check_fingerprint(memaddr))
return -ENODEV;
- printk(KERN_NOTICE "%s: Arlan found at %x, \n ", dev->name,
- (int) virt_to_phys((void*)memaddr));
+ printk(KERN_NOTICE "%s: Arlan found at %llx, \n ", dev->name,
+ (u64) virt_to_phys((void*)memaddr));
ap->card = (void *) memaddr;
dev->mem_start = memaddr;
diff --git a/drivers/oprofile/buffer_sync.c b/drivers/oprofile/buffer_sync.c
index 9da5a4b..c3ea5fa 100644
--- a/drivers/oprofile/buffer_sync.c
+++ b/drivers/oprofile/buffer_sync.c
@@ -38,7 +38,7 @@
static LIST_HEAD(dying_tasks);
static LIST_HEAD(dead_tasks);
-static cpumask_t marked_cpus = CPU_MASK_NONE;
+static cpumask_var_t marked_cpus;
static DEFINE_SPINLOCK(task_mortuary);
static void process_task_mortuary(void);
@@ -456,10 +456,10 @@
{
int i;
- cpu_set(cpu, marked_cpus);
+ cpumask_set_cpu(cpu, marked_cpus);
for_each_online_cpu(i) {
- if (!cpu_isset(i, marked_cpus))
+ if (!cpumask_test_cpu(i, marked_cpus))
return;
}
@@ -468,7 +468,7 @@
*/
process_task_mortuary();
- cpus_clear(marked_cpus);
+ cpumask_clear(marked_cpus);
}
@@ -565,6 +565,20 @@
mutex_unlock(&buffer_mutex);
}
+int __init buffer_sync_init(void)
+{
+ if (!alloc_cpumask_var(&marked_cpus, GFP_KERNEL))
+ return -ENOMEM;
+
+ cpumask_clear(marked_cpus);
+ return 0;
+}
+
+void __exit buffer_sync_cleanup(void)
+{
+ free_cpumask_var(marked_cpus);
+}
+
/* The function can be used to add a buffer worth of data directly to
* the kernel buffer. The buffer is assumed to be a circular buffer.
* Take the entries from index start and end at index end, wrapping
diff --git a/drivers/oprofile/buffer_sync.h b/drivers/oprofile/buffer_sync.h
index 3110732..0ebf5db 100644
--- a/drivers/oprofile/buffer_sync.h
+++ b/drivers/oprofile/buffer_sync.h
@@ -19,4 +19,8 @@
/* sync the given CPU's buffer */
void sync_buffer(int cpu);
+/* initialize/destroy the buffer system. */
+int buffer_sync_init(void);
+void buffer_sync_cleanup(void);
+
#endif /* OPROFILE_BUFFER_SYNC_H */
diff --git a/drivers/oprofile/oprof.c b/drivers/oprofile/oprof.c
index 3cffce9..ced39f6 100644
--- a/drivers/oprofile/oprof.c
+++ b/drivers/oprofile/oprof.c
@@ -183,6 +183,10 @@
{
int err;
+ err = buffer_sync_init();
+ if (err)
+ return err;
+
err = oprofile_arch_init(&oprofile_ops);
if (err < 0 || timer) {
@@ -191,8 +195,10 @@
}
err = oprofilefs_register();
- if (err)
+ if (err) {
oprofile_arch_exit();
+ buffer_sync_cleanup();
+ }
return err;
}
@@ -202,6 +208,7 @@
{
oprofilefs_unregister();
oprofile_arch_exit();
+ buffer_sync_cleanup();
}
diff --git a/drivers/pci/dmar.c b/drivers/pci/dmar.c
index 26c536b5..5f33340 100644
--- a/drivers/pci/dmar.c
+++ b/drivers/pci/dmar.c
@@ -42,6 +42,7 @@
LIST_HEAD(dmar_drhd_units);
static struct acpi_table_header * __initdata dmar_tbl;
+static acpi_size dmar_tbl_size;
static void __init dmar_register_drhd_unit(struct dmar_drhd_unit *drhd)
{
@@ -288,8 +289,9 @@
acpi_status status = AE_OK;
/* if we could find DMAR table, then there are DMAR devices */
- status = acpi_get_table(ACPI_SIG_DMAR, 0,
- (struct acpi_table_header **)&dmar_tbl);
+ status = acpi_get_table_with_size(ACPI_SIG_DMAR, 0,
+ (struct acpi_table_header **)&dmar_tbl,
+ &dmar_tbl_size);
if (ACPI_SUCCESS(status) && !dmar_tbl) {
printk (KERN_WARNING PREFIX "Unable to map DMAR\n");
@@ -489,6 +491,7 @@
iommu_detected = 1;
#endif
}
+ early_acpi_os_unmap_memory(dmar_tbl, dmar_tbl_size);
dmar_tbl = NULL;
}
diff --git a/drivers/pci/intr_remapping.c b/drivers/pci/intr_remapping.c
index 45effc5..8e44db0 100644
--- a/drivers/pci/intr_remapping.c
+++ b/drivers/pci/intr_remapping.c
@@ -6,6 +6,7 @@
#include <linux/irq.h>
#include <asm/io_apic.h>
#include <asm/smp.h>
+#include <asm/cpu.h>
#include <linux/intel-iommu.h>
#include "intr_remapping.h"
diff --git a/drivers/watchdog/rdc321x_wdt.c b/drivers/watchdog/rdc321x_wdt.c
index bf92802..36e221b 100644
--- a/drivers/watchdog/rdc321x_wdt.c
+++ b/drivers/watchdog/rdc321x_wdt.c
@@ -37,7 +37,7 @@
#include <linux/io.h>
#include <linux/uaccess.h>
-#include <asm/mach-rdc321x/rdc321x_defs.h>
+#include <asm/rdc321x_defs.h>
#define RDC_WDT_MASK 0x80000000 /* Mask */
#define RDC_WDT_EN 0x00800000 /* Enable bit */
diff --git a/drivers/xen/events.c b/drivers/xen/events.c
index eb0dfde..30963af 100644
--- a/drivers/xen/events.c
+++ b/drivers/xen/events.c
@@ -26,9 +26,11 @@
#include <linux/irq.h>
#include <linux/module.h>
#include <linux/string.h>
+#include <linux/bootmem.h>
#include <asm/ptrace.h>
#include <asm/irq.h>
+#include <asm/idle.h>
#include <asm/sync_bitops.h>
#include <asm/xen/hypercall.h>
#include <asm/xen/hypervisor.h>
@@ -50,36 +52,55 @@
/* IRQ <-> IPI mapping */
static DEFINE_PER_CPU(int, ipi_to_irq[XEN_NR_IPIS]) = {[0 ... XEN_NR_IPIS-1] = -1};
-/* Packed IRQ information: binding type, sub-type index, and event channel. */
-struct packed_irq
-{
- unsigned short evtchn;
- unsigned char index;
- unsigned char type;
-};
-
-static struct packed_irq irq_info[NR_IRQS];
-
-/* Binding types. */
-enum {
- IRQT_UNBOUND,
+/* Interrupt types. */
+enum xen_irq_type {
+ IRQT_UNBOUND = 0,
IRQT_PIRQ,
IRQT_VIRQ,
IRQT_IPI,
IRQT_EVTCHN
};
-/* Convenient shorthand for packed representation of an unbound IRQ. */
-#define IRQ_UNBOUND mk_irq_info(IRQT_UNBOUND, 0, 0)
+/*
+ * Packed IRQ information:
+ * type - enum xen_irq_type
+ * event channel - irq->event channel mapping
+ * cpu - cpu this event channel is bound to
+ * index - type-specific information:
+ * PIRQ - vector, with MSB being "needs EIO"
+ * VIRQ - virq number
+ * IPI - IPI vector
+ * EVTCHN -
+ */
+struct irq_info
+{
+ enum xen_irq_type type; /* type */
+ unsigned short evtchn; /* event channel */
+ unsigned short cpu; /* cpu bound */
+
+ union {
+ unsigned short virq;
+ enum ipi_vector ipi;
+ struct {
+ unsigned short gsi;
+ unsigned short vector;
+ } pirq;
+ } u;
+};
+
+static struct irq_info irq_info[NR_IRQS];
static int evtchn_to_irq[NR_EVENT_CHANNELS] = {
[0 ... NR_EVENT_CHANNELS-1] = -1
};
-static unsigned long cpu_evtchn_mask[NR_CPUS][NR_EVENT_CHANNELS/BITS_PER_LONG];
-static u8 cpu_evtchn[NR_EVENT_CHANNELS];
-
-/* Reference counts for bindings to IRQs. */
-static int irq_bindcount[NR_IRQS];
+struct cpu_evtchn_s {
+ unsigned long bits[NR_EVENT_CHANNELS/BITS_PER_LONG];
+};
+static struct cpu_evtchn_s *cpu_evtchn_mask_p;
+static inline unsigned long *cpu_evtchn_mask(int cpu)
+{
+ return cpu_evtchn_mask_p[cpu].bits;
+}
/* Xen will never allocate port zero for any purpose. */
#define VALID_EVTCHN(chn) ((chn) != 0)
@@ -87,27 +108,108 @@
static struct irq_chip xen_dynamic_chip;
/* Constructor for packed IRQ information. */
-static inline struct packed_irq mk_irq_info(u32 type, u32 index, u32 evtchn)
+static struct irq_info mk_unbound_info(void)
{
- return (struct packed_irq) { evtchn, index, type };
+ return (struct irq_info) { .type = IRQT_UNBOUND };
+}
+
+static struct irq_info mk_evtchn_info(unsigned short evtchn)
+{
+ return (struct irq_info) { .type = IRQT_EVTCHN, .evtchn = evtchn,
+ .cpu = 0 };
+}
+
+static struct irq_info mk_ipi_info(unsigned short evtchn, enum ipi_vector ipi)
+{
+ return (struct irq_info) { .type = IRQT_IPI, .evtchn = evtchn,
+ .cpu = 0, .u.ipi = ipi };
+}
+
+static struct irq_info mk_virq_info(unsigned short evtchn, unsigned short virq)
+{
+ return (struct irq_info) { .type = IRQT_VIRQ, .evtchn = evtchn,
+ .cpu = 0, .u.virq = virq };
+}
+
+static struct irq_info mk_pirq_info(unsigned short evtchn,
+ unsigned short gsi, unsigned short vector)
+{
+ return (struct irq_info) { .type = IRQT_PIRQ, .evtchn = evtchn,
+ .cpu = 0, .u.pirq = { .gsi = gsi, .vector = vector } };
}
/*
* Accessors for packed IRQ information.
*/
-static inline unsigned int evtchn_from_irq(int irq)
+static struct irq_info *info_for_irq(unsigned irq)
{
- return irq_info[irq].evtchn;
+ return &irq_info[irq];
}
-static inline unsigned int index_from_irq(int irq)
+static unsigned int evtchn_from_irq(unsigned irq)
{
- return irq_info[irq].index;
+ return info_for_irq(irq)->evtchn;
}
-static inline unsigned int type_from_irq(int irq)
+static enum ipi_vector ipi_from_irq(unsigned irq)
{
- return irq_info[irq].type;
+ struct irq_info *info = info_for_irq(irq);
+
+ BUG_ON(info == NULL);
+ BUG_ON(info->type != IRQT_IPI);
+
+ return info->u.ipi;
+}
+
+static unsigned virq_from_irq(unsigned irq)
+{
+ struct irq_info *info = info_for_irq(irq);
+
+ BUG_ON(info == NULL);
+ BUG_ON(info->type != IRQT_VIRQ);
+
+ return info->u.virq;
+}
+
+static unsigned gsi_from_irq(unsigned irq)
+{
+ struct irq_info *info = info_for_irq(irq);
+
+ BUG_ON(info == NULL);
+ BUG_ON(info->type != IRQT_PIRQ);
+
+ return info->u.pirq.gsi;
+}
+
+static unsigned vector_from_irq(unsigned irq)
+{
+ struct irq_info *info = info_for_irq(irq);
+
+ BUG_ON(info == NULL);
+ BUG_ON(info->type != IRQT_PIRQ);
+
+ return info->u.pirq.vector;
+}
+
+static enum xen_irq_type type_from_irq(unsigned irq)
+{
+ return info_for_irq(irq)->type;
+}
+
+static unsigned cpu_from_irq(unsigned irq)
+{
+ return info_for_irq(irq)->cpu;
+}
+
+static unsigned int cpu_from_evtchn(unsigned int evtchn)
+{
+ int irq = evtchn_to_irq[evtchn];
+ unsigned ret = 0;
+
+ if (irq != -1)
+ ret = cpu_from_irq(irq);
+
+ return ret;
}
static inline unsigned long active_evtchns(unsigned int cpu,
@@ -115,7 +217,7 @@
unsigned int idx)
{
return (sh->evtchn_pending[idx] &
- cpu_evtchn_mask[cpu][idx] &
+ cpu_evtchn_mask(cpu)[idx] &
~sh->evtchn_mask[idx]);
}
@@ -125,13 +227,13 @@
BUG_ON(irq == -1);
#ifdef CONFIG_SMP
- irq_to_desc(irq)->affinity = cpumask_of_cpu(cpu);
+ cpumask_copy(irq_to_desc(irq)->affinity, cpumask_of(cpu));
#endif
- __clear_bit(chn, cpu_evtchn_mask[cpu_evtchn[chn]]);
- __set_bit(chn, cpu_evtchn_mask[cpu]);
+ __clear_bit(chn, cpu_evtchn_mask(cpu_from_irq(irq)));
+ __set_bit(chn, cpu_evtchn_mask(cpu));
- cpu_evtchn[chn] = cpu;
+ irq_info[irq].cpu = cpu;
}
static void init_evtchn_cpu_bindings(void)
@@ -142,17 +244,11 @@
/* By default all event channels notify CPU#0. */
for_each_irq_desc(i, desc) {
- desc->affinity = cpumask_of_cpu(0);
+ cpumask_copy(desc->affinity, cpumask_of(0));
}
#endif
- memset(cpu_evtchn, 0, sizeof(cpu_evtchn));
- memset(cpu_evtchn_mask[0], ~0, sizeof(cpu_evtchn_mask[0]));
-}
-
-static inline unsigned int cpu_from_evtchn(unsigned int evtchn)
-{
- return cpu_evtchn[evtchn];
+ memset(cpu_evtchn_mask(0), ~0, sizeof(cpu_evtchn_mask(0)));
}
static inline void clear_evtchn(int port)
@@ -232,9 +328,8 @@
int irq;
struct irq_desc *desc;
- /* Only allocate from dynirq range */
for (irq = 0; irq < nr_irqs; irq++)
- if (irq_bindcount[irq] == 0)
+ if (irq_info[irq].type == IRQT_UNBOUND)
break;
if (irq == nr_irqs)
@@ -244,6 +339,8 @@
if (WARN_ON(desc == NULL))
return -1;
+ dynamic_irq_init(irq);
+
return irq;
}
@@ -258,16 +355,13 @@
if (irq == -1) {
irq = find_unbound_irq();
- dynamic_irq_init(irq);
set_irq_chip_and_handler_name(irq, &xen_dynamic_chip,
handle_level_irq, "event");
evtchn_to_irq[evtchn] = irq;
- irq_info[irq] = mk_irq_info(IRQT_EVTCHN, 0, evtchn);
+ irq_info[irq] = mk_evtchn_info(evtchn);
}
- irq_bindcount[irq]++;
-
spin_unlock(&irq_mapping_update_lock);
return irq;
@@ -282,12 +376,12 @@
spin_lock(&irq_mapping_update_lock);
irq = per_cpu(ipi_to_irq, cpu)[ipi];
+
if (irq == -1) {
irq = find_unbound_irq();
if (irq < 0)
goto out;
- dynamic_irq_init(irq);
set_irq_chip_and_handler_name(irq, &xen_dynamic_chip,
handle_level_irq, "ipi");
@@ -298,15 +392,12 @@
evtchn = bind_ipi.port;
evtchn_to_irq[evtchn] = irq;
- irq_info[irq] = mk_irq_info(IRQT_IPI, ipi, evtchn);
-
+ irq_info[irq] = mk_ipi_info(evtchn, ipi);
per_cpu(ipi_to_irq, cpu)[ipi] = irq;
bind_evtchn_to_cpu(evtchn, cpu);
}
- irq_bindcount[irq]++;
-
out:
spin_unlock(&irq_mapping_update_lock);
return irq;
@@ -332,20 +423,17 @@
irq = find_unbound_irq();
- dynamic_irq_init(irq);
set_irq_chip_and_handler_name(irq, &xen_dynamic_chip,
handle_level_irq, "virq");
evtchn_to_irq[evtchn] = irq;
- irq_info[irq] = mk_irq_info(IRQT_VIRQ, virq, evtchn);
+ irq_info[irq] = mk_virq_info(evtchn, virq);
per_cpu(virq_to_irq, cpu)[virq] = irq;
bind_evtchn_to_cpu(evtchn, cpu);
}
- irq_bindcount[irq]++;
-
spin_unlock(&irq_mapping_update_lock);
return irq;
@@ -358,7 +446,7 @@
spin_lock(&irq_mapping_update_lock);
- if ((--irq_bindcount[irq] == 0) && VALID_EVTCHN(evtchn)) {
+ if (VALID_EVTCHN(evtchn)) {
close.port = evtchn;
if (HYPERVISOR_event_channel_op(EVTCHNOP_close, &close) != 0)
BUG();
@@ -366,11 +454,11 @@
switch (type_from_irq(irq)) {
case IRQT_VIRQ:
per_cpu(virq_to_irq, cpu_from_evtchn(evtchn))
- [index_from_irq(irq)] = -1;
+ [virq_from_irq(irq)] = -1;
break;
case IRQT_IPI:
per_cpu(ipi_to_irq, cpu_from_evtchn(evtchn))
- [index_from_irq(irq)] = -1;
+ [ipi_from_irq(irq)] = -1;
break;
default:
break;
@@ -380,7 +468,7 @@
bind_evtchn_to_cpu(evtchn, 0);
evtchn_to_irq[evtchn] = -1;
- irq_info[irq] = IRQ_UNBOUND;
+ irq_info[irq] = mk_unbound_info();
dynamic_irq_cleanup(irq);
}
@@ -498,8 +586,8 @@
for(i = 0; i < NR_EVENT_CHANNELS; i++) {
if (sync_test_bit(i, sh->evtchn_pending)) {
printk(" %d: event %d -> irq %d\n",
- cpu_evtchn[i], i,
- evtchn_to_irq[i]);
+ cpu_from_evtchn(i), i,
+ evtchn_to_irq[i]);
}
}
@@ -508,7 +596,6 @@
return IRQ_HANDLED;
}
-
/*
* Search the CPUs pending events bitmasks. For each one found, map
* the event number to an irq, and feed it into do_IRQ() for
@@ -521,11 +608,15 @@
void xen_evtchn_do_upcall(struct pt_regs *regs)
{
int cpu = get_cpu();
+ struct pt_regs *old_regs = set_irq_regs(regs);
struct shared_info *s = HYPERVISOR_shared_info;
struct vcpu_info *vcpu_info = __get_cpu_var(xen_vcpu);
static DEFINE_PER_CPU(unsigned, nesting_count);
unsigned count;
+ exit_idle();
+ irq_enter();
+
do {
unsigned long pending_words;
@@ -550,7 +641,7 @@
int irq = evtchn_to_irq[port];
if (irq != -1)
- xen_do_IRQ(irq, regs);
+ handle_irq(irq, regs);
}
}
@@ -561,12 +652,17 @@
} while(count != 1);
out:
+ irq_exit();
+ set_irq_regs(old_regs);
+
put_cpu();
}
/* Rebind a new event channel to an existing irq. */
void rebind_evtchn_irq(int evtchn, int irq)
{
+ struct irq_info *info = info_for_irq(irq);
+
/* Make sure the irq is masked, since the new event channel
will also be masked. */
disable_irq(irq);
@@ -576,11 +672,11 @@
/* After resume the irq<->evtchn mappings are all cleared out */
BUG_ON(evtchn_to_irq[evtchn] != -1);
/* Expect irq to have been bound before,
- so the bindcount should be non-0 */
- BUG_ON(irq_bindcount[irq] == 0);
+ so there should be a proper type */
+ BUG_ON(info->type == IRQT_UNBOUND);
evtchn_to_irq[evtchn] = irq;
- irq_info[irq] = mk_irq_info(IRQT_EVTCHN, 0, evtchn);
+ irq_info[irq] = mk_evtchn_info(evtchn);
spin_unlock(&irq_mapping_update_lock);
@@ -690,8 +786,7 @@
if ((irq = per_cpu(virq_to_irq, cpu)[virq]) == -1)
continue;
- BUG_ON(irq_info[irq].type != IRQT_VIRQ);
- BUG_ON(irq_info[irq].index != virq);
+ BUG_ON(virq_from_irq(irq) != virq);
/* Get a new binding from Xen. */
bind_virq.virq = virq;
@@ -703,7 +798,7 @@
/* Record the new mapping. */
evtchn_to_irq[evtchn] = irq;
- irq_info[irq] = mk_irq_info(IRQT_VIRQ, virq, evtchn);
+ irq_info[irq] = mk_virq_info(evtchn, virq);
bind_evtchn_to_cpu(evtchn, cpu);
/* Ready for use. */
@@ -720,8 +815,7 @@
if ((irq = per_cpu(ipi_to_irq, cpu)[ipi]) == -1)
continue;
- BUG_ON(irq_info[irq].type != IRQT_IPI);
- BUG_ON(irq_info[irq].index != ipi);
+ BUG_ON(ipi_from_irq(irq) != ipi);
/* Get a new binding from Xen. */
bind_ipi.vcpu = cpu;
@@ -732,7 +826,7 @@
/* Record the new mapping. */
evtchn_to_irq[evtchn] = irq;
- irq_info[irq] = mk_irq_info(IRQT_IPI, ipi, evtchn);
+ irq_info[irq] = mk_ipi_info(evtchn, ipi);
bind_evtchn_to_cpu(evtchn, cpu);
/* Ready for use. */
@@ -812,8 +906,11 @@
static struct irq_chip xen_dynamic_chip __read_mostly = {
.name = "xen-dyn",
+
+ .disable = disable_dynirq,
.mask = disable_dynirq,
.unmask = enable_dynirq,
+
.ack = ack_dynirq,
.set_affinity = set_affinity_irq,
.retrigger = retrigger_dynirq,
@@ -822,6 +919,10 @@
void __init xen_init_IRQ(void)
{
int i;
+ size_t size = nr_cpu_ids * sizeof(struct cpu_evtchn_s);
+
+ cpu_evtchn_mask_p = alloc_bootmem(size);
+ BUG_ON(cpu_evtchn_mask_p == NULL);
init_evtchn_cpu_bindings();
@@ -829,9 +930,5 @@
for (i = 0; i < NR_EVENT_CHANNELS; i++)
mask_evtchn(i);
- /* Dynamic IRQ space is currently unbound. Zero the refcnts. */
- for (i = 0; i < nr_irqs; i++)
- irq_bindcount[i] = 0;
-
irq_ctx_init(smp_processor_id());
}
diff --git a/drivers/xen/manage.c b/drivers/xen/manage.c
index 56892a1..3ccd348 100644
--- a/drivers/xen/manage.c
+++ b/drivers/xen/manage.c
@@ -108,7 +108,7 @@
/* XXX use normal device tree? */
xenbus_suspend();
- err = stop_machine(xen_suspend, &cancelled, &cpumask_of_cpu(0));
+ err = stop_machine(xen_suspend, &cancelled, cpumask_of(0));
if (err) {
printk(KERN_ERR "failed to start xen_suspend: %d\n", err);
goto out;
diff --git a/fs/exec.c b/fs/exec.c
index 929b580..af1600c 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -33,6 +33,7 @@
#include <linux/string.h>
#include <linux/init.h>
#include <linux/pagemap.h>
+#include <linux/perf_counter.h>
#include <linux/highmem.h>
#include <linux/spinlock.h>
#include <linux/key.h>
@@ -1010,6 +1011,13 @@
current->personality &= ~bprm->per_clear;
+ /*
+ * Flush performance counters when crossing a
+ * security domain:
+ */
+ if (!get_dumpable(current->mm))
+ perf_counter_exit_task(current);
+
/* An exec changes our domain. We are no longer part of the thread
group */
diff --git a/include/acpi/acpiosxf.h b/include/acpi/acpiosxf.h
index a62720a..ab0b85c 100644
--- a/include/acpi/acpiosxf.h
+++ b/include/acpi/acpiosxf.h
@@ -144,6 +144,7 @@
acpi_size length);
void acpi_os_unmap_memory(void __iomem * logical_address, acpi_size size);
+void early_acpi_os_unmap_memory(void __iomem * virt, acpi_size size);
#ifdef ACPI_FUTURE_USAGE
acpi_status
diff --git a/include/acpi/acpixf.h b/include/acpi/acpixf.h
index c8e8cf4..cc40102 100644
--- a/include/acpi/acpixf.h
+++ b/include/acpi/acpixf.h
@@ -130,6 +130,10 @@
struct acpi_table_header *out_table_header);
acpi_status
+acpi_get_table_with_size(acpi_string signature,
+ u32 instance, struct acpi_table_header **out_table,
+ acpi_size *tbl_size);
+acpi_status
acpi_get_table(acpi_string signature,
u32 instance, struct acpi_table_header **out_table);
diff --git a/include/asm-frv/swab.h b/include/asm-frv/swab.h
index afb3396..f305834 100644
--- a/include/asm-frv/swab.h
+++ b/include/asm-frv/swab.h
@@ -1,7 +1,7 @@
#ifndef _ASM_SWAB_H
#define _ASM_SWAB_H
-#include <asm/types.h>
+#include <linux/types.h>
#if defined(__GNUC__) && !defined(__STRICT_ANSI__) || defined(__KERNEL__)
# define __SWAB_64_THRU_32__
diff --git a/include/asm-generic/percpu.h b/include/asm-generic/percpu.h
index b0e63c6..00f45ff 100644
--- a/include/asm-generic/percpu.h
+++ b/include/asm-generic/percpu.h
@@ -80,4 +80,56 @@
#define DECLARE_PER_CPU(type, name) extern PER_CPU_ATTRIBUTES \
__typeof__(type) per_cpu_var(name)
+/*
+ * Optional methods for optimized non-lvalue per-cpu variable access.
+ *
+ * @var can be a percpu variable or a field of it and its size should
+ * equal char, int or long. percpu_read() evaluates to a lvalue and
+ * all others to void.
+ *
+ * These operations are guaranteed to be atomic w.r.t. preemption.
+ * The generic versions use plain get/put_cpu_var(). Archs are
+ * encouraged to implement single-instruction alternatives which don't
+ * require preemption protection.
+ */
+#ifndef percpu_read
+# define percpu_read(var) \
+ ({ \
+ typeof(per_cpu_var(var)) __tmp_var__; \
+ __tmp_var__ = get_cpu_var(var); \
+ put_cpu_var(var); \
+ __tmp_var__; \
+ })
+#endif
+
+#define __percpu_generic_to_op(var, val, op) \
+do { \
+ get_cpu_var(var) op val; \
+ put_cpu_var(var); \
+} while (0)
+
+#ifndef percpu_write
+# define percpu_write(var, val) __percpu_generic_to_op(var, (val), =)
+#endif
+
+#ifndef percpu_add
+# define percpu_add(var, val) __percpu_generic_to_op(var, (val), +=)
+#endif
+
+#ifndef percpu_sub
+# define percpu_sub(var, val) __percpu_generic_to_op(var, (val), -=)
+#endif
+
+#ifndef percpu_and
+# define percpu_and(var, val) __percpu_generic_to_op(var, (val), &=)
+#endif
+
+#ifndef percpu_or
+# define percpu_or(var, val) __percpu_generic_to_op(var, (val), |=)
+#endif
+
+#ifndef percpu_xor
+# define percpu_xor(var, val) __percpu_generic_to_op(var, (val), ^=)
+#endif
+
#endif /* _ASM_GENERIC_PERCPU_H_ */
diff --git a/include/asm-generic/sections.h b/include/asm-generic/sections.h
index 79a7ff9..4ce48e8 100644
--- a/include/asm-generic/sections.h
+++ b/include/asm-generic/sections.h
@@ -9,7 +9,7 @@
extern char __init_begin[], __init_end[];
extern char _sinittext[], _einittext[];
extern char _end[];
-extern char __per_cpu_start[], __per_cpu_end[];
+extern char __per_cpu_load[], __per_cpu_start[], __per_cpu_end[];
extern char __kprobes_text_start[], __kprobes_text_end[];
extern char __initdata_begin[], __initdata_end[];
extern char __start_rodata[], __end_rodata[];
diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h
index c61fab1..5406e70 100644
--- a/include/asm-generic/vmlinux.lds.h
+++ b/include/asm-generic/vmlinux.lds.h
@@ -430,12 +430,59 @@
*(.initcall7.init) \
*(.initcall7s.init)
-#define PERCPU(align) \
- . = ALIGN(align); \
- VMLINUX_SYMBOL(__per_cpu_start) = .; \
- .data.percpu : AT(ADDR(.data.percpu) - LOAD_OFFSET) { \
+/**
+ * PERCPU_VADDR - define output section for percpu area
+ * @vaddr: explicit base address (optional)
+ * @phdr: destination PHDR (optional)
+ *
+ * Macro which expands to output section for percpu area. If @vaddr
+ * is not blank, it specifies explicit base address and all percpu
+ * symbols will be offset from the given address. If blank, @vaddr
+ * always equals @laddr + LOAD_OFFSET.
+ *
+ * @phdr defines the output PHDR to use if not blank. Be warned that
+ * output PHDR is sticky. If @phdr is specified, the next output
+ * section in the linker script will go there too. @phdr should have
+ * a leading colon.
+ *
+ * Note that this macros defines __per_cpu_load as an absolute symbol.
+ * If there is no need to put the percpu section at a predetermined
+ * address, use PERCPU().
+ */
+#define PERCPU_VADDR(vaddr, phdr) \
+ VMLINUX_SYMBOL(__per_cpu_load) = .; \
+ .data.percpu vaddr : AT(VMLINUX_SYMBOL(__per_cpu_load) \
+ - LOAD_OFFSET) { \
+ VMLINUX_SYMBOL(__per_cpu_start) = .; \
+ *(.data.percpu.first) \
*(.data.percpu.page_aligned) \
*(.data.percpu) \
*(.data.percpu.shared_aligned) \
- } \
- VMLINUX_SYMBOL(__per_cpu_end) = .;
+ VMLINUX_SYMBOL(__per_cpu_end) = .; \
+ } phdr \
+ . = VMLINUX_SYMBOL(__per_cpu_load) + SIZEOF(.data.percpu);
+
+/**
+ * PERCPU - define output section for percpu area, simple version
+ * @align: required alignment
+ *
+ * Align to @align and outputs output section for percpu area. This
+ * macro doesn't maniuplate @vaddr or @phdr and __per_cpu_load and
+ * __per_cpu_start will be identical.
+ *
+ * This macro is equivalent to ALIGN(align); PERCPU_VADDR( , ) except
+ * that __per_cpu_load is defined as a relative symbol against
+ * .data.percpu which is required for relocatable x86_32
+ * configuration.
+ */
+#define PERCPU(align) \
+ . = ALIGN(align); \
+ .data.percpu : AT(ADDR(.data.percpu) - LOAD_OFFSET) { \
+ VMLINUX_SYMBOL(__per_cpu_load) = .; \
+ VMLINUX_SYMBOL(__per_cpu_start) = .; \
+ *(.data.percpu.first) \
+ *(.data.percpu.page_aligned) \
+ *(.data.percpu) \
+ *(.data.percpu.shared_aligned) \
+ VMLINUX_SYMBOL(__per_cpu_end) = .; \
+ }
diff --git a/include/asm-m32r/swab.h b/include/asm-m32r/swab.h
index 97973e1..54dab00 100644
--- a/include/asm-m32r/swab.h
+++ b/include/asm-m32r/swab.h
@@ -1,7 +1,7 @@
#ifndef _ASM_M32R_SWAB_H
#define _ASM_M32R_SWAB_H
-#include <asm/types.h>
+#include <linux/types.h>
#if !defined(__STRICT_ANSI__) || defined(__KERNEL__)
# define __SWAB_64_THRU_32__
diff --git a/include/asm-mn10300/swab.h b/include/asm-mn10300/swab.h
index 4504d1b..bd818a8 100644
--- a/include/asm-mn10300/swab.h
+++ b/include/asm-mn10300/swab.h
@@ -11,7 +11,7 @@
#ifndef _ASM_SWAB_H
#define _ASM_SWAB_H
-#include <asm/types.h>
+#include <linux/types.h>
#ifdef __GNUC__
diff --git a/include/linux/acpi.h b/include/linux/acpi.h
index 6fce2fc..7819915 100644
--- a/include/linux/acpi.h
+++ b/include/linux/acpi.h
@@ -79,6 +79,7 @@
typedef int (*acpi_table_entry_handler) (struct acpi_subtable_header *header, const unsigned long end);
char * __acpi_map_table (unsigned long phys_addr, unsigned long size);
+void __acpi_unmap_table(char *map, unsigned long size);
int early_acpi_boot_init(void);
int acpi_boot_init (void);
int acpi_boot_table_init (void);
diff --git a/include/linux/coda_psdev.h b/include/linux/coda_psdev.h
index 07ae8f8..5b5d473 100644
--- a/include/linux/coda_psdev.h
+++ b/include/linux/coda_psdev.h
@@ -6,6 +6,7 @@
#define CODA_PSDEV_MAJOR 67
#define MAX_CODADEVS 5 /* how many do we allow */
+#ifdef __KERNEL__
struct kstatfs;
/* communication pending/processing queues */
@@ -24,7 +25,6 @@
return (struct venus_comm *)((sb)->s_fs_info);
}
-
/* upcalls */
int venus_rootfid(struct super_block *sb, struct CodaFid *fidp);
int venus_getattr(struct super_block *sb, struct CodaFid *fid,
@@ -64,6 +64,12 @@
int venus_fsync(struct super_block *sb, struct CodaFid *fid);
int venus_statfs(struct dentry *dentry, struct kstatfs *sfs);
+/*
+ * Statistics
+ */
+
+extern struct venus_comm coda_comms[];
+#endif /* __KERNEL__ */
/* messages between coda filesystem in kernel and Venus */
struct upc_req {
@@ -82,11 +88,4 @@
#define REQ_WRITE 0x4
#define REQ_ABORT 0x8
-
-/*
- * Statistics
- */
-
-extern struct venus_comm coda_comms[];
-
#endif
diff --git a/include/linux/elfcore.h b/include/linux/elfcore.h
index 5ca54d7..7605c5e 100644
--- a/include/linux/elfcore.h
+++ b/include/linux/elfcore.h
@@ -111,6 +111,15 @@
#endif
}
+static inline void elf_core_copy_kernel_regs(elf_gregset_t *elfregs, struct pt_regs *regs)
+{
+#ifdef ELF_CORE_COPY_KERNEL_REGS
+ ELF_CORE_COPY_KERNEL_REGS((*elfregs), regs);
+#else
+ elf_core_copy_regs(elfregs, regs);
+#endif
+}
+
static inline int elf_core_copy_task_regs(struct task_struct *t, elf_gregset_t* elfregs)
{
#ifdef ELF_CORE_COPY_TASK_REGS
diff --git a/include/linux/in6.h b/include/linux/in6.h
index bc49204..718bf21 100644
--- a/include/linux/in6.h
+++ b/include/linux/in6.h
@@ -44,11 +44,11 @@
* NOTE: Be aware the IN6ADDR_* constants and in6addr_* externals are defined
* in network byte order, not in host byte order as are the IPv4 equivalents
*/
+#ifdef __KERNEL__
extern const struct in6_addr in6addr_any;
#define IN6ADDR_ANY_INIT { { { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } } }
extern const struct in6_addr in6addr_loopback;
#define IN6ADDR_LOOPBACK_INIT { { { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1 } } }
-#ifdef __KERNEL__
extern const struct in6_addr in6addr_linklocal_allnodes;
#define IN6ADDR_LINKLOCAL_ALLNODES_INIT \
{ { { 0xff,2,0,0,0,0,0,0,0,0,0,0,0,0,0,1 } } }
diff --git a/include/linux/init_task.h b/include/linux/init_task.h
index e752d973f..2ee9694 100644
--- a/include/linux/init_task.h
+++ b/include/linux/init_task.h
@@ -120,6 +120,16 @@
extern struct cred init_cred;
+#ifdef CONFIG_PERF_COUNTERS
+# define INIT_PERF_COUNTERS(tsk) \
+ .perf_counter_ctx.counter_list = \
+ LIST_HEAD_INIT(tsk.perf_counter_ctx.counter_list), \
+ .perf_counter_ctx.lock = \
+ __SPIN_LOCK_UNLOCKED(tsk.perf_counter_ctx.lock),
+#else
+# define INIT_PERF_COUNTERS(tsk)
+#endif
+
/*
* INIT_TASK is used to set up the first task table, touch at
* your own risk!. Base=0, limit=0x1fffff (=2MB)
@@ -184,6 +194,7 @@
INIT_IDS \
INIT_TRACE_IRQFLAGS \
INIT_LOCKDEP \
+ INIT_PERF_COUNTERS(tsk) \
}
diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h
index 9127f6b..472f117 100644
--- a/include/linux/interrupt.h
+++ b/include/linux/interrupt.h
@@ -467,6 +467,7 @@
struct irq_desc;
extern int early_irq_init(void);
+extern int arch_probe_nr_irqs(void);
extern int arch_early_irq_init(void);
extern int arch_init_chip_data(struct irq_desc *desc, int cpu);
diff --git a/include/linux/io-mapping.h b/include/linux/io-mapping.h
index 0adb0f9..c1b4830 100644
--- a/include/linux/io-mapping.h
+++ b/include/linux/io-mapping.h
@@ -49,8 +49,9 @@
io_mapping_create_wc(resource_size_t base, unsigned long size)
{
struct io_mapping *iomap;
+ pgprot_t prot;
- if (!is_io_mapping_possible(base, size))
+ if (!reserve_io_memtype_wc(base, size, &prot))
return NULL;
iomap = kmalloc(sizeof(*iomap), GFP_KERNEL);
@@ -59,13 +60,14 @@
iomap->base = base;
iomap->size = size;
- iomap->prot = pgprot_writecombine(__pgprot(__PAGE_KERNEL));
+ iomap->prot = prot;
return iomap;
}
static inline void
io_mapping_free(struct io_mapping *mapping)
{
+ free_io_memtype(mapping->base, mapping->size);
kfree(mapping);
}
diff --git a/include/linux/irq.h b/include/linux/irq.h
index f899b50..27a6753 100644
--- a/include/linux/irq.h
+++ b/include/linux/irq.h
@@ -182,11 +182,11 @@
unsigned int irqs_unhandled;
spinlock_t lock;
#ifdef CONFIG_SMP
- cpumask_t affinity;
+ cpumask_var_t affinity;
unsigned int cpu;
-#endif
#ifdef CONFIG_GENERIC_PENDING_IRQ
- cpumask_t pending_mask;
+ cpumask_var_t pending_mask;
+#endif
#endif
#ifdef CONFIG_PROC_FS
struct proc_dir_entry *dir;
@@ -422,4 +422,84 @@
#endif /* !CONFIG_S390 */
+#ifdef CONFIG_SMP
+/**
+ * init_alloc_desc_masks - allocate cpumasks for irq_desc
+ * @desc: pointer to irq_desc struct
+ * @cpu: cpu which will be handling the cpumasks
+ * @boot: true if need bootmem
+ *
+ * Allocates affinity and pending_mask cpumask if required.
+ * Returns true if successful (or not required).
+ * Side effect: affinity has all bits set, pending_mask has all bits clear.
+ */
+static inline bool init_alloc_desc_masks(struct irq_desc *desc, int cpu,
+ bool boot)
+{
+ int node;
+
+ if (boot) {
+ alloc_bootmem_cpumask_var(&desc->affinity);
+ cpumask_setall(desc->affinity);
+
+#ifdef CONFIG_GENERIC_PENDING_IRQ
+ alloc_bootmem_cpumask_var(&desc->pending_mask);
+ cpumask_clear(desc->pending_mask);
+#endif
+ return true;
+ }
+
+ node = cpu_to_node(cpu);
+
+ if (!alloc_cpumask_var_node(&desc->affinity, GFP_ATOMIC, node))
+ return false;
+ cpumask_setall(desc->affinity);
+
+#ifdef CONFIG_GENERIC_PENDING_IRQ
+ if (!alloc_cpumask_var_node(&desc->pending_mask, GFP_ATOMIC, node)) {
+ free_cpumask_var(desc->affinity);
+ return false;
+ }
+ cpumask_clear(desc->pending_mask);
+#endif
+ return true;
+}
+
+/**
+ * init_copy_desc_masks - copy cpumasks for irq_desc
+ * @old_desc: pointer to old irq_desc struct
+ * @new_desc: pointer to new irq_desc struct
+ *
+ * Insures affinity and pending_masks are copied to new irq_desc.
+ * If !CONFIG_CPUMASKS_OFFSTACK the cpumasks are embedded in the
+ * irq_desc struct so the copy is redundant.
+ */
+
+static inline void init_copy_desc_masks(struct irq_desc *old_desc,
+ struct irq_desc *new_desc)
+{
+#ifdef CONFIG_CPUMASKS_OFFSTACK
+ cpumask_copy(new_desc->affinity, old_desc->affinity);
+
+#ifdef CONFIG_GENERIC_PENDING_IRQ
+ cpumask_copy(new_desc->pending_mask, old_desc->pending_mask);
+#endif
+#endif
+}
+
+#else /* !CONFIG_SMP */
+
+static inline bool init_alloc_desc_masks(struct irq_desc *desc, int cpu,
+ bool boot)
+{
+ return true;
+}
+
+static inline void init_copy_desc_masks(struct irq_desc *old_desc,
+ struct irq_desc *new_desc)
+{
+}
+
+#endif /* CONFIG_SMP */
+
#endif /* _LINUX_IRQ_H */
diff --git a/include/linux/irqnr.h b/include/linux/irqnr.h
index 86af92e..887477b 100644
--- a/include/linux/irqnr.h
+++ b/include/linux/irqnr.h
@@ -20,6 +20,7 @@
# define for_each_irq_desc_reverse(irq, desc) \
for (irq = nr_irqs - 1; irq >= 0; irq--)
+
#else /* CONFIG_GENERIC_HARDIRQS */
extern int nr_irqs;
diff --git a/include/linux/kernel_stat.h b/include/linux/kernel_stat.h
index 570d204..ecfa668 100644
--- a/include/linux/kernel_stat.h
+++ b/include/linux/kernel_stat.h
@@ -78,7 +78,15 @@
return sum;
}
+
+/*
+ * Lock/unlock the current runqueue - to extract task statistics:
+ */
+extern void curr_rq_lock_irq_save(unsigned long *flags);
+extern void curr_rq_unlock_irq_restore(unsigned long *flags);
+extern unsigned long long __task_delta_exec(struct task_struct *tsk, int update);
extern unsigned long long task_delta_exec(struct task_struct *);
+
extern void account_user_time(struct task_struct *, cputime_t, cputime_t);
extern void account_system_time(struct task_struct *, int, cputime_t, cputime_t);
extern void account_steal_time(cputime_t);
diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h
index 32851ee..2ec6cc1 100644
--- a/include/linux/kprobes.h
+++ b/include/linux/kprobes.h
@@ -182,6 +182,14 @@
DECLARE_PER_CPU(struct kprobe *, current_kprobe);
DECLARE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk);
+/*
+ * For #ifdef avoidance:
+ */
+static inline int kprobes_built_in(void)
+{
+ return 1;
+}
+
#ifdef CONFIG_KRETPROBES
extern void arch_prepare_kretprobe(struct kretprobe_instance *ri,
struct pt_regs *regs);
@@ -271,8 +279,16 @@
void kprobe_flush_task(struct task_struct *tk);
void recycle_rp_inst(struct kretprobe_instance *ri, struct hlist_head *head);
-#else /* CONFIG_KPROBES */
+#else /* !CONFIG_KPROBES: */
+static inline int kprobes_built_in(void)
+{
+ return 0;
+}
+static inline int kprobe_fault_handler(struct pt_regs *regs, int trapnr)
+{
+ return 0;
+}
static inline struct kprobe *get_kprobe(void *addr)
{
return NULL;
@@ -329,5 +345,5 @@
static inline void kprobe_flush_task(struct task_struct *tk)
{
}
-#endif /* CONFIG_KPROBES */
-#endif /* _LINUX_KPROBES_H */
+#endif /* CONFIG_KPROBES */
+#endif /* _LINUX_KPROBES_H */
diff --git a/include/linux/magic.h b/include/linux/magic.h
index 0b4df7e..5b4e28b 100644
--- a/include/linux/magic.h
+++ b/include/linux/magic.h
@@ -49,4 +49,5 @@
#define FUTEXFS_SUPER_MAGIC 0xBAD1DEA
#define INOTIFYFS_SUPER_MAGIC 0x2BAD1DEA
+#define STACK_END_MAGIC 0x57AC6E9D
#endif /* __LINUX_MAGIC_H__ */
diff --git a/include/linux/mmiotrace.h b/include/linux/mmiotrace.h
index 139d7c8..3d1b7bd 100644
--- a/include/linux/mmiotrace.h
+++ b/include/linux/mmiotrace.h
@@ -1,5 +1,5 @@
-#ifndef MMIOTRACE_H
-#define MMIOTRACE_H
+#ifndef _LINUX_MMIOTRACE_H
+#define _LINUX_MMIOTRACE_H
#include <linux/types.h>
#include <linux/list.h>
@@ -13,28 +13,34 @@
unsigned long condition, struct pt_regs *);
struct kmmio_probe {
- struct list_head list; /* kmmio internal list */
- unsigned long addr; /* start location of the probe point */
- unsigned long len; /* length of the probe region */
- kmmio_pre_handler_t pre_handler; /* Called before addr is executed. */
- kmmio_post_handler_t post_handler; /* Called after addr is executed */
- void *private;
+ /* kmmio internal list: */
+ struct list_head list;
+ /* start location of the probe point: */
+ unsigned long addr;
+ /* length of the probe region: */
+ unsigned long len;
+ /* Called before addr is executed: */
+ kmmio_pre_handler_t pre_handler;
+ /* Called after addr is executed: */
+ kmmio_post_handler_t post_handler;
+ void *private;
};
-/* kmmio is active by some kmmio_probes? */
-static inline int is_kmmio_active(void)
-{
- extern unsigned int kmmio_count;
- return kmmio_count;
-}
+extern unsigned int kmmio_count;
extern int register_kmmio_probe(struct kmmio_probe *p);
extern void unregister_kmmio_probe(struct kmmio_probe *p);
+#ifdef CONFIG_MMIOTRACE
+/* kmmio is active by some kmmio_probes? */
+static inline int is_kmmio_active(void)
+{
+ return kmmio_count;
+}
+
/* Called from page fault handler. */
extern int kmmio_handler(struct pt_regs *regs, unsigned long addr);
-#ifdef CONFIG_MMIOTRACE
/* Called from ioremap.c */
extern void mmiotrace_ioremap(resource_size_t offset, unsigned long size,
void __iomem *addr);
@@ -43,7 +49,17 @@
/* For anyone to insert markers. Remember trailing newline. */
extern int mmiotrace_printk(const char *fmt, ...)
__attribute__ ((format (printf, 1, 2)));
-#else
+#else /* !CONFIG_MMIOTRACE: */
+static inline int is_kmmio_active(void)
+{
+ return 0;
+}
+
+static inline int kmmio_handler(struct pt_regs *regs, unsigned long addr)
+{
+ return 0;
+}
+
static inline void mmiotrace_ioremap(resource_size_t offset,
unsigned long size, void __iomem *addr)
{
@@ -63,28 +79,28 @@
#endif /* CONFIG_MMIOTRACE */
enum mm_io_opcode {
- MMIO_READ = 0x1, /* struct mmiotrace_rw */
- MMIO_WRITE = 0x2, /* struct mmiotrace_rw */
- MMIO_PROBE = 0x3, /* struct mmiotrace_map */
- MMIO_UNPROBE = 0x4, /* struct mmiotrace_map */
- MMIO_UNKNOWN_OP = 0x5, /* struct mmiotrace_rw */
+ MMIO_READ = 0x1, /* struct mmiotrace_rw */
+ MMIO_WRITE = 0x2, /* struct mmiotrace_rw */
+ MMIO_PROBE = 0x3, /* struct mmiotrace_map */
+ MMIO_UNPROBE = 0x4, /* struct mmiotrace_map */
+ MMIO_UNKNOWN_OP = 0x5, /* struct mmiotrace_rw */
};
struct mmiotrace_rw {
- resource_size_t phys; /* PCI address of register */
- unsigned long value;
- unsigned long pc; /* optional program counter */
- int map_id;
- unsigned char opcode; /* one of MMIO_{READ,WRITE,UNKNOWN_OP} */
- unsigned char width; /* size of register access in bytes */
+ resource_size_t phys; /* PCI address of register */
+ unsigned long value;
+ unsigned long pc; /* optional program counter */
+ int map_id;
+ unsigned char opcode; /* one of MMIO_{READ,WRITE,UNKNOWN_OP} */
+ unsigned char width; /* size of register access in bytes */
};
struct mmiotrace_map {
- resource_size_t phys; /* base address in PCI space */
- unsigned long virt; /* base virtual address */
- unsigned long len; /* mapping size */
- int map_id;
- unsigned char opcode; /* MMIO_PROBE or MMIO_UNPROBE */
+ resource_size_t phys; /* base address in PCI space */
+ unsigned long virt; /* base virtual address */
+ unsigned long len; /* mapping size */
+ int map_id;
+ unsigned char opcode; /* MMIO_PROBE or MMIO_UNPROBE */
};
/* in kernel/trace/trace_mmiotrace.c */
@@ -94,4 +110,4 @@
extern void mmio_trace_mapping(struct mmiotrace_map *map);
extern int mmio_trace_printk(const char *fmt, va_list args);
-#endif /* MMIOTRACE_H */
+#endif /* _LINUX_MMIOTRACE_H */
diff --git a/include/linux/nubus.h b/include/linux/nubus.h
index 7382af3..e137b3c 100644
--- a/include/linux/nubus.h
+++ b/include/linux/nubus.h
@@ -237,6 +237,7 @@
int mask;
};
+#ifdef __KERNEL__
struct nubus_board {
struct nubus_board* next;
struct nubus_dev* first_dev;
@@ -351,6 +352,7 @@
void nubus_get_rsrc_str(void* dest,
const struct nubus_dirent *dirent,
int maxlen);
+#endif /* __KERNEL__ */
/* We'd like to get rid of this eventually. Only daynaport.c uses it now. */
static inline void *nubus_slot_addr(int slot)
diff --git a/include/linux/percpu.h b/include/linux/percpu.h
index 9f2a375..3577ffd 100644
--- a/include/linux/percpu.h
+++ b/include/linux/percpu.h
@@ -8,35 +8,46 @@
#include <asm/percpu.h>
+#ifndef PER_CPU_BASE_SECTION
#ifdef CONFIG_SMP
-#define DEFINE_PER_CPU(type, name) \
- __attribute__((__section__(".data.percpu"))) \
- PER_CPU_ATTRIBUTES __typeof__(type) per_cpu__##name
+#define PER_CPU_BASE_SECTION ".data.percpu"
+#else
+#define PER_CPU_BASE_SECTION ".data"
+#endif
+#endif
+
+#ifdef CONFIG_SMP
#ifdef MODULE
-#define SHARED_ALIGNED_SECTION ".data.percpu"
+#define PER_CPU_SHARED_ALIGNED_SECTION ""
#else
-#define SHARED_ALIGNED_SECTION ".data.percpu.shared_aligned"
+#define PER_CPU_SHARED_ALIGNED_SECTION ".shared_aligned"
#endif
+#define PER_CPU_FIRST_SECTION ".first"
+
+#else
+
+#define PER_CPU_SHARED_ALIGNED_SECTION ""
+#define PER_CPU_FIRST_SECTION ""
+
+#endif
+
+#define DEFINE_PER_CPU_SECTION(type, name, section) \
+ __attribute__((__section__(PER_CPU_BASE_SECTION section))) \
+ PER_CPU_ATTRIBUTES __typeof__(type) per_cpu__##name
+
+#define DEFINE_PER_CPU(type, name) \
+ DEFINE_PER_CPU_SECTION(type, name, "")
#define DEFINE_PER_CPU_SHARED_ALIGNED(type, name) \
- __attribute__((__section__(SHARED_ALIGNED_SECTION))) \
- PER_CPU_ATTRIBUTES __typeof__(type) per_cpu__##name \
+ DEFINE_PER_CPU_SECTION(type, name, PER_CPU_SHARED_ALIGNED_SECTION) \
____cacheline_aligned_in_smp
-#define DEFINE_PER_CPU_PAGE_ALIGNED(type, name) \
- __attribute__((__section__(".data.percpu.page_aligned"))) \
- PER_CPU_ATTRIBUTES __typeof__(type) per_cpu__##name
-#else
-#define DEFINE_PER_CPU(type, name) \
- PER_CPU_ATTRIBUTES __typeof__(type) per_cpu__##name
+#define DEFINE_PER_CPU_PAGE_ALIGNED(type, name) \
+ DEFINE_PER_CPU_SECTION(type, name, ".page_aligned")
-#define DEFINE_PER_CPU_SHARED_ALIGNED(type, name) \
- DEFINE_PER_CPU(type, name)
-
-#define DEFINE_PER_CPU_PAGE_ALIGNED(type, name) \
- DEFINE_PER_CPU(type, name)
-#endif
+#define DEFINE_PER_CPU_FIRST(type, name) \
+ DEFINE_PER_CPU_SECTION(type, name, PER_CPU_FIRST_SECTION)
#define EXPORT_PER_CPU_SYMBOL(var) EXPORT_SYMBOL(per_cpu__##var)
#define EXPORT_PER_CPU_SYMBOL_GPL(var) EXPORT_SYMBOL_GPL(per_cpu__##var)
diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h
new file mode 100644
index 0000000..186efaf
--- /dev/null
+++ b/include/linux/perf_counter.h
@@ -0,0 +1,299 @@
+/*
+ * Performance counters:
+ *
+ * Copyright(C) 2008, Thomas Gleixner <tglx@linutronix.de>
+ * Copyright(C) 2008, Red Hat, Inc., Ingo Molnar
+ *
+ * Data type definitions, declarations, prototypes.
+ *
+ * Started by: Thomas Gleixner and Ingo Molnar
+ *
+ * For licencing details see kernel-base/COPYING
+ */
+#ifndef _LINUX_PERF_COUNTER_H
+#define _LINUX_PERF_COUNTER_H
+
+#include <linux/types.h>
+#include <linux/ioctl.h>
+
+/*
+ * User-space ABI bits:
+ */
+
+/*
+ * Generalized performance counter event types, used by the hw_event.type
+ * parameter of the sys_perf_counter_open() syscall:
+ */
+enum hw_event_types {
+ /*
+ * Common hardware events, generalized by the kernel:
+ */
+ PERF_COUNT_CPU_CYCLES = 0,
+ PERF_COUNT_INSTRUCTIONS = 1,
+ PERF_COUNT_CACHE_REFERENCES = 2,
+ PERF_COUNT_CACHE_MISSES = 3,
+ PERF_COUNT_BRANCH_INSTRUCTIONS = 4,
+ PERF_COUNT_BRANCH_MISSES = 5,
+ PERF_COUNT_BUS_CYCLES = 6,
+
+ PERF_HW_EVENTS_MAX = 7,
+
+ /*
+ * Special "software" counters provided by the kernel, even if
+ * the hardware does not support performance counters. These
+ * counters measure various physical and sw events of the
+ * kernel (and allow the profiling of them as well):
+ */
+ PERF_COUNT_CPU_CLOCK = -1,
+ PERF_COUNT_TASK_CLOCK = -2,
+ PERF_COUNT_PAGE_FAULTS = -3,
+ PERF_COUNT_CONTEXT_SWITCHES = -4,
+ PERF_COUNT_CPU_MIGRATIONS = -5,
+
+ PERF_SW_EVENTS_MIN = -6,
+};
+
+/*
+ * IRQ-notification data record type:
+ */
+enum perf_counter_record_type {
+ PERF_RECORD_SIMPLE = 0,
+ PERF_RECORD_IRQ = 1,
+ PERF_RECORD_GROUP = 2,
+};
+
+/*
+ * Hardware event to monitor via a performance monitoring counter:
+ */
+struct perf_counter_hw_event {
+ __s64 type;
+
+ __u64 irq_period;
+ __u32 record_type;
+
+ __u32 disabled : 1, /* off by default */
+ nmi : 1, /* NMI sampling */
+ raw : 1, /* raw event type */
+ inherit : 1, /* children inherit it */
+ pinned : 1, /* must always be on PMU */
+ exclusive : 1, /* only group on PMU */
+ exclude_user : 1, /* don't count user */
+ exclude_kernel : 1, /* ditto kernel */
+ exclude_hv : 1, /* ditto hypervisor */
+
+ __reserved_1 : 23;
+
+ __u64 __reserved_2;
+};
+
+/*
+ * Ioctls that can be done on a perf counter fd:
+ */
+#define PERF_COUNTER_IOC_ENABLE _IO('$', 0)
+#define PERF_COUNTER_IOC_DISABLE _IO('$', 1)
+
+#ifdef __KERNEL__
+/*
+ * Kernel-internal data types and definitions:
+ */
+
+#ifdef CONFIG_PERF_COUNTERS
+# include <asm/perf_counter.h>
+#endif
+
+#include <linux/list.h>
+#include <linux/mutex.h>
+#include <linux/rculist.h>
+#include <linux/rcupdate.h>
+#include <linux/spinlock.h>
+#include <asm/atomic.h>
+
+struct task_struct;
+
+/**
+ * struct hw_perf_counter - performance counter hardware details:
+ */
+struct hw_perf_counter {
+#ifdef CONFIG_PERF_COUNTERS
+ u64 config;
+ unsigned long config_base;
+ unsigned long counter_base;
+ int nmi;
+ unsigned int idx;
+ atomic64_t prev_count;
+ u64 irq_period;
+ atomic64_t period_left;
+#endif
+};
+
+/*
+ * Hardcoded buffer length limit for now, for IRQ-fed events:
+ */
+#define PERF_DATA_BUFLEN 2048
+
+/**
+ * struct perf_data - performance counter IRQ data sampling ...
+ */
+struct perf_data {
+ int len;
+ int rd_idx;
+ int overrun;
+ u8 data[PERF_DATA_BUFLEN];
+};
+
+struct perf_counter;
+
+/**
+ * struct hw_perf_counter_ops - performance counter hw ops
+ */
+struct hw_perf_counter_ops {
+ int (*enable) (struct perf_counter *counter);
+ void (*disable) (struct perf_counter *counter);
+ void (*read) (struct perf_counter *counter);
+};
+
+/**
+ * enum perf_counter_active_state - the states of a counter
+ */
+enum perf_counter_active_state {
+ PERF_COUNTER_STATE_ERROR = -2,
+ PERF_COUNTER_STATE_OFF = -1,
+ PERF_COUNTER_STATE_INACTIVE = 0,
+ PERF_COUNTER_STATE_ACTIVE = 1,
+};
+
+struct file;
+
+/**
+ * struct perf_counter - performance counter kernel representation:
+ */
+struct perf_counter {
+#ifdef CONFIG_PERF_COUNTERS
+ struct list_head list_entry;
+ struct list_head sibling_list;
+ struct perf_counter *group_leader;
+ const struct hw_perf_counter_ops *hw_ops;
+
+ enum perf_counter_active_state state;
+ enum perf_counter_active_state prev_state;
+ atomic64_t count;
+
+ struct perf_counter_hw_event hw_event;
+ struct hw_perf_counter hw;
+
+ struct perf_counter_context *ctx;
+ struct task_struct *task;
+ struct file *filp;
+
+ struct perf_counter *parent;
+ struct list_head child_list;
+
+ /*
+ * Protect attach/detach and child_list:
+ */
+ struct mutex mutex;
+
+ int oncpu;
+ int cpu;
+
+ /* read() / irq related data */
+ wait_queue_head_t waitq;
+ /* optional: for NMIs */
+ int wakeup_pending;
+ struct perf_data *irqdata;
+ struct perf_data *usrdata;
+ struct perf_data data[2];
+#endif
+};
+
+/**
+ * struct perf_counter_context - counter context structure
+ *
+ * Used as a container for task counters and CPU counters as well:
+ */
+struct perf_counter_context {
+#ifdef CONFIG_PERF_COUNTERS
+ /*
+ * Protect the states of the counters in the list,
+ * nr_active, and the list:
+ */
+ spinlock_t lock;
+ /*
+ * Protect the list of counters. Locking either mutex or lock
+ * is sufficient to ensure the list doesn't change; to change
+ * the list you need to lock both the mutex and the spinlock.
+ */
+ struct mutex mutex;
+
+ struct list_head counter_list;
+ int nr_counters;
+ int nr_active;
+ int is_active;
+ struct task_struct *task;
+#endif
+};
+
+/**
+ * struct perf_counter_cpu_context - per cpu counter context structure
+ */
+struct perf_cpu_context {
+ struct perf_counter_context ctx;
+ struct perf_counter_context *task_ctx;
+ int active_oncpu;
+ int max_pertask;
+ int exclusive;
+};
+
+/*
+ * Set by architecture code:
+ */
+extern int perf_max_counters;
+
+#ifdef CONFIG_PERF_COUNTERS
+extern const struct hw_perf_counter_ops *
+hw_perf_counter_init(struct perf_counter *counter);
+
+extern void perf_counter_task_sched_in(struct task_struct *task, int cpu);
+extern void perf_counter_task_sched_out(struct task_struct *task, int cpu);
+extern void perf_counter_task_tick(struct task_struct *task, int cpu);
+extern void perf_counter_init_task(struct task_struct *child);
+extern void perf_counter_exit_task(struct task_struct *child);
+extern void perf_counter_notify(struct pt_regs *regs);
+extern void perf_counter_print_debug(void);
+extern void perf_counter_unthrottle(void);
+extern u64 hw_perf_save_disable(void);
+extern void hw_perf_restore(u64 ctrl);
+extern int perf_counter_task_disable(void);
+extern int perf_counter_task_enable(void);
+extern int hw_perf_group_sched_in(struct perf_counter *group_leader,
+ struct perf_cpu_context *cpuctx,
+ struct perf_counter_context *ctx, int cpu);
+
+/*
+ * Return 1 for a software counter, 0 for a hardware counter
+ */
+static inline int is_software_counter(struct perf_counter *counter)
+{
+ return !counter->hw_event.raw && counter->hw_event.type < 0;
+}
+
+#else
+static inline void
+perf_counter_task_sched_in(struct task_struct *task, int cpu) { }
+static inline void
+perf_counter_task_sched_out(struct task_struct *task, int cpu) { }
+static inline void
+perf_counter_task_tick(struct task_struct *task, int cpu) { }
+static inline void perf_counter_init_task(struct task_struct *child) { }
+static inline void perf_counter_exit_task(struct task_struct *child) { }
+static inline void perf_counter_notify(struct pt_regs *regs) { }
+static inline void perf_counter_print_debug(void) { }
+static inline void perf_counter_unthrottle(void) { }
+static inline void hw_perf_restore(u64 ctrl) { }
+static inline u64 hw_perf_save_disable(void) { return 0; }
+static inline int perf_counter_task_disable(void) { return -EINVAL; }
+static inline int perf_counter_task_enable(void) { return -EINVAL; }
+#endif
+
+#endif /* __KERNEL__ */
+#endif /* _LINUX_PERF_COUNTER_H */
diff --git a/include/linux/prctl.h b/include/linux/prctl.h
index 48d887e..b00df4c 100644
--- a/include/linux/prctl.h
+++ b/include/linux/prctl.h
@@ -85,4 +85,7 @@
#define PR_SET_TIMERSLACK 29
#define PR_GET_TIMERSLACK 30
+#define PR_TASK_PERF_COUNTERS_DISABLE 31
+#define PR_TASK_PERF_COUNTERS_ENABLE 32
+
#endif /* _LINUX_PRCTL_H */
diff --git a/include/linux/reiserfs_fs.h b/include/linux/reiserfs_fs.h
index bc5114d..e356c99 100644
--- a/include/linux/reiserfs_fs.h
+++ b/include/linux/reiserfs_fs.h
@@ -28,8 +28,6 @@
#include <linux/reiserfs_fs_sb.h>
#endif
-struct fid;
-
/*
* include/linux/reiser_fs.h
*
@@ -37,6 +35,33 @@
*
*/
+/* ioctl's command */
+#define REISERFS_IOC_UNPACK _IOW(0xCD,1,long)
+/* define following flags to be the same as in ext2, so that chattr(1),
+ lsattr(1) will work with us. */
+#define REISERFS_IOC_GETFLAGS FS_IOC_GETFLAGS
+#define REISERFS_IOC_SETFLAGS FS_IOC_SETFLAGS
+#define REISERFS_IOC_GETVERSION FS_IOC_GETVERSION
+#define REISERFS_IOC_SETVERSION FS_IOC_SETVERSION
+
+#ifdef __KERNEL__
+/* the 32 bit compat definitions with int argument */
+#define REISERFS_IOC32_UNPACK _IOW(0xCD, 1, int)
+#define REISERFS_IOC32_GETFLAGS FS_IOC32_GETFLAGS
+#define REISERFS_IOC32_SETFLAGS FS_IOC32_SETFLAGS
+#define REISERFS_IOC32_GETVERSION FS_IOC32_GETVERSION
+#define REISERFS_IOC32_SETVERSION FS_IOC32_SETVERSION
+
+/* Locking primitives */
+/* Right now we are still falling back to (un)lock_kernel, but eventually that
+ would evolve into real per-fs locks */
+#define reiserfs_write_lock( sb ) lock_kernel()
+#define reiserfs_write_unlock( sb ) unlock_kernel()
+
+/* xattr stuff */
+#define REISERFS_XATTR_DIR_SEM(s) (REISERFS_SB(s)->xattr_dir_sem)
+struct fid;
+
/* in reading the #defines, it may help to understand that they employ
the following abbreviations:
@@ -698,6 +723,7 @@
/* object identifier for root dir */
#define REISERFS_ROOT_OBJECTID 2
#define REISERFS_ROOT_PARENT_OBJECTID 1
+
extern struct reiserfs_key root_key;
/*
@@ -1540,7 +1566,6 @@
/* FUNCTION DECLARATIONS */
/***************************************************************************/
-/*#ifdef __KERNEL__*/
#define get_journal_desc_magic(bh) (bh->b_data + bh->b_size - 12)
#define journal_trans_half(blocksize) \
@@ -2178,29 +2203,6 @@
unsigned int cmd, unsigned long arg);
int reiserfs_unpack(struct inode *inode, struct file *filp);
-/* ioctl's command */
-#define REISERFS_IOC_UNPACK _IOW(0xCD,1,long)
-/* define following flags to be the same as in ext2, so that chattr(1),
- lsattr(1) will work with us. */
-#define REISERFS_IOC_GETFLAGS FS_IOC_GETFLAGS
-#define REISERFS_IOC_SETFLAGS FS_IOC_SETFLAGS
-#define REISERFS_IOC_GETVERSION FS_IOC_GETVERSION
-#define REISERFS_IOC_SETVERSION FS_IOC_SETVERSION
-/* the 32 bit compat definitions with int argument */
-#define REISERFS_IOC32_UNPACK _IOW(0xCD, 1, int)
-#define REISERFS_IOC32_GETFLAGS FS_IOC32_GETFLAGS
-#define REISERFS_IOC32_SETFLAGS FS_IOC32_SETFLAGS
-#define REISERFS_IOC32_GETVERSION FS_IOC32_GETVERSION
-#define REISERFS_IOC32_SETVERSION FS_IOC32_SETVERSION
-
-/* Locking primitives */
-/* Right now we are still falling back to (un)lock_kernel, but eventually that
- would evolve into real per-fs locks */
-#define reiserfs_write_lock( sb ) lock_kernel()
-#define reiserfs_write_unlock( sb ) unlock_kernel()
-
-/* xattr stuff */
-#define REISERFS_XATTR_DIR_SEM(s) (REISERFS_SB(s)->xattr_dir_sem)
-
+#endif /* __KERNEL__ */
#endif /* _LINUX_REISER_FS_H */
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 8c216e0..3aee423 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -71,6 +71,7 @@
#include <linux/fs_struct.h>
#include <linux/compiler.h>
#include <linux/completion.h>
+#include <linux/perf_counter.h>
#include <linux/pid.h>
#include <linux/percpu.h>
#include <linux/topology.h>
@@ -136,6 +137,8 @@
extern unsigned long nr_uninterruptible(void);
extern unsigned long nr_active(void);
extern unsigned long nr_iowait(void);
+extern u64 cpu_nr_switches(int cpu);
+extern u64 cpu_nr_migrations(int cpu);
struct seq_file;
struct cfs_rq;
@@ -1052,6 +1055,8 @@
u64 last_wakeup;
u64 avg_overlap;
+ u64 nr_migrations;
+
#ifdef CONFIG_SCHEDSTATS
u64 wait_start;
u64 wait_max;
@@ -1067,7 +1072,6 @@
u64 exec_max;
u64 slice_max;
- u64 nr_migrations;
u64 nr_migrations_cold;
u64 nr_failed_migrations_affine;
u64 nr_failed_migrations_running;
@@ -1178,10 +1182,9 @@
pid_t pid;
pid_t tgid;
-#ifdef CONFIG_CC_STACKPROTECTOR
/* Canary value for the -fstack-protector gcc feature */
unsigned long stack_canary;
-#endif
+
/*
* pointers to (original) parent process, youngest child, younger sibling,
* older sibling, respectively. (p->father can be replaced with
@@ -1370,6 +1373,7 @@
struct list_head pi_state_list;
struct futex_pi_state *pi_state_cache;
#endif
+ struct perf_counter_context perf_counter_ctx;
#ifdef CONFIG_NUMA
struct mempolicy *mempolicy;
short il_next;
@@ -2087,6 +2091,19 @@
extern void thread_info_cache_init(void);
+#ifdef CONFIG_DEBUG_STACK_USAGE
+static inline unsigned long stack_not_used(struct task_struct *p)
+{
+ unsigned long *n = end_of_stack(p);
+
+ do { /* Skip over canary */
+ n++;
+ } while (!*n);
+
+ return (unsigned long)n - (unsigned long)end_of_stack(p);
+}
+#endif
+
/* set thread flags in other task's structures
* - see asm/thread_info.h for TIF_xxxx flags available
*/
@@ -2340,6 +2357,13 @@
#define TASK_SIZE_OF(tsk) TASK_SIZE
#endif
+/*
+ * Call the function if the target task is executing on a CPU right now:
+ */
+extern void task_oncpu_function_call(struct task_struct *p,
+ void (*func) (void *info), void *info);
+
+
#ifdef CONFIG_MM_OWNER
extern void mm_update_next_owner(struct mm_struct *mm);
extern void mm_init_owner(struct mm_struct *mm, struct task_struct *p);
diff --git a/include/linux/smp.h b/include/linux/smp.h
index 715196b..bbacb7b 100644
--- a/include/linux/smp.h
+++ b/include/linux/smp.h
@@ -176,6 +176,12 @@
#define put_cpu() preempt_enable()
#define put_cpu_no_resched() preempt_enable_no_resched()
+/*
+ * Callback to arch code if there's nosmp or maxcpus=0 on the
+ * boot command line:
+ */
+extern void arch_disable_smp_support(void);
+
void smp_setup_processor_id(void);
#endif /* __LINUX_SMP_H */
diff --git a/include/linux/socket.h b/include/linux/socket.h
index 20fc4bb..afc0190 100644
--- a/include/linux/socket.h
+++ b/include/linux/socket.h
@@ -24,10 +24,12 @@
#include <linux/types.h> /* pid_t */
#include <linux/compiler.h> /* __user */
-#ifdef CONFIG_PROC_FS
+#ifdef __KERNEL__
+# ifdef CONFIG_PROC_FS
struct seq_file;
extern void socket_seq_show(struct seq_file *seq);
-#endif
+# endif
+#endif /* __KERNEL__ */
typedef unsigned short sa_family_t;
diff --git a/include/linux/stackprotector.h b/include/linux/stackprotector.h
new file mode 100644
index 0000000..6f3e54c
--- /dev/null
+++ b/include/linux/stackprotector.h
@@ -0,0 +1,16 @@
+#ifndef _LINUX_STACKPROTECTOR_H
+#define _LINUX_STACKPROTECTOR_H 1
+
+#include <linux/compiler.h>
+#include <linux/sched.h>
+#include <linux/random.h>
+
+#ifdef CONFIG_CC_STACKPROTECTOR
+# include <asm/stackprotector.h>
+#else
+static inline void boot_init_stack_canary(void)
+{
+}
+#endif
+
+#endif
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index f9f900c..28ef2be 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -55,6 +55,7 @@
struct robust_list_head;
struct getcpu_cache;
struct old_linux_dirent;
+struct perf_counter_hw_event;
#include <linux/types.h>
#include <linux/aio_abi.h>
@@ -694,4 +695,8 @@
int kernel_execve(const char *filename, char *const argv[], char *const envp[]);
+
+asmlinkage long sys_perf_counter_open(
+ const struct perf_counter_hw_event __user *hw_event_uptr,
+ pid_t pid, int cpu, int group_fd);
#endif
diff --git a/include/linux/topology.h b/include/linux/topology.h
index e632d29..a16b9e0 100644
--- a/include/linux/topology.h
+++ b/include/linux/topology.h
@@ -193,5 +193,11 @@
#ifndef topology_core_siblings
#define topology_core_siblings(cpu) cpumask_of_cpu(cpu)
#endif
+#ifndef topology_thread_cpumask
+#define topology_thread_cpumask(cpu) cpumask_of(cpu)
+#endif
+#ifndef topology_core_cpumask
+#define topology_core_cpumask(cpu) cpumask_of(cpu)
+#endif
#endif /* _LINUX_TOPOLOGY_H */
diff --git a/include/linux/types.h b/include/linux/types.h
index 712ca53..fca82ed5 100644
--- a/include/linux/types.h
+++ b/include/linux/types.h
@@ -1,6 +1,9 @@
#ifndef _LINUX_TYPES_H
#define _LINUX_TYPES_H
+#include <asm/types.h>
+
+#ifndef __ASSEMBLY__
#ifdef __KERNEL__
#define DECLARE_BITMAP(name,bits) \
@@ -9,7 +12,6 @@
#endif
#include <linux/posix_types.h>
-#include <asm/types.h>
#ifndef __KERNEL_STRICT_NAMES
@@ -212,5 +214,5 @@
};
#endif /* __KERNEL__ */
-
+#endif /* __ASSEMBLY__ */
#endif /* _LINUX_TYPES_H */
diff --git a/include/linux/uaccess.h b/include/linux/uaccess.h
index 6b58367..6f3c603 100644
--- a/include/linux/uaccess.h
+++ b/include/linux/uaccess.h
@@ -41,13 +41,13 @@
#ifndef ARCH_HAS_NOCACHE_UACCESS
static inline unsigned long __copy_from_user_inatomic_nocache(void *to,
- const void __user *from, unsigned long n)
+ const void __user *from, unsigned long n, unsigned long total)
{
return __copy_from_user_inatomic(to, from, n);
}
static inline unsigned long __copy_from_user_nocache(void *to,
- const void __user *from, unsigned long n)
+ const void __user *from, unsigned long n, unsigned long total)
{
return __copy_from_user(to, from, n);
}
diff --git a/init/Kconfig b/init/Kconfig
index f068071..5a3ad5c 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -869,6 +869,36 @@
by some high performance threaded applications. Disabling
this option saves about 7k.
+config HAVE_PERF_COUNTERS
+ bool
+
+menu "Performance Counters"
+
+config PERF_COUNTERS
+ bool "Kernel Performance Counters"
+ depends on HAVE_PERF_COUNTERS
+ default y
+ select ANON_INODES
+ help
+ Enable kernel support for performance counter hardware.
+
+ Performance counters are special hardware registers available
+ on most modern CPUs. These registers count the number of certain
+ types of hw events: such as instructions executed, cachemisses
+ suffered, or branches mis-predicted - without slowing down the
+ kernel or applications. These registers can also trigger interrupts
+ when a threshold number of events have passed - and can thus be
+ used to profile the code that runs on that CPU.
+
+ The Linux Performance Counter subsystem provides an abstraction of
+ these hardware capabilities, available via a system call. It
+ provides per task and per CPU counters, and it provides event
+ capabilities on top of those.
+
+ Say Y if unsure.
+
+endmenu
+
config VM_EVENT_COUNTERS
default y
bool "Enable VM event counters for /proc/vmstat" if EMBEDDED
diff --git a/init/main.c b/init/main.c
index 83697e1..6bf83af 100644
--- a/init/main.c
+++ b/init/main.c
@@ -14,6 +14,7 @@
#include <linux/proc_fs.h>
#include <linux/kernel.h>
#include <linux/syscalls.h>
+#include <linux/stackprotector.h>
#include <linux/string.h>
#include <linux/ctype.h>
#include <linux/delay.h>
@@ -135,14 +136,14 @@
* greater than 0, limits the maximum number of CPUs activated in
* SMP mode to <NUM>.
*/
-#ifndef CONFIG_X86_IO_APIC
-static inline void disable_ioapic_setup(void) {};
-#endif
+
+void __weak arch_disable_smp_support(void) { }
static int __init nosmp(char *str)
{
setup_max_cpus = 0;
- disable_ioapic_setup();
+ arch_disable_smp_support();
+
return 0;
}
@@ -152,14 +153,14 @@
{
get_option(&str, &setup_max_cpus);
if (setup_max_cpus == 0)
- disable_ioapic_setup();
+ arch_disable_smp_support();
return 0;
}
early_param("maxcpus", maxcpus);
#else
-#define setup_max_cpus NR_CPUS
+const unsigned int setup_max_cpus = NR_CPUS;
#endif
/*
@@ -540,6 +541,12 @@
*/
lockdep_init();
debug_objects_early_init();
+
+ /*
+ * Set up the the initial canary ASAP:
+ */
+ boot_init_stack_canary();
+
cgroup_init_early();
local_irq_disable();
diff --git a/kernel/Makefile b/kernel/Makefile
index e4791b3..9ef39e5 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -93,6 +93,7 @@
obj-$(CONFIG_FUNCTION_TRACER) += trace/
obj-$(CONFIG_TRACING) += trace/
obj-$(CONFIG_SMP) += sched_cpupri.o
+obj-$(CONFIG_PERF_COUNTERS) += perf_counter.o
ifneq ($(CONFIG_SCHED_OMIT_FRAME_POINTER),y)
# According to Alan Modra <alan@linuxcare.com.au>, the -fno-omit-frame-pointer is
diff --git a/kernel/exit.c b/kernel/exit.c
index efd30cc..f52c24e 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -162,6 +162,9 @@
{
struct task_struct *tsk = container_of(rhp, struct task_struct, rcu);
+#ifdef CONFIG_PERF_COUNTERS
+ WARN_ON_ONCE(!list_empty(&tsk->perf_counter_ctx.counter_list));
+#endif
trace_sched_process_free(tsk);
put_task_struct(tsk);
}
@@ -980,12 +983,9 @@
{
static DEFINE_SPINLOCK(low_water_lock);
static int lowest_to_date = THREAD_SIZE;
- unsigned long *n = end_of_stack(current);
unsigned long free;
- while (*n == 0)
- n++;
- free = (unsigned long)n - (unsigned long)end_of_stack(current);
+ free = stack_not_used(current);
if (free >= lowest_to_date)
return;
@@ -1096,10 +1096,6 @@
tsk->mempolicy = NULL;
#endif
#ifdef CONFIG_FUTEX
- /*
- * This must happen late, after the PID is not
- * hashed anymore:
- */
if (unlikely(!list_empty(&tsk->pi_state_list)))
exit_pi_state_list(tsk);
if (unlikely(current->pi_state_cache))
@@ -1366,6 +1362,12 @@
*/
read_unlock(&tasklist_lock);
+ /*
+ * Flush inherited counters to the parent - before the parent
+ * gets woken up by child-exit notifications.
+ */
+ perf_counter_exit_task(p);
+
retval = ru ? getrusage(p, RUSAGE_BOTH, ru) : 0;
status = (p->signal->flags & SIGNAL_GROUP_EXIT)
? p->signal->group_exit_code : p->exit_code;
diff --git a/kernel/fork.c b/kernel/fork.c
index a66fbde..4640a3e 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -61,6 +61,7 @@
#include <linux/proc_fs.h>
#include <linux/blkdev.h>
#include <trace/sched.h>
+#include <linux/magic.h>
#include <asm/pgtable.h>
#include <asm/pgalloc.h>
@@ -212,6 +213,8 @@
{
struct task_struct *tsk;
struct thread_info *ti;
+ unsigned long *stackend;
+
int err;
prepare_to_copy(orig);
@@ -237,6 +240,8 @@
goto out;
setup_thread_stack(tsk, orig);
+ stackend = end_of_stack(tsk);
+ *stackend = STACK_END_MAGIC; /* for overflow detection */
#ifdef CONFIG_CC_STACKPROTECTOR
tsk->stack_canary = get_random_int();
@@ -984,6 +989,7 @@
goto fork_out;
rt_mutex_init_task(p);
+ perf_counter_init_task(p);
#ifdef CONFIG_PROVE_LOCKING
DEBUG_LOCKS_WARN_ON(!p->hardirqs_enabled);
diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c
index 7de11bd..122fef4 100644
--- a/kernel/irq/chip.c
+++ b/kernel/irq/chip.c
@@ -46,7 +46,10 @@
desc->irq_count = 0;
desc->irqs_unhandled = 0;
#ifdef CONFIG_SMP
- cpumask_setall(&desc->affinity);
+ cpumask_setall(desc->affinity);
+#ifdef CONFIG_GENERIC_PENDING_IRQ
+ cpumask_clear(desc->pending_mask);
+#endif
#endif
spin_unlock_irqrestore(&desc->lock, flags);
}
diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c
index 3aba8d1..f51eaee 100644
--- a/kernel/irq/handle.c
+++ b/kernel/irq/handle.c
@@ -17,6 +17,7 @@
#include <linux/kernel_stat.h>
#include <linux/rculist.h>
#include <linux/hash.h>
+#include <linux/bootmem.h>
#include "internals.h"
@@ -69,6 +70,7 @@
EXPORT_SYMBOL_GPL(nr_irqs);
#ifdef CONFIG_SPARSE_IRQ
+
static struct irq_desc irq_desc_init = {
.irq = -1,
.status = IRQ_DISABLED,
@@ -76,9 +78,6 @@
.handle_irq = handle_bad_irq,
.depth = 1,
.lock = __SPIN_LOCK_UNLOCKED(irq_desc_init.lock),
-#ifdef CONFIG_SMP
- .affinity = CPU_MASK_ALL
-#endif
};
void init_kstat_irqs(struct irq_desc *desc, int cpu, int nr)
@@ -113,6 +112,10 @@
printk(KERN_ERR "can not alloc kstat_irqs\n");
BUG_ON(1);
}
+ if (!init_alloc_desc_masks(desc, cpu, false)) {
+ printk(KERN_ERR "can not alloc irq_desc cpumasks\n");
+ BUG_ON(1);
+ }
arch_init_chip_data(desc, cpu);
}
@@ -121,7 +124,7 @@
*/
DEFINE_SPINLOCK(sparse_irq_lock);
-struct irq_desc *irq_desc_ptrs[NR_IRQS] __read_mostly;
+struct irq_desc **irq_desc_ptrs __read_mostly;
static struct irq_desc irq_desc_legacy[NR_IRQS_LEGACY] __cacheline_aligned_in_smp = {
[0 ... NR_IRQS_LEGACY-1] = {
@@ -131,14 +134,10 @@
.handle_irq = handle_bad_irq,
.depth = 1,
.lock = __SPIN_LOCK_UNLOCKED(irq_desc_init.lock),
-#ifdef CONFIG_SMP
- .affinity = CPU_MASK_ALL
-#endif
}
};
-/* FIXME: use bootmem alloc ...*/
-static unsigned int kstat_irqs_legacy[NR_IRQS_LEGACY][NR_CPUS];
+static unsigned int *kstat_irqs_legacy;
int __init early_irq_init(void)
{
@@ -148,18 +147,30 @@
init_irq_default_affinity();
+ /* initialize nr_irqs based on nr_cpu_ids */
+ arch_probe_nr_irqs();
+ printk(KERN_INFO "NR_IRQS:%d nr_irqs:%d\n", NR_IRQS, nr_irqs);
+
desc = irq_desc_legacy;
legacy_count = ARRAY_SIZE(irq_desc_legacy);
+ /* allocate irq_desc_ptrs array based on nr_irqs */
+ irq_desc_ptrs = alloc_bootmem(nr_irqs * sizeof(void *));
+
+ /* allocate based on nr_cpu_ids */
+ /* FIXME: invert kstat_irgs, and it'd be a per_cpu_alloc'd thing */
+ kstat_irqs_legacy = alloc_bootmem(NR_IRQS_LEGACY * nr_cpu_ids *
+ sizeof(int));
+
for (i = 0; i < legacy_count; i++) {
desc[i].irq = i;
- desc[i].kstat_irqs = kstat_irqs_legacy[i];
+ desc[i].kstat_irqs = kstat_irqs_legacy + i * nr_cpu_ids;
lockdep_set_class(&desc[i].lock, &irq_desc_lock_class);
-
+ init_alloc_desc_masks(&desc[i], 0, true);
irq_desc_ptrs[i] = desc + i;
}
- for (i = legacy_count; i < NR_IRQS; i++)
+ for (i = legacy_count; i < nr_irqs; i++)
irq_desc_ptrs[i] = NULL;
return arch_early_irq_init();
@@ -167,7 +178,10 @@
struct irq_desc *irq_to_desc(unsigned int irq)
{
- return (irq < NR_IRQS) ? irq_desc_ptrs[irq] : NULL;
+ if (irq_desc_ptrs && irq < nr_irqs)
+ return irq_desc_ptrs[irq];
+
+ return NULL;
}
struct irq_desc *irq_to_desc_alloc_cpu(unsigned int irq, int cpu)
@@ -176,10 +190,9 @@
unsigned long flags;
int node;
- if (irq >= NR_IRQS) {
- printk(KERN_WARNING "irq >= NR_IRQS in irq_to_desc_alloc: %d %d\n",
- irq, NR_IRQS);
- WARN_ON(1);
+ if (irq >= nr_irqs) {
+ WARN(1, "irq (%d) >= nr_irqs (%d) in irq_to_desc_alloc\n",
+ irq, nr_irqs);
return NULL;
}
@@ -221,9 +234,6 @@
.handle_irq = handle_bad_irq,
.depth = 1,
.lock = __SPIN_LOCK_UNLOCKED(irq_desc->lock),
-#ifdef CONFIG_SMP
- .affinity = CPU_MASK_ALL
-#endif
}
};
@@ -235,12 +245,15 @@
init_irq_default_affinity();
+ printk(KERN_INFO "NR_IRQS:%d\n", NR_IRQS);
+
desc = irq_desc;
count = ARRAY_SIZE(irq_desc);
- for (i = 0; i < count; i++)
+ for (i = 0; i < count; i++) {
desc[i].irq = i;
-
+ init_alloc_desc_masks(&desc[i], 0, true);
+ }
return arch_early_irq_init();
}
diff --git a/kernel/irq/internals.h b/kernel/irq/internals.h
index e6d0a43..40416a8 100644
--- a/kernel/irq/internals.h
+++ b/kernel/irq/internals.h
@@ -16,7 +16,14 @@
extern struct lock_class_key irq_desc_lock_class;
extern void init_kstat_irqs(struct irq_desc *desc, int cpu, int nr);
extern spinlock_t sparse_irq_lock;
+
+#ifdef CONFIG_SPARSE_IRQ
+/* irq_desc_ptrs allocated at boot time */
+extern struct irq_desc **irq_desc_ptrs;
+#else
+/* irq_desc_ptrs is a fixed size array */
extern struct irq_desc *irq_desc_ptrs[NR_IRQS];
+#endif
#ifdef CONFIG_PROC_FS
extern void register_irq_proc(unsigned int irq, struct irq_desc *desc);
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index 291f036..a3a5dc9 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -90,14 +90,14 @@
#ifdef CONFIG_GENERIC_PENDING_IRQ
if (desc->status & IRQ_MOVE_PCNTXT || desc->status & IRQ_DISABLED) {
- cpumask_copy(&desc->affinity, cpumask);
+ cpumask_copy(desc->affinity, cpumask);
desc->chip->set_affinity(irq, cpumask);
} else {
desc->status |= IRQ_MOVE_PENDING;
- cpumask_copy(&desc->pending_mask, cpumask);
+ cpumask_copy(desc->pending_mask, cpumask);
}
#else
- cpumask_copy(&desc->affinity, cpumask);
+ cpumask_copy(desc->affinity, cpumask);
desc->chip->set_affinity(irq, cpumask);
#endif
desc->status |= IRQ_AFFINITY_SET;
@@ -119,16 +119,16 @@
* one of the targets is online.
*/
if (desc->status & (IRQ_AFFINITY_SET | IRQ_NO_BALANCING)) {
- if (cpumask_any_and(&desc->affinity, cpu_online_mask)
+ if (cpumask_any_and(desc->affinity, cpu_online_mask)
< nr_cpu_ids)
goto set_affinity;
else
desc->status &= ~IRQ_AFFINITY_SET;
}
- cpumask_and(&desc->affinity, cpu_online_mask, irq_default_affinity);
+ cpumask_and(desc->affinity, cpu_online_mask, irq_default_affinity);
set_affinity:
- desc->chip->set_affinity(irq, &desc->affinity);
+ desc->chip->set_affinity(irq, desc->affinity);
return 0;
}
diff --git a/kernel/irq/migration.c b/kernel/irq/migration.c
index bd72329..e05ad9b 100644
--- a/kernel/irq/migration.c
+++ b/kernel/irq/migration.c
@@ -18,7 +18,7 @@
desc->status &= ~IRQ_MOVE_PENDING;
- if (unlikely(cpumask_empty(&desc->pending_mask)))
+ if (unlikely(cpumask_empty(desc->pending_mask)))
return;
if (!desc->chip->set_affinity)
@@ -38,13 +38,13 @@
* For correct operation this depends on the caller
* masking the irqs.
*/
- if (likely(cpumask_any_and(&desc->pending_mask, cpu_online_mask)
+ if (likely(cpumask_any_and(desc->pending_mask, cpu_online_mask)
< nr_cpu_ids)) {
- cpumask_and(&desc->affinity,
- &desc->pending_mask, cpu_online_mask);
- desc->chip->set_affinity(irq, &desc->affinity);
+ cpumask_and(desc->affinity,
+ desc->pending_mask, cpu_online_mask);
+ desc->chip->set_affinity(irq, desc->affinity);
}
- cpumask_clear(&desc->pending_mask);
+ cpumask_clear(desc->pending_mask);
}
void move_native_irq(int irq)
diff --git a/kernel/irq/numa_migrate.c b/kernel/irq/numa_migrate.c
index acd8835..7f9b804 100644
--- a/kernel/irq/numa_migrate.c
+++ b/kernel/irq/numa_migrate.c
@@ -38,15 +38,22 @@
old_desc->kstat_irqs = NULL;
}
-static void init_copy_one_irq_desc(int irq, struct irq_desc *old_desc,
+static bool init_copy_one_irq_desc(int irq, struct irq_desc *old_desc,
struct irq_desc *desc, int cpu)
{
memcpy(desc, old_desc, sizeof(struct irq_desc));
+ if (!init_alloc_desc_masks(desc, cpu, false)) {
+ printk(KERN_ERR "irq %d: can not get new irq_desc cpumask "
+ "for migration.\n", irq);
+ return false;
+ }
spin_lock_init(&desc->lock);
desc->cpu = cpu;
lockdep_set_class(&desc->lock, &irq_desc_lock_class);
init_copy_kstat_irqs(old_desc, desc, cpu, nr_cpu_ids);
+ init_copy_desc_masks(old_desc, desc);
arch_init_copy_chip_data(old_desc, desc, cpu);
+ return true;
}
static void free_one_irq_desc(struct irq_desc *old_desc, struct irq_desc *desc)
@@ -76,12 +83,18 @@
node = cpu_to_node(cpu);
desc = kzalloc_node(sizeof(*desc), GFP_ATOMIC, node);
if (!desc) {
- printk(KERN_ERR "irq %d: can not get new irq_desc for migration.\n", irq);
+ printk(KERN_ERR "irq %d: can not get new irq_desc "
+ "for migration.\n", irq);
/* still use old one */
desc = old_desc;
goto out_unlock;
}
- init_copy_one_irq_desc(irq, old_desc, desc, cpu);
+ if (!init_copy_one_irq_desc(irq, old_desc, desc, cpu)) {
+ /* still use old one */
+ kfree(desc);
+ desc = old_desc;
+ goto out_unlock;
+ }
irq_desc_ptrs[irq] = desc;
spin_unlock_irqrestore(&sparse_irq_lock, flags);
diff --git a/kernel/irq/proc.c b/kernel/irq/proc.c
index aae3f74..692363d 100644
--- a/kernel/irq/proc.c
+++ b/kernel/irq/proc.c
@@ -20,11 +20,11 @@
static int irq_affinity_proc_show(struct seq_file *m, void *v)
{
struct irq_desc *desc = irq_to_desc((long)m->private);
- const struct cpumask *mask = &desc->affinity;
+ const struct cpumask *mask = desc->affinity;
#ifdef CONFIG_GENERIC_PENDING_IRQ
if (desc->status & IRQ_MOVE_PENDING)
- mask = &desc->pending_mask;
+ mask = desc->pending_mask;
#endif
seq_cpumask(m, mask);
seq_putc(m, '\n');
diff --git a/kernel/kexec.c b/kernel/kexec.c
index 4838995..c7fd669 100644
--- a/kernel/kexec.c
+++ b/kernel/kexec.c
@@ -1130,7 +1130,7 @@
return;
memset(&prstatus, 0, sizeof(prstatus));
prstatus.pr_pid = current->pid;
- elf_core_copy_regs(&prstatus.pr_reg, regs);
+ elf_core_copy_kernel_regs(&prstatus.pr_reg, regs);
buf = append_elf_note(buf, KEXEC_CORE_NOTE_NAME, NT_PRSTATUS,
&prstatus, sizeof(prstatus));
final_note(buf);
diff --git a/kernel/panic.c b/kernel/panic.c
index 2a2ff36..32fe4ef 100644
--- a/kernel/panic.c
+++ b/kernel/panic.c
@@ -74,6 +74,9 @@
vsnprintf(buf, sizeof(buf), fmt, args);
va_end(args);
printk(KERN_EMERG "Kernel panic - not syncing: %s\n",buf);
+#ifdef CONFIG_DEBUG_BUGVERBOSE
+ dump_stack();
+#endif
bust_spinlocks(0);
/*
@@ -355,15 +358,18 @@
#endif
#ifdef CONFIG_CC_STACKPROTECTOR
+
/*
* Called when gcc's -fstack-protector feature is used, and
* gcc detects corruption of the on-stack canary value
*/
void __stack_chk_fail(void)
{
- panic("stack-protector: Kernel stack is corrupted");
+ panic("stack-protector: Kernel stack is corrupted in: %p\n",
+ __builtin_return_address(0));
}
EXPORT_SYMBOL(__stack_chk_fail);
+
#endif
core_param(panic, panic_timeout, int, 0644);
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
new file mode 100644
index 0000000..16b14ba
--- /dev/null
+++ b/kernel/perf_counter.c
@@ -0,0 +1,2208 @@
+/*
+ * Performance counter core code
+ *
+ * Copyright(C) 2008 Thomas Gleixner <tglx@linutronix.de>
+ * Copyright(C) 2008 Red Hat, Inc., Ingo Molnar
+ *
+ * For licencing details see kernel-base/COPYING
+ */
+
+#include <linux/fs.h>
+#include <linux/cpu.h>
+#include <linux/smp.h>
+#include <linux/file.h>
+#include <linux/poll.h>
+#include <linux/sysfs.h>
+#include <linux/ptrace.h>
+#include <linux/percpu.h>
+#include <linux/uaccess.h>
+#include <linux/syscalls.h>
+#include <linux/anon_inodes.h>
+#include <linux/kernel_stat.h>
+#include <linux/perf_counter.h>
+#include <linux/mm.h>
+#include <linux/vmstat.h>
+
+/*
+ * Each CPU has a list of per CPU counters:
+ */
+DEFINE_PER_CPU(struct perf_cpu_context, perf_cpu_context);
+
+int perf_max_counters __read_mostly = 1;
+static int perf_reserved_percpu __read_mostly;
+static int perf_overcommit __read_mostly = 1;
+
+/*
+ * Mutex for (sysadmin-configurable) counter reservations:
+ */
+static DEFINE_MUTEX(perf_resource_mutex);
+
+/*
+ * Architecture provided APIs - weak aliases:
+ */
+extern __weak const struct hw_perf_counter_ops *
+hw_perf_counter_init(struct perf_counter *counter)
+{
+ return NULL;
+}
+
+u64 __weak hw_perf_save_disable(void) { return 0; }
+void __weak hw_perf_restore(u64 ctrl) { barrier(); }
+void __weak hw_perf_counter_setup(int cpu) { barrier(); }
+int __weak hw_perf_group_sched_in(struct perf_counter *group_leader,
+ struct perf_cpu_context *cpuctx,
+ struct perf_counter_context *ctx, int cpu)
+{
+ return 0;
+}
+
+void __weak perf_counter_print_debug(void) { }
+
+static void
+list_add_counter(struct perf_counter *counter, struct perf_counter_context *ctx)
+{
+ struct perf_counter *group_leader = counter->group_leader;
+
+ /*
+ * Depending on whether it is a standalone or sibling counter,
+ * add it straight to the context's counter list, or to the group
+ * leader's sibling list:
+ */
+ if (counter->group_leader == counter)
+ list_add_tail(&counter->list_entry, &ctx->counter_list);
+ else
+ list_add_tail(&counter->list_entry, &group_leader->sibling_list);
+}
+
+static void
+list_del_counter(struct perf_counter *counter, struct perf_counter_context *ctx)
+{
+ struct perf_counter *sibling, *tmp;
+
+ list_del_init(&counter->list_entry);
+
+ /*
+ * If this was a group counter with sibling counters then
+ * upgrade the siblings to singleton counters by adding them
+ * to the context list directly:
+ */
+ list_for_each_entry_safe(sibling, tmp,
+ &counter->sibling_list, list_entry) {
+
+ list_del_init(&sibling->list_entry);
+ list_add_tail(&sibling->list_entry, &ctx->counter_list);
+ sibling->group_leader = sibling;
+ }
+}
+
+static void
+counter_sched_out(struct perf_counter *counter,
+ struct perf_cpu_context *cpuctx,
+ struct perf_counter_context *ctx)
+{
+ if (counter->state != PERF_COUNTER_STATE_ACTIVE)
+ return;
+
+ counter->state = PERF_COUNTER_STATE_INACTIVE;
+ counter->hw_ops->disable(counter);
+ counter->oncpu = -1;
+
+ if (!is_software_counter(counter))
+ cpuctx->active_oncpu--;
+ ctx->nr_active--;
+ if (counter->hw_event.exclusive || !cpuctx->active_oncpu)
+ cpuctx->exclusive = 0;
+}
+
+static void
+group_sched_out(struct perf_counter *group_counter,
+ struct perf_cpu_context *cpuctx,
+ struct perf_counter_context *ctx)
+{
+ struct perf_counter *counter;
+
+ if (group_counter->state != PERF_COUNTER_STATE_ACTIVE)
+ return;
+
+ counter_sched_out(group_counter, cpuctx, ctx);
+
+ /*
+ * Schedule out siblings (if any):
+ */
+ list_for_each_entry(counter, &group_counter->sibling_list, list_entry)
+ counter_sched_out(counter, cpuctx, ctx);
+
+ if (group_counter->hw_event.exclusive)
+ cpuctx->exclusive = 0;
+}
+
+/*
+ * Cross CPU call to remove a performance counter
+ *
+ * We disable the counter on the hardware level first. After that we
+ * remove it from the context list.
+ */
+static void __perf_counter_remove_from_context(void *info)
+{
+ struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context);
+ struct perf_counter *counter = info;
+ struct perf_counter_context *ctx = counter->ctx;
+ unsigned long flags;
+ u64 perf_flags;
+
+ /*
+ * If this is a task context, we need to check whether it is
+ * the current task context of this cpu. If not it has been
+ * scheduled out before the smp call arrived.
+ */
+ if (ctx->task && cpuctx->task_ctx != ctx)
+ return;
+
+ curr_rq_lock_irq_save(&flags);
+ spin_lock(&ctx->lock);
+
+ counter_sched_out(counter, cpuctx, ctx);
+
+ counter->task = NULL;
+ ctx->nr_counters--;
+
+ /*
+ * Protect the list operation against NMI by disabling the
+ * counters on a global level. NOP for non NMI based counters.
+ */
+ perf_flags = hw_perf_save_disable();
+ list_del_counter(counter, ctx);
+ hw_perf_restore(perf_flags);
+
+ if (!ctx->task) {
+ /*
+ * Allow more per task counters with respect to the
+ * reservation:
+ */
+ cpuctx->max_pertask =
+ min(perf_max_counters - ctx->nr_counters,
+ perf_max_counters - perf_reserved_percpu);
+ }
+
+ spin_unlock(&ctx->lock);
+ curr_rq_unlock_irq_restore(&flags);
+}
+
+
+/*
+ * Remove the counter from a task's (or a CPU's) list of counters.
+ *
+ * Must be called with counter->mutex and ctx->mutex held.
+ *
+ * CPU counters are removed with a smp call. For task counters we only
+ * call when the task is on a CPU.
+ */
+static void perf_counter_remove_from_context(struct perf_counter *counter)
+{
+ struct perf_counter_context *ctx = counter->ctx;
+ struct task_struct *task = ctx->task;
+
+ if (!task) {
+ /*
+ * Per cpu counters are removed via an smp call and
+ * the removal is always sucessful.
+ */
+ smp_call_function_single(counter->cpu,
+ __perf_counter_remove_from_context,
+ counter, 1);
+ return;
+ }
+
+retry:
+ task_oncpu_function_call(task, __perf_counter_remove_from_context,
+ counter);
+
+ spin_lock_irq(&ctx->lock);
+ /*
+ * If the context is active we need to retry the smp call.
+ */
+ if (ctx->nr_active && !list_empty(&counter->list_entry)) {
+ spin_unlock_irq(&ctx->lock);
+ goto retry;
+ }
+
+ /*
+ * The lock prevents that this context is scheduled in so we
+ * can remove the counter safely, if the call above did not
+ * succeed.
+ */
+ if (!list_empty(&counter->list_entry)) {
+ ctx->nr_counters--;
+ list_del_counter(counter, ctx);
+ counter->task = NULL;
+ }
+ spin_unlock_irq(&ctx->lock);
+}
+
+/*
+ * Cross CPU call to disable a performance counter
+ */
+static void __perf_counter_disable(void *info)
+{
+ struct perf_counter *counter = info;
+ struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context);
+ struct perf_counter_context *ctx = counter->ctx;
+ unsigned long flags;
+
+ /*
+ * If this is a per-task counter, need to check whether this
+ * counter's task is the current task on this cpu.
+ */
+ if (ctx->task && cpuctx->task_ctx != ctx)
+ return;
+
+ curr_rq_lock_irq_save(&flags);
+ spin_lock(&ctx->lock);
+
+ /*
+ * If the counter is on, turn it off.
+ * If it is in error state, leave it in error state.
+ */
+ if (counter->state >= PERF_COUNTER_STATE_INACTIVE) {
+ if (counter == counter->group_leader)
+ group_sched_out(counter, cpuctx, ctx);
+ else
+ counter_sched_out(counter, cpuctx, ctx);
+ counter->state = PERF_COUNTER_STATE_OFF;
+ }
+
+ spin_unlock(&ctx->lock);
+ curr_rq_unlock_irq_restore(&flags);
+}
+
+/*
+ * Disable a counter.
+ */
+static void perf_counter_disable(struct perf_counter *counter)
+{
+ struct perf_counter_context *ctx = counter->ctx;
+ struct task_struct *task = ctx->task;
+
+ if (!task) {
+ /*
+ * Disable the counter on the cpu that it's on
+ */
+ smp_call_function_single(counter->cpu, __perf_counter_disable,
+ counter, 1);
+ return;
+ }
+
+ retry:
+ task_oncpu_function_call(task, __perf_counter_disable, counter);
+
+ spin_lock_irq(&ctx->lock);
+ /*
+ * If the counter is still active, we need to retry the cross-call.
+ */
+ if (counter->state == PERF_COUNTER_STATE_ACTIVE) {
+ spin_unlock_irq(&ctx->lock);
+ goto retry;
+ }
+
+ /*
+ * Since we have the lock this context can't be scheduled
+ * in, so we can change the state safely.
+ */
+ if (counter->state == PERF_COUNTER_STATE_INACTIVE)
+ counter->state = PERF_COUNTER_STATE_OFF;
+
+ spin_unlock_irq(&ctx->lock);
+}
+
+/*
+ * Disable a counter and all its children.
+ */
+static void perf_counter_disable_family(struct perf_counter *counter)
+{
+ struct perf_counter *child;
+
+ perf_counter_disable(counter);
+
+ /*
+ * Lock the mutex to protect the list of children
+ */
+ mutex_lock(&counter->mutex);
+ list_for_each_entry(child, &counter->child_list, child_list)
+ perf_counter_disable(child);
+ mutex_unlock(&counter->mutex);
+}
+
+static int
+counter_sched_in(struct perf_counter *counter,
+ struct perf_cpu_context *cpuctx,
+ struct perf_counter_context *ctx,
+ int cpu)
+{
+ if (counter->state <= PERF_COUNTER_STATE_OFF)
+ return 0;
+
+ counter->state = PERF_COUNTER_STATE_ACTIVE;
+ counter->oncpu = cpu; /* TODO: put 'cpu' into cpuctx->cpu */
+ /*
+ * The new state must be visible before we turn it on in the hardware:
+ */
+ smp_wmb();
+
+ if (counter->hw_ops->enable(counter)) {
+ counter->state = PERF_COUNTER_STATE_INACTIVE;
+ counter->oncpu = -1;
+ return -EAGAIN;
+ }
+
+ if (!is_software_counter(counter))
+ cpuctx->active_oncpu++;
+ ctx->nr_active++;
+
+ if (counter->hw_event.exclusive)
+ cpuctx->exclusive = 1;
+
+ return 0;
+}
+
+/*
+ * Return 1 for a group consisting entirely of software counters,
+ * 0 if the group contains any hardware counters.
+ */
+static int is_software_only_group(struct perf_counter *leader)
+{
+ struct perf_counter *counter;
+
+ if (!is_software_counter(leader))
+ return 0;
+ list_for_each_entry(counter, &leader->sibling_list, list_entry)
+ if (!is_software_counter(counter))
+ return 0;
+ return 1;
+}
+
+/*
+ * Work out whether we can put this counter group on the CPU now.
+ */
+static int group_can_go_on(struct perf_counter *counter,
+ struct perf_cpu_context *cpuctx,
+ int can_add_hw)
+{
+ /*
+ * Groups consisting entirely of software counters can always go on.
+ */
+ if (is_software_only_group(counter))
+ return 1;
+ /*
+ * If an exclusive group is already on, no other hardware
+ * counters can go on.
+ */
+ if (cpuctx->exclusive)
+ return 0;
+ /*
+ * If this group is exclusive and there are already
+ * counters on the CPU, it can't go on.
+ */
+ if (counter->hw_event.exclusive && cpuctx->active_oncpu)
+ return 0;
+ /*
+ * Otherwise, try to add it if all previous groups were able
+ * to go on.
+ */
+ return can_add_hw;
+}
+
+/*
+ * Cross CPU call to install and enable a performance counter
+ */
+static void __perf_install_in_context(void *info)
+{
+ struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context);
+ struct perf_counter *counter = info;
+ struct perf_counter_context *ctx = counter->ctx;
+ struct perf_counter *leader = counter->group_leader;
+ int cpu = smp_processor_id();
+ unsigned long flags;
+ u64 perf_flags;
+ int err;
+
+ /*
+ * If this is a task context, we need to check whether it is
+ * the current task context of this cpu. If not it has been
+ * scheduled out before the smp call arrived.
+ */
+ if (ctx->task && cpuctx->task_ctx != ctx)
+ return;
+
+ curr_rq_lock_irq_save(&flags);
+ spin_lock(&ctx->lock);
+
+ /*
+ * Protect the list operation against NMI by disabling the
+ * counters on a global level. NOP for non NMI based counters.
+ */
+ perf_flags = hw_perf_save_disable();
+
+ list_add_counter(counter, ctx);
+ ctx->nr_counters++;
+ counter->prev_state = PERF_COUNTER_STATE_OFF;
+
+ /*
+ * Don't put the counter on if it is disabled or if
+ * it is in a group and the group isn't on.
+ */
+ if (counter->state != PERF_COUNTER_STATE_INACTIVE ||
+ (leader != counter && leader->state != PERF_COUNTER_STATE_ACTIVE))
+ goto unlock;
+
+ /*
+ * An exclusive counter can't go on if there are already active
+ * hardware counters, and no hardware counter can go on if there
+ * is already an exclusive counter on.
+ */
+ if (!group_can_go_on(counter, cpuctx, 1))
+ err = -EEXIST;
+ else
+ err = counter_sched_in(counter, cpuctx, ctx, cpu);
+
+ if (err) {
+ /*
+ * This counter couldn't go on. If it is in a group
+ * then we have to pull the whole group off.
+ * If the counter group is pinned then put it in error state.
+ */
+ if (leader != counter)
+ group_sched_out(leader, cpuctx, ctx);
+ if (leader->hw_event.pinned)
+ leader->state = PERF_COUNTER_STATE_ERROR;
+ }
+
+ if (!err && !ctx->task && cpuctx->max_pertask)
+ cpuctx->max_pertask--;
+
+ unlock:
+ hw_perf_restore(perf_flags);
+
+ spin_unlock(&ctx->lock);
+ curr_rq_unlock_irq_restore(&flags);
+}
+
+/*
+ * Attach a performance counter to a context
+ *
+ * First we add the counter to the list with the hardware enable bit
+ * in counter->hw_config cleared.
+ *
+ * If the counter is attached to a task which is on a CPU we use a smp
+ * call to enable it in the task context. The task might have been
+ * scheduled away, but we check this in the smp call again.
+ *
+ * Must be called with ctx->mutex held.
+ */
+static void
+perf_install_in_context(struct perf_counter_context *ctx,
+ struct perf_counter *counter,
+ int cpu)
+{
+ struct task_struct *task = ctx->task;
+
+ if (!task) {
+ /*
+ * Per cpu counters are installed via an smp call and
+ * the install is always sucessful.
+ */
+ smp_call_function_single(cpu, __perf_install_in_context,
+ counter, 1);
+ return;
+ }
+
+ counter->task = task;
+retry:
+ task_oncpu_function_call(task, __perf_install_in_context,
+ counter);
+
+ spin_lock_irq(&ctx->lock);
+ /*
+ * we need to retry the smp call.
+ */
+ if (ctx->is_active && list_empty(&counter->list_entry)) {
+ spin_unlock_irq(&ctx->lock);
+ goto retry;
+ }
+
+ /*
+ * The lock prevents that this context is scheduled in so we
+ * can add the counter safely, if it the call above did not
+ * succeed.
+ */
+ if (list_empty(&counter->list_entry)) {
+ list_add_counter(counter, ctx);
+ ctx->nr_counters++;
+ }
+ spin_unlock_irq(&ctx->lock);
+}
+
+/*
+ * Cross CPU call to enable a performance counter
+ */
+static void __perf_counter_enable(void *info)
+{
+ struct perf_counter *counter = info;
+ struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context);
+ struct perf_counter_context *ctx = counter->ctx;
+ struct perf_counter *leader = counter->group_leader;
+ unsigned long flags;
+ int err;
+
+ /*
+ * If this is a per-task counter, need to check whether this
+ * counter's task is the current task on this cpu.
+ */
+ if (ctx->task && cpuctx->task_ctx != ctx)
+ return;
+
+ curr_rq_lock_irq_save(&flags);
+ spin_lock(&ctx->lock);
+
+ counter->prev_state = counter->state;
+ if (counter->state >= PERF_COUNTER_STATE_INACTIVE)
+ goto unlock;
+ counter->state = PERF_COUNTER_STATE_INACTIVE;
+
+ /*
+ * If the counter is in a group and isn't the group leader,
+ * then don't put it on unless the group is on.
+ */
+ if (leader != counter && leader->state != PERF_COUNTER_STATE_ACTIVE)
+ goto unlock;
+
+ if (!group_can_go_on(counter, cpuctx, 1))
+ err = -EEXIST;
+ else
+ err = counter_sched_in(counter, cpuctx, ctx,
+ smp_processor_id());
+
+ if (err) {
+ /*
+ * If this counter can't go on and it's part of a
+ * group, then the whole group has to come off.
+ */
+ if (leader != counter)
+ group_sched_out(leader, cpuctx, ctx);
+ if (leader->hw_event.pinned)
+ leader->state = PERF_COUNTER_STATE_ERROR;
+ }
+
+ unlock:
+ spin_unlock(&ctx->lock);
+ curr_rq_unlock_irq_restore(&flags);
+}
+
+/*
+ * Enable a counter.
+ */
+static void perf_counter_enable(struct perf_counter *counter)
+{
+ struct perf_counter_context *ctx = counter->ctx;
+ struct task_struct *task = ctx->task;
+
+ if (!task) {
+ /*
+ * Enable the counter on the cpu that it's on
+ */
+ smp_call_function_single(counter->cpu, __perf_counter_enable,
+ counter, 1);
+ return;
+ }
+
+ spin_lock_irq(&ctx->lock);
+ if (counter->state >= PERF_COUNTER_STATE_INACTIVE)
+ goto out;
+
+ /*
+ * If the counter is in error state, clear that first.
+ * That way, if we see the counter in error state below, we
+ * know that it has gone back into error state, as distinct
+ * from the task having been scheduled away before the
+ * cross-call arrived.
+ */
+ if (counter->state == PERF_COUNTER_STATE_ERROR)
+ counter->state = PERF_COUNTER_STATE_OFF;
+
+ retry:
+ spin_unlock_irq(&ctx->lock);
+ task_oncpu_function_call(task, __perf_counter_enable, counter);
+
+ spin_lock_irq(&ctx->lock);
+
+ /*
+ * If the context is active and the counter is still off,
+ * we need to retry the cross-call.
+ */
+ if (ctx->is_active && counter->state == PERF_COUNTER_STATE_OFF)
+ goto retry;
+
+ /*
+ * Since we have the lock this context can't be scheduled
+ * in, so we can change the state safely.
+ */
+ if (counter->state == PERF_COUNTER_STATE_OFF)
+ counter->state = PERF_COUNTER_STATE_INACTIVE;
+ out:
+ spin_unlock_irq(&ctx->lock);
+}
+
+/*
+ * Enable a counter and all its children.
+ */
+static void perf_counter_enable_family(struct perf_counter *counter)
+{
+ struct perf_counter *child;
+
+ perf_counter_enable(counter);
+
+ /*
+ * Lock the mutex to protect the list of children
+ */
+ mutex_lock(&counter->mutex);
+ list_for_each_entry(child, &counter->child_list, child_list)
+ perf_counter_enable(child);
+ mutex_unlock(&counter->mutex);
+}
+
+void __perf_counter_sched_out(struct perf_counter_context *ctx,
+ struct perf_cpu_context *cpuctx)
+{
+ struct perf_counter *counter;
+ u64 flags;
+
+ spin_lock(&ctx->lock);
+ ctx->is_active = 0;
+ if (likely(!ctx->nr_counters))
+ goto out;
+
+ flags = hw_perf_save_disable();
+ if (ctx->nr_active) {
+ list_for_each_entry(counter, &ctx->counter_list, list_entry)
+ group_sched_out(counter, cpuctx, ctx);
+ }
+ hw_perf_restore(flags);
+ out:
+ spin_unlock(&ctx->lock);
+}
+
+/*
+ * Called from scheduler to remove the counters of the current task,
+ * with interrupts disabled.
+ *
+ * We stop each counter and update the counter value in counter->count.
+ *
+ * This does not protect us against NMI, but disable()
+ * sets the disabled bit in the control field of counter _before_
+ * accessing the counter control register. If a NMI hits, then it will
+ * not restart the counter.
+ */
+void perf_counter_task_sched_out(struct task_struct *task, int cpu)
+{
+ struct perf_cpu_context *cpuctx = &per_cpu(perf_cpu_context, cpu);
+ struct perf_counter_context *ctx = &task->perf_counter_ctx;
+
+ if (likely(!cpuctx->task_ctx))
+ return;
+
+ __perf_counter_sched_out(ctx, cpuctx);
+
+ cpuctx->task_ctx = NULL;
+}
+
+static void perf_counter_cpu_sched_out(struct perf_cpu_context *cpuctx)
+{
+ __perf_counter_sched_out(&cpuctx->ctx, cpuctx);
+}
+
+static int
+group_sched_in(struct perf_counter *group_counter,
+ struct perf_cpu_context *cpuctx,
+ struct perf_counter_context *ctx,
+ int cpu)
+{
+ struct perf_counter *counter, *partial_group;
+ int ret;
+
+ if (group_counter->state == PERF_COUNTER_STATE_OFF)
+ return 0;
+
+ ret = hw_perf_group_sched_in(group_counter, cpuctx, ctx, cpu);
+ if (ret)
+ return ret < 0 ? ret : 0;
+
+ group_counter->prev_state = group_counter->state;
+ if (counter_sched_in(group_counter, cpuctx, ctx, cpu))
+ return -EAGAIN;
+
+ /*
+ * Schedule in siblings as one group (if any):
+ */
+ list_for_each_entry(counter, &group_counter->sibling_list, list_entry) {
+ counter->prev_state = counter->state;
+ if (counter_sched_in(counter, cpuctx, ctx, cpu)) {
+ partial_group = counter;
+ goto group_error;
+ }
+ }
+
+ return 0;
+
+group_error:
+ /*
+ * Groups can be scheduled in as one unit only, so undo any
+ * partial group before returning:
+ */
+ list_for_each_entry(counter, &group_counter->sibling_list, list_entry) {
+ if (counter == partial_group)
+ break;
+ counter_sched_out(counter, cpuctx, ctx);
+ }
+ counter_sched_out(group_counter, cpuctx, ctx);
+
+ return -EAGAIN;
+}
+
+static void
+__perf_counter_sched_in(struct perf_counter_context *ctx,
+ struct perf_cpu_context *cpuctx, int cpu)
+{
+ struct perf_counter *counter;
+ u64 flags;
+ int can_add_hw = 1;
+
+ spin_lock(&ctx->lock);
+ ctx->is_active = 1;
+ if (likely(!ctx->nr_counters))
+ goto out;
+
+ flags = hw_perf_save_disable();
+
+ /*
+ * First go through the list and put on any pinned groups
+ * in order to give them the best chance of going on.
+ */
+ list_for_each_entry(counter, &ctx->counter_list, list_entry) {
+ if (counter->state <= PERF_COUNTER_STATE_OFF ||
+ !counter->hw_event.pinned)
+ continue;
+ if (counter->cpu != -1 && counter->cpu != cpu)
+ continue;
+
+ if (group_can_go_on(counter, cpuctx, 1))
+ group_sched_in(counter, cpuctx, ctx, cpu);
+
+ /*
+ * If this pinned group hasn't been scheduled,
+ * put it in error state.
+ */
+ if (counter->state == PERF_COUNTER_STATE_INACTIVE)
+ counter->state = PERF_COUNTER_STATE_ERROR;
+ }
+
+ list_for_each_entry(counter, &ctx->counter_list, list_entry) {
+ /*
+ * Ignore counters in OFF or ERROR state, and
+ * ignore pinned counters since we did them already.
+ */
+ if (counter->state <= PERF_COUNTER_STATE_OFF ||
+ counter->hw_event.pinned)
+ continue;
+
+ /*
+ * Listen to the 'cpu' scheduling filter constraint
+ * of counters:
+ */
+ if (counter->cpu != -1 && counter->cpu != cpu)
+ continue;
+
+ if (group_can_go_on(counter, cpuctx, can_add_hw)) {
+ if (group_sched_in(counter, cpuctx, ctx, cpu))
+ can_add_hw = 0;
+ }
+ }
+ hw_perf_restore(flags);
+ out:
+ spin_unlock(&ctx->lock);
+}
+
+/*
+ * Called from scheduler to add the counters of the current task
+ * with interrupts disabled.
+ *
+ * We restore the counter value and then enable it.
+ *
+ * This does not protect us against NMI, but enable()
+ * sets the enabled bit in the control field of counter _before_
+ * accessing the counter control register. If a NMI hits, then it will
+ * keep the counter running.
+ */
+void perf_counter_task_sched_in(struct task_struct *task, int cpu)
+{
+ struct perf_cpu_context *cpuctx = &per_cpu(perf_cpu_context, cpu);
+ struct perf_counter_context *ctx = &task->perf_counter_ctx;
+
+ __perf_counter_sched_in(ctx, cpuctx, cpu);
+ cpuctx->task_ctx = ctx;
+}
+
+static void perf_counter_cpu_sched_in(struct perf_cpu_context *cpuctx, int cpu)
+{
+ struct perf_counter_context *ctx = &cpuctx->ctx;
+
+ __perf_counter_sched_in(ctx, cpuctx, cpu);
+}
+
+int perf_counter_task_disable(void)
+{
+ struct task_struct *curr = current;
+ struct perf_counter_context *ctx = &curr->perf_counter_ctx;
+ struct perf_counter *counter;
+ unsigned long flags;
+ u64 perf_flags;
+ int cpu;
+
+ if (likely(!ctx->nr_counters))
+ return 0;
+
+ curr_rq_lock_irq_save(&flags);
+ cpu = smp_processor_id();
+
+ /* force the update of the task clock: */
+ __task_delta_exec(curr, 1);
+
+ perf_counter_task_sched_out(curr, cpu);
+
+ spin_lock(&ctx->lock);
+
+ /*
+ * Disable all the counters:
+ */
+ perf_flags = hw_perf_save_disable();
+
+ list_for_each_entry(counter, &ctx->counter_list, list_entry) {
+ if (counter->state != PERF_COUNTER_STATE_ERROR)
+ counter->state = PERF_COUNTER_STATE_OFF;
+ }
+
+ hw_perf_restore(perf_flags);
+
+ spin_unlock(&ctx->lock);
+
+ curr_rq_unlock_irq_restore(&flags);
+
+ return 0;
+}
+
+int perf_counter_task_enable(void)
+{
+ struct task_struct *curr = current;
+ struct perf_counter_context *ctx = &curr->perf_counter_ctx;
+ struct perf_counter *counter;
+ unsigned long flags;
+ u64 perf_flags;
+ int cpu;
+
+ if (likely(!ctx->nr_counters))
+ return 0;
+
+ curr_rq_lock_irq_save(&flags);
+ cpu = smp_processor_id();
+
+ /* force the update of the task clock: */
+ __task_delta_exec(curr, 1);
+
+ perf_counter_task_sched_out(curr, cpu);
+
+ spin_lock(&ctx->lock);
+
+ /*
+ * Disable all the counters:
+ */
+ perf_flags = hw_perf_save_disable();
+
+ list_for_each_entry(counter, &ctx->counter_list, list_entry) {
+ if (counter->state > PERF_COUNTER_STATE_OFF)
+ continue;
+ counter->state = PERF_COUNTER_STATE_INACTIVE;
+ counter->hw_event.disabled = 0;
+ }
+ hw_perf_restore(perf_flags);
+
+ spin_unlock(&ctx->lock);
+
+ perf_counter_task_sched_in(curr, cpu);
+
+ curr_rq_unlock_irq_restore(&flags);
+
+ return 0;
+}
+
+/*
+ * Round-robin a context's counters:
+ */
+static void rotate_ctx(struct perf_counter_context *ctx)
+{
+ struct perf_counter *counter;
+ u64 perf_flags;
+
+ if (!ctx->nr_counters)
+ return;
+
+ spin_lock(&ctx->lock);
+ /*
+ * Rotate the first entry last (works just fine for group counters too):
+ */
+ perf_flags = hw_perf_save_disable();
+ list_for_each_entry(counter, &ctx->counter_list, list_entry) {
+ list_del(&counter->list_entry);
+ list_add_tail(&counter->list_entry, &ctx->counter_list);
+ break;
+ }
+ hw_perf_restore(perf_flags);
+
+ spin_unlock(&ctx->lock);
+}
+
+void perf_counter_task_tick(struct task_struct *curr, int cpu)
+{
+ struct perf_cpu_context *cpuctx = &per_cpu(perf_cpu_context, cpu);
+ struct perf_counter_context *ctx = &curr->perf_counter_ctx;
+ const int rotate_percpu = 0;
+
+ if (rotate_percpu)
+ perf_counter_cpu_sched_out(cpuctx);
+ perf_counter_task_sched_out(curr, cpu);
+
+ if (rotate_percpu)
+ rotate_ctx(&cpuctx->ctx);
+ rotate_ctx(ctx);
+
+ if (rotate_percpu)
+ perf_counter_cpu_sched_in(cpuctx, cpu);
+ perf_counter_task_sched_in(curr, cpu);
+}
+
+/*
+ * Cross CPU call to read the hardware counter
+ */
+static void __read(void *info)
+{
+ struct perf_counter *counter = info;
+ unsigned long flags;
+
+ curr_rq_lock_irq_save(&flags);
+ counter->hw_ops->read(counter);
+ curr_rq_unlock_irq_restore(&flags);
+}
+
+static u64 perf_counter_read(struct perf_counter *counter)
+{
+ /*
+ * If counter is enabled and currently active on a CPU, update the
+ * value in the counter structure:
+ */
+ if (counter->state == PERF_COUNTER_STATE_ACTIVE) {
+ smp_call_function_single(counter->oncpu,
+ __read, counter, 1);
+ }
+
+ return atomic64_read(&counter->count);
+}
+
+/*
+ * Cross CPU call to switch performance data pointers
+ */
+static void __perf_switch_irq_data(void *info)
+{
+ struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context);
+ struct perf_counter *counter = info;
+ struct perf_counter_context *ctx = counter->ctx;
+ struct perf_data *oldirqdata = counter->irqdata;
+
+ /*
+ * If this is a task context, we need to check whether it is
+ * the current task context of this cpu. If not it has been
+ * scheduled out before the smp call arrived.
+ */
+ if (ctx->task) {
+ if (cpuctx->task_ctx != ctx)
+ return;
+ spin_lock(&ctx->lock);
+ }
+
+ /* Change the pointer NMI safe */
+ atomic_long_set((atomic_long_t *)&counter->irqdata,
+ (unsigned long) counter->usrdata);
+ counter->usrdata = oldirqdata;
+
+ if (ctx->task)
+ spin_unlock(&ctx->lock);
+}
+
+static struct perf_data *perf_switch_irq_data(struct perf_counter *counter)
+{
+ struct perf_counter_context *ctx = counter->ctx;
+ struct perf_data *oldirqdata = counter->irqdata;
+ struct task_struct *task = ctx->task;
+
+ if (!task) {
+ smp_call_function_single(counter->cpu,
+ __perf_switch_irq_data,
+ counter, 1);
+ return counter->usrdata;
+ }
+
+retry:
+ spin_lock_irq(&ctx->lock);
+ if (counter->state != PERF_COUNTER_STATE_ACTIVE) {
+ counter->irqdata = counter->usrdata;
+ counter->usrdata = oldirqdata;
+ spin_unlock_irq(&ctx->lock);
+ return oldirqdata;
+ }
+ spin_unlock_irq(&ctx->lock);
+ task_oncpu_function_call(task, __perf_switch_irq_data, counter);
+ /* Might have failed, because task was scheduled out */
+ if (counter->irqdata == oldirqdata)
+ goto retry;
+
+ return counter->usrdata;
+}
+
+static void put_context(struct perf_counter_context *ctx)
+{
+ if (ctx->task)
+ put_task_struct(ctx->task);
+}
+
+static struct perf_counter_context *find_get_context(pid_t pid, int cpu)
+{
+ struct perf_cpu_context *cpuctx;
+ struct perf_counter_context *ctx;
+ struct task_struct *task;
+
+ /*
+ * If cpu is not a wildcard then this is a percpu counter:
+ */
+ if (cpu != -1) {
+ /* Must be root to operate on a CPU counter: */
+ if (!capable(CAP_SYS_ADMIN))
+ return ERR_PTR(-EACCES);
+
+ if (cpu < 0 || cpu > num_possible_cpus())
+ return ERR_PTR(-EINVAL);
+
+ /*
+ * We could be clever and allow to attach a counter to an
+ * offline CPU and activate it when the CPU comes up, but
+ * that's for later.
+ */
+ if (!cpu_isset(cpu, cpu_online_map))
+ return ERR_PTR(-ENODEV);
+
+ cpuctx = &per_cpu(perf_cpu_context, cpu);
+ ctx = &cpuctx->ctx;
+
+ return ctx;
+ }
+
+ rcu_read_lock();
+ if (!pid)
+ task = current;
+ else
+ task = find_task_by_vpid(pid);
+ if (task)
+ get_task_struct(task);
+ rcu_read_unlock();
+
+ if (!task)
+ return ERR_PTR(-ESRCH);
+
+ ctx = &task->perf_counter_ctx;
+ ctx->task = task;
+
+ /* Reuse ptrace permission checks for now. */
+ if (!ptrace_may_access(task, PTRACE_MODE_READ)) {
+ put_context(ctx);
+ return ERR_PTR(-EACCES);
+ }
+
+ return ctx;
+}
+
+/*
+ * Called when the last reference to the file is gone.
+ */
+static int perf_release(struct inode *inode, struct file *file)
+{
+ struct perf_counter *counter = file->private_data;
+ struct perf_counter_context *ctx = counter->ctx;
+
+ file->private_data = NULL;
+
+ mutex_lock(&ctx->mutex);
+ mutex_lock(&counter->mutex);
+
+ perf_counter_remove_from_context(counter);
+
+ mutex_unlock(&counter->mutex);
+ mutex_unlock(&ctx->mutex);
+
+ kfree(counter);
+ put_context(ctx);
+
+ return 0;
+}
+
+/*
+ * Read the performance counter - simple non blocking version for now
+ */
+static ssize_t
+perf_read_hw(struct perf_counter *counter, char __user *buf, size_t count)
+{
+ u64 cntval;
+
+ if (count != sizeof(cntval))
+ return -EINVAL;
+
+ /*
+ * Return end-of-file for a read on a counter that is in
+ * error state (i.e. because it was pinned but it couldn't be
+ * scheduled on to the CPU at some point).
+ */
+ if (counter->state == PERF_COUNTER_STATE_ERROR)
+ return 0;
+
+ mutex_lock(&counter->mutex);
+ cntval = perf_counter_read(counter);
+ mutex_unlock(&counter->mutex);
+
+ return put_user(cntval, (u64 __user *) buf) ? -EFAULT : sizeof(cntval);
+}
+
+static ssize_t
+perf_copy_usrdata(struct perf_data *usrdata, char __user *buf, size_t count)
+{
+ if (!usrdata->len)
+ return 0;
+
+ count = min(count, (size_t)usrdata->len);
+ if (copy_to_user(buf, usrdata->data + usrdata->rd_idx, count))
+ return -EFAULT;
+
+ /* Adjust the counters */
+ usrdata->len -= count;
+ if (!usrdata->len)
+ usrdata->rd_idx = 0;
+ else
+ usrdata->rd_idx += count;
+
+ return count;
+}
+
+static ssize_t
+perf_read_irq_data(struct perf_counter *counter,
+ char __user *buf,
+ size_t count,
+ int nonblocking)
+{
+ struct perf_data *irqdata, *usrdata;
+ DECLARE_WAITQUEUE(wait, current);
+ ssize_t res, res2;
+
+ irqdata = counter->irqdata;
+ usrdata = counter->usrdata;
+
+ if (usrdata->len + irqdata->len >= count)
+ goto read_pending;
+
+ if (nonblocking)
+ return -EAGAIN;
+
+ spin_lock_irq(&counter->waitq.lock);
+ __add_wait_queue(&counter->waitq, &wait);
+ for (;;) {
+ set_current_state(TASK_INTERRUPTIBLE);
+ if (usrdata->len + irqdata->len >= count)
+ break;
+
+ if (signal_pending(current))
+ break;
+
+ if (counter->state == PERF_COUNTER_STATE_ERROR)
+ break;
+
+ spin_unlock_irq(&counter->waitq.lock);
+ schedule();
+ spin_lock_irq(&counter->waitq.lock);
+ }
+ __remove_wait_queue(&counter->waitq, &wait);
+ __set_current_state(TASK_RUNNING);
+ spin_unlock_irq(&counter->waitq.lock);
+
+ if (usrdata->len + irqdata->len < count &&
+ counter->state != PERF_COUNTER_STATE_ERROR)
+ return -ERESTARTSYS;
+read_pending:
+ mutex_lock(&counter->mutex);
+
+ /* Drain pending data first: */
+ res = perf_copy_usrdata(usrdata, buf, count);
+ if (res < 0 || res == count)
+ goto out;
+
+ /* Switch irq buffer: */
+ usrdata = perf_switch_irq_data(counter);
+ res2 = perf_copy_usrdata(usrdata, buf + res, count - res);
+ if (res2 < 0) {
+ if (!res)
+ res = -EFAULT;
+ } else {
+ res += res2;
+ }
+out:
+ mutex_unlock(&counter->mutex);
+
+ return res;
+}
+
+static ssize_t
+perf_read(struct file *file, char __user *buf, size_t count, loff_t *ppos)
+{
+ struct perf_counter *counter = file->private_data;
+
+ switch (counter->hw_event.record_type) {
+ case PERF_RECORD_SIMPLE:
+ return perf_read_hw(counter, buf, count);
+
+ case PERF_RECORD_IRQ:
+ case PERF_RECORD_GROUP:
+ return perf_read_irq_data(counter, buf, count,
+ file->f_flags & O_NONBLOCK);
+ }
+ return -EINVAL;
+}
+
+static unsigned int perf_poll(struct file *file, poll_table *wait)
+{
+ struct perf_counter *counter = file->private_data;
+ unsigned int events = 0;
+ unsigned long flags;
+
+ poll_wait(file, &counter->waitq, wait);
+
+ spin_lock_irqsave(&counter->waitq.lock, flags);
+ if (counter->usrdata->len || counter->irqdata->len)
+ events |= POLLIN;
+ spin_unlock_irqrestore(&counter->waitq.lock, flags);
+
+ return events;
+}
+
+static long perf_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
+{
+ struct perf_counter *counter = file->private_data;
+ int err = 0;
+
+ switch (cmd) {
+ case PERF_COUNTER_IOC_ENABLE:
+ perf_counter_enable_family(counter);
+ break;
+ case PERF_COUNTER_IOC_DISABLE:
+ perf_counter_disable_family(counter);
+ break;
+ default:
+ err = -ENOTTY;
+ }
+ return err;
+}
+
+static const struct file_operations perf_fops = {
+ .release = perf_release,
+ .read = perf_read,
+ .poll = perf_poll,
+ .unlocked_ioctl = perf_ioctl,
+ .compat_ioctl = perf_ioctl,
+};
+
+static int cpu_clock_perf_counter_enable(struct perf_counter *counter)
+{
+ int cpu = raw_smp_processor_id();
+
+ atomic64_set(&counter->hw.prev_count, cpu_clock(cpu));
+ return 0;
+}
+
+static void cpu_clock_perf_counter_update(struct perf_counter *counter)
+{
+ int cpu = raw_smp_processor_id();
+ s64 prev;
+ u64 now;
+
+ now = cpu_clock(cpu);
+ prev = atomic64_read(&counter->hw.prev_count);
+ atomic64_set(&counter->hw.prev_count, now);
+ atomic64_add(now - prev, &counter->count);
+}
+
+static void cpu_clock_perf_counter_disable(struct perf_counter *counter)
+{
+ cpu_clock_perf_counter_update(counter);
+}
+
+static void cpu_clock_perf_counter_read(struct perf_counter *counter)
+{
+ cpu_clock_perf_counter_update(counter);
+}
+
+static const struct hw_perf_counter_ops perf_ops_cpu_clock = {
+ .enable = cpu_clock_perf_counter_enable,
+ .disable = cpu_clock_perf_counter_disable,
+ .read = cpu_clock_perf_counter_read,
+};
+
+/*
+ * Called from within the scheduler:
+ */
+static u64 task_clock_perf_counter_val(struct perf_counter *counter, int update)
+{
+ struct task_struct *curr = counter->task;
+ u64 delta;
+
+ delta = __task_delta_exec(curr, update);
+
+ return curr->se.sum_exec_runtime + delta;
+}
+
+static void task_clock_perf_counter_update(struct perf_counter *counter, u64 now)
+{
+ u64 prev;
+ s64 delta;
+
+ prev = atomic64_read(&counter->hw.prev_count);
+
+ atomic64_set(&counter->hw.prev_count, now);
+
+ delta = now - prev;
+
+ atomic64_add(delta, &counter->count);
+}
+
+static void task_clock_perf_counter_read(struct perf_counter *counter)
+{
+ u64 now = task_clock_perf_counter_val(counter, 1);
+
+ task_clock_perf_counter_update(counter, now);
+}
+
+static int task_clock_perf_counter_enable(struct perf_counter *counter)
+{
+ if (counter->prev_state <= PERF_COUNTER_STATE_OFF)
+ atomic64_set(&counter->hw.prev_count,
+ task_clock_perf_counter_val(counter, 0));
+
+ return 0;
+}
+
+static void task_clock_perf_counter_disable(struct perf_counter *counter)
+{
+ u64 now = task_clock_perf_counter_val(counter, 0);
+
+ task_clock_perf_counter_update(counter, now);
+}
+
+static const struct hw_perf_counter_ops perf_ops_task_clock = {
+ .enable = task_clock_perf_counter_enable,
+ .disable = task_clock_perf_counter_disable,
+ .read = task_clock_perf_counter_read,
+};
+
+#ifdef CONFIG_VM_EVENT_COUNTERS
+#define cpu_page_faults() __get_cpu_var(vm_event_states).event[PGFAULT]
+#else
+#define cpu_page_faults() 0
+#endif
+
+static u64 get_page_faults(struct perf_counter *counter)
+{
+ struct task_struct *curr = counter->ctx->task;
+
+ if (curr)
+ return curr->maj_flt + curr->min_flt;
+ return cpu_page_faults();
+}
+
+static void page_faults_perf_counter_update(struct perf_counter *counter)
+{
+ u64 prev, now;
+ s64 delta;
+
+ prev = atomic64_read(&counter->hw.prev_count);
+ now = get_page_faults(counter);
+
+ atomic64_set(&counter->hw.prev_count, now);
+
+ delta = now - prev;
+
+ atomic64_add(delta, &counter->count);
+}
+
+static void page_faults_perf_counter_read(struct perf_counter *counter)
+{
+ page_faults_perf_counter_update(counter);
+}
+
+static int page_faults_perf_counter_enable(struct perf_counter *counter)
+{
+ if (counter->prev_state <= PERF_COUNTER_STATE_OFF)
+ atomic64_set(&counter->hw.prev_count, get_page_faults(counter));
+ return 0;
+}
+
+static void page_faults_perf_counter_disable(struct perf_counter *counter)
+{
+ page_faults_perf_counter_update(counter);
+}
+
+static const struct hw_perf_counter_ops perf_ops_page_faults = {
+ .enable = page_faults_perf_counter_enable,
+ .disable = page_faults_perf_counter_disable,
+ .read = page_faults_perf_counter_read,
+};
+
+static u64 get_context_switches(struct perf_counter *counter)
+{
+ struct task_struct *curr = counter->ctx->task;
+
+ if (curr)
+ return curr->nvcsw + curr->nivcsw;
+ return cpu_nr_switches(smp_processor_id());
+}
+
+static void context_switches_perf_counter_update(struct perf_counter *counter)
+{
+ u64 prev, now;
+ s64 delta;
+
+ prev = atomic64_read(&counter->hw.prev_count);
+ now = get_context_switches(counter);
+
+ atomic64_set(&counter->hw.prev_count, now);
+
+ delta = now - prev;
+
+ atomic64_add(delta, &counter->count);
+}
+
+static void context_switches_perf_counter_read(struct perf_counter *counter)
+{
+ context_switches_perf_counter_update(counter);
+}
+
+static int context_switches_perf_counter_enable(struct perf_counter *counter)
+{
+ if (counter->prev_state <= PERF_COUNTER_STATE_OFF)
+ atomic64_set(&counter->hw.prev_count,
+ get_context_switches(counter));
+ return 0;
+}
+
+static void context_switches_perf_counter_disable(struct perf_counter *counter)
+{
+ context_switches_perf_counter_update(counter);
+}
+
+static const struct hw_perf_counter_ops perf_ops_context_switches = {
+ .enable = context_switches_perf_counter_enable,
+ .disable = context_switches_perf_counter_disable,
+ .read = context_switches_perf_counter_read,
+};
+
+static inline u64 get_cpu_migrations(struct perf_counter *counter)
+{
+ struct task_struct *curr = counter->ctx->task;
+
+ if (curr)
+ return curr->se.nr_migrations;
+ return cpu_nr_migrations(smp_processor_id());
+}
+
+static void cpu_migrations_perf_counter_update(struct perf_counter *counter)
+{
+ u64 prev, now;
+ s64 delta;
+
+ prev = atomic64_read(&counter->hw.prev_count);
+ now = get_cpu_migrations(counter);
+
+ atomic64_set(&counter->hw.prev_count, now);
+
+ delta = now - prev;
+
+ atomic64_add(delta, &counter->count);
+}
+
+static void cpu_migrations_perf_counter_read(struct perf_counter *counter)
+{
+ cpu_migrations_perf_counter_update(counter);
+}
+
+static int cpu_migrations_perf_counter_enable(struct perf_counter *counter)
+{
+ if (counter->prev_state <= PERF_COUNTER_STATE_OFF)
+ atomic64_set(&counter->hw.prev_count,
+ get_cpu_migrations(counter));
+ return 0;
+}
+
+static void cpu_migrations_perf_counter_disable(struct perf_counter *counter)
+{
+ cpu_migrations_perf_counter_update(counter);
+}
+
+static const struct hw_perf_counter_ops perf_ops_cpu_migrations = {
+ .enable = cpu_migrations_perf_counter_enable,
+ .disable = cpu_migrations_perf_counter_disable,
+ .read = cpu_migrations_perf_counter_read,
+};
+
+static const struct hw_perf_counter_ops *
+sw_perf_counter_init(struct perf_counter *counter)
+{
+ const struct hw_perf_counter_ops *hw_ops = NULL;
+
+ /*
+ * Software counters (currently) can't in general distinguish
+ * between user, kernel and hypervisor events.
+ * However, context switches and cpu migrations are considered
+ * to be kernel events, and page faults are never hypervisor
+ * events.
+ */
+ switch (counter->hw_event.type) {
+ case PERF_COUNT_CPU_CLOCK:
+ if (!(counter->hw_event.exclude_user ||
+ counter->hw_event.exclude_kernel ||
+ counter->hw_event.exclude_hv))
+ hw_ops = &perf_ops_cpu_clock;
+ break;
+ case PERF_COUNT_TASK_CLOCK:
+ if (counter->hw_event.exclude_user ||
+ counter->hw_event.exclude_kernel ||
+ counter->hw_event.exclude_hv)
+ break;
+ /*
+ * If the user instantiates this as a per-cpu counter,
+ * use the cpu_clock counter instead.
+ */
+ if (counter->ctx->task)
+ hw_ops = &perf_ops_task_clock;
+ else
+ hw_ops = &perf_ops_cpu_clock;
+ break;
+ case PERF_COUNT_PAGE_FAULTS:
+ if (!(counter->hw_event.exclude_user ||
+ counter->hw_event.exclude_kernel))
+ hw_ops = &perf_ops_page_faults;
+ break;
+ case PERF_COUNT_CONTEXT_SWITCHES:
+ if (!counter->hw_event.exclude_kernel)
+ hw_ops = &perf_ops_context_switches;
+ break;
+ case PERF_COUNT_CPU_MIGRATIONS:
+ if (!counter->hw_event.exclude_kernel)
+ hw_ops = &perf_ops_cpu_migrations;
+ break;
+ default:
+ break;
+ }
+ return hw_ops;
+}
+
+/*
+ * Allocate and initialize a counter structure
+ */
+static struct perf_counter *
+perf_counter_alloc(struct perf_counter_hw_event *hw_event,
+ int cpu,
+ struct perf_counter_context *ctx,
+ struct perf_counter *group_leader,
+ gfp_t gfpflags)
+{
+ const struct hw_perf_counter_ops *hw_ops;
+ struct perf_counter *counter;
+
+ counter = kzalloc(sizeof(*counter), gfpflags);
+ if (!counter)
+ return NULL;
+
+ /*
+ * Single counters are their own group leaders, with an
+ * empty sibling list:
+ */
+ if (!group_leader)
+ group_leader = counter;
+
+ mutex_init(&counter->mutex);
+ INIT_LIST_HEAD(&counter->list_entry);
+ INIT_LIST_HEAD(&counter->sibling_list);
+ init_waitqueue_head(&counter->waitq);
+
+ INIT_LIST_HEAD(&counter->child_list);
+
+ counter->irqdata = &counter->data[0];
+ counter->usrdata = &counter->data[1];
+ counter->cpu = cpu;
+ counter->hw_event = *hw_event;
+ counter->wakeup_pending = 0;
+ counter->group_leader = group_leader;
+ counter->hw_ops = NULL;
+ counter->ctx = ctx;
+
+ counter->state = PERF_COUNTER_STATE_INACTIVE;
+ if (hw_event->disabled)
+ counter->state = PERF_COUNTER_STATE_OFF;
+
+ hw_ops = NULL;
+ if (!hw_event->raw && hw_event->type < 0)
+ hw_ops = sw_perf_counter_init(counter);
+ else
+ hw_ops = hw_perf_counter_init(counter);
+
+ if (!hw_ops) {
+ kfree(counter);
+ return NULL;
+ }
+ counter->hw_ops = hw_ops;
+
+ return counter;
+}
+
+/**
+ * sys_perf_task_open - open a performance counter, associate it to a task/cpu
+ *
+ * @hw_event_uptr: event type attributes for monitoring/sampling
+ * @pid: target pid
+ * @cpu: target cpu
+ * @group_fd: group leader counter fd
+ */
+SYSCALL_DEFINE4(perf_counter_open,
+ const struct perf_counter_hw_event __user *, hw_event_uptr,
+ pid_t, pid, int, cpu, int, group_fd)
+{
+ struct perf_counter *counter, *group_leader;
+ struct perf_counter_hw_event hw_event;
+ struct perf_counter_context *ctx;
+ struct file *counter_file = NULL;
+ struct file *group_file = NULL;
+ int fput_needed = 0;
+ int fput_needed2 = 0;
+ int ret;
+
+ if (copy_from_user(&hw_event, hw_event_uptr, sizeof(hw_event)) != 0)
+ return -EFAULT;
+
+ /*
+ * Get the target context (task or percpu):
+ */
+ ctx = find_get_context(pid, cpu);
+ if (IS_ERR(ctx))
+ return PTR_ERR(ctx);
+
+ /*
+ * Look up the group leader (we will attach this counter to it):
+ */
+ group_leader = NULL;
+ if (group_fd != -1) {
+ ret = -EINVAL;
+ group_file = fget_light(group_fd, &fput_needed);
+ if (!group_file)
+ goto err_put_context;
+ if (group_file->f_op != &perf_fops)
+ goto err_put_context;
+
+ group_leader = group_file->private_data;
+ /*
+ * Do not allow a recursive hierarchy (this new sibling
+ * becoming part of another group-sibling):
+ */
+ if (group_leader->group_leader != group_leader)
+ goto err_put_context;
+ /*
+ * Do not allow to attach to a group in a different
+ * task or CPU context:
+ */
+ if (group_leader->ctx != ctx)
+ goto err_put_context;
+ /*
+ * Only a group leader can be exclusive or pinned
+ */
+ if (hw_event.exclusive || hw_event.pinned)
+ goto err_put_context;
+ }
+
+ ret = -EINVAL;
+ counter = perf_counter_alloc(&hw_event, cpu, ctx, group_leader,
+ GFP_KERNEL);
+ if (!counter)
+ goto err_put_context;
+
+ ret = anon_inode_getfd("[perf_counter]", &perf_fops, counter, 0);
+ if (ret < 0)
+ goto err_free_put_context;
+
+ counter_file = fget_light(ret, &fput_needed2);
+ if (!counter_file)
+ goto err_free_put_context;
+
+ counter->filp = counter_file;
+ mutex_lock(&ctx->mutex);
+ perf_install_in_context(ctx, counter, cpu);
+ mutex_unlock(&ctx->mutex);
+
+ fput_light(counter_file, fput_needed2);
+
+out_fput:
+ fput_light(group_file, fput_needed);
+
+ return ret;
+
+err_free_put_context:
+ kfree(counter);
+
+err_put_context:
+ put_context(ctx);
+
+ goto out_fput;
+}
+
+/*
+ * Initialize the perf_counter context in a task_struct:
+ */
+static void
+__perf_counter_init_context(struct perf_counter_context *ctx,
+ struct task_struct *task)
+{
+ memset(ctx, 0, sizeof(*ctx));
+ spin_lock_init(&ctx->lock);
+ mutex_init(&ctx->mutex);
+ INIT_LIST_HEAD(&ctx->counter_list);
+ ctx->task = task;
+}
+
+/*
+ * inherit a counter from parent task to child task:
+ */
+static struct perf_counter *
+inherit_counter(struct perf_counter *parent_counter,
+ struct task_struct *parent,
+ struct perf_counter_context *parent_ctx,
+ struct task_struct *child,
+ struct perf_counter *group_leader,
+ struct perf_counter_context *child_ctx)
+{
+ struct perf_counter *child_counter;
+
+ /*
+ * Instead of creating recursive hierarchies of counters,
+ * we link inherited counters back to the original parent,
+ * which has a filp for sure, which we use as the reference
+ * count:
+ */
+ if (parent_counter->parent)
+ parent_counter = parent_counter->parent;
+
+ child_counter = perf_counter_alloc(&parent_counter->hw_event,
+ parent_counter->cpu, child_ctx,
+ group_leader, GFP_KERNEL);
+ if (!child_counter)
+ return NULL;
+
+ /*
+ * Link it up in the child's context:
+ */
+ child_counter->task = child;
+ list_add_counter(child_counter, child_ctx);
+ child_ctx->nr_counters++;
+
+ child_counter->parent = parent_counter;
+ /*
+ * inherit into child's child as well:
+ */
+ child_counter->hw_event.inherit = 1;
+
+ /*
+ * Get a reference to the parent filp - we will fput it
+ * when the child counter exits. This is safe to do because
+ * we are in the parent and we know that the filp still
+ * exists and has a nonzero count:
+ */
+ atomic_long_inc(&parent_counter->filp->f_count);
+
+ /*
+ * Link this into the parent counter's child list
+ */
+ mutex_lock(&parent_counter->mutex);
+ list_add_tail(&child_counter->child_list, &parent_counter->child_list);
+
+ /*
+ * Make the child state follow the state of the parent counter,
+ * not its hw_event.disabled bit. We hold the parent's mutex,
+ * so we won't race with perf_counter_{en,dis}able_family.
+ */
+ if (parent_counter->state >= PERF_COUNTER_STATE_INACTIVE)
+ child_counter->state = PERF_COUNTER_STATE_INACTIVE;
+ else
+ child_counter->state = PERF_COUNTER_STATE_OFF;
+
+ mutex_unlock(&parent_counter->mutex);
+
+ return child_counter;
+}
+
+static int inherit_group(struct perf_counter *parent_counter,
+ struct task_struct *parent,
+ struct perf_counter_context *parent_ctx,
+ struct task_struct *child,
+ struct perf_counter_context *child_ctx)
+{
+ struct perf_counter *leader;
+ struct perf_counter *sub;
+
+ leader = inherit_counter(parent_counter, parent, parent_ctx,
+ child, NULL, child_ctx);
+ if (!leader)
+ return -ENOMEM;
+ list_for_each_entry(sub, &parent_counter->sibling_list, list_entry) {
+ if (!inherit_counter(sub, parent, parent_ctx,
+ child, leader, child_ctx))
+ return -ENOMEM;
+ }
+ return 0;
+}
+
+static void sync_child_counter(struct perf_counter *child_counter,
+ struct perf_counter *parent_counter)
+{
+ u64 parent_val, child_val;
+
+ parent_val = atomic64_read(&parent_counter->count);
+ child_val = atomic64_read(&child_counter->count);
+
+ /*
+ * Add back the child's count to the parent's count:
+ */
+ atomic64_add(child_val, &parent_counter->count);
+
+ /*
+ * Remove this counter from the parent's list
+ */
+ mutex_lock(&parent_counter->mutex);
+ list_del_init(&child_counter->child_list);
+ mutex_unlock(&parent_counter->mutex);
+
+ /*
+ * Release the parent counter, if this was the last
+ * reference to it.
+ */
+ fput(parent_counter->filp);
+}
+
+static void
+__perf_counter_exit_task(struct task_struct *child,
+ struct perf_counter *child_counter,
+ struct perf_counter_context *child_ctx)
+{
+ struct perf_counter *parent_counter;
+ struct perf_counter *sub, *tmp;
+
+ /*
+ * If we do not self-reap then we have to wait for the
+ * child task to unschedule (it will happen for sure),
+ * so that its counter is at its final count. (This
+ * condition triggers rarely - child tasks usually get
+ * off their CPU before the parent has a chance to
+ * get this far into the reaping action)
+ */
+ if (child != current) {
+ wait_task_inactive(child, 0);
+ list_del_init(&child_counter->list_entry);
+ } else {
+ struct perf_cpu_context *cpuctx;
+ unsigned long flags;
+ u64 perf_flags;
+
+ /*
+ * Disable and unlink this counter.
+ *
+ * Be careful about zapping the list - IRQ/NMI context
+ * could still be processing it:
+ */
+ curr_rq_lock_irq_save(&flags);
+ perf_flags = hw_perf_save_disable();
+
+ cpuctx = &__get_cpu_var(perf_cpu_context);
+
+ group_sched_out(child_counter, cpuctx, child_ctx);
+
+ list_del_init(&child_counter->list_entry);
+
+ child_ctx->nr_counters--;
+
+ hw_perf_restore(perf_flags);
+ curr_rq_unlock_irq_restore(&flags);
+ }
+
+ parent_counter = child_counter->parent;
+ /*
+ * It can happen that parent exits first, and has counters
+ * that are still around due to the child reference. These
+ * counters need to be zapped - but otherwise linger.
+ */
+ if (parent_counter) {
+ sync_child_counter(child_counter, parent_counter);
+ list_for_each_entry_safe(sub, tmp, &child_counter->sibling_list,
+ list_entry) {
+ if (sub->parent) {
+ sync_child_counter(sub, sub->parent);
+ kfree(sub);
+ }
+ }
+ kfree(child_counter);
+ }
+}
+
+/*
+ * When a child task exits, feed back counter values to parent counters.
+ *
+ * Note: we may be running in child context, but the PID is not hashed
+ * anymore so new counters will not be added.
+ */
+void perf_counter_exit_task(struct task_struct *child)
+{
+ struct perf_counter *child_counter, *tmp;
+ struct perf_counter_context *child_ctx;
+
+ child_ctx = &child->perf_counter_ctx;
+
+ if (likely(!child_ctx->nr_counters))
+ return;
+
+ list_for_each_entry_safe(child_counter, tmp, &child_ctx->counter_list,
+ list_entry)
+ __perf_counter_exit_task(child, child_counter, child_ctx);
+}
+
+/*
+ * Initialize the perf_counter context in task_struct
+ */
+void perf_counter_init_task(struct task_struct *child)
+{
+ struct perf_counter_context *child_ctx, *parent_ctx;
+ struct perf_counter *counter;
+ struct task_struct *parent = current;
+
+ child_ctx = &child->perf_counter_ctx;
+ parent_ctx = &parent->perf_counter_ctx;
+
+ __perf_counter_init_context(child_ctx, child);
+
+ /*
+ * This is executed from the parent task context, so inherit
+ * counters that have been marked for cloning:
+ */
+
+ if (likely(!parent_ctx->nr_counters))
+ return;
+
+ /*
+ * Lock the parent list. No need to lock the child - not PID
+ * hashed yet and not running, so nobody can access it.
+ */
+ mutex_lock(&parent_ctx->mutex);
+
+ /*
+ * We dont have to disable NMIs - we are only looking at
+ * the list, not manipulating it:
+ */
+ list_for_each_entry(counter, &parent_ctx->counter_list, list_entry) {
+ if (!counter->hw_event.inherit)
+ continue;
+
+ if (inherit_group(counter, parent,
+ parent_ctx, child, child_ctx))
+ break;
+ }
+
+ mutex_unlock(&parent_ctx->mutex);
+}
+
+static void __cpuinit perf_counter_init_cpu(int cpu)
+{
+ struct perf_cpu_context *cpuctx;
+
+ cpuctx = &per_cpu(perf_cpu_context, cpu);
+ __perf_counter_init_context(&cpuctx->ctx, NULL);
+
+ mutex_lock(&perf_resource_mutex);
+ cpuctx->max_pertask = perf_max_counters - perf_reserved_percpu;
+ mutex_unlock(&perf_resource_mutex);
+
+ hw_perf_counter_setup(cpu);
+}
+
+#ifdef CONFIG_HOTPLUG_CPU
+static void __perf_counter_exit_cpu(void *info)
+{
+ struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context);
+ struct perf_counter_context *ctx = &cpuctx->ctx;
+ struct perf_counter *counter, *tmp;
+
+ list_for_each_entry_safe(counter, tmp, &ctx->counter_list, list_entry)
+ __perf_counter_remove_from_context(counter);
+}
+static void perf_counter_exit_cpu(int cpu)
+{
+ struct perf_cpu_context *cpuctx = &per_cpu(perf_cpu_context, cpu);
+ struct perf_counter_context *ctx = &cpuctx->ctx;
+
+ mutex_lock(&ctx->mutex);
+ smp_call_function_single(cpu, __perf_counter_exit_cpu, NULL, 1);
+ mutex_unlock(&ctx->mutex);
+}
+#else
+static inline void perf_counter_exit_cpu(int cpu) { }
+#endif
+
+static int __cpuinit
+perf_cpu_notify(struct notifier_block *self, unsigned long action, void *hcpu)
+{
+ unsigned int cpu = (long)hcpu;
+
+ switch (action) {
+
+ case CPU_UP_PREPARE:
+ case CPU_UP_PREPARE_FROZEN:
+ perf_counter_init_cpu(cpu);
+ break;
+
+ case CPU_DOWN_PREPARE:
+ case CPU_DOWN_PREPARE_FROZEN:
+ perf_counter_exit_cpu(cpu);
+ break;
+
+ default:
+ break;
+ }
+
+ return NOTIFY_OK;
+}
+
+static struct notifier_block __cpuinitdata perf_cpu_nb = {
+ .notifier_call = perf_cpu_notify,
+};
+
+static int __init perf_counter_init(void)
+{
+ perf_cpu_notify(&perf_cpu_nb, (unsigned long)CPU_UP_PREPARE,
+ (void *)(long)smp_processor_id());
+ register_cpu_notifier(&perf_cpu_nb);
+
+ return 0;
+}
+early_initcall(perf_counter_init);
+
+static ssize_t perf_show_reserve_percpu(struct sysdev_class *class, char *buf)
+{
+ return sprintf(buf, "%d\n", perf_reserved_percpu);
+}
+
+static ssize_t
+perf_set_reserve_percpu(struct sysdev_class *class,
+ const char *buf,
+ size_t count)
+{
+ struct perf_cpu_context *cpuctx;
+ unsigned long val;
+ int err, cpu, mpt;
+
+ err = strict_strtoul(buf, 10, &val);
+ if (err)
+ return err;
+ if (val > perf_max_counters)
+ return -EINVAL;
+
+ mutex_lock(&perf_resource_mutex);
+ perf_reserved_percpu = val;
+ for_each_online_cpu(cpu) {
+ cpuctx = &per_cpu(perf_cpu_context, cpu);
+ spin_lock_irq(&cpuctx->ctx.lock);
+ mpt = min(perf_max_counters - cpuctx->ctx.nr_counters,
+ perf_max_counters - perf_reserved_percpu);
+ cpuctx->max_pertask = mpt;
+ spin_unlock_irq(&cpuctx->ctx.lock);
+ }
+ mutex_unlock(&perf_resource_mutex);
+
+ return count;
+}
+
+static ssize_t perf_show_overcommit(struct sysdev_class *class, char *buf)
+{
+ return sprintf(buf, "%d\n", perf_overcommit);
+}
+
+static ssize_t
+perf_set_overcommit(struct sysdev_class *class, const char *buf, size_t count)
+{
+ unsigned long val;
+ int err;
+
+ err = strict_strtoul(buf, 10, &val);
+ if (err)
+ return err;
+ if (val > 1)
+ return -EINVAL;
+
+ mutex_lock(&perf_resource_mutex);
+ perf_overcommit = val;
+ mutex_unlock(&perf_resource_mutex);
+
+ return count;
+}
+
+static SYSDEV_CLASS_ATTR(
+ reserve_percpu,
+ 0644,
+ perf_show_reserve_percpu,
+ perf_set_reserve_percpu
+ );
+
+static SYSDEV_CLASS_ATTR(
+ overcommit,
+ 0644,
+ perf_show_overcommit,
+ perf_set_overcommit
+ );
+
+static struct attribute *perfclass_attrs[] = {
+ &attr_reserve_percpu.attr,
+ &attr_overcommit.attr,
+ NULL
+};
+
+static struct attribute_group perfclass_attr_group = {
+ .attrs = perfclass_attrs,
+ .name = "perf_counters",
+};
+
+static int __init perf_counter_sysfs_init(void)
+{
+ return sysfs_create_group(&cpu_sysdev_class.kset.kobj,
+ &perfclass_attr_group);
+}
+device_initcall(perf_counter_sysfs_init);
diff --git a/kernel/sched.c b/kernel/sched.c
index 8e2558c..78f4424 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -558,6 +558,7 @@
struct load_weight load;
unsigned long nr_load_updates;
u64 nr_switches;
+ u64 nr_migrations_in;
struct cfs_rq cfs;
struct rt_rq rt;
@@ -668,7 +669,7 @@
#define task_rq(p) cpu_rq(task_cpu(p))
#define cpu_curr(cpu) (cpu_rq(cpu)->curr)
-static inline void update_rq_clock(struct rq *rq)
+inline void update_rq_clock(struct rq *rq)
{
rq->clock = sched_clock_cpu(cpu_of(rq));
}
@@ -979,6 +980,26 @@
}
}
+void curr_rq_lock_irq_save(unsigned long *flags)
+ __acquires(rq->lock)
+{
+ struct rq *rq;
+
+ local_irq_save(*flags);
+ rq = cpu_rq(smp_processor_id());
+ spin_lock(&rq->lock);
+}
+
+void curr_rq_unlock_irq_restore(unsigned long *flags)
+ __releases(rq->lock)
+{
+ struct rq *rq;
+
+ rq = cpu_rq(smp_processor_id());
+ spin_unlock(&rq->lock);
+ local_irq_restore(*flags);
+}
+
void task_rq_unlock_wait(struct task_struct *p)
{
struct rq *rq = task_rq(p);
@@ -1885,12 +1906,15 @@
p->se.sleep_start -= clock_offset;
if (p->se.block_start)
p->se.block_start -= clock_offset;
+#endif
if (old_cpu != new_cpu) {
- schedstat_inc(p, se.nr_migrations);
+ p->se.nr_migrations++;
+ new_rq->nr_migrations_in++;
+#ifdef CONFIG_SCHEDSTATS
if (task_hot(p, old_rq->clock, NULL))
schedstat_inc(p, se.nr_forced2_migrations);
- }
#endif
+ }
p->se.vruntime -= old_cfsrq->min_vruntime -
new_cfsrq->min_vruntime;
@@ -2242,6 +2266,27 @@
#endif /* CONFIG_SMP */
+/**
+ * task_oncpu_function_call - call a function on the cpu on which a task runs
+ * @p: the task to evaluate
+ * @func: the function to be called
+ * @info: the function call argument
+ *
+ * Calls the function @func when the task is currently running. This might
+ * be on the current CPU, which just calls the function directly
+ */
+void task_oncpu_function_call(struct task_struct *p,
+ void (*func) (void *info), void *info)
+{
+ int cpu;
+
+ preempt_disable();
+ cpu = task_cpu(p);
+ if (task_curr(p))
+ smp_call_function_single(cpu, func, info, 1);
+ preempt_enable();
+}
+
/***
* try_to_wake_up - wake up a thread
* @p: the to-be-woken-up thread
@@ -2384,6 +2429,7 @@
p->se.exec_start = 0;
p->se.sum_exec_runtime = 0;
p->se.prev_sum_exec_runtime = 0;
+ p->se.nr_migrations = 0;
p->se.last_wakeup = 0;
p->se.avg_overlap = 0;
@@ -2604,6 +2650,7 @@
*/
prev_state = prev->state;
finish_arch_switch(prev);
+ perf_counter_task_sched_in(current, cpu_of(rq));
finish_lock_switch(rq, prev);
#ifdef CONFIG_SMP
if (current->sched_class->post_schedule)
@@ -2766,6 +2813,21 @@
}
/*
+ * Externally visible per-cpu scheduler statistics:
+ * cpu_nr_switches(cpu) - number of context switches on that cpu
+ * cpu_nr_migrations(cpu) - number of migrations into that cpu
+ */
+u64 cpu_nr_switches(int cpu)
+{
+ return cpu_rq(cpu)->nr_switches;
+}
+
+u64 cpu_nr_migrations(int cpu)
+{
+ return cpu_rq(cpu)->nr_migrations_in;
+}
+
+/*
* Update rq->cpu_load[] statistics. This function is usually called every
* scheduler tick (TICK_NSEC).
*/
@@ -4137,6 +4199,29 @@
* Return any ns on the sched_clock that have not yet been banked in
* @p in case that task is currently running.
*/
+unsigned long long __task_delta_exec(struct task_struct *p, int update)
+{
+ s64 delta_exec;
+ struct rq *rq;
+
+ rq = task_rq(p);
+ WARN_ON_ONCE(!runqueue_is_locked());
+ WARN_ON_ONCE(!task_current(rq, p));
+
+ if (update)
+ update_rq_clock(rq);
+
+ delta_exec = rq->clock - p->se.exec_start;
+
+ WARN_ON_ONCE(delta_exec < 0);
+
+ return delta_exec;
+}
+
+/*
+ * Return any ns on the sched_clock that have not yet been banked in
+ * @p in case that task is currently running.
+ */
unsigned long long task_delta_exec(struct task_struct *p)
{
unsigned long flags;
@@ -4396,6 +4481,7 @@
update_rq_clock(rq);
update_cpu_load(rq);
curr->sched_class->task_tick(rq, curr, 0);
+ perf_counter_task_tick(curr, cpu);
spin_unlock(&rq->lock);
#ifdef CONFIG_SMP
@@ -4591,6 +4677,7 @@
if (likely(prev != next)) {
sched_info_switch(prev, next);
+ perf_counter_task_sched_out(prev, cpu);
rq->nr_switches++;
rq->curr = next;
@@ -5944,12 +6031,7 @@
printk(KERN_CONT " %016lx ", thread_saved_pc(p));
#endif
#ifdef CONFIG_DEBUG_STACK_USAGE
- {
- unsigned long *n = end_of_stack(p);
- while (!*n)
- n++;
- free = (unsigned long)n - (unsigned long)end_of_stack(p);
- }
+ free = stack_not_used(p);
#endif
printk(KERN_CONT "%5lu %5d %6d\n", free,
task_pid_nr(p), task_pid_nr(p->real_parent));
diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c
index bac1061..da932f4 100644
--- a/kernel/sched_rt.c
+++ b/kernel/sched_rt.c
@@ -960,12 +960,13 @@
static DEFINE_PER_CPU(cpumask_var_t, local_cpu_mask);
-static inline int pick_optimal_cpu(int this_cpu, cpumask_t *mask)
+static inline int pick_optimal_cpu(int this_cpu,
+ const struct cpumask *mask)
{
int first;
/* "this_cpu" is cheaper to preempt than a remote processor */
- if ((this_cpu != -1) && cpu_isset(this_cpu, *mask))
+ if ((this_cpu != -1) && cpumask_test_cpu(this_cpu, mask))
return this_cpu;
first = cpumask_first(mask);
@@ -981,6 +982,7 @@
struct cpumask *lowest_mask = __get_cpu_var(local_cpu_mask);
int this_cpu = smp_processor_id();
int cpu = task_cpu(task);
+ cpumask_var_t domain_mask;
if (task->rt.nr_cpus_allowed == 1)
return -1; /* No other targets possible */
@@ -1013,19 +1015,25 @@
if (this_cpu == cpu)
this_cpu = -1; /* Skip this_cpu opt if the same */
- for_each_domain(cpu, sd) {
- if (sd->flags & SD_WAKE_AFFINE) {
- cpumask_t domain_mask;
- int best_cpu;
+ if (alloc_cpumask_var(&domain_mask, GFP_ATOMIC)) {
+ for_each_domain(cpu, sd) {
+ if (sd->flags & SD_WAKE_AFFINE) {
+ int best_cpu;
- cpumask_and(&domain_mask, sched_domain_span(sd),
- lowest_mask);
+ cpumask_and(domain_mask,
+ sched_domain_span(sd),
+ lowest_mask);
- best_cpu = pick_optimal_cpu(this_cpu,
- &domain_mask);
- if (best_cpu != -1)
- return best_cpu;
+ best_cpu = pick_optimal_cpu(this_cpu,
+ domain_mask);
+
+ if (best_cpu != -1) {
+ free_cpumask_var(domain_mask);
+ return best_cpu;
+ }
+ }
}
+ free_cpumask_var(domain_mask);
}
/*
diff --git a/kernel/softirq.c b/kernel/softirq.c
index bdbe9de..0365b48 100644
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -795,6 +795,11 @@
return 0;
}
+int __init __weak arch_probe_nr_irqs(void)
+{
+ return 0;
+}
+
int __init __weak arch_early_irq_init(void)
{
return 0;
diff --git a/kernel/sys.c b/kernel/sys.c
index 37f458e6..7306f94 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -14,6 +14,7 @@
#include <linux/prctl.h>
#include <linux/highuid.h>
#include <linux/fs.h>
+#include <linux/perf_counter.h>
#include <linux/resource.h>
#include <linux/kernel.h>
#include <linux/kexec.h>
@@ -1800,6 +1801,12 @@
case PR_SET_TSC:
error = SET_TSC_CTL(arg2);
break;
+ case PR_TASK_PERF_COUNTERS_DISABLE:
+ error = perf_counter_task_disable();
+ break;
+ case PR_TASK_PERF_COUNTERS_ENABLE:
+ error = perf_counter_task_enable();
+ break;
case PR_GET_TIMERSLACK:
error = current->timer_slack_ns;
break;
diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c
index 27dad29..68320f6 100644
--- a/kernel/sys_ni.c
+++ b/kernel/sys_ni.c
@@ -175,3 +175,6 @@
cond_syscall(compat_sys_timerfd_gettime);
cond_syscall(sys_eventfd);
cond_syscall(sys_eventfd2);
+
+/* performance counters: */
+cond_syscall(sys_perf_counter_open);
diff --git a/mm/filemap.c b/mm/filemap.c
index 23acefe..60fd567 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -1816,14 +1816,14 @@
static size_t __iovec_copy_from_user_inatomic(char *vaddr,
const struct iovec *iov, size_t base, size_t bytes)
{
- size_t copied = 0, left = 0;
+ size_t copied = 0, left = 0, total = bytes;
while (bytes) {
char __user *buf = iov->iov_base + base;
int copy = min(bytes, iov->iov_len - base);
base = 0;
- left = __copy_from_user_inatomic_nocache(vaddr, buf, copy);
+ left = __copy_from_user_inatomic_nocache(vaddr, buf, copy, total);
copied += copy;
bytes -= copy;
vaddr += copy;
@@ -1851,8 +1851,9 @@
if (likely(i->nr_segs == 1)) {
int left;
char __user *buf = i->iov->iov_base + i->iov_offset;
+
left = __copy_from_user_inatomic_nocache(kaddr + offset,
- buf, bytes);
+ buf, bytes, bytes);
copied = bytes - left;
} else {
copied = __iovec_copy_from_user_inatomic(kaddr + offset,
@@ -1880,7 +1881,8 @@
if (likely(i->nr_segs == 1)) {
int left;
char __user *buf = i->iov->iov_base + i->iov_offset;
- left = __copy_from_user_nocache(kaddr + offset, buf, bytes);
+
+ left = __copy_from_user_nocache(kaddr + offset, buf, bytes, bytes);
copied = bytes - left;
} else {
copied = __iovec_copy_from_user_inatomic(kaddr + offset,
diff --git a/mm/filemap_xip.c b/mm/filemap_xip.c
index 0c04615..bf54f8a 100644
--- a/mm/filemap_xip.c
+++ b/mm/filemap_xip.c
@@ -354,7 +354,7 @@
break;
copied = bytes -
- __copy_from_user_nocache(xip_mem + offset, buf, bytes);
+ __copy_from_user_nocache(xip_mem + offset, buf, bytes, bytes);
if (likely(copied > 0)) {
status = copied;
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index 520a759..11a9298 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -1267,6 +1267,7 @@
void vunmap(const void *addr)
{
BUG_ON(in_interrupt());
+ might_sleep();
__vunmap(addr, 0);
}
EXPORT_SYMBOL(vunmap);
@@ -1286,6 +1287,8 @@
{
struct vm_struct *area;
+ might_sleep();
+
if (count > num_physpages)
return NULL;
diff --git a/scripts/gcc-x86_32-has-stack-protector.sh b/scripts/gcc-x86_32-has-stack-protector.sh
new file mode 100644
index 0000000..29493dc
--- /dev/null
+++ b/scripts/gcc-x86_32-has-stack-protector.sh
@@ -0,0 +1,8 @@
+#!/bin/sh
+
+echo "int foo(void) { char X[200]; return 3; }" | $* -S -xc -c -O0 -fstack-protector - -o - 2> /dev/null | grep -q "%gs"
+if [ "$?" -eq "0" ] ; then
+ echo y
+else
+ echo n
+fi
diff --git a/scripts/gcc-x86_64-has-stack-protector.sh b/scripts/gcc-x86_64-has-stack-protector.sh
index 325c0a1..afaec61 100644
--- a/scripts/gcc-x86_64-has-stack-protector.sh
+++ b/scripts/gcc-x86_64-has-stack-protector.sh
@@ -1,6 +1,8 @@
#!/bin/sh
-echo "int foo(void) { char X[200]; return 3; }" | $1 -S -xc -c -O0 -mcmodel=kernel -fstack-protector - -o - 2> /dev/null | grep -q "%gs"
+echo "int foo(void) { char X[200]; return 3; }" | $* -S -xc -c -O0 -mcmodel=kernel -fstack-protector - -o - 2> /dev/null | grep -q "%gs"
if [ "$?" -eq "0" ] ; then
- echo $2
+ echo y
+else
+ echo n
fi
diff --git a/scripts/headers_check.pl b/scripts/headers_check.pl
index db30fac3..56f90a4 100644
--- a/scripts/headers_check.pl
+++ b/scripts/headers_check.pl
@@ -38,7 +38,7 @@
&check_asm_types();
&check_sizetypes();
&check_prototypes();
- &check_config();
+ # Dropped for now. Too much noise &check_config();
}
close FH;
}
diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c
index 8892161..7e62303 100644
--- a/scripts/mod/modpost.c
+++ b/scripts/mod/modpost.c
@@ -415,8 +415,9 @@
const char *secstrings
= (void *)hdr + sechdrs[hdr->e_shstrndx].sh_offset;
const char *secname;
+ int nobits = sechdrs[i].sh_type == SHT_NOBITS;
- if (sechdrs[i].sh_offset > info->size) {
+ if (!nobits && sechdrs[i].sh_offset > info->size) {
fatal("%s is truncated. sechdrs[i].sh_offset=%lu > "
"sizeof(*hrd)=%zu\n", filename,
(unsigned long)sechdrs[i].sh_offset,
@@ -425,6 +426,8 @@
}
secname = secstrings + sechdrs[i].sh_name;
if (strcmp(secname, ".modinfo") == 0) {
+ if (nobits)
+ fatal("%s has NOBITS .modinfo\n", filename);
info->modinfo = (void *)hdr + sechdrs[i].sh_offset;
info->modinfo_len = sechdrs[i].sh_size;
} else if (strcmp(secname, "__ksymtab") == 0)
diff --git a/sound/drivers/Kconfig b/sound/drivers/Kconfig
index 0bcf146..84714a6 100644
--- a/sound/drivers/Kconfig
+++ b/sound/drivers/Kconfig
@@ -33,7 +33,7 @@
config SND_PCSP
tristate "PC-Speaker support (READ HELP!)"
- depends on PCSPKR_PLATFORM && X86_PC && HIGH_RES_TIMERS
+ depends on PCSPKR_PLATFORM && X86 && HIGH_RES_TIMERS
depends on INPUT
depends on EXPERIMENTAL
select SND_PCM