Merge git://git.kernel.org/pub/scm/linux/kernel/git/bart/ide-2.6
* git://git.kernel.org/pub/scm/linux/kernel/git/bart/ide-2.6:
ide: remove stale comments from ide-dma.c (take 2)
ide: remove ide-tape documentation from Documentation/ide.txt
qd65xx: remove commented out code
ide-tape: schedule driver for removal after 6 months
ide-disk: add missing printk() KERN_* levels
ide: fix sparse warning about shadowing 'flags' symbol
ide-cd: fix CD/DVD burning
ide-cd: fix 'ireason' handling for REQ_TYPE_ATA_PC requests
qd65xx: fix setup of QD6580 Control register
ide: skip probing port if "hdx=noprobe" was used for both devices on it
ide: remove redundant comment from ide_unregister()
hpt366: fix section mismatch warnings
ide-cd: Enable audio play quirk for Optiarc DVD RW AD-5200A drive
diff --git a/Documentation/debugging-via-ohci1394.txt b/Documentation/debugging-via-ohci1394.txt
index de4804e..c360d4e 100644
--- a/Documentation/debugging-via-ohci1394.txt
+++ b/Documentation/debugging-via-ohci1394.txt
@@ -36,14 +36,15 @@
Drivers
-------
-The OHCI-1394 drivers in drivers/firewire and drivers/ieee1394 initialize
-the OHCI-1394 controllers to a working state and can be used to enable
-physical DMA. By default you only have to load the driver, and physical
-DMA access will be granted to all remote nodes, but it can be turned off
-when using the ohci1394 driver.
+The ohci1394 driver in drivers/ieee1394 initializes the OHCI-1394 controllers
+to a working state and enables physical DMA by default for all remote nodes.
+This can be turned off by ohci1394's module parameter phys_dma=0.
-Because these drivers depend on the PCI enumeration to be completed, an
-initialization routine which can runs pretty early (long before console_init(),
+The alternative firewire-ohci driver in drivers/firewire uses filtered physical
+DMA, hence is not yet suitable for remote debugging.
+
+Because ohci1394 depends on the PCI enumeration to be completed, an
+initialization routine which runs pretty early (long before console_init()
which makes the printk buffer appear on the console can be called) was written.
To activate it, enable CONFIG_PROVIDE_OHCI1394_DMA_INIT (Kernel hacking menu:
diff --git a/arch/um/kernel/process.c b/arch/um/kernel/process.c
index fc50d2f..e8cb9ff 100644
--- a/arch/um/kernel/process.c
+++ b/arch/um/kernel/process.c
@@ -128,8 +128,6 @@
return current;
}
-extern void schedule_tail(struct task_struct *prev);
-
/*
* This is called magically, by its address being stuffed in a jmp_buf
* and being longjmp-d to.
diff --git a/arch/x86/Kconfig.cpu b/arch/x86/Kconfig.cpu
index e09a6b7..6d50064 100644
--- a/arch/x86/Kconfig.cpu
+++ b/arch/x86/Kconfig.cpu
@@ -377,6 +377,19 @@
def_bool y
depends on (MWINCHIP3D || MWINCHIP2 || MWINCHIPC6) && MTRR
+#
+# P6_NOPs are a relatively minor optimization that require a family >=
+# 6 processor, except that it is broken on certain VIA chips.
+# Furthermore, AMD chips prefer a totally different sequence of NOPs
+# (which work on all CPUs). As a result, disallow these if we're
+# compiling X86_GENERIC but not X86_64 (these NOPs do work on all
+# x86-64 capable chips); the list of processors in the right-hand clause
+# are the cores that benefit from this optimization.
+#
+config X86_P6_NOP
+ def_bool y
+ depends on (X86_64 || !X86_GENERIC) && (M686 || MPENTIUMII || MPENTIUMIII || MPENTIUMM || MCORE2 || PENTIUM4)
+
config X86_TSC
def_bool y
depends on ((MWINCHIP3D || MWINCHIP2 || MCRUSOE || MEFFICEON || MCYRIXIII || MK7 || MK6 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || MK8 || MVIAC3_2 || MVIAC7 || MGEODEGX1 || MGEODE_LX || MCORE2) && !X86_NUMAQ) || X86_64
@@ -390,6 +403,7 @@
config X86_MINIMUM_CPU_FAMILY
int
default "64" if X86_64
+ default "6" if X86_32 && X86_P6_NOP
default "4" if X86_32 && (X86_XADD || X86_CMPXCHG || X86_BSWAP || X86_WP_WORKS_OK)
default "3"
diff --git a/arch/x86/boot/memory.c b/arch/x86/boot/memory.c
index 3783539..e77d89f 100644
--- a/arch/x86/boot/memory.c
+++ b/arch/x86/boot/memory.c
@@ -37,6 +37,12 @@
"=m" (*desc)
: "D" (desc), "d" (SMAP), "a" (0xe820));
+ /* BIOSes which terminate the chain with CF = 1 as opposed
+ to %ebx = 0 don't always report the SMAP signature on
+ the final, failing, probe. */
+ if (err)
+ break;
+
/* Some BIOSes stop returning SMAP in the middle of
the search loop. We don't know exactly how the BIOS
screwed up the map at that point, we might have a
@@ -47,9 +53,6 @@
break;
}
- if (err)
- break;
-
count++;
desc++;
} while (next && count < E820MAX);
diff --git a/arch/x86/kernel/asm-offsets_32.c b/arch/x86/kernel/asm-offsets_32.c
index a33d5301..8ea0401 100644
--- a/arch/x86/kernel/asm-offsets_32.c
+++ b/arch/x86/kernel/asm-offsets_32.c
@@ -128,13 +128,11 @@
OFFSET(XEN_vcpu_info_pending, vcpu_info, evtchn_upcall_pending);
#endif
-#ifdef CONFIG_LGUEST_GUEST
+#if defined(CONFIG_LGUEST) || defined(CONFIG_LGUEST_GUEST) || defined(CONFIG_LGUEST_MODULE)
BLANK();
OFFSET(LGUEST_DATA_irq_enabled, lguest_data, irq_enabled);
OFFSET(LGUEST_DATA_pgdir, lguest_data, pgdir);
-#endif
-#ifdef CONFIG_LGUEST
BLANK();
OFFSET(LGUEST_PAGES_host_gdt_desc, lguest_pages, state.host_gdt_desc);
OFFSET(LGUEST_PAGES_host_idt_desc, lguest_pages, state.host_idt_desc);
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index f86a3c4..a38aafae 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -504,7 +504,7 @@
/* Clear all flags overriden by options */
for (i = 0; i < NCAPINTS; i++)
- c->x86_capability[i] ^= cleared_cpu_caps[i];
+ c->x86_capability[i] &= ~cleared_cpu_caps[i];
/* Init Machine Check Exception if available. */
mcheck_init(c);
diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c
index b6e136f..be83336 100644
--- a/arch/x86/kernel/cpu/mtrr/main.c
+++ b/arch/x86/kernel/cpu/mtrr/main.c
@@ -43,6 +43,7 @@
#include <asm/uaccess.h>
#include <asm/processor.h>
#include <asm/msr.h>
+#include <asm/kvm_para.h>
#include "mtrr.h"
u32 num_var_ranges = 0;
@@ -649,6 +650,7 @@
/**
* mtrr_trim_uncached_memory - trim RAM not covered by MTRRs
+ * @end_pfn: ending page frame number
*
* Some buggy BIOSes don't setup the MTRRs properly for systems with certain
* memory configurations. This routine checks that the highest MTRR matches
@@ -688,8 +690,11 @@
/* kvm/qemu doesn't have mtrr set right, don't trim them all */
if (!highest_pfn) {
- printk(KERN_WARNING "WARNING: strange, CPU MTRRs all blank?\n");
- WARN_ON(1);
+ if (!kvm_para_available()) {
+ printk(KERN_WARNING
+ "WARNING: strange, CPU MTRRs all blank?\n");
+ WARN_ON(1);
+ }
return 0;
}
diff --git a/arch/x86/kernel/cpu/transmeta.c b/arch/x86/kernel/cpu/transmeta.c
index 200fb3f..e8b422c 100644
--- a/arch/x86/kernel/cpu/transmeta.c
+++ b/arch/x86/kernel/cpu/transmeta.c
@@ -76,13 +76,6 @@
/* All Transmeta CPUs have a constant TSC */
set_bit(X86_FEATURE_CONSTANT_TSC, c->x86_capability);
- /* If we can run i686 user-space code, call us an i686 */
-#define USER686 ((1 << X86_FEATURE_TSC)|\
- (1 << X86_FEATURE_CX8)|\
- (1 << X86_FEATURE_CMOV))
- if (c->x86 == 5 && (c->x86_capability[0] & USER686) == USER686)
- c->x86 = 6;
-
#ifdef CONFIG_SYSCTL
/* randomize_va_space slows us down enormously;
it probably triggers retranslation of x86->native bytecode */
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index 2ad9a1b..c20c9e7 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -453,6 +453,7 @@
CFI_REGISTER rip, r11
SAVE_REST
FIXUP_TOP_OF_STACK %r11
+ movq %rsp, %rcx
call sys_execve
RESTORE_TOP_OF_STACK %r11
movq %rax,RAX(%rsp)
@@ -1036,15 +1037,16 @@
* rdi: name, rsi: argv, rdx: envp
*
* We want to fallback into:
- * extern long sys_execve(char *name, char **argv,char **envp, struct pt_regs regs)
+ * extern long sys_execve(char *name, char **argv,char **envp, struct pt_regs *regs)
*
* do_sys_execve asm fallback arguments:
- * rdi: name, rsi: argv, rdx: envp, fake frame on the stack
+ * rdi: name, rsi: argv, rdx: envp, rcx: fake frame on the stack
*/
ENTRY(kernel_execve)
CFI_STARTPROC
FAKE_STACK_FRAME $0
SAVE_ALL
+ movq %rsp,%rcx
call sys_execve
movq %rax, RAX(%rsp)
RESTORE_REST
diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S
index 25eb985..fd8ca53 100644
--- a/arch/x86/kernel/head_32.S
+++ b/arch/x86/kernel/head_32.S
@@ -606,7 +606,7 @@
.section ".bss.page_aligned","wa"
.align PAGE_SIZE_asm
#ifdef CONFIG_X86_PAE
-ENTRY(swapper_pg_pmd)
+swapper_pg_pmd:
.fill 1024*KPMDS,4,0
#else
ENTRY(swapper_pg_dir)
diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
index eb41504..a007454 100644
--- a/arch/x86/kernel/head_64.S
+++ b/arch/x86/kernel/head_64.S
@@ -379,18 +379,24 @@
/* Since I easily can, map the first 1G.
* Don't set NX because code runs from these pages.
*/
- PMDS(0x0000000000000000, __PAGE_KERNEL_LARGE_EXEC, PTRS_PER_PMD)
+ PMDS(0, __PAGE_KERNEL_LARGE_EXEC, PTRS_PER_PMD)
NEXT_PAGE(level2_kernel_pgt)
- /* 40MB kernel mapping. The kernel code cannot be bigger than that.
- When you change this change KERNEL_TEXT_SIZE in page.h too. */
- /* (2^48-(2*1024*1024*1024)-((2^39)*511)-((2^30)*510)) = 0 */
- PMDS(0x0000000000000000, __PAGE_KERNEL_LARGE_EXEC|_PAGE_GLOBAL, KERNEL_TEXT_SIZE/PMD_SIZE)
- /* Module mapping starts here */
- .fill (PTRS_PER_PMD - (KERNEL_TEXT_SIZE/PMD_SIZE)),8,0
+ /*
+ * 128 MB kernel mapping. We spend a full page on this pagetable
+ * anyway.
+ *
+ * The kernel code+data+bss must not be bigger than that.
+ *
+ * (NOTE: at +128MB starts the module area, see MODULES_VADDR.
+ * If you want to increase this then increase MODULES_VADDR
+ * too.)
+ */
+ PMDS(0, __PAGE_KERNEL_LARGE_EXEC|_PAGE_GLOBAL,
+ KERNEL_IMAGE_SIZE/PMD_SIZE)
NEXT_PAGE(level2_spare_pgt)
- .fill 512,8,0
+ .fill 512, 8, 0
#undef PMDS
#undef NEXT_PAGE
diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c
index 429d084..235fd6c 100644
--- a/arch/x86/kernel/hpet.c
+++ b/arch/x86/kernel/hpet.c
@@ -368,8 +368,8 @@
return 0;
}
-/*
- * Try to setup the HPET timer
+/**
+ * hpet_enable - Try to setup the HPET timer. Returns 1 on success.
*/
int __init hpet_enable(void)
{
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index b0cc8f0..43f2877 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -730,16 +730,16 @@
*/
asmlinkage
long sys_execve(char __user *name, char __user * __user *argv,
- char __user * __user *envp, struct pt_regs regs)
+ char __user * __user *envp, struct pt_regs *regs)
{
long error;
char * filename;
filename = getname(name);
error = PTR_ERR(filename);
- if (IS_ERR(filename))
+ if (IS_ERR(filename))
return error;
- error = do_execve(filename, argv, envp, ®s);
+ error = do_execve(filename, argv, envp, regs);
putname(filename);
return error;
}
diff --git a/arch/x86/kernel/setup_64.c b/arch/x86/kernel/setup_64.c
index 6fd804f..7637dc9 100644
--- a/arch/x86/kernel/setup_64.c
+++ b/arch/x86/kernel/setup_64.c
@@ -1021,7 +1021,7 @@
/* Clear all flags overriden by options */
for (i = 0; i < NCAPINTS; i++)
- c->x86_capability[i] ^= cleared_cpu_caps[i];
+ c->x86_capability[i] &= ~cleared_cpu_caps[i];
#ifdef CONFIG_X86_MCE
mcheck_init(c);
diff --git a/arch/x86/kernel/smpboot_64.c b/arch/x86/kernel/smpboot_64.c
index d53bd6f..0880f2c 100644
--- a/arch/x86/kernel/smpboot_64.c
+++ b/arch/x86/kernel/smpboot_64.c
@@ -554,10 +554,10 @@
int timeout;
unsigned long start_rip;
struct create_idle c_idle = {
- .work = __WORK_INITIALIZER(c_idle.work, do_fork_idle),
.cpu = cpu,
.done = COMPLETION_INITIALIZER_ONSTACK(c_idle.done),
};
+ INIT_WORK(&c_idle.work, do_fork_idle);
/* allocate memory for gdts of secondary cpus. Hotplug is considered */
if (!cpu_gdt_descr[cpu].address &&
diff --git a/arch/x86/kernel/stacktrace.c b/arch/x86/kernel/stacktrace.c
index 02f0f61..c28c342 100644
--- a/arch/x86/kernel/stacktrace.c
+++ b/arch/x86/kernel/stacktrace.c
@@ -25,6 +25,8 @@
static void save_stack_address(void *data, unsigned long addr, int reliable)
{
struct stack_trace *trace = data;
+ if (!reliable)
+ return;
if (trace->skip > 0) {
trace->skip--;
return;
@@ -37,6 +39,8 @@
save_stack_address_nosched(void *data, unsigned long addr, int reliable)
{
struct stack_trace *trace = (struct stack_trace *)data;
+ if (!reliable)
+ return;
if (in_sched_functions(addr))
return;
if (trace->skip > 0) {
diff --git a/arch/x86/kernel/tsc_32.c b/arch/x86/kernel/tsc_32.c
index 43517e3..f14cfd9 100644
--- a/arch/x86/kernel/tsc_32.c
+++ b/arch/x86/kernel/tsc_32.c
@@ -28,7 +28,8 @@
static int __init tsc_setup(char *str)
{
printk(KERN_WARNING "notsc: Kernel compiled with CONFIG_X86_TSC, "
- "cannot disable TSC.\n");
+ "cannot disable TSC completely.\n");
+ mark_tsc_unstable("user disabled TSC");
return 1;
}
#else
diff --git a/arch/x86/kernel/vsyscall_64.c b/arch/x86/kernel/vsyscall_64.c
index 3f82427..b6be812 100644
--- a/arch/x86/kernel/vsyscall_64.c
+++ b/arch/x86/kernel/vsyscall_64.c
@@ -44,11 +44,6 @@
#define __vsyscall(nr) __attribute__ ((unused,__section__(".vsyscall_" #nr)))
#define __syscall_clobber "r11","cx","memory"
-#define __pa_vsymbol(x) \
- ({unsigned long v; \
- extern char __vsyscall_0; \
- asm("" : "=r" (v) : "0" (x)); \
- ((v - VSYSCALL_START) + __pa_symbol(&__vsyscall_0)); })
/*
* vsyscall_gtod_data contains data that is :
@@ -102,7 +97,7 @@
static __always_inline int gettimeofday(struct timeval *tv, struct timezone *tz)
{
int ret;
- asm volatile("vsysc2: syscall"
+ asm volatile("syscall"
: "=a" (ret)
: "0" (__NR_gettimeofday),"D" (tv),"S" (tz)
: __syscall_clobber );
@@ -112,7 +107,7 @@
static __always_inline long time_syscall(long *t)
{
long secs;
- asm volatile("vsysc1: syscall"
+ asm volatile("syscall"
: "=a" (secs)
: "0" (__NR_time),"D" (t) : __syscall_clobber);
return secs;
@@ -227,50 +222,10 @@
}
#ifdef CONFIG_SYSCTL
-
-#define SYSCALL 0x050f
-#define NOP2 0x9090
-
-/*
- * NOP out syscall in vsyscall page when not needed.
- */
-static int vsyscall_sysctl_change(ctl_table *ctl, int write, struct file * filp,
- void __user *buffer, size_t *lenp, loff_t *ppos)
-{
- extern u16 vsysc1, vsysc2;
- u16 __iomem *map1;
- u16 __iomem *map2;
- int ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
- if (!write)
- return ret;
- /* gcc has some trouble with __va(__pa()), so just do it this
- way. */
- map1 = ioremap(__pa_vsymbol(&vsysc1), 2);
- if (!map1)
- return -ENOMEM;
- map2 = ioremap(__pa_vsymbol(&vsysc2), 2);
- if (!map2) {
- ret = -ENOMEM;
- goto out;
- }
- if (!vsyscall_gtod_data.sysctl_enabled) {
- writew(SYSCALL, map1);
- writew(SYSCALL, map2);
- } else {
- writew(NOP2, map1);
- writew(NOP2, map2);
- }
- iounmap(map2);
-out:
- iounmap(map1);
- return ret;
-}
-
static ctl_table kernel_table2[] = {
{ .procname = "vsyscall64",
.data = &vsyscall_gtod_data.sysctl_enabled, .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = vsyscall_sysctl_change },
+ .mode = 0644 },
{}
};
@@ -279,7 +234,6 @@
.child = kernel_table2 },
{}
};
-
#endif
/* Assume __initcall executes before all user space. Hopefully kmod
diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c
index 5afdde4..cccb38a 100644
--- a/arch/x86/lguest/boot.c
+++ b/arch/x86/lguest/boot.c
@@ -57,6 +57,7 @@
#include <linux/lguest_launcher.h>
#include <linux/virtio_console.h>
#include <linux/pm.h>
+#include <asm/lguest.h>
#include <asm/paravirt.h>
#include <asm/param.h>
#include <asm/page.h>
@@ -75,15 +76,6 @@
* behaving in simplified but equivalent ways. In particular, the Guest is the
* same kernel as the Host (or at least, built from the same source code). :*/
-/* Declarations for definitions in lguest_guest.S */
-extern char lguest_noirq_start[], lguest_noirq_end[];
-extern const char lgstart_cli[], lgend_cli[];
-extern const char lgstart_sti[], lgend_sti[];
-extern const char lgstart_popf[], lgend_popf[];
-extern const char lgstart_pushf[], lgend_pushf[];
-extern const char lgstart_iret[], lgend_iret[];
-extern void lguest_iret(void);
-
struct lguest_data lguest_data = {
.hcall_status = { [0 ... LHCALL_RING_SIZE-1] = 0xFF },
.noirq_start = (u32)lguest_noirq_start,
@@ -489,7 +481,7 @@
{
*pmdp = pmdval;
lazy_hcall(LHCALL_SET_PMD, __pa(pmdp)&PAGE_MASK,
- (__pa(pmdp)&(PAGE_SIZE-1))/4, 0);
+ (__pa(pmdp)&(PAGE_SIZE-1)), 0);
}
/* There are a couple of legacy places where the kernel sets a PTE, but we
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index bb652f5..a02a14f 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -172,8 +172,9 @@
}
/*
- * The head.S code sets up the kernel high mapping from:
- * __START_KERNEL_map to __START_KERNEL_map + KERNEL_TEXT_SIZE
+ * The head.S code sets up the kernel high mapping:
+ *
+ * from __START_KERNEL_map to __START_KERNEL_map + size (== _end-_text)
*
* phys_addr holds the negative offset to the kernel, which is added
* to the compile time generated pmds. This results in invalid pmds up
@@ -515,14 +516,6 @@
/* clear_bss() already clear the empty_zero_page */
- /* temporary debugging - double check it's true: */
- {
- int i;
-
- for (i = 0; i < 1024; i++)
- WARN_ON_ONCE(empty_zero_page[i]);
- }
-
reservedpages = 0;
/* this will put all low memory onto the freelists */
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index 464d8fc..14e48b5 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -44,6 +44,12 @@
#endif
+#ifdef CONFIG_DEBUG_PAGEALLOC
+# define debug_pagealloc 1
+#else
+# define debug_pagealloc 0
+#endif
+
static inline int
within(unsigned long addr, unsigned long start, unsigned long end)
{
@@ -355,45 +361,48 @@
static LIST_HEAD(page_pool);
static unsigned long pool_size, pool_pages, pool_low;
-static unsigned long pool_used, pool_failed, pool_refill;
+static unsigned long pool_used, pool_failed;
-static void cpa_fill_pool(void)
+static void cpa_fill_pool(struct page **ret)
{
- struct page *p;
gfp_t gfp = GFP_KERNEL;
+ unsigned long flags;
+ struct page *p;
- /* Do not allocate from interrupt context */
- if (in_irq() || irqs_disabled())
- return;
/*
- * Check unlocked. I does not matter when we have one more
- * page in the pool. The bit lock avoids recursive pool
- * allocations:
+ * Avoid recursion (on debug-pagealloc) and also signal
+ * our priority to get to these pagetables:
*/
- if (pool_pages >= pool_size || test_and_set_bit_lock(0, &pool_refill))
+ if (current->flags & PF_MEMALLOC)
return;
+ current->flags |= PF_MEMALLOC;
-#ifdef CONFIG_DEBUG_PAGEALLOC
/*
- * We could do:
- * gfp = in_atomic() ? GFP_ATOMIC : GFP_KERNEL;
- * but this fails on !PREEMPT kernels
+ * Allocate atomically from atomic contexts:
*/
- gfp = GFP_ATOMIC | __GFP_NORETRY | __GFP_NOWARN;
-#endif
+ if (in_atomic() || irqs_disabled() || debug_pagealloc)
+ gfp = GFP_ATOMIC | __GFP_NORETRY | __GFP_NOWARN;
- while (pool_pages < pool_size) {
+ while (pool_pages < pool_size || (ret && !*ret)) {
p = alloc_pages(gfp, 0);
if (!p) {
pool_failed++;
break;
}
- spin_lock_irq(&pgd_lock);
+ /*
+ * If the call site needs a page right now, provide it:
+ */
+ if (ret && !*ret) {
+ *ret = p;
+ continue;
+ }
+ spin_lock_irqsave(&pgd_lock, flags);
list_add(&p->lru, &page_pool);
pool_pages++;
- spin_unlock_irq(&pgd_lock);
+ spin_unlock_irqrestore(&pgd_lock, flags);
}
- clear_bit_unlock(0, &pool_refill);
+
+ current->flags &= ~PF_MEMALLOC;
}
#define SHIFT_MB (20 - PAGE_SHIFT)
@@ -414,11 +423,15 @@
* GiB. Shift MiB to Gib and multiply the result by
* POOL_PAGES_PER_GB:
*/
- gb = ((si.totalram >> SHIFT_MB) + ROUND_MB_GB) >> SHIFT_MB_GB;
- pool_size = POOL_PAGES_PER_GB * gb;
+ if (debug_pagealloc) {
+ gb = ((si.totalram >> SHIFT_MB) + ROUND_MB_GB) >> SHIFT_MB_GB;
+ pool_size = POOL_PAGES_PER_GB * gb;
+ } else {
+ pool_size = 1;
+ }
pool_low = pool_size;
- cpa_fill_pool();
+ cpa_fill_pool(NULL);
printk(KERN_DEBUG
"CPA: page pool initialized %lu of %lu pages preallocated\n",
pool_pages, pool_size);
@@ -440,16 +453,20 @@
spin_lock_irqsave(&pgd_lock, flags);
if (list_empty(&page_pool)) {
spin_unlock_irqrestore(&pgd_lock, flags);
- return -ENOMEM;
+ base = NULL;
+ cpa_fill_pool(&base);
+ if (!base)
+ return -ENOMEM;
+ spin_lock_irqsave(&pgd_lock, flags);
+ } else {
+ base = list_first_entry(&page_pool, struct page, lru);
+ list_del(&base->lru);
+ pool_pages--;
+
+ if (pool_pages < pool_low)
+ pool_low = pool_pages;
}
- base = list_first_entry(&page_pool, struct page, lru);
- list_del(&base->lru);
- pool_pages--;
-
- if (pool_pages < pool_low)
- pool_low = pool_pages;
-
/*
* Check for races, another CPU might have split this page
* up for us already:
@@ -734,7 +751,8 @@
cpa_flush_all(cache);
out:
- cpa_fill_pool();
+ cpa_fill_pool(NULL);
+
return ret;
}
@@ -897,7 +915,7 @@
* Try to refill the page pool here. We can do this only after
* the tlb flush.
*/
- cpa_fill_pool();
+ cpa_fill_pool(NULL);
}
#ifdef CONFIG_HIBERNATION
diff --git a/arch/x86/vdso/Makefile b/arch/x86/vdso/Makefile
index f385a4b..b8bd0c4 100644
--- a/arch/x86/vdso/Makefile
+++ b/arch/x86/vdso/Makefile
@@ -48,7 +48,7 @@
# Match symbols in the DSO that look like VDSO*; produce a file of constants.
#
sed-vdsosym := -e 's/^00*/0/' \
- -e 's/^\([0-9a-fA-F]*\) . \(VDSO[a-zA-Z0-9_]*\)$$/\2 = 0x\1;/p'
+ -e 's/^\([[:xdigit:]]*\) . \(VDSO[[:alnum:]_]*\)$$/\2 = 0x\1;/p'
quiet_cmd_vdsosym = VDSOSYM $@
cmd_vdsosym = $(NM) $< | sed -n $(sed-vdsosym) | LC_ALL=C sort > $@
diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c
index fbc2435..4fbcce7 100644
--- a/drivers/ata/libata-core.c
+++ b/drivers/ata/libata-core.c
@@ -113,7 +113,7 @@
module_param(atapi_enabled, int, 0444);
MODULE_PARM_DESC(atapi_enabled, "Enable discovery of ATAPI devices (0=off, 1=on)");
-int atapi_dmadir = 0;
+static int atapi_dmadir = 0;
module_param(atapi_dmadir, int, 0444);
MODULE_PARM_DESC(atapi_dmadir, "Enable ATAPI DMADIR bridge support (0=off, 1=on)");
@@ -6567,6 +6567,8 @@
ata_lpm_enable(host);
rc = ata_host_request_pm(host, mesg, 0, ATA_EHI_QUIET, 1);
+ if (rc == 0)
+ host->dev->power.power_state = mesg;
return rc;
}
@@ -6585,6 +6587,7 @@
{
ata_host_request_pm(host, PMSG_ON, ATA_EH_SOFTRESET,
ATA_EHI_NO_AUTOPSY | ATA_EHI_QUIET, 0);
+ host->dev->power.power_state = PMSG_ON;
/* reenable link pm */
ata_lpm_disable(host);
diff --git a/drivers/ata/libata.h b/drivers/ata/libata.h
index 6036ded..aa884f7 100644
--- a/drivers/ata/libata.h
+++ b/drivers/ata/libata.h
@@ -56,7 +56,6 @@
extern unsigned int ata_print_id;
extern struct workqueue_struct *ata_aux_wq;
extern int atapi_enabled;
-extern int atapi_dmadir;
extern int atapi_passthru16;
extern int libata_fua;
extern int libata_noacpi;
diff --git a/drivers/char/rtc.c b/drivers/char/rtc.c
index 78b151c..5c3142b 100644
--- a/drivers/char/rtc.c
+++ b/drivers/char/rtc.c
@@ -110,8 +110,8 @@
#define hpet_set_rtc_irq_bit(arg) 0
#define hpet_rtc_timer_init() do { } while (0)
#define hpet_rtc_dropped_irq() 0
-#define hpet_register_irq_handler(h) 0
-#define hpet_unregister_irq_handler(h) 0
+#define hpet_register_irq_handler(h) ({ 0; })
+#define hpet_unregister_irq_handler(h) ({ 0; })
#ifdef RTC_IRQ
static irqreturn_t hpet_rtc_interrupt(int irq, void *dev_id)
{
diff --git a/drivers/firewire/fw-cdev.c b/drivers/firewire/fw-cdev.c
index 7e73cba..46bc197 100644
--- a/drivers/firewire/fw-cdev.c
+++ b/drivers/firewire/fw-cdev.c
@@ -109,15 +109,17 @@
struct client *client;
unsigned long flags;
- device = fw_device_from_devt(inode->i_rdev);
+ device = fw_device_get_by_devt(inode->i_rdev);
if (device == NULL)
return -ENODEV;
client = kzalloc(sizeof(*client), GFP_KERNEL);
- if (client == NULL)
+ if (client == NULL) {
+ fw_device_put(device);
return -ENOMEM;
+ }
- client->device = fw_device_get(device);
+ client->device = device;
INIT_LIST_HEAD(&client->event_list);
INIT_LIST_HEAD(&client->resource_list);
spin_lock_init(&client->lock);
@@ -644,6 +646,10 @@
struct fw_cdev_create_iso_context *request = buffer;
struct fw_iso_context *context;
+ /* We only support one context at this time. */
+ if (client->iso_context != NULL)
+ return -EBUSY;
+
if (request->channel > 63)
return -EINVAL;
@@ -790,8 +796,9 @@
{
struct fw_cdev_start_iso *request = buffer;
- if (request->handle != 0)
+ if (client->iso_context == NULL || request->handle != 0)
return -EINVAL;
+
if (client->iso_context->type == FW_ISO_CONTEXT_RECEIVE) {
if (request->tags == 0 || request->tags > 15)
return -EINVAL;
@@ -808,7 +815,7 @@
{
struct fw_cdev_stop_iso *request = buffer;
- if (request->handle != 0)
+ if (client->iso_context == NULL || request->handle != 0)
return -EINVAL;
return fw_iso_context_stop(client->iso_context);
diff --git a/drivers/firewire/fw-device.c b/drivers/firewire/fw-device.c
index de9066e..2ab13e0 100644
--- a/drivers/firewire/fw-device.c
+++ b/drivers/firewire/fw-device.c
@@ -358,12 +358,9 @@
guid_show(struct device *dev, struct device_attribute *attr, char *buf)
{
struct fw_device *device = fw_device(dev);
- u64 guid;
- guid = ((u64)device->config_rom[3] << 32) | device->config_rom[4];
-
- return snprintf(buf, PAGE_SIZE, "0x%016llx\n",
- (unsigned long long)guid);
+ return snprintf(buf, PAGE_SIZE, "0x%08x%08x\n",
+ device->config_rom[3], device->config_rom[4]);
}
static struct device_attribute fw_device_attributes[] = {
@@ -610,12 +607,14 @@
static DEFINE_IDR(fw_device_idr);
int fw_cdev_major;
-struct fw_device *fw_device_from_devt(dev_t devt)
+struct fw_device *fw_device_get_by_devt(dev_t devt)
{
struct fw_device *device;
down_read(&idr_rwsem);
device = idr_find(&fw_device_idr, MINOR(devt));
+ if (device)
+ fw_device_get(device);
up_read(&idr_rwsem);
return device;
@@ -627,13 +626,14 @@
container_of(work, struct fw_device, work.work);
int minor = MINOR(device->device.devt);
- down_write(&idr_rwsem);
- idr_remove(&fw_device_idr, minor);
- up_write(&idr_rwsem);
-
fw_device_cdev_remove(device);
device_for_each_child(&device->device, NULL, shutdown_unit);
device_unregister(&device->device);
+
+ down_write(&idr_rwsem);
+ idr_remove(&fw_device_idr, minor);
+ up_write(&idr_rwsem);
+ fw_device_put(device);
}
static struct device_type fw_device_type = {
@@ -682,10 +682,13 @@
}
err = -ENOMEM;
+
+ fw_device_get(device);
down_write(&idr_rwsem);
if (idr_pre_get(&fw_device_idr, GFP_KERNEL))
err = idr_get_new(&fw_device_idr, device, &minor);
up_write(&idr_rwsem);
+
if (err < 0)
goto error;
@@ -717,13 +720,22 @@
*/
if (atomic_cmpxchg(&device->state,
FW_DEVICE_INITIALIZING,
- FW_DEVICE_RUNNING) == FW_DEVICE_SHUTDOWN)
+ FW_DEVICE_RUNNING) == FW_DEVICE_SHUTDOWN) {
fw_device_shutdown(&device->work.work);
- else
- fw_notify("created new fw device %s "
- "(%d config rom retries, S%d00)\n",
- device->device.bus_id, device->config_rom_retries,
- 1 << device->max_speed);
+ } else {
+ if (device->config_rom_retries)
+ fw_notify("created device %s: GUID %08x%08x, S%d00, "
+ "%d config ROM retries\n",
+ device->device.bus_id,
+ device->config_rom[3], device->config_rom[4],
+ 1 << device->max_speed,
+ device->config_rom_retries);
+ else
+ fw_notify("created device %s: GUID %08x%08x, S%d00\n",
+ device->device.bus_id,
+ device->config_rom[3], device->config_rom[4],
+ 1 << device->max_speed);
+ }
/*
* Reschedule the IRM work if we just finished reading the
@@ -741,7 +753,9 @@
idr_remove(&fw_device_idr, minor);
up_write(&idr_rwsem);
error:
- put_device(&device->device);
+ fw_device_put(device); /* fw_device_idr's reference */
+
+ put_device(&device->device); /* our reference */
}
static int update_unit(struct device *dev, void *data)
diff --git a/drivers/firewire/fw-device.h b/drivers/firewire/fw-device.h
index 0854fe2..43808c0 100644
--- a/drivers/firewire/fw-device.h
+++ b/drivers/firewire/fw-device.h
@@ -77,13 +77,13 @@
}
struct fw_device *fw_device_get(struct fw_device *device);
+struct fw_device *fw_device_get_by_devt(dev_t devt);
void fw_device_put(struct fw_device *device);
int fw_device_enable_phys_dma(struct fw_device *device);
void fw_device_cdev_update(struct fw_device *device);
void fw_device_cdev_remove(struct fw_device *device);
-struct fw_device *fw_device_from_devt(dev_t devt);
extern int fw_cdev_major;
struct fw_unit {
diff --git a/drivers/firewire/fw-sbp2.c b/drivers/firewire/fw-sbp2.c
index 19ece9b..5259491 100644
--- a/drivers/firewire/fw-sbp2.c
+++ b/drivers/firewire/fw-sbp2.c
@@ -28,14 +28,15 @@
* and many others.
*/
+#include <linux/blkdev.h>
+#include <linux/delay.h>
+#include <linux/device.h>
+#include <linux/dma-mapping.h>
#include <linux/kernel.h>
+#include <linux/mod_devicetable.h>
#include <linux/module.h>
#include <linux/moduleparam.h>
-#include <linux/mod_devicetable.h>
-#include <linux/device.h>
#include <linux/scatterlist.h>
-#include <linux/dma-mapping.h>
-#include <linux/blkdev.h>
#include <linux/string.h>
#include <linux/stringify.h>
#include <linux/timer.h>
@@ -47,9 +48,9 @@
#include <scsi/scsi_device.h>
#include <scsi/scsi_host.h>
-#include "fw-transaction.h"
-#include "fw-topology.h"
#include "fw-device.h"
+#include "fw-topology.h"
+#include "fw-transaction.h"
/*
* So far only bridges from Oxford Semiconductor are known to support
@@ -82,6 +83,9 @@
* Avoids access beyond actual disk limits on devices with an off-by-one bug.
* Don't use this with devices which don't have this bug.
*
+ * - delay inquiry
+ * Wait extra SBP2_INQUIRY_DELAY seconds after login before SCSI inquiry.
+ *
* - override internal blacklist
* Instead of adding to the built-in blacklist, use only the workarounds
* specified in the module load parameter.
@@ -91,6 +95,8 @@
#define SBP2_WORKAROUND_INQUIRY_36 0x2
#define SBP2_WORKAROUND_MODE_SENSE_8 0x4
#define SBP2_WORKAROUND_FIX_CAPACITY 0x8
+#define SBP2_WORKAROUND_DELAY_INQUIRY 0x10
+#define SBP2_INQUIRY_DELAY 12
#define SBP2_WORKAROUND_OVERRIDE 0x100
static int sbp2_param_workarounds;
@@ -100,6 +106,7 @@
", 36 byte inquiry = " __stringify(SBP2_WORKAROUND_INQUIRY_36)
", skip mode page 8 = " __stringify(SBP2_WORKAROUND_MODE_SENSE_8)
", fix capacity = " __stringify(SBP2_WORKAROUND_FIX_CAPACITY)
+ ", delay inquiry = " __stringify(SBP2_WORKAROUND_DELAY_INQUIRY)
", override internal blacklist = " __stringify(SBP2_WORKAROUND_OVERRIDE)
", or a combination)");
@@ -132,6 +139,7 @@
int generation;
int retries;
struct delayed_work work;
+ bool blocked;
};
/*
@@ -141,16 +149,18 @@
struct sbp2_target {
struct kref kref;
struct fw_unit *unit;
+ const char *bus_id;
+ struct list_head lu_list;
u64 management_agent_address;
int directory_id;
int node_id;
int address_high;
-
- unsigned workarounds;
- struct list_head lu_list;
-
+ unsigned int workarounds;
unsigned int mgt_orb_timeout;
+
+ int dont_block; /* counter for each logical unit */
+ int blocked; /* ditto */
};
/*
@@ -160,7 +170,7 @@
*/
#define SBP2_MIN_LOGIN_ORB_TIMEOUT 5000U /* Timeout in ms */
#define SBP2_MAX_LOGIN_ORB_TIMEOUT 40000U /* Timeout in ms */
-#define SBP2_ORB_TIMEOUT 2000 /* Timeout in ms */
+#define SBP2_ORB_TIMEOUT 2000U /* Timeout in ms */
#define SBP2_ORB_NULL 0x80000000
#define SBP2_MAX_SG_ELEMENT_LENGTH 0xf000
@@ -297,7 +307,7 @@
static const struct {
u32 firmware_revision;
u32 model;
- unsigned workarounds;
+ unsigned int workarounds;
} sbp2_workarounds_table[] = {
/* DViCO Momobay CX-1 with TSB42AA9 bridge */ {
.firmware_revision = 0x002800,
@@ -305,6 +315,11 @@
.workarounds = SBP2_WORKAROUND_INQUIRY_36 |
SBP2_WORKAROUND_MODE_SENSE_8,
},
+ /* DViCO Momobay FX-3A with TSB42AA9A bridge */ {
+ .firmware_revision = 0x002800,
+ .model = 0x000000,
+ .workarounds = SBP2_WORKAROUND_DELAY_INQUIRY,
+ },
/* Initio bridges, actually only needed for some older ones */ {
.firmware_revision = 0x000200,
.model = ~0,
@@ -501,6 +516,9 @@
unsigned int timeout;
int retval = -ENOMEM;
+ if (function == SBP2_LOGOUT_REQUEST && fw_device_is_shutdown(device))
+ return 0;
+
orb = kzalloc(sizeof(*orb), GFP_ATOMIC);
if (orb == NULL)
return -ENOMEM;
@@ -553,20 +571,20 @@
retval = -EIO;
if (sbp2_cancel_orbs(lu) == 0) {
- fw_error("orb reply timed out, rcode=0x%02x\n",
- orb->base.rcode);
+ fw_error("%s: orb reply timed out, rcode=0x%02x\n",
+ lu->tgt->bus_id, orb->base.rcode);
goto out;
}
if (orb->base.rcode != RCODE_COMPLETE) {
- fw_error("management write failed, rcode 0x%02x\n",
- orb->base.rcode);
+ fw_error("%s: management write failed, rcode 0x%02x\n",
+ lu->tgt->bus_id, orb->base.rcode);
goto out;
}
if (STATUS_GET_RESPONSE(orb->status) != 0 ||
STATUS_GET_SBP_STATUS(orb->status) != 0) {
- fw_error("error status: %d:%d\n",
+ fw_error("%s: error status: %d:%d\n", lu->tgt->bus_id,
STATUS_GET_RESPONSE(orb->status),
STATUS_GET_SBP_STATUS(orb->status));
goto out;
@@ -590,29 +608,147 @@
static void
complete_agent_reset_write(struct fw_card *card, int rcode,
- void *payload, size_t length, void *data)
+ void *payload, size_t length, void *done)
{
- struct fw_transaction *t = data;
-
- kfree(t);
+ complete(done);
}
-static int sbp2_agent_reset(struct sbp2_logical_unit *lu)
+static void sbp2_agent_reset(struct sbp2_logical_unit *lu)
+{
+ struct fw_device *device = fw_device(lu->tgt->unit->device.parent);
+ DECLARE_COMPLETION_ONSTACK(done);
+ struct fw_transaction t;
+ static u32 z;
+
+ fw_send_request(device->card, &t, TCODE_WRITE_QUADLET_REQUEST,
+ lu->tgt->node_id, lu->generation, device->max_speed,
+ lu->command_block_agent_address + SBP2_AGENT_RESET,
+ &z, sizeof(z), complete_agent_reset_write, &done);
+ wait_for_completion(&done);
+}
+
+static void
+complete_agent_reset_write_no_wait(struct fw_card *card, int rcode,
+ void *payload, size_t length, void *data)
+{
+ kfree(data);
+}
+
+static void sbp2_agent_reset_no_wait(struct sbp2_logical_unit *lu)
{
struct fw_device *device = fw_device(lu->tgt->unit->device.parent);
struct fw_transaction *t;
- static u32 zero;
+ static u32 z;
- t = kzalloc(sizeof(*t), GFP_ATOMIC);
+ t = kmalloc(sizeof(*t), GFP_ATOMIC);
if (t == NULL)
- return -ENOMEM;
+ return;
fw_send_request(device->card, t, TCODE_WRITE_QUADLET_REQUEST,
lu->tgt->node_id, lu->generation, device->max_speed,
lu->command_block_agent_address + SBP2_AGENT_RESET,
- &zero, sizeof(zero), complete_agent_reset_write, t);
+ &z, sizeof(z), complete_agent_reset_write_no_wait, t);
+}
- return 0;
+static void sbp2_set_generation(struct sbp2_logical_unit *lu, int generation)
+{
+ struct fw_card *card = fw_device(lu->tgt->unit->device.parent)->card;
+ unsigned long flags;
+
+ /* serialize with comparisons of lu->generation and card->generation */
+ spin_lock_irqsave(&card->lock, flags);
+ lu->generation = generation;
+ spin_unlock_irqrestore(&card->lock, flags);
+}
+
+static inline void sbp2_allow_block(struct sbp2_logical_unit *lu)
+{
+ /*
+ * We may access dont_block without taking card->lock here:
+ * All callers of sbp2_allow_block() and all callers of sbp2_unblock()
+ * are currently serialized against each other.
+ * And a wrong result in sbp2_conditionally_block()'s access of
+ * dont_block is rather harmless, it simply misses its first chance.
+ */
+ --lu->tgt->dont_block;
+}
+
+/*
+ * Blocks lu->tgt if all of the following conditions are met:
+ * - Login, INQUIRY, and high-level SCSI setup of all of the target's
+ * logical units have been finished (indicated by dont_block == 0).
+ * - lu->generation is stale.
+ *
+ * Note, scsi_block_requests() must be called while holding card->lock,
+ * otherwise it might foil sbp2_[conditionally_]unblock()'s attempt to
+ * unblock the target.
+ */
+static void sbp2_conditionally_block(struct sbp2_logical_unit *lu)
+{
+ struct sbp2_target *tgt = lu->tgt;
+ struct fw_card *card = fw_device(tgt->unit->device.parent)->card;
+ struct Scsi_Host *shost =
+ container_of((void *)tgt, struct Scsi_Host, hostdata[0]);
+ unsigned long flags;
+
+ spin_lock_irqsave(&card->lock, flags);
+ if (!tgt->dont_block && !lu->blocked &&
+ lu->generation != card->generation) {
+ lu->blocked = true;
+ if (++tgt->blocked == 1) {
+ scsi_block_requests(shost);
+ fw_notify("blocked %s\n", lu->tgt->bus_id);
+ }
+ }
+ spin_unlock_irqrestore(&card->lock, flags);
+}
+
+/*
+ * Unblocks lu->tgt as soon as all its logical units can be unblocked.
+ * Note, it is harmless to run scsi_unblock_requests() outside the
+ * card->lock protected section. On the other hand, running it inside
+ * the section might clash with shost->host_lock.
+ */
+static void sbp2_conditionally_unblock(struct sbp2_logical_unit *lu)
+{
+ struct sbp2_target *tgt = lu->tgt;
+ struct fw_card *card = fw_device(tgt->unit->device.parent)->card;
+ struct Scsi_Host *shost =
+ container_of((void *)tgt, struct Scsi_Host, hostdata[0]);
+ unsigned long flags;
+ bool unblock = false;
+
+ spin_lock_irqsave(&card->lock, flags);
+ if (lu->blocked && lu->generation == card->generation) {
+ lu->blocked = false;
+ unblock = --tgt->blocked == 0;
+ }
+ spin_unlock_irqrestore(&card->lock, flags);
+
+ if (unblock) {
+ scsi_unblock_requests(shost);
+ fw_notify("unblocked %s\n", lu->tgt->bus_id);
+ }
+}
+
+/*
+ * Prevents future blocking of tgt and unblocks it.
+ * Note, it is harmless to run scsi_unblock_requests() outside the
+ * card->lock protected section. On the other hand, running it inside
+ * the section might clash with shost->host_lock.
+ */
+static void sbp2_unblock(struct sbp2_target *tgt)
+{
+ struct fw_card *card = fw_device(tgt->unit->device.parent)->card;
+ struct Scsi_Host *shost =
+ container_of((void *)tgt, struct Scsi_Host, hostdata[0]);
+ unsigned long flags;
+
+ spin_lock_irqsave(&card->lock, flags);
+ ++tgt->dont_block;
+ spin_unlock_irqrestore(&card->lock, flags);
+
+ scsi_unblock_requests(shost);
}
static void sbp2_release_target(struct kref *kref)
@@ -621,23 +757,24 @@
struct sbp2_logical_unit *lu, *next;
struct Scsi_Host *shost =
container_of((void *)tgt, struct Scsi_Host, hostdata[0]);
- struct fw_device *device = fw_device(tgt->unit->device.parent);
+
+ /* prevent deadlocks */
+ sbp2_unblock(tgt);
list_for_each_entry_safe(lu, next, &tgt->lu_list, link) {
- if (lu->sdev)
+ if (lu->sdev) {
scsi_remove_device(lu->sdev);
-
- if (!fw_device_is_shutdown(device))
- sbp2_send_management_orb(lu, tgt->node_id,
- lu->generation, SBP2_LOGOUT_REQUEST,
- lu->login_id, NULL);
+ scsi_device_put(lu->sdev);
+ }
+ sbp2_send_management_orb(lu, tgt->node_id, lu->generation,
+ SBP2_LOGOUT_REQUEST, lu->login_id, NULL);
fw_core_remove_address_handler(&lu->address_handler);
list_del(&lu->link);
kfree(lu);
}
scsi_remove_host(shost);
- fw_notify("released %s\n", tgt->unit->device.bus_id);
+ fw_notify("released %s\n", tgt->bus_id);
put_device(&tgt->unit->device);
scsi_host_put(shost);
@@ -666,33 +803,43 @@
{
struct sbp2_logical_unit *lu =
container_of(work, struct sbp2_logical_unit, work.work);
- struct Scsi_Host *shost =
- container_of((void *)lu->tgt, struct Scsi_Host, hostdata[0]);
+ struct sbp2_target *tgt = lu->tgt;
+ struct fw_device *device = fw_device(tgt->unit->device.parent);
+ struct Scsi_Host *shost;
struct scsi_device *sdev;
struct scsi_lun eight_bytes_lun;
- struct fw_unit *unit = lu->tgt->unit;
- struct fw_device *device = fw_device(unit->device.parent);
struct sbp2_login_response response;
int generation, node_id, local_node_id;
+ if (fw_device_is_shutdown(device))
+ goto out;
+
generation = device->generation;
smp_rmb(); /* node_id must not be older than generation */
node_id = device->node_id;
local_node_id = device->card->node_id;
+ /* If this is a re-login attempt, log out, or we might be rejected. */
+ if (lu->sdev)
+ sbp2_send_management_orb(lu, device->node_id, generation,
+ SBP2_LOGOUT_REQUEST, lu->login_id, NULL);
+
if (sbp2_send_management_orb(lu, node_id, generation,
SBP2_LOGIN_REQUEST, lu->lun, &response) < 0) {
- if (lu->retries++ < 5)
+ if (lu->retries++ < 5) {
sbp2_queue_work(lu, DIV_ROUND_UP(HZ, 5));
- else
- fw_error("failed to login to %s LUN %04x\n",
- unit->device.bus_id, lu->lun);
+ } else {
+ fw_error("%s: failed to login to LUN %04x\n",
+ tgt->bus_id, lu->lun);
+ /* Let any waiting I/O fail from now on. */
+ sbp2_unblock(lu->tgt);
+ }
goto out;
}
- lu->generation = generation;
- lu->tgt->node_id = node_id;
- lu->tgt->address_high = local_node_id << 16;
+ tgt->node_id = node_id;
+ tgt->address_high = local_node_id << 16;
+ sbp2_set_generation(lu, generation);
/* Get command block agent offset and login id. */
lu->command_block_agent_address =
@@ -700,8 +847,8 @@
response.command_block_agent.low;
lu->login_id = LOGIN_RESPONSE_GET_LOGIN_ID(response);
- fw_notify("logged in to %s LUN %04x (%d retries)\n",
- unit->device.bus_id, lu->lun, lu->retries);
+ fw_notify("%s: logged in to LUN %04x (%d retries)\n",
+ tgt->bus_id, lu->lun, lu->retries);
#if 0
/* FIXME: The linux1394 sbp2 does this last step. */
@@ -711,26 +858,62 @@
PREPARE_DELAYED_WORK(&lu->work, sbp2_reconnect);
sbp2_agent_reset(lu);
+ /* This was a re-login. */
+ if (lu->sdev) {
+ sbp2_cancel_orbs(lu);
+ sbp2_conditionally_unblock(lu);
+ goto out;
+ }
+
+ if (lu->tgt->workarounds & SBP2_WORKAROUND_DELAY_INQUIRY)
+ ssleep(SBP2_INQUIRY_DELAY);
+
memset(&eight_bytes_lun, 0, sizeof(eight_bytes_lun));
eight_bytes_lun.scsi_lun[0] = (lu->lun >> 8) & 0xff;
eight_bytes_lun.scsi_lun[1] = lu->lun & 0xff;
+ shost = container_of((void *)tgt, struct Scsi_Host, hostdata[0]);
sdev = __scsi_add_device(shost, 0, 0,
scsilun_to_int(&eight_bytes_lun), lu);
- if (IS_ERR(sdev)) {
- sbp2_send_management_orb(lu, node_id, generation,
- SBP2_LOGOUT_REQUEST, lu->login_id, NULL);
- /*
- * Set this back to sbp2_login so we fall back and
- * retry login on bus reset.
- */
- PREPARE_DELAYED_WORK(&lu->work, sbp2_login);
- } else {
- lu->sdev = sdev;
+ /*
+ * FIXME: We are unable to perform reconnects while in sbp2_login().
+ * Therefore __scsi_add_device() will get into trouble if a bus reset
+ * happens in parallel. It will either fail or leave us with an
+ * unusable sdev. As a workaround we check for this and retry the
+ * whole login and SCSI probing.
+ */
+
+ /* Reported error during __scsi_add_device() */
+ if (IS_ERR(sdev))
+ goto out_logout_login;
+
+ /* Unreported error during __scsi_add_device() */
+ smp_rmb(); /* get current card generation */
+ if (generation != device->card->generation) {
+ scsi_remove_device(sdev);
scsi_device_put(sdev);
+ goto out_logout_login;
}
+
+ /* No error during __scsi_add_device() */
+ lu->sdev = sdev;
+ sbp2_allow_block(lu);
+ goto out;
+
+ out_logout_login:
+ smp_rmb(); /* generation may have changed */
+ generation = device->generation;
+ smp_rmb(); /* node_id must not be older than generation */
+
+ sbp2_send_management_orb(lu, device->node_id, generation,
+ SBP2_LOGOUT_REQUEST, lu->login_id, NULL);
+ /*
+ * If a bus reset happened, sbp2_update will have requeued
+ * lu->work already. Reset the work from reconnect to login.
+ */
+ PREPARE_DELAYED_WORK(&lu->work, sbp2_login);
out:
- sbp2_target_put(lu->tgt);
+ sbp2_target_put(tgt);
}
static int sbp2_add_logical_unit(struct sbp2_target *tgt, int lun_entry)
@@ -755,6 +938,8 @@
lu->sdev = NULL;
lu->lun = lun_entry & 0xffff;
lu->retries = 0;
+ lu->blocked = false;
+ ++tgt->dont_block;
INIT_LIST_HEAD(&lu->orb_list);
INIT_DELAYED_WORK(&lu->work, sbp2_login);
@@ -813,7 +998,7 @@
if (timeout > tgt->mgt_orb_timeout)
fw_notify("%s: config rom contains %ds "
"management ORB timeout, limiting "
- "to %ds\n", tgt->unit->device.bus_id,
+ "to %ds\n", tgt->bus_id,
timeout / 1000,
tgt->mgt_orb_timeout / 1000);
break;
@@ -836,12 +1021,12 @@
u32 firmware_revision)
{
int i;
- unsigned w = sbp2_param_workarounds;
+ unsigned int w = sbp2_param_workarounds;
if (w)
fw_notify("Please notify linux1394-devel@lists.sourceforge.net "
"if you need the workarounds parameter for %s\n",
- tgt->unit->device.bus_id);
+ tgt->bus_id);
if (w & SBP2_WORKAROUND_OVERRIDE)
goto out;
@@ -863,8 +1048,7 @@
if (w)
fw_notify("Workarounds for %s: 0x%x "
"(firmware_revision 0x%06x, model_id 0x%06x)\n",
- tgt->unit->device.bus_id,
- w, firmware_revision, model);
+ tgt->bus_id, w, firmware_revision, model);
tgt->workarounds = w;
}
@@ -888,6 +1072,7 @@
tgt->unit = unit;
kref_init(&tgt->kref);
INIT_LIST_HEAD(&tgt->lu_list);
+ tgt->bus_id = unit->device.bus_id;
if (fw_device_enable_phys_dma(device) < 0)
goto fail_shost_put;
@@ -938,10 +1123,13 @@
{
struct sbp2_logical_unit *lu =
container_of(work, struct sbp2_logical_unit, work.work);
- struct fw_unit *unit = lu->tgt->unit;
- struct fw_device *device = fw_device(unit->device.parent);
+ struct sbp2_target *tgt = lu->tgt;
+ struct fw_device *device = fw_device(tgt->unit->device.parent);
int generation, node_id, local_node_id;
+ if (fw_device_is_shutdown(device))
+ goto out;
+
generation = device->generation;
smp_rmb(); /* node_id must not be older than generation */
node_id = device->node_id;
@@ -950,10 +1138,17 @@
if (sbp2_send_management_orb(lu, node_id, generation,
SBP2_RECONNECT_REQUEST,
lu->login_id, NULL) < 0) {
- if (lu->retries++ >= 5) {
- fw_error("failed to reconnect to %s\n",
- unit->device.bus_id);
- /* Fall back and try to log in again. */
+ /*
+ * If reconnect was impossible even though we are in the
+ * current generation, fall back and try to log in again.
+ *
+ * We could check for "Function rejected" status, but
+ * looking at the bus generation as simpler and more general.
+ */
+ smp_rmb(); /* get current card generation */
+ if (generation == device->card->generation ||
+ lu->retries++ >= 5) {
+ fw_error("%s: failed to reconnect\n", tgt->bus_id);
lu->retries = 0;
PREPARE_DELAYED_WORK(&lu->work, sbp2_login);
}
@@ -961,17 +1156,18 @@
goto out;
}
- lu->generation = generation;
- lu->tgt->node_id = node_id;
- lu->tgt->address_high = local_node_id << 16;
+ tgt->node_id = node_id;
+ tgt->address_high = local_node_id << 16;
+ sbp2_set_generation(lu, generation);
- fw_notify("reconnected to %s LUN %04x (%d retries)\n",
- unit->device.bus_id, lu->lun, lu->retries);
+ fw_notify("%s: reconnected to LUN %04x (%d retries)\n",
+ tgt->bus_id, lu->lun, lu->retries);
sbp2_agent_reset(lu);
sbp2_cancel_orbs(lu);
+ sbp2_conditionally_unblock(lu);
out:
- sbp2_target_put(lu->tgt);
+ sbp2_target_put(tgt);
}
static void sbp2_update(struct fw_unit *unit)
@@ -986,6 +1182,7 @@
* Iteration over tgt->lu_list is therefore safe here.
*/
list_for_each_entry(lu, &tgt->lu_list, link) {
+ sbp2_conditionally_block(lu);
lu->retries = 0;
sbp2_queue_work(lu, 0);
}
@@ -1063,7 +1260,7 @@
if (status != NULL) {
if (STATUS_GET_DEAD(*status))
- sbp2_agent_reset(orb->lu);
+ sbp2_agent_reset_no_wait(orb->lu);
switch (STATUS_GET_RESPONSE(*status)) {
case SBP2_STATUS_REQUEST_COMPLETE:
@@ -1089,6 +1286,7 @@
* or when sending the write (less likely).
*/
result = DID_BUS_BUSY << 16;
+ sbp2_conditionally_block(orb->lu);
}
dma_unmap_single(device->card->device, orb->base.request_bus,
@@ -1197,7 +1395,7 @@
struct sbp2_logical_unit *lu = cmd->device->hostdata;
struct fw_device *device = fw_device(lu->tgt->unit->device.parent);
struct sbp2_command_orb *orb;
- unsigned max_payload;
+ unsigned int max_payload;
int retval = SCSI_MLQUEUE_HOST_BUSY;
/*
@@ -1275,6 +1473,10 @@
{
struct sbp2_logical_unit *lu = sdev->hostdata;
+ /* (Re-)Adding logical units via the SCSI stack is not supported. */
+ if (!lu)
+ return -ENOSYS;
+
sdev->allow_restart = 1;
/*
@@ -1319,7 +1521,7 @@
{
struct sbp2_logical_unit *lu = cmd->device->hostdata;
- fw_notify("sbp2_scsi_abort\n");
+ fw_notify("%s: sbp2_scsi_abort\n", lu->tgt->bus_id);
sbp2_agent_reset(lu);
sbp2_cancel_orbs(lu);
diff --git a/drivers/ieee1394/sbp2.c b/drivers/ieee1394/sbp2.c
index 28e155a..9e2b1964 100644
--- a/drivers/ieee1394/sbp2.c
+++ b/drivers/ieee1394/sbp2.c
@@ -183,6 +183,9 @@
* Avoids access beyond actual disk limits on devices with an off-by-one bug.
* Don't use this with devices which don't have this bug.
*
+ * - delay inquiry
+ * Wait extra SBP2_INQUIRY_DELAY seconds after login before SCSI inquiry.
+ *
* - override internal blacklist
* Instead of adding to the built-in blacklist, use only the workarounds
* specified in the module load parameter.
@@ -195,6 +198,7 @@
", 36 byte inquiry = " __stringify(SBP2_WORKAROUND_INQUIRY_36)
", skip mode page 8 = " __stringify(SBP2_WORKAROUND_MODE_SENSE_8)
", fix capacity = " __stringify(SBP2_WORKAROUND_FIX_CAPACITY)
+ ", delay inquiry = " __stringify(SBP2_WORKAROUND_DELAY_INQUIRY)
", override internal blacklist = " __stringify(SBP2_WORKAROUND_OVERRIDE)
", or a combination)");
@@ -357,6 +361,11 @@
.workarounds = SBP2_WORKAROUND_INQUIRY_36 |
SBP2_WORKAROUND_MODE_SENSE_8,
},
+ /* DViCO Momobay FX-3A with TSB42AA9A bridge */ {
+ .firmware_revision = 0x002800,
+ .model_id = 0x000000,
+ .workarounds = SBP2_WORKAROUND_DELAY_INQUIRY,
+ },
/* Initio bridges, actually only needed for some older ones */ {
.firmware_revision = 0x000200,
.model_id = SBP2_ROM_VALUE_WILDCARD,
@@ -914,6 +923,9 @@
sbp2_agent_reset(lu, 1);
sbp2_max_speed_and_size(lu);
+ if (lu->workarounds & SBP2_WORKAROUND_DELAY_INQUIRY)
+ ssleep(SBP2_INQUIRY_DELAY);
+
error = scsi_add_device(lu->shost, 0, lu->ud->id, 0);
if (error) {
SBP2_ERR("scsi_add_device failed");
@@ -1962,6 +1974,9 @@
{
struct sbp2_lu *lu = (struct sbp2_lu *)sdev->host->hostdata[0];
+ if (sdev->lun != 0 || sdev->id != lu->ud->id || sdev->channel != 0)
+ return -ENODEV;
+
lu->sdev = sdev;
sdev->allow_restart = 1;
diff --git a/drivers/ieee1394/sbp2.h b/drivers/ieee1394/sbp2.h
index d2ecb0d..80d8e09 100644
--- a/drivers/ieee1394/sbp2.h
+++ b/drivers/ieee1394/sbp2.h
@@ -343,6 +343,8 @@
#define SBP2_WORKAROUND_INQUIRY_36 0x2
#define SBP2_WORKAROUND_MODE_SENSE_8 0x4
#define SBP2_WORKAROUND_FIX_CAPACITY 0x8
+#define SBP2_WORKAROUND_DELAY_INQUIRY 0x10
+#define SBP2_INQUIRY_DELAY 12
#define SBP2_WORKAROUND_OVERRIDE 0x100
#endif /* SBP2_H */
diff --git a/fs/ext4/dir.c b/fs/ext4/dir.c
index 33888bb..2c23bad 100644
--- a/fs/ext4/dir.c
+++ b/fs/ext4/dir.c
@@ -46,7 +46,7 @@
#ifdef CONFIG_COMPAT
.compat_ioctl = ext4_compat_ioctl,
#endif
- .fsync = ext4_sync_file, /* BKL held */
+ .fsync = ext4_sync_file,
.release = ext4_release_dir,
};
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index bc7081f..9ae6e67 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -148,6 +148,7 @@
{
struct ext4_inode_info *ei = EXT4_I(inode);
ext4_fsblk_t bg_start;
+ ext4_fsblk_t last_block;
ext4_grpblk_t colour;
int depth;
@@ -169,8 +170,13 @@
/* OK. use inode's group */
bg_start = (ei->i_block_group * EXT4_BLOCKS_PER_GROUP(inode->i_sb)) +
le32_to_cpu(EXT4_SB(inode->i_sb)->s_es->s_first_data_block);
- colour = (current->pid % 16) *
+ last_block = ext4_blocks_count(EXT4_SB(inode->i_sb)->s_es) - 1;
+
+ if (bg_start + EXT4_BLOCKS_PER_GROUP(inode->i_sb) <= last_block)
+ colour = (current->pid % 16) *
(EXT4_BLOCKS_PER_GROUP(inode->i_sb) / 16);
+ else
+ colour = (current->pid % 16) * ((last_block - bg_start) / 16);
return bg_start + colour + block;
}
@@ -349,7 +355,7 @@
#define ext4_ext_show_leaf(inode,path)
#endif
-static void ext4_ext_drop_refs(struct ext4_ext_path *path)
+void ext4_ext_drop_refs(struct ext4_ext_path *path)
{
int depth = path->p_depth;
int i;
@@ -2168,6 +2174,10 @@
newblock = iblock - ee_block + ext_pblock(ex);
ex2 = ex;
+ err = ext4_ext_get_access(handle, inode, path + depth);
+ if (err)
+ goto out;
+
/* ex1: ee_block to iblock - 1 : uninitialized */
if (iblock > ee_block) {
ex1 = ex;
@@ -2200,16 +2210,20 @@
newdepth = ext_depth(inode);
if (newdepth != depth) {
depth = newdepth;
- path = ext4_ext_find_extent(inode, iblock, NULL);
+ ext4_ext_drop_refs(path);
+ path = ext4_ext_find_extent(inode, iblock, path);
if (IS_ERR(path)) {
err = PTR_ERR(path);
- path = NULL;
goto out;
}
eh = path[depth].p_hdr;
ex = path[depth].p_ext;
if (ex2 != &newex)
ex2 = ex;
+
+ err = ext4_ext_get_access(handle, inode, path + depth);
+ if (err)
+ goto out;
}
allocated = max_blocks;
}
@@ -2230,9 +2244,6 @@
ex2->ee_len = cpu_to_le16(allocated);
if (ex2 != ex)
goto insert;
- err = ext4_ext_get_access(handle, inode, path + depth);
- if (err)
- goto out;
/*
* New (initialized) extent starts from the first block
* in the current extent. i.e., ex2 == ex
@@ -2276,9 +2287,22 @@
}
/*
+ * Block allocation/map/preallocation routine for extents based files
+ *
+ *
* Need to be called with
* down_read(&EXT4_I(inode)->i_data_sem) if not allocating file system block
* (ie, create is zero). Otherwise down_write(&EXT4_I(inode)->i_data_sem)
+ *
+ * return > 0, number of of blocks already mapped/allocated
+ * if create == 0 and these are pre-allocated blocks
+ * buffer head is unmapped
+ * otherwise blocks are mapped
+ *
+ * return = 0, if plain look up failed (blocks have not been allocated)
+ * buffer head is unmapped
+ *
+ * return < 0, error case.
*/
int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
ext4_lblk_t iblock,
@@ -2623,7 +2647,7 @@
* modify 1 super block, 1 block bitmap and 1 group descriptor.
*/
credits = EXT4_DATA_TRANS_BLOCKS(inode->i_sb) + 3;
- down_write((&EXT4_I(inode)->i_data_sem));
+ mutex_lock(&inode->i_mutex);
retry:
while (ret >= 0 && ret < max_blocks) {
block = block + ret;
@@ -2634,16 +2658,17 @@
break;
}
- ret = ext4_ext_get_blocks(handle, inode, block,
+ ret = ext4_get_blocks_wrap(handle, inode, block,
max_blocks, &map_bh,
EXT4_CREATE_UNINITIALIZED_EXT, 0);
- WARN_ON(ret <= 0);
if (ret <= 0) {
- ext4_error(inode->i_sb, "ext4_fallocate",
- "ext4_ext_get_blocks returned error: "
- "inode#%lu, block=%u, max_blocks=%lu",
+#ifdef EXT4FS_DEBUG
+ WARN_ON(ret <= 0);
+ printk(KERN_ERR "%s: ext4_ext_get_blocks "
+ "returned error inode#%lu, block=%u, "
+ "max_blocks=%lu", __func__,
inode->i_ino, block, max_blocks);
- ret = -EIO;
+#endif
ext4_mark_inode_dirty(handle, inode);
ret2 = ext4_journal_stop(handle);
break;
@@ -2680,7 +2705,6 @@
if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries))
goto retry;
- up_write((&EXT4_I(inode)->i_data_sem));
/*
* Time to update the file size.
* Update only when preallocation was requested beyond the file size.
@@ -2692,21 +2716,18 @@
* if no error, we assume preallocation succeeded
* completely
*/
- mutex_lock(&inode->i_mutex);
i_size_write(inode, offset + len);
EXT4_I(inode)->i_disksize = i_size_read(inode);
- mutex_unlock(&inode->i_mutex);
} else if (ret < 0 && nblocks) {
/* Handle partial allocation scenario */
loff_t newsize;
- mutex_lock(&inode->i_mutex);
newsize = (nblocks << blkbits) + i_size_read(inode);
i_size_write(inode, EXT4_BLOCK_ALIGN(newsize, blkbits));
EXT4_I(inode)->i_disksize = i_size_read(inode);
- mutex_unlock(&inode->i_mutex);
}
}
+ mutex_unlock(&inode->i_mutex);
return ret > 0 ? ret2 : ret;
}
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c
index da18a74..8036b9b 100644
--- a/fs/ext4/ialloc.c
+++ b/fs/ext4/ialloc.c
@@ -702,7 +702,12 @@
ei->i_dir_start_lookup = 0;
ei->i_disksize = 0;
- ei->i_flags = EXT4_I(dir)->i_flags & ~EXT4_INDEX_FL;
+ /*
+ * Don't inherit extent flag from directory. We set extent flag on
+ * newly created directory and file only if -o extent mount option is
+ * specified
+ */
+ ei->i_flags = EXT4_I(dir)->i_flags & ~(EXT4_INDEX_FL|EXT4_EXTENTS_FL);
if (S_ISLNK(mode))
ei->i_flags &= ~(EXT4_IMMUTABLE_FL|EXT4_APPEND_FL);
/* dirsync only applies to directories */
@@ -745,12 +750,15 @@
goto fail_free_drop;
}
if (test_opt(sb, EXTENTS)) {
- EXT4_I(inode)->i_flags |= EXT4_EXTENTS_FL;
- ext4_ext_tree_init(handle, inode);
- err = ext4_update_incompat_feature(handle, sb,
- EXT4_FEATURE_INCOMPAT_EXTENTS);
- if (err)
- goto fail;
+ /* set extent flag only for directory and file */
+ if (S_ISDIR(mode) || S_ISREG(mode)) {
+ EXT4_I(inode)->i_flags |= EXT4_EXTENTS_FL;
+ ext4_ext_tree_init(handle, inode);
+ err = ext4_update_incompat_feature(handle, sb,
+ EXT4_FEATURE_INCOMPAT_EXTENTS);
+ if (err)
+ goto fail;
+ }
}
ext4_debug("allocating inode %lu\n", inode->i_ino);
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 7dd9b50..945cbf6 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -403,6 +403,7 @@
__le32 *start = ind->bh ? (__le32*) ind->bh->b_data : ei->i_data;
__le32 *p;
ext4_fsblk_t bg_start;
+ ext4_fsblk_t last_block;
ext4_grpblk_t colour;
/* Try to find previous block */
@@ -420,8 +421,13 @@
* into the same cylinder group then.
*/
bg_start = ext4_group_first_block_no(inode->i_sb, ei->i_block_group);
- colour = (current->pid % 16) *
+ last_block = ext4_blocks_count(EXT4_SB(inode->i_sb)->s_es) - 1;
+
+ if (bg_start + EXT4_BLOCKS_PER_GROUP(inode->i_sb) <= last_block)
+ colour = (current->pid % 16) *
(EXT4_BLOCKS_PER_GROUP(inode->i_sb) / 16);
+ else
+ colour = (current->pid % 16) * ((last_block - bg_start) / 16);
return bg_start + colour;
}
@@ -768,7 +774,6 @@
*
* `handle' can be NULL if create == 0.
*
- * The BKL may not be held on entry here. Be sure to take it early.
* return > 0, # of blocks mapped or allocated.
* return = 0, if plain lookup failed.
* return < 0, error case.
@@ -903,11 +908,38 @@
*/
#define DIO_CREDITS 25
+
+/*
+ *
+ *
+ * ext4_ext4 get_block() wrapper function
+ * It will do a look up first, and returns if the blocks already mapped.
+ * Otherwise it takes the write lock of the i_data_sem and allocate blocks
+ * and store the allocated blocks in the result buffer head and mark it
+ * mapped.
+ *
+ * If file type is extents based, it will call ext4_ext_get_blocks(),
+ * Otherwise, call with ext4_get_blocks_handle() to handle indirect mapping
+ * based files
+ *
+ * On success, it returns the number of blocks being mapped or allocate.
+ * if create==0 and the blocks are pre-allocated and uninitialized block,
+ * the result buffer head is unmapped. If the create ==1, it will make sure
+ * the buffer head is mapped.
+ *
+ * It returns 0 if plain look up failed (blocks have not been allocated), in
+ * that casem, buffer head is unmapped
+ *
+ * It returns the error in case of allocation failure.
+ */
int ext4_get_blocks_wrap(handle_t *handle, struct inode *inode, sector_t block,
unsigned long max_blocks, struct buffer_head *bh,
int create, int extend_disksize)
{
int retval;
+
+ clear_buffer_mapped(bh);
+
/*
* Try to see if we can get the block without requesting
* for new file system block.
@@ -921,12 +953,26 @@
inode, block, max_blocks, bh, 0, 0);
}
up_read((&EXT4_I(inode)->i_data_sem));
- if (!create || (retval > 0))
+
+ /* If it is only a block(s) look up */
+ if (!create)
return retval;
/*
- * We need to allocate new blocks which will result
- * in i_data update
+ * Returns if the blocks have already allocated
+ *
+ * Note that if blocks have been preallocated
+ * ext4_ext_get_block() returns th create = 0
+ * with buffer head unmapped.
+ */
+ if (retval > 0 && buffer_mapped(bh))
+ return retval;
+
+ /*
+ * New blocks allocate and/or writing to uninitialized extent
+ * will possibly result in updating i_data, so we take
+ * the write lock of i_data_sem, and call get_blocks()
+ * with create == 1 flag.
*/
down_write((&EXT4_I(inode)->i_data_sem));
/*
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index dd0fcfc..ef97f19 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -627,21 +627,19 @@
return block;
}
+static inline void *mb_correct_addr_and_bit(int *bit, void *addr)
+{
#if BITS_PER_LONG == 64
-#define mb_correct_addr_and_bit(bit, addr) \
-{ \
- bit += ((unsigned long) addr & 7UL) << 3; \
- addr = (void *) ((unsigned long) addr & ~7UL); \
-}
+ *bit += ((unsigned long) addr & 7UL) << 3;
+ addr = (void *) ((unsigned long) addr & ~7UL);
#elif BITS_PER_LONG == 32
-#define mb_correct_addr_and_bit(bit, addr) \
-{ \
- bit += ((unsigned long) addr & 3UL) << 3; \
- addr = (void *) ((unsigned long) addr & ~3UL); \
-}
+ *bit += ((unsigned long) addr & 3UL) << 3;
+ addr = (void *) ((unsigned long) addr & ~3UL);
#else
#error "how many bits you are?!"
#endif
+ return addr;
+}
static inline int mb_test_bit(int bit, void *addr)
{
@@ -649,34 +647,54 @@
* ext4_test_bit on architecture like powerpc
* needs unsigned long aligned address
*/
- mb_correct_addr_and_bit(bit, addr);
+ addr = mb_correct_addr_and_bit(&bit, addr);
return ext4_test_bit(bit, addr);
}
static inline void mb_set_bit(int bit, void *addr)
{
- mb_correct_addr_and_bit(bit, addr);
+ addr = mb_correct_addr_and_bit(&bit, addr);
ext4_set_bit(bit, addr);
}
static inline void mb_set_bit_atomic(spinlock_t *lock, int bit, void *addr)
{
- mb_correct_addr_and_bit(bit, addr);
+ addr = mb_correct_addr_and_bit(&bit, addr);
ext4_set_bit_atomic(lock, bit, addr);
}
static inline void mb_clear_bit(int bit, void *addr)
{
- mb_correct_addr_and_bit(bit, addr);
+ addr = mb_correct_addr_and_bit(&bit, addr);
ext4_clear_bit(bit, addr);
}
static inline void mb_clear_bit_atomic(spinlock_t *lock, int bit, void *addr)
{
- mb_correct_addr_and_bit(bit, addr);
+ addr = mb_correct_addr_and_bit(&bit, addr);
ext4_clear_bit_atomic(lock, bit, addr);
}
+static inline int mb_find_next_zero_bit(void *addr, int max, int start)
+{
+ int fix = 0;
+ addr = mb_correct_addr_and_bit(&fix, addr);
+ max += fix;
+ start += fix;
+
+ return ext4_find_next_zero_bit(addr, max, start) - fix;
+}
+
+static inline int mb_find_next_bit(void *addr, int max, int start)
+{
+ int fix = 0;
+ addr = mb_correct_addr_and_bit(&fix, addr);
+ max += fix;
+ start += fix;
+
+ return ext4_find_next_bit(addr, max, start) - fix;
+}
+
static void *mb_find_buddy(struct ext4_buddy *e4b, int order, int *max)
{
char *bb;
@@ -906,7 +924,7 @@
unsigned short chunk;
unsigned short border;
- BUG_ON(len >= EXT4_BLOCKS_PER_GROUP(sb));
+ BUG_ON(len > EXT4_BLOCKS_PER_GROUP(sb));
border = 2 << sb->s_blocksize_bits;
@@ -946,12 +964,12 @@
/* initialize buddy from bitmap which is aggregation
* of on-disk bitmap and preallocations */
- i = ext4_find_next_zero_bit(bitmap, max, 0);
+ i = mb_find_next_zero_bit(bitmap, max, 0);
grp->bb_first_free = i;
while (i < max) {
fragments++;
first = i;
- i = ext4_find_next_bit(bitmap, max, i);
+ i = mb_find_next_bit(bitmap, max, i);
len = i - first;
free += len;
if (len > 1)
@@ -959,7 +977,7 @@
else
grp->bb_counters[0]++;
if (i < max)
- i = ext4_find_next_zero_bit(bitmap, max, i);
+ i = mb_find_next_zero_bit(bitmap, max, i);
}
grp->bb_fragments = fragments;
@@ -967,6 +985,10 @@
ext4_error(sb, __FUNCTION__,
"EXT4-fs: group %lu: %u blocks in bitmap, %u in gd\n",
group, free, grp->bb_free);
+ /*
+ * If we intent to continue, we consider group descritor
+ * corrupt and update bb_free using bitmap value
+ */
grp->bb_free = free;
}
@@ -1778,7 +1800,7 @@
buddy = mb_find_buddy(e4b, i, &max);
BUG_ON(buddy == NULL);
- k = ext4_find_next_zero_bit(buddy, max, 0);
+ k = mb_find_next_zero_bit(buddy, max, 0);
BUG_ON(k >= max);
ac->ac_found++;
@@ -1818,11 +1840,11 @@
i = e4b->bd_info->bb_first_free;
while (free && ac->ac_status == AC_STATUS_CONTINUE) {
- i = ext4_find_next_zero_bit(bitmap,
+ i = mb_find_next_zero_bit(bitmap,
EXT4_BLOCKS_PER_GROUP(sb), i);
if (i >= EXT4_BLOCKS_PER_GROUP(sb)) {
/*
- * IF we corrupt the bitmap we won't find any
+ * IF we have corrupt bitmap, we won't find any
* free blocks even though group info says we
* we have free blocks
*/
@@ -1838,6 +1860,12 @@
ext4_error(sb, __FUNCTION__, "%d free blocks as per "
"group info. But got %d blocks\n",
free, ex.fe_len);
+ /*
+ * The number of free blocks differs. This mostly
+ * indicate that the bitmap is corrupt. So exit
+ * without claiming the space.
+ */
+ break;
}
ext4_mb_measure_extent(ac, &ex, e4b);
@@ -3740,10 +3768,10 @@
}
while (bit < end) {
- bit = ext4_find_next_zero_bit(bitmap_bh->b_data, end, bit);
+ bit = mb_find_next_zero_bit(bitmap_bh->b_data, end, bit);
if (bit >= end)
break;
- next = ext4_find_next_bit(bitmap_bh->b_data, end, bit);
+ next = mb_find_next_bit(bitmap_bh->b_data, end, bit);
if (next > end)
next = end;
start = group * EXT4_BLOCKS_PER_GROUP(sb) + bit +
@@ -3771,6 +3799,10 @@
(unsigned long) pa->pa_len);
ext4_error(sb, __FUNCTION__, "free %u, pa_free %u\n",
free, pa->pa_free);
+ /*
+ * pa is already deleted so we use the value obtained
+ * from the bitmap and continue.
+ */
}
atomic_add(free, &sbi->s_mb_discarded);
if (ac)
diff --git a/fs/ext4/migrate.c b/fs/ext4/migrate.c
index 8c6c685..5c1e27d 100644
--- a/fs/ext4/migrate.c
+++ b/fs/ext4/migrate.c
@@ -43,6 +43,7 @@
if (IS_ERR(path)) {
retval = PTR_ERR(path);
+ path = NULL;
goto err_out;
}
@@ -74,6 +75,10 @@
}
retval = ext4_ext_insert_extent(handle, inode, path, &newext);
err_out:
+ if (path) {
+ ext4_ext_drop_refs(path);
+ kfree(path);
+ }
lb->first_pblock = 0;
return retval;
}
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index a9347fb..28aa2ed 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -1804,12 +1804,8 @@
inode->i_fop = &ext4_dir_operations;
inode->i_size = EXT4_I(inode)->i_disksize = inode->i_sb->s_blocksize;
dir_block = ext4_bread (handle, inode, 0, 1, &err);
- if (!dir_block) {
- ext4_dec_count(handle, inode); /* is this nlink == 0? */
- ext4_mark_inode_dirty(handle, inode);
- iput (inode);
- goto out_stop;
- }
+ if (!dir_block)
+ goto out_clear_inode;
BUFFER_TRACE(dir_block, "get_write_access");
ext4_journal_get_write_access(handle, dir_block);
de = (struct ext4_dir_entry_2 *) dir_block->b_data;
@@ -1832,7 +1828,8 @@
ext4_mark_inode_dirty(handle, inode);
err = ext4_add_entry (handle, dentry, inode);
if (err) {
- inode->i_nlink = 0;
+out_clear_inode:
+ clear_nlink(inode);
ext4_mark_inode_dirty(handle, inode);
iput (inode);
goto out_stop;
@@ -2164,7 +2161,7 @@
dir->i_ctime = dir->i_mtime = ext4_current_time(dir);
ext4_update_dx_flag(dir);
ext4_mark_inode_dirty(handle, dir);
- ext4_dec_count(handle, inode);
+ drop_nlink(inode);
if (!inode->i_nlink)
ext4_orphan_add(handle, inode);
inode->i_ctime = ext4_current_time(inode);
@@ -2214,7 +2211,7 @@
err = __page_symlink(inode, symname, l,
mapping_gfp_mask(inode->i_mapping) & ~__GFP_FS);
if (err) {
- ext4_dec_count(handle, inode);
+ clear_nlink(inode);
ext4_mark_inode_dirty(handle, inode);
iput (inode);
goto out_stop;
@@ -2223,7 +2220,6 @@
inode->i_op = &ext4_fast_symlink_inode_operations;
memcpy((char*)&EXT4_I(inode)->i_data,symname,l);
inode->i_size = l-1;
- EXT4_I(inode)->i_flags &= ~EXT4_EXTENTS_FL;
}
EXT4_I(inode)->i_disksize = inode->i_size;
err = ext4_add_nondir(handle, dentry, inode);
@@ -2407,7 +2403,7 @@
ext4_dec_count(handle, old_dir);
if (new_inode) {
/* checked empty_dir above, can't have another parent,
- * ext3_dec_count() won't work for many-linked dirs */
+ * ext4_dec_count() won't work for many-linked dirs */
new_inode->i_nlink = 0;
} else {
ext4_inc_count(handle, new_dir);
diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c
index 9477a2b..e29efa0 100644
--- a/fs/ext4/resize.c
+++ b/fs/ext4/resize.c
@@ -1037,6 +1037,7 @@
ext4_warning(sb, __FUNCTION__,
"multiple resizers run on filesystem!");
unlock_super(sb);
+ ext4_journal_stop(handle);
err = -EBUSY;
goto exit_put;
}
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 96ee899..91a1bd6 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -314,9 +314,12 @@
static int lstats_show_proc(struct seq_file *m, void *v)
{
int i;
- struct task_struct *task = m->private;
- seq_puts(m, "Latency Top version : v0.1\n");
+ struct inode *inode = m->private;
+ struct task_struct *task = get_proc_task(inode);
+ if (!task)
+ return -ESRCH;
+ seq_puts(m, "Latency Top version : v0.1\n");
for (i = 0; i < 32; i++) {
if (task->latency_record[i].backtrace[0]) {
int q;
@@ -341,32 +344,24 @@
}
}
+ put_task_struct(task);
return 0;
}
static int lstats_open(struct inode *inode, struct file *file)
{
- int ret;
- struct seq_file *m;
- struct task_struct *task = get_proc_task(inode);
-
- ret = single_open(file, lstats_show_proc, NULL);
- if (!ret) {
- m = file->private_data;
- m->private = task;
- }
- return ret;
+ return single_open(file, lstats_show_proc, inode);
}
static ssize_t lstats_write(struct file *file, const char __user *buf,
size_t count, loff_t *offs)
{
- struct seq_file *m;
- struct task_struct *task;
+ struct task_struct *task = get_proc_task(file->f_dentry->d_inode);
- m = file->private_data;
- task = m->private;
+ if (!task)
+ return -ESRCH;
clear_all_latency_tracing(task);
+ put_task_struct(task);
return count;
}
diff --git a/fs/xfs/xfs_bit.c b/fs/xfs/xfs_bit.c
index 4822884..fab0b6d 100644
--- a/fs/xfs/xfs_bit.c
+++ b/fs/xfs/xfs_bit.c
@@ -25,6 +25,109 @@
* XFS bit manipulation routines, used in non-realtime code.
*/
+#ifndef HAVE_ARCH_HIGHBIT
+/*
+ * Index of high bit number in byte, -1 for none set, 0..7 otherwise.
+ */
+static const char xfs_highbit[256] = {
+ -1, 0, 1, 1, 2, 2, 2, 2, /* 00 .. 07 */
+ 3, 3, 3, 3, 3, 3, 3, 3, /* 08 .. 0f */
+ 4, 4, 4, 4, 4, 4, 4, 4, /* 10 .. 17 */
+ 4, 4, 4, 4, 4, 4, 4, 4, /* 18 .. 1f */
+ 5, 5, 5, 5, 5, 5, 5, 5, /* 20 .. 27 */
+ 5, 5, 5, 5, 5, 5, 5, 5, /* 28 .. 2f */
+ 5, 5, 5, 5, 5, 5, 5, 5, /* 30 .. 37 */
+ 5, 5, 5, 5, 5, 5, 5, 5, /* 38 .. 3f */
+ 6, 6, 6, 6, 6, 6, 6, 6, /* 40 .. 47 */
+ 6, 6, 6, 6, 6, 6, 6, 6, /* 48 .. 4f */
+ 6, 6, 6, 6, 6, 6, 6, 6, /* 50 .. 57 */
+ 6, 6, 6, 6, 6, 6, 6, 6, /* 58 .. 5f */
+ 6, 6, 6, 6, 6, 6, 6, 6, /* 60 .. 67 */
+ 6, 6, 6, 6, 6, 6, 6, 6, /* 68 .. 6f */
+ 6, 6, 6, 6, 6, 6, 6, 6, /* 70 .. 77 */
+ 6, 6, 6, 6, 6, 6, 6, 6, /* 78 .. 7f */
+ 7, 7, 7, 7, 7, 7, 7, 7, /* 80 .. 87 */
+ 7, 7, 7, 7, 7, 7, 7, 7, /* 88 .. 8f */
+ 7, 7, 7, 7, 7, 7, 7, 7, /* 90 .. 97 */
+ 7, 7, 7, 7, 7, 7, 7, 7, /* 98 .. 9f */
+ 7, 7, 7, 7, 7, 7, 7, 7, /* a0 .. a7 */
+ 7, 7, 7, 7, 7, 7, 7, 7, /* a8 .. af */
+ 7, 7, 7, 7, 7, 7, 7, 7, /* b0 .. b7 */
+ 7, 7, 7, 7, 7, 7, 7, 7, /* b8 .. bf */
+ 7, 7, 7, 7, 7, 7, 7, 7, /* c0 .. c7 */
+ 7, 7, 7, 7, 7, 7, 7, 7, /* c8 .. cf */
+ 7, 7, 7, 7, 7, 7, 7, 7, /* d0 .. d7 */
+ 7, 7, 7, 7, 7, 7, 7, 7, /* d8 .. df */
+ 7, 7, 7, 7, 7, 7, 7, 7, /* e0 .. e7 */
+ 7, 7, 7, 7, 7, 7, 7, 7, /* e8 .. ef */
+ 7, 7, 7, 7, 7, 7, 7, 7, /* f0 .. f7 */
+ 7, 7, 7, 7, 7, 7, 7, 7, /* f8 .. ff */
+};
+#endif
+
+/*
+ * xfs_highbit32: get high bit set out of 32-bit argument, -1 if none set.
+ */
+inline int
+xfs_highbit32(
+ __uint32_t v)
+{
+#ifdef HAVE_ARCH_HIGHBIT
+ return highbit32(v);
+#else
+ int i;
+
+ if (v & 0xffff0000)
+ if (v & 0xff000000)
+ i = 24;
+ else
+ i = 16;
+ else if (v & 0x0000ffff)
+ if (v & 0x0000ff00)
+ i = 8;
+ else
+ i = 0;
+ else
+ return -1;
+ return i + xfs_highbit[(v >> i) & 0xff];
+#endif
+}
+
+/*
+ * xfs_lowbit64: get low bit set out of 64-bit argument, -1 if none set.
+ */
+int
+xfs_lowbit64(
+ __uint64_t v)
+{
+ __uint32_t w = (__uint32_t)v;
+ int n = 0;
+
+ if (w) { /* lower bits */
+ n = ffs(w);
+ } else { /* upper bits */
+ w = (__uint32_t)(v >> 32);
+ if (w && (n = ffs(w)))
+ n += 32;
+ }
+ return n - 1;
+}
+
+/*
+ * xfs_highbit64: get high bit set out of 64-bit argument, -1 if none set.
+ */
+int
+xfs_highbit64(
+ __uint64_t v)
+{
+ __uint32_t h = (__uint32_t)(v >> 32);
+
+ if (h)
+ return xfs_highbit32(h) + 32;
+ return xfs_highbit32((__uint32_t)v);
+}
+
+
/*
* Return whether bitmap is empty.
* Size is number of words in the bitmap, which is padded to word boundary
diff --git a/fs/xfs/xfs_bit.h b/fs/xfs/xfs_bit.h
index 325a007..082641a 100644
--- a/fs/xfs/xfs_bit.h
+++ b/fs/xfs/xfs_bit.h
@@ -47,30 +47,13 @@
}
/* Get high bit set out of 32-bit argument, -1 if none set */
-static inline int xfs_highbit32(__uint32_t v)
-{
- return fls(v) - 1;
-}
-
-/* Get high bit set out of 64-bit argument, -1 if none set */
-static inline int xfs_highbit64(__uint64_t v)
-{
- return fls64(v) - 1;
-}
-
-/* Get low bit set out of 32-bit argument, -1 if none set */
-static inline int xfs_lowbit32(__uint32_t v)
-{
- __uint32_t t = v;
- return (t) ? find_first_bit((unsigned long *)&t, 32) : -1;
-}
+extern int xfs_highbit32(__uint32_t v);
/* Get low bit set out of 64-bit argument, -1 if none set */
-static inline int xfs_lowbit64(__uint64_t v)
-{
- __uint64_t t = v;
- return (t) ? find_first_bit((unsigned long *)&t, 64) : -1;
-}
+extern int xfs_lowbit64(__uint64_t v);
+
+/* Get high bit set out of 64-bit argument, -1 if none set */
+extern int xfs_highbit64(__uint64_t);
/* Return whether bitmap is empty (1 == empty) */
extern int xfs_bitmap_empty(uint *map, uint size);
diff --git a/fs/xfs/xfs_rtalloc.c b/fs/xfs/xfs_rtalloc.c
index ca83ddf..47082c0 100644
--- a/fs/xfs/xfs_rtalloc.c
+++ b/fs/xfs/xfs_rtalloc.c
@@ -73,6 +73,18 @@
*/
/*
+ * xfs_lowbit32: get low bit set out of 32-bit argument, -1 if none set.
+ */
+STATIC int
+xfs_lowbit32(
+ __uint32_t v)
+{
+ if (v)
+ return ffs(v) - 1;
+ return -1;
+}
+
+/*
* Allocate space to the bitmap or summary file, and zero it, for growfs.
*/
STATIC int /* error */
@@ -432,7 +444,6 @@
}
bbno = XFS_BITTOBLOCK(mp, bno);
i = 0;
- ASSERT(minlen != 0);
log2len = xfs_highbit32(minlen);
/*
* Loop over all bitmap blocks (bbno + i is current block).
@@ -601,8 +612,6 @@
xfs_suminfo_t sum; /* summary information for extents */
ASSERT(minlen % prod == 0 && maxlen % prod == 0);
- ASSERT(maxlen != 0);
-
/*
* Loop over all the levels starting with maxlen.
* At each level, look at all the bitmap blocks, to see if there
@@ -660,9 +669,6 @@
*rtblock = NULLRTBLOCK;
return 0;
}
- ASSERT(minlen != 0);
- ASSERT(maxlen != 0);
-
/*
* Loop over sizes, from maxlen down to minlen.
* This time, when we do the allocations, allow smaller ones
@@ -1948,7 +1954,6 @@
nsbp->sb_blocksize * nsbp->sb_rextsize);
nsbp->sb_rextents = nsbp->sb_rblocks;
do_div(nsbp->sb_rextents, nsbp->sb_rextsize);
- ASSERT(nsbp->sb_rextents != 0);
nsbp->sb_rextslog = xfs_highbit32(nsbp->sb_rextents);
nrsumlevels = nmp->m_rsumlevels = nsbp->sb_rextslog + 1;
nrsumsize =
diff --git a/include/asm-x86/futex.h b/include/asm-x86/futex.h
index cd9f894..c9952ea 100644
--- a/include/asm-x86/futex.h
+++ b/include/asm-x86/futex.h
@@ -102,6 +102,13 @@
static inline int
futex_atomic_cmpxchg_inatomic(int __user *uaddr, int oldval, int newval)
{
+
+#if defined(CONFIG_X86_32) && !defined(CONFIG_X86_BSWAP)
+ /* Real i386 machines have no cmpxchg instruction */
+ if (boot_cpu_data.x86 == 3)
+ return -ENOSYS;
+#endif
+
if (!access_ok(VERIFY_WRITE, uaddr, sizeof(int)))
return -EFAULT;
diff --git a/include/asm-x86/lguest.h b/include/asm-x86/lguest.h
index 4d9367b..9b17571 100644
--- a/include/asm-x86/lguest.h
+++ b/include/asm-x86/lguest.h
@@ -23,6 +23,17 @@
/* Found in switcher.S */
extern unsigned long default_idt_entries[];
+/* Declarations for definitions in lguest_guest.S */
+extern char lguest_noirq_start[], lguest_noirq_end[];
+extern const char lgstart_cli[], lgend_cli[];
+extern const char lgstart_sti[], lgend_sti[];
+extern const char lgstart_popf[], lgend_popf[];
+extern const char lgstart_pushf[], lgend_pushf[];
+extern const char lgstart_iret[], lgend_iret[];
+
+extern void lguest_iret(void);
+extern void lguest_init(void);
+
struct lguest_regs
{
/* Manually saved part. */
diff --git a/include/asm-x86/nops.h b/include/asm-x86/nops.h
index fec025c..e3b2bce0 100644
--- a/include/asm-x86/nops.h
+++ b/include/asm-x86/nops.h
@@ -3,17 +3,29 @@
/* Define nops for use with alternative() */
-/* generic versions from gas */
-#define GENERIC_NOP1 ".byte 0x90\n"
-#define GENERIC_NOP2 ".byte 0x89,0xf6\n"
-#define GENERIC_NOP3 ".byte 0x8d,0x76,0x00\n"
-#define GENERIC_NOP4 ".byte 0x8d,0x74,0x26,0x00\n"
-#define GENERIC_NOP5 GENERIC_NOP1 GENERIC_NOP4
-#define GENERIC_NOP6 ".byte 0x8d,0xb6,0x00,0x00,0x00,0x00\n"
-#define GENERIC_NOP7 ".byte 0x8d,0xb4,0x26,0x00,0x00,0x00,0x00\n"
-#define GENERIC_NOP8 GENERIC_NOP1 GENERIC_NOP7
+/* generic versions from gas
+ 1: nop
+ 2: movl %esi,%esi
+ 3: leal 0x00(%esi),%esi
+ 4: leal 0x00(,%esi,1),%esi
+ 6: leal 0x00000000(%esi),%esi
+ 7: leal 0x00000000(,%esi,1),%esi
+*/
+#define GENERIC_NOP1 ".byte 0x90\n"
+#define GENERIC_NOP2 ".byte 0x89,0xf6\n"
+#define GENERIC_NOP3 ".byte 0x8d,0x76,0x00\n"
+#define GENERIC_NOP4 ".byte 0x8d,0x74,0x26,0x00\n"
+#define GENERIC_NOP5 GENERIC_NOP1 GENERIC_NOP4
+#define GENERIC_NOP6 ".byte 0x8d,0xb6,0x00,0x00,0x00,0x00\n"
+#define GENERIC_NOP7 ".byte 0x8d,0xb4,0x26,0x00,0x00,0x00,0x00\n"
+#define GENERIC_NOP8 GENERIC_NOP1 GENERIC_NOP7
-/* Opteron 64bit nops */
+/* Opteron 64bit nops
+ 1: nop
+ 2: osp nop
+ 3: osp osp nop
+ 4: osp osp osp nop
+*/
#define K8_NOP1 GENERIC_NOP1
#define K8_NOP2 ".byte 0x66,0x90\n"
#define K8_NOP3 ".byte 0x66,0x66,0x90\n"
@@ -23,19 +35,35 @@
#define K8_NOP7 K8_NOP4 K8_NOP3
#define K8_NOP8 K8_NOP4 K8_NOP4
-/* K7 nops */
-/* uses eax dependencies (arbitary choice) */
-#define K7_NOP1 GENERIC_NOP1
+/* K7 nops
+ uses eax dependencies (arbitary choice)
+ 1: nop
+ 2: movl %eax,%eax
+ 3: leal (,%eax,1),%eax
+ 4: leal 0x00(,%eax,1),%eax
+ 6: leal 0x00000000(%eax),%eax
+ 7: leal 0x00000000(,%eax,1),%eax
+*/
+#define K7_NOP1 GENERIC_NOP1
#define K7_NOP2 ".byte 0x8b,0xc0\n"
#define K7_NOP3 ".byte 0x8d,0x04,0x20\n"
#define K7_NOP4 ".byte 0x8d,0x44,0x20,0x00\n"
#define K7_NOP5 K7_NOP4 ASM_NOP1
#define K7_NOP6 ".byte 0x8d,0x80,0,0,0,0\n"
-#define K7_NOP7 ".byte 0x8D,0x04,0x05,0,0,0,0\n"
-#define K7_NOP8 K7_NOP7 ASM_NOP1
+#define K7_NOP7 ".byte 0x8D,0x04,0x05,0,0,0,0\n"
+#define K7_NOP8 K7_NOP7 ASM_NOP1
-/* P6 nops */
-/* uses eax dependencies (Intel-recommended choice) */
+/* P6 nops
+ uses eax dependencies (Intel-recommended choice)
+ 1: nop
+ 2: osp nop
+ 3: nopl (%eax)
+ 4: nopl 0x00(%eax)
+ 5: nopl 0x00(%eax,%eax,1)
+ 6: osp nopl 0x00(%eax,%eax,1)
+ 7: nopl 0x00000000(%eax)
+ 8: nopl 0x00000000(%eax,%eax,1)
+*/
#define P6_NOP1 GENERIC_NOP1
#define P6_NOP2 ".byte 0x66,0x90\n"
#define P6_NOP3 ".byte 0x0f,0x1f,0x00\n"
@@ -63,9 +91,7 @@
#define ASM_NOP6 K7_NOP6
#define ASM_NOP7 K7_NOP7
#define ASM_NOP8 K7_NOP8
-#elif defined(CONFIG_M686) || defined(CONFIG_MPENTIUMII) || \
- defined(CONFIG_MPENTIUMIII) || defined(CONFIG_MPENTIUMM) || \
- defined(CONFIG_MCORE2) || defined(CONFIG_PENTIUM4)
+#elif defined(CONFIG_X86_P6_NOP)
#define ASM_NOP1 P6_NOP1
#define ASM_NOP2 P6_NOP2
#define ASM_NOP3 P6_NOP3
diff --git a/include/asm-x86/page_64.h b/include/asm-x86/page_64.h
index f7393bc..1435460 100644
--- a/include/asm-x86/page_64.h
+++ b/include/asm-x86/page_64.h
@@ -47,8 +47,12 @@
#define __PHYSICAL_MASK_SHIFT 46
#define __VIRTUAL_MASK_SHIFT 48
-#define KERNEL_TEXT_SIZE (40*1024*1024)
-#define KERNEL_TEXT_START _AC(0xffffffff80000000, UL)
+/*
+ * Kernel image size is limited to 128 MB (see level2_kernel_pgt in
+ * arch/x86/kernel/head_64.S), and it is mapped here:
+ */
+#define KERNEL_IMAGE_SIZE (128*1024*1024)
+#define KERNEL_IMAGE_START _AC(0xffffffff80000000, UL)
#ifndef __ASSEMBLY__
void clear_page(void *page);
diff --git a/include/linux/elfcore-compat.h b/include/linux/elfcore-compat.h
index 532d13a..0a90e1c 100644
--- a/include/linux/elfcore-compat.h
+++ b/include/linux/elfcore-compat.h
@@ -45,8 +45,8 @@
char pr_zomb;
char pr_nice;
compat_ulong_t pr_flag;
- compat_uid_t pr_uid;
- compat_gid_t pr_gid;
+ __compat_uid_t pr_uid;
+ __compat_gid_t pr_gid;
compat_pid_t pr_pid, pr_ppid, pr_pgrp, pr_sid;
char pr_fname[16];
char pr_psargs[ELF_PRARGSZ];
diff --git a/include/linux/ext4_fs_extents.h b/include/linux/ext4_fs_extents.h
index 697da4b..1285c58 100644
--- a/include/linux/ext4_fs_extents.h
+++ b/include/linux/ext4_fs_extents.h
@@ -227,5 +227,6 @@
ext4_lblk_t *, ext4_fsblk_t *);
extern int ext4_ext_search_right(struct inode *, struct ext4_ext_path *,
ext4_lblk_t *, ext4_fsblk_t *);
+extern void ext4_ext_drop_refs(struct ext4_ext_path *);
#endif /* _LINUX_EXT4_EXTENTS */
diff --git a/include/linux/sched.h b/include/linux/sched.h
index e217d18..2c9621f 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -242,6 +242,7 @@
extern void sched_init(void);
extern void sched_init_smp(void);
+extern asmlinkage void schedule_tail(struct task_struct *prev);
extern void init_idle(struct task_struct *idle, int cpu);
extern void init_idle_bootup_task(struct task_struct *idle);
@@ -1189,7 +1190,7 @@
int softirq_context;
#endif
#ifdef CONFIG_LOCKDEP
-# define MAX_LOCK_DEPTH 30UL
+# define MAX_LOCK_DEPTH 48UL
u64 curr_chain_key;
int lockdep_depth;
struct held_lock held_locks[MAX_LOCK_DEPTH];
diff --git a/kernel/lockdep.c b/kernel/lockdep.c
index 3574379..81a4e4a 100644
--- a/kernel/lockdep.c
+++ b/kernel/lockdep.c
@@ -779,6 +779,10 @@
* parallel walking of the hash-list safe:
*/
list_add_tail_rcu(&class->hash_entry, hash_head);
+ /*
+ * Add it to the global list of classes:
+ */
+ list_add_tail_rcu(&class->lock_entry, &all_lock_classes);
if (verbose(class)) {
graph_unlock();
@@ -2282,10 +2286,6 @@
return 0;
break;
case LOCK_USED:
- /*
- * Add it to the global list of classes:
- */
- list_add_tail_rcu(&this->class->lock_entry, &all_lock_classes);
debug_atomic_dec(&nr_unused_locks);
break;
default:
diff --git a/kernel/printk.c b/kernel/printk.c
index bee3610..9adc2a4 100644
--- a/kernel/printk.c
+++ b/kernel/printk.c
@@ -666,7 +666,7 @@
}
/* Emit the output into the temporary buffer */
printed_len += vscnprintf(printk_buf + printed_len,
- sizeof(printk_buf), fmt, args);
+ sizeof(printk_buf) - printed_len, fmt, args);
/*
* Copy the output into log_buf. If the caller didn't provide
diff --git a/kernel/sched.c b/kernel/sched.c
index b387a8d..f06950c 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -668,6 +668,8 @@
*/
unsigned int sysctl_sched_rt_period = 1000000;
+static __read_mostly int scheduler_running;
+
/*
* part of the period that we allow rt tasks to run in us.
* default: 0.95s
@@ -689,14 +691,16 @@
unsigned long flags;
struct rq *rq;
- local_irq_save(flags);
- rq = cpu_rq(cpu);
/*
* Only call sched_clock() if the scheduler has already been
* initialized (some code might call cpu_clock() very early):
*/
- if (rq->idle)
- update_rq_clock(rq);
+ if (unlikely(!scheduler_running))
+ return 0;
+
+ local_irq_save(flags);
+ rq = cpu_rq(cpu);
+ update_rq_clock(rq);
now = rq->clock;
local_irq_restore(flags);
@@ -3885,7 +3889,7 @@
asmlinkage void __sched schedule(void)
{
struct task_struct *prev, *next;
- long *switch_count;
+ unsigned long *switch_count;
struct rq *rq;
int cpu;
@@ -7284,6 +7288,8 @@
* During early bootup we pretend to be a normal task:
*/
current->sched_class = &fair_sched_class;
+
+ scheduler_running = 1;
}
#ifdef CONFIG_DEBUG_SPINLOCK_SLEEP
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index 6c091d6..c8e6492 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -202,17 +202,12 @@
static inline struct sched_entity *__pick_last_entity(struct cfs_rq *cfs_rq)
{
- struct rb_node **link = &cfs_rq->tasks_timeline.rb_node;
- struct sched_entity *se = NULL;
- struct rb_node *parent;
+ struct rb_node *last = rb_last(&cfs_rq->tasks_timeline);
- while (*link) {
- parent = *link;
- se = rb_entry(parent, struct sched_entity, run_node);
- link = &parent->rb_right;
- }
+ if (!last)
+ return NULL;
- return se;
+ return rb_entry(last, struct sched_entity, run_node);
}
/**************************************************************