diff --git a/.mailmap b/.mailmap
index 97f7b4f..4e83e7b 100644
--- a/.mailmap
+++ b/.mailmap
@@ -32,6 +32,7 @@
 Corey Minyard <minyard@acm.org>
 David Brownell <david-b@pacbell.net>
 David Woodhouse <dwmw2@shinybook.infradead.org>
+Dmitry Eremin-Solenikov <dbaryshkov@gmail.com>
 Domen Puncer <domen@coderock.org>
 Douglas Gilbert <dougg@torque.net>
 Ed L. Cashin <ecashin@coraid.com>
diff --git a/Documentation/ABI/testing/sysfs-devices-memory b/Documentation/ABI/testing/sysfs-devices-memory
index 7a16fe1..9fe91c0 100644
--- a/Documentation/ABI/testing/sysfs-devices-memory
+++ b/Documentation/ABI/testing/sysfs-devices-memory
@@ -6,7 +6,6 @@
 		internal state of the kernel memory blocks. Files could be
 		added or removed dynamically to represent hot-add/remove
 		operations.
-
 Users:		hotplug memory add/remove tools
 		https://w3.opensource.ibm.com/projects/powerpc-utils/
 
@@ -19,6 +18,56 @@
 		This is useful for a user-level agent to determine
 		identify removable sections of the memory before attempting
 		potentially expensive hot-remove memory operation
-
 Users:		hotplug memory remove tools
 		https://w3.opensource.ibm.com/projects/powerpc-utils/
+
+What:		/sys/devices/system/memory/memoryX/phys_device
+Date:		September 2008
+Contact:	Badari Pulavarty <pbadari@us.ibm.com>
+Description:
+		The file /sys/devices/system/memory/memoryX/phys_device
+		is read-only and is designed to show the name of physical
+		memory device.  Implementation is currently incomplete.
+
+What:		/sys/devices/system/memory/memoryX/phys_index
+Date:		September 2008
+Contact:	Badari Pulavarty <pbadari@us.ibm.com>
+Description:
+		The file /sys/devices/system/memory/memoryX/phys_index
+		is read-only and contains the section ID in hexadecimal
+		which is equivalent to decimal X contained in the
+		memory section directory name.
+
+What:		/sys/devices/system/memory/memoryX/state
+Date:		September 2008
+Contact:	Badari Pulavarty <pbadari@us.ibm.com>
+Description:
+		The file /sys/devices/system/memory/memoryX/state
+		is read-write.  When read, its contents show the
+		online/offline state of the memory section.  When written,
+		root can toggle the online/offline state of a removable
+		memory section (see removable file description above)
+		using the following commands.
+		# echo online > /sys/devices/system/memory/memoryX/state
+		# echo offline > /sys/devices/system/memory/memoryX/state
+
+		For example, if /sys/devices/system/memory/memory22/removable
+		contains a value of 1 and
+		/sys/devices/system/memory/memory22/state contains the
+		string "online" the following command can be executed by
+		root to offline that section.
+		# echo offline > /sys/devices/system/memory/memory22/state
+Users:		hotplug memory remove tools
+		https://w3.opensource.ibm.com/projects/powerpc-utils/
+
+What:		/sys/devices/system/node/nodeX/memoryY
+Date:		September 2008
+Contact:	Gary Hade <garyhade@us.ibm.com>
+Description:
+		When CONFIG_NUMA is enabled
+		/sys/devices/system/node/nodeX/memoryY is a symbolic link that
+		points to the corresponding /sys/devices/system/memory/memoryY
+		memory section directory.  For example, the following symbolic
+		link is created for memory section 9 on node0.
+		/sys/devices/system/node/node0/memory9 -> ../../memory/memory9
+
diff --git a/Documentation/DMA-mapping.txt b/Documentation/DMA-mapping.txt
index c74fec8..b2a4d6d 100644
--- a/Documentation/DMA-mapping.txt
+++ b/Documentation/DMA-mapping.txt
@@ -26,7 +26,7 @@
 transfer.
 
 The following API will work of course even on platforms where no such
-hardware exists, see e.g. include/asm-i386/pci.h for how it is implemented on
+hardware exists, see e.g. arch/x86/include/asm/pci.h for how it is implemented on
 top of the virt_to_bus interface.
 
 First of all, you should make sure
diff --git a/Documentation/filesystems/Locking b/Documentation/filesystems/Locking
index ccec553..cfbfa15 100644
--- a/Documentation/filesystems/Locking
+++ b/Documentation/filesystems/Locking
@@ -397,7 +397,7 @@
 };
 
 locking rules:
-	All except ->poll() may block.
+	All may block.
 			BKL
 llseek:			no	(see below)
 read:			no
diff --git a/Documentation/filesystems/proc.txt b/Documentation/filesystems/proc.txt
index 71df353..32e9463 100644
--- a/Documentation/filesystems/proc.txt
+++ b/Documentation/filesystems/proc.txt
@@ -1385,6 +1385,15 @@
 to retain dentry and inode caches.  Increasing vfs_cache_pressure beyond 100
 causes the kernel to prefer to reclaim dentries and inodes.
 
+dirty_background_bytes
+----------------------
+
+Contains the amount of dirty memory at which the pdflush background writeback
+daemon will start writeback.
+
+If dirty_background_bytes is written, dirty_background_ratio becomes a function
+of its value (dirty_background_bytes / the amount of dirtyable system memory).
+
 dirty_background_ratio
 ----------------------
 
@@ -1393,14 +1402,29 @@
 pages at which the pdflush background writeback daemon will start writing out
 dirty data.
 
+If dirty_background_ratio is written, dirty_background_bytes becomes a function
+of its value (dirty_background_ratio * the amount of dirtyable system memory).
+
+dirty_bytes
+-----------
+
+Contains the amount of dirty memory at which a process generating disk writes
+will itself start writeback.
+
+If dirty_bytes is written, dirty_ratio becomes a function of its value
+(dirty_bytes / the amount of dirtyable system memory).
+
 dirty_ratio
------------------
+-----------
 
 Contains, as a percentage of the dirtyable system memory (free pages + mapped
 pages + file cache, not including locked pages and HugePages), the number of
 pages at which a process which is generating disk writes will itself start
 writing out dirty data.
 
+If dirty_ratio is written, dirty_bytes becomes a function of its value
+(dirty_ratio * the amount of dirtyable system memory).
+
 dirty_writeback_centisecs
 -------------------------
 
diff --git a/Documentation/hwmon/adt7470 b/Documentation/hwmon/adt7470
index 75d13ca..8ce4aa0 100644
--- a/Documentation/hwmon/adt7470
+++ b/Documentation/hwmon/adt7470
@@ -31,15 +31,11 @@
 limit values. The ADT7470 will signal an ALARM if any measured value exceeds
 either limit.
 
-The ADT7470 DOES NOT sample all inputs continuously.  A single pin on the
-ADT7470 is connected to a multitude of thermal diodes, but the chip must be
-instructed explicitly to read the multitude of diodes.  If you want to use
-automatic fan control mode, you must manually read any of the temperature
-sensors or the fan control algorithm will not run.  The chip WILL NOT DO THIS
-AUTOMATICALLY; this must be done from userspace.  This may be a bug in the chip
-design, given that many other AD chips take care of this.  The driver will not
-read the registers more often than once every 5 seconds.  Further,
-configuration data is only read once per minute.
+The ADT7470 samples all inputs continuously.  A kernel thread is started up for
+the purpose of periodically querying the temperature sensors, thus allowing the
+automatic fan pwm control to set the fan speed.  The driver will not read the
+registers more often than once every 5 seconds.  Further, configuration data is
+only read once per minute.
 
 Special Features
 ----------------
@@ -72,5 +68,6 @@
 Notes
 -----
 
-As stated above, the temperature inputs must be read periodically from
-userspace in order for the automatic pwm algorithm to run.
+The temperature inputs no longer need to be read periodically from userspace in
+order for the automatic pwm algorithm to run.  This was the case for earlier
+versions of the driver.
diff --git a/Documentation/ide/warm-plug-howto.txt b/Documentation/ide/warm-plug-howto.txt
index d588546..98152bc 100644
--- a/Documentation/ide/warm-plug-howto.txt
+++ b/Documentation/ide/warm-plug-howto.txt
@@ -11,3 +11,8 @@
 # echo -n "1" > /sys/class/ide_port/idex/scan
 
 done
+
+NOTE: please make sure that partitions are unmounted and that there are
+no other active references to devices before doing "delete_devices" step,
+also do not attempt "scan" step on devices currently in use -- otherwise
+results may be unpredictable and lead to data loss if you're unlucky.
diff --git a/Documentation/ioctl/ioctl-number.txt b/Documentation/ioctl/ioctl-number.txt
index 8246991..f1d6399 100644
--- a/Documentation/ioctl/ioctl-number.txt
+++ b/Documentation/ioctl/ioctl-number.txt
@@ -84,7 +84,7 @@
 'B'	C0-FF				advanced bbus
 					<mailto:maassen@uni-freiburg.de>
 'C'	all	linux/soundcard.h
-'D'	all	asm-s390/dasd.h
+'D'	all	arch/s390/include/asm/dasd.h
 'E'	all	linux/input.h
 'F'	all	linux/fb.h
 'H'	all	linux/hiddev.h
@@ -105,7 +105,7 @@
 'S'	80-81	scsi/scsi_ioctl.h	conflict!
 'S'	82-FF	scsi/scsi.h		conflict!
 'T'	all	linux/soundcard.h	conflict!
-'T'	all	asm-i386/ioctls.h	conflict!
+'T'	all	arch/x86/include/asm/ioctls.h	conflict!
 'U'	00-EF	linux/drivers/usb/usb.h
 'V'	all	linux/vt.h
 'W'	00-1F	linux/watchdog.h	conflict!
@@ -120,7 +120,7 @@
 					<mailto:natalia@nikhefk.nikhef.nl>
 'c'	00-7F	linux/comstats.h	conflict!
 'c'	00-7F	linux/coda.h		conflict!
-'c'	80-9F	asm-s390/chsc.h
+'c'	80-9F	arch/s390/include/asm/chsc.h
 'd'	00-FF	linux/char/drm/drm/h	conflict!
 'd'	00-DF	linux/video_decoder.h	conflict!
 'd'	F0-FF	linux/digi1.h
@@ -170,7 +170,7 @@
 					<mailto:oe@port.de>
 0x80	00-1F	linux/fb.h
 0x81	00-1F	linux/videotext.h
-0x89	00-06	asm-i386/sockios.h
+0x89	00-06	arch/x86/include/asm/sockios.h
 0x89	0B-DF	linux/sockios.h
 0x89	E0-EF	linux/sockios.h		SIOCPROTOPRIVATE range
 0x89	F0-FF	linux/sockios.h		SIOCDEVPRIVATE range
diff --git a/Documentation/kernel-doc-nano-HOWTO.txt b/Documentation/kernel-doc-nano-HOWTO.txt
index c6841ee..d73fbd2 100644
--- a/Documentation/kernel-doc-nano-HOWTO.txt
+++ b/Documentation/kernel-doc-nano-HOWTO.txt
@@ -71,6 +71,11 @@
 this opening short function description line, with no intervening
 empty comment lines.
 
+If a function parameter is "..." (varargs), it should be listed in
+kernel-doc notation as:
+ * @...: description
+
+
 Example kernel-doc data structure comment.
 
 /**
@@ -282,6 +287,32 @@
 };
 
 
+Including documentation blocks in source files
+----------------------------------------------
+
+To facilitate having source code and comments close together, you can
+include kernel-doc documentation blocks that are free-form comments
+instead of being kernel-doc for functions, structures, unions,
+enums, or typedefs.  This could be used for something like a
+theory of operation for a driver or library code, for example.
+
+This is done by using a DOC: section keyword with a section title.  E.g.:
+
+/**
+ * DOC: Theory of Operation
+ *
+ * The whizbang foobar is a dilly of a gizmo.  It can do whatever you
+ * want it to do, at any time.  It reads your mind.  Here's how it works.
+ *
+ * foo bar splat
+ *
+ * The only drawback to this gizmo is that it can sometimes damage
+ * hardware, software, or its subject(s).
+ */
+
+DOC: sections are used in SGML template files as indicated below.
+
+
 How to make new SGML template files
 -----------------------------------
 
@@ -302,6 +333,9 @@
 !F<filename> <function [functions...]> is replaced by the
 documentation, in <filename>, for the functions listed.
 
+!P<filename> <section title> is replaced by the contents of the DOC:
+section titled <section title> from <filename>.
+Spaces are allowed in <section title>; do not quote the <section title>.
 
 Tim.
 */ <twaugh@redhat.com>
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index a2d8805..0b3f671 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -469,8 +469,8 @@
 
 	clearcpuid=BITNUM [X86]
 			Disable CPUID feature X for the kernel. See
-			include/asm-x86/cpufeature.h for the valid bit numbers.
-			Note the Linux specific bits are not necessarily
+			arch/x86/include/asm/cpufeature.h for the valid bit
+			numbers. Note the Linux specific bits are not necessarily
 			stable over kernel options, but the vendor specific
 			ones should be.
 			Also note that user programs calling CPUID directly
@@ -551,6 +551,11 @@
 			not work reliably with all consoles, but is known
 			to work with serial and VGA consoles.
 
+	coredump_filter=
+			[KNL] Change the default value for
+			/proc/<pid>/coredump_filter.
+			See also Documentation/filesystems/proc.txt.
+
 	cpcihp_generic=	[HW,PCI] Generic port I/O CompactPCI driver
 			Format:
 			<first_slot>,<last_slot>,<port>,<enum_bit>[,<debug>]
@@ -1117,6 +1122,8 @@
 			If there are multiple matching configurations changing
 			the same attribute, the last one is used.
 
+	lmb=debug	[KNL] Enable lmb debug messages.
+
 	load_ramdisk=	[RAM] List of ramdisks to load from floppy
 			See Documentation/blockdev/ramdisk.txt.
 
@@ -1569,6 +1576,10 @@
 
 	nr_uarts=	[SERIAL] maximum number of UARTs to be registered.
 
+	ohci1394_dma=early	[HW] enable debugging via the ohci1394 driver.
+			See Documentation/debugging-via-ohci1394.txt for more
+			info.
+
 	olpc_ec_timeout= [OLPC] ms delay when issuing EC commands
 			Rather than timing out after 20 ms if an EC
 			command is not properly ACKed, override the length
@@ -1793,10 +1804,10 @@
 			autoconfiguration.
 			Ranges are in pairs (memory base and size).
 
-	dynamic_printk
-			Enables pr_debug()/dev_dbg() calls if
-			CONFIG_DYNAMIC_PRINTK_DEBUG has been enabled. These can also
-			be switched on/off via <debugfs>/dynamic_printk/modules
+	dynamic_printk	Enables pr_debug()/dev_dbg() calls if
+			CONFIG_DYNAMIC_PRINTK_DEBUG has been enabled.
+			These can also be switched on/off via
+			<debugfs>/dynamic_printk/modules
 
 	print-fatal-signals=
 			[KNL] debug: print fatal signals
@@ -1884,7 +1895,7 @@
 
 	reboot=		[BUGS=X86-32,BUGS=ARM,BUGS=IA-64] Rebooting mode
 			Format: <reboot_mode>[,<reboot_mode2>[,...]]
-			See arch/*/kernel/reboot.c or arch/*/kernel/process.c			
+			See arch/*/kernel/reboot.c or arch/*/kernel/process.c
 
 	relax_domain_level=
 			[KNL, SMP] Set scheduler's default relax_domain_level.
@@ -2432,8 +2443,8 @@
 			Format:
 			<irq>,<irq_mask>,<io>,<full_duplex>,<do_sound>,<lockup_hack>[,<irq2>[,<irq3>[,<irq4>]]]
 
-	norandmaps	Don't use address space randomization
-			Equivalent to echo 0 > /proc/sys/kernel/randomize_va_space
+	norandmaps	Don't use address space randomization.  Equivalent to
+			echo 0 > /proc/sys/kernel/randomize_va_space
 
 ______________________________________________________________________
 
diff --git a/Documentation/kprobes.txt b/Documentation/kprobes.txt
index a79633d..48b3de9 100644
--- a/Documentation/kprobes.txt
+++ b/Documentation/kprobes.txt
@@ -497,7 +497,10 @@
 The second column identifies the type of probe (k - kprobe, r - kretprobe
 and j - jprobe), while the third column specifies the symbol+offset of
 the probe. If the probed function belongs to a module, the module name
-is also specified.
+is also specified. Following columns show probe status. If the probe is on
+a virtual address that is no longer valid (module init sections, module
+virtual addresses that correspond to modules that've been unloaded),
+such probes are marked with [GONE].
 
 /debug/kprobes/enabled: Turn kprobes ON/OFF
 
diff --git a/Documentation/magic-number.txt b/Documentation/magic-number.txt
index 9507002..505f196 100644
--- a/Documentation/magic-number.txt
+++ b/Documentation/magic-number.txt
@@ -125,14 +125,14 @@
 ROUTER_MAGIC          0x524d4157  wan_device        include/linux/wanrouter.h
 SCC_MAGIC             0x52696368  gs_port           drivers/char/scc.h
 SAVEKMSG_MAGIC1       0x53415645  savekmsg          arch/*/amiga/config.c
-GDA_MAGIC             0x58464552  gda               include/asm-mips64/sn/gda.h
+GDA_MAGIC             0x58464552  gda               arch/mips/include/asm/sn/gda.h
 RED_MAGIC1            0x5a2cf071  (any)             mm/slab.c
 STL_PORTMAGIC         0x5a7182c9  stlport           include/linux/stallion.h
 EEPROM_MAGIC_VALUE    0x5ab478d2  lanai_dev         drivers/atm/lanai.c
 HDLCDRV_MAGIC         0x5ac6e778  hdlcdrv_state     include/linux/hdlcdrv.h
 EPCA_MAGIC            0x5c6df104  channel           include/linux/epca.h
 PCXX_MAGIC            0x5c6df104  channel           drivers/char/pcxx.h
-KV_MAGIC              0x5f4b565f  kernel_vars_s     include/asm-mips64/sn/klkernvars.h
+KV_MAGIC              0x5f4b565f  kernel_vars_s     arch/mips/include/asm/sn/klkernvars.h
 I810_STATE_MAGIC      0x63657373  i810_state        sound/oss/i810_audio.c
 TRIDENT_STATE_MAGIC   0x63657373  trient_state      sound/oss/trident.c
 M3_CARD_MAGIC         0x646e6f50  m3_card           sound/oss/maestro3.c
@@ -158,7 +158,7 @@
 QUEUE_MAGIC_FREE      0xf7e1c9a3  queue_entry       drivers/scsi/arm/queue.c
 QUEUE_MAGIC_USED      0xf7e1cc33  queue_entry       drivers/scsi/arm/queue.c
 HTB_CMAGIC            0xFEFAFEF1  htb_class         net/sched/sch_htb.c
-NMI_MAGIC             0x48414d4d455201 nmi_s        include/asm-mips64/sn/nmi.h
+NMI_MAGIC             0x48414d4d455201 nmi_s        arch/mips/include/asm/sn/nmi.h
 
 Note that there are also defined special per-driver magic numbers in sound
 memory management. See include/sound/sndmagic.h for complete list of them. Many
diff --git a/Documentation/memory-hotplug.txt b/Documentation/memory-hotplug.txt
index 168117b..4c2ecf5 100644
--- a/Documentation/memory-hotplug.txt
+++ b/Documentation/memory-hotplug.txt
@@ -124,7 +124,7 @@
     This option can be kernel module too.
 
 --------------------------------
-3 sysfs files for memory hotplug
+4 sysfs files for memory hotplug
 --------------------------------
 All sections have their device information under /sys/devices/system/memory as
 
@@ -138,11 +138,12 @@
 (0x100000000 / 1Gib = 4)
 This device covers address range [0x100000000 ... 0x140000000)
 
-Under each section, you can see 3 files.
+Under each section, you can see 4 files.
 
 /sys/devices/system/memory/memoryXXX/phys_index
 /sys/devices/system/memory/memoryXXX/phys_device
 /sys/devices/system/memory/memoryXXX/state
+/sys/devices/system/memory/memoryXXX/removable
 
 'phys_index' : read-only and contains section id, same as XXX.
 'state'      : read-write
@@ -150,10 +151,20 @@
                at write: user can specify "online", "offline" command
 'phys_device': read-only: designed to show the name of physical memory device.
                This is not well implemented now.
+'removable'  : read-only: contains an integer value indicating
+               whether the memory section is removable or not
+               removable.  A value of 1 indicates that the memory
+               section is removable and a value of 0 indicates that
+               it is not removable.
 
 NOTE:
   These directories/files appear after physical memory hotplug phase.
 
+If CONFIG_NUMA is enabled the
+/sys/devices/system/memory/memoryXXX memory section
+directories can also be accessed via symbolic links located in
+the /sys/devices/system/node/node* directories.  For example:
+/sys/devices/system/node/node0/memory9 -> ../../memory/memory9
 
 --------------------------------
 4. Physical memory hot-add phase
@@ -365,7 +376,6 @@
   - allowing memory hot-add to ZONE_MOVABLE. maybe we need some switch like
     sysctl or new control file.
   - showing memory section and physical device relationship.
-  - showing memory section and node relationship (maybe good for NUMA)
   - showing memory section is under ZONE_MOVABLE or not
   - test and make it better memory offlining.
   - support HugeTLB page migration and offlining.
diff --git a/Documentation/mips/AU1xxx_IDE.README b/Documentation/mips/AU1xxx_IDE.README
index 25a6ed1..f54962a 100644
--- a/Documentation/mips/AU1xxx_IDE.README
+++ b/Documentation/mips/AU1xxx_IDE.README
@@ -44,7 +44,7 @@
 
 Two files are introduced:
 
-  a) 'include/asm-mips/mach-au1x00/au1xxx_ide.h'
+  a) 'arch/mips/include/asm/mach-au1x00/au1xxx_ide.h'
      containes : struct _auide_hwif
                  timing parameters for PIO mode 0/1/2/3/4
                  timing parameters for MWDMA 0/1/2
diff --git a/Documentation/powerpc/cpu_features.txt b/Documentation/powerpc/cpu_features.txt
index 4727398..ffa4183 100644
--- a/Documentation/powerpc/cpu_features.txt
+++ b/Documentation/powerpc/cpu_features.txt
@@ -31,7 +31,7 @@
 
 After detecting the processor type, the kernel patches out sections of code
 that shouldn't be used by writing nop's over it. Using cpufeatures requires
-just 2 macros (found in include/asm-ppc/cputable.h), as seen in head.S
+just 2 macros (found in arch/powerpc/include/asm/cputable.h), as seen in head.S
 transfer_to_handler:
 
 	#ifdef CONFIG_ALTIVEC
diff --git a/Documentation/s390/Debugging390.txt b/Documentation/s390/Debugging390.txt
index d30a281..10711d9 100644
--- a/Documentation/s390/Debugging390.txt
+++ b/Documentation/s390/Debugging390.txt
@@ -1402,7 +1402,7 @@
 possibilities of these as the instruction is made up of a  0xA opcode & the second byte being
 the syscall number. They are traced using the simple command.
 TR SVC  <Optional value or range>
-the syscalls are defined in linux/include/asm-s390/unistd.h
+the syscalls are defined in linux/arch/s390/include/asm/unistd.h
 e.g. to trace all file opens just do
 TR SVC 5 ( as this is the syscall number of open )
 
diff --git a/Documentation/s390/cds.txt b/Documentation/s390/cds.txt
index c4b7b2b..480a78e 100644
--- a/Documentation/s390/cds.txt
+++ b/Documentation/s390/cds.txt
@@ -98,7 +98,7 @@
 of them can be found on other Linux platforms implementations too.
 Miscellaneous function prototypes, data declarations, and macro definitions
 can be found in the architecture specific C header file
-linux/include/asm-s390/irq.h.
+linux/arch/s390/include/asm/irq.h.
 
 Overview of CDS interface concepts
 
diff --git a/Documentation/s390/s390dbf.txt b/Documentation/s390/s390dbf.txt
index e054209..2d10053 100644
--- a/Documentation/s390/s390dbf.txt
+++ b/Documentation/s390/s390dbf.txt
@@ -2,7 +2,7 @@
 ==================
 
 files: arch/s390/kernel/debug.c
-       include/asm-s390/debug.h
+       arch/s390/include/asm/debug.h
 
 Description:
 ------------
diff --git a/Documentation/sysctl/vm.txt b/Documentation/sysctl/vm.txt
index d79eeda..cd05994 100644
--- a/Documentation/sysctl/vm.txt
+++ b/Documentation/sysctl/vm.txt
@@ -41,7 +41,8 @@
 
 ==============================================================
 
-dirty_ratio, dirty_background_ratio, dirty_expire_centisecs,
+dirty_bytes, dirty_ratio, dirty_background_bytes,
+dirty_background_ratio, dirty_expire_centisecs,
 dirty_writeback_centisecs, highmem_is_dirtyable,
 vfs_cache_pressure, laptop_mode, block_dump, swap_token_timeout,
 drop-caches, hugepages_treat_as_movable:
diff --git a/Documentation/vm/unevictable-lru.txt b/Documentation/vm/unevictable-lru.txt
index 125eed5..0706a72 100644
--- a/Documentation/vm/unevictable-lru.txt
+++ b/Documentation/vm/unevictable-lru.txt
@@ -137,13 +137,6 @@
 map in try_to_unmap().  If try_to_unmap() returns SWAP_MLOCK, shrink_page_list()
 will cull the page at that point.
 
-Note that for anonymous pages, shrink_page_list() attempts to add the page to
-the swap cache before it tries to unmap the page.  To avoid this unnecessary
-consumption of swap space, shrink_page_list() calls try_to_munlock() to check
-whether any VM_LOCKED vmas map the page without attempting to unmap the page.
-If try_to_munlock() returns SWAP_MLOCK, shrink_page_list() will cull the page
-without consuming swap space.  try_to_munlock() will be described below.
-
 To "cull" an unevictable page, vmscan simply puts the page back on the lru
 list using putback_lru_page()--the inverse operation to isolate_lru_page()--
 after dropping the page lock.  Because the condition which makes the page
@@ -190,8 +183,8 @@
    in the VM_LOCKED flag being set for the vma.
 3) in the fault path, if mlocked pages are "culled" in the fault path,
    and when a VM_LOCKED stack segment is expanded.
-4) as mentioned above, in vmscan:shrink_page_list() with attempting to
-   reclaim a page in a VM_LOCKED vma--via try_to_unmap() or try_to_munlock().
+4) as mentioned above, in vmscan:shrink_page_list() when attempting to
+   reclaim a page in a VM_LOCKED vma via try_to_unmap().
 
 Mlocked pages become unlocked and rescued from the unevictable list when:
 
@@ -260,9 +253,9 @@
 
 2) vmas mapping hugetlbfs page are already effectively pinned into memory.
    We don't need nor want to mlock() these pages.  However, to preserve the
-   prior behavior of mlock()--before the unevictable/mlock changes--mlock_fixup()
-   will call make_pages_present() in the hugetlbfs vma range to allocate the
-   huge pages and populate the ptes.
+   prior behavior of mlock()--before the unevictable/mlock changes--
+   mlock_fixup() will call make_pages_present() in the hugetlbfs vma range
+   to allocate the huge pages and populate the ptes.
 
 3) vmas with VM_DONTEXPAND|VM_RESERVED are generally user space mappings of
    kernel pages, such as the vdso page, relay channel pages, etc.  These pages
@@ -322,7 +315,7 @@
 passing a flag to indicate that munlock() is being performed.
 
 Because the vma access protections could have been changed to PROT_NONE after
-faulting in and mlocking some pages, get_user_pages() was unreliable for visiting
+faulting in and mlocking pages, get_user_pages() was unreliable for visiting
 these pages for munlocking.  Because we don't want to leave pages mlocked(),
 get_user_pages() was enhanced to accept a flag to ignore the permissions when
 fetching the pages--all of which should be resident as a result of previous
@@ -416,8 +409,8 @@
 When unmapping an mlocked region of memory, whether by an explicit call to
 munmap() or via an internal unmap from exit() or exec() processing, we must
 munlock the pages if we're removing the last VM_LOCKED vma that maps the pages.
-Before the unevictable/mlock changes, mlocking did not mark the pages in any way,
-so unmapping them required no processing.
+Before the unevictable/mlock changes, mlocking did not mark the pages in any
+way, so unmapping them required no processing.
 
 To munlock a range of memory under the unevictable/mlock infrastructure, the
 munmap() hander and task address space tear down function call
@@ -517,12 +510,10 @@
 Mlocked pages:  try_to_munlock() Reverse Map Scan
 
 TODO/FIXME:  a better name might be page_mlocked()--analogous to the
-page_referenced() reverse map walker--especially if we continue to call this
-from shrink_page_list().  See related TODO/FIXME below.
+page_referenced() reverse map walker.
 
-When munlock_vma_page()--see "Mlocked Pages:  munlock()/munlockall() System
-Call Handling" above--tries to munlock a page, or when shrink_page_list()
-encounters an anonymous page that is not yet in the swap cache, they need to
+When munlock_vma_page()--see "Mlocked Pages:  munlock()/munlockall()
+System Call Handling" above--tries to munlock a page, it needs to
 determine whether or not the page is mapped by any VM_LOCKED vma, without
 actually attempting to unmap all ptes from the page.  For this purpose, the
 unevictable/mlock infrastructure introduced a variant of try_to_unmap() called
@@ -535,10 +526,7 @@
 pages mapped in linear VMAs, as in the try_to_unmap() case, the functions
 attempt to acquire the associated mmap semphore, mlock the page via
 mlock_vma_page() and return SWAP_MLOCK.  This effectively undoes the
-pre-clearing of the page's PG_mlocked done by munlock_vma_page() and informs
-shrink_page_list() that the anonymous page should be culled rather than added
-to the swap cache in preparation for a try_to_unmap() that will almost
-certainly fail.
+pre-clearing of the page's PG_mlocked done by munlock_vma_page().
 
 If try_to_unmap() is unable to acquire a VM_LOCKED vma's associated mmap
 semaphore, it will return SWAP_AGAIN.  This will allow shrink_page_list()
@@ -557,10 +545,7 @@
 successfully acquire the vma's mmap semphore for read and mlock the page.
 Although try_to_munlock() can be called many [very many!] times when
 munlock()ing a large region or tearing down a large address space that has been
-mlocked via mlockall(), overall this is a fairly rare event.  In addition,
-although shrink_page_list() calls try_to_munlock() for every anonymous page that
-it handles that is not yet in the swap cache, on average anonymous pages will
-have very short reverse map lists.
+mlocked via mlockall(), overall this is a fairly rare event.
 
 Mlocked Page:  Page Reclaim in shrink_*_list()
 
@@ -588,8 +573,8 @@
    munlock_vma_page() was forced to let the page back on to the normal
    LRU list for vmscan to handle.
 
-shrink_inactive_list() also culls any unevictable pages that it finds
-on the inactive lists, again diverting them to the appropriate zone's unevictable
+shrink_inactive_list() also culls any unevictable pages that it finds on
+the inactive lists, again diverting them to the appropriate zone's unevictable
 lru list.  shrink_inactive_list() should only see SHM_LOCKed pages that became
 SHM_LOCKed after shrink_active_list() had moved them to the inactive list, or
 pages mapped into VM_LOCKED vmas that munlock_vma_page() couldn't isolate from
@@ -597,19 +582,7 @@
 the latter, but will pass on to shrink_page_list().
 
 shrink_page_list() again culls obviously unevictable pages that it could
-encounter for similar reason to shrink_inactive_list().  As already discussed,
-shrink_page_list() proactively looks for anonymous pages that should have
-PG_mlocked set but don't--these would not be detected by page_evictable()--to
-avoid adding them to the swap cache unnecessarily.  File pages mapped into
+encounter for similar reason to shrink_inactive_list().  Pages mapped into
 VM_LOCKED vmas but without PG_mlocked set will make it all the way to
-try_to_unmap().  shrink_page_list() will divert them to the unevictable list when
-try_to_unmap() returns SWAP_MLOCK, as discussed above.
-
-TODO/FIXME:  If we can enhance the swap cache to reliably remove entries
-with page_count(page) > 2, as long as all ptes are mapped to the page and
-not the swap entry, we can probably remove the call to try_to_munlock() in
-shrink_page_list() and just remove the page from the swap cache when
-try_to_unmap() returns SWAP_MLOCK.   Currently, remove_exclusive_swap_page()
-doesn't seem to allow that.
-
-
+try_to_unmap().  shrink_page_list() will divert them to the unevictable list
+when try_to_unmap() returns SWAP_MLOCK, as discussed above.
diff --git a/Documentation/x86/zero-page.txt b/Documentation/x86/zero-page.txt
index 169ad42..4f91385 100644
--- a/Documentation/x86/zero-page.txt
+++ b/Documentation/x86/zero-page.txt
@@ -3,7 +3,7 @@
 real-mode setup code of the kernel. References/settings to it mainly
 are in:
 
-  include/asm-x86/bootparam.h
+  arch/x86/include/asm/bootparam.h
 
 
 Offset	Proto	Name		Meaning
diff --git a/MAINTAINERS b/MAINTAINERS
index 141aff6..094dd52 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -616,7 +616,7 @@
 S:	Maintained
 
 ARM/TOSA MACHINE SUPPORT
-P:	Dmitry Baryshkov
+P:	Dmitry Eremin-Solenikov
 M:	dbaryshkov@gmail.com
 P:	Dirk Opfer
 M:	dirk@opfer-online.de
@@ -1092,11 +1092,8 @@
 
 CHECKPATCH
 P:	Andy Whitcroft
-M:	apw@shadowen.org
-P:	Randy Dunlap
-M:	rdunlap@xenotime.net
-P:	Joel Schopp
-M:	jschopp@austin.ibm.com
+M:	apw@canonical.com
+L:	linux-kernel@vger.kernel.org
 S:	Supported
 
 CISCO 10G ETHERNET DRIVER
diff --git a/arch/alpha/include/asm/atomic.h b/arch/alpha/include/asm/atomic.h
index ca88e54..62b3635 100644
--- a/arch/alpha/include/asm/atomic.h
+++ b/arch/alpha/include/asm/atomic.h
@@ -1,6 +1,7 @@
 #ifndef _ALPHA_ATOMIC_H
 #define _ALPHA_ATOMIC_H
 
+#include <linux/types.h>
 #include <asm/barrier.h>
 #include <asm/system.h>
 
@@ -13,14 +14,6 @@
  */
 
 
-/*
- * Counter is volatile to make sure gcc doesn't try to be clever
- * and move things around on us. We need to use _exactly_ the address
- * the user gave us, not some alias that contains the same information.
- */
-typedef struct { volatile int counter; } atomic_t;
-typedef struct { volatile long counter; } atomic64_t;
-
 #define ATOMIC_INIT(i)		( (atomic_t) { (i) } )
 #define ATOMIC64_INIT(i)	( (atomic64_t) { (i) } )
 
diff --git a/arch/arm/include/asm/atomic.h b/arch/arm/include/asm/atomic.h
index 325f881..ee99723 100644
--- a/arch/arm/include/asm/atomic.h
+++ b/arch/arm/include/asm/atomic.h
@@ -12,10 +12,9 @@
 #define __ASM_ARM_ATOMIC_H
 
 #include <linux/compiler.h>
+#include <linux/types.h>
 #include <asm/system.h>
 
-typedef struct { volatile int counter; } atomic_t;
-
 #define ATOMIC_INIT(i)	{ (i) }
 
 #ifdef __KERNEL__
diff --git a/arch/arm/kernel/kprobes.c b/arch/arm/kernel/kprobes.c
index 3f9abe0..f692efd 100644
--- a/arch/arm/kernel/kprobes.c
+++ b/arch/arm/kernel/kprobes.c
@@ -92,9 +92,7 @@
 void __kprobes arch_remove_kprobe(struct kprobe *p)
 {
 	if (p->ainsn.insn) {
-		mutex_lock(&kprobe_mutex);
 		free_insn_slot(p->ainsn.insn, 0);
-		mutex_unlock(&kprobe_mutex);
 		p->ainsn.insn = NULL;
 	}
 }
diff --git a/arch/arm/mach-s3c2410/include/mach/spi.h b/arch/arm/mach-s3c2410/include/mach/spi.h
index 774f3ad..1d300fb 100644
--- a/arch/arm/mach-s3c2410/include/mach/spi.h
+++ b/arch/arm/mach-s3c2410/include/mach/spi.h
@@ -14,7 +14,7 @@
 #define __ASM_ARCH_SPI_H __FILE__
 
 struct s3c2410_spi_info {
-	unsigned long		 pin_cs;	/* simple gpio cs */
+	int			 pin_cs;	/* simple gpio cs */
 	unsigned int		 num_cs;	/* total chipselects */
 	int			 bus_num;       /* bus number to use. */
 
diff --git a/arch/avr32/include/asm/atomic.h b/arch/avr32/include/asm/atomic.h
index 7ef3862..3188151 100644
--- a/arch/avr32/include/asm/atomic.h
+++ b/arch/avr32/include/asm/atomic.h
@@ -14,9 +14,9 @@
 #ifndef __ASM_AVR32_ATOMIC_H
 #define __ASM_AVR32_ATOMIC_H
 
+#include <linux/types.h>
 #include <asm/system.h>
 
-typedef struct { volatile int counter; } atomic_t;
 #define ATOMIC_INIT(i)  { (i) }
 
 #define atomic_read(v)		((v)->counter)
diff --git a/arch/avr32/kernel/traps.c b/arch/avr32/kernel/traps.c
index 0d98737..d547c8d 100644
--- a/arch/avr32/kernel/traps.c
+++ b/arch/avr32/kernel/traps.c
@@ -7,6 +7,7 @@
  */
 
 #include <linux/bug.h>
+#include <linux/hardirq.h>
 #include <linux/init.h>
 #include <linux/kallsyms.h>
 #include <linux/kdebug.h>
diff --git a/arch/blackfin/include/asm/atomic.h b/arch/blackfin/include/asm/atomic.h
index 7cf5087..25776c1 100644
--- a/arch/blackfin/include/asm/atomic.h
+++ b/arch/blackfin/include/asm/atomic.h
@@ -1,6 +1,7 @@
 #ifndef __ARCH_BLACKFIN_ATOMIC__
 #define __ARCH_BLACKFIN_ATOMIC__
 
+#include <linux/types.h>
 #include <asm/system.h>	/* local_irq_XXX() */
 
 /*
@@ -13,9 +14,6 @@
  * Tony Kou (tonyko@lineo.ca)   Lineo Inc.   2001
  */
 
-typedef struct {
-	int counter;
-} atomic_t;
 #define ATOMIC_INIT(i)	{ (i) }
 
 #define atomic_read(v)		((v)->counter)
diff --git a/arch/cris/include/asm/atomic.h b/arch/cris/include/asm/atomic.h
index f71ea68..5718dd8 100644
--- a/arch/cris/include/asm/atomic.h
+++ b/arch/cris/include/asm/atomic.h
@@ -4,7 +4,7 @@
 #define __ASM_CRIS_ATOMIC__
 
 #include <linux/compiler.h>
-
+#include <linux/types.h>
 #include <asm/system.h>
 #include <arch/atomic.h>
 
@@ -13,8 +13,6 @@
  * resource counting etc..
  */
 
-typedef struct { volatile int counter; } atomic_t;
-
 #define ATOMIC_INIT(i)  { (i) }
 
 #define atomic_read(v) ((v)->counter)
diff --git a/arch/h8300/include/asm/atomic.h b/arch/h8300/include/asm/atomic.h
index b4cf0ea..833186c 100644
--- a/arch/h8300/include/asm/atomic.h
+++ b/arch/h8300/include/asm/atomic.h
@@ -1,12 +1,13 @@
 #ifndef __ARCH_H8300_ATOMIC__
 #define __ARCH_H8300_ATOMIC__
 
+#include <linux/types.h>
+
 /*
  * Atomic operations that C can't guarantee us.  Useful for
  * resource counting etc..
  */
 
-typedef struct { int counter; } atomic_t;
 #define ATOMIC_INIT(i)	{ (i) }
 
 #define atomic_read(v)		((v)->counter)
diff --git a/arch/ia64/include/asm/atomic.h b/arch/ia64/include/asm/atomic.h
index 50c2b83..d37292b 100644
--- a/arch/ia64/include/asm/atomic.h
+++ b/arch/ia64/include/asm/atomic.h
@@ -17,12 +17,6 @@
 #include <asm/intrinsics.h>
 #include <asm/system.h>
 
-/*
- * On IA-64, counter must always be volatile to ensure that that the
- * memory accesses are ordered.
- */
-typedef struct { volatile __s32 counter; } atomic_t;
-typedef struct { volatile __s64 counter; } atomic64_t;
 
 #define ATOMIC_INIT(i)		((atomic_t) { (i) })
 #define ATOMIC64_INIT(i)	((atomic64_t) { (i) })
diff --git a/arch/ia64/kernel/kprobes.c b/arch/ia64/kernel/kprobes.c
index f07688d..097b84d 100644
--- a/arch/ia64/kernel/kprobes.c
+++ b/arch/ia64/kernel/kprobes.c
@@ -670,9 +670,11 @@
 
 void __kprobes arch_remove_kprobe(struct kprobe *p)
 {
-	mutex_lock(&kprobe_mutex);
-	free_insn_slot(p->ainsn.insn, p->ainsn.inst_flag & INST_FLAG_BOOSTABLE);
-	mutex_unlock(&kprobe_mutex);
+	if (p->ainsn.insn) {
+		free_insn_slot(p->ainsn.insn,
+			       p->ainsn.inst_flag & INST_FLAG_BOOSTABLE);
+		p->ainsn.insn = NULL;
+	}
 }
 /*
  * We are resuming execution after a single step fault, so the pt_regs
diff --git a/arch/ia64/mm/init.c b/arch/ia64/mm/init.c
index 054bcd9..56e1290 100644
--- a/arch/ia64/mm/init.c
+++ b/arch/ia64/mm/init.c
@@ -692,7 +692,7 @@
 	pgdat = NODE_DATA(nid);
 
 	zone = pgdat->node_zones + ZONE_NORMAL;
-	ret = __add_pages(zone, start_pfn, nr_pages);
+	ret = __add_pages(nid, zone, start_pfn, nr_pages);
 
 	if (ret)
 		printk("%s: Problem encountered in __add_pages() as ret=%d\n",
diff --git a/arch/m68knommu/include/asm/atomic.h b/arch/m68knommu/include/asm/atomic.h
index d5632a3..6bb6748 100644
--- a/arch/m68knommu/include/asm/atomic.h
+++ b/arch/m68knommu/include/asm/atomic.h
@@ -1,6 +1,7 @@
 #ifndef __ARCH_M68KNOMMU_ATOMIC__
 #define __ARCH_M68KNOMMU_ATOMIC__
 
+#include <linux/types.h>
 #include <asm/system.h>
 
 /*
@@ -12,7 +13,6 @@
  * We do not have SMP m68k systems, so we don't have to deal with that.
  */
 
-typedef struct { int counter; } atomic_t;
 #define ATOMIC_INIT(i)	{ (i) }
 
 #define atomic_read(v)		((v)->counter)
diff --git a/arch/mips/include/asm/atomic.h b/arch/mips/include/asm/atomic.h
index 1232be3..c996c3b 100644
--- a/arch/mips/include/asm/atomic.h
+++ b/arch/mips/include/asm/atomic.h
@@ -15,13 +15,12 @@
 #define _ASM_ATOMIC_H
 
 #include <linux/irqflags.h>
+#include <linux/types.h>
 #include <asm/barrier.h>
 #include <asm/cpu-features.h>
 #include <asm/war.h>
 #include <asm/system.h>
 
-typedef struct { volatile int counter; } atomic_t;
-
 #define ATOMIC_INIT(i)    { (i) }
 
 /*
@@ -404,8 +403,6 @@
 
 #ifdef CONFIG_64BIT
 
-typedef struct { volatile long counter; } atomic64_t;
-
 #define ATOMIC64_INIT(i)    { (i) }
 
 /*
diff --git a/arch/parisc/include/asm/atomic.h b/arch/parisc/include/asm/atomic.h
index 57fcc4a..edbfe25c 100644
--- a/arch/parisc/include/asm/atomic.h
+++ b/arch/parisc/include/asm/atomic.h
@@ -155,14 +155,11 @@
 #define cmpxchg64_local(ptr, o, n) __cmpxchg64_local_generic((ptr), (o), (n))
 #endif
 
-/* Note that we need not lock read accesses - aligned word writes/reads
- * are atomic, so a reader never sees unconsistent values.
- *
- * Cache-line alignment would conflict with, for example, linux/module.h
+/*
+ * Note that we need not lock read accesses - aligned word writes/reads
+ * are atomic, so a reader never sees inconsistent values.
  */
 
-typedef struct { volatile int counter; } atomic_t;
-
 /* It's possible to reduce all atomic operations to either
  * __atomic_add_return, atomic_set and atomic_read (the latter
  * is there only for consistency).
@@ -260,8 +257,6 @@
 
 #ifdef CONFIG_64BIT
 
-typedef struct { volatile s64 counter; } atomic64_t;
-
 #define ATOMIC64_INIT(i) ((atomic64_t) { (i) })
 
 static __inline__ int
diff --git a/arch/powerpc/include/asm/atomic.h b/arch/powerpc/include/asm/atomic.h
index 499be5b..b401950 100644
--- a/arch/powerpc/include/asm/atomic.h
+++ b/arch/powerpc/include/asm/atomic.h
@@ -5,7 +5,7 @@
  * PowerPC atomic operations
  */
 
-typedef struct { int counter; } atomic_t;
+#include <linux/types.h>
 
 #ifdef __KERNEL__
 #include <linux/compiler.h>
@@ -251,8 +251,6 @@
 
 #ifdef __powerpc64__
 
-typedef struct { long counter; } atomic64_t;
-
 #define ATOMIC64_INIT(i)	{ (i) }
 
 static __inline__ long atomic64_read(const atomic64_t *v)
diff --git a/arch/powerpc/include/asm/hugetlb.h b/arch/powerpc/include/asm/hugetlb.h
index 26f0d0a..b1dafb6 100644
--- a/arch/powerpc/include/asm/hugetlb.h
+++ b/arch/powerpc/include/asm/hugetlb.h
@@ -18,6 +18,12 @@
 			      pte_t *ptep);
 
 /*
+ * The version of vma_mmu_pagesize() in arch/powerpc/mm/hugetlbpage.c needs
+ * to override the version in mm/hugetlb.c
+ */
+#define vma_mmu_pagesize vma_mmu_pagesize
+
+/*
  * If the arch doesn't supply something else, assume that hugepage
  * size aligned regions are ok without further preparation.
  */
diff --git a/arch/powerpc/kernel/kprobes.c b/arch/powerpc/kernel/kprobes.c
index de79915..989edcd 100644
--- a/arch/powerpc/kernel/kprobes.c
+++ b/arch/powerpc/kernel/kprobes.c
@@ -96,9 +96,10 @@
 
 void __kprobes arch_remove_kprobe(struct kprobe *p)
 {
-	mutex_lock(&kprobe_mutex);
-	free_insn_slot(p->ainsn.insn, 0);
-	mutex_unlock(&kprobe_mutex);
+	if (p->ainsn.insn) {
+		free_insn_slot(p->ainsn.insn, 0);
+		p->ainsn.insn = NULL;
+	}
 }
 
 static void __kprobes prepare_singlestep(struct kprobe *p, struct pt_regs *regs)
diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c
index 201c7a5..9920d6a 100644
--- a/arch/powerpc/mm/hugetlbpage.c
+++ b/arch/powerpc/mm/hugetlbpage.c
@@ -512,6 +512,13 @@
 	return slice_get_unmapped_area(addr, len, flags, mmu_psize, 1, 0);
 }
 
+unsigned long vma_mmu_pagesize(struct vm_area_struct *vma)
+{
+	unsigned int psize = get_slice_psize(vma->vm_mm, vma->vm_start);
+
+	return 1UL << mmu_psize_to_shift(psize);	/* shift -> size */
+}
+
 /*
  * Called by asm hashtable.S for doing lazy icache flush
  */
diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c
index 53b06eb..f00f09a 100644
--- a/arch/powerpc/mm/mem.c
+++ b/arch/powerpc/mm/mem.c
@@ -132,7 +132,7 @@
 	/* this should work for most non-highmem platforms */
 	zone = pgdata->node_zones;
 
-	return __add_pages(zone, start_pfn, nr_pages);
+	return __add_pages(nid, zone, start_pfn, nr_pages);
 }
 #endif /* CONFIG_MEMORY_HOTPLUG */
 
diff --git a/arch/s390/include/asm/atomic.h b/arch/s390/include/asm/atomic.h
index 2d18465..de432f2 100644
--- a/arch/s390/include/asm/atomic.h
+++ b/arch/s390/include/asm/atomic.h
@@ -2,6 +2,7 @@
 #define __ARCH_S390_ATOMIC__
 
 #include <linux/compiler.h>
+#include <linux/types.h>
 
 /*
  *  include/asm-s390/atomic.h
@@ -23,9 +24,6 @@
  * S390 uses 'Compare And Swap' for atomicity in SMP enviroment
  */
 
-typedef struct {
-	int counter;
-} __attribute__ ((aligned (4))) atomic_t;
 #define ATOMIC_INIT(i)  { (i) }
 
 #ifdef __KERNEL__
@@ -149,9 +147,6 @@
 #undef __CS_LOOP
 
 #ifdef __s390x__
-typedef struct {
-	long long counter;
-} __attribute__ ((aligned (8))) atomic64_t;
 #define ATOMIC64_INIT(i)  { (i) }
 
 #if __GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ > 2)
diff --git a/arch/s390/kernel/kprobes.c b/arch/s390/kernel/kprobes.c
index 569079e..9b92856 100644
--- a/arch/s390/kernel/kprobes.c
+++ b/arch/s390/kernel/kprobes.c
@@ -218,9 +218,10 @@
 
 void __kprobes arch_remove_kprobe(struct kprobe *p)
 {
-	mutex_lock(&kprobe_mutex);
-	free_insn_slot(p->ainsn.insn, 0);
-	mutex_unlock(&kprobe_mutex);
+	if (p->ainsn.insn) {
+		free_insn_slot(p->ainsn.insn, 0);
+		p->ainsn.insn = NULL;
+	}
 }
 
 static void __kprobes prepare_singlestep(struct kprobe *p, struct pt_regs *regs)
diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c
index 158b0d6..f0258ca 100644
--- a/arch/s390/mm/init.c
+++ b/arch/s390/mm/init.c
@@ -183,7 +183,7 @@
 	rc = vmem_add_mapping(start, size);
 	if (rc)
 		return rc;
-	rc = __add_pages(zone, PFN_DOWN(start), PFN_DOWN(size));
+	rc = __add_pages(nid, zone, PFN_DOWN(start), PFN_DOWN(size));
 	if (rc)
 		vmem_remove_mapping(start, size);
 	return rc;
diff --git a/arch/sh/include/asm/atomic.h b/arch/sh/include/asm/atomic.h
index c043ef0..6327ffb 100644
--- a/arch/sh/include/asm/atomic.h
+++ b/arch/sh/include/asm/atomic.h
@@ -7,16 +7,15 @@
  *
  */
 
-typedef struct { volatile int counter; } atomic_t;
+#include <linux/compiler.h>
+#include <linux/types.h>
+#include <asm/system.h>
 
 #define ATOMIC_INIT(i)	( (atomic_t) { (i) } )
 
 #define atomic_read(v)		((v)->counter)
 #define atomic_set(v,i)		((v)->counter = (i))
 
-#include <linux/compiler.h>
-#include <asm/system.h>
-
 #if defined(CONFIG_GUSA_RB)
 #include <asm/atomic-grb.h>
 #elif defined(CONFIG_CPU_SH4A)
diff --git a/arch/sh/kernel/traps_32.c b/arch/sh/kernel/traps_32.c
index 88807a2..c0aa3d8 100644
--- a/arch/sh/kernel/traps_32.c
+++ b/arch/sh/kernel/traps_32.c
@@ -13,6 +13,7 @@
  */
 #include <linux/kernel.h>
 #include <linux/ptrace.h>
+#include <linux/hardirq.h>
 #include <linux/init.h>
 #include <linux/spinlock.h>
 #include <linux/module.h>
diff --git a/arch/sh/mm/init.c b/arch/sh/mm/init.c
index 6cbef8c..3edf297 100644
--- a/arch/sh/mm/init.c
+++ b/arch/sh/mm/init.c
@@ -311,7 +311,8 @@
 	pgdat = NODE_DATA(nid);
 
 	/* We only have ZONE_NORMAL, so this is easy.. */
-	ret = __add_pages(pgdat->node_zones + ZONE_NORMAL, start_pfn, nr_pages);
+	ret = __add_pages(nid, pgdat->node_zones + ZONE_NORMAL,
+				start_pfn, nr_pages);
 	if (unlikely(ret))
 		printk("%s: Failed, __add_pages() == %d\n", __func__, ret);
 
diff --git a/arch/sparc/include/asm/atomic_32.h b/arch/sparc/include/asm/atomic_32.h
index 5c944b5..ce46597 100644
--- a/arch/sparc/include/asm/atomic_32.h
+++ b/arch/sparc/include/asm/atomic_32.h
@@ -13,8 +13,6 @@
 
 #include <linux/types.h>
 
-typedef struct { volatile int counter; } atomic_t;
-
 #ifdef __KERNEL__
 
 #define ATOMIC_INIT(i)  { (i) }
diff --git a/arch/sparc/include/asm/atomic_64.h b/arch/sparc/include/asm/atomic_64.h
index 5982c5a..a0a7064 100644
--- a/arch/sparc/include/asm/atomic_64.h
+++ b/arch/sparc/include/asm/atomic_64.h
@@ -10,9 +10,6 @@
 #include <linux/types.h>
 #include <asm/system.h>
 
-typedef struct { volatile int counter; } atomic_t;
-typedef struct { volatile __s64 counter; } atomic64_t;
-
 #define ATOMIC_INIT(i)		{ (i) }
 #define ATOMIC64_INIT(i)	{ (i) }
 
diff --git a/arch/um/kernel/trap.c b/arch/um/kernel/trap.c
index 44e4904..7384d8a 100644
--- a/arch/um/kernel/trap.c
+++ b/arch/um/kernel/trap.c
@@ -64,11 +64,10 @@
 
 	do {
 		int fault;
-survive:
+
 		fault = handle_mm_fault(mm, vma, address, is_write);
 		if (unlikely(fault & VM_FAULT_ERROR)) {
 			if (fault & VM_FAULT_OOM) {
-				err = -ENOMEM;
 				goto out_of_memory;
 			} else if (fault & VM_FAULT_SIGBUS) {
 				err = -EACCES;
@@ -104,18 +103,14 @@
 out_nosemaphore:
 	return err;
 
-/*
- * We ran out of memory, or some other thing happened to us that made
- * us unable to handle the page fault gracefully.
- */
 out_of_memory:
-	if (is_global_init(current)) {
-		up_read(&mm->mmap_sem);
-		yield();
-		down_read(&mm->mmap_sem);
-		goto survive;
-	}
-	goto out;
+	/*
+	 * We ran out of memory, call the OOM killer, and return to userspace
+	 * (which will retry the fault, or kill us if we got oom-killed).
+	 */
+	up_read(&mm->mmap_sem);
+	pagefault_out_of_memory();
+	return 0;
 }
 
 static void bad_segv(struct faultinfo fi, unsigned long ip)
@@ -214,9 +209,6 @@
 		si.si_addr = (void __user *)address;
 		current->thread.arch.faultinfo = fi;
 		force_sig_info(SIGBUS, &si, current);
-	} else if (err == -ENOMEM) {
-		printk(KERN_INFO "VM: killing process %s\n", current->comm);
-		do_exit(SIGKILL);
 	} else {
 		BUG_ON(err != -EFAULT);
 		si.si_signo = SIGSEGV;
diff --git a/arch/x86/include/asm/atomic_32.h b/arch/x86/include/asm/atomic_32.h
index ad5b9f6..85b46fb 100644
--- a/arch/x86/include/asm/atomic_32.h
+++ b/arch/x86/include/asm/atomic_32.h
@@ -2,6 +2,7 @@
 #define _ASM_X86_ATOMIC_32_H
 
 #include <linux/compiler.h>
+#include <linux/types.h>
 #include <asm/processor.h>
 #include <asm/cmpxchg.h>
 
@@ -10,15 +11,6 @@
  * resource counting etc..
  */
 
-/*
- * Make sure gcc doesn't try to be clever and move things around
- * on us. We need to use _exactly_ the address the user gave us,
- * not some alias that contains the same information.
- */
-typedef struct {
-	int counter;
-} atomic_t;
-
 #define ATOMIC_INIT(i)	{ (i) }
 
 /**
diff --git a/arch/x86/include/asm/atomic_64.h b/arch/x86/include/asm/atomic_64.h
index 279d2a73..8c21731 100644
--- a/arch/x86/include/asm/atomic_64.h
+++ b/arch/x86/include/asm/atomic_64.h
@@ -1,25 +1,15 @@
 #ifndef _ASM_X86_ATOMIC_64_H
 #define _ASM_X86_ATOMIC_64_H
 
+#include <linux/types.h>
 #include <asm/alternative.h>
 #include <asm/cmpxchg.h>
 
-/* atomic_t should be 32 bit signed type */
-
 /*
  * Atomic operations that C can't guarantee us.  Useful for
  * resource counting etc..
  */
 
-/*
- * Make sure gcc doesn't try to be clever and move things around
- * on us. We need to use _exactly_ the address the user gave us,
- * not some alias that contains the same information.
- */
-typedef struct {
-	int counter;
-} atomic_t;
-
 #define ATOMIC_INIT(i)	{ (i) }
 
 /**
@@ -191,11 +181,7 @@
 #define atomic_inc_return(v)  (atomic_add_return(1, v))
 #define atomic_dec_return(v)  (atomic_sub_return(1, v))
 
-/* An 64bit atomic type */
-
-typedef struct {
-	long counter;
-} atomic64_t;
+/* The 64-bit atomic type */
 
 #define ATOMIC64_INIT(i)	{ (i) }
 
diff --git a/arch/x86/include/asm/unwind.h b/arch/x86/include/asm/unwind.h
deleted file mode 100644
index 8b064bd..0000000
--- a/arch/x86/include/asm/unwind.h
+++ /dev/null
@@ -1,13 +0,0 @@
-#ifndef _ASM_X86_UNWIND_H
-#define _ASM_X86_UNWIND_H
-
-#define UNW_PC(frame) ((void)(frame), 0UL)
-#define UNW_SP(frame) ((void)(frame), 0UL)
-#define UNW_FP(frame) ((void)(frame), 0UL)
-
-static inline int arch_unw_user_mode(const void *info)
-{
-	return 0;
-}
-
-#endif /* _ASM_X86_UNWIND_H */
diff --git a/arch/x86/kernel/kprobes.c b/arch/x86/kernel/kprobes.c
index 6c27679..eead6f8 100644
--- a/arch/x86/kernel/kprobes.c
+++ b/arch/x86/kernel/kprobes.c
@@ -376,9 +376,10 @@
 
 void __kprobes arch_remove_kprobe(struct kprobe *p)
 {
-	mutex_lock(&kprobe_mutex);
-	free_insn_slot(p->ainsn.insn, (p->ainsn.boostable == 1));
-	mutex_unlock(&kprobe_mutex);
+	if (p->ainsn.insn) {
+		free_insn_slot(p->ainsn.insn, (p->ainsn.boostable == 1));
+		p->ainsn.insn = NULL;
+	}
 }
 
 static void __kprobes save_previous_kprobe(struct kprobe_ctlblk *kcb)
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index ce6650e..c9a666c 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -20,7 +20,6 @@
 #include <linux/module.h>
 #include <linux/ptrace.h>
 #include <linux/string.h>
-#include <linux/unwind.h>
 #include <linux/delay.h>
 #include <linux/errno.h>
 #include <linux/kexec.h>
@@ -51,7 +50,6 @@
 #include <asm/debugreg.h>
 #include <asm/atomic.h>
 #include <asm/system.h>
-#include <asm/unwind.h>
 #include <asm/traps.h>
 #include <asm/desc.h>
 #include <asm/i387.h>
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index 57ec8c8..9e268b6b 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -667,7 +667,6 @@
 	if (unlikely(in_atomic() || !mm))
 		goto bad_area_nosemaphore;
 
-again:
 	/*
 	 * When running in the kernel we expect faults to occur only to
 	 * addresses in user space.  All other faults represent errors in the
@@ -859,25 +858,14 @@
 	oops_end(flags, regs, sig);
 #endif
 
-/*
- * We ran out of memory, or some other thing happened to us that made
- * us unable to handle the page fault gracefully.
- */
 out_of_memory:
+	/*
+	 * We ran out of memory, call the OOM killer, and return to userspace
+	 * (which will retry the fault, or kill us if we got oom-killed).
+	 */
 	up_read(&mm->mmap_sem);
-	if (is_global_init(tsk)) {
-		yield();
-		/*
-		 * Re-lookup the vma - in theory the vma tree might
-		 * have changed:
-		 */
-		goto again;
-	}
-
-	printk("VM: killing process %s\n", tsk->comm);
-	if (error_code & PF_USER)
-		do_group_exit(SIGKILL);
-	goto no_context;
+	pagefault_out_of_memory();
+	return;
 
 do_sigbus:
 	up_read(&mm->mmap_sem);
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
index f99a6c6..544d724 100644
--- a/arch/x86/mm/init_32.c
+++ b/arch/x86/mm/init_32.c
@@ -1079,7 +1079,7 @@
 	unsigned long start_pfn = start >> PAGE_SHIFT;
 	unsigned long nr_pages = size >> PAGE_SHIFT;
 
-	return __add_pages(zone, start_pfn, nr_pages);
+	return __add_pages(nid, zone, start_pfn, nr_pages);
 }
 #endif
 
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index 9f7a0d2..54c437e 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -857,7 +857,7 @@
 	if (last_mapped_pfn > max_pfn_mapped)
 		max_pfn_mapped = last_mapped_pfn;
 
-	ret = __add_pages(zone, start_pfn, nr_pages);
+	ret = __add_pages(nid, zone, start_pfn, nr_pages);
 	WARN_ON_ONCE(ret);
 
 	return ret;
diff --git a/drivers/acpi/processor_idle.c b/drivers/acpi/processor_idle.c
index 38aca04..66a9d81 100644
--- a/drivers/acpi/processor_idle.c
+++ b/drivers/acpi/processor_idle.c
@@ -41,6 +41,7 @@
 #include <linux/pm_qos_params.h>
 #include <linux/clockchips.h>
 #include <linux/cpuidle.h>
+#include <linux/irqflags.h>
 
 /*
  * Include the apic definitions for x86 to have the APIC timer related defines
diff --git a/drivers/base/memory.c b/drivers/base/memory.c
index 5260e9e..989429c 100644
--- a/drivers/base/memory.c
+++ b/drivers/base/memory.c
@@ -347,8 +347,9 @@
  * section belongs to...
  */
 
-static int add_memory_block(unsigned long node_id, struct mem_section *section,
-		     unsigned long state, int phys_device)
+static int add_memory_block(int nid, struct mem_section *section,
+			unsigned long state, int phys_device,
+			enum mem_add_context context)
 {
 	struct memory_block *mem = kzalloc(sizeof(*mem), GFP_KERNEL);
 	int ret = 0;
@@ -370,6 +371,10 @@
 		ret = mem_create_simple_file(mem, phys_device);
 	if (!ret)
 		ret = mem_create_simple_file(mem, removable);
+	if (!ret) {
+		if (context == HOTPLUG)
+			ret = register_mem_sect_under_node(mem, nid);
+	}
 
 	return ret;
 }
@@ -382,7 +387,7 @@
  *
  * This could be made generic for all sysdev classes.
  */
-static struct memory_block *find_memory_block(struct mem_section *section)
+struct memory_block *find_memory_block(struct mem_section *section)
 {
 	struct kobject *kobj;
 	struct sys_device *sysdev;
@@ -411,6 +416,7 @@
 	struct memory_block *mem;
 
 	mem = find_memory_block(section);
+	unregister_mem_sect_under_nodes(mem);
 	mem_remove_simple_file(mem, phys_index);
 	mem_remove_simple_file(mem, state);
 	mem_remove_simple_file(mem, phys_device);
@@ -424,9 +430,9 @@
  * need an interface for the VM to add new memory regions,
  * but without onlining it.
  */
-int register_new_memory(struct mem_section *section)
+int register_new_memory(int nid, struct mem_section *section)
 {
-	return add_memory_block(0, section, MEM_OFFLINE, 0);
+	return add_memory_block(nid, section, MEM_OFFLINE, 0, HOTPLUG);
 }
 
 int unregister_memory_section(struct mem_section *section)
@@ -458,7 +464,8 @@
 	for (i = 0; i < NR_MEM_SECTIONS; i++) {
 		if (!present_section_nr(i))
 			continue;
-		err = add_memory_block(0, __nr_to_section(i), MEM_ONLINE, 0);
+		err = add_memory_block(0, __nr_to_section(i), MEM_ONLINE,
+					0, BOOT);
 		if (!ret)
 			ret = err;
 	}
diff --git a/drivers/base/node.c b/drivers/base/node.c
index 91636cd..43fa90b 100644
--- a/drivers/base/node.c
+++ b/drivers/base/node.c
@@ -6,6 +6,7 @@
 #include <linux/module.h>
 #include <linux/init.h>
 #include <linux/mm.h>
+#include <linux/memory.h>
 #include <linux/node.h>
 #include <linux/hugetlb.h>
 #include <linux/cpumask.h>
@@ -248,6 +249,105 @@
 	return 0;
 }
 
+#ifdef CONFIG_MEMORY_HOTPLUG_SPARSE
+#define page_initialized(page)  ((page)->lru.next)
+
+static int get_nid_for_pfn(unsigned long pfn)
+{
+	struct page *page;
+
+	if (!pfn_valid_within(pfn))
+		return -1;	/* no valid pfn here */
+	page = pfn_to_page(pfn);
+	if (!page_initialized(page))
+		return -1;	/* lru.next is NULL: treated as uninitialized */
+	return pfn_to_nid(pfn);
+}
+
+/* sysfs-link @mem_blk under node @nid if any of its pages belongs to @nid */
+int register_mem_sect_under_node(struct memory_block *mem_blk, int nid)
+{
+	unsigned long pfn, sect_start_pfn, sect_end_pfn;
+
+	if (!mem_blk)
+		return -EFAULT;
+	if (!node_online(nid))
+		return 0;	/* offline node: nothing to link, not an error */
+	sect_start_pfn = section_nr_to_pfn(mem_blk->phys_index);
+	sect_end_pfn = sect_start_pfn + PAGES_PER_SECTION - 1;
+	for (pfn = sect_start_pfn; pfn <= sect_end_pfn; pfn++) {
+		int page_nid;
+
+		page_nid = get_nid_for_pfn(pfn);
+		if (page_nid < 0)
+			continue;	/* invalid or uninitialized pfn */
+		if (page_nid != nid)
+			continue;
+		return sysfs_create_link_nowarn(&node_devices[nid].sysdev.kobj,
+					&mem_blk->sysdev.kobj,
+					kobject_name(&mem_blk->sysdev.kobj));
+	}
+	/* no page of this mem section was found on the specified node */
+	return 0;
+}
+
+/* unregister memory section under all online nodes that it spans */
+int unregister_mem_sect_under_nodes(struct memory_block *mem_blk)
+{
+	nodemask_t unlinked_nodes;
+	unsigned long pfn, sect_start_pfn, sect_end_pfn;
+
+	if (!mem_blk)
+		return -EFAULT;
+	nodes_clear(unlinked_nodes);
+	sect_start_pfn = section_nr_to_pfn(mem_blk->phys_index);
+	sect_end_pfn = sect_start_pfn + PAGES_PER_SECTION - 1;
+	for (pfn = sect_start_pfn; pfn <= sect_end_pfn; pfn++) {
+		int nid;	/* signed: get_nid_for_pfn() returns -1 on failure */
+
+		nid = get_nid_for_pfn(pfn);
+		if (nid < 0)
+			continue;
+		if (!node_online(nid))
+			continue;
+		if (node_test_and_set(nid, unlinked_nodes))
+			continue;	/* link for this node already removed */
+		sysfs_remove_link(&node_devices[nid].sysdev.kobj,
+			 kobject_name(&mem_blk->sysdev.kobj));
+	}
+	return 0;
+}
+
+/* link each present memory section in node @nid's pfn span under that node */
+static int link_mem_sections(int nid)
+{
+	unsigned long start_pfn = NODE_DATA(nid)->node_start_pfn;
+	unsigned long end_pfn = start_pfn + NODE_DATA(nid)->node_spanned_pages;
+	unsigned long pfn;
+	int err = 0;
+
+	for (pfn = start_pfn; pfn < end_pfn; pfn += PAGES_PER_SECTION) {
+		unsigned long section_nr = pfn_to_section_nr(pfn);
+		struct memory_block *mem_blk;
+		int ret;
+
+		if (!present_section_nr(section_nr))
+			continue;
+		mem_blk = find_memory_block(__nr_to_section(section_nr));
+		ret = register_mem_sect_under_node(mem_blk, nid);
+		if (!err)
+			err = ret;	/* remember only the first failure */
+
+		/* discard ref obtained in find_memory_block(), if any */
+		if (mem_blk)
+			kobject_put(&mem_blk->sysdev.kobj);
+	}
+	return err;
+}
+#else
+static int link_mem_sections(int nid) { return 0; }
+#endif /* CONFIG_MEMORY_HOTPLUG_SPARSE */
+
 int register_one_node(int nid)
 {
 	int error = 0;
@@ -267,6 +367,9 @@
 			if (cpu_to_node(cpu) == nid)
 				register_cpu_under_node(cpu, nid);
 		}
+
+		/* link memory sections under this node */
+		error = link_mem_sections(nid);
 	}
 
 	return error;
diff --git a/drivers/char/Kconfig b/drivers/char/Kconfig
index 16970431..35914b6 100644
--- a/drivers/char/Kconfig
+++ b/drivers/char/Kconfig
@@ -841,7 +841,7 @@
 
 config GEN_RTC
 	tristate "Generic /dev/rtc emulation"
-	depends on RTC!=y && !IA64 && !ARM && !M32R && !MIPS && !SPARC && !FRV && !S390 && !SUPERH && !AVR32
+	depends on RTC!=y && !IA64 && !ARM && !M32R && !MIPS && !SPARC && !FRV && !S390 && !SUPERH && !AVR32 && !BLACKFIN
 	---help---
 	  If you say Y here and create a character special file /dev/rtc with
 	  major number 10 and minor number 135 using mknod ("man mknod"), you
diff --git a/drivers/char/consolemap.c b/drivers/char/consolemap.c
index 4246b8e..45d3e80 100644
--- a/drivers/char/consolemap.c
+++ b/drivers/char/consolemap.c
@@ -554,7 +554,7 @@
 		__get_user(fontpos, &list->fontpos);
 		if ((err1 = con_insert_unipair(p, unicode,fontpos)) != 0)
 			err = err1;
-			list++;
+		list++;
 	}
 	
 	if (con_unify_unimap(vc, p))
diff --git a/drivers/char/mem.c b/drivers/char/mem.c
index 6431f69..3586b3b 100644
--- a/drivers/char/mem.c
+++ b/drivers/char/mem.c
@@ -425,9 +425,6 @@
 }
 #endif
 
-extern long vread(char *buf, char *addr, unsigned long count);
-extern long vwrite(char *buf, char *addr, unsigned long count);
-
 #ifdef CONFIG_DEVKMEM
 /*
  * This function reads the *virtual* memory as seen by the kernel.
diff --git a/drivers/char/random.c b/drivers/char/random.c
index c7afc06..7c13581 100644
--- a/drivers/char/random.c
+++ b/drivers/char/random.c
@@ -407,7 +407,7 @@
 	/* read-write data: */
 	spinlock_t lock;
 	unsigned add_ptr;
-	int entropy_count;	/* Must at no time exceed ->POOLBITS! */
+	int entropy_count;
 	int input_rotate;
 };
 
@@ -767,11 +767,10 @@
 {
 	unsigned long flags;
 
-	BUG_ON(r->entropy_count > r->poolinfo->POOLBITS);
-
 	/* Hold lock while accounting */
 	spin_lock_irqsave(&r->lock, flags);
 
+	BUG_ON(r->entropy_count > r->poolinfo->POOLBITS);
 	DEBUG_ENT("trying to extract %d bits from %s\n",
 		  nbytes * 8, r->name);
 
diff --git a/drivers/char/sysrq.c b/drivers/char/sysrq.c
index 94966ed..d41b9f6 100644
--- a/drivers/char/sysrq.c
+++ b/drivers/char/sysrq.c
@@ -82,7 +82,7 @@
 }
 static struct sysrq_key_op sysrq_loglevel_op = {
 	.handler	= sysrq_handle_loglevel,
-	.help_msg	= "loglevel0-8",
+	.help_msg	= "loglevel(0-9)",
 	.action_msg	= "Changing Loglevel",
 	.enable_mask	= SYSRQ_ENABLE_LOG,
 };
@@ -233,7 +233,7 @@
 
 static struct sysrq_key_op sysrq_showallcpus_op = {
 	.handler	= sysrq_handle_showallcpus,
-	.help_msg	= "aLlcpus",
+	.help_msg	= "show-backtrace-all-active-cpus(L)",
 	.action_msg	= "Show backtrace of all active CPUs",
 	.enable_mask	= SYSRQ_ENABLE_DUMP,
 };
@@ -247,7 +247,7 @@
 }
 static struct sysrq_key_op sysrq_showregs_op = {
 	.handler	= sysrq_handle_showregs,
-	.help_msg	= "showPc",
+	.help_msg	= "show-registers(P)",
 	.action_msg	= "Show Regs",
 	.enable_mask	= SYSRQ_ENABLE_DUMP,
 };
@@ -258,7 +258,7 @@
 }
 static struct sysrq_key_op sysrq_showstate_op = {
 	.handler	= sysrq_handle_showstate,
-	.help_msg	= "showTasks",
+	.help_msg	= "show-task-states(T)",
 	.action_msg	= "Show State",
 	.enable_mask	= SYSRQ_ENABLE_DUMP,
 };
@@ -269,7 +269,7 @@
 }
 static struct sysrq_key_op sysrq_showstate_blocked_op = {
 	.handler	= sysrq_handle_showstate_blocked,
-	.help_msg	= "shoW-blocked-tasks",
+	.help_msg	= "show-blocked-tasks(W)",
 	.action_msg	= "Show Blocked State",
 	.enable_mask	= SYSRQ_ENABLE_DUMP,
 };
@@ -297,7 +297,7 @@
 }
 static struct sysrq_key_op sysrq_showmem_op = {
 	.handler	= sysrq_handle_showmem,
-	.help_msg	= "showMem",
+	.help_msg	= "show-memory-usage(M)",
 	.action_msg	= "Show Memory",
 	.enable_mask	= SYSRQ_ENABLE_DUMP,
 };
@@ -323,7 +323,7 @@
 }
 static struct sysrq_key_op sysrq_term_op = {
 	.handler	= sysrq_handle_term,
-	.help_msg	= "tErm",
+	.help_msg	= "terminate-all-tasks(E)",
 	.action_msg	= "Terminate All Tasks",
 	.enable_mask	= SYSRQ_ENABLE_SIGNAL,
 };
@@ -341,7 +341,7 @@
 }
 static struct sysrq_key_op sysrq_moom_op = {
 	.handler	= sysrq_handle_moom,
-	.help_msg	= "Full",
+	.help_msg	= "memory-full-oom-kill(F)",
 	.action_msg	= "Manual OOM execution",
 	.enable_mask	= SYSRQ_ENABLE_SIGNAL,
 };
@@ -353,7 +353,7 @@
 }
 static struct sysrq_key_op sysrq_kill_op = {
 	.handler	= sysrq_handle_kill,
-	.help_msg	= "kIll",
+	.help_msg	= "kill-all-tasks(I)",
 	.action_msg	= "Kill All Tasks",
 	.enable_mask	= SYSRQ_ENABLE_SIGNAL,
 };
@@ -364,7 +364,7 @@
 }
 static struct sysrq_key_op sysrq_unrt_op = {
 	.handler	= sysrq_handle_unrt,
-	.help_msg	= "Nice",
+	.help_msg	= "nice-all-RT-tasks(N)",
 	.action_msg	= "Nice All RT Tasks",
 	.enable_mask	= SYSRQ_ENABLE_RTNICE,
 };
diff --git a/drivers/edac/Kconfig b/drivers/edac/Kconfig
index e2667a8..eee47fd 100644
--- a/drivers/edac/Kconfig
+++ b/drivers/edac/Kconfig
@@ -109,6 +109,13 @@
 	  Support for error detection and correction on the Intel
 	  X38 server chipsets.
 
+config EDAC_I5400
+	tristate "Intel 5400 (Seaburg) chipsets"
+	depends on EDAC_MM_EDAC && PCI && X86
+	help
+	  Support for error detection and correction on the Intel
+	  i5400 MCH chipset (Seaburg).
+
 config EDAC_I82860
 	tristate "Intel 82860"
 	depends on EDAC_MM_EDAC && PCI && X86_32
diff --git a/drivers/edac/Makefile b/drivers/edac/Makefile
index 62c2d9b..b751969 100644
--- a/drivers/edac/Makefile
+++ b/drivers/edac/Makefile
@@ -20,6 +20,7 @@
 obj-$(CONFIG_EDAC_AMD76X)		+= amd76x_edac.o
 obj-$(CONFIG_EDAC_I5000)		+= i5000_edac.o
 obj-$(CONFIG_EDAC_I5100)		+= i5100_edac.o
+obj-$(CONFIG_EDAC_I5400)		+= i5400_edac.o
 obj-$(CONFIG_EDAC_E7XXX)		+= e7xxx_edac.o
 obj-$(CONFIG_EDAC_E752X)		+= e752x_edac.o
 obj-$(CONFIG_EDAC_I82443BXGX)		+= i82443bxgx_edac.o
diff --git a/drivers/edac/edac_device.c b/drivers/edac/edac_device.c
index 4041e91..ca9113e 100644
--- a/drivers/edac/edac_device.c
+++ b/drivers/edac/edac_device.c
@@ -333,7 +333,7 @@
 fail0:
 	edac_printk(KERN_WARNING, EDAC_MC,
 			"%s (%s) %s %s already assigned %d\n",
-			rover->dev->bus_id, edac_dev_name(rover),
+			dev_name(rover->dev), edac_dev_name(rover),
 			rover->mod_name, rover->ctl_name, rover->dev_idx);
 	return 1;
 
diff --git a/drivers/edac/edac_mc.c b/drivers/edac/edac_mc.c
index d110392..25d6694 100644
--- a/drivers/edac/edac_mc.c
+++ b/drivers/edac/edac_mc.c
@@ -401,7 +401,7 @@
 
 fail0:
 	edac_printk(KERN_WARNING, EDAC_MC,
-		"%s (%s) %s %s already assigned %d\n", p->dev->bus_id,
+		"%s (%s) %s %s already assigned %d\n", dev_name(p->dev),
 		edac_dev_name(mci), p->mod_name, p->ctl_name, p->mc_idx);
 	return 1;
 
diff --git a/drivers/edac/edac_pci.c b/drivers/edac/edac_pci.c
index 22ec9d5..5d3c808 100644
--- a/drivers/edac/edac_pci.c
+++ b/drivers/edac/edac_pci.c
@@ -150,7 +150,7 @@
 fail0:
 	edac_printk(KERN_WARNING, EDAC_PCI,
 		"%s (%s) %s %s already assigned %d\n",
-		rover->dev->bus_id, edac_dev_name(rover),
+		dev_name(rover->dev), edac_dev_name(rover),
 		rover->mod_name, rover->ctl_name, rover->pci_idx);
 	return 1;
 
diff --git a/drivers/edac/edac_pci_sysfs.c b/drivers/edac/edac_pci_sysfs.c
index 5c153dc..422728c 100644
--- a/drivers/edac/edac_pci_sysfs.c
+++ b/drivers/edac/edac_pci_sysfs.c
@@ -569,7 +569,7 @@
 
 	local_irq_restore(flags);
 
-	debugf4("PCI STATUS= 0x%04x %s\n", status, dev->dev.bus_id);
+	debugf4("PCI STATUS= 0x%04x %s\n", status, dev_name(&dev->dev));
 
 	/* check the status reg for errors on boards NOT marked as broken
 	 * if broken, we cannot trust any of the status bits
@@ -600,13 +600,13 @@
 	}
 
 
-	debugf4("PCI HEADER TYPE= 0x%02x %s\n", header_type, dev->dev.bus_id);
+	debugf4("PCI HEADER TYPE= 0x%02x %s\n", header_type, dev_name(&dev->dev));
 
 	if ((header_type & 0x7F) == PCI_HEADER_TYPE_BRIDGE) {
 		/* On bridges, need to examine secondary status register  */
 		status = get_pci_parity_status(dev, 1);
 
-		debugf4("PCI SEC_STATUS= 0x%04x %s\n", status, dev->dev.bus_id);
+		debugf4("PCI SEC_STATUS= 0x%04x %s\n", status, dev_name(&dev->dev));
 
 		/* check the secondary status reg for errors,
 		 * on NOT broken boards
diff --git a/drivers/edac/i5400_edac.c b/drivers/edac/i5400_edac.c
new file mode 100644
index 0000000..b08b6d8
--- /dev/null
+++ b/drivers/edac/i5400_edac.c
@@ -0,0 +1,1476 @@
+/*
+ * Intel 5400 class Memory Controllers kernel module (Seaburg)
+ *
+ * This file may be distributed under the terms of the
+ * GNU General Public License.
+ *
+ * Copyright (c) 2008 by:
+ *	 Ben Woodard <woodard@redhat.com>
+ *	 Mauro Carvalho Chehab <mchehab@redhat.com>
+ *
+ * Red Hat Inc. http://www.redhat.com
+ *
+ * Forked and adapted from the i5000_edac driver which was
+ * written by Douglas Thompson Linux Networx <norsk5@xmission.com>
+ *
+ * This module is based on the following document:
+ *
+ * Intel 5400 Chipset Memory Controller Hub (MCH) - Datasheet
+ * 	http://developer.intel.com/design/chipsets/datashts/313070.htm
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/pci.h>
+#include <linux/pci_ids.h>
+#include <linux/slab.h>
+#include <linux/edac.h>
+#include <linux/mmzone.h>
+
+#include "edac_core.h"
+
+/*
+ * Alter this version for the I5400 module when modifications are made
+ */
+#define I5400_REVISION    " Ver: 1.0.0 " __DATE__
+
+#define EDAC_MOD_STR      "i5400_edac"
+
+#define i5400_printk(level, fmt, arg...) \
+	edac_printk(level, "i5400", fmt, ##arg)
+
+#define i5400_mc_printk(mci, level, fmt, arg...) \
+	edac_mc_chipset_printk(mci, level, "i5400", fmt, ##arg)
+
+/* Limits for i5400 */
+#define NUM_MTRS_PER_BRANCH	4
+#define CHANNELS_PER_BRANCH	2
+#define	MAX_CHANNELS		4
+#define MAX_DIMMS		(MAX_CHANNELS * 4)	/* Up to 4 DIMM's per channel */
+#define MAX_CSROWS		(MAX_DIMMS * 2)		/* max possible csrows per channel */
+
+/* Device 16,
+ * Function 0: System Address
+ * Function 1: Memory Branch Map, Control, Errors Register
+ * Function 2: FSB Error Registers
+ *
+ * All 3 functions of Device 16 (0,1,2) share the SAME DID and
+ * uses PCI_DEVICE_ID_INTEL_5400_ERR for device 16 (0,1,2),
+ * PCI_DEVICE_ID_INTEL_5400_FBD0 and PCI_DEVICE_ID_INTEL_5400_FBD1
+ * for device 21 (0,1).
+ */
+
+	/* OFFSETS for Function 0 */
+#define		AMBASE			0x48 /* AMB Mem Mapped Reg Region Base */
+#define		MAXCH			0x56 /* Max Channel Number */
+#define		MAXDIMMPERCH		0x57 /* Max DIMM PER Channel Number */
+
+	/* OFFSETS for Function 1 */
+#define		TOLM			0x6C
+#define		REDMEMB			0x7C
+#define			REC_ECC_LOCATOR_ODD(x)	((x) & 0x3fe00) /* bits [17:9] indicate ODD, [8:0]  indicate EVEN */
+#define		MIR0			0x80
+#define		MIR1			0x84
+#define		AMIR0			0x8c
+#define		AMIR1			0x90
+
+	/* Fatal error registers */
+#define		FERR_FAT_FBD		0x98	/* also called as FERR_FAT_FB_DIMM at datasheet */
+#define			FERR_FAT_FBDCHAN (3<<28)	/* channel index where the highest-order error occurred */
+
+#define		NERR_FAT_FBD		0x9c
+#define		FERR_NF_FBD		0xa0	/* also called as FERR_NFAT_FB_DIMM at datasheet */
+
+	/* Non-fatal error register */
+#define		NERR_NF_FBD		0xa4
+
+	/* Enable error mask */
+#define		EMASK_FBD		0xa8
+
+#define		ERR0_FBD		0xac
+#define		ERR1_FBD		0xb0
+#define		ERR2_FBD		0xb4
+#define		MCERR_FBD		0xb8
+
+	/* No OFFSETS for Device 16 Function 2 */
+
+/*
+ * Device 21,
+ * Function 0: Memory Map Branch 0
+ *
+ * Device 22,
+ * Function 0: Memory Map Branch 1
+ */
+
+	/* OFFSETS for Function 0 */
+#define AMBPRESENT_0	0x64
+#define AMBPRESENT_1	0x66
+#define MTR0		0x80
+#define MTR1		0x82
+#define MTR2		0x84
+#define MTR3		0x86
+
+	/* OFFSETS for Function 1 */
+#define NRECFGLOG		0x74
+#define RECFGLOG		0x78
+#define NRECMEMA		0xbe
+#define NRECMEMB		0xc0
+#define NRECFB_DIMMA		0xc4
+#define NRECFB_DIMMB		0xc8
+#define NRECFB_DIMMC		0xcc
+#define NRECFB_DIMMD		0xd0
+#define NRECFB_DIMME		0xd4
+#define NRECFB_DIMMF		0xd8
+#define REDMEMA			0xdC
+#define RECMEMA			0xf0
+#define RECMEMB			0xf4
+#define RECFB_DIMMA		0xf8
+#define RECFB_DIMMB		0xec
+#define RECFB_DIMMC		0xf0
+#define RECFB_DIMMD		0xf4
+#define RECFB_DIMME		0xf8
+#define RECFB_DIMMF		0xfC
+
+/*
+ * Error indicator bits and masks
+ * Error masks are according with Table 5-17 of i5400 datasheet
+ */
+
+enum error_mask {
+	EMASK_M1  = 1<<0,  /* Memory Write error on non-redundant retry */
+	EMASK_M2  = 1<<1,  /* Memory or FB-DIMM configuration CRC read error */
+	EMASK_M3  = 1<<2,  /* Reserved */
+	EMASK_M4  = 1<<3,  /* Uncorrectable Data ECC on Replay */
+	EMASK_M5  = 1<<4,  /* Aliased Uncorrectable Non-Mirrored Demand Data ECC */
+	EMASK_M6  = 1<<5,  /* Unsupported on i5400 */
+	EMASK_M7  = 1<<6,  /* Aliased Uncorrectable Resilver- or Spare-Copy Data ECC */
+	EMASK_M8  = 1<<7,  /* Aliased Uncorrectable Patrol Data ECC */
+	EMASK_M9  = 1<<8,  /* Non-Aliased Uncorrectable Non-Mirrored Demand Data ECC */
+	EMASK_M10 = 1<<9,  /* Unsupported on i5400 */
+	EMASK_M11 = 1<<10, /* Non-Aliased Uncorrectable Resilver- or Spare-Copy Data ECC  */
+	EMASK_M12 = 1<<11, /* Non-Aliased Uncorrectable Patrol Data ECC */
+	EMASK_M13 = 1<<12, /* Memory Write error on first attempt */
+	EMASK_M14 = 1<<13, /* FB-DIMM Configuration Write error on first attempt */
+	EMASK_M15 = 1<<14, /* Memory or FB-DIMM configuration CRC read error */
+	EMASK_M16 = 1<<15, /* Channel Failed-Over Occurred */
+	EMASK_M17 = 1<<16, /* Correctable Non-Mirrored Demand Data ECC */
+	EMASK_M18 = 1<<17, /* Unsupported on i5400 */
+	EMASK_M19 = 1<<18, /* Correctable Resilver- or Spare-Copy Data ECC */
+	EMASK_M20 = 1<<19, /* Correctable Patrol Data ECC */
+	EMASK_M21 = 1<<20, /* FB-DIMM Northbound parity error on FB-DIMM Sync Status */
+	EMASK_M22 = 1<<21, /* SPD protocol Error */
+	EMASK_M23 = 1<<22, /* Non-Redundant Fast Reset Timeout */
+	EMASK_M24 = 1<<23, /* Refresh error */
+	EMASK_M25 = 1<<24, /* Memory Write error on redundant retry */
+	EMASK_M26 = 1<<25, /* Redundant Fast Reset Timeout */
+	EMASK_M27 = 1<<26, /* Correctable Counter Threshold Exceeded */
+	EMASK_M28 = 1<<27, /* DIMM-Spare Copy Completed */
+	EMASK_M29 = 1<<28, /* DIMM-Isolation Completed */
+};
+
+/*
+ * Names to translate bit error into something useful
+ */
+static const char *error_name[] = {
+	[0]  = "Memory Write error on non-redundant retry",
+	[1]  = "Memory or FB-DIMM configuration CRC read error",
+	/* Reserved */
+	[3]  = "Uncorrectable Data ECC on Replay",
+	[4]  = "Aliased Uncorrectable Non-Mirrored Demand Data ECC",
+	/* M6 Unsupported on i5400 */
+	[6]  = "Aliased Uncorrectable Resilver- or Spare-Copy Data ECC",
+	[7]  = "Aliased Uncorrectable Patrol Data ECC",
+	[8]  = "Non-Aliased Uncorrectable Non-Mirrored Demand Data ECC",
+	/* M10 Unsupported on i5400 */
+	[10] = "Non-Aliased Uncorrectable Resilver- or Spare-Copy Data ECC",
+	[11] = "Non-Aliased Uncorrectable Patrol Data ECC",
+	[12] = "Memory Write error on first attempt",
+	[13] = "FB-DIMM Configuration Write error on first attempt",
+	[14] = "Memory or FB-DIMM configuration CRC read error",
+	[15] = "Channel Failed-Over Occurred",
+	[16] = "Correctable Non-Mirrored Demand Data ECC",
+	/* M18 Unsupported on i5400 */
+	[18] = "Correctable Resilver- or Spare-Copy Data ECC",
+	[19] = "Correctable Patrol Data ECC",
+	[20] = "FB-DIMM Northbound parity error on FB-DIMM Sync Status",
+	[21] = "SPD protocol Error",
+	[22] = "Non-Redundant Fast Reset Timeout",
+	[23] = "Refresh error",
+	[24] = "Memory Write error on redundant retry",
+	[25] = "Redundant Fast Reset Timeout",
+	[26] = "Correctable Counter Threshold Exceeded",
+	[27] = "DIMM-Spare Copy Completed",
+	[28] = "DIMM-Isolation Completed",
+};
+
+/* Fatal errors */
+#define ERROR_FAT_MASK		(EMASK_M1 | \
+				 EMASK_M2 | \
+				 EMASK_M23)
+
+/* Correctable errors */
+#define ERROR_NF_CORRECTABLE	(EMASK_M27 | \
+				 EMASK_M20 | \
+				 EMASK_M19 | \
+				 EMASK_M18 | \
+				 EMASK_M17 | \
+				 EMASK_M16)
+#define ERROR_NF_DIMM_SPARE	(EMASK_M29 | \
+				 EMASK_M28)
+#define ERROR_NF_SPD_PROTOCOL	(EMASK_M22)
+#define ERROR_NF_NORTH_CRC	(EMASK_M21)
+
+/* Recoverable errors */
+#define ERROR_NF_RECOVERABLE	(EMASK_M26 | \
+				 EMASK_M25 | \
+				 EMASK_M24 | \
+				 EMASK_M15 | \
+				 EMASK_M14 | \
+				 EMASK_M13 | \
+				 EMASK_M12 | \
+				 EMASK_M11 | \
+				 EMASK_M9  | \
+				 EMASK_M8  | \
+				 EMASK_M7  | \
+				 EMASK_M5)
+
+/* uncorrectable errors */
+#define ERROR_NF_UNCORRECTABLE	(EMASK_M4)
+
+/* mask to all non-fatal errors */
+#define ERROR_NF_MASK		(ERROR_NF_CORRECTABLE   | \
+				 ERROR_NF_UNCORRECTABLE | \
+				 ERROR_NF_RECOVERABLE   | \
+				 ERROR_NF_DIMM_SPARE    | \
+				 ERROR_NF_SPD_PROTOCOL  | \
+				 ERROR_NF_NORTH_CRC)
+
+/*
+ * Define error masks for the several registers
+ */
+
+/* Enable all fatal and non fatal errors */
+#define ENABLE_EMASK_ALL	(ERROR_FAT_MASK | ERROR_NF_MASK)
+
+/* mask for fatal error registers */
+#define FERR_FAT_MASK ERROR_FAT_MASK
+
+/* masks for non-fatal error register */
+static inline int to_nf_mask(unsigned int mask)
+{
+	return (mask >> 3) | (mask & EMASK_M29);	/* bit 28 kept, rest down 3 */
+};
+
+static inline int from_nf_ferr(unsigned int mask)	/* undoes to_nf_mask() */
+{
+	return (mask & EMASK_M29) |		/* Bit 28 is kept in place */
+	       ((mask & ((1 << 28) - 1)) << 3);	/* Bits 0 to 27 move up by 3 */
+};
+
+#define FERR_NF_MASK		to_nf_mask(ERROR_NF_MASK)
+#define FERR_NF_CORRECTABLE	to_nf_mask(ERROR_NF_CORRECTABLE)
+#define FERR_NF_DIMM_SPARE	to_nf_mask(ERROR_NF_DIMM_SPARE)
+#define FERR_NF_SPD_PROTOCOL	to_nf_mask(ERROR_NF_SPD_PROTOCOL)
+#define FERR_NF_NORTH_CRC	to_nf_mask(ERROR_NF_NORTH_CRC)
+#define FERR_NF_RECOVERABLE	to_nf_mask(ERROR_NF_RECOVERABLE)
+#define FERR_NF_UNCORRECTABLE	to_nf_mask(ERROR_NF_UNCORRECTABLE)
+
+/* Defines to extract the various fields from the
+ *	MTRx - Memory Technology Registers
+ */
+#define MTR_DIMMS_PRESENT(mtr)		((mtr) & (1 << 10))
+#define MTR_DIMMS_ETHROTTLE(mtr)	((mtr) & (1 << 9))
+#define MTR_DRAM_WIDTH(mtr)		(((mtr) & (1 << 8)) ? 8 : 4)
+#define MTR_DRAM_BANKS(mtr)		(((mtr) & (1 << 6)) ? 8 : 4)
+#define MTR_DRAM_BANKS_ADDR_BITS(mtr)	((MTR_DRAM_BANKS(mtr) == 8) ? 3 : 2)
+#define MTR_DIMM_RANK(mtr)		(((mtr) >> 5) & 0x1)
+#define MTR_DIMM_RANK_ADDR_BITS(mtr)	(MTR_DIMM_RANK(mtr) ? 2 : 1)
+#define MTR_DIMM_ROWS(mtr)		(((mtr) >> 2) & 0x3)
+#define MTR_DIMM_ROWS_ADDR_BITS(mtr)	(MTR_DIMM_ROWS(mtr) + 13)
+#define MTR_DIMM_COLS(mtr)		((mtr) & 0x3)
+#define MTR_DIMM_COLS_ADDR_BITS(mtr)	(MTR_DIMM_COLS(mtr) + 10)
+
+/* Pull the 2-bit field at bits 29:28 out of FERR_FAT_FBD or FERR_NF_FBD */
+static inline int extract_fbdchan_indx(u32 x)
+{
+	return (x & FERR_FAT_FBDCHAN) >> 28;
+}
+
+#ifdef CONFIG_EDAC_DEBUG
+/* MTR NUMROW */
+static const char *numrow_toString[] = {
+	"8,192 - 13 rows",
+	"16,384 - 14 rows",
+	"32,768 - 15 rows",
+	"65,536 - 16 rows"
+};
+
+/* MTR NUMCOL */
+static const char *numcol_toString[] = {
+	"1,024 - 10 columns",
+	"2,048 - 11 columns",
+	"4,096 - 12 columns",
+	"reserved"
+};
+#endif
+
+/* Device name and register DID (Device ID) */
+struct i5400_dev_info {
+	const char *ctl_name;	/* name for this device */
+	u16 fsb_mapping_errors;	/* DID for the branchmap,control */
+};
+
+/* Table of devices attributes supported by this driver */
+static const struct i5400_dev_info i5400_devs[] = {
+	{
+		.ctl_name = "I5400",
+		.fsb_mapping_errors = PCI_DEVICE_ID_INTEL_5400_ERR,
+	},
+};
+
+struct i5400_dimm_info {
+	int megabytes;		/* size, 0 means not present  */
+	int dual_rank;		/* MTR_DIMM_RANK() of the covering MTR value */
+};
+
+/* driver private data structure; "D.F" below is PCI device.function */
+struct i5400_pvt {
+	struct pci_dev *system_address;		/* 16.0 */
+	struct pci_dev *branchmap_werrors;	/* 16.1 */
+	struct pci_dev *fsb_error_regs;		/* 16.2 */
+	struct pci_dev *branch_0;		/* 21.0 */
+	struct pci_dev *branch_1;		/* 22.0 */
+
+	u16 tolm;				/* top of low memory */
+	u64 ambase;				/* AMB BAR */
+
+	u16 mir0, mir1;				/* raw MIR0 / MIR1 values */
+
+	u16 b0_mtr[NUM_MTRS_PER_BRANCH];	/* Memory Technology Reg */
+	u16 b0_ambpresent0;			/* Branch 0, Channel 0 */
+	u16 b0_ambpresent1;			/* Branch 0, Channel 1 */
+
+	u16 b1_mtr[NUM_MTRS_PER_BRANCH];	/* Memory Technology Reg */
+	u16 b1_ambpresent0;			/* Branch 1, Channel 0 (channel '2') */
+	u16 b1_ambpresent1;			/* Branch 1, Channel 1 (channel '3') */
+
+	/* DIMM information matrix, allocating architecture maximums */
+	struct i5400_dimm_info dimm_info[MAX_CSROWS][MAX_CHANNELS];
+
+	/* Actual values for this controller */
+	int maxch;				/* Max channels */
+	int maxdimmperch;			/* Max DIMMs per channel */
+};
+
+/* I5400 MCH error information retrieved from Hardware */
+struct i5400_error_info {
+	/* FERR_* are always filled in; NERR_* only when FERR_* shows an error */
+	u32 ferr_fat_fbd;	/* First Errors Fatal */
+	u32 nerr_fat_fbd;	/* Next Errors Fatal */
+	u32 ferr_nf_fbd;	/* First Errors Non-Fatal */
+	u32 nerr_nf_fbd;	/* Next Errors Non-Fatal */
+
+	/* Read ONLY if a non-fatal error was latched, zeroed otherwise */
+	u32 redmemb;		/* Recoverable Mem Data Error log B */
+	u16 recmema;		/* Recoverable Mem Error log A */
+	u32 recmemb;		/* Recoverable Mem Error log B */
+
+	/* Read ONLY if a fatal error was latched, zeroed otherwise */
+	u16 nrecmema;		/* Non-Recoverable Mem log A */
+	u32 nrecmemb;		/* Non-Recoverable Mem log B (32 bits wide) */
+
+};
+
+/* note that nrec_rdwr changed from NRECMEMA to NRECMEMB between the 5000 and
+   5400 better to use an inline function than a macro in this case */
+static inline int nrec_bank(struct i5400_error_info *info)
+{
+	return ((info->nrecmema) >> 12) & 0x7;
+}
+static inline int nrec_rank(struct i5400_error_info *info)
+{
+	return ((info->nrecmema) >> 8) & 0xf;
+}
+static inline int nrec_buf_id(struct i5400_error_info *info)
+{
+	return ((info->nrecmema)) & 0xff;
+}
+static inline int nrec_rdwr(struct i5400_error_info *info)
+{
+	return (info->nrecmemb) >> 31;	/* needs the full 32-bit log value */
+}
+/* This applies to both NREC and REC string so it can be used with nrec_rdwr
+   and rec_rdwr */
+static inline const char *rdwr_str(int rdwr)
+{
+	return rdwr ? "Write" : "Read";
+}
+static inline int nrec_cas(struct i5400_error_info *info)
+{
+	return ((info->nrecmemb) >> 16) & 0x1fff;	/* upper half of the log */
+}
+static inline int nrec_ras(struct i5400_error_info *info)
+{
+	return (info->nrecmemb) & 0xffff;
+}
+static inline int rec_bank(struct i5400_error_info *info)
+{
+	return ((info->recmema) >> 12) & 0x7;
+}
+static inline int rec_rank(struct i5400_error_info *info)
+{
+	return ((info->recmema) >> 8) & 0xf;
+}
+static inline int rec_rdwr(struct i5400_error_info *info)
+{
+	return (info->recmemb) >> 31;
+}
+static inline int rec_cas(struct i5400_error_info *info)
+{
+	return ((info->recmemb) >> 16) & 0x1fff;
+}
+static inline int rec_ras(struct i5400_error_info *info)
+{
+	return (info->recmemb) & 0xffff;
+}
+
+static struct edac_pci_ctl_info *i5400_pci;
+
+/*
+ *	i5400_get_error_info	Read the error registers of device 16.1,
+ *				cache them in 'info' and write the latched
+ *				FERR bits back to clear them
+ */
+static void i5400_get_error_info(struct mem_ctl_info *mci,
+				 struct i5400_error_info *info)
+{
+	struct i5400_pvt *pvt;
+	u32 value;
+
+	pvt = mci->pvt_info;
+
+	/* read in the 1st FATAL error register */
+	pci_read_config_dword(pvt->branchmap_werrors, FERR_FAT_FBD, &value);
+
+	/* Mask only the bits that the doc says are valid
+	 */
+	value &= (FERR_FAT_FBDCHAN | FERR_FAT_MASK);
+
+	/* If there is an error, then read in the
+	   NEXT FATAL error register and the Memory Error Log Registers A and B
+	 */
+	if (value & FERR_FAT_MASK) {
+		info->ferr_fat_fbd = value;
+
+		/* harvest the error data; NRECMEMB needs a full dword read */
+		pci_read_config_dword(pvt->branchmap_werrors,
+				NERR_FAT_FBD, &info->nerr_fat_fbd);
+		pci_read_config_word(pvt->branchmap_werrors,
+				NRECMEMA, &info->nrecmema);
+		pci_read_config_dword(pvt->branchmap_werrors,
+				NRECMEMB, &info->nrecmemb);
+
+		/* Clear the error bits, by writing them back */
+		pci_write_config_dword(pvt->branchmap_werrors,
+				FERR_FAT_FBD, value);
+	} else {
+		info->ferr_fat_fbd = 0;
+		info->nerr_fat_fbd = 0;
+		info->nrecmema = 0;
+		info->nrecmemb = 0;
+	}
+
+	/* read in the 1st NON-FATAL error register */
+	pci_read_config_dword(pvt->branchmap_werrors, FERR_NF_FBD, &value);
+
+	/* If there is an error, then read in the NEXT NON-FATAL error
+	 * register and the recoverable error logs as well */
+	if (value & FERR_NF_MASK) {
+		info->ferr_nf_fbd = value;
+
+		/* harvest the various error data we need */
+		pci_read_config_dword(pvt->branchmap_werrors,
+				NERR_NF_FBD, &info->nerr_nf_fbd);
+		pci_read_config_word(pvt->branchmap_werrors,
+				RECMEMA, &info->recmema);
+		pci_read_config_dword(pvt->branchmap_werrors,
+				RECMEMB, &info->recmemb);
+		pci_read_config_dword(pvt->branchmap_werrors,
+				REDMEMB, &info->redmemb);
+
+		/* Clear the error bits, by writing them back */
+		pci_write_config_dword(pvt->branchmap_werrors,
+				FERR_NF_FBD, value);
+	} else {
+		info->ferr_nf_fbd = 0;
+		info->nerr_nf_fbd = 0;
+		info->recmema = 0;
+		info->recmemb = 0;
+		info->redmemb = 0;
+	}
+}
+
+/*
+ * i5400_proccess_non_recoverable_info
+ *	Report a FATAL or uncorrected/recoverable NON-FATAL error through
+ *	edac_mc_handle_fbd_ue().  @allErrors uses the ERROR_* (EMASK_Mxx)
+ *	bit layout; zero means there is nothing to report.  Location data
+ *	is decoded from the NRECMEMA/NRECMEMB values cached in @info.
+ */
+static void i5400_proccess_non_recoverable_info(struct mem_ctl_info *mci,
+				    struct i5400_error_info *info,
+				    unsigned long allErrors)
+{
+	char msg[EDAC_MC_LABEL_LEN + 1 + 90 + 80];
+	int branch;
+	int channel;
+	int bank;
+	int buf_id;
+	int rank;
+	int rdwr;
+	int ras, cas;
+	int errnum;
+	char *type = NULL;
+
+	if (!allErrors)
+		return;		/* if no error, return now */
+
+	if (allErrors &  ERROR_FAT_MASK)
+		type = "FATAL";
+	else if (allErrors & ERROR_NF_UNCORRECTABLE)	/* ERROR_* layout */
+		type = "NON-FATAL uncorrected";
+	else
+		type = "NON-FATAL recoverable";
+
+	/* ONLY ONE of the possible error bits will be set, as per the docs */
+	/* NOTE(review): the index below always comes from the FATAL register */
+	branch = extract_fbdchan_indx(info->ferr_fat_fbd);
+	channel = branch;
+
+	/* Use the NON-Recoverable macros to extract data */
+	bank = nrec_bank(info);
+	rank = nrec_rank(info);
+	buf_id = nrec_buf_id(info);
+	rdwr = nrec_rdwr(info);
+	ras = nrec_ras(info);
+	cas = nrec_cas(info);
+
+	debugf0("\t\tCSROW= %d  Channels= %d,%d  (Branch= %d "
+		"DRAM Bank= %d Buffer ID = %d rdwr= %s ras= %d cas= %d)\n",
+		rank, channel, channel + 1, branch >> 1, bank,
+		buf_id, rdwr_str(rdwr), ras, cas);
+
+	/* Only 1 bit will be on */
+	errnum = find_first_bit(&allErrors, ARRAY_SIZE(error_name));
+
+	/* Form out message */
+	snprintf(msg, sizeof(msg),
+		 "%s (Branch=%d DRAM-Bank=%d Buffer ID = %d RDWR=%s "
+		 "RAS=%d CAS=%d %s Err=0x%lx (%s))",
+		 type, branch >> 1, bank, buf_id, rdwr_str(rdwr), ras, cas,
+		 type, allErrors, error_name[errnum]);
+
+	/* Call the helper to output message */
+	edac_mc_handle_fbd_ue(mci, rank, channel, channel + 1, msg);
+}
+
+/*
+ * i5400_process_nonfatal_error_info(struct mem_ctl_info *mci,
+ * 				struct i5400_error_info *info);
+ *
+ *	handle the Intel NON-FATAL errors, if any: uncorrectable/recoverable
+ *	ones are passed on, correctable ones are reported as CE, rest logged
+ */
+static void i5400_process_nonfatal_error_info(struct mem_ctl_info *mci,
+					struct i5400_error_info *info)
+{
+	char msg[EDAC_MC_LABEL_LEN + 1 + 90 + 80];
+	unsigned long allErrors;
+	int branch;
+	int channel;
+	int bank;
+	int rank;
+	int rdwr;
+	int ras, cas;
+	int errnum;
+
+	/* mask off the Error bits that are possible */
+	allErrors = from_nf_ferr(info->ferr_nf_fbd & FERR_NF_MASK);
+	if (!allErrors)
+		return;		/* if no error, return now */
+
+	/* ONLY ONE of the possible error bits will be set, as per the docs */
+
+	if (allErrors & (ERROR_NF_UNCORRECTABLE | ERROR_NF_RECOVERABLE)) {
+		i5400_proccess_non_recoverable_info(mci, info, allErrors);
+		return;
+	}
+
+	/* Correctable errors */
+	if (allErrors & ERROR_NF_CORRECTABLE) {
+		debugf0("\tCorrected bits= 0x%lx\n", allErrors);
+
+		branch = extract_fbdchan_indx(info->ferr_nf_fbd);
+
+		channel = 0;
+		if (REC_ECC_LOCATOR_ODD(info->redmemb))
+			channel = 1;
+
+		/* Convert channel to be based from zero, instead of
+		 * from branch base of 0 */
+		channel += branch;
+
+		bank = rec_bank(info);
+		rank = rec_rank(info);
+		rdwr = rec_rdwr(info);
+		ras = rec_ras(info);
+		cas = rec_cas(info);
+
+		/* Only 1 bit will be on */
+		errnum = find_first_bit(&allErrors, ARRAY_SIZE(error_name));
+
+		debugf0("\t\tCSROW= %d Channel= %d  (Branch %d "
+			"DRAM Bank= %d rdwr= %s ras= %d cas= %d)\n",
+			rank, channel, branch >> 1, bank,
+			rdwr_str(rdwr), ras, cas);
+
+		/* Form out message */
+		snprintf(msg, sizeof(msg),
+			 "Corrected error (Branch=%d DRAM-Bank=%d RDWR=%s "
+			 "RAS=%d CAS=%d, CE Err=0x%lx (%s))",
+			 branch >> 1, bank, rdwr_str(rdwr), ras, cas,
+			 allErrors, error_name[errnum]);
+
+		/* Call the helper to output message */
+		edac_mc_handle_fbd_ce(mci, rank, channel, msg);
+
+		return;
+	}
+
+	/* Miscellaneous errors: only logged, so end the line ourselves */
+	errnum = find_first_bit(&allErrors, ARRAY_SIZE(error_name));
+
+	branch = extract_fbdchan_indx(info->ferr_nf_fbd);
+
+	i5400_mc_printk(mci, KERN_EMERG,
+			"Non-Fatal misc error (Branch=%d Err=%#lx (%s))\n",
+			branch >> 1, allErrors, error_name[errnum]);
+}
+
+/*
+ *	i5400_process_error_info	Process the error info that is
+ *	in the 'info' structure, previously retrieved from hardware
+ */
+static void i5400_process_error_info(struct mem_ctl_info *mci,
+				struct i5400_error_info *info)
+{
+	const u32 fatal_bits = info->ferr_fat_fbd & FERR_FAT_MASK;
+
+	/* Fatal errors are reported first; the callee returns early on 0 */
+	i5400_proccess_non_recoverable_info(mci, info, fatal_bits);
+
+	/* Then decode whatever the non-fatal register captured */
+	i5400_process_nonfatal_error_info(mci, info);
+}
+
+/*
+ *	i5400_clear_error	Retrieve any error from the hardware
+ *				but do NOT process that error.
+ *				Used for 'clearing' out of previous errors
+ *				Called by the Core module.
+ */
+static void i5400_clear_error(struct mem_ctl_info *mci)
+{
+	struct i5400_error_info info;	/* scratch only; never examined */
+
+	i5400_get_error_info(mci, &info);	/* writes latched FERR bits back */
+}
+
+/*
+ *	i5400_check_error	Retrieve and process errors reported by the
+ *				hardware. Called by the Core module.
+ */
+static void i5400_check_error(struct mem_ctl_info *mci)
+{
+	struct i5400_error_info info;	/* snapshot filled by the read below */
+	debugf4("MC%d: " __FILE__ ": %s()\n", mci->mc_idx, __func__);
+	i5400_get_error_info(mci, &info);	/* harvest the error registers */
+	i5400_process_error_info(mci, &info);	/* decode and report them */
+}
+
+/*
+ *	i5400_put_devices	'put' all the devices that we have
+ *				reserved via 'get'
+ */
+static void i5400_put_devices(struct mem_ctl_info *mci)
+{
+	struct i5400_pvt *pvt = mci->pvt_info;
+
+	/* Drop the reference on each device pointer that
+	 * i5400_get_devices() stores in pvt, branch devices first;
+	 * system_address is not released here */
+	pci_dev_put(pvt->branch_1);
+	pci_dev_put(pvt->branch_0);
+	pci_dev_put(pvt->fsb_error_regs);
+	pci_dev_put(pvt->branchmap_werrors);
+}
+
+/*
+ *	i5400_get_devices	Take a reference on device 16 func 1 and func 2
+ *			and on the branch device(s), storing them in pvt.
+ *
+ *	Returns 0 on success; on failure drops what was taken, returns -ENODEV
+ */
+static int i5400_get_devices(struct mem_ctl_info *mci, int dev_idx)
+{
+	struct i5400_pvt *pvt;
+	struct pci_dev *pdev = NULL;
+
+	pvt = mci->pvt_info;
+	pvt->branchmap_werrors = NULL;
+	pvt->fsb_error_regs = NULL;
+	pvt->branch_0 = NULL;
+	pvt->branch_1 = NULL;
+
+	/* pci_get_device() drops the reference on the device it resumes
+	 * from, so each function we keep needs a reference of its own */
+	while (!pvt->branchmap_werrors || !pvt->fsb_error_regs) {
+		pdev = pci_get_device(PCI_VENDOR_ID_INTEL,
+				      PCI_DEVICE_ID_INTEL_5400_ERR, pdev);
+		if (!pdev) {
+			/* End of list, leave */
+			i5400_printk(KERN_ERR,
+				"'system address,Process Bus' "
+				"device not found:"
+				"vendor 0x%x device 0x%x ERR funcs "
+				"(broken BIOS?)\n",
+				PCI_VENDOR_ID_INTEL,
+				PCI_DEVICE_ID_INTEL_5400_ERR);
+			goto error;
+		}
+		/* Store device 16 funcs 1 and 2, each with its own reference */
+		switch (PCI_FUNC(pdev->devfn)) {
+		case 1:
+			pvt->branchmap_werrors = pci_dev_get(pdev);
+			break;
+		case 2:
+			pvt->fsb_error_regs = pci_dev_get(pdev);
+			break;
+		}
+	}
+	pci_dev_put(pdev);	/* release the search cursor's reference */
+
+	debugf1("System Address, processor bus- PCI Bus ID: %s  %x:%x\n",
+		pci_name(pvt->system_address),
+		pvt->system_address->vendor, pvt->system_address->device);
+	debugf1("Branchmap, control and errors - PCI Bus ID: %s  %x:%x\n",
+		pci_name(pvt->branchmap_werrors),
+		pvt->branchmap_werrors->vendor, pvt->branchmap_werrors->device);
+	debugf1("FSB Error Regs - PCI Bus ID: %s  %x:%x\n",
+		pci_name(pvt->fsb_error_regs),
+		pvt->fsb_error_regs->vendor, pvt->fsb_error_regs->device);
+
+	pvt->branch_0 = pci_get_device(PCI_VENDOR_ID_INTEL,
+				       PCI_DEVICE_ID_INTEL_5400_FBD0, NULL);
+	if (!pvt->branch_0) {
+		i5400_printk(KERN_ERR,
+			"MC: 'BRANCH 0' device not found:"
+			"vendor 0x%x device 0x%x Func 0 (broken BIOS?)\n",
+			PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_5400_FBD0);
+		goto error;
+	}
+
+	/* Branch 1's information is only fetched when this device claims
+	 * at least CHANNELS_PER_BRANCH channels
+	 */
+	if (pvt->maxch < CHANNELS_PER_BRANCH)
+		return 0;
+
+	pvt->branch_1 = pci_get_device(PCI_VENDOR_ID_INTEL,
+				       PCI_DEVICE_ID_INTEL_5400_FBD1, NULL);
+	if (!pvt->branch_1) {
+		i5400_printk(KERN_ERR,
+			"MC: 'BRANCH 1' device not found:"
+			"vendor 0x%x device 0x%x Func 0 "
+			"(broken BIOS?)\n",
+			PCI_VENDOR_ID_INTEL,
+			PCI_DEVICE_ID_INTEL_5400_FBD1);
+		goto error;
+	}
+
+	return 0;
+
+error:
+	i5400_put_devices(mci);
+	return -ENODEV;
+}
+
+/*
+ *	determine_amb_present_reg
+ *
+ *		Return the cached AMBPRESENT value that covers 'channel'.
+ *		There is one such value per channel, so picking the right
+ *		one needs both the branch and the position in the branch.
+ *
+ *	2 branches, each with 2 channels
+ *		b0_ambpresent0 for channel '0'
+ *		b0_ambpresent1 for channel '1'
+ *		b1_ambpresent0 for channel '2'
+ *		b1_ambpresent1 for channel '3'
+ */
+static int determine_amb_present_reg(struct i5400_pvt *pvt, int channel)
+{
+	/* Odd channels use the second AMBPRESENT value of their branch */
+	const int second_in_branch = channel & 0x1;
+
+	/* Everything below CHANNELS_PER_BRANCH is served by branch 0 */
+	if (channel < CHANNELS_PER_BRANCH)
+		return second_in_branch ? pvt->b0_ambpresent1
+					: pvt->b0_ambpresent0;
+
+	/*
+	 * Any other channel number is treated as belonging to branch 1;
+	 * no upper bound is checked here, so callers must pass a valid
+	 * channel.
+	 */
+	return second_in_branch ? pvt->b1_ambpresent1
+				: pvt->b1_ambpresent0;
+}
+
+/*
+ * determine_mtr(pvt, csrow, channel)
+ *
+ * return the cached MTR value selected by the csrow and desired channel
+ */
+static int determine_mtr(struct i5400_pvt *pvt, int csrow, int channel)
+{
+	/* There is one MTR for each slot pair of FB-DIMMs,
+	   Each slot may have one or two ranks (2 csrows),
+	   Each slot pair may be at branch 0 or branch 1.
+	   So, csrow should be divided by eight
+	 */
+	const int slot_pair = csrow >> 3;
+	const u16 *branch_mtr;
+
+	/* Refuse to index past the per-branch MTR cache */
+	if (slot_pair >= NUM_MTRS_PER_BRANCH) {
+		debugf0("ERROR: trying to access an invalid csrow: %d\n",
+			csrow);
+		return 0;
+	}
+
+	/* Channels below CHANNELS_PER_BRANCH live on branch 0 */
+	if (channel < CHANNELS_PER_BRANCH)
+		branch_mtr = pvt->b0_mtr;
+	else
+		branch_mtr = pvt->b1_mtr;
+
+	return branch_mtr[slot_pair];
+}
+
+/* decode_mtr	print the decoded fields of one MTR value through debugf2()
+ */
+static void decode_mtr(int slot_row, u16 mtr)
+{
+	const int populated = MTR_DIMMS_PRESENT(mtr);
+
+	debugf2("\tMTR%d=0x%x:  DIMMs are %s\n", slot_row, mtr,
+		populated ? "Present" : "NOT Present");
+
+	/* Nothing further is printed when the present bit is clear */
+	if (!populated)
+		return;
+
+	debugf2("\t\tWIDTH: x%d\n", MTR_DRAM_WIDTH(mtr));
+	debugf2("\t\tELECTRICAL THROTTLING is %s\n",
+		MTR_DIMMS_ETHROTTLE(mtr) ? "enabled" : "disabled");
+	debugf2("\t\tNUMBANK: %d bank(s)\n", MTR_DRAM_BANKS(mtr));
+	debugf2("\t\tNUMRANK: %s\n", MTR_DIMM_RANK(mtr) ? "double" : "single");
+
+	/* Row/column codes index the CONFIG_EDAC_DEBUG-only string tables */
+	debugf2("\t\tNUMROW: %s\n", numrow_toString[MTR_DIMM_ROWS(mtr)]);
+	debugf2("\t\tNUMCOL: %s\n", numcol_toString[MTR_DIMM_COLS(mtr)]);
+}
+
+/* handle_channel	fill in *dinfo (rank flag, size in MB) for the entry
+ *			at (csrow, channel); left untouched when nothing is there */
+static void handle_channel(struct i5400_pvt *pvt, int csrow, int channel,
+			struct i5400_dimm_info *dinfo)
+{
+	const int mtr = determine_mtr(pvt, csrow, channel);
+	int size_shift;
+
+	/* Nothing to record unless the MTR reports DIMMs present */
+	if (!MTR_DIMMS_PRESENT(mtr))
+		return;
+
+	/* Determine if there is a DIMM present in this DIMM slot */
+	if (!(determine_amb_present_reg(pvt, channel) & (1 << (csrow >> 1))))
+		return;
+
+	dinfo->dual_rank = MTR_DIMM_RANK(mtr);
+
+	/* The odd csrow of a single-rank DIMM gets no size assigned */
+	if (dinfo->dual_rank == 0 && (csrow & 0x1) == 0x1)
+		return;
+
+	/*
+	 * log2 of the size in megabytes: bank, row and column address
+	 * bits, plus 6 for the 64 bits per DIMM, minus 3 to turn bits
+	 * into bytes and minus 20 to turn bytes into megabytes.
+	 */
+	size_shift = MTR_DRAM_BANKS_ADDR_BITS(mtr) +
+		     MTR_DIMM_ROWS_ADDR_BITS(mtr) +
+		     MTR_DIMM_COLS_ADDR_BITS(mtr) +
+		     6 - 20 - 3;
+
+	dinfo->megabytes = 1 << size_shift;
+}
+
+/*
+ *	calculate_dimm_size	record, via handle_channel(), the size of every
+ *				(csrow, channel) entry of pvt->dimm_info
+ *
+ *	also formats a DIMM matrix map and hands it to debugf2(), row by row
+ */
+static void calculate_dimm_size(struct i5400_pvt *pvt)
+{
+	struct i5400_dimm_info *dinfo;
+	int csrow, max_csrows;
+	char *p, *mem_buffer;	/* p: write cursor inside mem_buffer */
+	int space, n;		/* space: bytes left at p; n: last snprintf result */
+	int channel;
+
+	/* ================= Generate some debug output ================= */
+	space = PAGE_SIZE;
+	mem_buffer = p = kmalloc(space, GFP_KERNEL);
+	if (p == NULL) {
+		i5400_printk(KERN_ERR, "MC: %s:%s() kmalloc() failed\n",
+			__FILE__, __func__);
+		return;		/* NOTE(review): dimm_info stays unfilled here */
+	}
+
+	/* Scan all the actual CSROWS (which is # of DIMMS * 2)
+	 * and calculate the information for each DIMM
+	 * Start with the highest csrow first, to display it first
+	 * and work toward the 0th csrow
+	 */
+	max_csrows = pvt->maxdimmperch * 2;
+	for (csrow = max_csrows - 1; csrow >= 0; csrow--) {
+
+		/* on an odd csrow, first output a 'boundary' marker,
+		 * then reset the message buffer  */
+		if (csrow & 0x1) {
+			n = snprintf(p, space, "---------------------------"
+					"--------------------------------");
+			p += n;
+			space -= n;
+			debugf2("%s\n", mem_buffer);
+			p = mem_buffer;		/* rewind for the next line */
+			space = PAGE_SIZE;
+		}
+		n = snprintf(p, space, "csrow %2d    ", csrow);
+		p += n;
+		space -= n;
+
+		for (channel = 0; channel < pvt->maxch; channel++) {
+			dinfo = &pvt->dimm_info[csrow][channel];
+			handle_channel(pvt, csrow, channel, dinfo);	/* sets the size */
+			n = snprintf(p, space, "%4d MB   | ", dinfo->megabytes);
+			p += n;
+			space -= n;
+		}
+		debugf2("%s\n", mem_buffer);
+		p = mem_buffer;		/* rewind for the next line */
+		space = PAGE_SIZE;
+	}
+
+	/* Output the last bottom 'boundary' marker */
+	n = snprintf(p, space, "---------------------------"
+			"--------------------------------");
+	p += n;
+	space -= n;
+	debugf2("%s\n", mem_buffer);
+	p = mem_buffer;
+	space = PAGE_SIZE;
+
+	/* now output the 'channel' labels */
+	n = snprintf(p, space, "            ");
+	p += n;
+	space -= n;
+	for (channel = 0; channel < pvt->maxch; channel++) {
+		n = snprintf(p, space, "channel %d | ", channel);
+		p += n;
+		space -= n;
+	}
+
+	/* output the last message and free buffer */
+	debugf2("%s\n", mem_buffer);
+	kfree(mem_buffer);
+}
+
+/*
+ *	i5400_get_mc_regs	read in the necessary registers and
+ *				cache locally
+ *
+ *	Fills in the private data members; AMBASE is read as two dwords.
+ */
+static void i5400_get_mc_regs(struct mem_ctl_info *mci)
+{
+	struct i5400_pvt *pvt;
+	u32 actual_tolm;
+	u16 limit;
+	int slot_row;
+	int maxch;
+	int maxdimmperch;
+	int way0, way1;
+
+	pvt = mci->pvt_info;
+
+	pci_read_config_dword(pvt->system_address, AMBASE,
+			(u32 *) &pvt->ambase);
+	pci_read_config_dword(pvt->system_address, AMBASE + sizeof(u32),
+			((u32 *) &pvt->ambase) + 1);	/* next u32, not +4 */
+
+	maxdimmperch = pvt->maxdimmperch;
+	maxch = pvt->maxch;
+
+	debugf2("AMBASE= 0x%lx  MAXCH= %d  MAX-DIMM-Per-CH= %d\n",
+		(long unsigned int)pvt->ambase, pvt->maxch, pvt->maxdimmperch);
+
+	/* Get the Branch Map regs */
+	pci_read_config_word(pvt->branchmap_werrors, TOLM, &pvt->tolm);
+	pvt->tolm >>= 12;
+	debugf2("\nTOLM (number of 256M regions) =%u (0x%x)\n", pvt->tolm,
+		pvt->tolm);
+
+	actual_tolm = (u32) ((1000l * pvt->tolm) >> (30 - 28));
+	debugf2("Actual TOLM byte addr=%u.%03u GB (0x%x)\n",
+		actual_tolm/1000, actual_tolm % 1000, pvt->tolm << 28);
+
+	pci_read_config_word(pvt->branchmap_werrors, MIR0, &pvt->mir0);
+	pci_read_config_word(pvt->branchmap_werrors, MIR1, &pvt->mir1);
+
+	/* Decode the MIR[0-1] regs that were just read (debug only) */
+	limit = (pvt->mir0 >> 4) & 0x0fff;
+	way0 = pvt->mir0 & 0x1;
+	way1 = pvt->mir0 & 0x2;
+	debugf2("MIR0: limit= 0x%x  WAY1= %u  WAY0= %x\n", limit, way1, way0);
+	limit = (pvt->mir1 >> 4) & 0xfff;
+	way0 = pvt->mir1 & 0x1;
+	way1 = pvt->mir1 & 0x2;
+	debugf2("MIR1: limit= 0x%x  WAY1= %u  WAY0= %x\n", limit, way1, way0);
+
+	/* Get the set of MTR[0-3] regs by each branch */
+	for (slot_row = 0; slot_row < NUM_MTRS_PER_BRANCH; slot_row++) {
+		int where = MTR0 + (slot_row * sizeof(u32));
+
+		/* Branch 0 set of MTR registers */
+		pci_read_config_word(pvt->branch_0, where,
+				&pvt->b0_mtr[slot_row]);
+
+		debugf2("MTR%d where=0x%x B0 value=0x%x\n", slot_row, where,
+			pvt->b0_mtr[slot_row]);
+
+		if (pvt->maxch < CHANNELS_PER_BRANCH) {
+			pvt->b1_mtr[slot_row] = 0;
+			continue;
+		}
+
+		/* Branch 1 set of MTR registers */
+		pci_read_config_word(pvt->branch_1, where,
+				&pvt->b1_mtr[slot_row]);
+		debugf2("MTR%d where=0x%x B1 value=0x%x\n", slot_row, where,
+			pvt->b1_mtr[slot_row]);
+	}
+
+	/* Read and dump branch 0's MTRs */
+	debugf2("\nMemory Technology Registers:\n");
+	debugf2("   Branch 0:\n");
+	for (slot_row = 0; slot_row < NUM_MTRS_PER_BRANCH; slot_row++)
+		decode_mtr(slot_row, pvt->b0_mtr[slot_row]);
+
+	pci_read_config_word(pvt->branch_0, AMBPRESENT_0,
+			&pvt->b0_ambpresent0);
+	debugf2("\t\tAMB-Branch 0-present0 0x%x:\n", pvt->b0_ambpresent0);
+	pci_read_config_word(pvt->branch_0, AMBPRESENT_1,
+			&pvt->b0_ambpresent1);
+	debugf2("\t\tAMB-Branch 0-present1 0x%x:\n", pvt->b0_ambpresent1);
+
+	/* Only if we have 2 branches (4 channels) */
+	if (pvt->maxch < CHANNELS_PER_BRANCH) {
+		pvt->b1_ambpresent0 = 0;
+		pvt->b1_ambpresent1 = 0;
+	} else {
+		/* Read and dump  branch 1's MTRs */
+		debugf2("   Branch 1:\n");
+		for (slot_row = 0; slot_row < NUM_MTRS_PER_BRANCH; slot_row++)
+			decode_mtr(slot_row, pvt->b1_mtr[slot_row]);
+
+		pci_read_config_word(pvt->branch_1, AMBPRESENT_0,
+				&pvt->b1_ambpresent0);
+		debugf2("\t\tAMB-Branch 1-present0 0x%x:\n",
+			pvt->b1_ambpresent0);
+		pci_read_config_word(pvt->branch_1, AMBPRESENT_1,
+				&pvt->b1_ambpresent1);
+		debugf2("\t\tAMB-Branch 1-present1 0x%x:\n",
+			pvt->b1_ambpresent1);
+	}
+
+	/* Go and determine the size of each DIMM and place in an
+	 * orderly matrix */
+	calculate_dimm_size(pvt);
+}
+
+/*
+ *	i5400_init_csrows	Initialize the 'csrows' table within
+ *				the mci control	structure with the
+ *				addressing of memory.
+ *
+ *	return:
+ *		0	success
+ *		1	no actual memory found on this MC
+ */
+static int i5400_init_csrows(struct mem_ctl_info *mci)
+{
+	struct i5400_pvt *pvt;
+	struct csrow_info *p_csrow;
+	int empty, channel_count;
+	int max_csrows;
+	int mtr;
+	int csrow_megs;
+	int channel;
+	int csrow;
+
+	pvt = mci->pvt_info;
+
+	channel_count = pvt->maxch;	/* not read again in this function */
+	max_csrows = pvt->maxdimmperch * 2;
+
+	empty = 1;		/* Assume NO memory */
+
+	for (csrow = 0; csrow < max_csrows; csrow++) {
+		p_csrow = &mci->csrows[csrow];
+
+		p_csrow->csrow_idx = csrow;
+
+		/* use branch 0 for the basis */
+		mtr = determine_mtr(pvt, csrow, 0);
+
+		/* if no DIMMS on this row, continue */
+		if (!MTR_DIMMS_PRESENT(mtr))
+			continue;
+
+		/* FAKE OUT VALUES, FIXME */
+		p_csrow->first_page = 0 + csrow * 20;
+		p_csrow->last_page = 9 + csrow * 20;
+		p_csrow->page_mask = 0xFFF;
+
+		p_csrow->grain = 8;
+
+		/* sum the cached per-channel DIMM sizes of this csrow */
+		csrow_megs = 0;
+		for (channel = 0; channel < pvt->maxch; channel++)
+			csrow_megs += pvt->dimm_info[csrow][channel].megabytes;
+
+		/* MB * 256 -> pages; presumably 4 KiB pages, TODO confirm */
+		p_csrow->nr_pages = csrow_megs << 8;
+
+		/* Assume DDR2 for now */
+		p_csrow->mtype = MEM_FB_DDR2;
+
+		/* ask what device type on this row */
+		if (MTR_DRAM_WIDTH(mtr))
+			p_csrow->dtype = DEV_X8;
+		else
+			p_csrow->dtype = DEV_X4;
+
+		p_csrow->edac_mode = EDAC_S8ECD8ED;
+
+		empty = 0;	/* at least one populated csrow was found */
+	}
+
+	return empty;
+}
+
+/*
+ *	i5400_enable_error_reporting
+ *			Turn on the memory reporting features of the hardware
+ */
+static void i5400_enable_error_reporting(struct mem_ctl_info *mci)
+{
+	struct i5400_pvt *pvt;
+	u32 fbd_error_mask;
+
+	pvt = mci->pvt_info;
+
+	/* Read the FBD Error Mask Register */
+	pci_read_config_dword(pvt->branchmap_werrors, EMASK_FBD,
+			&fbd_error_mask);
+
+	/* Enable with a '0': clear every bit named in ENABLE_EMASK_ALL */
+	fbd_error_mask &= ~(ENABLE_EMASK_ALL);
+
+	pci_write_config_dword(pvt->branchmap_werrors, EMASK_FBD,
+			fbd_error_mask);
+}
+
+/*
+ * i5400_get_dimm_and_channel_counts(pdev, &num_dimms_per_channel,
+ *					&num_channels)
+ *	ask the device how many channels are present and how many DIMMs
+ *	 per channel as well
+ */
+static void i5400_get_dimm_and_channel_counts(struct pci_dev *pdev,
+					int *num_dimms_per_channel,
+					int *num_channels)
+{
+	u8 value;
+
+	/* Need to retrieve just how many channels and dimms per channel are
+	 * supported on this memory controller
+	 */
+	pci_read_config_byte(pdev, MAXDIMMPERCH, &value);
+	*num_dimms_per_channel = (int)value * 2;	/* stored doubled */
+
+	pci_read_config_byte(pdev, MAXCH, &value);
+	*num_channels = (int)value;
+}
+
+/*
+ *	i5400_probe1	Probe for ONE instance of device to see if it is
+ *			present.
+ *	return:
+ *		0 for FOUND a device
+ *		< 0 for error code
+ */
+static int i5400_probe1(struct pci_dev *pdev, int dev_idx)
+{
+	struct mem_ctl_info *mci;
+	struct i5400_pvt *pvt;
+	int num_channels;
+	int num_dimms_per_channel;
+	int num_csrows;
+
+	if (dev_idx >= ARRAY_SIZE(i5400_devs))
+		return -EINVAL;
+
+	debugf0("MC: " __FILE__ ": %s(), pdev bus %u dev=0x%x fn=0x%x\n",
+		__func__,
+		pdev->bus->number,
+		PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn));
+
+	/* We only are looking for func 0 of the set */
+	if (PCI_FUNC(pdev->devfn) != 0)
+		return -ENODEV;
+
+	/* Ask the devices for the number of CSROWS and CHANNELS so
+	 * that we can calculate the memory resources, etc
+	 *
+	 * The Chipset will report what it can handle which will be greater
+	 * or equal to what the motherboard manufacturer will implement.
+	 *
+	 * As we don't have a motherboard identification routine to determine
+	 * actual number of slots/dimms per channel, we thus utilize the
+	 * resource as specified by the chipset. Thus, we might
+	 * have more DIMMs per channel than actually on the mobo, but this
+	 * allows the driver to support up to the chipset max, without
+	 * some fancy mobo determination.
+	 */
+	i5400_get_dimm_and_channel_counts(pdev, &num_dimms_per_channel,
+					&num_channels);
+	num_csrows = num_dimms_per_channel * 2;
+
+	debugf0("MC: %s(): Number of - Channels= %d  DIMMS= %d  CSROWS= %d\n",
+		__func__, num_channels, num_dimms_per_channel, num_csrows);
+
+	/* allocate a new MC control structure */
+	mci = edac_mc_alloc(sizeof(*pvt), num_csrows, num_channels, 0);
+
+	if (mci == NULL)
+		return -ENOMEM;
+
+	debugf0("MC: " __FILE__ ": %s(): mci = %p\n", __func__, mci);
+
+	mci->dev = &pdev->dev;	/* record ptr  to the generic device */
+
+	pvt = mci->pvt_info;
+	pvt->system_address = pdev;	/* Record this device in our private */
+	pvt->maxch = num_channels;
+	pvt->maxdimmperch = num_dimms_per_channel;
+
+	/* 'get' the pci devices we want to reserve for our use */
+	if (i5400_get_devices(mci, dev_idx))
+		goto fail0;
+
+	/* Time to get serious */
+	i5400_get_mc_regs(mci);	/* retrieve the hardware registers */
+
+	mci->mc_idx = 0;
+	mci->mtype_cap = MEM_FLAG_FB_DDR2;
+	mci->edac_ctl_cap = EDAC_FLAG_NONE;
+	mci->edac_cap = EDAC_FLAG_NONE;
+	mci->mod_name = "i5400_edac.c";
+	mci->mod_ver = I5400_REVISION;
+	mci->ctl_name = i5400_devs[dev_idx].ctl_name;
+	mci->dev_name = pci_name(pdev);
+	mci->ctl_page_to_phys = NULL;
+
+	/* Set the function pointer to an actual operation function */
+	mci->edac_check = i5400_check_error;
+
+	/* initialize the MC control structure 'csrows' table
+	 * with the mapping and control information */
+	if (i5400_init_csrows(mci)) {
+		debugf0("MC: Setting mci->edac_cap to EDAC_FLAG_NONE\n"
+			"    because i5400_init_csrows() returned nonzero "
+			"value\n");
+		mci->edac_cap = EDAC_FLAG_NONE;	/* no csrows found */
+	} else {
+		debugf1("MC: Enable error reporting now\n");
+		i5400_enable_error_reporting(mci);
+	}
+
+	/* add this new MC control structure to EDAC's list of MCs */
+	if (edac_mc_add_mc(mci)) {
+		debugf0("MC: " __FILE__
+			": %s(): failed edac_mc_add_mc()\n", __func__);
+		/* FIXME: perhaps some code should go here that disables error
+		 * reporting if we just enabled it
+		 */
+		goto fail1;
+	}
+
+	i5400_clear_error(mci);
+
+	/* allocating generic PCI control info; failure is only warned about */
+	i5400_pci = edac_pci_create_generic_ctl(&pdev->dev, EDAC_MOD_STR);
+	if (!i5400_pci) {
+		printk(KERN_WARNING
+			"%s(): Unable to create PCI control\n",
+			__func__);
+		printk(KERN_WARNING
+			"%s(): PCI error report via EDAC not setup\n",
+			__func__);
+	}
+
+	return 0;
+
+	/* Error exit unwinding stack; both labels end in -ENODEV */
+fail1:
+
+	i5400_put_devices(mci);
+
+fail0:
+	edac_mc_free(mci);
+	return -ENODEV;
+}
+
+/*
+ *	i5400_init_one	constructor for one instance of device
+ *
+ * 	returns:
+ *		negative errno on error
+ *		0 on success
+ */
+static int __devinit i5400_init_one(struct pci_dev *pdev,
+				const struct pci_device_id *id)
+{
+	int rc;
+
+	debugf0("MC: " __FILE__ ": %s()\n", __func__);
+
+	/* wake up device; any failure (not only -EIO) aborts the probe */
+	rc = pci_enable_device(pdev);
+	if (rc)
+		return rc;
+
+	/* now probe and enable the device */
+	return i5400_probe1(pdev, id->driver_data);
+}
+
+/*
+ *	i5400_remove_one	destructor for one instance of device
+ *
+ */
+static void __devexit i5400_remove_one(struct pci_dev *pdev)
+{
+	struct mem_ctl_info *mci;
+
+	debugf0(__FILE__ ": %s()\n", __func__);
+
+	if (i5400_pci)	/* only set when the generic PCI ctl was created */
+		edac_pci_release_generic_ctl(i5400_pci);
+
+	mci = edac_mc_del_mc(&pdev->dev);
+	if (!mci)	/* nothing registered for this device: nothing to free */
+		return;
+
+	/* retrieve references to resources, and free those resources */
+	i5400_put_devices(mci);
+
+	edac_mc_free(mci);
+}
+
+/*
+ *	pci_device_id	table for which devices we are looking for
+ *
+ *	The "E500P" device is the first device supported.
+ */
+static const struct pci_device_id i5400_pci_tbl[] __devinitdata = {
+	{PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_5400_ERR)},
+	{0,}			/* all-zero entry terminates the list */
+};
+
+MODULE_DEVICE_TABLE(pci, i5400_pci_tbl);
+
+/*
+ *	i5400_driver	pci_driver structure for this module
+ *
+ */
+static struct pci_driver i5400_driver = {
+	.name = "i5400_edac",
+	.probe = i5400_init_one,	/* per-device constructor */
+	.remove = __devexit_p(i5400_remove_one),	/* per-device destructor */
+	.id_table = i5400_pci_tbl,
+};
+
+/*
+ *	i5400_init		Module entry function
+ *			Try to initialize this module for its devices
+ */
+static int __init i5400_init(void)
+{
+	int pci_rc;
+
+	debugf2("MC: " __FILE__ ": %s()\n", __func__);
+
+	/* Ensure that the OPSTATE is set correctly for POLL or NMI */
+	opstate_init();
+
+	pci_rc = pci_register_driver(&i5400_driver);
+
+	return (pci_rc < 0) ? pci_rc : 0;	/* positive values become 0 */
+}
+
+/*
+ *	i5400_exit()	Module exit function
+ *			Unregister the driver
+ */
+static void __exit i5400_exit(void)
+{
+	debugf2("MC: " __FILE__ ": %s()\n", __func__);
+	pci_unregister_driver(&i5400_driver);	/* undoes i5400_init() */
+}
+
+module_init(i5400_init);
+module_exit(i5400_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Ben Woodard <woodard@redhat.com>");
+MODULE_AUTHOR("Mauro Carvalho Chehab <mchehab@redhat.com>");
+MODULE_AUTHOR("Red Hat Inc. (http://www.redhat.com)");
+MODULE_DESCRIPTION("MC Driver for Intel I5400 memory controllers - "
+		   I5400_REVISION);
+
+module_param(edac_op_state, int, 0444);
+MODULE_PARM_DESC(edac_op_state, "EDAC Error Reporting state: 0=Poll,1=NMI");
diff --git a/drivers/edac/i82875p_edac.c b/drivers/edac/i82875p_edac.c
index ebb037b..b2d83b9 100644
--- a/drivers/edac/i82875p_edac.c
+++ b/drivers/edac/i82875p_edac.c
@@ -311,9 +311,7 @@
 	}
 
 	/* cache is irrelevant for PCI bus reads/writes */
-	window = ioremap_nocache(pci_resource_start(dev, 0),
-				 pci_resource_len(dev, 0));
-
+	window = pci_ioremap_bar(dev, 0);
 	if (window == NULL) {
 		i82875p_printk(KERN_ERR, "%s(): Failed to ioremap bar6\n",
 			__func__);
diff --git a/drivers/edac/mpc85xx_edac.c b/drivers/edac/mpc85xx_edac.c
index 0cfcb2d..853ef37 100644
--- a/drivers/edac/mpc85xx_edac.c
+++ b/drivers/edac/mpc85xx_edac.c
@@ -630,27 +630,22 @@
 }
 
 static struct of_device_id mpc85xx_l2_err_of_match[] = {
-	{
-	 .compatible = "fsl,8540-l2-cache-controller",
-	 },
-	{
-	 .compatible = "fsl,8541-l2-cache-controller",
-	 },
-	{
-	 .compatible = "fsl,8544-l2-cache-controller",
-	 },
-	{
-	 .compatible = "fsl,8548-l2-cache-controller",
-	 },
-	{
-	 .compatible = "fsl,8555-l2-cache-controller",
-	 },
-	{
-	 .compatible = "fsl,8568-l2-cache-controller",
-	 },
-	{
-	 .compatible = "fsl,mpc8572-l2-cache-controller",
-	 },
+/* deprecate the fsl,85.. forms in the future, 2.6.30? */
+	{ .compatible = "fsl,8540-l2-cache-controller", },
+	{ .compatible = "fsl,8541-l2-cache-controller", },
+	{ .compatible = "fsl,8544-l2-cache-controller", },
+	{ .compatible = "fsl,8548-l2-cache-controller", },
+	{ .compatible = "fsl,8555-l2-cache-controller", },
+	{ .compatible = "fsl,8568-l2-cache-controller", },
+	{ .compatible = "fsl,mpc8536-l2-cache-controller", },
+	{ .compatible = "fsl,mpc8540-l2-cache-controller", },
+	{ .compatible = "fsl,mpc8541-l2-cache-controller", },
+	{ .compatible = "fsl,mpc8544-l2-cache-controller", },
+	{ .compatible = "fsl,mpc8548-l2-cache-controller", },
+	{ .compatible = "fsl,mpc8555-l2-cache-controller", },
+	{ .compatible = "fsl,mpc8560-l2-cache-controller", },
+	{ .compatible = "fsl,mpc8568-l2-cache-controller", },
+	{ .compatible = "fsl,mpc8572-l2-cache-controller", },
 	{},
 };
 
@@ -967,27 +962,22 @@
 }
 
 static struct of_device_id mpc85xx_mc_err_of_match[] = {
-	{
-	 .compatible = "fsl,8540-memory-controller",
-	 },
-	{
-	 .compatible = "fsl,8541-memory-controller",
-	 },
-	{
-	 .compatible = "fsl,8544-memory-controller",
-	 },
-	{
-	 .compatible = "fsl,8548-memory-controller",
-	 },
-	{
-	 .compatible = "fsl,8555-memory-controller",
-	 },
-	{
-	 .compatible = "fsl,8568-memory-controller",
-	 },
-	{
-	 .compatible = "fsl,mpc8572-memory-controller",
-	 },
+/* deprecate the fsl,85.. forms in the future, 2.6.30? */
+	{ .compatible = "fsl,8540-memory-controller", },
+	{ .compatible = "fsl,8541-memory-controller", },
+	{ .compatible = "fsl,8544-memory-controller", },
+	{ .compatible = "fsl,8548-memory-controller", },
+	{ .compatible = "fsl,8555-memory-controller", },
+	{ .compatible = "fsl,8568-memory-controller", },
+	{ .compatible = "fsl,mpc8536-memory-controller", },
+	{ .compatible = "fsl,mpc8540-memory-controller", },
+	{ .compatible = "fsl,mpc8541-memory-controller", },
+	{ .compatible = "fsl,mpc8544-memory-controller", },
+	{ .compatible = "fsl,mpc8548-memory-controller", },
+	{ .compatible = "fsl,mpc8555-memory-controller", },
+	{ .compatible = "fsl,mpc8560-memory-controller", },
+	{ .compatible = "fsl,mpc8568-memory-controller", },
+	{ .compatible = "fsl,mpc8572-memory-controller", },
 	{},
 };
 
diff --git a/drivers/firmware/dmi_scan.c b/drivers/firmware/dmi_scan.c
index 78b989d..d76adfe 100644
--- a/drivers/firmware/dmi_scan.c
+++ b/drivers/firmware/dmi_scan.c
@@ -468,8 +468,8 @@
 EXPORT_SYMBOL(dmi_get_system_info);
 
 /**
- *	dmi_name_in_serial - 	Check if string is in the DMI product serial
- *				information.
+ * dmi_name_in_serial - Check if string is in the DMI product serial information
+ * @str: string to check for
  */
 int dmi_name_in_serial(const char *str)
 {
@@ -585,6 +585,8 @@
 
 /**
  * dmi_match - compare a string to the dmi field (if exists)
+ * @f: DMI field identifier
+ * @str: string to compare the DMI field to
  *
  * Returns true if the requested field equals to the str (including NULL).
  */
diff --git a/drivers/gpio/Kconfig b/drivers/gpio/Kconfig
index 48f49d9..3d25654 100644
--- a/drivers/gpio/Kconfig
+++ b/drivers/gpio/Kconfig
@@ -95,7 +95,7 @@
 	  number for these GPIOs.
 
 config GPIO_PCA953X
-	tristate "PCA953x, PCA955x, and MAX7310 I/O ports"
+	tristate "PCA953x, PCA955x, TCA64xx, and MAX7310 I/O ports"
 	depends on I2C
 	help
 	  Say yes here to provide access to several register-oriented
@@ -104,9 +104,10 @@
 
 	  4 bits:	pca9536, pca9537
 
-	  8 bits:	max7310, pca9534, pca9538, pca9554, pca9557
+	  8 bits:	max7310, pca9534, pca9538, pca9554, pca9557,
+	  		tca6408
 
-	  16 bits:	pca9535, pca9539, pca9555
+	  16 bits:	pca9535, pca9539, pca9555, tca6416
 
 	  This driver can also be built as a module.  If so, the module
 	  will be called pca953x.
diff --git a/drivers/gpio/pca953x.c b/drivers/gpio/pca953x.c
index 9ceeb89..37f35388 100644
--- a/drivers/gpio/pca953x.c
+++ b/drivers/gpio/pca953x.c
@@ -33,7 +33,12 @@
 	{ "pca9554", 8, },
 	{ "pca9555", 16, },
 	{ "pca9557", 8, },
+
 	{ "max7310", 8, },
+	{ "pca6107", 8, },
+	{ "tca6408", 8, },
+	{ "tca6416", 16, },
+	/* NYET:  { "tca6424", 24, }, */
 	{ }
 };
 MODULE_DEVICE_TABLE(i2c, pca953x_id);
@@ -47,9 +52,6 @@
 	struct gpio_chip gpio_chip;
 };
 
-/* NOTE:  we can't currently rely on fault codes to come from SMBus
- * calls, so we map all errors to EIO here and return zero otherwise.
- */
 static int pca953x_write_reg(struct pca953x_chip *chip, int reg, uint16_t val)
 {
 	int ret;
@@ -61,7 +63,7 @@
 
 	if (ret < 0) {
 		dev_err(&chip->client->dev, "failed writing register\n");
-		return -EIO;
+		return ret;
 	}
 
 	return 0;
@@ -78,7 +80,7 @@
 
 	if (ret < 0) {
 		dev_err(&chip->client->dev, "failed reading register\n");
-		return -EIO;
+		return ret;
 	}
 
 	*val = (uint16_t)ret;
diff --git a/drivers/gpio/twl4030-gpio.c b/drivers/gpio/twl4030-gpio.c
index 37d3eec..afad147 100644
--- a/drivers/gpio/twl4030-gpio.c
+++ b/drivers/gpio/twl4030-gpio.c
@@ -202,37 +202,6 @@
 	return ret;
 }
 
-/*
- * Configure debounce timing value for a GPIO pin on TWL4030
- */
-int twl4030_set_gpio_debounce(int gpio, int enable)
-{
-	u8 d_bnk = gpio >> 3;
-	u8 d_msk = BIT(gpio & 0x7);
-	u8 reg = 0;
-	u8 base = 0;
-	int ret = 0;
-
-	if (unlikely((gpio >= TWL4030_GPIO_MAX)
-		|| !(gpio_usage_count & BIT(gpio))))
-		return -EPERM;
-
-	base = REG_GPIO_DEBEN1 + d_bnk;
-	mutex_lock(&gpio_lock);
-	ret = gpio_twl4030_read(base);
-	if (ret >= 0) {
-		if (enable)
-			reg = ret | d_msk;
-		else
-			reg = ret & ~d_msk;
-
-		ret = gpio_twl4030_write(base, reg);
-	}
-	mutex_unlock(&gpio_lock);
-	return ret;
-}
-EXPORT_SYMBOL(twl4030_set_gpio_debounce);
-
 /*----------------------------------------------------------------------*/
 
 static int twl_request(struct gpio_chip *chip, unsigned offset)
@@ -405,6 +374,23 @@
 				REG_GPIOPUPDCTR1, 5);
 }
 
+static int __devinit gpio_twl4030_debounce(u32 debounce, u8 mmc_cd)
+{
+	u8		message[4];	/* [0] is never written here - verify */
+
+	/* 30 msec of debouncing is always used for MMC card detect,
+	 * and is optional for everything else.
+	 */
+	message[1] = (debounce & 0xff) | (mmc_cd & 0x03);	/* bits 0-7 */
+	debounce >>= 8;
+	message[2] = (debounce & 0xff);		/* bits 8-15 */
+	debounce >>= 8;
+	message[3] = (debounce & 0x03);		/* bits 16-17 */
+
+	return twl4030_i2c_write(TWL4030_MODULE_GPIO, message,
+				REG_GPIO_DEBEN1, 3);
+}
+
 static int gpio_twl4030_remove(struct platform_device *pdev);
 
 static int __devinit gpio_twl4030_probe(struct platform_device *pdev)
@@ -439,6 +425,12 @@
 				pdata->pullups, pdata->pulldowns,
 				ret);
 
+	ret = gpio_twl4030_debounce(pdata->debounce, pdata->mmc_cd);
+	if (ret)
+		dev_dbg(&pdev->dev, "debounce %.03x %.01x --> %d\n",
+				pdata->debounce, pdata->mmc_cd,
+				ret);
+
 	twl_gpiochip.base = pdata->gpio_base;
 	twl_gpiochip.ngpio = TWL4030_GPIO_MAX;
 	twl_gpiochip.dev = &pdev->dev;
diff --git a/drivers/gpu/drm/drm_fops.c b/drivers/gpu/drm/drm_fops.c
index 3733e36..b06a537 100644
--- a/drivers/gpu/drm/drm_fops.c
+++ b/drivers/gpu/drm/drm_fops.c
@@ -183,6 +183,10 @@
 
 	old_fops = filp->f_op;
 	filp->f_op = fops_get(&dev->driver->fops);
+	if (filp->f_op == NULL) {
+		filp->f_op = old_fops;
+		goto out;
+	}
 	if (filp->f_op->open && (err = filp->f_op->open(inode, filp))) {
 		fops_put(filp->f_op);
 		filp->f_op = fops_get(old_fops);
diff --git a/drivers/hwmon/adt7462.c b/drivers/hwmon/adt7462.c
index 66107b4..1852f27 100644
--- a/drivers/hwmon/adt7462.c
+++ b/drivers/hwmon/adt7462.c
@@ -204,8 +204,6 @@
 #define MASK_AND_SHIFT(value, prefix)	\
 	(((value) & prefix##_MASK) >> prefix##_SHIFT)
 
-#define ROUND_DIV(x, divisor)  (((x) + ((divisor) / 2)) / (divisor))
-
 struct adt7462_data {
 	struct device		*hwmon_dev;
 	struct attribute_group	attrs;
@@ -840,7 +838,7 @@
 	if (strict_strtol(buf, 10, &temp) || !temp_enabled(data, attr->index))
 		return -EINVAL;
 
-	temp = ROUND_DIV(temp, 1000) + 64;
+	temp = DIV_ROUND_CLOSEST(temp, 1000) + 64;
 	temp = SENSORS_LIMIT(temp, 0, 255);
 
 	mutex_lock(&data->lock);
@@ -878,7 +876,7 @@
 	if (strict_strtol(buf, 10, &temp) || !temp_enabled(data, attr->index))
 		return -EINVAL;
 
-	temp = ROUND_DIV(temp, 1000) + 64;
+	temp = DIV_ROUND_CLOSEST(temp, 1000) + 64;
 	temp = SENSORS_LIMIT(temp, 0, 255);
 
 	mutex_lock(&data->lock);
@@ -943,7 +941,7 @@
 		return -EINVAL;
 
 	temp *= 1000; /* convert mV to uV */
-	temp = ROUND_DIV(temp, x);
+	temp = DIV_ROUND_CLOSEST(temp, x);
 	temp = SENSORS_LIMIT(temp, 0, 255);
 
 	mutex_lock(&data->lock);
@@ -985,7 +983,7 @@
 		return -EINVAL;
 
 	temp *= 1000; /* convert mV to uV */
-	temp = ROUND_DIV(temp, x);
+	temp = DIV_ROUND_CLOSEST(temp, x);
 	temp = SENSORS_LIMIT(temp, 0, 255);
 
 	mutex_lock(&data->lock);
@@ -1250,7 +1248,7 @@
 	if (strict_strtol(buf, 10, &temp))
 		return -EINVAL;
 
-	temp = ROUND_DIV(temp, 1000);
+	temp = DIV_ROUND_CLOSEST(temp, 1000);
 	temp = SENSORS_LIMIT(temp, 0, 15);
 
 	/* package things up */
@@ -1337,7 +1335,7 @@
 	if (strict_strtol(buf, 10, &temp))
 		return -EINVAL;
 
-	temp = ROUND_DIV(temp, 1000) + 64;
+	temp = DIV_ROUND_CLOSEST(temp, 1000) + 64;
 	temp = SENSORS_LIMIT(temp, 0, 255);
 
 	mutex_lock(&data->lock);
diff --git a/drivers/hwmon/adt7470.c b/drivers/hwmon/adt7470.c
index 1311a59..633e1a1 100644
--- a/drivers/hwmon/adt7470.c
+++ b/drivers/hwmon/adt7470.c
@@ -28,6 +28,7 @@
 #include <linux/mutex.h>
 #include <linux/delay.h>
 #include <linux/log2.h>
+#include <linux/kthread.h>
 
 /* Addresses to scan */
 static const unsigned short normal_i2c[] = { 0x2C, 0x2E, 0x2F, I2C_CLIENT_END };
@@ -74,6 +75,7 @@
 #define ADT7470_REG_PWM12_CFG			0x68
 #define		ADT7470_PWM2_AUTO_MASK		0x40
 #define		ADT7470_PWM1_AUTO_MASK		0x80
+#define		ADT7470_PWM_AUTO_MASK		0xC0
 #define ADT7470_REG_PWM34_CFG			0x69
 #define		ADT7470_PWM3_AUTO_MASK		0x40
 #define		ADT7470_PWM4_AUTO_MASK		0x80
@@ -128,8 +130,11 @@
 /* How often do we reread sensor limit values? (In jiffies) */
 #define LIMIT_REFRESH_INTERVAL	(60 * HZ)
 
-/* sleep 1s while gathering temperature data */
-#define TEMP_COLLECTION_TIME	1000
+/* Wait at least 200ms per sensor for 10 sensors */
+#define TEMP_COLLECTION_TIME	2000
+
+/* auto update thing won't fire more than every 2s */
+#define AUTO_UPDATE_INTERVAL	2000
 
 /* datasheet says to divide this number by the fan reading to get fan rpm */
 #define FAN_PERIOD_TO_RPM(x)	((90000 * 60) / (x))
@@ -137,8 +142,6 @@
 #define FAN_PERIOD_INVALID	65535
 #define FAN_DATA_VALID(x)	((x) && (x) != FAN_PERIOD_INVALID)
 
-#define ROUND_DIV(x, divisor)	(((x) + ((divisor) / 2)) / (divisor))
-
 struct adt7470_data {
 	struct device		*hwmon_dev;
 	struct attribute_group	attrs;
@@ -148,6 +151,9 @@
 	unsigned long		sensors_last_updated;	/* In jiffies */
 	unsigned long		limits_last_updated;	/* In jiffies */
 
+	int			num_temp_sensors;	/* -1 = probe */
+	int			temperatures_probed;
+
 	s8			temp[ADT7470_TEMP_COUNT];
 	s8			temp_min[ADT7470_TEMP_COUNT];
 	s8			temp_max[ADT7470_TEMP_COUNT];
@@ -163,6 +169,10 @@
 	u8			pwm_min[ADT7470_PWM_COUNT];
 	s8			pwm_tmin[ADT7470_PWM_COUNT];
 	u8			pwm_auto_temp[ADT7470_PWM_COUNT];
+
+	struct task_struct	*auto_update;
+	struct completion	auto_update_stop;
+	unsigned int		auto_update_interval;
 };
 
 static int adt7470_probe(struct i2c_client *client,
@@ -220,6 +230,88 @@
 	}
 }
 
+/* Probe for temperature sensors; 0 or -EAGAIN.  Assumes lock is held */
+static int adt7470_read_temperatures(struct i2c_client *client,
+				     struct adt7470_data *data)
+{
+	unsigned long res;
+	int i;
+	u8 cfg, pwm[4], pwm_cfg[2];
+
+	/* save pwm[1-4] config register */
+	pwm_cfg[0] = i2c_smbus_read_byte_data(client, ADT7470_REG_PWM_CFG(0));
+	pwm_cfg[1] = i2c_smbus_read_byte_data(client, ADT7470_REG_PWM_CFG(2));
+
+	/* set manual pwm to whatever it is set to now */
+	for (i = 0; i < ADT7470_FAN_COUNT; i++)
+		pwm[i] = i2c_smbus_read_byte_data(client, ADT7470_REG_PWM(i));
+
+	/* put pwm in manual mode */
+	i2c_smbus_write_byte_data(client, ADT7470_REG_PWM_CFG(0),
+		pwm_cfg[0] & ~(ADT7470_PWM_AUTO_MASK));
+	i2c_smbus_write_byte_data(client, ADT7470_REG_PWM_CFG(2),
+		pwm_cfg[1] & ~(ADT7470_PWM_AUTO_MASK));
+
+	/* write pwm control to whatever it was */
+	for (i = 0; i < ADT7470_FAN_COUNT; i++)
+		i2c_smbus_write_byte_data(client, ADT7470_REG_PWM(i), pwm[i]);
+
+	/* start reading temperature sensors */
+	cfg = i2c_smbus_read_byte_data(client, ADT7470_REG_CFG);
+	cfg |= 0x80;
+	i2c_smbus_write_byte_data(client, ADT7470_REG_CFG, cfg);
+
+	/* Delay is 200ms * number of temp sensors. */
+	res = msleep_interruptible((data->num_temp_sensors >= 0 ?
+				    data->num_temp_sensors * 200 :
+				    TEMP_COLLECTION_TIME));
+
+	/* done reading temperature sensors */
+	cfg = i2c_smbus_read_byte_data(client, ADT7470_REG_CFG);
+	cfg &= ~0x80;
+	i2c_smbus_write_byte_data(client, ADT7470_REG_CFG, cfg);
+
+	/* restore pwm[1-4] config registers */
+	i2c_smbus_write_byte_data(client, ADT7470_REG_PWM_CFG(0), pwm_cfg[0]);
+	i2c_smbus_write_byte_data(client, ADT7470_REG_PWM_CFG(2), pwm_cfg[1]);
+
+	if (res) {
+		printk(KERN_ERR "ha ha, interrupted\n");
+		return -EAGAIN;
+	}
+
+	/* Only count temperature sensors if the count is still unknown */
+	if (data->num_temp_sensors >= 0)
+		return 0;
+
+	for (i = 0; i < ADT7470_TEMP_COUNT; i++) {
+		data->temp[i] = i2c_smbus_read_byte_data(client,
+						ADT7470_TEMP_REG(i));
+		if (data->temp[i])
+			data->num_temp_sensors = i + 1;
+	}
+	data->temperatures_probed = 1;
+	return 0;
+}
+
+static int adt7470_update_thread(void *p)
+{
+	struct i2c_client *client = p;
+	struct adt7470_data *data = i2c_get_clientdata(client);
+
+	while (!kthread_should_stop()) {
+		mutex_lock(&data->lock);
+		adt7470_read_temperatures(client, data);  /* result ignored */
+		mutex_unlock(&data->lock);
+		if (kthread_should_stop())	/* skip the sleep when stopping */
+			break;
+		msleep_interruptible(data->auto_update_interval);
+	}
+
+	complete_all(&data->auto_update_stop);	/* remove() waits on this */
+	return 0;
+}
+
 static struct adt7470_data *adt7470_update_device(struct device *dev)
 {
 	struct i2c_client *client = to_i2c_client(dev);
@@ -227,32 +319,36 @@
 	unsigned long local_jiffies = jiffies;
 	u8 cfg;
 	int i;
-
-	mutex_lock(&data->lock);
-	if (time_before(local_jiffies, data->sensors_last_updated +
-		SENSOR_REFRESH_INTERVAL)
-		&& data->sensors_valid)
-		goto no_sensor_update;
-
-	/* start reading temperature sensors */
-	cfg = i2c_smbus_read_byte_data(client, ADT7470_REG_CFG);
-	cfg |= 0x80;
-	i2c_smbus_write_byte_data(client, ADT7470_REG_CFG, cfg);
+	int need_sensors = 1;
+	int need_limits = 1;
 
 	/*
-	 * Delay is 200ms * number of tmp05 sensors.  Too bad
-	 * there's no way to figure out how many are connected.
-	 * For now, assume 1s will work.
+	 * Figure out if we need to update the shadow registers.
+	 * Lockless means that we may occasionally report out of
+	 * date data.
 	 */
-	msleep(TEMP_COLLECTION_TIME);
+	if (time_before(local_jiffies, data->sensors_last_updated +
+			SENSOR_REFRESH_INTERVAL) &&
+	    data->sensors_valid)
+		need_sensors = 0;
 
-	/* done reading temperature sensors */
-	cfg = i2c_smbus_read_byte_data(client, ADT7470_REG_CFG);
-	cfg &= ~0x80;
-	i2c_smbus_write_byte_data(client, ADT7470_REG_CFG, cfg);
+	if (time_before(local_jiffies, data->limits_last_updated +
+			LIMIT_REFRESH_INTERVAL) &&
+	    data->limits_valid)
+		need_limits = 0;
 
-	for (i = 0; i < ADT7470_TEMP_COUNT; i++)
-		data->temp[i] = i2c_smbus_read_byte_data(client,
+	if (!need_sensors && !need_limits)
+		return data;
+
+	mutex_lock(&data->lock);
+	if (!need_sensors)
+		goto no_sensor_update;
+
+	if (!data->temperatures_probed)
+		adt7470_read_temperatures(client, data);
+	else
+		for (i = 0; i < ADT7470_TEMP_COUNT; i++)
+			data->temp[i] = i2c_smbus_read_byte_data(client,
 						ADT7470_TEMP_REG(i));
 
 	for (i = 0; i < ADT7470_FAN_COUNT; i++)
@@ -302,9 +398,7 @@
 	data->sensors_valid = 1;
 
 no_sensor_update:
-	if (time_before(local_jiffies, data->limits_last_updated +
-		LIMIT_REFRESH_INTERVAL)
-		&& data->limits_valid)
+	if (!need_limits)
 		goto out;
 
 	for (i = 0; i < ADT7470_TEMP_COUNT; i++) {
@@ -338,6 +432,66 @@
 	return data;
 }
 
+static ssize_t show_auto_update_interval(struct device *dev,
+					 struct device_attribute *devattr,
+					 char *buf)
+{
+	struct adt7470_data *data = adt7470_update_device(dev);
+	return sprintf(buf, "%d\n", data->auto_update_interval); /* unsigned */
+}
+
+static ssize_t set_auto_update_interval(struct device *dev,
+					struct device_attribute *devattr,
+					const char *buf,
+					size_t count)
+{
+	struct i2c_client *client = to_i2c_client(dev);
+	struct adt7470_data *data = i2c_get_clientdata(client);
+	long temp;
+
+	if (strict_strtol(buf, 10, &temp))
+		return -EINVAL;
+
+	temp = SENSORS_LIMIT(temp, 0, 60000);	/* clamp to 0..60000 */
+
+	mutex_lock(&data->lock);
+	data->auto_update_interval = temp;	/* passed to msleep by thread */
+	mutex_unlock(&data->lock);
+
+	return count;
+}
+
+static ssize_t show_num_temp_sensors(struct device *dev,
+				     struct device_attribute *devattr,
+				     char *buf)
+{
+	struct adt7470_data *data = adt7470_update_device(dev);
+	return sprintf(buf, "%d\n", data->num_temp_sensors);	/* -1 = probe */
+}
+
+static ssize_t set_num_temp_sensors(struct device *dev,
+				    struct device_attribute *devattr,
+				    const char *buf,
+				    size_t count)
+{
+	struct i2c_client *client = to_i2c_client(dev);
+	struct adt7470_data *data = i2c_get_clientdata(client);
+	long temp;
+
+	if (strict_strtol(buf, 10, &temp))
+		return -EINVAL;
+
+	temp = SENSORS_LIMIT(temp, -1, 10);	/* clamp to -1..10 */
+
+	mutex_lock(&data->lock);
+	data->num_temp_sensors = temp;
+	if (temp < 0)	/* negative count: mark sensors as not yet probed */
+		data->temperatures_probed = 0;
+	mutex_unlock(&data->lock);
+
+	return count;
+}
+
 static ssize_t show_temp_min(struct device *dev,
 			     struct device_attribute *devattr,
 			     char *buf)
@@ -360,7 +514,7 @@
 	if (strict_strtol(buf, 10, &temp))
 		return -EINVAL;
 
-	temp = ROUND_DIV(temp, 1000);
+	temp = DIV_ROUND_CLOSEST(temp, 1000);
 	temp = SENSORS_LIMIT(temp, 0, 255);
 
 	mutex_lock(&data->lock);
@@ -394,7 +548,7 @@
 	if (strict_strtol(buf, 10, &temp))
 		return -EINVAL;
 
-	temp = ROUND_DIV(temp, 1000);
+	temp = DIV_ROUND_CLOSEST(temp, 1000);
 	temp = SENSORS_LIMIT(temp, 0, 255);
 
 	mutex_lock(&data->lock);
@@ -671,7 +825,7 @@
 	if (strict_strtol(buf, 10, &temp))
 		return -EINVAL;
 
-	temp = ROUND_DIV(temp, 1000);
+	temp = DIV_ROUND_CLOSEST(temp, 1000);
 	temp = SENSORS_LIMIT(temp, 0, 255);
 
 	mutex_lock(&data->lock);
@@ -804,6 +958,10 @@
 }
 
 static DEVICE_ATTR(alarm_mask, S_IRUGO, show_alarm_mask, NULL);
+static DEVICE_ATTR(num_temp_sensors, S_IWUSR | S_IRUGO, show_num_temp_sensors,
+		   set_num_temp_sensors);
+static DEVICE_ATTR(auto_update_interval, S_IWUSR | S_IRUGO,
+		   show_auto_update_interval, set_auto_update_interval);
 
 static SENSOR_DEVICE_ATTR(temp1_max, S_IWUSR | S_IRUGO, show_temp_max,
 		    set_temp_max, 0);
@@ -976,6 +1134,8 @@
 static struct attribute *adt7470_attr[] =
 {
 	&dev_attr_alarm_mask.attr,
+	&dev_attr_num_temp_sensors.attr,
+	&dev_attr_auto_update_interval.attr,
 	&sensor_dev_attr_temp1_max.dev_attr.attr,
 	&sensor_dev_attr_temp2_max.dev_attr.attr,
 	&sensor_dev_attr_temp3_max.dev_attr.attr,
@@ -1108,6 +1268,9 @@
 		goto exit;
 	}
 
+	data->num_temp_sensors = -1;
+	data->auto_update_interval = AUTO_UPDATE_INTERVAL;
+
 	i2c_set_clientdata(client, data);
 	mutex_init(&data->lock);
 
@@ -1127,8 +1290,16 @@
 		goto exit_remove;
 	}
 
+	init_completion(&data->auto_update_stop);
+	data->auto_update = kthread_run(adt7470_update_thread, client,
+					dev_name(data->hwmon_dev));
+	if (IS_ERR(data->auto_update))
+		goto exit_unregister;
+
 	return 0;
 
+exit_unregister:
+	hwmon_device_unregister(data->hwmon_dev);
 exit_remove:
 	sysfs_remove_group(&client->dev.kobj, &data->attrs);
 exit_free:
@@ -1141,6 +1312,8 @@
 {
 	struct adt7470_data *data = i2c_get_clientdata(client);
 
+	kthread_stop(data->auto_update);
+	wait_for_completion(&data->auto_update_stop);
 	hwmon_device_unregister(data->hwmon_dev);
 	sysfs_remove_group(&client->dev.kobj, &data->attrs);
 	kfree(data);
diff --git a/drivers/hwmon/adt7473.c b/drivers/hwmon/adt7473.c
index 18aa308..0a6ce23 100644
--- a/drivers/hwmon/adt7473.c
+++ b/drivers/hwmon/adt7473.c
@@ -129,8 +129,6 @@
 #define FAN_PERIOD_INVALID	65535
 #define FAN_DATA_VALID(x)	((x) && (x) != FAN_PERIOD_INVALID)
 
-#define ROUND_DIV(x, divisor)	(((x) + ((divisor) / 2)) / (divisor))
-
 struct adt7473_data {
 	struct device		*hwmon_dev;
 	struct attribute_group	attrs;
@@ -459,7 +457,7 @@
 	if (strict_strtol(buf, 10, &temp))
 		return -EINVAL;
 
-	temp = ROUND_DIV(temp, 1000);
+	temp = DIV_ROUND_CLOSEST(temp, 1000);
 	temp = encode_temp(data->temp_twos_complement, temp);
 
 	mutex_lock(&data->lock);
@@ -495,7 +493,7 @@
 	if (strict_strtol(buf, 10, &temp))
 		return -EINVAL;
 
-	temp = ROUND_DIV(temp, 1000);
+	temp = DIV_ROUND_CLOSEST(temp, 1000);
 	temp = encode_temp(data->temp_twos_complement, temp);
 
 	mutex_lock(&data->lock);
@@ -720,7 +718,7 @@
 	if (strict_strtol(buf, 10, &temp))
 		return -EINVAL;
 
-	temp = ROUND_DIV(temp, 1000);
+	temp = DIV_ROUND_CLOSEST(temp, 1000);
 	temp = encode_temp(data->temp_twos_complement, temp);
 
 	mutex_lock(&data->lock);
@@ -756,7 +754,7 @@
 	if (strict_strtol(buf, 10, &temp))
 		return -EINVAL;
 
-	temp = ROUND_DIV(temp, 1000);
+	temp = DIV_ROUND_CLOSEST(temp, 1000);
 	temp = encode_temp(data->temp_twos_complement, temp);
 
 	mutex_lock(&data->lock);
diff --git a/drivers/hwmon/applesmc.c b/drivers/hwmon/applesmc.c
index 086c2a5..dca47a5 100644
--- a/drivers/hwmon/applesmc.c
+++ b/drivers/hwmon/applesmc.c
@@ -131,6 +131,10 @@
 /* Set 14: iMac 6,1 */
 	{ "TA0P", "TC0D", "TC0H", "TC0P", "TG0D", "TG0H", "TG0P", "TH0P",
 	  "TO0P", "Tp0P", NULL },
+/* Set 15: MacBook Air 2,1 */
+	{ "TB0T", "TB1S", "TB1T", "TB2S", "TB2T", "TC0D", "TN0D", "TTF0",
+	  "TV0P", "TVFP", "TW0P", "Th0P", "Tp0P", "Tp1P", "TpFP", "Ts0P",
+	  "Ts0S", NULL },
 };
 
 /* List of keys used to read/write fan speeds */
@@ -1301,11 +1305,17 @@
 	{ .accelerometer = 0, .light = 0, .temperature_set = 13 },
 /* iMac 6: light sensor only, temperature set 14 */
 	{ .accelerometer = 0, .light = 0, .temperature_set = 14 },
+/* MacBook Air 2,1: accelerometer, backlight and temperature set 15 */
+	{ .accelerometer = 1, .light = 1, .temperature_set = 15 },
 };
 
 /* Note that DMI_MATCH(...,"MacBook") will match "MacBookPro1,1".
  * So we need to put "Apple MacBook Pro" before "Apple MacBook". */
 static __initdata struct dmi_system_id applesmc_whitelist[] = {
+	{ applesmc_dmi_match, "Apple MacBook Air 2", {
+	  DMI_MATCH(DMI_BOARD_VENDOR, "Apple"),
+	  DMI_MATCH(DMI_PRODUCT_NAME, "MacBookAir2") },
+		&applesmc_dmi_data[15]},
 	{ applesmc_dmi_match, "Apple MacBook Air", {
 	  DMI_MATCH(DMI_BOARD_VENDOR, "Apple"),
 	  DMI_MATCH(DMI_PRODUCT_NAME, "MacBookAir") },
diff --git a/drivers/hwmon/ibmpex.c b/drivers/hwmon/ibmpex.c
index 537d9fb..a363633 100644
--- a/drivers/hwmon/ibmpex.c
+++ b/drivers/hwmon/ibmpex.c
@@ -40,7 +40,7 @@
 
 static inline u16 extract_value(const char *data, int offset)
 {
-	return be16_to_cpup((u16 *)&data[offset]);
+	return be16_to_cpup((__be16 *)&data[offset]);
 }
 
 #define TEMP_SENSOR		1
diff --git a/drivers/ide/Kconfig b/drivers/ide/Kconfig
index 4ee85fc..3f95038 100644
--- a/drivers/ide/Kconfig
+++ b/drivers/ide/Kconfig
@@ -511,6 +511,13 @@
 	  This allows the kernel to change PIO, DMA and UDMA speeds and to
 	  configure the chip to optimum performance.
 
+config BLK_DEV_IT8172
+	tristate "IT8172 IDE support"
+	select BLK_DEV_IDEDMA_PCI
+	help
+	  This driver adds support for the IDE controller on the
+	  IT8172 System Controller.
+
 config BLK_DEV_IT8213
 	tristate "IT8213 IDE support"
 	select BLK_DEV_IDEDMA_PCI
diff --git a/drivers/ide/Makefile b/drivers/ide/Makefile
index 4107289..c2b9c93 100644
--- a/drivers/ide/Makefile
+++ b/drivers/ide/Makefile
@@ -47,6 +47,7 @@
 obj-$(CONFIG_BLK_DEV_CY82C693)		+= cy82c693.o
 obj-$(CONFIG_BLK_DEV_DELKIN)		+= delkin_cb.o
 obj-$(CONFIG_BLK_DEV_HPT366)		+= hpt366.o
+obj-$(CONFIG_BLK_DEV_IT8172)		+= it8172.o
 obj-$(CONFIG_BLK_DEV_IT8213)		+= it8213.o
 obj-$(CONFIG_BLK_DEV_IT821X)		+= it821x.o
 obj-$(CONFIG_BLK_DEV_JMICRON)		+= jmicron.o
diff --git a/drivers/ide/aec62xx.c b/drivers/ide/aec62xx.c
index 4142c69..4485b9c 100644
--- a/drivers/ide/aec62xx.c
+++ b/drivers/ide/aec62xx.c
@@ -83,7 +83,7 @@
 
 static void aec6210_set_mode(ide_drive_t *drive, const u8 speed)
 {
-	ide_hwif_t *hwif	= HWIF(drive);
+	ide_hwif_t *hwif	= drive->hwif;
 	struct pci_dev *dev	= to_pci_dev(hwif->dev);
 	struct ide_host *host	= pci_get_drvdata(dev);
 	struct chipset_bus_clock_list_entry *bus_clock = host->host_priv;
@@ -111,7 +111,7 @@
 
 static void aec6260_set_mode(ide_drive_t *drive, const u8 speed)
 {
-	ide_hwif_t *hwif	= HWIF(drive);
+	ide_hwif_t *hwif	= drive->hwif;
 	struct pci_dev *dev	= to_pci_dev(hwif->dev);
 	struct ide_host *host	= pci_get_drvdata(dev);
 	struct chipset_bus_clock_list_entry *bus_clock = host->host_priv;
diff --git a/drivers/ide/alim15x3.c b/drivers/ide/alim15x3.c
index 45d2356..66f4308 100644
--- a/drivers/ide/alim15x3.c
+++ b/drivers/ide/alim15x3.c
@@ -68,7 +68,7 @@
 
 static void ali_set_pio_mode(ide_drive_t *drive, const u8 pio)
 {
-	ide_hwif_t *hwif = HWIF(drive);
+	ide_hwif_t *hwif = drive->hwif;
 	struct pci_dev *dev = to_pci_dev(hwif->dev);
 	struct ide_timing *t = ide_timing_find_mode(XFER_PIO_0 + pio);
 	int s_time = t->setup, a_time = t->active, c_time = t->cycle;
@@ -150,7 +150,7 @@
 
 static void ali_set_dma_mode(ide_drive_t *drive, const u8 speed)
 {
-	ide_hwif_t *hwif	= HWIF(drive);
+	ide_hwif_t *hwif	= drive->hwif;
 	struct pci_dev *dev	= to_pci_dev(hwif->dev);
 	u8 speed1		= speed;
 	u8 unit			= drive->dn & 1;
@@ -198,7 +198,7 @@
 static int ali15x3_dma_setup(ide_drive_t *drive)
 {
 	if (m5229_revision < 0xC2 && drive->media != ide_disk) {
-		if (rq_data_dir(drive->hwif->hwgroup->rq))
+		if (rq_data_dir(drive->hwif->rq))
 			return 1;	/* try PIO instead of DMA */
 	}
 	return ide_dma_setup(drive);
@@ -490,8 +490,6 @@
 	if (ide_allocate_dma_engine(hwif))
 		return -1;
 
-	hwif->dma_ops = &sff_dma_ops;
-
 	return 0;
 }
 
@@ -511,6 +509,7 @@
 	.dma_test_irq		= ide_dma_test_irq,
 	.dma_lost_irq		= ide_dma_lost_irq,
 	.dma_timeout		= ide_dma_timeout,
+	.dma_sff_read_status	= ide_dma_sff_read_status,
 };
 
 static const struct ide_port_info ali15x3_chipset __devinitdata = {
@@ -519,6 +518,7 @@
 	.init_hwif	= init_hwif_ali15x3,
 	.init_dma	= init_dma_ali15x3,
 	.port_ops	= &ali_port_ops,
+	.dma_ops	= &sff_dma_ops,
 	.pio_mask	= ATA_PIO5,
 	.swdma_mask	= ATA_SWDMA2,
 	.mwdma_mask	= ATA_MWDMA2,
diff --git a/drivers/ide/amd74xx.c b/drivers/ide/amd74xx.c
index c6bcd30..69660a4 100644
--- a/drivers/ide/amd74xx.c
+++ b/drivers/ide/amd74xx.c
@@ -82,7 +82,7 @@
 {
 	ide_hwif_t *hwif = drive->hwif;
 	struct pci_dev *dev = to_pci_dev(hwif->dev);
-	ide_drive_t *peer = hwif->drives + (~drive->dn & 1);
+	ide_drive_t *peer = ide_get_pair_dev(drive);
 	struct ide_timing t, p;
 	int T, UT;
 	u8 udma_mask = hwif->ultra_mask;
@@ -92,7 +92,7 @@
 
 	ide_timing_compute(drive, speed, &t, T, UT);
 
-	if (peer->dev_flags & IDE_DFLAG_PRESENT) {
+	if (peer) {
 		ide_timing_compute(peer, peer->current_speed, &p, T, UT);
 		ide_timing_merge(&p, &t, &t, IDE_TIMING_8BIT);
 	}
diff --git a/drivers/ide/au1xxx-ide.c b/drivers/ide/au1xxx-ide.c
index 0ec8fd1..79a2dfe 100644
--- a/drivers/ide/au1xxx-ide.c
+++ b/drivers/ide/au1xxx-ide.c
@@ -212,8 +212,8 @@
 static int auide_build_dmatable(ide_drive_t *drive)
 {
 	int i, iswrite, count = 0;
-	ide_hwif_t *hwif = HWIF(drive);
-	struct request *rq = HWGROUP(drive)->rq;
+	ide_hwif_t *hwif = drive->hwif;
+	struct request *rq = hwif->rq;
 	_auide_hwif *ahwif = &auide_hwif;
 	struct scatterlist *sg;
 
@@ -286,7 +286,7 @@
 
 static int auide_dma_end(ide_drive_t *drive)
 {
-	ide_hwif_t *hwif = HWIF(drive);
+	ide_hwif_t *hwif = drive->hwif;
 
 	if (hwif->sg_nents) {
 		ide_destroy_dmatable(drive);
@@ -309,8 +309,8 @@
 }
 
 static int auide_dma_setup(ide_drive_t *drive)
-{       	
-	struct request *rq = HWGROUP(drive)->rq;
+{
+	struct request *rq = drive->hwif->rq;
 
 	if (!auide_build_dmatable(drive)) {
 		ide_map_sg(drive, rq);
@@ -502,7 +502,6 @@
 	.exec_command		= ide_exec_command,
 	.read_status		= ide_read_status,
 	.read_altstatus		= ide_read_altstatus,
-	.read_sff_dma_status	= ide_read_sff_dma_status,
 
 	.set_irq		= ide_set_irq,
 
diff --git a/drivers/ide/cmd640.c b/drivers/ide/cmd640.c
index e430664..8890276 100644
--- a/drivers/ide/cmd640.c
+++ b/drivers/ide/cmd640.c
@@ -467,11 +467,10 @@
 	 * so we merge the timings, using the slowest value for each timing.
 	 */
 	if (index > 1) {
-		ide_hwif_t *hwif = drive->hwif;
-		ide_drive_t *peer = &hwif->drives[!(drive->dn & 1)];
+		ide_drive_t *peer = ide_get_pair_dev(drive);
 		unsigned int mate = index ^ 1;
 
-		if (peer->dev_flags & IDE_DFLAG_PRESENT) {
+		if (peer) {
 			if (setup_count < setup_counts[mate])
 				setup_count = setup_counts[mate];
 			if (active_count < active_counts[mate])
diff --git a/drivers/ide/cmd64x.c b/drivers/ide/cmd64x.c
index 3623bf0..2f9688d 100644
--- a/drivers/ide/cmd64x.c
+++ b/drivers/ide/cmd64x.c
@@ -115,7 +115,7 @@
  */
 static void cmd64x_tune_pio(ide_drive_t *drive, const u8 pio)
 {
-	ide_hwif_t *hwif	= HWIF(drive);
+	ide_hwif_t *hwif	= drive->hwif;
 	struct pci_dev *dev	= to_pci_dev(hwif->dev);
 	struct ide_timing *t	= ide_timing_find_mode(XFER_PIO_0 + pio);
 	unsigned int cycle_time;
@@ -138,10 +138,12 @@
 	 * the slowest address setup timing ourselves.
 	 */
 	if (hwif->channel) {
-		ide_drive_t *drives = hwif->drives;
+		ide_drive_t *pair = ide_get_pair_dev(drive);
 
 		drive->drive_data = setup_count;
-		setup_count = max(drives[0].drive_data, drives[1].drive_data);
+
+		if (pair)
+			setup_count = max_t(u8, setup_count, pair->drive_data);
 	}
 
 	if (setup_count > 5)		/* shouldn't actually happen... */
@@ -180,7 +182,7 @@
 
 static void cmd64x_set_dma_mode(ide_drive_t *drive, const u8 speed)
 {
-	ide_hwif_t *hwif	= HWIF(drive);
+	ide_hwif_t *hwif	= drive->hwif;
 	struct pci_dev *dev	= to_pci_dev(hwif->dev);
 	u8 unit			= drive->dn & 0x01;
 	u8 regU = 0, pciU	= hwif->channel ? UDIDETCR1 : UDIDETCR0;
@@ -226,7 +228,7 @@
 
 static int cmd648_dma_end(ide_drive_t *drive)
 {
-	ide_hwif_t *hwif	= HWIF(drive);
+	ide_hwif_t *hwif	= drive->hwif;
 	unsigned long base	= hwif->dma_base - (hwif->channel * 8);
 	int err			= ide_dma_end(drive);
 	u8  irq_mask		= hwif->channel ? MRDMODE_INTR_CH1 :
@@ -242,7 +244,7 @@
 
 static int cmd64x_dma_end(ide_drive_t *drive)
 {
-	ide_hwif_t *hwif	= HWIF(drive);
+	ide_hwif_t *hwif	= drive->hwif;
 	struct pci_dev *dev	= to_pci_dev(hwif->dev);
 	int irq_reg		= hwif->channel ? ARTTIM23 : CFR;
 	u8  irq_mask		= hwif->channel ? ARTTIM23_INTR_CH1 :
@@ -259,7 +261,7 @@
 
 static int cmd648_dma_test_irq(ide_drive_t *drive)
 {
-	ide_hwif_t *hwif	= HWIF(drive);
+	ide_hwif_t *hwif	= drive->hwif;
 	unsigned long base	= hwif->dma_base - (hwif->channel * 8);
 	u8 irq_mask		= hwif->channel ? MRDMODE_INTR_CH1 :
 						  MRDMODE_INTR_CH0;
@@ -282,7 +284,7 @@
 
 static int cmd64x_dma_test_irq(ide_drive_t *drive)
 {
-	ide_hwif_t *hwif	= HWIF(drive);
+	ide_hwif_t *hwif	= drive->hwif;
 	struct pci_dev *dev	= to_pci_dev(hwif->dev);
 	int irq_reg		= hwif->channel ? ARTTIM23 : CFR;
 	u8  irq_mask		= hwif->channel ? ARTTIM23_INTR_CH1 :
@@ -313,7 +315,7 @@
 
 static int cmd646_1_dma_end(ide_drive_t *drive)
 {
-	ide_hwif_t *hwif = HWIF(drive);
+	ide_hwif_t *hwif = drive->hwif;
 	u8 dma_stat = 0, dma_cmd = 0;
 
 	drive->waiting_for_dma = 0;
@@ -383,6 +385,7 @@
 	.dma_test_irq		= cmd64x_dma_test_irq,
 	.dma_lost_irq		= ide_dma_lost_irq,
 	.dma_timeout		= ide_dma_timeout,
+	.dma_sff_read_status	= ide_dma_sff_read_status,
 };
 
 static const struct ide_dma_ops cmd646_rev1_dma_ops = {
@@ -394,6 +397,7 @@
 	.dma_test_irq		= ide_dma_test_irq,
 	.dma_lost_irq		= ide_dma_lost_irq,
 	.dma_timeout		= ide_dma_timeout,
+	.dma_sff_read_status	= ide_dma_sff_read_status,
 };
 
 static const struct ide_dma_ops cmd648_dma_ops = {
@@ -405,6 +409,7 @@
 	.dma_test_irq		= cmd648_dma_test_irq,
 	.dma_lost_irq		= ide_dma_lost_irq,
 	.dma_timeout		= ide_dma_timeout,
+	.dma_sff_read_status	= ide_dma_sff_read_status,
 };
 
 static const struct ide_port_info cmd64x_chipsets[] __devinitdata = {
diff --git a/drivers/ide/cs5520.c b/drivers/ide/cs5520.c
index 5efb467..d003bec 100644
--- a/drivers/ide/cs5520.c
+++ b/drivers/ide/cs5520.c
@@ -59,7 +59,7 @@
 
 static void cs5520_set_pio_mode(ide_drive_t *drive, const u8 pio)
 {
-	ide_hwif_t *hwif = HWIF(drive);
+	ide_hwif_t *hwif = drive->hwif;
 	struct pci_dev *pdev = to_pci_dev(hwif->dev);
 	int controller = drive->dn > 1 ? 1 : 0;
 
diff --git a/drivers/ide/cy82c693.c b/drivers/ide/cy82c693.c
index d37baf8..74fc540 100644
--- a/drivers/ide/cy82c693.c
+++ b/drivers/ide/cy82c693.c
@@ -203,7 +203,7 @@
 
 static void cy82c693_set_pio_mode(ide_drive_t *drive, const u8 pio)
 {
-	ide_hwif_t *hwif = HWIF(drive);
+	ide_hwif_t *hwif = drive->hwif;
 	struct pci_dev *dev = to_pci_dev(hwif->dev);
 	pio_clocks_t pclk;
 	unsigned int addrCtrl;
diff --git a/drivers/ide/falconide.c b/drivers/ide/falconide.c
index 39d500d..a5ba820 100644
--- a/drivers/ide/falconide.c
+++ b/drivers/ide/falconide.c
@@ -70,7 +70,6 @@
 	.exec_command		= ide_exec_command,
 	.read_status		= ide_read_status,
 	.read_altstatus		= ide_read_altstatus,
-	.read_sff_dma_status	= ide_read_sff_dma_status,
 
 	.set_irq		= ide_set_irq,
 
diff --git a/drivers/ide/hpt366.c b/drivers/ide/hpt366.c
index b18e10d..3eb9b5c 100644
--- a/drivers/ide/hpt366.c
+++ b/drivers/ide/hpt366.c
@@ -626,7 +626,7 @@
 
 static u8 hpt3xx_udma_filter(ide_drive_t *drive)
 {
-	ide_hwif_t *hwif	= HWIF(drive);
+	ide_hwif_t *hwif	= drive->hwif;
 	struct hpt_info *info	= hpt3xx_get_info(hwif->dev);
 	u8 mask 		= hwif->ultra_mask;
 
@@ -665,7 +665,7 @@
 
 static u8 hpt3xx_mdma_filter(ide_drive_t *drive)
 {
-	ide_hwif_t *hwif	= HWIF(drive);
+	ide_hwif_t *hwif	= drive->hwif;
 	struct hpt_info *info	= hpt3xx_get_info(hwif->dev);
 
 	switch (info->chip_type) {
@@ -743,7 +743,7 @@
 
 static void hpt3xx_maskproc(ide_drive_t *drive, int mask)
 {
-	ide_hwif_t *hwif	= HWIF(drive);
+	ide_hwif_t *hwif	= drive->hwif;
 	struct pci_dev	*dev	= to_pci_dev(hwif->dev);
 	struct hpt_info *info	= hpt3xx_get_info(hwif->dev);
 
@@ -788,7 +788,7 @@
 
 static void hpt370_clear_engine(ide_drive_t *drive)
 {
-	ide_hwif_t *hwif = HWIF(drive);
+	ide_hwif_t *hwif = drive->hwif;
 	struct pci_dev *dev = to_pci_dev(hwif->dev);
 
 	pci_write_config_byte(dev, hwif->select_data, 0x37);
@@ -797,7 +797,7 @@
 
 static void hpt370_irq_timeout(ide_drive_t *drive)
 {
-	ide_hwif_t *hwif	= HWIF(drive);
+	ide_hwif_t *hwif	= drive->hwif;
 	struct pci_dev *dev	= to_pci_dev(hwif->dev);
 	u16 bfifo		= 0;
 	u8  dma_cmd;
@@ -822,7 +822,7 @@
 
 static int hpt370_dma_end(ide_drive_t *drive)
 {
-	ide_hwif_t *hwif	= HWIF(drive);
+	ide_hwif_t *hwif	= drive->hwif;
 	u8  dma_stat		= inb(hwif->dma_base + ATA_DMA_STATUS);
 
 	if (dma_stat & 0x01) {
@@ -844,7 +844,7 @@
 /* returns 1 if DMA IRQ issued, 0 otherwise */
 static int hpt374_dma_test_irq(ide_drive_t *drive)
 {
-	ide_hwif_t *hwif	= HWIF(drive);
+	ide_hwif_t *hwif	= drive->hwif;
 	struct pci_dev *dev	= to_pci_dev(hwif->dev);
 	u16 bfifo		= 0;
 	u8  dma_stat;
@@ -865,7 +865,7 @@
 
 static int hpt374_dma_end(ide_drive_t *drive)
 {
-	ide_hwif_t *hwif	= HWIF(drive);
+	ide_hwif_t *hwif	= drive->hwif;
 	struct pci_dev *dev	= to_pci_dev(hwif->dev);
 	u8 mcr	= 0, mcr_addr	= hwif->select_data;
 	u8 bwsr = 0, mask	= hwif->channel ? 0x02 : 0x01;
@@ -927,7 +927,7 @@
 
 static void hpt3xxn_rw_disk(ide_drive_t *drive, struct request *rq)
 {
-	hpt3xxn_set_clock(HWIF(drive), rq_data_dir(rq) ? 0x23 : 0x21);
+	hpt3xxn_set_clock(drive->hwif, rq_data_dir(rq) ? 0x23 : 0x21);
 }
 
 /**
@@ -1349,8 +1349,6 @@
 	if (ide_allocate_dma_engine(hwif))
 		return -1;
 
-	hwif->dma_ops = &sff_dma_ops;
-
 	return 0;
 }
 
@@ -1426,6 +1424,7 @@
 	.dma_test_irq		= hpt374_dma_test_irq,
 	.dma_lost_irq		= ide_dma_lost_irq,
 	.dma_timeout		= ide_dma_timeout,
+	.dma_sff_read_status	= ide_dma_sff_read_status,
 };
 
 static const struct ide_dma_ops hpt370_dma_ops = {
@@ -1437,6 +1436,7 @@
 	.dma_test_irq		= ide_dma_test_irq,
 	.dma_lost_irq		= ide_dma_lost_irq,
 	.dma_timeout		= hpt370_dma_timeout,
+	.dma_sff_read_status	= ide_dma_sff_read_status,
 };
 
 static const struct ide_dma_ops hpt36x_dma_ops = {
@@ -1448,6 +1448,7 @@
 	.dma_test_irq		= ide_dma_test_irq,
 	.dma_lost_irq		= hpt366_dma_lost_irq,
 	.dma_timeout		= ide_dma_timeout,
+	.dma_sff_read_status	= ide_dma_sff_read_status,
 };
 
 static const struct ide_port_info hpt366_chipsets[] __devinitdata = {
diff --git a/drivers/ide/icside.c b/drivers/ide/icside.c
index 81f70ca..97a35c6 100644
--- a/drivers/ide/icside.c
+++ b/drivers/ide/icside.c
@@ -166,7 +166,7 @@
  */
 static void icside_maskproc(ide_drive_t *drive, int mask)
 {
-	ide_hwif_t *hwif = HWIF(drive);
+	ide_hwif_t *hwif = drive->hwif;
 	struct expansion_card *ec = ECARD_DEV(hwif->dev);
 	struct icside_state *state = ecard_get_drvdata(ec);
 	unsigned long flags;
@@ -284,7 +284,7 @@
 
 static int icside_dma_end(ide_drive_t *drive)
 {
-	ide_hwif_t *hwif = HWIF(drive);
+	ide_hwif_t *hwif = drive->hwif;
 	struct expansion_card *ec = ECARD_DEV(hwif->dev);
 
 	drive->waiting_for_dma = 0;
@@ -299,7 +299,7 @@
 
 static void icside_dma_start(ide_drive_t *drive)
 {
-	ide_hwif_t *hwif = HWIF(drive);
+	ide_hwif_t *hwif = drive->hwif;
 	struct expansion_card *ec = ECARD_DEV(hwif->dev);
 
 	/* We can not enable DMA on both channels simultaneously. */
@@ -309,10 +309,10 @@
 
 static int icside_dma_setup(ide_drive_t *drive)
 {
-	ide_hwif_t *hwif = HWIF(drive);
+	ide_hwif_t *hwif = drive->hwif;
 	struct expansion_card *ec = ECARD_DEV(hwif->dev);
 	struct icside_state *state = ecard_get_drvdata(ec);
-	struct request *rq = hwif->hwgroup->rq;
+	struct request *rq = hwif->rq;
 	unsigned int dma_mode;
 
 	if (rq_data_dir(rq))
@@ -362,7 +362,7 @@
 
 static int icside_dma_test_irq(ide_drive_t *drive)
 {
-	ide_hwif_t *hwif = HWIF(drive);
+	ide_hwif_t *hwif = drive->hwif;
 	struct expansion_card *ec = ECARD_DEV(hwif->dev);
 	struct icside_state *state = ecard_get_drvdata(ec);
 
diff --git a/drivers/ide/ide-acpi.c b/drivers/ide/ide-acpi.c
index fd4a364..2f9e941 100644
--- a/drivers/ide/ide-acpi.c
+++ b/drivers/ide/ide-acpi.c
@@ -218,7 +218,7 @@
  */
 static acpi_handle ide_acpi_drive_get_handle(ide_drive_t *drive)
 {
-	ide_hwif_t	*hwif = HWIF(drive);
+	ide_hwif_t	*hwif = drive->hwif;
 	int		 port;
 	acpi_handle	 drive_handle;
 
@@ -263,7 +263,7 @@
 	acpi_status			status;
 	struct acpi_buffer		output;
 	union acpi_object 		*out_obj;
-	ide_hwif_t			*hwif = HWIF(drive);
+	ide_hwif_t			*hwif = drive->hwif;
 	struct device			*dev = hwif->gendev.parent;
 	int				err = -ENODEV;
 	int				port;
@@ -641,7 +641,8 @@
  */
 void ide_acpi_set_state(ide_hwif_t *hwif, int on)
 {
-	int unit;
+	ide_drive_t *drive;
+	int i;
 
 	if (ide_noacpi || ide_noacpi_psx)
 		return;
@@ -655,9 +656,8 @@
 	/* channel first and then drives for power on and verse versa for power off */
 	if (on)
 		acpi_bus_set_power(hwif->acpidata->obj_handle, ACPI_STATE_D0);
-	for (unit = 0; unit < MAX_DRIVES; ++unit) {
-		ide_drive_t *drive = &hwif->drives[unit];
 
+	ide_port_for_each_dev(i, drive, hwif) {
 		if (!drive->acpidata->obj_handle)
 			drive->acpidata->obj_handle = ide_acpi_drive_get_handle(drive);
 
@@ -711,15 +711,13 @@
 	 * for both drives, regardless whether they are connected
 	 * or not.
 	 */
-	hwif->drives[0].acpidata = &hwif->acpidata->master;
-	hwif->drives[1].acpidata = &hwif->acpidata->slave;
+	hwif->devices[0]->acpidata = &hwif->acpidata->master;
+	hwif->devices[1]->acpidata = &hwif->acpidata->slave;
 
 	/*
 	 * Send IDENTIFY for each drive
 	 */
-	for (i = 0; i < MAX_DRIVES; i++) {
-		drive = &hwif->drives[i];
-
+	ide_port_for_each_dev(i, drive, hwif) {
 		memset(drive->acpidata, 0, sizeof(*drive->acpidata));
 
 		if ((drive->dev_flags & IDE_DFLAG_PRESENT) == 0)
@@ -744,9 +742,7 @@
 	ide_acpi_get_timing(hwif);
 	ide_acpi_push_timing(hwif);
 
-	for (i = 0; i < MAX_DRIVES; i++) {
-		drive = &hwif->drives[i];
-
+	ide_port_for_each_dev(i, drive, hwif) {
 		if (drive->dev_flags & IDE_DFLAG_PRESENT)
 			/* Execute ACPI startup code */
 			ide_acpi_exec_tfs(drive);
diff --git a/drivers/ide/ide-atapi.c b/drivers/ide/ide-atapi.c
index e8688c0..e96c012 100644
--- a/drivers/ide/ide-atapi.c
+++ b/drivers/ide/ide-atapi.c
@@ -243,7 +243,7 @@
 
 int ide_cd_expiry(ide_drive_t *drive)
 {
-	struct request *rq = HWGROUP(drive)->rq;
+	struct request *rq = drive->hwif->rq;
 	unsigned long wait = 0;
 
 	debug_log("%s: rq->cmd[0]: 0x%x\n", __func__, rq->cmd[0]);
@@ -294,7 +294,7 @@
 {
 	struct ide_atapi_pc *pc = drive->pc;
 	ide_hwif_t *hwif = drive->hwif;
-	struct request *rq = hwif->hwgroup->rq;
+	struct request *rq = hwif->rq;
 	const struct ide_tp_ops *tp_ops = hwif->tp_ops;
 	xfer_func_t *xferfunc;
 	unsigned int timeout, temp;
@@ -491,7 +491,7 @@
 {
 	struct ide_atapi_pc *uninitialized_var(pc);
 	ide_hwif_t *hwif = drive->hwif;
-	struct request *rq = hwif->hwgroup->rq;
+	struct request *rq = hwif->rq;
 	ide_expiry_t *expiry;
 	unsigned int timeout;
 	int cmd_len;
@@ -549,7 +549,10 @@
 	}
 
 	/* Set the interrupt routine */
-	ide_set_handler(drive, ide_pc_intr, timeout, expiry);
+	ide_set_handler(drive,
+			(dev_is_idecd(drive) ? drive->irq_handler
+					     : ide_pc_intr),
+			timeout, expiry);
 
 	/* Begin DMA, if necessary */
 	if (dev_is_idecd(drive)) {
@@ -580,7 +583,7 @@
 
 	if (dev_is_idecd(drive)) {
 		tf_flags = IDE_TFLAG_OUT_NSECT | IDE_TFLAG_OUT_LBAL;
-		bcount = ide_cd_get_xferlen(hwif->hwgroup->rq);
+		bcount = ide_cd_get_xferlen(hwif->rq);
 		expiry = ide_cd_expiry;
 		timeout = ATAPI_WAIT_PC;
 
diff --git a/drivers/ide/ide-cd.c b/drivers/ide/ide-cd.c
index 1a7410f..cae6937 100644
--- a/drivers/ide/ide-cd.c
+++ b/drivers/ide/ide-cd.c
@@ -239,7 +239,7 @@
 
 static void cdrom_end_request(ide_drive_t *drive, int uptodate)
 {
-	struct request *rq = HWGROUP(drive)->rq;
+	struct request *rq = drive->hwif->rq;
 	int nsectors = rq->hard_cur_sectors;
 
 	ide_debug_log(IDE_DBG_FUNC, "Call %s, cmd: 0x%x, uptodate: 0x%x, "
@@ -306,8 +306,7 @@
 static int cdrom_decode_status(ide_drive_t *drive, int good_stat, int *stat_ret)
 {
 	ide_hwif_t *hwif = drive->hwif;
-	ide_hwgroup_t *hwgroup = hwif->hwgroup;
-	struct request *rq = hwgroup->rq;
+	struct request *rq = hwif->rq;
 	int stat, err, sense_key;
 
 	/* check for errors */
@@ -502,7 +501,7 @@
 		blkdev_dequeue_request(rq);
 		spin_unlock_irqrestore(q->queue_lock, flags);
 
-		hwgroup->rq = NULL;
+		hwif->rq = NULL;
 
 		cdrom_queue_request_sense(drive, rq->sense, rq);
 	} else
@@ -511,106 +510,6 @@
 	return 1;
 }
 
-static ide_startstop_t cdrom_transfer_packet_command(ide_drive_t *);
-static ide_startstop_t cdrom_newpc_intr(ide_drive_t *);
-
-/*
- * Set up the device registers for transferring a packet command on DEV,
- * expecting to later transfer XFERLEN bytes.  HANDLER is the routine
- * which actually transfers the command to the drive.  If this is a
- * drq_interrupt device, this routine will arrange for HANDLER to be
- * called when the interrupt from the drive arrives.  Otherwise, HANDLER
- * will be called immediately after the drive is prepared for the transfer.
- */
-static ide_startstop_t cdrom_start_packet_command(ide_drive_t *drive)
-{
-	ide_hwif_t *hwif = drive->hwif;
-	struct request *rq = hwif->hwgroup->rq;
-	int xferlen;
-
-	xferlen = ide_cd_get_xferlen(rq);
-
-	ide_debug_log(IDE_DBG_PC, "Call %s, xferlen: %d\n", __func__, xferlen);
-
-	/* FIXME: for Virtual DMA we must check harder */
-	if (drive->dma)
-		drive->dma = !hwif->dma_ops->dma_setup(drive);
-
-	/* set up the controller registers */
-	ide_pktcmd_tf_load(drive, IDE_TFLAG_OUT_NSECT | IDE_TFLAG_OUT_LBAL,
-			   xferlen, drive->dma);
-
-	if (drive->atapi_flags & IDE_AFLAG_DRQ_INTERRUPT) {
-		/* waiting for CDB interrupt, not DMA yet. */
-		if (drive->dma)
-			drive->waiting_for_dma = 0;
-
-		/* packet command */
-		ide_execute_command(drive, ATA_CMD_PACKET,
-				    cdrom_transfer_packet_command,
-				    ATAPI_WAIT_PC, ide_cd_expiry);
-		return ide_started;
-	} else {
-		ide_execute_pkt_cmd(drive);
-
-		return cdrom_transfer_packet_command(drive);
-	}
-}
-
-/*
- * Send a packet command to DRIVE described by CMD_BUF and CMD_LEN. The device
- * registers must have already been prepared by cdrom_start_packet_command.
- * HANDLER is the interrupt handler to call when the command completes or
- * there's data ready.
- */
-#define ATAPI_MIN_CDB_BYTES 12
-static ide_startstop_t cdrom_transfer_packet_command(ide_drive_t *drive)
-{
-	ide_hwif_t *hwif = drive->hwif;
-	struct request *rq = hwif->hwgroup->rq;
-	int cmd_len;
-	ide_startstop_t startstop;
-
-	ide_debug_log(IDE_DBG_PC, "Call %s\n", __func__);
-
-	if (drive->atapi_flags & IDE_AFLAG_DRQ_INTERRUPT) {
-		/*
-		 * Here we should have been called after receiving an interrupt
-		 * from the device.  DRQ should how be set.
-		 */
-
-		/* check for errors */
-		if (cdrom_decode_status(drive, ATA_DRQ, NULL))
-			return ide_stopped;
-
-		/* ok, next interrupt will be DMA interrupt */
-		if (drive->dma)
-			drive->waiting_for_dma = 1;
-	} else {
-		/* otherwise, we must wait for DRQ to get set */
-		if (ide_wait_stat(&startstop, drive, ATA_DRQ,
-				  ATA_BUSY, WAIT_READY))
-			return startstop;
-	}
-
-	/* arm the interrupt handler */
-	ide_set_handler(drive, cdrom_newpc_intr, rq->timeout, ide_cd_expiry);
-
-	/* ATAPI commands get padded out to 12 bytes minimum */
-	cmd_len = COMMAND_SIZE(rq->cmd[0]);
-	if (cmd_len < ATAPI_MIN_CDB_BYTES)
-		cmd_len = ATAPI_MIN_CDB_BYTES;
-
-	/* send the command to the device */
-	hwif->tp_ops->output_data(drive, NULL, rq->cmd, cmd_len);
-
-	/* start the DMA if need be */
-	if (drive->dma)
-		hwif->dma_ops->dma_start(drive);
-
-	return ide_started;
-}
-
 /*
  * Check the contents of the interrupt reason register from the cdrom
  * and attempt to recover if there are problems.  Returns  0 if everything's
@@ -854,8 +753,7 @@
 static ide_startstop_t cdrom_newpc_intr(ide_drive_t *drive)
 {
 	ide_hwif_t *hwif = drive->hwif;
-	ide_hwgroup_t *hwgroup = hwif->hwgroup;
-	struct request *rq = hwgroup->rq;
+	struct request *rq = hwif->rq;
 	xfer_func_t *xferfunc;
 	ide_expiry_t *expiry = NULL;
 	int dma_error = 0, dma, stat, thislen, uptodate = 0;
@@ -1061,7 +959,7 @@
 		if (blk_end_request(rq, 0, dlen))
 			BUG();
 
-		hwgroup->rq = NULL;
+		hwif->rq = NULL;
 	} else {
 		if (!uptodate)
 			rq->cmd_flags |= REQ_FAILED;
@@ -1183,7 +1081,7 @@
 		return ide_stopped;
 	}
 
-	return cdrom_start_packet_command(drive);
+	return ide_issue_pc(drive);
 }
 
 /*
@@ -1916,7 +1814,7 @@
 
 static int ide_cd_probe(ide_drive_t *);
 
-static ide_driver_t ide_cdrom_driver = {
+static struct ide_driver ide_cdrom_driver = {
 	.gen_driver = {
 		.owner		= THIS_MODULE,
 		.name		= "ide-cdrom",
@@ -1927,7 +1825,6 @@
 	.version		= IDECD_VERSION,
 	.do_request		= ide_cd_do_request,
 	.end_request		= ide_end_request,
-	.error			= __ide_error,
 #ifdef CONFIG_IDE_PROC_FS
 	.proc_entries		= ide_cd_proc_entries,
 	.proc_devsets		= ide_cd_proc_devsets,
@@ -2082,6 +1979,7 @@
 	}
 
 	drive->debug_mask = debug_mask;
+	drive->irq_handler = cdrom_newpc_intr;
 
 	info = kzalloc(sizeof(struct cdrom_info), GFP_KERNEL);
 	if (info == NULL) {
diff --git a/drivers/ide/ide-cd.h b/drivers/ide/ide-cd.h
index bf676b2..ac40d6c 100644
--- a/drivers/ide/ide-cd.h
+++ b/drivers/ide/ide-cd.h
@@ -33,33 +33,33 @@
 
 /* Structure of a MSF cdrom address. */
 struct atapi_msf {
-	byte reserved;
-	byte minute;
-	byte second;
-	byte frame;
+	u8 reserved;
+	u8 minute;
+	u8 second;
+	u8 frame;
 };
 
 /* Space to hold the disk TOC. */
 #define MAX_TRACKS 99
 struct atapi_toc_header {
 	unsigned short toc_length;
-	byte first_track;
-	byte last_track;
+	u8 first_track;
+	u8 last_track;
 };
 
 struct atapi_toc_entry {
-	byte reserved1;
+	u8 reserved1;
 #if defined(__BIG_ENDIAN_BITFIELD)
-	__u8 adr     : 4;
-	__u8 control : 4;
+	u8 adr     : 4;
+	u8 control : 4;
 #elif defined(__LITTLE_ENDIAN_BITFIELD)
-	__u8 control : 4;
-	__u8 adr     : 4;
+	u8 control : 4;
+	u8 adr     : 4;
 #else
 #error "Please fix <asm/byteorder.h>"
 #endif
-	byte track;
-	byte reserved2;
+	u8 track;
+	u8 reserved2;
 	union {
 		unsigned lba;
 		struct atapi_msf msf;
@@ -77,10 +77,10 @@
 
 /* Extra per-device info for cdrom drives. */
 struct cdrom_info {
-	ide_drive_t	*drive;
-	ide_driver_t	*driver;
-	struct gendisk	*disk;
-	struct kref	kref;
+	ide_drive_t		*drive;
+	struct ide_driver	*driver;
+	struct gendisk		*disk;
+	struct kref		kref;
 
 	/* Buffer for table of contents.  NULL if we haven't allocated
 	   a TOC buffer for this device yet. */
diff --git a/drivers/ide/ide-disk.c b/drivers/ide/ide-disk.c
index eb9fac4..4088a62 100644
--- a/drivers/ide/ide-disk.c
+++ b/drivers/ide/ide-disk.c
@@ -89,7 +89,7 @@
 static ide_startstop_t __ide_do_rw_disk(ide_drive_t *drive, struct request *rq,
 					sector_t block)
 {
-	ide_hwif_t *hwif	= HWIF(drive);
+	ide_hwif_t *hwif	= drive->hwif;
 	u16 nsectors		= (u16)rq->nr_sectors;
 	u8 lba48		= !!(drive->dev_flags & IDE_DFLAG_LBA48);
 	u8 dma			= !!(drive->dev_flags & IDE_DFLAG_USING_DMA);
@@ -187,7 +187,7 @@
 static ide_startstop_t ide_do_rw_disk(ide_drive_t *drive, struct request *rq,
 				      sector_t block)
 {
-	ide_hwif_t *hwif = HWIF(drive);
+	ide_hwif_t *hwif = drive->hwif;
 
 	BUG_ON(drive->dev_flags & IDE_DFLAG_BLOCKED);
 
diff --git a/drivers/ide/ide-dma-sff.c b/drivers/ide/ide-dma-sff.c
index f6d2d44..123d393 100644
--- a/drivers/ide/ide-dma-sff.c
+++ b/drivers/ide/ide-dma-sff.c
@@ -50,6 +50,27 @@
 	return 0;
 }
 
+/* Read the DMA status register, via MMIO or port I/O per host_flags. */
+u8 ide_dma_sff_read_status(ide_hwif_t *hwif)
+{
+	unsigned long status_reg = hwif->dma_base + ATA_DMA_STATUS;
+
+	if (!(hwif->host_flags & IDE_HFLAG_MMIO))
+		return inb(status_reg);
+	return readb((void __iomem *)status_reg);
+}
+EXPORT_SYMBOL_GPL(ide_dma_sff_read_status);
+
+/* Write @val to the DMA status register (MMIO or port I/O per host_flags). */
+static void ide_dma_sff_write_status(ide_hwif_t *hwif, u8 val)
+{
+	if (!(hwif->host_flags & IDE_HFLAG_MMIO))
+		outb(val, hwif->dma_base + ATA_DMA_STATUS);
+	else
+		writeb(val,
+		       (void __iomem *)(hwif->dma_base + ATA_DMA_STATUS));
+}
+
 /**
  *	ide_dma_host_set	-	Enable/disable DMA on a host
  *	@drive: drive to control
@@ -62,18 +83,14 @@
 {
 	ide_hwif_t *hwif = drive->hwif;
 	u8 unit = drive->dn & 1;
-	u8 dma_stat = hwif->tp_ops->read_sff_dma_status(hwif);
+	u8 dma_stat = hwif->dma_ops->dma_sff_read_status(hwif);
 
 	if (on)
 		dma_stat |= (1 << (5 + unit));
 	else
 		dma_stat &= ~(1 << (5 + unit));
 
-	if (hwif->host_flags & IDE_HFLAG_MMIO)
-		writeb(dma_stat,
-		       (void __iomem *)(hwif->dma_base + ATA_DMA_STATUS));
-	else
-		outb(dma_stat, hwif->dma_base + ATA_DMA_STATUS);
+	ide_dma_sff_write_status(hwif, dma_stat);
 }
 EXPORT_SYMBOL_GPL(ide_dma_host_set);
 
@@ -175,7 +192,7 @@
 int ide_dma_setup(ide_drive_t *drive)
 {
 	ide_hwif_t *hwif = drive->hwif;
-	struct request *rq = hwif->hwgroup->rq;
+	struct request *rq = hwif->rq;
 	unsigned int reading = rq_data_dir(rq) ? 0 : ATA_DMA_WR;
 	u8 mmio = (hwif->host_flags & IDE_HFLAG_MMIO) ? 1 : 0;
 	u8 dma_stat;
@@ -187,7 +204,7 @@
 	}
 
 	/* PRD table */
-	if (hwif->host_flags & IDE_HFLAG_MMIO)
+	if (mmio)
 		writel(hwif->dmatable_dma,
 		       (void __iomem *)(hwif->dma_base + ATA_DMA_TABLE_OFS));
 	else
@@ -200,15 +217,10 @@
 		outb(reading, hwif->dma_base + ATA_DMA_CMD);
 
 	/* read DMA status for INTR & ERROR flags */
-	dma_stat = hwif->tp_ops->read_sff_dma_status(hwif);
+	dma_stat = hwif->dma_ops->dma_sff_read_status(hwif);
 
 	/* clear INTR & ERROR flags */
-	if (mmio)
-		writeb(dma_stat | ATA_DMA_ERR | ATA_DMA_INTR,
-		       (void __iomem *)(hwif->dma_base + ATA_DMA_STATUS));
-	else
-		outb(dma_stat | ATA_DMA_ERR | ATA_DMA_INTR,
-		     hwif->dma_base + ATA_DMA_STATUS);
+	ide_dma_sff_write_status(hwif, dma_stat | ATA_DMA_ERR | ATA_DMA_INTR);
 
 	drive->waiting_for_dma = 1;
 	return 0;
@@ -232,7 +244,7 @@
 static int dma_timer_expiry(ide_drive_t *drive)
 {
 	ide_hwif_t *hwif = drive->hwif;
-	u8 dma_stat = hwif->tp_ops->read_sff_dma_status(hwif);
+	u8 dma_stat = hwif->dma_ops->dma_sff_read_status(hwif);
 
 	printk(KERN_WARNING "%s: %s: DMA status (0x%02x)\n",
 		drive->name, __func__, dma_stat);
@@ -240,7 +252,7 @@
 	if ((dma_stat & 0x18) == 0x18)	/* BUSY Stupid Early Timer !! */
 		return WAIT_CMD;
 
-	hwif->hwgroup->expiry = NULL;	/* one free ride for now */
+	hwif->expiry = NULL;	/* one free ride for now */
 
 	if (dma_stat & ATA_DMA_ERR)	/* ERROR */
 		return -1;
@@ -289,13 +301,12 @@
 int ide_dma_end(ide_drive_t *drive)
 {
 	ide_hwif_t *hwif = drive->hwif;
-	u8 mmio = (hwif->host_flags & IDE_HFLAG_MMIO) ? 1 : 0;
 	u8 dma_stat = 0, dma_cmd = 0, mask;
 
 	drive->waiting_for_dma = 0;
 
 	/* stop DMA */
-	if (mmio) {
+	if (hwif->host_flags & IDE_HFLAG_MMIO) {
 		dma_cmd = readb((void __iomem *)(hwif->dma_base + ATA_DMA_CMD));
 		writeb(dma_cmd & ~ATA_DMA_START,
 		       (void __iomem *)(hwif->dma_base + ATA_DMA_CMD));
@@ -305,15 +316,10 @@
 	}
 
 	/* get DMA status */
-	dma_stat = hwif->tp_ops->read_sff_dma_status(hwif);
+	dma_stat = hwif->dma_ops->dma_sff_read_status(hwif);
 
-	if (mmio)
-		/* clear the INTR & ERROR bits */
-		writeb(dma_stat | ATA_DMA_ERR | ATA_DMA_INTR,
-		       (void __iomem *)(hwif->dma_base + ATA_DMA_STATUS));
-	else
-		outb(dma_stat | ATA_DMA_ERR | ATA_DMA_INTR,
-		     hwif->dma_base + ATA_DMA_STATUS);
+	/* clear INTR & ERROR bits */
+	ide_dma_sff_write_status(hwif, dma_stat | ATA_DMA_ERR | ATA_DMA_INTR);
 
 	/* purge DMA mappings */
 	ide_destroy_dmatable(drive);
@@ -331,7 +337,7 @@
 int ide_dma_test_irq(ide_drive_t *drive)
 {
 	ide_hwif_t *hwif = drive->hwif;
-	u8 dma_stat = hwif->tp_ops->read_sff_dma_status(hwif);
+	u8 dma_stat = hwif->dma_ops->dma_sff_read_status(hwif);
 
 	return (dma_stat & ATA_DMA_INTR) ? 1 : 0;
 }
@@ -346,5 +352,6 @@
 	.dma_test_irq		= ide_dma_test_irq,
 	.dma_timeout		= ide_dma_timeout,
 	.dma_lost_irq		= ide_dma_lost_irq,
+	.dma_sff_read_status	= ide_dma_sff_read_status,
 };
 EXPORT_SYMBOL_GPL(sff_dma_ops);
diff --git a/drivers/ide/ide-dma.c b/drivers/ide/ide-dma.c
index fffd117..72ebab0 100644
--- a/drivers/ide/ide-dma.c
+++ b/drivers/ide/ide-dma.c
@@ -96,7 +96,7 @@
 
 	if (OK_STAT(stat, DRIVE_READY, drive->bad_wstat | ATA_DRQ)) {
 		if (!dma_stat) {
-			struct request *rq = hwif->hwgroup->rq;
+			struct request *rq = hwif->rq;
 
 			task_end_request(drive, rq, stat);
 			return ide_stopped;
diff --git a/drivers/ide/ide-floppy.c b/drivers/ide/ide-floppy.c
index 0a48e2d..3eab1c6 100644
--- a/drivers/ide/ide-floppy.c
+++ b/drivers/ide/ide-floppy.c
@@ -71,7 +71,7 @@
 static int ide_floppy_end_request(ide_drive_t *drive, int uptodate, int nsecs)
 {
 	struct ide_disk_obj *floppy = drive->driver_data;
-	struct request *rq = HWGROUP(drive)->rq;
+	struct request *rq = drive->hwif->rq;
 	int error;
 
 	ide_debug_log(IDE_DBG_FUNC, "Call %s\n", __func__);
diff --git a/drivers/ide/ide-gd.c b/drivers/ide/ide-gd.c
index b8078b3..7857b20 100644
--- a/drivers/ide/ide-gd.c
+++ b/drivers/ide/ide-gd.c
@@ -149,7 +149,7 @@
 	return drive->disk_ops->end_request(drive, uptodate, nrsecs);
 }
 
-static ide_driver_t ide_gd_driver = {
+static struct ide_driver ide_gd_driver = {
 	.gen_driver = {
 		.owner		= THIS_MODULE,
 		.name		= "ide-gd",
@@ -162,7 +162,6 @@
 	.version		= IDE_GD_VERSION,
 	.do_request		= ide_gd_do_request,
 	.end_request		= ide_gd_end_request,
-	.error			= __ide_error,
 #ifdef CONFIG_IDE_PROC_FS
 	.proc_entries		= ide_disk_proc_entries,
 	.proc_devsets		= ide_disk_proc_devsets,
diff --git a/drivers/ide/ide-gd.h b/drivers/ide/ide-gd.h
index 7d3d101..a86779f 100644
--- a/drivers/ide/ide-gd.h
+++ b/drivers/ide/ide-gd.h
@@ -14,11 +14,11 @@
 #endif
 
 struct ide_disk_obj {
-	ide_drive_t	*drive;
-	ide_driver_t	*driver;
-	struct gendisk	*disk;
-	struct kref	kref;
-	unsigned int	openers;	/* protected by BKL for now */
+	ide_drive_t		*drive;
+	struct ide_driver	*driver;
+	struct gendisk		*disk;
+	struct kref		kref;
+	unsigned int		openers;	/* protected by BKL for now */
 
 	/* Last failed packet command */
 	struct ide_atapi_pc *failed_pc;
diff --git a/drivers/ide/ide-h8300.c b/drivers/ide/ide-h8300.c
index e2cdd2e..9270d32 100644
--- a/drivers/ide/ide-h8300.c
+++ b/drivers/ide/ide-h8300.c
@@ -159,7 +159,6 @@
 	.exec_command		= ide_exec_command,
 	.read_status		= ide_read_status,
 	.read_altstatus		= ide_read_altstatus,
-	.read_sff_dma_status	= ide_read_sff_dma_status,
 
 	.set_irq		= ide_set_irq,
 
diff --git a/drivers/ide/ide-io.c b/drivers/ide/ide-io.c
index 1c36a8e..cc16331 100644
--- a/drivers/ide/ide-io.c
+++ b/drivers/ide/ide-io.c
@@ -88,7 +88,7 @@
 		ret = 0;
 
 	if (ret == 0 && dequeue)
-		drive->hwif->hwgroup->rq = NULL;
+		drive->hwif->rq = NULL;
 
 	return ret;
 }
@@ -107,7 +107,7 @@
 int ide_end_request (ide_drive_t *drive, int uptodate, int nr_sectors)
 {
 	unsigned int nr_bytes = nr_sectors << 9;
-	struct request *rq = drive->hwif->hwgroup->rq;
+	struct request *rq = drive->hwif->rq;
 
 	if (!nr_bytes) {
 		if (blk_pc_request(rq))
@@ -160,8 +160,8 @@
  
 void ide_end_drive_cmd (ide_drive_t *drive, u8 stat, u8 err)
 {
-	ide_hwgroup_t *hwgroup = drive->hwif->hwgroup;
-	struct request *rq = hwgroup->rq;
+	ide_hwif_t *hwif = drive->hwif;
+	struct request *rq = hwif->rq;
 
 	if (rq->cmd_type == REQ_TYPE_ATA_TASKFILE) {
 		ide_task_t *task = (ide_task_t *)rq->special;
@@ -186,7 +186,7 @@
 		return;
 	}
 
-	hwgroup->rq = NULL;
+	hwif->rq = NULL;
 
 	rq->errors = err;
 
@@ -199,9 +199,9 @@
 static void ide_kill_rq(ide_drive_t *drive, struct request *rq)
 {
 	if (rq->rq_disk) {
-		ide_driver_t *drv;
+		struct ide_driver *drv;
 
-		drv = *(ide_driver_t **)rq->rq_disk->private_data;
+		drv = *(struct ide_driver **)rq->rq_disk->private_data;
 		drv->end_request(drive, 0, 0);
 	} else
 		ide_end_request(drive, 0, 0);
@@ -291,7 +291,7 @@
 	return ide_stopped;
 }
 
-ide_startstop_t
+static ide_startstop_t
 __ide_error(ide_drive_t *drive, struct request *rq, u8 stat, u8 err)
 {
 	if (drive->media == ide_disk)
@@ -299,8 +299,6 @@
 	return ide_atapi_error(drive, rq, stat, err);
 }
 
-EXPORT_SYMBOL_GPL(__ide_error);
-
 /**
  *	ide_error	-	handle an error on the IDE
  *	@drive: drive the error occurred on
@@ -321,7 +319,8 @@
 
 	err = ide_dump_status(drive, msg, stat);
 
-	if ((rq = HWGROUP(drive)->rq) == NULL)
+	rq = drive->hwif->rq;
+	if (rq == NULL)
 		return ide_stopped;
 
 	/* retry only "normal" I/O: */
@@ -331,15 +330,8 @@
 		return ide_stopped;
 	}
 
-	if (rq->rq_disk) {
-		ide_driver_t *drv;
-
-		drv = *(ide_driver_t **)rq->rq_disk->private_data;
-		return drv->error(drive, rq, stat, err);
-	} else
-		return __ide_error(drive, rq, stat, err);
+	return __ide_error(drive, rq, stat, err);
 }
-
 EXPORT_SYMBOL_GPL(ide_error);
 
 static void ide_tf_set_specify_cmd(ide_drive_t *drive, struct ide_taskfile *tf)
@@ -462,7 +454,7 @@
 static ide_startstop_t execute_drive_cmd (ide_drive_t *drive,
 		struct request *rq)
 {
-	ide_hwif_t *hwif = HWIF(drive);
+	ide_hwif_t *hwif = drive->hwif;
 	ide_task_t *task = rq->special;
 
 	if (task) {
@@ -586,7 +578,7 @@
 
 #ifdef DEBUG
 	printk("%s: start_request: current=0x%08lx\n",
-		HWIF(drive)->name, (unsigned long) rq);
+		drive->hwif->name, (unsigned long) rq);
 #endif
 
 	/* bail early if we've exceeded max_failures */
@@ -605,7 +597,7 @@
 		return startstop;
 	}
 	if (!drive->special.all) {
-		ide_driver_t *drv;
+		struct ide_driver *drv;
 
 		/*
 		 * We reset the drive so we need to issue a SETFEATURES.
@@ -638,7 +630,7 @@
 			 */
 			return ide_special_rq(drive, rq);
 
-		drv = *(ide_driver_t **)rq->rq_disk->private_data;
+		drv = *(struct ide_driver **)rq->rq_disk->private_data;
 
 		return drv->do_request(drive, rq, rq->sector);
 	}
@@ -654,7 +646,7 @@
  *	@timeout: time to stall for (jiffies)
  *
  *	ide_stall_queue() can be used by a drive to give excess bandwidth back
- *	to the hwgroup by sleeping for timeout jiffies.
+ *	to the port by sleeping for timeout jiffies.
  */
  
 void ide_stall_queue (ide_drive_t *drive, unsigned long timeout)
@@ -666,45 +658,53 @@
 }
 EXPORT_SYMBOL(ide_stall_queue);
 
+static inline int ide_lock_port(ide_hwif_t *hwif)
+{
+	if (hwif->busy)
+		return 1;
+
+	hwif->busy = 1;
+
+	return 0;
+}
+
+static inline void ide_unlock_port(ide_hwif_t *hwif)
+{
+	hwif->busy = 0;
+}
+
+static inline int ide_lock_host(struct ide_host *host, ide_hwif_t *hwif)
+{
+	int rc = 0;
+
+	if (host->host_flags & IDE_HFLAG_SERIALIZE) {
+		rc = test_and_set_bit_lock(IDE_HOST_BUSY, &host->host_busy);
+		if (rc == 0) {
+			/* for atari only */
+			ide_get_lock(ide_intr, hwif);
+		}
+	}
+	return rc;
+}
+
+static inline void ide_unlock_host(struct ide_host *host)
+{
+	if (host->host_flags & IDE_HFLAG_SERIALIZE) {
+		/* for atari only */
+		ide_release_lock();
+		clear_bit_unlock(IDE_HOST_BUSY, &host->host_busy);
+	}
+}
+
 /*
- * Issue a new request to a drive from hwgroup
- *
- * A hwgroup is a serialized group of IDE interfaces.  Usually there is
- * exactly one hwif (interface) per hwgroup, but buggy controllers (eg. CMD640)
- * may have both interfaces in a single hwgroup to "serialize" access.
- * Or possibly multiple ISA interfaces can share a common IRQ by being grouped
- * together into one hwgroup for serialized access.
- *
- * Note also that several hwgroups can end up sharing a single IRQ,
- * possibly along with many other devices.  This is especially common in
- * PCI-based systems with off-board IDE controller cards.
- *
- * The IDE driver uses a per-hwgroup lock to protect the hwgroup->busy flag.
- *
- * The first thread into the driver for a particular hwgroup sets the
- * hwgroup->busy flag to indicate that this hwgroup is now active,
- * and then initiates processing of the top request from the request queue.
- *
- * Other threads attempting entry notice the busy setting, and will simply
- * queue their new requests and exit immediately.  Note that hwgroup->busy
- * remains set even when the driver is merely awaiting the next interrupt.
- * Thus, the meaning is "this hwgroup is busy processing a request".
- *
- * When processing of a request completes, the completing thread or IRQ-handler
- * will start the next request from the queue.  If no more work remains,
- * the driver will clear the hwgroup->busy flag and exit.
- *
- * The per-hwgroup spinlock is used to protect all access to the
- * hwgroup->busy flag, but is otherwise not needed for most processing in
- * the driver.  This makes the driver much more friendlier to shared IRQs
- * than previous designs, while remaining 100% (?) SMP safe and capable.
+ * Issue a new request to a device.
  */
 void do_ide_request(struct request_queue *q)
 {
 	ide_drive_t	*drive = q->queuedata;
 	ide_hwif_t	*hwif = drive->hwif;
-	ide_hwgroup_t	*hwgroup = hwif->hwgroup;
-	struct request	*rq;
+	struct ide_host *host = hwif->host;
+	struct request	*rq = NULL;
 	ide_startstop_t	startstop;
 
 	/*
@@ -721,32 +721,40 @@
 		blk_remove_plug(q);
 
 	spin_unlock_irq(q->queue_lock);
-	spin_lock_irq(&hwgroup->lock);
 
-	if (!ide_lock_hwgroup(hwgroup)) {
+	if (ide_lock_host(host, hwif))
+		goto plug_device_2;
+
+	spin_lock_irq(&hwif->lock);
+
+	if (!ide_lock_port(hwif)) {
+		ide_hwif_t *prev_port;
 repeat:
-		hwgroup->rq = NULL;
+		prev_port = hwif->host->cur_port;
+		hwif->rq = NULL;
 
 		if (drive->dev_flags & IDE_DFLAG_SLEEPING) {
 			if (time_before(drive->sleep, jiffies)) {
-				ide_unlock_hwgroup(hwgroup);
+				ide_unlock_port(hwif);
 				goto plug_device;
 			}
 		}
 
-		if (hwif != hwgroup->hwif) {
+		if ((hwif->host->host_flags & IDE_HFLAG_SERIALIZE) &&
+		    hwif != prev_port) {
 			/*
-			 * set nIEN for previous hwif, drives in the
+			 * set nIEN for previous port, drives in the
 			 * quirk_list may not like intr setups/cleanups
 			 */
-			if (drive->quirk_list == 0)
-				hwif->tp_ops->set_irq(hwif, 0);
+			if (prev_port && prev_port->cur_dev->quirk_list == 0)
+				prev_port->tp_ops->set_irq(prev_port, 0);
+
+			hwif->host->cur_port = hwif;
 		}
-		hwgroup->hwif = hwif;
-		hwgroup->drive = drive;
+		hwif->cur_dev = drive;
 		drive->dev_flags &= ~(IDE_DFLAG_SLEEPING | IDE_DFLAG_PARKED);
 
-		spin_unlock_irq(&hwgroup->lock);
+		spin_unlock_irq(&hwif->lock);
 		spin_lock_irq(q->queue_lock);
 		/*
 		 * we know that the queue isn't empty, but this can happen
@@ -754,10 +762,10 @@
 		 */
 		rq = elv_next_request(drive->queue);
 		spin_unlock_irq(q->queue_lock);
-		spin_lock_irq(&hwgroup->lock);
+		spin_lock_irq(&hwif->lock);
 
 		if (!rq) {
-			ide_unlock_hwgroup(hwgroup);
+			ide_unlock_port(hwif);
 			goto out;
 		}
 
@@ -778,27 +786,31 @@
 		    blk_pm_request(rq) == 0 &&
 		    (rq->cmd_flags & REQ_PREEMPT) == 0) {
 			/* there should be no pending command at this point */
-			ide_unlock_hwgroup(hwgroup);
+			ide_unlock_port(hwif);
 			goto plug_device;
 		}
 
-		hwgroup->rq = rq;
+		hwif->rq = rq;
 
-		spin_unlock_irq(&hwgroup->lock);
+		spin_unlock_irq(&hwif->lock);
 		startstop = start_request(drive, rq);
-		spin_lock_irq(&hwgroup->lock);
+		spin_lock_irq(&hwif->lock);
 
 		if (startstop == ide_stopped)
 			goto repeat;
 	} else
 		goto plug_device;
 out:
-	spin_unlock_irq(&hwgroup->lock);
+	spin_unlock_irq(&hwif->lock);
+	if (rq == NULL)
+		ide_unlock_host(host);
 	spin_lock_irq(q->queue_lock);
 	return;
 
 plug_device:
-	spin_unlock_irq(&hwgroup->lock);
+	spin_unlock_irq(&hwif->lock);
+	ide_unlock_host(host);
+plug_device_2:
 	spin_lock_irq(q->queue_lock);
 
 	if (!elv_queue_empty(q))
@@ -806,13 +818,13 @@
 }
 
 /*
- * un-busy the hwgroup etc, and clear any pending DMA status. we want to
+ * Un-busy the port etc., and clear any pending DMA status. We want to
  * retry the current request in pio mode instead of risking tossing it
  * all away
  */
 static ide_startstop_t ide_dma_timeout_retry(ide_drive_t *drive, int error)
 {
-	ide_hwif_t *hwif = HWIF(drive);
+	ide_hwif_t *hwif = drive->hwif;
 	struct request *rq;
 	ide_startstop_t ret = ide_stopped;
 
@@ -840,15 +852,14 @@
 	ide_dma_off_quietly(drive);
 
 	/*
-	 * un-busy drive etc (hwgroup->busy is cleared on return) and
-	 * make sure request is sane
+	 * un-busy drive etc and make sure request is sane
 	 */
-	rq = HWGROUP(drive)->rq;
 
+	rq = hwif->rq;
 	if (!rq)
 		goto out;
 
-	HWGROUP(drive)->rq = NULL;
+	hwif->rq = NULL;
 
 	rq->errors = 0;
 
@@ -876,7 +887,7 @@
 
 /**
  *	ide_timer_expiry	-	handle lack of an IDE interrupt
- *	@data: timer callback magic (hwgroup)
+ *	@data: timer callback magic (hwif)
  *
  *	An IDE command has timed out before the expected drive return
  *	occurred. At this point we attempt to clean up the current
@@ -890,18 +901,18 @@
  
 void ide_timer_expiry (unsigned long data)
 {
-	ide_hwgroup_t	*hwgroup = (ide_hwgroup_t *) data;
+	ide_hwif_t	*hwif = (ide_hwif_t *)data;
 	ide_drive_t	*uninitialized_var(drive);
 	ide_handler_t	*handler;
-	ide_expiry_t	*expiry;
 	unsigned long	flags;
 	unsigned long	wait = -1;
 	int		plug_device = 0;
 
-	spin_lock_irqsave(&hwgroup->lock, flags);
+	spin_lock_irqsave(&hwif->lock, flags);
 
-	if (((handler = hwgroup->handler) == NULL) ||
-	    (hwgroup->req_gen != hwgroup->req_gen_timer)) {
+	handler = hwif->handler;
+
+	if (handler == NULL || hwif->req_gen != hwif->req_gen_timer) {
 		/*
 		 * Either a marginal timeout occurred
 		 * (got the interrupt just as timer expired),
@@ -909,72 +920,68 @@
 		 * Either way, we don't really want to complain about anything.
 		 */
 	} else {
-		drive = hwgroup->drive;
-		if (!drive) {
-			printk(KERN_ERR "ide_timer_expiry: hwgroup->drive was NULL\n");
-			hwgroup->handler = NULL;
-		} else {
-			ide_hwif_t *hwif;
-			ide_startstop_t startstop = ide_stopped;
+		ide_expiry_t *expiry = hwif->expiry;
+		ide_startstop_t startstop = ide_stopped;
 
-			if ((expiry = hwgroup->expiry) != NULL) {
-				/* continue */
-				if ((wait = expiry(drive)) > 0) {
-					/* reset timer */
-					hwgroup->timer.expires  = jiffies + wait;
-					hwgroup->req_gen_timer = hwgroup->req_gen;
-					add_timer(&hwgroup->timer);
-					spin_unlock_irqrestore(&hwgroup->lock, flags);
-					return;
-				}
-			}
-			hwgroup->handler = NULL;
-			/*
-			 * We need to simulate a real interrupt when invoking
-			 * the handler() function, which means we need to
-			 * globally mask the specific IRQ:
-			 */
-			spin_unlock(&hwgroup->lock);
-			hwif  = HWIF(drive);
-			/* disable_irq_nosync ?? */
-			disable_irq(hwif->irq);
-			/* local CPU only,
-			 * as if we were handling an interrupt */
-			local_irq_disable();
-			if (hwgroup->polling) {
-				startstop = handler(drive);
-			} else if (drive_is_ready(drive)) {
-				if (drive->waiting_for_dma)
-					hwif->dma_ops->dma_lost_irq(drive);
-				(void)ide_ack_intr(hwif);
-				printk(KERN_WARNING "%s: lost interrupt\n", drive->name);
-				startstop = handler(drive);
-			} else {
-				if (drive->waiting_for_dma) {
-					startstop = ide_dma_timeout_retry(drive, wait);
-				} else
-					startstop =
-					ide_error(drive, "irq timeout",
-						  hwif->tp_ops->read_status(hwif));
-			}
-			spin_lock_irq(&hwgroup->lock);
-			enable_irq(hwif->irq);
-			if (startstop == ide_stopped) {
-				ide_unlock_hwgroup(hwgroup);
-				plug_device = 1;
+		drive = hwif->cur_dev;
+
+		if (expiry) {
+			wait = expiry(drive);
+			if (wait > 0) { /* continue */
+				/* reset timer */
+				hwif->timer.expires = jiffies + wait;
+				hwif->req_gen_timer = hwif->req_gen;
+				add_timer(&hwif->timer);
+				spin_unlock_irqrestore(&hwif->lock, flags);
+				return;
 			}
 		}
+		hwif->handler = NULL;
+		/*
+		 * We need to simulate a real interrupt when invoking
+		 * the handler() function, which means we need to
+		 * globally mask the specific IRQ:
+		 */
+		spin_unlock(&hwif->lock);
+		/* disable_irq_nosync ?? */
+		disable_irq(hwif->irq);
+		/* local CPU only, as if we were handling an interrupt */
+		local_irq_disable();
+		if (hwif->polling) {
+			startstop = handler(drive);
+		} else if (drive_is_ready(drive)) {
+			if (drive->waiting_for_dma)
+				hwif->dma_ops->dma_lost_irq(drive);
+			(void)ide_ack_intr(hwif);
+			printk(KERN_WARNING "%s: lost interrupt\n",
+				drive->name);
+			startstop = handler(drive);
+		} else {
+			if (drive->waiting_for_dma)
+				startstop = ide_dma_timeout_retry(drive, wait);
+			else
+				startstop = ide_error(drive, "irq timeout",
+					hwif->tp_ops->read_status(hwif));
+		}
+		spin_lock_irq(&hwif->lock);
+		enable_irq(hwif->irq);
+		if (startstop == ide_stopped) {
+			ide_unlock_port(hwif);
+			plug_device = 1;
+		}
 	}
-	spin_unlock_irqrestore(&hwgroup->lock, flags);
+	spin_unlock_irqrestore(&hwif->lock, flags);
 
-	if (plug_device)
+	if (plug_device) {
+		ide_unlock_host(hwif->host);
 		ide_plug_device(drive);
+	}
 }
 
 /**
  *	unexpected_intr		-	handle an unexpected IDE interrupt
  *	@irq: interrupt line
- *	@hwgroup: hwgroup being processed
+ *	@hwif: port being processed
  *
  *	There's nothing really useful we can do with an unexpected interrupt,
  *	other than reading the status register (to clear it), and logging it.
@@ -998,52 +1005,38 @@
  *	before completing the issuance of any new drive command, so we will not
  *	be accidentally invoked as a result of any valid command completion
  *	interrupt.
- *
- *	Note that we must walk the entire hwgroup here. We know which hwif
- *	is doing the current command, but we don't know which hwif burped
- *	mysteriously.
  */
- 
-static void unexpected_intr (int irq, ide_hwgroup_t *hwgroup)
+
+static void unexpected_intr(int irq, ide_hwif_t *hwif)
 {
-	u8 stat;
-	ide_hwif_t *hwif = hwgroup->hwif;
+	u8 stat = hwif->tp_ops->read_status(hwif);
 
-	/*
-	 * handle the unexpected interrupt
-	 */
-	do {
-		if (hwif->irq == irq) {
-			stat = hwif->tp_ops->read_status(hwif);
+	if (!OK_STAT(stat, ATA_DRDY, BAD_STAT)) {
+		/* Try to not flood the console with msgs */
+		static unsigned long last_msgtime, count;
+		++count;
 
-			if (!OK_STAT(stat, ATA_DRDY, BAD_STAT)) {
-				/* Try to not flood the console with msgs */
-				static unsigned long last_msgtime, count;
-				++count;
-				if (time_after(jiffies, last_msgtime + HZ)) {
-					last_msgtime = jiffies;
-					printk(KERN_ERR "%s%s: unexpected interrupt, "
-						"status=0x%02x, count=%ld\n",
-						hwif->name,
-						(hwif->next==hwgroup->hwif) ? "" : "(?)", stat, count);
-				}
-			}
+		if (time_after(jiffies, last_msgtime + HZ)) {
+			last_msgtime = jiffies;
+			printk(KERN_ERR "%s: unexpected interrupt, "
+				"status=0x%02x, count=%ld\n",
+				hwif->name, stat, count);
 		}
-	} while ((hwif = hwif->next) != hwgroup->hwif);
+	}
 }
 
 /**
  *	ide_intr	-	default IDE interrupt handler
  *	@irq: interrupt number
- *	@dev_id: hwif group
+ *	@dev_id: hwif
  *	@regs: unused weirdness from the kernel irq layer
  *
  *	This is the default IRQ handler for the IDE layer. You should
  *	not need to override it. If you do be aware it is subtle in
  *	places
  *
- *	hwgroup->hwif is the interface in the group currently performing
- *	a command. hwgroup->drive is the drive and hwgroup->handler is
+ *	hwif is the port currently performing a command,
+ *	hwif->cur_dev is the active drive and hwif->handler is
  *	the IRQ handler to call. As we issue a command the handlers
  *	step through multiple states, reassigning the handler to the
  *	next step in the process. Unlike a smart SCSI controller IDE
@@ -1054,26 +1047,32 @@
  *
  *	The handler eventually returns ide_stopped to indicate the
  *	request completed. At this point we issue the next request
- *	on the hwgroup and the process begins again.
+ *	on the port and the process begins again.
  */
- 
+
 irqreturn_t ide_intr (int irq, void *dev_id)
 {
-	unsigned long flags;
-	ide_hwgroup_t *hwgroup = (ide_hwgroup_t *)dev_id;
-	ide_hwif_t *hwif = hwgroup->hwif;
+	ide_hwif_t *hwif = (ide_hwif_t *)dev_id;
 	ide_drive_t *uninitialized_var(drive);
 	ide_handler_t *handler;
+	unsigned long flags;
 	ide_startstop_t startstop;
 	irqreturn_t irq_ret = IRQ_NONE;
 	int plug_device = 0;
 
-	spin_lock_irqsave(&hwgroup->lock, flags);
+	if (hwif->host->host_flags & IDE_HFLAG_SERIALIZE) {
+		if (hwif != hwif->host->cur_port)
+			goto out_early;
+	}
+
+	spin_lock_irqsave(&hwif->lock, flags);
 
 	if (!ide_ack_intr(hwif))
 		goto out;
 
-	if ((handler = hwgroup->handler) == NULL || hwgroup->polling) {
+	handler = hwif->handler;
+
+	if (handler == NULL || hwif->polling) {
 		/*
 		 * Not expecting an interrupt from this drive.
 		 * That means this could be:
@@ -1097,7 +1096,7 @@
 			 * Probably not a shared PCI interrupt,
 			 * so we can safely try to do something about it:
 			 */
-			unexpected_intr(irq, hwgroup);
+			unexpected_intr(irq, hwif);
 #ifdef CONFIG_BLK_DEV_IDEPCI
 		} else {
 			/*
@@ -1110,16 +1109,7 @@
 		goto out;
 	}
 
-	drive = hwgroup->drive;
-	if (!drive) {
-		/*
-		 * This should NEVER happen, and there isn't much
-		 * we could do about it here.
-		 *
-		 * [Note - this can occur if the drive is hot unplugged]
-		 */
-		goto out_handled;
-	}
+	drive = hwif->cur_dev;
 
 	if (!drive_is_ready(drive))
 		/*
@@ -1131,10 +1121,10 @@
 		 */
 		goto out;
 
-	hwgroup->handler = NULL;
-	hwgroup->req_gen++;
-	del_timer(&hwgroup->timer);
-	spin_unlock(&hwgroup->lock);
+	hwif->handler = NULL;
+	hwif->req_gen++;
+	del_timer(&hwif->timer);
+	spin_unlock(&hwif->lock);
 
 	if (hwif->port_ops && hwif->port_ops->clear_irq)
 		hwif->port_ops->clear_irq(drive);
@@ -1145,7 +1135,7 @@
 	/* service this interrupt, may set handler for next interrupt */
 	startstop = handler(drive);
 
-	spin_lock_irq(&hwgroup->lock);
+	spin_lock_irq(&hwif->lock);
 	/*
 	 * Note that handler() may have set things up for another
 	 * interrupt to occur soon, but it cannot happen until
@@ -1154,20 +1144,18 @@
 	 * won't allow another of the same (on any CPU) until we return.
 	 */
 	if (startstop == ide_stopped) {
-		if (hwgroup->handler == NULL) {	/* paranoia */
-			ide_unlock_hwgroup(hwgroup);
-			plug_device = 1;
-		} else
-			printk(KERN_ERR "%s: %s: huh? expected NULL handler "
-					"on exit\n", __func__, drive->name);
+		BUG_ON(hwif->handler);
+		ide_unlock_port(hwif);
+		plug_device = 1;
 	}
-out_handled:
 	irq_ret = IRQ_HANDLED;
 out:
-	spin_unlock_irqrestore(&hwgroup->lock, flags);
-
-	if (plug_device)
+	spin_unlock_irqrestore(&hwif->lock, flags);
+out_early:
+	if (plug_device) {
+		ide_unlock_host(hwif->host);
 		ide_plug_device(drive);
+	}
 
 	return irq_ret;
 }
@@ -1189,15 +1177,13 @@
 
 void ide_do_drive_cmd(ide_drive_t *drive, struct request *rq)
 {
-	ide_hwgroup_t *hwgroup = drive->hwif->hwgroup;
 	struct request_queue *q = drive->queue;
 	unsigned long flags;
 
-	hwgroup->rq = NULL;
+	drive->hwif->rq = NULL;
 
 	spin_lock_irqsave(q->queue_lock, flags);
 	__elv_add_request(q, rq, ELEVATOR_INSERT_FRONT, 0);
-	blk_start_queueing(q);
 	spin_unlock_irqrestore(q->queue_lock, flags);
 }
 EXPORT_SYMBOL(ide_do_drive_cmd);
diff --git a/drivers/ide/ide-iops.c b/drivers/ide/ide-iops.c
index ad8bd65..e728cfe 100644
--- a/drivers/ide/ide-iops.c
+++ b/drivers/ide/ide-iops.c
@@ -105,15 +105,6 @@
 }
 EXPORT_SYMBOL_GPL(ide_read_altstatus);
 
-u8 ide_read_sff_dma_status(ide_hwif_t *hwif)
-{
-	if (hwif->host_flags & IDE_HFLAG_MMIO)
-		return readb((void __iomem *)(hwif->dma_base + ATA_DMA_STATUS));
-	else
-		return inb(hwif->dma_base + ATA_DMA_STATUS);
-}
-EXPORT_SYMBOL_GPL(ide_read_sff_dma_status);
-
 void ide_set_irq(ide_hwif_t *hwif, int on)
 {
 	u8 ctl = ATA_DEVCTL_OBS;
@@ -388,7 +379,6 @@
 	.exec_command		= ide_exec_command,
 	.read_status		= ide_read_status,
 	.read_altstatus		= ide_read_altstatus,
-	.read_sff_dma_status	= ide_read_sff_dma_status,
 
 	.set_irq		= ide_set_irq,
 
@@ -451,7 +441,7 @@
  */
 int drive_is_ready (ide_drive_t *drive)
 {
-	ide_hwif_t *hwif	= HWIF(drive);
+	ide_hwif_t *hwif	= drive->hwif;
 	u8 stat			= 0;
 
 	if (drive->waiting_for_dma)
@@ -503,7 +493,8 @@
 	stat = tp_ops->read_status(hwif);
 
 	if (stat & ATA_BUSY) {
-		local_irq_set(flags);
+		local_irq_save(flags);
+		local_irq_enable_in_hardirq();
 		timeout += jiffies;
 		while ((stat = tp_ops->read_status(hwif)) & ATA_BUSY) {
 			if (time_after(jiffies, timeout)) {
@@ -822,25 +813,25 @@
 static void __ide_set_handler (ide_drive_t *drive, ide_handler_t *handler,
 		      unsigned int timeout, ide_expiry_t *expiry)
 {
-	ide_hwgroup_t *hwgroup = HWGROUP(drive);
+	ide_hwif_t *hwif = drive->hwif;
 
-	BUG_ON(hwgroup->handler);
-	hwgroup->handler	= handler;
-	hwgroup->expiry		= expiry;
-	hwgroup->timer.expires	= jiffies + timeout;
-	hwgroup->req_gen_timer	= hwgroup->req_gen;
-	add_timer(&hwgroup->timer);
+	BUG_ON(hwif->handler);
+	hwif->handler		= handler;
+	hwif->expiry		= expiry;
+	hwif->timer.expires	= jiffies + timeout;
+	hwif->req_gen_timer	= hwif->req_gen;
+	add_timer(&hwif->timer);
 }
 
 void ide_set_handler (ide_drive_t *drive, ide_handler_t *handler,
 		      unsigned int timeout, ide_expiry_t *expiry)
 {
-	ide_hwgroup_t *hwgroup = drive->hwif->hwgroup;
+	ide_hwif_t *hwif = drive->hwif;
 	unsigned long flags;
 
-	spin_lock_irqsave(&hwgroup->lock, flags);
+	spin_lock_irqsave(&hwif->lock, flags);
 	__ide_set_handler(drive, handler, timeout, expiry);
-	spin_unlock_irqrestore(&hwgroup->lock, flags);
+	spin_unlock_irqrestore(&hwif->lock, flags);
 }
 
 EXPORT_SYMBOL(ide_set_handler);
@@ -863,10 +854,9 @@
 			 unsigned timeout, ide_expiry_t *expiry)
 {
 	ide_hwif_t *hwif = drive->hwif;
-	ide_hwgroup_t *hwgroup = hwif->hwgroup;
 	unsigned long flags;
 
-	spin_lock_irqsave(&hwgroup->lock, flags);
+	spin_lock_irqsave(&hwif->lock, flags);
 	__ide_set_handler(drive, handler, timeout, expiry);
 	hwif->tp_ops->exec_command(hwif, cmd);
 	/*
@@ -876,26 +866,25 @@
 	 * FIXME: we could skip this delay with care on non shared devices
 	 */
 	ndelay(400);
-	spin_unlock_irqrestore(&hwgroup->lock, flags);
+	spin_unlock_irqrestore(&hwif->lock, flags);
 }
 EXPORT_SYMBOL(ide_execute_command);
 
 void ide_execute_pkt_cmd(ide_drive_t *drive)
 {
 	ide_hwif_t *hwif = drive->hwif;
-	ide_hwgroup_t *hwgroup = hwif->hwgroup;
 	unsigned long flags;
 
-	spin_lock_irqsave(&hwgroup->lock, flags);
+	spin_lock_irqsave(&hwif->lock, flags);
 	hwif->tp_ops->exec_command(hwif, ATA_CMD_PACKET);
 	ndelay(400);
-	spin_unlock_irqrestore(&hwgroup->lock, flags);
+	spin_unlock_irqrestore(&hwif->lock, flags);
 }
 EXPORT_SYMBOL_GPL(ide_execute_pkt_cmd);
 
 static inline void ide_complete_drive_reset(ide_drive_t *drive, int err)
 {
-	struct request *rq = drive->hwif->hwgroup->rq;
+	struct request *rq = drive->hwif->rq;
 
 	if (rq && blk_special_request(rq) && rq->cmd[0] == REQ_DRIVE_RESET)
 		ide_end_request(drive, err ? err : 1, 0);
@@ -913,7 +902,6 @@
 static ide_startstop_t atapi_reset_pollfunc (ide_drive_t *drive)
 {
 	ide_hwif_t *hwif = drive->hwif;
-	ide_hwgroup_t *hwgroup = hwif->hwgroup;
 	u8 stat;
 
 	SELECT_DRIVE(drive);
@@ -923,20 +911,20 @@
 	if (OK_STAT(stat, 0, ATA_BUSY))
 		printk("%s: ATAPI reset complete\n", drive->name);
 	else {
-		if (time_before(jiffies, hwgroup->poll_timeout)) {
+		if (time_before(jiffies, hwif->poll_timeout)) {
 			ide_set_handler(drive, &atapi_reset_pollfunc, HZ/20, NULL);
 			/* continue polling */
 			return ide_started;
 		}
 		/* end of polling */
-		hwgroup->polling = 0;
+		hwif->polling = 0;
 		printk("%s: ATAPI reset timed-out, status=0x%02x\n",
 				drive->name, stat);
 		/* do it the old fashioned way */
 		return do_reset1(drive, 1);
 	}
 	/* done polling */
-	hwgroup->polling = 0;
+	hwif->polling = 0;
 	ide_complete_drive_reset(drive, 0);
 	return ide_stopped;
 }
@@ -968,8 +956,7 @@
  */
 static ide_startstop_t reset_pollfunc (ide_drive_t *drive)
 {
-	ide_hwgroup_t *hwgroup	= HWGROUP(drive);
-	ide_hwif_t *hwif	= HWIF(drive);
+	ide_hwif_t *hwif = drive->hwif;
 	const struct ide_port_ops *port_ops = hwif->port_ops;
 	u8 tmp;
 	int err = 0;
@@ -986,7 +973,7 @@
 	tmp = hwif->tp_ops->read_status(hwif);
 
 	if (!OK_STAT(tmp, 0, ATA_BUSY)) {
-		if (time_before(jiffies, hwgroup->poll_timeout)) {
+		if (time_before(jiffies, hwif->poll_timeout)) {
 			ide_set_handler(drive, &reset_pollfunc, HZ/20, NULL);
 			/* continue polling */
 			return ide_started;
@@ -1007,7 +994,7 @@
 		}
 	}
 out:
-	hwgroup->polling = 0;	/* done polling */
+	hwif->polling = 0;	/* done polling */
 	ide_complete_drive_reset(drive, err);
 	return ide_stopped;
 }
@@ -1081,18 +1068,18 @@
 static ide_startstop_t do_reset1 (ide_drive_t *drive, int do_not_try_atapi)
 {
 	ide_hwif_t *hwif = drive->hwif;
-	ide_hwgroup_t *hwgroup = hwif->hwgroup;
 	struct ide_io_ports *io_ports = &hwif->io_ports;
 	const struct ide_tp_ops *tp_ops = hwif->tp_ops;
 	const struct ide_port_ops *port_ops;
+	ide_drive_t *tdrive;
 	unsigned long flags, timeout;
-	unsigned int unit;
+	int i;
 	DEFINE_WAIT(wait);
 
-	spin_lock_irqsave(&hwgroup->lock, flags);
+	spin_lock_irqsave(&hwif->lock, flags);
 
 	/* We must not reset with running handlers */
-	BUG_ON(hwgroup->handler != NULL);
+	BUG_ON(hwif->handler != NULL);
 
 	/* For an ATAPI device, first try an ATAPI SRST. */
 	if (drive->media != ide_disk && !do_not_try_atapi) {
@@ -1101,10 +1088,10 @@
 		udelay (20);
 		tp_ops->exec_command(hwif, ATA_CMD_DEV_RESET);
 		ndelay(400);
-		hwgroup->poll_timeout = jiffies + WAIT_WORSTCASE;
-		hwgroup->polling = 1;
+		hwif->poll_timeout = jiffies + WAIT_WORSTCASE;
+		hwif->polling = 1;
 		__ide_set_handler(drive, &atapi_reset_pollfunc, HZ/20, NULL);
-		spin_unlock_irqrestore(&hwgroup->lock, flags);
+		spin_unlock_irqrestore(&hwif->lock, flags);
 		return ide_started;
 	}
 
@@ -1114,9 +1101,7 @@
 
 		prepare_to_wait(&ide_park_wq, &wait, TASK_UNINTERRUPTIBLE);
 		timeout = jiffies;
-		for (unit = 0; unit < MAX_DRIVES; unit++) {
-			ide_drive_t *tdrive = &hwif->drives[unit];
-
+		ide_port_for_each_dev(i, tdrive, hwif) {
 			if (tdrive->dev_flags & IDE_DFLAG_PRESENT &&
 			    tdrive->dev_flags & IDE_DFLAG_PARKED &&
 			    time_after(tdrive->sleep, timeout))
@@ -1127,9 +1112,9 @@
 		if (time_before_eq(timeout, now))
 			break;
 
-		spin_unlock_irqrestore(&hwgroup->lock, flags);
+		spin_unlock_irqrestore(&hwif->lock, flags);
 		timeout = schedule_timeout_uninterruptible(timeout - now);
-		spin_lock_irqsave(&hwgroup->lock, flags);
+		spin_lock_irqsave(&hwif->lock, flags);
 	} while (timeout);
 	finish_wait(&ide_park_wq, &wait);
 
@@ -1137,11 +1122,11 @@
 	 * First, reset any device state data we were maintaining
 	 * for any of the drives on this interface.
 	 */
-	for (unit = 0; unit < MAX_DRIVES; ++unit)
-		pre_reset(&hwif->drives[unit]);
+	ide_port_for_each_dev(i, tdrive, hwif)
+		pre_reset(tdrive);
 
 	if (io_ports->ctl_addr == 0) {
-		spin_unlock_irqrestore(&hwgroup->lock, flags);
+		spin_unlock_irqrestore(&hwif->lock, flags);
 		ide_complete_drive_reset(drive, -ENXIO);
 		return ide_stopped;
 	}
@@ -1164,8 +1149,8 @@
 	tp_ops->set_irq(hwif, drive->quirk_list == 2);
 	/* more than enough time */
 	udelay(10);
-	hwgroup->poll_timeout = jiffies + WAIT_WORSTCASE;
-	hwgroup->polling = 1;
+	hwif->poll_timeout = jiffies + WAIT_WORSTCASE;
+	hwif->polling = 1;
 	__ide_set_handler(drive, &reset_pollfunc, HZ/20, NULL);
 
 	/*
@@ -1177,7 +1162,7 @@
 	if (port_ops && port_ops->resetproc)
 		port_ops->resetproc(drive);
 
-	spin_unlock_irqrestore(&hwgroup->lock, flags);
+	spin_unlock_irqrestore(&hwif->lock, flags);
 	return ide_started;
 }
 
@@ -1221,6 +1206,3 @@
 	}
 	return -EBUSY;
 }
-
-EXPORT_SYMBOL_GPL(ide_wait_not_busy);
-
diff --git a/drivers/ide/ide-lib.c b/drivers/ide/ide-lib.c
index 9f6e33d..09526a0 100644
--- a/drivers/ide/ide-lib.c
+++ b/drivers/ide/ide-lib.c
@@ -273,7 +273,7 @@
 
 static void ide_dump_opcode(ide_drive_t *drive)
 {
-	struct request *rq = drive->hwif->hwgroup->rq;
+	struct request *rq = drive->hwif->rq;
 	ide_task_t *task = NULL;
 
 	if (!rq)
@@ -346,10 +346,13 @@
 	printk(KERN_CONT "}");
 	if ((err & (ATA_BBK | ATA_ABORTED)) == ATA_BBK ||
 	    (err & (ATA_UNC | ATA_IDNF | ATA_AMNF))) {
+		struct request *rq = drive->hwif->rq;
+
 		ide_dump_sector(drive);
-		if (HWGROUP(drive) && HWGROUP(drive)->rq)
+
+		if (rq)
 			printk(KERN_CONT ", sector=%llu",
-			       (unsigned long long)HWGROUP(drive)->rq->sector);
+			       (unsigned long long)rq->sector);
 	}
 	printk(KERN_CONT "\n");
 }
diff --git a/drivers/ide/ide-park.c b/drivers/ide/ide-park.c
index 678454a..c875a95 100644
--- a/drivers/ide/ide-park.c
+++ b/drivers/ide/ide-park.c
@@ -7,22 +7,22 @@
 
 static void issue_park_cmd(ide_drive_t *drive, unsigned long timeout)
 {
-	ide_hwgroup_t *hwgroup = drive->hwif->hwgroup;
+	ide_hwif_t *hwif = drive->hwif;
 	struct request_queue *q = drive->queue;
 	struct request *rq;
 	int rc;
 
 	timeout += jiffies;
-	spin_lock_irq(&hwgroup->lock);
+	spin_lock_irq(&hwif->lock);
 	if (drive->dev_flags & IDE_DFLAG_PARKED) {
 		int reset_timer = time_before(timeout, drive->sleep);
 		int start_queue = 0;
 
 		drive->sleep = timeout;
 		wake_up_all(&ide_park_wq);
-		if (reset_timer && del_timer(&hwgroup->timer))
+		if (reset_timer && del_timer(&hwif->timer))
 			start_queue = 1;
-		spin_unlock_irq(&hwgroup->lock);
+		spin_unlock_irq(&hwif->lock);
 
 		if (start_queue) {
 			spin_lock_irq(q->queue_lock);
@@ -31,7 +31,7 @@
 		}
 		return;
 	}
-	spin_unlock_irq(&hwgroup->lock);
+	spin_unlock_irq(&hwif->lock);
 
 	rq = blk_get_request(q, READ, __GFP_WAIT);
 	rq->cmd[0] = REQ_PARK_HEADS;
@@ -64,21 +64,21 @@
 		      char *buf)
 {
 	ide_drive_t *drive = to_ide_device(dev);
-	ide_hwgroup_t *hwgroup = drive->hwif->hwgroup;
+	ide_hwif_t *hwif = drive->hwif;
 	unsigned long now;
 	unsigned int msecs;
 
 	if (drive->dev_flags & IDE_DFLAG_NO_UNLOAD)
 		return -EOPNOTSUPP;
 
-	spin_lock_irq(&hwgroup->lock);
+	spin_lock_irq(&hwif->lock);
 	now = jiffies;
 	if (drive->dev_flags & IDE_DFLAG_PARKED &&
 	    time_after(drive->sleep, now))
 		msecs = jiffies_to_msecs(drive->sleep - now);
 	else
 		msecs = 0;
-	spin_unlock_irq(&hwgroup->lock);
+	spin_unlock_irq(&hwif->lock);
 
 	return snprintf(buf, 20, "%u\n", msecs);
 }
diff --git a/drivers/ide/ide-pm.c b/drivers/ide/ide-pm.c
index 8282c60..4b3bf6a 100644
--- a/drivers/ide/ide-pm.c
+++ b/drivers/ide/ide-pm.c
@@ -5,7 +5,7 @@
 int generic_ide_suspend(struct device *dev, pm_message_t mesg)
 {
 	ide_drive_t *drive = dev->driver_data, *pair = ide_get_pair_dev(drive);
-	ide_hwif_t *hwif = HWIF(drive);
+	ide_hwif_t *hwif = drive->hwif;
 	struct request *rq;
 	struct request_pm_state rqpm;
 	ide_task_t args;
@@ -39,7 +39,7 @@
 int generic_ide_resume(struct device *dev)
 {
 	ide_drive_t *drive = dev->driver_data, *pair = ide_get_pair_dev(drive);
-	ide_hwif_t *hwif = HWIF(drive);
+	ide_hwif_t *hwif = drive->hwif;
 	struct request *rq;
 	struct request_pm_state rqpm;
 	ide_task_t args;
@@ -67,7 +67,7 @@
 	blk_put_request(rq);
 
 	if (err == 0 && dev->driver) {
-		ide_driver_t *drv = to_ide_driver(dev->driver);
+		struct ide_driver *drv = to_ide_driver(dev->driver);
 
 		if (drv->resume)
 			drv->resume(drive);
@@ -194,7 +194,7 @@
 	}
 	spin_unlock_irqrestore(q->queue_lock, flags);
 
-	drive->hwif->hwgroup->rq = NULL;
+	drive->hwif->rq = NULL;
 
 	if (blk_end_request(rq, 0, 0))
 		BUG();
diff --git a/drivers/ide/ide-probe.c b/drivers/ide/ide-probe.c
index c5adb7b..0ccbb44 100644
--- a/drivers/ide/ide-probe.c
+++ b/drivers/ide/ide-probe.c
@@ -189,7 +189,7 @@
 
 static void do_identify(ide_drive_t *drive, u8 cmd)
 {
-	ide_hwif_t *hwif = HWIF(drive);
+	ide_hwif_t *hwif = drive->hwif;
 	u16 *id = drive->id;
 	char *m = (char *)&id[ATA_ID_PROD];
 	unsigned long flags;
@@ -266,7 +266,7 @@
 
 static int actual_try_to_identify (ide_drive_t *drive, u8 cmd)
 {
-	ide_hwif_t *hwif = HWIF(drive);
+	ide_hwif_t *hwif = drive->hwif;
 	struct ide_io_ports *io_ports = &hwif->io_ports;
 	const struct ide_tp_ops *tp_ops = hwif->tp_ops;
 	int use_altstatus = 0, rc;
@@ -341,7 +341,7 @@
  
 static int try_to_identify (ide_drive_t *drive, u8 cmd)
 {
-	ide_hwif_t *hwif = HWIF(drive);
+	ide_hwif_t *hwif = drive->hwif;
 	const struct ide_tp_ops *tp_ops = hwif->tp_ops;
 	int retval;
 	int autoprobe = 0;
@@ -438,7 +438,7 @@
 
 static int do_probe (ide_drive_t *drive, u8 cmd)
 {
-	ide_hwif_t *hwif = HWIF(drive);
+	ide_hwif_t *hwif = drive->hwif;
 	const struct ide_tp_ops *tp_ops = hwif->tp_ops;
 	int rc;
 	u8 present = !!(drive->dev_flags & IDE_DFLAG_PRESENT), stat;
@@ -463,7 +463,7 @@
 	if (ide_read_device(drive) != drive->select && present == 0) {
 		if (drive->dn & 1) {
 			/* exit with drive0 selected */
-			SELECT_DRIVE(&hwif->drives[0]);
+			SELECT_DRIVE(hwif->devices[0]);
 			/* allow ATA_BUSY to assert & clear */
 			msleep(50);
 		}
@@ -509,7 +509,7 @@
 	}
 	if (drive->dn & 1) {
 		/* exit with drive0 selected */
-		SELECT_DRIVE(&hwif->drives[0]);
+		SELECT_DRIVE(hwif->devices[0]);
 		msleep(50);
 		/* ensure drive irq is clear */
 		(void)tp_ops->read_status(hwif);
@@ -522,7 +522,7 @@
  */
 static void enable_nest (ide_drive_t *drive)
 {
-	ide_hwif_t *hwif = HWIF(drive);
+	ide_hwif_t *hwif = drive->hwif;
 	const struct ide_tp_ops *tp_ops = hwif->tp_ops;
 	u8 stat;
 
@@ -697,7 +697,8 @@
 
 static int ide_port_wait_ready(ide_hwif_t *hwif)
 {
-	int unit, rc;
+	ide_drive_t *drive;
+	int i, rc;
 
 	printk(KERN_DEBUG "Probing IDE interface %s...\n", hwif->name);
 
@@ -714,9 +715,7 @@
 		return rc;
 
 	/* Now make sure both master & slave are ready */
-	for (unit = 0; unit < MAX_DRIVES; unit++) {
-		ide_drive_t *drive = &hwif->drives[unit];
-
+	ide_port_for_each_dev(i, drive, hwif) {
 		/* Ignore disks that we will not probe for later. */
 		if ((drive->dev_flags & IDE_DFLAG_NOPROBE) == 0 ||
 		    (drive->dev_flags & IDE_DFLAG_PRESENT)) {
@@ -732,8 +731,8 @@
 	}
 out:
 	/* Exit function with master reselected (let's be sane) */
-	if (unit)
-		SELECT_DRIVE(&hwif->drives[0]);
+	if (i)
+		SELECT_DRIVE(hwif->devices[0]);
 
 	return rc;
 }
@@ -749,7 +748,7 @@
 
 void ide_undecoded_slave(ide_drive_t *dev1)
 {
-	ide_drive_t *dev0 = &dev1->hwif->drives[0];
+	ide_drive_t *dev0 = dev1->hwif->devices[0];
 
 	if ((dev1->dn & 1) == 0 || (dev0->dev_flags & IDE_DFLAG_PRESENT) == 0)
 		return;
@@ -778,14 +777,15 @@
 
 static int ide_probe_port(ide_hwif_t *hwif)
 {
+	ide_drive_t *drive;
 	unsigned long flags;
 	unsigned int irqd;
-	int unit, rc = -ENODEV;
+	int i, rc = -ENODEV;
 
 	BUG_ON(hwif->present);
 
-	if ((hwif->drives[0].dev_flags & IDE_DFLAG_NOPROBE) &&
-	    (hwif->drives[1].dev_flags & IDE_DFLAG_NOPROBE))
+	if ((hwif->devices[0]->dev_flags & IDE_DFLAG_NOPROBE) &&
+	    (hwif->devices[1]->dev_flags & IDE_DFLAG_NOPROBE))
 		return -EACCES;
 
 	/*
@@ -796,7 +796,8 @@
 	if (irqd)
 		disable_irq(hwif->irq);
 
-	local_irq_set(flags);
+	local_irq_save(flags);
+	local_irq_enable_in_hardirq();
 
 	if (ide_port_wait_ready(hwif) == -EBUSY)
 		printk(KERN_DEBUG "%s: Wait for ready failed before probe !\n", hwif->name);
@@ -805,9 +806,7 @@
 	 * Second drive should only exist if first drive was found,
 	 * but a lot of cdrom drives are configured as single slaves.
 	 */
-	for (unit = 0; unit < MAX_DRIVES; ++unit) {
-		ide_drive_t *drive = &hwif->drives[unit];
-
+	ide_port_for_each_dev(i, drive, hwif) {
 		(void) probe_for_drive(drive);
 		if (drive->dev_flags & IDE_DFLAG_PRESENT)
 			rc = 0;
@@ -828,20 +827,17 @@
 static void ide_port_tune_devices(ide_hwif_t *hwif)
 {
 	const struct ide_port_ops *port_ops = hwif->port_ops;
-	int unit;
+	ide_drive_t *drive;
+	int i;
 
-	for (unit = 0; unit < MAX_DRIVES; unit++) {
-		ide_drive_t *drive = &hwif->drives[unit];
-
+	ide_port_for_each_dev(i, drive, hwif) {
 		if (drive->dev_flags & IDE_DFLAG_PRESENT) {
 			if (port_ops && port_ops->quirkproc)
 				port_ops->quirkproc(drive);
 		}
 	}
 
-	for (unit = 0; unit < MAX_DRIVES; ++unit) {
-		ide_drive_t *drive = &hwif->drives[unit];
-
+	ide_port_for_each_dev(i, drive, hwif) {
 		if (drive->dev_flags & IDE_DFLAG_PRESENT) {
 			ide_set_max_pio(drive);
 
@@ -852,11 +848,8 @@
 		}
 	}
 
-	for (unit = 0; unit < MAX_DRIVES; ++unit) {
-		ide_drive_t *drive = &hwif->drives[unit];
-
-		if ((hwif->host_flags & IDE_HFLAG_NO_IO_32BIT) ||
-		    drive->id[ATA_ID_DWORD_IO])
+	ide_port_for_each_dev(i, drive, hwif) {
+		if (hwif->host_flags & IDE_HFLAG_NO_IO_32BIT)
 			drive->dev_flags |= IDE_DFLAG_NO_IO_32BIT;
 		else
 			drive->dev_flags &= ~IDE_DFLAG_NO_IO_32BIT;
@@ -869,7 +862,7 @@
 static int ide_init_queue(ide_drive_t *drive)
 {
 	struct request_queue *q;
-	ide_hwif_t *hwif = HWIF(drive);
+	ide_hwif_t *hwif = drive->hwif;
 	int max_sectors = 256;
 	int max_sg_entries = PRD_ENTRIES;
 
@@ -918,36 +911,19 @@
 	return 0;
 }
 
-static void ide_add_drive_to_hwgroup(ide_drive_t *drive)
-{
-	ide_hwgroup_t *hwgroup = drive->hwif->hwgroup;
-
-	spin_lock_irq(&hwgroup->lock);
-	if (!hwgroup->drive) {
-		/* first drive for hwgroup. */
-		drive->next = drive;
-		hwgroup->drive = drive;
-		hwgroup->hwif = HWIF(hwgroup->drive);
-	} else {
-		drive->next = hwgroup->drive->next;
-		hwgroup->drive->next = drive;
-	}
-	spin_unlock_irq(&hwgroup->lock);
-}
+static DEFINE_MUTEX(ide_cfg_mtx);
 
 /*
  * For any present drive:
  * - allocate the block device queue
- * - link drive into the hwgroup
  */
 static int ide_port_setup_devices(ide_hwif_t *hwif)
 {
+	ide_drive_t *drive;
 	int i, j = 0;
 
 	mutex_lock(&ide_cfg_mtx);
-	for (i = 0; i < MAX_DRIVES; i++) {
-		ide_drive_t *drive = &hwif->drives[i];
-
+	ide_port_for_each_dev(i, drive, hwif) {
 		if ((drive->dev_flags & IDE_DFLAG_PRESENT) == 0)
 			continue;
 
@@ -961,139 +937,39 @@
 		}
 
 		j++;
-
-		ide_add_drive_to_hwgroup(drive);
 	}
 	mutex_unlock(&ide_cfg_mtx);
 
 	return j;
 }
 
-static ide_hwif_t *ide_ports[MAX_HWIFS];
-
-void ide_remove_port_from_hwgroup(ide_hwif_t *hwif)
-{
-	ide_hwgroup_t *hwgroup = hwif->hwgroup;
-
-	ide_ports[hwif->index] = NULL;
-
-	spin_lock_irq(&hwgroup->lock);
-	/*
-	 * Remove us from the hwgroup, and free
-	 * the hwgroup if we were the only member
-	 */
-	if (hwif->next == hwif) {
-		BUG_ON(hwgroup->hwif != hwif);
-		kfree(hwgroup);
-	} else {
-		/* There is another interface in hwgroup.
-		 * Unlink us, and set hwgroup->drive and ->hwif to
-		 * something sane.
-		 */
-		ide_hwif_t *g = hwgroup->hwif;
-
-		while (g->next != hwif)
-			g = g->next;
-		g->next = hwif->next;
-		if (hwgroup->hwif == hwif) {
-			/* Chose a random hwif for hwgroup->hwif.
-			 * It's guaranteed that there are no drives
-			 * left in the hwgroup.
-			 */
-			BUG_ON(hwgroup->drive != NULL);
-			hwgroup->hwif = g;
-		}
-		BUG_ON(hwgroup->hwif == hwif);
-	}
-	spin_unlock_irq(&hwgroup->lock);
-}
-
 /*
- * This routine sets up the irq for an ide interface, and creates a new
- * hwgroup for the irq/hwif if none was previously assigned.
- *
- * Much of the code is for correctly detecting/handling irq sharing
- * and irq serialization situations.  This is somewhat complex because
- * it handles static as well as dynamic (PCMCIA) IDE interfaces.
+ * This routine sets up the IRQ for an IDE interface.
  */
 static int init_irq (ide_hwif_t *hwif)
 {
 	struct ide_io_ports *io_ports = &hwif->io_ports;
-	unsigned int index;
-	ide_hwgroup_t *hwgroup;
-	ide_hwif_t *match = NULL;
+	int sa = 0;
 
 	mutex_lock(&ide_cfg_mtx);
-	hwif->hwgroup = NULL;
+	spin_lock_init(&hwif->lock);
 
-	for (index = 0; index < MAX_HWIFS; index++) {
-		ide_hwif_t *h = ide_ports[index];
+	init_timer(&hwif->timer);
+	hwif->timer.function = &ide_timer_expiry;
+	hwif->timer.data = (unsigned long)hwif;
 
-		if (h && h->hwgroup) {  /* scan only initialized ports */
-			if (hwif->host->host_flags & IDE_HFLAG_SERIALIZE) {
-				if (hwif->host == h->host)
-					match = h;
-			}
-		}
-	}
-
-	/*
-	 * If we are still without a hwgroup, then form a new one
-	 */
-	if (match) {
-		hwgroup = match->hwgroup;
-		hwif->hwgroup = hwgroup;
-		/*
-		 * Link us into the hwgroup.
-		 * This must be done early, do ensure that unexpected_intr
-		 * can find the hwif and prevent irq storms.
-		 * No drives are attached to the new hwif, choose_drive
-		 * can't do anything stupid (yet).
-		 * Add ourself as the 2nd entry to the hwgroup->hwif
-		 * linked list, the first entry is the hwif that owns
-		 * hwgroup->handler - do not change that.
-		 */
-		spin_lock_irq(&hwgroup->lock);
-		hwif->next = hwgroup->hwif->next;
-		hwgroup->hwif->next = hwif;
-		BUG_ON(hwif->next == hwif);
-		spin_unlock_irq(&hwgroup->lock);
-	} else {
-		hwgroup = kmalloc_node(sizeof(*hwgroup), GFP_KERNEL|__GFP_ZERO,
-				       hwif_to_node(hwif));
-		if (hwgroup == NULL)
-			goto out_up;
-
-		spin_lock_init(&hwgroup->lock);
-
-		hwif->hwgroup = hwgroup;
-		hwgroup->hwif = hwif->next = hwif;
-
-		init_timer(&hwgroup->timer);
-		hwgroup->timer.function = &ide_timer_expiry;
-		hwgroup->timer.data = (unsigned long) hwgroup;
-	}
-
-	ide_ports[hwif->index] = hwif;
-
-	/*
-	 * Allocate the irq, if not already obtained for another hwif
-	 */
-	if (!match || match->irq != hwif->irq) {
-		int sa = 0;
 #if defined(__mc68000__)
-		sa = IRQF_SHARED;
+	sa = IRQF_SHARED;
 #endif /* __mc68000__ */
 
-		if (hwif->chipset == ide_pci)
-			sa = IRQF_SHARED;
+	if (hwif->chipset == ide_pci)
+		sa = IRQF_SHARED;
 
-		if (io_ports->ctl_addr)
-			hwif->tp_ops->set_irq(hwif, 1);
+	if (io_ports->ctl_addr)
+		hwif->tp_ops->set_irq(hwif, 1);
 
-		if (request_irq(hwif->irq,&ide_intr,sa,hwif->name,hwgroup))
-	       		goto out_unlink;
-	}
+	if (request_irq(hwif->irq, &ide_intr, sa, hwif->name, hwif))
+		goto out_up;
 
 	if (!hwif->rqsize) {
 		if ((hwif->host_flags & IDE_HFLAG_NO_LBA48) ||
@@ -1111,14 +987,12 @@
 	printk(KERN_INFO "%s at 0x%08lx on irq %d", hwif->name,
 		io_ports->data_addr, hwif->irq);
 #endif /* __mc68000__ */
-	if (match)
-		printk(KERN_CONT " (serialized with %s)", match->name);
+	if (hwif->host->host_flags & IDE_HFLAG_SERIALIZE)
+		printk(KERN_CONT " (serialized)");
 	printk(KERN_CONT "\n");
 
 	mutex_unlock(&ide_cfg_mtx);
 	return 0;
-out_unlink:
-	ide_remove_port_from_hwgroup(hwif);
 out_up:
 	mutex_unlock(&ide_cfg_mtx);
 	return 1;
@@ -1134,7 +1008,7 @@
 {
 	ide_hwif_t *hwif = data;
 	int unit = *part >> PARTN_BITS;
-	ide_drive_t *drive = &hwif->drives[unit];
+	ide_drive_t *drive = hwif->devices[unit];
 
 	if ((drive->dev_flags & IDE_DFLAG_PRESENT) == 0)
 		return NULL;
@@ -1196,47 +1070,23 @@
 
 EXPORT_SYMBOL_GPL(ide_init_disk);
 
-static void ide_remove_drive_from_hwgroup(ide_drive_t *drive)
-{
-	ide_hwgroup_t *hwgroup = drive->hwif->hwgroup;
-
-	if (drive == drive->next) {
-		/* special case: last drive from hwgroup. */
-		BUG_ON(hwgroup->drive != drive);
-		hwgroup->drive = NULL;
-	} else {
-		ide_drive_t *walk;
-
-		walk = hwgroup->drive;
-		while (walk->next != drive)
-			walk = walk->next;
-		walk->next = drive->next;
-		if (hwgroup->drive == drive) {
-			hwgroup->drive = drive->next;
-			hwgroup->hwif = hwgroup->drive->hwif;
-		}
-	}
-	BUG_ON(hwgroup->drive == drive);
-}
-
 static void drive_release_dev (struct device *dev)
 {
 	ide_drive_t *drive = container_of(dev, ide_drive_t, gendev);
-	ide_hwgroup_t *hwgroup = drive->hwif->hwgroup;
+	ide_hwif_t *hwif = drive->hwif;
 
 	ide_proc_unregister_device(drive);
 
-	spin_lock_irq(&hwgroup->lock);
-	ide_remove_drive_from_hwgroup(drive);
+	spin_lock_irq(&hwif->lock);
 	kfree(drive->id);
 	drive->id = NULL;
 	drive->dev_flags &= ~IDE_DFLAG_PRESENT;
 	/* Messed up locking ... */
-	spin_unlock_irq(&hwgroup->lock);
+	spin_unlock_irq(&hwif->lock);
 	blk_cleanup_queue(drive->queue);
-	spin_lock_irq(&hwgroup->lock);
+	spin_lock_irq(&hwif->lock);
 	drive->queue = NULL;
-	spin_unlock_irq(&hwgroup->lock);
+	spin_unlock_irq(&hwif->lock);
 
 	complete(&drive->gendev_rel_comp);
 }
@@ -1302,10 +1152,10 @@
 
 static void hwif_register_devices(ide_hwif_t *hwif)
 {
+	ide_drive_t *drive;
 	unsigned int i;
 
-	for (i = 0; i < MAX_DRIVES; i++) {
-		ide_drive_t *drive = &hwif->drives[i];
+	ide_port_for_each_dev(i, drive, hwif) {
 		struct device *dev = &drive->gendev;
 		int ret;
 
@@ -1328,11 +1178,10 @@
 static void ide_port_init_devices(ide_hwif_t *hwif)
 {
 	const struct ide_port_ops *port_ops = hwif->port_ops;
+	ide_drive_t *drive;
 	int i;
 
-	for (i = 0; i < MAX_DRIVES; i++) {
-		ide_drive_t *drive = &hwif->drives[i];
-
+	ide_port_for_each_dev(i, drive, hwif) {
 		drive->dn = i + hwif->channel * 2;
 
 		if (hwif->host_flags & IDE_HFLAG_IO_32BIT)
@@ -1380,6 +1229,8 @@
 	if ((d->host_flags & IDE_HFLAG_NO_DMA) == 0) {
 		int rc;
 
+		hwif->dma_ops = d->dma_ops;
+
 		if (d->init_dma)
 			rc = d->init_dma(hwif, d);
 		else
@@ -1387,12 +1238,13 @@
 
 		if (rc < 0) {
 			printk(KERN_INFO "%s: DMA disabled\n", hwif->name);
+
+			hwif->dma_ops = NULL;
 			hwif->dma_base = 0;
 			hwif->swdma_mask = 0;
 			hwif->mwdma_mask = 0;
 			hwif->ultra_mask = 0;
-		} else if (d->dma_ops)
-			hwif->dma_ops = d->dma_ops;
+		}
 	}
 
 	if ((d->host_flags & IDE_HFLAG_SERIALIZE) ||
@@ -1417,6 +1269,66 @@
 	}
 }
 
+static const u8 ide_hwif_to_major[] =
+	{ IDE0_MAJOR, IDE1_MAJOR, IDE2_MAJOR, IDE3_MAJOR, IDE4_MAJOR,
+	  IDE5_MAJOR, IDE6_MAJOR, IDE7_MAJOR, IDE8_MAJOR, IDE9_MAJOR };
+
+static void ide_port_init_devices_data(ide_hwif_t *hwif)
+{
+	ide_drive_t *drive;
+	int i;
+
+	ide_port_for_each_dev(i, drive, hwif) {
+		u8 j = (hwif->index * MAX_DRIVES) + i;
+
+		memset(drive, 0, sizeof(*drive));
+
+		drive->media			= ide_disk;
+		drive->select			= (i << 4) | ATA_DEVICE_OBS;
+		drive->hwif			= hwif;
+		drive->ready_stat		= ATA_DRDY;
+		drive->bad_wstat		= BAD_W_STAT;
+		drive->special.b.recalibrate	= 1;
+		drive->special.b.set_geometry	= 1;
+		drive->name[0]			= 'h';
+		drive->name[1]			= 'd';
+		drive->name[2]			= 'a' + j;
+		drive->max_failures		= IDE_DEFAULT_MAX_FAILURES;
+
+		INIT_LIST_HEAD(&drive->list);
+		init_completion(&drive->gendev_rel_comp);
+	}
+}
+
+static void ide_init_port_data(ide_hwif_t *hwif, unsigned int index)
+{
+	/* fill in any non-zero initial values */
+	hwif->index	= index;
+	hwif->major	= ide_hwif_to_major[index];
+
+	hwif->name[0]	= 'i';
+	hwif->name[1]	= 'd';
+	hwif->name[2]	= 'e';
+	hwif->name[3]	= '0' + index;
+
+	init_completion(&hwif->gendev_rel_comp);
+
+	hwif->tp_ops = &default_tp_ops;
+
+	ide_port_init_devices_data(hwif);
+}
+
+static void ide_init_port_hw(ide_hwif_t *hwif, hw_regs_t *hw)
+{
+	memcpy(&hwif->io_ports, &hw->io_ports, sizeof(hwif->io_ports));
+	hwif->irq = hw->irq;
+	hwif->chipset = hw->chipset;
+	hwif->dev = hw->dev;
+	hwif->gendev.parent = hw->parent ? hw->parent : hw->dev;
+	hwif->ack_intr = hw->ack_intr;
+	hwif->config_data = hw->config;
+}
+
 static unsigned int ide_indexes;
 
 /**
@@ -1466,12 +1378,43 @@
 	mutex_unlock(&ide_cfg_mtx);
 }
 
+static void ide_port_free_devices(ide_hwif_t *hwif)
+{
+	ide_drive_t *drive;
+	int i;
+
+	ide_port_for_each_dev(i, drive, hwif)
+		kfree(drive);
+}
+
+static int ide_port_alloc_devices(ide_hwif_t *hwif, int node)
+{
+	int i;
+
+	for (i = 0; i < MAX_DRIVES; i++) {
+		ide_drive_t *drive;
+
+		drive = kzalloc_node(sizeof(*drive), GFP_KERNEL, node);
+		if (drive == NULL)
+			goto out_nomem;
+
+		hwif->devices[i] = drive;
+	}
+	return 0;
+
+out_nomem:
+	ide_port_free_devices(hwif);
+	return -ENOMEM;
+}
+
 struct ide_host *ide_host_alloc(const struct ide_port_info *d, hw_regs_t **hws)
 {
 	struct ide_host *host;
+	struct device *dev = hws[0] ? hws[0]->dev : NULL;
+	int node = dev ? dev_to_node(dev) : -1;
 	int i;
 
-	host = kzalloc(sizeof(*host), GFP_KERNEL);
+	host = kzalloc_node(sizeof(*host), GFP_KERNEL, node);
 	if (host == NULL)
 		return NULL;
 
@@ -1482,10 +1425,15 @@
 		if (hws[i] == NULL)
 			continue;
 
-		hwif = kzalloc(sizeof(*hwif), GFP_KERNEL);
+		hwif = kzalloc_node(sizeof(*hwif), GFP_KERNEL, node);
 		if (hwif == NULL)
 			continue;
 
+		if (ide_port_alloc_devices(hwif, node) < 0) {
+			kfree(hwif);
+			continue;
+		}
+
 		idx = ide_find_port_slot(d);
 		if (idx < 0) {
 			printk(KERN_ERR "%s: no free slot for interface\n",
@@ -1507,8 +1455,7 @@
 		return NULL;
 	}
 
-	if (hws[0])
-		host->dev[0] = hws[0]->dev;
+	host->dev[0] = dev;
 
 	if (d) {
 		host->init_chipset = d->init_chipset;
@@ -1525,9 +1472,7 @@
 	ide_hwif_t *hwif, *mate = NULL;
 	int i, j = 0;
 
-	for (i = 0; i < MAX_HOST_PORTS; i++) {
-		hwif = host->ports[i];
-
+	ide_host_for_each_port(i, hwif, host) {
 		if (hwif == NULL) {
 			mate = NULL;
 			continue;
@@ -1553,9 +1498,7 @@
 		ide_port_init_devices(hwif);
 	}
 
-	for (i = 0; i < MAX_HOST_PORTS; i++) {
-		hwif = host->ports[i];
-
+	ide_host_for_each_port(i, hwif, host) {
 		if (hwif == NULL)
 			continue;
 
@@ -1570,9 +1513,7 @@
 			ide_port_tune_devices(hwif);
 	}
 
-	for (i = 0; i < MAX_HOST_PORTS; i++) {
-		hwif = host->ports[i];
-
+	ide_host_for_each_port(i, hwif, host) {
 		if (hwif == NULL)
 			continue;
 
@@ -1597,9 +1538,7 @@
 			ide_acpi_port_init_devices(hwif);
 	}
 
-	for (i = 0; i < MAX_HOST_PORTS; i++) {
-		hwif = host->ports[i];
-
+	ide_host_for_each_port(i, hwif, host) {
 		if (hwif == NULL)
 			continue;
 
@@ -1607,9 +1546,7 @@
 			hwif_register_devices(hwif);
 	}
 
-	for (i = 0; i < MAX_HOST_PORTS; i++) {
-		hwif = host->ports[i];
-
+	ide_host_for_each_port(i, hwif, host) {
 		if (hwif == NULL)
 			continue;
 
@@ -1647,17 +1584,85 @@
 }
 EXPORT_SYMBOL_GPL(ide_host_add);
 
+static void __ide_port_unregister_devices(ide_hwif_t *hwif)
+{
+	ide_drive_t *drive;
+	int i;
+
+	ide_port_for_each_dev(i, drive, hwif) {
+		if (drive->dev_flags & IDE_DFLAG_PRESENT) {
+			device_unregister(&drive->gendev);
+			wait_for_completion(&drive->gendev_rel_comp);
+		}
+	}
+}
+
+void ide_port_unregister_devices(ide_hwif_t *hwif)
+{
+	mutex_lock(&ide_cfg_mtx);
+	__ide_port_unregister_devices(hwif);
+	hwif->present = 0;
+	ide_port_init_devices_data(hwif);
+	mutex_unlock(&ide_cfg_mtx);
+}
+EXPORT_SYMBOL_GPL(ide_port_unregister_devices);
+
+/**
+ *	ide_unregister		-	free an IDE interface
+ *	@hwif: IDE interface
+ *
+ *	Perform the final unregister of an IDE interface.
+ *
+ *	Locking:
+ *	The caller must not hold the IDE locks.
+ *
+ *	It is up to the caller to be sure there is no pending I/O here,
+ *	and that the interface will not be reopened (present/vanishing
+ *	locking isn't yet done BTW).
+ */
+
+static void ide_unregister(ide_hwif_t *hwif)
+{
+	BUG_ON(in_interrupt());
+	BUG_ON(irqs_disabled());
+
+	mutex_lock(&ide_cfg_mtx);
+
+	if (hwif->present) {
+		__ide_port_unregister_devices(hwif);
+		hwif->present = 0;
+	}
+
+	ide_proc_unregister_port(hwif);
+
+	free_irq(hwif->irq, hwif);
+
+	device_unregister(hwif->portdev);
+	device_unregister(&hwif->gendev);
+	wait_for_completion(&hwif->gendev_rel_comp);
+
+	/*
+	 * Remove us from the kernel's knowledge
+	 */
+	blk_unregister_region(MKDEV(hwif->major, 0), MAX_DRIVES<<PARTN_BITS);
+	kfree(hwif->sg_table);
+	unregister_blkdev(hwif->major, hwif->name);
+
+	ide_release_dma_engine(hwif);
+
+	mutex_unlock(&ide_cfg_mtx);
+}
+
 void ide_host_free(struct ide_host *host)
 {
 	ide_hwif_t *hwif;
 	int i;
 
-	for (i = 0; i < MAX_HOST_PORTS; i++) {
-		hwif = host->ports[i];
-
+	ide_host_for_each_port(i, hwif, host) {
 		if (hwif == NULL)
 			continue;
 
+		ide_port_free_devices(hwif);
 		ide_free_port_slot(hwif->index);
 		kfree(hwif);
 	}
@@ -1668,11 +1673,12 @@
 
 void ide_host_remove(struct ide_host *host)
 {
+	ide_hwif_t *hwif;
 	int i;
 
-	for (i = 0; i < MAX_HOST_PORTS; i++) {
-		if (host->ports[i])
-			ide_unregister(host->ports[i]);
+	ide_host_for_each_port(i, hwif, host) {
+		if (hwif)
+			ide_unregister(hwif);
 	}
 
 	ide_host_free(host);
@@ -1691,8 +1697,8 @@
 	hwif->present = 1;
 
 	ide_port_tune_devices(hwif);
-	ide_acpi_port_init_devices(hwif);
 	ide_port_setup_devices(hwif);
+	ide_acpi_port_init_devices(hwif);
 	hwif_register_devices(hwif);
 	ide_proc_port_register_devices(hwif);
 }
diff --git a/drivers/ide/ide-proc.c b/drivers/ide/ide-proc.c
index a14e293..1d8978b 100644
--- a/drivers/ide/ide-proc.c
+++ b/drivers/ide/ide-proc.c
@@ -439,13 +439,13 @@
 static int proc_ide_read_driver
 	(char *page, char **start, off_t off, int count, int *eof, void *data)
 {
-	ide_drive_t	*drive = (ide_drive_t *) data;
-	struct device	*dev = &drive->gendev;
-	ide_driver_t	*ide_drv;
-	int		len;
+	ide_drive_t		*drive = (ide_drive_t *)data;
+	struct device		*dev = &drive->gendev;
+	struct ide_driver	*ide_drv;
+	int			len;
 
 	if (dev->driver) {
-		ide_drv = container_of(dev->driver, ide_driver_t, gen_driver);
+		ide_drv = to_ide_driver(dev->driver);
 		len = sprintf(page, "%s version %s\n",
 				dev->driver->name, ide_drv->version);
 	} else
@@ -555,7 +555,7 @@
 	}
 }
 
-void ide_proc_register_driver(ide_drive_t *drive, ide_driver_t *driver)
+void ide_proc_register_driver(ide_drive_t *drive, struct ide_driver *driver)
 {
 	mutex_lock(&ide_setting_mtx);
 	drive->settings = driver->proc_devsets(drive);
@@ -577,7 +577,7 @@
  *	Takes ide_setting_mtx.
  */
 
-void ide_proc_unregister_driver(ide_drive_t *drive, ide_driver_t *driver)
+void ide_proc_unregister_driver(ide_drive_t *drive, struct ide_driver *driver)
 {
 	ide_remove_proc_entries(drive->proc, driver->proc_entries(drive));
 
@@ -593,14 +593,13 @@
 
 void ide_proc_port_register_devices(ide_hwif_t *hwif)
 {
-	int	d;
 	struct proc_dir_entry *ent;
 	struct proc_dir_entry *parent = hwif->proc;
+	ide_drive_t *drive;
 	char name[64];
+	int i;
 
-	for (d = 0; d < MAX_DRIVES; d++) {
-		ide_drive_t *drive = &hwif->drives[d];
-
+	ide_port_for_each_dev(i, drive, hwif) {
 		if ((drive->dev_flags & IDE_DFLAG_PRESENT) == 0 || drive->proc)
 			continue;
 
@@ -653,7 +652,7 @@
 
 static int proc_print_driver(struct device_driver *drv, void *data)
 {
-	ide_driver_t *ide_drv = container_of(drv, ide_driver_t, gen_driver);
+	struct ide_driver *ide_drv = to_ide_driver(drv);
 	struct seq_file *s = data;
 
 	seq_printf(s, "%s version %s\n", drv->name, ide_drv->version);
diff --git a/drivers/ide/ide-tape.c b/drivers/ide/ide-tape.c
index 5d2aa22..d7ecd3c 100644
--- a/drivers/ide/ide-tape.c
+++ b/drivers/ide/ide-tape.c
@@ -166,10 +166,10 @@
  * to an interrupt or a timer event is stored in the struct defined below.
  */
 typedef struct ide_tape_obj {
-	ide_drive_t	*drive;
-	ide_driver_t	*driver;
-	struct gendisk	*disk;
-	struct kref	kref;
+	ide_drive_t		*drive;
+	struct ide_driver	*driver;
+	struct gendisk		*disk;
+	struct kref		kref;
 
 	/*
 	 *	failed_pc points to the last failed packet command, or contains
@@ -479,7 +479,7 @@
 
 static int idetape_end_request(ide_drive_t *drive, int uptodate, int nr_sects)
 {
-	struct request *rq = HWGROUP(drive)->rq;
+	struct request *rq = drive->hwif->rq;
 	idetape_tape_t *tape = drive->driver_data;
 	unsigned long flags;
 	int error;
@@ -531,7 +531,7 @@
 			printk(KERN_ERR "ide-tape: Error in REQUEST SENSE "
 					"itself - Aborting request!\n");
 	} else if (pc->c[0] == READ_6 || pc->c[0] == WRITE_6) {
-		struct request *rq = drive->hwif->hwgroup->rq;
+		struct request *rq = drive->hwif->rq;
 		int blocks = pc->xferred / tape->blk_size;
 
 		tape->avg_size += blocks * tape->blk_size;
@@ -576,7 +576,7 @@
 
 /*
  * Postpone the current request so that ide.c will be able to service requests
- * from another device on the same hwgroup while we are polling for DSC.
+ * from another device on the same port while we are polling for DSC.
  */
 static void idetape_postpone_request(ide_drive_t *drive)
 {
@@ -584,7 +584,8 @@
 
 	debug_log(DBG_PROCS, "Enter %s\n", __func__);
 
-	tape->postponed_rq = HWGROUP(drive)->rq;
+	tape->postponed_rq = drive->hwif->rq;
+
 	ide_stall_queue(drive, tape->dsc_poll_freq);
 }
 
@@ -2312,7 +2313,7 @@
 
 static int ide_tape_probe(ide_drive_t *);
 
-static ide_driver_t idetape_driver = {
+static struct ide_driver idetape_driver = {
 	.gen_driver = {
 		.owner		= THIS_MODULE,
 		.name		= "ide-tape",
@@ -2323,7 +2324,6 @@
 	.version		= IDETAPE_VERSION,
 	.do_request		= idetape_do_request,
 	.end_request		= idetape_end_request,
-	.error			= __ide_error,
 #ifdef CONFIG_IDE_PROC_FS
 	.proc_entries		= ide_tape_proc_entries,
 	.proc_devsets		= ide_tape_proc_devsets,
diff --git a/drivers/ide/ide-taskfile.c b/drivers/ide/ide-taskfile.c
index bf4fb9d..16138bc 100644
--- a/drivers/ide/ide-taskfile.c
+++ b/drivers/ide/ide-taskfile.c
@@ -58,7 +58,7 @@
 
 ide_startstop_t do_rw_taskfile (ide_drive_t *drive, ide_task_t *task)
 {
-	ide_hwif_t *hwif	= HWIF(drive);
+	ide_hwif_t *hwif = drive->hwif;
 	struct ide_taskfile *tf = &task->tf;
 	ide_handler_t *handler = NULL;
 	const struct ide_tp_ops *tp_ops = hwif->tp_ops;
@@ -309,9 +309,9 @@
 		}
 
 		if (sectors > 0) {
-			ide_driver_t *drv;
+			struct ide_driver *drv;
 
-			drv = *(ide_driver_t **)rq->rq_disk->private_data;
+			drv = *(struct ide_driver **)rq->rq_disk->private_data;
 			drv->end_request(drive, 1, sectors);
 		}
 	}
@@ -328,9 +328,9 @@
 	}
 
 	if (rq->rq_disk) {
-		ide_driver_t *drv;
+		struct ide_driver *drv;
 
-		drv = *(ide_driver_t **)rq->rq_disk->private_data;;
+		drv = *(struct ide_driver **)rq->rq_disk->private_data;
 		drv->end_request(drive, 1, rq->nr_sectors);
 	} else
 		ide_end_request(drive, 1, rq->nr_sectors);
@@ -361,7 +361,7 @@
 static ide_startstop_t task_in_intr(ide_drive_t *drive)
 {
 	ide_hwif_t *hwif = drive->hwif;
-	struct request *rq = hwif->hwgroup->rq;
+	struct request *rq = hwif->rq;
 	u8 stat = hwif->tp_ops->read_status(hwif);
 
 	/* Error? */
@@ -395,7 +395,7 @@
 static ide_startstop_t task_out_intr (ide_drive_t *drive)
 {
 	ide_hwif_t *hwif = drive->hwif;
-	struct request *rq = HWGROUP(drive)->rq;
+	struct request *rq = hwif->rq;
 	u8 stat = hwif->tp_ops->read_status(hwif);
 
 	if (!OK_STAT(stat, DRIVE_READY, drive->bad_wstat))
diff --git a/drivers/ide/ide.c b/drivers/ide/ide.c
index 46a2d4c..258805d 100644
--- a/drivers/ide/ide.c
+++ b/drivers/ide/ide.c
@@ -60,179 +60,8 @@
 #include <linux/completion.h>
 #include <linux/device.h>
 
-
-/* default maximum number of failures */
-#define IDE_DEFAULT_MAX_FAILURES 	1
-
 struct class *ide_port_class;
 
-static const u8 ide_hwif_to_major[] = { IDE0_MAJOR, IDE1_MAJOR,
-					IDE2_MAJOR, IDE3_MAJOR,
-					IDE4_MAJOR, IDE5_MAJOR,
-					IDE6_MAJOR, IDE7_MAJOR,
-					IDE8_MAJOR, IDE9_MAJOR };
-
-DEFINE_MUTEX(ide_cfg_mtx);
-
-static void ide_port_init_devices_data(ide_hwif_t *);
-
-/*
- * Do not even *think* about calling this!
- */
-void ide_init_port_data(ide_hwif_t *hwif, unsigned int index)
-{
-	/* bulk initialize hwif & drive info with zeros */
-	memset(hwif, 0, sizeof(ide_hwif_t));
-
-	/* fill in any non-zero initial values */
-	hwif->index	= index;
-	hwif->major	= ide_hwif_to_major[index];
-
-	hwif->name[0]	= 'i';
-	hwif->name[1]	= 'd';
-	hwif->name[2]	= 'e';
-	hwif->name[3]	= '0' + index;
-
-	init_completion(&hwif->gendev_rel_comp);
-
-	hwif->tp_ops = &default_tp_ops;
-
-	ide_port_init_devices_data(hwif);
-}
-
-static void ide_port_init_devices_data(ide_hwif_t *hwif)
-{
-	int unit;
-
-	for (unit = 0; unit < MAX_DRIVES; ++unit) {
-		ide_drive_t *drive = &hwif->drives[unit];
-		u8 j = (hwif->index * MAX_DRIVES) + unit;
-
-		memset(drive, 0, sizeof(*drive));
-
-		drive->media			= ide_disk;
-		drive->select			= (unit << 4) | ATA_DEVICE_OBS;
-		drive->hwif			= hwif;
-		drive->ready_stat		= ATA_DRDY;
-		drive->bad_wstat		= BAD_W_STAT;
-		drive->special.b.recalibrate	= 1;
-		drive->special.b.set_geometry	= 1;
-		drive->name[0]			= 'h';
-		drive->name[1]			= 'd';
-		drive->name[2]			= 'a' + j;
-		drive->max_failures		= IDE_DEFAULT_MAX_FAILURES;
-
-		INIT_LIST_HEAD(&drive->list);
-		init_completion(&drive->gendev_rel_comp);
-	}
-}
-
-static void __ide_port_unregister_devices(ide_hwif_t *hwif)
-{
-	int i;
-
-	for (i = 0; i < MAX_DRIVES; i++) {
-		ide_drive_t *drive = &hwif->drives[i];
-
-		if (drive->dev_flags & IDE_DFLAG_PRESENT) {
-			device_unregister(&drive->gendev);
-			wait_for_completion(&drive->gendev_rel_comp);
-		}
-	}
-}
-
-void ide_port_unregister_devices(ide_hwif_t *hwif)
-{
-	mutex_lock(&ide_cfg_mtx);
-	__ide_port_unregister_devices(hwif);
-	hwif->present = 0;
-	ide_port_init_devices_data(hwif);
-	mutex_unlock(&ide_cfg_mtx);
-}
-EXPORT_SYMBOL_GPL(ide_port_unregister_devices);
-
-/**
- *	ide_unregister		-	free an IDE interface
- *	@hwif: IDE interface
- *
- *	Perform the final unregister of an IDE interface. At the moment
- *	we don't refcount interfaces so this will also get split up.
- *
- *	Locking:
- *	The caller must not hold the IDE locks
- *	The drive present/vanishing is not yet properly locked
- *	Take care with the callbacks. These have been split to avoid
- *	deadlocking the IDE layer. The shutdown callback is called
- *	before we take the lock and free resources. It is up to the
- *	caller to be sure there is no pending I/O here, and that
- *	the interface will not be reopened (present/vanishing locking
- *	isn't yet done BTW). After we commit to the final kill we
- *	call the cleanup callback with the ide locks held.
- *
- *	Unregister restores the hwif structures to the default state.
- *	This is raving bonkers.
- */
-
-void ide_unregister(ide_hwif_t *hwif)
-{
-	ide_hwif_t *g;
-	ide_hwgroup_t *hwgroup;
-	int irq_count = 0;
-
-	BUG_ON(in_interrupt());
-	BUG_ON(irqs_disabled());
-
-	mutex_lock(&ide_cfg_mtx);
-
-	if (hwif->present) {
-		__ide_port_unregister_devices(hwif);
-		hwif->present = 0;
-	}
-
-	ide_proc_unregister_port(hwif);
-
-	hwgroup = hwif->hwgroup;
-	/*
-	 * free the irq if we were the only hwif using it
-	 */
-	g = hwgroup->hwif;
-	do {
-		if (g->irq == hwif->irq)
-			++irq_count;
-		g = g->next;
-	} while (g != hwgroup->hwif);
-	if (irq_count == 1)
-		free_irq(hwif->irq, hwgroup);
-
-	ide_remove_port_from_hwgroup(hwif);
-
-	device_unregister(hwif->portdev);
-	device_unregister(&hwif->gendev);
-	wait_for_completion(&hwif->gendev_rel_comp);
-
-	/*
-	 * Remove us from the kernel's knowledge
-	 */
-	blk_unregister_region(MKDEV(hwif->major, 0), MAX_DRIVES<<PARTN_BITS);
-	kfree(hwif->sg_table);
-	unregister_blkdev(hwif->major, hwif->name);
-
-	ide_release_dma_engine(hwif);
-
-	mutex_unlock(&ide_cfg_mtx);
-}
-
-void ide_init_port_hw(ide_hwif_t *hwif, hw_regs_t *hw)
-{
-	memcpy(&hwif->io_ports, &hw->io_ports, sizeof(hwif->io_ports));
-	hwif->irq = hw->irq;
-	hwif->chipset = hw->chipset;
-	hwif->dev = hw->dev;
-	hwif->gendev.parent = hw->parent ? hw->parent : hw->dev;
-	hwif->ack_intr = hw->ack_intr;
-	hwif->config_data = hw->config;
-}
-
 /*
  *	Locks for IDE setting functionality
  */
@@ -330,7 +159,6 @@
 static int set_pio_mode(ide_drive_t *drive, int arg)
 {
 	ide_hwif_t *hwif = drive->hwif;
-	ide_hwgroup_t *hwgroup = hwif->hwgroup;
 	const struct ide_port_ops *port_ops = hwif->port_ops;
 
 	if (arg < 0 || arg > 255)
@@ -345,9 +173,9 @@
 			unsigned long flags;
 
 			/* take lock for IDE_DFLAG_[NO_]UNMASK/[NO_]IO_32BIT */
-			spin_lock_irqsave(&hwgroup->lock, flags);
+			spin_lock_irqsave(&hwif->lock, flags);
 			port_ops->set_pio_mode(drive, arg);
-			spin_unlock_irqrestore(&hwgroup->lock, flags);
+			spin_unlock_irqrestore(&hwif->lock, flags);
 		} else
 			port_ops->set_pio_mode(drive, arg);
 	} else {
@@ -453,7 +281,7 @@
 static int generic_ide_probe(struct device *dev)
 {
 	ide_drive_t *drive = to_ide_device(dev);
-	ide_driver_t *drv = to_ide_driver(dev->driver);
+	struct ide_driver *drv = to_ide_driver(dev->driver);
 
 	return drv->probe ? drv->probe(drive) : -ENODEV;
 }
@@ -461,7 +289,7 @@
 static int generic_ide_remove(struct device *dev)
 {
 	ide_drive_t *drive = to_ide_device(dev);
-	ide_driver_t *drv = to_ide_driver(dev->driver);
+	struct ide_driver *drv = to_ide_driver(dev->driver);
 
 	if (drv->remove)
 		drv->remove(drive);
@@ -472,7 +300,7 @@
 static void generic_ide_shutdown(struct device *dev)
 {
 	ide_drive_t *drive = to_ide_device(dev);
-	ide_driver_t *drv = to_ide_driver(dev->driver);
+	struct ide_driver *drv = to_ide_driver(dev->driver);
 
 	if (dev->driver && drv->shutdown)
 		drv->shutdown(drive);
@@ -660,6 +488,7 @@
 
 void ide_port_apply_params(ide_hwif_t *hwif)
 {
+	ide_drive_t *drive;
 	int i;
 
 	if (ide_ignore_cable & (1 << hwif->index)) {
@@ -668,8 +497,8 @@
 		hwif->cbl = ATA_CBL_PATA40_SHORT;
 	}
 
-	for (i = 0; i < MAX_DRIVES; i++)
-		ide_dev_apply_params(&hwif->drives[i], i);
+	ide_port_for_each_dev(i, drive, hwif)
+		ide_dev_apply_params(drive, i);
 }
 
 /*
diff --git a/drivers/ide/it8172.c b/drivers/ide/it8172.c
new file mode 100644
index 0000000..e021078
--- /dev/null
+++ b/drivers/ide/it8172.c
@@ -0,0 +1,166 @@
+/*
+ *
+ * BRIEF MODULE DESCRIPTION
+ *      IT8172 IDE controller support
+ *
+ * Copyright (C) 2000 MontaVista Software Inc.
+ * Copyright (C) 2008 Shane McDonald
+ *
+ *  This program is free software; you can redistribute  it and/or modify it
+ *  under  the terms of  the GNU General  Public License as published by the
+ *  Free Software Foundation;  either version 2 of the  License, or (at your
+ *  option) any later version.
+ *
+ *  THIS  SOFTWARE  IS PROVIDED   ``AS  IS'' AND   ANY  EXPRESS OR IMPLIED
+ *  WARRANTIES,   INCLUDING, BUT NOT  LIMITED  TO, THE IMPLIED WARRANTIES OF
+ *  MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN
+ *  NO  EVENT  SHALL   THE AUTHOR  BE    LIABLE FOR ANY   DIRECT, INDIRECT,
+ *  INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ *  NOT LIMITED   TO, PROCUREMENT OF  SUBSTITUTE GOODS  OR SERVICES; LOSS OF
+ *  USE, DATA,  OR PROFITS; OR  BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
+ *  ANY THEORY OF LIABILITY, WHETHER IN  CONTRACT, STRICT LIABILITY, OR TORT
+ *  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ *  THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ *  You should have received a copy of the  GNU General Public License along
+ *  with this program; if not, write  to the Free Software Foundation, Inc.,
+ *  675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/ioport.h>
+#include <linux/pci.h>
+#include <linux/ide.h>
+#include <linux/init.h>
+
+#define DRV_NAME "IT8172"
+
+/* Program PIO mode 'pio' timings for 'drive' (config regs 0x40/0x44). */
+static void it8172_set_pio_mode(ide_drive_t *drive, const u8 pio)
+{
+	/*
+	 * DIOR/DIOW pulse width and recovery time max out at 8 PCI clock
+	 * cycles on the IT8172, so it cannot be configured for PIO mode 0;
+	 * the table sets these parameters to the maximum the chip supports.
+	 */
+	static const u8 pio_timings[] = { 0x3f, 0x3c, 0x1b, 0x12, 0x0a };
+
+	ide_hwif_t *port = drive->hwif;
+	struct pci_dev *pdev = to_pci_dev(port->dev);
+	u16 enables;
+	u32 timing;
+
+	pci_read_config_word(pdev, 0x40, &enables);
+	pci_read_config_dword(pdev, 0x44, &timing);
+
+	/*
+	 * Bit 0x4000 enables the timing register at 0x44.  The IT8172 spec
+	 * calls it the "Slave IDE Timing Register", but it really controls
+	 * timing for both the master and the slave drive.
+	 */
+	enables |= 0x4000;
+
+	/* Keep bits 15:14 and the other drive's enable bits only. */
+	enables &= drive->dn ? 0xc006 : 0xc060;
+	if (drive->media == ide_disk)
+		enables |= 0x0004 << (drive->dn * 4);	/* prefetch */
+	if (ata_id_has_iordy(drive->id))
+		enables |= 0x0002 << (drive->dn * 4);	/* IORDY sample-point */
+
+	/* Keep the other drive's timing field, then insert this drive's. */
+	timing &= drive->dn ? 0x00003f00 : 0x000fc000;
+	timing |= pio_timings[pio] << (drive->dn * 6 + 8);
+
+	pci_write_config_word(pdev, 0x40, enables);
+	pci_write_config_dword(pdev, 0x44, timing);
+}
+
+/* Program UDMA/MWDMA mode 'speed' for 'drive' (config regs 0x48/0x4a). */
+static void it8172_set_dma_mode(ide_drive_t *drive, const u8 speed)
+{
+	struct pci_dev *dev	= to_pci_dev(drive->hwif->dev);
+	int a_speed		= 3 << (drive->dn * 4);	/* mode field, 0x4a */
+	int u_flag		= 1 << drive->dn;	/* UDMA bit, 0x48 */
+	u8 reg48, reg4a;
+
+	pci_read_config_byte(dev, 0x48, &reg48);
+	pci_read_config_byte(dev, 0x4a, &reg4a);
+
+	if (speed >= XFER_UDMA_0) {
+		u8 udma = speed - XFER_UDMA_0;
+		int u_speed = udma << (drive->dn * 4);
+
+		pci_write_config_byte(dev, 0x48, reg48 | u_flag);
+		reg4a &= ~a_speed;
+		pci_write_config_byte(dev, 0x4a, reg4a | u_speed);
+	} else {
+		/* static: build the lookup table once, not on every call */
+		static const u8 mwdma_to_pio[] = { 0, 3, 4 };
+		u8 pio;
+
+		pci_write_config_byte(dev, 0x48, reg48 & ~u_flag);
+		pci_write_config_byte(dev, 0x4a, reg4a & ~a_speed);
+
+		/* MWDMA timings are programmed through the PIO path. */
+		pio = mwdma_to_pio[speed - XFER_MW_DMA_0];
+		it8172_set_pio_mode(drive, pio);
+	}
+}
+
+
+static const struct ide_port_ops it8172_port_ops = {
+	.set_dma_mode	= it8172_set_dma_mode,	/* UDMA/MWDMA setup */
+	.set_pio_mode	= it8172_set_pio_mode,	/* PIO timing setup */
+};
+
+static const struct ide_port_info it8172_port_info __devinitdata = {
+	.name		= DRV_NAME,
+	.host_flags	= IDE_HFLAG_SINGLE,
+	.enablebits	= { {0x41, 0x80, 0x80}, {0x00, 0x00, 0x00} },
+	.port_ops	= &it8172_port_ops,
+	.udma_mask	= ATA_UDMA2,
+	.mwdma_mask	= ATA_MWDMA2,
+	.pio_mask	= ATA_PIO4 & ~ATA_PIO0,	/* mode 0 is masked out */
+};
+
+static int __devinit it8172_init_one(struct pci_dev *dev,
+					const struct pci_device_id *id)
+{
+	/* IT8172 is more than an IDE controller: require the IDE class code */
+	return (dev->class >> 8) == PCI_CLASS_STORAGE_IDE ?
+		ide_pci_init_one(dev, &it8172_port_info, NULL) : -ENODEV;
+}
+
+static struct pci_device_id it8172_pci_tbl[] = {	/* used as .id_table below */
+	{ PCI_VDEVICE(ITE, PCI_DEVICE_ID_ITE_8172), 0 },
+	{ 0, },	/* all-zero entry; presumably the list terminator */
+};
+MODULE_DEVICE_TABLE(pci, it8172_pci_tbl);
+
+static struct pci_driver it8172_pci_driver = {
+	.name		= "IT8172_IDE",
+	.probe		= it8172_init_one,
+	.id_table	= it8172_pci_tbl,
+	.resume		= ide_pci_resume,	/* shared ide_pci_* helpers */
+	.suspend	= ide_pci_suspend,
+	.remove		= ide_pci_remove,
+};
+
+/* Module load registers the PCI driver; module unload unregisters it. */
+static int __init it8172_ide_init(void)
+{
+	return ide_pci_register_driver(&it8172_pci_driver);
+}
+module_init(it8172_ide_init);
+
+static void __exit it8172_ide_exit(void)
+{
+	pci_unregister_driver(&it8172_pci_driver);
+}
+module_exit(it8172_ide_exit);
+
+MODULE_AUTHOR("Steve Longerbeam");
+MODULE_DESCRIPTION("PCI driver module for ITE 8172 IDE");
+MODULE_LICENSE("GPL");
diff --git a/drivers/ide/it8213.c b/drivers/ide/it8213.c
index 7c2feeb..d7969b6 100644
--- a/drivers/ide/it8213.c
+++ b/drivers/ide/it8213.c
@@ -25,7 +25,7 @@
 
 static void it8213_set_pio_mode(ide_drive_t *drive, const u8 pio)
 {
-	ide_hwif_t *hwif	= HWIF(drive);
+	ide_hwif_t *hwif	= drive->hwif;
 	struct pci_dev *dev	= to_pci_dev(hwif->dev);
 	int is_slave		= drive->dn & 1;
 	int master_port		= 0x40;
@@ -82,7 +82,7 @@
 
 static void it8213_set_dma_mode(ide_drive_t *drive, const u8 speed)
 {
-	ide_hwif_t *hwif	= HWIF(drive);
+	ide_hwif_t *hwif	= drive->hwif;
 	struct pci_dev *dev	= to_pci_dev(hwif->dev);
 	u8 maslave		= 0x40;
 	int a_speed		= 3 << (drive->dn * 4);
diff --git a/drivers/ide/it821x.c b/drivers/ide/it821x.c
index ef00408..0be27ac 100644
--- a/drivers/ide/it821x.c
+++ b/drivers/ide/it821x.c
@@ -167,12 +167,10 @@
 	ide_hwif_t *hwif = drive->hwif;
 	struct pci_dev *dev = to_pci_dev(hwif->dev);
 	struct it821x_dev *itdev = ide_get_hwifdata(hwif);
-	ide_drive_t *pair;
+	ide_drive_t *pair = ide_get_pair_dev(drive);
 	int clock, altclock, sel = 0;
 	u8 unit = drive->dn & 1, v;
 
-	pair = &hwif->drives[1 - unit];
-
 	if(itdev->want[0][0] > itdev->want[1][0]) {
 		clock = itdev->want[0][1];
 		altclock = itdev->want[1][1];
@@ -239,15 +237,13 @@
 {
 	ide_hwif_t *hwif = drive->hwif;
 	struct it821x_dev *itdev = ide_get_hwifdata(hwif);
-	ide_drive_t *pair;
+	ide_drive_t *pair = ide_get_pair_dev(drive);
 	u8 unit = drive->dn & 1, set_pio = pio;
 
 	/* Spec says 89 ref driver uses 88 */
 	static u16 pio_timings[]= { 0xAA88, 0xA382, 0xA181, 0x3332, 0x3121 };
 	static u8 pio_want[]    = { ATA_66, ATA_66, ATA_66, ATA_66, ATA_ANY };
 
-	pair = &hwif->drives[1 - unit];
-
 	/*
 	 * Compute the best PIO mode we can for a given device. We must
 	 * pick a speed that does not cause problems with the other device
@@ -279,7 +275,7 @@
  *	the shared MWDMA/PIO timing register.
  */
 
-static void it821x_tune_mwdma (ide_drive_t *drive, byte mode_wanted)
+static void it821x_tune_mwdma(ide_drive_t *drive, u8 mode_wanted)
 {
 	ide_hwif_t *hwif = drive->hwif;
 	struct pci_dev *dev = to_pci_dev(hwif->dev);
@@ -316,7 +312,7 @@
  *	controller when doing UDMA modes in pass through.
  */
 
-static void it821x_tune_udma (ide_drive_t *drive, byte mode_wanted)
+static void it821x_tune_udma(ide_drive_t *drive, u8 mode_wanted)
 {
 	ide_hwif_t *hwif = drive->hwif;
 	struct pci_dev *dev = to_pci_dev(hwif->dev);
@@ -516,6 +512,7 @@
 	.dma_test_irq		= ide_dma_test_irq,
 	.dma_timeout		= ide_dma_timeout,
 	.dma_lost_irq		= ide_dma_lost_irq,
+	.dma_sff_read_status	= ide_dma_sff_read_status,
 };
 
 /**
diff --git a/drivers/ide/ns87415.c b/drivers/ide/ns87415.c
index 1378906..83643ed 100644
--- a/drivers/ide/ns87415.c
+++ b/drivers/ide/ns87415.c
@@ -56,7 +56,7 @@
 	return superio_ide_inb(hwif->io_ports.status_addr);
 }
 
-static u8 superio_read_sff_dma_status(ide_hwif_t *hwif)
+static u8 superio_dma_sff_read_status(ide_hwif_t *hwif)
 {
 	return superio_ide_inb(hwif->dma_base + ATA_DMA_STATUS);
 }
@@ -109,7 +109,6 @@
 	.exec_command		= ide_exec_command,
 	.read_status		= superio_read_status,
 	.read_altstatus		= ide_read_altstatus,
-	.read_sff_dma_status	= superio_read_sff_dma_status,
 
 	.set_irq		= ide_set_irq,
 
@@ -132,18 +131,20 @@
 	tmp = superio_ide_inb(dma_stat);
 	outb(tmp | 0x66, dma_stat);
 }
+#else
+#define superio_dma_sff_read_status ide_dma_sff_read_status
 #endif
 
 static unsigned int ns87415_count = 0, ns87415_control[MAX_HWIFS] = { 0 };
 
 /*
  * This routine either enables/disables (according to IDE_DFLAG_PRESENT)
- * the IRQ associated with the port (HWIF(drive)),
+ * the IRQ associated with the port,
  * and selects either PIO or DMA handshaking for the next I/O operation.
  */
 static void ns87415_prepare_drive (ide_drive_t *drive, unsigned int use_dma)
 {
-	ide_hwif_t *hwif = HWIF(drive);
+	ide_hwif_t *hwif = drive->hwif;
 	struct pci_dev *dev = to_pci_dev(hwif->dev);
 	unsigned int bit, other, new, *old = (unsigned int *) hwif->select_data;
 	unsigned long flags;
@@ -197,11 +198,11 @@
 
 static int ns87415_dma_end(ide_drive_t *drive)
 {
-	ide_hwif_t      *hwif = HWIF(drive);
+	ide_hwif_t *hwif = drive->hwif;
 	u8 dma_stat = 0, dma_cmd = 0;
 
 	drive->waiting_for_dma = 0;
-	dma_stat = hwif->tp_ops->read_sff_dma_status(hwif);
+	dma_stat = hwif->dma_ops->dma_sff_read_status(hwif);
 	/* get DMA command mode */
 	dma_cmd = inb(hwif->dma_base + ATA_DMA_CMD);
 	/* stop DMA */
@@ -308,6 +309,7 @@
 	.dma_test_irq		= ide_dma_test_irq,
 	.dma_lost_irq		= ide_dma_lost_irq,
 	.dma_timeout		= ide_dma_timeout,
+	.dma_sff_read_status	= superio_dma_sff_read_status,
 };
 
 static const struct ide_port_info ns87415_chipset __devinitdata = {
diff --git a/drivers/ide/palm_bk3710.c b/drivers/ide/palm_bk3710.c
index 122ed3c..a7ac490 100644
--- a/drivers/ide/palm_bk3710.c
+++ b/drivers/ide/palm_bk3710.c
@@ -324,8 +324,6 @@
 
 	hwif->dma_base = hwif->io_ports.data_addr - IDE_PALM_ATA_PRI_REG_OFFSET;
 
-	hwif->dma_ops = &sff_dma_ops;
-
 	return 0;
 }
 
@@ -338,6 +336,7 @@
 static struct ide_port_info __devinitdata palm_bk3710_port_info = {
 	.init_dma		= palm_bk3710_init_dma,
 	.port_ops		= &palm_bk3710_ports_ops,
+	.dma_ops		= &sff_dma_ops,
 	.host_flags		= IDE_HFLAG_MMIO,
 	.pio_mask		= ATA_PIO4,
 	.mwdma_mask		= ATA_MWDMA2,
diff --git a/drivers/ide/pdc202xx_new.c b/drivers/ide/pdc202xx_new.c
index 211ae46..f21290c 100644
--- a/drivers/ide/pdc202xx_new.c
+++ b/drivers/ide/pdc202xx_new.c
@@ -143,7 +143,7 @@
 
 static void pdcnew_set_dma_mode(ide_drive_t *drive, const u8 speed)
 {
-	ide_hwif_t *hwif	= HWIF(drive);
+	ide_hwif_t *hwif	= drive->hwif;
 	struct pci_dev *dev	= to_pci_dev(hwif->dev);
 	u8 adj			= (drive->dn & 1) ? 0x08 : 0x00;
 
@@ -219,7 +219,7 @@
 	 * Deleted this because it is redundant from the caller.
 	 */
 	printk(KERN_WARNING "pdc202xx_new: %s channel reset.\n",
-		HWIF(drive)->channel ? "Secondary" : "Primary");
+		drive->hwif->channel ? "Secondary" : "Primary");
 }
 
 /**
diff --git a/drivers/ide/pdc202xx_old.c b/drivers/ide/pdc202xx_old.c
index 624e62e..9719332 100644
--- a/drivers/ide/pdc202xx_old.c
+++ b/drivers/ide/pdc202xx_old.c
@@ -39,7 +39,7 @@
 
 static void pdc202xx_set_mode(ide_drive_t *drive, const u8 speed)
 {
-	ide_hwif_t *hwif	= HWIF(drive);
+	ide_hwif_t *hwif	= drive->hwif;
 	struct pci_dev *dev	= to_pci_dev(hwif->dev);
 	u8 drive_pci		= 0x60 + (drive->dn << 2);
 
@@ -169,8 +169,8 @@
 	if (drive->current_speed > XFER_UDMA_2)
 		pdc_old_enable_66MHz_clock(drive->hwif);
 	if (drive->media != ide_disk || (drive->dev_flags & IDE_DFLAG_LBA48)) {
-		struct request *rq	= HWGROUP(drive)->rq;
-		ide_hwif_t *hwif	= HWIF(drive);
+		ide_hwif_t *hwif	= drive->hwif;
+		struct request *rq	= hwif->rq;
 		unsigned long high_16	= hwif->extra_base - 16;
 		unsigned long atapi_reg	= high_16 + (hwif->channel ? 0x24 : 0x20);
 		u32 word_count	= 0;
@@ -189,7 +189,7 @@
 static int pdc202xx_dma_end(ide_drive_t *drive)
 {
 	if (drive->media != ide_disk || (drive->dev_flags & IDE_DFLAG_LBA48)) {
-		ide_hwif_t *hwif	= HWIF(drive);
+		ide_hwif_t *hwif	= drive->hwif;
 		unsigned long high_16	= hwif->extra_base - 16;
 		unsigned long atapi_reg	= high_16 + (hwif->channel ? 0x24 : 0x20);
 		u8 clock		= 0;
@@ -205,7 +205,7 @@
 
 static int pdc202xx_dma_test_irq(ide_drive_t *drive)
 {
-	ide_hwif_t *hwif	= HWIF(drive);
+	ide_hwif_t *hwif	= drive->hwif;
 	unsigned long high_16	= hwif->extra_base - 16;
 	u8 dma_stat		= inb(hwif->dma_base + ATA_DMA_STATUS);
 	u8 sc1d			= inb(high_16 + 0x001d);
@@ -243,7 +243,7 @@
 
 static void pdc202xx_reset (ide_drive_t *drive)
 {
-	ide_hwif_t *hwif	= HWIF(drive);
+	ide_hwif_t *hwif	= drive->hwif;
 	ide_hwif_t *mate	= hwif->mate;
 
 	pdc202xx_reset_host(hwif);
@@ -337,6 +337,7 @@
 	.dma_test_irq		= pdc202xx_dma_test_irq,
 	.dma_lost_irq		= pdc202xx_dma_lost_irq,
 	.dma_timeout		= pdc202xx_dma_timeout,
+	.dma_sff_read_status	= ide_dma_sff_read_status,
 };
 
 static const struct ide_dma_ops pdc2026x_dma_ops = {
@@ -348,6 +349,7 @@
 	.dma_test_irq		= pdc202xx_dma_test_irq,
 	.dma_lost_irq		= pdc202xx_dma_lost_irq,
 	.dma_timeout		= pdc202xx_dma_timeout,
+	.dma_sff_read_status	= ide_dma_sff_read_status,
 };
 
 #define DECLARE_PDC2026X_DEV(udma, sectors) \
diff --git a/drivers/ide/piix.c b/drivers/ide/piix.c
index 61d2d92..f1e2e4e 100644
--- a/drivers/ide/piix.c
+++ b/drivers/ide/piix.c
@@ -67,7 +67,7 @@
 
 static void piix_set_pio_mode(ide_drive_t *drive, const u8 pio)
 {
-	ide_hwif_t *hwif	= HWIF(drive);
+	ide_hwif_t *hwif	= drive->hwif;
 	struct pci_dev *dev	= to_pci_dev(hwif->dev);
 	int is_slave		= drive->dn & 1;
 	int master_port		= hwif->channel ? 0x42 : 0x40;
@@ -136,7 +136,7 @@
 
 static void piix_set_dma_mode(ide_drive_t *drive, const u8 speed)
 {
-	ide_hwif_t *hwif	= HWIF(drive);
+	ide_hwif_t *hwif	= drive->hwif;
 	struct pci_dev *dev	= to_pci_dev(hwif->dev);
 	u8 maslave		= hwif->channel ? 0x42 : 0x40;
 	int a_speed		= 3 << (drive->dn * 4);
@@ -224,7 +224,7 @@
  */
 static void ich_clear_irq(ide_drive_t *drive)
 {
-	ide_hwif_t *hwif = HWIF(drive);
+	ide_hwif_t *hwif = drive->hwif;
 	u8 dma_stat;
 
 	/*
@@ -260,6 +260,8 @@
 	{ 0x27DF, 0x103C, 0x30A1 },	/* ICH7 on HP Compaq nc2400 */
 	{ 0x27DF, 0x1071, 0xD221 },	/* ICH7 on Hercules EC-900 */
 	{ 0x24CA, 0x1025, 0x0061 },	/* ICH4 on Acer Aspire 2023WLMi */
+	{ 0x24CA, 0x1025, 0x003d },	/* ICH4 on ACER TM290 */
+	{ 0x266F, 0x1025, 0x0066 },	/* ICH6 on ACER Aspire 1694WLMi */
 	{ 0x2653, 0x1043, 0x82D8 },	/* ICH6M on Asus Eee 701 */
 	/* end marker */
 	{ 0, }
diff --git a/drivers/ide/pmac.c b/drivers/ide/pmac.c
index 7c481bb..74625e8 100644
--- a/drivers/ide/pmac.c
+++ b/drivers/ide/pmac.c
@@ -955,7 +955,6 @@
 	.exec_command		= pmac_exec_command,
 	.read_status		= ide_read_status,
 	.read_altstatus		= ide_read_altstatus,
-	.read_sff_dma_status	= ide_read_sff_dma_status,
 
 	.set_irq		= pmac_set_irq,
 
@@ -1513,10 +1512,10 @@
 static int
 pmac_ide_dma_setup(ide_drive_t *drive)
 {
-	ide_hwif_t *hwif = HWIF(drive);
+	ide_hwif_t *hwif = drive->hwif;
 	pmac_ide_hwif_t *pmif =
 		(pmac_ide_hwif_t *)dev_get_drvdata(hwif->gendev.parent);
-	struct request *rq = HWGROUP(drive)->rq;
+	struct request *rq = hwif->rq;
 	u8 unit = drive->dn & 1, ata4 = (pmif->kind == controller_kl_ata4);
 
 	if (!pmac_ide_build_dmatable(drive, rq)) {
@@ -1637,7 +1636,7 @@
 			break;
 		if (++timeout > 100) {
 			printk(KERN_WARNING "ide%d, ide_dma_test_irq \
-			timeout flushing channel\n", HWIF(drive)->index);
+			timeout flushing channel\n", hwif->index);
 			break;
 		}
 	}	
diff --git a/drivers/ide/q40ide.c b/drivers/ide/q40ide.c
index 4af4a8c..9f9c0b3 100644
--- a/drivers/ide/q40ide.c
+++ b/drivers/ide/q40ide.c
@@ -99,7 +99,6 @@
 	.exec_command		= ide_exec_command,
 	.read_status		= ide_read_status,
 	.read_altstatus		= ide_read_altstatus,
-	.read_sff_dma_status	= ide_read_sff_dma_status,
 
 	.set_irq		= ide_set_irq,
 
diff --git a/drivers/ide/qd65xx.c b/drivers/ide/qd65xx.c
index bc27c7a..5b2e3af4 100644
--- a/drivers/ide/qd65xx.c
+++ b/drivers/ide/qd65xx.c
@@ -202,7 +202,8 @@
 		recovery_time = drive->id[ATA_ID_EIDE_PIO] - 120;
 	}
 
-	qd_set_timing(drive, qd6500_compute_timing(HWIF(drive), active_time, recovery_time));
+	qd_set_timing(drive, qd6500_compute_timing(drive->hwif,
+				active_time, recovery_time));
 }
 
 static void qd6580_set_pio_mode(ide_drive_t *drive, const u8 pio)
@@ -245,11 +246,11 @@
 		printk(KERN_INFO "%s: PIO mode%d\n", drive->name,pio);
 	}
 
-	if (!HWIF(drive)->channel && drive->media != ide_disk) {
+	if (!hwif->channel && drive->media != ide_disk) {
 		outb(0x5f, QD_CONTROL_PORT);
 		printk(KERN_WARNING "%s: ATAPI: disabled read-ahead FIFO "
 			"and post-write buffer on %s.\n",
-			drive->name, HWIF(drive)->name);
+			drive->name, hwif->name);
 	}
 
 	qd_set_timing(drive, qd6580_compute_timing(active_time, recovery_time));
diff --git a/drivers/ide/qd65xx.h b/drivers/ide/qd65xx.h
index c83dea8..6636f96 100644
--- a/drivers/ide/qd65xx.h
+++ b/drivers/ide/qd65xx.h
@@ -31,8 +31,8 @@
 
 #define QD_CONFIG(hwif)		((hwif)->config_data & 0x00ff)
 
-#define QD_TIMING(drive)	(byte)(((drive)->drive_data) & 0x00ff)
-#define QD_TIMREG(drive)	(byte)((((drive)->drive_data) & 0xff00) >> 8)
+#define QD_TIMING(drive)	(u8)(((drive)->drive_data) & 0x00ff)
+#define QD_TIMREG(drive)	(u8)((((drive)->drive_data) & 0xff00) >> 8)
 
 #define QD6500_DEF_DATA		((QD_TIM1_PORT<<8) | (QD_ID3 ? 0x0c : 0x08))
 #define QD6580_DEF_DATA		((QD_TIM1_PORT<<8) | (QD_ID3 ? 0x0a : 0x00))
diff --git a/drivers/ide/sc1200.c b/drivers/ide/sc1200.c
index ec7f766..dbdd298 100644
--- a/drivers/ide/sc1200.c
+++ b/drivers/ide/sc1200.c
@@ -125,7 +125,7 @@
 
 static void sc1200_set_dma_mode(ide_drive_t *drive, const u8 mode)
 {
-	ide_hwif_t		*hwif = HWIF(drive);
+	ide_hwif_t		*hwif = drive->hwif;
 	struct pci_dev		*dev = to_pci_dev(hwif->dev);
 	unsigned int		reg, timings;
 	unsigned short		pci_clock;
@@ -170,9 +170,9 @@
  */
 static int sc1200_dma_end(ide_drive_t *drive)
 {
-	ide_hwif_t *hwif = HWIF(drive);
+	ide_hwif_t *hwif = drive->hwif;
 	unsigned long dma_base = hwif->dma_base;
-	byte dma_stat;
+	u8 dma_stat;
 
 	dma_stat = inb(dma_base+2);		/* get DMA status */
 
@@ -199,7 +199,7 @@
 
 static void sc1200_set_pio_mode(ide_drive_t *drive, const u8 pio)
 {
-	ide_hwif_t	*hwif = HWIF(drive);
+	ide_hwif_t	*hwif = drive->hwif;
 	int		mode = -1;
 
 	/*
@@ -292,6 +292,7 @@
 	.dma_test_irq		= ide_dma_test_irq,
 	.dma_lost_irq		= ide_dma_lost_irq,
 	.dma_timeout		= ide_dma_timeout,
+	.dma_sff_read_status	= ide_dma_sff_read_status,
 };
 
 static const struct ide_port_info sc1200_chipset __devinitdata = {
diff --git a/drivers/ide/scc_pata.c b/drivers/ide/scc_pata.c
index 0f48f9d..8d2314b 100644
--- a/drivers/ide/scc_pata.c
+++ b/drivers/ide/scc_pata.c
@@ -143,7 +143,7 @@
 	return (u8)in_be32((void *)hwif->io_ports.ctl_addr);
 }
 
-static u8 scc_read_sff_dma_status(ide_hwif_t *hwif)
+static u8 scc_dma_sff_read_status(ide_hwif_t *hwif)
 {
 	return (u8)in_be32((void *)(hwif->dma_base + 4));
 }
@@ -217,7 +217,7 @@
 
 static void scc_set_pio_mode(ide_drive_t *drive, const u8 pio)
 {
-	ide_hwif_t *hwif = HWIF(drive);
+	ide_hwif_t *hwif = drive->hwif;
 	struct scc_ports *ports = ide_get_hwifdata(hwif);
 	unsigned long ctl_base = ports->ctl;
 	unsigned long cckctrl_port = ctl_base + 0xff0;
@@ -249,7 +249,7 @@
 
 static void scc_set_dma_mode(ide_drive_t *drive, const u8 speed)
 {
-	ide_hwif_t *hwif = HWIF(drive);
+	ide_hwif_t *hwif = drive->hwif;
 	struct scc_ports *ports = ide_get_hwifdata(hwif);
 	unsigned long ctl_base = ports->ctl;
 	unsigned long cckctrl_port = ctl_base + 0xff0;
@@ -259,7 +259,7 @@
 	unsigned long scrcst_port = ctl_base + 0x014;
 	unsigned long udenvt_port = ctl_base + 0x018;
 	unsigned long tdvhsel_port   = ctl_base + 0x020;
-	int is_slave = (&hwif->drives[1] == drive);
+	int is_slave = drive->dn & 1;
 	int offset, idx;
 	unsigned long reg;
 	unsigned long jcactsel;
@@ -292,7 +292,7 @@
 {
 	ide_hwif_t *hwif = drive->hwif;
 	u8 unit = drive->dn & 1;
-	u8 dma_stat = scc_ide_inb(hwif->dma_base + 4);
+	u8 dma_stat = scc_dma_sff_read_status(hwif);
 
 	if (on)
 		dma_stat |= (1 << (5 + unit));
@@ -316,7 +316,7 @@
 static int scc_dma_setup(ide_drive_t *drive)
 {
 	ide_hwif_t *hwif = drive->hwif;
-	struct request *rq = HWGROUP(drive)->rq;
+	struct request *rq = hwif->rq;
 	unsigned int reading;
 	u8 dma_stat;
 
@@ -338,7 +338,7 @@
 	out_be32((void __iomem *)hwif->dma_base, reading);
 
 	/* read DMA status for INTR & ERROR flags */
-	dma_stat = in_be32((void __iomem *)(hwif->dma_base + 4));
+	dma_stat = scc_dma_sff_read_status(hwif);
 
 	/* clear INTR & ERROR flags */
 	out_be32((void __iomem *)(hwif->dma_base + 4), dma_stat | 6);
@@ -367,7 +367,7 @@
 	/* stop DMA */
 	scc_ide_outb(dma_cmd & ~1, hwif->dma_base);
 	/* get DMA status */
-	dma_stat = scc_ide_inb(hwif->dma_base + 4);
+	dma_stat = scc_dma_sff_read_status(hwif);
 	/* clear the INTR & ERROR bits */
 	scc_ide_outb(dma_stat | 6, hwif->dma_base + 4);
 	/* purge DMA mappings */
@@ -387,7 +387,7 @@
 
 static int scc_dma_end(ide_drive_t *drive)
 {
-	ide_hwif_t *hwif = HWIF(drive);
+	ide_hwif_t *hwif = drive->hwif;
 	void __iomem *dma_base = (void __iomem *)hwif->dma_base;
 	unsigned long intsts_port = hwif->dma_base + 0x014;
 	u32 reg;
@@ -405,17 +405,18 @@
 			       drive->name);
 			data_loss = 1;
 			if (retry++) {
-				struct request *rq = HWGROUP(drive)->rq;
-				int unit;
+				struct request *rq = hwif->rq;
+				ide_drive_t *drive;
+				int i;
+
 				/* ERROR_RESET and drive->crc_count are needed
 				 * to reduce DMA transfer mode in retry process.
 				 */
 				if (rq)
 					rq->errors |= ERROR_RESET;
-				for (unit = 0; unit < MAX_DRIVES; unit++) {
-					ide_drive_t *drive = &hwif->drives[unit];
+
+				ide_port_for_each_dev(i, drive, hwif)
 					drive->crc_count++;
-				}
 			}
 		}
 	}
@@ -496,7 +497,7 @@
 /* returns 1 if dma irq issued, 0 otherwise */
 static int scc_dma_test_irq(ide_drive_t *drive)
 {
-	ide_hwif_t *hwif = HWIF(drive);
+	ide_hwif_t *hwif = drive->hwif;
 	u32 int_stat = in_be32((void __iomem *)hwif->dma_base + 0x014);
 
 	/* SCC errata A252,A308 workaround: Step4 */
@@ -852,7 +853,6 @@
 	.exec_command		= scc_exec_command,
 	.read_status		= scc_read_status,
 	.read_altstatus		= scc_read_altstatus,
-	.read_sff_dma_status	= scc_read_sff_dma_status,
 
 	.set_irq		= scc_set_irq,
 
@@ -879,6 +879,7 @@
 	.dma_test_irq		= scc_dma_test_irq,
 	.dma_lost_irq		= ide_dma_lost_irq,
 	.dma_timeout		= ide_dma_timeout,
+	.dma_sff_read_status	= scc_dma_sff_read_status,
 };
 
 #define DECLARE_SCC_DEV(name_str)			\
diff --git a/drivers/ide/serverworks.c b/drivers/ide/serverworks.c
index 437bc91..382102b 100644
--- a/drivers/ide/serverworks.c
+++ b/drivers/ide/serverworks.c
@@ -151,7 +151,7 @@
 	static const u8 dma_modes[]		= { 0x77, 0x21, 0x20 };
 	static const u8 drive_pci2[]		= { 0x45, 0x44, 0x47, 0x46 };
 
-	ide_hwif_t *hwif	= HWIF(drive);
+	ide_hwif_t *hwif	= drive->hwif;
 	struct pci_dev *dev	= to_pci_dev(hwif->dev);
 	u8 unit			= drive->dn & 1;
 
diff --git a/drivers/ide/setup-pci.c b/drivers/ide/setup-pci.c
index 9f1f9163..e85d1ed 100644
--- a/drivers/ide/setup-pci.c
+++ b/drivers/ide/setup-pci.c
@@ -130,7 +130,7 @@
 	 * we tune the drive then try to grab DMA ownership if we want to be
 	 * the DMA end.  This has to be become dynamic to handle hot-plug.
 	 */
-	dma_stat = hwif->tp_ops->read_sff_dma_status(hwif);
+	dma_stat = hwif->dma_ops->dma_sff_read_status(hwif);
 	if ((dma_stat & 0x80) && hwif->mate && hwif->mate->dma_base) {
 		printk(KERN_INFO "%s %s: simplex device: DMA disabled\n",
 			d->name, pci_name(dev));
@@ -377,6 +377,9 @@
 
 		hwif->dma_base = base;
 
+		if (hwif->dma_ops == NULL)
+			hwif->dma_ops = &sff_dma_ops;
+
 		if (ide_pci_check_simplex(hwif, d) < 0)
 			return -1;
 
@@ -393,8 +396,6 @@
 
 		if (ide_allocate_dma_engine(hwif))
 			return -1;
-
-		hwif->dma_ops = &sff_dma_ops;
 	}
 
 	return 0;
@@ -471,7 +472,7 @@
 	 */
 
 	for (port = 0; port < channels; ++port) {
-		const ide_pci_enablebit_t *e = &(d->enablebits[port]);
+		const struct ide_pci_enablebit *e = &d->enablebits[port];
 
 		if (e->reg && (pci_read_config_byte(dev, e->reg, &tmp) ||
 		    (tmp & e->mask) != e->val)) {
@@ -519,8 +520,7 @@
 	if (ret < 0)
 		goto out;
 
-	/* Is it an "IDE storage" device in non-PCI mode? */
-	if ((dev->class >> 8) == PCI_CLASS_STORAGE_IDE && (dev->class & 5) != 5) {
+	if (ide_pci_is_in_compatibility_mode(dev)) {
 		if (noisy)
 			printk(KERN_INFO "%s %s: not 100%% native mode: will "
 				"probe irqs later\n", d->name, pci_name(dev));
diff --git a/drivers/ide/sgiioc4.c b/drivers/ide/sgiioc4.c
index a687a7d..fdb9d70 100644
--- a/drivers/ide/sgiioc4.c
+++ b/drivers/ide/sgiioc4.c
@@ -123,7 +123,7 @@
 sgiioc4_clearirq(ide_drive_t * drive)
 {
 	u32 intr_reg;
-	ide_hwif_t *hwif = HWIF(drive);
+	ide_hwif_t *hwif = drive->hwif;
 	struct ide_io_ports *io_ports = &hwif->io_ports;
 	unsigned long other_ir = io_ports->irq_addr + (IOC4_INTR_REG << 2);
 
@@ -181,7 +181,7 @@
 
 static void sgiioc4_dma_start(ide_drive_t *drive)
 {
-	ide_hwif_t *hwif = HWIF(drive);
+	ide_hwif_t *hwif = drive->hwif;
 	unsigned long ioc4_dma_addr = hwif->dma_base + IOC4_DMA_CTRL * 4;
 	unsigned int reg = readl((void __iomem *)ioc4_dma_addr);
 	unsigned int temp_reg = reg | IOC4_S_DMA_START;
@@ -209,7 +209,7 @@
 static int sgiioc4_dma_end(ide_drive_t *drive)
 {
 	u32 ioc4_dma, bc_dev, bc_mem, num, valid = 0, cnt = 0;
-	ide_hwif_t *hwif = HWIF(drive);
+	ide_hwif_t *hwif = drive->hwif;
 	unsigned long dma_base = hwif->dma_base;
 	int dma_stat = 0;
 	unsigned long *ending_dma = ide_get_hwifdata(hwif);
@@ -271,7 +271,7 @@
 /* returns 1 if dma irq issued, 0 otherwise */
 static int sgiioc4_dma_test_irq(ide_drive_t *drive)
 {
-	return sgiioc4_checkirq(HWIF(drive));
+	return sgiioc4_checkirq(drive->hwif);
 }
 
 static void sgiioc4_dma_host_set(ide_drive_t *drive, int on)
@@ -367,7 +367,7 @@
 sgiioc4_configure_for_dma(int dma_direction, ide_drive_t * drive)
 {
 	u32 ioc4_dma;
-	ide_hwif_t *hwif = HWIF(drive);
+	ide_hwif_t *hwif = drive->hwif;
 	unsigned long dma_base = hwif->dma_base;
 	unsigned long ioc4_dma_addr = dma_base + IOC4_DMA_CTRL * 4;
 	u32 dma_addr, ending_dma_addr;
@@ -427,7 +427,7 @@
 static unsigned int
 sgiioc4_build_dma_table(ide_drive_t * drive, struct request *rq, int ddir)
 {
-	ide_hwif_t *hwif = HWIF(drive);
+	ide_hwif_t *hwif = drive->hwif;
 	unsigned int *table = hwif->dmatable_cpu;
 	unsigned int count = 0, i = 1;
 	struct scatterlist *sg;
@@ -492,7 +492,7 @@
 
 static int sgiioc4_dma_setup(ide_drive_t *drive)
 {
-	struct request *rq = HWGROUP(drive)->rq;
+	struct request *rq = drive->hwif->rq;
 	unsigned int count = 0;
 	int ddir;
 
@@ -523,7 +523,6 @@
 	.exec_command		= ide_exec_command,
 	.read_status		= sgiioc4_read_status,
 	.read_altstatus		= ide_read_altstatus,
-	.read_sff_dma_status	= ide_read_sff_dma_status,
 
 	.set_irq		= ide_set_irq,
 
diff --git a/drivers/ide/siimage.c b/drivers/ide/siimage.c
index 7d622d2..cb2b352 100644
--- a/drivers/ide/siimage.c
+++ b/drivers/ide/siimage.c
@@ -114,7 +114,7 @@
 
 static inline unsigned long siimage_seldev(ide_drive_t *drive, int r)
 {
-	ide_hwif_t *hwif	= HWIF(drive);
+	ide_hwif_t *hwif	= drive->hwif;
 	unsigned long base	= (unsigned long)hwif->hwif_data;
 	u8 unit			= drive->dn & 1;
 
@@ -243,7 +243,7 @@
 	static const u16 tf_speed[]   = { 0x328a, 0x2283, 0x1281, 0x10c3, 0x10c1 };
 	static const u16 data_speed[] = { 0x328a, 0x2283, 0x1104, 0x10c3, 0x10c1 };
 
-	ide_hwif_t *hwif	= HWIF(drive);
+	ide_hwif_t *hwif	= drive->hwif;
 	struct pci_dev *dev	= to_pci_dev(hwif->dev);
 	ide_drive_t *pair	= ide_get_pair_dev(drive);
 	u32 speedt		= 0;
@@ -300,7 +300,7 @@
 	static const u8 ultra5[] = { 0x0C, 0x07, 0x05, 0x04, 0x02, 0x01 };
 	static const u16 dma[]	 = { 0x2208, 0x10C2, 0x10C1 };
 
-	ide_hwif_t *hwif	= HWIF(drive);
+	ide_hwif_t *hwif	= drive->hwif;
 	struct pci_dev *dev	= to_pci_dev(hwif->dev);
 	unsigned long base	= (unsigned long)hwif->hwif_data;
 	u16 ultra = 0, multi	= 0;
@@ -340,7 +340,7 @@
 /* returns 1 if dma irq issued, 0 otherwise */
 static int siimage_io_dma_test_irq(ide_drive_t *drive)
 {
-	ide_hwif_t *hwif	= HWIF(drive);
+	ide_hwif_t *hwif	= drive->hwif;
 	struct pci_dev *dev	= to_pci_dev(hwif->dev);
 	u8 dma_altstat		= 0;
 	unsigned long addr	= siimage_selreg(hwif, 1);
@@ -367,7 +367,7 @@
 
 static int siimage_mmio_dma_test_irq(ide_drive_t *drive)
 {
-	ide_hwif_t *hwif	= HWIF(drive);
+	ide_hwif_t *hwif	= drive->hwif;
 	unsigned long addr	= siimage_selreg(hwif, 0x1);
 	void __iomem *sata_error_addr
 		= (void __iomem *)hwif->sata_scr[SATA_ERROR_OFFSET];
@@ -717,6 +717,7 @@
 	.dma_test_irq		= siimage_dma_test_irq,
 	.dma_timeout		= ide_dma_timeout,
 	.dma_lost_irq		= ide_dma_lost_irq,
+	.dma_sff_read_status	= ide_dma_sff_read_status,
 };
 
 #define DECLARE_SII_DEV(p_ops)				\
diff --git a/drivers/ide/sis5513.c b/drivers/ide/sis5513.c
index ad32e18..9ec1a4a 100644
--- a/drivers/ide/sis5513.c
+++ b/drivers/ide/sis5513.c
@@ -274,7 +274,7 @@
 
 static void config_drive_art_rwp(ide_drive_t *drive)
 {
-	ide_hwif_t *hwif	= HWIF(drive);
+	ide_hwif_t *hwif	= drive->hwif;
 	struct pci_dev *dev	= to_pci_dev(hwif->dev);
 	u8 reg4bh		= 0;
 	u8 rw_prefetch		= 0;
diff --git a/drivers/ide/sl82c105.c b/drivers/ide/sl82c105.c
index 84dc336..48cc748 100644
--- a/drivers/ide/sl82c105.c
+++ b/drivers/ide/sl82c105.c
@@ -140,7 +140,7 @@
  */
 static void sl82c105_dma_lost_irq(ide_drive_t *drive)
 {
-	ide_hwif_t *hwif	= HWIF(drive);
+	ide_hwif_t *hwif	= drive->hwif;
 	struct pci_dev *dev	= to_pci_dev(hwif->dev);
 	u32 val, mask		= hwif->channel ? CTRL_IDE_IRQB : CTRL_IDE_IRQA;
 	u8 dma_cmd;
@@ -177,7 +177,7 @@
  */
 static void sl82c105_dma_start(ide_drive_t *drive)
 {
-	ide_hwif_t *hwif	= HWIF(drive);
+	ide_hwif_t *hwif	= drive->hwif;
 	struct pci_dev *dev	= to_pci_dev(hwif->dev);
 	int reg 		= 0x44 + drive->dn * 4;
 
@@ -299,6 +299,7 @@
 	.dma_test_irq		= ide_dma_test_irq,
 	.dma_lost_irq		= sl82c105_dma_lost_irq,
 	.dma_timeout		= sl82c105_dma_timeout,
+	.dma_sff_read_status	= ide_dma_sff_read_status,
 };
 
 static const struct ide_port_info sl82c105_chipset __devinitdata = {
diff --git a/drivers/ide/slc90e66.c b/drivers/ide/slc90e66.c
index 0f759e4..40b4b94 100644
--- a/drivers/ide/slc90e66.c
+++ b/drivers/ide/slc90e66.c
@@ -20,7 +20,7 @@
 
 static void slc90e66_set_pio_mode(ide_drive_t *drive, const u8 pio)
 {
-	ide_hwif_t *hwif	= HWIF(drive);
+	ide_hwif_t *hwif	= drive->hwif;
 	struct pci_dev *dev	= to_pci_dev(hwif->dev);
 	int is_slave		= drive->dn & 1;
 	int master_port		= hwif->channel ? 0x42 : 0x40;
@@ -73,7 +73,7 @@
 
 static void slc90e66_set_dma_mode(ide_drive_t *drive, const u8 speed)
 {
-	ide_hwif_t *hwif	= HWIF(drive);
+	ide_hwif_t *hwif	= drive->hwif;
 	struct pci_dev *dev	= to_pci_dev(hwif->dev);
 	u8 maslave		= hwif->channel ? 0x42 : 0x40;
 	int sitre = 0, a_speed	= 7 << (drive->dn * 4);
diff --git a/drivers/ide/tc86c001.c b/drivers/ide/tc86c001.c
index 93e2cce..84109f5 100644
--- a/drivers/ide/tc86c001.c
+++ b/drivers/ide/tc86c001.c
@@ -15,7 +15,7 @@
 
 static void tc86c001_set_mode(ide_drive_t *drive, const u8 speed)
 {
-	ide_hwif_t *hwif	= HWIF(drive);
+	ide_hwif_t *hwif	= drive->hwif;
 	unsigned long scr_port	= hwif->config_data + (drive->dn ? 0x02 : 0x00);
 	u16 mode, scr		= inw(scr_port);
 
@@ -62,13 +62,12 @@
  */
 static int tc86c001_timer_expiry(ide_drive_t *drive)
 {
-	ide_hwif_t *hwif	= HWIF(drive);
+	ide_hwif_t *hwif	= drive->hwif;
 	ide_expiry_t *expiry	= ide_get_hwifdata(hwif);
-	ide_hwgroup_t *hwgroup	= HWGROUP(drive);
 	u8 dma_stat		= inb(hwif->dma_base + ATA_DMA_STATUS);
 
 	/* Restore a higher level driver's expiry handler first. */
-	hwgroup->expiry	= expiry;
+	hwif->expiry = expiry;
 
 	if ((dma_stat & 5) == 1) {	/* DMA active and no interrupt */
 		unsigned long sc_base	= hwif->config_data;
@@ -110,11 +109,10 @@
 
 static void tc86c001_dma_start(ide_drive_t *drive)
 {
-	ide_hwif_t *hwif	= HWIF(drive);
-	ide_hwgroup_t *hwgroup	= HWGROUP(drive);
+	ide_hwif_t *hwif	= drive->hwif;
 	unsigned long sc_base	= hwif->config_data;
 	unsigned long twcr_port	= sc_base + (drive->dn ? 0x06 : 0x04);
-	unsigned long nsectors	= hwgroup->rq->nr_sectors;
+	unsigned long nsectors	= hwif->rq->nr_sectors;
 
 	/*
 	 * We have to manually load the sector count and size into
@@ -125,8 +123,8 @@
 	outw(SECTOR_SIZE / 2, twcr_port); /* Transfer Word Count 1/2 */
 
 	/* Install our timeout expiry hook, saving the current handler... */
-	ide_set_hwifdata(hwif, hwgroup->expiry);
-	hwgroup->expiry = &tc86c001_timer_expiry;
+	ide_set_hwifdata(hwif, hwif->expiry);
+	hwif->expiry = &tc86c001_timer_expiry;
 
 	ide_dma_start(drive);
 }
@@ -190,6 +188,7 @@
 	.dma_test_irq		= ide_dma_test_irq,
 	.dma_lost_irq		= ide_dma_lost_irq,
 	.dma_timeout		= ide_dma_timeout,
+	.dma_sff_read_status	= ide_dma_sff_read_status,
 };
 
 static const struct ide_port_info tc86c001_chipset __devinitdata = {
diff --git a/drivers/ide/triflex.c b/drivers/ide/triflex.c
index b6ff403..8773c3b 100644
--- a/drivers/ide/triflex.c
+++ b/drivers/ide/triflex.c
@@ -36,7 +36,7 @@
 
 static void triflex_set_mode(ide_drive_t *drive, const u8 speed)
 {
-	ide_hwif_t *hwif = HWIF(drive);
+	ide_hwif_t *hwif = drive->hwif;
 	struct pci_dev *dev = to_pci_dev(hwif->dev);
 	u32 triflex_timings = 0;
 	u16 timing = 0;
diff --git a/drivers/ide/trm290.c b/drivers/ide/trm290.c
index 2a5ea90..b6a1285 100644
--- a/drivers/ide/trm290.c
+++ b/drivers/ide/trm290.c
@@ -144,7 +144,7 @@
 
 static void trm290_prepare_drive (ide_drive_t *drive, unsigned int use_dma)
 {
-	ide_hwif_t *hwif = HWIF(drive);
+	ide_hwif_t *hwif = drive->hwif;
 	u16 reg = 0;
 	unsigned long flags;
 
@@ -184,7 +184,7 @@
 static int trm290_dma_setup(ide_drive_t *drive)
 {
 	ide_hwif_t *hwif = drive->hwif;
-	struct request *rq = hwif->hwgroup->rq;
+	struct request *rq = hwif->rq;
 	unsigned int count, rw;
 
 	if (rq_data_dir(rq)) {
@@ -222,15 +222,15 @@
 	drive->waiting_for_dma = 0;
 	/* purge DMA mappings */
 	ide_destroy_dmatable(drive);
-	status = inw(HWIF(drive)->dma_base + 2);
+	status = inw(drive->hwif->dma_base + 2);
+
 	return status != 0x00ff;
 }
 
 static int trm290_dma_test_irq(ide_drive_t *drive)
 {
-	u16 status;
+	u16 status = inw(drive->hwif->dma_base + 2);
 
-	status = inw(HWIF(drive)->dma_base + 2);
 	return status == 0x00ff;
 }
 
diff --git a/drivers/ide/tx4939ide.c b/drivers/ide/tx4939ide.c
index 4a8c5a2..882f6f0 100644
--- a/drivers/ide/tx4939ide.c
+++ b/drivers/ide/tx4939ide.c
@@ -293,7 +293,7 @@
 {
 	ide_hwif_t *hwif = drive->hwif;
 	void __iomem *base = TX4939IDE_BASE(hwif);
-	struct request *rq = hwif->hwgroup->rq;
+	struct request *rq = hwif->rq;
 	u8 reading;
 	int nent;
 
@@ -397,6 +397,17 @@
 	return found;
 }
 
+#ifdef __BIG_ENDIAN
+static u8 tx4939ide_dma_sff_read_status(ide_hwif_t *hwif)
+{
+	void __iomem *base = TX4939IDE_BASE(hwif);
+
+	return tx4939ide_readb(base, TX4939IDE_DMA_Stat);
+}
+#else
+#define tx4939ide_dma_sff_read_status ide_dma_sff_read_status
+#endif
+
 static void tx4939ide_init_hwif(ide_hwif_t *hwif)
 {
 	void __iomem *base = TX4939IDE_BASE(hwif);
@@ -443,13 +454,6 @@
 
 #ifdef __BIG_ENDIAN
 
-static u8 tx4939ide_read_sff_dma_status(ide_hwif_t *hwif)
-{
-	void __iomem *base = TX4939IDE_BASE(hwif);
-
-	return tx4939ide_readb(base, TX4939IDE_DMA_Stat);
-}
-
 /* custom iops (independent from SWAP_IO_SPACE) */
 static u8 tx4939ide_inb(unsigned long port)
 {
@@ -585,7 +589,6 @@
 	.exec_command		= ide_exec_command,
 	.read_status		= ide_read_status,
 	.read_altstatus		= ide_read_altstatus,
-	.read_sff_dma_status	= tx4939ide_read_sff_dma_status,
 
 	.set_irq		= ide_set_irq,
 
@@ -609,7 +612,6 @@
 	.exec_command		= ide_exec_command,
 	.read_status		= ide_read_status,
 	.read_altstatus		= ide_read_altstatus,
-	.read_sff_dma_status	= ide_read_sff_dma_status,
 
 	.set_irq		= ide_set_irq,
 
@@ -638,6 +640,7 @@
 	.dma_test_irq		= tx4939ide_dma_test_irq,
 	.dma_lost_irq		= ide_dma_lost_irq,
 	.dma_timeout		= ide_dma_timeout,
+	.dma_sff_read_status	= tx4939ide_dma_sff_read_status,
 };
 
 static const struct ide_port_info tx4939ide_port_info __initdata = {
diff --git a/drivers/ide/umc8672.c b/drivers/ide/umc8672.c
index e29978c..0608d41 100644
--- a/drivers/ide/umc8672.c
+++ b/drivers/ide/umc8672.c
@@ -106,22 +106,21 @@
 
 static void umc_set_pio_mode(ide_drive_t *drive, const u8 pio)
 {
-	ide_hwif_t *hwif = drive->hwif;
-	ide_hwgroup_t *mate_hwgroup = hwif->mate ? hwif->mate->hwgroup : NULL;
+	ide_hwif_t *hwif = drive->hwif, *mate = hwif->mate;
 	unsigned long uninitialized_var(flags);
 
 	printk("%s: setting umc8672 to PIO mode%d (speed %d)\n",
 		drive->name, pio, pio_to_umc[pio]);
-	if (mate_hwgroup)
-		spin_lock_irqsave(&mate_hwgroup->lock, flags);
-	if (mate_hwgroup && mate_hwgroup->handler) {
+	if (mate)
+		spin_lock_irqsave(&mate->lock, flags);
+	if (mate && mate->handler) {
 		printk(KERN_ERR "umc8672: other interface is busy: exiting tune_umc()\n");
 	} else {
 		current_speeds[drive->name[2] - 'a'] = pio_to_umc[pio];
 		umc_set_speeds(current_speeds);
 	}
-	if (mate_hwgroup)
-		spin_unlock_irqrestore(&mate_hwgroup->lock, flags);
+	if (mate)
+		spin_unlock_irqrestore(&mate->lock, flags);
 }
 
 static const struct ide_port_ops umc8672_port_ops = {
diff --git a/drivers/ide/via82cxxx.c b/drivers/ide/via82cxxx.c
index 2a812d3..fecc0e0 100644
--- a/drivers/ide/via82cxxx.c
+++ b/drivers/ide/via82cxxx.c
@@ -178,7 +178,7 @@
 		ide_timing_merge(&p, &t, &t, IDE_TIMING_8BIT);
 	}
 
-	via_set_speed(HWIF(drive), drive->dn, &t);
+	via_set_speed(hwif, drive->dn, &t);
 }
 
 /**
diff --git a/drivers/macintosh/therm_adt746x.c b/drivers/macintosh/therm_adt746x.c
index 22bf981..82607add 100644
--- a/drivers/macintosh/therm_adt746x.c
+++ b/drivers/macintosh/therm_adt746x.c
@@ -554,7 +554,7 @@
 	const u32 *prop;
 	int i = 0, offset = 0;
 	int err;
-	
+
 	np = of_find_node_by_name(NULL, "fan");
 	if (!np)
 		return -ENODEV;
@@ -613,13 +613,13 @@
 	}
 
 	of_dev = of_platform_device_create(np, "temperatures", NULL);
-	
+	of_node_put(np);
+
 	if (of_dev == NULL) {
 		printk(KERN_ERR "Can't register temperatures device !\n");
-		of_node_put(np);
 		return -ENODEV;
 	}
-	
+
 	err = device_create_file(&of_dev->dev, &dev_attr_sensor1_temperature);
 	err |= device_create_file(&of_dev->dev, &dev_attr_sensor2_temperature);
 	err |= device_create_file(&of_dev->dev, &dev_attr_sensor1_limit);
diff --git a/drivers/media/dvb/dvb-core/dvbdev.c b/drivers/media/dvb/dvb-core/dvbdev.c
index 65d6966..6a32680 100644
--- a/drivers/media/dvb/dvb-core/dvbdev.c
+++ b/drivers/media/dvb/dvb-core/dvbdev.c
@@ -79,6 +79,10 @@
 		file->private_data = dvbdev;
 		old_fops = file->f_op;
 		file->f_op = fops_get(dvbdev->fops);
+		if (file->f_op == NULL) {
+			file->f_op = old_fops;
+			goto fail;
+		}
 		if(file->f_op->open)
 			err = file->f_op->open(inode,file);
 		if (err) {
@@ -90,6 +94,7 @@
 		unlock_kernel();
 		return err;
 	}
+fail:
 	up_read(&minor_rwsem);
 	unlock_kernel();
 	return -ENODEV;
diff --git a/drivers/media/video/v4l1-compat.c b/drivers/media/video/v4l1-compat.c
index d450cab..b617bf0 100644
--- a/drivers/media/video/v4l1-compat.c
+++ b/drivers/media/video/v4l1-compat.c
@@ -203,7 +203,6 @@
 	table = &pwq->pt;
 	for (;;) {
 		int mask;
-		set_current_state(TASK_INTERRUPTIBLE);
 		mask = file->f_op->poll(file, table);
 		if (mask & POLLIN)
 			break;
@@ -212,9 +211,8 @@
 			retval = -ERESTARTSYS;
 			break;
 		}
-		schedule();
+		poll_schedule(pwq, TASK_INTERRUPTIBLE);
 	}
-	set_current_state(TASK_RUNNING);
 	poll_freewait(pwq);
 	return retval;
 }
diff --git a/drivers/message/i2o/device.c b/drivers/message/i2o/device.c
index a7dd03e..0ee4264 100644
--- a/drivers/message/i2o/device.c
+++ b/drivers/message/i2o/device.c
@@ -52,7 +52,6 @@
 /**
  *	i2o_device_claim - claim a device for use by an OSM
  *	@dev: I2O device to claim
- *	@drv: I2O driver which wants to claim the device
  *
  *	Do the leg work to assign a device to a given OSM. If the claim succeeds,
  *	the owner is the primary. If the attempt fails a negative errno code
@@ -80,7 +79,6 @@
 /**
  *	i2o_device_claim_release - release a device that the OSM is using
  *	@dev: device to release
- *	@drv: driver which claimed the device
  *
  *	Drop a claim by an OSM on a given I2O device.
  *
diff --git a/drivers/message/i2o/driver.c b/drivers/message/i2o/driver.c
index e0d474b..a0421ef 100644
--- a/drivers/message/i2o/driver.c
+++ b/drivers/message/i2o/driver.c
@@ -173,7 +173,6 @@
  *	i2o_driver_dispatch - dispatch an I2O reply message
  *	@c: I2O controller of the message
  *	@m: I2O message number
- *	@msg: I2O message to be delivered
  *
  *	The reply is delivered to the driver from which the original message
  *	was. This function is only called from interrupt context.
diff --git a/drivers/misc/ibmasm/module.c b/drivers/misc/ibmasm/module.c
index b5f6add..dc14b0b 100644
--- a/drivers/misc/ibmasm/module.c
+++ b/drivers/misc/ibmasm/module.c
@@ -104,8 +104,7 @@
 	}
 
 	sp->irq = pdev->irq;
-	sp->base_address = ioremap(pci_resource_start(pdev, 0),
-					pci_resource_len(pdev, 0));
+	sp->base_address = pci_ioremap_bar(pdev, 0);
 	if (!sp->base_address) {
 		dev_err(sp->dev, "Failed to ioremap pci memory\n");
 		result =  -ENODEV;
diff --git a/drivers/misc/ioc4.c b/drivers/misc/ioc4.c
index 6f76573..60b0b1a 100644
--- a/drivers/misc/ioc4.c
+++ b/drivers/misc/ioc4.c
@@ -269,6 +269,16 @@
 	return IOC4_VARIANT_PCI_RT;
 }
 
+static void
+ioc4_load_modules(struct work_struct *work)
+{
+	/* The work item was allocated by ioc4_probe(); free it when done. */
+
+	request_module("sgiioc4");
+
+	kfree(work);
+}
+
 /* Adds a new instance of an IOC4 card */
 static int
 ioc4_probe(struct pci_dev *pdev, const struct pci_device_id *pci_id)
@@ -378,6 +388,30 @@
 	}
 	mutex_unlock(&ioc4_mutex);
 
+	/* Request sgiioc4 IDE driver on boards that bring that functionality
+	 * off of IOC4.  The root filesystem may be hosted on a drive connected
+	 * to IOC4, so we need to make sure the sgiioc4 driver is loaded as it
+	 * won't be picked up by modprobes due to the ioc4 module owning the
+	 * PCI device.
+	 */
+	if (idd->idd_variant != IOC4_VARIANT_PCI_RT) {
+		struct work_struct *work;
+		work = kzalloc(sizeof(struct work_struct), GFP_KERNEL);
+		if (!work) {
+			printk(KERN_WARNING
+			       "%s: IOC4 unable to allocate memory for "
+			       "load of sub-modules.\n", __func__);
+		} else {
+			/* Request the module from a work procedure as the
+			 * modprobe goes out to a userland helper and that
+			 * will hang if done directly from ioc4_probe().
+			 */
+			printk(KERN_INFO "IOC4 loading sgiioc4 submodule\n");
+			INIT_WORK(work, ioc4_load_modules);
+			schedule_work(work);
+		}
+	}
+
 	return 0;
 
 out_misc_region:
@@ -462,6 +496,8 @@
 static void __devexit
 ioc4_exit(void)
 {
+	/* Ensure ioc4_load_modules() has completed before exiting */
+	flush_scheduled_work();
 	pci_unregister_driver(&ioc4_driver);
 }
 
diff --git a/drivers/misc/tifm_7xx1.c b/drivers/misc/tifm_7xx1.c
index e71eba3..be5672a 100644
--- a/drivers/misc/tifm_7xx1.c
+++ b/drivers/misc/tifm_7xx1.c
@@ -354,8 +354,7 @@
 	fm->has_ms_pif = tifm_7xx1_has_ms_pif;
 	pci_set_drvdata(dev, fm);
 
-	fm->addr = ioremap(pci_resource_start(dev, 0),
-			   pci_resource_len(dev, 0));
+	fm->addr = pci_ioremap_bar(dev, 0);
 	if (!fm->addr)
 		goto err_out_free;
 
diff --git a/drivers/parport/ieee1284.c b/drivers/parport/ieee1284.c
index ac2a805..8901ecf 100644
--- a/drivers/parport/ieee1284.c
+++ b/drivers/parport/ieee1284.c
@@ -84,7 +84,7 @@
 
 	add_timer (&timer);
 	ret = down_interruptible (&port->physport->ieee1284.irq);
-	if (!del_timer (&timer) && !ret)
+	if (!del_timer_sync(&timer) && !ret)
 		/* Timed out. */
 		ret = 1;
 
diff --git a/drivers/rapidio/rio-driver.c b/drivers/rapidio/rio-driver.c
index 956d3e7..addb87c 100644
--- a/drivers/rapidio/rio-driver.c
+++ b/drivers/rapidio/rio-driver.c
@@ -79,7 +79,6 @@
 
 /**
  *  rio_device_probe - Tell if a RIO device structure has a matching RIO device id structure
- *  @id: the RIO device id structure to match against
  *  @dev: the RIO device structure to match against
  *
  * return 0 and set rio_dev->driver when drv claims rio_dev, else error
diff --git a/drivers/rtc/Kconfig b/drivers/rtc/Kconfig
index 165a818..4ad831d 100644
--- a/drivers/rtc/Kconfig
+++ b/drivers/rtc/Kconfig
@@ -35,8 +35,8 @@
 	default "rtc0"
 	help
 	  The RTC device that will be used to (re)initialize the system
-	  clock, usually rtc0.  Initialization is done when the system
-	  starts up, and when it resumes from a low power state.  This
+	  clock, usually rtc0. Initialization is done when the system
+	  starts up, and when it resumes from a low power state. This
 	  device should record time in UTC, since the kernel won't do
 	  timezone correction.
 
@@ -44,7 +44,7 @@
 	  functions run, so it must usually be statically linked.
 
 	  This clock should be battery-backed, so that it reads the correct
-	  time when the system boots from a power-off state.  Otherwise, your
+	  time when the system boots from a power-off state. Otherwise, your
 	  system will need an external clock source (like an NTP server).
 
 	  If the clock you specify here is not battery backed, it may still
@@ -69,8 +69,7 @@
 	  Say yes here if you want to use your RTCs using sysfs interfaces,
 	  /sys/class/rtc/rtc0 through /sys/.../rtcN.
 
-	  This driver can also be built as a module. If so, the module
-	  will be called rtc-sysfs.
+	  If unsure, say Y.
 
 config RTC_INTF_PROC
 	boolean "/proc/driver/rtc (procfs for rtc0)"
@@ -78,11 +77,10 @@
 	default RTC_CLASS
 	help
 	  Say yes here if you want to use your first RTC through the proc
-	  interface, /proc/driver/rtc.  Other RTCs will not be available
+	  interface, /proc/driver/rtc. Other RTCs will not be available
 	  through that API.
 
-	  This driver can also be built as a module. If so, the module
-	  will be called rtc-proc.
+	  If unsure, say Y.
 
 config RTC_INTF_DEV
 	boolean "/dev/rtcN (character devices)"
@@ -90,12 +88,14 @@
 	help
 	  Say yes here if you want to use your RTCs using the /dev
 	  interfaces, which "udev" sets up as /dev/rtc0 through
-	  /dev/rtcN.  You may want to set up a symbolic link so one
-	  of these can be accessed as /dev/rtc, which is a name
-	  expected by "hwclock" and some other programs.
+	  /dev/rtcN.
 
-	  This driver can also be built as a module. If so, the module
-	  will be called rtc-dev.
+	  You may want to set up a symbolic link so one of these
+	  can be accessed as /dev/rtc, which is a name
+	  expected by "hwclock" and some other programs. Recent
+	  versions of "udev" are known to set up the symlink for you.
+
+	  If unsure, say Y.
 
 config RTC_INTF_DEV_UIE_EMUL
 	bool "RTC UIE emulation on dev interface"
@@ -132,14 +132,14 @@
 	tristate "Dallas/Maxim DS1307/37/38/39/40, ST M41T00"
 	help
 	  If you say yes here you get support for various compatible RTC
-	  chips (often with battery backup) connected with I2C.  This driver
+	  chips (often with battery backup) connected with I2C. This driver
 	  should handle DS1307, DS1337, DS1338, DS1339, DS1340, ST M41T00,
-	  and probably other chips.  In some cases the RTC must already
+	  and probably other chips. In some cases the RTC must already
 	  have been initialized (by manufacturing or a bootloader).
 
 	  The first seven registers on these chips hold an RTC, and other
 	  registers may add features such as NVRAM, a trickle charger for
-	  the RTC/NVRAM backup power, and alarms.  NVRAM is visible in
+	  the RTC/NVRAM backup power, and alarms. NVRAM is visible in
 	  sysfs, but other chip features may not be available.
 
 	  This driver can also be built as a module. If so, the module
@@ -150,10 +150,10 @@
 	depends on RTC_CLASS && I2C
 	help
 	  If you say yes here you get support for Dallas Semiconductor
-	  DS1374 real-time clock chips.  If an interrupt is associated
+	  DS1374 real-time clock chips. If an interrupt is associated
 	  with the device, the alarm functionality is supported.
 
-	  This driver can also be built as a module.  If so, the module
+	  This driver can also be built as a module. If so, the module
 	  will be called rtc-ds1374.
 
 config RTC_DRV_DS1672
@@ -247,7 +247,7 @@
 	help
 	  If you say yes here you get support for the RTC on the
 	  TWL92330 "Menelaus" power management chip, used with OMAP2
-	  platforms.  The support is integrated with the rest of
+	  platforms. The support is integrated with the rest of
 	  the Menelaus driver; it's not separate module.
 
 config RTC_DRV_TWL4030
@@ -308,7 +308,7 @@
 	tristate "Dallas/Maxim DS1305/DS1306"
 	help
 	  Select this driver to get support for the Dallas/Maxim DS1305
-	  and DS1306 real time clock chips.  These support a trickle
+	  and DS1306 real time clock chips. These support a trickle
 	  charger, alarms, and NVRAM in addition to the clock.
 
 	  This driver can also be built as a module. If so, the module
@@ -317,7 +317,8 @@
 config RTC_DRV_DS1390
 	tristate "Dallas/Maxim DS1390/93/94"
 	help
-	  If you say yes here you get support for the DS1390/93/94 chips.
+	  If you say yes here you get support for the
+	  Dallas/Maxim DS1390/93/94 chips.
 
 	  This driver only supports the RTC feature, and not other chip
 	  features such as alarms and trickle charging.
@@ -381,7 +382,7 @@
 	  or LPC bus chips, and so on.
 
 	  Your system will need to define the platform device used by
-	  this driver, otherwise it won't be accessible.  This means
+	  this driver, otherwise it won't be accessible. This means
 	  you can safely enable this driver if you don't know whether
 	  or not your board has this kind of hardware.
 
@@ -598,7 +599,7 @@
 	depends on ARCH_AT91RM9200 || ARCH_AT91SAM9RL
 	help
 	  Driver for the internal RTC (Realtime Clock) module found on
-	  Atmel AT91RM9200's and AT91SAM9RL chips.  On SAM9RL chips
+	  Atmel AT91RM9200's and AT91SAM9RL chips. On SAM9RL chips
 	  this is powered by the backup power supply.
 
 config RTC_DRV_AT91SAM9
@@ -620,8 +621,8 @@
 	prompt "RTT module Number" if ARCH_AT91SAM9263
 	depends on RTC_DRV_AT91SAM9
 	help
-	  More than one RTT module is available.  You can choose which
-	  one will be used as an RTC.  The default of zero is normally
+	  More than one RTT module is available. You can choose which
+	  one will be used as an RTC. The default of zero is normally
 	  OK to use, though some systems use that for non-RTC purposes.
 
 config RTC_DRV_AT91SAM9_GPBR
@@ -633,10 +634,20 @@
 	depends on RTC_DRV_AT91SAM9
 	help
 	  The RTC driver needs to use one of the General Purpose Backup
-	  Registers (GPBRs) as well as the RTT.  You can choose which one
-	  will be used.  The default of zero is normally OK to use, but
+	  Registers (GPBRs) as well as the RTT. You can choose which one
+	  will be used. The default of zero is normally OK to use, but
 	  on some systems other software needs to use that register.
 
+config RTC_DRV_AU1XXX
+	tristate "Au1xxx Counter0 RTC support"
+	depends on SOC_AU1X00
+	help
+	  This is a driver for the Au1xxx on-chip Counter0 (Time-Of-Year
+	  counter) to be used as an RTC.
+
+	  This driver can also be built as a module. If so, the module
+	  will be called rtc-au1xxx.
+
 config RTC_DRV_BFIN
 	tristate "Blackfin On-Chip RTC"
 	depends on BLACKFIN && !BF561
@@ -669,6 +680,17 @@
 	 the RTC. This exposes that functionality through the generic RTC
 	 class.
 
+config RTC_DRV_PXA
+       tristate "PXA27x/PXA3xx"
+       depends on ARCH_PXA
+       help
+         If you say Y here you will get access to the real time clock
+         built into your PXA27x or PXA3xx CPU.
+
+         This RTC driver uses PXA RTC registers available since pxa27x
+         series (RDxR, RYxR) instead of legacy RCNR, RTAR.
+
+
 config RTC_DRV_SUN4V
 	bool "SUN4V Hypervisor RTC"
 	depends on SPARC64
@@ -683,4 +705,22 @@
 	  If you say Y here you will get support for the RTC found on
 	  Starfire systems.
 
+config RTC_DRV_TX4939
+	tristate "TX4939 SoC"
+	depends on SOC_TX4939
+	help
+	  Driver for the internal RTC (Realtime Clock) module found on
+	  Toshiba TX4939 SoC.
+
+config RTC_DRV_MV
+	tristate "Marvell SoC RTC"
+	depends on ARCH_KIRKWOOD
+	help
+	  If you say yes here you will get support for the in-chip RTC
+	  that can be found in some of Marvell's SoC devices, such as
+	  the Kirkwood 88F6281 and 88F6192.
+
+	  This driver can also be built as a module. If so, the module
+	  will be called rtc-mv.
+
 endif # RTC_CLASS
diff --git a/drivers/rtc/Makefile b/drivers/rtc/Makefile
index 6e79c91..9a4340d 100644
--- a/drivers/rtc/Makefile
+++ b/drivers/rtc/Makefile
@@ -20,6 +20,7 @@
 obj-$(CONFIG_RTC_DRV_AT32AP700X)+= rtc-at32ap700x.o
 obj-$(CONFIG_RTC_DRV_AT91RM9200)+= rtc-at91rm9200.o
 obj-$(CONFIG_RTC_DRV_AT91SAM9)	+= rtc-at91sam9.o
+obj-$(CONFIG_RTC_DRV_AU1XXX)	+= rtc-au1xxx.o
 obj-$(CONFIG_RTC_DRV_BFIN)	+= rtc-bfin.o
 obj-$(CONFIG_RTC_DRV_CMOS)	+= rtc-cmos.o
 obj-$(CONFIG_RTC_DRV_DS1216)	+= rtc-ds1216.o
@@ -47,6 +48,7 @@
 obj-$(CONFIG_RTC_DRV_STARFIRE)	+= rtc-starfire.o
 obj-$(CONFIG_RTC_DRV_MAX6900)	+= rtc-max6900.o
 obj-$(CONFIG_RTC_DRV_MAX6902)	+= rtc-max6902.o
+obj-$(CONFIG_RTC_DRV_MV)	+= rtc-mv.o
 obj-$(CONFIG_RTC_DRV_OMAP)	+= rtc-omap.o
 obj-$(CONFIG_RTC_DRV_PCF8563)	+= rtc-pcf8563.o
 obj-$(CONFIG_RTC_DRV_PCF8583)	+= rtc-pcf8583.o
@@ -54,6 +56,7 @@
 obj-$(CONFIG_RTC_DRV_PL031)	+= rtc-pl031.o
 obj-$(CONFIG_RTC_DRV_PARISC)	+= rtc-parisc.o
 obj-$(CONFIG_RTC_DRV_PPC)	+= rtc-ppc.o
+obj-$(CONFIG_RTC_DRV_PXA)	+= rtc-pxa.o
 obj-$(CONFIG_RTC_DRV_R9701)	+= rtc-r9701.o
 obj-$(CONFIG_RTC_DRV_RS5C313)	+= rtc-rs5c313.o
 obj-$(CONFIG_RTC_DRV_RS5C348)	+= rtc-rs5c348.o
@@ -66,6 +69,7 @@
 obj-$(CONFIG_RTC_DRV_STK17TA8)	+= rtc-stk17ta8.o
 obj-$(CONFIG_RTC_DRV_TEST)	+= rtc-test.o
 obj-$(CONFIG_RTC_DRV_TWL4030)	+= rtc-twl4030.o
+obj-$(CONFIG_RTC_DRV_TX4939)	+= rtc-tx4939.o
 obj-$(CONFIG_RTC_DRV_V3020)	+= rtc-v3020.o
 obj-$(CONFIG_RTC_DRV_VR41XX)	+= rtc-vr41xx.o
 obj-$(CONFIG_RTC_DRV_WM8350)	+= rtc-wm8350.o
diff --git a/drivers/rtc/class.c b/drivers/rtc/class.c
index 4dfdf01..be5a6b7 100644
--- a/drivers/rtc/class.c
+++ b/drivers/rtc/class.c
@@ -48,9 +48,7 @@
 	struct rtc_time		tm;
 	struct timespec		ts = current_kernel_time();
 
-	if (strncmp(rtc->dev.bus_id,
-				CONFIG_RTC_HCTOSYS_DEVICE,
-				BUS_ID_SIZE) != 0)
+	if (strcmp(dev_name(&rtc->dev), CONFIG_RTC_HCTOSYS_DEVICE) != 0)
 		return 0;
 
 	rtc_read_time(rtc, &tm);
@@ -71,20 +69,18 @@
 	time_t			newtime;
 	struct timespec		time;
 
-	if (strncmp(rtc->dev.bus_id,
-				CONFIG_RTC_HCTOSYS_DEVICE,
-				BUS_ID_SIZE) != 0)
+	if (strcmp(dev_name(&rtc->dev), CONFIG_RTC_HCTOSYS_DEVICE) != 0)
 		return 0;
 
 	rtc_read_time(rtc, &tm);
 	if (rtc_valid_tm(&tm) != 0) {
-		pr_debug("%s:  bogus resume time\n", rtc->dev.bus_id);
+		pr_debug("%s:  bogus resume time\n", dev_name(&rtc->dev));
 		return 0;
 	}
 	rtc_tm_to_time(&tm, &newtime);
 	if (newtime <= oldtime) {
 		if (newtime < oldtime)
-			pr_debug("%s:  time travel!\n", rtc->dev.bus_id);
+			pr_debug("%s:  time travel!\n", dev_name(&rtc->dev));
 		return 0;
 	}
 
@@ -156,7 +152,7 @@
 	init_waitqueue_head(&rtc->irq_queue);
 
 	strlcpy(rtc->name, name, RTC_DEVICE_NAME_SIZE);
-	snprintf(rtc->dev.bus_id, BUS_ID_SIZE, "rtc%d", id);
+	dev_set_name(&rtc->dev, "rtc%d", id);
 
 	rtc_dev_prepare(rtc);
 
@@ -169,7 +165,7 @@
 	rtc_proc_add_device(rtc);
 
 	dev_info(dev, "rtc core: registered %s as %s\n",
-			rtc->name, rtc->dev.bus_id);
+			rtc->name, dev_name(&rtc->dev));
 
 	return rtc;
 
diff --git a/drivers/rtc/interface.c b/drivers/rtc/interface.c
index fd2c652..4348c4b 100644
--- a/drivers/rtc/interface.c
+++ b/drivers/rtc/interface.c
@@ -50,10 +50,15 @@
 
 	if (!rtc->ops)
 		err = -ENODEV;
-	else if (!rtc->ops->set_time)
-		err = -EINVAL;
-	else
+	else if (rtc->ops->set_time)
 		err = rtc->ops->set_time(rtc->dev.parent, tm);
+	else if (rtc->ops->set_mmss) {
+		unsigned long secs;
+		err = rtc_tm_to_time(tm, &secs);
+		if (err == 0)
+			err = rtc->ops->set_mmss(rtc->dev.parent, secs);
+	} else
+		err = -EINVAL;
 
 	mutex_unlock(&rtc->ops_lock);
 	return err;
@@ -389,7 +394,7 @@
 {
 	char *name = (char *)data;
 
-	if (strncmp(dev->bus_id, name, BUS_ID_SIZE) == 0)
+	if (strcmp(dev_name(dev), name) == 0)
 		return 1;
 	return 0;
 }
@@ -504,9 +509,6 @@
 	if (rtc->ops->irq_set_freq == NULL)
 		return -ENXIO;
 
-	if (!is_power_of_2(freq))
-		return -EINVAL;
-
 	spin_lock_irqsave(&rtc->irq_task_lock, flags);
 	if (rtc->irq_task != NULL && task == NULL)
 		err = -EBUSY;
diff --git a/drivers/rtc/rtc-at32ap700x.c b/drivers/rtc/rtc-at32ap700x.c
index 90b9a65..e1ec33e 100644
--- a/drivers/rtc/rtc-at32ap700x.c
+++ b/drivers/rtc/rtc-at32ap700x.c
@@ -205,7 +205,7 @@
 {
 	struct resource	*regs;
 	struct rtc_at32ap700x *rtc;
-	int irq = -1;
+	int irq;
 	int ret;
 
 	rtc = kzalloc(sizeof(struct rtc_at32ap700x), GFP_KERNEL);
@@ -222,7 +222,7 @@
 	}
 
 	irq = platform_get_irq(pdev, 0);
-	if (irq < 0) {
+	if (irq <= 0) {
 		dev_dbg(&pdev->dev, "could not get irq\n");
 		ret = -ENXIO;
 		goto out;
diff --git a/drivers/rtc/rtc-au1xxx.c b/drivers/rtc/rtc-au1xxx.c
new file mode 100644
index 0000000..8906a68
--- /dev/null
+++ b/drivers/rtc/rtc-au1xxx.c
@@ -0,0 +1,153 @@
+/*
+ * Au1xxx counter0 (aka Time-Of-Year counter) RTC interface driver.
+ *
+ * Copyright (C) 2008 Manuel Lauss <mano@roarinelk.homelinux.net>
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ */
+
+/* All current Au1xxx SoCs have 2 counters fed by an external 32.768 kHz
+ * crystal. Counter 0, which keeps counting during sleep/powerdown, is
+ * used to count seconds since the beginning of the unix epoch.
+ *
+ * The counters must be configured and enabled by bootloader/board code;
+ * no checks as to whether they really get a proper 32.768kHz clock are
+ * made as this would take far too long.
+ */
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/rtc.h>
+#include <linux/init.h>
+#include <linux/platform_device.h>
+#include <linux/io.h>
+#include <asm/mach-au1x00/au1000.h>
+
+/* 32kHz clock enabled and detected */
+#define CNTR_OK (SYS_CNTRL_E0 | SYS_CNTRL_32S)
+
+static int au1xtoy_rtc_read_time(struct device *dev, struct rtc_time *tm)
+{
+	/* raw counter value, interpreted as seconds since the unix epoch */
+	unsigned long secs = au_readl(SYS_TOYREAD);
+
+	rtc_time_to_tm(secs, tm);
+
+	/* propagate rtc_valid_tm()'s verdict on the decoded time */
+	return rtc_valid_tm(tm);
+}
+
+static int au1xtoy_rtc_set_time(struct device *dev, struct rtc_time *tm)
+{
+	unsigned long secs;
+
+	rtc_tm_to_time(tm, &secs);
+
+	au_writel(secs, SYS_TOYWRITE);
+	au_sync();
+
+	/* Poll until SYS_CNTRL_C0S clears, i.e. the register write is no
+	 * longer pending.  This can take up to 6 seconds...
+	 */
+	while ((au_readl(SYS_COUNTER_CNTRL) & SYS_CNTRL_C0S) != 0)
+		msleep(1);
+
+	return 0;
+}
+
+static const struct rtc_class_ops au1xtoy_rtc_ops = {	/* read-only table */
+	.read_time	= au1xtoy_rtc_read_time,
+	.set_time	= au1xtoy_rtc_set_time,
+};
+
+/* Probe: verify the counters run, trim counter0 to 1Hz if needed and
+ * register the RTC.  Returns 0 on success or a negative errno.
+ */
+static int __devinit au1xtoy_rtc_probe(struct platform_device *pdev)
+{
+	struct rtc_device *rtcdev;
+	unsigned long t;
+	int ret;
+
+	t = au_readl(SYS_COUNTER_CNTRL);
+	if (!(t & CNTR_OK)) {
+		dev_err(&pdev->dev, "counters not working; aborting.\n");
+		ret = -ENODEV;
+		goto out_err;
+	}
+
+	ret = -ETIMEDOUT;
+
+	/* set counter0 tickrate to 1Hz if necessary */
+	if (au_readl(SYS_TOYTRIM) != 32767) {
+		/* wait until hardware gives access to TRIM register */
+		t = 0x00100000;
+		while ((au_readl(SYS_COUNTER_CNTRL) & SYS_CNTRL_T0S) && --t)
+			msleep(1);
+
+		if (!t) {
+			/* --t hit 0: poll budget spent, counters unusable */
+			dev_err(&pdev->dev, "timeout waiting for access\n");
+			goto out_err;
+		}
+
+		/* set 1Hz TOY tick rate */
+		au_writel(32767, SYS_TOYTRIM);
+		au_sync();
+	}
+
+	/* wait until the hardware allows writes to the counter reg */
+	while (au_readl(SYS_COUNTER_CNTRL) & SYS_CNTRL_C0S)
+		msleep(1);
+
+	rtcdev = rtc_device_register("rtc-au1xxx", &pdev->dev,
+				     &au1xtoy_rtc_ops, THIS_MODULE);
+	if (IS_ERR(rtcdev)) {
+		ret = PTR_ERR(rtcdev);
+		goto out_err;
+	}
+
+	platform_set_drvdata(pdev, rtcdev);
+	return 0;
+
+out_err:
+	return ret;
+}
+
+static int __devexit au1xtoy_rtc_remove(struct platform_device *pdev)
+{
+	/* unregister the rtc_device that probe stored as driver data */
+	rtc_device_unregister(platform_get_drvdata(pdev));
+	/* clear the now-stale driver data pointer */
+	platform_set_drvdata(pdev, NULL);
+
+	return 0;
+}
+
+static struct platform_driver au1xrtc_driver = {
+	.driver		= {
+		.name	= "rtc-au1xxx",
+		.owner	= THIS_MODULE,
+	},	/* no .probe member: given to platform_driver_probe() */
+	.remove		= __devexit_p(au1xtoy_rtc_remove),
+};
+
+static int __init au1xtoy_rtc_init(void)
+{	/* module init: hand driver and probe routine to the platform bus */
+	return platform_driver_probe(&au1xrtc_driver, au1xtoy_rtc_probe);
+}
+
+static void __exit au1xtoy_rtc_exit(void)
+{	/* module exit: unregister the platform driver registered at init */
+	platform_driver_unregister(&au1xrtc_driver);
+}
+
+module_init(au1xtoy_rtc_init);
+module_exit(au1xtoy_rtc_exit);
+
+MODULE_DESCRIPTION("Au1xxx TOY-counter-based RTC driver");
+MODULE_AUTHOR("Manuel Lauss <manuel.lauss@gmail.com>");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS("platform:rtc-au1xxx");
diff --git a/drivers/rtc/rtc-bfin.c b/drivers/rtc/rtc-bfin.c
index 34439ce..aafd3e6 100644
--- a/drivers/rtc/rtc-bfin.c
+++ b/drivers/rtc/rtc-bfin.c
@@ -390,7 +390,7 @@
 
 	/* Register our RTC with the RTC framework */
 	rtc->rtc_dev = rtc_device_register(pdev->name, dev, &bfin_rtc_ops, THIS_MODULE);
-	if (unlikely(IS_ERR(rtc))) {
+	if (unlikely(IS_ERR(rtc->rtc_dev))) {
 		ret = PTR_ERR(rtc->rtc_dev);
 		goto err_irq;
 	}
diff --git a/drivers/rtc/rtc-cmos.c b/drivers/rtc/rtc-cmos.c
index 6cf8e28..b6d35f5 100644
--- a/drivers/rtc/rtc-cmos.c
+++ b/drivers/rtc/rtc-cmos.c
@@ -35,6 +35,7 @@
 #include <linux/spinlock.h>
 #include <linux/platform_device.h>
 #include <linux/mod_devicetable.h>
+#include <linux/log2.h>
 
 /* this is for "generic access to PC-style RTC" using CMOS_READ/CMOS_WRITE */
 #include <asm-generic/rtc.h>
@@ -58,7 +59,7 @@
 };
 
 /* both platform and pnp busses use negative numbers for invalid irqs */
-#define is_valid_irq(n)		((n) >= 0)
+#define is_valid_irq(n)		((n) > 0)
 
 static const char driver_name[] = "rtc_cmos";
 
@@ -384,6 +385,8 @@
 	if (!is_valid_irq(cmos->irq))
 		return -ENXIO;
 
+	if (!is_power_of_2(freq))
+		return -EINVAL;
 	/* 0 = no irqs; 1 = 2^15 Hz ... 15 = 2^0 Hz */
 	f = ffs(freq);
 	if (f-- > 16)
@@ -729,7 +732,7 @@
 
 	cmos_rtc.dev = dev;
 	dev_set_drvdata(dev, &cmos_rtc);
-	rename_region(ports, cmos_rtc.rtc->dev.bus_id);
+	rename_region(ports, dev_name(&cmos_rtc.rtc->dev));
 
 	spin_lock_irq(&rtc_lock);
 
@@ -777,7 +780,7 @@
 			rtc_cmos_int_handler = cmos_interrupt;
 
 		retval = request_irq(rtc_irq, rtc_cmos_int_handler,
-				IRQF_DISABLED, cmos_rtc.rtc->dev.bus_id,
+				IRQF_DISABLED, dev_name(&cmos_rtc.rtc->dev),
 				cmos_rtc.rtc);
 		if (retval < 0) {
 			dev_dbg(dev, "IRQ %d is already in use\n", rtc_irq);
@@ -795,7 +798,7 @@
 	}
 
 	pr_info("%s: alarms up to one %s%s, %zd bytes nvram%s\n",
-			cmos_rtc.rtc->dev.bus_id,
+			dev_name(&cmos_rtc.rtc->dev),
 			is_valid_irq(rtc_irq)
 				?  (cmos_rtc.mon_alrm
 					? "year"
@@ -885,7 +888,7 @@
 	}
 
 	pr_debug("%s: suspend%s, ctrl %02x\n",
-			cmos_rtc.rtc->dev.bus_id,
+			dev_name(&cmos_rtc.rtc->dev),
 			(tmp & RTC_AIE) ? ", alarm may wake" : "",
 			tmp);
 
@@ -941,7 +944,7 @@
 	}
 
 	pr_debug("%s: resume, ctrl %02x\n",
-			cmos_rtc.rtc->dev.bus_id,
+			dev_name(&cmos_rtc.rtc->dev),
 			tmp);
 
 	return 0;
diff --git a/drivers/rtc/rtc-ds1216.c b/drivers/rtc/rtc-ds1216.c
index 9a234a4..4aedc70 100644
--- a/drivers/rtc/rtc-ds1216.c
+++ b/drivers/rtc/rtc-ds1216.c
@@ -10,7 +10,7 @@
 #include <linux/platform_device.h>
 #include <linux/bcd.h>
 
-#define DRV_VERSION "0.1"
+#define DRV_VERSION "0.2"
 
 struct ds1216_regs {
 	u8 tsec;
@@ -101,7 +101,8 @@
 	tm->tm_year = bcd2bin(regs.year);
 	if (tm->tm_year < 70)
 		tm->tm_year += 100;
-	return 0;
+
+	return rtc_valid_tm(tm);
 }
 
 static int ds1216_rtc_set_time(struct device *dev, struct rtc_time *tm)
@@ -138,9 +139,8 @@
 	.set_time	= ds1216_rtc_set_time,
 };
 
-static int __devinit ds1216_rtc_probe(struct platform_device *pdev)
+static int __init ds1216_rtc_probe(struct platform_device *pdev)
 {
-	struct rtc_device *rtc;
 	struct resource *res;
 	struct ds1216_priv *priv;
 	int ret = 0;
@@ -152,7 +152,10 @@
 	priv = kzalloc(sizeof *priv, GFP_KERNEL);
 	if (!priv)
 		return -ENOMEM;
-	priv->size = res->end - res->start + 1;
+
+	platform_set_drvdata(pdev, priv);
+
+	priv->size = resource_size(res);
 	if (!request_mem_region(res->start, priv->size, pdev->name)) {
 		ret = -EBUSY;
 		goto out;
@@ -163,22 +166,18 @@
 		ret = -ENOMEM;
 		goto out;
 	}
-	rtc = rtc_device_register("ds1216", &pdev->dev,
+	priv->rtc = rtc_device_register("ds1216", &pdev->dev,
 				  &ds1216_rtc_ops, THIS_MODULE);
-	if (IS_ERR(rtc)) {
-		ret = PTR_ERR(rtc);
+	if (IS_ERR(priv->rtc)) {
+		ret = PTR_ERR(priv->rtc);
 		goto out;
 	}
-	priv->rtc = rtc;
-	platform_set_drvdata(pdev, priv);
 
 	/* dummy read to get clock into a known state */
 	ds1216_read(priv->ioaddr, dummy);
 	return 0;
 
 out:
-	if (priv->rtc)
-		rtc_device_unregister(priv->rtc);
 	if (priv->ioaddr)
 		iounmap(priv->ioaddr);
 	if (priv->baseaddr)
@@ -187,7 +186,7 @@
 	return ret;
 }
 
-static int __devexit ds1216_rtc_remove(struct platform_device *pdev)
+static int __exit ds1216_rtc_remove(struct platform_device *pdev)
 {
 	struct ds1216_priv *priv = platform_get_drvdata(pdev);
 
@@ -203,13 +202,12 @@
 		.name	= "rtc-ds1216",
 		.owner	= THIS_MODULE,
 	},
-	.probe		= ds1216_rtc_probe,
-	.remove		= __devexit_p(ds1216_rtc_remove),
+	.remove		= __exit_p(ds1216_rtc_remove),
 };
 
 static int __init ds1216_rtc_init(void)
 {
-	return platform_driver_register(&ds1216_rtc_platform_driver);
+	return platform_driver_probe(&ds1216_rtc_platform_driver, ds1216_rtc_probe);
 }
 
 static void __exit ds1216_rtc_exit(void)
diff --git a/drivers/rtc/rtc-ds1390.c b/drivers/rtc/rtc-ds1390.c
index 599e976..e54b5c6 100644
--- a/drivers/rtc/rtc-ds1390.c
+++ b/drivers/rtc/rtc-ds1390.c
@@ -1,5 +1,5 @@
 /*
- * rtc-ds1390.c -- driver for DS1390/93/94
+ * rtc-ds1390.c -- driver for the Dallas/Maxim DS1390/93/94 SPI RTC
  *
  * Copyright (C) 2008 Mercury IMC Ltd
  * Written by Mark Jackson <mpfj@mimc.co.uk>
@@ -8,11 +8,13 @@
  * it under the terms of the GNU General Public License version 2 as
  * published by the Free Software Foundation.
  *
- * NOTE : Currently this driver only supports the bare minimum for read
- * and write the RTC.  The extra features provided by the chip family
+ * NOTE: Currently this driver only supports the bare minimum for read
+ * and write the RTC. The extra features provided by the chip family
  * (alarms, trickle charger, different control registers) are unavailable.
  */
 
+#include <linux/init.h>
+#include <linux/module.h>
 #include <linux/platform_device.h>
 #include <linux/rtc.h>
 #include <linux/spi/spi.h>
@@ -42,20 +44,6 @@
 	u8 txrx_buf[9];	/* cmd + 8 registers */
 };
 
-static void ds1390_set_reg(struct device *dev, unsigned char address,
-				unsigned char data)
-{
-	struct spi_device *spi = to_spi_device(dev);
-	struct ds1390 *chip = dev_get_drvdata(dev);
-
-	/* Set MSB to indicate write */
-	chip->txrx_buf[0] = address | 0x80;
-	chip->txrx_buf[1] = data;
-
-	/* do the i/o */
-	spi_write_then_read(spi, chip->txrx_buf, 2, NULL, 0);
-}
-
 static int ds1390_get_reg(struct device *dev, unsigned char address,
 				unsigned char *data)
 {
@@ -78,7 +66,7 @@
 	return 0;
 }
 
-static int ds1390_get_datetime(struct device *dev, struct rtc_time *dt)
+static int ds1390_read_time(struct device *dev, struct rtc_time *dt)
 {
 	struct spi_device *spi = to_spi_device(dev);
 	struct ds1390 *chip = dev_get_drvdata(dev);
@@ -107,7 +95,7 @@
 	return rtc_valid_tm(dt);
 }
 
-static int ds1390_set_datetime(struct device *dev, struct rtc_time *dt)
+static int ds1390_set_time(struct device *dev, struct rtc_time *dt)
 {
 	struct spi_device *spi = to_spi_device(dev);
 	struct ds1390 *chip = dev_get_drvdata(dev);
@@ -127,16 +115,6 @@
 	return spi_write_then_read(spi, chip->txrx_buf, 8, NULL, 0);
 }
 
-static int ds1390_read_time(struct device *dev, struct rtc_time *tm)
-{
-	return ds1390_get_datetime(dev, tm);
-}
-
-static int ds1390_set_time(struct device *dev, struct rtc_time *tm)
-{
-	return ds1390_set_datetime(dev, tm);
-}
-
 static const struct rtc_class_ops ds1390_rtc_ops = {
 	.read_time	= ds1390_read_time,
 	.set_time	= ds1390_set_time,
@@ -149,46 +127,40 @@
 	struct ds1390 *chip;
 	int res;
 
-	printk(KERN_DEBUG "DS1390 SPI RTC driver\n");
-
-	rtc = rtc_device_register("ds1390",
-				&spi->dev, &ds1390_rtc_ops, THIS_MODULE);
-	if (IS_ERR(rtc)) {
-		printk(KERN_ALERT "RTC : unable to register device\n");
-		return PTR_ERR(rtc);
-	}
-
 	spi->mode = SPI_MODE_3;
 	spi->bits_per_word = 8;
 	spi_setup(spi);
 
 	chip = kzalloc(sizeof *chip, GFP_KERNEL);
 	if (!chip) {
-		printk(KERN_ALERT "RTC : unable to allocate device memory\n");
-		rtc_device_unregister(rtc);
+		dev_err(&spi->dev, "unable to allocate device memory\n");
 		return -ENOMEM;
 	}
-	chip->rtc = rtc;
 	dev_set_drvdata(&spi->dev, chip);
 
 	res = ds1390_get_reg(&spi->dev, DS1390_REG_SECONDS, &tmp);
-	if (res) {
-		printk(KERN_ALERT "RTC : unable to read device\n");
-		rtc_device_unregister(rtc);
+	if (res != 0) {
+		dev_err(&spi->dev, "unable to read device\n");
+		kfree(chip);
 		return res;
 	}
 
-	return 0;
+	chip->rtc = rtc_device_register("ds1390",
+				&spi->dev, &ds1390_rtc_ops, THIS_MODULE);
+	if (IS_ERR(chip->rtc)) {
+		dev_err(&spi->dev, "unable to register device\n");
+		res = PTR_ERR(chip->rtc);
+		kfree(chip);
+	}
+
+	return res;
 }
 
 static int __devexit ds1390_remove(struct spi_device *spi)
 {
 	struct ds1390 *chip = platform_get_drvdata(spi);
-	struct rtc_device *rtc = chip->rtc;
 
-	if (rtc)
-		rtc_device_unregister(rtc);
-
+	rtc_device_unregister(chip->rtc);
 	kfree(chip);
 
 	return 0;
@@ -215,6 +187,6 @@
 }
 module_exit(ds1390_exit);
 
-MODULE_DESCRIPTION("DS1390/93/94 SPI RTC driver");
+MODULE_DESCRIPTION("Dallas/Maxim DS1390/93/94 SPI RTC driver");
 MODULE_AUTHOR("Mark Jackson <mpfj@mimc.co.uk>");
 MODULE_LICENSE("GPL");
diff --git a/drivers/rtc/rtc-ds1511.c b/drivers/rtc/rtc-ds1511.c
index 25caada..23a07fe 100644
--- a/drivers/rtc/rtc-ds1511.c
+++ b/drivers/rtc/rtc-ds1511.c
@@ -326,9 +326,9 @@
 	struct platform_device *pdev = to_platform_device(dev);
 	struct rtc_plat_data *pdata = platform_get_drvdata(pdev);
 
-	if (pdata->irq < 0) {
+	if (pdata->irq <= 0)
 		return -EINVAL;
-	}
+
 	pdata->alrm_mday = alrm->time.tm_mday;
 	pdata->alrm_hour = alrm->time.tm_hour;
 	pdata->alrm_min = alrm->time.tm_min;
@@ -346,9 +346,9 @@
 	struct platform_device *pdev = to_platform_device(dev);
 	struct rtc_plat_data *pdata = platform_get_drvdata(pdev);
 
-	if (pdata->irq < 0) {
+	if (pdata->irq <= 0)
 		return -EINVAL;
-	}
+
 	alrm->time.tm_mday = pdata->alrm_mday < 0 ? 0 : pdata->alrm_mday;
 	alrm->time.tm_hour = pdata->alrm_hour < 0 ? 0 : pdata->alrm_hour;
 	alrm->time.tm_min = pdata->alrm_min < 0 ? 0 : pdata->alrm_min;
@@ -385,7 +385,7 @@
 	struct platform_device *pdev = to_platform_device(dev);
 	struct rtc_plat_data *pdata = platform_get_drvdata(pdev);
 
-	if (pdata->irq < 0) {
+	if (pdata->irq <= 0) {
 		return -ENOIOCTLCMD; /* fall back into rtc-dev's emulation */
 	}
 	switch (cmd) {
@@ -503,7 +503,6 @@
 	if (!pdata) {
 		return -ENOMEM;
 	}
-	pdata->irq = -1;
 	pdata->size = res->end - res->start + 1;
 	if (!request_mem_region(res->start, pdata->size, pdev->name)) {
 		ret = -EBUSY;
@@ -545,13 +544,13 @@
 	 * if the platform has an interrupt in mind for this device,
 	 * then by all means, set it
 	 */
-	if (pdata->irq >= 0) {
+	if (pdata->irq > 0) {
 		rtc_read(RTC_CMD1);
 		if (request_irq(pdata->irq, ds1511_interrupt,
 			IRQF_DISABLED | IRQF_SHARED, pdev->name, pdev) < 0) {
 
 			dev_warn(&pdev->dev, "interrupt not available.\n");
-			pdata->irq = -1;
+			pdata->irq = 0;
 		}
 	}
 
@@ -572,7 +571,7 @@
 	if (pdata->rtc) {
 		rtc_device_unregister(pdata->rtc);
 	}
-	if (pdata->irq >= 0) {
+	if (pdata->irq > 0) {
 		free_irq(pdata->irq, pdev);
 	}
 	if (ds1511_base) {
@@ -595,7 +594,7 @@
 	sysfs_remove_bin_file(&pdev->dev.kobj, &ds1511_nvram_attr);
 	rtc_device_unregister(pdata->rtc);
 	pdata->rtc = NULL;
-	if (pdata->irq >= 0) {
+	if (pdata->irq > 0) {
 		/*
 		 * disable the alarm interrupt
 		 */
diff --git a/drivers/rtc/rtc-ds1553.c b/drivers/rtc/rtc-ds1553.c
index b9475cd..38d472b 100644
--- a/drivers/rtc/rtc-ds1553.c
+++ b/drivers/rtc/rtc-ds1553.c
@@ -162,7 +162,7 @@
 	struct platform_device *pdev = to_platform_device(dev);
 	struct rtc_plat_data *pdata = platform_get_drvdata(pdev);
 
-	if (pdata->irq < 0)
+	if (pdata->irq <= 0)
 		return -EINVAL;
 	pdata->alrm_mday = alrm->time.tm_mday;
 	pdata->alrm_hour = alrm->time.tm_hour;
@@ -179,7 +179,7 @@
 	struct platform_device *pdev = to_platform_device(dev);
 	struct rtc_plat_data *pdata = platform_get_drvdata(pdev);
 
-	if (pdata->irq < 0)
+	if (pdata->irq <= 0)
 		return -EINVAL;
 	alrm->time.tm_mday = pdata->alrm_mday < 0 ? 0 : pdata->alrm_mday;
 	alrm->time.tm_hour = pdata->alrm_hour < 0 ? 0 : pdata->alrm_hour;
@@ -213,7 +213,7 @@
 	struct platform_device *pdev = to_platform_device(dev);
 	struct rtc_plat_data *pdata = platform_get_drvdata(pdev);
 
-	if (pdata->irq < 0)
+	if (pdata->irq <= 0)
 		return -ENOIOCTLCMD; /* fall back into rtc-dev's emulation */
 	switch (cmd) {
 	case RTC_AIE_OFF:
@@ -301,7 +301,6 @@
 	pdata = kzalloc(sizeof(*pdata), GFP_KERNEL);
 	if (!pdata)
 		return -ENOMEM;
-	pdata->irq = -1;
 	if (!request_mem_region(res->start, RTC_REG_SIZE, pdev->name)) {
 		ret = -EBUSY;
 		goto out;
@@ -327,13 +326,13 @@
 	if (readb(ioaddr + RTC_FLAGS) & RTC_FLAGS_BLF)
 		dev_warn(&pdev->dev, "voltage-low detected.\n");
 
-	if (pdata->irq >= 0) {
+	if (pdata->irq > 0) {
 		writeb(0, ioaddr + RTC_INTERRUPTS);
 		if (request_irq(pdata->irq, ds1553_rtc_interrupt,
 				IRQF_DISABLED | IRQF_SHARED,
 				pdev->name, pdev) < 0) {
 			dev_warn(&pdev->dev, "interrupt not available.\n");
-			pdata->irq = -1;
+			pdata->irq = 0;
 		}
 	}
 
@@ -353,7 +352,7 @@
  out:
 	if (pdata->rtc)
 		rtc_device_unregister(pdata->rtc);
-	if (pdata->irq >= 0)
+	if (pdata->irq > 0)
 		free_irq(pdata->irq, pdev);
 	if (ioaddr)
 		iounmap(ioaddr);
@@ -369,7 +368,7 @@
 
 	sysfs_remove_bin_file(&pdev->dev.kobj, &ds1553_nvram_attr);
 	rtc_device_unregister(pdata->rtc);
-	if (pdata->irq >= 0) {
+	if (pdata->irq > 0) {
 		writeb(0, pdata->ioaddr + RTC_INTERRUPTS);
 		free_irq(pdata->irq, pdev);
 	}
diff --git a/drivers/rtc/rtc-ds1672.c b/drivers/rtc/rtc-ds1672.c
index 4e91419..06dfb54 100644
--- a/drivers/rtc/rtc-ds1672.c
+++ b/drivers/rtc/rtc-ds1672.c
@@ -83,32 +83,11 @@
 	return 0;
 }
 
-static int ds1672_set_datetime(struct i2c_client *client, struct rtc_time *tm)
-{
-	unsigned long secs;
-
-	dev_dbg(&client->dev,
-		"%s: secs=%d, mins=%d, hours=%d, "
-		"mday=%d, mon=%d, year=%d, wday=%d\n",
-		__func__,
-		tm->tm_sec, tm->tm_min, tm->tm_hour,
-		tm->tm_mday, tm->tm_mon, tm->tm_year, tm->tm_wday);
-
-	rtc_tm_to_time(tm, &secs);
-
-	return ds1672_set_mmss(client, secs);
-}
-
 static int ds1672_rtc_read_time(struct device *dev, struct rtc_time *tm)
 {
 	return ds1672_get_datetime(to_i2c_client(dev), tm);
 }
 
-static int ds1672_rtc_set_time(struct device *dev, struct rtc_time *tm)
-{
-	return ds1672_set_datetime(to_i2c_client(dev), tm);
-}
-
 static int ds1672_rtc_set_mmss(struct device *dev, unsigned long secs)
 {
 	return ds1672_set_mmss(to_i2c_client(dev), secs);
@@ -152,7 +131,6 @@
 
 static const struct rtc_class_ops ds1672_rtc_ops = {
 	.read_time = ds1672_rtc_read_time,
-	.set_time = ds1672_rtc_set_time,
 	.set_mmss = ds1672_rtc_set_mmss,
 };
 
diff --git a/drivers/rtc/rtc-ds3234.c b/drivers/rtc/rtc-ds3234.c
index 45e5b10..c51589e 100644
--- a/drivers/rtc/rtc-ds3234.c
+++ b/drivers/rtc/rtc-ds3234.c
@@ -1,4 +1,4 @@
-/* drivers/rtc/rtc-ds3234.c
+/* rtc-ds3234.c
  *
  * Driver for Dallas Semiconductor (DS3234) SPI RTC with Integrated Crystal
  * and SRAM.
@@ -9,13 +9,10 @@
  * it under the terms of the GNU General Public License version 2 as
  * published by the Free Software Foundation.
  *
- * Changelog:
- *
- * 07-May-2008: Dennis Aberilla <denzzzhome@yahoo.com>
- *		- Created based on the max6902 code. Only implements the
- *		  date/time keeping functions; no SRAM yet.
  */
 
+#include <linux/init.h>
+#include <linux/module.h>
 #include <linux/device.h>
 #include <linux/platform_device.h>
 #include <linux/rtc.h>
@@ -34,16 +31,7 @@
 #define DS3234_REG_CONTROL	0x0E
 #define DS3234_REG_CONT_STAT	0x0F
 
-#undef DS3234_DEBUG
-
-struct ds3234 {
-	struct rtc_device *rtc;
-	u8 buf[8]; /* Burst read: addr + 7 regs */
-	u8 tx_buf[2];
-	u8 rx_buf[2];
-};
-
-static void ds3234_set_reg(struct device *dev, unsigned char address,
+static int ds3234_set_reg(struct device *dev, unsigned char address,
 				unsigned char data)
 {
 	struct spi_device *spi = to_spi_device(dev);
@@ -53,107 +41,45 @@
 	buf[0] = address | 0x80;
 	buf[1] = data;
 
-	spi_write(spi, buf, 2);
+	return spi_write_then_read(spi, buf, 2, NULL, 0);
 }
 
 static int ds3234_get_reg(struct device *dev, unsigned char address,
 				unsigned char *data)
 {
 	struct spi_device *spi = to_spi_device(dev);
-	struct ds3234 *chip = dev_get_drvdata(dev);
-	struct spi_message message;
-	struct spi_transfer xfer;
-	int status;
 
-	if (!data)
-		return -EINVAL;
+	*data = address & 0x7f;
 
-	/* Build our spi message */
-	spi_message_init(&message);
-	memset(&xfer, 0, sizeof(xfer));
-
-	/* Address + dummy tx byte */
-	xfer.len = 2;
-	xfer.tx_buf = chip->tx_buf;
-	xfer.rx_buf = chip->rx_buf;
-
-	chip->tx_buf[0] = address;
-	chip->tx_buf[1] = 0xff;
-
-	spi_message_add_tail(&xfer, &message);
-
-	/* do the i/o */
-	status = spi_sync(spi, &message);
-	if (status == 0)
-		status = message.status;
-	else
-		return status;
-
-	*data = chip->rx_buf[1];
-
-	return status;
+	return spi_write_then_read(spi, data, 1, data, 1);
 }
 
-static int ds3234_get_datetime(struct device *dev, struct rtc_time *dt)
+static int ds3234_read_time(struct device *dev, struct rtc_time *dt)
 {
+	int err;
+	unsigned char buf[8];
 	struct spi_device *spi = to_spi_device(dev);
-	struct ds3234 *chip = dev_get_drvdata(dev);
-	struct spi_message message;
-	struct spi_transfer xfer;
-	int status;
 
-	/* build the message */
-	spi_message_init(&message);
-	memset(&xfer, 0, sizeof(xfer));
-	xfer.len = 1 + 7;	/* Addr + 7 registers */
-	xfer.tx_buf = chip->buf;
-	xfer.rx_buf = chip->buf;
-	chip->buf[0] = 0x00;	/* Start address */
-	spi_message_add_tail(&xfer, &message);
+	buf[0] = 0x00; /* Start address */
 
-	/* do the i/o */
-	status = spi_sync(spi, &message);
-	if (status == 0)
-		status = message.status;
-	else
-		return status;
+	err = spi_write_then_read(spi, buf, 1, buf, 8);
+	if (err != 0)
+		return err;
 
 	/* Seconds, Minutes, Hours, Day, Date, Month, Year */
-	dt->tm_sec	= bcd2bin(chip->buf[1]);
-	dt->tm_min	= bcd2bin(chip->buf[2]);
-	dt->tm_hour	= bcd2bin(chip->buf[3] & 0x3f);
-	dt->tm_wday	= bcd2bin(chip->buf[4]) - 1; /* 0 = Sun */
-	dt->tm_mday	= bcd2bin(chip->buf[5]);
-	dt->tm_mon	= bcd2bin(chip->buf[6] & 0x1f) - 1; /* 0 = Jan */
-	dt->tm_year 	= bcd2bin(chip->buf[7] & 0xff) + 100; /* Assume 20YY */
+	dt->tm_sec	= bcd2bin(buf[0]);
+	dt->tm_min	= bcd2bin(buf[1]);
+	dt->tm_hour	= bcd2bin(buf[2] & 0x3f);
+	dt->tm_wday	= bcd2bin(buf[3]) - 1; /* 0 = Sun */
+	dt->tm_mday	= bcd2bin(buf[4]);
+	dt->tm_mon	= bcd2bin(buf[5] & 0x1f) - 1; /* 0 = Jan */
+	dt->tm_year 	= bcd2bin(buf[6] & 0xff) + 100; /* Assume 20YY */
 
-#ifdef DS3234_DEBUG
-	dev_dbg(dev, "\n%s : Read RTC values\n", __func__);
-	dev_dbg(dev, "tm_hour: %i\n", dt->tm_hour);
-	dev_dbg(dev, "tm_min : %i\n", dt->tm_min);
-	dev_dbg(dev, "tm_sec : %i\n", dt->tm_sec);
-	dev_dbg(dev, "tm_wday: %i\n", dt->tm_wday);
-	dev_dbg(dev, "tm_mday: %i\n", dt->tm_mday);
-	dev_dbg(dev, "tm_mon : %i\n", dt->tm_mon);
-	dev_dbg(dev, "tm_year: %i\n", dt->tm_year);
-#endif
-
-	return 0;
+	return rtc_valid_tm(dt);
 }
 
-static int ds3234_set_datetime(struct device *dev, struct rtc_time *dt)
+static int ds3234_set_time(struct device *dev, struct rtc_time *dt)
 {
-#ifdef DS3234_DEBUG
-	dev_dbg(dev, "\n%s : Setting RTC values\n", __func__);
-	dev_dbg(dev, "tm_sec : %i\n", dt->tm_sec);
-	dev_dbg(dev, "tm_min : %i\n", dt->tm_min);
-	dev_dbg(dev, "tm_hour: %i\n", dt->tm_hour);
-	dev_dbg(dev, "tm_wday: %i\n", dt->tm_wday);
-	dev_dbg(dev, "tm_mday: %i\n", dt->tm_mday);
-	dev_dbg(dev, "tm_mon : %i\n", dt->tm_mon);
-	dev_dbg(dev, "tm_year: %i\n", dt->tm_year);
-#endif
-
 	ds3234_set_reg(dev, DS3234_REG_SECONDS, bin2bcd(dt->tm_sec));
 	ds3234_set_reg(dev, DS3234_REG_MINUTES, bin2bcd(dt->tm_min));
 	ds3234_set_reg(dev, DS3234_REG_HOURS, bin2bcd(dt->tm_hour) & 0x3f);
@@ -174,16 +100,6 @@
 	return 0;
 }
 
-static int ds3234_read_time(struct device *dev, struct rtc_time *tm)
-{
-	return ds3234_get_datetime(dev, tm);
-}
-
-static int ds3234_set_time(struct device *dev, struct rtc_time *tm)
-{
-	return ds3234_set_datetime(dev, tm);
-}
-
 static const struct rtc_class_ops ds3234_rtc_ops = {
 	.read_time	= ds3234_read_time,
 	.set_time	= ds3234_set_time,
@@ -193,31 +109,15 @@
 {
 	struct rtc_device *rtc;
 	unsigned char tmp;
-	struct ds3234 *chip;
 	int res;
 
-	rtc = rtc_device_register("ds3234",
-				&spi->dev, &ds3234_rtc_ops, THIS_MODULE);
-	if (IS_ERR(rtc))
-		return PTR_ERR(rtc);
-
 	spi->mode = SPI_MODE_3;
 	spi->bits_per_word = 8;
 	spi_setup(spi);
 
-	chip = kzalloc(sizeof(struct ds3234), GFP_KERNEL);
-	if (!chip) {
-		rtc_device_unregister(rtc);
-		return -ENOMEM;
-	}
-	chip->rtc = rtc;
-	dev_set_drvdata(&spi->dev, chip);
-
 	res = ds3234_get_reg(&spi->dev, DS3234_REG_SECONDS, &tmp);
-	if (res) {
-		rtc_device_unregister(rtc);
+	if (res != 0)
 		return res;
-	}
 
 	/* Control settings
 	 *
@@ -246,26 +146,27 @@
 	ds3234_get_reg(&spi->dev, DS3234_REG_CONT_STAT, &tmp);
 	dev_info(&spi->dev, "Ctrl/Stat Reg: 0x%02x\n", tmp);
 
+	rtc = rtc_device_register("ds3234",
+				&spi->dev, &ds3234_rtc_ops, THIS_MODULE);
+	if (IS_ERR(rtc))
+		return PTR_ERR(rtc);
+
+	dev_set_drvdata(&spi->dev, rtc);
+
 	return 0;
 }
 
 static int __devexit ds3234_remove(struct spi_device *spi)
 {
-	struct ds3234 *chip = platform_get_drvdata(spi);
-	struct rtc_device *rtc = chip->rtc;
+	struct rtc_device *rtc = platform_get_drvdata(spi);
 
-	if (rtc)
-		rtc_device_unregister(rtc);
-
-	kfree(chip);
-
+	rtc_device_unregister(rtc);
 	return 0;
 }
 
 static struct spi_driver ds3234_driver = {
 	.driver = {
 		.name	 = "ds3234",
-		.bus	= &spi_bus_type,
 		.owner	= THIS_MODULE,
 	},
 	.probe	 = ds3234_probe,
@@ -274,7 +175,6 @@
 
 static __init int ds3234_init(void)
 {
-	printk(KERN_INFO "DS3234 SPI RTC Driver\n");
 	return spi_register_driver(&ds3234_driver);
 }
 module_init(ds3234_init);
diff --git a/drivers/rtc/rtc-ep93xx.c b/drivers/rtc/rtc-ep93xx.c
index 36e4ac0..f7a3283 100644
--- a/drivers/rtc/rtc-ep93xx.c
+++ b/drivers/rtc/rtc-ep93xx.c
@@ -49,18 +49,6 @@
 	return 0;
 }
 
-static int ep93xx_rtc_set_time(struct device *dev, struct rtc_time *tm)
-{
-	int err;
-	unsigned long secs;
-
-	err = rtc_tm_to_time(tm, &secs);
-	if (err != 0)
-		return err;
-
-	return ep93xx_rtc_set_mmss(dev, secs);
-}
-
 static int ep93xx_rtc_proc(struct device *dev, struct seq_file *seq)
 {
 	unsigned short preload, delete;
@@ -75,7 +63,6 @@
 
 static const struct rtc_class_ops ep93xx_rtc_ops = {
 	.read_time	= ep93xx_rtc_read_time,
-	.set_time	= ep93xx_rtc_set_time,
 	.set_mmss	= ep93xx_rtc_set_mmss,
 	.proc		= ep93xx_rtc_proc,
 };
diff --git a/drivers/rtc/rtc-m48t59.c b/drivers/rtc/rtc-m48t59.c
index 43afb7a..33921a6 100644
--- a/drivers/rtc/rtc-m48t59.c
+++ b/drivers/rtc/rtc-m48t59.c
@@ -450,7 +450,7 @@
 	 * the mode without IRQ.
 	 */
 	m48t59->irq = platform_get_irq(pdev, 0);
-	if (m48t59->irq < 0)
+	if (m48t59->irq <= 0)
 		m48t59->irq = NO_IRQ;
 
 	if (m48t59->irq != NO_IRQ) {
diff --git a/drivers/rtc/rtc-max6902.c b/drivers/rtc/rtc-max6902.c
index 2f6507d..36a8ea9 100644
--- a/drivers/rtc/rtc-max6902.c
+++ b/drivers/rtc/rtc-max6902.c
@@ -9,14 +9,6 @@
  *
  * Driver for MAX6902 spi RTC
  *
- * Changelog:
- *
- * 24-May-2006: Raphael Assenat <raph@8d.com>
- *                - Major rework
- *				Converted to rtc_device and uses the SPI layer.
- *
- * ??-???-2005: Someone at Compulab
- *                - Initial driver creation.
  */
 
 #include <linux/module.h>
@@ -26,7 +18,6 @@
 #include <linux/rtc.h>
 #include <linux/spi/spi.h>
 #include <linux/bcd.h>
-#include <linux/delay.h>
 
 #define MAX6902_REG_SECONDS		0x01
 #define MAX6902_REG_MINUTES		0x03
@@ -38,16 +29,7 @@
 #define MAX6902_REG_CONTROL		0x0F
 #define MAX6902_REG_CENTURY		0x13
 
-#undef MAX6902_DEBUG
-
-struct max6902 {
-	struct rtc_device *rtc;
-	u8 buf[9]; /* Burst read cmd + 8 registers */
-	u8 tx_buf[2];
-	u8 rx_buf[2];
-};
-
-static void max6902_set_reg(struct device *dev, unsigned char address,
+static int max6902_set_reg(struct device *dev, unsigned char address,
 				unsigned char data)
 {
 	struct spi_device *spi = to_spi_device(dev);
@@ -57,113 +39,58 @@
 	buf[0] = address & 0x7f;
 	buf[1] = data;
 
-	spi_write(spi, buf, 2);
+	return spi_write_then_read(spi, buf, 2, NULL, 0);
 }
 
 static int max6902_get_reg(struct device *dev, unsigned char address,
 				unsigned char *data)
 {
 	struct spi_device *spi = to_spi_device(dev);
-	struct max6902 *chip = dev_get_drvdata(dev);
-	struct spi_message message;
-	struct spi_transfer xfer;
-	int status;
-
-	if (!data)
-		return -EINVAL;
-
-	/* Build our spi message */
-	spi_message_init(&message);
-	memset(&xfer, 0, sizeof(xfer));
-	xfer.len = 2;
-	/* Can tx_buf and rx_buf be equal? The doc in spi.h is not sure... */
-	xfer.tx_buf = chip->tx_buf;
-	xfer.rx_buf = chip->rx_buf;
 
 	/* Set MSB to indicate read */
-	chip->tx_buf[0] = address | 0x80;
+	*data = address | 0x80;
 
-	spi_message_add_tail(&xfer, &message);
-
-	/* do the i/o */
-	status = spi_sync(spi, &message);
-
-	if (status == 0)
-		*data = chip->rx_buf[1];
-	return status;
+	return spi_write_then_read(spi, data, 1, data, 1);
 }
 
-static int max6902_get_datetime(struct device *dev, struct rtc_time *dt)
+static int max6902_read_time(struct device *dev, struct rtc_time *dt)
 {
-	unsigned char tmp;
-	int century;
-	int err;
+	int err, century;
 	struct spi_device *spi = to_spi_device(dev);
-	struct max6902 *chip = dev_get_drvdata(dev);
-	struct spi_message message;
-	struct spi_transfer xfer;
-	int status;
+	unsigned char buf[8];
 
-	err = max6902_get_reg(dev, MAX6902_REG_CENTURY, &tmp);
-	if (err)
+	buf[0] = 0xbf;	/* Burst read */
+
+	err = spi_write_then_read(spi, buf, 1, buf, 8);
+	if (err != 0)
 		return err;
 
-	/* build the message */
-	spi_message_init(&message);
-	memset(&xfer, 0, sizeof(xfer));
-	xfer.len = 1 + 7;	/* Burst read command + 7 registers */
-	xfer.tx_buf = chip->buf;
-	xfer.rx_buf = chip->buf;
-	chip->buf[0] = 0xbf;	/* Burst read */
-	spi_message_add_tail(&xfer, &message);
-
-	/* do the i/o */
-	status = spi_sync(spi, &message);
-	if (status)
-		return status;
-
 	/* The chip sends data in this order:
 	 * Seconds, Minutes, Hours, Date, Month, Day, Year */
-	dt->tm_sec	= bcd2bin(chip->buf[1]);
-	dt->tm_min	= bcd2bin(chip->buf[2]);
-	dt->tm_hour	= bcd2bin(chip->buf[3]);
-	dt->tm_mday	= bcd2bin(chip->buf[4]);
-	dt->tm_mon	= bcd2bin(chip->buf[5]) - 1;
-	dt->tm_wday	= bcd2bin(chip->buf[6]);
-	dt->tm_year = bcd2bin(chip->buf[7]);
+	dt->tm_sec	= bcd2bin(buf[0]);
+	dt->tm_min	= bcd2bin(buf[1]);
+	dt->tm_hour	= bcd2bin(buf[2]);
+	dt->tm_mday	= bcd2bin(buf[3]);
+	dt->tm_mon	= bcd2bin(buf[4]) - 1;
+	dt->tm_wday	= bcd2bin(buf[5]);
+	dt->tm_year	= bcd2bin(buf[6]);
 
-	century = bcd2bin(tmp) * 100;
+	/* Read century */
+	err = max6902_get_reg(dev, MAX6902_REG_CENTURY, &buf[0]);
+	if (err != 0)
+		return err;
+
+	century = bcd2bin(buf[0]) * 100;
 
 	dt->tm_year += century;
 	dt->tm_year -= 1900;
 
-#ifdef MAX6902_DEBUG
-	printk("\n%s : Read RTC values\n",__func__);
-	printk("tm_hour: %i\n",dt->tm_hour);
-	printk("tm_min : %i\n",dt->tm_min);
-	printk("tm_sec : %i\n",dt->tm_sec);
-	printk("tm_year: %i\n",dt->tm_year);
-	printk("tm_mon : %i\n",dt->tm_mon);
-	printk("tm_mday: %i\n",dt->tm_mday);
-	printk("tm_wday: %i\n",dt->tm_wday);
-#endif
-
-	return 0;
+	return rtc_valid_tm(dt);
 }
 
-static int max6902_set_datetime(struct device *dev, struct rtc_time *dt)
+static int max6902_set_time(struct device *dev, struct rtc_time *dt)
 {
-	dt->tm_year = dt->tm_year+1900;
-
-#ifdef MAX6902_DEBUG
-	printk("\n%s : Setting RTC values\n",__func__);
-	printk("tm_sec : %i\n",dt->tm_sec);
-	printk("tm_min : %i\n",dt->tm_min);
-	printk("tm_hour: %i\n",dt->tm_hour);
-	printk("tm_mday: %i\n",dt->tm_mday);
-	printk("tm_wday: %i\n",dt->tm_wday);
-	printk("tm_year: %i\n",dt->tm_year);
-#endif
+	dt->tm_year = dt->tm_year + 1900;
 
 	/* Remove write protection */
 	max6902_set_reg(dev, 0xF, 0);
@@ -173,10 +100,10 @@
 	max6902_set_reg(dev, 0x05, bin2bcd(dt->tm_hour));
 
 	max6902_set_reg(dev, 0x07, bin2bcd(dt->tm_mday));
-	max6902_set_reg(dev, 0x09, bin2bcd(dt->tm_mon+1));
+	max6902_set_reg(dev, 0x09, bin2bcd(dt->tm_mon + 1));
 	max6902_set_reg(dev, 0x0B, bin2bcd(dt->tm_wday));
-	max6902_set_reg(dev, 0x0D, bin2bcd(dt->tm_year%100));
-	max6902_set_reg(dev, 0x13, bin2bcd(dt->tm_year/100));
+	max6902_set_reg(dev, 0x0D, bin2bcd(dt->tm_year % 100));
+	max6902_set_reg(dev, 0x13, bin2bcd(dt->tm_year / 100));
 
 	/* Compulab used a delay here. However, the datasheet
 	 * does not mention a delay being required anywhere... */
@@ -188,16 +115,6 @@
 	return 0;
 }
 
-static int max6902_read_time(struct device *dev, struct rtc_time *tm)
-{
-	return max6902_get_datetime(dev, tm);
-}
-
-static int max6902_set_time(struct device *dev, struct rtc_time *tm)
-{
-	return max6902_set_datetime(dev, tm);
-}
-
 static const struct rtc_class_ops max6902_rtc_ops = {
 	.read_time	= max6902_read_time,
 	.set_time	= max6902_set_time,
@@ -207,45 +124,31 @@
 {
 	struct rtc_device *rtc;
 	unsigned char tmp;
-	struct max6902 *chip;
 	int res;
 
+	spi->mode = SPI_MODE_3;
+	spi->bits_per_word = 8;
+	spi_setup(spi);
+
+	res = max6902_get_reg(&spi->dev, MAX6902_REG_SECONDS, &tmp);
+	if (res != 0)
+		return res;
+
 	rtc = rtc_device_register("max6902",
 				&spi->dev, &max6902_rtc_ops, THIS_MODULE);
 	if (IS_ERR(rtc))
 		return PTR_ERR(rtc);
 
-	spi->mode = SPI_MODE_3;
-	spi->bits_per_word = 8;
-	spi_setup(spi);
-
-	chip = kzalloc(sizeof *chip, GFP_KERNEL);
-	if (!chip) {
-		rtc_device_unregister(rtc);
-		return -ENOMEM;
-	}
-	chip->rtc = rtc;
-	dev_set_drvdata(&spi->dev, chip);
-
-	res = max6902_get_reg(&spi->dev, MAX6902_REG_SECONDS, &tmp);
-	if (res) {
-		rtc_device_unregister(rtc);
-		return res;
-	}
-
+	/* max6902_remove() looks the rtc up through drvdata: store it */
+	dev_set_drvdata(&spi->dev, rtc);
 	return 0;
 }
 
 static int __devexit max6902_remove(struct spi_device *spi)
 {
-	struct max6902 *chip = platform_get_drvdata(spi);
-	struct rtc_device *rtc = chip->rtc;
+	struct rtc_device *rtc = dev_get_drvdata(&spi->dev);
 
-	if (rtc)
-		rtc_device_unregister(rtc);
-
-	kfree(chip);
-
+	rtc_device_unregister(rtc);
 	return 0;
 }
 
@@ -261,7 +164,6 @@
 
 static __init int max6902_init(void)
 {
-	printk("max6902 spi driver\n");
 	return spi_register_driver(&max6902_driver);
 }
 module_init(max6902_init);
diff --git a/drivers/rtc/rtc-mv.c b/drivers/rtc/rtc-mv.c
new file mode 100644
index 0000000..45f12dc
--- /dev/null
+++ b/drivers/rtc/rtc-mv.c
@@ -0,0 +1,163 @@
+/*
+ * Driver for the RTC in Marvell SoCs.
+ *
+ * This file is licensed under the terms of the GNU General Public
+ * License version 2.  This program is licensed "as is" without any
+ * warranty of any kind, whether express or implied.
+ */
+
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/rtc.h>
+#include <linux/bcd.h>
+#include <linux/io.h>
+#include <linux/platform_device.h>
+
+
+#define RTC_TIME_REG_OFFS	0
+#define RTC_SECONDS_OFFS	0
+#define RTC_MINUTES_OFFS	8
+#define RTC_HOURS_OFFS		16
+#define RTC_WDAY_OFFS		24
+#define RTC_HOURS_12H_MODE		(1 << 22) /* 12 hours mode */
+
+#define RTC_DATE_REG_OFFS	4
+#define RTC_MDAY_OFFS		0
+#define RTC_MONTH_OFFS		8
+#define RTC_YEAR_OFFS		16
+
+
+struct rtc_plat_data {
+	struct rtc_device *rtc;
+	void __iomem *ioaddr;
+};
+
+static int mv_rtc_set_time(struct device *dev, struct rtc_time *tm)
+{
+	struct rtc_plat_data *pdata = dev_get_drvdata(dev);
+	void __iomem *ioaddr = pdata->ioaddr;
+	u32	rtc_reg;
+
+	rtc_reg = (bin2bcd(tm->tm_sec) << RTC_SECONDS_OFFS) |
+		(bin2bcd(tm->tm_min) << RTC_MINUTES_OFFS) |
+		(bin2bcd(tm->tm_hour) << RTC_HOURS_OFFS) |
+		(bin2bcd(tm->tm_wday) << RTC_WDAY_OFFS);
+	writel(rtc_reg, ioaddr + RTC_TIME_REG_OFFS);
+
+	rtc_reg = (bin2bcd(tm->tm_mday) << RTC_MDAY_OFFS) |
+		(bin2bcd(tm->tm_mon + 1) << RTC_MONTH_OFFS) |
+		(bin2bcd(tm->tm_year % 100) << RTC_YEAR_OFFS);
+	writel(rtc_reg, ioaddr + RTC_DATE_REG_OFFS);
+
+	return 0;
+}
+
+static int mv_rtc_read_time(struct device *dev, struct rtc_time *tm)
+{
+	struct rtc_plat_data *pdata = dev_get_drvdata(dev);
+	void __iomem *ioaddr = pdata->ioaddr;
+	u32	rtc_time, rtc_date;
+	unsigned int year, month, day, hour, minute, second, wday;
+
+	rtc_time = readl(ioaddr + RTC_TIME_REG_OFFS);
+	rtc_date = readl(ioaddr + RTC_DATE_REG_OFFS);
+
+	second = rtc_time & 0x7f;
+	minute = (rtc_time >> RTC_MINUTES_OFFS) & 0x7f;
+	hour = (rtc_time >> RTC_HOURS_OFFS) & 0x3f; /* assume 24 hours mode */
+	wday = (rtc_time >> RTC_WDAY_OFFS) & 0x7;
+
+	day = rtc_date & 0x3f;
+	month = (rtc_date >> RTC_MONTH_OFFS) & 0x3f;
+	year = (rtc_date >> RTC_YEAR_OFFS) & 0xff;
+
+	tm->tm_sec = bcd2bin(second);
+	tm->tm_min = bcd2bin(minute);
+	tm->tm_hour = bcd2bin(hour);
+	tm->tm_mday = bcd2bin(day);
+	tm->tm_wday = bcd2bin(wday);
+	tm->tm_mon = bcd2bin(month) - 1;
+	/* hw counts from year 2000, but tm_year is relative to 1900 */
+	tm->tm_year = bcd2bin(year) + 100;
+
+	return rtc_valid_tm(tm);
+}
+
+static const struct rtc_class_ops mv_rtc_ops = {
+	.read_time	= mv_rtc_read_time,
+	.set_time	= mv_rtc_set_time,
+};
+
+static int __init mv_rtc_probe(struct platform_device *pdev)
+{
+	struct resource *res;
+	struct rtc_plat_data *pdata;
+	resource_size_t size;
+	u32 rtc_time;
+
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	if (!res)
+		return -ENODEV;
+
+	pdata = devm_kzalloc(&pdev->dev, sizeof(*pdata), GFP_KERNEL);
+	if (!pdata)
+		return -ENOMEM;
+
+	size = resource_size(res);
+	if (!devm_request_mem_region(&pdev->dev, res->start, size,
+				     pdev->name))
+		return -EBUSY;
+
+	pdata->ioaddr = devm_ioremap(&pdev->dev, res->start, size);
+	if (!pdata->ioaddr)
+		return -ENOMEM;
+
+	/* the driver assumes 24 hours mode: refuse an RTC set to 12 hours */
+	rtc_time = readl(pdata->ioaddr + RTC_TIME_REG_OFFS);
+	if (rtc_time & RTC_HOURS_12H_MODE) {
+		dev_err(&pdev->dev, "12 Hours mode not supported.\n");
+		return -EINVAL;
+	}
+
+	platform_set_drvdata(pdev, pdata);
+	pdata->rtc = rtc_device_register(pdev->name, &pdev->dev,
+					 &mv_rtc_ops, THIS_MODULE);
+	if (IS_ERR(pdata->rtc))
+		return PTR_ERR(pdata->rtc);
+
+	return 0;
+}
+
+static int __exit mv_rtc_remove(struct platform_device *pdev)
+{
+	struct rtc_plat_data *pdata = platform_get_drvdata(pdev);
+
+	rtc_device_unregister(pdata->rtc);
+	return 0;
+}
+
+static struct platform_driver mv_rtc_driver = {
+	.remove		= __exit_p(mv_rtc_remove),
+	.driver		= {
+		.name	= "rtc-mv",
+		.owner	= THIS_MODULE,
+	},
+};
+
+static __init int mv_init(void)
+{
+	return platform_driver_probe(&mv_rtc_driver, mv_rtc_probe);
+}
+
+static __exit void mv_exit(void)
+{
+	platform_driver_unregister(&mv_rtc_driver);
+}
+
+module_init(mv_init);
+module_exit(mv_exit);
+
+MODULE_AUTHOR("Saeed Bishara <saeed@marvell.com>");
+MODULE_DESCRIPTION("Marvell RTC driver");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS("platform:rtc-mv");
diff --git a/drivers/rtc/rtc-pxa.c b/drivers/rtc/rtc-pxa.c
new file mode 100644
index 0000000..cc7eb87
--- /dev/null
+++ b/drivers/rtc/rtc-pxa.c
@@ -0,0 +1,489 @@
+/*
+ * Real Time Clock interface for XScale PXA27x and PXA3xx
+ *
+ * Copyright (C) 2008 Robert Jarzmik
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+#include <linux/init.h>
+#include <linux/platform_device.h>
+#include <linux/module.h>
+#include <linux/rtc.h>
+#include <linux/seq_file.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+
+#define TIMER_FREQ		CLOCK_TICK_RATE
+#define RTC_DEF_DIVIDER		(32768 - 1)
+#define RTC_DEF_TRIM		0
+#define MAXFREQ_PERIODIC	1000
+
+/*
+ * PXA Registers and bits definitions
+ */
+#define RTSR_PICE	(1 << 15)	/* Periodic interrupt count enable */
+#define RTSR_PIALE	(1 << 14)	/* Periodic interrupt Alarm enable */
+#define RTSR_PIAL	(1 << 13)	/* Periodic interrupt detected */
+#define RTSR_SWALE2	(1 << 11)	/* RTC stopwatch alarm2 enable */
+#define RTSR_SWAL2	(1 << 10)	/* RTC stopwatch alarm2 detected */
+#define RTSR_SWALE1	(1 << 9)	/* RTC stopwatch alarm1 enable */
+#define RTSR_SWAL1	(1 << 8)	/* RTC stopwatch alarm1 detected */
+#define RTSR_RDALE2	(1 << 7)	/* RTC alarm2 enable */
+#define RTSR_RDAL2	(1 << 6)	/* RTC alarm2 detected */
+#define RTSR_RDALE1	(1 << 5)	/* RTC alarm1 enable */
+#define RTSR_RDAL1	(1 << 4)	/* RTC alarm1 detected */
+#define RTSR_HZE	(1 << 3)	/* HZ interrupt enable */
+#define RTSR_ALE	(1 << 2)	/* RTC alarm interrupt enable */
+#define RTSR_HZ		(1 << 1)	/* HZ rising-edge detected */
+#define RTSR_AL		(1 << 0)	/* RTC alarm detected */
+#define RTSR_TRIG_MASK	(RTSR_AL | RTSR_HZ | RTSR_RDAL1 | RTSR_RDAL2\
+			 | RTSR_SWAL1 | RTSR_SWAL2)
+#define RYxR_YEAR_S	9
+#define RYxR_YEAR_MASK	(0xfff << RYxR_YEAR_S)
+#define RYxR_MONTH_S	5
+#define RYxR_MONTH_MASK	(0xf << RYxR_MONTH_S)
+#define RYxR_DAY_MASK	0x1f
+#define RDxR_HOUR_S	12
+#define RDxR_HOUR_MASK	(0x1f << RDxR_HOUR_S)
+#define RDxR_MIN_S	6
+#define RDxR_MIN_MASK	(0x3f << RDxR_MIN_S)
+#define RDxR_SEC_MASK	0x3f
+
+#define RTSR		0x08
+#define RTTR		0x0c
+#define RDCR		0x10
+#define RYCR		0x14
+#define RDAR1		0x18
+#define RYAR1		0x1c
+#define RTCPICR		0x34
+#define PIAR		0x38
+
+#define rtc_readl(pxa_rtc, reg)	\
+	__raw_readl((pxa_rtc)->base + (reg))
+#define rtc_writel(pxa_rtc, reg, value)	\
+	__raw_writel((value), (pxa_rtc)->base + (reg))
+
+struct pxa_rtc {
+	struct resource	*ress;
+	void __iomem		*base;
+	int			irq_1Hz;
+	int			irq_Alrm;
+	struct rtc_device	*rtc;
+	spinlock_t		lock;		/* Protects this structure */
+	struct rtc_time		rtc_alarm;
+};
+
+static u32 ryxr_calc(struct rtc_time *tm)
+{
+	return ((tm->tm_year + 1900) << RYxR_YEAR_S)
+		| ((tm->tm_mon + 1) << RYxR_MONTH_S)
+		| tm->tm_mday;
+}
+
+static u32 rdxr_calc(struct rtc_time *tm)
+{
+	return (tm->tm_hour << RDxR_HOUR_S) | (tm->tm_min << RDxR_MIN_S)
+		| tm->tm_sec;
+}
+
+static void tm_calc(u32 rycr, u32 rdcr, struct rtc_time *tm)
+{
+	tm->tm_year = ((rycr & RYxR_YEAR_MASK) >> RYxR_YEAR_S) - 1900;
+	tm->tm_mon = (((rycr & RYxR_MONTH_MASK) >> RYxR_MONTH_S)) - 1;
+	tm->tm_mday = (rycr & RYxR_DAY_MASK);
+	tm->tm_hour = (rdcr & RDxR_HOUR_MASK) >> RDxR_HOUR_S;
+	tm->tm_min = (rdcr & RDxR_MIN_MASK) >> RDxR_MIN_S;
+	tm->tm_sec = rdcr & RDxR_SEC_MASK;
+}
+
+static void rtsr_clear_bits(struct pxa_rtc *pxa_rtc, u32 mask)
+{
+	u32 status = rtc_readl(pxa_rtc, RTSR);
+
+	/*
+	 * Write RTSR back without @mask and with every trigger bit zeroed.
+	 */
+	rtc_writel(pxa_rtc, RTSR, status & ~(RTSR_TRIG_MASK | mask));
+}
+
+static void rtsr_set_bits(struct pxa_rtc *pxa_rtc, u32 mask)
+{
+	u32 status = rtc_readl(pxa_rtc, RTSR);
+
+	/*
+	 * Write RTSR back with the trigger bits zeroed, then @mask OR-ed in.
+	 */
+	rtc_writel(pxa_rtc, RTSR, (status & ~RTSR_TRIG_MASK) | mask);
+}
+
+static irqreturn_t pxa_rtc_irq(int irq, void *dev_id)
+{
+	struct platform_device *pdev = to_platform_device(dev_id);
+	struct pxa_rtc *pxa_rtc = platform_get_drvdata(pdev);
+	u32 rtsr;
+	unsigned long events = 0;
+
+	spin_lock(&pxa_rtc->lock);
+
+	/* clear the pending interrupt sources by writing RTSR back to itself */
+	rtsr = rtc_readl(pxa_rtc, RTSR);
+	rtc_writel(pxa_rtc, RTSR, rtsr);
+
+	/* temporarily disable the alarm, periodic and 1Hz interrupts */
+	rtsr_clear_bits(pxa_rtc, RTSR_RDALE1 | RTSR_PIALE | RTSR_HZE);
+
+	/* the alarm has fired: drop its enable bit from the value restored below */
+	if (rtsr & RTSR_RDAL1)
+		rtsr &= ~RTSR_RDALE1;
+
+	/* translate the status bits read above into RTC class event flags */
+	if (rtsr & RTSR_RDAL1)
+		events |= RTC_AF | RTC_IRQF;
+	if (rtsr & RTSR_HZ)
+		events |= RTC_UF | RTC_IRQF;
+	if (rtsr & RTSR_PIAL)
+		events |= RTC_PF | RTC_IRQF;
+
+	rtc_update_irq(pxa_rtc->rtc, 1, events);
+
+	/* restore the saved enable bits, with all trigger bits masked out */
+	rtc_writel(pxa_rtc, RTSR, rtsr & ~RTSR_TRIG_MASK);
+
+	spin_unlock(&pxa_rtc->lock);
+	return IRQ_HANDLED;
+}
+
+static int pxa_rtc_open(struct device *dev)
+{
+	struct pxa_rtc *pxa_rtc = dev_get_drvdata(dev);
+	int ret;
+
+	ret = request_irq(pxa_rtc->irq_1Hz, pxa_rtc_irq, IRQF_DISABLED,
+			  "rtc 1Hz", dev);
+	if (ret < 0) {
+		dev_err(dev, "can't get irq %i, err %d\n", pxa_rtc->irq_1Hz,
+			ret);
+		goto err_irq_1Hz;
+	}
+	ret = request_irq(pxa_rtc->irq_Alrm, pxa_rtc_irq, IRQF_DISABLED,
+			  "rtc Alrm", dev);
+	if (ret < 0) {
+		dev_err(dev, "can't get irq %i, err %d\n", pxa_rtc->irq_Alrm,
+			ret);
+		goto err_irq_Alrm;
+	}
+
+	return 0;
+
+err_irq_Alrm:
+	free_irq(pxa_rtc->irq_1Hz, dev);
+err_irq_1Hz:
+	return ret;
+}
+
+static void pxa_rtc_release(struct device *dev)
+{
+	struct pxa_rtc *pxa_rtc = dev_get_drvdata(dev);
+
+	spin_lock_irq(&pxa_rtc->lock);
+	rtsr_clear_bits(pxa_rtc, RTSR_PIALE | RTSR_RDALE1 | RTSR_HZE);
+	spin_unlock_irq(&pxa_rtc->lock);
+
+	free_irq(pxa_rtc->irq_Alrm, dev);
+	free_irq(pxa_rtc->irq_1Hz, dev);
+}
+
+static int pxa_periodic_irq_set_freq(struct device *dev, int freq)
+{
+	struct pxa_rtc *pxa_rtc = dev_get_drvdata(dev);
+	int period_ms;
+
+	if (freq < 1 || freq > MAXFREQ_PERIODIC)
+		return -EINVAL;
+
+	period_ms = 1000 / freq;
+	rtc_writel(pxa_rtc, PIAR, period_ms);
+
+	return 0;
+}
+
+static int pxa_periodic_irq_set_state(struct device *dev, int enabled)
+{
+	struct pxa_rtc *pxa_rtc = dev_get_drvdata(dev);
+
+	if (enabled)
+		rtsr_set_bits(pxa_rtc, RTSR_PIALE | RTSR_PICE);
+	else
+		rtsr_clear_bits(pxa_rtc, RTSR_PIALE | RTSR_PICE);
+
+	return 0;
+}
+
+static int pxa_rtc_ioctl(struct device *dev, unsigned int cmd,
+		unsigned long arg)
+{
+	struct pxa_rtc *pxa_rtc = dev_get_drvdata(dev);
+	int ret = 0;
+
+	spin_lock_irq(&pxa_rtc->lock);
+	switch (cmd) {
+	case RTC_AIE_OFF:
+		rtsr_clear_bits(pxa_rtc, RTSR_RDALE1);
+		break;
+	case RTC_AIE_ON:
+		rtsr_set_bits(pxa_rtc, RTSR_RDALE1);
+		break;
+	case RTC_UIE_OFF:
+		rtsr_clear_bits(pxa_rtc, RTSR_HZE);
+		break;
+	case RTC_UIE_ON:
+		rtsr_set_bits(pxa_rtc, RTSR_HZE);
+		break;
+	default:
+		ret = -ENOIOCTLCMD;
+	}
+
+	spin_unlock_irq(&pxa_rtc->lock);
+	return ret;
+}
+
+static int pxa_rtc_read_time(struct device *dev, struct rtc_time *tm)
+{
+	struct pxa_rtc *pxa_rtc = dev_get_drvdata(dev);
+	u32 rycr, rdcr;
+
+	rycr = rtc_readl(pxa_rtc, RYCR);
+	rdcr = rtc_readl(pxa_rtc, RDCR);
+
+	tm_calc(rycr, rdcr, tm);
+	return 0;
+}
+
+static int pxa_rtc_set_time(struct device *dev, struct rtc_time *tm)
+{
+	struct pxa_rtc *pxa_rtc = dev_get_drvdata(dev);
+
+	rtc_writel(pxa_rtc, RYCR, ryxr_calc(tm));
+	rtc_writel(pxa_rtc, RDCR, rdxr_calc(tm));
+
+	return 0;
+}
+
+static int pxa_rtc_read_alarm(struct device *dev, struct rtc_wkalrm *alrm)
+{
+	struct pxa_rtc *pxa_rtc = dev_get_drvdata(dev);
+	u32 rtsr, ryar, rdar;
+
+	ryar = rtc_readl(pxa_rtc, RYAR1);
+	rdar = rtc_readl(pxa_rtc, RDAR1);
+	tm_calc(ryar, rdar, &alrm->time);
+
+	rtsr = rtc_readl(pxa_rtc, RTSR);
+	alrm->enabled = (rtsr & RTSR_RDALE1) ? 1 : 0;
+	alrm->pending = (rtsr & RTSR_RDAL1) ? 1 : 0;
+	return 0;
+}
+
+static int pxa_rtc_set_alarm(struct device *dev, struct rtc_wkalrm *alrm)
+{
+	struct pxa_rtc *pxa_rtc = dev_get_drvdata(dev);
+	u32 rtsr;
+
+	spin_lock_irq(&pxa_rtc->lock);
+
+	rtc_writel(pxa_rtc, RYAR1, ryxr_calc(&alrm->time));
+	rtc_writel(pxa_rtc, RDAR1, rdxr_calc(&alrm->time));
+
+	rtsr = rtc_readl(pxa_rtc, RTSR);
+	if (alrm->enabled)
+		rtsr |= RTSR_RDALE1;
+	else
+		rtsr &= ~RTSR_RDALE1;
+	rtc_writel(pxa_rtc, RTSR, rtsr);
+
+	spin_unlock_irq(&pxa_rtc->lock);
+
+	return 0;
+}
+
+static int pxa_rtc_proc(struct device *dev, struct seq_file *seq)
+{
+	struct pxa_rtc *pxa_rtc = dev_get_drvdata(dev);
+
+	seq_printf(seq, "trim/divider\t: 0x%08x\n", rtc_readl(pxa_rtc, RTTR));
+	seq_printf(seq, "update_IRQ\t: %s\n",
+		   (rtc_readl(pxa_rtc, RTSR) & RTSR_HZE) ? "yes" : "no");
+	seq_printf(seq, "periodic_IRQ\t: %s\n",
+		   (rtc_readl(pxa_rtc, RTSR) & RTSR_PIALE) ? "yes" : "no");
+	seq_printf(seq, "periodic_freq\t: %u\n", rtc_readl(pxa_rtc, PIAR));
+
+	return 0;
+}
+
+static const struct rtc_class_ops pxa_rtc_ops = {
+	.open = pxa_rtc_open,
+	.release = pxa_rtc_release,
+	.ioctl = pxa_rtc_ioctl,
+	.read_time = pxa_rtc_read_time,
+	.set_time = pxa_rtc_set_time,
+	.read_alarm = pxa_rtc_read_alarm,
+	.set_alarm = pxa_rtc_set_alarm,
+	.proc = pxa_rtc_proc,
+	.irq_set_state = pxa_periodic_irq_set_state,
+	.irq_set_freq = pxa_periodic_irq_set_freq,
+};
+
+static int __init pxa_rtc_probe(struct platform_device *pdev)
+{
+	struct device *dev = &pdev->dev;
+	struct pxa_rtc *pxa_rtc;
+	int ret;
+	u32 rttr;
+
+	pxa_rtc = kzalloc(sizeof(struct pxa_rtc), GFP_KERNEL);
+	if (!pxa_rtc)
+		return -ENOMEM;
+
+	spin_lock_init(&pxa_rtc->lock);
+	platform_set_drvdata(pdev, pxa_rtc);
+
+	ret = -ENXIO;
+	pxa_rtc->ress = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	if (!pxa_rtc->ress) {
+		dev_err(dev, "No I/O memory resource defined\n");
+		goto err_ress;
+	}
+
+	pxa_rtc->irq_1Hz = platform_get_irq(pdev, 0);
+	if (pxa_rtc->irq_1Hz < 0) {
+		dev_err(dev, "No 1Hz IRQ resource defined\n");
+		goto err_ress;
+	}
+	pxa_rtc->irq_Alrm = platform_get_irq(pdev, 1);
+	if (pxa_rtc->irq_Alrm < 0) {
+		dev_err(dev, "No alarm IRQ resource defined\n");
+		goto err_ress;
+	}
+
+	ret = -ENOMEM;
+	pxa_rtc->base = ioremap(pxa_rtc->ress->start,
+				resource_size(pxa_rtc->ress));
+	if (!pxa_rtc->base) {
+		dev_err(&pdev->dev, "Unable to map pxa RTC I/O memory\n");
+		goto err_map;
+	}
+
+	/*
+	 * If the clock divider is uninitialized then reset it to the
+	 * default value to get the 1Hz clock.
+	 */
+	if (rtc_readl(pxa_rtc, RTTR) == 0) {
+		rttr = RTC_DEF_DIVIDER + (RTC_DEF_TRIM << 16);
+		rtc_writel(pxa_rtc, RTTR, rttr);
+		dev_warn(dev, "warning: initializing default clock"
+			 " divider/trim value\n");
+	}
+
+	rtsr_clear_bits(pxa_rtc, RTSR_PIALE | RTSR_RDALE1 | RTSR_HZE);
+
+	pxa_rtc->rtc = rtc_device_register("pxa-rtc", &pdev->dev, &pxa_rtc_ops,
+					   THIS_MODULE);
+	ret = PTR_ERR(pxa_rtc->rtc);
+	if (IS_ERR(pxa_rtc->rtc)) {
+		dev_err(dev, "Failed to register RTC device -> %d\n", ret);
+		goto err_rtc_reg;
+	}
+
+	device_init_wakeup(dev, 1);
+
+	return 0;
+
+err_rtc_reg:
+	 iounmap(pxa_rtc->base);
+err_ress:
+err_map:
+	kfree(pxa_rtc);
+	return ret;
+}
+
+static int __exit pxa_rtc_remove(struct platform_device *pdev)
+{
+	struct pxa_rtc *pxa_rtc = platform_get_drvdata(pdev);
+
+	rtc_device_unregister(pxa_rtc->rtc);
+
+	spin_lock_irq(&pxa_rtc->lock);
+	iounmap(pxa_rtc->base);
+	spin_unlock_irq(&pxa_rtc->lock);
+
+	kfree(pxa_rtc);
+
+	return 0;
+}
+
+#ifdef CONFIG_PM
+static int pxa_rtc_suspend(struct platform_device *pdev, pm_message_t state)
+{
+	struct pxa_rtc *pxa_rtc = platform_get_drvdata(pdev);
+
+	if (device_may_wakeup(&pdev->dev))
+		enable_irq_wake(pxa_rtc->irq_Alrm);
+	return 0;
+}
+
+static int pxa_rtc_resume(struct platform_device *pdev)
+{
+	struct pxa_rtc *pxa_rtc = platform_get_drvdata(pdev);
+
+	if (device_may_wakeup(&pdev->dev))
+		disable_irq_wake(pxa_rtc->irq_Alrm);
+	return 0;
+}
+#else
+#define pxa_rtc_suspend	NULL
+#define pxa_rtc_resume	NULL
+#endif
+
+static struct platform_driver pxa_rtc_driver = {
+	.remove		= __exit_p(pxa_rtc_remove),
+	.suspend	= pxa_rtc_suspend,
+	.resume		= pxa_rtc_resume,
+	.driver		= {
+		.name		= "pxa-rtc",
+	},
+};
+
+static int __init pxa_rtc_init(void)
+{
+	if (cpu_is_pxa27x() || cpu_is_pxa3xx())
+		return platform_driver_probe(&pxa_rtc_driver, pxa_rtc_probe);
+
+	return -ENODEV;
+}
+
+static void __exit pxa_rtc_exit(void)
+{
+	platform_driver_unregister(&pxa_rtc_driver);
+}
+
+module_init(pxa_rtc_init);
+module_exit(pxa_rtc_exit);
+
+MODULE_AUTHOR("Robert Jarzmik");
+MODULE_DESCRIPTION("PXA27x/PXA3xx Realtime Clock Driver (RTC)");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS("platform:pxa-rtc");
diff --git a/drivers/rtc/rtc-s3c.c b/drivers/rtc/rtc-s3c.c
index 7a568beb..e0d7b99 100644
--- a/drivers/rtc/rtc-s3c.c
+++ b/drivers/rtc/rtc-s3c.c
@@ -94,6 +94,9 @@
 {
 	unsigned int tmp;
 
+	if (!is_power_of_2(freq))
+		return -EINVAL;
+
 	spin_lock_irq(&s3c_rtc_pie_lock);
 
 	tmp = readb(s3c_rtc_base + S3C2410_TICNT) & S3C2410_TICNT_ENABLE;
diff --git a/drivers/rtc/rtc-sh.c b/drivers/rtc/rtc-sh.c
index aaf9d6a..1c3fc6b 100644
--- a/drivers/rtc/rtc-sh.c
+++ b/drivers/rtc/rtc-sh.c
@@ -24,6 +24,7 @@
 #include <linux/interrupt.h>
 #include <linux/spinlock.h>
 #include <linux/io.h>
+#include <linux/log2.h>
 #include <asm/rtc.h>
 
 #define DRV_NAME	"sh-rtc"
@@ -89,7 +90,9 @@
 	void __iomem *regbase;
 	unsigned long regsize;
 	struct resource *res;
-	unsigned int alarm_irq, periodic_irq, carry_irq;
+	int alarm_irq;
+	int periodic_irq;
+	int carry_irq;
 	struct rtc_device *rtc_dev;
 	spinlock_t lock;
 	unsigned long capabilities;	/* See asm-sh/rtc.h for cap bits */
@@ -549,6 +552,8 @@
 
 static int sh_rtc_irq_set_freq(struct device *dev, int freq)
 {
+	if (!is_power_of_2(freq))
+		return -EINVAL;
 	return sh_rtc_ioctl(dev, RTC_IRQP_SET, freq);
 }
 
@@ -578,7 +583,7 @@
 
 	/* get periodic/carry/alarm irqs */
 	ret = platform_get_irq(pdev, 0);
-	if (unlikely(ret < 0)) {
+	if (unlikely(ret <= 0)) {
 		ret = -ENOENT;
 		dev_err(&pdev->dev, "No IRQ for period\n");
 		goto err_badres;
@@ -586,7 +591,7 @@
 	rtc->periodic_irq = ret;
 
 	ret = platform_get_irq(pdev, 1);
-	if (unlikely(ret < 0)) {
+	if (unlikely(ret <= 0)) {
 		ret = -ENOENT;
 		dev_err(&pdev->dev, "No IRQ for carry\n");
 		goto err_badres;
@@ -594,7 +599,7 @@
 	rtc->carry_irq = ret;
 
 	ret = platform_get_irq(pdev, 2);
-	if (unlikely(ret < 0)) {
+	if (unlikely(ret <= 0)) {
 		ret = -ENOENT;
 		dev_err(&pdev->dev, "No IRQ for alarm\n");
 		goto err_badres;
diff --git a/drivers/rtc/rtc-stk17ta8.c b/drivers/rtc/rtc-stk17ta8.c
index f4cd46e..dc0b622 100644
--- a/drivers/rtc/rtc-stk17ta8.c
+++ b/drivers/rtc/rtc-stk17ta8.c
@@ -170,7 +170,7 @@
 	struct platform_device *pdev = to_platform_device(dev);
 	struct rtc_plat_data *pdata = platform_get_drvdata(pdev);
 
-	if (pdata->irq < 0)
+	if (pdata->irq <= 0)
 		return -EINVAL;
 	pdata->alrm_mday = alrm->time.tm_mday;
 	pdata->alrm_hour = alrm->time.tm_hour;
@@ -187,7 +187,7 @@
 	struct platform_device *pdev = to_platform_device(dev);
 	struct rtc_plat_data *pdata = platform_get_drvdata(pdev);
 
-	if (pdata->irq < 0)
+	if (pdata->irq <= 0)
 		return -EINVAL;
 	alrm->time.tm_mday = pdata->alrm_mday < 0 ? 0 : pdata->alrm_mday;
 	alrm->time.tm_hour = pdata->alrm_hour < 0 ? 0 : pdata->alrm_hour;
@@ -221,7 +221,7 @@
 	struct platform_device *pdev = to_platform_device(dev);
 	struct rtc_plat_data *pdata = platform_get_drvdata(pdev);
 
-	if (pdata->irq < 0)
+	if (pdata->irq <= 0)
 		return -ENOIOCTLCMD; /* fall back into rtc-dev's emulation */
 	switch (cmd) {
 	case RTC_AIE_OFF:
@@ -303,7 +303,6 @@
 	pdata = kzalloc(sizeof(*pdata), GFP_KERNEL);
 	if (!pdata)
 		return -ENOMEM;
-	pdata->irq = -1;
 	if (!request_mem_region(res->start, RTC_REG_SIZE, pdev->name)) {
 		ret = -EBUSY;
 		goto out;
@@ -329,13 +328,13 @@
 	if (readb(ioaddr + RTC_FLAGS) & RTC_FLAGS_PF)
 		dev_warn(&pdev->dev, "voltage-low detected.\n");
 
-	if (pdata->irq >= 0) {
+	if (pdata->irq > 0) {
 		writeb(0, ioaddr + RTC_INTERRUPTS);
 		if (request_irq(pdata->irq, stk17ta8_rtc_interrupt,
 				IRQF_DISABLED | IRQF_SHARED,
 				pdev->name, pdev) < 0) {
 			dev_warn(&pdev->dev, "interrupt not available.\n");
-			pdata->irq = -1;
+			pdata->irq = 0;
 		}
 	}
 
@@ -355,7 +354,7 @@
  out:
 	if (pdata->rtc)
 		rtc_device_unregister(pdata->rtc);
-	if (pdata->irq >= 0)
+	if (pdata->irq > 0)
 		free_irq(pdata->irq, pdev);
 	if (ioaddr)
 		iounmap(ioaddr);
@@ -371,7 +370,7 @@
 
 	sysfs_remove_bin_file(&pdev->dev.kobj, &stk17ta8_nvram_attr);
 	rtc_device_unregister(pdata->rtc);
-	if (pdata->irq >= 0) {
+	if (pdata->irq > 0) {
 		writeb(0, pdata->ioaddr + RTC_INTERRUPTS);
 		free_irq(pdata->irq, pdev);
 	}
diff --git a/drivers/rtc/rtc-test.c b/drivers/rtc/rtc-test.c
index bc93002..e478280 100644
--- a/drivers/rtc/rtc-test.c
+++ b/drivers/rtc/rtc-test.c
@@ -34,14 +34,9 @@
 	return 0;
 }
 
-static int test_rtc_set_time(struct device *dev,
-	struct rtc_time *tm)
-{
-	return 0;
-}
-
 static int test_rtc_set_mmss(struct device *dev, unsigned long secs)
 {
+	dev_info(dev, "%s, secs = %lu\n", __func__, secs);
 	return 0;
 }
 
@@ -78,7 +73,6 @@
 static const struct rtc_class_ops test_rtc_ops = {
 	.proc = test_rtc_proc,
 	.read_time = test_rtc_read_time,
-	.set_time = test_rtc_set_time,
 	.read_alarm = test_rtc_read_alarm,
 	.set_alarm = test_rtc_set_alarm,
 	.set_mmss = test_rtc_set_mmss,
diff --git a/drivers/rtc/rtc-twl4030.c b/drivers/rtc/rtc-twl4030.c
index 01d8da9..8ce5f74 100644
--- a/drivers/rtc/rtc-twl4030.c
+++ b/drivers/rtc/rtc-twl4030.c
@@ -19,6 +19,7 @@
  */
 
 #include <linux/kernel.h>
+#include <linux/errno.h>
 #include <linux/init.h>
 #include <linux/module.h>
 #include <linux/types.h>
@@ -415,8 +416,8 @@
 	int irq = platform_get_irq(pdev, 0);
 	u8 rd_reg;
 
-	if (irq < 0)
-		return irq;
+	if (irq <= 0)
+		return -EINVAL;
 
 	rtc = rtc_device_register(pdev->name,
 				  &pdev->dev, &twl4030_rtc_ops, THIS_MODULE);
diff --git a/drivers/rtc/rtc-tx4939.c b/drivers/rtc/rtc-tx4939.c
new file mode 100644
index 0000000..4ee4857
--- /dev/null
+++ b/drivers/rtc/rtc-tx4939.c
@@ -0,0 +1,317 @@
+/*
+ * TX4939 internal RTC driver
+ * Based on RBTX49xx patch from CELF patch archive.
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * (C) Copyright TOSHIBA CORPORATION 2005-2007
+ */
+#include <linux/rtc.h>
+#include <linux/platform_device.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <asm/txx9/tx4939.h>
+
+struct tx4939rtc_plat_data {
+	struct rtc_device *rtc;
+	struct tx4939_rtc_reg __iomem *rtcreg;
+};
+
+static struct tx4939rtc_plat_data *get_tx4939rtc_plat_data(struct device *dev)
+{
+	return platform_get_drvdata(to_platform_device(dev));
+}
+
+static int tx4939_rtc_cmd(struct tx4939_rtc_reg __iomem *rtcreg, int cmd)
+{
+	int i = 0;
+
+	__raw_writel(cmd, &rtcreg->ctl);
+	/* This might take 30us (next 32.768KHz clock) */
+	while (__raw_readl(&rtcreg->ctl) & TX4939_RTCCTL_BUSY) {
+		/* timeout on approx. 100us (@ GBUS200MHz) */
+		if (i++ > 200 * 100)
+			return -EBUSY;
+		cpu_relax();
+	}
+	return 0;
+}
+
+static int tx4939_rtc_set_mmss(struct device *dev, unsigned long secs)
+{
+	struct tx4939rtc_plat_data *pdata = get_tx4939rtc_plat_data(dev);
+	struct tx4939_rtc_reg __iomem *rtcreg = pdata->rtcreg;
+	int i, ret;
+	unsigned char buf[6];
+
+	buf[0] = 0;
+	buf[1] = 0;
+	buf[2] = secs;
+	buf[3] = secs >> 8;
+	buf[4] = secs >> 16;
+	buf[5] = secs >> 24;
+	spin_lock_irq(&pdata->rtc->irq_lock);
+	__raw_writel(0, &rtcreg->adr);
+	for (i = 0; i < 6; i++)
+		__raw_writel(buf[i], &rtcreg->dat);
+	ret = tx4939_rtc_cmd(rtcreg,
+			     TX4939_RTCCTL_COMMAND_SETTIME |
+			     (__raw_readl(&rtcreg->ctl) & TX4939_RTCCTL_ALME));
+	spin_unlock_irq(&pdata->rtc->irq_lock);
+	return ret;
+}
+
+static int tx4939_rtc_read_time(struct device *dev, struct rtc_time *tm)
+{
+	struct tx4939rtc_plat_data *pdata = get_tx4939rtc_plat_data(dev);
+	struct tx4939_rtc_reg __iomem *rtcreg = pdata->rtcreg;
+	int i, ret;
+	unsigned long sec;
+	unsigned char buf[6];
+
+	spin_lock_irq(&pdata->rtc->irq_lock);
+	ret = tx4939_rtc_cmd(rtcreg, TX4939_RTCCTL_COMMAND_GETTIME |
+			     (__raw_readl(&rtcreg->ctl) & TX4939_RTCCTL_ALME));
+	if (ret) {
+		spin_unlock_irq(&pdata->rtc->irq_lock);
+		return ret;
+	}
+	__raw_writel(2, &rtcreg->adr);
+	for (i = 2; i < 6; i++)
+		buf[i] = __raw_readl(&rtcreg->dat);
+	spin_unlock_irq(&pdata->rtc->irq_lock);
+	/* cast before shifting: as int, buf[5] << 24 could sign-extend */
+	sec = ((u32)buf[5] << 24) | (buf[4] << 16) | (buf[3] << 8) | buf[2];
+	rtc_time_to_tm(sec, tm);
+	return rtc_valid_tm(tm);
+}
+
+static int tx4939_rtc_set_alarm(struct device *dev, struct rtc_wkalrm *alrm)
+{
+	struct tx4939rtc_plat_data *pdata = get_tx4939rtc_plat_data(dev);
+	struct tx4939_rtc_reg __iomem *rtcreg = pdata->rtcreg;
+	int i, ret;
+	unsigned long sec;
+	unsigned char buf[6];
+
+	if (alrm->time.tm_sec < 0 ||
+	    alrm->time.tm_min < 0 ||
+	    alrm->time.tm_hour < 0 ||
+	    alrm->time.tm_mday < 0 ||
+	    alrm->time.tm_mon < 0 ||
+	    alrm->time.tm_year < 0)
+		return -EINVAL;
+	rtc_tm_to_time(&alrm->time, &sec);
+	buf[0] = 0;
+	buf[1] = 0;
+	buf[2] = sec;
+	buf[3] = sec >> 8;
+	buf[4] = sec >> 16;
+	buf[5] = sec >> 24;
+	spin_lock_irq(&pdata->rtc->irq_lock);
+	__raw_writel(0, &rtcreg->adr);
+	for (i = 0; i < 6; i++)
+		__raw_writel(buf[i], &rtcreg->dat);
+	ret = tx4939_rtc_cmd(rtcreg, TX4939_RTCCTL_COMMAND_SETALARM |
+			     (alrm->enabled ? TX4939_RTCCTL_ALME : 0));
+	spin_unlock_irq(&pdata->rtc->irq_lock);
+	return ret;
+}
+
+static int tx4939_rtc_read_alarm(struct device *dev, struct rtc_wkalrm *alrm)
+{
+	struct tx4939rtc_plat_data *pdata = get_tx4939rtc_plat_data(dev);
+	struct tx4939_rtc_reg __iomem *rtcreg = pdata->rtcreg;
+	int i, ret;
+	unsigned long sec;
+	unsigned char buf[6];
+	u32 ctl;
+
+	spin_lock_irq(&pdata->rtc->irq_lock);
+	ret = tx4939_rtc_cmd(rtcreg, TX4939_RTCCTL_COMMAND_GETALARM |
+			     (__raw_readl(&rtcreg->ctl) & TX4939_RTCCTL_ALME));
+	if (ret) {
+		spin_unlock_irq(&pdata->rtc->irq_lock);
+		return ret;
+	}
+	__raw_writel(2, &rtcreg->adr);
+	for (i = 2; i < 6; i++)
+		buf[i] = __raw_readl(&rtcreg->dat);
+	ctl = __raw_readl(&rtcreg->ctl);
+	alrm->enabled = (ctl & TX4939_RTCCTL_ALME) ? 1 : 0;
+	alrm->pending = (ctl & TX4939_RTCCTL_ALMD) ? 1 : 0;
+	spin_unlock_irq(&pdata->rtc->irq_lock);
+	/* cast before shifting: as int, buf[5] << 24 could sign-extend */
+	sec = ((u32)buf[5] << 24) | (buf[4] << 16) | (buf[3] << 8) | buf[2];
+	rtc_time_to_tm(sec, &alrm->time);
+	return rtc_valid_tm(&alrm->time);
+}
+
+static int tx4939_rtc_alarm_irq_enable(struct device *dev, unsigned int enabled)
+{
+	struct tx4939rtc_plat_data *pdata = get_tx4939rtc_plat_data(dev);
+
+	spin_lock_irq(&pdata->rtc->irq_lock);
+	tx4939_rtc_cmd(pdata->rtcreg,
+		       TX4939_RTCCTL_COMMAND_NOP |
+		       (enabled ? TX4939_RTCCTL_ALME : 0));
+	spin_unlock_irq(&pdata->rtc->irq_lock);
+	return 0;
+}
+
+static irqreturn_t tx4939_rtc_interrupt(int irq, void *dev_id)
+{
+	struct tx4939rtc_plat_data *pdata = get_tx4939rtc_plat_data(dev_id);
+	struct tx4939_rtc_reg __iomem *rtcreg = pdata->rtcreg;
+	unsigned long events = RTC_IRQF;
+
+	spin_lock(&pdata->rtc->irq_lock);
+	if (__raw_readl(&rtcreg->ctl) & TX4939_RTCCTL_ALMD) {
+		events |= RTC_AF;
+		tx4939_rtc_cmd(rtcreg, TX4939_RTCCTL_COMMAND_NOP);
+	}
+	spin_unlock(&pdata->rtc->irq_lock);
+	rtc_update_irq(pdata->rtc, 1, events);
+	return IRQ_HANDLED;
+}
+
+static const struct rtc_class_ops tx4939_rtc_ops = {
+	.read_time		= tx4939_rtc_read_time,
+	.read_alarm		= tx4939_rtc_read_alarm,
+	.set_alarm		= tx4939_rtc_set_alarm,
+	.set_mmss		= tx4939_rtc_set_mmss,
+	.alarm_irq_enable	= tx4939_rtc_alarm_irq_enable,
+};
+
+static ssize_t tx4939_rtc_nvram_read(struct kobject *kobj,
+				     struct bin_attribute *bin_attr,
+				     char *buf, loff_t pos, size_t size)
+{
+	struct device *dev = container_of(kobj, struct device, kobj);
+	struct tx4939rtc_plat_data *pdata = get_tx4939rtc_plat_data(dev);
+	struct tx4939_rtc_reg __iomem *rtcreg = pdata->rtcreg;
+	ssize_t count;
+
+	spin_lock_irq(&pdata->rtc->irq_lock);
+	for (count = 0; size > 0 && pos < TX4939_RTC_REG_RAMSIZE;
+	     count++, size--) {
+		__raw_writel(pos++, &rtcreg->adr);
+		*buf++ = __raw_readl(&rtcreg->dat);
+	}
+	spin_unlock_irq(&pdata->rtc->irq_lock);
+	return count;
+}
+
+static ssize_t tx4939_rtc_nvram_write(struct kobject *kobj,
+				      struct bin_attribute *bin_attr,
+				      char *buf, loff_t pos, size_t size)
+{
+	struct device *dev = container_of(kobj, struct device, kobj);
+	struct tx4939rtc_plat_data *pdata = get_tx4939rtc_plat_data(dev);
+	struct tx4939_rtc_reg __iomem *rtcreg = pdata->rtcreg;
+	ssize_t count;
+
+	spin_lock_irq(&pdata->rtc->irq_lock);
+	for (count = 0; size > 0 && pos < TX4939_RTC_REG_RAMSIZE;
+	     count++, size--) {
+		__raw_writel(pos++, &rtcreg->adr);
+		__raw_writel(*buf++, &rtcreg->dat);
+	}
+	spin_unlock_irq(&pdata->rtc->irq_lock);
+	return count;
+}
+
+static struct bin_attribute tx4939_rtc_nvram_attr = {
+	.attr = {
+		.name = "nvram",
+		.mode = S_IRUGO | S_IWUSR,
+	},
+	.size = TX4939_RTC_REG_RAMSIZE,
+	.read = tx4939_rtc_nvram_read,
+	.write = tx4939_rtc_nvram_write,
+};
+
+static int __init tx4939_rtc_probe(struct platform_device *pdev)
+{
+	struct rtc_device *rtc;
+	struct tx4939rtc_plat_data *pdata;
+	struct resource *res;
+	int irq, ret;
+
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	if (!res)
+		return -ENODEV;
+	irq = platform_get_irq(pdev, 0);
+	if (irq < 0)
+		return -ENODEV;
+	pdata = devm_kzalloc(&pdev->dev, sizeof(*pdata), GFP_KERNEL);
+	if (!pdata)
+		return -ENOMEM;
+	platform_set_drvdata(pdev, pdata);
+
+	if (!devm_request_mem_region(&pdev->dev, res->start,
+				     resource_size(res), pdev->name))
+		return -EBUSY;
+	pdata->rtcreg = devm_ioremap(&pdev->dev, res->start,
+				     resource_size(res));
+	if (!pdata->rtcreg)
+		return -EBUSY;
+
+	tx4939_rtc_cmd(pdata->rtcreg, TX4939_RTCCTL_COMMAND_NOP);
+	if (devm_request_irq(&pdev->dev, irq, tx4939_rtc_interrupt,
+			     IRQF_DISABLED | IRQF_SHARED,
+			     pdev->name, &pdev->dev) < 0) {
+		return -EBUSY;
+	}
+	rtc = rtc_device_register(pdev->name, &pdev->dev,
+				  &tx4939_rtc_ops, THIS_MODULE);
+	if (IS_ERR(rtc))
+		return PTR_ERR(rtc);
+	pdata->rtc = rtc;
+	ret = sysfs_create_bin_file(&pdev->dev.kobj, &tx4939_rtc_nvram_attr);
+	if (ret)
+		rtc_device_unregister(rtc);
+	return ret;
+}
+
+static int __exit tx4939_rtc_remove(struct platform_device *pdev)
+{
+	struct tx4939rtc_plat_data *pdata = platform_get_drvdata(pdev);
+	struct rtc_device *rtc = pdata->rtc;
+
+	spin_lock_irq(&rtc->irq_lock);
+	tx4939_rtc_cmd(pdata->rtcreg, TX4939_RTCCTL_COMMAND_NOP);
+	spin_unlock_irq(&rtc->irq_lock);
+	sysfs_remove_bin_file(&pdev->dev.kobj, &tx4939_rtc_nvram_attr);
+	rtc_device_unregister(rtc);
+	platform_set_drvdata(pdev, NULL);
+	return 0;
+}
+
+static struct platform_driver tx4939_rtc_driver = {
+	.remove		= __exit_p(tx4939_rtc_remove),
+	.driver		= {
+		.name	= "tx4939rtc",
+		.owner	= THIS_MODULE,
+	},
+};
+
+static int __init tx4939rtc_init(void)
+{
+	return platform_driver_probe(&tx4939_rtc_driver, tx4939_rtc_probe);
+}
+
+static void __exit tx4939rtc_exit(void)
+{
+	platform_driver_unregister(&tx4939_rtc_driver);
+}
+
+module_init(tx4939rtc_init);
+module_exit(tx4939rtc_exit);
+
+MODULE_AUTHOR("Atsushi Nemoto <anemo@mba.ocn.ne.jp>");
+MODULE_DESCRIPTION("TX4939 internal RTC driver");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS("platform:tx4939rtc");
diff --git a/drivers/rtc/rtc-vr41xx.c b/drivers/rtc/rtc-vr41xx.c
index 834dcc6..f11297a 100644
--- a/drivers/rtc/rtc-vr41xx.c
+++ b/drivers/rtc/rtc-vr41xx.c
@@ -27,6 +27,7 @@
 #include <linux/rtc.h>
 #include <linux/spinlock.h>
 #include <linux/types.h>
+#include <linux/log2.h>
 
 #include <asm/div64.h>
 #include <asm/io.h>
@@ -84,8 +85,8 @@
 static char rtc_name[] = "RTC";
 static unsigned long periodic_count;
 static unsigned int alarm_enabled;
-static int aie_irq = -1;
-static int pie_irq = -1;
+static int aie_irq;
+static int pie_irq;
 
 static inline unsigned long read_elapsed_second(void)
 {
@@ -210,6 +211,8 @@
 {
 	unsigned long count;
 
+	if (!is_power_of_2(freq))
+		return -EINVAL;
 	count = RTC_FREQUENCY;
 	do_div(count, freq);
 
@@ -360,7 +363,7 @@
 	spin_unlock_irq(&rtc_lock);
 
 	aie_irq = platform_get_irq(pdev, 0);
-	if (aie_irq < 0 || aie_irq >= nr_irqs) {
+	if (aie_irq <= 0) {
 		retval = -EBUSY;
 		goto err_device_unregister;
 	}
@@ -371,7 +374,7 @@
 		goto err_device_unregister;
 
 	pie_irq = platform_get_irq(pdev, 1);
-	if (pie_irq < 0 || pie_irq >= nr_irqs)
+	if (pie_irq <= 0)
 		goto err_free_irq;
 
 	retval = request_irq(pie_irq, rtclong1_interrupt, IRQF_DISABLED,
diff --git a/drivers/spi/Kconfig b/drivers/spi/Kconfig
index b9d0efb..4a6fe01 100644
--- a/drivers/spi/Kconfig
+++ b/drivers/spi/Kconfig
@@ -78,7 +78,7 @@
 	  will be called au1550_spi.
 
 config SPI_BITBANG
-	tristate "Bitbanging SPI master"
+	tristate "Utilities for Bitbanging SPI masters"
 	help
 	  With a few GPIO pins, your system can bitbang the SPI protocol.
 	  Select this to get SPI support through I/O pins (GPIO, parallel
@@ -100,6 +100,22 @@
 	  inexpensive battery powered microcontroller evaluation board.
 	  This same cable can be used to flash new firmware.
 
+config SPI_GPIO
+	tristate "GPIO-based bitbanging SPI Master"
+	depends on GENERIC_GPIO
+	select SPI_BITBANG
+	help
+	  This simple GPIO bitbanging SPI master uses the arch-neutral GPIO
+	  interface to manage MOSI, MISO, SCK, and chipselect signals.  SPI
+	  slaves connected to a bus using this driver are configured as usual,
+	  except that the spi_board_info.controller_data holds the GPIO number
+	  for the chipselect used by this controller driver.
+
+	  Note that this driver often won't achieve even 1 Mbit/sec speeds,
+	  making it unusually slow for SPI.  If your platform can inline
+	  GPIO operations, you should be able to leverage that for better
+	  speed with a custom version of this driver; see the source code.
+
 config SPI_IMX
 	tristate "Freescale iMX SPI controller"
 	depends on ARCH_IMX && EXPERIMENTAL
diff --git a/drivers/spi/Makefile b/drivers/spi/Makefile
index ccf18de..5e9f521 100644
--- a/drivers/spi/Makefile
+++ b/drivers/spi/Makefile
@@ -16,6 +16,7 @@
 obj-$(CONFIG_SPI_BITBANG)		+= spi_bitbang.o
 obj-$(CONFIG_SPI_AU1550)		+= au1550_spi.o
 obj-$(CONFIG_SPI_BUTTERFLY)		+= spi_butterfly.o
+obj-$(CONFIG_SPI_GPIO)			+= spi_gpio.o
 obj-$(CONFIG_SPI_IMX)			+= spi_imx.o
 obj-$(CONFIG_SPI_LM70_LLP)		+= spi_lm70llp.o
 obj-$(CONFIG_SPI_PXA2XX)		+= pxa2xx_spi.o
diff --git a/drivers/spi/atmel_spi.c b/drivers/spi/atmel_spi.c
index 8abae4a..5e39bac 100644
--- a/drivers/spi/atmel_spi.c
+++ b/drivers/spi/atmel_spi.c
@@ -30,13 +30,6 @@
  * The core SPI transfer engine just talks to a register bank to set up
  * DMA transfers; transfer queue progress is driven by IRQs.  The clock
  * framework provides the base clock, subdivided for each spi_device.
- *
- * Newer controllers, marked with "new_1" flag, have:
- *  - CR.LASTXFER
- *  - SPI_MR.DIV32 may become FDIV or must-be-zero (here: always zero)
- *  - SPI_SR.TXEMPTY, SPI_SR.NSSR (and corresponding irqs)
- *  - SPI_CSRx.CSAAT
- *  - SPI_CSRx.SBCR allows faster clocking
  */
 struct atmel_spi {
 	spinlock_t		lock;
@@ -45,7 +38,6 @@
 	int			irq;
 	struct clk		*clk;
 	struct platform_device	*pdev;
-	unsigned		new_1:1;
 	struct spi_device	*stay;
 
 	u8			stopping;
@@ -59,10 +51,33 @@
 	dma_addr_t		buffer_dma;
 };
 
+/* Controller-specific per-slave state, kept in spi->controller_state */
+struct atmel_spi_device {
+	unsigned int		npcs_pin;	/* GPIO driving the chipselect */
+	u32			csr;		/* CSR value computed by setup() */
+};
+
 #define BUFFER_SIZE		PAGE_SIZE
 #define INVALID_DMA_ADDRESS	0xffffffff
 
 /*
+ * Version 2 of the SPI controller has
+ *  - CR.LASTXFER
+ *  - SPI_MR.DIV32 may become FDIV or must-be-zero (here: always zero)
+ *  - SPI_SR.TXEMPTY, SPI_SR.NSSR (and corresponding irqs)
+ *  - SPI_CSRx.CSAAT
+ *  - SPI_CSRx.SBCR allows faster clocking
+ *
+ * We can determine the controller version by reading the VERSION
+ * register, but I haven't checked that it exists on all chips, and
+ * this is cheaper anyway.
+ */
+static bool atmel_spi_is_v2(void)
+{
+	return !cpu_is_at91rm9200();	/* only AT91RM9200 counts as v1 */
+}
+
+/*
  * Earlier SPI controllers (e.g. on at91rm9200) have a design bug whereby
  * they assume that spi slave device state will not change on deselect, so
  * that automagic deselection is OK.  ("NPCSx rises if no data is to be
@@ -80,39 +95,58 @@
  * Master on Chip Select 0.")  No workaround exists for that ... so for
  * nCS0 on that chip, we (a) don't use the GPIO, (b) can't support CS_HIGH,
  * and (c) will trigger that first erratum in some cases.
+ *
+ * TODO: Test if the atmel_spi_is_v2() branch below works on
+ * AT91RM9200 if we use some other register than CSR0. However, don't
+ * do this unconditionally since AP7000 has an erratum where the BITS
+ * field in CSR0 overrides all other CSRs.
  */
 
 static void cs_activate(struct atmel_spi *as, struct spi_device *spi)
 {
-	unsigned gpio = (unsigned) spi->controller_data;
+	struct atmel_spi_device *asd = spi->controller_state;
 	unsigned active = spi->mode & SPI_CS_HIGH;
 	u32 mr;
-	int i;
-	u32 csr;
-	u32 cpol = (spi->mode & SPI_CPOL) ? SPI_BIT(CPOL) : 0;
 
-	/* Make sure clock polarity is correct */
-	for (i = 0; i < spi->master->num_chipselect; i++) {
-		csr = spi_readl(as, CSR0 + 4 * i);
-		if ((csr ^ cpol) & SPI_BIT(CPOL))
-			spi_writel(as, CSR0 + 4 * i, csr ^ SPI_BIT(CPOL));
+	if (atmel_spi_is_v2()) {
+		/*
+		 * Always use CSR0. This ensures that the clock
+		 * switches to the correct idle polarity before we
+		 * toggle the CS.
+		 */
+		spi_writel(as, CSR0, asd->csr);
+		spi_writel(as, MR, SPI_BF(PCS, 0x0e) | SPI_BIT(MODFDIS)
+				| SPI_BIT(MSTR));
+		mr = spi_readl(as, MR);
+		gpio_set_value(asd->npcs_pin, active);
+	} else {
+		u32 cpol = (spi->mode & SPI_CPOL) ? SPI_BIT(CPOL) : 0;
+		int i;
+		u32 csr;
+
+		/* Make sure clock polarity is correct */
+		for (i = 0; i < spi->master->num_chipselect; i++) {
+			csr = spi_readl(as, CSR0 + 4 * i);
+			if ((csr ^ cpol) & SPI_BIT(CPOL))
+				spi_writel(as, CSR0 + 4 * i,
+						csr ^ SPI_BIT(CPOL));
+		}
+
+		mr = spi_readl(as, MR);
+		mr = SPI_BFINS(PCS, ~(1 << spi->chip_select), mr);
+		if (spi->chip_select != 0)
+			gpio_set_value(asd->npcs_pin, active);
+		spi_writel(as, MR, mr);
 	}
 
-	mr = spi_readl(as, MR);
-	mr = SPI_BFINS(PCS, ~(1 << spi->chip_select), mr);
-
 	dev_dbg(&spi->dev, "activate %u%s, mr %08x\n",
-			gpio, active ? " (high)" : "",
+			asd->npcs_pin, active ? " (high)" : "",
 			mr);
-
-	if (!(cpu_is_at91rm9200() && spi->chip_select == 0))
-		gpio_set_value(gpio, active);
-	spi_writel(as, MR, mr);
 }
 
 static void cs_deactivate(struct atmel_spi *as, struct spi_device *spi)
 {
-	unsigned gpio = (unsigned) spi->controller_data;
+	struct atmel_spi_device *asd = spi->controller_state;
 	unsigned active = spi->mode & SPI_CS_HIGH;
 	u32 mr;
 
@@ -126,11 +160,11 @@
 	}
 
 	dev_dbg(&spi->dev, "DEactivate %u%s, mr %08x\n",
-			gpio, active ? " (low)" : "",
+			asd->npcs_pin, active ? " (low)" : "",
 			mr);
 
-	if (!(cpu_is_at91rm9200() && spi->chip_select == 0))
-		gpio_set_value(gpio, !active);
+	if (atmel_spi_is_v2() || spi->chip_select != 0)
+		gpio_set_value(asd->npcs_pin, !active);
 }
 
 static inline int atmel_spi_xfer_is_last(struct spi_message *msg,
@@ -502,6 +536,7 @@
 static int atmel_spi_setup(struct spi_device *spi)
 {
 	struct atmel_spi	*as;
+	struct atmel_spi_device	*asd;
 	u32			scbr, csr;
 	unsigned int		bits = spi->bits_per_word;
 	unsigned long		bus_hz;
@@ -536,19 +571,16 @@
 	}
 
 	/* see notes above re chipselect */
-	if (cpu_is_at91rm9200()
+	if (!atmel_spi_is_v2()
 			&& spi->chip_select == 0
 			&& (spi->mode & SPI_CS_HIGH)) {
 		dev_dbg(&spi->dev, "setup: can't be active-high\n");
 		return -EINVAL;
 	}
 
-	/*
-	 * Pre-new_1 chips start out at half the peripheral
-	 * bus speed.
-	 */
+	/* v1 chips start out at half the peripheral bus speed. */
 	bus_hz = clk_get_rate(as->clk);
-	if (!as->new_1)
+	if (!atmel_spi_is_v2())
 		bus_hz /= 2;
 
 	if (spi->max_speed_hz) {
@@ -589,11 +621,20 @@
 
 	/* chipselect must have been muxed as GPIO (e.g. in board setup) */
 	npcs_pin = (unsigned int)spi->controller_data;
-	if (!spi->controller_state) {
+	asd = spi->controller_state;
+	if (!asd) {
+		asd = kzalloc(sizeof(struct atmel_spi_device), GFP_KERNEL);
+		if (!asd)
+			return -ENOMEM;
+
 		ret = gpio_request(npcs_pin, spi->dev.bus_id);
-		if (ret)
+		if (ret) {
+			kfree(asd);
 			return ret;
-		spi->controller_state = (void *)npcs_pin;
+		}
+
+		asd->npcs_pin = npcs_pin;
+		spi->controller_state = asd;
 		gpio_direction_output(npcs_pin, !(spi->mode & SPI_CS_HIGH));
 	} else {
 		unsigned long		flags;
@@ -605,11 +646,14 @@
 		spin_unlock_irqrestore(&as->lock, flags);
 	}
 
+	asd->csr = csr;
+
 	dev_dbg(&spi->dev,
 		"setup: %lu Hz bpw %u mode 0x%x -> csr%d %08x\n",
 		bus_hz / scbr, bits, spi->mode, spi->chip_select, csr);
 
-	spi_writel(as, CSR0 + 4 * spi->chip_select, csr);
+	if (!atmel_spi_is_v2())
+		spi_writel(as, CSR0 + 4 * spi->chip_select, csr);
 
 	return 0;
 }
@@ -684,10 +728,11 @@
 static void atmel_spi_cleanup(struct spi_device *spi)
 {
 	struct atmel_spi	*as = spi_master_get_devdata(spi->master);
+	struct atmel_spi_device	*asd = spi->controller_state;
 	unsigned		gpio = (unsigned) spi->controller_data;
 	unsigned long		flags;
 
-	if (!spi->controller_state)
+	if (!asd)
 		return;
 
 	spin_lock_irqsave(&as->lock, flags);
@@ -697,7 +742,9 @@
 	}
 	spin_unlock_irqrestore(&as->lock, flags);
 
+	spi->controller_state = NULL;
 	gpio_free(gpio);
+	kfree(asd);
 }
 
 /*-------------------------------------------------------------------------*/
@@ -755,8 +802,6 @@
 		goto out_free_buffer;
 	as->irq = irq;
 	as->clk = clk;
-	if (!cpu_is_at91rm9200())
-		as->new_1 = 1;
 
 	ret = request_irq(irq, atmel_spi_interrupt, 0,
 			pdev->dev.bus_id, master);
diff --git a/drivers/spi/pxa2xx_spi.c b/drivers/spi/pxa2xx_spi.c
index 6104f46..d0fc4ca 100644
--- a/drivers/spi/pxa2xx_spi.c
+++ b/drivers/spi/pxa2xx_spi.c
@@ -1561,11 +1561,12 @@
 static int pxa2xx_spi_remove(struct platform_device *pdev)
 {
 	struct driver_data *drv_data = platform_get_drvdata(pdev);
-	struct ssp_device *ssp = drv_data->ssp;
+	struct ssp_device *ssp;
 	int status = 0;
 
 	if (!drv_data)
 		return 0;
+	ssp = drv_data->ssp;
 
 	/* Remove the queue */
 	status = destroy_queue(drv_data);
diff --git a/drivers/spi/spi_gpio.c b/drivers/spi/spi_gpio.c
new file mode 100644
index 0000000..49698ca
--- /dev/null
+++ b/drivers/spi/spi_gpio.c
@@ -0,0 +1,360 @@
+/*
+ * spi_gpio.c - SPI master driver using generic bitbanged GPIO
+ *
+ * Copyright (C) 2006,2008 David Brownell
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/platform_device.h>
+#include <linux/gpio.h>
+
+#include <linux/spi/spi.h>
+#include <linux/spi/spi_bitbang.h>
+#include <linux/spi/spi_gpio.h>
+
+
+/*
+ * This bitbanging SPI master driver should help make systems usable
+ * when a native hardware SPI engine is not available, perhaps because
+ * its driver isn't yet working or because the I/O pins it requires
+ * are used for other purposes.
+ *
+ * platform_device->driver_data ... points to spi_gpio
+ *
+ * spi->controller_state ... reserved for bitbang framework code
+ * spi->controller_data ... holds chipselect GPIO
+ *
+ * spi->master->dev.driver_data ... points to spi_gpio->bitbang
+ */
+
+struct spi_gpio {
+	struct spi_bitbang		bitbang;	/* see spi_to_pdata() */
+	struct spi_gpio_platform_data	pdata;		/* copy made by probe */
+	struct platform_device		*pdev;		/* device we probed */
+};
+
+/*----------------------------------------------------------------------*/
+
+/*
+ * Because the overhead of going through four GPIO procedure calls
+ * per transferred bit can make performance a problem, this code
+ * is set up so that you can use it in either of two ways:
+ *
+ *   - The slow generic way:  set up platform_data to hold the GPIO
+ *     numbers used for MISO/MOSI/SCK, and issue procedure calls for
+ *     each of them.  This driver can handle several such busses.
+ *
+ *   - The quicker inlined way:  only helps with platform GPIO code
+ *     that inlines operations for constant GPIOs.  This can give
+ *     you tight (fast!) inner loops, but each such bus needs a
+ *     new driver.  You'll define a new C file, with Makefile and
+ *     Kconfig support; the C code can be a total of six lines:
+ *
+ *		#define DRIVER_NAME	"myboard_spi2"
+ *		#define	SPI_MISO_GPIO	119
+ *		#define	SPI_MOSI_GPIO	120
+ *		#define	SPI_SCK_GPIO	121
+ *		#define	SPI_N_CHIPSEL	4
+ *		#include "spi_gpio.c"
+ */
+
+#ifndef DRIVER_NAME
+#define DRIVER_NAME	"spi_gpio"
+
+#define GENERIC_BITBANG	/* vs tight inlines */
+
+/* all functions referencing these symbols must define pdata */
+#define SPI_MISO_GPIO	((pdata)->miso)
+#define SPI_MOSI_GPIO	((pdata)->mosi)
+#define SPI_SCK_GPIO	((pdata)->sck)
+
+#define SPI_N_CHIPSEL	((pdata)->num_chipselect)
+
+#endif
+
+/*----------------------------------------------------------------------*/
+
+static inline const struct spi_gpio_platform_data * __pure
+spi_to_pdata(const struct spi_device *spi)
+{
+	const struct spi_bitbang	*bb;
+
+	/* master devdata is the spi_bitbang embedded in struct spi_gpio */
+	bb = spi_master_get_devdata(spi->master);
+
+	return &container_of(bb, struct spi_gpio, bitbang)->pdata;
+}
+
+/* this is #defined to avoid unused-variable warnings when inlining */
+#define pdata		spi_to_pdata(spi)
+
+static inline void setsck(const struct spi_device *spi, int is_on)
+{
+	gpio_set_value(SPI_SCK_GPIO, is_on);
+}
+
+static inline void setmosi(const struct spi_device *spi, int is_on)
+{
+	gpio_set_value(SPI_MOSI_GPIO, is_on);
+}
+
+static inline int getmiso(const struct spi_device *spi)
+{
+	return gpio_get_value(SPI_MISO_GPIO);
+}
+
+#undef pdata
+
+/*
+ * NOTE:  this clocks "as fast as we can".  It "should" be a function of the
+ * requested device clock.  Software overhead means we usually have trouble
+ * reaching even one Mbit/sec (except when we can inline bitops), so for now
+ * we'll just assume we never need additional per-bit slowdowns.
+ */
+#define spidelay(nsecs)	do {} while (0)
+
+#define	EXPAND_BITBANG_TXRX
+#include <linux/spi/spi_bitbang.h>
+
+/*
+ * These functions can leverage inline expansion of GPIO calls to shrink
+ * costs for a txrx bit, often by factors of around ten (by instruction
+ * count).  That is particularly visible for larger word sizes, but helps
+ * even with default 8-bit words.
+ *
+ * REVISIT overheads calling these functions for each word also have
+ * significant performance costs.  Having txrx_bufs() calls that inline
+ * the txrx_word() logic would help performance, e.g. on larger blocks
+ * used with flash storage or MMC/SD.  There should also be ways to make
+ * GCC be less stupid about reloading registers inside the I/O loops,
+ * even without inlined GPIO calls; __attribute__((hot)) on GCC 4.3?
+ */
+
+static u32 spi_gpio_txrx_word_mode0(struct spi_device *spi,
+		unsigned nsecs, u32 word, u8 bits)
+{
+	return bitbang_txrx_be_cpha0(spi, nsecs, 0, word, bits);
+}
+
+static u32 spi_gpio_txrx_word_mode1(struct spi_device *spi,
+		unsigned nsecs, u32 word, u8 bits)
+{
+	return bitbang_txrx_be_cpha1(spi, nsecs, 0, word, bits);
+}
+
+static u32 spi_gpio_txrx_word_mode2(struct spi_device *spi,
+		unsigned nsecs, u32 word, u8 bits)
+{
+	return bitbang_txrx_be_cpha0(spi, nsecs, 1, word, bits);
+}
+
+static u32 spi_gpio_txrx_word_mode3(struct spi_device *spi,
+		unsigned nsecs, u32 word, u8 bits)
+{
+	return bitbang_txrx_be_cpha1(spi, nsecs, 1, word, bits);
+}
+
+/*----------------------------------------------------------------------*/
+
+static void spi_gpio_chipselect(struct spi_device *spi, int is_active)
+{
+	unsigned long	cs_gpio = (unsigned long) spi->controller_data;
+	int		level = is_active;
+
+	if (is_active)		/* set initial clock polarity */
+		setsck(spi, spi->mode & SPI_CPOL);
+	if (!(spi->mode & SPI_CS_HIGH))
+		level = !is_active;	/* SPI is normally active-low */
+	gpio_set_value(cs_gpio, level);
+}
+
+static int spi_gpio_setup(struct spi_device *spi)
+{
+	unsigned long	cs = (unsigned long) spi->controller_data;
+	int		status = 0;
+
+	if (spi->bits_per_word > 32)
+		return -EINVAL;
+
+	if (!spi->controller_state) {
+		status = gpio_request(cs, spi->dev.bus_id);
+		if (status)
+			return status;
+		/* start deselected:  low if SPI_CS_HIGH, else (normally) high */
+		status = gpio_direction_output(cs, !(spi->mode & SPI_CS_HIGH));
+	}
+	if (!status)
+		status = spi_bitbang_setup(spi);
+	/* a failed first-time setup must not keep the chipselect GPIO */
+	if (status && !spi->controller_state)
+		gpio_free(cs);
+	return status;
+}
+
+static void spi_gpio_cleanup(struct spi_device *spi)
+{
+	/* give back the chipselect GPIO requested by spi_gpio_setup() ... */
+	gpio_free((unsigned long) spi->controller_data);
+	/* ... then hand the device to the bitbang framework's cleanup */
+	spi_bitbang_cleanup(spi);
+}
+
+static int __init spi_gpio_alloc(unsigned pin, const char *label, bool is_in)
+{
+	int err;
+
+	/* claim the pin first; its direction is only set once we own it */
+	err = gpio_request(pin, label);
+	if (err)
+		return err;
+
+	if (is_in)
+		return gpio_direction_input(pin);
+	return gpio_direction_output(pin, 0);
+}
+
+/*
+ * Claim MOSI, MISO and SCK.  If a step fails, pins from earlier steps are
+ * released and that step's error code is returned.
+ */
+static int __init
+spi_gpio_request(struct spi_gpio_platform_data *pdata, const char *label)
+{
+	int err;
+
+	/* NOTE:  SPI_*_GPIO symbols may reference "pdata" */
+
+	err = spi_gpio_alloc(SPI_MOSI_GPIO, label, false);
+	if (err)
+		return err;
+
+	err = spi_gpio_alloc(SPI_MISO_GPIO, label, true);
+	if (err)
+		goto free_mosi;
+
+	err = spi_gpio_alloc(SPI_SCK_GPIO, label, false);
+	if (!err)
+		return 0;
+
+	gpio_free(SPI_MISO_GPIO);
+free_mosi:
+	gpio_free(SPI_MOSI_GPIO);
+	return err;
+}
+
+static int __init spi_gpio_probe(struct platform_device *pdev)
+{
+	int				status;
+	struct spi_master		*master;
+	struct spi_gpio			*spi_gpio;
+	struct spi_gpio_platform_data	*pdata;
+
+	pdata = pdev->dev.platform_data;
+#ifdef GENERIC_BITBANG
+	if (!pdata || !pdata->num_chipselect)
+		return -ENODEV;
+#endif
+	/* claim MOSI, MISO and SCK before allocating the master */
+	status = spi_gpio_request(pdata, dev_name(&pdev->dev));
+	if (status < 0)
+		return status;
+
+	master = spi_alloc_master(&pdev->dev, sizeof *spi_gpio);
+	if (!master) {
+		status = -ENOMEM;
+		goto gpio_free;
+	}
+	spi_gpio = spi_master_get_devdata(master);
+	platform_set_drvdata(pdev, spi_gpio);
+
+	spi_gpio->pdev = pdev;
+	if (pdata)
+		spi_gpio->pdata = *pdata;
+
+	master->bus_num = pdev->id;
+	master->num_chipselect = SPI_N_CHIPSEL;
+	master->setup = spi_gpio_setup;
+	master->cleanup = spi_gpio_cleanup;
+	/* the bitbang state takes its own reference on the master */
+	spi_gpio->bitbang.master = spi_master_get(master);
+	spi_gpio->bitbang.chipselect = spi_gpio_chipselect;
+	spi_gpio->bitbang.txrx_word[SPI_MODE_0] = spi_gpio_txrx_word_mode0;
+	spi_gpio->bitbang.txrx_word[SPI_MODE_1] = spi_gpio_txrx_word_mode1;
+	spi_gpio->bitbang.txrx_word[SPI_MODE_2] = spi_gpio_txrx_word_mode2;
+	spi_gpio->bitbang.txrx_word[SPI_MODE_3] = spi_gpio_txrx_word_mode3;
+	spi_gpio->bitbang.setup_transfer = spi_bitbang_setup_transfer;
+	spi_gpio->bitbang.flags = SPI_CS_HIGH;
+	/* if start fails, drop both master references and free the GPIOs */
+	status = spi_bitbang_start(&spi_gpio->bitbang);
+	if (status < 0) {
+		spi_master_put(spi_gpio->bitbang.master);
+gpio_free:	/* also entered directly when spi_alloc_master() fails */
+		gpio_free(SPI_MISO_GPIO);
+		gpio_free(SPI_MOSI_GPIO);
+		gpio_free(SPI_SCK_GPIO);
+		spi_master_put(master);
+	}
+
+	return status;
+}
+
+static int __exit spi_gpio_remove(struct platform_device *pdev)
+{
+	struct spi_gpio			*sg = platform_get_drvdata(pdev);
+	struct spi_gpio_platform_data	*pdata;
+	int				ret;
+
+	/* SPI_*_GPIO symbols may reference "pdata" */
+	pdata = pdev->dev.platform_data;
+
+	/* stop() unregisters child devices too */
+	ret = spi_bitbang_stop(&sg->bitbang);
+	spi_master_put(sg->bitbang.master);
+
+	platform_set_drvdata(pdev, NULL);
+
+	gpio_free(SPI_MISO_GPIO);
+	gpio_free(SPI_MOSI_GPIO);
+	gpio_free(SPI_SCK_GPIO);
+
+	return ret;
+}
+
+MODULE_ALIAS("platform:" DRIVER_NAME);
+
+static struct platform_driver spi_gpio_driver = {
+	.driver.name	= DRIVER_NAME,
+	.driver.owner	= THIS_MODULE,
+	.remove		= __exit_p(spi_gpio_remove),
+};
+
+static int __init spi_gpio_init(void)
+{
+	return platform_driver_probe(&spi_gpio_driver, spi_gpio_probe);
+}
+module_init(spi_gpio_init);
+
+static void __exit spi_gpio_exit(void)
+{
+	platform_driver_unregister(&spi_gpio_driver);
+}
+module_exit(spi_gpio_exit);
+
+
+MODULE_DESCRIPTION("SPI master driver using generic bitbanged GPIO ");
+MODULE_AUTHOR("David Brownell");
+MODULE_LICENSE("GPL");
diff --git a/drivers/spi/spi_s3c24xx.c b/drivers/spi/spi_s3c24xx.c
index 256d183..b3ebc1d 100644
--- a/drivers/spi/spi_s3c24xx.c
+++ b/drivers/spi/spi_s3c24xx.c
@@ -19,6 +19,7 @@
 #include <linux/err.h>
 #include <linux/clk.h>
 #include <linux/platform_device.h>
+#include <linux/gpio.h>
 
 #include <linux/spi/spi.h>
 #include <linux/spi/spi_bitbang.h>
@@ -27,7 +28,6 @@
 #include <asm/dma.h>
 #include <mach/hardware.h>
 
-#include <mach/regs-gpio.h>
 #include <plat/regs-spi.h>
 #include <mach/spi.h>
 
@@ -66,7 +66,7 @@
 
 static void s3c24xx_spi_gpiocs(struct s3c2410_spi_info *spi, int cs, int pol)
 {
-	s3c2410_gpio_setpin(spi->pin_cs, pol);
+	gpio_set_value(spi->pin_cs, pol);
 }
 
 static void s3c24xx_spi_chipsel(struct spi_device *spi, int value)
@@ -248,8 +248,13 @@
 	writeb(SPPIN_DEFAULT, hw->regs + S3C2410_SPPIN);
 	writeb(SPCON_DEFAULT, hw->regs + S3C2410_SPCON);
 
-	if (hw->pdata && hw->pdata->gpio_setup)
-		hw->pdata->gpio_setup(hw->pdata, 1);
+	if (hw->pdata) {
+		if (hw->set_cs == s3c24xx_spi_gpiocs)
+			gpio_direction_output(hw->pdata->pin_cs, 1);
+
+		if (hw->pdata->gpio_setup)
+			hw->pdata->gpio_setup(hw->pdata, 1);
+	}
 }
 
 static int __init s3c24xx_spi_probe(struct platform_device *pdev)
@@ -343,18 +348,27 @@
 		goto err_no_clk;
 	}
 
-	s3c24xx_spi_initialsetup(hw);
-
 	/* setup any gpio we can */
 
 	if (!pdata->set_cs) {
-		hw->set_cs = s3c24xx_spi_gpiocs;
+		if (pdata->pin_cs < 0) {
+			dev_err(&pdev->dev, "No chipselect pin\n");
+			goto err_register;
+		}
 
-		s3c2410_gpio_setpin(pdata->pin_cs, 1);
-		s3c2410_gpio_cfgpin(pdata->pin_cs, S3C2410_GPIO_OUTPUT);
+		err = gpio_request(pdata->pin_cs, dev_name(&pdev->dev));
+		if (err) {
+			dev_err(&pdev->dev, "Failed to get gpio for cs\n");
+			goto err_register;
+		}
+
+		hw->set_cs = s3c24xx_spi_gpiocs;
+		gpio_direction_output(pdata->pin_cs, 1);
 	} else
 		hw->set_cs = pdata->set_cs;
 
+	s3c24xx_spi_initialsetup(hw);
+
 	/* register our spi controller */
 
 	err = spi_bitbang_start(&hw->bitbang);
@@ -366,6 +380,9 @@
 	return 0;
 
  err_register:
+	if (hw->set_cs == s3c24xx_spi_gpiocs)
+		gpio_free(pdata->pin_cs);
+
 	clk_disable(hw->clk);
 	clk_put(hw->clk);
 
@@ -401,6 +418,9 @@
 	free_irq(hw->irq, hw);
 	iounmap(hw->regs);
 
+	if (hw->set_cs == s3c24xx_spi_gpiocs)
+		gpio_free(hw->pdata->pin_cs);
+
 	release_resource(hw->ioarea);
 	kfree(hw->ioarea);
 
diff --git a/drivers/video/aty/aty128fb.c b/drivers/video/aty/aty128fb.c
index 243ea4a..db16112 100644
--- a/drivers/video/aty/aty128fb.c
+++ b/drivers/video/aty/aty128fb.c
@@ -2051,7 +2051,7 @@
 
 	/* Virtualize mmio region */
 	info->fix.mmio_start = reg_addr;
-	par->regbase = ioremap(reg_addr, pci_resource_len(pdev, 2));
+	par->regbase = pci_ioremap_bar(pdev, 2);
 	if (!par->regbase)
 		goto err_free_info;
 
diff --git a/drivers/video/bfin-t350mcqb-fb.c b/drivers/video/bfin-t350mcqb-fb.c
index 7d1b819..a9b3ada 100644
--- a/drivers/video/bfin-t350mcqb-fb.c
+++ b/drivers/video/bfin-t350mcqb-fb.c
@@ -255,7 +255,7 @@
 {
 
 	if (var->bits_per_pixel != LCD_BPP) {
-		pr_debug("%s: depth not supported: %u BPP\n", __FUNCTION__,
+		pr_debug("%s: depth not supported: %u BPP\n", __func__,
 			 var->bits_per_pixel);
 		return -EINVAL;
 	}
@@ -264,7 +264,7 @@
 	    info->var.xres_virtual != var->xres_virtual ||
 	    info->var.yres_virtual != var->yres_virtual) {
 		pr_debug("%s: Resolution not supported: X%u x Y%u \n",
-			 __FUNCTION__, var->xres, var->yres);
+			 __func__, var->xres, var->yres);
 		return -EINVAL;
 	}
 
@@ -274,7 +274,7 @@
 
 	if ((info->fix.line_length * var->yres_virtual) > info->fix.smem_len) {
 		pr_debug("%s: Memory Limit requested yres_virtual = %u\n",
-			 __FUNCTION__, var->yres_virtual);
+			 __func__, var->yres_virtual);
 		return -ENOMEM;
 	}
 
diff --git a/drivers/video/carminefb.c b/drivers/video/carminefb.c
index c9b1913..c7ff3c1 100644
--- a/drivers/video/carminefb.c
+++ b/drivers/video/carminefb.c
@@ -168,7 +168,7 @@
 	blue >>= 8;
 	transp >>= 8;
 
-	((u32 *)info->pseudo_palette)[regno] = be32_to_cpu(transp << 24 |
+	((__be32 *)info->pseudo_palette)[regno] = cpu_to_be32(transp << 24 |
 		red << 0 | green << 8 | blue << 16);
 	return 0;
 }
diff --git a/drivers/video/cyber2000fb.c b/drivers/video/cyber2000fb.c
index 39d5d64..7a9e42e 100644
--- a/drivers/video/cyber2000fb.c
+++ b/drivers/video/cyber2000fb.c
@@ -1583,8 +1583,7 @@
 		goto failed_release;
 
 	cfb->dev = dev;
-	cfb->region = ioremap(pci_resource_start(dev, 0),
-			      pci_resource_len(dev, 0));
+	cfb->region = pci_ioremap_bar(dev, 0);
 	if (!cfb->region)
 		goto failed_ioremap;
 
diff --git a/drivers/video/fbmem.c b/drivers/video/fbmem.c
index 3c65b0d..756efeb 100644
--- a/drivers/video/fbmem.c
+++ b/drivers/video/fbmem.c
@@ -510,6 +510,10 @@
 		fb_logo_ex_num = 0;
 
 	for (i = 0; i < fb_logo_ex_num; i++) {
+		if (fb_logo_ex[i].logo->type != fb_logo.logo->type) {
+			fb_logo_ex[i].logo = NULL;
+			continue;
+		}
 		height += fb_logo_ex[i].logo->height;
 		if (height > yres) {
 			height -= fb_logo_ex[i].logo->height;
diff --git a/drivers/video/gbefb.c b/drivers/video/gbefb.c
index f89c3cc..fe5b519 100644
--- a/drivers/video/gbefb.c
+++ b/drivers/video/gbefb.c
@@ -912,6 +912,7 @@
 {
 	unsigned int line_length;
 	struct gbe_timing_info timing;
+	int ret;
 
 	/* Limit bpp to 8, 16, and 32 */
 	if (var->bits_per_pixel <= 8)
@@ -930,8 +931,10 @@
 
 	var->grayscale = 0;	/* No grayscale for now */
 
-	if ((var->pixclock = compute_gbe_timing(var, &timing)) < 0)
-		return(-EINVAL);
+	ret = compute_gbe_timing(var, &timing);
+	var->pixclock = ret;
+	if (ret < 0)
+		return -EINVAL;
 
 	/* Adjust virtual resolution, if necessary */
 	if (var->xres > var->xres_virtual || (!ywrap && !ypan))
diff --git a/drivers/video/geode/gx1fb_core.c b/drivers/video/geode/gx1fb_core.c
index bb20a22..751e491 100644
--- a/drivers/video/geode/gx1fb_core.c
+++ b/drivers/video/geode/gx1fb_core.c
@@ -217,8 +217,7 @@
 	ret = pci_request_region(dev, 0, "gx1fb (video)");
 	if (ret < 0)
 		return ret;
-	par->vid_regs = ioremap(pci_resource_start(dev, 0),
-				pci_resource_len(dev, 0));
+	par->vid_regs = pci_ioremap_bar(dev, 0);
 	if (!par->vid_regs)
 		return -ENOMEM;
 
diff --git a/drivers/video/geode/gxfb_core.c b/drivers/video/geode/gxfb_core.c
index de2b8f9..4841189 100644
--- a/drivers/video/geode/gxfb_core.c
+++ b/drivers/video/geode/gxfb_core.c
@@ -242,23 +242,21 @@
 	ret = pci_request_region(dev, 3, "gxfb (video processor)");
 	if (ret < 0)
 		return ret;
-	par->vid_regs = ioremap(pci_resource_start(dev, 3),
-				pci_resource_len(dev, 3));
+	par->vid_regs = pci_ioremap_bar(dev, 3);
 	if (!par->vid_regs)
 		return -ENOMEM;
 
 	ret = pci_request_region(dev, 2, "gxfb (display controller)");
 	if (ret < 0)
 		return ret;
-	par->dc_regs = ioremap(pci_resource_start(dev, 2), pci_resource_len(dev, 2));
+	par->dc_regs = pci_ioremap_bar(dev, 2);
 	if (!par->dc_regs)
 		return -ENOMEM;
 
 	ret = pci_request_region(dev, 1, "gxfb (graphics processor)");
 	if (ret < 0)
 		return ret;
-	par->gp_regs = ioremap(pci_resource_start(dev, 1),
-	pci_resource_len(dev, 1));
+	par->gp_regs = pci_ioremap_bar(dev, 1);
 
 	if (!par->gp_regs)
 		return -ENOMEM;
diff --git a/drivers/video/geode/lxfb_core.c b/drivers/video/geode/lxfb_core.c
index 2cd9b74..b965ecd 100644
--- a/drivers/video/geode/lxfb_core.c
+++ b/drivers/video/geode/lxfb_core.c
@@ -379,20 +379,17 @@
 	if (info->screen_base == NULL)
 		return ret;
 
-	par->gp_regs = ioremap(pci_resource_start(dev, 1),
-				pci_resource_len(dev, 1));
+	par->gp_regs = pci_ioremap_bar(dev, 1);
 
 	if (par->gp_regs == NULL)
 		return ret;
 
-	par->dc_regs = ioremap(pci_resource_start(dev, 2),
-			       pci_resource_len(dev, 2));
+	par->dc_regs = pci_ioremap_bar(dev, 2);
 
 	if (par->dc_regs == NULL)
 		return ret;
 
-	par->vp_regs = ioremap(pci_resource_start(dev, 3),
-			       pci_resource_len(dev, 3));
+	par->vp_regs = pci_ioremap_bar(dev, 3);
 
 	if (par->vp_regs == NULL)
 		return ret;
diff --git a/drivers/video/gxt4500.c b/drivers/video/gxt4500.c
index 5645577..896e53d 100644
--- a/drivers/video/gxt4500.c
+++ b/drivers/video/gxt4500.c
@@ -648,7 +648,7 @@
 	info->pseudo_palette = par->pseudo_palette;
 
 	info->fix.mmio_start = reg_phys;
-	par->regs = ioremap(reg_phys, pci_resource_len(pdev, 0));
+	par->regs = pci_ioremap_bar(pdev, 0);
 	if (!par->regs) {
 		dev_err(&pdev->dev, "gxt4500: cannot map registers\n");
 		goto err_free_all;
@@ -656,7 +656,7 @@
 
 	info->fix.smem_start = fb_phys;
 	info->fix.smem_len = pci_resource_len(pdev, 1);
-	info->screen_base = ioremap(fb_phys, pci_resource_len(pdev, 1));
+	info->screen_base = pci_ioremap_bar(pdev, 1);
 	if (!info->screen_base) {
 		dev_err(&pdev->dev, "gxt4500: cannot map framebuffer\n");
 		goto err_unmap_regs;
diff --git a/drivers/video/i810/i810_accel.c b/drivers/video/i810/i810_accel.c
index 76764ea..f5bedee 100644
--- a/drivers/video/i810/i810_accel.c
+++ b/drivers/video/i810/i810_accel.c
@@ -301,8 +301,10 @@
 	u32 dx, dy, width, height, dest, rop = 0, color = 0;
 
 	if (!info->var.accel_flags || par->dev_flags & LOCKUP ||
-	    par->depth == 4) 
-		return cfb_fillrect(info, rect);
+	    par->depth == 4) {
+		cfb_fillrect(info, rect);
+		return;
+	}
 
 	if (par->depth == 1) 
 		color = rect->color;
@@ -327,8 +329,10 @@
 	u32 sx, sy, dx, dy, pitch, width, height, src, dest, xdir;
 
 	if (!info->var.accel_flags || par->dev_flags & LOCKUP ||
-	    par->depth == 4)
-		return cfb_copyarea(info, region);
+	    par->depth == 4) {
+		cfb_copyarea(info, region);
+		return;
+	}
 
 	dx = region->dx * par->depth;
 	sx = region->sx * par->depth;
@@ -366,8 +370,10 @@
 	u32 fg = 0, bg = 0, size, dst;
 	
 	if (!info->var.accel_flags || par->dev_flags & LOCKUP ||
-	    par->depth == 4 || image->depth != 1) 
-		return cfb_imageblit(info, image);
+	    par->depth == 4 || image->depth != 1) {
+		cfb_imageblit(info, image);
+		return;
+	}
 
 	switch (info->var.bits_per_pixel) {
 	case 8:
diff --git a/drivers/video/intelfb/intelfbdrv.c b/drivers/video/intelfb/intelfbdrv.c
index a09e236..6d8e541 100644
--- a/drivers/video/intelfb/intelfbdrv.c
+++ b/drivers/video/intelfb/intelfbdrv.c
@@ -1493,8 +1493,10 @@
 	DBG_MSG("intelfb_fillrect\n");
 #endif
 
-	if (!ACCEL(dinfo, info) || dinfo->depth == 4)
-		return cfb_fillrect(info, rect);
+	if (!ACCEL(dinfo, info) || dinfo->depth == 4) {
+		cfb_fillrect(info, rect);
+		return;
+	}
 
 	if (rect->rop == ROP_COPY)
 		rop = PAT_ROP_GXCOPY;
@@ -1521,8 +1523,10 @@
 	DBG_MSG("intelfb_copyarea\n");
 #endif
 
-	if (!ACCEL(dinfo, info) || dinfo->depth == 4)
-		return cfb_copyarea(info, region);
+	if (!ACCEL(dinfo, info) || dinfo->depth == 4) {
+		cfb_copyarea(info, region);
+		return;
+	}
 
 	intelfbhw_do_bitblt(dinfo, region->sx, region->sy, region->dx,
 			    region->dy, region->width, region->height,
@@ -1540,8 +1544,10 @@
 #endif
 
 	if (!ACCEL(dinfo, info) || dinfo->depth == 4
-	    || image->depth != 1)
-		return cfb_imageblit(info, image);
+	    || image->depth != 1) {
+		cfb_imageblit(info, image);
+		return;
+	}
 
 	if (dinfo->depth != 8) {
 		fgcolor = dinfo->pseudo_palette[image->fg_color];
@@ -1554,8 +1560,10 @@
 	if (!intelfbhw_do_drawglyph(dinfo, fgcolor, bgcolor, image->width,
 				    image->height, image->data,
 				    image->dx, image->dy,
-				    dinfo->pitch, info->var.bits_per_pixel))
-		return cfb_imageblit(info, image);
+				    dinfo->pitch, info->var.bits_per_pixel)) {
+		cfb_imageblit(info, image);
+		return;
+	}
 }
 
 static int intelfb_cursor(struct fb_info *info, struct fb_cursor *cursor)
diff --git a/drivers/video/modedb.c b/drivers/video/modedb.c
index d3c3af5..1618624 100644
--- a/drivers/video/modedb.c
+++ b/drivers/video/modedb.c
@@ -329,7 +329,7 @@
 	  FB_SYNC_HOR_HIGH_ACT | FB_SYNC_VERT_HIGH_ACT,
 	  FB_VMODE_NONINTERLACED, FB_MODE_IS_VESA },
 	/* 17 1152x864-75 VESA */
-	{ NULL, 75, 1153, 864, 9259, 256, 64, 32, 1, 128, 3,
+	{ NULL, 75, 1152, 864, 9259, 256, 64, 32, 1, 128, 3,
 	  FB_SYNC_HOR_HIGH_ACT | FB_SYNC_VERT_HIGH_ACT,
 	  FB_VMODE_NONINTERLACED, FB_MODE_IS_VESA },
 	/* 18 1280x960-60 VESA */
diff --git a/drivers/video/neofb.c b/drivers/video/neofb.c
index bfb802d..588527a 100644
--- a/drivers/video/neofb.c
+++ b/drivers/video/neofb.c
@@ -1453,7 +1453,8 @@
 			 * is less than 16 bits wide. This is due to insufficient
 			 * padding when writing the image. We need to adjust
 			 * struct fb_pixmap. Not yet done. */
-			return cfb_imageblit(info, image);
+			cfb_imageblit(info, image);
+			return;
 		}
 		bltCntl_flags = NEO_BC0_SRC_MONO;
 	} else if (image->depth == info->var.bits_per_pixel) {
@@ -1461,7 +1462,8 @@
 	} else {
 		/* We don't currently support hardware acceleration if image
 		 * depth is different from display */
-		return cfb_imageblit(info, image);
+		cfb_imageblit(info, image);
+		return;
 	}
 
 	switch (info->var.bits_per_pixel) {
diff --git a/drivers/video/nvidia/nv_accel.c b/drivers/video/nvidia/nv_accel.c
index fa4821c..ad6472a 100644
--- a/drivers/video/nvidia/nv_accel.c
+++ b/drivers/video/nvidia/nv_accel.c
@@ -300,8 +300,10 @@
 	if (info->state != FBINFO_STATE_RUNNING)
 		return;
 
-	if (par->lockup)
-		return cfb_copyarea(info, region);
+	if (par->lockup) {
+		cfb_copyarea(info, region);
+		return;
+	}
 
 	NVDmaStart(info, par, BLIT_POINT_SRC, 3);
 	NVDmaNext(par, (region->sy << 16) | region->sx);
@@ -319,8 +321,10 @@
 	if (info->state != FBINFO_STATE_RUNNING)
 		return;
 
-	if (par->lockup)
-		return cfb_fillrect(info, rect);
+	if (par->lockup) {
+		cfb_fillrect(info, rect);
+		return;
+	}
 
 	if (info->var.bits_per_pixel == 8)
 		color = rect->color;
diff --git a/drivers/video/pm3fb.c b/drivers/video/pm3fb.c
index 68089d1..6666f45 100644
--- a/drivers/video/pm3fb.c
+++ b/drivers/video/pm3fb.c
@@ -539,8 +539,10 @@
 		bgx = par->palette[image->bg_color];
 		break;
 	}
-	if (image->depth != 1)
-		return cfb_imageblit(info, image);
+	if (image->depth != 1) {
+		cfb_imageblit(info, image);
+		return;
+	}
 
 	if (info->var.bits_per_pixel == 8) {
 		fgx |= fgx << 8;
diff --git a/drivers/video/sm501fb.c b/drivers/video/sm501fb.c
index f94ae84..dcd9879 100644
--- a/drivers/video/sm501fb.c
+++ b/drivers/video/sm501fb.c
@@ -159,6 +159,9 @@
 		break;
 
 	case SM501_MEMF_PANEL:
+		if (size > inf->fbmem_len)
+			return -ENOMEM;
+
 		ptr = inf->fbmem_len - size;
 		fbi = inf->fb[HEAD_CRT];
 
@@ -172,9 +175,6 @@
 		if (fbi && ptr < fbi->fix.smem_len)
 			return -ENOMEM;
 
-		if (ptr < 0)
-			return -ENOMEM;
-
 		break;
 
 	case SM501_MEMF_CRT:
diff --git a/drivers/video/via/viafbdev.c b/drivers/video/via/viafbdev.c
index e21fe5b..37b433a0 100644
--- a/drivers/video/via/viafbdev.c
+++ b/drivers/video/via/viafbdev.c
@@ -870,8 +870,10 @@
 	u32 col = 0, rop = 0;
 	int pitch;
 
-	if (!viafb_accel)
-		return cfb_fillrect(info, rect);
+	if (!viafb_accel) {
+		cfb_fillrect(info, rect);
+		return;
+	}
 
 	if (!rect->width || !rect->height)
 		return;
@@ -937,8 +939,10 @@
 
 	DEBUG_MSG(KERN_INFO "viafb_copyarea!!\n");
 
-	if (!viafb_accel)
-		return cfb_copyarea(info, area);
+	if (!viafb_accel) {
+		cfb_copyarea(info, area);
+		return;
+	}
 
 	if (!area->width || !area->height)
 		return;
@@ -994,8 +998,10 @@
 	int i;
 	int pitch;
 
-	if (!viafb_accel)
-		return cfb_imageblit(info, image);
+	if (!viafb_accel) {
+		cfb_imageblit(info, image);
+		return;
+	}
 
 	udata = (u32 *) image->data;
 
diff --git a/firmware/dsp56k/bootstrap.asm b/firmware/dsp56k/bootstrap.asm
index 10d8919..a411047 100644
--- a/firmware/dsp56k/bootstrap.asm
+++ b/firmware/dsp56k/bootstrap.asm
@@ -51,19 +51,19 @@
         ; Copy DSP program control
         move    #real,r0
         move    #upload,r1
-        do      #upload_end-upload,<_copy
-        move    P:(r0)+,x0
-        move    x0,P:(r1)+
-_copy   movep   #>4,X:<<M_HCR
-        movep   #>$c00,X:<<M_IPR
+        do      #upload_end-upload,_copy
+        movem    P:(r0)+,x0
+        movem    x0,P:(r1)+
+_copy   movep   #4,X:<<M_HCR
+        movep   #$c00,X:<<M_IPR
         and     #<$fe,mr
         jmp     upload
 
 real
         org     P:$7ea9
 upload
-        movep   #>1,X:<<M_PBC
-        movep   #>0,X:<<M_BCR
+        movep   #1,X:<<M_PBC
+        movep   #0,X:<<M_BCR
 
 next    jclr    #0,X:<<M_HSR,*
         movep   X:<<M_HRX,A
@@ -81,18 +81,18 @@
         cmp     x0,A
         jeq     load_Y
 
-load_P  do      y0,_load
+load_P  do      y0,_load_P
         jclr    #0,X:<<M_HSR,*
         movep   X:<<M_HRX,P:(r0)+
-_load   jmp     next
-load_X  do      y0,_load
+_load_P jmp     next
+load_X  do      y0,_load_X
         jclr    #0,X:<<M_HSR,*
         movep   X:<<M_HRX,X:(r0)+
-_load   jmp     next
-load_Y  do      y0,_load
+_load_X jmp     next
+load_Y  do      y0,_load_Y
         jclr    #0,X:<<M_HSR,*
         movep   X:<<M_HRX,Y:(r0)+
-_load   jmp     next
+_load_Y jmp     next
 
 upload_end
         end
diff --git a/fs/Kconfig b/fs/Kconfig
index f9b6e29..3288358 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -721,7 +721,20 @@
 
 endmenu
 
-menu "Miscellaneous filesystems"
+menuconfig MISC_FILESYSTEMS
+	bool "Miscellaneous filesystems"
+	default y
+	---help---
+	  Say Y here to get to see options for various miscellaneous
+	  filesystems, such as filesystems that came from other
+	  operating systems.
+
+	  This option alone does not add any kernel code.
+
+	  If you say N, all options in this submenu will be skipped and
+	  disabled; if unsure, say Y here.
+
+if MISC_FILESYSTEMS
 
 config ADFS_FS
 	tristate "ADFS file system support (EXPERIMENTAL)"
@@ -1091,7 +1104,7 @@
 	  Y here.  This will result in _many_ additional debugging messages to be
 	  written to the system log.
 
-endmenu
+endif # MISC_FILESYSTEMS
 
 menuconfig NETWORK_FILESYSTEMS
 	bool "Network File Systems"
diff --git a/fs/autofs4/autofs_i.h b/fs/autofs4/autofs_i.h
index e0f16da..a768031 100644
--- a/fs/autofs4/autofs_i.h
+++ b/fs/autofs4/autofs_i.h
@@ -25,8 +25,6 @@
 #define AUTOFS_DEV_IOCTL_IOC_FIRST	(AUTOFS_DEV_IOCTL_VERSION)
 #define AUTOFS_DEV_IOCTL_IOC_COUNT	(AUTOFS_IOC_COUNT - 11)
 
-#define AUTOFS_TYPE_TRIGGER	(AUTOFS_TYPE_DIRECT|AUTOFS_TYPE_OFFSET)
-
 #include <linux/kernel.h>
 #include <linux/slab.h>
 #include <linux/time.h>
diff --git a/fs/autofs4/dev-ioctl.c b/fs/autofs4/dev-ioctl.c
index 63b7c7a..025e105 100644
--- a/fs/autofs4/dev-ioctl.c
+++ b/fs/autofs4/dev-ioctl.c
@@ -124,7 +124,7 @@
 
 /*
  * Check sanity of parameter control fields and if a path is present
- * check that it has a "/" and is terminated.
+ * check that it is terminated and contains at least one "/".
  */
 static int validate_dev_ioctl(int cmd, struct autofs_dev_ioctl *param)
 {
@@ -138,15 +138,16 @@
 	}
 
 	if (param->size > sizeof(*param)) {
-		err = check_name(param->path);
+		err = invalid_str(param->path,
+				 (void *) ((size_t) param + param->size));
 		if (err) {
-			AUTOFS_WARN("invalid path supplied for cmd(0x%08x)",
-				    cmd);
+			AUTOFS_WARN(
+			  "path string terminator missing for cmd(0x%08x)",
+			  cmd);
 			goto out;
 		}
 
-		err = invalid_str(param->path,
-				 (void *) ((size_t) param + param->size));
+		err = check_name(param->path);
 		if (err) {
 			AUTOFS_WARN("invalid path supplied for cmd(0x%08x)",
 				    cmd);
@@ -180,7 +181,7 @@
 				     struct autofs_sb_info *sbi,
 				     struct autofs_dev_ioctl *param)
 {
-	param->arg1 = sbi->version;
+	param->protover.version = sbi->version;
 	return 0;
 }
 
@@ -189,7 +190,7 @@
 					struct autofs_sb_info *sbi,
 					struct autofs_dev_ioctl *param)
 {
-	param->arg1 = sbi->sub_version;
+	param->protosubver.sub_version = sbi->sub_version;
 	return 0;
 }
 
@@ -335,13 +336,13 @@
 	int err, fd;
 
 	/* param->path has already been checked */
-	if (!param->arg1)
+	if (!param->openmount.devid)
 		return -EINVAL;
 
 	param->ioctlfd = -1;
 
 	path = param->path;
-	devid = param->arg1;
+	devid = param->openmount.devid;
 
 	err = 0;
 	fd = autofs_dev_ioctl_open_mountpoint(path, devid);
@@ -373,7 +374,7 @@
 {
 	autofs_wqt_t token;
 
-	token = (autofs_wqt_t) param->arg1;
+	token = (autofs_wqt_t) param->ready.token;
 	return autofs4_wait_release(sbi, token, 0);
 }
 
@@ -388,8 +389,8 @@
 	autofs_wqt_t token;
 	int status;
 
-	token = (autofs_wqt_t) param->arg1;
-	status = param->arg2 ? param->arg2 : -ENOENT;
+	token = (autofs_wqt_t) param->fail.token;
+	status = param->fail.status ? param->fail.status : -ENOENT;
 	return autofs4_wait_release(sbi, token, status);
 }
 
@@ -412,10 +413,10 @@
 	int pipefd;
 	int err = 0;
 
-	if (param->arg1 == -1)
+	if (param->setpipefd.pipefd == -1)
 		return -EINVAL;
 
-	pipefd = param->arg1;
+	pipefd = param->setpipefd.pipefd;
 
 	mutex_lock(&sbi->wq_mutex);
 	if (!sbi->catatonic) {
@@ -457,8 +458,8 @@
 {
 	unsigned long timeout;
 
-	timeout = param->arg1;
-	param->arg1 = sbi->exp_timeout / HZ;
+	timeout = param->timeout.timeout;
+	param->timeout.timeout = sbi->exp_timeout / HZ;
 	sbi->exp_timeout = timeout * HZ;
 	return 0;
 }
@@ -489,7 +490,7 @@
 	path = param->path;
 	devid = sbi->sb->s_dev;
 
-	param->arg1 = param->arg2 = -1;
+	param->requester.uid = param->requester.gid = -1;
 
 	/* Get nameidata of the parent directory */
 	err = path_lookup(path, LOOKUP_PARENT, &nd);
@@ -505,8 +506,8 @@
 		err = 0;
 		autofs4_expire_wait(nd.path.dentry);
 		spin_lock(&sbi->fs_lock);
-		param->arg1 = ino->uid;
-		param->arg2 = ino->gid;
+		param->requester.uid = ino->uid;
+		param->requester.gid = ino->gid;
 		spin_unlock(&sbi->fs_lock);
 	}
 
@@ -529,10 +530,10 @@
 	int err = -EAGAIN;
 	int how;
 
-	how = param->arg1;
+	how = param->expire.how;
 	mnt = fp->f_path.mnt;
 
-	if (sbi->type & AUTOFS_TYPE_TRIGGER)
+	if (autofs_type_trigger(sbi->type))
 		dentry = autofs4_expire_direct(sbi->sb, mnt, sbi, how);
 	else
 		dentry = autofs4_expire_indirect(sbi->sb, mnt, sbi, how);
@@ -565,9 +566,9 @@
 				      struct autofs_sb_info *sbi,
 				      struct autofs_dev_ioctl *param)
 {
-	param->arg1 = 0;
+	param->askumount.may_umount = 0;
 	if (may_umount(fp->f_path.mnt))
-		param->arg1 = 1;
+		param->askumount.may_umount = 1;
 	return 0;
 }
 
@@ -600,6 +601,7 @@
 	struct nameidata nd;
 	const char *path;
 	unsigned int type;
+	unsigned int devid, magic;
 	int err = -ENOENT;
 
 	if (param->size <= sizeof(*param)) {
@@ -608,13 +610,13 @@
 	}
 
 	path = param->path;
-	type = param->arg1;
+	type = param->ismountpoint.in.type;
 
-	param->arg1 = 0;
-	param->arg2 = 0;
+	param->ismountpoint.out.devid = devid = 0;
+	param->ismountpoint.out.magic = magic = 0;
 
 	if (!fp || param->ioctlfd == -1) {
-		if (type == AUTOFS_TYPE_ANY) {
+		if (autofs_type_any(type)) {
 			struct super_block *sb;
 
 			err = path_lookup(path, LOOKUP_FOLLOW, &nd);
@@ -622,7 +624,7 @@
 				goto out;
 
 			sb = nd.path.dentry->d_sb;
-			param->arg1 = new_encode_dev(sb->s_dev);
+			devid = new_encode_dev(sb->s_dev);
 		} else {
 			struct autofs_info *ino;
 
@@ -635,38 +637,41 @@
 				goto out_release;
 
 			ino = autofs4_dentry_ino(nd.path.dentry);
-			param->arg1 = autofs4_get_dev(ino->sbi);
+			devid = autofs4_get_dev(ino->sbi);
 		}
 
 		err = 0;
 		if (nd.path.dentry->d_inode &&
 		    nd.path.mnt->mnt_root == nd.path.dentry) {
 			err = 1;
-			param->arg2 = nd.path.dentry->d_inode->i_sb->s_magic;
+			magic = nd.path.dentry->d_inode->i_sb->s_magic;
 		}
 	} else {
-		dev_t devid = new_encode_dev(sbi->sb->s_dev);
+		dev_t dev = autofs4_get_dev(sbi);
 
 		err = path_lookup(path, LOOKUP_PARENT, &nd);
 		if (err)
 			goto out;
 
-		err = autofs_dev_ioctl_find_super(&nd, devid);
+		err = autofs_dev_ioctl_find_super(&nd, dev);
 		if (err)
 			goto out_release;
 
-		param->arg1 = autofs4_get_dev(sbi);
+		devid = dev;
 
 		err = have_submounts(nd.path.dentry);
 
 		if (nd.path.mnt->mnt_mountpoint != nd.path.mnt->mnt_root) {
 			if (follow_down(&nd.path.mnt, &nd.path.dentry)) {
 				struct inode *inode = nd.path.dentry->d_inode;
-				param->arg2 = inode->i_sb->s_magic;
+				magic = inode->i_sb->s_magic;
 			}
 		}
 	}
 
+	param->ismountpoint.out.devid = devid;
+	param->ismountpoint.out.magic = magic;
+
 out_release:
 	path_put(&nd.path);
 out:
diff --git a/fs/autofs4/expire.c b/fs/autofs4/expire.c
index 4b6fb3f..e3bd507 100644
--- a/fs/autofs4/expire.c
+++ b/fs/autofs4/expire.c
@@ -63,7 +63,7 @@
 		struct autofs_sb_info *sbi = autofs4_sbi(dentry->d_sb);
 
 		/* This is an autofs submount, we can't expire it */
-		if (sbi->type == AUTOFS_TYPE_INDIRECT)
+		if (autofs_type_indirect(sbi->type))
 			goto done;
 
 		/*
@@ -490,7 +490,7 @@
 	if (arg && get_user(do_now, arg))
 		return -EFAULT;
 
-	if (sbi->type & AUTOFS_TYPE_TRIGGER)
+	if (autofs_type_trigger(sbi->type))
 		dentry = autofs4_expire_direct(sb, mnt, sbi, do_now);
 	else
 		dentry = autofs4_expire_indirect(sb, mnt, sbi, do_now);
diff --git a/fs/autofs4/inode.c b/fs/autofs4/inode.c
index cfc23e5..716e12b 100644
--- a/fs/autofs4/inode.c
+++ b/fs/autofs4/inode.c
@@ -197,9 +197,9 @@
 	seq_printf(m, ",minproto=%d", sbi->min_proto);
 	seq_printf(m, ",maxproto=%d", sbi->max_proto);
 
-	if (sbi->type & AUTOFS_TYPE_OFFSET)
+	if (autofs_type_offset(sbi->type))
 		seq_printf(m, ",offset");
-	else if (sbi->type & AUTOFS_TYPE_DIRECT)
+	else if (autofs_type_direct(sbi->type))
 		seq_printf(m, ",direct");
 	else
 		seq_printf(m, ",indirect");
@@ -284,13 +284,13 @@
 			*maxproto = option;
 			break;
 		case Opt_indirect:
-			*type = AUTOFS_TYPE_INDIRECT;
+			set_autofs_type_indirect(type);
 			break;
 		case Opt_direct:
-			*type = AUTOFS_TYPE_DIRECT;
+			set_autofs_type_direct(type);
 			break;
 		case Opt_offset:
-			*type = AUTOFS_TYPE_OFFSET;
+			set_autofs_type_offset(type);
 			break;
 		default:
 			return 1;
@@ -338,7 +338,7 @@
 	sbi->sb = s;
 	sbi->version = 0;
 	sbi->sub_version = 0;
-	sbi->type = AUTOFS_TYPE_INDIRECT;
+	set_autofs_type_indirect(&sbi->type);
 	sbi->min_proto = 0;
 	sbi->max_proto = 0;
 	mutex_init(&sbi->wq_mutex);
@@ -380,7 +380,7 @@
 	}
 
 	root_inode->i_fop = &autofs4_root_operations;
-	root_inode->i_op = sbi->type & AUTOFS_TYPE_TRIGGER ?
+	root_inode->i_op = autofs_type_trigger(sbi->type) ?
 			&autofs4_direct_root_inode_operations :
 			&autofs4_indirect_root_inode_operations;
 
diff --git a/fs/autofs4/waitq.c b/fs/autofs4/waitq.c
index e02cc8a..eeb2468 100644
--- a/fs/autofs4/waitq.c
+++ b/fs/autofs4/waitq.c
@@ -337,7 +337,7 @@
 		 * is very similar for indirect mounts except only dentrys
 		 * in the root of the autofs file system may be negative.
 		 */
-		if (sbi->type & AUTOFS_TYPE_TRIGGER)
+		if (autofs_type_trigger(sbi->type))
 			return -ENOENT;
 		else if (!IS_ROOT(dentry->d_parent))
 			return -ENOENT;
@@ -348,7 +348,7 @@
 		return -ENOMEM;
 
 	/* If this is a direct mount request create a dummy name */
-	if (IS_ROOT(dentry) && sbi->type & AUTOFS_TYPE_TRIGGER)
+	if (IS_ROOT(dentry) && autofs_type_trigger(sbi->type))
 		qstr.len = sprintf(name, "%p", dentry);
 	else {
 		qstr.len = autofs4_getpath(sbi, dentry, &name);
@@ -406,11 +406,11 @@
 				type = autofs_ptype_expire_multi;
 		} else {
 			if (notify == NFY_MOUNT)
-				type = (sbi->type & AUTOFS_TYPE_TRIGGER) ?
+				type = autofs_type_trigger(sbi->type) ?
 					autofs_ptype_missing_direct :
 					 autofs_ptype_missing_indirect;
 			else
-				type = (sbi->type & AUTOFS_TYPE_TRIGGER) ?
+				type = autofs_type_trigger(sbi->type) ?
 					autofs_ptype_expire_direct :
 					autofs_ptype_expire_indirect;
 		}
diff --git a/fs/bfs/inode.c b/fs/bfs/inode.c
index 0ed57b5..cc4062d 100644
--- a/fs/bfs/inode.c
+++ b/fs/bfs/inode.c
@@ -213,6 +213,9 @@
 {
 	struct bfs_sb_info *info = BFS_SB(s);
 
+	if (!info)
+		return;
+
 	brelse(info->si_sbh);
 	mutex_destroy(&info->bfs_lock);
 	kfree(info->si_imap);
@@ -327,6 +330,7 @@
 	unsigned i, imap_len;
 	struct bfs_sb_info *info;
 	long ret = -EINVAL;
+	unsigned long i_sblock, i_eblock, i_eoff, s_size;
 
 	info = kzalloc(sizeof(*info), GFP_KERNEL);
 	if (!info)
@@ -350,6 +354,12 @@
 
 	s->s_magic = BFS_MAGIC;
 	info->si_sbh = bh;
+
+	if (le32_to_cpu(bfs_sb->s_start) > le32_to_cpu(bfs_sb->s_end)) {
+		printf("Superblock is corrupted\n");
+		goto out;
+	}
+
 	info->si_lasti = (le32_to_cpu(bfs_sb->s_start) - BFS_BSIZE) /
 					sizeof(struct bfs_inode)
 					+ BFS_ROOT_INO - 1;
@@ -380,6 +390,18 @@
 			- le32_to_cpu(bfs_sb->s_start)) >> BFS_BSIZE_BITS;
 	info->si_freei = 0;
 	info->si_lf_eblk = 0;
+
+	/* can we read the last block? */
+	bh = sb_bread(s, info->si_blocks - 1);
+	if (!bh) {
+		printf("Last block not available: %lu\n", info->si_blocks - 1);
+		iput(inode);
+		ret = -EIO;
+		kfree(info->si_imap);
+		goto out;
+	}
+	brelse(bh);
+
 	bh = NULL;
 	for (i = BFS_ROOT_INO; i <= info->si_lasti; i++) {
 		struct bfs_inode *di;
@@ -397,6 +419,29 @@
 
 		di = (struct bfs_inode *)bh->b_data + off;
 
+		/* test if filesystem is not corrupted */
+
+		i_eoff = le32_to_cpu(di->i_eoffset);
+		i_sblock = le32_to_cpu(di->i_sblock);
+		i_eblock = le32_to_cpu(di->i_eblock);
+		s_size = le32_to_cpu(bfs_sb->s_end);
+
+		if (i_sblock > info->si_blocks ||
+			i_eblock > info->si_blocks ||
+			i_sblock > i_eblock ||
+			i_eoff > s_size ||
+			i_sblock * BFS_BSIZE > i_eoff) {
+
+			printf("Inode 0x%08x corrupted\n", i);
+
+			brelse(bh);
+			s->s_root = NULL;
+			kfree(info->si_imap);
+			kfree(info);
+			s->s_fs_info = NULL;
+			return -EIO;
+		}
+
 		if (!di->i_ino) {
 			info->si_freei++;
 			continue;
diff --git a/fs/binfmt_misc.c b/fs/binfmt_misc.c
index e1158cb..c4e8353 100644
--- a/fs/binfmt_misc.c
+++ b/fs/binfmt_misc.c
@@ -649,7 +649,7 @@
 static ssize_t
 bm_status_read(struct file *file, char __user *buf, size_t nbytes, loff_t *ppos)
 {
-	char *s = enabled ? "enabled" : "disabled";
+	char *s = enabled ? "enabled\n" : "disabled\n";
 
 	return simple_read_from_buffer(buf, nbytes, ppos, s, strlen(s));
 }
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 349a26c..b957717 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -1262,7 +1262,7 @@
 
 /**
  * lookup_bdev  - lookup a struct block_device by name
- * @path:	special file representing the block device
+ * @pathname:	special file representing the block device
  *
  * Get a reference to the blockdevice at @pathname in the current
  * namespace if possible and return it.  Return ERR_PTR(error)
diff --git a/fs/buffer.c b/fs/buffer.c
index a13f09b..c26da78 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -2022,7 +2022,6 @@
 			if (pos + len > inode->i_size)
 				vmtruncate(inode, inode->i_size);
 		}
-		goto out;
 	}
 
 out:
diff --git a/fs/char_dev.c b/fs/char_dev.c
index 700697a..38f7122 100644
--- a/fs/char_dev.c
+++ b/fs/char_dev.c
@@ -120,7 +120,7 @@
 	cd->major = major;
 	cd->baseminor = baseminor;
 	cd->minorct = minorct;
-	strncpy(cd->name,name, 64);
+	strlcpy(cd->name, name, sizeof(cd->name));
 
 	i = major_to_index(major);
 
diff --git a/fs/compat.c b/fs/compat.c
index d1ece79..30f2faa 100644
--- a/fs/compat.c
+++ b/fs/compat.c
@@ -1187,6 +1187,9 @@
 	ret = compat_do_readv_writev(READ, file, vec, vlen, &file->f_pos);
 
 out:
+	if (ret > 0)
+		add_rchar(current, ret);
+	inc_syscr(current);
 	fput(file);
 	return ret;
 }
@@ -1210,6 +1213,9 @@
 	ret = compat_do_readv_writev(WRITE, file, vec, vlen, &file->f_pos);
 
 out:
+	if (ret > 0)
+		add_wchar(current, ret);
+	inc_syscw(current);
 	fput(file);
 	return ret;
 }
diff --git a/fs/direct-io.c b/fs/direct-io.c
index af0558d..b6d4390 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -1209,6 +1209,19 @@
 	retval = direct_io_worker(rw, iocb, inode, iov, offset,
 				nr_segs, blkbits, get_block, end_io, dio);
 
+	/*
+	 * In case of error, an extending write may have instantiated a few
+	 * blocks outside i_size. Trim these off again for DIO_LOCKING.
+	 * NOTE: DIO_NO_LOCK/DIO_OWN_LOCK callers have to handle this in
+	 * their own manner.
+	 */
+	if (unlikely(retval < 0 && (rw & WRITE))) {
+		loff_t isize = i_size_read(inode);
+
+		if (end > isize && dio_lock_type == DIO_LOCKING)
+			vmtruncate(inode, isize);
+	}
+
 	if (rw == READ && dio_lock_type == DIO_LOCKING)
 		release_i_mutex = 0;
 
diff --git a/fs/ecryptfs/crypto.c b/fs/ecryptfs/crypto.c
index 6046239..c01e043 100644
--- a/fs/ecryptfs/crypto.c
+++ b/fs/ecryptfs/crypto.c
@@ -175,8 +175,8 @@
  *
  * Returns zero on success; non-zero on error.
  */
-static int ecryptfs_derive_iv(char *iv, struct ecryptfs_crypt_stat *crypt_stat,
-			      loff_t offset)
+int ecryptfs_derive_iv(char *iv, struct ecryptfs_crypt_stat *crypt_stat,
+		       loff_t offset)
 {
 	int rc = 0;
 	char dst[MD5_DIGEST_SIZE];
@@ -924,6 +924,15 @@
 		crypt_stat->flags |= ECRYPTFS_METADATA_IN_XATTR;
 	if (mount_crypt_stat->flags & ECRYPTFS_ENCRYPTED_VIEW_ENABLED)
 		crypt_stat->flags |= ECRYPTFS_VIEW_AS_ENCRYPTED;
+	if (mount_crypt_stat->flags & ECRYPTFS_GLOBAL_ENCRYPT_FILENAMES) {
+		crypt_stat->flags |= ECRYPTFS_ENCRYPT_FILENAMES;
+		if (mount_crypt_stat->flags
+		    & ECRYPTFS_GLOBAL_ENCFN_USE_MOUNT_FNEK)
+			crypt_stat->flags |= ECRYPTFS_ENCFN_USE_MOUNT_FNEK;
+		else if (mount_crypt_stat->flags
+			 & ECRYPTFS_GLOBAL_ENCFN_USE_FEK)
+			crypt_stat->flags |= ECRYPTFS_ENCFN_USE_FEK;
+	}
 }
 
 static int ecryptfs_copy_mount_wide_sigs_to_inode_sigs(
@@ -1060,7 +1069,8 @@
 static struct ecryptfs_flag_map_elem ecryptfs_flag_map[] = {
 	{0x00000001, ECRYPTFS_ENABLE_HMAC},
 	{0x00000002, ECRYPTFS_ENCRYPTED},
-	{0x00000004, ECRYPTFS_METADATA_IN_XATTR}
+	{0x00000004, ECRYPTFS_METADATA_IN_XATTR},
+	{0x00000008, ECRYPTFS_ENCRYPT_FILENAMES}
 };
 
 /**
@@ -1149,19 +1159,20 @@
 
 /**
  * ecryptfs_code_for_cipher_string
- * @crypt_stat: The cryptographic context
+ * @cipher_name: The string alias for the cipher
+ * @key_bytes: Length of key in bytes; used for AES code selection
  *
  * Returns zero on no match, or the cipher code on match
  */
-u8 ecryptfs_code_for_cipher_string(struct ecryptfs_crypt_stat *crypt_stat)
+u8 ecryptfs_code_for_cipher_string(char *cipher_name, size_t key_bytes)
 {
 	int i;
 	u8 code = 0;
 	struct ecryptfs_cipher_code_str_map_elem *map =
 		ecryptfs_cipher_code_str_map;
 
-	if (strcmp(crypt_stat->cipher, "aes") == 0) {
-		switch (crypt_stat->key_size) {
+	if (strcmp(cipher_name, "aes") == 0) {
+		switch (key_bytes) {
 		case 16:
 			code = RFC2440_CIPHER_AES_128;
 			break;
@@ -1173,7 +1184,7 @@
 		}
 	} else {
 		for (i = 0; i < ARRAY_SIZE(ecryptfs_cipher_code_str_map); i++)
-			if (strcmp(crypt_stat->cipher, map[i].cipher_str) == 0){
+			if (strcmp(cipher_name, map[i].cipher_str) == 0) {
 				code = map[i].cipher_code;
 				break;
 			}
@@ -1212,6 +1223,8 @@
 		&(ecryptfs_inode_to_private(ecryptfs_inode)->crypt_stat);
 	int rc;
 
+	if (crypt_stat->extent_size == 0)
+		crypt_stat->extent_size = ECRYPTFS_DEFAULT_EXTENT_SIZE;
 	rc = ecryptfs_read_lower(data, 0, crypt_stat->extent_size,
 				 ecryptfs_inode);
 	if (rc) {
@@ -1221,7 +1234,6 @@
 	}
 	if (!contains_ecryptfs_marker(data + ECRYPTFS_FILE_SIZE_BYTES)) {
 		rc = -EINVAL;
-		ecryptfs_printk(KERN_DEBUG, "Valid marker not found\n");
 	}
 out:
 	return rc;
@@ -1628,95 +1640,95 @@
 }
 
 /**
- * ecryptfs_encode_filename - converts a plaintext file name to cipher text
- * @crypt_stat: The crypt_stat struct associated with the file anem to encode
- * @name: The plaintext name
- * @length: The length of the plaintext
- * @encoded_name: The encypted name
+ * ecryptfs_encrypt_filename - encrypt filename
  *
- * Encrypts and encodes a filename into something that constitutes a
- * valid filename for a filesystem, with printable characters.
+ * CBC-encrypts the filename. We do not want to encrypt the same
+ * filename with the same key and IV, which may happen with hard
+ * links, so we prepend random bits to each filename.
  *
- * We assume that we have a properly initialized crypto context,
- * pointed to by crypt_stat->tfm.
- *
- * TODO: Implement filename decoding and decryption here, in place of
- * memcpy. We are keeping the framework around for now to (1)
- * facilitate testing of the components needed to implement filename
- * encryption and (2) to provide a code base from which other
- * developers in the community can easily implement this feature.
- *
- * Returns the length of encoded filename; negative if error
+ * Returns zero on success; non-zero otherwise
  */
-int
-ecryptfs_encode_filename(struct ecryptfs_crypt_stat *crypt_stat,
-			 const char *name, int length, char **encoded_name)
+static int
+ecryptfs_encrypt_filename(struct ecryptfs_filename *filename,
+			  struct ecryptfs_crypt_stat *crypt_stat,
+			  struct ecryptfs_mount_crypt_stat *mount_crypt_stat)
 {
-	int error = 0;
+	int rc = 0;
 
-	(*encoded_name) = kmalloc(length + 2, GFP_KERNEL);
-	if (!(*encoded_name)) {
-		error = -ENOMEM;
+	filename->encrypted_filename = NULL;
+	filename->encrypted_filename_size = 0;
+	if ((crypt_stat && (crypt_stat->flags & ECRYPTFS_ENCFN_USE_MOUNT_FNEK))
+	    || (mount_crypt_stat && (mount_crypt_stat->flags
+				     & ECRYPTFS_GLOBAL_ENCFN_USE_MOUNT_FNEK))) {
+		size_t packet_size;
+		size_t remaining_bytes;
+
+		rc = ecryptfs_write_tag_70_packet(
+			NULL, NULL,
+			&filename->encrypted_filename_size,
+			mount_crypt_stat, NULL,
+			filename->filename_size);
+		if (rc) {
+			printk(KERN_ERR "%s: Error attempting to get packet "
+			       "size for tag 72; rc = [%d]\n", __func__,
+			       rc);
+			filename->encrypted_filename_size = 0;
+			goto out;
+		}
+		filename->encrypted_filename =
+			kmalloc(filename->encrypted_filename_size, GFP_KERNEL);
+		if (!filename->encrypted_filename) {
+			printk(KERN_ERR "%s: Out of memory whilst attempting "
+			       "to kmalloc [%zd] bytes\n", __func__,
+			       filename->encrypted_filename_size);
+			rc = -ENOMEM;
+			goto out;
+		}
+		remaining_bytes = filename->encrypted_filename_size;
+		rc = ecryptfs_write_tag_70_packet(filename->encrypted_filename,
+						  &remaining_bytes,
+						  &packet_size,
+						  mount_crypt_stat,
+						  filename->filename,
+						  filename->filename_size);
+		if (rc) {
+			printk(KERN_ERR "%s: Error attempting to generate "
+			       "tag 70 packet; rc = [%d]\n", __func__,
+			       rc);
+			kfree(filename->encrypted_filename);
+			filename->encrypted_filename = NULL;
+			filename->encrypted_filename_size = 0;
+			goto out;
+		}
+		filename->encrypted_filename_size = packet_size;
+	} else {
+		printk(KERN_ERR "%s: No support for requested filename "
+		       "encryption method in this release\n", __func__);
+		rc = -ENOTSUPP;
 		goto out;
 	}
-	/* TODO: Filename encryption is a scheduled feature for a
-	 * future version of eCryptfs. This function is here only for
-	 * the purpose of providing a framework for other developers
-	 * to easily implement filename encryption. Hint: Replace this
-	 * memcpy() with a call to encrypt and encode the
-	 * filename, the set the length accordingly. */
-	memcpy((void *)(*encoded_name), (void *)name, length);
-	(*encoded_name)[length] = '\0';
-	error = length + 1;
 out:
-	return error;
+	return rc;
 }
 
-/**
- * ecryptfs_decode_filename - converts the cipher text name to plaintext
- * @crypt_stat: The crypt_stat struct associated with the file
- * @name: The filename in cipher text
- * @length: The length of the cipher text name
- * @decrypted_name: The plaintext name
- *
- * Decodes and decrypts the filename.
- *
- * We assume that we have a properly initialized crypto context,
- * pointed to by crypt_stat->tfm.
- *
- * TODO: Implement filename decoding and decryption here, in place of
- * memcpy. We are keeping the framework around for now to (1)
- * facilitate testing of the components needed to implement filename
- * encryption and (2) to provide a code base from which other
- * developers in the community can easily implement this feature.
- *
- * Returns the length of decoded filename; negative if error
- */
-int
-ecryptfs_decode_filename(struct ecryptfs_crypt_stat *crypt_stat,
-			 const char *name, int length, char **decrypted_name)
+static int ecryptfs_copy_filename(char **copied_name, size_t *copied_name_size,
+				  const char *name, size_t name_size)
 {
-	int error = 0;
+	int rc = 0;
 
-	(*decrypted_name) = kmalloc(length + 2, GFP_KERNEL);
-	if (!(*decrypted_name)) {
-		error = -ENOMEM;
+	(*copied_name) = kmalloc((name_size + 2), GFP_KERNEL);
+	if (!(*copied_name)) {
+		rc = -ENOMEM;
 		goto out;
 	}
-	/* TODO: Filename encryption is a scheduled feature for a
-	 * future version of eCryptfs. This function is here only for
-	 * the purpose of providing a framework for other developers
-	 * to easily implement filename encryption. Hint: Replace this
-	 * memcpy() with a call to decode and decrypt the
-	 * filename, the set the length accordingly. */
-	memcpy((void *)(*decrypted_name), (void *)name, length);
-	(*decrypted_name)[length + 1] = '\0';	/* Only for convenience
+	memcpy((void *)(*copied_name), (void *)name, name_size);
+	(*copied_name)[(name_size)] = '\0';	/* Only for convenience
 						 * in printing out the
 						 * string in debug
 						 * messages */
-	error = length;
+	(*copied_name_size) = (name_size + 1);
 out:
-	return error;
+	return rc;
 }
 
 /**
@@ -1740,7 +1752,7 @@
 	*key_tfm = NULL;
 	if (*key_size > ECRYPTFS_MAX_KEY_BYTES) {
 		rc = -EINVAL;
-		printk(KERN_ERR "Requested key size is [%Zd] bytes; maximum "
+		printk(KERN_ERR "Requested key size is [%zd] bytes; maximum "
 		      "allowable is [%d]\n", *key_size, ECRYPTFS_MAX_KEY_BYTES);
 		goto out;
 	}
@@ -1765,7 +1777,7 @@
 	get_random_bytes(dummy_key, *key_size);
 	rc = crypto_blkcipher_setkey(*key_tfm, dummy_key, *key_size);
 	if (rc) {
-		printk(KERN_ERR "Error attempting to set key of size [%Zd] for "
+		printk(KERN_ERR "Error attempting to set key of size [%zd] for "
 		       "cipher [%s]; rc = [%d]\n", *key_size, cipher_name, rc);
 		rc = -EINVAL;
 		goto out;
@@ -1910,3 +1922,341 @@
 	mutex_unlock(&key_tfm_list_mutex);
 	return rc;
 }
+
+/* 64 characters forming a 6-bit target field */
+static unsigned char *portable_filename_chars = ("-.0123456789ABCD"
+						 "EFGHIJKLMNOPQRST"
+						 "UVWXYZabcdefghij"
+						 "klmnopqrstuvwxyz");
+
+/* We could either offset on every reverse map or just pad some 0x00's
+ * at the front here. Sized to cover every possible byte value (0-255). */
+static const unsigned char filename_rev_map[256] = {
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 7 */
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 15 */
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 23 */
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 31 */
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 39 */
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, /* 47 */
+	0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, /* 55 */
+	0x0A, 0x0B, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 63 */
+	0x00, 0x0C, 0x0D, 0x0E, 0x0F, 0x10, 0x11, 0x12, /* 71 */
+	0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1A, /* 79 */
+	0x1B, 0x1C, 0x1D, 0x1E, 0x1F, 0x20, 0x21, 0x22, /* 87 */
+	0x23, 0x24, 0x25, 0x00, 0x00, 0x00, 0x00, 0x00, /* 95 */
+	0x00, 0x26, 0x27, 0x28, 0x29, 0x2A, 0x2B, 0x2C, /* 103 */
+	0x2D, 0x2E, 0x2F, 0x30, 0x31, 0x32, 0x33, 0x34, /* 111 */
+	0x35, 0x36, 0x37, 0x38, 0x39, 0x3A, 0x3B, 0x3C, /* 119 */
+	0x3D, 0x3E, 0x3F /* 123 - 255 implicitly initialized to 0x00 */
+};
+
+/**
+ * ecryptfs_encode_for_filename
+ * @dst: Destination location for encoded filename
+ * @dst_size: Size of the encoded filename in bytes
+ * @src: Source location for the filename to encode
+ * @src_size: Size of the source in bytes
+ */
+void ecryptfs_encode_for_filename(unsigned char *dst, size_t *dst_size,
+				  unsigned char *src, size_t src_size)
+{
+	size_t num_blocks;
+	size_t block_num = 0;
+	size_t dst_offset = 0;
+	unsigned char last_block[3];
+
+	if (src_size == 0) {
+		(*dst_size) = 0;
+		goto out;
+	}
+	num_blocks = (src_size / 3);
+	if ((src_size % 3) == 0) {
+		memcpy(last_block, (&src[src_size - 3]), 3);
+	} else {
+		num_blocks++;
+		last_block[2] = 0x00;
+		switch (src_size % 3) {
+		case 1:
+			last_block[0] = src[src_size - 1];
+			last_block[1] = 0x00;
+			break;
+		case 2:
+			last_block[0] = src[src_size - 2];
+			last_block[1] = src[src_size - 1];
+		}
+	}
+	(*dst_size) = (num_blocks * 4);
+	if (!dst)
+		goto out;
+	while (block_num < num_blocks) {
+		unsigned char *src_block;
+		unsigned char dst_block[4];
+
+		if (block_num == (num_blocks - 1))
+			src_block = last_block;
+		else
+			src_block = &src[block_num * 3];
+		dst_block[0] = ((src_block[0] >> 2) & 0x3F);
+		dst_block[1] = (((src_block[0] << 4) & 0x30)
+				| ((src_block[1] >> 4) & 0x0F));
+		dst_block[2] = (((src_block[1] << 2) & 0x3C)
+				| ((src_block[2] >> 6) & 0x03));
+		dst_block[3] = (src_block[2] & 0x3F);
+		dst[dst_offset++] = portable_filename_chars[dst_block[0]];
+		dst[dst_offset++] = portable_filename_chars[dst_block[1]];
+		dst[dst_offset++] = portable_filename_chars[dst_block[2]];
+		dst[dst_offset++] = portable_filename_chars[dst_block[3]];
+		block_num++;
+	}
+out:
+	return;
+}
+
+/**
+ * ecryptfs_decode_from_filename
+ * @dst: If NULL, this function only sets @dst_size and returns. If
+ *       non-NULL, this function decodes the encoded octets in @src
+ *       into the memory that @dst points to.
+ * @dst_size: Set to the size of the decoded string.
+ * @src: The encoded set of octets to decode.
+ * @src_size: The size of the encoded set of octets to decode.
+ */
+static void
+ecryptfs_decode_from_filename(unsigned char *dst, size_t *dst_size,
+			      const unsigned char *src, size_t src_size)
+{
+	u8 current_bit_offset = 0;
+	size_t src_byte_offset = 0;
+	size_t dst_byte_offset = 0;
+
+	if (dst == NULL) {
+		/* Not exact; conservatively long. Every block of 4
+		 * encoded characters decodes into a block of 3
+		 * decoded characters. This segment of code provides
+		 * the caller with the maximum amount of allocated
+		 * space that @dst will need to point to in a
+		 * subsequent call. */
+		(*dst_size) = (((src_size + 1) * 3) / 4);
+		goto out;
+	}
+	while (src_byte_offset < src_size) {
+		unsigned char src_byte =
+				filename_rev_map[(int)src[src_byte_offset]];
+
+		switch (current_bit_offset) {
+		case 0:
+			dst[dst_byte_offset] = (src_byte << 2);
+			current_bit_offset = 6;
+			break;
+		case 6:
+			dst[dst_byte_offset++] |= (src_byte >> 4);
+			dst[dst_byte_offset] = ((src_byte & 0xF)
+						 << 4);
+			current_bit_offset = 4;
+			break;
+		case 4:
+			dst[dst_byte_offset++] |= (src_byte >> 2);
+			dst[dst_byte_offset] = (src_byte << 6);
+			current_bit_offset = 2;
+			break;
+		case 2:
+			/* No look-ahead zero write: it could overrun dst */
+			dst[dst_byte_offset++] |= (src_byte);
+			current_bit_offset = 0;
+			break;
+		}
+		src_byte_offset++;
+	}
+	(*dst_size) = dst_byte_offset;
+out:
+	return;
+}
+
+/**
+ * ecryptfs_encrypt_and_encode_filename - converts a plaintext file name to cipher text
+ * @crypt_stat: The crypt_stat struct associated with the file name to encode
+ * @name: The plaintext name
+ * @name_size: The length of the plaintext
+ * @encoded_name: The encrypted name
+ *
+ * Encrypts and encodes a filename into something that constitutes a
+ * valid filename for a filesystem, with printable characters.
+ *
+ * We assume that we have a properly initialized crypto context,
+ * pointed to by crypt_stat->tfm.
+ *
+ * Returns zero on success; non-zero otherwise
+ */
+int ecryptfs_encrypt_and_encode_filename(
+	char **encoded_name,
+	size_t *encoded_name_size,
+	struct ecryptfs_crypt_stat *crypt_stat,
+	struct ecryptfs_mount_crypt_stat *mount_crypt_stat,
+	const char *name, size_t name_size)
+{
+	size_t encoded_name_no_prefix_size;
+	int rc = 0;
+
+	(*encoded_name) = NULL;
+	(*encoded_name_size) = 0;
+	if ((crypt_stat && (crypt_stat->flags & ECRYPTFS_ENCRYPT_FILENAMES))
+	    || (mount_crypt_stat && (mount_crypt_stat->flags
+				     & ECRYPTFS_GLOBAL_ENCRYPT_FILENAMES))) {
+		struct ecryptfs_filename *filename;
+
+		filename = kzalloc(sizeof(*filename), GFP_KERNEL);
+		if (!filename) {
+			printk(KERN_ERR "%s: Out of memory whilst attempting "
+			       "to kzalloc [%zd] bytes\n", __func__,
+			       sizeof(*filename));
+			rc = -ENOMEM;
+			goto out;
+		}
+		filename->filename = (char *)name;
+		filename->filename_size = name_size;
+		rc = ecryptfs_encrypt_filename(filename, crypt_stat,
+					       mount_crypt_stat);
+		if (rc) {
+			printk(KERN_ERR "%s: Error attempting to encrypt "
+			       "filename; rc = [%d]\n", __func__, rc);
+			kfree(filename);
+			goto out;
+		}
+		ecryptfs_encode_for_filename(
+			NULL, &encoded_name_no_prefix_size,
+			filename->encrypted_filename,
+			filename->encrypted_filename_size);
+		if ((crypt_stat && (crypt_stat->flags
+				    & ECRYPTFS_ENCFN_USE_MOUNT_FNEK))
+		    || (mount_crypt_stat
+			&& (mount_crypt_stat->flags
+			    & ECRYPTFS_GLOBAL_ENCFN_USE_MOUNT_FNEK)))
+			(*encoded_name_size) =
+				(ECRYPTFS_FNEK_ENCRYPTED_FILENAME_PREFIX_SIZE
+				 + encoded_name_no_prefix_size);
+		else
+			(*encoded_name_size) =
+				(ECRYPTFS_FEK_ENCRYPTED_FILENAME_PREFIX_SIZE
+				 + encoded_name_no_prefix_size);
+		(*encoded_name) = kmalloc((*encoded_name_size) + 1, GFP_KERNEL);
+		if (!(*encoded_name)) {
+			printk(KERN_ERR "%s: Out of memory whilst attempting "
+			       "to kzalloc [%zd] bytes\n", __func__,
+			       (*encoded_name_size));
+			rc = -ENOMEM;
+			kfree(filename->encrypted_filename);
+			kfree(filename);
+			goto out;
+		}
+		if ((crypt_stat && (crypt_stat->flags
+				    & ECRYPTFS_ENCFN_USE_MOUNT_FNEK))
+		    || (mount_crypt_stat
+			&& (mount_crypt_stat->flags
+			    & ECRYPTFS_GLOBAL_ENCFN_USE_MOUNT_FNEK))) {
+			memcpy((*encoded_name),
+			       ECRYPTFS_FNEK_ENCRYPTED_FILENAME_PREFIX,
+			       ECRYPTFS_FNEK_ENCRYPTED_FILENAME_PREFIX_SIZE);
+			ecryptfs_encode_for_filename(
+			    ((*encoded_name)
+			     + ECRYPTFS_FNEK_ENCRYPTED_FILENAME_PREFIX_SIZE),
+			    &encoded_name_no_prefix_size,
+			    filename->encrypted_filename,
+			    filename->encrypted_filename_size);
+			(*encoded_name_size) =
+				(ECRYPTFS_FNEK_ENCRYPTED_FILENAME_PREFIX_SIZE
+				 + encoded_name_no_prefix_size);
+			(*encoded_name)[(*encoded_name_size)] = '\0';
+			(*encoded_name_size)++;
+		} else {
+			rc = -ENOTSUPP;
+		}
+		if (rc) {
+			printk(KERN_ERR "%s: Error attempting to encode "
+			       "encrypted filename; rc = [%d]\n", __func__,
+			       rc);
+			kfree((*encoded_name));
+			(*encoded_name) = NULL;
+			(*encoded_name_size) = 0;
+		}
+		kfree(filename->encrypted_filename);
+		kfree(filename);
+	} else {
+		rc = ecryptfs_copy_filename(encoded_name,
+					    encoded_name_size,
+					    name, name_size);
+	}
+out:
+	return rc;
+}
+
+/**
+ * ecryptfs_decode_and_decrypt_filename - converts the encoded cipher text name to decoded plaintext
+ * @plaintext_name: The plaintext name
+ * @plaintext_name_size: The plaintext name size
+ * @ecryptfs_dir_dentry: eCryptfs directory dentry
+ * @name: The filename in cipher text
+ * @name_size: The cipher text name size
+ *
+ * Decrypts and decodes the filename.
+ *
+ * Returns zero on success; non-zero otherwise
+ */
+int ecryptfs_decode_and_decrypt_filename(char **plaintext_name,
+					 size_t *plaintext_name_size,
+					 struct dentry *ecryptfs_dir_dentry,
+					 const char *name, size_t name_size)
+{
+	char *decoded_name;
+	size_t decoded_name_size;
+	size_t packet_size;
+	int rc = 0;
+
+	if ((name_size > ECRYPTFS_FNEK_ENCRYPTED_FILENAME_PREFIX_SIZE)
+	    && (strncmp(name, ECRYPTFS_FNEK_ENCRYPTED_FILENAME_PREFIX,
+			ECRYPTFS_FNEK_ENCRYPTED_FILENAME_PREFIX_SIZE) == 0)) {
+		struct ecryptfs_mount_crypt_stat *mount_crypt_stat =
+			&ecryptfs_superblock_to_private(
+				ecryptfs_dir_dentry->d_sb)->mount_crypt_stat;
+		const char *orig_name = name;
+		size_t orig_name_size = name_size;
+
+		name += ECRYPTFS_FNEK_ENCRYPTED_FILENAME_PREFIX_SIZE;
+		name_size -= ECRYPTFS_FNEK_ENCRYPTED_FILENAME_PREFIX_SIZE;
+		ecryptfs_decode_from_filename(NULL, &decoded_name_size,
+					      name, name_size);
+		decoded_name = kmalloc(decoded_name_size, GFP_KERNEL);
+		if (!decoded_name) {
+			printk(KERN_ERR "%s: Out of memory whilst attempting "
+			       "to kmalloc [%zd] bytes\n", __func__,
+			       decoded_name_size);
+			rc = -ENOMEM;
+			goto out;
+		}
+		ecryptfs_decode_from_filename(decoded_name, &decoded_name_size,
+					      name, name_size);
+		rc = ecryptfs_parse_tag_70_packet(plaintext_name,
+						  plaintext_name_size,
+						  &packet_size,
+						  mount_crypt_stat,
+						  decoded_name,
+						  decoded_name_size);
+		if (rc) {
+			printk(KERN_INFO "%s: Could not parse tag 70 packet "
+			       "from filename; copying through filename "
+			       "as-is\n", __func__);
+			rc = ecryptfs_copy_filename(plaintext_name,
+						    plaintext_name_size,
+						    orig_name, orig_name_size);
+			goto out_free;
+		}
+	} else {
+		rc = ecryptfs_copy_filename(plaintext_name,
+					    plaintext_name_size,
+					    name, name_size);
+		goto out;
+	}
+out_free:
+	kfree(decoded_name);
+out:
+	return rc;
+}
diff --git a/fs/ecryptfs/ecryptfs_kernel.h b/fs/ecryptfs/ecryptfs_kernel.h
index a75026d..c11fc95 100644
--- a/fs/ecryptfs/ecryptfs_kernel.h
+++ b/fs/ecryptfs/ecryptfs_kernel.h
@@ -51,12 +51,16 @@
 #define ECRYPTFS_VERSIONING_XATTR                 0x00000010
 #define ECRYPTFS_VERSIONING_MULTKEY               0x00000020
 #define ECRYPTFS_VERSIONING_DEVMISC               0x00000040
+#define ECRYPTFS_VERSIONING_HMAC                  0x00000080
+#define ECRYPTFS_VERSIONING_FILENAME_ENCRYPTION   0x00000100
+#define ECRYPTFS_VERSIONING_GCM                   0x00000200
 #define ECRYPTFS_VERSIONING_MASK (ECRYPTFS_VERSIONING_PASSPHRASE \
 				  | ECRYPTFS_VERSIONING_PLAINTEXT_PASSTHROUGH \
 				  | ECRYPTFS_VERSIONING_PUBKEY \
 				  | ECRYPTFS_VERSIONING_XATTR \
 				  | ECRYPTFS_VERSIONING_MULTKEY \
-				  | ECRYPTFS_VERSIONING_DEVMISC)
+				  | ECRYPTFS_VERSIONING_DEVMISC \
+				  | ECRYPTFS_VERSIONING_FILENAME_ENCRYPTION)
 #define ECRYPTFS_MAX_PASSWORD_LENGTH 64
 #define ECRYPTFS_MAX_PASSPHRASE_BYTES ECRYPTFS_MAX_PASSWORD_LENGTH
 #define ECRYPTFS_SALT_SIZE 8
@@ -199,6 +203,7 @@
 #define ECRYPTFS_DEFAULT_CIPHER "aes"
 #define ECRYPTFS_DEFAULT_KEY_BYTES 16
 #define ECRYPTFS_DEFAULT_HASH "md5"
+#define ECRYPTFS_TAG_70_DIGEST ECRYPTFS_DEFAULT_HASH
 #define ECRYPTFS_TAG_1_PACKET_TYPE 0x01
 #define ECRYPTFS_TAG_3_PACKET_TYPE 0x8C
 #define ECRYPTFS_TAG_11_PACKET_TYPE 0xED
@@ -206,30 +211,64 @@
 #define ECRYPTFS_TAG_65_PACKET_TYPE 0x41
 #define ECRYPTFS_TAG_66_PACKET_TYPE 0x42
 #define ECRYPTFS_TAG_67_PACKET_TYPE 0x43
+#define ECRYPTFS_TAG_70_PACKET_TYPE 0x46 /* FNEK-encrypted filename
+					  * as dentry name */
+#define ECRYPTFS_TAG_71_PACKET_TYPE 0x47 /* FNEK-encrypted filename in
+					  * metadata */
+#define ECRYPTFS_TAG_72_PACKET_TYPE 0x48 /* FEK-encrypted filename as
+					  * dentry name */
+#define ECRYPTFS_TAG_73_PACKET_TYPE 0x49 /* FEK-encrypted filename as
+					  * metadata */
+/* Constraint: ECRYPTFS_FILENAME_MIN_RANDOM_PREPEND_BYTES >=
+ * ECRYPTFS_MAX_IV_BYTES */
+#define ECRYPTFS_FILENAME_MIN_RANDOM_PREPEND_BYTES 16
+#define ECRYPTFS_NON_NULL 0x42 /* A reasonable substitute for NULL */
 #define MD5_DIGEST_SIZE 16
+#define ECRYPTFS_TAG_70_DIGEST_SIZE MD5_DIGEST_SIZE
+#define ECRYPTFS_FEK_ENCRYPTED_FILENAME_PREFIX "ECRYPTFS_FEK_ENCRYPTED."
+#define ECRYPTFS_FEK_ENCRYPTED_FILENAME_PREFIX_SIZE 23
+#define ECRYPTFS_FNEK_ENCRYPTED_FILENAME_PREFIX "ECRYPTFS_FNEK_ENCRYPTED."
+#define ECRYPTFS_FNEK_ENCRYPTED_FILENAME_PREFIX_SIZE 24
+#define ECRYPTFS_ENCRYPTED_DENTRY_NAME_LEN (18 + 1 + 4 + 1 + 32)
 
 struct ecryptfs_key_sig {
 	struct list_head crypt_stat_list;
 	char keysig[ECRYPTFS_SIG_SIZE_HEX];
 };
 
+struct ecryptfs_filename {
+	struct list_head crypt_stat_list;
+#define ECRYPTFS_FILENAME_CONTAINS_DECRYPTED 0x00000001
+	u32 flags;
+	u32 seq_no;
+	char *filename;
+	char *encrypted_filename;
+	size_t filename_size;
+	size_t encrypted_filename_size;
+	char fnek_sig[ECRYPTFS_SIG_SIZE_HEX];
+	char dentry_name[ECRYPTFS_ENCRYPTED_DENTRY_NAME_LEN + 1];
+};
+
 /**
  * This is the primary struct associated with each encrypted file.
  *
  * TODO: cache align/pack?
  */
 struct ecryptfs_crypt_stat {
-#define ECRYPTFS_STRUCT_INITIALIZED 0x00000001
-#define ECRYPTFS_POLICY_APPLIED     0x00000002
-#define ECRYPTFS_NEW_FILE           0x00000004
-#define ECRYPTFS_ENCRYPTED          0x00000008
-#define ECRYPTFS_SECURITY_WARNING   0x00000010
-#define ECRYPTFS_ENABLE_HMAC        0x00000020
-#define ECRYPTFS_ENCRYPT_IV_PAGES   0x00000040
-#define ECRYPTFS_KEY_VALID          0x00000080
-#define ECRYPTFS_METADATA_IN_XATTR  0x00000100
-#define ECRYPTFS_VIEW_AS_ENCRYPTED  0x00000200
-#define ECRYPTFS_KEY_SET            0x00000400
+#define ECRYPTFS_STRUCT_INITIALIZED   0x00000001
+#define ECRYPTFS_POLICY_APPLIED       0x00000002
+#define ECRYPTFS_NEW_FILE             0x00000004
+#define ECRYPTFS_ENCRYPTED            0x00000008
+#define ECRYPTFS_SECURITY_WARNING     0x00000010
+#define ECRYPTFS_ENABLE_HMAC          0x00000020
+#define ECRYPTFS_ENCRYPT_IV_PAGES     0x00000040
+#define ECRYPTFS_KEY_VALID            0x00000080
+#define ECRYPTFS_METADATA_IN_XATTR    0x00000100
+#define ECRYPTFS_VIEW_AS_ENCRYPTED    0x00000200
+#define ECRYPTFS_KEY_SET              0x00000400
+#define ECRYPTFS_ENCRYPT_FILENAMES    0x00000800
+#define ECRYPTFS_ENCFN_USE_MOUNT_FNEK 0x00001000
+#define ECRYPTFS_ENCFN_USE_FEK        0x00002000
 	u32 flags;
 	unsigned int file_version;
 	size_t iv_bytes;
@@ -332,13 +371,20 @@
 #define ECRYPTFS_XATTR_METADATA_ENABLED        0x00000002
 #define ECRYPTFS_ENCRYPTED_VIEW_ENABLED        0x00000004
 #define ECRYPTFS_MOUNT_CRYPT_STAT_INITIALIZED  0x00000008
+#define ECRYPTFS_GLOBAL_ENCRYPT_FILENAMES      0x00000010
+#define ECRYPTFS_GLOBAL_ENCFN_USE_MOUNT_FNEK   0x00000020
+#define ECRYPTFS_GLOBAL_ENCFN_USE_FEK          0x00000040
 	u32 flags;
 	struct list_head global_auth_tok_list;
 	struct mutex global_auth_tok_list_mutex;
 	size_t num_global_auth_toks;
 	size_t global_default_cipher_key_size;
+	size_t global_default_fn_cipher_key_bytes;
 	unsigned char global_default_cipher_name[ECRYPTFS_MAX_CIPHER_NAME_SIZE
 						 + 1];
+	unsigned char global_default_fn_cipher_name[
+		ECRYPTFS_MAX_CIPHER_NAME_SIZE + 1];
+	char global_default_fnek_sig[ECRYPTFS_SIG_SIZE_HEX + 1];
 };
 
 /* superblock private data. */
@@ -571,13 +617,22 @@
 int ecryptfs_interpose(struct dentry *hidden_dentry,
 		       struct dentry *this_dentry, struct super_block *sb,
 		       u32 flags);
+int ecryptfs_lookup_and_interpose_lower(struct dentry *ecryptfs_dentry,
+					struct dentry *lower_dentry,
+					struct ecryptfs_crypt_stat *crypt_stat,
+					struct inode *ecryptfs_dir_inode,
+					struct nameidata *ecryptfs_nd);
+int ecryptfs_decode_and_decrypt_filename(char **decrypted_name,
+					 size_t *decrypted_name_size,
+					 struct dentry *ecryptfs_dentry,
+					 const char *name, size_t name_size);
 int ecryptfs_fill_zeros(struct file *file, loff_t new_length);
-int ecryptfs_decode_filename(struct ecryptfs_crypt_stat *crypt_stat,
-			     const char *name, int length,
-			     char **decrypted_name);
-int ecryptfs_encode_filename(struct ecryptfs_crypt_stat *crypt_stat,
-			     const char *name, int length,
-			     char **encoded_name);
+int ecryptfs_encrypt_and_encode_filename(
+	char **encoded_name,
+	size_t *encoded_name_size,
+	struct ecryptfs_crypt_stat *crypt_stat,
+	struct ecryptfs_mount_crypt_stat *mount_crypt_stat,
+	const char *name, size_t name_size);
 struct dentry *ecryptfs_lower_dentry(struct dentry *this_dentry);
 void ecryptfs_dump_hex(char *data, int bytes);
 int virt_to_scatterlist(const void *addr, int size, struct scatterlist *sg,
@@ -599,7 +654,7 @@
 					     struct inode *ecryptfs_inode);
 int ecryptfs_read_and_validate_xattr_region(char *page_virt,
 					    struct dentry *ecryptfs_dentry);
-u8 ecryptfs_code_for_cipher_string(struct ecryptfs_crypt_stat *crypt_stat);
+u8 ecryptfs_code_for_cipher_string(char *cipher_name, size_t key_bytes);
 int ecryptfs_cipher_code_to_string(char *str, u8 cipher_code);
 void ecryptfs_set_default_sizes(struct ecryptfs_crypt_stat *crypt_stat);
 int ecryptfs_generate_key_packet_set(char *dest_base,
@@ -694,5 +749,17 @@
 			     struct vfsmount *lower_mnt,
 			     const struct cred *cred);
 int ecryptfs_init_persistent_file(struct dentry *ecryptfs_dentry);
+int
+ecryptfs_write_tag_70_packet(char *dest, size_t *remaining_bytes,
+			     size_t *packet_size,
+			     struct ecryptfs_mount_crypt_stat *mount_crypt_stat,
+			     char *filename, size_t filename_size);
+int
+ecryptfs_parse_tag_70_packet(char **filename, size_t *filename_size,
+			     size_t *packet_size,
+			     struct ecryptfs_mount_crypt_stat *mount_crypt_stat,
+			     char *data, size_t max_packet_size);
+int ecryptfs_derive_iv(char *iv, struct ecryptfs_crypt_stat *crypt_stat,
+		       loff_t offset);
 
 #endif /* #ifndef ECRYPTFS_KERNEL_H */
diff --git a/fs/ecryptfs/file.c b/fs/ecryptfs/file.c
index 7138343..9e94405 100644
--- a/fs/ecryptfs/file.c
+++ b/fs/ecryptfs/file.c
@@ -77,27 +77,27 @@
 
 /* Inspired by generic filldir in fs/readdir.c */
 static int
-ecryptfs_filldir(void *dirent, const char *name, int namelen, loff_t offset,
-		 u64 ino, unsigned int d_type)
+ecryptfs_filldir(void *dirent, const char *lower_name, int lower_namelen,
+		 loff_t offset, u64 ino, unsigned int d_type)
 {
-	struct ecryptfs_crypt_stat *crypt_stat;
 	struct ecryptfs_getdents_callback *buf =
 	    (struct ecryptfs_getdents_callback *)dirent;
+	size_t name_size;
+	char *name;
 	int rc;
-	int decoded_length;
-	char *decoded_name;
 
-	crypt_stat = ecryptfs_dentry_to_private(buf->dentry)->crypt_stat;
 	buf->filldir_called++;
-	decoded_length = ecryptfs_decode_filename(crypt_stat, name, namelen,
-						  &decoded_name);
-	if (decoded_length < 0) {
-		rc = decoded_length;
+	rc = ecryptfs_decode_and_decrypt_filename(&name, &name_size,
+						  buf->dentry, lower_name,
+						  lower_namelen);
+	if (rc) {
+		printk(KERN_ERR "%s: Error attempting to decode and decrypt "
+		       "filename [%s]; rc = [%d]\n", __func__, lower_name,
+		       rc);
 		goto out;
 	}
-	rc = buf->filldir(buf->dirent, decoded_name, decoded_length, offset,
-			  ino, d_type);
-	kfree(decoded_name);
+	rc = buf->filldir(buf->dirent, name, name_size, offset, ino, d_type);
+	kfree(name);
 	if (rc >= 0)
 		buf->entries_written++;
 out:
@@ -106,8 +106,8 @@
 
 /**
  * ecryptfs_readdir
- * @file: The ecryptfs file struct
- * @dirent: Directory entry
+ * @file: The eCryptfs directory file
+ * @dirent: Directory entry handle
  * @filldir: The filldir callback function
  */
 static int ecryptfs_readdir(struct file *file, void *dirent, filldir_t filldir)
diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c
index 0111906..5697899 100644
--- a/fs/ecryptfs/inode.c
+++ b/fs/ecryptfs/inode.c
@@ -52,8 +52,7 @@
 /**
  * ecryptfs_create_underlying_file
  * @lower_dir_inode: inode of the parent in the lower fs of the new file
- * @lower_dentry: New file's dentry in the lower fs
- * @ecryptfs_dentry: New file's dentry in ecryptfs
+ * @dentry: New file's dentry
  * @mode: The mode of the new file
  * @nd: nameidata of ecryptfs' parent's dentry & vfsmount
  *
@@ -228,8 +227,7 @@
 {
 	int rc;
 
-	/* ecryptfs_do_create() calls ecryptfs_interpose(), which opens
-	 * the crypt_stat->lower_file (persistent file) */
+	/* ecryptfs_do_create() calls ecryptfs_interpose() */
 	rc = ecryptfs_do_create(directory_inode, ecryptfs_dentry, mode, nd);
 	if (unlikely(rc)) {
 		ecryptfs_printk(KERN_WARNING, "Failed to create file in"
@@ -244,141 +242,91 @@
 }
 
 /**
- * ecryptfs_lookup
- * @dir: inode
- * @dentry: The dentry
- * @nd: nameidata, may be NULL
- *
- * Find a file on disk. If the file does not exist, then we'll add it to the
- * dentry cache and continue on to read it from the disk.
+ * ecryptfs_lookup_and_interpose_lower - Perform a lookup
  */
-static struct dentry *ecryptfs_lookup(struct inode *dir, struct dentry *dentry,
-				      struct nameidata *nd)
+int ecryptfs_lookup_and_interpose_lower(struct dentry *ecryptfs_dentry,
+					struct dentry *lower_dentry,
+					struct ecryptfs_crypt_stat *crypt_stat,
+					struct inode *ecryptfs_dir_inode,
+					struct nameidata *ecryptfs_nd)
 {
-	int rc = 0;
 	struct dentry *lower_dir_dentry;
-	struct dentry *lower_dentry;
 	struct vfsmount *lower_mnt;
-	char *encoded_name;
-	int encoded_namelen;
-	struct ecryptfs_crypt_stat *crypt_stat = NULL;
+	struct inode *lower_inode;
 	struct ecryptfs_mount_crypt_stat *mount_crypt_stat;
 	char *page_virt = NULL;
-	struct inode *lower_inode;
 	u64 file_size;
+	int rc = 0;
 
-	lower_dir_dentry = ecryptfs_dentry_to_lower(dentry->d_parent);
-	dentry->d_op = &ecryptfs_dops;
-	if ((dentry->d_name.len == 1 && !strcmp(dentry->d_name.name, "."))
-	    || (dentry->d_name.len == 2
-		&& !strcmp(dentry->d_name.name, ".."))) {
-		d_drop(dentry);
-		goto out;
-	}
-	encoded_namelen = ecryptfs_encode_filename(crypt_stat,
-						   dentry->d_name.name,
-						   dentry->d_name.len,
-						   &encoded_name);
-	if (encoded_namelen < 0) {
-		rc = encoded_namelen;
-		d_drop(dentry);
-		goto out;
-	}
-	ecryptfs_printk(KERN_DEBUG, "encoded_name = [%s]; encoded_namelen "
-			"= [%d]\n", encoded_name, encoded_namelen);
-	lower_dentry = lookup_one_len(encoded_name, lower_dir_dentry,
-				      encoded_namelen - 1);
-	kfree(encoded_name);
-	if (IS_ERR(lower_dentry)) {
-		ecryptfs_printk(KERN_ERR, "ERR from lower_dentry\n");
-		rc = PTR_ERR(lower_dentry);
-		d_drop(dentry);
-		goto out;
-	}
-	lower_mnt = mntget(ecryptfs_dentry_to_lower_mnt(dentry->d_parent));
-	ecryptfs_printk(KERN_DEBUG, "lower_dentry = [%p]; lower_dentry->"
-       		"d_name.name = [%s]\n", lower_dentry,
-		lower_dentry->d_name.name);
+	lower_dir_dentry = lower_dentry->d_parent;
+	lower_mnt = mntget(ecryptfs_dentry_to_lower_mnt(
+				   ecryptfs_dentry->d_parent));
 	lower_inode = lower_dentry->d_inode;
-	fsstack_copy_attr_atime(dir, lower_dir_dentry->d_inode);
+	fsstack_copy_attr_atime(ecryptfs_dir_inode, lower_dir_dentry->d_inode);
 	BUG_ON(!atomic_read(&lower_dentry->d_count));
-	ecryptfs_set_dentry_private(dentry,
+	ecryptfs_set_dentry_private(ecryptfs_dentry,
 				    kmem_cache_alloc(ecryptfs_dentry_info_cache,
 						     GFP_KERNEL));
-	if (!ecryptfs_dentry_to_private(dentry)) {
+	if (!ecryptfs_dentry_to_private(ecryptfs_dentry)) {
 		rc = -ENOMEM;
-		ecryptfs_printk(KERN_ERR, "Out of memory whilst attempting "
-				"to allocate ecryptfs_dentry_info struct\n");
+		printk(KERN_ERR "%s: Out of memory whilst attempting "
+		       "to allocate ecryptfs_dentry_info struct\n",
+			__func__);
 		goto out_dput;
 	}
-	ecryptfs_set_dentry_lower(dentry, lower_dentry);
-	ecryptfs_set_dentry_lower_mnt(dentry, lower_mnt);
+	ecryptfs_set_dentry_lower(ecryptfs_dentry, lower_dentry);
+	ecryptfs_set_dentry_lower_mnt(ecryptfs_dentry, lower_mnt);
 	if (!lower_dentry->d_inode) {
 		/* We want to add because we couldn't find in lower */
-		d_add(dentry, NULL);
+		d_add(ecryptfs_dentry, NULL);
 		goto out;
 	}
-	rc = ecryptfs_interpose(lower_dentry, dentry, dir->i_sb,
-				ECRYPTFS_INTERPOSE_FLAG_D_ADD);
+	rc = ecryptfs_interpose(lower_dentry, ecryptfs_dentry,
+				ecryptfs_dir_inode->i_sb, 1);
 	if (rc) {
-		ecryptfs_printk(KERN_ERR, "Error interposing\n");
+		printk(KERN_ERR "%s: Error interposing; rc = [%d]\n",
+		       __func__, rc);
 		goto out;
 	}
-	if (S_ISDIR(lower_inode->i_mode)) {
-		ecryptfs_printk(KERN_DEBUG, "Is a directory; returning\n");
+	if (S_ISDIR(lower_inode->i_mode))
 		goto out;
-	}
-	if (S_ISLNK(lower_inode->i_mode)) {
-		ecryptfs_printk(KERN_DEBUG, "Is a symlink; returning\n");
+	if (S_ISLNK(lower_inode->i_mode))
 		goto out;
-	}
-	if (special_file(lower_inode->i_mode)) {
-		ecryptfs_printk(KERN_DEBUG, "Is a special file; returning\n");
+	if (special_file(lower_inode->i_mode))
 		goto out;
-	}
-	if (!nd) {
-		ecryptfs_printk(KERN_DEBUG, "We have a NULL nd, just leave"
-				"as we *think* we are about to unlink\n");
+	if (!ecryptfs_nd)
 		goto out;
-	}
 	/* Released in this function */
-	page_virt = kmem_cache_zalloc(ecryptfs_header_cache_2,
-				      GFP_USER);
+	page_virt = kmem_cache_zalloc(ecryptfs_header_cache_2, GFP_USER);
 	if (!page_virt) {
+		printk(KERN_ERR "%s: Cannot kmem_cache_zalloc() a page\n",
+		       __func__);
 		rc = -ENOMEM;
-		ecryptfs_printk(KERN_ERR,
-				"Cannot ecryptfs_kmalloc a page\n");
 		goto out;
 	}
-	crypt_stat = &ecryptfs_inode_to_private(dentry->d_inode)->crypt_stat;
-	if (!(crypt_stat->flags & ECRYPTFS_POLICY_APPLIED))
-		ecryptfs_set_default_sizes(crypt_stat);
-	if (!ecryptfs_inode_to_private(dentry->d_inode)->lower_file) {
-		rc = ecryptfs_init_persistent_file(dentry);
+	if (!ecryptfs_inode_to_private(ecryptfs_dentry->d_inode)->lower_file) {
+		rc = ecryptfs_init_persistent_file(ecryptfs_dentry);
 		if (rc) {
 			printk(KERN_ERR "%s: Error attempting to initialize "
 			       "the persistent file for the dentry with name "
 			       "[%s]; rc = [%d]\n", __func__,
-			       dentry->d_name.name, rc);
-			goto out;
+			       ecryptfs_dentry->d_name.name, rc);
+			goto out_free_kmem;
 		}
 	}
 	rc = ecryptfs_read_and_validate_header_region(page_virt,
-						      dentry->d_inode);
+						      ecryptfs_dentry->d_inode);
 	if (rc) {
-		rc = ecryptfs_read_and_validate_xattr_region(page_virt, dentry);
+		rc = ecryptfs_read_and_validate_xattr_region(page_virt,
+							     ecryptfs_dentry);
 		if (rc) {
-			printk(KERN_DEBUG "Valid metadata not found in header "
-			       "region or xattr region; treating file as "
-			       "unencrypted\n");
 			rc = 0;
-			kmem_cache_free(ecryptfs_header_cache_2, page_virt);
-			goto out;
+			goto out_free_kmem;
 		}
 		crypt_stat->flags |= ECRYPTFS_METADATA_IN_XATTR;
 	}
 	mount_crypt_stat = &ecryptfs_superblock_to_private(
-		dentry->d_sb)->mount_crypt_stat;
+		ecryptfs_dentry->d_sb)->mount_crypt_stat;
 	if (mount_crypt_stat->flags & ECRYPTFS_ENCRYPTED_VIEW_ENABLED) {
 		if (crypt_stat->flags & ECRYPTFS_METADATA_IN_XATTR)
 			file_size = (crypt_stat->num_header_bytes_at_front
@@ -388,14 +336,103 @@
 	} else {
 		file_size = get_unaligned_be64(page_virt);
 	}
-	i_size_write(dentry->d_inode, (loff_t)file_size);
+	i_size_write(ecryptfs_dentry->d_inode, (loff_t)file_size);
+out_free_kmem:
 	kmem_cache_free(ecryptfs_header_cache_2, page_virt);
 	goto out;
-
 out_dput:
 	dput(lower_dentry);
-	d_drop(dentry);
+	d_drop(ecryptfs_dentry);
 out:
+	return rc;
+}
+
+/**
+ * ecryptfs_lookup
+ * @ecryptfs_dir_inode: The eCryptfs directory inode
+ * @ecryptfs_dentry: The eCryptfs dentry that we are looking up
+ * @ecryptfs_nd: nameidata; may be NULL
+ *
+ * Find a file on disk. If the file does not exist, then we'll add it to the
+ * dentry cache and continue on to read it from the disk.
+ */
+static struct dentry *ecryptfs_lookup(struct inode *ecryptfs_dir_inode,
+				      struct dentry *ecryptfs_dentry,
+				      struct nameidata *ecryptfs_nd)
+{
+	char *encrypted_and_encoded_name = NULL;
+	size_t encrypted_and_encoded_name_size;
+	struct ecryptfs_crypt_stat *crypt_stat = NULL;
+	struct ecryptfs_mount_crypt_stat *mount_crypt_stat = NULL;
+	struct ecryptfs_inode_info *inode_info;
+	struct dentry *lower_dir_dentry, *lower_dentry;
+	int rc = 0;
+
+	ecryptfs_dentry->d_op = &ecryptfs_dops;
+	if ((ecryptfs_dentry->d_name.len == 1
+	     && !strcmp(ecryptfs_dentry->d_name.name, "."))
+	    || (ecryptfs_dentry->d_name.len == 2
+		&& !strcmp(ecryptfs_dentry->d_name.name, ".."))) {
+		goto out_d_drop;
+	}
+	lower_dir_dentry = ecryptfs_dentry_to_lower(ecryptfs_dentry->d_parent);
+	lower_dentry = lookup_one_len(ecryptfs_dentry->d_name.name,
+				      lower_dir_dentry,
+				      ecryptfs_dentry->d_name.len);
+	if (IS_ERR(lower_dentry)) {
+		rc = PTR_ERR(lower_dentry);
+		printk(KERN_ERR "%s: lookup_one_len() returned [%d] on "
+		       "lower_dentry = [%s]\n", __func__, rc,
+		       ecryptfs_dentry->d_name.name);
+		goto out_d_drop;
+	}
+	if (lower_dentry->d_inode)
+		goto lookup_and_interpose;
+	inode_info =  ecryptfs_inode_to_private(ecryptfs_dentry->d_inode);
+	if (inode_info) {
+		crypt_stat = &inode_info->crypt_stat;
+		/* TODO: lock for crypt_stat comparison */
+		if (!(crypt_stat->flags & ECRYPTFS_POLICY_APPLIED))
+			ecryptfs_set_default_sizes(crypt_stat);
+	}
+	if (crypt_stat)
+		mount_crypt_stat = crypt_stat->mount_crypt_stat;
+	else
+		mount_crypt_stat = &ecryptfs_superblock_to_private(
+			ecryptfs_dentry->d_sb)->mount_crypt_stat;
+	if (!(crypt_stat && (crypt_stat->flags & ECRYPTFS_ENCRYPT_FILENAMES))
+	    && !(mount_crypt_stat && (mount_crypt_stat->flags
+				     & ECRYPTFS_GLOBAL_ENCRYPT_FILENAMES)))
+		goto lookup_and_interpose;
+	dput(lower_dentry);
+	rc = ecryptfs_encrypt_and_encode_filename(
+		&encrypted_and_encoded_name, &encrypted_and_encoded_name_size,
+		crypt_stat, mount_crypt_stat, ecryptfs_dentry->d_name.name,
+		ecryptfs_dentry->d_name.len);
+	if (rc) {
+		printk(KERN_ERR "%s: Error attempting to encrypt and encode "
+		       "filename; rc = [%d]\n", __func__, rc);
+		goto out_d_drop;
+	}
+	lower_dentry = lookup_one_len(encrypted_and_encoded_name,
+				      lower_dir_dentry,
+				      encrypted_and_encoded_name_size - 1);
+	if (IS_ERR(lower_dentry)) {
+		rc = PTR_ERR(lower_dentry);
+		printk(KERN_ERR "%s: lookup_one_len() returned [%d] on "
+		       "lower_dentry = [%s]\n", __func__, rc,
+		       encrypted_and_encoded_name);
+		goto out_d_drop;
+	}
+lookup_and_interpose:
+	rc = ecryptfs_lookup_and_interpose_lower(ecryptfs_dentry, lower_dentry,
+						 crypt_stat, ecryptfs_dir_inode,
+						 ecryptfs_nd);
+	goto out;
+out_d_drop:
+	d_drop(ecryptfs_dentry);
+out:
+	kfree(encrypted_and_encoded_name);
 	return ERR_PTR(rc);
 }
 
@@ -466,19 +503,21 @@
 	struct dentry *lower_dentry;
 	struct dentry *lower_dir_dentry;
 	char *encoded_symname;
-	int encoded_symlen;
-	struct ecryptfs_crypt_stat *crypt_stat = NULL;
+	size_t encoded_symlen;
+	struct ecryptfs_mount_crypt_stat *mount_crypt_stat = NULL;
 
 	lower_dentry = ecryptfs_dentry_to_lower(dentry);
 	dget(lower_dentry);
 	lower_dir_dentry = lock_parent(lower_dentry);
-	encoded_symlen = ecryptfs_encode_filename(crypt_stat, symname,
-						  strlen(symname),
-						  &encoded_symname);
-	if (encoded_symlen < 0) {
-		rc = encoded_symlen;
+	mount_crypt_stat = &ecryptfs_superblock_to_private(
+		dir->i_sb)->mount_crypt_stat;
+	rc = ecryptfs_encrypt_and_encode_filename(&encoded_symname,
+						  &encoded_symlen,
+						  NULL,
+						  mount_crypt_stat, symname,
+						  strlen(symname));
+	if (rc)
 		goto out_lock;
-	}
 	rc = vfs_symlink(lower_dir_dentry->d_inode, lower_dentry,
 			 encoded_symname);
 	kfree(encoded_symname);
@@ -602,52 +641,54 @@
 }
 
 static int
-ecryptfs_readlink(struct dentry *dentry, char __user * buf, int bufsiz)
+ecryptfs_readlink(struct dentry *dentry, char __user *buf, int bufsiz)
 {
-	int rc;
-	struct dentry *lower_dentry;
-	char *decoded_name;
 	char *lower_buf;
-	mm_segment_t old_fs;
+	struct dentry *lower_dentry;
 	struct ecryptfs_crypt_stat *crypt_stat;
+	char *plaintext_name;
+	size_t plaintext_name_size;
+	mm_segment_t old_fs;
+	int rc;
 
 	lower_dentry = ecryptfs_dentry_to_lower(dentry);
 	if (!lower_dentry->d_inode->i_op->readlink) {
 		rc = -EINVAL;
 		goto out;
 	}
+	crypt_stat = &ecryptfs_inode_to_private(dentry->d_inode)->crypt_stat;
 	/* Released in this function */
 	lower_buf = kmalloc(bufsiz, GFP_KERNEL);
 	if (lower_buf == NULL) {
-		ecryptfs_printk(KERN_ERR, "Out of memory\n");
+		printk(KERN_ERR "%s: Out of memory whilst attempting to "
+		       "kmalloc [%d] bytes\n", __func__, bufsiz);
 		rc = -ENOMEM;
 		goto out;
 	}
 	old_fs = get_fs();
 	set_fs(get_ds());
-	ecryptfs_printk(KERN_DEBUG, "Calling readlink w/ "
-			"lower_dentry->d_name.name = [%s]\n",
-			lower_dentry->d_name.name);
 	rc = lower_dentry->d_inode->i_op->readlink(lower_dentry,
 						   (char __user *)lower_buf,
 						   bufsiz);
 	set_fs(old_fs);
 	if (rc >= 0) {
-		crypt_stat = NULL;
-		rc = ecryptfs_decode_filename(crypt_stat, lower_buf, rc,
-					      &decoded_name);
-		if (rc == -ENOMEM)
+		rc = ecryptfs_decode_and_decrypt_filename(&plaintext_name,
+							  &plaintext_name_size,
+							  dentry, lower_buf,
+							  rc);
+		if (rc) {
+			printk(KERN_ERR "%s: Error attempting to decode and "
+			       "decrypt filename; rc = [%d]\n", __func__,
+				rc);
 			goto out_free_lower_buf;
-		if (rc > 0) {
-			ecryptfs_printk(KERN_DEBUG, "Copying [%d] bytes "
-					"to userspace: [%*s]\n", rc,
-					decoded_name);
-			if (copy_to_user(buf, decoded_name, rc))
-				rc = -EFAULT;
 		}
-		kfree(decoded_name);
-		fsstack_copy_attr_atime(dentry->d_inode,
-					lower_dentry->d_inode);
+		rc = copy_to_user(buf, plaintext_name, plaintext_name_size);
+		if (rc)
+			rc = -EFAULT;
+		else
+			rc = plaintext_name_size;
+		kfree(plaintext_name);
+		fsstack_copy_attr_atime(dentry->d_inode, lower_dentry->d_inode);
 	}
 out_free_lower_buf:
 	kfree(lower_buf);
@@ -669,8 +710,6 @@
 	}
 	old_fs = get_fs();
 	set_fs(get_ds());
-	ecryptfs_printk(KERN_DEBUG, "Calling readlink w/ "
-			"dentry->d_name.name = [%s]\n", dentry->d_name.name);
 	rc = dentry->d_inode->i_op->readlink(dentry, (char __user *)buf, len);
 	set_fs(old_fs);
 	if (rc < 0)
diff --git a/fs/ecryptfs/keystore.c b/fs/ecryptfs/keystore.c
index 0d713b6..ff53942 100644
--- a/fs/ecryptfs/keystore.c
+++ b/fs/ecryptfs/keystore.c
@@ -358,7 +358,7 @@
 	/* verify that everything through the encrypted FEK size is present */
 	if (message_len < 4) {
 		rc = -EIO;
-		printk(KERN_ERR "%s: message_len is [%Zd]; minimum acceptable "
+		printk(KERN_ERR "%s: message_len is [%zd]; minimum acceptable "
 		       "message length is [%d]\n", __func__, message_len, 4);
 		goto out;
 	}
@@ -385,13 +385,13 @@
 	i += data_len;
 	if (message_len < (i + key_rec->enc_key_size)) {
 		rc = -EIO;
-		printk(KERN_ERR "%s: message_len [%Zd]; max len is [%Zd]\n",
+		printk(KERN_ERR "%s: message_len [%zd]; max len is [%zd]\n",
 		       __func__, message_len, (i + key_rec->enc_key_size));
 		goto out;
 	}
 	if (key_rec->enc_key_size > ECRYPTFS_MAX_ENCRYPTED_KEY_BYTES) {
 		rc = -EIO;
-		printk(KERN_ERR "%s: Encrypted key_size [%Zd] larger than "
+		printk(KERN_ERR "%s: Encrypted key_size [%zd] larger than "
 		       "the maximum key size [%d]\n", __func__,
 		       key_rec->enc_key_size,
 		       ECRYPTFS_MAX_ENCRYPTED_KEY_BYTES);
@@ -403,6 +403,580 @@
 }
 
 static int
+ecryptfs_find_global_auth_tok_for_sig(
+	struct ecryptfs_global_auth_tok **global_auth_tok,
+	struct ecryptfs_mount_crypt_stat *mount_crypt_stat, char *sig)
+{
+	struct ecryptfs_global_auth_tok *entry;
+	int rc = -EINVAL;
+
+	/* Report "not found" (NULL, -EINVAL) unless a registered
+	 * global auth tok carries the requested hex signature. */
+	(*global_auth_tok) = NULL;
+	mutex_lock(&mount_crypt_stat->global_auth_tok_list_mutex);
+	list_for_each_entry(entry, &mount_crypt_stat->global_auth_tok_list,
+			    mount_crypt_stat_list) {
+		if (memcmp(entry->sig, sig, ECRYPTFS_SIG_SIZE_HEX) != 0)
+			continue;
+		(*global_auth_tok) = entry;
+		rc = 0;
+		break;
+	}
+	mutex_unlock(&mount_crypt_stat->global_auth_tok_list_mutex);
+	return rc;
+}
+
+/**
+ * ecryptfs_find_auth_tok_for_sig
+ * @auth_tok: Set to the matching auth_tok; NULL if not found
+ * @mount_crypt_stat: mount-wide crypto context whose global auth_tok
+ *                    list is searched first
+ * @sig: Sig of auth_tok to find
+ *
+ * Looks for @sig among the auth_toks registered on @mount_crypt_stat;
+ * if none matches, falls back to ecryptfs_keyring_auth_tok_for_sig().
+ * NOTE(review): the struct key handed back by the keyring fallback is
+ * dropped here without being released -- confirm whether the callee
+ * takes a reference that this path leaks.
+ *
+ * Returns zero on no error; non-zero on error
+ */
+static int
+ecryptfs_find_auth_tok_for_sig(
+	struct ecryptfs_auth_tok **auth_tok,
+	struct ecryptfs_mount_crypt_stat *mount_crypt_stat,
+	char *sig)
+{
+	struct ecryptfs_global_auth_tok *global_auth_tok;
+	int rc = 0;
+
+	(*auth_tok) = NULL;
+	if (ecryptfs_find_global_auth_tok_for_sig(&global_auth_tok,
+						  mount_crypt_stat, sig)) {
+		struct key *auth_tok_key;
+
+		rc = ecryptfs_keyring_auth_tok_for_sig(&auth_tok_key, auth_tok,
+						       sig);
+	} else
+		(*auth_tok) = global_auth_tok->global_auth_tok;
+	return rc;
+}
+
+/*
+ * Scratch state for ecryptfs_write_tag_70_packet(). The function's
+ * working variables live in this kmalloc'd struct rather than on the
+ * stack, to help reduce eCryptfs' overall stack usage.
+ */
+struct ecryptfs_write_tag_70_packet_silly_stack {
+	u8 cipher_code;
+	size_t max_packet_size;
+	size_t packet_size_len;
+	size_t block_aligned_filename_size;
+	size_t block_size;
+	size_t i;
+	size_t j;
+	size_t num_rand_bytes;
+	struct mutex *tfm_mutex;
+	char *block_aligned_filename;
+	struct ecryptfs_auth_tok *auth_tok;
+	struct scatterlist src_sg;
+	struct scatterlist dst_sg;
+	struct blkcipher_desc desc;
+	char iv[ECRYPTFS_MAX_IV_BYTES];
+	char hash[ECRYPTFS_TAG_70_DIGEST_SIZE];
+	char tmp_hash[ECRYPTFS_TAG_70_DIGEST_SIZE];
+	struct hash_desc hash_desc;
+	struct scatterlist hash_sg;
+};
+
+/**
+ * ecryptfs_write_tag_70_packet - Write encrypted filename (EFN) packet
+ * @dest: Packet buffer; if NULL, only the size needed is computed
+ * @remaining_bytes: Space left in @dest; reduced by octets written
+ * @packet_size: Set to octets written (size needed if @dest is NULL)
+ * @mount_crypt_stat: Mount-wide context naming the FNEK and cipher
+ * @filename: Filename to encrypt
+ * @filename_size: Number of octets in @filename
+ *
+ * Encrypts the given filename with the mount-wide filename
+ * encryption key (FNEK) and stores it in a packet to @dest, which the
+ * caller will encode and write directly into the dentry name.
+ *
+ * Returns zero on success; non-zero on error
+ */
+int
+ecryptfs_write_tag_70_packet(char *dest, size_t *remaining_bytes,
+			     size_t *packet_size,
+			     struct ecryptfs_mount_crypt_stat *mount_crypt_stat,
+			     char *filename, size_t filename_size)
+{
+	struct ecryptfs_write_tag_70_packet_silly_stack *s;
+	int rc = 0;
+
+	(*packet_size) = 0;
+	s = kmalloc(sizeof(*s), GFP_KERNEL);
+	if (!s) {
+		printk(KERN_ERR "%s: Out of memory whilst trying to kmalloc "
+		       "[%zd] bytes of kernel memory\n", __func__, sizeof(*s));
+		rc = -ENOMEM;
+		goto out;
+	}
+	s->desc.flags = CRYPTO_TFM_REQ_MAY_SLEEP;
+	rc = ecryptfs_get_tfm_and_mutex_for_cipher_name(
+		&s->desc.tfm,
+		&s->tfm_mutex, mount_crypt_stat->global_default_fn_cipher_name);
+	if (unlikely(rc)) {
+		printk(KERN_ERR "Internal error whilst attempting to get "
+		       "tfm and mutex for cipher name [%s]; rc = [%d]\n",
+		       mount_crypt_stat->global_default_fn_cipher_name, rc);
+		goto out;
+	}
+	mutex_lock(s->tfm_mutex);
+	s->block_size = crypto_blkcipher_blocksize(s->desc.tfm);
+	/* +1: \0 separator between the prefix and the plaintext filename */
+	s->num_rand_bytes = (ECRYPTFS_FILENAME_MIN_RANDOM_PREPEND_BYTES + 1);
+	s->block_aligned_filename_size = (s->num_rand_bytes + filename_size);
+	if ((s->block_aligned_filename_size % s->block_size) != 0) {
+		s->num_rand_bytes += s->block_size
+			- (s->block_aligned_filename_size % s->block_size);
+		s->block_aligned_filename_size = (s->num_rand_bytes
+						  + filename_size);
+	}
+	/* Octet 0: Tag 70 identifier
+	 * Octets 1-N1: Tag 70 packet size (includes cipher identifier
+	 *              and block-aligned encrypted filename size)
+	 * Octets N1-N2: FNEK sig (ECRYPTFS_SIG_SIZE)
+	 * Octet N2-N3: Cipher identifier (1 octet)
+	 * Octets N3-N4: Block-aligned encrypted filename
+	 *  - Consists of a minimum number of random characters, a \0
+	 *    separator, and then the filename */
+	s->max_packet_size = (1                   /* Tag 70 identifier */
+			      + 3                 /* Max Tag 70 packet size */
+			      + ECRYPTFS_SIG_SIZE /* FNEK sig */
+			      + 1                 /* Cipher identifier */
+			      + s->block_aligned_filename_size);
+	if (dest == NULL) {
+		(*packet_size) = s->max_packet_size;
+		goto out_unlock;
+	}
+	if (s->max_packet_size > (*remaining_bytes)) {
+		printk(KERN_WARNING "%s: Require [%zd] bytes to write; only "
+		       "[%zd] available\n", __func__, s->max_packet_size,
+		       (*remaining_bytes));
+		rc = -EINVAL;
+		goto out_unlock;
+	}
+	s->block_aligned_filename = kzalloc(s->block_aligned_filename_size,
+					    GFP_KERNEL);
+	if (!s->block_aligned_filename) {
+		printk(KERN_ERR "%s: Out of kernel memory whilst attempting to "
+		       "kzalloc [%zd] bytes\n", __func__,
+		       s->block_aligned_filename_size);
+		rc = -ENOMEM;
+		goto out_unlock;
+	}
+	s->i = 0;
+	dest[s->i++] = ECRYPTFS_TAG_70_PACKET_TYPE;
+	rc = ecryptfs_write_packet_length(&dest[s->i],
+					  (ECRYPTFS_SIG_SIZE
+					   + 1 /* Cipher code */
+					   + s->block_aligned_filename_size),
+					  &s->packet_size_len);
+	if (rc) {
+		printk(KERN_ERR "%s: Error generating tag 70 packet "
+		       "header; cannot generate packet length; rc = [%d]\n",
+		       __func__, rc);
+		goto out_free_unlock;
+	}
+	s->i += s->packet_size_len;
+	ecryptfs_from_hex(&dest[s->i],
+			  mount_crypt_stat->global_default_fnek_sig,
+			  ECRYPTFS_SIG_SIZE);
+	s->i += ECRYPTFS_SIG_SIZE;
+	s->cipher_code = ecryptfs_code_for_cipher_string(
+		mount_crypt_stat->global_default_fn_cipher_name,
+		mount_crypt_stat->global_default_fn_cipher_key_bytes);
+	if (s->cipher_code == 0) {
+		printk(KERN_WARNING "%s: Unable to generate code for "
+		       "cipher [%s] with key bytes [%zd]\n", __func__,
+		       mount_crypt_stat->global_default_fn_cipher_name,
+		       mount_crypt_stat->global_default_fn_cipher_key_bytes);
+		rc = -EINVAL;
+		goto out_free_unlock;
+	}
+	dest[s->i++] = s->cipher_code;
+	rc = ecryptfs_find_auth_tok_for_sig(&s->auth_tok, mount_crypt_stat,
+		mount_crypt_stat->global_default_fnek_sig);
+	if (rc) {
+		printk(KERN_ERR "%s: Error attempting to find auth tok for "
+		       "fnek sig [%s]; rc = [%d]\n", __func__,
+		       mount_crypt_stat->global_default_fnek_sig, rc);
+		goto out_free_unlock;
+	}
+	/* TODO: Support other key modules than passphrase for
+	 * filename encryption */
+	BUG_ON(s->auth_tok->token_type != ECRYPTFS_PASSWORD);
+	sg_init_one(
+		&s->hash_sg,
+		(u8 *)s->auth_tok->token.password.session_key_encryption_key,
+		s->auth_tok->token.password.session_key_encryption_key_bytes);
+	s->hash_desc.flags = CRYPTO_TFM_REQ_MAY_SLEEP;
+	s->hash_desc.tfm = crypto_alloc_hash(ECRYPTFS_TAG_70_DIGEST, 0,
+					     CRYPTO_ALG_ASYNC);
+	if (IS_ERR(s->hash_desc.tfm)) {
+		rc = PTR_ERR(s->hash_desc.tfm);
+		printk(KERN_ERR "%s: Error attempting to allocate hash "
+		       "crypto context; rc = [%d]\n", __func__, rc);
+		goto out_free_unlock;
+	}
+	rc = crypto_hash_init(&s->hash_desc);
+	if (rc) {
+		printk(KERN_ERR "%s: Error initializing crypto hash; "
+		       "rc = [%d]\n", __func__, rc);
+		goto out_release_free_unlock;
+	}
+	rc = crypto_hash_update(
+		&s->hash_desc, &s->hash_sg,
+		s->auth_tok->token.password.session_key_encryption_key_bytes);
+	if (rc) {
+		printk(KERN_ERR "%s: Error updating crypto hash; "
+		       "rc = [%d]\n", __func__, rc);
+		goto out_release_free_unlock;
+	}
+	rc = crypto_hash_final(&s->hash_desc, s->hash);
+	if (rc) {
+		printk(KERN_ERR "%s: Error finalizing crypto hash; "
+		       "rc = [%d]\n", __func__, rc);
+		goto out_release_free_unlock;
+	}
+	/* Prefix bytes come from re-hashing the key digest; no \0 allowed */
+	for (s->j = 0; s->j < (s->num_rand_bytes - 1); s->j++) {
+		s->block_aligned_filename[s->j] =
+			s->hash[(s->j % ECRYPTFS_TAG_70_DIGEST_SIZE)];
+		if ((s->j % ECRYPTFS_TAG_70_DIGEST_SIZE)
+		    == (ECRYPTFS_TAG_70_DIGEST_SIZE - 1)) {
+			sg_init_one(&s->hash_sg, (u8 *)s->hash,
+				    ECRYPTFS_TAG_70_DIGEST_SIZE);
+			rc = crypto_hash_init(&s->hash_desc);
+			if (rc) {
+				printk(KERN_ERR
+				       "%s: Error initializing crypto hash; "
+				       "rc = [%d]\n", __func__, rc);
+				goto out_release_free_unlock;
+			}
+			rc = crypto_hash_update(&s->hash_desc, &s->hash_sg,
+						ECRYPTFS_TAG_70_DIGEST_SIZE);
+			if (rc) {
+				printk(KERN_ERR
+				       "%s: Error updating crypto hash; "
+				       "rc = [%d]\n", __func__, rc);
+				goto out_release_free_unlock;
+			}
+			rc = crypto_hash_final(&s->hash_desc, s->tmp_hash);
+			if (rc) {
+				printk(KERN_ERR
+				       "%s: Error finalizing crypto hash; "
+				       "rc = [%d]\n", __func__, rc);
+				goto out_release_free_unlock;
+			}
+			memcpy(s->hash, s->tmp_hash,
+			       ECRYPTFS_TAG_70_DIGEST_SIZE);
+		}
+		if (s->block_aligned_filename[s->j] == '\0')
+			s->block_aligned_filename[s->j] = ECRYPTFS_NON_NULL;
+	}
+	memcpy(&s->block_aligned_filename[s->num_rand_bytes], filename,
+	       filename_size);
+	rc = virt_to_scatterlist(s->block_aligned_filename,
+				 s->block_aligned_filename_size, &s->src_sg, 1);
+	if (rc != 1) {
+		printk(KERN_ERR "%s: Internal error whilst attempting to "
+		       "convert filename memory to scatterlist; "
+		       "expected rc = 1; got rc = [%d]. "
+		       "block_aligned_filename_size = [%zd]\n", __func__, rc,
+		       s->block_aligned_filename_size);
+		goto out_release_free_unlock;
+	}
+	rc = virt_to_scatterlist(&dest[s->i], s->block_aligned_filename_size,
+				 &s->dst_sg, 1);
+	if (rc != 1) {
+		printk(KERN_ERR "%s: Internal error whilst attempting to "
+		       "convert encrypted filename memory to scatterlist; "
+		       "expected rc = 1; got rc = [%d]. "
+		       "block_aligned_filename_size = [%zd]\n", __func__, rc,
+		       s->block_aligned_filename_size);
+		goto out_release_free_unlock;
+	}
+	/* The characters in the first block effectively do the job
+	 * of the IV here, so we just use 0's for the IV. Note the
+	 * constraint that ECRYPTFS_FILENAME_MIN_RANDOM_PREPEND_BYTES
+	 * >= ECRYPTFS_MAX_IV_BYTES. */
+	memset(s->iv, 0, ECRYPTFS_MAX_IV_BYTES);
+	s->desc.info = s->iv;
+	rc = crypto_blkcipher_setkey(
+		s->desc.tfm,
+		s->auth_tok->token.password.session_key_encryption_key,
+		mount_crypt_stat->global_default_fn_cipher_key_bytes);
+	if (rc < 0) {
+		printk(KERN_ERR "%s: Error setting key for crypto context; "
+		       "rc = [%d]. s->auth_tok->token.password.session_key_"
+		       "encryption_key = [0x%p]; mount_crypt_stat->"
+		       "global_default_fn_cipher_key_bytes = [%zd]\n", __func__,
+		       rc,
+		       s->auth_tok->token.password.session_key_encryption_key,
+		       mount_crypt_stat->global_default_fn_cipher_key_bytes);
+		goto out_release_free_unlock;
+	}
+	rc = crypto_blkcipher_encrypt_iv(&s->desc, &s->dst_sg, &s->src_sg,
+					 s->block_aligned_filename_size);
+	if (rc) {
+		printk(KERN_ERR "%s: Error attempting to encrypt filename; "
+		       "rc = [%d]\n", __func__, rc);
+		goto out_release_free_unlock;
+	}
+	s->i += s->block_aligned_filename_size;
+	(*packet_size) = s->i;
+	(*remaining_bytes) -= (*packet_size);
+out_release_free_unlock:
+	crypto_free_hash(s->hash_desc.tfm);
+out_free_unlock:
+	memset(s->block_aligned_filename, 0, s->block_aligned_filename_size);
+	kfree(s->block_aligned_filename);
+out_unlock:
+	mutex_unlock(s->tfm_mutex);
+out:
+	kfree(s);
+	return rc;
+}
+
+struct ecryptfs_parse_tag_70_packet_silly_stack {
+	u8 cipher_code; /* cipher identifier octet read from the packet */
+	size_t max_packet_size; /* not referenced by the parser */
+	size_t packet_size_len; /* set by ecryptfs_parse_packet_length() */
+	size_t parsed_tag_70_packet_size; /* length value read from packet */
+	size_t block_aligned_filename_size; /* encrypted filename octets */
+	size_t block_size; /* not referenced by the parser */
+	size_t i; /* scan index into decrypted_filename */
+	struct mutex *tfm_mutex;
+	char *decrypted_filename;
+	struct ecryptfs_auth_tok *auth_tok;
+	struct scatterlist src_sg;
+	struct scatterlist dst_sg;
+	struct blkcipher_desc desc;
+	char fnek_sig_hex[ECRYPTFS_SIG_SIZE_HEX + 1]; /* NUL-terminated */
+	char iv[ECRYPTFS_MAX_IV_BYTES];
+	char cipher_string[ECRYPTFS_MAX_CIPHER_NAME_SIZE];
+};
+
+/**
+ * ecryptfs_parse_tag_70_packet - Parse and decrypt a tag 70 (EFN) packet
+ * @filename: This function kmalloc's the memory for the filename
+ * @filename_size: Set to the length of @filename, excluding its '\0'
+ * @packet_size: Set to the number of octets in the packet parsed
+ * @mount_crypt_stat: The mount-wide cryptographic context
+ * @data: The memory location containing the start of the packet
+ * @max_packet_size: The maximum legal size of the packet at @data
+ *
+ * Returns zero on success; non-zero otherwise
+ */
+int
+ecryptfs_parse_tag_70_packet(char **filename, size_t *filename_size,
+			     size_t *packet_size,
+			     struct ecryptfs_mount_crypt_stat *mount_crypt_stat,
+			     char *data, size_t max_packet_size)
+{
+	struct ecryptfs_parse_tag_70_packet_silly_stack *s;
+	int rc = 0;
+
+	(*packet_size) = 0;
+	(*filename_size) = 0;
+	(*filename) = NULL;
+	s = kmalloc(sizeof(*s), GFP_KERNEL);
+	if (!s) {
+		printk(KERN_ERR "%s: Out of memory whilst trying to kmalloc "
+		       "[%zd] bytes of kernel memory\n", __func__, sizeof(*s));
+		rc = -ENOMEM;
+		goto out;
+	}
+	s->desc.flags = CRYPTO_TFM_REQ_MAY_SLEEP;
+	if (max_packet_size < (1 + 1 + ECRYPTFS_SIG_SIZE + 1 + 1)) {
+		printk(KERN_WARNING "%s: max_packet_size is [%zd]; it must be "
+		       "at least [%d]\n", __func__, max_packet_size,
+			(1 + 1 + ECRYPTFS_SIG_SIZE + 1 + 1));
+		rc = -EINVAL;
+		goto out;
+	}
+	/* Octet 0: Tag 70 identifier
+	 * Octets 1-N1: Tag 70 packet size (includes cipher identifier
+	 *              and block-aligned encrypted filename size)
+	 * Octets N1-N2: FNEK sig (ECRYPTFS_SIG_SIZE)
+	 * Octet N2-N3: Cipher identifier (1 octet)
+	 * Octets N3-N4: Block-aligned encrypted filename
+	 *  - Consists of a minimum number of random numbers, a \0
+	 *    separator, and then the filename */
+	if (data[(*packet_size)++] != ECRYPTFS_TAG_70_PACKET_TYPE) {
+		printk(KERN_WARNING "%s: Invalid packet tag [0x%.2x]; must be "
+		       "tag [0x%.2x]\n", __func__,
+		       data[((*packet_size) - 1)], ECRYPTFS_TAG_70_PACKET_TYPE);
+		rc = -EINVAL;
+		goto out;
+	}
+	rc = ecryptfs_parse_packet_length(&data[(*packet_size)],
+					  &s->parsed_tag_70_packet_size,
+					  &s->packet_size_len);
+	if (rc) {
+		printk(KERN_WARNING "%s: Error parsing packet length; "
+		       "rc = [%d]\n", __func__, rc);
+		goto out;
+	}
+	/* The body must hold the sig, the cipher code and at least one
+	 * octet of filename, or the size_t subtraction below wraps */
+	if (s->parsed_tag_70_packet_size < (ECRYPTFS_SIG_SIZE + 1 + 1)
+	    || (1 + s->packet_size_len + s->parsed_tag_70_packet_size)
+	       > max_packet_size) {
+		printk(KERN_WARNING "%s: max_packet_size is [%zd]; real packet "
+		       "size is [%zd]\n", __func__, max_packet_size,
+		       (1 + s->packet_size_len
+			+ s->parsed_tag_70_packet_size));
+		rc = -EINVAL;
+		goto out;
+	}
+	s->block_aligned_filename_size = (s->parsed_tag_70_packet_size
+					  - ECRYPTFS_SIG_SIZE - 1);
+	(*packet_size) += s->packet_size_len;
+	ecryptfs_to_hex(s->fnek_sig_hex, &data[(*packet_size)],
+			ECRYPTFS_SIG_SIZE);
+	s->fnek_sig_hex[ECRYPTFS_SIG_SIZE_HEX] = '\0';
+	(*packet_size) += ECRYPTFS_SIG_SIZE;
+	s->cipher_code = data[(*packet_size)++];
+	rc = ecryptfs_cipher_code_to_string(s->cipher_string, s->cipher_code);
+	if (rc) {
+		printk(KERN_WARNING "%s: Cipher code [%d] is invalid\n",
+		       __func__, s->cipher_code);
+		goto out;
+	}
+	rc = ecryptfs_get_tfm_and_mutex_for_cipher_name(&s->desc.tfm,
+							&s->tfm_mutex,
+							s->cipher_string);
+	if (unlikely(rc)) {
+		printk(KERN_ERR "Internal error whilst attempting to get "
+		       "tfm and mutex for cipher name [%s]; rc = [%d]\n",
+		       s->cipher_string, rc);
+		goto out;
+	}
+	mutex_lock(s->tfm_mutex);
+	rc = virt_to_scatterlist(&data[(*packet_size)],
+				 s->block_aligned_filename_size, &s->src_sg, 1);
+	if (rc != 1) {
+		printk(KERN_ERR "%s: Internal error whilst attempting to "
+		       "convert encrypted filename memory to scatterlist; "
+		       "expected rc = 1; got rc = [%d]. "
+		       "block_aligned_filename_size = [%zd]\n", __func__, rc,
+		       s->block_aligned_filename_size);
+		goto out_unlock;
+	}
+	(*packet_size) += s->block_aligned_filename_size;
+	s->decrypted_filename = kmalloc(s->block_aligned_filename_size,
+					GFP_KERNEL);
+	if (!s->decrypted_filename) {
+		printk(KERN_ERR "%s: Out of memory whilst attempting to "
+		       "kmalloc [%zd] bytes\n", __func__,
+		       s->block_aligned_filename_size);
+		rc = -ENOMEM;
+		goto out_unlock;
+	}
+	rc = virt_to_scatterlist(s->decrypted_filename,
+				 s->block_aligned_filename_size, &s->dst_sg, 1);
+	if (rc != 1) {
+		printk(KERN_ERR "%s: Internal error whilst attempting to "
+		       "convert decrypted filename memory to scatterlist; "
+		       "expected rc = 1; got rc = [%d]. "
+		       "block_aligned_filename_size = [%zd]\n", __func__, rc,
+		       s->block_aligned_filename_size);
+		goto out_free_unlock;
+	}
+	/* The characters in the first block effectively do the job of
+	 * the IV here, so we just use 0's for the IV. Note the
+	 * constraint that ECRYPTFS_FILENAME_MIN_RANDOM_PREPEND_BYTES
+	 * >= ECRYPTFS_MAX_IV_BYTES. */
+	memset(s->iv, 0, ECRYPTFS_MAX_IV_BYTES);
+	s->desc.info = s->iv;
+	rc = ecryptfs_find_auth_tok_for_sig(&s->auth_tok, mount_crypt_stat,
+					    s->fnek_sig_hex);
+	if (rc) {
+		printk(KERN_ERR "%s: Error attempting to find auth tok for "
+		       "fnek sig [%s]; rc = [%d]\n", __func__, s->fnek_sig_hex,
+		       rc);
+		goto out_free_unlock;
+	}
+	/* TODO: Support other key modules than passphrase for
+	 * filename encryption */
+	BUG_ON(s->auth_tok->token_type != ECRYPTFS_PASSWORD);
+	rc = crypto_blkcipher_setkey(
+		s->desc.tfm,
+		s->auth_tok->token.password.session_key_encryption_key,
+		mount_crypt_stat->global_default_fn_cipher_key_bytes);
+	if (rc < 0) {
+		printk(KERN_ERR "%s: Error setting key for crypto context; "
+		       "rc = [%d]. s->auth_tok->token.password.session_key_"
+		       "encryption_key = [0x%p]; mount_crypt_stat->"
+		       "global_default_fn_cipher_key_bytes = [%zd]\n", __func__,
+		       rc,
+		       s->auth_tok->token.password.session_key_encryption_key,
+		       mount_crypt_stat->global_default_fn_cipher_key_bytes);
+		goto out_free_unlock;
+	}
+	rc = crypto_blkcipher_decrypt_iv(&s->desc, &s->dst_sg, &s->src_sg,
+					 s->block_aligned_filename_size);
+	if (rc) {
+		printk(KERN_ERR "%s: Error attempting to decrypt filename; "
+		       "rc = [%d]\n", __func__, rc);
+		goto out_free_unlock;
+	}
+	s->i = 0;
+	while (s->i < s->block_aligned_filename_size
+	       && s->decrypted_filename[s->i] != '\0')
+		s->i++;
+	if (s->i == s->block_aligned_filename_size) {
+		printk(KERN_WARNING "%s: Invalid tag 70 packet; could not "
+		       "find valid separator between random characters and "
+		       "the filename\n", __func__);
+		rc = -EINVAL;
+		goto out_free_unlock;
+	}
+	s->i++;
+	(*filename_size) = (s->block_aligned_filename_size - s->i);
+	if (!((*filename_size) > 0 && (*filename_size < PATH_MAX))) {
+		printk(KERN_WARNING "%s: Filename size is [%zd], which is "
+		       "invalid\n", __func__, (*filename_size));
+		rc = -EINVAL;
+		goto out_free_unlock;
+	}
+	(*filename) = kmalloc(((*filename_size) + 1), GFP_KERNEL);
+	if (!(*filename)) {
+		printk(KERN_ERR "%s: Out of memory whilst attempting to "
+		       "kmalloc [%zd] bytes\n", __func__,
+		       ((*filename_size) + 1));
+		rc = -ENOMEM;
+		goto out_free_unlock;
+	}
+	memcpy((*filename), &s->decrypted_filename[s->i], (*filename_size));
+	(*filename)[(*filename_size)] = '\0';
+out_free_unlock:
+	kfree(s->decrypted_filename);
+out_unlock:
+	mutex_unlock(s->tfm_mutex);
+out:
+	if (rc) {
+		(*packet_size) = 0;
+		(*filename_size) = 0;
+		(*filename) = NULL;
+	}
+	kfree(s);
+	return rc;
+}
+
+static int
 ecryptfs_get_auth_tok_sig(char **sig, struct ecryptfs_auth_tok *auth_tok)
 {
 	int rc = 0;
@@ -897,30 +1471,6 @@
 	return rc;
 }
 
-static int
-ecryptfs_find_global_auth_tok_for_sig(
-	struct ecryptfs_global_auth_tok **global_auth_tok,
-	struct ecryptfs_mount_crypt_stat *mount_crypt_stat, char *sig)
-{
-	struct ecryptfs_global_auth_tok *walker;
-	int rc = 0;
-
-	(*global_auth_tok) = NULL;
-	mutex_lock(&mount_crypt_stat->global_auth_tok_list_mutex);
-	list_for_each_entry(walker,
-			    &mount_crypt_stat->global_auth_tok_list,
-			    mount_crypt_stat_list) {
-		if (memcmp(walker->sig, sig, ECRYPTFS_SIG_SIZE_HEX) == 0) {
-			(*global_auth_tok) = walker;
-			goto out;
-		}
-	}
-	rc = -EINVAL;
-out:
-	mutex_unlock(&mount_crypt_stat->global_auth_tok_list_mutex);
-	return rc;
-}
-
 /**
  * ecryptfs_verify_version
  * @version: The version number to confirm
@@ -990,43 +1540,6 @@
 }
 
 /**
- * ecryptfs_find_auth_tok_for_sig
- * @auth_tok: Set to the matching auth_tok; NULL if not found
- * @crypt_stat: inode crypt_stat crypto context
- * @sig: Sig of auth_tok to find
- *
- * For now, this function simply looks at the registered auth_tok's
- * linked off the mount_crypt_stat, so all the auth_toks that can be
- * used must be registered at mount time. This function could
- * potentially try a lot harder to find auth_tok's (e.g., by calling
- * out to ecryptfsd to dynamically retrieve an auth_tok object) so
- * that static registration of auth_tok's will no longer be necessary.
- *
- * Returns zero on no error; non-zero on error
- */
-static int
-ecryptfs_find_auth_tok_for_sig(
-	struct ecryptfs_auth_tok **auth_tok,
-	struct ecryptfs_crypt_stat *crypt_stat, char *sig)
-{
-	struct ecryptfs_mount_crypt_stat *mount_crypt_stat =
-		crypt_stat->mount_crypt_stat;
-	struct ecryptfs_global_auth_tok *global_auth_tok;
-	int rc = 0;
-
-	(*auth_tok) = NULL;
-	if (ecryptfs_find_global_auth_tok_for_sig(&global_auth_tok,
-						  mount_crypt_stat, sig)) {
-		struct key *auth_tok_key;
-
-		rc = ecryptfs_keyring_auth_tok_for_sig(&auth_tok_key, auth_tok,
-						       sig);
-	} else
-		(*auth_tok) = global_auth_tok->global_auth_tok;
-	return rc;
-}
-
-/**
  * decrypt_passphrase_encrypted_session_key - Decrypt the session key with the given auth_tok.
  * @auth_tok: The passphrase authentication token to use to encrypt the FEK
  * @crypt_stat: The cryptographic context
@@ -1256,7 +1769,8 @@
 			rc = -EINVAL;
 			goto out_wipe_list;
 		}
-		ecryptfs_find_auth_tok_for_sig(&matching_auth_tok, crypt_stat,
+		ecryptfs_find_auth_tok_for_sig(&matching_auth_tok,
+					       crypt_stat->mount_crypt_stat,
 					       candidate_auth_tok_sig);
 		if (matching_auth_tok) {
 			found_auth_tok = 1;
@@ -1336,7 +1850,9 @@
 	int rc;
 
 	rc = write_tag_66_packet(auth_tok->token.private_key.signature,
-				 ecryptfs_code_for_cipher_string(crypt_stat),
+				 ecryptfs_code_for_cipher_string(
+					 crypt_stat->cipher,
+					 crypt_stat->key_size),
 				 crypt_stat, &payload, &payload_len);
 	if (rc) {
 		ecryptfs_printk(KERN_ERR, "Error generating tag 66 packet\n");
@@ -1696,7 +2212,8 @@
 	dest[(*packet_size)++] = 0x04; /* version 4 */
 	/* TODO: Break from RFC2440 so that arbitrary ciphers can be
 	 * specified with strings */
-	cipher_code = ecryptfs_code_for_cipher_string(crypt_stat);
+	cipher_code = ecryptfs_code_for_cipher_string(crypt_stat->cipher,
+						      crypt_stat->key_size);
 	if (cipher_code == 0) {
 		ecryptfs_printk(KERN_WARNING, "Unable to generate code for "
 				"cipher [%s]\n", crypt_stat->cipher);
diff --git a/fs/ecryptfs/main.c b/fs/ecryptfs/main.c
index fd63071..789cf2e 100644
--- a/fs/ecryptfs/main.c
+++ b/fs/ecryptfs/main.c
@@ -206,7 +206,9 @@
        ecryptfs_opt_cipher, ecryptfs_opt_ecryptfs_cipher,
        ecryptfs_opt_ecryptfs_key_bytes,
        ecryptfs_opt_passthrough, ecryptfs_opt_xattr_metadata,
-       ecryptfs_opt_encrypted_view, ecryptfs_opt_err };
+       ecryptfs_opt_encrypted_view, ecryptfs_opt_fnek_sig,
+       ecryptfs_opt_fn_cipher, ecryptfs_opt_fn_cipher_key_bytes,
+       ecryptfs_opt_err };
 
 static const match_table_t tokens = {
 	{ecryptfs_opt_sig, "sig=%s"},
@@ -217,6 +219,9 @@
 	{ecryptfs_opt_passthrough, "ecryptfs_passthrough"},
 	{ecryptfs_opt_xattr_metadata, "ecryptfs_xattr_metadata"},
 	{ecryptfs_opt_encrypted_view, "ecryptfs_encrypted_view"},
+	{ecryptfs_opt_fnek_sig, "ecryptfs_fnek_sig=%s"},
+	{ecryptfs_opt_fn_cipher, "ecryptfs_fn_cipher=%s"},
+	{ecryptfs_opt_fn_cipher_key_bytes, "ecryptfs_fn_key_bytes=%u"},
 	{ecryptfs_opt_err, NULL}
 };
 
@@ -281,8 +286,11 @@
 	int rc = 0;
 	int sig_set = 0;
 	int cipher_name_set = 0;
+	int fn_cipher_name_set = 0;
 	int cipher_key_bytes;
 	int cipher_key_bytes_set = 0;
+	int fn_cipher_key_bytes;
+	int fn_cipher_key_bytes_set = 0;
 	struct ecryptfs_mount_crypt_stat *mount_crypt_stat =
 		&ecryptfs_superblock_to_private(sb)->mount_crypt_stat;
 	substring_t args[MAX_OPT_ARGS];
@@ -290,7 +298,12 @@
 	char *sig_src;
 	char *cipher_name_dst;
 	char *cipher_name_src;
+	char *fn_cipher_name_dst;
+	char *fn_cipher_name_src;
+	char *fnek_dst;
+	char *fnek_src;
 	char *cipher_key_bytes_src;
+	char *fn_cipher_key_bytes_src;
 
 	if (!options) {
 		rc = -EINVAL;
@@ -322,10 +335,7 @@
 				global_default_cipher_name;
 			strncpy(cipher_name_dst, cipher_name_src,
 				ECRYPTFS_MAX_CIPHER_NAME_SIZE);
-			ecryptfs_printk(KERN_DEBUG,
-					"The mount_crypt_stat "
-					"global_default_cipher_name set to: "
-					"[%s]\n", cipher_name_dst);
+			cipher_name_dst[ECRYPTFS_MAX_CIPHER_NAME_SIZE] = '\0';
 			cipher_name_set = 1;
 			break;
 		case ecryptfs_opt_ecryptfs_key_bytes:
@@ -335,11 +345,6 @@
 						   &cipher_key_bytes_src, 0);
 			mount_crypt_stat->global_default_cipher_key_size =
 				cipher_key_bytes;
-			ecryptfs_printk(KERN_DEBUG,
-					"The mount_crypt_stat "
-					"global_default_cipher_key_size "
-					"set to: [%d]\n", mount_crypt_stat->
-					global_default_cipher_key_size);
 			cipher_key_bytes_set = 1;
 			break;
 		case ecryptfs_opt_passthrough:
@@ -356,11 +361,51 @@
 			mount_crypt_stat->flags |=
 				ECRYPTFS_ENCRYPTED_VIEW_ENABLED;
 			break;
+		case ecryptfs_opt_fnek_sig:
+			fnek_src = args[0].from;
+			fnek_dst =
+				mount_crypt_stat->global_default_fnek_sig;
+			strncpy(fnek_dst, fnek_src, ECRYPTFS_SIG_SIZE_HEX);
+			mount_crypt_stat->global_default_fnek_sig[
+				ECRYPTFS_SIG_SIZE_HEX] = '\0';
+			rc = ecryptfs_add_global_auth_tok(
+				mount_crypt_stat,
+				mount_crypt_stat->global_default_fnek_sig);
+			if (rc) {
+				printk(KERN_ERR "Error attempting to register "
+				       "global fnek sig [%s]; rc = [%d]\n",
+				       mount_crypt_stat->global_default_fnek_sig,
+				       rc);
+				goto out;
+			}
+			mount_crypt_stat->flags |=
+				(ECRYPTFS_GLOBAL_ENCRYPT_FILENAMES
+				 | ECRYPTFS_GLOBAL_ENCFN_USE_MOUNT_FNEK);
+			break;
+		case ecryptfs_opt_fn_cipher:
+			fn_cipher_name_src = args[0].from;
+			fn_cipher_name_dst =
+				mount_crypt_stat->global_default_fn_cipher_name;
+			strncpy(fn_cipher_name_dst, fn_cipher_name_src,
+				ECRYPTFS_MAX_CIPHER_NAME_SIZE);
+			mount_crypt_stat->global_default_fn_cipher_name[
+				ECRYPTFS_MAX_CIPHER_NAME_SIZE] = '\0';
+			fn_cipher_name_set = 1;
+			break;
+		case ecryptfs_opt_fn_cipher_key_bytes:
+			fn_cipher_key_bytes_src = args[0].from;
+			fn_cipher_key_bytes =
+				(int)simple_strtol(fn_cipher_key_bytes_src,
+						   &fn_cipher_key_bytes_src, 0);
+			mount_crypt_stat->global_default_fn_cipher_key_bytes =
+				fn_cipher_key_bytes;
+			fn_cipher_key_bytes_set = 1;
+			break;
 		case ecryptfs_opt_err:
 		default:
-			ecryptfs_printk(KERN_WARNING,
-					"eCryptfs: unrecognized option '%s'\n",
-					p);
+			printk(KERN_WARNING
+			       "%s: eCryptfs: unrecognized option [%s]\n",
+			       __func__, p);
 		}
 	}
 	if (!sig_set) {
@@ -374,33 +419,60 @@
 		int cipher_name_len = strlen(ECRYPTFS_DEFAULT_CIPHER);
 
 		BUG_ON(cipher_name_len >= ECRYPTFS_MAX_CIPHER_NAME_SIZE);
-
 		strcpy(mount_crypt_stat->global_default_cipher_name,
 		       ECRYPTFS_DEFAULT_CIPHER);
 	}
-	if (!cipher_key_bytes_set) {
+	if ((mount_crypt_stat->flags & ECRYPTFS_GLOBAL_ENCRYPT_FILENAMES)
+	    && !fn_cipher_name_set)
+		strcpy(mount_crypt_stat->global_default_fn_cipher_name,
+		       mount_crypt_stat->global_default_cipher_name);
+	if (!cipher_key_bytes_set)
 		mount_crypt_stat->global_default_cipher_key_size = 0;
-	}
+	if ((mount_crypt_stat->flags & ECRYPTFS_GLOBAL_ENCRYPT_FILENAMES)
+	    && !fn_cipher_key_bytes_set)
+		mount_crypt_stat->global_default_fn_cipher_key_bytes =
+			mount_crypt_stat->global_default_cipher_key_size;
 	mutex_lock(&key_tfm_list_mutex);
 	if (!ecryptfs_tfm_exists(mount_crypt_stat->global_default_cipher_name,
-				 NULL))
+				 NULL)) {
 		rc = ecryptfs_add_new_key_tfm(
 			NULL, mount_crypt_stat->global_default_cipher_name,
 			mount_crypt_stat->global_default_cipher_key_size);
-	mutex_unlock(&key_tfm_list_mutex);
-	if (rc) {
-		printk(KERN_ERR "Error attempting to initialize cipher with "
-		       "name = [%s] and key size = [%td]; rc = [%d]\n",
-		       mount_crypt_stat->global_default_cipher_name,
-		       mount_crypt_stat->global_default_cipher_key_size, rc);
-		rc = -EINVAL;
-		goto out;
+		if (rc) {
+			printk(KERN_ERR "Error attempting to initialize "
+			       "cipher with name = [%s] and key size = [%td]; "
+			       "rc = [%d]\n",
+			       mount_crypt_stat->global_default_cipher_name,
+			       mount_crypt_stat->global_default_cipher_key_size,
+			       rc);
+			rc = -EINVAL;
+			mutex_unlock(&key_tfm_list_mutex);
+			goto out;
+		}
 	}
+	if ((mount_crypt_stat->flags & ECRYPTFS_GLOBAL_ENCRYPT_FILENAMES)
+	    && !ecryptfs_tfm_exists(
+		    mount_crypt_stat->global_default_fn_cipher_name, NULL)) {
+		rc = ecryptfs_add_new_key_tfm(
+			NULL, mount_crypt_stat->global_default_fn_cipher_name,
+			mount_crypt_stat->global_default_fn_cipher_key_bytes);
+		if (rc) {
+			printk(KERN_ERR "Error attempting to initialize "
+			       "cipher with name = [%s] and key size = [%td]; "
+			       "rc = [%d]\n",
+			       mount_crypt_stat->global_default_fn_cipher_name,
+			       mount_crypt_stat->global_default_fn_cipher_key_bytes,
+			       rc);
+			rc = -EINVAL;
+			mutex_unlock(&key_tfm_list_mutex);
+			goto out;
+		}
+	}
+	mutex_unlock(&key_tfm_list_mutex);
 	rc = ecryptfs_init_global_auth_toks(mount_crypt_stat);
-	if (rc) {
+	if (rc)
 		printk(KERN_WARNING "One or more global auth toks could not "
 		       "properly register; rc = [%d]\n", rc);
-	}
 out:
 	return rc;
 }
diff --git a/fs/ecryptfs/messaging.c b/fs/ecryptfs/messaging.c
index 6913f72..96ef514 100644
--- a/fs/ecryptfs/messaging.c
+++ b/fs/ecryptfs/messaging.c
@@ -193,7 +193,7 @@
 	(*daemon) = kzalloc(sizeof(**daemon), GFP_KERNEL);
 	if (!(*daemon)) {
 		rc = -ENOMEM;
-		printk(KERN_ERR "%s: Failed to allocate [%Zd] bytes of "
+		printk(KERN_ERR "%s: Failed to allocate [%zd] bytes of "
 		       "GFP_KERNEL memory\n", __func__, sizeof(**daemon));
 		goto out;
 	}
@@ -435,7 +435,7 @@
 	msg_ctx->msg = kmalloc(msg_size, GFP_KERNEL);
 	if (!msg_ctx->msg) {
 		rc = -ENOMEM;
-		printk(KERN_ERR "%s: Failed to allocate [%Zd] bytes of "
+		printk(KERN_ERR "%s: Failed to allocate [%zd] bytes of "
 		       "GFP_KERNEL memory\n", __func__, msg_size);
 		goto unlock;
 	}
diff --git a/fs/ecryptfs/miscdev.c b/fs/ecryptfs/miscdev.c
index efd95a0..a67fea6 100644
--- a/fs/ecryptfs/miscdev.c
+++ b/fs/ecryptfs/miscdev.c
@@ -199,7 +199,7 @@
 		if (!msg_ctx->msg) {
 			rc = -ENOMEM;
 			printk(KERN_ERR "%s: Out of memory whilst attempting "
-			       "to kmalloc(%Zd, GFP_KERNEL)\n", __func__,
+			       "to kmalloc(%zd, GFP_KERNEL)\n", __func__,
 			       (sizeof(*msg_ctx->msg) + data_size));
 			goto out_unlock;
 		}
@@ -322,7 +322,7 @@
 	if (count < total_length) {
 		rc = 0;
 		printk(KERN_WARNING "%s: Only given user buffer of "
-		       "size [%Zd], but we need [%Zd] to read the "
+		       "size [%zd], but we need [%zd] to read the "
 		       "pending message\n", __func__, count, total_length);
 		goto out_unlock_msg_ctx;
 	}
@@ -376,7 +376,7 @@
 
 	if ((sizeof(*msg) + msg->data_len) != data_size) {
 		printk(KERN_WARNING "%s: (sizeof(*msg) + msg->data_len) = "
-		       "[%Zd]; data_size = [%Zd]. Invalid packet.\n", __func__,
+		       "[%zd]; data_size = [%zd]. Invalid packet.\n", __func__,
 		       (sizeof(*msg) + msg->data_len), data_size);
 		rc = -EINVAL;
 		goto out;
@@ -421,7 +421,7 @@
 	data = kmalloc(count, GFP_KERNEL);
 	if (!data) {
 		printk(KERN_ERR "%s: Out of memory whilst attempting to "
-		       "kmalloc([%Zd], GFP_KERNEL)\n", __func__, count);
+		       "kmalloc([%zd], GFP_KERNEL)\n", __func__, count);
 		goto out;
 	}
 	rc = copy_from_user(data, buf, count);
@@ -436,8 +436,8 @@
 	case ECRYPTFS_MSG_RESPONSE:
 		if (count < (1 + 4 + 1 + sizeof(struct ecryptfs_message))) {
 			printk(KERN_WARNING "%s: Minimum acceptable packet "
-			       "size is [%Zd], but amount of data written is "
-			       "only [%Zd]. Discarding response packet.\n",
+			       "size is [%zd], but amount of data written is "
+			       "only [%zd]. Discarding response packet.\n",
 			       __func__,
 			       (1 + 4 + 1 + sizeof(struct ecryptfs_message)),
 			       count);
@@ -455,9 +455,9 @@
 		}
 		i += packet_size_length;
 		if ((1 + 4 + packet_size_length + packet_size) != count) {
-			printk(KERN_WARNING "%s: (1 + packet_size_length([%Zd])"
-			       " + packet_size([%Zd]))([%Zd]) != "
-			       "count([%Zd]). Invalid packet format.\n",
+			printk(KERN_WARNING "%s: (1 + packet_size_length([%zd])"
+			       " + packet_size([%zd]))([%zd]) != "
+			       "count([%zd]). Invalid packet format.\n",
 			       __func__, packet_size_length, packet_size,
 			       (1 + packet_size_length + packet_size), count);
 			goto out_free;
diff --git a/fs/exec.c b/fs/exec.c
index 9c33f54..71a6efe 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -232,13 +232,13 @@
 
 static int __bprm_mm_init(struct linux_binprm *bprm)
 {
-	int err = -ENOMEM;
+	int err;
 	struct vm_area_struct *vma = NULL;
 	struct mm_struct *mm = bprm->mm;
 
 	bprm->vma = vma = kmem_cache_zalloc(vm_area_cachep, GFP_KERNEL);
 	if (!vma)
-		goto err;
+		return -ENOMEM;
 
 	down_write(&mm->mmap_sem);
 	vma->vm_mm = mm;
@@ -251,28 +251,20 @@
 	 */
 	vma->vm_end = STACK_TOP_MAX;
 	vma->vm_start = vma->vm_end - PAGE_SIZE;
-
 	vma->vm_flags = VM_STACK_FLAGS;
 	vma->vm_page_prot = vm_get_page_prot(vma->vm_flags);
 	err = insert_vm_struct(mm, vma);
-	if (err) {
-		up_write(&mm->mmap_sem);
+	if (err)
 		goto err;
-	}
 
 	mm->stack_vm = mm->total_vm = 1;
 	up_write(&mm->mmap_sem);
-
 	bprm->p = vma->vm_end - sizeof(void *);
-
 	return 0;
-
 err:
-	if (vma) {
-		bprm->vma = NULL;
-		kmem_cache_free(vm_area_cachep, vma);
-	}
-
+	up_write(&mm->mmap_sem);
+	bprm->vma = NULL;
+	kmem_cache_free(vm_area_cachep, vma);
 	return err;
 }
 
@@ -1694,7 +1686,7 @@
 	return (ret >= 2) ? 2 : ret;
 }
 
-int do_coredump(long signr, int exit_code, struct pt_regs * regs)
+void do_coredump(long signr, int exit_code, struct pt_regs *regs)
 {
 	struct core_state core_state;
 	char corename[CORENAME_MAX_SIZE + 1];
@@ -1778,6 +1770,11 @@
 
  	if (ispipe) {
 		helper_argv = argv_split(GFP_KERNEL, corename+1, &helper_argc);
+		if (!helper_argv) {
+			printk(KERN_WARNING "%s failed to allocate memory\n",
+			       __func__);
+			goto fail_unlock;
+		}
 		/* Terminate the string before the first option */
 		delimit = strchr(corename, ' ');
 		if (delimit)
@@ -1845,5 +1842,5 @@
 	put_cred(cred);
 	coredump_finish(mm);
 fail:
-	return retval;
+	return;
 }
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index b0537c8..6c46c64 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -1225,11 +1225,11 @@
 } while (0)
 
 #ifdef CONFIG_SMP
-/* Each CPU can accumulate FBC_BATCH blocks in their local
+/* Each CPU can accumulate percpu_counter_batch blocks in their local
  * counters. So we need to make sure we have free blocks more
- * than FBC_BATCH  * nr_cpu_ids. Also add a window of 4 times.
+ * than percpu_counter_batch * nr_cpu_ids. The watermark is 4 times that.
  */
-#define EXT4_FREEBLOCKS_WATERMARK (4 * (FBC_BATCH * nr_cpu_ids))
+#define EXT4_FREEBLOCKS_WATERMARK (4 * (percpu_counter_batch * nr_cpu_ids))
 #else
 #define EXT4_FREEBLOCKS_WATERMARK 0
 #endif
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 6702a49..98d3fe7 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -2498,7 +2498,7 @@
 	/*
 	 * switch to non delalloc mode if we are running low
 	 * on free block. The free block accounting via percpu
-	 * counters can get slightly wrong with FBC_BATCH getting
+	 * counters can get slightly wrong with percpu_counter_batch getting
 	 * accumulated on each CPU without updating global counters
 	 * Delalloc need an accurate free block accounting. So switch
 	 * to non delalloc when we are near to error range.
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index d0ff0b8..e5eaa62 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -421,9 +421,6 @@
  * If we're a pdlfush thread, then implement pdflush collision avoidance
  * against the entire list.
  *
- * WB_SYNC_HOLD is a hack for sys_sync(): reattach the inode to sb->s_dirty so
- * that it can be located for waiting on in __writeback_single_inode().
- *
  * If `bdi' is non-zero then we're being asked to writeback a specific queue.
  * This function assumes that the blockdev superblock's inodes are backed by
  * a variety of queues, so all inodes are searched.  For other superblocks,
@@ -443,6 +440,7 @@
 				struct writeback_control *wbc)
 {
 	const unsigned long start = jiffies;	/* livelock avoidance */
+	int sync = wbc->sync_mode == WB_SYNC_ALL;
 
 	spin_lock(&inode_lock);
 	if (!wbc->for_kupdate || list_empty(&sb->s_io))
@@ -499,10 +497,6 @@
 		__iget(inode);
 		pages_skipped = wbc->pages_skipped;
 		__writeback_single_inode(inode, wbc);
-		if (wbc->sync_mode == WB_SYNC_HOLD) {
-			inode->dirtied_when = jiffies;
-			list_move(&inode->i_list, &sb->s_dirty);
-		}
 		if (current_is_pdflush())
 			writeback_release(bdi);
 		if (wbc->pages_skipped != pages_skipped) {
@@ -523,7 +517,49 @@
 		if (!list_empty(&sb->s_more_io))
 			wbc->more_io = 1;
 	}
-	spin_unlock(&inode_lock);
+
+	if (sync) {
+		struct inode *inode, *old_inode = NULL;
+
+		/*
+		 * Data integrity sync. Must wait for all pages under writeback,
+		 * because some pages may have been dirtied before our sync
+		 * call but had their writeout started before we got to them.
+		 * In that case the inode may no longer be on the dirty list,
+		 * yet we still have to wait for that writeout.
+		 */
+		list_for_each_entry(inode, &sb->s_inodes, i_sb_list) {
+			struct address_space *mapping;
+
+			if (inode->i_state & (I_FREEING|I_WILL_FREE))
+				continue;
+			mapping = inode->i_mapping;
+			if (mapping->nrpages == 0)
+				continue;
+			__iget(inode);
+			spin_unlock(&inode_lock);
+			/*
+			 * We hold a reference to 'inode' so it couldn't have
+			 * been removed from s_inodes list while we dropped the
+			 * inode_lock.  We cannot iput the inode now as we can
+			 * be holding the last reference and we cannot iput it
+			 * under inode_lock. So we keep the reference and iput
+			 * it later.
+			 */
+			iput(old_inode);
+			old_inode = inode;
+
+			filemap_fdatawait(mapping);
+
+			cond_resched();
+
+			spin_lock(&inode_lock);
+		}
+		spin_unlock(&inode_lock);
+		iput(old_inode);
+	} else
+		spin_unlock(&inode_lock);
+
 	return;		/* Leave any unwritten inodes on s_io */
 }
 EXPORT_SYMBOL_GPL(generic_sync_sb_inodes);
@@ -588,8 +624,7 @@
 
 /*
  * writeback and wait upon the filesystem's dirty inodes.  The caller will
- * do this in two passes - one to write, and one to wait.  WB_SYNC_HOLD is
- * used to park the written inodes on sb->s_dirty for the wait pass.
+ * do this in two passes - one to write, and one to wait.
  *
  * A finite limit is set on the number of pages which will be written.
  * To prevent infinite livelock of sys_sync().
@@ -600,32 +635,23 @@
 void sync_inodes_sb(struct super_block *sb, int wait)
 {
 	struct writeback_control wbc = {
-		.sync_mode	= wait ? WB_SYNC_ALL : WB_SYNC_HOLD,
+		.sync_mode	= wait ? WB_SYNC_ALL : WB_SYNC_NONE,
 		.range_start	= 0,
 		.range_end	= LLONG_MAX,
 	};
-	unsigned long nr_dirty = global_page_state(NR_FILE_DIRTY);
-	unsigned long nr_unstable = global_page_state(NR_UNSTABLE_NFS);
 
-	wbc.nr_to_write = nr_dirty + nr_unstable +
-			(inodes_stat.nr_inodes - inodes_stat.nr_unused) +
-			nr_dirty + nr_unstable;
-	wbc.nr_to_write += wbc.nr_to_write / 2;		/* Bit more for luck */
+	if (!wait) {
+		unsigned long nr_dirty = global_page_state(NR_FILE_DIRTY);
+		unsigned long nr_unstable = global_page_state(NR_UNSTABLE_NFS);
+
+		wbc.nr_to_write = nr_dirty + nr_unstable +
+			(inodes_stat.nr_inodes - inodes_stat.nr_unused);
+	} else
+		wbc.nr_to_write = LONG_MAX; /* doesn't actually matter */
+
 	sync_sb_inodes(sb, &wbc);
 }
 
-/*
- * Rather lame livelock avoidance.
- */
-static void set_sb_syncing(int val)
-{
-	struct super_block *sb;
-	spin_lock(&sb_lock);
-	list_for_each_entry_reverse(sb, &super_blocks, s_list)
-		sb->s_syncing = val;
-	spin_unlock(&sb_lock);
-}
-
 /**
  * sync_inodes - writes all inodes to disk
  * @wait: wait for completion
@@ -652,9 +678,6 @@
 	spin_lock(&sb_lock);
 restart:
 	list_for_each_entry(sb, &super_blocks, s_list) {
-		if (sb->s_syncing)
-			continue;
-		sb->s_syncing = 1;
 		sb->s_count++;
 		spin_unlock(&sb_lock);
 		down_read(&sb->s_umount);
@@ -672,13 +695,10 @@
 
 void sync_inodes(int wait)
 {
-	set_sb_syncing(0);
 	__sync_inodes(0);
 
-	if (wait) {
-		set_sb_syncing(0);
+	if (wait)
 		__sync_inodes(1);
-	}
 }
 
 /**
diff --git a/fs/fuse/control.c b/fs/fuse/control.c
index 4f3cab3..99c99df 100644
--- a/fs/fuse/control.c
+++ b/fs/fuse/control.c
@@ -1,6 +1,6 @@
 /*
   FUSE: Filesystem in Userspace
-  Copyright (C) 2001-2006  Miklos Szeredi <miklos@szeredi.hu>
+  Copyright (C) 2001-2008  Miklos Szeredi <miklos@szeredi.hu>
 
   This program can be distributed under the terms of the GNU GPL.
   See the file COPYING.
@@ -48,11 +48,13 @@
 	size_t size;
 
 	if (!*ppos) {
+		long value;
 		struct fuse_conn *fc = fuse_ctl_file_conn_get(file);
 		if (!fc)
 			return 0;
 
-		file->private_data=(void *)(long)atomic_read(&fc->num_waiting);
+		value = atomic_read(&fc->num_waiting);
+		file->private_data = (void *)value;
 		fuse_conn_put(fc);
 	}
 	size = sprintf(tmp, "%ld\n", (long)file->private_data);
diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
index fba5716..e0c7ada 100644
--- a/fs/fuse/dev.c
+++ b/fs/fuse/dev.c
@@ -1,6 +1,6 @@
 /*
   FUSE: Filesystem in Userspace
-  Copyright (C) 2001-2006  Miklos Szeredi <miklos@szeredi.hu>
+  Copyright (C) 2001-2008  Miklos Szeredi <miklos@szeredi.hu>
 
   This program can be distributed under the terms of the GNU GPL.
   See the file COPYING.
@@ -269,7 +269,7 @@
  * Called with fc->lock, unlocks it
  */
 static void request_end(struct fuse_conn *fc, struct fuse_req *req)
-	__releases(fc->lock)
+__releases(&fc->lock)
 {
 	void (*end) (struct fuse_conn *, struct fuse_req *) = req->end;
 	req->end = NULL;
@@ -293,13 +293,13 @@
 	wake_up(&req->waitq);
 	if (end)
 		end(fc, req);
-	else
-		fuse_put_request(fc, req);
+	fuse_put_request(fc, req);
 }
 
 static void wait_answer_interruptible(struct fuse_conn *fc,
 				      struct fuse_req *req)
-	__releases(fc->lock) __acquires(fc->lock)
+__releases(&fc->lock)
+__acquires(&fc->lock)
 {
 	if (signal_pending(current))
 		return;
@@ -317,7 +317,8 @@
 }
 
 static void request_wait_answer(struct fuse_conn *fc, struct fuse_req *req)
-	__releases(fc->lock) __acquires(fc->lock)
+__releases(&fc->lock)
+__acquires(&fc->lock)
 {
 	if (!fc->no_interrupt) {
 		/* Any signal may interrupt this */
@@ -380,7 +381,7 @@
 	}
 }
 
-void request_send(struct fuse_conn *fc, struct fuse_req *req)
+void fuse_request_send(struct fuse_conn *fc, struct fuse_req *req)
 {
 	req->isreply = 1;
 	spin_lock(&fc->lock);
@@ -399,8 +400,8 @@
 	spin_unlock(&fc->lock);
 }
 
-static void request_send_nowait_locked(struct fuse_conn *fc,
-				       struct fuse_req *req)
+static void fuse_request_send_nowait_locked(struct fuse_conn *fc,
+					    struct fuse_req *req)
 {
 	req->background = 1;
 	fc->num_background++;
@@ -414,11 +415,11 @@
 	flush_bg_queue(fc);
 }
 
-static void request_send_nowait(struct fuse_conn *fc, struct fuse_req *req)
+static void fuse_request_send_nowait(struct fuse_conn *fc, struct fuse_req *req)
 {
 	spin_lock(&fc->lock);
 	if (fc->connected) {
-		request_send_nowait_locked(fc, req);
+		fuse_request_send_nowait_locked(fc, req);
 		spin_unlock(&fc->lock);
 	} else {
 		req->out.h.error = -ENOTCONN;
@@ -426,16 +427,16 @@
 	}
 }
 
-void request_send_noreply(struct fuse_conn *fc, struct fuse_req *req)
+void fuse_request_send_noreply(struct fuse_conn *fc, struct fuse_req *req)
 {
 	req->isreply = 0;
-	request_send_nowait(fc, req);
+	fuse_request_send_nowait(fc, req);
 }
 
-void request_send_background(struct fuse_conn *fc, struct fuse_req *req)
+void fuse_request_send_background(struct fuse_conn *fc, struct fuse_req *req)
 {
 	req->isreply = 1;
-	request_send_nowait(fc, req);
+	fuse_request_send_nowait(fc, req);
 }
 
 /*
@@ -443,10 +444,11 @@
  *
  * fc->connected must have been checked previously
  */
-void request_send_background_locked(struct fuse_conn *fc, struct fuse_req *req)
+void fuse_request_send_background_locked(struct fuse_conn *fc,
+					 struct fuse_req *req)
 {
 	req->isreply = 1;
-	request_send_nowait_locked(fc, req);
+	fuse_request_send_nowait_locked(fc, req);
 }
 
 /*
@@ -539,8 +541,8 @@
 		BUG_ON(!cs->nr_segs);
 		cs->seglen = cs->iov[0].iov_len;
 		cs->addr = (unsigned long) cs->iov[0].iov_base;
-		cs->iov ++;
-		cs->nr_segs --;
+		cs->iov++;
+		cs->nr_segs--;
 	}
 	down_read(&current->mm->mmap_sem);
 	err = get_user_pages(current, current->mm, cs->addr, 1, cs->write, 0,
@@ -589,9 +591,11 @@
 		kunmap_atomic(mapaddr, KM_USER1);
 	}
 	while (count) {
-		int err;
-		if (!cs->len && (err = fuse_copy_fill(cs)))
-			return err;
+		if (!cs->len) {
+			int err = fuse_copy_fill(cs);
+			if (err)
+				return err;
+		}
 		if (page) {
 			void *mapaddr = kmap_atomic(page, KM_USER1);
 			void *buf = mapaddr + offset;
@@ -631,9 +635,11 @@
 static int fuse_copy_one(struct fuse_copy_state *cs, void *val, unsigned size)
 {
 	while (size) {
-		int err;
-		if (!cs->len && (err = fuse_copy_fill(cs)))
-			return err;
+		if (!cs->len) {
+			int err = fuse_copy_fill(cs);
+			if (err)
+				return err;
+		}
 		fuse_copy_do(cs, &val, &size);
 	}
 	return 0;
@@ -664,6 +670,8 @@
 
 /* Wait until a request is available on the pending list */
 static void request_wait(struct fuse_conn *fc)
+__releases(&fc->lock)
+__acquires(&fc->lock)
 {
 	DECLARE_WAITQUEUE(wait, current);
 
@@ -691,7 +699,7 @@
  */
 static int fuse_read_interrupt(struct fuse_conn *fc, struct fuse_req *req,
 			       const struct iovec *iov, unsigned long nr_segs)
-	__releases(fc->lock)
+__releases(&fc->lock)
 {
 	struct fuse_copy_state cs;
 	struct fuse_in_header ih;
@@ -813,6 +821,34 @@
 	return err;
 }
 
+static int fuse_notify_poll(struct fuse_conn *fc, unsigned int size,
+			    struct fuse_copy_state *cs)
+{
+	struct fuse_notify_poll_wakeup_out outarg;
+	int err;
+
+	if (size != sizeof(outarg))
+		return -EINVAL;
+
+	err = fuse_copy_one(cs, &outarg, sizeof(outarg));
+	if (err)
+		return err;
+
+	return fuse_notify_poll_wakeup(fc, &outarg);
+}
+
+static int fuse_notify(struct fuse_conn *fc, enum fuse_notify_code code,
+		       unsigned int size, struct fuse_copy_state *cs)
+{
+	switch (code) {
+	case FUSE_NOTIFY_POLL:
+		return fuse_notify_poll(fc, size, cs);
+
+	default:
+		return -EINVAL;
+	}
+}
+
 /* Look up request on processing list by unique ID */
 static struct fuse_req *request_find(struct fuse_conn *fc, u64 unique)
 {
@@ -876,9 +912,23 @@
 	err = fuse_copy_one(&cs, &oh, sizeof(oh));
 	if (err)
 		goto err_finish;
+
 	err = -EINVAL;
-	if (!oh.unique || oh.error <= -1000 || oh.error > 0 ||
-	    oh.len != nbytes)
+	if (oh.len != nbytes)
+		goto err_finish;
+
+	/*
+	 * A zero oh.unique marks an unsolicited notification message;
+	 * oh.error then carries the notification code.
+	 */
+	if (!oh.unique) {
+		err = fuse_notify(fc, oh.error, nbytes - sizeof(oh), &cs);
+		fuse_copy_finish(&cs);
+		return err ? err : nbytes;
+	}
+
+	err = -EINVAL;
+	if (oh.error <= -1000 || oh.error > 0)
 		goto err_finish;
 
 	spin_lock(&fc->lock);
@@ -966,6 +1016,8 @@
  * This function releases and reacquires fc->lock
  */
 static void end_requests(struct fuse_conn *fc, struct list_head *head)
+__releases(&fc->lock)
+__acquires(&fc->lock)
 {
 	while (!list_empty(head)) {
 		struct fuse_req *req;
@@ -988,7 +1040,8 @@
  * locked).
  */
 static void end_io_requests(struct fuse_conn *fc)
-	__releases(fc->lock) __acquires(fc->lock)
+__releases(&fc->lock)
+__acquires(&fc->lock)
 {
 	while (!list_empty(&fc->io)) {
 		struct fuse_req *req =
@@ -1002,11 +1055,11 @@
 		wake_up(&req->waitq);
 		if (end) {
 			req->end = NULL;
-			/* The end function will consume this reference */
 			__fuse_get_request(req);
 			spin_unlock(&fc->lock);
 			wait_event(req->waitq, !req->locked);
 			end(fc, req);
+			fuse_put_request(fc, req);
 			spin_lock(&fc->lock);
 		}
 	}
diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index 95bc22b..fdff346 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -1,6 +1,6 @@
 /*
   FUSE: Filesystem in Userspace
-  Copyright (C) 2001-2006  Miklos Szeredi <miklos@szeredi.hu>
+  Copyright (C) 2001-2008  Miklos Szeredi <miklos@szeredi.hu>
 
   This program can be distributed under the terms of the GNU GPL.
   See the file COPYING.
@@ -189,7 +189,7 @@
 		parent = dget_parent(entry);
 		fuse_lookup_init(fc, req, get_node_id(parent->d_inode),
 				 &entry->d_name, &outarg);
-		request_send(fc, req);
+		fuse_request_send(fc, req);
 		dput(parent);
 		err = req->out.h.error;
 		fuse_put_request(fc, req);
@@ -204,7 +204,7 @@
 				return 0;
 			}
 			spin_lock(&fc->lock);
-			fi->nlookup ++;
+			fi->nlookup++;
 			spin_unlock(&fc->lock);
 		}
 		fuse_put_request(fc, forget_req);
@@ -283,7 +283,7 @@
 	attr_version = fuse_get_attr_version(fc);
 
 	fuse_lookup_init(fc, req, nodeid, name, outarg);
-	request_send(fc, req);
+	fuse_request_send(fc, req);
 	err = req->out.h.error;
 	fuse_put_request(fc, req);
 	/* Zero nodeid is same as -ENOENT, but with valid timeout */
@@ -369,7 +369,7 @@
 {
 	fuse_release_fill(ff, nodeid, flags, FUSE_RELEASE);
 	ff->reserved_req->force = 1;
-	request_send(fc, ff->reserved_req);
+	fuse_request_send(fc, ff->reserved_req);
 	fuse_put_request(fc, ff->reserved_req);
 	kfree(ff);
 }
@@ -408,7 +408,7 @@
 		goto out_put_forget_req;
 
 	err = -ENOMEM;
-	ff = fuse_file_alloc();
+	ff = fuse_file_alloc(fc);
 	if (!ff)
 		goto out_put_request;
 
@@ -432,7 +432,7 @@
 	req->out.args[0].value = &outentry;
 	req->out.args[1].size = sizeof(outopen);
 	req->out.args[1].value = &outopen;
-	request_send(fc, req);
+	fuse_request_send(fc, req);
 	err = req->out.h.error;
 	if (err) {
 		if (err == -ENOSYS)
@@ -502,7 +502,7 @@
 	else
 		req->out.args[0].size = sizeof(outarg);
 	req->out.args[0].value = &outarg;
-	request_send(fc, req);
+	fuse_request_send(fc, req);
 	err = req->out.h.error;
 	fuse_put_request(fc, req);
 	if (err)
@@ -631,15 +631,17 @@
 	req->in.numargs = 1;
 	req->in.args[0].size = entry->d_name.len + 1;
 	req->in.args[0].value = entry->d_name.name;
-	request_send(fc, req);
+	fuse_request_send(fc, req);
 	err = req->out.h.error;
 	fuse_put_request(fc, req);
 	if (!err) {
 		struct inode *inode = entry->d_inode;
 
-		/* Set nlink to zero so the inode can be cleared, if
-                   the inode does have more links this will be
-                   discovered at the next lookup/getattr */
+		/*
+		 * Set nlink to zero so the inode can be cleared, if the inode
+		 * does have more links this will be discovered at the next
+		 * lookup/getattr.
+		 */
 		clear_nlink(inode);
 		fuse_invalidate_attr(inode);
 		fuse_invalidate_attr(dir);
@@ -662,7 +664,7 @@
 	req->in.numargs = 1;
 	req->in.args[0].size = entry->d_name.len + 1;
 	req->in.args[0].value = entry->d_name.name;
-	request_send(fc, req);
+	fuse_request_send(fc, req);
 	err = req->out.h.error;
 	fuse_put_request(fc, req);
 	if (!err) {
@@ -695,7 +697,7 @@
 	req->in.args[1].value = oldent->d_name.name;
 	req->in.args[2].size = newent->d_name.len + 1;
 	req->in.args[2].value = newent->d_name.name;
-	request_send(fc, req);
+	fuse_request_send(fc, req);
 	err = req->out.h.error;
 	fuse_put_request(fc, req);
 	if (!err) {
@@ -811,7 +813,7 @@
 	else
 		req->out.args[0].size = sizeof(outarg);
 	req->out.args[0].value = &outarg;
-	request_send(fc, req);
+	fuse_request_send(fc, req);
 	err = req->out.h.error;
 	fuse_put_request(fc, req);
 	if (!err) {
@@ -911,7 +913,7 @@
 	req->in.numargs = 1;
 	req->in.args[0].size = sizeof(inarg);
 	req->in.args[0].value = &inarg;
-	request_send(fc, req);
+	fuse_request_send(fc, req);
 	err = req->out.h.error;
 	fuse_put_request(fc, req);
 	if (err == -ENOSYS) {
@@ -1033,7 +1035,7 @@
 	req->num_pages = 1;
 	req->pages[0] = page;
 	fuse_read_fill(req, file, inode, file->f_pos, PAGE_SIZE, FUSE_READDIR);
-	request_send(fc, req);
+	fuse_request_send(fc, req);
 	nbytes = req->out.args[0].size;
 	err = req->out.h.error;
 	fuse_put_request(fc, req);
@@ -1067,7 +1069,7 @@
 	req->out.numargs = 1;
 	req->out.args[0].size = PAGE_SIZE - 1;
 	req->out.args[0].value = link;
-	request_send(fc, req);
+	fuse_request_send(fc, req);
 	if (req->out.h.error) {
 		free_page((unsigned long) link);
 		link = ERR_PTR(req->out.h.error);
@@ -1273,7 +1275,7 @@
 	else
 		req->out.args[0].size = sizeof(outarg);
 	req->out.args[0].value = &outarg;
-	request_send(fc, req);
+	fuse_request_send(fc, req);
 	err = req->out.h.error;
 	fuse_put_request(fc, req);
 	if (err) {
@@ -1367,7 +1369,7 @@
 	req->in.args[1].value = name;
 	req->in.args[2].size = size;
 	req->in.args[2].value = value;
-	request_send(fc, req);
+	fuse_request_send(fc, req);
 	err = req->out.h.error;
 	fuse_put_request(fc, req);
 	if (err == -ENOSYS) {
@@ -1413,7 +1415,7 @@
 		req->out.args[0].size = sizeof(outarg);
 		req->out.args[0].value = &outarg;
 	}
-	request_send(fc, req);
+	fuse_request_send(fc, req);
 	ret = req->out.h.error;
 	if (!ret)
 		ret = size ? req->out.args[0].size : outarg.size;
@@ -1463,7 +1465,7 @@
 		req->out.args[0].size = sizeof(outarg);
 		req->out.args[0].value = &outarg;
 	}
-	request_send(fc, req);
+	fuse_request_send(fc, req);
 	ret = req->out.h.error;
 	if (!ret)
 		ret = size ? req->out.args[0].size : outarg.size;
@@ -1496,7 +1498,7 @@
 	req->in.numargs = 1;
 	req->in.args[0].size = strlen(name) + 1;
 	req->in.args[0].value = name;
-	request_send(fc, req);
+	fuse_request_send(fc, req);
 	err = req->out.h.error;
 	fuse_put_request(fc, req);
 	if (err == -ENOSYS) {
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index 4c9ee70..e816264 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -1,6 +1,6 @@
 /*
   FUSE: Filesystem in Userspace
-  Copyright (C) 2001-2006  Miklos Szeredi <miklos@szeredi.hu>
+  Copyright (C) 2001-2008  Miklos Szeredi <miklos@szeredi.hu>
 
   This program can be distributed under the terms of the GNU GPL.
   See the file COPYING.
@@ -39,14 +39,14 @@
 	req->out.numargs = 1;
 	req->out.args[0].size = sizeof(*outargp);
 	req->out.args[0].value = outargp;
-	request_send(fc, req);
+	fuse_request_send(fc, req);
 	err = req->out.h.error;
 	fuse_put_request(fc, req);
 
 	return err;
 }
 
-struct fuse_file *fuse_file_alloc(void)
+struct fuse_file *fuse_file_alloc(struct fuse_conn *fc)
 {
 	struct fuse_file *ff;
 	ff = kmalloc(sizeof(struct fuse_file), GFP_KERNEL);
@@ -58,7 +58,12 @@
 		} else {
 			INIT_LIST_HEAD(&ff->write_entry);
 			atomic_set(&ff->count, 0);
+			spin_lock(&fc->lock);
+			ff->kh = ++fc->khctr;
+			spin_unlock(&fc->lock);
 		}
+		RB_CLEAR_NODE(&ff->polled_node);
+		init_waitqueue_head(&ff->poll_wait);
 	}
 	return ff;
 }
@@ -79,7 +84,6 @@
 {
 	dput(req->misc.release.dentry);
 	mntput(req->misc.release.vfsmount);
-	fuse_put_request(fc, req);
 }
 
 static void fuse_file_put(struct fuse_file *ff)
@@ -89,7 +93,7 @@
 		struct inode *inode = req->misc.release.dentry->d_inode;
 		struct fuse_conn *fc = get_fuse_conn(inode);
 		req->end = fuse_release_end;
-		request_send_background(fc, req);
+		fuse_request_send_background(fc, req);
 		kfree(ff);
 	}
 }
@@ -109,6 +113,7 @@
 
 int fuse_open_common(struct inode *inode, struct file *file, int isdir)
 {
+	struct fuse_conn *fc = get_fuse_conn(inode);
 	struct fuse_open_out outarg;
 	struct fuse_file *ff;
 	int err;
@@ -121,7 +126,7 @@
 	if (err)
 		return err;
 
-	ff = fuse_file_alloc();
+	ff = fuse_file_alloc(fc);
 	if (!ff)
 		return -ENOMEM;
 
@@ -167,7 +172,11 @@
 
 		spin_lock(&fc->lock);
 		list_del(&ff->write_entry);
+		if (!RB_EMPTY_NODE(&ff->polled_node))
+			rb_erase(&ff->polled_node, &fc->polled_files);
 		spin_unlock(&fc->lock);
+
+		wake_up_interruptible_sync(&ff->poll_wait);
 		/*
 		 * Normally this will send the RELEASE request,
 		 * however if some asynchronous READ or WRITE requests
@@ -280,7 +289,7 @@
 	req->in.args[0].size = sizeof(inarg);
 	req->in.args[0].value = &inarg;
 	req->force = 1;
-	request_send(fc, req);
+	fuse_request_send(fc, req);
 	err = req->out.h.error;
 	fuse_put_request(fc, req);
 	if (err == -ENOSYS) {
@@ -344,7 +353,7 @@
 	req->in.numargs = 1;
 	req->in.args[0].size = sizeof(inarg);
 	req->in.args[0].value = &inarg;
-	request_send(fc, req);
+	fuse_request_send(fc, req);
 	err = req->out.h.error;
 	fuse_put_request(fc, req);
 	if (err == -ENOSYS) {
@@ -396,7 +405,7 @@
 		inarg->read_flags |= FUSE_READ_LOCKOWNER;
 		inarg->lock_owner = fuse_lock_owner_id(fc, owner);
 	}
-	request_send(fc, req);
+	fuse_request_send(fc, req);
 	return req->out.args[0].size;
 }
 
@@ -493,7 +502,6 @@
 	}
 	if (req->ff)
 		fuse_file_put(req->ff);
-	fuse_put_request(fc, req);
 }
 
 static void fuse_send_readpages(struct fuse_req *req, struct file *file,
@@ -509,10 +517,11 @@
 		struct fuse_file *ff = file->private_data;
 		req->ff = fuse_file_get(ff);
 		req->end = fuse_readpages_end;
-		request_send_background(fc, req);
+		fuse_request_send_background(fc, req);
 	} else {
-		request_send(fc, req);
+		fuse_request_send(fc, req);
 		fuse_readpages_end(fc, req);
+		fuse_put_request(fc, req);
 	}
 }
 
@@ -543,7 +552,7 @@
 		}
 	}
 	req->pages[req->num_pages] = page;
-	req->num_pages ++;
+	req->num_pages++;
 	return 0;
 }
 
@@ -636,7 +645,7 @@
 		inarg->write_flags |= FUSE_WRITE_LOCKOWNER;
 		inarg->lock_owner = fuse_lock_owner_id(fc, owner);
 	}
-	request_send(fc, req);
+	fuse_request_send(fc, req);
 	return req->misc.write.out.size;
 }
 
@@ -1042,7 +1051,6 @@
 {
 	__free_page(req->pages[0]);
 	fuse_file_put(req->ff);
-	fuse_put_request(fc, req);
 }
 
 static void fuse_writepage_finish(struct fuse_conn *fc, struct fuse_req *req)
@@ -1060,6 +1068,8 @@
 
 /* Called under fc->lock, may release and reacquire it */
 static void fuse_send_writepage(struct fuse_conn *fc, struct fuse_req *req)
+__releases(&fc->lock)
+__acquires(&fc->lock)
 {
 	struct fuse_inode *fi = get_fuse_inode(req->inode);
 	loff_t size = i_size_read(req->inode);
@@ -1079,13 +1089,14 @@
 
 	req->in.args[1].size = inarg->size;
 	fi->writectr++;
-	request_send_background_locked(fc, req);
+	fuse_request_send_background_locked(fc, req);
 	return;
 
  out_free:
 	fuse_writepage_finish(fc, req);
 	spin_unlock(&fc->lock);
 	fuse_writepage_free(fc, req);
+	fuse_put_request(fc, req);
 	spin_lock(&fc->lock);
 }
 
@@ -1096,6 +1107,8 @@
  * Called with fc->lock
  */
 void fuse_flush_writepages(struct inode *inode)
+__releases(&fc->lock)
+__acquires(&fc->lock)
 {
 	struct fuse_conn *fc = get_fuse_conn(inode);
 	struct fuse_inode *fi = get_fuse_inode(inode);
@@ -1325,7 +1338,7 @@
 	req->out.numargs = 1;
 	req->out.args[0].size = sizeof(outarg);
 	req->out.args[0].value = &outarg;
-	request_send(fc, req);
+	fuse_request_send(fc, req);
 	err = req->out.h.error;
 	fuse_put_request(fc, req);
 	if (!err)
@@ -1357,7 +1370,7 @@
 		return PTR_ERR(req);
 
 	fuse_lk_fill(req, file, fl, opcode, pid, flock);
-	request_send(fc, req);
+	fuse_request_send(fc, req);
 	err = req->out.h.error;
 	/* locking is restartable */
 	if (err == -EINTR)
@@ -1433,7 +1446,7 @@
 	req->out.numargs = 1;
 	req->out.args[0].size = sizeof(outarg);
 	req->out.args[0].value = &outarg;
-	request_send(fc, req);
+	fuse_request_send(fc, req);
 	err = req->out.h.error;
 	fuse_put_request(fc, req);
 	if (err == -ENOSYS)
@@ -1470,6 +1483,406 @@
 	return retval;
 }
 
+static int fuse_ioctl_copy_user(struct page **pages, struct iovec *iov,
+			unsigned int nr_segs, size_t bytes, bool to_user)
+{
+	struct iov_iter ii;
+	int page_idx = 0;
+
+	if (!bytes)
+		return 0;
+
+	iov_iter_init(&ii, iov, nr_segs, bytes, 0);
+
+	while (iov_iter_count(&ii)) {
+		struct page *page = pages[page_idx++];
+		size_t todo = min_t(size_t, PAGE_SIZE, iov_iter_count(&ii));
+		void *kaddr, *map;
+
+		kaddr = map = kmap(page);
+
+		while (todo) {
+			char __user *uaddr = ii.iov->iov_base + ii.iov_offset;
+			size_t iov_len = ii.iov->iov_len - ii.iov_offset;
+			size_t copy = min(todo, iov_len);
+			size_t left;
+
+			if (!to_user)
+				left = copy_from_user(kaddr, uaddr, copy);
+			else
+				left = copy_to_user(uaddr, kaddr, copy);
+
+			if (unlikely(left))
+				return -EFAULT;
+
+			iov_iter_advance(&ii, copy);
+			todo -= copy;
+			kaddr += copy;
+		}
+
+		kunmap(map);
+	}
+
+	return 0;
+}
+
+/*
+ * For ioctls, there is no generic way to determine how much memory
+ * needs to be read and/or written.  Furthermore, ioctls are allowed
+ * to dereference the passed pointer, so the parameter requires deep
+ * copying but FUSE has no idea whatsoever about what to copy in or
+ * out.
+ *
+ * This is solved by allowing FUSE server to retry ioctl with
+ * necessary in/out iovecs.  Let's assume the ioctl implementation
+ * needs to read in the following structure.
+ *
+ * struct a {
+ *	char	*buf;
+ *	size_t	buflen;
+ * }
+ *
+ * On the first callout to FUSE server, inarg->in_size and
+ * inarg->out_size will be zero; then, the server completes the ioctl
+ * with FUSE_IOCTL_RETRY set in out->flags, out->in_iovs set to 1 and
+ * the actual iov array to
+ *
+ * { { .iov_base = inarg.arg,	.iov_len = sizeof(struct a) } }
+ *
+ * which tells FUSE to copy in the requested area and retry the ioctl.
+ * On the second round, the server has access to the structure and
+ * from that it can tell what to look for next, so on this invocation,
+ * it sets FUSE_IOCTL_RETRY, out->in_iovs to 2 and iov array to
+ *
+ * { { .iov_base = inarg.arg,	.iov_len = sizeof(struct a)	},
+ *   { .iov_base = a.buf,	.iov_len = a.buflen		} }
+ *
+ * FUSE will copy both struct a and the pointed buffer from the
+ * process doing the ioctl and retry ioctl with both struct a and the
+ * buffer.
+ *
+ * This time, FUSE server has everything it needs and completes ioctl
+ * without FUSE_IOCTL_RETRY which finishes the ioctl call.
+ *
+ * Copying data out works the same way.
+ *
+ * Note that if FUSE_IOCTL_UNRESTRICTED is clear, the kernel
+ * automatically initializes in and out iovs by decoding @cmd with
+ * _IOC_* macros and the server is not allowed to request RETRY.  This
+ * limits ioctl data transfers to well-formed ioctls and is the forced
+ * behavior for all FUSE servers.
+ */
+static long fuse_file_do_ioctl(struct file *file, unsigned int cmd,
+			       unsigned long arg, unsigned int flags)
+{
+	struct inode *inode = file->f_dentry->d_inode;
+	struct fuse_file *ff = file->private_data;
+	struct fuse_conn *fc = get_fuse_conn(inode);
+	struct fuse_ioctl_in inarg = {
+		.fh = ff->fh,
+		.cmd = cmd,
+		.arg = arg,
+		.flags = flags
+	};
+	struct fuse_ioctl_out outarg;
+	struct fuse_req *req = NULL;
+	struct page **pages = NULL;
+	struct page *iov_page = NULL;
+	struct iovec *in_iov = NULL, *out_iov = NULL;
+	unsigned int in_iovs = 0, out_iovs = 0, num_pages = 0, max_pages;
+	size_t in_size, out_size, transferred;
+	int err;
+
+	/* assume all the iovs returned by client always fit in a page */
+	BUILD_BUG_ON(sizeof(struct iovec) * FUSE_IOCTL_MAX_IOV > PAGE_SIZE);
+
+	if (!fuse_allow_task(fc, current))
+		return -EACCES;
+
+	err = -EIO;
+	if (is_bad_inode(inode))
+		goto out;
+
+	err = -ENOMEM;
+	pages = kzalloc(sizeof(pages[0]) * FUSE_MAX_PAGES_PER_REQ, GFP_KERNEL);
+	iov_page = alloc_page(GFP_KERNEL);
+	if (!pages || !iov_page)
+		goto out;
+
+	/*
+	 * If restricted, initialize IO parameters as encoded in @cmd.
+	 * RETRY from server is not allowed.
+	 */
+	if (!(flags & FUSE_IOCTL_UNRESTRICTED)) {
+		struct iovec *iov = page_address(iov_page);
+
+		iov->iov_base = (void __user *)arg;
+		iov->iov_len = _IOC_SIZE(cmd);
+
+		if (_IOC_DIR(cmd) & _IOC_WRITE) {
+			in_iov = iov;
+			in_iovs = 1;
+		}
+
+		if (_IOC_DIR(cmd) & _IOC_READ) {
+			out_iov = iov;
+			out_iovs = 1;
+		}
+	}
+
+ retry:
+	inarg.in_size = in_size = iov_length(in_iov, in_iovs);
+	inarg.out_size = out_size = iov_length(out_iov, out_iovs);
+
+	/*
+	 * Out data can be used either for actual out data or iovs,
+	 * make sure there always is at least one page.
+	 */
+	out_size = max_t(size_t, out_size, PAGE_SIZE);
+	max_pages = DIV_ROUND_UP(max(in_size, out_size), PAGE_SIZE);
+
+	/* make sure there are enough buffer pages and init request with them */
+	err = -ENOMEM;
+	if (max_pages > FUSE_MAX_PAGES_PER_REQ)
+		goto out;
+	while (num_pages < max_pages) {
+		pages[num_pages] = alloc_page(GFP_KERNEL | __GFP_HIGHMEM);
+		if (!pages[num_pages])
+			goto out;
+		num_pages++;
+	}
+
+	req = fuse_get_req(fc);
+	if (IS_ERR(req)) {
+		err = PTR_ERR(req);
+		req = NULL;
+		goto out;
+	}
+	memcpy(req->pages, pages, sizeof(req->pages[0]) * num_pages);
+	req->num_pages = num_pages;
+
+	/* okay, let's send it to the client */
+	req->in.h.opcode = FUSE_IOCTL;
+	req->in.h.nodeid = get_node_id(inode);
+	req->in.numargs = 1;
+	req->in.args[0].size = sizeof(inarg);
+	req->in.args[0].value = &inarg;
+	if (in_size) {
+		req->in.numargs++;
+		req->in.args[1].size = in_size;
+		req->in.argpages = 1;
+
+		err = fuse_ioctl_copy_user(pages, in_iov, in_iovs, in_size,
+					   false);
+		if (err)
+			goto out;
+	}
+
+	req->out.numargs = 2;
+	req->out.args[0].size = sizeof(outarg);
+	req->out.args[0].value = &outarg;
+	req->out.args[1].size = out_size;
+	req->out.argpages = 1;
+	req->out.argvar = 1;
+
+	fuse_request_send(fc, req);
+	err = req->out.h.error;
+	transferred = req->out.args[1].size;
+	fuse_put_request(fc, req);
+	req = NULL;
+	if (err)
+		goto out;
+
+	/* did it ask for retry? */
+	if (outarg.flags & FUSE_IOCTL_RETRY) {
+		char *vaddr;
+
+		/* no retry if in restricted mode */
+		err = -EIO;
+		if (!(flags & FUSE_IOCTL_UNRESTRICTED))
+			goto out;
+
+		in_iovs = outarg.in_iovs;
+		out_iovs = outarg.out_iovs;
+
+		/*
+		 * Make sure things are in boundary, separate checks
+		 * are to protect against overflow.
+		 */
+		err = -ENOMEM;
+		if (in_iovs > FUSE_IOCTL_MAX_IOV ||
+		    out_iovs > FUSE_IOCTL_MAX_IOV ||
+		    in_iovs + out_iovs > FUSE_IOCTL_MAX_IOV)
+			goto out;
+
+		err = -EIO;
+		if ((in_iovs + out_iovs) * sizeof(struct iovec) != transferred)
+			goto out;
+
+		/* okay, copy in iovs and retry */
+		vaddr = kmap_atomic(pages[0], KM_USER0);
+		memcpy(page_address(iov_page), vaddr, transferred);
+		kunmap_atomic(vaddr, KM_USER0);
+
+		in_iov = page_address(iov_page);
+		out_iov = in_iov + in_iovs;
+
+		goto retry;
+	}
+
+	err = -EIO;
+	if (transferred > inarg.out_size)
+		goto out;
+
+	err = fuse_ioctl_copy_user(pages, out_iov, out_iovs, transferred, true);
+ out:
+	if (req)
+		fuse_put_request(fc, req);
+	if (iov_page)
+		__free_page(iov_page);
+	while (num_pages)
+		__free_page(pages[--num_pages]);
+	kfree(pages);
+
+	return err ? err : outarg.result;
+}
+
+static long fuse_file_ioctl(struct file *file, unsigned int cmd,
+			    unsigned long arg)
+{
+	return fuse_file_do_ioctl(file, cmd, arg, 0);
+}
+
+static long fuse_file_compat_ioctl(struct file *file, unsigned int cmd,
+				   unsigned long arg)
+{
+	return fuse_file_do_ioctl(file, cmd, arg, FUSE_IOCTL_COMPAT);
+}
+
+/*
+ * All files which have been polled are linked to RB tree
+ * fuse_conn->polled_files which is indexed by kh.  Walk the tree and
+ * find the matching one.
+ */
+static struct rb_node **fuse_find_polled_node(struct fuse_conn *fc, u64 kh,
+					      struct rb_node **parent_out)
+{
+	struct rb_node **link = &fc->polled_files.rb_node;
+	struct rb_node *last = NULL;
+
+	while (*link) {
+		struct fuse_file *ff;
+
+		last = *link;
+		ff = rb_entry(last, struct fuse_file, polled_node);
+
+		if (kh < ff->kh)
+			link = &last->rb_left;
+		else if (kh > ff->kh)
+			link = &last->rb_right;
+		else
+			return link;
+	}
+
+	if (parent_out)
+		*parent_out = last;
+	return link;
+}
+
+/*
+ * The file is about to be polled.  Make sure it's on the polled_files
+ * RB tree.  Note that files once added to the polled_files tree are
+ * not removed before the file is released.  This is because a file
+ * polled once is likely to be polled again.
+ */
+static void fuse_register_polled_file(struct fuse_conn *fc,
+				      struct fuse_file *ff)
+{
+	spin_lock(&fc->lock);
+	if (RB_EMPTY_NODE(&ff->polled_node)) {
+		struct rb_node **link, *parent;
+
+		link = fuse_find_polled_node(fc, ff->kh, &parent);
+		BUG_ON(*link);
+		rb_link_node(&ff->polled_node, parent, link);
+		rb_insert_color(&ff->polled_node, &fc->polled_files);
+	}
+	spin_unlock(&fc->lock);
+}
+
+static unsigned fuse_file_poll(struct file *file, poll_table *wait)
+{
+	struct inode *inode = file->f_dentry->d_inode;
+	struct fuse_file *ff = file->private_data;
+	struct fuse_conn *fc = get_fuse_conn(inode);
+	struct fuse_poll_in inarg = { .fh = ff->fh, .kh = ff->kh };
+	struct fuse_poll_out outarg;
+	struct fuse_req *req;
+	int err;
+
+	if (fc->no_poll)
+		return DEFAULT_POLLMASK;
+
+	poll_wait(file, &ff->poll_wait, wait);
+
+	/*
+	 * Ask for notification iff there's someone waiting for it.
+	 * The client may ignore the flag and always notify.
+	 */
+	if (waitqueue_active(&ff->poll_wait)) {
+		inarg.flags |= FUSE_POLL_SCHEDULE_NOTIFY;
+		fuse_register_polled_file(fc, ff);
+	}
+
+	req = fuse_get_req(fc);
+	if (IS_ERR(req))
+		return PTR_ERR(req);
+
+	req->in.h.opcode = FUSE_POLL;
+	req->in.h.nodeid = get_node_id(inode);
+	req->in.numargs = 1;
+	req->in.args[0].size = sizeof(inarg);
+	req->in.args[0].value = &inarg;
+	req->out.numargs = 1;
+	req->out.args[0].size = sizeof(outarg);
+	req->out.args[0].value = &outarg;
+	fuse_request_send(fc, req);
+	err = req->out.h.error;
+	fuse_put_request(fc, req);
+
+	if (!err)
+		return outarg.revents;
+	if (err == -ENOSYS) {
+		fc->no_poll = 1;
+		return DEFAULT_POLLMASK;
+	}
+	return POLLERR;
+}
+
+/*
+ * This is called from fuse_handle_notify() on FUSE_NOTIFY_POLL and
+ * wakes up the poll waiters.
+ */
+int fuse_notify_poll_wakeup(struct fuse_conn *fc,
+			    struct fuse_notify_poll_wakeup_out *outarg)
+{
+	u64 kh = outarg->kh;
+	struct rb_node **link;
+
+	spin_lock(&fc->lock);
+
+	link = fuse_find_polled_node(fc, kh, NULL);
+	if (*link) {
+		struct fuse_file *ff;
+
+		ff = rb_entry(*link, struct fuse_file, polled_node);
+		wake_up_interruptible_sync(&ff->poll_wait);
+	}
+
+	spin_unlock(&fc->lock);
+	return 0;
+}
+
 static const struct file_operations fuse_file_operations = {
 	.llseek		= fuse_file_llseek,
 	.read		= do_sync_read,
@@ -1484,6 +1897,9 @@
 	.lock		= fuse_file_lock,
 	.flock		= fuse_file_flock,
 	.splice_read	= generic_file_splice_read,
+	.unlocked_ioctl	= fuse_file_ioctl,
+	.compat_ioctl	= fuse_file_compat_ioctl,
+	.poll		= fuse_file_poll,
 };
 
 static const struct file_operations fuse_direct_io_file_operations = {
@@ -1496,6 +1912,9 @@
 	.fsync		= fuse_fsync,
 	.lock		= fuse_file_lock,
 	.flock		= fuse_file_flock,
+	.unlocked_ioctl	= fuse_file_ioctl,
+	.compat_ioctl	= fuse_file_compat_ioctl,
+	.poll		= fuse_file_poll,
 	/* no mmap and splice_read */
 };
 
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
index 35accfd..5e64b81 100644
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -1,6 +1,6 @@
 /*
   FUSE: Filesystem in Userspace
-  Copyright (C) 2001-2006  Miklos Szeredi <miklos@szeredi.hu>
+  Copyright (C) 2001-2008  Miklos Szeredi <miklos@szeredi.hu>
 
   This program can be distributed under the terms of the GNU GPL.
   See the file COPYING.
@@ -19,6 +19,8 @@
 #include <linux/backing-dev.h>
 #include <linux/mutex.h>
 #include <linux/rwsem.h>
+#include <linux/rbtree.h>
+#include <linux/poll.h>
 
 /** Max number of pages that can be used in a single read request */
 #define FUSE_MAX_PAGES_PER_REQ 32
@@ -100,6 +102,9 @@
 	/** Request reserved for flush and release */
 	struct fuse_req *reserved_req;
 
+	/** Kernel file handle guaranteed to be unique */
+	u64 kh;
+
 	/** File handle used by userspace */
 	u64 fh;
 
@@ -108,6 +113,12 @@
 
 	/** Entry on inode's write_files list */
 	struct list_head write_entry;
+
+	/** RB node to be linked on fuse_conn->polled_files */
+	struct rb_node polled_node;
+
+	/** Wait queue head for poll */
+	wait_queue_head_t poll_wait;
 };
 
 /** One input argument of a request */
@@ -322,6 +333,12 @@
 	/** The list of requests under I/O */
 	struct list_head io;
 
+	/** The next unique kernel file handle */
+	u64 khctr;
+
+	/** rbtree of fuse_files waiting for poll events indexed by kh */
+	struct rb_root polled_files;
+
 	/** Number of requests currently in the background */
 	unsigned num_background;
 
@@ -355,19 +372,19 @@
 	/** Connection failed (version mismatch).  Cannot race with
 	    setting other bitfields since it is only set once in INIT
 	    reply, before any other request, and never cleared */
-	unsigned conn_error : 1;
+	unsigned conn_error:1;
 
 	/** Connection successful.  Only set in INIT */
-	unsigned conn_init : 1;
+	unsigned conn_init:1;
 
 	/** Do readpages asynchronously?  Only set in INIT */
-	unsigned async_read : 1;
+	unsigned async_read:1;
 
 	/** Do not send separate SETATTR request before open(O_TRUNC)  */
-	unsigned atomic_o_trunc : 1;
+	unsigned atomic_o_trunc:1;
 
 	/** Filesystem supports NFS exporting.  Only set in INIT */
-	unsigned export_support : 1;
+	unsigned export_support:1;
 
 	/*
 	 * The following bitfields are only for optimization purposes
@@ -375,43 +392,46 @@
 	 */
 
 	/** Is fsync not implemented by fs? */
-	unsigned no_fsync : 1;
+	unsigned no_fsync:1;
 
 	/** Is fsyncdir not implemented by fs? */
-	unsigned no_fsyncdir : 1;
+	unsigned no_fsyncdir:1;
 
 	/** Is flush not implemented by fs? */
-	unsigned no_flush : 1;
+	unsigned no_flush:1;
 
 	/** Is setxattr not implemented by fs? */
-	unsigned no_setxattr : 1;
+	unsigned no_setxattr:1;
 
 	/** Is getxattr not implemented by fs? */
-	unsigned no_getxattr : 1;
+	unsigned no_getxattr:1;
 
 	/** Is listxattr not implemented by fs? */
-	unsigned no_listxattr : 1;
+	unsigned no_listxattr:1;
 
 	/** Is removexattr not implemented by fs? */
-	unsigned no_removexattr : 1;
+	unsigned no_removexattr:1;
 
 	/** Are file locking primitives not implemented by fs? */
-	unsigned no_lock : 1;
+	unsigned no_lock:1;
 
 	/** Is access not implemented by fs? */
-	unsigned no_access : 1;
+	unsigned no_access:1;
 
 	/** Is create not implemented by fs? */
-	unsigned no_create : 1;
+	unsigned no_create:1;
 
 	/** Is interrupt not implemented by fs? */
-	unsigned no_interrupt : 1;
+	unsigned no_interrupt:1;
 
 	/** Is bmap not implemented by fs? */
-	unsigned no_bmap : 1;
+	unsigned no_bmap:1;
+
+	/** Is poll not implemented by fs? */
+	unsigned no_poll:1;
 
 	/** Do multi-page cached writes */
-	unsigned big_writes : 1;
+	unsigned big_writes:1;
 
 	/** The number of requests waiting for completion */
 	atomic_t num_waiting;
@@ -445,6 +465,9 @@
 
 	/** Version counter for attribute changes */
 	u64 attr_version;
+
+	/** Called on final put */
+	void (*release)(struct fuse_conn *);
 };
 
 static inline struct fuse_conn *get_fuse_conn_super(struct super_block *sb)
@@ -499,7 +522,7 @@
  */
 int fuse_open_common(struct inode *inode, struct file *file, int isdir);
 
-struct fuse_file *fuse_file_alloc(void);
+struct fuse_file *fuse_file_alloc(struct fuse_conn *fc);
 void fuse_file_free(struct fuse_file *ff);
 void fuse_finish_open(struct inode *inode, struct file *file,
 		      struct fuse_file *ff, struct fuse_open_out *outarg);
@@ -519,6 +542,12 @@
 		      int isdir);
 
 /**
+ * Notify poll wakeup
+ */
+int fuse_notify_poll_wakeup(struct fuse_conn *fc,
+			    struct fuse_notify_poll_wakeup_out *outarg);
+
+/**
  * Initialize file operations on a regular file
  */
 void fuse_init_file_inode(struct inode *inode);
@@ -593,19 +622,20 @@
 /**
  * Send a request (synchronous)
  */
-void request_send(struct fuse_conn *fc, struct fuse_req *req);
+void fuse_request_send(struct fuse_conn *fc, struct fuse_req *req);
 
 /**
  * Send a request with no reply
  */
-void request_send_noreply(struct fuse_conn *fc, struct fuse_req *req);
+void fuse_request_send_noreply(struct fuse_conn *fc, struct fuse_req *req);
 
 /**
  * Send a request in the background
  */
-void request_send_background(struct fuse_conn *fc, struct fuse_req *req);
+void fuse_request_send_background(struct fuse_conn *fc, struct fuse_req *req);
 
-void request_send_background_locked(struct fuse_conn *fc, struct fuse_req *req);
+void fuse_request_send_background_locked(struct fuse_conn *fc,
+					 struct fuse_req *req);
 
 /* Abort all requests */
 void fuse_abort_conn(struct fuse_conn *fc);
@@ -623,6 +653,11 @@
 struct fuse_conn *fuse_conn_get(struct fuse_conn *fc);
 
 /**
+ * Initialize fuse_conn
+ */
+int fuse_conn_init(struct fuse_conn *fc, struct super_block *sb);
+
+/**
  * Release reference to fuse_conn
  */
 void fuse_conn_put(struct fuse_conn *fc);
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index 2e99f34..47c96fd 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -1,6 +1,6 @@
 /*
   FUSE: Filesystem in Userspace
-  Copyright (C) 2001-2006  Miklos Szeredi <miklos@szeredi.hu>
+  Copyright (C) 2001-2008  Miklos Szeredi <miklos@szeredi.hu>
 
   This program can be distributed under the terms of the GNU GPL.
   See the file COPYING.
@@ -37,10 +37,10 @@
 	unsigned rootmode;
 	unsigned user_id;
 	unsigned group_id;
-	unsigned fd_present : 1;
-	unsigned rootmode_present : 1;
-	unsigned user_id_present : 1;
-	unsigned group_id_present : 1;
+	unsigned fd_present:1;
+	unsigned rootmode_present:1;
+	unsigned user_id_present:1;
+	unsigned group_id_present:1;
 	unsigned flags;
 	unsigned max_read;
 	unsigned blksize;
@@ -94,7 +94,7 @@
 	req->in.numargs = 1;
 	req->in.args[0].size = sizeof(struct fuse_forget_in);
 	req->in.args[0].value = inarg;
-	request_send_noreply(fc, req);
+	fuse_request_send_noreply(fc, req);
 }
 
 static void fuse_clear_inode(struct inode *inode)
@@ -250,7 +250,7 @@
 
 	fi = get_fuse_inode(inode);
 	spin_lock(&fc->lock);
-	fi->nlookup ++;
+	fi->nlookup++;
 	spin_unlock(&fc->lock);
 	fuse_change_attributes(inode, attr, attr_valid, attr_version);
 
@@ -269,7 +269,7 @@
 		fc->destroy_req = NULL;
 		req->in.h.opcode = FUSE_DESTROY;
 		req->force = 1;
-		request_send(fc, req);
+		fuse_request_send(fc, req);
 		fuse_put_request(fc, req);
 	}
 }
@@ -334,7 +334,7 @@
 	req->out.args[0].size =
 		fc->minor < 4 ? FUSE_COMPAT_STATFS_SIZE : sizeof(outarg);
 	req->out.args[0].value = &outarg;
-	request_send(fc, req);
+	fuse_request_send(fc, req);
 	err = req->out.h.error;
 	if (!err)
 		convert_fuse_statfs(buf, &outarg.st);
@@ -462,68 +462,69 @@
 	return 0;
 }
 
-static struct fuse_conn *new_conn(struct super_block *sb)
+int fuse_conn_init(struct fuse_conn *fc, struct super_block *sb)
 {
-	struct fuse_conn *fc;
 	int err;
 
-	fc = kzalloc(sizeof(*fc), GFP_KERNEL);
-	if (fc) {
-		spin_lock_init(&fc->lock);
-		mutex_init(&fc->inst_mutex);
-		atomic_set(&fc->count, 1);
-		init_waitqueue_head(&fc->waitq);
-		init_waitqueue_head(&fc->blocked_waitq);
-		init_waitqueue_head(&fc->reserved_req_waitq);
-		INIT_LIST_HEAD(&fc->pending);
-		INIT_LIST_HEAD(&fc->processing);
-		INIT_LIST_HEAD(&fc->io);
-		INIT_LIST_HEAD(&fc->interrupts);
-		INIT_LIST_HEAD(&fc->bg_queue);
-		atomic_set(&fc->num_waiting, 0);
-		fc->bdi.ra_pages = (VM_MAX_READAHEAD * 1024) / PAGE_CACHE_SIZE;
-		fc->bdi.unplug_io_fn = default_unplug_io_fn;
-		/* fuse does it's own writeback accounting */
-		fc->bdi.capabilities = BDI_CAP_NO_ACCT_WB;
-		fc->dev = sb->s_dev;
-		err = bdi_init(&fc->bdi);
-		if (err)
-			goto error_kfree;
-		if (sb->s_bdev) {
-			err = bdi_register(&fc->bdi, NULL, "%u:%u-fuseblk",
-					   MAJOR(fc->dev), MINOR(fc->dev));
-		} else {
-			err = bdi_register_dev(&fc->bdi, fc->dev);
-		}
-		if (err)
-			goto error_bdi_destroy;
-		/*
-		 * For a single fuse filesystem use max 1% of dirty +
-		 * writeback threshold.
-		 *
-		 * This gives about 1M of write buffer for memory maps on a
-		 * machine with 1G and 10% dirty_ratio, which should be more
-		 * than enough.
-		 *
-		 * Privileged users can raise it by writing to
-		 *
-		 *    /sys/class/bdi/<bdi>/max_ratio
-		 */
-		bdi_set_max_ratio(&fc->bdi, 1);
-		fc->reqctr = 0;
-		fc->blocked = 1;
-		fc->attr_version = 1;
-		get_random_bytes(&fc->scramble_key, sizeof(fc->scramble_key));
+	memset(fc, 0, sizeof(*fc));
+	spin_lock_init(&fc->lock);
+	mutex_init(&fc->inst_mutex);
+	atomic_set(&fc->count, 1);
+	init_waitqueue_head(&fc->waitq);
+	init_waitqueue_head(&fc->blocked_waitq);
+	init_waitqueue_head(&fc->reserved_req_waitq);
+	INIT_LIST_HEAD(&fc->pending);
+	INIT_LIST_HEAD(&fc->processing);
+	INIT_LIST_HEAD(&fc->io);
+	INIT_LIST_HEAD(&fc->interrupts);
+	INIT_LIST_HEAD(&fc->bg_queue);
+	INIT_LIST_HEAD(&fc->entry);
+	atomic_set(&fc->num_waiting, 0);
+	fc->bdi.ra_pages = (VM_MAX_READAHEAD * 1024) / PAGE_CACHE_SIZE;
+	fc->bdi.unplug_io_fn = default_unplug_io_fn;
+	/* fuse does its own writeback accounting */
+	fc->bdi.capabilities = BDI_CAP_NO_ACCT_WB;
+	fc->khctr = 0;
+	fc->polled_files = RB_ROOT;
+	fc->dev = sb->s_dev;
+	err = bdi_init(&fc->bdi);
+	if (err)
+		goto error_mutex_destroy;
+	if (sb->s_bdev) {
+		err = bdi_register(&fc->bdi, NULL, "%u:%u-fuseblk",
+				   MAJOR(fc->dev), MINOR(fc->dev));
+	} else {
+		err = bdi_register_dev(&fc->bdi, fc->dev);
 	}
-	return fc;
+	if (err)
+		goto error_bdi_destroy;
+	/*
+	 * For a single fuse filesystem use max 1% of dirty +
+	 * writeback threshold.
+	 *
+	 * This gives about 1M of write buffer for memory maps on a
+	 * machine with 1G and 10% dirty_ratio, which should be more
+	 * than enough.
+	 *
+	 * Privileged users can raise it by writing to
+	 *
+	 *    /sys/class/bdi/<bdi>/max_ratio
+	 */
+	bdi_set_max_ratio(&fc->bdi, 1);
+	fc->reqctr = 0;
+	fc->blocked = 1;
+	fc->attr_version = 1;
+	get_random_bytes(&fc->scramble_key, sizeof(fc->scramble_key));
 
-error_bdi_destroy:
+	return 0;
+
+ error_bdi_destroy:
 	bdi_destroy(&fc->bdi);
-error_kfree:
+ error_mutex_destroy:
 	mutex_destroy(&fc->inst_mutex);
-	kfree(fc);
-	return NULL;
+	return err;
 }
+EXPORT_SYMBOL_GPL(fuse_conn_init);
 
 void fuse_conn_put(struct fuse_conn *fc)
 {
@@ -532,7 +533,7 @@
 			fuse_request_free(fc->destroy_req);
 		mutex_destroy(&fc->inst_mutex);
 		bdi_destroy(&fc->bdi);
-		kfree(fc);
+		fc->release(fc);
 	}
 }
 
@@ -542,7 +543,7 @@
 	return fc;
 }
 
-static struct inode *get_root_inode(struct super_block *sb, unsigned mode)
+static struct inode *fuse_get_root_inode(struct super_block *sb, unsigned mode)
 {
 	struct fuse_attr attr;
 	memset(&attr, 0, sizeof(attr));
@@ -553,8 +554,7 @@
 	return fuse_iget(sb, 1, 0, &attr, 0, 0);
 }
 
-struct fuse_inode_handle
-{
+struct fuse_inode_handle {
 	u64 nodeid;
 	u32 generation;
 };
@@ -761,7 +761,6 @@
 		fc->max_write = max_t(unsigned, 4096, fc->max_write);
 		fc->conn_init = 1;
 	}
-	fuse_put_request(fc, req);
 	fc->blocked = 0;
 	wake_up_all(&fc->blocked_waitq);
 }
@@ -787,7 +786,12 @@
 	req->out.args[0].size = sizeof(struct fuse_init_out);
 	req->out.args[0].value = &req->misc.init_out;
 	req->end = process_init_reply;
-	request_send_background(fc, req);
+	fuse_request_send_background(fc, req);
+}
+
+static void fuse_free_conn(struct fuse_conn *fc)
+{
+	kfree(fc);
 }
 
 static int fuse_fill_super(struct super_block *sb, void *data, int silent)
@@ -828,10 +832,17 @@
 	if (file->f_op != &fuse_dev_operations)
 		return -EINVAL;
 
-	fc = new_conn(sb);
+	fc = kmalloc(sizeof(*fc), GFP_KERNEL);
 	if (!fc)
 		return -ENOMEM;
 
+	err = fuse_conn_init(fc, sb);
+	if (err) {
+		kfree(fc);
+		return err;
+	}
+
+	fc->release = fuse_free_conn;
 	fc->flags = d.flags;
 	fc->user_id = d.user_id;
 	fc->group_id = d.group_id;
@@ -841,7 +852,7 @@
 	sb->s_fs_info = fc;
 
 	err = -ENOMEM;
-	root = get_root_inode(sb, d.rootmode);
+	root = fuse_get_root_inode(sb, d.rootmode);
 	if (!root)
 		goto err;
 
@@ -952,7 +963,7 @@
 
 static void fuse_inode_init_once(void *foo)
 {
-	struct inode * inode = foo;
+	struct inode *inode = foo;
 
 	inode_init_once(inode);
 }
@@ -1031,7 +1042,7 @@
 {
 	int res;
 
-	printk("fuse init (API version %i.%i)\n",
+	printk(KERN_INFO "fuse init (API version %i.%i)\n",
 	       FUSE_KERNEL_VERSION, FUSE_KERNEL_MINOR_VERSION);
 
 	INIT_LIST_HEAD(&fuse_conn_list);
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index 0ab0c6f5..6903d37 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -252,6 +252,7 @@
 	for (;;) {
 		struct page *page;
 		unsigned long nr, ret;
+		int ra;
 
 		/* nr is the maximum number of bytes to copy from this page */
 		nr = huge_page_size(h);
@@ -274,16 +275,19 @@
 			 */
 			ret = len < nr ? len : nr;
 			if (clear_user(buf, ret))
-				ret = -EFAULT;
+				ra = -EFAULT;
+			else
+				ra = 0;
 		} else {
 			/*
 			 * We have the page, copy it to user space buffer.
 			 */
-			ret = hugetlbfs_read_actor(page, offset, buf, len, nr);
+			ra = hugetlbfs_read_actor(page, offset, buf, len, nr);
+			ret = ra;
 		}
-		if (ret < 0) {
+		if (ra < 0) {
 			if (retval == 0)
-				retval = ret;
+				retval = ra;
 			if (page)
 				page_cache_release(page);
 			goto out;
diff --git a/fs/inode.c b/fs/inode.c
index bd48e5e..7a6e8c2 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -110,8 +110,8 @@
 
 /**
  * inode_init_always - perform inode structure intialisation
- * @sb		- superblock inode belongs to.
- * @inode	- inode to initialise
+ * @sb: superblock inode belongs to
+ * @inode: inode to initialise
  *
  * These are initializations that need to be done on every inode
  * allocation as the fields are not initialised by slab allocation.
@@ -166,7 +166,7 @@
 	mapping->a_ops = &empty_aops;
 	mapping->host = inode;
 	mapping->flags = 0;
-	mapping_set_gfp_mask(mapping, GFP_HIGHUSER_PAGECACHE);
+	mapping_set_gfp_mask(mapping, GFP_HIGHUSER_MOVABLE);
 	mapping->assoc_mapping = NULL;
 	mapping->backing_dev_info = &default_backing_dev_info;
 	mapping->writeback_index = 0;
@@ -576,8 +576,8 @@
 
 /**
  * inode_add_to_lists - add a new inode to relevant lists
- * @sb		- superblock inode belongs to.
- * @inode	- inode to mark in use
+ * @sb: superblock inode belongs to
+ * @inode: inode to mark in use
  *
  * When an inode is allocated it needs to be accounted for, added to the in use
  * list, the owning superblock and the inode hash. This needs to be done under
@@ -601,7 +601,7 @@
  *	@sb: superblock
  *
  *	Allocates a new inode for given superblock. The default gfp_mask
- *	for allocations related to inode->i_mapping is GFP_HIGHUSER_PAGECACHE.
+ *	for allocations related to inode->i_mapping is GFP_HIGHUSER_MOVABLE.
  *	If HIGHMEM pages are unsuitable or it is known that pages allocated
  *	for the page cache are not reclaimable or migratable,
  *	mapping_set_gfp_mask() must be called with suitable flags on the
diff --git a/fs/minix/dir.c b/fs/minix/dir.c
index f704338..d4946c4 100644
--- a/fs/minix/dir.c
+++ b/fs/minix/dir.c
@@ -280,7 +280,7 @@
 	return -EINVAL;
 
 got_it:
-	pos = (page->index >> PAGE_CACHE_SHIFT) + p - (char*)page_address(page);
+	pos = page_offset(page) + p - (char *)page_address(page);
 	err = __minix_write_begin(NULL, page->mapping, pos, sbi->s_dirsize,
 					AOP_FLAG_UNINTERRUPTIBLE, &page, NULL);
 	if (err)
diff --git a/fs/mpage.c b/fs/mpage.c
index 552b80b..16c3ef3 100644
--- a/fs/mpage.c
+++ b/fs/mpage.c
@@ -241,7 +241,6 @@
 				first_hole = page_block;
 			page_block++;
 			block_in_file++;
-			clear_buffer_mapped(map_bh);
 			continue;
 		}
 
@@ -308,7 +307,10 @@
 		goto alloc_new;
 	}
 
-	if (buffer_boundary(map_bh) || (first_hole != blocks_per_page))
+	relative_block = block_in_file - *first_logical_block;
+	nblocks = map_bh->b_size >> blkbits;
+	if ((buffer_boundary(map_bh) && relative_block == nblocks) ||
+	    (first_hole != blocks_per_page))
 		bio = mpage_bio_submit(READ, bio);
 	else
 		*last_block_in_bio = blocks[blocks_per_page - 1];
diff --git a/fs/ncpfs/getopt.c b/fs/ncpfs/getopt.c
index 335b003..0af3349 100644
--- a/fs/ncpfs/getopt.c
+++ b/fs/ncpfs/getopt.c
@@ -16,7 +16,6 @@
  *	@opts: an array of &struct option entries controlling parser operations
  *	@optopt: output; will contain the current option
  *	@optarg: output; will contain the value (if one exists)
- *	@flag: output; may be NULL; should point to a long for or'ing flags
  *	@value: output; may be NULL; will be overwritten with the integer value
  *		of the current argument.
  *
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 3a8bdd7..9406384 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -396,7 +396,9 @@
 		   "Private_Clean:  %8lu kB\n"
 		   "Private_Dirty:  %8lu kB\n"
 		   "Referenced:     %8lu kB\n"
-		   "Swap:           %8lu kB\n",
+		   "Swap:           %8lu kB\n"
+		   "KernelPageSize: %8lu kB\n"
+		   "MMUPageSize:    %8lu kB\n",
 		   (vma->vm_end - vma->vm_start) >> 10,
 		   mss.resident >> 10,
 		   (unsigned long)(mss.pss >> (10 + PSS_SHIFT)),
@@ -405,7 +407,9 @@
 		   mss.private_clean >> 10,
 		   mss.private_dirty >> 10,
 		   mss.referenced >> 10,
-		   mss.swap >> 10);
+		   mss.swap >> 10,
+		   vma_kernel_pagesize(vma) >> 10,
+		   vma_mmu_pagesize(vma) >> 10);
 
 	if (m->count < m->size)  /* vma is copied successfully */
 		m->version = (vma != get_gate_vma(task)) ? vma->vm_start : 0;
diff --git a/fs/select.c b/fs/select.c
index 87df51e..08b91be 100644
--- a/fs/select.c
+++ b/fs/select.c
@@ -109,11 +109,17 @@
 void poll_initwait(struct poll_wqueues *pwq)
 {
 	init_poll_funcptr(&pwq->pt, __pollwait);
+	pwq->polling_task = current;
+	/*
+	 * poll_schedule_timeout() tests ->triggered before its first
+	 * sleep, so it must start out cleared; presumably callers keep
+	 * @pwq on the stack, where it would otherwise be garbage.
+	 */
+	pwq->triggered = 0;
 	pwq->error = 0;
 	pwq->table = NULL;
 	pwq->inline_index = 0;
 }
-
 EXPORT_SYMBOL(poll_initwait);
 
 static void free_poll_entry(struct poll_table_entry *entry)
@@ -142,12 +142,10 @@
 		free_page((unsigned long) old);
 	}
 }
-
 EXPORT_SYMBOL(poll_freewait);
 
-static struct poll_table_entry *poll_get_entry(poll_table *_p)
+static struct poll_table_entry *poll_get_entry(struct poll_wqueues *p)
 {
-	struct poll_wqueues *p = container_of(_p, struct poll_wqueues, pt);
 	struct poll_table_page *table = p->table;
 
 	if (p->inline_index < N_INLINE_POLL_ENTRIES)
@@ -159,7 +157,6 @@
 		new_table = (struct poll_table_page *) __get_free_page(GFP_KERNEL);
 		if (!new_table) {
 			p->error = -ENOMEM;
-			__set_current_state(TASK_RUNNING);
 			return NULL;
 		}
 		new_table->entry = new_table->entries;
@@ -171,20 +168,75 @@
 	return table->entry++;
 }
 
+static int pollwake(wait_queue_t *wait, unsigned mode, int sync, void *key)
+{
+	struct poll_wqueues *pwq = wait->private;
+	DECLARE_WAITQUEUE(dummy_wait, pwq->polling_task);
+
+	/*
+	 * Although this function is called under waitqueue lock, LOCK
+	 * doesn't imply write barrier and the users expect write
+	 * barrier semantics on wakeup functions.  The following
+	 * smp_wmb() is equivalent to smp_wmb() in try_to_wake_up()
+	 * and is paired with set_mb() in poll_schedule_timeout.
+	 */
+	smp_wmb();
+	pwq->triggered = 1;
+
+	/*
+	 * Perform the default wake up operation using a dummy
+	 * waitqueue.
+	 *
+	 * TODO: This is hacky but there currently is no interface to
+	 * pass in @sync.  @sync is scheduled to be removed and once
+	 * that happens, wake_up_process() can be used directly.
+	 */
+	return default_wake_function(&dummy_wait, mode, sync, key);
+}
+
 /* Add a new entry */
 static void __pollwait(struct file *filp, wait_queue_head_t *wait_address,
 				poll_table *p)
 {
-	struct poll_table_entry *entry = poll_get_entry(p);
+	struct poll_wqueues *pwq = container_of(p, struct poll_wqueues, pt);
+	struct poll_table_entry *entry = poll_get_entry(pwq);
 	if (!entry)
 		return;
 	get_file(filp);
 	entry->filp = filp;
 	entry->wait_address = wait_address;
-	init_waitqueue_entry(&entry->wait, current);
+	init_waitqueue_func_entry(&entry->wait, pollwake);
+	entry->wait.private = pwq;
 	add_wait_queue(wait_address, &entry->wait);
 }
 
+int poll_schedule_timeout(struct poll_wqueues *pwq, int state,
+			  ktime_t *expires, unsigned long slack)
+{
+	int rc = -EINTR;
+
+	set_current_state(state);
+	if (!pwq->triggered)
+		rc = schedule_hrtimeout_range(expires, slack, HRTIMER_MODE_ABS);
+	__set_current_state(TASK_RUNNING);
+
+	/*
+	 * Prepare for the next iteration.
+	 *
+	 * The following set_mb() serves two purposes.  First, it's
+	 * the counterpart rmb of the wmb in pollwake() such that data
+	 * written before wake up is always visible after wake up.
+	 * Second, the full barrier guarantees that triggered clearing
+	 * doesn't pass event check of the next iteration.  Note that
+	 * this problem doesn't exist for the first iteration as
+	 * add_wait_queue() has full barrier semantics.
+	 */
+	set_mb(pwq->triggered, 0);
+
+	return rc;
+}
+EXPORT_SYMBOL(poll_schedule_timeout);
+
 /**
  * poll_select_set_timeout - helper function to setup the timeout value
  * @to:		pointer to timespec variable for the final timeout
@@ -340,8 +392,6 @@
 	for (;;) {
 		unsigned long *rinp, *routp, *rexp, *inp, *outp, *exp;
 
-		set_current_state(TASK_INTERRUPTIBLE);
-
 		inp = fds->in; outp = fds->out; exp = fds->ex;
 		rinp = fds->res_in; routp = fds->res_out; rexp = fds->res_ex;
 
@@ -411,10 +461,10 @@
 			to = &expire;
 		}
 
-		if (!schedule_hrtimeout_range(to, slack, HRTIMER_MODE_ABS))
+		if (!poll_schedule_timeout(&table, TASK_INTERRUPTIBLE,
+					   to, slack))
 			timed_out = 1;
 	}
-	__set_current_state(TASK_RUNNING);
 
 	poll_freewait(&table);
 
@@ -666,7 +716,6 @@
 	for (;;) {
 		struct poll_list *walk;
 
-		set_current_state(TASK_INTERRUPTIBLE);
 		for (walk = list; walk != NULL; walk = walk->next) {
 			struct pollfd * pfd, * pfd_end;
 
@@ -709,10 +758,9 @@
 			to = &expire;
 		}
 
-		if (!schedule_hrtimeout_range(to, slack, HRTIMER_MODE_ABS))
+		if (!poll_schedule_timeout(wait, TASK_INTERRUPTIBLE, to, slack))
 			timed_out = 1;
 	}
-	__set_current_state(TASK_RUNNING);
 	return count;
 }
 
diff --git a/fs/sync.c b/fs/sync.c
index 0921d6d..ac02b56 100644
--- a/fs/sync.c
+++ b/fs/sync.c
@@ -295,7 +295,7 @@
 
 	if (flags & SYNC_FILE_RANGE_WRITE) {
 		ret = __filemap_fdatawrite_range(mapping, offset, endbyte,
-						WB_SYNC_NONE);
+						WB_SYNC_ALL);
 		if (ret < 0)
 			goto out;
 	}
diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c
index 0d7564b..89556ee 100644
--- a/fs/ubifs/super.c
+++ b/fs/ubifs/super.c
@@ -432,12 +432,19 @@
 	int i, err;
 	struct ubifs_info *c = sb->s_fs_info;
 	struct writeback_control wbc = {
-		.sync_mode   = wait ? WB_SYNC_ALL : WB_SYNC_HOLD,
+		.sync_mode   = wait ? WB_SYNC_ALL : WB_SYNC_NONE,
 		.range_start = 0,
 		.range_end   = LLONG_MAX,
 		.nr_to_write = LONG_MAX,
 	};
 
+	/*
+	 * Note by akpm about WB_SYNC_NONE used above: zero @wait is just an
+	 * advisory thing to help the file system shove lots of data into the
+	 * queues. If some gets missed then it'll be picked up on the second
+	 * '->sync_fs()' call, with non-zero @wait.
+	 */
+
 	if (sb->s_flags & MS_RDONLY)
 		return 0;
 
diff --git a/include/asm-frv/atomic.h b/include/asm-frv/atomic.h
index 46d696b..296c35c 100644
--- a/include/asm-frv/atomic.h
+++ b/include/asm-frv/atomic.h
@@ -35,10 +35,6 @@
 #define smp_mb__before_atomic_inc()	barrier()
 #define smp_mb__after_atomic_inc()	barrier()
 
-typedef struct {
-	int counter;
-} atomic_t;
-
 #define ATOMIC_INIT(i)		{ (i) }
 #define atomic_read(v)		((v)->counter)
 #define atomic_set(v, i)	(((v)->counter) = (i))
diff --git a/include/asm-generic/bug.h b/include/asm-generic/bug.h
index 8af2763..37b82cb 100644
--- a/include/asm-generic/bug.h
+++ b/include/asm-generic/bug.h
@@ -28,6 +28,17 @@
 #define BUGFLAG_WARNING	(1<<0)
 #endif	/* CONFIG_GENERIC_BUG */
 
+/*
+ * Don't use BUG() or BUG_ON() unless there's really no way out; one
+ * example might be detecting data structure corruption in the middle
+ * of an operation that can't be backed out of.  If the (sub)system
+ * can somehow continue operating, perhaps with reduced functionality,
+ * it's probably not BUG-worthy.
+ *
+ * If you're tempted to BUG(), think again:  is completely giving up
+ * really the *only* solution?  There are usually better options, where
+ * users don't need to reboot ASAP and can mostly shut down cleanly.
+ */
 #ifndef HAVE_ARCH_BUG
 #define BUG() do { \
 	printk("BUG: failure at %s:%d/%s()!\n", __FILE__, __LINE__, __func__); \
@@ -39,6 +50,12 @@
 #define BUG_ON(condition) do { if (unlikely(condition)) BUG(); } while(0)
 #endif
 
+/*
+ * WARN(), WARN_ON(), WARN_ON_ONCE, and so on can be used to report
+ * significant issues that need prompt attention if they should ever
+ * appear at runtime.  Use the versions with printk format strings
+ * to provide better diagnostics.
+ */
 #ifndef __WARN
 #ifndef __ASSEMBLY__
 extern void warn_slowpath(const char *file, const int line,
diff --git a/include/asm-generic/local.h b/include/asm-generic/local.h
index 33d7d04..dbd6150 100644
--- a/include/asm-generic/local.h
+++ b/include/asm-generic/local.h
@@ -2,7 +2,6 @@
 #define _ASM_GENERIC_LOCAL_H
 
 #include <linux/percpu.h>
-#include <linux/hardirq.h>
 #include <asm/atomic.h>
 #include <asm/types.h>
 
diff --git a/include/asm-generic/memory_model.h b/include/asm-generic/memory_model.h
index 36fa286..4c8d0af 100644
--- a/include/asm-generic/memory_model.h
+++ b/include/asm-generic/memory_model.h
@@ -69,15 +69,8 @@
 })
 #endif /* CONFIG_FLATMEM/DISCONTIGMEM/SPARSEMEM */
 
-#ifdef CONFIG_OUT_OF_LINE_PFN_TO_PAGE
-struct page;
-/* this is useful when inlined pfn_to_page is too big */
-extern struct page *pfn_to_page(unsigned long pfn);
-extern unsigned long page_to_pfn(struct page *page);
-#else
 #define page_to_pfn __page_to_pfn
 #define pfn_to_page __pfn_to_page
-#endif /* CONFIG_OUT_OF_LINE_PFN_TO_PAGE */
 
 #endif /* __ASSEMBLY__ */
 
diff --git a/include/asm-m32r/atomic.h b/include/asm-m32r/atomic.h
index 3a38ffe..2eed30f 100644
--- a/include/asm-m32r/atomic.h
+++ b/include/asm-m32r/atomic.h
@@ -9,6 +9,7 @@
  *    Copyright (C) 2004  Hirokazu Takata <takata at linux-m32r.org>
  */
 
+#include <linux/types.h>
 #include <asm/assembler.h>
 #include <asm/system.h>
 
@@ -17,13 +18,6 @@
  * resource counting etc..
  */
 
-/*
- * Make sure gcc doesn't try to be clever and move things around
- * on us. We need to use _exactly_ the address the user gave us,
- * not some alias that contains the same information.
- */
-typedef struct { volatile int counter; } atomic_t;
-
 #define ATOMIC_INIT(i)	{ (i) }
 
 /**
diff --git a/include/asm-m68k/atomic.h b/include/asm-m68k/atomic.h
index 4915294..eb0ab9d 100644
--- a/include/asm-m68k/atomic.h
+++ b/include/asm-m68k/atomic.h
@@ -1,7 +1,7 @@
 #ifndef __ARCH_M68K_ATOMIC__
 #define __ARCH_M68K_ATOMIC__
 
-
+#include <linux/types.h>
 #include <asm/system.h>
 
 /*
@@ -13,7 +13,6 @@
  * We do not have SMP m68k systems, so we don't have to deal with that.
  */
 
-typedef struct { int counter; } atomic_t;
 #define ATOMIC_INIT(i)	{ (i) }
 
 #define atomic_read(v)		((v)->counter)
diff --git a/include/asm-mn10300/atomic.h b/include/asm-mn10300/atomic.h
index 27c9690..bc06482 100644
--- a/include/asm-mn10300/atomic.h
+++ b/include/asm-mn10300/atomic.h
@@ -20,15 +20,6 @@
  * resource counting etc..
  */
 
-/*
- * Make sure gcc doesn't try to be clever and move things around
- * on us. We need to use _exactly_ the address the user gave us,
- * not some alias that contains the same information.
- */
-typedef struct {
-	int	counter;
-} atomic_t;
-
 #define ATOMIC_INIT(i)	{ (i) }
 
 #ifdef __KERNEL__
diff --git a/include/asm-xtensa/atomic.h b/include/asm-xtensa/atomic.h
index b3b2354..67ad67b 100644
--- a/include/asm-xtensa/atomic.h
+++ b/include/asm-xtensa/atomic.h
@@ -14,8 +14,7 @@
 #define _XTENSA_ATOMIC_H
 
 #include <linux/stringify.h>
-
-typedef struct { volatile int counter; } atomic_t;
+#include <linux/types.h>
 
 #ifdef __KERNEL__
 #include <asm/processor.h>
diff --git a/include/linux/auto_dev-ioctl.h b/include/linux/auto_dev-ioctl.h
index f4d05cc..91a7739 100644
--- a/include/linux/auto_dev-ioctl.h
+++ b/include/linux/auto_dev-ioctl.h
@@ -10,6 +10,7 @@
 #ifndef _LINUX_AUTO_DEV_IOCTL_H
 #define _LINUX_AUTO_DEV_IOCTL_H
 
+#include <linux/string.h>
 #include <linux/types.h>
 
 #define AUTOFS_DEVICE_NAME		"autofs"
@@ -25,6 +26,60 @@
  * An ioctl interface for autofs mount point control.
  */
 
+struct args_protover {
+	__u32	version;
+};
+
+struct args_protosubver {
+	__u32	sub_version;
+};
+
+struct args_openmount {
+	__u32	devid;
+};
+
+struct args_ready {
+	__u32	token;
+};
+
+struct args_fail {
+	__u32	token;
+	__s32	status;
+};
+
+struct args_setpipefd {
+	__s32	pipefd;
+};
+
+struct args_timeout {
+	__u64	timeout;
+};
+
+struct args_requester {
+	__u32	uid;
+	__u32	gid;
+};
+
+struct args_expire {
+	__u32	how;
+};
+
+struct args_askumount {
+	__u32	may_umount;
+};
+
+struct args_ismountpoint {
+	union {
+		struct args_in {
+			__u32	type;
+		} in;
+		struct args_out {
+			__u32	devid;
+			__u32	magic;
+		} out;
+	};
+};
+
 /*
  * All the ioctls use this structure.
  * When sending a path size must account for the total length
@@ -39,20 +94,32 @@
 				 * including this struct */
 	__s32 ioctlfd;		/* automount command fd */
 
-	__u32 arg1;		/* Command parameters */
-	__u32 arg2;
+	/* Command parameters */
+
+	union {
+		struct args_protover		protover;
+		struct args_protosubver		protosubver;
+		struct args_openmount		openmount;
+		struct args_ready		ready;
+		struct args_fail		fail;
+		struct args_setpipefd		setpipefd;
+		struct args_timeout		timeout;
+		struct args_requester		requester;
+		struct args_expire		expire;
+		struct args_askumount		askumount;
+		struct args_ismountpoint	ismountpoint;
+	};
 
 	char path[0];
 };
 
 static inline void init_autofs_dev_ioctl(struct autofs_dev_ioctl *in)
 {
+	memset(in, 0, sizeof(struct autofs_dev_ioctl));
 	in->ver_major = AUTOFS_DEV_IOCTL_VERSION_MAJOR;
 	in->ver_minor = AUTOFS_DEV_IOCTL_VERSION_MINOR;
 	in->size = sizeof(struct autofs_dev_ioctl);
 	in->ioctlfd = -1;
-	in->arg1 = 0;
-	in->arg2 = 0;
 	return;
 }
 
diff --git a/include/linux/auto_fs4.h b/include/linux/auto_fs4.h
index 2253716..55fa478 100644
--- a/include/linux/auto_fs4.h
+++ b/include/linux/auto_fs4.h
@@ -29,10 +29,62 @@
 #define AUTOFS_EXP_IMMEDIATE		1
 #define AUTOFS_EXP_LEAVES		2
 
-#define AUTOFS_TYPE_ANY			0x0000
-#define AUTOFS_TYPE_INDIRECT		0x0001
-#define AUTOFS_TYPE_DIRECT		0x0002
-#define AUTOFS_TYPE_OFFSET		0x0004
+#define AUTOFS_TYPE_ANY			0U
+#define AUTOFS_TYPE_INDIRECT		1U
+#define AUTOFS_TYPE_DIRECT		2U
+#define AUTOFS_TYPE_OFFSET		4U
+
+/* Setters store one AUTOFS_TYPE_* value; predicates test exact equality. */
+static inline void set_autofs_type_indirect(unsigned int *type)
+{
+	*type = AUTOFS_TYPE_INDIRECT;
+}
+
+static inline unsigned int autofs_type_indirect(unsigned int type)
+{
+	return type == AUTOFS_TYPE_INDIRECT;
+}
+
+static inline void set_autofs_type_direct(unsigned int *type)
+{
+	*type = AUTOFS_TYPE_DIRECT;
+}
+
+static inline unsigned int autofs_type_direct(unsigned int type)
+{
+	return type == AUTOFS_TYPE_DIRECT;
+}
+
+static inline void set_autofs_type_offset(unsigned int *type)
+{
+	*type = AUTOFS_TYPE_OFFSET;
+}
+
+static inline unsigned int autofs_type_offset(unsigned int type)
+{
+	return type == AUTOFS_TYPE_OFFSET;
+}
+
+/* True when type is exactly AUTOFS_TYPE_DIRECT or AUTOFS_TYPE_OFFSET. */
+static inline unsigned int autofs_type_trigger(unsigned int type)
+{
+	return autofs_type_direct(type) || autofs_type_offset(type);
+}
+
+/*
+ * This isn't really a type as we use it to say "no type set" to
+ * indicate we want to search for "any" mount in the
+ * autofs_dev_ioctl_ismountpoint() device ioctl function.
+ */
+static inline void set_autofs_type_any(unsigned int *type)
+{
+	*type = AUTOFS_TYPE_ANY;
+}
+
+static inline unsigned int autofs_type_any(unsigned int type)
+{
+	return type == AUTOFS_TYPE_ANY;
+}
 
 /* Daemon notification packet types */
 enum autofs_notify {
diff --git a/include/linux/binfmts.h b/include/linux/binfmts.h
index 6cbfbe2..77b4a9e 100644
--- a/include/linux/binfmts.h
+++ b/include/linux/binfmts.h
@@ -18,6 +18,7 @@
 #define BINPRM_BUF_SIZE 128
 
 #ifdef __KERNEL__
+#include <linux/list.h>
 
 #define CORENAME_MAX_SIZE 128
 
@@ -106,7 +107,7 @@
 extern int bprm_mm_init(struct linux_binprm *bprm);
 extern int copy_strings_kernel(int argc,char ** argv,struct linux_binprm *bprm);
 extern void install_exec_creds(struct linux_binprm *bprm);
-extern int do_coredump(long signr, int exit_code, struct pt_regs * regs);
+extern void do_coredump(long signr, int exit_code, struct pt_regs *regs);
 extern int set_binfmt(struct linux_binfmt *new);
 extern void free_bprm(struct linux_binprm *);
 
diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index 1164963..08b78c0 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -329,13 +329,7 @@
 			struct cgroup *cgrp);
 	void (*post_clone)(struct cgroup_subsys *ss, struct cgroup *cgrp);
 	void (*bind)(struct cgroup_subsys *ss, struct cgroup *root);
-	/*
-	 * This routine is called with the task_lock of mm->owner held
-	 */
-	void (*mm_owner_changed)(struct cgroup_subsys *ss,
-					struct cgroup *old,
-					struct cgroup *new,
-					struct task_struct *p);
+
 	int subsys_id;
 	int active;
 	int disabled;
@@ -400,9 +394,6 @@
 int cgroup_scan_tasks(struct cgroup_scanner *scan);
 int cgroup_attach_task(struct cgroup *, struct task_struct *);
 
-void cgroup_mm_owner_callbacks(struct task_struct *old,
-			       struct task_struct *new);
-
 #else /* !CONFIG_CGROUPS */
 
 static inline int cgroup_init_early(void) { return 0; }
@@ -420,9 +411,6 @@
 	return -EINVAL;
 }
 
-static inline void cgroup_mm_owner_callbacks(struct task_struct *old,
-					     struct task_struct *new) {}
-
 #endif /* !CONFIG_CGROUPS */
 
 #endif /* _LINUX_CGROUP_H */
diff --git a/include/linux/cpuset.h b/include/linux/cpuset.h
index 8e540d3..51ea2bd 100644
--- a/include/linux/cpuset.h
+++ b/include/linux/cpuset.h
@@ -78,6 +78,8 @@
 
 extern void rebuild_sched_domains(void);
 
+extern void cpuset_print_task_mems_allowed(struct task_struct *p);
+
 #else /* !CONFIG_CPUSETS */
 
 static inline int cpuset_init_early(void) { return 0; }
@@ -159,6 +161,10 @@
 	partition_sched_domains(1, NULL, NULL);
 }
 
+static inline void cpuset_print_task_mems_allowed(struct task_struct *p)
+{
+}
+
 #endif /* !CONFIG_CPUSETS */
 
 #endif /* _LINUX_CPUSET_H */
diff --git a/include/linux/fs.h b/include/linux/fs.h
index fb59673..d7eba77 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1133,7 +1133,6 @@
 	struct rw_semaphore	s_umount;
 	struct mutex		s_lock;
 	int			s_count;
-	int			s_syncing;
 	int			s_need_sync_fs;
 	atomic_t		s_active;
 #ifdef CONFIG_SECURITY
diff --git a/include/linux/fuse.h b/include/linux/fuse.h
index 350fe97..162e5def 100644
--- a/include/linux/fuse.h
+++ b/include/linux/fuse.h
@@ -1,6 +1,6 @@
 /*
     FUSE: Filesystem in Userspace
-    Copyright (C) 2001-2006  Miklos Szeredi <miklos@szeredi.hu>
+    Copyright (C) 2001-2008  Miklos Szeredi <miklos@szeredi.hu>
 
     This program can be distributed under the terms of the GNU GPL.
     See the file COPYING.
@@ -20,29 +20,27 @@
  *
  * 7.10
  *  - add nonseekable open flag
+ *
+ * 7.11
+ *  - add IOCTL message
+ *  - add unsolicited notification support
+ *  - add POLL message and NOTIFY_POLL notification
  */
 
 #ifndef _LINUX_FUSE_H
 #define _LINUX_FUSE_H
 
-#include <asm/types.h>
-#include <linux/major.h>
+#include <linux/types.h>
 
 /** Version number of this interface */
 #define FUSE_KERNEL_VERSION 7
 
 /** Minor version number of this interface */
-#define FUSE_KERNEL_MINOR_VERSION 10
+#define FUSE_KERNEL_MINOR_VERSION 11
 
 /** The node ID of the root inode */
 #define FUSE_ROOT_ID 1
 
-/** The major number of the fuse character device */
-#define FUSE_MAJOR MISC_MAJOR
-
-/** The minor number of the fuse character device */
-#define FUSE_MINOR 229
-
 /* Make sure all structures are padded to 64bit boundary, so 32bit
    userspace works under 64bit kernels */
 
@@ -151,6 +149,28 @@
  */
 #define FUSE_READ_LOCKOWNER	(1 << 1)
 
+/**
+ * Ioctl flags
+ *
+ * FUSE_IOCTL_COMPAT: 32bit compat ioctl on 64bit machine
+ * FUSE_IOCTL_UNRESTRICTED: not restricted to well-formed ioctls, retry allowed
+ * FUSE_IOCTL_RETRY: retry with new iovecs
+ *
+ * FUSE_IOCTL_MAX_IOV: maximum of in_iovecs + out_iovecs
+ */
+#define FUSE_IOCTL_COMPAT	(1 << 0)
+#define FUSE_IOCTL_UNRESTRICTED	(1 << 1)
+#define FUSE_IOCTL_RETRY	(1 << 2)
+
+#define FUSE_IOCTL_MAX_IOV	256
+
+/**
+ * Poll flags
+ *
+ * FUSE_POLL_SCHEDULE_NOTIFY: request poll notify
+ */
+#define FUSE_POLL_SCHEDULE_NOTIFY (1 << 0)
+
 enum fuse_opcode {
 	FUSE_LOOKUP	   = 1,
 	FUSE_FORGET	   = 2,  /* no reply */
@@ -188,6 +208,13 @@
 	FUSE_INTERRUPT     = 36,
 	FUSE_BMAP          = 37,
 	FUSE_DESTROY       = 38,
+	FUSE_IOCTL         = 39,
+	FUSE_POLL          = 40,
+};
+
+enum fuse_notify_code {
+	FUSE_NOTIFY_POLL   = 1,
+	FUSE_NOTIFY_CODE_MAX,
 };
 
 /* The read buffer is required to be at least 8k, but may be much larger */
@@ -388,6 +415,38 @@
 	__u64	block;
 };
 
+struct fuse_ioctl_in {
+	__u64	fh;
+	__u32	flags;
+	__u32	cmd;
+	__u64	arg;
+	__u32	in_size;
+	__u32	out_size;
+};
+
+struct fuse_ioctl_out {
+	__s32	result;
+	__u32	flags;
+	__u32	in_iovs;
+	__u32	out_iovs;
+};
+
+struct fuse_poll_in {
+	__u64	fh;
+	__u64	kh;
+	__u32	flags;
+	__u32   padding;
+};
+
+struct fuse_poll_out {
+	__u32	revents;
+	__u32	padding;
+};
+
+struct fuse_notify_poll_wakeup_out {
+	__u64	kh;
+};
+
 struct fuse_in_header {
 	__u32	len;
 	__u32	opcode;
diff --git a/include/linux/gfp.h b/include/linux/gfp.h
index e8003af..dd20cd7 100644
--- a/include/linux/gfp.h
+++ b/include/linux/gfp.h
@@ -69,12 +69,6 @@
 #define GFP_HIGHUSER_MOVABLE	(__GFP_WAIT | __GFP_IO | __GFP_FS | \
 				 __GFP_HARDWALL | __GFP_HIGHMEM | \
 				 __GFP_MOVABLE)
-#define GFP_NOFS_PAGECACHE	(__GFP_WAIT | __GFP_IO | __GFP_MOVABLE)
-#define GFP_USER_PAGECACHE	(__GFP_WAIT | __GFP_IO | __GFP_FS | \
-				 __GFP_HARDWALL | __GFP_MOVABLE)
-#define GFP_HIGHUSER_PAGECACHE	(__GFP_WAIT | __GFP_IO | __GFP_FS | \
-				 __GFP_HARDWALL | __GFP_HIGHMEM | \
-				 __GFP_MOVABLE)
 
 #ifdef CONFIG_NUMA
 #define GFP_THISNODE	(__GFP_THISNODE | __GFP_NOWARN | __GFP_NORETRY)
diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index e1c8afc..f1d2fba1 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -233,6 +233,10 @@
 	return (unsigned long)PAGE_SIZE << h->order;
 }
 
+extern unsigned long vma_kernel_pagesize(struct vm_area_struct *vma);
+
+extern unsigned long vma_mmu_pagesize(struct vm_area_struct *vma);
+
 static inline unsigned long huge_page_mask(struct hstate *h)
 {
 	return h->mask;
@@ -273,6 +277,8 @@
 #define hstate_inode(i) NULL
 #define huge_page_size(h) PAGE_SIZE
 #define huge_page_mask(h) PAGE_MASK
+#define vma_kernel_pagesize(v) PAGE_SIZE
+#define vma_mmu_pagesize(v) PAGE_SIZE
 #define huge_page_order(h) 0
 #define huge_page_shift(h) PAGE_SHIFT
 static inline unsigned int pages_per_huge_page(struct hstate *h)
diff --git a/include/linux/i2c/twl4030.h b/include/linux/i2c/twl4030.h
index a8f84c0..8137f66 100644
--- a/include/linux/i2c/twl4030.h
+++ b/include/linux/i2c/twl4030.h
@@ -234,6 +234,9 @@
 	/* gpio-n should control VMMC(n+1) if BIT(n) in mmc_cd is set */
 	u8		mmc_cd;
 
+	/* if BIT(n) is set, or VMMC(n+1) is linked, debounce gpio-n */
+	u32		debounce;
+
 	/* For gpio-N, bit (1 << N) in "pullups" is set if that pullup
 	 * should be enabled.  Else, if that bit is set in "pulldowns",
 	 * that pulldown is enabled.  Don't waste power by letting any
@@ -307,12 +310,6 @@
 #define TWL4030_VAUX3_DEV_GRP		0x1F
 #define TWL4030_VAUX3_DEDICATED		0x22
 
-/*
- * Exported TWL4030 GPIO APIs
- *
- * WARNING -- use standard GPIO and IRQ calls instead; these will vanish.
- */
-int twl4030_set_gpio_debounce(int gpio, int enable);
 
 #if defined(CONFIG_TWL4030_BCI_BATTERY) || \
 	defined(CONFIG_TWL4030_BCI_BATTERY_MODULE)
diff --git a/include/linux/ide.h b/include/linux/ide.h
index db5ef8a..3644f63 100644
--- a/include/linux/ide.h
+++ b/include/linux/ide.h
@@ -32,18 +32,14 @@
 # define SUPPORT_VLB_SYNC 1
 #endif
 
-typedef unsigned char	byte;	/* used everywhere */
-
 /*
  * Probably not wise to fiddle with these
  */
+#define IDE_DEFAULT_MAX_FAILURES	1
 #define ERROR_MAX	8	/* Max read/write errors per sector */
 #define ERROR_RESET	3	/* Reset controller every 4th retry */
 #define ERROR_RECAL	1	/* Recalibrate every 2nd retry */
 
-#define HWIF(drive)		((ide_hwif_t *)((drive)->hwif))
-#define HWGROUP(drive)		((ide_hwgroup_t *)(HWIF(drive)->hwgroup))
-
 /*
  * Definitions for accessing IDE controller registers
  */
@@ -185,9 +181,6 @@
 	unsigned long	config;
 } hw_regs_t;
 
-void ide_init_port_data(struct hwif_s *, unsigned int);
-void ide_init_port_hw(struct hwif_s *, hw_regs_t *);
-
 static inline void ide_std_init_ports(hw_regs_t *hw,
 				      unsigned long io_addr,
 				      unsigned long ctl_addr)
@@ -433,18 +426,14 @@
 	struct idetape_bh *bh;
 	char *b_data;
 
-	/* idescsi only for now */
 	struct scatterlist *sg;
 	unsigned int sg_cnt;
 
-	struct scsi_cmnd *scsi_cmd;
-	void (*done) (struct scsi_cmnd *);
-
 	unsigned long timeout;
 };
 
 struct ide_devset;
-struct ide_driver_s;
+struct ide_driver;
 
 #ifdef CONFIG_BLK_DEV_IDEACPI
 struct ide_acpi_drive_link;
@@ -588,7 +577,6 @@
 	struct request_queue	*queue;	/* request queue */
 
 	struct request		*rq;	/* current request */
-	struct ide_drive_s 	*next;	/* circular list of hwgroup drives */
 	void		*driver_data;	/* extra driver data */
 	u16			*id;	/* identification info */
 #ifdef CONFIG_IDE_PROC_FS
@@ -662,6 +650,8 @@
 	int  (*pc_io_buffers)(struct ide_drive_s *, struct ide_atapi_pc *,
 			      unsigned int, int);
 
+	ide_startstop_t (*irq_handler)(struct ide_drive_s *);
+
 	unsigned long atapi_flags;
 
 	struct ide_atapi_pc request_sense_pc;
@@ -684,7 +674,6 @@
 	void	(*exec_command)(struct hwif_s *, u8);
 	u8	(*read_status)(struct hwif_s *);
 	u8	(*read_altstatus)(struct hwif_s *);
-	u8	(*read_sff_dma_status)(struct hwif_s *);
 
 	void	(*set_irq)(struct hwif_s *, int);
 
@@ -745,14 +734,17 @@
 	int	(*dma_test_irq)(struct ide_drive_s *);
 	void	(*dma_lost_irq)(struct ide_drive_s *);
 	void	(*dma_timeout)(struct ide_drive_s *);
+	/*
+	 * The following method is optional and only required to be
+	 * implemented for the SFF-8038i compatible controllers.
+	 */
+	u8	(*dma_sff_read_status)(struct hwif_s *);
 };
 
 struct ide_host;
 
 typedef struct hwif_s {
-	struct hwif_s *next;		/* for linked-list in ide_hwgroup_t */
 	struct hwif_s *mate;		/* other hwif from same PCI chip */
-	struct hwgroup_s *hwgroup;	/* actually (ide_hwgroup_t *) */
 	struct proc_dir_entry *proc;	/* /proc/ide/ directory entry */
 
 	struct ide_host *host;
@@ -763,7 +755,7 @@
 
 	unsigned long	sata_scr[SATA_NR_PORTS];
 
-	ide_drive_t	drives[MAX_DRIVES];	/* drive info */
+	ide_drive_t	*devices[MAX_DRIVES + 1];
 
 	u8 major;	/* our major number */
 	u8 index;	/* 0 for ide0; 1 for ide1; ... */
@@ -829,7 +821,7 @@
 	unsigned	extra_ports;	/* number of extra dma ports */
 
 	unsigned	present    : 1;	/* this interface exists */
-	unsigned	sg_mapped  : 1;	/* sg_table and sg_nents are ready */
+	unsigned	busy	   : 1; /* serializes devices on a port */
 
 	struct device		gendev;
 	struct device		*portdev;
@@ -841,19 +833,49 @@
 #ifdef CONFIG_BLK_DEV_IDEACPI
 	struct ide_acpi_hwif_link *acpidata;
 #endif
+
+	/* IRQ handler, if active */
+	ide_startstop_t	(*handler)(ide_drive_t *);
+
+	/* BOOL: polling active & poll_timeout field valid */
+	unsigned int polling : 1;
+
+	/* current drive */
+	ide_drive_t *cur_dev;
+
+	/* current request */
+	struct request *rq;
+
+	/* failsafe timer */
+	struct timer_list timer;
+	/* timeout value during long polls */
+	unsigned long poll_timeout;
+	/* queried upon timeouts */
+	int (*expiry)(ide_drive_t *);
+
+	int req_gen;
+	int req_gen_timer;
+
+	spinlock_t lock;
 } ____cacheline_internodealigned_in_smp ide_hwif_t;
 
 #define MAX_HOST_PORTS 4
 
 struct ide_host {
-	ide_hwif_t	*ports[MAX_HOST_PORTS];
+	ide_hwif_t	*ports[MAX_HOST_PORTS + 1];
 	unsigned int	n_ports;
 	struct device	*dev[2];
 	unsigned int	(*init_chipset)(struct pci_dev *);
 	unsigned long	host_flags;
 	void		*host_priv;
+	ide_hwif_t	*cur_port;	/* for hosts requiring serialization */
+
+	/* used for hosts requiring serialization */
+	volatile long	host_busy;
 };
 
+#define IDE_HOST_BUSY 0
+
 /*
  *  internal ide interrupt handler type
  */
@@ -863,38 +885,6 @@
 /* used by ide-cd, ide-floppy, etc. */
 typedef void (xfer_func_t)(ide_drive_t *, struct request *rq, void *, unsigned);
 
-typedef struct hwgroup_s {
-		/* irq handler, if active */
-	ide_startstop_t	(*handler)(ide_drive_t *);
-
-		/* BOOL: protects all fields below */
-	volatile int busy;
-		/* BOOL: polling active & poll_timeout field valid */
-	unsigned int polling	: 1;
-
-		/* current drive */
-	ide_drive_t *drive;
-		/* ptr to current hwif in linked-list */
-	ide_hwif_t *hwif;
-
-		/* current request */
-	struct request *rq;
-
-		/* failsafe timer */
-	struct timer_list timer;
-		/* timeout value during long polls */
-	unsigned long poll_timeout;
-		/* queried upon timeouts */
-	int (*expiry)(ide_drive_t *);
-
-	int req_gen;
-	int req_gen_timer;
-
-	spinlock_t lock;
-} ide_hwgroup_t;
-
-typedef struct ide_driver_s ide_driver_t;
-
 extern struct mutex ide_setting_mtx;
 
 /*
@@ -1020,8 +1010,8 @@
 void ide_proc_port_register_devices(ide_hwif_t *);
 void ide_proc_unregister_device(ide_drive_t *);
 void ide_proc_unregister_port(ide_hwif_t *);
-void ide_proc_register_driver(ide_drive_t *, ide_driver_t *);
-void ide_proc_unregister_driver(ide_drive_t *, ide_driver_t *);
+void ide_proc_register_driver(ide_drive_t *, struct ide_driver *);
+void ide_proc_unregister_driver(ide_drive_t *, struct ide_driver *);
 
 read_proc_t proc_ide_read_capacity;
 read_proc_t proc_ide_read_geometry;
@@ -1048,8 +1038,10 @@
 static inline void ide_proc_port_register_devices(ide_hwif_t *hwif) { ; }
 static inline void ide_proc_unregister_device(ide_drive_t *drive) { ; }
 static inline void ide_proc_unregister_port(ide_hwif_t *hwif) { ; }
-static inline void ide_proc_register_driver(ide_drive_t *drive, ide_driver_t *driver) { ; }
-static inline void ide_proc_unregister_driver(ide_drive_t *drive, ide_driver_t *driver) { ; }
+static inline void ide_proc_register_driver(ide_drive_t *drive,
+					    struct ide_driver *driver) { ; }
+static inline void ide_proc_unregister_driver(ide_drive_t *drive,
+					      struct ide_driver *driver) { ; }
 #define PROC_IDE_READ_RETURN(page,start,off,count,eof,len) return 0;
 #endif
 
@@ -1118,11 +1110,10 @@
  * The gendriver.owner field should be set to the module owner of this driver.
  * The gendriver.name field should be set to the name of this driver
  */
-struct ide_driver_s {
+struct ide_driver {
 	const char			*version;
 	ide_startstop_t	(*do_request)(ide_drive_t *, struct request *, sector_t);
 	int		(*end_request)(ide_drive_t *, int, int);
-	ide_startstop_t	(*error)(ide_drive_t *, struct request *rq, u8, u8);
 	struct device_driver	gen_driver;
 	int		(*probe)(ide_drive_t *);
 	void		(*remove)(ide_drive_t *);
@@ -1134,7 +1125,7 @@
 #endif
 };
 
-#define to_ide_driver(drv) container_of(drv, ide_driver_t, gen_driver)
+#define to_ide_driver(drv) container_of(drv, struct ide_driver, gen_driver)
 
 int ide_device_get(ide_drive_t *);
 void ide_device_put(ide_drive_t *);
@@ -1166,9 +1157,7 @@
 
 void ide_pad_transfer(ide_drive_t *, int, int);
 
-ide_startstop_t __ide_error(ide_drive_t *, struct request *, u8, u8);
-
-ide_startstop_t ide_error (ide_drive_t *drive, const char *msg, byte stat);
+ide_startstop_t ide_error(ide_drive_t *, const char *, u8);
 
 void ide_fix_driveid(u16 *);
 
@@ -1192,7 +1181,6 @@
 void ide_exec_command(ide_hwif_t *, u8);
 u8 ide_read_status(ide_hwif_t *);
 u8 ide_read_altstatus(ide_hwif_t *);
-u8 ide_read_sff_dma_status(ide_hwif_t *);
 
 void ide_set_irq(ide_hwif_t *, int);
 
@@ -1272,26 +1260,6 @@
 
 extern void ide_timer_expiry(unsigned long);
 extern irqreturn_t ide_intr(int irq, void *dev_id);
-
-static inline int ide_lock_hwgroup(ide_hwgroup_t *hwgroup)
-{
-	if (hwgroup->busy)
-		return 1;
-
-	hwgroup->busy = 1;
-	/* for atari only */
-	ide_get_lock(ide_intr, hwgroup);
-
-	return 0;
-}
-
-static inline void ide_unlock_hwgroup(ide_hwgroup_t *hwgroup)
-{
-	/* for atari only */
-	ide_release_lock();
-	hwgroup->busy = 0;
-}
-
 extern void do_ide_request(struct request_queue *);
 
 void ide_init_disk(struct gendisk *, ide_drive_t *);
@@ -1327,11 +1295,11 @@
 }
 #endif
 
-typedef struct ide_pci_enablebit_s {
+struct ide_pci_enablebit {
 	u8	reg;	/* byte pci reg holding the enable-bit */
 	u8	mask;	/* mask to isolate the enable-bit */
 	u8	val;	/* value of masked reg when "enabled" */
-} ide_pci_enablebit_t;
+};
 
 enum {
 	/* Uses ISA control ports not PCI ones. */
@@ -1420,7 +1388,8 @@
 	const struct ide_port_ops	*port_ops;
 	const struct ide_dma_ops	*dma_ops;
 
-	ide_pci_enablebit_t	enablebits[2];
+	struct ide_pci_enablebit	enablebits[2];
+
 	hwif_chipset_t		chipset;
 
 	u16			max_sectors;	/* if < than the default one */
@@ -1492,6 +1461,7 @@
 extern void ide_dma_start(ide_drive_t *);
 int ide_dma_end(ide_drive_t *);
 int ide_dma_test_irq(ide_drive_t *);
+u8 ide_dma_sff_read_status(ide_hwif_t *);
 extern const struct ide_dma_ops sff_dma_ops;
 #else
 static inline int config_drive_for_dma(ide_drive_t *drive) { return 0; }
@@ -1529,9 +1499,6 @@
 static inline void ide_acpi_set_state(ide_hwif_t *hwif, int on) {}
 #endif
 
-void ide_remove_port_from_hwgroup(ide_hwif_t *);
-void ide_unregister(ide_hwif_t *);
-
 void ide_register_region(struct gendisk *);
 void ide_unregister_region(struct gendisk *);
 
@@ -1616,23 +1583,6 @@
 	ide_set_pio(drive, 255);
 }
 
-extern spinlock_t ide_lock;
-extern struct mutex ide_cfg_mtx;
-/*
- * Structure locking:
- *
- * ide_cfg_mtx and hwgroup->lock together protect changes to
- * ide_hwif_t->next
- * ide_drive_t->next
- *
- * ide_hwgroup_t->busy: hwgroup->lock
- * ide_hwgroup_t->hwif: hwgroup->lock
- * ide_hwif_t->{hwgroup,mate}: constant, no locking
- * ide_drive_t->hwif: constant, no locking
- */
-
-#define local_irq_set(flags)	do { local_save_flags((flags)); local_irq_enable_in_hardirq(); } while (0)
-
 char *ide_media_string(ide_drive_t *);
 
 extern struct device_attribute ide_dev_attrs[];
@@ -1651,8 +1601,15 @@
 
 static inline ide_drive_t *ide_get_pair_dev(ide_drive_t *drive)
 {
-	ide_drive_t *peer = &drive->hwif->drives[(drive->dn ^ 1) & 1];
+	ide_drive_t *peer = drive->hwif->devices[(drive->dn ^ 1) & 1];
 
 	return (peer->dev_flags & IDE_DFLAG_PRESENT) ? peer : NULL;
 }
+
+#define ide_port_for_each_dev(i, dev, port) \
+	for ((i) = 0; ((dev) = (port)->devices[i]) || (i) < MAX_DRIVES; (i)++)
+
+#define ide_host_for_each_port(i, port, host) \
+	for ((i) = 0; ((port) = (host)->ports[i]) || (i) < MAX_HOST_PORTS; (i)++)
+
 #endif /* _IDE_H */
diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h
index 0702c4d..af886b2 100644
--- a/include/linux/interrupt.h
+++ b/include/linux/interrupt.h
@@ -14,7 +14,6 @@
 #include <linux/irqflags.h>
 #include <linux/smp.h>
 #include <linux/percpu.h>
-#include <linux/irqnr.h>
 
 #include <asm/atomic.h>
 #include <asm/ptrace.h>
diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index d242fe1..6b8e202 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -48,6 +48,12 @@
 #define FIELD_SIZEOF(t, f) (sizeof(((t*)0)->f))
 #define DIV_ROUND_UP(n,d) (((n) + (d) - 1) / (d))
 #define roundup(x, y) ((((x) + ((y) - 1)) / (y)) * (y))
+#define DIV_ROUND_CLOSEST(x, divisor)(			\
+{							\
+	typeof(divisor) __divisor = divisor;		\
+	(((x) + ((__divisor) / 2)) / (__divisor));	\
+}							\
+)
 
 #define _RET_IP_		(unsigned long)__builtin_return_address(0)
 #define _THIS_IP_  ({ __label__ __here; __here: (unsigned long)&&__here; })
diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h
index 497b1d1..d6ea19e 100644
--- a/include/linux/kprobes.h
+++ b/include/linux/kprobes.h
@@ -69,9 +69,6 @@
 	/* list of kprobes for multi-handler support */
 	struct list_head list;
 
-	/* Indicates that the corresponding module has been ref counted */
-	unsigned int mod_refcounted;
-
 	/*count the number of times this probe was temporarily disarmed */
 	unsigned long nmissed;
 
@@ -103,8 +100,19 @@
 
 	/* copy of the original instruction */
 	struct arch_specific_insn ainsn;
+
+	/* Indicates various status flags.  Protected by kprobe_mutex. */
+	u32 flags;
 };
 
+/* Kprobe status flags */
+#define KPROBE_FLAG_GONE	1 /* breakpoint has already gone */
+
+static inline int kprobe_gone(struct kprobe *p)
+{
+	return p->flags & KPROBE_FLAG_GONE;
+}
+
 /*
  * Special probe type that uses setjmp-longjmp type tricks to resume
  * execution at a specified entry with a matching prototype corresponding
@@ -201,7 +209,6 @@
 }
 #endif /* CONFIG_KPROBES_SANITY_TEST */
 
-extern struct mutex kprobe_mutex;
 extern int arch_prepare_kprobe(struct kprobe *p);
 extern void arch_arm_kprobe(struct kprobe *p);
 extern void arch_disarm_kprobe(struct kprobe *p);
diff --git a/include/linux/memory.h b/include/linux/memory.h
index 36c82c9..3fdc108 100644
--- a/include/linux/memory.h
+++ b/include/linux/memory.h
@@ -79,14 +79,14 @@
 #else
 extern int register_memory_notifier(struct notifier_block *nb);
 extern void unregister_memory_notifier(struct notifier_block *nb);
-extern int register_new_memory(struct mem_section *);
+extern int register_new_memory(int, struct mem_section *);
 extern int unregister_memory_section(struct mem_section *);
 extern int memory_dev_init(void);
 extern int remove_memory_block(unsigned long, struct mem_section *, int);
 extern int memory_notify(unsigned long val, void *v);
+extern struct memory_block *find_memory_block(struct mem_section *);
 #define CONFIG_MEM_BLOCK_SIZE	(PAGES_PER_SECTION<<PAGE_SHIFT)
-
-
+enum mem_add_context { BOOT, HOTPLUG };
 #endif /* CONFIG_MEMORY_HOTPLUG_SPARSE */
 
 #ifdef CONFIG_MEMORY_HOTPLUG
diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h
index 763ba81..d95f72e 100644
--- a/include/linux/memory_hotplug.h
+++ b/include/linux/memory_hotplug.h
@@ -72,7 +72,7 @@
 extern int offline_pages(unsigned long, unsigned long, unsigned long);
 
 /* reasonably generic interface to expand the physical pages in a zone  */
-extern int __add_pages(struct zone *zone, unsigned long start_pfn,
+extern int __add_pages(int nid, struct zone *zone, unsigned long start_pfn,
 	unsigned long nr_pages);
 extern int __remove_pages(struct zone *zone, unsigned long start_pfn,
 	unsigned long nr_pages);
diff --git a/include/linux/migrate.h b/include/linux/migrate.h
index 3f34005..527602c 100644
--- a/include/linux/migrate.h
+++ b/include/linux/migrate.h
@@ -7,6 +7,8 @@
 typedef struct page *new_page_t(struct page *, unsigned long private, int **);
 
 #ifdef CONFIG_MIGRATION
+#define PAGE_MIGRATION 1
+
 extern int putback_lru_pages(struct list_head *l);
 extern int migrate_page(struct address_space *,
 			struct page *, struct page *);
@@ -20,6 +22,8 @@
 		const nodemask_t *from, const nodemask_t *to,
 		unsigned long flags);
 #else
+#define PAGE_MIGRATION 0
+
 static inline int putback_lru_pages(struct list_head *l) { return 0; }
 static inline int migrate_pages(struct list_head *l, new_page_t x,
 		unsigned long private) { return -ENOSYS; }
diff --git a/include/linux/miscdevice.h b/include/linux/miscdevice.h
index 26433ec..a820f81 100644
--- a/include/linux/miscdevice.h
+++ b/include/linux/miscdevice.h
@@ -3,33 +3,33 @@
 #include <linux/module.h>
 #include <linux/major.h>
 
-#define PSMOUSE_MINOR  1
-#define MS_BUSMOUSE_MINOR 2
-#define ATIXL_BUSMOUSE_MINOR 3
-/*#define AMIGAMOUSE_MINOR 4	FIXME OBSOLETE */
-#define ATARIMOUSE_MINOR 5
-#define SUN_MOUSE_MINOR 6
-#define APOLLO_MOUSE_MINOR 7
-#define PC110PAD_MINOR 9
-/*#define ADB_MOUSE_MINOR 10	FIXME OBSOLETE */
+#define PSMOUSE_MINOR		1
+#define MS_BUSMOUSE_MINOR	2
+#define ATIXL_BUSMOUSE_MINOR	3
+/*#define AMIGAMOUSE_MINOR	4	FIXME OBSOLETE */
+#define ATARIMOUSE_MINOR	5
+#define SUN_MOUSE_MINOR		6
+#define APOLLO_MOUSE_MINOR	7
+#define PC110PAD_MINOR		9
+/*#define ADB_MOUSE_MINOR	10	FIXME OBSOLETE */
 #define WATCHDOG_MINOR		130	/* Watchdog timer     */
 #define TEMP_MINOR		131	/* Temperature Sensor */
-#define RTC_MINOR 135
+#define RTC_MINOR		135
 #define EFI_RTC_MINOR		136	/* EFI Time services */
-#define SUN_OPENPROM_MINOR 139
+#define SUN_OPENPROM_MINOR	139
 #define DMAPI_MINOR		140	/* DMAPI */
-#define NVRAM_MINOR 144
-#define SGI_MMTIMER        153
+#define NVRAM_MINOR		144
+#define SGI_MMTIMER		153
 #define STORE_QUEUE_MINOR	155
-#define I2O_MINOR 166
+#define I2O_MINOR		166
 #define MICROCODE_MINOR		184
-#define MWAVE_MINOR	219		/* ACP/Mwave Modem */
-#define MPT_MINOR	220
-#define MISC_DYNAMIC_MINOR 255
-
-#define TUN_MINOR	     200
-#define	HPET_MINOR	     228
-#define KVM_MINOR            232
+#define TUN_MINOR		200
+#define MWAVE_MINOR		219	/* ACP/Mwave Modem */
+#define MPT_MINOR		220
+#define HPET_MINOR		228
+#define FUSE_MINOR		229
+#define KVM_MINOR		232
+#define MISC_DYNAMIC_MINOR	255
 
 struct device;
 
diff --git a/include/linux/mm.h b/include/linux/mm.h
index aaa8b84..4a3d28c 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -717,6 +717,11 @@
 
 #define VM_FAULT_ERROR	(VM_FAULT_OOM | VM_FAULT_SIGBUS)
 
+/*
+ * Can be called by the pagefault handler when it gets a VM_FAULT_OOM.
+ */
+extern void pagefault_out_of_memory(void);
+
 #define offset_in_page(p)	((unsigned long)(p) & ~PAGE_MASK)
 
 extern void show_free_areas(void);
diff --git a/include/linux/module.h b/include/linux/module.h
index 3bfed01..4f7ea12 100644
--- a/include/linux/module.h
+++ b/include/linux/module.h
@@ -294,9 +294,6 @@
 	/* The size of the executable code in each section.  */
 	unsigned int init_text_size, core_text_size;
 
-	/* The handle returned from unwind_add_table. */
-	void *unwind_info;
-
 	/* Arch-specific module values */
 	struct mod_arch_specific arch;
 
@@ -368,6 +365,18 @@
 struct module *__module_text_address(unsigned long addr);
 int is_module_address(unsigned long addr);
 
+static inline int within_module_core(unsigned long addr, struct module *mod)
+{
+	return (unsigned long)mod->module_core <= addr &&
+	       addr < (unsigned long)mod->module_core + mod->core_size;
+}
+
+static inline int within_module_init(unsigned long addr, struct module *mod)
+{
+	return (unsigned long)mod->module_init <= addr &&
+	       addr < (unsigned long)mod->module_init + mod->init_size;
+}
+
 /* Returns 0 and fills in value, defined and namebuf, or -ERANGE if
    symnum out of range. */
 int module_get_kallsym(unsigned int symnum, unsigned long *value, char *type,
diff --git a/include/linux/node.h b/include/linux/node.h
index bc001bc..681a697 100644
--- a/include/linux/node.h
+++ b/include/linux/node.h
@@ -26,6 +26,7 @@
 	struct sys_device	sysdev;
 };
 
+struct memory_block;
 extern struct node node_devices[];
 
 extern int register_node(struct node *, int, struct node *);
@@ -35,6 +36,9 @@
 extern void unregister_one_node(int nid);
 extern int register_cpu_under_node(unsigned int cpu, unsigned int nid);
 extern int unregister_cpu_under_node(unsigned int cpu, unsigned int nid);
+extern int register_mem_sect_under_node(struct memory_block *mem_blk,
+						int nid);
+extern int unregister_mem_sect_under_nodes(struct memory_block *mem_blk);
 #else
 static inline int register_one_node(int nid)
 {
@@ -52,6 +56,15 @@
 {
 	return 0;
 }
+static inline int register_mem_sect_under_node(struct memory_block *mem_blk,
+							int nid)
+{
+	return 0;
+}
+static inline int unregister_mem_sect_under_nodes(struct memory_block *mem_blk)
+{
+	return 0;
+}
 #endif
 
 #define to_node(sys_device) container_of(sys_device, struct node, sysdev)
diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h
index b12f93a..219a523 100644
--- a/include/linux/page-flags.h
+++ b/include/linux/page-flags.h
@@ -228,6 +228,7 @@
 PAGEFLAG(SwapCache, swapcache)
 #else
 PAGEFLAG_FALSE(SwapCache)
+	SETPAGEFLAG_NOOP(SwapCache) CLEARPAGEFLAG_NOOP(SwapCache)
 #endif
 
 #ifdef CONFIG_UNEVICTABLE_LRU
@@ -372,31 +373,22 @@
 #define __PG_MLOCKED		0
 #endif
 
-#define PAGE_FLAGS	(1 << PG_lru   | 1 << PG_private   | 1 << PG_locked | \
-			 1 << PG_buddy | 1 << PG_writeback | \
-			 1 << PG_slab  | 1 << PG_swapcache | 1 << PG_active | \
-			 __PG_UNEVICTABLE | __PG_MLOCKED)
-
-/*
- * Flags checked in bad_page().  Pages on the free list should not have
- * these flags set.  It they are, there is a problem.
- */
-#define PAGE_FLAGS_CLEAR_WHEN_BAD (PAGE_FLAGS | \
-		1 << PG_reclaim | 1 << PG_dirty | 1 << PG_swapbacked)
-
 /*
  * Flags checked when a page is freed.  Pages being freed should not have
  * these flags set.  It they are, there is a problem.
  */
-#define PAGE_FLAGS_CHECK_AT_FREE (PAGE_FLAGS | 1 << PG_reserved)
+#define PAGE_FLAGS_CHECK_AT_FREE \
+	(1 << PG_lru   | 1 << PG_private   | 1 << PG_locked | \
+	 1 << PG_buddy | 1 << PG_writeback | 1 << PG_reserved | \
+	 1 << PG_slab  | 1 << PG_swapcache | 1 << PG_active | \
+	 __PG_UNEVICTABLE | __PG_MLOCKED)
 
 /*
  * Flags checked when a page is prepped for return by the page allocator.
- * Pages being prepped should not have these flags set.  It they are, there
- * is a problem.
+ * Pages being prepped should not have any flags set.  If they are set,
+ * there has been a kernel bug or struct page corruption.
  */
-#define PAGE_FLAGS_CHECK_AT_PREP (PAGE_FLAGS | \
-		1 << PG_reserved | 1 << PG_dirty | 1 << PG_swapbacked)
+#define PAGE_FLAGS_CHECK_AT_PREP	((1 << NR_PAGEFLAGS) - 1)
 
 #endif /* !__GENERATING_BOUNDS_H */
 #endif	/* PAGE_FLAGS_H */
diff --git a/include/linux/pagevec.h b/include/linux/pagevec.h
index e90a2cb..7b2886f 100644
--- a/include/linux/pagevec.h
+++ b/include/linux/pagevec.h
@@ -21,7 +21,6 @@
 };
 
 void __pagevec_release(struct pagevec *pvec);
-void __pagevec_release_nonlru(struct pagevec *pvec);
 void __pagevec_free(struct pagevec *pvec);
 void ____pagevec_lru_add(struct pagevec *pvec, enum lru_list lru);
 void pagevec_strip(struct pagevec *pvec);
@@ -69,12 +68,6 @@
 		__pagevec_release(pvec);
 }
 
-static inline void pagevec_release_nonlru(struct pagevec *pvec)
-{
-	if (pagevec_count(pvec))
-		__pagevec_release_nonlru(pvec);
-}
-
 static inline void pagevec_free(struct pagevec *pvec)
 {
 	if (pagevec_count(pvec))
diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h
index 218c73b..d543365 100644
--- a/include/linux/pci_ids.h
+++ b/include/linux/pci_ids.h
@@ -1658,6 +1658,7 @@
 #define PCI_VENDOR_ID_ROCKWELL		0x127A
 
 #define PCI_VENDOR_ID_ITE		0x1283
+#define PCI_DEVICE_ID_ITE_8172		0x8172
 #define PCI_DEVICE_ID_ITE_8211		0x8211
 #define PCI_DEVICE_ID_ITE_8212		0x8212
 #define PCI_DEVICE_ID_ITE_8213		0x8213
diff --git a/include/linux/percpu_counter.h b/include/linux/percpu_counter.h
index 9007ccd..99de7a3 100644
--- a/include/linux/percpu_counter.h
+++ b/include/linux/percpu_counter.h
@@ -24,11 +24,7 @@
 	s32 *counters;
 };
 
-#if NR_CPUS >= 16
-#define FBC_BATCH	(NR_CPUS*2)
-#else
-#define FBC_BATCH	(NR_CPUS*4)
-#endif
+extern int percpu_counter_batch;
 
 int percpu_counter_init(struct percpu_counter *fbc, s64 amount);
 int percpu_counter_init_irq(struct percpu_counter *fbc, s64 amount);
@@ -39,7 +35,7 @@
 
 static inline void percpu_counter_add(struct percpu_counter *fbc, s64 amount)
 {
-	__percpu_counter_add(fbc, amount, FBC_BATCH);
+	__percpu_counter_add(fbc, amount, percpu_counter_batch);
 }
 
 static inline s64 percpu_counter_sum_positive(struct percpu_counter *fbc)
diff --git a/include/linux/poll.h b/include/linux/poll.h
index badd98a..8c24ef8 100644
--- a/include/linux/poll.h
+++ b/include/linux/poll.h
@@ -46,9 +46,9 @@
 }
 
 struct poll_table_entry {
-	struct file * filp;
+	struct file *filp;
 	wait_queue_t wait;
-	wait_queue_head_t * wait_address;
+	wait_queue_head_t *wait_address;
 };
 
 /*
@@ -56,7 +56,9 @@
  */
 struct poll_wqueues {
 	poll_table pt;
-	struct poll_table_page * table;
+	struct poll_table_page *table;
+	struct task_struct *polling_task;
+	int triggered;
 	int error;
 	int inline_index;
 	struct poll_table_entry inline_entries[N_INLINE_POLL_ENTRIES];
@@ -64,6 +66,13 @@
 
 extern void poll_initwait(struct poll_wqueues *pwq);
 extern void poll_freewait(struct poll_wqueues *pwq);
+extern int poll_schedule_timeout(struct poll_wqueues *pwq, int state,
+				 ktime_t *expires, unsigned long slack);
+
+static inline int poll_schedule(struct poll_wqueues *pwq, int state)
+{
+	return poll_schedule_timeout(pwq, state, NULL, 0);
+}
 
 /*
  * Scaleable version of the fd_set.
diff --git a/include/linux/rio_drv.h b/include/linux/rio_drv.h
index 32c0547..c93a58a 100644
--- a/include/linux/rio_drv.h
+++ b/include/linux/rio_drv.h
@@ -391,7 +391,6 @@
  * rio_get_inb_message - Get A RIO message from an inbound mailbox queue
  * @mport: Master port containing the inbound mailbox
  * @mbox: The inbound mailbox number
- * @buffer: Pointer to the message buffer
  *
  * Get a RIO message from an inbound mailbox queue. Returns 0 on success.
  */
diff --git a/include/linux/rmap.h b/include/linux/rmap.h
index 89f0564..b35bc0e 100644
--- a/include/linux/rmap.h
+++ b/include/linux/rmap.h
@@ -63,16 +63,13 @@
 void anon_vma_link(struct vm_area_struct *);
 void __anon_vma_link(struct vm_area_struct *);
 
-extern struct anon_vma *page_lock_anon_vma(struct page *page);
-extern void page_unlock_anon_vma(struct anon_vma *anon_vma);
-
 /*
  * rmap interfaces called when adding or removing pte of page
  */
 void page_add_anon_rmap(struct page *, struct vm_area_struct *, unsigned long);
 void page_add_new_anon_rmap(struct page *, struct vm_area_struct *, unsigned long);
 void page_add_file_rmap(struct page *);
-void page_remove_rmap(struct page *, struct vm_area_struct *);
+void page_remove_rmap(struct page *);
 
 #ifdef CONFIG_DEBUG_VM
 void page_dup_rmap(struct page *page, struct vm_area_struct *vma, unsigned long address);
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 38a3f4b..ea41513 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -386,6 +386,9 @@
 		(mm)->hiwater_vm = (mm)->total_vm;	\
 } while (0)
 
+#define get_mm_hiwater_rss(mm)	max((mm)->hiwater_rss, get_mm_rss(mm))
+#define get_mm_hiwater_vm(mm)	max((mm)->hiwater_vm, (mm)->total_vm)
+
 extern void set_dumpable(struct mm_struct *mm, int value);
 extern int get_dumpable(struct mm_struct *mm);
 
diff --git a/include/linux/spi/spi_gpio.h b/include/linux/spi/spi_gpio.h
new file mode 100644
index 0000000..0f01a0f
--- /dev/null
+++ b/include/linux/spi/spi_gpio.h
@@ -0,0 +1,60 @@
+#ifndef __LINUX_SPI_GPIO_H
+#define __LINUX_SPI_GPIO_H
+
+/*
+ * For each bitbanged SPI bus, set up a platform_device node with:
+ *   - name "spi_gpio"
+ *   - id the same as the SPI bus number it implements
+ *   - dev.platform_data pointing to a struct spi_gpio_platform_data
+ *
+ * Or, see the driver code for information about speedups that are
+ * possible on platforms that support inlined access for GPIOs (no
+ * spi_gpio_platform_data is used).
+ *
+ * Use spi_board_info with these busses in the usual way, being sure
+ * that the controller_data is the GPIO used for each device's
+ * chipselect:
+ *
+ *	static struct spi_board_info ... [] = {
+ *	...
+ *		// this slave uses GPIO 42 for its chipselect
+ *		.controller_data = (void *) 42,
+ *	...
+ *		// this one uses GPIO 86 for its chipselect
+ *		.controller_data = (void *) 86,
+ *	...
+ *	};
+ *
+ * If the bitbanged bus is later switched to a "native" controller,
+ * that platform_device and controller_data should be removed.
+ */
+
+/**
+ * struct spi_gpio_platform_data - parameter for bitbanged SPI master
+ * @sck: number of the GPIO used for clock output
+ * @mosi: number of the GPIO used for Master Output, Slave In (MOSI) data
+ * @miso: number of the GPIO used for Master Input, Slave Output (MISO) data
+ * @num_chipselect: how many slaves to allow
+ *
+ * All GPIO signals used with the SPI bus managed through this driver
+ * (chipselects, MOSI, MISO, SCK) must be configured as GPIOs, instead
+ * of some alternate function.
+ *
+ * It can be convenient to use this driver with pins that have alternate
+ * functions associated with a "native" SPI controller if a driver for that
+ * controller is not available, or is missing important functionality.
+ *
+ * On platforms which can do so, configure MISO with a weak pullup unless
+ * there's an external pullup on that signal.  That saves power by avoiding
+ * floating signals.  (A weak pulldown would save power too, but many
+ * drivers expect to see all-ones data as the no slave "response".)
+ */
+struct spi_gpio_platform_data {
+	unsigned	sck;
+	unsigned	mosi;
+	unsigned	miso;
+
+	u16		num_chipselect;
+};
+
+#endif /* __LINUX_SPI_GPIO_H */
diff --git a/include/linux/swap.h b/include/linux/swap.h
index a3af95b..91dee50 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -120,7 +120,9 @@
 enum {
 	SWP_USED	= (1 << 0),	/* is slot in swap_info[] used? */
 	SWP_WRITEOK	= (1 << 1),	/* ok to write to this swap?	*/
-	SWP_ACTIVE	= (SWP_USED | SWP_WRITEOK),
+	SWP_DISCARDABLE = (1 << 2),	/* blkdev supports discard */
+	SWP_DISCARDING	= (1 << 3),	/* now discarding a free cluster */
+	SWP_SOLIDSTATE	= (1 << 4),	/* blkdev seeks are cheap */
 					/* add others here before... */
 	SWP_SCANNING	= (1 << 8),	/* refcount in scan_swap_map */
 };
@@ -134,22 +136,24 @@
  * The in-memory structure used to track swap areas.
  */
 struct swap_info_struct {
-	unsigned int flags;
+	unsigned long flags;
 	int prio;			/* swap priority */
+	int next;			/* next entry on swap list */
 	struct file *swap_file;
 	struct block_device *bdev;
 	struct list_head extent_list;
 	struct swap_extent *curr_swap_extent;
-	unsigned old_block_size;
-	unsigned short * swap_map;
+	unsigned short *swap_map;
 	unsigned int lowest_bit;
 	unsigned int highest_bit;
+	unsigned int lowest_alloc;	/* while preparing discard cluster */
+	unsigned int highest_alloc;	/* while preparing discard cluster */
 	unsigned int cluster_next;
 	unsigned int cluster_nr;
 	unsigned int pages;
 	unsigned int max;
 	unsigned int inuse_pages;
-	int next;			/* next entry on swap list */
+	unsigned int old_block_size;
 };
 
 struct swap_list_t {
@@ -163,7 +167,6 @@
 /* linux/mm/page_alloc.c */
 extern unsigned long totalram_pages;
 extern unsigned long totalreserve_pages;
-extern long nr_swap_pages;
 extern unsigned int nr_free_buffer_pages(void);
 extern unsigned int nr_free_pagecache_pages(void);
 
@@ -174,8 +177,6 @@
 /* linux/mm/swap.c */
 extern void __lru_cache_add(struct page *, enum lru_list lru);
 extern void lru_cache_add_lru(struct page *, enum lru_list lru);
-extern void lru_cache_add_active_or_unevictable(struct page *,
-					struct vm_area_struct *);
 extern void activate_page(struct page *);
 extern void mark_page_accessed(struct page *);
 extern void lru_add_drain(void);
@@ -280,7 +281,7 @@
 extern struct address_space swapper_space;
 #define total_swapcache_pages  swapper_space.nrpages
 extern void show_swap_cache_info(void);
-extern int add_to_swap(struct page *, gfp_t);
+extern int add_to_swap(struct page *);
 extern int add_to_swap_cache(struct page *, swp_entry_t, gfp_t);
 extern void __delete_from_swap_cache(struct page *);
 extern void delete_from_swap_cache(struct page *);
@@ -293,6 +294,7 @@
 			struct vm_area_struct *vma, unsigned long addr);
 
 /* linux/mm/swapfile.c */
+extern long nr_swap_pages;
 extern long total_swap_pages;
 extern void si_swapinfo(struct sysinfo *);
 extern swp_entry_t get_swap_page(void);
@@ -300,15 +302,14 @@
 extern int swap_duplicate(swp_entry_t);
 extern int valid_swaphandles(swp_entry_t, unsigned long *);
 extern void swap_free(swp_entry_t);
-extern void free_swap_and_cache(swp_entry_t);
+extern int free_swap_and_cache(swp_entry_t);
 extern int swap_type_of(dev_t, sector_t, struct block_device **);
 extern unsigned int count_swap_pages(int, int);
 extern sector_t map_swap_page(struct swap_info_struct *, pgoff_t);
 extern sector_t swapdev_block(int, pgoff_t);
 extern struct swap_info_struct *get_swap_info_struct(unsigned);
-extern int can_share_swap_page(struct page *);
-extern int remove_exclusive_swap_page(struct page *);
-extern int remove_exclusive_swap_page_ref(struct page *);
+extern int reuse_swap_page(struct page *);
+extern int try_to_free_swap(struct page *);
 struct backing_dev_info;
 
 /* linux/mm/thrash.c */
@@ -334,7 +335,8 @@
 
 #else /* CONFIG_SWAP */
 
-#define total_swap_pages			0
+#define nr_swap_pages				0L
+#define total_swap_pages			0L
 #define total_swapcache_pages			0UL
 
 #define si_swapinfo(val) \
@@ -350,14 +352,8 @@
 {
 }
 
-static inline void free_swap_and_cache(swp_entry_t swp)
-{
-}
-
-static inline int swap_duplicate(swp_entry_t swp)
-{
-	return 0;
-}
+#define free_swap_and_cache(swp)	is_migration_entry(swp)
+#define swap_duplicate(swp)		is_migration_entry(swp)
 
 static inline void swap_free(swp_entry_t swp)
 {
@@ -374,7 +370,10 @@
 	return NULL;
 }
 
-#define can_share_swap_page(p)			(page_mapcount(p) == 1)
+static inline int add_to_swap(struct page *page)
+{
+	return 0;
+}
 
 static inline int add_to_swap_cache(struct page *page, swp_entry_t entry,
 							gfp_t gfp_mask)
@@ -390,14 +389,9 @@
 {
 }
 
-#define swap_token_default_timeout		0
+#define reuse_swap_page(page)	(page_mapcount(page) == 1)
 
-static inline int remove_exclusive_swap_page(struct page *p)
-{
-	return 0;
-}
-
-static inline int remove_exclusive_swap_page_ref(struct page *page)
+static inline int try_to_free_swap(struct page *page)
 {
 	return 0;
 }
diff --git a/include/linux/types.h b/include/linux/types.h
index 121f349..3b864f2 100644
--- a/include/linux/types.h
+++ b/include/linux/types.h
@@ -195,6 +195,16 @@
 
 typedef phys_addr_t resource_size_t;
 
+typedef struct {
+	volatile int counter;
+} atomic_t;
+
+#ifdef CONFIG_64BIT
+typedef struct {
+	volatile long counter;
+} atomic64_t;
+#endif
+
 struct ustat {
 	__kernel_daddr_t	f_tfree;
 	__kernel_ino_t		f_tinode;
diff --git a/include/linux/unwind.h b/include/linux/unwind.h
deleted file mode 100644
index 7760860..0000000
--- a/include/linux/unwind.h
+++ /dev/null
@@ -1,68 +0,0 @@
-#ifndef _LINUX_UNWIND_H
-#define _LINUX_UNWIND_H
-
-/*
- * Copyright (C) 2002-2006 Novell, Inc.
- *	Jan Beulich <jbeulich@novell.com>
- * This code is released under version 2 of the GNU GPL.
- *
- * A simple API for unwinding kernel stacks.  This is used for
- * debugging and error reporting purposes.  The kernel doesn't need
- * full-blown stack unwinding with all the bells and whistles, so there
- * is not much point in implementing the full Dwarf2 unwind API.
- */
-
-struct module;
-
-struct unwind_frame_info {};
-
-static inline void unwind_init(void) {}
-static inline void unwind_setup(void) {}
-
-#ifdef CONFIG_MODULES
-
-static inline void *unwind_add_table(struct module *mod,
-                                     const void *table_start,
-                                     unsigned long table_size)
-{
-	return NULL;
-}
-
-static inline void unwind_remove_table(void *handle, int init_only)
-{
-}
-
-#endif
-
-static inline int unwind_init_frame_info(struct unwind_frame_info *info,
-                                         struct task_struct *tsk,
-                                         const struct pt_regs *regs)
-{
-	return -ENOSYS;
-}
-
-static inline int unwind_init_blocked(struct unwind_frame_info *info,
-                                      struct task_struct *tsk)
-{
-	return -ENOSYS;
-}
-
-static inline int unwind_init_running(struct unwind_frame_info *info,
-                                      asmlinkage int (*cb)(struct unwind_frame_info *,
-                                                           void *arg),
-                                      void *arg)
-{
-	return -ENOSYS;
-}
-
-static inline int unwind(struct unwind_frame_info *info)
-{
-	return -ENOSYS;
-}
-
-static inline int unwind_to_user(struct unwind_frame_info *info)
-{
-	return -ENOSYS;
-}
-
-#endif /* _LINUX_UNWIND_H */
diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h
index 307b885..506e762 100644
--- a/include/linux/vmalloc.h
+++ b/include/linux/vmalloc.h
@@ -97,6 +97,10 @@
 extern struct vm_struct *alloc_vm_area(size_t size);
 extern void free_vm_area(struct vm_struct *area);
 
+/* for /dev/kmem */
+extern long vread(char *buf, char *addr, unsigned long count);
+extern long vwrite(char *buf, char *addr, unsigned long count);
+
 /*
  *	Internals.  Dont't use..
  */
diff --git a/include/linux/writeback.h b/include/linux/writeback.h
index e585657..7300ecd 100644
--- a/include/linux/writeback.h
+++ b/include/linux/writeback.h
@@ -30,7 +30,6 @@
 enum writeback_sync_modes {
 	WB_SYNC_NONE,	/* Don't wait on anything */
 	WB_SYNC_ALL,	/* Wait on every mapping */
-	WB_SYNC_HOLD,	/* Hold the inode on sb_dirty for sys_sync() */
 };
 
 /*
@@ -107,7 +106,9 @@
 
 /* These are exported to sysctl. */
 extern int dirty_background_ratio;
+extern unsigned long dirty_background_bytes;
 extern int vm_dirty_ratio;
+extern unsigned long vm_dirty_bytes;
 extern int dirty_writeback_interval;
 extern int dirty_expire_interval;
 extern int vm_highmem_is_dirtyable;
@@ -116,17 +117,26 @@
 
 extern unsigned long determine_dirtyable_memory(void);
 
+extern int dirty_background_ratio_handler(struct ctl_table *table, int write,
+		struct file *filp, void __user *buffer, size_t *lenp,
+		loff_t *ppos);
+extern int dirty_background_bytes_handler(struct ctl_table *table, int write,
+		struct file *filp, void __user *buffer, size_t *lenp,
+		loff_t *ppos);
 extern int dirty_ratio_handler(struct ctl_table *table, int write,
 		struct file *filp, void __user *buffer, size_t *lenp,
 		loff_t *ppos);
+extern int dirty_bytes_handler(struct ctl_table *table, int write,
+		struct file *filp, void __user *buffer, size_t *lenp,
+		loff_t *ppos);
 
 struct ctl_table;
 struct file;
 int dirty_writeback_centisecs_handler(struct ctl_table *, int, struct file *,
 				      void __user *, size_t *, loff_t *);
 
-void get_dirty_limits(long *pbackground, long *pdirty, long *pbdi_dirty,
-		 struct backing_dev_info *bdi);
+void get_dirty_limits(unsigned long *pbackground, unsigned long *pdirty,
+		      unsigned long *pbdi_dirty, struct backing_dev_info *bdi);
 
 void page_writeback_init(void);
 void balance_dirty_pages_ratelimited_nr(struct address_space *mapping,
diff --git a/init/Kconfig b/init/Kconfig
index d9d3dba..e7893b1 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -848,10 +848,6 @@
 	boolean
 	select PLIST
 
-config TINY_SHMEM
-	default !SHMEM
-	bool
-
 config BASE_SMALL
 	int
 	default 0 if BASE_FULL
diff --git a/init/do_mounts.c b/init/do_mounts.c
index d055b19..5efca73 100644
--- a/init/do_mounts.c
+++ b/init/do_mounts.c
@@ -220,10 +220,10 @@
 
 	sys_chdir("/root");
 	ROOT_DEV = current->fs->pwd.mnt->mnt_sb->s_dev;
-	printk("VFS: Mounted root (%s filesystem)%s.\n",
+	printk("VFS: Mounted root (%s filesystem)%s on device %u:%u.\n",
 	       current->fs->pwd.mnt->mnt_sb->s_type->name,
 	       current->fs->pwd.mnt->mnt_sb->s_flags & MS_RDONLY ?
-	       " readonly" : "");
+	       " readonly" : "", MAJOR(ROOT_DEV), MINOR(ROOT_DEV));
 	return 0;
 }
 
diff --git a/init/do_mounts_md.c b/init/do_mounts_md.c
index d6da5cd..ff95e31 100644
--- a/init/do_mounts_md.c
+++ b/init/do_mounts_md.c
@@ -271,7 +271,7 @@
 __setup("raid=", raid_setup);
 __setup("md=", md_setup);
 
-static void autodetect_raid(void)
+static void __init autodetect_raid(void)
 {
 	int fd;
 
diff --git a/init/main.c b/init/main.c
index cd168eb..b5a892c 100644
--- a/init/main.c
+++ b/init/main.c
@@ -50,7 +50,6 @@
 #include <linux/rmap.h>
 #include <linux/mempolicy.h>
 #include <linux/key.h>
-#include <linux/unwind.h>
 #include <linux/buffer_head.h>
 #include <linux/page_cgroup.h>
 #include <linux/debug_locks.h>
@@ -108,7 +107,7 @@
 
 extern void time_init(void);
 /* Default late time init is NULL. archs can override this later. */
-void (*late_time_init)(void);
+void (*__initdata late_time_init)(void);
 extern void softirq_init(void);
 
 /* Untouched command line saved by arch-specific code. */
@@ -447,7 +446,7 @@
  * gcc-3.4 accidentally inlines this function, so use noinline.
  */
 
-static void noinline __init_refok rest_init(void)
+static noinline void __init_refok rest_init(void)
 	__releases(kernel_lock)
 {
 	int pid;
@@ -537,7 +536,6 @@
 	 * Need to run as early as possible, to initialize the
 	 * lockdep hash:
 	 */
-	unwind_init();
 	lockdep_init();
 	debug_objects_early_init();
 	cgroup_init_early();
@@ -559,7 +557,6 @@
 	setup_arch(&command_line);
 	mm_init_owner(&init_mm, &init_task);
 	setup_command_line(command_line);
-	unwind_setup();
 	setup_per_cpu_areas();
 	setup_nr_cpu_ids();
 	smp_prepare_boot_cpu();	/* arch-specific boot-cpu hooks */
@@ -786,7 +783,7 @@
 /* This is a non __init function. Force it to be noinline otherwise gcc
  * makes it inline to init() and it becomes part of init.text section
  */
-static int noinline init_post(void)
+static noinline int init_post(void)
 {
 	free_initmem();
 	unlock_kernel();
diff --git a/ipc/ipc_sysctl.c b/ipc/ipc_sysctl.c
index 0dfebc5..4a7a12c 100644
--- a/ipc/ipc_sysctl.c
+++ b/ipc/ipc_sysctl.c
@@ -26,29 +26,6 @@
 	return which;
 }
 
-/*
- * Routine that is called when the file "auto_msgmni" has successfully been
- * written.
- * Two values are allowed:
- * 0: unregister msgmni's callback routine from the ipc namespace notifier
- *    chain. This means that msgmni won't be recomputed anymore upon memory
- *    add/remove or ipc namespace creation/removal.
- * 1: register back the callback routine.
- */
-static void ipc_auto_callback(int val)
-{
-	if (!val)
-		unregister_ipcns_notifier(current->nsproxy->ipc_ns);
-	else {
-		/*
-		 * Re-enable automatic recomputing only if not already
-		 * enabled.
-		 */
-		recompute_msgmni(current->nsproxy->ipc_ns);
-		cond_register_ipcns_notifier(current->nsproxy->ipc_ns);
-	}
-}
-
 #ifdef CONFIG_PROC_FS
 static int proc_ipc_dointvec(ctl_table *table, int write, struct file *filp,
 	void __user *buffer, size_t *lenp, loff_t *ppos)
@@ -94,6 +71,29 @@
 					lenp, ppos);
 }
 
+/*
+ * Routine that is called when the file "auto_msgmni" has successfully been
+ * written.
+ * Two values are allowed:
+ * 0: unregister msgmni's callback routine from the ipc namespace notifier
+ *    chain. This means that msgmni won't be recomputed anymore upon memory
+ *    add/remove or ipc namespace creation/removal.
+ * 1: register back the callback routine.
+ */
+static void ipc_auto_callback(int val)
+{
+	if (!val)
+		unregister_ipcns_notifier(current->nsproxy->ipc_ns);
+	else {
+		/*
+		 * Re-enable automatic recomputing only if not already
+		 * enabled.
+		 */
+		recompute_msgmni(current->nsproxy->ipc_ns);
+		cond_register_ipcns_notifier(current->nsproxy->ipc_ns);
+	}
+}
+
 static int proc_ipcauto_dointvec_minmax(ctl_table *table, int write,
 	struct file *filp, void __user *buffer, size_t *lenp, loff_t *ppos)
 {
diff --git a/ipc/sem.c b/ipc/sem.c
index fea0ad3..c68cd3f 100644
--- a/ipc/sem.c
+++ b/ipc/sem.c
@@ -1216,7 +1216,6 @@
 	if (timeout && jiffies_left == 0)
 		error = -EAGAIN;
 	list_del(&queue.list);
-	goto out_unlock_free;
 
 out_unlock_free:
 	sem_unlock(sma);
diff --git a/ipc/shm.c b/ipc/shm.c
index 57dd500..b125b56 100644
--- a/ipc/shm.c
+++ b/ipc/shm.c
@@ -75,7 +75,7 @@
 	ns->shm_ctlall = SHMALL;
 	ns->shm_ctlmni = SHMMNI;
 	ns->shm_tot = 0;
-	ipc_init_ids(&ns->ids[IPC_SHM_IDS]);
+	ipc_init_ids(&shm_ids(ns));
 }
 
 /*
@@ -644,7 +644,7 @@
 		if (err)
 			return err;
 
-		memset(&shminfo,0,sizeof(shminfo));
+		memset(&shminfo, 0, sizeof(shminfo));
 		shminfo.shmmni = shminfo.shmseg = ns->shm_ctlmni;
 		shminfo.shmmax = ns->shm_ctlmax;
 		shminfo.shmall = ns->shm_ctlall;
@@ -669,7 +669,7 @@
 		if (err)
 			return err;
 
-		memset(&shm_info,0,sizeof(shm_info));
+		memset(&shm_info, 0, sizeof(shm_info));
 		down_read(&shm_ids(ns).rw_mutex);
 		shm_info.used_ids = shm_ids(ns).in_use;
 		shm_get_stat (ns, &shm_info.shm_rss, &shm_info.shm_swp);
@@ -678,7 +678,7 @@
 		shm_info.swap_successes = 0;
 		err = ipc_get_maxid(&shm_ids(ns));
 		up_read(&shm_ids(ns).rw_mutex);
-		if(copy_to_user (buf, &shm_info, sizeof(shm_info))) {
+		if (copy_to_user(buf, &shm_info, sizeof(shm_info))) {
 			err = -EFAULT;
 			goto out;
 		}
@@ -692,11 +692,6 @@
 		struct shmid64_ds tbuf;
 		int result;
 
-		if (!buf) {
-			err = -EFAULT;
-			goto out;
-		}
-
 		if (cmd == SHM_STAT) {
 			shp = shm_lock(ns, shmid);
 			if (IS_ERR(shp)) {
@@ -712,7 +707,7 @@
 			}
 			result = 0;
 		}
-		err=-EACCES;
+		err = -EACCES;
 		if (ipcperms (&shp->shm_perm, S_IRUGO))
 			goto out_unlock;
 		err = security_shm_shmctl(shp, cmd);
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 87bb025..f221446 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -116,7 +116,6 @@
  * be called.
  */
 static int need_forkexit_callback __read_mostly;
-static int need_mm_owner_callback __read_mostly;
 
 /* convenient tests for these bits */
 inline int cgroup_is_removed(const struct cgroup *cgrp)
@@ -2539,7 +2538,6 @@
 	init_css_set.subsys[ss->subsys_id] = dummytop->subsys[ss->subsys_id];
 
 	need_forkexit_callback |= ss->fork || ss->exit;
-	need_mm_owner_callback |= !!ss->mm_owner_changed;
 
 	/* At system boot, before all subsystems have been
 	 * registered, no tasks have been forked, so we don't
@@ -2789,37 +2787,6 @@
 	}
 }
 
-#ifdef CONFIG_MM_OWNER
-/**
- * cgroup_mm_owner_callbacks - run callbacks when the mm->owner changes
- * @p: the new owner
- *
- * Called on every change to mm->owner. mm_init_owner() does not
- * invoke this routine, since it assigns the mm->owner the first time
- * and does not change it.
- *
- * The callbacks are invoked with mmap_sem held in read mode.
- */
-void cgroup_mm_owner_callbacks(struct task_struct *old, struct task_struct *new)
-{
-	struct cgroup *oldcgrp, *newcgrp = NULL;
-
-	if (need_mm_owner_callback) {
-		int i;
-		for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
-			struct cgroup_subsys *ss = subsys[i];
-			oldcgrp = task_cgroup(old, ss->subsys_id);
-			if (new)
-				newcgrp = task_cgroup(new, ss->subsys_id);
-			if (oldcgrp == newcgrp)
-				continue;
-			if (ss->mm_owner_changed)
-				ss->mm_owner_changed(ss, oldcgrp, newcgrp, new);
-		}
-	}
-}
-#endif /* CONFIG_MM_OWNER */
-
 /**
  * cgroup_post_fork - called on a new task after adding it to the task list
  * @child: the task in question
diff --git a/kernel/compat.c b/kernel/compat.c
index d52e2ec..42d5654 100644
--- a/kernel/compat.c
+++ b/kernel/compat.c
@@ -24,6 +24,7 @@
 #include <linux/migrate.h>
 #include <linux/posix-timers.h>
 #include <linux/times.h>
+#include <linux/ptrace.h>
 
 #include <asm/uaccess.h>
 
@@ -229,6 +230,7 @@
 		if (copy_to_user(tbuf, &tmp, sizeof(tmp)))
 			return -EFAULT;
 	}
+	force_successful_syscall_return();
 	return compat_jiffies_to_clock_t(jiffies);
 }
 
@@ -894,8 +896,9 @@
 
 	if (tloc) {
 		if (put_user(i,tloc))
-			i = -EFAULT;
+			return -EFAULT;
 	}
+	force_successful_syscall_return();
 	return i;
 }
 
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index 39c1a4c..345ace51 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -240,6 +240,17 @@
 static DEFINE_MUTEX(callback_mutex);
 
 /*
+ * cpuset_buffer_lock protects both the cpuset_name and cpuset_nodelist
+ * buffers.  They are statically allocated to prevent using excess stack
+ * when calling cpuset_print_task_mems_allowed().
+ */
+#define CPUSET_NAME_LEN		(128)
+#define	CPUSET_NODELIST_LEN	(256)
+static char cpuset_name[CPUSET_NAME_LEN];
+static char cpuset_nodelist[CPUSET_NODELIST_LEN];
+static DEFINE_SPINLOCK(cpuset_buffer_lock);
+
+/*
  * This is ugly, but preserves the userspace API for existing cpuset
  * users. If someone tries to mount the "cpuset" filesystem, we
  * silently switch it to mount "cgroup" instead
@@ -2356,6 +2367,29 @@
 	return nodes_intersects(tsk1->mems_allowed, tsk2->mems_allowed);
 }
 
+/**
+ * cpuset_print_task_mems_allowed - prints task's cpuset and mems_allowed
+ * @tsk: pointer to task_struct of some task.
+ *
+ * Description: Prints @tsk's name, cpuset name, and cached copy of its
+ * mems_allowed to the kernel log.  Must hold task_lock(tsk) to allow
+ * dereferencing task_cs(tsk).  The cpuset name is copied through "%s".
+ */
+void cpuset_print_task_mems_allowed(struct task_struct *tsk)
+{
+	struct dentry *dentry;
+
+	dentry = task_cs(tsk)->css.cgroup->dentry;
+	spin_lock(&cpuset_buffer_lock);
+	snprintf(cpuset_name, CPUSET_NAME_LEN, "%s",
+		 dentry ? (const char *)dentry->d_name.name : "/");
+	nodelist_scnprintf(cpuset_nodelist, CPUSET_NODELIST_LEN,
+			   tsk->mems_allowed);
+	printk(KERN_INFO "%s cpuset=%s mems_allowed=%s\n",
+	       tsk->comm, cpuset_name, cpuset_nodelist);
+	spin_unlock(&cpuset_buffer_lock);
+}
+
 /*
  * Collection of memory_pressure is suppressed unless
  * this flag is enabled by writing "1" to the special
diff --git a/kernel/dma-coherent.c b/kernel/dma-coherent.c
index f013a0c..0387074 100644
--- a/kernel/dma-coherent.c
+++ b/kernel/dma-coherent.c
@@ -109,20 +109,40 @@
 int dma_alloc_from_coherent(struct device *dev, ssize_t size,
 				       dma_addr_t *dma_handle, void **ret)
 {
-	struct dma_coherent_mem *mem = dev ? dev->dma_mem : NULL;
+	struct dma_coherent_mem *mem;
 	int order = get_order(size);
+	int pageno;
 
-	if (mem) {
-		int page = bitmap_find_free_region(mem->bitmap, mem->size,
-						     order);
-		if (page >= 0) {
-			*dma_handle = mem->device_base + (page << PAGE_SHIFT);
-			*ret = mem->virt_base + (page << PAGE_SHIFT);
-			memset(*ret, 0, size);
-		} else if (mem->flags & DMA_MEMORY_EXCLUSIVE)
-			*ret = NULL;
+	if (!dev)
+		return 0;
+	mem = dev->dma_mem;
+	if (!mem)
+		return 0;
+	/* size is in bytes but mem->size counts pages: compare in bytes. */
+	if (unlikely(size > (mem->size << PAGE_SHIFT)))
+		return 0;
+	pageno = bitmap_find_free_region(mem->bitmap, mem->size, order);
+	if (pageno >= 0) {
+		/*
+		 * Memory was found in the per-device arena.
+		 */
+		*dma_handle = mem->device_base + (pageno << PAGE_SHIFT);
+		*ret = mem->virt_base + (pageno << PAGE_SHIFT);
+		memset(*ret, 0, size);
+	} else if (mem->flags & DMA_MEMORY_EXCLUSIVE) {
+		/*
+		 * The per-device arena is exhausted and we are not
+		 * permitted to fall back to generic memory.
+		 */
+		*ret = NULL;
+	} else {
+		/*
+		 * The per-device arena is exhausted and we are
+		 * permitted to fall back to generic memory.
+		 */
+		return 0;
 	}
-	return (mem != NULL);
+	return 1;
 }
 EXPORT_SYMBOL(dma_alloc_from_coherent);
 
diff --git a/kernel/exit.c b/kernel/exit.c
index c9e5a1c..c7740fa 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -642,35 +642,31 @@
 	/*
 	 * We found no owner yet mm_users > 1: this implies that we are
 	 * most likely racing with swapoff (try_to_unuse()) or /proc or
-	 * ptrace or page migration (get_task_mm()).  Mark owner as NULL,
-	 * so that subsystems can understand the callback and take action.
+	 * ptrace or page migration (get_task_mm()).  Mark owner as NULL.
 	 */
-	down_write(&mm->mmap_sem);
-	cgroup_mm_owner_callbacks(mm->owner, NULL);
 	mm->owner = NULL;
-	up_write(&mm->mmap_sem);
 	return;
 
 assign_new_owner:
 	BUG_ON(c == p);
 	get_task_struct(c);
-	read_unlock(&tasklist_lock);
-	down_write(&mm->mmap_sem);
 	/*
 	 * The task_lock protects c->mm from changing.
 	 * We always want mm->owner->mm == mm
 	 */
 	task_lock(c);
+	/*
+	 * Delay read_unlock() till we have the task_lock()
+	 * to ensure that c does not slip away underneath us
+	 */
+	read_unlock(&tasklist_lock);
 	if (c->mm != mm) {
 		task_unlock(c);
-		up_write(&mm->mmap_sem);
 		put_task_struct(c);
 		goto retry;
 	}
-	cgroup_mm_owner_callbacks(mm->owner, c);
 	mm->owner = c;
 	task_unlock(c);
-	up_write(&mm->mmap_sem);
 	put_task_struct(c);
 }
 #endif /* CONFIG_MM_OWNER */
@@ -1055,10 +1051,7 @@
 				preempt_count());
 
 	acct_update_integrals(tsk);
-	if (tsk->mm) {
-		update_hiwater_rss(tsk->mm);
-		update_hiwater_vm(tsk->mm);
-	}
+
 	group_dead = atomic_dec_and_test(&tsk->signal->live);
 	if (group_dead) {
 		hrtimer_cancel(&tsk->signal->real_timer);
diff --git a/kernel/fork.c b/kernel/fork.c
index 43cbf30..7b8f2a7 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -400,6 +400,18 @@
 #define allocate_mm()	(kmem_cache_alloc(mm_cachep, GFP_KERNEL))
 #define free_mm(mm)	(kmem_cache_free(mm_cachep, (mm)))
 
+static unsigned long default_dump_filter = MMF_DUMP_FILTER_DEFAULT;
+
+static int __init coredump_filter_setup(char *s)
+{
+	default_dump_filter =
+		(simple_strtoul(s, NULL, 0) << MMF_DUMP_FILTER_SHIFT) &
+		MMF_DUMP_FILTER_MASK;
+	return 1;
+}
+
+__setup("coredump_filter=", coredump_filter_setup);
+
 #include <linux/init_task.h>
 
 static struct mm_struct * mm_init(struct mm_struct * mm, struct task_struct *p)
@@ -408,8 +420,7 @@
 	atomic_set(&mm->mm_count, 1);
 	init_rwsem(&mm->mmap_sem);
 	INIT_LIST_HEAD(&mm->mmlist);
-	mm->flags = (current->mm) ? current->mm->flags
-				  : MMF_DUMP_FILTER_DEFAULT;
+	mm->flags = (current->mm) ? current->mm->flags : default_dump_filter;
 	mm->core_state = NULL;
 	mm->nr_ptes = 0;
 	set_mm_counter(mm, file_rss, 0);
@@ -758,7 +769,7 @@
 {
 	struct sighand_struct *sig;
 
-	if (clone_flags & (CLONE_SIGHAND | CLONE_THREAD)) {
+	if (clone_flags & CLONE_SIGHAND) {
 		atomic_inc(&current->sighand->count);
 		return 0;
 	}
diff --git a/kernel/kmod.c b/kernel/kmod.c
index b46dbb9..a27a5f6 100644
--- a/kernel/kmod.c
+++ b/kernel/kmod.c
@@ -51,8 +51,8 @@
 
 /**
  * request_module - try to load a kernel module
- * @fmt:     printf style format string for the name of the module
- * @varargs: arguements as specified in the format string
+ * @fmt: printf style format string for the name of the module
+ * @...: arguments as specified in the format string
  *
  * Load a module using the user mode module loader. The function returns
  * zero on success or a negative errno code on failure. Note that a
diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index 9f8a3f2..1b9cbdc 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -69,7 +69,7 @@
 /* NOTE: change this value only with kprobe_mutex held */
 static bool kprobe_enabled;
 
-DEFINE_MUTEX(kprobe_mutex);		/* Protects kprobe_table */
+static DEFINE_MUTEX(kprobe_mutex);	/* Protects kprobe_table */
 static DEFINE_PER_CPU(struct kprobe *, kprobe_instance) = NULL;
 static struct {
 	spinlock_t lock ____cacheline_aligned_in_smp;
@@ -115,6 +115,7 @@
 	SLOT_USED = 2,
 };
 
+static DEFINE_MUTEX(kprobe_insn_mutex);	/* Protects kprobe_insn_pages */
 static struct hlist_head kprobe_insn_pages;
 static int kprobe_garbage_slots;
 static int collect_garbage_slots(void);
@@ -144,10 +145,10 @@
 }
 
 /**
- * get_insn_slot() - Find a slot on an executable page for an instruction.
+ * __get_insn_slot() - Find a slot on an executable page for an instruction.
  * We allocate an executable page if there's no room on existing ones.
  */
-kprobe_opcode_t __kprobes *get_insn_slot(void)
+static kprobe_opcode_t __kprobes *__get_insn_slot(void)
 {
 	struct kprobe_insn_page *kip;
 	struct hlist_node *pos;
@@ -196,6 +197,15 @@
 	return kip->insns;
 }
 
+kprobe_opcode_t __kprobes *get_insn_slot(void)
+{
+	kprobe_opcode_t *ret;
+	mutex_lock(&kprobe_insn_mutex);
+	ret = __get_insn_slot();
+	mutex_unlock(&kprobe_insn_mutex);
+	return ret;
+}
+
 /* Return 1 if all garbages are collected, otherwise 0. */
 static int __kprobes collect_one_slot(struct kprobe_insn_page *kip, int idx)
 {
@@ -226,9 +236,13 @@
 {
 	struct kprobe_insn_page *kip;
 	struct hlist_node *pos, *next;
+	int safety;
 
 	/* Ensure no-one is preepmted on the garbages */
-	if (check_safety() != 0)
+	mutex_unlock(&kprobe_insn_mutex);
+	safety = check_safety();
+	mutex_lock(&kprobe_insn_mutex);
+	if (safety != 0)
 		return -EAGAIN;
 
 	hlist_for_each_entry_safe(kip, pos, next, &kprobe_insn_pages, hlist) {
@@ -251,6 +265,7 @@
 	struct kprobe_insn_page *kip;
 	struct hlist_node *pos;
 
+	mutex_lock(&kprobe_insn_mutex);
 	hlist_for_each_entry(kip, pos, &kprobe_insn_pages, hlist) {
 		if (kip->insns <= slot &&
 		    slot < kip->insns + (INSNS_PER_PAGE * MAX_INSN_SIZE)) {
@@ -267,6 +282,8 @@
 
 	if (dirty && ++kprobe_garbage_slots > INSNS_PER_PAGE)
 		collect_garbage_slots();
+
+	mutex_unlock(&kprobe_insn_mutex);
 }
 #endif
 
@@ -310,7 +327,7 @@
 	struct kprobe *kp;
 
 	list_for_each_entry_rcu(kp, &p->list, list) {
-		if (kp->pre_handler) {
+		if (kp->pre_handler && !kprobe_gone(kp)) {
 			set_kprobe_instance(kp);
 			if (kp->pre_handler(kp, regs))
 				return 1;
@@ -326,7 +343,7 @@
 	struct kprobe *kp;
 
 	list_for_each_entry_rcu(kp, &p->list, list) {
-		if (kp->post_handler) {
+		if (kp->post_handler && !kprobe_gone(kp)) {
 			set_kprobe_instance(kp);
 			kp->post_handler(kp, regs, flags);
 			reset_kprobe_instance();
@@ -393,7 +410,7 @@
 		hlist_add_head(&ri->hlist, head);
 }
 
-void kretprobe_hash_lock(struct task_struct *tsk,
+void __kprobes kretprobe_hash_lock(struct task_struct *tsk,
 			 struct hlist_head **head, unsigned long *flags)
 {
 	unsigned long hash = hash_ptr(tsk, KPROBE_HASH_BITS);
@@ -404,13 +421,15 @@
 	spin_lock_irqsave(hlist_lock, *flags);
 }
 
-static void kretprobe_table_lock(unsigned long hash, unsigned long *flags)
+static void __kprobes kretprobe_table_lock(unsigned long hash,
+	unsigned long *flags)
 {
 	spinlock_t *hlist_lock = kretprobe_table_lock_ptr(hash);
 	spin_lock_irqsave(hlist_lock, *flags);
 }
 
-void kretprobe_hash_unlock(struct task_struct *tsk, unsigned long *flags)
+void __kprobes kretprobe_hash_unlock(struct task_struct *tsk,
+	unsigned long *flags)
 {
 	unsigned long hash = hash_ptr(tsk, KPROBE_HASH_BITS);
 	spinlock_t *hlist_lock;
@@ -419,7 +438,7 @@
 	spin_unlock_irqrestore(hlist_lock, *flags);
 }
 
-void kretprobe_table_unlock(unsigned long hash, unsigned long *flags)
+void __kprobes kretprobe_table_unlock(unsigned long hash, unsigned long *flags)
 {
 	spinlock_t *hlist_lock = kretprobe_table_lock_ptr(hash);
 	spin_unlock_irqrestore(hlist_lock, *flags);
@@ -526,9 +545,10 @@
 	ap->addr = p->addr;
 	ap->pre_handler = aggr_pre_handler;
 	ap->fault_handler = aggr_fault_handler;
-	if (p->post_handler)
+	/* We don't care about a kprobe which has gone. */
+	if (p->post_handler && !kprobe_gone(p))
 		ap->post_handler = aggr_post_handler;
-	if (p->break_handler)
+	if (p->break_handler && !kprobe_gone(p))
 		ap->break_handler = aggr_break_handler;
 
 	INIT_LIST_HEAD(&ap->list);
@@ -547,17 +567,41 @@
 	int ret = 0;
 	struct kprobe *ap;
 
+	if (kprobe_gone(old_p)) {
+		/*
+		 * Attempting to insert a new probe at the same location that
+		 * had a probe in a module vaddr area which has already been
+		 * freed. So, the instruction slot has already been
+		 * released. We need a new slot for the new probe.
+		 */
+		ret = arch_prepare_kprobe(old_p);
+		if (ret)
+			return ret;
+	}
 	if (old_p->pre_handler == aggr_pre_handler) {
 		copy_kprobe(old_p, p);
 		ret = add_new_kprobe(old_p, p);
+		ap = old_p;
 	} else {
 		ap = kzalloc(sizeof(struct kprobe), GFP_KERNEL);
-		if (!ap)
+		if (!ap) {
+			if (kprobe_gone(old_p))
+				arch_remove_kprobe(old_p);
 			return -ENOMEM;
+		}
 		add_aggr_kprobe(ap, old_p);
 		copy_kprobe(ap, p);
 		ret = add_new_kprobe(ap, p);
 	}
+	if (kprobe_gone(old_p)) {
+		/*
+		 * If the old_p has gone, its breakpoint has been disarmed.
+		 * We have to arm it again after preparing real kprobes.
+		 */
+		ap->flags &= ~KPROBE_FLAG_GONE;
+		if (kprobe_enabled)
+			arch_arm_kprobe(ap);
+	}
 	return ret;
 }
 
@@ -600,8 +644,7 @@
 	return (kprobe_opcode_t *)(((char *)addr) + p->offset);
 }
 
-static int __kprobes __register_kprobe(struct kprobe *p,
-	unsigned long called_from)
+int __kprobes register_kprobe(struct kprobe *p)
 {
 	int ret = 0;
 	struct kprobe *old_p;
@@ -620,28 +663,30 @@
 		return -EINVAL;
 	}
 
-	p->mod_refcounted = 0;
-
+	p->flags = 0;
 	/*
 	 * Check if are we probing a module.
 	 */
 	probed_mod = __module_text_address((unsigned long) p->addr);
 	if (probed_mod) {
-		struct module *calling_mod;
-		calling_mod = __module_text_address(called_from);
 		/*
-		 * We must allow modules to probe themself and in this case
-		 * avoid incrementing the module refcount, so as to allow
-		 * unloading of self probing modules.
+		 * We must hold a refcount of the probed module while updating
+		 * its code to prohibit unexpected unloading.
 		 */
-		if (calling_mod && calling_mod != probed_mod) {
-			if (unlikely(!try_module_get(probed_mod))) {
-				preempt_enable();
-				return -EINVAL;
-			}
-			p->mod_refcounted = 1;
-		} else
-			probed_mod = NULL;
+		if (unlikely(!try_module_get(probed_mod))) {
+			preempt_enable();
+			return -EINVAL;
+		}
+		/*
+		 * If the module freed .init.text, we couldn't insert
+		 * kprobes in there.
+		 */
+		if (within_module_init((unsigned long)p->addr, probed_mod) &&
+		    probed_mod->state != MODULE_STATE_COMING) {
+			module_put(probed_mod);
+			preempt_enable();
+			return -EINVAL;
+		}
 	}
 	preempt_enable();
 
@@ -668,8 +713,9 @@
 out:
 	mutex_unlock(&kprobe_mutex);
 
-	if (ret && probed_mod)
+	if (probed_mod)
 		module_put(probed_mod);
+
 	return ret;
 }
 
@@ -697,16 +743,16 @@
 	     list_is_singular(&old_p->list))) {
 		/*
 		 * Only probe on the hash list. Disarm only if kprobes are
-		 * enabled - otherwise, the breakpoint would already have
-		 * been removed. We save on flushing icache.
+		 * enabled and not gone - otherwise, the breakpoint would
+		 * already have been removed. We save on flushing icache.
 		 */
-		if (kprobe_enabled)
+		if (kprobe_enabled && !kprobe_gone(old_p))
 			arch_disarm_kprobe(p);
 		hlist_del_rcu(&old_p->hlist);
 	} else {
-		if (p->break_handler)
+		if (p->break_handler && !kprobe_gone(p))
 			old_p->break_handler = NULL;
-		if (p->post_handler) {
+		if (p->post_handler && !kprobe_gone(p)) {
 			list_for_each_entry_rcu(list_p, &old_p->list, list) {
 				if ((list_p != p) && (list_p->post_handler))
 					goto noclean;
@@ -721,39 +767,27 @@
 
 static void __kprobes __unregister_kprobe_bottom(struct kprobe *p)
 {
-	struct module *mod;
 	struct kprobe *old_p;
 
-	if (p->mod_refcounted) {
-		/*
-		 * Since we've already incremented refcount,
-		 * we don't need to disable preemption.
-		 */
-		mod = module_text_address((unsigned long)p->addr);
-		if (mod)
-			module_put(mod);
-	}
-
-	if (list_empty(&p->list) || list_is_singular(&p->list)) {
-		if (!list_empty(&p->list)) {
-			/* "p" is the last child of an aggr_kprobe */
-			old_p = list_entry(p->list.next, struct kprobe, list);
-			list_del(&p->list);
-			kfree(old_p);
-		}
+	if (list_empty(&p->list))
 		arch_remove_kprobe(p);
+	else if (list_is_singular(&p->list)) {
+		/* "p" is the last child of an aggr_kprobe */
+		old_p = list_entry(p->list.next, struct kprobe, list);
+		list_del(&p->list);
+		arch_remove_kprobe(old_p);
+		kfree(old_p);
 	}
 }
 
-static int __register_kprobes(struct kprobe **kps, int num,
-	unsigned long called_from)
+int __kprobes register_kprobes(struct kprobe **kps, int num)
 {
 	int i, ret = 0;
 
 	if (num <= 0)
 		return -EINVAL;
 	for (i = 0; i < num; i++) {
-		ret = __register_kprobe(kps[i], called_from);
+		ret = register_kprobe(kps[i]);
 		if (ret < 0) {
 			if (i > 0)
 				unregister_kprobes(kps, i);
@@ -763,26 +797,11 @@
 	return ret;
 }
 
-/*
- * Registration and unregistration functions for kprobe.
- */
-int __kprobes register_kprobe(struct kprobe *p)
-{
-	return __register_kprobes(&p, 1,
-				  (unsigned long)__builtin_return_address(0));
-}
-
 void __kprobes unregister_kprobe(struct kprobe *p)
 {
 	unregister_kprobes(&p, 1);
 }
 
-int __kprobes register_kprobes(struct kprobe **kps, int num)
-{
-	return __register_kprobes(kps, num,
-				  (unsigned long)__builtin_return_address(0));
-}
-
 void __kprobes unregister_kprobes(struct kprobe **kps, int num)
 {
 	int i;
@@ -811,8 +830,7 @@
 	return (unsigned long)entry;
 }
 
-static int __register_jprobes(struct jprobe **jps, int num,
-	unsigned long called_from)
+int __kprobes register_jprobes(struct jprobe **jps, int num)
 {
 	struct jprobe *jp;
 	int ret = 0, i;
@@ -830,7 +848,7 @@
 			/* Todo: Verify probepoint is a function entry point */
 			jp->kp.pre_handler = setjmp_pre_handler;
 			jp->kp.break_handler = longjmp_break_handler;
-			ret = __register_kprobe(&jp->kp, called_from);
+			ret = register_kprobe(&jp->kp);
 		}
 		if (ret < 0) {
 			if (i > 0)
@@ -843,8 +861,7 @@
 
 int __kprobes register_jprobe(struct jprobe *jp)
 {
-	return __register_jprobes(&jp, 1,
-		(unsigned long)__builtin_return_address(0));
+	return register_jprobes(&jp, 1);
 }
 
 void __kprobes unregister_jprobe(struct jprobe *jp)
@@ -852,12 +869,6 @@
 	unregister_jprobes(&jp, 1);
 }
 
-int __kprobes register_jprobes(struct jprobe **jps, int num)
-{
-	return __register_jprobes(jps, num,
-		(unsigned long)__builtin_return_address(0));
-}
-
 void __kprobes unregister_jprobes(struct jprobe **jps, int num)
 {
 	int i;
@@ -920,8 +931,7 @@
 	return 0;
 }
 
-static int __kprobes __register_kretprobe(struct kretprobe *rp,
-					  unsigned long called_from)
+int __kprobes register_kretprobe(struct kretprobe *rp)
 {
 	int ret = 0;
 	struct kretprobe_instance *inst;
@@ -967,21 +977,20 @@
 
 	rp->nmissed = 0;
 	/* Establish function entry probe point */
-	ret = __register_kprobe(&rp->kp, called_from);
+	ret = register_kprobe(&rp->kp);
 	if (ret != 0)
 		free_rp_inst(rp);
 	return ret;
 }
 
-static int __register_kretprobes(struct kretprobe **rps, int num,
-	unsigned long called_from)
+int __kprobes register_kretprobes(struct kretprobe **rps, int num)
 {
 	int ret = 0, i;
 
 	if (num <= 0)
 		return -EINVAL;
 	for (i = 0; i < num; i++) {
-		ret = __register_kretprobe(rps[i], called_from);
+		ret = register_kretprobe(rps[i]);
 		if (ret < 0) {
 			if (i > 0)
 				unregister_kretprobes(rps, i);
@@ -991,23 +1000,11 @@
 	return ret;
 }
 
-int __kprobes register_kretprobe(struct kretprobe *rp)
-{
-	return __register_kretprobes(&rp, 1,
-			(unsigned long)__builtin_return_address(0));
-}
-
 void __kprobes unregister_kretprobe(struct kretprobe *rp)
 {
 	unregister_kretprobes(&rp, 1);
 }
 
-int __kprobes register_kretprobes(struct kretprobe **rps, int num)
-{
-	return __register_kretprobes(rps, num,
-			(unsigned long)__builtin_return_address(0));
-}
-
 void __kprobes unregister_kretprobes(struct kretprobe **rps, int num)
 {
 	int i;
@@ -1055,6 +1052,72 @@
 
 #endif /* CONFIG_KRETPROBES */
 
+/* Set the kprobe gone and remove its instruction buffer. */
+static void __kprobes kill_kprobe(struct kprobe *p)
+{
+	struct kprobe *kp;
+	p->flags |= KPROBE_FLAG_GONE;
+	if (p->pre_handler == aggr_pre_handler) {
+		/*
+		 * If this is an aggr_kprobe, we have to list all the
+		 * chained probes and mark them GONE.
+		 */
+		list_for_each_entry_rcu(kp, &p->list, list)
+			kp->flags |= KPROBE_FLAG_GONE;
+		p->post_handler = NULL;
+		p->break_handler = NULL;
+	}
+	/*
+	 * Here, we can remove insn_slot safely, because no thread calls
+	 * the original probed function (which will be freed soon) any more.
+	 */
+	arch_remove_kprobe(p);
+}
+
+/* Module notifier call back, checking kprobes on the module */
+static int __kprobes kprobes_module_callback(struct notifier_block *nb,
+					     unsigned long val, void *data)
+{
+	struct module *mod = data;
+	struct hlist_head *head;
+	struct hlist_node *node;
+	struct kprobe *p;
+	unsigned int i;
+	int checkcore = (val == MODULE_STATE_GOING);
+
+	if (val != MODULE_STATE_GOING && val != MODULE_STATE_LIVE)
+		return NOTIFY_DONE;
+
+	/*
+	 * When MODULE_STATE_GOING was notified, both of module .text and
+	 * .init.text sections would be freed. When MODULE_STATE_LIVE was
+	 * notified, only .init.text section would be freed. We need to
+	 * disable kprobes which have been inserted in the sections.
+	 */
+	mutex_lock(&kprobe_mutex);
+	for (i = 0; i < KPROBE_TABLE_SIZE; i++) {
+		head = &kprobe_table[i];
+		hlist_for_each_entry_rcu(p, node, head, hlist)
+			if (within_module_init((unsigned long)p->addr, mod) ||
+			    (checkcore &&
+			     within_module_core((unsigned long)p->addr, mod))) {
+				/*
+				 * The vaddr this probe is installed will soon
+				 * be vfreed but not synced to disk. Hence,
+				 * disarming the breakpoint isn't needed.
+				 */
+				kill_kprobe(p);
+			}
+	}
+	mutex_unlock(&kprobe_mutex);
+	return NOTIFY_DONE;
+}
+
+static struct notifier_block kprobe_module_nb = {
+	.notifier_call = kprobes_module_callback,
+	.priority = 0
+};
+
 static int __init init_kprobes(void)
 {
 	int i, err = 0;
@@ -1111,6 +1174,9 @@
 	err = arch_init_kprobes();
 	if (!err)
 		err = register_die_notifier(&kprobe_exceptions_nb);
+	if (!err)
+		err = register_module_notifier(&kprobe_module_nb);
+
 	kprobes_initialized = (err == 0);
 
 	if (!err)
@@ -1131,10 +1197,12 @@
 	else
 		kprobe_type = "k";
 	if (sym)
-		seq_printf(pi, "%p  %s  %s+0x%x  %s\n", p->addr, kprobe_type,
-			sym, offset, (modname ? modname : " "));
+		seq_printf(pi, "%p  %s  %s+0x%x  %s %s\n", p->addr, kprobe_type,
+			sym, offset, (modname ? modname : " "),
+			(kprobe_gone(p) ? "[GONE]" : ""));
 	else
-		seq_printf(pi, "%p  %s  %p\n", p->addr, kprobe_type, p->addr);
+		seq_printf(pi, "%p  %s  %p %s\n", p->addr, kprobe_type, p->addr,
+			(kprobe_gone(p) ? "[GONE]" : ""));
 }
 
 static void __kprobes *kprobe_seq_start(struct seq_file *f, loff_t *pos)
@@ -1215,7 +1283,8 @@
 	for (i = 0; i < KPROBE_TABLE_SIZE; i++) {
 		head = &kprobe_table[i];
 		hlist_for_each_entry_rcu(p, node, head, hlist)
-			arch_arm_kprobe(p);
+			if (!kprobe_gone(p))
+				arch_arm_kprobe(p);
 	}
 
 	kprobe_enabled = true;
@@ -1244,7 +1313,7 @@
 	for (i = 0; i < KPROBE_TABLE_SIZE; i++) {
 		head = &kprobe_table[i];
 		hlist_for_each_entry_rcu(p, node, head, hlist) {
-			if (!arch_trampoline_kprobe(p))
+			if (!arch_trampoline_kprobe(p) && !kprobe_gone(p))
 				arch_disarm_kprobe(p);
 		}
 	}
diff --git a/kernel/module.c b/kernel/module.c
index f47cce9..496dcb5 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -43,7 +43,6 @@
 #include <linux/device.h>
 #include <linux/string.h>
 #include <linux/mutex.h>
-#include <linux/unwind.h>
 #include <linux/rculist.h>
 #include <asm/uaccess.h>
 #include <asm/cacheflush.h>
@@ -1449,8 +1448,6 @@
 	remove_sect_attrs(mod);
 	mod_kobject_remove(mod);
 
-	unwind_remove_table(mod->unwind_info, 0);
-
 	/* Arch-specific cleanup. */
 	module_arch_cleanup(mod);
 
@@ -1867,7 +1864,6 @@
 	unsigned int symindex = 0;
 	unsigned int strindex = 0;
 	unsigned int modindex, versindex, infoindex, pcpuindex;
-	unsigned int unwindex = 0;
 	unsigned int num_kp, num_mcount;
 	struct kernel_param *kp;
 	struct module *mod;
@@ -1957,9 +1953,6 @@
 	versindex = find_sec(hdr, sechdrs, secstrings, "__versions");
 	infoindex = find_sec(hdr, sechdrs, secstrings, ".modinfo");
 	pcpuindex = find_pcpusec(hdr, sechdrs, secstrings);
-#ifdef ARCH_UNWIND_SECTION_NAME
-	unwindex = find_sec(hdr, sechdrs, secstrings, ARCH_UNWIND_SECTION_NAME);
-#endif
 
 	/* Don't keep modinfo and version sections. */
 	sechdrs[infoindex].sh_flags &= ~(unsigned long)SHF_ALLOC;
@@ -1969,8 +1962,6 @@
 	sechdrs[symindex].sh_flags |= SHF_ALLOC;
 	sechdrs[strindex].sh_flags |= SHF_ALLOC;
 #endif
-	if (unwindex)
-		sechdrs[unwindex].sh_flags |= SHF_ALLOC;
 
 	/* Check module struct version now, before we try to use module. */
 	if (!check_modstruct_version(sechdrs, versindex, mod)) {
@@ -2267,11 +2258,6 @@
 	add_sect_attrs(mod, hdr->e_shnum, secstrings, sechdrs);
 	add_notes_attrs(mod, hdr->e_shnum, secstrings, sechdrs);
 
-	/* Size of section 0 is 0, so this works well if no unwind info. */
-	mod->unwind_info = unwind_add_table(mod,
-					    (void *)sechdrs[unwindex].sh_addr,
-					    sechdrs[unwindex].sh_size);
-
 	/* Get rid of temporary copy */
 	vfree(hdr);
 
@@ -2366,11 +2352,12 @@
 	/* Now it's a first class citizen!  Wake up anyone waiting for it. */
 	mod->state = MODULE_STATE_LIVE;
 	wake_up(&module_wq);
+	blocking_notifier_call_chain(&module_notify_list,
+				     MODULE_STATE_LIVE, mod);
 
 	mutex_lock(&module_mutex);
 	/* Drop initial reference. */
 	module_put(mod);
-	unwind_remove_table(mod->unwind_info, 1);
 	module_free(mod, mod->module_init);
 	mod->module_init = NULL;
 	mod->init_size = 0;
@@ -2405,7 +2392,7 @@
 	unsigned long nextval;
 
 	/* At worse, next value is at end of module */
-	if (within(addr, mod->module_init, mod->init_size))
+	if (within_module_init(addr, mod))
 		nextval = (unsigned long)mod->module_init+mod->init_text_size;
 	else
 		nextval = (unsigned long)mod->module_core+mod->core_text_size;
@@ -2453,8 +2440,8 @@
 
 	preempt_disable();
 	list_for_each_entry_rcu(mod, &modules, list) {
-		if (within(addr, mod->module_init, mod->init_size)
-		    || within(addr, mod->module_core, mod->core_size)) {
+		if (within_module_init(addr, mod) ||
+		    within_module_core(addr, mod)) {
 			if (modname)
 				*modname = mod->name;
 			ret = get_ksymbol(mod, addr, size, offset);
@@ -2476,8 +2463,8 @@
 
 	preempt_disable();
 	list_for_each_entry_rcu(mod, &modules, list) {
-		if (within(addr, mod->module_init, mod->init_size) ||
-		    within(addr, mod->module_core, mod->core_size)) {
+		if (within_module_init(addr, mod) ||
+		    within_module_core(addr, mod)) {
 			const char *sym;
 
 			sym = get_ksymbol(mod, addr, NULL, NULL);
@@ -2500,8 +2487,8 @@
 
 	preempt_disable();
 	list_for_each_entry_rcu(mod, &modules, list) {
-		if (within(addr, mod->module_init, mod->init_size) ||
-		    within(addr, mod->module_core, mod->core_size)) {
+		if (within_module_init(addr, mod) ||
+		    within_module_core(addr, mod)) {
 			const char *sym;
 
 			sym = get_ksymbol(mod, addr, size, offset);
@@ -2720,7 +2707,7 @@
 	preempt_disable();
 
 	list_for_each_entry_rcu(mod, &modules, list) {
-		if (within(addr, mod->module_core, mod->core_size)) {
+		if (within_module_core(addr, mod)) {
 			preempt_enable();
 			return 1;
 		}
diff --git a/kernel/panic.c b/kernel/panic.c
index 13f0634..2a2ff36 100644
--- a/kernel/panic.c
+++ b/kernel/panic.c
@@ -299,6 +299,8 @@
 {
 	if (!oops_id)
 		get_random_bytes(&oops_id, sizeof(oops_id));
+	else
+		oops_id++;
 
 	return 0;
 }
diff --git a/kernel/profile.c b/kernel/profile.c
index d18e2d2..784933ac 100644
--- a/kernel/profile.c
+++ b/kernel/profile.c
@@ -445,7 +445,6 @@
 #ifdef CONFIG_PROC_FS
 #include <linux/proc_fs.h>
 #include <asm/uaccess.h>
-#include <asm/ptrace.h>
 
 static int prof_cpu_mask_read_proc(char *page, char **start, off_t off,
 			int count, int *eof, void *data)
diff --git a/kernel/signal.c b/kernel/signal.c
index 8e95855..3152ac3 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -858,7 +858,8 @@
 			q->info.si_signo = sig;
 			q->info.si_errno = 0;
 			q->info.si_code = SI_USER;
-			q->info.si_pid = task_pid_vnr(current);
+			q->info.si_pid = task_tgid_nr_ns(current,
+							task_active_pid_ns(t));
 			q->info.si_uid = current_uid();
 			break;
 		case (unsigned long) SEND_SIG_PRIV:
diff --git a/kernel/sys.c b/kernel/sys.c
index d356d79..4a43617c 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -33,6 +33,7 @@
 #include <linux/task_io_accounting_ops.h>
 #include <linux/seccomp.h>
 #include <linux/cpu.h>
+#include <linux/ptrace.h>
 
 #include <linux/compat.h>
 #include <linux/syscalls.h>
@@ -927,6 +928,7 @@
 		if (copy_to_user(tbuf, &tmp, sizeof(struct tms)))
 			return -EFAULT;
 	}
+	force_successful_syscall_return();
 	return (long) jiffies_64_to_clock_t(get_jiffies_64());
 }
 
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index ff6d45c..92f6e5b 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -87,10 +87,6 @@
 #endif /* #ifdef CONFIG_RCU_TORTURE_TEST */
 
 /* Constants used for minimum and  maximum */
-#if defined(CONFIG_HIGHMEM) || defined(CONFIG_DETECT_SOFTLOCKUP)
-static int one = 1;
-#endif
-
 #ifdef CONFIG_DETECT_SOFTLOCKUP
 static int sixty = 60;
 static int neg_one = -1;
@@ -101,6 +97,7 @@
 #endif
 
 static int zero;
+static int one = 1;
 static int one_hundred = 100;
 
 /* this is needed for the proc_dointvec_minmax for [fs_]overflow UID and GID */
@@ -952,12 +949,22 @@
 		.data		= &dirty_background_ratio,
 		.maxlen		= sizeof(dirty_background_ratio),
 		.mode		= 0644,
-		.proc_handler	= &proc_dointvec_minmax,
+		.proc_handler	= &dirty_background_ratio_handler,
 		.strategy	= &sysctl_intvec,
 		.extra1		= &zero,
 		.extra2		= &one_hundred,
 	},
 	{
+		.ctl_name	= CTL_UNNUMBERED,
+		.procname	= "dirty_background_bytes",
+		.data		= &dirty_background_bytes,
+		.maxlen		= sizeof(dirty_background_bytes),
+		.mode		= 0644,
+		.proc_handler	= &dirty_background_bytes_handler,
+		.strategy	= &sysctl_intvec,
+		.extra1		= &one,
+	},
+	{
 		.ctl_name	= VM_DIRTY_RATIO,
 		.procname	= "dirty_ratio",
 		.data		= &vm_dirty_ratio,
@@ -969,6 +976,16 @@
 		.extra2		= &one_hundred,
 	},
 	{
+		.ctl_name	= CTL_UNNUMBERED,
+		.procname	= "dirty_bytes",
+		.data		= &vm_dirty_bytes,
+		.maxlen		= sizeof(vm_dirty_bytes),
+		.mode		= 0644,
+		.proc_handler	= &dirty_bytes_handler,
+		.strategy	= &sysctl_intvec,
+		.extra1		= &one,
+	},
+	{
 		.procname	= "dirty_writeback_centisecs",
 		.data		= &dirty_writeback_interval,
 		.maxlen		= sizeof(dirty_writeback_interval),
diff --git a/kernel/test_kprobes.c b/kernel/test_kprobes.c
index 06b6395..4f10451 100644
--- a/kernel/test_kprobes.c
+++ b/kernel/test_kprobes.c
@@ -22,21 +22,11 @@
 
 static u32 rand1, preh_val, posth_val, jph_val;
 static int errors, handler_errors, num_tests;
+static u32 (*target)(u32 value);
+static u32 (*target2)(u32 value);
 
 static noinline u32 kprobe_target(u32 value)
 {
-	/*
-	 * gcc ignores noinline on some architectures unless we stuff
-	 * sufficient lard into the function. The get_kprobe() here is
-	 * just for that.
-	 *
-	 * NOTE: We aren't concerned about the correctness of get_kprobe()
-	 * here; hence, this call is neither under !preempt nor with the
-	 * kprobe_mutex held. This is fine(tm)
-	 */
-	if (get_kprobe((void *)0xdeadbeef))
-		printk(KERN_INFO "Kprobe smoke test: probe on 0xdeadbeef!\n");
-
 	return (value / div_factor);
 }
 
@@ -74,7 +64,7 @@
 		return ret;
 	}
 
-	ret = kprobe_target(rand1);
+	ret = target(rand1);
 	unregister_kprobe(&kp);
 
 	if (preh_val == 0) {
@@ -92,6 +82,84 @@
 	return 0;
 }
 
+static noinline u32 kprobe_target2(u32 value)
+{
+	return (value / div_factor) + 1;
+}
+
+static int kp_pre_handler2(struct kprobe *p, struct pt_regs *regs)
+{
+	preh_val = (rand1 / div_factor) + 1;
+	return 0;
+}
+
+static void kp_post_handler2(struct kprobe *p, struct pt_regs *regs,
+		unsigned long flags)
+{
+	if (preh_val != (rand1 / div_factor) + 1) {
+		handler_errors++;
+		printk(KERN_ERR "Kprobe smoke test failed: "
+				"incorrect value in post_handler2\n");
+	}
+	posth_val = preh_val + div_factor;
+}
+
+static struct kprobe kp2 = {
+	.symbol_name = "kprobe_target2",
+	.pre_handler = kp_pre_handler2,
+	.post_handler = kp_post_handler2
+};
+
+static int test_kprobes(void)
+{
+	int ret;
+	struct kprobe *kps[2] = {&kp, &kp2};
+
+	kp.addr = 0; /* addr should be cleared for reusing kprobe. */
+	ret = register_kprobes(kps, 2);
+	if (ret < 0) {
+		printk(KERN_ERR "Kprobe smoke test failed: "
+				"register_kprobes returned %d\n", ret);
+		return ret;
+	}
+
+	preh_val = 0;
+	posth_val = 0;
+	ret = target(rand1);
+
+	if (preh_val == 0) {
+		printk(KERN_ERR "Kprobe smoke test failed: "
+				"kprobe pre_handler not called\n");
+		handler_errors++;
+	}
+
+	if (posth_val == 0) {
+		printk(KERN_ERR "Kprobe smoke test failed: "
+				"kprobe post_handler not called\n");
+		handler_errors++;
+	}
+
+	preh_val = 0;
+	posth_val = 0;
+	ret = target2(rand1);
+
+	if (preh_val == 0) {
+		printk(KERN_ERR "Kprobe smoke test failed: "
+				"kprobe pre_handler2 not called\n");
+		handler_errors++;
+	}
+
+	if (posth_val == 0) {
+		printk(KERN_ERR "Kprobe smoke test failed: "
+				"kprobe post_handler2 not called\n");
+		handler_errors++;
+	}
+
+	unregister_kprobes(kps, 2);
+	return 0;
+
+}
+
 static u32 j_kprobe_target(u32 value)
 {
 	if (value != rand1) {
@@ -121,7 +189,7 @@
 		return ret;
 	}
 
-	ret = kprobe_target(rand1);
+	ret = target(rand1);
 	unregister_jprobe(&jp);
 	if (jph_val == 0) {
 		printk(KERN_ERR "Kprobe smoke test failed: "
@@ -132,6 +200,43 @@
 	return 0;
 }
 
+static struct jprobe jp2 = {
+	.entry          = j_kprobe_target,
+	.kp.symbol_name = "kprobe_target2"
+};
+
+static int test_jprobes(void)
+{
+	int ret;
+	struct jprobe *jps[2] = {&jp, &jp2};
+
+	jp.kp.addr = 0; /* addr should be cleared for reusing kprobe. */
+	ret = register_jprobes(jps, 2);
+	if (ret < 0) {
+		printk(KERN_ERR "Kprobe smoke test failed: "
+				"register_jprobes returned %d\n", ret);
+		return ret;
+	}
+
+	jph_val = 0;
+	ret = target(rand1);
+	if (jph_val == 0) {
+		printk(KERN_ERR "Kprobe smoke test failed: "
+				"jprobe handler not called\n");
+		handler_errors++;
+	}
+
+	jph_val = 0;
+	ret = target2(rand1);
+	if (jph_val == 0) {
+		printk(KERN_ERR "Kprobe smoke test failed: "
+				"jprobe handler2 not called\n");
+		handler_errors++;
+	}
+	unregister_jprobes(jps, 2);
+
+	return 0;
+}
 #ifdef CONFIG_KRETPROBES
 static u32 krph_val;
 
@@ -177,7 +282,7 @@
 		return ret;
 	}
 
-	ret = kprobe_target(rand1);
+	ret = target(rand1);
 	unregister_kretprobe(&rp);
 	if (krph_val != rand1) {
 		printk(KERN_ERR "Kprobe smoke test failed: "
@@ -187,12 +292,72 @@
 
 	return 0;
 }
+
+static int return_handler2(struct kretprobe_instance *ri, struct pt_regs *regs)
+{
+	unsigned long ret = regs_return_value(regs);
+
+	if (ret != (rand1 / div_factor) + 1) {
+		handler_errors++;
+		printk(KERN_ERR "Kprobe smoke test failed: "
+				"incorrect value in kretprobe handler2\n");
+	}
+	if (krph_val == 0) {
+		handler_errors++;
+		printk(KERN_ERR "Kprobe smoke test failed: "
+				"call to kretprobe entry handler failed\n");
+	}
+
+	krph_val = rand1;
+	return 0;
+}
+
+static struct kretprobe rp2 = {
+	.handler	= return_handler2,
+	.entry_handler  = entry_handler,
+	.kp.symbol_name = "kprobe_target2"
+};
+
+static int test_kretprobes(void)
+{
+	int ret;
+	struct kretprobe *rps[2] = {&rp, &rp2};
+
+	rp.kp.addr = 0; /* addr should be cleared for reusing kprobe. */
+	ret = register_kretprobes(rps, 2);
+	if (ret < 0) {
+		printk(KERN_ERR "Kprobe smoke test failed: "
+				"register_kretprobe returned %d\n", ret);
+		return ret;
+	}
+
+	krph_val = 0;
+	ret = target(rand1);
+	if (krph_val != rand1) {
+		printk(KERN_ERR "Kprobe smoke test failed: "
+				"kretprobe handler not called\n");
+		handler_errors++;
+	}
+
+	krph_val = 0;
+	ret = target2(rand1);
+	if (krph_val != rand1) {
+		printk(KERN_ERR "Kprobe smoke test failed: "
+				"kretprobe handler2 not called\n");
+		handler_errors++;
+	}
+	unregister_kretprobes(rps, 2);
+	return 0;
+}
 #endif /* CONFIG_KRETPROBES */
 
 int init_test_probes(void)
 {
 	int ret;
 
+	target = kprobe_target;
+	target2 = kprobe_target2;
+
 	do {
 		rand1 = random32();
 	} while (rand1 <= div_factor);
@@ -204,15 +369,30 @@
 		errors++;
 
 	num_tests++;
+	ret = test_kprobes();
+	if (ret < 0)
+		errors++;
+
+	num_tests++;
 	ret = test_jprobe();
 	if (ret < 0)
 		errors++;
 
+	num_tests++;
+	ret = test_jprobes();
+	if (ret < 0)
+		errors++;
+
 #ifdef CONFIG_KRETPROBES
 	num_tests++;
 	ret = test_kretprobe();
 	if (ret < 0)
 		errors++;
+
+	num_tests++;
+	ret = test_kretprobes();
+	if (ret < 0)
+		errors++;
 #endif /* CONFIG_KRETPROBES */
 
 	if (errors)
diff --git a/kernel/time.c b/kernel/time.c
index d63a433..4886e3c 100644
--- a/kernel/time.c
+++ b/kernel/time.c
@@ -37,6 +37,7 @@
 #include <linux/fs.h>
 #include <linux/slab.h>
 #include <linux/math64.h>
+#include <linux/ptrace.h>
 
 #include <asm/uaccess.h>
 #include <asm/unistd.h>
@@ -65,8 +66,9 @@
 
 	if (tloc) {
 		if (put_user(i,tloc))
-			i = -EFAULT;
+			return -EFAULT;
 	}
+	force_successful_syscall_return();
 	return i;
 }
 
diff --git a/kernel/tsacct.c b/kernel/tsacct.c
index 2dc06ab..43f891b 100644
--- a/kernel/tsacct.c
+++ b/kernel/tsacct.c
@@ -92,8 +92,8 @@
 	mm = get_task_mm(p);
 	if (mm) {
 		/* adjust to KB unit */
-		stats->hiwater_rss   = mm->hiwater_rss * PAGE_SIZE / KB;
-		stats->hiwater_vm    = mm->hiwater_vm * PAGE_SIZE / KB;
+		stats->hiwater_rss   = get_mm_hiwater_rss(mm) * PAGE_SIZE / KB;
+		stats->hiwater_vm    = get_mm_hiwater_vm(mm)  * PAGE_SIZE / KB;
 		mmput(mm);
 	}
 	stats->read_char	= p->ioac.rchar;
diff --git a/lib/bust_spinlocks.c b/lib/bust_spinlocks.c
index 486da62..9681d54 100644
--- a/lib/bust_spinlocks.c
+++ b/lib/bust_spinlocks.c
@@ -12,6 +12,7 @@
 #include <linux/tty.h>
 #include <linux/wait.h>
 #include <linux/vt_kern.h>
+#include <linux/console.h>
 
 
 void __attribute__((weak)) bust_spinlocks(int yes)
@@ -22,6 +23,7 @@
 #ifdef CONFIG_VT
 		unblank_screen();
 #endif
+		console_unblank();
 		if (--oops_in_progress == 0)
 			wake_up_klogd();
 	}
diff --git a/lib/fault-inject.c b/lib/fault-inject.c
index a50a311..f97af55b 100644
--- a/lib/fault-inject.c
+++ b/lib/fault-inject.c
@@ -6,7 +6,6 @@
 #include <linux/fs.h>
 #include <linux/module.h>
 #include <linux/interrupt.h>
-#include <linux/unwind.h>
 #include <linux/stacktrace.h>
 #include <linux/kallsyms.h>
 #include <linux/fault-inject.h>
diff --git a/lib/percpu_counter.c b/lib/percpu_counter.c
index b255b93..a60bd80 100644
--- a/lib/percpu_counter.c
+++ b/lib/percpu_counter.c
@@ -9,10 +9,8 @@
 #include <linux/cpu.h>
 #include <linux/module.h>
 
-#ifdef CONFIG_HOTPLUG_CPU
 static LIST_HEAD(percpu_counters);
 static DEFINE_MUTEX(percpu_counters_lock);
-#endif
 
 void percpu_counter_set(struct percpu_counter *fbc, s64 amount)
 {
@@ -111,13 +109,24 @@
 }
 EXPORT_SYMBOL(percpu_counter_destroy);
 
-#ifdef CONFIG_HOTPLUG_CPU
+int percpu_counter_batch __read_mostly = 32;
+EXPORT_SYMBOL(percpu_counter_batch);
+
+static void compute_batch_value(void)
+{
+	int nr = num_online_cpus();
+
+	percpu_counter_batch = max(32, nr*2);
+}
+
 static int __cpuinit percpu_counter_hotcpu_callback(struct notifier_block *nb,
 					unsigned long action, void *hcpu)
 {
+#ifdef CONFIG_HOTPLUG_CPU
 	unsigned int cpu;
 	struct percpu_counter *fbc;
 
+	compute_batch_value();
 	if (action != CPU_DEAD)
 		return NOTIFY_OK;
 
@@ -134,13 +143,14 @@
 		spin_unlock_irqrestore(&fbc->lock, flags);
 	}
 	mutex_unlock(&percpu_counters_lock);
+#endif
 	return NOTIFY_OK;
 }
 
 static int __init percpu_counter_startup(void)
 {
+	compute_batch_value();
 	hotcpu_notifier(percpu_counter_hotcpu_callback, 0);
 	return 0;
 }
 module_init(percpu_counter_startup);
-#endif
diff --git a/lib/prio_heap.c b/lib/prio_heap.c
index 471944a..a7af6f8 100644
--- a/lib/prio_heap.c
+++ b/lib/prio_heap.c
@@ -31,7 +31,7 @@
 
 	if (heap->size < heap->max) {
 		/* Heap insertion */
-		int pos = heap->size++;
+		pos = heap->size++;
 		while (pos > 0 && heap->gt(p, ptrs[(pos-1)/2])) {
 			ptrs[pos] = ptrs[(pos-1)/2];
 			pos = (pos-1)/2;
diff --git a/lib/proportions.c b/lib/proportions.c
index 4f387a6..3fda810 100644
--- a/lib/proportions.c
+++ b/lib/proportions.c
@@ -147,6 +147,7 @@
  * this is used to track the active references.
  */
 static struct prop_global *prop_get_global(struct prop_descriptor *pd)
+__acquires(RCU)
 {
 	int index;
 
@@ -160,6 +161,7 @@
 }
 
 static void prop_put_global(struct prop_descriptor *pd, struct prop_global *pg)
+__releases(RCU)
 {
 	rcu_read_unlock();
 }
diff --git a/lib/radix-tree.c b/lib/radix-tree.c
index be86b32..8d3fb0b 100644
--- a/lib/radix-tree.c
+++ b/lib/radix-tree.c
@@ -81,7 +81,7 @@
 	int nr;
 	struct radix_tree_node *nodes[RADIX_TREE_MAX_PATH];
 };
-DEFINE_PER_CPU(struct radix_tree_preload, radix_tree_preloads) = { 0, };
+static DEFINE_PER_CPU(struct radix_tree_preload, radix_tree_preloads) = { 0, };
 
 static inline gfp_t root_gfp_mask(struct radix_tree_root *root)
 {
diff --git a/lib/vsprintf.c b/lib/vsprintf.c
index 98d63227..0fbd012 100644
--- a/lib/vsprintf.c
+++ b/lib/vsprintf.c
@@ -170,6 +170,8 @@
 		return -EINVAL;
 
 	val = simple_strtoul(cp, &tail, base);
+	if (tail == cp)
+		return -EINVAL;
 	if ((*tail == '\0') ||
 		((len == (size_t)(tail - cp) + 1) && (*tail == '\n'))) {
 		*res = val;
@@ -241,6 +243,8 @@
 		return -EINVAL;
 
 	val = simple_strtoull(cp, &tail, base);
+	if (tail == cp)
+		return -EINVAL;
 	if ((*tail == '\0') ||
 		((len == (size_t)(tail - cp) + 1) && (*tail == '\n'))) {
 		*res = val;
diff --git a/mm/Kconfig b/mm/Kconfig
index 5b5790f..a5b7781 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -181,12 +181,6 @@
 	  example on NUMA systems to put pages nearer to the processors accessing
 	  the page.
 
-config RESOURCES_64BIT
-	bool "64 bit Memory and IO resources (EXPERIMENTAL)" if (!64BIT && EXPERIMENTAL)
-	default 64BIT
-	help
-	  This option allows memory and IO resources to be 64 bit.
-
 config PHYS_ADDR_T_64BIT
 	def_bool 64BIT || ARCH_PHYS_ADDR_T_64BIT
 
diff --git a/mm/Makefile b/mm/Makefile
index 51c2770..72255be 100644
--- a/mm/Makefile
+++ b/mm/Makefile
@@ -9,7 +9,7 @@
 
 obj-y			:= bootmem.o filemap.o mempool.o oom_kill.o fadvise.o \
 			   maccess.o page_alloc.o page-writeback.o pdflush.o \
-			   readahead.o swap.o truncate.o vmscan.o \
+			   readahead.o swap.o truncate.o vmscan.o shmem.o \
 			   prio_tree.o util.o mmzone.o vmstat.o backing-dev.o \
 			   page_isolation.o mm_init.o $(mmu-y)
 
@@ -21,9 +21,7 @@
 obj-$(CONFIG_NUMA) 	+= mempolicy.o
 obj-$(CONFIG_SPARSEMEM)	+= sparse.o
 obj-$(CONFIG_SPARSEMEM_VMEMMAP) += sparse-vmemmap.o
-obj-$(CONFIG_SHMEM) += shmem.o
 obj-$(CONFIG_TMPFS_POSIX_ACL) += shmem_acl.o
-obj-$(CONFIG_TINY_SHMEM) += tiny-shmem.o
 obj-$(CONFIG_SLOB) += slob.o
 obj-$(CONFIG_MMU_NOTIFIER) += mmu_notifier.o
 obj-$(CONFIG_SLAB) += slab.o
diff --git a/mm/backing-dev.c b/mm/backing-dev.c
index 801c08b..6f80bed 100644
--- a/mm/backing-dev.c
+++ b/mm/backing-dev.c
@@ -24,9 +24,9 @@
 static int bdi_debug_stats_show(struct seq_file *m, void *v)
 {
 	struct backing_dev_info *bdi = m->private;
-	long background_thresh;
-	long dirty_thresh;
-	long bdi_thresh;
+	unsigned long background_thresh;
+	unsigned long dirty_thresh;
+	unsigned long bdi_thresh;
 
 	get_dirty_limits(&background_thresh, &dirty_thresh, &bdi_thresh, bdi);
 
diff --git a/mm/bootmem.c b/mm/bootmem.c
index ac5a891..51a0ccf 100644
--- a/mm/bootmem.c
+++ b/mm/bootmem.c
@@ -435,6 +435,10 @@
 	unsigned long fallback = 0;
 	unsigned long min, max, start, sidx, midx, step;
 
+	bdebug("nid=%td size=%lx [%lu pages] align=%lx goal=%lx limit=%lx\n",
+		bdata - bootmem_node_data, size, PAGE_ALIGN(size) >> PAGE_SHIFT,
+		align, goal, limit);
+
 	BUG_ON(!size);
 	BUG_ON(align & (align - 1));
 	BUG_ON(limit && goal + size > limit);
@@ -442,10 +446,6 @@
 	if (!bdata->node_bootmem_map)
 		return NULL;
 
-	bdebug("nid=%td size=%lx [%lu pages] align=%lx goal=%lx limit=%lx\n",
-		bdata - bootmem_node_data, size, PAGE_ALIGN(size) >> PAGE_SHIFT,
-		align, goal, limit);
-
 	min = bdata->node_min_pfn;
 	max = bdata->node_low_pfn;
 
diff --git a/mm/filemap.c b/mm/filemap.c
index f5769b4..2f55a1e 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -210,7 +210,7 @@
 	int ret;
 	struct writeback_control wbc = {
 		.sync_mode = sync_mode,
-		.nr_to_write = mapping->nrpages * 2,
+		.nr_to_write = LONG_MAX,
 		.range_start = start,
 		.range_end = end,
 	};
@@ -741,7 +741,14 @@
 		page = __page_cache_alloc(gfp_mask);
 		if (!page)
 			return NULL;
-		err = add_to_page_cache_lru(page, mapping, index, gfp_mask);
+		/*
+		 * We want a regular kernel memory (not highmem or DMA etc)
+		 * allocation for the radix tree nodes, but we need to honour
+		 * the context-specific requirements the caller has asked for.
+		 * GFP_RECLAIM_MASK collects those requirements.
+		 */
+		err = add_to_page_cache_lru(page, mapping, index,
+			(gfp_mask & GFP_RECLAIM_MASK));
 		if (unlikely(err)) {
 			page_cache_release(page);
 			page = NULL;
@@ -950,7 +957,7 @@
 		return NULL;
 	}
 	page = __page_cache_alloc(mapping_gfp_mask(mapping) & ~__GFP_FS);
-	if (page && add_to_page_cache_lru(page, mapping, index, GFP_KERNEL)) {
+	if (page && add_to_page_cache_lru(page, mapping, index, GFP_NOFS)) {
 		page_cache_release(page);
 		page = NULL;
 	}
@@ -1317,7 +1324,8 @@
 			goto out; /* skip atime */
 		size = i_size_read(inode);
 		if (pos < size) {
-			retval = filemap_write_and_wait(mapping);
+			retval = filemap_write_and_wait_range(mapping, pos,
+					pos + iov_length(iov, nr_segs) - 1);
 			if (!retval) {
 				retval = mapping->a_ops->direct_IO(READ, iocb,
 							iov, pos, nr_segs);
@@ -1530,7 +1538,6 @@
 	/*
 	 * Found the page and have a reference on it.
 	 */
-	mark_page_accessed(page);
 	ra->prev_pos = (loff_t)page->index << PAGE_CACHE_SHIFT;
 	vmf->page = page;
 	return ret | VM_FAULT_LOCKED;
@@ -2060,18 +2067,10 @@
 	if (count != ocount)
 		*nr_segs = iov_shorten((struct iovec *)iov, *nr_segs, count);
 
-	/*
-	 * Unmap all mmappings of the file up-front.
-	 *
-	 * This will cause any pte dirty bits to be propagated into the
-	 * pageframes for the subsequent filemap_write_and_wait().
-	 */
 	write_len = iov_length(iov, *nr_segs);
 	end = (pos + write_len - 1) >> PAGE_CACHE_SHIFT;
-	if (mapping_mapped(mapping))
-		unmap_mapping_range(mapping, pos, write_len, 0);
 
-	written = filemap_write_and_wait(mapping);
+	written = filemap_write_and_wait_range(mapping, pos, pos + write_len - 1);
 	if (written)
 		goto out;
 
@@ -2291,7 +2290,8 @@
 	 * the file data here, to try to honour O_DIRECT expectations.
 	 */
 	if (unlikely(file->f_flags & O_DIRECT) && written)
-		status = filemap_write_and_wait(mapping);
+		status = filemap_write_and_wait_range(mapping,
+					pos, pos + written - 1);
 
 	return written ? written : status;
 }
diff --git a/mm/filemap_xip.c b/mm/filemap_xip.c
index b5167df..0c04615 100644
--- a/mm/filemap_xip.c
+++ b/mm/filemap_xip.c
@@ -193,7 +193,7 @@
 			/* Nuke the page table entry. */
 			flush_cache_page(vma, address, pte_pfn(*pte));
 			pteval = ptep_clear_flush_notify(vma, address, pte);
-			page_remove_rmap(page, vma);
+			page_remove_rmap(page);
 			dec_mm_counter(mm, file_rss);
 			BUG_ON(pte_dirty(pteval));
 			pte_unmap_unlock(pte, ptl);
diff --git a/mm/fremap.c b/mm/fremap.c
index 7d12ca7..62d5bbd 100644
--- a/mm/fremap.c
+++ b/mm/fremap.c
@@ -37,7 +37,7 @@
 		if (page) {
 			if (pte_dirty(pte))
 				set_page_dirty(page);
-			page_remove_rmap(page, vma);
+			page_remove_rmap(page);
 			page_cache_release(page);
 			update_hiwater_rss(mm);
 			dec_mm_counter(mm, file_rss);
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 6058b53..618e983 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -220,6 +220,35 @@
 }
 
 /*
+ * Return the size of the pages allocated when backing a VMA. In the majority
+ * of cases this will be the same size as used by the page table entries.
+ */
+unsigned long vma_kernel_pagesize(struct vm_area_struct *vma)
+{
+	struct hstate *hstate;
+
+	if (!is_vm_hugetlb_page(vma))
+		return PAGE_SIZE;
+
+	hstate = hstate_vma(vma);
+
+	return 1UL << (hstate->order + PAGE_SHIFT);
+}
+
+/*
+ * Return the page size being used by the MMU to back a VMA. In the majority
+ * of cases, the page size used by the kernel matches the MMU size. On
+ * architectures where it differs, an architecture-specific version of this
+ * function is required.
+ */
+#ifndef vma_mmu_pagesize
+unsigned long vma_mmu_pagesize(struct vm_area_struct *vma)
+{
+	return vma_kernel_pagesize(vma);
+}
+#endif
+
+/*
  * Flags for MAP_PRIVATE reservations.  These are stored in the bottom
  * bits of the reservation map pointer, which are always clear due to
  * alignment.
@@ -371,8 +400,10 @@
 {
 	int i;
 
-	if (unlikely(sz > MAX_ORDER_NR_PAGES))
-		return clear_gigantic_page(page, addr, sz);
+	if (unlikely(sz > MAX_ORDER_NR_PAGES)) {
+		clear_gigantic_page(page, addr, sz);
+		return;
+	}
 
 	might_sleep();
 	for (i = 0; i < sz/PAGE_SIZE; i++) {
@@ -404,8 +435,10 @@
 	int i;
 	struct hstate *h = hstate_vma(vma);
 
-	if (unlikely(pages_per_huge_page(h) > MAX_ORDER_NR_PAGES))
-		return copy_gigantic_page(dst, src, addr, vma);
+	if (unlikely(pages_per_huge_page(h) > MAX_ORDER_NR_PAGES)) {
+		copy_gigantic_page(dst, src, addr, vma);
+		return;
+	}
 
 	might_sleep();
 	for (i = 0; i < pages_per_huge_page(h); i++) {
@@ -972,7 +1005,7 @@
 	return page;
 }
 
-__attribute__((weak)) int alloc_bootmem_huge_page(struct hstate *h)
+int __weak alloc_bootmem_huge_page(struct hstate *h)
 {
 	struct huge_bootmem_page *m;
 	int nr_nodes = nodes_weight(node_online_map);
@@ -991,8 +1024,7 @@
 			 * puts them into the mem_map).
 			 */
 			m = addr;
-			if (m)
-				goto found;
+			goto found;
 		}
 		hstate_next_node(h);
 		nr_nodes--;
diff --git a/mm/internal.h b/mm/internal.h
index 13333bc..478223b 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -49,6 +49,7 @@
 /*
  * in mm/page_alloc.c
  */
+extern unsigned long highest_memmap_pfn;
 extern void __free_pages_bootmem(struct page *page, unsigned int order);
 
 /*
@@ -275,6 +276,7 @@
 #define GUP_FLAGS_WRITE                  0x1
 #define GUP_FLAGS_FORCE                  0x2
 #define GUP_FLAGS_IGNORE_VMA_PERMISSIONS 0x4
+#define GUP_FLAGS_IGNORE_SIGKILL         0x8
 
 int __get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
 		     unsigned long start, int len, int flags,
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 866dcc7..51ee965 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -779,7 +779,8 @@
 	return 0;
 }
 
-int mem_cgroup_resize_limit(struct mem_cgroup *memcg, unsigned long long val)
+static int mem_cgroup_resize_limit(struct mem_cgroup *memcg,
+				   unsigned long long val)
 {
 
 	int retry_count = MEM_CGROUP_RECLAIM_RETRIES;
diff --git a/mm/memory.c b/mm/memory.c
index 7b9db65..3f8fa06 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -52,6 +52,9 @@
 #include <linux/writeback.h>
 #include <linux/memcontrol.h>
 #include <linux/mmu_notifier.h>
+#include <linux/kallsyms.h>
+#include <linux/swapops.h>
+#include <linux/elf.h>
 
 #include <asm/pgalloc.h>
 #include <asm/uaccess.h>
@@ -59,9 +62,6 @@
 #include <asm/tlbflush.h>
 #include <asm/pgtable.h>
 
-#include <linux/swapops.h>
-#include <linux/elf.h>
-
 #include "internal.h"
 
 #ifndef CONFIG_NEED_MULTIPLE_NODES
@@ -375,15 +375,65 @@
  *
  * The calling function must still handle the error.
  */
-static void print_bad_pte(struct vm_area_struct *vma, pte_t pte,
-			  unsigned long vaddr)
+static void print_bad_pte(struct vm_area_struct *vma, unsigned long addr,
+			  pte_t pte, struct page *page)
 {
-	printk(KERN_ERR "Bad pte = %08llx, process = %s, "
-			"vm_flags = %lx, vaddr = %lx\n",
-		(long long)pte_val(pte),
-		(vma->vm_mm == current->mm ? current->comm : "???"),
-		vma->vm_flags, vaddr);
+	pgd_t *pgd = pgd_offset(vma->vm_mm, addr);
+	pud_t *pud = pud_offset(pgd, addr);
+	pmd_t *pmd = pmd_offset(pud, addr);
+	struct address_space *mapping;
+	pgoff_t index;
+	static unsigned long resume;
+	static unsigned long nr_shown;
+	static unsigned long nr_unshown;
+
+	/*
+	 * Allow a burst of 60 reports, then keep quiet for that minute;
+	 * or allow a steady drip of one report per second.
+	 */
+	if (nr_shown == 60) {
+		if (time_before(jiffies, resume)) {
+			nr_unshown++;
+			return;
+		}
+		if (nr_unshown) {
+			printk(KERN_ALERT
+				"BUG: Bad page map: %lu messages suppressed\n",
+				nr_unshown);
+			nr_unshown = 0;
+		}
+		nr_shown = 0;
+	}
+	if (nr_shown++ == 0)
+		resume = jiffies + 60 * HZ;
+
+	mapping = vma->vm_file ? vma->vm_file->f_mapping : NULL;
+	index = linear_page_index(vma, addr);
+
+	printk(KERN_ALERT
+		"BUG: Bad page map in process %s  pte:%08llx pmd:%08llx\n",
+		current->comm,
+		(long long)pte_val(pte), (long long)pmd_val(*pmd));
+	if (page) {
+		printk(KERN_ALERT
+		"page:%p flags:%p count:%d mapcount:%d mapping:%p index:%lx\n",
+		page, (void *)page->flags, page_count(page),
+		page_mapcount(page), page->mapping, page->index);
+	}
+	printk(KERN_ALERT
+		"addr:%p vm_flags:%08lx anon_vma:%p mapping:%p index:%lx\n",
+		(void *)addr, vma->vm_flags, vma->anon_vma, mapping, index);
+	/*
+	 * Choose text because data symbols depend on CONFIG_KALLSYMS_ALL=y
+	 */
+	if (vma->vm_ops)
+		print_symbol(KERN_ALERT "vma->vm_ops->fault: %s\n",
+				(unsigned long)vma->vm_ops->fault);
+	if (vma->vm_file && vma->vm_file->f_op)
+		print_symbol(KERN_ALERT "vma->vm_file->f_op->mmap: %s\n",
+				(unsigned long)vma->vm_file->f_op->mmap);
 	dump_stack();
+	add_taint(TAINT_BAD_PAGE);
 }
 
 static inline int is_cow_mapping(unsigned int flags)
@@ -441,21 +491,18 @@
 struct page *vm_normal_page(struct vm_area_struct *vma, unsigned long addr,
 				pte_t pte)
 {
-	unsigned long pfn;
+	unsigned long pfn = pte_pfn(pte);
 
 	if (HAVE_PTE_SPECIAL) {
-		if (likely(!pte_special(pte))) {
-			VM_BUG_ON(!pfn_valid(pte_pfn(pte)));
-			return pte_page(pte);
-		}
-		VM_BUG_ON(!(vma->vm_flags & (VM_PFNMAP | VM_MIXEDMAP)));
+		if (likely(!pte_special(pte)))
+			goto check_pfn;
+		if (!(vma->vm_flags & (VM_PFNMAP | VM_MIXEDMAP)))
+			print_bad_pte(vma, addr, pte, NULL);
 		return NULL;
 	}
 
 	/* !HAVE_PTE_SPECIAL case follows: */
 
-	pfn = pte_pfn(pte);
-
 	if (unlikely(vma->vm_flags & (VM_PFNMAP|VM_MIXEDMAP))) {
 		if (vma->vm_flags & VM_MIXEDMAP) {
 			if (!pfn_valid(pfn))
@@ -471,11 +518,14 @@
 		}
 	}
 
-	VM_BUG_ON(!pfn_valid(pfn));
+check_pfn:
+	if (unlikely(pfn > highest_memmap_pfn)) {
+		print_bad_pte(vma, addr, pte, NULL);
+		return NULL;
+	}
 
 	/*
 	 * NOTE! We still have PageReserved() pages in the page tables.
-	 *
 	 * eg. VDSO mappings can cause them to exist.
 	 */
 out:
@@ -767,11 +817,14 @@
 			else {
 				if (pte_dirty(ptent))
 					set_page_dirty(page);
-				if (pte_young(ptent))
-					SetPageReferenced(page);
+				if (pte_young(ptent) &&
+				    likely(!VM_SequentialReadHint(vma)))
+					mark_page_accessed(page);
 				file_rss--;
 			}
-			page_remove_rmap(page, vma);
+			page_remove_rmap(page);
+			if (unlikely(page_mapcount(page) < 0))
+				print_bad_pte(vma, addr, ptent, page);
 			tlb_remove_page(tlb, page);
 			continue;
 		}
@@ -781,8 +834,12 @@
 		 */
 		if (unlikely(details))
 			continue;
-		if (!pte_file(ptent))
-			free_swap_and_cache(pte_to_swp_entry(ptent));
+		if (pte_file(ptent)) {
+			if (unlikely(!(vma->vm_flags & VM_NONLINEAR)))
+				print_bad_pte(vma, addr, ptent, NULL);
+		} else if
+		  (unlikely(!free_swap_and_cache(pte_to_swp_entry(ptent))))
+			print_bad_pte(vma, addr, ptent, NULL);
 		pte_clear_not_present_full(mm, addr, pte, tlb->fullmm);
 	} while (pte++, addr += PAGE_SIZE, (addr != end && *zap_work > 0));
 
@@ -1153,6 +1210,7 @@
 	int write = !!(flags & GUP_FLAGS_WRITE);
 	int force = !!(flags & GUP_FLAGS_FORCE);
 	int ignore = !!(flags & GUP_FLAGS_IGNORE_VMA_PERMISSIONS);
+	int ignore_sigkill = !!(flags & GUP_FLAGS_IGNORE_SIGKILL);
 
 	if (len <= 0)
 		return 0;
@@ -1231,12 +1289,15 @@
 			struct page *page;
 
 			/*
-			 * If tsk is ooming, cut off its access to large memory
-			 * allocations. It has a pending SIGKILL, but it can't
-			 * be processed until returning to user space.
+			 * If we have a pending SIGKILL, don't keep faulting
+			 * pages and potentially allocating memory, unless
+			 * current is handling munlock--e.g., on exit. In
+			 * that case, we are not allocating memory.  Rather,
+			 * we're only unlocking already resident/mapped pages.
 			 */
-			if (unlikely(test_tsk_thread_flag(tsk, TIF_MEMDIE)))
-				return i ? i : -ENOMEM;
+			if (unlikely(!ignore_sigkill &&
+					fatal_signal_pending(current)))
+				return i ? i : -ERESTARTSYS;
 
 			if (write)
 				foll_flags |= FOLL_WRITE;
@@ -1263,9 +1324,15 @@
 				 * do_wp_page has broken COW when necessary,
 				 * even if maybe_mkwrite decided not to set
 				 * pte_write. We can thus safely do subsequent
-				 * page lookups as if they were reads.
+				 * page lookups as if they were reads. But only
+				 * do so when looping for pte_write is futile:
+				 * in some cases userspace may also be wanting
+				 * to write to the gotten user page, which a
+				 * read fault here might prevent (a readonly
+				 * page might get reCOWed by userspace write).
 				 */
-				if (ret & VM_FAULT_WRITE)
+				if ((ret & VM_FAULT_WRITE) &&
+				    !(vma->vm_flags & VM_WRITE))
 					foll_flags &= ~FOLL_WRITE;
 
 				cond_resched();
@@ -1644,6 +1711,8 @@
 
 	BUG_ON(pmd_huge(*pmd));
 
+	arch_enter_lazy_mmu_mode();
+
 	token = pmd_pgtable(*pmd);
 
 	do {
@@ -1652,6 +1721,8 @@
 			break;
 	} while (pte++, addr += PAGE_SIZE, addr != end);
 
+	arch_leave_lazy_mmu_mode();
+
 	if (mm != &init_mm)
 		pte_unmap_unlock(pte-1, ptl);
 	return err;
@@ -1837,10 +1908,21 @@
 	 * not dirty accountable.
 	 */
 	if (PageAnon(old_page)) {
-		if (trylock_page(old_page)) {
-			reuse = can_share_swap_page(old_page);
-			unlock_page(old_page);
+		if (!trylock_page(old_page)) {
+			page_cache_get(old_page);
+			pte_unmap_unlock(page_table, ptl);
+			lock_page(old_page);
+			page_table = pte_offset_map_lock(mm, pmd, address,
+							 &ptl);
+			if (!pte_same(*page_table, orig_pte)) {
+				unlock_page(old_page);
+				page_cache_release(old_page);
+				goto unlock;
+			}
+			page_cache_release(old_page);
 		}
+		reuse = reuse_swap_page(old_page);
+		unlock_page(old_page);
 	} else if (unlikely((vma->vm_flags & (VM_WRITE|VM_SHARED)) ==
 					(VM_WRITE|VM_SHARED))) {
 		/*
@@ -1943,11 +2025,7 @@
 		 * thread doing COW.
 		 */
 		ptep_clear_flush_notify(vma, address, page_table);
-		SetPageSwapBacked(new_page);
-		lru_cache_add_active_or_unevictable(new_page, vma);
 		page_add_new_anon_rmap(new_page, vma, address);
-
-//TODO:  is this safe?  do_anonymous_page() does it this way.
 		set_pte_at(mm, address, page_table, entry);
 		update_mmu_cache(vma, address, entry);
 		if (old_page) {
@@ -1973,7 +2051,7 @@
 			 * mapcount is visible. So transitively, TLBs to
 			 * old page will be flushed before it can be reused.
 			 */
-			page_remove_rmap(old_page, vma);
+			page_remove_rmap(old_page);
 		}
 
 		/* Free the old page.. */
@@ -2374,7 +2452,7 @@
 
 	inc_mm_counter(mm, anon_rss);
 	pte = mk_pte(page, vma->vm_page_prot);
-	if (write_access && can_share_swap_page(page)) {
+	if (write_access && reuse_swap_page(page)) {
 		pte = maybe_mkwrite(pte_mkdirty(pte), vma);
 		write_access = 0;
 	}
@@ -2385,7 +2463,7 @@
 
 	swap_free(entry);
 	if (vm_swap_full() || (vma->vm_flags & VM_LOCKED) || PageMlocked(page))
-		remove_exclusive_swap_page(page);
+		try_to_free_swap(page);
 	unlock_page(page);
 
 	if (write_access) {
@@ -2442,8 +2520,6 @@
 	if (!pte_none(*page_table))
 		goto release;
 	inc_mm_counter(mm, anon_rss);
-	SetPageSwapBacked(page);
-	lru_cache_add_active_or_unevictable(page, vma);
 	page_add_new_anon_rmap(page, vma, address);
 	set_pte_at(mm, address, page_table, entry);
 
@@ -2591,8 +2667,6 @@
 			entry = maybe_mkwrite(pte_mkdirty(entry), vma);
 		if (anon) {
 			inc_mm_counter(mm, anon_rss);
-			SetPageSwapBacked(page);
-			lru_cache_add_active_or_unevictable(page, vma);
 			page_add_new_anon_rmap(page, vma, address);
 		} else {
 			inc_mm_counter(mm, file_rss);
@@ -2602,7 +2676,6 @@
 				get_page(dirty_page);
 			}
 		}
-//TODO:  is this safe?  do_anonymous_page() does it this way.
 		set_pte_at(mm, address, page_table, entry);
 
 		/* no need to invalidate: a not-present page won't be cached */
@@ -2666,12 +2739,11 @@
 	if (!pte_unmap_same(mm, pmd, page_table, orig_pte))
 		return 0;
 
-	if (unlikely(!(vma->vm_flags & VM_NONLINEAR) ||
-			!(vma->vm_flags & VM_CAN_NONLINEAR))) {
+	if (unlikely(!(vma->vm_flags & VM_NONLINEAR))) {
 		/*
 		 * Page table corrupted: show pte and kill process.
 		 */
-		print_bad_pte(vma, orig_pte, address);
+		print_bad_pte(vma, address, orig_pte, NULL);
 		return VM_FAULT_OOM;
 	}
 
@@ -2953,7 +3025,7 @@
 {
 	resource_size_t phys_addr;
 	unsigned long prot = 0;
-	void *maddr;
+	void __iomem *maddr;
 	int offset = addr & (PAGE_SIZE-1);
 
 	if (follow_phys(vma, addr, write, &prot, &phys_addr))
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index b1737118..c083cf5 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -216,7 +216,8 @@
 	return 0;
 }
 
-static int __meminit __add_section(struct zone *zone, unsigned long phys_start_pfn)
+static int __meminit __add_section(int nid, struct zone *zone,
+					unsigned long phys_start_pfn)
 {
 	int nr_pages = PAGES_PER_SECTION;
 	int ret;
@@ -234,7 +235,7 @@
 	if (ret < 0)
 		return ret;
 
-	return register_new_memory(__pfn_to_section(phys_start_pfn));
+	return register_new_memory(nid, __pfn_to_section(phys_start_pfn));
 }
 
 #ifdef CONFIG_SPARSEMEM_VMEMMAP
@@ -273,8 +274,8 @@
  * call this function after deciding the zone to which to
  * add the new pages.
  */
-int __ref __add_pages(struct zone *zone, unsigned long phys_start_pfn,
-		 unsigned long nr_pages)
+int __ref __add_pages(int nid, struct zone *zone, unsigned long phys_start_pfn,
+			unsigned long nr_pages)
 {
 	unsigned long i;
 	int err = 0;
@@ -284,7 +285,7 @@
 	end_sec = pfn_to_section_nr(phys_start_pfn + nr_pages - 1);
 
 	for (i = start_sec; i <= end_sec; i++) {
-		err = __add_section(zone, i << PFN_SECTION_SHIFT);
+		err = __add_section(nid, zone, i << PFN_SECTION_SHIFT);
 
 		/*
 		 * EEXIST is finally dealt with by ioresource collision
@@ -626,15 +627,12 @@
 }
 
 static struct page *
-hotremove_migrate_alloc(struct page *page,
-			unsigned long private,
-			int **x)
+hotremove_migrate_alloc(struct page *page, unsigned long private, int **x)
 {
-	/* This should be improoooooved!! */
-	return alloc_page(GFP_HIGHUSER_PAGECACHE);
+	/* This should be improooooved!! */
+	return alloc_page(GFP_HIGHUSER_MOVABLE);
 }
 
-
 #define NR_OFFLINE_AT_ONCE_PAGES	(256)
 static int
 do_migrate_range(unsigned long start_pfn, unsigned long end_pfn)
diff --git a/mm/migrate.c b/mm/migrate.c
index 21631ab..5537398 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -300,12 +300,10 @@
 	 * Now we know that no one else is looking at the page.
 	 */
 	get_page(newpage);	/* add cache reference */
-#ifdef CONFIG_SWAP
 	if (PageSwapCache(page)) {
 		SetPageSwapCache(newpage);
 		set_page_private(newpage, page_private(page));
 	}
-#endif
 
 	radix_tree_replace_slot(pslot, newpage);
 
@@ -373,9 +371,7 @@
 
 	mlock_migrate_page(newpage, page);
 
-#ifdef CONFIG_SWAP
 	ClearPageSwapCache(page);
-#endif
 	ClearPagePrivate(page);
 	set_page_private(page, 0);
 	/* page->mapping contains a flag for PageAnon() */
@@ -848,12 +844,6 @@
 		struct vm_area_struct *vma;
 		struct page *page;
 
-		/*
-		 * A valid page pointer that will not match any of the
-		 * pages that will be moved.
-		 */
-		pp->page = ZERO_PAGE(0);
-
 		err = -EFAULT;
 		vma = find_vma(mm, pp->addr);
 		if (!vma || !vma_migratable(vma))
@@ -919,41 +909,43 @@
 			 const int __user *nodes,
 			 int __user *status, int flags)
 {
-	struct page_to_node *pm = NULL;
+	struct page_to_node *pm;
 	nodemask_t task_nodes;
-	int err = 0;
-	int i;
+	unsigned long chunk_nr_pages;
+	unsigned long chunk_start;
+	int err;
 
 	task_nodes = cpuset_mems_allowed(task);
 
-	/* Limit nr_pages so that the multiplication may not overflow */
-	if (nr_pages >= ULONG_MAX / sizeof(struct page_to_node) - 1) {
-		err = -E2BIG;
+	err = -ENOMEM;
+	pm = (struct page_to_node *)__get_free_page(GFP_KERNEL);
+	if (!pm)
 		goto out;
-	}
-
-	pm = vmalloc((nr_pages + 1) * sizeof(struct page_to_node));
-	if (!pm) {
-		err = -ENOMEM;
-		goto out;
-	}
-
 	/*
-	 * Get parameters from user space and initialize the pm
-	 * array. Return various errors if the user did something wrong.
+	 * Store a chunk of page_to_node array in a page,
+	 * but keep the last one as a marker
 	 */
-	for (i = 0; i < nr_pages; i++) {
-		const void __user *p;
+	chunk_nr_pages = (PAGE_SIZE / sizeof(struct page_to_node)) - 1;
 
-		err = -EFAULT;
-		if (get_user(p, pages + i))
-			goto out_pm;
+	for (chunk_start = 0;
+	     chunk_start < nr_pages;
+	     chunk_start += chunk_nr_pages) {
+		int j;
 
-		pm[i].addr = (unsigned long)p;
-		if (nodes) {
+		if (chunk_start + chunk_nr_pages > nr_pages)
+			chunk_nr_pages = nr_pages - chunk_start;
+
+		/* fill the chunk pm with addrs and nodes from user-space */
+		for (j = 0; j < chunk_nr_pages; j++) {
+			const void __user *p;
 			int node;
 
-			if (get_user(node, nodes + i))
+			err = -EFAULT;
+			if (get_user(p, pages + j + chunk_start))
+				goto out_pm;
+			pm[j].addr = (unsigned long) p;
+
+			if (get_user(node, nodes + j + chunk_start))
 				goto out_pm;
 
 			err = -ENODEV;
@@ -964,22 +956,29 @@
 			if (!node_isset(node, task_nodes))
 				goto out_pm;
 
-			pm[i].node = node;
-		} else
-			pm[i].node = 0;	/* anything to not match MAX_NUMNODES */
-	}
-	/* End marker */
-	pm[nr_pages].node = MAX_NUMNODES;
+			pm[j].node = node;
+		}
 
-	err = do_move_page_to_node_array(mm, pm, flags & MPOL_MF_MOVE_ALL);
-	if (err >= 0)
+		/* End marker for this chunk */
+		pm[chunk_nr_pages].node = MAX_NUMNODES;
+
+		/* Migrate this chunk */
+		err = do_move_page_to_node_array(mm, pm,
+						 flags & MPOL_MF_MOVE_ALL);
+		if (err < 0)
+			goto out_pm;
+
 		/* Return status information */
-		for (i = 0; i < nr_pages; i++)
-			if (put_user(pm[i].status, status + i))
+		for (j = 0; j < chunk_nr_pages; j++)
+			if (put_user(pm[j].status, status + j + chunk_start)) {
 				err = -EFAULT;
+				goto out_pm;
+			}
+	}
+	err = 0;
 
 out_pm:
-	vfree(pm);
+	free_page((unsigned long)pm);
 out:
 	return err;
 }
diff --git a/mm/mlock.c b/mm/mlock.c
index 3035a56..e125156 100644
--- a/mm/mlock.c
+++ b/mm/mlock.c
@@ -173,12 +173,13 @@
 		  (atomic_read(&mm->mm_users) != 0));
 
 	/*
-	 * mlock:   don't page populate if page has PROT_NONE permission.
-	 * munlock: the pages always do munlock althrough
-	 *          its has PROT_NONE permission.
+	 * mlock:   don't page populate if vma has PROT_NONE permission.
+	 * munlock: always do munlock although the vma has PROT_NONE
+	 *          permission, or SIGKILL is pending.
 	 */
 	if (!mlock)
-		gup_flags |= GUP_FLAGS_IGNORE_VMA_PERMISSIONS;
+		gup_flags |= GUP_FLAGS_IGNORE_VMA_PERMISSIONS |
+			     GUP_FLAGS_IGNORE_SIGKILL;
 
 	if (vma->vm_flags & VM_WRITE)
 		gup_flags |= GUP_FLAGS_WRITE;
diff --git a/mm/mmap.c b/mm/mmap.c
index 2c778fc..a910c045 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -413,7 +413,7 @@
 
 static void __vma_link_file(struct vm_area_struct *vma)
 {
-	struct file * file;
+	struct file *file;
 
 	file = vma->vm_file;
 	if (file) {
@@ -474,11 +474,10 @@
  * insert vm structure into list and rbtree and anon_vma,
  * but it has already been inserted into prio_tree earlier.
  */
-static void
-__insert_vm_struct(struct mm_struct * mm, struct vm_area_struct * vma)
+static void __insert_vm_struct(struct mm_struct *mm, struct vm_area_struct *vma)
 {
-	struct vm_area_struct * __vma, * prev;
-	struct rb_node ** rb_link, * rb_parent;
+	struct vm_area_struct *__vma, *prev;
+	struct rb_node **rb_link, *rb_parent;
 
 	__vma = find_vma_prepare(mm, vma->vm_start,&prev, &rb_link, &rb_parent);
 	BUG_ON(__vma && __vma->vm_start < vma->vm_end);
@@ -908,7 +907,7 @@
  * The caller must hold down_write(current->mm->mmap_sem).
  */
 
-unsigned long do_mmap_pgoff(struct file * file, unsigned long addr,
+unsigned long do_mmap_pgoff(struct file *file, unsigned long addr,
 			unsigned long len, unsigned long prot,
 			unsigned long flags, unsigned long pgoff)
 {
@@ -1464,7 +1463,7 @@
 EXPORT_SYMBOL(get_unmapped_area);
 
 /* Look up the first VMA which satisfies  addr < vm_end,  NULL if none. */
-struct vm_area_struct * find_vma(struct mm_struct * mm, unsigned long addr)
+struct vm_area_struct *find_vma(struct mm_struct *mm, unsigned long addr)
 {
 	struct vm_area_struct *vma = NULL;
 
@@ -1507,7 +1506,7 @@
 			struct vm_area_struct **pprev)
 {
 	struct vm_area_struct *vma = NULL, *prev = NULL;
-	struct rb_node * rb_node;
+	struct rb_node *rb_node;
 	if (!mm)
 		goto out;
 
@@ -1541,7 +1540,7 @@
  * update accounting. This is shared with both the
  * grow-up and grow-down cases.
  */
-static int acct_stack_growth(struct vm_area_struct * vma, unsigned long size, unsigned long grow)
+static int acct_stack_growth(struct vm_area_struct *vma, unsigned long size, unsigned long grow)
 {
 	struct mm_struct *mm = vma->vm_mm;
 	struct rlimit *rlim = current->signal->rlim;
@@ -2091,6 +2090,9 @@
 	arch_exit_mmap(mm);
 	mmu_notifier_release(mm);
 
+	if (!mm->mmap)	/* Can happen if dup_mmap() received an OOM */
+		return;
+
 	if (mm->locked_vm) {
 		vma = mm->mmap;
 		while (vma) {
@@ -2103,7 +2105,7 @@
 	lru_add_drain();
 	flush_cache_mm(mm);
 	tlb = tlb_gather_mmu(mm, 1);
-	/* Don't update_hiwater_rss(mm) here, do_exit already did */
+	/* update_hiwater_rss(mm) here? but nobody should be looking */
 	/* Use -1 here to ensure all VMAs in the mm are unmapped */
 	end = unmap_vmas(&tlb, vma, 0, -1, &nr_accounted, NULL);
 	vm_unacct_memory(nr_accounted);
diff --git a/mm/mprotect.c b/mm/mprotect.c
index cfb4c48..d0f6e7c 100644
--- a/mm/mprotect.c
+++ b/mm/mprotect.c
@@ -22,6 +22,7 @@
 #include <linux/swap.h>
 #include <linux/swapops.h>
 #include <linux/mmu_notifier.h>
+#include <linux/migrate.h>
 #include <asm/uaccess.h>
 #include <asm/pgtable.h>
 #include <asm/cacheflush.h>
@@ -59,8 +60,7 @@
 				ptent = pte_mkwrite(ptent);
 
 			ptep_modify_prot_commit(mm, addr, pte, ptent);
-#ifdef CONFIG_MIGRATION
-		} else if (!pte_file(oldpte)) {
+		} else if (PAGE_MIGRATION && !pte_file(oldpte)) {
 			swp_entry_t entry = pte_to_swp_entry(oldpte);
 
 			if (is_write_migration_entry(entry)) {
@@ -72,9 +72,7 @@
 				set_pte_at(mm, addr, pte,
 					swp_entry_to_pte(entry));
 			}
-#endif
 		}
-
 	} while (pte++, addr += PAGE_SIZE, addr != end);
 	arch_leave_lazy_mmu_mode();
 	pte_unmap_unlock(pte - 1, ptl);
diff --git a/mm/oom_kill.c b/mm/oom_kill.c
index 558f9af..6b9e758 100644
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -31,7 +31,7 @@
 int sysctl_panic_on_oom;
 int sysctl_oom_kill_allocating_task;
 int sysctl_oom_dump_tasks;
-static DEFINE_SPINLOCK(zone_scan_mutex);
+static DEFINE_SPINLOCK(zone_scan_lock);
 /* #define DEBUG */
 
 /**
@@ -392,6 +392,9 @@
 		printk(KERN_WARNING "%s invoked oom-killer: "
 			"gfp_mask=0x%x, order=%d, oomkilladj=%d\n",
 			current->comm, gfp_mask, order, current->oomkilladj);
+		task_lock(current);
+		cpuset_print_task_mems_allowed(current);
+		task_unlock(current);
 		dump_stack();
 		show_mem();
 		if (sysctl_oom_dump_tasks)
@@ -470,7 +473,7 @@
 	struct zone *zone;
 	int ret = 1;
 
-	spin_lock(&zone_scan_mutex);
+	spin_lock(&zone_scan_lock);
 	for_each_zone_zonelist(zone, z, zonelist, gfp_zone(gfp_mask)) {
 		if (zone_is_oom_locked(zone)) {
 			ret = 0;
@@ -480,7 +483,7 @@
 
 	for_each_zone_zonelist(zone, z, zonelist, gfp_zone(gfp_mask)) {
 		/*
-		 * Lock each zone in the zonelist under zone_scan_mutex so a
+		 * Lock each zone in the zonelist under zone_scan_lock so a
 		 * parallel invocation of try_set_zone_oom() doesn't succeed
 		 * when it shouldn't.
 		 */
@@ -488,7 +491,7 @@
 	}
 
 out:
-	spin_unlock(&zone_scan_mutex);
+	spin_unlock(&zone_scan_lock);
 	return ret;
 }
 
@@ -502,11 +505,74 @@
 	struct zoneref *z;
 	struct zone *zone;
 
-	spin_lock(&zone_scan_mutex);
+	spin_lock(&zone_scan_lock);
 	for_each_zone_zonelist(zone, z, zonelist, gfp_zone(gfp_mask)) {
 		zone_clear_flag(zone, ZONE_OOM_LOCKED);
 	}
-	spin_unlock(&zone_scan_mutex);
+	spin_unlock(&zone_scan_lock);
+}
+
+/*
+ * Must be called with tasklist_lock held for read.
+ */
+static void __out_of_memory(gfp_t gfp_mask, int order)
+{
+	if (sysctl_oom_kill_allocating_task) {
+		oom_kill_process(current, gfp_mask, order, 0, NULL,
+				"Out of memory (oom_kill_allocating_task)");
+
+	} else {
+		unsigned long points;
+		struct task_struct *p;
+
+retry:
+		/*
+		 * Rambo mode: Shoot down a process and hope it solves whatever
+		 * issues we may have.
+		 */
+		p = select_bad_process(&points, NULL);
+
+		if (PTR_ERR(p) == -1UL)
+			return;
+
+		/* Found nothing?!?! Either we hang forever, or we panic. */
+		if (!p) {
+			read_unlock(&tasklist_lock);
+			panic("Out of memory and no killable processes...\n");
+		}
+
+		if (oom_kill_process(p, gfp_mask, order, points, NULL,
+				     "Out of memory"))
+			goto retry;
+	}
+}
+
+/*
+ * pagefault handler calls into here because it is out of memory but
+ * doesn't know exactly how or why.
+ */
+void pagefault_out_of_memory(void)
+{
+	unsigned long freed = 0;
+
+	blocking_notifier_call_chain(&oom_notify_list, 0, &freed);
+	if (freed > 0)
+		/* Got some memory back in the last second. */
+		return;
+
+	if (sysctl_panic_on_oom)
+		panic("out of memory from page fault. panic_on_oom is selected.\n");
+
+	read_lock(&tasklist_lock);
+	__out_of_memory(0, 0); /* unknown gfp_mask and order */
+	read_unlock(&tasklist_lock);
+
+	/*
+	 * Give "p" a good chance of killing itself before we
+	 * retry to allocate memory.
+	 */
+	if (!test_thread_flag(TIF_MEMDIE))
+		schedule_timeout_uninterruptible(1);
 }
 
 /**
@@ -522,8 +588,6 @@
  */
 void out_of_memory(struct zonelist *zonelist, gfp_t gfp_mask, int order)
 {
-	struct task_struct *p;
-	unsigned long points = 0;
 	unsigned long freed = 0;
 	enum oom_constraint constraint;
 
@@ -544,7 +608,7 @@
 
 	switch (constraint) {
 	case CONSTRAINT_MEMORY_POLICY:
-		oom_kill_process(current, gfp_mask, order, points, NULL,
+		oom_kill_process(current, gfp_mask, order, 0, NULL,
 				"No available memory (MPOL_BIND)");
 		break;
 
@@ -553,35 +617,10 @@
 			panic("out of memory. panic_on_oom is selected\n");
 		/* Fall-through */
 	case CONSTRAINT_CPUSET:
-		if (sysctl_oom_kill_allocating_task) {
-			oom_kill_process(current, gfp_mask, order, points, NULL,
-					"Out of memory (oom_kill_allocating_task)");
-			break;
-		}
-retry:
-		/*
-		 * Rambo mode: Shoot down a process and hope it solves whatever
-		 * issues we may have.
-		 */
-		p = select_bad_process(&points, NULL);
-
-		if (PTR_ERR(p) == -1UL)
-			goto out;
-
-		/* Found nothing?!?! Either we hang forever, or we panic. */
-		if (!p) {
-			read_unlock(&tasklist_lock);
-			panic("Out of memory and no killable processes...\n");
-		}
-
-		if (oom_kill_process(p, gfp_mask, order, points, NULL,
-				     "Out of memory"))
-			goto retry;
-
+		__out_of_memory(gfp_mask, order);
 		break;
 	}
 
-out:
 	read_unlock(&tasklist_lock);
 
 	/*
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index 2970e35..b493db7 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -69,6 +69,12 @@
 int dirty_background_ratio = 5;
 
 /*
+ * dirty_background_bytes starts at 0 (disabled) so that it is a function of
+ * dirty_background_ratio * the amount of dirtyable memory
+ */
+unsigned long dirty_background_bytes;
+
+/*
  * free highmem will not be subtracted from the total free memory
  * for calculating free ratios if vm_highmem_is_dirtyable is true
  */
@@ -80,6 +86,12 @@
 int vm_dirty_ratio = 10;
 
 /*
+ * vm_dirty_bytes starts at 0 (disabled) so that it is a function of
+ * vm_dirty_ratio * the amount of dirtyable memory
+ */
+unsigned long vm_dirty_bytes;
+
+/*
  * The interval between `kupdate'-style writebacks, in jiffies
  */
 int dirty_writeback_interval = 5 * HZ;
@@ -135,23 +147,75 @@
 {
 	unsigned long dirty_total;
 
-	dirty_total = (vm_dirty_ratio * determine_dirtyable_memory()) / 100;
+	if (vm_dirty_bytes)
+		dirty_total = vm_dirty_bytes / PAGE_SIZE;
+	else
+		dirty_total = (vm_dirty_ratio * determine_dirtyable_memory()) /
+				100;
 	return 2 + ilog2(dirty_total - 1);
 }
 
 /*
- * update the period when the dirty ratio changes.
+ * update the period when the dirty threshold changes.
  */
+static void update_completion_period(void)
+{
+	int shift = calc_period_shift();
+	prop_change_shift(&vm_completions, shift);
+	prop_change_shift(&vm_dirties, shift);
+}
+
+int dirty_background_ratio_handler(struct ctl_table *table, int write,
+		struct file *filp, void __user *buffer, size_t *lenp,
+		loff_t *ppos)
+{
+	int ret;
+
+	ret = proc_dointvec_minmax(table, write, filp, buffer, lenp, ppos);
+	if (ret == 0 && write)
+		dirty_background_bytes = 0;
+	return ret;
+}
+
+int dirty_background_bytes_handler(struct ctl_table *table, int write,
+		struct file *filp, void __user *buffer, size_t *lenp,
+		loff_t *ppos)
+{
+	int ret;
+
+	ret = proc_doulongvec_minmax(table, write, filp, buffer, lenp, ppos);
+	if (ret == 0 && write)
+		dirty_background_ratio = 0;
+	return ret;
+}
+
 int dirty_ratio_handler(struct ctl_table *table, int write,
 		struct file *filp, void __user *buffer, size_t *lenp,
 		loff_t *ppos)
 {
 	int old_ratio = vm_dirty_ratio;
-	int ret = proc_dointvec_minmax(table, write, filp, buffer, lenp, ppos);
+	int ret;
+
+	ret = proc_dointvec_minmax(table, write, filp, buffer, lenp, ppos);
 	if (ret == 0 && write && vm_dirty_ratio != old_ratio) {
-		int shift = calc_period_shift();
-		prop_change_shift(&vm_completions, shift);
-		prop_change_shift(&vm_dirties, shift);
+		update_completion_period();
+		vm_dirty_bytes = 0;
+	}
+	return ret;
+}
+
+/* On a write that changes vm_dirty_bytes, reset vm_dirty_ratio to 0. */
+int dirty_bytes_handler(struct ctl_table *table, int write,
+		struct file *filp, void __user *buffer, size_t *lenp,
+		loff_t *ppos)
+{
+	unsigned long old_bytes = vm_dirty_bytes; /* int would truncate */
+	int ret;
+
+	ret = proc_doulongvec_minmax(table, write, filp, buffer, lenp, ppos);
+	if (ret == 0 && write && vm_dirty_bytes != old_bytes) {
+		update_completion_period();
+		vm_dirty_ratio = 0;
+	}
+	return ret;
+}
@@ -362,26 +426,32 @@
 }
 
 void
-get_dirty_limits(long *pbackground, long *pdirty, long *pbdi_dirty,
-		 struct backing_dev_info *bdi)
+get_dirty_limits(unsigned long *pbackground, unsigned long *pdirty,
+		 unsigned long *pbdi_dirty, struct backing_dev_info *bdi)
 {
-	int background_ratio;		/* Percentages */
-	int dirty_ratio;
-	long background;
-	long dirty;
+	unsigned long background;
+	unsigned long dirty;
 	unsigned long available_memory = determine_dirtyable_memory();
 	struct task_struct *tsk;
 
-	dirty_ratio = vm_dirty_ratio;
-	if (dirty_ratio < 5)
-		dirty_ratio = 5;
+	if (vm_dirty_bytes)
+		dirty = DIV_ROUND_UP(vm_dirty_bytes, PAGE_SIZE);
+	else {
+		int dirty_ratio;
 
-	background_ratio = dirty_background_ratio;
-	if (background_ratio >= dirty_ratio)
-		background_ratio = dirty_ratio / 2;
+		dirty_ratio = vm_dirty_ratio;
+		if (dirty_ratio < 5)
+			dirty_ratio = 5;
+		dirty = (dirty_ratio * available_memory) / 100;
+	}
 
-	background = (background_ratio * available_memory) / 100;
-	dirty = (dirty_ratio * available_memory) / 100;
+	if (dirty_background_bytes)
+		background = DIV_ROUND_UP(dirty_background_bytes, PAGE_SIZE);
+	else
+		background = (dirty_background_ratio * available_memory) / 100;
+
+	if (background >= dirty)
+		background = dirty / 2;
 	tsk = current;
 	if (tsk->flags & PF_LESS_THROTTLE || rt_task(tsk)) {
 		background += background / 4;
@@ -423,9 +493,9 @@
 {
 	long nr_reclaimable, bdi_nr_reclaimable;
 	long nr_writeback, bdi_nr_writeback;
-	long background_thresh;
-	long dirty_thresh;
-	long bdi_thresh;
+	unsigned long background_thresh;
+	unsigned long dirty_thresh;
+	unsigned long bdi_thresh;
 	unsigned long pages_written = 0;
 	unsigned long write_chunk = sync_writeback_pages();
 
@@ -580,8 +650,8 @@
 
 void throttle_vm_writeout(gfp_t gfp_mask)
 {
-	long background_thresh;
-	long dirty_thresh;
+	unsigned long background_thresh;
+	unsigned long dirty_thresh;
 
         for ( ; ; ) {
 		get_dirty_limits(&background_thresh, &dirty_thresh, NULL, NULL);
@@ -624,8 +694,8 @@
 	};
 
 	for ( ; ; ) {
-		long background_thresh;
-		long dirty_thresh;
+		unsigned long background_thresh;
+		unsigned long dirty_thresh;
 
 		get_dirty_limits(&background_thresh, &dirty_thresh, NULL, NULL);
 		if (global_page_state(NR_FILE_DIRTY) +
@@ -868,9 +938,11 @@
 	int done = 0;
 	struct pagevec pvec;
 	int nr_pages;
+	pgoff_t uninitialized_var(writeback_index);
 	pgoff_t index;
 	pgoff_t end;		/* Inclusive */
-	int scanned = 0;
+	pgoff_t done_index;
+	int cycled;
 	int range_whole = 0;
 	long nr_to_write = wbc->nr_to_write;
 
@@ -881,83 +953,134 @@
 
 	pagevec_init(&pvec, 0);
 	if (wbc->range_cyclic) {
-		index = mapping->writeback_index; /* Start from prev offset */
+		writeback_index = mapping->writeback_index; /* prev offset */
+		index = writeback_index;
+		if (index == 0)
+			cycled = 1;
+		else
+			cycled = 0;
 		end = -1;
 	} else {
 		index = wbc->range_start >> PAGE_CACHE_SHIFT;
 		end = wbc->range_end >> PAGE_CACHE_SHIFT;
 		if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
 			range_whole = 1;
-		scanned = 1;
+		cycled = 1; /* ignore range_cyclic tests */
 	}
 retry:
-	while (!done && (index <= end) &&
-	       (nr_pages = pagevec_lookup_tag(&pvec, mapping, &index,
-					      PAGECACHE_TAG_DIRTY,
-					      min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1))) {
-		unsigned i;
+	done_index = index;
+	while (!done && (index <= end)) {
+		int i;
 
-		scanned = 1;
+		nr_pages = pagevec_lookup_tag(&pvec, mapping, &index,
+			      PAGECACHE_TAG_DIRTY,
+			      min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1);
+		if (nr_pages == 0)
+			break;
+
 		for (i = 0; i < nr_pages; i++) {
 			struct page *page = pvec.pages[i];
 
 			/*
-			 * At this point we hold neither mapping->tree_lock nor
-			 * lock on the page itself: the page may be truncated or
-			 * invalidated (changing page->mapping to NULL), or even
-			 * swizzled back from swapper_space to tmpfs file
-			 * mapping
+			 * At this point, the page may be truncated or
+			 * invalidated (changing page->mapping to NULL), or
+			 * even swizzled back from swapper_space to tmpfs file
+			 * mapping. However, page->index will not change
+			 * because we have a reference on the page.
 			 */
+			if (page->index > end) {
+				/*
+				 * can't be range_cyclic (1st pass) because
+				 * end == -1 in that case.
+				 */
+				done = 1;
+				break;
+			}
+
+			done_index = page->index + 1;
+
 			lock_page(page);
 
+			/*
+			 * Page truncated or invalidated. We can freely skip it
+			 * then, even for data integrity operations: the page
+			 * has disappeared concurrently, so there could be no
+			 * real expectation of this data integrity operation
+			 * even if there is now a new, dirty page at the same
+			 * pagecache address.
+			 */
 			if (unlikely(page->mapping != mapping)) {
+continue_unlock:
 				unlock_page(page);
 				continue;
 			}
 
-			if (!wbc->range_cyclic && page->index > end) {
-				done = 1;
-				unlock_page(page);
-				continue;
+			if (!PageDirty(page)) {
+				/* someone wrote it for us */
+				goto continue_unlock;
 			}
 
-			if (wbc->sync_mode != WB_SYNC_NONE)
-				wait_on_page_writeback(page);
-
-			if (PageWriteback(page) ||
-			    !clear_page_dirty_for_io(page)) {
-				unlock_page(page);
-				continue;
+			if (PageWriteback(page)) {
+				if (wbc->sync_mode != WB_SYNC_NONE)
+					wait_on_page_writeback(page);
+				else
+					goto continue_unlock;
 			}
 
+			BUG_ON(PageWriteback(page));
+			if (!clear_page_dirty_for_io(page))
+				goto continue_unlock;
+
 			ret = (*writepage)(page, wbc, data);
+			if (unlikely(ret)) {
+				if (ret == AOP_WRITEPAGE_ACTIVATE) {
+					unlock_page(page);
+					ret = 0;
+				} else {
+					/*
+					 * done_index is set past this page,
+					 * so media errors will not choke
+					 * background writeout for the entire
+					 * file. This has consequences for
+					 * range_cyclic semantics (ie. it may
+					 * not be suitable for data integrity
+					 * writeout).
+					 */
+					done = 1;
+					break;
+				}
+ 			}
 
-			if (unlikely(ret == AOP_WRITEPAGE_ACTIVATE)) {
-				unlock_page(page);
-				ret = 0;
+			if (wbc->sync_mode == WB_SYNC_NONE) {
+				wbc->nr_to_write--;
+				if (wbc->nr_to_write <= 0) {
+					done = 1;
+					break;
+				}
 			}
-			if (ret || (--nr_to_write <= 0))
-				done = 1;
 			if (wbc->nonblocking && bdi_write_congested(bdi)) {
 				wbc->encountered_congestion = 1;
 				done = 1;
+				break;
 			}
 		}
 		pagevec_release(&pvec);
 		cond_resched();
 	}
-	if (!scanned && !done) {
+	if (!cycled) {
 		/*
+		 * range_cyclic:
 		 * We hit the last page and there is more work to be done: wrap
 		 * back to the start of the file
 		 */
-		scanned = 1;
+		cycled = 1;
 		index = 0;
+		end = writeback_index - 1;
 		goto retry;
 	}
 	if (!wbc->no_nrwrite_index_update) {
 		if (wbc->range_cyclic || (range_whole && nr_to_write > 0))
-			mapping->writeback_index = index;
+			mapping->writeback_index = done_index;
 		wbc->nr_to_write = nr_to_write;
 	}
 
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index d8ac014..7bf22e0 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -69,7 +69,7 @@
 
 unsigned long totalram_pages __read_mostly;
 unsigned long totalreserve_pages __read_mostly;
-long nr_swap_pages;
+unsigned long highest_memmap_pfn __read_mostly;
 int percpu_pagelist_fraction;
 
 #ifdef CONFIG_HUGETLB_PAGE_SIZE_VARIABLE
@@ -223,19 +223,41 @@
 
 static void bad_page(struct page *page)
 {
-	printk(KERN_EMERG "Bad page state in process '%s'\n" KERN_EMERG
-		"page:%p flags:0x%0*lx mapping:%p mapcount:%d count:%d\n",
-		current->comm, page, (int)(2*sizeof(unsigned long)),
-		(unsigned long)page->flags, page->mapping,
-		page_mapcount(page), page_count(page));
+	static unsigned long resume;
+	static unsigned long nr_shown;
+	static unsigned long nr_unshown;
 
-	printk(KERN_EMERG "Trying to fix it up, but a reboot is needed\n"
-		KERN_EMERG "Backtrace:\n");
+	/*
+	 * Allow a burst of 60 reports, then keep quiet for that minute;
+	 * or allow a steady drip of one report per second.
+	 */
+	if (nr_shown == 60) {
+		if (time_before(jiffies, resume)) {
+			nr_unshown++;
+			goto out;
+		}
+		if (nr_unshown) {
+			printk(KERN_ALERT
+			      "BUG: Bad page state: %lu messages suppressed\n",
+				nr_unshown);
+			nr_unshown = 0;
+		}
+		nr_shown = 0;
+	}
+	if (nr_shown++ == 0)
+		resume = jiffies + 60 * HZ;
+
+	printk(KERN_ALERT "BUG: Bad page state in process %s  pfn:%05lx\n",
+		current->comm, page_to_pfn(page));
+	printk(KERN_ALERT
+		"page:%p flags:%p count:%d mapcount:%d mapping:%p index:%lx\n",
+		page, (void *)page->flags, page_count(page),
+		page_mapcount(page), page->mapping, page->index);
+
 	dump_stack();
-	page->flags &= ~PAGE_FLAGS_CLEAR_WHEN_BAD;
-	set_page_count(page, 0);
-	reset_page_mapcount(page);
-	page->mapping = NULL;
+out:
+	/* Leave bad fields for debug, except PageBuddy could make trouble */
+	__ClearPageBuddy(page);
 	add_taint(TAINT_BAD_PAGE);
 }
 
@@ -292,25 +314,31 @@
 }
 #endif
 
-static void destroy_compound_page(struct page *page, unsigned long order)
+static int destroy_compound_page(struct page *page, unsigned long order)
 {
 	int i;
 	int nr_pages = 1 << order;
+	int bad = 0;
 
-	if (unlikely(compound_order(page) != order))
+	if (unlikely(compound_order(page) != order) ||
+	    unlikely(!PageHead(page))) {
 		bad_page(page);
+		bad++;
+	}
 
-	if (unlikely(!PageHead(page)))
-			bad_page(page);
 	__ClearPageHead(page);
+
 	for (i = 1; i < nr_pages; i++) {
 		struct page *p = page + i;
 
-		if (unlikely(!PageTail(p) |
-				(p->first_page != page)))
+		if (unlikely(!PageTail(p) | (p->first_page != page))) {
 			bad_page(page);
+			bad++;
+		}
 		__ClearPageTail(p);
 	}
+
+	return bad;
 }
 
 static inline void prep_zero_page(struct page *page, int order, gfp_t gfp_flags)
@@ -430,7 +458,8 @@
 	int migratetype = get_pageblock_migratetype(page);
 
 	if (unlikely(PageCompound(page)))
-		destroy_compound_page(page, order);
+		if (unlikely(destroy_compound_page(page, order)))
+			return;
 
 	page_idx = page_to_pfn(page) & ((1 << MAX_ORDER) - 1);
 
@@ -467,18 +496,13 @@
 	if (unlikely(page_mapcount(page) |
 		(page->mapping != NULL)  |
 		(page_count(page) != 0)  |
-		(page->flags & PAGE_FLAGS_CHECK_AT_FREE)))
+		(page->flags & PAGE_FLAGS_CHECK_AT_FREE))) {
 		bad_page(page);
-	if (PageDirty(page))
-		__ClearPageDirty(page);
-	if (PageSwapBacked(page))
-		__ClearPageSwapBacked(page);
-	/*
-	 * For now, we report if PG_reserved was found set, but do not
-	 * clear it, and do not free the page.  But we shall soon need
-	 * to do more, for when the ZERO_PAGE count wraps negative.
-	 */
-	return PageReserved(page);
+		return 1;
+	}
+	if (page->flags & PAGE_FLAGS_CHECK_AT_PREP)
+		page->flags &= ~PAGE_FLAGS_CHECK_AT_PREP;
+	return 0;
 }
 
 /*
@@ -523,11 +547,11 @@
 {
 	unsigned long flags;
 	int i;
-	int reserved = 0;
+	int bad = 0;
 
 	for (i = 0 ; i < (1 << order) ; ++i)
-		reserved += free_pages_check(page + i);
-	if (reserved)
+		bad += free_pages_check(page + i);
+	if (bad)
 		return;
 
 	if (!PageHighMem(page)) {
@@ -612,23 +636,11 @@
 	if (unlikely(page_mapcount(page) |
 		(page->mapping != NULL)  |
 		(page_count(page) != 0)  |
-		(page->flags & PAGE_FLAGS_CHECK_AT_PREP)))
+		(page->flags & PAGE_FLAGS_CHECK_AT_PREP))) {
 		bad_page(page);
-
-	/*
-	 * For now, we report if PG_reserved was found set, but do not
-	 * clear it, and do not allocate the page: as a safety net.
-	 */
-	if (PageReserved(page))
 		return 1;
+	}
 
-	page->flags &= ~(1 << PG_uptodate | 1 << PG_error | 1 << PG_reclaim |
-			1 << PG_referenced | 1 << PG_arch_1 |
-			1 << PG_owner_priv_1 | 1 << PG_mappedtodisk
-#ifdef CONFIG_UNEVICTABLE_LRU
-			| 1 << PG_mlocked
-#endif
-			);
 	set_page_private(page, 0);
 	set_page_refcounted(page);
 
@@ -2609,6 +2621,9 @@
 	unsigned long pfn;
 	struct zone *z;
 
+	if (highest_memmap_pfn < end_pfn - 1)
+		highest_memmap_pfn = end_pfn - 1;
+
 	z = &NODE_DATA(nid)->node_zones[zone];
 	for (pfn = start_pfn; pfn < end_pfn; pfn++) {
 		/*
@@ -3381,10 +3396,8 @@
 {
 	unsigned long usemapsize = usemap_size(zonesize);
 	zone->pageblock_flags = NULL;
-	if (usemapsize) {
+	if (usemapsize)
 		zone->pageblock_flags = alloc_bootmem_node(pgdat, usemapsize);
-		memset(zone->pageblock_flags, 0, usemapsize);
-	}
 }
 #else
 static void inline setup_usemap(struct pglist_data *pgdat,
@@ -3469,9 +3482,10 @@
 			PAGE_ALIGN(size * sizeof(struct page)) >> PAGE_SHIFT;
 		if (realsize >= memmap_pages) {
 			realsize -= memmap_pages;
-			printk(KERN_DEBUG
-				"  %s zone: %lu pages used for memmap\n",
-				zone_names[j], memmap_pages);
+			if (memmap_pages)
+				printk(KERN_DEBUG
+				       "  %s zone: %lu pages used for memmap\n",
+				       zone_names[j], memmap_pages);
 		} else
 			printk(KERN_WARNING
 				"  %s zone: %lu pages exceeds realsize %lu\n",
@@ -4316,7 +4330,7 @@
  *    1TB     101        10GB
  *   10TB     320        32GB
  */
-void setup_per_zone_inactive_ratio(void)
+static void setup_per_zone_inactive_ratio(void)
 {
 	struct zone *zone;
 
@@ -4573,19 +4587,6 @@
 	return table;
 }
 
-#ifdef CONFIG_OUT_OF_LINE_PFN_TO_PAGE
-struct page *pfn_to_page(unsigned long pfn)
-{
-	return __pfn_to_page(pfn);
-}
-unsigned long page_to_pfn(struct page *page)
-{
-	return __page_to_pfn(page);
-}
-EXPORT_SYMBOL(pfn_to_page);
-EXPORT_SYMBOL(page_to_pfn);
-#endif /* CONFIG_OUT_OF_LINE_PFN_TO_PAGE */
-
 /* Return a pointer to the bitmap storing bits affecting a block of pages */
 static inline unsigned long *get_pageblock_bitmap(struct zone *zone,
 							unsigned long pfn)
diff --git a/mm/page_cgroup.c b/mm/page_cgroup.c
index ab27ff7..d6507a6 100644
--- a/mm/page_cgroup.c
+++ b/mm/page_cgroup.c
@@ -101,7 +101,7 @@
 }
 
 /* __alloc_bootmem...() is protected by !slab_available() */
-int __init_refok init_section_page_cgroup(unsigned long pfn)
+static int __init_refok init_section_page_cgroup(unsigned long pfn)
 {
 	struct mem_section *section;
 	struct page_cgroup *base, *pc;
diff --git a/mm/page_io.c b/mm/page_io.c
index 065c448..dc6ce0a 100644
--- a/mm/page_io.c
+++ b/mm/page_io.c
@@ -98,7 +98,7 @@
 	struct bio *bio;
 	int ret = 0, rw = WRITE;
 
-	if (remove_exclusive_swap_page(page)) {
+	if (try_to_free_swap(page)) {
 		unlock_page(page);
 		goto out;
 	}
@@ -125,8 +125,8 @@
 	struct bio *bio;
 	int ret = 0;
 
-	BUG_ON(!PageLocked(page));
-	BUG_ON(PageUptodate(page));
+	VM_BUG_ON(!PageLocked(page));
+	VM_BUG_ON(PageUptodate(page));
 	bio = get_swap_bio(GFP_KERNEL, page_private(page), page,
 				end_swap_bio_read);
 	if (bio == NULL) {
diff --git a/mm/rmap.c b/mm/rmap.c
index 1099394..ac4af8c 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -47,9 +47,9 @@
 #include <linux/rmap.h>
 #include <linux/rcupdate.h>
 #include <linux/module.h>
-#include <linux/kallsyms.h>
 #include <linux/memcontrol.h>
 #include <linux/mmu_notifier.h>
+#include <linux/migrate.h>
 
 #include <asm/tlbflush.h>
 
@@ -191,7 +191,7 @@
  * Getting a lock on a stable anon_vma from a page off the LRU is
  * tricky: page_lock_anon_vma rely on RCU to guard against the races.
  */
-struct anon_vma *page_lock_anon_vma(struct page *page)
+static struct anon_vma *page_lock_anon_vma(struct page *page)
 {
 	struct anon_vma *anon_vma;
 	unsigned long anon_mapping;
@@ -211,7 +211,7 @@
 	return NULL;
 }
 
-void page_unlock_anon_vma(struct anon_vma *anon_vma)
+static void page_unlock_anon_vma(struct anon_vma *anon_vma)
 {
 	spin_unlock(&anon_vma->lock);
 	rcu_read_unlock();
@@ -359,8 +359,17 @@
 		goto out_unmap;
 	}
 
-	if (ptep_clear_flush_young_notify(vma, address, pte))
-		referenced++;
+	if (ptep_clear_flush_young_notify(vma, address, pte)) {
+		/*
+		 * Don't treat a reference through a sequentially read
+		 * mapping as such.  If the page has been used in
+		 * another mapping, we will catch it; if this other
+		 * mapping is already gone, the unmap path will have
+		 * set PG_referenced or activated the page.
+		 */
+		if (likely(!VM_SequentialReadHint(vma)))
+			referenced++;
+	}
 
 	/* Pretend the page is referenced if the task has the
 	   swap token and is in the middle of a page fault. */
@@ -661,9 +670,14 @@
 void page_add_new_anon_rmap(struct page *page,
 	struct vm_area_struct *vma, unsigned long address)
 {
-	BUG_ON(address < vma->vm_start || address >= vma->vm_end);
-	atomic_set(&page->_mapcount, 0); /* elevate count by 1 (starts at -1) */
+	VM_BUG_ON(address < vma->vm_start || address >= vma->vm_end);
+	SetPageSwapBacked(page);
+	atomic_set(&page->_mapcount, 0); /* increment count (starts at -1) */
 	__page_set_anon_rmap(page, vma, address);
+	if (page_evictable(page, vma))
+		lru_cache_add_lru(page, LRU_ACTIVE_ANON);
+	else
+		add_page_to_unevictable_list(page);
 }
 
 /**
@@ -693,7 +707,6 @@
  */
 void page_dup_rmap(struct page *page, struct vm_area_struct *vma, unsigned long address)
 {
-	BUG_ON(page_mapcount(page) == 0);
 	if (PageAnon(page))
 		__page_check_anon_rmap(page, vma, address);
 	atomic_inc(&page->_mapcount);
@@ -703,28 +716,12 @@
 /**
  * page_remove_rmap - take down pte mapping from a page
  * @page: page to remove mapping from
- * @vma: the vm area in which the mapping is removed
  *
  * The caller needs to hold the pte lock.
  */
-void page_remove_rmap(struct page *page, struct vm_area_struct *vma)
+void page_remove_rmap(struct page *page)
 {
 	if (atomic_add_negative(-1, &page->_mapcount)) {
-		if (unlikely(page_mapcount(page) < 0)) {
-			printk (KERN_EMERG "Eeek! page_mapcount(page) went negative! (%d)\n", page_mapcount(page));
-			printk (KERN_EMERG "  page pfn = %lx\n", page_to_pfn(page));
-			printk (KERN_EMERG "  page->flags = %lx\n", page->flags);
-			printk (KERN_EMERG "  page->count = %x\n", page_count(page));
-			printk (KERN_EMERG "  page->mapping = %p\n", page->mapping);
-			print_symbol (KERN_EMERG "  vma->vm_ops = %s\n", (unsigned long)vma->vm_ops);
-			if (vma->vm_ops) {
-				print_symbol (KERN_EMERG "  vma->vm_ops->fault = %s\n", (unsigned long)vma->vm_ops->fault);
-			}
-			if (vma->vm_file && vma->vm_file->f_op)
-				print_symbol (KERN_EMERG "  vma->vm_file->f_op->mmap = %s\n", (unsigned long)vma->vm_file->f_op->mmap);
-			BUG();
-		}
-
 		/*
 		 * Now that the last pte has gone, s390 must transfer dirty
 		 * flag from storage key to struct page.  We can usually skip
@@ -818,8 +815,7 @@
 				spin_unlock(&mmlist_lock);
 			}
 			dec_mm_counter(mm, anon_rss);
-#ifdef CONFIG_MIGRATION
-		} else {
+		} else if (PAGE_MIGRATION) {
 			/*
 			 * Store the pfn of the page in a special migration
 			 * pte. do_swap_page() will wait until the migration
@@ -827,23 +823,19 @@
 			 */
 			BUG_ON(!migration);
 			entry = make_migration_entry(page, pte_write(pteval));
-#endif
 		}
 		set_pte_at(mm, address, pte, swp_entry_to_pte(entry));
 		BUG_ON(pte_file(*pte));
-	} else
-#ifdef CONFIG_MIGRATION
-	if (migration) {
+	} else if (PAGE_MIGRATION && migration) {
 		/* Establish migration entry for a file page */
 		swp_entry_t entry;
 		entry = make_migration_entry(page, pte_write(pteval));
 		set_pte_at(mm, address, pte, swp_entry_to_pte(entry));
 	} else
-#endif
 		dec_mm_counter(mm, file_rss);
 
 
-	page_remove_rmap(page, vma);
+	page_remove_rmap(page);
 	page_cache_release(page);
 
 out_unmap:
@@ -958,7 +950,7 @@
 		if (pte_dirty(pteval))
 			set_page_dirty(page);
 
-		page_remove_rmap(page, vma);
+		page_remove_rmap(page);
 		page_cache_release(page);
 		dec_mm_counter(mm, file_rss);
 		(*mapcount)--;
diff --git a/mm/shmem.c b/mm/shmem.c
index f1b0d48..5941f98 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -14,31 +14,39 @@
  * Copyright (c) 2004, Luke Kenneth Casson Leighton <lkcl@lkcl.net>
  * Copyright (c) 2004 Red Hat, Inc., James Morris <jmorris@redhat.com>
  *
+ * tiny-shmem:
+ * Copyright (c) 2004, 2008 Matt Mackall <mpm@selenic.com>
+ *
  * This file is released under the GPL.
  */
 
+#include <linux/fs.h>
+#include <linux/init.h>
+#include <linux/vfs.h>
+#include <linux/mount.h>
+#include <linux/file.h>
+#include <linux/mm.h>
+#include <linux/module.h>
+#include <linux/swap.h>
+
+static struct vfsmount *shm_mnt;
+
+#ifdef CONFIG_SHMEM
 /*
  * This virtual memory filesystem is heavily based on the ramfs. It
  * extends ramfs by the ability to use swap and honor resource limits
  * which makes it a completely usable filesystem.
  */
 
-#include <linux/module.h>
-#include <linux/init.h>
-#include <linux/fs.h>
 #include <linux/xattr.h>
 #include <linux/exportfs.h>
 #include <linux/generic_acl.h>
-#include <linux/mm.h>
 #include <linux/mman.h>
-#include <linux/file.h>
-#include <linux/swap.h>
 #include <linux/pagemap.h>
 #include <linux/string.h>
 #include <linux/slab.h>
 #include <linux/backing-dev.h>
 #include <linux/shmem_fs.h>
-#include <linux/mount.h>
 #include <linux/writeback.h>
 #include <linux/vfs.h>
 #include <linux/blkdev.h>
@@ -1444,7 +1452,6 @@
 	if (error)
 		return ((error == -ENOMEM) ? VM_FAULT_OOM : VM_FAULT_SIGBUS);
 
-	mark_page_accessed(vmf->page);
 	return ret | VM_FAULT_LOCKED;
 }
 
@@ -2486,7 +2493,6 @@
 	.get_sb		= shmem_get_sb,
 	.kill_sb	= kill_litter_super,
 };
-static struct vfsmount *shm_mnt;
 
 static int __init init_tmpfs(void)
 {
@@ -2525,7 +2531,51 @@
 	shm_mnt = ERR_PTR(error);
 	return error;
 }
-module_init(init_tmpfs)
+
+#else /* !CONFIG_SHMEM */
+
+/*
+ * tiny-shmem: simple shmemfs and tmpfs using ramfs code
+ *
+ * This is intended for small systems where the benefits of the full
+ * shmem code (swap-backed and resource-limited) are outweighed by
+ * their complexity. On systems without swap this code should be
+ * effectively equivalent, but much lighter weight.
+ */
+
+#include <linux/ramfs.h>
+
+static struct file_system_type tmpfs_fs_type = {
+	.name		= "tmpfs",
+	.get_sb		= ramfs_get_sb,
+	.kill_sb	= kill_litter_super,
+};
+
+static int __init init_tmpfs(void)
+{
+	BUG_ON(register_filesystem(&tmpfs_fs_type) != 0);
+
+	shm_mnt = kern_mount(&tmpfs_fs_type);
+	BUG_ON(IS_ERR(shm_mnt));
+
+	return 0;	/* both failure cases above BUG() instead of returning */
+}
+
+int shmem_unuse(swp_entry_t entry, struct page *page)
+{
+	return 0;	/* stub for the !CONFIG_SHMEM build: arguments unused */
+}
+
+#define shmem_file_operations ramfs_file_operations
+#define shmem_vm_ops generic_file_vm_ops
+#define shmem_get_inode ramfs_get_inode
+#define shmem_acct_size(a, b) 0
+#define shmem_unacct_size(a, b) do {} while (0)
+#define SHMEM_MAX_BYTES LLONG_MAX
+
+#endif /* CONFIG_SHMEM */
+
+/* common code */
 
 /**
  * shmem_file_setup - get an unlinked file living in tmpfs
@@ -2569,12 +2619,20 @@
 	if (!inode)
 		goto close_file;
 
+#ifdef CONFIG_SHMEM
 	SHMEM_I(inode)->flags = flags & VM_ACCOUNT;
+#endif
 	d_instantiate(dentry, inode);
 	inode->i_size = size;
 	inode->i_nlink = 0;	/* It is unlinked */
 	init_file(file, shm_mnt, dentry, FMODE_WRITE | FMODE_READ,
-			&shmem_file_operations);
+		  &shmem_file_operations);
+
+#ifndef CONFIG_MMU
+	error = ramfs_nommu_expand_for_mapping(inode, size);
+	if (error)
+		goto close_file;
+#endif
 	return file;
 
 close_file:
@@ -2606,3 +2664,5 @@
 	vma->vm_ops = &shmem_vm_ops;
 	return 0;
 }
+
+module_init(init_tmpfs)
diff --git a/mm/swap.c b/mm/swap.c
index b135ec9..ba2c0e8 100644
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -246,25 +246,6 @@
 	spin_unlock_irq(&zone->lru_lock);
 }
 
-/**
- * lru_cache_add_active_or_unevictable
- * @page:  the page to be added to LRU
- * @vma:   vma in which page is mapped for determining reclaimability
- *
- * place @page on active or unevictable LRU list, depending on
- * page_evictable().  Note that if the page is not evictable,
- * it goes directly back onto it's zone's unevictable list.  It does
- * NOT use a per cpu pagevec.
- */
-void lru_cache_add_active_or_unevictable(struct page *page,
-					struct vm_area_struct *vma)
-{
-	if (page_evictable(page, vma))
-		lru_cache_add_lru(page, LRU_ACTIVE + page_is_file_cache(page));
-	else
-		add_page_to_unevictable_list(page);
-}
-
 /*
  * Drain pages out of the cpu's pagevecs.
  * Either "cpu" is the current CPU, and preemption has already been
@@ -398,28 +379,6 @@
 EXPORT_SYMBOL(__pagevec_release);
 
 /*
- * pagevec_release() for pages which are known to not be on the LRU
- *
- * This function reinitialises the caller's pagevec.
- */
-void __pagevec_release_nonlru(struct pagevec *pvec)
-{
-	int i;
-	struct pagevec pages_to_free;
-
-	pagevec_init(&pages_to_free, pvec->cold);
-	for (i = 0; i < pagevec_count(pvec); i++) {
-		struct page *page = pvec->pages[i];
-
-		VM_BUG_ON(PageLRU(page));
-		if (put_page_testzero(page))
-			pagevec_add(&pages_to_free, page);
-	}
-	pagevec_free(&pages_to_free);
-	pagevec_reinit(pvec);
-}
-
-/*
  * Add the passed pages to the LRU, then drop the caller's refcount
  * on them.  Reinitialises the caller's pagevec.
  */
@@ -495,8 +454,7 @@
 		struct page *page = pvec->pages[i];
 
 		if (PageSwapCache(page) && trylock_page(page)) {
-			if (PageSwapCache(page))
-				remove_exclusive_swap_page_ref(page);
+			try_to_free_swap(page);
 			unlock_page(page);
 		}
 	}
diff --git a/mm/swap_state.c b/mm/swap_state.c
index 3353c90..81c825f 100644
--- a/mm/swap_state.c
+++ b/mm/swap_state.c
@@ -72,10 +72,10 @@
 {
 	int error;
 
-	BUG_ON(!PageLocked(page));
-	BUG_ON(PageSwapCache(page));
-	BUG_ON(PagePrivate(page));
-	BUG_ON(!PageSwapBacked(page));
+	VM_BUG_ON(!PageLocked(page));
+	VM_BUG_ON(PageSwapCache(page));
+	VM_BUG_ON(!PageSwapBacked(page));
+
 	error = radix_tree_preload(gfp_mask);
 	if (!error) {
 		page_cache_get(page);
@@ -108,10 +108,9 @@
  */
 void __delete_from_swap_cache(struct page *page)
 {
-	BUG_ON(!PageLocked(page));
-	BUG_ON(!PageSwapCache(page));
-	BUG_ON(PageWriteback(page));
-	BUG_ON(PagePrivate(page));
+	VM_BUG_ON(!PageLocked(page));
+	VM_BUG_ON(!PageSwapCache(page));
+	VM_BUG_ON(PageWriteback(page));
 
 	radix_tree_delete(&swapper_space.page_tree, page_private(page));
 	set_page_private(page, 0);
@@ -129,13 +128,13 @@
  * Allocate swap space for the page and add the page to the
  * swap cache.  Caller needs to hold the page lock. 
  */
-int add_to_swap(struct page * page, gfp_t gfp_mask)
+int add_to_swap(struct page *page)
 {
 	swp_entry_t entry;
 	int err;
 
-	BUG_ON(!PageLocked(page));
-	BUG_ON(!PageUptodate(page));
+	VM_BUG_ON(!PageLocked(page));
+	VM_BUG_ON(!PageUptodate(page));
 
 	for (;;) {
 		entry = get_swap_page();
@@ -154,7 +153,7 @@
 		 * Add it to the swap cache and mark it dirty
 		 */
 		err = add_to_swap_cache(page, entry,
-				gfp_mask|__GFP_NOMEMALLOC|__GFP_NOWARN);
+				__GFP_HIGH|__GFP_NOMEMALLOC|__GFP_NOWARN);
 
 		switch (err) {
 		case 0:				/* Success */
@@ -196,14 +195,14 @@
  * If we are the only user, then try to free up the swap cache. 
  * 
  * Its ok to check for PageSwapCache without the page lock
- * here because we are going to recheck again inside 
- * exclusive_swap_page() _with_ the lock. 
+ * here because we are going to recheck again inside
+ * try_to_free_swap() _with_ the lock.
  * 					- Marcelo
  */
 static inline void free_swap_cache(struct page *page)
 {
-	if (PageSwapCache(page) && trylock_page(page)) {
-		remove_exclusive_swap_page(page);
+	if (PageSwapCache(page) && !page_mapped(page) && trylock_page(page)) {
+		try_to_free_swap(page);
 		unlock_page(page);
 	}
 }
diff --git a/mm/swapfile.c b/mm/swapfile.c
index 54a9f87..eec5ca7 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -16,6 +16,7 @@
 #include <linux/namei.h>
 #include <linux/shm.h>
 #include <linux/blkdev.h>
+#include <linux/random.h>
 #include <linux/writeback.h>
 #include <linux/proc_fs.h>
 #include <linux/seq_file.h>
@@ -35,6 +36,7 @@
 
 static DEFINE_SPINLOCK(swap_lock);
 static unsigned int nr_swapfiles;
+long nr_swap_pages;
 long total_swap_pages;
 static int swap_overflow;
 static int least_priority;
@@ -83,15 +85,96 @@
 	up_read(&swap_unplug_sem);
 }
 
+/*
+ * swapon tells the device that all the old swap contents can be discarded,
+ * to allow the swap device to optimize its wear-levelling.
+ */
+static int discard_swap(struct swap_info_struct *si)
+{
+	struct swap_extent *se;
+	int err = 0;	/* status of the most recent discard request */
+
+	list_for_each_entry(se, &si->extent_list, list) {
+		sector_t start_block = se->start_block << (PAGE_SHIFT - 9);
+		sector_t nr_blocks = (sector_t)se->nr_pages << (PAGE_SHIFT - 9);
+
+		if (se->start_page == 0) {
+			/* Do not discard the swap header page! */
+			start_block += 1 << (PAGE_SHIFT - 9);
+			nr_blocks -= 1 << (PAGE_SHIFT - 9);
+			if (!nr_blocks)
+				continue;	/* extent held only the header */
+		}
+
+		err = blkdev_issue_discard(si->bdev, start_block,
+						nr_blocks, GFP_KERNEL);
+		if (err)
+			break;	/* abandon remaining extents on first error */
+
+		cond_resched();	/* allow rescheduling between extents */
+	}
+	return err;		/* That will often be -EOPNOTSUPP */
+}
+
+/*
+ * Swap allocation tells the device that a swap cluster can now be discarded,
+ * to allow the swap device to optimize its wear-levelling.
+ */
+static void discard_swap_cluster(struct swap_info_struct *si,
+				 pgoff_t start_page, pgoff_t nr_pages)
+{
+	struct swap_extent *se = si->curr_swap_extent;
+	int found_extent = 0;	/* counts extents matched so far */
+
+	while (nr_pages) {
+		struct list_head *lh;
+
+		if (se->start_page <= start_page &&
+		    start_page < se->start_page + se->nr_pages) {
+			pgoff_t offset = start_page - se->start_page;
+			sector_t start_block = se->start_block + offset;
+			sector_t nr_blocks = se->nr_pages - offset;
+
+			if (nr_blocks > nr_pages)
+				nr_blocks = nr_pages;	/* clamp to what is left */
+			start_page += nr_blocks;
+			nr_pages -= nr_blocks;
+
+			if (!found_extent++)	/* first match only */
+				si->curr_swap_extent = se;
+
+			start_block <<= PAGE_SHIFT - 9;	/* to 512-byte units */
+			nr_blocks <<= PAGE_SHIFT - 9;
+			if (blkdev_issue_discard(si->bdev, start_block,
+							nr_blocks, GFP_NOIO))
+				break;	/* stop on discard failure */
+		}
+
+		lh = se->list.next;
+		if (lh == &si->extent_list)
+			lh = lh->next;	/* step over the list head */
+		se = list_entry(lh, struct swap_extent, list);
+	}
+}
+
+static int wait_for_discard(void *word)
+{
+	schedule();	/* give up the CPU; used as the wait_on_bit() action */
+	return 0;
+}
+
 #define SWAPFILE_CLUSTER	256
 #define LATENCY_LIMIT		256
 
 static inline unsigned long scan_swap_map(struct swap_info_struct *si)
 {
-	unsigned long offset, last_in_cluster;
+	unsigned long offset;
+	unsigned long scan_base;
+	unsigned long last_in_cluster = 0;
 	int latency_ration = LATENCY_LIMIT;
+	int found_free_cluster = 0;
 
-	/* 
+	/*
 	 * We try to cluster swap pages by allocating them sequentially
 	 * in swap.  Once we've allocated SWAPFILE_CLUSTER pages this
 	 * way, however, we resort to first-free allocation, starting
@@ -99,16 +182,42 @@
 	 * all over the entire swap partition, so that we reduce
 	 * overall disk seek times between swap pages.  -- sct
 	 * But we do now try to find an empty cluster.  -Andrea
+	 * And we let swap pages go all over an SSD partition.  Hugh
 	 */
 
 	si->flags += SWP_SCANNING;
-	if (unlikely(!si->cluster_nr)) {
-		si->cluster_nr = SWAPFILE_CLUSTER - 1;
-		if (si->pages - si->inuse_pages < SWAPFILE_CLUSTER)
-			goto lowest;
+	scan_base = offset = si->cluster_next;
+
+	if (unlikely(!si->cluster_nr--)) {
+		if (si->pages - si->inuse_pages < SWAPFILE_CLUSTER) {
+			si->cluster_nr = SWAPFILE_CLUSTER - 1;
+			goto checks;
+		}
+		if (si->flags & SWP_DISCARDABLE) {
+			/*
+			 * Start range check on racing allocations, in case
+			 * they overlap the cluster we eventually decide on
+			 * (we scan without swap_lock to allow preemption).
+			 * It's hardly conceivable that cluster_nr could be
+			 * wrapped during our scan, but don't depend on it.
+			 */
+			if (si->lowest_alloc)
+				goto checks;
+			si->lowest_alloc = si->max;
+			si->highest_alloc = 0;
+		}
 		spin_unlock(&swap_lock);
 
-		offset = si->lowest_bit;
+		/*
+		 * If seek is expensive, start searching for new cluster from
+		 * start of partition, to minimize the span of allocated swap.
+		 * But if seek is cheap, search from our current position, so
+		 * that swap is allocated from all over the partition: if the
+		 * Flash Translation Layer only remaps within limited zones,
+		 * we don't want to wear out the first zone too quickly.
+		 */
+		if (!(si->flags & SWP_SOLIDSTATE))
+			scan_base = offset = si->lowest_bit;
 		last_in_cluster = offset + SWAPFILE_CLUSTER - 1;
 
 		/* Locate the first empty (unaligned) cluster */
@@ -117,43 +226,124 @@
 				last_in_cluster = offset + SWAPFILE_CLUSTER;
 			else if (offset == last_in_cluster) {
 				spin_lock(&swap_lock);
-				si->cluster_next = offset-SWAPFILE_CLUSTER+1;
-				goto cluster;
+				offset -= SWAPFILE_CLUSTER - 1;
+				si->cluster_next = offset;
+				si->cluster_nr = SWAPFILE_CLUSTER - 1;
+				found_free_cluster = 1;
+				goto checks;
 			}
 			if (unlikely(--latency_ration < 0)) {
 				cond_resched();
 				latency_ration = LATENCY_LIMIT;
 			}
 		}
+
+		offset = si->lowest_bit;
+		last_in_cluster = offset + SWAPFILE_CLUSTER - 1;
+
+		/* Locate the first empty (unaligned) cluster */
+		for (; last_in_cluster < scan_base; offset++) {
+			if (si->swap_map[offset])
+				last_in_cluster = offset + SWAPFILE_CLUSTER;
+			else if (offset == last_in_cluster) {
+				spin_lock(&swap_lock);
+				offset -= SWAPFILE_CLUSTER - 1;
+				si->cluster_next = offset;
+				si->cluster_nr = SWAPFILE_CLUSTER - 1;
+				found_free_cluster = 1;
+				goto checks;
+			}
+			if (unlikely(--latency_ration < 0)) {
+				cond_resched();
+				latency_ration = LATENCY_LIMIT;
+			}
+		}
+
+		offset = scan_base;
 		spin_lock(&swap_lock);
-		goto lowest;
+		si->cluster_nr = SWAPFILE_CLUSTER - 1;
+		si->lowest_alloc = 0;
 	}
 
-	si->cluster_nr--;
-cluster:
-	offset = si->cluster_next;
-	if (offset > si->highest_bit)
-lowest:		offset = si->lowest_bit;
-checks:	if (!(si->flags & SWP_WRITEOK))
+checks:
+	if (!(si->flags & SWP_WRITEOK))
 		goto no_page;
 	if (!si->highest_bit)
 		goto no_page;
-	if (!si->swap_map[offset]) {
-		if (offset == si->lowest_bit)
-			si->lowest_bit++;
-		if (offset == si->highest_bit)
-			si->highest_bit--;
-		si->inuse_pages++;
-		if (si->inuse_pages == si->pages) {
-			si->lowest_bit = si->max;
-			si->highest_bit = 0;
-		}
-		si->swap_map[offset] = 1;
-		si->cluster_next = offset + 1;
-		si->flags -= SWP_SCANNING;
-		return offset;
-	}
+	if (offset > si->highest_bit)
+		scan_base = offset = si->lowest_bit;
+	if (si->swap_map[offset])
+		goto scan;
 
+	if (offset == si->lowest_bit)
+		si->lowest_bit++;
+	if (offset == si->highest_bit)
+		si->highest_bit--;
+	si->inuse_pages++;
+	if (si->inuse_pages == si->pages) {
+		si->lowest_bit = si->max;
+		si->highest_bit = 0;
+	}
+	si->swap_map[offset] = 1;
+	si->cluster_next = offset + 1;
+	si->flags -= SWP_SCANNING;
+
+	if (si->lowest_alloc) {
+		/*
+		 * Only set when SWP_DISCARDABLE, and there's a scan
+		 * for a free cluster in progress or just completed.
+		 */
+		if (found_free_cluster) {
+			/*
+			 * To optimize wear-levelling, discard the
+			 * old data of the cluster, taking care not to
+			 * discard any of its pages that have already
+			 * been allocated by racing tasks (offset has
+			 * already stepped over any at the beginning).
+			 */
+			if (offset < si->highest_alloc &&
+			    si->lowest_alloc <= last_in_cluster)
+				last_in_cluster = si->lowest_alloc - 1;
+			si->flags |= SWP_DISCARDING;
+			spin_unlock(&swap_lock);
+
+			if (offset < last_in_cluster)
+				discard_swap_cluster(si, offset,
+					last_in_cluster - offset + 1);
+
+			spin_lock(&swap_lock);
+			si->lowest_alloc = 0;
+			si->flags &= ~SWP_DISCARDING;
+
+			smp_mb();	/* wake_up_bit advises this */
+			wake_up_bit(&si->flags, ilog2(SWP_DISCARDING));
+
+		} else if (si->flags & SWP_DISCARDING) {
+			/*
+			 * Delay using pages allocated by racing tasks
+			 * until the whole discard has been issued. We
+			 * could defer that delay until swap_writepage,
+			 * but it's easier to keep this self-contained.
+			 */
+			spin_unlock(&swap_lock);
+			wait_on_bit(&si->flags, ilog2(SWP_DISCARDING),
+				wait_for_discard, TASK_UNINTERRUPTIBLE);
+			spin_lock(&swap_lock);
+		} else {
+			/*
+			 * Note pages allocated by racing tasks while
+			 * scan for a free cluster is in progress, so
+			 * that its final discard can exclude them.
+			 */
+			if (offset < si->lowest_alloc)
+				si->lowest_alloc = offset;
+			if (offset > si->highest_alloc)
+				si->highest_alloc = offset;
+		}
+	}
+	return offset;
+
+scan:
 	spin_unlock(&swap_lock);
 	while (++offset <= si->highest_bit) {
 		if (!si->swap_map[offset]) {
@@ -165,8 +355,18 @@
 			latency_ration = LATENCY_LIMIT;
 		}
 	}
+	offset = si->lowest_bit;
+	while (++offset < scan_base) {
+		if (!si->swap_map[offset]) {
+			spin_lock(&swap_lock);
+			goto checks;
+		}
+		if (unlikely(--latency_ration < 0)) {
+			cond_resched();
+			latency_ration = LATENCY_LIMIT;
+		}
+	}
 	spin_lock(&swap_lock);
-	goto lowest;
 
 no_page:
 	si->flags -= SWP_SCANNING;
@@ -268,7 +468,7 @@
 	printk(KERN_ERR "swap_free: %s%08lx\n", Bad_file, entry.val);
 out:
 	return NULL;
-}	
+}
 
 static int swap_entry_free(struct swap_info_struct *p, unsigned long offset)
 {
@@ -326,97 +526,58 @@
 }
 
 /*
- * We can use this swap cache entry directly
- * if there are no other references to it.
+ * We can write to an anon page without COW if there are no other references
+ * to it.  And as a side-effect, free up its swap: because the old content
+ * on disk will never be read, and seeking back there to write new content
+ * later would only waste time away from clustering.
  */
-int can_share_swap_page(struct page *page)
+int reuse_swap_page(struct page *page)
 {
 	int count;
 
-	BUG_ON(!PageLocked(page));
+	VM_BUG_ON(!PageLocked(page));
 	count = page_mapcount(page);
-	if (count <= 1 && PageSwapCache(page))
+	if (count <= 1 && PageSwapCache(page)) {
 		count += page_swapcount(page);
+		if (count == 1 && !PageWriteback(page)) {
+			delete_from_swap_cache(page);
+			SetPageDirty(page);
+		}
+	}
 	return count == 1;
 }
 
 /*
- * Work out if there are any other processes sharing this
- * swap cache page. Free it if you can. Return success.
+ * If swap is getting full, or if there are no more mappings of this page,
+ * then try_to_free_swap is called to free its swap space.
  */
-static int remove_exclusive_swap_page_count(struct page *page, int count)
+int try_to_free_swap(struct page *page)
 {
-	int retval;
-	struct swap_info_struct * p;
-	swp_entry_t entry;
-
-	BUG_ON(PagePrivate(page));
-	BUG_ON(!PageLocked(page));
+	VM_BUG_ON(!PageLocked(page));
 
 	if (!PageSwapCache(page))
 		return 0;
 	if (PageWriteback(page))
 		return 0;
-	if (page_count(page) != count) /* us + cache + ptes */
+	if (page_swapcount(page))
 		return 0;
 
-	entry.val = page_private(page);
-	p = swap_info_get(entry);
-	if (!p)
-		return 0;
-
-	/* Is the only swap cache user the cache itself? */
-	retval = 0;
-	if (p->swap_map[swp_offset(entry)] == 1) {
-		/* Recheck the page count with the swapcache lock held.. */
-		spin_lock_irq(&swapper_space.tree_lock);
-		if ((page_count(page) == count) && !PageWriteback(page)) {
-			__delete_from_swap_cache(page);
-			SetPageDirty(page);
-			retval = 1;
-		}
-		spin_unlock_irq(&swapper_space.tree_lock);
-	}
-	spin_unlock(&swap_lock);
-
-	if (retval) {
-		swap_free(entry);
-		page_cache_release(page);
-	}
-
-	return retval;
-}
-
-/*
- * Most of the time the page should have two references: one for the
- * process and one for the swap cache.
- */
-int remove_exclusive_swap_page(struct page *page)
-{
-	return remove_exclusive_swap_page_count(page, 2);
-}
-
-/*
- * The pageout code holds an extra reference to the page.  That raises
- * the reference count to test for to 2 for a page that is only in the
- * swap cache plus 1 for each process that maps the page.
- */
-int remove_exclusive_swap_page_ref(struct page *page)
-{
-	return remove_exclusive_swap_page_count(page, 2 + page_mapcount(page));
+	delete_from_swap_cache(page);
+	SetPageDirty(page);
+	return 1;
 }
 
 /*
  * Free the swap entry like above, but also try to
  * free the page cache entry if it is the last user.
  */
-void free_swap_and_cache(swp_entry_t entry)
+int free_swap_and_cache(swp_entry_t entry)
 {
-	struct swap_info_struct * p;
+	struct swap_info_struct *p;
 	struct page *page = NULL;
 
 	if (is_migration_entry(entry))
-		return;
+		return 1;
 
 	p = swap_info_get(entry);
 	if (p) {
@@ -430,20 +591,19 @@
 		spin_unlock(&swap_lock);
 	}
 	if (page) {
-		int one_user;
-
-		BUG_ON(PagePrivate(page));
-		one_user = (page_count(page) == 2);
-		/* Only cache user (+us), or swap space full? Free it! */
-		/* Also recheck PageSwapCache after page is locked (above) */
+		/*
+		 * Not mapped elsewhere, or swap space full? Free it!
+		 * Also recheck PageSwapCache now page is locked (above).
+		 */
 		if (PageSwapCache(page) && !PageWriteback(page) &&
-					(one_user || vm_swap_full())) {
+				(!page_mapped(page) || vm_swap_full())) {
 			delete_from_swap_cache(page);
 			SetPageDirty(page);
 		}
 		unlock_page(page);
 		page_cache_release(page);
 	}
+	return p != NULL;
 }
 
 #ifdef CONFIG_HIBERNATION
@@ -776,10 +936,10 @@
 			break;
 		}
 
-		/* 
+		/*
 		 * Get a page for the entry, using the existing swap
 		 * cache page if there is one.  Otherwise, get a clean
-		 * page and read the swap into it. 
+		 * page and read the swap into it.
 		 */
 		swap_map = &si->swap_map[i];
 		entry = swp_entry(type, i);
@@ -930,7 +1090,16 @@
 			lock_page(page);
 			wait_on_page_writeback(page);
 		}
-		if (PageSwapCache(page))
+
+		/*
+		 * It is conceivable that a racing task removed this page from
+		 * swap cache just before we acquired the page lock at the top,
+		 * or while we dropped it in unuse_mm().  The page might even
+		 * be back in swap cache on another swap area: that we must not
+		 * delete, since it may not have been written out to swap yet.
+		 */
+		if (PageSwapCache(page) &&
+		    likely(page_private(page) == entry.val))
 			delete_from_swap_cache(page);
 
 		/*
@@ -1203,26 +1372,6 @@
 	return ret;
 }
 
-#if 0	/* We don't need this yet */
-#include <linux/backing-dev.h>
-int page_queue_congested(struct page *page)
-{
-	struct backing_dev_info *bdi;
-
-	BUG_ON(!PageLocked(page));	/* It pins the swap_info_struct */
-
-	if (PageSwapCache(page)) {
-		swp_entry_t entry = { .val = page_private(page) };
-		struct swap_info_struct *sis;
-
-		sis = get_swap_info_struct(swp_type(entry));
-		bdi = sis->bdev->bd_inode->i_mapping->backing_dev_info;
-	} else
-		bdi = page->mapping->backing_dev_info;
-	return bdi_write_congested(bdi);
-}
-#endif
-
 asmlinkage long sys_swapoff(const char __user * specialfile)
 {
 	struct swap_info_struct * p = NULL;
@@ -1233,7 +1382,7 @@
 	char * pathname;
 	int i, type, prev;
 	int err;
-	
+
 	if (!capable(CAP_SYS_ADMIN))
 		return -EPERM;
 
@@ -1253,7 +1402,7 @@
 	spin_lock(&swap_lock);
 	for (type = swap_list.head; type >= 0; type = swap_info[type].next) {
 		p = swap_info + type;
-		if ((p->flags & SWP_ACTIVE) == SWP_ACTIVE) {
+		if (p->flags & SWP_WRITEOK) {
 			if (p->swap_file->f_mapping == mapping)
 				break;
 		}
@@ -1426,12 +1575,12 @@
 	file = ptr->swap_file;
 	len = seq_path(swap, &file->f_path, " \t\n\\");
 	seq_printf(swap, "%*s%s\t%u\t%u\t%d\n",
-		       len < 40 ? 40 - len : 1, " ",
-		       S_ISBLK(file->f_path.dentry->d_inode->i_mode) ?
+			len < 40 ? 40 - len : 1, " ",
+			S_ISBLK(file->f_path.dentry->d_inode->i_mode) ?
 				"partition" : "file\t",
-		       ptr->pages << (PAGE_SHIFT - 10),
-		       ptr->inuse_pages << (PAGE_SHIFT - 10),
-		       ptr->prio);
+			ptr->pages << (PAGE_SHIFT - 10),
+			ptr->inuse_pages << (PAGE_SHIFT - 10),
+			ptr->prio);
 	return 0;
 }
 
@@ -1487,12 +1636,11 @@
 	int i, prev;
 	int error;
 	union swap_header *swap_header = NULL;
-	int swap_header_version;
 	unsigned int nr_good_pages = 0;
 	int nr_extents = 0;
 	sector_t span;
 	unsigned long maxpages = 1;
-	int swapfilesize;
+	unsigned long swapfilepages;
 	unsigned short *swap_map = NULL;
 	struct page *page = NULL;
 	struct inode *inode = NULL;
@@ -1570,7 +1718,7 @@
 		goto bad_swap;
 	}
 
-	swapfilesize = i_size_read(inode) >> PAGE_SHIFT;
+	swapfilepages = i_size_read(inode) >> PAGE_SHIFT;
 
 	/*
 	 * Read the swap header.
@@ -1584,101 +1732,86 @@
 		error = PTR_ERR(page);
 		goto bad_swap;
 	}
-	kmap(page);
-	swap_header = page_address(page);
+	swap_header = kmap(page);
 
-	if (!memcmp("SWAP-SPACE",swap_header->magic.magic,10))
-		swap_header_version = 1;
-	else if (!memcmp("SWAPSPACE2",swap_header->magic.magic,10))
-		swap_header_version = 2;
-	else {
+	if (memcmp("SWAPSPACE2", swap_header->magic.magic, 10)) {
 		printk(KERN_ERR "Unable to find swap-space signature\n");
 		error = -EINVAL;
 		goto bad_swap;
 	}
-	
-	switch (swap_header_version) {
-	case 1:
-		printk(KERN_ERR "version 0 swap is no longer supported. "
-			"Use mkswap -v1 %s\n", name);
+
+	/* swap partition endianness hack... */
+	if (swab32(swap_header->info.version) == 1) {
+		swab32s(&swap_header->info.version);
+		swab32s(&swap_header->info.last_page);
+		swab32s(&swap_header->info.nr_badpages);
+		for (i = 0; i < swap_header->info.nr_badpages; i++)
+			swab32s(&swap_header->info.badpages[i]);
+	}
+	/* Check the swap header's sub-version */
+	if (swap_header->info.version != 1) {
+		printk(KERN_WARNING
+		       "Unable to handle swap header version %d\n",
+		       swap_header->info.version);
 		error = -EINVAL;
 		goto bad_swap;
-	case 2:
-		/* swap partition endianess hack... */
-		if (swab32(swap_header->info.version) == 1) {
-			swab32s(&swap_header->info.version);
-			swab32s(&swap_header->info.last_page);
-			swab32s(&swap_header->info.nr_badpages);
-			for (i = 0; i < swap_header->info.nr_badpages; i++)
-				swab32s(&swap_header->info.badpages[i]);
-		}
-		/* Check the swap header's sub-version and the size of
-                   the swap file and bad block lists */
-		if (swap_header->info.version != 1) {
-			printk(KERN_WARNING
-			       "Unable to handle swap header version %d\n",
-			       swap_header->info.version);
+	}
+
+	p->lowest_bit  = 1;
+	p->cluster_next = 1;
+
+	/*
+	 * Find out how many pages are allowed for a single swap
+	 * device. There are two limiting factors: 1) the number of
+	 * bits for the swap offset in the swp_entry_t type and
+	 * 2) the number of bits in a swap pte as defined by
+	 * the different architectures. In order to find the
+	 * largest possible bit mask a swap entry with swap type 0
+	 * and swap offset ~0UL is created, encoded to a swap pte,
+	 * decoded to a swp_entry_t again and finally the swap
+	 * offset is extracted. This will mask all the bits from
+	 * the initial ~0UL mask that can't be encoded in either
+	 * the swp_entry_t or the architecture definition of a
+	 * swap pte.
+	 */
+	maxpages = swp_offset(pte_to_swp_entry(
+			swp_entry_to_pte(swp_entry(0, ~0UL)))) - 1;
+	if (maxpages > swap_header->info.last_page)
+		maxpages = swap_header->info.last_page;
+	p->highest_bit = maxpages - 1;
+
+	error = -EINVAL;
+	if (!maxpages)
+		goto bad_swap;
+	if (swapfilepages && maxpages > swapfilepages) {
+		printk(KERN_WARNING
+		       "Swap area shorter than signature indicates\n");
+		goto bad_swap;
+	}
+	if (swap_header->info.nr_badpages && S_ISREG(inode->i_mode))
+		goto bad_swap;
+	if (swap_header->info.nr_badpages > MAX_SWAP_BADPAGES)
+		goto bad_swap;
+
+	/* OK, set up the swap map and apply the bad block list */
+	swap_map = vmalloc(maxpages * sizeof(short));
+	if (!swap_map) {
+		error = -ENOMEM;
+		goto bad_swap;
+	}
+
+	memset(swap_map, 0, maxpages * sizeof(short));
+	for (i = 0; i < swap_header->info.nr_badpages; i++) {
+		int page_nr = swap_header->info.badpages[i];
+		if (page_nr <= 0 || page_nr >= swap_header->info.last_page) {
 			error = -EINVAL;
 			goto bad_swap;
 		}
-
-		p->lowest_bit  = 1;
-		p->cluster_next = 1;
-
-		/*
-		 * Find out how many pages are allowed for a single swap
-		 * device. There are two limiting factors: 1) the number of
-		 * bits for the swap offset in the swp_entry_t type and
-		 * 2) the number of bits in the a swap pte as defined by
-		 * the different architectures. In order to find the
-		 * largest possible bit mask a swap entry with swap type 0
-		 * and swap offset ~0UL is created, encoded to a swap pte,
-		 * decoded to a swp_entry_t again and finally the swap
-		 * offset is extracted. This will mask all the bits from
-		 * the initial ~0UL mask that can't be encoded in either
-		 * the swp_entry_t or the architecture definition of a
-		 * swap pte.
-		 */
-		maxpages = swp_offset(pte_to_swp_entry(swp_entry_to_pte(swp_entry(0,~0UL)))) - 1;
-		if (maxpages > swap_header->info.last_page)
-			maxpages = swap_header->info.last_page;
-		p->highest_bit = maxpages - 1;
-
-		error = -EINVAL;
-		if (!maxpages)
-			goto bad_swap;
-		if (swapfilesize && maxpages > swapfilesize) {
-			printk(KERN_WARNING
-			       "Swap area shorter than signature indicates\n");
-			goto bad_swap;
-		}
-		if (swap_header->info.nr_badpages && S_ISREG(inode->i_mode))
-			goto bad_swap;
-		if (swap_header->info.nr_badpages > MAX_SWAP_BADPAGES)
-			goto bad_swap;
-
-		/* OK, set up the swap map and apply the bad block list */
-		swap_map = vmalloc(maxpages * sizeof(short));
-		if (!swap_map) {
-			error = -ENOMEM;
-			goto bad_swap;
-		}
-
-		error = 0;
-		memset(swap_map, 0, maxpages * sizeof(short));
-		for (i = 0; i < swap_header->info.nr_badpages; i++) {
-			int page_nr = swap_header->info.badpages[i];
-			if (page_nr <= 0 || page_nr >= swap_header->info.last_page)
-				error = -EINVAL;
-			else
-				swap_map[page_nr] = SWAP_MAP_BAD;
-		}
-		nr_good_pages = swap_header->info.last_page -
-				swap_header->info.nr_badpages -
-				1 /* header page */;
-		if (error)
-			goto bad_swap;
+		swap_map[page_nr] = SWAP_MAP_BAD;
 	}
+	nr_good_pages = swap_header->info.last_page -
+			swap_header->info.nr_badpages -
+			1 /* header page */;
 
 	if (nr_good_pages) {
 		swap_map[0] = SWAP_MAP_BAD;
@@ -1697,6 +1830,13 @@
 		goto bad_swap;
 	}
 
+	if (blk_queue_nonrot(bdev_get_queue(p->bdev))) {
+		p->flags |= SWP_SOLIDSTATE;
+		p->cluster_next = 1 + (random32() % p->highest_bit);
+	}
+	if (discard_swap(p) == 0)
+		p->flags |= SWP_DISCARDABLE;
+
 	mutex_lock(&swapon_mutex);
 	spin_lock(&swap_lock);
 	if (swap_flags & SWAP_FLAG_PREFER)
@@ -1705,14 +1845,16 @@
 	else
 		p->prio = --least_priority;
 	p->swap_map = swap_map;
-	p->flags = SWP_ACTIVE;
+	p->flags |= SWP_WRITEOK;
 	nr_swap_pages += nr_good_pages;
 	total_swap_pages += nr_good_pages;
 
 	printk(KERN_INFO "Adding %uk swap on %s.  "
-			"Priority:%d extents:%d across:%lluk\n",
+			"Priority:%d extents:%d across:%lluk %s%s\n",
 		nr_good_pages<<(PAGE_SHIFT-10), name, p->prio,
-		nr_extents, (unsigned long long)span<<(PAGE_SHIFT-10));
+		nr_extents, (unsigned long long)span<<(PAGE_SHIFT-10),
+		(p->flags & SWP_SOLIDSTATE) ? "SS" : "",
+		(p->flags & SWP_DISCARDABLE) ? "D" : "");
 
 	/* insert swap space into swap_list: */
 	prev = -1;
diff --git a/mm/tiny-shmem.c b/mm/tiny-shmem.c
deleted file mode 100644
index 3e67d57..0000000
--- a/mm/tiny-shmem.c
+++ /dev/null
@@ -1,134 +0,0 @@
-/*
- * tiny-shmem.c: simple shmemfs and tmpfs using ramfs code
- *
- * Matt Mackall <mpm@selenic.com> January, 2004
- * derived from mm/shmem.c and fs/ramfs/inode.c
- *
- * This is intended for small system where the benefits of the full
- * shmem code (swap-backed and resource-limited) are outweighed by
- * their complexity. On systems without swap this code should be
- * effectively equivalent, but much lighter weight.
- */
-
-#include <linux/fs.h>
-#include <linux/init.h>
-#include <linux/vfs.h>
-#include <linux/mount.h>
-#include <linux/file.h>
-#include <linux/mm.h>
-#include <linux/module.h>
-#include <linux/swap.h>
-#include <linux/ramfs.h>
-
-static struct file_system_type tmpfs_fs_type = {
-	.name		= "tmpfs",
-	.get_sb		= ramfs_get_sb,
-	.kill_sb	= kill_litter_super,
-};
-
-static struct vfsmount *shm_mnt;
-
-static int __init init_tmpfs(void)
-{
-	BUG_ON(register_filesystem(&tmpfs_fs_type) != 0);
-
-	shm_mnt = kern_mount(&tmpfs_fs_type);
-	BUG_ON(IS_ERR(shm_mnt));
-
-	return 0;
-}
-module_init(init_tmpfs)
-
-/**
- * shmem_file_setup - get an unlinked file living in tmpfs
- * @name: name for dentry (to be seen in /proc/<pid>/maps
- * @size: size to be set for the file
- * @flags: vm_flags
- */
-struct file *shmem_file_setup(char *name, loff_t size, unsigned long flags)
-{
-	int error;
-	struct file *file;
-	struct inode *inode;
-	struct dentry *dentry, *root;
-	struct qstr this;
-
-	if (IS_ERR(shm_mnt))
-		return (void *)shm_mnt;
-
-	error = -ENOMEM;
-	this.name = name;
-	this.len = strlen(name);
-	this.hash = 0; /* will go */
-	root = shm_mnt->mnt_root;
-	dentry = d_alloc(root, &this);
-	if (!dentry)
-		goto put_memory;
-
-	error = -ENFILE;
-	file = get_empty_filp();
-	if (!file)
-		goto put_dentry;
-
-	error = -ENOSPC;
-	inode = ramfs_get_inode(root->d_sb, S_IFREG | S_IRWXUGO, 0);
-	if (!inode)
-		goto close_file;
-
-	d_instantiate(dentry, inode);
-	inode->i_size = size;
-	inode->i_nlink = 0;	/* It is unlinked */
-	init_file(file, shm_mnt, dentry, FMODE_WRITE | FMODE_READ,
-			&ramfs_file_operations);
-
-#ifndef CONFIG_MMU
-	error = ramfs_nommu_expand_for_mapping(inode, size);
-	if (error)
-		goto close_file;
-#endif
-	return file;
-
-close_file:
-	put_filp(file);
-put_dentry:
-	dput(dentry);
-put_memory:
-	return ERR_PTR(error);
-}
-EXPORT_SYMBOL_GPL(shmem_file_setup);
-
-/**
- * shmem_zero_setup - setup a shared anonymous mapping
- * @vma: the vma to be mmapped is prepared by do_mmap_pgoff
- */
-int shmem_zero_setup(struct vm_area_struct *vma)
-{
-	struct file *file;
-	loff_t size = vma->vm_end - vma->vm_start;
-
-	file = shmem_file_setup("dev/zero", size, vma->vm_flags);
-	if (IS_ERR(file))
-		return PTR_ERR(file);
-
-	if (vma->vm_file)
-		fput(vma->vm_file);
-	vma->vm_file = file;
-	vma->vm_ops = &generic_file_vm_ops;
-	return 0;
-}
-
-int shmem_unuse(swp_entry_t entry, struct page *page)
-{
-	return 0;
-}
-
-#ifndef CONFIG_MMU
-unsigned long shmem_get_unmapped_area(struct file *file,
-				      unsigned long addr,
-				      unsigned long len,
-				      unsigned long pgoff,
-				      unsigned long flags)
-{
-	return ramfs_nommu_get_unmapped_area(file, addr, len, pgoff, flags);
-}
-#endif
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index 7465f22..c5db9a7 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -14,6 +14,7 @@
 #include <linux/highmem.h>
 #include <linux/slab.h>
 #include <linux/spinlock.h>
+#include <linux/mutex.h>
 #include <linux/interrupt.h>
 #include <linux/proc_fs.h>
 #include <linux/seq_file.h>
@@ -381,8 +382,9 @@
 			goto retry;
 		}
 		if (printk_ratelimit())
-			printk(KERN_WARNING "vmap allocation failed: "
-				 "use vmalloc=<size> to increase size.\n");
+			printk(KERN_WARNING
+				"vmap allocation for size %lu failed: "
+				"use vmalloc=<size> to increase size.\n", size);
 		return ERR_PTR(-EBUSY);
 	}
 
@@ -432,6 +434,27 @@
 	vunmap_page_range(va->va_start, va->va_end);
 }
 
+static void vmap_debug_free_range(unsigned long start, unsigned long end)
+{
+	/*
+	 * Unmap page tables and force a TLB flush immediately if
+	 * CONFIG_DEBUG_PAGEALLOC is set. This catches use after free
+	 * bugs similarly to those in linear kernel virtual address
+	 * space after a page has been freed.
+	 *
+	 * All the lazy freeing logic is still retained, in order to
+	 * minimise intrusiveness of this debugging feature.
+	 *
+	 * This is going to be *slow* (linear kernel virtual address
+	 * debugging doesn't do a broadcast TLB flush so it is a lot
+	 * faster).
+	 */
+#ifdef CONFIG_DEBUG_PAGEALLOC
+	vunmap_page_range(start, end);
+	flush_tlb_kernel_range(start, end);
+#endif
+}
+
 /*
  * lazy_max_pages is the maximum amount of virtual address space we gather up
  * before attempting to purge with a TLB flush.
@@ -472,7 +495,7 @@
 static void __purge_vmap_area_lazy(unsigned long *start, unsigned long *end,
 					int sync, int force_flush)
 {
-	static DEFINE_SPINLOCK(purge_lock);
+	static DEFINE_MUTEX(purge_lock);
 	LIST_HEAD(valist);
 	struct vmap_area *va;
 	int nr = 0;
@@ -483,10 +506,10 @@
 	 * the case that isn't actually used at the moment anyway.
 	 */
 	if (!sync && !force_flush) {
-		if (!spin_trylock(&purge_lock))
+		if (!mutex_trylock(&purge_lock))
 			return;
 	} else
-		spin_lock(&purge_lock);
+		mutex_lock(&purge_lock);
 
 	rcu_read_lock();
 	list_for_each_entry_rcu(va, &vmap_area_list, list) {
@@ -518,7 +541,7 @@
 			__free_vmap_area(va);
 		spin_unlock(&vmap_area_lock);
 	}
-	spin_unlock(&purge_lock);
+	mutex_unlock(&purge_lock);
 }
 
 /*
@@ -912,6 +935,7 @@
 	BUG_ON(addr & (PAGE_SIZE-1));
 
 	debug_check_no_locks_freed(mem, size);
+	vmap_debug_free_range(addr, addr+size);
 
 	if (likely(count <= VMAP_MAX_ALLOC))
 		vb_free(mem, size);
@@ -1128,6 +1152,8 @@
 	if (va && va->flags & VM_VM_AREA) {
 		struct vm_struct *vm = va->private;
 		struct vm_struct *tmp, **p;
+
+		vmap_debug_free_range(va->va_start, va->va_end);
 		free_unmap_vmap_area(va);
 		vm->size -= PAGE_SIZE;
 
@@ -1375,7 +1401,8 @@
 	struct vm_struct *area;
 	void *ret;
 
-	ret = __vmalloc(size, GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO, PAGE_KERNEL);
+	ret = __vmalloc_node(size, GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO,
+			     PAGE_KERNEL, -1, __builtin_return_address(0));
 	if (ret) {
 		area = find_vm_area(ret);
 		area->flags |= VM_USERMAP;
@@ -1420,7 +1447,8 @@
 
 void *vmalloc_exec(unsigned long size)
 {
-	return __vmalloc(size, GFP_KERNEL | __GFP_HIGHMEM, PAGE_KERNEL_EXEC);
+	return __vmalloc_node(size, GFP_KERNEL | __GFP_HIGHMEM, PAGE_KERNEL_EXEC,
+			      -1, __builtin_return_address(0));
 }
 
 #if defined(CONFIG_64BIT) && defined(CONFIG_ZONE_DMA32)
@@ -1440,7 +1468,8 @@
  */
 void *vmalloc_32(unsigned long size)
 {
-	return __vmalloc(size, GFP_VMALLOC32, PAGE_KERNEL);
+	return __vmalloc_node(size, GFP_VMALLOC32, PAGE_KERNEL,
+			      -1, __builtin_return_address(0));
 }
 EXPORT_SYMBOL(vmalloc_32);
 
@@ -1456,7 +1485,8 @@
 	struct vm_struct *area;
 	void *ret;
 
-	ret = __vmalloc(size, GFP_VMALLOC32 | __GFP_ZERO, PAGE_KERNEL);
+	ret = __vmalloc_node(size, GFP_VMALLOC32 | __GFP_ZERO, PAGE_KERNEL,
+			     -1, __builtin_return_address(0));
 	if (ret) {
 		area = find_vm_area(ret);
 		area->flags |= VM_USERMAP;
diff --git a/mm/vmscan.c b/mm/vmscan.c
index d196f46..b07c48b 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -52,6 +52,9 @@
 	/* Incremented by the number of inactive pages that were scanned */
 	unsigned long nr_scanned;
 
+	/* Number of pages freed so far during a call to shrink_zones() */
+	unsigned long nr_reclaimed;
+
 	/* This context's GFP mask */
 	gfp_t gfp_mask;
 
@@ -617,7 +620,6 @@
 					referenced && page_mapping_inuse(page))
 			goto activate_locked;
 
-#ifdef CONFIG_SWAP
 		/*
 		 * Anonymous process memory has backing store?
 		 * Try to allocate it some swap space here.
@@ -625,20 +627,10 @@
 		if (PageAnon(page) && !PageSwapCache(page)) {
 			if (!(sc->gfp_mask & __GFP_IO))
 				goto keep_locked;
-			switch (try_to_munlock(page)) {
-			case SWAP_FAIL:		/* shouldn't happen */
-			case SWAP_AGAIN:
-				goto keep_locked;
-			case SWAP_MLOCK:
-				goto cull_mlocked;
-			case SWAP_SUCCESS:
-				; /* fall thru'; add to swap cache */
-			}
-			if (!add_to_swap(page, GFP_ATOMIC))
+			if (!add_to_swap(page))
 				goto activate_locked;
 			may_enter_fs = 1;
 		}
-#endif /* CONFIG_SWAP */
 
 		mapping = page_mapping(page);
 
@@ -752,6 +744,8 @@
 		continue;
 
 cull_mlocked:
+		if (PageSwapCache(page))
+			try_to_free_swap(page);
 		unlock_page(page);
 		putback_lru_page(page);
 		continue;
@@ -759,7 +753,7 @@
 activate_locked:
 		/* Not a candidate for swapping, so reclaim swap space. */
 		if (PageSwapCache(page) && vm_swap_full())
-			remove_exclusive_swap_page_ref(page);
+			try_to_free_swap(page);
 		VM_BUG_ON(PageActive(page));
 		SetPageActive(page);
 		pgactivate++;
@@ -1173,11 +1167,6 @@
 		zone->prev_priority = priority;
 }
 
-static inline int zone_is_near_oom(struct zone *zone)
-{
-	return zone->pages_scanned >= (zone_lru_pages(zone) * 3);
-}
-
 /*
  * This moves pages from the active list to the inactive list.
  *
@@ -1248,6 +1237,13 @@
 		list_add(&page->lru, &l_inactive);
 	}
 
+	/*
+	 * Move the pages to the [file or anon] inactive list.
+	 */
+	pagevec_init(&pvec, 1);
+	pgmoved = 0;
+	lru = LRU_BASE + file * LRU_FILE;
+
 	spin_lock_irq(&zone->lru_lock);
 	/*
 	 * Count referenced pages from currently used mappings as
@@ -1255,15 +1251,9 @@
 	 * This helps balance scan pressure between file and anonymous
 	 * pages in get_scan_ratio.
 	 */
-	zone->recent_rotated[!!file] += pgmoved;
+	if (scan_global_lru(sc))
+		zone->recent_rotated[!!file] += pgmoved;
 
-	/*
-	 * Move the pages to the [file or anon] inactive list.
-	 */
-	pagevec_init(&pvec, 1);
-
-	pgmoved = 0;
-	lru = LRU_BASE + file * LRU_FILE;
 	while (!list_empty(&l_inactive)) {
 		page = lru_to_page(&l_inactive);
 		prefetchw_prev_lru_page(page, &l_inactive, flags);
@@ -1336,12 +1326,6 @@
 	unsigned long anon_prio, file_prio;
 	unsigned long ap, fp;
 
-	anon  = zone_page_state(zone, NR_ACTIVE_ANON) +
-		zone_page_state(zone, NR_INACTIVE_ANON);
-	file  = zone_page_state(zone, NR_ACTIVE_FILE) +
-		zone_page_state(zone, NR_INACTIVE_FILE);
-	free  = zone_page_state(zone, NR_FREE_PAGES);
-
 	/* If we have no swap space, do not bother scanning anon pages. */
 	if (nr_swap_pages <= 0) {
 		percent[0] = 0;
@@ -1349,6 +1333,12 @@
 		return;
 	}
 
+	anon  = zone_page_state(zone, NR_ACTIVE_ANON) +
+		zone_page_state(zone, NR_INACTIVE_ANON);
+	file  = zone_page_state(zone, NR_ACTIVE_FILE) +
+		zone_page_state(zone, NR_INACTIVE_FILE);
+	free  = zone_page_state(zone, NR_FREE_PAGES);
+
 	/* If we have very few page cache pages, force-scan anon pages. */
 	if (unlikely(file + free <= zone->pages_high)) {
 		percent[0] = 100;
@@ -1408,14 +1398,15 @@
 /*
  * This is a basic per-zone page freer.  Used by both kswapd and direct reclaim.
  */
-static unsigned long shrink_zone(int priority, struct zone *zone,
+static void shrink_zone(int priority, struct zone *zone,
 				struct scan_control *sc)
 {
 	unsigned long nr[NR_LRU_LISTS];
 	unsigned long nr_to_scan;
-	unsigned long nr_reclaimed = 0;
 	unsigned long percent[2];	/* anon @ 0; file @ 1 */
 	enum lru_list l;
+	unsigned long nr_reclaimed = sc->nr_reclaimed;
+	unsigned long swap_cluster_max = sc->swap_cluster_max;
 
 	get_scan_ratio(zone, sc, percent);
 
@@ -1431,7 +1422,7 @@
 			}
 			zone->lru[l].nr_scan += scan;
 			nr[l] = zone->lru[l].nr_scan;
-			if (nr[l] >= sc->swap_cluster_max)
+			if (nr[l] >= swap_cluster_max)
 				zone->lru[l].nr_scan = 0;
 			else
 				nr[l] = 0;
@@ -1450,16 +1441,28 @@
 					nr[LRU_INACTIVE_FILE]) {
 		for_each_evictable_lru(l) {
 			if (nr[l]) {
-				nr_to_scan = min(nr[l],
-					(unsigned long)sc->swap_cluster_max);
+				nr_to_scan = min(nr[l], swap_cluster_max);
 				nr[l] -= nr_to_scan;
 
 				nr_reclaimed += shrink_list(l, nr_to_scan,
-							zone, sc, priority);
+							    zone, sc, priority);
 			}
 		}
+		/*
+		 * On large memory systems, scan >> priority can become
+		 * really large. This is fine for the starting priority;
+		 * we want to put equal scanning pressure on each zone.
+		 * However, if the VM has a harder time of freeing pages,
+		 * with multiple processes reclaiming pages, the total
+		 * freeing target can get unreasonably large.
+		 */
+		if (nr_reclaimed > swap_cluster_max &&
+			priority < DEF_PRIORITY && !current_is_kswapd())
+			break;
 	}
 
+	sc->nr_reclaimed = nr_reclaimed;
+
 	/*
 	 * Even if we did not try to evict anon pages at all, we want to
 	 * rebalance the anon lru active/inactive ratio.
@@ -1470,7 +1473,6 @@
 		shrink_active_list(SWAP_CLUSTER_MAX, zone, sc, priority, 0);
 
 	throttle_vm_writeout(sc->gfp_mask);
-	return nr_reclaimed;
 }
 
 /*
@@ -1484,16 +1486,13 @@
  * b) The zones may be over pages_high but they must go *over* pages_high to
  *    satisfy the `incremental min' zone defense algorithm.
  *
- * Returns the number of reclaimed pages.
- *
  * If a zone is deemed to be full of pinned pages then just give it a light
  * scan then give up on it.
  */
-static unsigned long shrink_zones(int priority, struct zonelist *zonelist,
+static void shrink_zones(int priority, struct zonelist *zonelist,
 					struct scan_control *sc)
 {
 	enum zone_type high_zoneidx = gfp_zone(sc->gfp_mask);
-	unsigned long nr_reclaimed = 0;
 	struct zoneref *z;
 	struct zone *zone;
 
@@ -1524,10 +1523,8 @@
 							priority);
 		}
 
-		nr_reclaimed += shrink_zone(priority, zone, sc);
+		shrink_zone(priority, zone, sc);
 	}
-
-	return nr_reclaimed;
 }
 
 /*
@@ -1552,7 +1549,6 @@
 	int priority;
 	unsigned long ret = 0;
 	unsigned long total_scanned = 0;
-	unsigned long nr_reclaimed = 0;
 	struct reclaim_state *reclaim_state = current->reclaim_state;
 	unsigned long lru_pages = 0;
 	struct zoneref *z;
@@ -1580,7 +1576,7 @@
 		sc->nr_scanned = 0;
 		if (!priority)
 			disable_swap_token();
-		nr_reclaimed += shrink_zones(priority, zonelist, sc);
+		shrink_zones(priority, zonelist, sc);
 		/*
 		 * Don't shrink slabs when reclaiming memory from
 		 * over limit cgroups
@@ -1588,13 +1584,13 @@
 		if (scan_global_lru(sc)) {
 			shrink_slab(sc->nr_scanned, sc->gfp_mask, lru_pages);
 			if (reclaim_state) {
-				nr_reclaimed += reclaim_state->reclaimed_slab;
+				sc->nr_reclaimed += reclaim_state->reclaimed_slab;
 				reclaim_state->reclaimed_slab = 0;
 			}
 		}
 		total_scanned += sc->nr_scanned;
-		if (nr_reclaimed >= sc->swap_cluster_max) {
-			ret = nr_reclaimed;
+		if (sc->nr_reclaimed >= sc->swap_cluster_max) {
+			ret = sc->nr_reclaimed;
 			goto out;
 		}
 
@@ -1617,7 +1613,7 @@
 	}
 	/* top priority shrink_zones still had more to do? don't OOM, then */
 	if (!sc->all_unreclaimable && scan_global_lru(sc))
-		ret = nr_reclaimed;
+		ret = sc->nr_reclaimed;
 out:
 	/*
 	 * Now that we've scanned all the zones at this priority level, note
@@ -1712,7 +1708,6 @@
 	int priority;
 	int i;
 	unsigned long total_scanned;
-	unsigned long nr_reclaimed;
 	struct reclaim_state *reclaim_state = current->reclaim_state;
 	struct scan_control sc = {
 		.gfp_mask = GFP_KERNEL,
@@ -1731,7 +1726,7 @@
 
 loop_again:
 	total_scanned = 0;
-	nr_reclaimed = 0;
+	sc.nr_reclaimed = 0;
 	sc.may_writepage = !laptop_mode;
 	count_vm_event(PAGEOUTRUN);
 
@@ -1817,11 +1812,11 @@
 			 */
 			if (!zone_watermark_ok(zone, order, 8*zone->pages_high,
 						end_zone, 0))
-				nr_reclaimed += shrink_zone(priority, zone, &sc);
+				shrink_zone(priority, zone, &sc);
 			reclaim_state->reclaimed_slab = 0;
 			nr_slab = shrink_slab(sc.nr_scanned, GFP_KERNEL,
 						lru_pages);
-			nr_reclaimed += reclaim_state->reclaimed_slab;
+			sc.nr_reclaimed += reclaim_state->reclaimed_slab;
 			total_scanned += sc.nr_scanned;
 			if (zone_is_all_unreclaimable(zone))
 				continue;
@@ -1835,7 +1830,7 @@
 			 * even in laptop mode
 			 */
 			if (total_scanned > SWAP_CLUSTER_MAX * 2 &&
-			    total_scanned > nr_reclaimed + nr_reclaimed / 2)
+			    total_scanned > sc.nr_reclaimed + sc.nr_reclaimed / 2)
 				sc.may_writepage = 1;
 		}
 		if (all_zones_ok)
@@ -1853,7 +1848,7 @@
 		 * matches the direct reclaim path behaviour in terms of impact
 		 * on zone->*_priority.
 		 */
-		if (nr_reclaimed >= SWAP_CLUSTER_MAX)
+		if (sc.nr_reclaimed >= SWAP_CLUSTER_MAX)
 			break;
 	}
 out:
@@ -1872,10 +1867,27 @@
 
 		try_to_freeze();
 
+		/*
+		 * Fragmentation may mean that the system cannot be
+		 * rebalanced for high-order allocations in all zones.
+		 * At this point, if nr_reclaimed < SWAP_CLUSTER_MAX,
+		 * it means the zones have been fully scanned and are still
+		 * not balanced. For high-order allocations, there is
+		 * little point trying all over again as kswapd may
+		 * infinite loop.
+		 *
+		 * Instead, recheck all watermarks at order-0 as they
+		 * are the most important. If watermarks are ok, kswapd will go
+		 * back to sleep. High-order users can still perform direct
+		 * reclaim if they wish.
+		 */
+		if (sc.nr_reclaimed < SWAP_CLUSTER_MAX)
+			order = sc.order = 0;
+
 		goto loop_again;
 	}
 
-	return nr_reclaimed;
+	return sc.nr_reclaimed;
 }
 
 /*
@@ -2227,7 +2239,6 @@
 	struct task_struct *p = current;
 	struct reclaim_state reclaim_state;
 	int priority;
-	unsigned long nr_reclaimed = 0;
 	struct scan_control sc = {
 		.may_writepage = !!(zone_reclaim_mode & RECLAIM_WRITE),
 		.may_swap = !!(zone_reclaim_mode & RECLAIM_SWAP),
@@ -2260,9 +2271,9 @@
 		priority = ZONE_RECLAIM_PRIORITY;
 		do {
 			note_zone_scanning_priority(zone, priority);
-			nr_reclaimed += shrink_zone(priority, zone, &sc);
+			shrink_zone(priority, zone, &sc);
 			priority--;
-		} while (priority >= 0 && nr_reclaimed < nr_pages);
+		} while (priority >= 0 && sc.nr_reclaimed < nr_pages);
 	}
 
 	slab_reclaimable = zone_page_state(zone, NR_SLAB_RECLAIMABLE);
@@ -2286,13 +2297,13 @@
 		 * Update nr_reclaimed by the number of slab pages we
 		 * reclaimed from this zone.
 		 */
-		nr_reclaimed += slab_reclaimable -
+		sc.nr_reclaimed += slab_reclaimable -
 			zone_page_state(zone, NR_SLAB_RECLAIMABLE);
 	}
 
 	p->reclaim_state = NULL;
 	current->flags &= ~(PF_MEMALLOC | PF_SWAPWRITE);
-	return nr_reclaimed >= nr_pages;
+	return sc.nr_reclaimed >= nr_pages;
 }
 
 int zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order)
@@ -2472,7 +2483,7 @@
  * back onto @zone's unevictable list.
  */
 #define SCAN_UNEVICTABLE_BATCH_SIZE 16UL /* arbitrary lock hold batch size */
-void scan_zone_unevictable_pages(struct zone *zone)
+static void scan_zone_unevictable_pages(struct zone *zone)
 {
 	struct list_head *l_unevictable = &zone->lru[LRU_UNEVICTABLE].list;
 	unsigned long scan;
@@ -2514,7 +2525,7 @@
  * that has possibly/probably made some previously unevictable pages
  * evictable.
  */
-void scan_all_zones_unevictable_pages(void)
+static void scan_all_zones_unevictable_pages(void)
 {
 	struct zone *zone;
 
diff --git a/samples/firmware_class/firmware_sample_driver.c b/samples/firmware_class/firmware_sample_driver.c
index 11114f3..219a298 100644
--- a/samples/firmware_class/firmware_sample_driver.c
+++ b/samples/firmware_class/firmware_sample_driver.c
@@ -100,7 +100,7 @@
 		       " request_firmware_nowait failed\n");
 }
 
-static int sample_init(void)
+static int __init sample_init(void)
 {
 	device_initialize(&ghost_device);
 	/* since there is no real hardware insertion I just call the
diff --git a/samples/kobject/kobject-example.c b/samples/kobject/kobject-example.c
index 08d0d3f..8d9b55a 100644
--- a/samples/kobject/kobject-example.c
+++ b/samples/kobject/kobject-example.c
@@ -101,7 +101,7 @@
 
 static struct kobject *example_kobj;
 
-static int example_init(void)
+static int __init example_init(void)
 {
 	int retval;
 
@@ -126,7 +126,7 @@
 	return retval;
 }
 
-static void example_exit(void)
+static void __exit example_exit(void)
 {
 	kobject_put(example_kobj);
 }
diff --git a/samples/kobject/kset-example.c b/samples/kobject/kset-example.c
index 7395c0b..45b7d56 100644
--- a/samples/kobject/kset-example.c
+++ b/samples/kobject/kset-example.c
@@ -229,7 +229,7 @@
 	kobject_put(&foo->kobj);
 }
 
-static int example_init(void)
+static int __init example_init(void)
 {
 	/*
 	 * Create a kset with the name of "kset_example",
@@ -264,7 +264,7 @@
 	return -EINVAL;
 }
 
-static void example_exit(void)
+static void __exit example_exit(void)
 {
 	destroy_foo_obj(baz_obj);
 	destroy_foo_obj(bar_obj);
diff --git a/samples/markers/marker-example.c b/samples/markers/marker-example.c
index e90dc5d..e9cd9c0 100644
--- a/samples/markers/marker-example.c
+++ b/samples/markers/marker-example.c
@@ -30,7 +30,7 @@
 	.open = my_open,
 };
 
-static int example_init(void)
+static int __init example_init(void)
 {
 	printk(KERN_ALERT "example init\n");
 	pentry_example = proc_create("marker-example", 0444, NULL, &mark_ops);
@@ -39,7 +39,7 @@
 	return 0;
 }
 
-static void example_exit(void)
+static void __exit example_exit(void)
 {
 	printk(KERN_ALERT "example exit\n");
 	remove_proc_entry("marker-example", NULL);
diff --git a/samples/tracepoints/tracepoint-probe-sample.c b/samples/tracepoints/tracepoint-probe-sample.c
index e3a9648..9e60eb6 100644
--- a/samples/tracepoints/tracepoint-probe-sample.c
+++ b/samples/tracepoints/tracepoint-probe-sample.c
@@ -28,7 +28,7 @@
 	printk(KERN_INFO "Event B is encountered\n");
 }
 
-int __init tp_sample_trace_init(void)
+static int __init tp_sample_trace_init(void)
 {
 	int ret;
 
@@ -42,7 +42,7 @@
 
 module_init(tp_sample_trace_init);
 
-void __exit tp_sample_trace_exit(void)
+static void __exit tp_sample_trace_exit(void)
 {
 	unregister_trace_subsys_eventb(probe_subsys_eventb);
 	unregister_trace_subsys_event(probe_subsys_event);
diff --git a/samples/tracepoints/tracepoint-probe-sample2.c b/samples/tracepoints/tracepoint-probe-sample2.c
index 685a5ac..be2a960 100644
--- a/samples/tracepoints/tracepoint-probe-sample2.c
+++ b/samples/tracepoints/tracepoint-probe-sample2.c
@@ -18,7 +18,7 @@
 		inode->i_ino);
 }
 
-int __init tp_sample_trace_init(void)
+static int __init tp_sample_trace_init(void)
 {
 	int ret;
 
@@ -30,7 +30,7 @@
 
 module_init(tp_sample_trace_init);
 
-void __exit tp_sample_trace_exit(void)
+static void __exit tp_sample_trace_exit(void)
 {
 	unregister_trace_subsys_event(probe_subsys_event);
 	tracepoint_synchronize_unregister();
diff --git a/samples/tracepoints/tracepoint-sample.c b/samples/tracepoints/tracepoint-sample.c
index 00d1697..68d5dc0 100644
--- a/samples/tracepoints/tracepoint-sample.c
+++ b/samples/tracepoints/tracepoint-sample.c
@@ -32,7 +32,7 @@
 	.open = my_open,
 };
 
-static int example_init(void)
+static int __init example_init(void)
 {
 	printk(KERN_ALERT "example init\n");
 	pentry_example = proc_create("tracepoint-example", 0444, NULL,
@@ -42,7 +42,7 @@
 	return 0;
 }
 
-static void example_exit(void)
+static void __exit example_exit(void)
 {
 	printk(KERN_ALERT "example exit\n");
 	remove_proc_entry("tracepoint-example", NULL);
diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl
index f88bb3e..7bed4ed 100755
--- a/scripts/checkpatch.pl
+++ b/scripts/checkpatch.pl
@@ -1,7 +1,8 @@
 #!/usr/bin/perl -w
 # (c) 2001, Dave Jones. <davej@redhat.com> (the file handling bit)
 # (c) 2005, Joel Schopp <jschopp@austin.ibm.com> (the ugly bit)
-# (c) 2007, Andy Whitcroft <apw@uk.ibm.com> (new conditions, test suite, etc)
+# (c) 2007,2008, Andy Whitcroft <apw@uk.ibm.com> (new conditions, test suite)
+# (c) 2008, Andy Whitcroft <apw@canonical.com>
 # Licensed under the terms of the GNU GPL License version 2
 
 use strict;
@@ -9,7 +10,7 @@
 my $P = $0;
 $P =~ s@.*/@@g;
 
-my $V = '0.24';
+my $V = '0.26';
 
 use Getopt::Long qw(:config no_auto_abbrev);
 
@@ -68,7 +69,9 @@
 my $dbg_type = 0;
 my $dbg_attr = 0;
 for my $key (keys %debug) {
-	eval "\${dbg_$key} = '$debug{$key}';"
+	## no critic
+	eval "\${dbg_$key} = '$debug{$key}';";
+	die "$@" if ($@);
 }
 
 if ($terse) {
@@ -116,7 +119,8 @@
 			__(?:mem|cpu|dev|)(?:initdata|init)|
 			____cacheline_aligned|
 			____cacheline_aligned_in_smp|
-			____cacheline_internodealigned_in_smp
+			____cacheline_internodealigned_in_smp|
+			__weak
 		  }x;
 our $Modifier;
 our $Inline	= qr{inline|__always_inline|noinline};
@@ -125,6 +129,7 @@
 
 our $Constant	= qr{(?:[0-9]+|0x[0-9a-fA-F]+)[UL]*};
 our $Assignment	= qr{(?:\*\=|/=|%=|\+=|-=|<<=|>>=|&=|\^=|\|=|=)};
+our $Compare    = qr{<=|>=|==|!=|<|>};
 our $Operators	= qr{
 			<=|>=|==|!=|
 			=>|->|<<|>>|<|>|!|~|
@@ -190,7 +195,7 @@
 		  }x;
 	$Type	= qr{
 			$NonptrType
-			(?:\s*\*+\s*const|\s*\*+|(?:\s*\[\s*\])+)?
+			(?:[\s\*]+\s*const|[\s\*]+|(?:\s*\[\s*\])+)?
 			(?:\s+$Inline|\s+$Modifier)*
 		  }x;
 	$Declare	= qr{(?:$Storage\s+)?$Type};
@@ -203,9 +208,9 @@
 my @dep_functions = ();
 my $removal = "Documentation/feature-removal-schedule.txt";
 if ($tree && -f "$root/$removal") {
-	open(REMOVE, "<$root/$removal") ||
+	open(my $REMOVE, '<', "$root/$removal") ||
 				die "$P: $removal: open failed - $!\n";
-	while (<REMOVE>) {
+	while (<$REMOVE>) {
 		if (/^Check:\s+(.*\S)/) {
 			for my $entry (split(/[, ]+/, $1)) {
 				if ($entry =~ m@include/(.*)@) {
@@ -217,17 +222,21 @@
 			}
 		}
 	}
+	close($REMOVE);
 }
 
 my @rawlines = ();
 my @lines = ();
 my $vname;
 for my $filename (@ARGV) {
+	my $FILE;
 	if ($file) {
-		open(FILE, "diff -u /dev/null $filename|") ||
+		open($FILE, '-|', "diff -u /dev/null $filename") ||
 			die "$P: $filename: diff failed - $!\n";
+	} elsif ($filename eq '-') {
+		open($FILE, '<&STDIN');
 	} else {
-		open(FILE, "<$filename") ||
+		open($FILE, '<', "$filename") ||
 			die "$P: $filename: open failed - $!\n";
 	}
 	if ($filename eq '-') {
@@ -235,11 +244,11 @@
 	} else {
 		$vname = $filename;
 	}
-	while (<FILE>) {
+	while (<$FILE>) {
 		chomp;
 		push(@rawlines, $_);
 	}
-	close(FILE);
+	close($FILE);
 	if (!process($filename)) {
 		$exit = 1;
 	}
@@ -366,7 +375,7 @@
 			}
 		}
 
-		#print "SQ:$sanitise_quote\n";
+		#print "c<$c> SQ<$sanitise_quote>\n";
 		if ($off != 0 && $sanitise_quote eq '*/' && $c ne "\t") {
 			substr($res, $off, 1, $;);
 		} elsif ($off != 0 && $sanitise_quote && $c ne "\t") {
@@ -402,6 +411,7 @@
 
 	my $type = '';
 	my $level = 0;
+	my @stack = ([$type, $level]);
 	my $p;
 	my $c;
 	my $len = 0;
@@ -433,6 +443,16 @@
 		$remainder = substr($blk, $off);
 
 		#warn "CSB: c<$c> type<$type> level<$level> remainder<$remainder> coff_set<$coff_set>\n";
+
+		# Handle nested #if/#else.
+		if ($remainder =~ /^#\s*(?:ifndef|ifdef|if)\s/) {
+			push(@stack, [ $type, $level ]);
+		} elsif ($remainder =~ /^#\s*(?:else|elif)\b/) {
+			($type, $level) = @{$stack[$#stack - 1]};
+		} elsif ($remainder =~ /^#\s*endif\b/) {
+			($type, $level) = @{pop(@stack)};
+		}
+
 		# Statement ends at the ';' or a close '}' at the
 		# outermost level.
 		if ($level == 0 && $c eq ';') {
@@ -579,11 +599,22 @@
 	my @res = ();
 
 	my $level = 0;
+	my @stack = ($level);
 	for ($line = $start; $remain > 0; $line++) {
 		next if ($rawlines[$line] =~ /^-/);
 		$remain--;
 
 		$blk .= $rawlines[$line];
+
+		# Handle nested #if/#else.
+		if ($rawlines[$line] =~ /^.\s*#\s*(?:ifndef|ifdef|if)\s/) {
+			push(@stack, $level);
+		} elsif ($rawlines[$line] =~ /^.\s*#\s*(?:else|elif)\b/) {
+			$level = $stack[$#stack - 1];
+		} elsif ($rawlines[$line] =~ /^.\s*#\s*endif\b/) {
+			$level = pop(@stack);
+		}
+
 		foreach my $c (split(//, $rawlines[$line])) {
 			##print "C<$c>L<$level><$open$close>O<$off>\n";
 			if ($off > 0) {
@@ -843,11 +874,11 @@
 			$type = 'V';
 			$av_pending = 'V';
 
-		} elsif ($cur =~ /^($Ident\s*):/) {
-			if ($type eq 'E') {
-				$av_pend_colon = 'L';
-			} elsif ($type eq 'T') {
+		} elsif ($cur =~ /^($Ident\s*):(?:\s*\d+\s*(,|=|;))?/) {
+			if (defined $2 && $type eq 'C' || $type eq 'T') {
 				$av_pend_colon = 'B';
+			} elsif ($type eq 'E') {
+				$av_pend_colon = 'L';
 			}
 			print "IDENT_COLON($1,$type>$av_pend_colon)\n" if ($dbg_values > 1);
 			$type = 'V';
@@ -865,6 +896,10 @@
 			$type = 'E';
 			$av_pend_colon = 'O';
 
+		} elsif ($cur =~/^(,)/) {
+			print "COMMA($1)\n" if ($dbg_values > 1);
+			$type = 'C';
+
 		} elsif ($cur =~ /^(\?)/o) {
 			print "QUESTION($1)\n" if ($dbg_values > 1);
 			$type = 'N';
@@ -880,7 +915,7 @@
 			}
 			$av_pend_colon = 'O';
 
-		} elsif ($cur =~ /^(;|\[)/o) {
+		} elsif ($cur =~ /^(\[)/o) {
 			print "CLOSE($1)\n" if ($dbg_values > 1);
 			$type = 'N';
 
@@ -1051,6 +1086,7 @@
 	my $in_comment = 0;
 	my $comment_edge = 0;
 	my $first_line = 0;
+	my $p1_prefix = '';
 
 	my $prev_values = 'E';
 
@@ -1097,9 +1133,12 @@
 					 $rawlines[$ln - 1] =~ /^-/);
 				$cnt--;
 				#print "RAW<$rawlines[$ln - 1]>\n";
-				($edge) = (defined $rawlines[$ln - 1] &&
-					$rawlines[$ln - 1] =~ m@(/\*|\*/)@);
-				last if (defined $edge);
+				last if (!defined $rawlines[$ln - 1]);
+				if ($rawlines[$ln - 1] =~ m@(/\*|\*/)@ &&
+				    $rawlines[$ln - 1] !~ m@"[^"]*(?:/\*|\*/)[^"]*"@) {
+					($edge) = $1;
+					last;
+				}
 			}
 			if (defined $edge && $edge eq '*/') {
 				$in_comment = 1;
@@ -1109,7 +1148,7 @@
 			# is the start of a diff block and this line starts
 			# ' *' then it is very likely a comment.
 			if (!defined $edge &&
-			    $rawlines[$linenr] =~ m@^.\s* \*(?:\s|$)@)
+			    $rawlines[$linenr] =~ m@^.\s*(?:\*\*+| \*)(?:\s|$)@)
 			{
 				$in_comment = 1;
 			}
@@ -1196,7 +1235,12 @@
 		# extract the filename as it passes
 		if ($line=~/^\+\+\+\s+(\S+)/) {
 			$realfile = $1;
-			$realfile =~ s@^[^/]*/@@;
+			$realfile =~ s@^([^/]*)/@@;
+
+			$p1_prefix = $1;
+			if ($tree && $p1_prefix ne '' && -e "$root/$p1_prefix") {
+				WARN("patch prefix '$p1_prefix' exists, appears to be a -p0 patch\n");
+			}
 
 			if ($realfile =~ m@^include/asm/@) {
 				ERROR("do not modify files in include/asm, change architecture specific files in include/asm-<architecture>\n" . "$here$rawline\n");
@@ -1336,7 +1380,7 @@
 			}
 
 			# any (foo ... *) is a pointer cast, and foo is a type
-			while ($s =~ /\(($Ident)(?:\s+$Sparse)*\s*\*+\s*\)/sg) {
+			while ($s =~ /\(($Ident)(?:\s+$Sparse)*[\s\*]+\s*\)/sg) {
 				possible($1, "C:" . $s);
 			}
 
@@ -1594,7 +1638,7 @@
 				$herecurr);
 		}
 # check for static initialisers.
-		if ($line =~ /\s*static\s.*=\s*(0|NULL|false)\s*;/) {
+		if ($line =~ /\bstatic\s.*=\s*(0|NULL|false)\s*;/) {
 			ERROR("do not initialise statics to 0 or NULL\n" .
 				$herecurr);
 		}
@@ -1602,7 +1646,7 @@
 # check for new typedefs, only function parameters and sparse annotations
 # make sense.
 		if ($line =~ /\btypedef\s/ &&
-		    $line !~ /\btypedef\s+$Type\s+\(\s*\*?$Ident\s*\)\s*\(/ &&
+		    $line !~ /\btypedef\s+$Type\s*\(\s*\*?$Ident\s*\)\s*\(/ &&
 		    $line !~ /\btypedef\s+$Type\s+$Ident\s*\(/ &&
 		    $line !~ /\b$typeTypedefs\b/ &&
 		    $line !~ /\b__bitwise(?:__|)\b/) {
@@ -1610,21 +1654,39 @@
 		}
 
 # * goes on variable not on type
-		if ($line =~ m{\($NonptrType(\*+)(?:\s+const)?\)}) {
-			ERROR("\"(foo$1)\" should be \"(foo $1)\"\n" .
-				$herecurr);
+		# (char*[ const])
+		if ($line =~ m{\($NonptrType(\s*\*[\s\*]*(?:$Modifier\s*)*)\)}) {
+			my ($from, $to) = ($1, $1);
 
-		} elsif ($line =~ m{\($NonptrType\s+(\*+)(?!\s+const)\s+\)}) {
-			ERROR("\"(foo $1 )\" should be \"(foo $1)\"\n" .
-				$herecurr);
+			# Should start with a space.
+			$to =~ s/^(\S)/ $1/;
+			# Should not end with a space.
+			$to =~ s/\s+$//;
+			# '*'s should not have spaces between.
+			while ($to =~ s/(.)\s\*/$1\*/) {
+			}
 
-		} elsif ($line =~ m{\b$NonptrType(\*+)(?:\s+(?:$Attribute|$Sparse))?\s+[A-Za-z\d_]+}) {
-			ERROR("\"foo$1 bar\" should be \"foo $1bar\"\n" .
-				$herecurr);
+			#print "from<$from> to<$to>\n";
+			if ($from ne $to) {
+				ERROR("\"(foo$from)\" should be \"(foo$to)\"\n" .  $herecurr);
+			}
+		} elsif ($line =~ m{\b$NonptrType(\s*\*[\s\*]*(?:$Modifier\s*)?)($Ident)}) {
+			my ($from, $to, $ident) = ($1, $1, $2);
 
-		} elsif ($line =~ m{\b$NonptrType\s+(\*+)(?!\s+(?:$Attribute|$Sparse))\s+[A-Za-z\d_]+}) {
-			ERROR("\"foo $1 bar\" should be \"foo $1bar\"\n" .
-				$herecurr);
+			# Should start with a space.
+			$to =~ s/^(\S)/ $1/;
+			# Should not end with a space.
+			$to =~ s/\s+$//;
+			# '*'s should not have spaces between.
+			while ($to =~ s/(.)\s\*/$1\*/) {
+			}
+			# Modifiers should have spaces.
+			$to =~ s/(\b$Modifier$)/$1 /;
+
+			#print "from<$from> to<$to>\n";
+			if ($from ne $to) {
+				ERROR("\"foo${from}bar\" should be \"foo${to}bar\"\n" .  $herecurr);
+			}
 		}
 
 # # no BUG() or BUG_ON()
@@ -1759,7 +1821,7 @@
 					$c = 'C' if ($elements[$n + 2] =~ /^$;/);
 					$c = 'B' if ($elements[$n + 2] =~ /^(\)|\]|;)/);
 					$c = 'O' if ($elements[$n + 2] eq '');
-					$c = 'E' if ($elements[$n + 2] =~ /\s*\\$/);
+					$c = 'E' if ($elements[$n + 2] =~ /^\s*\\$/);
 				} else {
 					$c = 'E';
 				}
@@ -1950,9 +2012,9 @@
 			my $spacing = $1;
 			my $value = $2;
 
-			# Flatten any parentheses and braces
+			# Flatten any parentheses
 			$value =~ s/\)\(/\) \(/g;
-			while ($value =~ s/\([^\(\)]*\)/1/) {
+			while ($value !~ /(?:$Ident|-?$Constant)\s*$Compare\s*(?:$Ident|-?$Constant)/ && $value =~ s/\([^\(\)]*\)/1/) {
 			}
 
 			if ($value =~ /^(?:$Ident|-?$Constant)$/) {
@@ -1992,7 +2054,7 @@
 		    $line =~ /\b(?:if|while|for)\s*\(/ && $line !~ /^.\s*#/) {
 			my ($s, $c) = ($stat, $cond);
 
-			if ($c =~ /\bif\s*\(.*[^<>!=]=[^=].*/) {
+			if ($c =~ /\bif\s*\(.*[^<>!=]=[^=].*/s) {
 				ERROR("do not use assignment in if condition\n" . $herecurr);
 			}
 
@@ -2167,9 +2229,10 @@
 				MODULE_PARAM_DESC|
 				DECLARE_PER_CPU|
 				DEFINE_PER_CPU|
-				__typeof__\(
+				__typeof__\(|
+				\.$Ident\s*=\s*
 			}x;
-			#print "REST<$rest>\n";
+			#print "REST<$rest> dstat<$dstat>\n";
 			if ($rest ne '') {
 				if ($rest !~ /while\s*\(/ &&
 				    $dstat !~ /$exceptions/)
@@ -2189,6 +2252,15 @@
 			}
 		}
 
+# make sure symbols are always wrapped with VMLINUX_SYMBOL() ...
+# all assignments may have only one of the following with an assignment:
+#	.
+#	ALIGN(...)
+#	VMLINUX_SYMBOL(...)
+		if ($realfile eq 'vmlinux.lds.h' && $line =~ /(?:(?:^|\s)$Ident\s*=|=\s*$Ident(?:\s|$))/) {
+			WARN("vmlinux.lds.h needs VMLINUX_SYMBOL() around C-visible symbols\n" . $herecurr);
+		}
+
 # check for redundant bracing round if etc
 		if ($line =~ /(^.*)\bif\b/ && $1 !~ /else\s*$/) {
 			my ($level, $endln, @chunks) =
@@ -2443,6 +2515,11 @@
 		if ($line =~ /^.\s*__initcall\s*\(/) {
 			WARN("please use device_initcall() instead of __initcall()\n" . $herecurr);
 		}
+# check for struct file_operations, ensure they are const.
+		if ($line =~ /\bstruct\s+file_operations\b/ &&
+		    $line !~ /\bconst\b/) {
+			WARN("struct file_operations should normally be const\n" . $herecurr);
+		}
 
 # use of NR_CPUS is usually wrong
 # ignore definitions of NR_CPUS and usage to define arrays as likely right
@@ -2466,6 +2543,15 @@
 				last;
 			}
 		}
+
+# whine mightily about in_atomic
+		if ($line =~ /\bin_atomic\s*\(/) {
+			if ($realfile =~ m@^drivers/@) {
+				ERROR("do not use in_atomic in drivers\n" . $herecurr);
+			} else {
+				WARN("use of in_atomic() is incorrect outside core kernel code\n" . $herecurr);
+			}
+		}
 	}
 
 	# If we have no input at all, then there is nothing to report on
diff --git a/scripts/markup_oops.pl b/scripts/markup_oops.pl
new file mode 100644
index 0000000..700a7a6
--- /dev/null
+++ b/scripts/markup_oops.pl
@@ -0,0 +1,162 @@
+#!/usr/bin/perl -w
+
+# Copyright 2008, Intel Corporation
+#
+# This file is part of the Linux kernel
+#
+# This program file is free software; you can redistribute it and/or modify it
+# under the terms of the GNU General Public License as published by the
+# Free Software Foundation; version 2 of the License.
+#
+# Authors:
+# 	Arjan van de Ven <arjan@linux.intel.com>
+
+
+my $vmlinux_name = $ARGV[0];
+# objdump needs a vmlinux to disassemble; fail early when none was given.
+defined($vmlinux_name) ||
+	die "Usage: dmesg | perl scripts/markup_oops.pl vmlinux\n";
+# Step 1: Parse the oops to find the EIP value (the last match wins)
+
+my $target = "0";
+while (<STDIN>) {
+	if ($_ =~ /EIP: 0060:\[\<([a-z0-9]+)\>\]/) {
+		$target = $1;
+	}
+}
+
+if ($target =~ /^f8/) {
+	print "This script does not work on modules ... \n";
+	exit;
+}
+
+if ($target eq "0") {
+	print "No oops found!\n";
+	print "Usage: \n";
+	print "    dmesg | perl scripts/markup_oops.pl vmlinux\n";
+	exit;
+}
+
+my $counter = 0;
+my $state   = 0;
+my $center  = 0;
+my @lines;
+
+# Returns 1 when the hex string $address is less than 4096 away from the
+# hex string $target (in either direction), and 0 otherwise.
+sub InRange {
+	my ($address, $target) = @_;
+	my $address_value = hex("0x" . $address);
+	my $target_value = hex("0x" . $target);
+	my $distance = $address_value - $target_value;
+	return 0 if ($distance <= -4096);
+	return 0 if ($distance >= 4096);
+	return 1;
+}
+
+
+
+# first, parse the input into the lines array, but to keep size down,
+# we only do this for 4Kb around the sweet spot
+
+# Index 0 is a valid value for $center, so "found" is tracked separately.
+my $center_found = 0;
+
+# List-form pipe open: the vmlinux path is never interpreted by a shell.
+open(my $objdump, '-|', 'objdump', '-dS', $vmlinux_name) ||
+	die "Cannot start objdump - $!";
+
+while (<$objdump>) {
+	my $line = $_;
+	chomp($line);
+	if ($state == 0) {
+		# skip ahead to the first address within range of the target
+		if ($line =~ /^([a-f0-9]+)\:/) {
+			if (InRange($1, $target)) {
+				$state = 1;
+			}
+		}
+	} else {
+		if ($line =~ /^([a-f0-9][a-f0-9][a-f0-9][a-f0-9][a-f0-9][a-f0-9]+)\:/) {
+			my $val = $1;
+			if (!InRange($val, $target)) {
+				last;
+			}
+			if ($val eq $target) {
+				$center = $counter;
+				$center_found = 1;
+			}
+		}
+		$lines[$counter] = $line;
+		$counter = $counter + 1;
+	}
+}
+
+close($objdump);
+
+# nothing captured, or the faulting address never appeared in the window
+if ($counter == 0 || !$center_found) {
+	print "No matching code found \n";
+	exit;
+}
+
+my $start;
+my $finish;
+my $codelines = 0;
+my $binarylines = 0;
+# now we go up and down in the array to find how much we want to print
+$start = $center;
+# NOTE(review): this loop stops at index 1, so it never examines $lines[0] - intended?
+while ($start > 1) {
+	$start = $start - 1;
+	my $line = $lines[$start];
+	if ($line =~ /^([a-f0-9]+)\:/) {
+		$binarylines = $binarylines + 1;
+	} else {
+		$codelines = $codelines + 1;
+	}
+	if ($codelines > 10) {
+		last;
+	}
+	if ($binarylines > 20) {
+		last;
+	}
+}
+
+$finish = $center;
+$codelines = 0;
+$binarylines = 0;
+while ($finish < $counter) {
+	$finish = $finish + 1;
+	# $lines[$counter] does not exist: stop instead of matching against undef
+	last if ($finish >= $counter);
+	my $line = $lines[$finish];
+	if ($line =~ /^([a-f0-9]+)\:/) {
+		$binarylines = $binarylines + 1;
+	} else {
+		$codelines = $codelines + 1;
+	}
+	if ($codelines > 10) {
+		last;
+	}
+	if ($binarylines > 20) {
+		last;
+	}
+}
+
+
+# Build the output for lines $start up to (but not including) $finish.
+# The line at $center is prefixed with '*' and tagged as the faulting
+# instruction; every other line is prefixed with a single space.
+my $fulltext = "";
+my $last_index = $finish - 1;
+
+for my $i ($start .. $last_index) {
+	my $is_fault = ($i == $center);
+	$fulltext .= $is_fault
+		? "*$lines[$i]     <----- faulting instruction\n"
+		: " $lines[$i]\n";
+}
+
+print $fulltext;
+
diff --git a/sound/core/sound.c b/sound/core/sound.c
index 44a69bb..7872a02 100644
--- a/sound/core/sound.c
+++ b/sound/core/sound.c
@@ -152,6 +152,10 @@
 	}
 	old_fops = file->f_op;
 	file->f_op = fops_get(mptr->f_ops);
+	if (file->f_op == NULL) {
+		file->f_op = old_fops;
+		return -ENODEV;
+	}
 	if (file->f_op->open)
 		err = file->f_op->open(inode, file);
 	if (err) {
