Merge tag 'for-v4.3-rc' of git://git.kernel.org/pub/scm/linux/kernel/git/sre/linux-power-supply
Pull power supply fixes from Sebastian Reichel:
"twl4030-charger fixes"
* tag 'for-v4.3-rc' of git://git.kernel.org/pub/scm/linux/kernel/git/sre/linux-power-supply:
twl4030_charger: fix another compile error
Revert "twl4030_charger: correctly handle -EPROBE_DEFER from devm_usb_get_phy_by_node"
diff --git a/CREDITS b/CREDITS
index bcb8efa..8207cc6 100644
--- a/CREDITS
+++ b/CREDITS
@@ -2992,6 +2992,10 @@
S: Santa Clara, CA 95052
S: USA
+N: Anil Ravindranath
+E: anil_ravindranath@pmc-sierra.com
+D: PMC-Sierra MaxRAID driver
+
N: Eric S. Raymond
E: esr@thyrsus.com
W: http://www.tuxedo.org/~esr/
diff --git a/Documentation/cgroups/blkio-controller.txt b/Documentation/cgroups/blkio-controller.txt
index 68b6a6a..12686be 100644
--- a/Documentation/cgroups/blkio-controller.txt
+++ b/Documentation/cgroups/blkio-controller.txt
@@ -201,7 +201,7 @@
specifies the number of bytes.
- blkio.io_serviced
- - Number of IOs completed to/from the disk by the group. These
+ - Number of IOs (bio) issued to the disk by the group. These
are further divided by the type of operation - read or write, sync
or async. First two fields specify the major and minor number of the
device, third field specifies the operation type and the fourth field
@@ -327,18 +327,11 @@
subjected to both the constraints.
- blkio.throttle.io_serviced
- - Number of IOs (bio) completed to/from the disk by the group (as
- seen by throttling policy). These are further divided by the type
- of operation - read or write, sync or async. First two fields specify
- the major and minor number of the device, third field specifies the
- operation type and the fourth field specifies the number of IOs.
-
- blkio.io_serviced does accounting as seen by CFQ and counts are in
- number of requests (struct request). On the other hand,
- blkio.throttle.io_serviced counts number of IO in terms of number
- of bios as seen by throttling policy. These bios can later be
- merged by elevator and total number of requests completed can be
- lesser.
+ - Number of IOs (bio) issued to the disk by the group. These
+ are further divided by the type of operation - read or write, sync
+ or async. First two fields specify the major and minor number of the
+ device, third field specifies the operation type and the fourth field
+ specifies the number of IOs.
- blkio.throttle.io_service_bytes
- Number of bytes transferred to/from the disk by the group. These
@@ -347,11 +340,6 @@
device, third field specifies the operation type and the fourth field
specifies the number of bytes.
- These numbers should roughly be same as blkio.io_service_bytes as
- updated by CFQ. The difference between two is that
- blkio.io_service_bytes will not be updated if CFQ is not operating
- on request queue.
-
Common files among various policies
-----------------------------------
- blkio.reset_stats
diff --git a/Documentation/cgroups/unified-hierarchy.txt b/Documentation/cgroups/unified-hierarchy.txt
index 1ee9caf..e0975c2 100644
--- a/Documentation/cgroups/unified-hierarchy.txt
+++ b/Documentation/cgroups/unified-hierarchy.txt
@@ -27,7 +27,7 @@
5-3-1. Format
5-3-2. Control Knobs
5-4. Per-Controller Changes
- 5-4-1. blkio
+ 5-4-1. io
5-4-2. cpuset
5-4-3. memory
6. Planned Changes
@@ -203,7 +203,7 @@
universal, and there are various other knobs which simply aren't
available for tasks.
-The blkio controller implicitly creates a hidden leaf node for each
+The io controller implicitly creates a hidden leaf node for each
cgroup to host the tasks. The hidden leaf has its own copies of all
the knobs with "leaf_" prefixed. While this allows equivalent control
over internal tasks, it's with serious drawbacks. It always adds an
@@ -438,9 +438,62 @@
5-4. Per-Controller Changes
-5-4-1. blkio
+5-4-1. io
-- blk-throttle becomes properly hierarchical.
+- blkio is renamed to io. The interface is overhauled anyway. The
+ new name is more in line with the other two major controllers, cpu
+ and memory, and better suited given that it may be used for cgroup
+ writeback without involving the block layer.
+
+- Everything including stat is always hierarchical, making separate
+ recursive stat files pointless and, as no internal node can have
+ tasks, leaf weights are meaningless. The operation model is
+ simplified and the interface is overhauled accordingly.
+
+ io.stat
+
+ The stat file. The reported stats are from the point where
+ bio's are issued to request_queue. The stats are counted
+ independent of which policies are enabled. Each line in the
+ file follows the following format. More fields may later be
+ added at the end.
+
+ $MAJ:$MIN rbytes=$RBYTES wbytes=$WBYTES rios=$RIOS wios=$WIOS
+
+ io.weight
+
+ The weight setting, currently only available and effective if
+ cfq-iosched is in use for the target device. The weight is
+ between 1 and 10000 and defaults to 100. The first line
+ always contains the default weight in the following format to
+ use when a per-device setting is missing.
+
+ default $WEIGHT
+
+ Subsequent lines list per-device weights of the following
+ format.
+
+ $MAJ:$MIN $WEIGHT
+
+ Writing "$WEIGHT" or "default $WEIGHT" changes the default
+ setting. Writing "$MAJ:$MIN $WEIGHT" sets per-device weight
+ while "$MAJ:$MIN default" clears it.
+
+ This file is available only on non-root cgroups.
+
+ io.max
+
+ The maximum bandwidth and/or iops setting, only available if
+ blk-throttle is enabled. The file is of the following format.
+
+ $MAJ:$MIN rbps=$RBPS wbps=$WBPS riops=$RIOPS wiops=$WIOPS
+
+ ${R|W}BPS are read/write bytes per second and ${R|W}IOPS are
+ read/write IOs per second. "max" indicates no limit. Writing
+ to the file follows the same format but the individual
+ settings may be omitted or specified in any order.
+
+ This file is available only on non-root cgroups.
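As an illustration of the interface documented above (not part of the patch text itself), a minimal userspace sketch of driving these knobs from a program follows; the cgroup path /sys/fs/cgroup/test and the 8:0 major:minor pair are assumptions made only for this example.

/*
 * Hypothetical sketch: set the default io.weight and a per-device
 * write-bandwidth limit for an existing cgroup.  The cgroup path and
 * the 8:0 device number are assumptions, not taken from the patch.
 */
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

static int write_str(const char *path, const char *val)
{
	int fd = open(path, O_WRONLY);

	if (fd < 0 || write(fd, val, strlen(val)) < 0) {
		perror(path);
		if (fd >= 0)
			close(fd);
		return -1;
	}
	close(fd);
	return 0;
}

int main(void)
{
	/* "default $WEIGHT" updates the default weight (1..10000). */
	write_str("/sys/fs/cgroup/test/io.weight", "default 500\n");

	/* io.max settings may be given in any order; "max" means no limit. */
	write_str("/sys/fs/cgroup/test/io.max", "8:0 wbps=1048576 wiops=max\n");

	return 0;
}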
5-4-2. cpuset
diff --git a/Documentation/devicetree/bindings/input/touchscreen/colibri-vf50-ts.txt b/Documentation/devicetree/bindings/input/touchscreen/colibri-vf50-ts.txt
new file mode 100644
index 0000000..9d9e930
--- /dev/null
+++ b/Documentation/devicetree/bindings/input/touchscreen/colibri-vf50-ts.txt
@@ -0,0 +1,36 @@
+* Toradex Colibri VF50 Touchscreen driver
+
+Required Properties:
+- compatible: must be toradex,vf50-touchscreen
+- io-channels: adc channels being used by the Colibri VF50 module
+- xp-gpios: FET gate driver for input of X+
+- xm-gpios: FET gate driver for input of X-
+- yp-gpios: FET gate driver for input of Y+
+- ym-gpios: FET gate driver for input of Y-
+- interrupt-parent: phandle for the interrupt controller
+- interrupts: pen irq interrupt for touch detection
+- pinctrl-names: "idle", "default", "gpios"
+- pinctrl-0: pinctrl node for pen/touch detection state pinmux
+- pinctrl-1: pinctrl node for X/Y and pressure measurement (ADC) state pinmux
+- pinctrl-2: pinctrl node for gpios functioning as FET gate drivers
+- vf50-ts-min-pressure: pressure level at which to stop measuring X/Y values
+
+Example:
+
+ touchctrl: vf50_touchctrl {
+ compatible = "toradex,vf50-touchscreen";
+ io-channels = <&adc1 0>,<&adc0 0>,
+ <&adc0 1>,<&adc1 2>;
+ xp-gpios = <&gpio0 13 GPIO_ACTIVE_LOW>;
+ xm-gpios = <&gpio2 29 GPIO_ACTIVE_HIGH>;
+ yp-gpios = <&gpio0 12 GPIO_ACTIVE_LOW>;
+ ym-gpios = <&gpio0 4 GPIO_ACTIVE_HIGH>;
+ interrupt-parent = <&gpio0>;
+ interrupts = <8 IRQ_TYPE_LEVEL_LOW>;
+ pinctrl-names = "idle","default","gpios";
+ pinctrl-0 = <&pinctrl_touchctrl_idle>;
+ pinctrl-1 = <&pinctrl_touchctrl_default>;
+ pinctrl-2 = <&pinctrl_touchctrl_gpios>;
+ vf50-ts-min-pressure = <200>;
+ status = "disabled";
+ };
diff --git a/Documentation/devicetree/bindings/input/touchscreen/imx6ul_tsc.txt b/Documentation/devicetree/bindings/input/touchscreen/imx6ul_tsc.txt
new file mode 100644
index 0000000..853dff9
--- /dev/null
+++ b/Documentation/devicetree/bindings/input/touchscreen/imx6ul_tsc.txt
@@ -0,0 +1,36 @@
+* Freescale i.MX6UL Touch Controller
+
+Required properties:
+- compatible: must be "fsl,imx6ul-tsc".
+- reg: this touch controller address and the ADC2 address.
+- interrupts: the interrupt of this touch controller and ADC2.
+- clocks: the root clock of touch controller and ADC2.
+- clock-names: must be "tsc" and "adc".
+- xnur-gpio: the X- gpio this controller connects to.
+ This xnur-gpio returns to low once the finger leaves the touch screen (the
+ last touch event the touch controller captures).
+
+Optional properties:
+- measure-delay-time: the value of measure delay time.
+ Before an X-axis or Y-axis measurement, the screen needs some time until
+ the potential distribution is even.
+ This value depends on the touch screen.
+- pre-charge-time: the touch screen needs some time to precharge.
+ This value depends on the touch screen.
+
+Example:
+ tsc: tsc@02040000 {
+ compatible = "fsl,imx6ul-tsc";
+ reg = <0x02040000 0x4000>, <0x0219c000 0x4000>;
+ interrupts = <GIC_SPI 3 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 101 IRQ_TYPE_LEVEL_HIGH>;
+ clocks = <&clks IMX6UL_CLK_IPG>,
+ <&clks IMX6UL_CLK_ADC2>;
+ clock-names = "tsc", "adc";
+ pinctrl-names = "default";
+ pinctrl-0 = <&pinctrl_tsc>;
+ xnur-gpio = <&gpio1 3 GPIO_ACTIVE_LOW>;
+ measure-delay-time = <0xfff>;
+ pre-charge-time = <0xffff>;
+ status = "okay";
+ };
diff --git a/Documentation/devicetree/bindings/soc/qcom/qcom,smd.txt b/Documentation/devicetree/bindings/soc/qcom/qcom,smd.txt
index f65c76d..97d9b3e 100644
--- a/Documentation/devicetree/bindings/soc/qcom/qcom,smd.txt
+++ b/Documentation/devicetree/bindings/soc/qcom/qcom,smd.txt
@@ -37,6 +37,12 @@
Definition: the identifier of the remote processor in the smd channel
allocation table
+- qcom,remote-pid:
+ Usage: optional
+ Value type: <u32>
+ Definition: the identifier for the remote processor as known by the rest
+ of the system.
+
= SMD DEVICES
In turn, subnodes of the "edges" represent devices tied to SMD channels on that
diff --git a/Documentation/devicetree/bindings/watchdog/atmel-sama5d4-wdt.txt b/Documentation/devicetree/bindings/watchdog/atmel-sama5d4-wdt.txt
new file mode 100644
index 0000000..f7cc7c0
--- /dev/null
+++ b/Documentation/devicetree/bindings/watchdog/atmel-sama5d4-wdt.txt
@@ -0,0 +1,35 @@
+* Atmel SAMA5D4 Watchdog Timer (WDT) Controller
+
+Required properties:
+- compatible: "atmel,sama5d4-wdt"
+- reg: base physical address and length of memory mapped region.
+
+Optional properties:
+- timeout-sec: watchdog timeout value (in seconds).
+- interrupts: interrupt number to the CPU.
+- atmel,watchdog-type: should be "hardware" or "software".
+ "hardware": enable watchdog fault reset. A watchdog fault triggers
+ watchdog reset.
+ "software": enable watchdog fault interrupt. A watchdog fault asserts
+ watchdog interrupt.
+- atmel,idle-halt: present if you want to stop the watchdog when the CPU is
+ in idle state.
+ CAUTION: This property should be used with care; it actually makes the
+ watchdog stop counting when the CPU is in idle state, so the watchdog
+ reset time depends on mean CPU usage, and the watchdog will not reset at
+ all if the CPU stops working while it is in idle state, which is probably
+ not what you want.
+- atmel,dbg-halt: present if you want to stop the watchdog when the CPU is
+ in debug state.
+
+Example:
+ watchdog@fc068640 {
+ compatible = "atmel,sama5d4-wdt";
+ reg = <0xfc068640 0x10>;
+ interrupts = <4 IRQ_TYPE_LEVEL_HIGH 5>;
+ timeout-sec = <10>;
+ atmel,watchdog-type = "hardware";
+ atmel,dbg-halt;
+ atmel,idle-halt;
+ status = "okay";
+ };
diff --git a/Documentation/devicetree/bindings/watchdog/lpc18xx-wdt.txt b/Documentation/devicetree/bindings/watchdog/lpc18xx-wdt.txt
new file mode 100644
index 0000000..09f6b24
--- /dev/null
+++ b/Documentation/devicetree/bindings/watchdog/lpc18xx-wdt.txt
@@ -0,0 +1,19 @@
+* NXP LPC18xx Watchdog Timer (WDT)
+
+Required properties:
+- compatible: Should be "nxp,lpc1850-wwdt"
+- reg: Should contain WDT registers location and length
+- clocks: Must contain an entry for each entry in clock-names.
+- clock-names: Should contain "wdtclk" and "reg"; the watchdog counter
+ clock and register interface clock respectively.
+- interrupts: Should contain WDT interrupt
+
+Examples:
+
+watchdog@40080000 {
+ compatible = "nxp,lpc1850-wwdt";
+ reg = <0x40080000 0x24>;
+ clocks = <&cgu BASE_SAFE_CLK>, <&ccu1 CLK_CPU_WWDT>;
+ clock-names = "wdtclk", "reg";
+ interrupts = <49>;
+};
diff --git a/Documentation/gpio/board.txt b/Documentation/gpio/board.txt
index b80606d..f59c43b 100644
--- a/Documentation/gpio/board.txt
+++ b/Documentation/gpio/board.txt
@@ -21,8 +21,8 @@
device tree bindings for your controller.
GPIOs mappings are defined in the consumer device's node, in a property named
-<function>-gpios, where <function> is the function the driver will request
-through gpiod_get(). For example:
+either <function>-gpios or <function>-gpio, where <function> is the function
+the driver will request through gpiod_get(). For example:
foo_device {
compatible = "acme,foo";
@@ -31,7 +31,7 @@
<&gpio 16 GPIO_ACTIVE_HIGH>, /* green */
<&gpio 17 GPIO_ACTIVE_HIGH>; /* blue */
- power-gpios = <&gpio 1 GPIO_ACTIVE_LOW>;
+ power-gpio = <&gpio 1 GPIO_ACTIVE_LOW>;
};
This property will make GPIOs 15, 16 and 17 available to the driver under the
@@ -39,15 +39,24 @@
struct gpio_desc *red, *green, *blue, *power;
- red = gpiod_get_index(dev, "led", 0);
- green = gpiod_get_index(dev, "led", 1);
- blue = gpiod_get_index(dev, "led", 2);
+ red = gpiod_get_index(dev, "led", 0, GPIOD_OUT_HIGH);
+ green = gpiod_get_index(dev, "led", 1, GPIOD_OUT_HIGH);
+ blue = gpiod_get_index(dev, "led", 2, GPIOD_OUT_HIGH);
- power = gpiod_get(dev, "power");
+ power = gpiod_get(dev, "power", GPIOD_OUT_HIGH);
The led GPIOs will be active-high, while the power GPIO will be active-low (i.e.
gpiod_is_active_low(power) will be true).
+The second parameter of the gpiod_get() functions, the con_id string, has to be
+the <function>-prefix of the GPIO suffixes ("gpios" or "gpio", automatically
+looked up by the gpiod functions internally) used in the device tree. With the
+above "led-gpios" example, use the prefix without the "-" as the con_id
+parameter: "led".
+
+Internally, the GPIO subsystem prefixes the GPIO suffix ("gpios" or "gpio")
+with the string passed in con_id to get the resulting string
+(snprintf(..., "%s-%s", con_id, gpio_suffixes[])).
+
ACPI
----
ACPI also supports function names for GPIOs in a similar fashion to DT.
@@ -142,13 +151,14 @@
struct gpio_desc *red, *green, *blue, *power;
- red = gpiod_get_index(dev, "led", 0);
- green = gpiod_get_index(dev, "led", 1);
- blue = gpiod_get_index(dev, "led", 2);
+ red = gpiod_get_index(dev, "led", 0, GPIOD_OUT_HIGH);
+ green = gpiod_get_index(dev, "led", 1, GPIOD_OUT_HIGH);
+ blue = gpiod_get_index(dev, "led", 2, GPIOD_OUT_HIGH);
- power = gpiod_get(dev, "power");
- gpiod_direction_output(power, 1);
+ power = gpiod_get(dev, "power", GPIOD_OUT_HIGH);
-Since the "power" GPIO is mapped as active-low, its actual signal will be 0
-after this code. Contrary to the legacy integer GPIO interface, the active-low
-property is handled during mapping and is thus transparent to GPIO consumers.
+Since the "led" GPIOs are mapped as active-high, this example will switch their
+signals to 1, i.e. enabling the LEDs. And for the "power" GPIO, which is mapped
+as active-low, its actual signal will be 0 after this code. Contrary to the legacy
+integer GPIO interface, the active-low property is handled during mapping and is
+thus transparent to GPIO consumers.
diff --git a/Documentation/gpio/consumer.txt b/Documentation/gpio/consumer.txt
index a206639..e000502 100644
--- a/Documentation/gpio/consumer.txt
+++ b/Documentation/gpio/consumer.txt
@@ -39,6 +39,9 @@
const char *con_id, unsigned int idx,
enum gpiod_flags flags)
+For a more detailed description of the con_id parameter in the DeviceTree case,
+see Documentation/gpio/board.txt
+
The flags parameter is used to optionally specify a direction and initial value
for the GPIO. Values can be:
diff --git a/Documentation/hwmon/nct6775 b/Documentation/hwmon/nct6775
index f0dd3d2..76add4c 100644
--- a/Documentation/hwmon/nct6775
+++ b/Documentation/hwmon/nct6775
@@ -32,6 +32,10 @@
Prefix: 'nct6792'
Addresses scanned: ISA address retrieved from Super I/O registers
Datasheet: Available from Nuvoton upon request
+ * Nuvoton NCT6793D
+ Prefix: 'nct6793'
+ Addresses scanned: ISA address retrieved from Super I/O registers
+ Datasheet: Available from Nuvoton upon request
Authors:
Guenter Roeck <linux@roeck-us.net>
diff --git a/Documentation/static-keys.txt b/Documentation/static-keys.txt
index f4cb0b2..477927b 100644
--- a/Documentation/static-keys.txt
+++ b/Documentation/static-keys.txt
@@ -15,8 +15,8 @@
DEFINE_STATIC_KEY_TRUE(key);
DEFINE_STATIC_KEY_FALSE(key);
-static_key_likely()
-statick_key_unlikely()
+static_branch_likely()
+static_branch_unlikely()
0) Abstract
diff --git a/Documentation/thermal/sysfs-api.txt b/Documentation/thermal/sysfs-api.txt
index c1f6864..10f062e 100644
--- a/Documentation/thermal/sysfs-api.txt
+++ b/Documentation/thermal/sysfs-api.txt
@@ -180,6 +180,7 @@
|---temp: Current temperature
|---mode: Working mode of the thermal zone
|---policy: Thermal governor used for this zone
+ |---available_policies: Available thermal governors for this zone
|---trip_point_[0-*]_temp: Trip point temperature
|---trip_point_[0-*]_type: Trip point type
|---trip_point_[0-*]_hyst: Hysteresis value for this trip point
@@ -256,6 +257,10 @@
One of the various thermal governors used for a particular zone.
RW, Required
+available_policies
+ Available thermal governors which can be used for a particular zone.
+ RO, Required
+
trip_point_[0-*]_temp
The temperature above which trip point will be fired.
Unit: millidegree Celsius
@@ -417,6 +422,7 @@
|---temp: 37000
|---mode: enabled
|---policy: step_wise
+ |---available_policies: step_wise fair_share
|---trip_point_0_temp: 100000
|---trip_point_0_type: critical
|---trip_point_1_temp: 80000
diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt
index a4ebcb7..d9eccee 100644
--- a/Documentation/virtual/kvm/api.txt
+++ b/Documentation/virtual/kvm/api.txt
@@ -2671,7 +2671,7 @@
4.87 KVM_SET_GUEST_DEBUG
Capability: KVM_CAP_SET_GUEST_DEBUG
-Architectures: x86, s390, ppc
+Architectures: x86, s390, ppc, arm64
Type: vcpu ioctl
Parameters: struct kvm_guest_debug (in)
Returns: 0 on success; -1 on error
@@ -2693,8 +2693,8 @@
The top 16 bits of the control field are architecture specific control
flags which can include the following:
- - KVM_GUESTDBG_USE_SW_BP: using software breakpoints [x86]
- - KVM_GUESTDBG_USE_HW_BP: using hardware breakpoints [x86, s390]
+ - KVM_GUESTDBG_USE_SW_BP: using software breakpoints [x86, arm64]
+ - KVM_GUESTDBG_USE_HW_BP: using hardware breakpoints [x86, s390, arm64]
- KVM_GUESTDBG_INJECT_DB: inject DB type exception [x86]
- KVM_GUESTDBG_INJECT_BP: inject BP type exception [x86]
- KVM_GUESTDBG_EXIT_PENDING: trigger an immediate guest exit [s390]
@@ -2709,6 +2709,11 @@
The second part of the structure is architecture specific and
typically contains a set of debug registers.
+For arm64 the number of debug registers is implementation defined and
+can be determined by querying the KVM_CAP_GUEST_DEBUG_HW_BPS and
+KVM_CAP_GUEST_DEBUG_HW_WPS capabilities which return a positive number
+indicating the number of supported registers.
+
When debug events exit the main run loop with the reason
KVM_EXIT_DEBUG with the kvm_debug_exit_arch part of the kvm_run
structure containing architecture specific debug information.
@@ -3111,11 +3116,13 @@
where kvm expects application code to place the data for the next
KVM_RUN invocation (KVM_EXIT_IO_IN). Data format is a packed array.
+ /* KVM_EXIT_DEBUG */
struct {
struct kvm_debug_exit_arch arch;
} debug;
-Unused.
+If the exit_reason is KVM_EXIT_DEBUG, then a vcpu is processing a debug event
+for which architecture specific information is returned.
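As a hedged illustration of the ioctl described above (not taken from the patch), a VMM could enable software breakpoints on an existing vcpu file descriptor roughly as follows; the vcpu_fd is assumed to have been obtained earlier via KVM_CREATE_VCPU.

#include <linux/kvm.h>
#include <string.h>
#include <sys/ioctl.h>

/*
 * Sketch only: enable guest debugging with software breakpoints.
 * On arm64, the number of hardware breakpoint/watchpoint registers can
 * first be queried via KVM_CHECK_EXTENSION with KVM_CAP_GUEST_DEBUG_HW_BPS
 * and KVM_CAP_GUEST_DEBUG_HW_WPS, as noted above.
 */
int enable_sw_breakpoints(int vcpu_fd)
{
	struct kvm_guest_debug dbg;

	memset(&dbg, 0, sizeof(dbg));
	dbg.control = KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP;

	/* Returns 0 on success, -1 on error. */
	return ioctl(vcpu_fd, KVM_SET_GUEST_DEBUG, &dbg);
}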
/* KVM_EXIT_MMIO */
struct {
diff --git a/Documentation/vm/00-INDEX b/Documentation/vm/00-INDEX
index 081c497..6a5e2a1 100644
--- a/Documentation/vm/00-INDEX
+++ b/Documentation/vm/00-INDEX
@@ -14,6 +14,8 @@
- a brief summary of hugetlbpage support in the Linux kernel.
hwpoison.txt
- explains what hwpoison is
+idle_page_tracking.txt
+ - description of the idle page tracking feature.
ksm.txt
- how to use the Kernel Samepage Merging feature.
numa
diff --git a/Documentation/vm/idle_page_tracking.txt b/Documentation/vm/idle_page_tracking.txt
new file mode 100644
index 0000000..85dcc3b
--- /dev/null
+++ b/Documentation/vm/idle_page_tracking.txt
@@ -0,0 +1,98 @@
+MOTIVATION
+
+The idle page tracking feature allows tracking which memory pages are being
+accessed by a workload and which are idle. This information can be useful for
+estimating the workload's working set size, which, in turn, can be taken into
+account when configuring the workload parameters, setting memory cgroup limits,
+or deciding where to place the workload within a compute cluster.
+
+It is enabled by CONFIG_IDLE_PAGE_TRACKING=y.
+
+USER API
+
+The idle page tracking API is located at /sys/kernel/mm/page_idle. Currently,
+it consists of a single read-write file, /sys/kernel/mm/page_idle/bitmap.
+
+The file implements a bitmap where each bit corresponds to a memory page. The
+bitmap is represented by an array of 8-byte integers, and the page at PFN #i is
+mapped to bit #i%64 of array element #i/64; the byte order is native. When a bit is
+set, the corresponding page is idle.
+
+A page is considered idle if it has not been accessed since it was marked idle
+(for more details on what "accessed" actually means see the IMPLEMENTATION
+DETAILS section). To mark a page idle one has to set the bit corresponding to
+the page by writing to the file. A value written to the file is OR-ed with the
+current bitmap value.
+
+Only accesses to user memory pages are tracked. These are pages mapped to a
+process address space, page cache and buffer pages, swap cache pages. For other
+page types (e.g. SLAB pages) an attempt to mark a page idle is silently ignored,
+and hence such pages are never reported idle.
+
+For huge pages the idle flag is set only on the head page, so one has to read
+/proc/kpageflags in order to correctly count idle huge pages.
+
+Reading from or writing to /sys/kernel/mm/page_idle/bitmap will return
+-EINVAL if you are not starting the read/write on an 8-byte boundary, or
+if the size of the read/write is not a multiple of 8 bytes. Writing to
+this file beyond max PFN will return -ENXIO.
+
+That said, in order to estimate the number of pages that are not used by a
+workload one should:
+
+ 1. Mark all the workload's pages as idle by setting corresponding bits in
+ /sys/kernel/mm/page_idle/bitmap. The pages can be found by reading
+ /proc/pid/pagemap if the workload is represented by a process, or by
+ filtering out alien pages using /proc/kpagecgroup in case the workload is
+ placed in a memory cgroup.
+
+ 2. Wait until the workload accesses its working set.
+
+ 3. Read /sys/kernel/mm/page_idle/bitmap and count the number of bits set. If
+ one wants to ignore certain types of pages, e.g. mlocked pages since they
+ are not reclaimable, he or she can filter them out using /proc/kpageflags.
+
+See Documentation/vm/pagemap.txt for more information about /proc/pid/pagemap,
+/proc/kpageflags, and /proc/kpagecgroup.
+
+IMPLEMENTATION DETAILS
+
+The kernel internally keeps track of accesses to user memory pages in order to
+reclaim unreferenced pages first on memory shortage conditions. A page is
+considered referenced if it has been recently accessed via a process address
+space, in which case one or more PTEs it is mapped to will have the Accessed bit
+set, or marked accessed explicitly by the kernel (see mark_page_accessed()). The
+latter happens when:
+
+ - a userspace process reads or writes a page using a system call (e.g. read(2)
+ or write(2))
+
+ - a page that is used for storing filesystem buffers is read or written,
+ because a process needs filesystem metadata stored in it (e.g. lists a
+ directory tree)
+
+ - a page is accessed by a device driver using get_user_pages()
+
+When a dirty page is written to swap or disk as a result of memory reclaim or
+exceeding the dirty memory limit, it is not marked referenced.
+
+The idle memory tracking feature adds a new page flag, the Idle flag. This flag
+is set manually, by writing to /sys/kernel/mm/page_idle/bitmap (see the USER API
+section), and cleared automatically whenever a page is referenced as defined
+above.
+
+When a page is marked idle, the Accessed bit must be cleared in all PTEs it is
+mapped to, otherwise we will not be able to detect accesses to the page coming
+from a process address space. To avoid interference with the reclaimer, which,
+as noted above, uses the Accessed bit to promote actively referenced pages, one
+more page flag is introduced, the Young flag. When the PTE Accessed bit is
+cleared as a result of setting or updating a page's Idle flag, the Young flag
+is set on the page. The reclaimer treats the Young flag as an extra PTE
+Accessed bit and therefore will consider such a page as referenced.
+
+Since the idle memory tracking feature is based on the memory reclaimer logic,
+it only works with pages that are on an LRU list; other pages are silently
+ignored. That means it will ignore a user memory page if it is isolated, but
+since there are usually not many of them, it should not affect the overall
+result noticeably. In order not to stall scanning of the idle page bitmap,
+locked pages may be skipped too.
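To make the bitmap layout and the estimation recipe above concrete, here is a minimal userspace sketch (an illustration, not part of the patch); the starting PFN is an arbitrary assumption and would normally be derived from /proc/pid/pagemap.

/*
 * Minimal sketch of the /sys/kernel/mm/page_idle/bitmap layout described
 * above: one 64-bit word covers PFNs [i*64, i*64+63], and reads/writes
 * must be 8-byte aligned and sized.  The starting PFN is only an example.
 */
#include <fcntl.h>
#include <stdint.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	const uint64_t pfn = 0x100000;		/* example PFN, multiple of 64 */
	const off_t off = (pfn / 64) * 8;	/* byte offset of its 64-bit word */
	uint64_t word = ~0ULL;			/* set bits mark pages idle */
	int fd = open("/sys/kernel/mm/page_idle/bitmap", O_RDWR);

	if (fd < 0) {
		perror("open");
		return 1;
	}

	/* Mark PFNs pfn..pfn+63 idle; the written value is OR-ed in. */
	if (pwrite(fd, &word, sizeof(word), off) != sizeof(word)) {
		perror("pwrite");
		return 1;
	}

	sleep(10);	/* let the workload touch its working set */

	/* Bits that are still set correspond to pages that stayed idle. */
	if (pread(fd, &word, sizeof(word), off) != sizeof(word)) {
		perror("pread");
		return 1;
	}

	printf("idle pages in this 64-page block: %d\n",
	       __builtin_popcountll(word));
	close(fd);
	return 0;
}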
diff --git a/Documentation/vm/pagemap.txt b/Documentation/vm/pagemap.txt
index 3cd3843..0e1e555 100644
--- a/Documentation/vm/pagemap.txt
+++ b/Documentation/vm/pagemap.txt
@@ -5,7 +5,7 @@
userspace programs to examine the page tables and related information by
reading files in /proc.
-There are three components to pagemap:
+There are four components to pagemap:
* /proc/pid/pagemap. This file lets a userspace process find out which
physical frame each virtual page is mapped to. It contains one 64-bit
@@ -70,6 +70,11 @@
22. THP
23. BALLOON
24. ZERO_PAGE
+ 25. IDLE
+
+ * /proc/kpagecgroup. This file contains a 64-bit inode number of the
+ memory cgroup each page is charged to, indexed by PFN. Only available when
+ CONFIG_MEMCG is set.
Short descriptions to the page flags:
@@ -116,6 +121,12 @@
24. ZERO_PAGE
zero page for pfn_zero or huge_zero page
+25. IDLE
+ page has not been accessed since it was marked idle (see
+ Documentation/vm/idle_page_tracking.txt). Note that this flag may be
+ stale in case the page was accessed via a PTE. To make sure the flag
+ is up-to-date one has to read /sys/kernel/mm/page_idle/bitmap first.
+
[IO related page flags]
1. ERROR IO error occurred
3. UPTODATE page has up-to-date data
diff --git a/Documentation/vm/zswap.txt b/Documentation/vm/zswap.txt
index 8458c08..89fff7d 100644
--- a/Documentation/vm/zswap.txt
+++ b/Documentation/vm/zswap.txt
@@ -32,7 +32,7 @@
An example command to enable zswap at runtime, assuming sysfs is mounted
at /sys, is:
-echo 1 > /sys/modules/zswap/parameters/enabled
+echo 1 > /sys/module/zswap/parameters/enabled
When zswap is disabled at runtime it will stop storing pages that are
being swapped out. However, it will _not_ immediately write out or fault
@@ -49,14 +49,26 @@
evict pages from its own compressed pool on an LRU basis and write them back to
the backing swap device in the case that the compressed pool is full.
-Zswap makes use of zbud for the managing the compressed memory pool. Each
-allocation in zbud is not directly accessible by address. Rather, a handle is
+Zswap makes use of zpool for managing the compressed memory pool. Each
+allocation in zpool is not directly accessible by address. Rather, a handle is
returned by the allocation routine and that handle must be mapped before being
accessed. The compressed memory pool grows on demand and shrinks as compressed
-pages are freed. The pool is not preallocated.
+pages are freed. The pool is not preallocated. By default, a zpool of type
+zbud is created, but it can be selected at boot time by setting the "zpool"
+attribute, e.g. zswap.zpool=zbud. It can also be changed at runtime using the
+sysfs "zpool" attribute, e.g.
+
+echo zbud > /sys/module/zswap/parameters/zpool
+
+The zbud type zpool allocates exactly 1 page to store 2 compressed pages, which
+means the compression ratio will always be 2:1 or worse (because of half-full
+zbud pages). The zsmalloc type zpool has a more complex compressed page
+storage method, and it can achieve greater storage densities. However,
+zsmalloc does not implement compressed page eviction, so once zswap fills it
+cannot evict the oldest page; it can only reject new pages.
When a swap page is passed from frontswap to zswap, zswap maintains a mapping
-of the swap entry, a combination of the swap type and swap offset, to the zbud
+of the swap entry, a combination of the swap type and swap offset, to the zpool
handle that references that compressed swap page. This mapping is achieved
with a red-black tree per swap type. The swap offset is the search key for the
tree nodes.
@@ -74,9 +86,17 @@
* max_pool_percent - The maximum percentage of memory that the compressed
pool can occupy.
-Zswap allows the compressor to be selected at kernel boot time by setting the
-“compressor” attribute. The default compressor is lzo. e.g.
-zswap.compressor=deflate
+The default compressor is lzo, but it can be selected at boot time by setting
+the “compressor” attribute, e.g. zswap.compressor=lzo. It can also be changed
+at runtime using the sysfs "compressor" attribute, e.g.
+
+echo lzo > /sys/module/zswap/parameters/compressor
+
+When the zpool and/or compressor parameter is changed at runtime, any existing
+compressed pages are not modified; they are left in their own zpool. When a
+request is made for a page in an old zpool, it is uncompressed using its
+original compressor. Once all pages are removed from an old zpool, the zpool
+and its compressor are freed.
A debugfs interface is provided for various statistic about pool size, number
of pages stored, and various counters for the reasons pages are rejected.
diff --git a/Documentation/watchdog/src/watchdog-test.c b/Documentation/watchdog/src/watchdog-test.c
index 3da8229..fcdde8f 100644
--- a/Documentation/watchdog/src/watchdog-test.c
+++ b/Documentation/watchdog/src/watchdog-test.c
@@ -41,6 +41,7 @@
int main(int argc, char *argv[])
{
int flags;
+ unsigned int ping_rate = 1;
fd = open("/dev/watchdog", O_WRONLY);
@@ -63,22 +64,33 @@
fprintf(stderr, "Watchdog card enabled.\n");
fflush(stderr);
goto end;
+ } else if (!strncasecmp(argv[1], "-t", 2) && argv[2]) {
+ flags = atoi(argv[2]);
+ ioctl(fd, WDIOC_SETTIMEOUT, &flags);
+ fprintf(stderr, "Watchdog timeout set to %u seconds.\n", flags);
+ fflush(stderr);
+ goto end;
+ } else if (!strncasecmp(argv[1], "-p", 2) && argv[2]) {
+ ping_rate = strtoul(argv[2], NULL, 0);
+ fprintf(stderr, "Watchdog ping rate set to %u seconds.\n", ping_rate);
+ fflush(stderr);
} else {
- fprintf(stderr, "-d to disable, -e to enable.\n");
+ fprintf(stderr, "-d to disable, -e to enable, -t <n> to set " \
+ "the timeout,\n-p <n> to set the ping rate, and \n");
fprintf(stderr, "run by itself to tick the card.\n");
fflush(stderr);
goto end;
}
- } else {
- fprintf(stderr, "Watchdog Ticking Away!\n");
- fflush(stderr);
}
+ fprintf(stderr, "Watchdog Ticking Away!\n");
+ fflush(stderr);
+
signal(SIGINT, term);
while(1) {
keep_alive();
- sleep(1);
+ sleep(ping_rate);
}
end:
close(fd);
diff --git a/MAINTAINERS b/MAINTAINERS
index 67a4443..7ba7ab7 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -6789,6 +6789,14 @@
Q: http://patchwork.ozlabs.org/project/netdev/list/
F: drivers/net/ethernet/mellanox/mlxsw/
+MEMBARRIER SUPPORT
+M: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
+M: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
+L: linux-kernel@vger.kernel.org
+S: Supported
+F: kernel/membarrier.c
+F: include/uapi/linux/membarrier.h
+
MEMORY MANAGEMENT
L: linux-mm@kvack.org
W: http://www.linux-mm.org
@@ -7396,6 +7404,7 @@
M: Jon Mason <jdmason@kudzu.us>
M: Dave Jiang <dave.jiang@intel.com>
M: Allen Hubbe <Allen.Hubbe@emc.com>
+L: linux-ntb@googlegroups.com
S: Supported
W: https://github.com/jonmason/ntb/wiki
T: git git://github.com/jonmason/ntb.git
@@ -7407,6 +7416,7 @@
NTB INTEL DRIVER
M: Jon Mason <jdmason@kudzu.us>
M: Dave Jiang <dave.jiang@intel.com>
+L: linux-ntb@googlegroups.com
S: Supported
W: https://github.com/jonmason/ntb/wiki
T: git git://github.com/jonmason/ntb.git
@@ -8199,10 +8209,9 @@
F: include/linux/i2c/pmbus.h
PMC SIERRA MaxRAID DRIVER
-M: Anil Ravindranath <anil_ravindranath@pmc-sierra.com>
L: linux-scsi@vger.kernel.org
W: http://www.pmc-sierra.com/
-S: Supported
+S: Orphan
F: drivers/scsi/pmcraid.*
PMC SIERRA PM8001 DRIVER
diff --git a/Makefile b/Makefile
index f2d2706..1a132ea 100644
--- a/Makefile
+++ b/Makefile
@@ -1,7 +1,7 @@
VERSION = 4
-PATCHLEVEL = 2
+PATCHLEVEL = 3
SUBLEVEL = 0
-EXTRAVERSION =
+EXTRAVERSION = -rc1
NAME = Hurr durr I'ma sheep
# *DOCUMENTATION*
diff --git a/arch/Kconfig b/arch/Kconfig
index 8f35649..4e949e5 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -2,6 +2,9 @@
# General architecture dependent options
#
+config KEXEC_CORE
+ bool
+
config OPROFILE
tristate "OProfile system profiling"
depends on PROFILING
diff --git a/arch/alpha/include/asm/dma-mapping.h b/arch/alpha/include/asm/dma-mapping.h
index dfa32f0..72a8ca7 100644
--- a/arch/alpha/include/asm/dma-mapping.h
+++ b/arch/alpha/include/asm/dma-mapping.h
@@ -12,42 +12,6 @@
#include <asm-generic/dma-mapping-common.h>
-#define dma_alloc_coherent(d,s,h,f) dma_alloc_attrs(d,s,h,f,NULL)
-
-static inline void *dma_alloc_attrs(struct device *dev, size_t size,
- dma_addr_t *dma_handle, gfp_t gfp,
- struct dma_attrs *attrs)
-{
- return get_dma_ops(dev)->alloc(dev, size, dma_handle, gfp, attrs);
-}
-
-#define dma_free_coherent(d,s,c,h) dma_free_attrs(d,s,c,h,NULL)
-
-static inline void dma_free_attrs(struct device *dev, size_t size,
- void *vaddr, dma_addr_t dma_handle,
- struct dma_attrs *attrs)
-{
- get_dma_ops(dev)->free(dev, size, vaddr, dma_handle, attrs);
-}
-
-static inline int dma_mapping_error(struct device *dev, dma_addr_t dma_addr)
-{
- return get_dma_ops(dev)->mapping_error(dev, dma_addr);
-}
-
-static inline int dma_supported(struct device *dev, u64 mask)
-{
- return get_dma_ops(dev)->dma_supported(dev, mask);
-}
-
-static inline int dma_set_mask(struct device *dev, u64 mask)
-{
- return get_dma_ops(dev)->set_dma_mask(dev, mask);
-}
-
-#define dma_alloc_noncoherent(d, s, h, f) dma_alloc_coherent(d, s, h, f)
-#define dma_free_noncoherent(d, s, v, h) dma_free_coherent(d, s, v, h)
-
#define dma_cache_sync(dev, va, size, dir) ((void)0)
#endif /* _ALPHA_DMA_MAPPING_H */
diff --git a/arch/alpha/kernel/pci-noop.c b/arch/alpha/kernel/pci-noop.c
index df24b76..2b1f4a1 100644
--- a/arch/alpha/kernel/pci-noop.c
+++ b/arch/alpha/kernel/pci-noop.c
@@ -166,15 +166,6 @@
return mask < 0x00ffffffUL ? 0 : 1;
}
-static int alpha_noop_set_mask(struct device *dev, u64 mask)
-{
- if (!dev->dma_mask || !dma_supported(dev, mask))
- return -EIO;
-
- *dev->dma_mask = mask;
- return 0;
-}
-
struct dma_map_ops alpha_noop_ops = {
.alloc = alpha_noop_alloc_coherent,
.free = alpha_noop_free_coherent,
@@ -182,7 +173,6 @@
.map_sg = alpha_noop_map_sg,
.mapping_error = alpha_noop_mapping_error,
.dma_supported = alpha_noop_supported,
- .set_dma_mask = alpha_noop_set_mask,
};
struct dma_map_ops *dma_ops = &alpha_noop_ops;
diff --git a/arch/alpha/kernel/pci_iommu.c b/arch/alpha/kernel/pci_iommu.c
index eddee77..8969bf2 100644
--- a/arch/alpha/kernel/pci_iommu.c
+++ b/arch/alpha/kernel/pci_iommu.c
@@ -939,16 +939,6 @@
return dma_addr == 0;
}
-static int alpha_pci_set_mask(struct device *dev, u64 mask)
-{
- if (!dev->dma_mask ||
- !pci_dma_supported(alpha_gendev_to_pci(dev), mask))
- return -EIO;
-
- *dev->dma_mask = mask;
- return 0;
-}
-
struct dma_map_ops alpha_pci_ops = {
.alloc = alpha_pci_alloc_coherent,
.free = alpha_pci_free_coherent,
@@ -958,7 +948,6 @@
.unmap_sg = alpha_pci_unmap_sg,
.mapping_error = alpha_pci_mapping_error,
.dma_supported = alpha_pci_supported,
- .set_dma_mask = alpha_pci_set_mask,
};
struct dma_map_ops *dma_ops = &alpha_pci_ops;
diff --git a/arch/arc/plat-axs10x/axs10x.c b/arch/arc/plat-axs10x/axs10x.c
index ad9825d..0a77b19 100644
--- a/arch/arc/plat-axs10x/axs10x.c
+++ b/arch/arc/plat-axs10x/axs10x.c
@@ -402,6 +402,8 @@
unsigned int num_cores = (read_aux_reg(ARC_REG_MCIP_BCR) >> 16) & 0x3F;
if (num_cores > 2)
arc_set_core_freq(50 * 1000000);
+ else if (num_cores == 2)
+ arc_set_core_freq(75 * 1000000);
#endif
switch (arc_get_core_freq()/1000000) {
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index 0d1b717..72ad724 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -2020,6 +2020,7 @@
bool "Kexec system call (EXPERIMENTAL)"
depends on (!SMP || PM_SLEEP_SMP)
depends on !CPU_V7M
+ select KEXEC_CORE
help
kexec is a system call that implements the ability to shutdown your
current kernel, and to start another kernel. It is like a reboot
diff --git a/arch/arm/Makefile b/arch/arm/Makefile
index 7451b44..2c2b28e 100644
--- a/arch/arm/Makefile
+++ b/arch/arm/Makefile
@@ -54,6 +54,14 @@
LD += -EL
endif
+#
+# The Scalar Replacement of Aggregates (SRA) optimization pass in GCC 4.9 and
+# later may result in code being generated that handles signed short and signed
+# char struct members incorrectly. So disable it.
+# (https://gcc.gnu.org/bugzilla/show_bug.cgi?id=65932)
+#
+KBUILD_CFLAGS += $(call cc-option,-fno-ipa-sra)
+
# This selects which instruction set is used.
# Note that GCC does not numerically define an architecture version
# macro, but instead defines a whole series of macros which makes
diff --git a/arch/arm/boot/compressed/decompress.c b/arch/arm/boot/compressed/decompress.c
index bd245d3..a0765e7 100644
--- a/arch/arm/boot/compressed/decompress.c
+++ b/arch/arm/boot/compressed/decompress.c
@@ -57,5 +57,5 @@
int do_decompress(u8 *input, int len, u8 *output, void (*error)(char *x))
{
- return decompress(input, len, NULL, NULL, output, NULL, error);
+ return __decompress(input, len, NULL, NULL, output, 0, NULL, error);
}
diff --git a/arch/arm/boot/dts/exynos3250-monk.dts b/arch/arm/boot/dts/exynos3250-monk.dts
index a5863ac..540a0ad 100644
--- a/arch/arm/boot/dts/exynos3250-monk.dts
+++ b/arch/arm/boot/dts/exynos3250-monk.dts
@@ -116,6 +116,21 @@
min-microvolt = <1100000>;
max-microvolt = <2700000>;
};
+
+ thermal-zones {
+ cpu_thermal: cpu-thermal {
+ cooling-maps {
+ map0 {
+ /* Correspond to 500MHz at freq_table */
+ cooling-device = <&cpu0 5 5>;
+ };
+ map1 {
+ /* Correspond to 200MHz at freq_table */
+ cooling-device = <&cpu0 8 8>;
+ };
+ };
+ };
+ };
};
&adc {
@@ -141,6 +156,10 @@
};
};
+&cpu0 {
+ cpu0-supply = <&buck2_reg>;
+};
+
&exynos_usbphy {
status = "okay";
};
diff --git a/arch/arm/boot/dts/exynos3250-rinato.dts b/arch/arm/boot/dts/exynos3250-rinato.dts
index baa9b2f..41a5faf 100644
--- a/arch/arm/boot/dts/exynos3250-rinato.dts
+++ b/arch/arm/boot/dts/exynos3250-rinato.dts
@@ -107,6 +107,21 @@
min-microvolt = <1100000>;
max-microvolt = <2700000>;
};
+
+ thermal-zones {
+ cpu_thermal: cpu-thermal {
+ cooling-maps {
+ map0 {
+ /* Corresponds to 500MHz */
+ cooling-device = <&cpu0 5 5>;
+ };
+ map1 {
+ /* Corresponds to 200MHz */
+ cooling-device = <&cpu0 8 8>;
+ };
+ };
+ };
+ };
};
&adc {
@@ -132,6 +147,10 @@
};
};
+&cpu0 {
+ cpu0-supply = <&buck2_reg>;
+};
+
&exynos_usbphy {
status = "okay";
};
diff --git a/arch/arm/boot/dts/exynos3250.dtsi b/arch/arm/boot/dts/exynos3250.dtsi
index 2db9943..033def4 100644
--- a/arch/arm/boot/dts/exynos3250.dtsi
+++ b/arch/arm/boot/dts/exynos3250.dtsi
@@ -53,6 +53,22 @@
compatible = "arm,cortex-a7";
reg = <0>;
clock-frequency = <1000000000>;
+ clocks = <&cmu CLK_ARM_CLK>;
+ clock-names = "cpu";
+ #cooling-cells = <2>;
+
+ operating-points = <
+ 1000000 1150000
+ 900000 1112500
+ 800000 1075000
+ 700000 1037500
+ 600000 1000000
+ 500000 962500
+ 400000 925000
+ 300000 887500
+ 200000 850000
+ 100000 850000
+ >;
};
cpu1: cpu@1 {
diff --git a/arch/arm/boot/dts/exynos4.dtsi b/arch/arm/boot/dts/exynos4.dtsi
index b0d52b1..98c0a36 100644
--- a/arch/arm/boot/dts/exynos4.dtsi
+++ b/arch/arm/boot/dts/exynos4.dtsi
@@ -702,6 +702,7 @@
clocks = <&clock CLK_JPEG>;
clock-names = "jpeg";
power-domains = <&pd_cam>;
+ iommus = <&sysmmu_jpeg>;
};
hdmi: hdmi@12D00000 {
diff --git a/arch/arm/boot/dts/exynos4212.dtsi b/arch/arm/boot/dts/exynos4212.dtsi
index d9c8efee..5389011 100644
--- a/arch/arm/boot/dts/exynos4212.dtsi
+++ b/arch/arm/boot/dts/exynos4212.dtsi
@@ -30,6 +30,9 @@
device_type = "cpu";
compatible = "arm,cortex-a9";
reg = <0xA00>;
+ clocks = <&clock CLK_ARM_CLK>;
+ clock-names = "cpu";
+ operating-points-v2 = <&cpu0_opp_table>;
cooling-min-level = <13>;
cooling-max-level = <7>;
#cooling-cells = <2>; /* min followed by max */
@@ -39,6 +42,84 @@
device_type = "cpu";
compatible = "arm,cortex-a9";
reg = <0xA01>;
+ operating-points-v2 = <&cpu0_opp_table>;
+ };
+ };
+
+ cpu0_opp_table: opp_table0 {
+ compatible = "operating-points-v2";
+ opp-shared;
+
+ opp00 {
+ opp-hz = /bits/ 64 <200000000>;
+ opp-microvolt = <900000>;
+ clock-latency-ns = <200000>;
+ };
+ opp01 {
+ opp-hz = /bits/ 64 <300000000>;
+ opp-microvolt = <900000>;
+ clock-latency-ns = <200000>;
+ };
+ opp02 {
+ opp-hz = /bits/ 64 <400000000>;
+ opp-microvolt = <925000>;
+ clock-latency-ns = <200000>;
+ };
+ opp03 {
+ opp-hz = /bits/ 64 <500000000>;
+ opp-microvolt = <950000>;
+ clock-latency-ns = <200000>;
+ };
+ opp04 {
+ opp-hz = /bits/ 64 <600000000>;
+ opp-microvolt = <975000>;
+ clock-latency-ns = <200000>;
+ };
+ opp05 {
+ opp-hz = /bits/ 64 <700000000>;
+ opp-microvolt = <987500>;
+ clock-latency-ns = <200000>;
+ };
+ opp06 {
+ opp-hz = /bits/ 64 <800000000>;
+ opp-microvolt = <1000000>;
+ clock-latency-ns = <200000>;
+ };
+ opp07 {
+ opp-hz = /bits/ 64 <900000000>;
+ opp-microvolt = <1037500>;
+ clock-latency-ns = <200000>;
+ };
+ opp08 {
+ opp-hz = /bits/ 64 <1000000000>;
+ opp-microvolt = <1087500>;
+ clock-latency-ns = <200000>;
+ };
+ opp09 {
+ opp-hz = /bits/ 64 <1100000000>;
+ opp-microvolt = <1137500>;
+ clock-latency-ns = <200000>;
+ };
+ opp10 {
+ opp-hz = /bits/ 64 <1200000000>;
+ opp-microvolt = <1187500>;
+ clock-latency-ns = <200000>;
+ };
+ opp11 {
+ opp-hz = /bits/ 64 <1300000000>;
+ opp-microvolt = <1250000>;
+ clock-latency-ns = <200000>;
+ };
+ opp12 {
+ opp-hz = /bits/ 64 <1400000000>;
+ opp-microvolt = <1287500>;
+ clock-latency-ns = <200000>;
+ };
+ opp13 {
+ opp-hz = /bits/ 64 <1500000000>;
+ opp-microvolt = <1350000>;
+ clock-latency-ns = <200000>;
+ turbo-mode;
};
};
};
diff --git a/arch/arm/boot/dts/exynos4412-odroid-common.dtsi b/arch/arm/boot/dts/exynos4412-odroid-common.dtsi
index ca7d168..db52841 100644
--- a/arch/arm/boot/dts/exynos4412-odroid-common.dtsi
+++ b/arch/arm/boot/dts/exynos4412-odroid-common.dtsi
@@ -107,6 +107,10 @@
};
};
+&cpu0 {
+ cpu0-supply = <&buck2_reg>;
+};
+
/* RSTN signal for eMMC */
&sd1_cd {
samsung,pin-pud = <0>;
diff --git a/arch/arm/boot/dts/exynos4412-odroidu3.dts b/arch/arm/boot/dts/exynos4412-odroidu3.dts
index 44684e5..8632f35 100644
--- a/arch/arm/boot/dts/exynos4412-odroidu3.dts
+++ b/arch/arm/boot/dts/exynos4412-odroidu3.dts
@@ -13,6 +13,7 @@
/dts-v1/;
#include "exynos4412-odroid-common.dtsi"
+#include <dt-bindings/gpio/gpio.h>
/ {
model = "Hardkernel ODROID-U3 board based on Exynos4412";
@@ -61,3 +62,10 @@
"Speakers", "SPKL",
"Speakers", "SPKR";
};
+
+&spi_1 {
+ pinctrl-names = "default";
+ pinctrl-0 = <&spi1_bus>;
+ cs-gpios = <&gpb 5 GPIO_ACTIVE_HIGH>;
+ status = "okay";
+};
diff --git a/arch/arm/boot/dts/exynos4412-origen.dts b/arch/arm/boot/dts/exynos4412-origen.dts
index 84c7631..9d528af 100644
--- a/arch/arm/boot/dts/exynos4412-origen.dts
+++ b/arch/arm/boot/dts/exynos4412-origen.dts
@@ -78,6 +78,10 @@
};
};
+&cpu0 {
+ cpu0-supply = <&buck2_reg>;
+};
+
&fimd {
pinctrl-0 = <&lcd_clk &lcd_data24 &pwm1_out>;
pinctrl-names = "default";
diff --git a/arch/arm/boot/dts/exynos4412-trats2.dts b/arch/arm/boot/dts/exynos4412-trats2.dts
index 8848400..2a1ebb7 100644
--- a/arch/arm/boot/dts/exynos4412-trats2.dts
+++ b/arch/arm/boot/dts/exynos4412-trats2.dts
@@ -288,6 +288,10 @@
status = "okay";
};
+&cpu0 {
+ cpu0-supply = <&buck2_reg>;
+};
+
&csis_0 {
status = "okay";
vddcore-supply = <&ldo8_reg>;
diff --git a/arch/arm/boot/dts/exynos4412.dtsi b/arch/arm/boot/dts/exynos4412.dtsi
index b78ada7..ca0e3c1 100644
--- a/arch/arm/boot/dts/exynos4412.dtsi
+++ b/arch/arm/boot/dts/exynos4412.dtsi
@@ -30,6 +30,9 @@
device_type = "cpu";
compatible = "arm,cortex-a9";
reg = <0xA00>;
+ clocks = <&clock CLK_ARM_CLK>;
+ clock-names = "cpu";
+ operating-points-v2 = <&cpu0_opp_table>;
cooling-min-level = <13>;
cooling-max-level = <7>;
#cooling-cells = <2>; /* min followed by max */
@@ -39,18 +42,98 @@
device_type = "cpu";
compatible = "arm,cortex-a9";
reg = <0xA01>;
+ operating-points-v2 = <&cpu0_opp_table>;
};
cpu@A02 {
device_type = "cpu";
compatible = "arm,cortex-a9";
reg = <0xA02>;
+ operating-points-v2 = <&cpu0_opp_table>;
};
cpu@A03 {
device_type = "cpu";
compatible = "arm,cortex-a9";
reg = <0xA03>;
+ operating-points-v2 = <&cpu0_opp_table>;
+ };
+ };
+
+ cpu0_opp_table: opp_table0 {
+ compatible = "operating-points-v2";
+ opp-shared;
+
+ opp00 {
+ opp-hz = /bits/ 64 <200000000>;
+ opp-microvolt = <900000>;
+ clock-latency-ns = <200000>;
+ };
+ opp01 {
+ opp-hz = /bits/ 64 <300000000>;
+ opp-microvolt = <900000>;
+ clock-latency-ns = <200000>;
+ };
+ opp02 {
+ opp-hz = /bits/ 64 <400000000>;
+ opp-microvolt = <925000>;
+ clock-latency-ns = <200000>;
+ };
+ opp03 {
+ opp-hz = /bits/ 64 <500000000>;
+ opp-microvolt = <950000>;
+ clock-latency-ns = <200000>;
+ };
+ opp04 {
+ opp-hz = /bits/ 64 <600000000>;
+ opp-microvolt = <975000>;
+ clock-latency-ns = <200000>;
+ };
+ opp05 {
+ opp-hz = /bits/ 64 <700000000>;
+ opp-microvolt = <987500>;
+ clock-latency-ns = <200000>;
+ };
+ opp06 {
+ opp-hz = /bits/ 64 <800000000>;
+ opp-microvolt = <1000000>;
+ clock-latency-ns = <200000>;
+ };
+ opp07 {
+ opp-hz = /bits/ 64 <900000000>;
+ opp-microvolt = <1037500>;
+ clock-latency-ns = <200000>;
+ };
+ opp08 {
+ opp-hz = /bits/ 64 <1000000000>;
+ opp-microvolt = <1087500>;
+ clock-latency-ns = <200000>;
+ };
+ opp09 {
+ opp-hz = /bits/ 64 <1100000000>;
+ opp-microvolt = <1137500>;
+ clock-latency-ns = <200000>;
+ };
+ opp10 {
+ opp-hz = /bits/ 64 <1200000000>;
+ opp-microvolt = <1187500>;
+ clock-latency-ns = <200000>;
+ };
+ opp11 {
+ opp-hz = /bits/ 64 <1300000000>;
+ opp-microvolt = <1250000>;
+ clock-latency-ns = <200000>;
+ };
+ opp12 {
+ opp-hz = /bits/ 64 <1400000000>;
+ opp-microvolt = <1287500>;
+ clock-latency-ns = <200000>;
+ };
+ opp13 {
+ opp-hz = /bits/ 64 <1500000000>;
+ opp-microvolt = <1350000>;
+ clock-latency-ns = <200000>;
+ turbo-mode;
};
};
diff --git a/arch/arm/boot/dts/exynos5250-arndale.dts b/arch/arm/boot/dts/exynos5250-arndale.dts
index 7e728a1..db3f65f 100644
--- a/arch/arm/boot/dts/exynos5250-arndale.dts
+++ b/arch/arm/boot/dts/exynos5250-arndale.dts
@@ -117,6 +117,10 @@
};
};
+&cpu0 {
+ cpu0-supply = <&buck2_reg>;
+};
+
&dp {
status = "okay";
samsung,color-space = <0>;
diff --git a/arch/arm/boot/dts/exynos5250-smdk5250.dts b/arch/arm/boot/dts/exynos5250-smdk5250.dts
index 4fe186d..15aea76 100644
--- a/arch/arm/boot/dts/exynos5250-smdk5250.dts
+++ b/arch/arm/boot/dts/exynos5250-smdk5250.dts
@@ -74,6 +74,10 @@
};
};
+&cpu0 {
+ cpu0-supply = <&buck2_reg>;
+};
+
&dp {
samsung,color-space = <0>;
samsung,dynamic-range = <0>;
diff --git a/arch/arm/boot/dts/exynos5250-snow.dts b/arch/arm/boot/dts/exynos5250-snow.dts
index b7f4122..0720caa 100644
--- a/arch/arm/boot/dts/exynos5250-snow.dts
+++ b/arch/arm/boot/dts/exynos5250-snow.dts
@@ -235,6 +235,10 @@
};
};
+&cpu0 {
+ cpu0-supply = <&buck2_reg>;
+};
+
&dp {
status = "okay";
pinctrl-names = "default";
@@ -688,6 +692,7 @@
status = "okay";
samsung,spi-src-clk = <0>;
num-cs = <1>;
+ cs-gpios = <&gpa2 5 GPIO_ACTIVE_HIGH>;
};
&usbdrd_dwc3 {
diff --git a/arch/arm/boot/dts/exynos5250-spring.dts b/arch/arm/boot/dts/exynos5250-spring.dts
index d03f9b8..c1edd6d 100644
--- a/arch/arm/boot/dts/exynos5250-spring.dts
+++ b/arch/arm/boot/dts/exynos5250-spring.dts
@@ -65,6 +65,10 @@
};
};
+&cpu0 {
+ cpu0-supply = <&buck2_reg>;
+};
+
&dp {
status = "okay";
pinctrl-names = "default";
diff --git a/arch/arm/boot/dts/exynos5250.dtsi b/arch/arm/boot/dts/exynos5250.dtsi
index 4a1f883..b24610e 100644
--- a/arch/arm/boot/dts/exynos5250.dtsi
+++ b/arch/arm/boot/dts/exynos5250.dtsi
@@ -62,6 +62,28 @@
compatible = "arm,cortex-a15";
reg = <0>;
clock-frequency = <1700000000>;
+ clocks = <&clock CLK_ARM_CLK>;
+ clock-names = "cpu";
+ clock-latency = <140000>;
+
+ operating-points = <
+ 1700000 1300000
+ 1600000 1250000
+ 1500000 1225000
+ 1400000 1200000
+ 1300000 1150000
+ 1200000 1125000
+ 1100000 1100000
+ 1000000 1075000
+ 900000 1050000
+ 800000 1025000
+ 700000 1012500
+ 600000 1000000
+ 500000 975000
+ 400000 950000
+ 300000 937500
+ 200000 925000
+ >;
cooling-min-level = <15>;
cooling-max-level = <9>;
#cooling-cells = <2>; /* min followed by max */
diff --git a/arch/arm/boot/dts/exynos5422-cpus.dtsi b/arch/arm/boot/dts/exynos5422-cpus.dtsi
new file mode 100644
index 0000000..b7f60c8
--- /dev/null
+++ b/arch/arm/boot/dts/exynos5422-cpus.dtsi
@@ -0,0 +1,81 @@
+/*
+ * SAMSUNG EXYNOS5422 SoC cpu device tree source
+ *
+ * Copyright (c) 2015 Samsung Electronics Co., Ltd.
+ * http://www.samsung.com
+ *
+ * The only difference between EXYNOS5422 and EXYNOS5800 is cpu ordering. The
+ * EXYNOS5422 is booting from Cortex-A7 core while the EXYNOS5800 is booting
+ * from Cortex-A15 core.
+ *
+ * EXYNOS5422 based board files can include this file to provide cpu ordering
+ * which could boot a cortex-a7 from cpu0.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+&cpu0 {
+ device_type = "cpu";
+ compatible = "arm,cortex-a7";
+ reg = <0x100>;
+ clock-frequency = <1000000000>;
+ cci-control-port = <&cci_control0>;
+};
+
+&cpu1 {
+ device_type = "cpu";
+ compatible = "arm,cortex-a7";
+ reg = <0x101>;
+ clock-frequency = <1000000000>;
+ cci-control-port = <&cci_control0>;
+};
+
+&cpu2 {
+ device_type = "cpu";
+ compatible = "arm,cortex-a7";
+ reg = <0x102>;
+ clock-frequency = <1000000000>;
+ cci-control-port = <&cci_control0>;
+};
+
+&cpu3 {
+ device_type = "cpu";
+ compatible = "arm,cortex-a7";
+ reg = <0x103>;
+ clock-frequency = <1000000000>;
+ cci-control-port = <&cci_control0>;
+};
+
+&cpu4 {
+ device_type = "cpu";
+ compatible = "arm,cortex-a15";
+ reg = <0x0>;
+ clock-frequency = <1800000000>;
+ cci-control-port = <&cci_control1>;
+};
+
+&cpu5 {
+ device_type = "cpu";
+ compatible = "arm,cortex-a15";
+ reg = <0x1>;
+ clock-frequency = <1800000000>;
+ cci-control-port = <&cci_control1>;
+};
+
+&cpu6 {
+ device_type = "cpu";
+ compatible = "arm,cortex-a15";
+ reg = <0x2>;
+ clock-frequency = <1800000000>;
+ cci-control-port = <&cci_control1>;
+};
+
+&cpu7 {
+ device_type = "cpu";
+ compatible = "arm,cortex-a15";
+ reg = <0x3>;
+ clock-frequency = <1800000000>;
+ cci-control-port = <&cci_control1>;
+};
diff --git a/arch/arm/boot/dts/exynos5422-odroidxu3-common.dtsi b/arch/arm/boot/dts/exynos5422-odroidxu3-common.dtsi
index 1565667..79ffdfe 100644
--- a/arch/arm/boot/dts/exynos5422-odroidxu3-common.dtsi
+++ b/arch/arm/boot/dts/exynos5422-odroidxu3-common.dtsi
@@ -15,6 +15,7 @@
#include <dt-bindings/gpio/gpio.h>
#include <dt-bindings/sound/samsung-i2s.h>
#include "exynos5800.dtsi"
+#include "exynos5422-cpus.dtsi"
#include "exynos5422-cpu-thermal.dtsi"
/ {
diff --git a/arch/arm/boot/dts/qcom-apq8064-cm-qs600.dts b/arch/arm/boot/dts/qcom-apq8064-cm-qs600.dts
index 34ccb26..47c0282 100644
--- a/arch/arm/boot/dts/qcom-apq8064-cm-qs600.dts
+++ b/arch/arm/boot/dts/qcom-apq8064-cm-qs600.dts
@@ -4,6 +4,14 @@
model = "CompuLab CM-QS600";
compatible = "qcom,apq8064-cm-qs600", "qcom,apq8064";
+ aliases {
+ serial0 = &gsbi7_serial;
+ };
+
+ chosen {
+ stdout-path = "serial0:115200n8";
+ };
+
soc {
pinctrl@800000 {
i2c1_pins: i2c1 {
diff --git a/arch/arm/boot/dts/qcom-apq8064-ifc6410.dts b/arch/arm/boot/dts/qcom-apq8064-ifc6410.dts
index 88d6655..f3100da 100644
--- a/arch/arm/boot/dts/qcom-apq8064-ifc6410.dts
+++ b/arch/arm/boot/dts/qcom-apq8064-ifc6410.dts
@@ -10,6 +10,10 @@
serial1 = &gsbi6_serial;
};
+ chosen {
+ stdout-path = "serial0:115200n8";
+ };
+
soc {
pinctrl@800000 {
card_detect: card_detect {
diff --git a/arch/arm/boot/dts/qcom-apq8074-dragonboard.dts b/arch/arm/boot/dts/qcom-apq8074-dragonboard.dts
index d484d08..835bdc7 100644
--- a/arch/arm/boot/dts/qcom-apq8074-dragonboard.dts
+++ b/arch/arm/boot/dts/qcom-apq8074-dragonboard.dts
@@ -6,6 +6,14 @@
model = "Qualcomm APQ8074 Dragonboard";
compatible = "qcom,apq8074-dragonboard", "qcom,apq8074";
+ aliases {
+ serial0 = &blsp1_uart2;
+ };
+
+ chosen {
+ stdout-path = "serial0:115200n8";
+ };
+
soc {
serial@f991e000 {
status = "ok";
diff --git a/arch/arm/boot/dts/qcom-apq8084-ifc6540.dts b/arch/arm/boot/dts/qcom-apq8084-ifc6540.dts
index f7725b9..c9c2b76 100644
--- a/arch/arm/boot/dts/qcom-apq8084-ifc6540.dts
+++ b/arch/arm/boot/dts/qcom-apq8084-ifc6540.dts
@@ -5,6 +5,14 @@
model = "Qualcomm APQ8084/IFC6540";
compatible = "qcom,apq8084-ifc6540", "qcom,apq8084";
+ aliases {
+ serial0 = &blsp2_uart2;
+ };
+
+ chosen {
+ stdout-path = "serial0:115200n8";
+ };
+
soc {
serial@f995e000 {
status = "okay";
diff --git a/arch/arm/boot/dts/qcom-apq8084-mtp.dts b/arch/arm/boot/dts/qcom-apq8084-mtp.dts
index cb43acf..3016c70 100644
--- a/arch/arm/boot/dts/qcom-apq8084-mtp.dts
+++ b/arch/arm/boot/dts/qcom-apq8084-mtp.dts
@@ -5,6 +5,14 @@
model = "Qualcomm APQ 8084-MTP";
compatible = "qcom,apq8084-mtp", "qcom,apq8084";
+ aliases {
+ serial0 = &blsp2_uart2;
+ };
+
+ chosen {
+ stdout-path = "serial0:115200n8";
+ };
+
soc {
serial@f995e000 {
status = "okay";
diff --git a/arch/arm/boot/dts/qcom-apq8084.dtsi b/arch/arm/boot/dts/qcom-apq8084.dtsi
index 7084010..0554fbd 100644
--- a/arch/arm/boot/dts/qcom-apq8084.dtsi
+++ b/arch/arm/boot/dts/qcom-apq8084.dtsi
@@ -234,7 +234,7 @@
interrupts = <0 208 0>;
};
- serial@f995e000 {
+ blsp2_uart2: serial@f995e000 {
compatible = "qcom,msm-uartdm-v1.4", "qcom,msm-uartdm";
reg = <0xf995e000 0x1000>;
interrupts = <0 114 0x0>;
diff --git a/arch/arm/boot/dts/qcom-ipq8064-ap148.dts b/arch/arm/boot/dts/qcom-ipq8064-ap148.dts
index 55b2910..d501382 100644
--- a/arch/arm/boot/dts/qcom-ipq8064-ap148.dts
+++ b/arch/arm/boot/dts/qcom-ipq8064-ap148.dts
@@ -4,6 +4,14 @@
model = "Qualcomm IPQ8064/AP148";
compatible = "qcom,ipq8064-ap148", "qcom,ipq8064";
+ aliases {
+ serial0 = &gsbi4_serial;
+ };
+
+ chosen {
+ stdout-path = "serial0:115200n8";
+ };
+
reserved-memory {
#address-cells = <1>;
#size-cells = <1>;
diff --git a/arch/arm/boot/dts/qcom-ipq8064.dtsi b/arch/arm/boot/dts/qcom-ipq8064.dtsi
index 9f727d8..fa69863 100644
--- a/arch/arm/boot/dts/qcom-ipq8064.dtsi
+++ b/arch/arm/boot/dts/qcom-ipq8064.dtsi
@@ -197,7 +197,7 @@
syscon-tcsr = <&tcsr>;
- serial@16340000 {
+ gsbi4_serial: serial@16340000 {
compatible = "qcom,msm-uartdm-v1.3", "qcom,msm-uartdm";
reg = <0x16340000 0x1000>,
<0x16300000 0x1000>;
diff --git a/arch/arm/boot/dts/qcom-msm8660-surf.dts b/arch/arm/boot/dts/qcom-msm8660-surf.dts
index e0883c3..b17f379 100644
--- a/arch/arm/boot/dts/qcom-msm8660-surf.dts
+++ b/arch/arm/boot/dts/qcom-msm8660-surf.dts
@@ -6,6 +6,14 @@
model = "Qualcomm MSM8660 SURF";
compatible = "qcom,msm8660-surf", "qcom,msm8660";
+ aliases {
+ serial0 = &gsbi12_serial;
+ };
+
+ chosen {
+ stdout-path = "serial0:115200n8";
+ };
+
soc {
gsbi@19c00000 {
status = "ok";
diff --git a/arch/arm/boot/dts/qcom-msm8660.dtsi b/arch/arm/boot/dts/qcom-msm8660.dtsi
index ef2fe72..e5f7f33 100644
--- a/arch/arm/boot/dts/qcom-msm8660.dtsi
+++ b/arch/arm/boot/dts/qcom-msm8660.dtsi
@@ -98,7 +98,7 @@
syscon-tcsr = <&tcsr>;
- serial@19c40000 {
+ gsbi12_serial: serial@19c40000 {
compatible = "qcom,msm-uartdm-v1.3", "qcom,msm-uartdm";
reg = <0x19c40000 0x1000>,
<0x19c00000 0x1000>;
diff --git a/arch/arm/boot/dts/qcom-msm8960-cdp.dts b/arch/arm/boot/dts/qcom-msm8960-cdp.dts
index fad71d5..b72a554 100644
--- a/arch/arm/boot/dts/qcom-msm8960-cdp.dts
+++ b/arch/arm/boot/dts/qcom-msm8960-cdp.dts
@@ -6,6 +6,14 @@
model = "Qualcomm MSM8960 CDP";
compatible = "qcom,msm8960-cdp", "qcom,msm8960";
+ aliases {
+ serial0 = &gsbi5_serial;
+ };
+
+ chosen {
+ stdout-path = "serial0:115200n8";
+ };
+
soc {
gsbi@16400000 {
status = "ok";
diff --git a/arch/arm/boot/dts/qcom-msm8960.dtsi b/arch/arm/boot/dts/qcom-msm8960.dtsi
index 2096a94..134cd91 100644
--- a/arch/arm/boot/dts/qcom-msm8960.dtsi
+++ b/arch/arm/boot/dts/qcom-msm8960.dtsi
@@ -157,7 +157,7 @@
syscon-tcsr = <&tcsr>;
- serial@16440000 {
+ gsbi5_serial: serial@16440000 {
compatible = "qcom,msm-uartdm-v1.3", "qcom,msm-uartdm";
reg = <0x16440000 0x1000>,
<0x16400000 0x1000>;
diff --git a/arch/arm/boot/dts/qcom-msm8974-sony-xperia-honami.dts b/arch/arm/boot/dts/qcom-msm8974-sony-xperia-honami.dts
index 9bc72a3..016f9ad 100644
--- a/arch/arm/boot/dts/qcom-msm8974-sony-xperia-honami.dts
+++ b/arch/arm/boot/dts/qcom-msm8974-sony-xperia-honami.dts
@@ -6,6 +6,14 @@
model = "Sony Xperia Z1";
compatible = "sony,xperia-honami", "qcom,msm8974";
+ aliases {
+ serial0 = &blsp1_uart2;
+ };
+
+ chosen {
+ stdout-path = "serial0:115200n8";
+ };
+
memory@0 {
reg = <0 0x40000000>, <0x40000000 0x40000000>;
device_type = "memory";
diff --git a/arch/arm/boot/dts/qcom-msm8974.dtsi b/arch/arm/boot/dts/qcom-msm8974.dtsi
index d7c99b8..ab8e572 100644
--- a/arch/arm/boot/dts/qcom-msm8974.dtsi
+++ b/arch/arm/boot/dts/qcom-msm8974.dtsi
@@ -259,7 +259,7 @@
hwlocks = <&tcsr_mutex 3>;
};
- serial@f991e000 {
+ blsp1_uart2: serial@f991e000 {
compatible = "qcom,msm-uartdm-v1.4", "qcom,msm-uartdm";
reg = <0xf991e000 0x1000>;
interrupts = <0 108 0x0>;
diff --git a/arch/arm/configs/exynos_defconfig b/arch/arm/configs/exynos_defconfig
index 3eaf8fb..1ff2bfa 100644
--- a/arch/arm/configs/exynos_defconfig
+++ b/arch/arm/configs/exynos_defconfig
@@ -27,6 +27,8 @@
CONFIG_ARM_ATAG_DTB_COMPAT=y
CONFIG_CMDLINE="root=/dev/ram0 rw ramdisk=8192 initrd=0x41000000,8M console=ttySAC1,115200 init=/linuxrc mem=256M"
CONFIG_CPU_FREQ=y
+CONFIG_CPU_FREQ_DEFAULT_GOV_ONDEMAND=y
+CONFIG_CPUFREQ_DT=y
CONFIG_CPU_IDLE=y
CONFIG_ARM_EXYNOS_CPUIDLE=y
CONFIG_VFP=y
@@ -94,6 +96,7 @@
CONFIG_CHARGER_MAX77693=y
CONFIG_CHARGER_TPS65090=y
CONFIG_SENSORS_LM90=y
+CONFIG_SENSORS_NTC_THERMISTOR=y
CONFIG_SENSORS_PWM_FAN=y
CONFIG_SENSORS_INA2XX=y
CONFIG_THERMAL=y
@@ -144,6 +147,8 @@
CONFIG_SND_SOC=y
CONFIG_SND_SOC_SAMSUNG=y
CONFIG_SND_SOC_SNOW=y
+CONFIG_SND_SOC_ODROIDX2=y
+CONFIG_SND_SIMPLE_CARD=y
CONFIG_USB=y
CONFIG_USB_ANNOUNCE_NEW_DEVICES=y
CONFIG_USB_XHCI_HCD=y
diff --git a/arch/arm/configs/multi_v7_defconfig b/arch/arm/configs/multi_v7_defconfig
index f84471d..03deb7f 100644
--- a/arch/arm/configs/multi_v7_defconfig
+++ b/arch/arm/configs/multi_v7_defconfig
@@ -362,6 +362,7 @@
CONFIG_POWER_RESET_RMOBILE=y
CONFIG_SENSORS_LM90=y
CONFIG_SENSORS_LM95245=y
+CONFIG_SENSORS_NTC_THERMISTOR=m
CONFIG_THERMAL=y
CONFIG_CPU_THERMAL=y
CONFIG_RCAR_THERMAL=y
@@ -410,7 +411,9 @@
CONFIG_REGULATOR_MAX8973=y
CONFIG_REGULATOR_MAX77686=y
CONFIG_REGULATOR_MAX77693=m
+CONFIG_REGULATOR_MAX77802=m
CONFIG_REGULATOR_PALMAS=y
+CONFIG_REGULATOR_PBIAS=y
CONFIG_REGULATOR_PWM=m
CONFIG_REGULATOR_S2MPS11=y
CONFIG_REGULATOR_S5M8767=y
@@ -509,8 +512,6 @@
CONFIG_AB8500_USB=y
CONFIG_KEYSTONE_USB_PHY=y
CONFIG_OMAP_USB3=y
-CONFIG_SAMSUNG_USB2PHY=y
-CONFIG_SAMSUNG_USB3PHY=y
CONFIG_USB_GPIO_VBUS=y
CONFIG_USB_ISP1301=y
CONFIG_USB_MXS_PHY=y
@@ -635,6 +636,7 @@
CONFIG_TI_AEMIF=y
CONFIG_IIO=y
CONFIG_AT91_ADC=m
+CONFIG_EXYNOS_ADC=m
CONFIG_XILINX_XADC=y
CONFIG_AK8975=y
CONFIG_PWM=y
diff --git a/arch/arm/include/asm/assembler.h b/arch/arm/include/asm/assembler.h
index 7bbf325..b2bc8e1 100644
--- a/arch/arm/include/asm/assembler.h
+++ b/arch/arm/include/asm/assembler.h
@@ -491,11 +491,6 @@
#endif
.endm
- .macro uaccess_save_and_disable, tmp
- uaccess_save \tmp
- uaccess_disable \tmp
- .endm
-
.irp c,,eq,ne,cs,cc,mi,pl,vs,vc,hi,ls,ge,lt,gt,le,hs,lo
.macro ret\c, reg
#if __LINUX_ARM_ARCH__ < 6
diff --git a/arch/arm/include/asm/bug.h b/arch/arm/include/asm/bug.h
index b274bde..e7335a9 100644
--- a/arch/arm/include/asm/bug.h
+++ b/arch/arm/include/asm/bug.h
@@ -40,6 +40,7 @@
"2:\t.asciz " #__file "\n" \
".popsection\n" \
".pushsection __bug_table,\"a\"\n" \
+ ".align 2\n" \
"3:\t.word 1b, 2b\n" \
"\t.hword " #__line ", 0\n" \
".popsection"); \
diff --git a/arch/arm/include/asm/dma-mapping.h b/arch/arm/include/asm/dma-mapping.h
index a68b9d8..ccb3aa6 100644
--- a/arch/arm/include/asm/dma-mapping.h
+++ b/arch/arm/include/asm/dma-mapping.h
@@ -8,7 +8,6 @@
#include <linux/dma-attrs.h>
#include <linux/dma-debug.h>
-#include <asm-generic/dma-coherent.h>
#include <asm/memory.h>
#include <xen/xen.h>
@@ -39,12 +38,15 @@
dev->archdata.dma_ops = ops;
}
-#include <asm-generic/dma-mapping-common.h>
+#define HAVE_ARCH_DMA_SUPPORTED 1
+extern int dma_supported(struct device *dev, u64 mask);
-static inline int dma_set_mask(struct device *dev, u64 mask)
-{
- return get_dma_ops(dev)->set_dma_mask(dev, mask);
-}
+/*
+ * Note that while the generic code provides dummy dma_{alloc,free}_noncoherent
+ * implementations, we don't provide a dma_cache_sync function so drivers using
+ * this API are highlighted with build warnings.
+ */
+#include <asm-generic/dma-mapping-common.h>
#ifdef __arch_page_to_dma
#error Please update to __arch_pfn_to_dma
@@ -167,32 +169,6 @@
static inline void dma_mark_clean(void *addr, size_t size) { }
-/*
- * DMA errors are defined by all-bits-set in the DMA address.
- */
-static inline int dma_mapping_error(struct device *dev, dma_addr_t dma_addr)
-{
- debug_dma_mapping_error(dev, dma_addr);
- return dma_addr == DMA_ERROR_CODE;
-}
-
-/*
- * Dummy noncoherent implementation. We don't provide a dma_cache_sync
- * function so drivers using this API are highlighted with build warnings.
- */
-static inline void *dma_alloc_noncoherent(struct device *dev, size_t size,
- dma_addr_t *handle, gfp_t gfp)
-{
- return NULL;
-}
-
-static inline void dma_free_noncoherent(struct device *dev, size_t size,
- void *cpu_addr, dma_addr_t handle)
-{
-}
-
-extern int dma_supported(struct device *dev, u64 mask);
-
extern int arm_dma_set_mask(struct device *dev, u64 dma_mask);
/**
@@ -209,21 +185,6 @@
extern void *arm_dma_alloc(struct device *dev, size_t size, dma_addr_t *handle,
gfp_t gfp, struct dma_attrs *attrs);
-#define dma_alloc_coherent(d, s, h, f) dma_alloc_attrs(d, s, h, f, NULL)
-
-static inline void *dma_alloc_attrs(struct device *dev, size_t size,
- dma_addr_t *dma_handle, gfp_t flag,
- struct dma_attrs *attrs)
-{
- struct dma_map_ops *ops = get_dma_ops(dev);
- void *cpu_addr;
- BUG_ON(!ops);
-
- cpu_addr = ops->alloc(dev, size, dma_handle, flag, attrs);
- debug_dma_alloc_coherent(dev, size, *dma_handle, cpu_addr);
- return cpu_addr;
-}
-
/**
* arm_dma_free - free memory allocated by arm_dma_alloc
* @dev: valid struct device pointer, or NULL for ISA and EISA-like devices
@@ -241,19 +202,6 @@
extern void arm_dma_free(struct device *dev, size_t size, void *cpu_addr,
dma_addr_t handle, struct dma_attrs *attrs);
-#define dma_free_coherent(d, s, c, h) dma_free_attrs(d, s, c, h, NULL)
-
-static inline void dma_free_attrs(struct device *dev, size_t size,
- void *cpu_addr, dma_addr_t dma_handle,
- struct dma_attrs *attrs)
-{
- struct dma_map_ops *ops = get_dma_ops(dev);
- BUG_ON(!ops);
-
- debug_dma_free_coherent(dev, size, cpu_addr, dma_handle);
- ops->free(dev, size, cpu_addr, dma_handle, attrs);
-}
-
/**
* arm_dma_mmap - map a coherent DMA allocation into user space
* @dev: valid struct device pointer, or NULL for ISA and EISA-like devices
diff --git a/arch/arm/include/asm/domain.h b/arch/arm/include/asm/domain.h
index e878129..fc8ba16 100644
--- a/arch/arm/include/asm/domain.h
+++ b/arch/arm/include/asm/domain.h
@@ -12,6 +12,7 @@
#ifndef __ASSEMBLY__
#include <asm/barrier.h>
+#include <asm/thread_info.h>
#endif
/*
@@ -89,7 +90,8 @@
asm(
"mrc p15, 0, %0, c3, c0 @ get domain"
- : "=r" (domain));
+ : "=r" (domain)
+ : "m" (current_thread_info()->cpu_domain));
return domain;
}
@@ -98,7 +100,7 @@
{
asm volatile(
"mcr p15, 0, %0, c3, c0 @ set domain"
- : : "r" (val));
+ : : "r" (val) : "memory");
isb();
}
diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h
index e896d2c..dcba0fa 100644
--- a/arch/arm/include/asm/kvm_host.h
+++ b/arch/arm/include/asm/kvm_host.h
@@ -231,4 +231,9 @@
static inline void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) {}
static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) {}
+static inline void kvm_arm_init_debug(void) {}
+static inline void kvm_arm_setup_debug(struct kvm_vcpu *vcpu) {}
+static inline void kvm_arm_clear_debug(struct kvm_vcpu *vcpu) {}
+static inline void kvm_arm_reset_debug_ptr(struct kvm_vcpu *vcpu) {}
+
#endif /* __ARM_KVM_HOST_H__ */
diff --git a/arch/arm/include/asm/thread_info.h b/arch/arm/include/asm/thread_info.h
index d0a1119..776757d 100644
--- a/arch/arm/include/asm/thread_info.h
+++ b/arch/arm/include/asm/thread_info.h
@@ -25,7 +25,6 @@
struct task_struct;
#include <asm/types.h>
-#include <asm/domain.h>
typedef unsigned long mm_segment_t;
diff --git a/arch/arm/include/asm/xen/page.h b/arch/arm/include/asm/xen/page.h
index 98b1084..1279563 100644
--- a/arch/arm/include/asm/xen/page.h
+++ b/arch/arm/include/asm/xen/page.h
@@ -34,7 +34,19 @@
unsigned long __pfn_to_mfn(unsigned long pfn);
extern struct rb_root phys_to_mach;
-static inline unsigned long pfn_to_mfn(unsigned long pfn)
+/* Pseudo-physical <-> Guest conversion */
+static inline unsigned long pfn_to_gfn(unsigned long pfn)
+{
+ return pfn;
+}
+
+static inline unsigned long gfn_to_pfn(unsigned long gfn)
+{
+ return gfn;
+}
+
+/* Pseudo-physical <-> BUS conversion */
+static inline unsigned long pfn_to_bfn(unsigned long pfn)
{
unsigned long mfn;
@@ -47,16 +59,16 @@
return pfn;
}
-static inline unsigned long mfn_to_pfn(unsigned long mfn)
+static inline unsigned long bfn_to_pfn(unsigned long bfn)
{
- return mfn;
+ return bfn;
}
-#define mfn_to_local_pfn(mfn) mfn_to_pfn(mfn)
+#define bfn_to_local_pfn(bfn) bfn_to_pfn(bfn)
-/* VIRT <-> MACHINE conversion */
-#define virt_to_mfn(v) (pfn_to_mfn(virt_to_pfn(v)))
-#define mfn_to_virt(m) (__va(mfn_to_pfn(m) << PAGE_SHIFT))
+/* VIRT <-> GUEST conversion */
+#define virt_to_gfn(v) (pfn_to_gfn(virt_to_pfn(v)))
+#define gfn_to_virt(m) (__va(gfn_to_pfn(m) << PAGE_SHIFT))
/* Only used in PV code. But ARM guests are always HVM. */
static inline xmaddr_t arbitrary_virt_to_machine(void *vaddr)
@@ -96,7 +108,7 @@
bool xen_arch_need_swiotlb(struct device *dev,
unsigned long pfn,
- unsigned long mfn);
+ unsigned long bfn);
unsigned long xen_get_swiotlb_free_pages(unsigned int order);
#endif /* _ASM_ARM_XEN_PAGE_H */
diff --git a/arch/arm/kernel/process.c b/arch/arm/kernel/process.c
index a3089ba..7a7c4ce 100644
--- a/arch/arm/kernel/process.c
+++ b/arch/arm/kernel/process.c
@@ -226,6 +226,7 @@
memset(&thread->cpu_context, 0, sizeof(struct cpu_context_save));
+#ifdef CONFIG_CPU_USE_DOMAINS
/*
* Copy the initial value of the domain access control register
* from the current thread: thread->addr_limit will have been
@@ -233,6 +234,7 @@
* kernel/fork.c
*/
thread->cpu_domain = get_domain();
+#endif
if (likely(!(p->flags & PF_KTHREAD))) {
*childregs = *current_pt_regs();
diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
index bc738d2..ce404a5 100644
--- a/arch/arm/kvm/arm.c
+++ b/arch/arm/kvm/arm.c
@@ -125,6 +125,7 @@
if (ret)
goto out_free_stage2_pgd;
+ kvm_vgic_early_init(kvm);
kvm_timer_init(kvm);
/* Mark the initial VMID generation invalid */
@@ -249,6 +250,7 @@
void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
{
+ kvm_vgic_vcpu_early_init(vcpu);
}
void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu)
@@ -278,6 +280,8 @@
/* Set up the timer */
kvm_timer_vcpu_init(vcpu);
+ kvm_arm_reset_debug_ptr(vcpu);
+
return 0;
}
@@ -301,13 +305,6 @@
kvm_arm_set_running_vcpu(NULL);
}
-int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
- struct kvm_guest_debug *dbg)
-{
- return -EINVAL;
-}
-
-
int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
struct kvm_mp_state *mp_state)
{
@@ -528,10 +525,20 @@
if (vcpu->arch.pause)
vcpu_pause(vcpu);
- kvm_vgic_flush_hwstate(vcpu);
+ /*
+ * Disarming the background timer must be done in a
+ * preemptible context, as this call may sleep.
+ */
kvm_timer_flush_hwstate(vcpu);
+ /*
+ * Preparing the interrupts to be injected also
+ * involves poking the GIC, which must be done in a
+ * non-preemptible context.
+ */
preempt_disable();
+ kvm_vgic_flush_hwstate(vcpu);
+
local_irq_disable();
/*
@@ -544,12 +551,14 @@
if (ret <= 0 || need_new_vmid_gen(vcpu->kvm)) {
local_irq_enable();
+ kvm_vgic_sync_hwstate(vcpu);
preempt_enable();
kvm_timer_sync_hwstate(vcpu);
- kvm_vgic_sync_hwstate(vcpu);
continue;
}
+ kvm_arm_setup_debug(vcpu);
+
/**************************************************************
* Enter the guest
*/
@@ -564,6 +573,8 @@
* Back from guest
*************************************************************/
+ kvm_arm_clear_debug(vcpu);
+
/*
* We may have taken a host interrupt in HYP mode (ie
* while executing the guest). This interrupt is still
@@ -586,11 +597,12 @@
*/
kvm_guest_exit();
trace_kvm_exit(kvm_vcpu_trap_get_class(vcpu), *vcpu_pc(vcpu));
+
+ kvm_vgic_sync_hwstate(vcpu);
+
preempt_enable();
-
kvm_timer_sync_hwstate(vcpu);
- kvm_vgic_sync_hwstate(vcpu);
ret = handle_exit(vcpu, run, ret);
}
@@ -921,6 +933,8 @@
vector_ptr = (unsigned long)__kvm_hyp_vector;
__cpu_init_hyp_mode(boot_pgd_ptr, pgd_ptr, hyp_stack_ptr, vector_ptr);
+
+ kvm_arm_init_debug();
}
static int hyp_init_cpu_notify(struct notifier_block *self,
diff --git a/arch/arm/kvm/guest.c b/arch/arm/kvm/guest.c
index d503fbb..96e935b 100644
--- a/arch/arm/kvm/guest.c
+++ b/arch/arm/kvm/guest.c
@@ -290,3 +290,9 @@
{
return -EINVAL;
}
+
+int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
+ struct kvm_guest_debug *dbg)
+{
+ return -EINVAL;
+}
diff --git a/arch/arm/kvm/interrupts.S b/arch/arm/kvm/interrupts.S
index 568494d..900ef6d 100644
--- a/arch/arm/kvm/interrupts.S
+++ b/arch/arm/kvm/interrupts.S
@@ -361,10 +361,6 @@
@ Check syndrome register
mrc p15, 4, r1, c5, c2, 0 @ HSR
lsr r0, r1, #HSR_EC_SHIFT
-#ifdef CONFIG_VFPv3
- cmp r0, #HSR_EC_CP_0_13
- beq switch_to_guest_vfp
-#endif
cmp r0, #HSR_EC_HVC
bne guest_trap @ Not HVC instr.
@@ -378,7 +374,10 @@
cmp r2, #0
bne guest_trap @ Guest called HVC
-host_switch_to_hyp:
+ /*
+ * Getting here means host called HVC, we shift parameters and branch
+ * to Hyp function.
+ */
pop {r0, r1, r2}
/* Check for __hyp_get_vectors */
@@ -409,6 +408,10 @@
@ Check if we need the fault information
lsr r1, r1, #HSR_EC_SHIFT
+#ifdef CONFIG_VFPv3
+ cmp r1, #HSR_EC_CP_0_13
+ beq switch_to_guest_vfp
+#endif
cmp r1, #HSR_EC_IABT
mrceq p15, 4, r2, c6, c0, 2 @ HIFAR
beq 2f
@@ -477,7 +480,6 @@
*/
#ifdef CONFIG_VFPv3
switch_to_guest_vfp:
- load_vcpu @ Load VCPU pointer to r0
push {r3-r7}
@ NEON/VFP used. Turn on VFP access.
diff --git a/arch/arm/kvm/reset.c b/arch/arm/kvm/reset.c
index f558c07..eeb8585 100644
--- a/arch/arm/kvm/reset.c
+++ b/arch/arm/kvm/reset.c
@@ -77,7 +77,5 @@
kvm_reset_coprocs(vcpu);
/* Reset arch_timer context */
- kvm_timer_vcpu_reset(vcpu, cpu_vtimer_irq);
-
- return 0;
+ return kvm_timer_vcpu_reset(vcpu, cpu_vtimer_irq);
}
diff --git a/arch/arm/mach-exynos/Kconfig b/arch/arm/mach-exynos/Kconfig
index 4c4858c..3a10f1a 100644
--- a/arch/arm/mach-exynos/Kconfig
+++ b/arch/arm/mach-exynos/Kconfig
@@ -15,6 +15,7 @@
select ARM_AMBA
select ARM_GIC
select COMMON_CLK_SAMSUNG
+ select EXYNOS_THERMAL
select HAVE_ARM_SCU if SMP
select HAVE_S3C2410_I2C if I2C
select HAVE_S3C2410_WATCHDOG if WATCHDOG
@@ -24,6 +25,7 @@
select PM_GENERIC_DOMAINS if PM
select S5P_DEV_MFC
select SRAM
+ select THERMAL
select MFD_SYSCON
help
Support for SAMSUNG EXYNOS SoCs (EXYNOS4/5)
diff --git a/arch/arm/mach-exynos/exynos.c b/arch/arm/mach-exynos/exynos.c
index 5f8ddcd..1c47aee 100644
--- a/arch/arm/mach-exynos/exynos.c
+++ b/arch/arm/mach-exynos/exynos.c
@@ -225,7 +225,11 @@
}
static const struct of_device_id exynos_cpufreq_matches[] = {
+ { .compatible = "samsung,exynos3250", .data = "cpufreq-dt" },
{ .compatible = "samsung,exynos4210", .data = "cpufreq-dt" },
+ { .compatible = "samsung,exynos4212", .data = "cpufreq-dt" },
+ { .compatible = "samsung,exynos4412", .data = "cpufreq-dt" },
+ { .compatible = "samsung,exynos5250", .data = "cpufreq-dt" },
{ /* sentinel */ }
};
diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c
index bf35abc..e626043 100644
--- a/arch/arm/mm/dma-mapping.c
+++ b/arch/arm/mm/dma-mapping.c
@@ -676,10 +676,6 @@
gfp_t gfp, struct dma_attrs *attrs)
{
pgprot_t prot = __get_dma_pgprot(attrs, PAGE_KERNEL);
- void *memory;
-
- if (dma_alloc_from_coherent(dev, size, handle, &memory))
- return memory;
return __dma_alloc(dev, size, handle, gfp, prot, false,
attrs, __builtin_return_address(0));
@@ -688,11 +684,6 @@
static void *arm_coherent_dma_alloc(struct device *dev, size_t size,
dma_addr_t *handle, gfp_t gfp, struct dma_attrs *attrs)
{
- void *memory;
-
- if (dma_alloc_from_coherent(dev, size, handle, &memory))
- return memory;
-
return __dma_alloc(dev, size, handle, gfp, PAGE_KERNEL, true,
attrs, __builtin_return_address(0));
}
@@ -752,9 +743,6 @@
struct page *page = pfn_to_page(dma_to_pfn(dev, handle));
bool want_vaddr = !dma_get_attr(DMA_ATTR_NO_KERNEL_MAPPING, attrs);
- if (dma_release_from_coherent(dev, get_order(size), cpu_addr))
- return;
-
size = PAGE_ALIGN(size);
if (nommu()) {
diff --git a/arch/arm/nwfpe/entry.S b/arch/arm/nwfpe/entry.S
index 71df435..39c20af 100644
--- a/arch/arm/nwfpe/entry.S
+++ b/arch/arm/nwfpe/entry.S
@@ -95,9 +95,10 @@
reteq r4 @ no, return failure
next:
+ uaccess_enable r3
.Lx1: ldrt r6, [r5], #4 @ get the next instruction and
@ increment PC
-
+ uaccess_disable r3
and r2, r6, #0x0F000000 @ test for FP insns
teq r2, #0x0C000000
teqne r2, #0x0D000000
diff --git a/arch/arm/xen/enlighten.c b/arch/arm/xen/enlighten.c
index c50c8d3..eeeab07 100644
--- a/arch/arm/xen/enlighten.c
+++ b/arch/arm/xen/enlighten.c
@@ -49,35 +49,35 @@
static __initdata struct device_node *xen_node;
-int xen_remap_domain_mfn_array(struct vm_area_struct *vma,
+int xen_remap_domain_gfn_array(struct vm_area_struct *vma,
unsigned long addr,
- xen_pfn_t *mfn, int nr,
+ xen_pfn_t *gfn, int nr,
int *err_ptr, pgprot_t prot,
unsigned domid,
struct page **pages)
{
- return xen_xlate_remap_gfn_array(vma, addr, mfn, nr, err_ptr,
+ return xen_xlate_remap_gfn_array(vma, addr, gfn, nr, err_ptr,
prot, domid, pages);
}
-EXPORT_SYMBOL_GPL(xen_remap_domain_mfn_array);
+EXPORT_SYMBOL_GPL(xen_remap_domain_gfn_array);
/* Not used by XENFEAT_auto_translated guests. */
-int xen_remap_domain_mfn_range(struct vm_area_struct *vma,
+int xen_remap_domain_gfn_range(struct vm_area_struct *vma,
unsigned long addr,
- xen_pfn_t mfn, int nr,
+ xen_pfn_t gfn, int nr,
pgprot_t prot, unsigned domid,
struct page **pages)
{
return -ENOSYS;
}
-EXPORT_SYMBOL_GPL(xen_remap_domain_mfn_range);
+EXPORT_SYMBOL_GPL(xen_remap_domain_gfn_range);
-int xen_unmap_domain_mfn_range(struct vm_area_struct *vma,
+int xen_unmap_domain_gfn_range(struct vm_area_struct *vma,
int nr, struct page **pages)
{
return xen_xlate_unmap_gfn_range(vma, nr, pages);
}
-EXPORT_SYMBOL_GPL(xen_unmap_domain_mfn_range);
+EXPORT_SYMBOL_GPL(xen_unmap_domain_gfn_range);
static void xen_percpu_init(void)
{
diff --git a/arch/arm/xen/hypercall.S b/arch/arm/xen/hypercall.S
index f00e080..10fd99c 100644
--- a/arch/arm/xen/hypercall.S
+++ b/arch/arm/xen/hypercall.S
@@ -98,8 +98,23 @@
mov r1, r2
mov r2, r3
ldr r3, [sp, #8]
+ /*
+ * Privcmd calls are issued by the userspace. We need to allow the
+ * kernel to access the userspace memory before issuing the hypercall.
+ */
+ uaccess_enable r4
+
+ /* r4 is loaded now as we use it as scratch register before */
ldr r4, [sp, #4]
__HVC(XEN_IMM)
+
+ /*
+ * Disable userspace access from kernel. This is fine to do it
+ * unconditionally as no set_fs(KERNEL_DS)/set_fs(get_ds()) is
+ * called before.
+ */
+ uaccess_disable r4
+
ldm sp!, {r4}
ret lr
ENDPROC(privcmd_call);
diff --git a/arch/arm/xen/mm.c b/arch/arm/xen/mm.c
index 03e75fe..6dd911d 100644
--- a/arch/arm/xen/mm.c
+++ b/arch/arm/xen/mm.c
@@ -139,9 +139,9 @@
bool xen_arch_need_swiotlb(struct device *dev,
unsigned long pfn,
- unsigned long mfn)
+ unsigned long bfn)
{
- return (!hypercall_cflush && (pfn != mfn) && !is_device_dma_coherent(dev));
+ return (!hypercall_cflush && (pfn != bfn) && !is_device_dma_coherent(dev));
}
int xen_create_contiguous_region(phys_addr_t pstart, unsigned int order,
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 7d95663..07d1811 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -32,6 +32,7 @@
select GENERIC_CLOCKEVENTS_BROADCAST
select GENERIC_CPU_AUTOPROBE
select GENERIC_EARLY_IOREMAP
+ select GENERIC_IDLE_POLL_SETUP
select GENERIC_IRQ_PROBE
select GENERIC_IRQ_SHOW
select GENERIC_IRQ_SHOW_LEVEL
@@ -331,6 +332,22 @@
If unsure, say Y.
+config ARM64_ERRATUM_843419
+ bool "Cortex-A53: 843419: A load or store might access an incorrect address"
+ depends on MODULES
+ default y
+ help
+ This option builds kernel modules using the large memory model in
+ order to avoid the use of the ADRP instruction, which can cause
+ a subsequent memory access to use an incorrect address on Cortex-A53
+ parts up to r0p4.
+
+ Note that the kernel itself must be linked with a version of ld
+ which fixes potentially affected ADRP instructions through the
+ use of veneers.
+
+ If unsure, say Y.
+
endmenu
diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile
index 15ff5b4..f9914d7 100644
--- a/arch/arm64/Makefile
+++ b/arch/arm64/Makefile
@@ -41,6 +41,10 @@
CHECKFLAGS += -D__aarch64__
+ifeq ($(CONFIG_ARM64_ERRATUM_843419), y)
+CFLAGS_MODULE += -mcmodel=large
+endif
+
# Default value
head-y := arch/arm64/kernel/head.o
diff --git a/arch/arm64/include/asm/dma-mapping.h b/arch/arm64/include/asm/dma-mapping.h
index f0d6d0b..cfdb34b 100644
--- a/arch/arm64/include/asm/dma-mapping.h
+++ b/arch/arm64/include/asm/dma-mapping.h
@@ -22,8 +22,6 @@
#include <linux/types.h>
#include <linux/vmalloc.h>
-#include <asm-generic/dma-coherent.h>
-
#include <xen/xen.h>
#include <asm/xen/hypervisor.h>
@@ -86,28 +84,6 @@
return (phys_addr_t)dev_addr;
}
-static inline int dma_mapping_error(struct device *dev, dma_addr_t dev_addr)
-{
- struct dma_map_ops *ops = get_dma_ops(dev);
- debug_dma_mapping_error(dev, dev_addr);
- return ops->mapping_error(dev, dev_addr);
-}
-
-static inline int dma_supported(struct device *dev, u64 mask)
-{
- struct dma_map_ops *ops = get_dma_ops(dev);
- return ops->dma_supported(dev, mask);
-}
-
-static inline int dma_set_mask(struct device *dev, u64 mask)
-{
- if (!dev->dma_mask || !dma_supported(dev, mask))
- return -EIO;
- *dev->dma_mask = mask;
-
- return 0;
-}
-
static inline bool dma_capable(struct device *dev, dma_addr_t addr, size_t size)
{
if (!dev->dma_mask)
@@ -120,50 +96,5 @@
{
}
-#define dma_alloc_coherent(d, s, h, f) dma_alloc_attrs(d, s, h, f, NULL)
-#define dma_free_coherent(d, s, h, f) dma_free_attrs(d, s, h, f, NULL)
-
-static inline void *dma_alloc_attrs(struct device *dev, size_t size,
- dma_addr_t *dma_handle, gfp_t flags,
- struct dma_attrs *attrs)
-{
- struct dma_map_ops *ops = get_dma_ops(dev);
- void *vaddr;
-
- if (dma_alloc_from_coherent(dev, size, dma_handle, &vaddr))
- return vaddr;
-
- vaddr = ops->alloc(dev, size, dma_handle, flags, attrs);
- debug_dma_alloc_coherent(dev, size, *dma_handle, vaddr);
- return vaddr;
-}
-
-static inline void dma_free_attrs(struct device *dev, size_t size,
- void *vaddr, dma_addr_t dev_addr,
- struct dma_attrs *attrs)
-{
- struct dma_map_ops *ops = get_dma_ops(dev);
-
- if (dma_release_from_coherent(dev, get_order(size), vaddr))
- return;
-
- debug_dma_free_coherent(dev, size, vaddr, dev_addr);
- ops->free(dev, size, vaddr, dev_addr, attrs);
-}
-
-/*
- * There is no dma_cache_sync() implementation, so just return NULL here.
- */
-static inline void *dma_alloc_noncoherent(struct device *dev, size_t size,
- dma_addr_t *handle, gfp_t flags)
-{
- return NULL;
-}
-
-static inline void dma_free_noncoherent(struct device *dev, size_t size,
- void *cpu_addr, dma_addr_t handle)
-{
-}
-
#endif /* __KERNEL__ */
#endif /* __ASM_DMA_MAPPING_H */
diff --git a/arch/arm64/include/asm/hw_breakpoint.h b/arch/arm64/include/asm/hw_breakpoint.h
index 52b484b..4c47cb2 100644
--- a/arch/arm64/include/asm/hw_breakpoint.h
+++ b/arch/arm64/include/asm/hw_breakpoint.h
@@ -16,6 +16,8 @@
#ifndef __ASM_HW_BREAKPOINT_H
#define __ASM_HW_BREAKPOINT_H
+#include <asm/cputype.h>
+
#ifdef __KERNEL__
struct arch_hw_breakpoint_ctrl {
@@ -132,5 +134,17 @@
extern struct pmu perf_ops_bp;
+/* Determine number of BRP registers available. */
+static inline int get_num_brps(void)
+{
+ return ((read_cpuid(ID_AA64DFR0_EL1) >> 12) & 0xf) + 1;
+}
+
+/* Determine number of WRP registers available. */
+static inline int get_num_wrps(void)
+{
+ return ((read_cpuid(ID_AA64DFR0_EL1) >> 20) & 0xf) + 1;
+}
+
#endif /* __KERNEL__ */
#endif /* __ASM_BREAKPOINT_H */
diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h
index ac6fafb..7605e09 100644
--- a/arch/arm64/include/asm/kvm_arm.h
+++ b/arch/arm64/include/asm/kvm_arm.h
@@ -171,10 +171,13 @@
#define HSTR_EL2_TTEE (1 << 16)
#define HSTR_EL2_T(x) (1 << x)
+/* Hyp Coprocessor Trap Register Shifts */
+#define CPTR_EL2_TFP_SHIFT 10
+
/* Hyp Coprocessor Trap Register */
#define CPTR_EL2_TCPAC (1 << 31)
#define CPTR_EL2_TTA (1 << 20)
-#define CPTR_EL2_TFP (1 << 10)
+#define CPTR_EL2_TFP (1 << CPTR_EL2_TFP_SHIFT)
/* Hyp Debug Configuration Register bits */
#define MDCR_EL2_TDRA (1 << 11)
diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h
index 3c5fe68..67fa0de 100644
--- a/arch/arm64/include/asm/kvm_asm.h
+++ b/arch/arm64/include/asm/kvm_asm.h
@@ -46,24 +46,16 @@
#define CNTKCTL_EL1 20 /* Timer Control Register (EL1) */
#define PAR_EL1 21 /* Physical Address Register */
#define MDSCR_EL1 22 /* Monitor Debug System Control Register */
-#define DBGBCR0_EL1 23 /* Debug Breakpoint Control Registers (0-15) */
-#define DBGBCR15_EL1 38
-#define DBGBVR0_EL1 39 /* Debug Breakpoint Value Registers (0-15) */
-#define DBGBVR15_EL1 54
-#define DBGWCR0_EL1 55 /* Debug Watchpoint Control Registers (0-15) */
-#define DBGWCR15_EL1 70
-#define DBGWVR0_EL1 71 /* Debug Watchpoint Value Registers (0-15) */
-#define DBGWVR15_EL1 86
-#define MDCCINT_EL1 87 /* Monitor Debug Comms Channel Interrupt Enable Reg */
+#define MDCCINT_EL1 23 /* Monitor Debug Comms Channel Interrupt Enable Reg */
/* 32bit specific registers. Keep them at the end of the range */
-#define DACR32_EL2 88 /* Domain Access Control Register */
-#define IFSR32_EL2 89 /* Instruction Fault Status Register */
-#define FPEXC32_EL2 90 /* Floating-Point Exception Control Register */
-#define DBGVCR32_EL2 91 /* Debug Vector Catch Register */
-#define TEECR32_EL1 92 /* ThumbEE Configuration Register */
-#define TEEHBR32_EL1 93 /* ThumbEE Handler Base Register */
-#define NR_SYS_REGS 94
+#define DACR32_EL2 24 /* Domain Access Control Register */
+#define IFSR32_EL2 25 /* Instruction Fault Status Register */
+#define FPEXC32_EL2 26 /* Floating-Point Exception Control Register */
+#define DBGVCR32_EL2 27 /* Debug Vector Catch Register */
+#define TEECR32_EL1 28 /* ThumbEE Configuration Register */
+#define TEEHBR32_EL1 29 /* ThumbEE Handler Base Register */
+#define NR_SYS_REGS 30
/* 32bit mapping */
#define c0_MPIDR (MPIDR_EL1 * 2) /* MultiProcessor ID Register */
@@ -132,6 +124,8 @@
extern u64 __vgic_v3_get_ich_vtr_el2(void);
+extern u32 __kvm_get_mdcr_el2(void);
+
#endif
#endif /* __ARM_KVM_ASM_H__ */
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index 2709db2..415938d 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -103,15 +103,34 @@
/* HYP configuration */
u64 hcr_el2;
+ u32 mdcr_el2;
/* Exception Information */
struct kvm_vcpu_fault_info fault;
- /* Debug state */
+ /* Guest debug state */
u64 debug_flags;
+ /*
+ * We maintain more than a single set of debug registers to support
+ * debugging the guest from the host and to maintain separate host and
+ * guest state during world switches. vcpu_debug_state are the debug
+ * registers of the vcpu as the guest sees them. host_debug_state are
+ * the host registers which are saved and restored during
+ * world switches. external_debug_state contains the debug
+ * values we want to debug the guest. This is set via the
+ * KVM_SET_GUEST_DEBUG ioctl.
+ *
+ * debug_ptr points to the set of debug registers that should be loaded
+ * onto the hardware when running the guest.
+ */
+ struct kvm_guest_debug_arch *debug_ptr;
+ struct kvm_guest_debug_arch vcpu_debug_state;
+ struct kvm_guest_debug_arch external_debug_state;
+
/* Pointer to host CPU context */
kvm_cpu_context_t *host_cpu_context;
+ struct kvm_guest_debug_arch host_debug_state;
/* VGIC state */
struct vgic_cpu vgic_cpu;
@@ -122,6 +141,17 @@
* here.
*/
+ /*
+ * Guest registers we preserve during guest debugging.
+ *
+ * These shadow registers are updated by the kvm_handle_sys_reg
+ * trap handler if the guest accesses or updates them while we
+ * are using guest debug.
+ */
+ struct {
+ u32 mdscr_el1;
+ } guest_debug_preserved;
+
/* Don't run the guest */
bool pause;
@@ -216,15 +246,15 @@
hyp_stack_ptr, vector_ptr);
}
-struct vgic_sr_vectors {
- void *save_vgic;
- void *restore_vgic;
-};
-
static inline void kvm_arch_hardware_disable(void) {}
static inline void kvm_arch_hardware_unsetup(void) {}
static inline void kvm_arch_sync_events(struct kvm *kvm) {}
static inline void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) {}
static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) {}
+void kvm_arm_init_debug(void);
+void kvm_arm_setup_debug(struct kvm_vcpu *vcpu);
+void kvm_arm_clear_debug(struct kvm_vcpu *vcpu);
+void kvm_arm_reset_debug_ptr(struct kvm_vcpu *vcpu);
+
#endif /* __ARM64_KVM_HOST_H__ */
diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h
index 6900b2d9..b0329be 100644
--- a/arch/arm64/include/asm/pgtable.h
+++ b/arch/arm64/include/asm/pgtable.h
@@ -26,13 +26,9 @@
* Software defined PTE bits definition.
*/
#define PTE_VALID (_AT(pteval_t, 1) << 0)
+#define PTE_WRITE (PTE_DBM) /* same as DBM (51) */
#define PTE_DIRTY (_AT(pteval_t, 1) << 55)
#define PTE_SPECIAL (_AT(pteval_t, 1) << 56)
-#ifdef CONFIG_ARM64_HW_AFDBM
-#define PTE_WRITE (PTE_DBM) /* same as DBM */
-#else
-#define PTE_WRITE (_AT(pteval_t, 1) << 57)
-#endif
#define PTE_PROT_NONE (_AT(pteval_t, 1) << 58) /* only when !PTE_VALID */
/*
@@ -146,7 +142,7 @@
#define pte_exec(pte) (!(pte_val(pte) & PTE_UXN))
#ifdef CONFIG_ARM64_HW_AFDBM
-#define pte_hw_dirty(pte) (!(pte_val(pte) & PTE_RDONLY))
+#define pte_hw_dirty(pte) (pte_write(pte) && !(pte_val(pte) & PTE_RDONLY))
#else
#define pte_hw_dirty(pte) (0)
#endif
@@ -238,7 +234,7 @@
* When hardware DBM is not present, the software PTE_DIRTY bit is updated via
* the page fault mechanism. Checking the dirty status of a pte becomes:
*
- * PTE_DIRTY || !PTE_RDONLY
+ * PTE_DIRTY || (PTE_WRITE && !PTE_RDONLY)
*/
static inline void set_pte_at(struct mm_struct *mm, unsigned long addr,
pte_t *ptep, pte_t pte)
@@ -503,7 +499,7 @@
PTE_PROT_NONE | PTE_WRITE | PTE_TYPE_MASK;
/* preserve the hardware dirty information */
if (pte_hw_dirty(pte))
- newprot |= PTE_DIRTY;
+ pte = pte_mkdirty(pte);
pte_val(pte) = (pte_val(pte) & ~mask) | (pgprot_val(newprot) & mask);
return pte;
}
diff --git a/arch/arm64/include/uapi/asm/kvm.h b/arch/arm64/include/uapi/asm/kvm.h
index d268320..0cd7b59 100644
--- a/arch/arm64/include/uapi/asm/kvm.h
+++ b/arch/arm64/include/uapi/asm/kvm.h
@@ -53,14 +53,20 @@
struct user_fpsimd_state fp_regs;
};
-/* Supported Processor Types */
+/*
+ * Supported CPU Targets - Adding a new target type is not recommended,
+ * unless there are some special registers not supported by the
+ * genericv8 syreg table.
+ */
#define KVM_ARM_TARGET_AEM_V8 0
#define KVM_ARM_TARGET_FOUNDATION_V8 1
#define KVM_ARM_TARGET_CORTEX_A57 2
#define KVM_ARM_TARGET_XGENE_POTENZA 3
#define KVM_ARM_TARGET_CORTEX_A53 4
+/* Generic ARM v8 target */
+#define KVM_ARM_TARGET_GENERIC_V8 5
-#define KVM_ARM_NUM_TARGETS 5
+#define KVM_ARM_NUM_TARGETS 6
/* KVM_ARM_SET_DEVICE_ADDR ioctl id encoding */
#define KVM_ARM_DEVICE_TYPE_SHIFT 0
@@ -100,12 +106,39 @@
struct kvm_fpu {
};
+/*
+ * See v8 ARM ARM D7.3: Debug Registers
+ *
+ * The architectural limit is 16 debug registers of each type although
+ * in practice there are usually less (see ID_AA64DFR0_EL1).
+ *
+ * Although the control registers are architecturally defined as 32
+ * bits wide we use a 64 bit structure here to keep parity with
+ * KVM_GET/SET_ONE_REG behaviour which treats all system registers as
+ * 64 bit values. It also allows for the possibility of the
+ * architecture expanding the control registers without having to
+ * change the userspace ABI.
+ */
+#define KVM_ARM_MAX_DBG_REGS 16
struct kvm_guest_debug_arch {
+ __u64 dbg_bcr[KVM_ARM_MAX_DBG_REGS];
+ __u64 dbg_bvr[KVM_ARM_MAX_DBG_REGS];
+ __u64 dbg_wcr[KVM_ARM_MAX_DBG_REGS];
+ __u64 dbg_wvr[KVM_ARM_MAX_DBG_REGS];
};
struct kvm_debug_exit_arch {
+ __u32 hsr;
+ __u64 far; /* used for watchpoints */
};
+/*
+ * Architecture specific defines for kvm_guest_debug->control
+ */
+
+#define KVM_GUESTDBG_USE_SW_BP (1 << 16)
+#define KVM_GUESTDBG_USE_HW (1 << 17)
+
struct kvm_sync_regs {
};
diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c
index c99701a..8d89cf8 100644
--- a/arch/arm64/kernel/asm-offsets.c
+++ b/arch/arm64/kernel/asm-offsets.c
@@ -116,17 +116,22 @@
DEFINE(VCPU_FAR_EL2, offsetof(struct kvm_vcpu, arch.fault.far_el2));
DEFINE(VCPU_HPFAR_EL2, offsetof(struct kvm_vcpu, arch.fault.hpfar_el2));
DEFINE(VCPU_DEBUG_FLAGS, offsetof(struct kvm_vcpu, arch.debug_flags));
+ DEFINE(VCPU_DEBUG_PTR, offsetof(struct kvm_vcpu, arch.debug_ptr));
+ DEFINE(DEBUG_BCR, offsetof(struct kvm_guest_debug_arch, dbg_bcr));
+ DEFINE(DEBUG_BVR, offsetof(struct kvm_guest_debug_arch, dbg_bvr));
+ DEFINE(DEBUG_WCR, offsetof(struct kvm_guest_debug_arch, dbg_wcr));
+ DEFINE(DEBUG_WVR, offsetof(struct kvm_guest_debug_arch, dbg_wvr));
DEFINE(VCPU_HCR_EL2, offsetof(struct kvm_vcpu, arch.hcr_el2));
+ DEFINE(VCPU_MDCR_EL2, offsetof(struct kvm_vcpu, arch.mdcr_el2));
DEFINE(VCPU_IRQ_LINES, offsetof(struct kvm_vcpu, arch.irq_lines));
DEFINE(VCPU_HOST_CONTEXT, offsetof(struct kvm_vcpu, arch.host_cpu_context));
+ DEFINE(VCPU_HOST_DEBUG_STATE, offsetof(struct kvm_vcpu, arch.host_debug_state));
DEFINE(VCPU_TIMER_CNTV_CTL, offsetof(struct kvm_vcpu, arch.timer_cpu.cntv_ctl));
DEFINE(VCPU_TIMER_CNTV_CVAL, offsetof(struct kvm_vcpu, arch.timer_cpu.cntv_cval));
DEFINE(KVM_TIMER_CNTVOFF, offsetof(struct kvm, arch.timer.cntvoff));
DEFINE(KVM_TIMER_ENABLED, offsetof(struct kvm, arch.timer.enabled));
DEFINE(VCPU_KVM, offsetof(struct kvm_vcpu, kvm));
DEFINE(VCPU_VGIC_CPU, offsetof(struct kvm_vcpu, arch.vgic_cpu));
- DEFINE(VGIC_SAVE_FN, offsetof(struct vgic_sr_vectors, save_vgic));
- DEFINE(VGIC_RESTORE_FN, offsetof(struct vgic_sr_vectors, restore_vgic));
DEFINE(VGIC_V2_CPU_HCR, offsetof(struct vgic_cpu, vgic_v2.vgic_hcr));
DEFINE(VGIC_V2_CPU_VMCR, offsetof(struct vgic_cpu, vgic_v2.vgic_vmcr));
DEFINE(VGIC_V2_CPU_MISR, offsetof(struct vgic_cpu, vgic_v2.vgic_misr));
diff --git a/arch/arm64/kernel/debug-monitors.c b/arch/arm64/kernel/debug-monitors.c
index 9b3b62a..cebf786 100644
--- a/arch/arm64/kernel/debug-monitors.c
+++ b/arch/arm64/kernel/debug-monitors.c
@@ -134,7 +134,7 @@
unsigned long action, void *data)
{
int cpu = (unsigned long)data;
- if (action == CPU_ONLINE)
+ if ((action & ~CPU_TASKS_FROZEN) == CPU_ONLINE)
smp_call_function_single(cpu, clear_os_lock, NULL, 1);
return NOTIFY_OK;
}
diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S
index a055be6..90d09ed 100644
--- a/arch/arm64/kernel/head.S
+++ b/arch/arm64/kernel/head.S
@@ -523,6 +523,11 @@
msr hstr_el2, xzr // Disable CP15 traps to EL2
#endif
+ /* EL2 debug */
+ mrs x0, pmcr_el0 // Disable debug access traps
+ ubfx x0, x0, #11, #5 // to EL2 and allow access to
+ msr mdcr_el2, x0 // all PMU counters from EL1
+
/* Stage-2 translation */
msr vttbr_el2, xzr
diff --git a/arch/arm64/kernel/hw_breakpoint.c b/arch/arm64/kernel/hw_breakpoint.c
index 003bc3d..bba85c8 100644
--- a/arch/arm64/kernel/hw_breakpoint.c
+++ b/arch/arm64/kernel/hw_breakpoint.c
@@ -48,18 +48,6 @@
static int core_num_brps;
static int core_num_wrps;
-/* Determine number of BRP registers available. */
-static int get_num_brps(void)
-{
- return ((read_cpuid(ID_AA64DFR0_EL1) >> 12) & 0xf) + 1;
-}
-
-/* Determine number of WRP registers available. */
-static int get_num_wrps(void)
-{
- return ((read_cpuid(ID_AA64DFR0_EL1) >> 20) & 0xf) + 1;
-}
-
int hw_breakpoint_slots(int type)
{
/*
@@ -884,7 +872,7 @@
void *hcpu)
{
int cpu = (long)hcpu;
- if (action == CPU_ONLINE)
+ if ((action & ~CPU_TASKS_FROZEN) == CPU_ONLINE)
smp_call_function_single(cpu, hw_breakpoint_reset, NULL, 1);
return NOTIFY_OK;
}
diff --git a/arch/arm64/kernel/module.c b/arch/arm64/kernel/module.c
index 67bf410..876eb8d 100644
--- a/arch/arm64/kernel/module.c
+++ b/arch/arm64/kernel/module.c
@@ -332,12 +332,14 @@
ovf = reloc_insn_imm(RELOC_OP_PREL, loc, val, 0, 21,
AARCH64_INSN_IMM_ADR);
break;
+#ifndef CONFIG_ARM64_ERRATUM_843419
case R_AARCH64_ADR_PREL_PG_HI21_NC:
overflow_check = false;
case R_AARCH64_ADR_PREL_PG_HI21:
ovf = reloc_insn_imm(RELOC_OP_PAGE, loc, val, 12, 21,
AARCH64_INSN_IMM_ADR);
break;
+#endif
case R_AARCH64_ADD_ABS_LO12_NC:
case R_AARCH64_LDST8_ABS_LO12_NC:
overflow_check = false;
diff --git a/arch/arm64/kernel/signal32.c b/arch/arm64/kernel/signal32.c
index 948f0ad..71ef6dc 100644
--- a/arch/arm64/kernel/signal32.c
+++ b/arch/arm64/kernel/signal32.c
@@ -212,14 +212,32 @@
/*
* VFP save/restore code.
+ *
+ * We have to be careful with endianness, since the fpsimd context-switch
+ * code operates on 128-bit (Q) register values whereas the compat ABI
+ * uses an array of 64-bit (D) registers. Consequently, we need to swap
+ * the two halves of each Q register when running on a big-endian CPU.
*/
+union __fpsimd_vreg {
+ __uint128_t raw;
+ struct {
+#ifdef __AARCH64EB__
+ u64 hi;
+ u64 lo;
+#else
+ u64 lo;
+ u64 hi;
+#endif
+ };
+};
+
static int compat_preserve_vfp_context(struct compat_vfp_sigframe __user *frame)
{
struct fpsimd_state *fpsimd = &current->thread.fpsimd_state;
compat_ulong_t magic = VFP_MAGIC;
compat_ulong_t size = VFP_STORAGE_SIZE;
compat_ulong_t fpscr, fpexc;
- int err = 0;
+ int i, err = 0;
/*
* Save the hardware registers to the fpsimd_state structure.
@@ -235,10 +253,15 @@
/*
* Now copy the FP registers. Since the registers are packed,
* we can copy the prefix we want (V0-V15) as it is.
- * FIXME: Won't work if big endian.
*/
- err |= __copy_to_user(&frame->ufp.fpregs, fpsimd->vregs,
- sizeof(frame->ufp.fpregs));
+ for (i = 0; i < ARRAY_SIZE(frame->ufp.fpregs); i += 2) {
+ union __fpsimd_vreg vreg = {
+ .raw = fpsimd->vregs[i >> 1],
+ };
+
+ __put_user_error(vreg.lo, &frame->ufp.fpregs[i], err);
+ __put_user_error(vreg.hi, &frame->ufp.fpregs[i + 1], err);
+ }
/* Create an AArch32 fpscr from the fpsr and the fpcr. */
fpscr = (fpsimd->fpsr & VFP_FPSCR_STAT_MASK) |
@@ -263,7 +286,7 @@
compat_ulong_t magic = VFP_MAGIC;
compat_ulong_t size = VFP_STORAGE_SIZE;
compat_ulong_t fpscr;
- int err = 0;
+ int i, err = 0;
__get_user_error(magic, &frame->magic, err);
__get_user_error(size, &frame->size, err);
@@ -273,12 +296,14 @@
if (magic != VFP_MAGIC || size != VFP_STORAGE_SIZE)
return -EINVAL;
- /*
- * Copy the FP registers into the start of the fpsimd_state.
- * FIXME: Won't work if big endian.
- */
- err |= __copy_from_user(fpsimd.vregs, frame->ufp.fpregs,
- sizeof(frame->ufp.fpregs));
+ /* Copy the FP registers into the start of the fpsimd_state. */
+ for (i = 0; i < ARRAY_SIZE(frame->ufp.fpregs); i += 2) {
+ union __fpsimd_vreg vreg;
+
+ __get_user_error(vreg.lo, &frame->ufp.fpregs[i], err);
+ __get_user_error(vreg.hi, &frame->ufp.fpregs[i + 1], err);
+ fpsimd.vregs[i >> 1] = vreg.raw;
+ }
/* Extract the fpsr and the fpcr from the fpscr */
__get_user_error(fpscr, &frame->ufp.fpscr, err);
diff --git a/arch/arm64/kvm/Makefile b/arch/arm64/kvm/Makefile
index f90f4aa..1949fe5 100644
--- a/arch/arm64/kvm/Makefile
+++ b/arch/arm64/kvm/Makefile
@@ -17,7 +17,7 @@
kvm-$(CONFIG_KVM_ARM_HOST) += emulate.o inject_fault.o regmap.o
kvm-$(CONFIG_KVM_ARM_HOST) += hyp.o hyp-init.o handle_exit.o
-kvm-$(CONFIG_KVM_ARM_HOST) += guest.o reset.o sys_regs.o sys_regs_generic_v8.o
+kvm-$(CONFIG_KVM_ARM_HOST) += guest.o debug.o reset.o sys_regs.o sys_regs_generic_v8.o
kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic.o
kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic-v2.o
diff --git a/arch/arm64/kvm/debug.c b/arch/arm64/kvm/debug.c
new file mode 100644
index 0000000..47e5f0f
--- /dev/null
+++ b/arch/arm64/kvm/debug.c
@@ -0,0 +1,217 @@
+/*
+ * Debug and Guest Debug support
+ *
+ * Copyright (C) 2015 - Linaro Ltd
+ * Author: Alex Bennée <alex.bennee@linaro.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/kvm_host.h>
+#include <linux/hw_breakpoint.h>
+
+#include <asm/debug-monitors.h>
+#include <asm/kvm_asm.h>
+#include <asm/kvm_arm.h>
+#include <asm/kvm_emulate.h>
+
+#include "trace.h"
+
+/* These are the bits of MDSCR_EL1 we may manipulate */
+#define MDSCR_EL1_DEBUG_MASK (DBG_MDSCR_SS | \
+ DBG_MDSCR_KDE | \
+ DBG_MDSCR_MDE)
+
+static DEFINE_PER_CPU(u32, mdcr_el2);
+
+/**
+ * save/restore_guest_debug_regs
+ *
+ * For some debug operations we need to tweak some guest registers. As
+ * a result we need to save the state of those registers before we
+ * make those modifications.
+ *
+ * Guest access to MDSCR_EL1 is trapped by the hypervisor and handled
+ * after we have restored the preserved value to the main context.
+ */
+static void save_guest_debug_regs(struct kvm_vcpu *vcpu)
+{
+ vcpu->arch.guest_debug_preserved.mdscr_el1 = vcpu_sys_reg(vcpu, MDSCR_EL1);
+
+ trace_kvm_arm_set_dreg32("Saved MDSCR_EL1",
+ vcpu->arch.guest_debug_preserved.mdscr_el1);
+}
+
+static void restore_guest_debug_regs(struct kvm_vcpu *vcpu)
+{
+ vcpu_sys_reg(vcpu, MDSCR_EL1) = vcpu->arch.guest_debug_preserved.mdscr_el1;
+
+ trace_kvm_arm_set_dreg32("Restored MDSCR_EL1",
+ vcpu_sys_reg(vcpu, MDSCR_EL1));
+}
+
+/**
+ * kvm_arm_init_debug - grab what we need for debug
+ *
+ * Currently the sole task of this function is to retrieve the initial
+ * value of mdcr_el2 so we can preserve MDCR_EL2.HPMN which has
+ * presumably been set-up by some knowledgeable bootcode.
+ *
+ * It is called once per-cpu during CPU hyp initialisation.
+ */
+
+void kvm_arm_init_debug(void)
+{
+ __this_cpu_write(mdcr_el2, kvm_call_hyp(__kvm_get_mdcr_el2));
+}
+
+/**
+ * kvm_arm_reset_debug_ptr - reset the debug ptr to point to the vcpu state
+ */
+
+void kvm_arm_reset_debug_ptr(struct kvm_vcpu *vcpu)
+{
+ vcpu->arch.debug_ptr = &vcpu->arch.vcpu_debug_state;
+}
+
+/**
+ * kvm_arm_setup_debug - set up debug related stuff
+ *
+ * @vcpu: the vcpu pointer
+ *
+ * This is called before each entry into the hypervisor to setup any
+ * debug related registers. Currently this just ensures we will trap
+ * access to:
+ * - Performance monitors (MDCR_EL2_TPM/MDCR_EL2_TPMCR)
+ * - Debug ROM Address (MDCR_EL2_TDRA)
+ * - OS related registers (MDCR_EL2_TDOSA)
+ *
+ * Additionally, KVM only traps guest accesses to the debug registers if
+ * the guest is not actively using them (see the KVM_ARM64_DEBUG_DIRTY
+ * flag on vcpu->arch.debug_flags). Since the guest must not interfere
+ * with the hardware state when debugging the guest, we must ensure that
+ * trapping is enabled whenever we are debugging the guest using the
+ * debug registers.
+ */
+
+void kvm_arm_setup_debug(struct kvm_vcpu *vcpu)
+{
+ bool trap_debug = !(vcpu->arch.debug_flags & KVM_ARM64_DEBUG_DIRTY);
+
+ trace_kvm_arm_setup_debug(vcpu, vcpu->guest_debug);
+
+ vcpu->arch.mdcr_el2 = __this_cpu_read(mdcr_el2) & MDCR_EL2_HPMN_MASK;
+ vcpu->arch.mdcr_el2 |= (MDCR_EL2_TPM |
+ MDCR_EL2_TPMCR |
+ MDCR_EL2_TDRA |
+ MDCR_EL2_TDOSA);
+
+ /* Is Guest debugging in effect? */
+ if (vcpu->guest_debug) {
+ /* Route all software debug exceptions to EL2 */
+ vcpu->arch.mdcr_el2 |= MDCR_EL2_TDE;
+
+ /* Save guest debug state */
+ save_guest_debug_regs(vcpu);
+
+ /*
+ * Single Step (ARM ARM D2.12.3 The software step state
+ * machine)
+ *
+ * If we are doing Single Step we need to manipulate
+ * the guest's MDSCR_EL1.SS and PSTATE.SS. Once the
+ * step has occurred the hypervisor will trap the
+ * debug exception and we return to userspace.
+ *
+ * If the guest attempts to single step its userspace
+ * we would have to deal with a trapped exception
+ * while in the guest kernel. Because this would be
+ * hard to unwind we suppress the guest's ability to
+ * do so by masking MDSCR_EL.SS.
+ *
+ * This confuses guest debuggers which use
+ * single-step behind the scenes but everything
+ * returns to normal once the host is no longer
+ * debugging the system.
+ */
+ if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) {
+ *vcpu_cpsr(vcpu) |= DBG_SPSR_SS;
+ vcpu_sys_reg(vcpu, MDSCR_EL1) |= DBG_MDSCR_SS;
+ } else {
+ vcpu_sys_reg(vcpu, MDSCR_EL1) &= ~DBG_MDSCR_SS;
+ }
+
+ trace_kvm_arm_set_dreg32("SPSR_EL2", *vcpu_cpsr(vcpu));
+
+ /*
+ * HW Breakpoints and watchpoints
+ *
+ * We simply switch the debug_ptr to point to our new
+ * external_debug_state which has been populated by the
+ * debug ioctl. The existing KVM_ARM64_DEBUG_DIRTY
+ * mechanism ensures the registers are updated on the
+ * world switch.
+ */
+ if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW) {
+ /* Enable breakpoints/watchpoints */
+ vcpu_sys_reg(vcpu, MDSCR_EL1) |= DBG_MDSCR_MDE;
+
+ vcpu->arch.debug_ptr = &vcpu->arch.external_debug_state;
+ vcpu->arch.debug_flags |= KVM_ARM64_DEBUG_DIRTY;
+ trap_debug = true;
+
+ trace_kvm_arm_set_regset("BKPTS", get_num_brps(),
+ &vcpu->arch.debug_ptr->dbg_bcr[0],
+ &vcpu->arch.debug_ptr->dbg_bvr[0]);
+
+ trace_kvm_arm_set_regset("WAPTS", get_num_wrps(),
+ &vcpu->arch.debug_ptr->dbg_wcr[0],
+ &vcpu->arch.debug_ptr->dbg_wvr[0]);
+ }
+ }
+
+ BUG_ON(!vcpu->guest_debug &&
+ vcpu->arch.debug_ptr != &vcpu->arch.vcpu_debug_state);
+
+ /* Trap debug register access */
+ if (trap_debug)
+ vcpu->arch.mdcr_el2 |= MDCR_EL2_TDA;
+
+ trace_kvm_arm_set_dreg32("MDCR_EL2", vcpu->arch.mdcr_el2);
+ trace_kvm_arm_set_dreg32("MDSCR_EL1", vcpu_sys_reg(vcpu, MDSCR_EL1));
+}
+
+void kvm_arm_clear_debug(struct kvm_vcpu *vcpu)
+{
+ trace_kvm_arm_clear_debug(vcpu->guest_debug);
+
+ if (vcpu->guest_debug) {
+ restore_guest_debug_regs(vcpu);
+
+ /*
+ * If we were using HW debug we need to restore the
+ * debug_ptr to the guest debug state.
+ */
+ if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW) {
+ kvm_arm_reset_debug_ptr(vcpu);
+
+ trace_kvm_arm_set_regset("BKPTS", get_num_brps(),
+ &vcpu->arch.debug_ptr->dbg_bcr[0],
+ &vcpu->arch.debug_ptr->dbg_bvr[0]);
+
+ trace_kvm_arm_set_regset("WAPTS", get_num_wrps(),
+ &vcpu->arch.debug_ptr->dbg_wcr[0],
+ &vcpu->arch.debug_ptr->dbg_wvr[0]);
+ }
+ }
+}
diff --git a/arch/arm64/kvm/guest.c b/arch/arm64/kvm/guest.c
index 9535bd5..d250160 100644
--- a/arch/arm64/kvm/guest.c
+++ b/arch/arm64/kvm/guest.c
@@ -32,6 +32,8 @@
#include <asm/kvm_emulate.h>
#include <asm/kvm_coproc.h>
+#include "trace.h"
+
struct kvm_stats_debugfs_item debugfs_entries[] = {
{ NULL }
};
@@ -293,7 +295,8 @@
break;
};
- return -EINVAL;
+ /* Return a default generic target */
+ return KVM_ARM_TARGET_GENERIC_V8;
}
int kvm_vcpu_preferred_target(struct kvm_vcpu_init *init)
@@ -331,3 +334,41 @@
{
return -EINVAL;
}
+
+#define KVM_GUESTDBG_VALID_MASK (KVM_GUESTDBG_ENABLE | \
+ KVM_GUESTDBG_USE_SW_BP | \
+ KVM_GUESTDBG_USE_HW | \
+ KVM_GUESTDBG_SINGLESTEP)
+
+/**
+ * kvm_arch_vcpu_ioctl_set_guest_debug - set up guest debugging
+ * @kvm: pointer to the KVM struct
+ * @kvm_guest_debug: the ioctl data buffer
+ *
+ * This sets up and enables the VM for guest debugging. Userspace
+ * passes in a control flag to enable different debug types and
+ * potentially other architecture specific information in the rest of
+ * the structure.
+ */
+int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
+ struct kvm_guest_debug *dbg)
+{
+ trace_kvm_set_guest_debug(vcpu, dbg->control);
+
+ if (dbg->control & ~KVM_GUESTDBG_VALID_MASK)
+ return -EINVAL;
+
+ if (dbg->control & KVM_GUESTDBG_ENABLE) {
+ vcpu->guest_debug = dbg->control;
+
+ /* Hardware assisted Break and Watch points */
+ if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW) {
+ vcpu->arch.external_debug_state = dbg->arch;
+ }
+
+ } else {
+ /* If not enabled clear all flags */
+ vcpu->guest_debug = 0;
+ }
+ return 0;
+}
diff --git a/arch/arm64/kvm/handle_exit.c b/arch/arm64/kvm/handle_exit.c
index 524fa25..68a0759 100644
--- a/arch/arm64/kvm/handle_exit.c
+++ b/arch/arm64/kvm/handle_exit.c
@@ -82,6 +82,45 @@
return 1;
}
+/**
+ * kvm_handle_guest_debug - handle a debug exception instruction
+ *
+ * @vcpu: the vcpu pointer
+ * @run: access to the kvm_run structure for results
+ *
+ * We route all debug exceptions through the same handler. If both the
+ * guest and host are using the same debug facilities it will be up to
+ * userspace to re-inject the correct exception for guest delivery.
+ *
+ * @return: 0 (while setting run->exit_reason), -1 for error
+ */
+static int kvm_handle_guest_debug(struct kvm_vcpu *vcpu, struct kvm_run *run)
+{
+ u32 hsr = kvm_vcpu_get_hsr(vcpu);
+ int ret = 0;
+
+ run->exit_reason = KVM_EXIT_DEBUG;
+ run->debug.arch.hsr = hsr;
+
+ switch (hsr >> ESR_ELx_EC_SHIFT) {
+ case ESR_ELx_EC_WATCHPT_LOW:
+ run->debug.arch.far = vcpu->arch.fault.far_el2;
+ /* fall through */
+ case ESR_ELx_EC_SOFTSTP_LOW:
+ case ESR_ELx_EC_BREAKPT_LOW:
+ case ESR_ELx_EC_BKPT32:
+ case ESR_ELx_EC_BRK64:
+ break;
+ default:
+ kvm_err("%s: un-handled case hsr: %#08x\n",
+ __func__, (unsigned int) hsr);
+ ret = -1;
+ break;
+ }
+
+ return ret;
+}
+
static exit_handle_fn arm_exit_handlers[] = {
[ESR_ELx_EC_WFx] = kvm_handle_wfx,
[ESR_ELx_EC_CP15_32] = kvm_handle_cp15_32,
@@ -96,6 +135,11 @@
[ESR_ELx_EC_SYS64] = kvm_handle_sys_reg,
[ESR_ELx_EC_IABT_LOW] = kvm_handle_guest_abort,
[ESR_ELx_EC_DABT_LOW] = kvm_handle_guest_abort,
+ [ESR_ELx_EC_SOFTSTP_LOW]= kvm_handle_guest_debug,
+ [ESR_ELx_EC_WATCHPT_LOW]= kvm_handle_guest_debug,
+ [ESR_ELx_EC_BREAKPT_LOW]= kvm_handle_guest_debug,
+ [ESR_ELx_EC_BKPT32] = kvm_handle_guest_debug,
+ [ESR_ELx_EC_BRK64] = kvm_handle_guest_debug,
};
static exit_handle_fn kvm_get_exit_handler(struct kvm_vcpu *vcpu)
diff --git a/arch/arm64/kvm/hyp.S b/arch/arm64/kvm/hyp.S
index 10915aa..37c89ea 100644
--- a/arch/arm64/kvm/hyp.S
+++ b/arch/arm64/kvm/hyp.S
@@ -230,199 +230,52 @@
stp x24, x25, [x3, #160]
.endm
-.macro save_debug
- // x2: base address for cpu context
- // x3: tmp register
+.macro save_debug type
+ // x4: pointer to register set
+ // x5: number of registers to skip
+ // x6..x22 trashed
- mrs x26, id_aa64dfr0_el1
- ubfx x24, x26, #12, #4 // Extract BRPs
- ubfx x25, x26, #20, #4 // Extract WRPs
- mov w26, #15
- sub w24, w26, w24 // How many BPs to skip
- sub w25, w26, w25 // How many WPs to skip
-
- add x3, x2, #CPU_SYSREG_OFFSET(DBGBCR0_EL1)
-
- adr x26, 1f
- add x26, x26, x24, lsl #2
- br x26
+ adr x22, 1f
+ add x22, x22, x5, lsl #2
+ br x22
1:
- mrs x20, dbgbcr15_el1
- mrs x19, dbgbcr14_el1
- mrs x18, dbgbcr13_el1
- mrs x17, dbgbcr12_el1
- mrs x16, dbgbcr11_el1
- mrs x15, dbgbcr10_el1
- mrs x14, dbgbcr9_el1
- mrs x13, dbgbcr8_el1
- mrs x12, dbgbcr7_el1
- mrs x11, dbgbcr6_el1
- mrs x10, dbgbcr5_el1
- mrs x9, dbgbcr4_el1
- mrs x8, dbgbcr3_el1
- mrs x7, dbgbcr2_el1
- mrs x6, dbgbcr1_el1
- mrs x5, dbgbcr0_el1
+ mrs x21, \type\()15_el1
+ mrs x20, \type\()14_el1
+ mrs x19, \type\()13_el1
+ mrs x18, \type\()12_el1
+ mrs x17, \type\()11_el1
+ mrs x16, \type\()10_el1
+ mrs x15, \type\()9_el1
+ mrs x14, \type\()8_el1
+ mrs x13, \type\()7_el1
+ mrs x12, \type\()6_el1
+ mrs x11, \type\()5_el1
+ mrs x10, \type\()4_el1
+ mrs x9, \type\()3_el1
+ mrs x8, \type\()2_el1
+ mrs x7, \type\()1_el1
+ mrs x6, \type\()0_el1
- adr x26, 1f
- add x26, x26, x24, lsl #2
- br x26
-
+ adr x22, 1f
+ add x22, x22, x5, lsl #2
+ br x22
1:
- str x20, [x3, #(15 * 8)]
- str x19, [x3, #(14 * 8)]
- str x18, [x3, #(13 * 8)]
- str x17, [x3, #(12 * 8)]
- str x16, [x3, #(11 * 8)]
- str x15, [x3, #(10 * 8)]
- str x14, [x3, #(9 * 8)]
- str x13, [x3, #(8 * 8)]
- str x12, [x3, #(7 * 8)]
- str x11, [x3, #(6 * 8)]
- str x10, [x3, #(5 * 8)]
- str x9, [x3, #(4 * 8)]
- str x8, [x3, #(3 * 8)]
- str x7, [x3, #(2 * 8)]
- str x6, [x3, #(1 * 8)]
- str x5, [x3, #(0 * 8)]
-
- add x3, x2, #CPU_SYSREG_OFFSET(DBGBVR0_EL1)
-
- adr x26, 1f
- add x26, x26, x24, lsl #2
- br x26
-1:
- mrs x20, dbgbvr15_el1
- mrs x19, dbgbvr14_el1
- mrs x18, dbgbvr13_el1
- mrs x17, dbgbvr12_el1
- mrs x16, dbgbvr11_el1
- mrs x15, dbgbvr10_el1
- mrs x14, dbgbvr9_el1
- mrs x13, dbgbvr8_el1
- mrs x12, dbgbvr7_el1
- mrs x11, dbgbvr6_el1
- mrs x10, dbgbvr5_el1
- mrs x9, dbgbvr4_el1
- mrs x8, dbgbvr3_el1
- mrs x7, dbgbvr2_el1
- mrs x6, dbgbvr1_el1
- mrs x5, dbgbvr0_el1
-
- adr x26, 1f
- add x26, x26, x24, lsl #2
- br x26
-
-1:
- str x20, [x3, #(15 * 8)]
- str x19, [x3, #(14 * 8)]
- str x18, [x3, #(13 * 8)]
- str x17, [x3, #(12 * 8)]
- str x16, [x3, #(11 * 8)]
- str x15, [x3, #(10 * 8)]
- str x14, [x3, #(9 * 8)]
- str x13, [x3, #(8 * 8)]
- str x12, [x3, #(7 * 8)]
- str x11, [x3, #(6 * 8)]
- str x10, [x3, #(5 * 8)]
- str x9, [x3, #(4 * 8)]
- str x8, [x3, #(3 * 8)]
- str x7, [x3, #(2 * 8)]
- str x6, [x3, #(1 * 8)]
- str x5, [x3, #(0 * 8)]
-
- add x3, x2, #CPU_SYSREG_OFFSET(DBGWCR0_EL1)
-
- adr x26, 1f
- add x26, x26, x25, lsl #2
- br x26
-1:
- mrs x20, dbgwcr15_el1
- mrs x19, dbgwcr14_el1
- mrs x18, dbgwcr13_el1
- mrs x17, dbgwcr12_el1
- mrs x16, dbgwcr11_el1
- mrs x15, dbgwcr10_el1
- mrs x14, dbgwcr9_el1
- mrs x13, dbgwcr8_el1
- mrs x12, dbgwcr7_el1
- mrs x11, dbgwcr6_el1
- mrs x10, dbgwcr5_el1
- mrs x9, dbgwcr4_el1
- mrs x8, dbgwcr3_el1
- mrs x7, dbgwcr2_el1
- mrs x6, dbgwcr1_el1
- mrs x5, dbgwcr0_el1
-
- adr x26, 1f
- add x26, x26, x25, lsl #2
- br x26
-
-1:
- str x20, [x3, #(15 * 8)]
- str x19, [x3, #(14 * 8)]
- str x18, [x3, #(13 * 8)]
- str x17, [x3, #(12 * 8)]
- str x16, [x3, #(11 * 8)]
- str x15, [x3, #(10 * 8)]
- str x14, [x3, #(9 * 8)]
- str x13, [x3, #(8 * 8)]
- str x12, [x3, #(7 * 8)]
- str x11, [x3, #(6 * 8)]
- str x10, [x3, #(5 * 8)]
- str x9, [x3, #(4 * 8)]
- str x8, [x3, #(3 * 8)]
- str x7, [x3, #(2 * 8)]
- str x6, [x3, #(1 * 8)]
- str x5, [x3, #(0 * 8)]
-
- add x3, x2, #CPU_SYSREG_OFFSET(DBGWVR0_EL1)
-
- adr x26, 1f
- add x26, x26, x25, lsl #2
- br x26
-1:
- mrs x20, dbgwvr15_el1
- mrs x19, dbgwvr14_el1
- mrs x18, dbgwvr13_el1
- mrs x17, dbgwvr12_el1
- mrs x16, dbgwvr11_el1
- mrs x15, dbgwvr10_el1
- mrs x14, dbgwvr9_el1
- mrs x13, dbgwvr8_el1
- mrs x12, dbgwvr7_el1
- mrs x11, dbgwvr6_el1
- mrs x10, dbgwvr5_el1
- mrs x9, dbgwvr4_el1
- mrs x8, dbgwvr3_el1
- mrs x7, dbgwvr2_el1
- mrs x6, dbgwvr1_el1
- mrs x5, dbgwvr0_el1
-
- adr x26, 1f
- add x26, x26, x25, lsl #2
- br x26
-
-1:
- str x20, [x3, #(15 * 8)]
- str x19, [x3, #(14 * 8)]
- str x18, [x3, #(13 * 8)]
- str x17, [x3, #(12 * 8)]
- str x16, [x3, #(11 * 8)]
- str x15, [x3, #(10 * 8)]
- str x14, [x3, #(9 * 8)]
- str x13, [x3, #(8 * 8)]
- str x12, [x3, #(7 * 8)]
- str x11, [x3, #(6 * 8)]
- str x10, [x3, #(5 * 8)]
- str x9, [x3, #(4 * 8)]
- str x8, [x3, #(3 * 8)]
- str x7, [x3, #(2 * 8)]
- str x6, [x3, #(1 * 8)]
- str x5, [x3, #(0 * 8)]
-
- mrs x21, mdccint_el1
- str x21, [x2, #CPU_SYSREG_OFFSET(MDCCINT_EL1)]
+ str x21, [x4, #(15 * 8)]
+ str x20, [x4, #(14 * 8)]
+ str x19, [x4, #(13 * 8)]
+ str x18, [x4, #(12 * 8)]
+ str x17, [x4, #(11 * 8)]
+ str x16, [x4, #(10 * 8)]
+ str x15, [x4, #(9 * 8)]
+ str x14, [x4, #(8 * 8)]
+ str x13, [x4, #(7 * 8)]
+ str x12, [x4, #(6 * 8)]
+ str x11, [x4, #(5 * 8)]
+ str x10, [x4, #(4 * 8)]
+ str x9, [x4, #(3 * 8)]
+ str x8, [x4, #(2 * 8)]
+ str x7, [x4, #(1 * 8)]
+ str x6, [x4, #(0 * 8)]
.endm
.macro restore_sysregs
@@ -467,195 +320,52 @@
msr mdscr_el1, x25
.endm
-.macro restore_debug
- // x2: base address for cpu context
- // x3: tmp register
+.macro restore_debug type
+ // x4: pointer to register set
+ // x5: number of registers to skip
+ // x6..x22 trashed
- mrs x26, id_aa64dfr0_el1
- ubfx x24, x26, #12, #4 // Extract BRPs
- ubfx x25, x26, #20, #4 // Extract WRPs
- mov w26, #15
- sub w24, w26, w24 // How many BPs to skip
- sub w25, w26, w25 // How many WPs to skip
-
- add x3, x2, #CPU_SYSREG_OFFSET(DBGBCR0_EL1)
-
- adr x26, 1f
- add x26, x26, x24, lsl #2
- br x26
+ adr x22, 1f
+ add x22, x22, x5, lsl #2
+ br x22
1:
- ldr x20, [x3, #(15 * 8)]
- ldr x19, [x3, #(14 * 8)]
- ldr x18, [x3, #(13 * 8)]
- ldr x17, [x3, #(12 * 8)]
- ldr x16, [x3, #(11 * 8)]
- ldr x15, [x3, #(10 * 8)]
- ldr x14, [x3, #(9 * 8)]
- ldr x13, [x3, #(8 * 8)]
- ldr x12, [x3, #(7 * 8)]
- ldr x11, [x3, #(6 * 8)]
- ldr x10, [x3, #(5 * 8)]
- ldr x9, [x3, #(4 * 8)]
- ldr x8, [x3, #(3 * 8)]
- ldr x7, [x3, #(2 * 8)]
- ldr x6, [x3, #(1 * 8)]
- ldr x5, [x3, #(0 * 8)]
+ ldr x21, [x4, #(15 * 8)]
+ ldr x20, [x4, #(14 * 8)]
+ ldr x19, [x4, #(13 * 8)]
+ ldr x18, [x4, #(12 * 8)]
+ ldr x17, [x4, #(11 * 8)]
+ ldr x16, [x4, #(10 * 8)]
+ ldr x15, [x4, #(9 * 8)]
+ ldr x14, [x4, #(8 * 8)]
+ ldr x13, [x4, #(7 * 8)]
+ ldr x12, [x4, #(6 * 8)]
+ ldr x11, [x4, #(5 * 8)]
+ ldr x10, [x4, #(4 * 8)]
+ ldr x9, [x4, #(3 * 8)]
+ ldr x8, [x4, #(2 * 8)]
+ ldr x7, [x4, #(1 * 8)]
+ ldr x6, [x4, #(0 * 8)]
- adr x26, 1f
- add x26, x26, x24, lsl #2
- br x26
+ adr x22, 1f
+ add x22, x22, x5, lsl #2
+ br x22
1:
- msr dbgbcr15_el1, x20
- msr dbgbcr14_el1, x19
- msr dbgbcr13_el1, x18
- msr dbgbcr12_el1, x17
- msr dbgbcr11_el1, x16
- msr dbgbcr10_el1, x15
- msr dbgbcr9_el1, x14
- msr dbgbcr8_el1, x13
- msr dbgbcr7_el1, x12
- msr dbgbcr6_el1, x11
- msr dbgbcr5_el1, x10
- msr dbgbcr4_el1, x9
- msr dbgbcr3_el1, x8
- msr dbgbcr2_el1, x7
- msr dbgbcr1_el1, x6
- msr dbgbcr0_el1, x5
-
- add x3, x2, #CPU_SYSREG_OFFSET(DBGBVR0_EL1)
-
- adr x26, 1f
- add x26, x26, x24, lsl #2
- br x26
-1:
- ldr x20, [x3, #(15 * 8)]
- ldr x19, [x3, #(14 * 8)]
- ldr x18, [x3, #(13 * 8)]
- ldr x17, [x3, #(12 * 8)]
- ldr x16, [x3, #(11 * 8)]
- ldr x15, [x3, #(10 * 8)]
- ldr x14, [x3, #(9 * 8)]
- ldr x13, [x3, #(8 * 8)]
- ldr x12, [x3, #(7 * 8)]
- ldr x11, [x3, #(6 * 8)]
- ldr x10, [x3, #(5 * 8)]
- ldr x9, [x3, #(4 * 8)]
- ldr x8, [x3, #(3 * 8)]
- ldr x7, [x3, #(2 * 8)]
- ldr x6, [x3, #(1 * 8)]
- ldr x5, [x3, #(0 * 8)]
-
- adr x26, 1f
- add x26, x26, x24, lsl #2
- br x26
-1:
- msr dbgbvr15_el1, x20
- msr dbgbvr14_el1, x19
- msr dbgbvr13_el1, x18
- msr dbgbvr12_el1, x17
- msr dbgbvr11_el1, x16
- msr dbgbvr10_el1, x15
- msr dbgbvr9_el1, x14
- msr dbgbvr8_el1, x13
- msr dbgbvr7_el1, x12
- msr dbgbvr6_el1, x11
- msr dbgbvr5_el1, x10
- msr dbgbvr4_el1, x9
- msr dbgbvr3_el1, x8
- msr dbgbvr2_el1, x7
- msr dbgbvr1_el1, x6
- msr dbgbvr0_el1, x5
-
- add x3, x2, #CPU_SYSREG_OFFSET(DBGWCR0_EL1)
-
- adr x26, 1f
- add x26, x26, x25, lsl #2
- br x26
-1:
- ldr x20, [x3, #(15 * 8)]
- ldr x19, [x3, #(14 * 8)]
- ldr x18, [x3, #(13 * 8)]
- ldr x17, [x3, #(12 * 8)]
- ldr x16, [x3, #(11 * 8)]
- ldr x15, [x3, #(10 * 8)]
- ldr x14, [x3, #(9 * 8)]
- ldr x13, [x3, #(8 * 8)]
- ldr x12, [x3, #(7 * 8)]
- ldr x11, [x3, #(6 * 8)]
- ldr x10, [x3, #(5 * 8)]
- ldr x9, [x3, #(4 * 8)]
- ldr x8, [x3, #(3 * 8)]
- ldr x7, [x3, #(2 * 8)]
- ldr x6, [x3, #(1 * 8)]
- ldr x5, [x3, #(0 * 8)]
-
- adr x26, 1f
- add x26, x26, x25, lsl #2
- br x26
-1:
- msr dbgwcr15_el1, x20
- msr dbgwcr14_el1, x19
- msr dbgwcr13_el1, x18
- msr dbgwcr12_el1, x17
- msr dbgwcr11_el1, x16
- msr dbgwcr10_el1, x15
- msr dbgwcr9_el1, x14
- msr dbgwcr8_el1, x13
- msr dbgwcr7_el1, x12
- msr dbgwcr6_el1, x11
- msr dbgwcr5_el1, x10
- msr dbgwcr4_el1, x9
- msr dbgwcr3_el1, x8
- msr dbgwcr2_el1, x7
- msr dbgwcr1_el1, x6
- msr dbgwcr0_el1, x5
-
- add x3, x2, #CPU_SYSREG_OFFSET(DBGWVR0_EL1)
-
- adr x26, 1f
- add x26, x26, x25, lsl #2
- br x26
-1:
- ldr x20, [x3, #(15 * 8)]
- ldr x19, [x3, #(14 * 8)]
- ldr x18, [x3, #(13 * 8)]
- ldr x17, [x3, #(12 * 8)]
- ldr x16, [x3, #(11 * 8)]
- ldr x15, [x3, #(10 * 8)]
- ldr x14, [x3, #(9 * 8)]
- ldr x13, [x3, #(8 * 8)]
- ldr x12, [x3, #(7 * 8)]
- ldr x11, [x3, #(6 * 8)]
- ldr x10, [x3, #(5 * 8)]
- ldr x9, [x3, #(4 * 8)]
- ldr x8, [x3, #(3 * 8)]
- ldr x7, [x3, #(2 * 8)]
- ldr x6, [x3, #(1 * 8)]
- ldr x5, [x3, #(0 * 8)]
-
- adr x26, 1f
- add x26, x26, x25, lsl #2
- br x26
-1:
- msr dbgwvr15_el1, x20
- msr dbgwvr14_el1, x19
- msr dbgwvr13_el1, x18
- msr dbgwvr12_el1, x17
- msr dbgwvr11_el1, x16
- msr dbgwvr10_el1, x15
- msr dbgwvr9_el1, x14
- msr dbgwvr8_el1, x13
- msr dbgwvr7_el1, x12
- msr dbgwvr6_el1, x11
- msr dbgwvr5_el1, x10
- msr dbgwvr4_el1, x9
- msr dbgwvr3_el1, x8
- msr dbgwvr2_el1, x7
- msr dbgwvr1_el1, x6
- msr dbgwvr0_el1, x5
-
- ldr x21, [x2, #CPU_SYSREG_OFFSET(MDCCINT_EL1)]
- msr mdccint_el1, x21
+ msr \type\()15_el1, x21
+ msr \type\()14_el1, x20
+ msr \type\()13_el1, x19
+ msr \type\()12_el1, x18
+ msr \type\()11_el1, x17
+ msr \type\()10_el1, x16
+ msr \type\()9_el1, x15
+ msr \type\()8_el1, x14
+ msr \type\()7_el1, x13
+ msr \type\()6_el1, x12
+ msr \type\()5_el1, x11
+ msr \type\()4_el1, x10
+ msr \type\()3_el1, x9
+ msr \type\()2_el1, x8
+ msr \type\()1_el1, x7
+ msr \type\()0_el1, x6
.endm
.macro skip_32bit_state tmp, target
@@ -675,6 +385,14 @@
tbz \tmp, #KVM_ARM64_DEBUG_DIRTY_SHIFT, \target
.endm
+/*
+ * Branch to target if CPTR_EL2.TFP bit is set (VFP/SIMD trapping enabled)
+ */
+.macro skip_fpsimd_state tmp, target
+ mrs \tmp, cptr_el2
+ tbnz \tmp, #CPTR_EL2_TFP_SHIFT, \target
+.endm
+
.macro compute_debug_state target
// Compute debug state: If any of KDE, MDE or KVM_ARM64_DEBUG_DIRTY
// is set, we do a full save/restore cycle and disable trapping.
@@ -713,10 +431,12 @@
add x3, x2, #CPU_SYSREG_OFFSET(DACR32_EL2)
mrs x4, dacr32_el2
mrs x5, ifsr32_el2
- mrs x6, fpexc32_el2
stp x4, x5, [x3]
- str x6, [x3, #16]
+ skip_fpsimd_state x8, 3f
+ mrs x6, fpexc32_el2
+ str x6, [x3, #16]
+3:
skip_debug_state x8, 2f
mrs x7, dbgvcr32_el2
str x7, [x3, #24]
@@ -743,10 +463,8 @@
add x3, x2, #CPU_SYSREG_OFFSET(DACR32_EL2)
ldp x4, x5, [x3]
- ldr x6, [x3, #16]
msr dacr32_el2, x4
msr ifsr32_el2, x5
- msr fpexc32_el2, x6
skip_debug_state x8, 2f
ldr x7, [x3, #24]
@@ -763,31 +481,35 @@
.macro activate_traps
ldr x2, [x0, #VCPU_HCR_EL2]
+
+ /*
+ * We are about to set CPTR_EL2.TFP to trap all floating point
+ * register accesses to EL2, however, the ARM ARM clearly states that
+ * traps are only taken to EL2 if the operation would not otherwise
+ * trap to EL1. Therefore, always make sure that for 32-bit guests,
+ * we set FPEXC.EN to prevent traps to EL1, when setting the TFP bit.
+ */
+ tbnz x2, #HCR_RW_SHIFT, 99f // open code skip_32bit_state
+ mov x3, #(1 << 30)
+ msr fpexc32_el2, x3
+ isb
+99:
msr hcr_el2, x2
mov x2, #CPTR_EL2_TTA
+ orr x2, x2, #CPTR_EL2_TFP
msr cptr_el2, x2
mov x2, #(1 << 15) // Trap CP15 Cr=15
msr hstr_el2, x2
- mrs x2, mdcr_el2
- and x2, x2, #MDCR_EL2_HPMN_MASK
- orr x2, x2, #(MDCR_EL2_TPM | MDCR_EL2_TPMCR)
- orr x2, x2, #(MDCR_EL2_TDRA | MDCR_EL2_TDOSA)
-
- // Check for KVM_ARM64_DEBUG_DIRTY, and set debug to trap
- // if not dirty.
- ldr x3, [x0, #VCPU_DEBUG_FLAGS]
- tbnz x3, #KVM_ARM64_DEBUG_DIRTY_SHIFT, 1f
- orr x2, x2, #MDCR_EL2_TDA
-1:
+ // Monitor Debug Config - see kvm_arm_setup_debug()
+ ldr x2, [x0, #VCPU_MDCR_EL2]
msr mdcr_el2, x2
.endm
.macro deactivate_traps
mov x2, #HCR_RW
msr hcr_el2, x2
- msr cptr_el2, xzr
msr hstr_el2, xzr
mrs x2, mdcr_el2
@@ -900,21 +622,101 @@
restore_sysregs
ret
+/* Save debug state */
__save_debug:
- save_debug
+ // x2: ptr to CPU context
+ // x3: ptr to debug reg struct
+ // x4/x5/x6-22/x24-26: trashed
+
+ mrs x26, id_aa64dfr0_el1
+ ubfx x24, x26, #12, #4 // Extract BRPs
+ ubfx x25, x26, #20, #4 // Extract WRPs
+ mov w26, #15
+ sub w24, w26, w24 // How many BPs to skip
+ sub w25, w26, w25 // How many WPs to skip
+
+ mov x5, x24
+ add x4, x3, #DEBUG_BCR
+ save_debug dbgbcr
+ add x4, x3, #DEBUG_BVR
+ save_debug dbgbvr
+
+ mov x5, x25
+ add x4, x3, #DEBUG_WCR
+ save_debug dbgwcr
+ add x4, x3, #DEBUG_WVR
+ save_debug dbgwvr
+
+ mrs x21, mdccint_el1
+ str x21, [x2, #CPU_SYSREG_OFFSET(MDCCINT_EL1)]
ret
+/* Restore debug state */
__restore_debug:
- restore_debug
+ // x2: ptr to CPU context
+ // x3: ptr to debug reg struct
+ // x4/x5/x6-22/x24-26: trashed
+
+ mrs x26, id_aa64dfr0_el1
+ ubfx x24, x26, #12, #4 // Extract BRPs
+ ubfx x25, x26, #20, #4 // Extract WRPs
+ mov w26, #15
+ sub w24, w26, w24 // How many BPs to skip
+ sub w25, w26, w25 // How many WPs to skip
+
+ mov x5, x24
+ add x4, x3, #DEBUG_BCR
+ restore_debug dbgbcr
+ add x4, x3, #DEBUG_BVR
+ restore_debug dbgbvr
+
+ mov x5, x25
+ add x4, x3, #DEBUG_WCR
+ restore_debug dbgwcr
+ add x4, x3, #DEBUG_WVR
+ restore_debug dbgwvr
+
+ ldr x21, [x2, #CPU_SYSREG_OFFSET(MDCCINT_EL1)]
+ msr mdccint_el1, x21
+
ret
__save_fpsimd:
+ skip_fpsimd_state x3, 1f
save_fpsimd
- ret
+1: ret
__restore_fpsimd:
+ skip_fpsimd_state x3, 1f
restore_fpsimd
- ret
+1: ret
+
+switch_to_guest_fpsimd:
+ push x4, lr
+
+ mrs x2, cptr_el2
+ bic x2, x2, #CPTR_EL2_TFP
+ msr cptr_el2, x2
+ isb
+
+ mrs x0, tpidr_el2
+
+ ldr x2, [x0, #VCPU_HOST_CONTEXT]
+ kern_hyp_va x2
+ bl __save_fpsimd
+
+ add x2, x0, #VCPU_CONTEXT
+ bl __restore_fpsimd
+
+ skip_32bit_state x3, 1f
+ ldr x4, [x2, #CPU_SYSREG_OFFSET(FPEXC32_EL2)]
+ msr fpexc32_el2, x4
+1:
+ pop x4, lr
+ pop x2, x3
+ pop x0, x1
+
+ eret
/*
* u64 __kvm_vcpu_run(struct kvm_vcpu *vcpu);
@@ -936,10 +738,10 @@
kern_hyp_va x2
save_host_regs
- bl __save_fpsimd
bl __save_sysregs
compute_debug_state 1f
+ add x3, x0, #VCPU_HOST_DEBUG_STATE
bl __save_debug
1:
activate_traps
@@ -952,9 +754,10 @@
add x2, x0, #VCPU_CONTEXT
bl __restore_sysregs
- bl __restore_fpsimd
skip_debug_state x3, 1f
+ ldr x3, [x0, #VCPU_DEBUG_PTR]
+ kern_hyp_va x3
bl __restore_debug
1:
restore_guest_32bit_state
@@ -975,6 +778,8 @@
bl __save_sysregs
skip_debug_state x3, 1f
+ ldr x3, [x0, #VCPU_DEBUG_PTR]
+ kern_hyp_va x3
bl __save_debug
1:
save_guest_32bit_state
@@ -991,12 +796,15 @@
bl __restore_sysregs
bl __restore_fpsimd
+ /* Clear FPSIMD and Trace trapping */
+ msr cptr_el2, xzr
skip_debug_state x3, 1f
// Clear the dirty flag for the next run, as all the state has
// already been saved. Note that we nuke the whole 64bit word.
// If we ever add more flags, we'll have to be more careful...
str xzr, [x0, #VCPU_DEBUG_FLAGS]
+ add x3, x0, #VCPU_HOST_DEBUG_STATE
bl __restore_debug
1:
restore_host_regs
@@ -1199,6 +1007,11 @@
* x1: ESR
* x2: ESR_EC
*/
+
+ /* Guest accessed VFP/SIMD registers, save host, restore Guest */
+ cmp x2, #ESR_ELx_EC_FP_ASIMD
+ b.eq switch_to_guest_fpsimd
+
cmp x2, #ESR_ELx_EC_DABT_LOW
mov x0, #ESR_ELx_EC_IABT_LOW
ccmp x2, x0, #4, ne
@@ -1293,4 +1106,10 @@
ventry el1_error_invalid // Error 32-bit EL1
ENDPROC(__kvm_hyp_vector)
+
+ENTRY(__kvm_get_mdcr_el2)
+ mrs x0, mdcr_el2
+ ret
+ENDPROC(__kvm_get_mdcr_el2)
+
.popsection
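
The hyp.S changes above make FP/SIMD switching lazy: CPTR_EL2.TFP traps the guest's first FP/SIMD access, switch_to_guest_fpsimd then swaps register state, and the exit path only writes guest state back if that trap ever fired. A rough user-space model of that flow follows; every name and type below is illustrative, none of it is KVM code.

#include <stdbool.h>
#include <stdio.h>
#include <string.h>

struct fpstate { unsigned char regs[512]; };   /* stand-in for the FP/SIMD bank */

static struct fpstate hw;                      /* "hardware" registers            */
static struct fpstate host_fp, guest_fp;       /* saved copies                    */
static bool guest_fp_loaded;                   /* models CPTR_EL2.TFP being clear */

static void first_fp_access_trap(void)        /* ~ switch_to_guest_fpsimd        */
{
	host_fp = hw;                          /* __save_fpsimd for the host      */
	hw = guest_fp;                         /* __restore_fpsimd for the guest  */
	guest_fp_loaded = true;
}

static void vm_exit(void)                      /* ~ exit path clearing CPTR_EL2   */
{
	if (!guest_fp_loaded)
		return;                        /* guest never touched FP: skip    */
	guest_fp = hw;
	hw = host_fp;
	guest_fp_loaded = false;
}

int main(void)
{
	memset(&guest_fp, 0xab, sizeof(guest_fp));
	first_fp_access_trap();                /* guest uses FP once              */
	hw.regs[0] = 0x42;                     /* guest modifies a register       */
	vm_exit();
	printf("guest reg0 after exit: %#x\n", guest_fp.regs[0]);   /* 0x42 */
	return 0;
}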
diff --git a/arch/arm64/kvm/reset.c b/arch/arm64/kvm/reset.c
index 0b43265..91cf535 100644
--- a/arch/arm64/kvm/reset.c
+++ b/arch/arm64/kvm/reset.c
@@ -22,6 +22,7 @@
#include <linux/errno.h>
#include <linux/kvm_host.h>
#include <linux/kvm.h>
+#include <linux/hw_breakpoint.h>
#include <kvm/arm_arch_timer.h>
@@ -56,6 +57,12 @@
return !!(pfr0 & 0x20);
}
+/**
+ * kvm_arch_dev_ioctl_check_extension
+ *
+ * We currently assume that the number of HW registers is uniform
+ * across all CPUs (see cpuinfo_sanity_check).
+ */
int kvm_arch_dev_ioctl_check_extension(long ext)
{
int r;
@@ -64,6 +71,15 @@
case KVM_CAP_ARM_EL1_32BIT:
r = cpu_has_32bit_el1();
break;
+ case KVM_CAP_GUEST_DEBUG_HW_BPS:
+ r = get_num_brps();
+ break;
+ case KVM_CAP_GUEST_DEBUG_HW_WPS:
+ r = get_num_wrps();
+ break;
+ case KVM_CAP_SET_GUEST_DEBUG:
+ r = 1;
+ break;
default:
r = 0;
}
@@ -105,7 +121,5 @@
kvm_reset_sys_regs(vcpu);
/* Reset timer */
- kvm_timer_vcpu_reset(vcpu, cpu_vtimer_irq);
-
- return 0;
+ return kvm_timer_vcpu_reset(vcpu, cpu_vtimer_irq);
}
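
With the new capability cases above wired in, userspace can discover the number of hardware breakpoints and watchpoints before requesting guest debug. A minimal sketch, assuming a <linux/kvm.h> that already carries these KVM_CAP_* constants; the reported counts depend on the CPU.

#include <fcntl.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

int main(void)
{
	int kvm = open("/dev/kvm", O_RDWR);
	if (kvm < 0) {
		perror("open /dev/kvm");
		return 1;
	}
	int bps = ioctl(kvm, KVM_CHECK_EXTENSION, KVM_CAP_GUEST_DEBUG_HW_BPS);
	int wps = ioctl(kvm, KVM_CHECK_EXTENSION, KVM_CAP_GUEST_DEBUG_HW_WPS);
	int dbg = ioctl(kvm, KVM_CHECK_EXTENSION, KVM_CAP_SET_GUEST_DEBUG);
	printf("hw bps: %d, hw wps: %d, set-guest-debug: %d\n", bps, wps, dbg);
	return 0;
}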
diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
index c370b40..b41607d 100644
--- a/arch/arm64/kvm/sys_regs.c
+++ b/arch/arm64/kvm/sys_regs.c
@@ -38,6 +38,8 @@
#include "sys_regs.h"
+#include "trace.h"
+
/*
* All of this file is extremly similar to the ARM coproc.c, but the
* types are different. My gut feeling is that it should be pretty
@@ -208,9 +210,217 @@
*vcpu_reg(vcpu, p->Rt) = vcpu_sys_reg(vcpu, r->reg);
}
+ trace_trap_reg(__func__, r->reg, p->is_write, *vcpu_reg(vcpu, p->Rt));
+
return true;
}
+/*
+ * reg_to_dbg/dbg_to_reg
+ *
+ * A 32 bit write to a debug register leaves the top bits alone
+ * A 32 bit read from a debug register only returns the bottom bits
+ *
+ * All writes will set the KVM_ARM64_DEBUG_DIRTY flag to ensure the
+ * hyp.S code switches between host and guest values in future.
+ */
+static inline void reg_to_dbg(struct kvm_vcpu *vcpu,
+ const struct sys_reg_params *p,
+ u64 *dbg_reg)
+{
+ u64 val = *vcpu_reg(vcpu, p->Rt);
+
+ if (p->is_32bit) {
+ val &= 0xffffffffUL;
+ val |= ((*dbg_reg >> 32) << 32);
+ }
+
+ *dbg_reg = val;
+ vcpu->arch.debug_flags |= KVM_ARM64_DEBUG_DIRTY;
+}
+
+static inline void dbg_to_reg(struct kvm_vcpu *vcpu,
+ const struct sys_reg_params *p,
+ u64 *dbg_reg)
+{
+ u64 val = *dbg_reg;
+
+ if (p->is_32bit)
+ val &= 0xffffffffUL;
+
+ *vcpu_reg(vcpu, p->Rt) = val;
+}
+
+static inline bool trap_bvr(struct kvm_vcpu *vcpu,
+ const struct sys_reg_params *p,
+ const struct sys_reg_desc *rd)
+{
+ u64 *dbg_reg = &vcpu->arch.vcpu_debug_state.dbg_bvr[rd->reg];
+
+ if (p->is_write)
+ reg_to_dbg(vcpu, p, dbg_reg);
+ else
+ dbg_to_reg(vcpu, p, dbg_reg);
+
+ trace_trap_reg(__func__, rd->reg, p->is_write, *dbg_reg);
+
+ return true;
+}
+
+static int set_bvr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd,
+ const struct kvm_one_reg *reg, void __user *uaddr)
+{
+ __u64 *r = &vcpu->arch.vcpu_debug_state.dbg_bvr[rd->reg];
+
+ if (copy_from_user(uaddr, r, KVM_REG_SIZE(reg->id)) != 0)
+ return -EFAULT;
+ return 0;
+}
+
+static int get_bvr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd,
+ const struct kvm_one_reg *reg, void __user *uaddr)
+{
+ __u64 *r = &vcpu->arch.vcpu_debug_state.dbg_bvr[rd->reg];
+
+ if (copy_to_user(uaddr, r, KVM_REG_SIZE(reg->id)) != 0)
+ return -EFAULT;
+ return 0;
+}
+
+static inline void reset_bvr(struct kvm_vcpu *vcpu,
+ const struct sys_reg_desc *rd)
+{
+ vcpu->arch.vcpu_debug_state.dbg_bvr[rd->reg] = rd->val;
+}
+
+static inline bool trap_bcr(struct kvm_vcpu *vcpu,
+ const struct sys_reg_params *p,
+ const struct sys_reg_desc *rd)
+{
+ u64 *dbg_reg = &vcpu->arch.vcpu_debug_state.dbg_bcr[rd->reg];
+
+ if (p->is_write)
+ reg_to_dbg(vcpu, p, dbg_reg);
+ else
+ dbg_to_reg(vcpu, p, dbg_reg);
+
+ trace_trap_reg(__func__, rd->reg, p->is_write, *dbg_reg);
+
+ return true;
+}
+
+static int set_bcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd,
+ const struct kvm_one_reg *reg, void __user *uaddr)
+{
+ __u64 *r = &vcpu->arch.vcpu_debug_state.dbg_bcr[rd->reg];
+
+ if (copy_from_user(uaddr, r, KVM_REG_SIZE(reg->id)) != 0)
+ return -EFAULT;
+
+ return 0;
+}
+
+static int get_bcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd,
+ const struct kvm_one_reg *reg, void __user *uaddr)
+{
+ __u64 *r = &vcpu->arch.vcpu_debug_state.dbg_bcr[rd->reg];
+
+ if (copy_to_user(uaddr, r, KVM_REG_SIZE(reg->id)) != 0)
+ return -EFAULT;
+ return 0;
+}
+
+static inline void reset_bcr(struct kvm_vcpu *vcpu,
+ const struct sys_reg_desc *rd)
+{
+ vcpu->arch.vcpu_debug_state.dbg_bcr[rd->reg] = rd->val;
+}
+
+static inline bool trap_wvr(struct kvm_vcpu *vcpu,
+ const struct sys_reg_params *p,
+ const struct sys_reg_desc *rd)
+{
+ u64 *dbg_reg = &vcpu->arch.vcpu_debug_state.dbg_wvr[rd->reg];
+
+ if (p->is_write)
+ reg_to_dbg(vcpu, p, dbg_reg);
+ else
+ dbg_to_reg(vcpu, p, dbg_reg);
+
+ trace_trap_reg(__func__, rd->reg, p->is_write,
+ vcpu->arch.vcpu_debug_state.dbg_wvr[rd->reg]);
+
+ return true;
+}
+
+static int set_wvr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd,
+ const struct kvm_one_reg *reg, void __user *uaddr)
+{
+ __u64 *r = &vcpu->arch.vcpu_debug_state.dbg_wvr[rd->reg];
+
+ if (copy_from_user(uaddr, r, KVM_REG_SIZE(reg->id)) != 0)
+ return -EFAULT;
+ return 0;
+}
+
+static int get_wvr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd,
+ const struct kvm_one_reg *reg, void __user *uaddr)
+{
+ __u64 *r = &vcpu->arch.vcpu_debug_state.dbg_wvr[rd->reg];
+
+ if (copy_to_user(uaddr, r, KVM_REG_SIZE(reg->id)) != 0)
+ return -EFAULT;
+ return 0;
+}
+
+static inline void reset_wvr(struct kvm_vcpu *vcpu,
+ const struct sys_reg_desc *rd)
+{
+ vcpu->arch.vcpu_debug_state.dbg_wvr[rd->reg] = rd->val;
+}
+
+static inline bool trap_wcr(struct kvm_vcpu *vcpu,
+ const struct sys_reg_params *p,
+ const struct sys_reg_desc *rd)
+{
+ u64 *dbg_reg = &vcpu->arch.vcpu_debug_state.dbg_wcr[rd->reg];
+
+ if (p->is_write)
+ reg_to_dbg(vcpu, p, dbg_reg);
+ else
+ dbg_to_reg(vcpu, p, dbg_reg);
+
+ trace_trap_reg(__func__, rd->reg, p->is_write, *dbg_reg);
+
+ return true;
+}
+
+static int set_wcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd,
+ const struct kvm_one_reg *reg, void __user *uaddr)
+{
+ __u64 *r = &vcpu->arch.vcpu_debug_state.dbg_wcr[rd->reg];
+
+ if (copy_from_user(uaddr, r, KVM_REG_SIZE(reg->id)) != 0)
+ return -EFAULT;
+ return 0;
+}
+
+static int get_wcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd,
+ const struct kvm_one_reg *reg, void __user *uaddr)
+{
+ __u64 *r = &vcpu->arch.vcpu_debug_state.dbg_wcr[rd->reg];
+
+ if (copy_to_user(uaddr, r, KVM_REG_SIZE(reg->id)) != 0)
+ return -EFAULT;
+ return 0;
+}
+
+static inline void reset_wcr(struct kvm_vcpu *vcpu,
+ const struct sys_reg_desc *rd)
+{
+ vcpu->arch.vcpu_debug_state.dbg_wcr[rd->reg] = rd->val;
+}
+
static void reset_amair_el1(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r)
{
u64 amair;
@@ -240,16 +450,16 @@
#define DBG_BCR_BVR_WCR_WVR_EL1(n) \
/* DBGBVRn_EL1 */ \
{ Op0(0b10), Op1(0b000), CRn(0b0000), CRm((n)), Op2(0b100), \
- trap_debug_regs, reset_val, (DBGBVR0_EL1 + (n)), 0 }, \
+ trap_bvr, reset_bvr, n, 0, get_bvr, set_bvr }, \
/* DBGBCRn_EL1 */ \
{ Op0(0b10), Op1(0b000), CRn(0b0000), CRm((n)), Op2(0b101), \
- trap_debug_regs, reset_val, (DBGBCR0_EL1 + (n)), 0 }, \
+ trap_bcr, reset_bcr, n, 0, get_bcr, set_bcr }, \
/* DBGWVRn_EL1 */ \
{ Op0(0b10), Op1(0b000), CRn(0b0000), CRm((n)), Op2(0b110), \
- trap_debug_regs, reset_val, (DBGWVR0_EL1 + (n)), 0 }, \
+ trap_wvr, reset_wvr, n, 0, get_wvr, set_wvr }, \
/* DBGWCRn_EL1 */ \
{ Op0(0b10), Op1(0b000), CRn(0b0000), CRm((n)), Op2(0b111), \
- trap_debug_regs, reset_val, (DBGWCR0_EL1 + (n)), 0 }
+ trap_wcr, reset_wcr, n, 0, get_wcr, set_wcr }
/*
* Architected system registers.
@@ -516,28 +726,57 @@
return true;
}
-#define DBG_BCR_BVR_WCR_WVR(n) \
- /* DBGBVRn */ \
- { Op1( 0), CRn( 0), CRm((n)), Op2( 4), trap_debug32, \
- NULL, (cp14_DBGBVR0 + (n) * 2) }, \
- /* DBGBCRn */ \
- { Op1( 0), CRn( 0), CRm((n)), Op2( 5), trap_debug32, \
- NULL, (cp14_DBGBCR0 + (n) * 2) }, \
- /* DBGWVRn */ \
- { Op1( 0), CRn( 0), CRm((n)), Op2( 6), trap_debug32, \
- NULL, (cp14_DBGWVR0 + (n) * 2) }, \
- /* DBGWCRn */ \
- { Op1( 0), CRn( 0), CRm((n)), Op2( 7), trap_debug32, \
- NULL, (cp14_DBGWCR0 + (n) * 2) }
+/* AArch32 debug register mappings
+ *
+ * AArch32 DBGBVRn is mapped to DBGBVRn_EL1[31:0]
+ * AArch32 DBGBXVRn is mapped to DBGBVRn_EL1[63:32]
+ *
+ * All control registers and watchpoint value registers are mapped to
+ * the lower 32 bits of their AArch64 equivalents. We share the trap
+ * handlers with the above AArch64 code which checks what mode the
+ * system is in.
+ */
-#define DBGBXVR(n) \
- { Op1( 0), CRn( 1), CRm((n)), Op2( 1), trap_debug32, \
- NULL, cp14_DBGBXVR0 + n * 2 }
+static inline bool trap_xvr(struct kvm_vcpu *vcpu,
+ const struct sys_reg_params *p,
+ const struct sys_reg_desc *rd)
+{
+ u64 *dbg_reg = &vcpu->arch.vcpu_debug_state.dbg_bvr[rd->reg];
+
+ if (p->is_write) {
+ u64 val = *dbg_reg;
+
+ val &= 0xffffffffUL;
+ val |= *vcpu_reg(vcpu, p->Rt) << 32;
+ *dbg_reg = val;
+
+ vcpu->arch.debug_flags |= KVM_ARM64_DEBUG_DIRTY;
+ } else {
+ *vcpu_reg(vcpu, p->Rt) = *dbg_reg >> 32;
+ }
+
+ trace_trap_reg(__func__, rd->reg, p->is_write, *dbg_reg);
+
+ return true;
+}
+
+#define DBG_BCR_BVR_WCR_WVR(n) \
+ /* DBGBVRn */ \
+ { Op1( 0), CRn( 0), CRm((n)), Op2( 4), trap_bvr, NULL, n }, \
+ /* DBGBCRn */ \
+ { Op1( 0), CRn( 0), CRm((n)), Op2( 5), trap_bcr, NULL, n }, \
+ /* DBGWVRn */ \
+ { Op1( 0), CRn( 0), CRm((n)), Op2( 6), trap_wvr, NULL, n }, \
+ /* DBGWCRn */ \
+ { Op1( 0), CRn( 0), CRm((n)), Op2( 7), trap_wcr, NULL, n }
+
+#define DBGBXVR(n) \
+ { Op1( 0), CRn( 1), CRm((n)), Op2( 1), trap_xvr, NULL, n }
/*
* Trapped cp14 registers. We generally ignore most of the external
* debug, on the principle that they don't really make sense to a
- * guest. Revisit this one day, whould this principle change.
+ * guest. Revisit this one day, would this principle change.
*/
static const struct sys_reg_desc cp14_regs[] = {
/* DBGIDR */
@@ -999,6 +1238,8 @@
struct sys_reg_params params;
unsigned long esr = kvm_vcpu_get_hsr(vcpu);
+ trace_kvm_handle_sys_reg(esr);
+
params.is_aarch32 = false;
params.is_32bit = false;
params.Op0 = (esr >> 20) & 3;
@@ -1303,6 +1544,9 @@
if (!r)
return get_invariant_sys_reg(reg->id, uaddr);
+ if (r->get_user)
+ return (r->get_user)(vcpu, r, reg, uaddr);
+
return reg_to_user(uaddr, &vcpu_sys_reg(vcpu, r->reg), reg->id);
}
@@ -1321,6 +1565,9 @@
if (!r)
return set_invariant_sys_reg(reg->id, uaddr);
+ if (r->set_user)
+ return (r->set_user)(vcpu, r, reg, uaddr);
+
return reg_from_user(&vcpu_sys_reg(vcpu, r->reg), uaddr, reg->id);
}
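
The access rules documented in the reg_to_dbg/dbg_to_reg comment above (a 32-bit write replaces only the low word of the 64-bit shadow register, a 32-bit read returns only the low word, and the DBGBXVRn mapping reaches the high word) can be modelled in a few lines. A self-contained sketch with made-up names, not the kernel helpers themselves:

#include <stdint.h>
#include <stdio.h>

/* 32-bit write: keep the top half of the shadow register */
static void write32(uint64_t *shadow, uint32_t val)
{
	*shadow = (*shadow & 0xffffffff00000000ULL) | val;
}

/* 32-bit read: only the bottom half is visible */
static uint32_t read32(uint64_t shadow)
{
	return (uint32_t)shadow;
}

/* DBGBXVRn-style access: the AArch32 register maps to the high word */
static void write_xvr(uint64_t *shadow, uint32_t val)
{
	*shadow = (*shadow & 0xffffffffULL) | ((uint64_t)val << 32);
}

int main(void)
{
	uint64_t bvr = 0;
	write32(&bvr, 0xdeadbeef);
	write_xvr(&bvr, 0x1234);
	printf("shadow: %#llx, low word: %#x\n",
	       (unsigned long long)bvr, read32(bvr));  /* 0x1234deadbeef, 0xdeadbeef */
	return 0;
}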
diff --git a/arch/arm64/kvm/sys_regs.h b/arch/arm64/kvm/sys_regs.h
index d411e25..eaa324e 100644
--- a/arch/arm64/kvm/sys_regs.h
+++ b/arch/arm64/kvm/sys_regs.h
@@ -55,6 +55,12 @@
/* Value (usually reset value) */
u64 val;
+
+ /* Custom get/set_user functions, fallback to generic if NULL */
+ int (*get_user)(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd,
+ const struct kvm_one_reg *reg, void __user *uaddr);
+ int (*set_user)(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd,
+ const struct kvm_one_reg *reg, void __user *uaddr);
};
static inline void print_sys_reg_instr(const struct sys_reg_params *p)
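
The two new callbacks above give a descriptor its own userspace accessors while everything else keeps the generic path; the dispatch added in the sys_regs.c hunks earlier is a function pointer with a fallback. A generic illustration of that pattern, not the kernel code itself:

#include <stdio.h>

struct desc {
	const char *name;
	int (*get_user)(const struct desc *d);    /* optional override */
};

static int generic_get(const struct desc *d)
{
	printf("generic accessor for %s\n", d->name);
	return 0;
}

static int custom_get(const struct desc *d)
{
	printf("custom accessor for %s\n", d->name);
	return 0;
}

static int do_get(const struct desc *d)
{
	if (d->get_user)
		return d->get_user(d);     /* e.g. get_bvr/get_wcr above */
	return generic_get(d);             /* plain shadow-register copy */
}

int main(void)
{
	struct desc plain = { "SCTLR_EL1", NULL };
	struct desc dbg   = { "DBGBVR0_EL1", custom_get };
	do_get(&plain);
	do_get(&dbg);
	return 0;
}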
diff --git a/arch/arm64/kvm/sys_regs_generic_v8.c b/arch/arm64/kvm/sys_regs_generic_v8.c
index 475fd29..1e45768 100644
--- a/arch/arm64/kvm/sys_regs_generic_v8.c
+++ b/arch/arm64/kvm/sys_regs_generic_v8.c
@@ -94,6 +94,8 @@
&genericv8_target_table);
kvm_register_target_sys_reg_table(KVM_ARM_TARGET_XGENE_POTENZA,
&genericv8_target_table);
+ kvm_register_target_sys_reg_table(KVM_ARM_TARGET_GENERIC_V8,
+ &genericv8_target_table);
return 0;
}
diff --git a/arch/arm64/kvm/trace.h b/arch/arm64/kvm/trace.h
index 157416e9..7fb0008 100644
--- a/arch/arm64/kvm/trace.h
+++ b/arch/arm64/kvm/trace.h
@@ -44,6 +44,129 @@
__entry->vcpu_pc, __entry->r0, __entry->imm)
);
+TRACE_EVENT(kvm_arm_setup_debug,
+ TP_PROTO(struct kvm_vcpu *vcpu, __u32 guest_debug),
+ TP_ARGS(vcpu, guest_debug),
+
+ TP_STRUCT__entry(
+ __field(struct kvm_vcpu *, vcpu)
+ __field(__u32, guest_debug)
+ ),
+
+ TP_fast_assign(
+ __entry->vcpu = vcpu;
+ __entry->guest_debug = guest_debug;
+ ),
+
+ TP_printk("vcpu: %p, flags: 0x%08x", __entry->vcpu, __entry->guest_debug)
+);
+
+TRACE_EVENT(kvm_arm_clear_debug,
+ TP_PROTO(__u32 guest_debug),
+ TP_ARGS(guest_debug),
+
+ TP_STRUCT__entry(
+ __field(__u32, guest_debug)
+ ),
+
+ TP_fast_assign(
+ __entry->guest_debug = guest_debug;
+ ),
+
+ TP_printk("flags: 0x%08x", __entry->guest_debug)
+);
+
+TRACE_EVENT(kvm_arm_set_dreg32,
+ TP_PROTO(const char *name, __u32 value),
+ TP_ARGS(name, value),
+
+ TP_STRUCT__entry(
+ __field(const char *, name)
+ __field(__u32, value)
+ ),
+
+ TP_fast_assign(
+ __entry->name = name;
+ __entry->value = value;
+ ),
+
+ TP_printk("%s: 0x%08x", __entry->name, __entry->value)
+);
+
+TRACE_EVENT(kvm_arm_set_regset,
+ TP_PROTO(const char *type, int len, __u64 *control, __u64 *value),
+ TP_ARGS(type, len, control, value),
+ TP_STRUCT__entry(
+ __field(const char *, name)
+ __field(int, len)
+ __array(u64, ctrls, 16)
+ __array(u64, values, 16)
+ ),
+ TP_fast_assign(
+ __entry->name = type;
+ __entry->len = len;
+ memcpy(__entry->ctrls, control, len << 3);
+ memcpy(__entry->values, value, len << 3);
+ ),
+ TP_printk("%d %s CTRL:%s VALUE:%s", __entry->len, __entry->name,
+ __print_array(__entry->ctrls, __entry->len, sizeof(__u64)),
+ __print_array(__entry->values, __entry->len, sizeof(__u64)))
+);
+
+TRACE_EVENT(trap_reg,
+ TP_PROTO(const char *fn, int reg, bool is_write, u64 write_value),
+ TP_ARGS(fn, reg, is_write, write_value),
+
+ TP_STRUCT__entry(
+ __field(const char *, fn)
+ __field(int, reg)
+ __field(bool, is_write)
+ __field(u64, write_value)
+ ),
+
+ TP_fast_assign(
+ __entry->fn = fn;
+ __entry->reg = reg;
+ __entry->is_write = is_write;
+ __entry->write_value = write_value;
+ ),
+
+ TP_printk("%s %s reg %d (0x%08llx)", __entry->fn, __entry->is_write?"write to":"read from", __entry->reg, __entry->write_value)
+);
+
+TRACE_EVENT(kvm_handle_sys_reg,
+ TP_PROTO(unsigned long hsr),
+ TP_ARGS(hsr),
+
+ TP_STRUCT__entry(
+ __field(unsigned long, hsr)
+ ),
+
+ TP_fast_assign(
+ __entry->hsr = hsr;
+ ),
+
+ TP_printk("HSR 0x%08lx", __entry->hsr)
+);
+
+TRACE_EVENT(kvm_set_guest_debug,
+ TP_PROTO(struct kvm_vcpu *vcpu, __u32 guest_debug),
+ TP_ARGS(vcpu, guest_debug),
+
+ TP_STRUCT__entry(
+ __field(struct kvm_vcpu *, vcpu)
+ __field(__u32, guest_debug)
+ ),
+
+ TP_fast_assign(
+ __entry->vcpu = vcpu;
+ __entry->guest_debug = guest_debug;
+ ),
+
+ TP_printk("vcpu: %p, flags: 0x%08x", __entry->vcpu, __entry->guest_debug)
+);
+
+
#endif /* _TRACE_ARM64_KVM_H */
#undef TRACE_INCLUDE_PATH
diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c
index 0bcc4bc..99224dc 100644
--- a/arch/arm64/mm/dma-mapping.c
+++ b/arch/arm64/mm/dma-mapping.c
@@ -100,7 +100,7 @@
if (IS_ENABLED(CONFIG_ZONE_DMA) &&
dev->coherent_dma_mask <= DMA_BIT_MASK(32))
flags |= GFP_DMA;
- if (IS_ENABLED(CONFIG_DMA_CMA) && (flags & __GFP_WAIT)) {
+ if (dev_get_cma_area(dev) && (flags & __GFP_WAIT)) {
struct page *page;
void *addr;
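
The one-line change above swaps a build-time question (is CMA configured?) for a run-time one (does this particular device have a CMA area?), so non-CMA devices fall through to the normal allocator even on CMA-enabled kernels. A toy model of that decision, with stand-in types and a stubbed lookup rather than the real kernel API:

#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

struct device { void *cma_area; };             /* stand-in, not the kernel struct */

static void *get_cma_area(const struct device *dev)
{
	return dev->cma_area;                  /* NULL when nothing is attached */
}

static const char *pick_allocator(const struct device *dev, bool may_sleep)
{
	if (get_cma_area(dev) && may_sleep)
		return "CMA";
	return "normal page allocator";
}

int main(void)
{
	struct device with_cma = { .cma_area = (void *)0x1 };
	struct device without  = { .cma_area = NULL };
	printf("%s\n", pick_allocator(&with_cma, true));
	printf("%s\n", pick_allocator(&without, true));
	return 0;
}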
diff --git a/arch/cris/Kconfig b/arch/cris/Kconfig
index 0314e32..8da5653 100644
--- a/arch/cris/Kconfig
+++ b/arch/cris/Kconfig
@@ -36,6 +36,17 @@
int
default 6
+config TRACE_IRQFLAGS_SUPPORT
+ depends on ETRAX_ARCH_V32
+ def_bool y
+
+config STACKTRACE_SUPPORT
+ def_bool y
+
+config LOCKDEP_SUPPORT
+ depends on ETRAX_ARCH_V32
+ def_bool y
+
config CRIS
bool
default y
@@ -58,6 +69,7 @@
select CLKSRC_MMIO if ETRAX_ARCH_V32
select GENERIC_CLOCKEVENTS if ETRAX_ARCH_V32
select GENERIC_SCHED_CLOCK if ETRAX_ARCH_V32
+ select HAVE_DEBUG_BUGVERBOSE if ETRAX_ARCH_V32
config HZ
int
diff --git a/arch/cris/arch-v10/kernel/entry.S b/arch/cris/arch-v10/kernel/entry.S
index 81570fc..b562252 100644
--- a/arch/cris/arch-v10/kernel/entry.S
+++ b/arch/cris/arch-v10/kernel/entry.S
@@ -955,6 +955,14 @@
.long sys_process_vm_writev
.long sys_kcmp /* 350 */
.long sys_finit_module
+ .long sys_sched_setattr
+ .long sys_sched_getattr
+ .long sys_renameat2
+ .long sys_seccomp /* 355 */
+ .long sys_getrandom
+ .long sys_memfd_create
+ .long sys_bpf
+ .long sys_execveat
/*
* NOTE!! This doesn't have to be exact - we just have
diff --git a/arch/cris/arch-v10/lib/dmacopy.c b/arch/cris/arch-v10/lib/dmacopy.c
deleted file mode 100644
index 49f5b8c..0000000
--- a/arch/cris/arch-v10/lib/dmacopy.c
+++ /dev/null
@@ -1,42 +0,0 @@
-/*
- * memcpy for large blocks, using memory-memory DMA channels 6 and 7 in Etrax
- */
-
-#include <asm/svinto.h>
-#include <asm/io.h>
-
-#define D(x)
-
-void *dma_memcpy(void *pdst,
- const void *psrc,
- unsigned int pn)
-{
- static etrax_dma_descr indma, outdma;
-
- D(printk(KERN_DEBUG "dma_memcpy %d bytes... ", pn));
-
-#if 0
- *R_GEN_CONFIG = genconfig_shadow =
- (genconfig_shadow & ~0x3c0000) |
- IO_STATE(R_GEN_CONFIG, dma6, intdma7) |
- IO_STATE(R_GEN_CONFIG, dma7, intdma6);
-#endif
- indma.sw_len = outdma.sw_len = pn;
- indma.ctrl = d_eol | d_eop;
- outdma.ctrl = d_eol;
- indma.buf = psrc;
- outdma.buf = pdst;
-
- *R_DMA_CH6_FIRST = &indma;
- *R_DMA_CH7_FIRST = &outdma;
- *R_DMA_CH6_CMD = IO_STATE(R_DMA_CH6_CMD, cmd, start);
- *R_DMA_CH7_CMD = IO_STATE(R_DMA_CH7_CMD, cmd, start);
-
- while (*R_DMA_CH7_CMD == 1)
- /* wait for completion */;
-
- D(printk(KERN_DEBUG "done\n"));
-}
-
-
-
diff --git a/arch/cris/arch-v10/lib/old_checksum.c b/arch/cris/arch-v10/lib/old_checksum.c
deleted file mode 100644
index 8f79163..0000000
--- a/arch/cris/arch-v10/lib/old_checksum.c
+++ /dev/null
@@ -1,86 +0,0 @@
-/*
- * INET An implementation of the TCP/IP protocol suite for the LINUX
- * operating system. INET is implemented using the BSD Socket
- * interface as the means of communication with the user level.
- *
- * IP/TCP/UDP checksumming routines
- *
- * Authors: Jorge Cwik, <jorge@laser.satlink.net>
- * Arnt Gulbrandsen, <agulbra@nvg.unit.no>
- * Tom May, <ftom@netcom.com>
- * Lots of code moved from tcp.c and ip.c; see those files
- * for more names.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- */
-
-#include <net/checksum.h>
-#include <net/module.h>
-
-#undef PROFILE_CHECKSUM
-
-#ifdef PROFILE_CHECKSUM
-/* these are just for profiling the checksum code with an oscillioscope.. uh */
-#if 0
-#define BITOFF *((unsigned char *)0xb0000030) = 0xff
-#define BITON *((unsigned char *)0xb0000030) = 0x0
-#endif
-#include <asm/io.h>
-#define CBITON LED_ACTIVE_SET(1)
-#define CBITOFF LED_ACTIVE_SET(0)
-#define BITOFF
-#define BITON
-#else
-#define BITOFF
-#define BITON
-#define CBITOFF
-#define CBITON
-#endif
-
-/*
- * computes a partial checksum, e.g. for TCP/UDP fragments
- */
-
-#include <asm/delay.h>
-
-__wsum csum_partial(const void *p, int len, __wsum __sum)
-{
- u32 sum = (__force u32)__sum;
- const u16 *buff = p;
- /*
- * Experiments with ethernet and slip connections show that buff
- * is aligned on either a 2-byte or 4-byte boundary.
- */
- const void *endMarker = p + len;
- const void *marker = endMarker - (len % 16);
-#if 0
- if((int)buff & 0x3)
- printk("unaligned buff %p\n", buff);
- __delay(900); /* extra delay of 90 us to test performance hit */
-#endif
- BITON;
- while (buff < marker) {
- sum += *buff++;
- sum += *buff++;
- sum += *buff++;
- sum += *buff++;
- sum += *buff++;
- sum += *buff++;
- sum += *buff++;
- sum += *buff++;
- }
- marker = endMarker - (len % 2);
- while (buff < marker)
- sum += *buff++;
-
- if (endMarker > buff)
- sum += *(const u8 *)buff; /* add extra byte separately */
-
- BITOFF;
- return (__force __wsum)sum;
-}
-
-EXPORT_SYMBOL(csum_partial);
diff --git a/arch/cris/arch-v32/drivers/Kconfig b/arch/cris/arch-v32/drivers/Kconfig
index 4fc16b4..e6c523c 100644
--- a/arch/cris/arch-v32/drivers/Kconfig
+++ b/arch/cris/arch-v32/drivers/Kconfig
@@ -202,7 +202,7 @@
default "0x00" if ETRAXFS
default "0x00000000" if !ETRAXFS
help
- This is a bitmask (8 bits) with information of what bits in PA that a
+ This is a bitmask with information of what bits in PA that a
user can change direction on using ioctl's.
Bit set = changeable.
You probably want 0 here, but it depends on your hardware.
@@ -213,7 +213,7 @@
default "0x00" if ETRAXFS
default "0x00000000" if !ETRAXFS
help
- This is a bitmask (8 bits) with information of what bits in PA
+ This is a bitmask with information of what bits in PA
that a user can change the value on using ioctl's.
Bit set = changeable.
@@ -223,7 +223,7 @@
default "0x00000" if ETRAXFS
default "0x00000000" if !ETRAXFS
help
- This is a bitmask (18 bits) with information of what bits in PB
+ This is a bitmask with information of what bits in PB
that a user can change direction on using ioctl's.
Bit set = changeable.
You probably want 0 here, but it depends on your hardware.
@@ -234,7 +234,7 @@
default "0x00000" if ETRAXFS
default "0x00000000" if !ETRAXFS
help
- This is a bitmask (18 bits) with information of what bits in PB
+ This is a bitmask with information of what bits in PB
that a user can change the value on using ioctl's.
Bit set = changeable.
@@ -244,7 +244,7 @@
default "0x00000" if ETRAXFS
default "0x00000000" if !ETRAXFS
help
- This is a bitmask (18 bits) with information of what bits in PC
+ This is a bitmask with information of what bits in PC
that a user can change direction on using ioctl's.
Bit set = changeable.
You probably want 0 here, but it depends on your hardware.
@@ -253,9 +253,9 @@
hex "PC user changeable bits mask"
depends on ETRAX_GPIO
default "0x00000" if ETRAXFS
- default "0x00000000" if ETRAXFS
+ default "0x00000000" if !ETRAXFS
help
- This is a bitmask (18 bits) with information of what bits in PC
+ This is a bitmask with information of what bits in PC
that a user can change the value on using ioctl's.
Bit set = changeable.
@@ -264,7 +264,7 @@
depends on ETRAX_GPIO && ETRAXFS
default "0x00000"
help
- This is a bitmask (18 bits) with information of what bits in PD
+ This is a bitmask with information of what bits in PD
that a user can change direction on using ioctl's.
Bit set = changeable.
You probably want 0x00000 here, but it depends on your hardware.
diff --git a/arch/cris/arch-v32/drivers/axisflashmap.c b/arch/cris/arch-v32/drivers/axisflashmap.c
index 28dd771..5387424 100644
--- a/arch/cris/arch-v32/drivers/axisflashmap.c
+++ b/arch/cris/arch-v32/drivers/axisflashmap.c
@@ -313,6 +313,7 @@
size_t len;
int ram_rootfs_partition = -1; /* -1 => no RAM rootfs partition */
int part;
+ struct mtd_partition *partition;
/* We need a root fs. If it resides in RAM, we need to use an
* MTDRAM device, so it must be enabled in the kernel config,
@@ -329,7 +330,7 @@
main_mtd = flash_probe();
if (main_mtd)
- printk(KERN_INFO "%s: 0x%08x bytes of NOR flash memory.\n",
+ printk(KERN_INFO "%s: 0x%08llx bytes of NOR flash memory.\n",
main_mtd->name, main_mtd->size);
#ifdef CONFIG_ETRAX_NANDFLASH
@@ -388,10 +389,10 @@
#endif
if (main_mtd) {
+ loff_t ptable_sector = CONFIG_ETRAX_PTABLE_SECTOR;
main_mtd->owner = THIS_MODULE;
axisflash_mtd = main_mtd;
- loff_t ptable_sector = CONFIG_ETRAX_PTABLE_SECTOR;
/* First partition (rescue) is always set to the default. */
pidx++;
@@ -517,7 +518,7 @@
/* Decide whether to use default partition table. */
/* Only use default table if we actually have a device (main_mtd) */
- struct mtd_partition *partition = &axis_partitions[0];
+ partition = &axis_partitions[0];
if (main_mtd && !ptable_ok) {
memcpy(axis_partitions, axis_default_partitions,
sizeof(axis_default_partitions));
@@ -580,7 +581,7 @@
printk(KERN_INFO "axisflashmap: Adding RAM partition "
"for rootfs image.\n");
err = mtdram_init_device(mtd_ram,
- (void *)partition[part].offset,
+ (void *)(u_int32_t)partition[part].offset,
partition[part].size,
partition[part].name);
if (err)
diff --git a/arch/cris/arch-v32/drivers/mach-a3/gpio.c b/arch/cris/arch-v32/drivers/mach-a3/gpio.c
index 74f9fe8..c92e1da 100644
--- a/arch/cris/arch-v32/drivers/mach-a3/gpio.c
+++ b/arch/cris/arch-v32/drivers/mach-a3/gpio.c
@@ -957,7 +957,7 @@
static int __init gpio_init(void)
{
- int res;
+ int res, res2;
printk(KERN_INFO "ETRAX FS GPIO driver v2.7, (c) 2003-2008 "
"Axis Communications AB\n");
@@ -977,7 +977,7 @@
CRIS_LED_DISK_READ(0);
CRIS_LED_DISK_WRITE(0);
- int res2 = request_irq(GIO_INTR_VECT, gpio_interrupt,
+ res2 = request_irq(GIO_INTR_VECT, gpio_interrupt,
IRQF_SHARED, "gpio", &alarmlist);
if (res2) {
printk(KERN_ERR "err: irq for gpio\n");
diff --git a/arch/cris/arch-v32/drivers/mach-fs/gpio.c b/arch/cris/arch-v32/drivers/mach-fs/gpio.c
index 009f4ee..72968fb 100644
--- a/arch/cris/arch-v32/drivers/mach-fs/gpio.c
+++ b/arch/cris/arch-v32/drivers/mach-fs/gpio.c
@@ -425,12 +425,11 @@
if (p > GPIO_MINOR_LAST)
return -EINVAL;
- priv = kmalloc(sizeof(struct gpio_private), GFP_KERNEL);
+ priv = kzalloc(sizeof(struct gpio_private), GFP_KERNEL);
if (!priv)
return -ENOMEM;
mutex_lock(&gpio_mutex);
- memset(priv, 0, sizeof(*priv));
priv->minor = p;
diff --git a/arch/cris/arch-v32/kernel/entry.S b/arch/cris/arch-v32/kernel/entry.S
index 026a0b2..b17a209 100644
--- a/arch/cris/arch-v32/kernel/entry.S
+++ b/arch/cris/arch-v32/kernel/entry.S
@@ -240,6 +240,17 @@
.type _Rexit,@function
_Rexit:
+#if defined(CONFIG_TRACE_IRQFLAGS)
+ addoq +PT_ccs, $sp, $acr
+ move.d [$acr], $r0
+ btstq 15, $r0 ; I1
+ bpl 1f
+ nop
+ jsr trace_hardirqs_on
+ nop
+1:
+#endif
+
;; This epilogue MUST match the prologues in multiple_interrupt, irq.h
;; and ptregs.h.
addq 4, $sp ; Skip orig_r10.
@@ -875,6 +886,14 @@
.long sys_process_vm_writev
.long sys_kcmp /* 350 */
.long sys_finit_module
+ .long sys_sched_setattr
+ .long sys_sched_getattr
+ .long sys_renameat2
+ .long sys_seccomp /* 355 */
+ .long sys_getrandom
+ .long sys_memfd_create
+ .long sys_bpf
+ .long sys_execveat
/*
* NOTE!! This doesn't have to be exact - we just have
diff --git a/arch/cris/arch-v32/kernel/process.c b/arch/cris/arch-v32/kernel/process.c
index cebd32e..c7ce784 100644
--- a/arch/cris/arch-v32/kernel/process.c
+++ b/arch/cris/arch-v32/kernel/process.c
@@ -23,9 +23,9 @@
/* We use this if we don't have any better idle routine. */
void default_idle(void)
{
+ local_irq_enable();
/* Halt until exception. */
- __asm__ volatile("ei \n\t"
- "halt ");
+ __asm__ volatile("halt");
}
/*
diff --git a/arch/cris/arch-v32/kernel/signal.c b/arch/cris/arch-v32/kernel/signal.c
index 3a36ae6..150d1d7 100644
--- a/arch/cris/arch-v32/kernel/signal.c
+++ b/arch/cris/arch-v32/kernel/signal.c
@@ -19,7 +19,6 @@
#include <asm/processor.h>
#include <asm/ucontext.h>
#include <asm/uaccess.h>
-#include <arch/ptrace.h>
#include <arch/hwregs/cpu_vect.h>
extern unsigned long cris_signal_return_page;
diff --git a/arch/cris/arch-v32/mach-fs/pinmux.c b/arch/cris/arch-v32/mach-fs/pinmux.c
index 05a0470..d8a3a3c 100644
--- a/arch/cris/arch-v32/mach-fs/pinmux.c
+++ b/arch/cris/arch-v32/mach-fs/pinmux.c
@@ -46,6 +46,8 @@
pins[port][i] = mode;
crisv32_pinmux_set(port);
+
+ return 0;
}
static int crisv32_pinmux_init(void)
@@ -93,6 +95,7 @@
int ret = -EINVAL;
char saved[sizeof pins];
unsigned long flags;
+ reg_pinmux_rw_hwprot hwprot;
spin_lock_irqsave(&pinmux_lock, flags);
@@ -101,7 +104,7 @@
crisv32_pinmux_init(); /* Must be done before we read rw_hwprot */
- reg_pinmux_rw_hwprot hwprot = REG_RD(pinmux, regi_pinmux, rw_hwprot);
+ hwprot = REG_RD(pinmux, regi_pinmux, rw_hwprot);
switch (function) {
case pinmux_ser1:
@@ -227,6 +230,7 @@
int ret = -EINVAL;
char saved[sizeof pins];
unsigned long flags;
+ reg_pinmux_rw_hwprot hwprot;
spin_lock_irqsave(&pinmux_lock, flags);
@@ -235,7 +239,7 @@
crisv32_pinmux_init(); /* Must be done before we read rw_hwprot */
- reg_pinmux_rw_hwprot hwprot = REG_RD(pinmux, regi_pinmux, rw_hwprot);
+ hwprot = REG_RD(pinmux, regi_pinmux, rw_hwprot);
switch (function) {
case pinmux_ser1:
diff --git a/arch/cris/configs/artpec_3_defconfig b/arch/cris/configs/artpec_3_defconfig
index 71854d4..70e497e 100644
--- a/arch/cris/configs/artpec_3_defconfig
+++ b/arch/cris/configs/artpec_3_defconfig
@@ -12,10 +12,6 @@
CONFIG_CRIS_MACH_ARTPEC3=y
CONFIG_ETRAX_DRAM_SIZE=32
CONFIG_ETRAX_FLASH1_SIZE=4
-CONFIG_ETRAX_DEF_GIO_PA_OE=1c
-CONFIG_ETRAX_DEF_GIO_PA_OUT=00
-CONFIG_ETRAX_DEF_GIO_PB_OE=00000
-CONFIG_ETRAX_DEF_GIO_PB_OUT=00000
CONFIG_NET=y
CONFIG_PACKET=y
CONFIG_UNIX=y
@@ -42,3 +38,4 @@
CONFIG_CRAMFS=y
CONFIG_NFS_FS=y
CONFIG_NFS_V3=y
+CONFIG_ETRAX_GPIO=y
diff --git a/arch/cris/configs/etraxfs_defconfig b/arch/cris/configs/etraxfs_defconfig
index 87c7227..9123268 100644
--- a/arch/cris/configs/etraxfs_defconfig
+++ b/arch/cris/configs/etraxfs_defconfig
@@ -38,3 +38,4 @@
CONFIG_CRAMFS=y
CONFIG_NFS_FS=y
CONFIG_NFS_V3=y
+CONFIG_ETRAX_GPIO=y
diff --git a/arch/cris/include/arch-v32/arch/bug.h b/arch/cris/include/arch-v32/arch/bug.h
index 0f211e1..fb59faa 100644
--- a/arch/cris/include/arch-v32/arch/bug.h
+++ b/arch/cris/include/arch-v32/arch/bug.h
@@ -10,6 +10,7 @@
* All other stuff is done out-of-band with exception handlers.
*/
#define BUG() \
+do { \
__asm__ __volatile__ ("0: break 14\n\t" \
".section .fixup,\"ax\"\n" \
"1:\n\t" \
@@ -21,9 +22,15 @@
".section __ex_table,\"a\"\n\t" \
".dword 0b, 1b\n\t" \
".previous\n\t" \
- : : "ri" (__FILE__), "i" (__LINE__))
+ : : "ri" (__FILE__), "i" (__LINE__)); \
+ unreachable(); \
+} while (0)
#else
-#define BUG() __asm__ __volatile__ ("break 14\n\t")
+#define BUG() \
+do { \
+ __asm__ __volatile__ ("break 14\n\t"); \
+ unreachable(); \
+} while (0)
#endif
#define HAVE_ARCH_BUG
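
Wrapping the asm in do { } while (0) and ending with unreachable() matters for two reasons: the macro stays usable as a single statement inside if/else, and the compiler stops warning about missing returns after a BUG(). A small stand-alone illustration with a stand-in macro, not the CRIS implementation:

#include <stdio.h>

#define MY_BUG()				\
do {						\
	__builtin_trap();			\
	__builtin_unreachable();		\
} while (0)

/* Without the wrapper a multi-statement BUG() would break this if/else;
 * without unreachable() the compiler would warn about the missing return. */
static int must_be_positive(int x)
{
	if (x > 0)
		return x;
	else
		MY_BUG();
}

int main(void)
{
	printf("%d\n", must_be_positive(3));
	return 0;
}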
diff --git a/arch/cris/include/arch-v32/arch/irqflags.h b/arch/cris/include/arch-v32/arch/irqflags.h
index 041851f..5f6fddf 100644
--- a/arch/cris/include/arch-v32/arch/irqflags.h
+++ b/arch/cris/include/arch-v32/arch/irqflags.h
@@ -2,7 +2,7 @@
#define __ASM_CRIS_ARCH_IRQFLAGS_H
#include <linux/types.h>
-#include <arch/ptrace.h>
+#include <asm/ptrace.h>
static inline unsigned long arch_local_save_flags(void)
{
diff --git a/arch/cris/include/asm/Kbuild b/arch/cris/include/asm/Kbuild
index ad2244f..b7f6819 100644
--- a/arch/cris/include/asm/Kbuild
+++ b/arch/cris/include/asm/Kbuild
@@ -1,14 +1,20 @@
generic-y += atomic.h
+generic-y += auxvec.h
generic-y += barrier.h
+generic-y += bitsperlong.h
generic-y += clkdev.h
generic-y += cmpxchg.h
generic-y += cputime.h
generic-y += device.h
generic-y += div64.h
+generic-y += errno.h
generic-y += exec.h
generic-y += emergency-restart.h
+generic-y += fcntl.h
generic-y += futex.h
generic-y += hardirq.h
+generic-y += ioctl.h
+generic-y += ipcbuf.h
generic-y += irq_regs.h
generic-y += irq_work.h
generic-y += kdebug.h
@@ -19,11 +25,22 @@
generic-y += local64.h
generic-y += mcs_spinlock.h
generic-y += mm-arch-hooks.h
+generic-y += mman.h
generic-y += module.h
+generic-y += msgbuf.h
generic-y += percpu.h
+generic-y += poll.h
generic-y += preempt.h
+generic-y += resource.h
generic-y += sections.h
+generic-y += sembuf.h
+generic-y += shmbuf.h
+generic-y += siginfo.h
+generic-y += socket.h
+generic-y += sockios.h
+generic-y += statfs.h
generic-y += topology.h
generic-y += trace_clock.h
+generic-y += types.h
generic-y += vga.h
generic-y += xor.h
diff --git a/arch/cris/include/asm/mmu_context.h b/arch/cris/include/asm/mmu_context.h
index 1d45fd6..349acfd 100644
--- a/arch/cris/include/asm/mmu_context.h
+++ b/arch/cris/include/asm/mmu_context.h
@@ -11,7 +11,14 @@
#define deactivate_mm(tsk,mm) do { } while (0)
-#define activate_mm(prev,next) switch_mm((prev),(next),NULL)
+static inline void activate_mm(struct mm_struct *prev, struct mm_struct *next)
+{
+ unsigned long flags;
+
+ local_irq_save(flags);
+ switch_mm(prev, next, NULL);
+ local_irq_restore(flags);
+}
/* current active pgd - this is similar to other processors pgd
* registers like cr3 on the i386
diff --git a/arch/cris/include/asm/stacktrace.h b/arch/cris/include/asm/stacktrace.h
new file mode 100644
index 0000000..2d90856
--- /dev/null
+++ b/arch/cris/include/asm/stacktrace.h
@@ -0,0 +1,8 @@
+#ifndef __CRIS_STACKTRACE_H
+#define __CRIS_STACKTRACE_H
+
+void walk_stackframe(unsigned long sp,
+ int (*fn)(unsigned long addr, void *data),
+ void *data);
+
+#endif
diff --git a/arch/cris/include/asm/types.h b/arch/cris/include/asm/types.h
deleted file mode 100644
index a3cac77..0000000
--- a/arch/cris/include/asm/types.h
+++ /dev/null
@@ -1,12 +0,0 @@
-#ifndef _ETRAX_TYPES_H
-#define _ETRAX_TYPES_H
-
-#include <uapi/asm/types.h>
-
-/*
- * These aren't exported outside the kernel to avoid name space clashes
- */
-
-#define BITS_PER_LONG 32
-
-#endif
diff --git a/arch/cris/include/asm/unistd.h b/arch/cris/include/asm/unistd.h
index 0f40fed..9c23535 100644
--- a/arch/cris/include/asm/unistd.h
+++ b/arch/cris/include/asm/unistd.h
@@ -4,7 +4,7 @@
#include <uapi/asm/unistd.h>
-#define NR_syscalls 360
+#define NR_syscalls 365
#include <arch/unistd.h>
diff --git a/arch/cris/include/uapi/asm/Kbuild b/arch/cris/include/uapi/asm/Kbuild
index 01f66b8..d5564a0 100644
--- a/arch/cris/include/uapi/asm/Kbuild
+++ b/arch/cris/include/uapi/asm/Kbuild
@@ -6,6 +6,9 @@
header-y += auxvec.h
header-y += bitsperlong.h
header-y += byteorder.h
+header-y += elf.h
+header-y += elf_v10.h
+header-y += elf_v32.h
header-y += errno.h
header-y += ethernet.h
header-y += etraxgpio.h
@@ -19,6 +22,8 @@
header-y += poll.h
header-y += posix_types.h
header-y += ptrace.h
+header-y += ptrace_v10.h
+header-y += ptrace_v32.h
header-y += resource.h
header-y += rs485.h
header-y += sembuf.h
diff --git a/arch/cris/include/uapi/asm/auxvec.h b/arch/cris/include/uapi/asm/auxvec.h
deleted file mode 100644
index cb30b01..0000000
--- a/arch/cris/include/uapi/asm/auxvec.h
+++ /dev/null
@@ -1,4 +0,0 @@
-#ifndef __ASMCRIS_AUXVEC_H
-#define __ASMCRIS_AUXVEC_H
-
-#endif
diff --git a/arch/cris/include/uapi/asm/bitsperlong.h b/arch/cris/include/uapi/asm/bitsperlong.h
deleted file mode 100644
index 6dc0bb0..0000000
--- a/arch/cris/include/uapi/asm/bitsperlong.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/bitsperlong.h>
diff --git a/arch/cris/include/asm/elf.h b/arch/cris/include/uapi/asm/elf.h
similarity index 95%
rename from arch/cris/include/asm/elf.h
rename to arch/cris/include/uapi/asm/elf.h
index c2a394f..a5df05b 100644
--- a/arch/cris/include/asm/elf.h
+++ b/arch/cris/include/uapi/asm/elf.h
@@ -5,7 +5,11 @@
* ELF register definitions..
*/
-#include <asm/user.h>
+#ifdef __arch_v32
+#include <asm/elf_v32.h>
+#else
+#include <asm/elf_v10.h>
+#endif
#define R_CRIS_NONE 0
#define R_CRIS_8 1
@@ -32,7 +36,6 @@
/* Note that NGREG is defined to ELF_NGREG in include/linux/elfcore.h, and is
thus exposed to user-space. */
-#define ELF_NGREG (sizeof (struct user_regs_struct) / sizeof(elf_greg_t))
typedef elf_greg_t elf_gregset_t[ELF_NGREG];
/* A placeholder; CRIS does not have any fp regs. */
@@ -45,8 +48,6 @@
#define ELF_DATA ELFDATA2LSB
#define ELF_ARCH EM_CRIS
-#include <arch/elf.h>
-
/* The master for these definitions is {binutils}/include/elf/cris.h: */
/* User symbols in this file have a leading underscore. */
#define EF_CRIS_UNDERSCORE 0x00000001
diff --git a/arch/cris/include/arch-v10/arch/elf.h b/arch/cris/include/uapi/asm/elf_v10.h
similarity index 97%
rename from arch/cris/include/arch-v10/arch/elf.h
rename to arch/cris/include/uapi/asm/elf_v10.h
index 1eb638a..3ea65ce 100644
--- a/arch/cris/include/arch-v10/arch/elf.h
+++ b/arch/cris/include/uapi/asm/elf_v10.h
@@ -1,10 +1,11 @@
#ifndef __ASMCRIS_ARCH_ELF_H
#define __ASMCRIS_ARCH_ELF_H
-#include <arch/system.h>
-
#define ELF_MACH EF_CRIS_VARIANT_ANY_V0_V10
+/* Matches struct user_regs_struct */
+#define ELF_NGREG 35
+
/*
* This is used to ensure we don't load something for the wrong architecture.
*/
diff --git a/arch/cris/include/arch-v32/arch/elf.h b/arch/cris/include/uapi/asm/elf_v32.h
similarity index 97%
rename from arch/cris/include/arch-v32/arch/elf.h
rename to arch/cris/include/uapi/asm/elf_v32.h
index c46d582..f09fe49 100644
--- a/arch/cris/include/arch-v32/arch/elf.h
+++ b/arch/cris/include/uapi/asm/elf_v32.h
@@ -1,10 +1,11 @@
#ifndef _ASM_CRIS_ELF_H
#define _ASM_CRIS_ELF_H
-#include <arch/system.h>
-
#define ELF_CORE_EFLAGS EF_CRIS_VARIANT_V32
+/* Matches struct user_regs_struct */
+#define ELF_NGREG 32
+
/*
* This is used to ensure we don't load something for the wrong architecture.
*/
diff --git a/arch/cris/include/uapi/asm/errno.h b/arch/cris/include/uapi/asm/errno.h
deleted file mode 100644
index 2bf5eb5..0000000
--- a/arch/cris/include/uapi/asm/errno.h
+++ /dev/null
@@ -1,6 +0,0 @@
-#ifndef _CRIS_ERRNO_H
-#define _CRIS_ERRNO_H
-
-#include <asm-generic/errno.h>
-
-#endif
diff --git a/arch/cris/include/uapi/asm/fcntl.h b/arch/cris/include/uapi/asm/fcntl.h
deleted file mode 100644
index 46ab12d..0000000
--- a/arch/cris/include/uapi/asm/fcntl.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/fcntl.h>
diff --git a/arch/cris/include/uapi/asm/ioctl.h b/arch/cris/include/uapi/asm/ioctl.h
deleted file mode 100644
index b279fe0..0000000
--- a/arch/cris/include/uapi/asm/ioctl.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/ioctl.h>
diff --git a/arch/cris/include/uapi/asm/ipcbuf.h b/arch/cris/include/uapi/asm/ipcbuf.h
deleted file mode 100644
index 84c7e51..0000000
--- a/arch/cris/include/uapi/asm/ipcbuf.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/ipcbuf.h>
diff --git a/arch/cris/include/uapi/asm/kvm_para.h b/arch/cris/include/uapi/asm/kvm_para.h
deleted file mode 100644
index 14fab8f..0000000
--- a/arch/cris/include/uapi/asm/kvm_para.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/kvm_para.h>
diff --git a/arch/cris/include/uapi/asm/mman.h b/arch/cris/include/uapi/asm/mman.h
deleted file mode 100644
index 8eebf89f5..0000000
--- a/arch/cris/include/uapi/asm/mman.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/mman.h>
diff --git a/arch/cris/include/uapi/asm/msgbuf.h b/arch/cris/include/uapi/asm/msgbuf.h
deleted file mode 100644
index ada63df..0000000
--- a/arch/cris/include/uapi/asm/msgbuf.h
+++ /dev/null
@@ -1,33 +0,0 @@
-#ifndef _CRIS_MSGBUF_H
-#define _CRIS_MSGBUF_H
-
-/* verbatim copy of asm-i386 version */
-
-/*
- * The msqid64_ds structure for CRIS architecture.
- * Note extra padding because this structure is passed back and forth
- * between kernel and user space.
- *
- * Pad space is left for:
- * - 64-bit time_t to solve y2038 problem
- * - 2 miscellaneous 32-bit values
- */
-
-struct msqid64_ds {
- struct ipc64_perm msg_perm;
- __kernel_time_t msg_stime; /* last msgsnd time */
- unsigned long __unused1;
- __kernel_time_t msg_rtime; /* last msgrcv time */
- unsigned long __unused2;
- __kernel_time_t msg_ctime; /* last change time */
- unsigned long __unused3;
- unsigned long msg_cbytes; /* current number of bytes on queue */
- unsigned long msg_qnum; /* number of messages in queue */
- unsigned long msg_qbytes; /* max number of bytes on queue */
- __kernel_pid_t msg_lspid; /* pid of last msgsnd */
- __kernel_pid_t msg_lrpid; /* last receive pid */
- unsigned long __unused4;
- unsigned long __unused5;
-};
-
-#endif /* _CRIS_MSGBUF_H */
diff --git a/arch/cris/include/uapi/asm/poll.h b/arch/cris/include/uapi/asm/poll.h
deleted file mode 100644
index c98509d..0000000
--- a/arch/cris/include/uapi/asm/poll.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/poll.h>
diff --git a/arch/cris/include/uapi/asm/ptrace.h b/arch/cris/include/uapi/asm/ptrace.h
index c689c9b..bd8946f 100644
--- a/arch/cris/include/uapi/asm/ptrace.h
+++ b/arch/cris/include/uapi/asm/ptrace.h
@@ -1 +1,5 @@
-#include <arch/ptrace.h>
+#ifdef __arch_v32
+#include <asm/ptrace_v32.h>
+#else
+#include <asm/ptrace_v10.h>
+#endif
diff --git a/arch/cris/include/arch-v10/arch/ptrace.h b/arch/cris/include/uapi/asm/ptrace_v10.h
similarity index 100%
rename from arch/cris/include/arch-v10/arch/ptrace.h
rename to arch/cris/include/uapi/asm/ptrace_v10.h
diff --git a/arch/cris/include/arch-v32/arch/ptrace.h b/arch/cris/include/uapi/asm/ptrace_v32.h
similarity index 100%
rename from arch/cris/include/arch-v32/arch/ptrace.h
rename to arch/cris/include/uapi/asm/ptrace_v32.h
diff --git a/arch/cris/include/uapi/asm/resource.h b/arch/cris/include/uapi/asm/resource.h
deleted file mode 100644
index b5d2944..0000000
--- a/arch/cris/include/uapi/asm/resource.h
+++ /dev/null
@@ -1,6 +0,0 @@
-#ifndef _CRIS_RESOURCE_H
-#define _CRIS_RESOURCE_H
-
-#include <asm-generic/resource.h>
-
-#endif
diff --git a/arch/cris/include/uapi/asm/sembuf.h b/arch/cris/include/uapi/asm/sembuf.h
deleted file mode 100644
index 7fed984..0000000
--- a/arch/cris/include/uapi/asm/sembuf.h
+++ /dev/null
@@ -1,25 +0,0 @@
-#ifndef _CRIS_SEMBUF_H
-#define _CRIS_SEMBUF_H
-
-/*
- * The semid64_ds structure for CRIS architecture.
- * Note extra padding because this structure is passed back and forth
- * between kernel and user space.
- *
- * Pad space is left for:
- * - 64-bit time_t to solve y2038 problem
- * - 2 miscellaneous 32-bit values
- */
-
-struct semid64_ds {
- struct ipc64_perm sem_perm; /* permissions .. see ipc.h */
- __kernel_time_t sem_otime; /* last semop time */
- unsigned long __unused1;
- __kernel_time_t sem_ctime; /* last change time */
- unsigned long __unused2;
- unsigned long sem_nsems; /* no. of semaphores in array */
- unsigned long __unused3;
- unsigned long __unused4;
-};
-
-#endif /* _CRIS_SEMBUF_H */
diff --git a/arch/cris/include/uapi/asm/shmbuf.h b/arch/cris/include/uapi/asm/shmbuf.h
deleted file mode 100644
index 3239e3f..0000000
--- a/arch/cris/include/uapi/asm/shmbuf.h
+++ /dev/null
@@ -1,42 +0,0 @@
-#ifndef _CRIS_SHMBUF_H
-#define _CRIS_SHMBUF_H
-
-/*
- * The shmid64_ds structure for CRIS architecture (same as for i386)
- * Note extra padding because this structure is passed back and forth
- * between kernel and user space.
- *
- * Pad space is left for:
- * - 64-bit time_t to solve y2038 problem
- * - 2 miscellaneous 32-bit values
- */
-
-struct shmid64_ds {
- struct ipc64_perm shm_perm; /* operation perms */
- size_t shm_segsz; /* size of segment (bytes) */
- __kernel_time_t shm_atime; /* last attach time */
- unsigned long __unused1;
- __kernel_time_t shm_dtime; /* last detach time */
- unsigned long __unused2;
- __kernel_time_t shm_ctime; /* last change time */
- unsigned long __unused3;
- __kernel_pid_t shm_cpid; /* pid of creator */
- __kernel_pid_t shm_lpid; /* pid of last operator */
- unsigned long shm_nattch; /* no. of current attaches */
- unsigned long __unused4;
- unsigned long __unused5;
-};
-
-struct shminfo64 {
- unsigned long shmmax;
- unsigned long shmmin;
- unsigned long shmmni;
- unsigned long shmseg;
- unsigned long shmall;
- unsigned long __unused1;
- unsigned long __unused2;
- unsigned long __unused3;
- unsigned long __unused4;
-};
-
-#endif /* _CRIS_SHMBUF_H */
diff --git a/arch/cris/include/uapi/asm/siginfo.h b/arch/cris/include/uapi/asm/siginfo.h
deleted file mode 100644
index c1cd6d1..0000000
--- a/arch/cris/include/uapi/asm/siginfo.h
+++ /dev/null
@@ -1,6 +0,0 @@
-#ifndef _CRIS_SIGINFO_H
-#define _CRIS_SIGINFO_H
-
-#include <asm-generic/siginfo.h>
-
-#endif
diff --git a/arch/cris/include/uapi/asm/socket.h b/arch/cris/include/uapi/asm/socket.h
deleted file mode 100644
index e2503d9f..0000000
--- a/arch/cris/include/uapi/asm/socket.h
+++ /dev/null
@@ -1,92 +0,0 @@
-#ifndef _ASM_SOCKET_H
-#define _ASM_SOCKET_H
-
-/* almost the same as asm-i386/socket.h */
-
-#include <asm/sockios.h>
-
-/* For setsockoptions(2) */
-#define SOL_SOCKET 1
-
-#define SO_DEBUG 1
-#define SO_REUSEADDR 2
-#define SO_TYPE 3
-#define SO_ERROR 4
-#define SO_DONTROUTE 5
-#define SO_BROADCAST 6
-#define SO_SNDBUF 7
-#define SO_RCVBUF 8
-#define SO_SNDBUFFORCE 32
-#define SO_RCVBUFFORCE 33
-#define SO_KEEPALIVE 9
-#define SO_OOBINLINE 10
-#define SO_NO_CHECK 11
-#define SO_PRIORITY 12
-#define SO_LINGER 13
-#define SO_BSDCOMPAT 14
-#define SO_REUSEPORT 15
-#define SO_PASSCRED 16
-#define SO_PEERCRED 17
-#define SO_RCVLOWAT 18
-#define SO_SNDLOWAT 19
-#define SO_RCVTIMEO 20
-#define SO_SNDTIMEO 21
-
-/* Security levels - as per NRL IPv6 - don't actually do anything */
-#define SO_SECURITY_AUTHENTICATION 22
-#define SO_SECURITY_ENCRYPTION_TRANSPORT 23
-#define SO_SECURITY_ENCRYPTION_NETWORK 24
-
-#define SO_BINDTODEVICE 25
-
-/* Socket filtering */
-#define SO_ATTACH_FILTER 26
-#define SO_DETACH_FILTER 27
-#define SO_GET_FILTER SO_ATTACH_FILTER
-
-#define SO_PEERNAME 28
-#define SO_TIMESTAMP 29
-#define SCM_TIMESTAMP SO_TIMESTAMP
-
-#define SO_ACCEPTCONN 30
-
-#define SO_PEERSEC 31
-#define SO_PASSSEC 34
-#define SO_TIMESTAMPNS 35
-#define SCM_TIMESTAMPNS SO_TIMESTAMPNS
-
-#define SO_MARK 36
-
-#define SO_TIMESTAMPING 37
-#define SCM_TIMESTAMPING SO_TIMESTAMPING
-
-#define SO_PROTOCOL 38
-#define SO_DOMAIN 39
-
-#define SO_RXQ_OVFL 40
-
-#define SO_WIFI_STATUS 41
-#define SCM_WIFI_STATUS SO_WIFI_STATUS
-#define SO_PEEK_OFF 42
-
-/* Instruct lower device to use last 4-bytes of skb data as FCS */
-#define SO_NOFCS 43
-
-#define SO_LOCK_FILTER 44
-
-#define SO_SELECT_ERR_QUEUE 45
-
-#define SO_BUSY_POLL 46
-
-#define SO_MAX_PACING_RATE 47
-
-#define SO_BPF_EXTENSIONS 48
-
-#define SO_INCOMING_CPU 49
-
-#define SO_ATTACH_BPF 50
-#define SO_DETACH_BPF SO_DETACH_FILTER
-
-#endif /* _ASM_SOCKET_H */
-
-
diff --git a/arch/cris/include/uapi/asm/sockios.h b/arch/cris/include/uapi/asm/sockios.h
deleted file mode 100644
index cfe7bfe..0000000
--- a/arch/cris/include/uapi/asm/sockios.h
+++ /dev/null
@@ -1,13 +0,0 @@
-#ifndef __ARCH_CRIS_SOCKIOS__
-#define __ARCH_CRIS_SOCKIOS__
-
-/* Socket-level I/O control calls. */
-#define FIOSETOWN 0x8901
-#define SIOCSPGRP 0x8902
-#define FIOGETOWN 0x8903
-#define SIOCGPGRP 0x8904
-#define SIOCATMARK 0x8905
-#define SIOCGSTAMP 0x8906 /* Get stamp (timeval) */
-#define SIOCGSTAMPNS 0x8907 /* Get stamp (timespec) */
-
-#endif
diff --git a/arch/cris/include/uapi/asm/statfs.h b/arch/cris/include/uapi/asm/statfs.h
deleted file mode 100644
index fdaf921..0000000
--- a/arch/cris/include/uapi/asm/statfs.h
+++ /dev/null
@@ -1,6 +0,0 @@
-#ifndef _CRIS_STATFS_H
-#define _CRIS_STATFS_H
-
-#include <asm-generic/statfs.h>
-
-#endif
diff --git a/arch/cris/include/uapi/asm/types.h b/arch/cris/include/uapi/asm/types.h
deleted file mode 100644
index 9ec9d4c..0000000
--- a/arch/cris/include/uapi/asm/types.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/int-ll64.h>
diff --git a/arch/cris/include/uapi/asm/unistd.h b/arch/cris/include/uapi/asm/unistd.h
index f3287fa..062b648 100644
--- a/arch/cris/include/uapi/asm/unistd.h
+++ b/arch/cris/include/uapi/asm/unistd.h
@@ -356,5 +356,13 @@
#define __NR_process_vm_writev 349
#define __NR_kcmp 350
#define __NR_finit_module 351
+#define __NR_sched_setattr 352
+#define __NR_sched_getattr 353
+#define __NR_renameat2 354
+#define __NR_seccomp 355
+#define __NR_getrandom 356
+#define __NR_memfd_create 357
+#define __NR_bpf 358
+#define __NR_execveat 359
#endif /* _UAPI_ASM_CRIS_UNISTD_H_ */
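
These numbers complete the plumbing: once the entry.S tables carry the matching entries (as in the hunks earlier), the calls are reachable by number even before libc learns about them. A hedged example using the raw syscall(2) interface on a CRIS build; the constant comes straight from the hunk above.

#include <stdio.h>
#include <unistd.h>

#ifndef __NR_getrandom
#define __NR_getrandom 356          /* value added in the hunk above (CRIS) */
#endif

int main(void)
{
	char buf[16];
	long n = syscall(__NR_getrandom, buf, sizeof(buf), 0);
	if (n < 0) {
		perror("getrandom");
		return 1;
	}
	printf("got %ld random bytes\n", n);
	return 0;
}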
diff --git a/arch/cris/kernel/Makefile b/arch/cris/kernel/Makefile
index edef71f..5fae398 100644
--- a/arch/cris/kernel/Makefile
+++ b/arch/cris/kernel/Makefile
@@ -8,6 +8,7 @@
obj-y := process.o traps.o irq.o ptrace.o setup.o time.o sys_cris.o
obj-y += devicetree.o
+obj-y += stacktrace.o
obj-$(CONFIG_MODULES) += crisksyms.o
obj-$(CONFIG_MODULES) += module.o
diff --git a/arch/cris/kernel/irq.c b/arch/cris/kernel/irq.c
index dd0be5d..694850e 100644
--- a/arch/cris/kernel/irq.c
+++ b/arch/cris/kernel/irq.c
@@ -45,7 +45,11 @@
asmlinkage void do_IRQ(int irq, struct pt_regs * regs)
{
unsigned long sp;
- struct pt_regs *old_regs = set_irq_regs(regs);
+ struct pt_regs *old_regs;
+
+ trace_hardirqs_off();
+
+ old_regs = set_irq_regs(regs);
irq_enter();
sp = rdsp();
if (unlikely((sp & (PAGE_SIZE - 1)) < (PAGE_SIZE/8))) {
diff --git a/arch/cris/kernel/stacktrace.c b/arch/cris/kernel/stacktrace.c
new file mode 100644
index 0000000..99838c7
--- /dev/null
+++ b/arch/cris/kernel/stacktrace.c
@@ -0,0 +1,75 @@
+#include <linux/sched.h>
+#include <linux/stacktrace.h>
+#include <asm/stacktrace.h>
+
+void walk_stackframe(unsigned long sp,
+ int (*fn)(unsigned long addr, void *data),
+ void *data)
+{
+ unsigned long high = ALIGN(sp, THREAD_SIZE);
+
+ for (; sp <= high - 4; sp += 4) {
+ unsigned long addr = *(unsigned long *) sp;
+
+ if (!kernel_text_address(addr))
+ continue;
+
+ if (fn(addr, data))
+ break;
+ }
+}
+
+struct stack_trace_data {
+ struct stack_trace *trace;
+ unsigned int no_sched_functions;
+ unsigned int skip;
+};
+
+#ifdef CONFIG_STACKTRACE
+
+static int save_trace(unsigned long addr, void *d)
+{
+ struct stack_trace_data *data = d;
+ struct stack_trace *trace = data->trace;
+
+ if (data->no_sched_functions && in_sched_functions(addr))
+ return 0;
+
+ if (data->skip) {
+ data->skip--;
+ return 0;
+ }
+
+ trace->entries[trace->nr_entries++] = addr;
+
+ return trace->nr_entries >= trace->max_entries;
+}
+
+void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace)
+{
+ struct stack_trace_data data;
+ unsigned long sp;
+
+ data.trace = trace;
+ data.skip = trace->skip;
+
+ if (tsk != current) {
+ data.no_sched_functions = 1;
+ sp = tsk->thread.ksp;
+ } else {
+ data.no_sched_functions = 0;
+ sp = rdsp();
+ }
+
+ walk_stackframe(sp, save_trace, &data);
+ if (trace->nr_entries < trace->max_entries)
+ trace->entries[trace->nr_entries++] = ULONG_MAX;
+}
+
+void save_stack_trace(struct stack_trace *trace)
+{
+ save_stack_trace_tsk(current, trace);
+}
+EXPORT_SYMBOL_GPL(save_stack_trace);
+
+#endif /* CONFIG_STACKTRACE */
diff --git a/arch/h8300/boot/compressed/misc.c b/arch/h8300/boot/compressed/misc.c
index 7042741..c4f2cfc 100644
--- a/arch/h8300/boot/compressed/misc.c
+++ b/arch/h8300/boot/compressed/misc.c
@@ -70,5 +70,5 @@
free_mem_ptr = (unsigned long)&_end;
free_mem_end_ptr = free_mem_ptr + HEAP_SIZE;
- decompress(input_data, input_len, NULL, NULL, output, NULL, error);
+ __decompress(input_data, input_len, NULL, NULL, output, 0, NULL, error);
}
diff --git a/arch/h8300/include/asm/dma-mapping.h b/arch/h8300/include/asm/dma-mapping.h
index 6e67a909..d9b5b80 100644
--- a/arch/h8300/include/asm/dma-mapping.h
+++ b/arch/h8300/include/asm/dma-mapping.h
@@ -1,8 +1,6 @@
#ifndef _H8300_DMA_MAPPING_H
#define _H8300_DMA_MAPPING_H
-#include <asm-generic/dma-coherent.h>
-
extern struct dma_map_ops h8300_dma_map_ops;
static inline struct dma_map_ops *get_dma_ops(struct device *dev)
@@ -12,46 +10,4 @@
#include <asm-generic/dma-mapping-common.h>
-static inline int dma_supported(struct device *dev, u64 mask)
-{
- return 0;
-}
-
-static inline int dma_set_mask(struct device *dev, u64 mask)
-{
- return 0;
-}
-
-#define dma_alloc_noncoherent(d, s, h, f) dma_alloc_coherent(d, s, h, f)
-#define dma_free_noncoherent(d, s, v, h) dma_free_coherent(d, s, v, h)
-
-#define dma_alloc_coherent(d, s, h, f) dma_alloc_attrs(d, s, h, f, NULL)
-
-static inline void *dma_alloc_attrs(struct device *dev, size_t size,
- dma_addr_t *dma_handle, gfp_t flag,
- struct dma_attrs *attrs)
-{
- struct dma_map_ops *ops = get_dma_ops(dev);
- void *memory;
-
- memory = ops->alloc(dev, size, dma_handle, flag, attrs);
- return memory;
-}
-
-#define dma_free_coherent(d, s, c, h) dma_free_attrs(d, s, c, h, NULL)
-
-static inline void dma_free_attrs(struct device *dev, size_t size,
- void *cpu_addr, dma_addr_t dma_handle,
- struct dma_attrs *attrs)
-{
- struct dma_map_ops *ops = get_dma_ops(dev);
-
- ops->free(dev, size, cpu_addr, dma_handle, attrs);
-}
-
-static inline int dma_mapping_error(struct device *dev, dma_addr_t dma_addr)
-{
- return 0;
-}
-
#endif
diff --git a/arch/hexagon/include/asm/dma-mapping.h b/arch/hexagon/include/asm/dma-mapping.h
index 1696542..268fde8 100644
--- a/arch/hexagon/include/asm/dma-mapping.h
+++ b/arch/hexagon/include/asm/dma-mapping.h
@@ -31,12 +31,10 @@
struct device;
extern int bad_dma_address;
+#define DMA_ERROR_CODE bad_dma_address
extern struct dma_map_ops *dma_ops;
-#define dma_alloc_noncoherent(d, s, h, f) dma_alloc_coherent(d, s, h, f)
-#define dma_free_noncoherent(d, s, v, h) dma_free_coherent(d, s, v, h)
-
static inline struct dma_map_ops *get_dma_ops(struct device *dev)
{
if (unlikely(dev == NULL))
@@ -45,8 +43,8 @@
return dma_ops;
}
+#define HAVE_ARCH_DMA_SUPPORTED 1
extern int dma_supported(struct device *dev, u64 mask);
-extern int dma_set_mask(struct device *dev, u64 mask);
extern int dma_is_consistent(struct device *dev, dma_addr_t dma_handle);
extern void dma_cache_sync(struct device *dev, void *vaddr, size_t size,
enum dma_data_direction direction);
@@ -60,47 +58,4 @@
return addr + size - 1 <= *dev->dma_mask;
}
-static inline int dma_mapping_error(struct device *dev, dma_addr_t dma_addr)
-{
- struct dma_map_ops *dma_ops = get_dma_ops(dev);
-
- if (dma_ops->mapping_error)
- return dma_ops->mapping_error(dev, dma_addr);
-
- return (dma_addr == bad_dma_address);
-}
-
-#define dma_alloc_coherent(d,s,h,f) dma_alloc_attrs(d,s,h,f,NULL)
-
-static inline void *dma_alloc_attrs(struct device *dev, size_t size,
- dma_addr_t *dma_handle, gfp_t flag,
- struct dma_attrs *attrs)
-{
- void *ret;
- struct dma_map_ops *ops = get_dma_ops(dev);
-
- BUG_ON(!dma_ops);
-
- ret = ops->alloc(dev, size, dma_handle, flag, attrs);
-
- debug_dma_alloc_coherent(dev, size, *dma_handle, ret);
-
- return ret;
-}
-
-#define dma_free_coherent(d,s,c,h) dma_free_attrs(d,s,c,h,NULL)
-
-static inline void dma_free_attrs(struct device *dev, size_t size,
- void *cpu_addr, dma_addr_t dma_handle,
- struct dma_attrs *attrs)
-{
- struct dma_map_ops *dma_ops = get_dma_ops(dev);
-
- BUG_ON(!dma_ops);
-
- dma_ops->free(dev, size, cpu_addr, dma_handle, attrs);
-
- debug_dma_free_coherent(dev, size, cpu_addr, dma_handle);
-}
-
#endif
diff --git a/arch/hexagon/include/uapi/asm/signal.h b/arch/hexagon/include/uapi/asm/signal.h
index 98106e5..24b9988 100644
--- a/arch/hexagon/include/uapi/asm/signal.h
+++ b/arch/hexagon/include/uapi/asm/signal.h
@@ -19,8 +19,6 @@
#ifndef _ASM_SIGNAL_H
#define _ASM_SIGNAL_H
-#include <uapi/asm/registers.h>
-
extern unsigned long __rt_sigtramp_template[2];
void do_signal(struct pt_regs *regs);
diff --git a/arch/hexagon/kernel/dma.c b/arch/hexagon/kernel/dma.c
index b74f9ba..9e3ddf7 100644
--- a/arch/hexagon/kernel/dma.c
+++ b/arch/hexagon/kernel/dma.c
@@ -44,17 +44,6 @@
}
EXPORT_SYMBOL(dma_supported);
-int dma_set_mask(struct device *dev, u64 mask)
-{
- if (!dev->dma_mask || !dma_supported(dev, mask))
- return -EIO;
-
- *dev->dma_mask = mask;
-
- return 0;
-}
-EXPORT_SYMBOL(dma_set_mask);
-
static struct gen_pool *coherent_pool;
diff --git a/arch/hexagon/kernel/time.c b/arch/hexagon/kernel/time.c
index 17fbf45..a6a1d1f 100644
--- a/arch/hexagon/kernel/time.c
+++ b/arch/hexagon/kernel/time.c
@@ -97,20 +97,6 @@
return 0;
}
-/*
- * Sets the mode (periodic, shutdown, oneshot, etc) of a timer.
- */
-static void set_mode(enum clock_event_mode mode,
- struct clock_event_device *evt)
-{
- switch (mode) {
- case CLOCK_EVT_MODE_SHUTDOWN:
- /* XXX implement me */
- default:
- break;
- }
-}
-
#ifdef CONFIG_SMP
/* Broadcast mechanism */
static void broadcast(const struct cpumask *mask)
@@ -119,13 +105,13 @@
}
#endif
+/* XXX Implement set_state_shutdown() */
static struct clock_event_device hexagon_clockevent_dev = {
.name = "clockevent",
.features = CLOCK_EVT_FEAT_ONESHOT,
.rating = 400,
.irq = RTOS_TIMER_INT,
.set_next_event = set_next_event,
- .set_mode = set_mode,
#ifdef CONFIG_SMP
.broadcast = broadcast,
#endif
@@ -146,7 +132,6 @@
dummy_clock_dev->features = CLOCK_EVT_FEAT_DUMMY;
dummy_clock_dev->cpumask = cpumask_of(cpu);
- dummy_clock_dev->mode = CLOCK_EVT_MODE_UNUSED;
clockevents_register_device(dummy_clock_dev);
}
diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig
index 42a91a7..eb0249e 100644
--- a/arch/ia64/Kconfig
+++ b/arch/ia64/Kconfig
@@ -518,6 +518,7 @@
config KEXEC
bool "kexec system call"
depends on !IA64_HP_SIM && (!SMP || HOTPLUG_CPU)
+ select KEXEC_CORE
help
kexec is a system call that implements the ability to shutdown your
current kernel, and to start another kernel. It is like a reboot
diff --git a/arch/ia64/include/asm/dma-mapping.h b/arch/ia64/include/asm/dma-mapping.h
index cf3ab7e..9beccf8 100644
--- a/arch/ia64/include/asm/dma-mapping.h
+++ b/arch/ia64/include/asm/dma-mapping.h
@@ -23,60 +23,10 @@
extern void machvec_dma_sync_sg(struct device *, struct scatterlist *, int,
enum dma_data_direction);
-#define dma_alloc_coherent(d,s,h,f) dma_alloc_attrs(d,s,h,f,NULL)
-
-static inline void *dma_alloc_attrs(struct device *dev, size_t size,
- dma_addr_t *daddr, gfp_t gfp,
- struct dma_attrs *attrs)
-{
- struct dma_map_ops *ops = platform_dma_get_ops(dev);
- void *caddr;
-
- caddr = ops->alloc(dev, size, daddr, gfp, attrs);
- debug_dma_alloc_coherent(dev, size, *daddr, caddr);
- return caddr;
-}
-
-#define dma_free_coherent(d,s,c,h) dma_free_attrs(d,s,c,h,NULL)
-
-static inline void dma_free_attrs(struct device *dev, size_t size,
- void *caddr, dma_addr_t daddr,
- struct dma_attrs *attrs)
-{
- struct dma_map_ops *ops = platform_dma_get_ops(dev);
- debug_dma_free_coherent(dev, size, caddr, daddr);
- ops->free(dev, size, caddr, daddr, attrs);
-}
-
-#define dma_alloc_noncoherent(d, s, h, f) dma_alloc_coherent(d, s, h, f)
-#define dma_free_noncoherent(d, s, v, h) dma_free_coherent(d, s, v, h)
-
#define get_dma_ops(dev) platform_dma_get_ops(dev)
#include <asm-generic/dma-mapping-common.h>
-static inline int dma_mapping_error(struct device *dev, dma_addr_t daddr)
-{
- struct dma_map_ops *ops = platform_dma_get_ops(dev);
- debug_dma_mapping_error(dev, daddr);
- return ops->mapping_error(dev, daddr);
-}
-
-static inline int dma_supported(struct device *dev, u64 mask)
-{
- struct dma_map_ops *ops = platform_dma_get_ops(dev);
- return ops->dma_supported(dev, mask);
-}
-
-static inline int
-dma_set_mask (struct device *dev, u64 mask)
-{
- if (!dev->dma_mask || !dma_supported(dev, mask))
- return -EIO;
- *dev->dma_mask = mask;
- return 0;
-}
-
static inline bool dma_capable(struct device *dev, dma_addr_t addr, size_t size)
{
if (!dev->dma_mask)
diff --git a/arch/ia64/include/asm/unistd.h b/arch/ia64/include/asm/unistd.h
index 95c39b9..99c96a5 100644
--- a/arch/ia64/include/asm/unistd.h
+++ b/arch/ia64/include/asm/unistd.h
@@ -11,7 +11,7 @@
-#define NR_syscalls 319 /* length of syscall table */
+#define NR_syscalls 321 /* length of syscall table */
/*
* The following defines stop scripts/checksyscalls.sh from complaining about
diff --git a/arch/ia64/include/uapi/asm/unistd.h b/arch/ia64/include/uapi/asm/unistd.h
index 4610795..98e94e1 100644
--- a/arch/ia64/include/uapi/asm/unistd.h
+++ b/arch/ia64/include/uapi/asm/unistd.h
@@ -332,5 +332,7 @@
#define __NR_memfd_create 1340
#define __NR_bpf 1341
#define __NR_execveat 1342
+#define __NR_userfaultfd 1343
+#define __NR_membarrier 1344
#endif /* _UAPI_ASM_IA64_UNISTD_H */
diff --git a/arch/ia64/kernel/entry.S b/arch/ia64/kernel/entry.S
index ae0de7b..37cc7a6 100644
--- a/arch/ia64/kernel/entry.S
+++ b/arch/ia64/kernel/entry.S
@@ -1768,5 +1768,7 @@
data8 sys_memfd_create // 1340
data8 sys_bpf
data8 sys_execveat
+ data8 sys_userfaultfd
+ data8 sys_membarrier
.org sys_call_table + 8*NR_syscalls // guard against failures to increase NR_syscalls
diff --git a/arch/m32r/boot/compressed/misc.c b/arch/m32r/boot/compressed/misc.c
index 28a0952..3a76927 100644
--- a/arch/m32r/boot/compressed/misc.c
+++ b/arch/m32r/boot/compressed/misc.c
@@ -86,6 +86,7 @@
free_mem_end_ptr = free_mem_ptr + BOOT_HEAP_SIZE;
puts("\nDecompressing Linux... ");
- decompress(input_data, input_len, NULL, NULL, output_data, NULL, error);
+ __decompress(input_data, input_len, NULL, NULL, output_data, 0,
+ NULL, error);
puts("done.\nBooting the kernel.\n");
}
diff --git a/arch/m68k/Kconfig b/arch/m68k/Kconfig
index 2dd8f63..498b567 100644
--- a/arch/m68k/Kconfig
+++ b/arch/m68k/Kconfig
@@ -95,6 +95,7 @@
config KEXEC
bool "kexec system call"
depends on M68KCLASSIC
+ select KEXEC_CORE
help
kexec is a system call that implements the ability to shutdown your
current kernel, and to start another kernel. It is like a reboot
diff --git a/arch/microblaze/include/asm/dma-mapping.h b/arch/microblaze/include/asm/dma-mapping.h
index ab35372..24b1297 100644
--- a/arch/microblaze/include/asm/dma-mapping.h
+++ b/arch/microblaze/include/asm/dma-mapping.h
@@ -27,7 +27,6 @@
#include <linux/dma-debug.h>
#include <linux/dma-attrs.h>
#include <asm/io.h>
-#include <asm-generic/dma-coherent.h>
#include <asm/cacheflush.h>
#define DMA_ERROR_CODE (~(dma_addr_t)0x0)
@@ -45,31 +44,6 @@
return &dma_direct_ops;
}
-static inline int dma_supported(struct device *dev, u64 mask)
-{
- struct dma_map_ops *ops = get_dma_ops(dev);
-
- if (unlikely(!ops))
- return 0;
- if (!ops->dma_supported)
- return 1;
- return ops->dma_supported(dev, mask);
-}
-
-static inline int dma_set_mask(struct device *dev, u64 dma_mask)
-{
- struct dma_map_ops *ops = get_dma_ops(dev);
-
- if (unlikely(ops == NULL))
- return -EIO;
- if (ops->set_dma_mask)
- return ops->set_dma_mask(dev, dma_mask);
- if (!dev->dma_mask || !dma_supported(dev, dma_mask))
- return -EIO;
- *dev->dma_mask = dma_mask;
- return 0;
-}
-
#include <asm-generic/dma-mapping-common.h>
static inline void __dma_sync(unsigned long paddr,
@@ -88,50 +62,6 @@
}
}
-static inline int dma_mapping_error(struct device *dev, dma_addr_t dma_addr)
-{
- struct dma_map_ops *ops = get_dma_ops(dev);
-
- debug_dma_mapping_error(dev, dma_addr);
- if (ops->mapping_error)
- return ops->mapping_error(dev, dma_addr);
-
- return (dma_addr == DMA_ERROR_CODE);
-}
-
-#define dma_alloc_noncoherent(d, s, h, f) dma_alloc_coherent(d, s, h, f)
-#define dma_free_noncoherent(d, s, v, h) dma_free_coherent(d, s, v, h)
-
-#define dma_alloc_coherent(d, s, h, f) dma_alloc_attrs(d, s, h, f, NULL)
-
-static inline void *dma_alloc_attrs(struct device *dev, size_t size,
- dma_addr_t *dma_handle, gfp_t flag,
- struct dma_attrs *attrs)
-{
- struct dma_map_ops *ops = get_dma_ops(dev);
- void *memory;
-
- BUG_ON(!ops);
-
- memory = ops->alloc(dev, size, dma_handle, flag, attrs);
-
- debug_dma_alloc_coherent(dev, size, *dma_handle, memory);
- return memory;
-}
-
-#define dma_free_coherent(d,s,c,h) dma_free_attrs(d, s, c, h, NULL)
-
-static inline void dma_free_attrs(struct device *dev, size_t size,
- void *cpu_addr, dma_addr_t dma_handle,
- struct dma_attrs *attrs)
-{
- struct dma_map_ops *ops = get_dma_ops(dev);
-
- BUG_ON(!ops);
- debug_dma_free_coherent(dev, size, cpu_addr, dma_handle);
- ops->free(dev, size, cpu_addr, dma_handle, attrs);
-}
-
static inline void dma_cache_sync(struct device *dev, void *vaddr, size_t size,
enum dma_data_direction direction)
{
diff --git a/arch/microblaze/include/uapi/asm/elf.h b/arch/microblaze/include/uapi/asm/elf.h
index be1731d..e9bcdb6 100644
--- a/arch/microblaze/include/uapi/asm/elf.h
+++ b/arch/microblaze/include/uapi/asm/elf.h
@@ -11,12 +11,13 @@
#ifndef _UAPI_ASM_MICROBLAZE_ELF_H
#define _UAPI_ASM_MICROBLAZE_ELF_H
+#include <linux/elf-em.h>
+
/*
* Note there is no "official" ELF designation for Microblaze.
* I've snaffled the value from the microblaze binutils source code
* /binutils/microblaze/include/elf/microblaze.h
*/
-#define EM_MICROBLAZE 189
#define EM_MICROBLAZE_OLD 0xbaab
#define ELF_ARCH EM_MICROBLAZE
diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig
index 752acca..e3aa5b0 100644
--- a/arch/mips/Kconfig
+++ b/arch/mips/Kconfig
@@ -2597,6 +2597,7 @@
config KEXEC
bool "Kexec system call"
+ select KEXEC_CORE
help
kexec is a system call that implements the ability to shutdown your
current kernel, and to start another kernel. It is like a reboot
diff --git a/arch/mips/boot/compressed/decompress.c b/arch/mips/boot/compressed/decompress.c
index 5483106..080cd53 100644
--- a/arch/mips/boot/compressed/decompress.c
+++ b/arch/mips/boot/compressed/decompress.c
@@ -111,8 +111,8 @@
puts("\n");
/* Decompress the kernel with according algorithm */
- decompress((char *)zimage_start, zimage_size, 0, 0,
- (void *)VMLINUX_LOAD_ADDRESS_ULL, 0, error);
+ __decompress((char *)zimage_start, zimage_size, 0, 0,
+ (void *)VMLINUX_LOAD_ADDRESS_ULL, 0, 0, error);
/* FIXME: should we flush cache here? */
puts("Now, booting the kernel...\n");
diff --git a/arch/mips/cavium-octeon/dma-octeon.c b/arch/mips/cavium-octeon/dma-octeon.c
index d8960d4..2cd45f5 100644
--- a/arch/mips/cavium-octeon/dma-octeon.c
+++ b/arch/mips/cavium-octeon/dma-octeon.c
@@ -161,9 +161,6 @@
{
void *ret;
- if (dma_alloc_from_coherent(dev, size, dma_handle, &ret))
- return ret;
-
/* ignore region specifiers */
gfp &= ~(__GFP_DMA | __GFP_DMA32 | __GFP_HIGHMEM);
@@ -194,11 +191,6 @@
static void octeon_dma_free_coherent(struct device *dev, size_t size,
void *vaddr, dma_addr_t dma_handle, struct dma_attrs *attrs)
{
- int order = get_order(size);
-
- if (dma_release_from_coherent(dev, order, vaddr))
- return;
-
swiotlb_free_coherent(dev, size, vaddr, dma_handle);
}
diff --git a/arch/mips/include/asm/dma-mapping.h b/arch/mips/include/asm/dma-mapping.h
index 360b338..e604f76 100644
--- a/arch/mips/include/asm/dma-mapping.h
+++ b/arch/mips/include/asm/dma-mapping.h
@@ -4,7 +4,6 @@
#include <linux/scatterlist.h>
#include <asm/dma-coherence.h>
#include <asm/cache.h>
-#include <asm-generic/dma-coherent.h>
#ifndef CONFIG_SGI_IP27 /* Kludge to fix 2.6.39 build for IP27 */
#include <dma-coherence.h>
@@ -32,73 +31,7 @@
#include <asm-generic/dma-mapping-common.h>
-static inline int dma_supported(struct device *dev, u64 mask)
-{
- struct dma_map_ops *ops = get_dma_ops(dev);
- return ops->dma_supported(dev, mask);
-}
-
-static inline int dma_mapping_error(struct device *dev, u64 mask)
-{
- struct dma_map_ops *ops = get_dma_ops(dev);
-
- debug_dma_mapping_error(dev, mask);
- return ops->mapping_error(dev, mask);
-}
-
-static inline int
-dma_set_mask(struct device *dev, u64 mask)
-{
- struct dma_map_ops *ops = get_dma_ops(dev);
-
- if(!dev->dma_mask || !dma_supported(dev, mask))
- return -EIO;
-
- if (ops->set_dma_mask)
- return ops->set_dma_mask(dev, mask);
-
- *dev->dma_mask = mask;
-
- return 0;
-}
-
extern void dma_cache_sync(struct device *dev, void *vaddr, size_t size,
enum dma_data_direction direction);
-#define dma_alloc_coherent(d,s,h,f) dma_alloc_attrs(d,s,h,f,NULL)
-
-static inline void *dma_alloc_attrs(struct device *dev, size_t size,
- dma_addr_t *dma_handle, gfp_t gfp,
- struct dma_attrs *attrs)
-{
- void *ret;
- struct dma_map_ops *ops = get_dma_ops(dev);
-
- ret = ops->alloc(dev, size, dma_handle, gfp, attrs);
-
- debug_dma_alloc_coherent(dev, size, *dma_handle, ret);
-
- return ret;
-}
-
-#define dma_free_coherent(d,s,c,h) dma_free_attrs(d,s,c,h,NULL)
-
-static inline void dma_free_attrs(struct device *dev, size_t size,
- void *vaddr, dma_addr_t dma_handle,
- struct dma_attrs *attrs)
-{
- struct dma_map_ops *ops = get_dma_ops(dev);
-
- ops->free(dev, size, vaddr, dma_handle, attrs);
-
- debug_dma_free_coherent(dev, size, vaddr, dma_handle);
-}
-
-
-void *dma_alloc_noncoherent(struct device *dev, size_t size,
- dma_addr_t *dma_handle, gfp_t flag);
-
-void dma_free_noncoherent(struct device *dev, size_t size,
- void *vaddr, dma_addr_t dma_handle);
-
#endif /* _ASM_DMA_MAPPING_H */
diff --git a/arch/mips/loongson64/common/dma-swiotlb.c b/arch/mips/loongson64/common/dma-swiotlb.c
index 2c6b989..4ffa6fc 100644
--- a/arch/mips/loongson64/common/dma-swiotlb.c
+++ b/arch/mips/loongson64/common/dma-swiotlb.c
@@ -14,9 +14,6 @@
{
void *ret;
- if (dma_alloc_from_coherent(dev, size, dma_handle, &ret))
- return ret;
-
/* ignore region specifiers */
gfp &= ~(__GFP_DMA | __GFP_DMA32 | __GFP_HIGHMEM);
@@ -46,11 +43,6 @@
static void loongson_dma_free_coherent(struct device *dev, size_t size,
void *vaddr, dma_addr_t dma_handle, struct dma_attrs *attrs)
{
- int order = get_order(size);
-
- if (dma_release_from_coherent(dev, order, vaddr))
- return;
-
swiotlb_free_coherent(dev, size, vaddr, dma_handle);
}
@@ -93,6 +85,9 @@
static int loongson_dma_set_mask(struct device *dev, u64 mask)
{
+ if (!dev->dma_mask || !dma_supported(dev, mask))
+ return -EIO;
+
if (mask > DMA_BIT_MASK(loongson_sysconf.dma_mask_bits)) {
*dev->dma_mask = DMA_BIT_MASK(loongson_sysconf.dma_mask_bits);
return -EIO;
diff --git a/arch/mips/mm/dma-default.c b/arch/mips/mm/dma-default.c
index 8f23cf0..a914dc1 100644
--- a/arch/mips/mm/dma-default.c
+++ b/arch/mips/mm/dma-default.c
@@ -112,7 +112,7 @@
return gfp | dma_flag;
}
-void *dma_alloc_noncoherent(struct device *dev, size_t size,
+static void *mips_dma_alloc_noncoherent(struct device *dev, size_t size,
dma_addr_t * dma_handle, gfp_t gfp)
{
void *ret;
@@ -128,7 +128,6 @@
return ret;
}
-EXPORT_SYMBOL(dma_alloc_noncoherent);
static void *mips_dma_alloc_coherent(struct device *dev, size_t size,
dma_addr_t * dma_handle, gfp_t gfp, struct dma_attrs *attrs)
@@ -137,8 +136,12 @@
struct page *page = NULL;
unsigned int count = PAGE_ALIGN(size) >> PAGE_SHIFT;
- if (dma_alloc_from_coherent(dev, size, dma_handle, &ret))
- return ret;
+ /*
+ * XXX: seems like the coherent and non-coherent implementations could
+ * be consolidated.
+ */
+ if (dma_get_attr(DMA_ATTR_NON_CONSISTENT, attrs))
+ return mips_dma_alloc_noncoherent(dev, size, dma_handle, gfp);
gfp = massage_gfp_flags(dev, gfp);
@@ -164,24 +167,24 @@
}
-void dma_free_noncoherent(struct device *dev, size_t size, void *vaddr,
- dma_addr_t dma_handle)
+static void mips_dma_free_noncoherent(struct device *dev, size_t size,
+ void *vaddr, dma_addr_t dma_handle)
{
plat_unmap_dma_mem(dev, dma_handle, size, DMA_BIDIRECTIONAL);
free_pages((unsigned long) vaddr, get_order(size));
}
-EXPORT_SYMBOL(dma_free_noncoherent);
static void mips_dma_free_coherent(struct device *dev, size_t size, void *vaddr,
dma_addr_t dma_handle, struct dma_attrs *attrs)
{
unsigned long addr = (unsigned long) vaddr;
- int order = get_order(size);
unsigned int count = PAGE_ALIGN(size) >> PAGE_SHIFT;
struct page *page = NULL;
- if (dma_release_from_coherent(dev, order, vaddr))
+ if (dma_get_attr(DMA_ATTR_NON_CONSISTENT, attrs)) {
+ mips_dma_free_noncoherent(dev, size, vaddr, dma_handle);
return;
+ }
plat_unmap_dma_mem(dev, dma_handle, size, DMA_BIDIRECTIONAL);
diff --git a/arch/mips/netlogic/common/nlm-dma.c b/arch/mips/netlogic/common/nlm-dma.c
index f3d4ae8..3758715 100644
--- a/arch/mips/netlogic/common/nlm-dma.c
+++ b/arch/mips/netlogic/common/nlm-dma.c
@@ -47,11 +47,6 @@
static void *nlm_dma_alloc_coherent(struct device *dev, size_t size,
dma_addr_t *dma_handle, gfp_t gfp, struct dma_attrs *attrs)
{
- void *ret;
-
- if (dma_alloc_from_coherent(dev, size, dma_handle, &ret))
- return ret;
-
/* ignore region specifiers */
gfp &= ~(__GFP_DMA | __GFP_DMA32 | __GFP_HIGHMEM);
@@ -69,11 +64,6 @@
static void nlm_dma_free_coherent(struct device *dev, size_t size,
void *vaddr, dma_addr_t dma_handle, struct dma_attrs *attrs)
{
- int order = get_order(size);
-
- if (dma_release_from_coherent(dev, order, vaddr))
- return;
-
swiotlb_free_coherent(dev, size, vaddr, dma_handle);
}
diff --git a/arch/openrisc/include/asm/dma-mapping.h b/arch/openrisc/include/asm/dma-mapping.h
index fab8628..413bfcf 100644
--- a/arch/openrisc/include/asm/dma-mapping.h
+++ b/arch/openrisc/include/asm/dma-mapping.h
@@ -23,7 +23,6 @@
*/
#include <linux/dma-debug.h>
-#include <asm-generic/dma-coherent.h>
#include <linux/kmemcheck.h>
#include <linux/dma-mapping.h>
@@ -36,75 +35,13 @@
return &or1k_dma_map_ops;
}
-#include <asm-generic/dma-mapping-common.h>
-
-#define dma_alloc_coherent(d,s,h,f) dma_alloc_attrs(d,s,h,f,NULL)
-
-static inline void *dma_alloc_attrs(struct device *dev, size_t size,
- dma_addr_t *dma_handle, gfp_t gfp,
- struct dma_attrs *attrs)
-{
- struct dma_map_ops *ops = get_dma_ops(dev);
- void *memory;
-
- memory = ops->alloc(dev, size, dma_handle, gfp, attrs);
-
- debug_dma_alloc_coherent(dev, size, *dma_handle, memory);
-
- return memory;
-}
-
-#define dma_free_coherent(d,s,c,h) dma_free_attrs(d,s,c,h,NULL)
-
-static inline void dma_free_attrs(struct device *dev, size_t size,
- void *cpu_addr, dma_addr_t dma_handle,
- struct dma_attrs *attrs)
-{
- struct dma_map_ops *ops = get_dma_ops(dev);
-
- debug_dma_free_coherent(dev, size, cpu_addr, dma_handle);
-
- ops->free(dev, size, cpu_addr, dma_handle, attrs);
-}
-
-static inline void *dma_alloc_noncoherent(struct device *dev, size_t size,
- dma_addr_t *dma_handle, gfp_t gfp)
-{
- struct dma_attrs attrs;
-
- dma_set_attr(DMA_ATTR_NON_CONSISTENT, &attrs);
-
- return dma_alloc_attrs(dev, size, dma_handle, gfp, &attrs);
-}
-
-static inline void dma_free_noncoherent(struct device *dev, size_t size,
- void *cpu_addr, dma_addr_t dma_handle)
-{
- struct dma_attrs attrs;
-
- dma_set_attr(DMA_ATTR_NON_CONSISTENT, &attrs);
-
- dma_free_attrs(dev, size, cpu_addr, dma_handle, &attrs);
-}
-
+#define HAVE_ARCH_DMA_SUPPORTED 1
static inline int dma_supported(struct device *dev, u64 dma_mask)
{
/* Support 32 bit DMA mask exclusively */
return dma_mask == DMA_BIT_MASK(32);
}
-static inline int dma_mapping_error(struct device *dev, dma_addr_t dma_addr)
-{
- return 0;
-}
+#include <asm-generic/dma-mapping-common.h>
-static inline int dma_set_mask(struct device *dev, u64 dma_mask)
-{
- if (!dev->dma_mask || !dma_supported(dev, dma_mask))
- return -EIO;
-
- *dev->dma_mask = dma_mask;
-
- return 0;
-}
#endif /* __ASM_OPENRISC_DMA_MAPPING_H */
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index b447918..9a7057e 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -420,6 +420,7 @@
config KEXEC
bool "kexec system call"
depends on (PPC_BOOK3S || FSL_BOOKE || (44x && !SMP))
+ select KEXEC_CORE
help
kexec is a system call that implements the ability to shutdown your
current kernel, and to start another kernel. It is like a reboot
diff --git a/arch/powerpc/include/asm/dma-mapping.h b/arch/powerpc/include/asm/dma-mapping.h
index 710f60e..7f522c0 100644
--- a/arch/powerpc/include/asm/dma-mapping.h
+++ b/arch/powerpc/include/asm/dma-mapping.h
@@ -18,7 +18,9 @@
#include <asm/io.h>
#include <asm/swiotlb.h>
+#ifdef CONFIG_PPC64
#define DMA_ERROR_CODE (~(dma_addr_t)0x0)
+#endif
/* Some dma direct funcs must be visible for use in other dma_ops */
extern void *__dma_direct_alloc_coherent(struct device *dev, size_t size,
@@ -120,71 +122,14 @@
/* this will be removed soon */
#define flush_write_buffers()
+#define HAVE_ARCH_DMA_SET_MASK 1
+extern int dma_set_mask(struct device *dev, u64 dma_mask);
+
#include <asm-generic/dma-mapping-common.h>
-static inline int dma_supported(struct device *dev, u64 mask)
-{
- struct dma_map_ops *dma_ops = get_dma_ops(dev);
-
- if (unlikely(dma_ops == NULL))
- return 0;
- if (dma_ops->dma_supported == NULL)
- return 1;
- return dma_ops->dma_supported(dev, mask);
-}
-
-extern int dma_set_mask(struct device *dev, u64 dma_mask);
extern int __dma_set_mask(struct device *dev, u64 dma_mask);
extern u64 __dma_get_required_mask(struct device *dev);
-#define dma_alloc_coherent(d,s,h,f) dma_alloc_attrs(d,s,h,f,NULL)
-
-static inline void *dma_alloc_attrs(struct device *dev, size_t size,
- dma_addr_t *dma_handle, gfp_t flag,
- struct dma_attrs *attrs)
-{
- struct dma_map_ops *dma_ops = get_dma_ops(dev);
- void *cpu_addr;
-
- BUG_ON(!dma_ops);
-
- cpu_addr = dma_ops->alloc(dev, size, dma_handle, flag, attrs);
-
- debug_dma_alloc_coherent(dev, size, *dma_handle, cpu_addr);
-
- return cpu_addr;
-}
-
-#define dma_free_coherent(d,s,c,h) dma_free_attrs(d,s,c,h,NULL)
-
-static inline void dma_free_attrs(struct device *dev, size_t size,
- void *cpu_addr, dma_addr_t dma_handle,
- struct dma_attrs *attrs)
-{
- struct dma_map_ops *dma_ops = get_dma_ops(dev);
-
- BUG_ON(!dma_ops);
-
- debug_dma_free_coherent(dev, size, cpu_addr, dma_handle);
-
- dma_ops->free(dev, size, cpu_addr, dma_handle, attrs);
-}
-
-static inline int dma_mapping_error(struct device *dev, dma_addr_t dma_addr)
-{
- struct dma_map_ops *dma_ops = get_dma_ops(dev);
-
- debug_dma_mapping_error(dev, dma_addr);
- if (dma_ops->mapping_error)
- return dma_ops->mapping_error(dev, dma_addr);
-
-#ifdef CONFIG_PPC64
- return (dma_addr == DMA_ERROR_CODE);
-#else
- return 0;
-#endif
-}
-
static inline bool dma_capable(struct device *dev, dma_addr_t addr, size_t size)
{
#ifdef CONFIG_SWIOTLB
@@ -210,9 +155,6 @@
return daddr - get_dma_offset(dev);
}
-#define dma_alloc_noncoherent(d, s, h, f) dma_alloc_coherent(d, s, h, f)
-#define dma_free_noncoherent(d, s, v, h) dma_free_coherent(d, s, v, h)
-
#define ARCH_HAS_DMA_MMAP_COHERENT
static inline void dma_cache_sync(struct device *dev, void *vaddr, size_t size,
diff --git a/arch/powerpc/include/asm/kvm_book3s.h b/arch/powerpc/include/asm/kvm_book3s.h
index b91e74a..9fac01c 100644
--- a/arch/powerpc/include/asm/kvm_book3s.h
+++ b/arch/powerpc/include/asm/kvm_book3s.h
@@ -158,6 +158,7 @@
bool *writable);
extern void kvmppc_add_revmap_chain(struct kvm *kvm, struct revmap_entry *rev,
unsigned long *rmap, long pte_index, int realmode);
+extern void kvmppc_update_rmap_change(unsigned long *rmap, unsigned long psize);
extern void kvmppc_invalidate_hpte(struct kvm *kvm, __be64 *hptep,
unsigned long pte_index);
void kvmppc_clear_ref_hpte(struct kvm *kvm, __be64 *hptep,
@@ -225,12 +226,12 @@
return vcpu->arch.cr;
}
-static inline void kvmppc_set_xer(struct kvm_vcpu *vcpu, u32 val)
+static inline void kvmppc_set_xer(struct kvm_vcpu *vcpu, ulong val)
{
vcpu->arch.xer = val;
}
-static inline u32 kvmppc_get_xer(struct kvm_vcpu *vcpu)
+static inline ulong kvmppc_get_xer(struct kvm_vcpu *vcpu)
{
return vcpu->arch.xer;
}
diff --git a/arch/powerpc/include/asm/kvm_book3s_asm.h b/arch/powerpc/include/asm/kvm_book3s_asm.h
index 5bdfb5d..72b6225 100644
--- a/arch/powerpc/include/asm/kvm_book3s_asm.h
+++ b/arch/powerpc/include/asm/kvm_book3s_asm.h
@@ -25,6 +25,12 @@
#define XICS_MFRR 0xc
#define XICS_IPI 2 /* interrupt source # for IPIs */
+/* Maximum number of threads per physical core */
+#define MAX_SMT_THREADS 8
+
+/* Maximum number of subcores per physical core */
+#define MAX_SUBCORES 4
+
#ifdef __ASSEMBLY__
#ifdef CONFIG_KVM_BOOK3S_HANDLER
@@ -65,6 +71,19 @@
#else /*__ASSEMBLY__ */
+struct kvmppc_vcore;
+
+/* Struct used for coordinating micro-threading (split-core) mode changes */
+struct kvm_split_mode {
+ unsigned long rpr;
+ unsigned long pmmar;
+ unsigned long ldbar;
+ u8 subcore_size;
+ u8 do_nap;
+ u8 napped[MAX_SMT_THREADS];
+ struct kvmppc_vcore *master_vcs[MAX_SUBCORES];
+};
+
/*
* This struct goes in the PACA on 64-bit processors. It is used
* to store host state that needs to be saved when we enter a guest
@@ -100,6 +119,7 @@
u64 host_spurr;
u64 host_dscr;
u64 dec_expires;
+ struct kvm_split_mode *kvm_split_mode;
#endif
#ifdef CONFIG_PPC_BOOK3S_64
u64 cfar;
@@ -112,7 +132,7 @@
bool in_use;
ulong gpr[14];
u32 cr;
- u32 xer;
+ ulong xer;
ulong ctr;
ulong lr;
ulong pc;
diff --git a/arch/powerpc/include/asm/kvm_booke.h b/arch/powerpc/include/asm/kvm_booke.h
index 3286f0d..bc6e29e 100644
--- a/arch/powerpc/include/asm/kvm_booke.h
+++ b/arch/powerpc/include/asm/kvm_booke.h
@@ -54,12 +54,12 @@
return vcpu->arch.cr;
}
-static inline void kvmppc_set_xer(struct kvm_vcpu *vcpu, u32 val)
+static inline void kvmppc_set_xer(struct kvm_vcpu *vcpu, ulong val)
{
vcpu->arch.xer = val;
}
-static inline u32 kvmppc_get_xer(struct kvm_vcpu *vcpu)
+static inline ulong kvmppc_get_xer(struct kvm_vcpu *vcpu)
{
return vcpu->arch.xer;
}
diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
index d91f65b..98eebbf 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -205,8 +205,10 @@
*/
#define KVMPPC_RMAP_LOCK_BIT 63
#define KVMPPC_RMAP_RC_SHIFT 32
+#define KVMPPC_RMAP_CHG_SHIFT 48
#define KVMPPC_RMAP_REFERENCED (HPTE_R_R << KVMPPC_RMAP_RC_SHIFT)
#define KVMPPC_RMAP_CHANGED (HPTE_R_C << KVMPPC_RMAP_RC_SHIFT)
+#define KVMPPC_RMAP_CHG_ORDER (0x3ful << KVMPPC_RMAP_CHG_SHIFT)
#define KVMPPC_RMAP_PRESENT 0x100000000ul
#define KVMPPC_RMAP_INDEX 0xfffffffful
@@ -278,7 +280,9 @@
u16 last_cpu;
u8 vcore_state;
u8 in_guest;
+ struct kvmppc_vcore *master_vcore;
struct list_head runnable_threads;
+ struct list_head preempt_list;
spinlock_t lock;
wait_queue_head_t wq;
spinlock_t stoltb_lock; /* protects stolen_tb and preempt_tb */
@@ -300,12 +304,21 @@
#define VCORE_EXIT_MAP(vc) ((vc)->entry_exit_map >> 8)
#define VCORE_IS_EXITING(vc) (VCORE_EXIT_MAP(vc) != 0)
-/* Values for vcore_state */
+/* This bit is used when a vcore exit is triggered from outside the vcore */
+#define VCORE_EXIT_REQ 0x10000
+
+/*
+ * Values for vcore_state.
+ * Note that these are arranged such that lower values
+ * (< VCORE_SLEEPING) don't require stolen time accounting
+ * on load/unload, and higher values do.
+ */
#define VCORE_INACTIVE 0
-#define VCORE_SLEEPING 1
-#define VCORE_PREEMPT 2
-#define VCORE_RUNNING 3
-#define VCORE_EXITING 4
+#define VCORE_PREEMPT 1
+#define VCORE_PIGGYBACK 2
+#define VCORE_SLEEPING 3
+#define VCORE_RUNNING 4
+#define VCORE_EXITING 5
/*
* Struct used to manage memory for a virtual processor area
@@ -473,7 +486,7 @@
ulong ciabr;
ulong cfar;
ulong ppr;
- ulong pspb;
+ u32 pspb;
ulong fscr;
ulong shadow_fscr;
ulong ebbhr;
@@ -619,6 +632,7 @@
int trap;
int state;
int ptid;
+ int thread_cpu;
bool timer_running;
wait_queue_head_t cpu_run;
diff --git a/arch/powerpc/include/asm/ppc-opcode.h b/arch/powerpc/include/asm/ppc-opcode.h
index 8452335..790f5d1 100644
--- a/arch/powerpc/include/asm/ppc-opcode.h
+++ b/arch/powerpc/include/asm/ppc-opcode.h
@@ -287,7 +287,7 @@
/* POWER8 Micro Partition Prefetch (MPP) parameters */
/* Address mask is common for LOGMPP instruction and MPPR SPR */
-#define PPC_MPPE_ADDRESS_MASK 0xffffffffc000
+#define PPC_MPPE_ADDRESS_MASK 0xffffffffc000ULL
/* Bits 60 and 61 of MPP SPR should be set to one of the following */
/* Aborting the fetch is indeed setting 00 in the table size bits */
diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
index 810f433..221d584 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -511,6 +511,8 @@
DEFINE(VCPU_VPA, offsetof(struct kvm_vcpu, arch.vpa.pinned_addr));
DEFINE(VCPU_VPA_DIRTY, offsetof(struct kvm_vcpu, arch.vpa.dirty));
DEFINE(VCPU_HEIR, offsetof(struct kvm_vcpu, arch.emul_inst));
+ DEFINE(VCPU_CPU, offsetof(struct kvm_vcpu, cpu));
+ DEFINE(VCPU_THREAD_CPU, offsetof(struct kvm_vcpu, arch.thread_cpu));
#endif
#ifdef CONFIG_PPC_BOOK3S
DEFINE(VCPU_VCPUID, offsetof(struct kvm_vcpu, vcpu_id));
@@ -673,7 +675,14 @@
HSTATE_FIELD(HSTATE_DSCR, host_dscr);
HSTATE_FIELD(HSTATE_DABR, dabr);
HSTATE_FIELD(HSTATE_DECEXP, dec_expires);
+ HSTATE_FIELD(HSTATE_SPLIT_MODE, kvm_split_mode);
DEFINE(IPI_PRIORITY, IPI_PRIORITY);
+ DEFINE(KVM_SPLIT_RPR, offsetof(struct kvm_split_mode, rpr));
+ DEFINE(KVM_SPLIT_PMMAR, offsetof(struct kvm_split_mode, pmmar));
+ DEFINE(KVM_SPLIT_LDBAR, offsetof(struct kvm_split_mode, ldbar));
+ DEFINE(KVM_SPLIT_SIZE, offsetof(struct kvm_split_mode, subcore_size));
+ DEFINE(KVM_SPLIT_DO_NAP, offsetof(struct kvm_split_mode, do_nap));
+ DEFINE(KVM_SPLIT_NAPPED, offsetof(struct kvm_split_mode, napped));
#endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */
#ifdef CONFIG_PPC_BOOK3S_64
diff --git a/arch/powerpc/kvm/Kconfig b/arch/powerpc/kvm/Kconfig
index 3caec2c..c2024ac 100644
--- a/arch/powerpc/kvm/Kconfig
+++ b/arch/powerpc/kvm/Kconfig
@@ -74,14 +74,14 @@
If unsure, say N.
config KVM_BOOK3S_64_HV
- tristate "KVM support for POWER7 and PPC970 using hypervisor mode in host"
+ tristate "KVM for POWER7 and later using hypervisor mode in host"
depends on KVM_BOOK3S_64 && PPC_POWERNV
select KVM_BOOK3S_HV_POSSIBLE
select MMU_NOTIFIER
select CMA
---help---
Support running unmodified book3s_64 guest kernels in
- virtual machines on POWER7 and PPC970 processors that have
+ virtual machines on POWER7 and newer processors that have
hypervisor mode available to the host.
If you say Y here, KVM will use the hardware virtualization
@@ -89,8 +89,8 @@
guest operating systems will run at full hardware speed
using supervisor and user modes. However, this also means
that KVM is not usable under PowerVM (pHyp), is only usable
- on POWER7 (or later) processors and PPC970-family processors,
- and cannot emulate a different processor from the host processor.
+ on POWER7 or later processors, and cannot emulate a
+ different processor from the host processor.
If unsure, say N.
diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c
index 6d6398f..d75bf32 100644
--- a/arch/powerpc/kvm/book3s.c
+++ b/arch/powerpc/kvm/book3s.c
@@ -240,7 +240,8 @@
kvmppc_book3s_queue_irqprio(vcpu, BOOK3S_INTERRUPT_INST_STORAGE);
}
-int kvmppc_book3s_irqprio_deliver(struct kvm_vcpu *vcpu, unsigned int priority)
+static int kvmppc_book3s_irqprio_deliver(struct kvm_vcpu *vcpu,
+ unsigned int priority)
{
int deliver = 1;
int vec = 0;
diff --git a/arch/powerpc/kvm/book3s_32_mmu_host.c b/arch/powerpc/kvm/book3s_32_mmu_host.c
index 2035d16..d5c9bfe 100644
--- a/arch/powerpc/kvm/book3s_32_mmu_host.c
+++ b/arch/powerpc/kvm/book3s_32_mmu_host.c
@@ -26,6 +26,7 @@
#include <asm/machdep.h>
#include <asm/mmu_context.h>
#include <asm/hw_irq.h>
+#include "book3s.h"
/* #define DEBUG_MMU */
/* #define DEBUG_SR */
diff --git a/arch/powerpc/kvm/book3s_64_mmu_host.c b/arch/powerpc/kvm/book3s_64_mmu_host.c
index b982d92..79ad35a 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_host.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_host.c
@@ -28,6 +28,7 @@
#include <asm/mmu_context.h>
#include <asm/hw_irq.h>
#include "trace_pr.h"
+#include "book3s.h"
#define PTE_SIZE 12
diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c
index dab68b7..1f9c0a1 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_hv.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c
@@ -761,6 +761,8 @@
/* Harvest R and C */
rcbits = be64_to_cpu(hptep[1]) & (HPTE_R_R | HPTE_R_C);
*rmapp |= rcbits << KVMPPC_RMAP_RC_SHIFT;
+ if (rcbits & HPTE_R_C)
+ kvmppc_update_rmap_change(rmapp, psize);
if (rcbits & ~rev[i].guest_rpte) {
rev[i].guest_rpte = ptel | rcbits;
note_hpte_modification(kvm, &rev[i]);
@@ -927,8 +929,12 @@
retry:
lock_rmap(rmapp);
if (*rmapp & KVMPPC_RMAP_CHANGED) {
- *rmapp &= ~KVMPPC_RMAP_CHANGED;
+ long change_order = (*rmapp & KVMPPC_RMAP_CHG_ORDER)
+ >> KVMPPC_RMAP_CHG_SHIFT;
+ *rmapp &= ~(KVMPPC_RMAP_CHANGED | KVMPPC_RMAP_CHG_ORDER);
npages_dirty = 1;
+ if (change_order > PAGE_SHIFT)
+ npages_dirty = 1ul << (change_order - PAGE_SHIFT);
}
if (!(*rmapp & KVMPPC_RMAP_PRESENT)) {
unlock_rmap(rmapp);
diff --git a/arch/powerpc/kvm/book3s_emulate.c b/arch/powerpc/kvm/book3s_emulate.c
index 5a2bc4b..2afdb9c 100644
--- a/arch/powerpc/kvm/book3s_emulate.c
+++ b/arch/powerpc/kvm/book3s_emulate.c
@@ -23,6 +23,7 @@
#include <asm/reg.h>
#include <asm/switch_to.h>
#include <asm/time.h>
+#include "book3s.h"
#define OP_19_XOP_RFID 18
#define OP_19_XOP_RFI 50
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index a9f753f..9754e68 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -81,6 +81,12 @@
#define MPP_BUFFER_ORDER 3
#endif
+static int dynamic_mt_modes = 6;
+module_param(dynamic_mt_modes, int, S_IRUGO | S_IWUSR);
+MODULE_PARM_DESC(dynamic_mt_modes, "Set of allowed dynamic micro-threading modes: 0 (= none), 2, 4, or 6 (= 2 or 4)");
+static int target_smt_mode;
+module_param(target_smt_mode, int, S_IRUGO | S_IWUSR);
+MODULE_PARM_DESC(target_smt_mode, "Target threads per core (0 = max)");
static void kvmppc_end_cede(struct kvm_vcpu *vcpu);
static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu);
@@ -114,7 +120,7 @@
static void kvmppc_fast_vcpu_kick_hv(struct kvm_vcpu *vcpu)
{
- int cpu = vcpu->cpu;
+ int cpu;
wait_queue_head_t *wqp;
wqp = kvm_arch_vcpu_wq(vcpu);
@@ -123,10 +129,11 @@
++vcpu->stat.halt_wakeup;
}
- if (kvmppc_ipi_thread(cpu + vcpu->arch.ptid))
+ if (kvmppc_ipi_thread(vcpu->arch.thread_cpu))
return;
/* CPU points to the first thread of the core */
+ cpu = vcpu->cpu;
if (cpu >= 0 && cpu < nr_cpu_ids && cpu_online(cpu))
smp_send_reschedule(cpu);
}
@@ -164,6 +171,27 @@
* they should never fail.)
*/
+static void kvmppc_core_start_stolen(struct kvmppc_vcore *vc)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&vc->stoltb_lock, flags);
+ vc->preempt_tb = mftb();
+ spin_unlock_irqrestore(&vc->stoltb_lock, flags);
+}
+
+static void kvmppc_core_end_stolen(struct kvmppc_vcore *vc)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&vc->stoltb_lock, flags);
+ if (vc->preempt_tb != TB_NIL) {
+ vc->stolen_tb += mftb() - vc->preempt_tb;
+ vc->preempt_tb = TB_NIL;
+ }
+ spin_unlock_irqrestore(&vc->stoltb_lock, flags);
+}
+
static void kvmppc_core_vcpu_load_hv(struct kvm_vcpu *vcpu, int cpu)
{
struct kvmppc_vcore *vc = vcpu->arch.vcore;
@@ -175,14 +203,9 @@
* vcpu, and once it is set to this vcpu, only this task
* ever sets it to NULL.
*/
- if (vc->runner == vcpu && vc->vcore_state != VCORE_INACTIVE) {
- spin_lock_irqsave(&vc->stoltb_lock, flags);
- if (vc->preempt_tb != TB_NIL) {
- vc->stolen_tb += mftb() - vc->preempt_tb;
- vc->preempt_tb = TB_NIL;
- }
- spin_unlock_irqrestore(&vc->stoltb_lock, flags);
- }
+ if (vc->runner == vcpu && vc->vcore_state >= VCORE_SLEEPING)
+ kvmppc_core_end_stolen(vc);
+
spin_lock_irqsave(&vcpu->arch.tbacct_lock, flags);
if (vcpu->arch.state == KVMPPC_VCPU_BUSY_IN_HOST &&
vcpu->arch.busy_preempt != TB_NIL) {
@@ -197,11 +220,9 @@
struct kvmppc_vcore *vc = vcpu->arch.vcore;
unsigned long flags;
- if (vc->runner == vcpu && vc->vcore_state != VCORE_INACTIVE) {
- spin_lock_irqsave(&vc->stoltb_lock, flags);
- vc->preempt_tb = mftb();
- spin_unlock_irqrestore(&vc->stoltb_lock, flags);
- }
+ if (vc->runner == vcpu && vc->vcore_state >= VCORE_SLEEPING)
+ kvmppc_core_start_stolen(vc);
+
spin_lock_irqsave(&vcpu->arch.tbacct_lock, flags);
if (vcpu->arch.state == KVMPPC_VCPU_BUSY_IN_HOST)
vcpu->arch.busy_preempt = mftb();
@@ -214,12 +235,12 @@
kvmppc_end_cede(vcpu);
}
-void kvmppc_set_pvr_hv(struct kvm_vcpu *vcpu, u32 pvr)
+static void kvmppc_set_pvr_hv(struct kvm_vcpu *vcpu, u32 pvr)
{
vcpu->arch.pvr = pvr;
}
-int kvmppc_set_arch_compat(struct kvm_vcpu *vcpu, u32 arch_compat)
+static int kvmppc_set_arch_compat(struct kvm_vcpu *vcpu, u32 arch_compat)
{
unsigned long pcr = 0;
struct kvmppc_vcore *vc = vcpu->arch.vcore;
@@ -259,7 +280,7 @@
return 0;
}
-void kvmppc_dump_regs(struct kvm_vcpu *vcpu)
+static void kvmppc_dump_regs(struct kvm_vcpu *vcpu)
{
int r;
@@ -292,7 +313,7 @@
vcpu->arch.last_inst);
}
-struct kvm_vcpu *kvmppc_find_vcpu(struct kvm *kvm, int id)
+static struct kvm_vcpu *kvmppc_find_vcpu(struct kvm *kvm, int id)
{
int r;
struct kvm_vcpu *v, *ret = NULL;
@@ -641,7 +662,8 @@
spin_lock(&vcore->lock);
if (target->arch.state == KVMPPC_VCPU_RUNNABLE &&
- vcore->vcore_state != VCORE_INACTIVE)
+ vcore->vcore_state != VCORE_INACTIVE &&
+ vcore->runner)
target = vcore->runner;
spin_unlock(&vcore->lock);
@@ -1431,6 +1453,7 @@
vcore->lpcr = kvm->arch.lpcr;
vcore->first_vcpuid = core * threads_per_subcore;
vcore->kvm = kvm;
+ INIT_LIST_HEAD(&vcore->preempt_list);
vcore->mpp_buffer_is_valid = false;
@@ -1655,6 +1678,7 @@
spin_unlock(&vcore->lock);
vcpu->arch.vcore = vcore;
vcpu->arch.ptid = vcpu->vcpu_id - vcore->first_vcpuid;
+ vcpu->arch.thread_cpu = -1;
vcpu->arch.cpu_type = KVM_CPU_3S_64;
kvmppc_sanity_check(vcpu);
@@ -1749,6 +1773,7 @@
/* Ensure the thread won't go into the kernel if it wakes */
tpaca->kvm_hstate.kvm_vcpu = NULL;
+ tpaca->kvm_hstate.kvm_vcore = NULL;
tpaca->kvm_hstate.napping = 0;
smp_wmb();
tpaca->kvm_hstate.hwthread_req = 1;
@@ -1780,26 +1805,32 @@
tpaca = &paca[cpu];
tpaca->kvm_hstate.hwthread_req = 0;
tpaca->kvm_hstate.kvm_vcpu = NULL;
+ tpaca->kvm_hstate.kvm_vcore = NULL;
+ tpaca->kvm_hstate.kvm_split_mode = NULL;
}
-static void kvmppc_start_thread(struct kvm_vcpu *vcpu)
+static void kvmppc_start_thread(struct kvm_vcpu *vcpu, struct kvmppc_vcore *vc)
{
int cpu;
struct paca_struct *tpaca;
- struct kvmppc_vcore *vc = vcpu->arch.vcore;
+ struct kvmppc_vcore *mvc = vc->master_vcore;
- if (vcpu->arch.timer_running) {
- hrtimer_try_to_cancel(&vcpu->arch.dec_timer);
- vcpu->arch.timer_running = 0;
+ cpu = vc->pcpu;
+ if (vcpu) {
+ if (vcpu->arch.timer_running) {
+ hrtimer_try_to_cancel(&vcpu->arch.dec_timer);
+ vcpu->arch.timer_running = 0;
+ }
+ cpu += vcpu->arch.ptid;
+ vcpu->cpu = mvc->pcpu;
+ vcpu->arch.thread_cpu = cpu;
}
- cpu = vc->pcpu + vcpu->arch.ptid;
tpaca = &paca[cpu];
- tpaca->kvm_hstate.kvm_vcore = vc;
- tpaca->kvm_hstate.ptid = vcpu->arch.ptid;
- vcpu->cpu = vc->pcpu;
- /* Order stores to hstate.kvm_vcore etc. before store to kvm_vcpu */
- smp_wmb();
tpaca->kvm_hstate.kvm_vcpu = vcpu;
+ tpaca->kvm_hstate.ptid = cpu - mvc->pcpu;
+ /* Order stores to hstate.kvm_vcpu etc. before store to kvm_vcore */
+ smp_wmb();
+ tpaca->kvm_hstate.kvm_vcore = mvc;
if (cpu != smp_processor_id())
kvmppc_ipi_thread(cpu);
}
@@ -1812,12 +1843,12 @@
for (loops = 0; loops < 1000000; ++loops) {
/*
* Check if all threads are finished.
- * We set the vcpu pointer when starting a thread
+ * We set the vcore pointer when starting a thread
* and the thread clears it when finished, so we look
- * for any threads that still have a non-NULL vcpu ptr.
+ * for any threads that still have a non-NULL vcore ptr.
*/
for (i = 1; i < threads_per_subcore; ++i)
- if (paca[cpu + i].kvm_hstate.kvm_vcpu)
+ if (paca[cpu + i].kvm_hstate.kvm_vcore)
break;
if (i == threads_per_subcore) {
HMT_medium();
@@ -1827,7 +1858,7 @@
}
HMT_medium();
for (i = 1; i < threads_per_subcore; ++i)
- if (paca[cpu + i].kvm_hstate.kvm_vcpu)
+ if (paca[cpu + i].kvm_hstate.kvm_vcore)
pr_err("KVM: CPU %d seems to be stuck\n", cpu + i);
}
@@ -1890,6 +1921,278 @@
mtspr(SPRN_MPPR, mpp_addr | PPC_MPPR_FETCH_WHOLE_TABLE);
}
+/*
+ * A list of virtual cores for each physical CPU.
+ * These are vcores that could run but their runner VCPU tasks are
+ * (or may be) preempted.
+ */
+struct preempted_vcore_list {
+ struct list_head list;
+ spinlock_t lock;
+};
+
+static DEFINE_PER_CPU(struct preempted_vcore_list, preempted_vcores);
+
+static void init_vcore_lists(void)
+{
+ int cpu;
+
+ for_each_possible_cpu(cpu) {
+ struct preempted_vcore_list *lp = &per_cpu(preempted_vcores, cpu);
+ spin_lock_init(&lp->lock);
+ INIT_LIST_HEAD(&lp->list);
+ }
+}
+
+static void kvmppc_vcore_preempt(struct kvmppc_vcore *vc)
+{
+ struct preempted_vcore_list *lp = this_cpu_ptr(&preempted_vcores);
+
+ vc->vcore_state = VCORE_PREEMPT;
+ vc->pcpu = smp_processor_id();
+ if (vc->num_threads < threads_per_subcore) {
+ spin_lock(&lp->lock);
+ list_add_tail(&vc->preempt_list, &lp->list);
+ spin_unlock(&lp->lock);
+ }
+
+ /* Start accumulating stolen time */
+ kvmppc_core_start_stolen(vc);
+}
+
+static void kvmppc_vcore_end_preempt(struct kvmppc_vcore *vc)
+{
+ struct preempted_vcore_list *lp;
+
+ kvmppc_core_end_stolen(vc);
+ if (!list_empty(&vc->preempt_list)) {
+ lp = &per_cpu(preempted_vcores, vc->pcpu);
+ spin_lock(&lp->lock);
+ list_del_init(&vc->preempt_list);
+ spin_unlock(&lp->lock);
+ }
+ vc->vcore_state = VCORE_INACTIVE;
+}
+
+/*
+ * This stores information about the virtual cores currently
+ * assigned to a physical core.
+ */
+struct core_info {
+ int n_subcores;
+ int max_subcore_threads;
+ int total_threads;
+ int subcore_threads[MAX_SUBCORES];
+ struct kvm *subcore_vm[MAX_SUBCORES];
+ struct list_head vcs[MAX_SUBCORES];
+};
+
+/*
+ * This mapping means subcores 0 and 1 can use threads 0-3 and 4-7
+ * respectively in 2-way micro-threading (split-core) mode.
+ */
+static int subcore_thread_map[MAX_SUBCORES] = { 0, 4, 2, 6 };
+
+static void init_core_info(struct core_info *cip, struct kvmppc_vcore *vc)
+{
+ int sub;
+
+ memset(cip, 0, sizeof(*cip));
+ cip->n_subcores = 1;
+ cip->max_subcore_threads = vc->num_threads;
+ cip->total_threads = vc->num_threads;
+ cip->subcore_threads[0] = vc->num_threads;
+ cip->subcore_vm[0] = vc->kvm;
+ for (sub = 0; sub < MAX_SUBCORES; ++sub)
+ INIT_LIST_HEAD(&cip->vcs[sub]);
+ list_add_tail(&vc->preempt_list, &cip->vcs[0]);
+}
+
+static bool subcore_config_ok(int n_subcores, int n_threads)
+{
+ /* Can only dynamically split if unsplit to begin with */
+ if (n_subcores > 1 && threads_per_subcore < MAX_SMT_THREADS)
+ return false;
+ if (n_subcores > MAX_SUBCORES)
+ return false;
+ if (n_subcores > 1) {
+ if (!(dynamic_mt_modes & 2))
+ n_subcores = 4;
+ if (n_subcores > 2 && !(dynamic_mt_modes & 4))
+ return false;
+ }
+
+ return n_subcores * roundup_pow_of_two(n_threads) <= MAX_SMT_THREADS;
+}
+
+static void init_master_vcore(struct kvmppc_vcore *vc)
+{
+ vc->master_vcore = vc;
+ vc->entry_exit_map = 0;
+ vc->in_guest = 0;
+ vc->napping_threads = 0;
+ vc->conferring_threads = 0;
+}
+
+/*
+ * See if the existing subcores can be split into 3 (or fewer) subcores
+ * of at most two threads each, so we can fit in another vcore. This
+ * assumes there are at most two subcores and at most 6 threads in total.
+ */
+static bool can_split_piggybacked_subcores(struct core_info *cip)
+{
+ int sub, new_sub;
+ int large_sub = -1;
+ int thr;
+ int n_subcores = cip->n_subcores;
+ struct kvmppc_vcore *vc, *vcnext;
+ struct kvmppc_vcore *master_vc = NULL;
+
+ for (sub = 0; sub < cip->n_subcores; ++sub) {
+ if (cip->subcore_threads[sub] <= 2)
+ continue;
+ if (large_sub >= 0)
+ return false;
+ large_sub = sub;
+ vc = list_first_entry(&cip->vcs[sub], struct kvmppc_vcore,
+ preempt_list);
+ if (vc->num_threads > 2)
+ return false;
+ n_subcores += (cip->subcore_threads[sub] - 1) >> 1;
+ }
+ if (n_subcores > 3 || large_sub < 0)
+ return false;
+
+ /*
+ * Seems feasible, so go through and move vcores to new subcores.
+ * Note that when we have two or more vcores in one subcore,
+ * all those vcores must have only one thread each.
+ */
+ new_sub = cip->n_subcores;
+ thr = 0;
+ sub = large_sub;
+ list_for_each_entry_safe(vc, vcnext, &cip->vcs[sub], preempt_list) {
+ if (thr >= 2) {
+ list_del(&vc->preempt_list);
+ list_add_tail(&vc->preempt_list, &cip->vcs[new_sub]);
+ /* vc->num_threads must be 1 */
+ if (++cip->subcore_threads[new_sub] == 1) {
+ cip->subcore_vm[new_sub] = vc->kvm;
+ init_master_vcore(vc);
+ master_vc = vc;
+ ++cip->n_subcores;
+ } else {
+ vc->master_vcore = master_vc;
+ ++new_sub;
+ }
+ }
+ thr += vc->num_threads;
+ }
+ cip->subcore_threads[large_sub] = 2;
+ cip->max_subcore_threads = 2;
+
+ return true;
+}
+
+static bool can_dynamic_split(struct kvmppc_vcore *vc, struct core_info *cip)
+{
+ int n_threads = vc->num_threads;
+ int sub;
+
+ if (!cpu_has_feature(CPU_FTR_ARCH_207S))
+ return false;
+
+ if (n_threads < cip->max_subcore_threads)
+ n_threads = cip->max_subcore_threads;
+ if (subcore_config_ok(cip->n_subcores + 1, n_threads)) {
+ cip->max_subcore_threads = n_threads;
+ } else if (cip->n_subcores <= 2 && cip->total_threads <= 6 &&
+ vc->num_threads <= 2) {
+ /*
+ * We may be able to fit another subcore in by
+ * splitting an existing subcore with 3 or 4
+ * threads into two 2-thread subcores, or one
+ * with 5 or 6 threads into three subcores.
+ * We can only do this if those subcores have
+ * piggybacked virtual cores.
+ */
+ if (!can_split_piggybacked_subcores(cip))
+ return false;
+ } else {
+ return false;
+ }
+
+ sub = cip->n_subcores;
+ ++cip->n_subcores;
+ cip->total_threads += vc->num_threads;
+ cip->subcore_threads[sub] = vc->num_threads;
+ cip->subcore_vm[sub] = vc->kvm;
+ init_master_vcore(vc);
+ list_del(&vc->preempt_list);
+ list_add_tail(&vc->preempt_list, &cip->vcs[sub]);
+
+ return true;
+}
+
+static bool can_piggyback_subcore(struct kvmppc_vcore *pvc,
+ struct core_info *cip, int sub)
+{
+ struct kvmppc_vcore *vc;
+ int n_thr;
+
+ vc = list_first_entry(&cip->vcs[sub], struct kvmppc_vcore,
+ preempt_list);
+
+ /* require same VM and same per-core reg values */
+ if (pvc->kvm != vc->kvm ||
+ pvc->tb_offset != vc->tb_offset ||
+ pvc->pcr != vc->pcr ||
+ pvc->lpcr != vc->lpcr)
+ return false;
+
+ /* P8 guest with > 1 thread per core would see wrong TIR value */
+ if (cpu_has_feature(CPU_FTR_ARCH_207S) &&
+ (vc->num_threads > 1 || pvc->num_threads > 1))
+ return false;
+
+ n_thr = cip->subcore_threads[sub] + pvc->num_threads;
+ if (n_thr > cip->max_subcore_threads) {
+ if (!subcore_config_ok(cip->n_subcores, n_thr))
+ return false;
+ cip->max_subcore_threads = n_thr;
+ }
+
+ cip->total_threads += pvc->num_threads;
+ cip->subcore_threads[sub] = n_thr;
+ pvc->master_vcore = vc;
+ list_del(&pvc->preempt_list);
+ list_add_tail(&pvc->preempt_list, &cip->vcs[sub]);
+
+ return true;
+}
+
+/*
+ * Work out whether it is possible to piggyback the execution of
+ * vcore *pvc onto the execution of the other vcores described in *cip.
+ */
+static bool can_piggyback(struct kvmppc_vcore *pvc, struct core_info *cip,
+ int target_threads)
+{
+ int sub;
+
+ if (cip->total_threads + pvc->num_threads > target_threads)
+ return false;
+ for (sub = 0; sub < cip->n_subcores; ++sub)
+ if (cip->subcore_threads[sub] &&
+ can_piggyback_subcore(pvc, cip, sub))
+ return true;
+
+ if (can_dynamic_split(pvc, cip))
+ return true;
+
+ return false;
+}
+
static void prepare_threads(struct kvmppc_vcore *vc)
{
struct kvm_vcpu *vcpu, *vnext;
@@ -1909,12 +2212,45 @@
}
}
-static void post_guest_process(struct kvmppc_vcore *vc)
+static void collect_piggybacks(struct core_info *cip, int target_threads)
{
+ struct preempted_vcore_list *lp = this_cpu_ptr(&preempted_vcores);
+ struct kvmppc_vcore *pvc, *vcnext;
+
+ spin_lock(&lp->lock);
+ list_for_each_entry_safe(pvc, vcnext, &lp->list, preempt_list) {
+ if (!spin_trylock(&pvc->lock))
+ continue;
+ prepare_threads(pvc);
+ if (!pvc->n_runnable) {
+ list_del_init(&pvc->preempt_list);
+ if (pvc->runner == NULL) {
+ pvc->vcore_state = VCORE_INACTIVE;
+ kvmppc_core_end_stolen(pvc);
+ }
+ spin_unlock(&pvc->lock);
+ continue;
+ }
+ if (!can_piggyback(pvc, cip, target_threads)) {
+ spin_unlock(&pvc->lock);
+ continue;
+ }
+ kvmppc_core_end_stolen(pvc);
+ pvc->vcore_state = VCORE_PIGGYBACK;
+ if (cip->total_threads >= target_threads)
+ break;
+ }
+ spin_unlock(&lp->lock);
+}
+
+static void post_guest_process(struct kvmppc_vcore *vc, bool is_master)
+{
+ int still_running = 0;
u64 now;
long ret;
struct kvm_vcpu *vcpu, *vnext;
+ spin_lock(&vc->lock);
now = get_tb();
list_for_each_entry_safe(vcpu, vnext, &vc->runnable_threads,
arch.run_list) {
@@ -1933,17 +2269,36 @@
vcpu->arch.ret = ret;
vcpu->arch.trap = 0;
- if (vcpu->arch.ceded) {
- if (!is_kvmppc_resume_guest(ret))
- kvmppc_end_cede(vcpu);
- else
+ if (is_kvmppc_resume_guest(vcpu->arch.ret)) {
+ if (vcpu->arch.pending_exceptions)
+ kvmppc_core_prepare_to_enter(vcpu);
+ if (vcpu->arch.ceded)
kvmppc_set_timer(vcpu);
- }
- if (!is_kvmppc_resume_guest(vcpu->arch.ret)) {
+ else
+ ++still_running;
+ } else {
kvmppc_remove_runnable(vc, vcpu);
wake_up(&vcpu->arch.cpu_run);
}
}
+ list_del_init(&vc->preempt_list);
+ if (!is_master) {
+ if (still_running > 0) {
+ kvmppc_vcore_preempt(vc);
+ } else if (vc->runner) {
+ vc->vcore_state = VCORE_PREEMPT;
+ kvmppc_core_start_stolen(vc);
+ } else {
+ vc->vcore_state = VCORE_INACTIVE;
+ }
+ if (vc->n_runnable > 0 && vc->runner == NULL) {
+ /* make sure there's a candidate runner awake */
+ vcpu = list_first_entry(&vc->runnable_threads,
+ struct kvm_vcpu, arch.run_list);
+ wake_up(&vcpu->arch.cpu_run);
+ }
+ }
+ spin_unlock(&vc->lock);
}
/*
@@ -1955,6 +2310,15 @@
struct kvm_vcpu *vcpu, *vnext;
int i;
int srcu_idx;
+ struct core_info core_info;
+ struct kvmppc_vcore *pvc, *vcnext;
+ struct kvm_split_mode split_info, *sip;
+ int split, subcore_size, active;
+ int sub;
+ bool thr0_done;
+ unsigned long cmd_bit, stat_bit;
+ int pcpu, thr;
+ int target_threads;
/*
* Remove from the list any threads that have a signal pending
@@ -1969,11 +2333,8 @@
/*
* Initialize *vc.
*/
- vc->entry_exit_map = 0;
+ init_master_vcore(vc);
vc->preempt_tb = TB_NIL;
- vc->in_guest = 0;
- vc->napping_threads = 0;
- vc->conferring_threads = 0;
/*
* Make sure we are running on primary threads, and that secondary
@@ -1991,24 +2352,120 @@
goto out;
}
+ /*
+ * See if we could run any other vcores on the physical core
+ * along with this one.
+ */
+ init_core_info(&core_info, vc);
+ pcpu = smp_processor_id();
+ target_threads = threads_per_subcore;
+ if (target_smt_mode && target_smt_mode < target_threads)
+ target_threads = target_smt_mode;
+ if (vc->num_threads < target_threads)
+ collect_piggybacks(&core_info, target_threads);
- vc->pcpu = smp_processor_id();
- list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list) {
- kvmppc_start_thread(vcpu);
- kvmppc_create_dtl_entry(vcpu, vc);
- trace_kvm_guest_enter(vcpu);
+ /* Decide on micro-threading (split-core) mode */
+ subcore_size = threads_per_subcore;
+ cmd_bit = stat_bit = 0;
+ split = core_info.n_subcores;
+ sip = NULL;
+ if (split > 1) {
+ /* threads_per_subcore must be MAX_SMT_THREADS (8) here */
+ if (split == 2 && (dynamic_mt_modes & 2)) {
+ cmd_bit = HID0_POWER8_1TO2LPAR;
+ stat_bit = HID0_POWER8_2LPARMODE;
+ } else {
+ split = 4;
+ cmd_bit = HID0_POWER8_1TO4LPAR;
+ stat_bit = HID0_POWER8_4LPARMODE;
+ }
+ subcore_size = MAX_SMT_THREADS / split;
+ sip = &split_info;
+ memset(&split_info, 0, sizeof(split_info));
+ split_info.rpr = mfspr(SPRN_RPR);
+ split_info.pmmar = mfspr(SPRN_PMMAR);
+ split_info.ldbar = mfspr(SPRN_LDBAR);
+ split_info.subcore_size = subcore_size;
+ for (sub = 0; sub < core_info.n_subcores; ++sub)
+ split_info.master_vcs[sub] =
+ list_first_entry(&core_info.vcs[sub],
+ struct kvmppc_vcore, preempt_list);
+ /* order writes to split_info before kvm_split_mode pointer */
+ smp_wmb();
+ }
+ pcpu = smp_processor_id();
+ for (thr = 0; thr < threads_per_subcore; ++thr)
+ paca[pcpu + thr].kvm_hstate.kvm_split_mode = sip;
+
+ /* Initiate micro-threading (split-core) if required */
+ if (cmd_bit) {
+ unsigned long hid0 = mfspr(SPRN_HID0);
+
+ hid0 |= cmd_bit | HID0_POWER8_DYNLPARDIS;
+ mb();
+ mtspr(SPRN_HID0, hid0);
+ isync();
+ for (;;) {
+ hid0 = mfspr(SPRN_HID0);
+ if (hid0 & stat_bit)
+ break;
+ cpu_relax();
+ }
}
- /* Set this explicitly in case thread 0 doesn't have a vcpu */
- get_paca()->kvm_hstate.kvm_vcore = vc;
- get_paca()->kvm_hstate.ptid = 0;
+ /* Start all the threads */
+ active = 0;
+ for (sub = 0; sub < core_info.n_subcores; ++sub) {
+ thr = subcore_thread_map[sub];
+ thr0_done = false;
+ active |= 1 << thr;
+ list_for_each_entry(pvc, &core_info.vcs[sub], preempt_list) {
+ pvc->pcpu = pcpu + thr;
+ list_for_each_entry(vcpu, &pvc->runnable_threads,
+ arch.run_list) {
+ kvmppc_start_thread(vcpu, pvc);
+ kvmppc_create_dtl_entry(vcpu, pvc);
+ trace_kvm_guest_enter(vcpu);
+ if (!vcpu->arch.ptid)
+ thr0_done = true;
+ active |= 1 << (thr + vcpu->arch.ptid);
+ }
+ /*
+ * We need to start the first thread of each subcore
+ * even if it doesn't have a vcpu.
+ */
+ if (pvc->master_vcore == pvc && !thr0_done)
+ kvmppc_start_thread(NULL, pvc);
+ thr += pvc->num_threads;
+ }
+ }
+
+ /*
+ * Ensure that split_info.do_nap is set after setting
+ * the vcore pointer in the PACA of the secondaries.
+ */
+ smp_mb();
+ if (cmd_bit)
+ split_info.do_nap = 1; /* ask secondaries to nap when done */
+
+ /*
+ * When doing micro-threading, poke the inactive threads as well.
+ * This gets them to the nap instruction after kvm_do_nap,
+ * which reduces the time taken to unsplit later.
+ */
+ if (split > 1)
+ for (thr = 1; thr < threads_per_subcore; ++thr)
+ if (!(active & (1 << thr)))
+ kvmppc_ipi_thread(pcpu + thr);
vc->vcore_state = VCORE_RUNNING;
preempt_disable();
trace_kvmppc_run_core(vc, 0);
- spin_unlock(&vc->lock);
+ for (sub = 0; sub < core_info.n_subcores; ++sub)
+ list_for_each_entry(pvc, &core_info.vcs[sub], preempt_list)
+ spin_unlock(&pvc->lock);
kvm_guest_enter();
@@ -2019,32 +2476,58 @@
__kvmppc_vcore_entry();
- spin_lock(&vc->lock);
-
if (vc->mpp_buffer)
kvmppc_start_saving_l2_cache(vc);
- /* disable sending of IPIs on virtual external irqs */
- list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list)
- vcpu->cpu = -1;
- /* wait for secondary threads to finish writing their state to memory */
- kvmppc_wait_for_nap();
- for (i = 0; i < threads_per_subcore; ++i)
- kvmppc_release_hwthread(vc->pcpu + i);
+ srcu_read_unlock(&vc->kvm->srcu, srcu_idx);
+
+ spin_lock(&vc->lock);
/* prevent other vcpu threads from doing kvmppc_start_thread() now */
vc->vcore_state = VCORE_EXITING;
- spin_unlock(&vc->lock);
- srcu_read_unlock(&vc->kvm->srcu, srcu_idx);
+ /* wait for secondary threads to finish writing their state to memory */
+ kvmppc_wait_for_nap();
+
+ /* Return to whole-core mode if we split the core earlier */
+ if (split > 1) {
+ unsigned long hid0 = mfspr(SPRN_HID0);
+ unsigned long loops = 0;
+
+ hid0 &= ~HID0_POWER8_DYNLPARDIS;
+ stat_bit = HID0_POWER8_2LPARMODE | HID0_POWER8_4LPARMODE;
+ mb();
+ mtspr(SPRN_HID0, hid0);
+ isync();
+ for (;;) {
+ hid0 = mfspr(SPRN_HID0);
+ if (!(hid0 & stat_bit))
+ break;
+ cpu_relax();
+ ++loops;
+ }
+ split_info.do_nap = 0;
+ }
+
+ /* Let secondaries go back to the offline loop */
+ for (i = 0; i < threads_per_subcore; ++i) {
+ kvmppc_release_hwthread(pcpu + i);
+ if (sip && sip->napped[i])
+ kvmppc_ipi_thread(pcpu + i);
+ }
+
+ spin_unlock(&vc->lock);
/* make sure updates to secondary vcpu structs are visible now */
smp_mb();
kvm_guest_exit();
- preempt_enable();
+ for (sub = 0; sub < core_info.n_subcores; ++sub)
+ list_for_each_entry_safe(pvc, vcnext, &core_info.vcs[sub],
+ preempt_list)
+ post_guest_process(pvc, pvc == vc);
spin_lock(&vc->lock);
- post_guest_process(vc);
+ preempt_enable();
out:
vc->vcore_state = VCORE_INACTIVE;
@@ -2055,13 +2538,17 @@
* Wait for some other vcpu thread to execute us, and
* wake us up when we need to handle something in the host.
*/
-static void kvmppc_wait_for_exec(struct kvm_vcpu *vcpu, int wait_state)
+static void kvmppc_wait_for_exec(struct kvmppc_vcore *vc,
+ struct kvm_vcpu *vcpu, int wait_state)
{
DEFINE_WAIT(wait);
prepare_to_wait(&vcpu->arch.cpu_run, &wait, wait_state);
- if (vcpu->arch.state == KVMPPC_VCPU_RUNNABLE)
+ if (vcpu->arch.state == KVMPPC_VCPU_RUNNABLE) {
+ spin_unlock(&vc->lock);
schedule();
+ spin_lock(&vc->lock);
+ }
finish_wait(&vcpu->arch.cpu_run, &wait);
}
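
For readers unfamiliar with the pattern: kvmppc_wait_for_exec() now takes the vcore and drops vc->lock across schedule(), much like a condition wait has to drop its mutex while sleeping. A minimal user-space analogue, purely illustrative (pthreads; the kernel code above uses prepare_to_wait()/schedule() rather than a condition variable):

#include <pthread.h>

/* Caller holds *lock.  Sleep until *state stops being `runnable`,
 * releasing the lock while asleep and retaking it before returning,
 * which is what the vcore code above now does around schedule(). */
static void toy_wait_while_runnable(pthread_mutex_t *lock, pthread_cond_t *cond,
                                    const int *state, int runnable)
{
        while (*state == runnable)
                pthread_cond_wait(cond, lock);  /* drops lock while sleeping */
}
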
@@ -2137,9 +2624,21 @@
* this thread straight away and have it join in.
*/
if (!signal_pending(current)) {
- if (vc->vcore_state == VCORE_RUNNING && !VCORE_IS_EXITING(vc)) {
+ if (vc->vcore_state == VCORE_PIGGYBACK) {
+ struct kvmppc_vcore *mvc = vc->master_vcore;
+ if (spin_trylock(&mvc->lock)) {
+ if (mvc->vcore_state == VCORE_RUNNING &&
+ !VCORE_IS_EXITING(mvc)) {
+ kvmppc_create_dtl_entry(vcpu, vc);
+ kvmppc_start_thread(vcpu, vc);
+ trace_kvm_guest_enter(vcpu);
+ }
+ spin_unlock(&mvc->lock);
+ }
+ } else if (vc->vcore_state == VCORE_RUNNING &&
+ !VCORE_IS_EXITING(vc)) {
kvmppc_create_dtl_entry(vcpu, vc);
- kvmppc_start_thread(vcpu);
+ kvmppc_start_thread(vcpu, vc);
trace_kvm_guest_enter(vcpu);
} else if (vc->vcore_state == VCORE_SLEEPING) {
wake_up(&vc->wq);
@@ -2149,10 +2648,11 @@
while (vcpu->arch.state == KVMPPC_VCPU_RUNNABLE &&
!signal_pending(current)) {
+ if (vc->vcore_state == VCORE_PREEMPT && vc->runner == NULL)
+ kvmppc_vcore_end_preempt(vc);
+
if (vc->vcore_state != VCORE_INACTIVE) {
- spin_unlock(&vc->lock);
- kvmppc_wait_for_exec(vcpu, TASK_INTERRUPTIBLE);
- spin_lock(&vc->lock);
+ kvmppc_wait_for_exec(vc, vcpu, TASK_INTERRUPTIBLE);
continue;
}
list_for_each_entry_safe(v, vn, &vc->runnable_threads,
@@ -2179,10 +2679,11 @@
if (n_ceded == vc->n_runnable) {
kvmppc_vcore_blocked(vc);
} else if (need_resched()) {
- vc->vcore_state = VCORE_PREEMPT;
+ kvmppc_vcore_preempt(vc);
/* Let something else run */
cond_resched_lock(&vc->lock);
- vc->vcore_state = VCORE_INACTIVE;
+ if (vc->vcore_state == VCORE_PREEMPT)
+ kvmppc_vcore_end_preempt(vc);
} else {
kvmppc_run_core(vc);
}
@@ -2191,11 +2692,8 @@
while (vcpu->arch.state == KVMPPC_VCPU_RUNNABLE &&
(vc->vcore_state == VCORE_RUNNING ||
- vc->vcore_state == VCORE_EXITING)) {
- spin_unlock(&vc->lock);
- kvmppc_wait_for_exec(vcpu, TASK_UNINTERRUPTIBLE);
- spin_lock(&vc->lock);
- }
+ vc->vcore_state == VCORE_EXITING))
+ kvmppc_wait_for_exec(vc, vcpu, TASK_UNINTERRUPTIBLE);
if (vcpu->arch.state == KVMPPC_VCPU_RUNNABLE) {
kvmppc_remove_runnable(vc, vcpu);
@@ -2755,6 +3253,8 @@
init_default_hcalls();
+ init_vcore_lists();
+
r = kvmppc_mmu_hv_init();
return r;
}
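
To summarize the new scheduling idea in this file: a vcore that gets preempted while it still has runnable threads is parked on a per-CPU preempted_vcores list, and kvmppc_run_core() pulls such vcores back in as piggybacked subcores as long as the physical core's thread budget is not exceeded. A stripped-down sketch of that admission check, using hypothetical toy_* names and modelling only the thread-count constraint (the real can_piggyback() also consults can_dynamic_split(), shown near the top of this hunk):

#include <stdbool.h>

struct toy_core_info { int total_threads; int n_subcores; };
struct toy_vcore     { int num_threads; };

/* Admit the preempted vcore only if it still fits in the target
 * number of hardware threads for this physical core. */
static bool toy_can_piggyback(const struct toy_vcore *pvc,
                              const struct toy_core_info *cip,
                              int target_threads)
{
        return cip->total_threads + pvc->num_threads <= target_threads;
}
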
diff --git a/arch/powerpc/kvm/book3s_hv_builtin.c b/arch/powerpc/kvm/book3s_hv_builtin.c
index ed2589d..fd7006b 100644
--- a/arch/powerpc/kvm/book3s_hv_builtin.c
+++ b/arch/powerpc/kvm/book3s_hv_builtin.c
@@ -110,14 +110,15 @@
long int kvmppc_rm_h_confer(struct kvm_vcpu *vcpu, int target,
unsigned int yield_count)
{
- struct kvmppc_vcore *vc = vcpu->arch.vcore;
+ struct kvmppc_vcore *vc = local_paca->kvm_hstate.kvm_vcore;
+ int ptid = local_paca->kvm_hstate.ptid;
int threads_running;
int threads_ceded;
int threads_conferring;
u64 stop = get_tb() + 10 * tb_ticks_per_usec;
int rv = H_SUCCESS; /* => don't yield */
- set_bit(vcpu->arch.ptid, &vc->conferring_threads);
+ set_bit(ptid, &vc->conferring_threads);
while ((get_tb() < stop) && !VCORE_IS_EXITING(vc)) {
threads_running = VCORE_ENTRY_MAP(vc);
threads_ceded = vc->napping_threads;
@@ -127,7 +128,7 @@
break;
}
}
- clear_bit(vcpu->arch.ptid, &vc->conferring_threads);
+ clear_bit(ptid, &vc->conferring_threads);
return rv;
}
@@ -238,7 +239,8 @@
{
struct kvmppc_vcore *vc = local_paca->kvm_hstate.kvm_vcore;
int ptid = local_paca->kvm_hstate.ptid;
- int me, ee;
+ struct kvm_split_mode *sip = local_paca->kvm_hstate.kvm_split_mode;
+ int me, ee, i;
/* Set our bit in the threads-exiting-guest map in the 0xff00
bits of vcore->entry_exit_map */
@@ -258,4 +260,26 @@
*/
if (trap != BOOK3S_INTERRUPT_HV_DECREMENTER)
kvmhv_interrupt_vcore(vc, ee & ~(1 << ptid));
+
+ /*
+ * If we are doing dynamic micro-threading, interrupt the other
+ * subcores to pull them out of their guests too.
+ */
+ if (!sip)
+ return;
+
+ for (i = 0; i < MAX_SUBCORES; ++i) {
+ vc = sip->master_vcs[i];
+ if (!vc)
+ break;
+ do {
+ ee = vc->entry_exit_map;
+ /* Already asked to exit? */
+ if ((ee >> 8) != 0)
+ break;
+ } while (cmpxchg(&vc->entry_exit_map, ee,
+ ee | VCORE_EXIT_REQ) != ee);
+ if ((ee >> 8) == 0)
+ kvmhv_interrupt_vcore(vc, ee);
+ }
}
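
The cross-subcore exit request above leans on the entry/exit map convention: the low byte of vcore->entry_exit_map records threads that have entered the guest, and the 0xff00 byte records threads that are exiting. A self-contained C11 sketch of the same cmpxchg loop; the exit-request flag value is a hypothetical stand-in, chosen above the per-thread exit byte:

#include <stdatomic.h>
#include <stdbool.h>

#define TOY_EXIT_REQ (1u << 16)         /* hypothetical flag above the exit byte */

/* Ask a vcore to exit unless some thread is already exiting or an
 * exit has already been requested; returns true if this caller set
 * the request, mirroring the loop above. */
static bool toy_request_exit(_Atomic unsigned int *entry_exit_map)
{
        unsigned int ee = atomic_load(entry_exit_map);

        do {
                if (ee >> 8)            /* already asked to exit? */
                        return false;
        } while (!atomic_compare_exchange_weak(entry_exit_map, &ee,
                                               ee | TOY_EXIT_REQ));
        return true;
}
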
diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
index b027a89..c1df9bb 100644
--- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c
+++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
@@ -12,6 +12,7 @@
#include <linux/kvm_host.h>
#include <linux/hugetlb.h>
#include <linux/module.h>
+#include <linux/log2.h>
#include <asm/tlbflush.h>
#include <asm/kvm_ppc.h>
@@ -97,25 +98,52 @@
}
EXPORT_SYMBOL_GPL(kvmppc_add_revmap_chain);
+/* Update the changed page order field of an rmap entry */
+void kvmppc_update_rmap_change(unsigned long *rmap, unsigned long psize)
+{
+ unsigned long order;
+
+ if (!psize)
+ return;
+ order = ilog2(psize);
+ order <<= KVMPPC_RMAP_CHG_SHIFT;
+ if (order > (*rmap & KVMPPC_RMAP_CHG_ORDER))
+ *rmap = (*rmap & ~KVMPPC_RMAP_CHG_ORDER) | order;
+}
+EXPORT_SYMBOL_GPL(kvmppc_update_rmap_change);
+
+/* Returns a pointer to the revmap entry for the page mapped by a HPTE */
+static unsigned long *revmap_for_hpte(struct kvm *kvm, unsigned long hpte_v,
+ unsigned long hpte_gr)
+{
+ struct kvm_memory_slot *memslot;
+ unsigned long *rmap;
+ unsigned long gfn;
+
+ gfn = hpte_rpn(hpte_gr, hpte_page_size(hpte_v, hpte_gr));
+ memslot = __gfn_to_memslot(kvm_memslots_raw(kvm), gfn);
+ if (!memslot)
+ return NULL;
+
+ rmap = real_vmalloc_addr(&memslot->arch.rmap[gfn - memslot->base_gfn]);
+ return rmap;
+}
+
/* Remove this HPTE from the chain for a real page */
static void remove_revmap_chain(struct kvm *kvm, long pte_index,
struct revmap_entry *rev,
unsigned long hpte_v, unsigned long hpte_r)
{
struct revmap_entry *next, *prev;
- unsigned long gfn, ptel, head;
- struct kvm_memory_slot *memslot;
+ unsigned long ptel, head;
unsigned long *rmap;
unsigned long rcbits;
rcbits = hpte_r & (HPTE_R_R | HPTE_R_C);
ptel = rev->guest_rpte |= rcbits;
- gfn = hpte_rpn(ptel, hpte_page_size(hpte_v, ptel));
- memslot = __gfn_to_memslot(kvm_memslots_raw(kvm), gfn);
- if (!memslot)
+ rmap = revmap_for_hpte(kvm, hpte_v, ptel);
+ if (!rmap)
return;
-
- rmap = real_vmalloc_addr(&memslot->arch.rmap[gfn - memslot->base_gfn]);
lock_rmap(rmap);
head = *rmap & KVMPPC_RMAP_INDEX;
@@ -131,6 +159,8 @@
*rmap = (*rmap & ~KVMPPC_RMAP_INDEX) | head;
}
*rmap |= rcbits << KVMPPC_RMAP_RC_SHIFT;
+ if (rcbits & HPTE_R_C)
+ kvmppc_update_rmap_change(rmap, hpte_page_size(hpte_v, hpte_r));
unlock_rmap(rmap);
}
@@ -421,14 +451,20 @@
rev = real_vmalloc_addr(&kvm->arch.revmap[pte_index]);
v = pte & ~HPTE_V_HVLOCK;
if (v & HPTE_V_VALID) {
- u64 pte1;
-
- pte1 = be64_to_cpu(hpte[1]);
hpte[0] &= ~cpu_to_be64(HPTE_V_VALID);
- rb = compute_tlbie_rb(v, pte1, pte_index);
+ rb = compute_tlbie_rb(v, be64_to_cpu(hpte[1]), pte_index);
do_tlbies(kvm, &rb, 1, global_invalidates(kvm, flags), true);
- /* Read PTE low word after tlbie to get final R/C values */
- remove_revmap_chain(kvm, pte_index, rev, v, pte1);
+ /*
+ * The reference (R) and change (C) bits in a HPT
+ * entry can be set by hardware at any time up until
+ * the HPTE is invalidated and the TLB invalidation
+ * sequence has completed. This means that when
+ * removing a HPTE, we need to re-read the HPTE after
+ * the invalidation sequence has completed in order to
+ * obtain reliable values of R and C.
+ */
+ remove_revmap_chain(kvm, pte_index, rev, v,
+ be64_to_cpu(hpte[1]));
}
r = rev->guest_rpte & ~HPTE_GR_RESERVED;
note_hpte_modification(kvm, rev);
@@ -655,6 +691,105 @@
return H_SUCCESS;
}
+long kvmppc_h_clear_ref(struct kvm_vcpu *vcpu, unsigned long flags,
+ unsigned long pte_index)
+{
+ struct kvm *kvm = vcpu->kvm;
+ __be64 *hpte;
+ unsigned long v, r, gr;
+ struct revmap_entry *rev;
+ unsigned long *rmap;
+ long ret = H_NOT_FOUND;
+
+ if (pte_index >= kvm->arch.hpt_npte)
+ return H_PARAMETER;
+
+ rev = real_vmalloc_addr(&kvm->arch.revmap[pte_index]);
+ hpte = (__be64 *)(kvm->arch.hpt_virt + (pte_index << 4));
+ while (!try_lock_hpte(hpte, HPTE_V_HVLOCK))
+ cpu_relax();
+ v = be64_to_cpu(hpte[0]);
+ r = be64_to_cpu(hpte[1]);
+ if (!(v & (HPTE_V_VALID | HPTE_V_ABSENT)))
+ goto out;
+
+ gr = rev->guest_rpte;
+ if (rev->guest_rpte & HPTE_R_R) {
+ rev->guest_rpte &= ~HPTE_R_R;
+ note_hpte_modification(kvm, rev);
+ }
+ if (v & HPTE_V_VALID) {
+ gr |= r & (HPTE_R_R | HPTE_R_C);
+ if (r & HPTE_R_R) {
+ kvmppc_clear_ref_hpte(kvm, hpte, pte_index);
+ rmap = revmap_for_hpte(kvm, v, gr);
+ if (rmap) {
+ lock_rmap(rmap);
+ *rmap |= KVMPPC_RMAP_REFERENCED;
+ unlock_rmap(rmap);
+ }
+ }
+ }
+ vcpu->arch.gpr[4] = gr;
+ ret = H_SUCCESS;
+ out:
+ unlock_hpte(hpte, v & ~HPTE_V_HVLOCK);
+ return ret;
+}
+
+long kvmppc_h_clear_mod(struct kvm_vcpu *vcpu, unsigned long flags,
+ unsigned long pte_index)
+{
+ struct kvm *kvm = vcpu->kvm;
+ __be64 *hpte;
+ unsigned long v, r, gr;
+ struct revmap_entry *rev;
+ unsigned long *rmap;
+ long ret = H_NOT_FOUND;
+
+ if (pte_index >= kvm->arch.hpt_npte)
+ return H_PARAMETER;
+
+ rev = real_vmalloc_addr(&kvm->arch.revmap[pte_index]);
+ hpte = (__be64 *)(kvm->arch.hpt_virt + (pte_index << 4));
+ while (!try_lock_hpte(hpte, HPTE_V_HVLOCK))
+ cpu_relax();
+ v = be64_to_cpu(hpte[0]);
+ r = be64_to_cpu(hpte[1]);
+ if (!(v & (HPTE_V_VALID | HPTE_V_ABSENT)))
+ goto out;
+
+ gr = rev->guest_rpte;
+ if (gr & HPTE_R_C) {
+ rev->guest_rpte &= ~HPTE_R_C;
+ note_hpte_modification(kvm, rev);
+ }
+ if (v & HPTE_V_VALID) {
+ /* need to make it temporarily absent so C is stable */
+ hpte[0] |= cpu_to_be64(HPTE_V_ABSENT);
+ kvmppc_invalidate_hpte(kvm, hpte, pte_index);
+ r = be64_to_cpu(hpte[1]);
+ gr |= r & (HPTE_R_R | HPTE_R_C);
+ if (r & HPTE_R_C) {
+ unsigned long psize = hpte_page_size(v, r);
+ hpte[1] = cpu_to_be64(r & ~HPTE_R_C);
+ eieio();
+ rmap = revmap_for_hpte(kvm, v, gr);
+ if (rmap) {
+ lock_rmap(rmap);
+ *rmap |= KVMPPC_RMAP_CHANGED;
+ kvmppc_update_rmap_change(rmap, psize);
+ unlock_rmap(rmap);
+ }
+ }
+ }
+ vcpu->arch.gpr[4] = gr;
+ ret = H_SUCCESS;
+ out:
+ unlock_hpte(hpte, v & ~HPTE_V_HVLOCK);
+ return ret;
+}
+
void kvmppc_invalidate_hpte(struct kvm *kvm, __be64 *hptep,
unsigned long pte_index)
{
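
The new kvmppc_update_rmap_change() above packs the log2 of the largest page size whose change (C) bit was seen into a dedicated field of the rmap word, and only ever grows it, presumably so later dirty-tracking knows how large a region the entry covers. A standalone sketch of that encoding; the shift and mask are hypothetical stand-ins for the KVMPPC_RMAP_CHG_* constants, which are defined in headers outside this hunk:

#define TOY_CHG_SHIFT   48
#define TOY_CHG_ORDER   (0x3fUL << TOY_CHG_SHIFT)

/* Record the order (log2) of a dirtied page size if it is larger than
 * what the rmap word already remembers; psize must be a power of two. */
static void toy_update_rmap_change(unsigned long *rmap, unsigned long psize)
{
        unsigned long order;

        if (!psize)
                return;
        order = (unsigned long)__builtin_ctzl(psize);   /* ilog2() for a power of two */
        order <<= TOY_CHG_SHIFT;
        if (order > (*rmap & TOY_CHG_ORDER))
                *rmap = (*rmap & ~TOY_CHG_ORDER) | order;
}
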
diff --git a/arch/powerpc/kvm/book3s_hv_rm_xics.c b/arch/powerpc/kvm/book3s_hv_rm_xics.c
index 00e45b6..24f5807 100644
--- a/arch/powerpc/kvm/book3s_hv_rm_xics.c
+++ b/arch/powerpc/kvm/book3s_hv_rm_xics.c
@@ -67,14 +67,12 @@
}
/* Check if the core is loaded, if not, too hard */
- cpu = vcpu->cpu;
+ cpu = vcpu->arch.thread_cpu;
if (cpu < 0 || cpu >= nr_cpu_ids) {
this_icp->rm_action |= XICS_RM_KICK_VCPU;
this_icp->rm_kick_target = vcpu;
return;
}
- /* In SMT cpu will always point to thread 0, we adjust it */
- cpu += vcpu->arch.ptid;
smp_mb();
kvmhv_rm_send_ipi(cpu);
diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
index faa86e9..2273dca 100644
--- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
@@ -128,6 +128,10 @@
subf r4, r4, r3
mtspr SPRN_DEC, r4
+ /* hwthread_req may have got set by cede or no vcpu, so clear it */
+ li r0, 0
+ stb r0, HSTATE_HWTHREAD_REQ(r13)
+
/*
* For external and machine check interrupts, we need
* to call the Linux handler to process the interrupt.
@@ -215,7 +219,6 @@
ld r5, HSTATE_KVM_VCORE(r13)
li r0, 0
stb r0, HSTATE_NAPPING(r13)
- stb r0, HSTATE_HWTHREAD_REQ(r13)
/* check the wake reason */
bl kvmppc_check_wake_reason
@@ -315,10 +318,10 @@
cmpdi r3, 0
bge kvm_no_guest
- /* get vcpu pointer, NULL if we have no vcpu to run */
- ld r4,HSTATE_KVM_VCPU(r13)
- cmpdi r4,0
- /* if we have no vcpu to run, go back to sleep */
+ /* get vcore pointer, NULL if we have nothing to run */
+ ld r5,HSTATE_KVM_VCORE(r13)
+ cmpdi r5,0
+ /* if we have no vcore to run, go back to sleep */
beq kvm_no_guest
kvm_secondary_got_guest:
@@ -327,21 +330,42 @@
ld r6, PACA_DSCR_DEFAULT(r13)
std r6, HSTATE_DSCR(r13)
- /* Order load of vcore, ptid etc. after load of vcpu */
+ /* On thread 0 of a subcore, set HDEC to max */
+ lbz r4, HSTATE_PTID(r13)
+ cmpwi r4, 0
+ bne 63f
+ lis r6, 0x7fff
+ ori r6, r6, 0xffff
+ mtspr SPRN_HDEC, r6
+ /* and set per-LPAR registers, if doing dynamic micro-threading */
+ ld r6, HSTATE_SPLIT_MODE(r13)
+ cmpdi r6, 0
+ beq 63f
+ ld r0, KVM_SPLIT_RPR(r6)
+ mtspr SPRN_RPR, r0
+ ld r0, KVM_SPLIT_PMMAR(r6)
+ mtspr SPRN_PMMAR, r0
+ ld r0, KVM_SPLIT_LDBAR(r6)
+ mtspr SPRN_LDBAR, r0
+ isync
+63:
+ /* Order load of vcpu after load of vcore */
lwsync
+ ld r4, HSTATE_KVM_VCPU(r13)
bl kvmppc_hv_entry
/* Back from the guest, go back to nap */
- /* Clear our vcpu pointer so we don't come back in early */
+ /* Clear our vcpu and vcore pointers so we don't come back in early */
li r0, 0
+ std r0, HSTATE_KVM_VCPU(r13)
/*
- * Once we clear HSTATE_KVM_VCPU(r13), the code in
+ * Once we clear HSTATE_KVM_VCORE(r13), the code in
* kvmppc_run_core() is going to assume that all our vcpu
* state is visible in memory. This lwsync makes sure
* that that is true.
*/
lwsync
- std r0, HSTATE_KVM_VCPU(r13)
+ std r0, HSTATE_KVM_VCORE(r13)
/*
* At this point we have finished executing in the guest.
@@ -374,16 +398,71 @@
b power7_wakeup_loss
53: HMT_LOW
- ld r4, HSTATE_KVM_VCPU(r13)
- cmpdi r4, 0
+ ld r5, HSTATE_KVM_VCORE(r13)
+ cmpdi r5, 0
+ bne 60f
+ ld r3, HSTATE_SPLIT_MODE(r13)
+ cmpdi r3, 0
+ beq kvm_no_guest
+ lbz r0, KVM_SPLIT_DO_NAP(r3)
+ cmpwi r0, 0
beq kvm_no_guest
HMT_MEDIUM
+ b kvm_unsplit_nap
+60: HMT_MEDIUM
b kvm_secondary_got_guest
54: li r0, KVM_HWTHREAD_IN_KVM
stb r0, HSTATE_HWTHREAD_STATE(r13)
b kvm_no_guest
+/*
+ * Here the primary thread is trying to return the core to
+ * whole-core mode, so we need to nap.
+ */
+kvm_unsplit_nap:
+ /*
+ * Ensure that secondary doesn't nap when it has
+ * its vcore pointer set.
+ */
+ sync /* matches smp_mb() before setting split_info.do_nap */
+ ld r0, HSTATE_KVM_VCORE(r13)
+ cmpdi r0, 0
+ bne kvm_no_guest
+ /* clear any pending message */
+BEGIN_FTR_SECTION
+ lis r6, (PPC_DBELL_SERVER << (63-36))@h
+ PPC_MSGCLR(6)
+END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
+ /* Set kvm_split_mode.napped[tid] = 1 */
+ ld r3, HSTATE_SPLIT_MODE(r13)
+ li r0, 1
+ lhz r4, PACAPACAINDEX(r13)
+ clrldi r4, r4, 61 /* micro-threading => P8 => 8 threads/core */
+ addi r4, r4, KVM_SPLIT_NAPPED
+ stbx r0, r3, r4
+ /* Check the do_nap flag again after setting napped[] */
+ sync
+ lbz r0, KVM_SPLIT_DO_NAP(r3)
+ cmpwi r0, 0
+ beq 57f
+ li r3, (LPCR_PECEDH | LPCR_PECE0) >> 4
+ mfspr r4, SPRN_LPCR
+ rlwimi r4, r3, 4, (LPCR_PECEDP | LPCR_PECEDH | LPCR_PECE0 | LPCR_PECE1)
+ mtspr SPRN_LPCR, r4
+ isync
+ std r0, HSTATE_SCRATCH0(r13)
+ ptesync
+ ld r0, HSTATE_SCRATCH0(r13)
+1: cmpd r0, r0
+ bne 1b
+ nap
+ b .
+
+57: li r0, 0
+ stbx r0, r3, r4
+ b kvm_no_guest
+
/******************************************************************************
* *
* Entry code *
@@ -854,7 +933,10 @@
cmpwi r0, 0
bne 21f
HMT_LOW
-20: lbz r0, VCORE_IN_GUEST(r5)
+20: lwz r3, VCORE_ENTRY_EXIT(r5)
+ cmpwi r3, 0x100
+ bge no_switch_exit
+ lbz r0, VCORE_IN_GUEST(r5)
cmpwi r0, 0
beq 20b
HMT_MEDIUM
@@ -870,7 +952,7 @@
blt hdec_soon
ld r6, VCPU_CTR(r4)
- lwz r7, VCPU_XER(r4)
+ ld r7, VCPU_XER(r4)
mtctr r6
mtxer r7
@@ -985,9 +1067,13 @@
#endif
11: b kvmhv_switch_to_host
+no_switch_exit:
+ HMT_MEDIUM
+ li r12, 0
+ b 12f
hdec_soon:
li r12, BOOK3S_INTERRUPT_HV_DECREMENTER
- stw r12, VCPU_TRAP(r4)
+12: stw r12, VCPU_TRAP(r4)
mr r9, r4
#ifdef CONFIG_KVM_BOOK3S_HV_EXIT_TIMING
addi r3, r4, VCPU_TB_RMEXIT
@@ -1103,7 +1189,7 @@
mfctr r3
mfxer r4
std r3, VCPU_CTR(r9)
- stw r4, VCPU_XER(r9)
+ std r4, VCPU_XER(r9)
/* If this is a page table miss then see if it's theirs or ours */
cmpwi r12, BOOK3S_INTERRUPT_H_DATA_STORAGE
@@ -1127,6 +1213,7 @@
cmpwi r12, BOOK3S_INTERRUPT_H_DOORBELL
bne 3f
lbz r0, HSTATE_HOST_IPI(r13)
+ cmpwi r0, 0
beq 4f
b guest_exit_cont
3:
@@ -1176,6 +1263,11 @@
ld r9, HSTATE_KVM_VCPU(r13)
lwz r12, VCPU_TRAP(r9)
+ /* Stop others sending VCPU interrupts to this physical CPU */
+ li r0, -1
+ stw r0, VCPU_CPU(r9)
+ stw r0, VCPU_THREAD_CPU(r9)
+
/* Save guest CTRL register, set runlatch to 1 */
mfspr r6,SPRN_CTRLF
stw r6,VCPU_CTRL(r9)
@@ -1540,12 +1632,17 @@
/* Primary thread waits for all the secondaries to exit guest */
15: lwz r3,VCORE_ENTRY_EXIT(r5)
- srwi r0,r3,8
+ rlwinm r0,r3,32-8,0xff
clrldi r3,r3,56
cmpw r3,r0
bne 15b
isync
+ /* Did we actually switch to the guest at all? */
+ lbz r6, VCORE_IN_GUEST(r5)
+ cmpwi r6, 0
+ beq 19f
+
/* Primary thread switches back to host partition */
ld r6,KVM_HOST_SDR1(r4)
lwz r7,KVM_HOST_LPID(r4)
@@ -1589,7 +1686,7 @@
18:
/* Signal secondary CPUs to continue */
stb r0,VCORE_IN_GUEST(r5)
- lis r8,0x7fff /* MAX_INT@h */
+19: lis r8,0x7fff /* MAX_INT@h */
mtspr SPRN_HDEC,r8
16: ld r8,KVM_HOST_LPCR(r4)
@@ -1675,7 +1772,7 @@
bl kvmppc_msr_interrupt
fast_interrupt_c_return:
6: ld r7, VCPU_CTR(r9)
- lwz r8, VCPU_XER(r9)
+ ld r8, VCPU_XER(r9)
mtctr r7
mtxer r8
mr r4, r9
@@ -1816,8 +1913,8 @@
.long DOTSYM(kvmppc_h_remove) - hcall_real_table
.long DOTSYM(kvmppc_h_enter) - hcall_real_table
.long DOTSYM(kvmppc_h_read) - hcall_real_table
- .long 0 /* 0x10 - H_CLEAR_MOD */
- .long 0 /* 0x14 - H_CLEAR_REF */
+ .long DOTSYM(kvmppc_h_clear_mod) - hcall_real_table
+ .long DOTSYM(kvmppc_h_clear_ref) - hcall_real_table
.long DOTSYM(kvmppc_h_protect) - hcall_real_table
.long DOTSYM(kvmppc_h_get_tce) - hcall_real_table
.long DOTSYM(kvmppc_h_put_tce) - hcall_real_table
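
One of the subtler fixes above is the switch from srwi to rlwinm where the primary thread waits for its secondaries: the count of exited threads must be taken from bits 8-15 only, since higher bits of the entry/exit word can now carry other flags. Expressed in plain C:

/* The comparison the assembly performs: wait until every thread that
 * entered the guest (low byte) has marked itself in the exit byte
 * (bits 8-15), ignoring any higher flag bits. */
static int toy_all_threads_exited(unsigned int entry_exit_map)
{
        unsigned int entered = entry_exit_map & 0xff;
        unsigned int exited  = (entry_exit_map >> 8) & 0xff;    /* srwi alone kept bits above 15 too */

        return entered == exited;
}
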
diff --git a/arch/powerpc/kvm/book3s_paired_singles.c b/arch/powerpc/kvm/book3s_paired_singles.c
index bd6ab16..a759d9a 100644
--- a/arch/powerpc/kvm/book3s_paired_singles.c
+++ b/arch/powerpc/kvm/book3s_paired_singles.c
@@ -352,7 +352,7 @@
return kvmppc_get_field(inst, msb + 32, lsb + 32);
}
-bool kvmppc_inst_is_paired_single(struct kvm_vcpu *vcpu, u32 inst)
+static bool kvmppc_inst_is_paired_single(struct kvm_vcpu *vcpu, u32 inst)
{
if (!(vcpu->arch.hflags & BOOK3S_HFLAG_PAIRED_SINGLE))
return false;
diff --git a/arch/powerpc/kvm/book3s_segment.S b/arch/powerpc/kvm/book3s_segment.S
index acee37c..ca8f174 100644
--- a/arch/powerpc/kvm/book3s_segment.S
+++ b/arch/powerpc/kvm/book3s_segment.S
@@ -123,7 +123,7 @@
PPC_LL r8, SVCPU_CTR(r3)
PPC_LL r9, SVCPU_LR(r3)
lwz r10, SVCPU_CR(r3)
- lwz r11, SVCPU_XER(r3)
+ PPC_LL r11, SVCPU_XER(r3)
mtctr r8
mtlr r9
@@ -237,7 +237,7 @@
mfctr r8
mflr r9
- stw r5, SVCPU_XER(r13)
+ PPC_STL r5, SVCPU_XER(r13)
PPC_STL r6, SVCPU_FAULT_DAR(r13)
stw r7, SVCPU_FAULT_DSISR(r13)
PPC_STL r8, SVCPU_CTR(r13)
diff --git a/arch/powerpc/kvm/book3s_xics.c b/arch/powerpc/kvm/book3s_xics.c
index c6ca7db..905e94a 100644
--- a/arch/powerpc/kvm/book3s_xics.c
+++ b/arch/powerpc/kvm/book3s_xics.c
@@ -41,7 +41,7 @@
* =======
*
* Each ICS has a spin lock protecting the information about the IRQ
- * sources and avoiding simultaneous deliveries if the same interrupt.
+ * sources and avoiding simultaneous deliveries of the same interrupt.
*
* ICP operations are done via a single compare & swap transaction
* (most ICP state fits in the union kvmppc_icp_state)
diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c
index cc58426..ae458f0 100644
--- a/arch/powerpc/kvm/booke.c
+++ b/arch/powerpc/kvm/booke.c
@@ -933,6 +933,7 @@
#endif
break;
case BOOKE_INTERRUPT_CRITICAL:
+ kvmppc_fill_pt_regs(&regs);
unknown_exception(&regs);
break;
case BOOKE_INTERRUPT_DEBUG:
diff --git a/arch/powerpc/kvm/e500_mmu.c b/arch/powerpc/kvm/e500_mmu.c
index 50860e9..29911a0 100644
--- a/arch/powerpc/kvm/e500_mmu.c
+++ b/arch/powerpc/kvm/e500_mmu.c
@@ -377,7 +377,7 @@
| MAS0_NV(vcpu_e500->gtlb_nv[tlbsel]);
vcpu->arch.shared->mas1 =
(vcpu->arch.shared->mas6 & MAS6_SPID0)
- | (vcpu->arch.shared->mas6 & (MAS6_SAS ? MAS1_TS : 0))
+ | ((vcpu->arch.shared->mas6 & MAS6_SAS) ? MAS1_TS : 0)
| (vcpu->arch.shared->mas4 & MAS4_TSIZED(~0));
vcpu->arch.shared->mas2 &= MAS2_EPN;
vcpu->arch.shared->mas2 |= vcpu->arch.shared->mas4 &
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index e5dde32..2e51289 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -660,7 +660,7 @@
return kvmppc_core_pending_dec(vcpu);
}
-enum hrtimer_restart kvmppc_decrementer_wakeup(struct hrtimer *timer)
+static enum hrtimer_restart kvmppc_decrementer_wakeup(struct hrtimer *timer)
{
struct kvm_vcpu *vcpu;
diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
index 4827870..1d57000 100644
--- a/arch/s390/Kconfig
+++ b/arch/s390/Kconfig
@@ -48,6 +48,7 @@
config KEXEC
def_bool y
+ select KEXEC_CORE
config AUDIT_ARCH
def_bool y
diff --git a/arch/s390/boot/compressed/misc.c b/arch/s390/boot/compressed/misc.c
index 42506b3..4da604e 100644
--- a/arch/s390/boot/compressed/misc.c
+++ b/arch/s390/boot/compressed/misc.c
@@ -167,7 +167,7 @@
#endif
puts("Uncompressing Linux... ");
- decompress(input_data, input_len, NULL, NULL, output, NULL, error);
+ __decompress(input_data, input_len, NULL, NULL, output, 0, NULL, error);
puts("Ok, booting the kernel.\n");
return (unsigned long) output;
}
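
For context on the decompressor hunks: this s390 change, and the matching sh, unicore32 and x86 hunks further down, switch from decompress() to __decompress(), whose argument list additionally carries the size of the output buffer (passed as 0 where no limit is enforced, and as output_len in the x86 case). An annotated view of the call shape as it appears in these hunks; the roles of the NULL arguments (fill/flush callbacks and a position pointer) are assumptions, not something this diff spells out:

__decompress(input_data, input_len,     /* compressed image and its length */
             NULL, NULL,                /* no fill()/flush() callbacks (assumed) */
             output, output_len,        /* output buffer and its size (0 = no limit) */
             NULL, error);              /* no position pointer (assumed); error handler */
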
diff --git a/arch/s390/include/asm/dma-mapping.h b/arch/s390/include/asm/dma-mapping.h
index 9d39596..b3fd54d 100644
--- a/arch/s390/include/asm/dma-mapping.h
+++ b/arch/s390/include/asm/dma-mapping.h
@@ -18,27 +18,13 @@
return &s390_dma_ops;
}
-extern int dma_set_mask(struct device *dev, u64 mask);
-
static inline void dma_cache_sync(struct device *dev, void *vaddr, size_t size,
enum dma_data_direction direction)
{
}
-#define dma_alloc_noncoherent(d, s, h, f) dma_alloc_coherent(d, s, h, f)
-#define dma_free_noncoherent(d, s, v, h) dma_free_coherent(d, s, v, h)
-
#include <asm-generic/dma-mapping-common.h>
-static inline int dma_supported(struct device *dev, u64 mask)
-{
- struct dma_map_ops *dma_ops = get_dma_ops(dev);
-
- if (dma_ops->dma_supported == NULL)
- return 1;
- return dma_ops->dma_supported(dev, mask);
-}
-
static inline bool dma_capable(struct device *dev, dma_addr_t addr, size_t size)
{
if (!dev->dma_mask)
@@ -46,45 +32,4 @@
return addr + size - 1 <= *dev->dma_mask;
}
-static inline int dma_mapping_error(struct device *dev, dma_addr_t dma_addr)
-{
- struct dma_map_ops *dma_ops = get_dma_ops(dev);
-
- debug_dma_mapping_error(dev, dma_addr);
- if (dma_ops->mapping_error)
- return dma_ops->mapping_error(dev, dma_addr);
- return dma_addr == DMA_ERROR_CODE;
-}
-
-#define dma_alloc_coherent(d, s, h, f) dma_alloc_attrs(d, s, h, f, NULL)
-
-static inline void *dma_alloc_attrs(struct device *dev, size_t size,
- dma_addr_t *dma_handle, gfp_t flags,
- struct dma_attrs *attrs)
-{
- struct dma_map_ops *ops = get_dma_ops(dev);
- void *cpu_addr;
-
- BUG_ON(!ops);
-
- cpu_addr = ops->alloc(dev, size, dma_handle, flags, attrs);
- debug_dma_alloc_coherent(dev, size, *dma_handle, cpu_addr);
-
- return cpu_addr;
-}
-
-#define dma_free_coherent(d, s, c, h) dma_free_attrs(d, s, c, h, NULL)
-
-static inline void dma_free_attrs(struct device *dev, size_t size,
- void *cpu_addr, dma_addr_t dma_handle,
- struct dma_attrs *attrs)
-{
- struct dma_map_ops *ops = get_dma_ops(dev);
-
- BUG_ON(!ops);
-
- debug_dma_free_coherent(dev, size, cpu_addr, dma_handle);
- ops->free(dev, size, cpu_addr, dma_handle, attrs);
-}
-
#endif /* _ASM_S390_DMA_MAPPING_H */
diff --git a/arch/s390/pci/pci_dma.c b/arch/s390/pci/pci_dma.c
index 42b7658..37505b8 100644
--- a/arch/s390/pci/pci_dma.c
+++ b/arch/s390/pci/pci_dma.c
@@ -262,16 +262,6 @@
spin_unlock_irqrestore(&zdev->iommu_bitmap_lock, flags);
}
-int dma_set_mask(struct device *dev, u64 mask)
-{
- if (!dev->dma_mask || !dma_supported(dev, mask))
- return -EIO;
-
- *dev->dma_mask = mask;
- return 0;
-}
-EXPORT_SYMBOL_GPL(dma_set_mask);
-
static dma_addr_t s390_dma_map_pages(struct device *dev, struct page *page,
unsigned long offset, size_t size,
enum dma_data_direction direction,
diff --git a/arch/sh/Kconfig b/arch/sh/Kconfig
index 50057fe..d514df7e 100644
--- a/arch/sh/Kconfig
+++ b/arch/sh/Kconfig
@@ -602,6 +602,7 @@
config KEXEC
bool "kexec system call (EXPERIMENTAL)"
depends on SUPERH32 && MMU
+ select KEXEC_CORE
help
kexec is a system call that implements the ability to shutdown your
current kernel, and to start another kernel. It is like a reboot
diff --git a/arch/sh/boot/compressed/misc.c b/arch/sh/boot/compressed/misc.c
index 95470a4..208a975 100644
--- a/arch/sh/boot/compressed/misc.c
+++ b/arch/sh/boot/compressed/misc.c
@@ -132,7 +132,7 @@
puts("Uncompressing Linux... ");
cache_control(CACHE_ENABLE);
- decompress(input_data, input_len, NULL, NULL, output, NULL, error);
+ __decompress(input_data, input_len, NULL, NULL, output, 0, NULL, error);
cache_control(CACHE_DISABLE);
puts("Ok, booting the kernel.\n");
}
diff --git a/arch/sh/include/asm/dma-mapping.h b/arch/sh/include/asm/dma-mapping.h
index b437f2c..a3745a3 100644
--- a/arch/sh/include/asm/dma-mapping.h
+++ b/arch/sh/include/asm/dma-mapping.h
@@ -9,86 +9,13 @@
return dma_ops;
}
-#include <asm-generic/dma-coherent.h>
+#define DMA_ERROR_CODE 0
+
#include <asm-generic/dma-mapping-common.h>
-static inline int dma_supported(struct device *dev, u64 mask)
-{
- struct dma_map_ops *ops = get_dma_ops(dev);
-
- if (ops->dma_supported)
- return ops->dma_supported(dev, mask);
-
- return 1;
-}
-
-static inline int dma_set_mask(struct device *dev, u64 mask)
-{
- struct dma_map_ops *ops = get_dma_ops(dev);
-
- if (!dev->dma_mask || !dma_supported(dev, mask))
- return -EIO;
- if (ops->set_dma_mask)
- return ops->set_dma_mask(dev, mask);
-
- *dev->dma_mask = mask;
-
- return 0;
-}
-
void dma_cache_sync(struct device *dev, void *vaddr, size_t size,
enum dma_data_direction dir);
-#define dma_alloc_noncoherent(d, s, h, f) dma_alloc_coherent(d, s, h, f)
-#define dma_free_noncoherent(d, s, v, h) dma_free_coherent(d, s, v, h)
-
-static inline int dma_mapping_error(struct device *dev, dma_addr_t dma_addr)
-{
- struct dma_map_ops *ops = get_dma_ops(dev);
-
- debug_dma_mapping_error(dev, dma_addr);
- if (ops->mapping_error)
- return ops->mapping_error(dev, dma_addr);
-
- return dma_addr == 0;
-}
-
-#define dma_alloc_coherent(d,s,h,f) dma_alloc_attrs(d,s,h,f,NULL)
-
-static inline void *dma_alloc_attrs(struct device *dev, size_t size,
- dma_addr_t *dma_handle, gfp_t gfp,
- struct dma_attrs *attrs)
-{
- struct dma_map_ops *ops = get_dma_ops(dev);
- void *memory;
-
- if (dma_alloc_from_coherent(dev, size, dma_handle, &memory))
- return memory;
- if (!ops->alloc)
- return NULL;
-
- memory = ops->alloc(dev, size, dma_handle, gfp, attrs);
- debug_dma_alloc_coherent(dev, size, *dma_handle, memory);
-
- return memory;
-}
-
-#define dma_free_coherent(d,s,c,h) dma_free_attrs(d,s,c,h,NULL)
-
-static inline void dma_free_attrs(struct device *dev, size_t size,
- void *vaddr, dma_addr_t dma_handle,
- struct dma_attrs *attrs)
-{
- struct dma_map_ops *ops = get_dma_ops(dev);
-
- if (dma_release_from_coherent(dev, get_order(size), vaddr))
- return;
-
- debug_dma_free_coherent(dev, size, vaddr, dma_handle);
- if (ops->free)
- ops->free(dev, size, vaddr, dma_handle, attrs);
-}
-
/* arch/sh/mm/consistent.c */
extern void *dma_generic_alloc_coherent(struct device *dev, size_t size,
dma_addr_t *dma_addr, gfp_t flag,
diff --git a/arch/sparc/include/asm/dma-mapping.h b/arch/sparc/include/asm/dma-mapping.h
index 7e064c6..a21da59 100644
--- a/arch/sparc/include/asm/dma-mapping.h
+++ b/arch/sparc/include/asm/dma-mapping.h
@@ -7,11 +7,9 @@
#define DMA_ERROR_CODE (~(dma_addr_t)0x0)
+#define HAVE_ARCH_DMA_SUPPORTED 1
int dma_supported(struct device *dev, u64 mask);
-#define dma_alloc_noncoherent(d, s, h, f) dma_alloc_coherent(d, s, h, f)
-#define dma_free_noncoherent(d, s, v, h) dma_free_coherent(d, s, v, h)
-
static inline void dma_cache_sync(struct device *dev, void *vaddr, size_t size,
enum dma_data_direction dir)
{
@@ -39,39 +37,7 @@
return dma_ops;
}
-#include <asm-generic/dma-mapping-common.h>
-
-#define dma_alloc_coherent(d,s,h,f) dma_alloc_attrs(d,s,h,f,NULL)
-
-static inline void *dma_alloc_attrs(struct device *dev, size_t size,
- dma_addr_t *dma_handle, gfp_t flag,
- struct dma_attrs *attrs)
-{
- struct dma_map_ops *ops = get_dma_ops(dev);
- void *cpu_addr;
-
- cpu_addr = ops->alloc(dev, size, dma_handle, flag, attrs);
- debug_dma_alloc_coherent(dev, size, *dma_handle, cpu_addr);
- return cpu_addr;
-}
-
-#define dma_free_coherent(d,s,c,h) dma_free_attrs(d,s,c,h,NULL)
-
-static inline void dma_free_attrs(struct device *dev, size_t size,
- void *cpu_addr, dma_addr_t dma_handle,
- struct dma_attrs *attrs)
-{
- struct dma_map_ops *ops = get_dma_ops(dev);
-
- debug_dma_free_coherent(dev, size, cpu_addr, dma_handle);
- ops->free(dev, size, cpu_addr, dma_handle, attrs);
-}
-
-static inline int dma_mapping_error(struct device *dev, dma_addr_t dma_addr)
-{
- debug_dma_mapping_error(dev, dma_addr);
- return (dma_addr == DMA_ERROR_CODE);
-}
+#define HAVE_ARCH_DMA_SET_MASK 1
static inline int dma_set_mask(struct device *dev, u64 mask)
{
@@ -86,4 +52,6 @@
return -EINVAL;
}
+#include <asm-generic/dma-mapping-common.h>
+
#endif
diff --git a/arch/tile/Kconfig b/arch/tile/Kconfig
index 2ba12d7..106c21b 100644
--- a/arch/tile/Kconfig
+++ b/arch/tile/Kconfig
@@ -205,6 +205,7 @@
config KEXEC
bool "kexec system call"
+ select KEXEC_CORE
---help---
kexec is a system call that implements the ability to shutdown your
current kernel, and to start another kernel. It is like a reboot
diff --git a/arch/tile/include/asm/dma-mapping.h b/arch/tile/include/asm/dma-mapping.h
index 1eae359..96ac6cc 100644
--- a/arch/tile/include/asm/dma-mapping.h
+++ b/arch/tile/include/asm/dma-mapping.h
@@ -59,8 +59,6 @@
static inline void dma_mark_clean(void *addr, size_t size) {}
-#include <asm-generic/dma-mapping-common.h>
-
static inline void set_dma_ops(struct device *dev, struct dma_map_ops *ops)
{
dev->archdata.dma_ops = ops;
@@ -74,18 +72,9 @@
return addr + size - 1 <= *dev->dma_mask;
}
-static inline int
-dma_mapping_error(struct device *dev, dma_addr_t dma_addr)
-{
- debug_dma_mapping_error(dev, dma_addr);
- return get_dma_ops(dev)->mapping_error(dev, dma_addr);
-}
+#define HAVE_ARCH_DMA_SET_MASK 1
-static inline int
-dma_supported(struct device *dev, u64 mask)
-{
- return get_dma_ops(dev)->dma_supported(dev, mask);
-}
+#include <asm-generic/dma-mapping-common.h>
static inline int
dma_set_mask(struct device *dev, u64 mask)
@@ -116,36 +105,6 @@
return 0;
}
-static inline void *dma_alloc_attrs(struct device *dev, size_t size,
- dma_addr_t *dma_handle, gfp_t flag,
- struct dma_attrs *attrs)
-{
- struct dma_map_ops *dma_ops = get_dma_ops(dev);
- void *cpu_addr;
-
- cpu_addr = dma_ops->alloc(dev, size, dma_handle, flag, attrs);
-
- debug_dma_alloc_coherent(dev, size, *dma_handle, cpu_addr);
-
- return cpu_addr;
-}
-
-static inline void dma_free_attrs(struct device *dev, size_t size,
- void *cpu_addr, dma_addr_t dma_handle,
- struct dma_attrs *attrs)
-{
- struct dma_map_ops *dma_ops = get_dma_ops(dev);
-
- debug_dma_free_coherent(dev, size, cpu_addr, dma_handle);
-
- dma_ops->free(dev, size, cpu_addr, dma_handle, attrs);
-}
-
-#define dma_alloc_coherent(d, s, h, f) dma_alloc_attrs(d, s, h, f, NULL)
-#define dma_alloc_noncoherent(d, s, h, f) dma_alloc_attrs(d, s, h, f, NULL)
-#define dma_free_coherent(d, s, v, h) dma_free_attrs(d, s, v, h, NULL)
-#define dma_free_noncoherent(d, s, v, h) dma_free_attrs(d, s, v, h, NULL)
-
/*
* dma_alloc_noncoherent() is #defined to return coherent memory,
* so there's no need to do any flushing here.
diff --git a/arch/unicore32/boot/compressed/misc.c b/arch/unicore32/boot/compressed/misc.c
index 176d5bd..5c65dfe 100644
--- a/arch/unicore32/boot/compressed/misc.c
+++ b/arch/unicore32/boot/compressed/misc.c
@@ -119,8 +119,8 @@
output_ptr = get_unaligned_le32(tmp);
arch_decomp_puts("Uncompressing Linux...");
- decompress(input_data, input_data_end - input_data, NULL, NULL,
- output_data, NULL, error);
+ __decompress(input_data, input_data_end - input_data, NULL, NULL,
+ output_data, 0, NULL, error);
arch_decomp_puts(" done, booting the kernel.\n");
return output_ptr;
}
diff --git a/arch/unicore32/include/asm/dma-mapping.h b/arch/unicore32/include/asm/dma-mapping.h
index 366460a..8140e05 100644
--- a/arch/unicore32/include/asm/dma-mapping.h
+++ b/arch/unicore32/include/asm/dma-mapping.h
@@ -18,8 +18,6 @@
#include <linux/scatterlist.h>
#include <linux/swiotlb.h>
-#include <asm-generic/dma-coherent.h>
-
#include <asm/memory.h>
#include <asm/cacheflush.h>
@@ -30,26 +28,6 @@
return &swiotlb_dma_map_ops;
}
-static inline int dma_supported(struct device *dev, u64 mask)
-{
- struct dma_map_ops *dma_ops = get_dma_ops(dev);
-
- if (unlikely(dma_ops == NULL))
- return 0;
-
- return dma_ops->dma_supported(dev, mask);
-}
-
-static inline int dma_mapping_error(struct device *dev, dma_addr_t dma_addr)
-{
- struct dma_map_ops *dma_ops = get_dma_ops(dev);
-
- if (dma_ops->mapping_error)
- return dma_ops->mapping_error(dev, dma_addr);
-
- return 0;
-}
-
#include <asm-generic/dma-mapping-common.h>
static inline bool dma_capable(struct device *dev, dma_addr_t addr, size_t size)
@@ -72,41 +50,6 @@
static inline void dma_mark_clean(void *addr, size_t size) {}
-static inline int dma_set_mask(struct device *dev, u64 dma_mask)
-{
- if (!dev->dma_mask || !dma_supported(dev, dma_mask))
- return -EIO;
-
- *dev->dma_mask = dma_mask;
-
- return 0;
-}
-
-#define dma_alloc_coherent(d,s,h,f) dma_alloc_attrs(d,s,h,f,NULL)
-
-static inline void *dma_alloc_attrs(struct device *dev, size_t size,
- dma_addr_t *dma_handle, gfp_t flag,
- struct dma_attrs *attrs)
-{
- struct dma_map_ops *dma_ops = get_dma_ops(dev);
-
- return dma_ops->alloc(dev, size, dma_handle, flag, attrs);
-}
-
-#define dma_free_coherent(d,s,c,h) dma_free_attrs(d,s,c,h,NULL)
-
-static inline void dma_free_attrs(struct device *dev, size_t size,
- void *cpu_addr, dma_addr_t dma_handle,
- struct dma_attrs *attrs)
-{
- struct dma_map_ops *dma_ops = get_dma_ops(dev);
-
- dma_ops->free(dev, size, cpu_addr, dma_handle, attrs);
-}
-
-#define dma_alloc_noncoherent(d, s, h, f) dma_alloc_coherent(d, s, h, f)
-#define dma_free_noncoherent(d, s, v, h) dma_free_coherent(d, s, v, h)
-
static inline void dma_cache_sync(struct device *dev, void *vaddr,
size_t size, enum dma_data_direction direction)
{
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index cc0d73e..328c835 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -1006,7 +1006,7 @@
depends on X86_MCE_INTEL
config X86_LEGACY_VM86
- bool "Legacy VM86 support (obsolete)"
+ bool "Legacy VM86 support"
default n
depends on X86_32
---help---
@@ -1018,19 +1018,20 @@
available to accelerate real mode DOS programs. However, any
recent version of DOSEMU, X, or vbetool should be fully
functional even without kernel VM86 support, as they will all
- fall back to (pretty well performing) software emulation.
+ fall back to software emulation. Nevertheless, if you are using
+ a 16-bit DOS program where 16-bit performance matters, vm86
+ mode might be faster than emulation and you might want to
+ enable this option.
- Anything that works on a 64-bit kernel is unlikely to need
- this option, as 64-bit kernels don't, and can't, support V8086
- mode. This option is also unrelated to 16-bit protected mode
- and is not needed to run most 16-bit programs under Wine.
+ Note that any app that works on a 64-bit kernel is unlikely to
+ need this option, as 64-bit kernels don't, and can't, support
+ V8086 mode. This option is also unrelated to 16-bit protected
+ mode and is not needed to run most 16-bit programs under Wine.
- Enabling this option adds considerable attack surface to the
- kernel and slows down system calls and exception handling.
+ Enabling this option increases the complexity of the kernel
+ and slows down exception handling a tiny bit.
- Unless you use very old userspace or need the last drop of
- performance in your real mode DOS games and can't use KVM,
- say N here.
+ If unsure, say N here.
config VM86
bool
@@ -1754,6 +1755,7 @@
config KEXEC
bool "kexec system call"
+ select KEXEC_CORE
---help---
kexec is a system call that implements the ability to shutdown your
current kernel, and to start another kernel. It is like a reboot
@@ -1770,8 +1772,8 @@
config KEXEC_FILE
bool "kexec file based system call"
+ select KEXEC_CORE
select BUILD_BIN2C
- depends on KEXEC
depends on X86_64
depends on CRYPTO=y
depends on CRYPTO_SHA256=y
diff --git a/arch/x86/boot/compressed/misc.c b/arch/x86/boot/compressed/misc.c
index f637979..79dac17 100644
--- a/arch/x86/boot/compressed/misc.c
+++ b/arch/x86/boot/compressed/misc.c
@@ -448,7 +448,8 @@
#endif
debug_putstr("\nDecompressing Linux... ");
- decompress(input_data, input_len, NULL, NULL, output, NULL, error);
+ __decompress(input_data, input_len, NULL, NULL, output, output_len,
+ NULL, error);
parse_elf(output);
/*
* 32-bit always performs relocations. 64-bit relocations are only
diff --git a/arch/x86/boot/header.S b/arch/x86/boot/header.S
index 16ef025..2d6b309 100644
--- a/arch/x86/boot/header.S
+++ b/arch/x86/boot/header.S
@@ -414,7 +414,7 @@
# define XLF23 0
#endif
-#if defined(CONFIG_X86_64) && defined(CONFIG_EFI) && defined(CONFIG_KEXEC)
+#if defined(CONFIG_X86_64) && defined(CONFIG_EFI) && defined(CONFIG_KEXEC_CORE)
# define XLF4 XLF_EFI_KEXEC
#else
# define XLF4 0
diff --git a/arch/x86/entry/syscalls/syscall_32.tbl b/arch/x86/entry/syscalls/syscall_32.tbl
index 477bfa6..7663c45 100644
--- a/arch/x86/entry/syscalls/syscall_32.tbl
+++ b/arch/x86/entry/syscalls/syscall_32.tbl
@@ -381,3 +381,4 @@
372 i386 recvmsg sys_recvmsg compat_sys_recvmsg
373 i386 shutdown sys_shutdown
374 i386 userfaultfd sys_userfaultfd
+375 i386 membarrier sys_membarrier
diff --git a/arch/x86/entry/syscalls/syscall_64.tbl b/arch/x86/entry/syscalls/syscall_64.tbl
index 81c4906..278842f 100644
--- a/arch/x86/entry/syscalls/syscall_64.tbl
+++ b/arch/x86/entry/syscalls/syscall_64.tbl
@@ -330,6 +330,7 @@
321 common bpf sys_bpf
322 64 execveat stub_execveat
323 common userfaultfd sys_userfaultfd
+324 common membarrier sys_membarrier
#
# x32-specific system call numbers start at 512 to avoid cache impact
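
Both syscall tables above gain membarrier (375 on i386, 324 on x86-64). A minimal user-space sketch of probing and issuing the call by raw syscall number; the command values (QUERY = 0, SHARED = 1) come from <linux/membarrier.h> and are an assumption here, not part of this diff:

#include <stdio.h>
#include <unistd.h>
#include <sys/syscall.h>

#ifndef __NR_membarrier
#define __NR_membarrier 324             /* x86-64 number from the table above */
#endif

int main(void)
{
        long mask = syscall(__NR_membarrier, 0 /* MEMBARRIER_CMD_QUERY */, 0);

        if (mask < 0) {
                perror("membarrier");
                return 1;
        }
        if (mask & 1)                   /* MEMBARRIER_CMD_SHARED supported? */
                syscall(__NR_membarrier, 1 /* MEMBARRIER_CMD_SHARED */, 0);
        printf("supported membarrier commands: 0x%lx\n", (unsigned long)mask);
        return 0;
}
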
diff --git a/arch/x86/entry/vsyscall/vsyscall_64.c b/arch/x86/entry/vsyscall/vsyscall_64.c
index 26a46f4..b160c0c 100644
--- a/arch/x86/entry/vsyscall/vsyscall_64.c
+++ b/arch/x86/entry/vsyscall/vsyscall_64.c
@@ -277,7 +277,7 @@
{
return "[vsyscall]";
}
-static struct vm_operations_struct gate_vma_ops = {
+static const struct vm_operations_struct gate_vma_ops = {
.name = gate_vma_name,
};
static struct vm_area_struct gate_vma = {
diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h
index 477fc28..e6cf2ad 100644
--- a/arch/x86/include/asm/cpufeature.h
+++ b/arch/x86/include/asm/cpufeature.h
@@ -241,6 +241,7 @@
#define X86_FEATURE_AVX512PF ( 9*32+26) /* AVX-512 Prefetch */
#define X86_FEATURE_AVX512ER ( 9*32+27) /* AVX-512 Exponential and Reciprocal */
#define X86_FEATURE_AVX512CD ( 9*32+28) /* AVX-512 Conflict Detection */
+#define X86_FEATURE_SHA_NI ( 9*32+29) /* SHA1/SHA256 Instruction Extensions */
/* Extended state features, CPUID level 0x0000000d:1 (eax), word 10 */
#define X86_FEATURE_XSAVEOPT (10*32+ 0) /* XSAVEOPT */
diff --git a/arch/x86/include/asm/dma-mapping.h b/arch/x86/include/asm/dma-mapping.h
index 1f5b728..953b726 100644
--- a/arch/x86/include/asm/dma-mapping.h
+++ b/arch/x86/include/asm/dma-mapping.h
@@ -12,7 +12,6 @@
#include <linux/dma-attrs.h>
#include <asm/io.h>
#include <asm/swiotlb.h>
-#include <asm-generic/dma-coherent.h>
#include <linux/dma-contiguous.h>
#ifdef CONFIG_ISA
@@ -41,24 +40,13 @@
#endif
}
-#include <asm-generic/dma-mapping-common.h>
+bool arch_dma_alloc_attrs(struct device **dev, gfp_t *gfp);
+#define arch_dma_alloc_attrs arch_dma_alloc_attrs
-/* Make sure we keep the same behaviour */
-static inline int dma_mapping_error(struct device *dev, dma_addr_t dma_addr)
-{
- struct dma_map_ops *ops = get_dma_ops(dev);
- debug_dma_mapping_error(dev, dma_addr);
- if (ops->mapping_error)
- return ops->mapping_error(dev, dma_addr);
-
- return (dma_addr == DMA_ERROR_CODE);
-}
-
-#define dma_alloc_noncoherent(d, s, h, f) dma_alloc_coherent(d, s, h, f)
-#define dma_free_noncoherent(d, s, v, h) dma_free_coherent(d, s, v, h)
-
+#define HAVE_ARCH_DMA_SUPPORTED 1
extern int dma_supported(struct device *hwdev, u64 mask);
-extern int dma_set_mask(struct device *dev, u64 mask);
+
+#include <asm-generic/dma-mapping-common.h>
extern void *dma_generic_alloc_coherent(struct device *dev, size_t size,
dma_addr_t *dma_addr, gfp_t flag,
@@ -125,16 +113,4 @@
return gfp;
}
-#define dma_alloc_coherent(d,s,h,f) dma_alloc_attrs(d,s,h,f,NULL)
-
-void *
-dma_alloc_attrs(struct device *dev, size_t size, dma_addr_t *dma_handle,
- gfp_t gfp, struct dma_attrs *attrs);
-
-#define dma_free_coherent(d,s,c,h) dma_free_attrs(d,s,c,h,NULL)
-
-void dma_free_attrs(struct device *dev, size_t size,
- void *vaddr, dma_addr_t bus,
- struct dma_attrs *attrs);
-
#endif
diff --git a/arch/x86/include/asm/kdebug.h b/arch/x86/include/asm/kdebug.h
index 32ce713..b130d59 100644
--- a/arch/x86/include/asm/kdebug.h
+++ b/arch/x86/include/asm/kdebug.h
@@ -29,7 +29,7 @@
extern void __show_regs(struct pt_regs *regs, int all);
extern unsigned long oops_begin(void);
extern void oops_end(unsigned long, struct pt_regs *, int signr);
-#ifdef CONFIG_KEXEC
+#ifdef CONFIG_KEXEC_CORE
extern int in_crash_kexec;
#else
/* no crash dump is ever in progress if no crash kernel can be kexec'd */
diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h
index ce029e4..31247b5 100644
--- a/arch/x86/include/asm/paravirt_types.h
+++ b/arch/x86/include/asm/paravirt_types.h
@@ -97,7 +97,6 @@
struct pv_time_ops {
unsigned long long (*sched_clock)(void);
unsigned long long (*steal_clock)(int cpu);
- unsigned long (*get_tsc_khz)(void);
};
struct pv_cpu_ops {
diff --git a/arch/x86/include/asm/qspinlock.h b/arch/x86/include/asm/qspinlock.h
index 9d51fae..eaba080 100644
--- a/arch/x86/include/asm/qspinlock.h
+++ b/arch/x86/include/asm/qspinlock.h
@@ -39,18 +39,27 @@
}
#endif
-#define virt_queued_spin_lock virt_queued_spin_lock
-
-static inline bool virt_queued_spin_lock(struct qspinlock *lock)
+#ifdef CONFIG_PARAVIRT
+#define virt_spin_lock virt_spin_lock
+static inline bool virt_spin_lock(struct qspinlock *lock)
{
if (!static_cpu_has(X86_FEATURE_HYPERVISOR))
return false;
- while (atomic_cmpxchg(&lock->val, 0, _Q_LOCKED_VAL) != 0)
- cpu_relax();
+ /*
+ * On hypervisors without PARAVIRT_SPINLOCKS support we fall
+ * back to a Test-and-Set spinlock, because fair locks have
+ * horrible lock 'holder' preemption issues.
+ */
+
+ do {
+ while (atomic_read(&lock->val) != 0)
+ cpu_relax();
+ } while (atomic_cmpxchg(&lock->val, 0, _Q_LOCKED_VAL) != 0);
return true;
}
+#endif /* CONFIG_PARAVIRT */
#include <asm-generic/qspinlock.h>
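
The rewritten virt_spin_lock() above is the classic test-and-test-and-set pattern: spin on plain reads and only attempt the atomic operation once the lock looks free, which keeps the contended cache line shared while waiting. A user-space C11 sketch of the same idea (illustrative only, not the kernel implementation):

#include <stdatomic.h>

static void toy_tas_lock(atomic_int *lock)
{
        do {
                while (atomic_load_explicit(lock, memory_order_relaxed))
                        ;                               /* cpu_relax() in the kernel loop */
        } while (atomic_exchange_explicit(lock, 1, memory_order_acquire));
}

static void toy_tas_unlock(atomic_int *lock)
{
        atomic_store_explicit(lock, 0, memory_order_release);
}
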
diff --git a/arch/x86/include/asm/xen/page.h b/arch/x86/include/asm/xen/page.h
index a3804fb..0679e11 100644
--- a/arch/x86/include/asm/xen/page.h
+++ b/arch/x86/include/asm/xen/page.h
@@ -101,6 +101,11 @@
{
unsigned long mfn;
+ /*
+ * Some x86 code is still using pfn_to_mfn instead of
+ * pfn_to_gfn. This will have to be fixed once we figure
+ * out which call each caller really wants.
+ */
if (xen_feature(XENFEAT_auto_translated_physmap))
return pfn;
@@ -147,6 +152,11 @@
{
unsigned long pfn;
+ /*
+ * Some x86 code is still using mfn_to_pfn instead of
+ * gfn_to_pfn. This will have to be fixed once we figure
+ * out which call each caller really wants.
+ */
if (xen_feature(XENFEAT_auto_translated_physmap))
return mfn;
@@ -176,6 +186,27 @@
return XPADDR(PFN_PHYS(mfn_to_pfn(PFN_DOWN(machine.maddr))) | offset);
}
+/* Pseudo-physical <-> Guest conversion */
+static inline unsigned long pfn_to_gfn(unsigned long pfn)
+{
+ if (xen_feature(XENFEAT_auto_translated_physmap))
+ return pfn;
+ else
+ return pfn_to_mfn(pfn);
+}
+
+static inline unsigned long gfn_to_pfn(unsigned long gfn)
+{
+ if (xen_feature(XENFEAT_auto_translated_physmap))
+ return gfn;
+ else
+ return mfn_to_pfn(gfn);
+}
+
+/* Pseudo-physical <-> Bus conversion */
+#define pfn_to_bfn(pfn) pfn_to_gfn(pfn)
+#define bfn_to_pfn(bfn) gfn_to_pfn(bfn)
+
/*
* We detect special mappings in one of two ways:
* 1. If the MFN is an I/O page then Xen will set the m2p entry
@@ -196,7 +227,7 @@
* require. In all the cases we care about, the FOREIGN_FRAME bit is
* masked (e.g., pfn_to_mfn()) so behaviour there is correct.
*/
-static inline unsigned long mfn_to_local_pfn(unsigned long mfn)
+static inline unsigned long bfn_to_local_pfn(unsigned long mfn)
{
unsigned long pfn;
@@ -215,6 +246,10 @@
#define virt_to_mfn(v) (pfn_to_mfn(virt_to_pfn(v)))
#define mfn_to_virt(m) (__va(mfn_to_pfn(m) << PAGE_SHIFT))
+/* VIRT <-> GUEST conversion */
+#define virt_to_gfn(v) (pfn_to_gfn(virt_to_pfn(v)))
+#define gfn_to_virt(g) (__va(gfn_to_pfn(g) << PAGE_SHIFT))
+
static inline unsigned long pte_mfn(pte_t pte)
{
return (pte.pte & PTE_PFN_MASK) >> PAGE_SHIFT;
@@ -262,7 +297,7 @@
static inline bool xen_arch_need_swiotlb(struct device *dev,
unsigned long pfn,
- unsigned long mfn)
+ unsigned long bfn)
{
return false;
}
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 9ffdf25..b1b78ff 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -71,8 +71,8 @@
obj-$(CONFIG_FUNCTION_GRAPH_TRACER) += ftrace.o
obj-$(CONFIG_FTRACE_SYSCALLS) += ftrace.o
obj-$(CONFIG_X86_TSC) += trace_clock.o
-obj-$(CONFIG_KEXEC) += machine_kexec_$(BITS).o
-obj-$(CONFIG_KEXEC) += relocate_kernel_$(BITS).o crash.o
+obj-$(CONFIG_KEXEC_CORE) += machine_kexec_$(BITS).o
+obj-$(CONFIG_KEXEC_CORE) += relocate_kernel_$(BITS).o crash.o
obj-$(CONFIG_KEXEC_FILE) += kexec-bzimage64.o
obj-$(CONFIG_CRASH_DUMP) += crash_dump_$(BITS).o
obj-y += kprobes/
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index c42827e..25f9093 100644
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -338,10 +338,15 @@
static void __init_or_module optimize_nops(struct alt_instr *a, u8 *instr)
{
+ unsigned long flags;
+
if (instr[0] != 0x90)
return;
+ local_irq_save(flags);
add_nops(instr + (a->instrlen - a->padlen), a->padlen);
+ sync_core();
+ local_irq_restore(flags);
DUMP_BYTES(instr, a->instrlen, "%p: [%d:%d) optimized NOPs: ",
instr, a->instrlen - a->padlen, a->padlen);
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index 3ca3e46..24e94ce 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -336,6 +336,13 @@
apic_write(APIC_LVTT, lvtt_value);
if (lvtt_value & APIC_LVT_TIMER_TSCDEADLINE) {
+ /*
+ * See Intel SDM: TSC-Deadline Mode chapter. In xAPIC mode,
+ * writing to the APIC LVTT and TSC_DEADLINE MSR isn't serialized.
+ * According to Intel, MFENCE can do the serialization here.
+ */
+ asm volatile("mfence" : : : "memory");
+
printk_once(KERN_DEBUG "TSC deadline timer enabled\n");
return;
}
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index 38a76f8..5c60bb1 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -2522,6 +2522,7 @@
int pin, ioapic, irq, irq_entry;
const struct cpumask *mask;
struct irq_data *idata;
+ struct irq_chip *chip;
if (skip_ioapic_setup == 1)
return;
@@ -2545,9 +2546,9 @@
else
mask = apic->target_cpus();
- irq_set_affinity(irq, mask);
+ chip = irq_data_get_irq_chip(idata);
+ chip->irq_set_affinity(idata, mask, false);
}
-
}
#endif
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 07ce52c..de22ea7 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -1110,10 +1110,10 @@
else
printk(KERN_CONT "%d86", c->x86);
- printk(KERN_CONT " (fam: %02x, model: %02x", c->x86, c->x86_model);
+ printk(KERN_CONT " (family: 0x%x, model: 0x%x", c->x86, c->x86_model);
if (c->x86_mask || c->cpuid_level >= 0)
- printk(KERN_CONT ", stepping: %02x)\n", c->x86_mask);
+ printk(KERN_CONT ", stepping: 0x%x)\n", c->x86_mask);
else
printk(KERN_CONT ")\n");
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index cd9b6d0..3fefebf 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -2316,9 +2316,12 @@
intel_get_event_constraints(struct cpu_hw_events *cpuc, int idx,
struct perf_event *event)
{
- struct event_constraint *c1 = cpuc->event_constraint[idx];
+ struct event_constraint *c1 = NULL;
struct event_constraint *c2;
+ if (idx >= 0) /* fake does < 0 */
+ c1 = cpuc->event_constraint[idx];
+
/*
* first time only
* - static constraint: no change across incremental scheduling calls
diff --git a/arch/x86/kernel/cpu/perf_event_intel_bts.c b/arch/x86/kernel/cpu/perf_event_intel_bts.c
index 54690e8..d1c0f25 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_bts.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_bts.c
@@ -222,6 +222,7 @@
if (!buf || bts_buffer_is_full(buf, bts))
return;
+ event->hw.itrace_started = 1;
event->hw.state = 0;
if (!buf->snapshot)
diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c
index 49487b4..2c7aafa 100644
--- a/arch/x86/kernel/kvmclock.c
+++ b/arch/x86/kernel/kvmclock.c
@@ -200,7 +200,7 @@
* kind of shutdown from our side, we unregister the clock by writing anything
* that does not have the 'enable' bit set in the msr
*/
-#ifdef CONFIG_KEXEC
+#ifdef CONFIG_KEXEC_CORE
static void kvm_crash_shutdown(struct pt_regs *regs)
{
native_write_msr(msr_kvm_system_time, 0, 0);
@@ -259,7 +259,7 @@
x86_platform.save_sched_clock_state = kvm_save_sched_clock_state;
x86_platform.restore_sched_clock_state = kvm_restore_sched_clock_state;
machine_ops.shutdown = kvm_shutdown;
-#ifdef CONFIG_KEXEC
+#ifdef CONFIG_KEXEC_CORE
machine_ops.crash_shutdown = kvm_crash_shutdown;
#endif
kvm_get_preset_lpj();
diff --git a/arch/x86/kernel/ldt.c b/arch/x86/kernel/ldt.c
index 2bcc052..6acc9dd 100644
--- a/arch/x86/kernel/ldt.c
+++ b/arch/x86/kernel/ldt.c
@@ -58,7 +58,7 @@
if (alloc_size > PAGE_SIZE)
new_ldt->entries = vzalloc(alloc_size);
else
- new_ldt->entries = kzalloc(PAGE_SIZE, GFP_KERNEL);
+ new_ldt->entries = (void *)get_zeroed_page(GFP_KERNEL);
if (!new_ldt->entries) {
kfree(new_ldt);
@@ -95,7 +95,7 @@
if (ldt->size * LDT_ENTRY_SIZE > PAGE_SIZE)
vfree(ldt->entries);
else
- kfree(ldt->entries);
+ free_page((unsigned long)ldt->entries);
kfree(ldt);
}
diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c
index 353972c..84b8ef8 100644
--- a/arch/x86/kernel/pci-dma.c
+++ b/arch/x86/kernel/pci-dma.c
@@ -58,17 +58,6 @@
/* Number of entries preallocated for DMA-API debugging */
#define PREALLOC_DMA_DEBUG_ENTRIES 65536
-int dma_set_mask(struct device *dev, u64 mask)
-{
- if (!dev->dma_mask || !dma_supported(dev, mask))
- return -EIO;
-
- *dev->dma_mask = mask;
-
- return 0;
-}
-EXPORT_SYMBOL(dma_set_mask);
-
void __init pci_iommu_alloc(void)
{
struct iommu_table_entry *p;
@@ -140,50 +129,19 @@
free_pages((unsigned long)vaddr, get_order(size));
}
-void *dma_alloc_attrs(struct device *dev, size_t size, dma_addr_t *dma_handle,
- gfp_t gfp, struct dma_attrs *attrs)
+bool arch_dma_alloc_attrs(struct device **dev, gfp_t *gfp)
{
- struct dma_map_ops *ops = get_dma_ops(dev);
- void *memory;
+ *gfp = dma_alloc_coherent_gfp_flags(*dev, *gfp);
+ *gfp &= ~(__GFP_DMA | __GFP_HIGHMEM | __GFP_DMA32);
- gfp &= ~(__GFP_DMA | __GFP_HIGHMEM | __GFP_DMA32);
+ if (!*dev)
+ *dev = &x86_dma_fallback_dev;
+ if (!is_device_dma_capable(*dev))
+ return false;
+ return true;
- if (dma_alloc_from_coherent(dev, size, dma_handle, &memory))
- return memory;
-
- if (!dev)
- dev = &x86_dma_fallback_dev;
-
- if (!is_device_dma_capable(dev))
- return NULL;
-
- if (!ops->alloc)
- return NULL;
-
- memory = ops->alloc(dev, size, dma_handle,
- dma_alloc_coherent_gfp_flags(dev, gfp), attrs);
- debug_dma_alloc_coherent(dev, size, *dma_handle, memory);
-
- return memory;
}
-EXPORT_SYMBOL(dma_alloc_attrs);
-
-void dma_free_attrs(struct device *dev, size_t size,
- void *vaddr, dma_addr_t bus,
- struct dma_attrs *attrs)
-{
- struct dma_map_ops *ops = get_dma_ops(dev);
-
- WARN_ON(irqs_disabled()); /* for portability */
-
- if (dma_release_from_coherent(dev, get_order(size), vaddr))
- return;
-
- debug_dma_free_coherent(dev, size, vaddr, bus);
- if (ops->free)
- ops->free(dev, size, vaddr, bus, attrs);
-}
-EXPORT_SYMBOL(dma_free_attrs);
+EXPORT_SYMBOL(arch_dma_alloc_attrs);
/*
* See <Documentation/x86/x86_64/boot-options.txt> for the iommu kernel
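
The per-arch dma_alloc_attrs()/dma_free_attrs() wrappers go away here; x86 now only provides arch_dma_alloc_attrs(), which fixes up the gfp flags and substitutes the fallback device before the common allocator proceeds. A rough sketch of how a generic allocator could consume that hook, reusing only the helpers visible in the removed x86 code (illustrative only, not the actual common dma-mapping implementation):

static void *dma_alloc_attrs_sketch(struct device *dev, size_t size,
				    dma_addr_t *dma_handle, gfp_t gfp,
				    struct dma_attrs *attrs)
{
	struct dma_map_ops *ops = get_dma_ops(dev);
	void *memory;

	/* per-device coherent pools are still tried first */
	if (dma_alloc_from_coherent(dev, size, dma_handle, &memory))
		return memory;

	/* arch hook: may rewrite dev (fallback device) and the gfp flags */
	if (!arch_dma_alloc_attrs(&dev, &gfp))
		return NULL;
	if (!ops->alloc)
		return NULL;

	memory = ops->alloc(dev, size, dma_handle, gfp, attrs);
	debug_dma_alloc_coherent(dev, size, *dma_handle, memory);
	return memory;
}
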
diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c
index 86db4bc..02693dd 100644
--- a/arch/x86/kernel/reboot.c
+++ b/arch/x86/kernel/reboot.c
@@ -673,7 +673,7 @@
.emergency_restart = native_machine_emergency_restart,
.restart = native_machine_restart,
.halt = native_machine_halt,
-#ifdef CONFIG_KEXEC
+#ifdef CONFIG_KEXEC_CORE
.crash_shutdown = native_machine_crash_shutdown,
#endif
};
@@ -703,7 +703,7 @@
machine_ops.halt();
}
-#ifdef CONFIG_KEXEC
+#ifdef CONFIG_KEXEC_CORE
void machine_crash_shutdown(struct pt_regs *regs)
{
machine_ops.crash_shutdown(regs);
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index baadbf9..fdb7f2a 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -478,7 +478,7 @@
* --------- Crashkernel reservation ------------------------------
*/
-#ifdef CONFIG_KEXEC
+#ifdef CONFIG_KEXEC_CORE
/*
* Keep the crash kernel below this limit. On 32 bits earlier kernels
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index c8d52cb..c3f7602 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -21,6 +21,7 @@
#include <asm/hypervisor.h>
#include <asm/nmi.h>
#include <asm/x86_init.h>
+#include <asm/geode.h>
unsigned int __read_mostly cpu_khz; /* TSC clocks / usec, not used here */
EXPORT_SYMBOL(cpu_khz);
@@ -1013,15 +1014,17 @@
static void __init check_system_tsc_reliable(void)
{
-#ifdef CONFIG_MGEODE_LX
- /* RTSC counts during suspend */
+#if defined(CONFIG_MGEODEGX1) || defined(CONFIG_MGEODE_LX) || defined(CONFIG_X86_GENERIC)
+ if (is_geode_lx()) {
+ /* RTSC counts during suspend */
#define RTSC_SUSP 0x100
- unsigned long res_low, res_high;
+ unsigned long res_low, res_high;
- rdmsr_safe(MSR_GEODE_BUSCONT_CONF0, &res_low, &res_high);
- /* Geode_LX - the OLPC CPU has a very reliable TSC */
- if (res_low & RTSC_SUSP)
- tsc_clocksource_reliable = 1;
+ rdmsr_safe(MSR_GEODE_BUSCONT_CONF0, &res_low, &res_high);
+ /* Geode_LX - the OLPC CPU has a very reliable TSC */
+ if (res_low & RTSC_SUSP)
+ tsc_clocksource_reliable = 1;
+ }
#endif
if (boot_cpu_has(X86_FEATURE_TSC_RELIABLE))
tsc_clocksource_reliable = 1;
diff --git a/arch/x86/kernel/vm86_32.c b/arch/x86/kernel/vm86_32.c
index abd8b856..5246193 100644
--- a/arch/x86/kernel/vm86_32.c
+++ b/arch/x86/kernel/vm86_32.c
@@ -45,6 +45,7 @@
#include <linux/audit.h>
#include <linux/stddef.h>
#include <linux/slab.h>
+#include <linux/security.h>
#include <asm/uaccess.h>
#include <asm/io.h>
@@ -232,6 +233,32 @@
struct pt_regs *regs = current_pt_regs();
unsigned long err = 0;
+ err = security_mmap_addr(0);
+ if (err) {
+ /*
+ * vm86 cannot virtualize the address space, so vm86 users
+ * need to manage the low 1MB themselves using mmap. Given
+ * that BIOS places important data in the first page, vm86
+ * is essentially useless if mmap_min_addr != 0. DOSEMU,
+ * for example, won't even bother trying to use vm86 if it
+ * can't map a page at virtual address 0.
+ *
+ * To reduce the available kernel attack surface, simply
+ * disallow vm86(old) for users who cannot mmap at va 0.
+ *
+ * The implementation of security_mmap_addr will allow
+ * suitably privileged users to map va 0 even if
+ * vm.mmap_min_addr is set above 0, and we want this
+ * behavior for vm86 as well, as it ensures that legacy
+ * tools like vbetool will not fail just because of
+ * vm.mmap_min_addr.
+ */
+ pr_info_once("Denied a call to vm86(old) from %s[%d] (uid: %d). Set the vm.mmap_min_addr sysctl to 0 and/or adjust LSM mmap_min_addr policy to enable vm86 if you are using a vm86-based DOS emulator.\n",
+ current->comm, task_pid_nr(current),
+ from_kuid_munged(&init_user_ns, current_uid()));
+ return -EPERM;
+ }
+
if (!vm86) {
if (!(vm86 = kzalloc(sizeof(*vm86), GFP_KERNEL)))
return -ENOMEM;
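
The new check refuses vm86(old) when the caller cannot map virtual address 0, for the reasons spelled out in the comment above. A rough user-space probe for the same condition (illustrative only; security_mmap_addr() is the authoritative check and LSM policy may differ), assuming a 4 KiB page size:

#include <stdio.h>
#include <sys/mman.h>

int main(void)
{
	/* Try to map one page at VA 0, the same ability vm86(old) now requires. */
	void *p = mmap((void *)0, 4096, PROT_READ | PROT_WRITE,
		       MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, -1, 0);
	if (p == MAP_FAILED) {
		/* Typically vm.mmap_min_addr > 0 and the caller lacks CAP_SYS_RAWIO. */
		perror("mmap at VA 0");
		return 1;
	}
	puts("VA 0 is mappable; the vm86(old) gate above should pass");
	munmap(p, 4096);
	return 0;
}
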
diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S
index 00bf300..74e4bf1 100644
--- a/arch/x86/kernel/vmlinux.lds.S
+++ b/arch/x86/kernel/vmlinux.lds.S
@@ -364,7 +364,7 @@
#endif /* CONFIG_X86_32 */
-#ifdef CONFIG_KEXEC
+#ifdef CONFIG_KEXEC_CORE
#include <asm/kexec.h>
. = ASSERT(kexec_control_code_size <= KEXEC_CONTROL_CODE_MAX_SIZE,
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index e7a4fde..b372a75 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -650,6 +650,7 @@
u16 sel;
la = seg_base(ctxt, addr.seg) + addr.ea;
+ *linear = la;
*max_size = 0;
switch (mode) {
case X86EMUL_MODE_PROT64:
@@ -693,7 +694,6 @@
}
if (insn_aligned(ctxt, size) && ((la & (size - 1)) != 0))
return emulate_gp(ctxt, 0);
- *linear = la;
return X86EMUL_CONTINUE;
bad:
if (addr.seg == VCPU_SREG_SS)
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index fb16a8e..69088a1 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -3309,13 +3309,14 @@
walk_shadow_page_lockless_begin(vcpu);
- for (shadow_walk_init(&iterator, vcpu, addr), root = iterator.level;
+ for (shadow_walk_init(&iterator, vcpu, addr),
+ leaf = root = iterator.level;
shadow_walk_okay(&iterator);
__shadow_walk_next(&iterator, spte)) {
- leaf = iterator.level;
spte = mmu_spte_get_lockless(iterator.sptep);
sptes[leaf - 1] = spte;
+ leaf--;
if (!is_shadow_present_pte(spte))
break;
@@ -3329,7 +3330,7 @@
if (reserved) {
pr_err("%s: detect reserved bits on spte, addr 0x%llx, dump hierarchy:\n",
__func__, addr);
- while (root >= leaf) {
+ while (root > leaf) {
pr_err("------ spte 0x%llx level %d.\n",
sptes[root - 1], root);
root--;
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 148ea20..d019868 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -1264,7 +1264,7 @@
vmcs, phys_addr);
}
-#ifdef CONFIG_KEXEC
+#ifdef CONFIG_KEXEC_CORE
/*
* This bitmap is used to indicate whether the vmclear
* operation is enabled on all cpus. All disabled by
@@ -1302,7 +1302,7 @@
#else
static inline void crash_enable_local_vmclear(int cpu) { }
static inline void crash_disable_local_vmclear(int cpu) { }
-#endif /* CONFIG_KEXEC */
+#endif /* CONFIG_KEXEC_CORE */
static void __loaded_vmcs_clear(void *arg)
{
@@ -10411,7 +10411,7 @@
if (r)
return r;
-#ifdef CONFIG_KEXEC
+#ifdef CONFIG_KEXEC_CORE
rcu_assign_pointer(crash_vmclear_loaded_vmcss,
crash_vmclear_local_loaded_vmcss);
#endif
@@ -10421,7 +10421,7 @@
static void __exit vmx_exit(void)
{
-#ifdef CONFIG_KEXEC
+#ifdef CONFIG_KEXEC_CORE
RCU_INIT_POINTER(crash_vmclear_loaded_vmcss, NULL);
synchronize_rcu();
#endif
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 1e7e76e..a60bdbc 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -5943,6 +5943,7 @@
put_smstate(u32, buf, offset, process_smi_get_segment_flags(&seg));
}
+#ifdef CONFIG_X86_64
static void process_smi_save_seg_64(struct kvm_vcpu *vcpu, char *buf, int n)
{
struct kvm_segment seg;
@@ -5958,6 +5959,7 @@
put_smstate(u32, buf, offset + 4, seg.limit);
put_smstate(u64, buf, offset + 8, seg.base);
}
+#endif
static void process_smi_save_state_32(struct kvm_vcpu *vcpu, char *buf)
{
diff --git a/arch/x86/mm/mpx.c b/arch/x86/mm/mpx.c
index db1b0bc..134948b 100644
--- a/arch/x86/mm/mpx.c
+++ b/arch/x86/mm/mpx.c
@@ -42,58 +42,21 @@
*/
static unsigned long mpx_mmap(unsigned long len)
{
- unsigned long ret;
- unsigned long addr, pgoff;
struct mm_struct *mm = current->mm;
- vm_flags_t vm_flags;
- struct vm_area_struct *vma;
+ unsigned long addr, populate;
/* Only bounds table can be allocated here */
if (len != mpx_bt_size_bytes(mm))
return -EINVAL;
down_write(&mm->mmap_sem);
-
- /* Too many mappings? */
- if (mm->map_count > sysctl_max_map_count) {
- ret = -ENOMEM;
- goto out;
- }
-
- /* Obtain the address to map to. we verify (or select) it and ensure
- * that it represents a valid section of the address space.
- */
- addr = get_unmapped_area(NULL, 0, len, 0, MAP_ANONYMOUS | MAP_PRIVATE);
- if (addr & ~PAGE_MASK) {
- ret = addr;
- goto out;
- }
-
- vm_flags = VM_READ | VM_WRITE | VM_MPX |
- mm->def_flags | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC;
-
- /* Set pgoff according to addr for anon_vma */
- pgoff = addr >> PAGE_SHIFT;
-
- ret = mmap_region(NULL, addr, len, vm_flags, pgoff);
- if (IS_ERR_VALUE(ret))
- goto out;
-
- vma = find_vma(mm, ret);
- if (!vma) {
- ret = -ENOMEM;
- goto out;
- }
-
- if (vm_flags & VM_LOCKED) {
- up_write(&mm->mmap_sem);
- mm_populate(ret, len);
- return ret;
- }
-
-out:
+ addr = do_mmap(NULL, 0, len, PROT_READ | PROT_WRITE,
+ MAP_ANONYMOUS | MAP_PRIVATE, VM_MPX, 0, &populate);
up_write(&mm->mmap_sem);
- return ret;
+ if (populate)
+ mm_populate(addr, populate);
+
+ return addr;
}
enum reg_type {
diff --git a/arch/x86/mm/srat.c b/arch/x86/mm/srat.c
index 66338a6..c2aea63 100644
--- a/arch/x86/mm/srat.c
+++ b/arch/x86/mm/srat.c
@@ -192,10 +192,11 @@
node_set(node, numa_nodes_parsed);
- pr_info("SRAT: Node %u PXM %u [mem %#010Lx-%#010Lx]%s\n",
+ pr_info("SRAT: Node %u PXM %u [mem %#010Lx-%#010Lx]%s%s\n",
node, pxm,
(unsigned long long) start, (unsigned long long) end - 1,
- hotpluggable ? " hotplug" : "");
+ hotpluggable ? " hotplug" : "",
+ ma->flags & ACPI_SRAT_MEM_NON_VOLATILE ? " non-volatile" : "");
/* Mark hotplug range in memblock. */
if (hotpluggable && memblock_mark_hotplug(start, ma->length))
diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c
index e4308fe..1db84c0 100644
--- a/arch/x86/platform/efi/efi.c
+++ b/arch/x86/platform/efi/efi.c
@@ -650,7 +650,7 @@
static void __init save_runtime_map(void)
{
-#ifdef CONFIG_KEXEC
+#ifdef CONFIG_KEXEC_CORE
efi_memory_desc_t *md;
void *tmp, *p, *q = NULL;
int count = 0;
@@ -748,7 +748,7 @@
static void __init kexec_enter_virtual_mode(void)
{
-#ifdef CONFIG_KEXEC
+#ifdef CONFIG_KEXEC_CORE
efi_memory_desc_t *md;
void *p;
diff --git a/arch/x86/platform/uv/uv_nmi.c b/arch/x86/platform/uv/uv_nmi.c
index 020c101..5c9f63f 100644
--- a/arch/x86/platform/uv/uv_nmi.c
+++ b/arch/x86/platform/uv/uv_nmi.c
@@ -492,7 +492,7 @@
touch_nmi_watchdog();
}
-#if defined(CONFIG_KEXEC)
+#if defined(CONFIG_KEXEC_CORE)
static atomic_t uv_nmi_kexec_failed;
static void uv_nmi_kdump(int cpu, int master, struct pt_regs *regs)
{
@@ -519,13 +519,13 @@
uv_nmi_sync_exit(0);
}
-#else /* !CONFIG_KEXEC */
+#else /* !CONFIG_KEXEC_CORE */
static inline void uv_nmi_kdump(int cpu, int master, struct pt_regs *regs)
{
if (master)
pr_err("UV: NMI kdump: KEXEC not supported in this kernel\n");
}
-#endif /* !CONFIG_KEXEC */
+#endif /* !CONFIG_KEXEC_CORE */
#ifdef CONFIG_KGDB
#ifdef CONFIG_KGDB_KDB
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index 2c50b44..9c479fe 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -2812,9 +2812,9 @@
return 0;
}
-static int do_remap_mfn(struct vm_area_struct *vma,
+static int do_remap_gfn(struct vm_area_struct *vma,
unsigned long addr,
- xen_pfn_t *mfn, int nr,
+ xen_pfn_t *gfn, int nr,
int *err_ptr, pgprot_t prot,
unsigned domid,
struct page **pages)
@@ -2830,14 +2830,14 @@
if (xen_feature(XENFEAT_auto_translated_physmap)) {
#ifdef CONFIG_XEN_PVH
/* We need to update the local page tables and the xen HAP */
- return xen_xlate_remap_gfn_array(vma, addr, mfn, nr, err_ptr,
+ return xen_xlate_remap_gfn_array(vma, addr, gfn, nr, err_ptr,
prot, domid, pages);
#else
return -EINVAL;
#endif
}
- rmd.mfn = mfn;
+ rmd.mfn = gfn;
rmd.prot = prot;
 /* We use the err_ptr to indicate whether we are doing a contiguous
 * mapping or a discontiguous mapping. */
@@ -2865,8 +2865,8 @@
batch_left, &done, domid);
/*
- * @err_ptr may be the same buffer as @mfn, so
- * only clear it after each chunk of @mfn is
+ * @err_ptr may be the same buffer as @gfn, so
+ * only clear it after each chunk of @gfn is
* used.
*/
if (err_ptr) {
@@ -2896,19 +2896,19 @@
return err < 0 ? err : mapped;
}
-int xen_remap_domain_mfn_range(struct vm_area_struct *vma,
+int xen_remap_domain_gfn_range(struct vm_area_struct *vma,
unsigned long addr,
- xen_pfn_t mfn, int nr,
+ xen_pfn_t gfn, int nr,
pgprot_t prot, unsigned domid,
struct page **pages)
{
- return do_remap_mfn(vma, addr, &mfn, nr, NULL, prot, domid, pages);
+ return do_remap_gfn(vma, addr, &gfn, nr, NULL, prot, domid, pages);
}
-EXPORT_SYMBOL_GPL(xen_remap_domain_mfn_range);
+EXPORT_SYMBOL_GPL(xen_remap_domain_gfn_range);
-int xen_remap_domain_mfn_array(struct vm_area_struct *vma,
+int xen_remap_domain_gfn_array(struct vm_area_struct *vma,
unsigned long addr,
- xen_pfn_t *mfn, int nr,
+ xen_pfn_t *gfn, int nr,
int *err_ptr, pgprot_t prot,
unsigned domid, struct page **pages)
{
@@ -2917,13 +2917,13 @@
* cause of "wrong memory was mapped in".
*/
BUG_ON(err_ptr == NULL);
- return do_remap_mfn(vma, addr, mfn, nr, err_ptr, prot, domid, pages);
+ return do_remap_gfn(vma, addr, gfn, nr, err_ptr, prot, domid, pages);
}
-EXPORT_SYMBOL_GPL(xen_remap_domain_mfn_array);
+EXPORT_SYMBOL_GPL(xen_remap_domain_gfn_array);
/* Returns: 0 success */
-int xen_unmap_domain_mfn_range(struct vm_area_struct *vma,
+int xen_unmap_domain_gfn_range(struct vm_area_struct *vma,
int numpgs, struct page **pages)
{
if (!pages || !xen_feature(XENFEAT_auto_translated_physmap))
@@ -2935,4 +2935,4 @@
return -EINVAL;
#endif
}
-EXPORT_SYMBOL_GPL(xen_unmap_domain_mfn_range);
+EXPORT_SYMBOL_GPL(xen_unmap_domain_gfn_range);
diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c
index 2a9ff73..3f4ebf0 100644
--- a/arch/x86/xen/smp.c
+++ b/arch/x86/xen/smp.c
@@ -453,7 +453,7 @@
}
#endif
ctxt->user_regs.esp = idle->thread.sp0 - sizeof(struct pt_regs);
- ctxt->ctrlreg[3] = xen_pfn_to_cr3(virt_to_mfn(swapper_pg_dir));
+ ctxt->ctrlreg[3] = xen_pfn_to_cr3(virt_to_gfn(swapper_pg_dir));
if (HYPERVISOR_vcpu_op(VCPUOP_initialise, cpu, ctxt))
BUG();
diff --git a/arch/xtensa/include/asm/dma-mapping.h b/arch/xtensa/include/asm/dma-mapping.h
index f01cb30..4427f38 100644
--- a/arch/xtensa/include/asm/dma-mapping.h
+++ b/arch/xtensa/include/asm/dma-mapping.h
@@ -32,66 +32,6 @@
#include <asm-generic/dma-mapping-common.h>
-#define dma_alloc_noncoherent(d, s, h, f) dma_alloc_attrs(d, s, h, f, NULL)
-#define dma_free_noncoherent(d, s, v, h) dma_free_attrs(d, s, v, h, NULL)
-#define dma_alloc_coherent(d, s, h, f) dma_alloc_attrs(d, s, h, f, NULL)
-#define dma_free_coherent(d, s, c, h) dma_free_attrs(d, s, c, h, NULL)
-
-static inline void *dma_alloc_attrs(struct device *dev, size_t size,
- dma_addr_t *dma_handle, gfp_t gfp,
- struct dma_attrs *attrs)
-{
- void *ret;
- struct dma_map_ops *ops = get_dma_ops(dev);
-
- if (dma_alloc_from_coherent(dev, size, dma_handle, &ret))
- return ret;
-
- ret = ops->alloc(dev, size, dma_handle, gfp, attrs);
- debug_dma_alloc_coherent(dev, size, *dma_handle, ret);
-
- return ret;
-}
-
-static inline void dma_free_attrs(struct device *dev, size_t size,
- void *vaddr, dma_addr_t dma_handle,
- struct dma_attrs *attrs)
-{
- struct dma_map_ops *ops = get_dma_ops(dev);
-
- if (dma_release_from_coherent(dev, get_order(size), vaddr))
- return;
-
- ops->free(dev, size, vaddr, dma_handle, attrs);
- debug_dma_free_coherent(dev, size, vaddr, dma_handle);
-}
-
-static inline int
-dma_mapping_error(struct device *dev, dma_addr_t dma_addr)
-{
- struct dma_map_ops *ops = get_dma_ops(dev);
-
- debug_dma_mapping_error(dev, dma_addr);
- return ops->mapping_error(dev, dma_addr);
-}
-
-static inline int
-dma_supported(struct device *dev, u64 mask)
-{
- return 1;
-}
-
-static inline int
-dma_set_mask(struct device *dev, u64 mask)
-{
- if(!dev->dma_mask || !dma_supported(dev, mask))
- return -EIO;
-
- *dev->dma_mask = mask;
-
- return 0;
-}
-
void dma_cache_sync(struct device *dev, void *vaddr, size_t size,
enum dma_data_direction direction);
diff --git a/block/bio.c b/block/bio.c
index 515b543..ad3f276 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -1990,7 +1990,7 @@
get_io_context_active(ioc);
bio->bi_ioc = ioc;
- bio->bi_css = task_get_css(current, blkio_cgrp_id);
+ bio->bi_css = task_get_css(current, io_cgrp_id);
return 0;
}
EXPORT_SYMBOL_GPL(bio_associate_current);
diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
index d6283b3..ac8370c 100644
--- a/block/blk-cgroup.c
+++ b/block/blk-cgroup.c
@@ -24,6 +24,7 @@
#include <linux/genhd.h>
#include <linux/delay.h>
#include <linux/atomic.h>
+#include <linux/ctype.h>
#include <linux/blk-cgroup.h>
#include "blk.h"
@@ -68,9 +69,14 @@
return;
for (i = 0; i < BLKCG_MAX_POLS; i++)
- kfree(blkg->pd[i]);
+ if (blkg->pd[i])
+ blkcg_policy[i]->pd_free_fn(blkg->pd[i]);
- blk_exit_rl(&blkg->rl);
+ if (blkg->blkcg != &blkcg_root)
+ blk_exit_rl(&blkg->rl);
+
+ blkg_rwstat_exit(&blkg->stat_ios);
+ blkg_rwstat_exit(&blkg->stat_bytes);
kfree(blkg);
}
@@ -93,6 +99,10 @@
if (!blkg)
return NULL;
+ if (blkg_rwstat_init(&blkg->stat_bytes, gfp_mask) ||
+ blkg_rwstat_init(&blkg->stat_ios, gfp_mask))
+ goto err_free;
+
blkg->q = q;
INIT_LIST_HEAD(&blkg->q_node);
blkg->blkcg = blkcg;
@@ -113,7 +123,7 @@
continue;
/* alloc per-policy data and attach it to blkg */
- pd = kzalloc_node(pol->pd_size, gfp_mask, q->node);
+ pd = pol->pd_alloc_fn(gfp_mask, q->node);
if (!pd)
goto err_free;
@@ -129,26 +139,11 @@
return NULL;
}
-/**
- * __blkg_lookup - internal version of blkg_lookup()
- * @blkcg: blkcg of interest
- * @q: request_queue of interest
- * @update_hint: whether to update lookup hint with the result or not
- *
- * This is internal version and shouldn't be used by policy
- * implementations. Looks up blkgs for the @blkcg - @q pair regardless of
- * @q's bypass state. If @update_hint is %true, the caller should be
- * holding @q->queue_lock and lookup hint is updated on success.
- */
-struct blkcg_gq *__blkg_lookup(struct blkcg *blkcg, struct request_queue *q,
- bool update_hint)
+struct blkcg_gq *blkg_lookup_slowpath(struct blkcg *blkcg,
+ struct request_queue *q, bool update_hint)
{
struct blkcg_gq *blkg;
- blkg = rcu_dereference(blkcg->blkg_hint);
- if (blkg && blkg->q == q)
- return blkg;
-
/*
* Hint didn't match. Look up from the radix tree. Note that the
* hint can only be updated under queue_lock as otherwise @blkg
@@ -166,29 +161,11 @@
return NULL;
}
-
-/**
- * blkg_lookup - lookup blkg for the specified blkcg - q pair
- * @blkcg: blkcg of interest
- * @q: request_queue of interest
- *
- * Lookup blkg for the @blkcg - @q pair. This function should be called
- * under RCU read lock and is guaranteed to return %NULL if @q is bypassing
- * - see blk_queue_bypass_start() for details.
- */
-struct blkcg_gq *blkg_lookup(struct blkcg *blkcg, struct request_queue *q)
-{
- WARN_ON_ONCE(!rcu_read_lock_held());
-
- if (unlikely(blk_queue_bypass(q)))
- return NULL;
- return __blkg_lookup(blkcg, q, false);
-}
-EXPORT_SYMBOL_GPL(blkg_lookup);
+EXPORT_SYMBOL_GPL(blkg_lookup_slowpath);
/*
* If @new_blkg is %NULL, this function tries to allocate a new one as
- * necessary using %GFP_ATOMIC. @new_blkg is always consumed on return.
+ * necessary using %GFP_NOWAIT. @new_blkg is always consumed on return.
*/
static struct blkcg_gq *blkg_create(struct blkcg *blkcg,
struct request_queue *q,
@@ -203,12 +180,12 @@
/* blkg holds a reference to blkcg */
if (!css_tryget_online(&blkcg->css)) {
- ret = -EINVAL;
+ ret = -ENODEV;
goto err_free_blkg;
}
wb_congested = wb_congested_get_create(&q->backing_dev_info,
- blkcg->css.id, GFP_ATOMIC);
+ blkcg->css.id, GFP_NOWAIT);
if (!wb_congested) {
ret = -ENOMEM;
goto err_put_css;
@@ -216,7 +193,7 @@
/* allocate */
if (!new_blkg) {
- new_blkg = blkg_alloc(blkcg, q, GFP_ATOMIC);
+ new_blkg = blkg_alloc(blkcg, q, GFP_NOWAIT);
if (unlikely(!new_blkg)) {
ret = -ENOMEM;
goto err_put_congested;
@@ -229,7 +206,7 @@
if (blkcg_parent(blkcg)) {
blkg->parent = __blkg_lookup(blkcg_parent(blkcg), q, false);
if (WARN_ON_ONCE(!blkg->parent)) {
- ret = -EINVAL;
+ ret = -ENODEV;
goto err_put_congested;
}
blkg_get(blkg->parent);
@@ -240,7 +217,7 @@
struct blkcg_policy *pol = blkcg_policy[i];
if (blkg->pd[i] && pol->pd_init_fn)
- pol->pd_init_fn(blkg);
+ pol->pd_init_fn(blkg->pd[i]);
}
/* insert */
@@ -254,7 +231,7 @@
struct blkcg_policy *pol = blkcg_policy[i];
if (blkg->pd[i] && pol->pd_online_fn)
- pol->pd_online_fn(blkg);
+ pol->pd_online_fn(blkg->pd[i]);
}
}
blkg->online = true;
@@ -303,7 +280,7 @@
* we shouldn't allow anything to go through for a bypassing queue.
*/
if (unlikely(blk_queue_bypass(q)))
- return ERR_PTR(blk_queue_dying(q) ? -EINVAL : -EBUSY);
+ return ERR_PTR(blk_queue_dying(q) ? -ENODEV : -EBUSY);
blkg = __blkg_lookup(blkcg, q, true);
if (blkg)
@@ -327,11 +304,11 @@
return blkg;
}
}
-EXPORT_SYMBOL_GPL(blkg_lookup_create);
static void blkg_destroy(struct blkcg_gq *blkg)
{
struct blkcg *blkcg = blkg->blkcg;
+ struct blkcg_gq *parent = blkg->parent;
int i;
lockdep_assert_held(blkg->q->queue_lock);
@@ -345,8 +322,14 @@
struct blkcg_policy *pol = blkcg_policy[i];
if (blkg->pd[i] && pol->pd_offline_fn)
- pol->pd_offline_fn(blkg);
+ pol->pd_offline_fn(blkg->pd[i]);
}
+
+ if (parent) {
+ blkg_rwstat_add_aux(&parent->stat_bytes, &blkg->stat_bytes);
+ blkg_rwstat_add_aux(&parent->stat_ios, &blkg->stat_ios);
+ }
+
blkg->online = false;
radix_tree_delete(&blkcg->blkg_tree, blkg->q->id);
@@ -400,15 +383,6 @@
void __blkg_release_rcu(struct rcu_head *rcu_head)
{
struct blkcg_gq *blkg = container_of(rcu_head, struct blkcg_gq, rcu_head);
- int i;
-
- /* tell policies that this one is being freed */
- for (i = 0; i < BLKCG_MAX_POLS; i++) {
- struct blkcg_policy *pol = blkcg_policy[i];
-
- if (blkg->pd[i] && pol->pd_exit_fn)
- pol->pd_exit_fn(blkg);
- }
/* release the blkcg and parent blkg refs this blkg has been holding */
css_put(&blkg->blkcg->css);
@@ -472,12 +446,14 @@
* anyway. If you get hit by a race, retry.
*/
hlist_for_each_entry(blkg, &blkcg->blkg_list, blkcg_node) {
+ blkg_rwstat_reset(&blkg->stat_bytes);
+ blkg_rwstat_reset(&blkg->stat_ios);
+
for (i = 0; i < BLKCG_MAX_POLS; i++) {
struct blkcg_policy *pol = blkcg_policy[i];
- if (blkcg_policy_enabled(blkg->q, pol) &&
- pol->pd_reset_stats_fn)
- pol->pd_reset_stats_fn(blkg);
+ if (blkg->pd[i] && pol->pd_reset_stats_fn)
+ pol->pd_reset_stats_fn(blkg->pd[i]);
}
}
@@ -486,13 +462,14 @@
return 0;
}
-static const char *blkg_dev_name(struct blkcg_gq *blkg)
+const char *blkg_dev_name(struct blkcg_gq *blkg)
{
/* some drivers (floppy) instantiate a queue w/o disk registered */
if (blkg->q->backing_dev_info.dev)
return dev_name(blkg->q->backing_dev_info.dev);
return NULL;
}
+EXPORT_SYMBOL_GPL(blkg_dev_name);
/**
* blkcg_print_blkgs - helper for printing per-blkg data
@@ -581,9 +558,10 @@
for (i = 0; i < BLKG_RWSTAT_NR; i++)
seq_printf(sf, "%s %s %llu\n", dname, rwstr[i],
- (unsigned long long)rwstat->cnt[i]);
+ (unsigned long long)atomic64_read(&rwstat->aux_cnt[i]));
- v = rwstat->cnt[BLKG_RWSTAT_READ] + rwstat->cnt[BLKG_RWSTAT_WRITE];
+ v = atomic64_read(&rwstat->aux_cnt[BLKG_RWSTAT_READ]) +
+ atomic64_read(&rwstat->aux_cnt[BLKG_RWSTAT_WRITE]);
seq_printf(sf, "%s Total %llu\n", dname, (unsigned long long)v);
return v;
}
@@ -620,31 +598,122 @@
}
EXPORT_SYMBOL_GPL(blkg_prfill_rwstat);
+static u64 blkg_prfill_rwstat_field(struct seq_file *sf,
+ struct blkg_policy_data *pd, int off)
+{
+ struct blkg_rwstat rwstat = blkg_rwstat_read((void *)pd->blkg + off);
+
+ return __blkg_prfill_rwstat(sf, pd, &rwstat);
+}
+
+/**
+ * blkg_print_stat_bytes - seq_show callback for blkg->stat_bytes
+ * @sf: seq_file to print to
+ * @v: unused
+ *
+ * To be used as cftype->seq_show to print blkg->stat_bytes.
+ * cftype->private must be set to the blkcg_policy.
+ */
+int blkg_print_stat_bytes(struct seq_file *sf, void *v)
+{
+ blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)),
+ blkg_prfill_rwstat_field, (void *)seq_cft(sf)->private,
+ offsetof(struct blkcg_gq, stat_bytes), true);
+ return 0;
+}
+EXPORT_SYMBOL_GPL(blkg_print_stat_bytes);
+
+/**
+ * blkg_print_stat_ios - seq_show callback for blkg->stat_ios
+ * @sf: seq_file to print to
+ * @v: unused
+ *
+ * To be used as cftype->seq_show to print blkg->stat_ios. cftype->private
+ * must be set to the blkcg_policy.
+ */
+int blkg_print_stat_ios(struct seq_file *sf, void *v)
+{
+ blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)),
+ blkg_prfill_rwstat_field, (void *)seq_cft(sf)->private,
+ offsetof(struct blkcg_gq, stat_ios), true);
+ return 0;
+}
+EXPORT_SYMBOL_GPL(blkg_print_stat_ios);
+
+static u64 blkg_prfill_rwstat_field_recursive(struct seq_file *sf,
+ struct blkg_policy_data *pd,
+ int off)
+{
+ struct blkg_rwstat rwstat = blkg_rwstat_recursive_sum(pd->blkg,
+ NULL, off);
+ return __blkg_prfill_rwstat(sf, pd, &rwstat);
+}
+
+/**
+ * blkg_print_stat_bytes_recursive - recursive version of blkg_print_stat_bytes
+ * @sf: seq_file to print to
+ * @v: unused
+ */
+int blkg_print_stat_bytes_recursive(struct seq_file *sf, void *v)
+{
+ blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)),
+ blkg_prfill_rwstat_field_recursive,
+ (void *)seq_cft(sf)->private,
+ offsetof(struct blkcg_gq, stat_bytes), true);
+ return 0;
+}
+EXPORT_SYMBOL_GPL(blkg_print_stat_bytes_recursive);
+
+/**
+ * blkg_print_stat_ios_recursive - recursive version of blkg_print_stat_ios
+ * @sf: seq_file to print to
+ * @v: unused
+ */
+int blkg_print_stat_ios_recursive(struct seq_file *sf, void *v)
+{
+ blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)),
+ blkg_prfill_rwstat_field_recursive,
+ (void *)seq_cft(sf)->private,
+ offsetof(struct blkcg_gq, stat_ios), true);
+ return 0;
+}
+EXPORT_SYMBOL_GPL(blkg_print_stat_ios_recursive);
+
/**
* blkg_stat_recursive_sum - collect hierarchical blkg_stat
- * @pd: policy private data of interest
- * @off: offset to the blkg_stat in @pd
+ * @blkg: blkg of interest
+ * @pol: blkcg_policy which contains the blkg_stat
+ * @off: offset to the blkg_stat in blkg_policy_data or @blkg
*
- * Collect the blkg_stat specified by @off from @pd and all its online
- * descendants and return the sum. The caller must be holding the queue
- * lock for online tests.
+ * Collect the blkg_stat specified by @blkg, @pol and @off and all its
+ * online descendants and their aux counts. The caller must be holding the
+ * queue lock for online tests.
+ *
+ * If @pol is NULL, blkg_stat is at @off bytes into @blkg; otherwise, it is
+ * at @off bytes into @blkg's blkg_policy_data of the policy.
*/
-u64 blkg_stat_recursive_sum(struct blkg_policy_data *pd, int off)
+u64 blkg_stat_recursive_sum(struct blkcg_gq *blkg,
+ struct blkcg_policy *pol, int off)
{
- struct blkcg_policy *pol = blkcg_policy[pd->plid];
struct blkcg_gq *pos_blkg;
struct cgroup_subsys_state *pos_css;
u64 sum = 0;
- lockdep_assert_held(pd->blkg->q->queue_lock);
+ lockdep_assert_held(blkg->q->queue_lock);
rcu_read_lock();
- blkg_for_each_descendant_pre(pos_blkg, pos_css, pd_to_blkg(pd)) {
- struct blkg_policy_data *pos_pd = blkg_to_pd(pos_blkg, pol);
- struct blkg_stat *stat = (void *)pos_pd + off;
+ blkg_for_each_descendant_pre(pos_blkg, pos_css, blkg) {
+ struct blkg_stat *stat;
- if (pos_blkg->online)
- sum += blkg_stat_read(stat);
+ if (!pos_blkg->online)
+ continue;
+
+ if (pol)
+ stat = (void *)blkg_to_pd(pos_blkg, pol) + off;
+ else
+ stat = (void *)blkg + off;
+
+ sum += blkg_stat_read(stat) + atomic64_read(&stat->aux_cnt);
}
rcu_read_unlock();
@@ -654,37 +723,43 @@
/**
* blkg_rwstat_recursive_sum - collect hierarchical blkg_rwstat
- * @pd: policy private data of interest
- * @off: offset to the blkg_stat in @pd
+ * @blkg: blkg of interest
+ * @pol: blkcg_policy which contains the blkg_rwstat
+ * @off: offset to the blkg_rwstat in blkg_policy_data or @blkg
*
- * Collect the blkg_rwstat specified by @off from @pd and all its online
- * descendants and return the sum. The caller must be holding the queue
- * lock for online tests.
+ * Collect the blkg_rwstat specified by @blkg, @pol and @off and all its
+ * online descendants and their aux counts. The caller must be holding the
+ * queue lock for online tests.
+ *
+ * If @pol is NULL, blkg_rwstat is at @off bytes into @blkg; otherwise, it
+ * is at @off bytes into @blkg's blkg_policy_data of the policy.
*/
-struct blkg_rwstat blkg_rwstat_recursive_sum(struct blkg_policy_data *pd,
- int off)
+struct blkg_rwstat blkg_rwstat_recursive_sum(struct blkcg_gq *blkg,
+ struct blkcg_policy *pol, int off)
{
- struct blkcg_policy *pol = blkcg_policy[pd->plid];
struct blkcg_gq *pos_blkg;
struct cgroup_subsys_state *pos_css;
struct blkg_rwstat sum = { };
int i;
- lockdep_assert_held(pd->blkg->q->queue_lock);
+ lockdep_assert_held(blkg->q->queue_lock);
rcu_read_lock();
- blkg_for_each_descendant_pre(pos_blkg, pos_css, pd_to_blkg(pd)) {
- struct blkg_policy_data *pos_pd = blkg_to_pd(pos_blkg, pol);
- struct blkg_rwstat *rwstat = (void *)pos_pd + off;
- struct blkg_rwstat tmp;
+ blkg_for_each_descendant_pre(pos_blkg, pos_css, blkg) {
+ struct blkg_rwstat *rwstat;
if (!pos_blkg->online)
continue;
- tmp = blkg_rwstat_read(rwstat);
+ if (pol)
+ rwstat = (void *)blkg_to_pd(pos_blkg, pol) + off;
+ else
+ rwstat = (void *)pos_blkg + off;
for (i = 0; i < BLKG_RWSTAT_NR; i++)
- sum.cnt[i] += tmp.cnt[i];
+ atomic64_add(atomic64_read(&rwstat->aux_cnt[i]) +
+ percpu_counter_sum_positive(&rwstat->cpu_cnt[i]),
+ &sum.aux_cnt[i]);
}
rcu_read_unlock();
@@ -700,29 +775,34 @@
* @ctx: blkg_conf_ctx to be filled
*
* Parse per-blkg config update from @input and initialize @ctx with the
- * result. @ctx->blkg points to the blkg to be updated and @ctx->v the new
- * value. This function returns with RCU read lock and queue lock held and
- * must be paired with blkg_conf_finish().
+ * result. @ctx->blkg points to the blkg to be updated and @ctx->body the
+ * part of @input following MAJ:MIN. This function returns with RCU read
+ * lock and queue lock held and must be paired with blkg_conf_finish().
*/
int blkg_conf_prep(struct blkcg *blkcg, const struct blkcg_policy *pol,
- const char *input, struct blkg_conf_ctx *ctx)
+ char *input, struct blkg_conf_ctx *ctx)
__acquires(rcu) __acquires(disk->queue->queue_lock)
{
struct gendisk *disk;
struct blkcg_gq *blkg;
unsigned int major, minor;
- unsigned long long v;
- int part, ret;
+ int key_len, part, ret;
+ char *body;
- if (sscanf(input, "%u:%u %llu", &major, &minor, &v) != 3)
+ if (sscanf(input, "%u:%u%n", &major, &minor, &key_len) != 2)
return -EINVAL;
+ body = input + key_len;
+ if (!isspace(*body))
+ return -EINVAL;
+ body = skip_spaces(body);
+
disk = get_gendisk(MKDEV(major, minor), &part);
if (!disk)
- return -EINVAL;
+ return -ENODEV;
if (part) {
put_disk(disk);
- return -EINVAL;
+ return -ENODEV;
}
rcu_read_lock();
@@ -731,7 +811,7 @@
if (blkcg_policy_enabled(disk->queue, pol))
blkg = blkg_lookup_create(blkcg, disk->queue);
else
- blkg = ERR_PTR(-EINVAL);
+ blkg = ERR_PTR(-EOPNOTSUPP);
if (IS_ERR(blkg)) {
ret = PTR_ERR(blkg);
@@ -753,7 +833,7 @@
ctx->disk = disk;
ctx->blkg = blkg;
- ctx->v = v;
+ ctx->body = body;
return 0;
}
EXPORT_SYMBOL_GPL(blkg_conf_prep);
@@ -774,8 +854,55 @@
}
EXPORT_SYMBOL_GPL(blkg_conf_finish);
+static int blkcg_print_stat(struct seq_file *sf, void *v)
+{
+ struct blkcg *blkcg = css_to_blkcg(seq_css(sf));
+ struct blkcg_gq *blkg;
+
+ rcu_read_lock();
+
+ hlist_for_each_entry_rcu(blkg, &blkcg->blkg_list, blkcg_node) {
+ const char *dname;
+ struct blkg_rwstat rwstat;
+ u64 rbytes, wbytes, rios, wios;
+
+ dname = blkg_dev_name(blkg);
+ if (!dname)
+ continue;
+
+ spin_lock_irq(blkg->q->queue_lock);
+
+ rwstat = blkg_rwstat_recursive_sum(blkg, NULL,
+ offsetof(struct blkcg_gq, stat_bytes));
+ rbytes = atomic64_read(&rwstat.aux_cnt[BLKG_RWSTAT_READ]);
+ wbytes = atomic64_read(&rwstat.aux_cnt[BLKG_RWSTAT_WRITE]);
+
+ rwstat = blkg_rwstat_recursive_sum(blkg, NULL,
+ offsetof(struct blkcg_gq, stat_ios));
+ rios = atomic64_read(&rwstat.aux_cnt[BLKG_RWSTAT_READ]);
+ wios = atomic64_read(&rwstat.aux_cnt[BLKG_RWSTAT_WRITE]);
+
+ spin_unlock_irq(blkg->q->queue_lock);
+
+ if (rbytes || wbytes || rios || wios)
+ seq_printf(sf, "%s rbytes=%llu wbytes=%llu rios=%llu wios=%llu\n",
+ dname, rbytes, wbytes, rios, wios);
+ }
+
+ rcu_read_unlock();
+ return 0;
+}
+
struct cftype blkcg_files[] = {
{
+ .name = "stat",
+ .seq_show = blkcg_print_stat,
+ },
+ { } /* terminate */
+};
+
+struct cftype blkcg_legacy_files[] = {
+ {
.name = "reset_stats",
.write_u64 = blkcg_reset_stats,
},
@@ -822,18 +949,19 @@
static void blkcg_css_free(struct cgroup_subsys_state *css)
{
struct blkcg *blkcg = css_to_blkcg(css);
+ int i;
mutex_lock(&blkcg_pol_mutex);
+
list_del(&blkcg->all_blkcgs_node);
+
+ for (i = 0; i < BLKCG_MAX_POLS; i++)
+ if (blkcg->cpd[i])
+ blkcg_policy[i]->cpd_free_fn(blkcg->cpd[i]);
+
mutex_unlock(&blkcg_pol_mutex);
- if (blkcg != &blkcg_root) {
- int i;
-
- for (i = 0; i < BLKCG_MAX_POLS; i++)
- kfree(blkcg->pd[i]);
- kfree(blkcg);
- }
+ kfree(blkcg);
}
static struct cgroup_subsys_state *
@@ -847,13 +975,12 @@
if (!parent_css) {
blkcg = &blkcg_root;
- goto done;
- }
-
- blkcg = kzalloc(sizeof(*blkcg), GFP_KERNEL);
- if (!blkcg) {
- ret = ERR_PTR(-ENOMEM);
- goto free_blkcg;
+ } else {
+ blkcg = kzalloc(sizeof(*blkcg), GFP_KERNEL);
+ if (!blkcg) {
+ ret = ERR_PTR(-ENOMEM);
+ goto free_blkcg;
+ }
}
for (i = 0; i < BLKCG_MAX_POLS ; i++) {
@@ -866,23 +993,23 @@
* check if the policy requires any specific per-cgroup
* data: if it does, allocate and initialize it.
*/
- if (!pol || !pol->cpd_size)
+ if (!pol || !pol->cpd_alloc_fn)
continue;
- BUG_ON(blkcg->pd[i]);
- cpd = kzalloc(pol->cpd_size, GFP_KERNEL);
+ cpd = pol->cpd_alloc_fn(GFP_KERNEL);
if (!cpd) {
ret = ERR_PTR(-ENOMEM);
goto free_pd_blkcg;
}
- blkcg->pd[i] = cpd;
+ blkcg->cpd[i] = cpd;
+ cpd->blkcg = blkcg;
cpd->plid = i;
- pol->cpd_init_fn(blkcg);
+ if (pol->cpd_init_fn)
+ pol->cpd_init_fn(cpd);
}
-done:
spin_lock_init(&blkcg->lock);
- INIT_RADIX_TREE(&blkcg->blkg_tree, GFP_ATOMIC);
+ INIT_RADIX_TREE(&blkcg->blkg_tree, GFP_NOWAIT);
INIT_HLIST_HEAD(&blkcg->blkg_list);
#ifdef CONFIG_CGROUP_WRITEBACK
INIT_LIST_HEAD(&blkcg->cgwb_list);
@@ -894,7 +1021,8 @@
free_pd_blkcg:
for (i--; i >= 0; i--)
- kfree(blkcg->pd[i]);
+ if (blkcg->cpd[i])
+ blkcg_policy[i]->cpd_free_fn(blkcg->cpd[i]);
free_blkcg:
kfree(blkcg);
mutex_unlock(&blkcg_pol_mutex);
@@ -938,7 +1066,7 @@
radix_tree_preload_end();
if (IS_ERR(blkg)) {
- kfree(new_blkg);
+ blkg_free(new_blkg);
return PTR_ERR(blkg);
}
@@ -1015,12 +1143,35 @@
return ret;
}
-struct cgroup_subsys blkio_cgrp_subsys = {
+static void blkcg_bind(struct cgroup_subsys_state *root_css)
+{
+ int i;
+
+ mutex_lock(&blkcg_pol_mutex);
+
+ for (i = 0; i < BLKCG_MAX_POLS; i++) {
+ struct blkcg_policy *pol = blkcg_policy[i];
+ struct blkcg *blkcg;
+
+ if (!pol || !pol->cpd_bind_fn)
+ continue;
+
+ list_for_each_entry(blkcg, &all_blkcgs, all_blkcgs_node)
+ if (blkcg->cpd[pol->plid])
+ pol->cpd_bind_fn(blkcg->cpd[pol->plid]);
+ }
+ mutex_unlock(&blkcg_pol_mutex);
+}
+
+struct cgroup_subsys io_cgrp_subsys = {
.css_alloc = blkcg_css_alloc,
.css_offline = blkcg_css_offline,
.css_free = blkcg_css_free,
.can_attach = blkcg_can_attach,
- .legacy_cftypes = blkcg_files,
+ .bind = blkcg_bind,
+ .dfl_cftypes = blkcg_files,
+ .legacy_cftypes = blkcg_legacy_files,
+ .legacy_name = "blkio",
#ifdef CONFIG_MEMCG
/*
* This ensures that, if available, memcg is automatically enabled
@@ -1030,7 +1181,7 @@
.depends_on = 1 << memory_cgrp_id,
#endif
};
-EXPORT_SYMBOL_GPL(blkio_cgrp_subsys);
+EXPORT_SYMBOL_GPL(io_cgrp_subsys);
/**
* blkcg_activate_policy - activate a blkcg policy on a request_queue
@@ -1051,65 +1202,54 @@
int blkcg_activate_policy(struct request_queue *q,
const struct blkcg_policy *pol)
{
- LIST_HEAD(pds);
+ struct blkg_policy_data *pd_prealloc = NULL;
struct blkcg_gq *blkg;
- struct blkg_policy_data *pd, *nd;
- int cnt = 0, ret;
+ int ret;
if (blkcg_policy_enabled(q, pol))
return 0;
- /* count and allocate policy_data for all existing blkgs */
blk_queue_bypass_start(q);
- spin_lock_irq(q->queue_lock);
- list_for_each_entry(blkg, &q->blkg_list, q_node)
- cnt++;
- spin_unlock_irq(q->queue_lock);
-
- /* allocate per-blkg policy data for all existing blkgs */
- while (cnt--) {
- pd = kzalloc_node(pol->pd_size, GFP_KERNEL, q->node);
- if (!pd) {
+pd_prealloc:
+ if (!pd_prealloc) {
+ pd_prealloc = pol->pd_alloc_fn(GFP_KERNEL, q->node);
+ if (!pd_prealloc) {
ret = -ENOMEM;
- goto out_free;
+ goto out_bypass_end;
}
- list_add_tail(&pd->alloc_node, &pds);
}
- /*
- * Install the allocated pds and cpds. With @q bypassing, no new blkg
- * should have been created while the queue lock was dropped.
- */
spin_lock_irq(q->queue_lock);
list_for_each_entry(blkg, &q->blkg_list, q_node) {
- if (WARN_ON(list_empty(&pds))) {
- /* umm... this shouldn't happen, just abort */
- ret = -ENOMEM;
- goto out_unlock;
- }
- pd = list_first_entry(&pds, struct blkg_policy_data, alloc_node);
- list_del_init(&pd->alloc_node);
+ struct blkg_policy_data *pd;
- /* grab blkcg lock too while installing @pd on @blkg */
- spin_lock(&blkg->blkcg->lock);
+ if (blkg->pd[pol->plid])
+ continue;
+
+ pd = pol->pd_alloc_fn(GFP_NOWAIT, q->node);
+ if (!pd)
+ swap(pd, pd_prealloc);
+ if (!pd) {
+ spin_unlock_irq(q->queue_lock);
+ goto pd_prealloc;
+ }
blkg->pd[pol->plid] = pd;
pd->blkg = blkg;
pd->plid = pol->plid;
- pol->pd_init_fn(blkg);
-
- spin_unlock(&blkg->blkcg->lock);
+ if (pol->pd_init_fn)
+ pol->pd_init_fn(pd);
}
__set_bit(pol->plid, q->blkcg_pols);
ret = 0;
-out_unlock:
+
spin_unlock_irq(q->queue_lock);
-out_free:
+out_bypass_end:
blk_queue_bypass_end(q);
- list_for_each_entry_safe(pd, nd, &pds, alloc_node)
- kfree(pd);
+ if (pd_prealloc)
+ pol->pd_free_fn(pd_prealloc);
return ret;
}
EXPORT_SYMBOL_GPL(blkcg_activate_policy);
@@ -1139,13 +1279,12 @@
/* grab blkcg lock too while removing @pd from @blkg */
spin_lock(&blkg->blkcg->lock);
- if (pol->pd_offline_fn)
- pol->pd_offline_fn(blkg);
- if (pol->pd_exit_fn)
- pol->pd_exit_fn(blkg);
-
- kfree(blkg->pd[pol->plid]);
- blkg->pd[pol->plid] = NULL;
+ if (blkg->pd[pol->plid]) {
+ if (pol->pd_offline_fn)
+ pol->pd_offline_fn(blkg->pd[pol->plid]);
+ pol->pd_free_fn(blkg->pd[pol->plid]);
+ blkg->pd[pol->plid] = NULL;
+ }
spin_unlock(&blkg->blkcg->lock);
}
@@ -1167,9 +1306,6 @@
struct blkcg *blkcg;
int i, ret;
- if (WARN_ON(pol->pd_size < sizeof(struct blkg_policy_data)))
- return -EINVAL;
-
mutex_lock(&blkcg_pol_register_mutex);
mutex_lock(&blkcg_pol_mutex);
@@ -1186,36 +1322,42 @@
blkcg_policy[pol->plid] = pol;
/* allocate and install cpd's */
- if (pol->cpd_size) {
+ if (pol->cpd_alloc_fn) {
list_for_each_entry(blkcg, &all_blkcgs, all_blkcgs_node) {
struct blkcg_policy_data *cpd;
- cpd = kzalloc(pol->cpd_size, GFP_KERNEL);
+ cpd = pol->cpd_alloc_fn(GFP_KERNEL);
if (!cpd) {
mutex_unlock(&blkcg_pol_mutex);
goto err_free_cpds;
}
- blkcg->pd[pol->plid] = cpd;
+ blkcg->cpd[pol->plid] = cpd;
+ cpd->blkcg = blkcg;
cpd->plid = pol->plid;
- pol->cpd_init_fn(blkcg);
+ pol->cpd_init_fn(cpd);
}
}
mutex_unlock(&blkcg_pol_mutex);
/* everything is in place, add intf files for the new policy */
- if (pol->cftypes)
- WARN_ON(cgroup_add_legacy_cftypes(&blkio_cgrp_subsys,
- pol->cftypes));
+ if (pol->dfl_cftypes)
+ WARN_ON(cgroup_add_dfl_cftypes(&io_cgrp_subsys,
+ pol->dfl_cftypes));
+ if (pol->legacy_cftypes)
+ WARN_ON(cgroup_add_legacy_cftypes(&io_cgrp_subsys,
+ pol->legacy_cftypes));
mutex_unlock(&blkcg_pol_register_mutex);
return 0;
err_free_cpds:
- if (pol->cpd_size) {
+ if (pol->cpd_alloc_fn) {
list_for_each_entry(blkcg, &all_blkcgs, all_blkcgs_node) {
- kfree(blkcg->pd[pol->plid]);
- blkcg->pd[pol->plid] = NULL;
+ if (blkcg->cpd[pol->plid]) {
+ pol->cpd_free_fn(blkcg->cpd[pol->plid]);
+ blkcg->cpd[pol->plid] = NULL;
+ }
}
}
blkcg_policy[pol->plid] = NULL;
@@ -1242,16 +1384,20 @@
goto out_unlock;
/* kill the intf files first */
- if (pol->cftypes)
- cgroup_rm_cftypes(pol->cftypes);
+ if (pol->dfl_cftypes)
+ cgroup_rm_cftypes(pol->dfl_cftypes);
+ if (pol->legacy_cftypes)
+ cgroup_rm_cftypes(pol->legacy_cftypes);
/* remove cpds and unregister */
mutex_lock(&blkcg_pol_mutex);
- if (pol->cpd_size) {
+ if (pol->cpd_alloc_fn) {
list_for_each_entry(blkcg, &all_blkcgs, all_blkcgs_node) {
- kfree(blkcg->pd[pol->plid]);
- blkcg->pd[pol->plid] = NULL;
+ if (blkcg->cpd[pol->plid]) {
+ pol->cpd_free_fn(blkcg->cpd[pol->plid]);
+ blkcg->cpd[pol->plid] = NULL;
+ }
}
}
blkcg_policy[pol->plid] = NULL;
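
On the default hierarchy the controller is now named "io", and blkcg_print_stat() backs the new "stat" file (io.stat), emitting one line per device with the recursive byte and IO counts gathered from blkg->stat_bytes and blkg->stat_ios. An illustrative line with made-up numbers, matching the seq_printf() format above:

    8:16 rbytes=1459200 wbytes=314572800 rios=192 wios=353
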
diff --git a/block/blk-core.c b/block/blk-core.c
index 60912e9..2eb722d 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -1888,8 +1888,8 @@
*/
create_io_context(GFP_ATOMIC, q->node);
- if (blk_throtl_bio(q, bio))
- return false; /* throttled, will be resubmitted later */
+ if (!blkcg_bio_issue_check(q, bio))
+ return false;
trace_block_bio_queue(q, bio);
return true;
diff --git a/block/blk-throttle.c b/block/blk-throttle.c
index b231935..c75a263 100644
--- a/block/blk-throttle.c
+++ b/block/blk-throttle.c
@@ -83,14 +83,6 @@
#define rb_entry_tg(node) rb_entry((node), struct throtl_grp, rb_node)
-/* Per-cpu group stats */
-struct tg_stats_cpu {
- /* total bytes transferred */
- struct blkg_rwstat service_bytes;
- /* total IOs serviced, post merge */
- struct blkg_rwstat serviced;
-};
-
struct throtl_grp {
/* must be the first member */
struct blkg_policy_data pd;
@@ -141,12 +133,6 @@
/* When did we start a new slice */
unsigned long slice_start[2];
unsigned long slice_end[2];
-
- /* Per cpu stats pointer */
- struct tg_stats_cpu __percpu *stats_cpu;
-
- /* List of tgs waiting for per cpu stats memory to be allocated */
- struct list_head stats_alloc_node;
};
struct throtl_data
@@ -168,13 +154,6 @@
struct work_struct dispatch_work;
};
-/* list and work item to allocate percpu group stats */
-static DEFINE_SPINLOCK(tg_stats_alloc_lock);
-static LIST_HEAD(tg_stats_alloc_list);
-
-static void tg_stats_alloc_fn(struct work_struct *);
-static DECLARE_DELAYED_WORK(tg_stats_alloc_work, tg_stats_alloc_fn);
-
static void throtl_pending_timer_fn(unsigned long arg);
static inline struct throtl_grp *pd_to_tg(struct blkg_policy_data *pd)
@@ -192,11 +171,6 @@
return pd_to_blkg(&tg->pd);
}
-static inline struct throtl_grp *td_root_tg(struct throtl_data *td)
-{
- return blkg_to_tg(td->queue->root_blkg);
-}
-
/**
 * sq_to_tg - return the throtl_grp the specified service queue belongs to
* @sq: the throtl_service_queue of interest
@@ -256,53 +230,6 @@
} \
} while (0)
-static void tg_stats_init(struct tg_stats_cpu *tg_stats)
-{
- blkg_rwstat_init(&tg_stats->service_bytes);
- blkg_rwstat_init(&tg_stats->serviced);
-}
-
-/*
- * Worker for allocating per cpu stat for tgs. This is scheduled on the
- * system_wq once there are some groups on the alloc_list waiting for
- * allocation.
- */
-static void tg_stats_alloc_fn(struct work_struct *work)
-{
- static struct tg_stats_cpu *stats_cpu; /* this fn is non-reentrant */
- struct delayed_work *dwork = to_delayed_work(work);
- bool empty = false;
-
-alloc_stats:
- if (!stats_cpu) {
- int cpu;
-
- stats_cpu = alloc_percpu(struct tg_stats_cpu);
- if (!stats_cpu) {
- /* allocation failed, try again after some time */
- schedule_delayed_work(dwork, msecs_to_jiffies(10));
- return;
- }
- for_each_possible_cpu(cpu)
- tg_stats_init(per_cpu_ptr(stats_cpu, cpu));
- }
-
- spin_lock_irq(&tg_stats_alloc_lock);
-
- if (!list_empty(&tg_stats_alloc_list)) {
- struct throtl_grp *tg = list_first_entry(&tg_stats_alloc_list,
- struct throtl_grp,
- stats_alloc_node);
- swap(tg->stats_cpu, stats_cpu);
- list_del_init(&tg->stats_alloc_node);
- }
-
- empty = list_empty(&tg_stats_alloc_list);
- spin_unlock_irq(&tg_stats_alloc_lock);
- if (!empty)
- goto alloc_stats;
-}
-
static void throtl_qnode_init(struct throtl_qnode *qn, struct throtl_grp *tg)
{
INIT_LIST_HEAD(&qn->node);
@@ -387,29 +314,46 @@
}
/* init a service_queue, assumes the caller zeroed it */
-static void throtl_service_queue_init(struct throtl_service_queue *sq,
- struct throtl_service_queue *parent_sq)
+static void throtl_service_queue_init(struct throtl_service_queue *sq)
{
INIT_LIST_HEAD(&sq->queued[0]);
INIT_LIST_HEAD(&sq->queued[1]);
sq->pending_tree = RB_ROOT;
- sq->parent_sq = parent_sq;
setup_timer(&sq->pending_timer, throtl_pending_timer_fn,
(unsigned long)sq);
}
-static void throtl_service_queue_exit(struct throtl_service_queue *sq)
+static struct blkg_policy_data *throtl_pd_alloc(gfp_t gfp, int node)
{
- del_timer_sync(&sq->pending_timer);
+ struct throtl_grp *tg;
+ int rw;
+
+ tg = kzalloc_node(sizeof(*tg), gfp, node);
+ if (!tg)
+ return NULL;
+
+ throtl_service_queue_init(&tg->service_queue);
+
+ for (rw = READ; rw <= WRITE; rw++) {
+ throtl_qnode_init(&tg->qnode_on_self[rw], tg);
+ throtl_qnode_init(&tg->qnode_on_parent[rw], tg);
+ }
+
+ RB_CLEAR_NODE(&tg->rb_node);
+ tg->bps[READ] = -1;
+ tg->bps[WRITE] = -1;
+ tg->iops[READ] = -1;
+ tg->iops[WRITE] = -1;
+
+ return &tg->pd;
}
-static void throtl_pd_init(struct blkcg_gq *blkg)
+static void throtl_pd_init(struct blkg_policy_data *pd)
{
- struct throtl_grp *tg = blkg_to_tg(blkg);
+ struct throtl_grp *tg = pd_to_tg(pd);
+ struct blkcg_gq *blkg = tg_to_blkg(tg);
struct throtl_data *td = blkg->q->td;
- struct throtl_service_queue *parent_sq;
- unsigned long flags;
- int rw;
+ struct throtl_service_queue *sq = &tg->service_queue;
/*
* If on the default hierarchy, we switch to properly hierarchical
@@ -424,35 +368,10 @@
* Limits of a group don't interact with limits of other groups
* regardless of the position of the group in the hierarchy.
*/
- parent_sq = &td->service_queue;
-
+ sq->parent_sq = &td->service_queue;
if (cgroup_on_dfl(blkg->blkcg->css.cgroup) && blkg->parent)
- parent_sq = &blkg_to_tg(blkg->parent)->service_queue;
-
- throtl_service_queue_init(&tg->service_queue, parent_sq);
-
- for (rw = READ; rw <= WRITE; rw++) {
- throtl_qnode_init(&tg->qnode_on_self[rw], tg);
- throtl_qnode_init(&tg->qnode_on_parent[rw], tg);
- }
-
- RB_CLEAR_NODE(&tg->rb_node);
+ sq->parent_sq = &blkg_to_tg(blkg->parent)->service_queue;
tg->td = td;
-
- tg->bps[READ] = -1;
- tg->bps[WRITE] = -1;
- tg->iops[READ] = -1;
- tg->iops[WRITE] = -1;
-
- /*
- * Ugh... We need to perform per-cpu allocation for tg->stats_cpu
- * but percpu allocator can't be called from IO path. Queue tg on
- * tg_stats_alloc_list and allocate from work item.
- */
- spin_lock_irqsave(&tg_stats_alloc_lock, flags);
- list_add(&tg->stats_alloc_node, &tg_stats_alloc_list);
- schedule_delayed_work(&tg_stats_alloc_work, 0);
- spin_unlock_irqrestore(&tg_stats_alloc_lock, flags);
}
/*
@@ -470,83 +389,21 @@
(tg->bps[rw] != -1 || tg->iops[rw] != -1);
}
-static void throtl_pd_online(struct blkcg_gq *blkg)
+static void throtl_pd_online(struct blkg_policy_data *pd)
{
/*
 * We don't want new groups to escape the limits of their ancestors.
* Update has_rules[] after a new group is brought online.
*/
- tg_update_has_rules(blkg_to_tg(blkg));
+ tg_update_has_rules(pd_to_tg(pd));
}
-static void throtl_pd_exit(struct blkcg_gq *blkg)
+static void throtl_pd_free(struct blkg_policy_data *pd)
{
- struct throtl_grp *tg = blkg_to_tg(blkg);
- unsigned long flags;
+ struct throtl_grp *tg = pd_to_tg(pd);
- spin_lock_irqsave(&tg_stats_alloc_lock, flags);
- list_del_init(&tg->stats_alloc_node);
- spin_unlock_irqrestore(&tg_stats_alloc_lock, flags);
-
- free_percpu(tg->stats_cpu);
-
- throtl_service_queue_exit(&tg->service_queue);
-}
-
-static void throtl_pd_reset_stats(struct blkcg_gq *blkg)
-{
- struct throtl_grp *tg = blkg_to_tg(blkg);
- int cpu;
-
- if (tg->stats_cpu == NULL)
- return;
-
- for_each_possible_cpu(cpu) {
- struct tg_stats_cpu *sc = per_cpu_ptr(tg->stats_cpu, cpu);
-
- blkg_rwstat_reset(&sc->service_bytes);
- blkg_rwstat_reset(&sc->serviced);
- }
-}
-
-static struct throtl_grp *throtl_lookup_tg(struct throtl_data *td,
- struct blkcg *blkcg)
-{
- /*
- * This is the common case when there are no blkcgs. Avoid lookup
- * in this case
- */
- if (blkcg == &blkcg_root)
- return td_root_tg(td);
-
- return blkg_to_tg(blkg_lookup(blkcg, td->queue));
-}
-
-static struct throtl_grp *throtl_lookup_create_tg(struct throtl_data *td,
- struct blkcg *blkcg)
-{
- struct request_queue *q = td->queue;
- struct throtl_grp *tg = NULL;
-
- /*
- * This is the common case when there are no blkcgs. Avoid lookup
- * in this case
- */
- if (blkcg == &blkcg_root) {
- tg = td_root_tg(td);
- } else {
- struct blkcg_gq *blkg;
-
- blkg = blkg_lookup_create(blkcg, q);
-
- /* if %NULL and @q is alive, fall back to root_tg */
- if (!IS_ERR(blkg))
- tg = blkg_to_tg(blkg);
- else if (!blk_queue_dying(q))
- tg = td_root_tg(td);
- }
-
- return tg;
+ del_timer_sync(&tg->service_queue.pending_timer);
+ kfree(tg);
}
static struct throtl_grp *
@@ -956,32 +813,6 @@
return 0;
}
-static void throtl_update_dispatch_stats(struct blkcg_gq *blkg, u64 bytes,
- int rw)
-{
- struct throtl_grp *tg = blkg_to_tg(blkg);
- struct tg_stats_cpu *stats_cpu;
- unsigned long flags;
-
- /* If per cpu stats are not allocated yet, don't do any accounting. */
- if (tg->stats_cpu == NULL)
- return;
-
- /*
- * Disabling interrupts to provide mutual exclusion between two
- * writes on same cpu. It probably is not needed for 64bit. Not
- * optimizing that case yet.
- */
- local_irq_save(flags);
-
- stats_cpu = this_cpu_ptr(tg->stats_cpu);
-
- blkg_rwstat_add(&stats_cpu->serviced, rw, 1);
- blkg_rwstat_add(&stats_cpu->service_bytes, rw, bytes);
-
- local_irq_restore(flags);
-}
-
static void throtl_charge_bio(struct throtl_grp *tg, struct bio *bio)
{
bool rw = bio_data_dir(bio);
@@ -995,17 +826,9 @@
* more than once as a throttled bio will go through blk-throtl the
* second time when it eventually gets issued. Set it when a bio
* is being charged to a tg.
- *
- * Dispatch stats aren't recursive and each @bio should only be
- * accounted by the @tg it was originally associated with. Let's
- * update the stats when setting REQ_THROTTLED for the first time
- * which is guaranteed to be for the @bio's original tg.
*/
- if (!(bio->bi_rw & REQ_THROTTLED)) {
+ if (!(bio->bi_rw & REQ_THROTTLED))
bio->bi_rw |= REQ_THROTTLED;
- throtl_update_dispatch_stats(tg_to_blkg(tg),
- bio->bi_iter.bi_size, bio->bi_rw);
- }
}
/**
@@ -1285,34 +1108,6 @@
}
}
-static u64 tg_prfill_cpu_rwstat(struct seq_file *sf,
- struct blkg_policy_data *pd, int off)
-{
- struct throtl_grp *tg = pd_to_tg(pd);
- struct blkg_rwstat rwstat = { }, tmp;
- int i, cpu;
-
- if (tg->stats_cpu == NULL)
- return 0;
-
- for_each_possible_cpu(cpu) {
- struct tg_stats_cpu *sc = per_cpu_ptr(tg->stats_cpu, cpu);
-
- tmp = blkg_rwstat_read((void *)sc + off);
- for (i = 0; i < BLKG_RWSTAT_NR; i++)
- rwstat.cnt[i] += tmp.cnt[i];
- }
-
- return __blkg_prfill_rwstat(sf, pd, &rwstat);
-}
-
-static int tg_print_cpu_rwstat(struct seq_file *sf, void *v)
-{
- blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)), tg_prfill_cpu_rwstat,
- &blkcg_policy_throtl, seq_cft(sf)->private, true);
- return 0;
-}
-
static u64 tg_prfill_conf_u64(struct seq_file *sf, struct blkg_policy_data *pd,
int off)
{
@@ -1349,31 +1144,11 @@
return 0;
}
-static ssize_t tg_set_conf(struct kernfs_open_file *of,
- char *buf, size_t nbytes, loff_t off, bool is_u64)
+static void tg_conf_updated(struct throtl_grp *tg)
{
- struct blkcg *blkcg = css_to_blkcg(of_css(of));
- struct blkg_conf_ctx ctx;
- struct throtl_grp *tg;
- struct throtl_service_queue *sq;
- struct blkcg_gq *blkg;
+ struct throtl_service_queue *sq = &tg->service_queue;
struct cgroup_subsys_state *pos_css;
- int ret;
-
- ret = blkg_conf_prep(blkcg, &blkcg_policy_throtl, buf, &ctx);
- if (ret)
- return ret;
-
- tg = blkg_to_tg(ctx.blkg);
- sq = &tg->service_queue;
-
- if (!ctx.v)
- ctx.v = -1;
-
- if (is_u64)
- *(u64 *)((void *)tg + of_cft(of)->private) = ctx.v;
- else
- *(unsigned int *)((void *)tg + of_cft(of)->private) = ctx.v;
+ struct blkcg_gq *blkg;
throtl_log(&tg->service_queue,
"limit change rbps=%llu wbps=%llu riops=%u wiops=%u",
@@ -1387,7 +1162,7 @@
* restrictions in the whole hierarchy and allows them to bypass
* blk-throttle.
*/
- blkg_for_each_descendant_pre(blkg, pos_css, ctx.blkg)
+ blkg_for_each_descendant_pre(blkg, pos_css, tg_to_blkg(tg))
tg_update_has_rules(blkg_to_tg(blkg));
/*
@@ -1405,9 +1180,39 @@
tg_update_disptime(tg);
throtl_schedule_next_dispatch(sq->parent_sq, true);
}
+}
+static ssize_t tg_set_conf(struct kernfs_open_file *of,
+ char *buf, size_t nbytes, loff_t off, bool is_u64)
+{
+ struct blkcg *blkcg = css_to_blkcg(of_css(of));
+ struct blkg_conf_ctx ctx;
+ struct throtl_grp *tg;
+ int ret;
+ u64 v;
+
+ ret = blkg_conf_prep(blkcg, &blkcg_policy_throtl, buf, &ctx);
+ if (ret)
+ return ret;
+
+ ret = -EINVAL;
+ if (sscanf(ctx.body, "%llu", &v) != 1)
+ goto out_finish;
+ if (!v)
+ v = -1;
+
+ tg = blkg_to_tg(ctx.blkg);
+
+ if (is_u64)
+ *(u64 *)((void *)tg + of_cft(of)->private) = v;
+ else
+ *(unsigned int *)((void *)tg + of_cft(of)->private) = v;
+
+ tg_conf_updated(tg);
+ ret = 0;
+out_finish:
blkg_conf_finish(&ctx);
- return nbytes;
+ return ret ?: nbytes;
}
static ssize_t tg_set_conf_u64(struct kernfs_open_file *of,
@@ -1422,7 +1227,7 @@
return tg_set_conf(of, buf, nbytes, off, false);
}
-static struct cftype throtl_files[] = {
+static struct cftype throtl_legacy_files[] = {
{
.name = "throttle.read_bps_device",
.private = offsetof(struct throtl_grp, bps[READ]),
@@ -1449,13 +1254,124 @@
},
{
.name = "throttle.io_service_bytes",
- .private = offsetof(struct tg_stats_cpu, service_bytes),
- .seq_show = tg_print_cpu_rwstat,
+ .private = (unsigned long)&blkcg_policy_throtl,
+ .seq_show = blkg_print_stat_bytes,
},
{
.name = "throttle.io_serviced",
- .private = offsetof(struct tg_stats_cpu, serviced),
- .seq_show = tg_print_cpu_rwstat,
+ .private = (unsigned long)&blkcg_policy_throtl,
+ .seq_show = blkg_print_stat_ios,
+ },
+ { } /* terminate */
+};
+
+static u64 tg_prfill_max(struct seq_file *sf, struct blkg_policy_data *pd,
+ int off)
+{
+ struct throtl_grp *tg = pd_to_tg(pd);
+ const char *dname = blkg_dev_name(pd->blkg);
+ char bufs[4][21] = { "max", "max", "max", "max" };
+
+ if (!dname)
+ return 0;
+ if (tg->bps[READ] == -1 && tg->bps[WRITE] == -1 &&
+ tg->iops[READ] == -1 && tg->iops[WRITE] == -1)
+ return 0;
+
+ if (tg->bps[READ] != -1)
+ snprintf(bufs[0], sizeof(bufs[0]), "%llu", tg->bps[READ]);
+ if (tg->bps[WRITE] != -1)
+ snprintf(bufs[1], sizeof(bufs[1]), "%llu", tg->bps[WRITE]);
+ if (tg->iops[READ] != -1)
+ snprintf(bufs[2], sizeof(bufs[2]), "%u", tg->iops[READ]);
+ if (tg->iops[WRITE] != -1)
+ snprintf(bufs[3], sizeof(bufs[3]), "%u", tg->iops[WRITE]);
+
+ seq_printf(sf, "%s rbps=%s wbps=%s riops=%s wiops=%s\n",
+ dname, bufs[0], bufs[1], bufs[2], bufs[3]);
+ return 0;
+}
+
+static int tg_print_max(struct seq_file *sf, void *v)
+{
+ blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)), tg_prfill_max,
+ &blkcg_policy_throtl, seq_cft(sf)->private, false);
+ return 0;
+}
+
+static ssize_t tg_set_max(struct kernfs_open_file *of,
+ char *buf, size_t nbytes, loff_t off)
+{
+ struct blkcg *blkcg = css_to_blkcg(of_css(of));
+ struct blkg_conf_ctx ctx;
+ struct throtl_grp *tg;
+ u64 v[4];
+ int ret;
+
+ ret = blkg_conf_prep(blkcg, &blkcg_policy_throtl, buf, &ctx);
+ if (ret)
+ return ret;
+
+ tg = blkg_to_tg(ctx.blkg);
+
+ v[0] = tg->bps[READ];
+ v[1] = tg->bps[WRITE];
+ v[2] = tg->iops[READ];
+ v[3] = tg->iops[WRITE];
+
+ while (true) {
+ char tok[27]; /* wiops=18446744073709551615 */
+ char *p;
+ u64 val = -1;
+ int len;
+
+ if (sscanf(ctx.body, "%26s%n", tok, &len) != 1)
+ break;
+ if (tok[0] == '\0')
+ break;
+ ctx.body += len;
+
+ ret = -EINVAL;
+ p = tok;
+ strsep(&p, "=");
+ if (!p || (sscanf(p, "%llu", &val) != 1 && strcmp(p, "max")))
+ goto out_finish;
+
+ ret = -ERANGE;
+ if (!val)
+ goto out_finish;
+
+ ret = -EINVAL;
+ if (!strcmp(tok, "rbps"))
+ v[0] = val;
+ else if (!strcmp(tok, "wbps"))
+ v[1] = val;
+ else if (!strcmp(tok, "riops"))
+ v[2] = min_t(u64, val, UINT_MAX);
+ else if (!strcmp(tok, "wiops"))
+ v[3] = min_t(u64, val, UINT_MAX);
+ else
+ goto out_finish;
+ }
+
+ tg->bps[READ] = v[0];
+ tg->bps[WRITE] = v[1];
+ tg->iops[READ] = v[2];
+ tg->iops[WRITE] = v[3];
+
+ tg_conf_updated(tg);
+ ret = 0;
+out_finish:
+ blkg_conf_finish(&ctx);
+ return ret ?: nbytes;
+}
+
+static struct cftype throtl_files[] = {
+ {
+ .name = "max",
+ .flags = CFTYPE_NOT_ON_ROOT,
+ .seq_show = tg_print_max,
+ .write = tg_set_max,
},
{ } /* terminate */
};
@@ -1468,52 +1384,33 @@
}
static struct blkcg_policy blkcg_policy_throtl = {
- .pd_size = sizeof(struct throtl_grp),
- .cftypes = throtl_files,
+ .dfl_cftypes = throtl_files,
+ .legacy_cftypes = throtl_legacy_files,
+ .pd_alloc_fn = throtl_pd_alloc,
.pd_init_fn = throtl_pd_init,
.pd_online_fn = throtl_pd_online,
- .pd_exit_fn = throtl_pd_exit,
- .pd_reset_stats_fn = throtl_pd_reset_stats,
+ .pd_free_fn = throtl_pd_free,
};
-bool blk_throtl_bio(struct request_queue *q, struct bio *bio)
+bool blk_throtl_bio(struct request_queue *q, struct blkcg_gq *blkg,
+ struct bio *bio)
{
- struct throtl_data *td = q->td;
struct throtl_qnode *qn = NULL;
- struct throtl_grp *tg;
+ struct throtl_grp *tg = blkg_to_tg(blkg ?: q->root_blkg);
struct throtl_service_queue *sq;
bool rw = bio_data_dir(bio);
- struct blkcg *blkcg;
bool throttled = false;
+ WARN_ON_ONCE(!rcu_read_lock_held());
+
/* see throtl_charge_bio() */
- if (bio->bi_rw & REQ_THROTTLED)
+ if ((bio->bi_rw & REQ_THROTTLED) || !tg->has_rules[rw])
goto out;
- /*
- * A throtl_grp pointer retrieved under rcu can be used to access
- * basic fields like stats and io rates. If a group has no rules,
- * just update the dispatch stats in lockless manner and return.
- */
- rcu_read_lock();
- blkcg = bio_blkcg(bio);
- tg = throtl_lookup_tg(td, blkcg);
- if (tg) {
- if (!tg->has_rules[rw]) {
- throtl_update_dispatch_stats(tg_to_blkg(tg),
- bio->bi_iter.bi_size, bio->bi_rw);
- goto out_unlock_rcu;
- }
- }
-
- /*
- * Either group has not been allocated yet or it is not an unlimited
- * IO group
- */
spin_lock_irq(q->queue_lock);
- tg = throtl_lookup_create_tg(td, blkcg);
- if (unlikely(!tg))
+
+ if (unlikely(blk_queue_bypass(q)))
goto out_unlock;
sq = &tg->service_queue;
@@ -1580,8 +1477,6 @@
out_unlock:
spin_unlock_irq(q->queue_lock);
-out_unlock_rcu:
- rcu_read_unlock();
out:
/*
* As multiple blk-throtls may stack in the same issue path, we
@@ -1667,7 +1562,7 @@
return -ENOMEM;
INIT_WORK(&td->dispatch_work, blk_throtl_dispatch_work_fn);
- throtl_service_queue_init(&td->service_queue, NULL);
+ throtl_service_queue_init(&td->service_queue);
q->td = td;
td->queue = q;
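
The tg_set_max() writer added above accepts lines like "8:16 rbps=1048576 wiops=max": each of rbps/wbps/riops/wiops is optional, "max" clears a limit, and riops/wiops are clamped to UINT_MAX. A minimal stand-alone sketch of the same strsep()/sscanf() token parsing follows (user-space only; parse_io_max() and the fixed four-slot array are illustrative, not kernel API):

#include <stdio.h>
#include <stdint.h>
#include <inttypes.h>
#include <string.h>

/* Parse "rbps=N wbps=N riops=N wiops=N"; "max" or an absent key leaves the
 * slot at UINT64_MAX, i.e. unlimited, mirroring tg_set_max(). */
static int parse_io_max(const char *line, uint64_t v[4])
{
	static const char * const keys[4] = { "rbps", "wbps", "riops", "wiops" };
	char buf[256];
	char *body = buf, *tok;

	snprintf(buf, sizeof(buf), "%s", line);

	while ((tok = strsep(&body, " \t\n")) != NULL) {
		uint64_t val = UINT64_MAX;
		char *p = strchr(tok, '=');
		int i;

		if (!*tok)
			continue;
		if (!p)
			return -1;		/* -EINVAL */
		*p++ = '\0';
		if (strcmp(p, "max") != 0 && sscanf(p, "%" SCNu64, &val) != 1)
			return -1;		/* -EINVAL */
		if (!val)
			return -1;		/* -ERANGE: zero limits are rejected */

		for (i = 0; i < 4; i++)
			if (!strcmp(tok, keys[i]))
				break;
		if (i == 4)
			return -1;		/* unknown key */
		v[i] = val;
	}
	return 0;
}

int main(void)
{
	uint64_t v[4] = { UINT64_MAX, UINT64_MAX, UINT64_MAX, UINT64_MAX };

	if (parse_io_max("rbps=1048576 wiops=max", v) == 0)
		printf("rbps=%" PRIu64 " wbps=%" PRIu64 " riops=%" PRIu64
		       " wiops=%" PRIu64 "\n", v[0], v[1], v[2], v[3]);
	return 0;
}
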
diff --git a/block/blk.h b/block/blk.h
index 838188b..98614ad 100644
--- a/block/blk.h
+++ b/block/blk.h
@@ -272,15 +272,10 @@
* Internal throttling interface
*/
#ifdef CONFIG_BLK_DEV_THROTTLING
-extern bool blk_throtl_bio(struct request_queue *q, struct bio *bio);
extern void blk_throtl_drain(struct request_queue *q);
extern int blk_throtl_init(struct request_queue *q);
extern void blk_throtl_exit(struct request_queue *q);
#else /* CONFIG_BLK_DEV_THROTTLING */
-static inline bool blk_throtl_bio(struct request_queue *q, struct bio *bio)
-{
- return false;
-}
static inline void blk_throtl_drain(struct request_queue *q) { }
static inline int blk_throtl_init(struct request_queue *q) { return 0; }
static inline void blk_throtl_exit(struct request_queue *q) { }
diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
index c62bb2e..04de884 100644
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -68,9 +68,9 @@
#define rb_entry_cfqg(node) rb_entry((node), struct cfq_group, rb_node)
/* blkio-related constants */
-#define CFQ_WEIGHT_MIN 10
-#define CFQ_WEIGHT_MAX 1000
-#define CFQ_WEIGHT_DEFAULT 500
+#define CFQ_WEIGHT_LEGACY_MIN 10
+#define CFQ_WEIGHT_LEGACY_DFL 500
+#define CFQ_WEIGHT_LEGACY_MAX 1000
struct cfq_ttime {
unsigned long last_end_request;
@@ -177,10 +177,6 @@
struct cfqg_stats {
#ifdef CONFIG_CFQ_GROUP_IOSCHED
- /* total bytes transferred */
- struct blkg_rwstat service_bytes;
- /* total IOs serviced, post merge */
- struct blkg_rwstat serviced;
/* number of ios merged */
struct blkg_rwstat merged;
/* total time spent on device in ns, may not be accurate w/ queueing */
@@ -189,8 +185,6 @@
struct blkg_rwstat wait_time;
/* number of IOs queued up */
struct blkg_rwstat queued;
- /* total sectors transferred */
- struct blkg_stat sectors;
/* total disk time and nr sectors dispatched by this group */
struct blkg_stat time;
#ifdef CONFIG_DEBUG_BLK_CGROUP
@@ -220,7 +214,7 @@
/* Per-cgroup data */
struct cfq_group_data {
/* must be the first member */
- struct blkcg_policy_data pd;
+ struct blkcg_policy_data cpd;
unsigned int weight;
unsigned int leaf_weight;
@@ -304,7 +298,11 @@
int dispatched;
struct cfq_ttime ttime;
struct cfqg_stats stats; /* stats for this cfqg */
- struct cfqg_stats dead_stats; /* stats pushed from dead children */
+
+ /* async queue for each priority case */
+ struct cfq_queue *async_cfqq[2][IOPRIO_BE_NR];
+ struct cfq_queue *async_idle_cfqq;
+
};
struct cfq_io_cq {
@@ -370,12 +368,6 @@
struct cfq_queue *active_queue;
struct cfq_io_cq *active_cic;
- /*
- * async queue for each priority case
- */
- struct cfq_queue *async_cfqq[2][IOPRIO_BE_NR];
- struct cfq_queue *async_idle_cfqq;
-
sector_t last_position;
/*
@@ -401,6 +393,7 @@
};
static struct cfq_group *cfq_get_next_cfqg(struct cfq_data *cfqd);
+static void cfq_put_queue(struct cfq_queue *cfqq);
static struct cfq_rb_root *st_for(struct cfq_group *cfqg,
enum wl_class_t class,
@@ -612,7 +605,7 @@
static struct cfq_group_data
*cpd_to_cfqgd(struct blkcg_policy_data *cpd)
{
- return cpd ? container_of(cpd, struct cfq_group_data, pd) : NULL;
+ return cpd ? container_of(cpd, struct cfq_group_data, cpd) : NULL;
}
static inline struct blkcg_gq *cfqg_to_blkg(struct cfq_group *cfqg)
@@ -693,14 +686,6 @@
blkg_rwstat_add(&cfqg->stats.merged, rw, 1);
}
-static inline void cfqg_stats_update_dispatch(struct cfq_group *cfqg,
- uint64_t bytes, int rw)
-{
- blkg_stat_add(&cfqg->stats.sectors, bytes >> 9);
- blkg_rwstat_add(&cfqg->stats.serviced, rw, 1);
- blkg_rwstat_add(&cfqg->stats.service_bytes, rw, bytes);
-}
-
static inline void cfqg_stats_update_completion(struct cfq_group *cfqg,
uint64_t start_time, uint64_t io_start_time, int rw)
{
@@ -718,8 +703,6 @@
static void cfqg_stats_reset(struct cfqg_stats *stats)
{
/* queued stats shouldn't be cleared */
- blkg_rwstat_reset(&stats->service_bytes);
- blkg_rwstat_reset(&stats->serviced);
blkg_rwstat_reset(&stats->merged);
blkg_rwstat_reset(&stats->service_time);
blkg_rwstat_reset(&stats->wait_time);
@@ -736,28 +719,26 @@
}
/* @to += @from */
-static void cfqg_stats_merge(struct cfqg_stats *to, struct cfqg_stats *from)
+static void cfqg_stats_add_aux(struct cfqg_stats *to, struct cfqg_stats *from)
{
/* queued stats shouldn't be cleared */
- blkg_rwstat_merge(&to->service_bytes, &from->service_bytes);
- blkg_rwstat_merge(&to->serviced, &from->serviced);
- blkg_rwstat_merge(&to->merged, &from->merged);
- blkg_rwstat_merge(&to->service_time, &from->service_time);
- blkg_rwstat_merge(&to->wait_time, &from->wait_time);
- blkg_stat_merge(&from->time, &from->time);
+ blkg_rwstat_add_aux(&to->merged, &from->merged);
+ blkg_rwstat_add_aux(&to->service_time, &from->service_time);
+ blkg_rwstat_add_aux(&to->wait_time, &from->wait_time);
+ blkg_stat_add_aux(&from->time, &from->time);
#ifdef CONFIG_DEBUG_BLK_CGROUP
- blkg_stat_merge(&to->unaccounted_time, &from->unaccounted_time);
- blkg_stat_merge(&to->avg_queue_size_sum, &from->avg_queue_size_sum);
- blkg_stat_merge(&to->avg_queue_size_samples, &from->avg_queue_size_samples);
- blkg_stat_merge(&to->dequeue, &from->dequeue);
- blkg_stat_merge(&to->group_wait_time, &from->group_wait_time);
- blkg_stat_merge(&to->idle_time, &from->idle_time);
- blkg_stat_merge(&to->empty_time, &from->empty_time);
+ blkg_stat_add_aux(&to->unaccounted_time, &from->unaccounted_time);
+ blkg_stat_add_aux(&to->avg_queue_size_sum, &from->avg_queue_size_sum);
+ blkg_stat_add_aux(&to->avg_queue_size_samples, &from->avg_queue_size_samples);
+ blkg_stat_add_aux(&to->dequeue, &from->dequeue);
+ blkg_stat_add_aux(&to->group_wait_time, &from->group_wait_time);
+ blkg_stat_add_aux(&to->idle_time, &from->idle_time);
+ blkg_stat_add_aux(&to->empty_time, &from->empty_time);
#endif
}
/*
- * Transfer @cfqg's stats to its parent's dead_stats so that the ancestors'
+ * Transfer @cfqg's stats to its parent's aux counts so that the ancestors'
* recursive stats can still account for the amount used by this cfqg after
* it's gone.
*/
@@ -770,10 +751,8 @@
if (unlikely(!parent))
return;
- cfqg_stats_merge(&parent->dead_stats, &cfqg->stats);
- cfqg_stats_merge(&parent->dead_stats, &cfqg->dead_stats);
+ cfqg_stats_add_aux(&parent->stats, &cfqg->stats);
cfqg_stats_reset(&cfqg->stats);
- cfqg_stats_reset(&cfqg->dead_stats);
}
#else /* CONFIG_CFQ_GROUP_IOSCHED */
@@ -795,8 +774,6 @@
unsigned long time, unsigned long unaccounted_time) { }
static inline void cfqg_stats_update_io_remove(struct cfq_group *cfqg, int rw) { }
static inline void cfqg_stats_update_io_merged(struct cfq_group *cfqg, int rw) { }
-static inline void cfqg_stats_update_dispatch(struct cfq_group *cfqg,
- uint64_t bytes, int rw) { }
static inline void cfqg_stats_update_completion(struct cfq_group *cfqg,
uint64_t start_time, uint64_t io_start_time, int rw) { }
@@ -883,8 +860,7 @@
static void cfq_dispatch_insert(struct request_queue *, struct request *);
static struct cfq_queue *cfq_get_queue(struct cfq_data *cfqd, bool is_sync,
- struct cfq_io_cq *cic, struct bio *bio,
- gfp_t gfp_mask);
+ struct cfq_io_cq *cic, struct bio *bio);
static inline struct cfq_io_cq *icq_to_cic(struct io_cq *icq)
{
@@ -1546,130 +1522,171 @@
}
#ifdef CONFIG_CFQ_GROUP_IOSCHED
-static void cfqg_stats_init(struct cfqg_stats *stats)
+static int __cfq_set_weight(struct cgroup_subsys_state *css, u64 val,
+ bool on_dfl, bool reset_dev, bool is_leaf_weight);
+
+static void cfqg_stats_exit(struct cfqg_stats *stats)
{
- blkg_rwstat_init(&stats->service_bytes);
- blkg_rwstat_init(&stats->serviced);
- blkg_rwstat_init(&stats->merged);
- blkg_rwstat_init(&stats->service_time);
- blkg_rwstat_init(&stats->wait_time);
- blkg_rwstat_init(&stats->queued);
-
- blkg_stat_init(&stats->sectors);
- blkg_stat_init(&stats->time);
-
+ blkg_rwstat_exit(&stats->merged);
+ blkg_rwstat_exit(&stats->service_time);
+ blkg_rwstat_exit(&stats->wait_time);
+ blkg_rwstat_exit(&stats->queued);
+ blkg_stat_exit(&stats->time);
#ifdef CONFIG_DEBUG_BLK_CGROUP
- blkg_stat_init(&stats->unaccounted_time);
- blkg_stat_init(&stats->avg_queue_size_sum);
- blkg_stat_init(&stats->avg_queue_size_samples);
- blkg_stat_init(&stats->dequeue);
- blkg_stat_init(&stats->group_wait_time);
- blkg_stat_init(&stats->idle_time);
- blkg_stat_init(&stats->empty_time);
+ blkg_stat_exit(&stats->unaccounted_time);
+ blkg_stat_exit(&stats->avg_queue_size_sum);
+ blkg_stat_exit(&stats->avg_queue_size_samples);
+ blkg_stat_exit(&stats->dequeue);
+ blkg_stat_exit(&stats->group_wait_time);
+ blkg_stat_exit(&stats->idle_time);
+ blkg_stat_exit(&stats->empty_time);
#endif
}
-static void cfq_cpd_init(const struct blkcg *blkcg)
+static int cfqg_stats_init(struct cfqg_stats *stats, gfp_t gfp)
{
- struct cfq_group_data *cgd =
- cpd_to_cfqgd(blkcg->pd[blkcg_policy_cfq.plid]);
+ if (blkg_rwstat_init(&stats->merged, gfp) ||
+ blkg_rwstat_init(&stats->service_time, gfp) ||
+ blkg_rwstat_init(&stats->wait_time, gfp) ||
+ blkg_rwstat_init(&stats->queued, gfp) ||
+ blkg_stat_init(&stats->time, gfp))
+ goto err;
- if (blkcg == &blkcg_root) {
- cgd->weight = 2 * CFQ_WEIGHT_DEFAULT;
- cgd->leaf_weight = 2 * CFQ_WEIGHT_DEFAULT;
- } else {
- cgd->weight = CFQ_WEIGHT_DEFAULT;
- cgd->leaf_weight = CFQ_WEIGHT_DEFAULT;
- }
+#ifdef CONFIG_DEBUG_BLK_CGROUP
+ if (blkg_stat_init(&stats->unaccounted_time, gfp) ||
+ blkg_stat_init(&stats->avg_queue_size_sum, gfp) ||
+ blkg_stat_init(&stats->avg_queue_size_samples, gfp) ||
+ blkg_stat_init(&stats->dequeue, gfp) ||
+ blkg_stat_init(&stats->group_wait_time, gfp) ||
+ blkg_stat_init(&stats->idle_time, gfp) ||
+ blkg_stat_init(&stats->empty_time, gfp))
+ goto err;
+#endif
+ return 0;
+err:
+ cfqg_stats_exit(stats);
+ return -ENOMEM;
}
-static void cfq_pd_init(struct blkcg_gq *blkg)
+static struct blkcg_policy_data *cfq_cpd_alloc(gfp_t gfp)
{
- struct cfq_group *cfqg = blkg_to_cfqg(blkg);
- struct cfq_group_data *cgd = blkcg_to_cfqgd(blkg->blkcg);
+ struct cfq_group_data *cgd;
+
+ cgd = kzalloc(sizeof(*cgd), GFP_KERNEL);
+ if (!cgd)
+ return NULL;
+ return &cgd->cpd;
+}
+
+static void cfq_cpd_init(struct blkcg_policy_data *cpd)
+{
+ struct cfq_group_data *cgd = cpd_to_cfqgd(cpd);
+ unsigned int weight = cgroup_on_dfl(blkcg_root.css.cgroup) ?
+ CGROUP_WEIGHT_DFL : CFQ_WEIGHT_LEGACY_DFL;
+
+ if (cpd_to_blkcg(cpd) == &blkcg_root)
+ weight *= 2;
+
+ cgd->weight = weight;
+ cgd->leaf_weight = weight;
+}
+
+static void cfq_cpd_free(struct blkcg_policy_data *cpd)
+{
+ kfree(cpd_to_cfqgd(cpd));
+}
+
+static void cfq_cpd_bind(struct blkcg_policy_data *cpd)
+{
+ struct blkcg *blkcg = cpd_to_blkcg(cpd);
+ bool on_dfl = cgroup_on_dfl(blkcg_root.css.cgroup);
+ unsigned int weight = on_dfl ? CGROUP_WEIGHT_DFL : CFQ_WEIGHT_LEGACY_DFL;
+
+ if (blkcg == &blkcg_root)
+ weight *= 2;
+
+ WARN_ON_ONCE(__cfq_set_weight(&blkcg->css, weight, on_dfl, true, false));
+ WARN_ON_ONCE(__cfq_set_weight(&blkcg->css, weight, on_dfl, true, true));
+}
+
+static struct blkg_policy_data *cfq_pd_alloc(gfp_t gfp, int node)
+{
+ struct cfq_group *cfqg;
+
+ cfqg = kzalloc_node(sizeof(*cfqg), gfp, node);
+ if (!cfqg)
+ return NULL;
cfq_init_cfqg_base(cfqg);
- cfqg->weight = cgd->weight;
- cfqg->leaf_weight = cgd->leaf_weight;
- cfqg_stats_init(&cfqg->stats);
- cfqg_stats_init(&cfqg->dead_stats);
+ if (cfqg_stats_init(&cfqg->stats, gfp)) {
+ kfree(cfqg);
+ return NULL;
+ }
+
+ return &cfqg->pd;
}
-static void cfq_pd_offline(struct blkcg_gq *blkg)
+static void cfq_pd_init(struct blkg_policy_data *pd)
{
+ struct cfq_group *cfqg = pd_to_cfqg(pd);
+ struct cfq_group_data *cgd = blkcg_to_cfqgd(pd->blkg->blkcg);
+
+ cfqg->weight = cgd->weight;
+ cfqg->leaf_weight = cgd->leaf_weight;
+}
+
+static void cfq_pd_offline(struct blkg_policy_data *pd)
+{
+ struct cfq_group *cfqg = pd_to_cfqg(pd);
+ int i;
+
+ for (i = 0; i < IOPRIO_BE_NR; i++) {
+ if (cfqg->async_cfqq[0][i])
+ cfq_put_queue(cfqg->async_cfqq[0][i]);
+ if (cfqg->async_cfqq[1][i])
+ cfq_put_queue(cfqg->async_cfqq[1][i]);
+ }
+
+ if (cfqg->async_idle_cfqq)
+ cfq_put_queue(cfqg->async_idle_cfqq);
+
/*
* @blkg is going offline and will be ignored by
* blkg_[rw]stat_recursive_sum(). Transfer stats to the parent so
* that they don't get lost. If IOs complete after this point, the
* stats for them will be lost. Oh well...
*/
- cfqg_stats_xfer_dead(blkg_to_cfqg(blkg));
+ cfqg_stats_xfer_dead(cfqg);
}
-/* offset delta from cfqg->stats to cfqg->dead_stats */
-static const int dead_stats_off_delta = offsetof(struct cfq_group, dead_stats) -
- offsetof(struct cfq_group, stats);
-
-/* to be used by recursive prfill, sums live and dead stats recursively */
-static u64 cfqg_stat_pd_recursive_sum(struct blkg_policy_data *pd, int off)
+static void cfq_pd_free(struct blkg_policy_data *pd)
{
- u64 sum = 0;
+ struct cfq_group *cfqg = pd_to_cfqg(pd);
- sum += blkg_stat_recursive_sum(pd, off);
- sum += blkg_stat_recursive_sum(pd, off + dead_stats_off_delta);
- return sum;
+ cfqg_stats_exit(&cfqg->stats);
+ return kfree(cfqg);
}
-/* to be used by recursive prfill, sums live and dead rwstats recursively */
-static struct blkg_rwstat cfqg_rwstat_pd_recursive_sum(struct blkg_policy_data *pd,
- int off)
+static void cfq_pd_reset_stats(struct blkg_policy_data *pd)
{
- struct blkg_rwstat a, b;
-
- a = blkg_rwstat_recursive_sum(pd, off);
- b = blkg_rwstat_recursive_sum(pd, off + dead_stats_off_delta);
- blkg_rwstat_merge(&a, &b);
- return a;
-}
-
-static void cfq_pd_reset_stats(struct blkcg_gq *blkg)
-{
- struct cfq_group *cfqg = blkg_to_cfqg(blkg);
+ struct cfq_group *cfqg = pd_to_cfqg(pd);
cfqg_stats_reset(&cfqg->stats);
- cfqg_stats_reset(&cfqg->dead_stats);
}
-/*
- * Search for the cfq group current task belongs to. request_queue lock must
- * be held.
- */
-static struct cfq_group *cfq_lookup_create_cfqg(struct cfq_data *cfqd,
- struct blkcg *blkcg)
+static struct cfq_group *cfq_lookup_cfqg(struct cfq_data *cfqd,
+ struct blkcg *blkcg)
{
- struct request_queue *q = cfqd->queue;
- struct cfq_group *cfqg = NULL;
+ struct blkcg_gq *blkg;
- /* avoid lookup for the common case where there's no blkcg */
- if (blkcg == &blkcg_root) {
- cfqg = cfqd->root_group;
- } else {
- struct blkcg_gq *blkg;
-
- blkg = blkg_lookup_create(blkcg, q);
- if (!IS_ERR(blkg))
- cfqg = blkg_to_cfqg(blkg);
- }
-
- return cfqg;
+ blkg = blkg_lookup(blkcg, cfqd->queue);
+ if (likely(blkg))
+ return blkg_to_cfqg(blkg);
+ return NULL;
}
static void cfq_link_cfqq_cfqg(struct cfq_queue *cfqq, struct cfq_group *cfqg)
{
- /* Currently, all async queues are mapped to root group */
- if (!cfq_cfqq_sync(cfqq))
- cfqg = cfqq->cfqd->root_group;
-
cfqq->cfqg = cfqg;
/* cfqq reference on cfqg */
cfqg_get(cfqg);
@@ -1739,36 +1756,48 @@
static ssize_t __cfqg_set_weight_device(struct kernfs_open_file *of,
char *buf, size_t nbytes, loff_t off,
- bool is_leaf_weight)
+ bool on_dfl, bool is_leaf_weight)
{
+ unsigned int min = on_dfl ? CGROUP_WEIGHT_MIN : CFQ_WEIGHT_LEGACY_MIN;
+ unsigned int max = on_dfl ? CGROUP_WEIGHT_MAX : CFQ_WEIGHT_LEGACY_MAX;
struct blkcg *blkcg = css_to_blkcg(of_css(of));
struct blkg_conf_ctx ctx;
struct cfq_group *cfqg;
struct cfq_group_data *cfqgd;
int ret;
+ u64 v;
ret = blkg_conf_prep(blkcg, &blkcg_policy_cfq, buf, &ctx);
if (ret)
return ret;
- ret = -EINVAL;
+ if (sscanf(ctx.body, "%llu", &v) == 1) {
+ /* require "default" on dfl */
+ ret = -ERANGE;
+ if (!v && on_dfl)
+ goto out_finish;
+ } else if (!strcmp(strim(ctx.body), "default")) {
+ v = 0;
+ } else {
+ ret = -EINVAL;
+ goto out_finish;
+ }
+
cfqg = blkg_to_cfqg(ctx.blkg);
cfqgd = blkcg_to_cfqgd(blkcg);
- if (!cfqg || !cfqgd)
- goto err;
- if (!ctx.v || (ctx.v >= CFQ_WEIGHT_MIN && ctx.v <= CFQ_WEIGHT_MAX)) {
+ ret = -ERANGE;
+ if (!v || (v >= min && v <= max)) {
if (!is_leaf_weight) {
- cfqg->dev_weight = ctx.v;
- cfqg->new_weight = ctx.v ?: cfqgd->weight;
+ cfqg->dev_weight = v;
+ cfqg->new_weight = v ?: cfqgd->weight;
} else {
- cfqg->dev_leaf_weight = ctx.v;
- cfqg->new_leaf_weight = ctx.v ?: cfqgd->leaf_weight;
+ cfqg->dev_leaf_weight = v;
+ cfqg->new_leaf_weight = v ?: cfqgd->leaf_weight;
}
ret = 0;
}
-
-err:
+out_finish:
blkg_conf_finish(&ctx);
return ret ?: nbytes;
}
@@ -1776,25 +1805,27 @@
static ssize_t cfqg_set_weight_device(struct kernfs_open_file *of,
char *buf, size_t nbytes, loff_t off)
{
- return __cfqg_set_weight_device(of, buf, nbytes, off, false);
+ return __cfqg_set_weight_device(of, buf, nbytes, off, false, false);
}
static ssize_t cfqg_set_leaf_weight_device(struct kernfs_open_file *of,
char *buf, size_t nbytes, loff_t off)
{
- return __cfqg_set_weight_device(of, buf, nbytes, off, true);
+ return __cfqg_set_weight_device(of, buf, nbytes, off, false, true);
}
-static int __cfq_set_weight(struct cgroup_subsys_state *css, struct cftype *cft,
- u64 val, bool is_leaf_weight)
+static int __cfq_set_weight(struct cgroup_subsys_state *css, u64 val,
+ bool on_dfl, bool reset_dev, bool is_leaf_weight)
{
+ unsigned int min = on_dfl ? CGROUP_WEIGHT_MIN : CFQ_WEIGHT_LEGACY_MIN;
+ unsigned int max = on_dfl ? CGROUP_WEIGHT_MAX : CFQ_WEIGHT_LEGACY_MAX;
struct blkcg *blkcg = css_to_blkcg(css);
struct blkcg_gq *blkg;
struct cfq_group_data *cfqgd;
int ret = 0;
- if (val < CFQ_WEIGHT_MIN || val > CFQ_WEIGHT_MAX)
- return -EINVAL;
+ if (val < min || val > max)
+ return -ERANGE;
spin_lock_irq(&blkcg->lock);
cfqgd = blkcg_to_cfqgd(blkcg);
@@ -1815,9 +1846,13 @@
continue;
if (!is_leaf_weight) {
+ if (reset_dev)
+ cfqg->dev_weight = 0;
if (!cfqg->dev_weight)
cfqg->new_weight = cfqgd->weight;
} else {
+ if (reset_dev)
+ cfqg->dev_leaf_weight = 0;
if (!cfqg->dev_leaf_weight)
cfqg->new_leaf_weight = cfqgd->leaf_weight;
}
@@ -1831,13 +1866,13 @@
static int cfq_set_weight(struct cgroup_subsys_state *css, struct cftype *cft,
u64 val)
{
- return __cfq_set_weight(css, cft, val, false);
+ return __cfq_set_weight(css, val, false, false, false);
}
static int cfq_set_leaf_weight(struct cgroup_subsys_state *css,
struct cftype *cft, u64 val)
{
- return __cfq_set_weight(css, cft, val, true);
+ return __cfq_set_weight(css, val, false, false, true);
}
static int cfqg_print_stat(struct seq_file *sf, void *v)
@@ -1857,16 +1892,16 @@
static u64 cfqg_prfill_stat_recursive(struct seq_file *sf,
struct blkg_policy_data *pd, int off)
{
- u64 sum = cfqg_stat_pd_recursive_sum(pd, off);
-
+ u64 sum = blkg_stat_recursive_sum(pd_to_blkg(pd),
+ &blkcg_policy_cfq, off);
return __blkg_prfill_u64(sf, pd, sum);
}
static u64 cfqg_prfill_rwstat_recursive(struct seq_file *sf,
struct blkg_policy_data *pd, int off)
{
- struct blkg_rwstat sum = cfqg_rwstat_pd_recursive_sum(pd, off);
-
+ struct blkg_rwstat sum = blkg_rwstat_recursive_sum(pd_to_blkg(pd),
+ &blkcg_policy_cfq, off);
return __blkg_prfill_rwstat(sf, pd, &sum);
}
@@ -1886,6 +1921,40 @@
return 0;
}
+static u64 cfqg_prfill_sectors(struct seq_file *sf, struct blkg_policy_data *pd,
+ int off)
+{
+ u64 sum = blkg_rwstat_total(&pd->blkg->stat_bytes);
+
+ return __blkg_prfill_u64(sf, pd, sum >> 9);
+}
+
+static int cfqg_print_stat_sectors(struct seq_file *sf, void *v)
+{
+ blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)),
+ cfqg_prfill_sectors, &blkcg_policy_cfq, 0, false);
+ return 0;
+}
+
+static u64 cfqg_prfill_sectors_recursive(struct seq_file *sf,
+ struct blkg_policy_data *pd, int off)
+{
+ struct blkg_rwstat tmp = blkg_rwstat_recursive_sum(pd->blkg, NULL,
+ offsetof(struct blkcg_gq, stat_bytes));
+ u64 sum = atomic64_read(&tmp.aux_cnt[BLKG_RWSTAT_READ]) +
+ atomic64_read(&tmp.aux_cnt[BLKG_RWSTAT_WRITE]);
+
+ return __blkg_prfill_u64(sf, pd, sum >> 9);
+}
+
+static int cfqg_print_stat_sectors_recursive(struct seq_file *sf, void *v)
+{
+ blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)),
+ cfqg_prfill_sectors_recursive, &blkcg_policy_cfq, 0,
+ false);
+ return 0;
+}
+
#ifdef CONFIG_DEBUG_BLK_CGROUP
static u64 cfqg_prfill_avg_queue_size(struct seq_file *sf,
struct blkg_policy_data *pd, int off)
@@ -1912,7 +1981,7 @@
}
#endif /* CONFIG_DEBUG_BLK_CGROUP */
-static struct cftype cfq_blkcg_files[] = {
+static struct cftype cfq_blkcg_legacy_files[] = {
/* on root, weight is mapped to leaf_weight */
{
.name = "weight_device",
@@ -1960,18 +2029,17 @@
},
{
.name = "sectors",
- .private = offsetof(struct cfq_group, stats.sectors),
- .seq_show = cfqg_print_stat,
+ .seq_show = cfqg_print_stat_sectors,
},
{
.name = "io_service_bytes",
- .private = offsetof(struct cfq_group, stats.service_bytes),
- .seq_show = cfqg_print_rwstat,
+ .private = (unsigned long)&blkcg_policy_cfq,
+ .seq_show = blkg_print_stat_bytes,
},
{
.name = "io_serviced",
- .private = offsetof(struct cfq_group, stats.serviced),
- .seq_show = cfqg_print_rwstat,
+ .private = (unsigned long)&blkcg_policy_cfq,
+ .seq_show = blkg_print_stat_ios,
},
{
.name = "io_service_time",
@@ -2002,18 +2070,17 @@
},
{
.name = "sectors_recursive",
- .private = offsetof(struct cfq_group, stats.sectors),
- .seq_show = cfqg_print_stat_recursive,
+ .seq_show = cfqg_print_stat_sectors_recursive,
},
{
.name = "io_service_bytes_recursive",
- .private = offsetof(struct cfq_group, stats.service_bytes),
- .seq_show = cfqg_print_rwstat_recursive,
+ .private = (unsigned long)&blkcg_policy_cfq,
+ .seq_show = blkg_print_stat_bytes_recursive,
},
{
.name = "io_serviced_recursive",
- .private = offsetof(struct cfq_group, stats.serviced),
- .seq_show = cfqg_print_rwstat_recursive,
+ .private = (unsigned long)&blkcg_policy_cfq,
+ .seq_show = blkg_print_stat_ios_recursive,
},
{
.name = "io_service_time_recursive",
@@ -2068,9 +2135,51 @@
#endif /* CONFIG_DEBUG_BLK_CGROUP */
{ } /* terminate */
};
+
+static int cfq_print_weight_on_dfl(struct seq_file *sf, void *v)
+{
+ struct blkcg *blkcg = css_to_blkcg(seq_css(sf));
+ struct cfq_group_data *cgd = blkcg_to_cfqgd(blkcg);
+
+ seq_printf(sf, "default %u\n", cgd->weight);
+ blkcg_print_blkgs(sf, blkcg, cfqg_prfill_weight_device,
+ &blkcg_policy_cfq, 0, false);
+ return 0;
+}
+
+static ssize_t cfq_set_weight_on_dfl(struct kernfs_open_file *of,
+ char *buf, size_t nbytes, loff_t off)
+{
+ char *endp;
+ int ret;
+ u64 v;
+
+ buf = strim(buf);
+
+ /* "WEIGHT" or "default WEIGHT" sets the default weight */
+ v = simple_strtoull(buf, &endp, 0);
+ if (*endp == '\0' || sscanf(buf, "default %llu", &v) == 1) {
+ ret = __cfq_set_weight(of_css(of), v, true, false, false);
+ return ret ?: nbytes;
+ }
+
+ /* "MAJ:MIN WEIGHT" */
+ return __cfqg_set_weight_device(of, buf, nbytes, off, true, false);
+}
+
+static struct cftype cfq_blkcg_files[] = {
+ {
+ .name = "weight",
+ .flags = CFTYPE_NOT_ON_ROOT,
+ .seq_show = cfq_print_weight_on_dfl,
+ .write = cfq_set_weight_on_dfl,
+ },
+ { } /* terminate */
+};
+
#else /* GROUP_IOSCHED */
-static struct cfq_group *cfq_lookup_create_cfqg(struct cfq_data *cfqd,
- struct blkcg *blkcg)
+static struct cfq_group *cfq_lookup_cfqg(struct cfq_data *cfqd,
+ struct blkcg *blkcg)
{
return cfqd->root_group;
}
@@ -2873,7 +2982,6 @@
cfqd->rq_in_flight[cfq_cfqq_sync(cfqq)]++;
cfqq->nr_sectors += blk_rq_sectors(rq);
- cfqg_stats_update_dispatch(cfqq->cfqg, blk_rq_bytes(rq), rq->cmd_flags);
}
/*
@@ -3506,14 +3614,14 @@
struct cfq_io_cq *cic = icq_to_cic(icq);
struct cfq_data *cfqd = cic_to_cfqd(cic);
- if (cic->cfqq[BLK_RW_ASYNC]) {
- cfq_exit_cfqq(cfqd, cic->cfqq[BLK_RW_ASYNC]);
- cic->cfqq[BLK_RW_ASYNC] = NULL;
+ if (cic_to_cfqq(cic, false)) {
+ cfq_exit_cfqq(cfqd, cic_to_cfqq(cic, false));
+ cic_set_cfqq(cic, NULL, false);
}
- if (cic->cfqq[BLK_RW_SYNC]) {
- cfq_exit_cfqq(cfqd, cic->cfqq[BLK_RW_SYNC]);
- cic->cfqq[BLK_RW_SYNC] = NULL;
+ if (cic_to_cfqq(cic, true)) {
+ cfq_exit_cfqq(cfqd, cic_to_cfqq(cic, true));
+ cic_set_cfqq(cic, NULL, true);
}
}
@@ -3572,18 +3680,14 @@
if (unlikely(!cfqd) || likely(cic->ioprio == ioprio))
return;
- cfqq = cic->cfqq[BLK_RW_ASYNC];
+ cfqq = cic_to_cfqq(cic, false);
if (cfqq) {
- struct cfq_queue *new_cfqq;
- new_cfqq = cfq_get_queue(cfqd, BLK_RW_ASYNC, cic, bio,
- GFP_ATOMIC);
- if (new_cfqq) {
- cic->cfqq[BLK_RW_ASYNC] = new_cfqq;
- cfq_put_queue(cfqq);
- }
+ cfq_put_queue(cfqq);
+ cfqq = cfq_get_queue(cfqd, BLK_RW_ASYNC, cic, bio);
+ cic_set_cfqq(cic, cfqq, false);
}
- cfqq = cic->cfqq[BLK_RW_SYNC];
+ cfqq = cic_to_cfqq(cic, true);
if (cfqq)
cfq_mark_cfqq_prio_changed(cfqq);
@@ -3614,7 +3718,7 @@
static void check_blkcg_changed(struct cfq_io_cq *cic, struct bio *bio)
{
struct cfq_data *cfqd = cic_to_cfqd(cic);
- struct cfq_queue *sync_cfqq;
+ struct cfq_queue *cfqq;
uint64_t serial_nr;
rcu_read_lock();
@@ -3628,15 +3732,22 @@
if (unlikely(!cfqd) || likely(cic->blkcg_serial_nr == serial_nr))
return;
- sync_cfqq = cic_to_cfqq(cic, 1);
- if (sync_cfqq) {
- /*
- * Drop reference to sync queue. A new sync queue will be
- * assigned in new group upon arrival of a fresh request.
- */
- cfq_log_cfqq(cfqd, sync_cfqq, "changed cgroup");
- cic_set_cfqq(cic, NULL, 1);
- cfq_put_queue(sync_cfqq);
+ /*
+ * Drop reference to queues. New queues will be assigned in new
+ * group upon arrival of fresh requests.
+ */
+ cfqq = cic_to_cfqq(cic, false);
+ if (cfqq) {
+ cfq_log_cfqq(cfqd, cfqq, "changed cgroup");
+ cic_set_cfqq(cic, NULL, false);
+ cfq_put_queue(cfqq);
+ }
+
+ cfqq = cic_to_cfqq(cic, true);
+ if (cfqq) {
+ cfq_log_cfqq(cfqd, cfqq, "changed cgroup");
+ cic_set_cfqq(cic, NULL, true);
+ cfq_put_queue(cfqq);
}
cic->blkcg_serial_nr = serial_nr;
@@ -3645,81 +3756,19 @@
static inline void check_blkcg_changed(struct cfq_io_cq *cic, struct bio *bio) { }
#endif /* CONFIG_CFQ_GROUP_IOSCHED */
-static struct cfq_queue *
-cfq_find_alloc_queue(struct cfq_data *cfqd, bool is_sync, struct cfq_io_cq *cic,
- struct bio *bio, gfp_t gfp_mask)
-{
- struct blkcg *blkcg;
- struct cfq_queue *cfqq, *new_cfqq = NULL;
- struct cfq_group *cfqg;
-
-retry:
- rcu_read_lock();
-
- blkcg = bio_blkcg(bio);
- cfqg = cfq_lookup_create_cfqg(cfqd, blkcg);
- if (!cfqg) {
- cfqq = &cfqd->oom_cfqq;
- goto out;
- }
-
- cfqq = cic_to_cfqq(cic, is_sync);
-
- /*
- * Always try a new alloc if we fell back to the OOM cfqq
- * originally, since it should just be a temporary situation.
- */
- if (!cfqq || cfqq == &cfqd->oom_cfqq) {
- cfqq = NULL;
- if (new_cfqq) {
- cfqq = new_cfqq;
- new_cfqq = NULL;
- } else if (gfp_mask & __GFP_WAIT) {
- rcu_read_unlock();
- spin_unlock_irq(cfqd->queue->queue_lock);
- new_cfqq = kmem_cache_alloc_node(cfq_pool,
- gfp_mask | __GFP_ZERO,
- cfqd->queue->node);
- spin_lock_irq(cfqd->queue->queue_lock);
- if (new_cfqq)
- goto retry;
- else
- return &cfqd->oom_cfqq;
- } else {
- cfqq = kmem_cache_alloc_node(cfq_pool,
- gfp_mask | __GFP_ZERO,
- cfqd->queue->node);
- }
-
- if (cfqq) {
- cfq_init_cfqq(cfqd, cfqq, current->pid, is_sync);
- cfq_init_prio_data(cfqq, cic);
- cfq_link_cfqq_cfqg(cfqq, cfqg);
- cfq_log_cfqq(cfqd, cfqq, "alloced");
- } else
- cfqq = &cfqd->oom_cfqq;
- }
-out:
- if (new_cfqq)
- kmem_cache_free(cfq_pool, new_cfqq);
-
- rcu_read_unlock();
- return cfqq;
-}
-
static struct cfq_queue **
-cfq_async_queue_prio(struct cfq_data *cfqd, int ioprio_class, int ioprio)
+cfq_async_queue_prio(struct cfq_group *cfqg, int ioprio_class, int ioprio)
{
switch (ioprio_class) {
case IOPRIO_CLASS_RT:
- return &cfqd->async_cfqq[0][ioprio];
+ return &cfqg->async_cfqq[0][ioprio];
case IOPRIO_CLASS_NONE:
ioprio = IOPRIO_NORM;
/* fall through */
case IOPRIO_CLASS_BE:
- return &cfqd->async_cfqq[1][ioprio];
+ return &cfqg->async_cfqq[1][ioprio];
case IOPRIO_CLASS_IDLE:
- return &cfqd->async_idle_cfqq;
+ return &cfqg->async_idle_cfqq;
default:
BUG();
}
@@ -3727,12 +3776,20 @@
static struct cfq_queue *
cfq_get_queue(struct cfq_data *cfqd, bool is_sync, struct cfq_io_cq *cic,
- struct bio *bio, gfp_t gfp_mask)
+ struct bio *bio)
{
int ioprio_class = IOPRIO_PRIO_CLASS(cic->ioprio);
int ioprio = IOPRIO_PRIO_DATA(cic->ioprio);
struct cfq_queue **async_cfqq = NULL;
- struct cfq_queue *cfqq = NULL;
+ struct cfq_queue *cfqq;
+ struct cfq_group *cfqg;
+
+ rcu_read_lock();
+ cfqg = cfq_lookup_cfqg(cfqd, bio_blkcg(bio));
+ if (!cfqg) {
+ cfqq = &cfqd->oom_cfqq;
+ goto out;
+ }
if (!is_sync) {
if (!ioprio_valid(cic->ioprio)) {
@@ -3740,22 +3797,32 @@
ioprio = task_nice_ioprio(tsk);
ioprio_class = task_nice_ioclass(tsk);
}
- async_cfqq = cfq_async_queue_prio(cfqd, ioprio_class, ioprio);
+ async_cfqq = cfq_async_queue_prio(cfqg, ioprio_class, ioprio);
cfqq = *async_cfqq;
+ if (cfqq)
+ goto out;
}
- if (!cfqq)
- cfqq = cfq_find_alloc_queue(cfqd, is_sync, cic, bio, gfp_mask);
+ cfqq = kmem_cache_alloc_node(cfq_pool, GFP_NOWAIT | __GFP_ZERO,
+ cfqd->queue->node);
+ if (!cfqq) {
+ cfqq = &cfqd->oom_cfqq;
+ goto out;
+ }
- /*
- * pin the queue now that it's allocated, scheduler exit will prune it
- */
- if (!is_sync && !(*async_cfqq)) {
+ cfq_init_cfqq(cfqd, cfqq, current->pid, is_sync);
+ cfq_init_prio_data(cfqq, cic);
+ cfq_link_cfqq_cfqg(cfqq, cfqg);
+ cfq_log_cfqq(cfqd, cfqq, "alloced");
+
+ if (async_cfqq) {
+ /* a new async queue is created, pin and remember */
cfqq->ref++;
*async_cfqq = cfqq;
}
-
+out:
cfqq->ref++;
+ rcu_read_unlock();
return cfqq;
}
@@ -4289,8 +4356,6 @@
const bool is_sync = rq_is_sync(rq);
struct cfq_queue *cfqq;
- might_sleep_if(gfp_mask & __GFP_WAIT);
-
spin_lock_irq(q->queue_lock);
check_ioprio_changed(cic, bio);
@@ -4298,7 +4363,9 @@
new_queue:
cfqq = cic_to_cfqq(cic, is_sync);
if (!cfqq || cfqq == &cfqd->oom_cfqq) {
- cfqq = cfq_get_queue(cfqd, is_sync, cic, bio, gfp_mask);
+ if (cfqq)
+ cfq_put_queue(cfqq);
+ cfqq = cfq_get_queue(cfqd, is_sync, cic, bio);
cic_set_cfqq(cic, cfqq, is_sync);
} else {
/*
@@ -4404,21 +4471,6 @@
cancel_work_sync(&cfqd->unplug_work);
}
-static void cfq_put_async_queues(struct cfq_data *cfqd)
-{
- int i;
-
- for (i = 0; i < IOPRIO_BE_NR; i++) {
- if (cfqd->async_cfqq[0][i])
- cfq_put_queue(cfqd->async_cfqq[0][i]);
- if (cfqd->async_cfqq[1][i])
- cfq_put_queue(cfqd->async_cfqq[1][i]);
- }
-
- if (cfqd->async_idle_cfqq)
- cfq_put_queue(cfqd->async_idle_cfqq);
-}
-
static void cfq_exit_queue(struct elevator_queue *e)
{
struct cfq_data *cfqd = e->elevator_data;
@@ -4431,8 +4483,6 @@
if (cfqd->active_queue)
__cfq_slice_expired(cfqd, cfqd->active_queue, 0);
- cfq_put_async_queues(cfqd);
-
spin_unlock_irq(q->queue_lock);
cfq_shutdown_timer_wq(cfqd);
@@ -4486,9 +4536,9 @@
goto out_free;
cfq_init_cfqg_base(cfqd->root_group);
+ cfqd->root_group->weight = 2 * CFQ_WEIGHT_LEGACY_DFL;
+ cfqd->root_group->leaf_weight = 2 * CFQ_WEIGHT_LEGACY_DFL;
#endif
- cfqd->root_group->weight = 2 * CFQ_WEIGHT_DEFAULT;
- cfqd->root_group->leaf_weight = 2 * CFQ_WEIGHT_DEFAULT;
/*
* Not strictly needed (since RB_ROOT just clears the node and we
@@ -4499,7 +4549,7 @@
cfqd->prio_trees[i] = RB_ROOT;
/*
- * Our fallback cfqq if cfq_find_alloc_queue() runs into OOM issues.
+ * Our fallback cfqq if cfq_get_queue() runs into OOM issues.
* Grab a permanent reference to it, so that the normal code flow
* will not attempt to free it. oom_cfqq is linked to root_group
* but shouldn't hold a reference as it'll never be unlinked. Lose
@@ -4683,13 +4733,18 @@
#ifdef CONFIG_CFQ_GROUP_IOSCHED
static struct blkcg_policy blkcg_policy_cfq = {
- .pd_size = sizeof(struct cfq_group),
- .cpd_size = sizeof(struct cfq_group_data),
- .cftypes = cfq_blkcg_files,
+ .dfl_cftypes = cfq_blkcg_files,
+ .legacy_cftypes = cfq_blkcg_legacy_files,
+ .cpd_alloc_fn = cfq_cpd_alloc,
.cpd_init_fn = cfq_cpd_init,
+ .cpd_free_fn = cfq_cpd_free,
+ .cpd_bind_fn = cfq_cpd_bind,
+
+ .pd_alloc_fn = cfq_pd_alloc,
.pd_init_fn = cfq_pd_init,
.pd_offline_fn = cfq_pd_offline,
+ .pd_free_fn = cfq_pd_free,
.pd_reset_stats_fn = cfq_pd_reset_stats,
};
#endif
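
On the unified hierarchy the new cfq "weight" file accepts "WEIGHT", "default WEIGHT", or "MAJ:MIN WEIGHT", with "default" clearing a per-device override. A stand-alone sketch of that parsing, assuming the CGROUP_WEIGHT_MIN/MAX range of 1..10000; parse_io_weight() is illustrative, not a kernel function:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define CGROUP_WEIGHT_MIN	1ULL
#define CGROUP_WEIGHT_MAX	10000ULL

static int parse_io_weight(const char *buf)
{
	unsigned long long v;
	unsigned int maj, min;
	char body[32];
	char *endp;

	/* "WEIGHT" or "default WEIGHT" sets the group's default weight */
	v = strtoull(buf, &endp, 0);
	if (*endp == '\0' || sscanf(buf, "default %llu", &v) == 1) {
		if (v < CGROUP_WEIGHT_MIN || v > CGROUP_WEIGHT_MAX)
			return -1;		/* -ERANGE in the kernel */
		printf("default weight -> %llu\n", v);
		return 0;
	}

	/* "MAJ:MIN WEIGHT" sets a per-device weight, "MAJ:MIN default" clears it */
	if (sscanf(buf, "%u:%u %31s", &maj, &min, body) != 3)
		return -1;			/* -EINVAL in the kernel */
	if (!strcmp(body, "default"))
		v = 0;				/* 0 means "fall back to the default" */
	else if (sscanf(body, "%llu", &v) != 1 ||
		 v < CGROUP_WEIGHT_MIN || v > CGROUP_WEIGHT_MAX)
		return -1;
	printf("%u:%u weight -> %llu\n", maj, min, v);
	return 0;
}

int main(void)
{
	parse_io_weight("500");
	parse_io_weight("default 300");
	parse_io_weight("8:16 100");
	return 0;
}
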
diff --git a/crypto/testmgr.c b/crypto/testmgr.c
index 35c2de1..fa18753 100644
--- a/crypto/testmgr.c
+++ b/crypto/testmgr.c
@@ -940,6 +940,7 @@
char *xbuf[XBUFSIZE];
char *xoutbuf[XBUFSIZE];
int ret = -ENOMEM;
+ unsigned int ivsize = crypto_skcipher_ivsize(tfm);
if (testmgr_alloc_buf(xbuf))
goto out_nobuf;
@@ -975,7 +976,7 @@
continue;
if (template[i].iv)
- memcpy(iv, template[i].iv, MAX_IVLEN);
+ memcpy(iv, template[i].iv, ivsize);
else
memset(iv, 0, MAX_IVLEN);
@@ -1051,7 +1052,7 @@
continue;
if (template[i].iv)
- memcpy(iv, template[i].iv, MAX_IVLEN);
+ memcpy(iv, template[i].iv, ivsize);
else
memset(iv, 0, MAX_IVLEN);
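
The testmgr change above copies only crypto_skcipher_ivsize() bytes of the template IV instead of a fixed MAX_IVLEN, so templates whose IV buffer is shorter than MAX_IVLEN are no longer over-read. A trivial stand-alone illustration of the bounded copy (buffer sizes are made up):

#include <stdio.h>
#include <string.h>

#define MAX_IVLEN 32

int main(void)
{
	const unsigned char template_iv[8] = "\x01\x02\x03\x04\x05\x06\x07";
	unsigned char iv[MAX_IVLEN];
	unsigned int ivsize = sizeof(template_iv);	/* crypto_skcipher_ivsize() analogue */

	/* old behaviour: memcpy(iv, template_iv, MAX_IVLEN) read 24 bytes
	 * past the end of the template buffer */
	memcpy(iv, template_iv, ivsize);
	memset(iv + ivsize, 0, sizeof(iv) - ivsize);

	printf("copied %u IV bytes\n", ivsize);
	return 0;
}
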
diff --git a/drivers/acpi/thermal.c b/drivers/acpi/thermal.c
index fc28b9f..30d8518 100644
--- a/drivers/acpi/thermal.c
+++ b/drivers/acpi/thermal.c
@@ -525,8 +525,7 @@
/* sys I/F for generic thermal sysfs support */
-static int thermal_get_temp(struct thermal_zone_device *thermal,
- unsigned long *temp)
+static int thermal_get_temp(struct thermal_zone_device *thermal, int *temp)
{
struct acpi_thermal *tz = thermal->devdata;
int result;
@@ -633,7 +632,7 @@
}
static int thermal_get_trip_temp(struct thermal_zone_device *thermal,
- int trip, unsigned long *temp)
+ int trip, int *temp)
{
struct acpi_thermal *tz = thermal->devdata;
int i;
@@ -686,7 +685,8 @@
}
static int thermal_get_crit_temp(struct thermal_zone_device *thermal,
- unsigned long *temperature) {
+ int *temperature)
+{
struct acpi_thermal *tz = thermal->devdata;
if (tz->trips.critical.flags.valid) {
@@ -709,8 +709,8 @@
return -EINVAL;
if (type == THERMAL_TRIP_ACTIVE) {
- unsigned long trip_temp;
- unsigned long temp = DECI_KELVIN_TO_MILLICELSIUS_WITH_OFFSET(
+ int trip_temp;
+ int temp = DECI_KELVIN_TO_MILLICELSIUS_WITH_OFFSET(
tz->temperature, tz->kelvin_offset);
if (thermal_get_trip_temp(thermal, trip, &trip_temp))
return -EINVAL;
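
The thermal callbacks above switch temperature parameters from unsigned long to int; with an unsigned type, a below-zero millicelsius reading wraps to a huge positive value. A tiny stand-alone illustration (values are made up):

#include <stdio.h>

int main(void)
{
	int temp_mC = -5000;			/* -5 C in millicelsius */
	unsigned long old_temp = (unsigned long)temp_mC;

	printf("signed: %d mC, unsigned wraps to: %lu\n", temp_mC, old_temp);
	return 0;
}
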
diff --git a/drivers/android/binder.c b/drivers/android/binder.c
index 6607f3c..a39e85f 100644
--- a/drivers/android/binder.c
+++ b/drivers/android/binder.c
@@ -2834,7 +2834,7 @@
return VM_FAULT_SIGBUS;
}
-static struct vm_operations_struct binder_vm_ops = {
+static const struct vm_operations_struct binder_vm_ops = {
.open = binder_vma_open,
.close = binder_vma_close,
.fault = binder_vm_fault,
diff --git a/drivers/base/power/domain.c b/drivers/base/power/domain.c
index 4167201..16550c6 100644
--- a/drivers/base/power/domain.c
+++ b/drivers/base/power/domain.c
@@ -213,6 +213,18 @@
}
/**
+ * genpd_queue_power_off_work - Queue up the execution of pm_genpd_poweroff().
+ * @genpd: PM domain to power off.
+ *
+ * Queue up the execution of pm_genpd_poweroff() unless it's already been done
+ * before.
+ */
+static void genpd_queue_power_off_work(struct generic_pm_domain *genpd)
+{
+ queue_work(pm_wq, &genpd->power_off_work);
+}
+
+/**
* __pm_genpd_poweron - Restore power to a given PM domain and its masters.
* @genpd: PM domain to power up.
*
@@ -259,8 +271,12 @@
return 0;
err:
- list_for_each_entry_continue_reverse(link, &genpd->slave_links, slave_node)
+ list_for_each_entry_continue_reverse(link,
+ &genpd->slave_links,
+ slave_node) {
genpd_sd_counter_dec(link->master);
+ genpd_queue_power_off_work(link->master);
+ }
return ret;
}
@@ -349,18 +365,6 @@
}
/**
- * genpd_queue_power_off_work - Queue up the execution of pm_genpd_poweroff().
- * @genpd: PM domait to power off.
- *
- * Queue up the execution of pm_genpd_poweroff() unless it's already been done
- * before.
- */
-static void genpd_queue_power_off_work(struct generic_pm_domain *genpd)
-{
- queue_work(pm_wq, &genpd->power_off_work);
-}
-
-/**
* pm_genpd_poweroff - Remove power from a given PM domain.
* @genpd: PM domain to power down.
*
@@ -1469,6 +1473,13 @@
mutex_lock(&genpd->lock);
+ if (!list_empty(&subdomain->slave_links) || subdomain->device_count) {
+ pr_warn("%s: unable to remove subdomain %s\n", genpd->name,
+ subdomain->name);
+ ret = -EBUSY;
+ goto out;
+ }
+
list_for_each_entry(link, &genpd->master_links, master_node) {
if (link->slave != subdomain)
continue;
@@ -1487,6 +1498,7 @@
break;
}
+out:
mutex_unlock(&genpd->lock);
return ret;
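
The __pm_genpd_poweron() error path now walks back over the master links it has already taken a reference on, dropping each subdomain count and queueing a power-off, and pm_genpd_remove_subdomain() refuses to run while the subdomain still has slaves or devices. A generic stand-alone sketch of the same undo-on-partial-failure pattern (power_on()/power_off() and the step count are illustrative only):

#include <stdio.h>

#define NSTEPS 4

static int power_on(int i)
{
	return (i == 2) ? -1 : 0;	/* pretend step 2 fails */
}

static void power_off(int i)
{
	printf("undo step %d\n", i);
}

int main(void)
{
	int i, ret = 0;

	for (i = 0; i < NSTEPS; i++) {
		ret = power_on(i);
		if (ret)
			break;
	}
	if (ret) {
		/* like list_for_each_entry_continue_reverse(): only the
		 * entries completed before the failing one are unwound */
		while (--i >= 0)
			power_off(i);
	}
	return ret ? 1 : 0;
}
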
diff --git a/drivers/base/power/opp.c b/drivers/base/power/opp.c
index eb25449..28cd75c 100644
--- a/drivers/base/power/opp.c
+++ b/drivers/base/power/opp.c
@@ -341,6 +341,34 @@
EXPORT_SYMBOL_GPL(dev_pm_opp_get_max_clock_latency);
/**
+ * dev_pm_opp_get_suspend_opp() - Get suspend opp
+ * @dev: device for which we do this operation
+ *
+ * Return: This function returns pointer to the suspend opp if it is
+ * defined and available, otherwise it returns NULL.
+ *
+ * Locking: This function must be called under rcu_read_lock(). opp is a rcu
+ * protected pointer, so the returned opp remains valid for use with
+ * opp_get_{voltage, freq} only inside the RCU read-side critical section;
+ * it must be used before rcu_read_unlock() to maintain its integrity.
+ */
+struct dev_pm_opp *dev_pm_opp_get_suspend_opp(struct device *dev)
+{
+ struct device_opp *dev_opp;
+
+ opp_rcu_lockdep_assert();
+
+ dev_opp = _find_device_opp(dev);
+ if (IS_ERR(dev_opp) || !dev_opp->suspend_opp ||
+ !dev_opp->suspend_opp->available)
+ return NULL;
+
+ return dev_opp->suspend_opp;
+}
+EXPORT_SYMBOL_GPL(dev_pm_opp_get_suspend_opp);
+
+/**
* dev_pm_opp_get_opp_count() - Get number of opps available in the opp list
* @dev: device for which we do this operation
*
diff --git a/drivers/base/property.c b/drivers/base/property.c
index ff03f23..2d75366 100644
--- a/drivers/base/property.c
+++ b/drivers/base/property.c
@@ -611,13 +611,15 @@
*/
void *device_get_mac_address(struct device *dev, char *addr, int alen)
{
- addr = device_get_mac_addr(dev, "mac-address", addr, alen);
- if (addr)
- return addr;
+ char *res;
- addr = device_get_mac_addr(dev, "local-mac-address", addr, alen);
- if (addr)
- return addr;
+ res = device_get_mac_addr(dev, "mac-address", addr, alen);
+ if (res)
+ return res;
+
+ res = device_get_mac_addr(dev, "local-mac-address", addr, alen);
+ if (res)
+ return res;
return device_get_mac_addr(dev, "address", addr, alen);
}
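
The device_get_mac_address() fix stops assigning each lookup's result to the caller's addr argument: once the first lookup returned NULL, the old code had lost the buffer pointer, so the fallback lookups could never fill it. A stand-alone sketch of the before/after shape, where lookup() stands in for device_get_mac_addr():

#include <stddef.h>
#include <stdio.h>
#include <string.h>

static char *lookup(const char *prop, char *buf, size_t len)
{
	/* only the fallback property exists in this example */
	if (strcmp(prop, "local-mac-address") != 0)
		return NULL;
	snprintf(buf, len, "aa:bb:cc:dd:ee:ff");
	return buf;
}

static char *get_mac(char *buf, size_t len)
{
	char *res;

	res = lookup("mac-address", buf, len);
	if (res)
		return res;
	/* with the old "buf = lookup(...)" style, buf would already be NULL
	 * here and this call could not fill the caller's buffer any more */
	res = lookup("local-mac-address", buf, len);
	if (res)
		return res;
	return lookup("address", buf, len);
}

int main(void)
{
	char mac[18];

	printf("%s\n", get_mac(mac, sizeof(mac)) ? mac : "none");
	return 0;
}
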
diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c
index 698f761..d93a037 100644
--- a/drivers/block/rbd.c
+++ b/drivers/block/rbd.c
@@ -4673,7 +4673,10 @@
}
ret = rbd_dev_v2_snap_context(rbd_dev);
- dout("rbd_dev_v2_snap_context returned %d\n", ret);
+ if (ret && first_time) {
+ kfree(rbd_dev->header.object_prefix);
+ rbd_dev->header.object_prefix = NULL;
+ }
return ret;
}
@@ -5154,7 +5157,6 @@
out_err:
if (parent) {
rbd_dev_unparent(rbd_dev);
- kfree(rbd_dev->header_name);
rbd_dev_destroy(parent);
} else {
rbd_put_client(rbdc);
diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c
index 1508353..0823a96 100644
--- a/drivers/block/xen-blkfront.c
+++ b/drivers/block/xen-blkfront.c
@@ -249,7 +249,7 @@
struct blkfront_info *info)
{
struct grant *gnt_list_entry;
- unsigned long buffer_mfn;
+ unsigned long buffer_gfn;
BUG_ON(list_empty(&info->grants));
gnt_list_entry = list_first_entry(&info->grants, struct grant,
@@ -268,10 +268,10 @@
BUG_ON(!pfn);
gnt_list_entry->pfn = pfn;
}
- buffer_mfn = pfn_to_mfn(gnt_list_entry->pfn);
+ buffer_gfn = pfn_to_gfn(gnt_list_entry->pfn);
gnttab_grant_foreign_access_ref(gnt_list_entry->gref,
info->xbdev->otherend_id,
- buffer_mfn, 0);
+ buffer_gfn, 0);
return gnt_list_entry;
}
diff --git a/drivers/clk/h8300/clk-h8s2678.c b/drivers/clk/h8300/clk-h8s2678.c
index 2a38eb4..6cf38dc 100644
--- a/drivers/clk/h8300/clk-h8s2678.c
+++ b/drivers/clk/h8300/clk-h8s2678.c
@@ -8,6 +8,7 @@
#include <linux/err.h>
#include <linux/device.h>
#include <linux/of_address.h>
+#include <linux/slab.h>
static DEFINE_SPINLOCK(clklock);
diff --git a/drivers/clk/hisilicon/Kconfig b/drivers/clk/hisilicon/Kconfig
index 2c16807..e434854 100644
--- a/drivers/clk/hisilicon/Kconfig
+++ b/drivers/clk/hisilicon/Kconfig
@@ -1,6 +1,12 @@
config COMMON_CLK_HI6220
bool "Hi6220 Clock Driver"
- depends on (ARCH_HISI || COMPILE_TEST) && MAILBOX
+ depends on ARCH_HISI || COMPILE_TEST
default ARCH_HISI
help
Build the Hisilicon Hi6220 clock driver based on the common clock framework.
+
+config STUB_CLK_HI6220
+ bool "Hi6220 Stub Clock Driver"
+ depends on COMMON_CLK_HI6220 && MAILBOX
+ help
+ Build the Hisilicon Hi6220 stub clock driver.
diff --git a/drivers/clk/hisilicon/Makefile b/drivers/clk/hisilicon/Makefile
index 4a1001a..74dba31 100644
--- a/drivers/clk/hisilicon/Makefile
+++ b/drivers/clk/hisilicon/Makefile
@@ -7,4 +7,5 @@
obj-$(CONFIG_ARCH_HI3xxx) += clk-hi3620.o
obj-$(CONFIG_ARCH_HIP04) += clk-hip04.o
obj-$(CONFIG_ARCH_HIX5HD2) += clk-hix5hd2.o
-obj-$(CONFIG_COMMON_CLK_HI6220) += clk-hi6220.o clk-hi6220-stub.o
+obj-$(CONFIG_COMMON_CLK_HI6220) += clk-hi6220.o
+obj-$(CONFIG_STUB_CLK_HI6220) += clk-hi6220-stub.o
diff --git a/drivers/clk/rockchip/clk-rk3188.c b/drivers/clk/rockchip/clk-rk3188.c
index ed02bbc..abb4760 100644
--- a/drivers/clk/rockchip/clk-rk3188.c
+++ b/drivers/clk/rockchip/clk-rk3188.c
@@ -716,6 +716,8 @@
"aclk_cpu",
"aclk_peri",
"hclk_peri",
+ "pclk_cpu",
+ "pclk_peri",
};
static void __init rk3188_common_clk_init(struct device_node *np)
@@ -744,8 +746,6 @@
rockchip_clk_register_branches(common_clk_branches,
ARRAY_SIZE(common_clk_branches));
- rockchip_clk_protect_critical(rk3188_critical_clocks,
- ARRAY_SIZE(rk3188_critical_clocks));
rockchip_register_softrst(np, 9, reg_base + RK2928_SOFTRST_CON(0),
ROCKCHIP_SOFTRST_HIWORD_MASK);
@@ -765,6 +765,8 @@
mux_armclk_p, ARRAY_SIZE(mux_armclk_p),
&rk3066_cpuclk_data, rk3066_cpuclk_rates,
ARRAY_SIZE(rk3066_cpuclk_rates));
+ rockchip_clk_protect_critical(rk3188_critical_clocks,
+ ARRAY_SIZE(rk3188_critical_clocks));
}
CLK_OF_DECLARE(rk3066a_cru, "rockchip,rk3066a-cru", rk3066a_clk_init);
@@ -801,6 +803,9 @@
pr_warn("%s: missing clocks to reparent aclk_cpu_pre to gpll\n",
__func__);
}
+
+ rockchip_clk_protect_critical(rk3188_critical_clocks,
+ ARRAY_SIZE(rk3188_critical_clocks));
}
CLK_OF_DECLARE(rk3188a_cru, "rockchip,rk3188a-cru", rk3188a_clk_init);
diff --git a/drivers/clk/samsung/clk-exynos4.c b/drivers/clk/samsung/clk-exynos4.c
index 251f48d..7f370d3 100644
--- a/drivers/clk/samsung/clk-exynos4.c
+++ b/drivers/clk/samsung/clk-exynos4.c
@@ -1398,6 +1398,45 @@
{ 0 },
};
+static const struct exynos_cpuclk_cfg_data e4212_armclk_d[] __initconst = {
+ { 1500000, E4210_CPU_DIV0(2, 1, 6, 0, 7, 3), E4210_CPU_DIV1(2, 6), },
+ { 1400000, E4210_CPU_DIV0(2, 1, 6, 0, 7, 3), E4210_CPU_DIV1(2, 6), },
+ { 1300000, E4210_CPU_DIV0(2, 1, 5, 0, 7, 3), E4210_CPU_DIV1(2, 5), },
+ { 1200000, E4210_CPU_DIV0(2, 1, 5, 0, 7, 3), E4210_CPU_DIV1(2, 5), },
+ { 1100000, E4210_CPU_DIV0(2, 1, 4, 0, 6, 3), E4210_CPU_DIV1(2, 4), },
+ { 1000000, E4210_CPU_DIV0(1, 1, 4, 0, 5, 2), E4210_CPU_DIV1(2, 4), },
+ { 900000, E4210_CPU_DIV0(1, 1, 3, 0, 5, 2), E4210_CPU_DIV1(2, 3), },
+ { 800000, E4210_CPU_DIV0(1, 1, 3, 0, 5, 2), E4210_CPU_DIV1(2, 3), },
+ { 700000, E4210_CPU_DIV0(1, 1, 3, 0, 4, 2), E4210_CPU_DIV1(2, 3), },
+ { 600000, E4210_CPU_DIV0(1, 1, 3, 0, 4, 2), E4210_CPU_DIV1(2, 3), },
+ { 500000, E4210_CPU_DIV0(1, 1, 3, 0, 4, 2), E4210_CPU_DIV1(2, 3), },
+ { 400000, E4210_CPU_DIV0(1, 1, 3, 0, 4, 2), E4210_CPU_DIV1(2, 3), },
+ { 300000, E4210_CPU_DIV0(1, 1, 2, 0, 4, 2), E4210_CPU_DIV1(2, 3), },
+ { 200000, E4210_CPU_DIV0(1, 1, 1, 0, 3, 1), E4210_CPU_DIV1(2, 3), },
+ { 0 },
+};
+
+#define E4412_CPU_DIV1(cores, hpm, copy) \
+ (((cores) << 8) | ((hpm) << 4) | ((copy) << 0))
+
+static const struct exynos_cpuclk_cfg_data e4412_armclk_d[] __initconst = {
+ { 1500000, E4210_CPU_DIV0(2, 1, 6, 0, 7, 3), E4412_CPU_DIV1(7, 0, 6), },
+ { 1400000, E4210_CPU_DIV0(2, 1, 6, 0, 7, 3), E4412_CPU_DIV1(6, 0, 6), },
+ { 1300000, E4210_CPU_DIV0(2, 1, 5, 0, 7, 3), E4412_CPU_DIV1(6, 0, 5), },
+ { 1200000, E4210_CPU_DIV0(2, 1, 5, 0, 7, 3), E4412_CPU_DIV1(5, 0, 5), },
+ { 1100000, E4210_CPU_DIV0(2, 1, 4, 0, 6, 3), E4412_CPU_DIV1(5, 0, 4), },
+ { 1000000, E4210_CPU_DIV0(1, 1, 4, 0, 5, 2), E4412_CPU_DIV1(4, 0, 4), },
+ { 900000, E4210_CPU_DIV0(1, 1, 3, 0, 5, 2), E4412_CPU_DIV1(4, 0, 3), },
+ { 800000, E4210_CPU_DIV0(1, 1, 3, 0, 5, 2), E4412_CPU_DIV1(3, 0, 3), },
+ { 700000, E4210_CPU_DIV0(1, 1, 3, 0, 4, 2), E4412_CPU_DIV1(3, 0, 3), },
+ { 600000, E4210_CPU_DIV0(1, 1, 3, 0, 4, 2), E4412_CPU_DIV1(2, 0, 3), },
+ { 500000, E4210_CPU_DIV0(1, 1, 3, 0, 4, 2), E4412_CPU_DIV1(2, 0, 3), },
+ { 400000, E4210_CPU_DIV0(1, 1, 3, 0, 4, 2), E4412_CPU_DIV1(1, 0, 3), },
+ { 300000, E4210_CPU_DIV0(1, 1, 2, 0, 4, 2), E4412_CPU_DIV1(1, 0, 3), },
+ { 200000, E4210_CPU_DIV0(1, 1, 1, 0, 3, 1), E4412_CPU_DIV1(0, 0, 3), },
+ { 0 },
+};
+
/* register exynos4 clocks */
static void __init exynos4_clk_init(struct device_node *np,
enum exynos4_soc soc)
@@ -1491,6 +1530,17 @@
samsung_clk_register_fixed_factor(ctx,
exynos4x12_fixed_factor_clks,
ARRAY_SIZE(exynos4x12_fixed_factor_clks));
+ if (of_machine_is_compatible("samsung,exynos4412")) {
+ exynos_register_cpu_clock(ctx, CLK_ARM_CLK, "armclk",
+ mout_core_p4x12[0], mout_core_p4x12[1], 0x14200,
+ e4412_armclk_d, ARRAY_SIZE(e4412_armclk_d),
+ CLK_CPU_NEEDS_DEBUG_ALT_DIV | CLK_CPU_HAS_DIV1);
+ } else {
+ exynos_register_cpu_clock(ctx, CLK_ARM_CLK, "armclk",
+ mout_core_p4x12[0], mout_core_p4x12[1], 0x14200,
+ e4212_armclk_d, ARRAY_SIZE(e4212_armclk_d),
+ CLK_CPU_NEEDS_DEBUG_ALT_DIV | CLK_CPU_HAS_DIV1);
+ }
}
samsung_clk_register_alias(ctx, exynos4_aliases,
diff --git a/drivers/cpufreq/Kconfig.arm b/drivers/cpufreq/Kconfig.arm
index 77aa34e..cd0391e 100644
--- a/drivers/cpufreq/Kconfig.arm
+++ b/drivers/cpufreq/Kconfig.arm
@@ -24,55 +24,6 @@
This add the CPUfreq driver support for Versatile Express
big.LITTLE platforms using SPC for power management.
-
-config ARM_EXYNOS_CPUFREQ
- tristate "SAMSUNG EXYNOS CPUfreq Driver"
- depends on CPU_EXYNOS4210 || SOC_EXYNOS4212 || SOC_EXYNOS4412 || SOC_EXYNOS5250
- depends on THERMAL
- help
- This adds the CPUFreq driver for Samsung EXYNOS platforms.
- Supported SoC versions are:
- Exynos4210, Exynos4212, Exynos4412, and Exynos5250.
-
- If in doubt, say N.
-
-config ARM_EXYNOS4X12_CPUFREQ
- bool "SAMSUNG EXYNOS4x12"
- depends on SOC_EXYNOS4212 || SOC_EXYNOS4412
- depends on ARM_EXYNOS_CPUFREQ
- default y
- help
- This adds the CPUFreq driver for Samsung EXYNOS4X12
- SoC (EXYNOS4212 or EXYNOS4412).
-
- If in doubt, say N.
-
-config ARM_EXYNOS5250_CPUFREQ
- bool "SAMSUNG EXYNOS5250"
- depends on SOC_EXYNOS5250
- depends on ARM_EXYNOS_CPUFREQ
- default y
- help
- This adds the CPUFreq driver for Samsung EXYNOS5250
- SoC.
-
- If in doubt, say N.
-
-config ARM_EXYNOS_CPU_FREQ_BOOST_SW
- bool "EXYNOS Frequency Overclocking - Software"
- depends on ARM_EXYNOS_CPUFREQ && THERMAL
- select CPU_FREQ_BOOST_SW
- select EXYNOS_THERMAL
- help
- This driver supports software managed overclocking (BOOST).
- It allows usage of special frequencies for Samsung Exynos
- processors if thermal conditions are appropriate.
-
- It requires, for safe operation, thermal framework with properly
- defined trip points.
-
- If in doubt, say N.
-
config ARM_EXYNOS5440_CPUFREQ
tristate "SAMSUNG EXYNOS5440"
depends on SOC_EXYNOS5440
@@ -133,6 +84,7 @@
config ARM_MT8173_CPUFREQ
bool "Mediatek MT8173 CPUFreq support"
depends on ARCH_MEDIATEK && REGULATOR
+ depends on !CPU_THERMAL || THERMAL=y
select PM_OPP
help
This adds the CPUFreq driver support for Mediatek MT8173 SoC.
diff --git a/drivers/cpufreq/Makefile b/drivers/cpufreq/Makefile
index 60a57ca..4134038 100644
--- a/drivers/cpufreq/Makefile
+++ b/drivers/cpufreq/Makefile
@@ -52,10 +52,6 @@
obj-$(CONFIG_ARCH_DAVINCI) += davinci-cpufreq.o
obj-$(CONFIG_UX500_SOC_DB8500) += dbx500-cpufreq.o
-obj-$(CONFIG_ARM_EXYNOS_CPUFREQ) += arm-exynos-cpufreq.o
-arm-exynos-cpufreq-y := exynos-cpufreq.o
-arm-exynos-cpufreq-$(CONFIG_ARM_EXYNOS4X12_CPUFREQ) += exynos4x12-cpufreq.o
-arm-exynos-cpufreq-$(CONFIG_ARM_EXYNOS5250_CPUFREQ) += exynos5250-cpufreq.o
obj-$(CONFIG_ARM_EXYNOS5440_CPUFREQ) += exynos5440-cpufreq.o
obj-$(CONFIG_ARM_HIGHBANK_CPUFREQ) += highbank-cpufreq.o
obj-$(CONFIG_ARM_HISI_ACPU_CPUFREQ) += hisi-acpu-cpufreq.o
diff --git a/drivers/cpufreq/cpufreq-dt.c b/drivers/cpufreq/cpufreq-dt.c
index c3583cd..7c0d70e 100644
--- a/drivers/cpufreq/cpufreq-dt.c
+++ b/drivers/cpufreq/cpufreq-dt.c
@@ -196,6 +196,7 @@
struct device *cpu_dev;
struct regulator *cpu_reg;
struct clk *cpu_clk;
+ struct dev_pm_opp *suspend_opp;
unsigned long min_uV = ~0, max_uV = 0;
unsigned int transition_latency;
bool need_update = false;
@@ -239,6 +240,17 @@
*/
of_cpumask_init_opp_table(policy->cpus);
+ /*
+ * But we need OPP table to function so if it is not there let's
+ * give platform code chance to provide it for us.
+ */
+ ret = dev_pm_opp_get_opp_count(cpu_dev);
+ if (ret <= 0) {
+ pr_debug("OPP table is not ready, deferring probe\n");
+ ret = -EPROBE_DEFER;
+ goto out_free_opp;
+ }
+
if (need_update) {
struct cpufreq_dt_platform_data *pd = cpufreq_get_driver_data();
@@ -249,24 +261,16 @@
* OPP tables are initialized only for policy->cpu, do it for
* others as well.
*/
- set_cpus_sharing_opps(cpu_dev, policy->cpus);
+ ret = set_cpus_sharing_opps(cpu_dev, policy->cpus);
+ if (ret)
+ dev_err(cpu_dev, "%s: failed to mark OPPs as shared: %d\n",
+ __func__, ret);
of_property_read_u32(np, "clock-latency", &transition_latency);
} else {
transition_latency = dev_pm_opp_get_max_clock_latency(cpu_dev);
}
- /*
- * But we need OPP table to function so if it is not there let's
- * give platform code chance to provide it for us.
- */
- ret = dev_pm_opp_get_opp_count(cpu_dev);
- if (ret <= 0) {
- pr_debug("OPP table is not ready, deferring probe\n");
- ret = -EPROBE_DEFER;
- goto out_free_opp;
- }
-
priv = kzalloc(sizeof(*priv), GFP_KERNEL);
if (!priv) {
ret = -ENOMEM;
@@ -300,7 +304,8 @@
rcu_read_unlock();
tol_uV = opp_uV * priv->voltage_tolerance / 100;
- if (regulator_is_supported_voltage(cpu_reg, opp_uV,
+ if (regulator_is_supported_voltage(cpu_reg,
+ opp_uV - tol_uV,
opp_uV + tol_uV)) {
if (opp_uV < min_uV)
min_uV = opp_uV;
@@ -329,6 +334,13 @@
policy->driver_data = priv;
policy->clk = cpu_clk;
+
+ rcu_read_lock();
+ suspend_opp = dev_pm_opp_get_suspend_opp(cpu_dev);
+ if (suspend_opp)
+ policy->suspend_freq = dev_pm_opp_get_freq(suspend_opp) / 1000;
+ rcu_read_unlock();
+
ret = cpufreq_table_validate_and_show(policy, freq_table);
if (ret) {
dev_err(cpu_dev, "%s: invalid frequency table: %d\n", __func__,
@@ -419,6 +431,7 @@
.ready = cpufreq_ready,
.name = "cpufreq-dt",
.attr = cpufreq_dt_attr,
+ .suspend = cpufreq_generic_suspend,
};
static int dt_cpufreq_probe(struct platform_device *pdev)
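
Two independent cpufreq-dt changes above: the OPP-count check moves ahead of the shared-OPP setup so probing defers cleanly when the table is absent, and regulator_is_supported_voltage() is now given the full tolerance window rather than the nominal voltage as its lower bound. A small sketch of that window computation (numbers are illustrative):

#include <stdio.h>

int main(void)
{
	unsigned long opp_uV = 1100000;		/* 1.1 V OPP */
	unsigned long tolerance = 2;		/* 2 % voltage-tolerance */
	unsigned long tol_uV = opp_uV * tolerance / 100;

	/* old call: regulator_is_supported_voltage(reg, opp_uV, opp_uV + tol_uV)
	 * new call: regulator_is_supported_voltage(reg, opp_uV - tol_uV, opp_uV + tol_uV) */
	printf("window: %lu..%lu uV\n", opp_uV - tol_uV, opp_uV + tol_uV);
	return 0;
}
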
diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c
index b3d9368..6633b3f 100644
--- a/drivers/cpufreq/cpufreq.c
+++ b/drivers/cpufreq/cpufreq.c
@@ -239,7 +239,7 @@
EXPORT_SYMBOL_GPL(cpufreq_generic_init);
/* Only for cpufreq core internal use */
-struct cpufreq_policy *cpufreq_cpu_get_raw(unsigned int cpu)
+static struct cpufreq_policy *cpufreq_cpu_get_raw(unsigned int cpu)
{
struct cpufreq_policy *policy = per_cpu(cpufreq_cpu_data, cpu);
@@ -1626,8 +1626,8 @@
int ret;
if (!policy->suspend_freq) {
- pr_err("%s: suspend_freq can't be zero\n", __func__);
- return -EINVAL;
+ pr_debug("%s: suspend_freq not defined\n", __func__);
+ return 0;
}
pr_debug("%s: Setting suspend-freq: %u\n", __func__,
@@ -2031,8 +2031,7 @@
if (!try_module_get(policy->governor->owner))
return -EINVAL;
- pr_debug("__cpufreq_governor for CPU %u, event %u\n",
- policy->cpu, event);
+ pr_debug("%s: for CPU %u, event %u\n", __func__, policy->cpu, event);
mutex_lock(&cpufreq_governor_lock);
if ((policy->governor_enabled && event == CPUFREQ_GOV_START)
diff --git a/drivers/cpufreq/exynos-cpufreq.c b/drivers/cpufreq/exynos-cpufreq.c
deleted file mode 100644
index fa3dd84..0000000
--- a/drivers/cpufreq/exynos-cpufreq.c
+++ /dev/null
@@ -1,239 +0,0 @@
-/*
- * Copyright (c) 2010-2011 Samsung Electronics Co., Ltd.
- * http://www.samsung.com
- *
- * EXYNOS - CPU frequency scaling support for EXYNOS series
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
-*/
-
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/err.h>
-#include <linux/clk.h>
-#include <linux/io.h>
-#include <linux/slab.h>
-#include <linux/regulator/consumer.h>
-#include <linux/cpufreq.h>
-#include <linux/platform_device.h>
-#include <linux/of.h>
-#include <linux/cpu_cooling.h>
-#include <linux/cpu.h>
-
-#include "exynos-cpufreq.h"
-
-static struct exynos_dvfs_info *exynos_info;
-static struct thermal_cooling_device *cdev;
-static struct regulator *arm_regulator;
-static unsigned int locking_frequency;
-
-static int exynos_cpufreq_get_index(unsigned int freq)
-{
- struct cpufreq_frequency_table *freq_table = exynos_info->freq_table;
- struct cpufreq_frequency_table *pos;
-
- cpufreq_for_each_entry(pos, freq_table)
- if (pos->frequency == freq)
- break;
-
- if (pos->frequency == CPUFREQ_TABLE_END)
- return -EINVAL;
-
- return pos - freq_table;
-}
-
-static int exynos_cpufreq_scale(unsigned int target_freq)
-{
- struct cpufreq_frequency_table *freq_table = exynos_info->freq_table;
- unsigned int *volt_table = exynos_info->volt_table;
- struct cpufreq_policy *policy = cpufreq_cpu_get(0);
- unsigned int arm_volt, safe_arm_volt = 0;
- unsigned int mpll_freq_khz = exynos_info->mpll_freq_khz;
- struct device *dev = exynos_info->dev;
- unsigned int old_freq;
- int index, old_index;
- int ret = 0;
-
- old_freq = policy->cur;
-
- /*
- * The policy max have been changed so that we cannot get proper
- * old_index with cpufreq_frequency_table_target(). Thus, ignore
- * policy and get the index from the raw frequency table.
- */
- old_index = exynos_cpufreq_get_index(old_freq);
- if (old_index < 0) {
- ret = old_index;
- goto out;
- }
-
- index = exynos_cpufreq_get_index(target_freq);
- if (index < 0) {
- ret = index;
- goto out;
- }
-
- /*
- * ARM clock source will be changed APLL to MPLL temporary
- * To support this level, need to control regulator for
- * required voltage level
- */
- if (exynos_info->need_apll_change != NULL) {
- if (exynos_info->need_apll_change(old_index, index) &&
- (freq_table[index].frequency < mpll_freq_khz) &&
- (freq_table[old_index].frequency < mpll_freq_khz))
- safe_arm_volt = volt_table[exynos_info->pll_safe_idx];
- }
- arm_volt = volt_table[index];
-
- /* When the new frequency is higher than current frequency */
- if ((target_freq > old_freq) && !safe_arm_volt) {
- /* Firstly, voltage up to increase frequency */
- ret = regulator_set_voltage(arm_regulator, arm_volt, arm_volt);
- if (ret) {
- dev_err(dev, "failed to set cpu voltage to %d\n",
- arm_volt);
- return ret;
- }
- }
-
- if (safe_arm_volt) {
- ret = regulator_set_voltage(arm_regulator, safe_arm_volt,
- safe_arm_volt);
- if (ret) {
- dev_err(dev, "failed to set cpu voltage to %d\n",
- safe_arm_volt);
- return ret;
- }
- }
-
- exynos_info->set_freq(old_index, index);
-
- /* When the new frequency is lower than current frequency */
- if ((target_freq < old_freq) ||
- ((target_freq > old_freq) && safe_arm_volt)) {
- /* down the voltage after frequency change */
- ret = regulator_set_voltage(arm_regulator, arm_volt,
- arm_volt);
- if (ret) {
- dev_err(dev, "failed to set cpu voltage to %d\n",
- arm_volt);
- goto out;
- }
- }
-
-out:
- cpufreq_cpu_put(policy);
-
- return ret;
-}
-
-static int exynos_target(struct cpufreq_policy *policy, unsigned int index)
-{
- return exynos_cpufreq_scale(exynos_info->freq_table[index].frequency);
-}
-
-static int exynos_cpufreq_cpu_init(struct cpufreq_policy *policy)
-{
- policy->clk = exynos_info->cpu_clk;
- policy->suspend_freq = locking_frequency;
- return cpufreq_generic_init(policy, exynos_info->freq_table, 100000);
-}
-
-static struct cpufreq_driver exynos_driver = {
- .flags = CPUFREQ_STICKY | CPUFREQ_NEED_INITIAL_FREQ_CHECK,
- .verify = cpufreq_generic_frequency_table_verify,
- .target_index = exynos_target,
- .get = cpufreq_generic_get,
- .init = exynos_cpufreq_cpu_init,
- .name = "exynos_cpufreq",
- .attr = cpufreq_generic_attr,
-#ifdef CONFIG_ARM_EXYNOS_CPU_FREQ_BOOST_SW
- .boost_supported = true,
-#endif
-#ifdef CONFIG_PM
- .suspend = cpufreq_generic_suspend,
-#endif
-};
-
-static int exynos_cpufreq_probe(struct platform_device *pdev)
-{
- struct device_node *cpu0;
- int ret = -EINVAL;
-
- exynos_info = kzalloc(sizeof(*exynos_info), GFP_KERNEL);
- if (!exynos_info)
- return -ENOMEM;
-
- exynos_info->dev = &pdev->dev;
-
- if (of_machine_is_compatible("samsung,exynos4212")) {
- exynos_info->type = EXYNOS_SOC_4212;
- ret = exynos4x12_cpufreq_init(exynos_info);
- } else if (of_machine_is_compatible("samsung,exynos4412")) {
- exynos_info->type = EXYNOS_SOC_4412;
- ret = exynos4x12_cpufreq_init(exynos_info);
- } else if (of_machine_is_compatible("samsung,exynos5250")) {
- exynos_info->type = EXYNOS_SOC_5250;
- ret = exynos5250_cpufreq_init(exynos_info);
- } else {
- pr_err("%s: Unknown SoC type\n", __func__);
- ret = -ENODEV;
- }
-
- if (ret)
- goto err_vdd_arm;
-
- if (exynos_info->set_freq == NULL) {
- dev_err(&pdev->dev, "No set_freq function (ERR)\n");
- ret = -EINVAL;
- goto err_vdd_arm;
- }
-
- arm_regulator = regulator_get(NULL, "vdd_arm");
- if (IS_ERR(arm_regulator)) {
- dev_err(&pdev->dev, "failed to get resource vdd_arm\n");
- ret = -EINVAL;
- goto err_vdd_arm;
- }
-
- /* Done here as we want to capture boot frequency */
- locking_frequency = clk_get_rate(exynos_info->cpu_clk) / 1000;
-
- ret = cpufreq_register_driver(&exynos_driver);
- if (ret)
- goto err_cpufreq_reg;
-
- cpu0 = of_get_cpu_node(0, NULL);
- if (!cpu0) {
- pr_err("failed to find cpu0 node\n");
- return 0;
- }
-
- if (of_find_property(cpu0, "#cooling-cells", NULL)) {
- cdev = of_cpufreq_cooling_register(cpu0,
- cpu_present_mask);
- if (IS_ERR(cdev))
- pr_err("running cpufreq without cooling device: %ld\n",
- PTR_ERR(cdev));
- }
-
- return 0;
-
-err_cpufreq_reg:
- dev_err(&pdev->dev, "failed to register cpufreq driver\n");
- regulator_put(arm_regulator);
-err_vdd_arm:
- kfree(exynos_info);
- return ret;
-}
-
-static struct platform_driver exynos_cpufreq_platdrv = {
- .driver = {
- .name = "exynos-cpufreq",
- },
- .probe = exynos_cpufreq_probe,
-};
-module_platform_driver(exynos_cpufreq_platdrv);
diff --git a/drivers/cpufreq/exynos-cpufreq.h b/drivers/cpufreq/exynos-cpufreq.h
deleted file mode 100644
index a3855e4..0000000
--- a/drivers/cpufreq/exynos-cpufreq.h
+++ /dev/null
@@ -1,89 +0,0 @@
-/*
- * Copyright (c) 2010 Samsung Electronics Co., Ltd.
- * http://www.samsung.com
- *
- * EXYNOS - CPUFreq support
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
-*/
-
-enum cpufreq_level_index {
- L0, L1, L2, L3, L4,
- L5, L6, L7, L8, L9,
- L10, L11, L12, L13, L14,
- L15, L16, L17, L18, L19,
- L20,
-};
-
-enum exynos_soc_type {
- EXYNOS_SOC_4212,
- EXYNOS_SOC_4412,
- EXYNOS_SOC_5250,
-};
-
-#define APLL_FREQ(f, a0, a1, a2, a3, a4, a5, a6, a7, b0, b1, b2, m, p, s) \
- { \
- .freq = (f) * 1000, \
- .clk_div_cpu0 = ((a0) | (a1) << 4 | (a2) << 8 | (a3) << 12 | \
- (a4) << 16 | (a5) << 20 | (a6) << 24 | (a7) << 28), \
- .clk_div_cpu1 = (b0 << 0 | b1 << 4 | b2 << 8), \
- .mps = ((m) << 16 | (p) << 8 | (s)), \
- }
-
-struct apll_freq {
- unsigned int freq;
- u32 clk_div_cpu0;
- u32 clk_div_cpu1;
- u32 mps;
-};
-
-struct exynos_dvfs_info {
- enum exynos_soc_type type;
- struct device *dev;
- unsigned long mpll_freq_khz;
- unsigned int pll_safe_idx;
- struct clk *cpu_clk;
- unsigned int *volt_table;
- struct cpufreq_frequency_table *freq_table;
- void (*set_freq)(unsigned int, unsigned int);
- bool (*need_apll_change)(unsigned int, unsigned int);
- void __iomem *cmu_regs;
-};
-
-#ifdef CONFIG_ARM_EXYNOS4X12_CPUFREQ
-extern int exynos4x12_cpufreq_init(struct exynos_dvfs_info *);
-#else
-static inline int exynos4x12_cpufreq_init(struct exynos_dvfs_info *info)
-{
- return -EOPNOTSUPP;
-}
-#endif
-#ifdef CONFIG_ARM_EXYNOS5250_CPUFREQ
-extern int exynos5250_cpufreq_init(struct exynos_dvfs_info *);
-#else
-static inline int exynos5250_cpufreq_init(struct exynos_dvfs_info *info)
-{
- return -EOPNOTSUPP;
-}
-#endif
-
-#define EXYNOS4_CLKSRC_CPU 0x14200
-#define EXYNOS4_CLKMUX_STATCPU 0x14400
-
-#define EXYNOS4_CLKDIV_CPU 0x14500
-#define EXYNOS4_CLKDIV_CPU1 0x14504
-#define EXYNOS4_CLKDIV_STATCPU 0x14600
-#define EXYNOS4_CLKDIV_STATCPU1 0x14604
-
-#define EXYNOS4_CLKSRC_CPU_MUXCORE_SHIFT (16)
-#define EXYNOS4_CLKMUX_STATCPU_MUXCORE_MASK (0x7 << EXYNOS4_CLKSRC_CPU_MUXCORE_SHIFT)
-
-#define EXYNOS5_APLL_LOCK 0x00000
-#define EXYNOS5_APLL_CON0 0x00100
-#define EXYNOS5_CLKMUX_STATCPU 0x00400
-#define EXYNOS5_CLKDIV_CPU0 0x00500
-#define EXYNOS5_CLKDIV_CPU1 0x00504
-#define EXYNOS5_CLKDIV_STATCPU0 0x00600
-#define EXYNOS5_CLKDIV_STATCPU1 0x00604
diff --git a/drivers/cpufreq/exynos4x12-cpufreq.c b/drivers/cpufreq/exynos4x12-cpufreq.c
deleted file mode 100644
index 9e78a85..0000000
--- a/drivers/cpufreq/exynos4x12-cpufreq.c
+++ /dev/null
@@ -1,236 +0,0 @@
-/*
- * Copyright (c) 2010-2012 Samsung Electronics Co., Ltd.
- * http://www.samsung.com
- *
- * EXYNOS4X12 - CPU frequency scaling support
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
-*/
-
-#include <linux/module.h>
-#include <linux/kernel.h>
-#include <linux/err.h>
-#include <linux/clk.h>
-#include <linux/io.h>
-#include <linux/slab.h>
-#include <linux/cpufreq.h>
-#include <linux/of.h>
-#include <linux/of_address.h>
-
-#include "exynos-cpufreq.h"
-
-static struct clk *cpu_clk;
-static struct clk *moutcore;
-static struct clk *mout_mpll;
-static struct clk *mout_apll;
-static struct exynos_dvfs_info *cpufreq;
-
-static unsigned int exynos4x12_volt_table[] = {
- 1350000, 1287500, 1250000, 1187500, 1137500, 1087500, 1037500,
- 1000000, 987500, 975000, 950000, 925000, 900000, 900000
-};
-
-static struct cpufreq_frequency_table exynos4x12_freq_table[] = {
- {CPUFREQ_BOOST_FREQ, L0, 1500 * 1000},
- {0, L1, 1400 * 1000},
- {0, L2, 1300 * 1000},
- {0, L3, 1200 * 1000},
- {0, L4, 1100 * 1000},
- {0, L5, 1000 * 1000},
- {0, L6, 900 * 1000},
- {0, L7, 800 * 1000},
- {0, L8, 700 * 1000},
- {0, L9, 600 * 1000},
- {0, L10, 500 * 1000},
- {0, L11, 400 * 1000},
- {0, L12, 300 * 1000},
- {0, L13, 200 * 1000},
- {0, 0, CPUFREQ_TABLE_END},
-};
-
-static struct apll_freq *apll_freq_4x12;
-
-static struct apll_freq apll_freq_4212[] = {
- /*
- * values:
- * freq
- * clock divider for CORE, COREM0, COREM1, PERIPH, ATB, PCLK_DBG, APLL, CORE2
- * clock divider for COPY, HPM, RESERVED
- * PLL M, P, S
- */
- APLL_FREQ(1500, 0, 3, 7, 0, 6, 1, 2, 0, 6, 2, 0, 250, 4, 0),
- APLL_FREQ(1400, 0, 3, 7, 0, 6, 1, 2, 0, 6, 2, 0, 175, 3, 0),
- APLL_FREQ(1300, 0, 3, 7, 0, 5, 1, 2, 0, 5, 2, 0, 325, 6, 0),
- APLL_FREQ(1200, 0, 3, 7, 0, 5, 1, 2, 0, 5, 2, 0, 200, 4, 0),
- APLL_FREQ(1100, 0, 3, 6, 0, 4, 1, 2, 0, 4, 2, 0, 275, 6, 0),
- APLL_FREQ(1000, 0, 2, 5, 0, 4, 1, 1, 0, 4, 2, 0, 125, 3, 0),
- APLL_FREQ(900, 0, 2, 5, 0, 3, 1, 1, 0, 3, 2, 0, 150, 4, 0),
- APLL_FREQ(800, 0, 2, 5, 0, 3, 1, 1, 0, 3, 2, 0, 100, 3, 0),
- APLL_FREQ(700, 0, 2, 4, 0, 3, 1, 1, 0, 3, 2, 0, 175, 3, 1),
- APLL_FREQ(600, 0, 2, 4, 0, 3, 1, 1, 0, 3, 2, 0, 200, 4, 1),
- APLL_FREQ(500, 0, 2, 4, 0, 3, 1, 1, 0, 3, 2, 0, 125, 3, 1),
- APLL_FREQ(400, 0, 2, 4, 0, 3, 1, 1, 0, 3, 2, 0, 100, 3, 1),
- APLL_FREQ(300, 0, 2, 4, 0, 2, 1, 1, 0, 3, 2, 0, 200, 4, 2),
- APLL_FREQ(200, 0, 1, 3, 0, 1, 1, 1, 0, 3, 2, 0, 100, 3, 2),
-};
-
-static struct apll_freq apll_freq_4412[] = {
- /*
- * values:
- * freq
- * clock divider for CORE, COREM0, COREM1, PERIPH, ATB, PCLK_DBG, APLL, CORE2
- * clock divider for COPY, HPM, CORES
- * PLL M, P, S
- */
- APLL_FREQ(1500, 0, 3, 7, 0, 6, 1, 2, 0, 6, 0, 7, 250, 4, 0),
- APLL_FREQ(1400, 0, 3, 7, 0, 6, 1, 2, 0, 6, 0, 6, 175, 3, 0),
- APLL_FREQ(1300, 0, 3, 7, 0, 5, 1, 2, 0, 5, 0, 6, 325, 6, 0),
- APLL_FREQ(1200, 0, 3, 7, 0, 5, 1, 2, 0, 5, 0, 5, 200, 4, 0),
- APLL_FREQ(1100, 0, 3, 6, 0, 4, 1, 2, 0, 4, 0, 5, 275, 6, 0),
- APLL_FREQ(1000, 0, 2, 5, 0, 4, 1, 1, 0, 4, 0, 4, 125, 3, 0),
- APLL_FREQ(900, 0, 2, 5, 0, 3, 1, 1, 0, 3, 0, 4, 150, 4, 0),
- APLL_FREQ(800, 0, 2, 5, 0, 3, 1, 1, 0, 3, 0, 3, 100, 3, 0),
- APLL_FREQ(700, 0, 2, 4, 0, 3, 1, 1, 0, 3, 0, 3, 175, 3, 1),
- APLL_FREQ(600, 0, 2, 4, 0, 3, 1, 1, 0, 3, 0, 2, 200, 4, 1),
- APLL_FREQ(500, 0, 2, 4, 0, 3, 1, 1, 0, 3, 0, 2, 125, 3, 1),
- APLL_FREQ(400, 0, 2, 4, 0, 3, 1, 1, 0, 3, 0, 1, 100, 3, 1),
- APLL_FREQ(300, 0, 2, 4, 0, 2, 1, 1, 0, 3, 0, 1, 200, 4, 2),
- APLL_FREQ(200, 0, 1, 3, 0, 1, 1, 1, 0, 3, 0, 0, 100, 3, 2),
-};
-
-static void exynos4x12_set_clkdiv(unsigned int div_index)
-{
- unsigned int tmp;
-
- /* Change Divider - CPU0 */
-
- tmp = apll_freq_4x12[div_index].clk_div_cpu0;
-
- __raw_writel(tmp, cpufreq->cmu_regs + EXYNOS4_CLKDIV_CPU);
-
- while (__raw_readl(cpufreq->cmu_regs + EXYNOS4_CLKDIV_STATCPU)
- & 0x11111111)
- cpu_relax();
-
- /* Change Divider - CPU1 */
- tmp = apll_freq_4x12[div_index].clk_div_cpu1;
-
- __raw_writel(tmp, cpufreq->cmu_regs + EXYNOS4_CLKDIV_CPU1);
-
- do {
- cpu_relax();
- tmp = __raw_readl(cpufreq->cmu_regs + EXYNOS4_CLKDIV_STATCPU1);
- } while (tmp != 0x0);
-}
-
-static void exynos4x12_set_apll(unsigned int index)
-{
- unsigned int tmp, freq = apll_freq_4x12[index].freq;
-
- /* MUX_CORE_SEL = MPLL, ARMCLK uses MPLL for lock time */
- clk_set_parent(moutcore, mout_mpll);
-
- do {
- cpu_relax();
- tmp = (__raw_readl(cpufreq->cmu_regs + EXYNOS4_CLKMUX_STATCPU)
- >> EXYNOS4_CLKSRC_CPU_MUXCORE_SHIFT);
- tmp &= 0x7;
- } while (tmp != 0x2);
-
- clk_set_rate(mout_apll, freq * 1000);
-
- /* MUX_CORE_SEL = APLL */
- clk_set_parent(moutcore, mout_apll);
-
- do {
- cpu_relax();
- tmp = __raw_readl(cpufreq->cmu_regs + EXYNOS4_CLKMUX_STATCPU);
- tmp &= EXYNOS4_CLKMUX_STATCPU_MUXCORE_MASK;
- } while (tmp != (0x1 << EXYNOS4_CLKSRC_CPU_MUXCORE_SHIFT));
-}
-
-static void exynos4x12_set_frequency(unsigned int old_index,
- unsigned int new_index)
-{
- if (old_index > new_index) {
- exynos4x12_set_clkdiv(new_index);
- exynos4x12_set_apll(new_index);
- } else if (old_index < new_index) {
- exynos4x12_set_apll(new_index);
- exynos4x12_set_clkdiv(new_index);
- }
-}
-
-int exynos4x12_cpufreq_init(struct exynos_dvfs_info *info)
-{
- struct device_node *np;
- unsigned long rate;
-
- /*
- * HACK: This is a temporary workaround to get access to clock
- * controller registers directly and remove static mappings and
- * dependencies on platform headers. It is necessary to enable
- * Exynos multi-platform support and will be removed together with
- * this whole driver as soon as Exynos gets migrated to use
- * cpufreq-dt driver.
- */
- np = of_find_compatible_node(NULL, NULL, "samsung,exynos4412-clock");
- if (!np) {
- pr_err("%s: failed to find clock controller DT node\n",
- __func__);
- return -ENODEV;
- }
-
- info->cmu_regs = of_iomap(np, 0);
- if (!info->cmu_regs) {
- pr_err("%s: failed to map CMU registers\n", __func__);
- return -EFAULT;
- }
-
- cpu_clk = clk_get(NULL, "armclk");
- if (IS_ERR(cpu_clk))
- return PTR_ERR(cpu_clk);
-
- moutcore = clk_get(NULL, "moutcore");
- if (IS_ERR(moutcore))
- goto err_moutcore;
-
- mout_mpll = clk_get(NULL, "mout_mpll");
- if (IS_ERR(mout_mpll))
- goto err_mout_mpll;
-
- rate = clk_get_rate(mout_mpll) / 1000;
-
- mout_apll = clk_get(NULL, "mout_apll");
- if (IS_ERR(mout_apll))
- goto err_mout_apll;
-
- if (info->type == EXYNOS_SOC_4212)
- apll_freq_4x12 = apll_freq_4212;
- else
- apll_freq_4x12 = apll_freq_4412;
-
- info->mpll_freq_khz = rate;
- /* 800Mhz */
- info->pll_safe_idx = L7;
- info->cpu_clk = cpu_clk;
- info->volt_table = exynos4x12_volt_table;
- info->freq_table = exynos4x12_freq_table;
- info->set_freq = exynos4x12_set_frequency;
-
- cpufreq = info;
-
- return 0;
-
-err_mout_apll:
- clk_put(mout_mpll);
-err_mout_mpll:
- clk_put(moutcore);
-err_moutcore:
- clk_put(cpu_clk);
-
- pr_debug("%s: failed initialization\n", __func__);
- return -EINVAL;
-}
diff --git a/drivers/cpufreq/exynos5250-cpufreq.c b/drivers/cpufreq/exynos5250-cpufreq.c
deleted file mode 100644
index 3eafdc7b..0000000
--- a/drivers/cpufreq/exynos5250-cpufreq.c
+++ /dev/null
@@ -1,210 +0,0 @@
-/*
- * Copyright (c) 2010-2012 Samsung Electronics Co., Ltd.
- * http://www.samsung.com
- *
- * EXYNOS5250 - CPU frequency scaling support
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
-*/
-
-#include <linux/module.h>
-#include <linux/kernel.h>
-#include <linux/err.h>
-#include <linux/clk.h>
-#include <linux/io.h>
-#include <linux/slab.h>
-#include <linux/cpufreq.h>
-#include <linux/of.h>
-#include <linux/of_address.h>
-
-#include "exynos-cpufreq.h"
-
-static struct clk *cpu_clk;
-static struct clk *moutcore;
-static struct clk *mout_mpll;
-static struct clk *mout_apll;
-static struct exynos_dvfs_info *cpufreq;
-
-static unsigned int exynos5250_volt_table[] = {
- 1300000, 1250000, 1225000, 1200000, 1150000,
- 1125000, 1100000, 1075000, 1050000, 1025000,
- 1012500, 1000000, 975000, 950000, 937500,
- 925000
-};
-
-static struct cpufreq_frequency_table exynos5250_freq_table[] = {
- {0, L0, 1700 * 1000},
- {0, L1, 1600 * 1000},
- {0, L2, 1500 * 1000},
- {0, L3, 1400 * 1000},
- {0, L4, 1300 * 1000},
- {0, L5, 1200 * 1000},
- {0, L6, 1100 * 1000},
- {0, L7, 1000 * 1000},
- {0, L8, 900 * 1000},
- {0, L9, 800 * 1000},
- {0, L10, 700 * 1000},
- {0, L11, 600 * 1000},
- {0, L12, 500 * 1000},
- {0, L13, 400 * 1000},
- {0, L14, 300 * 1000},
- {0, L15, 200 * 1000},
- {0, 0, CPUFREQ_TABLE_END},
-};
-
-static struct apll_freq apll_freq_5250[] = {
- /*
- * values:
- * freq
- * clock divider for ARM, CPUD, ACP, PERIPH, ATB, PCLK_DBG, APLL, ARM2
- * clock divider for COPY, HPM, RESERVED
- * PLL M, P, S
- */
- APLL_FREQ(1700, 0, 3, 7, 7, 7, 3, 5, 0, 0, 2, 0, 425, 6, 0),
- APLL_FREQ(1600, 0, 3, 7, 7, 7, 1, 4, 0, 0, 2, 0, 200, 3, 0),
- APLL_FREQ(1500, 0, 2, 7, 7, 7, 1, 4, 0, 0, 2, 0, 250, 4, 0),
- APLL_FREQ(1400, 0, 2, 7, 7, 6, 1, 4, 0, 0, 2, 0, 175, 3, 0),
- APLL_FREQ(1300, 0, 2, 7, 7, 6, 1, 3, 0, 0, 2, 0, 325, 6, 0),
- APLL_FREQ(1200, 0, 2, 7, 7, 5, 1, 3, 0, 0, 2, 0, 200, 4, 0),
- APLL_FREQ(1100, 0, 3, 7, 7, 5, 1, 3, 0, 0, 2, 0, 275, 6, 0),
- APLL_FREQ(1000, 0, 1, 7, 7, 4, 1, 2, 0, 0, 2, 0, 125, 3, 0),
- APLL_FREQ(900, 0, 1, 7, 7, 4, 1, 2, 0, 0, 2, 0, 150, 4, 0),
- APLL_FREQ(800, 0, 1, 7, 7, 4, 1, 2, 0, 0, 2, 0, 100, 3, 0),
- APLL_FREQ(700, 0, 1, 7, 7, 3, 1, 1, 0, 0, 2, 0, 175, 3, 1),
- APLL_FREQ(600, 0, 1, 7, 7, 3, 1, 1, 0, 0, 2, 0, 200, 4, 1),
- APLL_FREQ(500, 0, 1, 7, 7, 2, 1, 1, 0, 0, 2, 0, 125, 3, 1),
- APLL_FREQ(400, 0, 1, 7, 7, 2, 1, 1, 0, 0, 2, 0, 100, 3, 1),
- APLL_FREQ(300, 0, 1, 7, 7, 1, 1, 1, 0, 0, 2, 0, 200, 4, 2),
- APLL_FREQ(200, 0, 1, 7, 7, 1, 1, 1, 0, 0, 2, 0, 100, 3, 2),
-};
-
-static void set_clkdiv(unsigned int div_index)
-{
- unsigned int tmp;
-
- /* Change Divider - CPU0 */
-
- tmp = apll_freq_5250[div_index].clk_div_cpu0;
-
- __raw_writel(tmp, cpufreq->cmu_regs + EXYNOS5_CLKDIV_CPU0);
-
- while (__raw_readl(cpufreq->cmu_regs + EXYNOS5_CLKDIV_STATCPU0)
- & 0x11111111)
- cpu_relax();
-
- /* Change Divider - CPU1 */
- tmp = apll_freq_5250[div_index].clk_div_cpu1;
-
- __raw_writel(tmp, cpufreq->cmu_regs + EXYNOS5_CLKDIV_CPU1);
-
- while (__raw_readl(cpufreq->cmu_regs + EXYNOS5_CLKDIV_STATCPU1) & 0x11)
- cpu_relax();
-}
-
-static void set_apll(unsigned int index)
-{
- unsigned int tmp;
- unsigned int freq = apll_freq_5250[index].freq;
-
- /* MUX_CORE_SEL = MPLL, ARMCLK uses MPLL for lock time */
- clk_set_parent(moutcore, mout_mpll);
-
- do {
- cpu_relax();
- tmp = (__raw_readl(cpufreq->cmu_regs + EXYNOS5_CLKMUX_STATCPU)
- >> 16);
- tmp &= 0x7;
- } while (tmp != 0x2);
-
- clk_set_rate(mout_apll, freq * 1000);
-
- /* MUX_CORE_SEL = APLL */
- clk_set_parent(moutcore, mout_apll);
-
- do {
- cpu_relax();
- tmp = __raw_readl(cpufreq->cmu_regs + EXYNOS5_CLKMUX_STATCPU);
- tmp &= (0x7 << 16);
- } while (tmp != (0x1 << 16));
-}
-
-static void exynos5250_set_frequency(unsigned int old_index,
- unsigned int new_index)
-{
- if (old_index > new_index) {
- set_clkdiv(new_index);
- set_apll(new_index);
- } else if (old_index < new_index) {
- set_apll(new_index);
- set_clkdiv(new_index);
- }
-}
-
-int exynos5250_cpufreq_init(struct exynos_dvfs_info *info)
-{
- struct device_node *np;
- unsigned long rate;
-
- /*
- * HACK: This is a temporary workaround to get access to clock
- * controller registers directly and remove static mappings and
- * dependencies on platform headers. It is necessary to enable
- * Exynos multi-platform support and will be removed together with
- * this whole driver as soon as Exynos gets migrated to use
- * cpufreq-dt driver.
- */
- np = of_find_compatible_node(NULL, NULL, "samsung,exynos5250-clock");
- if (!np) {
- pr_err("%s: failed to find clock controller DT node\n",
- __func__);
- return -ENODEV;
- }
-
- info->cmu_regs = of_iomap(np, 0);
- if (!info->cmu_regs) {
- pr_err("%s: failed to map CMU registers\n", __func__);
- return -EFAULT;
- }
-
- cpu_clk = clk_get(NULL, "armclk");
- if (IS_ERR(cpu_clk))
- return PTR_ERR(cpu_clk);
-
- moutcore = clk_get(NULL, "mout_cpu");
- if (IS_ERR(moutcore))
- goto err_moutcore;
-
- mout_mpll = clk_get(NULL, "mout_mpll");
- if (IS_ERR(mout_mpll))
- goto err_mout_mpll;
-
- rate = clk_get_rate(mout_mpll) / 1000;
-
- mout_apll = clk_get(NULL, "mout_apll");
- if (IS_ERR(mout_apll))
- goto err_mout_apll;
-
- info->mpll_freq_khz = rate;
- /* 800Mhz */
- info->pll_safe_idx = L9;
- info->cpu_clk = cpu_clk;
- info->volt_table = exynos5250_volt_table;
- info->freq_table = exynos5250_freq_table;
- info->set_freq = exynos5250_set_frequency;
-
- cpufreq = info;
-
- return 0;
-
-err_mout_apll:
- clk_put(mout_mpll);
-err_mout_mpll:
- clk_put(moutcore);
-err_moutcore:
- clk_put(cpu_clk);
-
- pr_err("%s: failed initialization\n", __func__);
- return -EINVAL;
-}
diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c
index cddc619..3af9dd7 100644
--- a/drivers/cpufreq/intel_pstate.c
+++ b/drivers/cpufreq/intel_pstate.c
@@ -260,24 +260,31 @@
cpu->pstate.max_pstate == cpu->pstate.turbo_pstate);
}
-#define PCT_TO_HWP(x) (x * 255 / 100)
static void intel_pstate_hwp_set(void)
{
- int min, max, cpu;
- u64 value, freq;
+ int min, hw_min, max, hw_max, cpu, range, adj_range;
+ u64 value, cap;
+
+ rdmsrl(MSR_HWP_CAPABILITIES, cap);
+ hw_min = HWP_LOWEST_PERF(cap);
+ hw_max = HWP_HIGHEST_PERF(cap);
+ range = hw_max - hw_min;
get_online_cpus();
for_each_online_cpu(cpu) {
rdmsrl_on_cpu(cpu, MSR_HWP_REQUEST, &value);
- min = PCT_TO_HWP(limits.min_perf_pct);
+ adj_range = limits.min_perf_pct * range / 100;
+ min = hw_min + adj_range;
value &= ~HWP_MIN_PERF(~0L);
value |= HWP_MIN_PERF(min);
- max = PCT_TO_HWP(limits.max_perf_pct);
+ adj_range = limits.max_perf_pct * range / 100;
+ max = hw_min + adj_range;
if (limits.no_turbo) {
- rdmsrl( MSR_HWP_CAPABILITIES, freq);
- max = HWP_GUARANTEED_PERF(freq);
+ hw_max = HWP_GUARANTEED_PERF(cap);
+ if (hw_max < max)
+ max = hw_max;
}
value &= ~HWP_MAX_PERF(~0L);
@@ -423,6 +430,8 @@
limits.max_sysfs_pct = clamp_t(int, input, 0 , 100);
limits.max_perf_pct = min(limits.max_policy_pct, limits.max_sysfs_pct);
+ limits.max_perf_pct = max(limits.min_policy_pct, limits.max_perf_pct);
+ limits.max_perf_pct = max(limits.min_perf_pct, limits.max_perf_pct);
limits.max_perf = div_fp(int_tofp(limits.max_perf_pct), int_tofp(100));
if (hwp_active)
@@ -442,6 +451,8 @@
limits.min_sysfs_pct = clamp_t(int, input, 0 , 100);
limits.min_perf_pct = max(limits.min_policy_pct, limits.min_sysfs_pct);
+ limits.min_perf_pct = min(limits.max_policy_pct, limits.min_perf_pct);
+ limits.min_perf_pct = min(limits.max_perf_pct, limits.min_perf_pct);
limits.min_perf = div_fp(int_tofp(limits.min_perf_pct), int_tofp(100));
if (hwp_active)
@@ -989,12 +1000,19 @@
limits.min_policy_pct = (policy->min * 100) / policy->cpuinfo.max_freq;
limits.min_policy_pct = clamp_t(int, limits.min_policy_pct, 0 , 100);
- limits.min_perf_pct = max(limits.min_policy_pct, limits.min_sysfs_pct);
- limits.min_perf = div_fp(int_tofp(limits.min_perf_pct), int_tofp(100));
-
limits.max_policy_pct = (policy->max * 100) / policy->cpuinfo.max_freq;
limits.max_policy_pct = clamp_t(int, limits.max_policy_pct, 0 , 100);
+
+ /* Normalize user input to [min_policy_pct, max_policy_pct] */
+ limits.min_perf_pct = max(limits.min_policy_pct, limits.min_sysfs_pct);
+ limits.min_perf_pct = min(limits.max_policy_pct, limits.min_perf_pct);
limits.max_perf_pct = min(limits.max_policy_pct, limits.max_sysfs_pct);
+ limits.max_perf_pct = max(limits.min_policy_pct, limits.max_perf_pct);
+
+ /* Make sure min_perf_pct <= max_perf_pct */
+ limits.min_perf_pct = min(limits.max_perf_pct, limits.min_perf_pct);
+
+ limits.min_perf = div_fp(int_tofp(limits.min_perf_pct), int_tofp(100));
limits.max_perf = div_fp(int_tofp(limits.max_perf_pct), int_tofp(100));
if (hwp_active)
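
The intel_pstate hunks above stop scaling the sysfs percentages against a fixed 0-255 value and instead map them onto the hardware range reported by MSR_HWP_CAPABILITIES, cross-clamping min and max so they stay ordered. A standalone sketch of the same arithmetic, with made-up capability values in place of the real MSR read (the helper name and sample numbers are illustrative, not part of the patch):

#include <stdio.h>

/* Map a percentage onto the [hw_min, hw_max] performance range. */
static int pct_to_hwp_perf(int pct, int hw_min, int hw_max)
{
        return hw_min + pct * (hw_max - hw_min) / 100;
}

int main(void)
{
        int hw_min = 8, hw_max = 36;            /* from MSR_HWP_CAPABILITIES */
        int min_pct = 25, max_pct = 100;        /* from sysfs */

        int min = pct_to_hwp_perf(min_pct, hw_min, hw_max);    /* 8 + 7 = 15 */
        int max = pct_to_hwp_perf(max_pct, hw_min, hw_max);    /* 8 + 28 = 36 */

        if (min > max)          /* mirror the cross-clamping in the patch */
                min = max;

        printf("HWP_MIN_PERF=%d HWP_MAX_PERF=%d\n", min, max);
        return 0;
}

With the removed PCT_TO_HWP() macro the same 25% request would have produced 63 regardless of what the CPU actually supports, which is why the capability-relative mapping matters.
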
diff --git a/drivers/cpuidle/coupled.c b/drivers/cpuidle/coupled.c
index 1523e2d..344058f 100644
--- a/drivers/cpuidle/coupled.c
+++ b/drivers/cpuidle/coupled.c
@@ -187,6 +187,28 @@
}
/**
+ * cpuidle_coupled_state_verify - check if the coupled states are correctly set.
+ * @drv: struct cpuidle_driver for the platform
+ *
+ * Returns 0 for valid state values, a negative error code otherwise:
+ * * -EINVAL if any coupled state (safe_state_index) is wrongly set.
+ */
+int cpuidle_coupled_state_verify(struct cpuidle_driver *drv)
+{
+ int i;
+
+ for (i = drv->state_count - 1; i >= 0; i--) {
+ if (cpuidle_state_is_coupled(drv, i) &&
+ (drv->safe_state_index == i ||
+ drv->safe_state_index < 0 ||
+ drv->safe_state_index >= drv->state_count))
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+/**
* cpuidle_coupled_set_ready - mark a cpu as ready
* @coupled: the struct coupled that contains the current cpu
*/
diff --git a/drivers/cpuidle/cpuidle.h b/drivers/cpuidle/cpuidle.h
index 178c5ad..f87f399 100644
--- a/drivers/cpuidle/cpuidle.h
+++ b/drivers/cpuidle/cpuidle.h
@@ -35,6 +35,7 @@
#ifdef CONFIG_ARCH_NEEDS_CPU_IDLE_COUPLED
bool cpuidle_state_is_coupled(struct cpuidle_driver *drv, int state);
+int cpuidle_coupled_state_verify(struct cpuidle_driver *drv);
int cpuidle_enter_state_coupled(struct cpuidle_device *dev,
struct cpuidle_driver *drv, int next_state);
int cpuidle_coupled_register_device(struct cpuidle_device *dev);
@@ -46,6 +47,11 @@
return false;
}
+static inline int cpuidle_coupled_state_verify(struct cpuidle_driver *drv)
+{
+ return 0;
+}
+
static inline int cpuidle_enter_state_coupled(struct cpuidle_device *dev,
struct cpuidle_driver *drv, int next_state)
{
diff --git a/drivers/cpuidle/driver.c b/drivers/cpuidle/driver.c
index 5db1478..389ade4 100644
--- a/drivers/cpuidle/driver.c
+++ b/drivers/cpuidle/driver.c
@@ -227,6 +227,10 @@
if (!drv || !drv->state_count)
return -EINVAL;
+ ret = cpuidle_coupled_state_verify(drv);
+ if (ret)
+ return ret;
+
if (cpuidle_disabled())
return -ENODEV;
diff --git a/drivers/crypto/Kconfig b/drivers/crypto/Kconfig
index 07bc7aa..d234719 100644
--- a/drivers/crypto/Kconfig
+++ b/drivers/crypto/Kconfig
@@ -461,7 +461,7 @@
config CRYPTO_DEV_VMX
bool "Support for VMX cryptographic acceleration instructions"
- depends on PPC64
+ depends on PPC64 && VSX
help
Support for VMX cryptographic acceleration instructions.
diff --git a/drivers/crypto/qat/qat_common/adf_transport_debug.c b/drivers/crypto/qat/qat_common/adf_transport_debug.c
index e419869..52340b9 100644
--- a/drivers/crypto/qat/qat_common/adf_transport_debug.c
+++ b/drivers/crypto/qat/qat_common/adf_transport_debug.c
@@ -86,9 +86,7 @@
{
struct adf_etr_ring_data *ring = sfile->private;
struct adf_etr_bank_data *bank = ring->bank;
- uint32_t *msg = v;
void __iomem *csr = ring->bank->csr_addr;
- int i, x;
if (v == SEQ_START_TOKEN) {
int head, tail, empty;
@@ -113,18 +111,8 @@
seq_puts(sfile, "----------- Ring data ------------\n");
return 0;
}
- seq_printf(sfile, "%p:", msg);
- x = 0;
- i = 0;
- for (; i < (ADF_MSG_SIZE_TO_BYTES(ring->msg_size) >> 2); i++) {
- seq_printf(sfile, " %08X", *(msg + i));
- if ((ADF_MSG_SIZE_TO_BYTES(ring->msg_size) >> 2) != i + 1 &&
- (++x == 8)) {
- seq_printf(sfile, "\n%p:", msg + i + 1);
- x = 0;
- }
- }
- seq_puts(sfile, "\n");
+ seq_hex_dump(sfile, "", DUMP_PREFIX_ADDRESS, 32, 4,
+ v, ADF_MSG_SIZE_TO_BYTES(ring->msg_size), false);
return 0;
}
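
The adf_transport_debug hunk drops an open-coded word-by-word dump in favour of the common seq_hex_dump() helper. A minimal sketch of the call pattern as a debugfs show helper (the function and parameter names here are illustrative, not taken from the driver):

#include <linux/seq_file.h>

/* Print a buffer as address-prefixed rows of 32 bytes, grouped into 4-byte words. */
static void dump_ring_msg(struct seq_file *sfile, const void *msg, size_t len)
{
        seq_hex_dump(sfile, "", DUMP_PREFIX_ADDRESS, 32, 4, msg, len, false);
}

The rowsize/groupsize pair (32, 4) reproduces the layout the removed loop built by hand; passing true as the last argument would additionally append an ASCII column.
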
diff --git a/drivers/crypto/sunxi-ss/sun4i-ss-cipher.c b/drivers/crypto/sunxi-ss/sun4i-ss-cipher.c
index e070c31..a19ee12 100644
--- a/drivers/crypto/sunxi-ss/sun4i-ss-cipher.c
+++ b/drivers/crypto/sunxi-ss/sun4i-ss-cipher.c
@@ -104,7 +104,7 @@
sg_miter_next(&mo);
oo = 0;
}
- } while (mo.length > 0);
+ } while (oleft > 0);
if (areq->info) {
for (i = 0; i < 4 && i < ivsize / 4; i++) {
diff --git a/drivers/edac/sb_edac.c b/drivers/edac/sb_edac.c
index ca78311..cf1268d 100644
--- a/drivers/edac/sb_edac.c
+++ b/drivers/edac/sb_edac.c
@@ -280,6 +280,7 @@
u8 max_interleave;
u8 (*get_node_id)(struct sbridge_pvt *pvt);
enum mem_type (*get_memory_type)(struct sbridge_pvt *pvt);
+ enum dev_type (*get_width)(struct sbridge_pvt *pvt, u32 mtr);
struct pci_dev *pci_vtd;
};
@@ -471,6 +472,9 @@
#define PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA1_TAD2 0x2f6c
#define PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA1_TAD3 0x2f6d
#define PCI_DEVICE_ID_INTEL_HASWELL_IMC_DDRIO0 0x2fbd
+#define PCI_DEVICE_ID_INTEL_HASWELL_IMC_DDRIO1 0x2fbf
+#define PCI_DEVICE_ID_INTEL_HASWELL_IMC_DDRIO2 0x2fb9
+#define PCI_DEVICE_ID_INTEL_HASWELL_IMC_DDRIO3 0x2fbb
static const struct pci_id_descr pci_dev_descr_haswell[] = {
/* first item must be the HA */
{ PCI_DESCR(PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA0, 0) },
@@ -488,6 +492,9 @@
{ PCI_DESCR(PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA0_TAD3, 1) },
{ PCI_DESCR(PCI_DEVICE_ID_INTEL_HASWELL_IMC_DDRIO0, 1) },
+ { PCI_DESCR(PCI_DEVICE_ID_INTEL_HASWELL_IMC_DDRIO1, 1) },
+ { PCI_DESCR(PCI_DEVICE_ID_INTEL_HASWELL_IMC_DDRIO2, 1) },
+ { PCI_DESCR(PCI_DEVICE_ID_INTEL_HASWELL_IMC_DDRIO3, 1) },
{ PCI_DESCR(PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA1_TA, 1) },
{ PCI_DESCR(PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA1_THERMAL, 1) },
@@ -762,6 +769,49 @@
return mtype;
}
+static enum dev_type sbridge_get_width(struct sbridge_pvt *pvt, u32 mtr)
+{
+ /* there's no way to figure out */
+ return DEV_UNKNOWN;
+}
+
+static enum dev_type __ibridge_get_width(u32 mtr)
+{
+ enum dev_type type;
+
+ switch (mtr) {
+ case 3:
+ type = DEV_UNKNOWN;
+ break;
+ case 2:
+ type = DEV_X16;
+ break;
+ case 1:
+ type = DEV_X8;
+ break;
+ case 0:
+ type = DEV_X4;
+ break;
+ }
+
+ return type;
+}
+
+static enum dev_type ibridge_get_width(struct sbridge_pvt *pvt, u32 mtr)
+{
+ /*
+ * ddr3_width in the documentation but also valid for DDR4 on
+ * Haswell
+ */
+ return __ibridge_get_width(GET_BITFIELD(mtr, 7, 8));
+}
+
+static enum dev_type broadwell_get_width(struct sbridge_pvt *pvt, u32 mtr)
+{
+ /* ddr3_width in the documentation but also valid for DDR4 */
+ return __ibridge_get_width(GET_BITFIELD(mtr, 8, 9));
+}
+
static u8 get_node_id(struct sbridge_pvt *pvt)
{
u32 reg;
@@ -966,17 +1016,7 @@
dimm->nr_pages = npages;
dimm->grain = 32;
- switch (banks) {
- case 16:
- dimm->dtype = DEV_X16;
- break;
- case 8:
- dimm->dtype = DEV_X8;
- break;
- case 4:
- dimm->dtype = DEV_X4;
- break;
- }
+ dimm->dtype = pvt->info.get_width(pvt, mtr);
dimm->mtype = mtype;
dimm->edac_mode = mode;
snprintf(dimm->label, sizeof(dimm->label),
@@ -1869,7 +1909,11 @@
}
break;
case PCI_DEVICE_ID_INTEL_HASWELL_IMC_DDRIO0:
- pvt->pci_ddrio = pdev;
+ case PCI_DEVICE_ID_INTEL_HASWELL_IMC_DDRIO1:
+ case PCI_DEVICE_ID_INTEL_HASWELL_IMC_DDRIO2:
+ case PCI_DEVICE_ID_INTEL_HASWELL_IMC_DDRIO3:
+ if (!pvt->pci_ddrio)
+ pvt->pci_ddrio = pdev;
break;
case PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA1:
pvt->pci_ha1 = pdev;
@@ -2361,6 +2405,7 @@
pvt->info.interleave_list = ibridge_interleave_list;
pvt->info.max_interleave = ARRAY_SIZE(ibridge_interleave_list);
pvt->info.interleave_pkg = ibridge_interleave_pkg;
+ pvt->info.get_width = ibridge_get_width;
mci->ctl_name = kasprintf(GFP_KERNEL, "Ivy Bridge Socket#%d", mci->mc_idx);
/* Store pci devices at mci for faster access */
@@ -2380,6 +2425,7 @@
pvt->info.interleave_list = sbridge_interleave_list;
pvt->info.max_interleave = ARRAY_SIZE(sbridge_interleave_list);
pvt->info.interleave_pkg = sbridge_interleave_pkg;
+ pvt->info.get_width = sbridge_get_width;
mci->ctl_name = kasprintf(GFP_KERNEL, "Sandy Bridge Socket#%d", mci->mc_idx);
/* Store pci devices at mci for faster access */
@@ -2399,6 +2445,7 @@
pvt->info.interleave_list = ibridge_interleave_list;
pvt->info.max_interleave = ARRAY_SIZE(ibridge_interleave_list);
pvt->info.interleave_pkg = ibridge_interleave_pkg;
+ pvt->info.get_width = ibridge_get_width;
mci->ctl_name = kasprintf(GFP_KERNEL, "Haswell Socket#%d", mci->mc_idx);
/* Store pci devices at mci for faster access */
@@ -2418,6 +2465,7 @@
pvt->info.interleave_list = ibridge_interleave_list;
pvt->info.max_interleave = ARRAY_SIZE(ibridge_interleave_list);
pvt->info.interleave_pkg = ibridge_interleave_pkg;
+ pvt->info.get_width = broadwell_get_width;
mci->ctl_name = kasprintf(GFP_KERNEL, "Broadwell Socket#%d", mci->mc_idx);
/* Store pci devices at mci for faster access */
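
The sb_edac change stops guessing DIMM device width from the bank count and instead decodes it from the MTR register through the new per-generation get_width() callbacks: Ivy Bridge/Haswell read MTR bits 8:7, Broadwell bits 9:8. A small user-space sketch of that decode, with an invented example register value (helper name and sample MTR are illustrative):

#include <stdio.h>
#include <stdint.h>

/* Extract bits [lo, hi] of a register value, like the driver's GET_BITFIELD(). */
static uint32_t get_bitfield(uint32_t v, int lo, int hi)
{
        return (v >> lo) & ((1u << (hi - lo + 1)) - 1);
}

int main(void)
{
        static const char *width[] = { "x4", "x8", "x16", "unknown" };
        uint32_t mtr = 0x00000080;      /* example value, bits 8:7 = 01b */

        /* Ivy Bridge/Haswell: ddr3_width lives in MTR bits 8:7. */
        printf("DIMM device width: %s\n", width[get_bitfield(mtr, 7, 8)]);
        return 0;
}
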
diff --git a/drivers/firmware/efi/Kconfig b/drivers/firmware/efi/Kconfig
index 54071c1..84533e0 100644
--- a/drivers/firmware/efi/Kconfig
+++ b/drivers/firmware/efi/Kconfig
@@ -43,7 +43,7 @@
config EFI_RUNTIME_MAP
bool "Export efi runtime maps to sysfs"
- depends on X86 && EFI && KEXEC
+ depends on X86 && EFI && KEXEC_CORE
default y
help
Export efi runtime memory maps to /sys/firmware/efi/runtime-map.
diff --git a/drivers/gpio/Kconfig b/drivers/gpio/Kconfig
index b4fc9e4..8949b3f 100644
--- a/drivers/gpio/Kconfig
+++ b/drivers/gpio/Kconfig
@@ -356,7 +356,7 @@
config GPIO_RCAR
tristate "Renesas R-Car GPIO"
- depends on ARM && (ARCH_SHMOBILE || COMPILE_TEST)
+ depends on ARCH_SHMOBILE || COMPILE_TEST
select GPIOLIB_IRQCHIP
help
Say yes here to support GPIO on Renesas R-Car SoCs.
diff --git a/drivers/gpio/gpio-mxc.c b/drivers/gpio/gpio-mxc.c
index b752b56..8813aba 100644
--- a/drivers/gpio/gpio-mxc.c
+++ b/drivers/gpio/gpio-mxc.c
@@ -339,13 +339,15 @@
return 0;
}
-static void mxc_gpio_init_gc(struct mxc_gpio_port *port, int irq_base)
+static int mxc_gpio_init_gc(struct mxc_gpio_port *port, int irq_base)
{
struct irq_chip_generic *gc;
struct irq_chip_type *ct;
gc = irq_alloc_generic_chip("gpio-mxc", 1, irq_base,
port->base, handle_level_irq);
+ if (!gc)
+ return -ENOMEM;
gc->private = port;
ct = gc->chip_types;
@@ -360,6 +362,8 @@
irq_setup_generic_chip(gc, IRQ_MSK(32), IRQ_GC_INIT_NESTED_LOCK,
IRQ_NOREQUEST, 0);
+
+ return 0;
}
static void mxc_gpio_get_hw(struct platform_device *pdev)
@@ -477,12 +481,16 @@
}
/* gpio-mxc can be a generic irq chip */
- mxc_gpio_init_gc(port, irq_base);
+ err = mxc_gpio_init_gc(port, irq_base);
+ if (err < 0)
+ goto out_irqdomain_remove;
list_add_tail(&port->node, &mxc_gpio_ports);
return 0;
+out_irqdomain_remove:
+ irq_domain_remove(port->domain);
out_irqdesc_free:
irq_free_descs(irq_base, 32);
out_gpiochip_remove:
diff --git a/drivers/gpio/gpio-mxs.c b/drivers/gpio/gpio-mxs.c
index b7f383e..1387385 100644
--- a/drivers/gpio/gpio-mxs.c
+++ b/drivers/gpio/gpio-mxs.c
@@ -196,13 +196,16 @@
return 0;
}
-static void __init mxs_gpio_init_gc(struct mxs_gpio_port *port, int irq_base)
+static int __init mxs_gpio_init_gc(struct mxs_gpio_port *port, int irq_base)
{
struct irq_chip_generic *gc;
struct irq_chip_type *ct;
gc = irq_alloc_generic_chip("gpio-mxs", 1, irq_base,
port->base, handle_level_irq);
+ if (!gc)
+ return -ENOMEM;
+
gc->private = port;
ct = gc->chip_types;
@@ -216,6 +219,8 @@
irq_setup_generic_chip(gc, IRQ_MSK(32), IRQ_GC_INIT_NESTED_LOCK,
IRQ_NOREQUEST, 0);
+
+ return 0;
}
static int mxs_gpio_to_irq(struct gpio_chip *gc, unsigned offset)
@@ -317,7 +322,9 @@
}
/* gpio-mxs can be a generic irq chip */
- mxs_gpio_init_gc(port, irq_base);
+ err = mxs_gpio_init_gc(port, irq_base);
+ if (err < 0)
+ goto out_irqdomain_remove;
/* setup one handler for each entry */
irq_set_chained_handler_and_data(port->irq, mxs_gpio_irq_handler,
@@ -343,6 +350,8 @@
out_bgpio_remove:
bgpio_remove(&port->bgc);
+out_irqdomain_remove:
+ irq_domain_remove(port->domain);
out_irqdesc_free:
irq_free_descs(irq_base, 32);
return err;
diff --git a/drivers/gpio/gpio-omap.c b/drivers/gpio/gpio-omap.c
index 2ae0d47..072af52 100644
--- a/drivers/gpio/gpio-omap.c
+++ b/drivers/gpio/gpio-omap.c
@@ -1098,7 +1098,6 @@
} else {
bank->chip.label = "gpio";
bank->chip.base = gpio;
- gpio += bank->width;
}
bank->chip.ngpio = bank->width;
@@ -1108,6 +1107,9 @@
return ret;
}
+ if (!bank->is_mpuio)
+ gpio += bank->width;
+
#ifdef CONFIG_ARCH_OMAP1
/*
* REVISIT: Once we have OMAP1 supporting SPARSE_IRQ, we can drop
@@ -1253,8 +1255,11 @@
omap_gpio_mod_init(bank);
ret = omap_gpio_chip_init(bank, irqc);
- if (ret)
+ if (ret) {
+ pm_runtime_put_sync(bank->dev);
+ pm_runtime_disable(bank->dev);
return ret;
+ }
omap_gpio_show_rev(bank);
diff --git a/drivers/gpio/gpio-sx150x.c b/drivers/gpio/gpio-sx150x.c
index 458d9d7..9c6b967 100644
--- a/drivers/gpio/gpio-sx150x.c
+++ b/drivers/gpio/gpio-sx150x.c
@@ -706,4 +706,3 @@
MODULE_AUTHOR("Gregory Bean <gbean@codeaurora.org>");
MODULE_DESCRIPTION("Driver for Semtech SX150X I2C GPIO Expanders");
MODULE_LICENSE("GPL v2");
-MODULE_ALIAS("i2c:sx150x");
diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c
index 980c1f8..5db3445 100644
--- a/drivers/gpio/gpiolib.c
+++ b/drivers/gpio/gpiolib.c
@@ -1174,15 +1174,16 @@
* that the GPIO was actually requested.
*/
-static bool _gpiod_get_raw_value(const struct gpio_desc *desc)
+static int _gpiod_get_raw_value(const struct gpio_desc *desc)
{
struct gpio_chip *chip;
- bool value;
int offset;
+ int value;
chip = desc->chip;
offset = gpio_chip_hwgpio(desc);
- value = chip->get ? chip->get(chip, offset) : false;
+ value = chip->get ? chip->get(chip, offset) : -EIO;
+ value = value < 0 ? value : !!value;
trace_gpio_value(desc_to_gpio(desc), 1, value);
return value;
}
@@ -1192,7 +1193,7 @@
* @desc: gpio whose value will be returned
*
* Return the GPIO's raw value, i.e. the value of the physical line disregarding
- * its ACTIVE_LOW status.
+ * its ACTIVE_LOW status, or negative errno on failure.
*
* This function should be called from contexts where we cannot sleep, and will
* complain if the GPIO chip functions potentially sleep.
@@ -1212,7 +1213,7 @@
* @desc: gpio whose value will be returned
*
* Return the GPIO's logical value, i.e. taking the ACTIVE_LOW status into
- * account.
+ * account, or negative errno on failure.
*
* This function should be called from contexts where we cannot sleep, and will
* complain if the GPIO chip functions potentially sleep.
@@ -1226,6 +1227,9 @@
WARN_ON(desc->chip->can_sleep);
value = _gpiod_get_raw_value(desc);
+ if (value < 0)
+ return value;
+
if (test_bit(FLAG_ACTIVE_LOW, &desc->flags))
value = !value;
@@ -1548,7 +1552,7 @@
* @desc: gpio whose value will be returned
*
* Return the GPIO's raw value, i.e. the value of the physical line disregarding
- * its ACTIVE_LOW status.
+ * its ACTIVE_LOW status, or negative errno on failure.
*
* This function is to be called from contexts that can sleep.
*/
@@ -1566,7 +1570,7 @@
* @desc: gpio whose value will be returned
*
* Return the GPIO's logical value, i.e. taking the ACTIVE_LOW status into
- * account.
+ * account, or negative errno on failure.
*
* This function is to be called from contexts that can sleep.
*/
@@ -1579,6 +1583,9 @@
return 0;
value = _gpiod_get_raw_value(desc);
+ if (value < 0)
+ return value;
+
if (test_bit(FLAG_ACTIVE_LOW, &desc->flags))
value = !value;
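
Because _gpiod_get_raw_value() now returns an int, gpiod_get_value() and its _cansleep variant can propagate a negative errno (for instance -EIO when the chip provides no .get callback) instead of silently reporting false. A hedged consumer-side sketch of checking for that, using a made-up device and descriptor name:

#include <linux/gpio/consumer.h>
#include <linux/device.h>

/* Read a logical GPIO value, treating negative returns as errors. */
static int read_ready_pin(struct device *dev, struct gpio_desc *ready)
{
        int value = gpiod_get_value(ready);

        if (value < 0) {
                dev_err(dev, "failed to read ready pin: %d\n", value);
                return value;
        }

        return value;   /* 0 or 1, with ACTIVE_LOW already applied */
}
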
diff --git a/drivers/gpu/drm/drm_atomic.c b/drivers/gpu/drm/drm_atomic.c
index 4349154..f7d5166 100644
--- a/drivers/gpu/drm/drm_atomic.c
+++ b/drivers/gpu/drm/drm_atomic.c
@@ -1515,7 +1515,8 @@
copied_props++;
}
- if (obj->type == DRM_MODE_OBJECT_PLANE && count_props) {
+ if (obj->type == DRM_MODE_OBJECT_PLANE && count_props &&
+ !(arg->flags & DRM_MODE_ATOMIC_TEST_ONLY)) {
plane = obj_to_plane(obj);
plane_mask |= (1 << drm_plane_index(plane));
plane->old_fb = plane->fb;
@@ -1537,10 +1538,11 @@
}
if (arg->flags & DRM_MODE_ATOMIC_TEST_ONLY) {
+ /*
+ * Unlike commit, check_only does not clean up state.
+ * Below we call drm_atomic_state_free for it.
+ */
ret = drm_atomic_check_only(state);
- /* _check_only() does not free state, unlike _commit() */
- if (!ret)
- drm_atomic_state_free(state);
} else if (arg->flags & DRM_MODE_ATOMIC_NONBLOCK) {
ret = drm_atomic_async_commit(state);
} else {
@@ -1567,25 +1569,30 @@
plane->old_fb = NULL;
}
+ if (ret && arg->flags & DRM_MODE_PAGE_FLIP_EVENT) {
+ /*
+ * TEST_ONLY and PAGE_FLIP_EVENT are mutually exclusive;
+ * if they weren't, this code would also need to run on
+ * success for TEST_ONLY.
+ */
+
+ for_each_crtc_in_state(state, crtc, crtc_state, i) {
+ if (!crtc_state->event)
+ continue;
+
+ destroy_vblank_event(dev, file_priv,
+ crtc_state->event);
+ }
+ }
+
if (ret == -EDEADLK) {
drm_atomic_state_clear(state);
drm_modeset_backoff(&ctx);
goto retry;
}
- if (ret) {
- if (arg->flags & DRM_MODE_PAGE_FLIP_EVENT) {
- for_each_crtc_in_state(state, crtc, crtc_state, i) {
- if (!crtc_state->event)
- continue;
-
- destroy_vblank_event(dev, file_priv,
- crtc_state->event);
- }
- }
-
+ if (ret || arg->flags & DRM_MODE_ATOMIC_TEST_ONLY)
drm_atomic_state_free(state);
- }
drm_modeset_drop_locks(&ctx);
drm_modeset_acquire_fini(&ctx);
diff --git a/drivers/gpu/drm/drm_dp_helper.c b/drivers/gpu/drm/drm_dp_helper.c
index 80a02a4..291734e 100644
--- a/drivers/gpu/drm/drm_dp_helper.c
+++ b/drivers/gpu/drm/drm_dp_helper.c
@@ -159,6 +159,8 @@
}
EXPORT_SYMBOL(drm_dp_bw_code_to_link_rate);
+#define AUX_RETRY_INTERVAL 500 /* us */
+
/**
* DOC: dp helpers
*
@@ -213,7 +215,7 @@
return -EIO;
case DP_AUX_NATIVE_REPLY_DEFER:
- usleep_range(400, 500);
+ usleep_range(AUX_RETRY_INTERVAL, AUX_RETRY_INTERVAL + 100);
break;
}
}
@@ -422,6 +424,90 @@
I2C_FUNC_10BIT_ADDR;
}
+#define AUX_PRECHARGE_LEN 10 /* 10 to 16 */
+#define AUX_SYNC_LEN (16 + 4) /* preamble + AUX_SYNC_END */
+#define AUX_STOP_LEN 4
+#define AUX_CMD_LEN 4
+#define AUX_ADDRESS_LEN 20
+#define AUX_REPLY_PAD_LEN 4
+#define AUX_LENGTH_LEN 8
+
+/*
+ * Calculate the duration of the AUX request/reply in usec. Gives the
+ * "best" case estimate, i.e. successful while as short as possible.
+ */
+static int drm_dp_aux_req_duration(const struct drm_dp_aux_msg *msg)
+{
+ int len = AUX_PRECHARGE_LEN + AUX_SYNC_LEN + AUX_STOP_LEN +
+ AUX_CMD_LEN + AUX_ADDRESS_LEN + AUX_LENGTH_LEN;
+
+ if ((msg->request & DP_AUX_I2C_READ) == 0)
+ len += msg->size * 8;
+
+ return len;
+}
+
+static int drm_dp_aux_reply_duration(const struct drm_dp_aux_msg *msg)
+{
+ int len = AUX_PRECHARGE_LEN + AUX_SYNC_LEN + AUX_STOP_LEN +
+ AUX_CMD_LEN + AUX_REPLY_PAD_LEN;
+
+ /*
+ * For read we expect what was asked. For writes there will
+ * be 0 or 1 data bytes. Assume 0 for the "best" case.
+ */
+ if (msg->request & DP_AUX_I2C_READ)
+ len += msg->size * 8;
+
+ return len;
+}
+
+#define I2C_START_LEN 1
+#define I2C_STOP_LEN 1
+#define I2C_ADDR_LEN 9 /* ADDRESS + R/W + ACK/NACK */
+#define I2C_DATA_LEN 9 /* DATA + ACK/NACK */
+
+/*
+ * Calculate the length of the i2c transfer in usec, assuming
+ * the i2c bus speed is as specified. Gives the "worst"
+ * case estimate, i.e. successful while as long as possible.
+ * Doesn't account for the "MOT" bit, and instead assumes each
+ * message includes a START, ADDRESS and STOP. Neither does it
+ * account for additional random variables such as clock stretching.
+ */
+static int drm_dp_i2c_msg_duration(const struct drm_dp_aux_msg *msg,
+ int i2c_speed_khz)
+{
+ /* AUX bitrate is 1MHz, i2c bitrate as specified */
+ return DIV_ROUND_UP((I2C_START_LEN + I2C_ADDR_LEN +
+ msg->size * I2C_DATA_LEN +
+ I2C_STOP_LEN) * 1000, i2c_speed_khz);
+}
+
+/*
+ * Determine how many retries should be attempted to successfully transfer
+ * the specified message, based on the estimated durations of the
+ * i2c and AUX transfers.
+ */
+static int drm_dp_i2c_retry_count(const struct drm_dp_aux_msg *msg,
+ int i2c_speed_khz)
+{
+ int aux_time_us = drm_dp_aux_req_duration(msg) +
+ drm_dp_aux_reply_duration(msg);
+ int i2c_time_us = drm_dp_i2c_msg_duration(msg, i2c_speed_khz);
+
+ return DIV_ROUND_UP(i2c_time_us, aux_time_us + AUX_RETRY_INTERVAL);
+}
+
+/*
+ * FIXME currently assumes 10 kHz as some real world devices seem
+ * to require it. We should query/set the speed via DPCD if supported.
+ */
+static int dp_aux_i2c_speed_khz __read_mostly = 10;
+module_param_unsafe(dp_aux_i2c_speed_khz, int, 0644);
+MODULE_PARM_DESC(dp_aux_i2c_speed_khz,
+ "Assumed speed of the i2c bus in kHz, (1-400, default 10)");
+
/*
* Transfer a single I2C-over-AUX message and handle various error conditions,
* retrying the transaction as appropriate. It is assumed that the
@@ -434,13 +520,16 @@
{
unsigned int retry, defer_i2c;
int ret;
-
/*
* DP1.2 sections 2.7.7.1.5.6.1 and 2.7.7.1.6.6.1: A DP Source device
* is required to retry at least seven times upon receiving AUX_DEFER
* before giving up the AUX transaction.
+ *
+ * We also try to account for the i2c bus speed.
*/
- for (retry = 0, defer_i2c = 0; retry < (7 + defer_i2c); retry++) {
+ int max_retries = max(7, drm_dp_i2c_retry_count(msg, dp_aux_i2c_speed_khz));
+
+ for (retry = 0, defer_i2c = 0; retry < (max_retries + defer_i2c); retry++) {
mutex_lock(&aux->hw_mutex);
ret = aux->transfer(aux, msg);
mutex_unlock(&aux->hw_mutex);
@@ -476,7 +565,7 @@
* For now just defer for long enough to hopefully be
* safe for all use-cases.
*/
- usleep_range(500, 600);
+ usleep_range(AUX_RETRY_INTERVAL, AUX_RETRY_INTERVAL + 100);
continue;
default:
@@ -506,7 +595,7 @@
aux->i2c_defer_count++;
if (defer_i2c < 7)
defer_i2c++;
- usleep_range(400, 500);
+ usleep_range(AUX_RETRY_INTERVAL, AUX_RETRY_INTERVAL + 100);
continue;
default:
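
The retry-count rework above estimates how long one AUX round trip takes versus how long the emulated i2c transfer would take at the assumed bus speed, and keeps retrying until the i2c-side budget is exhausted, never dropping below the seven retries DP 1.2 mandates. A standalone sketch of the same arithmetic for a 16-byte i2c-over-AUX read at the default 10 kHz assumption (all values illustrative):

#include <stdio.h>

#define DIV_ROUND_UP(n, d)      (((n) + (d) - 1) / (d))
#define AUX_RETRY_INTERVAL      500     /* us, as in the patch */

int main(void)
{
        int size = 16;                  /* payload bytes of one AUX transaction */
        int i2c_speed_khz = 10;         /* dp_aux_i2c_speed_khz default */

        /* Best-case AUX request + reply durations in us (AUX runs at 1 MHz). */
        int aux_req_us   = 10 + 20 + 4 + 4 + 20 + 8;           /* 66 */
        int aux_reply_us = 10 + 20 + 4 + 4 + 4 + size * 8;     /* 170 for a read */
        int aux_us = aux_req_us + aux_reply_us;

        /* Worst-case i2c duration: START + ADDR + data + STOP at 10 kHz. */
        int i2c_us = DIV_ROUND_UP((1 + 9 + size * 9 + 1) * 1000, i2c_speed_khz);

        int retries = DIV_ROUND_UP(i2c_us, aux_us + AUX_RETRY_INTERVAL);

        printf("i2c=%d us, aux=%d us -> %d retries (min 7)\n",
               i2c_us, aux_us, retries > 7 ? retries : 7);
        return 0;
}

For this example the slow i2c leg needs about 15.5 ms, so the helper allows 22 retries instead of giving up after seven deferred AUX transactions.
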
diff --git a/drivers/gpu/drm/exynos/Kconfig b/drivers/gpu/drm/exynos/Kconfig
index df0b61a..bd1a415 100644
--- a/drivers/gpu/drm/exynos/Kconfig
+++ b/drivers/gpu/drm/exynos/Kconfig
@@ -77,6 +77,7 @@
config DRM_EXYNOS_G2D
bool "Exynos DRM G2D"
depends on DRM_EXYNOS && !VIDEO_SAMSUNG_S5P_G2D
+ select FRAME_VECTOR
help
Choose this option if you want to use Exynos G2D for DRM.
diff --git a/drivers/gpu/drm/exynos/exynos_drm_g2d.c b/drivers/gpu/drm/exynos/exynos_drm_g2d.c
index 535b4ad..3734c34 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_g2d.c
+++ b/drivers/gpu/drm/exynos/exynos_drm_g2d.c
@@ -194,10 +194,8 @@
dma_addr_t dma_addr;
unsigned long userptr;
unsigned long size;
- struct page **pages;
- unsigned int npages;
+ struct frame_vector *vec;
struct sg_table *sgt;
- struct vm_area_struct *vma;
atomic_t refcount;
bool in_pool;
bool out_of_list;
@@ -367,6 +365,7 @@
{
struct g2d_cmdlist_userptr *g2d_userptr =
(struct g2d_cmdlist_userptr *)obj;
+ struct page **pages;
if (!obj)
return;
@@ -386,19 +385,21 @@
exynos_gem_unmap_sgt_from_dma(drm_dev, g2d_userptr->sgt,
DMA_BIDIRECTIONAL);
- exynos_gem_put_pages_to_userptr(g2d_userptr->pages,
- g2d_userptr->npages,
- g2d_userptr->vma);
+ pages = frame_vector_pages(g2d_userptr->vec);
+ if (!IS_ERR(pages)) {
+ int i;
- exynos_gem_put_vma(g2d_userptr->vma);
+ for (i = 0; i < frame_vector_count(g2d_userptr->vec); i++)
+ set_page_dirty_lock(pages[i]);
+ }
+ put_vaddr_frames(g2d_userptr->vec);
+ frame_vector_destroy(g2d_userptr->vec);
if (!g2d_userptr->out_of_list)
list_del_init(&g2d_userptr->list);
sg_free_table(g2d_userptr->sgt);
kfree(g2d_userptr->sgt);
-
- drm_free_large(g2d_userptr->pages);
kfree(g2d_userptr);
}
@@ -412,9 +413,7 @@
struct exynos_drm_g2d_private *g2d_priv = file_priv->g2d_priv;
struct g2d_cmdlist_userptr *g2d_userptr;
struct g2d_data *g2d;
- struct page **pages;
struct sg_table *sgt;
- struct vm_area_struct *vma;
unsigned long start, end;
unsigned int npages, offset;
int ret;
@@ -460,65 +459,40 @@
return ERR_PTR(-ENOMEM);
atomic_set(&g2d_userptr->refcount, 1);
+ g2d_userptr->size = size;
start = userptr & PAGE_MASK;
offset = userptr & ~PAGE_MASK;
end = PAGE_ALIGN(userptr + size);
npages = (end - start) >> PAGE_SHIFT;
- g2d_userptr->npages = npages;
-
- pages = drm_calloc_large(npages, sizeof(struct page *));
- if (!pages) {
- DRM_ERROR("failed to allocate pages.\n");
+ g2d_userptr->vec = frame_vector_create(npages);
+ if (!g2d_userptr->vec) {
ret = -ENOMEM;
goto err_free;
}
- down_read(&current->mm->mmap_sem);
- vma = find_vma(current->mm, userptr);
- if (!vma) {
- up_read(&current->mm->mmap_sem);
- DRM_ERROR("failed to get vm region.\n");
- ret = -EFAULT;
- goto err_free_pages;
- }
-
- if (vma->vm_end < userptr + size) {
- up_read(&current->mm->mmap_sem);
- DRM_ERROR("vma is too small.\n");
- ret = -EFAULT;
- goto err_free_pages;
- }
-
- g2d_userptr->vma = exynos_gem_get_vma(vma);
- if (!g2d_userptr->vma) {
- up_read(&current->mm->mmap_sem);
- DRM_ERROR("failed to copy vma.\n");
- ret = -ENOMEM;
- goto err_free_pages;
- }
-
- g2d_userptr->size = size;
-
- ret = exynos_gem_get_pages_from_userptr(start & PAGE_MASK,
- npages, pages, vma);
- if (ret < 0) {
- up_read(&current->mm->mmap_sem);
+ ret = get_vaddr_frames(start, npages, true, true, g2d_userptr->vec);
+ if (ret != npages) {
DRM_ERROR("failed to get user pages from userptr.\n");
- goto err_put_vma;
+ if (ret < 0)
+ goto err_destroy_framevec;
+ ret = -EFAULT;
+ goto err_put_framevec;
}
-
- up_read(&current->mm->mmap_sem);
- g2d_userptr->pages = pages;
+ if (frame_vector_to_pages(g2d_userptr->vec) < 0) {
+ ret = -EFAULT;
+ goto err_put_framevec;
+ }
sgt = kzalloc(sizeof(*sgt), GFP_KERNEL);
if (!sgt) {
ret = -ENOMEM;
- goto err_free_userptr;
+ goto err_put_framevec;
}
- ret = sg_alloc_table_from_pages(sgt, pages, npages, offset,
- size, GFP_KERNEL);
+ ret = sg_alloc_table_from_pages(sgt,
+ frame_vector_pages(g2d_userptr->vec),
+ npages, offset, size, GFP_KERNEL);
if (ret < 0) {
DRM_ERROR("failed to get sgt from pages.\n");
goto err_free_sgt;
@@ -553,16 +527,11 @@
err_free_sgt:
kfree(sgt);
-err_free_userptr:
- exynos_gem_put_pages_to_userptr(g2d_userptr->pages,
- g2d_userptr->npages,
- g2d_userptr->vma);
+err_put_framevec:
+ put_vaddr_frames(g2d_userptr->vec);
-err_put_vma:
- exynos_gem_put_vma(g2d_userptr->vma);
-
-err_free_pages:
- drm_free_large(pages);
+err_destroy_framevec:
+ frame_vector_destroy(g2d_userptr->vec);
err_free:
kfree(g2d_userptr);
diff --git a/drivers/gpu/drm/exynos/exynos_drm_gem.c b/drivers/gpu/drm/exynos/exynos_drm_gem.c
index 62b9ea1..f12fbc3 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_gem.c
+++ b/drivers/gpu/drm/exynos/exynos_drm_gem.c
@@ -366,103 +366,6 @@
return 0;
}
-struct vm_area_struct *exynos_gem_get_vma(struct vm_area_struct *vma)
-{
- struct vm_area_struct *vma_copy;
-
- vma_copy = kmalloc(sizeof(*vma_copy), GFP_KERNEL);
- if (!vma_copy)
- return NULL;
-
- if (vma->vm_ops && vma->vm_ops->open)
- vma->vm_ops->open(vma);
-
- if (vma->vm_file)
- get_file(vma->vm_file);
-
- memcpy(vma_copy, vma, sizeof(*vma));
-
- vma_copy->vm_mm = NULL;
- vma_copy->vm_next = NULL;
- vma_copy->vm_prev = NULL;
-
- return vma_copy;
-}
-
-void exynos_gem_put_vma(struct vm_area_struct *vma)
-{
- if (!vma)
- return;
-
- if (vma->vm_ops && vma->vm_ops->close)
- vma->vm_ops->close(vma);
-
- if (vma->vm_file)
- fput(vma->vm_file);
-
- kfree(vma);
-}
-
-int exynos_gem_get_pages_from_userptr(unsigned long start,
- unsigned int npages,
- struct page **pages,
- struct vm_area_struct *vma)
-{
- int get_npages;
-
- /* the memory region mmaped with VM_PFNMAP. */
- if (vma_is_io(vma)) {
- unsigned int i;
-
- for (i = 0; i < npages; ++i, start += PAGE_SIZE) {
- unsigned long pfn;
- int ret = follow_pfn(vma, start, &pfn);
- if (ret)
- return ret;
-
- pages[i] = pfn_to_page(pfn);
- }
-
- if (i != npages) {
- DRM_ERROR("failed to get user_pages.\n");
- return -EINVAL;
- }
-
- return 0;
- }
-
- get_npages = get_user_pages(current, current->mm, start,
- npages, 1, 1, pages, NULL);
- get_npages = max(get_npages, 0);
- if (get_npages != npages) {
- DRM_ERROR("failed to get user_pages.\n");
- while (get_npages)
- put_page(pages[--get_npages]);
- return -EFAULT;
- }
-
- return 0;
-}
-
-void exynos_gem_put_pages_to_userptr(struct page **pages,
- unsigned int npages,
- struct vm_area_struct *vma)
-{
- if (!vma_is_io(vma)) {
- unsigned int i;
-
- for (i = 0; i < npages; i++) {
- set_page_dirty_lock(pages[i]);
-
- /*
- * undo the reference we took when populating
- * the table.
- */
- put_page(pages[i]);
- }
- }
-}
-
int exynos_gem_map_sgt_with_dma(struct drm_device *drm_dev,
struct sg_table *sgt,
enum dma_data_direction dir)
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 81adf89..e1db8de 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1929,6 +1929,8 @@
struct skl_wm_values skl_hw;
struct vlv_wm_values vlv;
};
+
+ uint8_t max_level;
} wm;
struct i915_runtime_pm pm;
@@ -3384,13 +3386,13 @@
#define I915_READ64(reg) dev_priv->uncore.funcs.mmio_readq(dev_priv, (reg), true)
#define I915_READ64_2x32(lower_reg, upper_reg) ({ \
- u32 upper, lower, tmp; \
- tmp = I915_READ(upper_reg); \
+ u32 upper, lower, old_upper, loop = 0; \
+ upper = I915_READ(upper_reg); \
do { \
- upper = tmp; \
+ old_upper = upper; \
lower = I915_READ(lower_reg); \
- tmp = I915_READ(upper_reg); \
- } while (upper != tmp); \
+ upper = I915_READ(upper_reg); \
+ } while (upper != old_upper && loop++ < 2); \
(u64)upper << 32 | lower; })
#define POSTING_READ(reg) (void)I915_READ_NOTRACE(reg)
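
The reworked I915_READ64_2x32() reads the upper half, then the lower half, then re-reads the upper half; if the upper half changed in between it retries, but at most twice, so a register that keeps ticking cannot spin the macro forever. A standalone sketch of that pattern for any 64-bit counter exposed as two 32-bit halves (the accessor names are stand-ins, not i915 symbols):

#include <stdint.h>

/* Stand-ins for the two 32-bit MMIO reads of a split 64-bit counter. */
extern uint32_t read_upper32(void);
extern uint32_t read_lower32(void);

static uint64_t read_split_counter64(void)
{
        uint32_t upper, lower, old_upper;
        int loop = 0;

        upper = read_upper32();
        do {
                old_upper = upper;
                lower = read_lower32();
                upper = read_upper32();
                /* Retry while the upper half moved under us, but give up after
                 * a couple of attempts so a free-running counter cannot wedge
                 * the caller; the last consistent pair read is returned. */
        } while (upper != old_upper && loop++ < 2);

        return (uint64_t)upper << 32 | lower;
}
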
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 923a3c4..a953d49 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -1032,6 +1032,7 @@
u32 old_read = obj->base.read_domains;
u32 old_write = obj->base.write_domain;
+ obj->dirty = 1; /* be paranoid */
obj->base.write_domain = obj->base.pending_write_domain;
if (obj->base.write_domain == 0)
obj->base.pending_read_domains |= obj->base.read_domains;
@@ -1039,7 +1040,6 @@
i915_vma_move_to_active(vma, req);
if (obj->base.write_domain) {
- obj->dirty = 1;
i915_gem_request_assign(&obj->last_write_req, req);
intel_fb_obj_invalidate(obj, ORIGIN_CS);
diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index b5fb143..5a244ab 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -1558,7 +1558,7 @@
u32 hotplug_trigger = hotplug_status & HOTPLUG_INT_STATUS_I915;
intel_get_hpd_pins(&pin_mask, &long_mask, hotplug_trigger,
- hotplug_trigger, hpd_status_g4x,
+ hotplug_trigger, hpd_status_i915,
i9xx_port_hotplug_long_detect);
intel_hpd_irq_handler(dev, pin_mask, long_mask);
}
diff --git a/drivers/gpu/drm/i915/intel_csr.c b/drivers/gpu/drm/i915/intel_csr.c
index ba1ae03..d0f1b8d 100644
--- a/drivers/gpu/drm/i915/intel_csr.c
+++ b/drivers/gpu/drm/i915/intel_csr.c
@@ -350,7 +350,7 @@
}
csr->mmio_count = dmc_header->mmio_count;
for (i = 0; i < dmc_header->mmio_count; i++) {
- if (dmc_header->mmioaddr[i] < CSR_MMIO_START_RANGE &&
+ if (dmc_header->mmioaddr[i] < CSR_MMIO_START_RANGE ||
dmc_header->mmioaddr[i] > CSR_MMIO_END_RANGE) {
DRM_ERROR(" Firmware has wrong mmio address 0x%x\n",
dmc_header->mmioaddr[i]);
diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index ca9278b..8cc9264 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -6305,7 +6305,7 @@
connector->base.name);
if (connector->get_hw_state(connector)) {
- struct drm_encoder *encoder = &connector->encoder->base;
+ struct intel_encoder *encoder = connector->encoder;
struct drm_connector_state *conn_state = connector->base.state;
I915_STATE_WARN(!crtc,
@@ -6317,13 +6317,13 @@
I915_STATE_WARN(!crtc->state->active,
"connector is active, but attached crtc isn't\n");
- if (!encoder)
+ if (!encoder || encoder->type == INTEL_OUTPUT_DP_MST)
return;
- I915_STATE_WARN(conn_state->best_encoder != encoder,
+ I915_STATE_WARN(conn_state->best_encoder != &encoder->base,
"atomic encoder doesn't match attached encoder\n");
- I915_STATE_WARN(conn_state->crtc != encoder->crtc,
+ I915_STATE_WARN(conn_state->crtc != encoder->base.crtc,
"attached encoder crtc differs from connector crtc\n");
} else {
I915_STATE_WARN(crtc && crtc->state->active,
diff --git a/drivers/gpu/drm/i915/intel_dp_mst.c b/drivers/gpu/drm/i915/intel_dp_mst.c
index 983553c..3e4be5a 100644
--- a/drivers/gpu/drm/i915/intel_dp_mst.c
+++ b/drivers/gpu/drm/i915/intel_dp_mst.c
@@ -173,6 +173,11 @@
return;
}
+ /* MST encoders are bound to a crtc, not to a connector;
+ * force the mapping here for get_hw_state.
+ */
+ found->encoder = encoder;
+
DRM_DEBUG_KMS("%d\n", intel_dp->active_mst_links);
intel_mst->port = found->port;
@@ -400,7 +405,7 @@
static bool intel_dp_mst_get_hw_state(struct intel_connector *connector)
{
- if (connector->encoder) {
+ if (connector->encoder && connector->base.state->crtc) {
enum pipe pipe;
if (!connector->encoder->get_hw_state(connector->encoder, &pipe))
return false;
diff --git a/drivers/gpu/drm/i915/intel_dsi.c b/drivers/gpu/drm/i915/intel_dsi.c
index 4a601cf..32a6c71 100644
--- a/drivers/gpu/drm/i915/intel_dsi.c
+++ b/drivers/gpu/drm/i915/intel_dsi.c
@@ -1048,11 +1048,7 @@
intel_connector->unregister = intel_connector_unregister;
/* Pipe A maps to MIPI DSI port A, pipe B maps to MIPI DSI port C */
- if (dev_priv->vbt.dsi.config->dual_link) {
- /* XXX: does dual link work on either pipe? */
- intel_encoder->crtc_mask = (1 << PIPE_A);
- intel_dsi->ports = ((1 << PORT_A) | (1 << PORT_C));
- } else if (dev_priv->vbt.dsi.port == DVO_PORT_MIPIA) {
+ if (dev_priv->vbt.dsi.port == DVO_PORT_MIPIA) {
intel_encoder->crtc_mask = (1 << PIPE_A);
intel_dsi->ports = (1 << PORT_A);
} else if (dev_priv->vbt.dsi.port == DVO_PORT_MIPIC) {
@@ -1060,6 +1056,9 @@
intel_dsi->ports = (1 << PORT_C);
}
+ if (dev_priv->vbt.dsi.config->dual_link)
+ intel_dsi->ports = ((1 << PORT_A) | (1 << PORT_C));
+
/* Create a DSI host (and a device) for each port. */
for_each_dsi_port(port, intel_dsi->ports) {
struct intel_dsi_host *host;
diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
index fff0c22..ddbb7ed 100644
--- a/drivers/gpu/drm/i915/intel_pm.c
+++ b/drivers/gpu/drm/i915/intel_pm.c
@@ -955,8 +955,6 @@
VLV_WM_LEVEL_PM2,
VLV_WM_LEVEL_PM5,
VLV_WM_LEVEL_DDR_DVFS,
- CHV_WM_NUM_LEVELS,
- VLV_WM_NUM_LEVELS = 1,
};
/* latency must be in 0.1us units. */
@@ -982,9 +980,13 @@
/* all latencies in usec */
dev_priv->wm.pri_latency[VLV_WM_LEVEL_PM2] = 3;
+ dev_priv->wm.max_level = VLV_WM_LEVEL_PM2;
+
if (IS_CHERRYVIEW(dev_priv)) {
dev_priv->wm.pri_latency[VLV_WM_LEVEL_PM5] = 12;
dev_priv->wm.pri_latency[VLV_WM_LEVEL_DDR_DVFS] = 33;
+
+ dev_priv->wm.max_level = VLV_WM_LEVEL_DDR_DVFS;
}
}
@@ -1137,10 +1139,7 @@
memset(wm_state, 0, sizeof(*wm_state));
wm_state->cxsr = crtc->pipe != PIPE_C && crtc->wm.cxsr_allowed;
- if (IS_CHERRYVIEW(dev))
- wm_state->num_levels = CHV_WM_NUM_LEVELS;
- else
- wm_state->num_levels = VLV_WM_NUM_LEVELS;
+ wm_state->num_levels = to_i915(dev)->wm.max_level + 1;
wm_state->num_active_planes = 0;
@@ -1220,7 +1219,7 @@
}
/* clear any (partially) filled invalid levels */
- for (level = wm_state->num_levels; level < CHV_WM_NUM_LEVELS; level++) {
+ for (level = wm_state->num_levels; level < to_i915(dev)->wm.max_level + 1; level++) {
memset(&wm_state->wm[level], 0, sizeof(wm_state->wm[level]));
memset(&wm_state->sr[level], 0, sizeof(wm_state->sr[level]));
}
@@ -1324,10 +1323,7 @@
struct intel_crtc *crtc;
int num_active_crtcs = 0;
- if (IS_CHERRYVIEW(dev))
- wm->level = VLV_WM_LEVEL_DDR_DVFS;
- else
- wm->level = VLV_WM_LEVEL_PM2;
+ wm->level = to_i915(dev)->wm.max_level;
wm->cxsr = true;
for_each_intel_crtc(dev, crtc) {
@@ -4083,9 +4079,29 @@
if (val & DSP_MAXFIFO_PM5_ENABLE)
wm->level = VLV_WM_LEVEL_PM5;
+ /*
+ * If DDR DVFS is disabled in the BIOS, Punit
+ * will never ack the request. So if that happens
+ * assume we don't have to enable/disable DDR DVFS
+ * dynamically. To test that just set the REQ_ACK
+ * bit to poke the Punit, but don't change the
+ * HIGH/LOW bits so that we don't actually change
+ * the current state.
+ */
val = vlv_punit_read(dev_priv, PUNIT_REG_DDR_SETUP2);
- if ((val & FORCE_DDR_HIGH_FREQ) == 0)
- wm->level = VLV_WM_LEVEL_DDR_DVFS;
+ val |= FORCE_DDR_FREQ_REQ_ACK;
+ vlv_punit_write(dev_priv, PUNIT_REG_DDR_SETUP2, val);
+
+ if (wait_for((vlv_punit_read(dev_priv, PUNIT_REG_DDR_SETUP2) &
+ FORCE_DDR_FREQ_REQ_ACK) == 0, 3)) {
+ DRM_DEBUG_KMS("Punit not acking DDR DVFS request, "
+ "assuming DDR DVFS is disabled\n");
+ dev_priv->wm.max_level = VLV_WM_LEVEL_PM5;
+ } else {
+ val = vlv_punit_read(dev_priv, PUNIT_REG_DDR_SETUP2);
+ if ((val & FORCE_DDR_HIGH_FREQ) == 0)
+ wm->level = VLV_WM_LEVEL_DDR_DVFS;
+ }
mutex_unlock(&dev_priv->rps.hw_lock);
}
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/device/pci.c b/drivers/gpu/drm/nouveau/nvkm/engine/device/pci.c
index 9dd1cac..e8eb14e 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/device/pci.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/device/pci.c
@@ -689,6 +689,7 @@
static const struct nvkm_device_pci_vendor
nvkm_device_pci_10de_11fc[] = {
+ { 0x1179, 0x0001, NULL, { .War00C800_0 = true } }, /* Toshiba Tecra W50 */
{ 0x17aa, 0x2211, NULL, { .War00C800_0 = true } }, /* Lenovo W541 */
{ 0x17aa, 0x221e, NULL, { .War00C800_0 = true } }, /* Lenovo W541 */
{}
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv04.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv04.c
index 426ba00..85c5b7f 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv04.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv04.c
@@ -1048,11 +1048,11 @@
if (ret == 0) {
nvkm_kmap(*pgpuobj);
nvkm_wo32(*pgpuobj, 0x00, object->oclass);
+#ifdef __BIG_ENDIAN
+ nvkm_mo32(*pgpuobj, 0x00, 0x00080000, 0x00080000);
+#endif
nvkm_wo32(*pgpuobj, 0x04, 0x00000000);
nvkm_wo32(*pgpuobj, 0x08, 0x00000000);
-#ifdef __BIG_ENDIAN
- nvkm_mo32(*pgpuobj, 0x08, 0x00080000, 0x00080000);
-#endif
nvkm_wo32(*pgpuobj, 0x0c, 0x00000000);
nvkm_done(*pgpuobj);
}
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/clk/gt215.c b/drivers/gpu/drm/nouveau/nvkm/subdev/clk/gt215.c
index 07feae6..c233e3f 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/clk/gt215.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/clk/gt215.c
@@ -326,7 +326,7 @@
return -EIO;
if (nvkm_msec(device, 2000,
- u32 tmp = nvkm_rd32(device, 0x002504) & 0x0000003f;
+ u32 tmp = nvkm_rd32(device, 0x00251c) & 0x0000003f;
if (tmp == 0x0000003f)
break;
) < 0)
diff --git a/drivers/gpu/drm/qxl/qxl_display.c b/drivers/gpu/drm/qxl/qxl_display.c
index a8dbb3e..7c6225c 100644
--- a/drivers/gpu/drm/qxl/qxl_display.c
+++ b/drivers/gpu/drm/qxl/qxl_display.c
@@ -160,9 +160,35 @@
*pwidth = head->width;
*pheight = head->height;
drm_mode_probed_add(connector, mode);
+ /* remember the last custom size for mode validation */
+ qdev->monitors_config_width = mode->hdisplay;
+ qdev->monitors_config_height = mode->vdisplay;
return 1;
}
+static struct mode_size {
+ int w;
+ int h;
+} common_modes[] = {
+ { 640, 480},
+ { 720, 480},
+ { 800, 600},
+ { 848, 480},
+ {1024, 768},
+ {1152, 768},
+ {1280, 720},
+ {1280, 800},
+ {1280, 854},
+ {1280, 960},
+ {1280, 1024},
+ {1440, 900},
+ {1400, 1050},
+ {1680, 1050},
+ {1600, 1200},
+ {1920, 1080},
+ {1920, 1200}
+};
+
static int qxl_add_common_modes(struct drm_connector *connector,
unsigned pwidth,
unsigned pheight)
@@ -170,29 +196,6 @@
struct drm_device *dev = connector->dev;
struct drm_display_mode *mode = NULL;
int i;
- struct mode_size {
- int w;
- int h;
- } common_modes[] = {
- { 640, 480},
- { 720, 480},
- { 800, 600},
- { 848, 480},
- {1024, 768},
- {1152, 768},
- {1280, 720},
- {1280, 800},
- {1280, 854},
- {1280, 960},
- {1280, 1024},
- {1440, 900},
- {1400, 1050},
- {1680, 1050},
- {1600, 1200},
- {1920, 1080},
- {1920, 1200}
- };
-
for (i = 0; i < ARRAY_SIZE(common_modes); i++) {
mode = drm_cvt_mode(dev, common_modes[i].w, common_modes[i].h,
60, false, false, false);
@@ -823,11 +826,22 @@
static int qxl_conn_mode_valid(struct drm_connector *connector,
struct drm_display_mode *mode)
{
+ struct drm_device *ddev = connector->dev;
+ struct qxl_device *qdev = ddev->dev_private;
+ int i;
+
/* TODO: is this called for user defined modes? (xrandr --add-mode)
* TODO: check that the mode fits in the framebuffer */
- DRM_DEBUG("%s: %dx%d status=%d\n", mode->name, mode->hdisplay,
- mode->vdisplay, mode->status);
- return MODE_OK;
+
+ if (qdev->monitors_config_width == mode->hdisplay &&
+ qdev->monitors_config_height == mode->vdisplay)
+ return MODE_OK;
+
+ for (i = 0; i < ARRAY_SIZE(common_modes); i++) {
+ if (common_modes[i].w == mode->hdisplay && common_modes[i].h == mode->vdisplay)
+ return MODE_OK;
+ }
+ return MODE_BAD;
}
static struct drm_encoder *qxl_best_encoder(struct drm_connector *connector)
diff --git a/drivers/gpu/drm/qxl/qxl_drv.h b/drivers/gpu/drm/qxl/qxl_drv.h
index d854969..01a8694 100644
--- a/drivers/gpu/drm/qxl/qxl_drv.h
+++ b/drivers/gpu/drm/qxl/qxl_drv.h
@@ -325,6 +325,8 @@
struct work_struct fb_work;
struct drm_property *hotplug_mode_update_property;
+ int monitors_config_width;
+ int monitors_config_height;
};
/* forward declaration for QXL_INFO_IO */
diff --git a/drivers/gpu/drm/vgem/vgem_drv.c b/drivers/gpu/drm/vgem/vgem_drv.c
index 6394547..860062e 100644
--- a/drivers/gpu/drm/vgem/vgem_drv.c
+++ b/drivers/gpu/drm/vgem/vgem_drv.c
@@ -125,7 +125,7 @@
}
}
-static struct vm_operations_struct vgem_gem_vm_ops = {
+static const struct vm_operations_struct vgem_gem_vm_ops = {
.fault = vgem_gem_fault,
.open = drm_gem_vm_open,
.close = drm_gem_vm_close,
diff --git a/drivers/hsi/clients/cmt_speech.c b/drivers/hsi/clients/cmt_speech.c
index d04643f..95638df 100644
--- a/drivers/hsi/clients/cmt_speech.c
+++ b/drivers/hsi/clients/cmt_speech.c
@@ -1110,7 +1110,7 @@
return 0;
}
-static struct vm_operations_struct cs_char_vm_ops = {
+static const struct vm_operations_struct cs_char_vm_ops = {
.fault = cs_char_vma_fault,
};
diff --git a/drivers/hwmon/Kconfig b/drivers/hwmon/Kconfig
index 500b262..e13c902 100644
--- a/drivers/hwmon/Kconfig
+++ b/drivers/hwmon/Kconfig
@@ -1140,8 +1140,8 @@
help
If you say yes here you get support for the hardware monitoring
functionality of the Nuvoton NCT6106D, NCT6775F, NCT6776F, NCT6779D,
- NCT6791D, NCT6792D and compatible Super-I/O chips. This driver
- replaces the w83627ehf driver for NCT6775F and NCT6776F.
+ NCT6791D, NCT6792D, NCT6793D, and compatible Super-I/O chips. This
+ driver replaces the w83627ehf driver for NCT6775F and NCT6776F.
This driver can also be built as a module. If so, the module
will be called nct6775.
diff --git a/drivers/hwmon/lm75.c b/drivers/hwmon/lm75.c
index fe41d5a..e4e57bb 100644
--- a/drivers/hwmon/lm75.c
+++ b/drivers/hwmon/lm75.c
@@ -104,7 +104,7 @@
/* sysfs attributes for hwmon */
-static int lm75_read_temp(void *dev, long *temp)
+static int lm75_read_temp(void *dev, int *temp)
{
struct lm75_data *data = lm75_update_device(dev);
diff --git a/drivers/hwmon/nct6775.c b/drivers/hwmon/nct6775.c
index bd1c99d..8b4fa55 100644
--- a/drivers/hwmon/nct6775.c
+++ b/drivers/hwmon/nct6775.c
@@ -39,6 +39,7 @@
* nct6779d 15 5 5 2+6 0xc560 0xc1 0x5ca3
* nct6791d 15 6 6 2+6 0xc800 0xc1 0x5ca3
* nct6792d 15 6 6 2+6 0xc910 0xc1 0x5ca3
+ * nct6793d 15 6 6 2+6 0xd120 0xc1 0x5ca3
*
* #temp lists the number of monitored temperature sources (first value) plus
* the number of directly connectable temperature sensors (second value).
@@ -63,7 +64,7 @@
#define USE_ALTERNATE
-enum kinds { nct6106, nct6775, nct6776, nct6779, nct6791, nct6792 };
+enum kinds { nct6106, nct6775, nct6776, nct6779, nct6791, nct6792, nct6793 };
/* used to set data->name = nct6775_device_names[data->sio_kind] */
static const char * const nct6775_device_names[] = {
@@ -73,6 +74,17 @@
"nct6779",
"nct6791",
"nct6792",
+ "nct6793",
+};
+
+static const char * const nct6775_sio_names[] __initconst = {
+ "NCT6106D",
+ "NCT6775F",
+ "NCT6776D/F",
+ "NCT6779D",
+ "NCT6791D",
+ "NCT6792D",
+ "NCT6793D",
};
static unsigned short force_id;
@@ -104,6 +116,7 @@
#define SIO_NCT6779_ID 0xc560
#define SIO_NCT6791_ID 0xc800
#define SIO_NCT6792_ID 0xc910
+#define SIO_NCT6793_ID 0xd120
#define SIO_ID_MASK 0xFFF0
enum pwm_enable { off, manual, thermal_cruise, speed_cruise, sf3, sf4 };
@@ -354,6 +367,10 @@
/* NCT6776 specific data */
+/* STEP_UP_TIME and STEP_DOWN_TIME regs are swapped for all chips but NCT6775 */
+#define NCT6776_REG_FAN_STEP_UP_TIME NCT6775_REG_FAN_STEP_DOWN_TIME
+#define NCT6776_REG_FAN_STEP_DOWN_TIME NCT6775_REG_FAN_STEP_UP_TIME
+
static const s8 NCT6776_ALARM_BITS[] = {
0, 1, 2, 3, 8, 21, 20, 16, /* in0.. in7 */
17, -1, -1, -1, -1, -1, -1, /* in8..in14 */
@@ -533,7 +550,7 @@
4, 5, 13, -1, -1, -1, /* temp1..temp6 */
12, 9 }; /* intrusion0, intrusion1 */
-/* NCT6792 specific data */
+/* NCT6792/NCT6793 specific data */
static const u16 NCT6792_REG_TEMP_MON[] = {
0x73, 0x75, 0x77, 0x79, 0x7b, 0x7d };
@@ -1056,6 +1073,7 @@
case nct6779:
case nct6791:
case nct6792:
+ case nct6793:
return reg == 0x150 || reg == 0x153 || reg == 0x155 ||
((reg & 0xfff0) == 0x4b0 && (reg & 0x000f) < 0x0b) ||
reg == 0x402 ||
@@ -1407,6 +1425,7 @@
case nct6779:
case nct6791:
case nct6792:
+ case nct6793:
reg = nct6775_read_value(data,
data->REG_CRITICAL_PWM_ENABLE[i]);
if (reg & data->CRITICAL_PWM_ENABLE_MASK)
@@ -2822,6 +2841,7 @@
case nct6779:
case nct6791:
case nct6792:
+ case nct6793:
nct6775_write_value(data, data->REG_CRITICAL_PWM[nr],
val);
reg = nct6775_read_value(data,
@@ -3256,7 +3276,7 @@
pwm4pin = false;
pwm5pin = false;
pwm6pin = false;
- } else { /* NCT6779D, NCT6791D, or NCT6792D */
+ } else { /* NCT6779D, NCT6791D, NCT6792D, or NCT6793D */
regval = superio_inb(sioreg, 0x1c);
fan3pin = !(regval & (1 << 5));
@@ -3269,7 +3289,8 @@
fan4min = fan4pin;
- if (data->kind == nct6791 || data->kind == nct6792) {
+ if (data->kind == nct6791 || data->kind == nct6792 ||
+ data->kind == nct6793) {
regval = superio_inb(sioreg, 0x2d);
fan6pin = (regval & (1 << 1));
pwm6pin = (regval & (1 << 0));
@@ -3528,8 +3549,8 @@
data->REG_FAN_PULSES = NCT6776_REG_FAN_PULSES;
data->FAN_PULSE_SHIFT = NCT6775_FAN_PULSE_SHIFT;
data->REG_FAN_TIME[0] = NCT6775_REG_FAN_STOP_TIME;
- data->REG_FAN_TIME[1] = NCT6775_REG_FAN_STEP_UP_TIME;
- data->REG_FAN_TIME[2] = NCT6775_REG_FAN_STEP_DOWN_TIME;
+ data->REG_FAN_TIME[1] = NCT6776_REG_FAN_STEP_UP_TIME;
+ data->REG_FAN_TIME[2] = NCT6776_REG_FAN_STEP_DOWN_TIME;
data->REG_TOLERANCE_H = NCT6776_REG_TOLERANCE_H;
data->REG_PWM[0] = NCT6775_REG_PWM;
data->REG_PWM[1] = NCT6775_REG_FAN_START_OUTPUT;
@@ -3600,8 +3621,8 @@
data->REG_FAN_PULSES = NCT6779_REG_FAN_PULSES;
data->FAN_PULSE_SHIFT = NCT6775_FAN_PULSE_SHIFT;
data->REG_FAN_TIME[0] = NCT6775_REG_FAN_STOP_TIME;
- data->REG_FAN_TIME[1] = NCT6775_REG_FAN_STEP_UP_TIME;
- data->REG_FAN_TIME[2] = NCT6775_REG_FAN_STEP_DOWN_TIME;
+ data->REG_FAN_TIME[1] = NCT6776_REG_FAN_STEP_UP_TIME;
+ data->REG_FAN_TIME[2] = NCT6776_REG_FAN_STEP_DOWN_TIME;
data->REG_TOLERANCE_H = NCT6776_REG_TOLERANCE_H;
data->REG_PWM[0] = NCT6775_REG_PWM;
data->REG_PWM[1] = NCT6775_REG_FAN_START_OUTPUT;
@@ -3643,6 +3664,7 @@
break;
case nct6791:
case nct6792:
+ case nct6793:
data->in_num = 15;
data->pwm_num = 6;
data->auto_pwm_num = 4;
@@ -3677,8 +3699,8 @@
data->REG_FAN_PULSES = NCT6779_REG_FAN_PULSES;
data->FAN_PULSE_SHIFT = NCT6775_FAN_PULSE_SHIFT;
data->REG_FAN_TIME[0] = NCT6775_REG_FAN_STOP_TIME;
- data->REG_FAN_TIME[1] = NCT6775_REG_FAN_STEP_UP_TIME;
- data->REG_FAN_TIME[2] = NCT6775_REG_FAN_STEP_DOWN_TIME;
+ data->REG_FAN_TIME[1] = NCT6776_REG_FAN_STEP_UP_TIME;
+ data->REG_FAN_TIME[2] = NCT6776_REG_FAN_STEP_DOWN_TIME;
data->REG_TOLERANCE_H = NCT6776_REG_TOLERANCE_H;
data->REG_PWM[0] = NCT6775_REG_PWM;
data->REG_PWM[1] = NCT6775_REG_FAN_START_OUTPUT;
@@ -3918,6 +3940,7 @@
case nct6779:
case nct6791:
case nct6792:
+ case nct6793:
break;
}
@@ -3950,6 +3973,7 @@
break;
case nct6791:
case nct6792:
+ case nct6793:
tmp |= 0x7e;
break;
}
@@ -4047,7 +4071,8 @@
if (reg != data->sio_reg_enable)
superio_outb(sioreg, SIO_REG_ENABLE, data->sio_reg_enable);
- if (data->kind == nct6791 || data->kind == nct6792)
+ if (data->kind == nct6791 || data->kind == nct6792 ||
+ data->kind == nct6793)
nct6791_enable_io_mapping(sioreg);
superio_exit(sioreg);
@@ -4106,15 +4131,6 @@
.probe = nct6775_probe,
};
-static const char * const nct6775_sio_names[] __initconst = {
- "NCT6106D",
- "NCT6775F",
- "NCT6776D/F",
- "NCT6779D",
- "NCT6791D",
- "NCT6792D",
-};
-
/* nct6775_find() looks for a '627 in the Super-I/O config space */
static int __init nct6775_find(int sioaddr, struct nct6775_sio_data *sio_data)
{
@@ -4150,6 +4166,9 @@
case SIO_NCT6792_ID:
sio_data->kind = nct6792;
break;
+ case SIO_NCT6793_ID:
+ sio_data->kind = nct6793;
+ break;
default:
if (val != 0xffff)
pr_debug("unsupported chip ID: 0x%04x\n", val);
@@ -4175,7 +4194,8 @@
superio_outb(sioaddr, SIO_REG_ENABLE, val | 0x01);
}
- if (sio_data->kind == nct6791 || sio_data->kind == nct6792)
+ if (sio_data->kind == nct6791 || sio_data->kind == nct6792 ||
+ sio_data->kind == nct6793)
nct6791_enable_io_mapping(sioaddr);
superio_exit(sioaddr);
@@ -4285,7 +4305,7 @@
}
MODULE_AUTHOR("Guenter Roeck <linux@roeck-us.net>");
-MODULE_DESCRIPTION("NCT6106D/NCT6775F/NCT6776F/NCT6779D/NCT6791D/NCT6792D driver");
+MODULE_DESCRIPTION("Driver for NCT6775F and compatible chips");
MODULE_LICENSE("GPL");
module_init(sensors_nct6775_init);
diff --git a/drivers/hwmon/ntc_thermistor.c b/drivers/hwmon/ntc_thermistor.c
index dc0b76c..feed306 100644
--- a/drivers/hwmon/ntc_thermistor.c
+++ b/drivers/hwmon/ntc_thermistor.c
@@ -477,7 +477,7 @@
return -EINVAL;
}
-static int ntc_read_temp(void *dev, long *temp)
+static int ntc_read_temp(void *dev, int *temp)
{
struct ntc_data *data = dev_get_drvdata(dev);
int ohm;
diff --git a/drivers/hwmon/tmp102.c b/drivers/hwmon/tmp102.c
index 9da2735..6548262 100644
--- a/drivers/hwmon/tmp102.c
+++ b/drivers/hwmon/tmp102.c
@@ -98,7 +98,7 @@
return tmp102;
}
-static int tmp102_read_temp(void *dev, long *temp)
+static int tmp102_read_temp(void *dev, int *temp)
{
struct tmp102 *tmp102 = tmp102_update_device(dev);
diff --git a/drivers/infiniband/Kconfig b/drivers/infiniband/Kconfig
index da4c697..aa26f3c 100644
--- a/drivers/infiniband/Kconfig
+++ b/drivers/infiniband/Kconfig
@@ -56,7 +56,6 @@
source "drivers/infiniband/hw/mthca/Kconfig"
source "drivers/infiniband/hw/qib/Kconfig"
-source "drivers/infiniband/hw/ehca/Kconfig"
source "drivers/infiniband/hw/cxgb3/Kconfig"
source "drivers/infiniband/hw/cxgb4/Kconfig"
source "drivers/infiniband/hw/mlx4/Kconfig"
diff --git a/drivers/infiniband/hw/Makefile b/drivers/infiniband/hw/Makefile
index 1bdb999..aded2a5 100644
--- a/drivers/infiniband/hw/Makefile
+++ b/drivers/infiniband/hw/Makefile
@@ -1,6 +1,5 @@
obj-$(CONFIG_INFINIBAND_MTHCA) += mthca/
obj-$(CONFIG_INFINIBAND_QIB) += qib/
-obj-$(CONFIG_INFINIBAND_EHCA) += ehca/
obj-$(CONFIG_INFINIBAND_CXGB3) += cxgb3/
obj-$(CONFIG_INFINIBAND_CXGB4) += cxgb4/
obj-$(CONFIG_MLX4_INFINIBAND) += mlx4/
diff --git a/drivers/infiniband/hw/qib/qib_file_ops.c b/drivers/infiniband/hw/qib/qib_file_ops.c
index 7258818..e449e39 100644
--- a/drivers/infiniband/hw/qib/qib_file_ops.c
+++ b/drivers/infiniband/hw/qib/qib_file_ops.c
@@ -908,7 +908,7 @@
return 0;
}
-static struct vm_operations_struct qib_file_vm_ops = {
+static const struct vm_operations_struct qib_file_vm_ops = {
.fault = qib_file_vma_fault,
};
diff --git a/drivers/infiniband/hw/qib/qib_mmap.c b/drivers/infiniband/hw/qib/qib_mmap.c
index 146cf29..34927b7 100644
--- a/drivers/infiniband/hw/qib/qib_mmap.c
+++ b/drivers/infiniband/hw/qib/qib_mmap.c
@@ -75,7 +75,7 @@
kref_put(&ip->ref, qib_release_mmap_info);
}
-static struct vm_operations_struct qib_vm_ops = {
+static const struct vm_operations_struct qib_vm_ops = {
.open = qib_vma_open,
.close = qib_vma_close,
};
diff --git a/drivers/infiniband/ulp/isert/ib_isert.c b/drivers/infiniband/ulp/isert/ib_isert.c
index dc439a4..403bd29 100644
--- a/drivers/infiniband/ulp/isert/ib_isert.c
+++ b/drivers/infiniband/ulp/isert/ib_isert.c
@@ -3095,7 +3095,7 @@
static int
isert_setup_np(struct iscsi_np *np,
- struct __kernel_sockaddr_storage *ksockaddr)
+ struct sockaddr_storage *ksockaddr)
{
struct isert_np *isert_np;
struct rdma_cm_id *isert_lid;
@@ -3117,7 +3117,7 @@
* in iscsi_target_configfs.c code..
*/
memcpy(&np->np_sockaddr, ksockaddr,
- sizeof(struct __kernel_sockaddr_storage));
+ sizeof(struct sockaddr_storage));
isert_lid = isert_setup_id(isert_np);
if (IS_ERR(isert_lid)) {
@@ -3199,32 +3199,11 @@
{
struct rdma_cm_id *cm_id = isert_conn->cm_id;
struct rdma_route *cm_route = &cm_id->route;
- struct sockaddr_in *sock_in;
- struct sockaddr_in6 *sock_in6;
conn->login_family = np->np_sockaddr.ss_family;
- if (np->np_sockaddr.ss_family == AF_INET6) {
- sock_in6 = (struct sockaddr_in6 *)&cm_route->addr.dst_addr;
- snprintf(conn->login_ip, sizeof(conn->login_ip), "%pI6c",
- &sock_in6->sin6_addr.in6_u);
- conn->login_port = ntohs(sock_in6->sin6_port);
-
- sock_in6 = (struct sockaddr_in6 *)&cm_route->addr.src_addr;
- snprintf(conn->local_ip, sizeof(conn->local_ip), "%pI6c",
- &sock_in6->sin6_addr.in6_u);
- conn->local_port = ntohs(sock_in6->sin6_port);
- } else {
- sock_in = (struct sockaddr_in *)&cm_route->addr.dst_addr;
- sprintf(conn->login_ip, "%pI4",
- &sock_in->sin_addr.s_addr);
- conn->login_port = ntohs(sock_in->sin_port);
-
- sock_in = (struct sockaddr_in *)&cm_route->addr.src_addr;
- sprintf(conn->local_ip, "%pI4",
- &sock_in->sin_addr.s_addr);
- conn->local_port = ntohs(sock_in->sin_port);
- }
+ conn->login_sockaddr = cm_route->addr.dst_addr;
+ conn->local_sockaddr = cm_route->addr.src_addr;
}
static int
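
With the login and local endpoints now stored as struct sockaddr_storage, callers no longer need an AF_INET/AF_INET6 branch to print them; the kernel's %pIS printf extension handles both families. A minimal hedged sketch (kernel context assumed):

    #include <linux/kernel.h>
    #include <linux/socket.h>

    /*
     * Hedged sketch: %pISpc prints a sockaddr of either family with the
     * port appended and IPv6 addresses in compressed form.
     */
    static void print_endpoint(const struct sockaddr_storage *ss)
    {
            pr_info("endpoint: %pISpc\n", (const struct sockaddr *)ss);
    }
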
diff --git a/drivers/input/evdev.c b/drivers/input/evdev.c
index 9d35499..08d4964 100644
--- a/drivers/input/evdev.c
+++ b/drivers/input/evdev.c
@@ -290,19 +290,14 @@
{
struct evdev_client *client = file->private_data;
struct evdev *evdev = client->evdev;
- int retval;
- retval = mutex_lock_interruptible(&evdev->mutex);
- if (retval)
- return retval;
+ mutex_lock(&evdev->mutex);
- if (!evdev->exist || client->revoked)
- retval = -ENODEV;
- else
- retval = input_flush_device(&evdev->handle, file);
+ if (evdev->exist && !client->revoked)
+ input_flush_device(&evdev->handle, file);
mutex_unlock(&evdev->mutex);
- return retval;
+ return 0;
}
static void evdev_free(struct device *dev)
diff --git a/drivers/input/keyboard/imx_keypad.c b/drivers/input/keyboard/imx_keypad.c
index d2ea863..2165f3d 100644
--- a/drivers/input/keyboard/imx_keypad.c
+++ b/drivers/input/keyboard/imx_keypad.c
@@ -5,8 +5,6 @@
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
- *
- * <<Power management needs to be implemented>>.
*/
#include <linux/clk.h>
diff --git a/drivers/input/misc/ab8500-ponkey.c b/drivers/input/misc/ab8500-ponkey.c
index 1f7e15c..4f5ef5b 100644
--- a/drivers/input/misc/ab8500-ponkey.c
+++ b/drivers/input/misc/ab8500-ponkey.c
@@ -118,6 +118,7 @@
{ .compatible = "stericsson,ab8500-ponkey", },
{}
};
+MODULE_DEVICE_TABLE(of, ab8500_ponkey_match);
#endif
static struct platform_driver ab8500_ponkey_driver = {
diff --git a/drivers/input/misc/pwm-beeper.c b/drivers/input/misc/pwm-beeper.c
index e82edf8..f2261ab 100644
--- a/drivers/input/misc/pwm-beeper.c
+++ b/drivers/input/misc/pwm-beeper.c
@@ -173,6 +173,7 @@
{ .compatible = "pwm-beeper", },
{ },
};
+MODULE_DEVICE_TABLE(of, pwm_beeper_match);
#endif
static struct platform_driver pwm_beeper_driver = {
diff --git a/drivers/input/misc/regulator-haptic.c b/drivers/input/misc/regulator-haptic.c
index 6bf3f10..a804705 100644
--- a/drivers/input/misc/regulator-haptic.c
+++ b/drivers/input/misc/regulator-haptic.c
@@ -249,6 +249,7 @@
{ .compatible = "regulator-haptic" },
{ /* sentinel */ },
};
+MODULE_DEVICE_TABLE(of, regulator_haptic_dt_match);
static struct platform_driver regulator_haptic_driver = {
.probe = regulator_haptic_probe,
diff --git a/drivers/input/misc/sparcspkr.c b/drivers/input/misc/sparcspkr.c
index 54116e5..6f997aa 100644
--- a/drivers/input/misc/sparcspkr.c
+++ b/drivers/input/misc/sparcspkr.c
@@ -253,6 +253,7 @@
},
{},
};
+MODULE_DEVICE_TABLE(of, bbc_beep_match);
static struct platform_driver bbc_beep_driver = {
.driver = {
@@ -332,6 +333,7 @@
},
{},
};
+MODULE_DEVICE_TABLE(of, grover_beep_match);
static struct platform_driver grover_beep_driver = {
.driver = {
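
The MODULE_DEVICE_TABLE(of, ...) lines added to ab8500-ponkey, pwm-beeper, regulator-haptic and sparcspkr all fix the same omission: without the table export, no OF module aliases are generated and udev cannot autoload the driver when the compatible string appears in the device tree. The usual shape, sketched with a made-up compatible string and driver name:

    #include <linux/module.h>
    #include <linux/of.h>
    #include <linux/platform_device.h>

    /* Hedged sketch: "vendor,example-beeper" is a hypothetical binding. */
    static const struct of_device_id example_of_match[] = {
            { .compatible = "vendor,example-beeper", },
            { /* sentinel */ }
    };
    /* Emits the of:N*T*C... aliases so modprobe can autoload on a DT match. */
    MODULE_DEVICE_TABLE(of, example_of_match);

    static int example_probe(struct platform_device *pdev)
    {
            return 0;       /* a real driver would set up the device here */
    }

    static struct platform_driver example_driver = {
            .driver = {
                    .name = "example-beeper",
                    .of_match_table = example_of_match,
            },
            .probe = example_probe,
    };
    module_platform_driver(example_driver);

    MODULE_LICENSE("GPL");
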
diff --git a/drivers/input/misc/xen-kbdfront.c b/drivers/input/misc/xen-kbdfront.c
index 95599e4..23d0549 100644
--- a/drivers/input/misc/xen-kbdfront.c
+++ b/drivers/input/misc/xen-kbdfront.c
@@ -232,7 +232,7 @@
struct xenbus_transaction xbt;
ret = gnttab_grant_foreign_access(dev->otherend_id,
- virt_to_mfn(info->page), 0);
+ virt_to_gfn(info->page), 0);
if (ret < 0)
return ret;
info->gref = ret;
@@ -255,7 +255,7 @@
goto error_irqh;
}
ret = xenbus_printf(xbt, dev->nodename, "page-ref", "%lu",
- virt_to_mfn(info->page));
+ virt_to_gfn(info->page));
if (ret)
goto error_xenbus;
ret = xenbus_printf(xbt, dev->nodename, "page-gref", "%u", info->gref);
diff --git a/drivers/input/mouse/elan_i2c_core.c b/drivers/input/mouse/elan_i2c_core.c
index e2b7420..fa94530 100644
--- a/drivers/input/mouse/elan_i2c_core.c
+++ b/drivers/input/mouse/elan_i2c_core.c
@@ -1170,6 +1170,7 @@
{ "ELAN0000", 0 },
{ "ELAN0100", 0 },
{ "ELAN0600", 0 },
+ { "ELAN1000", 0 },
{ }
};
MODULE_DEVICE_TABLE(acpi, elan_acpi_id);
diff --git a/drivers/input/serio/i8042.c b/drivers/input/serio/i8042.c
index c9c98f0a..db91de5 100644
--- a/drivers/input/serio/i8042.c
+++ b/drivers/input/serio/i8042.c
@@ -877,7 +877,7 @@
static int i8042_controller_check(void)
{
if (i8042_flush()) {
- pr_err("No controller found\n");
+ pr_info("No controller found\n");
return -ENODEV;
}
diff --git a/drivers/input/touchscreen/Kconfig b/drivers/input/touchscreen/Kconfig
index 059edeb..600dccef 100644
--- a/drivers/input/touchscreen/Kconfig
+++ b/drivers/input/touchscreen/Kconfig
@@ -479,6 +479,18 @@
To compile this driver as a module, choose M here: the
module will be called mtouch.
+config TOUCHSCREEN_IMX6UL_TSC
+ tristate "Freescale i.MX6UL touchscreen controller"
+ depends on (OF && GPIOLIB) || COMPILE_TEST
+ help
+ Say Y here if you have a Freescale i.MX6UL, and want to
+ use the internal touchscreen controller.
+
+ If unsure, say N.
+
+ To compile this driver as a module, choose M here: the
+ module will be called imx6ul_tsc.
+
config TOUCHSCREEN_INEXIO
tristate "iNexio serial touchscreens"
select SERIO
@@ -1040,4 +1052,16 @@
To compile this driver as a module, choose M here: the
module will be called zforce_ts.
+config TOUCHSCREEN_COLIBRI_VF50
+ tristate "Toradex Colibri on board touchscreen driver"
+ depends on GPIOLIB && IIO && VF610_ADC
+ help
+ Say Y here if you have a Colibri VF50 and plan to use
+ the on-board 4-wire resistive touchscreen.
+
+ If unsure, say N.
+
+ To compile this driver as a module, choose M here: the
+ module will be called colibri_vf50_ts.
+
endif
diff --git a/drivers/input/touchscreen/Makefile b/drivers/input/touchscreen/Makefile
index c85aae2..1b79cc0 100644
--- a/drivers/input/touchscreen/Makefile
+++ b/drivers/input/touchscreen/Makefile
@@ -38,6 +38,7 @@
obj-$(CONFIG_TOUCHSCREEN_FUJITSU) += fujitsu_ts.o
obj-$(CONFIG_TOUCHSCREEN_GOODIX) += goodix.o
obj-$(CONFIG_TOUCHSCREEN_ILI210X) += ili210x.o
+obj-$(CONFIG_TOUCHSCREEN_IMX6UL_TSC) += imx6ul_tsc.o
obj-$(CONFIG_TOUCHSCREEN_INEXIO) += inexio.o
obj-$(CONFIG_TOUCHSCREEN_INTEL_MID) += intel-mid-touch.o
obj-$(CONFIG_TOUCHSCREEN_IPROC) += bcm_iproc_tsc.o
@@ -85,3 +86,4 @@
obj-$(CONFIG_TOUCHSCREEN_SX8654) += sx8654.o
obj-$(CONFIG_TOUCHSCREEN_TPS6507X) += tps6507x-ts.o
obj-$(CONFIG_TOUCHSCREEN_ZFORCE) += zforce_ts.o
+obj-$(CONFIG_TOUCHSCREEN_COLIBRI_VF50) += colibri-vf50-ts.o
diff --git a/drivers/input/touchscreen/colibri-vf50-ts.c b/drivers/input/touchscreen/colibri-vf50-ts.c
new file mode 100644
index 0000000..5d4903a
--- /dev/null
+++ b/drivers/input/touchscreen/colibri-vf50-ts.c
@@ -0,0 +1,386 @@
+/*
+ * Toradex Colibri VF50 Touchscreen driver
+ *
+ * Copyright 2015 Toradex AG
+ *
+ * Originally authored by Stefan Agner for 3.0 kernel
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#include <linux/delay.h>
+#include <linux/err.h>
+#include <linux/gpio.h>
+#include <linux/gpio/consumer.h>
+#include <linux/iio/consumer.h>
+#include <linux/iio/types.h>
+#include <linux/input.h>
+#include <linux/interrupt.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/pinctrl/consumer.h>
+#include <linux/platform_device.h>
+#include <linux/slab.h>
+#include <linux/types.h>
+
+#define DRIVER_NAME "colibri-vf50-ts"
+#define DRV_VERSION "1.0"
+
+#define VF_ADC_MAX ((1 << 12) - 1)
+
+#define COLI_TOUCH_MIN_DELAY_US 1000
+#define COLI_TOUCH_MAX_DELAY_US 2000
+#define COLI_PULLUP_MIN_DELAY_US 10000
+#define COLI_PULLUP_MAX_DELAY_US 11000
+#define COLI_TOUCH_NO_OF_AVGS 5
+#define COLI_TOUCH_REQ_ADC_CHAN 4
+
+struct vf50_touch_device {
+ struct platform_device *pdev;
+ struct input_dev *ts_input;
+ struct iio_channel *channels;
+ struct gpio_desc *gpio_xp;
+ struct gpio_desc *gpio_xm;
+ struct gpio_desc *gpio_yp;
+ struct gpio_desc *gpio_ym;
+ int pen_irq;
+ int min_pressure;
+ bool stop_touchscreen;
+};
+
+/*
+ * Enables given plates and measures touch parameters using ADC
+ */
+static int adc_ts_measure(struct iio_channel *channel,
+ struct gpio_desc *plate_p, struct gpio_desc *plate_m)
+{
+ int i, value = 0, val = 0;
+ int error;
+
+ gpiod_set_value(plate_p, 1);
+ gpiod_set_value(plate_m, 1);
+
+ usleep_range(COLI_TOUCH_MIN_DELAY_US, COLI_TOUCH_MAX_DELAY_US);
+
+ for (i = 0; i < COLI_TOUCH_NO_OF_AVGS; i++) {
+ error = iio_read_channel_raw(channel, &val);
+ if (error < 0) {
+ value = error;
+ goto error_iio_read;
+ }
+
+ value += val;
+ }
+
+ value /= COLI_TOUCH_NO_OF_AVGS;
+
+error_iio_read:
+ gpiod_set_value(plate_p, 0);
+ gpiod_set_value(plate_m, 0);
+
+ return value;
+}
+
+/*
+ * Enable touch detection using falling edge detection on XM
+ */
+static void vf50_ts_enable_touch_detection(struct vf50_touch_device *vf50_ts)
+{
+ /* Enable plate YM (needs to be strong GND, high active) */
+ gpiod_set_value(vf50_ts->gpio_ym, 1);
+
+ /*
+ * Switch the platform pinmux to the idle state in order to enable
+ * the pull-up on the GPIO
+ */
+ pinctrl_pm_select_idle_state(&vf50_ts->pdev->dev);
+
+ /* Wait for the pull-up to be stable on high */
+ usleep_range(COLI_PULLUP_MIN_DELAY_US, COLI_PULLUP_MAX_DELAY_US);
+}
+
+/*
+ * ADC touch screen sampling bottom half irq handler
+ */
+static irqreturn_t vf50_ts_irq_bh(int irq, void *private)
+{
+ struct vf50_touch_device *vf50_ts = private;
+ struct device *dev = &vf50_ts->pdev->dev;
+ int val_x, val_y, val_z1, val_z2, val_p = 0;
+ bool discard_val_on_start = true;
+
+ /* Disable the touch detection plates */
+ gpiod_set_value(vf50_ts->gpio_ym, 0);
+
+ /* Switch the platform pinmux to the default state so the pin is muxed as ADC */
+ pinctrl_pm_select_default_state(dev);
+
+ while (!vf50_ts->stop_touchscreen) {
+ /* X-Direction */
+ val_x = adc_ts_measure(&vf50_ts->channels[0],
+ vf50_ts->gpio_xp, vf50_ts->gpio_xm);
+ if (val_x < 0)
+ break;
+
+ /* Y-Direction */
+ val_y = adc_ts_measure(&vf50_ts->channels[1],
+ vf50_ts->gpio_yp, vf50_ts->gpio_ym);
+ if (val_y < 0)
+ break;
+
+ /*
+ * Touch pressure
+ * Measure on XP/YM
+ */
+ val_z1 = adc_ts_measure(&vf50_ts->channels[2],
+ vf50_ts->gpio_yp, vf50_ts->gpio_xm);
+ if (val_z1 < 0)
+ break;
+ val_z2 = adc_ts_measure(&vf50_ts->channels[3],
+ vf50_ts->gpio_yp, vf50_ts->gpio_xm);
+ if (val_z2 < 0)
+ break;
+
+ /* Validate signal (avoid calculation using noise) */
+ if (val_z1 > 64 && val_x > 64) {
+ /*
+ * Calculate resistance between the plates
+ * lower resistance means higher pressure
+ */
+ int r_x = (1000 * val_x) / VF_ADC_MAX;
+
+ val_p = (r_x * val_z2) / val_z1 - r_x;
+
+ } else {
+ val_p = 2000;
+ }
+
+ val_p = 2000 - val_p;
+ dev_dbg(dev,
+ "Measured values: x: %d, y: %d, z1: %d, z2: %d, p: %d\n",
+ val_x, val_y, val_z1, val_z2, val_p);
+
+ /*
+ * If touch pressure is too low, stop measuring and reenable
+ * touch detection
+ */
+ if (val_p < vf50_ts->min_pressure || val_p > 2000)
+ break;
+
+ /*
+ * The pressure may not be enough for the first x and the
+ * second y measurement, but it is usually fine by the time the
+ * driver does the third and fourth measurement. To take care
+ * of this, the first measurement is always dropped.
+ */
+ if (discard_val_on_start) {
+ discard_val_on_start = false;
+ } else {
+ /*
+ * Report touch position and sleep for
+ * the next measurement.
+ */
+ input_report_abs(vf50_ts->ts_input,
+ ABS_X, VF_ADC_MAX - val_x);
+ input_report_abs(vf50_ts->ts_input,
+ ABS_Y, VF_ADC_MAX - val_y);
+ input_report_abs(vf50_ts->ts_input,
+ ABS_PRESSURE, val_p);
+ input_report_key(vf50_ts->ts_input, BTN_TOUCH, 1);
+ input_sync(vf50_ts->ts_input);
+ }
+
+ usleep_range(COLI_PULLUP_MIN_DELAY_US,
+ COLI_PULLUP_MAX_DELAY_US);
+ }
+
+ /* Report no more touch, re-enable touch detection */
+ input_report_abs(vf50_ts->ts_input, ABS_PRESSURE, 0);
+ input_report_key(vf50_ts->ts_input, BTN_TOUCH, 0);
+ input_sync(vf50_ts->ts_input);
+
+ vf50_ts_enable_touch_detection(vf50_ts);
+
+ return IRQ_HANDLED;
+}
+
+static int vf50_ts_open(struct input_dev *dev_input)
+{
+ struct vf50_touch_device *touchdev = input_get_drvdata(dev_input);
+ struct device *dev = &touchdev->pdev->dev;
+
+ dev_dbg(dev, "Input device %s opened, starting touch detection\n",
+ dev_input->name);
+
+ touchdev->stop_touchscreen = false;
+
+ /* Mux detection before requesting the IRQ, wait for the pull-up to settle */
+ vf50_ts_enable_touch_detection(touchdev);
+
+ return 0;
+}
+
+static void vf50_ts_close(struct input_dev *dev_input)
+{
+ struct vf50_touch_device *touchdev = input_get_drvdata(dev_input);
+ struct device *dev = &touchdev->pdev->dev;
+
+ touchdev->stop_touchscreen = true;
+
+ /* Make sure IRQ is not running past close */
+ mb();
+ synchronize_irq(touchdev->pen_irq);
+
+ gpiod_set_value(touchdev->gpio_ym, 0);
+ pinctrl_pm_select_default_state(dev);
+
+ dev_dbg(dev, "Input device %s closed, disable touch detection\n",
+ dev_input->name);
+}
+
+static int vf50_ts_get_gpiod(struct device *dev, struct gpio_desc **gpio_d,
+ const char *con_id, enum gpiod_flags flags)
+{
+ int error;
+
+ *gpio_d = devm_gpiod_get(dev, con_id, flags);
+ if (IS_ERR(*gpio_d)) {
+ error = PTR_ERR(*gpio_d);
+ dev_err(dev, "Could not get gpio_%s %d\n", con_id, error);
+ return error;
+ }
+
+ return 0;
+}
+
+static void vf50_ts_channel_release(void *data)
+{
+ struct iio_channel *channels = data;
+
+ iio_channel_release_all(channels);
+}
+
+static int vf50_ts_probe(struct platform_device *pdev)
+{
+ struct input_dev *input;
+ struct iio_channel *channels;
+ struct device *dev = &pdev->dev;
+ struct vf50_touch_device *touchdev;
+ int num_adc_channels;
+ int error;
+
+ channels = iio_channel_get_all(dev);
+ if (IS_ERR(channels))
+ return PTR_ERR(channels);
+
+ error = devm_add_action(dev, vf50_ts_channel_release, channels);
+ if (error) {
+ iio_channel_release_all(channels);
+ dev_err(dev, "Failed to register iio channel release action");
+ return error;
+ }
+
+ num_adc_channels = 0;
+ while (channels[num_adc_channels].indio_dev)
+ num_adc_channels++;
+
+ if (num_adc_channels != COLI_TOUCH_REQ_ADC_CHAN) {
+ dev_err(dev, "Inadequate ADC channels specified\n");
+ return -EINVAL;
+ }
+
+ touchdev = devm_kzalloc(dev, sizeof(*touchdev), GFP_KERNEL);
+ if (!touchdev)
+ return -ENOMEM;
+
+ touchdev->pdev = pdev;
+ touchdev->channels = channels;
+
+ error = of_property_read_u32(dev->of_node, "vf50-ts-min-pressure",
+ &touchdev->min_pressure);
+ if (error)
+ return error;
+
+ input = devm_input_allocate_device(dev);
+ if (!input) {
+ dev_err(dev, "Failed to allocate TS input device\n");
+ return -ENOMEM;
+ }
+
+ platform_set_drvdata(pdev, touchdev);
+
+ input->name = DRIVER_NAME;
+ input->id.bustype = BUS_HOST;
+ input->dev.parent = dev;
+ input->open = vf50_ts_open;
+ input->close = vf50_ts_close;
+
+ input_set_capability(input, EV_KEY, BTN_TOUCH);
+ input_set_abs_params(input, ABS_X, 0, VF_ADC_MAX, 0, 0);
+ input_set_abs_params(input, ABS_Y, 0, VF_ADC_MAX, 0, 0);
+ input_set_abs_params(input, ABS_PRESSURE, 0, VF_ADC_MAX, 0, 0);
+
+ touchdev->ts_input = input;
+ input_set_drvdata(input, touchdev);
+
+ error = input_register_device(input);
+ if (error) {
+ dev_err(dev, "Failed to register input device\n");
+ return error;
+ }
+
+ error = vf50_ts_get_gpiod(dev, &touchdev->gpio_xp, "xp", GPIOD_OUT_LOW);
+ if (error)
+ return error;
+
+ error = vf50_ts_get_gpiod(dev, &touchdev->gpio_xm,
+ "xm", GPIOD_OUT_LOW);
+ if (error)
+ return error;
+
+ error = vf50_ts_get_gpiod(dev, &touchdev->gpio_yp, "yp", GPIOD_OUT_LOW);
+ if (error)
+ return error;
+
+ error = vf50_ts_get_gpiod(dev, &touchdev->gpio_ym, "ym", GPIOD_OUT_LOW);
+ if (error)
+ return error;
+
+ touchdev->pen_irq = platform_get_irq(pdev, 0);
+ if (touchdev->pen_irq < 0)
+ return touchdev->pen_irq;
+
+ error = devm_request_threaded_irq(dev, touchdev->pen_irq,
+ NULL, vf50_ts_irq_bh, IRQF_ONESHOT,
+ "vf50 touch", touchdev);
+ if (error) {
+ dev_err(dev, "Failed to request IRQ %d: %d\n",
+ touchdev->pen_irq, error);
+ return error;
+ }
+
+ return 0;
+}
+
+static const struct of_device_id vf50_touch_of_match[] = {
+ { .compatible = "toradex,vf50-touchscreen", },
+ { }
+};
+MODULE_DEVICE_TABLE(of, vf50_touch_of_match);
+
+static struct platform_driver vf50_touch_driver = {
+ .driver = {
+ .name = "toradex,vf50_touchctrl",
+ .of_match_table = vf50_touch_of_match,
+ },
+ .probe = vf50_ts_probe,
+};
+module_platform_driver(vf50_touch_driver);
+
+MODULE_AUTHOR("Sanchayan Maity");
+MODULE_DESCRIPTION("Colibri VF50 Touchscreen driver");
+MODULE_LICENSE("GPL");
+MODULE_VERSION(DRV_VERSION);
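
The bottom-half handler above turns the two Z samples into a pressure figure with the usual 4-wire resistive formula Rtouch = Rx * (Z2/Z1 - 1), then inverts it so larger values mean firmer contact. A standalone hedged sketch of that arithmetic (the 1000-ohm plate scale and 2000 cutoff mirror the driver; the sample inputs are made up):

    #include <stdio.h>

    #define ADC_MAX ((1 << 12) - 1)    /* 12-bit ADC, as in the driver */

    /*
     * Returns an inverted pressure value in 0..2000; values below the
     * driver's min-pressure threshold are treated as "no touch".
     */
    static int touch_pressure(int val_x, int val_z1, int val_z2)
    {
            int r_x, val_p;

            if (val_z1 <= 64 || val_x <= 64)        /* too noisy to trust */
                    return 0;

            r_x = (1000 * val_x) / ADC_MAX;         /* X-plate resistance share */
            val_p = (r_x * val_z2) / val_z1 - r_x;  /* Rtouch = Rx * (Z2/Z1 - 1) */

            return 2000 - val_p;
    }

    int main(void)
    {
            /* x=2048, z1=900, z2=1100 -> r_x=500, val_p=111, pressure=1889 */
            printf("%d\n", touch_pressure(2048, 900, 1100));
            return 0;
    }
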
diff --git a/drivers/input/touchscreen/cyttsp4_i2c.c b/drivers/input/touchscreen/cyttsp4_i2c.c
index 9a323dd..a9f95c7 100644
--- a/drivers/input/touchscreen/cyttsp4_i2c.c
+++ b/drivers/input/touchscreen/cyttsp4_i2c.c
@@ -86,4 +86,3 @@
MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("Cypress TrueTouch(R) Standard Product (TTSP) I2C driver");
MODULE_AUTHOR("Cypress");
-MODULE_ALIAS("i2c:cyttsp4");
diff --git a/drivers/input/touchscreen/cyttsp_i2c.c b/drivers/input/touchscreen/cyttsp_i2c.c
index 519e2de..eee51b3 100644
--- a/drivers/input/touchscreen/cyttsp_i2c.c
+++ b/drivers/input/touchscreen/cyttsp_i2c.c
@@ -86,4 +86,3 @@
MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("Cypress TrueTouch(R) Standard Product (TTSP) I2C driver");
MODULE_AUTHOR("Cypress");
-MODULE_ALIAS("i2c:cyttsp");
diff --git a/drivers/input/touchscreen/elants_i2c.c b/drivers/input/touchscreen/elants_i2c.c
index ddac134..17cc20e 100644
--- a/drivers/input/touchscreen/elants_i2c.c
+++ b/drivers/input/touchscreen/elants_i2c.c
@@ -102,7 +102,7 @@
#define ELAN_FW_PAGESIZE 132
/* calibration timeout definition */
-#define ELAN_CALI_TIMEOUT_MSEC 10000
+#define ELAN_CALI_TIMEOUT_MSEC 12000
#define ELAN_POWERON_DELAY_USEC 500
#define ELAN_RESET_DELAY_MSEC 20
diff --git a/drivers/input/touchscreen/imx6ul_tsc.c b/drivers/input/touchscreen/imx6ul_tsc.c
new file mode 100644
index 0000000..ff0b758
--- /dev/null
+++ b/drivers/input/touchscreen/imx6ul_tsc.c
@@ -0,0 +1,523 @@
+/*
+ * Freescale i.MX6UL touchscreen controller driver
+ *
+ * Copyright (C) 2015 Freescale Semiconductor, Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/errno.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/gpio/consumer.h>
+#include <linux/input.h>
+#include <linux/slab.h>
+#include <linux/completion.h>
+#include <linux/delay.h>
+#include <linux/of.h>
+#include <linux/interrupt.h>
+#include <linux/platform_device.h>
+#include <linux/clk.h>
+#include <linux/io.h>
+
+/* ADC configuration registers field define */
+#define ADC_AIEN (0x1 << 7)
+#define ADC_CONV_DISABLE 0x1F
+#define ADC_CAL (0x1 << 7)
+#define ADC_CALF 0x2
+#define ADC_12BIT_MODE (0x2 << 2)
+#define ADC_IPG_CLK 0x00
+#define ADC_CLK_DIV_8 (0x03 << 5)
+#define ADC_SHORT_SAMPLE_MODE (0x0 << 4)
+#define ADC_HARDWARE_TRIGGER (0x1 << 13)
+#define SELECT_CHANNEL_4 0x04
+#define SELECT_CHANNEL_1 0x01
+#define DISABLE_CONVERSION_INT (0x0 << 7)
+
+/* ADC registers */
+#define REG_ADC_HC0 0x00
+#define REG_ADC_HC1 0x04
+#define REG_ADC_HC2 0x08
+#define REG_ADC_HC3 0x0C
+#define REG_ADC_HC4 0x10
+#define REG_ADC_HS 0x14
+#define REG_ADC_R0 0x18
+#define REG_ADC_CFG 0x2C
+#define REG_ADC_GC 0x30
+#define REG_ADC_GS 0x34
+
+#define ADC_TIMEOUT msecs_to_jiffies(100)
+
+/* TSC registers */
+#define REG_TSC_BASIC_SETING 0x00
+#define REG_TSC_PRE_CHARGE_TIME 0x10
+#define REG_TSC_FLOW_CONTROL 0x20
+#define REG_TSC_MEASURE_VALUE 0x30
+#define REG_TSC_INT_EN 0x40
+#define REG_TSC_INT_SIG_EN 0x50
+#define REG_TSC_INT_STATUS 0x60
+#define REG_TSC_DEBUG_MODE 0x70
+#define REG_TSC_DEBUG_MODE2 0x80
+
+/* TSC configuration registers field define */
+#define DETECT_4_WIRE_MODE (0x0 << 4)
+#define AUTO_MEASURE 0x1
+#define MEASURE_SIGNAL 0x1
+#define DETECT_SIGNAL (0x1 << 4)
+#define VALID_SIGNAL (0x1 << 8)
+#define MEASURE_INT_EN 0x1
+#define MEASURE_SIG_EN 0x1
+#define VALID_SIG_EN (0x1 << 8)
+#define DE_GLITCH_2 (0x2 << 29)
+#define START_SENSE (0x1 << 12)
+#define TSC_DISABLE (0x1 << 16)
+#define DETECT_MODE 0x2
+
+struct imx6ul_tsc {
+ struct device *dev;
+ struct input_dev *input;
+ void __iomem *tsc_regs;
+ void __iomem *adc_regs;
+ struct clk *tsc_clk;
+ struct clk *adc_clk;
+ struct gpio_desc *xnur_gpio;
+
+ int measure_delay_time;
+ int pre_charge_time;
+
+ struct completion completion;
+};
+
+/*
+ * The TSC module needs the ADC to get the measured value, so
+ * the ADC must be initialized before the TSC is configured.
+ */
+static void imx6ul_adc_init(struct imx6ul_tsc *tsc)
+{
+ int adc_hc = 0;
+ int adc_gc;
+ int adc_gs;
+ int adc_cfg;
+ int timeout;
+
+ reinit_completion(&tsc->completion);
+
+ adc_cfg = readl(tsc->adc_regs + REG_ADC_CFG);
+ adc_cfg |= ADC_12BIT_MODE | ADC_IPG_CLK;
+ adc_cfg |= ADC_CLK_DIV_8 | ADC_SHORT_SAMPLE_MODE;
+ adc_cfg &= ~ADC_HARDWARE_TRIGGER;
+ writel(adc_cfg, tsc->adc_regs + REG_ADC_CFG);
+
+ /* enable calibration interrupt */
+ adc_hc |= ADC_AIEN;
+ adc_hc |= ADC_CONV_DISABLE;
+ writel(adc_hc, tsc->adc_regs + REG_ADC_HC0);
+
+ /* start ADC calibration */
+ adc_gc = readl(tsc->adc_regs + REG_ADC_GC);
+ adc_gc |= ADC_CAL;
+ writel(adc_gc, tsc->adc_regs + REG_ADC_GC);
+
+ timeout = wait_for_completion_timeout
+ (&tsc->completion, ADC_TIMEOUT);
+ if (timeout == 0)
+ dev_err(tsc->dev, "Timeout for adc calibration\n");
+
+ adc_gs = readl(tsc->adc_regs + REG_ADC_GS);
+ if (adc_gs & ADC_CALF)
+ dev_err(tsc->dev, "ADC calibration failed\n");
+
+ /* The TSC needs the ADC to work in hardware-trigger mode */
+ adc_cfg = readl(tsc->adc_regs + REG_ADC_CFG);
+ adc_cfg |= ADC_HARDWARE_TRIGGER;
+ writel(adc_cfg, tsc->adc_regs + REG_ADC_CFG);
+}
+
+/*
+ * This is a TSC workaround. The TSC currently misconnects two
+ * ADC channels, so this function remaps the channel configuration
+ * for hardware trigger.
+ */
+static void imx6ul_tsc_channel_config(struct imx6ul_tsc *tsc)
+{
+ int adc_hc0, adc_hc1, adc_hc2, adc_hc3, adc_hc4;
+
+ adc_hc0 = DISABLE_CONVERSION_INT;
+ writel(adc_hc0, tsc->adc_regs + REG_ADC_HC0);
+
+ adc_hc1 = DISABLE_CONVERSION_INT | SELECT_CHANNEL_4;
+ writel(adc_hc1, tsc->adc_regs + REG_ADC_HC1);
+
+ adc_hc2 = DISABLE_CONVERSION_INT;
+ writel(adc_hc2, tsc->adc_regs + REG_ADC_HC2);
+
+ adc_hc3 = DISABLE_CONVERSION_INT | SELECT_CHANNEL_1;
+ writel(adc_hc3, tsc->adc_regs + REG_ADC_HC3);
+
+ adc_hc4 = DISABLE_CONVERSION_INT;
+ writel(adc_hc4, tsc->adc_regs + REG_ADC_HC4);
+}
+
+/*
+ * TSC setting: configure the pre-charge time and the measure delay
+ * time. Different touchscreens may need different pre-charge and
+ * measure delay times.
+ */
+static void imx6ul_tsc_set(struct imx6ul_tsc *tsc)
+{
+ int basic_setting = 0;
+ int start;
+
+ basic_setting |= tsc->measure_delay_time << 8;
+ basic_setting |= DETECT_4_WIRE_MODE | AUTO_MEASURE;
+ writel(basic_setting, tsc->tsc_regs + REG_TSC_BASIC_SETING);
+
+ writel(DE_GLITCH_2, tsc->tsc_regs + REG_TSC_DEBUG_MODE2);
+
+ writel(tsc->pre_charge_time, tsc->tsc_regs + REG_TSC_PRE_CHARGE_TIME);
+ writel(MEASURE_INT_EN, tsc->tsc_regs + REG_TSC_INT_EN);
+ writel(MEASURE_SIG_EN | VALID_SIG_EN,
+ tsc->tsc_regs + REG_TSC_INT_SIG_EN);
+
+ /* start sense detection */
+ start = readl(tsc->tsc_regs + REG_TSC_FLOW_CONTROL);
+ start |= START_SENSE;
+ start &= ~TSC_DISABLE;
+ writel(start, tsc->tsc_regs + REG_TSC_FLOW_CONTROL);
+}
+
+static void imx6ul_tsc_init(struct imx6ul_tsc *tsc)
+{
+ imx6ul_adc_init(tsc);
+ imx6ul_tsc_channel_config(tsc);
+ imx6ul_tsc_set(tsc);
+}
+
+static void imx6ul_tsc_disable(struct imx6ul_tsc *tsc)
+{
+ int tsc_flow;
+ int adc_cfg;
+
+ /* TSC controller enters idle state */
+ tsc_flow = readl(tsc->tsc_regs + REG_TSC_FLOW_CONTROL);
+ tsc_flow |= TSC_DISABLE;
+ writel(tsc_flow, tsc->tsc_regs + REG_TSC_FLOW_CONTROL);
+
+ /* ADC controller enters stop mode */
+ adc_cfg = readl(tsc->adc_regs + REG_ADC_HC0);
+ adc_cfg |= ADC_CONV_DISABLE;
+ writel(adc_cfg, tsc->adc_regs + REG_ADC_HC0);
+}
+
+/* Delay some time (max 2 ms) and wait for the pre-charge to finish. */
+static bool tsc_wait_detect_mode(struct imx6ul_tsc *tsc)
+{
+ unsigned long timeout = jiffies + msecs_to_jiffies(2);
+ int state_machine;
+ int debug_mode2;
+
+ do {
+ if (time_after(jiffies, timeout))
+ return false;
+
+ usleep_range(200, 400);
+ debug_mode2 = readl(tsc->tsc_regs + REG_TSC_DEBUG_MODE2);
+ state_machine = (debug_mode2 >> 20) & 0x7;
+ } while (state_machine != DETECT_MODE);
+
+ usleep_range(200, 400);
+ return true;
+}
+
+static irqreturn_t tsc_irq_fn(int irq, void *dev_id)
+{
+ struct imx6ul_tsc *tsc = dev_id;
+ int status;
+ int value;
+ int x, y;
+ int start;
+
+ status = readl(tsc->tsc_regs + REG_TSC_INT_STATUS);
+
+ /* write 1 to clear the bit measure-signal */
+ writel(MEASURE_SIGNAL | DETECT_SIGNAL,
+ tsc->tsc_regs + REG_TSC_INT_STATUS);
+
+ /* It's a HW self-clean bit. Set this bit and start sense detection */
+ start = readl(tsc->tsc_regs + REG_TSC_FLOW_CONTROL);
+ start |= START_SENSE;
+ writel(start, tsc->tsc_regs + REG_TSC_FLOW_CONTROL);
+
+ if (status & MEASURE_SIGNAL) {
+ value = readl(tsc->tsc_regs + REG_TSC_MEASURE_VALUE);
+ x = (value >> 16) & 0x0fff;
+ y = value & 0x0fff;
+
+ /*
+ * In detect mode, we can get the xnur gpio value,
+ * otherwise assume contact is still active.
+ */
+ if (!tsc_wait_detect_mode(tsc) ||
+ gpiod_get_value_cansleep(tsc->xnur_gpio)) {
+ input_report_key(tsc->input, BTN_TOUCH, 1);
+ input_report_abs(tsc->input, ABS_X, x);
+ input_report_abs(tsc->input, ABS_Y, y);
+ } else {
+ input_report_key(tsc->input, BTN_TOUCH, 0);
+ }
+
+ input_sync(tsc->input);
+ }
+
+ return IRQ_HANDLED;
+}
+
+static irqreturn_t adc_irq_fn(int irq, void *dev_id)
+{
+ struct imx6ul_tsc *tsc = dev_id;
+ int coco;
+ int value;
+
+ coco = readl(tsc->adc_regs + REG_ADC_HS);
+ if (coco & 0x01) {
+ value = readl(tsc->adc_regs + REG_ADC_R0);
+ complete(&tsc->completion);
+ }
+
+ return IRQ_HANDLED;
+}
+
+static int imx6ul_tsc_open(struct input_dev *input_dev)
+{
+ struct imx6ul_tsc *tsc = input_get_drvdata(input_dev);
+ int err;
+
+ err = clk_prepare_enable(tsc->adc_clk);
+ if (err) {
+ dev_err(tsc->dev,
+ "Could not prepare or enable the adc clock: %d\n",
+ err);
+ return err;
+ }
+
+ err = clk_prepare_enable(tsc->tsc_clk);
+ if (err) {
+ dev_err(tsc->dev,
+ "Could not prepare or enable the tsc clock: %d\n",
+ err);
+ clk_disable_unprepare(tsc->adc_clk);
+ return err;
+ }
+
+ imx6ul_tsc_init(tsc);
+
+ return 0;
+}
+
+static void imx6ul_tsc_close(struct input_dev *input_dev)
+{
+ struct imx6ul_tsc *tsc = input_get_drvdata(input_dev);
+
+ imx6ul_tsc_disable(tsc);
+
+ clk_disable_unprepare(tsc->tsc_clk);
+ clk_disable_unprepare(tsc->adc_clk);
+}
+
+static int imx6ul_tsc_probe(struct platform_device *pdev)
+{
+ struct device_node *np = pdev->dev.of_node;
+ struct imx6ul_tsc *tsc;
+ struct input_dev *input_dev;
+ struct resource *tsc_mem;
+ struct resource *adc_mem;
+ int err;
+ int tsc_irq;
+ int adc_irq;
+
+ tsc = devm_kzalloc(&pdev->dev, sizeof(struct imx6ul_tsc), GFP_KERNEL);
+ if (!tsc)
+ return -ENOMEM;
+
+ input_dev = devm_input_allocate_device(&pdev->dev);
+ if (!input_dev)
+ return -ENOMEM;
+
+ input_dev->name = "iMX6UL TouchScreen Controller";
+ input_dev->id.bustype = BUS_HOST;
+
+ input_dev->open = imx6ul_tsc_open;
+ input_dev->close = imx6ul_tsc_close;
+
+ input_set_capability(input_dev, EV_KEY, BTN_TOUCH);
+ input_set_abs_params(input_dev, ABS_X, 0, 0xFFF, 0, 0);
+ input_set_abs_params(input_dev, ABS_Y, 0, 0xFFF, 0, 0);
+
+ input_set_drvdata(input_dev, tsc);
+
+ tsc->dev = &pdev->dev;
+ tsc->input = input_dev;
+ init_completion(&tsc->completion);
+
+ tsc->xnur_gpio = devm_gpiod_get(&pdev->dev, "xnur", GPIOD_IN);
+ if (IS_ERR(tsc->xnur_gpio)) {
+ err = PTR_ERR(tsc->xnur_gpio);
+ dev_err(&pdev->dev,
+ "failed to request GPIO tsc_X- (xnur): %d\n", err);
+ return err;
+ }
+
+ tsc_mem = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+ tsc->tsc_regs = devm_ioremap_resource(&pdev->dev, tsc_mem);
+ if (IS_ERR(tsc->tsc_regs)) {
+ err = PTR_ERR(tsc->tsc_regs);
+ dev_err(&pdev->dev, "failed to remap tsc memory: %d\n", err);
+ return err;
+ }
+
+ adc_mem = platform_get_resource(pdev, IORESOURCE_MEM, 1);
+ tsc->adc_regs = devm_ioremap_resource(&pdev->dev, adc_mem);
+ if (IS_ERR(tsc->adc_regs)) {
+ err = PTR_ERR(tsc->adc_regs);
+ dev_err(&pdev->dev, "failed to remap adc memory: %d\n", err);
+ return err;
+ }
+
+ tsc->tsc_clk = devm_clk_get(&pdev->dev, "tsc");
+ if (IS_ERR(tsc->tsc_clk)) {
+ err = PTR_ERR(tsc->tsc_clk);
+ dev_err(&pdev->dev, "failed getting tsc clock: %d\n", err);
+ return err;
+ }
+
+ tsc->adc_clk = devm_clk_get(&pdev->dev, "adc");
+ if (IS_ERR(tsc->adc_clk)) {
+ err = PTR_ERR(tsc->adc_clk);
+ dev_err(&pdev->dev, "failed getting adc clock: %d\n", err);
+ return err;
+ }
+
+ tsc_irq = platform_get_irq(pdev, 0);
+ if (tsc_irq < 0) {
+ dev_err(&pdev->dev, "no tsc irq resource?\n");
+ return tsc_irq;
+ }
+
+ adc_irq = platform_get_irq(pdev, 1);
+ if (adc_irq <= 0) {
+ dev_err(&pdev->dev, "no adc irq resource?\n");
+ return adc_irq;
+ }
+
+ err = devm_request_threaded_irq(tsc->dev, tsc_irq,
+ NULL, tsc_irq_fn, IRQF_ONESHOT,
+ dev_name(&pdev->dev), tsc);
+ if (err) {
+ dev_err(&pdev->dev,
+ "failed requesting tsc irq %d: %d\n",
+ tsc_irq, err);
+ return err;
+ }
+
+ err = devm_request_irq(tsc->dev, adc_irq, adc_irq_fn, 0,
+ dev_name(&pdev->dev), tsc);
+ if (err) {
+ dev_err(&pdev->dev,
+ "failed requesting adc irq %d: %d\n",
+ adc_irq, err);
+ return err;
+ }
+
+ err = of_property_read_u32(np, "measure-delay-time",
+ &tsc->measure_delay_time);
+ if (err)
+ tsc->measure_delay_time = 0xffff;
+
+ err = of_property_read_u32(np, "pre-charge-time",
+ &tsc->pre_charge_time);
+ if (err)
+ tsc->pre_charge_time = 0xfff;
+
+ err = input_register_device(tsc->input);
+ if (err) {
+ dev_err(&pdev->dev,
+ "failed to register input device: %d\n", err);
+ return err;
+ }
+
+ platform_set_drvdata(pdev, tsc);
+ return 0;
+}
+
+static int __maybe_unused imx6ul_tsc_suspend(struct device *dev)
+{
+ struct platform_device *pdev = to_platform_device(dev);
+ struct imx6ul_tsc *tsc = platform_get_drvdata(pdev);
+ struct input_dev *input_dev = tsc->input;
+
+ mutex_lock(&input_dev->mutex);
+
+ if (input_dev->users) {
+ imx6ul_tsc_disable(tsc);
+
+ clk_disable_unprepare(tsc->tsc_clk);
+ clk_disable_unprepare(tsc->adc_clk);
+ }
+
+ mutex_unlock(&input_dev->mutex);
+
+ return 0;
+}
+
+static int __maybe_unused imx6ul_tsc_resume(struct device *dev)
+{
+ struct platform_device *pdev = to_platform_device(dev);
+ struct imx6ul_tsc *tsc = platform_get_drvdata(pdev);
+ struct input_dev *input_dev = tsc->input;
+ int retval = 0;
+
+ mutex_lock(&input_dev->mutex);
+
+ if (input_dev->users) {
+ retval = clk_prepare_enable(tsc->adc_clk);
+ if (retval)
+ goto out;
+
+ retval = clk_prepare_enable(tsc->tsc_clk);
+ if (retval) {
+ clk_disable_unprepare(tsc->adc_clk);
+ goto out;
+ }
+
+ imx6ul_tsc_init(tsc);
+ }
+
+out:
+ mutex_unlock(&input_dev->mutex);
+ return retval;
+}
+
+static SIMPLE_DEV_PM_OPS(imx6ul_tsc_pm_ops,
+ imx6ul_tsc_suspend, imx6ul_tsc_resume);
+
+static const struct of_device_id imx6ul_tsc_match[] = {
+ { .compatible = "fsl,imx6ul-tsc", },
+ { /* sentinel */ }
+};
+MODULE_DEVICE_TABLE(of, imx6ul_tsc_match);
+
+static struct platform_driver imx6ul_tsc_driver = {
+ .driver = {
+ .name = "imx6ul-tsc",
+ .of_match_table = imx6ul_tsc_match,
+ .pm = &imx6ul_tsc_pm_ops,
+ },
+ .probe = imx6ul_tsc_probe,
+};
+module_platform_driver(imx6ul_tsc_driver);
+
+MODULE_AUTHOR("Haibo Chen <haibo.chen@freescale.com>");
+MODULE_DESCRIPTION("Freescale i.MX6UL Touchscreen controller driver");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/input/touchscreen/sun4i-ts.c b/drivers/input/touchscreen/sun4i-ts.c
index c011699..4857943 100644
--- a/drivers/input/touchscreen/sun4i-ts.c
+++ b/drivers/input/touchscreen/sun4i-ts.c
@@ -191,7 +191,7 @@
writel(TEMP_IRQ_EN(1), ts->base + TP_INT_FIFOC);
}
-static int sun4i_get_temp(const struct sun4i_ts_data *ts, long *temp)
+static int sun4i_get_temp(const struct sun4i_ts_data *ts, int *temp)
{
/* No temp_data until the first irq */
if (ts->temp_data == -1)
@@ -202,7 +202,7 @@
return 0;
}
-static int sun4i_get_tz_temp(void *data, long *temp)
+static int sun4i_get_tz_temp(void *data, int *temp)
{
return sun4i_get_temp(data, temp);
}
@@ -215,14 +215,14 @@
char *buf)
{
struct sun4i_ts_data *ts = dev_get_drvdata(dev);
- long temp;
+ int temp;
int error;
error = sun4i_get_temp(ts, &temp);
if (error)
return error;
- return sprintf(buf, "%ld\n", temp);
+ return sprintf(buf, "%d\n", temp);
}
static ssize_t show_temp_label(struct device *dev,
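
The lm75, ntc_thermistor, tmp102 and sun4i-ts hunks are the same mechanical change: the thermal ->get_temp() callback now reports millidegrees through an int * instead of a long *. A hedged sketch of a conforming callback (my_read_millicelsius() is a made-up sensor helper):

    #include <linux/thermal.h>

    extern int my_read_millicelsius(void *data);    /* hypothetical helper */

    static int my_get_temp(void *data, int *temp)
    {
            *temp = my_read_millicelsius(data);     /* millidegrees Celsius */
            return 0;
    }

    static const struct thermal_zone_of_device_ops my_tz_ops = {
            .get_temp = my_get_temp,
    };
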
diff --git a/drivers/iommu/omap-iommu-debug.c b/drivers/iommu/omap-iommu-debug.c
index 0717aa9..9bc20e2 100644
--- a/drivers/iommu/omap-iommu-debug.c
+++ b/drivers/iommu/omap-iommu-debug.c
@@ -135,8 +135,9 @@
static ssize_t iotlb_dump_cr(struct omap_iommu *obj, struct cr_regs *cr,
struct seq_file *s)
{
- return seq_printf(s, "%08x %08x %01x\n", cr->cam, cr->ram,
+ seq_printf(s, "%08x %08x %01x\n", cr->cam, cr->ram,
(cr->cam & MMU_CAM_P) ? 1 : 0);
+ return 0;
}
static size_t omap_dump_tlb_entries(struct omap_iommu *obj, struct seq_file *s)
diff --git a/drivers/md/Kconfig b/drivers/md/Kconfig
index d5415ee..3e01e6f 100644
--- a/drivers/md/Kconfig
+++ b/drivers/md/Kconfig
@@ -393,7 +393,7 @@
# of SCSI_DH if the latter isn't defined but if
# it is, DM_MULTIPATH must depend on it. We get a build
# error if SCSI_DH=m and DM_MULTIPATH=y
- depends on SCSI_DH || !SCSI_DH
+ depends on !SCSI_DH || SCSI
---help---
Allow volume managers to support multipath hardware.
diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c
index eff7bdd..5a67671 100644
--- a/drivers/md/dm-mpath.c
+++ b/drivers/md/dm-mpath.c
@@ -159,12 +159,9 @@
static void free_pgpaths(struct list_head *pgpaths, struct dm_target *ti)
{
struct pgpath *pgpath, *tmp;
- struct multipath *m = ti->private;
list_for_each_entry_safe(pgpath, tmp, pgpaths, list) {
list_del(&pgpath->list);
- if (m->hw_handler_name)
- scsi_dh_detach(bdev_get_queue(pgpath->path.dev->bdev));
dm_put_device(ti, pgpath->path.dev);
free_pgpath(pgpath);
}
@@ -580,6 +577,7 @@
q = bdev_get_queue(p->path.dev->bdev);
if (m->retain_attached_hw_handler) {
+retain:
attached_handler_name = scsi_dh_attached_handler_name(q, GFP_KERNEL);
if (attached_handler_name) {
/*
@@ -599,20 +597,14 @@
}
if (m->hw_handler_name) {
- /*
- * Increments scsi_dh reference, even when using an
- * already-attached handler.
- */
r = scsi_dh_attach(q, m->hw_handler_name);
if (r == -EBUSY) {
- /*
- * Already attached to different hw_handler:
- * try to reattach with correct one.
- */
- scsi_dh_detach(q);
- r = scsi_dh_attach(q, m->hw_handler_name);
- }
+ char b[BDEVNAME_SIZE];
+ printk(KERN_INFO "dm-mpath: retaining handler on device %s\n",
+ bdevname(p->path.dev->bdev, b));
+ goto retain;
+ }
if (r < 0) {
ti->error = "error attaching hardware handler";
dm_put_device(ti, p->path.dev);
@@ -624,7 +616,6 @@
if (r < 0) {
ti->error = "unable to set hardware "
"handler parameters";
- scsi_dh_detach(q);
dm_put_device(ti, p->path.dev);
goto bad;
}
@@ -734,12 +725,6 @@
return 0;
m->hw_handler_name = kstrdup(dm_shift_arg(as), GFP_KERNEL);
- if (!try_then_request_module(scsi_dh_handler_exist(m->hw_handler_name),
- "scsi_dh_%s", m->hw_handler_name)) {
- ti->error = "unknown hardware handler type";
- ret = -EINVAL;
- goto fail;
- }
if (hw_argc > 1) {
char *p;
diff --git a/drivers/media/platform/omap/Kconfig b/drivers/media/platform/omap/Kconfig
index dc2aaab..217d613 100644
--- a/drivers/media/platform/omap/Kconfig
+++ b/drivers/media/platform/omap/Kconfig
@@ -10,6 +10,7 @@
select OMAP2_DSS if HAS_IOMEM && ARCH_OMAP2PLUS
select OMAP2_VRFB if ARCH_OMAP2 || ARCH_OMAP3
select VIDEO_OMAP2_VOUT_VRFB if VIDEO_OMAP2_VOUT && OMAP2_VRFB
+ select FRAME_VECTOR
default n
---help---
V4L2 Display driver support for OMAP2/3 based boards.
diff --git a/drivers/media/platform/omap/omap_vout.c b/drivers/media/platform/omap/omap_vout.c
index f09c5f1..70c28d1 100644
--- a/drivers/media/platform/omap/omap_vout.c
+++ b/drivers/media/platform/omap/omap_vout.c
@@ -195,46 +195,34 @@
}
/*
- * omap_vout_uservirt_to_phys: This inline function is used to convert user
- * space virtual address to physical address.
+ * omap_vout_get_userptr: Convert user space virtual address to physical
+ * address.
*/
-static unsigned long omap_vout_uservirt_to_phys(unsigned long virtp)
+static int omap_vout_get_userptr(struct videobuf_buffer *vb, u32 virtp,
+ u32 *physp)
{
- unsigned long physp = 0;
- struct vm_area_struct *vma;
- struct mm_struct *mm = current->mm;
+ struct frame_vector *vec;
+ int ret;
/* For kernel direct-mapped memory, take the easy way */
- if (virtp >= PAGE_OFFSET)
- return virt_to_phys((void *) virtp);
-
- down_read(&current->mm->mmap_sem);
- vma = find_vma(mm, virtp);
- if (vma && (vma->vm_flags & VM_IO) && vma->vm_pgoff) {
- /* this will catch, kernel-allocated, mmaped-to-usermode
- addresses */
- physp = (vma->vm_pgoff << PAGE_SHIFT) + (virtp - vma->vm_start);
- up_read(&current->mm->mmap_sem);
- } else {
- /* otherwise, use get_user_pages() for general userland pages */
- int res, nr_pages = 1;
- struct page *pages;
-
- res = get_user_pages(current, current->mm, virtp, nr_pages, 1,
- 0, &pages, NULL);
- up_read(&current->mm->mmap_sem);
-
- if (res == nr_pages) {
- physp = __pa(page_address(&pages[0]) +
- (virtp & ~PAGE_MASK));
- } else {
- printk(KERN_WARNING VOUT_NAME
- "get_user_pages failed\n");
- return 0;
- }
+ if (virtp >= PAGE_OFFSET) {
+ *physp = virt_to_phys((void *)virtp);
+ return 0;
}
- return physp;
+ vec = frame_vector_create(1);
+ if (!vec)
+ return -ENOMEM;
+
+ ret = get_vaddr_frames(virtp, 1, true, false, vec);
+ if (ret != 1) {
+ frame_vector_destroy(vec);
+ return -EINVAL;
+ }
+ *physp = __pfn_to_phys(frame_vector_pfns(vec)[0]);
+ vb->priv = vec;
+
+ return 0;
}
/*
@@ -784,11 +772,15 @@
* address of the buffer
*/
if (V4L2_MEMORY_USERPTR == vb->memory) {
+ int ret;
+
if (0 == vb->baddr)
return -EINVAL;
/* Physical address */
- vout->queued_buf_addr[vb->i] = (u8 *)
- omap_vout_uservirt_to_phys(vb->baddr);
+ ret = omap_vout_get_userptr(vb, vb->baddr,
+ (u32 *)&vout->queued_buf_addr[vb->i]);
+ if (ret < 0)
+ return ret;
} else {
unsigned long addr, dma_addr;
unsigned long size;
@@ -834,12 +826,13 @@
static void omap_vout_buffer_release(struct videobuf_queue *q,
struct videobuf_buffer *vb)
{
- struct omap_vout_device *vout = q->priv_data;
-
vb->state = VIDEOBUF_NEEDS_INIT;
+ if (vb->memory == V4L2_MEMORY_USERPTR && vb->priv) {
+ struct frame_vector *vec = vb->priv;
- if (V4L2_MEMORY_MMAP != vout->memory)
- return;
+ put_vaddr_frames(vec);
+ frame_vector_destroy(vec);
+ }
}
/*
@@ -872,7 +865,7 @@
vout->mmap_count--;
}
-static struct vm_operations_struct omap_vout_vm_ops = {
+static const struct vm_operations_struct omap_vout_vm_ops = {
.open = omap_vout_vm_open,
.close = omap_vout_vm_close,
};
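
The omap_vout and videobuf2 hunks replace open-coded get_user_pages()/follow_pfn() walks with the then-new frame vector helpers; the lifecycle is create, pin, use the pfns or pages, unpin, destroy. A hedged sketch for a single user page (error handling trimmed, kernel context assumed):

    #include <linux/mm.h>
    #include <linux/pfn.h>

    /*
     * Hedged sketch of the frame_vector lifecycle: returns the physical
     * address backing one user page, or 0 on failure.
     */
    static phys_addr_t pin_one_user_page(unsigned long uaddr)
    {
            struct frame_vector *vec;
            phys_addr_t phys = 0;

            vec = frame_vector_create(1);
            if (!vec)
                    return 0;

            /* write = true, force = false, as in the omap_vout conversion */
            if (get_vaddr_frames(uaddr, 1, true, false, vec) == 1)
                    phys = PFN_PHYS(frame_vector_pfns(vec)[0]);

            put_vaddr_frames(vec);          /* drops any page references taken */
            frame_vector_destroy(vec);
            return phys;
    }
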
diff --git a/drivers/media/v4l2-core/Kconfig b/drivers/media/v4l2-core/Kconfig
index b4b0229..82876a6 100644
--- a/drivers/media/v4l2-core/Kconfig
+++ b/drivers/media/v4l2-core/Kconfig
@@ -84,6 +84,7 @@
config VIDEOBUF2_MEMOPS
tristate
+ select FRAME_VECTOR
config VIDEOBUF2_DMA_CONTIG
tristate
diff --git a/drivers/media/v4l2-core/videobuf2-core.c b/drivers/media/v4l2-core/videobuf2-core.c
index f1022d8..4f59b7e 100644
--- a/drivers/media/v4l2-core/videobuf2-core.c
+++ b/drivers/media/v4l2-core/videobuf2-core.c
@@ -1691,9 +1691,7 @@
ret = __qbuf_mmap(vb, b);
break;
case V4L2_MEMORY_USERPTR:
- down_read(&current->mm->mmap_sem);
ret = __qbuf_userptr(vb, b);
- up_read(&current->mm->mmap_sem);
break;
case V4L2_MEMORY_DMABUF:
ret = __qbuf_dmabuf(vb, b);
diff --git a/drivers/media/v4l2-core/videobuf2-dma-contig.c b/drivers/media/v4l2-core/videobuf2-dma-contig.c
index 94c1e64..2397ceb 100644
--- a/drivers/media/v4l2-core/videobuf2-dma-contig.c
+++ b/drivers/media/v4l2-core/videobuf2-dma-contig.c
@@ -32,15 +32,13 @@
dma_addr_t dma_addr;
enum dma_data_direction dma_dir;
struct sg_table *dma_sgt;
+ struct frame_vector *vec;
/* MMAP related */
struct vb2_vmarea_handler handler;
atomic_t refcount;
struct sg_table *sgt_base;
- /* USERPTR related */
- struct vm_area_struct *vma;
-
/* DMABUF related */
struct dma_buf_attachment *db_attach;
};
@@ -49,24 +47,6 @@
/* scatterlist table functions */
/*********************************************/
-
-static void vb2_dc_sgt_foreach_page(struct sg_table *sgt,
- void (*cb)(struct page *pg))
-{
- struct scatterlist *s;
- unsigned int i;
-
- for_each_sg(sgt->sgl, s, sgt->orig_nents, i) {
- struct page *page = sg_page(s);
- unsigned int n_pages = PAGE_ALIGN(s->offset + s->length)
- >> PAGE_SHIFT;
- unsigned int j;
-
- for (j = 0; j < n_pages; ++j, ++page)
- cb(page);
- }
-}
-
static unsigned long vb2_dc_get_contiguous_size(struct sg_table *sgt)
{
struct scatterlist *s;
@@ -429,92 +409,12 @@
/* callbacks for USERPTR buffers */
/*********************************************/
-static inline int vma_is_io(struct vm_area_struct *vma)
-{
- return !!(vma->vm_flags & (VM_IO | VM_PFNMAP));
-}
-
-static int vb2_dc_get_user_pfn(unsigned long start, int n_pages,
- struct vm_area_struct *vma, unsigned long *res)
-{
- unsigned long pfn, start_pfn, prev_pfn;
- unsigned int i;
- int ret;
-
- if (!vma_is_io(vma))
- return -EFAULT;
-
- ret = follow_pfn(vma, start, &pfn);
- if (ret)
- return ret;
-
- start_pfn = pfn;
- start += PAGE_SIZE;
-
- for (i = 1; i < n_pages; ++i, start += PAGE_SIZE) {
- prev_pfn = pfn;
- ret = follow_pfn(vma, start, &pfn);
-
- if (ret) {
- pr_err("no page for address %lu\n", start);
- return ret;
- }
- if (pfn != prev_pfn + 1)
- return -EINVAL;
- }
-
- *res = start_pfn;
- return 0;
-}
-
-static int vb2_dc_get_user_pages(unsigned long start, struct page **pages,
- int n_pages, struct vm_area_struct *vma,
- enum dma_data_direction dma_dir)
-{
- if (vma_is_io(vma)) {
- unsigned int i;
-
- for (i = 0; i < n_pages; ++i, start += PAGE_SIZE) {
- unsigned long pfn;
- int ret = follow_pfn(vma, start, &pfn);
-
- if (!pfn_valid(pfn))
- return -EINVAL;
-
- if (ret) {
- pr_err("no page for address %lu\n", start);
- return ret;
- }
- pages[i] = pfn_to_page(pfn);
- }
- } else {
- int n;
-
- n = get_user_pages(current, current->mm, start & PAGE_MASK,
- n_pages, dma_dir == DMA_FROM_DEVICE, 1, pages, NULL);
- /* negative error means that no page was pinned */
- n = max(n, 0);
- if (n != n_pages) {
- pr_err("got only %d of %d user pages\n", n, n_pages);
- while (n)
- put_page(pages[--n]);
- return -EFAULT;
- }
- }
-
- return 0;
-}
-
-static void vb2_dc_put_dirty_page(struct page *page)
-{
- set_page_dirty_lock(page);
- put_page(page);
-}
-
static void vb2_dc_put_userptr(void *buf_priv)
{
struct vb2_dc_buf *buf = buf_priv;
struct sg_table *sgt = buf->dma_sgt;
+ int i;
+ struct page **pages;
if (sgt) {
DEFINE_DMA_ATTRS(attrs);
@@ -526,13 +426,15 @@
*/
dma_unmap_sg_attrs(buf->dev, sgt->sgl, sgt->orig_nents,
buf->dma_dir, &attrs);
- if (!vma_is_io(buf->vma))
- vb2_dc_sgt_foreach_page(sgt, vb2_dc_put_dirty_page);
-
+ pages = frame_vector_pages(buf->vec);
+ /* sgt should exist only if vector contains pages... */
+ BUG_ON(IS_ERR(pages));
+ for (i = 0; i < frame_vector_count(buf->vec); i++)
+ set_page_dirty_lock(pages[i]);
sg_free_table(sgt);
kfree(sgt);
}
- vb2_put_vma(buf->vma);
+ vb2_destroy_framevec(buf->vec);
kfree(buf);
}
@@ -572,13 +474,10 @@
{
struct vb2_dc_conf *conf = alloc_ctx;
struct vb2_dc_buf *buf;
- unsigned long start;
- unsigned long end;
+ struct frame_vector *vec;
unsigned long offset;
- struct page **pages;
- int n_pages;
+ int n_pages, i;
int ret = 0;
- struct vm_area_struct *vma;
struct sg_table *sgt;
unsigned long contig_size;
unsigned long dma_align = dma_get_cache_alignment();
@@ -604,72 +503,43 @@
buf->dev = conf->dev;
buf->dma_dir = dma_dir;
- start = vaddr & PAGE_MASK;
offset = vaddr & ~PAGE_MASK;
- end = PAGE_ALIGN(vaddr + size);
- n_pages = (end - start) >> PAGE_SHIFT;
-
- pages = kmalloc(n_pages * sizeof(pages[0]), GFP_KERNEL);
- if (!pages) {
- ret = -ENOMEM;
- pr_err("failed to allocate pages table\n");
+ vec = vb2_create_framevec(vaddr, size, dma_dir == DMA_FROM_DEVICE);
+ if (IS_ERR(vec)) {
+ ret = PTR_ERR(vec);
goto fail_buf;
}
+ buf->vec = vec;
+ n_pages = frame_vector_count(vec);
+ ret = frame_vector_to_pages(vec);
+ if (ret < 0) {
+ unsigned long *nums = frame_vector_pfns(vec);
- /* current->mm->mmap_sem is taken by videobuf2 core */
- vma = find_vma(current->mm, vaddr);
- if (!vma) {
- pr_err("no vma for address %lu\n", vaddr);
- ret = -EFAULT;
- goto fail_pages;
- }
-
- if (vma->vm_end < vaddr + size) {
- pr_err("vma at %lu is too small for %lu bytes\n", vaddr, size);
- ret = -EFAULT;
- goto fail_pages;
- }
-
- buf->vma = vb2_get_vma(vma);
- if (!buf->vma) {
- pr_err("failed to copy vma\n");
- ret = -ENOMEM;
- goto fail_pages;
- }
-
- /* extract page list from userspace mapping */
- ret = vb2_dc_get_user_pages(start, pages, n_pages, vma, dma_dir);
- if (ret) {
- unsigned long pfn;
- if (vb2_dc_get_user_pfn(start, n_pages, vma, &pfn) == 0) {
- buf->dma_addr = vb2_dc_pfn_to_dma(buf->dev, pfn);
- buf->size = size;
- kfree(pages);
- return buf;
- }
-
- pr_err("failed to get user pages\n");
- goto fail_vma;
+ /*
+ * Failed to convert to pages... Check the memory is physically
+ * contiguous and use direct mapping
+ */
+ for (i = 1; i < n_pages; i++)
+ if (nums[i-1] + 1 != nums[i])
+ goto fail_pfnvec;
+ buf->dma_addr = vb2_dc_pfn_to_dma(buf->dev, nums[0]);
+ goto out;
}
sgt = kzalloc(sizeof(*sgt), GFP_KERNEL);
if (!sgt) {
pr_err("failed to allocate sg table\n");
ret = -ENOMEM;
- goto fail_get_user_pages;
+ goto fail_pfnvec;
}
- ret = sg_alloc_table_from_pages(sgt, pages, n_pages,
+ ret = sg_alloc_table_from_pages(sgt, frame_vector_pages(vec), n_pages,
offset, size, GFP_KERNEL);
if (ret) {
pr_err("failed to initialize sg table\n");
goto fail_sgt;
}
- /* pages are no longer needed */
- kfree(pages);
- pages = NULL;
-
/*
* No need to sync to the device, this will happen later when the
* prepare() memop is called.
@@ -691,8 +561,9 @@
}
buf->dma_addr = sg_dma_address(sgt->sgl);
- buf->size = size;
buf->dma_sgt = sgt;
+out:
+ buf->size = size;
return buf;
@@ -701,23 +572,13 @@
buf->dma_dir, &attrs);
fail_sgt_init:
- if (!vma_is_io(buf->vma))
- vb2_dc_sgt_foreach_page(sgt, put_page);
sg_free_table(sgt);
fail_sgt:
kfree(sgt);
-fail_get_user_pages:
- if (pages && !vma_is_io(buf->vma))
- while (n_pages)
- put_page(pages[--n_pages]);
-
-fail_vma:
- vb2_put_vma(buf->vma);
-
-fail_pages:
- kfree(pages); /* kfree is NULL-proof */
+fail_pfnvec:
+ vb2_destroy_framevec(vec);
fail_buf:
kfree(buf);
diff --git a/drivers/media/v4l2-core/videobuf2-dma-sg.c b/drivers/media/v4l2-core/videobuf2-dma-sg.c
index 7289b81..be7bd65 100644
--- a/drivers/media/v4l2-core/videobuf2-dma-sg.c
+++ b/drivers/media/v4l2-core/videobuf2-dma-sg.c
@@ -38,6 +38,7 @@
struct device *dev;
void *vaddr;
struct page **pages;
+ struct frame_vector *vec;
int offset;
enum dma_data_direction dma_dir;
struct sg_table sg_table;
@@ -51,7 +52,6 @@
unsigned int num_pages;
atomic_t refcount;
struct vb2_vmarea_handler handler;
- struct vm_area_struct *vma;
struct dma_buf_attachment *db_attach;
};
@@ -225,25 +225,17 @@
dma_sync_sg_for_cpu(buf->dev, sgt->sgl, sgt->nents, buf->dma_dir);
}
-static inline int vma_is_io(struct vm_area_struct *vma)
-{
- return !!(vma->vm_flags & (VM_IO | VM_PFNMAP));
-}
-
static void *vb2_dma_sg_get_userptr(void *alloc_ctx, unsigned long vaddr,
unsigned long size,
enum dma_data_direction dma_dir)
{
struct vb2_dma_sg_conf *conf = alloc_ctx;
struct vb2_dma_sg_buf *buf;
- unsigned long first, last;
- int num_pages_from_user;
- struct vm_area_struct *vma;
struct sg_table *sgt;
DEFINE_DMA_ATTRS(attrs);
+ struct frame_vector *vec;
dma_set_attr(DMA_ATTR_SKIP_CPU_SYNC, &attrs);
-
buf = kzalloc(sizeof *buf, GFP_KERNEL);
if (!buf)
return NULL;
@@ -254,61 +246,19 @@
buf->offset = vaddr & ~PAGE_MASK;
buf->size = size;
buf->dma_sgt = &buf->sg_table;
+ vec = vb2_create_framevec(vaddr, size, buf->dma_dir == DMA_FROM_DEVICE);
+ if (IS_ERR(vec))
+ goto userptr_fail_pfnvec;
+ buf->vec = vec;
- first = (vaddr & PAGE_MASK) >> PAGE_SHIFT;
- last = ((vaddr + size - 1) & PAGE_MASK) >> PAGE_SHIFT;
- buf->num_pages = last - first + 1;
-
- buf->pages = kzalloc(buf->num_pages * sizeof(struct page *),
- GFP_KERNEL);
- if (!buf->pages)
- goto userptr_fail_alloc_pages;
-
- vma = find_vma(current->mm, vaddr);
- if (!vma) {
- dprintk(1, "no vma for address %lu\n", vaddr);
- goto userptr_fail_find_vma;
- }
-
- if (vma->vm_end < vaddr + size) {
- dprintk(1, "vma at %lu is too small for %lu bytes\n",
- vaddr, size);
- goto userptr_fail_find_vma;
- }
-
- buf->vma = vb2_get_vma(vma);
- if (!buf->vma) {
- dprintk(1, "failed to copy vma\n");
- goto userptr_fail_find_vma;
- }
-
- if (vma_is_io(buf->vma)) {
- for (num_pages_from_user = 0;
- num_pages_from_user < buf->num_pages;
- ++num_pages_from_user, vaddr += PAGE_SIZE) {
- unsigned long pfn;
-
- if (follow_pfn(vma, vaddr, &pfn)) {
- dprintk(1, "no page for address %lu\n", vaddr);
- break;
- }
- buf->pages[num_pages_from_user] = pfn_to_page(pfn);
- }
- } else
- num_pages_from_user = get_user_pages(current, current->mm,
- vaddr & PAGE_MASK,
- buf->num_pages,
- buf->dma_dir == DMA_FROM_DEVICE,
- 1, /* force */
- buf->pages,
- NULL);
-
- if (num_pages_from_user != buf->num_pages)
- goto userptr_fail_get_user_pages;
+ buf->pages = frame_vector_pages(vec);
+ if (IS_ERR(buf->pages))
+ goto userptr_fail_sgtable;
+ buf->num_pages = frame_vector_count(vec);
if (sg_alloc_table_from_pages(buf->dma_sgt, buf->pages,
buf->num_pages, buf->offset, size, 0))
- goto userptr_fail_alloc_table_from_pages;
+ goto userptr_fail_sgtable;
sgt = &buf->sg_table;
/*
@@ -324,17 +274,9 @@
userptr_fail_map:
sg_free_table(&buf->sg_table);
-userptr_fail_alloc_table_from_pages:
-userptr_fail_get_user_pages:
- dprintk(1, "get_user_pages requested/got: %d/%d]\n",
- buf->num_pages, num_pages_from_user);
- if (!vma_is_io(buf->vma))
- while (--num_pages_from_user >= 0)
- put_page(buf->pages[num_pages_from_user]);
- vb2_put_vma(buf->vma);
-userptr_fail_find_vma:
- kfree(buf->pages);
-userptr_fail_alloc_pages:
+userptr_fail_sgtable:
+ vb2_destroy_framevec(vec);
+userptr_fail_pfnvec:
kfree(buf);
return NULL;
}
@@ -362,11 +304,8 @@
while (--i >= 0) {
if (buf->dma_dir == DMA_FROM_DEVICE)
set_page_dirty_lock(buf->pages[i]);
- if (!vma_is_io(buf->vma))
- put_page(buf->pages[i]);
}
- kfree(buf->pages);
- vb2_put_vma(buf->vma);
+ vb2_destroy_framevec(buf->vec);
kfree(buf);
}
diff --git a/drivers/media/v4l2-core/videobuf2-memops.c b/drivers/media/v4l2-core/videobuf2-memops.c
index 0d49b79..48c6a49 100644
--- a/drivers/media/v4l2-core/videobuf2-memops.c
+++ b/drivers/media/v4l2-core/videobuf2-memops.c
@@ -23,118 +23,62 @@
#include <media/videobuf2-memops.h>
/**
- * vb2_get_vma() - acquire and lock the virtual memory area
- * @vma: given virtual memory area
+ * vb2_create_framevec() - map virtual addresses to pfns
+ * @start: Virtual user address where we start mapping
+ * @length: Length of a range to map
+ * @write: Should we map for writing into the area
*
- * This function attempts to acquire an area mapped in the userspace for
- * the duration of a hardware operation. The area is "locked" by performing
- * the same set of operation that are done when process calls fork() and
- * memory areas are duplicated.
- *
- * Returns a copy of a virtual memory region on success or NULL.
+ * This function allocates and fills in a vector with pfns corresponding to
+ * virtual address range passed in arguments. If pfns have corresponding pages,
+ * page references are also grabbed to pin pages in memory. The function
+ * returns pointer to the vector on success and error pointer in case of
+ * failure. Returned vector needs to be freed via vb2_destroy_framevec().
*/
-struct vm_area_struct *vb2_get_vma(struct vm_area_struct *vma)
+struct frame_vector *vb2_create_framevec(unsigned long start,
+ unsigned long length,
+ bool write)
{
- struct vm_area_struct *vma_copy;
+ int ret;
+ unsigned long first, last;
+ unsigned long nr;
+ struct frame_vector *vec;
- vma_copy = kmalloc(sizeof(*vma_copy), GFP_KERNEL);
- if (vma_copy == NULL)
- return NULL;
-
- if (vma->vm_ops && vma->vm_ops->open)
- vma->vm_ops->open(vma);
-
- if (vma->vm_file)
- get_file(vma->vm_file);
-
- memcpy(vma_copy, vma, sizeof(*vma));
-
- vma_copy->vm_mm = NULL;
- vma_copy->vm_next = NULL;
- vma_copy->vm_prev = NULL;
-
- return vma_copy;
-}
-EXPORT_SYMBOL_GPL(vb2_get_vma);
-
-/**
- * vb2_put_userptr() - release a userspace virtual memory area
- * @vma: virtual memory region associated with the area to be released
- *
- * This function releases the previously acquired memory area after a hardware
- * operation.
- */
-void vb2_put_vma(struct vm_area_struct *vma)
-{
- if (!vma)
- return;
-
- if (vma->vm_ops && vma->vm_ops->close)
- vma->vm_ops->close(vma);
-
- if (vma->vm_file)
- fput(vma->vm_file);
-
- kfree(vma);
-}
-EXPORT_SYMBOL_GPL(vb2_put_vma);
-
-/**
- * vb2_get_contig_userptr() - lock physically contiguous userspace mapped memory
- * @vaddr: starting virtual address of the area to be verified
- * @size: size of the area
- * @res_paddr: will return physical address for the given vaddr
- * @res_vma: will return locked copy of struct vm_area for the given area
- *
- * This function will go through memory area of size @size mapped at @vaddr and
- * verify that the underlying physical pages are contiguous. If they are
- * contiguous the virtual memory area is locked and a @res_vma is filled with
- * the copy and @res_pa set to the physical address of the buffer.
- *
- * Returns 0 on success.
- */
-int vb2_get_contig_userptr(unsigned long vaddr, unsigned long size,
- struct vm_area_struct **res_vma, dma_addr_t *res_pa)
-{
- struct mm_struct *mm = current->mm;
- struct vm_area_struct *vma;
- unsigned long offset, start, end;
- unsigned long this_pfn, prev_pfn;
- dma_addr_t pa = 0;
-
- start = vaddr;
- offset = start & ~PAGE_MASK;
- end = start + size;
-
- vma = find_vma(mm, start);
-
- if (vma == NULL || vma->vm_end < end)
- return -EFAULT;
-
- for (prev_pfn = 0; start < end; start += PAGE_SIZE) {
- int ret = follow_pfn(vma, start, &this_pfn);
- if (ret)
- return ret;
-
- if (prev_pfn == 0)
- pa = this_pfn << PAGE_SHIFT;
- else if (this_pfn != prev_pfn + 1)
- return -EFAULT;
-
- prev_pfn = this_pfn;
+ first = start >> PAGE_SHIFT;
+ last = (start + length - 1) >> PAGE_SHIFT;
+ nr = last - first + 1;
+ vec = frame_vector_create(nr);
+ if (!vec)
+ return ERR_PTR(-ENOMEM);
+ ret = get_vaddr_frames(start, nr, write, 1, vec);
+ if (ret < 0)
+ goto out_destroy;
+ /* We accept only complete set of PFNs */
+ if (ret != nr) {
+ ret = -EFAULT;
+ goto out_release;
}
-
- /*
- * Memory is contiguous, lock vma and return to the caller
- */
- *res_vma = vb2_get_vma(vma);
- if (*res_vma == NULL)
- return -ENOMEM;
-
- *res_pa = pa + offset;
- return 0;
+ return vec;
+out_release:
+ put_vaddr_frames(vec);
+out_destroy:
+ frame_vector_destroy(vec);
+ return ERR_PTR(ret);
}
-EXPORT_SYMBOL_GPL(vb2_get_contig_userptr);
+EXPORT_SYMBOL(vb2_create_framevec);
+
+/**
+ * vb2_destroy_framevec() - release vector of mapped pfns
+ * @vec: vector of pfns / pages to release
+ *
+ * This releases references to all pages in the vector @vec (if corresponding
+ * pfns are backed by pages) and frees the passed vector.
+ */
+void vb2_destroy_framevec(struct frame_vector *vec)
+{
+ put_vaddr_frames(vec);
+ frame_vector_destroy(vec);
+}
+EXPORT_SYMBOL(vb2_destroy_framevec);
/**
* vb2_common_vm_open() - increase refcount of the vma
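
For illustration only (not part of the patch): a minimal sketch of the pairing a videobuf2 allocator is expected to follow with the new helpers, as the dma-contig, dma-sg and vmalloc backends do below. struct my_buf, my_pin_userptr() and my_unpin_userptr() are hypothetical names.

#include <linux/err.h>
#include <linux/dma-direction.h>
#include <media/videobuf2-memops.h>	/* vb2_create_framevec(), vb2_destroy_framevec() */

struct my_buf {
	struct frame_vector *vec;
	unsigned int n_pages;
};

static int my_pin_userptr(struct my_buf *buf, unsigned long vaddr,
			  unsigned long size, enum dma_data_direction dir)
{
	struct frame_vector *vec;

	vec = vb2_create_framevec(vaddr, size, dir == DMA_FROM_DEVICE);
	if (IS_ERR(vec))
		return PTR_ERR(vec);

	buf->vec = vec;
	buf->n_pages = frame_vector_count(vec);
	return 0;
}

static void my_unpin_userptr(struct my_buf *buf)
{
	/* drops page references (if any) and frees the vector */
	vb2_destroy_framevec(buf->vec);
}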
diff --git a/drivers/media/v4l2-core/videobuf2-vmalloc.c b/drivers/media/v4l2-core/videobuf2-vmalloc.c
index 2fe4c27..ecb8f0c 100644
--- a/drivers/media/v4l2-core/videobuf2-vmalloc.c
+++ b/drivers/media/v4l2-core/videobuf2-vmalloc.c
@@ -23,11 +23,9 @@
struct vb2_vmalloc_buf {
void *vaddr;
- struct page **pages;
- struct vm_area_struct *vma;
+ struct frame_vector *vec;
enum dma_data_direction dma_dir;
unsigned long size;
- unsigned int n_pages;
atomic_t refcount;
struct vb2_vmarea_handler handler;
struct dma_buf *dbuf;
@@ -76,10 +74,8 @@
enum dma_data_direction dma_dir)
{
struct vb2_vmalloc_buf *buf;
- unsigned long first, last;
- int n_pages, offset;
- struct vm_area_struct *vma;
- dma_addr_t physp;
+ struct frame_vector *vec;
+ int n_pages, offset, i;
buf = kzalloc(sizeof(*buf), GFP_KERNEL);
if (!buf)
@@ -88,51 +84,36 @@
buf->dma_dir = dma_dir;
offset = vaddr & ~PAGE_MASK;
buf->size = size;
+ vec = vb2_create_framevec(vaddr, size, dma_dir == DMA_FROM_DEVICE);
+ if (IS_ERR(vec))
+ goto fail_pfnvec_create;
+ buf->vec = vec;
+ n_pages = frame_vector_count(vec);
+ if (frame_vector_to_pages(vec) < 0) {
+ unsigned long *nums = frame_vector_pfns(vec);
-
- vma = find_vma(current->mm, vaddr);
- if (vma && (vma->vm_flags & VM_PFNMAP) && (vma->vm_pgoff)) {
- if (vb2_get_contig_userptr(vaddr, size, &vma, &physp))
- goto fail_pages_array_alloc;
- buf->vma = vma;
- buf->vaddr = (__force void *)ioremap_nocache(physp, size);
- if (!buf->vaddr)
- goto fail_pages_array_alloc;
+ /*
+ * We cannot get page pointers for these pfns. Check memory is
+ * physically contiguous and use direct mapping.
+ */
+ for (i = 1; i < n_pages; i++)
+ if (nums[i-1] + 1 != nums[i])
+ goto fail_map;
+ buf->vaddr = (__force void *)
+ ioremap_nocache(nums[0] << PAGE_SHIFT, size);
} else {
- first = vaddr >> PAGE_SHIFT;
- last = (vaddr + size - 1) >> PAGE_SHIFT;
- buf->n_pages = last - first + 1;
- buf->pages = kzalloc(buf->n_pages * sizeof(struct page *),
- GFP_KERNEL);
- if (!buf->pages)
- goto fail_pages_array_alloc;
-
- /* current->mm->mmap_sem is taken by videobuf2 core */
- n_pages = get_user_pages(current, current->mm,
- vaddr & PAGE_MASK, buf->n_pages,
- dma_dir == DMA_FROM_DEVICE,
- 1, /* force */
- buf->pages, NULL);
- if (n_pages != buf->n_pages)
- goto fail_get_user_pages;
-
- buf->vaddr = vm_map_ram(buf->pages, buf->n_pages, -1,
+ buf->vaddr = vm_map_ram(frame_vector_pages(vec), n_pages, -1,
PAGE_KERNEL);
- if (!buf->vaddr)
- goto fail_get_user_pages;
}
+ if (!buf->vaddr)
+ goto fail_map;
buf->vaddr += offset;
return buf;
-fail_get_user_pages:
- pr_debug("get_user_pages requested/got: %d/%d]\n", n_pages,
- buf->n_pages);
- while (--n_pages >= 0)
- put_page(buf->pages[n_pages]);
- kfree(buf->pages);
-
-fail_pages_array_alloc:
+fail_map:
+ vb2_destroy_framevec(vec);
+fail_pfnvec_create:
kfree(buf);
return NULL;
@@ -143,20 +124,21 @@
struct vb2_vmalloc_buf *buf = buf_priv;
unsigned long vaddr = (unsigned long)buf->vaddr & PAGE_MASK;
unsigned int i;
+ struct page **pages;
+ unsigned int n_pages;
- if (buf->pages) {
+ if (!buf->vec->is_pfns) {
+ n_pages = frame_vector_count(buf->vec);
+ pages = frame_vector_pages(buf->vec);
if (vaddr)
- vm_unmap_ram((void *)vaddr, buf->n_pages);
- for (i = 0; i < buf->n_pages; ++i) {
- if (buf->dma_dir == DMA_FROM_DEVICE)
- set_page_dirty_lock(buf->pages[i]);
- put_page(buf->pages[i]);
- }
- kfree(buf->pages);
+ vm_unmap_ram((void *)vaddr, n_pages);
+ if (buf->dma_dir == DMA_FROM_DEVICE)
+ for (i = 0; i < n_pages; i++)
+ set_page_dirty_lock(pages[i]);
} else {
- vb2_put_vma(buf->vma);
iounmap((__force void __iomem *)buf->vaddr);
}
+ vb2_destroy_framevec(buf->vec);
kfree(buf);
}
diff --git a/drivers/misc/genwqe/card_dev.c b/drivers/misc/genwqe/card_dev.c
index c49d244..70e62d6 100644
--- a/drivers/misc/genwqe/card_dev.c
+++ b/drivers/misc/genwqe/card_dev.c
@@ -418,7 +418,7 @@
kfree(dma_map);
}
-static struct vm_operations_struct genwqe_vma_ops = {
+static const struct vm_operations_struct genwqe_vma_ops = {
.open = genwqe_vma_open,
.close = genwqe_vma_close,
};
diff --git a/drivers/misc/mei/wd.c b/drivers/misc/mei/wd.c
index 2bc0f50..b346638 100644
--- a/drivers/misc/mei/wd.c
+++ b/drivers/misc/mei/wd.c
@@ -364,6 +364,7 @@
int ret;
+ amt_wd_dev.parent = dev->dev;
/* unlock to perserve correct locking order */
mutex_unlock(&dev->device_lock);
ret = watchdog_register_device(&amt_wd_dev);
diff --git a/drivers/net/dsa/bcm_sf2.c b/drivers/net/dsa/bcm_sf2.c
index 289e2044..9d56515 100644
--- a/drivers/net/dsa/bcm_sf2.c
+++ b/drivers/net/dsa/bcm_sf2.c
@@ -418,7 +418,7 @@
core_writel(priv, port, CORE_FAST_AGE_PORT);
reg = core_readl(priv, CORE_FAST_AGE_CTRL);
- reg |= EN_AGE_PORT | FAST_AGE_STR_DONE;
+ reg |= EN_AGE_PORT | EN_AGE_DYNAMIC | FAST_AGE_STR_DONE;
core_writel(priv, reg, CORE_FAST_AGE_CTRL);
do {
@@ -432,6 +432,8 @@
if (!timeout)
return -ETIMEDOUT;
+ core_writel(priv, 0, CORE_FAST_AGE_CTRL);
+
return 0;
}
@@ -507,7 +509,7 @@
u32 reg;
reg = core_readl(priv, CORE_G_PCTL_PORT(port));
- cur_hw_state = reg >> G_MISTP_STATE_SHIFT;
+ cur_hw_state = reg & (G_MISTP_STATE_MASK << G_MISTP_STATE_SHIFT);
switch (state) {
case BR_STATE_DISABLED:
@@ -531,10 +533,12 @@
}
/* Fast-age ARL entries if we are moving a port from Learning or
- * Forwarding state to Disabled, Blocking or Listening state
+ * Forwarding (cur_hw_state) state to Disabled, Blocking or Listening
+ * state (hw_state)
*/
if (cur_hw_state != hw_state) {
- if (cur_hw_state & 4 && !(hw_state & 4)) {
+ if (cur_hw_state >= G_MISTP_LEARN_STATE &&
+ hw_state <= G_MISTP_LISTEN_STATE) {
ret = bcm_sf2_sw_fast_age_port(ds, port);
if (ret) {
pr_err("%s: fast-ageing failed\n", __func__);
diff --git a/drivers/net/dsa/bcm_sf2.h b/drivers/net/dsa/bcm_sf2.h
index 22e2ebf..789d7b7 100644
--- a/drivers/net/dsa/bcm_sf2.h
+++ b/drivers/net/dsa/bcm_sf2.h
@@ -112,8 +112,8 @@
spin_unlock(&priv->indir_lock); \
return (u64)indir << 32 | dir; \
} \
-static inline void name##_writeq(struct bcm_sf2_priv *priv, u32 off, \
- u64 val) \
+static inline void name##_writeq(struct bcm_sf2_priv *priv, u64 val, \
+ u32 off) \
{ \
spin_lock(&priv->indir_lock); \
reg_writel(priv, upper_32_bits(val), REG_DIR_DATA_WRITE); \
diff --git a/drivers/net/dsa/mv88e6171.c b/drivers/net/dsa/mv88e6171.c
index d54b740..c2daaf0 100644
--- a/drivers/net/dsa/mv88e6171.c
+++ b/drivers/net/dsa/mv88e6171.c
@@ -117,6 +117,11 @@
.port_join_bridge = mv88e6xxx_join_bridge,
.port_leave_bridge = mv88e6xxx_leave_bridge,
.port_stp_update = mv88e6xxx_port_stp_update,
+ .port_pvid_get = mv88e6xxx_port_pvid_get,
+ .port_pvid_set = mv88e6xxx_port_pvid_set,
+ .port_vlan_add = mv88e6xxx_port_vlan_add,
+ .port_vlan_del = mv88e6xxx_port_vlan_del,
+ .vlan_getnext = mv88e6xxx_vlan_getnext,
.port_fdb_add = mv88e6xxx_port_fdb_add,
.port_fdb_del = mv88e6xxx_port_fdb_del,
.port_fdb_getnext = mv88e6xxx_port_fdb_getnext,
diff --git a/drivers/net/ethernet/altera/altera_tse_main.c b/drivers/net/ethernet/altera/altera_tse_main.c
index da48e66..fe64482 100644
--- a/drivers/net/ethernet/altera/altera_tse_main.c
+++ b/drivers/net/ethernet/altera/altera_tse_main.c
@@ -511,8 +511,7 @@
if (rxcomplete < budget) {
- napi_gro_flush(napi, false);
- __napi_complete(napi);
+ napi_complete(napi);
netdev_dbg(priv->dev,
"NAPI Complete, did %d packets with budget %d\n",
@@ -1518,6 +1517,7 @@
spin_lock_init(&priv->tx_lock);
spin_lock_init(&priv->rxdma_irq_lock);
+ netif_carrier_off(ndev);
ret = register_netdev(ndev);
if (ret) {
dev_err(&pdev->dev, "failed to register TSE net device\n");
diff --git a/drivers/net/ethernet/cavium/liquidio/lio_main.c b/drivers/net/ethernet/cavium/liquidio/lio_main.c
index 0660dee..f683d97 100644
--- a/drivers/net/ethernet/cavium/liquidio/lio_main.c
+++ b/drivers/net/ethernet/cavium/liquidio/lio_main.c
@@ -818,10 +818,9 @@
INIT_LIST_HEAD(&lio->glist);
for (i = 0; i < lio->tx_qsize; i++) {
- g = kmalloc(sizeof(*g), GFP_KERNEL);
+ g = kzalloc(sizeof(*g), GFP_KERNEL);
if (!g)
break;
- memset(g, 0, sizeof(struct octnic_gather));
g->sg_size =
((ROUNDUP4(OCTNIC_MAX_SG) >> 2) * OCT_SG_ENTRY_SIZE);
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
index eb22d58..f5dcde2 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
@@ -4568,28 +4568,23 @@
static int get_chip_type(struct pci_dev *pdev, u32 pl_rev)
{
- int ver, chip;
u16 device_id;
/* Retrieve adapter's device ID */
pci_read_config_word(pdev, PCI_DEVICE_ID, &device_id);
- ver = device_id >> 12;
- switch (ver) {
+
+ switch (device_id >> 12) {
case CHELSIO_T4:
- chip |= CHELSIO_CHIP_CODE(CHELSIO_T4, pl_rev);
- break;
+ return CHELSIO_CHIP_CODE(CHELSIO_T4, pl_rev);
case CHELSIO_T5:
- chip |= CHELSIO_CHIP_CODE(CHELSIO_T5, pl_rev);
- break;
+ return CHELSIO_CHIP_CODE(CHELSIO_T5, pl_rev);
case CHELSIO_T6:
- chip |= CHELSIO_CHIP_CODE(CHELSIO_T6, pl_rev);
- break;
+ return CHELSIO_CHIP_CODE(CHELSIO_T6, pl_rev);
default:
dev_err(&pdev->dev, "Device %d is not supported\n",
device_id);
- return -EINVAL;
}
- return chip;
+ return -EINVAL;
}
static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
@@ -4724,8 +4719,6 @@
err = -ENOMEM;
goto out_free_adapter;
}
- t4_write_reg(adapter, SGE_STAT_CFG_A,
- STATSOURCE_T5_V(7) | STATMODE_V(0));
}
setup_memwin(adapter);
@@ -4737,6 +4730,11 @@
if (err)
goto out_unmap_bar;
+ /* configure SGE_STAT_CFG_A to read WC stats */
+ if (!is_t4(adapter->params.chip))
+ t4_write_reg(adapter, SGE_STAT_CFG_A,
+ STATSOURCE_T5_V(7) | STATMODE_V(0));
+
for_each_port(adapter, i) {
struct net_device *netdev;
diff --git a/drivers/net/ethernet/chelsio/cxgb4/sge.c b/drivers/net/ethernet/chelsio/cxgb4/sge.c
index 78f446c..9162746 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/sge.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/sge.c
@@ -807,7 +807,7 @@
* message or, if we're doing a Large Send Offload, an LSO CPL message
* with an embedded TX Packet Write CPL message.
*/
- flits = sgl_len(skb_shinfo(skb)->nr_frags + 1) + 4;
+ flits = sgl_len(skb_shinfo(skb)->nr_frags + 1);
if (skb_shinfo(skb)->gso_size)
flits += (sizeof(struct fw_eth_tx_pkt_wr) +
sizeof(struct cpl_tx_pkt_lso_core) +
diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h b/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h
index ab46746..a32de30 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h
+++ b/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h
@@ -762,8 +762,6 @@
struct fw_ldst_cmd {
__be32 op_to_addrspace;
-#define FW_LDST_CMD_ADDRSPACE_S 0
-#define FW_LDST_CMD_ADDRSPACE_V(x) ((x) << FW_LDST_CMD_ADDRSPACE_S)
__be32 cycles_to_len16;
union fw_ldst {
struct fw_ldst_addrval {
@@ -788,6 +786,13 @@
__be16 vctl;
__be16 rval;
} mdio;
+ struct fw_ldst_cim_rq {
+ u8 req_first64[8];
+ u8 req_second64[8];
+ u8 resp_first64[8];
+ u8 resp_second64[8];
+ __be32 r3[2];
+ } cim_rq;
union fw_ldst_mps {
struct fw_ldst_mps_rplc {
__be16 fid_idx;
@@ -828,9 +833,33 @@
__be16 nset_pkd;
__be32 data[12];
} pcie;
+ struct fw_ldst_i2c_deprecated {
+ u8 pid_pkd;
+ u8 base;
+ u8 boffset;
+ u8 data;
+ __be32 r9;
+ } i2c_deprecated;
+ struct fw_ldst_i2c {
+ u8 pid;
+ u8 did;
+ u8 boffset;
+ u8 blen;
+ __be32 r9;
+ __u8 data[48];
+ } i2c;
+ struct fw_ldst_le {
+ __be32 index;
+ __be32 r9;
+ u8 val[33];
+ u8 r11[7];
+ } le;
} u;
};
+#define FW_LDST_CMD_ADDRSPACE_S 0
+#define FW_LDST_CMD_ADDRSPACE_V(x) ((x) << FW_LDST_CMD_ADDRSPACE_S)
+
#define FW_LDST_CMD_MSG_S 31
#define FW_LDST_CMD_MSG_V(x) ((x) << FW_LDST_CMD_MSG_S)
diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4fw_version.h b/drivers/net/ethernet/chelsio/cxgb4/t4fw_version.h
index 92bafa7..c4b262c 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/t4fw_version.h
+++ b/drivers/net/ethernet/chelsio/cxgb4/t4fw_version.h
@@ -36,8 +36,8 @@
#define __T4FW_VERSION_H__
#define T4FW_VERSION_MAJOR 0x01
-#define T4FW_VERSION_MINOR 0x0D
-#define T4FW_VERSION_MICRO 0x20
+#define T4FW_VERSION_MINOR 0x0E
+#define T4FW_VERSION_MICRO 0x04
#define T4FW_VERSION_BUILD 0x00
#define T4FW_MIN_VERSION_MAJOR 0x01
@@ -45,8 +45,8 @@
#define T4FW_MIN_VERSION_MICRO 0x00
#define T5FW_VERSION_MAJOR 0x01
-#define T5FW_VERSION_MINOR 0x0D
-#define T5FW_VERSION_MICRO 0x20
+#define T5FW_VERSION_MINOR 0x0E
+#define T5FW_VERSION_MICRO 0x04
#define T5FW_VERSION_BUILD 0x00
#define T5FW_MIN_VERSION_MAJOR 0x00
@@ -54,8 +54,8 @@
#define T5FW_MIN_VERSION_MICRO 0x00
#define T6FW_VERSION_MAJOR 0x01
-#define T6FW_VERSION_MINOR 0x0D
-#define T6FW_VERSION_MICRO 0x2D
+#define T6FW_VERSION_MINOR 0x0E
+#define T6FW_VERSION_MICRO 0x04
#define T6FW_VERSION_BUILD 0x00
#define T6FW_MIN_VERSION_MAJOR 0x00
diff --git a/drivers/net/ethernet/davicom/dm9000.c b/drivers/net/ethernet/davicom/dm9000.c
index c0a7813..cf94b72 100644
--- a/drivers/net/ethernet/davicom/dm9000.c
+++ b/drivers/net/ethernet/davicom/dm9000.c
@@ -1226,7 +1226,7 @@
if (int_status & ISR_PRS)
dm9000_rx(dev);
- /* Trnasmit Interrupt check */
+ /* Transmit Interrupt check */
if (int_status & ISR_PTS)
dm9000_tx_done(dev, db);
diff --git a/drivers/net/ethernet/emulex/benet/be_cmds.c b/drivers/net/ethernet/emulex/benet/be_cmds.c
index 3be1fbd..eb32391 100644
--- a/drivers/net/ethernet/emulex/benet/be_cmds.c
+++ b/drivers/net/ethernet/emulex/benet/be_cmds.c
@@ -1968,7 +1968,7 @@
memcpy(req->mcast_mac[i++].byte, ha->addr, ETH_ALEN);
}
- status = be_mcc_notify(adapter);
+ status = be_mcc_notify_wait(adapter);
err:
spin_unlock_bh(&adapter->mcc_lock);
return status;
diff --git a/drivers/net/ethernet/ethoc.c b/drivers/net/ethernet/ethoc.c
index 442410c..a2c96fd 100644
--- a/drivers/net/ethernet/ethoc.c
+++ b/drivers/net/ethernet/ethoc.c
@@ -1132,10 +1132,6 @@
memcpy(netdev->dev_addr, pdata->hwaddr, IFHWADDRLEN);
priv->phy_id = pdata->phy_id;
} else {
- priv->phy_id = -1;
-
-#ifdef CONFIG_OF
- {
const uint8_t *mac;
mac = of_get_property(pdev->dev.of_node,
@@ -1143,8 +1139,7 @@
NULL);
if (mac)
memcpy(netdev->dev_addr, mac, IFHWADDRLEN);
- }
-#endif
+ priv->phy_id = -1;
}
/* Check that the given MAC address is valid. If it isn't, read the
diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c
index 91925e3..dd4ca39 100644
--- a/drivers/net/ethernet/freescale/fec_main.c
+++ b/drivers/net/ethernet/freescale/fec_main.c
@@ -1816,11 +1816,13 @@
struct fec_enet_private *fep = bus->priv;
struct device *dev = &fep->pdev->dev;
unsigned long time_left;
- int ret = 0;
+ int ret;
ret = pm_runtime_get_sync(dev);
if (ret < 0)
return ret;
+ else
+ ret = 0;
fep->mii_timeout = 0;
reinit_completion(&fep->mdio_done);
@@ -3029,6 +3031,14 @@
memcpy(ndev->dev_addr, addr->sa_data, ndev->addr_len);
}
+ /* Add netif status check here to avoid system hang in below case:
+ * ifconfig ethx down; ifconfig ethx hw ether xx:xx:xx:xx:xx:xx;
+ * After ethx down, fec all clocks are gated off and then register
+ * access causes system hang.
+ */
+ if (!netif_running(ndev))
+ return 0;
+
writel(ndev->dev_addr[3] | (ndev->dev_addr[2] << 8) |
(ndev->dev_addr[1] << 16) | (ndev->dev_addr[0] << 24),
fep->hwp + FEC_ADDR_LOW);
diff --git a/drivers/net/ethernet/jme.c b/drivers/net/ethernet/jme.c
index 6e9a792..060dd39 100644
--- a/drivers/net/ethernet/jme.c
+++ b/drivers/net/ethernet/jme.c
@@ -583,7 +583,7 @@
atomic_set(&txring->next_to_clean, 0);
atomic_set(&txring->nr_free, jme->tx_ring_size);
- txring->bufinf = kmalloc(sizeof(struct jme_buffer_info) *
+ txring->bufinf = kzalloc(sizeof(struct jme_buffer_info) *
jme->tx_ring_size, GFP_ATOMIC);
if (unlikely(!(txring->bufinf)))
goto err_free_txring;
@@ -592,8 +592,6 @@
* Initialize Transmit Descriptors
*/
memset(txring->alloc, 0, TX_RING_ALLOC_SIZE(jme->tx_ring_size));
- memset(txring->bufinf, 0,
- sizeof(struct jme_buffer_info) * jme->tx_ring_size);
return 0;
@@ -845,7 +843,7 @@
rxring->next_to_use = 0;
atomic_set(&rxring->next_to_clean, 0);
- rxring->bufinf = kmalloc(sizeof(struct jme_buffer_info) *
+ rxring->bufinf = kzalloc(sizeof(struct jme_buffer_info) *
jme->rx_ring_size, GFP_ATOMIC);
if (unlikely(!(rxring->bufinf)))
goto err_free_rxring;
@@ -853,8 +851,6 @@
/*
* Initiallize Receive Descriptors
*/
- memset(rxring->bufinf, 0,
- sizeof(struct jme_buffer_info) * jme->rx_ring_size);
for (i = 0 ; i < jme->rx_ring_size ; ++i) {
if (unlikely(jme_make_new_rx_buf(jme, i))) {
jme_free_rx_resources(jme);
diff --git a/drivers/net/ethernet/marvell/mv643xx_eth.c b/drivers/net/ethernet/marvell/mv643xx_eth.c
index d52639b..960169e 100644
--- a/drivers/net/ethernet/marvell/mv643xx_eth.c
+++ b/drivers/net/ethernet/marvell/mv643xx_eth.c
@@ -1859,14 +1859,11 @@
return;
}
- mc_spec = kmalloc(0x200, GFP_ATOMIC);
+ mc_spec = kzalloc(0x200, GFP_ATOMIC);
if (mc_spec == NULL)
goto oom;
mc_other = mc_spec + (0x100 >> 2);
- memset(mc_spec, 0, 0x100);
- memset(mc_other, 0, 0x100);
-
netdev_for_each_mc_addr(ha, dev) {
u8 *a = ha->addr;
u32 *table;
diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.c
index 5ab3adf..9f0bdd9 100644
--- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.c
+++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.c
@@ -918,8 +918,6 @@
mbx->req.arg = NULL;
return -ENOMEM;
}
- memset(mbx->req.arg, 0, sizeof(u32) * mbx->req.num);
- memset(mbx->rsp.arg, 0, sizeof(u32) * mbx->rsp.num);
temp = adapter->ahw->fw_hal_version << 29;
mbx->req.arg[0] = (type | (mbx->req.num << 16) | temp);
mbx->cmd_op = type;
diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_ctx.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_ctx.c
index 6e6f18f..a5f422f 100644
--- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_ctx.c
+++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_ctx.c
@@ -73,8 +73,6 @@
mbx->req.arg = NULL;
return -ENOMEM;
}
- memset(mbx->req.arg, 0, sizeof(u32) * mbx->req.num);
- memset(mbx->rsp.arg, 0, sizeof(u32) * mbx->rsp.num);
mbx->req.arg[0] = type;
break;
}
diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov_common.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov_common.c
index 546cd5f..7327b72 100644
--- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov_common.c
+++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov_common.c
@@ -729,8 +729,6 @@
mbx->req.arg = NULL;
return -ENOMEM;
}
- memset(mbx->req.arg, 0, sizeof(u32) * mbx->req.num);
- memset(mbx->rsp.arg, 0, sizeof(u32) * mbx->rsp.num);
mbx->req.arg[0] = (type | (mbx->req.num << 16) |
(3 << 29));
mbx->rsp.arg[0] = (type & 0xffff) | mbx->rsp.num << 16;
diff --git a/drivers/net/ethernet/realtek/r8169.c b/drivers/net/ethernet/realtek/r8169.c
index 24dcbe6..2b32e0c 100644
--- a/drivers/net/ethernet/realtek/r8169.c
+++ b/drivers/net/ethernet/realtek/r8169.c
@@ -833,7 +833,8 @@
unsigned features;
struct mii_if_info mii;
- struct rtl8169_counters counters;
+ dma_addr_t counters_phys_addr;
+ struct rtl8169_counters *counters;
struct rtl8169_tc_offsets tc_offset;
u32 saved_wolopts;
u32 opts1_mask;
@@ -2190,53 +2191,37 @@
}
}
-static struct rtl8169_counters *rtl8169_map_counters(struct net_device *dev,
- dma_addr_t *paddr,
- u32 counter_cmd)
+DECLARE_RTL_COND(rtl_counters_cond)
{
- struct rtl8169_private *tp = netdev_priv(dev);
void __iomem *ioaddr = tp->mmio_addr;
- struct device *d = &tp->pci_dev->dev;
- struct rtl8169_counters *counters;
- u32 cmd;
- counters = dma_alloc_coherent(d, sizeof(*counters), paddr, GFP_KERNEL);
- if (counters) {
- RTL_W32(CounterAddrHigh, (u64)*paddr >> 32);
- cmd = (u64)*paddr & DMA_BIT_MASK(32);
- RTL_W32(CounterAddrLow, cmd);
- RTL_W32(CounterAddrLow, cmd | counter_cmd);
- }
- return counters;
+ return RTL_R32(CounterAddrLow) & (CounterReset | CounterDump);
}
-static void rtl8169_unmap_counters (struct net_device *dev,
- dma_addr_t paddr,
- struct rtl8169_counters *counters)
+static bool rtl8169_do_counters(struct net_device *dev, u32 counter_cmd)
{
struct rtl8169_private *tp = netdev_priv(dev);
void __iomem *ioaddr = tp->mmio_addr;
- struct device *d = &tp->pci_dev->dev;
+ dma_addr_t paddr = tp->counters_phys_addr;
+ u32 cmd;
+ bool ret;
+
+ RTL_W32(CounterAddrHigh, (u64)paddr >> 32);
+ cmd = (u64)paddr & DMA_BIT_MASK(32);
+ RTL_W32(CounterAddrLow, cmd);
+ RTL_W32(CounterAddrLow, cmd | counter_cmd);
+
+ ret = rtl_udelay_loop_wait_low(tp, &rtl_counters_cond, 10, 1000);
RTL_W32(CounterAddrLow, 0);
RTL_W32(CounterAddrHigh, 0);
- dma_free_coherent(d, sizeof(*counters), counters, paddr);
-}
-
-DECLARE_RTL_COND(rtl_reset_counters_cond)
-{
- void __iomem *ioaddr = tp->mmio_addr;
-
- return RTL_R32(CounterAddrLow) & CounterReset;
+ return ret;
}
static bool rtl8169_reset_counters(struct net_device *dev)
{
struct rtl8169_private *tp = netdev_priv(dev);
- struct rtl8169_counters *counters;
- dma_addr_t paddr;
- bool ret = true;
/*
* Versions prior to RTL_GIGA_MAC_VER_19 don't support resetting the
@@ -2245,32 +2230,13 @@
if (tp->mac_version < RTL_GIGA_MAC_VER_19)
return true;
- counters = rtl8169_map_counters(dev, &paddr, CounterReset);
- if (!counters)
- return false;
-
- if (!rtl_udelay_loop_wait_low(tp, &rtl_reset_counters_cond, 10, 1000))
- ret = false;
-
- rtl8169_unmap_counters(dev, paddr, counters);
-
- return ret;
-}
-
-DECLARE_RTL_COND(rtl_counters_cond)
-{
- void __iomem *ioaddr = tp->mmio_addr;
-
- return RTL_R32(CounterAddrLow) & CounterDump;
+ return rtl8169_do_counters(dev, CounterReset);
}
static bool rtl8169_update_counters(struct net_device *dev)
{
struct rtl8169_private *tp = netdev_priv(dev);
void __iomem *ioaddr = tp->mmio_addr;
- struct rtl8169_counters *counters;
- dma_addr_t paddr;
- bool ret = true;
/*
* Some chips are unable to dump tally counters when the receiver
@@ -2279,23 +2245,13 @@
if ((RTL_R8(ChipCmd) & CmdRxEnb) == 0)
return true;
- counters = rtl8169_map_counters(dev, &paddr, CounterDump);
- if (!counters)
- return false;
-
- if (rtl_udelay_loop_wait_low(tp, &rtl_counters_cond, 10, 1000))
- memcpy(&tp->counters, counters, sizeof(*counters));
- else
- ret = false;
-
- rtl8169_unmap_counters(dev, paddr, counters);
-
- return ret;
+ return rtl8169_do_counters(dev, CounterDump);
}
static bool rtl8169_init_counter_offsets(struct net_device *dev)
{
struct rtl8169_private *tp = netdev_priv(dev);
+ struct rtl8169_counters *counters = tp->counters;
bool ret = false;
/*
@@ -2323,9 +2279,9 @@
if (rtl8169_update_counters(dev))
ret = true;
- tp->tc_offset.tx_errors = tp->counters.tx_errors;
- tp->tc_offset.tx_multi_collision = tp->counters.tx_multi_collision;
- tp->tc_offset.tx_aborted = tp->counters.tx_aborted;
+ tp->tc_offset.tx_errors = counters->tx_errors;
+ tp->tc_offset.tx_multi_collision = counters->tx_multi_collision;
+ tp->tc_offset.tx_aborted = counters->tx_aborted;
tp->tc_offset.inited = true;
return ret;
@@ -2335,24 +2291,25 @@
struct ethtool_stats *stats, u64 *data)
{
struct rtl8169_private *tp = netdev_priv(dev);
+ struct rtl8169_counters *counters = tp->counters;
ASSERT_RTNL();
rtl8169_update_counters(dev);
- data[0] = le64_to_cpu(tp->counters.tx_packets);
- data[1] = le64_to_cpu(tp->counters.rx_packets);
- data[2] = le64_to_cpu(tp->counters.tx_errors);
- data[3] = le32_to_cpu(tp->counters.rx_errors);
- data[4] = le16_to_cpu(tp->counters.rx_missed);
- data[5] = le16_to_cpu(tp->counters.align_errors);
- data[6] = le32_to_cpu(tp->counters.tx_one_collision);
- data[7] = le32_to_cpu(tp->counters.tx_multi_collision);
- data[8] = le64_to_cpu(tp->counters.rx_unicast);
- data[9] = le64_to_cpu(tp->counters.rx_broadcast);
- data[10] = le32_to_cpu(tp->counters.rx_multicast);
- data[11] = le16_to_cpu(tp->counters.tx_aborted);
- data[12] = le16_to_cpu(tp->counters.tx_underun);
+ data[0] = le64_to_cpu(counters->tx_packets);
+ data[1] = le64_to_cpu(counters->rx_packets);
+ data[2] = le64_to_cpu(counters->tx_errors);
+ data[3] = le32_to_cpu(counters->rx_errors);
+ data[4] = le16_to_cpu(counters->rx_missed);
+ data[5] = le16_to_cpu(counters->align_errors);
+ data[6] = le32_to_cpu(counters->tx_one_collision);
+ data[7] = le32_to_cpu(counters->tx_multi_collision);
+ data[8] = le64_to_cpu(counters->rx_unicast);
+ data[9] = le64_to_cpu(counters->rx_broadcast);
+ data[10] = le32_to_cpu(counters->rx_multicast);
+ data[11] = le16_to_cpu(counters->tx_aborted);
+ data[12] = le16_to_cpu(counters->tx_underun);
}
static void rtl8169_get_strings(struct net_device *dev, u32 stringset, u8 *data)
@@ -7780,6 +7737,7 @@
{
struct rtl8169_private *tp = netdev_priv(dev);
void __iomem *ioaddr = tp->mmio_addr;
+ struct rtl8169_counters *counters = tp->counters;
unsigned int start;
if (netif_running(dev))
@@ -7816,11 +7774,11 @@
* Subtract values fetched during initalization.
* See rtl8169_init_counter_offsets for a description why we do that.
*/
- stats->tx_errors = le64_to_cpu(tp->counters.tx_errors) -
+ stats->tx_errors = le64_to_cpu(counters->tx_errors) -
le64_to_cpu(tp->tc_offset.tx_errors);
- stats->collisions = le32_to_cpu(tp->counters.tx_multi_collision) -
+ stats->collisions = le32_to_cpu(counters->tx_multi_collision) -
le32_to_cpu(tp->tc_offset.tx_multi_collision);
- stats->tx_aborted_errors = le16_to_cpu(tp->counters.tx_aborted) -
+ stats->tx_aborted_errors = le16_to_cpu(counters->tx_aborted) -
le16_to_cpu(tp->tc_offset.tx_aborted);
return stats;
@@ -8022,6 +7980,9 @@
unregister_netdev(dev);
+ dma_free_coherent(&tp->pci_dev->dev, sizeof(*tp->counters),
+ tp->counters, tp->counters_phys_addr);
+
rtl_release_firmware(tp);
if (pci_dev_run_wake(pdev))
@@ -8447,9 +8408,16 @@
tp->rtl_fw = RTL_FIRMWARE_UNKNOWN;
+ tp->counters = dma_alloc_coherent (&pdev->dev, sizeof(*tp->counters),
+ &tp->counters_phys_addr, GFP_KERNEL);
+ if (!tp->counters) {
+ rc = -ENOMEM;
+ goto err_out_msi_4;
+ }
+
rc = register_netdev(dev);
if (rc < 0)
- goto err_out_msi_4;
+ goto err_out_cnt_5;
pci_set_drvdata(pdev, dev);
@@ -8483,6 +8451,9 @@
out:
return rc;
+err_out_cnt_5:
+ dma_free_coherent(&pdev->dev, sizeof(*tp->counters), tp->counters,
+ tp->counters_phys_addr);
err_out_msi_4:
netif_napi_del(&tp->napi);
rtl_disable_msi(pdev, tp);
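
For illustration only (not part of the patch): the probe-time dma_alloc_coherent() / remove-time dma_free_coherent() pattern the r8169 changes above switch to for the tally counters, instead of mapping a fresh buffer on every dump. struct hw_counters and the two helpers are hypothetical stand-ins.

#include <linux/pci.h>
#include <linux/dma-mapping.h>

struct hw_counters {
	__le64 tx_packets;
	/* ... device-specific tally layout ... */
};

static struct hw_counters *counters_alloc(struct pci_dev *pdev,
					  dma_addr_t *phys)
{
	/* coherent memory the NIC can DMA its tally counters into */
	return dma_alloc_coherent(&pdev->dev, sizeof(struct hw_counters),
				  phys, GFP_KERNEL);
}

static void counters_free(struct pci_dev *pdev, struct hw_counters *cnt,
			  dma_addr_t phys)
{
	dma_free_coherent(&pdev->dev, sizeof(*cnt), cnt, phys);
}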
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
index 864b476..925f2f8 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
@@ -837,8 +837,11 @@
interface);
}
- if (IS_ERR(phydev)) {
+ if (IS_ERR_OR_NULL(phydev)) {
pr_err("%s: Could not attach to PHY\n", dev->name);
+ if (!phydev)
+ return -ENODEV;
+
return PTR_ERR(phydev);
}
diff --git a/drivers/net/ethernet/synopsys/Kconfig b/drivers/net/ethernet/synopsys/Kconfig
index a8f3151..8276ee5 100644
--- a/drivers/net/ethernet/synopsys/Kconfig
+++ b/drivers/net/ethernet/synopsys/Kconfig
@@ -20,7 +20,7 @@
select PHYLIB
select CRC32
select MII
- depends on OF
+ depends on OF && HAS_DMA
---help---
This driver supports the DWC Ethernet QoS from Synopsys
diff --git a/drivers/net/ntb_netdev.c b/drivers/net/ntb_netdev.c
index d8757bf..a9acf71 100644
--- a/drivers/net/ntb_netdev.c
+++ b/drivers/net/ntb_netdev.c
@@ -61,11 +61,21 @@
MODULE_LICENSE("Dual BSD/GPL");
MODULE_AUTHOR("Intel Corporation");
+/* Time in usecs for tx resource reaper */
+static unsigned int tx_time = 1;
+
+/* Number of descriptors to free before resuming tx */
+static unsigned int tx_start = 10;
+
+/* Number of descriptors still available before stop upper layer tx */
+static unsigned int tx_stop = 5;
+
struct ntb_netdev {
struct list_head list;
struct pci_dev *pdev;
struct net_device *ndev;
struct ntb_transport_qp *qp;
+ struct timer_list tx_timer;
};
#define NTB_TX_TIMEOUT_MS 1000
@@ -136,11 +146,42 @@
}
}
+static int __ntb_netdev_maybe_stop_tx(struct net_device *netdev,
+ struct ntb_transport_qp *qp, int size)
+{
+ struct ntb_netdev *dev = netdev_priv(netdev);
+
+ netif_stop_queue(netdev);
+ /* Make sure to see the latest value of ntb_transport_tx_free_entry()
+ * since the queue was last started.
+ */
+ smp_mb();
+
+ if (likely(ntb_transport_tx_free_entry(qp) < size)) {
+ mod_timer(&dev->tx_timer, jiffies + usecs_to_jiffies(tx_time));
+ return -EBUSY;
+ }
+
+ netif_start_queue(netdev);
+ return 0;
+}
+
+static int ntb_netdev_maybe_stop_tx(struct net_device *ndev,
+ struct ntb_transport_qp *qp, int size)
+{
+ if (netif_queue_stopped(ndev) ||
+ (ntb_transport_tx_free_entry(qp) >= size))
+ return 0;
+
+ return __ntb_netdev_maybe_stop_tx(ndev, qp, size);
+}
+
static void ntb_netdev_tx_handler(struct ntb_transport_qp *qp, void *qp_data,
void *data, int len)
{
struct net_device *ndev = qp_data;
struct sk_buff *skb;
+ struct ntb_netdev *dev = netdev_priv(ndev);
skb = data;
if (!skb || !ndev)
@@ -155,6 +196,15 @@
}
dev_kfree_skb(skb);
+
+ if (ntb_transport_tx_free_entry(dev->qp) >= tx_start) {
+ /* Make sure anybody stopping the queue after this sees the new
+ * value of ntb_transport_tx_free_entry()
+ */
+ smp_mb();
+ if (netif_queue_stopped(ndev))
+ netif_wake_queue(ndev);
+ }
}
static netdev_tx_t ntb_netdev_start_xmit(struct sk_buff *skb,
@@ -163,10 +213,15 @@
struct ntb_netdev *dev = netdev_priv(ndev);
int rc;
+ ntb_netdev_maybe_stop_tx(ndev, dev->qp, tx_stop);
+
rc = ntb_transport_tx_enqueue(dev->qp, skb, skb->data, skb->len);
if (rc)
goto err;
+ /* check for next submit */
+ ntb_netdev_maybe_stop_tx(ndev, dev->qp, tx_stop);
+
return NETDEV_TX_OK;
err:
@@ -175,6 +230,23 @@
return NETDEV_TX_BUSY;
}
+static void ntb_netdev_tx_timer(unsigned long data)
+{
+ struct net_device *ndev = (struct net_device *)data;
+ struct ntb_netdev *dev = netdev_priv(ndev);
+
+ if (ntb_transport_tx_free_entry(dev->qp) < tx_stop) {
+ mod_timer(&dev->tx_timer, jiffies + msecs_to_jiffies(tx_time));
+ } else {
+ /* Make sure anybody stopping the queue after this sees the new
+ * value of ntb_transport_tx_free_entry()
+ */
+ smp_mb();
+ if (netif_queue_stopped(ndev))
+ netif_wake_queue(ndev);
+ }
+}
+
static int ntb_netdev_open(struct net_device *ndev)
{
struct ntb_netdev *dev = netdev_priv(ndev);
@@ -197,8 +269,11 @@
}
}
+ setup_timer(&dev->tx_timer, ntb_netdev_tx_timer, (unsigned long)ndev);
+
netif_carrier_off(ndev);
ntb_transport_link_up(dev->qp);
+ netif_start_queue(ndev);
return 0;
@@ -219,6 +294,8 @@
while ((skb = ntb_transport_rx_remove(dev->qp, &len)))
dev_kfree_skb(skb);
+ del_timer_sync(&dev->tx_timer);
+
return 0;
}
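
For illustration only (not part of the patch): the generic stop/wake flow-control handshake that the ntb_netdev changes above implement, including the paired memory barriers. free_slots() is a hypothetical stand-in for ntb_transport_tx_free_entry().

#include <linux/netdevice.h>

static unsigned int free_slots(void);	/* stand-in for ntb_transport_tx_free_entry() */

static int maybe_stop_tx(struct net_device *ndev, unsigned int low_water)
{
	netif_stop_queue(ndev);
	/* see the free-slot count last published by the completion path */
	smp_mb();
	if (free_slots() >= low_water) {
		netif_start_queue(ndev);
		return 0;
	}
	return -EBUSY;
}

static void tx_complete(struct net_device *ndev, unsigned int high_water)
{
	if (free_slots() >= high_water) {
		/* publish the new free count before checking the queue state */
		smp_mb();
		if (netif_queue_stopped(ndev))
			netif_wake_queue(ndev);
	}
}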
diff --git a/drivers/net/phy/Kconfig b/drivers/net/phy/Kconfig
index c07030d..c5ad98a 100644
--- a/drivers/net/phy/Kconfig
+++ b/drivers/net/phy/Kconfig
@@ -127,6 +127,11 @@
---help---
Currently supports the DP83867 PHY.
+config MICROCHIP_PHY
+ tristate "Drivers for Microchip PHYs"
+ help
+ Supports the LAN88XX PHYs.
+
config FIXED_PHY
tristate "Driver for MDIO Bus/PHY emulation with fixed speed/link PHYs"
depends on PHYLIB
diff --git a/drivers/net/phy/Makefile b/drivers/net/phy/Makefile
index 9bb1033..87f079c 100644
--- a/drivers/net/phy/Makefile
+++ b/drivers/net/phy/Makefile
@@ -37,3 +37,4 @@
obj-$(CONFIG_MDIO_SUN4I) += mdio-sun4i.o
obj-$(CONFIG_MDIO_MOXART) += mdio-moxart.o
obj-$(CONFIG_MDIO_BCM_UNIMAC) += mdio-bcm-unimac.o
+obj-$(CONFIG_MICROCHIP_PHY) += microchip.o
diff --git a/drivers/net/phy/fixed_phy.c b/drivers/net/phy/fixed_phy.c
index 12c7eb2..fb1299c 100644
--- a/drivers/net/phy/fixed_phy.c
+++ b/drivers/net/phy/fixed_phy.c
@@ -325,7 +325,7 @@
phy_addr = phy_fixed_addr++;
spin_unlock(&phy_fixed_addr_lock);
- ret = fixed_phy_add(PHY_POLL, phy_addr, status, link_gpio);
+ ret = fixed_phy_add(irq, phy_addr, status, link_gpio);
if (ret < 0)
return ERR_PTR(ret);
diff --git a/drivers/net/phy/microchip.c b/drivers/net/phy/microchip.c
new file mode 100644
index 0000000..c0a20eb
--- /dev/null
+++ b/drivers/net/phy/microchip.c
@@ -0,0 +1,148 @@
+/*
+ * Copyright (C) 2015 Microchip Technology
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/mii.h>
+#include <linux/ethtool.h>
+#include <linux/phy.h>
+#include <linux/microchipphy.h>
+
+#define DRIVER_AUTHOR "WOOJUNG HUH <woojung.huh@microchip.com>"
+#define DRIVER_DESC "Microchip LAN88XX PHY driver"
+
+struct lan88xx_priv {
+ int chip_id;
+ int chip_rev;
+ __u32 wolopts;
+};
+
+static int lan88xx_phy_config_intr(struct phy_device *phydev)
+{
+ int rc;
+
+ if (phydev->interrupts == PHY_INTERRUPT_ENABLED) {
+ /* unmask all source and clear them before enable */
+ rc = phy_write(phydev, LAN88XX_INT_MASK, 0x7FFF);
+ rc = phy_read(phydev, LAN88XX_INT_STS);
+ rc = phy_write(phydev, LAN88XX_INT_MASK,
+ LAN88XX_INT_MASK_MDINTPIN_EN_ |
+ LAN88XX_INT_MASK_LINK_CHANGE_);
+ } else {
+ rc = phy_write(phydev, LAN88XX_INT_MASK, 0);
+ }
+
+ return rc < 0 ? rc : 0;
+}
+
+static int lan88xx_phy_ack_interrupt(struct phy_device *phydev)
+{
+ int rc = phy_read(phydev, LAN88XX_INT_STS);
+
+ return rc < 0 ? rc : 0;
+}
+
+int lan88xx_suspend(struct phy_device *phydev)
+{
+ struct lan88xx_priv *priv = phydev->priv;
+
+ /* do not power down PHY when WOL is enabled */
+ if (!priv->wolopts)
+ genphy_suspend(phydev);
+
+ return 0;
+}
+
+static int lan88xx_probe(struct phy_device *phydev)
+{
+ struct device *dev = &phydev->dev;
+ struct lan88xx_priv *priv;
+
+ priv = devm_kzalloc(dev, sizeof(*priv), GFP_KERNEL);
+ if (!priv)
+ return -ENOMEM;
+
+ priv->wolopts = 0;
+
+ /* these values can be used to identify internal PHY */
+ priv->chip_id = phy_read_mmd_indirect(phydev, LAN88XX_MMD3_CHIP_ID,
+ 3, phydev->addr);
+ priv->chip_rev = phy_read_mmd_indirect(phydev, LAN88XX_MMD3_CHIP_REV,
+ 3, phydev->addr);
+
+ phydev->priv = priv;
+
+ return 0;
+}
+
+static void lan88xx_remove(struct phy_device *phydev)
+{
+ struct device *dev = &phydev->dev;
+ struct lan88xx_priv *priv = phydev->priv;
+
+ if (priv)
+ devm_kfree(dev, priv);
+}
+
+static int lan88xx_set_wol(struct phy_device *phydev,
+ struct ethtool_wolinfo *wol)
+{
+ struct lan88xx_priv *priv = phydev->priv;
+
+ priv->wolopts = wol->wolopts;
+
+ return 0;
+}
+
+static struct phy_driver microchip_phy_driver[] = {
+{
+ .phy_id = 0x0007c130,
+ .phy_id_mask = 0xfffffff0,
+ .name = "Microchip LAN88xx",
+
+ .features = (PHY_GBIT_FEATURES |
+ SUPPORTED_Pause | SUPPORTED_Asym_Pause),
+ .flags = PHY_HAS_INTERRUPT | PHY_HAS_MAGICANEG,
+
+ .probe = lan88xx_probe,
+ .remove = lan88xx_remove,
+
+ .config_init = genphy_config_init,
+ .config_aneg = genphy_config_aneg,
+ .read_status = genphy_read_status,
+
+ .ack_interrupt = lan88xx_phy_ack_interrupt,
+ .config_intr = lan88xx_phy_config_intr,
+
+ .suspend = lan88xx_suspend,
+ .resume = genphy_resume,
+ .set_wol = lan88xx_set_wol,
+
+ .driver = { .owner = THIS_MODULE, }
+} };
+
+module_phy_driver(microchip_phy_driver);
+
+static struct mdio_device_id __maybe_unused microchip_tbl[] = {
+ { 0x0007c130, 0xfffffff0 },
+ { }
+};
+
+MODULE_DEVICE_TABLE(mdio, microchip_tbl);
+
+MODULE_AUTHOR(DRIVER_AUTHOR);
+MODULE_DESCRIPTION(DRIVER_DESC);
+MODULE_LICENSE("GPL");
diff --git a/drivers/net/usb/lan78xx.c b/drivers/net/usb/lan78xx.c
index 39364a4..a39518f 100644
--- a/drivers/net/usb/lan78xx.c
+++ b/drivers/net/usb/lan78xx.c
@@ -1049,8 +1049,7 @@
{
struct mii_if_info *mii = &dev->mii;
struct ethtool_cmd ecmd = { .cmd = ETHTOOL_GSET };
- u16 ladv, radv;
- int ret;
+ int ladv, radv, ret;
u32 buf;
/* clear PHY interrupt status */
@@ -1104,12 +1103,12 @@
}
ladv = lan78xx_mdio_read(dev->net, mii->phy_id, MII_ADVERTISE);
- if (unlikely(ladv < 0))
- return -EIO;
+ if (ladv < 0)
+ return ladv;
radv = lan78xx_mdio_read(dev->net, mii->phy_id, MII_LPA);
- if (unlikely(radv < 0))
- return -EIO;
+ if (radv < 0)
+ return radv;
netif_dbg(dev, link, dev->net,
"speed: %u duplex: %d anadv: 0x%04x anlpa: 0x%04x",
diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c
index fe4ec32..d9427ca 100644
--- a/drivers/net/usb/r8152.c
+++ b/drivers/net/usb/r8152.c
@@ -26,8 +26,13 @@
#include <linux/mdio.h>
#include <linux/usb/cdc.h>
-/* Version Information */
-#define DRIVER_VERSION "v1.08.1 (2015/07/28)"
+/* Information for net-next */
+#define NETNEXT_VERSION "08"
+
+/* Information for net */
+#define NET_VERSION "2"
+
+#define DRIVER_VERSION "v1." NETNEXT_VERSION "." NET_VERSION
#define DRIVER_AUTHOR "Realtek linux nic maintainers <nic_swsd@realtek.com>"
#define DRIVER_DESC "Realtek RTL8152/RTL8153 Based USB Ethernet Adapters"
#define MODULENAME "r8152"
@@ -143,6 +148,7 @@
#define OCP_EEE_ABLE 0xa5c4
#define OCP_EEE_ADV 0xa5d0
#define OCP_EEE_LPABLE 0xa5d2
+#define OCP_PHY_STATE 0xa708 /* nway state for 8153 */
#define OCP_ADC_CFG 0xbc06
/* SRAM Register */
@@ -427,6 +433,10 @@
/* OCP_DOWN_SPEED */
#define EN_10M_BGOFF 0x0080
+/* OCP_PHY_STATE */
+#define TXDIS_STATE 0x01
+#define ABD_STATE 0x02
+
/* OCP_ADC_CFG */
#define CKADSEL_L 0x0100
#define ADC_EN 0x0080
@@ -604,6 +614,7 @@
void (*unload)(struct r8152 *);
int (*eee_get)(struct r8152 *, struct ethtool_eee *);
int (*eee_set)(struct r8152 *, struct ethtool_eee *);
+ bool (*in_nway)(struct r8152 *);
} rtl_ops;
int intr_interval;
@@ -2941,6 +2952,32 @@
r8153_enable_aldps(tp);
}
+static bool rtl8152_in_nway(struct r8152 *tp)
+{
+ u16 nway_state;
+
+ ocp_write_word(tp, MCU_TYPE_PLA, PLA_OCP_GPHY_BASE, 0x2000);
+ tp->ocp_base = 0x2000;
+ ocp_write_byte(tp, MCU_TYPE_PLA, 0xb014, 0x4c); /* phy state */
+ nway_state = ocp_read_word(tp, MCU_TYPE_PLA, 0xb01a);
+
+ /* bit 15: TXDIS_STATE, bit 14: ABD_STATE */
+ if (nway_state & 0xc000)
+ return false;
+ else
+ return true;
+}
+
+static bool rtl8153_in_nway(struct r8152 *tp)
+{
+ u16 phy_state = ocp_reg_read(tp, OCP_PHY_STATE) & 0xff;
+
+ if (phy_state == TXDIS_STATE || phy_state == ABD_STATE)
+ return false;
+ else
+ return true;
+}
+
static void set_carrier(struct r8152 *tp)
{
struct net_device *netdev = tp->netdev;
@@ -3405,6 +3442,27 @@
return 0;
}
+static bool delay_autosuspend(struct r8152 *tp)
+{
+ bool sw_linking = !!netif_carrier_ok(tp->netdev);
+ bool hw_linking = !!(rtl8152_get_speed(tp) & LINK_STATUS);
+
+ /* This means a linking change occurs and the driver doesn't detect it,
+ * yet. If the driver has disabled tx/rx and hw is linking on, the
+ * device wouldn't wake up by receiving any packet.
+ */
+ if (work_busy(&tp->schedule.work) || sw_linking != hw_linking)
+ return true;
+
+ /* If the linking down is occurred by nway, the device may miss the
+ * linking change event. And it wouldn't wake when linking on.
+ */
+ if (!sw_linking && tp->rtl_ops.in_nway(tp))
+ return true;
+ else
+ return false;
+}
+
static int rtl8152_suspend(struct usb_interface *intf, pm_message_t message)
{
struct r8152 *tp = usb_get_intfdata(intf);
@@ -3414,7 +3472,7 @@
mutex_lock(&tp->control);
if (PMSG_IS_AUTO(message)) {
- if (netif_running(netdev) && work_busy(&tp->schedule.work)) {
+ if (netif_running(netdev) && delay_autosuspend(tp)) {
ret = -EBUSY;
goto out1;
}
@@ -4044,6 +4102,7 @@
ops->unload = rtl8152_unload;
ops->eee_get = r8152_get_eee;
ops->eee_set = r8152_set_eee;
+ ops->in_nway = rtl8152_in_nway;
break;
case RTL_VER_03:
@@ -4058,6 +4117,7 @@
ops->unload = rtl8153_unload;
ops->eee_get = r8153_get_eee;
ops->eee_set = r8153_set_eee;
+ ops->in_nway = rtl8153_in_nway;
break;
default:
diff --git a/drivers/net/usb/usbnet.c b/drivers/net/usb/usbnet.c
index e049857..b4cf107 100644
--- a/drivers/net/usb/usbnet.c
+++ b/drivers/net/usb/usbnet.c
@@ -428,12 +428,18 @@
old_state = entry->state;
entry->state = state;
__skb_unlink(skb, list);
- spin_unlock(&list->lock);
- spin_lock(&dev->done.lock);
+
+ /* defer_bh() is never called with list == &dev->done.
+ * spin_lock_nested() tells lockdep that it is OK to take
+ * dev->done.lock here with list->lock held.
+ */
+ spin_lock_nested(&dev->done.lock, SINGLE_DEPTH_NESTING);
+
__skb_queue_tail(&dev->done, skb);
if (dev->done.qlen == 1)
tasklet_schedule(&dev->bh);
- spin_unlock_irqrestore(&dev->done.lock, flags);
+ spin_unlock(&dev->done.lock);
+ spin_unlock_irqrestore(&list->lock, flags);
return old_state;
}
@@ -749,6 +755,20 @@
/*-------------------------------------------------------------------------*/
+static void wait_skb_queue_empty(struct sk_buff_head *q)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&q->lock, flags);
+ while (!skb_queue_empty(q)) {
+ spin_unlock_irqrestore(&q->lock, flags);
+ schedule_timeout(msecs_to_jiffies(UNLINK_TIMEOUT_MS));
+ set_current_state(TASK_UNINTERRUPTIBLE);
+ spin_lock_irqsave(&q->lock, flags);
+ }
+ spin_unlock_irqrestore(&q->lock, flags);
+}
+
// precondition: never called in_interrupt
static void usbnet_terminate_urbs(struct usbnet *dev)
{
@@ -762,14 +782,11 @@
unlink_urbs(dev, &dev->rxq);
/* maybe wait for deletions to finish. */
- while (!skb_queue_empty(&dev->rxq)
- && !skb_queue_empty(&dev->txq)
- && !skb_queue_empty(&dev->done)) {
- schedule_timeout(msecs_to_jiffies(UNLINK_TIMEOUT_MS));
- set_current_state(TASK_UNINTERRUPTIBLE);
- netif_dbg(dev, ifdown, dev->net,
- "waited for %d urb completions\n", temp);
- }
+ wait_skb_queue_empty(&dev->rxq);
+ wait_skb_queue_empty(&dev->txq);
+ wait_skb_queue_empty(&dev->done);
+ netif_dbg(dev, ifdown, dev->net,
+ "waited for %d urb completions\n", temp);
set_current_state(TASK_RUNNING);
remove_wait_queue(&dev->wait, &wait);
}
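
For illustration only (not part of the patch): taking two locks of the same lock class (two sk_buff_head lists) with spin_lock_nested() so lockdep does not report a false deadlock, which is the technique defer_bh() above now relies on. move_one() is a hypothetical helper.

#include <linux/skbuff.h>
#include <linux/spinlock.h>

static void move_one(struct sk_buff_head *src, struct sk_buff_head *dst)
{
	struct sk_buff *skb;
	unsigned long flags;

	spin_lock_irqsave(&src->lock, flags);
	skb = __skb_dequeue(src);
	if (skb) {
		/* same lock class as src->lock; tell lockdep the nesting is intended */
		spin_lock_nested(&dst->lock, SINGLE_DEPTH_NESTING);
		__skb_queue_tail(dst, skb);
		spin_unlock(&dst->lock);
	}
	spin_unlock_irqrestore(&src->lock, flags);
}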
diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c
index ce988fd..cf8b7f0 100644
--- a/drivers/net/vxlan.c
+++ b/drivers/net/vxlan.c
@@ -1223,7 +1223,6 @@
static int vxlan_udp_encap_recv(struct sock *sk, struct sk_buff *skb)
{
struct metadata_dst *tun_dst = NULL;
- struct ip_tunnel_info *info;
struct vxlan_sock *vs;
struct vxlanhdr *vxh;
u32 flags, vni;
@@ -1270,8 +1269,7 @@
if (!tun_dst)
goto drop;
- info = &tun_dst->u.tun_info;
- md = ip_tunnel_info_opts(info);
+ md = ip_tunnel_info_opts(&tun_dst->u.tun_info);
} else {
memset(md, 0, sizeof(*md));
}
@@ -1286,7 +1284,7 @@
md->gbp = ntohs(gbp->policy_id);
if (tun_dst)
- info->key.tun_flags |= TUNNEL_VXLAN_OPT;
+ tun_dst->u.tun_info.key.tun_flags |= TUNNEL_VXLAN_OPT;
if (gbp->dont_learn)
md->gbp |= VXLAN_GBP_DONT_LEARN;
diff --git a/drivers/net/wan/sbni.c b/drivers/net/wan/sbni.c
index 758c4ba..8fef8d8 100644
--- a/drivers/net/wan/sbni.c
+++ b/drivers/net/wan/sbni.c
@@ -1358,6 +1358,8 @@
if( !slave_dev || !(slave_dev->flags & IFF_UP) ) {
netdev_err(dev, "trying to enslave non-active device %s\n",
slave_name);
+ if (slave_dev)
+ dev_put(slave_dev);
return -EPERM;
}
diff --git a/drivers/net/wireless/ath/wil6210/debugfs.c b/drivers/net/wireless/ath/wil6210/debugfs.c
index 613ca2b..d1a1e16 100644
--- a/drivers/net/wireless/ath/wil6210/debugfs.c
+++ b/drivers/net/wireless/ath/wil6210/debugfs.c
@@ -156,6 +156,12 @@
.llseek = seq_lseek,
};
+static void wil_seq_hexdump(struct seq_file *s, void *p, int len,
+ const char *prefix)
+{
+ seq_hex_dump(s, prefix, DUMP_PREFIX_NONE, 16, 1, p, len, false);
+}
+
static void wil_print_ring(struct seq_file *s, const char *prefix,
void __iomem *off)
{
@@ -212,8 +218,6 @@
le16_to_cpu(hdr.seq), len,
le16_to_cpu(hdr.type), hdr.flags);
if (len <= MAX_MBOXITEM_SIZE) {
- int n = 0;
- char printbuf[16 * 3 + 2];
unsigned char databuf[MAX_MBOXITEM_SIZE];
void __iomem *src = wmi_buffer(wil, d.addr) +
sizeof(struct wil6210_mbox_hdr);
@@ -223,16 +227,7 @@
* reading header
*/
wil_memcpy_fromio_32(databuf, src, len);
- while (n < len) {
- int l = min(len - n, 16);
-
- hex_dump_to_buffer(databuf + n, l,
- 16, 1, printbuf,
- sizeof(printbuf),
- false);
- seq_printf(s, " : %s\n", printbuf);
- n += l;
- }
+ wil_seq_hexdump(s, databuf, len, " : ");
}
} else {
seq_puts(s, "\n");
@@ -867,22 +862,6 @@
.open = simple_open,
};
-static void wil_seq_hexdump(struct seq_file *s, void *p, int len,
- const char *prefix)
-{
- char printbuf[16 * 3 + 2];
- int i = 0;
-
- while (i < len) {
- int l = min(len - i, 16);
-
- hex_dump_to_buffer(p + i, l, 16, 1, printbuf,
- sizeof(printbuf), false);
- seq_printf(s, "%s%s\n", prefix, printbuf);
- i += l;
- }
-}
-
static void wil_seq_print_skb(struct seq_file *s, struct sk_buff *skb)
{
int i = 0;
diff --git a/drivers/net/xen-netback/common.h b/drivers/net/xen-netback/common.h
index 6dc76c1..a7bf747 100644
--- a/drivers/net/xen-netback/common.h
+++ b/drivers/net/xen-netback/common.h
@@ -200,11 +200,6 @@
struct xenvif_stats stats;
};
-/* Maximum number of Rx slots a to-guest packet may use, including the
- * slot needed for GSO meta-data.
- */
-#define XEN_NETBK_RX_SLOTS_MAX (MAX_SKB_FRAGS + 1)
-
enum state_bit_shift {
/* This bit marks that the vif is connected */
VIF_STATUS_CONNECTED,
@@ -317,11 +312,6 @@
void xenvif_rx_queue_tail(struct xenvif_queue *queue, struct sk_buff *skb);
-/* Determine whether the needed number of slots (req) are available,
- * and set req_event if not.
- */
-bool xenvif_rx_ring_slots_available(struct xenvif_queue *queue, int needed);
-
void xenvif_carrier_on(struct xenvif *vif);
/* Callback from stack when TX packet can be released */
diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c
index 42569b9..ec98d43 100644
--- a/drivers/net/xen-netback/netback.c
+++ b/drivers/net/xen-netback/netback.c
@@ -149,9 +149,20 @@
return i & (MAX_PENDING_REQS-1);
}
-bool xenvif_rx_ring_slots_available(struct xenvif_queue *queue, int needed)
+static int xenvif_rx_ring_slots_needed(struct xenvif *vif)
+{
+ if (vif->gso_mask)
+ return DIV_ROUND_UP(vif->dev->gso_max_size, PAGE_SIZE) + 1;
+ else
+ return DIV_ROUND_UP(vif->dev->mtu, PAGE_SIZE);
+}
+
+static bool xenvif_rx_ring_slots_available(struct xenvif_queue *queue)
{
RING_IDX prod, cons;
+ int needed;
+
+ needed = xenvif_rx_ring_slots_needed(queue->vif);
do {
prod = queue->rx.sring->req_prod;
@@ -314,7 +325,7 @@
} else {
copy_gop->source.domid = DOMID_SELF;
copy_gop->source.u.gmfn =
- virt_to_mfn(page_address(page));
+ virt_to_gfn(page_address(page));
}
copy_gop->source.offset = offset;
@@ -513,7 +524,7 @@
skb_queue_head_init(&rxq);
- while (xenvif_rx_ring_slots_available(queue, XEN_NETBK_RX_SLOTS_MAX)
+ while (xenvif_rx_ring_slots_available(queue)
&& (skb = xenvif_rx_dequeue(queue)) != NULL) {
queue->last_rx_time = jiffies;
@@ -1395,7 +1406,7 @@
queue->tx_copy_ops[*copy_ops].source.offset = txreq.offset;
queue->tx_copy_ops[*copy_ops].dest.u.gmfn =
- virt_to_mfn(skb->data);
+ virt_to_gfn(skb->data);
queue->tx_copy_ops[*copy_ops].dest.domid = DOMID_SELF;
queue->tx_copy_ops[*copy_ops].dest.offset =
offset_in_page(skb->data);
@@ -1938,8 +1949,7 @@
prod = queue->rx.sring->req_prod;
cons = queue->rx.req_cons;
- return !queue->stalled
- && prod - cons < XEN_NETBK_RX_SLOTS_MAX
+ return !queue->stalled && prod - cons < 1
&& time_after(jiffies,
queue->last_rx_time + queue->vif->stall_timeout);
}
@@ -1951,14 +1961,13 @@
prod = queue->rx.sring->req_prod;
cons = queue->rx.req_cons;
- return queue->stalled
- && prod - cons >= XEN_NETBK_RX_SLOTS_MAX;
+ return queue->stalled && prod - cons >= 1;
}
static bool xenvif_have_rx_work(struct xenvif_queue *queue)
{
return (!skb_queue_empty(&queue->rx_queue)
- && xenvif_rx_ring_slots_available(queue, XEN_NETBK_RX_SLOTS_MAX))
+ && xenvif_rx_ring_slots_available(queue))
|| (queue->vif->stall_timeout &&
(xenvif_rx_queue_stalled(queue)
|| xenvif_rx_queue_ready(queue)))
@@ -2105,8 +2114,11 @@
if (!xen_domain())
return -ENODEV;
- /* Allow as many queues as there are CPUs, by default */
- xenvif_max_queues = num_online_cpus();
+ /* Allow as many queues as there are CPUs if user has not
+ * specified a value.
+ */
+ if (xenvif_max_queues == 0)
+ xenvif_max_queues = num_online_cpus();
if (fatal_skb_slots < XEN_NETBK_LEGACY_SLOTS_MAX) {
pr_info("fatal_skb_slots too small (%d), bump it to XEN_NETBK_LEGACY_SLOTS_MAX (%d)\n",
diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c
index e27e6d2..f821a97 100644
--- a/drivers/net/xen-netfront.c
+++ b/drivers/net/xen-netfront.c
@@ -291,7 +291,7 @@
struct sk_buff *skb;
unsigned short id;
grant_ref_t ref;
- unsigned long pfn;
+ unsigned long gfn;
struct xen_netif_rx_request *req;
skb = xennet_alloc_one_rx_buffer(queue);
@@ -307,12 +307,12 @@
BUG_ON((signed short)ref < 0);
queue->grant_rx_ref[id] = ref;
- pfn = page_to_pfn(skb_frag_page(&skb_shinfo(skb)->frags[0]));
+ gfn = xen_page_to_gfn(skb_frag_page(&skb_shinfo(skb)->frags[0]));
req = RING_GET_REQUEST(&queue->rx, req_prod);
gnttab_grant_foreign_access_ref(ref,
queue->info->xbdev->otherend_id,
- pfn_to_mfn(pfn),
+ gfn,
0);
req->id = id;
@@ -430,8 +430,10 @@
ref = gnttab_claim_grant_reference(&queue->gref_tx_head);
BUG_ON((signed short)ref < 0);
- gnttab_grant_foreign_access_ref(ref, queue->info->xbdev->otherend_id,
- page_to_mfn(page), GNTMAP_readonly);
+ gnttab_grant_foreign_access_ref(ref,
+ queue->info->xbdev->otherend_id,
+ xen_page_to_gfn(page),
+ GNTMAP_readonly);
queue->tx_skbs[id].skb = skb;
queue->grant_tx_page[id] = page;
@@ -2132,8 +2134,11 @@
pr_info("Initialising Xen virtual ethernet driver\n");
- /* Allow as many queues as there are CPUs, by default */
- xennet_max_queues = num_online_cpus();
+ /* Allow as many queues as there are CPUs if user has not
+ * specified a value.
+ */
+ if (xennet_max_queues == 0)
+ xennet_max_queues = num_online_cpus();
return xenbus_register_frontend(&netfront_driver);
}
diff --git a/drivers/ntb/hw/intel/ntb_hw_intel.c b/drivers/ntb/hw/intel/ntb_hw_intel.c
index 87751cf..865a3e3 100644
--- a/drivers/ntb/hw/intel/ntb_hw_intel.c
+++ b/drivers/ntb/hw/intel/ntb_hw_intel.c
@@ -190,14 +190,17 @@
case PCI_DEVICE_ID_INTEL_NTB_SS_SNB:
case PCI_DEVICE_ID_INTEL_NTB_SS_IVT:
case PCI_DEVICE_ID_INTEL_NTB_SS_HSX:
+ case PCI_DEVICE_ID_INTEL_NTB_SS_BDX:
case PCI_DEVICE_ID_INTEL_NTB_PS_JSF:
case PCI_DEVICE_ID_INTEL_NTB_PS_SNB:
case PCI_DEVICE_ID_INTEL_NTB_PS_IVT:
case PCI_DEVICE_ID_INTEL_NTB_PS_HSX:
+ case PCI_DEVICE_ID_INTEL_NTB_PS_BDX:
case PCI_DEVICE_ID_INTEL_NTB_B2B_JSF:
case PCI_DEVICE_ID_INTEL_NTB_B2B_SNB:
case PCI_DEVICE_ID_INTEL_NTB_B2B_IVT:
case PCI_DEVICE_ID_INTEL_NTB_B2B_HSX:
+ case PCI_DEVICE_ID_INTEL_NTB_B2B_BDX:
return 1;
}
return 0;
@@ -237,7 +240,7 @@
static int ndev_mw_to_bar(struct intel_ntb_dev *ndev, int idx)
{
- if (idx < 0 || idx > ndev->mw_count)
+ if (idx < 0 || idx >= ndev->mw_count)
return -EINVAL;
return ndev->reg->mw_bar[idx];
}
@@ -572,10 +575,13 @@
"Connection Topology -\t%s\n",
ntb_topo_string(ndev->ntb.topo));
- off += scnprintf(buf + off, buf_size - off,
- "B2B Offset -\t\t%#lx\n", ndev->b2b_off);
- off += scnprintf(buf + off, buf_size - off,
- "B2B MW Idx -\t\t%d\n", ndev->b2b_idx);
+ if (ndev->b2b_idx != UINT_MAX) {
+ off += scnprintf(buf + off, buf_size - off,
+ "B2B MW Idx -\t\t%u\n", ndev->b2b_idx);
+ off += scnprintf(buf + off, buf_size - off,
+ "B2B Offset -\t\t%#lx\n", ndev->b2b_off);
+ }
+
off += scnprintf(buf + off, buf_size - off,
"BAR4 Split -\t\t%s\n",
ndev->bar4_split ? "yes" : "no");
@@ -1484,7 +1490,7 @@
pdev = ndev_pdev(ndev);
mmio = ndev->self_mmio;
- if (ndev->b2b_idx >= ndev->mw_count) {
+ if (ndev->b2b_idx == UINT_MAX) {
dev_dbg(ndev_dev(ndev), "not using b2b mw\n");
b2b_bar = 0;
ndev->b2b_off = 0;
@@ -1776,6 +1782,13 @@
else
ndev->b2b_idx = b2b_mw_idx;
+ if (ndev->b2b_idx >= ndev->mw_count) {
+ dev_dbg(ndev_dev(ndev),
+ "b2b_mw_idx %d invalid for mw_count %u\n",
+ b2b_mw_idx, ndev->mw_count);
+ return -EINVAL;
+ }
+
dev_dbg(ndev_dev(ndev),
"setting up b2b mw idx %d means %d\n",
b2b_mw_idx, ndev->b2b_idx);
@@ -1843,6 +1856,9 @@
case PCI_DEVICE_ID_INTEL_NTB_SS_HSX:
case PCI_DEVICE_ID_INTEL_NTB_PS_HSX:
case PCI_DEVICE_ID_INTEL_NTB_B2B_HSX:
+ case PCI_DEVICE_ID_INTEL_NTB_SS_BDX:
+ case PCI_DEVICE_ID_INTEL_NTB_PS_BDX:
+ case PCI_DEVICE_ID_INTEL_NTB_B2B_BDX:
ndev->hwerr_flags |= NTB_HWERR_SDOORBELL_LOCKUP;
break;
}
@@ -1857,6 +1873,9 @@
case PCI_DEVICE_ID_INTEL_NTB_SS_HSX:
case PCI_DEVICE_ID_INTEL_NTB_PS_HSX:
case PCI_DEVICE_ID_INTEL_NTB_B2B_HSX:
+ case PCI_DEVICE_ID_INTEL_NTB_SS_BDX:
+ case PCI_DEVICE_ID_INTEL_NTB_PS_BDX:
+ case PCI_DEVICE_ID_INTEL_NTB_B2B_BDX:
ndev->hwerr_flags |= NTB_HWERR_SB01BASE_LOCKUP;
break;
}
@@ -1878,6 +1897,9 @@
case PCI_DEVICE_ID_INTEL_NTB_SS_HSX:
case PCI_DEVICE_ID_INTEL_NTB_PS_HSX:
case PCI_DEVICE_ID_INTEL_NTB_B2B_HSX:
+ case PCI_DEVICE_ID_INTEL_NTB_SS_BDX:
+ case PCI_DEVICE_ID_INTEL_NTB_PS_BDX:
+ case PCI_DEVICE_ID_INTEL_NTB_B2B_BDX:
ndev->hwerr_flags |= NTB_HWERR_B2BDOORBELL_BIT14;
break;
}
@@ -1996,7 +2018,7 @@
ndev->ntb.ops = &intel_ntb_ops;
ndev->b2b_off = 0;
- ndev->b2b_idx = INT_MAX;
+ ndev->b2b_idx = UINT_MAX;
ndev->bar4_split = 0;
@@ -2234,14 +2256,17 @@
{PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_NTB_B2B_SNB)},
{PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_NTB_B2B_IVT)},
{PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_NTB_B2B_HSX)},
+ {PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_NTB_B2B_BDX)},
{PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_NTB_PS_JSF)},
{PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_NTB_PS_SNB)},
{PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_NTB_PS_IVT)},
{PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_NTB_PS_HSX)},
+ {PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_NTB_PS_BDX)},
{PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_NTB_SS_JSF)},
{PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_NTB_SS_SNB)},
{PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_NTB_SS_IVT)},
{PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_NTB_SS_HSX)},
+ {PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_NTB_SS_BDX)},
{0}
};
MODULE_DEVICE_TABLE(pci, intel_ntb_pci_tbl);
diff --git a/drivers/ntb/hw/intel/ntb_hw_intel.h b/drivers/ntb/hw/intel/ntb_hw_intel.h
index 7ddaf38..ea0612f7 100644
--- a/drivers/ntb/hw/intel/ntb_hw_intel.h
+++ b/drivers/ntb/hw/intel/ntb_hw_intel.h
@@ -67,6 +67,9 @@
#define PCI_DEVICE_ID_INTEL_NTB_PS_HSX 0x2F0E
#define PCI_DEVICE_ID_INTEL_NTB_SS_HSX 0x2F0F
#define PCI_DEVICE_ID_INTEL_NTB_B2B_BWD 0x0C4E
+#define PCI_DEVICE_ID_INTEL_NTB_B2B_BDX 0x6F0D
+#define PCI_DEVICE_ID_INTEL_NTB_PS_BDX 0x6F0E
+#define PCI_DEVICE_ID_INTEL_NTB_SS_BDX 0x6F0F
/* Intel Xeon hardware */
diff --git a/drivers/ntb/ntb_transport.c b/drivers/ntb/ntb_transport.c
index 1c6386d..6e3ee90 100644
--- a/drivers/ntb/ntb_transport.c
+++ b/drivers/ntb/ntb_transport.c
@@ -119,7 +119,8 @@
struct ntb_transport_ctx *transport;
struct ntb_dev *ndev;
void *cb_data;
- struct dma_chan *dma_chan;
+ struct dma_chan *tx_dma_chan;
+ struct dma_chan *rx_dma_chan;
bool client_ready;
bool link_is_up;
@@ -297,7 +298,7 @@
static int ntb_bus_init(struct ntb_transport_ctx *nt)
{
- list_add(&nt->entry, &ntb_transport_list);
+ list_add_tail(&nt->entry, &ntb_transport_list);
return 0;
}
@@ -452,7 +453,7 @@
out_offset = 0;
out_offset += snprintf(buf + out_offset, out_count - out_offset,
- "NTB QP stats\n");
+ "\nNTB QP stats:\n\n");
out_offset += snprintf(buf + out_offset, out_count - out_offset,
"rx_bytes - \t%llu\n", qp->rx_bytes);
out_offset += snprintf(buf + out_offset, out_count - out_offset,
@@ -470,11 +471,11 @@
out_offset += snprintf(buf + out_offset, out_count - out_offset,
"rx_err_ver - \t%llu\n", qp->rx_err_ver);
out_offset += snprintf(buf + out_offset, out_count - out_offset,
- "rx_buff - \t%p\n", qp->rx_buff);
+ "rx_buff - \t0x%p\n", qp->rx_buff);
out_offset += snprintf(buf + out_offset, out_count - out_offset,
"rx_index - \t%u\n", qp->rx_index);
out_offset += snprintf(buf + out_offset, out_count - out_offset,
- "rx_max_entry - \t%u\n", qp->rx_max_entry);
+ "rx_max_entry - \t%u\n\n", qp->rx_max_entry);
out_offset += snprintf(buf + out_offset, out_count - out_offset,
"tx_bytes - \t%llu\n", qp->tx_bytes);
@@ -489,15 +490,32 @@
out_offset += snprintf(buf + out_offset, out_count - out_offset,
"tx_err_no_buf - %llu\n", qp->tx_err_no_buf);
out_offset += snprintf(buf + out_offset, out_count - out_offset,
- "tx_mw - \t%p\n", qp->tx_mw);
+ "tx_mw - \t0x%p\n", qp->tx_mw);
out_offset += snprintf(buf + out_offset, out_count - out_offset,
- "tx_index - \t%u\n", qp->tx_index);
+ "tx_index (H) - \t%u\n", qp->tx_index);
+ out_offset += snprintf(buf + out_offset, out_count - out_offset,
+ "RRI (T) - \t%u\n",
+ qp->remote_rx_info->entry);
out_offset += snprintf(buf + out_offset, out_count - out_offset,
"tx_max_entry - \t%u\n", qp->tx_max_entry);
+ out_offset += snprintf(buf + out_offset, out_count - out_offset,
+ "free tx - \t%u\n",
+ ntb_transport_tx_free_entry(qp));
out_offset += snprintf(buf + out_offset, out_count - out_offset,
- "\nQP Link %s\n",
+ "\n");
+ out_offset += snprintf(buf + out_offset, out_count - out_offset,
+ "Using TX DMA - \t%s\n",
+ qp->tx_dma_chan ? "Yes" : "No");
+ out_offset += snprintf(buf + out_offset, out_count - out_offset,
+ "Using RX DMA - \t%s\n",
+ qp->rx_dma_chan ? "Yes" : "No");
+ out_offset += snprintf(buf + out_offset, out_count - out_offset,
+ "QP Link - \t%s\n",
qp->link_is_up ? "Up" : "Down");
+ out_offset += snprintf(buf + out_offset, out_count - out_offset,
+ "\n");
+
if (out_offset > out_count)
out_offset = out_count;
@@ -535,6 +553,7 @@
}
entry = list_first_entry(list, struct ntb_queue_entry, entry);
list_del(&entry->entry);
+
out:
spin_unlock_irqrestore(lock, flags);
@@ -1206,7 +1225,7 @@
{
struct dma_async_tx_descriptor *txd;
struct ntb_transport_qp *qp = entry->qp;
- struct dma_chan *chan = qp->dma_chan;
+ struct dma_chan *chan = qp->rx_dma_chan;
struct dma_device *device;
size_t pay_off, buff_off, len;
struct dmaengine_unmap_data *unmap;
@@ -1219,18 +1238,18 @@
goto err;
if (len < copy_bytes)
- goto err_wait;
+ goto err;
device = chan->device;
pay_off = (size_t)offset & ~PAGE_MASK;
buff_off = (size_t)buf & ~PAGE_MASK;
if (!is_dma_copy_aligned(device, pay_off, buff_off, len))
- goto err_wait;
+ goto err;
unmap = dmaengine_get_unmap_data(device->dev, 2, GFP_NOWAIT);
if (!unmap)
- goto err_wait;
+ goto err;
unmap->len = len;
unmap->addr[0] = dma_map_page(device->dev, virt_to_page(offset),
@@ -1273,12 +1292,6 @@
dmaengine_unmap_put(unmap);
err_get_unmap:
dmaengine_unmap_put(unmap);
-err_wait:
- /* If the callbacks come out of order, the writing of the index to the
- * last completed will be out of order. This may result in the
- * receive stalling forever.
- */
- dma_sync_wait(chan, qp->last_cookie);
err:
ntb_memcpy_rx(entry, offset);
qp->rx_memcpy++;
@@ -1373,8 +1386,8 @@
break;
}
- if (i && qp->dma_chan)
- dma_async_issue_pending(qp->dma_chan);
+ if (i && qp->rx_dma_chan)
+ dma_async_issue_pending(qp->rx_dma_chan);
if (i == qp->rx_max_entry) {
/* there is more work to do */
@@ -1441,7 +1454,7 @@
{
struct ntb_payload_header __iomem *hdr;
struct dma_async_tx_descriptor *txd;
- struct dma_chan *chan = qp->dma_chan;
+ struct dma_chan *chan = qp->tx_dma_chan;
struct dma_device *device;
size_t dest_off, buff_off;
struct dmaengine_unmap_data *unmap;
@@ -1634,14 +1647,27 @@
dma_cap_set(DMA_MEMCPY, dma_mask);
if (use_dma) {
- qp->dma_chan = dma_request_channel(dma_mask, ntb_dma_filter_fn,
- (void *)(unsigned long)node);
- if (!qp->dma_chan)
- dev_info(&pdev->dev, "Unable to allocate DMA channel\n");
+ qp->tx_dma_chan =
+ dma_request_channel(dma_mask, ntb_dma_filter_fn,
+ (void *)(unsigned long)node);
+ if (!qp->tx_dma_chan)
+ dev_info(&pdev->dev, "Unable to allocate TX DMA channel\n");
+
+ qp->rx_dma_chan =
+ dma_request_channel(dma_mask, ntb_dma_filter_fn,
+ (void *)(unsigned long)node);
+ if (!qp->rx_dma_chan)
+ dev_info(&pdev->dev, "Unable to allocate RX DMA channel\n");
} else {
- qp->dma_chan = NULL;
+ qp->tx_dma_chan = NULL;
+ qp->rx_dma_chan = NULL;
}
- dev_dbg(&pdev->dev, "Using %s memcpy\n", qp->dma_chan ? "DMA" : "CPU");
+
+ dev_dbg(&pdev->dev, "Using %s memcpy for TX\n",
+ qp->tx_dma_chan ? "DMA" : "CPU");
+
+ dev_dbg(&pdev->dev, "Using %s memcpy for RX\n",
+ qp->rx_dma_chan ? "DMA" : "CPU");
for (i = 0; i < NTB_QP_DEF_NUM_ENTRIES; i++) {
entry = kzalloc_node(sizeof(*entry), GFP_ATOMIC, node);
@@ -1676,8 +1702,10 @@
err1:
while ((entry = ntb_list_rm(&qp->ntb_rx_q_lock, &qp->rx_free_q)))
kfree(entry);
- if (qp->dma_chan)
- dma_release_channel(qp->dma_chan);
+ if (qp->tx_dma_chan)
+ dma_release_channel(qp->tx_dma_chan);
+ if (qp->rx_dma_chan)
+ dma_release_channel(qp->rx_dma_chan);
nt->qp_bitmap_free |= qp_bit;
err:
return NULL;
@@ -1701,12 +1729,27 @@
pdev = qp->ndev->pdev;
- if (qp->dma_chan) {
- struct dma_chan *chan = qp->dma_chan;
+ if (qp->tx_dma_chan) {
+ struct dma_chan *chan = qp->tx_dma_chan;
/* Putting the dma_chan to NULL will force any new traffic to be
* processed by the CPU instead of the DMA engine
*/
- qp->dma_chan = NULL;
+ qp->tx_dma_chan = NULL;
+
+ /* Try to be nice and wait for any queued DMA engine
+ * transactions to process before smashing it with a rock
+ */
+ dma_sync_wait(chan, qp->last_cookie);
+ dmaengine_terminate_all(chan);
+ dma_release_channel(chan);
+ }
+
+ if (qp->rx_dma_chan) {
+ struct dma_chan *chan = qp->rx_dma_chan;
+ /* Putting the dma_chan to NULL will force any new traffic to be
+ * processed by the CPU instead of the DMA engine
+ */
+ qp->rx_dma_chan = NULL;
/* Try to be nice and wait for any queued DMA engine
* transactions to process before smashing it with a rock
@@ -1843,7 +1886,7 @@
entry = ntb_list_rm(&qp->ntb_tx_free_q_lock, &qp->tx_free_q);
if (!entry) {
qp->tx_err_no_buf++;
- return -ENOMEM;
+ return -EBUSY;
}
entry->cb_data = cb;
@@ -1954,21 +1997,34 @@
unsigned int ntb_transport_max_size(struct ntb_transport_qp *qp)
{
unsigned int max;
+ unsigned int copy_align;
if (!qp)
return 0;
- if (!qp->dma_chan)
+ if (!qp->tx_dma_chan && !qp->rx_dma_chan)
return qp->tx_max_frame - sizeof(struct ntb_payload_header);
+ copy_align = max(qp->tx_dma_chan->device->copy_align,
+ qp->rx_dma_chan->device->copy_align);
+
/* If DMA engine usage is possible, try to find the max size for that */
max = qp->tx_max_frame - sizeof(struct ntb_payload_header);
- max -= max % (1 << qp->dma_chan->device->copy_align);
+ max -= max % (1 << copy_align);
return max;
}
EXPORT_SYMBOL_GPL(ntb_transport_max_size);
+unsigned int ntb_transport_tx_free_entry(struct ntb_transport_qp *qp)
+{
+ unsigned int head = qp->tx_index;
+ unsigned int tail = qp->remote_rx_info->entry;
+
+ return tail > head ? tail - head : qp->tx_max_entry + tail - head;
+}
+EXPORT_SYMBOL_GPL(ntb_transport_tx_free_entry);
+
static void ntb_transport_doorbell_callback(void *data, int vector)
{
struct ntb_transport_ctx *nt = data;
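The ntb_transport_tx_free_entry() helper added above treats the transmit ring as a circular buffer: qp->tx_index is the local head and qp->remote_rx_info->entry is the tail the peer has consumed up to, so the free count wraps modulo tx_max_entry. A quick worked example with tx_max_entry = 8: head = 5, tail = 2 gives 8 + 2 - 5 = 5 free entries; head = 2, tail = 6 gives 6 - 2 = 4.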
diff --git a/drivers/parisc/ccio-dma.c b/drivers/parisc/ccio-dma.c
index 02ff84f..957b421 100644
--- a/drivers/parisc/ccio-dma.c
+++ b/drivers/parisc/ccio-dma.c
@@ -1103,16 +1103,9 @@
struct ioc *ioc = ioc_list;
while (ioc != NULL) {
- u32 *res_ptr = (u32 *)ioc->res_map;
- int j;
-
- for (j = 0; j < (ioc->res_size / sizeof(u32)); j++) {
- if ((j & 7) == 0)
- seq_puts(m, "\n ");
- seq_printf(m, "%08x", *res_ptr);
- res_ptr++;
- }
- seq_puts(m, "\n\n");
+ seq_hex_dump(m, " ", DUMP_PREFIX_NONE, 32, 4, ioc->res_map,
+ ioc->res_size, false);
+ seq_putc(m, '\n');
ioc = ioc->next;
break; /* XXX - remove me */
}
diff --git a/drivers/parisc/sba_iommu.c b/drivers/parisc/sba_iommu.c
index f1441e4..225049b 100644
--- a/drivers/parisc/sba_iommu.c
+++ b/drivers/parisc/sba_iommu.c
@@ -1854,14 +1854,9 @@
{
struct sba_device *sba_dev = sba_list;
struct ioc *ioc = &sba_dev->ioc[0]; /* FIXME: Multi-IOC support! */
- unsigned int *res_ptr = (unsigned int *)ioc->res_map;
- int i;
- for (i = 0; i < (ioc->res_size/sizeof(unsigned int)); ++i, ++res_ptr) {
- if ((i & 7) == 0)
- seq_puts(m, "\n ");
- seq_printf(m, " %08x", *res_ptr);
- }
+ seq_hex_dump(m, " ", DUMP_PREFIX_NONE, 32, 4, ioc->res_map,
+ ioc->res_size, false);
seq_putc(m, '\n');
return 0;
diff --git a/drivers/pci/pci-driver.c b/drivers/pci/pci-driver.c
index 52a880c..dd652f2 100644
--- a/drivers/pci/pci-driver.c
+++ b/drivers/pci/pci-driver.c
@@ -467,7 +467,7 @@
pci_msi_shutdown(pci_dev);
pci_msix_shutdown(pci_dev);
-#ifdef CONFIG_KEXEC
+#ifdef CONFIG_KEXEC_CORE
/*
* If this is a kexec reboot, turn off Bus Master bit on the
* device to tell it to not continue to do DMA. Don't touch
diff --git a/drivers/pinctrl/core.c b/drivers/pinctrl/core.c
index 69723e0..9638a00 100644
--- a/drivers/pinctrl/core.c
+++ b/drivers/pinctrl/core.c
@@ -349,6 +349,9 @@
struct pinctrl_gpio_range *range = NULL;
struct gpio_chip *chip = gpio_to_chip(gpio);
+ if (WARN(!chip, "no gpio_chip for gpio%i?", gpio))
+ return false;
+
mutex_lock(&pinctrldev_list_mutex);
/* Loop over the pin controllers */
diff --git a/drivers/pinctrl/pinctrl-digicolor.c b/drivers/pinctrl/pinctrl-digicolor.c
index 461fffc..11f8b83 100644
--- a/drivers/pinctrl/pinctrl-digicolor.c
+++ b/drivers/pinctrl/pinctrl-digicolor.c
@@ -337,9 +337,9 @@
pmap->dev = &pdev->dev;
pmap->pctl = pinctrl_register(pctl_desc, &pdev->dev, pmap);
- if (!pmap->pctl) {
+ if (IS_ERR(pmap->pctl)) {
dev_err(&pdev->dev, "pinctrl driver registration failed\n");
- return -EINVAL;
+ return PTR_ERR(pmap->pctl);
}
ret = dc_gpiochip_add(pmap, pdev->dev.of_node);
diff --git a/drivers/pinctrl/pinmux.c b/drivers/pinctrl/pinmux.c
index 67e08cb..29984b3 100644
--- a/drivers/pinctrl/pinmux.c
+++ b/drivers/pinctrl/pinmux.c
@@ -313,8 +313,7 @@
/* See if this pctldev has this function */
while (selector < nfuncs) {
- const char *fname = ops->get_function_name(pctldev,
- selector);
+ const char *fname = ops->get_function_name(pctldev, selector);
if (!strcmp(function, fname))
return selector;
diff --git a/drivers/pinctrl/qcom/pinctrl-ssbi-gpio.c b/drivers/pinctrl/qcom/pinctrl-ssbi-gpio.c
index c978b31..e1a3721 100644
--- a/drivers/pinctrl/qcom/pinctrl-ssbi-gpio.c
+++ b/drivers/pinctrl/qcom/pinctrl-ssbi-gpio.c
@@ -723,9 +723,9 @@
#endif
pctrl->pctrl = pinctrl_register(&pctrl->desc, &pdev->dev, pctrl);
- if (!pctrl->pctrl) {
+ if (IS_ERR(pctrl->pctrl)) {
dev_err(&pdev->dev, "couldn't register pm8xxx gpio driver\n");
- return -ENODEV;
+ return PTR_ERR(pctrl->pctrl);
}
pctrl->chip = pm8xxx_gpio_template;
diff --git a/drivers/pinctrl/qcom/pinctrl-ssbi-mpp.c b/drivers/pinctrl/qcom/pinctrl-ssbi-mpp.c
index 2d1b69f..6652b8d 100644
--- a/drivers/pinctrl/qcom/pinctrl-ssbi-mpp.c
+++ b/drivers/pinctrl/qcom/pinctrl-ssbi-mpp.c
@@ -814,9 +814,9 @@
#endif
pctrl->pctrl = pinctrl_register(&pctrl->desc, &pdev->dev, pctrl);
- if (!pctrl->pctrl) {
+ if (IS_ERR(pctrl->pctrl)) {
dev_err(&pdev->dev, "couldn't register pm8xxx mpp driver\n");
- return -ENODEV;
+ return PTR_ERR(pctrl->pctrl);
}
pctrl->chip = pm8xxx_mpp_template;
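The pinctrl hunks above (digicolor, ssbi-gpio, ssbi-mpp) all adjust for pinctrl_register() now returning an ERR_PTR()-encoded pointer instead of NULL on failure, so each probe path switches to the standard IS_ERR()/PTR_ERR() idiom, e.g.:

	pctrl->pctrl = pinctrl_register(&pctrl->desc, &pdev->dev, pctrl);
	if (IS_ERR(pctrl->pctrl))
		return PTR_ERR(pctrl->pctrl);	/* propagate the encoded errno */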
diff --git a/drivers/pinctrl/samsung/pinctrl-s3c24xx.c b/drivers/pinctrl/samsung/pinctrl-s3c24xx.c
index 019844d..d168b39 100644
--- a/drivers/pinctrl/samsung/pinctrl-s3c24xx.c
+++ b/drivers/pinctrl/samsung/pinctrl-s3c24xx.c
@@ -361,7 +361,7 @@
u32 offset, u32 range)
{
struct s3c24xx_eint_data *data = irq_desc_get_handler_data(desc);
- struct irq_chip *chip = irq_desc_get_irq_chip(desc);
+ struct irq_chip *chip = irq_desc_get_chip(desc);
struct samsung_pinctrl_drv_data *d = data->drvdata;
unsigned int pend, mask;
diff --git a/drivers/platform/x86/acerhdf.c b/drivers/platform/x86/acerhdf.c
index 1ef02da..460fa67 100644
--- a/drivers/platform/x86/acerhdf.c
+++ b/drivers/platform/x86/acerhdf.c
@@ -346,8 +346,7 @@
* as late as the polling interval is since we can't do that in the respective
* accessors of the module parameters.
*/
-static int acerhdf_get_ec_temp(struct thermal_zone_device *thermal,
- unsigned long *t)
+static int acerhdf_get_ec_temp(struct thermal_zone_device *thermal, int *t)
{
int temp, err = 0;
@@ -453,7 +452,7 @@
}
static int acerhdf_get_trip_hyst(struct thermal_zone_device *thermal, int trip,
- unsigned long *temp)
+ int *temp)
{
if (trip != 0)
return -EINVAL;
@@ -464,7 +463,7 @@
}
static int acerhdf_get_trip_temp(struct thermal_zone_device *thermal, int trip,
- unsigned long *temp)
+ int *temp)
{
if (trip == 0)
*temp = fanon;
@@ -477,7 +476,7 @@
}
static int acerhdf_get_crit_temp(struct thermal_zone_device *thermal,
- unsigned long *temperature)
+ int *temperature)
{
*temperature = ACERHDF_TEMP_CRIT;
return 0;
diff --git a/drivers/platform/x86/intel_mid_thermal.c b/drivers/platform/x86/intel_mid_thermal.c
index 0944e83..9f713b8 100644
--- a/drivers/platform/x86/intel_mid_thermal.c
+++ b/drivers/platform/x86/intel_mid_thermal.c
@@ -132,7 +132,7 @@
* to achieve very close approximate temp value with less than
* 0.5C error
*/
-static int adc_to_temp(int direct, uint16_t adc_val, unsigned long *tp)
+static int adc_to_temp(int direct, uint16_t adc_val, int *tp)
{
int temp;
@@ -174,14 +174,13 @@
*
* Can sleep
*/
-static int mid_read_temp(struct thermal_zone_device *tzd, unsigned long *temp)
+static int mid_read_temp(struct thermal_zone_device *tzd, int *temp)
{
struct thermal_device_info *td_info = tzd->devdata;
uint16_t adc_val, addr;
uint8_t data = 0;
int ret;
- unsigned long curr_temp;
-
+ int curr_temp;
addr = td_info->chnl_addr;
@@ -453,7 +452,7 @@
*
* Can sleep
*/
-static int read_curr_temp(struct thermal_zone_device *tzd, unsigned long *temp)
+static int read_curr_temp(struct thermal_zone_device *tzd, int *temp)
{
WARN_ON(tzd == NULL);
return mid_read_temp(tzd, temp);
diff --git a/drivers/power/charger-manager.c b/drivers/power/charger-manager.c
index 1c202cc..907293e 100644
--- a/drivers/power/charger-manager.c
+++ b/drivers/power/charger-manager.c
@@ -619,7 +619,7 @@
#ifdef CONFIG_THERMAL
if (cm->tzd_batt) {
- ret = thermal_zone_get_temp(cm->tzd_batt, (unsigned long *)temp);
+ ret = thermal_zone_get_temp(cm->tzd_batt, temp);
if (!ret)
/* Calibrate temperature unit */
*temp /= 100;
diff --git a/drivers/power/power_supply_core.c b/drivers/power/power_supply_core.c
index 869284c..456987c 100644
--- a/drivers/power/power_supply_core.c
+++ b/drivers/power/power_supply_core.c
@@ -557,7 +557,7 @@
#ifdef CONFIG_THERMAL
static int power_supply_read_temp(struct thermal_zone_device *tzd,
- unsigned long *temp)
+ int *temp)
{
struct power_supply *psy;
union power_supply_propval val;
diff --git a/drivers/reset/reset-ath79.c b/drivers/reset/reset-ath79.c
index d2d2904..9aaf646 100644
--- a/drivers/reset/reset-ath79.c
+++ b/drivers/reset/reset-ath79.c
@@ -89,6 +89,7 @@
if (IS_ERR(ath79_reset->base))
return PTR_ERR(ath79_reset->base);
+ spin_lock_init(&ath79_reset->lock);
ath79_reset->rcdev.ops = &ath79_reset_ops;
ath79_reset->rcdev.owner = THIS_MODULE;
ath79_reset->rcdev.of_node = pdev->dev.of_node;
diff --git a/drivers/s390/crypto/zcrypt_api.c b/drivers/s390/crypto/zcrypt_api.c
index 01bf1f5..4eb4554 100644
--- a/drivers/s390/crypto/zcrypt_api.c
+++ b/drivers/s390/crypto/zcrypt_api.c
@@ -1206,16 +1206,8 @@
static void sprinthx4(unsigned char *title, struct seq_file *m,
unsigned int *array, unsigned int len)
{
- int r;
-
seq_printf(m, "\n%s\n", title);
- for (r = 0; r < len; r++) {
- if ((r % 8) == 0)
- seq_printf(m, " ");
- seq_printf(m, "%08X ", array[r]);
- if ((r % 8) == 7)
- seq_putc(m, '\n');
- }
+ seq_hex_dump(m, " ", DUMP_PREFIX_NONE, 32, 4, array, len, false);
seq_putc(m, '\n');
}
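The wil6210 debugfs, parisc ccio/sba, and s390 zcrypt hunks above all replace hand-rolled hex_dump_to_buffer() loops with the seq_hex_dump() helper. A minimal sketch of the pattern in a seq_file show callback, assuming the same helper signature the converted drivers use (the buffer and callback name are placeholders):

static int example_show(struct seq_file *m, void *v)
{
	static const u8 buf[64] = { 0xde, 0xad, 0xbe, 0xef };

	/* 16 bytes per row, 1-byte groups, no offset/address prefix,
	 * no trailing ASCII column.
	 */
	seq_hex_dump(m, "  ", DUMP_PREFIX_NONE, 16, 1, buf, sizeof(buf), false);
	return 0;
}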
diff --git a/drivers/scsi/Makefile b/drivers/scsi/Makefile
index 471d0879..1a8c9b5 100644
--- a/drivers/scsi/Makefile
+++ b/drivers/scsi/Makefile
@@ -172,6 +172,7 @@
scsi_mod-$(CONFIG_SCSI_PROC_FS) += scsi_proc.o
scsi_mod-y += scsi_trace.o scsi_logging.o
scsi_mod-$(CONFIG_PM) += scsi_pm.o
+scsi_mod-$(CONFIG_SCSI_DH) += scsi_dh.o
hv_storvsc-y := storvsc_drv.o
diff --git a/drivers/scsi/aic94xx/aic94xx_sds.c b/drivers/scsi/aic94xx/aic94xx_sds.c
index edb43fd..c831e30 100644
--- a/drivers/scsi/aic94xx/aic94xx_sds.c
+++ b/drivers/scsi/aic94xx/aic94xx_sds.c
@@ -983,7 +983,7 @@
{
int err, i;
u32 offs, size;
- struct asd_ll_el *el;
+ struct asd_ll_el *el = NULL;
struct asd_ctrla_phy_settings *ps;
struct asd_ctrla_phy_settings dflt_ps;
@@ -1004,6 +1004,7 @@
size = sizeof(struct asd_ctrla_phy_settings);
ps = &dflt_ps;
+ goto out_process;
}
if (size == 0)
@@ -1028,7 +1029,7 @@
ASD_DPRINTK("couldn't find ctrla phy settings struct\n");
goto out2;
}
-
+out_process:
err = asd_process_ctrla_phy_settings(asd_ha, ps);
if (err) {
ASD_DPRINTK("couldn't process ctrla phy settings\n");
diff --git a/drivers/scsi/bfa/bfa_ioc.c b/drivers/scsi/bfa/bfa_ioc.c
index 315d6d6..98f7e8c 100644
--- a/drivers/scsi/bfa/bfa_ioc.c
+++ b/drivers/scsi/bfa/bfa_ioc.c
@@ -3665,19 +3665,19 @@
if (sfp->state_query_cbfn)
sfp->state_query_cbfn(sfp->state_query_cbarg,
sfp->status);
- sfp->media = NULL;
- }
+ sfp->media = NULL;
+ }
- if (sfp->portspeed) {
- sfp->status = bfa_sfp_speed_valid(sfp, sfp->portspeed);
- if (sfp->state_query_cbfn)
- sfp->state_query_cbfn(sfp->state_query_cbarg,
- sfp->status);
- sfp->portspeed = BFA_PORT_SPEED_UNKNOWN;
- }
+ if (sfp->portspeed) {
+ sfp->status = bfa_sfp_speed_valid(sfp, sfp->portspeed);
+ if (sfp->state_query_cbfn)
+ sfp->state_query_cbfn(sfp->state_query_cbarg,
+ sfp->status);
+ sfp->portspeed = BFA_PORT_SPEED_UNKNOWN;
+ }
- sfp->state_query_lock = 0;
- sfp->state_query_cbfn = NULL;
+ sfp->state_query_lock = 0;
+ sfp->state_query_cbfn = NULL;
}
/*
@@ -3878,7 +3878,7 @@
bfa_trc(sfp, sfp->data_valid);
if (sfp->data_valid) {
u32 size = sizeof(struct sfp_mem_s);
- u8 *des = (u8 *) &(sfp->sfpmem);
+ u8 *des = (u8 *)(sfp->sfpmem);
memcpy(des, sfp->dbuf_kva, size);
}
/*
diff --git a/drivers/scsi/device_handler/Kconfig b/drivers/scsi/device_handler/Kconfig
index 69abd0a..e5647d5 100644
--- a/drivers/scsi/device_handler/Kconfig
+++ b/drivers/scsi/device_handler/Kconfig
@@ -3,7 +3,7 @@
#
menuconfig SCSI_DH
- tristate "SCSI Device Handlers"
+ bool "SCSI Device Handlers"
depends on SCSI
default n
help
diff --git a/drivers/scsi/device_handler/Makefile b/drivers/scsi/device_handler/Makefile
index e1d2ea0..09866c5 100644
--- a/drivers/scsi/device_handler/Makefile
+++ b/drivers/scsi/device_handler/Makefile
@@ -1,7 +1,6 @@
#
# SCSI Device Handler
#
-obj-$(CONFIG_SCSI_DH) += scsi_dh.o
obj-$(CONFIG_SCSI_DH_RDAC) += scsi_dh_rdac.o
obj-$(CONFIG_SCSI_DH_HP_SW) += scsi_dh_hp_sw.o
obj-$(CONFIG_SCSI_DH_EMC) += scsi_dh_emc.o
diff --git a/drivers/scsi/device_handler/scsi_dh.c b/drivers/scsi/device_handler/scsi_dh.c
deleted file mode 100644
index 1efebc9..0000000
--- a/drivers/scsi/device_handler/scsi_dh.c
+++ /dev/null
@@ -1,621 +0,0 @@
-/*
- * SCSI device handler infrastruture.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 2 of the License, or (at your
- * option) any later version.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write to the Free Software Foundation, Inc.,
- * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
- *
- * Copyright IBM Corporation, 2007
- * Authors:
- * Chandra Seetharaman <sekharan@us.ibm.com>
- * Mike Anderson <andmike@linux.vnet.ibm.com>
- */
-
-#include <linux/slab.h>
-#include <linux/module.h>
-#include <scsi/scsi_dh.h>
-#include "../scsi_priv.h"
-
-static DEFINE_SPINLOCK(list_lock);
-static LIST_HEAD(scsi_dh_list);
-
-static struct scsi_device_handler *get_device_handler(const char *name)
-{
- struct scsi_device_handler *tmp, *found = NULL;
-
- spin_lock(&list_lock);
- list_for_each_entry(tmp, &scsi_dh_list, list) {
- if (!strncmp(tmp->name, name, strlen(tmp->name))) {
- found = tmp;
- break;
- }
- }
- spin_unlock(&list_lock);
- return found;
-}
-
-/*
- * device_handler_match_function - Match a device handler to a device
- * @sdev - SCSI device to be tested
- *
- * Tests @sdev against the match function of all registered device_handler.
- * Returns the found device handler or NULL if not found.
- */
-static struct scsi_device_handler *
-device_handler_match_function(struct scsi_device *sdev)
-{
- struct scsi_device_handler *tmp_dh, *found_dh = NULL;
-
- spin_lock(&list_lock);
- list_for_each_entry(tmp_dh, &scsi_dh_list, list) {
- if (tmp_dh->match && tmp_dh->match(sdev)) {
- found_dh = tmp_dh;
- break;
- }
- }
- spin_unlock(&list_lock);
- return found_dh;
-}
-
-/*
- * device_handler_match - Attach a device handler to a device
- * @scsi_dh - The device handler to match against or NULL
- * @sdev - SCSI device to be tested against @scsi_dh
- *
- * Tests @sdev against the device handler @scsi_dh or against
- * all registered device_handler if @scsi_dh == NULL.
- * Returns the found device handler or NULL if not found.
- */
-static struct scsi_device_handler *
-device_handler_match(struct scsi_device_handler *scsi_dh,
- struct scsi_device *sdev)
-{
- struct scsi_device_handler *found_dh;
-
- found_dh = device_handler_match_function(sdev);
-
- if (scsi_dh && found_dh != scsi_dh)
- found_dh = NULL;
-
- return found_dh;
-}
-
-/*
- * scsi_dh_handler_attach - Attach a device handler to a device
- * @sdev - SCSI device the device handler should attach to
- * @scsi_dh - The device handler to attach
- */
-static int scsi_dh_handler_attach(struct scsi_device *sdev,
- struct scsi_device_handler *scsi_dh)
-{
- struct scsi_dh_data *d;
-
- if (sdev->scsi_dh_data) {
- if (sdev->scsi_dh_data->scsi_dh != scsi_dh)
- return -EBUSY;
-
- kref_get(&sdev->scsi_dh_data->kref);
- return 0;
- }
-
- if (!try_module_get(scsi_dh->module))
- return -EINVAL;
-
- d = scsi_dh->attach(sdev);
- if (IS_ERR(d)) {
- sdev_printk(KERN_ERR, sdev, "%s: Attach failed (%ld)\n",
- scsi_dh->name, PTR_ERR(d));
- module_put(scsi_dh->module);
- return PTR_ERR(d);
- }
-
- d->scsi_dh = scsi_dh;
- kref_init(&d->kref);
- d->sdev = sdev;
-
- spin_lock_irq(sdev->request_queue->queue_lock);
- sdev->scsi_dh_data = d;
- spin_unlock_irq(sdev->request_queue->queue_lock);
- return 0;
-}
-
-static void __detach_handler (struct kref *kref)
-{
- struct scsi_dh_data *scsi_dh_data =
- container_of(kref, struct scsi_dh_data, kref);
- struct scsi_device_handler *scsi_dh = scsi_dh_data->scsi_dh;
- struct scsi_device *sdev = scsi_dh_data->sdev;
-
- scsi_dh->detach(sdev);
-
- spin_lock_irq(sdev->request_queue->queue_lock);
- sdev->scsi_dh_data = NULL;
- spin_unlock_irq(sdev->request_queue->queue_lock);
-
- sdev_printk(KERN_NOTICE, sdev, "%s: Detached\n", scsi_dh->name);
- module_put(scsi_dh->module);
-}
-
-/*
- * scsi_dh_handler_detach - Detach a device handler from a device
- * @sdev - SCSI device the device handler should be detached from
- * @scsi_dh - Device handler to be detached
- *
- * Detach from a device handler. If a device handler is specified,
- * only detach if the currently attached handler matches @scsi_dh.
- */
-static void scsi_dh_handler_detach(struct scsi_device *sdev,
- struct scsi_device_handler *scsi_dh)
-{
- if (!sdev->scsi_dh_data)
- return;
-
- if (scsi_dh && scsi_dh != sdev->scsi_dh_data->scsi_dh)
- return;
-
- if (!scsi_dh)
- scsi_dh = sdev->scsi_dh_data->scsi_dh;
-
- if (scsi_dh)
- kref_put(&sdev->scsi_dh_data->kref, __detach_handler);
-}
-
-/*
- * Functions for sysfs attribute 'dh_state'
- */
-static ssize_t
-store_dh_state(struct device *dev, struct device_attribute *attr,
- const char *buf, size_t count)
-{
- struct scsi_device *sdev = to_scsi_device(dev);
- struct scsi_device_handler *scsi_dh;
- int err = -EINVAL;
-
- if (sdev->sdev_state == SDEV_CANCEL ||
- sdev->sdev_state == SDEV_DEL)
- return -ENODEV;
-
- if (!sdev->scsi_dh_data) {
- /*
- * Attach to a device handler
- */
- if (!(scsi_dh = get_device_handler(buf)))
- return err;
- err = scsi_dh_handler_attach(sdev, scsi_dh);
- } else {
- scsi_dh = sdev->scsi_dh_data->scsi_dh;
- if (!strncmp(buf, "detach", 6)) {
- /*
- * Detach from a device handler
- */
- scsi_dh_handler_detach(sdev, scsi_dh);
- err = 0;
- } else if (!strncmp(buf, "activate", 8)) {
- /*
- * Activate a device handler
- */
- if (scsi_dh->activate)
- err = scsi_dh->activate(sdev, NULL, NULL);
- else
- err = 0;
- }
- }
-
- return err<0?err:count;
-}
-
-static ssize_t
-show_dh_state(struct device *dev, struct device_attribute *attr, char *buf)
-{
- struct scsi_device *sdev = to_scsi_device(dev);
-
- if (!sdev->scsi_dh_data)
- return snprintf(buf, 20, "detached\n");
-
- return snprintf(buf, 20, "%s\n", sdev->scsi_dh_data->scsi_dh->name);
-}
-
-static struct device_attribute scsi_dh_state_attr =
- __ATTR(dh_state, S_IRUGO | S_IWUSR, show_dh_state,
- store_dh_state);
-
-/*
- * scsi_dh_sysfs_attr_add - Callback for scsi_init_dh
- */
-static int scsi_dh_sysfs_attr_add(struct device *dev, void *data)
-{
- struct scsi_device *sdev;
- int err;
-
- if (!scsi_is_sdev_device(dev))
- return 0;
-
- sdev = to_scsi_device(dev);
-
- err = device_create_file(&sdev->sdev_gendev,
- &scsi_dh_state_attr);
-
- return 0;
-}
-
-/*
- * scsi_dh_sysfs_attr_remove - Callback for scsi_exit_dh
- */
-static int scsi_dh_sysfs_attr_remove(struct device *dev, void *data)
-{
- struct scsi_device *sdev;
-
- if (!scsi_is_sdev_device(dev))
- return 0;
-
- sdev = to_scsi_device(dev);
-
- device_remove_file(&sdev->sdev_gendev,
- &scsi_dh_state_attr);
-
- return 0;
-}
-
-/*
- * scsi_dh_notifier - notifier chain callback
- */
-static int scsi_dh_notifier(struct notifier_block *nb,
- unsigned long action, void *data)
-{
- struct device *dev = data;
- struct scsi_device *sdev;
- int err = 0;
- struct scsi_device_handler *devinfo = NULL;
-
- if (!scsi_is_sdev_device(dev))
- return 0;
-
- sdev = to_scsi_device(dev);
-
- if (action == BUS_NOTIFY_ADD_DEVICE) {
- err = device_create_file(dev, &scsi_dh_state_attr);
- /* don't care about err */
- devinfo = device_handler_match(NULL, sdev);
- if (devinfo)
- err = scsi_dh_handler_attach(sdev, devinfo);
- } else if (action == BUS_NOTIFY_DEL_DEVICE) {
- device_remove_file(dev, &scsi_dh_state_attr);
- scsi_dh_handler_detach(sdev, NULL);
- }
- return err;
-}
-
-/*
- * scsi_dh_notifier_add - Callback for scsi_register_device_handler
- */
-static int scsi_dh_notifier_add(struct device *dev, void *data)
-{
- struct scsi_device_handler *scsi_dh = data;
- struct scsi_device *sdev;
-
- if (!scsi_is_sdev_device(dev))
- return 0;
-
- if (!get_device(dev))
- return 0;
-
- sdev = to_scsi_device(dev);
-
- if (device_handler_match(scsi_dh, sdev))
- scsi_dh_handler_attach(sdev, scsi_dh);
-
- put_device(dev);
-
- return 0;
-}
-
-/*
- * scsi_dh_notifier_remove - Callback for scsi_unregister_device_handler
- */
-static int scsi_dh_notifier_remove(struct device *dev, void *data)
-{
- struct scsi_device_handler *scsi_dh = data;
- struct scsi_device *sdev;
-
- if (!scsi_is_sdev_device(dev))
- return 0;
-
- if (!get_device(dev))
- return 0;
-
- sdev = to_scsi_device(dev);
-
- scsi_dh_handler_detach(sdev, scsi_dh);
-
- put_device(dev);
-
- return 0;
-}
-
-/*
- * scsi_register_device_handler - register a device handler personality
- * module.
- * @scsi_dh - device handler to be registered.
- *
- * Returns 0 on success, -EBUSY if handler already registered.
- */
-int scsi_register_device_handler(struct scsi_device_handler *scsi_dh)
-{
-
- if (get_device_handler(scsi_dh->name))
- return -EBUSY;
-
- if (!scsi_dh->attach || !scsi_dh->detach)
- return -EINVAL;
-
- spin_lock(&list_lock);
- list_add(&scsi_dh->list, &scsi_dh_list);
- spin_unlock(&list_lock);
-
- bus_for_each_dev(&scsi_bus_type, NULL, scsi_dh, scsi_dh_notifier_add);
- printk(KERN_INFO "%s: device handler registered\n", scsi_dh->name);
-
- return SCSI_DH_OK;
-}
-EXPORT_SYMBOL_GPL(scsi_register_device_handler);
-
-/*
- * scsi_unregister_device_handler - register a device handler personality
- * module.
- * @scsi_dh - device handler to be unregistered.
- *
- * Returns 0 on success, -ENODEV if handler not registered.
- */
-int scsi_unregister_device_handler(struct scsi_device_handler *scsi_dh)
-{
-
- if (!get_device_handler(scsi_dh->name))
- return -ENODEV;
-
- bus_for_each_dev(&scsi_bus_type, NULL, scsi_dh,
- scsi_dh_notifier_remove);
-
- spin_lock(&list_lock);
- list_del(&scsi_dh->list);
- spin_unlock(&list_lock);
- printk(KERN_INFO "%s: device handler unregistered\n", scsi_dh->name);
-
- return SCSI_DH_OK;
-}
-EXPORT_SYMBOL_GPL(scsi_unregister_device_handler);
-
-/*
- * scsi_dh_activate - activate the path associated with the scsi_device
- * corresponding to the given request queue.
- * Returns immediately without waiting for activation to be completed.
- * @q - Request queue that is associated with the scsi_device to be
- * activated.
- * @fn - Function to be called upon completion of the activation.
- * Function fn is called with data (below) and the error code.
- * Function fn may be called from the same calling context. So,
- * do not hold the lock in the caller which may be needed in fn.
- * @data - data passed to the function fn upon completion.
- *
- */
-int scsi_dh_activate(struct request_queue *q, activate_complete fn, void *data)
-{
- int err = 0;
- unsigned long flags;
- struct scsi_device *sdev;
- struct scsi_device_handler *scsi_dh = NULL;
- struct device *dev = NULL;
-
- spin_lock_irqsave(q->queue_lock, flags);
- sdev = q->queuedata;
- if (!sdev) {
- spin_unlock_irqrestore(q->queue_lock, flags);
- err = SCSI_DH_NOSYS;
- if (fn)
- fn(data, err);
- return err;
- }
-
- if (sdev->scsi_dh_data)
- scsi_dh = sdev->scsi_dh_data->scsi_dh;
- dev = get_device(&sdev->sdev_gendev);
- if (!scsi_dh || !dev ||
- sdev->sdev_state == SDEV_CANCEL ||
- sdev->sdev_state == SDEV_DEL)
- err = SCSI_DH_NOSYS;
- if (sdev->sdev_state == SDEV_OFFLINE)
- err = SCSI_DH_DEV_OFFLINED;
- spin_unlock_irqrestore(q->queue_lock, flags);
-
- if (err) {
- if (fn)
- fn(data, err);
- goto out;
- }
-
- if (scsi_dh->activate)
- err = scsi_dh->activate(sdev, fn, data);
-out:
- put_device(dev);
- return err;
-}
-EXPORT_SYMBOL_GPL(scsi_dh_activate);
-
-/*
- * scsi_dh_set_params - set the parameters for the device as per the
- * string specified in params.
- * @q - Request queue that is associated with the scsi_device for
- * which the parameters to be set.
- * @params - parameters in the following format
- * "no_of_params\0param1\0param2\0param3\0...\0"
- * for example, string for 2 parameters with value 10 and 21
- * is specified as "2\010\021\0".
- */
-int scsi_dh_set_params(struct request_queue *q, const char *params)
-{
- int err = -SCSI_DH_NOSYS;
- unsigned long flags;
- struct scsi_device *sdev;
- struct scsi_device_handler *scsi_dh = NULL;
-
- spin_lock_irqsave(q->queue_lock, flags);
- sdev = q->queuedata;
- if (sdev && sdev->scsi_dh_data)
- scsi_dh = sdev->scsi_dh_data->scsi_dh;
- if (scsi_dh && scsi_dh->set_params && get_device(&sdev->sdev_gendev))
- err = 0;
- spin_unlock_irqrestore(q->queue_lock, flags);
-
- if (err)
- return err;
- err = scsi_dh->set_params(sdev, params);
- put_device(&sdev->sdev_gendev);
- return err;
-}
-EXPORT_SYMBOL_GPL(scsi_dh_set_params);
-
-/*
- * scsi_dh_handler_exist - Return TRUE(1) if a device handler exists for
- * the given name. FALSE(0) otherwise.
- * @name - name of the device handler.
- */
-int scsi_dh_handler_exist(const char *name)
-{
- return (get_device_handler(name) != NULL);
-}
-EXPORT_SYMBOL_GPL(scsi_dh_handler_exist);
-
-/*
- * scsi_dh_attach - Attach device handler
- * @q - Request queue that is associated with the scsi_device
- * the handler should be attached to
- * @name - name of the handler to attach
- */
-int scsi_dh_attach(struct request_queue *q, const char *name)
-{
- unsigned long flags;
- struct scsi_device *sdev;
- struct scsi_device_handler *scsi_dh;
- int err = 0;
-
- scsi_dh = get_device_handler(name);
- if (!scsi_dh)
- return -EINVAL;
-
- spin_lock_irqsave(q->queue_lock, flags);
- sdev = q->queuedata;
- if (!sdev || !get_device(&sdev->sdev_gendev))
- err = -ENODEV;
- spin_unlock_irqrestore(q->queue_lock, flags);
-
- if (!err) {
- err = scsi_dh_handler_attach(sdev, scsi_dh);
- put_device(&sdev->sdev_gendev);
- }
- return err;
-}
-EXPORT_SYMBOL_GPL(scsi_dh_attach);
-
-/*
- * scsi_dh_detach - Detach device handler
- * @q - Request queue that is associated with the scsi_device
- * the handler should be detached from
- *
- * This function will detach the device handler only
- * if the sdev is not part of the internal list, ie
- * if it has been attached manually.
- */
-void scsi_dh_detach(struct request_queue *q)
-{
- unsigned long flags;
- struct scsi_device *sdev;
- struct scsi_device_handler *scsi_dh = NULL;
-
- spin_lock_irqsave(q->queue_lock, flags);
- sdev = q->queuedata;
- if (!sdev || !get_device(&sdev->sdev_gendev))
- sdev = NULL;
- spin_unlock_irqrestore(q->queue_lock, flags);
-
- if (!sdev)
- return;
-
- if (sdev->scsi_dh_data) {
- scsi_dh = sdev->scsi_dh_data->scsi_dh;
- scsi_dh_handler_detach(sdev, scsi_dh);
- }
- put_device(&sdev->sdev_gendev);
-}
-EXPORT_SYMBOL_GPL(scsi_dh_detach);
-
-/*
- * scsi_dh_attached_handler_name - Get attached device handler's name
- * @q - Request queue that is associated with the scsi_device
- * that may have a device handler attached
- * @gfp - the GFP mask used in the kmalloc() call when allocating memory
- *
- * Returns name of attached handler, NULL if no handler is attached.
- * Caller must take care to free the returned string.
- */
-const char *scsi_dh_attached_handler_name(struct request_queue *q, gfp_t gfp)
-{
- unsigned long flags;
- struct scsi_device *sdev;
- const char *handler_name = NULL;
-
- spin_lock_irqsave(q->queue_lock, flags);
- sdev = q->queuedata;
- if (!sdev || !get_device(&sdev->sdev_gendev))
- sdev = NULL;
- spin_unlock_irqrestore(q->queue_lock, flags);
-
- if (!sdev)
- return NULL;
-
- if (sdev->scsi_dh_data)
- handler_name = kstrdup(sdev->scsi_dh_data->scsi_dh->name, gfp);
-
- put_device(&sdev->sdev_gendev);
- return handler_name;
-}
-EXPORT_SYMBOL_GPL(scsi_dh_attached_handler_name);
-
-static struct notifier_block scsi_dh_nb = {
- .notifier_call = scsi_dh_notifier
-};
-
-static int __init scsi_dh_init(void)
-{
- int r;
-
- r = bus_register_notifier(&scsi_bus_type, &scsi_dh_nb);
-
- if (!r)
- bus_for_each_dev(&scsi_bus_type, NULL, NULL,
- scsi_dh_sysfs_attr_add);
-
- return r;
-}
-
-static void __exit scsi_dh_exit(void)
-{
- bus_for_each_dev(&scsi_bus_type, NULL, NULL,
- scsi_dh_sysfs_attr_remove);
- bus_unregister_notifier(&scsi_bus_type, &scsi_dh_nb);
-}
-
-module_init(scsi_dh_init);
-module_exit(scsi_dh_exit);
-
-MODULE_DESCRIPTION("SCSI device handler");
-MODULE_AUTHOR("Chandra Seetharaman <sekharan@us.ibm.com>");
-MODULE_LICENSE("GPL");
diff --git a/drivers/scsi/device_handler/scsi_dh_alua.c b/drivers/scsi/device_handler/scsi_dh_alua.c
index 854b568..cc2773b 100644
--- a/drivers/scsi/device_handler/scsi_dh_alua.c
+++ b/drivers/scsi/device_handler/scsi_dh_alua.c
@@ -62,7 +62,6 @@
#define ALUA_OPTIMIZE_STPG 1
struct alua_dh_data {
- struct scsi_dh_data dh_data;
int group_id;
int rel_port;
int tpgs;
@@ -86,11 +85,6 @@
static char print_alua_state(int);
static int alua_check_sense(struct scsi_device *, struct scsi_sense_hdr *);
-static inline struct alua_dh_data *get_alua_data(struct scsi_device *sdev)
-{
- return container_of(sdev->scsi_dh_data, struct alua_dh_data, dh_data);
-}
-
static int realloc_buffer(struct alua_dh_data *h, unsigned len)
{
if (h->buff && h->buff != h->inq)
@@ -708,7 +702,7 @@
*/
static int alua_set_params(struct scsi_device *sdev, const char *params)
{
- struct alua_dh_data *h = get_alua_data(sdev);
+ struct alua_dh_data *h = sdev->handler_data;
unsigned int optimize = 0, argc;
const char *p = params;
int result = SCSI_DH_OK;
@@ -746,7 +740,7 @@
static int alua_activate(struct scsi_device *sdev,
activate_complete fn, void *data)
{
- struct alua_dh_data *h = get_alua_data(sdev);
+ struct alua_dh_data *h = sdev->handler_data;
int err = SCSI_DH_OK;
int stpg = 0;
@@ -804,7 +798,7 @@
*/
static int alua_prep_fn(struct scsi_device *sdev, struct request *req)
{
- struct alua_dh_data *h = get_alua_data(sdev);
+ struct alua_dh_data *h = sdev->handler_data;
int ret = BLKPREP_OK;
if (h->state == TPGS_STATE_TRANSITIONING)
@@ -819,23 +813,18 @@
}
-static bool alua_match(struct scsi_device *sdev)
-{
- return (scsi_device_tpgs(sdev) != 0);
-}
-
/*
* alua_bus_attach - Attach device handler
* @sdev: device to be attached to
*/
-static struct scsi_dh_data *alua_bus_attach(struct scsi_device *sdev)
+static int alua_bus_attach(struct scsi_device *sdev)
{
struct alua_dh_data *h;
int err;
h = kzalloc(sizeof(*h) , GFP_KERNEL);
if (!h)
- return ERR_PTR(-ENOMEM);
+ return -ENOMEM;
h->tpgs = TPGS_MODE_UNINITIALIZED;
h->state = TPGS_STATE_OPTIMIZED;
h->group_id = -1;
@@ -848,11 +837,11 @@
if (err != SCSI_DH_OK && err != SCSI_DH_DEV_OFFLINED)
goto failed;
- sdev_printk(KERN_NOTICE, sdev, "%s: Attached\n", ALUA_DH_NAME);
- return &h->dh_data;
+ sdev->handler_data = h;
+ return 0;
failed:
kfree(h);
- return ERR_PTR(-EINVAL);
+ return -EINVAL;
}
/*
@@ -861,10 +850,11 @@
*/
static void alua_bus_detach(struct scsi_device *sdev)
{
- struct alua_dh_data *h = get_alua_data(sdev);
+ struct alua_dh_data *h = sdev->handler_data;
if (h->buff && h->inq != h->buff)
kfree(h->buff);
+ sdev->handler_data = NULL;
kfree(h);
}
@@ -877,7 +867,6 @@
.check_sense = alua_check_sense,
.activate = alua_activate,
.set_params = alua_set_params,
- .match = alua_match,
};
static int __init alua_init(void)
diff --git a/drivers/scsi/device_handler/scsi_dh_emc.c b/drivers/scsi/device_handler/scsi_dh_emc.c
index 6ed1caa..e6fb97c 100644
--- a/drivers/scsi/device_handler/scsi_dh_emc.c
+++ b/drivers/scsi/device_handler/scsi_dh_emc.c
@@ -72,7 +72,6 @@
};
struct clariion_dh_data {
- struct scsi_dh_data dh_data;
/*
* Flags:
* CLARIION_SHORT_TRESPASS
@@ -114,13 +113,6 @@
int current_sp;
};
-static inline struct clariion_dh_data
- *get_clariion_data(struct scsi_device *sdev)
-{
- return container_of(sdev->scsi_dh_data, struct clariion_dh_data,
- dh_data);
-}
-
/*
* Parse MODE_SELECT cmd reply.
*/
@@ -450,7 +442,7 @@
static int clariion_prep_fn(struct scsi_device *sdev, struct request *req)
{
- struct clariion_dh_data *h = get_clariion_data(sdev);
+ struct clariion_dh_data *h = sdev->handler_data;
int ret = BLKPREP_OK;
if (h->lun_state != CLARIION_LUN_OWNED) {
@@ -533,7 +525,7 @@
static int clariion_activate(struct scsi_device *sdev,
activate_complete fn, void *data)
{
- struct clariion_dh_data *csdev = get_clariion_data(sdev);
+ struct clariion_dh_data *csdev = sdev->handler_data;
int result;
result = clariion_send_inquiry(sdev, csdev);
@@ -574,7 +566,7 @@
*/
static int clariion_set_params(struct scsi_device *sdev, const char *params)
{
- struct clariion_dh_data *csdev = get_clariion_data(sdev);
+ struct clariion_dh_data *csdev = sdev->handler_data;
unsigned int hr = 0, st = 0, argc;
const char *p = params;
int result = SCSI_DH_OK;
@@ -622,42 +614,14 @@
return result;
}
-static const struct {
- char *vendor;
- char *model;
-} clariion_dev_list[] = {
- {"DGC", "RAID"},
- {"DGC", "DISK"},
- {"DGC", "VRAID"},
- {NULL, NULL},
-};
-
-static bool clariion_match(struct scsi_device *sdev)
-{
- int i;
-
- if (scsi_device_tpgs(sdev))
- return false;
-
- for (i = 0; clariion_dev_list[i].vendor; i++) {
- if (!strncmp(sdev->vendor, clariion_dev_list[i].vendor,
- strlen(clariion_dev_list[i].vendor)) &&
- !strncmp(sdev->model, clariion_dev_list[i].model,
- strlen(clariion_dev_list[i].model))) {
- return true;
- }
- }
- return false;
-}
-
-static struct scsi_dh_data *clariion_bus_attach(struct scsi_device *sdev)
+static int clariion_bus_attach(struct scsi_device *sdev)
{
struct clariion_dh_data *h;
int err;
h = kzalloc(sizeof(*h) , GFP_KERNEL);
if (!h)
- return ERR_PTR(-ENOMEM);
+ return -ENOMEM;
h->lun_state = CLARIION_LUN_UNINITIALIZED;
h->default_sp = CLARIION_UNBOUND_LU;
h->current_sp = CLARIION_UNBOUND_LU;
@@ -675,18 +639,19 @@
CLARIION_NAME, h->current_sp + 'A',
h->port, lun_state[h->lun_state],
h->default_sp + 'A');
- return &h->dh_data;
+
+ sdev->handler_data = h;
+ return 0;
failed:
kfree(h);
- return ERR_PTR(-EINVAL);
+ return -EINVAL;
}
static void clariion_bus_detach(struct scsi_device *sdev)
{
- struct clariion_dh_data *h = get_clariion_data(sdev);
-
- kfree(h);
+ kfree(sdev->handler_data);
+ sdev->handler_data = NULL;
}
static struct scsi_device_handler clariion_dh = {
@@ -698,7 +663,6 @@
.activate = clariion_activate,
.prep_fn = clariion_prep_fn,
.set_params = clariion_set_params,
- .match = clariion_match,
};
static int __init clariion_init(void)
diff --git a/drivers/scsi/device_handler/scsi_dh_hp_sw.c b/drivers/scsi/device_handler/scsi_dh_hp_sw.c
index 485d995..9406d5f 100644
--- a/drivers/scsi/device_handler/scsi_dh_hp_sw.c
+++ b/drivers/scsi/device_handler/scsi_dh_hp_sw.c
@@ -38,7 +38,6 @@
#define HP_SW_PATH_PASSIVE 1
struct hp_sw_dh_data {
- struct scsi_dh_data dh_data;
unsigned char sense[SCSI_SENSE_BUFFERSIZE];
int path_state;
int retries;
@@ -50,11 +49,6 @@
static int hp_sw_start_stop(struct hp_sw_dh_data *);
-static inline struct hp_sw_dh_data *get_hp_sw_data(struct scsi_device *sdev)
-{
- return container_of(sdev->scsi_dh_data, struct hp_sw_dh_data, dh_data);
-}
-
/*
* tur_done - Handle TEST UNIT READY return status
* @sdev: sdev the command has been sent to
@@ -267,7 +261,7 @@
static int hp_sw_prep_fn(struct scsi_device *sdev, struct request *req)
{
- struct hp_sw_dh_data *h = get_hp_sw_data(sdev);
+ struct hp_sw_dh_data *h = sdev->handler_data;
int ret = BLKPREP_OK;
if (h->path_state != HP_SW_PATH_ACTIVE) {
@@ -292,7 +286,7 @@
activate_complete fn, void *data)
{
int ret = SCSI_DH_OK;
- struct hp_sw_dh_data *h = get_hp_sw_data(sdev);
+ struct hp_sw_dh_data *h = sdev->handler_data;
ret = hp_sw_tur(sdev, h);
@@ -311,43 +305,14 @@
return 0;
}
-static const struct {
- char *vendor;
- char *model;
-} hp_sw_dh_data_list[] = {
- {"COMPAQ", "MSA1000 VOLUME"},
- {"COMPAQ", "HSV110"},
- {"HP", "HSV100"},
- {"DEC", "HSG80"},
- {NULL, NULL},
-};
-
-static bool hp_sw_match(struct scsi_device *sdev)
-{
- int i;
-
- if (scsi_device_tpgs(sdev))
- return false;
-
- for (i = 0; hp_sw_dh_data_list[i].vendor; i++) {
- if (!strncmp(sdev->vendor, hp_sw_dh_data_list[i].vendor,
- strlen(hp_sw_dh_data_list[i].vendor)) &&
- !strncmp(sdev->model, hp_sw_dh_data_list[i].model,
- strlen(hp_sw_dh_data_list[i].model))) {
- return true;
- }
- }
- return false;
-}
-
-static struct scsi_dh_data *hp_sw_bus_attach(struct scsi_device *sdev)
+static int hp_sw_bus_attach(struct scsi_device *sdev)
{
struct hp_sw_dh_data *h;
int ret;
h = kzalloc(sizeof(*h), GFP_KERNEL);
if (!h)
- return ERR_PTR(-ENOMEM);
+ return -ENOMEM;
h->path_state = HP_SW_PATH_UNINITIALIZED;
h->retries = HP_SW_RETRIES;
h->sdev = sdev;
@@ -359,17 +324,18 @@
sdev_printk(KERN_INFO, sdev, "%s: attached to %s path\n",
HP_SW_NAME, h->path_state == HP_SW_PATH_ACTIVE?
"active":"passive");
- return &h->dh_data;
+
+ sdev->handler_data = h;
+ return 0;
failed:
kfree(h);
- return ERR_PTR(-EINVAL);
+ return -EINVAL;
}
static void hp_sw_bus_detach( struct scsi_device *sdev )
{
- struct hp_sw_dh_data *h = get_hp_sw_data(sdev);
-
- kfree(h);
+ kfree(sdev->handler_data);
+ sdev->handler_data = NULL;
}
static struct scsi_device_handler hp_sw_dh = {
@@ -379,7 +345,6 @@
.detach = hp_sw_bus_detach,
.activate = hp_sw_activate,
.prep_fn = hp_sw_prep_fn,
- .match = hp_sw_match,
};
static int __init hp_sw_init(void)
diff --git a/drivers/scsi/device_handler/scsi_dh_rdac.c b/drivers/scsi/device_handler/scsi_dh_rdac.c
index b46ace3..3613581 100644
--- a/drivers/scsi/device_handler/scsi_dh_rdac.c
+++ b/drivers/scsi/device_handler/scsi_dh_rdac.c
@@ -181,7 +181,6 @@
};
struct rdac_dh_data {
- struct scsi_dh_data dh_data;
struct rdac_controller *ctlr;
#define UNINITIALIZED_LUN (1 << 8)
unsigned lun;
@@ -260,11 +259,6 @@
sdev_printk(KERN_INFO, sdev, RDAC_NAME ": " f "\n", ## arg); \
} while (0);
-static inline struct rdac_dh_data *get_rdac_data(struct scsi_device *sdev)
-{
- return container_of(sdev->scsi_dh_data, struct rdac_dh_data, dh_data);
-}
-
static struct request *get_rdac_req(struct scsi_device *sdev,
void *buffer, unsigned buflen, int rw)
{
@@ -544,7 +538,7 @@
{
struct scsi_sense_hdr sense_hdr;
int err = SCSI_DH_IO, ret;
- struct rdac_dh_data *h = get_rdac_data(sdev);
+ struct rdac_dh_data *h = sdev->handler_data;
ret = scsi_normalize_sense(sensebuf, SCSI_SENSE_BUFFERSIZE, &sense_hdr);
if (!ret)
@@ -589,7 +583,7 @@
container_of(work, struct rdac_controller, ms_work);
struct request *rq;
struct scsi_device *sdev = ctlr->ms_sdev;
- struct rdac_dh_data *h = get_rdac_data(sdev);
+ struct rdac_dh_data *h = sdev->handler_data;
struct request_queue *q = sdev->request_queue;
int err, retry_cnt = RDAC_RETRY_COUNT;
struct rdac_queue_data *tmp, *qdata;
@@ -648,7 +642,7 @@
if (!qdata)
return SCSI_DH_RETRY;
- qdata->h = get_rdac_data(sdev);
+ qdata->h = sdev->handler_data;
qdata->callback_fn = fn;
qdata->callback_data = data;
@@ -667,7 +661,7 @@
static int rdac_activate(struct scsi_device *sdev,
activate_complete fn, void *data)
{
- struct rdac_dh_data *h = get_rdac_data(sdev);
+ struct rdac_dh_data *h = sdev->handler_data;
int err = SCSI_DH_OK;
int act = 0;
@@ -702,7 +696,7 @@
static int rdac_prep_fn(struct scsi_device *sdev, struct request *req)
{
- struct rdac_dh_data *h = get_rdac_data(sdev);
+ struct rdac_dh_data *h = sdev->handler_data;
int ret = BLKPREP_OK;
if (h->state != RDAC_STATE_ACTIVE) {
@@ -716,7 +710,7 @@
static int rdac_check_sense(struct scsi_device *sdev,
struct scsi_sense_hdr *sense_hdr)
{
- struct rdac_dh_data *h = get_rdac_data(sdev);
+ struct rdac_dh_data *h = sdev->handler_data;
RDAC_LOG(RDAC_LOG_SENSE, sdev, "array %s, ctlr %d, "
"I/O returned with sense %02x/%02x/%02x",
@@ -778,56 +772,7 @@
return SCSI_RETURN_NOT_HANDLED;
}
-static const struct {
- char *vendor;
- char *model;
-} rdac_dev_list[] = {
- {"IBM", "1722"},
- {"IBM", "1724"},
- {"IBM", "1726"},
- {"IBM", "1742"},
- {"IBM", "1745"},
- {"IBM", "1746"},
- {"IBM", "1813"},
- {"IBM", "1814"},
- {"IBM", "1815"},
- {"IBM", "1818"},
- {"IBM", "3526"},
- {"SGI", "TP9"},
- {"SGI", "IS"},
- {"STK", "OPENstorage D280"},
- {"STK", "FLEXLINE 380"},
- {"SUN", "CSM"},
- {"SUN", "LCSM100"},
- {"SUN", "STK6580_6780"},
- {"SUN", "SUN_6180"},
- {"SUN", "ArrayStorage"},
- {"DELL", "MD3"},
- {"NETAPP", "INF-01-00"},
- {"LSI", "INF-01-00"},
- {"ENGENIO", "INF-01-00"},
- {NULL, NULL},
-};
-
-static bool rdac_match(struct scsi_device *sdev)
-{
- int i;
-
- if (scsi_device_tpgs(sdev))
- return false;
-
- for (i = 0; rdac_dev_list[i].vendor; i++) {
- if (!strncmp(sdev->vendor, rdac_dev_list[i].vendor,
- strlen(rdac_dev_list[i].vendor)) &&
- !strncmp(sdev->model, rdac_dev_list[i].model,
- strlen(rdac_dev_list[i].model))) {
- return true;
- }
- }
- return false;
-}
-
-static struct scsi_dh_data *rdac_bus_attach(struct scsi_device *sdev)
+static int rdac_bus_attach(struct scsi_device *sdev)
{
struct rdac_dh_data *h;
int err;
@@ -836,7 +781,7 @@
h = kzalloc(sizeof(*h) , GFP_KERNEL);
if (!h)
- return ERR_PTR(-ENOMEM);
+ return -ENOMEM;
h->lun = UNINITIALIZED_LUN;
h->state = RDAC_STATE_ACTIVE;
@@ -861,7 +806,8 @@
RDAC_NAME, h->lun, mode[(int)h->mode],
lun_state[(int)h->lun_state]);
- return &h->dh_data;
+ sdev->handler_data = h;
+ return 0;
clean_ctlr:
spin_lock(&list_lock);
@@ -870,12 +816,12 @@
failed:
kfree(h);
- return ERR_PTR(-EINVAL);
+ return -EINVAL;
}
static void rdac_bus_detach( struct scsi_device *sdev )
{
- struct rdac_dh_data *h = get_rdac_data(sdev);
+ struct rdac_dh_data *h = sdev->handler_data;
if (h->ctlr && h->ctlr->ms_queued)
flush_workqueue(kmpath_rdacd);
@@ -884,6 +830,7 @@
if (h->ctlr)
kref_put(&h->ctlr->kref, release_controller);
spin_unlock(&list_lock);
+ sdev->handler_data = NULL;
kfree(h);
}
@@ -895,7 +842,6 @@
.attach = rdac_bus_attach,
.detach = rdac_bus_detach,
.activate = rdac_activate,
- .match = rdac_match,
};
static int __init rdac_init(void)
diff --git a/drivers/scsi/fcoe/fcoe.c b/drivers/scsi/fcoe/fcoe.c
index ec193a8..d3eb80c 100644
--- a/drivers/scsi/fcoe/fcoe.c
+++ b/drivers/scsi/fcoe/fcoe.c
@@ -364,7 +364,7 @@
* on the ethertype for the given device
*/
fcoe->fcoe_packet_type.func = fcoe_rcv;
- fcoe->fcoe_packet_type.type = __constant_htons(ETH_P_FCOE);
+ fcoe->fcoe_packet_type.type = htons(ETH_P_FCOE);
fcoe->fcoe_packet_type.dev = netdev;
dev_add_pack(&fcoe->fcoe_packet_type);
diff --git a/drivers/scsi/ipr.c b/drivers/scsi/ipr.c
index 3411919..b62836d 100644
--- a/drivers/scsi/ipr.c
+++ b/drivers/scsi/ipr.c
@@ -4555,7 +4555,7 @@
spin_lock_irqsave(ioa_cfg->host->host_lock, lock_flags);
res = (struct ipr_resource_entry *)sdev->hostdata;
if (res) {
- if (ioa_cfg->sis64 && ipr_is_af_dasd_device(res)) {
+ if (ipr_is_af_dasd_device(res)) {
res->raw_mode = simple_strtoul(buf, NULL, 10);
len = strlen(buf);
if (res->sdev)
@@ -6383,9 +6383,13 @@
(!ipr_is_gscsi(res) || scsi_cmd->cmnd[0] == IPR_QUERY_RSRC_STATE)) {
ioarcb->cmd_pkt.request_type = IPR_RQTYPE_IOACMD;
}
- if (res->raw_mode && ipr_is_af_dasd_device(res))
+ if (res->raw_mode && ipr_is_af_dasd_device(res)) {
ioarcb->cmd_pkt.request_type = IPR_RQTYPE_PIPE;
+ if (scsi_cmd->underflow == 0)
+ ioarcb->cmd_pkt.flags_hi |= IPR_FLAGS_HI_NO_ULEN_CHK;
+ }
+
if (ioa_cfg->sis64)
rc = ipr_build_ioadl64(ioa_cfg, ipr_cmd);
else
diff --git a/drivers/scsi/libiscsi.c b/drivers/scsi/libiscsi.c
index 98d9bb6..33c74d3 100644
--- a/drivers/scsi/libiscsi.c
+++ b/drivers/scsi/libiscsi.c
@@ -853,12 +853,9 @@
SAM_STAT_CHECK_CONDITION;
scsi_build_sense_buffer(1, sc->sense_buffer,
ILLEGAL_REQUEST, 0x10, ascq);
- sc->sense_buffer[7] = 0xc; /* Additional sense length */
- sc->sense_buffer[8] = 0; /* Information desc type */
- sc->sense_buffer[9] = 0xa; /* Additional desc length */
- sc->sense_buffer[10] = 0x80; /* Validity bit */
-
- put_unaligned_be64(sector, &sc->sense_buffer[12]);
+ scsi_set_sense_information(sc->sense_buffer,
+ SCSI_SENSE_BUFFERSIZE,
+ sector);
goto out;
}
}
diff --git a/drivers/scsi/lpfc/lpfc_mbox.c b/drivers/scsi/lpfc/lpfc_mbox.c
index eb62772..4abb93a 100644
--- a/drivers/scsi/lpfc/lpfc_mbox.c
+++ b/drivers/scsi/lpfc/lpfc_mbox.c
@@ -2284,7 +2284,7 @@
(struct lpfc_rdp_context *)(mbox->context2);
if (bf_get(lpfc_mqe_status, &mbox->u.mqe))
- goto error;
+ goto error_mbuf_free;
lpfc_sli_bemem_bcopy(mp->virt, &rdp_context->page_a2,
DMP_SFF_PAGE_A2_SIZE);
@@ -2299,13 +2299,14 @@
mbox->mbox_cmpl = lpfc_mbx_cmpl_rdp_link_stat;
mbox->context2 = (struct lpfc_rdp_context *) rdp_context;
if (lpfc_sli_issue_mbox(phba, mbox, MBX_NOWAIT) == MBX_NOT_FINISHED)
- goto error;
+ goto error_cmd_free;
return;
-error:
+error_mbuf_free:
lpfc_mbuf_free(phba, mp->virt, mp->phys);
kfree(mp);
+error_cmd_free:
lpfc_sli4_mbox_cmd_free(phba, mbox);
rdp_context->cmpl(phba, rdp_context, FAILURE);
}
diff --git a/drivers/scsi/mpt2sas/mpt2sas_base.c b/drivers/scsi/mpt2sas/mpt2sas_base.c
index 6dec7cf..c167911 100644
--- a/drivers/scsi/mpt2sas/mpt2sas_base.c
+++ b/drivers/scsi/mpt2sas/mpt2sas_base.c
@@ -112,9 +112,12 @@
if (ret)
return ret;
+ /* global ioc spinlock to protect the controller list during list operations */
printk(KERN_INFO "setting fwfault_debug(%d)\n", mpt2sas_fwfault_debug);
+ spin_lock(&gioc_lock);
list_for_each_entry(ioc, &mpt2sas_ioc_list, list)
ioc->fwfault_debug = mpt2sas_fwfault_debug;
+ spin_unlock(&gioc_lock);
return 0;
}
@@ -4437,6 +4440,8 @@
dexitprintk(ioc, printk(MPT2SAS_INFO_FMT "%s\n", ioc->name,
__func__));
+ /* synchronize freeing of resources with the pci_access_mutex lock */
+ mutex_lock(&ioc->pci_access_mutex);
if (ioc->chip_phys && ioc->chip) {
_base_mask_interrupts(ioc);
ioc->shost_recovery = 1;
@@ -4456,6 +4461,7 @@
pci_disable_pcie_error_reporting(pdev);
pci_disable_device(pdev);
}
+ mutex_unlock(&ioc->pci_access_mutex);
return;
}
diff --git a/drivers/scsi/mpt2sas/mpt2sas_base.h b/drivers/scsi/mpt2sas/mpt2sas_base.h
index caff8d1..97ea360 100644
--- a/drivers/scsi/mpt2sas/mpt2sas_base.h
+++ b/drivers/scsi/mpt2sas/mpt2sas_base.h
@@ -238,6 +238,7 @@
* @flags: MPT_TARGET_FLAGS_XXX flags
* @deleted: target flaged for deletion
* @tm_busy: target is busy with TM request.
+ * @sdev: The sas_device associated with this target
*/
struct MPT2SAS_TARGET {
struct scsi_target *starget;
@@ -248,6 +249,7 @@
u32 flags;
u8 deleted;
u8 tm_busy;
+ struct _sas_device *sdev;
};
@@ -376,8 +378,24 @@
u8 phy;
u8 responding;
u8 pfa_led_on;
+ struct kref refcount;
};
+static inline void sas_device_get(struct _sas_device *s)
+{
+ kref_get(&s->refcount);
+}
+
+static inline void sas_device_free(struct kref *r)
+{
+ kfree(container_of(r, struct _sas_device, refcount));
+}
+
+static inline void sas_device_put(struct _sas_device *s)
+{
+ kref_put(&s->refcount, sas_device_free);
+}
+
/**
* struct _raid_device - raid volume link list
* @list: sas device list
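These helpers give struct _sas_device a conventional kref life cycle: every list that holds the object and every lookup that returns it owns one reference, dropped with sas_device_put() when the user is done. As a minimal sketch of the intended calling convention (illustration only, not code from this patch), a lookup-and-use sequence looks like:

        struct _sas_device *sas_device;

        sas_device = mpt2sas_get_sdev_by_handle(ioc, handle);  /* returns with a reference held */
        if (!sas_device)
                return;
        /* ... use sas_device; it cannot be freed while we hold the reference ... */
        sas_device_put(sas_device);                             /* drop the lookup reference */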
@@ -799,6 +817,12 @@
* @delayed_tr_list: target reset link list
* @delayed_tr_volume_list: volume target reset link list
* @temp_sensors_count: flag to carry the number of temperature sensors
+ * @pci_access_mutex: Mutex to synchronize the ioctl and sysfs show paths
+ * with PCI resource handling. Freeing the PCI resources releases vital
+ * hardware/memory resources that may still be in use by the cli/sysfs
+ * paths, resulting in a NULL pointer dereference and a kernel crash.
+ * To avoid this race we use mutex synchronization, which serializes the
+ * cli/sysfs_show paths against PCI resource freeing.
*/
struct MPT2SAS_ADAPTER {
struct list_head list;
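The rule implied by pci_access_mutex is simple: any ioctl or sysfs path that may touch chip registers or DMA-able memory first takes the mutex and re-checks that the adapter is still usable, mirroring the mpt2sas_ctl.c hunks below. A minimal sketch of that guard (illustration only, not part of the patch):

        mutex_lock(&ioc->pci_access_mutex);
        if (ioc->pci_error_recovery || ioc->remove_host) {
                mutex_unlock(&ioc->pci_access_mutex);
                return 0;               /* PCI resources may already be gone */
        }
        /* ... safe to access chip/PCI resources here ... */
        mutex_unlock(&ioc->pci_access_mutex);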
@@ -1015,6 +1039,7 @@
u8 mfg_pg10_hide_flag;
u8 hide_drives;
+ struct mutex pci_access_mutex;
};
typedef u8 (*MPT_CALLBACK)(struct MPT2SAS_ADAPTER *ioc, u16 smid, u8 msix_index,
@@ -1023,6 +1048,17 @@
/* base shared API */
extern struct list_head mpt2sas_ioc_list;
+/* spinlock on list operations over IOCs
+ * Case: when multiple warpdrive cards (IOCs) are in use.
+ * Each IOC is added to the ioc list structure on initialization.
+ * Watchdog threads run at regular intervals to check each IOC for any
+ * fault condition, which can trigger the dead_ioc thread to
+ * deallocate the PCI resources, resulting in deletion of the IOC entry
+ * from the list. This deletion needs to be protected by the spinlock to
+ * ensure that IOC removal is synchronized; otherwise it might lead to
+ * list_del corruption, as the ioc list is traversed in the cli path.
+ */
+extern spinlock_t gioc_lock;
void mpt2sas_base_start_watchdog(struct MPT2SAS_ADAPTER *ioc);
void mpt2sas_base_stop_watchdog(struct MPT2SAS_ADAPTER *ioc);
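In practice every walk of mpt2sas_ioc_list is now bracketed by this spinlock so the dead-ioc path cannot unlink an entry mid-traversal. A minimal sketch of the pattern (illustration only), matching the module-parameter setters changed in this series:

        struct MPT2SAS_ADAPTER *ioc;

        spin_lock(&gioc_lock);
        list_for_each_entry(ioc, &mpt2sas_ioc_list, list)
                ioc->logging_level = logging_level;     /* per-IOC update while the list is stable */
        spin_unlock(&gioc_lock);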
@@ -1095,11 +1131,12 @@
u16 handle);
struct _sas_node *mpt2sas_scsih_expander_find_by_sas_address(struct MPT2SAS_ADAPTER
*ioc, u64 sas_address);
-struct _sas_device *mpt2sas_scsih_sas_device_find_by_sas_address(
+struct _sas_device *mpt2sas_get_sdev_by_addr(
+ struct MPT2SAS_ADAPTER *ioc, u64 sas_address);
+struct _sas_device *__mpt2sas_get_sdev_by_addr(
struct MPT2SAS_ADAPTER *ioc, u64 sas_address);
void mpt2sas_port_enable_complete(struct MPT2SAS_ADAPTER *ioc);
-
void mpt2sas_scsih_reset_handler(struct MPT2SAS_ADAPTER *ioc, int reset_phase);
/* config shared API */
diff --git a/drivers/scsi/mpt2sas/mpt2sas_ctl.c b/drivers/scsi/mpt2sas/mpt2sas_ctl.c
index 4e50960..3694b63 100644
--- a/drivers/scsi/mpt2sas/mpt2sas_ctl.c
+++ b/drivers/scsi/mpt2sas/mpt2sas_ctl.c
@@ -427,13 +427,16 @@
_ctl_verify_adapter(int ioc_number, struct MPT2SAS_ADAPTER **iocpp)
{
struct MPT2SAS_ADAPTER *ioc;
-
+ /* global ioc lock to protect the controller list during list operations */
+ spin_lock(&gioc_lock);
list_for_each_entry(ioc, &mpt2sas_ioc_list, list) {
if (ioc->id != ioc_number)
continue;
+ spin_unlock(&gioc_lock);
*iocpp = ioc;
return ioc_number;
}
+ spin_unlock(&gioc_lock);
*iocpp = NULL;
return -1;
}
@@ -522,10 +525,15 @@
poll_wait(filep, &ctl_poll_wait, wait);
+ /* global ioc lock to protect the controller list during list operations */
+ spin_lock(&gioc_lock);
list_for_each_entry(ioc, &mpt2sas_ioc_list, list) {
- if (ioc->aen_event_read_flag)
+ if (ioc->aen_event_read_flag) {
+ spin_unlock(&gioc_lock);
return POLLIN | POLLRDNORM;
+ }
}
+ spin_unlock(&gioc_lock);
return 0;
}
@@ -2168,16 +2176,23 @@
if (_ctl_verify_adapter(ioctl_header.ioc_number, &ioc) == -1 || !ioc)
return -ENODEV;
+ /* pci_access_mutex lock acquired by ioctl path */
+ mutex_lock(&ioc->pci_access_mutex);
if (ioc->shost_recovery || ioc->pci_error_recovery ||
- ioc->is_driver_loading)
- return -EAGAIN;
+ ioc->is_driver_loading || ioc->remove_host) {
+ ret = -EAGAIN;
+ goto out_unlock_pciaccess;
+ }
state = (file->f_flags & O_NONBLOCK) ? NON_BLOCKING : BLOCKING;
if (state == NON_BLOCKING) {
- if (!mutex_trylock(&ioc->ctl_cmds.mutex))
- return -EAGAIN;
+ if (!mutex_trylock(&ioc->ctl_cmds.mutex)) {
+ ret = -EAGAIN;
+ goto out_unlock_pciaccess;
+ }
} else if (mutex_lock_interruptible(&ioc->ctl_cmds.mutex)) {
- return -ERESTARTSYS;
+ ret = -ERESTARTSYS;
+ goto out_unlock_pciaccess;
}
switch (cmd) {
@@ -2258,6 +2273,8 @@
}
mutex_unlock(&ioc->ctl_cmds.mutex);
+out_unlock_pciaccess:
+ mutex_unlock(&ioc->pci_access_mutex);
return ret;
}
@@ -2711,6 +2728,12 @@
"warpdrive\n", ioc->name, __func__);
goto out;
}
+ /* pci_access_mutex lock acquired by sysfs show path */
+ mutex_lock(&ioc->pci_access_mutex);
+ if (ioc->pci_error_recovery || ioc->remove_host) {
+ mutex_unlock(&ioc->pci_access_mutex);
+ return 0;
+ }
/* allocate up to GPIOVal 36 entries */
sz = offsetof(Mpi2IOUnitPage3_t, GPIOVal) + (sizeof(u16) * 36);
@@ -2749,6 +2772,7 @@
out:
kfree(io_unit_pg3);
+ mutex_unlock(&ioc->pci_access_mutex);
return rc;
}
static DEVICE_ATTR(BRM_status, S_IRUGO, _ctl_BRM_status_show, NULL);
diff --git a/drivers/scsi/mpt2sas/mpt2sas_scsih.c b/drivers/scsi/mpt2sas/mpt2sas_scsih.c
index 3f26147..0ad09b2 100644
--- a/drivers/scsi/mpt2sas/mpt2sas_scsih.c
+++ b/drivers/scsi/mpt2sas/mpt2sas_scsih.c
@@ -79,7 +79,8 @@
/* global parameters */
LIST_HEAD(mpt2sas_ioc_list);
-
+/* global ioc lock for list operations */
+DEFINE_SPINLOCK(gioc_lock);
/* local parameters */
static u8 scsi_io_cb_idx = -1;
static u8 tm_cb_idx = -1;
@@ -176,9 +177,37 @@
u8 VP_ID;
u8 ignore;
u16 event;
+ struct kref refcount;
char event_data[0] __aligned(4);
};
+static void fw_event_work_free(struct kref *r)
+{
+ kfree(container_of(r, struct fw_event_work, refcount));
+}
+
+static void fw_event_work_get(struct fw_event_work *fw_work)
+{
+ kref_get(&fw_work->refcount);
+}
+
+static void fw_event_work_put(struct fw_event_work *fw_work)
+{
+ kref_put(&fw_work->refcount, fw_event_work_free);
+}
+
+static struct fw_event_work *alloc_fw_event_work(int len)
+{
+ struct fw_event_work *fw_event;
+
+ fw_event = kzalloc(sizeof(*fw_event) + len, GFP_ATOMIC);
+ if (!fw_event)
+ return NULL;
+
+ kref_init(&fw_event->refcount);
+ return fw_event;
+}
+
/* raid transport support */
static struct raid_template *mpt2sas_raid_template;
@@ -293,8 +322,10 @@
return ret;
printk(KERN_INFO "setting logging_level(0x%08x)\n", logging_level);
+ spin_lock(&gioc_lock);
list_for_each_entry(ioc, &mpt2sas_ioc_list, list)
ioc->logging_level = logging_level;
+ spin_unlock(&gioc_lock);
return 0;
}
module_param_call(logging_level, _scsih_set_debug_level, param_get_int,
@@ -526,8 +557,61 @@
}
}
+static struct _sas_device *
+__mpt2sas_get_sdev_from_target(struct MPT2SAS_ADAPTER *ioc,
+ struct MPT2SAS_TARGET *tgt_priv)
+{
+ struct _sas_device *ret;
+
+ assert_spin_locked(&ioc->sas_device_lock);
+
+ ret = tgt_priv->sdev;
+ if (ret)
+ sas_device_get(ret);
+
+ return ret;
+}
+
+static struct _sas_device *
+mpt2sas_get_sdev_from_target(struct MPT2SAS_ADAPTER *ioc,
+ struct MPT2SAS_TARGET *tgt_priv)
+{
+ struct _sas_device *ret;
+ unsigned long flags;
+
+ spin_lock_irqsave(&ioc->sas_device_lock, flags);
+ ret = __mpt2sas_get_sdev_from_target(ioc, tgt_priv);
+ spin_unlock_irqrestore(&ioc->sas_device_lock, flags);
+
+ return ret;
+}
+
+
+struct _sas_device *
+__mpt2sas_get_sdev_by_addr(struct MPT2SAS_ADAPTER *ioc,
+ u64 sas_address)
+{
+ struct _sas_device *sas_device;
+
+ assert_spin_locked(&ioc->sas_device_lock);
+
+ list_for_each_entry(sas_device, &ioc->sas_device_list, list)
+ if (sas_device->sas_address == sas_address)
+ goto found_device;
+
+ list_for_each_entry(sas_device, &ioc->sas_device_init_list, list)
+ if (sas_device->sas_address == sas_address)
+ goto found_device;
+
+ return NULL;
+
+found_device:
+ sas_device_get(sas_device);
+ return sas_device;
+}
+
/**
- * mpt2sas_scsih_sas_device_find_by_sas_address - sas device search
+ * mpt2sas_get_sdev_by_addr - sas device search
* @ioc: per adapter object
* @sas_address: sas address
* Context: Calling function should acquire ioc->sas_device_lock
@@ -536,24 +620,44 @@
* object.
*/
struct _sas_device *
-mpt2sas_scsih_sas_device_find_by_sas_address(struct MPT2SAS_ADAPTER *ioc,
+mpt2sas_get_sdev_by_addr(struct MPT2SAS_ADAPTER *ioc,
u64 sas_address)
{
struct _sas_device *sas_device;
+ unsigned long flags;
+
+ spin_lock_irqsave(&ioc->sas_device_lock, flags);
+ sas_device = __mpt2sas_get_sdev_by_addr(ioc,
+ sas_address);
+ spin_unlock_irqrestore(&ioc->sas_device_lock, flags);
+
+ return sas_device;
+}
+
+static struct _sas_device *
+__mpt2sas_get_sdev_by_handle(struct MPT2SAS_ADAPTER *ioc, u16 handle)
+{
+ struct _sas_device *sas_device;
+
+ assert_spin_locked(&ioc->sas_device_lock);
list_for_each_entry(sas_device, &ioc->sas_device_list, list)
- if (sas_device->sas_address == sas_address)
- return sas_device;
+ if (sas_device->handle == handle)
+ goto found_device;
list_for_each_entry(sas_device, &ioc->sas_device_init_list, list)
- if (sas_device->sas_address == sas_address)
- return sas_device;
+ if (sas_device->handle == handle)
+ goto found_device;
return NULL;
+
+found_device:
+ sas_device_get(sas_device);
+ return sas_device;
}
/**
- * _scsih_sas_device_find_by_handle - sas device search
+ * mpt2sas_get_sdev_by_handle - sas device search
* @ioc: per adapter object
* @handle: sas device handle (assigned by firmware)
* Context: Calling function should acquire ioc->sas_device_lock
@@ -562,19 +666,16 @@
* object.
*/
static struct _sas_device *
-_scsih_sas_device_find_by_handle(struct MPT2SAS_ADAPTER *ioc, u16 handle)
+mpt2sas_get_sdev_by_handle(struct MPT2SAS_ADAPTER *ioc, u16 handle)
{
struct _sas_device *sas_device;
+ unsigned long flags;
- list_for_each_entry(sas_device, &ioc->sas_device_list, list)
- if (sas_device->handle == handle)
- return sas_device;
+ spin_lock_irqsave(&ioc->sas_device_lock, flags);
+ sas_device = __mpt2sas_get_sdev_by_handle(ioc, handle);
+ spin_unlock_irqrestore(&ioc->sas_device_lock, flags);
- list_for_each_entry(sas_device, &ioc->sas_device_init_list, list)
- if (sas_device->handle == handle)
- return sas_device;
-
- return NULL;
+ return sas_device;
}
/**
@@ -583,7 +684,7 @@
* @sas_device: the sas_device object
* Context: This function will acquire ioc->sas_device_lock.
*
- * Removing object and freeing associated memory from the ioc->sas_device_list.
+ * If sas_device is on the list, remove it and decrement its reference count.
*/
static void
_scsih_sas_device_remove(struct MPT2SAS_ADAPTER *ioc,
@@ -594,9 +695,15 @@
if (!sas_device)
return;
+ /*
+ * The lock serializes access to the list, but we still need to verify
+ * that nobody removed the entry while we were waiting on the lock.
+ */
spin_lock_irqsave(&ioc->sas_device_lock, flags);
- list_del(&sas_device->list);
- kfree(sas_device);
+ if (!list_empty(&sas_device->list)) {
+ list_del_init(&sas_device->list);
+ sas_device_put(sas_device);
+ }
spin_unlock_irqrestore(&ioc->sas_device_lock, flags);
}
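Combined with the kref helpers, unlinking only ever happens under sas_device_lock and only drops the list's reference when the entry is still linked; teardown that may sleep is done after the lock is released. A minimal sketch of a remove-by-handle caller built on this (it matches the _scsih_device_remove_by_handle() hunk further down):

        spin_lock_irqsave(&ioc->sas_device_lock, flags);
        sas_device = __mpt2sas_get_sdev_by_handle(ioc, handle);
        if (sas_device) {
                list_del_init(&sas_device->list);
                sas_device_put(sas_device);             /* reference owned by the list */
        }
        spin_unlock_irqrestore(&ioc->sas_device_lock, flags);

        if (sas_device) {
                _scsih_remove_device(ioc, sas_device);  /* may sleep, done without the lock */
                sas_device_put(sas_device);             /* reference from the lookup */
        }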
@@ -620,6 +727,7 @@
sas_device->handle, (unsigned long long)sas_device->sas_address));
spin_lock_irqsave(&ioc->sas_device_lock, flags);
+ sas_device_get(sas_device);
list_add_tail(&sas_device->list, &ioc->sas_device_list);
spin_unlock_irqrestore(&ioc->sas_device_lock, flags);
@@ -659,6 +767,7 @@
sas_device->handle, (unsigned long long)sas_device->sas_address));
spin_lock_irqsave(&ioc->sas_device_lock, flags);
+ sas_device_get(sas_device);
list_add_tail(&sas_device->list, &ioc->sas_device_init_list);
_scsih_determine_boot_device(ioc, sas_device, 0);
spin_unlock_irqrestore(&ioc->sas_device_lock, flags);
@@ -1208,12 +1317,15 @@
goto not_sata;
if ((sas_target_priv_data->flags & MPT_TARGET_FLAGS_VOLUME))
goto not_sata;
+
spin_lock_irqsave(&ioc->sas_device_lock, flags);
- sas_device = mpt2sas_scsih_sas_device_find_by_sas_address(ioc,
- sas_device_priv_data->sas_target->sas_address);
- if (sas_device && sas_device->device_info &
- MPI2_SAS_DEVICE_INFO_SATA_DEVICE)
- max_depth = MPT2SAS_SATA_QUEUE_DEPTH;
+ sas_device = __mpt2sas_get_sdev_from_target(ioc, sas_target_priv_data);
+ if (sas_device) {
+ if (sas_device->device_info & MPI2_SAS_DEVICE_INFO_SATA_DEVICE)
+ max_depth = MPT2SAS_SATA_QUEUE_DEPTH;
+
+ sas_device_put(sas_device);
+ }
spin_unlock_irqrestore(&ioc->sas_device_lock, flags);
not_sata:
@@ -1271,18 +1383,20 @@
/* sas/sata devices */
spin_lock_irqsave(&ioc->sas_device_lock, flags);
rphy = dev_to_rphy(starget->dev.parent);
- sas_device = mpt2sas_scsih_sas_device_find_by_sas_address(ioc,
+ sas_device = __mpt2sas_get_sdev_by_addr(ioc,
rphy->identify.sas_address);
if (sas_device) {
sas_target_priv_data->handle = sas_device->handle;
sas_target_priv_data->sas_address = sas_device->sas_address;
+ sas_target_priv_data->sdev = sas_device;
sas_device->starget = starget;
sas_device->id = starget->id;
sas_device->channel = starget->channel;
if (test_bit(sas_device->handle, ioc->pd_handles))
sas_target_priv_data->flags |=
MPT_TARGET_FLAGS_RAID_COMPONENT;
+
}
spin_unlock_irqrestore(&ioc->sas_device_lock, flags);
@@ -1324,13 +1438,21 @@
spin_lock_irqsave(&ioc->sas_device_lock, flags);
rphy = dev_to_rphy(starget->dev.parent);
- sas_device = mpt2sas_scsih_sas_device_find_by_sas_address(ioc,
- rphy->identify.sas_address);
+ sas_device = __mpt2sas_get_sdev_from_target(ioc, sas_target_priv_data);
if (sas_device && (sas_device->starget == starget) &&
(sas_device->id == starget->id) &&
(sas_device->channel == starget->channel))
sas_device->starget = NULL;
+ if (sas_device) {
+ /*
+ * Corresponding get() is in _scsih_target_alloc()
+ */
+ sas_target_priv_data->sdev = NULL;
+ sas_device_put(sas_device);
+
+ sas_device_put(sas_device);
+ }
spin_unlock_irqrestore(&ioc->sas_device_lock, flags);
out:
@@ -1386,7 +1508,7 @@
if (!(sas_target_priv_data->flags & MPT_TARGET_FLAGS_VOLUME)) {
spin_lock_irqsave(&ioc->sas_device_lock, flags);
- sas_device = mpt2sas_scsih_sas_device_find_by_sas_address(ioc,
+ sas_device = __mpt2sas_get_sdev_by_addr(ioc,
sas_target_priv_data->sas_address);
if (sas_device && (sas_device->starget == NULL)) {
sdev_printk(KERN_INFO, sdev,
@@ -1394,6 +1516,10 @@
__func__, __LINE__);
sas_device->starget = starget;
}
+
+ if (sas_device)
+ sas_device_put(sas_device);
+
spin_unlock_irqrestore(&ioc->sas_device_lock, flags);
}
@@ -1428,10 +1554,13 @@
if (!(sas_target_priv_data->flags & MPT_TARGET_FLAGS_VOLUME)) {
spin_lock_irqsave(&ioc->sas_device_lock, flags);
- sas_device = mpt2sas_scsih_sas_device_find_by_sas_address(ioc,
- sas_target_priv_data->sas_address);
+ sas_device = __mpt2sas_get_sdev_from_target(ioc,
+ sas_target_priv_data);
if (sas_device && !sas_target_priv_data->num_luns)
sas_device->starget = NULL;
+
+ if (sas_device)
+ sas_device_put(sas_device);
spin_unlock_irqrestore(&ioc->sas_device_lock, flags);
}
@@ -2078,7 +2207,7 @@
}
spin_lock_irqsave(&ioc->sas_device_lock, flags);
- sas_device = mpt2sas_scsih_sas_device_find_by_sas_address(ioc,
+ sas_device = __mpt2sas_get_sdev_by_addr(ioc,
sas_device_priv_data->sas_target->sas_address);
if (!sas_device) {
spin_unlock_irqrestore(&ioc->sas_device_lock, flags);
@@ -2112,17 +2241,18 @@
(unsigned long long) sas_device->enclosure_logical_id,
sas_device->slot);
+ sas_device_put(sas_device);
spin_unlock_irqrestore(&ioc->sas_device_lock, flags);
if (!ssp_target)
_scsih_display_sata_capabilities(ioc, handle, sdev);
-
_scsih_change_queue_depth(sdev, qdepth);
if (ssp_target) {
sas_read_port_mode_page(sdev);
_scsih_enable_tlr(ioc, sdev);
}
+
return 0;
}
@@ -2509,8 +2639,7 @@
device_str, (unsigned long long)priv_target->sas_address);
} else {
spin_lock_irqsave(&ioc->sas_device_lock, flags);
- sas_device = mpt2sas_scsih_sas_device_find_by_sas_address(ioc,
- priv_target->sas_address);
+ sas_device = __mpt2sas_get_sdev_from_target(ioc, priv_target);
if (sas_device) {
if (priv_target->flags &
MPT_TARGET_FLAGS_RAID_COMPONENT) {
@@ -2529,6 +2658,8 @@
"enclosure_logical_id(0x%016llx), slot(%d)\n",
(unsigned long long)sas_device->enclosure_logical_id,
sas_device->slot);
+
+ sas_device_put(sas_device);
}
spin_unlock_irqrestore(&ioc->sas_device_lock, flags);
}
@@ -2604,12 +2735,12 @@
{
struct MPT2SAS_ADAPTER *ioc = shost_priv(scmd->device->host);
struct MPT2SAS_DEVICE *sas_device_priv_data;
- struct _sas_device *sas_device;
- unsigned long flags;
+ struct _sas_device *sas_device = NULL;
u16 handle;
int r;
struct scsi_target *starget = scmd->device->sdev_target;
+ struct MPT2SAS_TARGET *target_priv_data = starget->hostdata;
starget_printk(KERN_INFO, starget, "attempting device reset! "
"scmd(%p)\n", scmd);
@@ -2629,12 +2760,10 @@
handle = 0;
if (sas_device_priv_data->sas_target->flags &
MPT_TARGET_FLAGS_RAID_COMPONENT) {
- spin_lock_irqsave(&ioc->sas_device_lock, flags);
- sas_device = _scsih_sas_device_find_by_handle(ioc,
- sas_device_priv_data->sas_target->handle);
+ sas_device = mpt2sas_get_sdev_from_target(ioc,
+ target_priv_data);
if (sas_device)
handle = sas_device->volume_handle;
- spin_unlock_irqrestore(&ioc->sas_device_lock, flags);
} else
handle = sas_device_priv_data->sas_target->handle;
@@ -2651,6 +2780,10 @@
out:
sdev_printk(KERN_INFO, scmd->device, "device reset: %s scmd(%p)\n",
((r == SUCCESS) ? "SUCCESS" : "FAILED"), scmd);
+
+ if (sas_device)
+ sas_device_put(sas_device);
+
return r;
}
@@ -2665,11 +2798,11 @@
{
struct MPT2SAS_ADAPTER *ioc = shost_priv(scmd->device->host);
struct MPT2SAS_DEVICE *sas_device_priv_data;
- struct _sas_device *sas_device;
- unsigned long flags;
+ struct _sas_device *sas_device = NULL;
u16 handle;
int r;
struct scsi_target *starget = scmd->device->sdev_target;
+ struct MPT2SAS_TARGET *target_priv_data = starget->hostdata;
starget_printk(KERN_INFO, starget, "attempting target reset! "
"scmd(%p)\n", scmd);
@@ -2689,12 +2822,10 @@
handle = 0;
if (sas_device_priv_data->sas_target->flags &
MPT_TARGET_FLAGS_RAID_COMPONENT) {
- spin_lock_irqsave(&ioc->sas_device_lock, flags);
- sas_device = _scsih_sas_device_find_by_handle(ioc,
- sas_device_priv_data->sas_target->handle);
+ sas_device = mpt2sas_get_sdev_from_target(ioc,
+ target_priv_data);
if (sas_device)
handle = sas_device->volume_handle;
- spin_unlock_irqrestore(&ioc->sas_device_lock, flags);
} else
handle = sas_device_priv_data->sas_target->handle;
@@ -2711,6 +2842,10 @@
out:
starget_printk(KERN_INFO, starget, "target reset: %s scmd(%p)\n",
((r == SUCCESS) ? "SUCCESS" : "FAILED"), scmd);
+
+ if (sas_device)
+ sas_device_put(sas_device);
+
return r;
}
@@ -2768,36 +2903,39 @@
return;
spin_lock_irqsave(&ioc->fw_event_lock, flags);
+ fw_event_work_get(fw_event);
list_add_tail(&fw_event->list, &ioc->fw_event_list);
INIT_DELAYED_WORK(&fw_event->delayed_work, _firmware_event_work);
+ fw_event_work_get(fw_event);
queue_delayed_work(ioc->firmware_event_thread,
&fw_event->delayed_work, 0);
spin_unlock_irqrestore(&ioc->fw_event_lock, flags);
}
/**
- * _scsih_fw_event_free - delete fw_event
+ * _scsih_fw_event_del_from_list - delete fw_event from the list
* @ioc: per adapter object
* @fw_event: object describing the event
* Context: This function will acquire ioc->fw_event_lock.
*
- * This removes firmware event object from link list, frees associated memory.
+ * If the fw_event is on the fw_event_list, remove it and do a put.
*
* Return nothing.
*/
static void
-_scsih_fw_event_free(struct MPT2SAS_ADAPTER *ioc, struct fw_event_work
+_scsih_fw_event_del_from_list(struct MPT2SAS_ADAPTER *ioc, struct fw_event_work
*fw_event)
{
unsigned long flags;
spin_lock_irqsave(&ioc->fw_event_lock, flags);
- list_del(&fw_event->list);
- kfree(fw_event);
+ if (!list_empty(&fw_event->list)) {
+ list_del_init(&fw_event->list);
+ fw_event_work_put(fw_event);
+ }
spin_unlock_irqrestore(&ioc->fw_event_lock, flags);
}
-
/**
* _scsih_error_recovery_delete_devices - remove devices not responding
* @ioc: per adapter object
@@ -2812,13 +2950,14 @@
if (ioc->is_driver_loading)
return;
- fw_event = kzalloc(sizeof(struct fw_event_work), GFP_ATOMIC);
+ fw_event = alloc_fw_event_work(0);
if (!fw_event)
return;
fw_event->event = MPT2SAS_REMOVE_UNRESPONDING_DEVICES;
fw_event->ioc = ioc;
_scsih_fw_event_add(ioc, fw_event);
+ fw_event_work_put(fw_event);
}
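The fw_event_work life cycle follows the same scheme as the sas_device one: alloc_fw_event_work() returns one reference for the caller, _scsih_fw_event_add() takes one for fw_event_list and one for the queued delayed work, and the caller drops its own reference once the event is queued. Made explicit (illustration only), the references on a freshly queued event are:

        /*
         * alloc_fw_event_work()        -> refcount 1 (held by the caller)
         * _scsih_fw_event_add()        -> +1 for fw_event_list, +1 for the
         *                                 queued delayed work      (now 3)
         * fw_event_work_put() (caller) -> 2; the list and the work each
         *                                 still own one reference
         */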
/**
@@ -2832,12 +2971,29 @@
{
struct fw_event_work *fw_event;
- fw_event = kzalloc(sizeof(struct fw_event_work), GFP_ATOMIC);
+ fw_event = alloc_fw_event_work(0);
if (!fw_event)
return;
fw_event->event = MPT2SAS_PORT_ENABLE_COMPLETE;
fw_event->ioc = ioc;
_scsih_fw_event_add(ioc, fw_event);
+ fw_event_work_put(fw_event);
+}
+
+static struct fw_event_work *dequeue_next_fw_event(struct MPT2SAS_ADAPTER *ioc)
+{
+ unsigned long flags;
+ struct fw_event_work *fw_event = NULL;
+
+ spin_lock_irqsave(&ioc->fw_event_lock, flags);
+ if (!list_empty(&ioc->fw_event_list)) {
+ fw_event = list_first_entry(&ioc->fw_event_list,
+ struct fw_event_work, list);
+ list_del_init(&fw_event->list);
+ }
+ spin_unlock_irqrestore(&ioc->fw_event_lock, flags);
+
+ return fw_event;
}
/**
@@ -2852,17 +3008,25 @@
static void
_scsih_fw_event_cleanup_queue(struct MPT2SAS_ADAPTER *ioc)
{
- struct fw_event_work *fw_event, *next;
+ struct fw_event_work *fw_event;
if (list_empty(&ioc->fw_event_list) ||
!ioc->firmware_event_thread || in_interrupt())
return;
- list_for_each_entry_safe(fw_event, next, &ioc->fw_event_list, list) {
- if (cancel_delayed_work_sync(&fw_event->delayed_work)) {
- _scsih_fw_event_free(ioc, fw_event);
- continue;
- }
+ while ((fw_event = dequeue_next_fw_event(ioc))) {
+ /*
+ * Wait on the fw_event to complete. If this returns 1, then
+ * the event was never executed, and we need a put for the
+ * reference the delayed_work had on the fw_event.
+ *
+ * If it did execute, we wait for it to finish, and the put will
+ * happen from _firmware_event_work()
+ */
+ if (cancel_delayed_work_sync(&fw_event->delayed_work))
+ fw_event_work_put(fw_event);
+
+ fw_event_work_put(fw_event);
}
}
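The cleanup loop above is the mirror image of that accounting: dequeue_next_fw_event() transfers the list's reference to the caller, cancel_delayed_work_sync() tells us whether the work's reference still needs to be dropped here, and the final put releases the list's reference; whichever side drops the last reference frees the object. Summarized (illustration only):

        /*
         * dequeue_next_fw_event()              -> we now own the list's reference
         * cancel_delayed_work_sync() returns 1 -> work never ran: put the
         *                                         delayed-work reference here
         * cancel_delayed_work_sync() returns 0 -> work ran (or is running) and
         *                                         _firmware_event_work() puts it
         * final fw_event_work_put()            -> drops the list's reference;
         *                                         kfree() happens at refcount 0
         */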
@@ -3002,15 +3166,15 @@
list_for_each_entry(mpt2sas_port,
&sas_expander->sas_port_list, port_list) {
- if (mpt2sas_port->remote_identify.device_type ==
- SAS_END_DEVICE) {
+ if (mpt2sas_port->remote_identify.device_type == SAS_END_DEVICE) {
spin_lock_irqsave(&ioc->sas_device_lock, flags);
- sas_device =
- mpt2sas_scsih_sas_device_find_by_sas_address(ioc,
- mpt2sas_port->remote_identify.sas_address);
- if (sas_device)
+ sas_device = __mpt2sas_get_sdev_by_addr(ioc,
+ mpt2sas_port->remote_identify.sas_address);
+ if (sas_device) {
set_bit(sas_device->handle,
- ioc->blocking_handles);
+ ioc->blocking_handles);
+ sas_device_put(sas_device);
+ }
spin_unlock_irqrestore(&ioc->sas_device_lock, flags);
}
}
@@ -3080,7 +3244,7 @@
{
Mpi2SCSITaskManagementRequest_t *mpi_request;
u16 smid;
- struct _sas_device *sas_device;
+ struct _sas_device *sas_device = NULL;
struct MPT2SAS_TARGET *sas_target_priv_data = NULL;
u64 sas_address = 0;
unsigned long flags;
@@ -3110,7 +3274,7 @@
return;
spin_lock_irqsave(&ioc->sas_device_lock, flags);
- sas_device = _scsih_sas_device_find_by_handle(ioc, handle);
+ sas_device = __mpt2sas_get_sdev_by_handle(ioc, handle);
if (sas_device && sas_device->starget &&
sas_device->starget->hostdata) {
sas_target_priv_data = sas_device->starget->hostdata;
@@ -3131,14 +3295,14 @@
if (!smid) {
delayed_tr = kzalloc(sizeof(*delayed_tr), GFP_ATOMIC);
if (!delayed_tr)
- return;
+ goto out;
INIT_LIST_HEAD(&delayed_tr->list);
delayed_tr->handle = handle;
list_add_tail(&delayed_tr->list, &ioc->delayed_tr_list);
dewtprintk(ioc, printk(MPT2SAS_INFO_FMT
"DELAYED:tr:handle(0x%04x), (open)\n",
ioc->name, handle));
- return;
+ goto out;
}
dewtprintk(ioc, printk(MPT2SAS_INFO_FMT "tr_send:handle(0x%04x), "
@@ -3150,6 +3314,9 @@
mpi_request->DevHandle = cpu_to_le16(handle);
mpi_request->TaskType = MPI2_SCSITASKMGMT_TASKTYPE_TARGET_RESET;
mpt2sas_base_put_smid_hi_priority(ioc, smid);
+out:
+ if (sas_device)
+ sas_device_put(sas_device);
}
@@ -4068,7 +4235,6 @@
char *desc_scsi_state = ioc->tmp_string;
u32 log_info = le32_to_cpu(mpi_reply->IOCLogInfo);
struct _sas_device *sas_device = NULL;
- unsigned long flags;
struct scsi_target *starget = scmd->device->sdev_target;
struct MPT2SAS_TARGET *priv_target = starget->hostdata;
char *device_str = NULL;
@@ -4200,9 +4366,7 @@
printk(MPT2SAS_WARN_FMT "\t%s wwid(0x%016llx)\n", ioc->name,
device_str, (unsigned long long)priv_target->sas_address);
} else {
- spin_lock_irqsave(&ioc->sas_device_lock, flags);
- sas_device = mpt2sas_scsih_sas_device_find_by_sas_address(ioc,
- priv_target->sas_address);
+ sas_device = mpt2sas_get_sdev_from_target(ioc, priv_target);
if (sas_device) {
printk(MPT2SAS_WARN_FMT "\tsas_address(0x%016llx), "
"phy(%d)\n", ioc->name, sas_device->sas_address,
@@ -4211,8 +4375,9 @@
"\tenclosure_logical_id(0x%016llx), slot(%d)\n",
ioc->name, sas_device->enclosure_logical_id,
sas_device->slot);
+
+ sas_device_put(sas_device);
}
- spin_unlock_irqrestore(&ioc->sas_device_lock, flags);
}
printk(MPT2SAS_WARN_FMT "\thandle(0x%04x), ioc_status(%s)(0x%04x), "
@@ -4259,7 +4424,7 @@
Mpi2SepRequest_t mpi_request;
struct _sas_device *sas_device;
- sas_device = _scsih_sas_device_find_by_handle(ioc, handle);
+ sas_device = mpt2sas_get_sdev_by_handle(ioc, handle);
if (!sas_device)
return;
@@ -4274,7 +4439,7 @@
&mpi_request)) != 0) {
printk(MPT2SAS_ERR_FMT "failure at %s:%d/%s()!\n", ioc->name,
__FILE__, __LINE__, __func__);
- return;
+ goto out;
}
sas_device->pfa_led_on = 1;
@@ -4284,8 +4449,10 @@
"enclosure_processor: ioc_status (0x%04x), loginfo(0x%08x)\n",
ioc->name, le16_to_cpu(mpi_reply.IOCStatus),
le32_to_cpu(mpi_reply.IOCLogInfo)));
- return;
+ goto out;
}
+out:
+ sas_device_put(sas_device);
}
/**
@@ -4340,13 +4507,14 @@
{
struct fw_event_work *fw_event;
- fw_event = kzalloc(sizeof(struct fw_event_work), GFP_ATOMIC);
+ fw_event = alloc_fw_event_work(0);
if (!fw_event)
return;
fw_event->event = MPT2SAS_TURN_ON_PFA_LED;
fw_event->device_handle = handle;
fw_event->ioc = ioc;
_scsih_fw_event_add(ioc, fw_event);
+ fw_event_work_put(fw_event);
}
/**
@@ -4370,19 +4538,17 @@
/* only handle non-raid devices */
spin_lock_irqsave(&ioc->sas_device_lock, flags);
- sas_device = _scsih_sas_device_find_by_handle(ioc, handle);
+ sas_device = __mpt2sas_get_sdev_by_handle(ioc, handle);
if (!sas_device) {
- spin_unlock_irqrestore(&ioc->sas_device_lock, flags);
- return;
+ goto out_unlock;
}
starget = sas_device->starget;
sas_target_priv_data = starget->hostdata;
if ((sas_target_priv_data->flags & MPT_TARGET_FLAGS_RAID_COMPONENT) ||
- ((sas_target_priv_data->flags & MPT_TARGET_FLAGS_VOLUME))) {
- spin_unlock_irqrestore(&ioc->sas_device_lock, flags);
- return;
- }
+ ((sas_target_priv_data->flags & MPT_TARGET_FLAGS_VOLUME)))
+ goto out_unlock;
+
starget_printk(KERN_WARNING, starget, "predicted fault\n");
spin_unlock_irqrestore(&ioc->sas_device_lock, flags);
@@ -4396,7 +4562,7 @@
if (!event_reply) {
printk(MPT2SAS_ERR_FMT "failure at %s:%d/%s()!\n",
ioc->name, __FILE__, __LINE__, __func__);
- return;
+ goto out;
}
event_reply->Function = MPI2_FUNCTION_EVENT_NOTIFICATION;
@@ -4413,6 +4579,14 @@
event_data->SASAddress = cpu_to_le64(sas_target_priv_data->sas_address);
mpt2sas_ctl_add_to_event_log(ioc, event_reply);
kfree(event_reply);
+out:
+ if (sas_device)
+ sas_device_put(sas_device);
+ return;
+
+out_unlock:
+ spin_unlock_irqrestore(&ioc->sas_device_lock, flags);
+ goto out;
}
/**
@@ -5148,14 +5322,13 @@
spin_lock_irqsave(&ioc->sas_device_lock, flags);
sas_address = le64_to_cpu(sas_device_pg0.SASAddress);
- sas_device = mpt2sas_scsih_sas_device_find_by_sas_address(ioc,
+ sas_device = __mpt2sas_get_sdev_by_addr(ioc,
sas_address);
if (!sas_device) {
printk(MPT2SAS_ERR_FMT "device is not present "
"handle(0x%04x), no sas_device!!!\n", ioc->name, handle);
- spin_unlock_irqrestore(&ioc->sas_device_lock, flags);
- return;
+ goto out_unlock;
}
if (unlikely(sas_device->handle != handle)) {
@@ -5172,19 +5345,24 @@
MPI2_SAS_DEVICE0_FLAGS_DEVICE_PRESENT)) {
printk(MPT2SAS_ERR_FMT "device is not present "
"handle(0x%04x), flags!!!\n", ioc->name, handle);
- spin_unlock_irqrestore(&ioc->sas_device_lock, flags);
- return;
+ goto out_unlock;
}
/* check if there were any issues with discovery */
if (_scsih_check_access_status(ioc, sas_address, handle,
- sas_device_pg0.AccessStatus)) {
- spin_unlock_irqrestore(&ioc->sas_device_lock, flags);
- return;
- }
+ sas_device_pg0.AccessStatus))
+ goto out_unlock;
+
spin_unlock_irqrestore(&ioc->sas_device_lock, flags);
_scsih_ublock_io_device(ioc, sas_address);
+ if (sas_device)
+ sas_device_put(sas_device);
+ return;
+out_unlock:
+ spin_unlock_irqrestore(&ioc->sas_device_lock, flags);
+ if (sas_device)
+ sas_device_put(sas_device);
}
/**
@@ -5208,7 +5386,6 @@
u32 ioc_status;
__le64 sas_address;
u32 device_info;
- unsigned long flags;
if ((mpt2sas_config_get_sas_device_pg0(ioc, &mpi_reply, &sas_device_pg0,
MPI2_SAS_DEVICE_PGAD_FORM_HANDLE, handle))) {
@@ -5250,14 +5427,13 @@
return -1;
}
-
- spin_lock_irqsave(&ioc->sas_device_lock, flags);
- sas_device = mpt2sas_scsih_sas_device_find_by_sas_address(ioc,
+ sas_device = mpt2sas_get_sdev_by_addr(ioc,
sas_address);
- spin_unlock_irqrestore(&ioc->sas_device_lock, flags);
- if (sas_device)
+ if (sas_device) {
+ sas_device_put(sas_device);
return 0;
+ }
sas_device = kzalloc(sizeof(struct _sas_device),
GFP_KERNEL);
@@ -5267,6 +5443,7 @@
return -1;
}
+ kref_init(&sas_device->refcount);
sas_device->handle = handle;
if (_scsih_get_sas_address(ioc, le16_to_cpu
(sas_device_pg0.ParentDevHandle),
@@ -5296,6 +5473,7 @@
else
_scsih_sas_device_add(ioc, sas_device);
+ sas_device_put(sas_device);
return 0;
}
@@ -5344,7 +5522,6 @@
"handle(0x%04x), sas_addr(0x%016llx)\n", ioc->name, __func__,
sas_device->handle, (unsigned long long)
sas_device->sas_address));
- kfree(sas_device);
}
/**
* _scsih_device_remove_by_handle - removing device object by handle
@@ -5363,12 +5540,17 @@
return;
spin_lock_irqsave(&ioc->sas_device_lock, flags);
- sas_device = _scsih_sas_device_find_by_handle(ioc, handle);
- if (sas_device)
- list_del(&sas_device->list);
+ sas_device = __mpt2sas_get_sdev_by_handle(ioc, handle);
+ if (sas_device) {
+ list_del_init(&sas_device->list);
+ sas_device_put(sas_device);
+ }
spin_unlock_irqrestore(&ioc->sas_device_lock, flags);
- if (sas_device)
+
+ if (sas_device) {
_scsih_remove_device(ioc, sas_device);
+ sas_device_put(sas_device);
+ }
}
/**
@@ -5389,13 +5571,17 @@
return;
spin_lock_irqsave(&ioc->sas_device_lock, flags);
- sas_device = mpt2sas_scsih_sas_device_find_by_sas_address(ioc,
- sas_address);
- if (sas_device)
- list_del(&sas_device->list);
+ sas_device = __mpt2sas_get_sdev_by_addr(ioc, sas_address);
+ if (sas_device) {
+ list_del_init(&sas_device->list);
+ sas_device_put(sas_device);
+ }
spin_unlock_irqrestore(&ioc->sas_device_lock, flags);
- if (sas_device)
+
+ if (sas_device) {
_scsih_remove_device(ioc, sas_device);
+ sas_device_put(sas_device);
+ }
}
#ifdef CONFIG_SCSI_MPT2SAS_LOGGING
/**
@@ -5716,26 +5902,28 @@
spin_lock_irqsave(&ioc->sas_device_lock, flags);
sas_address = le64_to_cpu(event_data->SASAddress);
- sas_device = mpt2sas_scsih_sas_device_find_by_sas_address(ioc,
+ sas_device = __mpt2sas_get_sdev_by_addr(ioc,
sas_address);
- if (!sas_device || !sas_device->starget) {
- spin_unlock_irqrestore(&ioc->sas_device_lock, flags);
- return;
- }
+ if (!sas_device || !sas_device->starget)
+ goto out;
target_priv_data = sas_device->starget->hostdata;
- if (!target_priv_data) {
- spin_unlock_irqrestore(&ioc->sas_device_lock, flags);
- return;
- }
+ if (!target_priv_data)
+ goto out;
if (event_data->ReasonCode ==
MPI2_EVENT_SAS_DEV_STAT_RC_INTERNAL_DEVICE_RESET)
target_priv_data->tm_busy = 1;
else
target_priv_data->tm_busy = 0;
+
+out:
+ if (sas_device)
+ sas_device_put(sas_device);
+
spin_unlock_irqrestore(&ioc->sas_device_lock, flags);
+
}
#ifdef CONFIG_SCSI_MPT2SAS_LOGGING
@@ -6123,7 +6311,7 @@
u16 handle = le16_to_cpu(element->PhysDiskDevHandle);
spin_lock_irqsave(&ioc->sas_device_lock, flags);
- sas_device = _scsih_sas_device_find_by_handle(ioc, handle);
+ sas_device = __mpt2sas_get_sdev_by_handle(ioc, handle);
if (sas_device) {
sas_device->volume_handle = 0;
sas_device->volume_wwid = 0;
@@ -6142,6 +6330,8 @@
/* exposing raid component */
if (starget)
starget_for_each_device(starget, NULL, _scsih_reprobe_lun);
+
+ sas_device_put(sas_device);
}
/**
@@ -6170,7 +6360,7 @@
&volume_wwid);
spin_lock_irqsave(&ioc->sas_device_lock, flags);
- sas_device = _scsih_sas_device_find_by_handle(ioc, handle);
+ sas_device = __mpt2sas_get_sdev_by_handle(ioc, handle);
if (sas_device) {
set_bit(handle, ioc->pd_handles);
if (sas_device->starget && sas_device->starget->hostdata) {
@@ -6189,6 +6379,8 @@
/* hiding raid component */
if (starget)
starget_for_each_device(starget, (void *)1, _scsih_reprobe_lun);
+
+ sas_device_put(sas_device);
}
/**
@@ -6221,7 +6413,6 @@
Mpi2EventIrConfigElement_t *element)
{
struct _sas_device *sas_device;
- unsigned long flags;
u16 handle = le16_to_cpu(element->PhysDiskDevHandle);
Mpi2ConfigReply_t mpi_reply;
Mpi2SasDevicePage0_t sas_device_pg0;
@@ -6231,11 +6422,11 @@
set_bit(handle, ioc->pd_handles);
- spin_lock_irqsave(&ioc->sas_device_lock, flags);
- sas_device = _scsih_sas_device_find_by_handle(ioc, handle);
- spin_unlock_irqrestore(&ioc->sas_device_lock, flags);
- if (sas_device)
+ sas_device = mpt2sas_get_sdev_by_handle(ioc, handle);
+ if (sas_device) {
+ sas_device_put(sas_device);
return;
+ }
if ((mpt2sas_config_get_sas_device_pg0(ioc, &mpi_reply, &sas_device_pg0,
MPI2_SAS_DEVICE_PGAD_FORM_HANDLE, handle))) {
@@ -6509,7 +6700,6 @@
u16 handle, parent_handle;
u32 state;
struct _sas_device *sas_device;
- unsigned long flags;
Mpi2ConfigReply_t mpi_reply;
Mpi2SasDevicePage0_t sas_device_pg0;
u32 ioc_status;
@@ -6542,12 +6732,11 @@
if (!ioc->is_warpdrive)
set_bit(handle, ioc->pd_handles);
- spin_lock_irqsave(&ioc->sas_device_lock, flags);
- sas_device = _scsih_sas_device_find_by_handle(ioc, handle);
- spin_unlock_irqrestore(&ioc->sas_device_lock, flags);
-
- if (sas_device)
+ sas_device = mpt2sas_get_sdev_by_handle(ioc, handle);
+ if (sas_device) {
+ sas_device_put(sas_device);
return;
+ }
if ((mpt2sas_config_get_sas_device_pg0(ioc, &mpi_reply,
&sas_device_pg0, MPI2_SAS_DEVICE_PGAD_FORM_HANDLE,
@@ -7015,6 +7204,7 @@
struct _raid_device *raid_device, *raid_device_next;
struct list_head tmp_list;
unsigned long flags;
+ LIST_HEAD(head);
printk(MPT2SAS_INFO_FMT "removing unresponding devices: start\n",
ioc->name);
@@ -7022,14 +7212,29 @@
/* removing unresponding end devices */
printk(MPT2SAS_INFO_FMT "removing unresponding devices: end-devices\n",
ioc->name);
+
+ /*
+ * Iterate, pulling off devices marked as non-responding. We become the
+ * owner for the reference the list had on any object we prune.
+ */
+ spin_lock_irqsave(&ioc->sas_device_lock, flags);
list_for_each_entry_safe(sas_device, sas_device_next,
- &ioc->sas_device_list, list) {
+ &ioc->sas_device_list, list) {
if (!sas_device->responding)
- mpt2sas_device_remove_by_sas_address(ioc,
- sas_device->sas_address);
+ list_move_tail(&sas_device->list, &head);
else
sas_device->responding = 0;
}
+ spin_unlock_irqrestore(&ioc->sas_device_lock, flags);
+
+ /*
+ * Now, uninitialize and remove the unresponding devices we pruned.
+ */
+ list_for_each_entry_safe(sas_device, sas_device_next, &head, list) {
+ _scsih_remove_device(ioc, sas_device);
+ list_del_init(&sas_device->list);
+ sas_device_put(sas_device);
+ }
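The same unlink-under-the-lock idea is applied to batches here: non-responding devices are moved onto a private list while sas_device_lock is held, so the list's references travel with them, and the potentially sleeping teardown then runs without the lock. The reference flow per pruned device (illustration only):

        /*
         * list_move_tail() under the lock      -> the private list inherits the
         *                                         reference sas_device_list held
         * _scsih_remove_device()               -> tears the device down, lock-free
         * list_del_init() + sas_device_put()   -> drops that inherited reference
         */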
/* removing unresponding volumes */
if (ioc->ir_firmware) {
@@ -7179,11 +7384,11 @@
}
phys_disk_num = pd_pg0.PhysDiskNum;
handle = le16_to_cpu(pd_pg0.DevHandle);
- spin_lock_irqsave(&ioc->sas_device_lock, flags);
- sas_device = _scsih_sas_device_find_by_handle(ioc, handle);
- spin_unlock_irqrestore(&ioc->sas_device_lock, flags);
- if (sas_device)
+ sas_device = mpt2sas_get_sdev_by_handle(ioc, handle);
+ if (sas_device) {
+ sas_device_put(sas_device);
continue;
+ }
if (mpt2sas_config_get_sas_device_pg0(ioc, &mpi_reply,
&sas_device_pg0, MPI2_SAS_DEVICE_PGAD_FORM_HANDLE,
handle) != 0)
@@ -7302,12 +7507,12 @@
if (!(_scsih_is_end_device(
le32_to_cpu(sas_device_pg0.DeviceInfo))))
continue;
- spin_lock_irqsave(&ioc->sas_device_lock, flags);
- sas_device = mpt2sas_scsih_sas_device_find_by_sas_address(ioc,
+ sas_device = mpt2sas_get_sdev_by_addr(ioc,
le64_to_cpu(sas_device_pg0.SASAddress));
- spin_unlock_irqrestore(&ioc->sas_device_lock, flags);
- if (sas_device)
+ if (sas_device) {
+ sas_device_put(sas_device);
continue;
+ }
parent_handle = le16_to_cpu(sas_device_pg0.ParentDevHandle);
if (!_scsih_get_sas_address(ioc, parent_handle, &sas_address)) {
printk(MPT2SAS_INFO_FMT "\tBEFORE adding end device: "
@@ -7410,17 +7615,27 @@
struct fw_event_work, delayed_work.work);
struct MPT2SAS_ADAPTER *ioc = fw_event->ioc;
+ _scsih_fw_event_del_from_list(ioc, fw_event);
+
/* the queue is being flushed so ignore this event */
- if (ioc->remove_host ||
- ioc->pci_error_recovery) {
- _scsih_fw_event_free(ioc, fw_event);
+ if (ioc->remove_host || ioc->pci_error_recovery) {
+ fw_event_work_put(fw_event);
return;
}
switch (fw_event->event) {
case MPT2SAS_REMOVE_UNRESPONDING_DEVICES:
- while (scsi_host_in_recovery(ioc->shost) || ioc->shost_recovery)
+ while (scsi_host_in_recovery(ioc->shost) ||
+ ioc->shost_recovery) {
+ /*
+ * If we're unloading, bail. Otherwise, this can become
+ * an infinite loop.
+ */
+ if (ioc->remove_host)
+ goto out;
+
ssleep(1);
+ }
_scsih_remove_unresponding_sas_devices(ioc);
_scsih_scan_for_devices_after_reset(ioc);
break;
@@ -7469,7 +7684,8 @@
_scsih_sas_ir_operation_status_event(ioc, fw_event);
break;
}
- _scsih_fw_event_free(ioc, fw_event);
+out:
+ fw_event_work_put(fw_event);
}
/**
@@ -7607,7 +7823,7 @@
}
sz = le16_to_cpu(mpi_reply->EventDataLength) * 4;
- fw_event = kzalloc(sizeof(*fw_event) + sz, GFP_ATOMIC);
+ fw_event = alloc_fw_event_work(sz);
if (!fw_event) {
printk(MPT2SAS_ERR_FMT "failure at %s:%d/%s()!\n",
ioc->name, __FILE__, __LINE__, __func__);
@@ -7620,6 +7836,7 @@
fw_event->VP_ID = mpi_reply->VP_ID;
fw_event->event = event;
_scsih_fw_event_add(ioc, fw_event);
+ fw_event_work_put(fw_event);
return;
}
@@ -7867,7 +8084,9 @@
sas_remove_host(shost);
scsi_remove_host(shost);
mpt2sas_base_detach(ioc);
+ spin_lock(&gioc_lock);
list_del(&ioc->list);
+ spin_unlock(&gioc_lock);
scsi_host_put(shost);
}
@@ -7966,6 +8185,48 @@
}
}
+static struct _sas_device *get_next_sas_device(struct MPT2SAS_ADAPTER *ioc)
+{
+ struct _sas_device *sas_device = NULL;
+ unsigned long flags;
+
+ spin_lock_irqsave(&ioc->sas_device_lock, flags);
+ if (!list_empty(&ioc->sas_device_init_list)) {
+ sas_device = list_first_entry(&ioc->sas_device_init_list,
+ struct _sas_device, list);
+ sas_device_get(sas_device);
+ }
+ spin_unlock_irqrestore(&ioc->sas_device_lock, flags);
+
+ return sas_device;
+}
+
+static void sas_device_make_active(struct MPT2SAS_ADAPTER *ioc,
+ struct _sas_device *sas_device)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&ioc->sas_device_lock, flags);
+
+ /*
+ * Since we dropped the lock during the call to port_add(), we need to
+ * be careful here that somebody else didn't move or delete this item
+ * while we were busy with other things.
+ *
+ * If it was on the list, we need a put() for the reference the list
+ * had. Either way, we need a get() for the destination list.
+ */
+ if (!list_empty(&sas_device->list)) {
+ list_del_init(&sas_device->list);
+ sas_device_put(sas_device);
+ }
+
+ sas_device_get(sas_device);
+ list_add_tail(&sas_device->list, &ioc->sas_device_list);
+
+ spin_unlock_irqrestore(&ioc->sas_device_lock, flags);
+}
+
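Because _scsih_probe_sas() has to drop sas_device_lock around mpt2sas_transport_port_add(), the device may have been unlinked concurrently, which is why sas_device_make_active() re-checks list_empty() and rebalances the references explicitly. Per device the accounting works out as follows (illustration only):

        /*
         * get_next_sas_device()              -> +1, the probe path's reference
         * sas_device_make_active():
         *   still on sas_device_init_list?   -> list_del_init() + put
         *                                       (drops the old list's reference)
         *   unconditionally                  -> get + list_add_tail()
         *                                       (sas_device_list takes its own)
         * sas_device_put() in the caller     -> drops the probe path's reference
         */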
/**
* _scsih_probe_sas - reporting sas devices to sas transport
* @ioc: per adapter object
@@ -7975,34 +8236,30 @@
static void
_scsih_probe_sas(struct MPT2SAS_ADAPTER *ioc)
{
- struct _sas_device *sas_device, *next;
- unsigned long flags;
+ struct _sas_device *sas_device;
- /* SAS Device List */
- list_for_each_entry_safe(sas_device, next, &ioc->sas_device_init_list,
- list) {
+ if (ioc->hide_drives)
+ return;
- if (ioc->hide_drives)
- continue;
-
+ while ((sas_device = get_next_sas_device(ioc))) {
if (!mpt2sas_transport_port_add(ioc, sas_device->handle,
- sas_device->sas_address_parent)) {
- list_del(&sas_device->list);
- kfree(sas_device);
+ sas_device->sas_address_parent)) {
+ _scsih_sas_device_remove(ioc, sas_device);
+ sas_device_put(sas_device);
continue;
} else if (!sas_device->starget) {
if (!ioc->is_driver_loading) {
mpt2sas_transport_port_remove(ioc,
- sas_device->sas_address,
- sas_device->sas_address_parent);
- list_del(&sas_device->list);
- kfree(sas_device);
+ sas_device->sas_address,
+ sas_device->sas_address_parent);
+ _scsih_sas_device_remove(ioc, sas_device);
+ sas_device_put(sas_device);
continue;
}
}
- spin_lock_irqsave(&ioc->sas_device_lock, flags);
- list_move_tail(&sas_device->list, &ioc->sas_device_list);
- spin_unlock_irqrestore(&ioc->sas_device_lock, flags);
+
+ sas_device_make_active(ioc, sas_device);
+ sas_device_put(sas_device);
}
}
@@ -8142,7 +8399,9 @@
ioc = shost_priv(shost);
memset(ioc, 0, sizeof(struct MPT2SAS_ADAPTER));
INIT_LIST_HEAD(&ioc->list);
+ spin_lock(&gioc_lock);
list_add_tail(&ioc->list, &mpt2sas_ioc_list);
+ spin_unlock(&gioc_lock);
ioc->shost = shost;
ioc->id = mpt_ids++;
sprintf(ioc->name, "%s%d", MPT2SAS_DRIVER_NAME, ioc->id);
@@ -8167,6 +8426,8 @@
ioc->schedule_dead_ioc_flush_running_cmds = &_scsih_flush_running_cmds;
/* misc semaphores and spin locks */
mutex_init(&ioc->reset_in_progress_mutex);
+ /* initializing pci_access_mutex lock */
+ mutex_init(&ioc->pci_access_mutex);
spin_lock_init(&ioc->ioc_reset_in_progress_lock);
spin_lock_init(&ioc->scsi_lookup_lock);
spin_lock_init(&ioc->sas_device_lock);
@@ -8269,7 +8530,9 @@
out_attach_fail:
destroy_workqueue(ioc->firmware_event_thread);
out_thread_fail:
+ spin_lock(&gioc_lock);
list_del(&ioc->list);
+ spin_unlock(&gioc_lock);
scsi_host_put(shost);
return rv;
}
diff --git a/drivers/scsi/mpt2sas/mpt2sas_transport.c b/drivers/scsi/mpt2sas/mpt2sas_transport.c
index ff2500a..af86800 100644
--- a/drivers/scsi/mpt2sas/mpt2sas_transport.c
+++ b/drivers/scsi/mpt2sas/mpt2sas_transport.c
@@ -1323,15 +1323,17 @@
int rc;
spin_lock_irqsave(&ioc->sas_device_lock, flags);
- sas_device = mpt2sas_scsih_sas_device_find_by_sas_address(ioc,
+ sas_device = __mpt2sas_get_sdev_by_addr(ioc,
rphy->identify.sas_address);
if (sas_device) {
*identifier = sas_device->enclosure_logical_id;
rc = 0;
+ sas_device_put(sas_device);
} else {
*identifier = 0;
rc = -ENXIO;
}
+
spin_unlock_irqrestore(&ioc->sas_device_lock, flags);
return rc;
}
@@ -1351,12 +1353,14 @@
int rc;
spin_lock_irqsave(&ioc->sas_device_lock, flags);
- sas_device = mpt2sas_scsih_sas_device_find_by_sas_address(ioc,
+ sas_device = __mpt2sas_get_sdev_by_addr(ioc,
rphy->identify.sas_address);
- if (sas_device)
+ if (sas_device) {
rc = sas_device->slot;
- else
+ sas_device_put(sas_device);
+ } else {
rc = -ENXIO;
+ }
spin_unlock_irqrestore(&ioc->sas_device_lock, flags);
return rc;
}
diff --git a/drivers/scsi/mpt3sas/mpi/mpi2.h b/drivers/scsi/mpt3sas/mpi/mpi2.h
index c34c115..ec27ad2 100644
--- a/drivers/scsi/mpt3sas/mpi/mpi2.h
+++ b/drivers/scsi/mpt3sas/mpi/mpi2.h
@@ -8,7 +8,7 @@
* scatter/gather formats.
* Creation Date: June 21, 2006
*
- * mpi2.h Version: 02.00.31
+ * mpi2.h Version: 02.00.35
*
* NOTE: Names (typedefs, defines, etc.) beginning with an MPI25 or Mpi25
* prefix are for use only on MPI v2.5 products, and must not be used
@@ -88,6 +88,10 @@
* Added MPI25_SUP_REPLY_POST_HOST_INDEX_OFFSET.
* 04-09-13 02.00.30 Bumped MPI2_HEADER_VERSION_UNIT.
* 04-17-13 02.00.31 Bumped MPI2_HEADER_VERSION_UNIT.
+ * 08-19-13 02.00.32 Bumped MPI2_HEADER_VERSION_UNIT.
+ * 12-05-13 02.00.33 Bumped MPI2_HEADER_VERSION_UNIT.
+ * 01-08-14 02.00.34 Bumped MPI2_HEADER_VERSION_UNIT
+ * 06-13-14 02.00.35 Bumped MPI2_HEADER_VERSION_UNIT.
* --------------------------------------------------------------------------
*/
@@ -121,7 +125,7 @@
#define MPI2_VERSION_02_05 (0x0205)
/*Unit and Dev versioning for this MPI header set */
-#define MPI2_HEADER_VERSION_UNIT (0x1F)
+#define MPI2_HEADER_VERSION_UNIT (0x23)
#define MPI2_HEADER_VERSION_DEV (0x00)
#define MPI2_HEADER_VERSION_UNIT_MASK (0xFF00)
#define MPI2_HEADER_VERSION_UNIT_SHIFT (8)
diff --git a/drivers/scsi/mpt3sas/mpi/mpi2_cnfg.h b/drivers/scsi/mpt3sas/mpi/mpi2_cnfg.h
index e261a31..581fdb3 100644
--- a/drivers/scsi/mpt3sas/mpi/mpi2_cnfg.h
+++ b/drivers/scsi/mpt3sas/mpi/mpi2_cnfg.h
@@ -6,7 +6,7 @@
* Title: MPI Configuration messages and pages
* Creation Date: November 10, 2006
*
- * mpi2_cnfg.h Version: 02.00.26
+ * mpi2_cnfg.h Version: 02.00.29
*
* NOTE: Names (typedefs, defines, etc.) beginning with an MPI25 or Mpi25
* prefix are for use only on MPI v2.5 products, and must not be used
@@ -165,6 +165,20 @@
* match the specification.
* 08-19-13 02.00.26 Added reserved words to MPI2_CONFIG_PAGE_IO_UNIT_7 for
* future use.
+ * 12-05-13 02.00.27 Added MPI2_MANPAGE7_FLAG_BASE_ENCLOSURE_LEVEL for
+ * MPI2_CONFIG_PAGE_MAN_7.
+ * Added EnclosureLevel and ConnectorName fields to
+ * MPI2_CONFIG_PAGE_SAS_DEV_0.
+ * Added MPI2_SAS_DEVICE0_FLAGS_ENCL_LEVEL_VALID for
+ * MPI2_CONFIG_PAGE_SAS_DEV_0.
+ * Added EnclosureLevel field to
+ * MPI2_CONFIG_PAGE_SAS_ENCLOSURE_0.
+ * Added MPI2_SAS_ENCLS0_FLAGS_ENCL_LEVEL_VALID for
+ * MPI2_CONFIG_PAGE_SAS_ENCLOSURE_0.
+ * 01-08-14 02.00.28 Added more defines for the BiosOptions field of
+ * MPI2_CONFIG_PAGE_BIOS_1.
+ * 06-13-14 02.00.29 Added SSUTimeout field to MPI2_CONFIG_PAGE_BIOS_1, and
+ * more defines for the BiosOptions field.
* --------------------------------------------------------------------------
*/
@@ -724,6 +738,7 @@
#define MPI2_MANUFACTURING7_PAGEVERSION (0x01)
/*defines for the Flags field */
+#define MPI2_MANPAGE7_FLAG_BASE_ENCLOSURE_LEVEL (0x00000008)
#define MPI2_MANPAGE7_FLAG_EVENTREPLAY_SLOT_ORDER (0x00000002)
#define MPI2_MANPAGE7_FLAG_USE_SLOT_INFO (0x00000001)
@@ -1311,7 +1326,9 @@
MPI2_CONFIG_PAGE_HEADER Header; /*0x00 */
U32 BiosOptions; /*0x04 */
U32 IOCSettings; /*0x08 */
- U32 Reserved1; /*0x0C */
+ U8 SSUTimeout; /*0x0C */
+ U8 Reserved1; /*0x0D */
+ U16 Reserved2; /*0x0E */
U32 DeviceSettings; /*0x10 */
U16 NumberOfDevices; /*0x14 */
U16 UEFIVersion; /*0x16 */
@@ -1323,9 +1340,24 @@
*PTR_MPI2_CONFIG_PAGE_BIOS_1,
Mpi2BiosPage1_t, *pMpi2BiosPage1_t;
-#define MPI2_BIOSPAGE1_PAGEVERSION (0x05)
+#define MPI2_BIOSPAGE1_PAGEVERSION (0x07)
/*values for BIOS Page 1 BiosOptions field */
+#define MPI2_BIOSPAGE1_OPTIONS_PNS_MASK (0x00003800)
+#define MPI2_BIOSPAGE1_OPTIONS_PNS_PBDHL (0x00000000)
+#define MPI2_BIOSPAGE1_OPTIONS_PNS_ENCSLOSURE (0x00000800)
+#define MPI2_BIOSPAGE1_OPTIONS_PNS_LWWID (0x00001000)
+#define MPI2_BIOSPAGE1_OPTIONS_PNS_PSENS (0x00001800)
+#define MPI2_BIOSPAGE1_OPTIONS_PNS_ESPHY (0x00002000)
+
+#define MPI2_BIOSPAGE1_OPTIONS_X86_DISABLE_BIOS (0x00000400)
+
+#define MPI2_BIOSPAGE1_OPTIONS_MASK_REGISTRATION_UEFI_BSD (0x00000300)
+#define MPI2_BIOSPAGE1_OPTIONS_USE_BIT0_REGISTRATION_UEFI_BSD (0x00000000)
+#define MPI2_BIOSPAGE1_OPTIONS_FULL_REGISTRATION_UEFI_BSD (0x00000100)
+#define MPI2_BIOSPAGE1_OPTIONS_ADAPTER_REGISTRATION_UEFI_BSD (0x00000200)
+#define MPI2_BIOSPAGE1_OPTIONS_DISABLE_REGISTRATION_UEFI_BSD (0x00000300)
+
#define MPI2_BIOSPAGE1_OPTIONS_MASK_OEM_ID (0x000000F0)
#define MPI2_BIOSPAGE1_OPTIONS_LSI_OEM_ID (0x00000000)
@@ -2633,9 +2665,9 @@
U8
ControlGroup; /*0x2E */
U8
- Reserved1; /*0x2F */
+ EnclosureLevel; /*0x2F */
U32
- Reserved2; /*0x30 */
+ ConnectorName[4]; /*0x30 */
U32
Reserved3; /*0x34 */
} MPI2_CONFIG_PAGE_SAS_DEV_0,
@@ -2643,7 +2675,7 @@
Mpi2SasDevicePage0_t,
*pMpi2SasDevicePage0_t;
-#define MPI2_SASDEVICE0_PAGEVERSION (0x08)
+#define MPI2_SASDEVICE0_PAGEVERSION (0x09)
/*values for SAS Device Page 0 AccessStatus field */
#define MPI2_SAS_DEVICE0_ASTATUS_NO_ERRORS (0x00)
@@ -2683,6 +2715,7 @@
#define MPI2_SAS_DEVICE0_FLAGS_SATA_NCQ_SUPPORTED (0x0020)
#define MPI2_SAS_DEVICE0_FLAGS_SATA_FUA_SUPPORTED (0x0010)
#define MPI2_SAS_DEVICE0_FLAGS_PORT_SELECTOR_ATTACH (0x0008)
+#define MPI2_SAS_DEVICE0_FLAGS_ENCL_LEVEL_VALID (0x0002)
#define MPI2_SAS_DEVICE0_FLAGS_DEVICE_PRESENT (0x0001)
@@ -3019,8 +3052,10 @@
NumSlots; /*0x18 */
U16
StartSlot; /*0x1A */
- U16
+ U8
Reserved2; /*0x1C */
+ U8
+ EnclosureLevel; /*0x1D */
U16
SEPDevHandle; /*0x1E */
U32
@@ -3031,9 +3066,10 @@
*PTR_MPI2_CONFIG_PAGE_SAS_ENCLOSURE_0,
Mpi2SasEnclosurePage0_t, *pMpi2SasEnclosurePage0_t;
-#define MPI2_SASENCLOSURE0_PAGEVERSION (0x03)
+#define MPI2_SASENCLOSURE0_PAGEVERSION (0x04)
/*values for SAS Enclosure Page 0 Flags field */
+#define MPI2_SAS_ENCLS0_FLAGS_ENCL_LEVEL_VALID (0x0010)
#define MPI2_SAS_ENCLS0_FLAGS_MNG_MASK (0x000F)
#define MPI2_SAS_ENCLS0_FLAGS_MNG_UNKNOWN (0x0000)
#define MPI2_SAS_ENCLS0_FLAGS_MNG_IOC_SES (0x0001)
diff --git a/drivers/scsi/mpt3sas/mpi/mpi2_ioc.h b/drivers/scsi/mpt3sas/mpi/mpi2_ioc.h
index 4908309..d7598cc 100644
--- a/drivers/scsi/mpt3sas/mpi/mpi2_ioc.h
+++ b/drivers/scsi/mpt3sas/mpi/mpi2_ioc.h
@@ -6,7 +6,7 @@
* Title: MPI IOC, Port, Event, FW Download, and FW Upload messages
* Creation Date: October 11, 2006
*
- * mpi2_ioc.h Version: 02.00.23
+ * mpi2_ioc.h Version: 02.00.24
*
* NOTE: Names (typedefs, defines, etc.) beginning with an MPI25 or Mpi25
* prefix are for use only on MPI v2.5 products, and must not be used
@@ -132,6 +132,7 @@
* Added MPI2_IOCFACTS_CAPABILITY_RDPQ_ARRAY_CAPABLE.
* Added MPI2_FW_DOWNLOAD_ITYPE_PUBLIC_KEY.
* Added Encrypted Hash Extended Image.
+ * 12-05-13 02.00.24 Added MPI25_HASH_IMAGE_TYPE_BIOS.
* --------------------------------------------------------------------------
*/
@@ -1598,6 +1599,7 @@
/* values for HashImageType */
#define MPI25_HASH_IMAGE_TYPE_UNUSED (0x00)
#define MPI25_HASH_IMAGE_TYPE_FIRMWARE (0x01)
+#define MPI25_HASH_IMAGE_TYPE_BIOS (0x02)
/* values for HashAlgorithm */
#define MPI25_HASH_ALGORITHM_UNUSED (0x00)
diff --git a/drivers/scsi/mpt3sas/mpi/mpi2_tool.h b/drivers/scsi/mpt3sas/mpi/mpi2_tool.h
index 904910d..1629e5b 100644
--- a/drivers/scsi/mpt3sas/mpi/mpi2_tool.h
+++ b/drivers/scsi/mpt3sas/mpi/mpi2_tool.h
@@ -6,7 +6,7 @@
* Title: MPI diagnostic tool structures and definitions
* Creation Date: March 26, 2007
*
- * mpi2_tool.h Version: 02.00.11
+ * mpi2_tool.h Version: 02.00.12
*
* Version History
* ---------------
@@ -33,6 +33,7 @@
* 07-26-12 02.00.10 Modified MPI2_TOOLBOX_DIAGNOSTIC_CLI_REQUEST so that
* it uses MPI Chain SGE as well as MPI Simple SGE.
* 08-19-13 02.00.11 Added MPI2_TOOLBOX_TEXT_DISPLAY_TOOL and related info.
+ * 01-08-14 02.00.12 Added MPI2_TOOLBOX_CLEAN_BIT26_PRODUCT_SPECIFIC.
* --------------------------------------------------------------------------
*/
@@ -100,6 +101,7 @@
#define MPI2_TOOLBOX_CLEAN_OTHER_PERSIST_PAGES (0x20000000)
#define MPI2_TOOLBOX_CLEAN_FW_CURRENT (0x10000000)
#define MPI2_TOOLBOX_CLEAN_FW_BACKUP (0x08000000)
+#define MPI2_TOOLBOX_CLEAN_BIT26_PRODUCT_SPECIFIC (0x04000000)
#define MPI2_TOOLBOX_CLEAN_MEGARAID (0x02000000)
#define MPI2_TOOLBOX_CLEAN_INITIALIZATION (0x01000000)
#define MPI2_TOOLBOX_CLEAN_FLASH (0x00000004)
diff --git a/drivers/scsi/mpt3sas/mpt3sas_base.c b/drivers/scsi/mpt3sas/mpt3sas_base.c
index 43f87e9..d4f1dcd 100644
--- a/drivers/scsi/mpt3sas/mpt3sas_base.c
+++ b/drivers/scsi/mpt3sas/mpt3sas_base.c
@@ -83,10 +83,10 @@
module_param(msix_disable, int, 0);
MODULE_PARM_DESC(msix_disable, " disable msix routed interrupts (default=0)");
-static int max_msix_vectors = 8;
+static int max_msix_vectors = -1;
module_param(max_msix_vectors, int, 0);
MODULE_PARM_DESC(max_msix_vectors,
- " max msix vectors - (default=8)");
+ " max msix vectors");
static int mpt3sas_fwfault_debug;
MODULE_PARM_DESC(mpt3sas_fwfault_debug,
@@ -1009,8 +1009,30 @@
}
wmb();
- writel(reply_q->reply_post_host_index | (msix_index <<
- MPI2_RPHI_MSIX_INDEX_SHIFT), &ioc->chip->ReplyPostHostIndex);
+
+ /* Update Reply Post Host Index.
+ * For those HBA's which support combined reply queue feature
+ * 1. Get the correct Supplemental Reply Post Host Index Register.
+ * i.e. (msix_index / 8)th entry from Supplemental Reply Post Host
+ * Index Register address bank i.e replyPostRegisterIndex[],
+ * 2. Then update this register with new reply host index value
+ * in ReplyPostIndex field and the MSIxIndex field with
+ * msix_index value reduced to a value between 0 and 7,
+ * using a modulo 8 operation. Since each Supplemental Reply Post
+ * Host Index Register supports 8 MSI-X vectors.
+ *
+ * For other HBA's just update the Reply Post Host Index register with
+ * new reply host index value in ReplyPostIndex Field and msix_index
+ * value in MSIxIndex field.
+ */
+ if (ioc->msix96_vector)
+ writel(reply_q->reply_post_host_index | ((msix_index & 7) <<
+ MPI2_RPHI_MSIX_INDEX_SHIFT),
+ ioc->replyPostRegisterIndex[msix_index/8]);
+ else
+ writel(reply_q->reply_post_host_index | (msix_index <<
+ MPI2_RPHI_MSIX_INDEX_SHIFT),
+ &ioc->chip->ReplyPostHostIndex);
atomic_dec(&reply_q->busy);
return IRQ_HANDLED;
}
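
The hunk above splits the MSI-X vector number in two when the combined reply queue feature is active: msix_index / 8 selects the supplemental register from replyPostRegisterIndex[], and msix_index & 7 (equivalent to msix_index % 8 for this power-of-two group size) goes into the MSIxIndex field. A small self-contained sketch of that arithmetic, assuming the usual shift value from the MPI headers; this is illustrative, not driver code:

/* Illustrative sketch of the combined-reply-queue index split; not driver code. */
#include <stdio.h>

#define RPHI_MSIX_INDEX_SHIFT 24	/* assumed value of MPI2_RPHI_MSIX_INDEX_SHIFT */

int main(void)
{
	unsigned int msix_index = 21;		/* example interrupt vector */
	unsigned int host_index = 0x123;	/* example reply post host index */

	unsigned int bank_slot = msix_index / 8;	/* which supplemental register */
	unsigned int field = msix_index & 7;		/* same as msix_index % 8 */
	unsigned int regval = host_index | (field << RPHI_MSIX_INDEX_SHIFT);

	printf("vector %u -> replyPostRegisterIndex[%u], MSIxIndex %u, register value 0x%08x\n",
	       msix_index, bank_slot, field, regval);
	return 0;
}
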
@@ -1338,7 +1360,7 @@
sg_scmd = scsi_sglist(scmd);
sges_left = scsi_dma_map(scmd);
- if (!sges_left) {
+ if (sges_left < 0) {
sdev_printk(KERN_ERR, scmd->device,
"pci_map_sg failed: request for %d bytes!\n",
scsi_bufflen(scmd));
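
For context on the check being fixed here: scsi_dma_map() returns the number of mapped scatter/gather segments, 0 when the command carries no data, or a negative errno on mapping failure, so testing `!sges_left` wrongly treated the harmless zero-length case as a failure; the fill loop in the next hunk likewise switches to `sges_left > 0`. A minimal sketch of the corrected pattern (the wrapper function is hypothetical, not the driver's actual SGE builder):

/* Sketch of the corrected return-value handling; not the driver's actual builder. */
#include <scsi/scsi_cmnd.h>
#include <scsi/scsi_device.h>

static int demo_map_and_build(struct scsi_cmnd *scmd)
{
	int sges_left = scsi_dma_map(scmd);

	if (sges_left < 0) {			/* mapping failure */
		sdev_printk(KERN_ERR, scmd->device, "scsi_dma_map failed\n");
		return -ENOMEM;
	}

	while (sges_left > 0) {			/* 0 simply means "no data" */
		/* ... add one SGE for the current segment ... */
		sges_left--;
	}
	return 0;
}
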
@@ -1407,7 +1429,7 @@
fill_in_last_segment:
/* fill the last segment */
- while (sges_left) {
+ while (sges_left > 0) {
if (sges_left == 1)
_base_add_sg_single_ieee(sg_local,
simple_sgl_flags_last, 0, sg_dma_len(sg_scmd),
@@ -1560,8 +1582,6 @@
pci_read_config_word(ioc->pdev, base + 2, &message_control);
ioc->msix_vector_count = (message_control & 0x3FF) + 1;
- if (ioc->msix_vector_count > 8)
- ioc->msix_vector_count = 8;
dinitprintk(ioc, pr_info(MPT3SAS_FMT
"msix is supported, vector_count(%d)\n",
ioc->name, ioc->msix_vector_count));
@@ -1793,6 +1813,36 @@
}
/**
+ * mpt3sas_base_unmap_resources - free controller resources
+ * @ioc: per adapter object
+ */
+void
+mpt3sas_base_unmap_resources(struct MPT3SAS_ADAPTER *ioc)
+{
+ struct pci_dev *pdev = ioc->pdev;
+
+ dexitprintk(ioc, printk(MPT3SAS_FMT "%s\n",
+ ioc->name, __func__));
+
+ _base_free_irq(ioc);
+ _base_disable_msix(ioc);
+
+ if (ioc->msix96_vector)
+ kfree(ioc->replyPostRegisterIndex);
+
+ if (ioc->chip_phys) {
+ iounmap(ioc->chip);
+ ioc->chip_phys = 0;
+ }
+
+ if (pci_is_enabled(pdev)) {
+ pci_release_selected_regions(ioc->pdev, ioc->bars);
+ pci_disable_pcie_error_reporting(pdev);
+ pci_disable_device(pdev);
+ }
+}
+
+/**
* mpt3sas_base_map_resources - map in controller resources (io/irq/memap)
* @ioc: per adapter object
*
@@ -1882,6 +1932,36 @@
if (r)
goto out_fail;
+ /* Use the Combined reply queue feature only for SAS3 C0 & higher
+ * revision HBAs and also only when reply queue count is greater than 8
+ */
+ if (ioc->msix96_vector && ioc->reply_queue_count > 8) {
+ /* Determine the Supplemental Reply Post Host Index Registers
+ * Addresse. Supplemental Reply Post Host Index Registers
+		 * Address. Supplemental Reply Post Host Index Registers
+ * each register is at offset bytes of
+ * MPT3_SUP_REPLY_POST_HOST_INDEX_REG_OFFSET from previous one.
+ */
+ ioc->replyPostRegisterIndex = kcalloc(
+ MPT3_SUP_REPLY_POST_HOST_INDEX_REG_COUNT,
+ sizeof(resource_size_t *), GFP_KERNEL);
+ if (!ioc->replyPostRegisterIndex) {
+ dfailprintk(ioc, printk(MPT3SAS_FMT
+ "allocation for reply Post Register Index failed!!!\n",
+ ioc->name));
+ r = -ENOMEM;
+ goto out_fail;
+ }
+
+ for (i = 0; i < MPT3_SUP_REPLY_POST_HOST_INDEX_REG_COUNT; i++) {
+ ioc->replyPostRegisterIndex[i] = (resource_size_t *)
+ ((u8 *)&ioc->chip->Doorbell +
+ MPI25_SUP_REPLY_POST_HOST_INDEX_OFFSET +
+ (i * MPT3_SUP_REPLY_POST_HOST_INDEX_REG_OFFSET));
+ }
+ } else
+ ioc->msix96_vector = 0;
+
list_for_each_entry(reply_q, &ioc->reply_queue_list, list)
pr_info(MPT3SAS_FMT "%s: IRQ %d\n",
reply_q->name, ((ioc->msix_enable) ? "PCI-MSI-X enabled" :
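
The loop above builds the bank of Supplemental Reply Post Host Index Register addresses: entry i sits at Doorbell-relative offset MPI25_SUP_REPLY_POST_HOST_INDEX_OFFSET + i * MPT3_SUP_REPLY_POST_HOST_INDEX_REG_OFFSET, and with twelve registers of eight vectors each the bank covers the 96 MSI-X vectors the feature is named after. A standalone sketch of that layout arithmetic, using the register count and 0x10 stride from the new defines in mpt3sas_base.h and leaving the bank base symbolic:

/* Illustrative layout arithmetic for the supplemental register bank. */
#include <stdio.h>

#define REG_COUNT   12		/* MPT3_SUP_REPLY_POST_HOST_INDEX_REG_COUNT */
#define REG_STRIDE  0x10	/* MPT3_SUP_REPLY_POST_HOST_INDEX_REG_OFFSET */

int main(void)
{
	int i;

	for (i = 0; i < REG_COUNT; i++)
		printf("register %2d: bank base + 0x%02x, serves MSI-X vectors %2d..%2d\n",
		       i, i * REG_STRIDE, i * 8, i * 8 + 7);
	printf("vectors covered: %d\n", REG_COUNT * 8);	/* 96 */
	return 0;
}
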
@@ -1897,12 +1977,7 @@
return 0;
out_fail:
- if (ioc->chip_phys)
- iounmap(ioc->chip);
- ioc->chip_phys = 0;
- pci_release_selected_regions(ioc->pdev, ioc->bars);
- pci_disable_pcie_error_reporting(pdev);
- pci_disable_device(pdev);
+ mpt3sas_base_unmap_resources(ioc);
return r;
}
@@ -2292,6 +2367,99 @@
/**
+ * _base_display_dell_branding - Display branding string
+ * @ioc: per adapter object
+ *
+ * Return nothing.
+ */
+static void
+_base_display_dell_branding(struct MPT3SAS_ADAPTER *ioc)
+{
+ if (ioc->pdev->subsystem_vendor != PCI_VENDOR_ID_DELL)
+ return;
+
+ switch (ioc->pdev->device) {
+ case MPI25_MFGPAGE_DEVID_SAS3008:
+ switch (ioc->pdev->subsystem_device) {
+ case MPT3SAS_DELL_12G_HBA_SSDID:
+ pr_info(MPT3SAS_FMT "%s\n", ioc->name,
+ MPT3SAS_DELL_12G_HBA_BRANDING);
+ break;
+ default:
+ pr_info(MPT3SAS_FMT
+ "Dell 12Gbps HBA: Subsystem ID: 0x%X\n", ioc->name,
+ ioc->pdev->subsystem_device);
+ break;
+ }
+ break;
+ default:
+ pr_info(MPT3SAS_FMT
+ "Dell 12Gbps HBA: Subsystem ID: 0x%X\n", ioc->name,
+ ioc->pdev->subsystem_device);
+ break;
+ }
+}
+
+/**
+ * _base_display_cisco_branding - Display branding string
+ * @ioc: per adapter object
+ *
+ * Return nothing.
+ */
+static void
+_base_display_cisco_branding(struct MPT3SAS_ADAPTER *ioc)
+{
+ if (ioc->pdev->subsystem_vendor != PCI_VENDOR_ID_CISCO)
+ return;
+
+ switch (ioc->pdev->device) {
+ case MPI25_MFGPAGE_DEVID_SAS3008:
+ switch (ioc->pdev->subsystem_device) {
+ case MPT3SAS_CISCO_12G_8E_HBA_SSDID:
+ pr_info(MPT3SAS_FMT "%s\n", ioc->name,
+ MPT3SAS_CISCO_12G_8E_HBA_BRANDING);
+ break;
+ case MPT3SAS_CISCO_12G_8I_HBA_SSDID:
+ pr_info(MPT3SAS_FMT "%s\n", ioc->name,
+ MPT3SAS_CISCO_12G_8I_HBA_BRANDING);
+ break;
+ case MPT3SAS_CISCO_12G_AVILA_HBA_SSDID:
+ pr_info(MPT3SAS_FMT "%s\n", ioc->name,
+ MPT3SAS_CISCO_12G_AVILA_HBA_BRANDING);
+ break;
+ default:
+ pr_info(MPT3SAS_FMT
+ "Cisco 12Gbps SAS HBA: Subsystem ID: 0x%X\n",
+ ioc->name, ioc->pdev->subsystem_device);
+ break;
+ }
+ break;
+ case MPI25_MFGPAGE_DEVID_SAS3108_1:
+ switch (ioc->pdev->subsystem_device) {
+ case MPT3SAS_CISCO_12G_AVILA_HBA_SSDID:
+ pr_info(MPT3SAS_FMT "%s\n", ioc->name,
+ MPT3SAS_CISCO_12G_AVILA_HBA_BRANDING);
+ break;
+ case MPT3SAS_CISCO_12G_COLUSA_MEZZANINE_HBA_SSDID:
+ pr_info(MPT3SAS_FMT "%s\n", ioc->name,
+ MPT3SAS_CISCO_12G_COLUSA_MEZZANINE_HBA_BRANDING);
+ break;
+ default:
+ pr_info(MPT3SAS_FMT
+ "Cisco 12Gbps SAS HBA: Subsystem ID: 0x%X\n",
+ ioc->name, ioc->pdev->subsystem_device);
+ break;
+ }
+ break;
+ default:
+ pr_info(MPT3SAS_FMT
+ "Cisco 12Gbps SAS HBA: Subsystem ID: 0x%X\n",
+ ioc->name, ioc->pdev->subsystem_device);
+ break;
+ }
+}
+
+/**
* _base_display_ioc_capabilities - Disply IOC's capabilities.
* @ioc: per adapter object
*
@@ -2321,6 +2489,8 @@
bios_version & 0x000000FF);
_base_display_intel_branding(ioc);
+ _base_display_dell_branding(ioc);
+ _base_display_cisco_branding(ioc);
pr_info(MPT3SAS_FMT "Protocol=(", ioc->name);
@@ -3139,6 +3309,9 @@
* Notes: MPI2_HIS_IOC2SYS_DB_STATUS - set to one when IOC writes to doorbell.
*/
static int
+_base_diag_reset(struct MPT3SAS_ADAPTER *ioc, int sleep_flag);
+
+static int
_base_wait_for_doorbell_int(struct MPT3SAS_ADAPTER *ioc, int timeout,
int sleep_flag)
{
@@ -3681,6 +3854,64 @@
}
/**
+ * _base_wait_for_iocstate - Wait until the card is in READY or OPERATIONAL
+ * @ioc: per adapter object
+ * @timeout:
+ * @sleep_flag: CAN_SLEEP or NO_SLEEP
+ *
+ * Returns 0 for success, non-zero for failure.
+ */
+static int
+_base_wait_for_iocstate(struct MPT3SAS_ADAPTER *ioc, int timeout,
+ int sleep_flag)
+{
+ u32 ioc_state;
+ int rc;
+
+ dinitprintk(ioc, printk(MPT3SAS_FMT "%s\n", ioc->name,
+ __func__));
+
+ if (ioc->pci_error_recovery) {
+ dfailprintk(ioc, printk(MPT3SAS_FMT
+ "%s: host in pci error recovery\n", ioc->name, __func__));
+ return -EFAULT;
+ }
+
+ ioc_state = mpt3sas_base_get_iocstate(ioc, 0);
+ dhsprintk(ioc, printk(MPT3SAS_FMT "%s: ioc_state(0x%08x)\n",
+ ioc->name, __func__, ioc_state));
+
+ if (((ioc_state & MPI2_IOC_STATE_MASK) == MPI2_IOC_STATE_READY) ||
+ (ioc_state & MPI2_IOC_STATE_MASK) == MPI2_IOC_STATE_OPERATIONAL)
+ return 0;
+
+ if (ioc_state & MPI2_DOORBELL_USED) {
+ dhsprintk(ioc, printk(MPT3SAS_FMT
+ "unexpected doorbell active!\n", ioc->name));
+ goto issue_diag_reset;
+ }
+
+ if ((ioc_state & MPI2_IOC_STATE_MASK) == MPI2_IOC_STATE_FAULT) {
+ mpt3sas_base_fault_info(ioc, ioc_state &
+ MPI2_DOORBELL_DATA_MASK);
+ goto issue_diag_reset;
+ }
+
+ ioc_state = _base_wait_on_iocstate(ioc, MPI2_IOC_STATE_READY,
+ timeout, sleep_flag);
+ if (ioc_state) {
+ dfailprintk(ioc, printk(MPT3SAS_FMT
+ "%s: failed going to ready state (ioc_state=0x%x)\n",
+ ioc->name, __func__, ioc_state));
+ return -EFAULT;
+ }
+
+ issue_diag_reset:
+ rc = _base_diag_reset(ioc, sleep_flag);
+ return rc;
+}
+
+/**
* _base_get_ioc_facts - obtain ioc facts reply and save in ioc
* @ioc: per adapter object
* @sleep_flag: CAN_SLEEP or NO_SLEEP
@@ -3698,6 +3929,13 @@
dinitprintk(ioc, pr_info(MPT3SAS_FMT "%s\n", ioc->name,
__func__));
+ r = _base_wait_for_iocstate(ioc, 10, sleep_flag);
+ if (r) {
+ dfailprintk(ioc, printk(MPT3SAS_FMT
+ "%s: failed getting to correct state\n",
+ ioc->name, __func__));
+ return r;
+ }
mpi_reply_sz = sizeof(Mpi2IOCFactsReply_t);
mpi_request_sz = sizeof(Mpi2IOCFactsRequest_t);
memset(&mpi_request, 0, mpi_request_sz);
@@ -3783,7 +4021,7 @@
mpi_request.WhoInit = MPI2_WHOINIT_HOST_DRIVER;
mpi_request.VF_ID = 0; /* TODO */
mpi_request.VP_ID = 0;
- mpi_request.MsgVersion = cpu_to_le16(MPI2_VERSION);
+ mpi_request.MsgVersion = cpu_to_le16(MPI25_VERSION);
mpi_request.HeaderVersion = cpu_to_le16(MPI2_HEADER_VERSION);
if (_base_is_controller_msix_enabled(ioc))
@@ -4524,8 +4762,15 @@
/* initialize reply post host index */
list_for_each_entry(reply_q, &ioc->reply_queue_list, list) {
- writel(reply_q->msix_index << MPI2_RPHI_MSIX_INDEX_SHIFT,
- &ioc->chip->ReplyPostHostIndex);
+ if (ioc->msix96_vector)
+ writel((reply_q->msix_index & 7)<<
+ MPI2_RPHI_MSIX_INDEX_SHIFT,
+ ioc->replyPostRegisterIndex[reply_q->msix_index/8]);
+ else
+ writel(reply_q->msix_index <<
+ MPI2_RPHI_MSIX_INDEX_SHIFT,
+ &ioc->chip->ReplyPostHostIndex);
+
if (!_base_is_controller_msix_enabled(ioc))
goto skip_init_reply_post_host_index;
}
@@ -4564,8 +4809,6 @@
void
mpt3sas_base_free_resources(struct MPT3SAS_ADAPTER *ioc)
{
- struct pci_dev *pdev = ioc->pdev;
-
dexitprintk(ioc, pr_info(MPT3SAS_FMT "%s\n", ioc->name,
__func__));
@@ -4576,18 +4819,7 @@
ioc->shost_recovery = 0;
}
- _base_free_irq(ioc);
- _base_disable_msix(ioc);
-
- if (ioc->chip_phys && ioc->chip)
- iounmap(ioc->chip);
- ioc->chip_phys = 0;
-
- if (pci_is_enabled(pdev)) {
- pci_release_selected_regions(ioc->pdev, ioc->bars);
- pci_disable_pcie_error_reporting(pdev);
- pci_disable_device(pdev);
- }
+ mpt3sas_base_unmap_resources(ioc);
return;
}
@@ -4602,6 +4834,7 @@
{
int r, i;
int cpu_id, last_cpu_id = 0;
+ u8 revision;
dinitprintk(ioc, pr_info(MPT3SAS_FMT "%s\n", ioc->name,
__func__));
@@ -4621,6 +4854,20 @@
goto out_free_resources;
}
+ /* Check whether the controller revision is C0 or above.
+ * only C0 and above revision controllers support 96 MSI-X vectors.
+ */
+ revision = ioc->pdev->revision;
+
+ if ((ioc->pdev->device == MPI25_MFGPAGE_DEVID_SAS3004 ||
+ ioc->pdev->device == MPI25_MFGPAGE_DEVID_SAS3008 ||
+ ioc->pdev->device == MPI25_MFGPAGE_DEVID_SAS3108_1 ||
+ ioc->pdev->device == MPI25_MFGPAGE_DEVID_SAS3108_2 ||
+ ioc->pdev->device == MPI25_MFGPAGE_DEVID_SAS3108_5 ||
+ ioc->pdev->device == MPI25_MFGPAGE_DEVID_SAS3108_6) &&
+ (revision >= 0x02))
+ ioc->msix96_vector = 1;
+
ioc->rdpq_array_enable_assigned = 0;
ioc->dma_mask = 0;
r = mpt3sas_base_map_resources(ioc);
@@ -4643,7 +4890,6 @@
ioc->build_sg_scmd = &_base_build_sg_scmd_ieee;
ioc->build_sg = &_base_build_sg_ieee;
ioc->build_zero_len_sge = &_base_build_zero_len_sge_ieee;
- ioc->mpi25 = 1;
ioc->sge_size_ieee = sizeof(Mpi2IeeeSgeSimple64_t);
/*
diff --git a/drivers/scsi/mpt3sas/mpt3sas_base.h b/drivers/scsi/mpt3sas/mpt3sas_base.h
index afa8816..f0e462b 100644
--- a/drivers/scsi/mpt3sas/mpt3sas_base.h
+++ b/drivers/scsi/mpt3sas/mpt3sas_base.h
@@ -71,8 +71,8 @@
#define MPT3SAS_DRIVER_NAME "mpt3sas"
#define MPT3SAS_AUTHOR "Avago Technologies <MPT-FusionLinux.pdl@avagotech.com>"
#define MPT3SAS_DESCRIPTION "LSI MPT Fusion SAS 3.0 Device Driver"
-#define MPT3SAS_DRIVER_VERSION "04.100.00.00"
-#define MPT3SAS_MAJOR_VERSION 4
+#define MPT3SAS_DRIVER_VERSION "09.100.00.00"
+#define MPT3SAS_MAJOR_VERSION 9
#define MPT3SAS_MINOR_VERSION 100
#define MPT3SAS_BUILD_VERSION 0
#define MPT3SAS_RELEASE_VERSION 00
@@ -152,12 +152,49 @@
#define MPT3SAS_INTEL_RS3UC080_SSDID 0x3524
/*
+ * Dell HBA branding
+ */
+#define MPT3SAS_DELL_12G_HBA_BRANDING \
+ "Dell 12Gbps HBA"
+
+/*
+ * Dell HBA SSDIDs
+ */
+#define MPT3SAS_DELL_12G_HBA_SSDID 0x1F46
+
+/*
+ * Cisco HBA branding
+ */
+#define MPT3SAS_CISCO_12G_8E_HBA_BRANDING \
+ "Cisco 9300-8E 12G SAS HBA"
+#define MPT3SAS_CISCO_12G_8I_HBA_BRANDING \
+ "Cisco 9300-8i 12G SAS HBA"
+#define MPT3SAS_CISCO_12G_AVILA_HBA_BRANDING \
+ "Cisco 12G Modular SAS Pass through Controller"
+#define MPT3SAS_CISCO_12G_COLUSA_MEZZANINE_HBA_BRANDING \
+ "UCS C3X60 12G SAS Pass through Controller"
+/*
+ * Cisco HBA SSSDIDs
+ */
+#define MPT3SAS_CISCO_12G_8E_HBA_SSDID 0x14C
+#define MPT3SAS_CISCO_12G_8I_HBA_SSDID 0x154
+#define MPT3SAS_CISCO_12G_AVILA_HBA_SSDID 0x155
+#define MPT3SAS_CISCO_12G_COLUSA_MEZZANINE_HBA_SSDID 0x156
+
+/*
* status bits for ioc->diag_buffer_status
*/
#define MPT3_DIAG_BUFFER_IS_REGISTERED (0x01)
#define MPT3_DIAG_BUFFER_IS_RELEASED (0x02)
#define MPT3_DIAG_BUFFER_IS_DIAG_RESET (0x04)
+/*
+ * Combined Reply Queue constants,
+ * There are twelve Supplemental Reply Post Host Index Registers
+ * and each register is at offset 0x10 bytes from the previous one.
+ */
+#define MPT3_SUP_REPLY_POST_HOST_INDEX_REG_COUNT 12
+#define MPT3_SUP_REPLY_POST_HOST_INDEX_REG_OFFSET (0x10)
/* OEM Identifiers */
#define MFG10_OEM_ID_INVALID (0x00000000)
@@ -173,6 +210,8 @@
#define MFG10_GF0_SSD_DATA_SCRUB_DISABLE (0x00000008)
#define MFG10_GF0_SINGLE_DRIVE_R0 (0x00000010)
+#define VIRTUAL_IO_FAILED_RETRY (0x32010081)
+
/* OEM Specific Flags will come from OEM specific header files */
struct Mpi2ManufacturingPage10_t {
MPI2_CONFIG_PAGE_HEADER Header; /* 00h */
@@ -294,7 +333,8 @@
* @responding: used in _scsih_sas_device_mark_responding
* @fast_path: fast path feature enable bit
* @pfa_led_on: flag for PFA LED status
- *
+ * @pend_sas_rphy_add: flag to check if device is in sas_rphy_add()
+ * addition routine.
*/
struct _sas_device {
struct list_head list;
@@ -315,6 +355,9 @@
u8 responding;
u8 fast_path;
u8 pfa_led_on;
+ u8 pend_sas_rphy_add;
+ u8 enclosure_level;
+ u8 connector_name[4];
};
/**
@@ -728,7 +771,8 @@
* is assigned only ones
* @reply_queue_count: number of reply queue's
* @reply_queue_list: link list contaning the reply queue info
- * @reply_post_host_index: head index in the pool where FW completes IO
+ * @msix96_vector: 96 MSI-X vector support
+ * @replyPostRegisterIndex: index of next position in Reply Desc Post Queue
* @delayed_tr_list: target reset link list
* @delayed_tr_volume_list: volume target reset link list
* @@temp_sensors_count: flag to carry the number of temperature sensors
@@ -814,7 +858,6 @@
MPT_BUILD_SG_SCMD build_sg_scmd;
MPT_BUILD_SG build_sg;
MPT_BUILD_ZERO_LEN_SGE build_zero_len_sge;
- u8 mpi25;
u16 sge_size_ieee;
/* function ptr for MPI sg elements only */
@@ -937,6 +980,10 @@
u8 reply_queue_count;
struct list_head reply_queue_list;
+ u8 msix96_vector;
+ /* reply post register index */
+ resource_size_t **replyPostRegisterIndex;
+
struct list_head delayed_tr_list;
struct list_head delayed_tr_volume_list;
u8 temp_sensors_count;
diff --git a/drivers/scsi/mpt3sas/mpt3sas_scsih.c b/drivers/scsi/mpt3sas/mpt3sas_scsih.c
index 5a97e32..8ccef38 100644
--- a/drivers/scsi/mpt3sas/mpt3sas_scsih.c
+++ b/drivers/scsi/mpt3sas/mpt3sas_scsih.c
@@ -585,6 +585,22 @@
if (!sas_device)
return;
+ pr_info(MPT3SAS_FMT
+ "removing handle(0x%04x), sas_addr(0x%016llx)\n",
+ ioc->name, sas_device->handle,
+ (unsigned long long) sas_device->sas_address);
+
+ if (sas_device->enclosure_handle != 0)
+ pr_info(MPT3SAS_FMT
+ "removing enclosure logical id(0x%016llx), slot(%d)\n",
+ ioc->name, (unsigned long long)
+ sas_device->enclosure_logical_id, sas_device->slot);
+
+ if (sas_device->connector_name[0] != '\0')
+ pr_info(MPT3SAS_FMT
+ "removing enclosure level(0x%04x), connector name( %s)\n",
+ ioc->name, sas_device->enclosure_level,
+ sas_device->connector_name);
spin_lock_irqsave(&ioc->sas_device_lock, flags);
list_del(&sas_device->list);
@@ -663,6 +679,18 @@
ioc->name, __func__, sas_device->handle,
(unsigned long long)sas_device->sas_address));
+ if (sas_device->enclosure_handle != 0)
+ dewtprintk(ioc, pr_info(MPT3SAS_FMT
+ "%s: enclosure logical id(0x%016llx), slot( %d)\n",
+ ioc->name, __func__, (unsigned long long)
+ sas_device->enclosure_logical_id, sas_device->slot));
+
+ if (sas_device->connector_name[0] != '\0')
+ dewtprintk(ioc, pr_info(MPT3SAS_FMT
+ "%s: enclosure level(0x%04x), connector name( %s)\n",
+ ioc->name, __func__,
+ sas_device->enclosure_level, sas_device->connector_name));
+
spin_lock_irqsave(&ioc->sas_device_lock, flags);
list_add_tail(&sas_device->list, &ioc->sas_device_list);
spin_unlock_irqrestore(&ioc->sas_device_lock, flags);
@@ -704,6 +732,18 @@
__func__, sas_device->handle,
(unsigned long long)sas_device->sas_address));
+ if (sas_device->enclosure_handle != 0)
+ dewtprintk(ioc, pr_info(MPT3SAS_FMT
+ "%s: enclosure logical id(0x%016llx), slot( %d)\n",
+ ioc->name, __func__, (unsigned long long)
+ sas_device->enclosure_logical_id, sas_device->slot));
+
+ if (sas_device->connector_name[0] != '\0')
+ dewtprintk(ioc, pr_info(MPT3SAS_FMT
+ "%s: enclosure level(0x%04x), connector name( %s)\n",
+ ioc->name, __func__, sas_device->enclosure_level,
+ sas_device->connector_name));
+
spin_lock_irqsave(&ioc->sas_device_lock, flags);
list_add_tail(&sas_device->list, &ioc->sas_device_init_list);
_scsih_determine_boot_device(ioc, sas_device, 0);
@@ -1772,10 +1812,16 @@
"sas_addr(0x%016llx), phy(%d), device_name(0x%016llx)\n",
ds, handle, (unsigned long long)sas_device->sas_address,
sas_device->phy, (unsigned long long)sas_device->device_name);
- sdev_printk(KERN_INFO, sdev,
- "%s: enclosure_logical_id(0x%016llx), slot(%d)\n",
- ds, (unsigned long long)
- sas_device->enclosure_logical_id, sas_device->slot);
+ if (sas_device->enclosure_handle != 0)
+ sdev_printk(KERN_INFO, sdev,
+ "%s: enclosure_logical_id(0x%016llx), slot(%d)\n",
+ ds, (unsigned long long)
+ sas_device->enclosure_logical_id, sas_device->slot);
+ if (sas_device->connector_name[0] != '\0')
+ sdev_printk(KERN_INFO, sdev,
+ "%s: enclosure level(0x%04x), connector name( %s)\n",
+ ds, sas_device->enclosure_level,
+ sas_device->connector_name);
spin_unlock_irqrestore(&ioc->sas_device_lock, flags);
@@ -2189,10 +2235,17 @@
sas_device->handle,
(unsigned long long)sas_device->sas_address,
sas_device->phy);
- starget_printk(KERN_INFO, starget,
- "enclosure_logical_id(0x%016llx), slot(%d)\n",
- (unsigned long long)sas_device->enclosure_logical_id,
- sas_device->slot);
+ if (sas_device->enclosure_handle != 0)
+ starget_printk(KERN_INFO, starget,
+ "enclosure_logical_id(0x%016llx), slot(%d)\n",
+ (unsigned long long)
+ sas_device->enclosure_logical_id,
+ sas_device->slot);
+ if (sas_device->connector_name)
+ starget_printk(KERN_INFO, starget,
+ "enclosure level(0x%04x),connector name(%s)\n",
+ sas_device->enclosure_level,
+ sas_device->connector_name);
}
spin_unlock_irqrestore(&ioc->sas_device_lock, flags);
}
@@ -2552,6 +2605,75 @@
}
/**
+ * _scsih_internal_device_block - block the sdev device
+ * @sdev: per device object
+ * @sas_device_priv_data : per device driver private data
+ *
+ * make sure device is blocked without error, if not
+ * print an error
+ */
+static void
+_scsih_internal_device_block(struct scsi_device *sdev,
+ struct MPT3SAS_DEVICE *sas_device_priv_data)
+{
+ int r = 0;
+
+ sdev_printk(KERN_INFO, sdev, "device_block, handle(0x%04x)\n",
+ sas_device_priv_data->sas_target->handle);
+ sas_device_priv_data->block = 1;
+
+ r = scsi_internal_device_block(sdev);
+ if (r == -EINVAL)
+ sdev_printk(KERN_WARNING, sdev,
+ "device_block failed with return(%d) for handle(0x%04x)\n",
+ sas_device_priv_data->sas_target->handle, r);
+}
+
+/**
+ * _scsih_internal_device_unblock - unblock the sdev device
+ * @sdev: per device object
+ * @sas_device_priv_data : per device driver private data
+ * make sure device is unblocked without error, if not retry
+ * by blocking and then unblocking
+ */
+
+static void
+_scsih_internal_device_unblock(struct scsi_device *sdev,
+ struct MPT3SAS_DEVICE *sas_device_priv_data)
+{
+ int r = 0;
+
+ sdev_printk(KERN_WARNING, sdev, "device_unblock and setting to running, "
+ "handle(0x%04x)\n", sas_device_priv_data->sas_target->handle);
+ sas_device_priv_data->block = 0;
+ r = scsi_internal_device_unblock(sdev, SDEV_RUNNING);
+ if (r == -EINVAL) {
+ /* The device has been set to SDEV_RUNNING by SD layer during
+ * device addition but the request queue is still stopped by
+ * our earlier block call. We need to perform a block again
+ * to get the device to SDEV_BLOCK and then to SDEV_RUNNING */
+
+ sdev_printk(KERN_WARNING, sdev,
+ "device_unblock failed with return(%d) for handle(0x%04x) "
+ "performing a block followed by an unblock\n",
+ sas_device_priv_data->sas_target->handle, r);
+ sas_device_priv_data->block = 1;
+ r = scsi_internal_device_block(sdev);
+ if (r)
+ sdev_printk(KERN_WARNING, sdev, "retried device_block "
+ "failed with return(%d) for handle(0x%04x)\n",
+ sas_device_priv_data->sas_target->handle, r);
+
+ sas_device_priv_data->block = 0;
+ r = scsi_internal_device_unblock(sdev, SDEV_RUNNING);
+ if (r)
+ sdev_printk(KERN_WARNING, sdev, "retried device_unblock"
+ " failed with return(%d) for handle(0x%04x)\n",
+ sas_device_priv_data->sas_target->handle, r);
+ }
+}
+
+/**
* _scsih_ublock_io_all_device - unblock every device
* @ioc: per adapter object
*
@@ -2570,11 +2692,10 @@
if (!sas_device_priv_data->block)
continue;
- sas_device_priv_data->block = 0;
dewtprintk(ioc, sdev_printk(KERN_INFO, sdev,
"device_running, handle(0x%04x)\n",
sas_device_priv_data->sas_target->handle));
- scsi_internal_device_unblock(sdev, SDEV_RUNNING);
+ _scsih_internal_device_unblock(sdev, sas_device_priv_data);
}
}
@@ -2599,10 +2720,9 @@
if (sas_device_priv_data->sas_target->sas_address
!= sas_address)
continue;
- if (sas_device_priv_data->block) {
- sas_device_priv_data->block = 0;
- scsi_internal_device_unblock(sdev, SDEV_RUNNING);
- }
+ if (sas_device_priv_data->block)
+ _scsih_internal_device_unblock(sdev,
+ sas_device_priv_data);
}
}
@@ -2625,10 +2745,7 @@
continue;
if (sas_device_priv_data->block)
continue;
- sas_device_priv_data->block = 1;
- scsi_internal_device_block(sdev);
- sdev_printk(KERN_INFO, sdev, "device_blocked, handle(0x%04x)\n",
- sas_device_priv_data->sas_target->handle);
+ _scsih_internal_device_block(sdev, sas_device_priv_data);
}
}
@@ -2644,6 +2761,11 @@
{
struct MPT3SAS_DEVICE *sas_device_priv_data;
struct scsi_device *sdev;
+ struct _sas_device *sas_device;
+
+ sas_device = _scsih_sas_device_find_by_handle(ioc, handle);
+ if (!sas_device)
+ return;
shost_for_each_device(sdev, ioc->shost) {
sas_device_priv_data = sdev->hostdata;
@@ -2653,10 +2775,9 @@
continue;
if (sas_device_priv_data->block)
continue;
- sas_device_priv_data->block = 1;
- scsi_internal_device_block(sdev);
- sdev_printk(KERN_INFO, sdev,
- "device_blocked, handle(0x%04x)\n", handle);
+ if (sas_device->pend_sas_rphy_add)
+ continue;
+ _scsih_internal_device_block(sdev, sas_device_priv_data);
}
}
@@ -2806,6 +2927,18 @@
"setting delete flag: handle(0x%04x), sas_addr(0x%016llx)\n",
ioc->name, handle,
(unsigned long long)sas_address));
+ if (sas_device->enclosure_handle != 0)
+ dewtprintk(ioc, pr_info(MPT3SAS_FMT
+ "setting delete flag:enclosure logical id(0x%016llx),"
+ " slot(%d)\n", ioc->name, (unsigned long long)
+ sas_device->enclosure_logical_id,
+ sas_device->slot));
+ if (sas_device->connector_name)
+ dewtprintk(ioc, pr_info(MPT3SAS_FMT
+ "setting delete flag: enclosure level(0x%04x),"
+ " connector name( %s)\n", ioc->name,
+ sas_device->enclosure_level,
+ sas_device->connector_name));
_scsih_ublock_io_device(ioc, sas_address);
sas_target_priv_data->handle = MPT3SAS_INVALID_DEVICE_HANDLE;
}
@@ -3821,10 +3954,19 @@
"\tsas_address(0x%016llx), phy(%d)\n",
ioc->name, (unsigned long long)
sas_device->sas_address, sas_device->phy);
- pr_warn(MPT3SAS_FMT
- "\tenclosure_logical_id(0x%016llx), slot(%d)\n",
- ioc->name, (unsigned long long)
- sas_device->enclosure_logical_id, sas_device->slot);
+ if (sas_device->enclosure_handle != 0)
+ pr_warn(MPT3SAS_FMT
+ "\tenclosure_logical_id(0x%016llx),"
+ "slot(%d)\n", ioc->name,
+ (unsigned long long)
+ sas_device->enclosure_logical_id,
+ sas_device->slot);
+ if (sas_device->connector_name[0])
+ pr_warn(MPT3SAS_FMT
+ "\tenclosure level(0x%04x),"
+ " connector name( %s)\n", ioc->name,
+ sas_device->enclosure_level,
+ sas_device->connector_name);
}
spin_unlock_irqrestore(&ioc->sas_device_lock, flags);
}
@@ -3999,7 +4141,16 @@
spin_unlock_irqrestore(&ioc->sas_device_lock, flags);
return;
}
- starget_printk(KERN_WARNING, starget, "predicted fault\n");
+ if (sas_device->enclosure_handle != 0)
+ starget_printk(KERN_INFO, starget, "predicted fault, "
+ "enclosure logical id(0x%016llx), slot(%d)\n",
+ (unsigned long long)sas_device->enclosure_logical_id,
+ sas_device->slot);
+ if (sas_device->connector_name[0] != '\0')
+ starget_printk(KERN_WARNING, starget, "predicted fault, "
+ "enclosure level(0x%04x), connector name( %s)\n",
+ sas_device->enclosure_level,
+ sas_device->connector_name);
spin_unlock_irqrestore(&ioc->sas_device_lock, flags);
if (ioc->pdev->subsystem_vendor == PCI_VENDOR_ID_IBM)
@@ -4119,8 +4270,15 @@
_scsih_smart_predicted_fault(ioc,
le16_to_cpu(mpi_reply->DevHandle));
mpt3sas_trigger_scsi(ioc, data.skey, data.asc, data.ascq);
- }
+#ifdef CONFIG_SCSI_MPT3SAS_LOGGING
+ if (!(ioc->logging_level & MPT_DEBUG_REPLY) &&
+ ((scmd->sense_buffer[2] == UNIT_ATTENTION) ||
+ (scmd->sense_buffer[2] == MEDIUM_ERROR) ||
+ (scmd->sense_buffer[2] == HARDWARE_ERROR)))
+ _scsih_scsi_ioc_info(ioc, scmd, mpi_reply, smid);
+#endif
+ }
switch (ioc_status) {
case MPI2_IOCSTATUS_BUSY:
case MPI2_IOCSTATUS_INSUFFICIENT_RESOURCES:
@@ -4146,6 +4304,9 @@
scmd->device->expecting_cc_ua = 1;
}
break;
+ } else if (log_info == VIRTUAL_IO_FAILED_RETRY) {
+ scmd->result = DID_RESET << 16;
+ break;
}
scmd->result = DID_SOFT_ERROR << 16;
break;
@@ -4788,6 +4949,16 @@
sas_device->handle, handle);
sas_target_priv_data->handle = handle;
sas_device->handle = handle;
+ if (sas_device_pg0.Flags &
+ MPI2_SAS_DEVICE0_FLAGS_ENCL_LEVEL_VALID) {
+ sas_device->enclosure_level =
+ le16_to_cpu(sas_device_pg0.EnclosureLevel);
+ memcpy(&sas_device->connector_name[0],
+ &sas_device_pg0.ConnectorName[0], 4);
+ } else {
+ sas_device->enclosure_level = 0;
+ sas_device->connector_name[0] = '\0';
+ }
}
/* check if device is present */
@@ -4894,14 +5065,24 @@
ioc->name, __FILE__, __LINE__, __func__);
sas_device->enclosure_handle =
le16_to_cpu(sas_device_pg0.EnclosureHandle);
- sas_device->slot =
- le16_to_cpu(sas_device_pg0.Slot);
+ if (sas_device->enclosure_handle != 0)
+ sas_device->slot =
+ le16_to_cpu(sas_device_pg0.Slot);
sas_device->device_info = device_info;
sas_device->sas_address = sas_address;
sas_device->phy = sas_device_pg0.PhyNum;
sas_device->fast_path = (le16_to_cpu(sas_device_pg0.Flags) &
MPI25_SAS_DEVICE0_FLAGS_FAST_PATH_CAPABLE) ? 1 : 0;
+ if (sas_device_pg0.Flags & MPI2_SAS_DEVICE0_FLAGS_ENCL_LEVEL_VALID) {
+ sas_device->enclosure_level =
+ le16_to_cpu(sas_device_pg0.EnclosureLevel);
+ memcpy(&sas_device->connector_name[0],
+ &sas_device_pg0.ConnectorName[0], 4);
+ } else {
+ sas_device->enclosure_level = 0;
+ sas_device->connector_name[0] = '\0';
+ }
/* get enclosure_logical_id */
if (sas_device->enclosure_handle && !(mpt3sas_config_get_enclosure_pg0(
ioc, &mpi_reply, &enclosure_pg0, MPI2_SAS_ENCLOS_PGAD_FORM_HANDLE,
@@ -4943,6 +5124,18 @@
ioc->name, __func__,
sas_device->handle, (unsigned long long)
sas_device->sas_address));
+ if (sas_device->enclosure_handle != 0)
+ dewtprintk(ioc, pr_info(MPT3SAS_FMT
+ "%s: enter: enclosure logical id(0x%016llx), slot(%d)\n",
+ ioc->name, __func__,
+ (unsigned long long)sas_device->enclosure_logical_id,
+ sas_device->slot));
+ if (sas_device->connector_name[0] != '\0')
+ dewtprintk(ioc, pr_info(MPT3SAS_FMT
+ "%s: enter: enclosure level(0x%04x), connector name( %s)\n",
+ ioc->name, __func__,
+ sas_device->enclosure_level,
+ sas_device->connector_name));
if (sas_device->starget && sas_device->starget->hostdata) {
sas_target_priv_data = sas_device->starget->hostdata;
@@ -4959,12 +5152,34 @@
"removing handle(0x%04x), sas_addr(0x%016llx)\n",
ioc->name, sas_device->handle,
(unsigned long long) sas_device->sas_address);
+ if (sas_device->enclosure_handle != 0)
+ pr_info(MPT3SAS_FMT
+ "removing : enclosure logical id(0x%016llx), slot(%d)\n",
+ ioc->name,
+ (unsigned long long)sas_device->enclosure_logical_id,
+ sas_device->slot);
+ if (sas_device->connector_name[0] != '\0')
+ pr_info(MPT3SAS_FMT
+ "removing enclosure level(0x%04x), connector name( %s)\n",
+ ioc->name, sas_device->enclosure_level,
+ sas_device->connector_name);
dewtprintk(ioc, pr_info(MPT3SAS_FMT
"%s: exit: handle(0x%04x), sas_addr(0x%016llx)\n",
ioc->name, __func__,
- sas_device->handle, (unsigned long long)
- sas_device->sas_address));
+ sas_device->handle, (unsigned long long)
+ sas_device->sas_address));
+ if (sas_device->enclosure_handle != 0)
+ dewtprintk(ioc, pr_info(MPT3SAS_FMT
+ "%s: exit: enclosure logical id(0x%016llx), slot(%d)\n",
+ ioc->name, __func__,
+ (unsigned long long)sas_device->enclosure_logical_id,
+ sas_device->slot));
+ if (sas_device->connector_name[0] != '\0')
+ dewtprintk(ioc, pr_info(MPT3SAS_FMT
+ "%s: exit: enclosure level(0x%04x), connector name(%s)\n",
+ ioc->name, __func__, sas_device->enclosure_level,
+ sas_device->connector_name));
kfree(sas_device);
}
@@ -6357,9 +6572,7 @@
/**
* _scsih_mark_responding_sas_device - mark a sas_devices as responding
* @ioc: per adapter object
- * @sas_address: sas address
- * @slot: enclosure slot id
- * @handle: device handle
+ * @sas_device_pg0: SAS Device page 0
*
* After host reset, find out whether devices are still responding.
* Used in _scsih_remove_unresponsive_sas_devices.
@@ -6367,8 +6580,8 @@
* Return nothing.
*/
static void
-_scsih_mark_responding_sas_device(struct MPT3SAS_ADAPTER *ioc, u64 sas_address,
- u16 slot, u16 handle)
+_scsih_mark_responding_sas_device(struct MPT3SAS_ADAPTER *ioc,
+Mpi2SasDevicePage0_t *sas_device_pg0)
{
struct MPT3SAS_TARGET *sas_target_priv_data = NULL;
struct scsi_target *starget;
@@ -6377,8 +6590,8 @@
spin_lock_irqsave(&ioc->sas_device_lock, flags);
list_for_each_entry(sas_device, &ioc->sas_device_list, list) {
- if (sas_device->sas_address == sas_address &&
- sas_device->slot == slot) {
+ if ((sas_device->sas_address == sas_device_pg0->SASAddress) &&
+ (sas_device->slot == sas_device_pg0->Slot)) {
sas_device->responding = 1;
starget = sas_device->starget;
if (starget && starget->hostdata) {
@@ -6387,22 +6600,40 @@
sas_target_priv_data->deleted = 0;
} else
sas_target_priv_data = NULL;
- if (starget)
+ if (starget) {
starget_printk(KERN_INFO, starget,
- "handle(0x%04x), sas_addr(0x%016llx), "
- "enclosure logical id(0x%016llx), "
- "slot(%d)\n", handle,
- (unsigned long long)sas_device->sas_address,
+ "handle(0x%04x), sas_addr(0x%016llx)\n",
+ sas_device_pg0->DevHandle,
(unsigned long long)
- sas_device->enclosure_logical_id,
- sas_device->slot);
- if (sas_device->handle == handle)
+ sas_device->sas_address);
+
+ if (sas_device->enclosure_handle != 0)
+ starget_printk(KERN_INFO, starget,
+ "enclosure logical id(0x%016llx),"
+ " slot(%d)\n",
+ (unsigned long long)
+ sas_device->enclosure_logical_id,
+ sas_device->slot);
+ }
+ if (sas_device_pg0->Flags &
+ MPI2_SAS_DEVICE0_FLAGS_ENCL_LEVEL_VALID) {
+ sas_device->enclosure_level =
+ le16_to_cpu(sas_device_pg0->EnclosureLevel);
+ memcpy(&sas_device->connector_name[0],
+ &sas_device_pg0->ConnectorName[0], 4);
+ } else {
+ sas_device->enclosure_level = 0;
+ sas_device->connector_name[0] = '\0';
+ }
+
+ if (sas_device->handle == sas_device_pg0->DevHandle)
goto out;
pr_info("\thandle changed from(0x%04x)!!!\n",
sas_device->handle);
- sas_device->handle = handle;
+ sas_device->handle = sas_device_pg0->DevHandle;
if (sas_target_priv_data)
- sas_target_priv_data->handle = handle;
+ sas_target_priv_data->handle =
+ sas_device_pg0->DevHandle;
goto out;
}
}
@@ -6441,13 +6672,15 @@
MPI2_IOCSTATUS_MASK;
if (ioc_status != MPI2_IOCSTATUS_SUCCESS)
break;
- handle = le16_to_cpu(sas_device_pg0.DevHandle);
+ handle = sas_device_pg0.DevHandle =
+ le16_to_cpu(sas_device_pg0.DevHandle);
device_info = le32_to_cpu(sas_device_pg0.DeviceInfo);
if (!(_scsih_is_end_device(device_info)))
continue;
- _scsih_mark_responding_sas_device(ioc,
- le64_to_cpu(sas_device_pg0.SASAddress),
- le16_to_cpu(sas_device_pg0.Slot), handle);
+ sas_device_pg0.SASAddress =
+ le64_to_cpu(sas_device_pg0.SASAddress);
+ sas_device_pg0.Slot = le16_to_cpu(sas_device_pg0.Slot);
+ _scsih_mark_responding_sas_device(ioc, &sas_device_pg0);
}
out:
@@ -7854,8 +8087,8 @@
/* event thread */
snprintf(ioc->firmware_event_name, sizeof(ioc->firmware_event_name),
"fw_event%d", ioc->id);
- ioc->firmware_event_thread = create_singlethread_workqueue(
- ioc->firmware_event_name);
+ ioc->firmware_event_thread = alloc_ordered_workqueue(
+ ioc->firmware_event_name, WQ_MEM_RECLAIM);
if (!ioc->firmware_event_thread) {
pr_err(MPT3SAS_FMT "failure at %s:%d/%s()!\n",
ioc->name, __FILE__, __LINE__, __func__);
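
The switch from create_singlethread_workqueue() to alloc_ordered_workqueue(..., WQ_MEM_RECLAIM) keeps the strict one-at-a-time ordering the firmware-event handling relies on while adding a rescuer thread, so event processing can still make forward progress under memory pressure. A minimal module-style sketch of the pattern; the module, queue, and work names are made up for illustration:

/* Minimal sketch of an ordered, reclaim-capable workqueue; names are illustrative. */
#include <linux/module.h>
#include <linux/workqueue.h>

static struct workqueue_struct *fw_event_wq;

static void fw_event_fn(struct work_struct *work)
{
	/* handle exactly one queued firmware event */
}
static DECLARE_WORK(fw_event_work, fw_event_fn);

static int __init demo_init(void)
{
	fw_event_wq = alloc_ordered_workqueue("demo_fw_event", WQ_MEM_RECLAIM);
	if (!fw_event_wq)
		return -ENOMEM;
	queue_work(fw_event_wq, &fw_event_work);	/* items run strictly in order */
	return 0;
}

static void __exit demo_exit(void)
{
	destroy_workqueue(fw_event_wq);			/* drains pending work first */
}

module_init(demo_init);
module_exit(demo_exit);
MODULE_LICENSE("GPL");
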
diff --git a/drivers/scsi/mpt3sas/mpt3sas_transport.c b/drivers/scsi/mpt3sas/mpt3sas_transport.c
index efb98af..70fd019 100644
--- a/drivers/scsi/mpt3sas/mpt3sas_transport.c
+++ b/drivers/scsi/mpt3sas/mpt3sas_transport.c
@@ -649,6 +649,7 @@
unsigned long flags;
struct _sas_node *sas_node;
struct sas_rphy *rphy;
+ struct _sas_device *sas_device = NULL;
int i;
struct sas_port *port;
@@ -731,10 +732,27 @@
mpt3sas_port->remote_identify.device_type);
rphy->identify = mpt3sas_port->remote_identify;
+
+ if (mpt3sas_port->remote_identify.device_type == SAS_END_DEVICE) {
+ sas_device = mpt3sas_scsih_sas_device_find_by_sas_address(ioc,
+ mpt3sas_port->remote_identify.sas_address);
+ if (!sas_device) {
+ dfailprintk(ioc, printk(MPT3SAS_FMT
+ "failure at %s:%d/%s()!\n",
+ ioc->name, __FILE__, __LINE__, __func__));
+ goto out_fail;
+ }
+ sas_device->pend_sas_rphy_add = 1;
+ }
+
if ((sas_rphy_add(rphy))) {
pr_err(MPT3SAS_FMT "failure at %s:%d/%s()!\n",
ioc->name, __FILE__, __LINE__, __func__);
}
+
+ if (mpt3sas_port->remote_identify.device_type == SAS_END_DEVICE)
+ sas_device->pend_sas_rphy_add = 0;
+
if ((ioc->logging_level & MPT_DEBUG_TRANSPORT))
dev_printk(KERN_INFO, &rphy->dev,
"add: handle(0x%04x), sas_addr(0x%016llx)\n",
@@ -1946,7 +1964,7 @@
} else {
dma_addr_out = pci_map_single(ioc->pdev, bio_data(req->bio),
blk_rq_bytes(req), PCI_DMA_BIDIRECTIONAL);
- if (!dma_addr_out) {
+ if (pci_dma_mapping_error(ioc->pdev, dma_addr_out)) {
pr_info(MPT3SAS_FMT "%s(): DMA Addr out = NULL\n",
ioc->name, __func__);
rc = -ENOMEM;
@@ -1968,7 +1986,7 @@
} else {
dma_addr_in = pci_map_single(ioc->pdev, bio_data(rsp->bio),
blk_rq_bytes(rsp), PCI_DMA_BIDIRECTIONAL);
- if (!dma_addr_in) {
+ if (pci_dma_mapping_error(ioc->pdev, dma_addr_in)) {
pr_info(MPT3SAS_FMT "%s(): DMA Addr in = NULL\n",
ioc->name, __func__);
rc = -ENOMEM;
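
Both hunks replace a NULL test on the returned bus address with pci_dma_mapping_error(): a streaming DMA address of 0 can be a perfectly valid mapping on some platforms, so only the dedicated error check reliably detects a failed mapping. A hedged sketch of the pattern using the same (legacy) pci_* wrappers; the wrapper function is hypothetical:

/* Sketch of the mapping-error check pattern; not the transport code itself. */
#include <linux/pci.h>

static int demo_map_buffer(struct pci_dev *pdev, void *buf, size_t len)
{
	dma_addr_t addr = pci_map_single(pdev, buf, len, PCI_DMA_BIDIRECTIONAL);

	if (pci_dma_mapping_error(pdev, addr))
		return -ENOMEM;		/* never compare addr against 0/NULL */

	/* ... perform the transfer ... */

	pci_unmap_single(pdev, addr, len, PCI_DMA_BIDIRECTIONAL);
	return 0;
}
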
diff --git a/drivers/scsi/pm8001/pm8001_hwi.c b/drivers/scsi/pm8001/pm8001_hwi.c
index 39306b1..04e67a1 100644
--- a/drivers/scsi/pm8001/pm8001_hwi.c
+++ b/drivers/scsi/pm8001/pm8001_hwi.c
@@ -2642,6 +2642,7 @@
ts->resp = SAS_TASK_COMPLETE;
ts->stat = SAS_OPEN_REJECT;
ts->open_rej_reason = SAS_OREJ_RSVD_RETRY;
+ break;
default:
PM8001_IO_DBG(pm8001_ha,
pm8001_printk("Unknown status 0x%x\n", status));
diff --git a/drivers/scsi/pm8001/pm80xx_hwi.c b/drivers/scsi/pm8001/pm80xx_hwi.c
index 0e1628f..9a389f1 100644
--- a/drivers/scsi/pm8001/pm80xx_hwi.c
+++ b/drivers/scsi/pm8001/pm80xx_hwi.c
@@ -2337,6 +2337,7 @@
ts->resp = SAS_TASK_COMPLETE;
ts->stat = SAS_OPEN_REJECT;
ts->open_rej_reason = SAS_OREJ_RSVD_RETRY;
+ break;
default:
PM8001_IO_DBG(pm8001_ha,
pm8001_printk("Unknown status 0x%x\n", status));
diff --git a/drivers/scsi/qla2xxx/Kconfig b/drivers/scsi/qla2xxx/Kconfig
index 33f60c9..a0f732b 100644
--- a/drivers/scsi/qla2xxx/Kconfig
+++ b/drivers/scsi/qla2xxx/Kconfig
@@ -32,10 +32,10 @@
They are also included in the linux-firmware tree as well.
config TCM_QLA2XXX
- tristate "TCM_QLA2XXX fabric module for Qlogic 2xxx series target mode HBAs"
+ tristate "TCM_QLA2XXX fabric module for QLogic 24xx+ series target mode HBAs"
depends on SCSI_QLA_FC && TARGET_CORE
depends on LIBFC
select BTREE
default n
---help---
- Say Y here to enable the TCM_QLA2XXX fabric module for Qlogic 2xxx series target mode HBAs
+ Say Y here to enable the TCM_QLA2XXX fabric module for QLogic 24xx+ series target mode HBAs
diff --git a/drivers/scsi/qla2xxx/tcm_qla2xxx.c b/drivers/scsi/qla2xxx/tcm_qla2xxx.c
index 7ed7bae..ac65cb7 100644
--- a/drivers/scsi/qla2xxx/tcm_qla2xxx.c
+++ b/drivers/scsi/qla2xxx/tcm_qla2xxx.c
@@ -1359,9 +1359,7 @@
struct qla_hw_data *ha = tgt->ha;
scsi_qla_host_t *vha = pci_get_drvdata(ha->pdev);
struct se_session *se_sess;
- struct se_node_acl *se_nacl;
struct tcm_qla2xxx_lport *lport;
- struct tcm_qla2xxx_nacl *nacl;
BUG_ON(in_interrupt());
@@ -1371,8 +1369,6 @@
dump_stack();
return;
}
- se_nacl = se_sess->se_node_acl;
- nacl = container_of(se_nacl, struct tcm_qla2xxx_nacl, se_node_acl);
lport = vha->vha_tgt.target_lport_ptr;
if (!lport) {
@@ -1680,7 +1676,6 @@
(struct tcm_qla2xxx_lport *)target_lport_ptr;
struct tcm_qla2xxx_lport *base_lport =
(struct tcm_qla2xxx_lport *)base_vha->vha_tgt.target_lport_ptr;
- struct tcm_qla2xxx_tpg *base_tpg;
struct fc_vport_identifiers vport_id;
if (!qla_tgt_mode_enabled(base_vha)) {
@@ -1693,7 +1688,6 @@
pr_err("qla2xxx base_lport or tpg_1 not available\n");
return -EPERM;
}
- base_tpg = base_lport->tpg_1;
memset(&vport_id, 0, sizeof(vport_id));
vport_id.port_name = npiv_wwpn;
@@ -1810,6 +1804,11 @@
.module = THIS_MODULE,
.name = "qla2xxx",
.node_acl_size = sizeof(struct tcm_qla2xxx_nacl),
+ /*
+ * XXX: Limit assumes single page per scatter-gather-list entry.
+ * Current maximum is ~4.9 MB per se_cmd->t_data_sg with PAGE_SIZE=4096
+ */
+ .max_data_sg_nents = 1200,
.get_fabric_name = tcm_qla2xxx_get_fabric_name,
.tpg_get_wwn = tcm_qla2xxx_get_fabric_wwn,
.tpg_get_tag = tcm_qla2xxx_get_tag,
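
For reference, the arithmetic behind the new cap added above: 1200 scatter-gather entries * 4096 bytes per page = 4,915,200 bytes, i.e. about 4.69 MiB (4.9 MB in decimal units) per se_cmd->t_data_sg, matching the figure quoted in the comment.
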
@@ -1958,7 +1957,7 @@
tcm_qla2xxx_deregister_configfs();
}
-MODULE_DESCRIPTION("TCM QLA2XXX series NPIV enabled fabric driver");
+MODULE_DESCRIPTION("TCM QLA24XX+ series NPIV enabled fabric driver");
MODULE_LICENSE("GPL");
module_init(tcm_qla2xxx_init);
module_exit(tcm_qla2xxx_exit);
diff --git a/drivers/scsi/scsi_common.c b/drivers/scsi/scsi_common.c
index 2ff0922..c126966 100644
--- a/drivers/scsi/scsi_common.c
+++ b/drivers/scsi/scsi_common.c
@@ -5,6 +5,8 @@
#include <linux/bug.h>
#include <linux/kernel.h>
#include <linux/string.h>
+#include <linux/errno.h>
+#include <asm/unaligned.h>
#include <scsi/scsi_common.h>
/* NB: These are exposed through /proc/scsi/scsi and form part of the ABI.
@@ -176,3 +178,110 @@
return true;
}
EXPORT_SYMBOL(scsi_normalize_sense);
+
+/**
+ * scsi_sense_desc_find - search for a given descriptor type in descriptor sense data format.
+ * @sense_buffer: byte array of descriptor format sense data
+ * @sb_len: number of valid bytes in sense_buffer
+ * @desc_type: value of descriptor type to find
+ * (e.g. 0 -> information)
+ *
+ * Notes:
+ * only valid when sense data is in descriptor format
+ *
+ * Return value:
+ * pointer to start of (first) descriptor if found else NULL
+ */
+const u8 * scsi_sense_desc_find(const u8 * sense_buffer, int sb_len,
+ int desc_type)
+{
+ int add_sen_len, add_len, desc_len, k;
+ const u8 * descp;
+
+ if ((sb_len < 8) || (0 == (add_sen_len = sense_buffer[7])))
+ return NULL;
+ if ((sense_buffer[0] < 0x72) || (sense_buffer[0] > 0x73))
+ return NULL;
+ add_sen_len = (add_sen_len < (sb_len - 8)) ?
+ add_sen_len : (sb_len - 8);
+ descp = &sense_buffer[8];
+ for (desc_len = 0, k = 0; k < add_sen_len; k += desc_len) {
+ descp += desc_len;
+ add_len = (k < (add_sen_len - 1)) ? descp[1]: -1;
+ desc_len = add_len + 2;
+ if (descp[0] == desc_type)
+ return descp;
+ if (add_len < 0) // short descriptor ??
+ break;
+ }
+ return NULL;
+}
+EXPORT_SYMBOL(scsi_sense_desc_find);
+
+/**
+ * scsi_build_sense_buffer - build sense data in a buffer
+ * @desc: Sense format (non zero == descriptor format,
+ * 0 == fixed format)
+ * @buf: Where to build sense data
+ * @key: Sense key
+ * @asc: Additional sense code
+ * @ascq: Additional sense code qualifier
+ *
+ **/
+void scsi_build_sense_buffer(int desc, u8 *buf, u8 key, u8 asc, u8 ascq)
+{
+ if (desc) {
+ buf[0] = 0x72; /* descriptor, current */
+ buf[1] = key;
+ buf[2] = asc;
+ buf[3] = ascq;
+ buf[7] = 0;
+ } else {
+ buf[0] = 0x70; /* fixed, current */
+ buf[2] = key;
+ buf[7] = 0xa;
+ buf[12] = asc;
+ buf[13] = ascq;
+ }
+}
+EXPORT_SYMBOL(scsi_build_sense_buffer);
+
+/**
+ * scsi_set_sense_information - set the information field in a
+ * formatted sense data buffer
+ * @buf: Where to build sense data
+ * @buf_len: buffer length
+ * @info: 64-bit information value to be set
+ *
+ * Return value:
+ * 0 on success or EINVAL for invalid sense buffer length
+ **/
+int scsi_set_sense_information(u8 *buf, int buf_len, u64 info)
+{
+ if ((buf[0] & 0x7f) == 0x72) {
+ u8 *ucp, len;
+
+ len = buf[7];
+ ucp = (char *)scsi_sense_desc_find(buf, len + 8, 0);
+ if (!ucp) {
+ buf[7] = len + 0xc;
+ ucp = buf + 8 + len;
+ }
+
+ if (buf_len < len + 0xc)
+ /* Not enough room for info */
+ return -EINVAL;
+
+ ucp[0] = 0;
+ ucp[1] = 0xa;
+ ucp[2] = 0x80; /* Valid bit */
+ ucp[3] = 0;
+ put_unaligned_be64(info, &ucp[4]);
+ } else if ((buf[0] & 0x7f) == 0x70) {
+ buf[0] |= 0x80;
+ put_unaligned_be64(info, &buf[3]);
+ }
+
+ return 0;
+}
+EXPORT_SYMBOL(scsi_set_sense_information);
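
The three helpers added above are meant to be used together by LLDs and target code: scsi_build_sense_buffer() lays down the fixed (0x70) or descriptor (0x72) header, scsi_set_sense_information() stores a 64-bit value in the Information field of an already-formatted buffer, and scsi_sense_desc_find() locates a descriptor by type in descriptor-format data. A hedged usage sketch, assuming the declarations live in <scsi/scsi_common.h>; the wrapper function and the 0x21/0x00 ASC/ASCQ choice are illustrative, not part of this patch:

/* Hypothetical caller showing the intended sequence; not part of this patch. */
#include <linux/types.h>
#include <scsi/scsi.h>
#include <scsi/scsi_common.h>

static int demo_fill_sense(u8 *sense, int sense_len, int desc_format, u64 bad_lba)
{
	/* ILLEGAL REQUEST, ASC/ASCQ 0x21/0x00 (LBA out of range), as an example */
	scsi_build_sense_buffer(desc_format, sense, ILLEGAL_REQUEST, 0x21, 0x00);

	/* record the offending LBA in the Information field */
	return scsi_set_sense_information(sense, sense_len, bad_lba);
}

For descriptor-format data, the information descriptor written this way could later be located with scsi_sense_desc_find(sense, sense_len, 0).
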
diff --git a/drivers/scsi/scsi_debug.c b/drivers/scsi/scsi_debug.c
index 30268bb..dfcc45b 100644
--- a/drivers/scsi/scsi_debug.c
+++ b/drivers/scsi/scsi_debug.c
@@ -25,6 +25,9 @@
* module options to "modprobe scsi_debug num_tgts=2" [20021221]
*/
+
+#define pr_fmt(fmt) KBUILD_MODNAME ":%s: " fmt, __func__
+
#include <linux/module.h>
#include <linux/kernel.h>
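
Defining pr_fmt() before any include makes every pr_err()/pr_warn()/pr_info() in the file expand with a "scsi_debug:<function>: " prefix automatically, which is why the explicit __func__ arguments are dropped from the call sites in the hunks below. A minimal sketch of the mechanism; the module and message are illustrative:

/* Sketch: pr_fmt() is prepended to every pr_*() format string in the file. */
#define pr_fmt(fmt) KBUILD_MODNAME ":%s: " fmt, __func__

#include <linux/kernel.h>
#include <linux/module.h>

static int __init demo_init(void)
{
	pr_info("hello\n");	/* logs "<modname>:demo_init: hello" */
	return 0;
}
module_init(demo_init);
MODULE_LICENSE("GPL");
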
@@ -201,7 +204,6 @@
/* If REPORT LUNS has luns >= 256 it can choose "flat space" (value 1)
* or "peripheral device" addressing (value 0) */
#define SAM2_LUN_ADDRESS_METHOD 0
-#define SAM2_WLUN_REPORT_LUNS 0xc101
/* SCSI_DEBUG_CANQUEUE is the maximum number of commands that can be queued
* (for response) at one time. Can be reduced by max_queue option. Command
@@ -698,7 +700,7 @@
else
hpnt->max_id = scsi_debug_num_tgts;
/* scsi_debug_max_luns; */
- hpnt->max_lun = SAM2_WLUN_REPORT_LUNS;
+ hpnt->max_lun = SCSI_W_LUN_REPORT_LUNS + 1;
}
spin_unlock(&sdebug_host_list_lock);
}
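
The driver-private SAM2_WLUN_REPORT_LUNS constant removed above was 0xc101, which is the same value the generic SCSI_W_LUN_REPORT_LUNS macro resolves to (the well-known-LUN base 0xc100 plus 1), so the substitution does not change which LUN acts as the REPORT LUNS W-LUN; setting max_lun to SCSI_W_LUN_REPORT_LUNS + 1 (0xc102) keeps that W-LUN inside the host's addressable LUN range instead of using the W-LUN value itself as the limit.
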
@@ -1288,7 +1290,7 @@
arr = kzalloc(SDEBUG_MAX_INQ_ARR_SZ, GFP_ATOMIC);
if (! arr)
return DID_REQUEUE << 16;
- have_wlun = (scp->device->lun == SAM2_WLUN_REPORT_LUNS);
+ have_wlun = (scp->device->lun == SCSI_W_LUN_REPORT_LUNS);
if (have_wlun)
pq_pdt = 0x1e; /* present, wlun */
else if (scsi_debug_no_lun_0 && (0 == devip->lun))
@@ -1427,12 +1429,11 @@
unsigned char * sbuff;
unsigned char *cmd = scp->cmnd;
unsigned char arr[SCSI_SENSE_BUFFERSIZE];
- bool dsense, want_dsense;
+ bool dsense;
int len = 18;
memset(arr, 0, sizeof(arr));
dsense = !!(cmd[1] & 1);
- want_dsense = dsense || scsi_debug_dsense;
sbuff = scp->sense_buffer;
if ((iec_m_pg[2] & 0x4) && (6 == (iec_m_pg[3] & 0xf))) {
if (dsense) {
@@ -2446,8 +2447,7 @@
__be16 csum = dif_compute_csum(data, scsi_debug_sector_size);
if (sdt->guard_tag != csum) {
- pr_err("%s: GUARD check failed on sector %lu rcvd 0x%04x, data 0x%04x\n",
- __func__,
+ pr_err("GUARD check failed on sector %lu rcvd 0x%04x, data 0x%04x\n",
(unsigned long)sector,
be16_to_cpu(sdt->guard_tag),
be16_to_cpu(csum));
@@ -2455,14 +2455,14 @@
}
if (scsi_debug_dif == SD_DIF_TYPE1_PROTECTION &&
be32_to_cpu(sdt->ref_tag) != (sector & 0xffffffff)) {
- pr_err("%s: REF check failed on sector %lu\n",
- __func__, (unsigned long)sector);
+ pr_err("REF check failed on sector %lu\n",
+ (unsigned long)sector);
return 0x03;
}
if (scsi_debug_dif == SD_DIF_TYPE2_PROTECTION &&
be32_to_cpu(sdt->ref_tag) != ei_lba) {
- pr_err("%s: REF check failed on sector %lu\n",
- __func__, (unsigned long)sector);
+ pr_err("REF check failed on sector %lu\n",
+ (unsigned long)sector);
return 0x03;
}
return 0;
@@ -2680,7 +2680,7 @@
return 0;
}
-void dump_sector(unsigned char *buf, int len)
+static void dump_sector(unsigned char *buf, int len)
{
int i, j, n;
@@ -3365,8 +3365,8 @@
one_lun[i].scsi_lun[1] = lun & 0xff;
}
if (want_wlun) {
- one_lun[i].scsi_lun[0] = (SAM2_WLUN_REPORT_LUNS >> 8) & 0xff;
- one_lun[i].scsi_lun[1] = SAM2_WLUN_REPORT_LUNS & 0xff;
+ one_lun[i].scsi_lun[0] = (SCSI_W_LUN_REPORT_LUNS >> 8) & 0xff;
+ one_lun[i].scsi_lun[1] = SCSI_W_LUN_REPORT_LUNS & 0xff;
i++;
}
alloc_len = (unsigned char *)(one_lun + i) - arr;
@@ -3449,7 +3449,7 @@
atomic_inc(&sdebug_completions);
qa_indx = indx;
if ((qa_indx < 0) || (qa_indx >= SCSI_DEBUG_CANQUEUE)) {
- pr_err("%s: wild qa_indx=%d\n", __func__, qa_indx);
+ pr_err("wild qa_indx=%d\n", qa_indx);
return;
}
spin_lock_irqsave(&queued_arr_lock, iflags);
@@ -3457,21 +3457,21 @@
scp = sqcp->a_cmnd;
if (NULL == scp) {
spin_unlock_irqrestore(&queued_arr_lock, iflags);
- pr_err("%s: scp is NULL\n", __func__);
+ pr_err("scp is NULL\n");
return;
}
devip = (struct sdebug_dev_info *)scp->device->hostdata;
if (devip)
atomic_dec(&devip->num_in_q);
else
- pr_err("%s: devip=NULL\n", __func__);
+ pr_err("devip=NULL\n");
if (atomic_read(&retired_max_queue) > 0)
retiring = 1;
sqcp->a_cmnd = NULL;
if (!test_and_clear_bit(qa_indx, queued_in_use_bm)) {
spin_unlock_irqrestore(&queued_arr_lock, iflags);
- pr_err("%s: Unexpected completion\n", __func__);
+ pr_err("Unexpected completion\n");
return;
}
@@ -3481,7 +3481,7 @@
retval = atomic_read(&retired_max_queue);
if (qa_indx >= retval) {
spin_unlock_irqrestore(&queued_arr_lock, iflags);
- pr_err("%s: index %d too large\n", __func__, retval);
+ pr_err("index %d too large\n", retval);
return;
}
k = find_last_bit(queued_in_use_bm, retval);
@@ -3509,7 +3509,7 @@
atomic_inc(&sdebug_completions);
qa_indx = sd_hrtp->qa_indx;
if ((qa_indx < 0) || (qa_indx >= SCSI_DEBUG_CANQUEUE)) {
- pr_err("%s: wild qa_indx=%d\n", __func__, qa_indx);
+ pr_err("wild qa_indx=%d\n", qa_indx);
goto the_end;
}
spin_lock_irqsave(&queued_arr_lock, iflags);
@@ -3517,21 +3517,21 @@
scp = sqcp->a_cmnd;
if (NULL == scp) {
spin_unlock_irqrestore(&queued_arr_lock, iflags);
- pr_err("%s: scp is NULL\n", __func__);
+ pr_err("scp is NULL\n");
goto the_end;
}
devip = (struct sdebug_dev_info *)scp->device->hostdata;
if (devip)
atomic_dec(&devip->num_in_q);
else
- pr_err("%s: devip=NULL\n", __func__);
+ pr_err("devip=NULL\n");
if (atomic_read(&retired_max_queue) > 0)
retiring = 1;
sqcp->a_cmnd = NULL;
if (!test_and_clear_bit(qa_indx, queued_in_use_bm)) {
spin_unlock_irqrestore(&queued_arr_lock, iflags);
- pr_err("%s: Unexpected completion\n", __func__);
+ pr_err("Unexpected completion\n");
goto the_end;
}
@@ -3541,7 +3541,7 @@
retval = atomic_read(&retired_max_queue);
if (qa_indx >= retval) {
spin_unlock_irqrestore(&queued_arr_lock, iflags);
- pr_err("%s: index %d too large\n", __func__, retval);
+ pr_err("index %d too large\n", retval);
goto the_end;
}
k = find_last_bit(queued_in_use_bm, retval);
@@ -3580,7 +3580,7 @@
return devip;
sdbg_host = *(struct sdebug_host_info **)shost_priv(sdev->host);
if (!sdbg_host) {
- pr_err("%s: Host info NULL\n", __func__);
+ pr_err("Host info NULL\n");
return NULL;
}
list_for_each_entry(devip, &sdbg_host->dev_info_list, dev_list) {
@@ -3596,8 +3596,7 @@
if (!open_devip) { /* try and make a new one */
open_devip = sdebug_device_create(sdbg_host, GFP_ATOMIC);
if (!open_devip) {
- printk(KERN_ERR "%s: out of memory at line %d\n",
- __func__, __LINE__);
+ pr_err("out of memory at line %d\n", __LINE__);
return NULL;
}
}
@@ -3615,7 +3614,7 @@
static int scsi_debug_slave_alloc(struct scsi_device *sdp)
{
if (SCSI_DEBUG_OPT_NOISE & scsi_debug_opts)
- printk(KERN_INFO "scsi_debug: slave_alloc <%u %u %u %llu>\n",
+ pr_info("slave_alloc <%u %u %u %llu>\n",
sdp->host->host_no, sdp->channel, sdp->id, sdp->lun);
queue_flag_set_unlocked(QUEUE_FLAG_BIDI, sdp->request_queue);
return 0;
@@ -3626,7 +3625,7 @@
struct sdebug_dev_info *devip;
if (SCSI_DEBUG_OPT_NOISE & scsi_debug_opts)
- printk(KERN_INFO "scsi_debug: slave_configure <%u %u %u %llu>\n",
+ pr_info("slave_configure <%u %u %u %llu>\n",
sdp->host->host_no, sdp->channel, sdp->id, sdp->lun);
if (sdp->host->max_cmd_len != SCSI_DEBUG_MAX_CMD_LEN)
sdp->host->max_cmd_len = SCSI_DEBUG_MAX_CMD_LEN;
@@ -3646,7 +3645,7 @@
(struct sdebug_dev_info *)sdp->hostdata;
if (SCSI_DEBUG_OPT_NOISE & scsi_debug_opts)
- printk(KERN_INFO "scsi_debug: slave_destroy <%u %u %u %llu>\n",
+ pr_info("slave_destroy <%u %u %u %llu>\n",
sdp->host->host_no, sdp->channel, sdp->id, sdp->lun);
if (devip) {
/* make this slot available for re-use */
@@ -3897,8 +3896,7 @@
return;
if (scsi_debug_num_parts > SDEBUG_MAX_PARTS) {
scsi_debug_num_parts = SDEBUG_MAX_PARTS;
- pr_warn("%s: reducing partitions to %d\n", __func__,
- SDEBUG_MAX_PARTS);
+ pr_warn("reducing partitions to %d\n", SDEBUG_MAX_PARTS);
}
num_sectors = (int)sdebug_store_sectors;
sectors_per_part = (num_sectors - sdebug_sectors_per)
@@ -3942,14 +3940,20 @@
unsigned long iflags;
int k, num_in_q, qdepth, inject;
struct sdebug_queued_cmd *sqcp = NULL;
- struct scsi_device *sdp = cmnd->device;
+ struct scsi_device *sdp;
- if (NULL == cmnd || NULL == devip) {
- pr_warn("%s: called with NULL cmnd or devip pointer\n",
- __func__);
+ /* this should never happen */
+ if (WARN_ON(!cmnd))
+ return SCSI_MLQUEUE_HOST_BUSY;
+
+ if (NULL == devip) {
+ pr_warn("called devip == NULL\n");
/* no particularly good error to report back */
return SCSI_MLQUEUE_HOST_BUSY;
}
+
+ sdp = cmnd->device;
+
if ((scsi_result) && (SCSI_DEBUG_OPT_NOISE & scsi_debug_opts))
sdev_printk(KERN_INFO, sdp, "%s: non-zero result=0x%x\n",
__func__, scsi_result);
@@ -4383,8 +4387,7 @@
fake_storep = vmalloc(sz);
if (NULL == fake_storep) {
- pr_err("%s: out of memory, 9\n",
- __func__);
+ pr_err("out of memory, 9\n");
return -ENOMEM;
}
memset(fake_storep, 0, sz);
@@ -4784,8 +4787,7 @@
atomic_set(&retired_max_queue, 0);
if (scsi_debug_ndelay >= 1000000000) {
- pr_warn("%s: ndelay must be less than 1 second, ignored\n",
- __func__);
+ pr_warn("ndelay must be less than 1 second, ignored\n");
scsi_debug_ndelay = 0;
} else if (scsi_debug_ndelay > 0)
scsi_debug_delay = DELAY_OVERRIDDEN;
@@ -4797,8 +4799,7 @@
case 4096:
break;
default:
- pr_err("%s: invalid sector_size %d\n", __func__,
- scsi_debug_sector_size);
+ pr_err("invalid sector_size %d\n", scsi_debug_sector_size);
return -EINVAL;
}
@@ -4811,29 +4812,28 @@
break;
default:
- pr_err("%s: dif must be 0, 1, 2 or 3\n", __func__);
+ pr_err("dif must be 0, 1, 2 or 3\n");
return -EINVAL;
}
if (scsi_debug_guard > 1) {
- pr_err("%s: guard must be 0 or 1\n", __func__);
+ pr_err("guard must be 0 or 1\n");
return -EINVAL;
}
if (scsi_debug_ato > 1) {
- pr_err("%s: ato must be 0 or 1\n", __func__);
+ pr_err("ato must be 0 or 1\n");
return -EINVAL;
}
if (scsi_debug_physblk_exp > 15) {
- pr_err("%s: invalid physblk_exp %u\n", __func__,
- scsi_debug_physblk_exp);
+ pr_err("invalid physblk_exp %u\n", scsi_debug_physblk_exp);
return -EINVAL;
}
if (scsi_debug_lowest_aligned > 0x3fff) {
- pr_err("%s: lowest_aligned too big: %u\n", __func__,
- scsi_debug_lowest_aligned);
+ pr_err("lowest_aligned too big: %u\n",
+ scsi_debug_lowest_aligned);
return -EINVAL;
}
@@ -4863,7 +4863,7 @@
if (0 == scsi_debug_fake_rw) {
fake_storep = vmalloc(sz);
if (NULL == fake_storep) {
- pr_err("%s: out of memory, 1\n", __func__);
+ pr_err("out of memory, 1\n");
return -ENOMEM;
}
memset(fake_storep, 0, sz);
@@ -4877,11 +4877,10 @@
dif_size = sdebug_store_sectors * sizeof(struct sd_dif_tuple);
dif_storep = vmalloc(dif_size);
- pr_err("%s: dif_storep %u bytes @ %p\n", __func__, dif_size,
- dif_storep);
+ pr_err("dif_storep %u bytes @ %p\n", dif_size, dif_storep);
if (dif_storep == NULL) {
- pr_err("%s: out of mem. (DIX)\n", __func__);
+ pr_err("out of mem. (DIX)\n");
ret = -ENOMEM;
goto free_vm;
}
@@ -4903,18 +4902,17 @@
if (scsi_debug_unmap_alignment &&
scsi_debug_unmap_granularity <=
scsi_debug_unmap_alignment) {
- pr_err("%s: ERR: unmap_granularity <= unmap_alignment\n",
- __func__);
+ pr_err("ERR: unmap_granularity <= unmap_alignment\n");
return -EINVAL;
}
map_size = lba_to_map_index(sdebug_store_sectors - 1) + 1;
map_storep = vmalloc(BITS_TO_LONGS(map_size) * sizeof(long));
- pr_info("%s: %lu provisioning blocks\n", __func__, map_size);
+ pr_info("%lu provisioning blocks\n", map_size);
if (map_storep == NULL) {
- pr_err("%s: out of mem. (MAP)\n", __func__);
+ pr_err("out of mem. (MAP)\n");
ret = -ENOMEM;
goto free_vm;
}
@@ -4928,18 +4926,18 @@
pseudo_primary = root_device_register("pseudo_0");
if (IS_ERR(pseudo_primary)) {
- pr_warn("%s: root_device_register() error\n", __func__);
+ pr_warn("root_device_register() error\n");
ret = PTR_ERR(pseudo_primary);
goto free_vm;
}
ret = bus_register(&pseudo_lld_bus);
if (ret < 0) {
- pr_warn("%s: bus_register error: %d\n", __func__, ret);
+ pr_warn("bus_register error: %d\n", ret);
goto dev_unreg;
}
ret = driver_register(&sdebug_driverfs_driver);
if (ret < 0) {
- pr_warn("%s: driver_register error: %d\n", __func__, ret);
+ pr_warn("driver_register error: %d\n", ret);
goto bus_unreg;
}
@@ -4948,16 +4946,14 @@
for (k = 0; k < host_to_add; k++) {
if (sdebug_add_adapter()) {
- pr_err("%s: sdebug_add_adapter failed k=%d\n",
- __func__, k);
+ pr_err("sdebug_add_adapter failed k=%d\n", k);
break;
}
}
- if (SCSI_DEBUG_OPT_NOISE & scsi_debug_opts) {
- pr_info("%s: built %d host(s)\n", __func__,
- scsi_debug_add_host);
- }
+ if (SCSI_DEBUG_OPT_NOISE & scsi_debug_opts)
+ pr_info("built %d host(s)\n", scsi_debug_add_host);
+
return 0;
bus_unreg:
@@ -4965,10 +4961,8 @@
dev_unreg:
root_device_unregister(pseudo_primary);
free_vm:
- if (map_storep)
- vfree(map_storep);
- if (dif_storep)
- vfree(dif_storep);
+ vfree(map_storep);
+ vfree(dif_storep);
vfree(fake_storep);
return ret;
@@ -4986,9 +4980,7 @@
bus_unregister(&pseudo_lld_bus);
root_device_unregister(pseudo_primary);
- if (dif_storep)
- vfree(dif_storep);
-
+ vfree(dif_storep);
vfree(fake_storep);
}
@@ -5012,8 +5004,7 @@
sdbg_host = kzalloc(sizeof(*sdbg_host),GFP_KERNEL);
if (NULL == sdbg_host) {
- printk(KERN_ERR "%s: out of memory at line %d\n",
- __func__, __LINE__);
+ pr_err("out of memory at line %d\n", __LINE__);
return -ENOMEM;
}
@@ -5023,8 +5014,7 @@
for (k = 0; k < devs_per_host; k++) {
sdbg_devinfo = sdebug_device_create(sdbg_host, GFP_KERNEL);
if (!sdbg_devinfo) {
- printk(KERN_ERR "%s: out of memory at line %d\n",
- __func__, __LINE__);
+ pr_err("out of memory at line %d\n", __LINE__);
error = -ENOMEM;
goto clean;
}
@@ -5178,7 +5168,7 @@
}
sdev_printk(KERN_INFO, sdp, "%s: cmd %s\n", my_name, b);
}
- has_wlun_rl = (sdp->lun == SAM2_WLUN_REPORT_LUNS);
+ has_wlun_rl = (sdp->lun == SCSI_W_LUN_REPORT_LUNS);
if ((sdp->lun >= scsi_debug_max_luns) && !has_wlun_rl)
return schedule_resp(scp, NULL, errsts_no_connect, 0);
@@ -5338,7 +5328,7 @@
sdebug_driver_template.use_clustering = ENABLE_CLUSTERING;
hpnt = scsi_host_alloc(&sdebug_driver_template, sizeof(sdbg_host));
if (NULL == hpnt) {
- pr_err("%s: scsi_host_alloc failed\n", __func__);
+ pr_err("scsi_host_alloc failed\n");
error = -ENODEV;
return error;
}
@@ -5349,7 +5339,8 @@
hpnt->max_id = scsi_debug_num_tgts + 1;
else
hpnt->max_id = scsi_debug_num_tgts;
- hpnt->max_lun = SAM2_WLUN_REPORT_LUNS; /* = scsi_debug_max_luns; */
+ /* = scsi_debug_max_luns; */
+ hpnt->max_lun = SCSI_W_LUN_REPORT_LUNS + 1;
host_prot = 0;
@@ -5381,7 +5372,7 @@
scsi_host_set_prot(hpnt, host_prot);
- printk(KERN_INFO "scsi_debug: host protection%s%s%s%s%s%s%s\n",
+ pr_info("host protection%s%s%s%s%s%s%s\n",
(host_prot & SHOST_DIF_TYPE1_PROTECTION) ? " DIF1" : "",
(host_prot & SHOST_DIF_TYPE2_PROTECTION) ? " DIF2" : "",
(host_prot & SHOST_DIF_TYPE3_PROTECTION) ? " DIF3" : "",
@@ -5409,7 +5400,7 @@
error = scsi_add_host(hpnt, &sdbg_host->dev);
if (error) {
- printk(KERN_ERR "%s: scsi_add_host failed\n", __func__);
+ pr_err("scsi_add_host failed\n");
error = -ENODEV;
scsi_host_put(hpnt);
} else
@@ -5426,8 +5417,7 @@
sdbg_host = to_sdebug_host(dev);
if (!sdbg_host) {
- printk(KERN_ERR "%s: Unable to locate host info\n",
- __func__);
+ pr_err("Unable to locate host info\n");
return -ENODEV;
}
diff --git a/drivers/scsi/scsi_dh.c b/drivers/scsi/scsi_dh.c
new file mode 100644
index 0000000..edb044a
--- /dev/null
+++ b/drivers/scsi/scsi_dh.c
@@ -0,0 +1,437 @@
+/*
+ * SCSI device handler infrastructure.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or (at your
+ * option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * Copyright IBM Corporation, 2007
+ * Authors:
+ * Chandra Seetharaman <sekharan@us.ibm.com>
+ * Mike Anderson <andmike@linux.vnet.ibm.com>
+ */
+
+#include <linux/slab.h>
+#include <linux/module.h>
+#include <scsi/scsi_dh.h>
+#include "scsi_priv.h"
+
+static DEFINE_SPINLOCK(list_lock);
+static LIST_HEAD(scsi_dh_list);
+
+struct scsi_dh_blist {
+ const char *vendor;
+ const char *model;
+ const char *driver;
+};
+
+static const struct scsi_dh_blist scsi_dh_blist[] = {
+ {"DGC", "RAID", "clariion" },
+ {"DGC", "DISK", "clariion" },
+ {"DGC", "VRAID", "clariion" },
+
+ {"COMPAQ", "MSA1000 VOLUME", "hp_sw" },
+ {"COMPAQ", "HSV110", "hp_sw" },
+ {"HP", "HSV100", "hp_sw"},
+ {"DEC", "HSG80", "hp_sw"},
+
+ {"IBM", "1722", "rdac", },
+ {"IBM", "1724", "rdac", },
+ {"IBM", "1726", "rdac", },
+ {"IBM", "1742", "rdac", },
+ {"IBM", "1745", "rdac", },
+ {"IBM", "1746", "rdac", },
+ {"IBM", "1813", "rdac", },
+ {"IBM", "1814", "rdac", },
+ {"IBM", "1815", "rdac", },
+ {"IBM", "1818", "rdac", },
+ {"IBM", "3526", "rdac", },
+ {"SGI", "TP9", "rdac", },
+ {"SGI", "IS", "rdac", },
+ {"STK", "OPENstorage D280", "rdac", },
+ {"STK", "FLEXLINE 380", "rdac", },
+ {"SUN", "CSM", "rdac", },
+ {"SUN", "LCSM100", "rdac", },
+ {"SUN", "STK6580_6780", "rdac", },
+ {"SUN", "SUN_6180", "rdac", },
+ {"SUN", "ArrayStorage", "rdac", },
+ {"DELL", "MD3", "rdac", },
+ {"NETAPP", "INF-01-00", "rdac", },
+ {"LSI", "INF-01-00", "rdac", },
+ {"ENGENIO", "INF-01-00", "rdac", },
+ {NULL, NULL, NULL },
+};
+
+static const char *
+scsi_dh_find_driver(struct scsi_device *sdev)
+{
+ const struct scsi_dh_blist *b;
+
+ if (scsi_device_tpgs(sdev))
+ return "alua";
+
+ for (b = scsi_dh_blist; b->vendor; b++) {
+ if (!strncmp(sdev->vendor, b->vendor, strlen(b->vendor)) &&
+ !strncmp(sdev->model, b->model, strlen(b->model))) {
+ return b->driver;
+ }
+ }
+ return NULL;
+}
+
+
+static struct scsi_device_handler *__scsi_dh_lookup(const char *name)
+{
+ struct scsi_device_handler *tmp, *found = NULL;
+
+ spin_lock(&list_lock);
+ list_for_each_entry(tmp, &scsi_dh_list, list) {
+ if (!strncmp(tmp->name, name, strlen(tmp->name))) {
+ found = tmp;
+ break;
+ }
+ }
+ spin_unlock(&list_lock);
+ return found;
+}
+
+static struct scsi_device_handler *scsi_dh_lookup(const char *name)
+{
+ struct scsi_device_handler *dh;
+
+ dh = __scsi_dh_lookup(name);
+ if (!dh) {
+ request_module(name);
+ dh = __scsi_dh_lookup(name);
+ }
+
+ return dh;
+}
+
+/*
+ * scsi_dh_handler_attach - Attach a device handler to a device
+ * @sdev - SCSI device the device handler should attach to
+ * @scsi_dh - The device handler to attach
+ */
+static int scsi_dh_handler_attach(struct scsi_device *sdev,
+ struct scsi_device_handler *scsi_dh)
+{
+ int error;
+
+ if (!try_module_get(scsi_dh->module))
+ return -EINVAL;
+
+ error = scsi_dh->attach(sdev);
+ if (error) {
+ sdev_printk(KERN_ERR, sdev, "%s: Attach failed (%d)\n",
+ scsi_dh->name, error);
+ module_put(scsi_dh->module);
+ } else
+ sdev->handler = scsi_dh;
+
+ return error;
+}
+
+/*
+ * scsi_dh_handler_detach - Detach a device handler from a device
+ * @sdev - SCSI device the device handler should be detached from
+ */
+static void scsi_dh_handler_detach(struct scsi_device *sdev)
+{
+ sdev->handler->detach(sdev);
+ sdev_printk(KERN_NOTICE, sdev, "%s: Detached\n", sdev->handler->name);
+ module_put(sdev->handler->module);
+}
+
+/*
+ * Functions for sysfs attribute 'dh_state'
+ */
+static ssize_t
+store_dh_state(struct device *dev, struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+ struct scsi_device *sdev = to_scsi_device(dev);
+ struct scsi_device_handler *scsi_dh;
+ int err = -EINVAL;
+
+ if (sdev->sdev_state == SDEV_CANCEL ||
+ sdev->sdev_state == SDEV_DEL)
+ return -ENODEV;
+
+ if (!sdev->handler) {
+ /*
+ * Attach to a device handler
+ */
+ scsi_dh = scsi_dh_lookup(buf);
+ if (!scsi_dh)
+ return err;
+ err = scsi_dh_handler_attach(sdev, scsi_dh);
+ } else {
+ if (!strncmp(buf, "detach", 6)) {
+ /*
+ * Detach from a device handler
+ */
+ sdev_printk(KERN_WARNING, sdev,
+ "can't detach handler %s.\n",
+ sdev->handler->name);
+ err = -EINVAL;
+ } else if (!strncmp(buf, "activate", 8)) {
+ /*
+ * Activate a device handler
+ */
+ if (sdev->handler->activate)
+ err = sdev->handler->activate(sdev, NULL, NULL);
+ else
+ err = 0;
+ }
+ }
+
+ return err < 0 ? err : count;
+}
+
+static ssize_t
+show_dh_state(struct device *dev, struct device_attribute *attr, char *buf)
+{
+ struct scsi_device *sdev = to_scsi_device(dev);
+
+ if (!sdev->handler)
+ return snprintf(buf, 20, "detached\n");
+
+ return snprintf(buf, 20, "%s\n", sdev->handler->name);
+}
+
+static struct device_attribute scsi_dh_state_attr =
+ __ATTR(dh_state, S_IRUGO | S_IWUSR, show_dh_state,
+ store_dh_state);
+
+int scsi_dh_add_device(struct scsi_device *sdev)
+{
+ struct scsi_device_handler *devinfo = NULL;
+ const char *drv;
+ int err;
+
+ err = device_create_file(&sdev->sdev_gendev, &scsi_dh_state_attr);
+ if (err)
+ return err;
+
+ drv = scsi_dh_find_driver(sdev);
+ if (drv)
+ devinfo = scsi_dh_lookup(drv);
+ if (devinfo)
+ err = scsi_dh_handler_attach(sdev, devinfo);
+ return err;
+}
+
+void scsi_dh_remove_device(struct scsi_device *sdev)
+{
+ if (sdev->handler)
+ scsi_dh_handler_detach(sdev);
+ device_remove_file(&sdev->sdev_gendev, &scsi_dh_state_attr);
+}
+
+/*
+ * scsi_register_device_handler - register a device handler personality
+ * module.
+ * @scsi_dh - device handler to be registered.
+ *
+ * Returns 0 on success, -EBUSY if handler already registered.
+ */
+int scsi_register_device_handler(struct scsi_device_handler *scsi_dh)
+{
+ if (__scsi_dh_lookup(scsi_dh->name))
+ return -EBUSY;
+
+ if (!scsi_dh->attach || !scsi_dh->detach)
+ return -EINVAL;
+
+ spin_lock(&list_lock);
+ list_add(&scsi_dh->list, &scsi_dh_list);
+ spin_unlock(&list_lock);
+
+ printk(KERN_INFO "%s: device handler registered\n", scsi_dh->name);
+
+ return SCSI_DH_OK;
+}
+EXPORT_SYMBOL_GPL(scsi_register_device_handler);
+
+/*
+ * scsi_unregister_device_handler - unregister a device handler personality
+ * module.
+ * @scsi_dh - device handler to be unregistered.
+ *
+ * Returns 0 on success, -ENODEV if handler not registered.
+ */
+int scsi_unregister_device_handler(struct scsi_device_handler *scsi_dh)
+{
+ if (!__scsi_dh_lookup(scsi_dh->name))
+ return -ENODEV;
+
+ spin_lock(&list_lock);
+ list_del(&scsi_dh->list);
+ spin_unlock(&list_lock);
+ printk(KERN_INFO "%s: device handler unregistered\n", scsi_dh->name);
+
+ return SCSI_DH_OK;
+}
+EXPORT_SYMBOL_GPL(scsi_unregister_device_handler);
+
+static struct scsi_device *get_sdev_from_queue(struct request_queue *q)
+{
+ struct scsi_device *sdev;
+ unsigned long flags;
+
+ spin_lock_irqsave(q->queue_lock, flags);
+ sdev = q->queuedata;
+ if (!sdev || !get_device(&sdev->sdev_gendev))
+ sdev = NULL;
+ spin_unlock_irqrestore(q->queue_lock, flags);
+
+ return sdev;
+}
+
+/*
+ * scsi_dh_activate - activate the path associated with the scsi_device
+ * corresponding to the given request queue.
+ * Returns immediately without waiting for activation to be completed.
+ * @q - Request queue that is associated with the scsi_device to be
+ * activated.
+ * @fn - Function to be called upon completion of the activation.
+ * Function fn is called with data (below) and the error code.
+ * Function fn may be called from the same calling context, so the
+ * caller must not hold any lock that fn might need.
+ * @data - data passed to the function fn upon completion.
+ *
+ */
+int scsi_dh_activate(struct request_queue *q, activate_complete fn, void *data)
+{
+ struct scsi_device *sdev;
+ int err = SCSI_DH_NOSYS;
+
+ sdev = get_sdev_from_queue(q);
+ if (!sdev) {
+ if (fn)
+ fn(data, err);
+ return err;
+ }
+
+ if (!sdev->handler)
+ goto out_fn;
+ err = SCSI_DH_NOTCONN;
+ if (sdev->sdev_state == SDEV_CANCEL ||
+ sdev->sdev_state == SDEV_DEL)
+ goto out_fn;
+
+ err = SCSI_DH_DEV_OFFLINED;
+ if (sdev->sdev_state == SDEV_OFFLINE)
+ goto out_fn;
+
+ if (sdev->handler->activate)
+ err = sdev->handler->activate(sdev, fn, data);
+
+out_put_device:
+ put_device(&sdev->sdev_gendev);
+ return err;
+
+out_fn:
+ if (fn)
+ fn(data, err);
+ goto out_put_device;
+}
+EXPORT_SYMBOL_GPL(scsi_dh_activate);
+
+/*
+ * scsi_dh_set_params - set the parameters for the device as per the
+ * string specified in params.
+ * @q - Request queue that is associated with the scsi_device for
+ * which the parameters to be set.
+ * @params - parameters in the following format
+ * "no_of_params\0param1\0param2\0param3\0...\0"
+ * for example, the string for two parameters with values 10 and 21
+ * is specified as "2\010\021\0".
+ */
+int scsi_dh_set_params(struct request_queue *q, const char *params)
+{
+ struct scsi_device *sdev;
+ int err = -SCSI_DH_NOSYS;
+
+ sdev = get_sdev_from_queue(q);
+ if (!sdev)
+ return err;
+
+ if (sdev->handler && sdev->handler->set_params)
+ err = sdev->handler->set_params(sdev, params);
+ put_device(&sdev->sdev_gendev);
+ return err;
+}
+EXPORT_SYMBOL_GPL(scsi_dh_set_params);
+
+/*
+ * scsi_dh_attach - Attach device handler
+ * @q - Request queue that is associated with the scsi_device
+ * the handler should be attached to
+ * @name - name of the handler to attach
+ */
+int scsi_dh_attach(struct request_queue *q, const char *name)
+{
+ struct scsi_device *sdev;
+ struct scsi_device_handler *scsi_dh;
+ int err = 0;
+
+ sdev = get_sdev_from_queue(q);
+ if (!sdev)
+ return -ENODEV;
+
+ scsi_dh = scsi_dh_lookup(name);
+ if (!scsi_dh) {
+ err = -EINVAL;
+ goto out_put_device;
+ }
+
+ if (sdev->handler) {
+ if (sdev->handler != scsi_dh)
+ err = -EBUSY;
+ goto out_put_device;
+ }
+
+ err = scsi_dh_handler_attach(sdev, scsi_dh);
+
+out_put_device:
+ put_device(&sdev->sdev_gendev);
+ return err;
+}
+EXPORT_SYMBOL_GPL(scsi_dh_attach);
+
+/*
+ * scsi_dh_attached_handler_name - Get attached device handler's name
+ * @q - Request queue that is associated with the scsi_device
+ * that may have a device handler attached
+ * @gfp - the GFP mask used in the kmalloc() call when allocating memory
+ *
+ * Returns name of attached handler, NULL if no handler is attached.
+ * Caller must take care to free the returned string.
+ */
+const char *scsi_dh_attached_handler_name(struct request_queue *q, gfp_t gfp)
+{
+ struct scsi_device *sdev;
+ const char *handler_name = NULL;
+
+ sdev = get_sdev_from_queue(q);
+ if (!sdev)
+ return NULL;
+
+ if (sdev->handler)
+ handler_name = kstrdup(sdev->handler->name, gfp);
+ put_device(&sdev->sdev_gendev);
+ return handler_name;
+}
+EXPORT_SYMBOL_GPL(scsi_dh_attached_handler_name);
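[Editor's sketch, not part of the patch] The new scsi_dh.c core expects handler modules to describe themselves with a struct scsi_device_handler and register it with scsi_register_device_handler(); attach and detach are required (registration rejects a handler without them), and the other callbacks are optional. A minimal no-op handler, assuming the struct layout used by the calls above (name, module, list, attach, detach, ...), could look like this:

#include <linux/module.h>
#include <scsi/scsi_device.h>
#include <scsi/scsi_dh.h>

static int demo_dh_attach(struct scsi_device *sdev)
{
	/* Claim the device; returning non-zero rejects the attach. */
	sdev_printk(KERN_INFO, sdev, "demo_dh: attached\n");
	return 0;
}

static void demo_dh_detach(struct scsi_device *sdev)
{
	sdev_printk(KERN_INFO, sdev, "demo_dh: detached\n");
}

static struct scsi_device_handler demo_dh = {
	.name   = "demo_dh",
	.module = THIS_MODULE,
	.attach = demo_dh_attach,
	.detach = demo_dh_detach,
};

static int __init demo_dh_init(void)
{
	/* Returns -EBUSY if a handler with this name is already registered. */
	return scsi_register_device_handler(&demo_dh);
}

static void __exit demo_dh_exit(void)
{
	scsi_unregister_device_handler(&demo_dh);
}

module_init(demo_dh_init);
module_exit(demo_dh_exit);
MODULE_LICENSE("GPL");

With such a module loaded, writing "demo_dh" to a device's dh_state attribute would go through scsi_dh_lookup() and scsi_dh_handler_attach() above.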
diff --git a/drivers/scsi/scsi_error.c b/drivers/scsi/scsi_error.c
index afd34a6..66a96cd 100644
--- a/drivers/scsi/scsi_error.c
+++ b/drivers/scsi/scsi_error.c
@@ -33,9 +33,11 @@
#include <scsi/scsi_device.h>
#include <scsi/scsi_driver.h>
#include <scsi/scsi_eh.h>
+#include <scsi/scsi_common.h>
#include <scsi/scsi_transport.h>
#include <scsi/scsi_host.h>
#include <scsi/scsi_ioctl.h>
+#include <scsi/scsi_dh.h>
#include <scsi/sg.h>
#include "scsi_priv.h"
@@ -463,11 +465,10 @@
if (scsi_sense_is_deferred(&sshdr))
return NEEDS_RETRY;
- if (sdev->scsi_dh_data && sdev->scsi_dh_data->scsi_dh &&
- sdev->scsi_dh_data->scsi_dh->check_sense) {
+ if (sdev->handler && sdev->handler->check_sense) {
int rc;
- rc = sdev->scsi_dh_data->scsi_dh->check_sense(sdev, &sshdr);
+ rc = sdev->handler->check_sense(sdev, &sshdr);
if (rc != SCSI_RETURN_NOT_HANDLED)
return rc;
/* handler does not care. Drop down to default handling */
@@ -2178,8 +2179,17 @@
* We never actually get interrupted because kthread_run
* disables signal delivery for the created thread.
*/
- while (!kthread_should_stop()) {
+ while (true) {
+ /*
+ * The sequence in kthread_stop() sets the stop flag first and
+ * then wakes the process. To avoid missed wakeups, the task
+ * should always be in a non-running state before the stop
+ * flag is checked.
+ */
set_current_state(TASK_INTERRUPTIBLE);
+ if (kthread_should_stop())
+ break;
+
if ((shost->host_failed == 0 && shost->host_eh_scheduled == 0) ||
shost->host_failed != atomic_read(&shost->host_busy)) {
SCSI_LOG_ERROR_RECOVERY(1,
@@ -2416,45 +2426,6 @@
EXPORT_SYMBOL(scsi_command_normalize_sense);
/**
- * scsi_sense_desc_find - search for a given descriptor type in descriptor sense data format.
- * @sense_buffer: byte array of descriptor format sense data
- * @sb_len: number of valid bytes in sense_buffer
- * @desc_type: value of descriptor type to find
- * (e.g. 0 -> information)
- *
- * Notes:
- * only valid when sense data is in descriptor format
- *
- * Return value:
- * pointer to start of (first) descriptor if found else NULL
- */
-const u8 * scsi_sense_desc_find(const u8 * sense_buffer, int sb_len,
- int desc_type)
-{
- int add_sen_len, add_len, desc_len, k;
- const u8 * descp;
-
- if ((sb_len < 8) || (0 == (add_sen_len = sense_buffer[7])))
- return NULL;
- if ((sense_buffer[0] < 0x72) || (sense_buffer[0] > 0x73))
- return NULL;
- add_sen_len = (add_sen_len < (sb_len - 8)) ?
- add_sen_len : (sb_len - 8);
- descp = &sense_buffer[8];
- for (desc_len = 0, k = 0; k < add_sen_len; k += desc_len) {
- descp += desc_len;
- add_len = (k < (add_sen_len - 1)) ? descp[1]: -1;
- desc_len = add_len + 2;
- if (descp[0] == desc_type)
- return descp;
- if (add_len < 0) // short descriptor ??
- break;
- }
- return NULL;
-}
-EXPORT_SYMBOL(scsi_sense_desc_find);
-
-/**
* scsi_get_sense_info_fld - get information field from sense data (either fixed or descriptor format)
* @sense_buffer: byte array of sense data
* @sb_len: number of valid bytes in sense_buffer
@@ -2503,31 +2474,3 @@
}
}
EXPORT_SYMBOL(scsi_get_sense_info_fld);
-
-/**
- * scsi_build_sense_buffer - build sense data in a buffer
- * @desc: Sense format (non zero == descriptor format,
- * 0 == fixed format)
- * @buf: Where to build sense data
- * @key: Sense key
- * @asc: Additional sense code
- * @ascq: Additional sense code qualifier
- *
- **/
-void scsi_build_sense_buffer(int desc, u8 *buf, u8 key, u8 asc, u8 ascq)
-{
- if (desc) {
- buf[0] = 0x72; /* descriptor, current */
- buf[1] = key;
- buf[2] = asc;
- buf[3] = ascq;
- buf[7] = 0;
- } else {
- buf[0] = 0x70; /* fixed, current */
- buf[2] = key;
- buf[7] = 0xa;
- buf[12] = asc;
- buf[13] = ascq;
- }
-}
-EXPORT_SYMBOL(scsi_build_sense_buffer);
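[Editor's sketch, not part of the patch] The scsi_error.c hunk above converts the error-handler thread to the standard wakeup-safe kthread loop: the task state is changed before kthread_should_stop() is tested, so a stop request issued between the check and the sleep still wakes the thread instead of being lost. The generic shape of that loop is:

while (true) {
	set_current_state(TASK_INTERRUPTIBLE);   /* go non-running first */
	if (kthread_should_stop())
		break;                           /* stop flag tested after the state change */
	if (!work_available)                     /* 'work_available' stands in for the real wakeup test */
		schedule();                      /* sleep until kthread_stop() or a real wakeup */
	__set_current_state(TASK_RUNNING);
	/* handle the pending work here */
}
__set_current_state(TASK_RUNNING);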
diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index 882864f..cbfc599 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -31,6 +31,7 @@
#include <scsi/scsi_driver.h>
#include <scsi/scsi_eh.h>
#include <scsi/scsi_host.h>
+#include <scsi/scsi_dh.h>
#include <trace/events/scsi.h>
@@ -1248,9 +1249,8 @@
{
struct scsi_cmnd *cmd = req->special;
- if (unlikely(sdev->scsi_dh_data && sdev->scsi_dh_data->scsi_dh
- && sdev->scsi_dh_data->scsi_dh->prep_fn)) {
- int ret = sdev->scsi_dh_data->scsi_dh->prep_fn(sdev, req);
+ if (unlikely(sdev->handler && sdev->handler->prep_fn)) {
+ int ret = sdev->handler->prep_fn(sdev, req);
if (ret != BLKPREP_OK)
return ret;
}
diff --git a/drivers/scsi/scsi_priv.h b/drivers/scsi/scsi_priv.h
index e3902fc..644bb73 100644
--- a/drivers/scsi/scsi_priv.h
+++ b/drivers/scsi/scsi_priv.h
@@ -170,6 +170,15 @@
extern struct async_domain scsi_sd_pm_domain;
extern struct async_domain scsi_sd_probe_domain;
+/* scsi_dh.c */
+#ifdef CONFIG_SCSI_DH
+int scsi_dh_add_device(struct scsi_device *sdev);
+void scsi_dh_remove_device(struct scsi_device *sdev);
+#else
+static inline int scsi_dh_add_device(struct scsi_device *sdev) { return 0; }
+static inline void scsi_dh_remove_device(struct scsi_device *sdev) { }
+#endif
+
/*
* internal scsi timeout functions: for use by mid-layer and transport
* classes.
diff --git a/drivers/scsi/scsi_sysfs.c b/drivers/scsi/scsi_sysfs.c
index 9ad41168..b333389 100644
--- a/drivers/scsi/scsi_sysfs.c
+++ b/drivers/scsi/scsi_sysfs.c
@@ -1030,11 +1030,20 @@
"failed to add device: %d\n", error);
return error;
}
+
+ error = scsi_dh_add_device(sdev);
+ if (error) {
+ sdev_printk(KERN_INFO, sdev,
+ "failed to add device handler: %d\n", error);
+ return error;
+ }
+
device_enable_async_suspend(&sdev->sdev_dev);
error = device_add(&sdev->sdev_dev);
if (error) {
sdev_printk(KERN_INFO, sdev,
"failed to add class device: %d\n", error);
+ scsi_dh_remove_device(sdev);
device_del(&sdev->sdev_gendev);
return error;
}
@@ -1074,6 +1083,7 @@
bsg_unregister_queue(sdev->request_queue);
device_unregister(&sdev->sdev_dev);
transport_remove_device(dev);
+ scsi_dh_remove_device(sdev);
device_del(dev);
} else
put_device(&sdev->sdev_dev);
diff --git a/drivers/scsi/scsi_transport_sas.c b/drivers/scsi/scsi_transport_sas.c
index 9a05819..30d26e3 100644
--- a/drivers/scsi/scsi_transport_sas.c
+++ b/drivers/scsi/scsi_transport_sas.c
@@ -1222,13 +1222,6 @@
u64 identifier;
int error;
- /*
- * Only devices behind an expander are supported, because the
- * enclosure identifier is a SMP feature.
- */
- if (scsi_is_sas_phy_local(phy))
- return -EINVAL;
-
error = i->f->get_enclosure_identifier(rphy, &identifier);
if (error)
return error;
@@ -1248,9 +1241,6 @@
struct sas_internal *i = to_sas_internal(shost->transportt);
int val;
- if (scsi_is_sas_phy_local(phy))
- return -EINVAL;
-
val = i->f->get_bay_identifier(rphy);
if (val < 0)
return val;
diff --git a/drivers/scsi/xen-scsifront.c b/drivers/scsi/xen-scsifront.c
index fad22ca..9dc8687 100644
--- a/drivers/scsi/xen-scsifront.c
+++ b/drivers/scsi/xen-scsifront.c
@@ -377,7 +377,6 @@
unsigned int data_len = scsi_bufflen(sc);
unsigned int data_grants = 0, seg_grants = 0;
struct scatterlist *sg;
- unsigned long mfn;
struct scsiif_request_segment *seg;
ring_req->nr_segments = 0;
@@ -420,9 +419,9 @@
ref = gnttab_claim_grant_reference(&gref_head);
BUG_ON(ref == -ENOSPC);
- mfn = pfn_to_mfn(page_to_pfn(page));
gnttab_grant_foreign_access_ref(ref,
- info->dev->otherend_id, mfn, 1);
+ info->dev->otherend_id,
+ xen_page_to_gfn(page), 1);
shadow->gref[ref_cnt] = ref;
ring_req->seg[ref_cnt].gref = ref;
ring_req->seg[ref_cnt].offset = (uint16_t)off;
@@ -454,9 +453,10 @@
ref = gnttab_claim_grant_reference(&gref_head);
BUG_ON(ref == -ENOSPC);
- mfn = pfn_to_mfn(page_to_pfn(page));
gnttab_grant_foreign_access_ref(ref,
- info->dev->otherend_id, mfn, grant_ro);
+ info->dev->otherend_id,
+ xen_page_to_gfn(page),
+ grant_ro);
shadow->gref[ref_cnt] = ref;
seg->gref = ref;
diff --git a/drivers/soc/qcom/smd.c b/drivers/soc/qcom/smd.c
index 327adcf..a6155c9 100644
--- a/drivers/soc/qcom/smd.c
+++ b/drivers/soc/qcom/smd.c
@@ -96,6 +96,7 @@
* @smd: handle to qcom_smd
* @of_node: of_node handle for information related to this edge
* @edge_id: identifier of this edge
+ * @remote_pid: identifier of remote processor
* @irq: interrupt for signals on this edge
* @ipc_regmap: regmap handle holding the outgoing ipc register
* @ipc_offset: offset within @ipc_regmap of the register for ipc
@@ -111,6 +112,7 @@
struct qcom_smd *smd;
struct device_node *of_node;
unsigned edge_id;
+ unsigned remote_pid;
int irq;
@@ -310,7 +312,7 @@
SET_TX_CHANNEL_INFO(channel, fHEAD, 0);
SET_TX_CHANNEL_INFO(channel, fTAIL, 0);
SET_TX_CHANNEL_INFO(channel, fSTATE, 1);
- SET_TX_CHANNEL_INFO(channel, fBLOCKREADINTR, 0);
+ SET_TX_CHANNEL_INFO(channel, fBLOCKREADINTR, 1);
SET_TX_CHANNEL_INFO(channel, head, 0);
SET_TX_CHANNEL_INFO(channel, tail, 0);
@@ -572,7 +574,7 @@
* have to scan if the amount of available space in smem have changed
* since last scan.
*/
- available = qcom_smem_get_free_space(edge->edge_id);
+ available = qcom_smem_get_free_space(edge->remote_pid);
if (available != edge->smem_available) {
edge->smem_available = available;
edge->need_rescan = true;
@@ -681,7 +683,7 @@
goto out;
}
- SET_TX_CHANNEL_INFO(channel, fBLOCKREADINTR, 1);
+ SET_TX_CHANNEL_INFO(channel, fBLOCKREADINTR, 0);
ret = wait_event_interruptible(channel->fblockread_event,
qcom_smd_get_tx_avail(channel) >= tlen ||
@@ -689,7 +691,7 @@
if (ret)
goto out;
- SET_TX_CHANNEL_INFO(channel, fBLOCKREADINTR, 0);
+ SET_TX_CHANNEL_INFO(channel, fBLOCKREADINTR, 1);
}
SET_TX_CHANNEL_INFO(channel, fTAIL, 0);
@@ -976,7 +978,8 @@
spin_lock_init(&channel->recv_lock);
init_waitqueue_head(&channel->fblockread_event);
- ret = qcom_smem_get(edge->edge_id, smem_info_item, (void **)&info, &info_size);
+ ret = qcom_smem_get(edge->remote_pid, smem_info_item, (void **)&info,
+ &info_size);
if (ret)
goto free_name_and_channel;
@@ -997,7 +1000,8 @@
goto free_name_and_channel;
}
- ret = qcom_smem_get(edge->edge_id, smem_fifo_item, &fifo_base, &fifo_size);
+ ret = qcom_smem_get(edge->remote_pid, smem_fifo_item, &fifo_base,
+ &fifo_size);
if (ret)
goto free_name_and_channel;
@@ -1041,7 +1045,7 @@
int i;
for (tbl = 0; tbl < SMD_ALLOC_TBL_COUNT; tbl++) {
- ret = qcom_smem_get(edge->edge_id,
+ ret = qcom_smem_get(edge->remote_pid,
smem_items[tbl].alloc_tbl_id,
(void **)&alloc_tbl,
NULL);
@@ -1184,6 +1188,10 @@
return -EINVAL;
}
+ edge->remote_pid = QCOM_SMEM_HOST_ANY;
+ key = "qcom,remote-pid";
+ of_property_read_u32(node, key, &edge->remote_pid);
+
syscon_np = of_parse_phandle(node, "qcom,ipc", 0);
if (!syscon_np) {
dev_err(dev, "no qcom,ipc node\n");
diff --git a/drivers/soc/qcom/smem.c b/drivers/soc/qcom/smem.c
index 7c2c324c..5236518 100644
--- a/drivers/soc/qcom/smem.c
+++ b/drivers/soc/qcom/smem.c
@@ -258,10 +258,6 @@
size_t alloc_size;
void *p;
- /* We're not going to find it if there's no matching partition */
- if (host >= SMEM_HOST_COUNT || !smem->partitions[host])
- return -ENOENT;
-
phdr = smem->partitions[host];
p = (void *)phdr + sizeof(*phdr);
@@ -371,8 +367,9 @@
if (ret)
return ret;
- ret = qcom_smem_alloc_private(__smem, host, item, size);
- if (ret == -ENOENT)
+ if (host < SMEM_HOST_COUNT && __smem->partitions[host])
+ ret = qcom_smem_alloc_private(__smem, host, item, size);
+ else
ret = qcom_smem_alloc_global(__smem, item, size);
hwspin_unlock_irqrestore(__smem->hwlock, &flags);
@@ -428,10 +425,6 @@
struct smem_private_entry *hdr;
void *p;
- /* We're not going to find it if there's no matching partition */
- if (host >= SMEM_HOST_COUNT || !smem->partitions[host])
- return -ENOENT;
-
phdr = smem->partitions[host];
p = (void *)phdr + sizeof(*phdr);
@@ -484,8 +477,9 @@
if (ret)
return ret;
- ret = qcom_smem_get_private(__smem, host, item, ptr, size);
- if (ret == -ENOENT)
+ if (host < SMEM_HOST_COUNT && __smem->partitions[host])
+ ret = qcom_smem_get_private(__smem, host, item, ptr, size);
+ else
ret = qcom_smem_get_global(__smem, item, ptr, size);
hwspin_unlock_irqrestore(__smem->hwlock, &flags);
diff --git a/drivers/staging/android/ion/ion.c b/drivers/staging/android/ion/ion.c
index eec878e..217aa53 100644
--- a/drivers/staging/android/ion/ion.c
+++ b/drivers/staging/android/ion/ion.c
@@ -997,7 +997,7 @@
mutex_unlock(&buffer->lock);
}
-static struct vm_operations_struct ion_vma_ops = {
+static const struct vm_operations_struct ion_vma_ops = {
.open = ion_vm_open,
.close = ion_vm_close,
.fault = ion_vm_fault,
diff --git a/drivers/staging/board/armadillo800eva.c b/drivers/staging/board/armadillo800eva.c
index 81df77b..9c41652 100644
--- a/drivers/staging/board/armadillo800eva.c
+++ b/drivers/staging/board/armadillo800eva.c
@@ -91,7 +91,7 @@
.pdev = &lcdc0_device,
.clocks = lcdc0_clocks,
.nclocks = ARRAY_SIZE(lcdc0_clocks),
- .domain = "a4lc",
+ .domain = "/system-controller@e6180000/pm-domains/c5/a4lc@1"
},
};
diff --git a/drivers/staging/board/board.c b/drivers/staging/board/board.c
index 29d456e..3eb5eb8 100644
--- a/drivers/staging/board/board.c
+++ b/drivers/staging/board/board.c
@@ -135,6 +135,40 @@
return error;
}
+#ifdef CONFIG_PM_GENERIC_DOMAINS_OF
+static int board_staging_add_dev_domain(struct platform_device *pdev,
+ const char *domain)
+{
+ struct of_phandle_args pd_args;
+ struct generic_pm_domain *pd;
+ struct device_node *np;
+
+ np = of_find_node_by_path(domain);
+ if (!np) {
+ pr_err("Cannot find domain node %s\n", domain);
+ return -ENOENT;
+ }
+
+ pd_args.np = np;
+ pd_args.args_count = 0;
+ pd = of_genpd_get_from_provider(&pd_args);
+ if (IS_ERR(pd)) {
+ pr_err("Cannot find genpd %s (%ld)\n", domain, PTR_ERR(pd));
+ return PTR_ERR(pd);
+ }
+ pr_debug("Found genpd %s for device %s\n", pd->name, pdev->name);
+
+ return pm_genpd_add_device(pd, &pdev->dev);
+}
+#else
+static inline int board_staging_add_dev_domain(struct platform_device *pdev,
+ const char *domain)
+{
+ return 0;
+}
+#endif
+
int __init board_staging_register_device(const struct board_staging_dev *dev)
{
struct platform_device *pdev = dev->pdev;
@@ -161,7 +195,7 @@
}
if (dev->domain)
- __pm_genpd_name_add_device(dev->domain, &pdev->dev, NULL);
+ board_staging_add_dev_domain(pdev, dev->domain);
return error;
}
diff --git a/drivers/staging/comedi/comedi_fops.c b/drivers/staging/comedi/comedi_fops.c
index fd54d09..0e8a451 100644
--- a/drivers/staging/comedi/comedi_fops.c
+++ b/drivers/staging/comedi/comedi_fops.c
@@ -2156,7 +2156,7 @@
comedi_buf_map_put(bm);
}
-static struct vm_operations_struct comedi_vm_ops = {
+static const struct vm_operations_struct comedi_vm_ops = {
.open = comedi_vm_open,
.close = comedi_vm_close,
};
diff --git a/drivers/staging/rdma/Kconfig b/drivers/staging/rdma/Kconfig
index cf5fe9b..d7f6235 100644
--- a/drivers/staging/rdma/Kconfig
+++ b/drivers/staging/rdma/Kconfig
@@ -24,6 +24,8 @@
source "drivers/staging/rdma/amso1100/Kconfig"
+source "drivers/staging/rdma/ehca/Kconfig"
+
source "drivers/staging/rdma/hfi1/Kconfig"
source "drivers/staging/rdma/ipath/Kconfig"
diff --git a/drivers/staging/rdma/Makefile b/drivers/staging/rdma/Makefile
index cbd915a..139d78e 100644
--- a/drivers/staging/rdma/Makefile
+++ b/drivers/staging/rdma/Makefile
@@ -1,4 +1,5 @@
# Entries for RDMA_STAGING tree
obj-$(CONFIG_INFINIBAND_AMSO1100) += amso1100/
+obj-$(CONFIG_INFINIBAND_EHCA) += ehca/
obj-$(CONFIG_INFINIBAND_HFI1) += hfi1/
obj-$(CONFIG_INFINIBAND_IPATH) += ipath/
diff --git a/drivers/infiniband/hw/ehca/Kconfig b/drivers/staging/rdma/ehca/Kconfig
similarity index 69%
rename from drivers/infiniband/hw/ehca/Kconfig
rename to drivers/staging/rdma/ehca/Kconfig
index 59f807d..3fadd2a 100644
--- a/drivers/infiniband/hw/ehca/Kconfig
+++ b/drivers/staging/rdma/ehca/Kconfig
@@ -2,7 +2,8 @@
tristate "eHCA support"
depends on IBMEBUS
---help---
- This driver supports the IBM pSeries eHCA InfiniBand adapter.
+ This driver supports the deprecated IBM pSeries eHCA InfiniBand
+ adapter.
To compile the driver as a module, choose M here. The module
will be called ib_ehca.
diff --git a/drivers/infiniband/hw/ehca/Makefile b/drivers/staging/rdma/ehca/Makefile
similarity index 100%
rename from drivers/infiniband/hw/ehca/Makefile
rename to drivers/staging/rdma/ehca/Makefile
diff --git a/drivers/staging/rdma/ehca/TODO b/drivers/staging/rdma/ehca/TODO
new file mode 100644
index 0000000..199a4a6
--- /dev/null
+++ b/drivers/staging/rdma/ehca/TODO
@@ -0,0 +1,4 @@
+9/2015
+
+The ehca driver has been deprecated and moved to drivers/staging/rdma.
+It will be removed in the 4.6 merge window.
diff --git a/drivers/infiniband/hw/ehca/ehca_av.c b/drivers/staging/rdma/ehca/ehca_av.c
similarity index 100%
rename from drivers/infiniband/hw/ehca/ehca_av.c
rename to drivers/staging/rdma/ehca/ehca_av.c
diff --git a/drivers/infiniband/hw/ehca/ehca_classes.h b/drivers/staging/rdma/ehca/ehca_classes.h
similarity index 100%
rename from drivers/infiniband/hw/ehca/ehca_classes.h
rename to drivers/staging/rdma/ehca/ehca_classes.h
diff --git a/drivers/infiniband/hw/ehca/ehca_classes_pSeries.h b/drivers/staging/rdma/ehca/ehca_classes_pSeries.h
similarity index 100%
rename from drivers/infiniband/hw/ehca/ehca_classes_pSeries.h
rename to drivers/staging/rdma/ehca/ehca_classes_pSeries.h
diff --git a/drivers/infiniband/hw/ehca/ehca_cq.c b/drivers/staging/rdma/ehca/ehca_cq.c
similarity index 100%
rename from drivers/infiniband/hw/ehca/ehca_cq.c
rename to drivers/staging/rdma/ehca/ehca_cq.c
diff --git a/drivers/infiniband/hw/ehca/ehca_eq.c b/drivers/staging/rdma/ehca/ehca_eq.c
similarity index 100%
rename from drivers/infiniband/hw/ehca/ehca_eq.c
rename to drivers/staging/rdma/ehca/ehca_eq.c
diff --git a/drivers/infiniband/hw/ehca/ehca_hca.c b/drivers/staging/rdma/ehca/ehca_hca.c
similarity index 100%
rename from drivers/infiniband/hw/ehca/ehca_hca.c
rename to drivers/staging/rdma/ehca/ehca_hca.c
diff --git a/drivers/infiniband/hw/ehca/ehca_irq.c b/drivers/staging/rdma/ehca/ehca_irq.c
similarity index 100%
rename from drivers/infiniband/hw/ehca/ehca_irq.c
rename to drivers/staging/rdma/ehca/ehca_irq.c
diff --git a/drivers/infiniband/hw/ehca/ehca_irq.h b/drivers/staging/rdma/ehca/ehca_irq.h
similarity index 100%
rename from drivers/infiniband/hw/ehca/ehca_irq.h
rename to drivers/staging/rdma/ehca/ehca_irq.h
diff --git a/drivers/infiniband/hw/ehca/ehca_iverbs.h b/drivers/staging/rdma/ehca/ehca_iverbs.h
similarity index 100%
rename from drivers/infiniband/hw/ehca/ehca_iverbs.h
rename to drivers/staging/rdma/ehca/ehca_iverbs.h
diff --git a/drivers/infiniband/hw/ehca/ehca_main.c b/drivers/staging/rdma/ehca/ehca_main.c
similarity index 100%
rename from drivers/infiniband/hw/ehca/ehca_main.c
rename to drivers/staging/rdma/ehca/ehca_main.c
diff --git a/drivers/infiniband/hw/ehca/ehca_mcast.c b/drivers/staging/rdma/ehca/ehca_mcast.c
similarity index 100%
rename from drivers/infiniband/hw/ehca/ehca_mcast.c
rename to drivers/staging/rdma/ehca/ehca_mcast.c
diff --git a/drivers/infiniband/hw/ehca/ehca_mrmw.c b/drivers/staging/rdma/ehca/ehca_mrmw.c
similarity index 100%
rename from drivers/infiniband/hw/ehca/ehca_mrmw.c
rename to drivers/staging/rdma/ehca/ehca_mrmw.c
diff --git a/drivers/infiniband/hw/ehca/ehca_mrmw.h b/drivers/staging/rdma/ehca/ehca_mrmw.h
similarity index 100%
rename from drivers/infiniband/hw/ehca/ehca_mrmw.h
rename to drivers/staging/rdma/ehca/ehca_mrmw.h
diff --git a/drivers/infiniband/hw/ehca/ehca_pd.c b/drivers/staging/rdma/ehca/ehca_pd.c
similarity index 100%
rename from drivers/infiniband/hw/ehca/ehca_pd.c
rename to drivers/staging/rdma/ehca/ehca_pd.c
diff --git a/drivers/infiniband/hw/ehca/ehca_qes.h b/drivers/staging/rdma/ehca/ehca_qes.h
similarity index 100%
rename from drivers/infiniband/hw/ehca/ehca_qes.h
rename to drivers/staging/rdma/ehca/ehca_qes.h
diff --git a/drivers/infiniband/hw/ehca/ehca_qp.c b/drivers/staging/rdma/ehca/ehca_qp.c
similarity index 100%
rename from drivers/infiniband/hw/ehca/ehca_qp.c
rename to drivers/staging/rdma/ehca/ehca_qp.c
diff --git a/drivers/infiniband/hw/ehca/ehca_reqs.c b/drivers/staging/rdma/ehca/ehca_reqs.c
similarity index 100%
rename from drivers/infiniband/hw/ehca/ehca_reqs.c
rename to drivers/staging/rdma/ehca/ehca_reqs.c
diff --git a/drivers/infiniband/hw/ehca/ehca_sqp.c b/drivers/staging/rdma/ehca/ehca_sqp.c
similarity index 100%
rename from drivers/infiniband/hw/ehca/ehca_sqp.c
rename to drivers/staging/rdma/ehca/ehca_sqp.c
diff --git a/drivers/infiniband/hw/ehca/ehca_tools.h b/drivers/staging/rdma/ehca/ehca_tools.h
similarity index 100%
rename from drivers/infiniband/hw/ehca/ehca_tools.h
rename to drivers/staging/rdma/ehca/ehca_tools.h
diff --git a/drivers/infiniband/hw/ehca/ehca_uverbs.c b/drivers/staging/rdma/ehca/ehca_uverbs.c
similarity index 100%
rename from drivers/infiniband/hw/ehca/ehca_uverbs.c
rename to drivers/staging/rdma/ehca/ehca_uverbs.c
diff --git a/drivers/infiniband/hw/ehca/hcp_if.c b/drivers/staging/rdma/ehca/hcp_if.c
similarity index 100%
rename from drivers/infiniband/hw/ehca/hcp_if.c
rename to drivers/staging/rdma/ehca/hcp_if.c
diff --git a/drivers/infiniband/hw/ehca/hcp_if.h b/drivers/staging/rdma/ehca/hcp_if.h
similarity index 100%
rename from drivers/infiniband/hw/ehca/hcp_if.h
rename to drivers/staging/rdma/ehca/hcp_if.h
diff --git a/drivers/infiniband/hw/ehca/hcp_phyp.c b/drivers/staging/rdma/ehca/hcp_phyp.c
similarity index 100%
rename from drivers/infiniband/hw/ehca/hcp_phyp.c
rename to drivers/staging/rdma/ehca/hcp_phyp.c
diff --git a/drivers/infiniband/hw/ehca/hcp_phyp.h b/drivers/staging/rdma/ehca/hcp_phyp.h
similarity index 100%
rename from drivers/infiniband/hw/ehca/hcp_phyp.h
rename to drivers/staging/rdma/ehca/hcp_phyp.h
diff --git a/drivers/infiniband/hw/ehca/hipz_fns.h b/drivers/staging/rdma/ehca/hipz_fns.h
similarity index 100%
rename from drivers/infiniband/hw/ehca/hipz_fns.h
rename to drivers/staging/rdma/ehca/hipz_fns.h
diff --git a/drivers/infiniband/hw/ehca/hipz_fns_core.h b/drivers/staging/rdma/ehca/hipz_fns_core.h
similarity index 100%
rename from drivers/infiniband/hw/ehca/hipz_fns_core.h
rename to drivers/staging/rdma/ehca/hipz_fns_core.h
diff --git a/drivers/infiniband/hw/ehca/hipz_hw.h b/drivers/staging/rdma/ehca/hipz_hw.h
similarity index 100%
rename from drivers/infiniband/hw/ehca/hipz_hw.h
rename to drivers/staging/rdma/ehca/hipz_hw.h
diff --git a/drivers/infiniband/hw/ehca/ipz_pt_fn.c b/drivers/staging/rdma/ehca/ipz_pt_fn.c
similarity index 100%
rename from drivers/infiniband/hw/ehca/ipz_pt_fn.c
rename to drivers/staging/rdma/ehca/ipz_pt_fn.c
diff --git a/drivers/infiniband/hw/ehca/ipz_pt_fn.h b/drivers/staging/rdma/ehca/ipz_pt_fn.h
similarity index 100%
rename from drivers/infiniband/hw/ehca/ipz_pt_fn.h
rename to drivers/staging/rdma/ehca/ipz_pt_fn.h
diff --git a/drivers/target/iscsi/iscsi_target.c b/drivers/target/iscsi/iscsi_target.c
index fd09290..342a07c 100644
--- a/drivers/target/iscsi/iscsi_target.c
+++ b/drivers/target/iscsi/iscsi_target.c
@@ -269,14 +269,14 @@
}
bool iscsit_check_np_match(
- struct __kernel_sockaddr_storage *sockaddr,
+ struct sockaddr_storage *sockaddr,
struct iscsi_np *np,
int network_transport)
{
struct sockaddr_in *sock_in, *sock_in_e;
struct sockaddr_in6 *sock_in6, *sock_in6_e;
bool ip_match = false;
- u16 port;
+ u16 port, port_e;
if (sockaddr->ss_family == AF_INET6) {
sock_in6 = (struct sockaddr_in6 *)sockaddr;
@@ -288,6 +288,7 @@
ip_match = true;
port = ntohs(sock_in6->sin6_port);
+ port_e = ntohs(sock_in6_e->sin6_port);
} else {
sock_in = (struct sockaddr_in *)sockaddr;
sock_in_e = (struct sockaddr_in *)&np->np_sockaddr;
@@ -296,9 +297,10 @@
ip_match = true;
port = ntohs(sock_in->sin_port);
+ port_e = ntohs(sock_in_e->sin_port);
}
- if (ip_match && (np->np_port == port) &&
+ if (ip_match && (port_e == port) &&
(np->np_network_transport == network_transport))
return true;
@@ -309,7 +311,7 @@
* Called with mutex np_lock held
*/
static struct iscsi_np *iscsit_get_np(
- struct __kernel_sockaddr_storage *sockaddr,
+ struct sockaddr_storage *sockaddr,
int network_transport)
{
struct iscsi_np *np;
@@ -340,12 +342,9 @@
}
struct iscsi_np *iscsit_add_np(
- struct __kernel_sockaddr_storage *sockaddr,
- char *ip_str,
+ struct sockaddr_storage *sockaddr,
int network_transport)
{
- struct sockaddr_in *sock_in;
- struct sockaddr_in6 *sock_in6;
struct iscsi_np *np;
int ret;
@@ -368,16 +367,6 @@
}
np->np_flags |= NPF_IP_NETWORK;
- if (sockaddr->ss_family == AF_INET6) {
- sock_in6 = (struct sockaddr_in6 *)sockaddr;
- snprintf(np->np_ip, IPV6_ADDRESS_SPACE, "%s", ip_str);
- np->np_port = ntohs(sock_in6->sin6_port);
- } else {
- sock_in = (struct sockaddr_in *)sockaddr;
- sprintf(np->np_ip, "%s", ip_str);
- np->np_port = ntohs(sock_in->sin_port);
- }
-
np->np_network_transport = network_transport;
spin_lock_init(&np->np_thread_lock);
init_completion(&np->np_restart_comp);
@@ -411,8 +400,8 @@
list_add_tail(&np->np_list, &g_np_list);
mutex_unlock(&np_lock);
- pr_debug("CORE[0] - Added Network Portal: %s:%hu on %s\n",
- np->np_ip, np->np_port, np->np_transport->name);
+ pr_debug("CORE[0] - Added Network Portal: %pISpc on %s\n",
+ &np->np_sockaddr, np->np_transport->name);
return np;
}
@@ -481,8 +470,8 @@
list_del(&np->np_list);
mutex_unlock(&np_lock);
- pr_debug("CORE[0] - Removed Network Portal: %s:%hu on %s\n",
- np->np_ip, np->np_port, np->np_transport->name);
+ pr_debug("CORE[0] - Removed Network Portal: %pISpc on %s\n",
+ &np->np_sockaddr, np->np_transport->name);
iscsit_put_transport(np->np_transport);
kfree(np);
@@ -1209,7 +1198,6 @@
u8 *pad_bytes)
{
u32 data_crc;
- u32 i;
struct scatterlist *sg;
unsigned int page_off;
@@ -1218,15 +1206,15 @@
sg = cmd->first_data_sg;
page_off = cmd->first_data_sg_off;
- i = 0;
while (data_length) {
- u32 cur_len = min_t(u32, data_length, (sg[i].length - page_off));
+ u32 cur_len = min_t(u32, data_length, (sg->length - page_off));
- crypto_hash_update(hash, &sg[i], cur_len);
+ crypto_hash_update(hash, sg, cur_len);
data_length -= cur_len;
page_off = 0;
- i++;
+ /* iscsit_map_iovec has already checked for invalid sg pointers */
+ sg = sg_next(sg);
}
if (padding) {
@@ -2556,7 +2544,7 @@
cmd->stat_sn = conn->stat_sn++;
hdr->statsn = cpu_to_be32(cmd->stat_sn);
hdr->exp_cmdsn = cpu_to_be32(conn->sess->exp_cmd_sn);
- hdr->max_cmdsn = cpu_to_be32(conn->sess->max_cmd_sn);
+ hdr->max_cmdsn = cpu_to_be32((u32) atomic_read(&conn->sess->max_cmd_sn));
hdr->async_event = ISCSI_ASYNC_MSG_DROPPING_CONNECTION;
hdr->param1 = cpu_to_be16(cmd->logout_cid);
hdr->param2 = cpu_to_be16(conn->sess->sess_ops->DefaultTime2Wait);
@@ -2628,7 +2616,7 @@
hdr->statsn = cpu_to_be32(0xFFFFFFFF);
hdr->exp_cmdsn = cpu_to_be32(conn->sess->exp_cmd_sn);
- hdr->max_cmdsn = cpu_to_be32(conn->sess->max_cmd_sn);
+ hdr->max_cmdsn = cpu_to_be32((u32) atomic_read(&conn->sess->max_cmd_sn));
hdr->datasn = cpu_to_be32(datain->data_sn);
hdr->offset = cpu_to_be32(datain->offset);
@@ -2839,7 +2827,7 @@
iscsit_increment_maxcmdsn(cmd, conn->sess);
hdr->exp_cmdsn = cpu_to_be32(conn->sess->exp_cmd_sn);
- hdr->max_cmdsn = cpu_to_be32(conn->sess->max_cmd_sn);
+ hdr->max_cmdsn = cpu_to_be32((u32) atomic_read(&conn->sess->max_cmd_sn));
pr_debug("Built Logout Response ITT: 0x%08x StatSN:"
" 0x%08x Response: 0x%02x CID: %hu on CID: %hu\n",
@@ -2902,7 +2890,7 @@
iscsit_increment_maxcmdsn(cmd, conn->sess);
hdr->exp_cmdsn = cpu_to_be32(conn->sess->exp_cmd_sn);
- hdr->max_cmdsn = cpu_to_be32(conn->sess->max_cmd_sn);
+ hdr->max_cmdsn = cpu_to_be32((u32) atomic_read(&conn->sess->max_cmd_sn));
pr_debug("Built NOPIN %s Response ITT: 0x%08x, TTT: 0x%08x,"
" StatSN: 0x%08x, Length %u\n", (nopout_response) ?
@@ -3049,7 +3037,7 @@
hdr->ttt = cpu_to_be32(r2t->targ_xfer_tag);
hdr->statsn = cpu_to_be32(conn->stat_sn);
hdr->exp_cmdsn = cpu_to_be32(conn->sess->exp_cmd_sn);
- hdr->max_cmdsn = cpu_to_be32(conn->sess->max_cmd_sn);
+ hdr->max_cmdsn = cpu_to_be32((u32) atomic_read(&conn->sess->max_cmd_sn));
hdr->r2tsn = cpu_to_be32(r2t->r2t_sn);
hdr->data_offset = cpu_to_be32(r2t->offset);
hdr->data_length = cpu_to_be32(r2t->xfer_len);
@@ -3202,7 +3190,7 @@
iscsit_increment_maxcmdsn(cmd, conn->sess);
hdr->exp_cmdsn = cpu_to_be32(conn->sess->exp_cmd_sn);
- hdr->max_cmdsn = cpu_to_be32(conn->sess->max_cmd_sn);
+ hdr->max_cmdsn = cpu_to_be32((u32) atomic_read(&conn->sess->max_cmd_sn));
pr_debug("Built SCSI Response, ITT: 0x%08x, StatSN: 0x%08x,"
" Response: 0x%02x, SAM Status: 0x%02x, CID: %hu\n",
@@ -3321,7 +3309,7 @@
iscsit_increment_maxcmdsn(cmd, conn->sess);
hdr->exp_cmdsn = cpu_to_be32(conn->sess->exp_cmd_sn);
- hdr->max_cmdsn = cpu_to_be32(conn->sess->max_cmd_sn);
+ hdr->max_cmdsn = cpu_to_be32((u32) atomic_read(&conn->sess->max_cmd_sn));
pr_debug("Built Task Management Response ITT: 0x%08x,"
" StatSN: 0x%08x, Response: 0x%02x, CID: %hu\n",
@@ -3399,6 +3387,7 @@
int target_name_printed;
unsigned char buf[ISCSI_IQN_LEN+12]; /* iqn + "TargetName=" + \0 */
unsigned char *text_in = cmd->text_in_ptr, *text_ptr = NULL;
+ bool active;
buffer_len = min(conn->conn_ops->MaxRecvDataSegmentLength,
SENDTARGETS_BUF_LIMIT);
@@ -3452,19 +3441,18 @@
}
spin_lock(&tpg->tpg_state_lock);
- if ((tpg->tpg_state == TPG_STATE_FREE) ||
- (tpg->tpg_state == TPG_STATE_INACTIVE)) {
- spin_unlock(&tpg->tpg_state_lock);
- continue;
- }
+ active = (tpg->tpg_state == TPG_STATE_ACTIVE);
spin_unlock(&tpg->tpg_state_lock);
+ if (!active && tpg->tpg_attrib.tpg_enabled_sendtargets)
+ continue;
+
spin_lock(&tpg->tpg_np_lock);
list_for_each_entry(tpg_np, &tpg->tpg_gnp_list,
tpg_np_list) {
struct iscsi_np *np = tpg_np->tpg_np;
bool inaddr_any = iscsit_check_inaddr_any(np);
- char *fmt_str;
+ struct sockaddr_storage *sockaddr;
if (np->np_network_transport != network_transport)
continue;
@@ -3492,15 +3480,15 @@
}
}
- if (np->np_sockaddr.ss_family == AF_INET6)
- fmt_str = "TargetAddress=[%s]:%hu,%hu";
+ if (inaddr_any)
+ sockaddr = &conn->local_sockaddr;
else
- fmt_str = "TargetAddress=%s:%hu,%hu";
+ sockaddr = &np->np_sockaddr;
- len = sprintf(buf, fmt_str,
- inaddr_any ? conn->local_ip : np->np_ip,
- np->np_port,
- tpg->tpgt);
+ len = sprintf(buf, "TargetAddress="
+ "%pISpc,%hu",
+ sockaddr,
+ tpg->tpgt);
len += 1;
if ((len + payload_len) > buffer_len) {
@@ -3576,7 +3564,7 @@
*/
cmd->maxcmdsn_inc = 0;
hdr->exp_cmdsn = cpu_to_be32(conn->sess->exp_cmd_sn);
- hdr->max_cmdsn = cpu_to_be32(conn->sess->max_cmd_sn);
+ hdr->max_cmdsn = cpu_to_be32((u32) atomic_read(&conn->sess->max_cmd_sn));
pr_debug("Built Text Response: ITT: 0x%08x, TTT: 0x%08x, StatSN: 0x%08x,"
" Length: %u, CID: %hu F: %d C: %d\n", cmd->init_task_tag,
@@ -3654,7 +3642,7 @@
cmd->stat_sn = conn->stat_sn++;
hdr->statsn = cpu_to_be32(cmd->stat_sn);
hdr->exp_cmdsn = cpu_to_be32(conn->sess->exp_cmd_sn);
- hdr->max_cmdsn = cpu_to_be32(conn->sess->max_cmd_sn);
+ hdr->max_cmdsn = cpu_to_be32((u32) atomic_read(&conn->sess->max_cmd_sn));
}
EXPORT_SYMBOL(iscsit_build_reject);
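[Editor's note] The iscsi_target.c changes above drop the pre-formatted np_ip/np_port strings and print struct sockaddr_storage directly through the %pIS printk extension: the 'p' flag appends the port and 'c' prints IPv6 addresses in compressed form. A minimal usage sketch:

struct sockaddr_storage ss;	/* filled in elsewhere, e.g. by getname() on the socket */

/* Prints e.g. "192.168.0.1:3260" or "[fe80::1]:3260" */
pr_debug("portal: %pISpc\n", &ss);

/* Address only, IPv6 compressed: "fe80::1" */
pr_debug("address: %pISc\n", &ss);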
diff --git a/drivers/target/iscsi/iscsi_target.h b/drivers/target/iscsi/iscsi_target.h
index 7d0f9c0..4cf2c0f 100644
--- a/drivers/target/iscsi/iscsi_target.h
+++ b/drivers/target/iscsi/iscsi_target.h
@@ -10,10 +10,10 @@
extern void iscsit_login_kref_put(struct kref *);
extern int iscsit_deaccess_np(struct iscsi_np *, struct iscsi_portal_group *,
struct iscsi_tpg_np *);
-extern bool iscsit_check_np_match(struct __kernel_sockaddr_storage *,
+extern bool iscsit_check_np_match(struct sockaddr_storage *,
struct iscsi_np *, int);
-extern struct iscsi_np *iscsit_add_np(struct __kernel_sockaddr_storage *,
- char *, int);
+extern struct iscsi_np *iscsit_add_np(struct sockaddr_storage *,
+ int);
extern int iscsit_reset_np_thread(struct iscsi_np *, struct iscsi_tpg_np *,
struct iscsi_portal_group *, bool);
extern int iscsit_del_np(struct iscsi_np *);
diff --git a/drivers/target/iscsi/iscsi_target_configfs.c b/drivers/target/iscsi/iscsi_target_configfs.c
index c1898c8..c7461d7 100644
--- a/drivers/target/iscsi/iscsi_target_configfs.c
+++ b/drivers/target/iscsi/iscsi_target_configfs.c
@@ -99,7 +99,7 @@
* Use existing np->np_sockaddr for SCTP network portal reference
*/
tpg_np_sctp = iscsit_tpg_add_network_portal(tpg, &np->np_sockaddr,
- np->np_ip, tpg_np, ISCSI_SCTP_TCP);
+ tpg_np, ISCSI_SCTP_TCP);
if (!tpg_np_sctp || IS_ERR(tpg_np_sctp))
goto out;
} else {
@@ -177,7 +177,7 @@
}
tpg_np_iser = iscsit_tpg_add_network_portal(tpg, &np->np_sockaddr,
- np->np_ip, tpg_np, ISCSI_INFINIBAND);
+ tpg_np, ISCSI_INFINIBAND);
if (IS_ERR(tpg_np_iser)) {
rc = PTR_ERR(tpg_np_iser);
goto out;
@@ -220,7 +220,7 @@
struct iscsi_portal_group *tpg;
struct iscsi_tpg_np *tpg_np;
char *str, *str2, *ip_str, *port_str;
- struct __kernel_sockaddr_storage sockaddr;
+ struct sockaddr_storage sockaddr;
struct sockaddr_in *sock_in;
struct sockaddr_in6 *sock_in6;
unsigned long port;
@@ -235,7 +235,7 @@
memset(buf, 0, MAX_PORTAL_LEN + 1);
snprintf(buf, MAX_PORTAL_LEN + 1, "%s", name);
- memset(&sockaddr, 0, sizeof(struct __kernel_sockaddr_storage));
+ memset(&sockaddr, 0, sizeof(struct sockaddr_storage));
str = strstr(buf, "[");
if (str) {
@@ -248,8 +248,8 @@
return ERR_PTR(-EINVAL);
}
str++; /* Skip over leading "[" */
- *str2 = '\0'; /* Terminate the IPv6 address */
- str2++; /* Skip over the "]" */
+ *str2 = '\0'; /* Terminate the unbracketed IPv6 address */
+ str2++; /* Skip over the \0 */
port_str = strstr(str2, ":");
if (!port_str) {
pr_err("Unable to locate \":port\""
@@ -267,7 +267,7 @@
sock_in6 = (struct sockaddr_in6 *)&sockaddr;
sock_in6->sin6_family = AF_INET6;
sock_in6->sin6_port = htons((unsigned short)port);
- ret = in6_pton(str, IPV6_ADDRESS_SPACE,
+ ret = in6_pton(str, -1,
(void *)&sock_in6->sin6_addr.in6_u, -1, &end);
if (ret <= 0) {
pr_err("in6_pton returned: %d\n", ret);
@@ -316,7 +316,7 @@
* sys/kernel/config/iscsi/$IQN/$TPG/np/$IP:$PORT/
*
*/
- tpg_np = iscsit_tpg_add_network_portal(tpg, &sockaddr, str, NULL,
+ tpg_np = iscsit_tpg_add_network_portal(tpg, &sockaddr, NULL,
ISCSI_TCP);
if (IS_ERR(tpg_np)) {
iscsit_put_tpg(tpg);
@@ -344,8 +344,8 @@
se_tpg = &tpg->tpg_se_tpg;
pr_debug("LIO_Target_ConfigFS: DEREGISTER -> %s TPGT: %hu"
- " PORTAL: %s:%hu\n", config_item_name(&se_tpg->se_tpg_wwn->wwn_group.cg_item),
- tpg->tpgt, tpg_np->tpg_np->np_ip, tpg_np->tpg_np->np_port);
+ " PORTAL: %pISpc\n", config_item_name(&se_tpg->se_tpg_wwn->wwn_group.cg_item),
+ tpg->tpgt, &tpg_np->tpg_np->np_sockaddr);
ret = iscsit_tpg_del_network_portal(tpg, tpg_np);
if (ret < 0)
@@ -656,6 +656,7 @@
struct iscsi_conn *conn;
struct se_session *se_sess;
ssize_t rb = 0;
+ u32 max_cmd_sn;
spin_lock_bh(&se_nacl->nacl_sess_lock);
se_sess = se_nacl->nacl_sess;
@@ -703,11 +704,12 @@
" Values]-----------------------\n");
rb += sprintf(page+rb, " CmdSN/WR : CmdSN/WC : ExpCmdSN"
" : MaxCmdSN : ITT : TTT\n");
+ max_cmd_sn = (u32) atomic_read(&sess->max_cmd_sn);
rb += sprintf(page+rb, " 0x%08x 0x%08x 0x%08x 0x%08x"
" 0x%08x 0x%08x\n",
sess->cmdsn_window,
- (sess->max_cmd_sn - sess->exp_cmd_sn) + 1,
- sess->exp_cmd_sn, sess->max_cmd_sn,
+ (max_cmd_sn - sess->exp_cmd_sn) + 1,
+ sess->exp_cmd_sn, max_cmd_sn,
sess->init_task_tag, sess->targ_xfer_tag);
rb += sprintf(page+rb, "----------------------[iSCSI"
" Connections]-------------------------\n");
@@ -751,7 +753,7 @@
break;
}
- rb += sprintf(page+rb, " Address %s %s", conn->login_ip,
+ rb += sprintf(page+rb, " Address %pISc %s", &conn->login_sockaddr,
(conn->network_transport == ISCSI_TCP) ?
"TCP" : "SCTP");
rb += sprintf(page+rb, " StatSN: 0x%08x\n",
@@ -1010,6 +1012,11 @@
*/
DEF_TPG_ATTRIB(fabric_prot_type);
TPG_ATTR(fabric_prot_type, S_IRUGO | S_IWUSR);
+/*
+ * Define iscsi_tpg_attrib_s_tpg_enabled_sendtargets
+ */
+DEF_TPG_ATTRIB(tpg_enabled_sendtargets);
+TPG_ATTR(tpg_enabled_sendtargets, S_IRUGO | S_IWUSR);
static struct configfs_attribute *lio_target_tpg_attrib_attrs[] = {
&iscsi_tpg_attrib_authentication.attr,
@@ -1024,6 +1031,7 @@
&iscsi_tpg_attrib_default_erl.attr,
&iscsi_tpg_attrib_t10_pi.attr,
&iscsi_tpg_attrib_fabric_prot_type.attr,
+ &iscsi_tpg_attrib_tpg_enabled_sendtargets.attr,
NULL,
};
diff --git a/drivers/target/iscsi/iscsi_target_device.c b/drivers/target/iscsi/iscsi_target_device.c
index 5fabcd3..0382fa2 100644
--- a/drivers/target/iscsi/iscsi_target_device.c
+++ b/drivers/target/iscsi/iscsi_target_device.c
@@ -47,19 +47,19 @@
* core_set_queue_depth_for_node().
*/
sess->cmdsn_window = se_nacl->queue_depth;
- sess->max_cmd_sn = (sess->max_cmd_sn + se_nacl->queue_depth) - 1;
+ atomic_add(se_nacl->queue_depth - 1, &sess->max_cmd_sn);
}
void iscsit_increment_maxcmdsn(struct iscsi_cmd *cmd, struct iscsi_session *sess)
{
+ u32 max_cmd_sn;
+
if (cmd->immediate_cmd || cmd->maxcmdsn_inc)
return;
cmd->maxcmdsn_inc = 1;
- mutex_lock(&sess->cmdsn_mutex);
- sess->max_cmd_sn += 1;
- pr_debug("Updated MaxCmdSN to 0x%08x\n", sess->max_cmd_sn);
- mutex_unlock(&sess->cmdsn_mutex);
+ max_cmd_sn = atomic_inc_return(&sess->max_cmd_sn);
+ pr_debug("Updated MaxCmdSN to 0x%08x\n", max_cmd_sn);
}
EXPORT_SYMBOL(iscsit_increment_maxcmdsn);
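[Editor's note] With sess->max_cmd_sn now an atomic_t, the cmdsn_mutex is no longer needed just to advance the command window; writers bump it atomically and PDU builders take a single snapshot. The pattern the series uses, sketched:

/* writer: advance the window and log the new value */
u32 max_cmd_sn = (u32) atomic_inc_return(&sess->max_cmd_sn);

/* reader: one snapshot per PDU, then byte-swap for the wire */
hdr->max_cmdsn = cpu_to_be32((u32) atomic_read(&sess->max_cmd_sn));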
diff --git a/drivers/target/iscsi/iscsi_target_login.c b/drivers/target/iscsi/iscsi_target_login.c
index 7e8f65e..96e78c8 100644
--- a/drivers/target/iscsi/iscsi_target_login.c
+++ b/drivers/target/iscsi/iscsi_target_login.c
@@ -331,7 +331,7 @@
* The FFP CmdSN window values will be allocated from the TPG's
* Initiator Node's ACL once the login has been successfully completed.
*/
- sess->max_cmd_sn = be32_to_cpu(pdu->cmdsn);
+ atomic_set(&sess->max_cmd_sn, be32_to_cpu(pdu->cmdsn));
sess->sess_ops = kzalloc(sizeof(struct iscsi_sess_ops), GFP_KERNEL);
if (!sess->sess_ops) {
@@ -729,9 +729,9 @@
stop_timer = 1;
}
- pr_debug("iSCSI Login successful on CID: %hu from %s to"
- " %s:%hu,%hu\n", conn->cid, conn->login_ip,
- conn->local_ip, conn->local_port, tpg->tpgt);
+ pr_debug("iSCSI Login successful on CID: %hu from %pISpc to"
+ " %pISpc,%hu\n", conn->cid, &conn->login_sockaddr,
+ &conn->local_sockaddr, tpg->tpgt);
list_add_tail(&conn->conn_list, &sess->sess_conn_list);
atomic_inc(&sess->nconn);
@@ -776,8 +776,8 @@
pr_debug("Moving to TARG_SESS_STATE_LOGGED_IN.\n");
sess->session_state = TARG_SESS_STATE_LOGGED_IN;
- pr_debug("iSCSI Login successful on CID: %hu from %s to %s:%hu,%hu\n",
- conn->cid, conn->login_ip, conn->local_ip, conn->local_port,
+ pr_debug("iSCSI Login successful on CID: %hu from %pISpc to %pISpc,%hu\n",
+ conn->cid, &conn->login_sockaddr, &conn->local_sockaddr,
tpg->tpgt);
spin_lock_bh(&sess->conn_lock);
@@ -823,8 +823,8 @@
struct iscsi_np *np = (struct iscsi_np *) data;
spin_lock_bh(&np->np_thread_lock);
- pr_err("iSCSI Login timeout on Network Portal %s:%hu\n",
- np->np_ip, np->np_port);
+ pr_err("iSCSI Login timeout on Network Portal %pISpc\n",
+ &np->np_sockaddr);
if (np->np_login_timer_flags & ISCSI_TF_STOP) {
spin_unlock_bh(&np->np_thread_lock);
@@ -877,7 +877,7 @@
int iscsit_setup_np(
struct iscsi_np *np,
- struct __kernel_sockaddr_storage *sockaddr)
+ struct sockaddr_storage *sockaddr)
{
struct socket *sock = NULL;
int backlog = ISCSIT_TCP_BACKLOG, ret, opt = 0, len;
@@ -916,7 +916,7 @@
* in iscsi_target_configfs.c code..
*/
memcpy(&np->np_sockaddr, sockaddr,
- sizeof(struct __kernel_sockaddr_storage));
+ sizeof(struct sockaddr_storage));
if (sockaddr->ss_family == AF_INET6)
len = sizeof(struct sockaddr_in6);
@@ -975,7 +975,7 @@
int iscsi_target_setup_login_socket(
struct iscsi_np *np,
- struct __kernel_sockaddr_storage *sockaddr)
+ struct sockaddr_storage *sockaddr)
{
struct iscsit_transport *t;
int rc;
@@ -1015,44 +1015,42 @@
rc = conn->sock->ops->getname(conn->sock,
(struct sockaddr *)&sock_in6, &err, 1);
if (!rc) {
- if (!ipv6_addr_v4mapped(&sock_in6.sin6_addr))
- snprintf(conn->login_ip, sizeof(conn->login_ip), "[%pI6c]",
- &sock_in6.sin6_addr.in6_u);
- else
- snprintf(conn->login_ip, sizeof(conn->login_ip), "%pI4",
- &sock_in6.sin6_addr.s6_addr32[3]);
- conn->login_port = ntohs(sock_in6.sin6_port);
+ if (!ipv6_addr_v4mapped(&sock_in6.sin6_addr)) {
+ memcpy(&conn->login_sockaddr, &sock_in6, sizeof(sock_in6));
+ } else {
+ /* Pretend to be an ipv4 socket */
+ sock_in.sin_family = AF_INET;
+ sock_in.sin_port = sock_in6.sin6_port;
+ memcpy(&sock_in.sin_addr, &sock_in6.sin6_addr.s6_addr32[3], 4);
+ memcpy(&conn->login_sockaddr, &sock_in, sizeof(sock_in));
+ }
}
rc = conn->sock->ops->getname(conn->sock,
(struct sockaddr *)&sock_in6, &err, 0);
if (!rc) {
- if (!ipv6_addr_v4mapped(&sock_in6.sin6_addr))
- snprintf(conn->local_ip, sizeof(conn->local_ip), "[%pI6c]",
- &sock_in6.sin6_addr.in6_u);
- else
- snprintf(conn->local_ip, sizeof(conn->local_ip), "%pI4",
- &sock_in6.sin6_addr.s6_addr32[3]);
- conn->local_port = ntohs(sock_in6.sin6_port);
+ if (!ipv6_addr_v4mapped(&sock_in6.sin6_addr)) {
+ memcpy(&conn->local_sockaddr, &sock_in6, sizeof(sock_in6));
+ } else {
+ /* Pretend to be an ipv4 socket */
+ sock_in.sin_family = AF_INET;
+ sock_in.sin_port = sock_in6.sin6_port;
+ memcpy(&sock_in.sin_addr, &sock_in6.sin6_addr.s6_addr32[3], 4);
+ memcpy(&conn->local_sockaddr, &sock_in, sizeof(sock_in));
+ }
}
} else {
memset(&sock_in, 0, sizeof(struct sockaddr_in));
rc = conn->sock->ops->getname(conn->sock,
(struct sockaddr *)&sock_in, &err, 1);
- if (!rc) {
- sprintf(conn->login_ip, "%pI4",
- &sock_in.sin_addr.s_addr);
- conn->login_port = ntohs(sock_in.sin_port);
- }
+ if (!rc)
+ memcpy(&conn->login_sockaddr, &sock_in, sizeof(sock_in));
rc = conn->sock->ops->getname(conn->sock,
(struct sockaddr *)&sock_in, &err, 0);
- if (!rc) {
- sprintf(conn->local_ip, "%pI4",
- &sock_in.sin_addr.s_addr);
- conn->local_port = ntohs(sock_in.sin_port);
- }
+ if (!rc)
+ memcpy(&conn->local_sockaddr, &sock_in, sizeof(sock_in));
}
return 0;
@@ -1302,8 +1300,8 @@
spin_lock_bh(&np->np_thread_lock);
if (np->np_thread_state != ISCSI_NP_THREAD_ACTIVE) {
spin_unlock_bh(&np->np_thread_lock);
- pr_err("iSCSI Network Portal on %s:%hu currently not"
- " active.\n", np->np_ip, np->np_port);
+ pr_err("iSCSI Network Portal on %pISpc currently not"
+ " active.\n", &np->np_sockaddr);
iscsit_tx_login_rsp(conn, ISCSI_STATUS_CLS_TARGET_ERR,
ISCSI_LOGIN_STATUS_SVC_UNAVAILABLE);
goto new_sess_out;
@@ -1312,9 +1310,9 @@
conn->network_transport = np->np_network_transport;
- pr_debug("Received iSCSI login request from %s on %s Network"
- " Portal %s:%hu\n", conn->login_ip, np->np_transport->name,
- conn->local_ip, conn->local_port);
+ pr_debug("Received iSCSI login request from %pISpc on %s Network"
+ " Portal %pISpc\n", &conn->login_sockaddr, np->np_transport->name,
+ &conn->local_sockaddr);
pr_debug("Moving to TARG_CONN_STATE_IN_LOGIN.\n");
conn->conn_state = TARG_CONN_STATE_IN_LOGIN;
diff --git a/drivers/target/iscsi/iscsi_target_login.h b/drivers/target/iscsi/iscsi_target_login.h
index 57aa0d0..b597aa2 100644
--- a/drivers/target/iscsi/iscsi_target_login.h
+++ b/drivers/target/iscsi/iscsi_target_login.h
@@ -5,9 +5,9 @@
extern int iscsi_check_for_session_reinstatement(struct iscsi_conn *);
extern int iscsi_login_post_auth_non_zero_tsih(struct iscsi_conn *, u16, u32);
extern int iscsit_setup_np(struct iscsi_np *,
- struct __kernel_sockaddr_storage *);
+ struct sockaddr_storage *);
extern int iscsi_target_setup_login_socket(struct iscsi_np *,
- struct __kernel_sockaddr_storage *);
+ struct sockaddr_storage *);
extern int iscsit_accept_np(struct iscsi_np *, struct iscsi_conn *);
extern int iscsit_get_login_rx(struct iscsi_conn *, struct iscsi_login *);
extern int iscsit_put_login_tx(struct iscsi_conn *, struct iscsi_login *, u32);
diff --git a/drivers/target/iscsi/iscsi_target_nego.c b/drivers/target/iscsi/iscsi_target_nego.c
index f9cde91..5c964c0 100644
--- a/drivers/target/iscsi/iscsi_target_nego.c
+++ b/drivers/target/iscsi/iscsi_target_nego.c
@@ -341,7 +341,6 @@
static int iscsi_target_do_tx_login_io(struct iscsi_conn *conn, struct iscsi_login *login)
{
u32 padding = 0;
- struct iscsi_session *sess = conn->sess;
struct iscsi_login_rsp *login_rsp;
login_rsp = (struct iscsi_login_rsp *) login->rsp;
@@ -353,7 +352,7 @@
login_rsp->itt = login->init_task_tag;
login_rsp->statsn = cpu_to_be32(conn->stat_sn++);
login_rsp->exp_cmdsn = cpu_to_be32(conn->sess->exp_cmd_sn);
- login_rsp->max_cmdsn = cpu_to_be32(conn->sess->max_cmd_sn);
+ login_rsp->max_cmdsn = cpu_to_be32((u32) atomic_read(&conn->sess->max_cmd_sn));
pr_debug("Sending Login Response, Flags: 0x%02x, ITT: 0x%08x,"
" ExpCmdSN; 0x%08x, MaxCmdSN: 0x%08x, StatSN: 0x%08x, Length:"
@@ -382,10 +381,6 @@
goto err;
login->rsp_length = 0;
- mutex_lock(&sess->cmdsn_mutex);
- login_rsp->exp_cmdsn = cpu_to_be32(sess->exp_cmd_sn);
- login_rsp->max_cmdsn = cpu_to_be32(sess->max_cmd_sn);
- mutex_unlock(&sess->cmdsn_mutex);
return 0;
diff --git a/drivers/target/iscsi/iscsi_target_stat.c b/drivers/target/iscsi/iscsi_target_stat.c
index 5e1349a..9dd94ff 100644
--- a/drivers/target/iscsi/iscsi_target_stat.c
+++ b/drivers/target/iscsi/iscsi_target_stat.c
@@ -430,7 +430,7 @@
int ret;
spin_lock(&lstat->lock);
- ret = snprintf(page, PAGE_SIZE, "%s\n", lstat->last_intr_fail_ip_addr);
+ ret = snprintf(page, PAGE_SIZE, "%pISc\n", &lstat->last_intr_fail_sockaddr);
spin_unlock(&lstat->lock);
return ret;
diff --git a/drivers/target/iscsi/iscsi_target_tmr.c b/drivers/target/iscsi/iscsi_target_tmr.c
index cf59c39..11320df 100644
--- a/drivers/target/iscsi/iscsi_target_tmr.c
+++ b/drivers/target/iscsi/iscsi_target_tmr.c
@@ -50,7 +50,7 @@
pr_err("Unable to locate RefTaskTag: 0x%08x on CID:"
" %hu.\n", hdr->rtt, conn->cid);
return (iscsi_sna_gte(be32_to_cpu(hdr->refcmdsn), conn->sess->exp_cmd_sn) &&
- iscsi_sna_lte(be32_to_cpu(hdr->refcmdsn), conn->sess->max_cmd_sn)) ?
+ iscsi_sna_lte(be32_to_cpu(hdr->refcmdsn), (u32) atomic_read(&conn->sess->max_cmd_sn))) ?
ISCSI_TMF_RSP_COMPLETE : ISCSI_TMF_RSP_NO_TASK;
}
if (ref_cmd->cmd_sn != be32_to_cpu(hdr->refcmdsn)) {
diff --git a/drivers/target/iscsi/iscsi_target_tpg.c b/drivers/target/iscsi/iscsi_target_tpg.c
index 968068f..23c95cd 100644
--- a/drivers/target/iscsi/iscsi_target_tpg.c
+++ b/drivers/target/iscsi/iscsi_target_tpg.c
@@ -226,6 +226,7 @@
a->default_erl = TA_DEFAULT_ERL;
a->t10_pi = TA_DEFAULT_T10_PI;
a->fabric_prot_type = TA_DEFAULT_FABRIC_PROT_TYPE;
+ a->tpg_enabled_sendtargets = TA_DEFAULT_TPG_ENABLED_SENDTARGETS;
}
int iscsit_tpg_add_portal_group(struct iscsi_tiqn *tiqn, struct iscsi_portal_group *tpg)
@@ -430,7 +431,7 @@
static bool iscsit_tpg_check_network_portal(
struct iscsi_tiqn *tiqn,
- struct __kernel_sockaddr_storage *sockaddr,
+ struct sockaddr_storage *sockaddr,
int network_transport)
{
struct iscsi_portal_group *tpg;
@@ -459,8 +460,7 @@
struct iscsi_tpg_np *iscsit_tpg_add_network_portal(
struct iscsi_portal_group *tpg,
- struct __kernel_sockaddr_storage *sockaddr,
- char *ip_str,
+ struct sockaddr_storage *sockaddr,
struct iscsi_tpg_np *tpg_np_parent,
int network_transport)
{
@@ -470,8 +470,8 @@
if (!tpg_np_parent) {
if (iscsit_tpg_check_network_portal(tpg->tpg_tiqn, sockaddr,
network_transport)) {
- pr_err("Network Portal: %s already exists on a"
- " different TPG on %s\n", ip_str,
+ pr_err("Network Portal: %pISc already exists on a"
+ " different TPG on %s\n", sockaddr,
tpg->tpg_tiqn->tiqn);
return ERR_PTR(-EEXIST);
}
@@ -484,7 +484,7 @@
return ERR_PTR(-ENOMEM);
}
- np = iscsit_add_np(sockaddr, ip_str, network_transport);
+ np = iscsit_add_np(sockaddr, network_transport);
if (IS_ERR(np)) {
kfree(tpg_np);
return ERR_CAST(np);
@@ -514,8 +514,8 @@
spin_unlock(&tpg_np_parent->tpg_np_parent_lock);
}
- pr_debug("CORE[%s] - Added Network Portal: %s:%hu,%hu on %s\n",
- tpg->tpg_tiqn->tiqn, np->np_ip, np->np_port, tpg->tpgt,
+ pr_debug("CORE[%s] - Added Network Portal: %pISpc,%hu on %s\n",
+ tpg->tpg_tiqn->tiqn, &np->np_sockaddr, tpg->tpgt,
np->np_transport->name);
return tpg_np;
@@ -528,8 +528,8 @@
{
iscsit_clear_tpg_np_login_thread(tpg_np, tpg, true);
- pr_debug("CORE[%s] - Removed Network Portal: %s:%hu,%hu on %s\n",
- tpg->tpg_tiqn->tiqn, np->np_ip, np->np_port, tpg->tpgt,
+ pr_debug("CORE[%s] - Removed Network Portal: %pISpc,%hu on %s\n",
+ tpg->tpg_tiqn->tiqn, &np->np_sockaddr, tpg->tpgt,
np->np_transport->name);
tpg_np->tpg_np = NULL;
@@ -892,3 +892,21 @@
return 0;
}
+
+int iscsit_ta_tpg_enabled_sendtargets(
+ struct iscsi_portal_group *tpg,
+ u32 flag)
+{
+ struct iscsi_tpg_attrib *a = &tpg->tpg_attrib;
+
+ if ((flag != 0) && (flag != 1)) {
+ pr_err("Illegal value %d\n", flag);
+ return -EINVAL;
+ }
+
+ a->tpg_enabled_sendtargets = flag;
+ pr_debug("iSCSI_TPG[%hu] - TPG enabled bit required for SendTargets:"
+ " %s\n", tpg->tpgt, (a->tpg_enabled_sendtargets) ? "ON" : "OFF");
+
+ return 0;
+}
diff --git a/drivers/target/iscsi/iscsi_target_tpg.h b/drivers/target/iscsi/iscsi_target_tpg.h
index 95ff5bd..9db32bd 100644
--- a/drivers/target/iscsi/iscsi_target_tpg.h
+++ b/drivers/target/iscsi/iscsi_target_tpg.h
@@ -22,7 +22,7 @@
extern void iscsit_tpg_del_external_nps(struct iscsi_tpg_np *);
extern struct iscsi_tpg_np *iscsit_tpg_locate_child_np(struct iscsi_tpg_np *, int);
extern struct iscsi_tpg_np *iscsit_tpg_add_network_portal(struct iscsi_portal_group *,
- struct __kernel_sockaddr_storage *, char *, struct iscsi_tpg_np *,
+ struct sockaddr_storage *, struct iscsi_tpg_np *,
int);
extern int iscsit_tpg_del_network_portal(struct iscsi_portal_group *,
struct iscsi_tpg_np *);
@@ -40,5 +40,6 @@
extern int iscsit_ta_default_erl(struct iscsi_portal_group *, u32);
extern int iscsit_ta_t10_pi(struct iscsi_portal_group *, u32);
extern int iscsit_ta_fabric_prot_type(struct iscsi_portal_group *, u32);
+extern int iscsit_ta_tpg_enabled_sendtargets(struct iscsi_portal_group *, u32);
#endif /* ISCSI_TARGET_TPG_H */
diff --git a/drivers/target/iscsi/iscsi_target_util.c b/drivers/target/iscsi/iscsi_target_util.c
index a2bff07..428b0d9 100644
--- a/drivers/target/iscsi/iscsi_target_util.c
+++ b/drivers/target/iscsi/iscsi_target_util.c
@@ -233,6 +233,7 @@
static inline int iscsit_check_received_cmdsn(struct iscsi_session *sess, u32 cmdsn)
{
+ u32 max_cmdsn;
int ret;
/*
@@ -241,10 +242,10 @@
* or order CmdSNs due to multiple connection sessions and/or
* CRC failures.
*/
- if (iscsi_sna_gt(cmdsn, sess->max_cmd_sn)) {
+ max_cmdsn = atomic_read(&sess->max_cmd_sn);
+ if (iscsi_sna_gt(cmdsn, max_cmdsn)) {
pr_err("Received CmdSN: 0x%08x is greater than"
- " MaxCmdSN: 0x%08x, ignoring.\n", cmdsn,
- sess->max_cmd_sn);
+ " MaxCmdSN: 0x%08x, ignoring.\n", cmdsn, max_cmdsn);
ret = CMDSN_MAXCMDSN_OVERRUN;
} else if (cmdsn == sess->exp_cmd_sn) {
@@ -1371,6 +1372,33 @@
return iscsit_do_tx_data(conn, &c);
}
+static bool sockaddr_equal(struct sockaddr_storage *x, struct sockaddr_storage *y)
+{
+ switch (x->ss_family) {
+ case AF_INET: {
+ struct sockaddr_in *sinx = (struct sockaddr_in *)x;
+ struct sockaddr_in *siny = (struct sockaddr_in *)y;
+ if (sinx->sin_addr.s_addr != siny->sin_addr.s_addr)
+ return false;
+ if (sinx->sin_port != siny->sin_port)
+ return false;
+ break;
+ }
+ case AF_INET6: {
+ struct sockaddr_in6 *sinx = (struct sockaddr_in6 *)x;
+ struct sockaddr_in6 *siny = (struct sockaddr_in6 *)y;
+ if (!ipv6_addr_equal(&sinx->sin6_addr, &siny->sin6_addr))
+ return false;
+ if (sinx->sin6_port != siny->sin6_port)
+ return false;
+ break;
+ }
+ default:
+ return false;
+ }
+ return true;
+}
+
void iscsit_collect_login_stats(
struct iscsi_conn *conn,
u8 status_class,
@@ -1387,7 +1415,7 @@
ls = &tiqn->login_stats;
spin_lock(&ls->lock);
- if (!strcmp(conn->login_ip, ls->last_intr_fail_ip_addr) &&
+ if (sockaddr_equal(&conn->login_sockaddr, &ls->last_intr_fail_sockaddr) &&
((get_jiffies_64() - ls->last_fail_time) < 10)) {
/* We already have the failure info for this login */
spin_unlock(&ls->lock);
@@ -1427,8 +1455,7 @@
ls->last_intr_fail_ip_family = conn->login_family;
- snprintf(ls->last_intr_fail_ip_addr, IPV6_ADDRESS_SPACE,
- "%s", conn->login_ip);
+ ls->last_intr_fail_sockaddr = conn->login_sockaddr;
ls->last_fail_time = get_jiffies_64();
}
diff --git a/drivers/target/loopback/tcm_loop.c b/drivers/target/loopback/tcm_loop.c
index a556bde..5bc85ff 100644
--- a/drivers/target/loopback/tcm_loop.c
+++ b/drivers/target/loopback/tcm_loop.c
@@ -526,7 +526,7 @@
static char *tcm_loop_get_endpoint_wwn(struct se_portal_group *se_tpg)
{
/*
- * Return the passed NAA identifier for the SAS Target Port
+ * Return the passed NAA identifier for the Target Port
*/
return &tl_tpg(se_tpg)->tl_hba->tl_wwn_address[0];
}
@@ -845,7 +845,7 @@
transport_free_session(tl_nexus->se_sess);
goto out;
}
- /* Now, register the SAS I_T Nexus as active. */
+ /* Now, register the I_T Nexus as active. */
transport_register_session(se_tpg, tl_nexus->se_sess->se_node_acl,
tl_nexus->se_sess, tl_nexus);
tl_tpg->tl_nexus = tl_nexus;
@@ -884,7 +884,7 @@
" %s Initiator Port: %s\n", tcm_loop_dump_proto_id(tpg->tl_hba),
tl_nexus->se_sess->se_node_acl->initiatorname);
/*
- * Release the SCSI I_T Nexus to the emulated SAS Target Port
+ * Release the SCSI I_T Nexus to the emulated Target Port
*/
transport_deregister_session(tl_nexus->se_sess);
tpg->tl_nexus = NULL;
@@ -1034,6 +1034,11 @@
}
if (!strncmp(page, "offline", 7)) {
tl_tpg->tl_transport_status = TCM_TRANSPORT_OFFLINE;
+ if (tl_tpg->tl_nexus) {
+ struct se_session *tl_sess = tl_tpg->tl_nexus->se_sess;
+
+ core_allocate_nexus_loss_ua(tl_sess->se_node_acl);
+ }
return count;
}
return -EINVAL;
@@ -1077,7 +1082,7 @@
tl_tpg->tl_hba = tl_hba;
tl_tpg->tl_tpgt = tpgt;
/*
+	 * Register the tl_tpg as an emulated TCM Target Endpoint
+ * Register the tl_tpg as a emulated TCM Target Endpoint
*/
ret = core_tpg_register(wwn, &tl_tpg->tl_se_tpg, tl_hba->tl_proto_id);
if (ret < 0)
@@ -1102,11 +1107,11 @@
tl_hba = tl_tpg->tl_hba;
tpgt = tl_tpg->tl_tpgt;
/*
- * Release the I_T Nexus for the Virtual SAS link if present
+ * Release the I_T Nexus for the Virtual target link if present
*/
tcm_loop_drop_nexus(tl_tpg);
/*
+	 * Deregister the tl_tpg as an emulated TCM Target Endpoint
+ * Deregister the tl_tpg as a emulated TCM Target Endpoint
*/
core_tpg_deregister(se_tpg);
@@ -1199,8 +1204,9 @@
struct tcm_loop_hba, tl_hba_wwn);
pr_debug("TCM_Loop_ConfigFS: Deallocating emulated Target"
- " SAS Address: %s at Linux/SCSI Host ID: %d\n",
- tl_hba->tl_wwn_address, tl_hba->sh->host_no);
+ " %s Address: %s at Linux/SCSI Host ID: %d\n",
+ tcm_loop_dump_proto_id(tl_hba), tl_hba->tl_wwn_address,
+ tl_hba->sh->host_no);
/*
* Call device_unregister() on the original tl_hba->dev.
* tcm_loop_fabric_scsi.c:tcm_loop_release_adapter() will
diff --git a/drivers/target/target_core_device.c b/drivers/target/target_core_device.c
index 09e682b..dcc424a 100644
--- a/drivers/target/target_core_device.c
+++ b/drivers/target/target_core_device.c
@@ -620,8 +620,6 @@
lacl->mapped_lun = mapped_lun;
lacl->se_lun_nacl = nacl;
- snprintf(lacl->initiatorname, TRANSPORT_IQN_LEN, "%s",
- nacl->initiatorname);
return lacl;
}
@@ -656,7 +654,7 @@
" InitiatorNode: %s\n", tpg->se_tpg_tfo->get_fabric_name(),
tpg->se_tpg_tfo->tpg_get_tag(tpg), lun->unpacked_lun, lacl->mapped_lun,
(lun_access & TRANSPORT_LUNFLAGS_READ_WRITE) ? "RW" : "RO",
- lacl->initiatorname);
+ nacl->initiatorname);
/*
* Check to see if there are any existing persistent reservation APTPL
* pre-registrations that need to be enabled for this LUN ACL..
@@ -688,7 +686,7 @@
" InitiatorNode: %s Mapped LUN: %llu\n",
tpg->se_tpg_tfo->get_fabric_name(),
tpg->se_tpg_tfo->tpg_get_tag(tpg), lun->unpacked_lun,
- lacl->initiatorname, lacl->mapped_lun);
+ nacl->initiatorname, lacl->mapped_lun);
return 0;
}
@@ -701,7 +699,7 @@
" Mapped LUN: %llu\n", tpg->se_tpg_tfo->get_fabric_name(),
tpg->se_tpg_tfo->tpg_get_tag(tpg),
tpg->se_tpg_tfo->get_fabric_name(),
- lacl->initiatorname, lacl->mapped_lun);
+ lacl->se_lun_nacl->initiatorname, lacl->mapped_lun);
kfree(lacl);
}
@@ -754,7 +752,7 @@
dev->dev_link_magic = SE_DEV_LINK_MAGIC;
dev->se_hba = hba;
dev->transport = hba->backend->ops;
- dev->prot_length = sizeof(struct se_dif_v1_tuple);
+ dev->prot_length = sizeof(struct t10_pi_tuple);
dev->hba_index = hba->hba_index;
INIT_LIST_HEAD(&dev->dev_list);
@@ -771,7 +769,6 @@
spin_lock_init(&dev->se_tmr_lock);
spin_lock_init(&dev->qf_cmd_lock);
sema_init(&dev->caw_sem, 1);
- atomic_set(&dev->dev_ordered_id, 0);
INIT_LIST_HEAD(&dev->t10_wwn.t10_vpd_list);
spin_lock_init(&dev->t10_wwn.t10_vpd_lock);
INIT_LIST_HEAD(&dev->t10_pr.registration_list);
diff --git a/drivers/target/target_core_fabric_configfs.c b/drivers/target/target_core_fabric_configfs.c
index 48a3698..be42429 100644
--- a/drivers/target/target_core_fabric_configfs.c
+++ b/drivers/target/target_core_fabric_configfs.c
@@ -203,7 +203,7 @@
pr_debug("%s_ConfigFS: Changed Initiator ACL: %s"
" Mapped LUN: %llu Write Protect bit to %s\n",
se_tpg->se_tpg_tfo->get_fabric_name(),
- lacl->initiatorname, lacl->mapped_lun, (op) ? "ON" : "OFF");
+ se_nacl->initiatorname, lacl->mapped_lun, (op) ? "ON" : "OFF");
return count;
diff --git a/drivers/target/target_core_hba.c b/drivers/target/target_core_hba.c
index be9cefc..9522960 100644
--- a/drivers/target/target_core_hba.c
+++ b/drivers/target/target_core_hba.c
@@ -184,3 +184,8 @@
kfree(hba);
return 0;
}
+
+bool target_sense_desc_format(struct se_device *dev)
+{
+ return dev->transport->get_blocks(dev) > U32_MAX;
+}
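target_sense_desc_format() above is the policy switch between fixed- and descriptor-format sense data: fixed-format sense only carries a 32-bit INFORMATION field, so a backstore with more than U32_MAX blocks (64-bit LBAs) needs descriptor format to report a failing sector. A rough usage sketch, mirroring how the later spc.c and transport.c hunks consume it:

	bool desc = target_sense_desc_format(dev);	/* true for > U32_MAX blocks */

	/* ILLEGAL REQUEST / INVALID FIELD IN CDB, in whichever format fits */
	scsi_build_sense_buffer(desc, buf, ILLEGAL_REQUEST, 0x24, 0x00);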
diff --git a/drivers/target/target_core_sbc.c b/drivers/target/target_core_sbc.c
index e318ddb..0b4b2a6 100644
--- a/drivers/target/target_core_sbc.c
+++ b/drivers/target/target_core_sbc.c
@@ -154,6 +154,38 @@
return 0;
}
+static sense_reason_t
+sbc_emulate_startstop(struct se_cmd *cmd)
+{
+ unsigned char *cdb = cmd->t_task_cdb;
+
+ /*
+ * See sbc3r36 section 5.25
+ * Immediate bit should be set since there is nothing to complete
+ * POWER CONDITION MODIFIER 0h
+ */
+ if (!(cdb[1] & 1) || cdb[2] || cdb[3])
+ return TCM_INVALID_CDB_FIELD;
+
+ /*
+ * See sbc3r36 section 5.25
+ * POWER CONDITION 0h START_VALID - process START and LOEJ
+ */
+ if (cdb[4] >> 4 & 0xf)
+ return TCM_INVALID_CDB_FIELD;
+
+ /*
+ * See sbc3r36 section 5.25
+ * LOEJ 0h - nothing to load or unload
+ * START 1h - we are ready
+ */
+ if (!(cdb[4] & 1) || (cdb[4] & 2) || (cdb[4] & 4))
+ return TCM_INVALID_CDB_FIELD;
+
+ target_complete_cmd(cmd, SAM_STAT_GOOD);
+ return 0;
+}
+
sector_t sbc_get_write_same_sectors(struct se_cmd *cmd)
{
u32 num_blocks;
@@ -960,6 +992,9 @@
" than 1\n", sectors);
return TCM_INVALID_CDB_FIELD;
}
+ if (sbc_check_dpofua(dev, cmd, cdb))
+ return TCM_INVALID_CDB_FIELD;
+
/*
* Double size because we have two buffers, note that
* zero is not an error..
@@ -1069,6 +1104,10 @@
size = 0;
cmd->execute_cmd = sbc_emulate_noop;
break;
+ case START_STOP:
+ size = 0;
+ cmd->execute_cmd = sbc_emulate_startstop;
+ break;
default:
ret = spc_parse_cdb(cmd, &size);
if (ret)
@@ -1191,7 +1230,7 @@
sbc_dif_generate(struct se_cmd *cmd)
{
struct se_device *dev = cmd->se_dev;
- struct se_dif_v1_tuple *sdt;
+ struct t10_pi_tuple *sdt;
struct scatterlist *dsg = cmd->t_data_sg, *psg;
sector_t sector = cmd->t_task_lba;
void *daddr, *paddr;
@@ -1203,7 +1242,7 @@
daddr = kmap_atomic(sg_page(dsg)) + dsg->offset;
for (j = 0; j < psg->length;
- j += sizeof(struct se_dif_v1_tuple)) {
+ j += sizeof(*sdt)) {
__u16 crc;
unsigned int avail;
@@ -1256,7 +1295,7 @@
}
static sense_reason_t
-sbc_dif_v1_verify(struct se_cmd *cmd, struct se_dif_v1_tuple *sdt,
+sbc_dif_v1_verify(struct se_cmd *cmd, struct t10_pi_tuple *sdt,
__u16 crc, sector_t sector, unsigned int ei_lba)
{
__be16 csum;
@@ -1346,7 +1385,7 @@
unsigned int ei_lba, struct scatterlist *psg, int psg_off)
{
struct se_device *dev = cmd->se_dev;
- struct se_dif_v1_tuple *sdt;
+ struct t10_pi_tuple *sdt;
struct scatterlist *dsg = cmd->t_data_sg;
sector_t sector = start;
void *daddr, *paddr;
@@ -1361,7 +1400,7 @@
for (i = psg_off; i < psg->length &&
sector < start + sectors;
- i += sizeof(struct se_dif_v1_tuple)) {
+ i += sizeof(*sdt)) {
__u16 crc;
unsigned int avail;
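sbc_emulate_startstop() added above accepts only the trivial form of START STOP UNIT described in SBC-3 section 5.25. Illustrative CDB decoding, with example byte values that are not from the patch:

	unsigned char cdb[6] = { 0x1b, 0x01, 0x00, 0x00, 0x01, 0x00 };	/* START STOP UNIT */

	bool immed      = cdb[1] & 0x01;	/* must be set: nothing to wait for       */
	u8   power_cond = cdb[4] >> 4;		/* must be 0h (START_VALID)               */
	bool loej       = cdb[4] & 0x02;	/* must be clear: no medium to load/eject */
	bool start      = cdb[4] & 0x01;	/* must be set: just report GOOD status   */

Any other combination is answered with TCM_INVALID_CDB_FIELD.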
diff --git a/drivers/target/target_core_spc.c b/drivers/target/target_core_spc.c
index f87d4ce..9413e1a 100644
--- a/drivers/target/target_core_spc.c
+++ b/drivers/target/target_core_spc.c
@@ -484,8 +484,8 @@
spc_emulate_evpd_b0(struct se_cmd *cmd, unsigned char *buf)
{
struct se_device *dev = cmd->se_dev;
- int have_tp = 0;
- int opt, min;
+ u32 mtl = 0;
+ int have_tp = 0, opt, min;
/*
* Following spc3r22 section 6.5.3 Block Limits VPD page, when
@@ -516,8 +516,15 @@
/*
* Set MAXIMUM TRANSFER LENGTH
+ *
+ * XXX: Currently assumes single PAGE_SIZE per scatterlist for fabrics
+ * enforcing maximum HW scatter-gather-list entry limit
*/
- put_unaligned_be32(dev->dev_attrib.hw_max_sectors, &buf[8]);
+ if (cmd->se_tfo->max_data_sg_nents) {
+ mtl = (cmd->se_tfo->max_data_sg_nents * PAGE_SIZE) /
+ dev->dev_attrib.block_size;
+ }
+ put_unaligned_be32(min_not_zero(mtl, dev->dev_attrib.hw_max_sectors), &buf[8]);
/*
* Set OPTIMAL TRANSFER LENGTH
@@ -768,7 +775,12 @@
if (pc == 1)
goto out;
- p[2] = 2;
+ /* GLTSD: No implicit save of log parameters */
+ p[2] = (1 << 1);
+ if (target_sense_desc_format(dev))
+ /* D_SENSE: Descriptor format sense data for 64bit sectors */
+ p[2] |= (1 << 2);
+
/*
* From spc4r23, 7.4.7 Control mode page
*
@@ -1151,6 +1163,7 @@
unsigned char *rbuf;
u8 ua_asc = 0, ua_ascq = 0;
unsigned char buf[SE_SENSE_BUF];
+ bool desc_format = target_sense_desc_format(cmd->se_dev);
memset(buf, 0, SE_SENSE_BUF);
@@ -1164,32 +1177,11 @@
if (!rbuf)
return TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE;
- if (!core_scsi3_ua_clear_for_request_sense(cmd, &ua_asc, &ua_ascq)) {
- /*
- * CURRENT ERROR, UNIT ATTENTION
- */
- buf[0] = 0x70;
- buf[SPC_SENSE_KEY_OFFSET] = UNIT_ATTENTION;
-
- /*
- * The Additional Sense Code (ASC) from the UNIT ATTENTION
- */
- buf[SPC_ASC_KEY_OFFSET] = ua_asc;
- buf[SPC_ASCQ_KEY_OFFSET] = ua_ascq;
- buf[7] = 0x0A;
- } else {
- /*
- * CURRENT ERROR, NO SENSE
- */
- buf[0] = 0x70;
- buf[SPC_SENSE_KEY_OFFSET] = NO_SENSE;
-
- /*
- * NO ADDITIONAL SENSE INFORMATION
- */
- buf[SPC_ASC_KEY_OFFSET] = 0x00;
- buf[7] = 0x0A;
- }
+ if (!core_scsi3_ua_clear_for_request_sense(cmd, &ua_asc, &ua_ascq))
+ scsi_build_sense_buffer(desc_format, buf, UNIT_ATTENTION,
+ ua_asc, ua_ascq);
+ else
+ scsi_build_sense_buffer(desc_format, buf, NO_SENSE, 0x0, 0x0);
memcpy(rbuf, buf, min_t(u32, sizeof(buf), cmd->data_length));
transport_kunmap_data_sg(cmd);
@@ -1418,9 +1410,6 @@
}
break;
default:
- pr_warn("TARGET_CORE[%s]: Unsupported SCSI Opcode"
- " 0x%02x, sending CHECK_CONDITION.\n",
- cmd->se_tfo->get_fabric_name(), cdb[0]);
return TCM_UNSUPPORTED_SCSI_OPCODE;
}
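The MAXIMUM TRANSFER LENGTH change above derives the limit from the fabric's scatter-gather entry cap rather than always reporting hw_max_sectors. Worked example with illustrative numbers (not from the patch): a fabric that allows 2048 SG entries of one 4 KiB page each caps a single I/O at 8 MiB, i.e.

	mtl = (2048 * 4096) / 512 = 16384 blocks of 512 bytes

and the Block Limits VPD page then advertises min_not_zero(mtl, hw_max_sectors).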
diff --git a/drivers/target/target_core_tpg.c b/drivers/target/target_core_tpg.c
index babde4a..2d0381d 100644
--- a/drivers/target/target_core_tpg.c
+++ b/drivers/target/target_core_tpg.c
@@ -41,6 +41,7 @@
#include "target_core_internal.h"
#include "target_core_alua.h"
#include "target_core_pr.h"
+#include "target_core_ua.h"
extern struct se_device *g_lun0_dev;
@@ -83,6 +84,22 @@
}
EXPORT_SYMBOL(core_tpg_get_initiator_node_acl);
+void core_allocate_nexus_loss_ua(
+ struct se_node_acl *nacl)
+{
+ struct se_dev_entry *deve;
+
+ if (!nacl)
+ return;
+
+ rcu_read_lock();
+ hlist_for_each_entry_rcu(deve, &nacl->lun_entry_hlist, link)
+ core_scsi3_ua_allocate(deve, 0x29,
+ ASCQ_29H_NEXUS_LOSS_OCCURRED);
+ rcu_read_unlock();
+}
+EXPORT_SYMBOL(core_allocate_nexus_loss_ua);
+
/* core_tpg_add_node_to_devs():
*
*
diff --git a/drivers/target/target_core_transport.c b/drivers/target/target_core_transport.c
index ce8574b..5bacc7b 100644
--- a/drivers/target/target_core_transport.c
+++ b/drivers/target/target_core_transport.c
@@ -39,6 +39,7 @@
#include <net/sock.h>
#include <net/tcp.h>
#include <scsi/scsi_proto.h>
+#include <scsi/scsi_common.h>
#include <target/target_core_base.h>
#include <target/target_core_backend.h>
@@ -1074,6 +1075,55 @@
}
EXPORT_SYMBOL(transport_set_vpd_ident);
+static sense_reason_t
+target_check_max_data_sg_nents(struct se_cmd *cmd, struct se_device *dev,
+ unsigned int size)
+{
+ u32 mtl;
+
+ if (!cmd->se_tfo->max_data_sg_nents)
+ return TCM_NO_SENSE;
+ /*
+ * Check if fabric enforced maximum SGL entries per I/O descriptor
+ * exceeds se_cmd->data_length. If true, set SCF_UNDERFLOW_BIT +
+ * residual_count and reduce original cmd->data_length to maximum
+ * length based on single PAGE_SIZE entry scatter-lists.
+ */
+ mtl = (cmd->se_tfo->max_data_sg_nents * PAGE_SIZE);
+ if (cmd->data_length > mtl) {
+ /*
+ * If an existing CDB overflow is present, calculate new residual
+ * based on CDB size minus fabric maximum transfer length.
+ *
+ * If an existing CDB underflow is present, calculate new residual
+ * based on original cmd->data_length minus fabric maximum transfer
+ * length.
+ *
+ * Otherwise, set the underflow residual based on cmd->data_length
+ * minus fabric maximum transfer length.
+ */
+ if (cmd->se_cmd_flags & SCF_OVERFLOW_BIT) {
+ cmd->residual_count = (size - mtl);
+ } else if (cmd->se_cmd_flags & SCF_UNDERFLOW_BIT) {
+ u32 orig_dl = size + cmd->residual_count;
+ cmd->residual_count = (orig_dl - mtl);
+ } else {
+ cmd->se_cmd_flags |= SCF_UNDERFLOW_BIT;
+ cmd->residual_count = (cmd->data_length - mtl);
+ }
+ cmd->data_length = mtl;
+ /*
+ * Reset sbc_check_prot() calculated protection payload
+ * length based upon the new smaller MTL.
+ */
+ if (cmd->prot_length) {
+ u32 sectors = (mtl / dev->dev_attrib.block_size);
+ cmd->prot_length = dev->prot_length * sectors;
+ }
+ }
+ return TCM_NO_SENSE;
+}
+
sense_reason_t
target_cmd_size_check(struct se_cmd *cmd, unsigned int size)
{
@@ -1087,9 +1137,9 @@
" 0x%02x\n", cmd->se_tfo->get_fabric_name(),
cmd->data_length, size, cmd->t_task_cdb[0]);
- if (cmd->data_direction == DMA_TO_DEVICE) {
- pr_err("Rejecting underflow/overflow"
- " WRITE data\n");
+ if (cmd->data_direction == DMA_TO_DEVICE &&
+ cmd->se_cmd_flags & SCF_SCSI_DATA_CDB) {
+ pr_err("Rejecting underflow/overflow WRITE data\n");
return TCM_INVALID_CDB_FIELD;
}
/*
@@ -1119,7 +1169,7 @@
}
}
- return 0;
+ return target_check_max_data_sg_nents(cmd, dev, size);
}
@@ -1177,14 +1227,7 @@
" emulation is not supported\n");
return TCM_INVALID_CDB_FIELD;
}
- /*
- * Used to determine when ORDERED commands should go from
- * Dormant to Active status.
- */
- cmd->se_ordered_id = atomic_inc_return(&dev->dev_ordered_id);
- pr_debug("Allocated se_ordered_id: %u for Task Attr: 0x%02x on %s\n",
- cmd->se_ordered_id, cmd->sam_task_attr,
- dev->transport->name);
+
return 0;
}
@@ -1246,6 +1289,11 @@
}
ret = dev->transport->parse_cdb(cmd);
+ if (ret == TCM_UNSUPPORTED_SCSI_OPCODE)
+ pr_warn_ratelimited("%s/%s: Unsupported SCSI Opcode 0x%02x, sending CHECK_CONDITION.\n",
+ cmd->se_tfo->get_fabric_name(),
+ cmd->se_sess->se_node_acl->initiatorname,
+ cmd->t_task_cdb[0]);
if (ret)
return ret;
@@ -1693,8 +1741,7 @@
check_stop:
transport_lun_remove_cmd(cmd);
- if (!transport_cmd_check_stop_to_fabric(cmd))
- ;
+ transport_cmd_check_stop_to_fabric(cmd);
return;
queue_full:
@@ -1767,16 +1814,14 @@
*/
switch (cmd->sam_task_attr) {
case TCM_HEAD_TAG:
- pr_debug("Added HEAD_OF_QUEUE for CDB: 0x%02x, "
- "se_ordered_id: %u\n",
- cmd->t_task_cdb[0], cmd->se_ordered_id);
+ pr_debug("Added HEAD_OF_QUEUE for CDB: 0x%02x\n",
+ cmd->t_task_cdb[0]);
return false;
case TCM_ORDERED_TAG:
atomic_inc_mb(&dev->dev_ordered_sync);
- pr_debug("Added ORDERED for CDB: 0x%02x to ordered list, "
- " se_ordered_id: %u\n",
- cmd->t_task_cdb[0], cmd->se_ordered_id);
+ pr_debug("Added ORDERED for CDB: 0x%02x to ordered list\n",
+ cmd->t_task_cdb[0]);
/*
* Execute an ORDERED command if no other older commands
@@ -1800,10 +1845,8 @@
list_add_tail(&cmd->se_delayed_node, &dev->delayed_cmd_list);
spin_unlock(&dev->delayed_cmd_lock);
- pr_debug("Added CDB: 0x%02x Task Attr: 0x%02x to"
- " delayed CMD list, se_ordered_id: %u\n",
- cmd->t_task_cdb[0], cmd->sam_task_attr,
- cmd->se_ordered_id);
+ pr_debug("Added CDB: 0x%02x Task Attr: 0x%02x to delayed CMD listn",
+ cmd->t_task_cdb[0], cmd->sam_task_attr);
return true;
}
@@ -1888,20 +1931,18 @@
if (cmd->sam_task_attr == TCM_SIMPLE_TAG) {
atomic_dec_mb(&dev->simple_cmds);
dev->dev_cur_ordered_id++;
- pr_debug("Incremented dev->dev_cur_ordered_id: %u for"
- " SIMPLE: %u\n", dev->dev_cur_ordered_id,
- cmd->se_ordered_id);
+ pr_debug("Incremented dev->dev_cur_ordered_id: %u for SIMPLE\n",
+ dev->dev_cur_ordered_id);
} else if (cmd->sam_task_attr == TCM_HEAD_TAG) {
dev->dev_cur_ordered_id++;
- pr_debug("Incremented dev_cur_ordered_id: %u for"
- " HEAD_OF_QUEUE: %u\n", dev->dev_cur_ordered_id,
- cmd->se_ordered_id);
+ pr_debug("Incremented dev_cur_ordered_id: %u for HEAD_OF_QUEUE\n",
+ dev->dev_cur_ordered_id);
} else if (cmd->sam_task_attr == TCM_ORDERED_TAG) {
atomic_dec_mb(&dev->dev_ordered_sync);
dev->dev_cur_ordered_id++;
- pr_debug("Incremented dev_cur_ordered_id: %u for ORDERED:"
- " %u\n", dev->dev_cur_ordered_id, cmd->se_ordered_id);
+ pr_debug("Incremented dev_cur_ordered_id: %u for ORDERED\n",
+ dev->dev_cur_ordered_id);
}
target_restart_delayed_cmds(dev);
@@ -2615,37 +2656,159 @@
}
EXPORT_SYMBOL(transport_wait_for_tasks);
-static int transport_get_sense_codes(
- struct se_cmd *cmd,
- u8 *asc,
- u8 *ascq)
+struct sense_info {
+ u8 key;
+ u8 asc;
+ u8 ascq;
+ bool add_sector_info;
+};
+
+static const struct sense_info sense_info_table[] = {
+ [TCM_NO_SENSE] = {
+ .key = NOT_READY
+ },
+ [TCM_NON_EXISTENT_LUN] = {
+ .key = ILLEGAL_REQUEST,
+ .asc = 0x25 /* LOGICAL UNIT NOT SUPPORTED */
+ },
+ [TCM_UNSUPPORTED_SCSI_OPCODE] = {
+ .key = ILLEGAL_REQUEST,
+ .asc = 0x20, /* INVALID COMMAND OPERATION CODE */
+ },
+ [TCM_SECTOR_COUNT_TOO_MANY] = {
+ .key = ILLEGAL_REQUEST,
+ .asc = 0x20, /* INVALID COMMAND OPERATION CODE */
+ },
+ [TCM_UNKNOWN_MODE_PAGE] = {
+ .key = ILLEGAL_REQUEST,
+ .asc = 0x24, /* INVALID FIELD IN CDB */
+ },
+ [TCM_CHECK_CONDITION_ABORT_CMD] = {
+ .key = ABORTED_COMMAND,
+ .asc = 0x29, /* BUS DEVICE RESET FUNCTION OCCURRED */
+ .ascq = 0x03,
+ },
+ [TCM_INCORRECT_AMOUNT_OF_DATA] = {
+ .key = ABORTED_COMMAND,
+ .asc = 0x0c, /* WRITE ERROR */
+ .ascq = 0x0d, /* NOT ENOUGH UNSOLICITED DATA */
+ },
+ [TCM_INVALID_CDB_FIELD] = {
+ .key = ILLEGAL_REQUEST,
+ .asc = 0x24, /* INVALID FIELD IN CDB */
+ },
+ [TCM_INVALID_PARAMETER_LIST] = {
+ .key = ILLEGAL_REQUEST,
+ .asc = 0x26, /* INVALID FIELD IN PARAMETER LIST */
+ },
+ [TCM_PARAMETER_LIST_LENGTH_ERROR] = {
+ .key = ILLEGAL_REQUEST,
+ .asc = 0x1a, /* PARAMETER LIST LENGTH ERROR */
+ },
+ [TCM_UNEXPECTED_UNSOLICITED_DATA] = {
+ .key = ILLEGAL_REQUEST,
+ .asc = 0x0c, /* WRITE ERROR */
+ .ascq = 0x0c, /* UNEXPECTED_UNSOLICITED_DATA */
+ },
+ [TCM_SERVICE_CRC_ERROR] = {
+ .key = ABORTED_COMMAND,
+ .asc = 0x47, /* PROTOCOL SERVICE CRC ERROR */
+ .ascq = 0x05, /* N/A */
+ },
+ [TCM_SNACK_REJECTED] = {
+ .key = ABORTED_COMMAND,
+ .asc = 0x11, /* READ ERROR */
+ .ascq = 0x13, /* FAILED RETRANSMISSION REQUEST */
+ },
+ [TCM_WRITE_PROTECTED] = {
+ .key = DATA_PROTECT,
+ .asc = 0x27, /* WRITE PROTECTED */
+ },
+ [TCM_ADDRESS_OUT_OF_RANGE] = {
+ .key = ILLEGAL_REQUEST,
+ .asc = 0x21, /* LOGICAL BLOCK ADDRESS OUT OF RANGE */
+ },
+ [TCM_CHECK_CONDITION_UNIT_ATTENTION] = {
+ .key = UNIT_ATTENTION,
+ },
+ [TCM_CHECK_CONDITION_NOT_READY] = {
+ .key = NOT_READY,
+ },
+ [TCM_MISCOMPARE_VERIFY] = {
+ .key = MISCOMPARE,
+ .asc = 0x1d, /* MISCOMPARE DURING VERIFY OPERATION */
+ .ascq = 0x00,
+ },
+ [TCM_LOGICAL_BLOCK_GUARD_CHECK_FAILED] = {
+ .key = ABORTED_COMMAND,
+ .asc = 0x10,
+ .ascq = 0x01, /* LOGICAL BLOCK GUARD CHECK FAILED */
+ .add_sector_info = true,
+ },
+ [TCM_LOGICAL_BLOCK_APP_TAG_CHECK_FAILED] = {
+ .key = ABORTED_COMMAND,
+ .asc = 0x10,
+ .ascq = 0x02, /* LOGICAL BLOCK APPLICATION TAG CHECK FAILED */
+ .add_sector_info = true,
+ },
+ [TCM_LOGICAL_BLOCK_REF_TAG_CHECK_FAILED] = {
+ .key = ABORTED_COMMAND,
+ .asc = 0x10,
+ .ascq = 0x03, /* LOGICAL BLOCK REFERENCE TAG CHECK FAILED */
+ .add_sector_info = true,
+ },
+ [TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE] = {
+ /*
+ * Returning ILLEGAL REQUEST would cause immediate IO errors on
+ * Solaris initiators. Returning NOT READY instead means the
+ * operations will be retried a finite number of times and we
+ * can survive intermittent errors.
+ */
+ .key = NOT_READY,
+ .asc = 0x08, /* LOGICAL UNIT COMMUNICATION FAILURE */
+ },
+};
+
+static int translate_sense_reason(struct se_cmd *cmd, sense_reason_t reason)
{
- *asc = cmd->scsi_asc;
- *ascq = cmd->scsi_ascq;
+ const struct sense_info *si;
+ u8 *buffer = cmd->sense_buffer;
+ int r = (__force int)reason;
+ u8 asc, ascq;
+ bool desc_format = target_sense_desc_format(cmd->se_dev);
+
+ if (r < ARRAY_SIZE(sense_info_table) && sense_info_table[r].key)
+ si = &sense_info_table[r];
+ else
+ si = &sense_info_table[(__force int)
+ TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE];
+
+ if (reason == TCM_CHECK_CONDITION_UNIT_ATTENTION) {
+ core_scsi3_ua_for_check_condition(cmd, &asc, &ascq);
+ WARN_ON_ONCE(asc == 0);
+ } else if (si->asc == 0) {
+ WARN_ON_ONCE(cmd->scsi_asc == 0);
+ asc = cmd->scsi_asc;
+ ascq = cmd->scsi_ascq;
+ } else {
+ asc = si->asc;
+ ascq = si->ascq;
+ }
+
+ scsi_build_sense_buffer(desc_format, buffer, si->key, asc, ascq);
+ if (si->add_sector_info)
+ return scsi_set_sense_information(buffer,
+ cmd->scsi_sense_length,
+ cmd->bad_sector);
return 0;
}
-static
-void transport_err_sector_info(unsigned char *buffer, sector_t bad_sector)
-{
- /* Place failed LBA in sense data information descriptor 0. */
- buffer[SPC_ADD_SENSE_LEN_OFFSET] = 0xc;
- buffer[SPC_DESC_TYPE_OFFSET] = 0; /* Information */
- buffer[SPC_ADDITIONAL_DESC_LEN_OFFSET] = 0xa;
- buffer[SPC_VALIDITY_OFFSET] = 0x80;
-
- /* Descriptor Information: failing sector */
- put_unaligned_be64(bad_sector, &buffer[12]);
-}
-
int
transport_send_check_condition_and_sense(struct se_cmd *cmd,
sense_reason_t reason, int from_transport)
{
- unsigned char *buffer = cmd->sense_buffer;
unsigned long flags;
- u8 asc = 0, ascq = 0;
spin_lock_irqsave(&cmd->t_state_lock, flags);
if (cmd->se_cmd_flags & SCF_SENT_CHECK_CONDITION) {
@@ -2655,243 +2818,17 @@
cmd->se_cmd_flags |= SCF_SENT_CHECK_CONDITION;
spin_unlock_irqrestore(&cmd->t_state_lock, flags);
- if (!reason && from_transport)
- goto after_reason;
+ if (!from_transport) {
+ int rc;
- if (!from_transport)
cmd->se_cmd_flags |= SCF_EMULATED_TASK_SENSE;
-
- /*
- * Actual SENSE DATA, see SPC-3 7.23.2 SPC_SENSE_KEY_OFFSET uses
- * SENSE KEY values from include/scsi/scsi.h
- */
- switch (reason) {
- case TCM_NO_SENSE:
- /* CURRENT ERROR */
- buffer[0] = 0x70;
- buffer[SPC_ADD_SENSE_LEN_OFFSET] = 10;
- /* Not Ready */
- buffer[SPC_SENSE_KEY_OFFSET] = NOT_READY;
- /* NO ADDITIONAL SENSE INFORMATION */
- buffer[SPC_ASC_KEY_OFFSET] = 0;
- buffer[SPC_ASCQ_KEY_OFFSET] = 0;
- break;
- case TCM_NON_EXISTENT_LUN:
- /* CURRENT ERROR */
- buffer[0] = 0x70;
- buffer[SPC_ADD_SENSE_LEN_OFFSET] = 10;
- /* ILLEGAL REQUEST */
- buffer[SPC_SENSE_KEY_OFFSET] = ILLEGAL_REQUEST;
- /* LOGICAL UNIT NOT SUPPORTED */
- buffer[SPC_ASC_KEY_OFFSET] = 0x25;
- break;
- case TCM_UNSUPPORTED_SCSI_OPCODE:
- case TCM_SECTOR_COUNT_TOO_MANY:
- /* CURRENT ERROR */
- buffer[0] = 0x70;
- buffer[SPC_ADD_SENSE_LEN_OFFSET] = 10;
- /* ILLEGAL REQUEST */
- buffer[SPC_SENSE_KEY_OFFSET] = ILLEGAL_REQUEST;
- /* INVALID COMMAND OPERATION CODE */
- buffer[SPC_ASC_KEY_OFFSET] = 0x20;
- break;
- case TCM_UNKNOWN_MODE_PAGE:
- /* CURRENT ERROR */
- buffer[0] = 0x70;
- buffer[SPC_ADD_SENSE_LEN_OFFSET] = 10;
- /* ILLEGAL REQUEST */
- buffer[SPC_SENSE_KEY_OFFSET] = ILLEGAL_REQUEST;
- /* INVALID FIELD IN CDB */
- buffer[SPC_ASC_KEY_OFFSET] = 0x24;
- break;
- case TCM_CHECK_CONDITION_ABORT_CMD:
- /* CURRENT ERROR */
- buffer[0] = 0x70;
- buffer[SPC_ADD_SENSE_LEN_OFFSET] = 10;
- /* ABORTED COMMAND */
- buffer[SPC_SENSE_KEY_OFFSET] = ABORTED_COMMAND;
- /* BUS DEVICE RESET FUNCTION OCCURRED */
- buffer[SPC_ASC_KEY_OFFSET] = 0x29;
- buffer[SPC_ASCQ_KEY_OFFSET] = 0x03;
- break;
- case TCM_INCORRECT_AMOUNT_OF_DATA:
- /* CURRENT ERROR */
- buffer[0] = 0x70;
- buffer[SPC_ADD_SENSE_LEN_OFFSET] = 10;
- /* ABORTED COMMAND */
- buffer[SPC_SENSE_KEY_OFFSET] = ABORTED_COMMAND;
- /* WRITE ERROR */
- buffer[SPC_ASC_KEY_OFFSET] = 0x0c;
- /* NOT ENOUGH UNSOLICITED DATA */
- buffer[SPC_ASCQ_KEY_OFFSET] = 0x0d;
- break;
- case TCM_INVALID_CDB_FIELD:
- /* CURRENT ERROR */
- buffer[0] = 0x70;
- buffer[SPC_ADD_SENSE_LEN_OFFSET] = 10;
- /* ILLEGAL REQUEST */
- buffer[SPC_SENSE_KEY_OFFSET] = ILLEGAL_REQUEST;
- /* INVALID FIELD IN CDB */
- buffer[SPC_ASC_KEY_OFFSET] = 0x24;
- break;
- case TCM_INVALID_PARAMETER_LIST:
- /* CURRENT ERROR */
- buffer[0] = 0x70;
- buffer[SPC_ADD_SENSE_LEN_OFFSET] = 10;
- /* ILLEGAL REQUEST */
- buffer[SPC_SENSE_KEY_OFFSET] = ILLEGAL_REQUEST;
- /* INVALID FIELD IN PARAMETER LIST */
- buffer[SPC_ASC_KEY_OFFSET] = 0x26;
- break;
- case TCM_PARAMETER_LIST_LENGTH_ERROR:
- /* CURRENT ERROR */
- buffer[0] = 0x70;
- buffer[SPC_ADD_SENSE_LEN_OFFSET] = 10;
- /* ILLEGAL REQUEST */
- buffer[SPC_SENSE_KEY_OFFSET] = ILLEGAL_REQUEST;
- /* PARAMETER LIST LENGTH ERROR */
- buffer[SPC_ASC_KEY_OFFSET] = 0x1a;
- break;
- case TCM_UNEXPECTED_UNSOLICITED_DATA:
- /* CURRENT ERROR */
- buffer[0] = 0x70;
- buffer[SPC_ADD_SENSE_LEN_OFFSET] = 10;
- /* ABORTED COMMAND */
- buffer[SPC_SENSE_KEY_OFFSET] = ABORTED_COMMAND;
- /* WRITE ERROR */
- buffer[SPC_ASC_KEY_OFFSET] = 0x0c;
- /* UNEXPECTED_UNSOLICITED_DATA */
- buffer[SPC_ASCQ_KEY_OFFSET] = 0x0c;
- break;
- case TCM_SERVICE_CRC_ERROR:
- /* CURRENT ERROR */
- buffer[0] = 0x70;
- buffer[SPC_ADD_SENSE_LEN_OFFSET] = 10;
- /* ABORTED COMMAND */
- buffer[SPC_SENSE_KEY_OFFSET] = ABORTED_COMMAND;
- /* PROTOCOL SERVICE CRC ERROR */
- buffer[SPC_ASC_KEY_OFFSET] = 0x47;
- /* N/A */
- buffer[SPC_ASCQ_KEY_OFFSET] = 0x05;
- break;
- case TCM_SNACK_REJECTED:
- /* CURRENT ERROR */
- buffer[0] = 0x70;
- buffer[SPC_ADD_SENSE_LEN_OFFSET] = 10;
- /* ABORTED COMMAND */
- buffer[SPC_SENSE_KEY_OFFSET] = ABORTED_COMMAND;
- /* READ ERROR */
- buffer[SPC_ASC_KEY_OFFSET] = 0x11;
- /* FAILED RETRANSMISSION REQUEST */
- buffer[SPC_ASCQ_KEY_OFFSET] = 0x13;
- break;
- case TCM_WRITE_PROTECTED:
- /* CURRENT ERROR */
- buffer[0] = 0x70;
- buffer[SPC_ADD_SENSE_LEN_OFFSET] = 10;
- /* DATA PROTECT */
- buffer[SPC_SENSE_KEY_OFFSET] = DATA_PROTECT;
- /* WRITE PROTECTED */
- buffer[SPC_ASC_KEY_OFFSET] = 0x27;
- break;
- case TCM_ADDRESS_OUT_OF_RANGE:
- /* CURRENT ERROR */
- buffer[0] = 0x70;
- buffer[SPC_ADD_SENSE_LEN_OFFSET] = 10;
- /* ILLEGAL REQUEST */
- buffer[SPC_SENSE_KEY_OFFSET] = ILLEGAL_REQUEST;
- /* LOGICAL BLOCK ADDRESS OUT OF RANGE */
- buffer[SPC_ASC_KEY_OFFSET] = 0x21;
- break;
- case TCM_CHECK_CONDITION_UNIT_ATTENTION:
- /* CURRENT ERROR */
- buffer[0] = 0x70;
- buffer[SPC_ADD_SENSE_LEN_OFFSET] = 10;
- /* UNIT ATTENTION */
- buffer[SPC_SENSE_KEY_OFFSET] = UNIT_ATTENTION;
- core_scsi3_ua_for_check_condition(cmd, &asc, &ascq);
- buffer[SPC_ASC_KEY_OFFSET] = asc;
- buffer[SPC_ASCQ_KEY_OFFSET] = ascq;
- break;
- case TCM_CHECK_CONDITION_NOT_READY:
- /* CURRENT ERROR */
- buffer[0] = 0x70;
- buffer[SPC_ADD_SENSE_LEN_OFFSET] = 10;
- /* Not Ready */
- buffer[SPC_SENSE_KEY_OFFSET] = NOT_READY;
- transport_get_sense_codes(cmd, &asc, &ascq);
- buffer[SPC_ASC_KEY_OFFSET] = asc;
- buffer[SPC_ASCQ_KEY_OFFSET] = ascq;
- break;
- case TCM_MISCOMPARE_VERIFY:
- /* CURRENT ERROR */
- buffer[0] = 0x70;
- buffer[SPC_ADD_SENSE_LEN_OFFSET] = 10;
- buffer[SPC_SENSE_KEY_OFFSET] = MISCOMPARE;
- /* MISCOMPARE DURING VERIFY OPERATION */
- buffer[SPC_ASC_KEY_OFFSET] = 0x1d;
- buffer[SPC_ASCQ_KEY_OFFSET] = 0x00;
- break;
- case TCM_LOGICAL_BLOCK_GUARD_CHECK_FAILED:
- /* CURRENT ERROR */
- buffer[0] = 0x70;
- buffer[SPC_ADD_SENSE_LEN_OFFSET] = 10;
- /* ILLEGAL REQUEST */
- buffer[SPC_SENSE_KEY_OFFSET] = ILLEGAL_REQUEST;
- /* LOGICAL BLOCK GUARD CHECK FAILED */
- buffer[SPC_ASC_KEY_OFFSET] = 0x10;
- buffer[SPC_ASCQ_KEY_OFFSET] = 0x01;
- transport_err_sector_info(buffer, cmd->bad_sector);
- break;
- case TCM_LOGICAL_BLOCK_APP_TAG_CHECK_FAILED:
- /* CURRENT ERROR */
- buffer[0] = 0x70;
- buffer[SPC_ADD_SENSE_LEN_OFFSET] = 10;
- /* ILLEGAL REQUEST */
- buffer[SPC_SENSE_KEY_OFFSET] = ILLEGAL_REQUEST;
- /* LOGICAL BLOCK APPLICATION TAG CHECK FAILED */
- buffer[SPC_ASC_KEY_OFFSET] = 0x10;
- buffer[SPC_ASCQ_KEY_OFFSET] = 0x02;
- transport_err_sector_info(buffer, cmd->bad_sector);
- break;
- case TCM_LOGICAL_BLOCK_REF_TAG_CHECK_FAILED:
- /* CURRENT ERROR */
- buffer[0] = 0x70;
- buffer[SPC_ADD_SENSE_LEN_OFFSET] = 10;
- /* ILLEGAL REQUEST */
- buffer[SPC_SENSE_KEY_OFFSET] = ILLEGAL_REQUEST;
- /* LOGICAL BLOCK REFERENCE TAG CHECK FAILED */
- buffer[SPC_ASC_KEY_OFFSET] = 0x10;
- buffer[SPC_ASCQ_KEY_OFFSET] = 0x03;
- transport_err_sector_info(buffer, cmd->bad_sector);
- break;
- case TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE:
- default:
- /* CURRENT ERROR */
- buffer[0] = 0x70;
- buffer[SPC_ADD_SENSE_LEN_OFFSET] = 10;
- /*
- * Returning ILLEGAL REQUEST would cause immediate IO errors on
- * Solaris initiators. Returning NOT READY instead means the
- * operations will be retried a finite number of times and we
- * can survive intermittent errors.
- */
- buffer[SPC_SENSE_KEY_OFFSET] = NOT_READY;
- /* LOGICAL UNIT COMMUNICATION FAILURE */
- buffer[SPC_ASC_KEY_OFFSET] = 0x08;
- break;
+ cmd->scsi_status = SAM_STAT_CHECK_CONDITION;
+ cmd->scsi_sense_length = TRANSPORT_SENSE_BUFFER;
+ rc = translate_sense_reason(cmd, reason);
+ if (rc)
+ return rc;
}
- /*
- * This code uses linux/include/scsi/scsi.h SAM status codes!
- */
- cmd->scsi_status = SAM_STAT_CHECK_CONDITION;
- /*
- * Automatically padded, this value is encoded in the fabric's
- * data_length response PDU containing the SCSI defined sense data.
- */
- cmd->scsi_sense_length = TRANSPORT_SENSE_BUFFER;
-after_reason:
trace_target_cmd_complete(cmd);
return cmd->se_tfo->queue_status(cmd);
}
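The rewrite above replaces the long per-reason switch with a table lookup plus the common SCSI sense helpers. A condensed sketch of the resulting flow for a single reason code, assuming the same helpers and fields as in the hunk:

	const struct sense_info *si = &sense_info_table[(__force int)TCM_WRITE_PROTECTED];

	/* DATA PROTECT / WRITE PROTECTED (0x27), fixed or descriptor format */
	scsi_build_sense_buffer(desc_format, cmd->sense_buffer,
				si->key, si->asc, si->ascq);
	if (si->add_sector_info)
		scsi_set_sense_information(cmd->sense_buffer,
					   cmd->scsi_sense_length, cmd->bad_sector);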
diff --git a/drivers/target/target_core_user.c b/drivers/target/target_core_user.c
index c448ef4..937cebf 100644
--- a/drivers/target/target_core_user.c
+++ b/drivers/target/target_core_user.c
@@ -25,6 +25,7 @@
#include <linux/parser.h>
#include <linux/vmalloc.h>
#include <linux/uio_driver.h>
+#include <linux/stringify.h>
#include <net/genetlink.h>
#include <scsi/scsi_common.h>
#include <scsi/scsi_proto.h>
@@ -538,14 +539,8 @@
UPDATE_HEAD(udev->data_tail, cmd->data_length, udev->data_size);
pr_warn("TCMU: Userspace set UNKNOWN_OP flag on se_cmd %p\n",
cmd->se_cmd);
- transport_generic_request_failure(cmd->se_cmd,
- TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE);
- cmd->se_cmd = NULL;
- kmem_cache_free(tcmu_cmd_cache, cmd);
- return;
- }
-
- if (entry->rsp.scsi_status == SAM_STAT_CHECK_CONDITION) {
+ entry->rsp.scsi_status = SAM_STAT_CHECK_CONDITION;
+ } else if (entry->rsp.scsi_status == SAM_STAT_CHECK_CONDITION) {
memcpy(se_cmd->sense_buffer, entry->rsp.sense_buffer,
se_cmd->scsi_sense_length);
@@ -577,7 +572,6 @@
static unsigned int tcmu_handle_completions(struct tcmu_dev *udev)
{
struct tcmu_mailbox *mb;
- LIST_HEAD(cpl_cmds);
unsigned long flags;
int handled = 0;
@@ -905,7 +899,7 @@
WARN_ON(!PAGE_ALIGNED(udev->data_off));
WARN_ON(udev->data_size % PAGE_SIZE);
- info->version = xstr(TCMU_MAILBOX_VERSION);
+ info->version = __stringify(TCMU_MAILBOX_VERSION);
info->mem[0].name = "tcm-user command & data buffer";
info->mem[0].addr = (phys_addr_t) udev->mb_addr;
diff --git a/drivers/target/target_core_xcopy.c b/drivers/target/target_core_xcopy.c
index 4515f52..47fe94e 100644
--- a/drivers/target/target_core_xcopy.c
+++ b/drivers/target/target_core_xcopy.c
@@ -450,6 +450,8 @@
memset(&xcopy_pt_sess, 0, sizeof(struct se_session));
INIT_LIST_HEAD(&xcopy_pt_sess.sess_list);
INIT_LIST_HEAD(&xcopy_pt_sess.sess_acl_list);
+ INIT_LIST_HEAD(&xcopy_pt_sess.sess_cmd_list);
+ spin_lock_init(&xcopy_pt_sess.sess_cmd_lock);
xcopy_pt_nacl.se_tpg = &xcopy_pt_tpg;
xcopy_pt_nacl.nacl_sess = &xcopy_pt_sess;
@@ -644,7 +646,7 @@
pr_debug("XCOPY: Built READ_16: LBA: %llu Sectors: %u Length: %u\n",
(unsigned long long)src_lba, src_sectors, length);
- transport_init_se_cmd(se_cmd, &xcopy_pt_tfo, NULL, length,
+ transport_init_se_cmd(se_cmd, &xcopy_pt_tfo, &xcopy_pt_sess, length,
DMA_FROM_DEVICE, 0, &xpt_cmd->sense_buffer[0]);
xop->src_pt_cmd = xpt_cmd;
@@ -704,7 +706,7 @@
pr_debug("XCOPY: Built WRITE_16: LBA: %llu Sectors: %u Length: %u\n",
(unsigned long long)dst_lba, dst_sectors, length);
- transport_init_se_cmd(se_cmd, &xcopy_pt_tfo, NULL, length,
+ transport_init_se_cmd(se_cmd, &xcopy_pt_tfo, &xcopy_pt_sess, length,
DMA_TO_DEVICE, 0, &xpt_cmd->sense_buffer[0]);
xop->dst_pt_cmd = xpt_cmd;
diff --git a/drivers/target/tcm_fc/tfc_cmd.c b/drivers/target/tcm_fc/tfc_cmd.c
index 6803172..aa3caca 100644
--- a/drivers/target/tcm_fc/tfc_cmd.c
+++ b/drivers/target/tcm_fc/tfc_cmd.c
@@ -255,7 +255,7 @@
struct ft_cmd *cmd = arg;
struct fc_frame_header *fh;
- if (unlikely(IS_ERR(fp))) {
+ if (IS_ERR(fp)) {
/* XXX need to find cmd if queued */
cmd->seq = NULL;
cmd->aborted = true;
diff --git a/drivers/thermal/Kconfig b/drivers/thermal/Kconfig
index 118938e..0390044 100644
--- a/drivers/thermal/Kconfig
+++ b/drivers/thermal/Kconfig
@@ -340,6 +340,14 @@
tristate
depends on ACPI
+config INTEL_PCH_THERMAL
+ tristate "Intel PCH Thermal Reporting Driver"
+ depends on X86 && PCI
+ help
+	  Enable this to support thermal reporting on certain Intel PCHs.
+	  The thermal reporting device will provide temperature readings,
+	  programmable trip points and other information.
+
menu "Texas Instruments thermal drivers"
source "drivers/thermal/ti-soc-thermal/Kconfig"
endmenu
diff --git a/drivers/thermal/Makefile b/drivers/thermal/Makefile
index 535dfee..26f1608 100644
--- a/drivers/thermal/Makefile
+++ b/drivers/thermal/Makefile
@@ -41,6 +41,7 @@
obj-$(CONFIG_INTEL_QUARK_DTS_THERMAL) += intel_quark_dts_thermal.o
obj-$(CONFIG_TI_SOC_THERMAL) += ti-soc-thermal/
obj-$(CONFIG_INT340X_THERMAL) += int340x_thermal/
+obj-$(CONFIG_INTEL_PCH_THERMAL) += intel_pch_thermal.o
obj-$(CONFIG_ST_THERMAL) += st/
obj-$(CONFIG_TEGRA_SOCTHERM) += tegra_soctherm.o
obj-$(CONFIG_HISI_THERMAL) += hisi_thermal.o
diff --git a/drivers/thermal/armada_thermal.c b/drivers/thermal/armada_thermal.c
index 01255fd..26b8d32 100644
--- a/drivers/thermal/armada_thermal.c
+++ b/drivers/thermal/armada_thermal.c
@@ -155,7 +155,7 @@
}
static int armada_get_temp(struct thermal_zone_device *thermal,
- unsigned long *temp)
+ int *temp)
{
struct armada_thermal_priv *priv = thermal->devdata;
unsigned long reg;
diff --git a/drivers/thermal/db8500_thermal.c b/drivers/thermal/db8500_thermal.c
index 2fb273c..652acd8 100644
--- a/drivers/thermal/db8500_thermal.c
+++ b/drivers/thermal/db8500_thermal.c
@@ -107,8 +107,7 @@
}
/* Callback to get current temperature */
-static int db8500_sys_get_temp(struct thermal_zone_device *thermal,
- unsigned long *temp)
+static int db8500_sys_get_temp(struct thermal_zone_device *thermal, int *temp)
{
struct db8500_thermal_zone *pzone = thermal->devdata;
@@ -180,7 +179,7 @@
/* Callback to get trip point temperature */
static int db8500_sys_get_trip_temp(struct thermal_zone_device *thermal,
- int trip, unsigned long *temp)
+ int trip, int *temp)
{
struct db8500_thermal_zone *pzone = thermal->devdata;
struct db8500_thsens_platform_data *ptrips = pzone->trip_tab;
@@ -195,7 +194,7 @@
/* Callback to get critical trip point temperature */
static int db8500_sys_get_crit_temp(struct thermal_zone_device *thermal,
- unsigned long *temp)
+ int *temp)
{
struct db8500_thermal_zone *pzone = thermal->devdata;
struct db8500_thsens_platform_data *ptrips = pzone->trip_tab;
diff --git a/drivers/thermal/dove_thermal.c b/drivers/thermal/dove_thermal.c
index 09f6e30..a0bc9de 100644
--- a/drivers/thermal/dove_thermal.c
+++ b/drivers/thermal/dove_thermal.c
@@ -93,7 +93,7 @@
}
static int dove_get_temp(struct thermal_zone_device *thermal,
- unsigned long *temp)
+ int *temp)
{
unsigned long reg;
struct dove_thermal_priv *priv = thermal->devdata;
diff --git a/drivers/thermal/fair_share.c b/drivers/thermal/fair_share.c
index c2c10bb..34fe365 100644
--- a/drivers/thermal/fair_share.c
+++ b/drivers/thermal/fair_share.c
@@ -34,7 +34,7 @@
static int get_trip_level(struct thermal_zone_device *tz)
{
int count = 0;
- unsigned long trip_temp;
+ int trip_temp;
enum thermal_trip_type trip_type;
if (tz->trips == 0 || !tz->ops->get_trip_temp)
diff --git a/drivers/thermal/gov_bang_bang.c b/drivers/thermal/gov_bang_bang.c
index c5dd76b..70836c5 100644
--- a/drivers/thermal/gov_bang_bang.c
+++ b/drivers/thermal/gov_bang_bang.c
@@ -25,14 +25,13 @@
static void thermal_zone_trip_update(struct thermal_zone_device *tz, int trip)
{
- long trip_temp;
- unsigned long trip_hyst;
+ int trip_temp, trip_hyst;
struct thermal_instance *instance;
tz->ops->get_trip_temp(tz, trip, &trip_temp);
tz->ops->get_trip_hyst(tz, trip, &trip_hyst);
- dev_dbg(&tz->device, "Trip%d[temp=%ld]:temp=%d:hyst=%ld\n",
+ dev_dbg(&tz->device, "Trip%d[temp=%d]:temp=%d:hyst=%d\n",
trip, trip_temp, tz->temperature,
trip_hyst);
diff --git a/drivers/thermal/hisi_thermal.c b/drivers/thermal/hisi_thermal.c
index b49f97c..36d0729 100644
--- a/drivers/thermal/hisi_thermal.c
+++ b/drivers/thermal/hisi_thermal.c
@@ -155,7 +155,7 @@
mutex_unlock(&data->thermal_lock);
}
-static int hisi_thermal_get_temp(void *_sensor, long *temp)
+static int hisi_thermal_get_temp(void *_sensor, int *temp)
{
struct hisi_thermal_sensor *sensor = _sensor;
struct hisi_thermal_data *data = sensor->thermal;
@@ -178,7 +178,7 @@
data->irq_bind_sensor = sensor_id;
mutex_unlock(&data->thermal_lock);
- dev_dbg(&data->pdev->dev, "id=%d, irq=%d, temp=%ld, thres=%d\n",
+ dev_dbg(&data->pdev->dev, "id=%d, irq=%d, temp=%d, thres=%d\n",
sensor->id, data->irq_enabled, *temp, sensor->thres_temp);
/*
* Bind irq to sensor for two cases:
diff --git a/drivers/thermal/imx_thermal.c b/drivers/thermal/imx_thermal.c
index fde4c28..4bec1d3 100644
--- a/drivers/thermal/imx_thermal.c
+++ b/drivers/thermal/imx_thermal.c
@@ -98,10 +98,10 @@
enum thermal_device_mode mode;
struct regmap *tempmon;
u32 c1, c2; /* See formula in imx_get_sensor_data() */
- unsigned long temp_passive;
- unsigned long temp_critical;
- unsigned long alarm_temp;
- unsigned long last_temp;
+ int temp_passive;
+ int temp_critical;
+ int alarm_temp;
+ int last_temp;
bool irq_enabled;
int irq;
struct clk *thermal_clk;
@@ -109,7 +109,7 @@
};
static void imx_set_panic_temp(struct imx_thermal_data *data,
- signed long panic_temp)
+ int panic_temp)
{
struct regmap *map = data->tempmon;
int critical_value;
@@ -121,7 +121,7 @@
}
static void imx_set_alarm_temp(struct imx_thermal_data *data,
- signed long alarm_temp)
+ int alarm_temp)
{
struct regmap *map = data->tempmon;
int alarm_value;
@@ -133,7 +133,7 @@
TEMPSENSE0_ALARM_VALUE_SHIFT);
}
-static int imx_get_temp(struct thermal_zone_device *tz, unsigned long *temp)
+static int imx_get_temp(struct thermal_zone_device *tz, int *temp)
{
struct imx_thermal_data *data = tz->devdata;
struct regmap *map = data->tempmon;
@@ -189,13 +189,13 @@
if (data->alarm_temp == data->temp_critical &&
*temp < data->temp_passive) {
imx_set_alarm_temp(data, data->temp_passive);
- dev_dbg(&tz->device, "thermal alarm off: T < %lu\n",
+ dev_dbg(&tz->device, "thermal alarm off: T < %d\n",
data->alarm_temp / 1000);
}
}
if (*temp != data->last_temp) {
- dev_dbg(&tz->device, "millicelsius: %ld\n", *temp);
+ dev_dbg(&tz->device, "millicelsius: %d\n", *temp);
data->last_temp = *temp;
}
@@ -262,8 +262,7 @@
return 0;
}
-static int imx_get_crit_temp(struct thermal_zone_device *tz,
- unsigned long *temp)
+static int imx_get_crit_temp(struct thermal_zone_device *tz, int *temp)
{
struct imx_thermal_data *data = tz->devdata;
@@ -272,7 +271,7 @@
}
static int imx_get_trip_temp(struct thermal_zone_device *tz, int trip,
- unsigned long *temp)
+ int *temp)
{
struct imx_thermal_data *data = tz->devdata;
@@ -282,7 +281,7 @@
}
static int imx_set_trip_temp(struct thermal_zone_device *tz, int trip,
- unsigned long temp)
+ int temp)
{
struct imx_thermal_data *data = tz->devdata;
@@ -434,7 +433,7 @@
{
struct imx_thermal_data *data = dev;
- dev_dbg(&data->tz->device, "THERMAL ALARM: T > %lu\n",
+ dev_dbg(&data->tz->device, "THERMAL ALARM: T > %d\n",
data->alarm_temp / 1000);
thermal_zone_device_update(data->tz);
diff --git a/drivers/thermal/int340x_thermal/int3400_thermal.c b/drivers/thermal/int340x_thermal/int3400_thermal.c
index 031018e..5836e55 100644
--- a/drivers/thermal/int340x_thermal/int3400_thermal.c
+++ b/drivers/thermal/int340x_thermal/int3400_thermal.c
@@ -186,7 +186,7 @@
}
static int int3400_thermal_get_temp(struct thermal_zone_device *thermal,
- unsigned long *temp)
+ int *temp)
{
*temp = 20 * 1000; /* faked temp sensor with 20C */
return 0;
diff --git a/drivers/thermal/int340x_thermal/int340x_thermal_zone.c b/drivers/thermal/int340x_thermal/int340x_thermal_zone.c
index 1e25133..b9b2666 100644
--- a/drivers/thermal/int340x_thermal/int340x_thermal_zone.c
+++ b/drivers/thermal/int340x_thermal/int340x_thermal_zone.c
@@ -20,7 +20,7 @@
#include "int340x_thermal_zone.h"
static int int340x_thermal_get_zone_temp(struct thermal_zone_device *zone,
- unsigned long *temp)
+ int *temp)
{
struct int34x_thermal_zone *d = zone->devdata;
unsigned long long tmp;
@@ -49,7 +49,7 @@
}
static int int340x_thermal_get_trip_temp(struct thermal_zone_device *zone,
- int trip, unsigned long *temp)
+ int trip, int *temp)
{
struct int34x_thermal_zone *d = zone->devdata;
int i;
@@ -114,7 +114,7 @@
}
static int int340x_thermal_set_trip_temp(struct thermal_zone_device *zone,
- int trip, unsigned long temp)
+ int trip, int temp)
{
struct int34x_thermal_zone *d = zone->devdata;
acpi_status status;
@@ -136,7 +136,7 @@
static int int340x_thermal_get_trip_hyst(struct thermal_zone_device *zone,
- int trip, unsigned long *temp)
+ int trip, int *temp)
{
struct int34x_thermal_zone *d = zone->devdata;
acpi_status status;
@@ -163,7 +163,7 @@
};
static int int340x_thermal_get_trip_config(acpi_handle handle, char *name,
- unsigned long *temp)
+ int *temp)
{
unsigned long long r;
acpi_status status;
diff --git a/drivers/thermal/int340x_thermal/int340x_thermal_zone.h b/drivers/thermal/int340x_thermal/int340x_thermal_zone.h
index 9f38ab7..aaadf72 100644
--- a/drivers/thermal/int340x_thermal/int340x_thermal_zone.h
+++ b/drivers/thermal/int340x_thermal/int340x_thermal_zone.h
@@ -21,7 +21,7 @@
#define INT340X_THERMAL_MAX_ACT_TRIP_COUNT 10
struct active_trip {
- unsigned long temp;
+ int temp;
int id;
bool valid;
};
@@ -31,11 +31,11 @@
struct active_trip act_trips[INT340X_THERMAL_MAX_ACT_TRIP_COUNT];
unsigned long *aux_trips;
int aux_trip_nr;
- unsigned long psv_temp;
+ int psv_temp;
int psv_trip_id;
- unsigned long crt_temp;
+ int crt_temp;
int crt_trip_id;
- unsigned long hot_temp;
+ int hot_temp;
int hot_trip_id;
struct thermal_zone_device *zone;
struct thermal_zone_device_ops *override_ops;
diff --git a/drivers/thermal/int340x_thermal/processor_thermal_device.c b/drivers/thermal/int340x_thermal/processor_thermal_device.c
index 3df3dc3..ccc0ad0 100644
--- a/drivers/thermal/int340x_thermal/processor_thermal_device.c
+++ b/drivers/thermal/int340x_thermal/processor_thermal_device.c
@@ -145,7 +145,7 @@
return -EINVAL;
}
-static int read_temp_msr(unsigned long *temp)
+static int read_temp_msr(int *temp)
{
int cpu;
u32 eax, edx;
@@ -177,7 +177,7 @@
}
static int proc_thermal_get_zone_temp(struct thermal_zone_device *zone,
- unsigned long *temp)
+ int *temp)
{
int ret;
diff --git a/drivers/thermal/intel_pch_thermal.c b/drivers/thermal/intel_pch_thermal.c
new file mode 100644
index 0000000..50c7da7
--- /dev/null
+++ b/drivers/thermal/intel_pch_thermal.c
@@ -0,0 +1,283 @@
+/* intel_pch_thermal.c - Intel PCH Thermal driver
+ *
+ * Copyright (c) 2015, Intel Corporation.
+ *
+ * Authors:
+ * Tushar Dave <tushar.n.dave@intel.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/init.h>
+#include <linux/pci.h>
+#include <linux/thermal.h>
+
+/* Intel PCH thermal Device IDs */
+#define PCH_THERMAL_DID_WPT 0x9CA4 /* Wildcat Point */
+
+/* Wildcat Point-LP PCH Thermal registers */
+#define WPT_TEMP 0x0000 /* Temperature */
+#define WPT_TSC 0x04 /* Thermal Sensor Control */
+#define WPT_TSS 0x06 /* Thermal Sensor Status */
+#define WPT_TSEL 0x08 /* Thermal Sensor Enable and Lock */
+#define WPT_TSREL 0x0A /* Thermal Sensor Report Enable and Lock */
+#define WPT_TSMIC 0x0C /* Thermal Sensor SMI Control */
+#define WPT_CTT 0x0010 /* Catastrophic Trip Point */
+#define WPT_TAHV 0x0014 /* Thermal Alert High Value */
+#define WPT_TALV 0x0018 /* Thermal Alert Low Value */
+#define WPT_TL 0x00000040 /* Throttle Value */
+#define WPT_PHL 0x0060 /* PCH Hot Level */
+#define WPT_PHLC 0x62 /* PHL Control */
+#define WPT_TAS 0x80 /* Thermal Alert Status */
+#define WPT_TSPIEN 0x82 /* PCI Interrupt Event Enables */
+#define WPT_TSGPEN 0x84 /* General Purpose Event Enables */
+
+/* Wildcat Point-LP PCH Thermal Register bit definitions */
+#define WPT_TEMP_TSR 0x00ff /* Temp TS Reading */
+#define WPT_TSC_CPDE 0x01 /* Catastrophic Power-Down Enable */
+#define WPT_TSS_TSDSS 0x10 /* Thermal Sensor Dynamic Shutdown Status */
+#define WPT_TSS_GPES 0x08 /* GPE status */
+#define WPT_TSEL_ETS 0x01 /* Enable TS */
+#define WPT_TSEL_PLDB 0x80 /* TSEL Policy Lock-Down Bit */
+#define WPT_TL_TOL 0x000001FF /* T0 Level */
+#define WPT_TL_T1L 0x1ff00000 /* T1 Level */
+#define WPT_TL_TTEN 0x20000000 /* TT Enable */
+
+static char driver_name[] = "Intel PCH thermal driver";
+
+struct pch_thermal_device {
+ void __iomem *hw_base;
+ const struct pch_dev_ops *ops;
+ struct pci_dev *pdev;
+ struct thermal_zone_device *tzd;
+ int crt_trip_id;
+ unsigned long crt_temp;
+ int hot_trip_id;
+ unsigned long hot_temp;
+};
+
+static int pch_wpt_init(struct pch_thermal_device *ptd, int *nr_trips)
+{
+ u8 tsel;
+ u16 trip_temp;
+
+ *nr_trips = 0;
+
+ /* Check if BIOS has already enabled thermal sensor */
+ if (WPT_TSS_TSDSS & readb(ptd->hw_base + WPT_TSS))
+ goto read_trips;
+
+ tsel = readb(ptd->hw_base + WPT_TSEL);
+ /*
+	 * When TSEL's Policy Lock-Down bit is 1, TSEL becomes RO.
+	 * If so, the thermal sensor cannot be enabled. Bail out.
+ */
+ if (tsel & WPT_TSEL_PLDB) {
+ dev_err(&ptd->pdev->dev, "Sensor can't be enabled\n");
+ return -ENODEV;
+ }
+
+ writeb(tsel|WPT_TSEL_ETS, ptd->hw_base + WPT_TSEL);
+ if (!(WPT_TSS_TSDSS & readb(ptd->hw_base + WPT_TSS))) {
+ dev_err(&ptd->pdev->dev, "Sensor can't be enabled\n");
+ return -ENODEV;
+ }
+
+read_trips:
+ ptd->crt_trip_id = -1;
+ trip_temp = readw(ptd->hw_base + WPT_CTT);
+ trip_temp &= 0x1FF;
+ if (trip_temp) {
+ /* Resolution of 1/2 degree C and an offset of -50C */
+ ptd->crt_temp = trip_temp * 1000 / 2 - 50000;
+ ptd->crt_trip_id = 0;
+ ++(*nr_trips);
+ }
+
+ ptd->hot_trip_id = -1;
+ trip_temp = readw(ptd->hw_base + WPT_PHL);
+ trip_temp &= 0x1FF;
+ if (trip_temp) {
+ /* Resolution of 1/2 degree C and an offset of -50C */
+ ptd->hot_temp = trip_temp * 1000 / 2 - 50000;
+ ptd->hot_trip_id = *nr_trips;
+ ++(*nr_trips);
+ }
+
+ return 0;
+}
+
+static int pch_wpt_get_temp(struct pch_thermal_device *ptd, int *temp)
+{
+ u8 wpt_temp;
+
+ wpt_temp = WPT_TEMP_TSR & readl(ptd->hw_base + WPT_TEMP);
+
+ /* Resolution of 1/2 degree C and an offset of -50C */
+ *temp = (wpt_temp * 1000 / 2 - 50000);
+
+ return 0;
+}
+
+struct pch_dev_ops {
+ int (*hw_init)(struct pch_thermal_device *ptd, int *nr_trips);
+ int (*get_temp)(struct pch_thermal_device *ptd, int *temp);
+};
+
+
+/* dev ops for Wildcat Point */
+static struct pch_dev_ops pch_dev_ops_wpt = {
+ .hw_init = pch_wpt_init,
+ .get_temp = pch_wpt_get_temp,
+};
+
+static int pch_thermal_get_temp(struct thermal_zone_device *tzd, int *temp)
+{
+ struct pch_thermal_device *ptd = tzd->devdata;
+
+ return ptd->ops->get_temp(ptd, temp);
+}
+
+static int pch_get_trip_type(struct thermal_zone_device *tzd, int trip,
+ enum thermal_trip_type *type)
+{
+ struct pch_thermal_device *ptd = tzd->devdata;
+
+ if (ptd->crt_trip_id == trip)
+ *type = THERMAL_TRIP_CRITICAL;
+ else if (ptd->hot_trip_id == trip)
+ *type = THERMAL_TRIP_HOT;
+ else
+ return -EINVAL;
+
+ return 0;
+}
+
+static int pch_get_trip_temp(struct thermal_zone_device *tzd, int trip, int *temp)
+{
+ struct pch_thermal_device *ptd = tzd->devdata;
+
+ if (ptd->crt_trip_id == trip)
+ *temp = ptd->crt_temp;
+ else if (ptd->hot_trip_id == trip)
+ *temp = ptd->hot_temp;
+ else
+ return -EINVAL;
+
+ return 0;
+}
+
+static struct thermal_zone_device_ops tzd_ops = {
+ .get_temp = pch_thermal_get_temp,
+ .get_trip_type = pch_get_trip_type,
+ .get_trip_temp = pch_get_trip_temp,
+};
+
+
+static int intel_pch_thermal_probe(struct pci_dev *pdev,
+ const struct pci_device_id *id)
+{
+ struct pch_thermal_device *ptd;
+ int err;
+ int nr_trips;
+ char *dev_name;
+
+ ptd = devm_kzalloc(&pdev->dev, sizeof(*ptd), GFP_KERNEL);
+ if (!ptd)
+ return -ENOMEM;
+
+ switch (pdev->device) {
+ case PCH_THERMAL_DID_WPT:
+ ptd->ops = &pch_dev_ops_wpt;
+ dev_name = "pch_wildcat_point";
+ break;
+ default:
+ dev_err(&pdev->dev, "unknown pch thermal device\n");
+ return -ENODEV;
+ }
+
+ pci_set_drvdata(pdev, ptd);
+ ptd->pdev = pdev;
+
+ err = pci_enable_device(pdev);
+ if (err) {
+ dev_err(&pdev->dev, "failed to enable pci device\n");
+ return err;
+ }
+
+ err = pci_request_regions(pdev, driver_name);
+ if (err) {
+ dev_err(&pdev->dev, "failed to request pci region\n");
+ goto error_disable;
+ }
+
+ ptd->hw_base = pci_ioremap_bar(pdev, 0);
+ if (!ptd->hw_base) {
+ err = -ENOMEM;
+ dev_err(&pdev->dev, "failed to map mem base\n");
+ goto error_release;
+ }
+
+ err = ptd->ops->hw_init(ptd, &nr_trips);
+ if (err)
+ goto error_cleanup;
+
+ ptd->tzd = thermal_zone_device_register(dev_name, nr_trips, 0, ptd,
+ &tzd_ops, NULL, 0, 0);
+ if (IS_ERR(ptd->tzd)) {
+ dev_err(&pdev->dev, "Failed to register thermal zone %s\n",
+ dev_name);
+ err = PTR_ERR(ptd->tzd);
+ goto error_cleanup;
+ }
+
+ return 0;
+
+error_cleanup:
+ iounmap(ptd->hw_base);
+error_release:
+ pci_release_regions(pdev);
+error_disable:
+ pci_disable_device(pdev);
+ dev_err(&pdev->dev, "pci device failed to probe\n");
+ return err;
+}
+
+static void intel_pch_thermal_remove(struct pci_dev *pdev)
+{
+ struct pch_thermal_device *ptd = pci_get_drvdata(pdev);
+
+ thermal_zone_device_unregister(ptd->tzd);
+ iounmap(ptd->hw_base);
+ pci_set_drvdata(pdev, NULL);
+ pci_release_region(pdev, 0);
+ pci_disable_device(pdev);
+}
+
+static struct pci_device_id intel_pch_thermal_id[] = {
+ { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCH_THERMAL_DID_WPT) },
+ { 0, },
+};
+MODULE_DEVICE_TABLE(pci, intel_pch_thermal_id);
+
+static struct pci_driver intel_pch_thermal_driver = {
+ .name = "intel_pch_thermal",
+ .id_table = intel_pch_thermal_id,
+ .probe = intel_pch_thermal_probe,
+ .remove = intel_pch_thermal_remove,
+};
+
+module_pci_driver(intel_pch_thermal_driver);
+
+MODULE_LICENSE("GPL v2");
+MODULE_DESCRIPTION("Intel PCH Thermal driver");
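For reference (not part of the patch): the trip and current temperature reads in the new driver all use the same conversion — the PCH reports raw values in 0.5 degC steps with a -50 degC offset, and the driver exposes millidegrees Celsius. A minimal standalone sketch of that arithmetic, with a made-up example value:

#include <stdio.h>

/* Convert a raw Wildcat Point thermal reading (0.5 degC steps,
 * -50 degC offset) to millidegrees Celsius, mirroring the driver. */
static int wpt_raw_to_millicelsius(unsigned int raw)
{
	return raw * 1000 / 2 - 50000;
}

int main(void)
{
	/* Example: raw 0x12C (300) -> 300 * 500 - 50000 = 100000 mC (100 C) */
	printf("%d\n", wpt_raw_to_millicelsius(0x12C));
	return 0;
}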
diff --git a/drivers/thermal/intel_powerclamp.c b/drivers/thermal/intel_powerclamp.c
index 2ac0c70..6c79588 100644
--- a/drivers/thermal/intel_powerclamp.c
+++ b/drivers/thermal/intel_powerclamp.c
@@ -693,11 +693,14 @@
{ X86_VENDOR_INTEL, 6, 0x3f},
{ X86_VENDOR_INTEL, 6, 0x45},
{ X86_VENDOR_INTEL, 6, 0x46},
+ { X86_VENDOR_INTEL, 6, 0x47},
{ X86_VENDOR_INTEL, 6, 0x4c},
{ X86_VENDOR_INTEL, 6, 0x4d},
+ { X86_VENDOR_INTEL, 6, 0x4e},
{ X86_VENDOR_INTEL, 6, 0x4f},
{ X86_VENDOR_INTEL, 6, 0x56},
{ X86_VENDOR_INTEL, 6, 0x57},
+ { X86_VENDOR_INTEL, 6, 0x5e},
{}
};
MODULE_DEVICE_TABLE(x86cpu, intel_powerclamp_ids);
diff --git a/drivers/thermal/intel_quark_dts_thermal.c b/drivers/thermal/intel_quark_dts_thermal.c
index 4434ec8..5ed90e6 100644
--- a/drivers/thermal/intel_quark_dts_thermal.c
+++ b/drivers/thermal/intel_quark_dts_thermal.c
@@ -186,7 +186,7 @@
return ret;
}
-static int _get_trip_temp(int trip, unsigned long *temp)
+static int _get_trip_temp(int trip, int *temp)
{
int status;
u32 out;
@@ -212,19 +212,18 @@
}
static inline int sys_get_trip_temp(struct thermal_zone_device *tzd,
- int trip, unsigned long *temp)
+ int trip, int *temp)
{
return _get_trip_temp(trip, temp);
}
-static inline int sys_get_crit_temp(struct thermal_zone_device *tzd,
- unsigned long *temp)
+static inline int sys_get_crit_temp(struct thermal_zone_device *tzd, int *temp)
{
return _get_trip_temp(QRK_DTS_ID_TP_CRITICAL, temp);
}
static int update_trip_temp(struct soc_sensor_entry *aux_entry,
- int trip, unsigned long temp)
+ int trip, int temp)
{
u32 out;
u32 temp_out;
@@ -272,7 +271,7 @@
}
static inline int sys_set_trip_temp(struct thermal_zone_device *tzd, int trip,
- unsigned long temp)
+ int temp)
{
return update_trip_temp(tzd->devdata, trip, temp);
}
@@ -289,7 +288,7 @@
}
static int sys_get_curr_temp(struct thermal_zone_device *tzd,
- unsigned long *temp)
+ int *temp)
{
u32 out;
int ret;
diff --git a/drivers/thermal/intel_soc_dts_iosf.c b/drivers/thermal/intel_soc_dts_iosf.c
index 42e4b6a..5841d1d 100644
--- a/drivers/thermal/intel_soc_dts_iosf.c
+++ b/drivers/thermal/intel_soc_dts_iosf.c
@@ -80,7 +80,7 @@
}
static int sys_get_trip_temp(struct thermal_zone_device *tzd, int trip,
- unsigned long *temp)
+ int *temp)
{
int status;
u32 out;
@@ -106,7 +106,7 @@
}
static int update_trip_temp(struct intel_soc_dts_sensor_entry *dts,
- int thres_index, unsigned long temp,
+ int thres_index, int temp,
enum thermal_trip_type trip_type)
{
int status;
@@ -196,7 +196,7 @@
}
static int sys_set_trip_temp(struct thermal_zone_device *tzd, int trip,
- unsigned long temp)
+ int temp)
{
struct intel_soc_dts_sensor_entry *dts = tzd->devdata;
struct intel_soc_dts_sensors *sensors = dts->sensors;
@@ -226,7 +226,7 @@
}
static int sys_get_curr_temp(struct thermal_zone_device *tzd,
- unsigned long *temp)
+ int *temp)
{
int status;
u32 out;
diff --git a/drivers/thermal/kirkwood_thermal.c b/drivers/thermal/kirkwood_thermal.c
index 11041fe..8922366 100644
--- a/drivers/thermal/kirkwood_thermal.c
+++ b/drivers/thermal/kirkwood_thermal.c
@@ -33,7 +33,7 @@
};
static int kirkwood_get_temp(struct thermal_zone_device *thermal,
- unsigned long *temp)
+ int *temp)
{
unsigned long reg;
struct kirkwood_thermal_priv *priv = thermal->devdata;
diff --git a/drivers/thermal/of-thermal.c b/drivers/thermal/of-thermal.c
index b295b2b..42b7d42 100644
--- a/drivers/thermal/of-thermal.c
+++ b/drivers/thermal/of-thermal.c
@@ -91,7 +91,7 @@
/*** DT thermal zone device callbacks ***/
static int of_thermal_get_temp(struct thermal_zone_device *tz,
- unsigned long *temp)
+ int *temp)
{
struct __thermal_zone *data = tz->devdata;
@@ -177,7 +177,7 @@
* Return: zero on success, error code otherwise
*/
static int of_thermal_set_emul_temp(struct thermal_zone_device *tz,
- unsigned long temp)
+ int temp)
{
struct __thermal_zone *data = tz->devdata;
@@ -311,7 +311,7 @@
}
static int of_thermal_get_trip_temp(struct thermal_zone_device *tz, int trip,
- unsigned long *temp)
+ int *temp)
{
struct __thermal_zone *data = tz->devdata;
@@ -324,7 +324,7 @@
}
static int of_thermal_set_trip_temp(struct thermal_zone_device *tz, int trip,
- unsigned long temp)
+ int temp)
{
struct __thermal_zone *data = tz->devdata;
@@ -338,7 +338,7 @@
}
static int of_thermal_get_trip_hyst(struct thermal_zone_device *tz, int trip,
- unsigned long *hyst)
+ int *hyst)
{
struct __thermal_zone *data = tz->devdata;
@@ -351,7 +351,7 @@
}
static int of_thermal_set_trip_hyst(struct thermal_zone_device *tz, int trip,
- unsigned long hyst)
+ int hyst)
{
struct __thermal_zone *data = tz->devdata;
@@ -365,7 +365,7 @@
}
static int of_thermal_get_crit_temp(struct thermal_zone_device *tz,
- unsigned long *temp)
+ int *temp)
{
struct __thermal_zone *data = tz->devdata;
int i;
diff --git a/drivers/thermal/power_allocator.c b/drivers/thermal/power_allocator.c
index 2516769..9c8a7aa 100644
--- a/drivers/thermal/power_allocator.c
+++ b/drivers/thermal/power_allocator.c
@@ -92,8 +92,8 @@
* Return: The power budget for the next period.
*/
static u32 pid_controller(struct thermal_zone_device *tz,
- unsigned long current_temp,
- unsigned long control_temp,
+ int current_temp,
+ int control_temp,
u32 max_allocatable_power)
{
s64 p, i, d, power_range;
@@ -102,7 +102,7 @@
max_power_frac = int_to_frac(max_allocatable_power);
- err = ((s32)control_temp - (s32)current_temp);
+ err = control_temp - current_temp;
err = int_to_frac(err);
/* Calculate the proportional term */
@@ -223,8 +223,8 @@
}
static int allocate_power(struct thermal_zone_device *tz,
- unsigned long current_temp,
- unsigned long control_temp)
+ int current_temp,
+ int control_temp)
{
struct thermal_instance *instance;
struct power_allocator_params *params = tz->governor_data;
@@ -331,7 +331,7 @@
granted_power, total_granted_power,
num_actors, power_range,
max_allocatable_power, current_temp,
- (s32)control_temp - (s32)current_temp);
+ control_temp - current_temp);
kfree(req_power);
unlock:
@@ -416,7 +416,7 @@
{
int ret;
struct power_allocator_params *params;
- unsigned long switch_on_temp, control_temp;
+ int switch_on_temp, control_temp;
u32 temperature_threshold;
if (!tz->tzp || !tz->tzp->sustainable_power) {
@@ -481,7 +481,7 @@
static int power_allocator_throttle(struct thermal_zone_device *tz, int trip)
{
int ret;
- unsigned long switch_on_temp, control_temp, current_temp;
+ int switch_on_temp, control_temp, current_temp;
struct power_allocator_params *params = tz->governor_data;
/*
diff --git a/drivers/thermal/qcom-spmi-temp-alarm.c b/drivers/thermal/qcom-spmi-temp-alarm.c
index c8d27b8..b677aad 100644
--- a/drivers/thermal/qcom-spmi-temp-alarm.c
+++ b/drivers/thermal/qcom-spmi-temp-alarm.c
@@ -117,7 +117,7 @@
return 0;
}
-static int qpnp_tm_get_temp(void *data, long *temp)
+static int qpnp_tm_get_temp(void *data, int *temp)
{
struct qpnp_tm_chip *chip = data;
int ret, mili_celsius;
diff --git a/drivers/thermal/rcar_thermal.c b/drivers/thermal/rcar_thermal.c
index fe4e767..5d4ae7d 100644
--- a/drivers/thermal/rcar_thermal.c
+++ b/drivers/thermal/rcar_thermal.c
@@ -200,8 +200,7 @@
return ret;
}
-static int rcar_thermal_get_temp(struct thermal_zone_device *zone,
- unsigned long *temp)
+static int rcar_thermal_get_temp(struct thermal_zone_device *zone, int *temp)
{
struct rcar_thermal_priv *priv = rcar_zone_to_priv(zone);
@@ -235,7 +234,7 @@
}
static int rcar_thermal_get_trip_temp(struct thermal_zone_device *zone,
- int trip, unsigned long *temp)
+ int trip, int *temp)
{
struct rcar_thermal_priv *priv = rcar_zone_to_priv(zone);
struct device *dev = rcar_priv_to_dev(priv);
@@ -299,7 +298,7 @@
static void rcar_thermal_work(struct work_struct *work)
{
struct rcar_thermal_priv *priv;
- unsigned long cctemp, nctemp;
+ int cctemp, nctemp;
priv = container_of(work, struct rcar_thermal_priv, work.work);
diff --git a/drivers/thermal/rockchip_thermal.c b/drivers/thermal/rockchip_thermal.c
index cd8f5f93..c89ffb2 100644
--- a/drivers/thermal/rockchip_thermal.c
+++ b/drivers/thermal/rockchip_thermal.c
@@ -64,7 +64,7 @@
void (*control)(void __iomem *reg, bool on);
/* Per-sensor methods */
- int (*get_temp)(int chn, void __iomem *reg, long *temp);
+ int (*get_temp)(int chn, void __iomem *reg, int *temp);
void (*set_tshut_temp)(int chn, void __iomem *reg, long temp);
void (*set_tshut_mode)(int chn, void __iomem *reg, enum tshut_mode m);
};
@@ -191,7 +191,7 @@
return 0;
}
-static long rk_tsadcv2_code_to_temp(u32 code)
+static int rk_tsadcv2_code_to_temp(u32 code)
{
unsigned int low = 0;
unsigned int high = ARRAY_SIZE(v2_code_table) - 1;
@@ -277,7 +277,7 @@
writel_relaxed(val, regs + TSADCV2_AUTO_CON);
}
-static int rk_tsadcv2_get_temp(int chn, void __iomem *regs, long *temp)
+static int rk_tsadcv2_get_temp(int chn, void __iomem *regs, int *temp)
{
u32 val;
@@ -366,7 +366,7 @@
return IRQ_HANDLED;
}
-static int rockchip_thermal_get_temp(void *_sensor, long *out_temp)
+static int rockchip_thermal_get_temp(void *_sensor, int *out_temp)
{
struct rockchip_thermal_sensor *sensor = _sensor;
struct rockchip_thermal_data *thermal = sensor->thermal;
@@ -374,7 +374,7 @@
int retval;
retval = tsadc->get_temp(sensor->id, thermal->regs, out_temp);
- dev_dbg(&thermal->pdev->dev, "sensor %d - temp: %ld, retval: %d\n",
+ dev_dbg(&thermal->pdev->dev, "sensor %d - temp: %d, retval: %d\n",
sensor->id, *out_temp, retval);
return retval;
diff --git a/drivers/thermal/samsung/exynos_tmu.c b/drivers/thermal/samsung/exynos_tmu.c
index c96ff10..0bae8cc 100644
--- a/drivers/thermal/samsung/exynos_tmu.c
+++ b/drivers/thermal/samsung/exynos_tmu.c
@@ -207,8 +207,7 @@
int (*tmu_initialize)(struct platform_device *pdev);
void (*tmu_control)(struct platform_device *pdev, bool on);
int (*tmu_read)(struct exynos_tmu_data *data);
- void (*tmu_set_emulation)(struct exynos_tmu_data *data,
- unsigned long temp);
+ void (*tmu_set_emulation)(struct exynos_tmu_data *data, int temp);
void (*tmu_clear_irqs)(struct exynos_tmu_data *data);
};
@@ -216,7 +215,7 @@
{
char data[10], *envp[] = { data, NULL };
struct thermal_zone_device *tz = p->tzd;
- unsigned long temp;
+ int temp;
unsigned int i;
if (!tz) {
@@ -517,7 +516,7 @@
struct thermal_zone_device *tz = data->tzd;
unsigned int status, trim_info;
unsigned int rising_threshold = 0, falling_threshold = 0;
- unsigned long temp, temp_hist;
+ int temp, temp_hist;
int ret = 0, threshold_code, i, sensor_id, cal_type;
status = readb(data->base + EXYNOS_TMU_REG_STATUS);
@@ -610,7 +609,7 @@
struct exynos_tmu_data *data = platform_get_drvdata(pdev);
unsigned int trim_info = 0, con, rising_threshold;
int ret = 0, threshold_code;
- unsigned long crit_temp = 0;
+ int crit_temp = 0;
/*
* For exynos5440 soc triminfo value is swapped between TMU0 and
@@ -663,7 +662,7 @@
unsigned int status, trim_info;
unsigned int rising_threshold = 0, falling_threshold = 0;
int ret = 0, threshold_code, i;
- unsigned long temp, temp_hist;
+ int temp, temp_hist;
unsigned int reg_off, bit_off;
status = readb(data->base + EXYNOS_TMU_REG_STATUS);
@@ -876,7 +875,7 @@
writel(con, data->base + EXYNOS_TMU_REG_CONTROL);
}
-static int exynos_get_temp(void *p, long *temp)
+static int exynos_get_temp(void *p, int *temp)
{
struct exynos_tmu_data *data = p;
@@ -896,7 +895,7 @@
#ifdef CONFIG_THERMAL_EMULATION
static u32 get_emul_con_reg(struct exynos_tmu_data *data, unsigned int val,
- unsigned long temp)
+ int temp)
{
if (temp) {
temp /= MCELSIUS;
@@ -926,7 +925,7 @@
}
static void exynos4412_tmu_set_emulation(struct exynos_tmu_data *data,
- unsigned long temp)
+ int temp)
{
unsigned int val;
u32 emul_con;
@@ -946,7 +945,7 @@
}
static void exynos5440_tmu_set_emulation(struct exynos_tmu_data *data,
- unsigned long temp)
+ int temp)
{
unsigned int val;
@@ -955,7 +954,7 @@
writel(val, data->base + EXYNOS5440_TMU_S0_7_DEBUG);
}
-static int exynos_tmu_set_emulation(void *drv_data, unsigned long temp)
+static int exynos_tmu_set_emulation(void *drv_data, int temp)
{
struct exynos_tmu_data *data = drv_data;
int ret = -EINVAL;
@@ -978,7 +977,7 @@
#else
#define exynos4412_tmu_set_emulation NULL
#define exynos5440_tmu_set_emulation NULL
-static int exynos_tmu_set_emulation(void *drv_data, unsigned long temp)
+static int exynos_tmu_set_emulation(void *drv_data, int temp)
{ return -EINVAL; }
#endif /* CONFIG_THERMAL_EMULATION */
diff --git a/drivers/thermal/spear_thermal.c b/drivers/thermal/spear_thermal.c
index bddb717..534dd91 100644
--- a/drivers/thermal/spear_thermal.c
+++ b/drivers/thermal/spear_thermal.c
@@ -38,7 +38,7 @@
};
static inline int thermal_get_temp(struct thermal_zone_device *thermal,
- unsigned long *temp)
+ int *temp)
{
struct spear_thermal_dev *stdev = thermal->devdata;
diff --git a/drivers/thermal/st/st_thermal.c b/drivers/thermal/st/st_thermal.c
index 88c759d..be637e6 100644
--- a/drivers/thermal/st/st_thermal.c
+++ b/drivers/thermal/st/st_thermal.c
@@ -111,8 +111,7 @@
}
/* Callback to get temperature from HW*/
-static int st_thermal_get_temp(struct thermal_zone_device *th,
- unsigned long *temperature)
+static int st_thermal_get_temp(struct thermal_zone_device *th, int *temperature)
{
struct st_thermal_sensor *sensor = th->devdata;
struct device *dev = sensor->dev;
@@ -159,7 +158,7 @@
}
static int st_thermal_get_trip_temp(struct thermal_zone_device *th,
- int trip, unsigned long *temp)
+ int trip, int *temp)
{
struct st_thermal_sensor *sensor = th->devdata;
struct device *dev = sensor->dev;
diff --git a/drivers/thermal/step_wise.c b/drivers/thermal/step_wise.c
index 5a0f12d..2f9f708 100644
--- a/drivers/thermal/step_wise.c
+++ b/drivers/thermal/step_wise.c
@@ -113,7 +113,7 @@
static void thermal_zone_trip_update(struct thermal_zone_device *tz, int trip)
{
- long trip_temp;
+ int trip_temp;
enum thermal_trip_type trip_type;
enum thermal_trend trend;
struct thermal_instance *instance;
@@ -135,7 +135,7 @@
trace_thermal_zone_trip(tz, trip, trip_type);
}
- dev_dbg(&tz->device, "Trip%d[type=%d,temp=%ld]:trend=%d,throttle=%d\n",
+ dev_dbg(&tz->device, "Trip%d[type=%d,temp=%d]:trend=%d,throttle=%d\n",
trip, trip_type, trip_temp, trend, throttle);
mutex_lock(&tz->lock);
diff --git a/drivers/thermal/tegra_soctherm.c b/drivers/thermal/tegra_soctherm.c
index 9197fc0..74ea576 100644
--- a/drivers/thermal/tegra_soctherm.c
+++ b/drivers/thermal/tegra_soctherm.c
@@ -293,7 +293,7 @@
* H denotes an addition of 0.5 Celsius and N denotes negation
* of the final value.
*/
-static long translate_temp(u16 val)
+static int translate_temp(u16 val)
{
long t;
@@ -306,7 +306,7 @@
return t;
}
-static int tegra_thermctl_get_temp(void *data, long *out_temp)
+static int tegra_thermctl_get_temp(void *data, int *out_temp)
{
struct tegra_thermctl_zone *zone = data;
u32 val;
diff --git a/drivers/thermal/thermal_core.c b/drivers/thermal/thermal_core.c
index 4ca211b..5e5fc70 100644
--- a/drivers/thermal/thermal_core.c
+++ b/drivers/thermal/thermal_core.c
@@ -426,7 +426,7 @@
static void handle_critical_trips(struct thermal_zone_device *tz,
int trip, enum thermal_trip_type trip_type)
{
- long trip_temp;
+ int trip_temp;
tz->ops->get_trip_temp(tz, trip, &trip_temp);
@@ -465,7 +465,7 @@
}
/**
- * thermal_zone_get_temp() - returns its the temperature of thermal zone
+ * thermal_zone_get_temp() - returns the temperature of a thermal zone
* @tz: a valid pointer to a struct thermal_zone_device
* @temp: a valid pointer to where to store the resulting temperature.
*
@@ -474,14 +474,12 @@
*
* Return: On success returns 0, an error code otherwise
*/
-int thermal_zone_get_temp(struct thermal_zone_device *tz, unsigned long *temp)
+int thermal_zone_get_temp(struct thermal_zone_device *tz, int *temp)
{
int ret = -EINVAL;
-#ifdef CONFIG_THERMAL_EMULATION
int count;
- unsigned long crit_temp = -1UL;
+ int crit_temp = INT_MAX;
enum thermal_trip_type type;
-#endif
if (!tz || IS_ERR(tz) || !tz->ops->get_temp)
goto exit;
@@ -489,25 +487,26 @@
mutex_lock(&tz->lock);
ret = tz->ops->get_temp(tz, temp);
-#ifdef CONFIG_THERMAL_EMULATION
- if (!tz->emul_temperature)
- goto skip_emul;
- for (count = 0; count < tz->trips; count++) {
- ret = tz->ops->get_trip_type(tz, count, &type);
- if (!ret && type == THERMAL_TRIP_CRITICAL) {
- ret = tz->ops->get_trip_temp(tz, count, &crit_temp);
- break;
+ if (IS_ENABLED(CONFIG_THERMAL_EMULATION) && tz->emul_temperature) {
+ for (count = 0; count < tz->trips; count++) {
+ ret = tz->ops->get_trip_type(tz, count, &type);
+ if (!ret && type == THERMAL_TRIP_CRITICAL) {
+ ret = tz->ops->get_trip_temp(tz, count,
+ &crit_temp);
+ break;
+ }
}
+
+ /*
+ * Only allow emulating a temperature when the real temperature
+ * is below the critical temperature so that the emulation code
+ * cannot hide critical conditions.
+ */
+ if (!ret && *temp < crit_temp)
+ *temp = tz->emul_temperature;
}
-
- if (ret)
- goto skip_emul;
-
- if (*temp < crit_temp)
- *temp = tz->emul_temperature;
-skip_emul:
-#endif
+
mutex_unlock(&tz->lock);
exit:
return ret;
@@ -516,8 +515,7 @@
static void update_temperature(struct thermal_zone_device *tz)
{
- long temp;
- int ret;
+ int temp, ret;
ret = thermal_zone_get_temp(tz, &temp);
if (ret) {
@@ -577,15 +575,14 @@
temp_show(struct device *dev, struct device_attribute *attr, char *buf)
{
struct thermal_zone_device *tz = to_thermal_zone(dev);
- long temperature;
- int ret;
+ int temperature, ret;
ret = thermal_zone_get_temp(tz, &temperature);
if (ret)
return ret;
- return sprintf(buf, "%ld\n", temperature);
+ return sprintf(buf, "%d\n", temperature);
}
static ssize_t
@@ -689,7 +686,7 @@
{
struct thermal_zone_device *tz = to_thermal_zone(dev);
int trip, ret;
- long temperature;
+ int temperature;
if (!tz->ops->get_trip_temp)
return -EPERM;
@@ -702,7 +699,7 @@
if (ret)
return ret;
- return sprintf(buf, "%ld\n", temperature);
+ return sprintf(buf, "%d\n", temperature);
}
static ssize_t
@@ -711,7 +708,7 @@
{
struct thermal_zone_device *tz = to_thermal_zone(dev);
int trip, ret;
- unsigned long temperature;
+ int temperature;
if (!tz->ops->set_trip_hyst)
return -EPERM;
@@ -719,7 +716,7 @@
if (!sscanf(attr->attr.name, "trip_point_%d_hyst", &trip))
return -EINVAL;
- if (kstrtoul(buf, 10, &temperature))
+ if (kstrtoint(buf, 10, &temperature))
return -EINVAL;
/*
@@ -738,7 +735,7 @@
{
struct thermal_zone_device *tz = to_thermal_zone(dev);
int trip, ret;
- unsigned long temperature;
+ int temperature;
if (!tz->ops->get_trip_hyst)
return -EPERM;
@@ -748,7 +745,7 @@
ret = tz->ops->get_trip_hyst(tz, trip, &temperature);
- return ret ? ret : sprintf(buf, "%ld\n", temperature);
+ return ret ? ret : sprintf(buf, "%d\n", temperature);
}
static ssize_t
@@ -847,7 +844,27 @@
return sprintf(buf, "%s\n", tz->governor->name);
}
-#ifdef CONFIG_THERMAL_EMULATION
+static ssize_t
+available_policies_show(struct device *dev, struct device_attribute *devattr,
+ char *buf)
+{
+ struct thermal_governor *pos;
+ ssize_t count = 0;
+ ssize_t size = PAGE_SIZE;
+
+ mutex_lock(&thermal_governor_lock);
+
+ list_for_each_entry(pos, &thermal_governor_list, governor_list) {
+ size = PAGE_SIZE - count;
+ count += scnprintf(buf + count, size, "%s ", pos->name);
+ }
+ count += scnprintf(buf + count, size, "\n");
+
+ mutex_unlock(&thermal_governor_lock);
+
+ return count;
+}
+
static ssize_t
emul_temp_store(struct device *dev, struct device_attribute *attr,
const char *buf, size_t count)
@@ -873,7 +890,6 @@
return ret ? ret : count;
}
static DEVICE_ATTR(emul_temp, S_IWUSR, NULL, emul_temp_store);
-#endif/*CONFIG_THERMAL_EMULATION*/
static ssize_t
sustainable_power_show(struct device *dev, struct device_attribute *devattr,
@@ -1032,6 +1048,7 @@
static DEVICE_ATTR(mode, 0644, mode_show, mode_store);
static DEVICE_ATTR(passive, S_IRUGO | S_IWUSR, passive_show, passive_store);
static DEVICE_ATTR(policy, S_IRUGO | S_IWUSR, policy_show, policy_store);
+static DEVICE_ATTR(available_policies, S_IRUGO, available_policies_show, NULL);
/* sys I/F for cooling device */
#define to_cooling_device(_dev) \
@@ -1803,11 +1820,12 @@
goto unregister;
}
-#ifdef CONFIG_THERMAL_EMULATION
- result = device_create_file(&tz->device, &dev_attr_emul_temp);
- if (result)
- goto unregister;
-#endif
+ if (IS_ENABLED(CONFIG_THERMAL_EMULATION)) {
+ result = device_create_file(&tz->device, &dev_attr_emul_temp);
+ if (result)
+ goto unregister;
+ }
+
/* Create policy attribute */
result = device_create_file(&tz->device, &dev_attr_policy);
if (result)
@@ -1818,6 +1836,11 @@
if (result)
goto unregister;
+ /* Create available_policies attribute */
+ result = device_create_file(&tz->device, &dev_attr_available_policies);
+ if (result)
+ goto unregister;
+
/* Update 'this' zone's governor information */
mutex_lock(&thermal_governor_lock);
@@ -1849,9 +1872,6 @@
INIT_DELAYED_WORK(&(tz->poll_queue), thermal_zone_device_check);
- if (!tz->ops->get_temp)
- thermal_zone_device_set_polling(tz, 0);
-
thermal_zone_device_update(tz);
return tz;
@@ -1918,6 +1938,7 @@
if (tz->ops->get_mode)
device_remove_file(&tz->device, &dev_attr_mode);
device_remove_file(&tz->device, &dev_attr_policy);
+ device_remove_file(&tz->device, &dev_attr_available_policies);
remove_trip_attrs(tz);
thermal_set_governor(tz, NULL);
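Aside (not part of the patch): the reworked thermal_zone_get_temp() above only substitutes the emulated temperature while the real reading stays below the critical trip point, so emulation can never mask a critical condition. A standalone sketch of that decision, using a hypothetical helper and made-up values:

#include <limits.h>
#include <stdio.h>

/* Return the temperature to report: the emulated value is used only
 * while the real temperature is below the critical trip. crit_temp
 * defaults to INT_MAX when no critical trip exists. */
static int pick_reported_temp(int real_temp, int emul_temp, int crit_temp)
{
	if (emul_temp && real_temp < crit_temp)
		return emul_temp;
	return real_temp;
}

int main(void)
{
	printf("%d\n", pick_reported_temp(45000, 30000, 90000)); /* 30000 */
	printf("%d\n", pick_reported_temp(95000, 30000, 90000)); /* 95000 */
	return 0;
}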
diff --git a/drivers/thermal/thermal_hwmon.c b/drivers/thermal/thermal_hwmon.c
index 1967bee..06fd2ed9 100644
--- a/drivers/thermal/thermal_hwmon.c
+++ b/drivers/thermal/thermal_hwmon.c
@@ -69,7 +69,7 @@
static ssize_t
temp_input_show(struct device *dev, struct device_attribute *attr, char *buf)
{
- long temperature;
+ int temperature;
int ret;
struct thermal_hwmon_attr *hwmon_attr
= container_of(attr, struct thermal_hwmon_attr, attr);
@@ -83,7 +83,7 @@
if (ret)
return ret;
- return sprintf(buf, "%ld\n", temperature);
+ return sprintf(buf, "%d\n", temperature);
}
static ssize_t
@@ -95,14 +95,14 @@
= container_of(hwmon_attr, struct thermal_hwmon_temp,
temp_crit);
struct thermal_zone_device *tz = temp->tz;
- long temperature;
+ int temperature;
int ret;
ret = tz->ops->get_trip_temp(tz, 0, &temperature);
if (ret)
return ret;
- return sprintf(buf, "%ld\n", temperature);
+ return sprintf(buf, "%d\n", temperature);
}
@@ -142,7 +142,7 @@
static bool thermal_zone_crit_temp_valid(struct thermal_zone_device *tz)
{
- unsigned long temp;
+ int temp;
return tz->ops->get_crit_temp && !tz->ops->get_crit_temp(tz, &temp);
}
diff --git a/drivers/thermal/ti-soc-thermal/ti-thermal-common.c b/drivers/thermal/ti-soc-thermal/ti-thermal-common.c
index c7c5b37..b213a12 100644
--- a/drivers/thermal/ti-soc-thermal/ti-thermal-common.c
+++ b/drivers/thermal/ti-soc-thermal/ti-thermal-common.c
@@ -76,14 +76,14 @@
/* thermal zone ops */
/* Get temperature callback function for thermal zone */
-static inline int __ti_thermal_get_temp(void *devdata, long *temp)
+static inline int __ti_thermal_get_temp(void *devdata, int *temp)
{
struct thermal_zone_device *pcb_tz = NULL;
struct ti_thermal_data *data = devdata;
struct ti_bandgap *bgp;
const struct ti_temp_sensor *s;
int ret, tmp, slope, constant;
- unsigned long pcb_temp;
+ int pcb_temp;
if (!data)
return 0;
@@ -119,7 +119,7 @@
}
static inline int ti_thermal_get_temp(struct thermal_zone_device *thermal,
- unsigned long *temp)
+ int *temp)
{
struct ti_thermal_data *data = thermal->devdata;
@@ -229,7 +229,7 @@
/* Get trip temperature callback functions for thermal zone */
static int ti_thermal_get_trip_temp(struct thermal_zone_device *thermal,
- int trip, unsigned long *temp)
+ int trip, int *temp)
{
if (!ti_thermal_is_valid_trip(trip))
return -EINVAL;
@@ -280,7 +280,7 @@
/* Get critical temperature callback functions for thermal zone */
static int ti_thermal_get_crit_temp(struct thermal_zone_device *thermal,
- unsigned long *temp)
+ int *temp)
{
/* shutdown zone */
return ti_thermal_get_trip_temp(thermal, OMAP_TRIP_NUMBER - 1, temp);
diff --git a/drivers/thermal/x86_pkg_temp_thermal.c b/drivers/thermal/x86_pkg_temp_thermal.c
index 50d1d2c..7fc919f 100644
--- a/drivers/thermal/x86_pkg_temp_thermal.c
+++ b/drivers/thermal/x86_pkg_temp_thermal.c
@@ -164,7 +164,7 @@
return err;
}
-static int sys_get_curr_temp(struct thermal_zone_device *tzd, unsigned long *temp)
+static int sys_get_curr_temp(struct thermal_zone_device *tzd, int *temp)
{
u32 eax, edx;
struct phy_dev_entry *phy_dev_entry;
@@ -175,7 +175,7 @@
if (eax & 0x80000000) {
*temp = phy_dev_entry->tj_max -
((eax >> 16) & 0x7f) * 1000;
- pr_debug("sys_get_curr_temp %ld\n", *temp);
+ pr_debug("sys_get_curr_temp %d\n", *temp);
return 0;
}
@@ -183,7 +183,7 @@
}
static int sys_get_trip_temp(struct thermal_zone_device *tzd,
- int trip, unsigned long *temp)
+ int trip, int *temp)
{
u32 eax, edx;
struct phy_dev_entry *phy_dev_entry;
@@ -214,13 +214,13 @@
*temp = phy_dev_entry->tj_max - thres_reg_value * 1000;
else
*temp = 0;
- pr_debug("sys_get_trip_temp %ld\n", *temp);
+ pr_debug("sys_get_trip_temp %d\n", *temp);
return 0;
}
static int sys_set_trip_temp(struct thermal_zone_device *tzd, int trip,
- unsigned long temp)
+ int temp)
{
u32 l, h;
struct phy_dev_entry *phy_dev_entry;
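For reference (not part of the patch): the package temperature above is derived from the digital readout in the thermal status MSR relative to TjMax; per the mask in the code, bits 22:16 hold degrees below TjMax. A hypothetical standalone sketch of that arithmetic, with made-up values:

#include <stdint.h>
#include <stdio.h>

/* Package temperature in millidegrees Celsius: tj_max (in mC) minus
 * the readout (degrees below TjMax) taken from bits 22:16 of the MSR. */
static int pkg_temp_millicelsius(uint32_t eax, int tj_max)
{
	return tj_max - ((eax >> 16) & 0x7f) * 1000;
}

int main(void)
{
	/* Example: TjMax = 100000 mC, readout = 20 -> 80000 mC (80 C) */
	printf("%d\n", pkg_temp_millicelsius(20u << 16, 100000));
	return 0;
}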
diff --git a/drivers/tty/hvc/hvc_xen.c b/drivers/tty/hvc/hvc_xen.c
index a9d837f..10beb15 100644
--- a/drivers/tty/hvc/hvc_xen.c
+++ b/drivers/tty/hvc/hvc_xen.c
@@ -200,7 +200,7 @@
{
int r;
uint64_t v = 0;
- unsigned long mfn;
+ unsigned long gfn;
struct xencons_info *info;
if (!xen_hvm_domain())
@@ -217,7 +217,7 @@
}
/*
* If the toolstack (or the hypervisor) hasn't set these values, the
- * default value is 0. Even though mfn = 0 and evtchn = 0 are
+ * default value is 0. Even though gfn = 0 and evtchn = 0 are
* theoretically correct values, in practice they never are and they
* mean that a legacy toolstack hasn't initialized the pv console correctly.
*/
@@ -229,8 +229,8 @@
r = hvm_get_parameter(HVM_PARAM_CONSOLE_PFN, &v);
if (r < 0 || v == 0)
goto err;
- mfn = v;
- info->intf = xen_remap(mfn << PAGE_SHIFT, PAGE_SIZE);
+ gfn = v;
+ info->intf = xen_remap(gfn << PAGE_SHIFT, PAGE_SIZE);
if (info->intf == NULL)
goto err;
info->vtermno = HVC_COOKIE;
@@ -265,7 +265,8 @@
return 0;
}
info->evtchn = xen_start_info->console.domU.evtchn;
- info->intf = mfn_to_virt(xen_start_info->console.domU.mfn);
+ /* GFN == MFN for PV guest */
+ info->intf = gfn_to_virt(xen_start_info->console.domU.mfn);
info->vtermno = HVC_COOKIE;
spin_lock(&xencons_lock);
@@ -374,7 +375,6 @@
int ret, evtchn, devid, ref, irq;
struct xenbus_transaction xbt;
grant_ref_t gref_head;
- unsigned long mfn;
ret = xenbus_alloc_evtchn(dev, &evtchn);
if (ret)
@@ -389,10 +389,6 @@
irq, &domU_hvc_ops, 256);
if (IS_ERR(info->hvc))
return PTR_ERR(info->hvc);
- if (xen_pv_domain())
- mfn = virt_to_mfn(info->intf);
- else
- mfn = __pa(info->intf) >> PAGE_SHIFT;
ret = gnttab_alloc_grant_references(1, &gref_head);
if (ret < 0)
return ret;
@@ -401,7 +397,7 @@
if (ref < 0)
return ref;
gnttab_grant_foreign_access_ref(ref, info->xbdev->otherend_id,
- mfn, 0);
+ virt_to_gfn(info->intf), 0);
again:
ret = xenbus_transaction_start(&xbt);
diff --git a/drivers/video/fbdev/omap2/omapfb/omapfb-main.c b/drivers/video/fbdev/omap2/omapfb/omapfb-main.c
index 4f0cbb5..d3af01c 100644
--- a/drivers/video/fbdev/omap2/omapfb/omapfb-main.c
+++ b/drivers/video/fbdev/omap2/omapfb/omapfb-main.c
@@ -1091,7 +1091,7 @@
omapfb_put_mem_region(rg);
}
-static struct vm_operations_struct mmap_user_ops = {
+static const struct vm_operations_struct mmap_user_ops = {
.open = mmap_user_open,
.close = mmap_user_close,
};
diff --git a/drivers/video/fbdev/xen-fbfront.c b/drivers/video/fbdev/xen-fbfront.c
index 09dc447..0567d51 100644
--- a/drivers/video/fbdev/xen-fbfront.c
+++ b/drivers/video/fbdev/xen-fbfront.c
@@ -46,7 +46,7 @@
int nr_pages;
int irq;
struct xenfb_page *page;
- unsigned long *mfns;
+ unsigned long *gfns;
int update_wanted; /* XENFB_TYPE_UPDATE wanted */
int feature_resize; /* XENFB_TYPE_RESIZE ok */
struct xenfb_resize resize; /* protected by resize_lock */
@@ -402,8 +402,8 @@
info->nr_pages = (fb_size + PAGE_SIZE - 1) >> PAGE_SHIFT;
- info->mfns = vmalloc(sizeof(unsigned long) * info->nr_pages);
- if (!info->mfns)
+ info->gfns = vmalloc(sizeof(unsigned long) * info->nr_pages);
+ if (!info->gfns)
goto error_nomem;
/* set up shared page */
@@ -530,29 +530,29 @@
framebuffer_release(info->fb_info);
}
free_page((unsigned long)info->page);
- vfree(info->mfns);
+ vfree(info->gfns);
vfree(info->fb);
kfree(info);
return 0;
}
-static unsigned long vmalloc_to_mfn(void *address)
+static unsigned long vmalloc_to_gfn(void *address)
{
- return pfn_to_mfn(vmalloc_to_pfn(address));
+ return xen_page_to_gfn(vmalloc_to_page(address));
}
static void xenfb_init_shared_page(struct xenfb_info *info,
struct fb_info *fb_info)
{
int i;
- int epd = PAGE_SIZE / sizeof(info->mfns[0]);
+ int epd = PAGE_SIZE / sizeof(info->gfns[0]);
for (i = 0; i < info->nr_pages; i++)
- info->mfns[i] = vmalloc_to_mfn(info->fb + i * PAGE_SIZE);
+ info->gfns[i] = vmalloc_to_gfn(info->fb + i * PAGE_SIZE);
for (i = 0; i * epd < info->nr_pages; i++)
- info->page->pd[i] = vmalloc_to_mfn(&info->mfns[i * epd]);
+ info->page->pd[i] = vmalloc_to_gfn(&info->gfns[i * epd]);
info->page->width = fb_info->var.xres;
info->page->height = fb_info->var.yres;
@@ -586,7 +586,7 @@
goto unbind_irq;
}
ret = xenbus_printf(xbt, dev->nodename, "page-ref", "%lu",
- virt_to_mfn(info->page));
+ virt_to_gfn(info->page));
if (ret)
goto error_xenbus;
ret = xenbus_printf(xbt, dev->nodename, "event-channel", "%u",
diff --git a/drivers/watchdog/Kconfig b/drivers/watchdog/Kconfig
index 55c4b5b..c68edc1 100644
--- a/drivers/watchdog/Kconfig
+++ b/drivers/watchdog/Kconfig
@@ -188,6 +188,15 @@
Watchdog timer embedded into AT91SAM9X and AT91CAP9 chips. This will
reboot your system when the timeout is reached.
+config SAMA5D4_WATCHDOG
+ tristate "Atmel SAMA5D4 Watchdog Timer"
+ depends on ARCH_AT91
+ select WATCHDOG_CORE
+ help
+ Atmel SAMA5D4 watchdog timer is embedded into SAMA5D4 chips.
+ Its Watchdog Timer Mode Register can be written more than once.
+ This will reboot your system when the timeout is reached.
+
config CADENCE_WATCHDOG
tristate "Cadence Watchdog Timer"
depends on HAS_IOMEM
@@ -558,6 +567,17 @@
To compile this driver as a module, choose M here: the
module will be called digicolor_wdt.
+config LPC18XX_WATCHDOG
+ tristate "LPC18xx/43xx Watchdog"
+ depends on ARCH_LPC18XX || COMPILE_TEST
+ select WATCHDOG_CORE
+ help
+	  Say Y here to include support for the watchdog timer
+	  in the NXP LPC SoC family, which includes LPC18xx/LPC43xx
+	  processors.
+ To compile this driver as a module, choose M here: the
+ module will be called lpc18xx_wdt.
+
# AVR32 Architecture
config AT32AP700X_WDT
@@ -1334,7 +1354,7 @@
config 8xxx_WDT
tristate "MPC8xxx Platform Watchdog Timer"
- depends on PPC_8xx || PPC_83xx || PPC_86xx
+ depends on PPC_8xx || PPC_83xx || PPC_86xx || PPC_MPC512x
select WATCHDOG_CORE
help
This driver is for a SoC level watchdog that exists on some
diff --git a/drivers/watchdog/Makefile b/drivers/watchdog/Makefile
index 59ea9a1..0c616e3 100644
--- a/drivers/watchdog/Makefile
+++ b/drivers/watchdog/Makefile
@@ -41,6 +41,7 @@
obj-$(CONFIG_KS8695_WATCHDOG) += ks8695_wdt.o
obj-$(CONFIG_S3C2410_WATCHDOG) += s3c2410_wdt.o
obj-$(CONFIG_SA1100_WATCHDOG) += sa1100_wdt.o
+obj-$(CONFIG_SAMA5D4_WATCHDOG) += sama5d4_wdt.o
obj-$(CONFIG_DW_WATCHDOG) += dw_wdt.o
obj-$(CONFIG_EP93XX_WATCHDOG) += ep93xx_wdt.o
obj-$(CONFIG_PNX4008_WATCHDOG) += pnx4008_wdt.o
@@ -66,6 +67,7 @@
obj-$(CONFIG_MESON_WATCHDOG) += meson_wdt.o
obj-$(CONFIG_MEDIATEK_WATCHDOG) += mtk_wdt.o
obj-$(CONFIG_DIGICOLOR_WATCHDOG) += digicolor_wdt.o
+obj-$(CONFIG_LPC18XX_WATCHDOG) += lpc18xx_wdt.o
# AVR32 Architecture
obj-$(CONFIG_AT32AP700X_WDT) += at32ap700x_wdt.o
diff --git a/drivers/watchdog/at91rm9200_wdt.c b/drivers/watchdog/at91rm9200_wdt.c
index 9ba1153..e12a797 100644
--- a/drivers/watchdog/at91rm9200_wdt.c
+++ b/drivers/watchdog/at91rm9200_wdt.c
@@ -244,7 +244,7 @@
}
regmap_st = syscon_node_to_regmap(parent->of_node);
- if (!regmap_st)
+ if (IS_ERR(regmap_st))
return -ENODEV;
res = misc_register(&at91wdt_miscdev);
diff --git a/drivers/watchdog/at91sam9_wdt.c b/drivers/watchdog/at91sam9_wdt.c
index e4698f7..7e6acaf 100644
--- a/drivers/watchdog/at91sam9_wdt.c
+++ b/drivers/watchdog/at91sam9_wdt.c
@@ -17,6 +17,7 @@
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+#include <linux/clk.h>
#include <linux/errno.h>
#include <linux/init.h>
#include <linux/interrupt.h>
@@ -90,6 +91,7 @@
unsigned long heartbeat; /* WDT heartbeat in jiffies */
bool nowayout;
unsigned int irq;
+ struct clk *sclk;
};
/* ......................................................................... */
@@ -352,15 +354,25 @@
if (IS_ERR(wdt->base))
return PTR_ERR(wdt->base);
+ wdt->sclk = devm_clk_get(&pdev->dev, NULL);
+ if (IS_ERR(wdt->sclk))
+ return PTR_ERR(wdt->sclk);
+
+ err = clk_prepare_enable(wdt->sclk);
+ if (err) {
+ dev_err(&pdev->dev, "Could not enable slow clock\n");
+ return err;
+ }
+
if (pdev->dev.of_node) {
err = of_at91wdt_init(pdev->dev.of_node, wdt);
if (err)
- return err;
+ goto err_clk;
}
err = at91_wdt_init(pdev, wdt);
if (err)
- return err;
+ goto err_clk;
platform_set_drvdata(pdev, wdt);
@@ -368,6 +380,11 @@
wdt->wdd.timeout, wdt->nowayout);
return 0;
+
+err_clk:
+ clk_disable_unprepare(wdt->sclk);
+
+ return err;
}
static int __exit at91wdt_remove(struct platform_device *pdev)
@@ -377,6 +394,7 @@
pr_warn("I quit now, hardware will probably reboot!\n");
del_timer(&wdt->timer);
+ clk_disable_unprepare(wdt->sclk);
return 0;
}
diff --git a/drivers/watchdog/at91sam9_wdt.h b/drivers/watchdog/at91sam9_wdt.h
index c6fbb2e6..b79a83b 100644
--- a/drivers/watchdog/at91sam9_wdt.h
+++ b/drivers/watchdog/at91sam9_wdt.h
@@ -22,11 +22,13 @@
#define AT91_WDT_MR 0x04 /* Watchdog Mode Register */
#define AT91_WDT_WDV (0xfff << 0) /* Counter Value */
+#define AT91_WDT_SET_WDV(x) ((x) & AT91_WDT_WDV)
#define AT91_WDT_WDFIEN (1 << 12) /* Fault Interrupt Enable */
#define AT91_WDT_WDRSTEN (1 << 13) /* Reset Processor */
#define AT91_WDT_WDRPROC (1 << 14) /* Timer Restart */
#define AT91_WDT_WDDIS (1 << 15) /* Watchdog Disable */
#define AT91_WDT_WDD (0xfff << 16) /* Delta Value */
+#define AT91_WDT_SET_WDD(x) (((x) << 16) & AT91_WDT_WDD)
#define AT91_WDT_WDDBGHLT (1 << 28) /* Debug Halt */
#define AT91_WDT_WDIDLEHLT (1 << 29) /* Idle Halt */
diff --git a/drivers/watchdog/bcm2835_wdt.c b/drivers/watchdog/bcm2835_wdt.c
index 7116968..66c3e65 100644
--- a/drivers/watchdog/bcm2835_wdt.c
+++ b/drivers/watchdog/bcm2835_wdt.c
@@ -182,6 +182,7 @@
watchdog_set_drvdata(&bcm2835_wdt_wdd, wdt);
watchdog_init_timeout(&bcm2835_wdt_wdd, heartbeat, dev);
watchdog_set_nowayout(&bcm2835_wdt_wdd, nowayout);
+ bcm2835_wdt_wdd.parent = &pdev->dev;
err = watchdog_register_device(&bcm2835_wdt_wdd);
if (err) {
dev_err(dev, "Failed to register watchdog device");
diff --git a/drivers/watchdog/bcm47xx_wdt.c b/drivers/watchdog/bcm47xx_wdt.c
index b28a072..4064a43 100644
--- a/drivers/watchdog/bcm47xx_wdt.c
+++ b/drivers/watchdog/bcm47xx_wdt.c
@@ -209,6 +209,7 @@
wdt->wdd.info = &bcm47xx_wdt_info;
wdt->wdd.timeout = WDT_DEFAULT_TIME;
+ wdt->wdd.parent = &pdev->dev;
ret = wdt->wdd.ops->set_timeout(&wdt->wdd, timeout);
if (ret)
goto err_timer;
diff --git a/drivers/watchdog/bcm_kona_wdt.c b/drivers/watchdog/bcm_kona_wdt.c
index 22d8ae6..e0c9842 100644
--- a/drivers/watchdog/bcm_kona_wdt.c
+++ b/drivers/watchdog/bcm_kona_wdt.c
@@ -319,6 +319,7 @@
spin_lock_init(&wdt->lock);
platform_set_drvdata(pdev, wdt);
watchdog_set_drvdata(&bcm_kona_wdt_wdd, wdt);
+ bcm_kona_wdt_wdd.parent = &pdev->dev;
ret = bcm_kona_wdt_set_timeout_reg(&bcm_kona_wdt_wdd, 0);
if (ret) {
diff --git a/drivers/watchdog/booke_wdt.c b/drivers/watchdog/booke_wdt.c
index e96b09b..04da4b6 100644
--- a/drivers/watchdog/booke_wdt.c
+++ b/drivers/watchdog/booke_wdt.c
@@ -186,8 +186,6 @@
static int booke_wdt_set_timeout(struct watchdog_device *wdt_dev,
unsigned int timeout)
{
- if (timeout > MAX_WDT_TIMEOUT)
- return -EINVAL;
wdt_dev->timeout = timeout;
booke_wdt_set(wdt_dev);
@@ -211,7 +209,6 @@
.info = &booke_wdt_info,
.ops = &booke_wdt_ops,
.min_timeout = 1,
- .max_timeout = 0xFFFF
};
static void __exit booke_wdt_exit(void)
@@ -229,6 +226,7 @@
booke_wdt_set_timeout(&booke_wdt_dev,
period_to_sec(booke_wdt_period));
watchdog_set_nowayout(&booke_wdt_dev, nowayout);
+ booke_wdt_dev.max_timeout = MAX_WDT_TIMEOUT;
if (booke_wdt_enabled)
booke_wdt_start(&booke_wdt_dev);
diff --git a/drivers/watchdog/coh901327_wdt.c b/drivers/watchdog/coh901327_wdt.c
index ce12f43..a099b77 100644
--- a/drivers/watchdog/coh901327_wdt.c
+++ b/drivers/watchdog/coh901327_wdt.c
@@ -358,6 +358,7 @@
if (ret < 0)
coh901327_wdt.timeout = 60;
+ coh901327_wdt.parent = &pdev->dev;
ret = watchdog_register_device(&coh901327_wdt);
if (ret == 0)
dev_info(&pdev->dev,
diff --git a/drivers/watchdog/da9052_wdt.c b/drivers/watchdog/da9052_wdt.c
index 2e95896..67e6797 100644
--- a/drivers/watchdog/da9052_wdt.c
+++ b/drivers/watchdog/da9052_wdt.c
@@ -195,6 +195,7 @@
da9052_wdt->timeout = DA9052_DEF_TIMEOUT;
da9052_wdt->info = &da9052_wdt_info;
da9052_wdt->ops = &da9052_wdt_ops;
+ da9052_wdt->parent = &pdev->dev;
watchdog_set_drvdata(da9052_wdt, driver_data);
kref_init(&driver_data->kref);
diff --git a/drivers/watchdog/da9055_wdt.c b/drivers/watchdog/da9055_wdt.c
index 495089d..04d1430 100644
--- a/drivers/watchdog/da9055_wdt.c
+++ b/drivers/watchdog/da9055_wdt.c
@@ -161,6 +161,7 @@
da9055_wdt->timeout = DA9055_DEF_TIMEOUT;
da9055_wdt->info = &da9055_wdt_info;
da9055_wdt->ops = &da9055_wdt_ops;
+ da9055_wdt->parent = &pdev->dev;
watchdog_set_nowayout(da9055_wdt, nowayout);
watchdog_set_drvdata(da9055_wdt, driver_data);
diff --git a/drivers/watchdog/da9062_wdt.c b/drivers/watchdog/da9062_wdt.c
index b3a870c..7386111 100644
--- a/drivers/watchdog/da9062_wdt.c
+++ b/drivers/watchdog/da9062_wdt.c
@@ -210,6 +210,7 @@
wdt->wdtdev.max_timeout = DA9062_WDT_MAX_TIMEOUT;
wdt->wdtdev.timeout = DA9062_WDG_DEFAULT_TIMEOUT;
wdt->wdtdev.status = WATCHDOG_NOWAYOUT_INIT_STATUS;
+ wdt->wdtdev.parent = &pdev->dev;
watchdog_set_drvdata(&wdt->wdtdev, wdt);
dev_set_drvdata(&pdev->dev, wdt);
diff --git a/drivers/watchdog/da9063_wdt.c b/drivers/watchdog/da9063_wdt.c
index e2fe2eb..6bf130b 100644
--- a/drivers/watchdog/da9063_wdt.c
+++ b/drivers/watchdog/da9063_wdt.c
@@ -175,6 +175,7 @@
wdt->wdtdev.min_timeout = DA9063_WDT_MIN_TIMEOUT;
wdt->wdtdev.max_timeout = DA9063_WDT_MAX_TIMEOUT;
wdt->wdtdev.timeout = DA9063_WDG_TIMEOUT;
+ wdt->wdtdev.parent = &pdev->dev;
wdt->wdtdev.status = WATCHDOG_NOWAYOUT_INIT_STATUS;
diff --git a/drivers/watchdog/davinci_wdt.c b/drivers/watchdog/davinci_wdt.c
index cfdf8a4..17454ca 100644
--- a/drivers/watchdog/davinci_wdt.c
+++ b/drivers/watchdog/davinci_wdt.c
@@ -179,6 +179,7 @@
wdd->min_timeout = 1;
wdd->max_timeout = MAX_HEARTBEAT;
wdd->timeout = DEFAULT_HEARTBEAT;
+ wdd->parent = &pdev->dev;
watchdog_init_timeout(wdd, heartbeat, dev);
diff --git a/drivers/watchdog/digicolor_wdt.c b/drivers/watchdog/digicolor_wdt.c
index 31d8e49..50abe1b 100644
--- a/drivers/watchdog/digicolor_wdt.c
+++ b/drivers/watchdog/digicolor_wdt.c
@@ -143,6 +143,7 @@
}
dc_wdt_wdd.max_timeout = U32_MAX / clk_get_rate(wdt->clk);
dc_wdt_wdd.timeout = dc_wdt_wdd.max_timeout;
+ dc_wdt_wdd.parent = &pdev->dev;
spin_lock_init(&wdt->lock);
diff --git a/drivers/watchdog/ep93xx_wdt.c b/drivers/watchdog/ep93xx_wdt.c
index 7a2cc71..0a4d7cc 100644
--- a/drivers/watchdog/ep93xx_wdt.c
+++ b/drivers/watchdog/ep93xx_wdt.c
@@ -132,6 +132,7 @@
val = readl(mmio_base + EP93XX_WATCHDOG);
ep93xx_wdt_wdd.bootstatus = (val & 0x01) ? WDIOF_CARDRESET : 0;
ep93xx_wdt_wdd.timeout = timeout;
+ ep93xx_wdt_wdd.parent = &pdev->dev;
watchdog_set_nowayout(&ep93xx_wdt_wdd, nowayout);
diff --git a/drivers/watchdog/gpio_wdt.c b/drivers/watchdog/gpio_wdt.c
index 1687cc2..90d59d3 100644
--- a/drivers/watchdog/gpio_wdt.c
+++ b/drivers/watchdog/gpio_wdt.c
@@ -50,12 +50,41 @@
gpio_direction_input(priv->gpio);
}
+static void gpio_wdt_hwping(unsigned long data)
+{
+ struct watchdog_device *wdd = (struct watchdog_device *)data;
+ struct gpio_wdt_priv *priv = watchdog_get_drvdata(wdd);
+
+ if (priv->armed && time_after(jiffies, priv->last_jiffies +
+ msecs_to_jiffies(wdd->timeout * 1000))) {
+ dev_crit(wdd->dev, "Timer expired. System will reboot soon!\n");
+ return;
+ }
+
+ /* Restart timer */
+ mod_timer(&priv->timer, jiffies + priv->hw_margin);
+
+ switch (priv->hw_algo) {
+ case HW_ALGO_TOGGLE:
+ /* Toggle output pin */
+ priv->state = !priv->state;
+ gpio_set_value_cansleep(priv->gpio, priv->state);
+ break;
+ case HW_ALGO_LEVEL:
+ /* Pulse */
+ gpio_set_value_cansleep(priv->gpio, !priv->active_low);
+ udelay(1);
+ gpio_set_value_cansleep(priv->gpio, priv->active_low);
+ break;
+ }
+}
+
static void gpio_wdt_start_impl(struct gpio_wdt_priv *priv)
{
priv->state = priv->active_low;
gpio_direction_output(priv->gpio, priv->state);
priv->last_jiffies = jiffies;
- mod_timer(&priv->timer, priv->last_jiffies + priv->hw_margin);
+ gpio_wdt_hwping((unsigned long)&priv->wdd);
}
static int gpio_wdt_start(struct watchdog_device *wdd)
@@ -97,35 +126,6 @@
return gpio_wdt_ping(wdd);
}
-static void gpio_wdt_hwping(unsigned long data)
-{
- struct watchdog_device *wdd = (struct watchdog_device *)data;
- struct gpio_wdt_priv *priv = watchdog_get_drvdata(wdd);
-
- if (priv->armed && time_after(jiffies, priv->last_jiffies +
- msecs_to_jiffies(wdd->timeout * 1000))) {
- dev_crit(wdd->dev, "Timer expired. System will reboot soon!\n");
- return;
- }
-
- /* Restart timer */
- mod_timer(&priv->timer, jiffies + priv->hw_margin);
-
- switch (priv->hw_algo) {
- case HW_ALGO_TOGGLE:
- /* Toggle output pin */
- priv->state = !priv->state;
- gpio_set_value_cansleep(priv->gpio, priv->state);
- break;
- case HW_ALGO_LEVEL:
- /* Pulse */
- gpio_set_value_cansleep(priv->gpio, !priv->active_low);
- udelay(1);
- gpio_set_value_cansleep(priv->gpio, priv->active_low);
- break;
- }
-}
-
static int gpio_wdt_notify_sys(struct notifier_block *nb, unsigned long code,
void *unused)
{
@@ -182,10 +182,10 @@
ret = of_property_read_string(pdev->dev.of_node, "hw_algo", &algo);
if (ret)
return ret;
- if (!strncmp(algo, "toggle", 6)) {
+ if (!strcmp(algo, "toggle")) {
priv->hw_algo = HW_ALGO_TOGGLE;
f = GPIOF_IN;
- } else if (!strncmp(algo, "level", 5)) {
+ } else if (!strcmp(algo, "level")) {
priv->hw_algo = HW_ALGO_LEVEL;
f = priv->active_low ? GPIOF_OUT_INIT_HIGH : GPIOF_OUT_INIT_LOW;
} else {
@@ -217,6 +217,7 @@
priv->wdd.ops = &gpio_wdt_ops;
priv->wdd.min_timeout = SOFT_TIMEOUT_MIN;
priv->wdd.max_timeout = SOFT_TIMEOUT_MAX;
+ priv->wdd.parent = &pdev->dev;
if (watchdog_init_timeout(&priv->wdd, 0, &pdev->dev) < 0)
priv->wdd.timeout = SOFT_TIMEOUT_DEF;
diff --git a/drivers/watchdog/ie6xx_wdt.c b/drivers/watchdog/ie6xx_wdt.c
index 9bc39ae..78c2541 100644
--- a/drivers/watchdog/ie6xx_wdt.c
+++ b/drivers/watchdog/ie6xx_wdt.c
@@ -267,6 +267,7 @@
ie6xx_wdt_dev.timeout = timeout;
watchdog_set_nowayout(&ie6xx_wdt_dev, nowayout);
+ ie6xx_wdt_dev.parent = &pdev->dev;
spin_lock_init(&ie6xx_wdt_data.unlock_sequence);
diff --git a/drivers/watchdog/imgpdc_wdt.c b/drivers/watchdog/imgpdc_wdt.c
index 0f73621..15ab072 100644
--- a/drivers/watchdog/imgpdc_wdt.c
+++ b/drivers/watchdog/imgpdc_wdt.c
@@ -316,6 +316,7 @@
{
struct pdc_wdt_dev *pdc_wdt = platform_get_drvdata(pdev);
+ unregister_restart_handler(&pdc_wdt->restart_handler);
pdc_wdt_stop(&pdc_wdt->wdt_dev);
watchdog_unregister_device(&pdc_wdt->wdt_dev);
clk_disable_unprepare(pdc_wdt->wdt_clk);
diff --git a/drivers/watchdog/intel-mid_wdt.c b/drivers/watchdog/intel-mid_wdt.c
index 84f6701c..0a436b5 100644
--- a/drivers/watchdog/intel-mid_wdt.c
+++ b/drivers/watchdog/intel-mid_wdt.c
@@ -137,6 +137,7 @@
wdt_dev->min_timeout = MID_WDT_TIMEOUT_MIN;
wdt_dev->max_timeout = MID_WDT_TIMEOUT_MAX;
wdt_dev->timeout = MID_WDT_DEFAULT_TIMEOUT;
+ wdt_dev->parent = &pdev->dev;
watchdog_set_drvdata(wdt_dev, &pdev->dev);
platform_set_drvdata(pdev, wdt_dev);
diff --git a/drivers/watchdog/jz4740_wdt.c b/drivers/watchdog/jz4740_wdt.c
index 4c2cc09..6a7d5c3 100644
--- a/drivers/watchdog/jz4740_wdt.c
+++ b/drivers/watchdog/jz4740_wdt.c
@@ -174,6 +174,7 @@
jz4740_wdt->timeout = heartbeat;
jz4740_wdt->min_timeout = 1;
jz4740_wdt->max_timeout = MAX_HEARTBEAT;
+ jz4740_wdt->parent = &pdev->dev;
watchdog_set_nowayout(jz4740_wdt, nowayout);
watchdog_set_drvdata(jz4740_wdt, drvdata);
diff --git a/drivers/watchdog/lpc18xx_wdt.c b/drivers/watchdog/lpc18xx_wdt.c
new file mode 100644
index 0000000..ab7b8b1
--- /dev/null
+++ b/drivers/watchdog/lpc18xx_wdt.c
@@ -0,0 +1,340 @@
+/*
+ * NXP LPC18xx Watchdog Timer (WDT)
+ *
+ * Copyright (c) 2015 Ariel D'Alessandro <ariel@vanguardiasur.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published by
+ * the Free Software Foundation.
+ *
+ * Notes
+ * -----
+ * The Watchdog consists of a fixed divide-by-4 clock pre-scaler and a 24-bit
+ * counter which decrements on every clock cycle.
+ */
+
+#include <linux/clk.h>
+#include <linux/io.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/platform_device.h>
+#include <linux/reboot.h>
+#include <linux/watchdog.h>
+
+/* Registers */
+#define LPC18XX_WDT_MOD 0x00
+#define LPC18XX_WDT_MOD_WDEN BIT(0)
+#define LPC18XX_WDT_MOD_WDRESET BIT(1)
+
+#define LPC18XX_WDT_TC 0x04
+#define LPC18XX_WDT_TC_MIN 0xff
+#define LPC18XX_WDT_TC_MAX 0xffffff
+
+#define LPC18XX_WDT_FEED 0x08
+#define LPC18XX_WDT_FEED_MAGIC1 0xaa
+#define LPC18XX_WDT_FEED_MAGIC2 0x55
+
+#define LPC18XX_WDT_TV 0x0c
+
+/* Clock pre-scaler */
+#define LPC18XX_WDT_CLK_DIV 4
+
+/* Timeout values in seconds */
+#define LPC18XX_WDT_DEF_TIMEOUT 30U
+
+static int heartbeat;
+module_param(heartbeat, int, 0);
+MODULE_PARM_DESC(heartbeat, "Watchdog heartbeats in seconds (default="
+ __MODULE_STRING(LPC18XX_WDT_DEF_TIMEOUT) ")");
+
+static bool nowayout = WATCHDOG_NOWAYOUT;
+module_param(nowayout, bool, 0);
+MODULE_PARM_DESC(nowayout, "Watchdog cannot be stopped once started (default="
+ __MODULE_STRING(WATCHDOG_NOWAYOUT) ")");
+
+struct lpc18xx_wdt_dev {
+ struct watchdog_device wdt_dev;
+ struct clk *reg_clk;
+ struct clk *wdt_clk;
+ unsigned long clk_rate;
+ void __iomem *base;
+ struct timer_list timer;
+ struct notifier_block restart_handler;
+ spinlock_t lock;
+};
+
+static int lpc18xx_wdt_feed(struct watchdog_device *wdt_dev)
+{
+ struct lpc18xx_wdt_dev *lpc18xx_wdt = watchdog_get_drvdata(wdt_dev);
+ unsigned long flags;
+
+ /*
+ * An abort condition will occur if an interrupt happens during the feed
+ * sequence.
+ */
+ spin_lock_irqsave(&lpc18xx_wdt->lock, flags);
+ writel(LPC18XX_WDT_FEED_MAGIC1, lpc18xx_wdt->base + LPC18XX_WDT_FEED);
+ writel(LPC18XX_WDT_FEED_MAGIC2, lpc18xx_wdt->base + LPC18XX_WDT_FEED);
+ spin_unlock_irqrestore(&lpc18xx_wdt->lock, flags);
+
+ return 0;
+}
+
+static void lpc18xx_wdt_timer_feed(unsigned long data)
+{
+ struct watchdog_device *wdt_dev = (struct watchdog_device *)data;
+ struct lpc18xx_wdt_dev *lpc18xx_wdt = watchdog_get_drvdata(wdt_dev);
+
+ lpc18xx_wdt_feed(wdt_dev);
+
+ /* Use safe value (1/2 of real timeout) */
+ mod_timer(&lpc18xx_wdt->timer, jiffies +
+ msecs_to_jiffies((wdt_dev->timeout * MSEC_PER_SEC) / 2));
+}
+
+/*
+ * Since LPC18xx Watchdog cannot be disabled in hardware, we must keep feeding
+ * it with a timer until userspace watchdog software takes over.
+ */
+static int lpc18xx_wdt_stop(struct watchdog_device *wdt_dev)
+{
+ lpc18xx_wdt_timer_feed((unsigned long)wdt_dev);
+
+ return 0;
+}
+
+static void __lpc18xx_wdt_set_timeout(struct lpc18xx_wdt_dev *lpc18xx_wdt)
+{
+ unsigned int val;
+
+ val = DIV_ROUND_UP(lpc18xx_wdt->wdt_dev.timeout * lpc18xx_wdt->clk_rate,
+ LPC18XX_WDT_CLK_DIV);
+ writel(val, lpc18xx_wdt->base + LPC18XX_WDT_TC);
+}
+
+static int lpc18xx_wdt_set_timeout(struct watchdog_device *wdt_dev,
+ unsigned int new_timeout)
+{
+ struct lpc18xx_wdt_dev *lpc18xx_wdt = watchdog_get_drvdata(wdt_dev);
+
+ lpc18xx_wdt->wdt_dev.timeout = new_timeout;
+ __lpc18xx_wdt_set_timeout(lpc18xx_wdt);
+
+ return 0;
+}
+
+static unsigned int lpc18xx_wdt_get_timeleft(struct watchdog_device *wdt_dev)
+{
+ struct lpc18xx_wdt_dev *lpc18xx_wdt = watchdog_get_drvdata(wdt_dev);
+ unsigned int val;
+
+ val = readl(lpc18xx_wdt->base + LPC18XX_WDT_TV);
+ return (val * LPC18XX_WDT_CLK_DIV) / lpc18xx_wdt->clk_rate;
+}
+
+static int lpc18xx_wdt_start(struct watchdog_device *wdt_dev)
+{
+ struct lpc18xx_wdt_dev *lpc18xx_wdt = watchdog_get_drvdata(wdt_dev);
+ unsigned int val;
+
+ if (timer_pending(&lpc18xx_wdt->timer))
+ del_timer(&lpc18xx_wdt->timer);
+
+ val = readl(lpc18xx_wdt->base + LPC18XX_WDT_MOD);
+ val |= LPC18XX_WDT_MOD_WDEN;
+ val |= LPC18XX_WDT_MOD_WDRESET;
+ writel(val, lpc18xx_wdt->base + LPC18XX_WDT_MOD);
+
+ /*
+ * Setting the WDEN bit in the WDMOD register is not sufficient to
+ * enable the Watchdog. A valid feed sequence must be completed after
+ * setting WDEN before the Watchdog is capable of generating a reset.
+ */
+ lpc18xx_wdt_feed(wdt_dev);
+
+ return 0;
+}
+
+static struct watchdog_info lpc18xx_wdt_info = {
+ .identity = "NXP LPC18xx Watchdog",
+ .options = WDIOF_SETTIMEOUT |
+ WDIOF_KEEPALIVEPING |
+ WDIOF_MAGICCLOSE,
+};
+
+static const struct watchdog_ops lpc18xx_wdt_ops = {
+ .owner = THIS_MODULE,
+ .start = lpc18xx_wdt_start,
+ .stop = lpc18xx_wdt_stop,
+ .ping = lpc18xx_wdt_feed,
+ .set_timeout = lpc18xx_wdt_set_timeout,
+ .get_timeleft = lpc18xx_wdt_get_timeleft,
+};
+
+static int lpc18xx_wdt_restart(struct notifier_block *this, unsigned long mode,
+ void *cmd)
+{
+ struct lpc18xx_wdt_dev *lpc18xx_wdt = container_of(this,
+ struct lpc18xx_wdt_dev, restart_handler);
+ unsigned long flags;
+ int val;
+
+ /*
+ * Incorrect feed sequence causes immediate watchdog reset if enabled.
+ */
+ spin_lock_irqsave(&lpc18xx_wdt->lock, flags);
+
+ val = readl(lpc18xx_wdt->base + LPC18XX_WDT_MOD);
+ val |= LPC18XX_WDT_MOD_WDEN;
+ val |= LPC18XX_WDT_MOD_WDRESET;
+ writel(val, lpc18xx_wdt->base + LPC18XX_WDT_MOD);
+
+ writel(LPC18XX_WDT_FEED_MAGIC1, lpc18xx_wdt->base + LPC18XX_WDT_FEED);
+ writel(LPC18XX_WDT_FEED_MAGIC2, lpc18xx_wdt->base + LPC18XX_WDT_FEED);
+
+ writel(LPC18XX_WDT_FEED_MAGIC1, lpc18xx_wdt->base + LPC18XX_WDT_FEED);
+ writel(LPC18XX_WDT_FEED_MAGIC1, lpc18xx_wdt->base + LPC18XX_WDT_FEED);
+
+ spin_unlock_irqrestore(&lpc18xx_wdt->lock, flags);
+
+ return NOTIFY_OK;
+}
+
+static int lpc18xx_wdt_probe(struct platform_device *pdev)
+{
+ struct lpc18xx_wdt_dev *lpc18xx_wdt;
+ struct device *dev = &pdev->dev;
+ struct resource *res;
+ int ret;
+
+ lpc18xx_wdt = devm_kzalloc(dev, sizeof(*lpc18xx_wdt), GFP_KERNEL);
+ if (!lpc18xx_wdt)
+ return -ENOMEM;
+
+ res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+ lpc18xx_wdt->base = devm_ioremap_resource(dev, res);
+ if (IS_ERR(lpc18xx_wdt->base))
+ return PTR_ERR(lpc18xx_wdt->base);
+
+ lpc18xx_wdt->reg_clk = devm_clk_get(dev, "reg");
+ if (IS_ERR(lpc18xx_wdt->reg_clk)) {
+ dev_err(dev, "failed to get the reg clock\n");
+ return PTR_ERR(lpc18xx_wdt->reg_clk);
+ }
+
+ lpc18xx_wdt->wdt_clk = devm_clk_get(dev, "wdtclk");
+ if (IS_ERR(lpc18xx_wdt->wdt_clk)) {
+ dev_err(dev, "failed to get the wdt clock\n");
+ return PTR_ERR(lpc18xx_wdt->wdt_clk);
+ }
+
+ ret = clk_prepare_enable(lpc18xx_wdt->reg_clk);
+ if (ret) {
+ dev_err(dev, "could not prepare or enable sys clock\n");
+ return ret;
+ }
+
+ ret = clk_prepare_enable(lpc18xx_wdt->wdt_clk);
+ if (ret) {
+ dev_err(dev, "could not prepare or enable wdt clock\n");
+ goto disable_reg_clk;
+ }
+
+ /* We use the clock rate to calculate timeouts */
+ lpc18xx_wdt->clk_rate = clk_get_rate(lpc18xx_wdt->wdt_clk);
+ if (lpc18xx_wdt->clk_rate == 0) {
+ dev_err(dev, "failed to get clock rate\n");
+ ret = -EINVAL;
+ goto disable_wdt_clk;
+ }
+
+ lpc18xx_wdt->wdt_dev.info = &lpc18xx_wdt_info;
+ lpc18xx_wdt->wdt_dev.ops = &lpc18xx_wdt_ops;
+
+ lpc18xx_wdt->wdt_dev.min_timeout = DIV_ROUND_UP(LPC18XX_WDT_TC_MIN *
+ LPC18XX_WDT_CLK_DIV, lpc18xx_wdt->clk_rate);
+
+ lpc18xx_wdt->wdt_dev.max_timeout = (LPC18XX_WDT_TC_MAX *
+ LPC18XX_WDT_CLK_DIV) / lpc18xx_wdt->clk_rate;
+
+ lpc18xx_wdt->wdt_dev.timeout = min(lpc18xx_wdt->wdt_dev.max_timeout,
+ LPC18XX_WDT_DEF_TIMEOUT);
+
+ spin_lock_init(&lpc18xx_wdt->lock);
+
+ lpc18xx_wdt->wdt_dev.parent = dev;
+ watchdog_set_drvdata(&lpc18xx_wdt->wdt_dev, lpc18xx_wdt);
+
+ ret = watchdog_init_timeout(&lpc18xx_wdt->wdt_dev, heartbeat, dev);
+
+ __lpc18xx_wdt_set_timeout(lpc18xx_wdt);
+
+ setup_timer(&lpc18xx_wdt->timer, lpc18xx_wdt_timer_feed,
+ (unsigned long)&lpc18xx_wdt->wdt_dev);
+
+ watchdog_set_nowayout(&lpc18xx_wdt->wdt_dev, nowayout);
+
+ platform_set_drvdata(pdev, lpc18xx_wdt);
+
+ ret = watchdog_register_device(&lpc18xx_wdt->wdt_dev);
+ if (ret)
+ goto disable_wdt_clk;
+
+ lpc18xx_wdt->restart_handler.notifier_call = lpc18xx_wdt_restart;
+ lpc18xx_wdt->restart_handler.priority = 128;
+ ret = register_restart_handler(&lpc18xx_wdt->restart_handler);
+ if (ret)
+ dev_warn(dev, "failed to register restart handler: %d\n", ret);
+
+ return 0;
+
+disable_wdt_clk:
+ clk_disable_unprepare(lpc18xx_wdt->wdt_clk);
+disable_reg_clk:
+ clk_disable_unprepare(lpc18xx_wdt->reg_clk);
+ return ret;
+}
+
+static void lpc18xx_wdt_shutdown(struct platform_device *pdev)
+{
+ struct lpc18xx_wdt_dev *lpc18xx_wdt = platform_get_drvdata(pdev);
+
+ lpc18xx_wdt_stop(&lpc18xx_wdt->wdt_dev);
+}
+
+static int lpc18xx_wdt_remove(struct platform_device *pdev)
+{
+ struct lpc18xx_wdt_dev *lpc18xx_wdt = platform_get_drvdata(pdev);
+
+ unregister_restart_handler(&lpc18xx_wdt->restart_handler);
+
+ dev_warn(&pdev->dev, "I quit now, hardware will probably reboot!\n");
+ del_timer(&lpc18xx_wdt->timer);
+
+ watchdog_unregister_device(&lpc18xx_wdt->wdt_dev);
+ clk_disable_unprepare(lpc18xx_wdt->wdt_clk);
+ clk_disable_unprepare(lpc18xx_wdt->reg_clk);
+
+ return 0;
+}
+
+static const struct of_device_id lpc18xx_wdt_match[] = {
+ { .compatible = "nxp,lpc1850-wwdt" },
+ {}
+};
+MODULE_DEVICE_TABLE(of, lpc18xx_wdt_match);
+
+static struct platform_driver lpc18xx_wdt_driver = {
+ .driver = {
+ .name = "lpc18xx-wdt",
+ .of_match_table = lpc18xx_wdt_match,
+ },
+ .probe = lpc18xx_wdt_probe,
+ .remove = lpc18xx_wdt_remove,
+ .shutdown = lpc18xx_wdt_shutdown,
+};
+module_platform_driver(lpc18xx_wdt_driver);
+
+MODULE_AUTHOR("Ariel D'Alessandro <ariel@vanguardiasur.com.ar>");
+MODULE_DESCRIPTION("NXP LPC18xx Watchdog Timer Driver");
+MODULE_LICENSE("GPL v2");
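Aside (not part of the patch): the timeout handling in the new lpc18xx driver maps seconds to the 24-bit counter through the wdt clock and the fixed divide-by-4 pre-scaler, rounding up on the way in. A small standalone sketch of both directions, assuming a hypothetical 32768 Hz wdt clock:

#include <stdio.h>

#define WDT_CLK_DIV 4

/* Seconds -> counter value, rounding up as the driver does. The result
 * must fit the 24-bit TC register (max 0xffffff). */
static unsigned long timeout_to_counter(unsigned int sec, unsigned long clk_rate)
{
	return (sec * clk_rate + WDT_CLK_DIV - 1) / WDT_CLK_DIV;
}

/* Counter value -> remaining seconds. */
static unsigned int counter_to_timeleft(unsigned long tv, unsigned long clk_rate)
{
	return (tv * WDT_CLK_DIV) / clk_rate;
}

int main(void)
{
	unsigned long clk_rate = 32768; /* assumed clock rate, for illustration */

	printf("%lu\n", timeout_to_counter(30, clk_rate));        /* 245760 */
	printf("%u\n", counter_to_timeleft(245760, clk_rate));    /* 30 */
	return 0;
}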
diff --git a/drivers/watchdog/mena21_wdt.c b/drivers/watchdog/mena21_wdt.c
index d193a5e..6901300 100644
--- a/drivers/watchdog/mena21_wdt.c
+++ b/drivers/watchdog/mena21_wdt.c
@@ -197,6 +197,7 @@
watchdog_init_timeout(&a21_wdt, 30, &pdev->dev);
watchdog_set_nowayout(&a21_wdt, nowayout);
watchdog_set_drvdata(&a21_wdt, drv);
+ a21_wdt.parent = &pdev->dev;
reset = a21_wdt_get_bootstatus(drv);
if (reset == 2)
diff --git a/drivers/watchdog/menf21bmc_wdt.c b/drivers/watchdog/menf21bmc_wdt.c
index 59f0913..3aefdde 100644
--- a/drivers/watchdog/menf21bmc_wdt.c
+++ b/drivers/watchdog/menf21bmc_wdt.c
@@ -130,6 +130,7 @@
drv_data->wdt.info = &menf21bmc_wdt_info;
drv_data->wdt.min_timeout = BMC_WD_TIMEOUT_MIN;
drv_data->wdt.max_timeout = BMC_WD_TIMEOUT_MAX;
+ drv_data->wdt.parent = &pdev->dev;
drv_data->i2c_client = i2c_client;
/*
diff --git a/drivers/watchdog/mpc8xxx_wdt.c b/drivers/watchdog/mpc8xxx_wdt.c
index 689381a..5f2273a 100644
--- a/drivers/watchdog/mpc8xxx_wdt.c
+++ b/drivers/watchdog/mpc8xxx_wdt.c
@@ -50,8 +50,12 @@
bool hw_enabled;
};
-static struct mpc8xxx_wdt __iomem *wd_base;
-static int mpc8xxx_wdt_init_late(void);
+struct mpc8xxx_wdt_ddata {
+ struct mpc8xxx_wdt __iomem *base;
+ struct watchdog_device wdd;
+ struct timer_list timer;
+ spinlock_t lock;
+};
static u16 timeout = 0xffff;
module_param(timeout, ushort, 0);
@@ -68,65 +72,59 @@
MODULE_PARM_DESC(nowayout, "Watchdog cannot be stopped once started "
"(default=" __MODULE_STRING(WATCHDOG_NOWAYOUT) ")");
-/*
- * We always prescale, but if someone really doesn't want to they can set this
- * to 0
- */
-static int prescale = 1;
-
-static DEFINE_SPINLOCK(wdt_spinlock);
-
-static void mpc8xxx_wdt_keepalive(void)
+static void mpc8xxx_wdt_keepalive(struct mpc8xxx_wdt_ddata *ddata)
{
/* Ping the WDT */
- spin_lock(&wdt_spinlock);
- out_be16(&wd_base->swsrr, 0x556c);
- out_be16(&wd_base->swsrr, 0xaa39);
- spin_unlock(&wdt_spinlock);
+ spin_lock(&ddata->lock);
+ out_be16(&ddata->base->swsrr, 0x556c);
+ out_be16(&ddata->base->swsrr, 0xaa39);
+ spin_unlock(&ddata->lock);
}
-static struct watchdog_device mpc8xxx_wdt_dev;
-static void mpc8xxx_wdt_timer_ping(unsigned long arg);
-static DEFINE_TIMER(wdt_timer, mpc8xxx_wdt_timer_ping, 0,
- (unsigned long)&mpc8xxx_wdt_dev);
-
static void mpc8xxx_wdt_timer_ping(unsigned long arg)
{
- struct watchdog_device *w = (struct watchdog_device *)arg;
+ struct mpc8xxx_wdt_ddata *ddata = (void *)arg;
- mpc8xxx_wdt_keepalive();
+ mpc8xxx_wdt_keepalive(ddata);
/* We're pinging it twice faster than needed, just to be sure. */
- mod_timer(&wdt_timer, jiffies + HZ * w->timeout / 2);
+ mod_timer(&ddata->timer, jiffies + HZ * ddata->wdd.timeout / 2);
}
static int mpc8xxx_wdt_start(struct watchdog_device *w)
{
- u32 tmp = SWCRR_SWEN;
+ struct mpc8xxx_wdt_ddata *ddata =
+ container_of(w, struct mpc8xxx_wdt_ddata, wdd);
+
+ u32 tmp = SWCRR_SWEN | SWCRR_SWPR;
/* Good, fire up the show */
- if (prescale)
- tmp |= SWCRR_SWPR;
if (reset)
tmp |= SWCRR_SWRI;
tmp |= timeout << 16;
- out_be32(&wd_base->swcrr, tmp);
+ out_be32(&ddata->base->swcrr, tmp);
- del_timer_sync(&wdt_timer);
+ del_timer_sync(&ddata->timer);
return 0;
}
static int mpc8xxx_wdt_ping(struct watchdog_device *w)
{
- mpc8xxx_wdt_keepalive();
+ struct mpc8xxx_wdt_ddata *ddata =
+ container_of(w, struct mpc8xxx_wdt_ddata, wdd);
+
+ mpc8xxx_wdt_keepalive(ddata);
return 0;
}
static int mpc8xxx_wdt_stop(struct watchdog_device *w)
{
- mod_timer(&wdt_timer, jiffies);
+ struct mpc8xxx_wdt_ddata *ddata =
+ container_of(w, struct mpc8xxx_wdt_ddata, wdd);
+
+ mod_timer(&ddata->timer, jiffies);
return 0;
}
@@ -143,53 +141,57 @@
.stop = mpc8xxx_wdt_stop,
};
-static struct watchdog_device mpc8xxx_wdt_dev = {
- .info = &mpc8xxx_wdt_info,
- .ops = &mpc8xxx_wdt_ops,
-};
-
-static const struct of_device_id mpc8xxx_wdt_match[];
static int mpc8xxx_wdt_probe(struct platform_device *ofdev)
{
int ret;
- const struct of_device_id *match;
- struct device_node *np = ofdev->dev.of_node;
+ struct resource *res;
const struct mpc8xxx_wdt_type *wdt_type;
+ struct mpc8xxx_wdt_ddata *ddata;
u32 freq = fsl_get_sys_freq();
bool enabled;
unsigned int timeout_sec;
- match = of_match_device(mpc8xxx_wdt_match, &ofdev->dev);
- if (!match)
+ wdt_type = of_device_get_match_data(&ofdev->dev);
+ if (!wdt_type)
return -EINVAL;
- wdt_type = match->data;
if (!freq || freq == -1)
return -EINVAL;
- wd_base = of_iomap(np, 0);
- if (!wd_base)
+ ddata = devm_kzalloc(&ofdev->dev, sizeof(*ddata), GFP_KERNEL);
+ if (!ddata)
return -ENOMEM;
- enabled = in_be32(&wd_base->swcrr) & SWCRR_SWEN;
+ res = platform_get_resource(ofdev, IORESOURCE_MEM, 0);
+ ddata->base = devm_ioremap_resource(&ofdev->dev, res);
+ if (IS_ERR(ddata->base))
+ return PTR_ERR(ddata->base);
+
+ enabled = in_be32(&ddata->base->swcrr) & SWCRR_SWEN;
if (!enabled && wdt_type->hw_enabled) {
pr_info("could not be enabled in software\n");
- ret = -ENOSYS;
- goto err_unmap;
+ return -ENODEV;
}
- /* Calculate the timeout in seconds */
- if (prescale)
- timeout_sec = (timeout * wdt_type->prescaler) / freq;
- else
- timeout_sec = timeout / freq;
+ spin_lock_init(&ddata->lock);
+ setup_timer(&ddata->timer, mpc8xxx_wdt_timer_ping,
+ (unsigned long)ddata);
- mpc8xxx_wdt_dev.timeout = timeout_sec;
-#ifdef MODULE
- ret = mpc8xxx_wdt_init_late();
- if (ret)
- goto err_unmap;
-#endif
+ ddata->wdd.info = &mpc8xxx_wdt_info,
+ ddata->wdd.ops = &mpc8xxx_wdt_ops,
+
+ /* Calculate the timeout in seconds */
+ timeout_sec = (timeout * wdt_type->prescaler) / freq;
+
+ ddata->wdd.timeout = timeout_sec;
+
+ watchdog_set_nowayout(&ddata->wdd, nowayout);
+
+ ret = watchdog_register_device(&ddata->wdd);
+ if (ret) {
+ pr_err("cannot register watchdog device (err=%d)\n", ret);
+ return ret;
+ }
pr_info("WDT driver for MPC8xxx initialized. mode:%s timeout=%d (%d seconds)\n",
reset ? "reset" : "interrupt", timeout, timeout_sec);
@@ -200,21 +202,20 @@
* userspace handles it.
*/
if (enabled)
- mod_timer(&wdt_timer, jiffies);
+ mod_timer(&ddata->timer, jiffies);
+
+ platform_set_drvdata(ofdev, ddata);
return 0;
-err_unmap:
- iounmap(wd_base);
- wd_base = NULL;
- return ret;
}
static int mpc8xxx_wdt_remove(struct platform_device *ofdev)
{
+ struct mpc8xxx_wdt_ddata *ddata = platform_get_drvdata(ofdev);
+
pr_crit("Watchdog removed, expect the %s soon!\n",
reset ? "reset" : "machine check exception");
- del_timer_sync(&wdt_timer);
- watchdog_unregister_device(&mpc8xxx_wdt_dev);
- iounmap(wd_base);
+ del_timer_sync(&ddata->timer);
+ watchdog_unregister_device(&ddata->wdd);
return 0;
}
@@ -253,31 +254,6 @@
},
};
-/*
- * We do wdt initialization in two steps: arch_initcall probes the wdt
- * very early to start pinging the watchdog (misc devices are not yet
- * available), and later module_init() just registers the misc device.
- */
-static int mpc8xxx_wdt_init_late(void)
-{
- int ret;
-
- if (!wd_base)
- return -ENODEV;
-
- watchdog_set_nowayout(&mpc8xxx_wdt_dev, nowayout);
-
- ret = watchdog_register_device(&mpc8xxx_wdt_dev);
- if (ret) {
- pr_err("cannot register watchdog device (err=%d)\n", ret);
- return ret;
- }
- return 0;
-}
-#ifndef MODULE
-module_init(mpc8xxx_wdt_init_late);
-#endif
-
static int __init mpc8xxx_wdt_init(void)
{
return platform_driver_register(&mpc8xxx_wdt_driver);
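
The mpc8xxx conversion above keeps the old timeout formula, timeout_sec = (timeout * prescaler) / freq, but now stores the result in the per-device wdd. A small user-space sketch with assumed values for the prescaler and the system frequency (in the real driver they come from the of_device match data and fsl_get_sys_freq()) shows the resulting period and the half-period software ping used by the keepalive timer:

#include <stdio.h>

int main(void)
{
	/* Illustrative values only; not taken from any particular SoC. */
	unsigned int timeout = 0xffff;		/* module parameter default */
	unsigned int prescaler = 0x10000;	/* assumed prescaler from match data */
	unsigned int freq = 266666666;		/* assumed 266 MHz bus clock */

	/* Same formula the probe() above uses for ddata->wdd.timeout. */
	unsigned int timeout_sec =
		((unsigned long long)timeout * prescaler) / freq;

	/* mpc8xxx_wdt_timer_ping() re-arms at half that period, so the
	 * hardware is serviced roughly twice as often as strictly needed. */
	printf("hardware period ~%us, software ping every ~%us\n",
	       timeout_sec, timeout_sec / 2);
	return 0;
}
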
diff --git a/drivers/watchdog/mtk_wdt.c b/drivers/watchdog/mtk_wdt.c
index 938b987..6ad9df9 100644
--- a/drivers/watchdog/mtk_wdt.c
+++ b/drivers/watchdog/mtk_wdt.c
@@ -210,6 +210,14 @@
return 0;
}
+static void mtk_wdt_shutdown(struct platform_device *pdev)
+{
+ struct mtk_wdt_dev *mtk_wdt = platform_get_drvdata(pdev);
+
+ if (watchdog_active(&mtk_wdt->wdt_dev))
+ mtk_wdt_stop(&mtk_wdt->wdt_dev);
+}
+
static int mtk_wdt_remove(struct platform_device *pdev)
{
struct mtk_wdt_dev *mtk_wdt = platform_get_drvdata(pdev);
@@ -221,17 +229,48 @@
return 0;
}
+#ifdef CONFIG_PM_SLEEP
+static int mtk_wdt_suspend(struct device *dev)
+{
+ struct mtk_wdt_dev *mtk_wdt = dev_get_drvdata(dev);
+
+ if (watchdog_active(&mtk_wdt->wdt_dev))
+ mtk_wdt_stop(&mtk_wdt->wdt_dev);
+
+ return 0;
+}
+
+static int mtk_wdt_resume(struct device *dev)
+{
+ struct mtk_wdt_dev *mtk_wdt = dev_get_drvdata(dev);
+
+ if (watchdog_active(&mtk_wdt->wdt_dev)) {
+ mtk_wdt_start(&mtk_wdt->wdt_dev);
+ mtk_wdt_ping(&mtk_wdt->wdt_dev);
+ }
+
+ return 0;
+}
+#endif
+
static const struct of_device_id mtk_wdt_dt_ids[] = {
{ .compatible = "mediatek,mt6589-wdt" },
{ /* sentinel */ }
};
MODULE_DEVICE_TABLE(of, mtk_wdt_dt_ids);
+static const struct dev_pm_ops mtk_wdt_pm_ops = {
+ SET_SYSTEM_SLEEP_PM_OPS(mtk_wdt_suspend,
+ mtk_wdt_resume)
+};
+
static struct platform_driver mtk_wdt_driver = {
.probe = mtk_wdt_probe,
.remove = mtk_wdt_remove,
+ .shutdown = mtk_wdt_shutdown,
.driver = {
.name = DRV_NAME,
+ .pm = &mtk_wdt_pm_ops,
.of_match_table = mtk_wdt_dt_ids,
},
};
diff --git a/drivers/watchdog/nv_tco.c b/drivers/watchdog/nv_tco.c
index c028454..bd917bb 100644
--- a/drivers/watchdog/nv_tco.c
+++ b/drivers/watchdog/nv_tco.c
@@ -294,6 +294,8 @@
PCI_ANY_ID, PCI_ANY_ID, },
{ PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_NFORCE_MCP55_SMBUS,
PCI_ANY_ID, PCI_ANY_ID, },
+ { PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_NFORCE_MCP79_SMBUS,
+ PCI_ANY_ID, PCI_ANY_ID, },
{ 0, }, /* End of list */
};
MODULE_DEVICE_TABLE(pci, tco_pci_tbl);
diff --git a/drivers/watchdog/omap_wdt.c b/drivers/watchdog/omap_wdt.c
index de911c7..d96bee0 100644
--- a/drivers/watchdog/omap_wdt.c
+++ b/drivers/watchdog/omap_wdt.c
@@ -253,6 +253,7 @@
wdev->wdog.ops = &omap_wdt_ops;
wdev->wdog.min_timeout = TIMER_MARGIN_MIN;
wdev->wdog.max_timeout = TIMER_MARGIN_MAX;
+ wdev->wdog.parent = &pdev->dev;
if (watchdog_init_timeout(&wdev->wdog, timer_margin, &pdev->dev) < 0)
wdev->wdog.timeout = TIMER_MARGIN_DEFAULT;
diff --git a/drivers/watchdog/orion_wdt.c b/drivers/watchdog/orion_wdt.c
index ef0c628..c6b8f4a 100644
--- a/drivers/watchdog/orion_wdt.c
+++ b/drivers/watchdog/orion_wdt.c
@@ -567,6 +567,7 @@
dev->wdt.timeout = wdt_max_duration;
dev->wdt.max_timeout = wdt_max_duration;
+ dev->wdt.parent = &pdev->dev;
watchdog_init_timeout(&dev->wdt, heartbeat, &pdev->dev);
platform_set_drvdata(pdev, &dev->wdt);
diff --git a/drivers/watchdog/pnx4008_wdt.c b/drivers/watchdog/pnx4008_wdt.c
index b9c6049..4224b3e 100644
--- a/drivers/watchdog/pnx4008_wdt.c
+++ b/drivers/watchdog/pnx4008_wdt.c
@@ -167,6 +167,7 @@
pnx4008_wdd.bootstatus = (readl(WDTIM_RES(wdt_base)) & WDOG_RESET) ?
WDIOF_CARDRESET : 0;
+ pnx4008_wdd.parent = &pdev->dev;
watchdog_set_nowayout(&pnx4008_wdd, nowayout);
pnx4008_wdt_stop(&pnx4008_wdd); /* disable for now */
diff --git a/drivers/watchdog/qcom-wdt.c b/drivers/watchdog/qcom-wdt.c
index aa03ca8..773dcfa 100644
--- a/drivers/watchdog/qcom-wdt.c
+++ b/drivers/watchdog/qcom-wdt.c
@@ -171,6 +171,7 @@
wdt->wdd.ops = &qcom_wdt_ops;
wdt->wdd.min_timeout = 1;
wdt->wdd.max_timeout = 0x10000000U / wdt->rate;
+ wdt->wdd.parent = &pdev->dev;
/*
* If 'timeout-sec' unspecified in devicetree, assume a 30 second
diff --git a/drivers/watchdog/retu_wdt.c b/drivers/watchdog/retu_wdt.c
index b7c68e27..39cd51d 100644
--- a/drivers/watchdog/retu_wdt.c
+++ b/drivers/watchdog/retu_wdt.c
@@ -127,6 +127,7 @@
retu_wdt->timeout = RETU_WDT_MAX_TIMER;
retu_wdt->min_timeout = 0;
retu_wdt->max_timeout = RETU_WDT_MAX_TIMER;
+ retu_wdt->parent = &pdev->dev;
watchdog_set_drvdata(retu_wdt, wdev);
watchdog_set_nowayout(retu_wdt, nowayout);
diff --git a/drivers/watchdog/rt2880_wdt.c b/drivers/watchdog/rt2880_wdt.c
index a6f7e2e..1967919 100644
--- a/drivers/watchdog/rt2880_wdt.c
+++ b/drivers/watchdog/rt2880_wdt.c
@@ -161,6 +161,7 @@
rt288x_wdt_dev.dev = &pdev->dev;
rt288x_wdt_dev.bootstatus = rt288x_wdt_bootcause();
rt288x_wdt_dev.max_timeout = (0xfffful / rt288x_wdt_freq);
+ rt288x_wdt_dev.parent = &pdev->dev;
watchdog_init_timeout(&rt288x_wdt_dev, rt288x_wdt_dev.max_timeout,
&pdev->dev);
diff --git a/drivers/watchdog/s3c2410_wdt.c b/drivers/watchdog/s3c2410_wdt.c
index e89ae02..d781000 100644
--- a/drivers/watchdog/s3c2410_wdt.c
+++ b/drivers/watchdog/s3c2410_wdt.c
@@ -607,6 +607,7 @@
watchdog_set_nowayout(&wdt->wdt_device, nowayout);
wdt->wdt_device.bootstatus = s3c2410wdt_get_bootstatus(wdt);
+ wdt->wdt_device.parent = &pdev->dev;
ret = watchdog_register_device(&wdt->wdt_device);
if (ret) {
diff --git a/drivers/watchdog/sama5d4_wdt.c b/drivers/watchdog/sama5d4_wdt.c
new file mode 100644
index 0000000..a49634c
--- /dev/null
+++ b/drivers/watchdog/sama5d4_wdt.c
@@ -0,0 +1,280 @@
+/*
+ * Driver for Atmel SAMA5D4 Watchdog Timer
+ *
+ * Copyright (C) 2015 Atmel Corporation
+ *
+ * Licensed under GPLv2.
+ */
+
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/of_irq.h>
+#include <linux/platform_device.h>
+#include <linux/reboot.h>
+#include <linux/watchdog.h>
+
+#include "at91sam9_wdt.h"
+
+/* minimum and maximum watchdog timeout, in seconds */
+#define MIN_WDT_TIMEOUT 1
+#define MAX_WDT_TIMEOUT 16
+#define WDT_DEFAULT_TIMEOUT MAX_WDT_TIMEOUT
+
+#define WDT_SEC2TICKS(s) ((s) ? (((s) << 8) - 1) : 0)
+
+struct sama5d4_wdt {
+ struct watchdog_device wdd;
+ void __iomem *reg_base;
+ u32 config;
+};
+
+static int wdt_timeout = WDT_DEFAULT_TIMEOUT;
+static bool nowayout = WATCHDOG_NOWAYOUT;
+
+module_param(wdt_timeout, int, 0);
+MODULE_PARM_DESC(wdt_timeout,
+ "Watchdog timeout in seconds. (default = "
+ __MODULE_STRING(WDT_DEFAULT_TIMEOUT) ")");
+
+module_param(nowayout, bool, 0);
+MODULE_PARM_DESC(nowayout,
+ "Watchdog cannot be stopped once started (default="
+ __MODULE_STRING(WATCHDOG_NOWAYOUT) ")");
+
+#define wdt_read(wdt, field) \
+ readl_relaxed((wdt)->reg_base + (field))
+
+#define wdt_write(wdt, field, val) \
+ writel_relaxed((val), (wdt)->reg_base + (field))
+
+static int sama5d4_wdt_start(struct watchdog_device *wdd)
+{
+ struct sama5d4_wdt *wdt = watchdog_get_drvdata(wdd);
+ u32 reg;
+
+ reg = wdt_read(wdt, AT91_WDT_MR);
+ reg &= ~AT91_WDT_WDDIS;
+ wdt_write(wdt, AT91_WDT_MR, reg);
+
+ return 0;
+}
+
+static int sama5d4_wdt_stop(struct watchdog_device *wdd)
+{
+ struct sama5d4_wdt *wdt = watchdog_get_drvdata(wdd);
+ u32 reg;
+
+ reg = wdt_read(wdt, AT91_WDT_MR);
+ reg |= AT91_WDT_WDDIS;
+ wdt_write(wdt, AT91_WDT_MR, reg);
+
+ return 0;
+}
+
+static int sama5d4_wdt_ping(struct watchdog_device *wdd)
+{
+ struct sama5d4_wdt *wdt = watchdog_get_drvdata(wdd);
+
+ wdt_write(wdt, AT91_WDT_CR, AT91_WDT_KEY | AT91_WDT_WDRSTT);
+
+ return 0;
+}
+
+static int sama5d4_wdt_set_timeout(struct watchdog_device *wdd,
+ unsigned int timeout)
+{
+ struct sama5d4_wdt *wdt = watchdog_get_drvdata(wdd);
+ u32 value = WDT_SEC2TICKS(timeout);
+ u32 reg;
+
+ reg = wdt_read(wdt, AT91_WDT_MR);
+ reg &= ~AT91_WDT_WDV;
+ reg &= ~AT91_WDT_WDD;
+ reg |= AT91_WDT_SET_WDV(value);
+ reg |= AT91_WDT_SET_WDD(value);
+ wdt_write(wdt, AT91_WDT_MR, reg);
+
+ wdd->timeout = timeout;
+
+ return 0;
+}
+
+static const struct watchdog_info sama5d4_wdt_info = {
+ .options = WDIOF_SETTIMEOUT | WDIOF_MAGICCLOSE | WDIOF_KEEPALIVEPING,
+ .identity = "Atmel SAMA5D4 Watchdog",
+};
+
+static struct watchdog_ops sama5d4_wdt_ops = {
+ .owner = THIS_MODULE,
+ .start = sama5d4_wdt_start,
+ .stop = sama5d4_wdt_stop,
+ .ping = sama5d4_wdt_ping,
+ .set_timeout = sama5d4_wdt_set_timeout,
+};
+
+static irqreturn_t sama5d4_wdt_irq_handler(int irq, void *dev_id)
+{
+ struct sama5d4_wdt *wdt = platform_get_drvdata(dev_id);
+
+ if (wdt_read(wdt, AT91_WDT_SR)) {
+ pr_crit("Atmel Watchdog Software Reset\n");
+ emergency_restart();
+ pr_crit("Reboot didn't succeed\n");
+ }
+
+ return IRQ_HANDLED;
+}
+
+static int of_sama5d4_wdt_init(struct device_node *np, struct sama5d4_wdt *wdt)
+{
+ const char *tmp;
+
+ wdt->config = AT91_WDT_WDDIS;
+
+ if (!of_property_read_string(np, "atmel,watchdog-type", &tmp) &&
+ !strcmp(tmp, "software"))
+ wdt->config |= AT91_WDT_WDFIEN;
+ else
+ wdt->config |= AT91_WDT_WDRSTEN;
+
+ if (of_property_read_bool(np, "atmel,idle-halt"))
+ wdt->config |= AT91_WDT_WDIDLEHLT;
+
+ if (of_property_read_bool(np, "atmel,dbg-halt"))
+ wdt->config |= AT91_WDT_WDDBGHLT;
+
+ return 0;
+}
+
+static int sama5d4_wdt_init(struct sama5d4_wdt *wdt)
+{
+ struct watchdog_device *wdd = &wdt->wdd;
+ u32 value = WDT_SEC2TICKS(wdd->timeout);
+ u32 reg;
+
+ /*
+	 * The fields WDV and WDD must not be modified while the WDDIS bit is
+	 * set, so clear the WDDIS bit before writing the WDT_MR.
+ */
+ reg = wdt_read(wdt, AT91_WDT_MR);
+ reg &= ~AT91_WDT_WDDIS;
+ wdt_write(wdt, AT91_WDT_MR, reg);
+
+ reg = wdt->config;
+ reg |= AT91_WDT_SET_WDD(value);
+ reg |= AT91_WDT_SET_WDV(value);
+
+ wdt_write(wdt, AT91_WDT_MR, reg);
+
+ return 0;
+}
+
+static int sama5d4_wdt_probe(struct platform_device *pdev)
+{
+ struct watchdog_device *wdd;
+ struct sama5d4_wdt *wdt;
+ struct resource *res;
+ void __iomem *regs;
+ u32 irq = 0;
+ int ret;
+
+ wdt = devm_kzalloc(&pdev->dev, sizeof(*wdt), GFP_KERNEL);
+ if (!wdt)
+ return -ENOMEM;
+
+ wdd = &wdt->wdd;
+ wdd->timeout = wdt_timeout;
+ wdd->info = &sama5d4_wdt_info;
+ wdd->ops = &sama5d4_wdt_ops;
+ wdd->min_timeout = MIN_WDT_TIMEOUT;
+ wdd->max_timeout = MAX_WDT_TIMEOUT;
+
+ watchdog_set_drvdata(wdd, wdt);
+
+ res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+ regs = devm_ioremap_resource(&pdev->dev, res);
+ if (IS_ERR(regs))
+ return PTR_ERR(regs);
+
+ wdt->reg_base = regs;
+
+ if (pdev->dev.of_node) {
+ irq = irq_of_parse_and_map(pdev->dev.of_node, 0);
+ if (!irq)
+ dev_warn(&pdev->dev, "failed to get IRQ from DT\n");
+
+ ret = of_sama5d4_wdt_init(pdev->dev.of_node, wdt);
+ if (ret)
+ return ret;
+ }
+
+ if ((wdt->config & AT91_WDT_WDFIEN) && irq) {
+ ret = devm_request_irq(&pdev->dev, irq, sama5d4_wdt_irq_handler,
+ IRQF_SHARED | IRQF_IRQPOLL |
+ IRQF_NO_SUSPEND, pdev->name, pdev);
+ if (ret) {
+ dev_err(&pdev->dev,
+ "cannot register interrupt handler\n");
+ return ret;
+ }
+ }
+
+ ret = watchdog_init_timeout(wdd, wdt_timeout, &pdev->dev);
+ if (ret) {
+ dev_err(&pdev->dev, "unable to set timeout value\n");
+ return ret;
+ }
+
+ ret = sama5d4_wdt_init(wdt);
+ if (ret)
+ return ret;
+
+ watchdog_set_nowayout(wdd, nowayout);
+
+ ret = watchdog_register_device(wdd);
+ if (ret) {
+ dev_err(&pdev->dev, "failed to register watchdog device\n");
+ return ret;
+ }
+
+ platform_set_drvdata(pdev, wdt);
+
+ dev_info(&pdev->dev, "initialized (timeout = %d sec, nowayout = %d)\n",
+ wdt_timeout, nowayout);
+
+ return 0;
+}
+
+static int sama5d4_wdt_remove(struct platform_device *pdev)
+{
+ struct sama5d4_wdt *wdt = platform_get_drvdata(pdev);
+
+ sama5d4_wdt_stop(&wdt->wdd);
+
+ watchdog_unregister_device(&wdt->wdd);
+
+ return 0;
+}
+
+static const struct of_device_id sama5d4_wdt_of_match[] = {
+ { .compatible = "atmel,sama5d4-wdt", },
+ { }
+};
+MODULE_DEVICE_TABLE(of, sama5d4_wdt_of_match);
+
+static struct platform_driver sama5d4_wdt_driver = {
+ .probe = sama5d4_wdt_probe,
+ .remove = sama5d4_wdt_remove,
+ .driver = {
+ .name = "sama5d4_wdt",
+ .of_match_table = sama5d4_wdt_of_match,
+ }
+};
+module_platform_driver(sama5d4_wdt_driver);
+
+MODULE_AUTHOR("Atmel Corporation");
+MODULE_DESCRIPTION("Atmel SAMA5D4 Watchdog Timer driver");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/watchdog/shwdt.c b/drivers/watchdog/shwdt.c
index 567458b..f908121 100644
--- a/drivers/watchdog/shwdt.c
+++ b/drivers/watchdog/shwdt.c
@@ -252,6 +252,7 @@
watchdog_set_nowayout(&sh_wdt_dev, nowayout);
watchdog_set_drvdata(&sh_wdt_dev, wdt);
+ sh_wdt_dev.parent = &pdev->dev;
spin_lock_init(&wdt->lock);
diff --git a/drivers/watchdog/sirfsoc_wdt.c b/drivers/watchdog/sirfsoc_wdt.c
index 42fa5c0c5..d0578ab 100644
--- a/drivers/watchdog/sirfsoc_wdt.c
+++ b/drivers/watchdog/sirfsoc_wdt.c
@@ -154,6 +154,7 @@
watchdog_init_timeout(&sirfsoc_wdd, timeout, &pdev->dev);
watchdog_set_nowayout(&sirfsoc_wdd, nowayout);
+ sirfsoc_wdd.parent = &pdev->dev;
ret = watchdog_register_device(&sirfsoc_wdd);
if (ret)
diff --git a/drivers/watchdog/sp805_wdt.c b/drivers/watchdog/sp805_wdt.c
index 4e7fec3..01d8162 100644
--- a/drivers/watchdog/sp805_wdt.c
+++ b/drivers/watchdog/sp805_wdt.c
@@ -226,6 +226,7 @@
wdt->adev = adev;
wdt->wdd.info = &wdt_info;
wdt->wdd.ops = &wdt_ops;
+ wdt->wdd.parent = &adev->dev;
spin_lock_init(&wdt->lock);
watchdog_set_nowayout(&wdt->wdd, nowayout);
diff --git a/drivers/watchdog/st_lpc_wdt.c b/drivers/watchdog/st_lpc_wdt.c
index 6785afd..14e9bad 100644
--- a/drivers/watchdog/st_lpc_wdt.c
+++ b/drivers/watchdog/st_lpc_wdt.c
@@ -241,6 +241,7 @@
return -EINVAL;
}
st_wdog_dev.max_timeout = 0xFFFFFFFF / st_wdog->clkrate;
+ st_wdog_dev.parent = &pdev->dev;
ret = clk_prepare_enable(clk);
if (ret) {
diff --git a/drivers/watchdog/stmp3xxx_rtc_wdt.c b/drivers/watchdog/stmp3xxx_rtc_wdt.c
index e7f0d5b..3ee6128 100644
--- a/drivers/watchdog/stmp3xxx_rtc_wdt.c
+++ b/drivers/watchdog/stmp3xxx_rtc_wdt.c
@@ -76,6 +76,7 @@
watchdog_set_drvdata(&stmp3xxx_wdd, &pdev->dev);
stmp3xxx_wdd.timeout = clamp_t(unsigned, heartbeat, 1, STMP3XXX_MAX_TIMEOUT);
+ stmp3xxx_wdd.parent = &pdev->dev;
ret = watchdog_register_device(&stmp3xxx_wdd);
if (ret < 0) {
diff --git a/drivers/watchdog/sunxi_wdt.c b/drivers/watchdog/sunxi_wdt.c
index a29afb3..47bd8a1 100644
--- a/drivers/watchdog/sunxi_wdt.c
+++ b/drivers/watchdog/sunxi_wdt.c
@@ -184,7 +184,7 @@
/* Set system reset function */
reg = readl(wdt_base + regs->wdt_cfg);
reg &= ~(regs->wdt_reset_mask);
- reg |= ~(regs->wdt_reset_val);
+ reg |= regs->wdt_reset_val;
writel(reg, wdt_base + regs->wdt_cfg);
/* Enable watchdog */
diff --git a/drivers/watchdog/tegra_wdt.c b/drivers/watchdog/tegra_wdt.c
index 30451ea..7f97cdd 100644
--- a/drivers/watchdog/tegra_wdt.c
+++ b/drivers/watchdog/tegra_wdt.c
@@ -218,6 +218,7 @@
wdd->ops = &tegra_wdt_ops;
wdd->min_timeout = MIN_WDT_TIMEOUT;
wdd->max_timeout = MAX_WDT_TIMEOUT;
+ wdd->parent = &pdev->dev;
watchdog_set_drvdata(wdd, wdt);
diff --git a/drivers/watchdog/twl4030_wdt.c b/drivers/watchdog/twl4030_wdt.c
index 2c1db6f..9bf3cc0 100644
--- a/drivers/watchdog/twl4030_wdt.c
+++ b/drivers/watchdog/twl4030_wdt.c
@@ -83,6 +83,7 @@
wdt->timeout = 30;
wdt->min_timeout = 1;
wdt->max_timeout = 30;
+ wdt->parent = &pdev->dev;
watchdog_set_nowayout(wdt, nowayout);
platform_set_drvdata(pdev, wdt);
diff --git a/drivers/watchdog/txx9wdt.c b/drivers/watchdog/txx9wdt.c
index 7f61593..c2da880 100644
--- a/drivers/watchdog/txx9wdt.c
+++ b/drivers/watchdog/txx9wdt.c
@@ -131,6 +131,7 @@
txx9wdt.timeout = timeout;
txx9wdt.min_timeout = 1;
txx9wdt.max_timeout = WD_MAX_TIMEOUT;
+ txx9wdt.parent = &dev->dev;
watchdog_set_nowayout(&txx9wdt, nowayout);
ret = watchdog_register_device(&txx9wdt);
diff --git a/drivers/watchdog/ux500_wdt.c b/drivers/watchdog/ux500_wdt.c
index 9de09ab..37c0843 100644
--- a/drivers/watchdog/ux500_wdt.c
+++ b/drivers/watchdog/ux500_wdt.c
@@ -96,6 +96,7 @@
ux500_wdt.max_timeout = WATCHDOG_MAX28;
}
+ ux500_wdt.parent = &pdev->dev;
watchdog_set_nowayout(&ux500_wdt, nowayout);
/* disable auto off on sleep */
diff --git a/drivers/watchdog/via_wdt.c b/drivers/watchdog/via_wdt.c
index 56369c4..5f9cbc3 100644
--- a/drivers/watchdog/via_wdt.c
+++ b/drivers/watchdog/via_wdt.c
@@ -206,6 +206,7 @@
timeout = WDT_TIMEOUT;
wdt_dev.timeout = timeout;
+ wdt_dev.parent = &pdev->dev;
watchdog_set_nowayout(&wdt_dev, nowayout);
if (readl(wdt_mem) & VIA_WDT_FIRED)
wdt_dev.bootstatus |= WDIOF_CARDRESET;
diff --git a/drivers/watchdog/wm831x_wdt.c b/drivers/watchdog/wm831x_wdt.c
index 2fa17e7..8d1184a 100644
--- a/drivers/watchdog/wm831x_wdt.c
+++ b/drivers/watchdog/wm831x_wdt.c
@@ -215,6 +215,7 @@
wm831x_wdt->info = &wm831x_wdt_info;
wm831x_wdt->ops = &wm831x_wdt_ops;
+ wm831x_wdt->parent = &pdev->dev;
watchdog_set_nowayout(wm831x_wdt, nowayout);
watchdog_set_drvdata(wm831x_wdt, driver_data);
diff --git a/drivers/watchdog/wm8350_wdt.c b/drivers/watchdog/wm8350_wdt.c
index 34d272a..4ab4b83 100644
--- a/drivers/watchdog/wm8350_wdt.c
+++ b/drivers/watchdog/wm8350_wdt.c
@@ -151,6 +151,7 @@
watchdog_set_nowayout(&wm8350_wdt, nowayout);
watchdog_set_drvdata(&wm8350_wdt, wm8350);
+ wm8350_wdt.parent = &pdev->dev;
/* Default to 4s timeout */
wm8350_wdt_set_timeout(&wm8350_wdt, 4);
diff --git a/drivers/xen/balloon.c b/drivers/xen/balloon.c
index 1fa633b..c79329f 100644
--- a/drivers/xen/balloon.c
+++ b/drivers/xen/balloon.c
@@ -441,7 +441,7 @@
/* Update direct mapping, invalidate P2M, and add to balloon. */
for (i = 0; i < nr_pages; i++) {
pfn = frame_list[i];
- frame_list[i] = pfn_to_mfn(pfn);
+ frame_list[i] = pfn_to_gfn(pfn);
page = pfn_to_page(pfn);
#ifdef CONFIG_XEN_HAVE_PVMMU
diff --git a/drivers/xen/biomerge.c b/drivers/xen/biomerge.c
index 0edb91c..8ae2fc90 100644
--- a/drivers/xen/biomerge.c
+++ b/drivers/xen/biomerge.c
@@ -6,10 +6,10 @@
bool xen_biovec_phys_mergeable(const struct bio_vec *vec1,
const struct bio_vec *vec2)
{
- unsigned long mfn1 = pfn_to_mfn(page_to_pfn(vec1->bv_page));
- unsigned long mfn2 = pfn_to_mfn(page_to_pfn(vec2->bv_page));
+ unsigned long bfn1 = pfn_to_bfn(page_to_pfn(vec1->bv_page));
+ unsigned long bfn2 = pfn_to_bfn(page_to_pfn(vec2->bv_page));
return __BIOVEC_PHYS_MERGEABLE(vec1, vec2) &&
- ((mfn1 == mfn2) || ((mfn1+1) == mfn2));
+ ((bfn1 == bfn2) || ((bfn1+1) == bfn2));
}
EXPORT_SYMBOL(xen_biovec_phys_mergeable);
diff --git a/drivers/xen/events/events_base.c b/drivers/xen/events/events_base.c
index 68d1290..6cd5e65 100644
--- a/drivers/xen/events/events_base.c
+++ b/drivers/xen/events/events_base.c
@@ -1688,7 +1688,7 @@
struct physdev_pirq_eoi_gmfn eoi_gmfn;
pirq_eoi_map = (void *)__get_free_page(GFP_KERNEL|__GFP_ZERO);
- eoi_gmfn.gmfn = virt_to_mfn(pirq_eoi_map);
+ eoi_gmfn.gmfn = virt_to_gfn(pirq_eoi_map);
rc = HYPERVISOR_physdev_op(PHYSDEVOP_pirq_eoi_gmfn_v2, &eoi_gmfn);
/* TODO: No PVH support for PIRQ EOI */
if (rc != 0) {
diff --git a/drivers/xen/events/events_fifo.c b/drivers/xen/events/events_fifo.c
index ed673e1..1d4baf5 100644
--- a/drivers/xen/events/events_fifo.c
+++ b/drivers/xen/events/events_fifo.c
@@ -111,7 +111,7 @@
for (i = 0; i < EVTCHN_FIFO_MAX_QUEUES; i++)
q->head[i] = 0;
- init_control.control_gfn = virt_to_mfn(control_block);
+ init_control.control_gfn = virt_to_gfn(control_block);
init_control.offset = 0;
init_control.vcpu = cpu;
@@ -167,7 +167,7 @@
/* Mask all events in this page before adding it. */
init_array_page(array_page);
- expand_array.array_gfn = virt_to_mfn(array_page);
+ expand_array.array_gfn = virt_to_gfn(array_page);
ret = HYPERVISOR_event_channel_op(EVTCHNOP_expand_array, &expand_array);
if (ret < 0)
diff --git a/drivers/xen/gntalloc.c b/drivers/xen/gntalloc.c
index e53fe19..4547a91 100644
--- a/drivers/xen/gntalloc.c
+++ b/drivers/xen/gntalloc.c
@@ -142,7 +142,8 @@
/* Grant foreign access to the page. */
rc = gnttab_grant_foreign_access(op->domid,
- pfn_to_mfn(page_to_pfn(gref->page)), readonly);
+ xen_page_to_gfn(gref->page),
+ readonly);
if (rc < 0)
goto undo;
gref_ids[i] = gref->gref_id = rc;
@@ -493,7 +494,7 @@
mutex_unlock(&gref_mutex);
}
-static struct vm_operations_struct gntalloc_vmops = {
+static const struct vm_operations_struct gntalloc_vmops = {
.open = gntalloc_vma_open,
.close = gntalloc_vma_close,
};
diff --git a/drivers/xen/gntdev.c b/drivers/xen/gntdev.c
index 0dbb222..2ea0b3b 100644
--- a/drivers/xen/gntdev.c
+++ b/drivers/xen/gntdev.c
@@ -433,7 +433,7 @@
return map->pages[(addr - map->pages_vm_start) >> PAGE_SHIFT];
}
-static struct vm_operations_struct gntdev_vmops = {
+static const struct vm_operations_struct gntdev_vmops = {
.open = gntdev_vma_open,
.close = gntdev_vma_close,
.find_special_page = gntdev_vma_find_special_page,
diff --git a/drivers/xen/manage.c b/drivers/xen/manage.c
index d10effe..e12bd36 100644
--- a/drivers/xen/manage.c
+++ b/drivers/xen/manage.c
@@ -80,7 +80,7 @@
* is resuming in a new domain.
*/
si->cancelled = HYPERVISOR_suspend(xen_pv_domain()
- ? virt_to_mfn(xen_start_info)
+ ? virt_to_gfn(xen_start_info)
: 0);
xen_arch_post_suspend(si->cancelled);
diff --git a/drivers/xen/privcmd.c b/drivers/xen/privcmd.c
index 5a29616..5e9adac 100644
--- a/drivers/xen/privcmd.c
+++ b/drivers/xen/privcmd.c
@@ -193,16 +193,16 @@
return ret;
}
-struct mmap_mfn_state {
+struct mmap_gfn_state {
unsigned long va;
struct vm_area_struct *vma;
domid_t domain;
};
-static int mmap_mfn_range(void *data, void *state)
+static int mmap_gfn_range(void *data, void *state)
{
struct privcmd_mmap_entry *msg = data;
- struct mmap_mfn_state *st = state;
+ struct mmap_gfn_state *st = state;
struct vm_area_struct *vma = st->vma;
int rc;
@@ -216,7 +216,7 @@
((msg->va+(msg->npages<<PAGE_SHIFT)) > vma->vm_end))
return -EINVAL;
- rc = xen_remap_domain_mfn_range(vma,
+ rc = xen_remap_domain_gfn_range(vma,
msg->va & PAGE_MASK,
msg->mfn, msg->npages,
vma->vm_page_prot,
@@ -236,7 +236,7 @@
struct vm_area_struct *vma;
int rc;
LIST_HEAD(pagelist);
- struct mmap_mfn_state state;
+ struct mmap_gfn_state state;
/* We only support privcmd_ioctl_mmap_batch for auto translated. */
if (xen_feature(XENFEAT_auto_translated_physmap))
@@ -273,7 +273,7 @@
rc = traverse_pages(mmapcmd.num, sizeof(struct privcmd_mmap_entry),
&pagelist,
- mmap_mfn_range, &state);
+ mmap_gfn_range, &state);
out_up:
@@ -299,18 +299,18 @@
int global_error;
int version;
- /* User-space mfn array to store errors in the second pass for V1. */
- xen_pfn_t __user *user_mfn;
+ /* User-space gfn array to store errors in the second pass for V1. */
+ xen_pfn_t __user *user_gfn;
/* User-space int array to store errors in the second pass for V2. */
int __user *user_err;
};
-/* auto translated dom0 note: if domU being created is PV, then mfn is
- * mfn(addr on bus). If it's auto xlated, then mfn is pfn (input to HAP).
+/* auto translated dom0 note: if domU being created is PV, then gfn is
+ * mfn(addr on bus). If it's auto xlated, then gfn is pfn (input to HAP).
*/
static int mmap_batch_fn(void *data, int nr, void *state)
{
- xen_pfn_t *mfnp = data;
+ xen_pfn_t *gfnp = data;
struct mmap_batch_state *st = state;
struct vm_area_struct *vma = st->vma;
struct page **pages = vma->vm_private_data;
@@ -321,8 +321,8 @@
cur_pages = &pages[st->index];
BUG_ON(nr < 0);
- ret = xen_remap_domain_mfn_array(st->vma, st->va & PAGE_MASK, mfnp, nr,
- (int *)mfnp, st->vma->vm_page_prot,
+ ret = xen_remap_domain_gfn_array(st->vma, st->va & PAGE_MASK, gfnp, nr,
+ (int *)gfnp, st->vma->vm_page_prot,
st->domain, cur_pages);
/* Adjust the global_error? */
@@ -347,22 +347,22 @@
if (st->version == 1) {
if (err) {
- xen_pfn_t mfn;
+ xen_pfn_t gfn;
- ret = get_user(mfn, st->user_mfn);
+ ret = get_user(gfn, st->user_gfn);
if (ret < 0)
return ret;
/*
* V1 encodes the error codes in the 32bit top
- * nibble of the mfn (with its known
+ * nibble of the gfn (with its known
* limitations vis-a-vis 64 bit callers).
*/
- mfn |= (err == -ENOENT) ?
+ gfn |= (err == -ENOENT) ?
PRIVCMD_MMAPBATCH_PAGED_ERROR :
PRIVCMD_MMAPBATCH_MFN_ERROR;
- return __put_user(mfn, st->user_mfn++);
+ return __put_user(gfn, st->user_gfn++);
} else
- st->user_mfn++;
+ st->user_gfn++;
} else { /* st->version == 2 */
if (err)
return __put_user(err, st->user_err++);
@@ -388,7 +388,7 @@
return 0;
}
-/* Allocate pfns that are then mapped with gmfns from foreign domid. Update
+/* Allocate pfns that are then mapped with gfns from foreign domid. Update
* the vma with the page info to use later.
* Returns: 0 if success, otherwise -errno
*/
@@ -414,7 +414,7 @@
return 0;
}
-static struct vm_operations_struct privcmd_vm_ops;
+static const struct vm_operations_struct privcmd_vm_ops;
static long privcmd_ioctl_mmap_batch(void __user *udata, int version)
{
@@ -526,7 +526,7 @@
if (state.global_error) {
/* Write back errors in second pass. */
- state.user_mfn = (xen_pfn_t *)m.arr;
+ state.user_gfn = (xen_pfn_t *)m.arr;
state.user_err = m.err;
ret = traverse_pages_block(m.num, sizeof(xen_pfn_t),
&pagelist, mmap_return_errors, &state);
@@ -587,7 +587,7 @@
if (!xen_feature(XENFEAT_auto_translated_physmap) || !numpgs || !pages)
return;
- rc = xen_unmap_domain_mfn_range(vma, numpgs, pages);
+ rc = xen_unmap_domain_gfn_range(vma, numpgs, pages);
if (rc == 0)
free_xenballooned_pages(numpgs, pages);
else
@@ -605,7 +605,7 @@
return VM_FAULT_SIGBUS;
}
-static struct vm_operations_struct privcmd_vm_ops = {
+static const struct vm_operations_struct privcmd_vm_ops = {
.close = privcmd_close,
.fault = privcmd_fault
};
diff --git a/drivers/xen/swiotlb-xen.c b/drivers/xen/swiotlb-xen.c
index 4c54932..79bc493 100644
--- a/drivers/xen/swiotlb-xen.c
+++ b/drivers/xen/swiotlb-xen.c
@@ -82,8 +82,8 @@
*/
static inline dma_addr_t xen_phys_to_bus(phys_addr_t paddr)
{
- unsigned long mfn = pfn_to_mfn(PFN_DOWN(paddr));
- dma_addr_t dma = (dma_addr_t)mfn << PAGE_SHIFT;
+ unsigned long bfn = pfn_to_bfn(PFN_DOWN(paddr));
+ dma_addr_t dma = (dma_addr_t)bfn << PAGE_SHIFT;
dma |= paddr & ~PAGE_MASK;
@@ -92,7 +92,7 @@
static inline phys_addr_t xen_bus_to_phys(dma_addr_t baddr)
{
- unsigned long pfn = mfn_to_pfn(PFN_DOWN(baddr));
+ unsigned long pfn = bfn_to_pfn(PFN_DOWN(baddr));
dma_addr_t dma = (dma_addr_t)pfn << PAGE_SHIFT;
phys_addr_t paddr = dma;
@@ -110,15 +110,15 @@
unsigned int offset,
size_t length)
{
- unsigned long next_mfn;
+ unsigned long next_bfn;
int i;
int nr_pages;
- next_mfn = pfn_to_mfn(pfn);
+ next_bfn = pfn_to_bfn(pfn);
nr_pages = (offset + length + PAGE_SIZE-1) >> PAGE_SHIFT;
for (i = 1; i < nr_pages; i++) {
- if (pfn_to_mfn(++pfn) != ++next_mfn)
+ if (pfn_to_bfn(++pfn) != ++next_bfn)
return 0;
}
return 1;
@@ -138,8 +138,8 @@
static int is_xen_swiotlb_buffer(dma_addr_t dma_addr)
{
- unsigned long mfn = PFN_DOWN(dma_addr);
- unsigned long pfn = mfn_to_local_pfn(mfn);
+ unsigned long bfn = PFN_DOWN(dma_addr);
+ unsigned long pfn = bfn_to_local_pfn(bfn);
phys_addr_t paddr;
/* If the address is outside our domain, it CAN
@@ -311,9 +311,6 @@
*/
flags &= ~(__GFP_DMA | __GFP_HIGHMEM);
- if (dma_alloc_from_coherent(hwdev, size, dma_handle, &ret))
- return ret;
-
/* On ARM this function returns an ioremap'ped virtual address for
* which virt_to_phys doesn't return the corresponding physical
* address. In fact on ARM virt_to_phys only works for kernel direct
@@ -356,9 +353,6 @@
phys_addr_t phys;
u64 dma_mask = DMA_BIT_MASK(32);
- if (dma_release_from_coherent(hwdev, order, vaddr))
- return;
-
if (hwdev && hwdev->coherent_dma_mask)
dma_mask = hwdev->coherent_dma_mask;
diff --git a/drivers/xen/tmem.c b/drivers/xen/tmem.c
index 239738f..945fc43 100644
--- a/drivers/xen/tmem.c
+++ b/drivers/xen/tmem.c
@@ -129,21 +129,17 @@
/* xen generic tmem ops */
static int xen_tmem_put_page(u32 pool_id, struct tmem_oid oid,
- u32 index, unsigned long pfn)
+ u32 index, struct page *page)
{
- unsigned long gmfn = xen_pv_domain() ? pfn_to_mfn(pfn) : pfn;
-
return xen_tmem_op(TMEM_PUT_PAGE, pool_id, oid, index,
- gmfn, 0, 0, 0);
+ xen_page_to_gfn(page), 0, 0, 0);
}
static int xen_tmem_get_page(u32 pool_id, struct tmem_oid oid,
- u32 index, unsigned long pfn)
+ u32 index, struct page *page)
{
- unsigned long gmfn = xen_pv_domain() ? pfn_to_mfn(pfn) : pfn;
-
return xen_tmem_op(TMEM_GET_PAGE, pool_id, oid, index,
- gmfn, 0, 0, 0);
+ xen_page_to_gfn(page), 0, 0, 0);
}
static int xen_tmem_flush_page(u32 pool_id, struct tmem_oid oid, u32 index)
@@ -173,14 +169,13 @@
{
u32 ind = (u32) index;
struct tmem_oid oid = *(struct tmem_oid *)&key;
- unsigned long pfn = page_to_pfn(page);
if (pool < 0)
return;
if (ind != index)
return;
mb(); /* ensure page is quiescent; tmem may address it with an alias */
- (void)xen_tmem_put_page((u32)pool, oid, ind, pfn);
+ (void)xen_tmem_put_page((u32)pool, oid, ind, page);
}
static int tmem_cleancache_get_page(int pool, struct cleancache_filekey key,
@@ -188,7 +183,6 @@
{
u32 ind = (u32) index;
struct tmem_oid oid = *(struct tmem_oid *)&key;
- unsigned long pfn = page_to_pfn(page);
int ret;
/* translate return values to linux semantics */
@@ -196,7 +190,7 @@
return -1;
if (ind != index)
return -1;
- ret = xen_tmem_get_page((u32)pool, oid, ind, pfn);
+ ret = xen_tmem_get_page((u32)pool, oid, ind, page);
if (ret == 1)
return 0;
else
@@ -287,7 +281,6 @@
{
u64 ind64 = (u64)offset;
u32 ind = (u32)offset;
- unsigned long pfn = page_to_pfn(page);
int pool = tmem_frontswap_poolid;
int ret;
@@ -296,7 +289,7 @@
if (ind64 != ind)
return -1;
mb(); /* ensure page is quiescent; tmem may address it with an alias */
- ret = xen_tmem_put_page(pool, oswiz(type, ind), iswiz(ind), pfn);
+ ret = xen_tmem_put_page(pool, oswiz(type, ind), iswiz(ind), page);
/* translate Xen tmem return values to linux semantics */
if (ret == 1)
return 0;
@@ -313,7 +306,6 @@
{
u64 ind64 = (u64)offset;
u32 ind = (u32)offset;
- unsigned long pfn = page_to_pfn(page);
int pool = tmem_frontswap_poolid;
int ret;
@@ -321,7 +313,7 @@
return -1;
if (ind64 != ind)
return -1;
- ret = xen_tmem_get_page(pool, oswiz(type, ind), iswiz(ind), pfn);
+ ret = xen_tmem_get_page(pool, oswiz(type, ind), iswiz(ind), page);
/* translate Xen tmem return values to linux semantics */
if (ret == 1)
return 0;
diff --git a/drivers/xen/xenbus/xenbus_client.c b/drivers/xen/xenbus/xenbus_client.c
index e303535..2ba09c1 100644
--- a/drivers/xen/xenbus/xenbus_client.c
+++ b/drivers/xen/xenbus/xenbus_client.c
@@ -380,7 +380,7 @@
for (i = 0; i < nr_pages; i++) {
err = gnttab_grant_foreign_access(dev->otherend_id,
- virt_to_mfn(vaddr), 0);
+ virt_to_gfn(vaddr), 0);
if (err < 0) {
xenbus_dev_fatal(dev, err,
"granting access to ring page");
diff --git a/drivers/xen/xenbus/xenbus_dev_backend.c b/drivers/xen/xenbus/xenbus_dev_backend.c
index b17707e..ee6d9ef 100644
--- a/drivers/xen/xenbus/xenbus_dev_backend.c
+++ b/drivers/xen/xenbus/xenbus_dev_backend.c
@@ -49,7 +49,7 @@
goto out_err;
gnttab_grant_foreign_access_ref(GNTTAB_RESERVED_XENSTORE, domid,
- virt_to_mfn(xen_store_interface), 0 /* writable */);
+ virt_to_gfn(xen_store_interface), 0 /* writable */);
arg.dom = DOMID_SELF;
arg.remote_dom = domid;
diff --git a/drivers/xen/xenbus/xenbus_probe.c b/drivers/xen/xenbus/xenbus_probe.c
index 4308fb3..3cbe055 100644
--- a/drivers/xen/xenbus/xenbus_probe.c
+++ b/drivers/xen/xenbus/xenbus_probe.c
@@ -75,7 +75,7 @@
enum xenstore_init xen_store_domain_type;
EXPORT_SYMBOL_GPL(xen_store_domain_type);
-static unsigned long xen_store_mfn;
+static unsigned long xen_store_gfn;
static BLOCKING_NOTIFIER_HEAD(xenstore_chain);
@@ -711,9 +711,7 @@
if (!page)
goto out_err;
- xen_store_mfn = xen_start_info->store_mfn =
- pfn_to_mfn(virt_to_phys((void *)page) >>
- PAGE_SHIFT);
+ xen_store_gfn = xen_start_info->store_mfn = virt_to_gfn((void *)page);
/* Next allocate a local port which xenstored can bind to */
alloc_unbound.dom = DOMID_SELF;
@@ -787,12 +785,12 @@
err = xenstored_local_init();
if (err)
goto out_error;
- xen_store_interface = mfn_to_virt(xen_store_mfn);
+ xen_store_interface = gfn_to_virt(xen_store_gfn);
break;
case XS_PV:
xen_store_evtchn = xen_start_info->store_evtchn;
- xen_store_mfn = xen_start_info->store_mfn;
- xen_store_interface = mfn_to_virt(xen_store_mfn);
+ xen_store_gfn = xen_start_info->store_mfn;
+ xen_store_interface = gfn_to_virt(xen_store_gfn);
break;
case XS_HVM:
err = hvm_get_parameter(HVM_PARAM_STORE_EVTCHN, &v);
@@ -802,9 +800,9 @@
err = hvm_get_parameter(HVM_PARAM_STORE_PFN, &v);
if (err)
goto out_error;
- xen_store_mfn = (unsigned long)v;
+ xen_store_gfn = (unsigned long)v;
xen_store_interface =
- xen_remap(xen_store_mfn << PAGE_SHIFT, PAGE_SIZE);
+ xen_remap(xen_store_gfn << PAGE_SHIFT, PAGE_SIZE);
break;
default:
pr_warn("Xenstore state unknown\n");
diff --git a/drivers/xen/xlate_mmu.c b/drivers/xen/xlate_mmu.c
index 58a5389..cff2387 100644
--- a/drivers/xen/xlate_mmu.c
+++ b/drivers/xen/xlate_mmu.c
@@ -38,8 +38,8 @@
#include <xen/interface/xen.h>
#include <xen/interface/memory.h>
-/* map fgmfn of domid to lpfn in the current domain */
-static int map_foreign_page(unsigned long lpfn, unsigned long fgmfn,
+/* map fgfn of domid to lpfn in the current domain */
+static int map_foreign_page(unsigned long lpfn, unsigned long fgfn,
unsigned int domid)
{
int rc;
@@ -49,7 +49,7 @@
.size = 1,
.space = XENMAPSPACE_gmfn_foreign,
};
- xen_ulong_t idx = fgmfn;
+ xen_ulong_t idx = fgfn;
xen_pfn_t gpfn = lpfn;
int err = 0;
@@ -62,13 +62,13 @@
}
struct remap_data {
- xen_pfn_t *fgmfn; /* foreign domain's gmfn */
+ xen_pfn_t *fgfn; /* foreign domain's gfn */
pgprot_t prot;
domid_t domid;
struct vm_area_struct *vma;
int index;
struct page **pages;
- struct xen_remap_mfn_info *info;
+ struct xen_remap_gfn_info *info;
int *err_ptr;
int mapped;
};
@@ -82,20 +82,20 @@
pte_t pte = pte_mkspecial(pfn_pte(pfn, info->prot));
int rc;
- rc = map_foreign_page(pfn, *info->fgmfn, info->domid);
+ rc = map_foreign_page(pfn, *info->fgfn, info->domid);
*info->err_ptr++ = rc;
if (!rc) {
set_pte_at(info->vma->vm_mm, addr, ptep, pte);
info->mapped++;
}
- info->fgmfn++;
+ info->fgfn++;
return 0;
}
int xen_xlate_remap_gfn_array(struct vm_area_struct *vma,
unsigned long addr,
- xen_pfn_t *mfn, int nr,
+ xen_pfn_t *gfn, int nr,
int *err_ptr, pgprot_t prot,
unsigned domid,
struct page **pages)
@@ -108,7 +108,7 @@
x86 PVOPS */
BUG_ON(!((vma->vm_flags & (VM_PFNMAP | VM_IO)) == (VM_PFNMAP | VM_IO)));
- data.fgmfn = mfn;
+ data.fgfn = gfn;
data.prot = prot;
data.domid = domid;
data.vma = vma;
diff --git a/fs/affs/super.c b/fs/affs/super.c
index 3f89c9e..5b50c4c 100644
--- a/fs/affs/super.c
+++ b/fs/affs/super.c
@@ -18,6 +18,7 @@
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/writeback.h>
+#include <linux/blkdev.h>
#include "affs.h"
static int affs_statfs(struct dentry *dentry, struct kstatfs *buf);
@@ -352,18 +353,19 @@
* blocks, we will have to change it.
*/
- size = sb->s_bdev->bd_inode->i_size >> 9;
+ size = i_size_read(sb->s_bdev->bd_inode) >> 9;
pr_debug("initial blocksize=%d, #blocks=%d\n", 512, size);
affs_set_blocksize(sb, PAGE_SIZE);
/* Try to find root block. Its location depends on the block size. */
- i = 512;
- j = 4096;
+ i = bdev_logical_block_size(sb->s_bdev);
+ j = PAGE_SIZE;
if (blocksize > 0) {
i = j = blocksize;
size = size / (blocksize / 512);
}
+
for (blocksize = i; blocksize <= j; blocksize <<= 1, size >>= 1) {
sbi->s_root_block = root_block;
if (root_block < 0)
diff --git a/fs/btrfs/async-thread.c b/fs/btrfs/async-thread.c
index 1ce06c84..3e36e4a 100644
--- a/fs/btrfs/async-thread.c
+++ b/fs/btrfs/async-thread.c
@@ -42,8 +42,14 @@
/* Thresholding related variants */
atomic_t pending;
- int max_active;
- int current_max;
+
+ /* Up limit of concurrency workers */
+ int limit_active;
+
+ /* Current number of concurrency workers */
+ int current_active;
+
+ /* Threshold to change current_active */
int thresh;
unsigned int count;
spinlock_t thres_lock;
@@ -88,7 +94,7 @@
BTRFS_WORK_HELPER(scrubparity_helper);
static struct __btrfs_workqueue *
-__btrfs_alloc_workqueue(const char *name, unsigned int flags, int max_active,
+__btrfs_alloc_workqueue(const char *name, unsigned int flags, int limit_active,
int thresh)
{
struct __btrfs_workqueue *ret = kzalloc(sizeof(*ret), GFP_NOFS);
@@ -96,26 +102,31 @@
if (!ret)
return NULL;
- ret->max_active = max_active;
+ ret->limit_active = limit_active;
atomic_set(&ret->pending, 0);
if (thresh == 0)
thresh = DFT_THRESHOLD;
/* For low threshold, disabling threshold is a better choice */
if (thresh < DFT_THRESHOLD) {
- ret->current_max = max_active;
+ ret->current_active = limit_active;
ret->thresh = NO_THRESHOLD;
} else {
- ret->current_max = 1;
+ /*
+ * For threshold-able wq, let its concurrency grow on demand.
+ * Use minimal max_active at alloc time to reduce resource
+ * usage.
+ */
+ ret->current_active = 1;
ret->thresh = thresh;
}
if (flags & WQ_HIGHPRI)
ret->normal_wq = alloc_workqueue("%s-%s-high", flags,
- ret->max_active,
- "btrfs", name);
+ ret->current_active, "btrfs",
+ name);
else
ret->normal_wq = alloc_workqueue("%s-%s", flags,
- ret->max_active, "btrfs",
+ ret->current_active, "btrfs",
name);
if (!ret->normal_wq) {
kfree(ret);
@@ -134,7 +145,7 @@
struct btrfs_workqueue *btrfs_alloc_workqueue(const char *name,
unsigned int flags,
- int max_active,
+ int limit_active,
int thresh)
{
struct btrfs_workqueue *ret = kzalloc(sizeof(*ret), GFP_NOFS);
@@ -143,14 +154,14 @@
return NULL;
ret->normal = __btrfs_alloc_workqueue(name, flags & ~WQ_HIGHPRI,
- max_active, thresh);
+ limit_active, thresh);
if (!ret->normal) {
kfree(ret);
return NULL;
}
if (flags & WQ_HIGHPRI) {
- ret->high = __btrfs_alloc_workqueue(name, flags, max_active,
+ ret->high = __btrfs_alloc_workqueue(name, flags, limit_active,
thresh);
if (!ret->high) {
__btrfs_destroy_workqueue(ret->normal);
@@ -180,7 +191,7 @@
*/
static inline void thresh_exec_hook(struct __btrfs_workqueue *wq)
{
- int new_max_active;
+ int new_current_active;
long pending;
int need_change = 0;
@@ -197,7 +208,7 @@
wq->count %= (wq->thresh / 4);
if (!wq->count)
goto out;
- new_max_active = wq->current_max;
+ new_current_active = wq->current_active;
/*
* pending may be changed later, but it's OK since we really
@@ -205,19 +216,19 @@
*/
pending = atomic_read(&wq->pending);
if (pending > wq->thresh)
- new_max_active++;
+ new_current_active++;
if (pending < wq->thresh / 2)
- new_max_active--;
- new_max_active = clamp_val(new_max_active, 1, wq->max_active);
- if (new_max_active != wq->current_max) {
+ new_current_active--;
+ new_current_active = clamp_val(new_current_active, 1, wq->limit_active);
+ if (new_current_active != wq->current_active) {
need_change = 1;
- wq->current_max = new_max_active;
+ wq->current_active = new_current_active;
}
out:
spin_unlock(&wq->thres_lock);
if (need_change) {
- workqueue_set_max_active(wq->normal_wq, wq->current_max);
+ workqueue_set_max_active(wq->normal_wq, wq->current_active);
}
}
@@ -351,13 +362,13 @@
kfree(wq);
}
-void btrfs_workqueue_set_max(struct btrfs_workqueue *wq, int max)
+void btrfs_workqueue_set_max(struct btrfs_workqueue *wq, int limit_active)
{
if (!wq)
return;
- wq->normal->max_active = max;
+ wq->normal->limit_active = limit_active;
if (wq->high)
- wq->high->max_active = max;
+ wq->high->limit_active = limit_active;
}
void btrfs_set_work_high_priority(struct btrfs_work *work)
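
The btrfs rename from max_active/current_max to limit_active/current_active does not change the feedback rule in thresh_exec_hook(): every thresh/4 queued jobs, the pending count is compared against the threshold and the live concurrency is nudged up or down within [1, limit_active]. A toy model of that rule, with made-up pending samples standing in for atomic_read(&wq->pending), shows how the concurrency tracks the backlog:

#include <stdio.h>

#define clamp_val(v, lo, hi)	((v) < (lo) ? (lo) : (v) > (hi) ? (hi) : (v))

int main(void)
{
	int thresh = 32, limit_active = 8, current_active = 1;
	int samples[] = { 40, 50, 10, 60, 70, 5, 4 };	/* illustrative backlog */

	for (unsigned int i = 0; i < sizeof(samples) / sizeof(samples[0]); i++) {
		int pending = samples[i];
		int next = current_active;

		if (pending > thresh)		/* backlog building: add a worker */
			next++;
		if (pending < thresh / 2)	/* mostly idle: drop a worker */
			next--;
		next = clamp_val(next, 1, limit_active);

		if (next != current_active)	/* workqueue_set_max_active() here */
			current_active = next;
		printf("pending=%2d -> current_active=%d\n",
		       pending, current_active);
	}
	return 0;
}
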
diff --git a/fs/btrfs/async-thread.h b/fs/btrfs/async-thread.h
index b0b093b..ad4d064 100644
--- a/fs/btrfs/async-thread.h
+++ b/fs/btrfs/async-thread.h
@@ -69,7 +69,7 @@
struct btrfs_workqueue *btrfs_alloc_workqueue(const char *name,
unsigned int flags,
- int max_active,
+ int limit_active,
int thresh);
void btrfs_init_work(struct btrfs_work *work, btrfs_work_func_t helper,
btrfs_func_t func,
diff --git a/fs/btrfs/dev-replace.c b/fs/btrfs/dev-replace.c
index 564a7de..e54dd59 100644
--- a/fs/btrfs/dev-replace.c
+++ b/fs/btrfs/dev-replace.c
@@ -183,8 +183,7 @@
}
out:
- if (path)
- btrfs_free_path(path);
+ btrfs_free_path(path);
return ret;
}
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 9ebd34f..0d98aee 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -3443,6 +3443,26 @@
return 0;
}
+int btrfs_get_num_tolerated_disk_barrier_failures(u64 flags)
+{
+ if ((flags & (BTRFS_BLOCK_GROUP_DUP |
+ BTRFS_BLOCK_GROUP_RAID0 |
+ BTRFS_AVAIL_ALLOC_BIT_SINGLE)) ||
+ ((flags & BTRFS_BLOCK_GROUP_PROFILE_MASK) == 0))
+ return 0;
+
+ if (flags & (BTRFS_BLOCK_GROUP_RAID1 |
+ BTRFS_BLOCK_GROUP_RAID5 |
+ BTRFS_BLOCK_GROUP_RAID10))
+ return 1;
+
+ if (flags & BTRFS_BLOCK_GROUP_RAID6)
+ return 2;
+
+ pr_warn("BTRFS: unknown raid type: %llu\n", flags);
+ return 0;
+}
+
int btrfs_calc_num_tolerated_disk_barrier_failures(
struct btrfs_fs_info *fs_info)
{
@@ -3452,13 +3472,12 @@
BTRFS_BLOCK_GROUP_SYSTEM,
BTRFS_BLOCK_GROUP_METADATA,
BTRFS_BLOCK_GROUP_DATA | BTRFS_BLOCK_GROUP_METADATA};
- int num_types = 4;
int i;
int c;
int num_tolerated_disk_barrier_failures =
(int)fs_info->fs_devices->num_devices;
- for (i = 0; i < num_types; i++) {
+ for (i = 0; i < ARRAY_SIZE(types); i++) {
struct btrfs_space_info *tmp;
sinfo = NULL;
@@ -3476,44 +3495,21 @@
down_read(&sinfo->groups_sem);
for (c = 0; c < BTRFS_NR_RAID_TYPES; c++) {
- if (!list_empty(&sinfo->block_groups[c])) {
- u64 flags;
+ u64 flags;
- btrfs_get_block_group_info(
- &sinfo->block_groups[c], &space);
- if (space.total_bytes == 0 ||
- space.used_bytes == 0)
- continue;
- flags = space.flags;
- /*
- * return
- * 0: if dup, single or RAID0 is configured for
- * any of metadata, system or data, else
- * 1: if RAID5 is configured, or if RAID1 or
- * RAID10 is configured and only two mirrors
- * are used, else
- * 2: if RAID6 is configured, else
- * num_mirrors - 1: if RAID1 or RAID10 is
- * configured and more than
- * 2 mirrors are used.
- */
- if (num_tolerated_disk_barrier_failures > 0 &&
- ((flags & (BTRFS_BLOCK_GROUP_DUP |
- BTRFS_BLOCK_GROUP_RAID0)) ||
- ((flags & BTRFS_BLOCK_GROUP_PROFILE_MASK)
- == 0)))
- num_tolerated_disk_barrier_failures = 0;
- else if (num_tolerated_disk_barrier_failures > 1) {
- if (flags & (BTRFS_BLOCK_GROUP_RAID1 |
- BTRFS_BLOCK_GROUP_RAID5 |
- BTRFS_BLOCK_GROUP_RAID10)) {
- num_tolerated_disk_barrier_failures = 1;
- } else if (flags &
- BTRFS_BLOCK_GROUP_RAID6) {
- num_tolerated_disk_barrier_failures = 2;
- }
- }
- }
+ if (list_empty(&sinfo->block_groups[c]))
+ continue;
+
+ btrfs_get_block_group_info(&sinfo->block_groups[c],
+ &space);
+ if (space.total_bytes == 0 || space.used_bytes == 0)
+ continue;
+ flags = space.flags;
+
+ num_tolerated_disk_barrier_failures = min(
+ num_tolerated_disk_barrier_failures,
+ btrfs_get_num_tolerated_disk_barrier_failures(
+ flags));
}
up_read(&sinfo->groups_sem);
}
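
The new btrfs_get_num_tolerated_disk_barrier_failures() lets the mount-time scan above (and later the balance path) express the same idea: the tolerated failure count is the minimum, over the profiles actually in use, of what each profile can survive. A stand-alone sketch of that fold, with an illustrative profile table rather than the real block-group flag bits, captures the shape of the calculation:

#include <stdio.h>

/* Toy profile table; the real code works on BTRFS_BLOCK_GROUP_* flags. */
enum profile { SINGLE, DUP, RAID0, RAID1, RAID5, RAID6, RAID10 };

static int tolerated(enum profile p)
{
	switch (p) {
	case RAID1: case RAID5: case RAID10:
		return 1;
	case RAID6:
		return 2;
	default:		/* single, dup, raid0 */
		return 0;
	}
}

int main(void)
{
	enum profile in_use[] = { RAID1, RAID6, RAID1 };	/* sys, meta, data */
	int num = 4;	/* start from the device count, as the kernel does */

	for (unsigned int i = 0; i < sizeof(in_use) / sizeof(in_use[0]); i++) {
		int t = tolerated(in_use[i]);

		num = num < t ? num : t;			/* min() */
	}
	printf("num_tolerated_disk_barrier_failures = %d\n", num);
	return 0;
}
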
diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h
index d4cbfee..bdfb479 100644
--- a/fs/btrfs/disk-io.h
+++ b/fs/btrfs/disk-io.h
@@ -139,6 +139,7 @@
u64 objectid);
int btree_lock_page_hook(struct page *page, void *data,
void (*flush_fn)(void *));
+int btrfs_get_num_tolerated_disk_barrier_failures(u64 flags);
int btrfs_calc_num_tolerated_disk_barrier_failures(
struct btrfs_fs_info *fs_info);
int __init btrfs_end_io_wq_init(void);
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 237da01..a0fa725 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -6909,8 +6909,7 @@
trace_btrfs_get_extent(root, em);
- if (path)
- btrfs_free_path(path);
+ btrfs_free_path(path);
if (trans) {
ret = btrfs_end_transaction(trans, root);
if (!err)
diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c
index 9a11db0..a39f5d1 100644
--- a/fs/btrfs/scrub.c
+++ b/fs/btrfs/scrub.c
@@ -3267,13 +3267,13 @@
scrub_blocked_if_needed(fs_info);
}
- /* for raid56, we skip parity stripe */
if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK) {
ret = get_raid56_logic_offset(physical, num, map,
&logical,
&stripe_logical);
logical += base;
if (ret) {
+			/* it is a parity stripe */
stripe_logical += base;
stripe_end = stripe_logical + increment;
ret = scrub_raid56_parity(sctx, map, scrub_dev,
@@ -3480,7 +3480,6 @@
static noinline_for_stack int scrub_chunk(struct scrub_ctx *sctx,
struct btrfs_device *scrub_dev,
- u64 chunk_tree, u64 chunk_objectid,
u64 chunk_offset, u64 length,
u64 dev_offset, int is_dev_replace)
{
@@ -3531,8 +3530,6 @@
struct btrfs_root *root = sctx->dev_root;
struct btrfs_fs_info *fs_info = root->fs_info;
u64 length;
- u64 chunk_tree;
- u64 chunk_objectid;
u64 chunk_offset;
int ret = 0;
int slot;
@@ -3596,8 +3593,6 @@
if (found_key.offset + length <= start)
goto skip;
- chunk_tree = btrfs_dev_extent_chunk_tree(l, dev_extent);
- chunk_objectid = btrfs_dev_extent_chunk_objectid(l, dev_extent);
chunk_offset = btrfs_dev_extent_chunk_offset(l, dev_extent);
/*
@@ -3630,9 +3625,8 @@
dev_replace->cursor_right = found_key.offset + length;
dev_replace->cursor_left = found_key.offset;
dev_replace->item_needs_writeback = 1;
- ret = scrub_chunk(sctx, scrub_dev, chunk_tree, chunk_objectid,
- chunk_offset, length, found_key.offset,
- is_dev_replace);
+ ret = scrub_chunk(sctx, scrub_dev, chunk_offset, length,
+ found_key.offset, is_dev_replace);
/*
* flush, submit all pending read and write bios, afterwards
diff --git a/fs/btrfs/tree-defrag.c b/fs/btrfs/tree-defrag.c
index a4b9c8b..f31db43 100644
--- a/fs/btrfs/tree-defrag.c
+++ b/fs/btrfs/tree-defrag.c
@@ -115,8 +115,7 @@
ret = -EAGAIN;
}
out:
- if (path)
- btrfs_free_path(path);
+ btrfs_free_path(path);
if (ret == -EAGAIN) {
if (root->defrag_max.objectid > root->defrag_progress.objectid)
goto done;
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 76201d6..6fc73586 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -3585,23 +3585,10 @@
} while (read_seqretry(&fs_info->profiles_lock, seq));
if (bctl->sys.flags & BTRFS_BALANCE_ARGS_CONVERT) {
- int num_tolerated_disk_barrier_failures;
- u64 target = bctl->sys.target;
-
- num_tolerated_disk_barrier_failures =
- btrfs_calc_num_tolerated_disk_barrier_failures(fs_info);
- if (num_tolerated_disk_barrier_failures > 0 &&
- (target &
- (BTRFS_BLOCK_GROUP_DUP | BTRFS_BLOCK_GROUP_RAID0 |
- BTRFS_AVAIL_ALLOC_BIT_SINGLE)))
- num_tolerated_disk_barrier_failures = 0;
- else if (num_tolerated_disk_barrier_failures > 1 &&
- (target &
- (BTRFS_BLOCK_GROUP_RAID1 | BTRFS_BLOCK_GROUP_RAID10)))
- num_tolerated_disk_barrier_failures = 1;
-
- fs_info->num_tolerated_disk_barrier_failures =
- num_tolerated_disk_barrier_failures;
+ fs_info->num_tolerated_disk_barrier_failures = min(
+ btrfs_calc_num_tolerated_disk_barrier_failures(fs_info),
+ btrfs_get_num_tolerated_disk_barrier_failures(
+ bctl->sys.target));
}
ret = insert_balance_item(fs_info->tree_root, bctl);
diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
index 890c509..9d23e78 100644
--- a/fs/ceph/addr.c
+++ b/fs/ceph/addr.c
@@ -276,7 +276,7 @@
for (i = 0; i < num_pages; i++) {
struct page *page = osd_data->pages[i];
- if (rc < 0)
+		if (rc < 0 && rc != -ENOENT)
goto unlock;
if (bytes < (int)PAGE_CACHE_SIZE) {
/* zero (remainder of) page */
@@ -717,8 +717,10 @@
wbc->sync_mode == WB_SYNC_NONE ? "NONE" :
(wbc->sync_mode == WB_SYNC_ALL ? "ALL" : "HOLD"));
- if (fsc->mount_state == CEPH_MOUNT_SHUTDOWN) {
+ if (ACCESS_ONCE(fsc->mount_state) == CEPH_MOUNT_SHUTDOWN) {
pr_warn("writepage_start %p on forced umount\n", inode);
+ truncate_pagecache(inode, 0);
+ mapping_set_error(mapping, -EIO);
return -EIO; /* we're in a forced umount, don't write! */
}
if (fsc->mount_options->wsize && fsc->mount_options->wsize < wsize)
@@ -1593,7 +1595,7 @@
return err;
}
-static struct vm_operations_struct ceph_vmops = {
+static const struct vm_operations_struct ceph_vmops = {
.fault = ceph_filemap_fault,
.page_mkwrite = ceph_page_mkwrite,
};
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index ddd5e94..27b5668 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -2413,6 +2413,14 @@
goto out_unlock;
}
+ if (!__ceph_is_any_caps(ci) &&
+ ACCESS_ONCE(mdsc->fsc->mount_state) == CEPH_MOUNT_SHUTDOWN) {
+ dout("get_cap_refs %p forced umount\n", inode);
+ *err = -EIO;
+ ret = 1;
+ goto out_unlock;
+ }
+
dout("get_cap_refs %p have %s needed %s\n", inode,
ceph_cap_string(have), ceph_cap_string(need));
}
diff --git a/fs/ceph/file.c b/fs/ceph/file.c
index 8b79d87..0c62868 100644
--- a/fs/ceph/file.c
+++ b/fs/ceph/file.c
@@ -136,7 +136,6 @@
struct ceph_mds_client *mdsc = fsc->mdsc;
struct ceph_mds_request *req;
struct ceph_file_info *cf = file->private_data;
- struct inode *parent_inode = NULL;
int err;
int flags, fmode, wanted;
@@ -210,10 +209,7 @@
ihold(inode);
req->r_num_caps = 1;
- if (flags & O_CREAT)
- parent_inode = ceph_get_dentry_parent_inode(file->f_path.dentry);
- err = ceph_mdsc_do_request(mdsc, parent_inode, req);
- iput(parent_inode);
+ err = ceph_mdsc_do_request(mdsc, NULL, req);
if (!err)
err = ceph_init_file(inode, file, req->r_fmode);
ceph_mdsc_put_request(req);
@@ -279,7 +275,7 @@
if (err)
goto out_req;
- if (err == 0 && (flags & O_CREAT) && !req->r_reply_info.head->is_dentry)
+ if ((flags & O_CREAT) && !req->r_reply_info.head->is_dentry)
err = ceph_handle_notrace_create(dir, dentry);
if (d_unhashed(dentry)) {
@@ -956,6 +952,12 @@
/* We can write back this queue in page reclaim */
current->backing_dev_info = inode_to_bdi(inode);
+ if (iocb->ki_flags & IOCB_APPEND) {
+ err = ceph_do_getattr(inode, CEPH_STAT_CAP_SIZE, false);
+ if (err < 0)
+ goto out;
+ }
+
err = generic_write_checks(iocb, from);
if (err <= 0)
goto out;
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index 6aa07af..51cb02d 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -2107,7 +2107,6 @@
msg = create_request_message(mdsc, req, mds, drop_cap_releases);
if (IS_ERR(msg)) {
req->r_err = PTR_ERR(msg);
- complete_request(mdsc, req);
return PTR_ERR(msg);
}
req->r_request = msg;
@@ -2135,7 +2134,7 @@
{
struct ceph_mds_session *session = NULL;
int mds = -1;
- int err = -EAGAIN;
+ int err = 0;
if (req->r_err || req->r_got_result) {
if (req->r_aborted)
@@ -2149,6 +2148,11 @@
err = -EIO;
goto finish;
}
+ if (ACCESS_ONCE(mdsc->fsc->mount_state) == CEPH_MOUNT_SHUTDOWN) {
+ dout("do_request forced umount\n");
+ err = -EIO;
+ goto finish;
+ }
put_request_session(req);
@@ -2196,13 +2200,15 @@
out_session:
ceph_put_mds_session(session);
+finish:
+ if (err) {
+ dout("__do_request early error %d\n", err);
+ req->r_err = err;
+ complete_request(mdsc, req);
+ __unregister_request(mdsc, req);
+ }
out:
return err;
-
-finish:
- req->r_err = err;
- complete_request(mdsc, req);
- goto out;
}
/*
@@ -2289,8 +2295,6 @@
if (req->r_err) {
err = req->r_err;
- __unregister_request(mdsc, req);
- dout("do_request early error %d\n", err);
goto out;
}
@@ -2411,7 +2415,7 @@
mutex_unlock(&mdsc->mutex);
goto out;
}
- if (req->r_got_safe && !head->safe) {
+ if (req->r_got_safe) {
pr_warn("got unsafe after safe on %llu from mds%d\n",
tid, mds);
mutex_unlock(&mdsc->mutex);
@@ -2520,8 +2524,7 @@
if (err) {
req->r_err = err;
} else {
- req->r_reply = msg;
- ceph_msg_get(msg);
+ req->r_reply = ceph_msg_get(msg);
req->r_got_result = true;
}
} else {
@@ -3555,7 +3558,7 @@
{
u64 want_tid, want_flush, want_snap;
- if (mdsc->fsc->mount_state == CEPH_MOUNT_SHUTDOWN)
+ if (ACCESS_ONCE(mdsc->fsc->mount_state) == CEPH_MOUNT_SHUTDOWN)
return;
dout("sync\n");
@@ -3584,7 +3587,7 @@
*/
static bool done_closing_sessions(struct ceph_mds_client *mdsc)
{
- if (mdsc->fsc->mount_state == CEPH_MOUNT_SHUTDOWN)
+ if (ACCESS_ONCE(mdsc->fsc->mount_state) == CEPH_MOUNT_SHUTDOWN)
return true;
return atomic_read(&mdsc->num_sessions) == 0;
}
@@ -3643,6 +3646,34 @@
dout("stopped\n");
}
+void ceph_mdsc_force_umount(struct ceph_mds_client *mdsc)
+{
+ struct ceph_mds_session *session;
+ int mds;
+
+ dout("force umount\n");
+
+ mutex_lock(&mdsc->mutex);
+ for (mds = 0; mds < mdsc->max_sessions; mds++) {
+ session = __ceph_lookup_mds_session(mdsc, mds);
+ if (!session)
+ continue;
+ mutex_unlock(&mdsc->mutex);
+ mutex_lock(&session->s_mutex);
+ __close_session(mdsc, session);
+ if (session->s_state == CEPH_MDS_SESSION_CLOSING) {
+ cleanup_session_requests(mdsc, session);
+ remove_session_caps(session);
+ }
+ mutex_unlock(&session->s_mutex);
+ ceph_put_mds_session(session);
+ mutex_lock(&mdsc->mutex);
+ kick_requests(mdsc, mds);
+ }
+ __wake_requests(mdsc, &mdsc->waiting_for_map);
+ mutex_unlock(&mdsc->mutex);
+}
+
static void ceph_mdsc_stop(struct ceph_mds_client *mdsc)
{
dout("stop\n");
diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h
index 762757e..f575eaf 100644
--- a/fs/ceph/mds_client.h
+++ b/fs/ceph/mds_client.h
@@ -366,6 +366,7 @@
extern int ceph_mdsc_init(struct ceph_fs_client *fsc);
extern void ceph_mdsc_close_sessions(struct ceph_mds_client *mdsc);
+extern void ceph_mdsc_force_umount(struct ceph_mds_client *mdsc);
extern void ceph_mdsc_destroy(struct ceph_fs_client *fsc);
extern void ceph_mdsc_sync(struct ceph_mds_client *mdsc);
diff --git a/fs/ceph/snap.c b/fs/ceph/snap.c
index 233d906a..4aa7122 100644
--- a/fs/ceph/snap.c
+++ b/fs/ceph/snap.c
@@ -338,12 +338,6 @@
return 0;
}
- if (num == 0 && realm->seq == ceph_empty_snapc->seq) {
- ceph_get_snap_context(ceph_empty_snapc);
- snapc = ceph_empty_snapc;
- goto done;
- }
-
/* alloc new snap context */
err = -ENOMEM;
if (num > (SIZE_MAX - sizeof(*snapc)) / sizeof(u64))
@@ -381,7 +375,6 @@
realm->ino, realm, snapc, snapc->seq,
(unsigned int) snapc->num_snaps);
-done:
ceph_put_snap_context(realm->cached_context);
realm->cached_context = snapc;
return 0;
diff --git a/fs/ceph/super.c b/fs/ceph/super.c
index 7b6bfcb..f446afa 100644
--- a/fs/ceph/super.c
+++ b/fs/ceph/super.c
@@ -708,6 +708,7 @@
if (!fsc)
return;
fsc->mount_state = CEPH_MOUNT_SHUTDOWN;
+ ceph_mdsc_force_umount(fsc->mdsc);
return;
}
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index 6a1119e..e739950 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -325,8 +325,11 @@
static void
cifs_show_security(struct seq_file *s, struct cifs_ses *ses)
{
- if (ses->sectype == Unspecified)
+ if (ses->sectype == Unspecified) {
+ if (ses->user_name == NULL)
+ seq_puts(s, ",sec=none");
return;
+ }
seq_puts(s, ",sec=");
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index 3f50cee..e2a6af1 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -3216,7 +3216,7 @@
return VM_FAULT_LOCKED;
}
-static struct vm_operations_struct cifs_file_vm_ops = {
+static const struct vm_operations_struct cifs_file_vm_ops = {
.fault = filemap_fault,
.map_pages = filemap_map_pages,
.page_mkwrite = cifs_page_mkwrite,
diff --git a/fs/cifs/ioctl.c b/fs/cifs/ioctl.c
index c63f522..28a77bf 100644
--- a/fs/cifs/ioctl.c
+++ b/fs/cifs/ioctl.c
@@ -67,6 +67,12 @@
goto out_drop_write;
}
+ if (src_file.file->f_op->unlocked_ioctl != cifs_ioctl) {
+ rc = -EBADF;
+ cifs_dbg(VFS, "src file seems to be from a different filesystem type\n");
+ goto out_fput;
+ }
+
if ((!src_file.file->private_data) || (!dst_file->private_data)) {
rc = -EBADF;
cifs_dbg(VFS, "missing cifsFileInfo on copy range src file\n");
diff --git a/fs/coda/upcall.c b/fs/coda/upcall.c
index 9b1ffaa..f6c6c8a 100644
--- a/fs/coda/upcall.c
+++ b/fs/coda/upcall.c
@@ -353,7 +353,7 @@
char *result;
insize = max_t(unsigned int,
- INSIZE(readlink), OUTSIZE(readlink)+ *length + 1);
+ INSIZE(readlink), OUTSIZE(readlink)+ *length);
UPARG(CODA_READLINK);
inp->coda_readlink.VFid = *fid;
@@ -361,8 +361,8 @@
error = coda_upcall(coda_vcp(sb), insize, &outsize, inp);
if (!error) {
retlen = outp->coda_readlink.count;
- if ( retlen > *length )
- retlen = *length;
+ if (retlen >= *length)
+ retlen = *length - 1;
*length = retlen;
result = (char *)outp + (long)outp->coda_readlink.data;
memcpy(buffer, result, retlen);
diff --git a/fs/coredump.c b/fs/coredump.c
index c5ecde6..a8f7564 100644
--- a/fs/coredump.c
+++ b/fs/coredump.c
@@ -513,10 +513,10 @@
const struct cred *old_cred;
struct cred *cred;
int retval = 0;
- int flag = 0;
int ispipe;
struct files_struct *displaced;
- bool need_nonrelative = false;
+ /* require nonrelative corefile path and be extra careful */
+ bool need_suid_safe = false;
bool core_dumped = false;
static atomic_t core_dump_count = ATOMIC_INIT(0);
struct coredump_params cprm = {
@@ -550,9 +550,8 @@
*/
if (__get_dumpable(cprm.mm_flags) == SUID_DUMP_ROOT) {
/* Setuid core dump mode */
- flag = O_EXCL; /* Stop rewrite attacks */
cred->fsuid = GLOBAL_ROOT_UID; /* Dump root private */
- need_nonrelative = true;
+ need_suid_safe = true;
}
retval = coredump_wait(siginfo->si_signo, &core_state);
@@ -633,7 +632,7 @@
if (cprm.limit < binfmt->min_coredump)
goto fail_unlock;
- if (need_nonrelative && cn.corename[0] != '/') {
+ if (need_suid_safe && cn.corename[0] != '/') {
printk(KERN_WARNING "Pid %d(%s) can only dump core "\
"to fully qualified path!\n",
task_tgid_vnr(current), current->comm);
@@ -641,8 +640,35 @@
goto fail_unlock;
}
+ /*
+ * Unlink the file if it exists unless this is a SUID
+ * binary - in that case, we're running around with root
+ * privs and don't want to unlink another user's coredump.
+ */
+ if (!need_suid_safe) {
+ mm_segment_t old_fs;
+
+ old_fs = get_fs();
+ set_fs(KERNEL_DS);
+ /*
+ * If it doesn't exist, that's fine. If there's some
+ * other problem, we'll catch it at the filp_open().
+ */
+ (void) sys_unlink((const char __user *)cn.corename);
+ set_fs(old_fs);
+ }
+
+ /*
+ * There is a race between unlinking and creating the
+ * file, but if that causes an EEXIST here, that's
+ * fine - another process raced with us while creating
+ * the corefile, and the other process won. To userspace,
+ * what matters is that at least one of the two processes
+ * writes its coredump successfully, not which one.
+ */
cprm.file = filp_open(cn.corename,
- O_CREAT | 2 | O_NOFOLLOW | O_LARGEFILE | flag,
+ O_CREAT | 2 | O_NOFOLLOW |
+ O_LARGEFILE | O_EXCL,
0600);
if (IS_ERR(cprm.file))
goto fail_unlock;
@@ -659,11 +685,15 @@
if (!S_ISREG(inode->i_mode))
goto close_fail;
/*
- * Dont allow local users get cute and trick others to coredump
- * into their pre-created files.
+ * Don't dump core if the filesystem changed owner or mode
+ * of the file during file creation. This is an issue when
+ * a process dumps core while its cwd is e.g. on a vfat
+ * filesystem.
*/
if (!uid_eq(inode->i_uid, current_fsuid()))
goto close_fail;
+ if ((inode->i_mode & 0677) != 0600)
+ goto close_fail;
if (!(cprm.file->f_mode & FMODE_CAN_WRITE))
goto close_fail;
if (do_truncate(cprm.file->f_path.dentry, 0, 0, cprm.file))
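[Editor's aside] The coredump hunks above switch to an unconditional O_EXCL and unlink any pre-existing core file first, except when dumping a set-uid binary. A minimal user-space sketch of that unlink-then-O_EXCL pattern, with a hypothetical open_corefile() helper that is not kernel code:

/* Sketch only: user-space illustration of the pattern, not the in-kernel path. */
#include <errno.h>
#include <fcntl.h>
#include <unistd.h>

static int open_corefile(const char *path, int suid_safe)
{
	int fd;

	if (!suid_safe)
		unlink(path);	/* ENOENT is fine; other errors surface at open() */

	/* O_EXCL defeats rewrite attacks through pre-created files. */
	fd = open(path, O_WRONLY | O_CREAT | O_EXCL | O_NOFOLLOW, 0600);
	if (fd < 0 && errno == EEXIST)
		return -1;	/* another dumper raced with us and won; give up quietly */
	return fd;
}

As the added comment in the patch notes, losing the create race is acceptable: at least one of the racing processes still writes its dump.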
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index ae0f438..587ac08 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -53,8 +53,6 @@
unsigned int for_background:1;
unsigned int for_sync:1; /* sync(2) WB_SYNC_ALL writeback */
unsigned int auto_free:1; /* free on completion */
- unsigned int single_wait:1;
- unsigned int single_done:1;
enum wb_reason reason; /* why was writeback initiated? */
struct list_head list; /* pending work list */
@@ -178,14 +176,11 @@
static void wb_queue_work(struct bdi_writeback *wb,
struct wb_writeback_work *work)
{
- trace_writeback_queue(wb->bdi, work);
+ trace_writeback_queue(wb, work);
spin_lock_bh(&wb->work_lock);
- if (!test_bit(WB_registered, &wb->state)) {
- if (work->single_wait)
- work->single_done = 1;
+ if (!test_bit(WB_registered, &wb->state))
goto out_unlock;
- }
if (work->done)
atomic_inc(&work->done->cnt);
list_add_tail(&work->list, &wb->work_list);
@@ -706,7 +701,7 @@
/**
* inode_congested - test whether an inode is congested
- * @inode: inode to test for congestion
+ * @inode: inode to test for congestion (may be NULL)
* @cong_bits: mask of WB_[a]sync_congested bits to test
*
* Tests whether @inode is congested. @cong_bits is the mask of congestion
@@ -716,6 +711,9 @@
* determined by whether the cgwb (cgroup bdi_writeback) for the blkcg
* associated with @inode is congested; otherwise, the root wb's congestion
* state is used.
+ *
+ * @inode is allowed to be NULL as this function is often called on
+ * mapping->host which is NULL for the swapper space.
*/
int inode_congested(struct inode *inode, int cong_bits)
{
@@ -738,32 +736,6 @@
EXPORT_SYMBOL_GPL(inode_congested);
/**
- * wb_wait_for_single_work - wait for completion of a single bdi_writeback_work
- * @bdi: bdi the work item was issued to
- * @work: work item to wait for
- *
- * Wait for the completion of @work which was issued to one of @bdi's
- * bdi_writeback's. The caller must have set @work->single_wait before
- * issuing it. This wait operates independently fo
- * wb_wait_for_completion() and also disables automatic freeing of @work.
- */
-static void wb_wait_for_single_work(struct backing_dev_info *bdi,
- struct wb_writeback_work *work)
-{
- if (WARN_ON_ONCE(!work->single_wait))
- return;
-
- wait_event(bdi->wb_waitq, work->single_done);
-
- /*
- * Paired with smp_wmb() in wb_do_writeback() and ensures that all
- * modifications to @work prior to assertion of ->single_done is
- * visible to the caller once this function returns.
- */
- smp_rmb();
-}
-
-/**
* wb_split_bdi_pages - split nr_pages to write according to bandwidth
* @wb: target bdi_writeback to split @nr_pages to
* @nr_pages: number of pages to write for the whole bdi
@@ -792,38 +764,6 @@
}
/**
- * wb_clone_and_queue_work - clone a wb_writeback_work and issue it to a wb
- * @wb: target bdi_writeback
- * @base_work: source wb_writeback_work
- *
- * Try to make a clone of @base_work and issue it to @wb. If cloning
- * succeeds, %true is returned; otherwise, @base_work is issued directly
- * and %false is returned. In the latter case, the caller is required to
- * wait for @base_work's completion using wb_wait_for_single_work().
- *
- * A clone is auto-freed on completion. @base_work never is.
- */
-static bool wb_clone_and_queue_work(struct bdi_writeback *wb,
- struct wb_writeback_work *base_work)
-{
- struct wb_writeback_work *work;
-
- work = kmalloc(sizeof(*work), GFP_ATOMIC);
- if (work) {
- *work = *base_work;
- work->auto_free = 1;
- work->single_wait = 0;
- } else {
- work = base_work;
- work->auto_free = 0;
- work->single_wait = 1;
- }
- work->single_done = 0;
- wb_queue_work(wb, work);
- return work != base_work;
-}
-
-/**
* bdi_split_work_to_wbs - split a wb_writeback_work to all wb's of a bdi
* @bdi: target backing_dev_info
* @base_work: wb_writeback_work to issue
@@ -838,15 +778,19 @@
struct wb_writeback_work *base_work,
bool skip_if_busy)
{
- long nr_pages = base_work->nr_pages;
- int next_blkcg_id = 0;
+ int next_memcg_id = 0;
struct bdi_writeback *wb;
struct wb_iter iter;
might_sleep();
restart:
rcu_read_lock();
- bdi_for_each_wb(wb, bdi, &iter, next_blkcg_id) {
+ bdi_for_each_wb(wb, bdi, &iter, next_memcg_id) {
+ DEFINE_WB_COMPLETION_ONSTACK(fallback_work_done);
+ struct wb_writeback_work fallback_work;
+ struct wb_writeback_work *work;
+ long nr_pages;
+
/* SYNC_ALL writes out I_DIRTY_TIME too */
if (!wb_has_dirty_io(wb) &&
(base_work->sync_mode == WB_SYNC_NONE ||
@@ -855,13 +799,30 @@
if (skip_if_busy && writeback_in_progress(wb))
continue;
- base_work->nr_pages = wb_split_bdi_pages(wb, nr_pages);
- if (!wb_clone_and_queue_work(wb, base_work)) {
- next_blkcg_id = wb->blkcg_css->id + 1;
- rcu_read_unlock();
- wb_wait_for_single_work(bdi, base_work);
- goto restart;
+ nr_pages = wb_split_bdi_pages(wb, base_work->nr_pages);
+
+ work = kmalloc(sizeof(*work), GFP_ATOMIC);
+ if (work) {
+ *work = *base_work;
+ work->nr_pages = nr_pages;
+ work->auto_free = 1;
+ wb_queue_work(wb, work);
+ continue;
}
+
+ /* alloc failed, execute synchronously using on-stack fallback */
+ work = &fallback_work;
+ *work = *base_work;
+ work->nr_pages = nr_pages;
+ work->auto_free = 0;
+ work->done = &fallback_work_done;
+
+ wb_queue_work(wb, work);
+
+ next_memcg_id = wb->memcg_css->id + 1;
+ rcu_read_unlock();
+ wb_wait_for_completion(bdi, &fallback_work_done);
+ goto restart;
}
rcu_read_unlock();
}
@@ -902,8 +863,6 @@
if (!skip_if_busy || !writeback_in_progress(&bdi->wb)) {
base_work->auto_free = 0;
- base_work->single_wait = 0;
- base_work->single_done = 0;
wb_queue_work(&bdi->wb, base_work);
}
}
@@ -924,7 +883,7 @@
*/
work = kzalloc(sizeof(*work), GFP_ATOMIC);
if (!work) {
- trace_writeback_nowork(wb->bdi);
+ trace_writeback_nowork(wb);
wb_wakeup(wb);
return;
}
@@ -954,7 +913,7 @@
* We just wake up the flusher thread. It will perform background
* writeback as soon as there is no other work to do.
*/
- trace_writeback_wake_background(wb->bdi);
+ trace_writeback_wake_background(wb);
wb_wakeup(wb);
}
@@ -1421,6 +1380,10 @@
* Write a portion of b_io inodes which belong to @sb.
*
* Return the number of pages and/or inodes written.
+ *
+ * NOTE! This is called with wb->list_lock held, and will
+ * unlock and relock that for each inode it ends up doing
+ * IO for.
*/
static long writeback_sb_inodes(struct super_block *sb,
struct bdi_writeback *wb,
@@ -1439,9 +1402,7 @@
unsigned long start_time = jiffies;
long write_chunk;
long wrote = 0; /* count both pages and inodes */
- struct blk_plug plug;
- blk_start_plug(&plug);
while (!list_empty(&wb->b_io)) {
struct inode *inode = wb_inode(wb->b_io.prev);
@@ -1539,7 +1500,6 @@
break;
}
}
- blk_finish_plug(&plug);
return wrote;
}
@@ -1586,12 +1546,15 @@
.range_cyclic = 1,
.reason = reason,
};
+ struct blk_plug plug;
+ blk_start_plug(&plug);
spin_lock(&wb->list_lock);
if (list_empty(&wb->b_io))
queue_io(wb, &work);
__writeback_inodes_wb(wb, &work);
spin_unlock(&wb->list_lock);
+ blk_finish_plug(&plug);
return nr_pages - work.nr_pages;
}
@@ -1619,10 +1582,12 @@
unsigned long oldest_jif;
struct inode *inode;
long progress;
+ struct blk_plug plug;
oldest_jif = jiffies;
work->older_than_this = &oldest_jif;
+ blk_start_plug(&plug);
spin_lock(&wb->list_lock);
for (;;) {
/*
@@ -1660,14 +1625,14 @@
} else if (work->for_background)
oldest_jif = jiffies;
- trace_writeback_start(wb->bdi, work);
+ trace_writeback_start(wb, work);
if (list_empty(&wb->b_io))
queue_io(wb, work);
if (work->sb)
progress = writeback_sb_inodes(work->sb, wb, work);
else
progress = __writeback_inodes_wb(wb, work);
- trace_writeback_written(wb->bdi, work);
+ trace_writeback_written(wb, work);
wb_update_bandwidth(wb, wb_start);
@@ -1692,7 +1657,7 @@
* we'll just busyloop.
*/
if (!list_empty(&wb->b_more_io)) {
- trace_writeback_wait(wb->bdi, work);
+ trace_writeback_wait(wb, work);
inode = wb_inode(wb->b_more_io.prev);
spin_lock(&inode->i_lock);
spin_unlock(&wb->list_lock);
@@ -1702,6 +1667,7 @@
}
}
spin_unlock(&wb->list_lock);
+ blk_finish_plug(&plug);
return nr_pages - work->nr_pages;
}
@@ -1797,26 +1763,14 @@
set_bit(WB_writeback_running, &wb->state);
while ((work = get_next_work_item(wb)) != NULL) {
struct wb_completion *done = work->done;
- bool need_wake_up = false;
- trace_writeback_exec(wb->bdi, work);
+ trace_writeback_exec(wb, work);
wrote += wb_writeback(wb, work);
- if (work->single_wait) {
- WARN_ON_ONCE(work->auto_free);
- /* paired w/ rmb in wb_wait_for_single_work() */
- smp_wmb();
- work->single_done = 1;
- need_wake_up = true;
- } else if (work->auto_free) {
+ if (work->auto_free)
kfree(work);
- }
-
if (done && atomic_dec_and_test(&done->cnt))
- need_wake_up = true;
-
- if (need_wake_up)
wake_up_all(&wb->bdi->wb_waitq);
}
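[Editor's aside] bdi_split_work_to_wbs() above replaces the single_wait/single_done machinery with a per-wb allocation plus an on-stack fallback that is waited on before the walk continues. A condensed sketch of that allocation-fallback idiom; 'struct work', for_each_target(), queue_async() and queue_and_wait() are hypothetical stand-ins for wb_writeback_work, the wb iteration, wb_queue_work() and wb_wait_for_completion():

/* Sketch only: per-target work issuing with an on-stack fallback. */
for_each_target(t) {
	struct work fallback, *w = kmalloc(sizeof(*w), GFP_ATOMIC);

	if (w) {
		*w = *base;
		w->auto_free = 1;		/* the worker frees it */
		queue_async(t, w);
		continue;
	}
	/* No memory: reuse an on-stack copy and wait before the next
	 * iteration, so the stack object never outlives its use. */
	fallback = *base;
	fallback.auto_free = 0;
	queue_and_wait(t, &fallback);
}

Waiting synchronously only on the allocation-failure path keeps the common case fully asynchronous while still guaranteeing forward progress under memory pressure.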
diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index a38e38f..9bd1244 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -34,6 +34,7 @@
#include <linux/percpu.h>
#include <linux/list_sort.h>
#include <linux/lockref.h>
+#include <linux/rhashtable.h>
#include "gfs2.h"
#include "incore.h"
@@ -50,9 +51,8 @@
#include "trace_gfs2.h"
struct gfs2_glock_iter {
- int hash; /* hash bucket index */
- unsigned nhash; /* Index within current bucket */
struct gfs2_sbd *sdp; /* incore superblock */
+ struct rhashtable_iter hti; /* rhashtable iterator */
struct gfs2_glock *gl; /* current glock struct */
loff_t last_pos; /* last position */
};
@@ -70,44 +70,19 @@
#define GFS2_GL_HASH_SHIFT 15
#define GFS2_GL_HASH_SIZE (1 << GFS2_GL_HASH_SHIFT)
-#define GFS2_GL_HASH_MASK (GFS2_GL_HASH_SIZE - 1)
-static struct hlist_bl_head gl_hash_table[GFS2_GL_HASH_SIZE];
-static struct dentry *gfs2_root;
+static struct rhashtable_params ht_parms = {
+ .nelem_hint = GFS2_GL_HASH_SIZE * 3 / 4,
+ .key_len = sizeof(struct lm_lockname),
+ .key_offset = offsetof(struct gfs2_glock, gl_name),
+ .head_offset = offsetof(struct gfs2_glock, gl_node),
+};
-/**
- * gl_hash() - Turn glock number into hash bucket number
- * @lock: The glock number
- *
- * Returns: The number of the corresponding hash bucket
- */
+static struct rhashtable gl_hash_table;
-static unsigned int gl_hash(const struct gfs2_sbd *sdp,
- const struct lm_lockname *name)
+void gfs2_glock_free(struct gfs2_glock *gl)
{
- unsigned int h;
-
- h = jhash(&name->ln_number, sizeof(u64), 0);
- h = jhash(&name->ln_type, sizeof(unsigned int), h);
- h = jhash(&sdp, sizeof(struct gfs2_sbd *), h);
- h &= GFS2_GL_HASH_MASK;
-
- return h;
-}
-
-static inline void spin_lock_bucket(unsigned int hash)
-{
- hlist_bl_lock(&gl_hash_table[hash]);
-}
-
-static inline void spin_unlock_bucket(unsigned int hash)
-{
- hlist_bl_unlock(&gl_hash_table[hash]);
-}
-
-static void gfs2_glock_dealloc(struct rcu_head *rcu)
-{
- struct gfs2_glock *gl = container_of(rcu, struct gfs2_glock, gl_rcu);
+ struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;
if (gl->gl_ops->go_flags & GLOF_ASPACE) {
kmem_cache_free(gfs2_glock_aspace_cachep, gl);
@@ -115,13 +90,6 @@
kfree(gl->gl_lksb.sb_lvbptr);
kmem_cache_free(gfs2_glock_cachep, gl);
}
-}
-
-void gfs2_glock_free(struct gfs2_glock *gl)
-{
- struct gfs2_sbd *sdp = gl->gl_sbd;
-
- call_rcu(&gl->gl_rcu, gfs2_glock_dealloc);
if (atomic_dec_and_test(&sdp->sd_glock_disposal))
wake_up(&sdp->sd_glock_wait);
}
@@ -192,7 +160,7 @@
void gfs2_glock_put(struct gfs2_glock *gl)
{
- struct gfs2_sbd *sdp = gl->gl_sbd;
+ struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;
struct address_space *mapping = gfs2_glock2aspace(gl);
if (lockref_put_or_lock(&gl->gl_lockref))
@@ -202,9 +170,7 @@
gfs2_glock_remove_from_lru(gl);
spin_unlock(&gl->gl_lockref.lock);
- spin_lock_bucket(gl->gl_hash);
- hlist_bl_del_rcu(&gl->gl_list);
- spin_unlock_bucket(gl->gl_hash);
+ rhashtable_remove_fast(&gl_hash_table, &gl->gl_node, ht_parms);
GLOCK_BUG_ON(gl, !list_empty(&gl->gl_holders));
GLOCK_BUG_ON(gl, mapping && mapping->nrpages);
trace_gfs2_glock_put(gl);
@@ -212,33 +178,6 @@
}
/**
- * search_bucket() - Find struct gfs2_glock by lock number
- * @bucket: the bucket to search
- * @name: The lock name
- *
- * Returns: NULL, or the struct gfs2_glock with the requested number
- */
-
-static struct gfs2_glock *search_bucket(unsigned int hash,
- const struct gfs2_sbd *sdp,
- const struct lm_lockname *name)
-{
- struct gfs2_glock *gl;
- struct hlist_bl_node *h;
-
- hlist_bl_for_each_entry_rcu(gl, h, &gl_hash_table[hash], gl_list) {
- if (!lm_name_equal(&gl->gl_name, name))
- continue;
- if (gl->gl_sbd != sdp)
- continue;
- if (lockref_get_not_dead(&gl->gl_lockref))
- return gl;
- }
-
- return NULL;
-}
-
-/**
* may_grant - check if its ok to grant a new lock
* @gl: The glock
* @gh: The lock request which we wish to grant
@@ -506,7 +445,7 @@
__acquires(&gl->gl_spin)
{
const struct gfs2_glock_operations *glops = gl->gl_ops;
- struct gfs2_sbd *sdp = gl->gl_sbd;
+ struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;
unsigned int lck_flags = gh ? gh->gh_flags : 0;
int ret;
@@ -628,7 +567,7 @@
static void delete_work_func(struct work_struct *work)
{
struct gfs2_glock *gl = container_of(work, struct gfs2_glock, gl_delete);
- struct gfs2_sbd *sdp = gl->gl_sbd;
+ struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;
struct gfs2_inode *ip;
struct inode *inode;
u64 no_addr = gl->gl_name.ln_number;
@@ -704,15 +643,17 @@
struct gfs2_glock **glp)
{
struct super_block *s = sdp->sd_vfs;
- struct lm_lockname name = { .ln_number = number, .ln_type = glops->go_type };
- struct gfs2_glock *gl, *tmp;
- unsigned int hash = gl_hash(sdp, &name);
+ struct lm_lockname name = { .ln_number = number,
+ .ln_type = glops->go_type,
+ .ln_sbd = sdp };
+ struct gfs2_glock *gl, *tmp = NULL;
struct address_space *mapping;
struct kmem_cache *cachep;
+ int ret, tries = 0;
- rcu_read_lock();
- gl = search_bucket(hash, sdp, &name);
- rcu_read_unlock();
+ gl = rhashtable_lookup_fast(&gl_hash_table, &name, ht_parms);
+ if (gl && !lockref_get_not_dead(&gl->gl_lockref))
+ gl = NULL;
*glp = gl;
if (gl)
@@ -739,14 +680,13 @@
}
atomic_inc(&sdp->sd_glock_disposal);
- gl->gl_sbd = sdp;
+ gl->gl_node.next = NULL;
gl->gl_flags = 0;
gl->gl_name = name;
gl->gl_lockref.count = 1;
gl->gl_state = LM_ST_UNLOCKED;
gl->gl_target = LM_ST_UNLOCKED;
gl->gl_demote_state = LM_ST_EXCLUSIVE;
- gl->gl_hash = hash;
gl->gl_ops = glops;
gl->gl_dstamp = ktime_set(0, 0);
preempt_disable();
@@ -771,22 +711,34 @@
mapping->writeback_index = 0;
}
- spin_lock_bucket(hash);
- tmp = search_bucket(hash, sdp, &name);
- if (tmp) {
- spin_unlock_bucket(hash);
- kfree(gl->gl_lksb.sb_lvbptr);
- kmem_cache_free(cachep, gl);
- atomic_dec(&sdp->sd_glock_disposal);
- gl = tmp;
- } else {
- hlist_bl_add_head_rcu(&gl->gl_list, &gl_hash_table[hash]);
- spin_unlock_bucket(hash);
+again:
+ ret = rhashtable_lookup_insert_fast(&gl_hash_table, &gl->gl_node,
+ ht_parms);
+ if (ret == 0) {
+ *glp = gl;
+ return 0;
}
- *glp = gl;
+ if (ret == -EEXIST) {
+ ret = 0;
+ tmp = rhashtable_lookup_fast(&gl_hash_table, &name, ht_parms);
+ if (tmp == NULL || !lockref_get_not_dead(&tmp->gl_lockref)) {
+ if (++tries < 100) {
+ cond_resched();
+ goto again;
+ }
+ tmp = NULL;
+ ret = -ENOMEM;
+ }
+ } else {
+ WARN_ON_ONCE(ret);
+ }
+ kfree(gl->gl_lksb.sb_lvbptr);
+ kmem_cache_free(cachep, gl);
+ atomic_dec(&sdp->sd_glock_disposal);
+ *glp = tmp;
- return 0;
+ return ret;
}
/**
@@ -928,7 +880,7 @@
__acquires(&gl->gl_spin)
{
struct gfs2_glock *gl = gh->gh_gl;
- struct gfs2_sbd *sdp = gl->gl_sbd;
+ struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;
struct list_head *insert_pt = NULL;
struct gfs2_holder *gh2;
int try_futile = 0;
@@ -1006,7 +958,7 @@
int gfs2_glock_nq(struct gfs2_holder *gh)
{
struct gfs2_glock *gl = gh->gh_gl;
- struct gfs2_sbd *sdp = gl->gl_sbd;
+ struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;
int error = 0;
if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags)))
@@ -1313,7 +1265,7 @@
void gfs2_glock_complete(struct gfs2_glock *gl, int ret)
{
- struct lm_lockstruct *ls = &gl->gl_sbd->sd_lockstruct;
+ struct lm_lockstruct *ls = &gl->gl_name.ln_sbd->sd_lockstruct;
spin_lock(&gl->gl_spin);
gl->gl_reply = ret;
@@ -1462,31 +1414,26 @@
*
*/
-static void examine_bucket(glock_examiner examiner, const struct gfs2_sbd *sdp,
- unsigned int hash)
+static void glock_hash_walk(glock_examiner examiner, const struct gfs2_sbd *sdp)
{
struct gfs2_glock *gl;
- struct hlist_bl_head *head = &gl_hash_table[hash];
- struct hlist_bl_node *pos;
+ struct rhash_head *pos, *next;
+ const struct bucket_table *tbl;
+ int i;
rcu_read_lock();
- hlist_bl_for_each_entry_rcu(gl, pos, head, gl_list) {
- if ((gl->gl_sbd == sdp) && lockref_get_not_dead(&gl->gl_lockref))
- examiner(gl);
+ tbl = rht_dereference_rcu(gl_hash_table.tbl, &gl_hash_table);
+ for (i = 0; i < tbl->size; i++) {
+ rht_for_each_entry_safe(gl, pos, next, tbl, i, gl_node) {
+ if ((gl->gl_name.ln_sbd == sdp) &&
+ lockref_get_not_dead(&gl->gl_lockref))
+ examiner(gl);
+ }
}
rcu_read_unlock();
cond_resched();
}
-static void glock_hash_walk(glock_examiner examiner, const struct gfs2_sbd *sdp)
-{
- unsigned x;
-
- for (x = 0; x < GFS2_GL_HASH_SIZE; x++)
- examine_bucket(examiner, sdp, x);
-}
-
-
/**
* thaw_glock - thaw out a glock which has an unprocessed reply waiting
* @gl: The glock to thaw
@@ -1569,7 +1516,7 @@
int ret;
ret = gfs2_truncatei_resume(ip);
- gfs2_assert_withdraw(gl->gl_sbd, ret == 0);
+ gfs2_assert_withdraw(gl->gl_name.ln_sbd, ret == 0);
spin_lock(&gl->gl_spin);
clear_bit(GLF_LOCK, &gl->gl_flags);
@@ -1733,17 +1680,17 @@
{
struct gfs2_glock *gl = iter_ptr;
- seq_printf(seq, "G: n:%u/%llx rtt:%lld/%lld rttb:%lld/%lld irt:%lld/%lld dcnt: %lld qcnt: %lld\n",
+ seq_printf(seq, "G: n:%u/%llx rtt:%llu/%llu rttb:%llu/%llu irt:%llu/%llu dcnt: %llu qcnt: %llu\n",
gl->gl_name.ln_type,
(unsigned long long)gl->gl_name.ln_number,
- (long long)gl->gl_stats.stats[GFS2_LKS_SRTT],
- (long long)gl->gl_stats.stats[GFS2_LKS_SRTTVAR],
- (long long)gl->gl_stats.stats[GFS2_LKS_SRTTB],
- (long long)gl->gl_stats.stats[GFS2_LKS_SRTTVARB],
- (long long)gl->gl_stats.stats[GFS2_LKS_SIRT],
- (long long)gl->gl_stats.stats[GFS2_LKS_SIRTVAR],
- (long long)gl->gl_stats.stats[GFS2_LKS_DCOUNT],
- (long long)gl->gl_stats.stats[GFS2_LKS_QCOUNT]);
+ (unsigned long long)gl->gl_stats.stats[GFS2_LKS_SRTT],
+ (unsigned long long)gl->gl_stats.stats[GFS2_LKS_SRTTVAR],
+ (unsigned long long)gl->gl_stats.stats[GFS2_LKS_SRTTB],
+ (unsigned long long)gl->gl_stats.stats[GFS2_LKS_SRTTVARB],
+ (unsigned long long)gl->gl_stats.stats[GFS2_LKS_SIRT],
+ (unsigned long long)gl->gl_stats.stats[GFS2_LKS_SIRTVAR],
+ (unsigned long long)gl->gl_stats.stats[GFS2_LKS_DCOUNT],
+ (unsigned long long)gl->gl_stats.stats[GFS2_LKS_QCOUNT]);
return 0;
}
@@ -1776,11 +1723,10 @@
static int gfs2_sbstats_seq_show(struct seq_file *seq, void *iter_ptr)
{
- struct gfs2_glock_iter *gi = seq->private;
- struct gfs2_sbd *sdp = gi->sdp;
- unsigned index = gi->hash >> 3;
- unsigned subindex = gi->hash & 0x07;
- s64 value;
+ struct gfs2_sbd *sdp = seq->private;
+ loff_t pos = *(loff_t *)iter_ptr;
+ unsigned index = pos >> 3;
+ unsigned subindex = pos & 0x07;
int i;
if (index == 0 && subindex != 0)
@@ -1791,12 +1737,12 @@
for_each_possible_cpu(i) {
const struct gfs2_pcpu_lkstats *lkstats = per_cpu_ptr(sdp->sd_lkstats, i);
- if (index == 0) {
- value = i;
- } else {
- value = lkstats->lkstats[index - 1].stats[subindex];
- }
- seq_printf(seq, " %15lld", (long long)value);
+
+ if (index == 0)
+ seq_printf(seq, " %15u", i);
+ else
+ seq_printf(seq, " %15llu", (unsigned long long)lkstats->
+ lkstats[index - 1].stats[subindex]);
}
seq_putc(seq, '\n');
return 0;
@@ -1804,20 +1750,24 @@
int __init gfs2_glock_init(void)
{
- unsigned i;
- for(i = 0; i < GFS2_GL_HASH_SIZE; i++) {
- INIT_HLIST_BL_HEAD(&gl_hash_table[i]);
- }
+ int ret;
+
+ ret = rhashtable_init(&gl_hash_table, &ht_parms);
+ if (ret < 0)
+ return ret;
glock_workqueue = alloc_workqueue("glock_workqueue", WQ_MEM_RECLAIM |
WQ_HIGHPRI | WQ_FREEZABLE, 0);
- if (!glock_workqueue)
+ if (!glock_workqueue) {
+ rhashtable_destroy(&gl_hash_table);
return -ENOMEM;
+ }
gfs2_delete_workqueue = alloc_workqueue("delete_workqueue",
WQ_MEM_RECLAIM | WQ_FREEZABLE,
0);
if (!gfs2_delete_workqueue) {
destroy_workqueue(glock_workqueue);
+ rhashtable_destroy(&gl_hash_table);
return -ENOMEM;
}
@@ -1829,72 +1779,41 @@
void gfs2_glock_exit(void)
{
unregister_shrinker(&glock_shrinker);
+ rhashtable_destroy(&gl_hash_table);
destroy_workqueue(glock_workqueue);
destroy_workqueue(gfs2_delete_workqueue);
}
-static inline struct gfs2_glock *glock_hash_chain(unsigned hash)
+static void gfs2_glock_iter_next(struct gfs2_glock_iter *gi)
{
- return hlist_bl_entry(hlist_bl_first_rcu(&gl_hash_table[hash]),
- struct gfs2_glock, gl_list);
-}
-
-static inline struct gfs2_glock *glock_hash_next(struct gfs2_glock *gl)
-{
- return hlist_bl_entry(rcu_dereference(gl->gl_list.next),
- struct gfs2_glock, gl_list);
-}
-
-static int gfs2_glock_iter_next(struct gfs2_glock_iter *gi)
-{
- struct gfs2_glock *gl;
-
do {
- gl = gi->gl;
- if (gl) {
- gi->gl = glock_hash_next(gl);
- gi->nhash++;
- } else {
- if (gi->hash >= GFS2_GL_HASH_SIZE) {
- rcu_read_unlock();
- return 1;
- }
- gi->gl = glock_hash_chain(gi->hash);
- gi->nhash = 0;
- }
- while (gi->gl == NULL) {
- gi->hash++;
- if (gi->hash >= GFS2_GL_HASH_SIZE) {
- rcu_read_unlock();
- return 1;
- }
- gi->gl = glock_hash_chain(gi->hash);
- gi->nhash = 0;
+ gi->gl = rhashtable_walk_next(&gi->hti);
+ if (IS_ERR(gi->gl)) {
+ if (PTR_ERR(gi->gl) == -EAGAIN)
+ continue;
+ gi->gl = NULL;
}
/* Skip entries for other sb and dead entries */
- } while (gi->sdp != gi->gl->gl_sbd ||
- __lockref_is_dead(&gi->gl->gl_lockref));
-
- return 0;
+ } while ((gi->gl) && ((gi->sdp != gi->gl->gl_name.ln_sbd) ||
+ __lockref_is_dead(&gi->gl->gl_lockref)));
}
static void *gfs2_glock_seq_start(struct seq_file *seq, loff_t *pos)
{
struct gfs2_glock_iter *gi = seq->private;
loff_t n = *pos;
+ int ret;
if (gi->last_pos <= *pos)
- n = gi->nhash + (*pos - gi->last_pos);
- else
- gi->hash = 0;
+ n = (*pos - gi->last_pos);
- gi->nhash = 0;
- rcu_read_lock();
+ ret = rhashtable_walk_start(&gi->hti);
+ if (ret)
+ return NULL;
do {
- if (gfs2_glock_iter_next(gi))
- return NULL;
- } while (n--);
+ gfs2_glock_iter_next(gi);
+ } while (gi->gl && n--);
gi->last_pos = *pos;
return gi->gl;
@@ -1907,9 +1826,7 @@
(*pos)++;
gi->last_pos = *pos;
- if (gfs2_glock_iter_next(gi))
- return NULL;
-
+ gfs2_glock_iter_next(gi);
return gi->gl;
}
@@ -1917,9 +1834,8 @@
{
struct gfs2_glock_iter *gi = seq->private;
- if (gi->gl)
- rcu_read_unlock();
gi->gl = NULL;
+ rhashtable_walk_stop(&gi->hti);
}
static int gfs2_glock_seq_show(struct seq_file *seq, void *iter_ptr)
@@ -1930,26 +1846,19 @@
static void *gfs2_sbstats_seq_start(struct seq_file *seq, loff_t *pos)
{
- struct gfs2_glock_iter *gi = seq->private;
-
- gi->hash = *pos;
+ preempt_disable();
if (*pos >= GFS2_NR_SBSTATS)
return NULL;
- preempt_disable();
- return SEQ_START_TOKEN;
+ return pos;
}
static void *gfs2_sbstats_seq_next(struct seq_file *seq, void *iter_ptr,
loff_t *pos)
{
- struct gfs2_glock_iter *gi = seq->private;
(*pos)++;
- gi->hash++;
- if (gi->hash >= GFS2_NR_SBSTATS) {
- preempt_enable();
+ if (*pos >= GFS2_NR_SBSTATS)
return NULL;
- }
- return SEQ_START_TOKEN;
+ return pos;
}
static void gfs2_sbstats_seq_stop(struct seq_file *seq, void *iter_ptr)
@@ -1987,14 +1896,28 @@
if (ret == 0) {
struct seq_file *seq = file->private_data;
struct gfs2_glock_iter *gi = seq->private;
+
gi->sdp = inode->i_private;
+ gi->last_pos = 0;
seq->buf = kmalloc(GFS2_SEQ_GOODSIZE, GFP_KERNEL | __GFP_NOWARN);
if (seq->buf)
seq->size = GFS2_SEQ_GOODSIZE;
+ gi->gl = NULL;
+ ret = rhashtable_walk_init(&gl_hash_table, &gi->hti);
}
return ret;
}
+static int gfs2_glocks_release(struct inode *inode, struct file *file)
+{
+ struct seq_file *seq = file->private_data;
+ struct gfs2_glock_iter *gi = seq->private;
+
+ gi->gl = NULL;
+ rhashtable_walk_exit(&gi->hti);
+ return seq_release_private(inode, file);
+}
+
static int gfs2_glstats_open(struct inode *inode, struct file *file)
{
int ret = seq_open_private(file, &gfs2_glstats_seq_ops,
@@ -2003,21 +1926,22 @@
struct seq_file *seq = file->private_data;
struct gfs2_glock_iter *gi = seq->private;
gi->sdp = inode->i_private;
+ gi->last_pos = 0;
seq->buf = kmalloc(GFS2_SEQ_GOODSIZE, GFP_KERNEL | __GFP_NOWARN);
if (seq->buf)
seq->size = GFS2_SEQ_GOODSIZE;
+ gi->gl = NULL;
+ ret = rhashtable_walk_init(&gl_hash_table, &gi->hti);
}
return ret;
}
static int gfs2_sbstats_open(struct inode *inode, struct file *file)
{
- int ret = seq_open_private(file, &gfs2_sbstats_seq_ops,
- sizeof(struct gfs2_glock_iter));
+ int ret = seq_open(file, &gfs2_sbstats_seq_ops);
if (ret == 0) {
struct seq_file *seq = file->private_data;
- struct gfs2_glock_iter *gi = seq->private;
- gi->sdp = inode->i_private;
+ seq->private = inode->i_private; /* sdp */
}
return ret;
}
@@ -2027,7 +1951,7 @@
.open = gfs2_glocks_open,
.read = seq_read,
.llseek = seq_lseek,
- .release = seq_release_private,
+ .release = gfs2_glocks_release,
};
static const struct file_operations gfs2_glstats_fops = {
@@ -2035,7 +1959,7 @@
.open = gfs2_glstats_open,
.read = seq_read,
.llseek = seq_lseek,
- .release = seq_release_private,
+ .release = gfs2_glocks_release,
};
static const struct file_operations gfs2_sbstats_fops = {
@@ -2043,7 +1967,7 @@
.open = gfs2_sbstats_open,
.read = seq_read,
.llseek = seq_lseek,
- .release = seq_release_private,
+ .release = seq_release,
};
int gfs2_create_debugfs_file(struct gfs2_sbd *sdp)
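[Editor's aside] The glock.c changes above replace the open-coded hash buckets with a resizable hash table. A sketch collecting the rhashtable calls the new code relies on in one place; demo_obj, demo_parms and demo_insert() are hypothetical, with offsets mirroring the ht_parms added above:

/* Sketch only: the lookup/insert pattern used by gfs2_glock_get() after the
 * conversion, reduced to its essentials. */
struct demo_obj {
	struct lm_lockname name;	/* lookup key */
	struct rhash_head node;		/* table linkage */
};

static const struct rhashtable_params demo_parms = {
	.key_len     = sizeof(struct lm_lockname),
	.key_offset  = offsetof(struct demo_obj, name),
	.head_offset = offsetof(struct demo_obj, node),
};

static int demo_insert(struct rhashtable *ht, struct demo_obj *new)
{
	int ret = rhashtable_lookup_insert_fast(ht, &new->node, demo_parms);

	if (ret == -EEXIST) {
		/* Lost the race: someone inserted the same key first. */
		struct demo_obj *old =
			rhashtable_lookup_fast(ht, &new->name, demo_parms);
		/* a real caller would take a reference on 'old' and free 'new' */
		(void)old;
	}
	return ret;
}

With no custom hashfn in the params, rhashtable hashes the fixed-length key for you, which is why gl_hash() and its jhash() calls can be deleted.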
diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c
index fa3fa5e..1f6c9c3 100644
--- a/fs/gfs2/glops.c
+++ b/fs/gfs2/glops.c
@@ -32,13 +32,15 @@
static void gfs2_ail_error(struct gfs2_glock *gl, const struct buffer_head *bh)
{
- fs_err(gl->gl_sbd, "AIL buffer %p: blocknr %llu state 0x%08lx mapping %p page state 0x%lx\n",
+ fs_err(gl->gl_name.ln_sbd,
+ "AIL buffer %p: blocknr %llu state 0x%08lx mapping %p page "
+ "state 0x%lx\n",
bh, (unsigned long long)bh->b_blocknr, bh->b_state,
bh->b_page->mapping, bh->b_page->flags);
- fs_err(gl->gl_sbd, "AIL glock %u:%llu mapping %p\n",
+ fs_err(gl->gl_name.ln_sbd, "AIL glock %u:%llu mapping %p\n",
gl->gl_name.ln_type, gl->gl_name.ln_number,
gfs2_glock2aspace(gl));
- gfs2_lm_withdraw(gl->gl_sbd, "AIL error\n");
+ gfs2_lm_withdraw(gl->gl_name.ln_sbd, "AIL error\n");
}
/**
@@ -52,7 +54,7 @@
static void __gfs2_ail_flush(struct gfs2_glock *gl, bool fsync,
unsigned int nr_revokes)
{
- struct gfs2_sbd *sdp = gl->gl_sbd;
+ struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;
struct list_head *head = &gl->gl_ail_list;
struct gfs2_bufdata *bd, *tmp;
struct buffer_head *bh;
@@ -80,7 +82,7 @@
static void gfs2_ail_empty_gl(struct gfs2_glock *gl)
{
- struct gfs2_sbd *sdp = gl->gl_sbd;
+ struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;
struct gfs2_trans tr;
memset(&tr, 0, sizeof(tr));
@@ -109,7 +111,7 @@
void gfs2_ail_flush(struct gfs2_glock *gl, bool fsync)
{
- struct gfs2_sbd *sdp = gl->gl_sbd;
+ struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;
unsigned int revokes = atomic_read(&gl->gl_ail_count);
unsigned int max_revokes = (sdp->sd_sb.sb_bsize - sizeof(struct gfs2_log_descriptor)) / sizeof(u64);
int ret;
@@ -139,7 +141,7 @@
static void rgrp_go_sync(struct gfs2_glock *gl)
{
- struct gfs2_sbd *sdp = gl->gl_sbd;
+ struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;
struct address_space *mapping = &sdp->sd_aspace;
struct gfs2_rgrpd *rgd;
int error;
@@ -179,7 +181,7 @@
static void rgrp_go_inval(struct gfs2_glock *gl, int flags)
{
- struct gfs2_sbd *sdp = gl->gl_sbd;
+ struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;
struct address_space *mapping = &sdp->sd_aspace;
struct gfs2_rgrpd *rgd = gl->gl_object;
@@ -218,7 +220,7 @@
GLOCK_BUG_ON(gl, gl->gl_state != LM_ST_EXCLUSIVE);
- gfs2_log_flush(gl->gl_sbd, gl, NORMAL_FLUSH);
+ gfs2_log_flush(gl->gl_name.ln_sbd, gl, NORMAL_FLUSH);
filemap_fdatawrite(metamapping);
if (ip) {
struct address_space *mapping = ip->i_inode.i_mapping;
@@ -252,7 +254,7 @@
{
struct gfs2_inode *ip = gl->gl_object;
- gfs2_assert_withdraw(gl->gl_sbd, !atomic_read(&gl->gl_ail_count));
+ gfs2_assert_withdraw(gl->gl_name.ln_sbd, !atomic_read(&gl->gl_ail_count));
if (flags & DIO_METADATA) {
struct address_space *mapping = gfs2_glock2aspace(gl);
@@ -264,9 +266,9 @@
}
}
- if (ip == GFS2_I(gl->gl_sbd->sd_rindex)) {
- gfs2_log_flush(gl->gl_sbd, NULL, NORMAL_FLUSH);
- gl->gl_sbd->sd_rindex_uptodate = 0;
+ if (ip == GFS2_I(gl->gl_name.ln_sbd->sd_rindex)) {
+ gfs2_log_flush(gl->gl_name.ln_sbd, NULL, NORMAL_FLUSH);
+ gl->gl_name.ln_sbd->sd_rindex_uptodate = 0;
}
if (ip && S_ISREG(ip->i_inode.i_mode))
truncate_inode_pages(ip->i_inode.i_mapping, 0);
@@ -281,7 +283,7 @@
static int inode_go_demote_ok(const struct gfs2_glock *gl)
{
- struct gfs2_sbd *sdp = gl->gl_sbd;
+ struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;
struct gfs2_holder *gh;
if (sdp->sd_jindex == gl->gl_object || sdp->sd_rindex == gl->gl_object)
@@ -416,7 +418,7 @@
static int inode_go_lock(struct gfs2_holder *gh)
{
struct gfs2_glock *gl = gh->gh_gl;
- struct gfs2_sbd *sdp = gl->gl_sbd;
+ struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;
struct gfs2_inode *ip = gl->gl_object;
int error = 0;
@@ -477,7 +479,7 @@
static void freeze_go_sync(struct gfs2_glock *gl)
{
int error = 0;
- struct gfs2_sbd *sdp = gl->gl_sbd;
+ struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;
if (gl->gl_state == LM_ST_SHARED &&
test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags)) {
@@ -500,7 +502,7 @@
static int freeze_go_xmote_bh(struct gfs2_glock *gl, struct gfs2_holder *gh)
{
- struct gfs2_sbd *sdp = gl->gl_sbd;
+ struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;
struct gfs2_inode *ip = GFS2_I(sdp->sd_jdesc->jd_inode);
struct gfs2_glock *j_gl = ip->i_gl;
struct gfs2_log_header_host head;
@@ -545,7 +547,7 @@
static void iopen_go_callback(struct gfs2_glock *gl, bool remote)
{
struct gfs2_inode *ip = (struct gfs2_inode *)gl->gl_object;
- struct gfs2_sbd *sdp = gl->gl_sbd;
+ struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;
if (!remote || (sdp->sd_vfs->s_flags & MS_RDONLY))
return;
diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h
index a1ec7c2..121ed08 100644
--- a/fs/gfs2/incore.h
+++ b/fs/gfs2/incore.h
@@ -22,6 +22,7 @@
#include <linux/ktime.h>
#include <linux/percpu.h>
#include <linux/lockref.h>
+#include <linux/rhashtable.h>
#define DIO_WAIT 0x00000010
#define DIO_METADATA 0x00000020
@@ -203,13 +204,15 @@
};
struct lm_lockname {
+ struct gfs2_sbd *ln_sbd;
u64 ln_number;
unsigned int ln_type;
};
#define lm_name_equal(name1, name2) \
- (((name1)->ln_number == (name2)->ln_number) && \
- ((name1)->ln_type == (name2)->ln_type))
+ (((name1)->ln_number == (name2)->ln_number) && \
+ ((name1)->ln_type == (name2)->ln_type) && \
+ ((name1)->ln_sbd == (name2)->ln_sbd))
struct gfs2_glock_operations {
@@ -241,7 +244,7 @@
};
struct gfs2_lkstats {
- s64 stats[GFS2_NR_LKSTATS];
+ u64 stats[GFS2_NR_LKSTATS];
};
enum {
@@ -327,7 +330,6 @@
struct gfs2_glock {
struct hlist_bl_node gl_list;
- struct gfs2_sbd *gl_sbd;
unsigned long gl_flags; /* GLF_... */
struct lm_lockname gl_name;
@@ -341,7 +343,6 @@
gl_req:2, /* State in last dlm request */
gl_reply:8; /* Last reply from the dlm */
- unsigned int gl_hash;
unsigned long gl_demote_time; /* time of first demote request */
long gl_hold_time;
struct list_head gl_holders;
@@ -367,7 +368,7 @@
loff_t end;
} gl_vm;
};
- struct rcu_head gl_rcu;
+ struct rhash_head gl_node;
};
#define GFS2_MIN_LVB_SIZE 32 /* Min size of LVB that gfs2 supports */
@@ -835,7 +836,7 @@
static inline void gfs2_sbstats_inc(const struct gfs2_glock *gl, int which)
{
- const struct gfs2_sbd *sdp = gl->gl_sbd;
+ const struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;
preempt_disable();
this_cpu_ptr(sdp->sd_lkstats)->lkstats[gl->gl_name.ln_type].stats[which]++;
preempt_enable();
diff --git a/fs/gfs2/lock_dlm.c b/fs/gfs2/lock_dlm.c
index 641383a..284c154 100644
--- a/fs/gfs2/lock_dlm.c
+++ b/fs/gfs2/lock_dlm.c
@@ -31,7 +31,7 @@
*
* @delta is the difference between the current rtt sample and the
* running average srtt. We add 1/8 of that to the srtt in order to
- * update the current srtt estimate. The varience estimate is a bit
+ * update the current srtt estimate. The variance estimate is a bit
* more complicated. We subtract the abs value of the @delta from
* the current variance estimate and add 1/4 of that to the running
* total.
@@ -80,7 +80,7 @@
preempt_disable();
rtt = ktime_to_ns(ktime_sub(ktime_get_real(), gl->gl_dstamp));
- lks = this_cpu_ptr(gl->gl_sbd->sd_lkstats);
+ lks = this_cpu_ptr(gl->gl_name.ln_sbd->sd_lkstats);
gfs2_update_stats(&gl->gl_stats, index, rtt); /* Local */
gfs2_update_stats(&lks->lkstats[gltype], index, rtt); /* Global */
preempt_enable();
@@ -108,7 +108,7 @@
dstamp = gl->gl_dstamp;
gl->gl_dstamp = ktime_get_real();
irt = ktime_to_ns(ktime_sub(gl->gl_dstamp, dstamp));
- lks = this_cpu_ptr(gl->gl_sbd->sd_lkstats);
+ lks = this_cpu_ptr(gl->gl_name.ln_sbd->sd_lkstats);
gfs2_update_stats(&gl->gl_stats, GFS2_LKS_SIRT, irt); /* Local */
gfs2_update_stats(&lks->lkstats[gltype], GFS2_LKS_SIRT, irt); /* Global */
preempt_enable();
@@ -253,7 +253,7 @@
static int gdlm_lock(struct gfs2_glock *gl, unsigned int req_state,
unsigned int flags)
{
- struct lm_lockstruct *ls = &gl->gl_sbd->sd_lockstruct;
+ struct lm_lockstruct *ls = &gl->gl_name.ln_sbd->sd_lockstruct;
int req;
u32 lkf;
char strname[GDLM_STRNAME_BYTES] = "";
@@ -281,7 +281,7 @@
static void gdlm_put_lock(struct gfs2_glock *gl)
{
- struct gfs2_sbd *sdp = gl->gl_sbd;
+ struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;
struct lm_lockstruct *ls = &sdp->sd_lockstruct;
int lvb_needs_unlock = 0;
int error;
@@ -319,7 +319,7 @@
static void gdlm_cancel(struct gfs2_glock *gl)
{
- struct lm_lockstruct *ls = &gl->gl_sbd->sd_lockstruct;
+ struct lm_lockstruct *ls = &gl->gl_name.ln_sbd->sd_lockstruct;
dlm_unlock(ls->ls_dlm, gl->gl_lksb.sb_lkid, DLM_LKF_CANCEL, NULL, gl);
}
diff --git a/fs/gfs2/lops.c b/fs/gfs2/lops.c
index 92324ac..d5369a1 100644
--- a/fs/gfs2/lops.c
+++ b/fs/gfs2/lops.c
@@ -70,7 +70,7 @@
static void maybe_release_space(struct gfs2_bufdata *bd)
{
struct gfs2_glock *gl = bd->bd_gl;
- struct gfs2_sbd *sdp = gl->gl_sbd;
+ struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;
struct gfs2_rgrpd *rgd = gl->gl_object;
unsigned int index = bd->bd_bh->b_blocknr - gl->gl_name.ln_number;
struct gfs2_bitmap *bi = rgd->rd_bits + index;
@@ -578,7 +578,7 @@
static void gfs2_meta_sync(struct gfs2_glock *gl)
{
struct address_space *mapping = gfs2_glock2aspace(gl);
- struct gfs2_sbd *sdp = gl->gl_sbd;
+ struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;
int error;
if (mapping == NULL)
@@ -588,7 +588,7 @@
error = filemap_fdatawait(mapping);
if (error)
- gfs2_io_error(gl->gl_sbd);
+ gfs2_io_error(gl->gl_name.ln_sbd);
}
static void buf_lo_after_scan(struct gfs2_jdesc *jd, int error, int pass)
diff --git a/fs/gfs2/meta_io.c b/fs/gfs2/meta_io.c
index b984a6e..0e1d4be 100644
--- a/fs/gfs2/meta_io.c
+++ b/fs/gfs2/meta_io.c
@@ -114,7 +114,7 @@
struct buffer_head *gfs2_getbuf(struct gfs2_glock *gl, u64 blkno, int create)
{
struct address_space *mapping = gfs2_glock2aspace(gl);
- struct gfs2_sbd *sdp = gl->gl_sbd;
+ struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;
struct page *page;
struct buffer_head *bh;
unsigned int shift;
@@ -200,7 +200,7 @@
int gfs2_meta_read(struct gfs2_glock *gl, u64 blkno, int flags,
struct buffer_head **bhp)
{
- struct gfs2_sbd *sdp = gl->gl_sbd;
+ struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;
struct buffer_head *bh;
if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags))) {
@@ -362,7 +362,7 @@
struct buffer_head *gfs2_meta_ra(struct gfs2_glock *gl, u64 dblock, u32 extlen)
{
- struct gfs2_sbd *sdp = gl->gl_sbd;
+ struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;
struct buffer_head *first_bh, *bh;
u32 max_ra = gfs2_tune_get(sdp, gt_max_readahead) >>
sdp->sd_sb.sb_bsize_shift;
diff --git a/fs/gfs2/meta_io.h b/fs/gfs2/meta_io.h
index ac5d802..8ca1615 100644
--- a/fs/gfs2/meta_io.h
+++ b/fs/gfs2/meta_io.h
@@ -44,7 +44,7 @@
{
struct inode *inode = mapping->host;
if (mapping->a_ops == &gfs2_meta_aops)
- return (((struct gfs2_glock *)mapping) - 1)->gl_sbd;
+ return (((struct gfs2_glock *)mapping) - 1)->gl_name.ln_sbd;
else if (mapping->a_ops == &gfs2_rgrp_aops)
return container_of(mapping, struct gfs2_sbd, sd_aspace);
else
diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c
index 9b61f92..3a31226 100644
--- a/fs/gfs2/quota.c
+++ b/fs/gfs2/quota.c
@@ -119,7 +119,7 @@
while (!list_empty(list)) {
qd = list_entry(list->next, struct gfs2_quota_data, qd_lru);
- sdp = qd->qd_gl->gl_sbd;
+ sdp = qd->qd_gl->gl_name.ln_sbd;
list_del(&qd->qd_lru);
@@ -302,7 +302,7 @@
static void qd_hold(struct gfs2_quota_data *qd)
{
- struct gfs2_sbd *sdp = qd->qd_gl->gl_sbd;
+ struct gfs2_sbd *sdp = qd->qd_gl->gl_name.ln_sbd;
gfs2_assert(sdp, !__lockref_is_dead(&qd->qd_lockref));
lockref_get(&qd->qd_lockref);
}
@@ -367,7 +367,7 @@
static int bh_get(struct gfs2_quota_data *qd)
{
- struct gfs2_sbd *sdp = qd->qd_gl->gl_sbd;
+ struct gfs2_sbd *sdp = qd->qd_gl->gl_name.ln_sbd;
struct gfs2_inode *ip = GFS2_I(sdp->sd_qc_inode);
unsigned int block, offset;
struct buffer_head *bh;
@@ -414,7 +414,7 @@
static void bh_put(struct gfs2_quota_data *qd)
{
- struct gfs2_sbd *sdp = qd->qd_gl->gl_sbd;
+ struct gfs2_sbd *sdp = qd->qd_gl->gl_name.ln_sbd;
mutex_lock(&sdp->sd_quota_mutex);
gfs2_assert(sdp, qd->qd_bh_count);
@@ -486,7 +486,7 @@
static void qd_unlock(struct gfs2_quota_data *qd)
{
- gfs2_assert_warn(qd->qd_gl->gl_sbd,
+ gfs2_assert_warn(qd->qd_gl->gl_name.ln_sbd,
test_bit(QDF_LOCKED, &qd->qd_flags));
clear_bit(QDF_LOCKED, &qd->qd_flags);
bh_put(qd);
@@ -614,7 +614,7 @@
static void do_qc(struct gfs2_quota_data *qd, s64 change)
{
- struct gfs2_sbd *sdp = qd->qd_gl->gl_sbd;
+ struct gfs2_sbd *sdp = qd->qd_gl->gl_name.ln_sbd;
struct gfs2_inode *ip = GFS2_I(sdp->sd_qc_inode);
struct gfs2_quota_change *qc = qd->qd_bh_qc;
s64 x;
@@ -831,7 +831,7 @@
static int do_sync(unsigned int num_qd, struct gfs2_quota_data **qda)
{
- struct gfs2_sbd *sdp = (*qda)->qd_gl->gl_sbd;
+ struct gfs2_sbd *sdp = (*qda)->qd_gl->gl_name.ln_sbd;
struct gfs2_inode *ip = GFS2_I(sdp->sd_quota_inode);
struct gfs2_alloc_parms ap = { .aflags = 0, };
unsigned int data_blocks, ind_blocks;
@@ -922,7 +922,7 @@
gfs2_glock_dq_uninit(&ghs[qx]);
mutex_unlock(&ip->i_inode.i_mutex);
kfree(ghs);
- gfs2_log_flush(ip->i_gl->gl_sbd, ip->i_gl, NORMAL_FLUSH);
+ gfs2_log_flush(ip->i_gl->gl_name.ln_sbd, ip->i_gl, NORMAL_FLUSH);
return error;
}
@@ -954,7 +954,7 @@
static int do_glock(struct gfs2_quota_data *qd, int force_refresh,
struct gfs2_holder *q_gh)
{
- struct gfs2_sbd *sdp = qd->qd_gl->gl_sbd;
+ struct gfs2_sbd *sdp = qd->qd_gl->gl_name.ln_sbd;
struct gfs2_inode *ip = GFS2_I(sdp->sd_quota_inode);
struct gfs2_holder i_gh;
int error;
@@ -1037,7 +1037,7 @@
static int need_sync(struct gfs2_quota_data *qd)
{
- struct gfs2_sbd *sdp = qd->qd_gl->gl_sbd;
+ struct gfs2_sbd *sdp = qd->qd_gl->gl_name.ln_sbd;
struct gfs2_tune *gt = &sdp->sd_tune;
s64 value;
unsigned int num, den;
@@ -1125,7 +1125,7 @@
static int print_message(struct gfs2_quota_data *qd, char *type)
{
- struct gfs2_sbd *sdp = qd->qd_gl->gl_sbd;
+ struct gfs2_sbd *sdp = qd->qd_gl->gl_name.ln_sbd;
fs_info(sdp, "quota %s for %s %u\n",
type,
diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c
index c6c6232..475985d 100644
--- a/fs/gfs2/rgrp.c
+++ b/fs/gfs2/rgrp.c
@@ -1860,13 +1860,13 @@
static bool gfs2_rgrp_congested(const struct gfs2_rgrpd *rgd, int loops)
{
const struct gfs2_glock *gl = rgd->rd_gl;
- const struct gfs2_sbd *sdp = gl->gl_sbd;
+ const struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;
struct gfs2_lkstats *st;
- s64 r_dcount, l_dcount;
- s64 l_srttb, a_srttb = 0;
+ u64 r_dcount, l_dcount;
+ u64 l_srttb, a_srttb = 0;
s64 srttb_diff;
- s64 sqr_diff;
- s64 var;
+ u64 sqr_diff;
+ u64 var;
int cpu, nonzero = 0;
preempt_disable();
diff --git a/fs/gfs2/trace_gfs2.h b/fs/gfs2/trace_gfs2.h
index 20c007d..49ac55d 100644
--- a/fs/gfs2/trace_gfs2.h
+++ b/fs/gfs2/trace_gfs2.h
@@ -104,7 +104,7 @@
),
TP_fast_assign(
- __entry->dev = gl->gl_sbd->sd_vfs->s_dev;
+ __entry->dev = gl->gl_name.ln_sbd->sd_vfs->s_dev;
__entry->glnum = gl->gl_name.ln_number;
__entry->gltype = gl->gl_name.ln_type;
__entry->cur_state = glock_trace_state(gl->gl_state);
@@ -140,7 +140,7 @@
),
TP_fast_assign(
- __entry->dev = gl->gl_sbd->sd_vfs->s_dev;
+ __entry->dev = gl->gl_name.ln_sbd->sd_vfs->s_dev;
__entry->gltype = gl->gl_name.ln_type;
__entry->glnum = gl->gl_name.ln_number;
__entry->cur_state = glock_trace_state(gl->gl_state);
@@ -174,7 +174,7 @@
),
TP_fast_assign(
- __entry->dev = gl->gl_sbd->sd_vfs->s_dev;
+ __entry->dev = gl->gl_name.ln_sbd->sd_vfs->s_dev;
__entry->gltype = gl->gl_name.ln_type;
__entry->glnum = gl->gl_name.ln_number;
__entry->cur_state = glock_trace_state(gl->gl_state);
@@ -209,7 +209,7 @@
),
TP_fast_assign(
- __entry->dev = gh->gh_gl->gl_sbd->sd_vfs->s_dev;
+ __entry->dev = gh->gh_gl->gl_name.ln_sbd->sd_vfs->s_dev;
__entry->glnum = gh->gh_gl->gl_name.ln_number;
__entry->gltype = gh->gh_gl->gl_name.ln_type;
__entry->first = first;
@@ -239,7 +239,7 @@
),
TP_fast_assign(
- __entry->dev = gh->gh_gl->gl_sbd->sd_vfs->s_dev;
+ __entry->dev = gh->gh_gl->gl_name.ln_sbd->sd_vfs->s_dev;
__entry->glnum = gh->gh_gl->gl_name.ln_number;
__entry->gltype = gh->gh_gl->gl_name.ln_type;
__entry->queue = queue;
@@ -267,18 +267,18 @@
__field( int, status )
__field( char, flags )
__field( s64, tdiff )
- __field( s64, srtt )
- __field( s64, srttvar )
- __field( s64, srttb )
- __field( s64, srttvarb )
- __field( s64, sirt )
- __field( s64, sirtvar )
- __field( s64, dcount )
- __field( s64, qcount )
+ __field( u64, srtt )
+ __field( u64, srttvar )
+ __field( u64, srttb )
+ __field( u64, srttvarb )
+ __field( u64, sirt )
+ __field( u64, sirtvar )
+ __field( u64, dcount )
+ __field( u64, qcount )
),
TP_fast_assign(
- __entry->dev = gl->gl_sbd->sd_vfs->s_dev;
+ __entry->dev = gl->gl_name.ln_sbd->sd_vfs->s_dev;
__entry->glnum = gl->gl_name.ln_number;
__entry->gltype = gl->gl_name.ln_type;
__entry->status = gl->gl_lksb.sb_status;
@@ -333,7 +333,7 @@
),
TP_fast_assign(
- __entry->dev = bd->bd_gl->gl_sbd->sd_vfs->s_dev;
+ __entry->dev = bd->bd_gl->gl_name.ln_sbd->sd_vfs->s_dev;
__entry->pin = pin;
__entry->len = bd->bd_bh->b_size;
__entry->block = bd->bd_bh->b_blocknr;
@@ -449,7 +449,7 @@
),
TP_fast_assign(
- __entry->dev = ip->i_gl->gl_sbd->sd_vfs->s_dev;
+ __entry->dev = ip->i_gl->gl_name.ln_sbd->sd_vfs->s_dev;
__entry->lblock = lblock;
__entry->pblock = buffer_mapped(bh) ? bh->b_blocknr : 0;
__entry->inum = ip->i_no_addr;
@@ -489,7 +489,7 @@
),
TP_fast_assign(
- __entry->dev = rgd->rd_gl->gl_sbd->sd_vfs->s_dev;
+ __entry->dev = rgd->rd_gl->gl_name.ln_sbd->sd_vfs->s_dev;
__entry->start = block;
__entry->inum = ip->i_no_addr;
__entry->len = len;
diff --git a/fs/gfs2/trans.c b/fs/gfs2/trans.c
index 88bff24..b95d0d6 100644
--- a/fs/gfs2/trans.c
+++ b/fs/gfs2/trans.c
@@ -158,7 +158,7 @@
void gfs2_trans_add_data(struct gfs2_glock *gl, struct buffer_head *bh)
{
struct gfs2_trans *tr = current->journal_info;
- struct gfs2_sbd *sdp = gl->gl_sbd;
+ struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;
struct address_space *mapping = bh->b_page->mapping;
struct gfs2_inode *ip = GFS2_I(mapping->host);
struct gfs2_bufdata *bd;
@@ -224,7 +224,7 @@
void gfs2_trans_add_meta(struct gfs2_glock *gl, struct buffer_head *bh)
{
- struct gfs2_sbd *sdp = gl->gl_sbd;
+ struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;
struct gfs2_bufdata *bd;
lock_buffer(bh);
diff --git a/fs/hfs/bnode.c b/fs/hfs/bnode.c
index d3fa6bd..221719e 100644
--- a/fs/hfs/bnode.c
+++ b/fs/hfs/bnode.c
@@ -288,7 +288,6 @@
page_cache_release(page);
goto fail;
}
- page_cache_release(page);
node->page[i] = page;
}
@@ -398,11 +397,11 @@
void hfs_bnode_free(struct hfs_bnode *node)
{
- //int i;
+ int i;
- //for (i = 0; i < node->tree->pages_per_bnode; i++)
- // if (node->page[i])
- // page_cache_release(node->page[i]);
+ for (i = 0; i < node->tree->pages_per_bnode; i++)
+ if (node->page[i])
+ page_cache_release(node->page[i]);
kfree(node);
}
diff --git a/fs/hfs/brec.c b/fs/hfs/brec.c
index 9f4ee7f..6fc766d 100644
--- a/fs/hfs/brec.c
+++ b/fs/hfs/brec.c
@@ -131,13 +131,16 @@
hfs_bnode_write(node, entry, data_off + key_len, entry_len);
hfs_bnode_dump(node);
- if (new_node) {
- /* update parent key if we inserted a key
- * at the start of the first node
- */
- if (!rec && new_node != node)
- hfs_brec_update_parent(fd);
+ /*
+ * update parent key if we inserted a key
+ * at the start of the node and it is not the new node
+ */
+ if (!rec && new_node != node) {
+ hfs_bnode_read_key(node, fd->search_key, data_off + size);
+ hfs_brec_update_parent(fd);
+ }
+ if (new_node) {
hfs_bnode_put(fd->bnode);
if (!new_node->parent) {
hfs_btree_inc_height(tree);
@@ -166,9 +169,6 @@
goto again;
}
- if (!rec)
- hfs_brec_update_parent(fd);
-
return 0;
}
@@ -366,6 +366,8 @@
if (IS_ERR(parent))
return PTR_ERR(parent);
__hfs_brec_find(parent, fd);
+ if (fd->record < 0)
+ return -ENOENT;
hfs_bnode_dump(parent);
rec = fd->record;
diff --git a/fs/hfsplus/bnode.c b/fs/hfsplus/bnode.c
index 759708f..6392466 100644
--- a/fs/hfsplus/bnode.c
+++ b/fs/hfsplus/bnode.c
@@ -454,7 +454,6 @@
page_cache_release(page);
goto fail;
}
- page_cache_release(page);
node->page[i] = page;
}
@@ -566,13 +565,11 @@
void hfs_bnode_free(struct hfs_bnode *node)
{
-#if 0
int i;
for (i = 0; i < node->tree->pages_per_bnode; i++)
if (node->page[i])
page_cache_release(node->page[i]);
-#endif
kfree(node);
}
diff --git a/fs/kernfs/dir.c b/fs/kernfs/dir.c
index 2d48d28..91e0045 100644
--- a/fs/kernfs/dir.c
+++ b/fs/kernfs/dir.c
@@ -92,6 +92,29 @@
}
/**
+ * kernfs_path_len - determine the length of the full path of a given node
+ * @kn: kernfs_node of interest
+ *
+ * The returned length doesn't include the space for the terminating '\0'.
+ */
+size_t kernfs_path_len(struct kernfs_node *kn)
+{
+ size_t len = 0;
+ unsigned long flags;
+
+ spin_lock_irqsave(&kernfs_rename_lock, flags);
+
+ do {
+ len += strlen(kn->name) + 1;
+ kn = kn->parent;
+ } while (kn && kn->parent);
+
+ spin_unlock_irqrestore(&kernfs_rename_lock, flags);
+
+ return len;
+}
+
+/**
* kernfs_path - build full path of a given node
* @kn: kernfs_node of interest
* @buf: buffer to copy @kn's name into
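[Editor's aside] A sketch of how a caller might pair the new kernfs_path_len() with the existing kernfs_path(); it assumes the kernfs_path(kn, buf, buflen) signature of this kernel series, and alloc_kn_path() is a hypothetical name:

/* Sketch only: size a buffer from kernfs_path_len(), then fill it. */
static char *alloc_kn_path(struct kernfs_node *kn)
{
	size_t len = kernfs_path_len(kn);	/* excludes the trailing '\0' */
	char *buf = kmalloc(len + 1, GFP_KERNEL);

	if (buf)
		kernfs_path(kn, buf, len + 1);
	return buf;
}

Note that kernfs_rename_lock is dropped between the two calls, so the path can change in between; a careful caller would tolerate truncation or retry.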
diff --git a/fs/namei.c b/fs/namei.c
index 29b9279..726d211 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -2438,7 +2438,7 @@
/**
* path_mountpoint - look up a path to be umounted
- * @nameidata: lookup context
+ * @nd: lookup context
* @flags: lookup flags
* @path: pointer to container for result
*
diff --git a/fs/nsfs.c b/fs/nsfs.c
index e4905fb..8f20d60 100644
--- a/fs/nsfs.c
+++ b/fs/nsfs.c
@@ -142,7 +142,8 @@
struct inode *inode = d_inode(dentry);
const struct proc_ns_operations *ns_ops = dentry->d_fsdata;
- return seq_printf(seq, "%s:[%lu]", ns_ops->name, inode->i_ino);
+ seq_printf(seq, "%s:[%lu]", ns_ops->name, inode->i_ino);
+ return 0;
}
static const struct super_operations nsfs_ops = {
diff --git a/fs/ocfs2/dlm/dlmrecovery.c b/fs/ocfs2/dlm/dlmrecovery.c
index d0e436d..ce12e0b 100644
--- a/fs/ocfs2/dlm/dlmrecovery.c
+++ b/fs/ocfs2/dlm/dlmrecovery.c
@@ -1776,7 +1776,7 @@
struct dlm_migratable_lockres *mres)
{
struct dlm_migratable_lock *ml;
- struct list_head *queue;
+ struct list_head *queue, *iter;
struct list_head *tmpq = NULL;
struct dlm_lock *newlock = NULL;
struct dlm_lockstatus *lksb = NULL;
@@ -1821,7 +1821,9 @@
spin_lock(&res->spinlock);
for (j = DLM_GRANTED_LIST; j <= DLM_BLOCKED_LIST; j++) {
tmpq = dlm_list_idx_to_ptr(res, j);
- list_for_each_entry(lock, tmpq, list) {
+ list_for_each(iter, tmpq) {
+ lock = list_entry(iter,
+ struct dlm_lock, list);
if (lock->ml.cookie == ml->cookie)
break;
lock = NULL;
diff --git a/fs/proc/base.c b/fs/proc/base.c
index aa50d1a..b25eee4 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -1230,10 +1230,9 @@
size_t count, loff_t *ppos)
{
struct inode * inode = file_inode(file);
- char *page, *tmp;
- ssize_t length;
uid_t loginuid;
kuid_t kloginuid;
+ int rv;
rcu_read_lock();
if (current != pid_task(proc_pid(inode), PIDTYPE_PID)) {
@@ -1242,46 +1241,28 @@
}
rcu_read_unlock();
- if (count >= PAGE_SIZE)
- count = PAGE_SIZE - 1;
-
if (*ppos != 0) {
/* No partial writes. */
return -EINVAL;
}
- page = (char*)__get_free_page(GFP_TEMPORARY);
- if (!page)
- return -ENOMEM;
- length = -EFAULT;
- if (copy_from_user(page, buf, count))
- goto out_free_page;
- page[count] = '\0';
- loginuid = simple_strtoul(page, &tmp, 10);
- if (tmp == page) {
- length = -EINVAL;
- goto out_free_page;
-
- }
+ rv = kstrtou32_from_user(buf, count, 10, &loginuid);
+ if (rv < 0)
+ return rv;
/* is userspace tring to explicitly UNSET the loginuid? */
if (loginuid == AUDIT_UID_UNSET) {
kloginuid = INVALID_UID;
} else {
kloginuid = make_kuid(file->f_cred->user_ns, loginuid);
- if (!uid_valid(kloginuid)) {
- length = -EINVAL;
- goto out_free_page;
- }
+ if (!uid_valid(kloginuid))
+ return -EINVAL;
}
- length = audit_set_loginuid(kloginuid);
- if (likely(length == 0))
- length = count;
-
-out_free_page:
- free_page((unsigned long) page);
- return length;
+ rv = audit_set_loginuid(kloginuid);
+ if (rv < 0)
+ return rv;
+ return count;
}
static const struct file_operations proc_loginuid_operations = {
@@ -1335,8 +1316,9 @@
const char __user * buf, size_t count, loff_t *ppos)
{
struct task_struct *task;
- char buffer[PROC_NUMBUF], *end;
+ char buffer[PROC_NUMBUF];
int make_it_fail;
+ int rv;
if (!capable(CAP_SYS_RESOURCE))
return -EPERM;
@@ -1345,9 +1327,9 @@
count = sizeof(buffer) - 1;
if (copy_from_user(buffer, buf, count))
return -EFAULT;
- make_it_fail = simple_strtol(strstrip(buffer), &end, 0);
- if (*end)
- return -EINVAL;
+ rv = kstrtoint(strstrip(buffer), 0, &make_it_fail);
+ if (rv < 0)
+ return rv;
if (make_it_fail < 0 || make_it_fail > 1)
return -EINVAL;
@@ -1836,8 +1818,6 @@
return dir_emit(ctx, name, len, 1, DT_UNKNOWN);
}
-#ifdef CONFIG_CHECKPOINT_RESTORE
-
/*
* dname_to_vma_addr - maps a dentry name into two unsigned longs
* which represent vma start and end addresses.
@@ -1864,11 +1844,6 @@
if (flags & LOOKUP_RCU)
return -ECHILD;
- if (!capable(CAP_SYS_ADMIN)) {
- status = -EPERM;
- goto out_notask;
- }
-
inode = d_inode(dentry);
task = get_proc_task(inode);
if (!task)
@@ -1957,6 +1932,29 @@
unsigned char name[4*sizeof(long)+2]; /* max: %lx-%lx\0 */
};
+/*
+ * Only allow CAP_SYS_ADMIN to follow the links, due to concerns about how the
+ * symlinks may be used to bypass permissions on ancestor directories in the
+ * path to the file in question.
+ */
+static const char *
+proc_map_files_follow_link(struct dentry *dentry, void **cookie)
+{
+ if (!capable(CAP_SYS_ADMIN))
+ return ERR_PTR(-EPERM);
+
+ return proc_pid_follow_link(dentry, NULL);
+}
+
+/*
+ * Identical to proc_pid_link_inode_operations except for follow_link()
+ */
+static const struct inode_operations proc_map_files_link_inode_operations = {
+ .readlink = proc_pid_readlink,
+ .follow_link = proc_map_files_follow_link,
+ .setattr = proc_setattr,
+};
+
static int
proc_map_files_instantiate(struct inode *dir, struct dentry *dentry,
struct task_struct *task, const void *ptr)
@@ -1972,7 +1970,7 @@
ei = PROC_I(inode);
ei->op.proc_get_link = proc_map_files_get_link;
- inode->i_op = &proc_pid_link_inode_operations;
+ inode->i_op = &proc_map_files_link_inode_operations;
inode->i_size = 64;
inode->i_mode = S_IFLNK;
@@ -1996,10 +1994,6 @@
int result;
struct mm_struct *mm;
- result = -EPERM;
- if (!capable(CAP_SYS_ADMIN))
- goto out;
-
result = -ENOENT;
task = get_proc_task(dir);
if (!task)
@@ -2053,10 +2047,6 @@
struct map_files_info *p;
int ret;
- ret = -EPERM;
- if (!capable(CAP_SYS_ADMIN))
- goto out;
-
ret = -ENOENT;
task = get_proc_task(file_inode(file));
if (!task)
@@ -2245,7 +2235,6 @@
.llseek = seq_lseek,
.release = seq_release_private,
};
-#endif /* CONFIG_CHECKPOINT_RESTORE */
static int proc_pident_instantiate(struct inode *dir,
struct dentry *dentry, struct task_struct *task, const void *ptr)
@@ -2481,32 +2470,20 @@
{
struct task_struct *task;
struct mm_struct *mm;
- char buffer[PROC_NUMBUF], *end;
unsigned int val;
int ret;
int i;
unsigned long mask;
- ret = -EFAULT;
- memset(buffer, 0, sizeof(buffer));
- if (count > sizeof(buffer) - 1)
- count = sizeof(buffer) - 1;
- if (copy_from_user(buffer, buf, count))
- goto out_no_task;
-
- ret = -EINVAL;
- val = (unsigned int)simple_strtoul(buffer, &end, 0);
- if (*end == '\n')
- end++;
- if (end - buffer == 0)
- goto out_no_task;
+ ret = kstrtouint_from_user(buf, count, 0, &val);
+ if (ret < 0)
+ return ret;
ret = -ESRCH;
task = get_proc_task(file_inode(file));
if (!task)
goto out_no_task;
- ret = end - buffer;
mm = get_task_mm(task);
if (!mm)
goto out_no_mm;
@@ -2522,7 +2499,9 @@
out_no_mm:
put_task_struct(task);
out_no_task:
- return ret;
+ if (ret < 0)
+ return ret;
+ return count;
}
static const struct file_operations proc_coredump_filter_operations = {
@@ -2744,9 +2723,7 @@
static const struct pid_entry tgid_base_stuff[] = {
DIR("task", S_IRUGO|S_IXUGO, proc_task_inode_operations, proc_task_operations),
DIR("fd", S_IRUSR|S_IXUSR, proc_fd_inode_operations, proc_fd_operations),
-#ifdef CONFIG_CHECKPOINT_RESTORE
DIR("map_files", S_IRUSR|S_IXUSR, proc_map_files_inode_operations, proc_map_files_operations),
-#endif
DIR("fdinfo", S_IRUSR|S_IXUSR, proc_fdinfo_inode_operations, proc_fdinfo_operations),
DIR("ns", S_IRUSR|S_IXUGO, proc_ns_dir_inode_operations, proc_ns_dir_operations),
#ifdef CONFIG_NET
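
The fs/proc/base.c hunks above replace the open-coded copy_from_user() + simple_strtoul() parsing in the loginuid, fault-injection and coredump_filter write handlers with the kstrto*_from_user()/kstrtoint() helpers, and drop the CONFIG_CHECKPOINT_RESTORE/CAP_SYS_ADMIN gating of /proc/<pid>/map_files in favour of a CAP_SYS_ADMIN check in ->follow_link(). A minimal sketch of the parsing pattern, with a hypothetical handler:

static ssize_t demo_write(struct file *file, const char __user *buf,
			  size_t count, loff_t *ppos)
{
	unsigned int val;
	int rv;

	/* Copy and parse the user buffer in one call; no temporary
	 * page allocation is needed any more.
	 */
	rv = kstrtouint_from_user(buf, count, 0, &val);
	if (rv < 0)
		return rv;

	/* ... act on val ... */
	return count;	/* claim the whole write, as the handlers above do */
}
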
diff --git a/fs/proc/generic.c b/fs/proc/generic.c
index e5dee5c..ff3ffc7 100644
--- a/fs/proc/generic.c
+++ b/fs/proc/generic.c
@@ -26,7 +26,7 @@
#include "internal.h"
-static DEFINE_SPINLOCK(proc_subdir_lock);
+static DEFINE_RWLOCK(proc_subdir_lock);
static int proc_match(unsigned int len, const char *name, struct proc_dir_entry *de)
{
@@ -172,9 +172,9 @@
{
int rv;
- spin_lock(&proc_subdir_lock);
+ read_lock(&proc_subdir_lock);
rv = __xlate_proc_name(name, ret, residual);
- spin_unlock(&proc_subdir_lock);
+ read_unlock(&proc_subdir_lock);
return rv;
}
@@ -231,11 +231,11 @@
{
struct inode *inode;
- spin_lock(&proc_subdir_lock);
+ read_lock(&proc_subdir_lock);
de = pde_subdir_find(de, dentry->d_name.name, dentry->d_name.len);
if (de) {
pde_get(de);
- spin_unlock(&proc_subdir_lock);
+ read_unlock(&proc_subdir_lock);
inode = proc_get_inode(dir->i_sb, de);
if (!inode)
return ERR_PTR(-ENOMEM);
@@ -243,7 +243,7 @@
d_add(dentry, inode);
return NULL;
}
- spin_unlock(&proc_subdir_lock);
+ read_unlock(&proc_subdir_lock);
return ERR_PTR(-ENOENT);
}
@@ -270,12 +270,12 @@
if (!dir_emit_dots(file, ctx))
return 0;
- spin_lock(&proc_subdir_lock);
+ read_lock(&proc_subdir_lock);
de = pde_subdir_first(de);
i = ctx->pos - 2;
for (;;) {
if (!de) {
- spin_unlock(&proc_subdir_lock);
+ read_unlock(&proc_subdir_lock);
return 0;
}
if (!i)
@@ -287,19 +287,19 @@
do {
struct proc_dir_entry *next;
pde_get(de);
- spin_unlock(&proc_subdir_lock);
+ read_unlock(&proc_subdir_lock);
if (!dir_emit(ctx, de->name, de->namelen,
de->low_ino, de->mode >> 12)) {
pde_put(de);
return 0;
}
- spin_lock(&proc_subdir_lock);
+ read_lock(&proc_subdir_lock);
ctx->pos++;
next = pde_subdir_next(de);
pde_put(de);
de = next;
} while (de);
- spin_unlock(&proc_subdir_lock);
+ read_unlock(&proc_subdir_lock);
return 1;
}
@@ -338,16 +338,16 @@
if (ret)
return ret;
- spin_lock(&proc_subdir_lock);
+ write_lock(&proc_subdir_lock);
dp->parent = dir;
if (pde_subdir_insert(dir, dp) == false) {
WARN(1, "proc_dir_entry '%s/%s' already registered\n",
dir->name, dp->name);
- spin_unlock(&proc_subdir_lock);
+ write_unlock(&proc_subdir_lock);
proc_free_inum(dp->low_ino);
return -EEXIST;
}
- spin_unlock(&proc_subdir_lock);
+ write_unlock(&proc_subdir_lock);
return 0;
}
@@ -549,9 +549,9 @@
const char *fn = name;
unsigned int len;
- spin_lock(&proc_subdir_lock);
+ write_lock(&proc_subdir_lock);
if (__xlate_proc_name(name, &parent, &fn) != 0) {
- spin_unlock(&proc_subdir_lock);
+ write_unlock(&proc_subdir_lock);
return;
}
len = strlen(fn);
@@ -559,7 +559,7 @@
de = pde_subdir_find(parent, fn, len);
if (de)
rb_erase(&de->subdir_node, &parent->subdir);
- spin_unlock(&proc_subdir_lock);
+ write_unlock(&proc_subdir_lock);
if (!de) {
WARN(1, "name '%s'\n", name);
return;
@@ -583,16 +583,16 @@
const char *fn = name;
unsigned int len;
- spin_lock(&proc_subdir_lock);
+ write_lock(&proc_subdir_lock);
if (__xlate_proc_name(name, &parent, &fn) != 0) {
- spin_unlock(&proc_subdir_lock);
+ write_unlock(&proc_subdir_lock);
return -ENOENT;
}
len = strlen(fn);
root = pde_subdir_find(parent, fn, len);
if (!root) {
- spin_unlock(&proc_subdir_lock);
+ write_unlock(&proc_subdir_lock);
return -ENOENT;
}
rb_erase(&root->subdir_node, &parent->subdir);
@@ -605,7 +605,7 @@
de = next;
continue;
}
- spin_unlock(&proc_subdir_lock);
+ write_unlock(&proc_subdir_lock);
proc_entry_rundown(de);
next = de->parent;
@@ -616,7 +616,7 @@
break;
pde_put(de);
- spin_lock(&proc_subdir_lock);
+ write_lock(&proc_subdir_lock);
de = next;
}
pde_put(root);
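
The fs/proc/generic.c conversion turns proc_subdir_lock into a rwlock_t: lookup, readdir and name translation now take it only for reading and can run concurrently, while entry registration and removal take it for writing. The pattern, as a minimal sketch with hypothetical names:

static DEFINE_RWLOCK(demo_lock);

/* Lookup side: any number of readers may hold the lock at once. */
read_lock(&demo_lock);
/* ... walk the subdir rbtree ... */
read_unlock(&demo_lock);

/* Registration/removal side: exclusive access. */
write_lock(&demo_lock);
/* ... insert or erase an entry ... */
write_unlock(&demo_lock);
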
diff --git a/fs/proc/page.c b/fs/proc/page.c
index 7eee2d8..9348403 100644
--- a/fs/proc/page.c
+++ b/fs/proc/page.c
@@ -9,12 +9,16 @@
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/hugetlb.h>
+#include <linux/memcontrol.h>
+#include <linux/mmu_notifier.h>
+#include <linux/page_idle.h>
#include <linux/kernel-page-flags.h>
#include <asm/uaccess.h>
#include "internal.h"
#define KPMSIZE sizeof(u64)
#define KPMMASK (KPMSIZE - 1)
+#define KPMBITS (KPMSIZE * BITS_PER_BYTE)
/* /proc/kpagecount - an array exposing page counts
*
@@ -54,6 +58,8 @@
pfn++;
out++;
count -= KPMSIZE;
+
+ cond_resched();
}
*ppos += (char __user *)out - buf;
@@ -146,6 +152,9 @@
if (PageBalloon(page))
u |= 1 << KPF_BALLOON;
+ if (page_is_idle(page))
+ u |= 1 << KPF_IDLE;
+
u |= kpf_copy_bit(k, KPF_LOCKED, PG_locked);
u |= kpf_copy_bit(k, KPF_SLAB, PG_slab);
@@ -212,6 +221,8 @@
pfn++;
out++;
count -= KPMSIZE;
+
+ cond_resched();
}
*ppos += (char __user *)out - buf;
@@ -225,10 +236,64 @@
.read = kpageflags_read,
};
+#ifdef CONFIG_MEMCG
+static ssize_t kpagecgroup_read(struct file *file, char __user *buf,
+ size_t count, loff_t *ppos)
+{
+ u64 __user *out = (u64 __user *)buf;
+ struct page *ppage;
+ unsigned long src = *ppos;
+ unsigned long pfn;
+ ssize_t ret = 0;
+ u64 ino;
+
+ pfn = src / KPMSIZE;
+ count = min_t(unsigned long, count, (max_pfn * KPMSIZE) - src);
+ if (src & KPMMASK || count & KPMMASK)
+ return -EINVAL;
+
+ while (count > 0) {
+ if (pfn_valid(pfn))
+ ppage = pfn_to_page(pfn);
+ else
+ ppage = NULL;
+
+ if (ppage)
+ ino = page_cgroup_ino(ppage);
+ else
+ ino = 0;
+
+ if (put_user(ino, out)) {
+ ret = -EFAULT;
+ break;
+ }
+
+ pfn++;
+ out++;
+ count -= KPMSIZE;
+
+ cond_resched();
+ }
+
+ *ppos += (char __user *)out - buf;
+ if (!ret)
+ ret = (char __user *)out - buf;
+ return ret;
+}
+
+static const struct file_operations proc_kpagecgroup_operations = {
+ .llseek = mem_lseek,
+ .read = kpagecgroup_read,
+};
+#endif /* CONFIG_MEMCG */
+
static int __init proc_page_init(void)
{
proc_create("kpagecount", S_IRUSR, NULL, &proc_kpagecount_operations);
proc_create("kpageflags", S_IRUSR, NULL, &proc_kpageflags_operations);
+#ifdef CONFIG_MEMCG
+ proc_create("kpagecgroup", S_IRUSR, NULL, &proc_kpagecgroup_operations);
+#endif
return 0;
}
fs_initcall(proc_page_init);
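
The fs/proc/page.c changes add cond_resched() to the long pfn loops, report the new KPF_IDLE bit, and (with CONFIG_MEMCG) create /proc/kpagecgroup, a root-readable array of one u64 per page frame holding the inode number of the memory cgroup the page is charged to, or 0. A small userspace sketch that dumps the first few entries, assuming CONFIG_MEMCG and root privileges:

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	FILE *f = fopen("/proc/kpagecgroup", "rb");
	uint64_t ino;
	unsigned long pfn;

	if (!f) {
		perror("/proc/kpagecgroup");
		return 1;
	}
	for (pfn = 0; pfn < 16; pfn++) {
		if (fread(&ino, sizeof(ino), 1, f) != 1)
			break;
		printf("pfn %lu -> memcg inode %llu\n",
		       pfn, (unsigned long long)ino);
	}
	fclose(f);
	return 0;
}
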
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 41f1a50..e2d46ad 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -13,6 +13,7 @@
#include <linux/swap.h>
#include <linux/swapops.h>
#include <linux/mmu_notifier.h>
+#include <linux/page_idle.h>
#include <asm/elf.h>
#include <asm/uaccess.h>
@@ -459,7 +460,7 @@
mss->resident += size;
/* Accumulate the size in pages that have been accessed. */
- if (young || PageReferenced(page))
+ if (young || page_is_young(page) || PageReferenced(page))
mss->referenced += size;
mapcount = page_mapcount(page);
if (mapcount >= 2) {
@@ -807,6 +808,7 @@
/* Clear accessed and referenced bits. */
pmdp_test_and_clear_young(vma, addr, pmd);
+ test_and_clear_page_young(page);
ClearPageReferenced(page);
out:
spin_unlock(ptl);
@@ -834,6 +836,7 @@
/* Clear accessed and referenced bits. */
ptep_test_and_clear_young(vma, addr, pte);
+ test_and_clear_page_young(page);
ClearPageReferenced(page);
}
pte_unmap_unlock(pte - 1, ptl);
diff --git a/fs/seq_file.c b/fs/seq_file.c
index ce9e39f..225586e 100644
--- a/fs/seq_file.c
+++ b/fs/seq_file.c
@@ -12,6 +12,7 @@
#include <linux/slab.h>
#include <linux/cred.h>
#include <linux/mm.h>
+#include <linux/printk.h>
#include <asm/uaccess.h>
#include <asm/page.h>
@@ -371,16 +372,16 @@
* @esc: set of characters that need escaping
*
* Puts string into buffer, replacing each occurrence of character from
- * @esc with usual octal escape. Returns 0 in case of success, -1 - in
- * case of overflow.
+ * @esc with usual octal escape.
+ * Use seq_has_overflowed() to check for errors.
*/
-int seq_escape(struct seq_file *m, const char *s, const char *esc)
+void seq_escape(struct seq_file *m, const char *s, const char *esc)
{
char *end = m->buf + m->size;
- char *p;
+ char *p;
char c;
- for (p = m->buf + m->count; (c = *s) != '\0' && p < end; s++) {
+ for (p = m->buf + m->count; (c = *s) != '\0' && p < end; s++) {
if (!strchr(esc, c)) {
*p++ = c;
continue;
@@ -393,14 +394,13 @@
continue;
}
seq_set_overflow(m);
- return -1;
- }
+ return;
+ }
m->count = p - m->buf;
- return 0;
}
EXPORT_SYMBOL(seq_escape);
-int seq_vprintf(struct seq_file *m, const char *f, va_list args)
+void seq_vprintf(struct seq_file *m, const char *f, va_list args)
{
int len;
@@ -408,24 +408,20 @@
len = vsnprintf(m->buf + m->count, m->size - m->count, f, args);
if (m->count + len < m->size) {
m->count += len;
- return 0;
+ return;
}
}
seq_set_overflow(m);
- return -1;
}
EXPORT_SYMBOL(seq_vprintf);
-int seq_printf(struct seq_file *m, const char *f, ...)
+void seq_printf(struct seq_file *m, const char *f, ...)
{
- int ret;
va_list args;
va_start(args, f);
- ret = seq_vprintf(m, f, args);
+ seq_vprintf(m, f, args);
va_end(args);
-
- return ret;
}
EXPORT_SYMBOL(seq_printf);
@@ -663,26 +659,25 @@
}
EXPORT_SYMBOL(seq_open_private);
-int seq_putc(struct seq_file *m, char c)
+void seq_putc(struct seq_file *m, char c)
{
- if (m->count < m->size) {
- m->buf[m->count++] = c;
- return 0;
- }
- return -1;
+ if (m->count >= m->size)
+ return;
+
+ m->buf[m->count++] = c;
}
EXPORT_SYMBOL(seq_putc);
-int seq_puts(struct seq_file *m, const char *s)
+void seq_puts(struct seq_file *m, const char *s)
{
int len = strlen(s);
- if (m->count + len < m->size) {
- memcpy(m->buf + m->count, s, len);
- m->count += len;
- return 0;
+
+ if (m->count + len >= m->size) {
+ seq_set_overflow(m);
+ return;
}
- seq_set_overflow(m);
- return -1;
+ memcpy(m->buf + m->count, s, len);
+ m->count += len;
}
EXPORT_SYMBOL(seq_puts);
@@ -693,8 +688,8 @@
* This routine is very quick when you show lots of numbers.
* In usual cases, it will be better to use seq_printf(). It's easier to read.
*/
-int seq_put_decimal_ull(struct seq_file *m, char delimiter,
- unsigned long long num)
+void seq_put_decimal_ull(struct seq_file *m, char delimiter,
+ unsigned long long num)
{
int len;
@@ -706,35 +701,33 @@
if (num < 10) {
m->buf[m->count++] = num + '0';
- return 0;
+ return;
}
len = num_to_str(m->buf + m->count, m->size - m->count, num);
if (!len)
goto overflow;
m->count += len;
- return 0;
+ return;
+
overflow:
seq_set_overflow(m);
- return -1;
}
EXPORT_SYMBOL(seq_put_decimal_ull);
-int seq_put_decimal_ll(struct seq_file *m, char delimiter,
- long long num)
+void seq_put_decimal_ll(struct seq_file *m, char delimiter, long long num)
{
if (num < 0) {
if (m->count + 3 >= m->size) {
seq_set_overflow(m);
- return -1;
+ return;
}
if (delimiter)
m->buf[m->count++] = delimiter;
num = -num;
delimiter = '-';
}
- return seq_put_decimal_ull(m, delimiter, num);
-
+ seq_put_decimal_ull(m, delimiter, num);
}
EXPORT_SYMBOL(seq_put_decimal_ll);
@@ -773,6 +766,47 @@
}
EXPORT_SYMBOL(seq_pad);
+/* A complete analogue of print_hex_dump() */
+void seq_hex_dump(struct seq_file *m, const char *prefix_str, int prefix_type,
+ int rowsize, int groupsize, const void *buf, size_t len,
+ bool ascii)
+{
+ const u8 *ptr = buf;
+ int i, linelen, remaining = len;
+ int ret;
+
+ if (rowsize != 16 && rowsize != 32)
+ rowsize = 16;
+
+ for (i = 0; i < len && !seq_has_overflowed(m); i += rowsize) {
+ linelen = min(remaining, rowsize);
+ remaining -= rowsize;
+
+ switch (prefix_type) {
+ case DUMP_PREFIX_ADDRESS:
+ seq_printf(m, "%s%p: ", prefix_str, ptr + i);
+ break;
+ case DUMP_PREFIX_OFFSET:
+ seq_printf(m, "%s%.8x: ", prefix_str, i);
+ break;
+ default:
+ seq_printf(m, "%s", prefix_str);
+ break;
+ }
+
+ ret = hex_dump_to_buffer(ptr + i, linelen, rowsize, groupsize,
+ m->buf + m->count, m->size - m->count,
+ ascii);
+ if (ret >= m->size - m->count) {
+ seq_set_overflow(m);
+ } else {
+ m->count += ret;
+ seq_putc(m, '\n');
+ }
+ }
+}
+EXPORT_SYMBOL(seq_hex_dump);
+
struct list_head *seq_list_start(struct list_head *head, loff_t pos)
{
struct list_head *lh;
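
The fs/seq_file.c changes make seq_escape(), seq_printf(), seq_putc(), seq_puts() and the decimal helpers return void — callers that care about truncation now use seq_has_overflowed() — and add seq_hex_dump() as a seq_file counterpart of print_hex_dump(). A sketch of a ->show() callback written against the new interface, with hypothetical names:

static int demo_show(struct seq_file *m, void *v)
{
	static const u8 blob[] = { 0xde, 0xad, 0xbe, 0xef };

	/* No return values to check: on overflow the seq_file core
	 * retries the whole show() with a larger buffer.
	 */
	seq_printf(m, "blob (%zu bytes):\n", sizeof(blob));
	seq_hex_dump(m, "  ", DUMP_PREFIX_OFFSET, 16, 1,
		     blob, sizeof(blob), true);
	return 0;
}
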
diff --git a/include/asm-generic/dma-mapping-common.h b/include/asm-generic/dma-mapping-common.h
index 940d5ec..b1bc954 100644
--- a/include/asm-generic/dma-mapping-common.h
+++ b/include/asm-generic/dma-mapping-common.h
@@ -6,6 +6,7 @@
#include <linux/scatterlist.h>
#include <linux/dma-debug.h>
#include <linux/dma-attrs.h>
+#include <asm-generic/dma-coherent.h>
static inline dma_addr_t dma_map_single_attrs(struct device *dev, void *ptr,
size_t size,
@@ -237,4 +238,121 @@
#define dma_get_sgtable(d, t, v, h, s) dma_get_sgtable_attrs(d, t, v, h, s, NULL)
+#ifndef arch_dma_alloc_attrs
+#define arch_dma_alloc_attrs(dev, flag) (true)
+#endif
+
+static inline void *dma_alloc_attrs(struct device *dev, size_t size,
+ dma_addr_t *dma_handle, gfp_t flag,
+ struct dma_attrs *attrs)
+{
+ struct dma_map_ops *ops = get_dma_ops(dev);
+ void *cpu_addr;
+
+ BUG_ON(!ops);
+
+ if (dma_alloc_from_coherent(dev, size, dma_handle, &cpu_addr))
+ return cpu_addr;
+
+ if (!arch_dma_alloc_attrs(&dev, &flag))
+ return NULL;
+ if (!ops->alloc)
+ return NULL;
+
+ cpu_addr = ops->alloc(dev, size, dma_handle, flag, attrs);
+ debug_dma_alloc_coherent(dev, size, *dma_handle, cpu_addr);
+ return cpu_addr;
+}
+
+static inline void dma_free_attrs(struct device *dev, size_t size,
+ void *cpu_addr, dma_addr_t dma_handle,
+ struct dma_attrs *attrs)
+{
+ struct dma_map_ops *ops = get_dma_ops(dev);
+
+ BUG_ON(!ops);
+ WARN_ON(irqs_disabled());
+
+ if (dma_release_from_coherent(dev, get_order(size), cpu_addr))
+ return;
+
+ if (!ops->free)
+ return;
+
+ debug_dma_free_coherent(dev, size, cpu_addr, dma_handle);
+ ops->free(dev, size, cpu_addr, dma_handle, attrs);
+}
+
+static inline void *dma_alloc_coherent(struct device *dev, size_t size,
+ dma_addr_t *dma_handle, gfp_t flag)
+{
+ return dma_alloc_attrs(dev, size, dma_handle, flag, NULL);
+}
+
+static inline void dma_free_coherent(struct device *dev, size_t size,
+ void *cpu_addr, dma_addr_t dma_handle)
+{
+ return dma_free_attrs(dev, size, cpu_addr, dma_handle, NULL);
+}
+
+static inline void *dma_alloc_noncoherent(struct device *dev, size_t size,
+ dma_addr_t *dma_handle, gfp_t gfp)
+{
+ DEFINE_DMA_ATTRS(attrs);
+
+ dma_set_attr(DMA_ATTR_NON_CONSISTENT, &attrs);
+ return dma_alloc_attrs(dev, size, dma_handle, gfp, &attrs);
+}
+
+static inline void dma_free_noncoherent(struct device *dev, size_t size,
+ void *cpu_addr, dma_addr_t dma_handle)
+{
+ DEFINE_DMA_ATTRS(attrs);
+
+ dma_set_attr(DMA_ATTR_NON_CONSISTENT, &attrs);
+ dma_free_attrs(dev, size, cpu_addr, dma_handle, &attrs);
+}
+
+static inline int dma_mapping_error(struct device *dev, dma_addr_t dma_addr)
+{
+ debug_dma_mapping_error(dev, dma_addr);
+
+ if (get_dma_ops(dev)->mapping_error)
+ return get_dma_ops(dev)->mapping_error(dev, dma_addr);
+
+#ifdef DMA_ERROR_CODE
+ return dma_addr == DMA_ERROR_CODE;
+#else
+ return 0;
+#endif
+}
+
+#ifndef HAVE_ARCH_DMA_SUPPORTED
+static inline int dma_supported(struct device *dev, u64 mask)
+{
+ struct dma_map_ops *ops = get_dma_ops(dev);
+
+ if (!ops)
+ return 0;
+ if (!ops->dma_supported)
+ return 1;
+ return ops->dma_supported(dev, mask);
+}
+#endif
+
+#ifndef HAVE_ARCH_DMA_SET_MASK
+static inline int dma_set_mask(struct device *dev, u64 mask)
+{
+ struct dma_map_ops *ops = get_dma_ops(dev);
+
+ if (ops->set_dma_mask)
+ return ops->set_dma_mask(dev, mask);
+
+ if (!dev->dma_mask || !dma_supported(dev, mask))
+ return -EIO;
+ *dev->dma_mask = mask;
+ return 0;
+}
+#endif
+
#endif
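
With the block above, dma_alloc_attrs()/dma_free_attrs() and the coherent/noncoherent wrappers become generic inlines shared by architectures that include dma-mapping-common.h, first trying the per-device coherent pool and then the dma_map_ops. Driver-side usage is unchanged; a minimal sketch with hypothetical names:

static int demo_setup_buffer(struct device *dev)
{
	dma_addr_t dma_handle;
	void *cpu_addr;

	cpu_addr = dma_alloc_coherent(dev, PAGE_SIZE, &dma_handle,
				      GFP_KERNEL);
	if (!cpu_addr)
		return -ENOMEM;

	/* program dma_handle into the device, touch cpu_addr from the CPU */

	dma_free_coherent(dev, PAGE_SIZE, cpu_addr, dma_handle);
	return 0;
}
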
diff --git a/include/asm-generic/qspinlock.h b/include/asm-generic/qspinlock.h
index 83bfb87..e2aadbc 100644
--- a/include/asm-generic/qspinlock.h
+++ b/include/asm-generic/qspinlock.h
@@ -111,8 +111,8 @@
cpu_relax();
}
-#ifndef virt_queued_spin_lock
-static __always_inline bool virt_queued_spin_lock(struct qspinlock *lock)
+#ifndef virt_spin_lock
+static __always_inline bool virt_spin_lock(struct qspinlock *lock)
{
return false;
}
diff --git a/include/kvm/arm_arch_timer.h b/include/kvm/arm_arch_timer.h
index e596675..e1e4d7c 100644
--- a/include/kvm/arm_arch_timer.h
+++ b/include/kvm/arm_arch_timer.h
@@ -52,13 +52,16 @@
/* Timer IRQ */
const struct kvm_irq_level *irq;
+
+ /* VGIC mapping */
+ struct irq_phys_map *map;
};
int kvm_timer_hyp_init(void);
void kvm_timer_enable(struct kvm *kvm);
void kvm_timer_init(struct kvm *kvm);
-void kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu,
- const struct kvm_irq_level *irq);
+int kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu,
+ const struct kvm_irq_level *irq);
void kvm_timer_vcpu_init(struct kvm_vcpu *vcpu);
void kvm_timer_flush_hwstate(struct kvm_vcpu *vcpu);
void kvm_timer_sync_hwstate(struct kvm_vcpu *vcpu);
diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h
index 133ea00..d901f1a 100644
--- a/include/kvm/arm_vgic.h
+++ b/include/kvm/arm_vgic.h
@@ -95,11 +95,15 @@
#define LR_STATE_ACTIVE (1 << 1)
#define LR_STATE_MASK (3 << 0)
#define LR_EOI_INT (1 << 2)
+#define LR_HW (1 << 3)
struct vgic_lr {
- u16 irq;
- u8 source;
- u8 state;
+ unsigned irq:10;
+ union {
+ unsigned hwirq:10;
+ unsigned source:3;
+ };
+ unsigned state:4;
};
struct vgic_vmcr {
@@ -155,6 +159,19 @@
struct kvm_io_device dev;
};
+struct irq_phys_map {
+ u32 virt_irq;
+ u32 phys_irq;
+ u32 irq;
+ bool active;
+};
+
+struct irq_phys_map_entry {
+ struct list_head entry;
+ struct rcu_head rcu;
+ struct irq_phys_map map;
+};
+
struct vgic_dist {
spinlock_t lock;
bool in_kernel;
@@ -252,6 +269,10 @@
struct vgic_vm_ops vm_ops;
struct vgic_io_device dist_iodev;
struct vgic_io_device *redist_iodevs;
+
+ /* Virtual irq to hwirq mapping */
+ spinlock_t irq_phys_map_lock;
+ struct list_head irq_phys_map_list;
};
struct vgic_v2_cpu_if {
@@ -303,6 +324,9 @@
struct vgic_v2_cpu_if vgic_v2;
struct vgic_v3_cpu_if vgic_v3;
};
+
+ /* Protected by the distributor's irq_phys_map_lock */
+ struct list_head irq_phys_map_list;
};
#define LR_EMPTY 0xff
@@ -317,16 +341,25 @@
int kvm_vgic_hyp_init(void);
int kvm_vgic_map_resources(struct kvm *kvm);
int kvm_vgic_get_max_vcpus(void);
+void kvm_vgic_early_init(struct kvm *kvm);
int kvm_vgic_create(struct kvm *kvm, u32 type);
void kvm_vgic_destroy(struct kvm *kvm);
+void kvm_vgic_vcpu_early_init(struct kvm_vcpu *vcpu);
void kvm_vgic_vcpu_destroy(struct kvm_vcpu *vcpu);
void kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu);
void kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu);
int kvm_vgic_inject_irq(struct kvm *kvm, int cpuid, unsigned int irq_num,
bool level);
+int kvm_vgic_inject_mapped_irq(struct kvm *kvm, int cpuid,
+ struct irq_phys_map *map, bool level);
void vgic_v3_dispatch_sgi(struct kvm_vcpu *vcpu, u64 reg);
int kvm_vgic_vcpu_pending_irq(struct kvm_vcpu *vcpu);
int kvm_vgic_vcpu_active_irq(struct kvm_vcpu *vcpu);
+struct irq_phys_map *kvm_vgic_map_phys_irq(struct kvm_vcpu *vcpu,
+ int virt_irq, int irq);
+int kvm_vgic_unmap_phys_irq(struct kvm_vcpu *vcpu, struct irq_phys_map *map);
+bool kvm_vgic_get_phys_irq_active(struct irq_phys_map *map);
+void kvm_vgic_set_phys_irq_active(struct irq_phys_map *map, bool active);
#define irqchip_in_kernel(k) (!!((k)->arch.vgic.in_kernel))
#define vgic_initialized(k) (!!((k)->arch.vgic.nr_cpus))
diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h
index 0fe9df9..5a5d79e 100644
--- a/include/linux/backing-dev.h
+++ b/include/linux/backing-dev.h
@@ -286,7 +286,7 @@
* %current's blkcg equals the effective blkcg of its memcg. No
* need to use the relatively expensive cgroup_get_e_css().
*/
- if (likely(wb && wb->blkcg_css == task_css(current, blkio_cgrp_id)))
+ if (likely(wb && wb->blkcg_css == task_css(current, io_cgrp_id)))
return wb;
return NULL;
}
@@ -402,7 +402,7 @@
}
struct wb_iter {
- int start_blkcg_id;
+ int start_memcg_id;
struct radix_tree_iter tree_iter;
void **slot;
};
@@ -414,9 +414,9 @@
WARN_ON_ONCE(!rcu_read_lock_held());
- if (iter->start_blkcg_id >= 0) {
- iter->slot = radix_tree_iter_init(titer, iter->start_blkcg_id);
- iter->start_blkcg_id = -1;
+ if (iter->start_memcg_id >= 0) {
+ iter->slot = radix_tree_iter_init(titer, iter->start_memcg_id);
+ iter->start_memcg_id = -1;
} else {
iter->slot = radix_tree_next_slot(iter->slot, titer, 0);
}
@@ -430,30 +430,30 @@
static inline struct bdi_writeback *__wb_iter_init(struct wb_iter *iter,
struct backing_dev_info *bdi,
- int start_blkcg_id)
+ int start_memcg_id)
{
- iter->start_blkcg_id = start_blkcg_id;
+ iter->start_memcg_id = start_memcg_id;
- if (start_blkcg_id)
+ if (start_memcg_id)
return __wb_iter_next(iter, bdi);
else
return &bdi->wb;
}
/**
- * bdi_for_each_wb - walk all wb's of a bdi in ascending blkcg ID order
+ * bdi_for_each_wb - walk all wb's of a bdi in ascending memcg ID order
* @wb_cur: cursor struct bdi_writeback pointer
* @bdi: bdi to walk wb's of
* @iter: pointer to struct wb_iter to be used as iteration buffer
- * @start_blkcg_id: blkcg ID to start iteration from
+ * @start_memcg_id: memcg ID to start iteration from
*
* Iterate @wb_cur through the wb's (bdi_writeback's) of @bdi in ascending
- * blkcg ID order starting from @start_blkcg_id. @iter is struct wb_iter
+ * memcg ID order starting from @start_memcg_id. @iter is struct wb_iter
* to be used as temp storage during iteration. rcu_read_lock() must be
* held throughout iteration.
*/
-#define bdi_for_each_wb(wb_cur, bdi, iter, start_blkcg_id) \
- for ((wb_cur) = __wb_iter_init(iter, bdi, start_blkcg_id); \
+#define bdi_for_each_wb(wb_cur, bdi, iter, start_memcg_id) \
+ for ((wb_cur) = __wb_iter_init(iter, bdi, start_memcg_id); \
(wb_cur); (wb_cur) = __wb_iter_next(iter, bdi))
#else /* CONFIG_CGROUP_WRITEBACK */
diff --git a/include/linux/blk-cgroup.h b/include/linux/blk-cgroup.h
index a4cd164..0a5cc7a 100644
--- a/include/linux/blk-cgroup.h
+++ b/include/linux/blk-cgroup.h
@@ -14,12 +14,15 @@
*/
#include <linux/cgroup.h>
-#include <linux/u64_stats_sync.h>
+#include <linux/percpu_counter.h>
#include <linux/seq_file.h>
#include <linux/radix-tree.h>
#include <linux/blkdev.h>
#include <linux/atomic.h>
+/* percpu_counter batch for blkg_[rw]stats, per-cpu drift doesn't matter */
+#define BLKG_STAT_CPU_BATCH (INT_MAX / 2)
+
/* Max limits for throttle policy */
#define THROTL_IOPS_MAX UINT_MAX
@@ -45,7 +48,7 @@
struct blkcg_gq *blkg_hint;
struct hlist_head blkg_list;
- struct blkcg_policy_data *pd[BLKCG_MAX_POLS];
+ struct blkcg_policy_data *cpd[BLKCG_MAX_POLS];
struct list_head all_blkcgs_node;
#ifdef CONFIG_CGROUP_WRITEBACK
@@ -53,14 +56,19 @@
#endif
};
+/*
+ * blkg_[rw]stat->aux_cnt is excluded for local stats but included for
+ * recursive. Used to carry stats of dead children, and, for blkg_rwstat,
+ * to carry result values from read and sum operations.
+ */
struct blkg_stat {
- struct u64_stats_sync syncp;
- uint64_t cnt;
+ struct percpu_counter cpu_cnt;
+ atomic64_t aux_cnt;
};
struct blkg_rwstat {
- struct u64_stats_sync syncp;
- uint64_t cnt[BLKG_RWSTAT_NR];
+ struct percpu_counter cpu_cnt[BLKG_RWSTAT_NR];
+ atomic64_t aux_cnt[BLKG_RWSTAT_NR];
};
/*
@@ -68,32 +76,28 @@
* request_queue (q). This is used by blkcg policies which need to track
* information per blkcg - q pair.
*
- * There can be multiple active blkcg policies and each has its private
- * data on each blkg, the size of which is determined by
- * blkcg_policy->pd_size. blkcg core allocates and frees such areas
- * together with blkg and invokes pd_init/exit_fn() methods.
- *
- * Such private data must embed struct blkg_policy_data (pd) at the
- * beginning and pd_size can't be smaller than pd.
+ * There can be multiple active blkcg policies and each blkg:policy pair is
+ * represented by a blkg_policy_data which is allocated and freed by each
+ * policy's pd_alloc/free_fn() methods. A policy can allocate private data
+ * area by allocating larger data structure which embeds blkg_policy_data
+ * at the beginning.
*/
struct blkg_policy_data {
/* the blkg and policy id this per-policy data belongs to */
struct blkcg_gq *blkg;
int plid;
-
- /* used during policy activation */
- struct list_head alloc_node;
};
/*
- * Policies that need to keep per-blkcg data which is independent
- * from any request_queue associated to it must specify its size
- * with the cpd_size field of the blkcg_policy structure and
- * embed a blkcg_policy_data in it. cpd_init() is invoked to let
- * each policy handle per-blkcg data.
+ * Policies that need to keep per-blkcg data which is independent from any
+ * request_queue associated to it should implement cpd_alloc/free_fn()
+ * methods. A policy can allocate private data area by allocating larger
+ * data structure which embeds blkcg_policy_data at the beginning.
+ * cpd_init() is invoked to let each policy handle per-blkcg data.
*/
struct blkcg_policy_data {
- /* the policy id this per-policy data belongs to */
+ /* the blkcg and policy id this per-policy data belongs to */
+ struct blkcg *blkcg;
int plid;
};
@@ -123,40 +127,50 @@
/* is this blkg online? protected by both blkcg and q locks */
bool online;
+ struct blkg_rwstat stat_bytes;
+ struct blkg_rwstat stat_ios;
+
struct blkg_policy_data *pd[BLKCG_MAX_POLS];
struct rcu_head rcu_head;
};
-typedef void (blkcg_pol_init_cpd_fn)(const struct blkcg *blkcg);
-typedef void (blkcg_pol_init_pd_fn)(struct blkcg_gq *blkg);
-typedef void (blkcg_pol_online_pd_fn)(struct blkcg_gq *blkg);
-typedef void (blkcg_pol_offline_pd_fn)(struct blkcg_gq *blkg);
-typedef void (blkcg_pol_exit_pd_fn)(struct blkcg_gq *blkg);
-typedef void (blkcg_pol_reset_pd_stats_fn)(struct blkcg_gq *blkg);
+typedef struct blkcg_policy_data *(blkcg_pol_alloc_cpd_fn)(gfp_t gfp);
+typedef void (blkcg_pol_init_cpd_fn)(struct blkcg_policy_data *cpd);
+typedef void (blkcg_pol_free_cpd_fn)(struct blkcg_policy_data *cpd);
+typedef void (blkcg_pol_bind_cpd_fn)(struct blkcg_policy_data *cpd);
+typedef struct blkg_policy_data *(blkcg_pol_alloc_pd_fn)(gfp_t gfp, int node);
+typedef void (blkcg_pol_init_pd_fn)(struct blkg_policy_data *pd);
+typedef void (blkcg_pol_online_pd_fn)(struct blkg_policy_data *pd);
+typedef void (blkcg_pol_offline_pd_fn)(struct blkg_policy_data *pd);
+typedef void (blkcg_pol_free_pd_fn)(struct blkg_policy_data *pd);
+typedef void (blkcg_pol_reset_pd_stats_fn)(struct blkg_policy_data *pd);
struct blkcg_policy {
int plid;
- /* policy specific private data size */
- size_t pd_size;
- /* policy specific per-blkcg data size */
- size_t cpd_size;
/* cgroup files for the policy */
- struct cftype *cftypes;
+ struct cftype *dfl_cftypes;
+ struct cftype *legacy_cftypes;
/* operations */
+ blkcg_pol_alloc_cpd_fn *cpd_alloc_fn;
blkcg_pol_init_cpd_fn *cpd_init_fn;
+ blkcg_pol_free_cpd_fn *cpd_free_fn;
+ blkcg_pol_bind_cpd_fn *cpd_bind_fn;
+
+ blkcg_pol_alloc_pd_fn *pd_alloc_fn;
blkcg_pol_init_pd_fn *pd_init_fn;
blkcg_pol_online_pd_fn *pd_online_fn;
blkcg_pol_offline_pd_fn *pd_offline_fn;
- blkcg_pol_exit_pd_fn *pd_exit_fn;
+ blkcg_pol_free_pd_fn *pd_free_fn;
blkcg_pol_reset_pd_stats_fn *pd_reset_stats_fn;
};
extern struct blkcg blkcg_root;
extern struct cgroup_subsys_state * const blkcg_root_css;
-struct blkcg_gq *blkg_lookup(struct blkcg *blkcg, struct request_queue *q);
+struct blkcg_gq *blkg_lookup_slowpath(struct blkcg *blkcg,
+ struct request_queue *q, bool update_hint);
struct blkcg_gq *blkg_lookup_create(struct blkcg *blkcg,
struct request_queue *q);
int blkcg_init_queue(struct request_queue *q);
@@ -171,6 +185,7 @@
void blkcg_deactivate_policy(struct request_queue *q,
const struct blkcg_policy *pol);
+const char *blkg_dev_name(struct blkcg_gq *blkg);
void blkcg_print_blkgs(struct seq_file *sf, struct blkcg *blkcg,
u64 (*prfill)(struct seq_file *,
struct blkg_policy_data *, int),
@@ -182,19 +197,24 @@
u64 blkg_prfill_stat(struct seq_file *sf, struct blkg_policy_data *pd, int off);
u64 blkg_prfill_rwstat(struct seq_file *sf, struct blkg_policy_data *pd,
int off);
+int blkg_print_stat_bytes(struct seq_file *sf, void *v);
+int blkg_print_stat_ios(struct seq_file *sf, void *v);
+int blkg_print_stat_bytes_recursive(struct seq_file *sf, void *v);
+int blkg_print_stat_ios_recursive(struct seq_file *sf, void *v);
-u64 blkg_stat_recursive_sum(struct blkg_policy_data *pd, int off);
-struct blkg_rwstat blkg_rwstat_recursive_sum(struct blkg_policy_data *pd,
- int off);
+u64 blkg_stat_recursive_sum(struct blkcg_gq *blkg,
+ struct blkcg_policy *pol, int off);
+struct blkg_rwstat blkg_rwstat_recursive_sum(struct blkcg_gq *blkg,
+ struct blkcg_policy *pol, int off);
struct blkg_conf_ctx {
struct gendisk *disk;
struct blkcg_gq *blkg;
- u64 v;
+ char *body;
};
int blkg_conf_prep(struct blkcg *blkcg, const struct blkcg_policy *pol,
- const char *input, struct blkg_conf_ctx *ctx);
+ char *input, struct blkg_conf_ctx *ctx);
void blkg_conf_finish(struct blkg_conf_ctx *ctx);
@@ -205,7 +225,7 @@
static inline struct blkcg *task_blkcg(struct task_struct *tsk)
{
- return css_to_blkcg(task_css(tsk, blkio_cgrp_id));
+ return css_to_blkcg(task_css(tsk, io_cgrp_id));
}
static inline struct blkcg *bio_blkcg(struct bio *bio)
@@ -218,7 +238,7 @@
static inline struct cgroup_subsys_state *
task_get_blkcg_css(struct task_struct *task)
{
- return task_get_css(task, blkio_cgrp_id);
+ return task_get_css(task, io_cgrp_id);
}
/**
@@ -233,6 +253,52 @@
}
/**
+ * __blkg_lookup - internal version of blkg_lookup()
+ * @blkcg: blkcg of interest
+ * @q: request_queue of interest
+ * @update_hint: whether to update lookup hint with the result or not
+ *
+ * This is internal version and shouldn't be used by policy
+ * implementations. Looks up blkgs for the @blkcg - @q pair regardless of
+ * @q's bypass state. If @update_hint is %true, the caller should be
+ * holding @q->queue_lock and lookup hint is updated on success.
+ */
+static inline struct blkcg_gq *__blkg_lookup(struct blkcg *blkcg,
+ struct request_queue *q,
+ bool update_hint)
+{
+ struct blkcg_gq *blkg;
+
+ if (blkcg == &blkcg_root)
+ return q->root_blkg;
+
+ blkg = rcu_dereference(blkcg->blkg_hint);
+ if (blkg && blkg->q == q)
+ return blkg;
+
+ return blkg_lookup_slowpath(blkcg, q, update_hint);
+}
+
+/**
+ * blkg_lookup - lookup blkg for the specified blkcg - q pair
+ * @blkcg: blkcg of interest
+ * @q: request_queue of interest
+ *
+ * Lookup blkg for the @blkcg - @q pair. This function should be called
+ * under RCU read lock and is guaranteed to return %NULL if @q is bypassing
+ * - see blk_queue_bypass_start() for details.
+ */
+static inline struct blkcg_gq *blkg_lookup(struct blkcg *blkcg,
+ struct request_queue *q)
+{
+ WARN_ON_ONCE(!rcu_read_lock_held());
+
+ if (unlikely(blk_queue_bypass(q)))
+ return NULL;
+ return __blkg_lookup(blkcg, q, false);
+}
+
+/**
* blkg_to_pdata - get policy private data
* @blkg: blkg of interest
* @pol: policy of interest
@@ -248,7 +314,7 @@
static inline struct blkcg_policy_data *blkcg_to_cpd(struct blkcg *blkcg,
struct blkcg_policy *pol)
{
- return blkcg ? blkcg->pd[pol->plid] : NULL;
+ return blkcg ? blkcg->cpd[pol->plid] : NULL;
}
/**
@@ -262,6 +328,11 @@
return pd ? pd->blkg : NULL;
}
+static inline struct blkcg *cpd_to_blkcg(struct blkcg_policy_data *cpd)
+{
+ return cpd ? cpd->blkcg : NULL;
+}
+
/**
* blkg_path - format cgroup path of blkg
* @blkg: blkg of interest
@@ -309,9 +380,6 @@
call_rcu(&blkg->rcu_head, __blkg_release_rcu);
}
-struct blkcg_gq *__blkg_lookup(struct blkcg *blkcg, struct request_queue *q,
- bool update_hint);
-
/**
* blkg_for_each_descendant_pre - pre-order walk of a blkg's descendants
* @d_blkg: loop cursor pointing to the current descendant
@@ -373,8 +441,8 @@
* or if either the blkcg or queue is going away. Fall back to
* root_rl in such cases.
*/
- blkg = blkg_lookup_create(blkcg, q);
- if (IS_ERR(blkg))
+ blkg = blkg_lookup(blkcg, q);
+ if (unlikely(!blkg))
goto root_rl;
blkg_get(blkg);
@@ -394,8 +462,7 @@
*/
static inline void blk_put_rl(struct request_list *rl)
{
- /* root_rl may not have blkg set */
- if (rl->blkg && rl->blkg->blkcg != &blkcg_root)
+ if (rl->blkg->blkcg != &blkcg_root)
blkg_put(rl->blkg);
}
@@ -433,9 +500,21 @@
#define blk_queue_for_each_rl(rl, q) \
for ((rl) = &(q)->root_rl; (rl); (rl) = __blk_queue_next_rl((rl), (q)))
-static inline void blkg_stat_init(struct blkg_stat *stat)
+static inline int blkg_stat_init(struct blkg_stat *stat, gfp_t gfp)
{
- u64_stats_init(&stat->syncp);
+ int ret;
+
+ ret = percpu_counter_init(&stat->cpu_cnt, 0, gfp);
+ if (ret)
+ return ret;
+
+ atomic64_set(&stat->aux_cnt, 0);
+ return 0;
+}
+
+static inline void blkg_stat_exit(struct blkg_stat *stat)
+{
+ percpu_counter_destroy(&stat->cpu_cnt);
}
/**
@@ -443,34 +522,21 @@
* @stat: target blkg_stat
* @val: value to add
*
- * Add @val to @stat. The caller is responsible for synchronizing calls to
- * this function.
+ * Add @val to @stat. The caller must ensure that IRQ on the same CPU
+ * don't re-enter this function for the same counter.
*/
static inline void blkg_stat_add(struct blkg_stat *stat, uint64_t val)
{
- u64_stats_update_begin(&stat->syncp);
- stat->cnt += val;
- u64_stats_update_end(&stat->syncp);
+ __percpu_counter_add(&stat->cpu_cnt, val, BLKG_STAT_CPU_BATCH);
}
/**
* blkg_stat_read - read the current value of a blkg_stat
* @stat: blkg_stat to read
- *
- * Read the current value of @stat. This function can be called without
- * synchroniztion and takes care of u64 atomicity.
*/
static inline uint64_t blkg_stat_read(struct blkg_stat *stat)
{
- unsigned int start;
- uint64_t v;
-
- do {
- start = u64_stats_fetch_begin_irq(&stat->syncp);
- v = stat->cnt;
- } while (u64_stats_fetch_retry_irq(&stat->syncp, start));
-
- return v;
+ return percpu_counter_sum_positive(&stat->cpu_cnt);
}
/**
@@ -479,24 +545,46 @@
*/
static inline void blkg_stat_reset(struct blkg_stat *stat)
{
- stat->cnt = 0;
+ percpu_counter_set(&stat->cpu_cnt, 0);
+ atomic64_set(&stat->aux_cnt, 0);
}
/**
- * blkg_stat_merge - merge a blkg_stat into another
+ * blkg_stat_add_aux - add a blkg_stat into another's aux count
* @to: the destination blkg_stat
* @from: the source
*
- * Add @from's count to @to.
+ * Add @from's count including the aux one to @to's aux count.
*/
-static inline void blkg_stat_merge(struct blkg_stat *to, struct blkg_stat *from)
+static inline void blkg_stat_add_aux(struct blkg_stat *to,
+ struct blkg_stat *from)
{
- blkg_stat_add(to, blkg_stat_read(from));
+ atomic64_add(blkg_stat_read(from) + atomic64_read(&from->aux_cnt),
+ &to->aux_cnt);
}
-static inline void blkg_rwstat_init(struct blkg_rwstat *rwstat)
+static inline int blkg_rwstat_init(struct blkg_rwstat *rwstat, gfp_t gfp)
{
- u64_stats_init(&rwstat->syncp);
+ int i, ret;
+
+ for (i = 0; i < BLKG_RWSTAT_NR; i++) {
+ ret = percpu_counter_init(&rwstat->cpu_cnt[i], 0, gfp);
+ if (ret) {
+ while (--i >= 0)
+ percpu_counter_destroy(&rwstat->cpu_cnt[i]);
+ return ret;
+ }
+ atomic64_set(&rwstat->aux_cnt[i], 0);
+ }
+ return 0;
+}
+
+static inline void blkg_rwstat_exit(struct blkg_rwstat *rwstat)
+{
+ int i;
+
+ for (i = 0; i < BLKG_RWSTAT_NR; i++)
+ percpu_counter_destroy(&rwstat->cpu_cnt[i]);
}
/**
@@ -511,39 +599,38 @@
static inline void blkg_rwstat_add(struct blkg_rwstat *rwstat,
int rw, uint64_t val)
{
- u64_stats_update_begin(&rwstat->syncp);
+ struct percpu_counter *cnt;
if (rw & REQ_WRITE)
- rwstat->cnt[BLKG_RWSTAT_WRITE] += val;
+ cnt = &rwstat->cpu_cnt[BLKG_RWSTAT_WRITE];
else
- rwstat->cnt[BLKG_RWSTAT_READ] += val;
- if (rw & REQ_SYNC)
- rwstat->cnt[BLKG_RWSTAT_SYNC] += val;
- else
- rwstat->cnt[BLKG_RWSTAT_ASYNC] += val;
+ cnt = &rwstat->cpu_cnt[BLKG_RWSTAT_READ];
- u64_stats_update_end(&rwstat->syncp);
+ __percpu_counter_add(cnt, val, BLKG_STAT_CPU_BATCH);
+
+ if (rw & REQ_SYNC)
+ cnt = &rwstat->cpu_cnt[BLKG_RWSTAT_SYNC];
+ else
+ cnt = &rwstat->cpu_cnt[BLKG_RWSTAT_ASYNC];
+
+ __percpu_counter_add(cnt, val, BLKG_STAT_CPU_BATCH);
}
/**
* blkg_rwstat_read - read the current values of a blkg_rwstat
* @rwstat: blkg_rwstat to read
*
- * Read the current snapshot of @rwstat and return it as the return value.
- * This function can be called without synchronization and takes care of
- * u64 atomicity.
+ * Read the current snapshot of @rwstat and return it in the aux counts.
*/
static inline struct blkg_rwstat blkg_rwstat_read(struct blkg_rwstat *rwstat)
{
- unsigned int start;
- struct blkg_rwstat tmp;
+ struct blkg_rwstat result;
+ int i;
- do {
- start = u64_stats_fetch_begin_irq(&rwstat->syncp);
- tmp = *rwstat;
- } while (u64_stats_fetch_retry_irq(&rwstat->syncp, start));
-
- return tmp;
+ for (i = 0; i < BLKG_RWSTAT_NR; i++)
+ atomic64_set(&result.aux_cnt[i],
+ percpu_counter_sum_positive(&rwstat->cpu_cnt[i]));
+ return result;
}
/**
@@ -558,7 +645,8 @@
{
struct blkg_rwstat tmp = blkg_rwstat_read(rwstat);
- return tmp.cnt[BLKG_RWSTAT_READ] + tmp.cnt[BLKG_RWSTAT_WRITE];
+ return atomic64_read(&tmp.aux_cnt[BLKG_RWSTAT_READ]) +
+ atomic64_read(&tmp.aux_cnt[BLKG_RWSTAT_WRITE]);
}
/**
@@ -567,26 +655,71 @@
*/
static inline void blkg_rwstat_reset(struct blkg_rwstat *rwstat)
{
- memset(rwstat->cnt, 0, sizeof(rwstat->cnt));
+ int i;
+
+ for (i = 0; i < BLKG_RWSTAT_NR; i++) {
+ percpu_counter_set(&rwstat->cpu_cnt[i], 0);
+ atomic64_set(&rwstat->aux_cnt[i], 0);
+ }
}
/**
- * blkg_rwstat_merge - merge a blkg_rwstat into another
+ * blkg_rwstat_add_aux - add a blkg_rwstat into another's aux count
* @to: the destination blkg_rwstat
* @from: the source
*
- * Add @from's counts to @to.
+ * Add @from's count including the aux one to @to's aux count.
*/
-static inline void blkg_rwstat_merge(struct blkg_rwstat *to,
- struct blkg_rwstat *from)
+static inline void blkg_rwstat_add_aux(struct blkg_rwstat *to,
+ struct blkg_rwstat *from)
{
struct blkg_rwstat v = blkg_rwstat_read(from);
int i;
- u64_stats_update_begin(&to->syncp);
for (i = 0; i < BLKG_RWSTAT_NR; i++)
- to->cnt[i] += v.cnt[i];
- u64_stats_update_end(&to->syncp);
+ atomic64_add(atomic64_read(&v.aux_cnt[i]) +
+ atomic64_read(&from->aux_cnt[i]),
+ &to->aux_cnt[i]);
+}
+
+#ifdef CONFIG_BLK_DEV_THROTTLING
+extern bool blk_throtl_bio(struct request_queue *q, struct blkcg_gq *blkg,
+ struct bio *bio);
+#else
+static inline bool blk_throtl_bio(struct request_queue *q, struct blkcg_gq *blkg,
+ struct bio *bio) { return false; }
+#endif
+
+static inline bool blkcg_bio_issue_check(struct request_queue *q,
+ struct bio *bio)
+{
+ struct blkcg *blkcg;
+ struct blkcg_gq *blkg;
+ bool throtl = false;
+
+ rcu_read_lock();
+ blkcg = bio_blkcg(bio);
+
+ blkg = blkg_lookup(blkcg, q);
+ if (unlikely(!blkg)) {
+ spin_lock_irq(q->queue_lock);
+ blkg = blkg_lookup_create(blkcg, q);
+ if (IS_ERR(blkg))
+ blkg = NULL;
+ spin_unlock_irq(q->queue_lock);
+ }
+
+ throtl = blk_throtl_bio(q, blkg, bio);
+
+ if (!throtl) {
+ blkg = blkg ?: q->root_blkg;
+ blkg_rwstat_add(&blkg->stat_bytes, bio->bi_flags,
+ bio->bi_iter.bi_size);
+ blkg_rwstat_add(&blkg->stat_ios, bio->bi_flags, 1);
+ }
+
+ rcu_read_unlock();
+ return !throtl;
}
#else /* CONFIG_BLK_CGROUP */
@@ -642,6 +775,9 @@
static inline void blk_rq_set_rl(struct request *rq, struct request_list *rl) { }
static inline struct request_list *blk_rq_rl(struct request *rq) { return &rq->q->root_rl; }
+static inline bool blkcg_bio_issue_check(struct request_queue *q,
+ struct bio *bio) { return true; }
+
#define blk_queue_for_each_rl(rl, q) \
for ((rl) = &(q)->root_rl; (rl); (rl) = NULL)
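
The blk-cgroup.h rework replaces the u64_stats_sync-protected counters with percpu_counter-backed blkg_stat/blkg_rwstat plus an atomic64 aux count for carrying stats of dead children, and adds blkcg_bio_issue_check(), which looks up the blkg, runs the bio through blk_throtl_bio() and accounts stat_bytes/stat_ios at issue time. The new blkg_stat lifecycle, as a sketch with a hypothetical caller:

static int demo_stat(void)
{
	struct blkg_stat st;

	if (blkg_stat_init(&st, GFP_KERNEL))	/* allocates the percpu counter */
		return -ENOMEM;

	blkg_stat_add(&st, 4096);		/* cheap, batched per-cpu add */
	pr_info("bytes: %llu\n",
		(unsigned long long)blkg_stat_read(&st));

	blkg_stat_exit(&st);			/* now required to pair with init */
	return 0;
}
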
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 708923b9..38a5ff7 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -584,7 +584,7 @@
#define list_entry_rq(ptr) list_entry((ptr), struct request, queuelist)
-#define rq_data_dir(rq) (((rq)->cmd_flags & 1) != 0)
+#define rq_data_dir(rq) ((int)((rq)->cmd_flags & 1))
/*
* Driver can handle struct request, if it either has an old style
diff --git a/include/linux/ceph/libceph.h b/include/linux/ceph/libceph.h
index 9ebee53..397c5cd 100644
--- a/include/linux/ceph/libceph.h
+++ b/include/linux/ceph/libceph.h
@@ -46,6 +46,7 @@
unsigned long mount_timeout; /* jiffies */
unsigned long osd_idle_ttl; /* jiffies */
unsigned long osd_keepalive_timeout; /* jiffies */
+ unsigned long monc_ping_timeout; /* jiffies */
/*
* any type that can't be simply compared or doesn't need
@@ -66,6 +67,7 @@
#define CEPH_MOUNT_TIMEOUT_DEFAULT msecs_to_jiffies(60 * 1000)
#define CEPH_OSD_KEEPALIVE_DEFAULT msecs_to_jiffies(5 * 1000)
#define CEPH_OSD_IDLE_TTL_DEFAULT msecs_to_jiffies(60 * 1000)
+#define CEPH_MONC_PING_TIMEOUT_DEFAULT msecs_to_jiffies(30 * 1000)
#define CEPH_MSG_MAX_FRONT_LEN (16*1024*1024)
#define CEPH_MSG_MAX_MIDDLE_LEN (16*1024*1024)
diff --git a/include/linux/ceph/messenger.h b/include/linux/ceph/messenger.h
index 3775327..7e1252e 100644
--- a/include/linux/ceph/messenger.h
+++ b/include/linux/ceph/messenger.h
@@ -248,6 +248,8 @@
int in_base_pos; /* bytes read */
__le64 in_temp_ack; /* for reading an ack */
+ struct timespec last_keepalive_ack;
+
struct delayed_work work; /* send|recv work */
unsigned long delay; /* current delay interval */
};
@@ -285,6 +287,8 @@
extern void ceph_msg_revoke_incoming(struct ceph_msg *msg);
extern void ceph_con_keepalive(struct ceph_connection *con);
+extern bool ceph_con_keepalive_expired(struct ceph_connection *con,
+ unsigned long interval);
extern void ceph_msg_data_add_pages(struct ceph_msg *msg, struct page **pages,
size_t length, size_t alignment);
diff --git a/include/linux/ceph/msgr.h b/include/linux/ceph/msgr.h
index 1c18872..0fe2656 100644
--- a/include/linux/ceph/msgr.h
+++ b/include/linux/ceph/msgr.h
@@ -84,10 +84,12 @@
#define CEPH_MSGR_TAG_MSG 7 /* message */
#define CEPH_MSGR_TAG_ACK 8 /* message ack */
#define CEPH_MSGR_TAG_KEEPALIVE 9 /* just a keepalive byte! */
-#define CEPH_MSGR_TAG_BADPROTOVER 10 /* bad protocol version */
+#define CEPH_MSGR_TAG_BADPROTOVER 10 /* bad protocol version */
#define CEPH_MSGR_TAG_BADAUTHORIZER 11 /* bad authorizer */
#define CEPH_MSGR_TAG_FEATURES 12 /* insufficient features */
#define CEPH_MSGR_TAG_SEQ 13 /* 64-bit int follows with seen seq number */
+#define CEPH_MSGR_TAG_KEEPALIVE2 14 /* keepalive2 byte + ceph_timespec */
+#define CEPH_MSGR_TAG_KEEPALIVE2_ACK 15 /* keepalive2 reply */
/*
diff --git a/include/linux/cgroup_subsys.h b/include/linux/cgroup_subsys.h
index 1f36945..1a96fda 100644
--- a/include/linux/cgroup_subsys.h
+++ b/include/linux/cgroup_subsys.h
@@ -27,7 +27,7 @@
#endif
#if IS_ENABLED(CONFIG_BLK_CGROUP)
-SUBSYS(blkio)
+SUBSYS(io)
#endif
#if IS_ENABLED(CONFIG_MEMCG)
diff --git a/include/linux/clockchips.h b/include/linux/clockchips.h
index 31ce435..bdcf358 100644
--- a/include/linux/clockchips.h
+++ b/include/linux/clockchips.h
@@ -18,15 +18,6 @@
struct clock_event_device;
struct module;
-/* Clock event mode commands for legacy ->set_mode(): OBSOLETE */
-enum clock_event_mode {
- CLOCK_EVT_MODE_UNUSED,
- CLOCK_EVT_MODE_SHUTDOWN,
- CLOCK_EVT_MODE_PERIODIC,
- CLOCK_EVT_MODE_ONESHOT,
- CLOCK_EVT_MODE_RESUME,
-};
-
/*
* Possible states of a clock event device.
*
@@ -86,16 +77,14 @@
* @min_delta_ns: minimum delta value in ns
* @mult: nanosecond to cycles multiplier
* @shift: nanoseconds to cycles divisor (power of two)
- * @mode: operating mode, relevant only to ->set_mode(), OBSOLETE
* @state_use_accessors:current state of the device, assigned by the core code
* @features: features
* @retries: number of forced programming retries
- * @set_mode: legacy set mode function, only for modes <= CLOCK_EVT_MODE_RESUME.
- * @set_state_periodic: switch state to periodic, if !set_mode
- * @set_state_oneshot: switch state to oneshot, if !set_mode
- * @set_state_oneshot_stopped: switch state to oneshot_stopped, if !set_mode
- * @set_state_shutdown: switch state to shutdown, if !set_mode
- * @tick_resume: resume clkevt device, if !set_mode
+ * @set_state_periodic: switch state to periodic
+ * @set_state_oneshot: switch state to oneshot
+ * @set_state_oneshot_stopped: switch state to oneshot_stopped
+ * @set_state_shutdown: switch state to shutdown
+ * @tick_resume: resume clkevt device
* @broadcast: function to broadcast events
* @min_delta_ticks: minimum delta value in ticks stored for reconfiguration
* @max_delta_ticks: maximum delta value in ticks stored for reconfiguration
@@ -116,18 +105,10 @@
u64 min_delta_ns;
u32 mult;
u32 shift;
- enum clock_event_mode mode;
enum clock_event_state state_use_accessors;
unsigned int features;
unsigned long retries;
- /*
- * State transition callback(s): Only one of the two groups should be
- * defined:
- * - set_mode(), only for modes <= CLOCK_EVT_MODE_RESUME.
- * - set_state_{shutdown|periodic|oneshot|oneshot_stopped}(), tick_resume().
- */
- void (*set_mode)(enum clock_event_mode mode, struct clock_event_device *);
int (*set_state_periodic)(struct clock_event_device *);
int (*set_state_oneshot)(struct clock_event_device *);
int (*set_state_oneshot_stopped)(struct clock_event_device *);
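
The clockchips.h change removes the legacy enum clock_event_mode and the ->set_mode() callback; every clockevent driver is now expected to provide the discrete set_state_*()/tick_resume() callbacks, which return int. A minimal sketch of a driver structure written against the new interface, with hypothetical hardware hooks:

static int demo_shutdown(struct clock_event_device *evt)
{
	/* stop the timer hardware */
	return 0;
}

static int demo_set_periodic(struct clock_event_device *evt)
{
	/* program a periodic tick */
	return 0;
}

static struct clock_event_device demo_clockevent = {
	.name			= "demo",
	.features		= CLOCK_EVT_FEAT_PERIODIC,
	.rating			= 200,
	.set_state_shutdown	= demo_shutdown,
	.set_state_periodic	= demo_set_periodic,
};
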
diff --git a/include/linux/irqchip/arm-gic-v3.h b/include/linux/irqchip/arm-gic-v3.h
index 71e4faf..9eeeb95 100644
--- a/include/linux/irqchip/arm-gic-v3.h
+++ b/include/linux/irqchip/arm-gic-v3.h
@@ -270,9 +270,12 @@
#define ICH_LR_EOI (1UL << 41)
#define ICH_LR_GROUP (1UL << 60)
+#define ICH_LR_HW (1UL << 61)
#define ICH_LR_STATE (3UL << 62)
#define ICH_LR_PENDING_BIT (1UL << 62)
#define ICH_LR_ACTIVE_BIT (1UL << 63)
+#define ICH_LR_PHYS_ID_SHIFT 32
+#define ICH_LR_PHYS_ID_MASK (0x3ffUL << ICH_LR_PHYS_ID_SHIFT)
#define ICH_MISR_EOI (1 << 0)
#define ICH_MISR_U (1 << 1)
diff --git a/include/linux/irqchip/arm-gic.h b/include/linux/irqchip/arm-gic.h
index af3d29f..b8901df 100644
--- a/include/linux/irqchip/arm-gic.h
+++ b/include/linux/irqchip/arm-gic.h
@@ -75,11 +75,12 @@
#define GICH_LR_VIRTUALID (0x3ff << 0)
#define GICH_LR_PHYSID_CPUID_SHIFT (10)
-#define GICH_LR_PHYSID_CPUID (7 << GICH_LR_PHYSID_CPUID_SHIFT)
+#define GICH_LR_PHYSID_CPUID (0x3ff << GICH_LR_PHYSID_CPUID_SHIFT)
#define GICH_LR_STATE (3 << 28)
#define GICH_LR_PENDING_BIT (1 << 28)
#define GICH_LR_ACTIVE_BIT (1 << 29)
#define GICH_LR_EOI (1 << 19)
+#define GICH_LR_HW (1 << 31)
#define GICH_VMCR_CTRL_SHIFT 0
#define GICH_VMCR_CTRL_MASK (0x21f << GICH_VMCR_CTRL_SHIFT)
diff --git a/include/linux/jump_label.h b/include/linux/jump_label.h
index 7f653e8..f109423 100644
--- a/include/linux/jump_label.h
+++ b/include/linux/jump_label.h
@@ -21,8 +21,8 @@
*
* DEFINE_STATIC_KEY_TRUE(key);
* DEFINE_STATIC_KEY_FALSE(key);
- * static_key_likely()
- * statick_key_unlikely()
+ * static_branch_likely()
+ * static_branch_unlikely()
*
* Jump labels provide an interface to generate dynamic branches using
* self-modifying code. Assuming toolchain and architecture support, if we
@@ -45,12 +45,10 @@
* statement, setting the key to true requires us to patch in a jump
* to the out-of-line of true branch.
*
- * In addtion to static_branch_{enable,disable}, we can also reference count
+ * In addition to static_branch_{enable,disable}, we can also reference count
* the key or branch direction via static_branch_{inc,dec}. Thus,
* static_branch_inc() can be thought of as a 'make more true' and
- * static_branch_dec() as a 'make more false'. The inc()/dec()
- * interface is meant to be used exclusively from the inc()/dec() for a given
- * key.
+ * static_branch_dec() as a 'make more false'.
*
* Since this relies on modifying code, the branch modifying functions
* must be considered absolute slow paths (machine wide synchronization etc.).
diff --git a/include/linux/kernfs.h b/include/linux/kernfs.h
index 123be25..5d4e9c4 100644
--- a/include/linux/kernfs.h
+++ b/include/linux/kernfs.h
@@ -266,6 +266,7 @@
}
int kernfs_name(struct kernfs_node *kn, char *buf, size_t buflen);
+size_t kernfs_path_len(struct kernfs_node *kn);
char * __must_check kernfs_path(struct kernfs_node *kn, char *buf,
size_t buflen);
void pr_cont_kernfs_name(struct kernfs_node *kn);
@@ -332,6 +333,9 @@
static inline int kernfs_name(struct kernfs_node *kn, char *buf, size_t buflen)
{ return -ENOSYS; }
+static inline size_t kernfs_path_len(struct kernfs_node *kn)
+{ return 0; }
+
static inline char * __must_check kernfs_path(struct kernfs_node *kn, char *buf,
size_t buflen)
{ return NULL; }
diff --git a/include/linux/kexec.h b/include/linux/kexec.h
index b63218f..d140b1e 100644
--- a/include/linux/kexec.h
+++ b/include/linux/kexec.h
@@ -16,7 +16,7 @@
#include <uapi/linux/kexec.h>
-#ifdef CONFIG_KEXEC
+#ifdef CONFIG_KEXEC_CORE
#include <linux/list.h>
#include <linux/linkage.h>
#include <linux/compat.h>
@@ -318,13 +318,24 @@
size_t crash_get_memory_size(void);
void crash_free_reserved_phys_range(unsigned long begin, unsigned long end);
-#else /* !CONFIG_KEXEC */
+int __weak arch_kexec_kernel_image_probe(struct kimage *image, void *buf,
+ unsigned long buf_len);
+void * __weak arch_kexec_kernel_image_load(struct kimage *image);
+int __weak arch_kimage_file_post_load_cleanup(struct kimage *image);
+int __weak arch_kexec_kernel_verify_sig(struct kimage *image, void *buf,
+ unsigned long buf_len);
+int __weak arch_kexec_apply_relocations_add(const Elf_Ehdr *ehdr,
+ Elf_Shdr *sechdrs, unsigned int relsec);
+int __weak arch_kexec_apply_relocations(const Elf_Ehdr *ehdr, Elf_Shdr *sechdrs,
+ unsigned int relsec);
+
+#else /* !CONFIG_KEXEC_CORE */
struct pt_regs;
struct task_struct;
static inline void crash_kexec(struct pt_regs *regs) { }
static inline int kexec_should_crash(struct task_struct *p) { return 0; }
#define kexec_in_progress false
-#endif /* CONFIG_KEXEC */
+#endif /* CONFIG_KEXEC_CORE */
#endif /* !defined(__ASSEMBLY__) */
diff --git a/include/linux/kmod.h b/include/linux/kmod.h
index 0555cc6..fcfd2bf 100644
--- a/include/linux/kmod.h
+++ b/include/linux/kmod.h
@@ -85,8 +85,6 @@
UMH_DISABLED,
};
-extern void usermodehelper_init(void);
-
extern int __usermodehelper_disable(enum umh_disable_depth depth);
extern void __usermodehelper_set_disable_depth(enum umh_disable_depth depth);
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 81089cf..1bef9e2 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -242,6 +242,7 @@
int sigset_active;
sigset_t sigset;
struct kvm_vcpu_stat stat;
+ unsigned int halt_poll_ns;
#ifdef CONFIG_HAS_IOMEM
int mmio_needed;
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index d92b80b..ad800e6 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -305,11 +305,9 @@
struct lruvec *mem_cgroup_page_lruvec(struct page *, struct zone *);
bool task_in_mem_cgroup(struct task_struct *task, struct mem_cgroup *memcg);
-
-struct mem_cgroup *try_get_mem_cgroup_from_page(struct page *page);
struct mem_cgroup *mem_cgroup_from_task(struct task_struct *p);
-
struct mem_cgroup *parent_mem_cgroup(struct mem_cgroup *memcg);
+
static inline
struct mem_cgroup *mem_cgroup_from_css(struct cgroup_subsys_state *css){
return css ? container_of(css, struct mem_cgroup, css) : NULL;
@@ -345,6 +343,7 @@
}
struct cgroup_subsys_state *mem_cgroup_css_from_page(struct page *page);
+ino_t page_cgroup_ino(struct page *page);
static inline bool mem_cgroup_disabled(void)
{
@@ -555,11 +554,6 @@
return &zone->lruvec;
}
-static inline struct mem_cgroup *try_get_mem_cgroup_from_page(struct page *page)
-{
- return NULL;
-}
-
static inline bool mm_match_cgroup(struct mm_struct *mm,
struct mem_cgroup *memcg)
{
diff --git a/include/linux/microchipphy.h b/include/linux/microchipphy.h
new file mode 100644
index 0000000..eb492d4
--- /dev/null
+++ b/include/linux/microchipphy.h
@@ -0,0 +1,73 @@
+/*
+ * Copyright (C) 2015 Microchip Technology
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef _MICROCHIPPHY_H
+#define _MICROCHIPPHY_H
+
+#define LAN88XX_INT_MASK (0x19)
+#define LAN88XX_INT_MASK_MDINTPIN_EN_ (0x8000)
+#define LAN88XX_INT_MASK_SPEED_CHANGE_ (0x4000)
+#define LAN88XX_INT_MASK_LINK_CHANGE_ (0x2000)
+#define LAN88XX_INT_MASK_FDX_CHANGE_ (0x1000)
+#define LAN88XX_INT_MASK_AUTONEG_ERR_ (0x0800)
+#define LAN88XX_INT_MASK_AUTONEG_DONE_ (0x0400)
+#define LAN88XX_INT_MASK_POE_DETECT_ (0x0200)
+#define LAN88XX_INT_MASK_SYMBOL_ERR_ (0x0100)
+#define LAN88XX_INT_MASK_FAST_LINK_FAIL_ (0x0080)
+#define LAN88XX_INT_MASK_WOL_EVENT_ (0x0040)
+#define LAN88XX_INT_MASK_EXTENDED_INT_ (0x0020)
+#define LAN88XX_INT_MASK_RESERVED_ (0x0010)
+#define LAN88XX_INT_MASK_FALSE_CARRIER_ (0x0008)
+#define LAN88XX_INT_MASK_LINK_SPEED_DS_ (0x0004)
+#define LAN88XX_INT_MASK_MASTER_SLAVE_DONE_ (0x0002)
+#define LAN88XX_INT_MASK_RX__ER_ (0x0001)
+
+#define LAN88XX_INT_STS (0x1A)
+#define LAN88XX_INT_STS_INT_ACTIVE_ (0x8000)
+#define LAN88XX_INT_STS_SPEED_CHANGE_ (0x4000)
+#define LAN88XX_INT_STS_LINK_CHANGE_ (0x2000)
+#define LAN88XX_INT_STS_FDX_CHANGE_ (0x1000)
+#define LAN88XX_INT_STS_AUTONEG_ERR_ (0x0800)
+#define LAN88XX_INT_STS_AUTONEG_DONE_ (0x0400)
+#define LAN88XX_INT_STS_POE_DETECT_ (0x0200)
+#define LAN88XX_INT_STS_SYMBOL_ERR_ (0x0100)
+#define LAN88XX_INT_STS_FAST_LINK_FAIL_ (0x0080)
+#define LAN88XX_INT_STS_WOL_EVENT_ (0x0040)
+#define LAN88XX_INT_STS_EXTENDED_INT_ (0x0020)
+#define LAN88XX_INT_STS_RESERVED_ (0x0010)
+#define LAN88XX_INT_STS_FALSE_CARRIER_ (0x0008)
+#define LAN88XX_INT_STS_LINK_SPEED_DS_ (0x0004)
+#define LAN88XX_INT_STS_MASTER_SLAVE_DONE_ (0x0002)
+#define LAN88XX_INT_STS_RX_ER_ (0x0001)
+
+#define LAN88XX_EXT_PAGE_ACCESS (0x1F)
+#define LAN88XX_EXT_PAGE_SPACE_0 (0x0000)
+#define LAN88XX_EXT_PAGE_SPACE_1 (0x0001)
+#define LAN88XX_EXT_PAGE_SPACE_2 (0x0002)
+
+/* Extended Register Page 1 space */
+#define LAN88XX_EXT_MODE_CTRL (0x13)
+#define LAN88XX_EXT_MODE_CTRL_MDIX_MASK_ (0x000C)
+#define LAN88XX_EXT_MODE_CTRL_AUTO_MDIX_ (0x0000)
+#define LAN88XX_EXT_MODE_CTRL_MDI_ (0x0008)
+#define LAN88XX_EXT_MODE_CTRL_MDI_X_ (0x000C)
+
+/* MMD 3 Registers */
+#define LAN88XX_MMD3_CHIP_ID (32877)
+#define LAN88XX_MMD3_CHIP_REV (32878)
+
+#endif /* _MICROCHIPPHY_H */
diff --git a/include/linux/mm.h b/include/linux/mm.h
index f25a957..91c08f6 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -20,6 +20,7 @@
#include <linux/shrinker.h>
#include <linux/resource.h>
#include <linux/page_ext.h>
+#include <linux/err.h>
struct mempolicy;
struct anon_vma;
@@ -1214,6 +1215,49 @@
int write, int force, struct page **pages);
int get_user_pages_fast(unsigned long start, int nr_pages, int write,
struct page **pages);
+
+/* Container for pinned pfns / pages */
+struct frame_vector {
+ unsigned int nr_allocated; /* Number of frames we have space for */
+ unsigned int nr_frames; /* Number of frames stored in ptrs array */
+ bool got_ref; /* Did we pin pages by getting page ref? */
+ bool is_pfns; /* Does array contain pages or pfns? */
+ void *ptrs[0]; /* Array of pinned pfns / pages. Use
+ * pfns_vector_pages() or pfns_vector_pfns()
+ * for access */
+};
+
+struct frame_vector *frame_vector_create(unsigned int nr_frames);
+void frame_vector_destroy(struct frame_vector *vec);
+int get_vaddr_frames(unsigned long start, unsigned int nr_pfns,
+ bool write, bool force, struct frame_vector *vec);
+void put_vaddr_frames(struct frame_vector *vec);
+int frame_vector_to_pages(struct frame_vector *vec);
+void frame_vector_to_pfns(struct frame_vector *vec);
+
+static inline unsigned int frame_vector_count(struct frame_vector *vec)
+{
+ return vec->nr_frames;
+}
+
+static inline struct page **frame_vector_pages(struct frame_vector *vec)
+{
+ if (vec->is_pfns) {
+ int err = frame_vector_to_pages(vec);
+
+ if (err)
+ return ERR_PTR(err);
+ }
+ return (struct page **)(vec->ptrs);
+}
+
+static inline unsigned long *frame_vector_pfns(struct frame_vector *vec)
+{
+ if (!vec->is_pfns)
+ frame_vector_to_pfns(vec);
+ return (unsigned long *)(vec->ptrs);
+}
+
struct kvec;
int get_kernel_pages(const struct kvec *iov, int nr_pages, int write,
struct page **pages);
@@ -1873,11 +1917,19 @@
extern unsigned long mmap_region(struct file *file, unsigned long addr,
unsigned long len, vm_flags_t vm_flags, unsigned long pgoff);
-extern unsigned long do_mmap_pgoff(struct file *file, unsigned long addr,
+extern unsigned long do_mmap(struct file *file, unsigned long addr,
unsigned long len, unsigned long prot, unsigned long flags,
- unsigned long pgoff, unsigned long *populate);
+ vm_flags_t vm_flags, unsigned long pgoff, unsigned long *populate);
extern int do_munmap(struct mm_struct *, unsigned long, size_t);
+static inline unsigned long
+do_mmap_pgoff(struct file *file, unsigned long addr,
+ unsigned long len, unsigned long prot, unsigned long flags,
+ unsigned long pgoff, unsigned long *populate)
+{
+ return do_mmap(file, addr, len, prot, flags, 0, pgoff, populate);
+}
+
#ifdef CONFIG_MMU
extern int __mm_populate(unsigned long addr, unsigned long len,
int ignore_errors);
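
The frame_vector helpers declared above give drivers a common way to pin a range of user memory that may be backed either by regular pages or by raw PFN mappings. A minimal usage sketch, not part of this patch, with error handling trimmed (the function name and the write/force choices are illustrative):

#include <linux/mm.h>
#include <linux/err.h>

static int pin_user_buffer(unsigned long uaddr, unsigned int nr_frames)
{
	struct frame_vector *vec;
	struct page **pages;
	int ret;

	vec = frame_vector_create(nr_frames);
	if (!vec)
		return -ENOMEM;

	/* Pin [uaddr, uaddr + nr_frames * PAGE_SIZE) for writing. */
	ret = get_vaddr_frames(uaddr, nr_frames, true, false, vec);
	if (ret < 0)
		goto out_destroy;

	/* Returns ERR_PTR() if the range turned out to be a pure PFN map. */
	pages = frame_vector_pages(vec);
	if (IS_ERR(pages)) {
		ret = PTR_ERR(pages);
		goto out_put;
	}

	/* ... use pages[0 .. frame_vector_count(vec) - 1] ... */
	ret = 0;

out_put:
	put_vaddr_frames(vec);
out_destroy:
	frame_vector_destroy(vec);
	return ret;
}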
diff --git a/include/linux/mmu_notifier.h b/include/linux/mmu_notifier.h
index 61cd67f..a1a210d 100644
--- a/include/linux/mmu_notifier.h
+++ b/include/linux/mmu_notifier.h
@@ -66,6 +66,16 @@
unsigned long end);
/*
+ * clear_young is a lightweight version of clear_flush_young. Like the
+ * latter, it is supposed to test-and-clear the young/accessed bitflag
+ * in the secondary pte, but it may omit flushing the secondary tlb.
+ */
+ int (*clear_young)(struct mmu_notifier *mn,
+ struct mm_struct *mm,
+ unsigned long start,
+ unsigned long end);
+
+ /*
* test_young is called to check the young/accessed bitflag in
* the secondary pte. This is used to know if the page is
* frequently used without actually clearing the flag or tearing
@@ -203,6 +213,9 @@
extern int __mmu_notifier_clear_flush_young(struct mm_struct *mm,
unsigned long start,
unsigned long end);
+extern int __mmu_notifier_clear_young(struct mm_struct *mm,
+ unsigned long start,
+ unsigned long end);
extern int __mmu_notifier_test_young(struct mm_struct *mm,
unsigned long address);
extern void __mmu_notifier_change_pte(struct mm_struct *mm,
@@ -231,6 +244,15 @@
return 0;
}
+static inline int mmu_notifier_clear_young(struct mm_struct *mm,
+ unsigned long start,
+ unsigned long end)
+{
+ if (mm_has_notifiers(mm))
+ return __mmu_notifier_clear_young(mm, start, end);
+ return 0;
+}
+
static inline int mmu_notifier_test_young(struct mm_struct *mm,
unsigned long address)
{
@@ -311,6 +333,28 @@
__young; \
})
+#define ptep_clear_young_notify(__vma, __address, __ptep) \
+({ \
+ int __young; \
+ struct vm_area_struct *___vma = __vma; \
+ unsigned long ___address = __address; \
+ __young = ptep_test_and_clear_young(___vma, ___address, __ptep);\
+ __young |= mmu_notifier_clear_young(___vma->vm_mm, ___address, \
+ ___address + PAGE_SIZE); \
+ __young; \
+})
+
+#define pmdp_clear_young_notify(__vma, __address, __pmdp) \
+({ \
+ int __young; \
+ struct vm_area_struct *___vma = __vma; \
+ unsigned long ___address = __address; \
+ __young = pmdp_test_and_clear_young(___vma, ___address, __pmdp);\
+ __young |= mmu_notifier_clear_young(___vma->vm_mm, ___address, \
+ ___address + PMD_SIZE); \
+ __young; \
+})
+
#define ptep_clear_flush_notify(__vma, __address, __ptep) \
({ \
unsigned long ___addr = __address & PAGE_MASK; \
@@ -427,6 +471,8 @@
#define ptep_clear_flush_young_notify ptep_clear_flush_young
#define pmdp_clear_flush_young_notify pmdp_clear_flush_young
+#define ptep_clear_young_notify ptep_test_and_clear_young
+#define pmdp_clear_young_notify pmdp_test_and_clear_young
#define ptep_clear_flush_notify ptep_clear_flush
#define pmdp_huge_clear_flush_notify pmdp_huge_clear_flush
#define pmdp_huge_get_and_clear_notify pmdp_huge_get_and_clear
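
The new clear_young callback and the *_clear_young_notify macros let accessed-bit aging skip the secondary TLB flush that the *_clear_flush_young_notify variants imply. A hedged sketch of a reclaim-style helper using the PTE flavour (the demo_* name is illustrative):

/* Illustrative only: age one PTE without forcing a secondary TLB flush. */
static bool demo_pte_was_referenced(struct vm_area_struct *vma,
				    unsigned long addr, pte_t *pte)
{
	/*
	 * Clears the accessed bit in the primary PTE and asks registered
	 * MMU notifiers (via ->clear_young, which is simply skipped if a
	 * notifier does not implement it) to do the same in their
	 * secondary page tables.
	 */
	return ptep_clear_young_notify(vma, addr, pte);
}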
diff --git a/include/linux/netlink.h b/include/linux/netlink.h
index 9120edb..639e9b8 100644
--- a/include/linux/netlink.h
+++ b/include/linux/netlink.h
@@ -68,8 +68,17 @@
extern void __netlink_clear_multicast_users(struct sock *sk, unsigned int group);
extern void netlink_ack(struct sk_buff *in_skb, struct nlmsghdr *nlh, int err);
extern int netlink_has_listeners(struct sock *sk, unsigned int group);
-extern struct sk_buff *netlink_alloc_skb(struct sock *ssk, unsigned int size,
- u32 dst_portid, gfp_t gfp_mask);
+
+extern struct sk_buff *__netlink_alloc_skb(struct sock *ssk, unsigned int size,
+ unsigned int ldiff, u32 dst_portid,
+ gfp_t gfp_mask);
+static inline struct sk_buff *
+netlink_alloc_skb(struct sock *ssk, unsigned int size, u32 dst_portid,
+ gfp_t gfp_mask)
+{
+ return __netlink_alloc_skb(ssk, size, 0, dst_portid, gfp_mask);
+}
+
extern int netlink_unicast(struct sock *ssk, struct sk_buff *skb, __u32 portid, int nonblock);
extern int netlink_broadcast(struct sock *ssk, struct sk_buff *skb, __u32 portid,
__u32 group, gfp_t allocation);
diff --git a/include/linux/ntb.h b/include/linux/ntb.h
index b02f72b..f798e2a 100644
--- a/include/linux/ntb.h
+++ b/include/linux/ntb.h
@@ -522,10 +522,9 @@
* @speed: OUT - The link speed expressed as PCIe generation number.
* @width: OUT - The link width expressed as the number of PCIe lanes.
*
- * Set the translation of a memory window. The peer may access local memory
- * through the window starting at the address, up to the size. The address
- * must be aligned to the alignment specified by ntb_mw_get_range(). The size
- * must be aligned to the size alignment specified by ntb_mw_get_range().
+ * Get the current state of the ntb link. It is recommended to query the link
+ * state once after every link event. It is safe to query the link state in
+ * the context of the link event callback.
*
* Return: One if the link is up, zero if the link is down, otherwise a
* negative value indicating the error number.
@@ -795,7 +794,7 @@
}
/**
- * ntb_peer_db_clear() - clear bits in the local doorbell register
+ * ntb_peer_db_clear() - clear bits in the peer doorbell register
* @ntb: NTB device context.
* @db_bits: Doorbell bits to clear.
*
diff --git a/include/linux/ntb_transport.h b/include/linux/ntb_transport.h
index 2862861..7243eb9 100644
--- a/include/linux/ntb_transport.h
+++ b/include/linux/ntb_transport.h
@@ -83,3 +83,4 @@
void ntb_transport_link_up(struct ntb_transport_qp *qp);
void ntb_transport_link_down(struct ntb_transport_qp *qp);
bool ntb_transport_link_query(struct ntb_transport_qp *qp);
+unsigned int ntb_transport_tx_free_entry(struct ntb_transport_qp *qp);
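
ntb_transport_tx_free_entry() reports how many TX ring entries are still unused, so a client can avoid an enqueue attempt that is guaranteed to fail. A hedged sketch (demo_try_send is illustrative):

static int demo_try_send(struct ntb_transport_qp *qp, void *cb_data,
			 void *buf, unsigned int len)
{
	if (!ntb_transport_tx_free_entry(qp))
		return -EBUSY;	/* ring full; retry after a TX completion */

	return ntb_transport_tx_enqueue(qp, cb_data, buf, len);
}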
diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h
index 41c9384..416509e 100644
--- a/include/linux/page-flags.h
+++ b/include/linux/page-flags.h
@@ -109,6 +109,10 @@
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
PG_compound_lock,
#endif
+#if defined(CONFIG_IDLE_PAGE_TRACKING) && defined(CONFIG_64BIT)
+ PG_young,
+ PG_idle,
+#endif
__NR_PAGEFLAGS,
/* Filesystems */
@@ -289,6 +293,13 @@
#define __PG_HWPOISON 0
#endif
+#if defined(CONFIG_IDLE_PAGE_TRACKING) && defined(CONFIG_64BIT)
+TESTPAGEFLAG(Young, young)
+SETPAGEFLAG(Young, young)
+TESTCLEARFLAG(Young, young)
+PAGEFLAG(Idle, idle)
+#endif
+
/*
* On an anonymous page mapped into a user virtual memory area,
* page->mapping points to its anon_vma, not to a struct address_space;
diff --git a/include/linux/page_ext.h b/include/linux/page_ext.h
index c42981c..17f118a 100644
--- a/include/linux/page_ext.h
+++ b/include/linux/page_ext.h
@@ -26,6 +26,10 @@
PAGE_EXT_DEBUG_POISON, /* Page is poisoned */
PAGE_EXT_DEBUG_GUARD,
PAGE_EXT_OWNER,
+#if defined(CONFIG_IDLE_PAGE_TRACKING) && !defined(CONFIG_64BIT)
+ PAGE_EXT_YOUNG,
+ PAGE_EXT_IDLE,
+#endif
};
/*
diff --git a/include/linux/page_idle.h b/include/linux/page_idle.h
new file mode 100644
index 0000000..bf268fa
--- /dev/null
+++ b/include/linux/page_idle.h
@@ -0,0 +1,110 @@
+#ifndef _LINUX_MM_PAGE_IDLE_H
+#define _LINUX_MM_PAGE_IDLE_H
+
+#include <linux/bitops.h>
+#include <linux/page-flags.h>
+#include <linux/page_ext.h>
+
+#ifdef CONFIG_IDLE_PAGE_TRACKING
+
+#ifdef CONFIG_64BIT
+static inline bool page_is_young(struct page *page)
+{
+ return PageYoung(page);
+}
+
+static inline void set_page_young(struct page *page)
+{
+ SetPageYoung(page);
+}
+
+static inline bool test_and_clear_page_young(struct page *page)
+{
+ return TestClearPageYoung(page);
+}
+
+static inline bool page_is_idle(struct page *page)
+{
+ return PageIdle(page);
+}
+
+static inline void set_page_idle(struct page *page)
+{
+ SetPageIdle(page);
+}
+
+static inline void clear_page_idle(struct page *page)
+{
+ ClearPageIdle(page);
+}
+#else /* !CONFIG_64BIT */
+/*
+ * If there is not enough space to store Idle and Young bits in page flags, use
+ * page ext flags instead.
+ */
+extern struct page_ext_operations page_idle_ops;
+
+static inline bool page_is_young(struct page *page)
+{
+ return test_bit(PAGE_EXT_YOUNG, &lookup_page_ext(page)->flags);
+}
+
+static inline void set_page_young(struct page *page)
+{
+ set_bit(PAGE_EXT_YOUNG, &lookup_page_ext(page)->flags);
+}
+
+static inline bool test_and_clear_page_young(struct page *page)
+{
+ return test_and_clear_bit(PAGE_EXT_YOUNG,
+ &lookup_page_ext(page)->flags);
+}
+
+static inline bool page_is_idle(struct page *page)
+{
+ return test_bit(PAGE_EXT_IDLE, &lookup_page_ext(page)->flags);
+}
+
+static inline void set_page_idle(struct page *page)
+{
+ set_bit(PAGE_EXT_IDLE, &lookup_page_ext(page)->flags);
+}
+
+static inline void clear_page_idle(struct page *page)
+{
+ clear_bit(PAGE_EXT_IDLE, &lookup_page_ext(page)->flags);
+}
+#endif /* CONFIG_64BIT */
+
+#else /* !CONFIG_IDLE_PAGE_TRACKING */
+
+static inline bool page_is_young(struct page *page)
+{
+ return false;
+}
+
+static inline void set_page_young(struct page *page)
+{
+}
+
+static inline bool test_and_clear_page_young(struct page *page)
+{
+ return false;
+}
+
+static inline bool page_is_idle(struct page *page)
+{
+ return false;
+}
+
+static inline void set_page_idle(struct page *page)
+{
+}
+
+static inline void clear_page_idle(struct page *page)
+{
+}
+
+#endif /* CONFIG_IDLE_PAGE_TRACKING */
+
+#endif /* _LINUX_MM_PAGE_IDLE_H */
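
These helpers back idle page tracking: pages get marked idle, any later access sets the young state or clears the idle bit, and a subsequent scan reports pages that stayed untouched. A rough sketch of that check, not part of this patch:

#include <linux/page_idle.h>

/* Illustrative: was this page left untouched since it was marked idle? */
static bool demo_page_stayed_idle(struct page *page)
{
	if (!page_is_idle(page))
		return false;		/* idle bit already cleared by an access */

	if (test_and_clear_page_young(page)) {
		clear_page_idle(page);	/* referenced via a PTE accessed bit */
		return false;
	}

	return true;
}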
diff --git a/include/linux/pm_opp.h b/include/linux/pm_opp.h
index cab7ba5..e817722 100644
--- a/include/linux/pm_opp.h
+++ b/include/linux/pm_opp.h
@@ -34,6 +34,7 @@
int dev_pm_opp_get_opp_count(struct device *dev);
unsigned long dev_pm_opp_get_max_clock_latency(struct device *dev);
+struct dev_pm_opp *dev_pm_opp_get_suspend_opp(struct device *dev);
struct dev_pm_opp *dev_pm_opp_find_freq_exact(struct device *dev,
unsigned long freq,
@@ -80,6 +81,11 @@
return 0;
}
+static inline struct dev_pm_opp *dev_pm_opp_get_suspend_opp(struct device *dev)
+{
+ return NULL;
+}
+
static inline struct dev_pm_opp *dev_pm_opp_find_freq_exact(struct device *dev,
unsigned long freq, bool available)
{
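
dev_pm_opp_get_suspend_opp() returns the OPP marked as the suspend operating point, or NULL when none is described, so a cpufreq driver can pin a known-safe frequency before entering suspend. A hedged sketch; the RCU locking mirrors the convention of the other dev_pm_opp_* getters:

#include <linux/pm_opp.h>
#include <linux/rcupdate.h>

static unsigned long demo_suspend_freq(struct device *dev)
{
	struct dev_pm_opp *opp;
	unsigned long freq = 0;

	rcu_read_lock();
	opp = dev_pm_opp_get_suspend_opp(dev);
	if (opp)
		freq = dev_pm_opp_get_freq(opp);
	rcu_read_unlock();

	return freq;	/* 0: no suspend OPP described for this device */
}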
diff --git a/include/linux/poison.h b/include/linux/poison.h
index 2110a81..317e16d 100644
--- a/include/linux/poison.h
+++ b/include/linux/poison.h
@@ -19,8 +19,8 @@
* under normal circumstances, used to verify that nobody uses
* non-initialized list entries.
*/
-#define LIST_POISON1 ((void *) 0x00100100 + POISON_POINTER_DELTA)
-#define LIST_POISON2 ((void *) 0x00200200 + POISON_POINTER_DELTA)
+#define LIST_POISON1 ((void *) 0x100 + POISON_POINTER_DELTA)
+#define LIST_POISON2 ((void *) 0x200 + POISON_POINTER_DELTA)
/********** include/linux/timer.h **********/
/*
@@ -69,10 +69,6 @@
#define ATM_POISON_FREE 0x12
#define ATM_POISON 0xdeadbeef
-/********** net/ **********/
-#define NEIGHBOR_DEAD 0xdeadbeef
-#define NETFILTER_LINK_POISON 0xdead57ac
-
/********** kernel/mutexes **********/
#define MUTEX_DEBUG_INIT 0x11
#define MUTEX_DEBUG_FREE 0x22
@@ -83,7 +79,4 @@
/********** security/ **********/
#define KEY_DESTROY 0xbd
-/********** sound/oss/ **********/
-#define OSS_POISON_FREE 0xAB
-
#endif
diff --git a/include/linux/printk.h b/include/linux/printk.h
index a6298b2..9729565 100644
--- a/include/linux/printk.h
+++ b/include/linux/printk.h
@@ -404,10 +404,10 @@
static DEFINE_RATELIMIT_STATE(_rs, \
DEFAULT_RATELIMIT_INTERVAL, \
DEFAULT_RATELIMIT_BURST); \
- DEFINE_DYNAMIC_DEBUG_METADATA(descriptor, fmt); \
+ DEFINE_DYNAMIC_DEBUG_METADATA(descriptor, pr_fmt(fmt)); \
if (unlikely(descriptor.flags & _DPRINTK_FLAGS_PRINT) && \
__ratelimit(&_rs)) \
- __dynamic_pr_debug(&descriptor, fmt, ##__VA_ARGS__); \
+ __dynamic_pr_debug(&descriptor, pr_fmt(fmt), ##__VA_ARGS__); \
} while (0)
#elif defined(DEBUG)
#define pr_debug_ratelimited(fmt, ...) \
@@ -456,11 +456,17 @@
groupsize, buf, len, ascii) \
dynamic_hex_dump(prefix_str, prefix_type, rowsize, \
groupsize, buf, len, ascii)
-#else
+#elif defined(DEBUG)
#define print_hex_dump_debug(prefix_str, prefix_type, rowsize, \
groupsize, buf, len, ascii) \
print_hex_dump(KERN_DEBUG, prefix_str, prefix_type, rowsize, \
groupsize, buf, len, ascii)
-#endif /* defined(CONFIG_DYNAMIC_DEBUG) */
+#else
+static inline void print_hex_dump_debug(const char *prefix_str, int prefix_type,
+ int rowsize, int groupsize,
+ const void *buf, size_t len, bool ascii)
+{
+}
+#endif
#endif
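
After this change pr_debug_ratelimited() applies pr_fmt() under dynamic debug just like pr_debug() does, and print_hex_dump_debug() becomes an empty static inline when neither DEBUG nor CONFIG_DYNAMIC_DEBUG is enabled, matching pr_debug() semantics. A typical call site, shown only for illustration:

#define pr_fmt(fmt) "demo: " fmt	/* now honoured by pr_debug_ratelimited() too */

#include <linux/printk.h>

static void demo_dump(const void *buf, size_t len)
{
	pr_debug_ratelimited("dumping %zu bytes\n", len);
	print_hex_dump_debug("demo hex: ", DUMP_PREFIX_OFFSET, 16, 1,
			     buf, len, true);
}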
diff --git a/include/linux/reset.h b/include/linux/reset.h
index da5602b..7f65f9c 100644
--- a/include/linux/reset.h
+++ b/include/linux/reset.h
@@ -74,6 +74,20 @@
return -ENOSYS;
}
+static inline struct reset_control *__must_check reset_control_get(
+ struct device *dev, const char *id)
+{
+ WARN_ON(1);
+ return ERR_PTR(-EINVAL);
+}
+
+static inline struct reset_control *__must_check devm_reset_control_get(
+ struct device *dev, const char *id)
+{
+ WARN_ON(1);
+ return ERR_PTR(-EINVAL);
+}
+
static inline struct reset_control *reset_control_get_optional(
struct device *dev, const char *id)
{
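
The new stubs let code that unconditionally requests a reset line still build with CONFIG_RESET_CONTROLLER disabled, while flagging the situation with a WARN and an error pointer. A hedged consumer sketch (the device and the "core" line name are made up):

#include <linux/reset.h>
#include <linux/delay.h>
#include <linux/err.h>

static int demo_hw_reset(struct device *dev)
{
	struct reset_control *rst;

	rst = devm_reset_control_get(dev, "core");
	if (IS_ERR(rst))
		return PTR_ERR(rst);	/* -EINVAL from the stub when RESET_CONTROLLER=n */

	reset_control_assert(rst);
	usleep_range(10, 20);
	reset_control_deassert(rst);
	return 0;
}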
diff --git a/include/linux/seq_file.h b/include/linux/seq_file.h
index d4c7271..dde00de 100644
--- a/include/linux/seq_file.h
+++ b/include/linux/seq_file.h
@@ -114,13 +114,22 @@
ssize_t seq_read(struct file *, char __user *, size_t, loff_t *);
loff_t seq_lseek(struct file *, loff_t, int);
int seq_release(struct inode *, struct file *);
-int seq_escape(struct seq_file *, const char *, const char *);
-int seq_putc(struct seq_file *m, char c);
-int seq_puts(struct seq_file *m, const char *s);
int seq_write(struct seq_file *seq, const void *data, size_t len);
-__printf(2, 3) int seq_printf(struct seq_file *, const char *, ...);
-__printf(2, 0) int seq_vprintf(struct seq_file *, const char *, va_list args);
+__printf(2, 0)
+void seq_vprintf(struct seq_file *m, const char *fmt, va_list args);
+__printf(2, 3)
+void seq_printf(struct seq_file *m, const char *fmt, ...);
+void seq_putc(struct seq_file *m, char c);
+void seq_puts(struct seq_file *m, const char *s);
+void seq_put_decimal_ull(struct seq_file *m, char delimiter,
+ unsigned long long num);
+void seq_put_decimal_ll(struct seq_file *m, char delimiter, long long num);
+void seq_escape(struct seq_file *m, const char *s, const char *esc);
+
+void seq_hex_dump(struct seq_file *m, const char *prefix_str, int prefix_type,
+ int rowsize, int groupsize, const void *buf, size_t len,
+ bool ascii);
int seq_path(struct seq_file *, const struct path *, const char *);
int seq_file_path(struct seq_file *, struct file *, const char *);
@@ -134,10 +143,6 @@
void *__seq_open_private(struct file *, const struct seq_operations *, int);
int seq_open_private(struct file *, const struct seq_operations *, int);
int seq_release_private(struct inode *, struct file *);
-int seq_put_decimal_ull(struct seq_file *m, char delimiter,
- unsigned long long num);
-int seq_put_decimal_ll(struct seq_file *m, char delimiter,
- long long num);
static inline struct user_namespace *seq_user_ns(struct seq_file *seq)
{
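
seq_printf() and friends now return void (overflow is handled by the seq_file core, which retries with a larger buffer), and seq_hex_dump() is a new helper for dumping binary blobs. An illustrative ->show() callback, not taken from this patch:

#include <linux/seq_file.h>

static int demo_seq_show(struct seq_file *m, void *v)
{
	static const u8 blob[4] = { 0xde, 0xad, 0xbe, 0xef };

	seq_puts(m, "demo state:\n");
	seq_printf(m, "  count=%d\n", 42);
	seq_hex_dump(m, "  ", DUMP_PREFIX_OFFSET, 16, 1,
		     blob, sizeof(blob), true);
	return 0;	/* no return value to propagate from the seq_* helpers */
}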
diff --git a/include/linux/string_helpers.h b/include/linux/string_helpers.h
index 71f711d..dabe643 100644
--- a/include/linux/string_helpers.h
+++ b/include/linux/string_helpers.h
@@ -48,24 +48,24 @@
#define ESCAPE_HEX 0x20
int string_escape_mem(const char *src, size_t isz, char *dst, size_t osz,
- unsigned int flags, const char *esc);
+ unsigned int flags, const char *only);
static inline int string_escape_mem_any_np(const char *src, size_t isz,
- char *dst, size_t osz, const char *esc)
+ char *dst, size_t osz, const char *only)
{
- return string_escape_mem(src, isz, dst, osz, ESCAPE_ANY_NP, esc);
+ return string_escape_mem(src, isz, dst, osz, ESCAPE_ANY_NP, only);
}
static inline int string_escape_str(const char *src, char *dst, size_t sz,
- unsigned int flags, const char *esc)
+ unsigned int flags, const char *only)
{
- return string_escape_mem(src, strlen(src), dst, sz, flags, esc);
+ return string_escape_mem(src, strlen(src), dst, sz, flags, only);
}
static inline int string_escape_str_any_np(const char *src, char *dst,
- size_t sz, const char *esc)
+ size_t sz, const char *only)
{
- return string_escape_str(src, dst, sz, ESCAPE_ANY_NP, esc);
+ return string_escape_str(src, dst, sz, ESCAPE_ANY_NP, only);
}
#endif
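
Renaming the last parameter from esc to only reflects what it actually is: the set of characters that are candidates for escaping, not an escape sequence. An illustrative caller that escapes all non-printable bytes and places no "only" restriction (NULL is accepted):

#include <linux/string_helpers.h>

static int demo_escape_name(const char *name, char *out, size_t outsz)
{
	return string_escape_str_any_np(name, out, outsz, NULL);
}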
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index 0800131..a460e2e 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -885,4 +885,6 @@
const char __user *const __user *argv,
const char __user *const __user *envp, int flags);
+asmlinkage long sys_membarrier(int cmd, int flags);
+
#endif
diff --git a/include/linux/thermal.h b/include/linux/thermal.h
index 037e9df..17292fe 100644
--- a/include/linux/thermal.h
+++ b/include/linux/thermal.h
@@ -92,23 +92,19 @@
struct thermal_cooling_device *);
int (*unbind) (struct thermal_zone_device *,
struct thermal_cooling_device *);
- int (*get_temp) (struct thermal_zone_device *, unsigned long *);
+ int (*get_temp) (struct thermal_zone_device *, int *);
int (*get_mode) (struct thermal_zone_device *,
enum thermal_device_mode *);
int (*set_mode) (struct thermal_zone_device *,
enum thermal_device_mode);
int (*get_trip_type) (struct thermal_zone_device *, int,
enum thermal_trip_type *);
- int (*get_trip_temp) (struct thermal_zone_device *, int,
- unsigned long *);
- int (*set_trip_temp) (struct thermal_zone_device *, int,
- unsigned long);
- int (*get_trip_hyst) (struct thermal_zone_device *, int,
- unsigned long *);
- int (*set_trip_hyst) (struct thermal_zone_device *, int,
- unsigned long);
- int (*get_crit_temp) (struct thermal_zone_device *, unsigned long *);
- int (*set_emul_temp) (struct thermal_zone_device *, unsigned long);
+ int (*get_trip_temp) (struct thermal_zone_device *, int, int *);
+ int (*set_trip_temp) (struct thermal_zone_device *, int, int);
+ int (*get_trip_hyst) (struct thermal_zone_device *, int, int *);
+ int (*set_trip_hyst) (struct thermal_zone_device *, int, int);
+ int (*get_crit_temp) (struct thermal_zone_device *, int *);
+ int (*set_emul_temp) (struct thermal_zone_device *, int);
int (*get_trend) (struct thermal_zone_device *, int,
enum thermal_trend *);
int (*notify) (struct thermal_zone_device *, int,
@@ -332,9 +328,9 @@
* temperature.
*/
struct thermal_zone_of_device_ops {
- int (*get_temp)(void *, long *);
+ int (*get_temp)(void *, int *);
int (*get_trend)(void *, long *);
- int (*set_emul_temp)(void *, unsigned long);
+ int (*set_emul_temp)(void *, int);
};
/**
@@ -406,7 +402,7 @@
const struct thermal_cooling_device_ops *);
void thermal_cooling_device_unregister(struct thermal_cooling_device *);
struct thermal_zone_device *thermal_zone_get_zone_by_name(const char *name);
-int thermal_zone_get_temp(struct thermal_zone_device *tz, unsigned long *temp);
+int thermal_zone_get_temp(struct thermal_zone_device *tz, int *temp);
int get_tz_trend(struct thermal_zone_device *, int);
struct thermal_instance *get_thermal_instance(struct thermal_zone_device *,
@@ -457,7 +453,7 @@
const char *name)
{ return ERR_PTR(-ENODEV); }
static inline int thermal_zone_get_temp(
- struct thermal_zone_device *tz, unsigned long *temp)
+ struct thermal_zone_device *tz, int *temp)
{ return -ENODEV; }
static inline int get_tz_trend(struct thermal_zone_device *tz, int trip)
{ return -ENODEV; }
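
Switching the thermal callbacks from unsigned long to int keeps temperatures in plain millidegrees Celsius and makes below-zero readings representable without tricks. An illustrative sensor callback under the new prototypes (the value is hard-coded for the sketch):

#include <linux/thermal.h>

static int demo_get_temp(void *data, int *temp)
{
	*temp = -12500;		/* -12.5 degC, now a valid reading */
	return 0;
}

static const struct thermal_zone_of_device_ops demo_thermal_ops = {
	.get_temp = demo_get_temp,
};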
diff --git a/include/linux/tick.h b/include/linux/tick.h
index 48d901f..e312219 100644
--- a/include/linux/tick.h
+++ b/include/linux/tick.h
@@ -147,11 +147,20 @@
cpumask_or(mask, mask, tick_nohz_full_mask);
}
+static inline int housekeeping_any_cpu(void)
+{
+ return cpumask_any_and(housekeeping_mask, cpu_online_mask);
+}
+
extern void tick_nohz_full_kick(void);
extern void tick_nohz_full_kick_cpu(int cpu);
extern void tick_nohz_full_kick_all(void);
extern void __tick_nohz_task_switch(void);
#else
+static inline int housekeeping_any_cpu(void)
+{
+ return smp_processor_id();
+}
static inline bool tick_nohz_full_enabled(void) { return false; }
static inline bool tick_nohz_full_cpu(int cpu) { return false; }
static inline void tick_nohz_full_add_cpus_to(struct cpumask *mask) { }
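
housekeeping_any_cpu() picks an online CPU outside the nohz_full set, falling back to the current CPU when nohz_full is not in use, which is handy for steering periodic work away from isolated CPUs. An illustrative caller:

#include <linux/tick.h>
#include <linux/workqueue.h>

static void demo_queue_housekeeping_work(struct work_struct *work)
{
	int cpu = housekeeping_any_cpu();

	/* Keep the deferred work off the isolated (nohz_full) CPUs. */
	queue_work_on(cpu, system_wq, work);
}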
diff --git a/include/linux/zpool.h b/include/linux/zpool.h
index c924a28..42f8ec9 100644
--- a/include/linux/zpool.h
+++ b/include/linux/zpool.h
@@ -36,6 +36,8 @@
ZPOOL_MM_DEFAULT = ZPOOL_MM_RW
};
+bool zpool_has_pool(char *type);
+
struct zpool *zpool_create_pool(char *type, char *name,
gfp_t gfp, const struct zpool_ops *ops);
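
zpool_has_pool() lets users such as zswap verify that a named allocator backend is usable before creating a pool with it. An illustrative probe-time check; the backend name is just an example:

#include <linux/zpool.h>

static int demo_check_backend(void)
{
	char *type = "zbud";	/* example backend name */

	if (!zpool_has_pool(type)) {
		pr_err("zpool backend %s not available\n", type);
		return -ENODEV;
	}
	return 0;
}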
diff --git a/include/media/videobuf2-memops.h b/include/media/videobuf2-memops.h
index 9f36641..6513c7e 100644
--- a/include/media/videobuf2-memops.h
+++ b/include/media/videobuf2-memops.h
@@ -15,6 +15,7 @@
#define _MEDIA_VIDEOBUF2_MEMOPS_H
#include <media/videobuf2-core.h>
+#include <linux/mm.h>
/**
* struct vb2_vmarea_handler - common vma refcount tracking handler
@@ -31,11 +32,9 @@
extern const struct vm_operations_struct vb2_common_vm_ops;
-int vb2_get_contig_userptr(unsigned long vaddr, unsigned long size,
- struct vm_area_struct **res_vma, dma_addr_t *res_pa);
-
-struct vm_area_struct *vb2_get_vma(struct vm_area_struct *vma);
-void vb2_put_vma(struct vm_area_struct *vma);
-
+struct frame_vector *vb2_create_framevec(unsigned long start,
+ unsigned long length,
+ bool write);
+void vb2_destroy_framevec(struct frame_vector *vec);
#endif
diff --git a/include/net/fib_rules.h b/include/net/fib_rules.h
index 4e8f804..59160de 100644
--- a/include/net/fib_rules.h
+++ b/include/net/fib_rules.h
@@ -66,7 +66,6 @@
struct nlattr **);
int (*fill)(struct fib_rule *, struct sk_buff *,
struct fib_rule_hdr *);
- u32 (*default_pref)(struct fib_rules_ops *ops);
size_t (*nlmsg_payload)(struct fib_rule *);
/* Called after modifications to the rules set, must flush
@@ -118,5 +117,4 @@
struct fib_lookup_arg *);
int fib_default_rule_add(struct fib_rules_ops *, u32 pref, u32 table,
u32 flags);
-u32 fib_default_rule_pref(struct fib_rules_ops *ops);
#endif
diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index e3314e5..bfc5694 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -477,7 +477,9 @@
* @chandef: Channel definition for this BSS -- the hardware might be
* configured a higher bandwidth than this BSS uses, for example.
* @ht_operation_mode: HT operation mode like in &struct ieee80211_ht_operation.
- * This field is only valid when the channel type is one of the HT types.
+ * This field is only valid when the channel is a wide HT/VHT channel.
+ * Note that with TDLS this can be the case (channel is HT, protection must
+ * be used from this field) even when the BSS association isn't using HT.
* @cqm_rssi_thold: Connection quality monitor RSSI threshold, a zero value
* implies disabled
* @cqm_rssi_hyst: Connection quality monitor RSSI hysteresis
diff --git a/include/net/netfilter/br_netfilter.h b/include/net/netfilter/br_netfilter.h
index bab824b..d4c6b5f 100644
--- a/include/net/netfilter/br_netfilter.h
+++ b/include/net/netfilter/br_netfilter.h
@@ -59,7 +59,7 @@
br_nf_pre_routing_ipv6(const struct nf_hook_ops *ops, struct sk_buff *skb,
const struct nf_hook_state *state)
{
- return NF_DROP;
+ return NF_ACCEPT;
}
#endif
diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h
index f5e23c6d..e8ad468 100644
--- a/include/net/netfilter/nf_conntrack.h
+++ b/include/net/netfilter/nf_conntrack.h
@@ -298,6 +298,7 @@
struct nf_conn *nf_ct_tmpl_alloc(struct net *net,
const struct nf_conntrack_zone *zone,
gfp_t flags);
+void nf_ct_tmpl_free(struct nf_conn *tmpl);
#define NF_CT_STAT_INC(net, count) __this_cpu_inc((net)->ct.stat->count)
#define NF_CT_STAT_INC_ATOMIC(net, count) this_cpu_inc((net)->ct.stat->count)
diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h
index 2a24668..aa8bee7 100644
--- a/include/net/netfilter/nf_tables.h
+++ b/include/net/netfilter/nf_tables.h
@@ -125,7 +125,7 @@
static inline enum nft_registers nft_type_to_reg(enum nft_data_types type)
{
- return type == NFT_DATA_VERDICT ? NFT_REG_VERDICT : NFT_REG_1;
+ return type == NFT_DATA_VERDICT ? NFT_REG_VERDICT : NFT_REG_1 * NFT_REG_SIZE / NFT_REG32_SIZE;
}
unsigned int nft_parse_register(const struct nlattr *attr);
diff --git a/include/scsi/scsi_common.h b/include/scsi/scsi_common.h
index 676b03b..11571b2 100644
--- a/include/scsi/scsi_common.h
+++ b/include/scsi/scsi_common.h
@@ -61,4 +61,9 @@
extern bool scsi_normalize_sense(const u8 *sense_buffer, int sb_len,
struct scsi_sense_hdr *sshdr);
+extern void scsi_build_sense_buffer(int desc, u8 *buf, u8 key, u8 asc, u8 ascq);
+int scsi_set_sense_information(u8 *buf, int buf_len, u64 info);
+extern const u8 * scsi_sense_desc_find(const u8 * sense_buffer, int sb_len,
+ int desc_type);
+
#endif /* _SCSI_COMMON_H_ */
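
Moving scsi_build_sense_buffer(), scsi_set_sense_information() and scsi_sense_desc_find() into scsi_common.h makes them usable outside the error-handling code, e.g. by target drivers. A hedged sketch of filling a descriptor-format sense buffer (numeric key/ASC values are spelled out to keep the sketch self-contained):

#include <scsi/scsi_common.h>

static int demo_fill_sense(u8 *sense, int sense_len, u64 bad_lba)
{
	/* desc=1 selects descriptor format; 0x05/0x21/0x00 is
	 * ILLEGAL REQUEST / LOGICAL BLOCK ADDRESS OUT OF RANGE. */
	scsi_build_sense_buffer(1, sense, 0x05, 0x21, 0x00);

	/* Attach the offending LBA via an information descriptor. */
	return scsi_set_sense_information(sense, sense_len, bad_lba);
}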
diff --git a/include/scsi/scsi_device.h b/include/scsi/scsi_device.h
index 50c2a36..fe89d7c 100644
--- a/include/scsi/scsi_device.h
+++ b/include/scsi/scsi_device.h
@@ -196,34 +196,13 @@
struct execute_work ew; /* used to get process context on put */
struct work_struct requeue_work;
- struct scsi_dh_data *scsi_dh_data;
+ struct scsi_device_handler *handler;
+ void *handler_data;
+
enum scsi_device_state sdev_state;
unsigned long sdev_data[0];
} __attribute__((aligned(sizeof(unsigned long))));
-typedef void (*activate_complete)(void *, int);
-struct scsi_device_handler {
- /* Used by the infrastructure */
- struct list_head list; /* list of scsi_device_handlers */
-
- /* Filled by the hardware handler */
- struct module *module;
- const char *name;
- int (*check_sense)(struct scsi_device *, struct scsi_sense_hdr *);
- struct scsi_dh_data *(*attach)(struct scsi_device *);
- void (*detach)(struct scsi_device *);
- int (*activate)(struct scsi_device *, activate_complete, void *);
- int (*prep_fn)(struct scsi_device *, struct request *);
- int (*set_params)(struct scsi_device *, const char *);
- bool (*match)(struct scsi_device *);
-};
-
-struct scsi_dh_data {
- struct scsi_device_handler *scsi_dh;
- struct scsi_device *sdev;
- struct kref kref;
-};
-
#define to_scsi_device(d) \
container_of(d, struct scsi_device, sdev_gendev)
#define class_to_sdev(d) \
diff --git a/include/scsi/scsi_dh.h b/include/scsi/scsi_dh.h
index 620c723..85d7317 100644
--- a/include/scsi/scsi_dh.h
+++ b/include/scsi/scsi_dh.h
@@ -55,11 +55,26 @@
SCSI_DH_NOSYS,
SCSI_DH_DRIVER_MAX,
};
-#if defined(CONFIG_SCSI_DH) || defined(CONFIG_SCSI_DH_MODULE)
+
+typedef void (*activate_complete)(void *, int);
+struct scsi_device_handler {
+ /* Used by the infrastructure */
+ struct list_head list; /* list of scsi_device_handlers */
+
+ /* Filled by the hardware handler */
+ struct module *module;
+ const char *name;
+ int (*check_sense)(struct scsi_device *, struct scsi_sense_hdr *);
+ int (*attach)(struct scsi_device *);
+ void (*detach)(struct scsi_device *);
+ int (*activate)(struct scsi_device *, activate_complete, void *);
+ int (*prep_fn)(struct scsi_device *, struct request *);
+ int (*set_params)(struct scsi_device *, const char *);
+};
+
+#ifdef CONFIG_SCSI_DH
extern int scsi_dh_activate(struct request_queue *, activate_complete, void *);
-extern int scsi_dh_handler_exist(const char *);
extern int scsi_dh_attach(struct request_queue *, const char *);
-extern void scsi_dh_detach(struct request_queue *);
extern const char *scsi_dh_attached_handler_name(struct request_queue *, gfp_t);
extern int scsi_dh_set_params(struct request_queue *, const char *);
#else
@@ -69,18 +84,10 @@
fn(data, 0);
return 0;
}
-static inline int scsi_dh_handler_exist(const char *name)
-{
- return 0;
-}
static inline int scsi_dh_attach(struct request_queue *req, const char *name)
{
return SCSI_DH_NOSYS;
}
-static inline void scsi_dh_detach(struct request_queue *q)
-{
- return;
-}
static inline const char *scsi_dh_attached_handler_name(struct request_queue *q,
gfp_t gfp)
{
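
With struct scsi_dh_data gone, a device handler now attaches by returning an int and stashing its per-device state in sdev->handler_data. A skeleton under the new model; every demo_* name is illustrative and the return convention is simplified:

#include <linux/module.h>
#include <linux/slab.h>
#include <scsi/scsi_device.h>
#include <scsi/scsi_dh.h>

struct demo_dh_state { int path_state; };	/* illustrative per-device state */

static int demo_dh_attach(struct scsi_device *sdev)
{
	struct demo_dh_state *h = kzalloc(sizeof(*h), GFP_KERNEL);

	if (!h)
		return -ENOMEM;
	sdev->handler_data = h;
	return 0;
}

static void demo_dh_detach(struct scsi_device *sdev)
{
	kfree(sdev->handler_data);
	sdev->handler_data = NULL;
}

static struct scsi_device_handler demo_dh = {
	.name	= "demo_dh",
	.module	= THIS_MODULE,
	.attach	= demo_dh_attach,
	.detach	= demo_dh_detach,
};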
diff --git a/include/scsi/scsi_eh.h b/include/scsi/scsi_eh.h
index 8d1d7fa..dbb8c64 100644
--- a/include/scsi/scsi_eh.h
+++ b/include/scsi/scsi_eh.h
@@ -4,6 +4,7 @@
#include <linux/scatterlist.h>
#include <scsi/scsi_cmnd.h>
+#include <scsi/scsi_common.h>
struct scsi_device;
struct Scsi_Host;
@@ -21,14 +22,9 @@
return ((sshdr->response_code >= 0x70) && (sshdr->response_code & 1));
}
-extern const u8 * scsi_sense_desc_find(const u8 * sense_buffer, int sb_len,
- int desc_type);
-
extern int scsi_get_sense_info_fld(const u8 * sense_buffer, int sb_len,
u64 * info_out);
-extern void scsi_build_sense_buffer(int desc, u8 *buf, u8 key, u8 asc, u8 ascq);
-
extern int scsi_ioctl_reset(struct scsi_device *, int __user *);
struct scsi_eh_save {
diff --git a/include/target/iscsi/iscsi_target_core.h b/include/target/iscsi/iscsi_target_core.h
index 0aedbb2..373d334 100644
--- a/include/target/iscsi/iscsi_target_core.h
+++ b/include/target/iscsi/iscsi_target_core.h
@@ -62,6 +62,8 @@
/* T10 protection information disabled by default */
#define TA_DEFAULT_T10_PI 0
#define TA_DEFAULT_FABRIC_PROT_TYPE 0
+/* TPG status needs to be enabled to return sendtargets discovery endpoint info */
+#define TA_DEFAULT_TPG_ENABLED_SENDTARGETS 1
#define ISCSI_IOV_DATA_BUFFER 5
@@ -517,7 +519,6 @@
u16 cid;
/* Remote TCP Port */
u16 login_port;
- u16 local_port;
int net_size;
int login_family;
u32 auth_id;
@@ -527,9 +528,8 @@
u32 exp_statsn;
/* Per connection status sequence number */
u32 stat_sn;
-#define IPV6_ADDRESS_SPACE 48
- unsigned char login_ip[IPV6_ADDRESS_SPACE];
- unsigned char local_ip[IPV6_ADDRESS_SPACE];
+ struct sockaddr_storage login_sockaddr;
+ struct sockaddr_storage local_sockaddr;
int conn_usage_count;
int conn_waiting_on_uc;
atomic_t check_immediate_queue;
@@ -636,7 +636,7 @@
/* session wide counter: expected command sequence number */
u32 exp_cmd_sn;
/* session wide counter: maximum allowed command sequence number */
- u32 max_cmd_sn;
+ atomic_t max_cmd_sn;
struct list_head sess_ooo_cmdsn_list;
/* LIO specific session ID */
@@ -764,6 +764,7 @@
u32 default_erl;
u8 t10_pi;
u32 fabric_prot_type;
+ u32 tpg_enabled_sendtargets;
struct iscsi_portal_group *tpg;
};
@@ -776,12 +777,10 @@
enum iscsi_timer_flags_table np_login_timer_flags;
u32 np_exports;
enum np_flags_table np_flags;
- unsigned char np_ip[IPV6_ADDRESS_SPACE];
- u16 np_port;
spinlock_t np_thread_lock;
struct completion np_restart_comp;
struct socket *np_socket;
- struct __kernel_sockaddr_storage np_sockaddr;
+ struct sockaddr_storage np_sockaddr;
struct task_struct *np_thread;
struct timer_list np_login_timer;
void *np_context;
diff --git a/include/target/iscsi/iscsi_target_stat.h b/include/target/iscsi/iscsi_target_stat.h
index 3ff76b4..e615bb4 100644
--- a/include/target/iscsi/iscsi_target_stat.h
+++ b/include/target/iscsi/iscsi_target_stat.h
@@ -50,7 +50,7 @@
u64 last_fail_time; /* time stamp (jiffies) */
u32 last_fail_type;
int last_intr_fail_ip_family;
- unsigned char last_intr_fail_ip_addr[IPV6_ADDRESS_SPACE];
+ struct sockaddr_storage last_intr_fail_sockaddr;
char last_intr_fail_name[224];
} ____cacheline_aligned;
diff --git a/include/target/iscsi/iscsi_transport.h b/include/target/iscsi/iscsi_transport.h
index e6bb166..90e37fa 100644
--- a/include/target/iscsi/iscsi_transport.h
+++ b/include/target/iscsi/iscsi_transport.h
@@ -9,7 +9,7 @@
int priv_size;
struct module *owner;
struct list_head t_node;
- int (*iscsit_setup_np)(struct iscsi_np *, struct __kernel_sockaddr_storage *);
+ int (*iscsit_setup_np)(struct iscsi_np *, struct sockaddr_storage *);
int (*iscsit_accept_np)(struct iscsi_np *, struct iscsi_conn *);
void (*iscsit_free_np)(struct iscsi_np *);
void (*iscsit_wait_conn)(struct iscsi_conn *);
diff --git a/include/target/target_core_backend.h b/include/target/target_core_backend.h
index 1e5c8f9..56cf8e4 100644
--- a/include/target/target_core_backend.h
+++ b/include/target/target_core_backend.h
@@ -93,4 +93,6 @@
sense_reason_t passthrough_parse_cdb(struct se_cmd *cmd,
sense_reason_t (*exec_cmd)(struct se_cmd *cmd));
+bool target_sense_desc_format(struct se_device *dev);
+
#endif /* TARGET_CORE_BACKEND_H */
diff --git a/include/target/target_core_base.h b/include/target/target_core_base.h
index 17ae2d6..ac9bf1c 100644
--- a/include/target/target_core_base.h
+++ b/include/target/target_core_base.h
@@ -6,6 +6,7 @@
#include <linux/dma-mapping.h>
#include <linux/blkdev.h>
#include <linux/percpu_ida.h>
+#include <linux/t10-pi.h>
#include <net/sock.h>
#include <net/tcp.h>
@@ -426,12 +427,6 @@
TARGET_DIF_CHECK_REFTAG = 0x1 << 2,
};
-struct se_dif_v1_tuple {
- __be16 guard_tag;
- __be16 app_tag;
- __be32 ref_tag;
-};
-
/* for sam_task_attr */
#define TCM_SIMPLE_TAG 0x20
#define TCM_HEAD_TAG 0x21
@@ -444,6 +439,9 @@
u8 scsi_asc;
u8 scsi_ascq;
u16 scsi_sense_length;
+ unsigned cmd_wait_set:1;
+ unsigned unknown_data_length:1;
+ bool state_active:1;
u64 tag; /* SAM command identifier aka task tag */
/* Delay for ALUA Active/NonOptimized state access in milliseconds */
int alua_nonop_delay;
@@ -455,11 +453,8 @@
unsigned int map_tag;
/* Transport protocol dependent state, see transport_state_table */
enum transport_state_table t_state;
- unsigned cmd_wait_set:1;
- unsigned unknown_data_length:1;
/* See se_cmd_flags_table */
u32 se_cmd_flags;
- u32 se_ordered_id;
/* Total size in bytes associated with command */
u32 data_length;
u32 residual_count;
@@ -477,7 +472,6 @@
struct se_tmr_req *se_tmr_req;
struct list_head se_cmd_list;
struct completion cmd_wait_comp;
- struct kref cmd_kref;
const struct target_core_fabric_ops *se_tfo;
sense_reason_t (*execute_cmd)(struct se_cmd *);
sense_reason_t (*transport_complete_callback)(struct se_cmd *, bool);
@@ -497,6 +491,7 @@
#define CMD_T_REQUEST_STOP (1 << 8)
#define CMD_T_BUSY (1 << 9)
spinlock_t t_state_lock;
+ struct kref cmd_kref;
struct completion t_transport_stop_comp;
struct work_struct work;
@@ -509,8 +504,10 @@
struct scatterlist *t_bidi_data_sg;
unsigned int t_bidi_data_nents;
+ /* Used for lun->lun_ref counting */
+ int lun_ref_active;
+
struct list_head state_list;
- bool state_active;
/* old task stop completion, consider merging with some of the above */
struct completion task_stop_comp;
@@ -518,20 +515,17 @@
/* backend private data */
void *priv;
- /* Used for lun->lun_ref counting */
- int lun_ref_active;
-
/* DIF related members */
enum target_prot_op prot_op;
enum target_prot_type prot_type;
u8 prot_checks;
+ bool prot_pto;
u32 prot_length;
u32 reftag_seed;
struct scatterlist *t_prot_sg;
unsigned int t_prot_nents;
sense_reason_t pi_err;
sector_t bad_sector;
- bool prot_pto;
};
struct se_ua {
@@ -598,7 +592,6 @@
};
struct se_lun_acl {
- char initiatorname[TRANSPORT_IQN_LEN];
u64 mapped_lun;
struct se_node_acl *se_lun_nacl;
struct se_lun *se_lun;
@@ -685,7 +678,6 @@
#define SE_LUN_LINK_MAGIC 0xffff7771
u32 lun_link_magic;
u32 lun_access;
- u32 lun_flags;
u32 lun_index;
/* RELATIVE TARGET PORT IDENTIFER */
@@ -751,7 +743,6 @@
atomic_long_t write_bytes;
/* Active commands on this virtual SE device */
atomic_t simple_cmds;
- atomic_t dev_ordered_id;
atomic_t dev_ordered_sync;
atomic_t dev_qf_count;
u32 export_count;
diff --git a/include/target/target_core_fabric.h b/include/target/target_core_fabric.h
index 18afef9..7fb2557 100644
--- a/include/target/target_core_fabric.h
+++ b/include/target/target_core_fabric.h
@@ -5,6 +5,19 @@
struct module *module;
const char *name;
size_t node_acl_size;
+ /*
+ * Limits number of scatterlist entries per SCF_SCSI_DATA_CDB payload.
+ * Setting this value tells target-core to enforce this limit, and
+ * report as INQUIRY EVPD=b0 MAXIMUM TRANSFER LENGTH.
+ *
+ * target-core will currently reset se_cmd->data_length to this
+ * maximum size, and set UNDERFLOW residual count if length exceeds
+ * this limit.
+ *
+ * XXX: Not all initiator hosts honor this block-limit EVPD
+ * XXX: Currently assumes single PAGE_SIZE per scatterlist entry
+ */
+ u32 max_data_sg_nents;
char *(*get_fabric_name)(void);
char *(*tpg_get_wwn)(struct se_portal_group *);
u16 (*tpg_get_tag)(struct se_portal_group *);
@@ -152,6 +165,7 @@
void transport_generic_request_failure(struct se_cmd *, sense_reason_t);
void __target_execute_cmd(struct se_cmd *);
int transport_lookup_tmr_lun(struct se_cmd *, u64);
+void core_allocate_nexus_loss_ua(struct se_node_acl *acl);
struct se_node_acl *core_tpg_get_initiator_node_acl(struct se_portal_group *tpg,
unsigned char *);
diff --git a/include/trace/events/kvm.h b/include/trace/events/kvm.h
index a44062d..d6f8322 100644
--- a/include/trace/events/kvm.h
+++ b/include/trace/events/kvm.h
@@ -358,6 +358,36 @@
#endif
+TRACE_EVENT(kvm_halt_poll_ns,
+ TP_PROTO(bool grow, unsigned int vcpu_id, int new, int old),
+ TP_ARGS(grow, vcpu_id, new, old),
+
+ TP_STRUCT__entry(
+ __field(bool, grow)
+ __field(unsigned int, vcpu_id)
+ __field(int, new)
+ __field(int, old)
+ ),
+
+ TP_fast_assign(
+ __entry->grow = grow;
+ __entry->vcpu_id = vcpu_id;
+ __entry->new = new;
+ __entry->old = old;
+ ),
+
+ TP_printk("vcpu %u: halt_poll_ns %d (%s %d)",
+ __entry->vcpu_id,
+ __entry->new,
+ __entry->grow ? "grow" : "shrink",
+ __entry->old)
+);
+
+#define trace_kvm_halt_poll_ns_grow(vcpu_id, new, old) \
+ trace_kvm_halt_poll_ns(true, vcpu_id, new, old)
+#define trace_kvm_halt_poll_ns_shrink(vcpu_id, new, old) \
+ trace_kvm_halt_poll_ns(false, vcpu_id, new, old)
+
#endif /* _TRACE_KVM_MAIN_H */
/* This part must be outside protection */
diff --git a/include/trace/events/thermal_power_allocator.h b/include/trace/events/thermal_power_allocator.h
index 12e1321..5afae8f 100644
--- a/include/trace/events/thermal_power_allocator.h
+++ b/include/trace/events/thermal_power_allocator.h
@@ -11,7 +11,7 @@
u32 total_req_power, u32 *granted_power,
u32 total_granted_power, size_t num_actors,
u32 power_range, u32 max_allocatable_power,
- unsigned long current_temp, s32 delta_temp),
+ int current_temp, s32 delta_temp),
TP_ARGS(tz, req_power, total_req_power, granted_power,
total_granted_power, num_actors, power_range,
max_allocatable_power, current_temp, delta_temp),
@@ -24,7 +24,7 @@
__field(size_t, num_actors )
__field(u32, power_range )
__field(u32, max_allocatable_power )
- __field(unsigned long, current_temp )
+ __field(int, current_temp )
__field(s32, delta_temp )
),
TP_fast_assign(
@@ -42,7 +42,7 @@
__entry->delta_temp = delta_temp;
),
- TP_printk("thermal_zone_id=%d req_power={%s} total_req_power=%u granted_power={%s} total_granted_power=%u power_range=%u max_allocatable_power=%u current_temperature=%lu delta_temperature=%d",
+ TP_printk("thermal_zone_id=%d req_power={%s} total_req_power=%u granted_power={%s} total_granted_power=%u power_range=%u max_allocatable_power=%u current_temperature=%d delta_temperature=%d",
__entry->tz_id,
__print_array(__get_dynamic_array(req_power),
__entry->num_actors, 4),
diff --git a/include/trace/events/writeback.h b/include/trace/events/writeback.h
index a7aa607..fff846b 100644
--- a/include/trace/events/writeback.h
+++ b/include/trace/events/writeback.h
@@ -131,6 +131,66 @@
TP_ARGS(inode, flags)
);
+#ifdef CREATE_TRACE_POINTS
+#ifdef CONFIG_CGROUP_WRITEBACK
+
+static inline size_t __trace_wb_cgroup_size(struct bdi_writeback *wb)
+{
+ return kernfs_path_len(wb->memcg_css->cgroup->kn) + 1;
+}
+
+static inline void __trace_wb_assign_cgroup(char *buf, struct bdi_writeback *wb)
+{
+ struct cgroup *cgrp = wb->memcg_css->cgroup;
+ char *path;
+
+ path = cgroup_path(cgrp, buf, kernfs_path_len(cgrp->kn) + 1);
+ WARN_ON_ONCE(path != buf);
+}
+
+static inline size_t __trace_wbc_cgroup_size(struct writeback_control *wbc)
+{
+ if (wbc->wb)
+ return __trace_wb_cgroup_size(wbc->wb);
+ else
+ return 2;
+}
+
+static inline void __trace_wbc_assign_cgroup(char *buf,
+ struct writeback_control *wbc)
+{
+ if (wbc->wb)
+ __trace_wb_assign_cgroup(buf, wbc->wb);
+ else
+ strcpy(buf, "/");
+}
+
+#else /* CONFIG_CGROUP_WRITEBACK */
+
+static inline size_t __trace_wb_cgroup_size(struct bdi_writeback *wb)
+{
+ return 2;
+}
+
+static inline void __trace_wb_assign_cgroup(char *buf, struct bdi_writeback *wb)
+{
+ strcpy(buf, "/");
+}
+
+static inline size_t __trace_wbc_cgroup_size(struct writeback_control *wbc)
+{
+ return 2;
+}
+
+static inline void __trace_wbc_assign_cgroup(char *buf,
+ struct writeback_control *wbc)
+{
+ strcpy(buf, "/");
+}
+
+#endif /* CONFIG_CGROUP_WRITEBACK */
+#endif /* CREATE_TRACE_POINTS */
+
DECLARE_EVENT_CLASS(writeback_write_inode_template,
TP_PROTO(struct inode *inode, struct writeback_control *wbc),
@@ -141,6 +201,7 @@
__array(char, name, 32)
__field(unsigned long, ino)
__field(int, sync_mode)
+ __dynamic_array(char, cgroup, __trace_wbc_cgroup_size(wbc))
),
TP_fast_assign(
@@ -148,12 +209,14 @@
dev_name(inode_to_bdi(inode)->dev), 32);
__entry->ino = inode->i_ino;
__entry->sync_mode = wbc->sync_mode;
+ __trace_wbc_assign_cgroup(__get_str(cgroup), wbc);
),
- TP_printk("bdi %s: ino=%lu sync_mode=%d",
+ TP_printk("bdi %s: ino=%lu sync_mode=%d cgroup=%s",
__entry->name,
__entry->ino,
- __entry->sync_mode
+ __entry->sync_mode,
+ __get_str(cgroup)
)
);
@@ -172,8 +235,8 @@
);
DECLARE_EVENT_CLASS(writeback_work_class,
- TP_PROTO(struct backing_dev_info *bdi, struct wb_writeback_work *work),
- TP_ARGS(bdi, work),
+ TP_PROTO(struct bdi_writeback *wb, struct wb_writeback_work *work),
+ TP_ARGS(wb, work),
TP_STRUCT__entry(
__array(char, name, 32)
__field(long, nr_pages)
@@ -183,10 +246,11 @@
__field(int, range_cyclic)
__field(int, for_background)
__field(int, reason)
+ __dynamic_array(char, cgroup, __trace_wb_cgroup_size(wb))
),
TP_fast_assign(
strncpy(__entry->name,
- bdi->dev ? dev_name(bdi->dev) : "(unknown)", 32);
+ wb->bdi->dev ? dev_name(wb->bdi->dev) : "(unknown)", 32);
__entry->nr_pages = work->nr_pages;
__entry->sb_dev = work->sb ? work->sb->s_dev : 0;
__entry->sync_mode = work->sync_mode;
@@ -194,9 +258,10 @@
__entry->range_cyclic = work->range_cyclic;
__entry->for_background = work->for_background;
__entry->reason = work->reason;
+ __trace_wb_assign_cgroup(__get_str(cgroup), wb);
),
TP_printk("bdi %s: sb_dev %d:%d nr_pages=%ld sync_mode=%d "
- "kupdate=%d range_cyclic=%d background=%d reason=%s",
+ "kupdate=%d range_cyclic=%d background=%d reason=%s cgroup=%s",
__entry->name,
MAJOR(__entry->sb_dev), MINOR(__entry->sb_dev),
__entry->nr_pages,
@@ -204,13 +269,14 @@
__entry->for_kupdate,
__entry->range_cyclic,
__entry->for_background,
- __print_symbolic(__entry->reason, WB_WORK_REASON)
+ __print_symbolic(__entry->reason, WB_WORK_REASON),
+ __get_str(cgroup)
)
);
#define DEFINE_WRITEBACK_WORK_EVENT(name) \
DEFINE_EVENT(writeback_work_class, name, \
- TP_PROTO(struct backing_dev_info *bdi, struct wb_writeback_work *work), \
- TP_ARGS(bdi, work))
+ TP_PROTO(struct bdi_writeback *wb, struct wb_writeback_work *work), \
+ TP_ARGS(wb, work))
DEFINE_WRITEBACK_WORK_EVENT(writeback_queue);
DEFINE_WRITEBACK_WORK_EVENT(writeback_exec);
DEFINE_WRITEBACK_WORK_EVENT(writeback_start);
@@ -230,6 +296,30 @@
);
DECLARE_EVENT_CLASS(writeback_class,
+ TP_PROTO(struct bdi_writeback *wb),
+ TP_ARGS(wb),
+ TP_STRUCT__entry(
+ __array(char, name, 32)
+ __dynamic_array(char, cgroup, __trace_wb_cgroup_size(wb))
+ ),
+ TP_fast_assign(
+ strncpy(__entry->name, dev_name(wb->bdi->dev), 32);
+ __trace_wb_assign_cgroup(__get_str(cgroup), wb);
+ ),
+ TP_printk("bdi %s: cgroup=%s",
+ __entry->name,
+ __get_str(cgroup)
+ )
+);
+#define DEFINE_WRITEBACK_EVENT(name) \
+DEFINE_EVENT(writeback_class, name, \
+ TP_PROTO(struct bdi_writeback *wb), \
+ TP_ARGS(wb))
+
+DEFINE_WRITEBACK_EVENT(writeback_nowork);
+DEFINE_WRITEBACK_EVENT(writeback_wake_background);
+
+TRACE_EVENT(writeback_bdi_register,
TP_PROTO(struct backing_dev_info *bdi),
TP_ARGS(bdi),
TP_STRUCT__entry(
@@ -239,17 +329,9 @@
strncpy(__entry->name, dev_name(bdi->dev), 32);
),
TP_printk("bdi %s",
- __entry->name
+ __entry->name
)
);
-#define DEFINE_WRITEBACK_EVENT(name) \
-DEFINE_EVENT(writeback_class, name, \
- TP_PROTO(struct backing_dev_info *bdi), \
- TP_ARGS(bdi))
-
-DEFINE_WRITEBACK_EVENT(writeback_nowork);
-DEFINE_WRITEBACK_EVENT(writeback_wake_background);
-DEFINE_WRITEBACK_EVENT(writeback_bdi_register);
DECLARE_EVENT_CLASS(wbc_class,
TP_PROTO(struct writeback_control *wbc, struct backing_dev_info *bdi),
@@ -265,6 +347,7 @@
__field(int, range_cyclic)
__field(long, range_start)
__field(long, range_end)
+ __dynamic_array(char, cgroup, __trace_wbc_cgroup_size(wbc))
),
TP_fast_assign(
@@ -278,11 +361,12 @@
__entry->range_cyclic = wbc->range_cyclic;
__entry->range_start = (long)wbc->range_start;
__entry->range_end = (long)wbc->range_end;
+ __trace_wbc_assign_cgroup(__get_str(cgroup), wbc);
),
TP_printk("bdi %s: towrt=%ld skip=%ld mode=%d kupd=%d "
"bgrd=%d reclm=%d cyclic=%d "
- "start=0x%lx end=0x%lx",
+ "start=0x%lx end=0x%lx cgroup=%s",
__entry->name,
__entry->nr_to_write,
__entry->pages_skipped,
@@ -292,7 +376,9 @@
__entry->for_reclaim,
__entry->range_cyclic,
__entry->range_start,
- __entry->range_end)
+ __entry->range_end,
+ __get_str(cgroup)
+ )
)
#define DEFINE_WBC_EVENT(name) \
@@ -312,6 +398,7 @@
__field(long, age)
__field(int, moved)
__field(int, reason)
+ __dynamic_array(char, cgroup, __trace_wb_cgroup_size(wb))
),
TP_fast_assign(
unsigned long *older_than_this = work->older_than_this;
@@ -321,13 +408,15 @@
(jiffies - *older_than_this) * 1000 / HZ : -1;
__entry->moved = moved;
__entry->reason = work->reason;
+ __trace_wb_assign_cgroup(__get_str(cgroup), wb);
),
- TP_printk("bdi %s: older=%lu age=%ld enqueue=%d reason=%s",
+ TP_printk("bdi %s: older=%lu age=%ld enqueue=%d reason=%s cgroup=%s",
__entry->name,
__entry->older, /* older_than_this in jiffies */
__entry->age, /* older_than_this in relative milliseconds */
__entry->moved,
- __print_symbolic(__entry->reason, WB_WORK_REASON)
+ __print_symbolic(__entry->reason, WB_WORK_REASON),
+ __get_str(cgroup)
)
);
@@ -381,11 +470,11 @@
TRACE_EVENT(bdi_dirty_ratelimit,
- TP_PROTO(struct backing_dev_info *bdi,
+ TP_PROTO(struct bdi_writeback *wb,
unsigned long dirty_rate,
unsigned long task_ratelimit),
- TP_ARGS(bdi, dirty_rate, task_ratelimit),
+ TP_ARGS(wb, dirty_rate, task_ratelimit),
TP_STRUCT__entry(
__array(char, bdi, 32)
@@ -395,36 +484,39 @@
__field(unsigned long, dirty_ratelimit)
__field(unsigned long, task_ratelimit)
__field(unsigned long, balanced_dirty_ratelimit)
+ __dynamic_array(char, cgroup, __trace_wb_cgroup_size(wb))
),
TP_fast_assign(
- strlcpy(__entry->bdi, dev_name(bdi->dev), 32);
- __entry->write_bw = KBps(bdi->wb.write_bandwidth);
- __entry->avg_write_bw = KBps(bdi->wb.avg_write_bandwidth);
+ strlcpy(__entry->bdi, dev_name(wb->bdi->dev), 32);
+ __entry->write_bw = KBps(wb->write_bandwidth);
+ __entry->avg_write_bw = KBps(wb->avg_write_bandwidth);
__entry->dirty_rate = KBps(dirty_rate);
- __entry->dirty_ratelimit = KBps(bdi->wb.dirty_ratelimit);
+ __entry->dirty_ratelimit = KBps(wb->dirty_ratelimit);
__entry->task_ratelimit = KBps(task_ratelimit);
__entry->balanced_dirty_ratelimit =
- KBps(bdi->wb.balanced_dirty_ratelimit);
+ KBps(wb->balanced_dirty_ratelimit);
+ __trace_wb_assign_cgroup(__get_str(cgroup), wb);
),
TP_printk("bdi %s: "
"write_bw=%lu awrite_bw=%lu dirty_rate=%lu "
"dirty_ratelimit=%lu task_ratelimit=%lu "
- "balanced_dirty_ratelimit=%lu",
+ "balanced_dirty_ratelimit=%lu cgroup=%s",
__entry->bdi,
__entry->write_bw, /* write bandwidth */
__entry->avg_write_bw, /* avg write bandwidth */
__entry->dirty_rate, /* bdi dirty rate */
__entry->dirty_ratelimit, /* base ratelimit */
__entry->task_ratelimit, /* ratelimit with position control */
- __entry->balanced_dirty_ratelimit /* the balanced ratelimit */
+ __entry->balanced_dirty_ratelimit, /* the balanced ratelimit */
+ __get_str(cgroup)
)
);
TRACE_EVENT(balance_dirty_pages,
- TP_PROTO(struct backing_dev_info *bdi,
+ TP_PROTO(struct bdi_writeback *wb,
unsigned long thresh,
unsigned long bg_thresh,
unsigned long dirty,
@@ -437,7 +529,7 @@
long pause,
unsigned long start_time),
- TP_ARGS(bdi, thresh, bg_thresh, dirty, bdi_thresh, bdi_dirty,
+ TP_ARGS(wb, thresh, bg_thresh, dirty, bdi_thresh, bdi_dirty,
dirty_ratelimit, task_ratelimit,
dirtied, period, pause, start_time),
@@ -456,11 +548,12 @@
__field( long, pause)
__field(unsigned long, period)
__field( long, think)
+ __dynamic_array(char, cgroup, __trace_wb_cgroup_size(wb))
),
TP_fast_assign(
unsigned long freerun = (thresh + bg_thresh) / 2;
- strlcpy(__entry->bdi, dev_name(bdi->dev), 32);
+ strlcpy(__entry->bdi, dev_name(wb->bdi->dev), 32);
__entry->limit = global_wb_domain.dirty_limit;
__entry->setpoint = (global_wb_domain.dirty_limit +
@@ -478,6 +571,7 @@
__entry->period = period * 1000 / HZ;
__entry->pause = pause * 1000 / HZ;
__entry->paused = (jiffies - start_time) * 1000 / HZ;
+ __trace_wb_assign_cgroup(__get_str(cgroup), wb);
),
@@ -486,7 +580,7 @@
"bdi_setpoint=%lu bdi_dirty=%lu "
"dirty_ratelimit=%lu task_ratelimit=%lu "
"dirtied=%u dirtied_pause=%u "
- "paused=%lu pause=%ld period=%lu think=%ld",
+ "paused=%lu pause=%ld period=%lu think=%ld cgroup=%s",
__entry->bdi,
__entry->limit,
__entry->setpoint,
@@ -500,7 +594,8 @@
__entry->paused, /* ms */
__entry->pause, /* ms */
__entry->period, /* ms */
- __entry->think /* ms */
+ __entry->think, /* ms */
+ __get_str(cgroup)
)
);
@@ -514,6 +609,8 @@
__field(unsigned long, ino)
__field(unsigned long, state)
__field(unsigned long, dirtied_when)
+ __dynamic_array(char, cgroup,
+ __trace_wb_cgroup_size(inode_to_wb(inode)))
),
TP_fast_assign(
@@ -522,14 +619,16 @@
__entry->ino = inode->i_ino;
__entry->state = inode->i_state;
__entry->dirtied_when = inode->dirtied_when;
+ __trace_wb_assign_cgroup(__get_str(cgroup), inode_to_wb(inode));
),
- TP_printk("bdi %s: ino=%lu state=%s dirtied_when=%lu age=%lu",
+ TP_printk("bdi %s: ino=%lu state=%s dirtied_when=%lu age=%lu cgroup=%s",
__entry->name,
__entry->ino,
show_inode_state(__entry->state),
__entry->dirtied_when,
- (jiffies - __entry->dirtied_when) / HZ
+ (jiffies - __entry->dirtied_when) / HZ,
+ __get_str(cgroup)
)
);
@@ -585,6 +684,7 @@
__field(unsigned long, writeback_index)
__field(long, nr_to_write)
__field(unsigned long, wrote)
+ __dynamic_array(char, cgroup, __trace_wbc_cgroup_size(wbc))
),
TP_fast_assign(
@@ -596,10 +696,11 @@
__entry->writeback_index = inode->i_mapping->writeback_index;
__entry->nr_to_write = nr_to_write;
__entry->wrote = nr_to_write - wbc->nr_to_write;
+ __trace_wbc_assign_cgroup(__get_str(cgroup), wbc);
),
TP_printk("bdi %s: ino=%lu state=%s dirtied_when=%lu age=%lu "
- "index=%lu to_write=%ld wrote=%lu",
+ "index=%lu to_write=%ld wrote=%lu cgroup=%s",
__entry->name,
__entry->ino,
show_inode_state(__entry->state),
@@ -607,7 +708,8 @@
(jiffies - __entry->dirtied_when) / HZ,
__entry->writeback_index,
__entry->nr_to_write,
- __entry->wrote
+ __entry->wrote,
+ __get_str(cgroup)
)
);
diff --git a/include/uapi/asm-generic/unistd.h b/include/uapi/asm-generic/unistd.h
index e016bd9..8da542a 100644
--- a/include/uapi/asm-generic/unistd.h
+++ b/include/uapi/asm-generic/unistd.h
@@ -709,9 +709,11 @@
__SYSCALL(__NR_bpf, sys_bpf)
#define __NR_execveat 281
__SC_COMP(__NR_execveat, sys_execveat, compat_sys_execveat)
+#define __NR_membarrier 282
+__SYSCALL(__NR_membarrier, sys_membarrier)
#undef __NR_syscalls
-#define __NR_syscalls 282
+#define __NR_syscalls 283
/*
* All syscalls below here should go away really,
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index dbd16a2..fd5aa47 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -358,7 +358,7 @@
#define I915_PARAM_HAS_RESOURCE_STREAMER 36
typedef struct drm_i915_getparam {
- s32 param;
+ __s32 param;
/*
* WARNING: Using pointers instead of fixed-size u64 means we need to write
* compat32 code. Don't repeat this mistake.
diff --git a/include/uapi/linux/Kbuild b/include/uapi/linux/Kbuild
index 70ff1d9..f7b2db4 100644
--- a/include/uapi/linux/Kbuild
+++ b/include/uapi/linux/Kbuild
@@ -252,6 +252,7 @@
header-y += media.h
header-y += media-bus-format.h
header-y += mei.h
+header-y += membarrier.h
header-y += memfd.h
header-y += mempolicy.h
header-y += meye.h
diff --git a/include/uapi/linux/elf-em.h b/include/uapi/linux/elf-em.h
index 3429a3b..b56dfcf 100644
--- a/include/uapi/linux/elf-em.h
+++ b/include/uapi/linux/elf-em.h
@@ -39,6 +39,7 @@
#define EM_TI_C6000 140 /* TI C6X DSPs */
#define EM_AARCH64 183 /* ARM 64 bit */
#define EM_TILEPRO 188 /* Tilera TILEPro */
+#define EM_MICROBLAZE 189 /* Xilinx MicroBlaze */
#define EM_TILEGX 191 /* Tilera TILE-Gx */
#define EM_FRV 0x5441 /* Fujitsu FR-V */
#define EM_AVR32 0x18ad /* Atmel AVR32 */
diff --git a/include/uapi/linux/if_ether.h b/include/uapi/linux/if_ether.h
index aa63ed0..ea9221b 100644
--- a/include/uapi/linux/if_ether.h
+++ b/include/uapi/linux/if_ether.h
@@ -42,6 +42,7 @@
#define ETH_P_LOOP 0x0060 /* Ethernet Loopback packet */
#define ETH_P_PUP 0x0200 /* Xerox PUP packet */
#define ETH_P_PUPAT 0x0201 /* Xerox PUP Addr Trans packet */
+#define ETH_P_TSN 0x22F0 /* TSN (IEEE 1722) packet */
#define ETH_P_IP 0x0800 /* Internet Protocol packet */
#define ETH_P_X25 0x0805 /* CCITT X.25 */
#define ETH_P_ARP 0x0806 /* Address Resolution packet */
diff --git a/include/uapi/linux/kernel-page-flags.h b/include/uapi/linux/kernel-page-flags.h
index a6c4962..5da5f87 100644
--- a/include/uapi/linux/kernel-page-flags.h
+++ b/include/uapi/linux/kernel-page-flags.h
@@ -33,6 +33,7 @@
#define KPF_THP 22
#define KPF_BALLOON 23
#define KPF_ZERO_PAGE 24
+#define KPF_IDLE 25
#endif /* _UAPILINUX_KERNEL_PAGE_FLAGS_H */
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index 0d831f9..a9256f0 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -237,6 +237,7 @@
__u32 count;
__u64 data_offset; /* relative to kvm_run start */
} io;
+ /* KVM_EXIT_DEBUG */
struct {
struct kvm_debug_exit_arch arch;
} debug;
@@ -285,6 +286,7 @@
__u32 data;
__u8 is_write;
} dcr;
+ /* KVM_EXIT_INTERNAL_ERROR */
struct {
__u32 suberror;
/* Available with KVM_CAP_INTERNAL_ERROR_DATA: */
@@ -295,6 +297,7 @@
struct {
__u64 gprs[32];
} osi;
+ /* KVM_EXIT_PAPR_HCALL */
struct {
__u64 nr;
__u64 ret;
@@ -819,6 +822,8 @@
#define KVM_CAP_DISABLE_QUIRKS 116
#define KVM_CAP_X86_SMM 117
#define KVM_CAP_MULTI_ADDRESS_SPACE 118
+#define KVM_CAP_GUEST_DEBUG_HW_BPS 119
+#define KVM_CAP_GUEST_DEBUG_HW_WPS 120
#ifdef KVM_CAP_IRQ_ROUTING
diff --git a/include/uapi/linux/membarrier.h b/include/uapi/linux/membarrier.h
new file mode 100644
index 0000000..e0b108b
--- /dev/null
+++ b/include/uapi/linux/membarrier.h
@@ -0,0 +1,53 @@
+#ifndef _UAPI_LINUX_MEMBARRIER_H
+#define _UAPI_LINUX_MEMBARRIER_H
+
+/*
+ * linux/membarrier.h
+ *
+ * membarrier system call API
+ *
+ * Copyright (c) 2010, 2015 Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+/**
+ * enum membarrier_cmd - membarrier system call command
+ * @MEMBARRIER_CMD_QUERY: Query the set of supported commands. It returns
+ * a bitmask of valid commands.
+ * @MEMBARRIER_CMD_SHARED: Execute a memory barrier on all running threads.
+ * Upon return from system call, the caller thread
+ * is ensured that all running threads have passed
+ * through a state where all memory accesses to
+ * user-space addresses match program order between
+ * entry to and return from the system call
+ * (non-running threads are de facto in such a
+ * state). This covers threads from all processes
+ * running on the system. This command returns 0.
+ *
+ * Command to be passed to the membarrier system call. The commands need to
+ * be a single bit each, except for MEMBARRIER_CMD_QUERY which is assigned to
+ * the value 0.
+ */
+enum membarrier_cmd {
+ MEMBARRIER_CMD_QUERY = 0,
+ MEMBARRIER_CMD_SHARED = (1 << 0),
+};
+
+#endif /* _UAPI_LINUX_MEMBARRIER_H */
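
A minimal sketch (not part of this patch) of how user space might exercise the new interface, assuming __NR_membarrier is defined by the installed kernel headers; there is no libc wrapper here, so syscall(2) is used directly. MEMBARRIER_CMD_QUERY returns the bitmask of supported commands, which gates the MEMBARRIER_CMD_SHARED call:

#include <linux/membarrier.h>
#include <sys/syscall.h>
#include <unistd.h>
#include <stdio.h>

static int membarrier(int cmd, int flags)
{
	return syscall(__NR_membarrier, cmd, flags);
}

int main(void)
{
	int supported = membarrier(MEMBARRIER_CMD_QUERY, 0);

	if (supported < 0 || !(supported & MEMBARRIER_CMD_SHARED)) {
		fprintf(stderr, "MEMBARRIER_CMD_SHARED not supported\n");
		return 1;
	}

	/* On return, every running thread has passed through a full barrier. */
	return membarrier(MEMBARRIER_CMD_SHARED, 0) ? 1 : 0;
}
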
diff --git a/include/uapi/linux/target_core_user.h b/include/uapi/linux/target_core_user.h
index b67f99d..95c6521 100644
--- a/include/uapi/linux/target_core_user.h
+++ b/include/uapi/linux/target_core_user.h
@@ -42,10 +42,6 @@
#define TCMU_MAILBOX_VERSION 2
#define ALIGN_SIZE 64 /* Should be enough for most CPUs */
-/* See https://gcc.gnu.org/onlinedocs/cpp/Stringification.html */
-#define xstr(s) str(s)
-#define str(s) #s
-
struct tcmu_mailbox {
__u16 version;
__u16 flags;
diff --git a/include/uapi/xen/privcmd.h b/include/uapi/xen/privcmd.h
index a853168..7ddeeda 100644
--- a/include/uapi/xen/privcmd.h
+++ b/include/uapi/xen/privcmd.h
@@ -44,6 +44,10 @@
struct privcmd_mmap_entry {
__u64 va;
+ /*
+ * This should be a GFN. It's not possible to change the name because
+ * it's exposed to the user-space.
+ */
__u64 mfn;
__u64 npages;
};
diff --git a/include/xen/page.h b/include/xen/page.h
index a5983da..1daae48 100644
--- a/include/xen/page.h
+++ b/include/xen/page.h
@@ -3,9 +3,9 @@
#include <asm/xen/page.h>
-static inline unsigned long page_to_mfn(struct page *page)
+static inline unsigned long xen_page_to_gfn(struct page *page)
{
- return pfn_to_mfn(page_to_pfn(page));
+ return pfn_to_gfn(page_to_pfn(page));
}
struct xen_memory_region {
diff --git a/include/xen/xen-ops.h b/include/xen/xen-ops.h
index 0ce4f32..e4e214a 100644
--- a/include/xen/xen-ops.h
+++ b/include/xen/xen-ops.h
@@ -30,7 +30,7 @@
struct vm_area_struct;
/*
- * xen_remap_domain_mfn_array() - map an array of foreign frames
+ * xen_remap_domain_gfn_array() - map an array of foreign frames
* @vma: VMA to map the pages into
* @addr: Address at which to map the pages
* @gfn: Array of GFNs to map
@@ -46,14 +46,14 @@
* Returns the number of successfully mapped frames, or a -ve error
* code.
*/
-int xen_remap_domain_mfn_array(struct vm_area_struct *vma,
+int xen_remap_domain_gfn_array(struct vm_area_struct *vma,
unsigned long addr,
xen_pfn_t *gfn, int nr,
int *err_ptr, pgprot_t prot,
unsigned domid,
struct page **pages);
-/* xen_remap_domain_mfn_range() - map a range of foreign frames
+/* xen_remap_domain_gfn_range() - map a range of foreign frames
* @vma: VMA to map the pages into
* @addr: Address at which to map the pages
* @gfn: First GFN to map.
@@ -65,12 +65,12 @@
* Returns the number of successfully mapped frames, or a -ve error
* code.
*/
-int xen_remap_domain_mfn_range(struct vm_area_struct *vma,
+int xen_remap_domain_gfn_range(struct vm_area_struct *vma,
unsigned long addr,
xen_pfn_t gfn, int nr,
pgprot_t prot, unsigned domid,
struct page **pages);
-int xen_unmap_domain_mfn_range(struct vm_area_struct *vma,
+int xen_unmap_domain_gfn_range(struct vm_area_struct *vma,
int numpgs, struct page **pages);
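
As a hedged illustration of the renamed helpers (not taken from this patch), a driver holding a suitable VMA might map nr foreign frames of domain domid starting at first_gfn using the signature above; example_map_foreign and its parameters are hypothetical:

#include <linux/mm.h>
#include <xen/xen-ops.h>

static int example_map_foreign(struct vm_area_struct *vma, xen_pfn_t first_gfn,
			       int nr, unsigned int domid, struct page **pages)
{
	/* Map the frames at the start of the VMA with its existing protection. */
	return xen_remap_domain_gfn_range(vma, vma->vm_start, first_gfn, nr,
					  vma->vm_page_prot, domid, pages);
}
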
int xen_xlate_remap_gfn_array(struct vm_area_struct *vma,
unsigned long addr,
diff --git a/init/Kconfig b/init/Kconfig
index 02da9f1..c24b6f7 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -1602,6 +1602,18 @@
bugs/quirks. Disable this only if your target machine is
unaffected by PCI quirks.
+config MEMBARRIER
+ bool "Enable membarrier() system call" if EXPERT
+ default y
+ help
+ Enable the membarrier() system call that allows issuing memory
+ barriers across all running threads, which can be used to distribute
+ the cost of user-space memory barriers asymmetrically by transforming
+ pairs of memory barriers into pairs consisting of membarrier() and a
+ compiler barrier.
+
+ If unsure, say Y.
+
config EMBEDDED
bool "Embedded system"
option allnoconfig_y
diff --git a/init/initramfs.c b/init/initramfs.c
index ad1bd77..b32ad7d 100644
--- a/init/initramfs.c
+++ b/init/initramfs.c
@@ -526,14 +526,14 @@
static void __init free_initrd(void)
{
-#ifdef CONFIG_KEXEC
+#ifdef CONFIG_KEXEC_CORE
unsigned long crashk_start = (unsigned long)__va(crashk_res.start);
unsigned long crashk_end = (unsigned long)__va(crashk_res.end);
#endif
if (do_retain_initrd)
goto skip;
-#ifdef CONFIG_KEXEC
+#ifdef CONFIG_KEXEC_CORE
/*
* If the initrd region is overlapped with crashkernel reserved region,
* free only memory that is not part of crashkernel region.
diff --git a/init/main.c b/init/main.c
index 5650655..9e64d70 100644
--- a/init/main.c
+++ b/init/main.c
@@ -877,7 +877,6 @@
static void __init do_basic_setup(void)
{
cpuset_init_smp();
- usermodehelper_init();
shmem_init();
driver_init();
init_irq_proc();
diff --git a/ipc/msgutil.c b/ipc/msgutil.c
index 2b49159..71f448e 100644
--- a/ipc/msgutil.c
+++ b/ipc/msgutil.c
@@ -123,7 +123,7 @@
size_t len = src->m_ts;
size_t alen;
- BUG_ON(dst == NULL);
+ WARN_ON(dst == NULL);
if (src->m_ts > dst->m_ts)
return ERR_PTR(-EINVAL);
diff --git a/ipc/shm.c b/ipc/shm.c
index 4aef24d..222131e 100644
--- a/ipc/shm.c
+++ b/ipc/shm.c
@@ -159,7 +159,7 @@
* We raced in the idr lookup or with shm_destroy(). Either way, the
* ID is busted.
*/
- BUG_ON(IS_ERR(ipcp));
+ WARN_ON(IS_ERR(ipcp));
return container_of(ipcp, struct shmid_kernel, shm_perm);
}
@@ -393,7 +393,7 @@
return ret;
sfd->vm_ops = vma->vm_ops;
#ifdef CONFIG_MMU
- BUG_ON(!sfd->vm_ops->fault);
+ WARN_ON(!sfd->vm_ops->fault);
#endif
vma->vm_ops = &shm_vm_ops;
shm_open(vma);
diff --git a/kernel/Makefile b/kernel/Makefile
index e0d7587..53abf00 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -49,7 +49,9 @@
obj-$(CONFIG_MODULE_SIG) += module_signing.o
obj-$(CONFIG_KALLSYMS) += kallsyms.o
obj-$(CONFIG_BSD_PROCESS_ACCT) += acct.o
+obj-$(CONFIG_KEXEC_CORE) += kexec_core.o
obj-$(CONFIG_KEXEC) += kexec.o
+obj-$(CONFIG_KEXEC_FILE) += kexec_file.o
obj-$(CONFIG_BACKTRACE_SELF_TEST) += backtracetest.o
obj-$(CONFIG_COMPAT) += compat.o
obj-$(CONFIG_CGROUPS) += cgroup.o
@@ -98,6 +100,7 @@
obj-$(CONFIG_JUMP_LABEL) += jump_label.o
obj-$(CONFIG_CONTEXT_TRACKING) += context_tracking.o
obj-$(CONFIG_TORTURE_TEST) += torture.o
+obj-$(CONFIG_MEMBARRIER) += membarrier.o
obj-$(CONFIG_HAS_IOMEM) += memremap.o
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index dc9b464..35bac8e 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -155,14 +155,15 @@
void __user *ukey = u64_to_ptr(attr->key);
void __user *uvalue = u64_to_ptr(attr->value);
int ufd = attr->map_fd;
- struct fd f = fdget(ufd);
struct bpf_map *map;
void *key, *value, *ptr;
+ struct fd f;
int err;
if (CHECK_ATTR(BPF_MAP_LOOKUP_ELEM))
return -EINVAL;
+ f = fdget(ufd);
map = bpf_map_get(f);
if (IS_ERR(map))
return PTR_ERR(map);
@@ -213,14 +214,15 @@
void __user *ukey = u64_to_ptr(attr->key);
void __user *uvalue = u64_to_ptr(attr->value);
int ufd = attr->map_fd;
- struct fd f = fdget(ufd);
struct bpf_map *map;
void *key, *value;
+ struct fd f;
int err;
if (CHECK_ATTR(BPF_MAP_UPDATE_ELEM))
return -EINVAL;
+ f = fdget(ufd);
map = bpf_map_get(f);
if (IS_ERR(map))
return PTR_ERR(map);
@@ -265,14 +267,15 @@
{
void __user *ukey = u64_to_ptr(attr->key);
int ufd = attr->map_fd;
- struct fd f = fdget(ufd);
struct bpf_map *map;
+ struct fd f;
void *key;
int err;
if (CHECK_ATTR(BPF_MAP_DELETE_ELEM))
return -EINVAL;
+ f = fdget(ufd);
map = bpf_map_get(f);
if (IS_ERR(map))
return PTR_ERR(map);
@@ -305,14 +308,15 @@
void __user *ukey = u64_to_ptr(attr->key);
void __user *unext_key = u64_to_ptr(attr->next_key);
int ufd = attr->map_fd;
- struct fd f = fdget(ufd);
struct bpf_map *map;
void *key, *next_key;
+ struct fd f;
int err;
if (CHECK_ATTR(BPF_MAP_GET_NEXT_KEY))
return -EINVAL;
+ f = fdget(ufd);
map = bpf_map_get(f);
if (IS_ERR(map))
return PTR_ERR(map);
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index ed12e38..b074b23 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -283,7 +283,7 @@
[BPF_ALU64] = "alu64",
};
-static const char *const bpf_alu_string[] = {
+static const char *const bpf_alu_string[16] = {
[BPF_ADD >> 4] = "+=",
[BPF_SUB >> 4] = "-=",
[BPF_MUL >> 4] = "*=",
@@ -307,7 +307,7 @@
[BPF_DW >> 3] = "u64",
};
-static const char *const bpf_jmp_string[] = {
+static const char *const bpf_jmp_string[16] = {
[BPF_JA >> 4] = "jmp",
[BPF_JEQ >> 4] = "==",
[BPF_JGT >> 4] = ">",
diff --git a/kernel/cpu_pm.c b/kernel/cpu_pm.c
index 9656a3c..009cc9a 100644
--- a/kernel/cpu_pm.c
+++ b/kernel/cpu_pm.c
@@ -180,7 +180,7 @@
* low power state that may have caused some blocks in the same power domain
* to reset.
*
- * Must be called after cpu_pm_exit has been called on all cpus in the power
+ * Must be called after cpu_cluster_pm_enter has been called for the power
* domain, and before cpu_pm_exit has been called on any cpu in the power
* domain. Notified drivers can include VFP co-processor, interrupt controller
* and its PM extensions, local CPU timers context save/restore which
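
For context (not part of the patch), the corrected ordering amounts to a platform idle sequence roughly like the sketch below, where example_cluster_idle and the last_cpu_in_cluster flag are placeholders for platform-specific logic:

#include <linux/types.h>
#include <linux/cpu_pm.h>

static void example_cluster_idle(bool last_cpu_in_cluster)
{
	if (cpu_pm_enter())
		return;			/* a notifier vetoed the transition */

	if (last_cpu_in_cluster && cpu_cluster_pm_enter())
		goto out;

	/* ... enter and leave the low power state here ... */

	if (last_cpu_in_cluster)
		cpu_cluster_pm_exit();	/* after _enter, before any cpu_pm_exit */
out:
	cpu_pm_exit();			/* per-CPU notifiers restore context */
}
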
diff --git a/kernel/cred.c b/kernel/cred.c
index ec1c076..71179a0 100644
--- a/kernel/cred.c
+++ b/kernel/cred.c
@@ -20,11 +20,16 @@
#include <linux/cn_proc.h>
#if 0
-#define kdebug(FMT, ...) \
- printk("[%-5.5s%5u] "FMT"\n", current->comm, current->pid ,##__VA_ARGS__)
+#define kdebug(FMT, ...) \
+ printk("[%-5.5s%5u] " FMT "\n", \
+ current->comm, current->pid, ##__VA_ARGS__)
#else
-#define kdebug(FMT, ...) \
- no_printk("[%-5.5s%5u] "FMT"\n", current->comm, current->pid ,##__VA_ARGS__)
+#define kdebug(FMT, ...) \
+do { \
+ if (0) \
+ no_printk("[%-5.5s%5u] " FMT "\n", \
+ current->comm, current->pid, ##__VA_ARGS__); \
+} while (0)
#endif
static struct kmem_cache *cred_jar;
diff --git a/kernel/events/core.c b/kernel/events/core.c
index e818389..f548f69 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -9094,7 +9094,7 @@
mutex_unlock(&swhash->hlist_mutex);
}
-#if defined CONFIG_HOTPLUG_CPU || defined CONFIG_KEXEC
+#if defined CONFIG_HOTPLUG_CPU || defined CONFIG_KEXEC_CORE
static void __perf_event_exit_context(void *__info)
{
struct remove_event re = { .detach_group = true };
diff --git a/kernel/extable.c b/kernel/extable.c
index c98f926..e820cce 100644
--- a/kernel/extable.c
+++ b/kernel/extable.c
@@ -18,7 +18,6 @@
#include <linux/ftrace.h>
#include <linux/memory.h>
#include <linux/module.h>
-#include <linux/ftrace.h>
#include <linux/mutex.h>
#include <linux/init.h>
diff --git a/kernel/kexec.c b/kernel/kexec.c
index a785c10..4c5edc3 100644
--- a/kernel/kexec.c
+++ b/kernel/kexec.c
@@ -1,156 +1,22 @@
/*
- * kexec.c - kexec system call
+ * kexec.c - kexec_load system call
* Copyright (C) 2002-2004 Eric Biederman <ebiederm@xmission.com>
*
* This source code is licensed under the GNU General Public License,
* Version 2. See the file COPYING for more details.
*/
-#define pr_fmt(fmt) "kexec: " fmt
-
#include <linux/capability.h>
#include <linux/mm.h>
#include <linux/file.h>
-#include <linux/slab.h>
-#include <linux/fs.h>
#include <linux/kexec.h>
#include <linux/mutex.h>
#include <linux/list.h>
-#include <linux/highmem.h>
#include <linux/syscalls.h>
-#include <linux/reboot.h>
-#include <linux/ioport.h>
-#include <linux/hardirq.h>
-#include <linux/elf.h>
-#include <linux/elfcore.h>
-#include <linux/utsname.h>
-#include <linux/numa.h>
-#include <linux/suspend.h>
-#include <linux/device.h>
-#include <linux/freezer.h>
-#include <linux/pm.h>
-#include <linux/cpu.h>
-#include <linux/console.h>
#include <linux/vmalloc.h>
-#include <linux/swap.h>
-#include <linux/syscore_ops.h>
-#include <linux/compiler.h>
-#include <linux/hugetlb.h>
+#include <linux/slab.h>
-#include <asm/page.h>
-#include <asm/uaccess.h>
-#include <asm/io.h>
-#include <asm/sections.h>
-
-#include <crypto/hash.h>
-#include <crypto/sha.h>
-
-/* Per cpu memory for storing cpu states in case of system crash. */
-note_buf_t __percpu *crash_notes;
-
-/* vmcoreinfo stuff */
-static unsigned char vmcoreinfo_data[VMCOREINFO_BYTES];
-u32 vmcoreinfo_note[VMCOREINFO_NOTE_SIZE/4];
-size_t vmcoreinfo_size;
-size_t vmcoreinfo_max_size = sizeof(vmcoreinfo_data);
-
-/* Flag to indicate we are going to kexec a new kernel */
-bool kexec_in_progress = false;
-
-/*
- * Declare these symbols weak so that if architecture provides a purgatory,
- * these will be overridden.
- */
-char __weak kexec_purgatory[0];
-size_t __weak kexec_purgatory_size = 0;
-
-#ifdef CONFIG_KEXEC_FILE
-static int kexec_calculate_store_digests(struct kimage *image);
-#endif
-
-/* Location of the reserved area for the crash kernel */
-struct resource crashk_res = {
- .name = "Crash kernel",
- .start = 0,
- .end = 0,
- .flags = IORESOURCE_BUSY | IORESOURCE_MEM
-};
-struct resource crashk_low_res = {
- .name = "Crash kernel",
- .start = 0,
- .end = 0,
- .flags = IORESOURCE_BUSY | IORESOURCE_MEM
-};
-
-int kexec_should_crash(struct task_struct *p)
-{
- /*
- * If crash_kexec_post_notifiers is enabled, don't run
- * crash_kexec() here yet, which must be run after panic
- * notifiers in panic().
- */
- if (crash_kexec_post_notifiers)
- return 0;
- /*
- * There are 4 panic() calls in do_exit() path, each of which
- * corresponds to each of these 4 conditions.
- */
- if (in_interrupt() || !p->pid || is_global_init(p) || panic_on_oops)
- return 1;
- return 0;
-}
-
-/*
- * When kexec transitions to the new kernel there is a one-to-one
- * mapping between physical and virtual addresses. On processors
- * where you can disable the MMU this is trivial, and easy. For
- * others it is still a simple predictable page table to setup.
- *
- * In that environment kexec copies the new kernel to its final
- * resting place. This means I can only support memory whose
- * physical address can fit in an unsigned long. In particular
- * addresses where (pfn << PAGE_SHIFT) > ULONG_MAX cannot be handled.
- * If the assembly stub has more restrictive requirements
- * KEXEC_SOURCE_MEMORY_LIMIT and KEXEC_DEST_MEMORY_LIMIT can be
- * defined more restrictively in <asm/kexec.h>.
- *
- * The code for the transition from the current kernel to the
- * the new kernel is placed in the control_code_buffer, whose size
- * is given by KEXEC_CONTROL_PAGE_SIZE. In the best case only a single
- * page of memory is necessary, but some architectures require more.
- * Because this memory must be identity mapped in the transition from
- * virtual to physical addresses it must live in the range
- * 0 - TASK_SIZE, as only the user space mappings are arbitrarily
- * modifiable.
- *
- * The assembly stub in the control code buffer is passed a linked list
- * of descriptor pages detailing the source pages of the new kernel,
- * and the destination addresses of those source pages. As this data
- * structure is not used in the context of the current OS, it must
- * be self-contained.
- *
- * The code has been made to work with highmem pages and will use a
- * destination page in its final resting place (if it happens
- * to allocate it). The end product of this is that most of the
- * physical address space, and most of RAM can be used.
- *
- * Future directions include:
- * - allocating a page table with the control code buffer identity
- * mapped, to simplify machine_kexec and make kexec_on_panic more
- * reliable.
- */
-
-/*
- * KIMAGE_NO_DEST is an impossible destination address..., for
- * allocating pages whose destination address we do not care about.
- */
-#define KIMAGE_NO_DEST (-1UL)
-
-static int kimage_is_destination_range(struct kimage *image,
- unsigned long start, unsigned long end);
-static struct page *kimage_alloc_page(struct kimage *image,
- gfp_t gfp_mask,
- unsigned long dest);
+#include "kexec_internal.h"
static int copy_user_segment_list(struct kimage *image,
unsigned long nr_segments,
@@ -169,125 +35,6 @@
return ret;
}
-static int sanity_check_segment_list(struct kimage *image)
-{
- int result, i;
- unsigned long nr_segments = image->nr_segments;
-
- /*
- * Verify we have good destination addresses. The caller is
- * responsible for making certain we don't attempt to load
- * the new image into invalid or reserved areas of RAM. This
- * just verifies it is an address we can use.
- *
- * Since the kernel does everything in page size chunks ensure
- * the destination addresses are page aligned. Too many
- * special cases crop of when we don't do this. The most
- * insidious is getting overlapping destination addresses
- * simply because addresses are changed to page size
- * granularity.
- */
- result = -EADDRNOTAVAIL;
- for (i = 0; i < nr_segments; i++) {
- unsigned long mstart, mend;
-
- mstart = image->segment[i].mem;
- mend = mstart + image->segment[i].memsz;
- if ((mstart & ~PAGE_MASK) || (mend & ~PAGE_MASK))
- return result;
- if (mend >= KEXEC_DESTINATION_MEMORY_LIMIT)
- return result;
- }
-
- /* Verify our destination addresses do not overlap.
- * If we alloed overlapping destination addresses
- * through very weird things can happen with no
- * easy explanation as one segment stops on another.
- */
- result = -EINVAL;
- for (i = 0; i < nr_segments; i++) {
- unsigned long mstart, mend;
- unsigned long j;
-
- mstart = image->segment[i].mem;
- mend = mstart + image->segment[i].memsz;
- for (j = 0; j < i; j++) {
- unsigned long pstart, pend;
- pstart = image->segment[j].mem;
- pend = pstart + image->segment[j].memsz;
- /* Do the segments overlap ? */
- if ((mend > pstart) && (mstart < pend))
- return result;
- }
- }
-
- /* Ensure our buffer sizes are strictly less than
- * our memory sizes. This should always be the case,
- * and it is easier to check up front than to be surprised
- * later on.
- */
- result = -EINVAL;
- for (i = 0; i < nr_segments; i++) {
- if (image->segment[i].bufsz > image->segment[i].memsz)
- return result;
- }
-
- /*
- * Verify we have good destination addresses. Normally
- * the caller is responsible for making certain we don't
- * attempt to load the new image into invalid or reserved
- * areas of RAM. But crash kernels are preloaded into a
- * reserved area of ram. We must ensure the addresses
- * are in the reserved area otherwise preloading the
- * kernel could corrupt things.
- */
-
- if (image->type == KEXEC_TYPE_CRASH) {
- result = -EADDRNOTAVAIL;
- for (i = 0; i < nr_segments; i++) {
- unsigned long mstart, mend;
-
- mstart = image->segment[i].mem;
- mend = mstart + image->segment[i].memsz - 1;
- /* Ensure we are within the crash kernel limits */
- if ((mstart < crashk_res.start) ||
- (mend > crashk_res.end))
- return result;
- }
- }
-
- return 0;
-}
-
-static struct kimage *do_kimage_alloc_init(void)
-{
- struct kimage *image;
-
- /* Allocate a controlling structure */
- image = kzalloc(sizeof(*image), GFP_KERNEL);
- if (!image)
- return NULL;
-
- image->head = 0;
- image->entry = &image->head;
- image->last_entry = &image->head;
- image->control_page = ~0; /* By default this does not apply */
- image->type = KEXEC_TYPE_DEFAULT;
-
- /* Initialize the list of control pages */
- INIT_LIST_HEAD(&image->control_pages);
-
- /* Initialize the list of destination pages */
- INIT_LIST_HEAD(&image->dest_pages);
-
- /* Initialize the list of unusable pages */
- INIT_LIST_HEAD(&image->unusable_pages);
-
- return image;
-}
-
-static void kimage_free_page_list(struct list_head *list);
-
static int kimage_alloc_init(struct kimage **rimage, unsigned long entry,
unsigned long nr_segments,
struct kexec_segment __user *segments,
@@ -354,873 +101,6 @@
return ret;
}
-#ifdef CONFIG_KEXEC_FILE
-static int copy_file_from_fd(int fd, void **buf, unsigned long *buf_len)
-{
- struct fd f = fdget(fd);
- int ret;
- struct kstat stat;
- loff_t pos;
- ssize_t bytes = 0;
-
- if (!f.file)
- return -EBADF;
-
- ret = vfs_getattr(&f.file->f_path, &stat);
- if (ret)
- goto out;
-
- if (stat.size > INT_MAX) {
- ret = -EFBIG;
- goto out;
- }
-
- /* Don't hand 0 to vmalloc, it whines. */
- if (stat.size == 0) {
- ret = -EINVAL;
- goto out;
- }
-
- *buf = vmalloc(stat.size);
- if (!*buf) {
- ret = -ENOMEM;
- goto out;
- }
-
- pos = 0;
- while (pos < stat.size) {
- bytes = kernel_read(f.file, pos, (char *)(*buf) + pos,
- stat.size - pos);
- if (bytes < 0) {
- vfree(*buf);
- ret = bytes;
- goto out;
- }
-
- if (bytes == 0)
- break;
- pos += bytes;
- }
-
- if (pos != stat.size) {
- ret = -EBADF;
- vfree(*buf);
- goto out;
- }
-
- *buf_len = pos;
-out:
- fdput(f);
- return ret;
-}
-
-/* Architectures can provide this probe function */
-int __weak arch_kexec_kernel_image_probe(struct kimage *image, void *buf,
- unsigned long buf_len)
-{
- return -ENOEXEC;
-}
-
-void * __weak arch_kexec_kernel_image_load(struct kimage *image)
-{
- return ERR_PTR(-ENOEXEC);
-}
-
-void __weak arch_kimage_file_post_load_cleanup(struct kimage *image)
-{
-}
-
-int __weak arch_kexec_kernel_verify_sig(struct kimage *image, void *buf,
- unsigned long buf_len)
-{
- return -EKEYREJECTED;
-}
-
-/* Apply relocations of type RELA */
-int __weak
-arch_kexec_apply_relocations_add(const Elf_Ehdr *ehdr, Elf_Shdr *sechdrs,
- unsigned int relsec)
-{
- pr_err("RELA relocation unsupported.\n");
- return -ENOEXEC;
-}
-
-/* Apply relocations of type REL */
-int __weak
-arch_kexec_apply_relocations(const Elf_Ehdr *ehdr, Elf_Shdr *sechdrs,
- unsigned int relsec)
-{
- pr_err("REL relocation unsupported.\n");
- return -ENOEXEC;
-}
-
-/*
- * Free up memory used by kernel, initrd, and command line. This is temporary
- * memory allocation which is not needed any more after these buffers have
- * been loaded into separate segments and have been copied elsewhere.
- */
-static void kimage_file_post_load_cleanup(struct kimage *image)
-{
- struct purgatory_info *pi = &image->purgatory_info;
-
- vfree(image->kernel_buf);
- image->kernel_buf = NULL;
-
- vfree(image->initrd_buf);
- image->initrd_buf = NULL;
-
- kfree(image->cmdline_buf);
- image->cmdline_buf = NULL;
-
- vfree(pi->purgatory_buf);
- pi->purgatory_buf = NULL;
-
- vfree(pi->sechdrs);
- pi->sechdrs = NULL;
-
- /* See if architecture has anything to cleanup post load */
- arch_kimage_file_post_load_cleanup(image);
-
- /*
- * Above call should have called into bootloader to free up
- * any data stored in kimage->image_loader_data. It should
- * be ok now to free it up.
- */
- kfree(image->image_loader_data);
- image->image_loader_data = NULL;
-}
-
-/*
- * In file mode list of segments is prepared by kernel. Copy relevant
- * data from user space, do error checking, prepare segment list
- */
-static int
-kimage_file_prepare_segments(struct kimage *image, int kernel_fd, int initrd_fd,
- const char __user *cmdline_ptr,
- unsigned long cmdline_len, unsigned flags)
-{
- int ret = 0;
- void *ldata;
-
- ret = copy_file_from_fd(kernel_fd, &image->kernel_buf,
- &image->kernel_buf_len);
- if (ret)
- return ret;
-
- /* Call arch image probe handlers */
- ret = arch_kexec_kernel_image_probe(image, image->kernel_buf,
- image->kernel_buf_len);
-
- if (ret)
- goto out;
-
-#ifdef CONFIG_KEXEC_VERIFY_SIG
- ret = arch_kexec_kernel_verify_sig(image, image->kernel_buf,
- image->kernel_buf_len);
- if (ret) {
- pr_debug("kernel signature verification failed.\n");
- goto out;
- }
- pr_debug("kernel signature verification successful.\n");
-#endif
- /* It is possible that there no initramfs is being loaded */
- if (!(flags & KEXEC_FILE_NO_INITRAMFS)) {
- ret = copy_file_from_fd(initrd_fd, &image->initrd_buf,
- &image->initrd_buf_len);
- if (ret)
- goto out;
- }
-
- if (cmdline_len) {
- image->cmdline_buf = kzalloc(cmdline_len, GFP_KERNEL);
- if (!image->cmdline_buf) {
- ret = -ENOMEM;
- goto out;
- }
-
- ret = copy_from_user(image->cmdline_buf, cmdline_ptr,
- cmdline_len);
- if (ret) {
- ret = -EFAULT;
- goto out;
- }
-
- image->cmdline_buf_len = cmdline_len;
-
- /* command line should be a string with last byte null */
- if (image->cmdline_buf[cmdline_len - 1] != '\0') {
- ret = -EINVAL;
- goto out;
- }
- }
-
- /* Call arch image load handlers */
- ldata = arch_kexec_kernel_image_load(image);
-
- if (IS_ERR(ldata)) {
- ret = PTR_ERR(ldata);
- goto out;
- }
-
- image->image_loader_data = ldata;
-out:
- /* In case of error, free up all allocated memory in this function */
- if (ret)
- kimage_file_post_load_cleanup(image);
- return ret;
-}
-
-static int
-kimage_file_alloc_init(struct kimage **rimage, int kernel_fd,
- int initrd_fd, const char __user *cmdline_ptr,
- unsigned long cmdline_len, unsigned long flags)
-{
- int ret;
- struct kimage *image;
- bool kexec_on_panic = flags & KEXEC_FILE_ON_CRASH;
-
- image = do_kimage_alloc_init();
- if (!image)
- return -ENOMEM;
-
- image->file_mode = 1;
-
- if (kexec_on_panic) {
- /* Enable special crash kernel control page alloc policy. */
- image->control_page = crashk_res.start;
- image->type = KEXEC_TYPE_CRASH;
- }
-
- ret = kimage_file_prepare_segments(image, kernel_fd, initrd_fd,
- cmdline_ptr, cmdline_len, flags);
- if (ret)
- goto out_free_image;
-
- ret = sanity_check_segment_list(image);
- if (ret)
- goto out_free_post_load_bufs;
-
- ret = -ENOMEM;
- image->control_code_page = kimage_alloc_control_pages(image,
- get_order(KEXEC_CONTROL_PAGE_SIZE));
- if (!image->control_code_page) {
- pr_err("Could not allocate control_code_buffer\n");
- goto out_free_post_load_bufs;
- }
-
- if (!kexec_on_panic) {
- image->swap_page = kimage_alloc_control_pages(image, 0);
- if (!image->swap_page) {
- pr_err("Could not allocate swap buffer\n");
- goto out_free_control_pages;
- }
- }
-
- *rimage = image;
- return 0;
-out_free_control_pages:
- kimage_free_page_list(&image->control_pages);
-out_free_post_load_bufs:
- kimage_file_post_load_cleanup(image);
-out_free_image:
- kfree(image);
- return ret;
-}
-#else /* CONFIG_KEXEC_FILE */
-static inline void kimage_file_post_load_cleanup(struct kimage *image) { }
-#endif /* CONFIG_KEXEC_FILE */
-
-static int kimage_is_destination_range(struct kimage *image,
- unsigned long start,
- unsigned long end)
-{
- unsigned long i;
-
- for (i = 0; i < image->nr_segments; i++) {
- unsigned long mstart, mend;
-
- mstart = image->segment[i].mem;
- mend = mstart + image->segment[i].memsz;
- if ((end > mstart) && (start < mend))
- return 1;
- }
-
- return 0;
-}
-
-static struct page *kimage_alloc_pages(gfp_t gfp_mask, unsigned int order)
-{
- struct page *pages;
-
- pages = alloc_pages(gfp_mask, order);
- if (pages) {
- unsigned int count, i;
- pages->mapping = NULL;
- set_page_private(pages, order);
- count = 1 << order;
- for (i = 0; i < count; i++)
- SetPageReserved(pages + i);
- }
-
- return pages;
-}
-
-static void kimage_free_pages(struct page *page)
-{
- unsigned int order, count, i;
-
- order = page_private(page);
- count = 1 << order;
- for (i = 0; i < count; i++)
- ClearPageReserved(page + i);
- __free_pages(page, order);
-}
-
-static void kimage_free_page_list(struct list_head *list)
-{
- struct list_head *pos, *next;
-
- list_for_each_safe(pos, next, list) {
- struct page *page;
-
- page = list_entry(pos, struct page, lru);
- list_del(&page->lru);
- kimage_free_pages(page);
- }
-}
-
-static struct page *kimage_alloc_normal_control_pages(struct kimage *image,
- unsigned int order)
-{
- /* Control pages are special, they are the intermediaries
- * that are needed while we copy the rest of the pages
- * to their final resting place. As such they must
- * not conflict with either the destination addresses
- * or memory the kernel is already using.
- *
- * The only case where we really need more than one of
- * these are for architectures where we cannot disable
- * the MMU and must instead generate an identity mapped
- * page table for all of the memory.
- *
- * At worst this runs in O(N) of the image size.
- */
- struct list_head extra_pages;
- struct page *pages;
- unsigned int count;
-
- count = 1 << order;
- INIT_LIST_HEAD(&extra_pages);
-
- /* Loop while I can allocate a page and the page allocated
- * is a destination page.
- */
- do {
- unsigned long pfn, epfn, addr, eaddr;
-
- pages = kimage_alloc_pages(KEXEC_CONTROL_MEMORY_GFP, order);
- if (!pages)
- break;
- pfn = page_to_pfn(pages);
- epfn = pfn + count;
- addr = pfn << PAGE_SHIFT;
- eaddr = epfn << PAGE_SHIFT;
- if ((epfn >= (KEXEC_CONTROL_MEMORY_LIMIT >> PAGE_SHIFT)) ||
- kimage_is_destination_range(image, addr, eaddr)) {
- list_add(&pages->lru, &extra_pages);
- pages = NULL;
- }
- } while (!pages);
-
- if (pages) {
- /* Remember the allocated page... */
- list_add(&pages->lru, &image->control_pages);
-
- /* Because the page is already in it's destination
- * location we will never allocate another page at
- * that address. Therefore kimage_alloc_pages
- * will not return it (again) and we don't need
- * to give it an entry in image->segment[].
- */
- }
- /* Deal with the destination pages I have inadvertently allocated.
- *
- * Ideally I would convert multi-page allocations into single
- * page allocations, and add everything to image->dest_pages.
- *
- * For now it is simpler to just free the pages.
- */
- kimage_free_page_list(&extra_pages);
-
- return pages;
-}
-
-static struct page *kimage_alloc_crash_control_pages(struct kimage *image,
- unsigned int order)
-{
- /* Control pages are special, they are the intermediaries
- * that are needed while we copy the rest of the pages
- * to their final resting place. As such they must
- * not conflict with either the destination addresses
- * or memory the kernel is already using.
- *
- * Control pages are also the only pags we must allocate
- * when loading a crash kernel. All of the other pages
- * are specified by the segments and we just memcpy
- * into them directly.
- *
- * The only case where we really need more than one of
- * these are for architectures where we cannot disable
- * the MMU and must instead generate an identity mapped
- * page table for all of the memory.
- *
- * Given the low demand this implements a very simple
- * allocator that finds the first hole of the appropriate
- * size in the reserved memory region, and allocates all
- * of the memory up to and including the hole.
- */
- unsigned long hole_start, hole_end, size;
- struct page *pages;
-
- pages = NULL;
- size = (1 << order) << PAGE_SHIFT;
- hole_start = (image->control_page + (size - 1)) & ~(size - 1);
- hole_end = hole_start + size - 1;
- while (hole_end <= crashk_res.end) {
- unsigned long i;
-
- if (hole_end > KEXEC_CRASH_CONTROL_MEMORY_LIMIT)
- break;
- /* See if I overlap any of the segments */
- for (i = 0; i < image->nr_segments; i++) {
- unsigned long mstart, mend;
-
- mstart = image->segment[i].mem;
- mend = mstart + image->segment[i].memsz - 1;
- if ((hole_end >= mstart) && (hole_start <= mend)) {
- /* Advance the hole to the end of the segment */
- hole_start = (mend + (size - 1)) & ~(size - 1);
- hole_end = hole_start + size - 1;
- break;
- }
- }
- /* If I don't overlap any segments I have found my hole! */
- if (i == image->nr_segments) {
- pages = pfn_to_page(hole_start >> PAGE_SHIFT);
- break;
- }
- }
- if (pages)
- image->control_page = hole_end;
-
- return pages;
-}
-
-
-struct page *kimage_alloc_control_pages(struct kimage *image,
- unsigned int order)
-{
- struct page *pages = NULL;
-
- switch (image->type) {
- case KEXEC_TYPE_DEFAULT:
- pages = kimage_alloc_normal_control_pages(image, order);
- break;
- case KEXEC_TYPE_CRASH:
- pages = kimage_alloc_crash_control_pages(image, order);
- break;
- }
-
- return pages;
-}
-
-static int kimage_add_entry(struct kimage *image, kimage_entry_t entry)
-{
- if (*image->entry != 0)
- image->entry++;
-
- if (image->entry == image->last_entry) {
- kimage_entry_t *ind_page;
- struct page *page;
-
- page = kimage_alloc_page(image, GFP_KERNEL, KIMAGE_NO_DEST);
- if (!page)
- return -ENOMEM;
-
- ind_page = page_address(page);
- *image->entry = virt_to_phys(ind_page) | IND_INDIRECTION;
- image->entry = ind_page;
- image->last_entry = ind_page +
- ((PAGE_SIZE/sizeof(kimage_entry_t)) - 1);
- }
- *image->entry = entry;
- image->entry++;
- *image->entry = 0;
-
- return 0;
-}
-
-static int kimage_set_destination(struct kimage *image,
- unsigned long destination)
-{
- int result;
-
- destination &= PAGE_MASK;
- result = kimage_add_entry(image, destination | IND_DESTINATION);
-
- return result;
-}
-
-
-static int kimage_add_page(struct kimage *image, unsigned long page)
-{
- int result;
-
- page &= PAGE_MASK;
- result = kimage_add_entry(image, page | IND_SOURCE);
-
- return result;
-}
-
-
-static void kimage_free_extra_pages(struct kimage *image)
-{
- /* Walk through and free any extra destination pages I may have */
- kimage_free_page_list(&image->dest_pages);
-
- /* Walk through and free any unusable pages I have cached */
- kimage_free_page_list(&image->unusable_pages);
-
-}
-static void kimage_terminate(struct kimage *image)
-{
- if (*image->entry != 0)
- image->entry++;
-
- *image->entry = IND_DONE;
-}
-
-#define for_each_kimage_entry(image, ptr, entry) \
- for (ptr = &image->head; (entry = *ptr) && !(entry & IND_DONE); \
- ptr = (entry & IND_INDIRECTION) ? \
- phys_to_virt((entry & PAGE_MASK)) : ptr + 1)
-
-static void kimage_free_entry(kimage_entry_t entry)
-{
- struct page *page;
-
- page = pfn_to_page(entry >> PAGE_SHIFT);
- kimage_free_pages(page);
-}
-
-static void kimage_free(struct kimage *image)
-{
- kimage_entry_t *ptr, entry;
- kimage_entry_t ind = 0;
-
- if (!image)
- return;
-
- kimage_free_extra_pages(image);
- for_each_kimage_entry(image, ptr, entry) {
- if (entry & IND_INDIRECTION) {
- /* Free the previous indirection page */
- if (ind & IND_INDIRECTION)
- kimage_free_entry(ind);
- /* Save this indirection page until we are
- * done with it.
- */
- ind = entry;
- } else if (entry & IND_SOURCE)
- kimage_free_entry(entry);
- }
- /* Free the final indirection page */
- if (ind & IND_INDIRECTION)
- kimage_free_entry(ind);
-
- /* Handle any machine specific cleanup */
- machine_kexec_cleanup(image);
-
- /* Free the kexec control pages... */
- kimage_free_page_list(&image->control_pages);
-
- /*
- * Free up any temporary buffers allocated. This might hit if
- * error occurred much later after buffer allocation.
- */
- if (image->file_mode)
- kimage_file_post_load_cleanup(image);
-
- kfree(image);
-}
-
-static kimage_entry_t *kimage_dst_used(struct kimage *image,
- unsigned long page)
-{
- kimage_entry_t *ptr, entry;
- unsigned long destination = 0;
-
- for_each_kimage_entry(image, ptr, entry) {
- if (entry & IND_DESTINATION)
- destination = entry & PAGE_MASK;
- else if (entry & IND_SOURCE) {
- if (page == destination)
- return ptr;
- destination += PAGE_SIZE;
- }
- }
-
- return NULL;
-}
-
-static struct page *kimage_alloc_page(struct kimage *image,
- gfp_t gfp_mask,
- unsigned long destination)
-{
- /*
- * Here we implement safeguards to ensure that a source page
- * is not copied to its destination page before the data on
- * the destination page is no longer useful.
- *
- * To do this we maintain the invariant that a source page is
- * either its own destination page, or it is not a
- * destination page at all.
- *
- * That is slightly stronger than required, but the proof
- * that no problems will not occur is trivial, and the
- * implementation is simply to verify.
- *
- * When allocating all pages normally this algorithm will run
- * in O(N) time, but in the worst case it will run in O(N^2)
- * time. If the runtime is a problem the data structures can
- * be fixed.
- */
- struct page *page;
- unsigned long addr;
-
- /*
- * Walk through the list of destination pages, and see if I
- * have a match.
- */
- list_for_each_entry(page, &image->dest_pages, lru) {
- addr = page_to_pfn(page) << PAGE_SHIFT;
- if (addr == destination) {
- list_del(&page->lru);
- return page;
- }
- }
- page = NULL;
- while (1) {
- kimage_entry_t *old;
-
- /* Allocate a page, if we run out of memory give up */
- page = kimage_alloc_pages(gfp_mask, 0);
- if (!page)
- return NULL;
- /* If the page cannot be used file it away */
- if (page_to_pfn(page) >
- (KEXEC_SOURCE_MEMORY_LIMIT >> PAGE_SHIFT)) {
- list_add(&page->lru, &image->unusable_pages);
- continue;
- }
- addr = page_to_pfn(page) << PAGE_SHIFT;
-
- /* If it is the destination page we want use it */
- if (addr == destination)
- break;
-
- /* If the page is not a destination page use it */
- if (!kimage_is_destination_range(image, addr,
- addr + PAGE_SIZE))
- break;
-
- /*
- * I know that the page is someones destination page.
- * See if there is already a source page for this
- * destination page. And if so swap the source pages.
- */
- old = kimage_dst_used(image, addr);
- if (old) {
- /* If so move it */
- unsigned long old_addr;
- struct page *old_page;
-
- old_addr = *old & PAGE_MASK;
- old_page = pfn_to_page(old_addr >> PAGE_SHIFT);
- copy_highpage(page, old_page);
- *old = addr | (*old & ~PAGE_MASK);
-
- /* The old page I have found cannot be a
- * destination page, so return it if it's
- * gfp_flags honor the ones passed in.
- */
- if (!(gfp_mask & __GFP_HIGHMEM) &&
- PageHighMem(old_page)) {
- kimage_free_pages(old_page);
- continue;
- }
- addr = old_addr;
- page = old_page;
- break;
- } else {
- /* Place the page on the destination list I
- * will use it later.
- */
- list_add(&page->lru, &image->dest_pages);
- }
- }
-
- return page;
-}
-
-static int kimage_load_normal_segment(struct kimage *image,
- struct kexec_segment *segment)
-{
- unsigned long maddr;
- size_t ubytes, mbytes;
- int result;
- unsigned char __user *buf = NULL;
- unsigned char *kbuf = NULL;
-
- result = 0;
- if (image->file_mode)
- kbuf = segment->kbuf;
- else
- buf = segment->buf;
- ubytes = segment->bufsz;
- mbytes = segment->memsz;
- maddr = segment->mem;
-
- result = kimage_set_destination(image, maddr);
- if (result < 0)
- goto out;
-
- while (mbytes) {
- struct page *page;
- char *ptr;
- size_t uchunk, mchunk;
-
- page = kimage_alloc_page(image, GFP_HIGHUSER, maddr);
- if (!page) {
- result = -ENOMEM;
- goto out;
- }
- result = kimage_add_page(image, page_to_pfn(page)
- << PAGE_SHIFT);
- if (result < 0)
- goto out;
-
- ptr = kmap(page);
- /* Start with a clear page */
- clear_page(ptr);
- ptr += maddr & ~PAGE_MASK;
- mchunk = min_t(size_t, mbytes,
- PAGE_SIZE - (maddr & ~PAGE_MASK));
- uchunk = min(ubytes, mchunk);
-
- /* For file based kexec, source pages are in kernel memory */
- if (image->file_mode)
- memcpy(ptr, kbuf, uchunk);
- else
- result = copy_from_user(ptr, buf, uchunk);
- kunmap(page);
- if (result) {
- result = -EFAULT;
- goto out;
- }
- ubytes -= uchunk;
- maddr += mchunk;
- if (image->file_mode)
- kbuf += mchunk;
- else
- buf += mchunk;
- mbytes -= mchunk;
- }
-out:
- return result;
-}
-
-static int kimage_load_crash_segment(struct kimage *image,
- struct kexec_segment *segment)
-{
- /* For crash dumps kernels we simply copy the data from
- * user space to it's destination.
- * We do things a page at a time for the sake of kmap.
- */
- unsigned long maddr;
- size_t ubytes, mbytes;
- int result;
- unsigned char __user *buf = NULL;
- unsigned char *kbuf = NULL;
-
- result = 0;
- if (image->file_mode)
- kbuf = segment->kbuf;
- else
- buf = segment->buf;
- ubytes = segment->bufsz;
- mbytes = segment->memsz;
- maddr = segment->mem;
- while (mbytes) {
- struct page *page;
- char *ptr;
- size_t uchunk, mchunk;
-
- page = pfn_to_page(maddr >> PAGE_SHIFT);
- if (!page) {
- result = -ENOMEM;
- goto out;
- }
- ptr = kmap(page);
- ptr += maddr & ~PAGE_MASK;
- mchunk = min_t(size_t, mbytes,
- PAGE_SIZE - (maddr & ~PAGE_MASK));
- uchunk = min(ubytes, mchunk);
- if (mchunk > uchunk) {
- /* Zero the trailing part of the page */
- memset(ptr + uchunk, 0, mchunk - uchunk);
- }
-
- /* For file based kexec, source pages are in kernel memory */
- if (image->file_mode)
- memcpy(ptr, kbuf, uchunk);
- else
- result = copy_from_user(ptr, buf, uchunk);
- kexec_flush_icache_page(page);
- kunmap(page);
- if (result) {
- result = -EFAULT;
- goto out;
- }
- ubytes -= uchunk;
- maddr += mchunk;
- if (image->file_mode)
- kbuf += mchunk;
- else
- buf += mchunk;
- mbytes -= mchunk;
- }
-out:
- return result;
-}
-
-static int kimage_load_segment(struct kimage *image,
- struct kexec_segment *segment)
-{
- int result = -ENOMEM;
-
- switch (image->type) {
- case KEXEC_TYPE_DEFAULT:
- result = kimage_load_normal_segment(image, segment);
- break;
- case KEXEC_TYPE_CRASH:
- result = kimage_load_crash_segment(image, segment);
- break;
- }
-
- return result;
-}
-
/*
* Exec Kernel system call: for obvious reasons only root may call it.
*
@@ -1241,11 +121,6 @@
* kexec does not sync, or unmount filesystems so if you need
* that to happen you need to do that yourself.
*/
-struct kimage *kexec_image;
-struct kimage *kexec_crash_image;
-int kexec_load_disabled;
-
-static DEFINE_MUTEX(kexec_mutex);
SYSCALL_DEFINE4(kexec_load, unsigned long, entry, unsigned long, nr_segments,
struct kexec_segment __user *, segments, unsigned long, flags)
@@ -1340,18 +215,6 @@
return result;
}
-/*
- * Add and remove page tables for crashkernel memory
- *
- * Provide an empty default implementation here -- architecture
- * code may override this
- */
-void __weak crash_map_reserved_pages(void)
-{}
-
-void __weak crash_unmap_reserved_pages(void)
-{}
-
#ifdef CONFIG_COMPAT
COMPAT_SYSCALL_DEFINE4(kexec_load, compat_ulong_t, entry,
compat_ulong_t, nr_segments,
@@ -1390,1391 +253,3 @@
return sys_kexec_load(entry, nr_segments, ksegments, flags);
}
#endif
-
-#ifdef CONFIG_KEXEC_FILE
-SYSCALL_DEFINE5(kexec_file_load, int, kernel_fd, int, initrd_fd,
- unsigned long, cmdline_len, const char __user *, cmdline_ptr,
- unsigned long, flags)
-{
- int ret = 0, i;
- struct kimage **dest_image, *image;
-
- /* We only trust the superuser with rebooting the system. */
- if (!capable(CAP_SYS_BOOT) || kexec_load_disabled)
- return -EPERM;
-
- /* Make sure we have a legal set of flags */
- if (flags != (flags & KEXEC_FILE_FLAGS))
- return -EINVAL;
-
- image = NULL;
-
- if (!mutex_trylock(&kexec_mutex))
- return -EBUSY;
-
- dest_image = &kexec_image;
- if (flags & KEXEC_FILE_ON_CRASH)
- dest_image = &kexec_crash_image;
-
- if (flags & KEXEC_FILE_UNLOAD)
- goto exchange;
-
- /*
- * In case of crash, new kernel gets loaded in reserved region. It is
- * same memory where old crash kernel might be loaded. Free any
- * current crash dump kernel before we corrupt it.
- */
- if (flags & KEXEC_FILE_ON_CRASH)
- kimage_free(xchg(&kexec_crash_image, NULL));
-
- ret = kimage_file_alloc_init(&image, kernel_fd, initrd_fd, cmdline_ptr,
- cmdline_len, flags);
- if (ret)
- goto out;
-
- ret = machine_kexec_prepare(image);
- if (ret)
- goto out;
-
- ret = kexec_calculate_store_digests(image);
- if (ret)
- goto out;
-
- for (i = 0; i < image->nr_segments; i++) {
- struct kexec_segment *ksegment;
-
- ksegment = &image->segment[i];
- pr_debug("Loading segment %d: buf=0x%p bufsz=0x%zx mem=0x%lx memsz=0x%zx\n",
- i, ksegment->buf, ksegment->bufsz, ksegment->mem,
- ksegment->memsz);
-
- ret = kimage_load_segment(image, &image->segment[i]);
- if (ret)
- goto out;
- }
-
- kimage_terminate(image);
-
- /*
- * Free up any temporary buffers allocated which are not needed
- * after image has been loaded
- */
- kimage_file_post_load_cleanup(image);
-exchange:
- image = xchg(dest_image, image);
-out:
- mutex_unlock(&kexec_mutex);
- kimage_free(image);
- return ret;
-}
-
-#endif /* CONFIG_KEXEC_FILE */
-
-void crash_kexec(struct pt_regs *regs)
-{
- /* Take the kexec_mutex here to prevent sys_kexec_load
- * running on one cpu from replacing the crash kernel
- * we are using after a panic on a different cpu.
- *
- * If the crash kernel was not located in a fixed area
- * of memory the xchg(&kexec_crash_image) would be
- * sufficient. But since I reuse the memory...
- */
- if (mutex_trylock(&kexec_mutex)) {
- if (kexec_crash_image) {
- struct pt_regs fixed_regs;
-
- crash_setup_regs(&fixed_regs, regs);
- crash_save_vmcoreinfo();
- machine_crash_shutdown(&fixed_regs);
- machine_kexec(kexec_crash_image);
- }
- mutex_unlock(&kexec_mutex);
- }
-}
-
-size_t crash_get_memory_size(void)
-{
- size_t size = 0;
- mutex_lock(&kexec_mutex);
- if (crashk_res.end != crashk_res.start)
- size = resource_size(&crashk_res);
- mutex_unlock(&kexec_mutex);
- return size;
-}
-
-void __weak crash_free_reserved_phys_range(unsigned long begin,
- unsigned long end)
-{
- unsigned long addr;
-
- for (addr = begin; addr < end; addr += PAGE_SIZE)
- free_reserved_page(pfn_to_page(addr >> PAGE_SHIFT));
-}
-
-int crash_shrink_memory(unsigned long new_size)
-{
- int ret = 0;
- unsigned long start, end;
- unsigned long old_size;
- struct resource *ram_res;
-
- mutex_lock(&kexec_mutex);
-
- if (kexec_crash_image) {
- ret = -ENOENT;
- goto unlock;
- }
- start = crashk_res.start;
- end = crashk_res.end;
- old_size = (end == 0) ? 0 : end - start + 1;
- if (new_size >= old_size) {
- ret = (new_size == old_size) ? 0 : -EINVAL;
- goto unlock;
- }
-
- ram_res = kzalloc(sizeof(*ram_res), GFP_KERNEL);
- if (!ram_res) {
- ret = -ENOMEM;
- goto unlock;
- }
-
- start = roundup(start, KEXEC_CRASH_MEM_ALIGN);
- end = roundup(start + new_size, KEXEC_CRASH_MEM_ALIGN);
-
- crash_map_reserved_pages();
- crash_free_reserved_phys_range(end, crashk_res.end);
-
- if ((start == end) && (crashk_res.parent != NULL))
- release_resource(&crashk_res);
-
- ram_res->start = end;
- ram_res->end = crashk_res.end;
- ram_res->flags = IORESOURCE_BUSY | IORESOURCE_MEM;
- ram_res->name = "System RAM";
-
- crashk_res.end = end - 1;
-
- insert_resource(&iomem_resource, ram_res);
- crash_unmap_reserved_pages();
-
-unlock:
- mutex_unlock(&kexec_mutex);
- return ret;
-}
-
-static u32 *append_elf_note(u32 *buf, char *name, unsigned type, void *data,
- size_t data_len)
-{
- struct elf_note note;
-
- note.n_namesz = strlen(name) + 1;
- note.n_descsz = data_len;
- note.n_type = type;
- memcpy(buf, &note, sizeof(note));
- buf += (sizeof(note) + 3)/4;
- memcpy(buf, name, note.n_namesz);
- buf += (note.n_namesz + 3)/4;
- memcpy(buf, data, note.n_descsz);
- buf += (note.n_descsz + 3)/4;
-
- return buf;
-}
-
-static void final_note(u32 *buf)
-{
- struct elf_note note;
-
- note.n_namesz = 0;
- note.n_descsz = 0;
- note.n_type = 0;
- memcpy(buf, &note, sizeof(note));
-}
-
-void crash_save_cpu(struct pt_regs *regs, int cpu)
-{
- struct elf_prstatus prstatus;
- u32 *buf;
-
- if ((cpu < 0) || (cpu >= nr_cpu_ids))
- return;
-
- /* Using ELF notes here is opportunistic.
- * I need a well defined structure format
- * for the data I pass, and I need tags
- * on the data to indicate what information I have
- * squirrelled away. ELF notes happen to provide
- * all of that, so there is no need to invent something new.
- */
- buf = (u32 *)per_cpu_ptr(crash_notes, cpu);
- if (!buf)
- return;
- memset(&prstatus, 0, sizeof(prstatus));
- prstatus.pr_pid = current->pid;
- elf_core_copy_kernel_regs(&prstatus.pr_reg, regs);
- buf = append_elf_note(buf, KEXEC_CORE_NOTE_NAME, NT_PRSTATUS,
- &prstatus, sizeof(prstatus));
- final_note(buf);
-}
-
-static int __init crash_notes_memory_init(void)
-{
- /* Allocate memory for saving cpu registers. */
- crash_notes = alloc_percpu(note_buf_t);
- if (!crash_notes) {
- pr_warn("Kexec: Memory allocation for saving cpu register states failed\n");
- return -ENOMEM;
- }
- return 0;
-}
-subsys_initcall(crash_notes_memory_init);
-
-
-/*
- * parsing the "crashkernel" commandline
- *
- * this code is intended to be called from architecture specific code
- */
-
-
-/*
- * This function parses command lines in the format
- *
- * crashkernel=ramsize-range:size[,...][@offset]
- *
- * The function returns 0 on success and -EINVAL on failure.
- */
-static int __init parse_crashkernel_mem(char *cmdline,
- unsigned long long system_ram,
- unsigned long long *crash_size,
- unsigned long long *crash_base)
-{
- char *cur = cmdline, *tmp;
-
- /* for each entry of the comma-separated list */
- do {
- unsigned long long start, end = ULLONG_MAX, size;
-
- /* get the start of the range */
- start = memparse(cur, &tmp);
- if (cur == tmp) {
- pr_warn("crashkernel: Memory value expected\n");
- return -EINVAL;
- }
- cur = tmp;
- if (*cur != '-') {
- pr_warn("crashkernel: '-' expected\n");
- return -EINVAL;
- }
- cur++;
-
- /* if no ':' is here, than we read the end */
- if (*cur != ':') {
- end = memparse(cur, &tmp);
- if (cur == tmp) {
- pr_warn("crashkernel: Memory value expected\n");
- return -EINVAL;
- }
- cur = tmp;
- if (end <= start) {
- pr_warn("crashkernel: end <= start\n");
- return -EINVAL;
- }
- }
-
- if (*cur != ':') {
- pr_warn("crashkernel: ':' expected\n");
- return -EINVAL;
- }
- cur++;
-
- size = memparse(cur, &tmp);
- if (cur == tmp) {
- pr_warn("Memory value expected\n");
- return -EINVAL;
- }
- cur = tmp;
- if (size >= system_ram) {
- pr_warn("crashkernel: invalid size\n");
- return -EINVAL;
- }
-
- /* match ? */
- if (system_ram >= start && system_ram < end) {
- *crash_size = size;
- break;
- }
- } while (*cur++ == ',');
-
- if (*crash_size > 0) {
- while (*cur && *cur != ' ' && *cur != '@')
- cur++;
- if (*cur == '@') {
- cur++;
- *crash_base = memparse(cur, &tmp);
- if (cur == tmp) {
- pr_warn("Memory value expected after '@'\n");
- return -EINVAL;
- }
- }
- }
-
- return 0;
-}
-
-/*
- * That function parses "simple" (old) crashkernel command lines like
- *
- * crashkernel=size[@offset]
- *
- * It returns 0 on success and -EINVAL on failure.
- */
-static int __init parse_crashkernel_simple(char *cmdline,
- unsigned long long *crash_size,
- unsigned long long *crash_base)
-{
- char *cur = cmdline;
-
- *crash_size = memparse(cmdline, &cur);
- if (cmdline == cur) {
- pr_warn("crashkernel: memory value expected\n");
- return -EINVAL;
- }
-
- if (*cur == '@')
- *crash_base = memparse(cur+1, &cur);
- else if (*cur != ' ' && *cur != '\0') {
- pr_warn("crashkernel: unrecognized char\n");
- return -EINVAL;
- }
-
- return 0;
-}
-
-#define SUFFIX_HIGH 0
-#define SUFFIX_LOW 1
-#define SUFFIX_NULL 2
-static __initdata char *suffix_tbl[] = {
- [SUFFIX_HIGH] = ",high",
- [SUFFIX_LOW] = ",low",
- [SUFFIX_NULL] = NULL,
-};
-
-/*
- * That function parses "suffix" crashkernel command lines like
- *
- * crashkernel=size,[high|low]
- *
- * It returns 0 on success and -EINVAL on failure.
- */
-static int __init parse_crashkernel_suffix(char *cmdline,
- unsigned long long *crash_size,
- const char *suffix)
-{
- char *cur = cmdline;
-
- *crash_size = memparse(cmdline, &cur);
- if (cmdline == cur) {
- pr_warn("crashkernel: memory value expected\n");
- return -EINVAL;
- }
-
- /* check with suffix */
- if (strncmp(cur, suffix, strlen(suffix))) {
- pr_warn("crashkernel: unrecognized char\n");
- return -EINVAL;
- }
- cur += strlen(suffix);
- if (*cur != ' ' && *cur != '\0') {
- pr_warn("crashkernel: unrecognized char\n");
- return -EINVAL;
- }
-
- return 0;
-}
-
-static __init char *get_last_crashkernel(char *cmdline,
- const char *name,
- const char *suffix)
-{
- char *p = cmdline, *ck_cmdline = NULL;
-
- /* find crashkernel and use the last one if there are more */
- p = strstr(p, name);
- while (p) {
- char *end_p = strchr(p, ' ');
- char *q;
-
- if (!end_p)
- end_p = p + strlen(p);
-
- if (!suffix) {
- int i;
-
- /* skip the one with any known suffix */
- for (i = 0; suffix_tbl[i]; i++) {
- q = end_p - strlen(suffix_tbl[i]);
- if (!strncmp(q, suffix_tbl[i],
- strlen(suffix_tbl[i])))
- goto next;
- }
- ck_cmdline = p;
- } else {
- q = end_p - strlen(suffix);
- if (!strncmp(q, suffix, strlen(suffix)))
- ck_cmdline = p;
- }
-next:
- p = strstr(p+1, name);
- }
-
- if (!ck_cmdline)
- return NULL;
-
- return ck_cmdline;
-}
-
-static int __init __parse_crashkernel(char *cmdline,
- unsigned long long system_ram,
- unsigned long long *crash_size,
- unsigned long long *crash_base,
- const char *name,
- const char *suffix)
-{
- char *first_colon, *first_space;
- char *ck_cmdline;
-
- BUG_ON(!crash_size || !crash_base);
- *crash_size = 0;
- *crash_base = 0;
-
- ck_cmdline = get_last_crashkernel(cmdline, name, suffix);
-
- if (!ck_cmdline)
- return -EINVAL;
-
- ck_cmdline += strlen(name);
-
- if (suffix)
- return parse_crashkernel_suffix(ck_cmdline, crash_size,
- suffix);
- /*
- * if the commandline contains a ':', then that's the extended
- * syntax -- if not, it must be the classic syntax
- */
- first_colon = strchr(ck_cmdline, ':');
- first_space = strchr(ck_cmdline, ' ');
- if (first_colon && (!first_space || first_colon < first_space))
- return parse_crashkernel_mem(ck_cmdline, system_ram,
- crash_size, crash_base);
-
- return parse_crashkernel_simple(ck_cmdline, crash_size, crash_base);
-}
-
-/*
- * That function is the entry point for command line parsing and should be
- * called from the arch-specific code.
- */
-int __init parse_crashkernel(char *cmdline,
- unsigned long long system_ram,
- unsigned long long *crash_size,
- unsigned long long *crash_base)
-{
- return __parse_crashkernel(cmdline, system_ram, crash_size, crash_base,
- "crashkernel=", NULL);
-}
-
-int __init parse_crashkernel_high(char *cmdline,
- unsigned long long system_ram,
- unsigned long long *crash_size,
- unsigned long long *crash_base)
-{
- return __parse_crashkernel(cmdline, system_ram, crash_size, crash_base,
- "crashkernel=", suffix_tbl[SUFFIX_HIGH]);
-}
-
-int __init parse_crashkernel_low(char *cmdline,
- unsigned long long system_ram,
- unsigned long long *crash_size,
- unsigned long long *crash_base)
-{
- return __parse_crashkernel(cmdline, system_ram, crash_size, crash_base,
- "crashkernel=", suffix_tbl[SUFFIX_LOW]);
-}
-
-static void update_vmcoreinfo_note(void)
-{
- u32 *buf = vmcoreinfo_note;
-
- if (!vmcoreinfo_size)
- return;
- buf = append_elf_note(buf, VMCOREINFO_NOTE_NAME, 0, vmcoreinfo_data,
- vmcoreinfo_size);
- final_note(buf);
-}
-
-void crash_save_vmcoreinfo(void)
-{
- vmcoreinfo_append_str("CRASHTIME=%ld\n", get_seconds());
- update_vmcoreinfo_note();
-}
-
-void vmcoreinfo_append_str(const char *fmt, ...)
-{
- va_list args;
- char buf[0x50];
- size_t r;
-
- va_start(args, fmt);
- r = vscnprintf(buf, sizeof(buf), fmt, args);
- va_end(args);
-
- r = min(r, vmcoreinfo_max_size - vmcoreinfo_size);
-
- memcpy(&vmcoreinfo_data[vmcoreinfo_size], buf, r);
-
- vmcoreinfo_size += r;
-}
-
-/*
- * provide an empty default implementation here -- architecture
- * code may override this
- */
-void __weak arch_crash_save_vmcoreinfo(void)
-{}
-
-unsigned long __weak paddr_vmcoreinfo_note(void)
-{
- return __pa((unsigned long)(char *)&vmcoreinfo_note);
-}
-
-static int __init crash_save_vmcoreinfo_init(void)
-{
- VMCOREINFO_OSRELEASE(init_uts_ns.name.release);
- VMCOREINFO_PAGESIZE(PAGE_SIZE);
-
- VMCOREINFO_SYMBOL(init_uts_ns);
- VMCOREINFO_SYMBOL(node_online_map);
-#ifdef CONFIG_MMU
- VMCOREINFO_SYMBOL(swapper_pg_dir);
-#endif
- VMCOREINFO_SYMBOL(_stext);
- VMCOREINFO_SYMBOL(vmap_area_list);
-
-#ifndef CONFIG_NEED_MULTIPLE_NODES
- VMCOREINFO_SYMBOL(mem_map);
- VMCOREINFO_SYMBOL(contig_page_data);
-#endif
-#ifdef CONFIG_SPARSEMEM
- VMCOREINFO_SYMBOL(mem_section);
- VMCOREINFO_LENGTH(mem_section, NR_SECTION_ROOTS);
- VMCOREINFO_STRUCT_SIZE(mem_section);
- VMCOREINFO_OFFSET(mem_section, section_mem_map);
-#endif
- VMCOREINFO_STRUCT_SIZE(page);
- VMCOREINFO_STRUCT_SIZE(pglist_data);
- VMCOREINFO_STRUCT_SIZE(zone);
- VMCOREINFO_STRUCT_SIZE(free_area);
- VMCOREINFO_STRUCT_SIZE(list_head);
- VMCOREINFO_SIZE(nodemask_t);
- VMCOREINFO_OFFSET(page, flags);
- VMCOREINFO_OFFSET(page, _count);
- VMCOREINFO_OFFSET(page, mapping);
- VMCOREINFO_OFFSET(page, lru);
- VMCOREINFO_OFFSET(page, _mapcount);
- VMCOREINFO_OFFSET(page, private);
- VMCOREINFO_OFFSET(pglist_data, node_zones);
- VMCOREINFO_OFFSET(pglist_data, nr_zones);
-#ifdef CONFIG_FLAT_NODE_MEM_MAP
- VMCOREINFO_OFFSET(pglist_data, node_mem_map);
-#endif
- VMCOREINFO_OFFSET(pglist_data, node_start_pfn);
- VMCOREINFO_OFFSET(pglist_data, node_spanned_pages);
- VMCOREINFO_OFFSET(pglist_data, node_id);
- VMCOREINFO_OFFSET(zone, free_area);
- VMCOREINFO_OFFSET(zone, vm_stat);
- VMCOREINFO_OFFSET(zone, spanned_pages);
- VMCOREINFO_OFFSET(free_area, free_list);
- VMCOREINFO_OFFSET(list_head, next);
- VMCOREINFO_OFFSET(list_head, prev);
- VMCOREINFO_OFFSET(vmap_area, va_start);
- VMCOREINFO_OFFSET(vmap_area, list);
- VMCOREINFO_LENGTH(zone.free_area, MAX_ORDER);
- log_buf_kexec_setup();
- VMCOREINFO_LENGTH(free_area.free_list, MIGRATE_TYPES);
- VMCOREINFO_NUMBER(NR_FREE_PAGES);
- VMCOREINFO_NUMBER(PG_lru);
- VMCOREINFO_NUMBER(PG_private);
- VMCOREINFO_NUMBER(PG_swapcache);
- VMCOREINFO_NUMBER(PG_slab);
-#ifdef CONFIG_MEMORY_FAILURE
- VMCOREINFO_NUMBER(PG_hwpoison);
-#endif
- VMCOREINFO_NUMBER(PG_head_mask);
- VMCOREINFO_NUMBER(PAGE_BUDDY_MAPCOUNT_VALUE);
-#ifdef CONFIG_HUGETLBFS
- VMCOREINFO_SYMBOL(free_huge_page);
-#endif
-
- arch_crash_save_vmcoreinfo();
- update_vmcoreinfo_note();
-
- return 0;
-}
-
-subsys_initcall(crash_save_vmcoreinfo_init);
-
-#ifdef CONFIG_KEXEC_FILE
-static int locate_mem_hole_top_down(unsigned long start, unsigned long end,
- struct kexec_buf *kbuf)
-{
- struct kimage *image = kbuf->image;
- unsigned long temp_start, temp_end;
-
- temp_end = min(end, kbuf->buf_max);
- temp_start = temp_end - kbuf->memsz;
-
- do {
- /* align down start */
- temp_start = temp_start & (~(kbuf->buf_align - 1));
-
- if (temp_start < start || temp_start < kbuf->buf_min)
- return 0;
-
- temp_end = temp_start + kbuf->memsz - 1;
-
- /*
- * Make sure this does not conflict with any of existing
- * segments
- */
- if (kimage_is_destination_range(image, temp_start, temp_end)) {
- temp_start = temp_start - PAGE_SIZE;
- continue;
- }
-
- /* We found a suitable memory range */
- break;
- } while (1);
-
- /* If we are here, we found a suitable memory range */
- kbuf->mem = temp_start;
-
- /* Success, stop navigating through remaining System RAM ranges */
- return 1;
-}
-
-static int locate_mem_hole_bottom_up(unsigned long start, unsigned long end,
- struct kexec_buf *kbuf)
-{
- struct kimage *image = kbuf->image;
- unsigned long temp_start, temp_end;
-
- temp_start = max(start, kbuf->buf_min);
-
- do {
- temp_start = ALIGN(temp_start, kbuf->buf_align);
- temp_end = temp_start + kbuf->memsz - 1;
-
- if (temp_end > end || temp_end > kbuf->buf_max)
- return 0;
- /*
- * Make sure this does not conflict with any of existing
- * segments
- */
- if (kimage_is_destination_range(image, temp_start, temp_end)) {
- temp_start = temp_start + PAGE_SIZE;
- continue;
- }
-
- /* We found a suitable memory range */
- break;
- } while (1);
-
- /* If we are here, we found a suitable memory range */
- kbuf->mem = temp_start;
-
- /* Success, stop navigating through remaining System RAM ranges */
- return 1;
-}
-
-static int locate_mem_hole_callback(u64 start, u64 end, void *arg)
-{
- struct kexec_buf *kbuf = (struct kexec_buf *)arg;
- unsigned long sz = end - start + 1;
-
- /* Returning 0 will take to next memory range */
- if (sz < kbuf->memsz)
- return 0;
-
- if (end < kbuf->buf_min || start > kbuf->buf_max)
- return 0;
-
- /*
- * Allocate memory top down with-in ram range. Otherwise bottom up
- * allocation.
- */
- if (kbuf->top_down)
- return locate_mem_hole_top_down(start, end, kbuf);
- return locate_mem_hole_bottom_up(start, end, kbuf);
-}
-
-/*
- * Helper function for placing a buffer in a kexec segment. This assumes
- * that kexec_mutex is held.
- */
-int kexec_add_buffer(struct kimage *image, char *buffer, unsigned long bufsz,
- unsigned long memsz, unsigned long buf_align,
- unsigned long buf_min, unsigned long buf_max,
- bool top_down, unsigned long *load_addr)
-{
-
- struct kexec_segment *ksegment;
- struct kexec_buf buf, *kbuf;
- int ret;
-
- /* Currently adding segment this way is allowed only in file mode */
- if (!image->file_mode)
- return -EINVAL;
-
- if (image->nr_segments >= KEXEC_SEGMENT_MAX)
- return -EINVAL;
-
- /*
- * Make sure we are not trying to add buffer after allocating
- * control pages. All segments need to be placed first before
- * any control pages are allocated. As control page allocation
- * logic goes through list of segments to make sure there are
- * no destination overlaps.
- */
- if (!list_empty(&image->control_pages)) {
- WARN_ON(1);
- return -EINVAL;
- }
-
- memset(&buf, 0, sizeof(struct kexec_buf));
- kbuf = &buf;
- kbuf->image = image;
- kbuf->buffer = buffer;
- kbuf->bufsz = bufsz;
-
- kbuf->memsz = ALIGN(memsz, PAGE_SIZE);
- kbuf->buf_align = max(buf_align, PAGE_SIZE);
- kbuf->buf_min = buf_min;
- kbuf->buf_max = buf_max;
- kbuf->top_down = top_down;
-
- /* Walk the RAM ranges and allocate a suitable range for the buffer */
- if (image->type == KEXEC_TYPE_CRASH)
- ret = walk_iomem_res("Crash kernel",
- IORESOURCE_MEM | IORESOURCE_BUSY,
- crashk_res.start, crashk_res.end, kbuf,
- locate_mem_hole_callback);
- else
- ret = walk_system_ram_res(0, -1, kbuf,
- locate_mem_hole_callback);
- if (ret != 1) {
- /* A suitable memory range could not be found for buffer */
- return -EADDRNOTAVAIL;
- }
-
- /* Found a suitable memory range */
- ksegment = &image->segment[image->nr_segments];
- ksegment->kbuf = kbuf->buffer;
- ksegment->bufsz = kbuf->bufsz;
- ksegment->mem = kbuf->mem;
- ksegment->memsz = kbuf->memsz;
- image->nr_segments++;
- *load_addr = ksegment->mem;
- return 0;
-}
-
-/* Calculate and store the digest of segments */
-static int kexec_calculate_store_digests(struct kimage *image)
-{
- struct crypto_shash *tfm;
- struct shash_desc *desc;
- int ret = 0, i, j, zero_buf_sz, sha_region_sz;
- size_t desc_size, nullsz;
- char *digest;
- void *zero_buf;
- struct kexec_sha_region *sha_regions;
- struct purgatory_info *pi = &image->purgatory_info;
-
- zero_buf = __va(page_to_pfn(ZERO_PAGE(0)) << PAGE_SHIFT);
- zero_buf_sz = PAGE_SIZE;
-
- tfm = crypto_alloc_shash("sha256", 0, 0);
- if (IS_ERR(tfm)) {
- ret = PTR_ERR(tfm);
- goto out;
- }
-
- desc_size = crypto_shash_descsize(tfm) + sizeof(*desc);
- desc = kzalloc(desc_size, GFP_KERNEL);
- if (!desc) {
- ret = -ENOMEM;
- goto out_free_tfm;
- }
-
- sha_region_sz = KEXEC_SEGMENT_MAX * sizeof(struct kexec_sha_region);
- sha_regions = vzalloc(sha_region_sz);
- if (!sha_regions)
- goto out_free_desc;
-
- desc->tfm = tfm;
- desc->flags = 0;
-
- ret = crypto_shash_init(desc);
- if (ret < 0)
- goto out_free_sha_regions;
-
- digest = kzalloc(SHA256_DIGEST_SIZE, GFP_KERNEL);
- if (!digest) {
- ret = -ENOMEM;
- goto out_free_sha_regions;
- }
-
- for (j = i = 0; i < image->nr_segments; i++) {
- struct kexec_segment *ksegment;
-
- ksegment = &image->segment[i];
- /*
- * Skip purgatory as it will be modified once we put digest
- * info in purgatory.
- */
- if (ksegment->kbuf == pi->purgatory_buf)
- continue;
-
- ret = crypto_shash_update(desc, ksegment->kbuf,
- ksegment->bufsz);
- if (ret)
- break;
-
- /*
- * Assume rest of the buffer is filled with zero and
- * update digest accordingly.
- */
- nullsz = ksegment->memsz - ksegment->bufsz;
- while (nullsz) {
- unsigned long bytes = nullsz;
-
- if (bytes > zero_buf_sz)
- bytes = zero_buf_sz;
- ret = crypto_shash_update(desc, zero_buf, bytes);
- if (ret)
- break;
- nullsz -= bytes;
- }
-
- if (ret)
- break;
-
- sha_regions[j].start = ksegment->mem;
- sha_regions[j].len = ksegment->memsz;
- j++;
- }
-
- if (!ret) {
- ret = crypto_shash_final(desc, digest);
- if (ret)
- goto out_free_digest;
- ret = kexec_purgatory_get_set_symbol(image, "sha_regions",
- sha_regions, sha_region_sz, 0);
- if (ret)
- goto out_free_digest;
-
- ret = kexec_purgatory_get_set_symbol(image, "sha256_digest",
- digest, SHA256_DIGEST_SIZE, 0);
- if (ret)
- goto out_free_digest;
- }
-
-out_free_digest:
- kfree(digest);
-out_free_sha_regions:
- vfree(sha_regions);
-out_free_desc:
- kfree(desc);
-out_free_tfm:
- kfree(tfm);
-out:
- return ret;
-}
-
-/* Actually load purgatory. Lot of code taken from kexec-tools */
-static int __kexec_load_purgatory(struct kimage *image, unsigned long min,
- unsigned long max, int top_down)
-{
- struct purgatory_info *pi = &image->purgatory_info;
- unsigned long align, buf_align, bss_align, buf_sz, bss_sz, bss_pad;
- unsigned long memsz, entry, load_addr, curr_load_addr, bss_addr, offset;
- unsigned char *buf_addr, *src;
- int i, ret = 0, entry_sidx = -1;
- const Elf_Shdr *sechdrs_c;
- Elf_Shdr *sechdrs = NULL;
- void *purgatory_buf = NULL;
-
- /*
- * sechdrs_c points to section headers in purgatory and are read
- * only. No modifications allowed.
- */
- sechdrs_c = (void *)pi->ehdr + pi->ehdr->e_shoff;
-
- /*
- * We can not modify sechdrs_c[] and its fields. It is read only.
- * Copy it over to a local copy where one can store some temporary
- * data and free it at the end. We need to modify ->sh_addr and
- * ->sh_offset fields to keep track of permanent and temporary
- * locations of sections.
- */
- sechdrs = vzalloc(pi->ehdr->e_shnum * sizeof(Elf_Shdr));
- if (!sechdrs)
- return -ENOMEM;
-
- memcpy(sechdrs, sechdrs_c, pi->ehdr->e_shnum * sizeof(Elf_Shdr));
-
- /*
- * We seem to have multiple copies of sections. First copy is which
- * is embedded in kernel in read only section. Some of these sections
- * will be copied to a temporary buffer and relocated. And these
- * sections will finally be copied to their final destination at
- * segment load time.
- *
- * Use ->sh_offset to reflect section address in memory. It will
- * point to original read only copy if section is not allocatable.
- * Otherwise it will point to temporary copy which will be relocated.
- *
- * Use ->sh_addr to contain final address of the section where it
- * will go during execution time.
- */
- for (i = 0; i < pi->ehdr->e_shnum; i++) {
- if (sechdrs[i].sh_type == SHT_NOBITS)
- continue;
-
- sechdrs[i].sh_offset = (unsigned long)pi->ehdr +
- sechdrs[i].sh_offset;
- }
-
- /*
- * Identify entry point section and make entry relative to section
- * start.
- */
- entry = pi->ehdr->e_entry;
- for (i = 0; i < pi->ehdr->e_shnum; i++) {
- if (!(sechdrs[i].sh_flags & SHF_ALLOC))
- continue;
-
- if (!(sechdrs[i].sh_flags & SHF_EXECINSTR))
- continue;
-
- /* Make entry section relative */
- if (sechdrs[i].sh_addr <= pi->ehdr->e_entry &&
- ((sechdrs[i].sh_addr + sechdrs[i].sh_size) >
- pi->ehdr->e_entry)) {
- entry_sidx = i;
- entry -= sechdrs[i].sh_addr;
- break;
- }
- }
-
- /* Determine how much memory is needed to load relocatable object. */
- buf_align = 1;
- bss_align = 1;
- buf_sz = 0;
- bss_sz = 0;
-
- for (i = 0; i < pi->ehdr->e_shnum; i++) {
- if (!(sechdrs[i].sh_flags & SHF_ALLOC))
- continue;
-
- align = sechdrs[i].sh_addralign;
- if (sechdrs[i].sh_type != SHT_NOBITS) {
- if (buf_align < align)
- buf_align = align;
- buf_sz = ALIGN(buf_sz, align);
- buf_sz += sechdrs[i].sh_size;
- } else {
- /* bss section */
- if (bss_align < align)
- bss_align = align;
- bss_sz = ALIGN(bss_sz, align);
- bss_sz += sechdrs[i].sh_size;
- }
- }
-
- /* Determine the bss padding required to align bss properly */
- bss_pad = 0;
- if (buf_sz & (bss_align - 1))
- bss_pad = bss_align - (buf_sz & (bss_align - 1));
-
- memsz = buf_sz + bss_pad + bss_sz;
-
- /* Allocate buffer for purgatory */
- purgatory_buf = vzalloc(buf_sz);
- if (!purgatory_buf) {
- ret = -ENOMEM;
- goto out;
- }
-
- if (buf_align < bss_align)
- buf_align = bss_align;
-
- /* Add buffer to segment list */
- ret = kexec_add_buffer(image, purgatory_buf, buf_sz, memsz,
- buf_align, min, max, top_down,
- &pi->purgatory_load_addr);
- if (ret)
- goto out;
-
- /* Load SHF_ALLOC sections */
- buf_addr = purgatory_buf;
- load_addr = curr_load_addr = pi->purgatory_load_addr;
- bss_addr = load_addr + buf_sz + bss_pad;
-
- for (i = 0; i < pi->ehdr->e_shnum; i++) {
- if (!(sechdrs[i].sh_flags & SHF_ALLOC))
- continue;
-
- align = sechdrs[i].sh_addralign;
- if (sechdrs[i].sh_type != SHT_NOBITS) {
- curr_load_addr = ALIGN(curr_load_addr, align);
- offset = curr_load_addr - load_addr;
- /* We already modifed ->sh_offset to keep src addr */
- src = (char *) sechdrs[i].sh_offset;
- memcpy(buf_addr + offset, src, sechdrs[i].sh_size);
-
- /* Store load address and source address of section */
- sechdrs[i].sh_addr = curr_load_addr;
-
- /*
- * This section got copied to temporary buffer. Update
- * ->sh_offset accordingly.
- */
- sechdrs[i].sh_offset = (unsigned long)(buf_addr + offset);
-
- /* Advance to the next address */
- curr_load_addr += sechdrs[i].sh_size;
- } else {
- bss_addr = ALIGN(bss_addr, align);
- sechdrs[i].sh_addr = bss_addr;
- bss_addr += sechdrs[i].sh_size;
- }
- }
-
- /* Update entry point based on load address of text section */
- if (entry_sidx >= 0)
- entry += sechdrs[entry_sidx].sh_addr;
-
- /* Make kernel jump to purgatory after shutdown */
- image->start = entry;
-
- /* Used later to get/set symbol values */
- pi->sechdrs = sechdrs;
-
- /*
- * Used later to identify which section is purgatory and skip it
- * from checksumming.
- */
- pi->purgatory_buf = purgatory_buf;
- return ret;
-out:
- vfree(sechdrs);
- vfree(purgatory_buf);
- return ret;
-}
-
-static int kexec_apply_relocations(struct kimage *image)
-{
- int i, ret;
- struct purgatory_info *pi = &image->purgatory_info;
- Elf_Shdr *sechdrs = pi->sechdrs;
-
- /* Apply relocations */
- for (i = 0; i < pi->ehdr->e_shnum; i++) {
- Elf_Shdr *section, *symtab;
-
- if (sechdrs[i].sh_type != SHT_RELA &&
- sechdrs[i].sh_type != SHT_REL)
- continue;
-
- /*
- * For section of type SHT_RELA/SHT_REL,
- * ->sh_link contains section header index of associated
- * symbol table. And ->sh_info contains section header
- * index of section to which relocations apply.
- */
- if (sechdrs[i].sh_info >= pi->ehdr->e_shnum ||
- sechdrs[i].sh_link >= pi->ehdr->e_shnum)
- return -ENOEXEC;
-
- section = &sechdrs[sechdrs[i].sh_info];
- symtab = &sechdrs[sechdrs[i].sh_link];
-
- if (!(section->sh_flags & SHF_ALLOC))
- continue;
-
- /*
- * symtab->sh_link contain section header index of associated
- * string table.
- */
- if (symtab->sh_link >= pi->ehdr->e_shnum)
- /* Invalid section number? */
- continue;
-
- /*
- * Respective architecture needs to provide support for applying
- * relocations of type SHT_RELA/SHT_REL.
- */
- if (sechdrs[i].sh_type == SHT_RELA)
- ret = arch_kexec_apply_relocations_add(pi->ehdr,
- sechdrs, i);
- else if (sechdrs[i].sh_type == SHT_REL)
- ret = arch_kexec_apply_relocations(pi->ehdr,
- sechdrs, i);
- if (ret)
- return ret;
- }
-
- return 0;
-}
-
-/* Load relocatable purgatory object and relocate it appropriately */
-int kexec_load_purgatory(struct kimage *image, unsigned long min,
- unsigned long max, int top_down,
- unsigned long *load_addr)
-{
- struct purgatory_info *pi = &image->purgatory_info;
- int ret;
-
- if (kexec_purgatory_size <= 0)
- return -EINVAL;
-
- if (kexec_purgatory_size < sizeof(Elf_Ehdr))
- return -ENOEXEC;
-
- pi->ehdr = (Elf_Ehdr *)kexec_purgatory;
-
- if (memcmp(pi->ehdr->e_ident, ELFMAG, SELFMAG) != 0
- || pi->ehdr->e_type != ET_REL
- || !elf_check_arch(pi->ehdr)
- || pi->ehdr->e_shentsize != sizeof(Elf_Shdr))
- return -ENOEXEC;
-
- if (pi->ehdr->e_shoff >= kexec_purgatory_size
- || (pi->ehdr->e_shnum * sizeof(Elf_Shdr) >
- kexec_purgatory_size - pi->ehdr->e_shoff))
- return -ENOEXEC;
-
- ret = __kexec_load_purgatory(image, min, max, top_down);
- if (ret)
- return ret;
-
- ret = kexec_apply_relocations(image);
- if (ret)
- goto out;
-
- *load_addr = pi->purgatory_load_addr;
- return 0;
-out:
- vfree(pi->sechdrs);
- vfree(pi->purgatory_buf);
- return ret;
-}
-
-static Elf_Sym *kexec_purgatory_find_symbol(struct purgatory_info *pi,
- const char *name)
-{
- Elf_Sym *syms;
- Elf_Shdr *sechdrs;
- Elf_Ehdr *ehdr;
- int i, k;
- const char *strtab;
-
- if (!pi->sechdrs || !pi->ehdr)
- return NULL;
-
- sechdrs = pi->sechdrs;
- ehdr = pi->ehdr;
-
- for (i = 0; i < ehdr->e_shnum; i++) {
- if (sechdrs[i].sh_type != SHT_SYMTAB)
- continue;
-
- if (sechdrs[i].sh_link >= ehdr->e_shnum)
- /* Invalid strtab section number */
- continue;
- strtab = (char *)sechdrs[sechdrs[i].sh_link].sh_offset;
- syms = (Elf_Sym *)sechdrs[i].sh_offset;
-
- /* Go through symbols for a match */
- for (k = 0; k < sechdrs[i].sh_size/sizeof(Elf_Sym); k++) {
- if (ELF_ST_BIND(syms[k].st_info) != STB_GLOBAL)
- continue;
-
- if (strcmp(strtab + syms[k].st_name, name) != 0)
- continue;
-
- if (syms[k].st_shndx == SHN_UNDEF ||
- syms[k].st_shndx >= ehdr->e_shnum) {
- pr_debug("Symbol: %s has bad section index %d.\n",
- name, syms[k].st_shndx);
- return NULL;
- }
-
- /* Found the symbol we are looking for */
- return &syms[k];
- }
- }
-
- return NULL;
-}
-
-void *kexec_purgatory_get_symbol_addr(struct kimage *image, const char *name)
-{
- struct purgatory_info *pi = &image->purgatory_info;
- Elf_Sym *sym;
- Elf_Shdr *sechdr;
-
- sym = kexec_purgatory_find_symbol(pi, name);
- if (!sym)
- return ERR_PTR(-EINVAL);
-
- sechdr = &pi->sechdrs[sym->st_shndx];
-
- /*
- * Returns the address where symbol will finally be loaded after
- * kexec_load_segment()
- */
- return (void *)(sechdr->sh_addr + sym->st_value);
-}
-
-/*
- * Get or set value of a symbol. If "get_value" is true, symbol value is
- * returned in buf otherwise symbol value is set based on value in buf.
- */
-int kexec_purgatory_get_set_symbol(struct kimage *image, const char *name,
- void *buf, unsigned int size, bool get_value)
-{
- Elf_Sym *sym;
- Elf_Shdr *sechdrs;
- struct purgatory_info *pi = &image->purgatory_info;
- char *sym_buf;
-
- sym = kexec_purgatory_find_symbol(pi, name);
- if (!sym)
- return -EINVAL;
-
- if (sym->st_size != size) {
- pr_err("symbol %s size mismatch: expected %lu actual %u\n",
- name, (unsigned long)sym->st_size, size);
- return -EINVAL;
- }
-
- sechdrs = pi->sechdrs;
-
- if (sechdrs[sym->st_shndx].sh_type == SHT_NOBITS) {
- pr_err("symbol %s is in a bss section. Cannot %s\n", name,
- get_value ? "get" : "set");
- return -EINVAL;
- }
-
- sym_buf = (unsigned char *)sechdrs[sym->st_shndx].sh_offset +
- sym->st_value;
-
- if (get_value)
- memcpy((void *)buf, sym_buf, size);
- else
- memcpy((void *)sym_buf, buf, size);
-
- return 0;
-}
-#endif /* CONFIG_KEXEC_FILE */
-
-/*
- * Move into place and start executing a preloaded standalone
- * executable. If nothing was preloaded return an error.
- */
-int kernel_kexec(void)
-{
- int error = 0;
-
- if (!mutex_trylock(&kexec_mutex))
- return -EBUSY;
- if (!kexec_image) {
- error = -EINVAL;
- goto Unlock;
- }
-
-#ifdef CONFIG_KEXEC_JUMP
- if (kexec_image->preserve_context) {
- lock_system_sleep();
- pm_prepare_console();
- error = freeze_processes();
- if (error) {
- error = -EBUSY;
- goto Restore_console;
- }
- suspend_console();
- error = dpm_suspend_start(PMSG_FREEZE);
- if (error)
- goto Resume_console;
- /* At this point, dpm_suspend_start() has been called,
- * but *not* dpm_suspend_end(). We *must* call
- * dpm_suspend_end() now. Otherwise, drivers for
- * some devices (e.g. interrupt controllers) become
- * desynchronized with the actual state of the
- * hardware at resume time, and evil weirdness ensues.
- */
- error = dpm_suspend_end(PMSG_FREEZE);
- if (error)
- goto Resume_devices;
- error = disable_nonboot_cpus();
- if (error)
- goto Enable_cpus;
- local_irq_disable();
- error = syscore_suspend();
- if (error)
- goto Enable_irqs;
- } else
-#endif
- {
- kexec_in_progress = true;
- kernel_restart_prepare(NULL);
- migrate_to_reboot_cpu();
-
- /*
- * migrate_to_reboot_cpu() disables CPU hotplug assuming that
- * no further code needs to use CPU hotplug (which is true in
- * the reboot case). However, the kexec path depends on using
- * CPU hotplug again; so re-enable it here.
- */
- cpu_hotplug_enable();
- pr_emerg("Starting new kernel\n");
- machine_shutdown();
- }
-
- machine_kexec(kexec_image);
-
-#ifdef CONFIG_KEXEC_JUMP
- if (kexec_image->preserve_context) {
- syscore_resume();
- Enable_irqs:
- local_irq_enable();
- Enable_cpus:
- enable_nonboot_cpus();
- dpm_resume_start(PMSG_RESTORE);
- Resume_devices:
- dpm_resume_end(PMSG_RESTORE);
- Resume_console:
- resume_console();
- thaw_processes();
- Restore_console:
- pm_restore_console();
- unlock_system_sleep();
- }
-#endif
-
- Unlock:
- mutex_unlock(&kexec_mutex);
- return error;
-}
diff --git a/kernel/kexec_core.c b/kernel/kexec_core.c
new file mode 100644
index 0000000..201b453
--- /dev/null
+++ b/kernel/kexec_core.c
@@ -0,0 +1,1534 @@
+/*
+ * kexec.c - kexec system call core code.
+ * Copyright (C) 2002-2004 Eric Biederman <ebiederm@xmission.com>
+ *
+ * This source code is licensed under the GNU General Public License,
+ * Version 2. See the file COPYING for more details.
+ */
+
+#define pr_fmt(fmt) "kexec: " fmt
+
+#include <linux/capability.h>
+#include <linux/mm.h>
+#include <linux/file.h>
+#include <linux/slab.h>
+#include <linux/fs.h>
+#include <linux/kexec.h>
+#include <linux/mutex.h>
+#include <linux/list.h>
+#include <linux/highmem.h>
+#include <linux/syscalls.h>
+#include <linux/reboot.h>
+#include <linux/ioport.h>
+#include <linux/hardirq.h>
+#include <linux/elf.h>
+#include <linux/elfcore.h>
+#include <linux/utsname.h>
+#include <linux/numa.h>
+#include <linux/suspend.h>
+#include <linux/device.h>
+#include <linux/freezer.h>
+#include <linux/pm.h>
+#include <linux/cpu.h>
+#include <linux/uaccess.h>
+#include <linux/io.h>
+#include <linux/console.h>
+#include <linux/vmalloc.h>
+#include <linux/swap.h>
+#include <linux/syscore_ops.h>
+#include <linux/compiler.h>
+#include <linux/hugetlb.h>
+
+#include <asm/page.h>
+#include <asm/sections.h>
+
+#include <crypto/hash.h>
+#include <crypto/sha.h>
+#include "kexec_internal.h"
+
+DEFINE_MUTEX(kexec_mutex);
+
+/* Per cpu memory for storing cpu states in case of system crash. */
+note_buf_t __percpu *crash_notes;
+
+/* vmcoreinfo stuff */
+static unsigned char vmcoreinfo_data[VMCOREINFO_BYTES];
+u32 vmcoreinfo_note[VMCOREINFO_NOTE_SIZE/4];
+size_t vmcoreinfo_size;
+size_t vmcoreinfo_max_size = sizeof(vmcoreinfo_data);
+
+/* Flag to indicate we are going to kexec a new kernel */
+bool kexec_in_progress = false;
+
+
+/* Location of the reserved area for the crash kernel */
+struct resource crashk_res = {
+ .name = "Crash kernel",
+ .start = 0,
+ .end = 0,
+ .flags = IORESOURCE_BUSY | IORESOURCE_MEM
+};
+struct resource crashk_low_res = {
+ .name = "Crash kernel",
+ .start = 0,
+ .end = 0,
+ .flags = IORESOURCE_BUSY | IORESOURCE_MEM
+};
+
+int kexec_should_crash(struct task_struct *p)
+{
+ /*
+ * If crash_kexec_post_notifiers is enabled, don't run
+ * crash_kexec() here yet, which must be run after panic
+ * notifiers in panic().
+ */
+ if (crash_kexec_post_notifiers)
+ return 0;
+ /*
+ * There are 4 panic() calls in do_exit() path, each of which
+ * corresponds to each of these 4 conditions.
+ */
+ if (in_interrupt() || !p->pid || is_global_init(p) || panic_on_oops)
+ return 1;
+ return 0;
+}
+
+/*
+ * When kexec transitions to the new kernel there is a one-to-one
+ * mapping between physical and virtual addresses. On processors
+ * where you can disable the MMU this is trivial, and easy. For
+ * others it is still a simple predictable page table to setup.
+ *
+ * In that environment kexec copies the new kernel to its final
+ * resting place. This means I can only support memory whose
+ * physical address can fit in an unsigned long. In particular
+ * addresses where (pfn << PAGE_SHIFT) > ULONG_MAX cannot be handled.
+ * If the assembly stub has more restrictive requirements
+ * KEXEC_SOURCE_MEMORY_LIMIT and KEXEC_DEST_MEMORY_LIMIT can be
+ * defined more restrictively in <asm/kexec.h>.
+ *
+ * The code for the transition from the current kernel to the new
+ * kernel is placed in the control_code_buffer, whose size
+ * is given by KEXEC_CONTROL_PAGE_SIZE. In the best case only a single
+ * page of memory is necessary, but some architectures require more.
+ * Because this memory must be identity mapped in the transition from
+ * virtual to physical addresses it must live in the range
+ * 0 - TASK_SIZE, as only the user space mappings are arbitrarily
+ * modifiable.
+ *
+ * The assembly stub in the control code buffer is passed a linked list
+ * of descriptor pages detailing the source pages of the new kernel,
+ * and the destination addresses of those source pages. As this data
+ * structure is not used in the context of the current OS, it must
+ * be self-contained.
+ *
+ * The code has been made to work with highmem pages and will use a
+ * destination page in its final resting place (if it happens
+ * to allocate it). The end product of this is that most of the
+ * physical address space, and most of RAM can be used.
+ *
+ * Future directions include:
+ * - allocating a page table with the control code buffer identity
+ * mapped, to simplify machine_kexec and make kexec_on_panic more
+ * reliable.
+ */
+
+/*
+ * KIMAGE_NO_DEST is an impossible destination address, used for
+ * allocating pages whose destination address we do not care about.
+ */
+#define KIMAGE_NO_DEST (-1UL)
+
+static struct page *kimage_alloc_page(struct kimage *image,
+ gfp_t gfp_mask,
+ unsigned long dest);
+
+int sanity_check_segment_list(struct kimage *image)
+{
+ int result, i;
+ unsigned long nr_segments = image->nr_segments;
+
+ /*
+ * Verify we have good destination addresses. The caller is
+ * responsible for making certain we don't attempt to load
+ * the new image into invalid or reserved areas of RAM. This
+ * just verifies it is an address we can use.
+ *
+ * Since the kernel does everything in page size chunks ensure
+ * the destination addresses are page aligned. Too many
+	 * special cases crop up when we don't do this. The most
+ * insidious is getting overlapping destination addresses
+ * simply because addresses are changed to page size
+ * granularity.
+ */
+ result = -EADDRNOTAVAIL;
+ for (i = 0; i < nr_segments; i++) {
+ unsigned long mstart, mend;
+
+ mstart = image->segment[i].mem;
+ mend = mstart + image->segment[i].memsz;
+ if ((mstart & ~PAGE_MASK) || (mend & ~PAGE_MASK))
+ return result;
+ if (mend >= KEXEC_DESTINATION_MEMORY_LIMIT)
+ return result;
+ }
+
+ /* Verify our destination addresses do not overlap.
+	 * If we allowed overlapping destination addresses
+	 * through, very weird things can happen with no
+ * easy explanation as one segment stops on another.
+ */
+ result = -EINVAL;
+ for (i = 0; i < nr_segments; i++) {
+ unsigned long mstart, mend;
+ unsigned long j;
+
+ mstart = image->segment[i].mem;
+ mend = mstart + image->segment[i].memsz;
+ for (j = 0; j < i; j++) {
+ unsigned long pstart, pend;
+
+ pstart = image->segment[j].mem;
+ pend = pstart + image->segment[j].memsz;
+ /* Do the segments overlap ? */
+ if ((mend > pstart) && (mstart < pend))
+ return result;
+ }
+ }
+
+ /* Ensure our buffer sizes are strictly less than
+ * our memory sizes. This should always be the case,
+ * and it is easier to check up front than to be surprised
+ * later on.
+ */
+ result = -EINVAL;
+ for (i = 0; i < nr_segments; i++) {
+ if (image->segment[i].bufsz > image->segment[i].memsz)
+ return result;
+ }
+
+ /*
+ * Verify we have good destination addresses. Normally
+ * the caller is responsible for making certain we don't
+ * attempt to load the new image into invalid or reserved
+ * areas of RAM. But crash kernels are preloaded into a
+ * reserved area of ram. We must ensure the addresses
+	 * reserved area of RAM. We must ensure the addresses
+	 * are in the reserved area, otherwise preloading the
+ */
+
+ if (image->type == KEXEC_TYPE_CRASH) {
+ result = -EADDRNOTAVAIL;
+ for (i = 0; i < nr_segments; i++) {
+ unsigned long mstart, mend;
+
+ mstart = image->segment[i].mem;
+ mend = mstart + image->segment[i].memsz - 1;
+ /* Ensure we are within the crash kernel limits */
+ if ((mstart < crashk_res.start) ||
+ (mend > crashk_res.end))
+ return result;
+ }
+ }
+
+ return 0;
+}
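
For illustration only (not part of the patch), the page-alignment rule enforced above can be exercised on its own: a segment address is acceptable only if its offset within the page is zero. The sketch below assumes 4 KiB pages and uses made-up addresses.

    #include <stdio.h>

    int main(void)
    {
        /* PAGE_MASK for 4 KiB pages; the kernel derives this from PAGE_SHIFT */
        unsigned long page_mask = ~0xfffUL;
        unsigned long good = 0x200000;   /* page aligned */
        unsigned long bad  = 0x200010;   /* 16 bytes into a page */

        printf("%s\n", (good & ~page_mask) ? "rejected" : "ok");  /* ok */
        printf("%s\n", (bad  & ~page_mask) ? "rejected" : "ok");  /* rejected */
        return 0;
    }
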
+
+struct kimage *do_kimage_alloc_init(void)
+{
+ struct kimage *image;
+
+ /* Allocate a controlling structure */
+ image = kzalloc(sizeof(*image), GFP_KERNEL);
+ if (!image)
+ return NULL;
+
+ image->head = 0;
+ image->entry = &image->head;
+ image->last_entry = &image->head;
+ image->control_page = ~0; /* By default this does not apply */
+ image->type = KEXEC_TYPE_DEFAULT;
+
+ /* Initialize the list of control pages */
+ INIT_LIST_HEAD(&image->control_pages);
+
+ /* Initialize the list of destination pages */
+ INIT_LIST_HEAD(&image->dest_pages);
+
+ /* Initialize the list of unusable pages */
+ INIT_LIST_HEAD(&image->unusable_pages);
+
+ return image;
+}
+
+int kimage_is_destination_range(struct kimage *image,
+ unsigned long start,
+ unsigned long end)
+{
+ unsigned long i;
+
+ for (i = 0; i < image->nr_segments; i++) {
+ unsigned long mstart, mend;
+
+ mstart = image->segment[i].mem;
+ mend = mstart + image->segment[i].memsz;
+ if ((end > mstart) && (start < mend))
+ return 1;
+ }
+
+ return 0;
+}
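
The overlap predicate used here is the usual "each range begins before the other ends" test. A standalone sketch with hypothetical ranges (illustrative only, not part of the patch):

    #include <stdio.h>

    /* Same test as kimage_is_destination_range(): two ranges overlap
     * when each one begins before the other one ends. */
    static int overlaps(unsigned long start, unsigned long end,
                        unsigned long mstart, unsigned long mend)
    {
        return (end > mstart) && (start < mend);
    }

    int main(void)
    {
        printf("%d\n", overlaps(0x1000, 0x2000, 0x1800, 0x3000)); /* 1 */
        printf("%d\n", overlaps(0x1000, 0x2000, 0x2000, 0x3000)); /* 0 */
        return 0;
    }
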
+
+static struct page *kimage_alloc_pages(gfp_t gfp_mask, unsigned int order)
+{
+ struct page *pages;
+
+ pages = alloc_pages(gfp_mask, order);
+ if (pages) {
+ unsigned int count, i;
+
+ pages->mapping = NULL;
+ set_page_private(pages, order);
+ count = 1 << order;
+ for (i = 0; i < count; i++)
+ SetPageReserved(pages + i);
+ }
+
+ return pages;
+}
+
+static void kimage_free_pages(struct page *page)
+{
+ unsigned int order, count, i;
+
+ order = page_private(page);
+ count = 1 << order;
+ for (i = 0; i < count; i++)
+ ClearPageReserved(page + i);
+ __free_pages(page, order);
+}
+
+void kimage_free_page_list(struct list_head *list)
+{
+ struct list_head *pos, *next;
+
+ list_for_each_safe(pos, next, list) {
+ struct page *page;
+
+ page = list_entry(pos, struct page, lru);
+ list_del(&page->lru);
+ kimage_free_pages(page);
+ }
+}
+
+static struct page *kimage_alloc_normal_control_pages(struct kimage *image,
+ unsigned int order)
+{
+ /* Control pages are special, they are the intermediaries
+ * that are needed while we copy the rest of the pages
+ * to their final resting place. As such they must
+ * not conflict with either the destination addresses
+ * or memory the kernel is already using.
+ *
+ * The only case where we really need more than one of
+ * these are for architectures where we cannot disable
+ * the MMU and must instead generate an identity mapped
+ * page table for all of the memory.
+ *
+ * At worst this runs in O(N) of the image size.
+ */
+ struct list_head extra_pages;
+ struct page *pages;
+ unsigned int count;
+
+ count = 1 << order;
+ INIT_LIST_HEAD(&extra_pages);
+
+ /* Loop while I can allocate a page and the page allocated
+ * is a destination page.
+ */
+ do {
+ unsigned long pfn, epfn, addr, eaddr;
+
+ pages = kimage_alloc_pages(KEXEC_CONTROL_MEMORY_GFP, order);
+ if (!pages)
+ break;
+ pfn = page_to_pfn(pages);
+ epfn = pfn + count;
+ addr = pfn << PAGE_SHIFT;
+ eaddr = epfn << PAGE_SHIFT;
+ if ((epfn >= (KEXEC_CONTROL_MEMORY_LIMIT >> PAGE_SHIFT)) ||
+ kimage_is_destination_range(image, addr, eaddr)) {
+ list_add(&pages->lru, &extra_pages);
+ pages = NULL;
+ }
+ } while (!pages);
+
+ if (pages) {
+ /* Remember the allocated page... */
+ list_add(&pages->lru, &image->control_pages);
+
+		/* Because the page is already in its destination
+ * location we will never allocate another page at
+ * that address. Therefore kimage_alloc_pages
+ * will not return it (again) and we don't need
+ * to give it an entry in image->segment[].
+ */
+ }
+ /* Deal with the destination pages I have inadvertently allocated.
+ *
+ * Ideally I would convert multi-page allocations into single
+ * page allocations, and add everything to image->dest_pages.
+ *
+ * For now it is simpler to just free the pages.
+ */
+ kimage_free_page_list(&extra_pages);
+
+ return pages;
+}
+
+static struct page *kimage_alloc_crash_control_pages(struct kimage *image,
+ unsigned int order)
+{
+ /* Control pages are special, they are the intermediaries
+ * that are needed while we copy the rest of the pages
+ * to their final resting place. As such they must
+ * not conflict with either the destination addresses
+ * or memory the kernel is already using.
+ *
+	 * Control pages are also the only pages we must allocate
+ * when loading a crash kernel. All of the other pages
+ * are specified by the segments and we just memcpy
+ * into them directly.
+ *
+ * The only case where we really need more than one of
+ * these are for architectures where we cannot disable
+ * the MMU and must instead generate an identity mapped
+ * page table for all of the memory.
+ *
+ * Given the low demand this implements a very simple
+ * allocator that finds the first hole of the appropriate
+ * size in the reserved memory region, and allocates all
+ * of the memory up to and including the hole.
+ */
+ unsigned long hole_start, hole_end, size;
+ struct page *pages;
+
+ pages = NULL;
+ size = (1 << order) << PAGE_SHIFT;
+ hole_start = (image->control_page + (size - 1)) & ~(size - 1);
+ hole_end = hole_start + size - 1;
+ while (hole_end <= crashk_res.end) {
+ unsigned long i;
+
+ if (hole_end > KEXEC_CRASH_CONTROL_MEMORY_LIMIT)
+ break;
+ /* See if I overlap any of the segments */
+ for (i = 0; i < image->nr_segments; i++) {
+ unsigned long mstart, mend;
+
+ mstart = image->segment[i].mem;
+ mend = mstart + image->segment[i].memsz - 1;
+ if ((hole_end >= mstart) && (hole_start <= mend)) {
+ /* Advance the hole to the end of the segment */
+ hole_start = (mend + (size - 1)) & ~(size - 1);
+ hole_end = hole_start + size - 1;
+ break;
+ }
+ }
+ /* If I don't overlap any segments I have found my hole! */
+ if (i == image->nr_segments) {
+ pages = pfn_to_page(hole_start >> PAGE_SHIFT);
+ image->control_page = hole_end;
+ break;
+ }
+ }
+
+ return pages;
+}
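
The hole search above relies on the standard round-up-to-a-power-of-two-boundary expression for hole_start. A small userspace sketch of that arithmetic, assuming PAGE_SHIFT of 12 and an order-1 allocation; the address is made up and the sketch is not part of the patch:

    #include <stdio.h>

    /* Round x up to the next multiple of a power-of-two size, as done
     * for hole_start above. */
    static unsigned long round_up_pow2(unsigned long x, unsigned long size)
    {
        return (x + (size - 1)) & ~(size - 1);
    }

    int main(void)
    {
        unsigned long size = (1UL << 1) << 12;   /* order-1 pages, 8 KiB */

        printf("%#lx\n", round_up_pow2(0x1234567, size));  /* 0x1236000 */
        return 0;
    }
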
+
+
+struct page *kimage_alloc_control_pages(struct kimage *image,
+ unsigned int order)
+{
+ struct page *pages = NULL;
+
+ switch (image->type) {
+ case KEXEC_TYPE_DEFAULT:
+ pages = kimage_alloc_normal_control_pages(image, order);
+ break;
+ case KEXEC_TYPE_CRASH:
+ pages = kimage_alloc_crash_control_pages(image, order);
+ break;
+ }
+
+ return pages;
+}
+
+static int kimage_add_entry(struct kimage *image, kimage_entry_t entry)
+{
+ if (*image->entry != 0)
+ image->entry++;
+
+ if (image->entry == image->last_entry) {
+ kimage_entry_t *ind_page;
+ struct page *page;
+
+ page = kimage_alloc_page(image, GFP_KERNEL, KIMAGE_NO_DEST);
+ if (!page)
+ return -ENOMEM;
+
+ ind_page = page_address(page);
+ *image->entry = virt_to_phys(ind_page) | IND_INDIRECTION;
+ image->entry = ind_page;
+ image->last_entry = ind_page +
+ ((PAGE_SIZE/sizeof(kimage_entry_t)) - 1);
+ }
+ *image->entry = entry;
+ image->entry++;
+ *image->entry = 0;
+
+ return 0;
+}
+
+static int kimage_set_destination(struct kimage *image,
+ unsigned long destination)
+{
+ int result;
+
+ destination &= PAGE_MASK;
+ result = kimage_add_entry(image, destination | IND_DESTINATION);
+
+ return result;
+}
+
+
+static int kimage_add_page(struct kimage *image, unsigned long page)
+{
+ int result;
+
+ page &= PAGE_MASK;
+ result = kimage_add_entry(image, page | IND_SOURCE);
+
+ return result;
+}
+
+
+static void kimage_free_extra_pages(struct kimage *image)
+{
+ /* Walk through and free any extra destination pages I may have */
+ kimage_free_page_list(&image->dest_pages);
+
+ /* Walk through and free any unusable pages I have cached */
+ kimage_free_page_list(&image->unusable_pages);
+}
+
+void kimage_terminate(struct kimage *image)
+{
+ if (*image->entry != 0)
+ image->entry++;
+
+ *image->entry = IND_DONE;
+}
+
+#define for_each_kimage_entry(image, ptr, entry) \
+ for (ptr = &image->head; (entry = *ptr) && !(entry & IND_DONE); \
+ ptr = (entry & IND_INDIRECTION) ? \
+ phys_to_virt((entry & PAGE_MASK)) : ptr + 1)
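
The walk above follows a list of tagged physical addresses: IND_INDIRECTION entries point at the next page of entries and IND_DONE terminates the list. The simplified userspace model below only illustrates the traversal; ordinary pointers stand in for physical addresses and a 16-byte flag mask replaces PAGE_MASK, so the memory layout is not realistic.

    #include <stdio.h>
    #include <stdint.h>

    #define IND_DESTINATION 0x1
    #define IND_INDIRECTION 0x2
    #define IND_DONE        0x4
    #define IND_SOURCE      0x8
    #define FLAG_MASK       0xfUL

    int main(void)
    {
        _Alignas(16) uintptr_t second[2];
        uintptr_t first[3];
        uintptr_t *ptr, entry;

        first[0] = 0x100000 | IND_DESTINATION;   /* where the next sources go */
        first[1] = 0x200000 | IND_SOURCE;        /* one source page */
        first[2] = (uintptr_t)second | IND_INDIRECTION;
        second[0] = 0x300000 | IND_SOURCE;
        second[1] = IND_DONE;

        for (ptr = first; (entry = *ptr) && !(entry & IND_DONE);
             ptr = (entry & IND_INDIRECTION) ?
                        (uintptr_t *)(entry & ~FLAG_MASK) : ptr + 1)
            printf("entry %#lx flags %#lx\n",
                   (unsigned long)(entry & ~FLAG_MASK),
                   (unsigned long)(entry & FLAG_MASK));
        return 0;
    }
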
+
+static void kimage_free_entry(kimage_entry_t entry)
+{
+ struct page *page;
+
+ page = pfn_to_page(entry >> PAGE_SHIFT);
+ kimage_free_pages(page);
+}
+
+void kimage_free(struct kimage *image)
+{
+ kimage_entry_t *ptr, entry;
+ kimage_entry_t ind = 0;
+
+ if (!image)
+ return;
+
+ kimage_free_extra_pages(image);
+ for_each_kimage_entry(image, ptr, entry) {
+ if (entry & IND_INDIRECTION) {
+ /* Free the previous indirection page */
+ if (ind & IND_INDIRECTION)
+ kimage_free_entry(ind);
+ /* Save this indirection page until we are
+ * done with it.
+ */
+ ind = entry;
+ } else if (entry & IND_SOURCE)
+ kimage_free_entry(entry);
+ }
+ /* Free the final indirection page */
+ if (ind & IND_INDIRECTION)
+ kimage_free_entry(ind);
+
+ /* Handle any machine specific cleanup */
+ machine_kexec_cleanup(image);
+
+ /* Free the kexec control pages... */
+ kimage_free_page_list(&image->control_pages);
+
+ /*
+	 * Free up any temporary buffers allocated. This might be hit if
+	 * an error occurred long after buffer allocation.
+ */
+ if (image->file_mode)
+ kimage_file_post_load_cleanup(image);
+
+ kfree(image);
+}
+
+static kimage_entry_t *kimage_dst_used(struct kimage *image,
+ unsigned long page)
+{
+ kimage_entry_t *ptr, entry;
+ unsigned long destination = 0;
+
+ for_each_kimage_entry(image, ptr, entry) {
+ if (entry & IND_DESTINATION)
+ destination = entry & PAGE_MASK;
+ else if (entry & IND_SOURCE) {
+ if (page == destination)
+ return ptr;
+ destination += PAGE_SIZE;
+ }
+ }
+
+ return NULL;
+}
+
+static struct page *kimage_alloc_page(struct kimage *image,
+ gfp_t gfp_mask,
+ unsigned long destination)
+{
+ /*
+ * Here we implement safeguards to ensure that a source page
+ * is not copied to its destination page before the data on
+ * the destination page is no longer useful.
+ *
+ * To do this we maintain the invariant that a source page is
+ * either its own destination page, or it is not a
+ * destination page at all.
+ *
+	 * That is slightly stronger than required, but the proof
+	 * that no problems will occur is trivial, and the
+	 * implementation is simple to verify.
+ *
+ * When allocating all pages normally this algorithm will run
+ * in O(N) time, but in the worst case it will run in O(N^2)
+ * time. If the runtime is a problem the data structures can
+ * be fixed.
+ */
+ struct page *page;
+ unsigned long addr;
+
+ /*
+ * Walk through the list of destination pages, and see if I
+ * have a match.
+ */
+ list_for_each_entry(page, &image->dest_pages, lru) {
+ addr = page_to_pfn(page) << PAGE_SHIFT;
+ if (addr == destination) {
+ list_del(&page->lru);
+ return page;
+ }
+ }
+ page = NULL;
+ while (1) {
+ kimage_entry_t *old;
+
+ /* Allocate a page, if we run out of memory give up */
+ page = kimage_alloc_pages(gfp_mask, 0);
+ if (!page)
+ return NULL;
+		/* If the page cannot be used, file it away */
+ if (page_to_pfn(page) >
+ (KEXEC_SOURCE_MEMORY_LIMIT >> PAGE_SHIFT)) {
+ list_add(&page->lru, &image->unusable_pages);
+ continue;
+ }
+ addr = page_to_pfn(page) << PAGE_SHIFT;
+
+		/* If it is the destination page we want, use it */
+ if (addr == destination)
+ break;
+
+ /* If the page is not a destination page use it */
+ if (!kimage_is_destination_range(image, addr,
+ addr + PAGE_SIZE))
+ break;
+
+ /*
+		 * I know that the page is someone's destination page.
+		 * See if there is already a source page for this
+		 * destination page. And if so, swap the source pages.
+ */
+ old = kimage_dst_used(image, addr);
+ if (old) {
+ /* If so move it */
+ unsigned long old_addr;
+ struct page *old_page;
+
+ old_addr = *old & PAGE_MASK;
+ old_page = pfn_to_page(old_addr >> PAGE_SHIFT);
+ copy_highpage(page, old_page);
+ *old = addr | (*old & ~PAGE_MASK);
+
+ /* The old page I have found cannot be a
+			 * destination page, so return it if its
+ * gfp_flags honor the ones passed in.
+ */
+ if (!(gfp_mask & __GFP_HIGHMEM) &&
+ PageHighMem(old_page)) {
+ kimage_free_pages(old_page);
+ continue;
+ }
+ addr = old_addr;
+ page = old_page;
+ break;
+ }
+ /* Place the page on the destination list, to be used later */
+ list_add(&page->lru, &image->dest_pages);
+ }
+
+ return page;
+}
+
+static int kimage_load_normal_segment(struct kimage *image,
+ struct kexec_segment *segment)
+{
+ unsigned long maddr;
+ size_t ubytes, mbytes;
+ int result;
+ unsigned char __user *buf = NULL;
+ unsigned char *kbuf = NULL;
+
+ result = 0;
+ if (image->file_mode)
+ kbuf = segment->kbuf;
+ else
+ buf = segment->buf;
+ ubytes = segment->bufsz;
+ mbytes = segment->memsz;
+ maddr = segment->mem;
+
+ result = kimage_set_destination(image, maddr);
+ if (result < 0)
+ goto out;
+
+ while (mbytes) {
+ struct page *page;
+ char *ptr;
+ size_t uchunk, mchunk;
+
+ page = kimage_alloc_page(image, GFP_HIGHUSER, maddr);
+ if (!page) {
+ result = -ENOMEM;
+ goto out;
+ }
+ result = kimage_add_page(image, page_to_pfn(page)
+ << PAGE_SHIFT);
+ if (result < 0)
+ goto out;
+
+ ptr = kmap(page);
+ /* Start with a clear page */
+ clear_page(ptr);
+ ptr += maddr & ~PAGE_MASK;
+ mchunk = min_t(size_t, mbytes,
+ PAGE_SIZE - (maddr & ~PAGE_MASK));
+ uchunk = min(ubytes, mchunk);
+
+ /* For file based kexec, source pages are in kernel memory */
+ if (image->file_mode)
+ memcpy(ptr, kbuf, uchunk);
+ else
+ result = copy_from_user(ptr, buf, uchunk);
+ kunmap(page);
+ if (result) {
+ result = -EFAULT;
+ goto out;
+ }
+ ubytes -= uchunk;
+ maddr += mchunk;
+ if (image->file_mode)
+ kbuf += mchunk;
+ else
+ buf += mchunk;
+ mbytes -= mchunk;
+ }
+out:
+ return result;
+}
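
The copy loop above splits a segment at destination page boundaries: mchunk is capped at the bytes remaining in the current page. The same chunking arithmetic run in isolation on made-up values (illustrative only, not part of the patch):

    #include <stdio.h>

    int main(void)
    {
        unsigned long page_size = 4096, page_mask = ~(page_size - 1);
        unsigned long maddr = 0x100f00;   /* hypothetical, not page aligned */
        unsigned long mbytes = 8192;

        while (mbytes) {
            unsigned long in_page = page_size - (maddr & ~page_mask);
            unsigned long mchunk = mbytes < in_page ? mbytes : in_page;

            printf("copy %lu bytes at %#lx\n", mchunk, maddr);
            maddr += mchunk;
            mbytes -= mchunk;
        }
        return 0;   /* chunks of 256, 4096 and 3840 bytes */
    }
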
+
+static int kimage_load_crash_segment(struct kimage *image,
+ struct kexec_segment *segment)
+{
+	/* For crash dump kernels we simply copy the data from
+	 * user space to its destination.
+ * We do things a page at a time for the sake of kmap.
+ */
+ unsigned long maddr;
+ size_t ubytes, mbytes;
+ int result;
+ unsigned char __user *buf = NULL;
+ unsigned char *kbuf = NULL;
+
+ result = 0;
+ if (image->file_mode)
+ kbuf = segment->kbuf;
+ else
+ buf = segment->buf;
+ ubytes = segment->bufsz;
+ mbytes = segment->memsz;
+ maddr = segment->mem;
+ while (mbytes) {
+ struct page *page;
+ char *ptr;
+ size_t uchunk, mchunk;
+
+ page = pfn_to_page(maddr >> PAGE_SHIFT);
+ if (!page) {
+ result = -ENOMEM;
+ goto out;
+ }
+ ptr = kmap(page);
+ ptr += maddr & ~PAGE_MASK;
+ mchunk = min_t(size_t, mbytes,
+ PAGE_SIZE - (maddr & ~PAGE_MASK));
+ uchunk = min(ubytes, mchunk);
+ if (mchunk > uchunk) {
+ /* Zero the trailing part of the page */
+ memset(ptr + uchunk, 0, mchunk - uchunk);
+ }
+
+ /* For file based kexec, source pages are in kernel memory */
+ if (image->file_mode)
+ memcpy(ptr, kbuf, uchunk);
+ else
+ result = copy_from_user(ptr, buf, uchunk);
+ kexec_flush_icache_page(page);
+ kunmap(page);
+ if (result) {
+ result = -EFAULT;
+ goto out;
+ }
+ ubytes -= uchunk;
+ maddr += mchunk;
+ if (image->file_mode)
+ kbuf += mchunk;
+ else
+ buf += mchunk;
+ mbytes -= mchunk;
+ }
+out:
+ return result;
+}
+
+int kimage_load_segment(struct kimage *image,
+ struct kexec_segment *segment)
+{
+ int result = -ENOMEM;
+
+ switch (image->type) {
+ case KEXEC_TYPE_DEFAULT:
+ result = kimage_load_normal_segment(image, segment);
+ break;
+ case KEXEC_TYPE_CRASH:
+ result = kimage_load_crash_segment(image, segment);
+ break;
+ }
+
+ return result;
+}
+
+struct kimage *kexec_image;
+struct kimage *kexec_crash_image;
+int kexec_load_disabled;
+
+void crash_kexec(struct pt_regs *regs)
+{
+ /* Take the kexec_mutex here to prevent sys_kexec_load
+ * running on one cpu from replacing the crash kernel
+ * we are using after a panic on a different cpu.
+ *
+ * If the crash kernel was not located in a fixed area
+ * of memory the xchg(&kexec_crash_image) would be
+ * sufficient. But since I reuse the memory...
+ */
+ if (mutex_trylock(&kexec_mutex)) {
+ if (kexec_crash_image) {
+ struct pt_regs fixed_regs;
+
+ crash_setup_regs(&fixed_regs, regs);
+ crash_save_vmcoreinfo();
+ machine_crash_shutdown(&fixed_regs);
+ machine_kexec(kexec_crash_image);
+ }
+ mutex_unlock(&kexec_mutex);
+ }
+}
+
+size_t crash_get_memory_size(void)
+{
+ size_t size = 0;
+
+ mutex_lock(&kexec_mutex);
+ if (crashk_res.end != crashk_res.start)
+ size = resource_size(&crashk_res);
+ mutex_unlock(&kexec_mutex);
+ return size;
+}
+
+void __weak crash_free_reserved_phys_range(unsigned long begin,
+ unsigned long end)
+{
+ unsigned long addr;
+
+ for (addr = begin; addr < end; addr += PAGE_SIZE)
+ free_reserved_page(pfn_to_page(addr >> PAGE_SHIFT));
+}
+
+int crash_shrink_memory(unsigned long new_size)
+{
+ int ret = 0;
+ unsigned long start, end;
+ unsigned long old_size;
+ struct resource *ram_res;
+
+ mutex_lock(&kexec_mutex);
+
+ if (kexec_crash_image) {
+ ret = -ENOENT;
+ goto unlock;
+ }
+ start = crashk_res.start;
+ end = crashk_res.end;
+ old_size = (end == 0) ? 0 : end - start + 1;
+ if (new_size >= old_size) {
+ ret = (new_size == old_size) ? 0 : -EINVAL;
+ goto unlock;
+ }
+
+ ram_res = kzalloc(sizeof(*ram_res), GFP_KERNEL);
+ if (!ram_res) {
+ ret = -ENOMEM;
+ goto unlock;
+ }
+
+ start = roundup(start, KEXEC_CRASH_MEM_ALIGN);
+ end = roundup(start + new_size, KEXEC_CRASH_MEM_ALIGN);
+
+ crash_map_reserved_pages();
+ crash_free_reserved_phys_range(end, crashk_res.end);
+
+ if ((start == end) && (crashk_res.parent != NULL))
+ release_resource(&crashk_res);
+
+ ram_res->start = end;
+ ram_res->end = crashk_res.end;
+ ram_res->flags = IORESOURCE_BUSY | IORESOURCE_MEM;
+ ram_res->name = "System RAM";
+
+ crashk_res.end = end - 1;
+
+ insert_resource(&iomem_resource, ram_res);
+ crash_unmap_reserved_pages();
+
+unlock:
+ mutex_unlock(&kexec_mutex);
+ return ret;
+}
+
+static u32 *append_elf_note(u32 *buf, char *name, unsigned type, void *data,
+ size_t data_len)
+{
+ struct elf_note note;
+
+ note.n_namesz = strlen(name) + 1;
+ note.n_descsz = data_len;
+ note.n_type = type;
+	memcpy(buf, &note, sizeof(note));
+ buf += (sizeof(note) + 3)/4;
+ memcpy(buf, name, note.n_namesz);
+ buf += (note.n_namesz + 3)/4;
+ memcpy(buf, data, note.n_descsz);
+ buf += (note.n_descsz + 3)/4;
+
+ return buf;
+}
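
Each piece of the note (header, name, data) is padded to a 4-byte boundary, which is why the buffer pointer advances by (len + 3)/4 32-bit words each time. A quick check of that arithmetic with hypothetical sizes, not part of the patch:

    #include <stdio.h>

    static size_t note_words(size_t namesz, size_t descsz)
    {
        size_t hdrsz = 3 * sizeof(unsigned int);  /* n_namesz, n_descsz, n_type */

        return (hdrsz + 3) / 4 + (namesz + 3) / 4 + (descsz + 3) / 4;
    }

    int main(void)
    {
        /* name "CORE" plus its NUL (5 bytes), a 12-byte payload */
        printf("%zu words\n", note_words(5, 12));   /* 3 + 2 + 3 = 8 */
        return 0;
    }
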
+
+static void final_note(u32 *buf)
+{
+ struct elf_note note;
+
+ note.n_namesz = 0;
+ note.n_descsz = 0;
+ note.n_type = 0;
+	memcpy(buf, &note, sizeof(note));
+}
+
+void crash_save_cpu(struct pt_regs *regs, int cpu)
+{
+ struct elf_prstatus prstatus;
+ u32 *buf;
+
+ if ((cpu < 0) || (cpu >= nr_cpu_ids))
+ return;
+
+ /* Using ELF notes here is opportunistic.
+ * I need a well defined structure format
+ * for the data I pass, and I need tags
+ * on the data to indicate what information I have
+ * squirrelled away. ELF notes happen to provide
+ * all of that, so there is no need to invent something new.
+ */
+ buf = (u32 *)per_cpu_ptr(crash_notes, cpu);
+ if (!buf)
+ return;
+ memset(&prstatus, 0, sizeof(prstatus));
+ prstatus.pr_pid = current->pid;
+ elf_core_copy_kernel_regs(&prstatus.pr_reg, regs);
+ buf = append_elf_note(buf, KEXEC_CORE_NOTE_NAME, NT_PRSTATUS,
+ &prstatus, sizeof(prstatus));
+ final_note(buf);
+}
+
+static int __init crash_notes_memory_init(void)
+{
+ /* Allocate memory for saving cpu registers. */
+ size_t size, align;
+
+ /*
+ * crash_notes could be allocated across 2 vmalloc pages when percpu
+	 * is vmalloc based. vmalloc doesn't guarantee that 2 contiguous vmalloc
+	 * pages are also on 2 contiguous physical pages. In this case the
+	 * 2nd part of crash_notes in the 2nd page could be lost since only the
+	 * starting address and size of crash_notes are exported through sysfs.
+	 * Here round up the size of crash_notes to the nearest power of two
+	 * and pass it to __alloc_percpu as the align value. This makes sure
+ * crash_notes is allocated inside one physical page.
+ */
+ size = sizeof(note_buf_t);
+ align = min(roundup_pow_of_two(sizeof(note_buf_t)), PAGE_SIZE);
+
+ /*
+	 * Break the build if size is bigger than PAGE_SIZE, since crash_notes
+	 * would then definitely span 2 pages.
+ */
+ BUILD_BUG_ON(size > PAGE_SIZE);
+
+ crash_notes = __alloc_percpu(size, align);
+ if (!crash_notes) {
+ pr_warn("Kexec: Memory allocation for saving cpu register states failed\n");
+ return -ENOMEM;
+ }
+ return 0;
+}
+subsys_initcall(crash_notes_memory_init);
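
The alignment trick above can be checked in userspace; in the sketch below, roundup_pow_of_two() is a plain-loop stand-in for the kernel helper and 428 bytes is an invented note_buf_t size (the real size is architecture-specific). Illustrative only, not part of the patch.

    #include <stdio.h>

    static unsigned long roundup_pow_of_two(unsigned long x)
    {
        unsigned long r = 1;

        while (r < x)
            r <<= 1;
        return r;
    }

    int main(void)
    {
        unsigned long page_size = 4096;
        unsigned long size = 428;                  /* hypothetical sizeof(note_buf_t) */
        unsigned long align = roundup_pow_of_two(size);

        if (align > page_size)                     /* min(..., PAGE_SIZE) */
            align = page_size;
        printf("size %lu -> align %lu\n", size, align);   /* 428 -> 512 */
        return 0;
    }
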
+
+
+/*
+ * parsing the "crashkernel" commandline
+ *
+ * this code is intended to be called from architecture specific code
+ */
+
+
+/*
+ * This function parses command lines in the format
+ *
+ * crashkernel=ramsize-range:size[,...][@offset]
+ *
+ * The function returns 0 on success and -EINVAL on failure.
+ */
+static int __init parse_crashkernel_mem(char *cmdline,
+ unsigned long long system_ram,
+ unsigned long long *crash_size,
+ unsigned long long *crash_base)
+{
+ char *cur = cmdline, *tmp;
+
+ /* for each entry of the comma-separated list */
+ do {
+ unsigned long long start, end = ULLONG_MAX, size;
+
+ /* get the start of the range */
+ start = memparse(cur, &tmp);
+ if (cur == tmp) {
+ pr_warn("crashkernel: Memory value expected\n");
+ return -EINVAL;
+ }
+ cur = tmp;
+ if (*cur != '-') {
+ pr_warn("crashkernel: '-' expected\n");
+ return -EINVAL;
+ }
+ cur++;
+
+		/* if no ':' is here, then we read the end */
+ if (*cur != ':') {
+ end = memparse(cur, &tmp);
+ if (cur == tmp) {
+ pr_warn("crashkernel: Memory value expected\n");
+ return -EINVAL;
+ }
+ cur = tmp;
+ if (end <= start) {
+ pr_warn("crashkernel: end <= start\n");
+ return -EINVAL;
+ }
+ }
+
+ if (*cur != ':') {
+ pr_warn("crashkernel: ':' expected\n");
+ return -EINVAL;
+ }
+ cur++;
+
+ size = memparse(cur, &tmp);
+ if (cur == tmp) {
+ pr_warn("Memory value expected\n");
+ return -EINVAL;
+ }
+ cur = tmp;
+ if (size >= system_ram) {
+ pr_warn("crashkernel: invalid size\n");
+ return -EINVAL;
+ }
+
+ /* match ? */
+ if (system_ram >= start && system_ram < end) {
+ *crash_size = size;
+ break;
+ }
+ } while (*cur++ == ',');
+
+ if (*crash_size > 0) {
+ while (*cur && *cur != ' ' && *cur != '@')
+ cur++;
+ if (*cur == '@') {
+ cur++;
+ *crash_base = memparse(cur, &tmp);
+ if (cur == tmp) {
+ pr_warn("Memory value expected after '@'\n");
+ return -EINVAL;
+ }
+ }
+ }
+
+ return 0;
+}
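
As a worked example of the range syntax, a hypothetical command line crashkernel=512M-2G:64M,2G-:128M on a 1 GiB machine selects the first entry, because start <= system_ram < end only holds there. The matching rule in isolation (a sketch, not part of the patch):

    #include <stdio.h>

    int main(void)
    {
        unsigned long long M = 1ULL << 20, G = 1ULL << 30;
        unsigned long long system_ram = 1 * G;   /* hypothetical */
        struct { unsigned long long start, end, size; } ranges[] = {
            { 512 * M, 2 * G, 64 * M },          /* 512M-2G:64M */
            { 2 * G, ~0ULL, 128 * M },           /* 2G-:128M    */
        };
        int i;

        for (i = 0; i < 2; i++)
            if (system_ram >= ranges[i].start && system_ram < ranges[i].end) {
                printf("reserve %llu MiB\n", ranges[i].size / M);   /* 64 */
                break;
            }
        return 0;
    }
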
+
+/*
+ * This function parses "simple" (old) crashkernel command lines like
+ *
+ * crashkernel=size[@offset]
+ *
+ * It returns 0 on success and -EINVAL on failure.
+ */
+static int __init parse_crashkernel_simple(char *cmdline,
+ unsigned long long *crash_size,
+ unsigned long long *crash_base)
+{
+ char *cur = cmdline;
+
+ *crash_size = memparse(cmdline, &cur);
+ if (cmdline == cur) {
+ pr_warn("crashkernel: memory value expected\n");
+ return -EINVAL;
+ }
+
+ if (*cur == '@')
+ *crash_base = memparse(cur+1, &cur);
+ else if (*cur != ' ' && *cur != '\0') {
+ pr_warn("crashkernel: unrecognized char\n");
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+#define SUFFIX_HIGH 0
+#define SUFFIX_LOW 1
+#define SUFFIX_NULL 2
+static __initdata char *suffix_tbl[] = {
+ [SUFFIX_HIGH] = ",high",
+ [SUFFIX_LOW] = ",low",
+ [SUFFIX_NULL] = NULL,
+};
+
+/*
+ * This function parses "suffix" crashkernel command lines like
+ *
+ * crashkernel=size,[high|low]
+ *
+ * It returns 0 on success and -EINVAL on failure.
+ */
+static int __init parse_crashkernel_suffix(char *cmdline,
+ unsigned long long *crash_size,
+ const char *suffix)
+{
+ char *cur = cmdline;
+
+ *crash_size = memparse(cmdline, &cur);
+ if (cmdline == cur) {
+ pr_warn("crashkernel: memory value expected\n");
+ return -EINVAL;
+ }
+
+ /* check with suffix */
+ if (strncmp(cur, suffix, strlen(suffix))) {
+ pr_warn("crashkernel: unrecognized char\n");
+ return -EINVAL;
+ }
+ cur += strlen(suffix);
+ if (*cur != ' ' && *cur != '\0') {
+ pr_warn("crashkernel: unrecognized char\n");
+ return -EINVAL;
+ }
+
+ return 0;
+}
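
After the size, the only text the suffix parser accepts is the exact suffix, optionally followed by a space. A rough userspace equivalent is sketched below; strtoull plus a manual 'M' multiplier stands in for memparse(), and the command line is made up. Not part of the patch.

    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>

    int main(void)
    {
        const char *cmdline = "384M,high";   /* hypothetical */
        const char *suffix = ",high";
        char *cur;
        unsigned long long size = strtoull(cmdline, &cur, 10);

        if (*cur == 'M') {                   /* crude memparse() stand-in */
            size <<= 20;
            cur++;
        }
        if (!strncmp(cur, suffix, strlen(suffix)) &&
            (cur[strlen(suffix)] == ' ' || cur[strlen(suffix)] == '\0'))
            printf("crashkernel%s: %llu bytes\n", suffix, size);
        return 0;
    }
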
+
+static __init char *get_last_crashkernel(char *cmdline,
+ const char *name,
+ const char *suffix)
+{
+ char *p = cmdline, *ck_cmdline = NULL;
+
+ /* find crashkernel and use the last one if there are more */
+ p = strstr(p, name);
+ while (p) {
+ char *end_p = strchr(p, ' ');
+ char *q;
+
+ if (!end_p)
+ end_p = p + strlen(p);
+
+ if (!suffix) {
+ int i;
+
+ /* skip the one with any known suffix */
+ for (i = 0; suffix_tbl[i]; i++) {
+ q = end_p - strlen(suffix_tbl[i]);
+ if (!strncmp(q, suffix_tbl[i],
+ strlen(suffix_tbl[i])))
+ goto next;
+ }
+ ck_cmdline = p;
+ } else {
+ q = end_p - strlen(suffix);
+ if (!strncmp(q, suffix, strlen(suffix)))
+ ck_cmdline = p;
+ }
+next:
+ p = strstr(p+1, name);
+ }
+
+ if (!ck_cmdline)
+ return NULL;
+
+ return ck_cmdline;
+}
+
+static int __init __parse_crashkernel(char *cmdline,
+ unsigned long long system_ram,
+ unsigned long long *crash_size,
+ unsigned long long *crash_base,
+ const char *name,
+ const char *suffix)
+{
+ char *first_colon, *first_space;
+ char *ck_cmdline;
+
+ BUG_ON(!crash_size || !crash_base);
+ *crash_size = 0;
+ *crash_base = 0;
+
+ ck_cmdline = get_last_crashkernel(cmdline, name, suffix);
+
+ if (!ck_cmdline)
+ return -EINVAL;
+
+ ck_cmdline += strlen(name);
+
+ if (suffix)
+ return parse_crashkernel_suffix(ck_cmdline, crash_size,
+ suffix);
+ /*
+ * if the commandline contains a ':', then that's the extended
+ * syntax -- if not, it must be the classic syntax
+ */
+ first_colon = strchr(ck_cmdline, ':');
+ first_space = strchr(ck_cmdline, ' ');
+ if (first_colon && (!first_space || first_colon < first_space))
+ return parse_crashkernel_mem(ck_cmdline, system_ram,
+ crash_size, crash_base);
+
+ return parse_crashkernel_simple(ck_cmdline, crash_size, crash_base);
+}
+
+/*
+ * This function is the entry point for command line parsing and should be
+ * called from the arch-specific code.
+ */
+int __init parse_crashkernel(char *cmdline,
+ unsigned long long system_ram,
+ unsigned long long *crash_size,
+ unsigned long long *crash_base)
+{
+ return __parse_crashkernel(cmdline, system_ram, crash_size, crash_base,
+ "crashkernel=", NULL);
+}
+
+int __init parse_crashkernel_high(char *cmdline,
+ unsigned long long system_ram,
+ unsigned long long *crash_size,
+ unsigned long long *crash_base)
+{
+ return __parse_crashkernel(cmdline, system_ram, crash_size, crash_base,
+ "crashkernel=", suffix_tbl[SUFFIX_HIGH]);
+}
+
+int __init parse_crashkernel_low(char *cmdline,
+ unsigned long long system_ram,
+ unsigned long long *crash_size,
+ unsigned long long *crash_base)
+{
+ return __parse_crashkernel(cmdline, system_ram, crash_size, crash_base,
+ "crashkernel=", suffix_tbl[SUFFIX_LOW]);
+}
+
+static void update_vmcoreinfo_note(void)
+{
+ u32 *buf = vmcoreinfo_note;
+
+ if (!vmcoreinfo_size)
+ return;
+ buf = append_elf_note(buf, VMCOREINFO_NOTE_NAME, 0, vmcoreinfo_data,
+ vmcoreinfo_size);
+ final_note(buf);
+}
+
+void crash_save_vmcoreinfo(void)
+{
+ vmcoreinfo_append_str("CRASHTIME=%ld\n", get_seconds());
+ update_vmcoreinfo_note();
+}
+
+void vmcoreinfo_append_str(const char *fmt, ...)
+{
+ va_list args;
+ char buf[0x50];
+ size_t r;
+
+ va_start(args, fmt);
+ r = vscnprintf(buf, sizeof(buf), fmt, args);
+ va_end(args);
+
+ r = min(r, vmcoreinfo_max_size - vmcoreinfo_size);
+
+ memcpy(&vmcoreinfo_data[vmcoreinfo_size], buf, r);
+
+ vmcoreinfo_size += r;
+}
+
+/*
+ * provide an empty default implementation here -- architecture
+ * code may override this
+ */
+void __weak arch_crash_save_vmcoreinfo(void)
+{}
+
+unsigned long __weak paddr_vmcoreinfo_note(void)
+{
+ return __pa((unsigned long)(char *)&vmcoreinfo_note);
+}
+
+static int __init crash_save_vmcoreinfo_init(void)
+{
+ VMCOREINFO_OSRELEASE(init_uts_ns.name.release);
+ VMCOREINFO_PAGESIZE(PAGE_SIZE);
+
+ VMCOREINFO_SYMBOL(init_uts_ns);
+ VMCOREINFO_SYMBOL(node_online_map);
+#ifdef CONFIG_MMU
+ VMCOREINFO_SYMBOL(swapper_pg_dir);
+#endif
+ VMCOREINFO_SYMBOL(_stext);
+ VMCOREINFO_SYMBOL(vmap_area_list);
+
+#ifndef CONFIG_NEED_MULTIPLE_NODES
+ VMCOREINFO_SYMBOL(mem_map);
+ VMCOREINFO_SYMBOL(contig_page_data);
+#endif
+#ifdef CONFIG_SPARSEMEM
+ VMCOREINFO_SYMBOL(mem_section);
+ VMCOREINFO_LENGTH(mem_section, NR_SECTION_ROOTS);
+ VMCOREINFO_STRUCT_SIZE(mem_section);
+ VMCOREINFO_OFFSET(mem_section, section_mem_map);
+#endif
+ VMCOREINFO_STRUCT_SIZE(page);
+ VMCOREINFO_STRUCT_SIZE(pglist_data);
+ VMCOREINFO_STRUCT_SIZE(zone);
+ VMCOREINFO_STRUCT_SIZE(free_area);
+ VMCOREINFO_STRUCT_SIZE(list_head);
+ VMCOREINFO_SIZE(nodemask_t);
+ VMCOREINFO_OFFSET(page, flags);
+ VMCOREINFO_OFFSET(page, _count);
+ VMCOREINFO_OFFSET(page, mapping);
+ VMCOREINFO_OFFSET(page, lru);
+ VMCOREINFO_OFFSET(page, _mapcount);
+ VMCOREINFO_OFFSET(page, private);
+ VMCOREINFO_OFFSET(pglist_data, node_zones);
+ VMCOREINFO_OFFSET(pglist_data, nr_zones);
+#ifdef CONFIG_FLAT_NODE_MEM_MAP
+ VMCOREINFO_OFFSET(pglist_data, node_mem_map);
+#endif
+ VMCOREINFO_OFFSET(pglist_data, node_start_pfn);
+ VMCOREINFO_OFFSET(pglist_data, node_spanned_pages);
+ VMCOREINFO_OFFSET(pglist_data, node_id);
+ VMCOREINFO_OFFSET(zone, free_area);
+ VMCOREINFO_OFFSET(zone, vm_stat);
+ VMCOREINFO_OFFSET(zone, spanned_pages);
+ VMCOREINFO_OFFSET(free_area, free_list);
+ VMCOREINFO_OFFSET(list_head, next);
+ VMCOREINFO_OFFSET(list_head, prev);
+ VMCOREINFO_OFFSET(vmap_area, va_start);
+ VMCOREINFO_OFFSET(vmap_area, list);
+ VMCOREINFO_LENGTH(zone.free_area, MAX_ORDER);
+ log_buf_kexec_setup();
+ VMCOREINFO_LENGTH(free_area.free_list, MIGRATE_TYPES);
+ VMCOREINFO_NUMBER(NR_FREE_PAGES);
+ VMCOREINFO_NUMBER(PG_lru);
+ VMCOREINFO_NUMBER(PG_private);
+ VMCOREINFO_NUMBER(PG_swapcache);
+ VMCOREINFO_NUMBER(PG_slab);
+#ifdef CONFIG_MEMORY_FAILURE
+ VMCOREINFO_NUMBER(PG_hwpoison);
+#endif
+ VMCOREINFO_NUMBER(PG_head_mask);
+ VMCOREINFO_NUMBER(PAGE_BUDDY_MAPCOUNT_VALUE);
+#ifdef CONFIG_X86
+ VMCOREINFO_NUMBER(KERNEL_IMAGE_SIZE);
+#endif
+#ifdef CONFIG_HUGETLBFS
+ VMCOREINFO_SYMBOL(free_huge_page);
+#endif
+
+ arch_crash_save_vmcoreinfo();
+ update_vmcoreinfo_note();
+
+ return 0;
+}
+
+subsys_initcall(crash_save_vmcoreinfo_init);
+
+/*
+ * Move into place and start executing a preloaded standalone
+ * executable. If nothing was preloaded return an error.
+ */
+int kernel_kexec(void)
+{
+ int error = 0;
+
+ if (!mutex_trylock(&kexec_mutex))
+ return -EBUSY;
+ if (!kexec_image) {
+ error = -EINVAL;
+ goto Unlock;
+ }
+
+#ifdef CONFIG_KEXEC_JUMP
+ if (kexec_image->preserve_context) {
+ lock_system_sleep();
+ pm_prepare_console();
+ error = freeze_processes();
+ if (error) {
+ error = -EBUSY;
+ goto Restore_console;
+ }
+ suspend_console();
+ error = dpm_suspend_start(PMSG_FREEZE);
+ if (error)
+ goto Resume_console;
+ /* At this point, dpm_suspend_start() has been called,
+ * but *not* dpm_suspend_end(). We *must* call
+ * dpm_suspend_end() now. Otherwise, drivers for
+ * some devices (e.g. interrupt controllers) become
+ * desynchronized with the actual state of the
+ * hardware at resume time, and evil weirdness ensues.
+ */
+ error = dpm_suspend_end(PMSG_FREEZE);
+ if (error)
+ goto Resume_devices;
+ error = disable_nonboot_cpus();
+ if (error)
+ goto Enable_cpus;
+ local_irq_disable();
+ error = syscore_suspend();
+ if (error)
+ goto Enable_irqs;
+ } else
+#endif
+ {
+ kexec_in_progress = true;
+ kernel_restart_prepare(NULL);
+ migrate_to_reboot_cpu();
+
+ /*
+ * migrate_to_reboot_cpu() disables CPU hotplug assuming that
+ * no further code needs to use CPU hotplug (which is true in
+ * the reboot case). However, the kexec path depends on using
+ * CPU hotplug again; so re-enable it here.
+ */
+ cpu_hotplug_enable();
+ pr_emerg("Starting new kernel\n");
+ machine_shutdown();
+ }
+
+ machine_kexec(kexec_image);
+
+#ifdef CONFIG_KEXEC_JUMP
+ if (kexec_image->preserve_context) {
+ syscore_resume();
+ Enable_irqs:
+ local_irq_enable();
+ Enable_cpus:
+ enable_nonboot_cpus();
+ dpm_resume_start(PMSG_RESTORE);
+ Resume_devices:
+ dpm_resume_end(PMSG_RESTORE);
+ Resume_console:
+ resume_console();
+ thaw_processes();
+ Restore_console:
+ pm_restore_console();
+ unlock_system_sleep();
+ }
+#endif
+
+ Unlock:
+ mutex_unlock(&kexec_mutex);
+ return error;
+}
+
+/*
+ * Add and remove page tables for crashkernel memory
+ *
+ * Provide an empty default implementation here -- architecture
+ * code may override this
+ */
+void __weak crash_map_reserved_pages(void)
+{}
+
+void __weak crash_unmap_reserved_pages(void)
+{}
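
The weak arch_crash_save_vmcoreinfo() stub above is the intended architecture hook: an override can export additional symbols and constants into the vmcoreinfo note with the same VMCOREINFO_* helpers used by crash_save_vmcoreinfo_init(). A minimal sketch of such an override follows; the phys_base symbol and the CONFIG string are illustrative assumptions, not part of this patch.

/* Illustrative sketch of an architecture override for the weak hook above. */
void arch_crash_save_vmcoreinfo(void)
{
	/* Export an arch-specific symbol so the dump tool can relocate addresses. */
	VMCOREINFO_SYMBOL(phys_base);		/* hypothetical for this arch */

	/* Free-form entries can be appended as well. */
	vmcoreinfo_append_str("CONFIG_EXAMPLE=y\n");	/* hypothetical entry */
}
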
diff --git a/kernel/kexec_file.c b/kernel/kexec_file.c
new file mode 100644
index 0000000..6a9a3f2
--- /dev/null
+++ b/kernel/kexec_file.c
@@ -0,0 +1,1045 @@
+/*
+ * kexec: kexec_file_load system call
+ *
+ * Copyright (C) 2014 Red Hat Inc.
+ * Authors:
+ * Vivek Goyal <vgoyal@redhat.com>
+ *
+ * This source code is licensed under the GNU General Public License,
+ * Version 2. See the file COPYING for more details.
+ */
+
+#include <linux/capability.h>
+#include <linux/mm.h>
+#include <linux/file.h>
+#include <linux/slab.h>
+#include <linux/kexec.h>
+#include <linux/mutex.h>
+#include <linux/list.h>
+#include <crypto/hash.h>
+#include <crypto/sha.h>
+#include <linux/syscalls.h>
+#include <linux/vmalloc.h>
+#include "kexec_internal.h"
+
+/*
+ * Declare these symbols weak so that if architecture provides a purgatory,
+ * these will be overridden.
+ */
+char __weak kexec_purgatory[0];
+size_t __weak kexec_purgatory_size = 0;
+
+static int kexec_calculate_store_digests(struct kimage *image);
+
+static int copy_file_from_fd(int fd, void **buf, unsigned long *buf_len)
+{
+ struct fd f = fdget(fd);
+ int ret;
+ struct kstat stat;
+ loff_t pos;
+ ssize_t bytes = 0;
+
+ if (!f.file)
+ return -EBADF;
+
+ ret = vfs_getattr(&f.file->f_path, &stat);
+ if (ret)
+ goto out;
+
+ if (stat.size > INT_MAX) {
+ ret = -EFBIG;
+ goto out;
+ }
+
+ /* Don't hand 0 to vmalloc, it whines. */
+ if (stat.size == 0) {
+ ret = -EINVAL;
+ goto out;
+ }
+
+ *buf = vmalloc(stat.size);
+ if (!*buf) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ pos = 0;
+ while (pos < stat.size) {
+ bytes = kernel_read(f.file, pos, (char *)(*buf) + pos,
+ stat.size - pos);
+ if (bytes < 0) {
+ vfree(*buf);
+ ret = bytes;
+ goto out;
+ }
+
+ if (bytes == 0)
+ break;
+ pos += bytes;
+ }
+
+ if (pos != stat.size) {
+ ret = -EBADF;
+ vfree(*buf);
+ goto out;
+ }
+
+ *buf_len = pos;
+out:
+ fdput(f);
+ return ret;
+}
+
+/* Architectures can provide this probe function */
+int __weak arch_kexec_kernel_image_probe(struct kimage *image, void *buf,
+ unsigned long buf_len)
+{
+ return -ENOEXEC;
+}
+
+void * __weak arch_kexec_kernel_image_load(struct kimage *image)
+{
+ return ERR_PTR(-ENOEXEC);
+}
+
+int __weak arch_kimage_file_post_load_cleanup(struct kimage *image)
+{
+ return -EINVAL;
+}
+
+int __weak arch_kexec_kernel_verify_sig(struct kimage *image, void *buf,
+ unsigned long buf_len)
+{
+ return -EKEYREJECTED;
+}
+
+/* Apply relocations of type RELA */
+int __weak
+arch_kexec_apply_relocations_add(const Elf_Ehdr *ehdr, Elf_Shdr *sechdrs,
+ unsigned int relsec)
+{
+ pr_err("RELA relocation unsupported.\n");
+ return -ENOEXEC;
+}
+
+/* Apply relocations of type REL */
+int __weak
+arch_kexec_apply_relocations(const Elf_Ehdr *ehdr, Elf_Shdr *sechdrs,
+ unsigned int relsec)
+{
+ pr_err("REL relocation unsupported.\n");
+ return -ENOEXEC;
+}
+
+/*
+ * Free up memory used by the kernel, initrd, and command line. These are
+ * temporary allocations which are no longer needed once the buffers have
+ * been loaded into separate segments and copied elsewhere.
+ */
+void kimage_file_post_load_cleanup(struct kimage *image)
+{
+ struct purgatory_info *pi = &image->purgatory_info;
+
+ vfree(image->kernel_buf);
+ image->kernel_buf = NULL;
+
+ vfree(image->initrd_buf);
+ image->initrd_buf = NULL;
+
+ kfree(image->cmdline_buf);
+ image->cmdline_buf = NULL;
+
+ vfree(pi->purgatory_buf);
+ pi->purgatory_buf = NULL;
+
+ vfree(pi->sechdrs);
+ pi->sechdrs = NULL;
+
+ /* See if architecture has anything to cleanup post load */
+ arch_kimage_file_post_load_cleanup(image);
+
+ /*
+ * Above call should have called into bootloader to free up
+ * any data stored in kimage->image_loader_data. It should
+ * be ok now to free it up.
+ */
+ kfree(image->image_loader_data);
+ image->image_loader_data = NULL;
+}
+
+/*
+ * In file mode, the list of segments is prepared by the kernel. Copy the
+ * relevant data from user space, do error checking, and prepare the segment list.
+ */
+static int
+kimage_file_prepare_segments(struct kimage *image, int kernel_fd, int initrd_fd,
+ const char __user *cmdline_ptr,
+ unsigned long cmdline_len, unsigned flags)
+{
+ int ret = 0;
+ void *ldata;
+
+ ret = copy_file_from_fd(kernel_fd, &image->kernel_buf,
+ &image->kernel_buf_len);
+ if (ret)
+ return ret;
+
+ /* Call arch image probe handlers */
+ ret = arch_kexec_kernel_image_probe(image, image->kernel_buf,
+ image->kernel_buf_len);
+
+ if (ret)
+ goto out;
+
+#ifdef CONFIG_KEXEC_VERIFY_SIG
+ ret = arch_kexec_kernel_verify_sig(image, image->kernel_buf,
+ image->kernel_buf_len);
+ if (ret) {
+ pr_debug("kernel signature verification failed.\n");
+ goto out;
+ }
+ pr_debug("kernel signature verification successful.\n");
+#endif
+ /* It is possible that no initramfs is being loaded */
+ if (!(flags & KEXEC_FILE_NO_INITRAMFS)) {
+ ret = copy_file_from_fd(initrd_fd, &image->initrd_buf,
+ &image->initrd_buf_len);
+ if (ret)
+ goto out;
+ }
+
+ if (cmdline_len) {
+ image->cmdline_buf = kzalloc(cmdline_len, GFP_KERNEL);
+ if (!image->cmdline_buf) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ ret = copy_from_user(image->cmdline_buf, cmdline_ptr,
+ cmdline_len);
+ if (ret) {
+ ret = -EFAULT;
+ goto out;
+ }
+
+ image->cmdline_buf_len = cmdline_len;
+
+ /* command line should be a string with last byte null */
+ if (image->cmdline_buf[cmdline_len - 1] != '\0') {
+ ret = -EINVAL;
+ goto out;
+ }
+ }
+
+ /* Call arch image load handlers */
+ ldata = arch_kexec_kernel_image_load(image);
+
+ if (IS_ERR(ldata)) {
+ ret = PTR_ERR(ldata);
+ goto out;
+ }
+
+ image->image_loader_data = ldata;
+out:
+ /* In case of error, free up all allocated memory in this function */
+ if (ret)
+ kimage_file_post_load_cleanup(image);
+ return ret;
+}
+
+static int
+kimage_file_alloc_init(struct kimage **rimage, int kernel_fd,
+ int initrd_fd, const char __user *cmdline_ptr,
+ unsigned long cmdline_len, unsigned long flags)
+{
+ int ret;
+ struct kimage *image;
+ bool kexec_on_panic = flags & KEXEC_FILE_ON_CRASH;
+
+ image = do_kimage_alloc_init();
+ if (!image)
+ return -ENOMEM;
+
+ image->file_mode = 1;
+
+ if (kexec_on_panic) {
+ /* Enable special crash kernel control page alloc policy. */
+ image->control_page = crashk_res.start;
+ image->type = KEXEC_TYPE_CRASH;
+ }
+
+ ret = kimage_file_prepare_segments(image, kernel_fd, initrd_fd,
+ cmdline_ptr, cmdline_len, flags);
+ if (ret)
+ goto out_free_image;
+
+ ret = sanity_check_segment_list(image);
+ if (ret)
+ goto out_free_post_load_bufs;
+
+ ret = -ENOMEM;
+ image->control_code_page = kimage_alloc_control_pages(image,
+ get_order(KEXEC_CONTROL_PAGE_SIZE));
+ if (!image->control_code_page) {
+ pr_err("Could not allocate control_code_buffer\n");
+ goto out_free_post_load_bufs;
+ }
+
+ if (!kexec_on_panic) {
+ image->swap_page = kimage_alloc_control_pages(image, 0);
+ if (!image->swap_page) {
+ pr_err("Could not allocate swap buffer\n");
+ goto out_free_control_pages;
+ }
+ }
+
+ *rimage = image;
+ return 0;
+out_free_control_pages:
+ kimage_free_page_list(&image->control_pages);
+out_free_post_load_bufs:
+ kimage_file_post_load_cleanup(image);
+out_free_image:
+ kfree(image);
+ return ret;
+}
+
+SYSCALL_DEFINE5(kexec_file_load, int, kernel_fd, int, initrd_fd,
+ unsigned long, cmdline_len, const char __user *, cmdline_ptr,
+ unsigned long, flags)
+{
+ int ret = 0, i;
+ struct kimage **dest_image, *image;
+
+ /* We only trust the superuser with rebooting the system. */
+ if (!capable(CAP_SYS_BOOT) || kexec_load_disabled)
+ return -EPERM;
+
+ /* Make sure we have a legal set of flags */
+ if (flags != (flags & KEXEC_FILE_FLAGS))
+ return -EINVAL;
+
+ image = NULL;
+
+ if (!mutex_trylock(&kexec_mutex))
+ return -EBUSY;
+
+ dest_image = &kexec_image;
+ if (flags & KEXEC_FILE_ON_CRASH)
+ dest_image = &kexec_crash_image;
+
+ if (flags & KEXEC_FILE_UNLOAD)
+ goto exchange;
+
+ /*
+ * In case of crash, the new kernel gets loaded into the reserved region. It is
+ * the same memory where the old crash kernel might be loaded. Free any
+ * current crash dump kernel before we corrupt it.
+ */
+ if (flags & KEXEC_FILE_ON_CRASH)
+ kimage_free(xchg(&kexec_crash_image, NULL));
+
+ ret = kimage_file_alloc_init(&image, kernel_fd, initrd_fd, cmdline_ptr,
+ cmdline_len, flags);
+ if (ret)
+ goto out;
+
+ ret = machine_kexec_prepare(image);
+ if (ret)
+ goto out;
+
+ ret = kexec_calculate_store_digests(image);
+ if (ret)
+ goto out;
+
+ for (i = 0; i < image->nr_segments; i++) {
+ struct kexec_segment *ksegment;
+
+ ksegment = &image->segment[i];
+ pr_debug("Loading segment %d: buf=0x%p bufsz=0x%zx mem=0x%lx memsz=0x%zx\n",
+ i, ksegment->buf, ksegment->bufsz, ksegment->mem,
+ ksegment->memsz);
+
+ ret = kimage_load_segment(image, &image->segment[i]);
+ if (ret)
+ goto out;
+ }
+
+ kimage_terminate(image);
+
+ /*
+ * Free up any temporary buffers allocated which are not needed
+ * after image has been loaded
+ */
+ kimage_file_post_load_cleanup(image);
+exchange:
+ image = xchg(dest_image, image);
+out:
+ mutex_unlock(&kexec_mutex);
+ kimage_free(image);
+ return ret;
+}
+
+static int locate_mem_hole_top_down(unsigned long start, unsigned long end,
+ struct kexec_buf *kbuf)
+{
+ struct kimage *image = kbuf->image;
+ unsigned long temp_start, temp_end;
+
+ temp_end = min(end, kbuf->buf_max);
+ temp_start = temp_end - kbuf->memsz;
+
+ do {
+ /* align down start */
+ temp_start = temp_start & (~(kbuf->buf_align - 1));
+
+ if (temp_start < start || temp_start < kbuf->buf_min)
+ return 0;
+
+ temp_end = temp_start + kbuf->memsz - 1;
+
+ /*
+ * Make sure this does not conflict with any of the existing
+ * segments
+ */
+ if (kimage_is_destination_range(image, temp_start, temp_end)) {
+ temp_start = temp_start - PAGE_SIZE;
+ continue;
+ }
+
+ /* We found a suitable memory range */
+ break;
+ } while (1);
+
+ /* If we are here, we found a suitable memory range */
+ kbuf->mem = temp_start;
+
+ /* Success, stop navigating through remaining System RAM ranges */
+ return 1;
+}
+
+static int locate_mem_hole_bottom_up(unsigned long start, unsigned long end,
+ struct kexec_buf *kbuf)
+{
+ struct kimage *image = kbuf->image;
+ unsigned long temp_start, temp_end;
+
+ temp_start = max(start, kbuf->buf_min);
+
+ do {
+ temp_start = ALIGN(temp_start, kbuf->buf_align);
+ temp_end = temp_start + kbuf->memsz - 1;
+
+ if (temp_end > end || temp_end > kbuf->buf_max)
+ return 0;
+ /*
+ * Make sure this does not conflict with any of the existing
+ * segments
+ */
+ if (kimage_is_destination_range(image, temp_start, temp_end)) {
+ temp_start = temp_start + PAGE_SIZE;
+ continue;
+ }
+
+ /* We found a suitable memory range */
+ break;
+ } while (1);
+
+ /* If we are here, we found a suitable memory range */
+ kbuf->mem = temp_start;
+
+ /* Success, stop navigating through remaining System RAM ranges */
+ return 1;
+}
+
+static int locate_mem_hole_callback(u64 start, u64 end, void *arg)
+{
+ struct kexec_buf *kbuf = (struct kexec_buf *)arg;
+ unsigned long sz = end - start + 1;
+
+ /* Returning 0 will move on to the next memory range */
+ if (sz < kbuf->memsz)
+ return 0;
+
+ if (end < kbuf->buf_min || start > kbuf->buf_max)
+ return 0;
+
+ /*
+ * Allocate memory top down within the RAM range, otherwise do bottom-up
+ * allocation.
+ */
+ if (kbuf->top_down)
+ return locate_mem_hole_top_down(start, end, kbuf);
+ return locate_mem_hole_bottom_up(start, end, kbuf);
+}
+
+/*
+ * Helper function for placing a buffer in a kexec segment. This assumes
+ * that kexec_mutex is held.
+ */
+int kexec_add_buffer(struct kimage *image, char *buffer, unsigned long bufsz,
+ unsigned long memsz, unsigned long buf_align,
+ unsigned long buf_min, unsigned long buf_max,
+ bool top_down, unsigned long *load_addr)
+{
+
+ struct kexec_segment *ksegment;
+ struct kexec_buf buf, *kbuf;
+ int ret;
+
+ /* Currently adding segment this way is allowed only in file mode */
+ if (!image->file_mode)
+ return -EINVAL;
+
+ if (image->nr_segments >= KEXEC_SEGMENT_MAX)
+ return -EINVAL;
+
+ /*
+ * Make sure we are not trying to add a buffer after allocating
+ * control pages. All segments need to be placed before any
+ * control pages are allocated, because the control page allocation
+ * logic goes through the list of segments to make sure there are
+ * no destination overlaps.
+ */
+ if (!list_empty(&image->control_pages)) {
+ WARN_ON(1);
+ return -EINVAL;
+ }
+
+ memset(&buf, 0, sizeof(struct kexec_buf));
+ kbuf = &buf;
+ kbuf->image = image;
+ kbuf->buffer = buffer;
+ kbuf->bufsz = bufsz;
+
+ kbuf->memsz = ALIGN(memsz, PAGE_SIZE);
+ kbuf->buf_align = max(buf_align, PAGE_SIZE);
+ kbuf->buf_min = buf_min;
+ kbuf->buf_max = buf_max;
+ kbuf->top_down = top_down;
+
+ /* Walk the RAM ranges and allocate a suitable range for the buffer */
+ if (image->type == KEXEC_TYPE_CRASH)
+ ret = walk_iomem_res("Crash kernel",
+ IORESOURCE_MEM | IORESOURCE_BUSY,
+ crashk_res.start, crashk_res.end, kbuf,
+ locate_mem_hole_callback);
+ else
+ ret = walk_system_ram_res(0, -1, kbuf,
+ locate_mem_hole_callback);
+ if (ret != 1) {
+ /* A suitable memory range could not be found for buffer */
+ return -EADDRNOTAVAIL;
+ }
+
+ /* Found a suitable memory range */
+ ksegment = &image->segment[image->nr_segments];
+ ksegment->kbuf = kbuf->buffer;
+ ksegment->bufsz = kbuf->bufsz;
+ ksegment->mem = kbuf->mem;
+ ksegment->memsz = kbuf->memsz;
+ image->nr_segments++;
+ *load_addr = ksegment->mem;
+ return 0;
+}
+
+/* Calculate and store the digest of segments */
+static int kexec_calculate_store_digests(struct kimage *image)
+{
+ struct crypto_shash *tfm;
+ struct shash_desc *desc;
+ int ret = 0, i, j, zero_buf_sz, sha_region_sz;
+ size_t desc_size, nullsz;
+ char *digest;
+ void *zero_buf;
+ struct kexec_sha_region *sha_regions;
+ struct purgatory_info *pi = &image->purgatory_info;
+
+ zero_buf = __va(page_to_pfn(ZERO_PAGE(0)) << PAGE_SHIFT);
+ zero_buf_sz = PAGE_SIZE;
+
+ tfm = crypto_alloc_shash("sha256", 0, 0);
+ if (IS_ERR(tfm)) {
+ ret = PTR_ERR(tfm);
+ goto out;
+ }
+
+ desc_size = crypto_shash_descsize(tfm) + sizeof(*desc);
+ desc = kzalloc(desc_size, GFP_KERNEL);
+ if (!desc) {
+ ret = -ENOMEM;
+ goto out_free_tfm;
+ }
+
+ sha_region_sz = KEXEC_SEGMENT_MAX * sizeof(struct kexec_sha_region);
+ sha_regions = vzalloc(sha_region_sz);
+ if (!sha_regions)
+ goto out_free_desc;
+
+ desc->tfm = tfm;
+ desc->flags = 0;
+
+ ret = crypto_shash_init(desc);
+ if (ret < 0)
+ goto out_free_sha_regions;
+
+ digest = kzalloc(SHA256_DIGEST_SIZE, GFP_KERNEL);
+ if (!digest) {
+ ret = -ENOMEM;
+ goto out_free_sha_regions;
+ }
+
+ for (j = i = 0; i < image->nr_segments; i++) {
+ struct kexec_segment *ksegment;
+
+ ksegment = &image->segment[i];
+ /*
+ * Skip purgatory as it will be modified once we put digest
+ * info in purgatory.
+ */
+ if (ksegment->kbuf == pi->purgatory_buf)
+ continue;
+
+ ret = crypto_shash_update(desc, ksegment->kbuf,
+ ksegment->bufsz);
+ if (ret)
+ break;
+
+ /*
+ * Assume rest of the buffer is filled with zero and
+ * update digest accordingly.
+ */
+ nullsz = ksegment->memsz - ksegment->bufsz;
+ while (nullsz) {
+ unsigned long bytes = nullsz;
+
+ if (bytes > zero_buf_sz)
+ bytes = zero_buf_sz;
+ ret = crypto_shash_update(desc, zero_buf, bytes);
+ if (ret)
+ break;
+ nullsz -= bytes;
+ }
+
+ if (ret)
+ break;
+
+ sha_regions[j].start = ksegment->mem;
+ sha_regions[j].len = ksegment->memsz;
+ j++;
+ }
+
+ if (!ret) {
+ ret = crypto_shash_final(desc, digest);
+ if (ret)
+ goto out_free_digest;
+ ret = kexec_purgatory_get_set_symbol(image, "sha_regions",
+ sha_regions, sha_region_sz, 0);
+ if (ret)
+ goto out_free_digest;
+
+ ret = kexec_purgatory_get_set_symbol(image, "sha256_digest",
+ digest, SHA256_DIGEST_SIZE, 0);
+ if (ret)
+ goto out_free_digest;
+ }
+
+out_free_digest:
+ kfree(digest);
+out_free_sha_regions:
+ vfree(sha_regions);
+out_free_desc:
+ kfree(desc);
+out_free_tfm:
+ kfree(tfm);
+out:
+ return ret;
+}
+
+/* Actually load purgatory. Much of this code is taken from kexec-tools */
+static int __kexec_load_purgatory(struct kimage *image, unsigned long min,
+ unsigned long max, int top_down)
+{
+ struct purgatory_info *pi = &image->purgatory_info;
+ unsigned long align, buf_align, bss_align, buf_sz, bss_sz, bss_pad;
+ unsigned long memsz, entry, load_addr, curr_load_addr, bss_addr, offset;
+ unsigned char *buf_addr, *src;
+ int i, ret = 0, entry_sidx = -1;
+ const Elf_Shdr *sechdrs_c;
+ Elf_Shdr *sechdrs = NULL;
+ void *purgatory_buf = NULL;
+
+ /*
+ * sechdrs_c points to the section headers in purgatory and is read
+ * only. No modifications allowed.
+ */
+ sechdrs_c = (void *)pi->ehdr + pi->ehdr->e_shoff;
+
+ /*
+ * We can not modify sechdrs_c[] and its fields. It is read only.
+ * Copy it over to a local copy where one can store some temporary
+ * data and free it at the end. We need to modify ->sh_addr and
+ * ->sh_offset fields to keep track of permanent and temporary
+ * locations of sections.
+ */
+ sechdrs = vzalloc(pi->ehdr->e_shnum * sizeof(Elf_Shdr));
+ if (!sechdrs)
+ return -ENOMEM;
+
+ memcpy(sechdrs, sechdrs_c, pi->ehdr->e_shnum * sizeof(Elf_Shdr));
+
+ /*
+ * We seem to have multiple copies of sections. The first copy is the
+ * one embedded in the kernel's read-only section. Some of these sections
+ * will be copied to a temporary buffer and relocated. And these
+ * sections will finally be copied to their final destination at
+ * segment load time.
+ *
+ * Use ->sh_offset to reflect section address in memory. It will
+ * point to original read only copy if section is not allocatable.
+ * Otherwise it will point to temporary copy which will be relocated.
+ *
+ * Use ->sh_addr to contain final address of the section where it
+ * will go during execution time.
+ */
+ for (i = 0; i < pi->ehdr->e_shnum; i++) {
+ if (sechdrs[i].sh_type == SHT_NOBITS)
+ continue;
+
+ sechdrs[i].sh_offset = (unsigned long)pi->ehdr +
+ sechdrs[i].sh_offset;
+ }
+
+ /*
+ * Identify entry point section and make entry relative to section
+ * start.
+ */
+ entry = pi->ehdr->e_entry;
+ for (i = 0; i < pi->ehdr->e_shnum; i++) {
+ if (!(sechdrs[i].sh_flags & SHF_ALLOC))
+ continue;
+
+ if (!(sechdrs[i].sh_flags & SHF_EXECINSTR))
+ continue;
+
+ /* Make entry section relative */
+ if (sechdrs[i].sh_addr <= pi->ehdr->e_entry &&
+ ((sechdrs[i].sh_addr + sechdrs[i].sh_size) >
+ pi->ehdr->e_entry)) {
+ entry_sidx = i;
+ entry -= sechdrs[i].sh_addr;
+ break;
+ }
+ }
+
+ /* Determine how much memory is needed to load relocatable object. */
+ buf_align = 1;
+ bss_align = 1;
+ buf_sz = 0;
+ bss_sz = 0;
+
+ for (i = 0; i < pi->ehdr->e_shnum; i++) {
+ if (!(sechdrs[i].sh_flags & SHF_ALLOC))
+ continue;
+
+ align = sechdrs[i].sh_addralign;
+ if (sechdrs[i].sh_type != SHT_NOBITS) {
+ if (buf_align < align)
+ buf_align = align;
+ buf_sz = ALIGN(buf_sz, align);
+ buf_sz += sechdrs[i].sh_size;
+ } else {
+ /* bss section */
+ if (bss_align < align)
+ bss_align = align;
+ bss_sz = ALIGN(bss_sz, align);
+ bss_sz += sechdrs[i].sh_size;
+ }
+ }
+
+ /* Determine the bss padding required to align bss properly */
+ bss_pad = 0;
+ if (buf_sz & (bss_align - 1))
+ bss_pad = bss_align - (buf_sz & (bss_align - 1));
+
+ memsz = buf_sz + bss_pad + bss_sz;
+
+ /* Allocate buffer for purgatory */
+ purgatory_buf = vzalloc(buf_sz);
+ if (!purgatory_buf) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ if (buf_align < bss_align)
+ buf_align = bss_align;
+
+ /* Add buffer to segment list */
+ ret = kexec_add_buffer(image, purgatory_buf, buf_sz, memsz,
+ buf_align, min, max, top_down,
+ &pi->purgatory_load_addr);
+ if (ret)
+ goto out;
+
+ /* Load SHF_ALLOC sections */
+ buf_addr = purgatory_buf;
+ load_addr = curr_load_addr = pi->purgatory_load_addr;
+ bss_addr = load_addr + buf_sz + bss_pad;
+
+ for (i = 0; i < pi->ehdr->e_shnum; i++) {
+ if (!(sechdrs[i].sh_flags & SHF_ALLOC))
+ continue;
+
+ align = sechdrs[i].sh_addralign;
+ if (sechdrs[i].sh_type != SHT_NOBITS) {
+ curr_load_addr = ALIGN(curr_load_addr, align);
+ offset = curr_load_addr - load_addr;
+ /* We already modified ->sh_offset to keep the src addr */
+ src = (char *) sechdrs[i].sh_offset;
+ memcpy(buf_addr + offset, src, sechdrs[i].sh_size);
+
+ /* Store load address and source address of section */
+ sechdrs[i].sh_addr = curr_load_addr;
+
+ /*
+ * This section got copied to temporary buffer. Update
+ * ->sh_offset accordingly.
+ */
+ sechdrs[i].sh_offset = (unsigned long)(buf_addr + offset);
+
+ /* Advance to the next address */
+ curr_load_addr += sechdrs[i].sh_size;
+ } else {
+ bss_addr = ALIGN(bss_addr, align);
+ sechdrs[i].sh_addr = bss_addr;
+ bss_addr += sechdrs[i].sh_size;
+ }
+ }
+
+ /* Update entry point based on load address of text section */
+ if (entry_sidx >= 0)
+ entry += sechdrs[entry_sidx].sh_addr;
+
+ /* Make kernel jump to purgatory after shutdown */
+ image->start = entry;
+
+ /* Used later to get/set symbol values */
+ pi->sechdrs = sechdrs;
+
+ /*
+ * Used later to identify which section is purgatory and exclude it
+ * from checksumming.
+ */
+ pi->purgatory_buf = purgatory_buf;
+ return ret;
+out:
+ vfree(sechdrs);
+ vfree(purgatory_buf);
+ return ret;
+}
+
+static int kexec_apply_relocations(struct kimage *image)
+{
+ int i, ret;
+ struct purgatory_info *pi = &image->purgatory_info;
+ Elf_Shdr *sechdrs = pi->sechdrs;
+
+ /* Apply relocations */
+ for (i = 0; i < pi->ehdr->e_shnum; i++) {
+ Elf_Shdr *section, *symtab;
+
+ if (sechdrs[i].sh_type != SHT_RELA &&
+ sechdrs[i].sh_type != SHT_REL)
+ continue;
+
+ /*
+ * For section of type SHT_RELA/SHT_REL,
+ * ->sh_link contains section header index of associated
+ * symbol table. And ->sh_info contains section header
+ * index of section to which relocations apply.
+ */
+ if (sechdrs[i].sh_info >= pi->ehdr->e_shnum ||
+ sechdrs[i].sh_link >= pi->ehdr->e_shnum)
+ return -ENOEXEC;
+
+ section = &sechdrs[sechdrs[i].sh_info];
+ symtab = &sechdrs[sechdrs[i].sh_link];
+
+ if (!(section->sh_flags & SHF_ALLOC))
+ continue;
+
+ /*
+ * symtab->sh_link contains the section header index of the associated
+ * string table.
+ */
+ if (symtab->sh_link >= pi->ehdr->e_shnum)
+ /* Invalid section number? */
+ continue;
+
+ /*
+ * Respective architecture needs to provide support for applying
+ * relocations of type SHT_RELA/SHT_REL.
+ */
+ if (sechdrs[i].sh_type == SHT_RELA)
+ ret = arch_kexec_apply_relocations_add(pi->ehdr,
+ sechdrs, i);
+ else if (sechdrs[i].sh_type == SHT_REL)
+ ret = arch_kexec_apply_relocations(pi->ehdr,
+ sechdrs, i);
+ if (ret)
+ return ret;
+ }
+
+ return 0;
+}
+
+/* Load relocatable purgatory object and relocate it appropriately */
+int kexec_load_purgatory(struct kimage *image, unsigned long min,
+ unsigned long max, int top_down,
+ unsigned long *load_addr)
+{
+ struct purgatory_info *pi = &image->purgatory_info;
+ int ret;
+
+ if (kexec_purgatory_size <= 0)
+ return -EINVAL;
+
+ if (kexec_purgatory_size < sizeof(Elf_Ehdr))
+ return -ENOEXEC;
+
+ pi->ehdr = (Elf_Ehdr *)kexec_purgatory;
+
+ if (memcmp(pi->ehdr->e_ident, ELFMAG, SELFMAG) != 0
+ || pi->ehdr->e_type != ET_REL
+ || !elf_check_arch(pi->ehdr)
+ || pi->ehdr->e_shentsize != sizeof(Elf_Shdr))
+ return -ENOEXEC;
+
+ if (pi->ehdr->e_shoff >= kexec_purgatory_size
+ || (pi->ehdr->e_shnum * sizeof(Elf_Shdr) >
+ kexec_purgatory_size - pi->ehdr->e_shoff))
+ return -ENOEXEC;
+
+ ret = __kexec_load_purgatory(image, min, max, top_down);
+ if (ret)
+ return ret;
+
+ ret = kexec_apply_relocations(image);
+ if (ret)
+ goto out;
+
+ *load_addr = pi->purgatory_load_addr;
+ return 0;
+out:
+ vfree(pi->sechdrs);
+ vfree(pi->purgatory_buf);
+ return ret;
+}
+
+static Elf_Sym *kexec_purgatory_find_symbol(struct purgatory_info *pi,
+ const char *name)
+{
+ Elf_Sym *syms;
+ Elf_Shdr *sechdrs;
+ Elf_Ehdr *ehdr;
+ int i, k;
+ const char *strtab;
+
+ if (!pi->sechdrs || !pi->ehdr)
+ return NULL;
+
+ sechdrs = pi->sechdrs;
+ ehdr = pi->ehdr;
+
+ for (i = 0; i < ehdr->e_shnum; i++) {
+ if (sechdrs[i].sh_type != SHT_SYMTAB)
+ continue;
+
+ if (sechdrs[i].sh_link >= ehdr->e_shnum)
+ /* Invalid strtab section number */
+ continue;
+ strtab = (char *)sechdrs[sechdrs[i].sh_link].sh_offset;
+ syms = (Elf_Sym *)sechdrs[i].sh_offset;
+
+ /* Go through symbols for a match */
+ for (k = 0; k < sechdrs[i].sh_size/sizeof(Elf_Sym); k++) {
+ if (ELF_ST_BIND(syms[k].st_info) != STB_GLOBAL)
+ continue;
+
+ if (strcmp(strtab + syms[k].st_name, name) != 0)
+ continue;
+
+ if (syms[k].st_shndx == SHN_UNDEF ||
+ syms[k].st_shndx >= ehdr->e_shnum) {
+ pr_debug("Symbol: %s has bad section index %d.\n",
+ name, syms[k].st_shndx);
+ return NULL;
+ }
+
+ /* Found the symbol we are looking for */
+ return &syms[k];
+ }
+ }
+
+ return NULL;
+}
+
+void *kexec_purgatory_get_symbol_addr(struct kimage *image, const char *name)
+{
+ struct purgatory_info *pi = &image->purgatory_info;
+ Elf_Sym *sym;
+ Elf_Shdr *sechdr;
+
+ sym = kexec_purgatory_find_symbol(pi, name);
+ if (!sym)
+ return ERR_PTR(-EINVAL);
+
+ sechdr = &pi->sechdrs[sym->st_shndx];
+
+ /*
+ * Returns the address where symbol will finally be loaded after
+ * kexec_load_segment()
+ */
+ return (void *)(sechdr->sh_addr + sym->st_value);
+}
+
+/*
+ * Get or set the value of a symbol. If "get_value" is true, the symbol value is
+ * returned in buf; otherwise the symbol value is set based on the value in buf.
+ */
+int kexec_purgatory_get_set_symbol(struct kimage *image, const char *name,
+ void *buf, unsigned int size, bool get_value)
+{
+ Elf_Sym *sym;
+ Elf_Shdr *sechdrs;
+ struct purgatory_info *pi = &image->purgatory_info;
+ char *sym_buf;
+
+ sym = kexec_purgatory_find_symbol(pi, name);
+ if (!sym)
+ return -EINVAL;
+
+ if (sym->st_size != size) {
+ pr_err("symbol %s size mismatch: expected %lu actual %u\n",
+ name, (unsigned long)sym->st_size, size);
+ return -EINVAL;
+ }
+
+ sechdrs = pi->sechdrs;
+
+ if (sechdrs[sym->st_shndx].sh_type == SHT_NOBITS) {
+ pr_err("symbol %s is in a bss section. Cannot %s\n", name,
+ get_value ? "get" : "set");
+ return -EINVAL;
+ }
+
+ sym_buf = (unsigned char *)sechdrs[sym->st_shndx].sh_offset +
+ sym->st_value;
+
+ if (get_value)
+ memcpy((void *)buf, sym_buf, size);
+ else
+ memcpy((void *)sym_buf, buf, size);
+
+ return 0;
+}
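
Taken together, kexec_add_buffer(), kexec_load_purgatory() and kexec_purgatory_get_set_symbol() are the building blocks an architecture's arch_kexec_kernel_image_load() implementation is expected to use. A rough sketch of that flow is shown below, with error paths trimmed; the "entry64_addr" purgatory symbol is a hypothetical name for illustration, not something defined by this patch.

static void *example_image_load(struct kimage *image)
{
	unsigned long kernel_load_addr, purgatory_load_addr;
	int ret;

	/* Place the kernel buffer in a suitable hole, page-size aligned. */
	ret = kexec_add_buffer(image, image->kernel_buf, image->kernel_buf_len,
			       image->kernel_buf_len, PAGE_SIZE, 0, ULONG_MAX,
			       true, &kernel_load_addr);
	if (ret)
		return ERR_PTR(ret);

	/* Load and relocate purgatory; image->start becomes its entry point. */
	ret = kexec_load_purgatory(image, 0, ULONG_MAX, 1, &purgatory_load_addr);
	if (ret)
		return ERR_PTR(ret);

	/* Patch a purgatory variable with the kernel load address. */
	ret = kexec_purgatory_get_set_symbol(image, "entry64_addr",
					     &kernel_load_addr,
					     sizeof(kernel_load_addr), 0);
	return ret ? ERR_PTR(ret) : NULL;
}
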
diff --git a/kernel/kexec_internal.h b/kernel/kexec_internal.h
new file mode 100644
index 0000000..e4392a6
--- /dev/null
+++ b/kernel/kexec_internal.h
@@ -0,0 +1,22 @@
+#ifndef LINUX_KEXEC_INTERNAL_H
+#define LINUX_KEXEC_INTERNAL_H
+
+#include <linux/kexec.h>
+
+struct kimage *do_kimage_alloc_init(void);
+int sanity_check_segment_list(struct kimage *image);
+void kimage_free_page_list(struct list_head *list);
+void kimage_free(struct kimage *image);
+int kimage_load_segment(struct kimage *image, struct kexec_segment *segment);
+void kimage_terminate(struct kimage *image);
+int kimage_is_destination_range(struct kimage *image,
+ unsigned long start, unsigned long end);
+
+extern struct mutex kexec_mutex;
+
+#ifdef CONFIG_KEXEC_FILE
+void kimage_file_post_load_cleanup(struct kimage *image);
+#else /* CONFIG_KEXEC_FILE */
+static inline void kimage_file_post_load_cleanup(struct kimage *image) { }
+#endif /* CONFIG_KEXEC_FILE */
+#endif /* LINUX_KEXEC_INTERNAL_H */
diff --git a/kernel/kmod.c b/kernel/kmod.c
index 2777f40..da98d05 100644
--- a/kernel/kmod.c
+++ b/kernel/kmod.c
@@ -45,8 +45,6 @@
extern int max_threads;
-static struct workqueue_struct *khelper_wq;
-
#define CAP_BSET (void *)1
#define CAP_PI (void *)2
@@ -114,10 +112,11 @@
* @...: arguments as specified in the format string
*
* Load a module using the user mode module loader. The function returns
- * zero on success or a negative errno code on failure. Note that a
- * successful module load does not mean the module did not then unload
- * and exit on an error of its own. Callers must check that the service
- * they requested is now available not blindly invoke it.
+ * zero on success or a negative errno code or positive exit code from
+ * "modprobe" on failure. Note that a successful module load does not mean
+ * the module did not then unload and exit on an error of its own. Callers
+ * must check that the service they requested is now available, not blindly
+ * invoke it.
*
* If module auto-loading support is disabled then this function
* becomes a no-operation.
@@ -213,7 +212,7 @@
/*
* This is the task which runs the usermode application
*/
-static int ____call_usermodehelper(void *data)
+static int call_usermodehelper_exec_async(void *data)
{
struct subprocess_info *sub_info = data;
struct cred *new;
@@ -223,12 +222,9 @@
flush_signal_handlers(current, 1);
spin_unlock_irq(¤t->sighand->siglock);
- /* We can run anywhere, unlike our parent keventd(). */
- set_cpus_allowed_ptr(current, cpu_all_mask);
-
/*
- * Our parent is keventd, which runs with elevated scheduling priority.
- * Avoid propagating that into the userspace child.
+ * Our parent (unbound workqueue) runs with elevated scheduling
+ * priority. Avoid propagating that into the userspace child.
*/
set_user_nice(current, 0);
@@ -258,7 +254,10 @@
(const char __user *const __user *)sub_info->envp);
out:
sub_info->retval = retval;
- /* wait_for_helper() will call umh_complete if UHM_WAIT_PROC. */
+ /*
+ * call_usermodehelper_exec_sync() will call umh_complete
+ * if UMH_WAIT_PROC.
+ */
if (!(sub_info->wait & UMH_WAIT_PROC))
umh_complete(sub_info);
if (!retval)
@@ -266,15 +265,14 @@
do_exit(0);
}
-/* Keventd can't block, but this (a child) can. */
-static int wait_for_helper(void *data)
+/* Handles UMH_WAIT_PROC. */
+static void call_usermodehelper_exec_sync(struct subprocess_info *sub_info)
{
- struct subprocess_info *sub_info = data;
pid_t pid;
/* If SIGCLD is ignored sys_wait4 won't populate the status. */
kernel_sigaction(SIGCHLD, SIG_DFL);
- pid = kernel_thread(____call_usermodehelper, sub_info, SIGCHLD);
+ pid = kernel_thread(call_usermodehelper_exec_async, sub_info, SIGCHLD);
if (pid < 0) {
sub_info->retval = pid;
} else {
@@ -282,44 +280,60 @@
/*
* Normally it is bogus to call wait4() from in-kernel because
* wait4() wants to write the exit code to a userspace address.
- * But wait_for_helper() always runs as keventd, and put_user()
- * to a kernel address works OK for kernel threads, due to their
- * having an mm_segment_t which spans the entire address space.
+ * But call_usermodehelper_exec_sync() always runs as a kernel
+ * thread (workqueue) and put_user() to a kernel address works
+ * OK for kernel threads, due to their having an mm_segment_t
+ * which spans the entire address space.
*
* Thus the __user pointer cast is valid here.
*/
sys_wait4(pid, (int __user *)&ret, 0, NULL);
/*
- * If ret is 0, either ____call_usermodehelper failed and the
- * real error code is already in sub_info->retval or
+ * If ret is 0, either call_usermodehelper_exec_async failed and
+ * the real error code is already in sub_info->retval or
* sub_info->retval is 0 anyway, so don't mess with it then.
*/
if (ret)
sub_info->retval = ret;
}
+ /* Restore default kernel sig handler */
+ kernel_sigaction(SIGCHLD, SIG_IGN);
+
umh_complete(sub_info);
- do_exit(0);
}
-/* This is run by khelper thread */
-static void __call_usermodehelper(struct work_struct *work)
+/*
+ * We need to create the usermodehelper kernel thread from a task that is affine
+ * to an optimized set of CPUs (or the nohz housekeeping ones) so that it
+ * inherits the widest possible affinity, irrespective of call_usermodehelper()
+ * callers with possibly reduced affinity (e.g. per-cpu workqueues). We don't
+ * want usermodehelper targets to contend for a busy CPU.
+ *
+ * Unbound workqueues provide such wide affinity and allow blocking on
+ * UMH_WAIT_PROC requests without blocking pending requests (up to some limit).
+ *
+ * Besides, workqueues provide the privilege level that the caller might not
+ * have in order to perform the usermodehelper request.
+ *
+ */
+static void call_usermodehelper_exec_work(struct work_struct *work)
{
struct subprocess_info *sub_info =
container_of(work, struct subprocess_info, work);
- pid_t pid;
- if (sub_info->wait & UMH_WAIT_PROC)
- pid = kernel_thread(wait_for_helper, sub_info,
- CLONE_FS | CLONE_FILES | SIGCHLD);
- else
- pid = kernel_thread(____call_usermodehelper, sub_info,
+ if (sub_info->wait & UMH_WAIT_PROC) {
+ call_usermodehelper_exec_sync(sub_info);
+ } else {
+ pid_t pid;
+
+ pid = kernel_thread(call_usermodehelper_exec_async, sub_info,
SIGCHLD);
-
- if (pid < 0) {
- sub_info->retval = pid;
- umh_complete(sub_info);
+ if (pid < 0) {
+ sub_info->retval = pid;
+ umh_complete(sub_info);
+ }
}
}
@@ -509,7 +523,7 @@
if (!sub_info)
goto out;
- INIT_WORK(&sub_info->work, __call_usermodehelper);
+ INIT_WORK(&sub_info->work, call_usermodehelper_exec_work);
sub_info->path = path;
sub_info->argv = argv;
sub_info->envp = envp;
@@ -531,8 +545,8 @@
* from interrupt context.
*
* Runs a user-space application. The application is started
- * asynchronously if wait is not set, and runs as a child of keventd.
- * (ie. it runs with full root capabilities).
+ * asynchronously if wait is not set, and runs as a child of system workqueues.
+ * (ie. it runs with full root capabilities and optimized affinity).
*/
int call_usermodehelper_exec(struct subprocess_info *sub_info, int wait)
{
@@ -544,7 +558,7 @@
return -EINVAL;
}
helper_lock();
- if (!khelper_wq || usermodehelper_disabled) {
+ if (usermodehelper_disabled) {
retval = -EBUSY;
goto out;
}
@@ -556,7 +570,7 @@
sub_info->complete = (wait == UMH_NO_WAIT) ? NULL : &done;
sub_info->wait = wait;
- queue_work(khelper_wq, &sub_info->work);
+ queue_work(system_unbound_wq, &sub_info->work);
if (wait == UMH_NO_WAIT) /* task has freed sub_info */
goto unlock;
@@ -686,9 +700,3 @@
},
{ }
};
-
-void __init usermodehelper_init(void)
-{
- khelper_wq = create_singlethread_workqueue("khelper");
- BUG_ON(!khelper_wq);
-}
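
For callers of the usermodehelper API nothing changes: requests are simply queued on system_unbound_wq instead of the dedicated khelper thread. A hedged sketch of typical caller-side usage follows; the helper path and arguments are made up for illustration.

static int example_run_helper(void)
{
	char *argv[] = { "/sbin/example-helper", "--check", NULL };
	char *envp[] = { "HOME=/", "PATH=/sbin:/bin:/usr/sbin:/usr/bin", NULL };

	/* UMH_WAIT_PROC: block until the helper process has exited. */
	return call_usermodehelper(argv[0], argv, envp, UMH_WAIT_PROC);
}
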
diff --git a/kernel/ksysfs.c b/kernel/ksysfs.c
index 6683cce..e83b264 100644
--- a/kernel/ksysfs.c
+++ b/kernel/ksysfs.c
@@ -90,7 +90,7 @@
KERNEL_ATTR_RW(profiling);
#endif
-#ifdef CONFIG_KEXEC
+#ifdef CONFIG_KEXEC_CORE
static ssize_t kexec_loaded_show(struct kobject *kobj,
struct kobj_attribute *attr, char *buf)
{
@@ -134,7 +134,7 @@
}
KERNEL_ATTR_RO(vmcoreinfo);
-#endif /* CONFIG_KEXEC */
+#endif /* CONFIG_KEXEC_CORE */
/* whether file capabilities are enabled */
static ssize_t fscaps_show(struct kobject *kobj,
@@ -196,7 +196,7 @@
#ifdef CONFIG_PROFILING
&profiling_attr.attr,
#endif
-#ifdef CONFIG_KEXEC
+#ifdef CONFIG_KEXEC_CORE
&kexec_loaded_attr.attr,
&kexec_crash_loaded_attr.attr,
&kexec_crash_size_attr.attr,
diff --git a/kernel/locking/qspinlock.c b/kernel/locking/qspinlock.c
index 337c881..87e9ce6a 100644
--- a/kernel/locking/qspinlock.c
+++ b/kernel/locking/qspinlock.c
@@ -289,7 +289,7 @@
if (pv_enabled())
goto queue;
- if (virt_queued_spin_lock(lock))
+ if (virt_spin_lock(lock))
return;
/*
diff --git a/kernel/membarrier.c b/kernel/membarrier.c
new file mode 100644
index 0000000..536c727
--- /dev/null
+++ b/kernel/membarrier.c
@@ -0,0 +1,66 @@
+/*
+ * Copyright (C) 2010, 2015 Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
+ *
+ * membarrier system call
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+
+#include <linux/syscalls.h>
+#include <linux/membarrier.h>
+
+/*
+ * Bitmask made from an "or" of all commands within enum membarrier_cmd,
+ * except MEMBARRIER_CMD_QUERY.
+ */
+#define MEMBARRIER_CMD_BITMASK (MEMBARRIER_CMD_SHARED)
+
+/**
+ * sys_membarrier - issue memory barriers on a set of threads
+ * @cmd: Takes command values defined in enum membarrier_cmd.
+ * @flags: Currently needs to be 0. For future extensions.
+ *
+ * If this system call is not implemented, -ENOSYS is returned. If the
+ * command specified does not exist, or if the command argument is invalid,
+ * this system call returns -EINVAL. For a given command, with flags argument
+ * set to 0, this system call is guaranteed to always return the same value
+ * until reboot.
+ *
+ * All memory accesses performed in program order from each targeted thread
+ * are guaranteed to be ordered with respect to sys_membarrier(). If we use
+ * the semantic "barrier()" to represent a compiler barrier forcing memory
+ * accesses to be performed in program order across the barrier, and
+ * smp_mb() to represent explicit memory barriers forcing full memory
+ * ordering across the barrier, we have the following ordering table for
+ * each pair of barrier(), sys_membarrier() and smp_mb():
+ *
+ * The pair ordering is detailed as (O: ordered, X: not ordered):
+ *
+ * barrier() smp_mb() sys_membarrier()
+ * barrier() X X O
+ * smp_mb() X O O
+ * sys_membarrier() O O O
+ */
+SYSCALL_DEFINE2(membarrier, int, cmd, int, flags)
+{
+ if (unlikely(flags))
+ return -EINVAL;
+ switch (cmd) {
+ case MEMBARRIER_CMD_QUERY:
+ return MEMBARRIER_CMD_BITMASK;
+ case MEMBARRIER_CMD_SHARED:
+ if (num_online_cpus() > 1)
+ synchronize_sched();
+ return 0;
+ default:
+ return -EINVAL;
+ }
+}
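
From userspace the new system call is typically probed with MEMBARRIER_CMD_QUERY before being used. A hedged sketch follows, assuming the uapi headers define __NR_membarrier and the membarrier commands on the target architecture.

#include <unistd.h>
#include <sys/syscall.h>
#include <linux/membarrier.h>

/* Issue a system-wide memory barrier, or report that it is unsupported. */
static int global_barrier(void)
{
	int mask = syscall(__NR_membarrier, MEMBARRIER_CMD_QUERY, 0);

	if (mask < 0 || !(mask & MEMBARRIER_CMD_SHARED))
		return -1;	/* kernel too old or command not available */

	return syscall(__NR_membarrier, MEMBARRIER_CMD_SHARED, 0);
}
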
diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c
index cf8c242..8f0324e 100644
--- a/kernel/printk/printk.c
+++ b/kernel/printk/printk.c
@@ -835,7 +835,7 @@
.release = devkmsg_release,
};
-#ifdef CONFIG_KEXEC
+#ifdef CONFIG_KEXEC_CORE
/*
* This appends the listed symbols to /proc/vmcore
*
diff --git a/kernel/reboot.c b/kernel/reboot.c
index d20c85d..bd30a97 100644
--- a/kernel/reboot.c
+++ b/kernel/reboot.c
@@ -346,7 +346,7 @@
kernel_restart(buffer);
break;
-#ifdef CONFIG_KEXEC
+#ifdef CONFIG_KEXEC_CORE
case LINUX_REBOOT_CMD_KEXEC:
ret = kernel_kexec();
break;
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 3595403..97d276f 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -621,18 +621,21 @@
int i, cpu = smp_processor_id();
struct sched_domain *sd;
- if (!idle_cpu(cpu))
+ if (!idle_cpu(cpu) && is_housekeeping_cpu(cpu))
return cpu;
rcu_read_lock();
for_each_domain(cpu, sd) {
for_each_cpu(i, sched_domain_span(sd)) {
- if (!idle_cpu(i)) {
+ if (!idle_cpu(i) && is_housekeeping_cpu(cpu)) {
cpu = i;
goto unlock;
}
}
}
+
+ if (!is_housekeeping_cpu(cpu))
+ cpu = housekeeping_any_cpu();
unlock:
rcu_read_unlock();
return cpu;
@@ -5178,24 +5181,47 @@
break;
/*
- * Ensure rq->lock covers the entire task selection
- * until the migration.
+ * pick_next_task assumes pinned rq->lock.
*/
lockdep_pin_lock(&rq->lock);
next = pick_next_task(rq, &fake_task);
BUG_ON(!next);
next->sched_class->put_prev_task(rq, next);
+ /*
+ * Rules for changing task_struct::cpus_allowed are holding
+ * both pi_lock and rq->lock, such that holding either
+ * stabilizes the mask.
+ *
+ * Dropping rq->lock is not quite as disastrous as it usually is
+ * because !cpu_active at this point, which means load-balance
+ * will not interfere. Also, stop-machine.
+ */
+ lockdep_unpin_lock(&rq->lock);
+ raw_spin_unlock(&rq->lock);
+ raw_spin_lock(&next->pi_lock);
+ raw_spin_lock(&rq->lock);
+
+ /*
+ * Since we're inside stop-machine, _nothing_ should have
+ * changed the task, WARN if weird stuff happened, because in
+ * that case the above rq->lock drop is a fail too.
+ */
+ if (WARN_ON(task_rq(next) != rq || !task_on_rq_queued(next))) {
+ raw_spin_unlock(&next->pi_lock);
+ continue;
+ }
+
/* Find suitable destination for @next, with force if needed. */
dest_cpu = select_fallback_rq(dead_rq->cpu, next);
- lockdep_unpin_lock(&rq->lock);
rq = __migrate_task(rq, next, dest_cpu);
if (rq != dead_rq) {
raw_spin_unlock(&rq->lock);
rq = dead_rq;
raw_spin_lock(&rq->lock);
}
+ raw_spin_unlock(&next->pi_lock);
}
rq->stop = stop;
diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c
index 03c3875..a02decf 100644
--- a/kernel/sys_ni.c
+++ b/kernel/sys_ni.c
@@ -245,3 +245,6 @@
/* execveat */
cond_syscall(sys_execveat);
+
+/* membarrier */
+cond_syscall(sys_membarrier);
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 19b62b5..e69201d 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -621,7 +621,7 @@
.proc_handler = proc_dointvec,
},
#endif
-#ifdef CONFIG_KEXEC
+#ifdef CONFIG_KEXEC_CORE
{
.procname = "kexec_load_disabled",
.data = &kexec_load_disabled,
@@ -1995,7 +1995,7 @@
int val = *valp;
if (val < 0) {
*negp = true;
- *lvalp = (unsigned long)-val;
+ *lvalp = -(unsigned long)val;
} else {
*negp = false;
*lvalp = (unsigned long)val;
@@ -2201,7 +2201,7 @@
int val = *valp;
if (val < 0) {
*negp = true;
- *lvalp = (unsigned long)-val;
+ *lvalp = -(unsigned long)val;
} else {
*negp = false;
*lvalp = (unsigned long)val;
@@ -2436,7 +2436,7 @@
unsigned long lval;
if (val < 0) {
*negp = true;
- lval = (unsigned long)-val;
+ lval = -(unsigned long)val;
} else {
*negp = false;
lval = (unsigned long)val;
@@ -2459,7 +2459,7 @@
unsigned long lval;
if (val < 0) {
*negp = true;
- lval = (unsigned long)-val;
+ lval = -(unsigned long)val;
} else {
*negp = false;
lval = (unsigned long)val;
@@ -2484,7 +2484,7 @@
unsigned long lval;
if (val < 0) {
*negp = true;
- lval = (unsigned long)-val;
+ lval = -(unsigned long)val;
} else {
*negp = false;
lval = (unsigned long)val;
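
The repeated one-line change above is about avoiding undefined behaviour rather than changing the result: negating an int that holds INT_MIN overflows before the conversion, whereas converting to unsigned long first makes the negation well defined. A small illustrative snippet, not part of the patch:

void cast_order_example(void)
{
	int val = INT_MIN;			/* e.g. -2147483648 */

	/* Well defined: conversion first, then modular negation. */
	unsigned long ok = -(unsigned long)val;

	/*
	 * (unsigned long)-val would negate INT_MIN as an int first,
	 * which is signed overflow and therefore undefined behaviour.
	 */
	(void)ok;
}
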
diff --git a/kernel/time/clockevents.c b/kernel/time/clockevents.c
index 50eb107..a9b76a4 100644
--- a/kernel/time/clockevents.c
+++ b/kernel/time/clockevents.c
@@ -97,20 +97,6 @@
static int __clockevents_switch_state(struct clock_event_device *dev,
enum clock_event_state state)
{
- /* Transition with legacy set_mode() callback */
- if (dev->set_mode) {
- /* Legacy callback doesn't support new modes */
- if (state > CLOCK_EVT_STATE_ONESHOT)
- return -ENOSYS;
- /*
- * 'clock_event_state' and 'clock_event_mode' have 1-to-1
- * mapping until *_ONESHOT, and so a simple cast will work.
- */
- dev->set_mode((enum clock_event_mode)state, dev);
- dev->mode = (enum clock_event_mode)state;
- return 0;
- }
-
if (dev->features & CLOCK_EVT_FEAT_DUMMY)
return 0;
@@ -204,12 +190,8 @@
{
int ret = 0;
- if (dev->set_mode) {
- dev->set_mode(CLOCK_EVT_MODE_RESUME, dev);
- dev->mode = CLOCK_EVT_MODE_RESUME;
- } else if (dev->tick_resume) {
+ if (dev->tick_resume)
ret = dev->tick_resume(dev);
- }
return ret;
}
@@ -460,26 +442,6 @@
}
EXPORT_SYMBOL_GPL(clockevents_unbind_device);
-/* Sanity check of state transition callbacks */
-static int clockevents_sanity_check(struct clock_event_device *dev)
-{
- /* Legacy set_mode() callback */
- if (dev->set_mode) {
- /* We shouldn't be supporting new modes now */
- WARN_ON(dev->set_state_periodic || dev->set_state_oneshot ||
- dev->set_state_shutdown || dev->tick_resume ||
- dev->set_state_oneshot_stopped);
-
- BUG_ON(dev->mode != CLOCK_EVT_MODE_UNUSED);
- return 0;
- }
-
- if (dev->features & CLOCK_EVT_FEAT_DUMMY)
- return 0;
-
- return 0;
-}
-
/**
* clockevents_register_device - register a clock event device
* @dev: device to register
@@ -488,8 +450,6 @@
{
unsigned long flags;
- BUG_ON(clockevents_sanity_check(dev));
-
/* Initialize state to DETACHED */
clockevent_set_state(dev, CLOCK_EVT_STATE_DETACHED);
diff --git a/kernel/time/tick-common.c b/kernel/time/tick-common.c
index d11c55b..4fcd99e 100644
--- a/kernel/time/tick-common.c
+++ b/kernel/time/tick-common.c
@@ -398,7 +398,6 @@
* the set mode function!
*/
clockevent_set_state(dev, CLOCK_EVT_STATE_DETACHED);
- dev->mode = CLOCK_EVT_MODE_UNUSED;
clockevents_exchange_device(dev, NULL);
dev->event_handler = clockevents_handle_noop;
td->evtdev = NULL;
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index 3319e16..7c7ec451 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -290,16 +290,17 @@
__setup("nohz_full=", tick_nohz_full_setup);
static int tick_nohz_cpu_down_callback(struct notifier_block *nfb,
- unsigned long action,
- void *hcpu)
+ unsigned long action,
+ void *hcpu)
{
unsigned int cpu = (unsigned long)hcpu;
switch (action & ~CPU_TASKS_FROZEN) {
case CPU_DOWN_PREPARE:
/*
- * If we handle the timekeeping duty for full dynticks CPUs,
- * we can't safely shutdown that CPU.
+ * The boot CPU handles housekeeping duty (unbound timers,
+ * workqueues, timekeeping, ...) on behalf of full dynticks
+ * CPUs. It must remain online when nohz full is enabled.
*/
if (tick_nohz_full_running && tick_do_timer_cpu == cpu)
return NOTIFY_BAD;
@@ -370,6 +371,12 @@
cpu_notifier(tick_nohz_cpu_down_callback, 0);
pr_info("NO_HZ: Full dynticks CPUs: %*pbl.\n",
cpumask_pr_args(tick_nohz_full_mask));
+
+ /*
+ * We need at least one CPU to handle housekeeping work such
+ * as timekeeping, unbound timers, workqueues, ...
+ */
+ WARN_ON_ONCE(cpumask_empty(housekeeping_mask));
}
#endif
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index f6ee2e6..3739ac6 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -1614,7 +1614,7 @@
negative = (tick_error < 0);
/* Sort out the magnitude of the correction */
- tick_error = abs(tick_error);
+ tick_error = abs64(tick_error);
for (adj = 0; tick_error > interval; adj++)
tick_error >>= 1;
diff --git a/kernel/time/timer_list.c b/kernel/time/timer_list.c
index 129c960..f75e35b 100644
--- a/kernel/time/timer_list.c
+++ b/kernel/time/timer_list.c
@@ -225,7 +225,7 @@
(unsigned long long) dev->min_delta_ns);
SEQ_printf(m, " mult: %u\n", dev->mult);
SEQ_printf(m, " shift: %u\n", dev->shift);
- SEQ_printf(m, " mode: %d\n", dev->mode);
+ SEQ_printf(m, " mode: %d\n", clockevent_get_state(dev));
SEQ_printf(m, " next_event: %Ld nsecs\n",
(unsigned long long) ktime_to_ns(dev->next_event));
@@ -233,40 +233,34 @@
print_name_offset(m, dev->set_next_event);
SEQ_printf(m, "\n");
- if (dev->set_mode) {
- SEQ_printf(m, " set_mode: ");
- print_name_offset(m, dev->set_mode);
+ if (dev->set_state_shutdown) {
+ SEQ_printf(m, " shutdown: ");
+ print_name_offset(m, dev->set_state_shutdown);
SEQ_printf(m, "\n");
- } else {
- if (dev->set_state_shutdown) {
- SEQ_printf(m, " shutdown: ");
- print_name_offset(m, dev->set_state_shutdown);
- SEQ_printf(m, "\n");
- }
+ }
- if (dev->set_state_periodic) {
- SEQ_printf(m, " periodic: ");
- print_name_offset(m, dev->set_state_periodic);
- SEQ_printf(m, "\n");
- }
+ if (dev->set_state_periodic) {
+ SEQ_printf(m, " periodic: ");
+ print_name_offset(m, dev->set_state_periodic);
+ SEQ_printf(m, "\n");
+ }
- if (dev->set_state_oneshot) {
- SEQ_printf(m, " oneshot: ");
- print_name_offset(m, dev->set_state_oneshot);
- SEQ_printf(m, "\n");
- }
+ if (dev->set_state_oneshot) {
+ SEQ_printf(m, " oneshot: ");
+ print_name_offset(m, dev->set_state_oneshot);
+ SEQ_printf(m, "\n");
+ }
- if (dev->set_state_oneshot_stopped) {
- SEQ_printf(m, " oneshot stopped: ");
- print_name_offset(m, dev->set_state_oneshot_stopped);
- SEQ_printf(m, "\n");
- }
+ if (dev->set_state_oneshot_stopped) {
+ SEQ_printf(m, " oneshot stopped: ");
+ print_name_offset(m, dev->set_state_oneshot_stopped);
+ SEQ_printf(m, "\n");
+ }
- if (dev->tick_resume) {
- SEQ_printf(m, " resume: ");
- print_name_offset(m, dev->tick_resume);
- SEQ_printf(m, "\n");
- }
+ if (dev->tick_resume) {
+ SEQ_printf(m, " resume: ");
+ print_name_offset(m, dev->tick_resume);
+ SEQ_printf(m, "\n");
}
SEQ_printf(m, " event_handler: ");
diff --git a/lib/bitmap.c b/lib/bitmap.c
index a578a01..8148143 100644
--- a/lib/bitmap.c
+++ b/lib/bitmap.c
@@ -367,7 +367,8 @@
nchunks = nbits = totaldigits = c = 0;
do {
- chunk = ndigits = 0;
+ chunk = 0;
+ ndigits = totaldigits;
/* Get the next chunk of the bitmap */
while (buflen) {
@@ -406,9 +407,9 @@
return -EOVERFLOW;
chunk = (chunk << 4) | hex_to_bin(c);
- ndigits++; totaldigits++;
+ totaldigits++;
}
- if (ndigits == 0)
+ if (ndigits == totaldigits)
return -EINVAL;
if (nchunks == 0 && chunk == 0)
continue;
@@ -505,7 +506,7 @@
int nmaskbits)
{
unsigned a, b;
- int c, old_c, totaldigits;
+ int c, old_c, totaldigits, ndigits;
const char __user __force *ubuf = (const char __user __force *)buf;
int at_start, in_range;
@@ -515,6 +516,7 @@
at_start = 1;
in_range = 0;
a = b = 0;
+ ndigits = totaldigits;
/* Get the next cpu# or a range of cpu#'s */
while (buflen) {
@@ -528,23 +530,27 @@
if (isspace(c))
continue;
- /*
- * If the last character was a space and the current
- * character isn't '\0', we've got embedded whitespace.
- * This is a no-no, so throw an error.
- */
- if (totaldigits && c && isspace(old_c))
- return -EINVAL;
-
/* A '\0' or a ',' signal the end of a cpu# or range */
if (c == '\0' || c == ',')
break;
+ /*
+ * Whitespace between digits is not allowed, but leading or
+ * trailing whitespace is ok. When old_c is whitespace and
+ * totaldigits == ndigits, the whitespace is leading. Trailing
+ * whitespace can never reach this point, because c would then
+ * be ',' or '\0' and the check above would already have broken
+ * out of the loop.
+ */
+ if ((totaldigits != ndigits) && isspace(old_c))
+ return -EINVAL;
if (c == '-') {
if (at_start || in_range)
return -EINVAL;
b = 0;
in_range = 1;
+ at_start = 1;
continue;
}
@@ -557,15 +563,18 @@
at_start = 0;
totaldigits++;
}
+ if (ndigits == totaldigits)
+ continue;
+ /* if there is no digit after '-', it's wrong */
+ if (at_start && in_range)
+ return -EINVAL;
if (!(a <= b))
return -EINVAL;
if (b >= nmaskbits)
return -ERANGE;
- if (!at_start) {
- while (a <= b) {
- set_bit(a, maskp);
- a++;
- }
+ while (a <= b) {
+ set_bit(a, maskp);
+ a++;
}
} while (buflen && c == ',');
return 0;
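
The reworked parser above is reached through bitmap_parselist(). A short, illustrative use of the list syntax it accepts after this change; the buffer size and strings are arbitrary examples.

static int example_parse_list(void)
{
	DECLARE_BITMAP(mask, 8);

	/* Sets bits 0, 3, 4 and 5. */
	int err = bitmap_parselist("0,3-5", mask, 8);

	/*
	 * Strings such as "1-" (no digit after '-') or "0 - 5"
	 * (whitespace between digits) are rejected with -EINVAL.
	 */
	return err;
}
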
diff --git a/lib/decompress_bunzip2.c b/lib/decompress_bunzip2.c
index 6dd0335..0234361 100644
--- a/lib/decompress_bunzip2.c
+++ b/lib/decompress_bunzip2.c
@@ -743,12 +743,12 @@
}
#ifdef PREBOOT
-STATIC int INIT decompress(unsigned char *buf, long len,
+STATIC int INIT __decompress(unsigned char *buf, long len,
long (*fill)(void*, unsigned long),
long (*flush)(void*, unsigned long),
- unsigned char *outbuf,
+ unsigned char *outbuf, long olen,
long *pos,
- void(*error)(char *x))
+ void (*error)(char *x))
{
return bunzip2(buf, len - 4, fill, flush, outbuf, pos, error);
}
diff --git a/lib/decompress_inflate.c b/lib/decompress_inflate.c
index d4c7891..555c06b 100644
--- a/lib/decompress_inflate.c
+++ b/lib/decompress_inflate.c
@@ -1,4 +1,5 @@
#ifdef STATIC
+#define PREBOOT
/* Pre-boot environment: included */
/* prevent inclusion of _LINUX_KERNEL_H in pre-boot environment: lots
@@ -33,23 +34,23 @@
}
/* Included from initramfs et al code */
-STATIC int INIT gunzip(unsigned char *buf, long len,
+STATIC int INIT __gunzip(unsigned char *buf, long len,
long (*fill)(void*, unsigned long),
long (*flush)(void*, unsigned long),
- unsigned char *out_buf,
+ unsigned char *out_buf, long out_len,
long *pos,
void(*error)(char *x)) {
u8 *zbuf;
struct z_stream_s *strm;
int rc;
- size_t out_len;
rc = -1;
if (flush) {
out_len = 0x8000; /* 32 K */
out_buf = malloc(out_len);
} else {
- out_len = ((size_t)~0) - (size_t)out_buf; /* no limit */
+ if (!out_len)
+ out_len = ((size_t)~0) - (size_t)out_buf; /* no limit */
}
if (!out_buf) {
error("Out of memory while allocating output buffer");
@@ -181,4 +182,24 @@
return rc; /* returns Z_OK (0) if successful */
}
-#define decompress gunzip
+#ifndef PREBOOT
+STATIC int INIT gunzip(unsigned char *buf, long len,
+ long (*fill)(void*, unsigned long),
+ long (*flush)(void*, unsigned long),
+ unsigned char *out_buf,
+ long *pos,
+ void (*error)(char *x))
+{
+ return __gunzip(buf, len, fill, flush, out_buf, 0, pos, error);
+}
+#else
+STATIC int INIT __decompress(unsigned char *buf, long len,
+ long (*fill)(void*, unsigned long),
+ long (*flush)(void*, unsigned long),
+ unsigned char *out_buf, long out_len,
+ long *pos,
+ void (*error)(char *x))
+{
+ return __gunzip(buf, len, fill, flush, out_buf, out_len, pos, error);
+}
+#endif
diff --git a/lib/decompress_unlz4.c b/lib/decompress_unlz4.c
index 40f66eb..036fc88 100644
--- a/lib/decompress_unlz4.c
+++ b/lib/decompress_unlz4.c
@@ -196,12 +196,12 @@
}
#ifdef PREBOOT
-STATIC int INIT decompress(unsigned char *buf, long in_len,
+STATIC int INIT __decompress(unsigned char *buf, long in_len,
long (*fill)(void*, unsigned long),
long (*flush)(void*, unsigned long),
- unsigned char *output,
+ unsigned char *output, long out_len,
long *posp,
- void(*error)(char *x)
+ void (*error)(char *x)
)
{
return unlz4(buf, in_len - 4, fill, flush, output, posp, error);
diff --git a/lib/decompress_unlzma.c b/lib/decompress_unlzma.c
index 0be83af..ed7a1fd 100644
--- a/lib/decompress_unlzma.c
+++ b/lib/decompress_unlzma.c
@@ -620,7 +620,7 @@
num_probs = LZMA_BASE_SIZE + (LZMA_LIT_SIZE << (lc + lp));
p = (uint16_t *) large_malloc(num_probs * sizeof(*p));
- if (p == 0)
+ if (p == NULL)
goto exit_2;
num_probs = LZMA_LITERAL + (LZMA_LIT_SIZE << (lc + lp));
for (i = 0; i < num_probs; i++)
@@ -667,13 +667,12 @@
}
#ifdef PREBOOT
-STATIC int INIT decompress(unsigned char *buf, long in_len,
+STATIC int INIT __decompress(unsigned char *buf, long in_len,
long (*fill)(void*, unsigned long),
long (*flush)(void*, unsigned long),
- unsigned char *output,
+ unsigned char *output, long out_len,
long *posp,
- void(*error)(char *x)
- )
+ void (*error)(char *x))
{
return unlzma(buf, in_len - 4, fill, flush, output, posp, error);
}
diff --git a/lib/decompress_unlzo.c b/lib/decompress_unlzo.c
index b94a31b..f4c158e 100644
--- a/lib/decompress_unlzo.c
+++ b/lib/decompress_unlzo.c
@@ -31,6 +31,7 @@
*/
#ifdef STATIC
+#define PREBOOT
#include "lzo/lzo1x_decompress_safe.c"
#else
#include <linux/decompress/unlzo.h>
@@ -287,4 +288,14 @@
return ret;
}
-#define decompress unlzo
+#ifdef PREBOOT
+STATIC int INIT __decompress(unsigned char *buf, long len,
+ long (*fill)(void*, unsigned long),
+ long (*flush)(void*, unsigned long),
+ unsigned char *out_buf, long olen,
+ long *pos,
+ void (*error)(char *x))
+{
+ return unlzo(buf, len, fill, flush, out_buf, pos, error);
+}
+#endif
diff --git a/lib/decompress_unxz.c b/lib/decompress_unxz.c
index b07a783..25d59a9 100644
--- a/lib/decompress_unxz.c
+++ b/lib/decompress_unxz.c
@@ -394,4 +394,14 @@
* This macro is used by architecture-specific files to decompress
* the kernel image.
*/
-#define decompress unxz
+#ifdef XZ_PREBOOT
+STATIC int INIT __decompress(unsigned char *buf, long len,
+ long (*fill)(void*, unsigned long),
+ long (*flush)(void*, unsigned long),
+ unsigned char *out_buf, long olen,
+ long *pos,
+ void (*error)(char *x))
+{
+ return unxz(buf, len, fill, flush, out_buf, pos, error);
+}
+#endif
diff --git a/lib/kstrtox.c b/lib/kstrtox.c
index ec8da78..94be244 100644
--- a/lib/kstrtox.c
+++ b/lib/kstrtox.c
@@ -152,7 +152,7 @@
rv = _kstrtoull(s + 1, base, &tmp);
if (rv < 0)
return rv;
- if ((long long)(-tmp) >= 0)
+ if ((long long)-tmp > 0)
return -ERANGE;
*res = -tmp;
} else {
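
The relaxed comparison above keeps rejecting magnitudes beyond what a long long can hold while no longer rejecting "-0" (see the matching test update further down). Illustrative behaviour of kstrtoll() after the change:

static int example_kstrtoll(void)
{
	long long v;
	int err;

	err = kstrtoll("-0", 10, &v);	/* now succeeds with v == 0 (was -ERANGE) */
	if (err)
		return err;

	err = kstrtoll("-9223372036854775809", 10, &v);	/* still -ERANGE */
	return err;
}
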
diff --git a/lib/string_helpers.c b/lib/string_helpers.c
index c98ae81..54036ce 100644
--- a/lib/string_helpers.c
+++ b/lib/string_helpers.c
@@ -410,7 +410,7 @@
* @dst: destination buffer (escaped)
* @osz: destination buffer size
* @flags: combination of the flags (bitwise OR):
- * %ESCAPE_SPACE:
+ * %ESCAPE_SPACE: (special white space, not space itself)
* '\f' - form feed
* '\n' - new line
* '\r' - carriage return
@@ -432,16 +432,18 @@
* all previous together
* %ESCAPE_HEX:
* '\xHH' - byte with hexadecimal value HH (2 digits)
- * @esc: NULL-terminated string of characters any of which, if found in
- * the source, has to be escaped
+ * @only: NULL-terminated string containing characters used to limit
+ * the selected escape class. If characters are included in @only
+ * that would not normally be escaped by the classes selected
+ * in @flags, they will be copied to @dst unescaped.
*
* Description:
* The process of escaping byte buffer includes several parts. They are applied
* in the following sequence.
* 1. The character is matched to the printable class, if asked, and in
* case of match it passes through to the output.
- * 2. The character is not matched to the one from @esc string and thus
- * must go as is to the output.
+ * 2. The character is not matched to the one from @only string and thus
+ * must go as-is to the output.
* 3. The character is checked if it falls into the class given by @flags.
* %ESCAPE_OCTAL and %ESCAPE_HEX are going last since they cover any
* character. Note that they actually can't go together, otherwise
@@ -458,11 +460,11 @@
* dst for a '\0' terminator if and only if ret < osz.
*/
int string_escape_mem(const char *src, size_t isz, char *dst, size_t osz,
- unsigned int flags, const char *esc)
+ unsigned int flags, const char *only)
{
char *p = dst;
char *end = p + osz;
- bool is_dict = esc && *esc;
+ bool is_dict = only && *only;
while (isz--) {
unsigned char c = *src++;
@@ -471,7 +473,7 @@
* Apply rules in the following sequence:
* - the character is printable, when @flags has
* %ESCAPE_NP bit set
- * - the @esc string is supplied and does not contain a
+ * - the @only string is supplied and does not contain a
* character under question
* - the character doesn't fall into a class of symbols
* defined by given @flags
@@ -479,7 +481,7 @@
* output buffer.
*/
if ((flags & ESCAPE_NP && isprint(c)) ||
- (is_dict && !strchr(esc, c))) {
+ (is_dict && !strchr(only, c))) {
/* do nothing */
} else {
if (flags & ESCAPE_SPACE && escape_space(c, &p, end))
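With the rename, @only narrows the selected escape classes rather than adding characters to be escaped. A short sketch of the resulting behaviour, assuming the usual <linux/string_helpers.h> declarations; the buffer contents are illustrative:

/* Only '\n' is escaped here: it is the one ESCAPE_SPACE member listed in
 * @only, so the tab passes through as-is and dst ends up holding
 * "a\tb\\nc". Returns the number of characters that would be written. */
static int example_escape(char *dst, size_t osz)
{
	const char src[] = "a\tb\nc";

	return string_escape_mem(src, sizeof(src) - 1, dst, osz,
				 ESCAPE_SPACE, "\n");
}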
diff --git a/lib/test-kstrtox.c b/lib/test-kstrtox.c
index 4137bca..f355f67 100644
--- a/lib/test-kstrtox.c
+++ b/lib/test-kstrtox.c
@@ -260,6 +260,7 @@
{"4294967297", 10, 4294967297LL},
{"9223372036854775807", 10, 9223372036854775807LL},
+ {"-0", 10, 0LL},
{"-1", 10, -1LL},
{"-2", 10, -2LL},
{"-9223372036854775808", 10, LLONG_MIN},
@@ -277,11 +278,6 @@
{"-9223372036854775809", 10},
{"-18446744073709551614", 10},
{"-18446744073709551615", 10},
- /* negative zero isn't an integer in Linux */
- {"-0", 0},
- {"-0", 8},
- {"-0", 10},
- {"-0", 16},
/* sign is first character if any */
{"-+1", 0},
{"-+1", 8},
diff --git a/lib/test_kasan.c b/lib/test_kasan.c
index 098c08e..c1efb1b 100644
--- a/lib/test_kasan.c
+++ b/lib/test_kasan.c
@@ -65,7 +65,7 @@
kfree(ptr);
}
-static noinline void __init kmalloc_large_oob_rigth(void)
+static noinline void __init kmalloc_large_oob_right(void)
{
char *ptr;
size_t size = KMALLOC_MAX_CACHE_SIZE + 10;
@@ -114,7 +114,7 @@
kfree(ptr1);
return;
}
- ptr2[size1] = 'x';
+ ptr2[size2] = 'x';
kfree(ptr2);
}
@@ -259,7 +259,7 @@
kmalloc_oob_right();
kmalloc_oob_left();
kmalloc_node_oob_right();
- kmalloc_large_oob_rigth();
+ kmalloc_large_oob_right();
kmalloc_oob_krealloc_more();
kmalloc_oob_krealloc_less();
kmalloc_oob_16();
diff --git a/lib/zlib_deflate/deftree.c b/lib/zlib_deflate/deftree.c
index ddf3482..9b1756b 100644
--- a/lib/zlib_deflate/deftree.c
+++ b/lib/zlib_deflate/deftree.c
@@ -35,6 +35,7 @@
/* #include "deflate.h" */
#include <linux/zutil.h>
+#include <linux/bitrev.h>
#include "defutil.h"
#ifdef DEBUG_ZLIB
@@ -146,7 +147,6 @@
static void compress_block (deflate_state *s, ct_data *ltree,
ct_data *dtree);
static void set_data_type (deflate_state *s);
-static unsigned bi_reverse (unsigned value, int length);
static void bi_windup (deflate_state *s);
static void bi_flush (deflate_state *s);
static void copy_block (deflate_state *s, char *buf, unsigned len,
@@ -284,7 +284,7 @@
/* The static distance tree is trivial: */
for (n = 0; n < D_CODES; n++) {
static_dtree[n].Len = 5;
- static_dtree[n].Code = bi_reverse((unsigned)n, 5);
+ static_dtree[n].Code = bitrev32((u32)n) >> (32 - 5);
}
static_init_done = 1;
}
@@ -520,7 +520,7 @@
int len = tree[n].Len;
if (len == 0) continue;
/* Now reverse the bits */
- tree[n].Code = bi_reverse(next_code[len]++, len);
+ tree[n].Code = bitrev32((u32)(next_code[len]++)) >> (32 - len);
Tracecv(tree != static_ltree, (stderr,"\nn %3d %c l %2d c %4x (%x) ",
n, (isgraph(n) ? n : ' '), len, tree[n].Code, next_code[len]-1));
diff --git a/lib/zlib_deflate/defutil.h b/lib/zlib_deflate/defutil.h
index b640b64..a8c3708 100644
--- a/lib/zlib_deflate/defutil.h
+++ b/lib/zlib_deflate/defutil.h
@@ -293,22 +293,6 @@
}
/* ===========================================================================
- * Reverse the first len bits of a code, using straightforward code (a faster
- * method would use a table)
- * IN assertion: 1 <= len <= 15
- */
-static inline unsigned bi_reverse(unsigned code, /* the value to invert */
- int len) /* its bit length */
-{
- register unsigned res = 0;
- do {
- res |= code & 1;
- code >>= 1, res <<= 1;
- } while (--len > 0);
- return res >> 1;
-}
-
-/* ===========================================================================
* Flush the bit buffer, keeping at most 7 bits in it.
*/
static inline void bi_flush(deflate_state *s)
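The helper removed here is replaced in deftree.c by the generic bitrev32() from <linux/bitrev.h>. The two forms agree for the code lengths deflate uses (1 <= len <= 15): reversing all 32 bits and shifting back down by 32 - len yields exactly the reversal of the low len bits. A self-contained sketch of that check, using reference implementations rather than kernel code:

#include <stdint.h>
#include <assert.h>

static unsigned bi_reverse_ref(unsigned code, int len)
{
	unsigned res = 0;

	do {
		res |= code & 1;
		code >>= 1, res <<= 1;
	} while (--len > 0);
	return res >> 1;
}

static uint32_t bitrev32_ref(uint32_t x)
{
	uint32_t r = 0;
	int i;

	for (i = 0; i < 32; i++)
		r |= ((x >> i) & 1) << (31 - i);
	return r;
}

static void check_equivalence(void)
{
	unsigned code;
	int len;

	for (len = 1; len <= 15; len++)
		for (code = 0; code < (1u << len); code++)
			assert(bi_reverse_ref(code, len) ==
			       (bitrev32_ref(code) >> (32 - len)));
}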
diff --git a/mm/Kconfig b/mm/Kconfig
index 3a4070f..0d9fdcd 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -649,6 +649,18 @@
processes running early in the lifetime of the system until kswapd
finishes the initialisation.
+config IDLE_PAGE_TRACKING
+ bool "Enable idle page tracking"
+ depends on SYSFS && MMU
+ select PAGE_EXTENSION if !64BIT
+ help
+	  This feature allows estimating the number of user pages that have
+ not been touched during a given period of time. This information can
+ be useful to tune memory cgroup limits and/or for job placement
+ within a compute cluster.
+
+ See Documentation/vm/idle_page_tracking.txt for more details.
+
config ZONE_DEVICE
bool "Device memory (pmem, etc...) hotplug support" if EXPERT
default !ZONE_DMA
@@ -665,3 +677,6 @@
mapping in an O_DIRECT operation, among other things.
If FS_DAX is enabled, then say Y.
+
+config FRAME_VECTOR
+ bool
diff --git a/mm/Makefile b/mm/Makefile
index b424d5e..2ed4319 100644
--- a/mm/Makefile
+++ b/mm/Makefile
@@ -79,3 +79,5 @@
obj-$(CONFIG_PAGE_EXTENSION) += page_ext.o
obj-$(CONFIG_CMA_DEBUGFS) += cma_debug.o
obj-$(CONFIG_USERFAULTFD) += userfaultfd.o
+obj-$(CONFIG_IDLE_PAGE_TRACKING) += page_idle.o
+obj-$(CONFIG_FRAME_VECTOR) += frame_vector.o
diff --git a/mm/backing-dev.c b/mm/backing-dev.c
index ee8d7fd..2df8ddc 100644
--- a/mm/backing-dev.c
+++ b/mm/backing-dev.c
@@ -523,7 +523,7 @@
int ret = 0;
memcg = mem_cgroup_from_css(memcg_css);
- blkcg_css = cgroup_get_e_css(memcg_css->cgroup, &blkio_cgrp_subsys);
+ blkcg_css = cgroup_get_e_css(memcg_css->cgroup, &io_cgrp_subsys);
blkcg = css_to_blkcg(blkcg_css);
memcg_cgwb_list = mem_cgroup_cgwb_list(memcg);
blkcg_cgwb_list = &blkcg->cgwb_list;
@@ -645,7 +645,7 @@
/* see whether the blkcg association has changed */
blkcg_css = cgroup_get_e_css(memcg_css->cgroup,
- &blkio_cgrp_subsys);
+ &io_cgrp_subsys);
if (unlikely(wb->blkcg_css != blkcg_css ||
!wb_tryget(wb)))
wb = NULL;
diff --git a/mm/debug.c b/mm/debug.c
index 76089dd..6c1b3ea 100644
--- a/mm/debug.c
+++ b/mm/debug.c
@@ -48,6 +48,10 @@
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
{1UL << PG_compound_lock, "compound_lock" },
#endif
+#if defined(CONFIG_IDLE_PAGE_TRACKING) && defined(CONFIG_64BIT)
+ {1UL << PG_young, "young" },
+ {1UL << PG_idle, "idle" },
+#endif
};
static void dump_flags(unsigned long flags,
diff --git a/mm/early_ioremap.c b/mm/early_ioremap.c
index 23f744d..17ae14b 100644
--- a/mm/early_ioremap.c
+++ b/mm/early_ioremap.c
@@ -15,6 +15,7 @@
#include <linux/mm.h>
#include <linux/vmalloc.h>
#include <asm/fixmap.h>
+#include <asm/early_ioremap.h>
#ifdef CONFIG_MMU
static int early_ioremap_debug __initdata;
diff --git a/mm/frame_vector.c b/mm/frame_vector.c
new file mode 100644
index 0000000..cdabcb9
--- /dev/null
+++ b/mm/frame_vector.c
@@ -0,0 +1,230 @@
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/err.h>
+#include <linux/mm.h>
+#include <linux/slab.h>
+#include <linux/vmalloc.h>
+#include <linux/pagemap.h>
+#include <linux/sched.h>
+
+/*
+ * get_vaddr_frames() - map virtual addresses to pfns
+ * @start: starting user address
+ * @nr_frames: number of pages / pfns from start to map
+ * @write: whether pages will be written to by the caller
+ * @force: whether to force write access even if user mapping is
+ * readonly. See description of the same argument of
+ *		get_user_pages().
+ * @vec: structure which receives pages / pfns of the addresses mapped.
+ * It should have space for at least nr_frames entries.
+ *
+ * This function maps virtual addresses from @start and fills @vec structure
+ * with page frame numbers or page pointers to corresponding pages (choice
+ * depends on the type of the vma underlying the virtual address). If @start
+ * belongs to a normal vma, the function grabs reference to each of the pages
+ * to pin them in memory. If @start belongs to VM_IO | VM_PFNMAP vma, we don't
+ * touch page structures and the caller must make sure pfns aren't reused for
+ * anything else while he is using them.
+ *
+ * The function returns number of pages mapped which may be less than
+ * @nr_frames. In particular we stop mapping if there are more vmas of
+ * different type underlying the specified range of virtual addresses.
+ * When the function isn't able to map a single page, it returns error.
+ *
+ * This function takes care of grabbing mmap_sem as necessary.
+ */
+int get_vaddr_frames(unsigned long start, unsigned int nr_frames,
+ bool write, bool force, struct frame_vector *vec)
+{
+ struct mm_struct *mm = current->mm;
+ struct vm_area_struct *vma;
+ int ret = 0;
+ int err;
+ int locked;
+
+ if (nr_frames == 0)
+ return 0;
+
+ if (WARN_ON_ONCE(nr_frames > vec->nr_allocated))
+ nr_frames = vec->nr_allocated;
+
+ down_read(&mm->mmap_sem);
+ locked = 1;
+ vma = find_vma_intersection(mm, start, start + 1);
+ if (!vma) {
+ ret = -EFAULT;
+ goto out;
+ }
+ if (!(vma->vm_flags & (VM_IO | VM_PFNMAP))) {
+ vec->got_ref = true;
+ vec->is_pfns = false;
+ ret = get_user_pages_locked(current, mm, start, nr_frames,
+ write, force, (struct page **)(vec->ptrs), &locked);
+ goto out;
+ }
+
+ vec->got_ref = false;
+ vec->is_pfns = true;
+ do {
+ unsigned long *nums = frame_vector_pfns(vec);
+
+ while (ret < nr_frames && start + PAGE_SIZE <= vma->vm_end) {
+ err = follow_pfn(vma, start, &nums[ret]);
+ if (err) {
+ if (ret == 0)
+ ret = err;
+ goto out;
+ }
+ start += PAGE_SIZE;
+ ret++;
+ }
+ /*
+ * We stop if we have enough pages or if VMA doesn't completely
+ * cover the tail page.
+ */
+ if (ret >= nr_frames || start < vma->vm_end)
+ break;
+ vma = find_vma_intersection(mm, start, start + 1);
+ } while (vma && vma->vm_flags & (VM_IO | VM_PFNMAP));
+out:
+ if (locked)
+ up_read(&mm->mmap_sem);
+ if (!ret)
+ ret = -EFAULT;
+ if (ret > 0)
+ vec->nr_frames = ret;
+ return ret;
+}
+EXPORT_SYMBOL(get_vaddr_frames);
+
+/**
+ * put_vaddr_frames() - drop references to pages if get_vaddr_frames() acquired
+ * them
+ * @vec: frame vector to put
+ *
+ * Drop references to pages if get_vaddr_frames() acquired them. We also
+ * invalidate the frame vector so that it is prepared for the next call into
+ * get_vaddr_frames().
+ */
+void put_vaddr_frames(struct frame_vector *vec)
+{
+ int i;
+ struct page **pages;
+
+ if (!vec->got_ref)
+ goto out;
+ pages = frame_vector_pages(vec);
+ /*
+	 * frame_vector_pages() might need to do a conversion when
+ * get_vaddr_frames() got pages but vec was later converted to pfns.
+ * But it shouldn't really fail to convert pfns back...
+ */
+ if (WARN_ON(IS_ERR(pages)))
+ goto out;
+ for (i = 0; i < vec->nr_frames; i++)
+ put_page(pages[i]);
+ vec->got_ref = false;
+out:
+ vec->nr_frames = 0;
+}
+EXPORT_SYMBOL(put_vaddr_frames);
+
+/**
+ * frame_vector_to_pages - convert frame vector to contain page pointers
+ * @vec: frame vector to convert
+ *
+ * Convert @vec to contain array of page pointers. If the conversion is
+ * successful, return 0. Otherwise return an error. Note that we do not grab
+ * page references for the page structures.
+ */
+int frame_vector_to_pages(struct frame_vector *vec)
+{
+ int i;
+ unsigned long *nums;
+ struct page **pages;
+
+ if (!vec->is_pfns)
+ return 0;
+ nums = frame_vector_pfns(vec);
+ for (i = 0; i < vec->nr_frames; i++)
+ if (!pfn_valid(nums[i]))
+ return -EINVAL;
+ pages = (struct page **)nums;
+ for (i = 0; i < vec->nr_frames; i++)
+ pages[i] = pfn_to_page(nums[i]);
+ vec->is_pfns = false;
+ return 0;
+}
+EXPORT_SYMBOL(frame_vector_to_pages);
+
+/**
+ * frame_vector_to_pfns - convert frame vector to contain pfns
+ * @vec: frame vector to convert
+ *
+ * Convert @vec to contain array of pfns.
+ */
+void frame_vector_to_pfns(struct frame_vector *vec)
+{
+ int i;
+ unsigned long *nums;
+ struct page **pages;
+
+ if (vec->is_pfns)
+ return;
+ pages = (struct page **)(vec->ptrs);
+ nums = (unsigned long *)pages;
+ for (i = 0; i < vec->nr_frames; i++)
+ nums[i] = page_to_pfn(pages[i]);
+ vec->is_pfns = true;
+}
+EXPORT_SYMBOL(frame_vector_to_pfns);
+
+/**
+ * frame_vector_create() - allocate & initialize structure for pinned pfns
+ * @nr_frames: number of pfns slots we should reserve
+ *
+ * Allocate and initialize a struct frame_vector able to hold @nr_frames
+ * pfns.
+ */
+struct frame_vector *frame_vector_create(unsigned int nr_frames)
+{
+ struct frame_vector *vec;
+ int size = sizeof(struct frame_vector) + sizeof(void *) * nr_frames;
+
+ if (WARN_ON_ONCE(nr_frames == 0))
+ return NULL;
+ /*
+ * This is absurdly high. It's here just to avoid strange effects when
+	 * arithmetic overflows.
+ */
+ if (WARN_ON_ONCE(nr_frames > INT_MAX / sizeof(void *) / 2))
+ return NULL;
+ /*
+ * Avoid higher order allocations, use vmalloc instead. It should
+ * be rare anyway.
+ */
+ if (size <= PAGE_SIZE)
+ vec = kmalloc(size, GFP_KERNEL);
+ else
+ vec = vmalloc(size);
+ if (!vec)
+ return NULL;
+ vec->nr_allocated = nr_frames;
+ vec->nr_frames = 0;
+ return vec;
+}
+EXPORT_SYMBOL(frame_vector_create);
+
+/**
+ * frame_vector_destroy() - free memory allocated to carry frame vector
+ * @vec: Frame vector to free
+ *
+ * Free structure allocated by frame_vector_create() to carry frames.
+ */
+void frame_vector_destroy(struct frame_vector *vec)
+{
+ /* Make sure put_vaddr_frames() got called properly... */
+ VM_BUG_ON(vec->nr_frames > 0);
+ kvfree(vec);
+}
+EXPORT_SYMBOL(frame_vector_destroy);
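A sketch of the intended call sequence for the new API follows; the consumer function and the error handling are illustrative, everything else uses the helpers defined or referenced above (frame_vector_pages() and frame_vector_pfns() are the accessors this file already relies on).

/* Sketch: pin nr pages starting at start, use them as struct page
 * pointers if possible, then release everything. use_page() is a
 * hypothetical consumer, not a real function. */
static int pin_user_range(unsigned long start, unsigned int nr)
{
	struct frame_vector *vec;
	struct page **pages;
	int i, ret;

	vec = frame_vector_create(nr);
	if (!vec)
		return -ENOMEM;

	ret = get_vaddr_frames(start, nr, true, false, vec);
	if (ret < 0)
		goto out_destroy;

	/* only succeeds for frames from normal vmas, not VM_IO/VM_PFNMAP */
	if (frame_vector_to_pages(vec) == 0) {
		pages = frame_vector_pages(vec);
		for (i = 0; i < ret; i++)
			use_page(pages[i]);
	}

	put_vaddr_frames(vec);	/* drops the refs get_vaddr_frames() took */
	ret = 0;
out_destroy:
	frame_vector_destroy(vec);
	return ret;
}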
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index b16279c..4b06b8d 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -25,6 +25,7 @@
#include <linux/migrate.h>
#include <linux/hashtable.h>
#include <linux/userfaultfd_k.h>
+#include <linux/page_idle.h>
#include <asm/tlb.h>
#include <asm/pgalloc.h>
@@ -1757,6 +1758,11 @@
/* clear PageTail before overwriting first_page */
smp_wmb();
+ if (page_is_young(page))
+ set_page_young(page_tail);
+ if (page_is_idle(page))
+ set_page_idle(page_tail);
+
/*
* __split_huge_page_splitting() already set the
* splitting bit in all pmd that could map this
@@ -2262,7 +2268,8 @@
VM_BUG_ON_PAGE(PageLRU(page), page);
/* If there is no mapped pte young don't collapse the page */
- if (pte_young(pteval) || PageReferenced(page) ||
+ if (pte_young(pteval) ||
+ page_is_young(page) || PageReferenced(page) ||
mmu_notifier_test_young(vma->vm_mm, address))
referenced = true;
}
@@ -2693,7 +2700,8 @@
*/
if (page_count(page) != 1 + !!PageSwapCache(page))
goto out_unmap;
- if (pte_young(pteval) || PageReferenced(page) ||
+ if (pte_young(pteval) ||
+ page_is_young(page) || PageReferenced(page) ||
mmu_notifier_test_young(vma->vm_mm, address))
referenced = true;
}
diff --git a/mm/hwpoison-inject.c b/mm/hwpoison-inject.c
index aeba0ed..9d26fd9 100644
--- a/mm/hwpoison-inject.c
+++ b/mm/hwpoison-inject.c
@@ -45,12 +45,9 @@
/*
* do a racy check with elevated page count, to make sure PG_hwpoison
* will only be set for the targeted owner (or on a free page).
- * We temporarily take page lock for try_get_mem_cgroup_from_page().
* memory_failure() will redo the check reliably inside page lock.
*/
- lock_page(hpage);
err = hwpoison_filter(hpage);
- unlock_page(hpage);
if (err)
goto put_out;
@@ -126,7 +123,7 @@
if (!dentry)
goto fail;
-#ifdef CONFIG_MEMCG_SWAP
+#ifdef CONFIG_MEMCG
dentry = debugfs_create_u64("corrupt-filter-memcg", 0600,
hwpoison_dir, &hwpoison_filter_memcg);
if (!dentry)
diff --git a/mm/kmemleak.c b/mm/kmemleak.c
index f532f6a..77191ec 100644
--- a/mm/kmemleak.c
+++ b/mm/kmemleak.c
@@ -302,23 +302,14 @@
struct kmemleak_object *object)
{
const u8 *ptr = (const u8 *)object->pointer;
- int i, len, remaining;
- unsigned char linebuf[HEX_ROW_SIZE * 5];
+ size_t len;
/* limit the number of lines to HEX_MAX_LINES */
- remaining = len =
- min(object->size, (size_t)(HEX_MAX_LINES * HEX_ROW_SIZE));
+ len = min_t(size_t, object->size, HEX_MAX_LINES * HEX_ROW_SIZE);
- seq_printf(seq, " hex dump (first %d bytes):\n", len);
- for (i = 0; i < len; i += HEX_ROW_SIZE) {
- int linelen = min(remaining, HEX_ROW_SIZE);
-
- remaining -= HEX_ROW_SIZE;
- hex_dump_to_buffer(ptr + i, linelen, HEX_ROW_SIZE,
- HEX_GROUP_SIZE, linebuf, sizeof(linebuf),
- HEX_ASCII);
- seq_printf(seq, " %s\n", linebuf);
- }
+ seq_printf(seq, " hex dump (first %zu bytes):\n", len);
+ seq_hex_dump(seq, " ", DUMP_PREFIX_NONE, HEX_ROW_SIZE,
+ HEX_GROUP_SIZE, ptr, len, HEX_ASCII);
}
/*
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 1742a2d..6ddaeba 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -441,6 +441,34 @@
return &memcg->css;
}
+/**
+ * page_cgroup_ino - return inode number of the memcg a page is charged to
+ * @page: the page
+ *
+ * Look up the closest online ancestor of the memory cgroup @page is charged to
+ * and return its inode number or 0 if @page is not charged to any cgroup. It
+ * is safe to call this function without holding a reference to @page.
+ *
+ * Note, this function is inherently racy, because there is nothing to prevent
+ * the cgroup inode from getting torn down and potentially reallocated a moment
+ * after page_cgroup_ino() returns, so it should only be used by callers that
+ * do not care (such as procfs interfaces).
+ */
+ino_t page_cgroup_ino(struct page *page)
+{
+ struct mem_cgroup *memcg;
+ unsigned long ino = 0;
+
+ rcu_read_lock();
+ memcg = READ_ONCE(page->mem_cgroup);
+ while (memcg && !(memcg->css.flags & CSS_ONLINE))
+ memcg = parent_mem_cgroup(memcg);
+ if (memcg)
+ ino = cgroup_ino(memcg->css.cgroup);
+ rcu_read_unlock();
+ return ino;
+}
+
static struct mem_cgroup_per_zone *
mem_cgroup_page_zoneinfo(struct mem_cgroup *memcg, struct page *page)
{
@@ -2071,40 +2099,6 @@
css_put_many(&memcg->css, nr_pages);
}
-/*
- * try_get_mem_cgroup_from_page - look up page's memcg association
- * @page: the page
- *
- * Look up, get a css reference, and return the memcg that owns @page.
- *
- * The page must be locked to prevent racing with swap-in and page
- * cache charges. If coming from an unlocked page table, the caller
- * must ensure the page is on the LRU or this can race with charging.
- */
-struct mem_cgroup *try_get_mem_cgroup_from_page(struct page *page)
-{
- struct mem_cgroup *memcg;
- unsigned short id;
- swp_entry_t ent;
-
- VM_BUG_ON_PAGE(!PageLocked(page), page);
-
- memcg = page->mem_cgroup;
- if (memcg) {
- if (!css_tryget_online(&memcg->css))
- memcg = NULL;
- } else if (PageSwapCache(page)) {
- ent.val = page_private(page);
- id = lookup_swap_cgroup_id(ent);
- rcu_read_lock();
- memcg = mem_cgroup_from_id(id);
- if (memcg && !css_tryget_online(&memcg->css))
- memcg = NULL;
- rcu_read_unlock();
- }
- return memcg;
-}
-
static void lock_page_lru(struct page *page, int *isolated)
{
struct zone *zone = page_zone(page);
@@ -5301,8 +5295,20 @@
* the page lock, which serializes swap cache removal, which
* in turn serializes uncharging.
*/
+ VM_BUG_ON_PAGE(!PageLocked(page), page);
if (page->mem_cgroup)
goto out;
+
+ if (do_swap_account) {
+ swp_entry_t ent = { .val = page_private(page), };
+ unsigned short id = lookup_swap_cgroup_id(ent);
+
+ rcu_read_lock();
+ memcg = mem_cgroup_from_id(id);
+ if (memcg && !css_tryget_online(&memcg->css))
+ memcg = NULL;
+ rcu_read_unlock();
+ }
}
if (PageTransHuge(page)) {
@@ -5310,8 +5316,6 @@
VM_BUG_ON_PAGE(!PageTransHuge(page), page);
}
- if (do_swap_account && PageSwapCache(page))
- memcg = try_get_mem_cgroup_from_page(page);
if (!memcg)
memcg = get_mem_cgroup_from_mm(mm);
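For reference, page_cgroup_ino() is meant for lock-free, best-effort readers; a small sketch of such a caller is below. The pfn-walking wrapper is illustrative - in this series the real users are the hwpoison filter further down and procfs-style interfaces.

/* Sketch: no page lock, no css reference - the result may be stale by the
 * time it is reported, which such readers tolerate. */
static ino_t cgroup_ino_of_pfn(unsigned long pfn)
{
	if (!pfn_valid(pfn))
		return 0;

	return page_cgroup_ino(pfn_to_page(pfn));	/* 0 if not charged */
}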
diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index eeda648..9588269 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -130,27 +130,15 @@
* can only guarantee that the page either belongs to the memcg tasks, or is
* a freed page.
*/
-#ifdef CONFIG_MEMCG_SWAP
+#ifdef CONFIG_MEMCG
u64 hwpoison_filter_memcg;
EXPORT_SYMBOL_GPL(hwpoison_filter_memcg);
static int hwpoison_filter_task(struct page *p)
{
- struct mem_cgroup *mem;
- struct cgroup_subsys_state *css;
- unsigned long ino;
-
if (!hwpoison_filter_memcg)
return 0;
- mem = try_get_mem_cgroup_from_page(p);
- if (!mem)
- return -EINVAL;
-
- css = &mem->css;
- ino = cgroup_ino(css->cgroup);
- css_put(css);
-
- if (ino != hwpoison_filter_memcg)
+ if (page_cgroup_ino(p) != hwpoison_filter_memcg)
return -EINVAL;
return 0;
diff --git a/mm/memory.c b/mm/memory.c
index 6cd0b21..9cb2747 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -3233,7 +3233,7 @@
static int create_huge_pmd(struct mm_struct *mm, struct vm_area_struct *vma,
unsigned long address, pmd_t *pmd, unsigned int flags)
{
- if (!vma->vm_ops)
+ if (vma_is_anonymous(vma))
return do_huge_pmd_anonymous_page(mm, vma, address, pmd, flags);
if (vma->vm_ops->pmd_fault)
return vma->vm_ops->pmd_fault(vma, address, pmd, flags);
@@ -3244,7 +3244,7 @@
unsigned long address, pmd_t *pmd, pmd_t orig_pmd,
unsigned int flags)
{
- if (!vma->vm_ops)
+ if (vma_is_anonymous(vma))
return do_huge_pmd_wp_page(mm, vma, address, pmd, orig_pmd);
if (vma->vm_ops->pmd_fault)
return vma->vm_ops->pmd_fault(vma, address, pmd, flags);
diff --git a/mm/migrate.c b/mm/migrate.c
index 02ce25d..c3cb566 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -37,6 +37,7 @@
#include <linux/gfp.h>
#include <linux/balloon_compaction.h>
#include <linux/mmu_notifier.h>
+#include <linux/page_idle.h>
#include <asm/tlbflush.h>
@@ -524,6 +525,11 @@
__set_page_dirty_nobuffers(newpage);
}
+ if (page_is_young(page))
+ set_page_young(newpage);
+ if (page_is_idle(page))
+ set_page_idle(newpage);
+
/*
* Copy NUMA information to the new page, to prevent over-eager
* future migrations of this same page.
diff --git a/mm/mmap.c b/mm/mmap.c
index b6be324..971dd2c 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -612,6 +612,8 @@
void __vma_link_rb(struct mm_struct *mm, struct vm_area_struct *vma,
struct rb_node **rb_link, struct rb_node *rb_parent)
{
+ WARN_ONCE(vma->vm_file && !vma->vm_ops, "missing vma->vm_ops");
+
/* Update tracking information for the gap following the new vma. */
if (vma->vm_next)
vma_gap_update(vma->vm_next);
@@ -1260,14 +1262,12 @@
/*
* The caller must hold down_write(&current->mm->mmap_sem).
*/
-
-unsigned long do_mmap_pgoff(struct file *file, unsigned long addr,
+unsigned long do_mmap(struct file *file, unsigned long addr,
unsigned long len, unsigned long prot,
- unsigned long flags, unsigned long pgoff,
- unsigned long *populate)
+ unsigned long flags, vm_flags_t vm_flags,
+ unsigned long pgoff, unsigned long *populate)
{
struct mm_struct *mm = current->mm;
- vm_flags_t vm_flags;
*populate = 0;
@@ -1311,7 +1311,7 @@
* to. we assume access permissions have been handled by the open
* of the memory object, so we don't do any here.
*/
- vm_flags = calc_vm_prot_bits(prot) | calc_vm_flag_bits(flags) |
+ vm_flags |= calc_vm_prot_bits(prot) | calc_vm_flag_bits(flags) |
mm->def_flags | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC;
if (flags & MAP_LOCKED)
@@ -1638,6 +1638,12 @@
*/
WARN_ON_ONCE(addr != vma->vm_start);
+ /* All file mapping must have ->vm_ops set */
+ if (!vma->vm_ops) {
+ static const struct vm_operations_struct dummy_ops = {};
+ vma->vm_ops = &dummy_ops;
+ }
+
addr = vma->vm_start;
vm_flags = vma->vm_flags;
} else if (vm_flags & VM_SHARED) {
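With the extra vm_flags argument, existing do_mmap_pgoff() callers are presumably kept working by a thin inline wrapper that passes 0; a sketch of what such a wrapper looks like (assumed to live in <linux/mm.h>, not quoted from this patch):

/* Legacy callers inject no extra VM_* bits; do_mmap() ORs in the bits it
 * computes from prot/flags, so starting from 0 preserves old behaviour. */
static inline unsigned long
do_mmap_pgoff(struct file *file, unsigned long addr,
	      unsigned long len, unsigned long prot,
	      unsigned long flags, unsigned long pgoff,
	      unsigned long *populate)
{
	return do_mmap(file, addr, len, prot, flags, 0, pgoff, populate);
}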
diff --git a/mm/mmu_notifier.c b/mm/mmu_notifier.c
index 3b9b3d0..5fbdd36 100644
--- a/mm/mmu_notifier.c
+++ b/mm/mmu_notifier.c
@@ -123,6 +123,23 @@
return young;
}
+int __mmu_notifier_clear_young(struct mm_struct *mm,
+ unsigned long start,
+ unsigned long end)
+{
+ struct mmu_notifier *mn;
+ int young = 0, id;
+
+ id = srcu_read_lock(&srcu);
+ hlist_for_each_entry_rcu(mn, &mm->mmu_notifier_mm->list, hlist) {
+ if (mn->ops->clear_young)
+ young |= mn->ops->clear_young(mn, mm, start, end);
+ }
+ srcu_read_unlock(&srcu, id);
+
+ return young;
+}
+
int __mmu_notifier_test_young(struct mm_struct *mm,
unsigned long address)
{
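Only notifiers that provide the new ->clear_young hook are called here. A hypothetical implementation from the notifier side is sketched below; the assumed semantics are those of ->clear_flush_young minus the secondary-TLB flush, which is what lets idle-page tracking use it cheaply.

/* Hypothetical notifier: report and clear the accessed state of secondary
 * MMU mappings in [start, end) without flushing the secondary TLB. */
static int demo_clear_young(struct mmu_notifier *mn, struct mm_struct *mm,
			    unsigned long start, unsigned long end)
{
	int young = 0;

	/* scan the secondary MMU mappings of [start, end) here */
	return young;
}

static const struct mmu_notifier_ops demo_mmu_notifier_ops = {
	.clear_young	= demo_clear_young,
};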
diff --git a/mm/nommu.c b/mm/nommu.c
index 1cc0709..ab14a20 100644
--- a/mm/nommu.c
+++ b/mm/nommu.c
@@ -1233,18 +1233,19 @@
/*
* handle mapping creation for uClinux
*/
-unsigned long do_mmap_pgoff(struct file *file,
- unsigned long addr,
- unsigned long len,
- unsigned long prot,
- unsigned long flags,
- unsigned long pgoff,
- unsigned long *populate)
+unsigned long do_mmap(struct file *file,
+ unsigned long addr,
+ unsigned long len,
+ unsigned long prot,
+ unsigned long flags,
+ vm_flags_t vm_flags,
+ unsigned long pgoff,
+ unsigned long *populate)
{
struct vm_area_struct *vma;
struct vm_region *region;
struct rb_node *rb;
- unsigned long capabilities, vm_flags, result;
+ unsigned long capabilities, result;
int ret;
*populate = 0;
@@ -1262,7 +1263,7 @@
/* we've determined that we can make the mapping, now translate what we
* now know into VMA flags */
- vm_flags = determine_vm_flags(file, prot, flags, capabilities);
+ vm_flags |= determine_vm_flags(file, prot, flags, capabilities);
/* we're going to need to record the mapping */
region = kmem_cache_zalloc(vm_region_jar, GFP_KERNEL);
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index 5cccc12..0a931cd 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -1289,7 +1289,7 @@
wb->dirty_ratelimit = max(dirty_ratelimit, 1UL);
wb->balanced_dirty_ratelimit = balanced_dirty_ratelimit;
- trace_bdi_dirty_ratelimit(wb->bdi, dirty_rate, task_ratelimit);
+ trace_bdi_dirty_ratelimit(wb, dirty_rate, task_ratelimit);
}
static void __wb_update_bandwidth(struct dirty_throttle_control *gdtc,
@@ -1683,7 +1683,7 @@
* do a reset, as it may be a light dirtier.
*/
if (pause < min_pause) {
- trace_balance_dirty_pages(bdi,
+ trace_balance_dirty_pages(wb,
sdtc->thresh,
sdtc->bg_thresh,
sdtc->dirty,
@@ -1712,7 +1712,7 @@
}
pause:
- trace_balance_dirty_pages(bdi,
+ trace_balance_dirty_pages(wb,
sdtc->thresh,
sdtc->bg_thresh,
sdtc->dirty,
diff --git a/mm/page_ext.c b/mm/page_ext.c
index d86fd2f..292ca7b 100644
--- a/mm/page_ext.c
+++ b/mm/page_ext.c
@@ -6,6 +6,7 @@
#include <linux/vmalloc.h>
#include <linux/kmemleak.h>
#include <linux/page_owner.h>
+#include <linux/page_idle.h>
/*
* struct page extension
@@ -59,6 +60,9 @@
#ifdef CONFIG_PAGE_OWNER
&page_owner_ops,
#endif
+#if defined(CONFIG_IDLE_PAGE_TRACKING) && !defined(CONFIG_64BIT)
+ &page_idle_ops,
+#endif
};
static unsigned long total_usage;
diff --git a/mm/page_idle.c b/mm/page_idle.c
new file mode 100644
index 0000000..d5dd790
--- /dev/null
+++ b/mm/page_idle.c
@@ -0,0 +1,232 @@
+#include <linux/init.h>
+#include <linux/bootmem.h>
+#include <linux/fs.h>
+#include <linux/sysfs.h>
+#include <linux/kobject.h>
+#include <linux/mm.h>
+#include <linux/mmzone.h>
+#include <linux/pagemap.h>
+#include <linux/rmap.h>
+#include <linux/mmu_notifier.h>
+#include <linux/page_ext.h>
+#include <linux/page_idle.h>
+
+#define BITMAP_CHUNK_SIZE sizeof(u64)
+#define BITMAP_CHUNK_BITS (BITMAP_CHUNK_SIZE * BITS_PER_BYTE)
+
+/*
+ * Idle page tracking only considers user memory pages, for other types of
+ * pages the idle flag is always unset and an attempt to set it is silently
+ * ignored.
+ *
+ * We treat a page as a user memory page if it is on an LRU list, because it is
+ * always safe to pass such a page to rmap_walk(), which is essential for idle
+ * page tracking. With such an indicator of user pages we can skip isolated
+ * pages, but since there are not usually many of them, it will hardly affect
+ * the overall result.
+ *
+ * This function tries to get a user memory page by pfn as described above.
+ */
+static struct page *page_idle_get_page(unsigned long pfn)
+{
+ struct page *page;
+ struct zone *zone;
+
+ if (!pfn_valid(pfn))
+ return NULL;
+
+ page = pfn_to_page(pfn);
+ if (!page || !PageLRU(page) ||
+ !get_page_unless_zero(page))
+ return NULL;
+
+ zone = page_zone(page);
+ spin_lock_irq(&zone->lru_lock);
+ if (unlikely(!PageLRU(page))) {
+ put_page(page);
+ page = NULL;
+ }
+ spin_unlock_irq(&zone->lru_lock);
+ return page;
+}
+
+static int page_idle_clear_pte_refs_one(struct page *page,
+ struct vm_area_struct *vma,
+ unsigned long addr, void *arg)
+{
+ struct mm_struct *mm = vma->vm_mm;
+ spinlock_t *ptl;
+ pmd_t *pmd;
+ pte_t *pte;
+ bool referenced = false;
+
+ if (unlikely(PageTransHuge(page))) {
+ pmd = page_check_address_pmd(page, mm, addr,
+ PAGE_CHECK_ADDRESS_PMD_FLAG, &ptl);
+ if (pmd) {
+ referenced = pmdp_clear_young_notify(vma, addr, pmd);
+ spin_unlock(ptl);
+ }
+ } else {
+ pte = page_check_address(page, mm, addr, &ptl, 0);
+ if (pte) {
+ referenced = ptep_clear_young_notify(vma, addr, pte);
+ pte_unmap_unlock(pte, ptl);
+ }
+ }
+ if (referenced) {
+ clear_page_idle(page);
+ /*
+ * We cleared the referenced bit in a mapping to this page. To
+ * avoid interference with page reclaim, mark it young so that
+ * page_referenced() will return > 0.
+ */
+ set_page_young(page);
+ }
+ return SWAP_AGAIN;
+}
+
+static void page_idle_clear_pte_refs(struct page *page)
+{
+ /*
+ * Since rwc.arg is unused, rwc is effectively immutable, so we
+ * can make it static const to save some cycles and stack.
+ */
+ static const struct rmap_walk_control rwc = {
+ .rmap_one = page_idle_clear_pte_refs_one,
+ .anon_lock = page_lock_anon_vma_read,
+ };
+ bool need_lock;
+
+ if (!page_mapped(page) ||
+ !page_rmapping(page))
+ return;
+
+ need_lock = !PageAnon(page) || PageKsm(page);
+ if (need_lock && !trylock_page(page))
+ return;
+
+ rmap_walk(page, (struct rmap_walk_control *)&rwc);
+
+ if (need_lock)
+ unlock_page(page);
+}
+
+static ssize_t page_idle_bitmap_read(struct file *file, struct kobject *kobj,
+ struct bin_attribute *attr, char *buf,
+ loff_t pos, size_t count)
+{
+ u64 *out = (u64 *)buf;
+ struct page *page;
+ unsigned long pfn, end_pfn;
+ int bit;
+
+ if (pos % BITMAP_CHUNK_SIZE || count % BITMAP_CHUNK_SIZE)
+ return -EINVAL;
+
+ pfn = pos * BITS_PER_BYTE;
+ if (pfn >= max_pfn)
+ return 0;
+
+ end_pfn = pfn + count * BITS_PER_BYTE;
+ if (end_pfn > max_pfn)
+ end_pfn = ALIGN(max_pfn, BITMAP_CHUNK_BITS);
+
+ for (; pfn < end_pfn; pfn++) {
+ bit = pfn % BITMAP_CHUNK_BITS;
+ if (!bit)
+ *out = 0ULL;
+ page = page_idle_get_page(pfn);
+ if (page) {
+ if (page_is_idle(page)) {
+ /*
+ * The page might have been referenced via a
+ * pte, in which case it is not idle. Clear
+ * refs and recheck.
+ */
+ page_idle_clear_pte_refs(page);
+ if (page_is_idle(page))
+ *out |= 1ULL << bit;
+ }
+ put_page(page);
+ }
+ if (bit == BITMAP_CHUNK_BITS - 1)
+ out++;
+ cond_resched();
+ }
+ return (char *)out - buf;
+}
+
+static ssize_t page_idle_bitmap_write(struct file *file, struct kobject *kobj,
+ struct bin_attribute *attr, char *buf,
+ loff_t pos, size_t count)
+{
+ const u64 *in = (u64 *)buf;
+ struct page *page;
+ unsigned long pfn, end_pfn;
+ int bit;
+
+ if (pos % BITMAP_CHUNK_SIZE || count % BITMAP_CHUNK_SIZE)
+ return -EINVAL;
+
+ pfn = pos * BITS_PER_BYTE;
+ if (pfn >= max_pfn)
+ return -ENXIO;
+
+ end_pfn = pfn + count * BITS_PER_BYTE;
+ if (end_pfn > max_pfn)
+ end_pfn = ALIGN(max_pfn, BITMAP_CHUNK_BITS);
+
+ for (; pfn < end_pfn; pfn++) {
+ bit = pfn % BITMAP_CHUNK_BITS;
+ if ((*in >> bit) & 1) {
+ page = page_idle_get_page(pfn);
+ if (page) {
+ page_idle_clear_pte_refs(page);
+ set_page_idle(page);
+ put_page(page);
+ }
+ }
+ if (bit == BITMAP_CHUNK_BITS - 1)
+ in++;
+ cond_resched();
+ }
+ return (char *)in - buf;
+}
+
+static struct bin_attribute page_idle_bitmap_attr =
+ __BIN_ATTR(bitmap, S_IRUSR | S_IWUSR,
+ page_idle_bitmap_read, page_idle_bitmap_write, 0);
+
+static struct bin_attribute *page_idle_bin_attrs[] = {
+ &page_idle_bitmap_attr,
+ NULL,
+};
+
+static struct attribute_group page_idle_attr_group = {
+ .bin_attrs = page_idle_bin_attrs,
+ .name = "page_idle",
+};
+
+#ifndef CONFIG_64BIT
+static bool need_page_idle(void)
+{
+ return true;
+}
+struct page_ext_operations page_idle_ops = {
+ .need = need_page_idle,
+};
+#endif
+
+static int __init page_idle_init(void)
+{
+ int err;
+
+ err = sysfs_create_group(mm_kobj, &page_idle_attr_group);
+ if (err) {
+ pr_err("page_idle: register sysfs failed\n");
+ return err;
+ }
+ return 0;
+}
+subsys_initcall(page_idle_init);
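Given the attribute group registered above, the bitmap should appear as /sys/kernel/mm/page_idle/bitmap (path assumed from the group name; see Documentation/vm/idle_page_tracking.txt). A minimal userspace sketch that marks one pfn idle and later re-checks it:

/* Each 8-byte chunk covers 64 pfns; the chunk for a pfn sits at offset
 * (pfn / 64) * 8 and its bit is pfn % 64. The pfn used here is illustrative. */
#include <fcntl.h>
#include <stdint.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	const char *path = "/sys/kernel/mm/page_idle/bitmap";
	unsigned long pfn = 0x10000;
	off_t off = (pfn / 64) * 8;
	uint64_t chunk = 1ULL << (pfn % 64);
	int fd = open(path, O_RDWR);

	if (fd < 0)
		return 1;

	/* mark the page idle (the kernel clears referenced bits via rmap walk) */
	if (pwrite(fd, &chunk, sizeof(chunk), off) != sizeof(chunk))
		return 1;

	/* ... let the workload run, then see whether the bit survived ... */
	if (pread(fd, &chunk, sizeof(chunk), off) != sizeof(chunk))
		return 1;
	printf("pfn %#lx %s idle\n", pfn,
	       (chunk >> (pfn % 64)) & 1 ? "is" : "is not");

	close(fd);
	return 0;
}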
diff --git a/mm/rmap.c b/mm/rmap.c
index 0db38e7..f5b5c1f 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -59,6 +59,7 @@
#include <linux/migrate.h>
#include <linux/hugetlb.h>
#include <linux/backing-dev.h>
+#include <linux/page_idle.h>
#include <asm/tlbflush.h>
@@ -886,6 +887,11 @@
pte_unmap_unlock(pte, ptl);
}
+ if (referenced)
+ clear_page_idle(page);
+ if (test_and_clear_page_young(page))
+ referenced++;
+
if (referenced) {
pra->referenced++;
pra->vm_flags |= vma->vm_flags;
diff --git a/mm/swap.c b/mm/swap.c
index a3a0a2f..983f692 100644
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -32,6 +32,7 @@
#include <linux/gfp.h>
#include <linux/uio.h>
#include <linux/hugetlb.h>
+#include <linux/page_idle.h>
#include "internal.h"
@@ -622,6 +623,8 @@
} else if (!PageReferenced(page)) {
SetPageReferenced(page);
}
+ if (page_is_idle(page))
+ clear_page_idle(page);
}
EXPORT_SYMBOL(mark_page_accessed);
diff --git a/mm/zpool.c b/mm/zpool.c
index 68d2dd8..8f670d3 100644
--- a/mm/zpool.c
+++ b/mm/zpool.c
@@ -100,6 +100,39 @@
}
/**
+ * zpool_has_pool() - Check if the pool driver is available
+ * @type The type of the zpool to check (e.g. zbud, zsmalloc)
+ *
+ * This checks if the @type pool driver is available. This will try to load
+ * the requested module, if needed, but there is no guarantee the module will
+ * still be loaded and available immediately after calling. If this returns
+ * true, the caller should assume the pool is available, but must be prepared
+ * to handle the @zpool_create_pool() returning failure. However if this
+ * returns false, the caller should assume the requested pool type is not
+ * available; either the requested pool type module does not exist, or could
+ * not be loaded, and calling @zpool_create_pool() with the pool type will
+ * fail.
+ *
+ * Returns: true if @type pool is available, false if not
+ */
+bool zpool_has_pool(char *type)
+{
+ struct zpool_driver *driver = zpool_get_driver(type);
+
+ if (!driver) {
+ request_module("zpool-%s", type);
+ driver = zpool_get_driver(type);
+ }
+
+ if (!driver)
+ return false;
+
+ zpool_put_driver(driver);
+ return true;
+}
+EXPORT_SYMBOL(zpool_has_pool);
+
+/**
* zpool_create_pool() - Create a new zpool
* @type The type of the zpool to create (e.g. zbud, zsmalloc)
* @name The name of the zpool (e.g. zram0, zswap)
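zpool_has_pool() enables a check-before-create pattern; zswap's fallback path below uses it exactly this way. A small sketch of that pattern (the type names and gfp flags are illustrative, and NULL ops simply means no evict callback):

static struct zpool *create_preferred_pool(void)
{
	char *type = "zsmalloc";

	if (!zpool_has_pool(type))
		type = "zbud";	/* fall back to a backend that is built in */

	return zpool_create_pool(type, "demo", GFP_KERNEL, NULL);
}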
diff --git a/mm/zswap.c b/mm/zswap.c
index 48a1d08..4043df7 100644
--- a/mm/zswap.c
+++ b/mm/zswap.c
@@ -80,85 +80,54 @@
static bool zswap_enabled;
module_param_named(enabled, zswap_enabled, bool, 0644);
-/* Compressor to be used by zswap (fixed at boot for now) */
+/* Crypto compressor to use */
#define ZSWAP_COMPRESSOR_DEFAULT "lzo"
-static char *zswap_compressor = ZSWAP_COMPRESSOR_DEFAULT;
-module_param_named(compressor, zswap_compressor, charp, 0444);
+static char zswap_compressor[CRYPTO_MAX_ALG_NAME] = ZSWAP_COMPRESSOR_DEFAULT;
+static struct kparam_string zswap_compressor_kparam = {
+ .string = zswap_compressor,
+ .maxlen = sizeof(zswap_compressor),
+};
+static int zswap_compressor_param_set(const char *,
+ const struct kernel_param *);
+static struct kernel_param_ops zswap_compressor_param_ops = {
+ .set = zswap_compressor_param_set,
+ .get = param_get_string,
+};
+module_param_cb(compressor, &zswap_compressor_param_ops,
+ &zswap_compressor_kparam, 0644);
+
+/* Compressed storage zpool to use */
+#define ZSWAP_ZPOOL_DEFAULT "zbud"
+static char zswap_zpool_type[32 /* arbitrary */] = ZSWAP_ZPOOL_DEFAULT;
+static struct kparam_string zswap_zpool_kparam = {
+ .string = zswap_zpool_type,
+ .maxlen = sizeof(zswap_zpool_type),
+};
+static int zswap_zpool_param_set(const char *, const struct kernel_param *);
+static struct kernel_param_ops zswap_zpool_param_ops = {
+ .set = zswap_zpool_param_set,
+ .get = param_get_string,
+};
+module_param_cb(zpool, &zswap_zpool_param_ops, &zswap_zpool_kparam, 0644);
/* The maximum percentage of memory that the compressed pool can occupy */
static unsigned int zswap_max_pool_percent = 20;
-module_param_named(max_pool_percent,
- zswap_max_pool_percent, uint, 0644);
-
-/* Compressed storage to use */
-#define ZSWAP_ZPOOL_DEFAULT "zbud"
-static char *zswap_zpool_type = ZSWAP_ZPOOL_DEFAULT;
-module_param_named(zpool, zswap_zpool_type, charp, 0444);
-
-/* zpool is shared by all of zswap backend */
-static struct zpool *zswap_pool;
-
-/*********************************
-* compression functions
-**********************************/
-/* per-cpu compression transforms */
-static struct crypto_comp * __percpu *zswap_comp_pcpu_tfms;
-
-enum comp_op {
- ZSWAP_COMPOP_COMPRESS,
- ZSWAP_COMPOP_DECOMPRESS
-};
-
-static int zswap_comp_op(enum comp_op op, const u8 *src, unsigned int slen,
- u8 *dst, unsigned int *dlen)
-{
- struct crypto_comp *tfm;
- int ret;
-
- tfm = *per_cpu_ptr(zswap_comp_pcpu_tfms, get_cpu());
- switch (op) {
- case ZSWAP_COMPOP_COMPRESS:
- ret = crypto_comp_compress(tfm, src, slen, dst, dlen);
- break;
- case ZSWAP_COMPOP_DECOMPRESS:
- ret = crypto_comp_decompress(tfm, src, slen, dst, dlen);
- break;
- default:
- ret = -EINVAL;
- }
-
- put_cpu();
- return ret;
-}
-
-static int __init zswap_comp_init(void)
-{
- if (!crypto_has_comp(zswap_compressor, 0, 0)) {
- pr_info("%s compressor not available\n", zswap_compressor);
- /* fall back to default compressor */
- zswap_compressor = ZSWAP_COMPRESSOR_DEFAULT;
- if (!crypto_has_comp(zswap_compressor, 0, 0))
- /* can't even load the default compressor */
- return -ENODEV;
- }
- pr_info("using %s compressor\n", zswap_compressor);
-
- /* alloc percpu transforms */
- zswap_comp_pcpu_tfms = alloc_percpu(struct crypto_comp *);
- if (!zswap_comp_pcpu_tfms)
- return -ENOMEM;
- return 0;
-}
-
-static void __init zswap_comp_exit(void)
-{
- /* free percpu transforms */
- free_percpu(zswap_comp_pcpu_tfms);
-}
+module_param_named(max_pool_percent, zswap_max_pool_percent, uint, 0644);
/*********************************
* data structures
**********************************/
+
+struct zswap_pool {
+ struct zpool *zpool;
+ struct crypto_comp * __percpu *tfm;
+ struct kref kref;
+ struct list_head list;
+ struct rcu_head rcu_head;
+ struct notifier_block notifier;
+ char tfm_name[CRYPTO_MAX_ALG_NAME];
+};
+
/*
* struct zswap_entry
*
@@ -166,22 +135,24 @@
* page within zswap.
*
* rbnode - links the entry into red-black tree for the appropriate swap type
+ * offset - the swap offset for the entry. Index into the red-black tree.
* refcount - the number of outstanding reference to the entry. This is needed
* to protect against premature freeing of the entry by code
* concurrent calls to load, invalidate, and writeback. The lock
* for the zswap_tree structure that contains the entry must
* be held while changing the refcount. Since the lock must
* be held, there is no reason to also make refcount atomic.
- * offset - the swap offset for the entry. Index into the red-black tree.
- * handle - zpool allocation handle that stores the compressed page data
* length - the length in bytes of the compressed page data. Needed during
* decompression
+ * pool - the zswap_pool the entry's data is in
+ * handle - zpool allocation handle that stores the compressed page data
*/
struct zswap_entry {
struct rb_node rbnode;
pgoff_t offset;
int refcount;
unsigned int length;
+ struct zswap_pool *pool;
unsigned long handle;
};
@@ -201,6 +172,51 @@
static struct zswap_tree *zswap_trees[MAX_SWAPFILES];
+/* RCU-protected iteration */
+static LIST_HEAD(zswap_pools);
+/* protects zswap_pools list modification */
+static DEFINE_SPINLOCK(zswap_pools_lock);
+
+/* used by param callback function */
+static bool zswap_init_started;
+
+/*********************************
+* helpers and fwd declarations
+**********************************/
+
+#define zswap_pool_debug(msg, p) \
+ pr_debug("%s pool %s/%s\n", msg, (p)->tfm_name, \
+ zpool_get_type((p)->zpool))
+
+static int zswap_writeback_entry(struct zpool *pool, unsigned long handle);
+static int zswap_pool_get(struct zswap_pool *pool);
+static void zswap_pool_put(struct zswap_pool *pool);
+
+static const struct zpool_ops zswap_zpool_ops = {
+ .evict = zswap_writeback_entry
+};
+
+static bool zswap_is_full(void)
+{
+ return totalram_pages * zswap_max_pool_percent / 100 <
+ DIV_ROUND_UP(zswap_pool_total_size, PAGE_SIZE);
+}
+
+static void zswap_update_total_size(void)
+{
+ struct zswap_pool *pool;
+ u64 total = 0;
+
+ rcu_read_lock();
+
+ list_for_each_entry_rcu(pool, &zswap_pools, list)
+ total += zpool_get_total_size(pool->zpool);
+
+ rcu_read_unlock();
+
+ zswap_pool_total_size = total;
+}
+
/*********************************
* zswap entry functions
**********************************/
@@ -294,10 +310,11 @@
*/
static void zswap_free_entry(struct zswap_entry *entry)
{
- zpool_free(zswap_pool, entry->handle);
+ zpool_free(entry->pool->zpool, entry->handle);
+ zswap_pool_put(entry->pool);
zswap_entry_cache_free(entry);
atomic_dec(&zswap_stored_pages);
- zswap_pool_total_size = zpool_get_total_size(zswap_pool);
+ zswap_update_total_size();
}
/* caller must hold the tree lock */
@@ -339,35 +356,21 @@
**********************************/
static DEFINE_PER_CPU(u8 *, zswap_dstmem);
-static int __zswap_cpu_notifier(unsigned long action, unsigned long cpu)
+static int __zswap_cpu_dstmem_notifier(unsigned long action, unsigned long cpu)
{
- struct crypto_comp *tfm;
u8 *dst;
switch (action) {
case CPU_UP_PREPARE:
- tfm = crypto_alloc_comp(zswap_compressor, 0, 0);
- if (IS_ERR(tfm)) {
- pr_err("can't allocate compressor transform\n");
- return NOTIFY_BAD;
- }
- *per_cpu_ptr(zswap_comp_pcpu_tfms, cpu) = tfm;
dst = kmalloc_node(PAGE_SIZE * 2, GFP_KERNEL, cpu_to_node(cpu));
if (!dst) {
pr_err("can't allocate compressor buffer\n");
- crypto_free_comp(tfm);
- *per_cpu_ptr(zswap_comp_pcpu_tfms, cpu) = NULL;
return NOTIFY_BAD;
}
per_cpu(zswap_dstmem, cpu) = dst;
break;
case CPU_DEAD:
case CPU_UP_CANCELED:
- tfm = *per_cpu_ptr(zswap_comp_pcpu_tfms, cpu);
- if (tfm) {
- crypto_free_comp(tfm);
- *per_cpu_ptr(zswap_comp_pcpu_tfms, cpu) = NULL;
- }
dst = per_cpu(zswap_dstmem, cpu);
kfree(dst);
per_cpu(zswap_dstmem, cpu) = NULL;
@@ -378,43 +381,398 @@
return NOTIFY_OK;
}
-static int zswap_cpu_notifier(struct notifier_block *nb,
- unsigned long action, void *pcpu)
+static int zswap_cpu_dstmem_notifier(struct notifier_block *nb,
+ unsigned long action, void *pcpu)
{
- unsigned long cpu = (unsigned long)pcpu;
- return __zswap_cpu_notifier(action, cpu);
+ return __zswap_cpu_dstmem_notifier(action, (unsigned long)pcpu);
}
-static struct notifier_block zswap_cpu_notifier_block = {
- .notifier_call = zswap_cpu_notifier
+static struct notifier_block zswap_dstmem_notifier = {
+ .notifier_call = zswap_cpu_dstmem_notifier,
};
-static int __init zswap_cpu_init(void)
+static int __init zswap_cpu_dstmem_init(void)
{
unsigned long cpu;
cpu_notifier_register_begin();
for_each_online_cpu(cpu)
- if (__zswap_cpu_notifier(CPU_UP_PREPARE, cpu) != NOTIFY_OK)
+ if (__zswap_cpu_dstmem_notifier(CPU_UP_PREPARE, cpu) ==
+ NOTIFY_BAD)
goto cleanup;
- __register_cpu_notifier(&zswap_cpu_notifier_block);
+ __register_cpu_notifier(&zswap_dstmem_notifier);
cpu_notifier_register_done();
return 0;
cleanup:
for_each_online_cpu(cpu)
- __zswap_cpu_notifier(CPU_UP_CANCELED, cpu);
+ __zswap_cpu_dstmem_notifier(CPU_UP_CANCELED, cpu);
cpu_notifier_register_done();
return -ENOMEM;
}
-/*********************************
-* helpers
-**********************************/
-static bool zswap_is_full(void)
+static void zswap_cpu_dstmem_destroy(void)
{
- return totalram_pages * zswap_max_pool_percent / 100 <
- DIV_ROUND_UP(zswap_pool_total_size, PAGE_SIZE);
+ unsigned long cpu;
+
+ cpu_notifier_register_begin();
+ for_each_online_cpu(cpu)
+ __zswap_cpu_dstmem_notifier(CPU_UP_CANCELED, cpu);
+ __unregister_cpu_notifier(&zswap_dstmem_notifier);
+ cpu_notifier_register_done();
+}
+
+static int __zswap_cpu_comp_notifier(struct zswap_pool *pool,
+ unsigned long action, unsigned long cpu)
+{
+ struct crypto_comp *tfm;
+
+ switch (action) {
+ case CPU_UP_PREPARE:
+ if (WARN_ON(*per_cpu_ptr(pool->tfm, cpu)))
+ break;
+ tfm = crypto_alloc_comp(pool->tfm_name, 0, 0);
+ if (IS_ERR_OR_NULL(tfm)) {
+ pr_err("could not alloc crypto comp %s : %ld\n",
+ pool->tfm_name, PTR_ERR(tfm));
+ return NOTIFY_BAD;
+ }
+ *per_cpu_ptr(pool->tfm, cpu) = tfm;
+ break;
+ case CPU_DEAD:
+ case CPU_UP_CANCELED:
+ tfm = *per_cpu_ptr(pool->tfm, cpu);
+ if (!IS_ERR_OR_NULL(tfm))
+ crypto_free_comp(tfm);
+ *per_cpu_ptr(pool->tfm, cpu) = NULL;
+ break;
+ default:
+ break;
+ }
+ return NOTIFY_OK;
+}
+
+static int zswap_cpu_comp_notifier(struct notifier_block *nb,
+ unsigned long action, void *pcpu)
+{
+ unsigned long cpu = (unsigned long)pcpu;
+ struct zswap_pool *pool = container_of(nb, typeof(*pool), notifier);
+
+ return __zswap_cpu_comp_notifier(pool, action, cpu);
+}
+
+static int zswap_cpu_comp_init(struct zswap_pool *pool)
+{
+ unsigned long cpu;
+
+ memset(&pool->notifier, 0, sizeof(pool->notifier));
+ pool->notifier.notifier_call = zswap_cpu_comp_notifier;
+
+ cpu_notifier_register_begin();
+ for_each_online_cpu(cpu)
+ if (__zswap_cpu_comp_notifier(pool, CPU_UP_PREPARE, cpu) ==
+ NOTIFY_BAD)
+ goto cleanup;
+ __register_cpu_notifier(&pool->notifier);
+ cpu_notifier_register_done();
+ return 0;
+
+cleanup:
+ for_each_online_cpu(cpu)
+ __zswap_cpu_comp_notifier(pool, CPU_UP_CANCELED, cpu);
+ cpu_notifier_register_done();
+ return -ENOMEM;
+}
+
+static void zswap_cpu_comp_destroy(struct zswap_pool *pool)
+{
+ unsigned long cpu;
+
+ cpu_notifier_register_begin();
+ for_each_online_cpu(cpu)
+ __zswap_cpu_comp_notifier(pool, CPU_UP_CANCELED, cpu);
+ __unregister_cpu_notifier(&pool->notifier);
+ cpu_notifier_register_done();
+}
+
+/*********************************
+* pool functions
+**********************************/
+
+static struct zswap_pool *__zswap_pool_current(void)
+{
+ struct zswap_pool *pool;
+
+ pool = list_first_or_null_rcu(&zswap_pools, typeof(*pool), list);
+ WARN_ON(!pool);
+
+ return pool;
+}
+
+static struct zswap_pool *zswap_pool_current(void)
+{
+ assert_spin_locked(&zswap_pools_lock);
+
+ return __zswap_pool_current();
+}
+
+static struct zswap_pool *zswap_pool_current_get(void)
+{
+ struct zswap_pool *pool;
+
+ rcu_read_lock();
+
+ pool = __zswap_pool_current();
+ if (!pool || !zswap_pool_get(pool))
+ pool = NULL;
+
+ rcu_read_unlock();
+
+ return pool;
+}
+
+static struct zswap_pool *zswap_pool_last_get(void)
+{
+ struct zswap_pool *pool, *last = NULL;
+
+ rcu_read_lock();
+
+ list_for_each_entry_rcu(pool, &zswap_pools, list)
+ last = pool;
+ if (!WARN_ON(!last) && !zswap_pool_get(last))
+ last = NULL;
+
+ rcu_read_unlock();
+
+ return last;
+}
+
+static struct zswap_pool *zswap_pool_find_get(char *type, char *compressor)
+{
+ struct zswap_pool *pool;
+
+ assert_spin_locked(&zswap_pools_lock);
+
+ list_for_each_entry_rcu(pool, &zswap_pools, list) {
+ if (strncmp(pool->tfm_name, compressor, sizeof(pool->tfm_name)))
+ continue;
+ if (strncmp(zpool_get_type(pool->zpool), type,
+ sizeof(zswap_zpool_type)))
+ continue;
+ /* if we can't get it, it's about to be destroyed */
+ if (!zswap_pool_get(pool))
+ continue;
+ return pool;
+ }
+
+ return NULL;
+}
+
+static struct zswap_pool *zswap_pool_create(char *type, char *compressor)
+{
+ struct zswap_pool *pool;
+ gfp_t gfp = __GFP_NORETRY | __GFP_NOWARN;
+
+ pool = kzalloc(sizeof(*pool), GFP_KERNEL);
+ if (!pool) {
+ pr_err("pool alloc failed\n");
+ return NULL;
+ }
+
+ pool->zpool = zpool_create_pool(type, "zswap", gfp, &zswap_zpool_ops);
+ if (!pool->zpool) {
+ pr_err("%s zpool not available\n", type);
+ goto error;
+ }
+ pr_debug("using %s zpool\n", zpool_get_type(pool->zpool));
+
+ strlcpy(pool->tfm_name, compressor, sizeof(pool->tfm_name));
+ pool->tfm = alloc_percpu(struct crypto_comp *);
+ if (!pool->tfm) {
+ pr_err("percpu alloc failed\n");
+ goto error;
+ }
+
+ if (zswap_cpu_comp_init(pool))
+ goto error;
+ pr_debug("using %s compressor\n", pool->tfm_name);
+
+ /* being the current pool takes 1 ref; this func expects the
+ * caller to always add the new pool as the current pool
+ */
+ kref_init(&pool->kref);
+ INIT_LIST_HEAD(&pool->list);
+
+ zswap_pool_debug("created", pool);
+
+ return pool;
+
+error:
+ free_percpu(pool->tfm);
+ if (pool->zpool)
+ zpool_destroy_pool(pool->zpool);
+ kfree(pool);
+ return NULL;
+}
+
+static struct zswap_pool *__zswap_pool_create_fallback(void)
+{
+ if (!crypto_has_comp(zswap_compressor, 0, 0)) {
+ pr_err("compressor %s not available, using default %s\n",
+ zswap_compressor, ZSWAP_COMPRESSOR_DEFAULT);
+ strncpy(zswap_compressor, ZSWAP_COMPRESSOR_DEFAULT,
+ sizeof(zswap_compressor));
+ }
+ if (!zpool_has_pool(zswap_zpool_type)) {
+ pr_err("zpool %s not available, using default %s\n",
+ zswap_zpool_type, ZSWAP_ZPOOL_DEFAULT);
+ strncpy(zswap_zpool_type, ZSWAP_ZPOOL_DEFAULT,
+ sizeof(zswap_zpool_type));
+ }
+
+ return zswap_pool_create(zswap_zpool_type, zswap_compressor);
+}
+
+static void zswap_pool_destroy(struct zswap_pool *pool)
+{
+ zswap_pool_debug("destroying", pool);
+
+ zswap_cpu_comp_destroy(pool);
+ free_percpu(pool->tfm);
+ zpool_destroy_pool(pool->zpool);
+ kfree(pool);
+}
+
+static int __must_check zswap_pool_get(struct zswap_pool *pool)
+{
+ return kref_get_unless_zero(&pool->kref);
+}
+
+static void __zswap_pool_release(struct rcu_head *head)
+{
+ struct zswap_pool *pool = container_of(head, typeof(*pool), rcu_head);
+
+ /* nobody should have been able to get a kref... */
+ WARN_ON(kref_get_unless_zero(&pool->kref));
+
+ /* pool is now off zswap_pools list and has no references. */
+ zswap_pool_destroy(pool);
+}
+
+static void __zswap_pool_empty(struct kref *kref)
+{
+ struct zswap_pool *pool;
+
+ pool = container_of(kref, typeof(*pool), kref);
+
+ spin_lock(&zswap_pools_lock);
+
+ WARN_ON(pool == zswap_pool_current());
+
+ list_del_rcu(&pool->list);
+ call_rcu(&pool->rcu_head, __zswap_pool_release);
+
+ spin_unlock(&zswap_pools_lock);
+}
+
+static void zswap_pool_put(struct zswap_pool *pool)
+{
+ kref_put(&pool->kref, __zswap_pool_empty);
+}
+
+/*********************************
+* param callbacks
+**********************************/
+
+static int __zswap_param_set(const char *val, const struct kernel_param *kp,
+ char *type, char *compressor)
+{
+ struct zswap_pool *pool, *put_pool = NULL;
+ char str[kp->str->maxlen], *s;
+ int ret;
+
+ /*
+ * kp is either zswap_zpool_kparam or zswap_compressor_kparam, defined
+ * at the top of this file, so maxlen is CRYPTO_MAX_ALG_NAME (64) or
+ * 32 (arbitrary).
+ */
+ strlcpy(str, val, kp->str->maxlen);
+ s = strim(str);
+
+ /* if this is load-time (pre-init) param setting,
+ * don't create a pool; that's done during init.
+ */
+ if (!zswap_init_started)
+ return param_set_copystring(s, kp);
+
+ /* no change required */
+ if (!strncmp(kp->str->string, s, kp->str->maxlen))
+ return 0;
+
+ if (!type) {
+ type = s;
+ if (!zpool_has_pool(type)) {
+ pr_err("zpool %s not available\n", type);
+ return -ENOENT;
+ }
+ } else if (!compressor) {
+ compressor = s;
+ if (!crypto_has_comp(compressor, 0, 0)) {
+ pr_err("compressor %s not available\n", compressor);
+ return -ENOENT;
+ }
+ }
+
+ spin_lock(&zswap_pools_lock);
+
+ pool = zswap_pool_find_get(type, compressor);
+ if (pool) {
+ zswap_pool_debug("using existing", pool);
+ list_del_rcu(&pool->list);
+ } else {
+ spin_unlock(&zswap_pools_lock);
+ pool = zswap_pool_create(type, compressor);
+ spin_lock(&zswap_pools_lock);
+ }
+
+ if (pool)
+ ret = param_set_copystring(s, kp);
+ else
+ ret = -EINVAL;
+
+ if (!ret) {
+ put_pool = zswap_pool_current();
+ list_add_rcu(&pool->list, &zswap_pools);
+ } else if (pool) {
+ /* add the possibly pre-existing pool to the end of the pools
+ * list; if it's new (and empty) then it'll be removed and
+ * destroyed by the put after we drop the lock
+ */
+ list_add_tail_rcu(&pool->list, &zswap_pools);
+ put_pool = pool;
+ }
+
+ spin_unlock(&zswap_pools_lock);
+
+ /* drop the ref from either the old current pool,
+ * or the new pool we failed to add
+ */
+ if (put_pool)
+ zswap_pool_put(put_pool);
+
+ return ret;
+}
+
+static int zswap_compressor_param_set(const char *val,
+ const struct kernel_param *kp)
+{
+ return __zswap_param_set(val, kp, zswap_zpool_type, NULL);
+}
+
+static int zswap_zpool_param_set(const char *val,
+ const struct kernel_param *kp)
+{
+ return __zswap_param_set(val, kp, NULL, zswap_compressor);
}
/*********************************
@@ -477,6 +835,7 @@
pgoff_t offset;
struct zswap_entry *entry;
struct page *page;
+ struct crypto_comp *tfm;
u8 *src, *dst;
unsigned int dlen;
int ret;
@@ -517,13 +876,15 @@
case ZSWAP_SWAPCACHE_NEW: /* page is locked */
/* decompress */
dlen = PAGE_SIZE;
- src = (u8 *)zpool_map_handle(zswap_pool, entry->handle,
+ src = (u8 *)zpool_map_handle(entry->pool->zpool, entry->handle,
ZPOOL_MM_RO) + sizeof(struct zswap_header);
dst = kmap_atomic(page);
- ret = zswap_comp_op(ZSWAP_COMPOP_DECOMPRESS, src,
- entry->length, dst, &dlen);
+ tfm = *get_cpu_ptr(entry->pool->tfm);
+ ret = crypto_comp_decompress(tfm, src, entry->length,
+ dst, &dlen);
+ put_cpu_ptr(entry->pool->tfm);
kunmap_atomic(dst);
- zpool_unmap_handle(zswap_pool, entry->handle);
+ zpool_unmap_handle(entry->pool->zpool, entry->handle);
BUG_ON(ret);
BUG_ON(dlen != PAGE_SIZE);
@@ -572,6 +933,22 @@
return ret;
}
+static int zswap_shrink(void)
+{
+ struct zswap_pool *pool;
+ int ret;
+
+ pool = zswap_pool_last_get();
+ if (!pool)
+ return -ENOENT;
+
+ ret = zpool_shrink(pool->zpool, 1, NULL);
+
+ zswap_pool_put(pool);
+
+ return ret;
+}
+
/*********************************
* frontswap hooks
**********************************/
@@ -581,6 +958,7 @@
{
struct zswap_tree *tree = zswap_trees[type];
struct zswap_entry *entry, *dupentry;
+ struct crypto_comp *tfm;
int ret;
unsigned int dlen = PAGE_SIZE, len;
unsigned long handle;
@@ -596,7 +974,7 @@
/* reclaim space if needed */
if (zswap_is_full()) {
zswap_pool_limit_hit++;
- if (zpool_shrink(zswap_pool, 1, NULL)) {
+ if (zswap_shrink()) {
zswap_reject_reclaim_fail++;
ret = -ENOMEM;
goto reject;
@@ -611,33 +989,42 @@
goto reject;
}
- /* compress */
- dst = get_cpu_var(zswap_dstmem);
- src = kmap_atomic(page);
- ret = zswap_comp_op(ZSWAP_COMPOP_COMPRESS, src, PAGE_SIZE, dst, &dlen);
- kunmap_atomic(src);
- if (ret) {
+ /* if entry is successfully added, it keeps the reference */
+ entry->pool = zswap_pool_current_get();
+ if (!entry->pool) {
ret = -EINVAL;
goto freepage;
}
+ /* compress */
+ dst = get_cpu_var(zswap_dstmem);
+ tfm = *get_cpu_ptr(entry->pool->tfm);
+ src = kmap_atomic(page);
+ ret = crypto_comp_compress(tfm, src, PAGE_SIZE, dst, &dlen);
+ kunmap_atomic(src);
+ put_cpu_ptr(entry->pool->tfm);
+ if (ret) {
+ ret = -EINVAL;
+ goto put_dstmem;
+ }
+
/* store */
len = dlen + sizeof(struct zswap_header);
- ret = zpool_malloc(zswap_pool, len, __GFP_NORETRY | __GFP_NOWARN,
- &handle);
+ ret = zpool_malloc(entry->pool->zpool, len,
+ __GFP_NORETRY | __GFP_NOWARN, &handle);
if (ret == -ENOSPC) {
zswap_reject_compress_poor++;
- goto freepage;
+ goto put_dstmem;
}
if (ret) {
zswap_reject_alloc_fail++;
- goto freepage;
+ goto put_dstmem;
}
- zhdr = zpool_map_handle(zswap_pool, handle, ZPOOL_MM_RW);
+ zhdr = zpool_map_handle(entry->pool->zpool, handle, ZPOOL_MM_RW);
zhdr->swpentry = swp_entry(type, offset);
buf = (u8 *)(zhdr + 1);
memcpy(buf, dst, dlen);
- zpool_unmap_handle(zswap_pool, handle);
+ zpool_unmap_handle(entry->pool->zpool, handle);
put_cpu_var(zswap_dstmem);
/* populate entry */
@@ -660,12 +1047,14 @@
/* update stats */
atomic_inc(&zswap_stored_pages);
- zswap_pool_total_size = zpool_get_total_size(zswap_pool);
+ zswap_update_total_size();
return 0;
-freepage:
+put_dstmem:
put_cpu_var(zswap_dstmem);
+ zswap_pool_put(entry->pool);
+freepage:
zswap_entry_cache_free(entry);
reject:
return ret;
@@ -680,6 +1069,7 @@
{
struct zswap_tree *tree = zswap_trees[type];
struct zswap_entry *entry;
+ struct crypto_comp *tfm;
u8 *src, *dst;
unsigned int dlen;
int ret;
@@ -696,13 +1086,14 @@
/* decompress */
dlen = PAGE_SIZE;
- src = (u8 *)zpool_map_handle(zswap_pool, entry->handle,
+ src = (u8 *)zpool_map_handle(entry->pool->zpool, entry->handle,
ZPOOL_MM_RO) + sizeof(struct zswap_header);
dst = kmap_atomic(page);
- ret = zswap_comp_op(ZSWAP_COMPOP_DECOMPRESS, src, entry->length,
- dst, &dlen);
+ tfm = *get_cpu_ptr(entry->pool->tfm);
+ ret = crypto_comp_decompress(tfm, src, entry->length, dst, &dlen);
+ put_cpu_ptr(entry->pool->tfm);
kunmap_atomic(dst);
- zpool_unmap_handle(zswap_pool, entry->handle);
+ zpool_unmap_handle(entry->pool->zpool, entry->handle);
BUG_ON(ret);
spin_lock(&tree->lock);
@@ -755,10 +1146,6 @@
zswap_trees[type] = NULL;
}
-static const struct zpool_ops zswap_zpool_ops = {
- .evict = zswap_writeback_entry
-};
-
static void zswap_frontswap_init(unsigned type)
{
struct zswap_tree *tree;
@@ -839,49 +1226,40 @@
**********************************/
static int __init init_zswap(void)
{
- gfp_t gfp = __GFP_NORETRY | __GFP_NOWARN;
+ struct zswap_pool *pool;
- pr_info("loading zswap\n");
-
- zswap_pool = zpool_create_pool(zswap_zpool_type, "zswap", gfp,
- &zswap_zpool_ops);
- if (!zswap_pool && strcmp(zswap_zpool_type, ZSWAP_ZPOOL_DEFAULT)) {
- pr_info("%s zpool not available\n", zswap_zpool_type);
- zswap_zpool_type = ZSWAP_ZPOOL_DEFAULT;
- zswap_pool = zpool_create_pool(zswap_zpool_type, "zswap", gfp,
- &zswap_zpool_ops);
- }
- if (!zswap_pool) {
- pr_err("%s zpool not available\n", zswap_zpool_type);
- pr_err("zpool creation failed\n");
- goto error;
- }
- pr_info("using %s pool\n", zswap_zpool_type);
+ zswap_init_started = true;
if (zswap_entry_cache_create()) {
pr_err("entry cache creation failed\n");
- goto cachefail;
+ goto cache_fail;
}
- if (zswap_comp_init()) {
- pr_err("compressor initialization failed\n");
- goto compfail;
+
+ if (zswap_cpu_dstmem_init()) {
+ pr_err("dstmem alloc failed\n");
+ goto dstmem_fail;
}
- if (zswap_cpu_init()) {
- pr_err("per-cpu initialization failed\n");
- goto pcpufail;
+
+ pool = __zswap_pool_create_fallback();
+ if (!pool) {
+ pr_err("pool creation failed\n");
+ goto pool_fail;
}
+ pr_info("loaded using pool %s/%s\n", pool->tfm_name,
+ zpool_get_type(pool->zpool));
+
+ list_add(&pool->list, &zswap_pools);
frontswap_register_ops(&zswap_frontswap_ops);
if (zswap_debugfs_init())
pr_warn("debugfs initialization failed\n");
return 0;
-pcpufail:
- zswap_comp_exit();
-compfail:
+
+pool_fail:
+ zswap_cpu_dstmem_destroy();
+dstmem_fail:
zswap_entry_cache_destroy();
-cachefail:
- zpool_destroy_pool(zswap_pool);
-error:
+cache_fail:
return -ENOMEM;
}
/* must be late so crypto has time to come up */
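
A note on the zswap_update_total_size() call introduced above: with more than one active pool the global counter can no longer be read off a single zpool, so it presumably has to be recomputed by walking the pool list. A minimal sketch of such a helper, assuming the zswap_pools list is traversed under RCU as elsewhere in this series; this is illustrative, not the exact upstream body:

static void zswap_update_total_size_sketch(void)
{
	struct zswap_pool *pool;
	u64 total = 0;

	/* sum the backing zpool sizes over every active pool */
	rcu_read_lock();
	list_for_each_entry_rcu(pool, &zswap_pools, list)
		total += zpool_get_total_size(pool->zpool);
	rcu_read_unlock();

	zswap_pool_total_size = total;
}
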
diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c
index af5e187..ea748c9 100644
--- a/net/bridge/br_netlink.c
+++ b/net/bridge/br_netlink.c
@@ -16,7 +16,6 @@
#include <net/rtnetlink.h>
#include <net/net_namespace.h>
#include <net/sock.h>
-#include <net/switchdev.h>
#include <uapi/linux/if_bridge.h>
#include "br_private.h"
diff --git a/net/bridge/br_vlan.c b/net/bridge/br_vlan.c
index 3cd8cc9..5f5a02b 100644
--- a/net/bridge/br_vlan.c
+++ b/net/bridge/br_vlan.c
@@ -117,10 +117,11 @@
return err;
}
-static void __vlan_vid_del(struct net_device *dev, struct net_bridge *br,
- u16 vid)
+static int __vlan_vid_del(struct net_device *dev, struct net_bridge *br,
+ u16 vid)
{
const struct net_device_ops *ops = dev->netdev_ops;
+ int err = 0;
/* If driver uses VLAN ndo ops, use 8021q to delete vid
* on device, otherwise try switchdev ops to delete vid.
@@ -137,8 +138,12 @@
},
};
- switchdev_port_obj_del(dev, &vlan_obj);
+ err = switchdev_port_obj_del(dev, &vlan_obj);
+ if (err == -EOPNOTSUPP)
+ err = 0;
}
+
+ return err;
}
static int __vlan_del(struct net_port_vlans *v, u16 vid)
@@ -151,7 +156,11 @@
if (v->port_idx) {
struct net_bridge_port *p = v->parent.port;
- __vlan_vid_del(p->dev, p->br, vid);
+ int err;
+
+ err = __vlan_vid_del(p->dev, p->br, vid);
+ if (err)
+ return err;
}
clear_bit(vid, v->vlan_bitmap);
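
The behavioural point in the hunk above is that a switchdev backend returning -EOPNOTSUPP must not fail the deletion, since software-only bridge ports have no offload to undo. A hedged, generic sketch of that convention (do_hw_vlan_del() is a hypothetical offload hook used only for illustration, not a real API):

static int vlan_del_with_optional_offload(struct net_device *dev, u16 vid)
{
	/* hypothetical hardware hook; the bridge really goes through switchdev */
	int err = do_hw_vlan_del(dev, vid);

	/* lack of offload support is not an error for the software path */
	if (err == -EOPNOTSUPP)
		err = 0;

	return err;
}
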
diff --git a/net/ceph/ceph_common.c b/net/ceph/ceph_common.c
index 69a4d30..54a00d6 100644
--- a/net/ceph/ceph_common.c
+++ b/net/ceph/ceph_common.c
@@ -357,6 +357,7 @@
opt->osd_keepalive_timeout = CEPH_OSD_KEEPALIVE_DEFAULT;
opt->mount_timeout = CEPH_MOUNT_TIMEOUT_DEFAULT;
opt->osd_idle_ttl = CEPH_OSD_IDLE_TTL_DEFAULT;
+ opt->monc_ping_timeout = CEPH_MONC_PING_TIMEOUT_DEFAULT;
/* get mon ip(s) */
/* ip1[:port1][,ip2[:port2]...] */
diff --git a/net/ceph/crypto.c b/net/ceph/crypto.c
index 790fe89..4440edc 100644
--- a/net/ceph/crypto.c
+++ b/net/ceph/crypto.c
@@ -79,10 +79,6 @@
return 0;
}
-
-
-#define AES_KEY_SIZE 16
-
static struct crypto_blkcipher *ceph_crypto_alloc_cipher(void)
{
return crypto_alloc_blkcipher("cbc(aes)", 0, CRYPTO_ALG_ASYNC);
diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c
index e3be1d2..525f454 100644
--- a/net/ceph/messenger.c
+++ b/net/ceph/messenger.c
@@ -163,6 +163,7 @@
static char tag_msg = CEPH_MSGR_TAG_MSG;
static char tag_ack = CEPH_MSGR_TAG_ACK;
static char tag_keepalive = CEPH_MSGR_TAG_KEEPALIVE;
+static char tag_keepalive2 = CEPH_MSGR_TAG_KEEPALIVE2;
#ifdef CONFIG_LOCKDEP
static struct lock_class_key socket_class;
@@ -176,7 +177,7 @@
static void queue_con(struct ceph_connection *con);
static void cancel_con(struct ceph_connection *con);
-static void con_work(struct work_struct *);
+static void ceph_con_workfn(struct work_struct *);
static void con_fault(struct ceph_connection *con);
/*
@@ -276,22 +277,22 @@
ceph_msgr_wq = NULL;
}
- ceph_msgr_slab_exit();
-
BUG_ON(zero_page == NULL);
page_cache_release(zero_page);
zero_page = NULL;
+
+ ceph_msgr_slab_exit();
}
int ceph_msgr_init(void)
{
+ if (ceph_msgr_slab_init())
+ return -ENOMEM;
+
BUG_ON(zero_page != NULL);
zero_page = ZERO_PAGE(0);
page_cache_get(zero_page);
- if (ceph_msgr_slab_init())
- return -ENOMEM;
-
/*
* The number of active work items is limited by the number of
* connections, so leave @max_active at default.
@@ -749,7 +750,7 @@
mutex_init(&con->mutex);
INIT_LIST_HEAD(&con->out_queue);
INIT_LIST_HEAD(&con->out_sent);
- INIT_DELAYED_WORK(&con->work, con_work);
+ INIT_DELAYED_WORK(&con->work, ceph_con_workfn);
con->state = CON_STATE_CLOSED;
}
@@ -1351,7 +1352,15 @@
{
dout("prepare_write_keepalive %p\n", con);
con_out_kvec_reset(con);
- con_out_kvec_add(con, sizeof (tag_keepalive), &tag_keepalive);
+ if (con->peer_features & CEPH_FEATURE_MSGR_KEEPALIVE2) {
+ struct timespec ts = CURRENT_TIME;
+ struct ceph_timespec ceph_ts;
+ ceph_encode_timespec(&ceph_ts, &ts);
+ con_out_kvec_add(con, sizeof(tag_keepalive2), &tag_keepalive2);
+ con_out_kvec_add(con, sizeof(ceph_ts), &ceph_ts);
+ } else {
+ con_out_kvec_add(con, sizeof(tag_keepalive), &tag_keepalive);
+ }
con_flag_set(con, CON_FLAG_WRITE_PENDING);
}
@@ -1625,6 +1634,12 @@
con->in_tag = CEPH_MSGR_TAG_READY;
}
+static void prepare_read_keepalive_ack(struct ceph_connection *con)
+{
+ dout("prepare_read_keepalive_ack %p\n", con);
+ con->in_base_pos = 0;
+}
+
/*
* Prepare to read a message.
*/
@@ -2322,13 +2337,6 @@
return ret;
BUG_ON(!con->in_msg ^ skip);
- if (con->in_msg && data_len > con->in_msg->data_length) {
- pr_warn("%s skipping long message (%u > %zd)\n",
- __func__, data_len, con->in_msg->data_length);
- ceph_msg_put(con->in_msg);
- con->in_msg = NULL;
- skip = 1;
- }
if (skip) {
/* skip this message */
dout("alloc_msg said skip message\n");
@@ -2457,6 +2465,17 @@
mutex_lock(&con->mutex);
}
+static int read_keepalive_ack(struct ceph_connection *con)
+{
+ struct ceph_timespec ceph_ts;
+ size_t size = sizeof(ceph_ts);
+ int ret = read_partial(con, size, size, &ceph_ts);
+ if (ret <= 0)
+ return ret;
+ ceph_decode_timespec(&con->last_keepalive_ack, &ceph_ts);
+ prepare_read_tag(con);
+ return 1;
+}
/*
* Write something to the socket. Called in a worker thread when the
@@ -2526,6 +2545,10 @@
do_next:
if (con->state == CON_STATE_OPEN) {
+ if (con_flag_test_and_clear(con, CON_FLAG_KEEPALIVE_PENDING)) {
+ prepare_write_keepalive(con);
+ goto more;
+ }
/* is anything else pending? */
if (!list_empty(&con->out_queue)) {
prepare_write_message(con);
@@ -2535,10 +2558,6 @@
prepare_write_ack(con);
goto more;
}
- if (con_flag_test_and_clear(con, CON_FLAG_KEEPALIVE_PENDING)) {
- prepare_write_keepalive(con);
- goto more;
- }
}
/* Nothing to do! */
@@ -2641,6 +2660,9 @@
case CEPH_MSGR_TAG_ACK:
prepare_read_ack(con);
break;
+ case CEPH_MSGR_TAG_KEEPALIVE2_ACK:
+ prepare_read_keepalive_ack(con);
+ break;
case CEPH_MSGR_TAG_CLOSE:
con_close_socket(con);
con->state = CON_STATE_CLOSED;
@@ -2684,6 +2706,12 @@
process_ack(con);
goto more;
}
+ if (con->in_tag == CEPH_MSGR_TAG_KEEPALIVE2_ACK) {
+ ret = read_keepalive_ack(con);
+ if (ret <= 0)
+ goto out;
+ goto more;
+ }
out:
dout("try_read done on %p ret %d\n", con, ret);
@@ -2799,7 +2827,7 @@
/*
* Do some work on a connection. Drop a connection ref when we're done.
*/
-static void con_work(struct work_struct *work)
+static void ceph_con_workfn(struct work_struct *work)
{
struct ceph_connection *con = container_of(work, struct ceph_connection,
work.work);
@@ -3101,6 +3129,20 @@
}
EXPORT_SYMBOL(ceph_con_keepalive);
+bool ceph_con_keepalive_expired(struct ceph_connection *con,
+ unsigned long interval)
+{
+ if (interval > 0 &&
+ (con->peer_features & CEPH_FEATURE_MSGR_KEEPALIVE2)) {
+ struct timespec now = CURRENT_TIME;
+ struct timespec ts;
+ jiffies_to_timespec(interval, &ts);
+ ts = timespec_add(con->last_keepalive_ack, ts);
+ return timespec_compare(&now, &ts) >= 0;
+ }
+ return false;
+}
+
static struct ceph_msg_data *ceph_msg_data_create(enum ceph_msg_data_type type)
{
struct ceph_msg_data *data;
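
The KEEPALIVE2 support above records the timestamp acknowledged by the peer and later asks whether last_keepalive_ack plus the configured interval lies in the past. The same timespec arithmetic, written as a self-contained userspace sketch with standard POSIX calls (independent of the kernel helpers used in the patch):

#include <stdbool.h>
#include <time.h>

/* true if no ack has arrived within interval_sec seconds */
static bool keepalive_expired(const struct timespec *last_ack,
			      long interval_sec)
{
	struct timespec now, deadline;

	clock_gettime(CLOCK_REALTIME, &now);

	deadline.tv_sec = last_ack->tv_sec + interval_sec;
	deadline.tv_nsec = last_ack->tv_nsec;

	if (now.tv_sec != deadline.tv_sec)
		return now.tv_sec > deadline.tv_sec;
	return now.tv_nsec >= deadline.tv_nsec;
}
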
diff --git a/net/ceph/mon_client.c b/net/ceph/mon_client.c
index 9d6ff121..edda016 100644
--- a/net/ceph/mon_client.c
+++ b/net/ceph/mon_client.c
@@ -149,6 +149,10 @@
CEPH_ENTITY_TYPE_MON, monc->cur_mon,
&monc->monmap->mon_inst[monc->cur_mon].addr);
+ /* send an initial keepalive to ensure our timestamp is
+ * valid by the time we are in an OPENED state */
+ ceph_con_keepalive(&monc->con);
+
/* initiatiate authentication handshake */
ret = ceph_auth_build_hello(monc->auth,
monc->m_auth->front.iov_base,
@@ -170,14 +174,19 @@
*/
static void __schedule_delayed(struct ceph_mon_client *monc)
{
- unsigned int delay;
+ struct ceph_options *opt = monc->client->options;
+ unsigned long delay;
- if (monc->cur_mon < 0 || __sub_expired(monc))
+ if (monc->cur_mon < 0 || __sub_expired(monc)) {
delay = 10 * HZ;
- else
+ } else {
delay = 20 * HZ;
- dout("__schedule_delayed after %u\n", delay);
- schedule_delayed_work(&monc->delayed_work, delay);
+ if (opt->monc_ping_timeout > 0)
+ delay = min(delay, opt->monc_ping_timeout / 3);
+ }
+ dout("__schedule_delayed after %lu\n", delay);
+ schedule_delayed_work(&monc->delayed_work,
+ round_jiffies_relative(delay));
}
/*
@@ -743,11 +752,23 @@
__close_session(monc);
__open_session(monc); /* continue hunting */
} else {
- ceph_con_keepalive(&monc->con);
+ struct ceph_options *opt = monc->client->options;
+ int is_auth = ceph_auth_is_authenticated(monc->auth);
+ if (ceph_con_keepalive_expired(&monc->con,
+ opt->monc_ping_timeout)) {
+ dout("monc keepalive timeout\n");
+ is_auth = 0;
+ __close_session(monc);
+ monc->hunting = true;
+ __open_session(monc);
+ }
- __validate_auth(monc);
+ if (!monc->hunting) {
+ ceph_con_keepalive(&monc->con);
+ __validate_auth(monc);
+ }
- if (ceph_auth_is_authenticated(monc->auth))
+ if (is_auth)
__send_subscribe(monc);
}
__schedule_delayed(monc);
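
The delay calculation above caps the delayed-work interval at a third of the monitor ping timeout, so the client gets several chances to notice a missing keepalive ack before the timeout actually fires. The arithmetic in isolation (sketch; values are examples only):

static unsigned long monc_delay_sketch(unsigned long hz,
				       unsigned long ping_timeout)
{
	unsigned long delay = 20 * hz;		/* default 20 s tick */

	if (ping_timeout > 0 && ping_timeout / 3 < delay)
		delay = ping_timeout / 3;	/* e.g. 30 s timeout -> 10 s */

	return delay;
}
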
diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c
index 5003367..80b94e3 100644
--- a/net/ceph/osd_client.c
+++ b/net/ceph/osd_client.c
@@ -2817,8 +2817,9 @@
}
/*
- * lookup and return message for incoming reply. set up reply message
- * pages.
+ * Lookup and return message for incoming reply. Don't try to do
+ * anything about a larger than preallocated data portion of the
+ * message at the moment - for now, just skip the message.
*/
static struct ceph_msg *get_reply(struct ceph_connection *con,
struct ceph_msg_header *hdr,
@@ -2836,10 +2837,10 @@
mutex_lock(&osdc->request_mutex);
req = __lookup_request(osdc, tid);
if (!req) {
- *skip = 1;
+ pr_warn("%s osd%d tid %llu unknown, skipping\n",
+ __func__, osd->o_osd, tid);
m = NULL;
- dout("get_reply unknown tid %llu from osd%d\n", tid,
- osd->o_osd);
+ *skip = 1;
goto out;
}
@@ -2849,10 +2850,9 @@
ceph_msg_revoke_incoming(req->r_reply);
if (front_len > req->r_reply->front_alloc_len) {
- pr_warn("get_reply front %d > preallocated %d (%u#%llu)\n",
- front_len, req->r_reply->front_alloc_len,
- (unsigned int)con->peer_name.type,
- le64_to_cpu(con->peer_name.num));
+ pr_warn("%s osd%d tid %llu front %d > preallocated %d\n",
+ __func__, osd->o_osd, req->r_tid, front_len,
+ req->r_reply->front_alloc_len);
m = ceph_msg_new(CEPH_MSG_OSD_OPREPLY, front_len, GFP_NOFS,
false);
if (!m)
@@ -2860,37 +2860,22 @@
ceph_msg_put(req->r_reply);
req->r_reply = m;
}
- m = ceph_msg_get(req->r_reply);
- if (data_len > 0) {
- struct ceph_osd_data *osd_data;
-
- /*
- * XXX This is assuming there is only one op containing
- * XXX page data. Probably OK for reads, but this
- * XXX ought to be done more generally.
- */
- osd_data = osd_req_op_extent_osd_data(req, 0);
- if (osd_data->type == CEPH_OSD_DATA_TYPE_PAGES) {
- if (osd_data->pages &&
- unlikely(osd_data->length < data_len)) {
-
- pr_warn("tid %lld reply has %d bytes we had only %llu bytes ready\n",
- tid, data_len, osd_data->length);
- *skip = 1;
- ceph_msg_put(m);
- m = NULL;
- goto out;
- }
- }
+ if (data_len > req->r_reply->data_length) {
+ pr_warn("%s osd%d tid %llu data %d > preallocated %zu, skipping\n",
+ __func__, osd->o_osd, req->r_tid, data_len,
+ req->r_reply->data_length);
+ m = NULL;
+ *skip = 1;
+ goto out;
}
- *skip = 0;
+
+ m = ceph_msg_get(req->r_reply);
dout("get_reply tid %lld %p\n", tid, m);
out:
mutex_unlock(&osdc->request_mutex);
return m;
-
}
static struct ceph_msg *alloc_msg(struct ceph_connection *con,
diff --git a/net/ceph/osdmap.c b/net/ceph/osdmap.c
index 4a31258..7d8f581 100644
--- a/net/ceph/osdmap.c
+++ b/net/ceph/osdmap.c
@@ -1300,7 +1300,7 @@
ceph_decode_addr(&addr);
pr_info("osd%d up\n", osd);
BUG_ON(osd >= map->max_osd);
- map->osd_state[osd] |= CEPH_OSD_UP;
+ map->osd_state[osd] |= CEPH_OSD_UP | CEPH_OSD_EXISTS;
map->osd_addr[osd] = addr;
}
diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c
index ae8306e..bf77e36 100644
--- a/net/core/fib_rules.c
+++ b/net/core/fib_rules.c
@@ -44,7 +44,7 @@
}
EXPORT_SYMBOL(fib_default_rule_add);
-u32 fib_default_rule_pref(struct fib_rules_ops *ops)
+static u32 fib_default_rule_pref(struct fib_rules_ops *ops)
{
struct list_head *pos;
struct fib_rule *rule;
@@ -60,7 +60,6 @@
return 0;
}
-EXPORT_SYMBOL(fib_default_rule_pref);
static void notify_rule_change(int event, struct fib_rule *rule,
struct fib_rules_ops *ops, struct nlmsghdr *nlh,
@@ -299,8 +298,8 @@
}
rule->fr_net = net;
- if (tb[FRA_PRIORITY])
- rule->pref = nla_get_u32(tb[FRA_PRIORITY]);
+ rule->pref = tb[FRA_PRIORITY] ? nla_get_u32(tb[FRA_PRIORITY])
+ : fib_default_rule_pref(ops);
if (tb[FRA_IIFNAME]) {
struct net_device *dev;
@@ -350,9 +349,6 @@
else
rule->suppress_ifgroup = -1;
- if (!tb[FRA_PRIORITY] && ops->default_pref)
- rule->pref = ops->default_pref(ops);
-
err = -EINVAL;
if (tb[FRA_GOTO]) {
if (rule->action != FR_ACT_GOTO)
diff --git a/net/decnet/dn_rules.c b/net/decnet/dn_rules.c
index 9d66a0f..295bbd6 100644
--- a/net/decnet/dn_rules.c
+++ b/net/decnet/dn_rules.c
@@ -229,7 +229,6 @@
.configure = dn_fib_rule_configure,
.compare = dn_fib_rule_compare,
.fill = dn_fib_rule_fill,
- .default_pref = fib_default_rule_pref,
.flush_cache = dn_fib_rule_flush_cache,
.nlgroup = RTNLGRP_DECnet_RULE,
.policy = dn_fib_rule_policy,
diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c
index 18123d5..f2bda9e 100644
--- a/net/ipv4/fib_rules.c
+++ b/net/ipv4/fib_rules.c
@@ -318,7 +318,6 @@
.delete = fib4_rule_delete,
.compare = fib4_rule_compare,
.fill = fib4_rule_fill,
- .default_pref = fib_default_rule_pref,
.nlmsg_payload = fib4_rule_nlmsg_payload,
.flush_cache = fib4_rule_flush_cache,
.nlgroup = RTNLGRP_IPV4_RULE,
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index 3a2c016..866ee89 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -233,7 +233,6 @@
.match = ipmr_rule_match,
.configure = ipmr_rule_configure,
.compare = ipmr_rule_compare,
- .default_pref = fib_default_rule_pref,
.fill = ipmr_rule_fill,
.nlgroup = RTNLGRP_IPV4_RULE,
.policy = ipmr_rule_policy,
diff --git a/net/ipv4/tcp_cubic.c b/net/ipv4/tcp_cubic.c
index 28011fb..c6ded6b 100644
--- a/net/ipv4/tcp_cubic.c
+++ b/net/ipv4/tcp_cubic.c
@@ -151,6 +151,21 @@
tcp_sk(sk)->snd_ssthresh = initial_ssthresh;
}
+static void bictcp_cwnd_event(struct sock *sk, enum tcp_ca_event event)
+{
+ if (event == CA_EVENT_TX_START) {
+ s32 delta = tcp_time_stamp - tcp_sk(sk)->lsndtime;
+ struct bictcp *ca = inet_csk_ca(sk);
+
+ /* We were application limited (idle) for a while.
+ * Shift epoch_start to keep cwnd growth to cubic curve.
+ */
+ if (ca->epoch_start && delta > 0)
+ ca->epoch_start += delta;
+ return;
+ }
+}
+
/* calculate the cubic root of x using a table lookup followed by one
* Newton-Raphson iteration.
* Avg err ~= 0.195%
@@ -450,6 +465,7 @@
.cong_avoid = bictcp_cong_avoid,
.set_state = bictcp_state,
.undo_cwnd = bictcp_undo_cwnd,
+ .cwnd_event = bictcp_cwnd_event,
.pkts_acked = bictcp_acked,
.owner = THIS_MODULE,
.name = "cubic",
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 1188e4f..f9a8a12 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -164,6 +164,9 @@
struct inet_connection_sock *icsk = inet_csk(sk);
const u32 now = tcp_time_stamp;
+ if (tcp_packets_in_flight(tp) == 0)
+ tcp_ca_event(sk, CA_EVENT_TX_START);
+
tp->lsndtime = now;
/* If it is a reply for ato after last received
@@ -940,9 +943,6 @@
&md5);
tcp_header_size = tcp_options_size + sizeof(struct tcphdr);
- if (tcp_packets_in_flight(tp) == 0)
- tcp_ca_event(sk, CA_EVENT_TX_START);
-
/* if no packet is in qdisc/device queue, then allow XPS to select
* another queue. We can be called from tcp_tsq_handler()
* which holds one reference to sk_wmem_alloc.
diff --git a/net/ipv6/fib6_rules.c b/net/ipv6/fib6_rules.c
index 2367a16..9f777ec 100644
--- a/net/ipv6/fib6_rules.c
+++ b/net/ipv6/fib6_rules.c
@@ -258,11 +258,6 @@
return -ENOBUFS;
}
-static u32 fib6_rule_default_pref(struct fib_rules_ops *ops)
-{
- return 0x3FFF;
-}
-
static size_t fib6_rule_nlmsg_payload(struct fib_rule *rule)
{
return nla_total_size(16) /* dst */
@@ -279,7 +274,6 @@
.configure = fib6_rule_configure,
.compare = fib6_rule_compare,
.fill = fib6_rule_fill,
- .default_pref = fib6_rule_default_pref,
.nlmsg_payload = fib6_rule_nlmsg_payload,
.nlgroup = RTNLGRP_IPV6_RULE,
.policy = fib6_rule_policy,
diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c
index 74ceb73..0e004cc 100644
--- a/net/ipv6/ip6mr.c
+++ b/net/ipv6/ip6mr.c
@@ -217,7 +217,6 @@
.match = ip6mr_rule_match,
.configure = ip6mr_rule_configure,
.compare = ip6mr_rule_compare,
- .default_pref = fib_default_rule_pref,
.fill = ip6mr_rule_fill,
.nlgroup = RTNLGRP_IPV6_RULE,
.policy = ip6mr_rule_policy,
@@ -550,7 +549,7 @@
if (it->cache == &mrt->mfc6_unres_queue)
spin_unlock_bh(&mfc_unres_lock);
- else if (it->cache == mrt->mfc6_cache_array)
+ else if (it->cache == &mrt->mfc6_cache_array[it->ct])
read_unlock(&mrt_lock);
}
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index f45cac6..53617d7 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -1748,7 +1748,7 @@
return -EINVAL;
}
-int ip6_route_add(struct fib6_config *cfg)
+int ip6_route_info_create(struct fib6_config *cfg, struct rt6_info **rt_ret)
{
int err;
struct net *net = cfg->fc_nlinfo.nl_net;
@@ -1756,7 +1756,6 @@
struct net_device *dev = NULL;
struct inet6_dev *idev = NULL;
struct fib6_table *table;
- struct mx6_config mxc = { .mx = NULL, };
int addr_type;
if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
@@ -1981,14 +1980,9 @@
cfg->fc_nlinfo.nl_net = dev_net(dev);
- err = ip6_convert_metrics(&mxc, cfg);
- if (err)
- goto out;
+ *rt_ret = rt;
- err = __ip6_ins_rt(rt, &cfg->fc_nlinfo, &mxc);
-
- kfree(mxc.mx);
- return err;
+ return 0;
out:
if (dev)
dev_put(dev);
@@ -1996,6 +1990,35 @@
in6_dev_put(idev);
if (rt)
dst_free(&rt->dst);
+
+ *rt_ret = NULL;
+
+ return err;
+}
+
+int ip6_route_add(struct fib6_config *cfg)
+{
+ struct mx6_config mxc = { .mx = NULL, };
+ struct rt6_info *rt = NULL;
+ int err;
+
+ err = ip6_route_info_create(cfg, &rt);
+ if (err)
+ goto out;
+
+ err = ip6_convert_metrics(&mxc, cfg);
+ if (err)
+ goto out;
+
+ err = __ip6_ins_rt(rt, &cfg->fc_nlinfo, &mxc);
+
+ kfree(mxc.mx);
+
+ return err;
+out:
+ if (rt)
+ dst_free(&rt->dst);
+
return err;
}
@@ -2776,19 +2799,78 @@
return err;
}
-static int ip6_route_multipath(struct fib6_config *cfg, int add)
+struct rt6_nh {
+ struct rt6_info *rt6_info;
+ struct fib6_config r_cfg;
+ struct mx6_config mxc;
+ struct list_head next;
+};
+
+static void ip6_print_replace_route_err(struct list_head *rt6_nh_list)
+{
+ struct rt6_nh *nh;
+
+ list_for_each_entry(nh, rt6_nh_list, next) {
+ pr_warn("IPV6: multipath route replace failed (check consistency of installed routes): %pI6 nexthop %pI6 ifi %d\n",
+ &nh->r_cfg.fc_dst, &nh->r_cfg.fc_gateway,
+ nh->r_cfg.fc_ifindex);
+ }
+}
+
+static int ip6_route_info_append(struct list_head *rt6_nh_list,
+ struct rt6_info *rt, struct fib6_config *r_cfg)
+{
+ struct rt6_nh *nh;
+ struct rt6_info *rtnh;
+ int err = -EEXIST;
+
+ list_for_each_entry(nh, rt6_nh_list, next) {
+ /* check if rt6_info already exists */
+ rtnh = nh->rt6_info;
+
+ if (rtnh->dst.dev == rt->dst.dev &&
+ rtnh->rt6i_idev == rt->rt6i_idev &&
+ ipv6_addr_equal(&rtnh->rt6i_gateway,
+ &rt->rt6i_gateway))
+ return err;
+ }
+
+ nh = kzalloc(sizeof(*nh), GFP_KERNEL);
+ if (!nh)
+ return -ENOMEM;
+ nh->rt6_info = rt;
+ err = ip6_convert_metrics(&nh->mxc, r_cfg);
+ if (err) {
+ kfree(nh);
+ return err;
+ }
+ memcpy(&nh->r_cfg, r_cfg, sizeof(*r_cfg));
+ list_add_tail(&nh->next, rt6_nh_list);
+
+ return 0;
+}
+
+static int ip6_route_multipath_add(struct fib6_config *cfg)
{
struct fib6_config r_cfg;
struct rtnexthop *rtnh;
+ struct rt6_info *rt;
+ struct rt6_nh *err_nh;
+ struct rt6_nh *nh, *nh_safe;
int remaining;
int attrlen;
- int err = 0, last_err = 0;
+ int err = 1;
+ int nhn = 0;
+ int replace = (cfg->fc_nlinfo.nlh &&
+ (cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_REPLACE));
+ LIST_HEAD(rt6_nh_list);
remaining = cfg->fc_mp_len;
-beginning:
rtnh = (struct rtnexthop *)cfg->fc_mp;
- /* Parse a Multipath Entry */
+ /* Parse a Multipath Entry and build a list (rt6_nh_list) of
+ * rt6_info structs per nexthop
+ */
while (rtnh_ok(rtnh, remaining)) {
memcpy(&r_cfg, cfg, sizeof(*cfg));
if (rtnh->rtnh_ifindex)
@@ -2808,22 +2890,32 @@
if (nla)
r_cfg.fc_encap_type = nla_get_u16(nla);
}
- err = add ? ip6_route_add(&r_cfg) : ip6_route_del(&r_cfg);
+
+ err = ip6_route_info_create(&r_cfg, &rt);
+ if (err)
+ goto cleanup;
+
+ err = ip6_route_info_append(&rt6_nh_list, rt, &r_cfg);
if (err) {
- last_err = err;
- /* If we are trying to remove a route, do not stop the
- * loop when ip6_route_del() fails (because next hop is
- * already gone), we should try to remove all next hops.
- */
- if (add) {
- /* If add fails, we should try to delete all
- * next hops that have been already added.
- */
- add = 0;
- remaining = cfg->fc_mp_len - remaining;
- goto beginning;
- }
+ dst_free(&rt->dst);
+ goto cleanup;
}
+
+ rtnh = rtnh_next(rtnh, &remaining);
+ }
+
+ err_nh = NULL;
+ list_for_each_entry(nh, &rt6_nh_list, next) {
+ err = __ip6_ins_rt(nh->rt6_info, &cfg->fc_nlinfo, &nh->mxc);
+ /* nh->rt6_info is used or freed at this point, reset to NULL*/
+ nh->rt6_info = NULL;
+ if (err) {
+ if (replace && nhn)
+ ip6_print_replace_route_err(&rt6_nh_list);
+ err_nh = nh;
+ goto add_errout;
+ }
+
/* Because each route is added like a single route we remove
* these flags after the first nexthop: if there is a collision,
* we have already failed to add the first nexthop:
@@ -2833,6 +2925,62 @@
*/
cfg->fc_nlinfo.nlh->nlmsg_flags &= ~(NLM_F_EXCL |
NLM_F_REPLACE);
+ nhn++;
+ }
+
+ goto cleanup;
+
+add_errout:
+ /* Delete routes that were already added */
+ list_for_each_entry(nh, &rt6_nh_list, next) {
+ if (err_nh == nh)
+ break;
+ ip6_route_del(&nh->r_cfg);
+ }
+
+cleanup:
+ list_for_each_entry_safe(nh, nh_safe, &rt6_nh_list, next) {
+ if (nh->rt6_info)
+ dst_free(&nh->rt6_info->dst);
+ kfree(nh->mxc.mx);
+ list_del(&nh->next);
+ kfree(nh);
+ }
+
+ return err;
+}
+
+static int ip6_route_multipath_del(struct fib6_config *cfg)
+{
+ struct fib6_config r_cfg;
+ struct rtnexthop *rtnh;
+ int remaining;
+ int attrlen;
+ int err = 1, last_err = 0;
+
+ remaining = cfg->fc_mp_len;
+ rtnh = (struct rtnexthop *)cfg->fc_mp;
+
+ /* Parse a Multipath Entry */
+ while (rtnh_ok(rtnh, remaining)) {
+ memcpy(&r_cfg, cfg, sizeof(*cfg));
+ if (rtnh->rtnh_ifindex)
+ r_cfg.fc_ifindex = rtnh->rtnh_ifindex;
+
+ attrlen = rtnh_attrlen(rtnh);
+ if (attrlen > 0) {
+ struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
+
+ nla = nla_find(attrs, attrlen, RTA_GATEWAY);
+ if (nla) {
+ nla_memcpy(&r_cfg.fc_gateway, nla, 16);
+ r_cfg.fc_flags |= RTF_GATEWAY;
+ }
+ }
+ err = ip6_route_del(&r_cfg);
+ if (err)
+ last_err = err;
+
rtnh = rtnh_next(rtnh, &remaining);
}
@@ -2849,7 +2997,7 @@
return err;
if (cfg.fc_mp)
- return ip6_route_multipath(&cfg, 0);
+ return ip6_route_multipath_del(&cfg);
else
return ip6_route_del(&cfg);
}
@@ -2864,7 +3012,7 @@
return err;
if (cfg.fc_mp)
- return ip6_route_multipath(&cfg, 1);
+ return ip6_route_multipath_add(&cfg);
else
return ip6_route_add(&cfg);
}
diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c
index 685ec13..17b1fe9 100644
--- a/net/mac80211/cfg.c
+++ b/net/mac80211/cfg.c
@@ -2468,6 +2468,10 @@
rssi_hyst == bss_conf->cqm_rssi_hyst)
return 0;
+ if (sdata->vif.driver_flags & IEEE80211_VIF_BEACON_FILTER &&
+ !(sdata->vif.driver_flags & IEEE80211_VIF_SUPPORTS_CQM_RSSI))
+ return -EOPNOTSUPP;
+
bss_conf->cqm_rssi_thold = rssi_thold;
bss_conf->cqm_rssi_hyst = rssi_hyst;
diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index 705ef1d..cd7e55e 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -4267,6 +4267,8 @@
struct ieee80211_supported_band *sband;
struct cfg80211_chan_def chandef;
int ret;
+ u32 i;
+ bool have_80mhz;
sband = local->hw.wiphy->bands[cbss->channel->band];
@@ -4317,6 +4319,20 @@
}
}
+ /* Allow VHT if at least one channel on the sband supports 80 MHz */
+ have_80mhz = false;
+ for (i = 0; i < sband->n_channels; i++) {
+ if (sband->channels[i].flags & (IEEE80211_CHAN_DISABLED |
+ IEEE80211_CHAN_NO_80MHZ))
+ continue;
+
+ have_80mhz = true;
+ break;
+ }
+
+ if (!have_80mhz)
+ ifmgd->flags |= IEEE80211_STA_DISABLE_VHT;
+
ifmgd->flags |= ieee80211_determine_chantype(sdata, sband,
cbss->channel,
ht_cap, ht_oper, vht_oper,
diff --git a/net/mac80211/rate.c b/net/mac80211/rate.c
index 9857693..9ce8883 100644
--- a/net/mac80211/rate.c
+++ b/net/mac80211/rate.c
@@ -716,7 +716,7 @@
/* Filter out rates that the STA does not support */
*mask &= sta->supp_rates[sband->band];
- for (i = 0; i < sizeof(mcs_mask); i++)
+ for (i = 0; i < IEEE80211_HT_MCS_MASK_LEN; i++)
mcs_mask[i] &= sta->ht_cap.mcs.rx_mask[i];
sta_vht_cap = sta->vht_cap.vht_mcs.rx_mcs_map;
diff --git a/net/mac80211/tdls.c b/net/mac80211/tdls.c
index aee701a..4e202d0 100644
--- a/net/mac80211/tdls.c
+++ b/net/mac80211/tdls.c
@@ -1249,6 +1249,58 @@
mutex_unlock(&local->chanctx_mtx);
}
+static int iee80211_tdls_have_ht_peers(struct ieee80211_sub_if_data *sdata)
+{
+ struct sta_info *sta;
+ bool result = false;
+
+ rcu_read_lock();
+ list_for_each_entry_rcu(sta, &sdata->local->sta_list, list) {
+ if (!sta->sta.tdls || sta->sdata != sdata || !sta->uploaded ||
+ !test_sta_flag(sta, WLAN_STA_AUTHORIZED) ||
+ !test_sta_flag(sta, WLAN_STA_TDLS_PEER_AUTH) ||
+ !sta->sta.ht_cap.ht_supported)
+ continue;
+ result = true;
+ break;
+ }
+ rcu_read_unlock();
+
+ return result;
+}
+
+static void
+iee80211_tdls_recalc_ht_protection(struct ieee80211_sub_if_data *sdata,
+ struct sta_info *sta)
+{
+ struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
+ bool tdls_ht;
+ u16 protection = IEEE80211_HT_OP_MODE_PROTECTION_NONHT_MIXED |
+ IEEE80211_HT_OP_MODE_NON_GF_STA_PRSNT |
+ IEEE80211_HT_OP_MODE_NON_HT_STA_PRSNT;
+ u16 opmode;
+
+ /* Nothing to do if the BSS connection uses HT */
+ if (!(ifmgd->flags & IEEE80211_STA_DISABLE_HT))
+ return;
+
+ tdls_ht = (sta && sta->sta.ht_cap.ht_supported) ||
+ iee80211_tdls_have_ht_peers(sdata);
+
+ opmode = sdata->vif.bss_conf.ht_operation_mode;
+
+ if (tdls_ht)
+ opmode |= protection;
+ else
+ opmode &= ~protection;
+
+ if (opmode == sdata->vif.bss_conf.ht_operation_mode)
+ return;
+
+ sdata->vif.bss_conf.ht_operation_mode = opmode;
+ ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_HT);
+}
+
int ieee80211_tdls_oper(struct wiphy *wiphy, struct net_device *dev,
const u8 *peer, enum nl80211_tdls_operation oper)
{
@@ -1274,6 +1326,10 @@
return -ENOTSUPP;
}
+ /* protect possible bss_conf changes and avoid concurrency in
+ * ieee80211_bss_info_change_notify()
+ */
+ sdata_lock(sdata);
mutex_lock(&local->mtx);
tdls_dbg(sdata, "TDLS oper %d peer %pM\n", oper, peer);
@@ -1287,16 +1343,18 @@
iee80211_tdls_recalc_chanctx(sdata);
- rcu_read_lock();
+ mutex_lock(&local->sta_mtx);
sta = sta_info_get(sdata, peer);
if (!sta) {
- rcu_read_unlock();
+ mutex_unlock(&local->sta_mtx);
ret = -ENOLINK;
break;
}
+ iee80211_tdls_recalc_ht_protection(sdata, sta);
+
set_sta_flag(sta, WLAN_STA_TDLS_PEER_AUTH);
- rcu_read_unlock();
+ mutex_unlock(&local->sta_mtx);
WARN_ON_ONCE(is_zero_ether_addr(sdata->u.mgd.tdls_peer) ||
!ether_addr_equal(sdata->u.mgd.tdls_peer, peer));
@@ -1318,6 +1376,11 @@
ieee80211_flush_queues(local, sdata, false);
ret = sta_info_destroy_addr(sdata, peer);
+
+ mutex_lock(&local->sta_mtx);
+ iee80211_tdls_recalc_ht_protection(sdata, NULL);
+ mutex_unlock(&local->sta_mtx);
+
iee80211_tdls_recalc_chanctx(sdata);
break;
default:
@@ -1335,6 +1398,7 @@
&sdata->u.mgd.request_smps_work);
mutex_unlock(&local->mtx);
+ sdata_unlock(sdata);
return ret;
}
diff --git a/net/mac80211/vht.c b/net/mac80211/vht.c
index 834ccdb..ff1c798 100644
--- a/net/mac80211/vht.c
+++ b/net/mac80211/vht.c
@@ -120,6 +120,7 @@
struct ieee80211_sta_vht_cap *vht_cap = &sta->sta.vht_cap;
struct ieee80211_sta_vht_cap own_cap;
u32 cap_info, i;
+ bool have_80mhz;
memset(vht_cap, 0, sizeof(*vht_cap));
@@ -129,6 +130,20 @@
if (!vht_cap_ie || !sband->vht_cap.vht_supported)
return;
+ /* Allow VHT if at least one channel on the sband supports 80 MHz */
+ have_80mhz = false;
+ for (i = 0; i < sband->n_channels; i++) {
+ if (sband->channels[i].flags & (IEEE80211_CHAN_DISABLED |
+ IEEE80211_CHAN_NO_80MHZ))
+ continue;
+
+ have_80mhz = true;
+ break;
+ }
+
+ if (!have_80mhz)
+ return;
+
/*
* A VHT STA must support 40 MHz, but if we verify that here
* then we break a few things - some APs (e.g. Netgear R6300v2
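
The same "does any channel on this band support 80 MHz" scan is added in both mlme.c and vht.c above; if nothing qualifies, VHT is disabled for the association or the peer. A minimal standalone version of the check, with the channel flags reduced to a plain bitmask array for illustration:

#include <stdbool.h>
#include <stddef.h>

#define CHAN_DISABLED	0x1	/* stands in for IEEE80211_CHAN_DISABLED */
#define CHAN_NO_80MHZ	0x2	/* stands in for IEEE80211_CHAN_NO_80MHZ */

static bool band_has_80mhz(const unsigned int *chan_flags, size_t n_channels)
{
	size_t i;

	for (i = 0; i < n_channels; i++) {
		if (chan_flags[i] & (CHAN_DISABLED | CHAN_NO_80MHZ))
			continue;
		return true;	/* one usable 80 MHz channel is enough */
	}

	return false;		/* caller disables VHT in this case */
}
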
diff --git a/net/netfilter/ipset/ip_set_hash_gen.h b/net/netfilter/ipset/ip_set_hash_gen.h
index afe905c..691b54f 100644
--- a/net/netfilter/ipset/ip_set_hash_gen.h
+++ b/net/netfilter/ipset/ip_set_hash_gen.h
@@ -152,9 +152,13 @@
#define SET_HOST_MASK(family) (family == AF_INET ? 32 : 128)
#ifdef IP_SET_HASH_WITH_NET0
+/* cidr from 0 to SET_HOST_MASK() value and c = cidr + 1 */
#define NLEN(family) (SET_HOST_MASK(family) + 1)
+#define CIDR_POS(c) ((c) - 1)
#else
+/* cidr from 1 to SET_HOST_MASK() value and c = cidr + 1 */
#define NLEN(family) SET_HOST_MASK(family)
+#define CIDR_POS(c) ((c) - 2)
#endif
#else
@@ -305,7 +309,7 @@
} else if (h->nets[i].cidr[n] < cidr) {
j = i;
} else if (h->nets[i].cidr[n] == cidr) {
- h->nets[cidr - 1].nets[n]++;
+ h->nets[CIDR_POS(cidr)].nets[n]++;
return;
}
}
@@ -314,7 +318,7 @@
h->nets[i].cidr[n] = h->nets[i - 1].cidr[n];
}
h->nets[i].cidr[n] = cidr;
- h->nets[cidr - 1].nets[n] = 1;
+ h->nets[CIDR_POS(cidr)].nets[n] = 1;
}
static void
@@ -325,8 +329,8 @@
for (i = 0; i < nets_length; i++) {
if (h->nets[i].cidr[n] != cidr)
continue;
- h->nets[cidr - 1].nets[n]--;
- if (h->nets[cidr - 1].nets[n] > 0)
+ h->nets[CIDR_POS(cidr)].nets[n]--;
+ if (h->nets[CIDR_POS(cidr)].nets[n] > 0)
return;
for (j = i; j < net_end && h->nets[j].cidr[n]; j++)
h->nets[j].cidr[n] = h->nets[j + 1].cidr[n];
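
CIDR_POS() captures the off-by-one between the two build configurations: per the comments added above, the stored value c is cidr + 1 in both cases, but only IP_SET_HASH_WITH_NET0 reserves a slot for /0 networks. A tiny illustration of the resulting array indices (the mapping is read straight from those comments):

#include <stdio.h>

/* stored value is c = cidr + 1 in both configurations */
#define CIDR_POS_WITH_NET0(c)	((c) - 1)	/* cidr 0..32 -> index 0..32 */
#define CIDR_POS_NO_NET0(c)	((c) - 2)	/* cidr 1..32 -> index 0..31 */

int main(void)
{
	unsigned int cidr = 24, c = cidr + 1;

	/* nets[24] in a 33-entry array when /0 entries are possible */
	printf("with net0:    /%u -> nets[%u]\n", cidr, CIDR_POS_WITH_NET0(c));

	/* nets[23] in a 32-entry array otherwise */
	printf("without net0: /%u -> nets[%u]\n", cidr, CIDR_POS_NO_NET0(c));

	return 0;
}
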
diff --git a/net/netfilter/ipset/ip_set_hash_netnet.c b/net/netfilter/ipset/ip_set_hash_netnet.c
index 3c862c0..a93dfeb 100644
--- a/net/netfilter/ipset/ip_set_hash_netnet.c
+++ b/net/netfilter/ipset/ip_set_hash_netnet.c
@@ -131,6 +131,13 @@
#define HOST_MASK 32
#include "ip_set_hash_gen.h"
+static void
+hash_netnet4_init(struct hash_netnet4_elem *e)
+{
+ e->cidr[0] = HOST_MASK;
+ e->cidr[1] = HOST_MASK;
+}
+
static int
hash_netnet4_kadt(struct ip_set *set, const struct sk_buff *skb,
const struct xt_action_param *par,
@@ -160,7 +167,7 @@
{
const struct hash_netnet *h = set->data;
ipset_adtfn adtfn = set->variant->adt[adt];
- struct hash_netnet4_elem e = { .cidr = { HOST_MASK, HOST_MASK, }, };
+ struct hash_netnet4_elem e = { };
struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
u32 ip = 0, ip_to = 0, last;
u32 ip2 = 0, ip2_from = 0, ip2_to = 0, last2;
@@ -169,6 +176,7 @@
if (tb[IPSET_ATTR_LINENO])
*lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]);
+ hash_netnet4_init(&e);
if (unlikely(!tb[IPSET_ATTR_IP] || !tb[IPSET_ATTR_IP2] ||
!ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS)))
return -IPSET_ERR_PROTOCOL;
@@ -357,6 +365,13 @@
#define IP_SET_EMIT_CREATE
#include "ip_set_hash_gen.h"
+static void
+hash_netnet6_init(struct hash_netnet6_elem *e)
+{
+ e->cidr[0] = HOST_MASK;
+ e->cidr[1] = HOST_MASK;
+}
+
static int
hash_netnet6_kadt(struct ip_set *set, const struct sk_buff *skb,
const struct xt_action_param *par,
@@ -385,13 +400,14 @@
enum ipset_adt adt, u32 *lineno, u32 flags, bool retried)
{
ipset_adtfn adtfn = set->variant->adt[adt];
- struct hash_netnet6_elem e = { .cidr = { HOST_MASK, HOST_MASK, }, };
+ struct hash_netnet6_elem e = { };
struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
int ret;
if (tb[IPSET_ATTR_LINENO])
*lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]);
+ hash_netnet6_init(&e);
if (unlikely(!tb[IPSET_ATTR_IP] || !tb[IPSET_ATTR_IP2] ||
!ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS)))
return -IPSET_ERR_PROTOCOL;
diff --git a/net/netfilter/ipset/ip_set_hash_netportnet.c b/net/netfilter/ipset/ip_set_hash_netportnet.c
index 0c68734..9a14c23 100644
--- a/net/netfilter/ipset/ip_set_hash_netportnet.c
+++ b/net/netfilter/ipset/ip_set_hash_netportnet.c
@@ -142,6 +142,13 @@
#define HOST_MASK 32
#include "ip_set_hash_gen.h"
+static void
+hash_netportnet4_init(struct hash_netportnet4_elem *e)
+{
+ e->cidr[0] = HOST_MASK;
+ e->cidr[1] = HOST_MASK;
+}
+
static int
hash_netportnet4_kadt(struct ip_set *set, const struct sk_buff *skb,
const struct xt_action_param *par,
@@ -175,7 +182,7 @@
{
const struct hash_netportnet *h = set->data;
ipset_adtfn adtfn = set->variant->adt[adt];
- struct hash_netportnet4_elem e = { .cidr = { HOST_MASK, HOST_MASK, }, };
+ struct hash_netportnet4_elem e = { };
struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
u32 ip = 0, ip_to = 0, ip_last, p = 0, port, port_to;
u32 ip2_from = 0, ip2_to = 0, ip2_last, ip2;
@@ -185,6 +192,7 @@
if (tb[IPSET_ATTR_LINENO])
*lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]);
+ hash_netportnet4_init(&e);
if (unlikely(!tb[IPSET_ATTR_IP] || !tb[IPSET_ATTR_IP2] ||
!ip_set_attr_netorder(tb, IPSET_ATTR_PORT) ||
!ip_set_optattr_netorder(tb, IPSET_ATTR_PORT_TO) ||
@@ -412,6 +420,13 @@
#define IP_SET_EMIT_CREATE
#include "ip_set_hash_gen.h"
+static void
+hash_netportnet6_init(struct hash_netportnet6_elem *e)
+{
+ e->cidr[0] = HOST_MASK;
+ e->cidr[1] = HOST_MASK;
+}
+
static int
hash_netportnet6_kadt(struct ip_set *set, const struct sk_buff *skb,
const struct xt_action_param *par,
@@ -445,7 +460,7 @@
{
const struct hash_netportnet *h = set->data;
ipset_adtfn adtfn = set->variant->adt[adt];
- struct hash_netportnet6_elem e = { .cidr = { HOST_MASK, HOST_MASK, }, };
+ struct hash_netportnet6_elem e = { };
struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
u32 port, port_to;
bool with_ports = false;
@@ -454,6 +469,7 @@
if (tb[IPSET_ATTR_LINENO])
*lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]);
+ hash_netportnet6_init(&e);
if (unlikely(!tb[IPSET_ATTR_IP] || !tb[IPSET_ATTR_IP2] ||
!ip_set_attr_netorder(tb, IPSET_ATTR_PORT) ||
!ip_set_optattr_netorder(tb, IPSET_ATTR_PORT_TO) ||
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index eedf049..c09d6c7 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -313,12 +313,13 @@
}
EXPORT_SYMBOL_GPL(nf_ct_tmpl_alloc);
-static void nf_ct_tmpl_free(struct nf_conn *tmpl)
+void nf_ct_tmpl_free(struct nf_conn *tmpl)
{
nf_ct_ext_destroy(tmpl);
nf_ct_ext_free(tmpl);
kfree(tmpl);
}
+EXPORT_SYMBOL_GPL(nf_ct_tmpl_free);
static void
destroy_conntrack(struct nf_conntrack *nfct)
diff --git a/net/netfilter/nf_synproxy_core.c b/net/netfilter/nf_synproxy_core.c
index 888b955..c8a4a48 100644
--- a/net/netfilter/nf_synproxy_core.c
+++ b/net/netfilter/nf_synproxy_core.c
@@ -380,7 +380,7 @@
err3:
free_percpu(snet->stats);
err2:
- nf_conntrack_free(ct);
+ nf_ct_tmpl_free(ct);
err1:
return err;
}
diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c
index 0c0e8ec..70277b1 100644
--- a/net/netfilter/nfnetlink.c
+++ b/net/netfilter/nfnetlink.c
@@ -444,6 +444,7 @@
static void nfnetlink_rcv(struct sk_buff *skb)
{
struct nlmsghdr *nlh = nlmsg_hdr(skb);
+ u_int16_t res_id;
int msglen;
if (nlh->nlmsg_len < NLMSG_HDRLEN ||
@@ -468,7 +469,12 @@
nfgenmsg = nlmsg_data(nlh);
skb_pull(skb, msglen);
- nfnetlink_rcv_batch(skb, nlh, nfgenmsg->res_id);
+ /* Work around old nft using host byte order */
+ if (nfgenmsg->res_id == NFNL_SUBSYS_NFTABLES)
+ res_id = NFNL_SUBSYS_NFTABLES;
+ else
+ res_id = ntohs(nfgenmsg->res_id);
+ nfnetlink_rcv_batch(skb, nlh, res_id);
} else {
netlink_rcv_skb(skb, &nfnetlink_rcv_msg);
}
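
The hunk above keeps compatibility with old nft binaries that wrote the nftables subsystem id in host byte order, while everything else is converted with ntohs(). The normalization in isolation (sketch only; the numeric value of NFNL_SUBSYS_NFTABLES comes from the kernel headers):

#include <arpa/inet.h>
#include <stdint.h>

static uint16_t normalize_res_id(uint16_t wire_res_id, uint16_t nftables_id)
{
	/* old nft put the nftables subsystem id on the wire unconverted */
	if (wire_res_id == nftables_id)
		return nftables_id;

	return ntohs(wire_res_id);
}
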
diff --git a/net/netfilter/nfnetlink_queue_core.c b/net/netfilter/nfnetlink_queue_core.c
index 685cc6a..a5cd6d9 100644
--- a/net/netfilter/nfnetlink_queue_core.c
+++ b/net/netfilter/nfnetlink_queue_core.c
@@ -301,7 +301,7 @@
__be32 **packet_id_ptr)
{
size_t size;
- size_t data_len = 0, cap_len = 0;
+ size_t data_len = 0, cap_len = 0, rem_len = 0;
unsigned int hlen = 0;
struct sk_buff *skb;
struct nlattr *nla;
@@ -360,6 +360,7 @@
hlen = min_t(unsigned int, hlen, data_len);
size += sizeof(struct nlattr) + hlen;
cap_len = entskb->len;
+ rem_len = data_len - hlen;
break;
}
@@ -377,7 +378,7 @@
size += nla_total_size(seclen);
}
- skb = nfnetlink_alloc_skb(net, size, queue->peer_portid,
+ skb = __netlink_alloc_skb(net->nfnl, size, rem_len, queue->peer_portid,
GFP_ATOMIC);
if (!skb) {
skb_tx_error(entskb);
diff --git a/net/netfilter/xt_CT.c b/net/netfilter/xt_CT.c
index 8e52489..faf32d8 100644
--- a/net/netfilter/xt_CT.c
+++ b/net/netfilter/xt_CT.c
@@ -255,7 +255,7 @@
return 0;
err3:
- nf_conntrack_free(ct);
+ nf_ct_tmpl_free(ct);
err2:
nf_ct_l3proto_module_put(par->family);
err1:
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index 50889be..7f86d3b 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -674,12 +674,19 @@
mask = datagram_poll(file, sock, wait);
- spin_lock_bh(&sk->sk_receive_queue.lock);
- if (nlk->rx_ring.pg_vec) {
- if (netlink_has_valid_frame(&nlk->rx_ring))
- mask |= POLLIN | POLLRDNORM;
+ /* We could already have received frames in the normal receive
+ * queue, that will show up as NL_MMAP_STATUS_COPY in the ring,
+ * so if mask contains pollin/etc already, there's no point
+ * walking the ring.
+ */
+ if ((mask & (POLLIN | POLLRDNORM)) != (POLLIN | POLLRDNORM)) {
+ spin_lock_bh(&sk->sk_receive_queue.lock);
+ if (nlk->rx_ring.pg_vec) {
+ if (netlink_has_valid_frame(&nlk->rx_ring))
+ mask |= POLLIN | POLLRDNORM;
+ }
+ spin_unlock_bh(&sk->sk_receive_queue.lock);
}
- spin_unlock_bh(&sk->sk_receive_queue.lock);
spin_lock_bh(&sk->sk_write_queue.lock);
if (nlk->tx_ring.pg_vec) {
@@ -1837,15 +1844,16 @@
}
EXPORT_SYMBOL(netlink_unicast);
-struct sk_buff *netlink_alloc_skb(struct sock *ssk, unsigned int size,
- u32 dst_portid, gfp_t gfp_mask)
+struct sk_buff *__netlink_alloc_skb(struct sock *ssk, unsigned int size,
+ unsigned int ldiff, u32 dst_portid,
+ gfp_t gfp_mask)
{
#ifdef CONFIG_NETLINK_MMAP
+ unsigned int maxlen, linear_size;
struct sock *sk = NULL;
struct sk_buff *skb;
struct netlink_ring *ring;
struct nl_mmap_hdr *hdr;
- unsigned int maxlen;
sk = netlink_getsockbyportid(ssk, dst_portid);
if (IS_ERR(sk))
@@ -1856,7 +1864,11 @@
if (ring->pg_vec == NULL)
goto out_put;
- if (ring->frame_size - NL_MMAP_HDRLEN < size)
+ /* We need to account the full linear size needed as a ring
+ * slot cannot have non-linear parts.
+ */
+ linear_size = size + ldiff;
+ if (ring->frame_size - NL_MMAP_HDRLEN < linear_size)
goto out_put;
skb = alloc_skb_head(gfp_mask);
@@ -1870,13 +1882,14 @@
/* check again under lock */
maxlen = ring->frame_size - NL_MMAP_HDRLEN;
- if (maxlen < size)
+ if (maxlen < linear_size)
goto out_free;
netlink_forward_ring(ring);
hdr = netlink_current_frame(ring, NL_MMAP_STATUS_UNUSED);
if (hdr == NULL)
goto err2;
+
netlink_ring_setup_skb(skb, sk, ring, hdr);
netlink_set_status(hdr, NL_MMAP_STATUS_RESERVED);
atomic_inc(&ring->pending);
@@ -1902,7 +1915,7 @@
#endif
return alloc_skb(size, gfp_mask);
}
-EXPORT_SYMBOL_GPL(netlink_alloc_skb);
+EXPORT_SYMBOL_GPL(__netlink_alloc_skb);
int netlink_has_listeners(struct sock *sk, unsigned int group)
{
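
The new ldiff argument exists because a mmap ring frame has no paged area: everything the caller may later need to keep linear has to fit in the frame, or the allocation must fall back to an ordinary skb. The size check reduces to the following (sketch; hdrlen mirrors NL_MMAP_HDRLEN):

#include <stdbool.h>

static bool fits_in_ring_frame(unsigned int frame_size, unsigned int hdrlen,
			       unsigned int size, unsigned int ldiff)
{
	/* account the full linear payload, not just the copied part */
	unsigned int linear_size = size + ldiff;

	return frame_size - hdrlen >= linear_size;
}
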
diff --git a/net/openvswitch/Kconfig b/net/openvswitch/Kconfig
index af7cdef..2a071f4 100644
--- a/net/openvswitch/Kconfig
+++ b/net/openvswitch/Kconfig
@@ -5,6 +5,7 @@
config OPENVSWITCH
tristate "Open vSwitch"
depends on INET
+ depends on (!NF_CONNTRACK || NF_CONNTRACK)
select LIBCRC32C
select MPLS
select NET_MPLS_GSO
@@ -31,17 +32,6 @@
If unsure, say N.
-config OPENVSWITCH_CONNTRACK
- bool "Open vSwitch conntrack action support"
- depends on OPENVSWITCH
- depends on NF_CONNTRACK
- default OPENVSWITCH
- ---help---
- If you say Y here, then Open vSwitch module will be able to pass
- packets through conntrack.
-
- Say N to exclude this support and reduce the binary size.
-
config OPENVSWITCH_GRE
tristate "Open vSwitch GRE tunneling support"
depends on OPENVSWITCH
diff --git a/net/openvswitch/Makefile b/net/openvswitch/Makefile
index 5b5913b..60f8090 100644
--- a/net/openvswitch/Makefile
+++ b/net/openvswitch/Makefile
@@ -15,7 +15,9 @@
vport-internal_dev.o \
vport-netdev.o
-openvswitch-$(CONFIG_OPENVSWITCH_CONNTRACK) += conntrack.o
+ifneq ($(CONFIG_NF_CONNTRACK),)
+openvswitch-y += conntrack.o
+endif
obj-$(CONFIG_OPENVSWITCH_VXLAN)+= vport-vxlan.o
obj-$(CONFIG_OPENVSWITCH_GENEVE)+= vport-geneve.o
diff --git a/net/openvswitch/conntrack.h b/net/openvswitch/conntrack.h
index 3cb3066..43f5dd7 100644
--- a/net/openvswitch/conntrack.h
+++ b/net/openvswitch/conntrack.h
@@ -19,7 +19,7 @@
struct ovs_conntrack_info;
enum ovs_key_attr;
-#if defined(CONFIG_OPENVSWITCH_CONNTRACK)
+#if IS_ENABLED(CONFIG_NF_CONNTRACK)
void ovs_ct_init(struct net *);
void ovs_ct_exit(struct net *);
bool ovs_ct_verify(struct net *, enum ovs_key_attr attr);
@@ -82,5 +82,5 @@
}
static inline void ovs_ct_free_action(const struct nlattr *a) { }
-#endif
+#endif /* CONFIG_NF_CONNTRACK */
#endif /* ovs_conntrack.h */
diff --git a/net/rds/connection.c b/net/rds/connection.c
index a50e652..49adeef 100644
--- a/net/rds/connection.c
+++ b/net/rds/connection.c
@@ -70,7 +70,8 @@
} while (0)
/* rcu read lock must be held or the connection spinlock */
-static struct rds_connection *rds_conn_lookup(struct hlist_head *head,
+static struct rds_connection *rds_conn_lookup(struct net *net,
+ struct hlist_head *head,
__be32 laddr, __be32 faddr,
struct rds_transport *trans)
{
@@ -78,7 +79,7 @@
hlist_for_each_entry_rcu(conn, head, c_hash_node) {
if (conn->c_faddr == faddr && conn->c_laddr == laddr &&
- conn->c_trans == trans) {
+ conn->c_trans == trans && net == rds_conn_net(conn)) {
ret = conn;
break;
}
@@ -132,7 +133,7 @@
if (!is_outgoing && otrans->t_type == RDS_TRANS_TCP)
goto new_conn;
rcu_read_lock();
- conn = rds_conn_lookup(head, laddr, faddr, trans);
+ conn = rds_conn_lookup(net, head, laddr, faddr, trans);
if (conn && conn->c_loopback && conn->c_trans != &rds_loop_transport &&
laddr == faddr && !is_outgoing) {
/* This is a looped back IB connection, and we're
@@ -189,6 +190,12 @@
}
}
+ if (trans == NULL) {
+ kmem_cache_free(rds_conn_slab, conn);
+ conn = ERR_PTR(-ENODEV);
+ goto out;
+ }
+
conn->c_trans = trans;
ret = trans->conn_alloc(conn, gfp);
@@ -239,7 +246,7 @@
if (!is_outgoing && otrans->t_type == RDS_TRANS_TCP)
found = NULL;
else
- found = rds_conn_lookup(head, laddr, faddr, trans);
+ found = rds_conn_lookup(net, head, laddr, faddr, trans);
if (found) {
trans->conn_free(conn->c_transport_data);
kmem_cache_free(rds_conn_slab, conn);
diff --git a/net/rfkill/core.c b/net/rfkill/core.c
index f12149a..b41e9ea 100644
--- a/net/rfkill/core.c
+++ b/net/rfkill/core.c
@@ -341,7 +341,15 @@
{
struct rfkill *rfkill;
- rfkill_global_states[type].cur = blocked;
+ if (type == RFKILL_TYPE_ALL) {
+ int i;
+
+ for (i = 0; i < NUM_RFKILL_TYPES; i++)
+ rfkill_global_states[i].cur = blocked;
+ } else {
+ rfkill_global_states[type].cur = blocked;
+ }
+
list_for_each_entry(rfkill, &rfkill_list, node) {
if (rfkill->type != type && type != RFKILL_TYPE_ALL)
continue;
diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c
index 4345790..b714333 100644
--- a/net/sctp/protocol.c
+++ b/net/sctp/protocol.c
@@ -506,14 +506,22 @@
if (IS_ERR(rt))
continue;
+ if (!dst)
+ dst = &rt->dst;
+
/* Ensure the src address belongs to the output
* interface.
*/
odev = __ip_dev_find(sock_net(sk), laddr->a.v4.sin_addr.s_addr,
false);
- if (!odev || odev->ifindex != fl4->flowi4_oif)
+ if (!odev || odev->ifindex != fl4->flowi4_oif) {
+ if (&rt->dst != dst)
+ dst_release(&rt->dst);
continue;
+ }
+ if (dst != &rt->dst)
+ dst_release(dst);
dst = &rt->dst;
break;
}
diff --git a/net/switchdev/switchdev.c b/net/switchdev/switchdev.c
index 16c1c43..fda38f8 100644
--- a/net/switchdev/switchdev.c
+++ b/net/switchdev/switchdev.c
@@ -853,12 +853,8 @@
.cb = cb,
.idx = idx,
};
- int err;
- err = switchdev_port_obj_dump(dev, &dump.obj);
- if (err)
- return err;
-
+ switchdev_port_obj_dump(dev, &dump.obj);
return dump.idx;
}
EXPORT_SYMBOL_GPL(switchdev_port_fdb_dump);
diff --git a/net/tipc/bcast.c b/net/tipc/bcast.c
index 8b010c9..41042de 100644
--- a/net/tipc/bcast.c
+++ b/net/tipc/bcast.c
@@ -170,6 +170,30 @@
}
/**
+ * bclink_prepare_wakeup - prepare users for wakeup after congestion
+ * @bcl: broadcast link
+ * @resultq: queue for users which can be woken up
+ * Move a number of waiting users, as permitted by available space in
+ * the send queue, from link wait queue to specified queue for wakeup
+ */
+static void bclink_prepare_wakeup(struct tipc_link *bcl, struct sk_buff_head *resultq)
+{
+ int pnd[TIPC_SYSTEM_IMPORTANCE + 1] = {0,};
+ int imp, lim;
+ struct sk_buff *skb, *tmp;
+
+ skb_queue_walk_safe(&bcl->wakeupq, skb, tmp) {
+ imp = TIPC_SKB_CB(skb)->chain_imp;
+ lim = bcl->window + bcl->backlog[imp].limit;
+ pnd[imp] += TIPC_SKB_CB(skb)->chain_sz;
+ if ((pnd[imp] + bcl->backlog[imp].len) >= lim)
+ continue;
+ skb_unlink(skb, &bcl->wakeupq);
+ skb_queue_tail(resultq, skb);
+ }
+}
+
+/**
* tipc_bclink_wakeup_users - wake up pending users
*
* Called with no locks taken
@@ -177,8 +201,12 @@
void tipc_bclink_wakeup_users(struct net *net)
{
struct tipc_net *tn = net_generic(net, tipc_net_id);
+ struct tipc_link *bcl = tn->bcl;
+ struct sk_buff_head resultq;
- tipc_sk_rcv(net, &tn->bclink->link.wakeupq);
+ skb_queue_head_init(&resultq);
+ bclink_prepare_wakeup(bcl, &resultq);
+ tipc_sk_rcv(net, &resultq);
}
/**
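
bclink_prepare_wakeup() above hands out wakeups per importance level: every waiting user's chain size is added to a running per-level total, and the user is moved to the wakeup queue only while that total plus the level's current backlog stays under the link limit. A self-contained sketch of the same budgeting idea with toy types (four importance levels kept, limits are examples):

#include <stdbool.h>
#include <stddef.h>

#define IMP_LEVELS 4

struct waiter {
	int imp;	/* importance class, 0..IMP_LEVELS-1 */
	int chain_sz;	/* packets this user wants to queue */
	bool admitted;
};

static void prepare_wakeup(struct waiter *w, size_t n,
			   const int backlog[IMP_LEVELS],
			   const int limit[IMP_LEVELS])
{
	int pending[IMP_LEVELS] = { 0 };
	size_t i;

	for (i = 0; i < n; i++) {
		int imp = w[i].imp;

		/* every walked entry counts toward its class budget */
		pending[imp] += w[i].chain_sz;
		if (pending[imp] + backlog[imp] >= limit[imp])
			continue;	/* would overrun the link limit */
		w[i].admitted = true;
	}
}
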
diff --git a/net/wireless/reg.c b/net/wireless/reg.c
index b144485..2510b231 100644
--- a/net/wireless/reg.c
+++ b/net/wireless/reg.c
@@ -2625,7 +2625,7 @@
* settings, user regulatory settings takes precedence.
*/
if (is_an_alpha2(alpha2))
- regulatory_hint_user(user_alpha2, NL80211_USER_REG_HINT_USER);
+ regulatory_hint_user(alpha2, NL80211_USER_REG_HINT_USER);
spin_lock(&reg_requests_lock);
list_splice_tail_init(&tmp_reg_req_list, &reg_requests_list);
diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl
index a51ca0e..f2a1131 100755
--- a/scripts/checkpatch.pl
+++ b/scripts/checkpatch.pl
@@ -264,6 +264,7 @@
__kernel|
__force|
__iomem|
+ __pmem|
__must_check|
__init_refok|
__kprobes|
@@ -584,7 +585,7 @@
our $FuncArg = qr{$Typecast{0,1}($LvalOrFunc|$Constant|$String)};
our $declaration_macros = qr{(?x:
- (?:$Storage\s+)?(?:[A-Z_][A-Z0-9]*_){0,2}(?:DEFINE|DECLARE)(?:_[A-Z0-9]+){1,2}\s*\(|
+ (?:$Storage\s+)?(?:[A-Z_][A-Z0-9]*_){0,2}(?:DEFINE|DECLARE)(?:_[A-Z0-9]+){1,6}\s*\(|
(?:$Storage\s+)?LIST_HEAD\s*\(|
(?:$Storage\s+)?${Type}\s+uninitialized_var\s*\(
)};
@@ -1953,9 +1954,9 @@
our $clean = 1;
my $signoff = 0;
my $is_patch = 0;
-
my $in_header_lines = $file ? 0 : 1;
my $in_commit_log = 0; #Scanning lines before patch
+ my $commit_log_possible_stack_dump = 0;
my $commit_log_long_line = 0;
my $commit_log_has_diff = 0;
my $reported_maintainer_file = 0;
@@ -2166,11 +2167,15 @@
if ($showfile) {
$prefix = "$realfile:$realline: "
} elsif ($emacs) {
- $prefix = "$filename:$linenr: ";
+ if ($file) {
+ $prefix = "$filename:$realline: ";
+ } else {
+ $prefix = "$filename:$linenr: ";
+ }
}
if ($found_file) {
- if ($realfile =~ m@^(drivers/net/|net/)@) {
+ if ($realfile =~ m@^(?:drivers/net/|net/|drivers/staging/)@) {
$check = 1;
} else {
$check = $check_orig;
@@ -2310,16 +2315,42 @@
# Check for line lengths > 75 in commit log, warn once
if ($in_commit_log && !$commit_log_long_line &&
- length($line) > 75) {
+ length($line) > 75 &&
+ !($line =~ /^\s*[a-zA-Z0-9_\/\.]+\s+\|\s+\d+/ ||
+ # file delta changes
+ $line =~ /^\s*(?:[\w\.\-]+\/)++[\w\.\-]+:/ ||
+ # filename then :
+ $line =~ /^\s*(?:Fixes:|Link:)/i ||
+ # A Fixes: or Link: line
+ $commit_log_possible_stack_dump)) {
WARN("COMMIT_LOG_LONG_LINE",
"Possible unwrapped commit description (prefer a maximum 75 chars per line)\n" . $herecurr);
$commit_log_long_line = 1;
}
+# Check if the commit log is in a possible stack dump
+ if ($in_commit_log && !$commit_log_possible_stack_dump &&
+ ($line =~ /^\s*(?:WARNING:|BUG:)/ ||
+ $line =~ /^\s*\[\s*\d+\.\d{6,6}\s*\]/ ||
+ # timestamp
+ $line =~ /^\s*\[\<[0-9a-fA-F]{8,}\>\]/)) {
+ # stack dump address
+ $commit_log_possible_stack_dump = 1;
+ }
+
+# Reset possible stack dump if a blank line is found
+ if ($in_commit_log && $commit_log_possible_stack_dump &&
+ $line =~ /^\s*$/) {
+ $commit_log_possible_stack_dump = 0;
+ }
+
# Check for git id commit length and improperly formed commit descriptions
- if ($in_commit_log && $line =~ /\b(c)ommit\s+([0-9a-f]{5,})/i) {
- my $init_char = $1;
- my $orig_commit = lc($2);
+ if ($in_commit_log &&
+ ($line =~ /\bcommit\s+[0-9a-f]{5,}\b/i ||
+ ($line =~ /\b[0-9a-f]{12,40}\b/i &&
+ $line !~ /\bfixes:\s*[0-9a-f]{12,40}/i))) {
+ my $init_char = "c";
+ my $orig_commit = "";
my $short = 1;
my $long = 0;
my $case = 1;
@@ -2330,6 +2361,13 @@
my $orig_desc = "commit description";
my $description = "";
+ if ($line =~ /\b(c)ommit\s+([0-9a-f]{5,})\b/i) {
+ $init_char = $1;
+ $orig_commit = lc($2);
+ } elsif ($line =~ /\b([0-9a-f]{12,40})\b/i) {
+ $orig_commit = lc($1);
+ }
+
$short = 0 if ($line =~ /\bcommit\s+[0-9a-f]{12,40}/i);
$long = 1 if ($line =~ /\bcommit\s+[0-9a-f]{41,}/i);
$space = 0 if ($line =~ /\bcommit [0-9a-f]/i);
@@ -2738,6 +2776,8 @@
}
}
+# Block comment styles
+# Networking with an initial /*
if ($realfile =~ m@^(drivers/net/|net/)@ &&
$prevrawline =~ /^\+[ \t]*\/\*[ \t]*$/ &&
$rawline =~ /^\+[ \t]*\*/ &&
@@ -2746,22 +2786,23 @@
"networking block comments don't use an empty /* line, use /* Comment...\n" . $hereprev);
}
- if ($realfile =~ m@^(drivers/net/|net/)@ &&
- $prevrawline =~ /^\+[ \t]*\/\*/ && #starting /*
+# Block comments use * on subsequent lines
+ if ($prevline =~ /$;[ \t]*$/ && #ends in comment
+ $prevrawline =~ /^\+.*?\/\*/ && #starting /*
$prevrawline !~ /\*\/[ \t]*$/ && #no trailing */
$rawline =~ /^\+/ && #line is new
$rawline !~ /^\+[ \t]*\*/) { #no leading *
- WARN("NETWORKING_BLOCK_COMMENT_STYLE",
- "networking block comments start with * on subsequent lines\n" . $hereprev);
+ WARN("BLOCK_COMMENT_STYLE",
+ "Block comments use * on subsequent lines\n" . $hereprev);
}
- if ($realfile =~ m@^(drivers/net/|net/)@ &&
- $rawline !~ m@^\+[ \t]*\*/[ \t]*$@ && #trailing */
+# Block comments use */ on trailing lines
+ if ($rawline !~ m@^\+[ \t]*\*/[ \t]*$@ && #trailing */
$rawline !~ m@^\+.*/\*.*\*/[ \t]*$@ && #inline /*...*/
$rawline !~ m@^\+.*\*{2,}/[ \t]*$@ && #trailing **/
$rawline =~ m@^\+[ \t]*.+\*\/[ \t]*$@) { #non blank */
- WARN("NETWORKING_BLOCK_COMMENT_STYLE",
- "networking block comments put the trailing */ on a separate line\n" . $herecurr);
+ WARN("BLOCK_COMMENT_STYLE",
+ "Block comments use a trailing */ on a separate line\n" . $herecurr);
}
# check for missing blank lines after struct/union declarations
@@ -3067,15 +3108,22 @@
substr($s, 0, length($c), '');
- # Make sure we remove the line prefixes as we have
- # none on the first line, and are going to readd them
- # where necessary.
- $s =~ s/\n./\n/gs;
+ # remove inline comments
+ $s =~ s/$;/ /g;
+ $c =~ s/$;/ /g;
# Find out how long the conditional actually is.
my @newlines = ($c =~ /\n/gs);
my $cond_lines = 1 + $#newlines;
+ # Make sure we remove the line prefixes as we have
+ # none on the first line, and are going to readd them
+ # where necessary.
+ $s =~ s/\n./\n/gs;
+ while ($s =~ /\n\s+\\\n/) {
+ $cond_lines += $s =~ s/\n\s+\\\n/\n/g;
+ }
+
# We want to check the first line inside the block
# starting at the end of the conditional, so remove:
# 1) any blank line termination
@@ -3141,8 +3189,10 @@
#print "line<$line> prevline<$prevline> indent<$indent> sindent<$sindent> check<$check> continuation<$continuation> s<$s> cond_lines<$cond_lines> stat_real<$stat_real> stat<$stat>\n";
- if ($check && (($sindent % 8) != 0 ||
- ($sindent <= $indent && $s ne ''))) {
+ if ($check && $s ne '' &&
+ (($sindent % 8) != 0 ||
+ ($sindent < $indent) ||
+ ($sindent > $indent + 8))) {
WARN("SUSPECT_CODE_INDENT",
"suspect code indent for conditional statements ($indent, $sindent)\n" . $herecurr . "$stat_real\n");
}
@@ -3439,13 +3489,15 @@
}
}
-# # no BUG() or BUG_ON()
-# if ($line =~ /\b(BUG|BUG_ON)\b/) {
-# print "Try to use WARN_ON & Recovery code rather than BUG() or BUG_ON()\n";
-# print "$herecurr";
-# $clean = 0;
-# }
+# avoid BUG() or BUG_ON()
+ if ($line =~ /\b(?:BUG|BUG_ON)\b/) {
+ my $msg_type = \&WARN;
+ $msg_type = \&CHK if ($file);
+ &{$msg_type}("AVOID_BUG",
+ "Avoid crashing the kernel - try using WARN_ON & recovery code rather than BUG() or BUG_ON()\n" . $herecurr);
+ }
+# avoid LINUX_VERSION_CODE
if ($line =~ /\bLINUX_VERSION_CODE\b/) {
WARN("LINUX_VERSION_CODE",
"LINUX_VERSION_CODE should be avoided, code should be for the version to which it is merged\n" . $herecurr);
@@ -3520,7 +3572,7 @@
# function brace can't be on same line, except for #defines of do while,
# or if closed on same line
if (($line=~/$Type\s*$Ident\(.*\).*\s*{/) and
- !($line=~/\#\s*define.*do\s{/) and !($line=~/}/)) {
+ !($line=~/\#\s*define.*do\s\{/) and !($line=~/}/)) {
if (ERROR("OPEN_BRACE",
"open brace '{' following function declarations go on the next line\n" . $herecurr) &&
$fix) {
@@ -4032,8 +4084,8 @@
## }
#need space before brace following if, while, etc
- if (($line =~ /\(.*\){/ && $line !~ /\($Type\){/) ||
- $line =~ /do{/) {
+ if (($line =~ /\(.*\)\{/ && $line !~ /\($Type\){/) ||
+ $line =~ /do\{/) {
if (ERROR("SPACING",
"space required before the open brace '{'\n" . $herecurr) &&
$fix) {
@@ -4179,6 +4231,35 @@
}
}
+# comparisons with a constant or upper case identifier on the left
+# avoid cases like "foo + BAR < baz"
+# only fix matches surrounded by parentheses to avoid incorrect
+# conversions like "FOO < baz() + 5" being "misfixed" to "baz() > FOO + 5"
+ if ($^V && $^V ge 5.10.0 &&
+ $line =~ /^\+(.*)\b($Constant|[A-Z_][A-Z0-9_]*)\s*($Compare)\s*($LvalOrFunc)/) {
+ my $lead = $1;
+ my $const = $2;
+ my $comp = $3;
+ my $to = $4;
+ my $newcomp = $comp;
+ if ($lead !~ /$Operators\s*$/ &&
+ $to !~ /^(?:Constant|[A-Z_][A-Z0-9_]*)$/ &&
+ WARN("CONSTANT_COMPARISON",
+ "Comparisons should place the constant on the right side of the test\n" . $herecurr) &&
+ $fix) {
+ if ($comp eq "<") {
+ $newcomp = ">";
+ } elsif ($comp eq "<=") {
+ $newcomp = ">=";
+ } elsif ($comp eq ">") {
+ $newcomp = "<";
+ } elsif ($comp eq ">=") {
+ $newcomp = "<=";
+ }
+ $fixed[$fixlinenr] =~ s/\(\s*\Q$const\E\s*$Compare\s*\Q$to\E\s*\)/($to $newcomp $const)/;
+ }
+ }
+
# Return of what appears to be an errno should normally be negative
if ($sline =~ /\breturn(?:\s*\(+\s*|\s+)(E[A-Z]+)(?:\s*\)+\s*|\s*)[;:,]/) {
my $name = $1;
@@ -4480,7 +4561,7 @@
$dstat !~ /^for\s*$Constant$/ && # for (...)
$dstat !~ /^for\s*$Constant\s+(?:$Ident|-?$Constant)$/ && # for (...) bar()
$dstat !~ /^do\s*{/ && # do {...
- $dstat !~ /^\({/ && # ({...
+ $dstat !~ /^\(\{/ && # ({...
$ctx !~ /^.\s*#\s*define\s+TRACE_(?:SYSTEM|INCLUDE_FILE|INCLUDE_PATH)\b/)
{
$ctx =~ s/\n*$//;
@@ -4789,16 +4870,20 @@
"Consecutive strings are generally better as a single string\n" . $herecurr);
}
-# check for %L{u,d,i} in strings
+# check for %L{u,d,i} and 0x%[udi] in strings
my $string;
while ($line =~ /(?:^|")([X\t]*)(?:"|$)/g) {
$string = substr($rawline, $-[1], $+[1] - $-[1]);
$string =~ s/%%/__/g;
- if ($string =~ /(?<!%)%L[udi]/) {
+ if ($string =~ /(?<!%)%[\*\d\.\$]*L[udi]/) {
WARN("PRINTF_L",
"\%Ld/%Lu are not-standard C, use %lld/%llu\n" . $herecurr);
last;
}
+ if ($string =~ /0x%[\*\d\.\$\Llzth]*[udi]/) {
+ ERROR("PRINTF_0xDECIMAL",
+ "Prefixing 0x with decimal output is defective\n" . $herecurr);
+ }
}
# check for line continuations in quoted strings with odd counts of "
@@ -4816,10 +4901,34 @@
# check for needless "if (<foo>) fn(<foo>)" uses
if ($prevline =~ /\bif\s*\(\s*($Lval)\s*\)/) {
- my $expr = '\s*\(\s*' . quotemeta($1) . '\s*\)\s*;';
- if ($line =~ /\b(kfree|usb_free_urb|debugfs_remove(?:_recursive)?)$expr/) {
- WARN('NEEDLESS_IF',
- "$1(NULL) is safe and this check is probably not required\n" . $hereprev);
+ my $tested = quotemeta($1);
+ my $expr = '\s*\(\s*' . $tested . '\s*\)\s*;';
+ if ($line =~ /\b(kfree|usb_free_urb|debugfs_remove(?:_recursive)?|(?:kmem_cache|mempool|dma_pool)_destroy)$expr/) {
+ my $func = $1;
+ if (WARN('NEEDLESS_IF',
+ "$func(NULL) is safe and this check is probably not required\n" . $hereprev) &&
+ $fix) {
+ my $do_fix = 1;
+ my $leading_tabs = "";
+ my $new_leading_tabs = "";
+ if ($lines[$linenr - 2] =~ /^\+(\t*)if\s*\(\s*$tested\s*\)\s*$/) {
+ $leading_tabs = $1;
+ } else {
+ $do_fix = 0;
+ }
+ if ($lines[$linenr - 1] =~ /^\+(\t+)$func\s*\(\s*$tested\s*\)\s*;\s*$/) {
+ $new_leading_tabs = $1;
+ if (length($leading_tabs) + 1 ne length($new_leading_tabs)) {
+ $do_fix = 0;
+ }
+ } else {
+ $do_fix = 0;
+ }
+ if ($do_fix) {
+ fix_delete_line($fixlinenr - 1, $prevrawline);
+ $fixed[$fixlinenr] =~ s/^\+$new_leading_tabs/\+$leading_tabs/;
+ }
+ }
}
}
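
The CONSTANT_COMPARISON check added above warns when a constant or upper-case identifier sits on the left of a comparison, and --fix only rewrites matches that are fully parenthesised. A minimal sketch of the style it enforces (the identifiers are invented for illustration):

    /* Flagged: constant on the left of the test. */
    if (MAX_RETRIES <= attempts)
            return -ETIMEDOUT;

    /* Preferred, and what the fixer produces for "(MAX_RETRIES <= attempts)". */
    if (attempts >= MAX_RETRIES)
            return -ETIMEDOUT;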
diff --git a/scripts/extract-cert.c b/scripts/extract-cert.c
index fd0db01..6ce5945 100644
--- a/scripts/extract-cert.c
+++ b/scripts/extract-cert.c
@@ -1,15 +1,15 @@
/* Extract X.509 certificate in DER form from PKCS#11 or PEM.
*
- * Copyright © 2014 Red Hat, Inc. All Rights Reserved.
- * Copyright © 2015 Intel Corporation.
+ * Copyright © 2014-2015 Red Hat, Inc. All Rights Reserved.
+ * Copyright © 2015 Intel Corporation.
*
* Authors: David Howells <dhowells@redhat.com>
* David Woodhouse <dwmw2@infradead.org>
*
* This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public Licence
- * as published by the Free Software Foundation; either version
- * 2 of the Licence, or (at your option) any later version.
+ * modify it under the terms of the GNU Lesser General Public License
+ * as published by the Free Software Foundation; either version 2.1
+ * of the licence, or (at your option) any later version.
*/
#define _GNU_SOURCE
#include <stdio.h>
@@ -86,7 +86,7 @@
ERR(!wb, "%s", cert_dst);
}
X509_NAME_oneline(X509_get_subject_name(x509), buf, sizeof(buf));
- ERR(!i2d_X509_bio(wb, x509), cert_dst);
+ ERR(!i2d_X509_bio(wb, x509), "%s", cert_dst);
if (kbuild_verbose)
fprintf(stderr, "Extracted cert: %s\n", buf);
}
diff --git a/scripts/sign-file.c b/scripts/sign-file.c
index 058bba3..c3899ca 100755
--- a/scripts/sign-file.c
+++ b/scripts/sign-file.c
@@ -1,12 +1,15 @@
/* Sign a module file using the given key.
*
- * Copyright (C) 2014 Red Hat, Inc. All Rights Reserved.
- * Written by David Howells (dhowells@redhat.com)
+ * Copyright © 2014-2015 Red Hat, Inc. All Rights Reserved.
+ * Copyright © 2015 Intel Corporation.
+ *
+ * Authors: David Howells <dhowells@redhat.com>
+ * David Woodhouse <dwmw2@infradead.org>
*
* This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public Licence
- * as published by the Free Software Foundation; either version
- * 2 of the Licence, or (at your option) any later version.
+ * modify it under the terms of the GNU Lesser General Public License
+ * as published by the Free Software Foundation; either version 2.1
+ * of the licence, or (at your option) any later version.
*/
#define _GNU_SOURCE
#include <stdio.h>
diff --git a/security/device_cgroup.c b/security/device_cgroup.c
index 73455089..03c1652 100644
--- a/security/device_cgroup.c
+++ b/security/device_cgroup.c
@@ -401,7 +401,7 @@
bool match = false;
RCU_LOCKDEP_WARN(!rcu_read_lock_held() &&
- lockdep_is_held(&devcgroup_mutex),
+ !lockdep_is_held(&devcgroup_mutex),
"device_cgroup:verify_new_ex called without proper synchronization");
if (dev_cgroup->behavior == DEVCG_DEFAULT_ALLOW) {
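
The one-character fix above restores the intended assertion: RCU_LOCKDEP_WARN() complains when its condition is true, so the walker must run under either the RCU read lock or devcgroup_mutex. A hedged sketch of that invariant (walk_exceptions() is a hypothetical caller, not code from this file):

    rcu_read_lock();
    walk_exceptions(dev_cgroup);        /* fine: RCU read-side critical section */
    rcu_read_unlock();

    mutex_lock(&devcgroup_mutex);
    walk_exceptions(dev_cgroup);        /* fine: devcgroup_mutex held */
    mutex_unlock(&devcgroup_mutex);

    walk_exceptions(dev_cgroup);        /* neither held: the (fixed) warning fires */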
diff --git a/security/selinux/selinuxfs.c b/security/selinux/selinuxfs.c
index 3d22014..5bed7716 100644
--- a/security/selinux/selinuxfs.c
+++ b/security/selinux/selinuxfs.c
@@ -472,7 +472,7 @@
return 0;
}
-static struct vm_operations_struct sel_mmap_policy_ops = {
+static const struct vm_operations_struct sel_mmap_policy_ops = {
.fault = sel_mmap_policy_fault,
.page_mkwrite = sel_mmap_policy_fault,
};
diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c
index 4e6b090..a75b561 100644
--- a/sound/pci/hda/patch_realtek.c
+++ b/sound/pci/hda/patch_realtek.c
@@ -1135,7 +1135,7 @@
/* override all pins as BIOS on old Amilo is broken */
.type = HDA_FIXUP_PINS,
.v.pins = (const struct hda_pintbl[]) {
- { 0x14, 0x0121411f }, /* HP */
+ { 0x14, 0x0121401f }, /* HP */
{ 0x15, 0x99030120 }, /* speaker */
{ 0x16, 0x99030130 }, /* bass speaker */
{ 0x17, 0x411111f0 }, /* N/A */
@@ -1155,7 +1155,7 @@
/* almost compatible with FUJITSU, but no bass and SPDIF */
.type = HDA_FIXUP_PINS,
.v.pins = (const struct hda_pintbl[]) {
- { 0x14, 0x0121411f }, /* HP */
+ { 0x14, 0x0121401f }, /* HP */
{ 0x15, 0x99030120 }, /* speaker */
{ 0x16, 0x411111f0 }, /* N/A */
{ 0x17, 0x411111f0 }, /* N/A */
@@ -1364,7 +1364,7 @@
SND_PCI_QUIRK(0x161f, 0x203d, "W810", ALC880_FIXUP_W810),
SND_PCI_QUIRK(0x161f, 0x205d, "Medion Rim 2150", ALC880_FIXUP_MEDION_RIM),
SND_PCI_QUIRK(0x1631, 0xe011, "PB 13201056", ALC880_FIXUP_6ST_AUTOMUTE),
- SND_PCI_QUIRK(0x1734, 0x107c, "FSC F1734", ALC880_FIXUP_F1734),
+ SND_PCI_QUIRK(0x1734, 0x107c, "FSC Amilo M1437", ALC880_FIXUP_FUJITSU),
SND_PCI_QUIRK(0x1734, 0x1094, "FSC Amilo M1451G", ALC880_FIXUP_FUJITSU),
SND_PCI_QUIRK(0x1734, 0x10ac, "FSC AMILO Xi 1526", ALC880_FIXUP_F1734),
SND_PCI_QUIRK(0x1734, 0x10b0, "FSC Amilo Pi1556", ALC880_FIXUP_FUJITSU),
@@ -5189,8 +5189,11 @@
SND_PCI_QUIRK(0x1028, 0x06c7, "Dell", ALC255_FIXUP_DELL1_MIC_NO_PRESENCE),
SND_PCI_QUIRK(0x1028, 0x06d9, "Dell", ALC293_FIXUP_DELL1_MIC_NO_PRESENCE),
SND_PCI_QUIRK(0x1028, 0x06da, "Dell", ALC293_FIXUP_DELL1_MIC_NO_PRESENCE),
- SND_PCI_QUIRK(0x1028, 0x06de, "Dell", ALC292_FIXUP_DISABLE_AAMIX),
SND_PCI_QUIRK(0x1028, 0x06db, "Dell", ALC292_FIXUP_DISABLE_AAMIX),
+ SND_PCI_QUIRK(0x1028, 0x06dd, "Dell", ALC292_FIXUP_DISABLE_AAMIX),
+ SND_PCI_QUIRK(0x1028, 0x06de, "Dell", ALC292_FIXUP_DISABLE_AAMIX),
+ SND_PCI_QUIRK(0x1028, 0x06df, "Dell", ALC292_FIXUP_DISABLE_AAMIX),
+ SND_PCI_QUIRK(0x1028, 0x06e0, "Dell", ALC292_FIXUP_DISABLE_AAMIX),
SND_PCI_QUIRK(0x1028, 0x164a, "Dell", ALC293_FIXUP_DELL1_MIC_NO_PRESENCE),
SND_PCI_QUIRK(0x1028, 0x164b, "Dell", ALC293_FIXUP_DELL1_MIC_NO_PRESENCE),
SND_PCI_QUIRK(0x103c, 0x1586, "HP", ALC269_FIXUP_HP_MUTE_LED_MIC2),
@@ -6381,6 +6384,7 @@
SND_PCI_QUIRK(0x1028, 0x05db, "Dell", ALC668_FIXUP_DELL_MIC_NO_PRESENCE),
SND_PCI_QUIRK(0x1028, 0x05fe, "Dell XPS 15", ALC668_FIXUP_DELL_XPS13),
SND_PCI_QUIRK(0x1028, 0x060a, "Dell XPS 13", ALC668_FIXUP_DELL_XPS13),
+ SND_PCI_QUIRK(0x1028, 0x060d, "Dell M3800", ALC668_FIXUP_DELL_XPS13),
SND_PCI_QUIRK(0x1028, 0x0625, "Dell", ALC668_FIXUP_DELL_MIC_NO_PRESENCE),
SND_PCI_QUIRK(0x1028, 0x0626, "Dell", ALC668_FIXUP_DELL_MIC_NO_PRESENCE),
SND_PCI_QUIRK(0x1028, 0x0696, "Dell", ALC668_FIXUP_DELL_MIC_NO_PRESENCE),
diff --git a/sound/sparc/amd7930.c b/sound/sparc/amd7930.c
index 784ceb8..35c1f6a 100644
--- a/sound/sparc/amd7930.c
+++ b/sound/sparc/amd7930.c
@@ -1064,6 +1064,7 @@
},
{},
};
+MODULE_DEVICE_TABLE(of, amd7930_match);
static struct platform_driver amd7930_sbus_driver = {
.driver = {
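
Adding MODULE_DEVICE_TABLE(of, ...) exposes the OF match table in the module image so userspace can autoload the driver from the firmware-generated modalias. A generic sketch of the pattern (foo_match and its entry are illustrative, not taken from this driver):

    static const struct of_device_id foo_match[] = {
            { .name = "audio" },            /* node this driver binds to */
            { /* sentinel */ },
    };
    MODULE_DEVICE_TABLE(of, foo_match);     /* emits the modalias aliases */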
diff --git a/sound/usb/stream.c b/sound/usb/stream.c
index 310a382..9700860 100644
--- a/sound/usb/stream.c
+++ b/sound/usb/stream.c
@@ -377,7 +377,15 @@
snd_usb_init_substream(as, stream, fp);
- list_add(&as->list, &chip->pcm_list);
+ /*
+ * Keep using head insertion for M-Audio Audiophile USB (tm) which has a
+ * fix to swap capture stream order in conf/cards/USB-audio.conf
+ */
+ if (chip->usb_id == USB_ID(0x0763, 0x2003))
+ list_add(&as->list, &chip->pcm_list);
+ else
+ list_add_tail(&as->list, &chip->pcm_list);
+
chip->pcm_devs++;
snd_usb_proc_pcm_format_add(as);
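
Switching to list_add_tail() keeps the PCM streams in discovery order, while the quirk above preserves the old head insertion for the one device whose USB-audio.conf entry already compensates for the reversed order. A toy sketch of the difference (struct stream and the variables are invented for illustration):

    struct stream { struct list_head list; };
    static struct stream a, b, c;
    static LIST_HEAD(pcm_list);

    list_add(&a.list, &pcm_list);           /* list: a                    */
    list_add(&b.list, &pcm_list);           /* list: b, a  (newest first) */
    list_add_tail(&c.list, &pcm_list);      /* list: b, a, c  (appended)  */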
diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c
index eb51325..284a76e 100644
--- a/tools/perf/builtin-script.c
+++ b/tools/perf/builtin-script.c
@@ -768,8 +768,8 @@
if (!evsel->attr.sample_id_all) {
sample->cpu = 0;
sample->time = 0;
- sample->tid = event->comm.tid;
- sample->pid = event->comm.pid;
+ sample->tid = event->fork.tid;
+ sample->pid = event->fork.pid;
}
print_sample_start(sample, thread, evsel);
perf_event__fprintf(event, stdout);
diff --git a/tools/perf/tests/sw-clock.c b/tools/perf/tests/sw-clock.c
index 1aa21c9..5b83f56 100644
--- a/tools/perf/tests/sw-clock.c
+++ b/tools/perf/tests/sw-clock.c
@@ -34,6 +34,8 @@
.disabled = 1,
.freq = 1,
};
+ struct cpu_map *cpus;
+ struct thread_map *threads;
attr.sample_freq = 500;
@@ -50,14 +52,19 @@
}
perf_evlist__add(evlist, evsel);
- evlist->cpus = cpu_map__dummy_new();
- evlist->threads = thread_map__new_by_tid(getpid());
- if (!evlist->cpus || !evlist->threads) {
+ cpus = cpu_map__dummy_new();
+ threads = thread_map__new_by_tid(getpid());
+ if (!cpus || !threads) {
err = -ENOMEM;
pr_debug("Not enough memory to create thread/cpu maps\n");
- goto out_delete_evlist;
+ goto out_free_maps;
}
+ perf_evlist__set_maps(evlist, cpus, threads);
+
+ cpus = NULL;
+ threads = NULL;
+
if (perf_evlist__open(evlist)) {
const char *knob = "/proc/sys/kernel/perf_event_max_sample_rate";
@@ -107,6 +114,9 @@
err = -1;
}
+out_free_maps:
+ cpu_map__put(cpus);
+ thread_map__put(threads);
out_delete_evlist:
perf_evlist__delete(evlist);
return err;
diff --git a/tools/perf/tests/task-exit.c b/tools/perf/tests/task-exit.c
index 3a8fedef..add1638 100644
--- a/tools/perf/tests/task-exit.c
+++ b/tools/perf/tests/task-exit.c
@@ -43,6 +43,8 @@
};
const char *argv[] = { "true", NULL };
char sbuf[STRERR_BUFSIZE];
+ struct cpu_map *cpus;
+ struct thread_map *threads;
signal(SIGCHLD, sig_handler);
@@ -58,14 +60,19 @@
* perf_evlist__prepare_workload we'll fill in the only thread
* we're monitoring, the one forked there.
*/
- evlist->cpus = cpu_map__dummy_new();
- evlist->threads = thread_map__new_by_tid(-1);
- if (!evlist->cpus || !evlist->threads) {
+ cpus = cpu_map__dummy_new();
+ threads = thread_map__new_by_tid(-1);
+ if (!cpus || !threads) {
err = -ENOMEM;
pr_debug("Not enough memory to create thread/cpu maps\n");
- goto out_delete_evlist;
+ goto out_free_maps;
}
+ perf_evlist__set_maps(evlist, cpus, threads);
+
+ cpus = NULL;
+ threads = NULL;
+
err = perf_evlist__prepare_workload(evlist, &target, argv, false,
workload_exec_failed_signal);
if (err < 0) {
@@ -114,6 +121,9 @@
err = -1;
}
+out_free_maps:
+ cpu_map__put(cpus);
+ thread_map__put(threads);
out_delete_evlist:
perf_evlist__delete(evlist);
return err;
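
Both test conversions above follow the same ownership pattern: build the maps locally, hand them to perf_evlist__set_maps(), then clear the locals so the out_free_maps label only drops references the evlist never adopted (the put helpers accept NULL). Condensed from the sw-clock variant, with the test body trimmed:

    cpus = cpu_map__dummy_new();
    threads = thread_map__new_by_tid(getpid());
    if (!cpus || !threads)
            goto out_free_maps;             /* drop whichever map was created */

    perf_evlist__set_maps(evlist, cpus, threads);
    cpus = NULL;                            /* the evlist now owns the references */
    threads = NULL;
    /* ... run the test ... */
    out_free_maps:
    cpu_map__put(cpus);                     /* both put helpers tolerate NULL */
    thread_map__put(threads);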
diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c
index cf86f2d..c04c60d 100644
--- a/tools/perf/ui/browsers/hists.c
+++ b/tools/perf/ui/browsers/hists.c
@@ -1968,7 +1968,8 @@
&options[nr_options], dso);
nr_options += add_map_opt(browser, &actions[nr_options],
&options[nr_options],
- browser->selection->map);
+ browser->selection ?
+ browser->selection->map : NULL);
/* perf script support */
if (browser->he_selection) {
@@ -1976,6 +1977,15 @@
&actions[nr_options],
&options[nr_options],
thread, NULL);
+ /*
+ * Note that browser->selection != NULL
+ * when browser->he_selection is not NULL,
+ * so we don't need to check browser->selection
+ * before fetching browser->selection->sym like what
+ * we do before fetching browser->selection->map.
+ *
+ * See hist_browser__show_entry.
+ */
nr_options += add_script_opt(browser,
&actions[nr_options],
&options[nr_options],
diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c
index d51a520..c8fc8a2 100644
--- a/tools/perf/util/evlist.c
+++ b/tools/perf/util/evlist.c
@@ -124,6 +124,33 @@
free(evlist);
}
+static void __perf_evlist__propagate_maps(struct perf_evlist *evlist,
+ struct perf_evsel *evsel)
+{
+ /*
+ * We already have cpus for evsel (via PMU sysfs) so
+ * keep it, if there's no target cpu list defined.
+ */
+ if (!evsel->own_cpus || evlist->has_user_cpus) {
+ cpu_map__put(evsel->cpus);
+ evsel->cpus = cpu_map__get(evlist->cpus);
+ } else if (evsel->cpus != evsel->own_cpus) {
+ cpu_map__put(evsel->cpus);
+ evsel->cpus = cpu_map__get(evsel->own_cpus);
+ }
+
+ thread_map__put(evsel->threads);
+ evsel->threads = thread_map__get(evlist->threads);
+}
+
+static void perf_evlist__propagate_maps(struct perf_evlist *evlist)
+{
+ struct perf_evsel *evsel;
+
+ evlist__for_each(evlist, evsel)
+ __perf_evlist__propagate_maps(evlist, evsel);
+}
+
void perf_evlist__add(struct perf_evlist *evlist, struct perf_evsel *entry)
{
entry->evlist = evlist;
@@ -133,18 +160,19 @@
if (!evlist->nr_entries++)
perf_evlist__set_id_pos(evlist);
+
+ __perf_evlist__propagate_maps(evlist, entry);
}
void perf_evlist__splice_list_tail(struct perf_evlist *evlist,
- struct list_head *list,
- int nr_entries)
+ struct list_head *list)
{
- bool set_id_pos = !evlist->nr_entries;
+ struct perf_evsel *evsel, *temp;
- list_splice_tail(list, &evlist->entries);
- evlist->nr_entries += nr_entries;
- if (set_id_pos)
- perf_evlist__set_id_pos(evlist);
+ __evlist__for_each_safe(list, temp, evsel) {
+ list_del_init(&evsel->node);
+ perf_evlist__add(evlist, evsel);
+ }
}
void __perf_evlist__set_leader(struct list_head *list)
@@ -210,7 +238,7 @@
list_add_tail(&evsel->node, &head);
}
- perf_evlist__splice_list_tail(evlist, &head, nr_attrs);
+ perf_evlist__splice_list_tail(evlist, &head);
return 0;
@@ -1103,71 +1131,56 @@
return perf_evlist__mmap_ex(evlist, pages, overwrite, 0, false);
}
-static int perf_evlist__propagate_maps(struct perf_evlist *evlist,
- bool has_user_cpus)
-{
- struct perf_evsel *evsel;
-
- evlist__for_each(evlist, evsel) {
- /*
- * We already have cpus for evsel (via PMU sysfs) so
- * keep it, if there's no target cpu list defined.
- */
- if (evsel->cpus && has_user_cpus)
- cpu_map__put(evsel->cpus);
-
- if (!evsel->cpus || has_user_cpus)
- evsel->cpus = cpu_map__get(evlist->cpus);
-
- evsel->threads = thread_map__get(evlist->threads);
-
- if ((evlist->cpus && !evsel->cpus) ||
- (evlist->threads && !evsel->threads))
- return -ENOMEM;
- }
-
- return 0;
-}
-
int perf_evlist__create_maps(struct perf_evlist *evlist, struct target *target)
{
- evlist->threads = thread_map__new_str(target->pid, target->tid,
- target->uid);
+ struct cpu_map *cpus;
+ struct thread_map *threads;
- if (evlist->threads == NULL)
+ threads = thread_map__new_str(target->pid, target->tid, target->uid);
+
+ if (!threads)
return -1;
if (target__uses_dummy_map(target))
- evlist->cpus = cpu_map__dummy_new();
+ cpus = cpu_map__dummy_new();
else
- evlist->cpus = cpu_map__new(target->cpu_list);
+ cpus = cpu_map__new(target->cpu_list);
- if (evlist->cpus == NULL)
+ if (!cpus)
goto out_delete_threads;
- return perf_evlist__propagate_maps(evlist, !!target->cpu_list);
+ evlist->has_user_cpus = !!target->cpu_list;
+
+ perf_evlist__set_maps(evlist, cpus, threads);
+
+ return 0;
out_delete_threads:
- thread_map__put(evlist->threads);
- evlist->threads = NULL;
+ thread_map__put(threads);
return -1;
}
-int perf_evlist__set_maps(struct perf_evlist *evlist,
- struct cpu_map *cpus,
- struct thread_map *threads)
+void perf_evlist__set_maps(struct perf_evlist *evlist, struct cpu_map *cpus,
+ struct thread_map *threads)
{
- if (evlist->cpus)
+ /*
+ * Allow for the possibility that one or another of the maps isn't being
+ * changed i.e. don't put it. Note we are assuming the maps that are
+ * being applied are brand new and evlist is taking ownership of the
+ * original reference count of 1. If that is not the case it is up to
+ * the caller to increase the reference count.
+ */
+ if (cpus != evlist->cpus) {
cpu_map__put(evlist->cpus);
+ evlist->cpus = cpus;
+ }
- evlist->cpus = cpus;
-
- if (evlist->threads)
+ if (threads != evlist->threads) {
thread_map__put(evlist->threads);
+ evlist->threads = threads;
+ }
- evlist->threads = threads;
-
- return perf_evlist__propagate_maps(evlist, false);
+ perf_evlist__propagate_maps(evlist);
}
int perf_evlist__apply_filters(struct perf_evlist *evlist, struct perf_evsel **err_evsel)
@@ -1387,6 +1400,8 @@
static int perf_evlist__create_syswide_maps(struct perf_evlist *evlist)
{
+ struct cpu_map *cpus;
+ struct thread_map *threads;
int err = -ENOMEM;
/*
@@ -1398,20 +1413,19 @@
* error, and we may not want to do that fallback to a
* default cpu identity map :-\
*/
- evlist->cpus = cpu_map__new(NULL);
- if (evlist->cpus == NULL)
+ cpus = cpu_map__new(NULL);
+ if (!cpus)
goto out;
- evlist->threads = thread_map__new_dummy();
- if (evlist->threads == NULL)
- goto out_free_cpus;
+ threads = thread_map__new_dummy();
+ if (!threads)
+ goto out_put;
- err = 0;
+ perf_evlist__set_maps(evlist, cpus, threads);
out:
return err;
-out_free_cpus:
- cpu_map__put(evlist->cpus);
- evlist->cpus = NULL;
+out_put:
+ cpu_map__put(cpus);
goto out;
}
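
The reworked perf_evlist__set_maps() adopts the caller's initial reference on each map, as its comment spells out, and propagation to the evsels now happens internally. A caller-side sketch of the two cases (an existing evlist is assumed):

    struct cpu_map *cpus = cpu_map__new(NULL);          /* all online CPUs */
    struct thread_map *threads = thread_map__new_dummy();

    /* Usual case: hand over the references and forget the locals. */
    perf_evlist__set_maps(evlist, cpus, threads);
    cpus = NULL;
    threads = NULL;

    /* A caller that still needs the maps afterwards would instead pass
     * extra references and drop its own later:
     *
     *      perf_evlist__set_maps(evlist, cpu_map__get(cpus), thread_map__get(threads));
     *      ...
     *      cpu_map__put(cpus);
     *      thread_map__put(threads);
     */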
diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h
index b39a619..115d8b5 100644
--- a/tools/perf/util/evlist.h
+++ b/tools/perf/util/evlist.h
@@ -42,6 +42,7 @@
int nr_mmaps;
bool overwrite;
bool enabled;
+ bool has_user_cpus;
size_t mmap_len;
int id_pos;
int is_pos;
@@ -155,9 +156,8 @@
void perf_evlist__set_selected(struct perf_evlist *evlist,
struct perf_evsel *evsel);
-int perf_evlist__set_maps(struct perf_evlist *evlist,
- struct cpu_map *cpus,
- struct thread_map *threads);
+void perf_evlist__set_maps(struct perf_evlist *evlist, struct cpu_map *cpus,
+ struct thread_map *threads);
int perf_evlist__create_maps(struct perf_evlist *evlist, struct target *target);
int perf_evlist__apply_filters(struct perf_evlist *evlist, struct perf_evsel **err_evsel);
@@ -179,8 +179,7 @@
bool perf_evlist__valid_read_format(struct perf_evlist *evlist);
void perf_evlist__splice_list_tail(struct perf_evlist *evlist,
- struct list_head *list,
- int nr_entries);
+ struct list_head *list);
static inline struct perf_evsel *perf_evlist__first(struct perf_evlist *evlist)
{
diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index c53f791..5410483 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -1033,6 +1033,7 @@
perf_evsel__free_config_terms(evsel);
close_cgroup(evsel->cgrp);
cpu_map__put(evsel->cpus);
+ cpu_map__put(evsel->own_cpus);
thread_map__put(evsel->threads);
zfree(&evsel->group_name);
zfree(&evsel->name);
diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h
index 298e6bb..ef8925f 100644
--- a/tools/perf/util/evsel.h
+++ b/tools/perf/util/evsel.h
@@ -98,6 +98,7 @@
struct cgroup_sel *cgrp;
void *handler;
struct cpu_map *cpus;
+ struct cpu_map *own_cpus;
struct thread_map *threads;
unsigned int sample_size;
int id_pos;
diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c
index 4181454..fce6634 100644
--- a/tools/perf/util/header.c
+++ b/tools/perf/util/header.c
@@ -1438,7 +1438,7 @@
if (ph->needs_swap)
nr = bswap_32(nr);
- ph->env.nr_cpus_online = nr;
+ ph->env.nr_cpus_avail = nr;
ret = readn(fd, &nr, sizeof(nr));
if (ret != sizeof(nr))
@@ -1447,7 +1447,7 @@
if (ph->needs_swap)
nr = bswap_32(nr);
- ph->env.nr_cpus_avail = nr;
+ ph->env.nr_cpus_online = nr;
return 0;
}
diff --git a/tools/perf/util/intel-bts.c b/tools/perf/util/intel-bts.c
index ea76862..eb0e7f8 100644
--- a/tools/perf/util/intel-bts.c
+++ b/tools/perf/util/intel-bts.c
@@ -623,7 +623,7 @@
if (err)
return err;
if (event->header.type == PERF_RECORD_EXIT) {
- err = intel_bts_process_tid_exit(bts, event->comm.tid);
+ err = intel_bts_process_tid_exit(bts, event->fork.tid);
if (err)
return err;
}
diff --git a/tools/perf/util/intel-pt.c b/tools/perf/util/intel-pt.c
index bb41c20..535d86f 100644
--- a/tools/perf/util/intel-pt.c
+++ b/tools/perf/util/intel-pt.c
@@ -1494,7 +1494,7 @@
if (pt->timeless_decoding) {
if (event->header.type == PERF_RECORD_EXIT) {
err = intel_pt_process_timeless_queues(pt,
- event->comm.tid,
+ event->fork.tid,
sample->time);
}
} else if (timestamp) {
diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c
index d826e6f..21ed6ee 100644
--- a/tools/perf/util/parse-events.c
+++ b/tools/perf/util/parse-events.c
@@ -287,8 +287,8 @@
if (!evsel)
return NULL;
- if (cpus)
- evsel->cpus = cpu_map__get(cpus);
+ evsel->cpus = cpu_map__get(cpus);
+ evsel->own_cpus = cpu_map__get(cpus);
if (name)
evsel->name = strdup(name);
@@ -1140,10 +1140,9 @@
ret = parse_events__scanner(str, &data, PE_START_EVENTS);
perf_pmu__parse_cleanup();
if (!ret) {
- int entries = data.idx - evlist->nr_entries;
struct perf_evsel *last;
- perf_evlist__splice_list_tail(evlist, &data.list, entries);
+ perf_evlist__splice_list_tail(evlist, &data.list);
evlist->nr_groups += data.nr_groups;
last = perf_evlist__last(evlist);
last->cmdline_group_boundary = true;
diff --git a/tools/perf/util/parse-events.y b/tools/perf/util/parse-events.y
index 591905a..9cd7081 100644
--- a/tools/perf/util/parse-events.y
+++ b/tools/perf/util/parse-events.y
@@ -255,7 +255,7 @@
list_add_tail(&term->list, head);
ALLOC_LIST(list);
- ABORT_ON(parse_events_add_pmu(list, &data->idx, "cpu", head));
+ ABORT_ON(parse_events_add_pmu(data, list, "cpu", head));
parse_events__free_terms(head);
$$ = list;
}
diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile
index 0501511..89b05e22 100644
--- a/tools/testing/selftests/Makefile
+++ b/tools/testing/selftests/Makefile
@@ -6,6 +6,7 @@
TARGETS += ftrace
TARGETS += futex
TARGETS += kcmp
+TARGETS += membarrier
TARGETS += memfd
TARGETS += memory-hotplug
TARGETS += mount
diff --git a/tools/testing/selftests/membarrier/.gitignore b/tools/testing/selftests/membarrier/.gitignore
new file mode 100644
index 0000000..020c44f4
--- /dev/null
+++ b/tools/testing/selftests/membarrier/.gitignore
@@ -0,0 +1 @@
+membarrier_test
diff --git a/tools/testing/selftests/membarrier/Makefile b/tools/testing/selftests/membarrier/Makefile
new file mode 100644
index 0000000..877a503
--- /dev/null
+++ b/tools/testing/selftests/membarrier/Makefile
@@ -0,0 +1,11 @@
+CFLAGS += -g -I../../../../usr/include/
+
+all:
+ $(CC) $(CFLAGS) membarrier_test.c -o membarrier_test
+
+TEST_PROGS := membarrier_test
+
+include ../lib.mk
+
+clean:
+ $(RM) membarrier_test
diff --git a/tools/testing/selftests/membarrier/membarrier_test.c b/tools/testing/selftests/membarrier/membarrier_test.c
new file mode 100644
index 0000000..dde3125
--- /dev/null
+++ b/tools/testing/selftests/membarrier/membarrier_test.c
@@ -0,0 +1,121 @@
+#define _GNU_SOURCE
+#define __EXPORTED_HEADERS__
+
+#include <linux/membarrier.h>
+#include <asm-generic/unistd.h>
+#include <sys/syscall.h>
+#include <stdio.h>
+#include <errno.h>
+#include <string.h>
+
+#include "../kselftest.h"
+
+enum test_membarrier_status {
+ TEST_MEMBARRIER_PASS = 0,
+ TEST_MEMBARRIER_FAIL,
+ TEST_MEMBARRIER_SKIP,
+};
+
+static int sys_membarrier(int cmd, int flags)
+{
+ return syscall(__NR_membarrier, cmd, flags);
+}
+
+static enum test_membarrier_status test_membarrier_cmd_fail(void)
+{
+ int cmd = -1, flags = 0;
+
+ if (sys_membarrier(cmd, flags) != -1) {
+ printf("membarrier: Wrong command should fail but passed.\n");
+ return TEST_MEMBARRIER_FAIL;
+ }
+ return TEST_MEMBARRIER_PASS;
+}
+
+static enum test_membarrier_status test_membarrier_flags_fail(void)
+{
+ int cmd = MEMBARRIER_CMD_QUERY, flags = 1;
+
+ if (sys_membarrier(cmd, flags) != -1) {
+ printf("membarrier: Wrong flags should fail but passed.\n");
+ return TEST_MEMBARRIER_FAIL;
+ }
+ return TEST_MEMBARRIER_PASS;
+}
+
+static enum test_membarrier_status test_membarrier_success(void)
+{
+ int cmd = MEMBARRIER_CMD_SHARED, flags = 0;
+
+ if (sys_membarrier(cmd, flags) != 0) {
+ printf("membarrier: Executing MEMBARRIER_CMD_SHARED failed. %s.\n",
+ strerror(errno));
+ return TEST_MEMBARRIER_FAIL;
+ }
+
+ printf("membarrier: MEMBARRIER_CMD_SHARED success.\n");
+ return TEST_MEMBARRIER_PASS;
+}
+
+static enum test_membarrier_status test_membarrier(void)
+{
+ enum test_membarrier_status status;
+
+ status = test_membarrier_cmd_fail();
+ if (status)
+ return status;
+ status = test_membarrier_flags_fail();
+ if (status)
+ return status;
+ status = test_membarrier_success();
+ if (status)
+ return status;
+ return TEST_MEMBARRIER_PASS;
+}
+
+static enum test_membarrier_status test_membarrier_query(void)
+{
+ int flags = 0, ret;
+
+ printf("membarrier MEMBARRIER_CMD_QUERY ");
+ ret = sys_membarrier(MEMBARRIER_CMD_QUERY, flags);
+ if (ret < 0) {
+ printf("failed. %s.\n", strerror(errno));
+ switch (errno) {
+ case ENOSYS:
+ /*
+ * It is valid to build a kernel with
+ * CONFIG_MEMBARRIER=n. However, this skips the tests.
+ */
+ return TEST_MEMBARRIER_SKIP;
+ case EINVAL:
+ default:
+ return TEST_MEMBARRIER_FAIL;
+ }
+ }
+ if (!(ret & MEMBARRIER_CMD_SHARED)) {
+ printf("command MEMBARRIER_CMD_SHARED is not supported.\n");
+ return TEST_MEMBARRIER_FAIL;
+ }
+ printf("syscall available.\n");
+ return TEST_MEMBARRIER_PASS;
+}
+
+int main(int argc, char **argv)
+{
+ switch (test_membarrier_query()) {
+ case TEST_MEMBARRIER_FAIL:
+ return ksft_exit_fail();
+ case TEST_MEMBARRIER_SKIP:
+ return ksft_exit_skip();
+ }
+ switch (test_membarrier()) {
+ case TEST_MEMBARRIER_FAIL:
+ return ksft_exit_fail();
+ case TEST_MEMBARRIER_SKIP:
+ return ksft_exit_skip();
+ }
+
+ printf("membarrier: tests done!\n");
+ return ksft_exit_pass();
+}
diff --git a/tools/testing/selftests/x86/entry_from_vm86.c b/tools/testing/selftests/x86/entry_from_vm86.c
index 9a43a59..421c607 100644
--- a/tools/testing/selftests/x86/entry_from_vm86.c
+++ b/tools/testing/selftests/x86/entry_from_vm86.c
@@ -116,8 +116,9 @@
v86->regs.eip = eip;
ret = vm86(VM86_ENTER, v86);
- if (ret == -1 && errno == ENOSYS) {
- printf("[SKIP]\tvm86 not supported\n");
+ if (ret == -1 && (errno == ENOSYS || errno == EPERM)) {
+ printf("[SKIP]\tvm86 %s\n",
+ errno == ENOSYS ? "not supported" : "not allowed");
return false;
}
diff --git a/virt/kvm/arm/arch_timer.c b/virt/kvm/arm/arch_timer.c
index 98c95f2..76e38d2 100644
--- a/virt/kvm/arm/arch_timer.c
+++ b/virt/kvm/arm/arch_timer.c
@@ -64,10 +64,10 @@
int ret;
struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
- timer->cntv_ctl |= ARCH_TIMER_CTRL_IT_MASK;
- ret = kvm_vgic_inject_irq(vcpu->kvm, vcpu->vcpu_id,
- timer->irq->irq,
- timer->irq->level);
+ kvm_vgic_set_phys_irq_active(timer->map, true);
+ ret = kvm_vgic_inject_mapped_irq(vcpu->kvm, vcpu->vcpu_id,
+ timer->map,
+ timer->irq->level);
WARN_ON(ret);
}
@@ -117,7 +117,8 @@
cycle_t cval, now;
if ((timer->cntv_ctl & ARCH_TIMER_CTRL_IT_MASK) ||
- !(timer->cntv_ctl & ARCH_TIMER_CTRL_ENABLE))
+ !(timer->cntv_ctl & ARCH_TIMER_CTRL_ENABLE) ||
+ kvm_vgic_get_phys_irq_active(timer->map))
return false;
cval = timer->cntv_cval;
@@ -184,10 +185,11 @@
timer_arm(timer, ns);
}
-void kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu,
- const struct kvm_irq_level *irq)
+int kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu,
+ const struct kvm_irq_level *irq)
{
struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
+ struct irq_phys_map *map;
/*
* The vcpu timer irq number cannot be determined in
@@ -196,6 +198,17 @@
* vcpu timer irq number when the vcpu is reset.
*/
timer->irq = irq;
+
+ /*
+ * Tell the VGIC that the virtual interrupt is tied to a
+ * physical interrupt. We do that once per VCPU.
+ */
+ map = kvm_vgic_map_phys_irq(vcpu, irq->irq, host_vtimer_irq);
+ if (WARN_ON(IS_ERR(map)))
+ return PTR_ERR(map);
+
+ timer->map = map;
+ return 0;
}
void kvm_timer_vcpu_init(struct kvm_vcpu *vcpu)
@@ -335,6 +348,8 @@
struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
timer_disarm(timer);
+ if (timer->map)
+ kvm_vgic_unmap_phys_irq(vcpu, timer->map);
}
void kvm_timer_enable(struct kvm *kvm)
diff --git a/virt/kvm/arm/vgic-v2.c b/virt/kvm/arm/vgic-v2.c
index f9b9c7c..8d7b04d 100644
--- a/virt/kvm/arm/vgic-v2.c
+++ b/virt/kvm/arm/vgic-v2.c
@@ -48,6 +48,10 @@
lr_desc.state |= LR_STATE_ACTIVE;
if (val & GICH_LR_EOI)
lr_desc.state |= LR_EOI_INT;
+ if (val & GICH_LR_HW) {
+ lr_desc.state |= LR_HW;
+ lr_desc.hwirq = (val & GICH_LR_PHYSID_CPUID) >> GICH_LR_PHYSID_CPUID_SHIFT;
+ }
return lr_desc;
}
@@ -55,7 +59,9 @@
static void vgic_v2_set_lr(struct kvm_vcpu *vcpu, int lr,
struct vgic_lr lr_desc)
{
- u32 lr_val = (lr_desc.source << GICH_LR_PHYSID_CPUID_SHIFT) | lr_desc.irq;
+ u32 lr_val;
+
+ lr_val = lr_desc.irq;
if (lr_desc.state & LR_STATE_PENDING)
lr_val |= GICH_LR_PENDING_BIT;
@@ -64,6 +70,14 @@
if (lr_desc.state & LR_EOI_INT)
lr_val |= GICH_LR_EOI;
+ if (lr_desc.state & LR_HW) {
+ lr_val |= GICH_LR_HW;
+ lr_val |= (u32)lr_desc.hwirq << GICH_LR_PHYSID_CPUID_SHIFT;
+ }
+
+ if (lr_desc.irq < VGIC_NR_SGIS)
+ lr_val |= (lr_desc.source << GICH_LR_PHYSID_CPUID_SHIFT);
+
vcpu->arch.vgic_cpu.vgic_v2.vgic_lr[lr] = lr_val;
}
diff --git a/virt/kvm/arm/vgic-v3.c b/virt/kvm/arm/vgic-v3.c
index dff0602..afbf925 100644
--- a/virt/kvm/arm/vgic-v3.c
+++ b/virt/kvm/arm/vgic-v3.c
@@ -67,6 +67,10 @@
lr_desc.state |= LR_STATE_ACTIVE;
if (val & ICH_LR_EOI)
lr_desc.state |= LR_EOI_INT;
+ if (val & ICH_LR_HW) {
+ lr_desc.state |= LR_HW;
+ lr_desc.hwirq = (val >> ICH_LR_PHYS_ID_SHIFT) & GENMASK(9, 0);
+ }
return lr_desc;
}
@@ -84,10 +88,17 @@
* Eventually we want to make this configurable, so we may revisit
* this in the future.
*/
- if (vcpu->kvm->arch.vgic.vgic_model == KVM_DEV_TYPE_ARM_VGIC_V3)
+ switch (vcpu->kvm->arch.vgic.vgic_model) {
+ case KVM_DEV_TYPE_ARM_VGIC_V3:
lr_val |= ICH_LR_GROUP;
- else
- lr_val |= (u32)lr_desc.source << GICH_LR_PHYSID_CPUID_SHIFT;
+ break;
+ case KVM_DEV_TYPE_ARM_VGIC_V2:
+ if (lr_desc.irq < VGIC_NR_SGIS)
+ lr_val |= (u32)lr_desc.source << GICH_LR_PHYSID_CPUID_SHIFT;
+ break;
+ default:
+ BUG();
+ }
if (lr_desc.state & LR_STATE_PENDING)
lr_val |= ICH_LR_PENDING_BIT;
@@ -95,6 +106,10 @@
lr_val |= ICH_LR_ACTIVE_BIT;
if (lr_desc.state & LR_EOI_INT)
lr_val |= ICH_LR_EOI;
+ if (lr_desc.state & LR_HW) {
+ lr_val |= ICH_LR_HW;
+ lr_val |= ((u64)lr_desc.hwirq) << ICH_LR_PHYS_ID_SHIFT;
+ }
vcpu->arch.vgic_cpu.vgic_v3.vgic_lr[LR_INDEX(lr)] = lr_val;
}
diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c
index bc40137..9eb489a 100644
--- a/virt/kvm/arm/vgic.c
+++ b/virt/kvm/arm/vgic.c
@@ -24,6 +24,7 @@
#include <linux/of.h>
#include <linux/of_address.h>
#include <linux/of_irq.h>
+#include <linux/rculist.h>
#include <linux/uaccess.h>
#include <asm/kvm_emulate.h>
@@ -74,6 +75,28 @@
* cause the interrupt to become inactive in such a situation.
* Conversely, writes to GICD_ICPENDRn do not cause the interrupt to become
* inactive as long as the external input line is held high.
+ *
+ *
+ * Initialization rules: there are multiple stages to the vgic
+ * initialization, both for the distributor and the CPU interfaces.
+ *
+ * Distributor:
+ *
+ * - kvm_vgic_early_init(): initialization of static data that doesn't
+ * depend on any sizing information or emulation type. No allocation
+ * is allowed there.
+ *
+ * - vgic_init(): allocation and initialization of the generic data
+ * structures that depend on sizing information (number of CPUs,
+ * number of interrupts). Also initializes the vcpu specific data
+ * structures. Can be executed lazily for GICv2.
+ * [to be renamed to kvm_vgic_init??]
+ *
+ * CPU Interface:
+ *
+ * - kvm_vgic_cpu_early_init(): initialization of static data that
+ * doesn't depend on any sizing information or emulation type. No
+ * allocation is allowed there.
*/
#include "vgic.h"
@@ -82,6 +105,8 @@
static void vgic_retire_lr(int lr_nr, int irq, struct kvm_vcpu *vcpu);
static struct vgic_lr vgic_get_lr(const struct kvm_vcpu *vcpu, int lr);
static void vgic_set_lr(struct kvm_vcpu *vcpu, int lr, struct vgic_lr lr_desc);
+static struct irq_phys_map *vgic_irq_map_search(struct kvm_vcpu *vcpu,
+ int virt_irq);
static const struct vgic_ops *vgic_ops;
static const struct vgic_params *vgic;
@@ -375,7 +400,7 @@
static bool vgic_can_sample_irq(struct kvm_vcpu *vcpu, int irq)
{
- return vgic_irq_is_edge(vcpu, irq) || !vgic_irq_is_queued(vcpu, irq);
+ return !vgic_irq_is_queued(vcpu, irq);
}
/**
@@ -1115,6 +1140,39 @@
if (!vgic_irq_is_edge(vcpu, irq))
vlr.state |= LR_EOI_INT;
+ if (vlr.irq >= VGIC_NR_SGIS) {
+ struct irq_phys_map *map;
+ map = vgic_irq_map_search(vcpu, irq);
+
+ /*
+ * If we have a mapping, and the virtual interrupt is
+ * being injected, then we must set the state to
+ * active in the physical world. Otherwise the
+ * physical interrupt will fire and the guest will
+ * exit before processing the virtual interrupt.
+ */
+ if (map) {
+ int ret;
+
+ BUG_ON(!map->active);
+ vlr.hwirq = map->phys_irq;
+ vlr.state |= LR_HW;
+ vlr.state &= ~LR_EOI_INT;
+
+ ret = irq_set_irqchip_state(map->irq,
+ IRQCHIP_STATE_ACTIVE,
+ true);
+ WARN_ON(ret);
+
+ /*
+ * Make sure we're not going to sample this
+ * again, as a HW-backed interrupt cannot be
+ * in the PENDING_ACTIVE stage.
+ */
+ vgic_irq_set_queued(vcpu, irq);
+ }
+ }
+
vgic_set_lr(vcpu, lr_nr, vlr);
vgic_sync_lr_elrsr(vcpu, lr_nr, vlr);
}
@@ -1339,6 +1397,39 @@
return level_pending;
}
+/*
+ * Save the physical active state, and reset it to inactive.
+ *
+ * Return 1 if HW interrupt went from active to inactive, and 0 otherwise.
+ */
+static int vgic_sync_hwirq(struct kvm_vcpu *vcpu, struct vgic_lr vlr)
+{
+ struct irq_phys_map *map;
+ int ret;
+
+ if (!(vlr.state & LR_HW))
+ return 0;
+
+ map = vgic_irq_map_search(vcpu, vlr.irq);
+ BUG_ON(!map || !map->active);
+
+ ret = irq_get_irqchip_state(map->irq,
+ IRQCHIP_STATE_ACTIVE,
+ &map->active);
+
+ WARN_ON(ret);
+
+ if (map->active) {
+ ret = irq_set_irqchip_state(map->irq,
+ IRQCHIP_STATE_ACTIVE,
+ false);
+ WARN_ON(ret);
+ return 0;
+ }
+
+ return 1;
+}
+
/* Sync back the VGIC state after a guest run */
static void __kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu)
{
@@ -1353,14 +1444,31 @@
elrsr = vgic_get_elrsr(vcpu);
elrsr_ptr = u64_to_bitmask(&elrsr);
- /* Clear mappings for empty LRs */
- for_each_set_bit(lr, elrsr_ptr, vgic->nr_lr) {
+ /* Deal with HW interrupts, and clear mappings for empty LRs */
+ for (lr = 0; lr < vgic->nr_lr; lr++) {
struct vgic_lr vlr;
- if (!test_and_clear_bit(lr, vgic_cpu->lr_used))
+ if (!test_bit(lr, vgic_cpu->lr_used))
continue;
vlr = vgic_get_lr(vcpu, lr);
+ if (vgic_sync_hwirq(vcpu, vlr)) {
+ /*
+ * So this is a HW interrupt that the guest
+ * EOI-ed. Clean the LR state and allow the
+ * interrupt to be sampled again.
+ */
+ vlr.state = 0;
+ vlr.hwirq = 0;
+ vgic_set_lr(vcpu, lr, vlr);
+ vgic_irq_clear_queued(vcpu, vlr.irq);
+ set_bit(lr, elrsr_ptr);
+ }
+
+ if (!test_bit(lr, elrsr_ptr))
+ continue;
+
+ clear_bit(lr, vgic_cpu->lr_used);
BUG_ON(vlr.irq >= dist->nr_irqs);
vgic_cpu->vgic_irq_lr_map[vlr.irq] = LR_EMPTY;
@@ -1447,7 +1555,8 @@
}
static int vgic_update_irq_pending(struct kvm *kvm, int cpuid,
- unsigned int irq_num, bool level)
+ struct irq_phys_map *map,
+ unsigned int irq_num, bool level)
{
struct vgic_dist *dist = &kvm->arch.vgic;
struct kvm_vcpu *vcpu;
@@ -1455,6 +1564,9 @@
int enabled;
bool ret = true, can_inject = true;
+ if (irq_num >= min(kvm->arch.vgic.nr_irqs, 1020))
+ return -EINVAL;
+
spin_lock(&dist->lock);
vcpu = kvm_get_vcpu(kvm, cpuid);
@@ -1517,28 +1629,17 @@
out:
spin_unlock(&dist->lock);
- return ret ? cpuid : -EINVAL;
+ if (ret) {
+ /* kick the specified vcpu */
+ kvm_vcpu_kick(kvm_get_vcpu(kvm, cpuid));
+ }
+
+ return 0;
}
-/**
- * kvm_vgic_inject_irq - Inject an IRQ from a device to the vgic
- * @kvm: The VM structure pointer
- * @cpuid: The CPU for PPIs
- * @irq_num: The IRQ number that is assigned to the device
- * @level: Edge-triggered: true: to trigger the interrupt
- * false: to ignore the call
- * Level-sensitive true: activates an interrupt
- * false: deactivates an interrupt
- *
- * The GIC is not concerned with devices being active-LOW or active-HIGH for
- * level-sensitive interrupts. You can think of the level parameter as 1
- * being HIGH and 0 being LOW and all devices being active-HIGH.
- */
-int kvm_vgic_inject_irq(struct kvm *kvm, int cpuid, unsigned int irq_num,
- bool level)
+static int vgic_lazy_init(struct kvm *kvm)
{
int ret = 0;
- int vcpu_id;
if (unlikely(!vgic_initialized(kvm))) {
/*
@@ -1547,29 +1648,73 @@
* be explicitly initialized once setup with the respective
* KVM device call.
*/
- if (kvm->arch.vgic.vgic_model != KVM_DEV_TYPE_ARM_VGIC_V2) {
- ret = -EBUSY;
- goto out;
- }
+ if (kvm->arch.vgic.vgic_model != KVM_DEV_TYPE_ARM_VGIC_V2)
+ return -EBUSY;
+
mutex_lock(&kvm->lock);
ret = vgic_init(kvm);
mutex_unlock(&kvm->lock);
-
- if (ret)
- goto out;
}
- if (irq_num >= min(kvm->arch.vgic.nr_irqs, 1020))
+ return ret;
+}
+
+/**
+ * kvm_vgic_inject_irq - Inject an IRQ from a device to the vgic
+ * @kvm: The VM structure pointer
+ * @cpuid: The CPU for PPIs
+ * @irq_num: The IRQ number that is assigned to the device. This IRQ
+ * must not be mapped to a HW interrupt.
+ * @level: Edge-triggered: true: to trigger the interrupt
+ * false: to ignore the call
+ * Level-sensitive true: raise the input signal
+ * false: lower the input signal
+ *
+ * The GIC is not concerned with devices being active-LOW or active-HIGH for
+ * level-sensitive interrupts. You can think of the level parameter as 1
+ * being HIGH and 0 being LOW and all devices being active-HIGH.
+ */
+int kvm_vgic_inject_irq(struct kvm *kvm, int cpuid, unsigned int irq_num,
+ bool level)
+{
+ struct irq_phys_map *map;
+ int ret;
+
+ ret = vgic_lazy_init(kvm);
+ if (ret)
+ return ret;
+
+ map = vgic_irq_map_search(kvm_get_vcpu(kvm, cpuid), irq_num);
+ if (map)
return -EINVAL;
- vcpu_id = vgic_update_irq_pending(kvm, cpuid, irq_num, level);
- if (vcpu_id >= 0) {
- /* kick the specified vcpu */
- kvm_vcpu_kick(kvm_get_vcpu(kvm, vcpu_id));
- }
+ return vgic_update_irq_pending(kvm, cpuid, NULL, irq_num, level);
+}
-out:
- return ret;
+/**
+ * kvm_vgic_inject_mapped_irq - Inject a physically mapped IRQ to the vgic
+ * @kvm: The VM structure pointer
+ * @cpuid: The CPU for PPIs
+ * @map: Pointer to a irq_phys_map structure describing the mapping
+ * @level: Edge-triggered: true: to trigger the interrupt
+ * false: to ignore the call
+ * Level-sensitive true: raise the input signal
+ * false: lower the input signal
+ *
+ * The GIC is not concerned with devices being active-LOW or active-HIGH for
+ * level-sensitive interrupts. You can think of the level parameter as 1
+ * being HIGH and 0 being LOW and all devices being active-HIGH.
+ */
+int kvm_vgic_inject_mapped_irq(struct kvm *kvm, int cpuid,
+ struct irq_phys_map *map, bool level)
+{
+ int ret;
+
+ ret = vgic_lazy_init(kvm);
+ if (ret)
+ return ret;
+
+ return vgic_update_irq_pending(kvm, cpuid, map, map->virt_irq, level);
}
static irqreturn_t vgic_maintenance_handler(int irq, void *data)
@@ -1583,6 +1728,188 @@
return IRQ_HANDLED;
}
+static struct list_head *vgic_get_irq_phys_map_list(struct kvm_vcpu *vcpu,
+ int virt_irq)
+{
+ if (virt_irq < VGIC_NR_PRIVATE_IRQS)
+ return &vcpu->arch.vgic_cpu.irq_phys_map_list;
+ else
+ return &vcpu->kvm->arch.vgic.irq_phys_map_list;
+}
+
+/**
+ * kvm_vgic_map_phys_irq - map a virtual IRQ to a physical IRQ
+ * @vcpu: The VCPU pointer
+ * @virt_irq: The virtual irq number
+ * @irq: The Linux IRQ number
+ *
+ * Establish a mapping between a guest visible irq (@virt_irq) and a
+ * Linux irq (@irq). On injection, @virt_irq will be associated with
+ * the physical interrupt represented by @irq. This mapping can be
+ * established multiple times as long as the parameters are the same.
+ *
+ * Returns a valid pointer on success, and an error pointer otherwise
+ */
+struct irq_phys_map *kvm_vgic_map_phys_irq(struct kvm_vcpu *vcpu,
+ int virt_irq, int irq)
+{
+ struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
+ struct list_head *root = vgic_get_irq_phys_map_list(vcpu, virt_irq);
+ struct irq_phys_map *map;
+ struct irq_phys_map_entry *entry;
+ struct irq_desc *desc;
+ struct irq_data *data;
+ int phys_irq;
+
+ desc = irq_to_desc(irq);
+ if (!desc) {
+ kvm_err("%s: no interrupt descriptor\n", __func__);
+ return ERR_PTR(-EINVAL);
+ }
+
+ data = irq_desc_get_irq_data(desc);
+ while (data->parent_data)
+ data = data->parent_data;
+
+ phys_irq = data->hwirq;
+
+ /* Create a new mapping */
+ entry = kzalloc(sizeof(*entry), GFP_KERNEL);
+ if (!entry)
+ return ERR_PTR(-ENOMEM);
+
+ spin_lock(&dist->irq_phys_map_lock);
+
+ /* Try to match an existing mapping */
+ map = vgic_irq_map_search(vcpu, virt_irq);
+ if (map) {
+ /* Make sure this mapping matches */
+ if (map->phys_irq != phys_irq ||
+ map->irq != irq)
+ map = ERR_PTR(-EINVAL);
+
+ /* Found an existing, valid mapping */
+ goto out;
+ }
+
+ map = &entry->map;
+ map->virt_irq = virt_irq;
+ map->phys_irq = phys_irq;
+ map->irq = irq;
+
+ list_add_tail_rcu(&entry->entry, root);
+
+out:
+ spin_unlock(&dist->irq_phys_map_lock);
+ /* If we've found a hit in the existing list, free the useless
+ * entry */
+ if (IS_ERR(map) || map != &entry->map)
+ kfree(entry);
+ return map;
+}
+
+static struct irq_phys_map *vgic_irq_map_search(struct kvm_vcpu *vcpu,
+ int virt_irq)
+{
+ struct list_head *root = vgic_get_irq_phys_map_list(vcpu, virt_irq);
+ struct irq_phys_map_entry *entry;
+ struct irq_phys_map *map;
+
+ rcu_read_lock();
+
+ list_for_each_entry_rcu(entry, root, entry) {
+ map = &entry->map;
+ if (map->virt_irq == virt_irq) {
+ rcu_read_unlock();
+ return map;
+ }
+ }
+
+ rcu_read_unlock();
+
+ return NULL;
+}
+
+static void vgic_free_phys_irq_map_rcu(struct rcu_head *rcu)
+{
+ struct irq_phys_map_entry *entry;
+
+ entry = container_of(rcu, struct irq_phys_map_entry, rcu);
+ kfree(entry);
+}
+
+/**
+ * kvm_vgic_get_phys_irq_active - Return the active state of a mapped IRQ
+ *
+ * Return the logical active state of a mapped interrupt. This doesn't
+ * necessarily reflect the current HW state.
+ */
+bool kvm_vgic_get_phys_irq_active(struct irq_phys_map *map)
+{
+ BUG_ON(!map);
+ return map->active;
+}
+
+/**
+ * kvm_vgic_set_phys_irq_active - Set the active state of a mapped IRQ
+ *
+ * Set the logical active state of a mapped interrupt. This doesn't
+ * immediately affect the HW state.
+ */
+void kvm_vgic_set_phys_irq_active(struct irq_phys_map *map, bool active)
+{
+ BUG_ON(!map);
+ map->active = active;
+}
+
+/**
+ * kvm_vgic_unmap_phys_irq - Remove a virtual to physical IRQ mapping
+ * @vcpu: The VCPU pointer
+ * @map: The pointer to a mapping obtained through kvm_vgic_map_phys_irq
+ *
+ * Remove an existing mapping between virtual and physical interrupts.
+ */
+int kvm_vgic_unmap_phys_irq(struct kvm_vcpu *vcpu, struct irq_phys_map *map)
+{
+ struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
+ struct irq_phys_map_entry *entry;
+ struct list_head *root;
+
+ if (!map)
+ return -EINVAL;
+
+ root = vgic_get_irq_phys_map_list(vcpu, map->virt_irq);
+
+ spin_lock(&dist->irq_phys_map_lock);
+
+ list_for_each_entry(entry, root, entry) {
+ if (&entry->map == map) {
+ list_del_rcu(&entry->entry);
+ call_rcu(&entry->rcu, vgic_free_phys_irq_map_rcu);
+ break;
+ }
+ }
+
+ spin_unlock(&dist->irq_phys_map_lock);
+
+ return 0;
+}
+
+static void vgic_destroy_irq_phys_map(struct kvm *kvm, struct list_head *root)
+{
+ struct vgic_dist *dist = &kvm->arch.vgic;
+ struct irq_phys_map_entry *entry;
+
+ spin_lock(&dist->irq_phys_map_lock);
+
+ list_for_each_entry(entry, root, entry) {
+ list_del_rcu(&entry->entry);
+ call_rcu(&entry->rcu, vgic_free_phys_irq_map_rcu);
+ }
+
+ spin_unlock(&dist->irq_phys_map_lock);
+}
+
void kvm_vgic_vcpu_destroy(struct kvm_vcpu *vcpu)
{
struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
@@ -1591,6 +1918,7 @@
kfree(vgic_cpu->active_shared);
kfree(vgic_cpu->pend_act_shared);
kfree(vgic_cpu->vgic_irq_lr_map);
+ vgic_destroy_irq_phys_map(vcpu->kvm, &vgic_cpu->irq_phys_map_list);
vgic_cpu->pending_shared = NULL;
vgic_cpu->active_shared = NULL;
vgic_cpu->pend_act_shared = NULL;
@@ -1628,6 +1956,17 @@
}
/**
+ * kvm_vgic_vcpu_early_init - Earliest possible per-vcpu vgic init stage
+ *
+ * No memory allocation should be performed here, only static init.
+ */
+void kvm_vgic_vcpu_early_init(struct kvm_vcpu *vcpu)
+{
+ struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
+ INIT_LIST_HEAD(&vgic_cpu->irq_phys_map_list);
+}
+
+/**
* kvm_vgic_get_max_vcpus - Get the maximum number of VCPUs allowed by HW
*
* The host's GIC naturally limits the maximum amount of VCPUs a guest
@@ -1664,6 +2003,7 @@
kfree(dist->irq_spi_target);
kfree(dist->irq_pending_on_cpu);
kfree(dist->irq_active_on_cpu);
+ vgic_destroy_irq_phys_map(kvm, &dist->irq_phys_map_list);
dist->irq_sgi_sources = NULL;
dist->irq_spi_cpu = NULL;
dist->irq_spi_target = NULL;
@@ -1787,6 +2127,18 @@
return 0;
}
+/**
+ * kvm_vgic_early_init - Earliest possible vgic initialization stage
+ *
+ * No memory allocation should be performed here, only static init.
+ */
+void kvm_vgic_early_init(struct kvm *kvm)
+{
+ spin_lock_init(&kvm->arch.vgic.lock);
+ spin_lock_init(&kvm->arch.vgic.irq_phys_map_lock);
+ INIT_LIST_HEAD(&kvm->arch.vgic.irq_phys_map_list);
+}
+
int kvm_vgic_create(struct kvm *kvm, u32 type)
{
int i, vcpu_lock_idx = -1, ret;
@@ -1832,7 +2184,6 @@
if (ret)
goto out_unlock;
- spin_lock_init(&kvm->arch.vgic.lock);
kvm->arch.vgic.in_kernel = true;
kvm->arch.vgic.vgic_model = type;
kvm->arch.vgic.vctrl_base = vgic->vctrl_base;
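
Taken together with the arch timer changes earlier in this series, the new mapping API is used roughly as follows; a condensed lifecycle sketch in which vcpu, virt_irq, host_irq and level stand in for caller-supplied values and error handling is trimmed:

    struct irq_phys_map *map;

    /* Tie the guest-visible interrupt to the host interrupt, once per VCPU. */
    map = kvm_vgic_map_phys_irq(vcpu, virt_irq, host_irq);
    if (IS_ERR(map))
            return PTR_ERR(map);

    /* Inject through the mapping rather than through the raw irq number. */
    kvm_vgic_set_phys_irq_active(map, true);
    kvm_vgic_inject_mapped_irq(vcpu->kvm, vcpu->vcpu_id, map, level);

    /* On teardown, remove the mapping again. */
    kvm_vgic_unmap_phys_irq(vcpu, map);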
diff --git a/virt/kvm/irqchip.c b/virt/kvm/irqchip.c
index 21c1424..d7ea8e2 100644
--- a/virt/kvm/irqchip.c
+++ b/virt/kvm/irqchip.c
@@ -213,11 +213,15 @@
goto out;
r = -EINVAL;
- if (ue->flags)
+ if (ue->flags) {
+ kfree(e);
goto out;
+ }
r = setup_routing_entry(new, e, ue);
- if (r)
+ if (r) {
+ kfree(e);
goto out;
+ }
++ue;
}
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index d8db2f8f..a25a731 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -66,9 +66,18 @@
MODULE_AUTHOR("Qumranet");
MODULE_LICENSE("GPL");
-static unsigned int halt_poll_ns;
+/* halt polling only reduces halt latency by 5-7 us, 500us is enough */
+static unsigned int halt_poll_ns = 500000;
module_param(halt_poll_ns, uint, S_IRUGO | S_IWUSR);
+/* Default doubles per-vcpu halt_poll_ns. */
+static unsigned int halt_poll_ns_grow = 2;
+module_param(halt_poll_ns_grow, int, S_IRUGO);
+
+/* Default resets per-vcpu halt_poll_ns. */
+static unsigned int halt_poll_ns_shrink;
+module_param(halt_poll_ns_shrink, int, S_IRUGO);
+
/*
* Ordering of locks:
*
@@ -217,6 +226,7 @@
vcpu->kvm = kvm;
vcpu->vcpu_id = id;
vcpu->pid = NULL;
+ vcpu->halt_poll_ns = 0;
init_waitqueue_head(&vcpu->wq);
kvm_async_pf_vcpu_init(vcpu);
@@ -387,6 +397,36 @@
return young;
}
+static int kvm_mmu_notifier_clear_young(struct mmu_notifier *mn,
+ struct mm_struct *mm,
+ unsigned long start,
+ unsigned long end)
+{
+ struct kvm *kvm = mmu_notifier_to_kvm(mn);
+ int young, idx;
+
+ idx = srcu_read_lock(&kvm->srcu);
+ spin_lock(&kvm->mmu_lock);
+ /*
+ * Even though we do not flush TLB, this will still adversely
+ * affect performance on pre-Haswell Intel EPT, where there is
+ * no EPT Access Bit to clear so that we have to tear down EPT
+ * tables instead. If we find this unacceptable, we can always
+ * add a parameter to kvm_age_hva so that it effectively doesn't
+ * do anything on clear_young.
+ *
+ * Also note that currently we never issue secondary TLB flushes
+ * from clear_young, leaving this job up to the regular system
+ * cadence. If we find this inaccurate, we might come up with a
+ * more sophisticated heuristic later.
+ */
+ young = kvm_age_hva(kvm, start, end);
+ spin_unlock(&kvm->mmu_lock);
+ srcu_read_unlock(&kvm->srcu, idx);
+
+ return young;
+}
+
static int kvm_mmu_notifier_test_young(struct mmu_notifier *mn,
struct mm_struct *mm,
unsigned long address)
@@ -419,6 +459,7 @@
.invalidate_range_start = kvm_mmu_notifier_invalidate_range_start,
.invalidate_range_end = kvm_mmu_notifier_invalidate_range_end,
.clear_flush_young = kvm_mmu_notifier_clear_flush_young,
+ .clear_young = kvm_mmu_notifier_clear_young,
.test_young = kvm_mmu_notifier_test_young,
.change_pte = kvm_mmu_notifier_change_pte,
.release = kvm_mmu_notifier_release,
@@ -1906,6 +1947,35 @@
}
EXPORT_SYMBOL_GPL(kvm_vcpu_mark_page_dirty);
+static void grow_halt_poll_ns(struct kvm_vcpu *vcpu)
+{
+ int old, val;
+
+ old = val = vcpu->halt_poll_ns;
+ /* 10us base */
+ if (val == 0 && halt_poll_ns_grow)
+ val = 10000;
+ else
+ val *= halt_poll_ns_grow;
+
+ vcpu->halt_poll_ns = val;
+ trace_kvm_halt_poll_ns_grow(vcpu->vcpu_id, val, old);
+}
+
+static void shrink_halt_poll_ns(struct kvm_vcpu *vcpu)
+{
+ int old, val;
+
+ old = val = vcpu->halt_poll_ns;
+ if (halt_poll_ns_shrink == 0)
+ val = 0;
+ else
+ val /= halt_poll_ns_shrink;
+
+ vcpu->halt_poll_ns = val;
+ trace_kvm_halt_poll_ns_shrink(vcpu->vcpu_id, val, old);
+}
+
static int kvm_vcpu_check_block(struct kvm_vcpu *vcpu)
{
if (kvm_arch_vcpu_runnable(vcpu)) {
@@ -1928,10 +1998,11 @@
ktime_t start, cur;
DEFINE_WAIT(wait);
bool waited = false;
+ u64 block_ns;
start = cur = ktime_get();
- if (halt_poll_ns) {
- ktime_t stop = ktime_add_ns(ktime_get(), halt_poll_ns);
+ if (vcpu->halt_poll_ns) {
+ ktime_t stop = ktime_add_ns(ktime_get(), vcpu->halt_poll_ns);
do {
/*
@@ -1960,7 +2031,21 @@
cur = ktime_get();
out:
- trace_kvm_vcpu_wakeup(ktime_to_ns(cur) - ktime_to_ns(start), waited);
+ block_ns = ktime_to_ns(cur) - ktime_to_ns(start);
+
+ if (halt_poll_ns) {
+ if (block_ns <= vcpu->halt_poll_ns)
+ ;
+ /* we had a long block, shrink polling */
+ else if (vcpu->halt_poll_ns && block_ns > halt_poll_ns)
+ shrink_halt_poll_ns(vcpu);
+ /* we had a short halt and our poll time is too small */
+ else if (vcpu->halt_poll_ns < halt_poll_ns &&
+ block_ns < halt_poll_ns)
+ grow_halt_poll_ns(vcpu);
+ }
+
+ trace_kvm_vcpu_wakeup(block_ns, waited);
}
EXPORT_SYMBOL_GPL(kvm_vcpu_block);
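
With the defaults introduced above (halt_poll_ns = 500000, halt_poll_ns_grow = 2, halt_poll_ns_shrink = 0), the per-vcpu poll window evolves roughly as this worked example shows:

    /* Starting from vcpu->halt_poll_ns == 0:
     *
     *   successive short halts (block_ns < 500us while the window is < 500us)
     *       grow the window 0 -> 10us -> 20us -> 40us -> ... -> 320us -> 640us,
     *       after which it stops growing;
     *   a single long block (block_ns > 500us) with halt_poll_ns_shrink == 0
     *       resets the window straight back to 0;
     *   a block that completes within the current window leaves it unchanged.
     */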