Merge branch 'topic/huang-d3cold-v7' into next
* topic/huang-d3cold-v7:
PCI/PM: add PCIe runtime D3cold support
PCI: do not call pci_set_power_state with PCI_D3cold
PCI/PM: add runtime PM support to PCIe port
ACPI/PM: specify lowest allowed state for device sleep state
diff --git a/arch/x86/pci/mrst.c b/arch/x86/pci/mrst.c
index 140942f..e14a2ff 100644
--- a/arch/x86/pci/mrst.c
+++ b/arch/x86/pci/mrst.c
@@ -264,7 +264,7 @@
static void __devinit mrst_power_off_unused_dev(struct pci_dev *dev)
{
- pci_set_power_state(dev, PCI_D3cold);
+ pci_set_power_state(dev, PCI_D3hot);
}
DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x0801, mrst_power_off_unused_dev);
DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x0809, mrst_power_off_unused_dev);
diff --git a/drivers/acpi/sleep.c b/drivers/acpi/sleep.c
index 88561029..1cc02ca 100644
--- a/drivers/acpi/sleep.c
+++ b/drivers/acpi/sleep.c
@@ -716,8 +716,9 @@
* @dev: device to examine; its driver model wakeup flags control
* whether it should be able to wake up the system
* @d_min_p: used to store the upper limit of allowed states range
- * Return value: preferred power state of the device on success, -ENODEV on
- * failure (ie. if there's no 'struct acpi_device' for @dev)
+ * @d_max_in: specify the lowest allowed states
+ * Return value: preferred power state of the device on success, -ENODEV
+ * (ie. if there's no 'struct acpi_device' for @dev) or -EINVAL on failure
*
* Find the lowest power (highest number) ACPI device power state that
* device @dev can be in while the system is in the sleep state represented
@@ -732,13 +733,15 @@
* via @wake.
*/
-int acpi_pm_device_sleep_state(struct device *dev, int *d_min_p)
+int acpi_pm_device_sleep_state(struct device *dev, int *d_min_p, int d_max_in)
{
acpi_handle handle = DEVICE_ACPI_HANDLE(dev);
struct acpi_device *adev;
char acpi_method[] = "_SxD";
unsigned long long d_min, d_max;
+ if (d_max_in < ACPI_STATE_D0 || d_max_in > ACPI_STATE_D3)
+ return -EINVAL;
if (!handle || ACPI_FAILURE(acpi_bus_get_device(handle, &adev))) {
printk(KERN_DEBUG "ACPI handle has no context!\n");
return -ENODEV;
@@ -746,8 +749,10 @@
acpi_method[2] = '0' + acpi_target_sleep_state;
/*
- * If the sleep state is S0, we will return D3, but if the device has
- * _S0W, we will use the value from _S0W
+ * If the sleep state is S0, the lowest limit from ACPI is D3,
+ * but if the device has _S0W, we will use the value from _S0W
+ * as the lowest limit from ACPI. Finally, we will constrain
+ * the lowest limit with the specified one.
*/
d_min = ACPI_STATE_D0;
d_max = ACPI_STATE_D3;
@@ -791,8 +796,17 @@
}
}
+ if (d_max_in < d_min)
+ return -EINVAL;
if (d_min_p)
*d_min_p = d_min;
+ /* constrain d_max with specified lowest limit (max number) */
+ if (d_max > d_max_in) {
+ for (d_max = d_max_in; d_max > d_min; d_max--) {
+ if (adev->power.states[d_max].flags.valid)
+ break;
+ }
+ }
return d_max;
}
#endif /* CONFIG_PM */
diff --git a/drivers/misc/cb710/core.c b/drivers/misc/cb710/core.c
index 85cc771..9d5eed7 100644
--- a/drivers/misc/cb710/core.c
+++ b/drivers/misc/cb710/core.c
@@ -180,7 +180,7 @@
pci_save_state(pdev);
pci_disable_device(pdev);
if (state.event & PM_EVENT_SLEEP)
- pci_set_power_state(pdev, PCI_D3cold);
+ pci_set_power_state(pdev, PCI_D3hot);
return 0;
}
diff --git a/drivers/pci/pci-acpi.c b/drivers/pci/pci-acpi.c
index 87f4c50..fbf7b26 100644
--- a/drivers/pci/pci-acpi.c
+++ b/drivers/pci/pci-acpi.c
@@ -48,6 +48,12 @@
if (event != ACPI_NOTIFY_DEVICE_WAKE || !pci_dev)
return;
+ if (pci_dev->current_state == PCI_D3cold) {
+ pci_wakeup_event(pci_dev);
+ pm_runtime_resume(&pci_dev->dev);
+ return;
+ }
+
if (!pci_dev->pm_cap || !pci_dev->pme_support
|| pci_check_pme_status(pci_dev)) {
if (pci_dev->pme_poll)
@@ -201,9 +207,13 @@
static pci_power_t acpi_pci_choose_state(struct pci_dev *pdev)
{
- int acpi_state;
+ int acpi_state, d_max;
- acpi_state = acpi_pm_device_sleep_state(&pdev->dev, NULL);
+ if (pdev->no_d3cold)
+ d_max = ACPI_STATE_D3_HOT;
+ else
+ d_max = ACPI_STATE_D3_COLD;
+ acpi_state = acpi_pm_device_sleep_state(&pdev->dev, NULL, d_max);
if (acpi_state < 0)
return PCI_POWER_ERROR;
@@ -310,7 +320,13 @@
static int acpi_pci_run_wake(struct pci_dev *dev, bool enable)
{
- if (dev->pme_interrupt)
+ /*
+ * Per PCI Express Base Specification Revision 2.0 section
+ * 5.3.3.2 Link Wakeup, platform support is needed for D3cold
+ * waking up to power on the main link even if there is PME
+ * support for D3cold
+ */
+ if (dev->pme_interrupt && !dev->runtime_d3cold)
return 0;
if (!acpi_pm_device_run_wake(&dev->dev, enable))
diff --git a/drivers/pci/pci-driver.c b/drivers/pci/pci-driver.c
index bf0cee6..ca2e4c7 100644
--- a/drivers/pci/pci-driver.c
+++ b/drivers/pci/pci-driver.c
@@ -1019,10 +1019,13 @@
if (!pm || !pm->runtime_suspend)
return -ENOSYS;
+ pci_dev->no_d3cold = false;
error = pm->runtime_suspend(dev);
suspend_report_result(pm->runtime_suspend, error);
if (error)
return error;
+ if (!pci_dev->d3cold_allowed)
+ pci_dev->no_d3cold = true;
pci_fixup_device(pci_fixup_suspend, pci_dev);
@@ -1044,6 +1047,7 @@
static int pci_pm_runtime_resume(struct device *dev)
{
+ int rc;
struct pci_dev *pci_dev = to_pci_dev(dev);
const struct dev_pm_ops *pm = dev->driver ? dev->driver->pm : NULL;
@@ -1054,7 +1058,11 @@
__pci_enable_wake(pci_dev, PCI_D0, true, false);
pci_fixup_device(pci_fixup_resume, pci_dev);
- return pm->runtime_resume(dev);
+ rc = pm->runtime_resume(dev);
+
+ pci_dev->runtime_d3cold = false;
+
+ return rc;
}
static int pci_pm_runtime_idle(struct device *dev)
diff --git a/drivers/pci/pci-sysfs.c b/drivers/pci/pci-sysfs.c
index a0b435f..6869009 100644
--- a/drivers/pci/pci-sysfs.c
+++ b/drivers/pci/pci-sysfs.c
@@ -28,6 +28,7 @@
#include <linux/pci-aspm.h>
#include <linux/slab.h>
#include <linux/vgaarb.h>
+#include <linux/pm_runtime.h>
#include "pci.h"
static int sysfs_initialized; /* = 0 */
@@ -378,6 +379,31 @@
#endif
+#if defined(CONFIG_PM_RUNTIME) && defined(CONFIG_ACPI)
+static ssize_t d3cold_allowed_store(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+ struct pci_dev *pdev = to_pci_dev(dev);
+ unsigned long val;
+
+ if (strict_strtoul(buf, 0, &val) < 0)
+ return -EINVAL;
+
+ pdev->d3cold_allowed = !!val;
+ pm_runtime_resume(dev);
+
+ return count;
+}
+
+static ssize_t d3cold_allowed_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct pci_dev *pdev = to_pci_dev(dev);
+ return sprintf (buf, "%u\n", pdev->d3cold_allowed);
+}
+#endif
+
struct device_attribute pci_dev_attrs[] = {
__ATTR_RO(resource),
__ATTR_RO(vendor),
@@ -402,6 +428,9 @@
__ATTR(remove, (S_IWUSR|S_IWGRP), NULL, remove_store),
__ATTR(rescan, (S_IWUSR|S_IWGRP), NULL, dev_rescan_store),
#endif
+#if defined(CONFIG_PM_RUNTIME) && defined(CONFIG_ACPI)
+ __ATTR(d3cold_allowed, 0644, d3cold_allowed_show, d3cold_allowed_store),
+#endif
__ATTR_NULL,
};
diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
index 87928fd..3f8b74f 100644
--- a/drivers/pci/pci.c
+++ b/drivers/pci/pci.c
@@ -587,7 +587,8 @@
dev_info(&dev->dev, "Refused to change power state, "
"currently in D%d\n", dev->current_state);
- /* According to section 5.4.1 of the "PCI BUS POWER MANAGEMENT
+ /*
+ * According to section 5.4.1 of the "PCI BUS POWER MANAGEMENT
* INTERFACE SPECIFICATION, REV. 1.2", a device transitioning
* from D3hot to D0 _may_ perform an internal reset, thereby
* going to "D0 Uninitialized" rather than "D0 Initialized".
@@ -619,6 +620,16 @@
if (dev->pm_cap) {
u16 pmcsr;
+ /*
+ * Configuration space is not accessible for device in
+ * D3cold, so just keep or set D3cold for safety
+ */
+ if (dev->current_state == PCI_D3cold)
+ return;
+ if (state == PCI_D3cold) {
+ dev->current_state = PCI_D3cold;
+ return;
+ }
pci_read_config_word(dev, dev->pm_cap + PCI_PM_CTRL, &pmcsr);
dev->current_state = (pmcsr & PCI_PM_CTRL_STATE_MASK);
} else {
@@ -659,8 +670,50 @@
*/
static void __pci_start_power_transition(struct pci_dev *dev, pci_power_t state)
{
- if (state == PCI_D0)
+ if (state == PCI_D0) {
pci_platform_power_transition(dev, PCI_D0);
+ /*
+ * Mandatory power management transition delays, see
+ * PCI Express Base Specification Revision 2.0 Section
+ * 6.6.1: Conventional Reset. Do not delay for
+ * devices powered on/off by corresponding bridge,
+ * because have already delayed for the bridge.
+ */
+ if (dev->runtime_d3cold) {
+ msleep(dev->d3cold_delay);
+ /*
+ * When powering on a bridge from D3cold, the
+ * whole hierarchy may be powered on into
+ * D0uninitialized state, resume them to give
+ * them a chance to suspend again
+ */
+ pci_wakeup_bus(dev->subordinate);
+ }
+ }
+}
+
+/**
+ * __pci_dev_set_current_state - Set current state of a PCI device
+ * @dev: Device to handle
+ * @data: pointer to state to be set
+ */
+static int __pci_dev_set_current_state(struct pci_dev *dev, void *data)
+{
+ pci_power_t state = *(pci_power_t *)data;
+
+ dev->current_state = state;
+ return 0;
+}
+
+/**
+ * __pci_bus_set_current_state - Walk given bus and set current state of devices
+ * @bus: Top bus of the subtree to walk.
+ * @state: state to be set
+ */
+static void __pci_bus_set_current_state(struct pci_bus *bus, pci_power_t state)
+{
+ if (bus)
+ pci_walk_bus(bus, __pci_dev_set_current_state, &state);
}
/**
@@ -672,8 +725,15 @@
*/
int __pci_complete_power_transition(struct pci_dev *dev, pci_power_t state)
{
- return state >= PCI_D0 ?
- pci_platform_power_transition(dev, state) : -EINVAL;
+ int ret;
+
+ if (state < PCI_D0)
+ return -EINVAL;
+ ret = pci_platform_power_transition(dev, state);
+ /* Power off the bridge may power off the whole hierarchy */
+ if (!ret && state == PCI_D3cold)
+ __pci_bus_set_current_state(dev->subordinate, PCI_D3cold);
+ return ret;
}
EXPORT_SYMBOL_GPL(__pci_complete_power_transition);
@@ -697,8 +757,8 @@
int error;
/* bound the state we're entering */
- if (state > PCI_D3hot)
- state = PCI_D3hot;
+ if (state > PCI_D3cold)
+ state = PCI_D3cold;
else if (state < PCI_D0)
state = PCI_D0;
else if ((state == PCI_D1 || state == PCI_D2) && pci_no_d1d2(dev))
@@ -713,10 +773,15 @@
/* This device is quirked not to be put into D3, so
don't put it in D3 */
- if (state == PCI_D3hot && (dev->dev_flags & PCI_DEV_FLAGS_NO_D3))
+ if (state >= PCI_D3hot && (dev->dev_flags & PCI_DEV_FLAGS_NO_D3))
return 0;
- error = pci_raw_set_power_state(dev, state);
+ /*
+ * To put device in D3cold, we put device into D3hot in native
+ * way, then put device into D3cold with platform ops
+ */
+ error = pci_raw_set_power_state(dev, state > PCI_D3hot ?
+ PCI_D3hot : state);
if (!__pci_complete_power_transition(dev, state))
error = 0;
@@ -1460,6 +1525,28 @@
}
/**
+ * pci_wakeup - Wake up a PCI device
+ * @dev: Device to handle.
+ * @ign: ignored parameter
+ */
+static int pci_wakeup(struct pci_dev *pci_dev, void *ign)
+{
+ pci_wakeup_event(pci_dev);
+ pm_request_resume(&pci_dev->dev);
+ return 0;
+}
+
+/**
+ * pci_wakeup_bus - Walk given bus and wake up devices on it
+ * @bus: Top bus of the subtree to walk.
+ */
+void pci_wakeup_bus(struct pci_bus *bus)
+{
+ if (bus)
+ pci_walk_bus(bus, pci_wakeup, NULL);
+}
+
+/**
* pci_pme_capable - check the capability of PCI device to generate PME#
* @dev: PCI device to handle.
* @state: PCI state from which device will issue PME#.
@@ -1480,6 +1567,16 @@
if (!list_empty(&pci_pme_list)) {
list_for_each_entry_safe(pme_dev, n, &pci_pme_list, list) {
if (pme_dev->dev->pme_poll) {
+ struct pci_dev *bridge;
+
+ bridge = pme_dev->dev->bus->self;
+ /*
+ * If bridge is in low power state, the
+ * configuration space of subordinate devices
+ * may be not accessible
+ */
+ if (bridge && bridge->current_state != PCI_D0)
+ continue;
pci_pme_wakeup(pme_dev->dev, NULL);
} else {
list_del(&pme_dev->list);
@@ -1706,6 +1803,10 @@
if (target_state == PCI_POWER_ERROR)
return -EIO;
+ /* D3cold during system suspend/hibernate is not supported */
+ if (target_state > PCI_D3hot)
+ target_state = PCI_D3hot;
+
pci_enable_wake(dev, target_state, device_may_wakeup(&dev->dev));
error = pci_set_power_state(dev, target_state);
@@ -1743,12 +1844,16 @@
if (target_state == PCI_POWER_ERROR)
return -EIO;
+ dev->runtime_d3cold = target_state == PCI_D3cold;
+
__pci_enable_wake(dev, target_state, true, pci_dev_run_wake(dev));
error = pci_set_power_state(dev, target_state);
- if (error)
+ if (error) {
__pci_enable_wake(dev, target_state, true, false);
+ dev->runtime_d3cold = false;
+ }
return error;
}
@@ -1818,6 +1923,7 @@
dev->pm_cap = pm;
dev->d3_delay = PCI_PM_D3_WAIT;
+ dev->d3cold_delay = PCI_PM_D3COLD_WAIT;
dev->d1_support = false;
dev->d2_support = false;
diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h
index 4884d77..ec1512d 100644
--- a/drivers/pci/pci.h
+++ b/drivers/pci/pci.h
@@ -70,6 +70,7 @@
extern void pci_disable_enabled_device(struct pci_dev *dev);
extern int pci_finish_runtime_suspend(struct pci_dev *dev);
extern int __pci_pme_wakeup(struct pci_dev *dev, void *ign);
+extern void pci_wakeup_bus(struct pci_bus *bus);
extern void pci_pm_init(struct pci_dev *dev);
extern void platform_pci_wakeup_init(struct pci_dev *dev);
extern void pci_allocate_cap_save_buffers(struct pci_dev *dev);
diff --git a/drivers/pci/pcie/portdrv_pci.c b/drivers/pci/pcie/portdrv_pci.c
index e0610bd..3a7eefc 100644
--- a/drivers/pci/pcie/portdrv_pci.c
+++ b/drivers/pci/pcie/portdrv_pci.c
@@ -11,6 +11,7 @@
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/pm.h>
+#include <linux/pm_runtime.h>
#include <linux/init.h>
#include <linux/pcieport_if.h>
#include <linux/aer.h>
@@ -99,6 +100,51 @@
return 0;
}
+#ifdef CONFIG_PM_RUNTIME
+struct d3cold_info {
+ bool no_d3cold;
+ unsigned int d3cold_delay;
+};
+
+static int pci_dev_d3cold_info(struct pci_dev *pdev, void *data)
+{
+ struct d3cold_info *info = data;
+
+ info->d3cold_delay = max_t(unsigned int, pdev->d3cold_delay,
+ info->d3cold_delay);
+ if (pdev->no_d3cold)
+ info->no_d3cold = true;
+ return 0;
+}
+
+static int pcie_port_runtime_suspend(struct device *dev)
+{
+ struct pci_dev *pdev = to_pci_dev(dev);
+ struct d3cold_info d3cold_info = {
+ .no_d3cold = false,
+ .d3cold_delay = PCI_PM_D3_WAIT,
+ };
+
+ /*
+ * If any subordinate device disable D3cold, we should not put
+ * the port into D3cold. The D3cold delay of port should be
+ * the max of that of all subordinate devices.
+ */
+ pci_walk_bus(pdev->subordinate, pci_dev_d3cold_info, &d3cold_info);
+ pdev->no_d3cold = d3cold_info.no_d3cold;
+ pdev->d3cold_delay = d3cold_info.d3cold_delay;
+ return 0;
+}
+
+static int pcie_port_runtime_resume(struct device *dev)
+{
+ return 0;
+}
+#else
+#define pcie_port_runtime_suspend NULL
+#define pcie_port_runtime_resume NULL
+#endif
+
static const struct dev_pm_ops pcie_portdrv_pm_ops = {
.suspend = pcie_port_device_suspend,
.resume = pcie_port_device_resume,
@@ -107,6 +153,8 @@
.poweroff = pcie_port_device_suspend,
.restore = pcie_port_device_resume,
.resume_noirq = pcie_port_resume_noirq,
+ .runtime_suspend = pcie_port_runtime_suspend,
+ .runtime_resume = pcie_port_runtime_resume,
};
#define PCIE_PORTDRV_PM_OPS (&pcie_portdrv_pm_ops)
@@ -117,6 +165,14 @@
#endif /* !PM */
/*
+ * PCIe port runtime suspend is broken for some chipsets, so use a
+ * black list to disable runtime PM for these chipsets.
+ */
+static const struct pci_device_id port_runtime_pm_black_list[] = {
+ { /* end: all zeroes */ }
+};
+
+/*
* pcie_portdrv_probe - Probe PCI-Express port devices
* @dev: PCI-Express port device being probed
*
@@ -144,12 +200,16 @@
return status;
pci_save_state(dev);
+ if (!pci_match_id(port_runtime_pm_black_list, dev))
+ pm_runtime_put_noidle(&dev->dev);
return 0;
}
static void pcie_portdrv_remove(struct pci_dev *dev)
{
+ if (!pci_match_id(port_runtime_pm_black_list, dev))
+ pm_runtime_get_noresume(&dev->dev);
pcie_port_device_remove(dev);
pci_disable_device(dev);
}
diff --git a/drivers/pnp/pnpacpi/core.c b/drivers/pnp/pnpacpi/core.c
index d21e8f5..507a8e2 100644
--- a/drivers/pnp/pnpacpi/core.c
+++ b/drivers/pnp/pnpacpi/core.c
@@ -170,8 +170,8 @@
}
if (acpi_bus_power_manageable(handle)) {
- int power_state = acpi_pm_device_sleep_state(&dev->dev, NULL);
-
+ int power_state = acpi_pm_device_sleep_state(&dev->dev, NULL,
+ ACPI_STATE_D3);
if (power_state < 0)
power_state = (state.event == PM_EVENT_ON) ?
ACPI_STATE_D0 : ACPI_STATE_D3;
diff --git a/include/acpi/acpi_bus.h b/include/acpi/acpi_bus.h
index 4579740..6a6c46e 100644
--- a/include/acpi/acpi_bus.h
+++ b/include/acpi/acpi_bus.h
@@ -415,13 +415,13 @@
int acpi_disable_wakeup_device_power(struct acpi_device *dev);
#ifdef CONFIG_PM
-int acpi_pm_device_sleep_state(struct device *, int *);
+int acpi_pm_device_sleep_state(struct device *, int *, int);
#else
-static inline int acpi_pm_device_sleep_state(struct device *d, int *p)
+static inline int acpi_pm_device_sleep_state(struct device *d, int *p, int m)
{
if (p)
*p = ACPI_STATE_D0;
- return ACPI_STATE_D3;
+ return (m >= ACPI_STATE_D0 && m <= ACPI_STATE_D3) ? m : ACPI_STATE_D0;
}
#endif
diff --git a/include/linux/pci.h b/include/linux/pci.h
index c739df9..0769aa4 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -132,9 +132,10 @@
return pci_power_names[1 + (int) state];
}
-#define PCI_PM_D2_DELAY 200
-#define PCI_PM_D3_WAIT 10
-#define PCI_PM_BUS_WAIT 50
+#define PCI_PM_D2_DELAY 200
+#define PCI_PM_D3_WAIT 10
+#define PCI_PM_D3COLD_WAIT 100
+#define PCI_PM_BUS_WAIT 50
/** The pci_channel state describes connectivity between the CPU and
* the pci device. If some PCI bus between here and the pci device
@@ -278,11 +279,18 @@
unsigned int pme_poll:1; /* Poll device's PME status bit */
unsigned int d1_support:1; /* Low power state D1 is supported */
unsigned int d2_support:1; /* Low power state D2 is supported */
- unsigned int no_d1d2:1; /* Only allow D0 and D3 */
+ unsigned int no_d1d2:1; /* D1 and D2 are forbidden */
+ unsigned int no_d3cold:1; /* D3cold is forbidden */
+ unsigned int d3cold_allowed:1; /* D3cold is allowed by user */
unsigned int mmio_always_on:1; /* disallow turning off io/mem
decoding during bar sizing */
unsigned int wakeup_prepared:1;
+ unsigned int runtime_d3cold:1; /* whether go through runtime
+ D3cold, not set for devices
+ powered on/off by the
+ corresponding bridge */
unsigned int d3_delay; /* D3->D0 transition time in ms */
+ unsigned int d3cold_delay; /* D3cold->D0 transition time in ms */
#ifdef CONFIG_PCIEASPM
struct pcie_link_state *link_state; /* ASPM link state. */