ACPI / PM: Rework device power management to follow ACPI 6

The ACPI 6 specification has made some changes in the device power
management area.  In particular:

 * The D3hot power state is now supposed to be always available
   (instead of D3cold) and D3cold is only regarded as valid if the
   _PR3 object is present for the given device.

 * The required ordering of transitions into power states deeper than
   D0 is now such that for a transition into state Dx the _PSx method
   is supposed to be executed first, if present, and the states of
   the power resources the device depends on are supposed to be
   changed after that.

 * It is now explicitly forbidden to transition devices from
   lower-power (deeper) into higher-power (shallower) power states
   other than D0.

Those changes have been made so the specification reflects the
Windows' device power management code that the vast majority of
systems using ACPI is validated against.

To avoid artificial differences in ACPI device power management
between Windows and Linux, modify the ACPI device power management
code to follow the new specification.  Add comments explaining the
code flow in some unclear places.

This only may affect some real corner cases in which the OS behavior
expected by the firmware is different from the Windows one, but that's
quite unlikely.  The transition ordering change affects transitions
to D1 and D2 which are rarely used (if at all) and into D3hot and
D3cold for devices actually having _PR3, but those are likely to
be validated against Windows anyway.  The other changes may affect
code calling acpi_device_get_power() or acpi_device_update_power()
where ACPI_STATE_D3_HOT may be returned instead of ACPI_STATE_D3_COLD
(that's why the ACPI fan driver needs to be updated too) and since
transitions into ACPI_STATE_D3_HOT may remove power now, it is better
to avoid this one in acpi_pm_device_sleep_state() if the "no power
off" PM QoS flag is set.

The only existing user of acpi_device_can_poweroff() really cares
about the case when _PR3 is present, so the change in that function
should not cause any problems to happen too.

A plus is that PCI_D3hot can be mapped to ACPI_STATE_D3_HOT
now and the compatibility with older systems should be covered
automatically.

In any case, if any real problems result from this, it still will
be better to follow the Windows' behavior (which now is reflected
by the specification too) in general and handle the cases when it
doesn't work via quirks.

Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
diff --git a/drivers/acpi/device_pm.c b/drivers/acpi/device_pm.c
index 735db11..87c16a5 100644
--- a/drivers/acpi/device_pm.c
+++ b/drivers/acpi/device_pm.c
@@ -98,17 +98,16 @@
 
 		/*
 		 * The power resources settings may indicate a power state
-		 * shallower than the actual power state of the device.
+		 * shallower than the actual power state of the device, because
+		 * the same power resources may be referenced by other devices.
 		 *
-		 * Moreover, on systems predating ACPI 4.0, if the device
-		 * doesn't depend on any power resources and _PSC returns 3,
-		 * that means "power off".  We need to maintain compatibility
-		 * with those systems.
+		 * For systems predating ACPI 4.0 we assume that D3hot is the
+		 * deepest state that can be supported.
 		 */
 		if (psc > result && psc < ACPI_STATE_D3_COLD)
 			result = psc;
 		else if (result == ACPI_STATE_UNKNOWN)
-			result = psc > ACPI_STATE_D2 ? ACPI_STATE_D3_COLD : psc;
+			result = psc > ACPI_STATE_D2 ? ACPI_STATE_D3_HOT : psc;
 	}
 
 	/*
@@ -153,8 +152,8 @@
  */
 int acpi_device_set_power(struct acpi_device *device, int state)
 {
+	int target_state = state;
 	int result = 0;
-	bool cut_power = false;
 
 	if (!device || !device->flags.power_manageable
 	    || (state < ACPI_STATE_D0) || (state > ACPI_STATE_D3_COLD))
@@ -169,11 +168,21 @@
 		return 0;
 	}
 
-	if (!device->power.states[state].flags.valid) {
+	if (state == ACPI_STATE_D3_COLD) {
+		/*
+		 * For transitions to D3cold we need to execute _PS3 and then
+		 * possibly drop references to the power resources in use.
+		 */
+		state = ACPI_STATE_D3_HOT;
+		/* If _PR3 is not available, use D3hot as the target state. */
+		if (!device->power.states[ACPI_STATE_D3_COLD].flags.valid)
+			target_state = state;
+	} else if (!device->power.states[state].flags.valid) {
 		dev_warn(&device->dev, "Power state %s not supported\n",
 			 acpi_power_state_string(state));
 		return -ENODEV;
 	}
+
 	if (!device->power.flags.ignore_parent &&
 	    device->parent && (state < device->parent->power.state)) {
 		dev_warn(&device->dev,
@@ -183,39 +192,38 @@
 		return -ENODEV;
 	}
 
-	/* For D3cold we should first transition into D3hot. */
-	if (state == ACPI_STATE_D3_COLD
-	    && device->power.states[ACPI_STATE_D3_COLD].flags.os_accessible) {
-		state = ACPI_STATE_D3_HOT;
-		cut_power = true;
-	}
-
-	if (state < device->power.state && state != ACPI_STATE_D0
-	    && device->power.state >= ACPI_STATE_D3_HOT) {
-		dev_warn(&device->dev,
-			 "Cannot transition to non-D0 state from D3\n");
-		return -ENODEV;
-	}
-
 	/*
 	 * Transition Power
 	 * ----------------
-	 * In accordance with the ACPI specification first apply power (via
-	 * power resources) and then evaluate _PSx.
+	 * In accordance with ACPI 6, _PSx is executed before manipulating power
+	 * resources, unless the target state is D0, in which case _PS0 is
+	 * supposed to be executed after turning the power resources on.
 	 */
-	if (device->power.flags.power_resources) {
-		result = acpi_power_transition(device, state);
+	if (state > ACPI_STATE_D0) {
+		/*
+		 * According to ACPI 6, devices cannot go from lower-power
+		 * (deeper) states to higher-power (shallower) states.
+		 */
+		if (state < device->power.state) {
+			dev_warn(&device->dev, "Cannot transition from %s to %s\n",
+				 acpi_power_state_string(device->power.state),
+				 acpi_power_state_string(state));
+			return -ENODEV;
+		}
+
+		result = acpi_dev_pm_explicit_set(device, state);
 		if (result)
 			goto end;
-	}
-	result = acpi_dev_pm_explicit_set(device, state);
-	if (result)
-		goto end;
 
-	if (cut_power) {
-		device->power.state = state;
-		state = ACPI_STATE_D3_COLD;
-		result = acpi_power_transition(device, state);
+		if (device->power.flags.power_resources)
+			result = acpi_power_transition(device, target_state);
+	} else {
+		if (device->power.flags.power_resources) {
+			result = acpi_power_transition(device, ACPI_STATE_D0);
+			if (result)
+				goto end;
+		}
+		result = acpi_dev_pm_explicit_set(device, ACPI_STATE_D0);
 	}
 
  end:
@@ -264,13 +272,24 @@
 		return result;
 
 	if (state < ACPI_STATE_D3_COLD && device->power.flags.power_resources) {
+		/* Reference count the power resources. */
 		result = acpi_power_on_resources(device, state);
 		if (result)
 			return result;
 
-		result = acpi_dev_pm_explicit_set(device, state);
-		if (result)
-			return result;
+		if (state == ACPI_STATE_D0) {
+			/*
+			 * If _PSC is not present and the state inferred from
+			 * power resources appears to be D0, it still may be
+			 * necessary to execute _PS0 at this point, because
+			 * another device using the same power resources may
+			 * have been put into D0 previously and that's why we
+			 * see D0 here.
+			 */
+			result = acpi_dev_pm_explicit_set(device, state);
+			if (result)
+				return result;
+		}
 	} else if (state == ACPI_STATE_UNKNOWN) {
 		/*
 		 * No power resources and missing _PSC?  Cross fingers and make
@@ -603,12 +622,12 @@
 	if (d_max_in < ACPI_STATE_D0 || d_max_in > ACPI_STATE_D3_COLD)
 		return -EINVAL;
 
-	if (d_max_in > ACPI_STATE_D3_HOT) {
+	if (d_max_in > ACPI_STATE_D2) {
 		enum pm_qos_flags_status stat;
 
 		stat = dev_pm_qos_flags(dev, PM_QOS_FLAG_NO_POWER_OFF);
 		if (stat == PM_QOS_FLAGS_ALL)
-			d_max_in = ACPI_STATE_D3_HOT;
+			d_max_in = ACPI_STATE_D2;
 	}
 
 	adev = ACPI_COMPANION(dev);