[ACPI] Reduce acpi-cpufreq switching latency by 50%

The acpi-cpufreq driver does a P-state get after a P-state set
to verify whether set went through successfully. This test
is kind of redundant as set goes throught most of the times,
and the test is also expensive as a get of P-states can
take a lot of time (same as a set operation) as it goes
through SMM mode. Effectively, we are doubling the P-state
latency due to this get opertion.

momdule parameter "acpi_pstate_strict" restores orginal paranoia.

http://bugzilla.kernel.org/show_bug.cgi?id=5129

Signed-off-by: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Signed-off-by: Len Brown <len.brown@intel.com>
diff --git a/arch/i386/kernel/cpu/cpufreq/acpi-cpufreq.c b/arch/i386/kernel/cpu/cpufreq/acpi-cpufreq.c
index 60a9e54..822c8ce 100644
--- a/arch/i386/kernel/cpu/cpufreq/acpi-cpufreq.c
+++ b/arch/i386/kernel/cpu/cpufreq/acpi-cpufreq.c
@@ -31,6 +31,7 @@
 #include <linux/cpufreq.h>
 #include <linux/proc_fs.h>
 #include <linux/seq_file.h>
+#include <linux/compiler.h>
 #include <asm/io.h>
 #include <asm/delay.h>
 #include <asm/uaccess.h>
@@ -57,6 +58,8 @@
 
 static struct cpufreq_driver acpi_cpufreq_driver;
 
+static unsigned int acpi_pstate_strict;
+
 static int
 acpi_processor_write_port(
 	u16	port,
@@ -163,34 +166,44 @@
 	}
 
 	/*
-	 * Then we read the 'status_register' and compare the value with the
-	 * target state's 'status' to make sure the transition was successful.
-	 * Note that we'll poll for up to 1ms (100 cycles of 10us) before
-	 * giving up.
+	 * Assume the write went through when acpi_pstate_strict is not used.
+	 * As read status_register is an expensive operation and there 
+	 * are no specific error cases where an IO port write will fail.
 	 */
+	if (acpi_pstate_strict) {
+		/* Then we read the 'status_register' and compare the value 
+		 * with the target state's 'status' to make sure the 
+		 * transition was successful.
+		 * Note that we'll poll for up to 1ms (100 cycles of 10us) 
+		 * before giving up.
+		 */
 
-	port = data->acpi_data.status_register.address;
-	bit_width = data->acpi_data.status_register.bit_width;
+		port = data->acpi_data.status_register.address;
+		bit_width = data->acpi_data.status_register.bit_width;
 
-	dprintk("Looking for 0x%08x from port 0x%04x\n",
-		(u32) data->acpi_data.states[state].status, port);
+		dprintk("Looking for 0x%08x from port 0x%04x\n",
+			(u32) data->acpi_data.states[state].status, port);
 
-	for (i=0; i<100; i++) {
-		ret = acpi_processor_read_port(port, bit_width, &value);
-		if (ret) {	
-			dprintk("Invalid port width 0x%04x\n", bit_width);
-			retval = ret;
-			goto migrate_end;
+		for (i=0; i<100; i++) {
+			ret = acpi_processor_read_port(port, bit_width, &value);
+			if (ret) {	
+				dprintk("Invalid port width 0x%04x\n", bit_width);
+				retval = ret;
+				goto migrate_end;
+			}
+			if (value == (u32) data->acpi_data.states[state].status)
+				break;
+			udelay(10);
 		}
-		if (value == (u32) data->acpi_data.states[state].status)
-			break;
-		udelay(10);
+	} else {
+		i = 0;
+		value = (u32) data->acpi_data.states[state].status;
 	}
 
 	/* notify cpufreq */
 	cpufreq_notify_transition(&cpufreq_freqs, CPUFREQ_POSTCHANGE);
 
-	if (value != (u32) data->acpi_data.states[state].status) {
+	if (unlikely(value != (u32) data->acpi_data.states[state].status)) {
 		unsigned int tmp = cpufreq_freqs.new;
 		cpufreq_freqs.new = cpufreq_freqs.old;
 		cpufreq_freqs.old = tmp;
@@ -537,6 +550,8 @@
 	return;
 }
 
+module_param(acpi_pstate_strict, uint, 0644);
+MODULE_PARM_DESC(acpi_pstate_strict, "value 0 or non-zero. non-zero -> strict ACPI checks are performed during frequency changes.");
 
 late_initcall(acpi_cpufreq_init);
 module_exit(acpi_cpufreq_exit);