Merge remote-tracking branch '4.9/tmp-e6b0c64f' into 4.9

* 4.9/tmp-e6b0c64f:
  Linux 4.9.41
  ASoC: dpcm: Avoid putting stream state to STOP when FE stream is paused
  ASoC: Intel: Skylake: Release FW ctx in cleanup
  scsi: bfa: Increase requested firmware version to 3.2.5.1
  scsi: snic: Return error code on memory allocation failure
  scsi: fnic: Avoid sending reset to firmware when another reset is in progress
  HID: ignore Petzl USB headlamp
  ASoC: Intel: bytcr-rt5640: fix settings in internal clock mode
  perf/x86: Set pmu->module in Intel PMU modules
  x86/platform/intel-mid: Rename 'spidev' to 'mrfld_spidev'
  ALSA: usb-audio: test EP_FLAG_RUNNING at urb completion
  ARCv2: IRQ: Call entry/exit functions for chained handlers in MCIP
  sh_eth: enable RX descriptor word 0 shift on SH7734
  ASoC: fsl_ssi: set fifo watermark to more reliable value
  net: usb: asix_devices: add .reset_resume for USB PM
  nvmem: imx-ocotp: Fix wrong register size
  arm64: mm: fix show_pte KERN_CONT fallout
  vfio-pci: Handle error from pci_iomap
  video: fbdev: cobalt_lcdfb: Handle return NULL error from devm_ioremap
  perf symbols: Robustify reading of build-id from sysfs
  perf tools: Install tools/lib/traceevent plugins with install-bin
  xfrm: Don't use sk_family for socket policy lookups
  tools lib traceevent: Fix prev/next_prio for deadline tasks
  Xen: ARM: Zero reserved fields of xatp before making hypervisor call
  Btrfs: adjust outstanding_extents counter properly when dio write is split
  benet: stricter vxlan offloading check in be_features_check
  Btrfs: fix lockdep warning about log_mutex
  Btrfs: use down_read_nested to make lockdep silent
  usb: gadget: Fix copy/pasted error message
  ACPI / scan: Prefer devices without _HID/_CID for _ADR matching
  ARM: s3c2410_defconfig: Fix invalid values for NF_CT_PROTO_*
  perf probe: Fix to get correct modname from elf header
  ARM64: zynqmp: Fix i2c node's compatible string
  ARM64: zynqmp: Fix W=1 dtc 1.4 warnings
  usb: dwc3: omap: fix race of pm runtime with irq handler in probe
  dmaengine: ti-dma-crossbar: Add some 'of_node_put()' in error path.
  l2tp: consider '::' as wildcard address in l2tp_ip6 socket lookup
  dmaengine: ioatdma: workaround SKX ioatdma version
  dmaengine: ioatdma: Add Skylake PCI Dev ID
  openrisc: Add _text symbol to fix ksym build error
  irqchip/mxs: Enable SKIP_SET_WAKE and MASK_ON_SUSPEND
  ASoC: nau8825: fix invalid configuration in Pre-Scalar of FLL
  spi: dw: Make debugfs name unique between instances
  ASoC: tlv320aic3x: Mark the RESET register as volatile
  irqchip/keystone: Fix "scheduling while atomic" on rt
  vfio-pci: use 32-bit comparisons for register address for gcc-4.5
  drm/msm: Verify that MSM_SUBMIT_BO_FLAGS are set
  drm/msm: Put back the vaddr in submit_reloc()
  drm/msm: Ensure that the hardware write pointer is valid
  net/mlx4_core: Fix raw qp flow steering rules under SRIOV
  net/mlx4: Remove BUG_ON from ICM allocation routine
  net/mlx4_core: Use-after-free causes a resource leak in flow-steering detach
  ipv6: Should use consistent conditional judgement for ip6 fragment between __ip6_append_data and ip6_finish_output
  net/mlx5: Disable RoCE on the e-switch management port under switchdev mode
  ARM: dts: n900: Mark eMMC slot with no-sdio and no-sd flags
  ARM: dts: am57xx-idk: Put USB2 port in peripheral mode
  dt-bindings: input: Specify the interrupt number of TPS65217 power button
  dt-bindings: power/supply: Update TPS65217 properties
  ARM: omap2+: fixing wrong strcat for Non-NULL terminated string
  r8169: add support for RTL8168 series add-on card.
  x86/mce/AMD: Make the init code more robust
  device-dax: fix sysfs duplicate warnings
  net: skb_needs_check() accepts CHECKSUM_NONE for tx
  pstore: Use dynamic spinlock initializer
  pstore: Correctly initialize spinlock and flags
  pstore: Allow prz to control need for locking
  v4l: s5c73m3: fix negation operator
  dentry name snapshots
  ipmi/watchdog: fix watchdog timeout set on reboot
  RDMA/uverbs: Fix the check for port number
  sched/cgroup: Move sched_online_group() back into css_online() to fix crash
  mailbox: handle empty message in tx_tick
  mailbox: skip complete wait event if timer expired
  mailbox: always wait in mbox_send_message for blocking Tx mode
  wil6210: fix deadlock when using fw_no_recovery option
  ath10k: fix null deref on wmi-tlv when trying spectral scan
  isdn/i4l: fix buffer overflow
  isdn: Fix a sleep-in-atomic bug
  net: phy: Do not perform software reset for Generic PHY
  nfc: fdp: fix NULL pointer dereference
  nfc: Fix hangup of RC-S380* in port100_send_ack()
  smp/hotplug: Replace BUG_ON and react useful
  smp/hotplug: Move unparking of percpu threads to the control CPU
  drm: rcar-du: Simplify and fix probe error handling
  Staging: comedi: comedi_fops: Avoid orphaned proc entry
  Revert "powerpc/numa: Fix percpu allocations to be NUMA aware"
  KVM: PPC: Book3S HV: Save/restore host values of debug registers
  KVM: PPC: Book3S HV: Restore critical SPRs to host values on guest exit
  drm/nouveau/bar/gf100: fix access to upper half of BAR2
  drm/nouveau/disp/nv50-: bump max chans to 21
  drm/vmwgfx: Fix gcc-7.1.1 warning
  md/raid5: add thread_group worker async_tx_issue_pending_all
  KVM: PPC: Book3S HV: Enable TM before accessing TM registers
  crypto: authencesn - Fix digest_null crash
  NFSv4.1: Fix a race where CB_NOTIFY_LOCK fails to wake a waiter
  NFS: invalidate file size when taking a lock.
  powerpc/pseries: Fix of_node_put() underflow during reconfig remove
  parisc: Suspend lockup detectors before system halt
  parisc: Extend disabled preemption in copy_user_page
  parisc: Prevent TLB speculation on flushed pages on CPUs that only support equivalent aliases
  ALSA: hda - Add missing NVIDIA GPU codec IDs to patch table
  ALSA: fm801: Initialize chip after IRQ handler is registered
  jfs: Don't clear SGID when inheriting ACLs
  net: reduce skb_warn_bad_offload() noise
  pstore: Make spinlock per zone instead of global
  af_key: Add lock to key dump
  ANDROID: sched/fair: Add a backup_cpu to find_best_target
  ANDROID: sched/fair: Try to estimate possible idle states.
  ANDROID: sched/fair: Sync task util before EAS wakeup
  ANDROID: Revert "sched/fair: ensure utilization signals are synchronized before use"
  ANDROID: sched/fair: kick nohz idle balance for misfit task
  ANDROID: sched/fair: Update signals of nohz cpus if we are going idle
  ANDROID: events: add tracepoint for find_best_target
  ANDROID: sched/fair: streamline find_best_target heuristics
  UPSTREAM: cpufreq: schedutil: Trace frequency only if it has changed
  UPSTREAM: cpufreq: schedutil: Avoid reducing frequency of busy CPUs prematurely
  UPSTREAM: cpufreq: schedutil: Refactor sugov_next_freq_shared()
  UPSTREAM: cpufreq: schedutil: Pass sg_policy to get_next_freq()
  UPSTREAM: cpufreq: schedutil: Rectify comment in sugov_irq_work() function
  UPSTREAM: cpufreq: schedutil: irq-work and mutex are only used in slow path
  UPSTREAM: cpufreq: schedutil: enable fast switch earlier
  UPSTREAM: cpufreq: schedutil: Avoid indented labels
  ANDROID: sched/{fair,tune}: simplify fair.c code
  ANDROID: FIXUP: sched/tune: update accouting before CPU capacity
  ANDROID: sched: walt: fix window misalignment when HZ=300
  ANDROID: sched/fair: Remove remnants of commit 608d49484ee466
  ANDROID: schedstats/eas: guard properly to avoid breaking non-smp schedstats users
  ANDROID: sched/tune: don't use schedtune before it is ready
  ANDROID: sched/fair: use SCHED_CAPACITY_SCALE for energy normalization
  ANDROID: sched/{fair,tune}: use reciprocal_value to compute boost margin
  ANDROID: sched/tune: Initialize raw_spin_lock in boosted_groups
  ANDROID: sched/tune: report when SchedTune has not been initialized
  ANDROID: sched/tune: fix sched_energy_diff tracepoint
  ANDROID: sched/tune: increase group count to 5
  ANDROID: cpufreq/schedutil: use boosted_cpu_util for PELT to match WALT
  ANDROID: sched/fair: Fix sched_group_energy() to support per-cpu capacity states
  ANDROID: sched/fair: discount task contribution to find CPU with lowest utilization
  ANDROID: sched/fair: ensure utilization signals are synchronized before use
  ANDROID: sched/fair: remove task util from own cpu when placing waking task
  ANDROID: trace:sched: Make util_avg in load_avg trace reflect PELT/WALT as used
  ANDROID: sched/fair: Add eas (& cas) specific rq, sd and task stats
  ANDROID: sched/core: Fix PELT jump to max OPP upon util increase
  ANDROID: sched: EAS & 'single cpu per cluster'/cpu hotplug interoperability
  UPSTREAM: sched/core: Fix group_entity's share update
  UPSTREAM: sched/fair: Propagate asynchrous detach
  UPSTREAM: sched/fair: Propagate load during synchronous attach/detach
  UPSTREAM: sched/fair: Factorize attach/detach entity
  ANDROID: sched/fair: Simplify idle_idx handling in select_idle_sibling()
  ANDROID: sched/fair: refactor find_best_target() for simplicity
  ANDROID: sched/fair: Change cpu iteration order in find_best_target()
  ANDROID: sched/core: Add first cpu w/ max/min orig capacity to root domain
  ANDROID: sched/core: Remove remnants of commit fd5c98da1a42
  ANDROID: sched: Remove sysctl_sched_is_big_little
  ANDROID: sched/fair: Code !is_big_little path into select_energy_cpu_brute()
  ANDROID: EAS: sched/fair: Re-integrate 'honor sync wakeups' into wakeup path
  ANDROID: Fixup!: sched/fair.c: Set SchedTune specific struct energy_env.task
  ANDROID: sched/fair: Energy-aware wake-up task placement
  ANDROID: sched/fair: Add energy_diff dead-zone margin
  ANDROID: arm64: Set SD_ASYM_CPUCAPACITY sched_domain flag on DIE level
  UPSTREAM: sched/fair: Fix incorrect comment for capacity_margin
  UPSTREAM: sched/fair: Avoid pulling tasks from non-overloaded higher capacity groups
  UPSTREAM: sched/fair: Add per-CPU min capacity to sched_group_capacity
  UPSTREAM: sched/fair: Consider spare capacity in find_idlest_group()
  UPSTREAM: sched/fair: Compute task/cpu utilization at wake-up correctly
  ANDROID: Partial Revert: "ANDROID: sched: Add cpu capacity awareness to wakeup balancing"
  ANDROID: sched/fair: Decommission energy_aware_wake_cpu()
  ANDROID: Revert "WIP: sched: Consider spare cpu capacity at task wake-up"
  FROM-LIST: cpufreq: schedutil: Redefine the rate_limit_us tunable
  ANDROID: cpufreq: schedutil: add up/down frequency transition rate limits
  ANDROID: trace/sched: add rq utilization signal for WALT
  ANDROID: sched/cpufreq: make schedutil use WALT signal
  ANDROID: sched: cpufreq: use rt_avg as estimate of required RT CPU capacity
  UPSTREAM: cpufreq: schedutil: move slow path from workqueue to SCHED_FIFO task
  ANDROID: sched/cpufreq: fix tunables for schedfreq governor
  cpufreq: interactive governor drops bits in time calculation
  DEBUG: sched/fair: Fix sched_load_avg_cpu events for task_groups
  DEBUG: sched/fair: Fix missing sched_load_avg_cpu events
  sched: Consider misfit tasks when load-balancing
  ANDROID: binder: Don't BUG_ON(!spin_is_locked()).

Conflicts:
	drivers/cpufreq/cpufreq_interactive.c
	include/trace/events/sched.h
	kernel/cpu.c
	kernel/sched/cpufreq_schedutil.c
	kernel/sched/debug.c
	kernel/sched/fair.c
	kernel/sched/tune.c
	kernel/sched/walt.c
	kernel/sched/walt.h

Change-Id: I04f9e2c5cc6c638742472465080eaa0473f1c799
Signed-off-by: Kyle Yan <kyan@codeaurora.org>
diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c
index 0afd701..8806fd8 100644
--- a/kernel/sched/cpufreq_schedutil.c
+++ b/kernel/sched/cpufreq_schedutil.c
@@ -17,6 +17,11 @@
 #include <trace/events/power.h>
 #include <linux/sched/sysctl.h>
 #include "sched.h"
+#include "tune.h"
+
+#ifdef CONFIG_SCHED_WALT
+unsigned long boosted_cpu_util(int cpu);
+#endif
 
 #define SUGOV_KTHREAD_PRIORITY	50
 
@@ -71,6 +76,11 @@ struct sugov_cpu {
 	unsigned long max;
 	unsigned int flags;
 	unsigned int cpu;
+
+	/* The field below is for single-CPU policies only. */
+#ifdef CONFIG_NO_HZ_COMMON
+	unsigned long saved_idle_calls;
+#endif
 };
 
 static DEFINE_PER_CPU(struct sugov_cpu, sugov_cpu);
@@ -100,22 +110,20 @@ static void sugov_update_commit(struct sugov_policy *sg_policy, u64 time,
 {
 	struct cpufreq_policy *policy = sg_policy->policy;
 
+	if (sg_policy->next_freq == next_freq)
+		return;
+
+	sg_policy->next_freq = next_freq;
 	sg_policy->last_freq_update_time = time;
 
 	if (policy->fast_switch_enabled) {
-		if (sg_policy->next_freq == next_freq) {
-			trace_cpu_frequency(policy->cur, smp_processor_id());
-			return;
-		}
-		sg_policy->next_freq = next_freq;
 		next_freq = cpufreq_driver_fast_switch(policy, next_freq);
 		if (next_freq == CPUFREQ_ENTRY_INVALID)
 			return;
 
 		policy->cur = next_freq;
 		trace_cpu_frequency(next_freq, smp_processor_id());
-	} else if (sg_policy->next_freq != next_freq) {
-		sg_policy->next_freq = next_freq;
+	} else {
 		sg_policy->work_in_progress = true;
 		irq_work_queue(&sg_policy->irq_work);
 	}
@@ -171,6 +179,7 @@ static void sugov_get_util(unsigned long *util, unsigned long *max, int cpu)
 	*max = cfs_max;
 
 	*util = cpu_util_freq(cpu, &loadcpu->walt_load);
+	*util = boosted_cpu_util(cpu);
 }
 
 static void sugov_set_iowait_boost(struct sugov_cpu *sg_cpu, u64 time,
@@ -283,6 +292,19 @@ static void sugov_walt_adjust(struct sugov_cpu *sg_cpu, unsigned long *util,
 		*util = max(*util, sg_cpu->walt_load.pl);
 }
 
+#ifdef CONFIG_NO_HZ_COMMON
+static bool sugov_cpu_is_busy(struct sugov_cpu *sg_cpu)
+{
+	unsigned long idle_calls = tick_nohz_get_idle_calls();
+	bool ret = idle_calls == sg_cpu->saved_idle_calls;
+
+	sg_cpu->saved_idle_calls = idle_calls;
+	return ret;
+}
+#else
+static inline bool sugov_cpu_is_busy(struct sugov_cpu *sg_cpu) { return false; }
+#endif /* CONFIG_NO_HZ_COMMON */
+
 static void sugov_update_single(struct update_util_data *hook, u64 time,
 				unsigned int flags)
 {
@@ -291,6 +313,7 @@ static void sugov_update_single(struct update_util_data *hook, u64 time,
 	struct cpufreq_policy *policy = sg_policy->policy;
 	unsigned long util, max;
 	unsigned int next_f;
+	bool busy;
 
 	sugov_set_iowait_boost(sg_cpu, time, flags);
 	sg_cpu->last_update = time;
@@ -298,6 +321,7 @@ static void sugov_update_single(struct update_util_data *hook, u64 time,
 	if (!sugov_should_update_freq(sg_policy, time))
 		return;
 
+	busy = sugov_cpu_is_busy(sg_cpu);
 	flags &= ~SCHED_CPUFREQ_RT_DL;
 
 	if (flags & SCHED_CPUFREQ_RT_DL) {
@@ -309,35 +333,29 @@ static void sugov_update_single(struct update_util_data *hook, u64 time,
 				   sg_policy->policy->cur);
 		sugov_walt_adjust(sg_cpu, &util, &max);
 		next_f = get_next_freq(sg_policy, util, max);
+		/*
+		 * Do not reduce the frequency if the CPU has not been idle
+		 * recently, as the reduction is likely to be premature then.
+		 */
+		if (busy && next_f < sg_policy->next_freq)
+			next_f = sg_policy->next_freq;
 	}
 	sugov_update_commit(sg_policy, time, next_f);
 }
 
-static unsigned int sugov_next_freq_shared(struct sugov_cpu *sg_cpu,
-					   unsigned long util, unsigned long max,
-					   unsigned int flags)
+static unsigned int sugov_next_freq_shared(struct sugov_cpu *sg_cpu)
 {
 	struct sugov_policy *sg_policy = sg_cpu->sg_policy;
 	struct cpufreq_policy *policy = sg_policy->policy;
-	unsigned int max_f = policy->cpuinfo.max_freq;
 	u64 last_freq_update_time = sg_policy->last_freq_update_time;
+	unsigned long util = 0, max = 1;
 	unsigned int j;
 
-	if (flags & SCHED_CPUFREQ_RT_DL)
-		return max_f;
-
-	sugov_iowait_boost(sg_cpu, &util, &max);
-	sugov_walt_adjust(sg_cpu, &util, &max);
-
 	for_each_cpu(j, policy->cpus) {
-		struct sugov_cpu *j_sg_cpu;
+		struct sugov_cpu *j_sg_cpu = &per_cpu(sugov_cpu, j);
 		unsigned long j_util, j_max;
 		s64 delta_ns;
 
-		if (j == sg_cpu->cpu)
-			continue;
-
-		j_sg_cpu = &per_cpu(sugov_cpu, j);
 		/*
 		 * If the CPU utilization was last updated before the previous
 		 * frequency update and the time elapsed between the last update
@@ -351,7 +369,7 @@ static unsigned int sugov_next_freq_shared(struct sugov_cpu *sg_cpu,
 			continue;
 		}
 		if (j_sg_cpu->flags & SCHED_CPUFREQ_RT_DL)
-			return max_f;
+			return policy->cpuinfo.max_freq;
 
 		j_util = j_sg_cpu->util;
 		j_max = j_sg_cpu->max;
@@ -404,7 +422,11 @@ static void sugov_update_shared(struct update_util_data *hook, u64 time,
 				sg_cpu->walt_load.pl, flags);
 
 	if (sugov_should_update_freq(sg_policy, time)) {
-		next_f = sugov_next_freq_shared(sg_cpu, util, max, flags);
+		if (flags & SCHED_CPUFREQ_RT_DL)
+			next_f = sg_policy->policy->cpuinfo.max_freq;
+		else
+			next_f = sugov_next_freq_shared(sg_cpu);
+
 		sugov_update_commit(sg_policy, time, next_f);
 	}