Merge tag 'arm-perf-3.13' of git://git.kernel.org/pub/scm/linux/kernel/git/will/linux into next/drivers

From Will Deacon:
Addition of an ARM PMU driver for the CCI-400 coherent interconnect
found on big.LITTLE systems, such as TC2.

This was originally sent for 3.12, but the device-tree bindings hadn't
settled down so here's the improved version for 3.13. I think the code
has benefited from the wait.

* tag 'arm-perf-3.13' of git://git.kernel.org/pub/scm/linux/kernel/git/will/linux:
  drivers: CCI: fix the error handle in cci_pmu_probe()
  drivers: CCI: add ARM CCI PMU support
  ARM: dts: Document the CCI PMU DT bindings
  ARM: dts: Improve CCI bindings documentation

Signed-off-by: Kevin Hilman <khilman@linaro.org>
diff --git a/Documentation/devicetree/bindings/arm/cci.txt b/Documentation/devicetree/bindings/arm/cci.txt
index 92d36e2..f28d82b 100644
--- a/Documentation/devicetree/bindings/arm/cci.txt
+++ b/Documentation/devicetree/bindings/arm/cci.txt
@@ -36,14 +36,18 @@
 
 	- reg
 		Usage: required
-		Value type: <prop-encoded-array>
+		Value type: Integer cells. A register entry, expressed as a pair
+			    of cells, containing base and size.
 		Definition: A standard property. Specifies base physical
 			    address of CCI control registers common to all
 			    interfaces.
 
 	- ranges:
 		Usage: required
-		Value type: <prop-encoded-array>
+		Value type: Integer cells. An array of range entries, expressed
+			    as a tuple of cells, containing child address,
+			    parent address and the size of the region in the
+			    child address space.
 		Definition: A standard property. Follow rules in the ePAPR for
 			    hierarchical bus addressing. CCI interfaces
 			    addresses refer to the parent node addressing
@@ -74,11 +78,49 @@
 
 		- reg:
 			Usage: required
-			Value type: <prop-encoded-array>
+			Value type: Integer cells. A register entry, expressed
+				    as a pair of cells, containing base and
+				    size.
 			Definition: the base address and size of the
 				    corresponding interface programming
 				    registers.
 
+	- CCI PMU node
+
+		Parent node must be CCI interconnect node.
+
+		A CCI pmu node must contain the following properties:
+
+		- compatible
+			Usage: required
+			Value type: <string>
+			Definition: must be "arm,cci-400-pmu"
+
+		- reg:
+			Usage: required
+			Value type: Integer cells. A register entry, expressed
+				    as a pair of cells, containing base and
+				    size.
+			Definition: the base address and size of the
+				    corresponding interface programming
+				    registers.
+
+		- interrupts:
+			Usage: required
+			Value type: Integer cells. Array of interrupt specifier
+				    entries, as defined in
+				    ../interrupt-controller/interrupts.txt.
+			Definition: list of counter overflow interrupts, one per
+				    counter. The interrupts must be specified
+				    starting with the cycle counter overflow
+				    interrupt, followed by counter0 overflow
+				    interrupt, counter1 overflow interrupt,...
+				    ,counterN overflow interrupt.
+
+				    The CCI PMU has an interrupt signal for each
+				    counter. The number of interrupts must be
+				    equal to the number of counters.
+
 * CCI interconnect bus masters
 
 	Description: masters in the device tree connected to a CCI port
@@ -144,7 +186,7 @@
 		#address-cells = <1>;
 		#size-cells = <1>;
 		reg = <0x0 0x2c090000 0 0x1000>;
-		ranges = <0x0 0x0 0x2c090000 0x6000>;
+		ranges = <0x0 0x0 0x2c090000 0x10000>;
 
 		cci_control0: slave-if@1000 {
 			compatible = "arm,cci-400-ctrl-if";
@@ -163,6 +205,16 @@
 			interface-type = "ace";
 			reg = <0x5000 0x1000>;
 		};
+
+		pmu@9000 {
+			 compatible = "arm,cci-400-pmu";
+			 reg = <0x9000 0x5000>;
+			 interrupts = <0 101 4>,
+				      <0 102 4>,
+				      <0 103 4>,
+				      <0 104 4>,
+				      <0 105 4>;
+		};
 	};
 
 This CCI node corresponds to a CCI component whose control registers sits
diff --git a/drivers/bus/arm-cci.c b/drivers/bus/arm-cci.c
index 2009266..bb5b90e 100644
--- a/drivers/bus/arm-cci.c
+++ b/drivers/bus/arm-cci.c
@@ -18,11 +18,21 @@
 #include <linux/io.h>
 #include <linux/module.h>
 #include <linux/of_address.h>
+#include <linux/of_irq.h>
+#include <linux/of_platform.h>
+#include <linux/platform_device.h>
 #include <linux/slab.h>
+#include <linux/spinlock.h>
 
 #include <asm/cacheflush.h>
+#include <asm/irq_regs.h>
+#include <asm/pmu.h>
 #include <asm/smp_plat.h>
 
+#define DRIVER_NAME		"CCI-400"
+#define DRIVER_NAME_PMU		DRIVER_NAME " PMU"
+#define PMU_NAME		"CCI_400"
+
 #define CCI_PORT_CTRL		0x0
 #define CCI_CTRL_STATUS		0xc
 
@@ -54,6 +64,568 @@
 static void __iomem *cci_ctrl_base;
 static unsigned long cci_ctrl_phys;
 
+#ifdef CONFIG_HW_PERF_EVENTS
+
+#define CCI_PMCR		0x0100
+#define CCI_PID2		0x0fe8
+
+#define CCI_PMCR_CEN		0x00000001
+#define CCI_PMCR_NCNT_MASK	0x0000f800
+#define CCI_PMCR_NCNT_SHIFT	11
+
+#define CCI_PID2_REV_MASK	0xf0
+#define CCI_PID2_REV_SHIFT	4
+
+/* Port ids */
+#define CCI_PORT_S0	0
+#define CCI_PORT_S1	1
+#define CCI_PORT_S2	2
+#define CCI_PORT_S3	3
+#define CCI_PORT_S4	4
+#define CCI_PORT_M0	5
+#define CCI_PORT_M1	6
+#define CCI_PORT_M2	7
+
+#define CCI_REV_R0		0
+#define CCI_REV_R1		1
+#define CCI_REV_R0_P4		4
+#define CCI_REV_R1_P2		6
+
+#define CCI_PMU_EVT_SEL		0x000
+#define CCI_PMU_CNTR		0x004
+#define CCI_PMU_CNTR_CTRL	0x008
+#define CCI_PMU_OVRFLW		0x00c
+
+#define CCI_PMU_OVRFLW_FLAG	1
+
+#define CCI_PMU_CNTR_BASE(idx)	((idx) * SZ_4K)
+
+/*
+ * Instead of an event id to monitor CCI cycles, a dedicated counter is
+ * provided. Use 0xff to represent CCI cycles and hope that no future revisions
+ * make use of this event in hardware.
+ */
+enum cci400_perf_events {
+	CCI_PMU_CYCLES = 0xff
+};
+
+#define CCI_PMU_EVENT_MASK		0xff
+#define CCI_PMU_EVENT_SOURCE(event)	((event >> 5) & 0x7)
+#define CCI_PMU_EVENT_CODE(event)	(event & 0x1f)
+
+#define CCI_PMU_MAX_HW_EVENTS 5   /* CCI PMU has 4 counters + 1 cycle counter */
+
+#define CCI_PMU_CYCLE_CNTR_IDX		0
+#define CCI_PMU_CNTR0_IDX		1
+#define CCI_PMU_CNTR_LAST(cci_pmu)	(CCI_PMU_CYCLE_CNTR_IDX + cci_pmu->num_events - 1)
+
+/*
+ * CCI PMU event id is an 8-bit value made of two parts - bits 7:5 for one of 8
+ * ports and bits 4:0 are event codes. There are different event codes
+ * associated with each port type.
+ *
+ * Additionally, the range of events associated with the port types changed
+ * between Rev0 and Rev1.
+ *
+ * The constants below define the range of valid codes for each port type for
+ * the different revisions and are used to validate the event to be monitored.
+ */
+
+#define CCI_REV_R0_SLAVE_PORT_MIN_EV	0x00
+#define CCI_REV_R0_SLAVE_PORT_MAX_EV	0x13
+#define CCI_REV_R0_MASTER_PORT_MIN_EV	0x14
+#define CCI_REV_R0_MASTER_PORT_MAX_EV	0x1a
+
+#define CCI_REV_R1_SLAVE_PORT_MIN_EV	0x00
+#define CCI_REV_R1_SLAVE_PORT_MAX_EV	0x14
+#define CCI_REV_R1_MASTER_PORT_MIN_EV	0x00
+#define CCI_REV_R1_MASTER_PORT_MAX_EV	0x11
+
+struct pmu_port_event_ranges {
+	u8 slave_min;
+	u8 slave_max;
+	u8 master_min;
+	u8 master_max;
+};
+
+static struct pmu_port_event_ranges port_event_range[] = {
+	[CCI_REV_R0] = {
+		.slave_min = CCI_REV_R0_SLAVE_PORT_MIN_EV,
+		.slave_max = CCI_REV_R0_SLAVE_PORT_MAX_EV,
+		.master_min = CCI_REV_R0_MASTER_PORT_MIN_EV,
+		.master_max = CCI_REV_R0_MASTER_PORT_MAX_EV,
+	},
+	[CCI_REV_R1] = {
+		.slave_min = CCI_REV_R1_SLAVE_PORT_MIN_EV,
+		.slave_max = CCI_REV_R1_SLAVE_PORT_MAX_EV,
+		.master_min = CCI_REV_R1_MASTER_PORT_MIN_EV,
+		.master_max = CCI_REV_R1_MASTER_PORT_MAX_EV,
+	},
+};
+
+struct cci_pmu_drv_data {
+	void __iomem *base;
+	struct arm_pmu *cci_pmu;
+	int nr_irqs;
+	int irqs[CCI_PMU_MAX_HW_EVENTS];
+	unsigned long active_irqs;
+	struct perf_event *events[CCI_PMU_MAX_HW_EVENTS];
+	unsigned long used_mask[BITS_TO_LONGS(CCI_PMU_MAX_HW_EVENTS)];
+	struct pmu_port_event_ranges *port_ranges;
+	struct pmu_hw_events hw_events;
+};
+static struct cci_pmu_drv_data *pmu;
+
+static bool is_duplicate_irq(int irq, int *irqs, int nr_irqs)
+{
+	int i;
+
+	for (i = 0; i < nr_irqs; i++)
+		if (irq == irqs[i])
+			return true;
+
+	return false;
+}
+
+static int probe_cci_revision(void)
+{
+	int rev;
+	rev = readl_relaxed(cci_ctrl_base + CCI_PID2) & CCI_PID2_REV_MASK;
+	rev >>= CCI_PID2_REV_SHIFT;
+
+	if (rev <= CCI_REV_R0_P4)
+		return CCI_REV_R0;
+	else if (rev <= CCI_REV_R1_P2)
+		return CCI_REV_R1;
+
+	return -ENOENT;
+}
+
+static struct pmu_port_event_ranges *port_range_by_rev(void)
+{
+	int rev = probe_cci_revision();
+
+	if (rev < 0)
+		return NULL;
+
+	return &port_event_range[rev];
+}
+
+static int pmu_is_valid_slave_event(u8 ev_code)
+{
+	return pmu->port_ranges->slave_min <= ev_code &&
+		ev_code <= pmu->port_ranges->slave_max;
+}
+
+static int pmu_is_valid_master_event(u8 ev_code)
+{
+	return pmu->port_ranges->master_min <= ev_code &&
+		ev_code <= pmu->port_ranges->master_max;
+}
+
+static int pmu_validate_hw_event(u8 hw_event)
+{
+	u8 ev_source = CCI_PMU_EVENT_SOURCE(hw_event);
+	u8 ev_code = CCI_PMU_EVENT_CODE(hw_event);
+
+	switch (ev_source) {
+	case CCI_PORT_S0:
+	case CCI_PORT_S1:
+	case CCI_PORT_S2:
+	case CCI_PORT_S3:
+	case CCI_PORT_S4:
+		/* Slave Interface */
+		if (pmu_is_valid_slave_event(ev_code))
+			return hw_event;
+		break;
+	case CCI_PORT_M0:
+	case CCI_PORT_M1:
+	case CCI_PORT_M2:
+		/* Master Interface */
+		if (pmu_is_valid_master_event(ev_code))
+			return hw_event;
+		break;
+	}
+
+	return -ENOENT;
+}
+
+static int pmu_is_valid_counter(struct arm_pmu *cci_pmu, int idx)
+{
+	return CCI_PMU_CYCLE_CNTR_IDX <= idx &&
+		idx <= CCI_PMU_CNTR_LAST(cci_pmu);
+}
+
+static u32 pmu_read_register(int idx, unsigned int offset)
+{
+	return readl_relaxed(pmu->base + CCI_PMU_CNTR_BASE(idx) + offset);
+}
+
+static void pmu_write_register(u32 value, int idx, unsigned int offset)
+{
+	return writel_relaxed(value, pmu->base + CCI_PMU_CNTR_BASE(idx) + offset);
+}
+
+static void pmu_disable_counter(int idx)
+{
+	pmu_write_register(0, idx, CCI_PMU_CNTR_CTRL);
+}
+
+static void pmu_enable_counter(int idx)
+{
+	pmu_write_register(1, idx, CCI_PMU_CNTR_CTRL);
+}
+
+static void pmu_set_event(int idx, unsigned long event)
+{
+	event &= CCI_PMU_EVENT_MASK;
+	pmu_write_register(event, idx, CCI_PMU_EVT_SEL);
+}
+
+static u32 pmu_get_max_counters(void)
+{
+	u32 n_cnts = (readl_relaxed(cci_ctrl_base + CCI_PMCR) &
+		      CCI_PMCR_NCNT_MASK) >> CCI_PMCR_NCNT_SHIFT;
+
+	/* add 1 for cycle counter */
+	return n_cnts + 1;
+}
+
+static struct pmu_hw_events *pmu_get_hw_events(void)
+{
+	return &pmu->hw_events;
+}
+
+static int pmu_get_event_idx(struct pmu_hw_events *hw, struct perf_event *event)
+{
+	struct arm_pmu *cci_pmu = to_arm_pmu(event->pmu);
+	struct hw_perf_event *hw_event = &event->hw;
+	unsigned long cci_event = hw_event->config_base & CCI_PMU_EVENT_MASK;
+	int idx;
+
+	if (cci_event == CCI_PMU_CYCLES) {
+		if (test_and_set_bit(CCI_PMU_CYCLE_CNTR_IDX, hw->used_mask))
+			return -EAGAIN;
+
+		return CCI_PMU_CYCLE_CNTR_IDX;
+	}
+
+	for (idx = CCI_PMU_CNTR0_IDX; idx <= CCI_PMU_CNTR_LAST(cci_pmu); ++idx)
+		if (!test_and_set_bit(idx, hw->used_mask))
+			return idx;
+
+	/* No counters available */
+	return -EAGAIN;
+}
+
+static int pmu_map_event(struct perf_event *event)
+{
+	int mapping;
+	u8 config = event->attr.config & CCI_PMU_EVENT_MASK;
+
+	if (event->attr.type < PERF_TYPE_MAX)
+		return -ENOENT;
+
+	if (config == CCI_PMU_CYCLES)
+		mapping = config;
+	else
+		mapping = pmu_validate_hw_event(config);
+
+	return mapping;
+}
+
+static int pmu_request_irq(struct arm_pmu *cci_pmu, irq_handler_t handler)
+{
+	int i;
+	struct platform_device *pmu_device = cci_pmu->plat_device;
+
+	if (unlikely(!pmu_device))
+		return -ENODEV;
+
+	if (pmu->nr_irqs < 1) {
+		dev_err(&pmu_device->dev, "no irqs for CCI PMUs defined\n");
+		return -ENODEV;
+	}
+
+	/*
+	 * Register all available CCI PMU interrupts. In the interrupt handler
+	 * we iterate over the counters checking for interrupt source (the
+	 * overflowing counter) and clear it.
+	 *
+	 * This should allow handling of non-unique interrupt for the counters.
+	 */
+	for (i = 0; i < pmu->nr_irqs; i++) {
+		int err = request_irq(pmu->irqs[i], handler, IRQF_SHARED,
+				"arm-cci-pmu", cci_pmu);
+		if (err) {
+			dev_err(&pmu_device->dev, "unable to request IRQ%d for ARM CCI PMU counters\n",
+				pmu->irqs[i]);
+			return err;
+		}
+
+		set_bit(i, &pmu->active_irqs);
+	}
+
+	return 0;
+}
+
+static irqreturn_t pmu_handle_irq(int irq_num, void *dev)
+{
+	unsigned long flags;
+	struct arm_pmu *cci_pmu = (struct arm_pmu *)dev;
+	struct pmu_hw_events *events = cci_pmu->get_hw_events();
+	struct perf_sample_data data;
+	struct pt_regs *regs;
+	int idx, handled = IRQ_NONE;
+
+	raw_spin_lock_irqsave(&events->pmu_lock, flags);
+	regs = get_irq_regs();
+	/*
+	 * Iterate over counters and update the corresponding perf events.
+	 * This should work regardless of whether we have per-counter overflow
+	 * interrupt or a combined overflow interrupt.
+	 */
+	for (idx = CCI_PMU_CYCLE_CNTR_IDX; idx <= CCI_PMU_CNTR_LAST(cci_pmu); idx++) {
+		struct perf_event *event = events->events[idx];
+		struct hw_perf_event *hw_counter;
+
+		if (!event)
+			continue;
+
+		hw_counter = &event->hw;
+
+		/* Did this counter overflow? */
+		if (!pmu_read_register(idx, CCI_PMU_OVRFLW) & CCI_PMU_OVRFLW_FLAG)
+			continue;
+
+		pmu_write_register(CCI_PMU_OVRFLW_FLAG, idx, CCI_PMU_OVRFLW);
+
+		handled = IRQ_HANDLED;
+
+		armpmu_event_update(event);
+		perf_sample_data_init(&data, 0, hw_counter->last_period);
+		if (!armpmu_event_set_period(event))
+			continue;
+
+		if (perf_event_overflow(event, &data, regs))
+			cci_pmu->disable(event);
+	}
+	raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
+
+	return IRQ_RETVAL(handled);
+}
+
+static void pmu_free_irq(struct arm_pmu *cci_pmu)
+{
+	int i;
+
+	for (i = 0; i < pmu->nr_irqs; i++) {
+		if (!test_and_clear_bit(i, &pmu->active_irqs))
+			continue;
+
+		free_irq(pmu->irqs[i], cci_pmu);
+	}
+}
+
+static void pmu_enable_event(struct perf_event *event)
+{
+	unsigned long flags;
+	struct arm_pmu *cci_pmu = to_arm_pmu(event->pmu);
+	struct pmu_hw_events *events = cci_pmu->get_hw_events();
+	struct hw_perf_event *hw_counter = &event->hw;
+	int idx = hw_counter->idx;
+
+	if (unlikely(!pmu_is_valid_counter(cci_pmu, idx))) {
+		dev_err(&cci_pmu->plat_device->dev, "Invalid CCI PMU counter %d\n", idx);
+		return;
+	}
+
+	raw_spin_lock_irqsave(&events->pmu_lock, flags);
+
+	/* Configure the event to count, unless you are counting cycles */
+	if (idx != CCI_PMU_CYCLE_CNTR_IDX)
+		pmu_set_event(idx, hw_counter->config_base);
+
+	pmu_enable_counter(idx);
+
+	raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
+}
+
+static void pmu_disable_event(struct perf_event *event)
+{
+	struct arm_pmu *cci_pmu = to_arm_pmu(event->pmu);
+	struct hw_perf_event *hw_counter = &event->hw;
+	int idx = hw_counter->idx;
+
+	if (unlikely(!pmu_is_valid_counter(cci_pmu, idx))) {
+		dev_err(&cci_pmu->plat_device->dev, "Invalid CCI PMU counter %d\n", idx);
+		return;
+	}
+
+	pmu_disable_counter(idx);
+}
+
+static void pmu_start(struct arm_pmu *cci_pmu)
+{
+	u32 val;
+	unsigned long flags;
+	struct pmu_hw_events *events = cci_pmu->get_hw_events();
+
+	raw_spin_lock_irqsave(&events->pmu_lock, flags);
+
+	/* Enable all the PMU counters. */
+	val = readl_relaxed(cci_ctrl_base + CCI_PMCR) | CCI_PMCR_CEN;
+	writel(val, cci_ctrl_base + CCI_PMCR);
+
+	raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
+}
+
+static void pmu_stop(struct arm_pmu *cci_pmu)
+{
+	u32 val;
+	unsigned long flags;
+	struct pmu_hw_events *events = cci_pmu->get_hw_events();
+
+	raw_spin_lock_irqsave(&events->pmu_lock, flags);
+
+	/* Disable all the PMU counters. */
+	val = readl_relaxed(cci_ctrl_base + CCI_PMCR) & ~CCI_PMCR_CEN;
+	writel(val, cci_ctrl_base + CCI_PMCR);
+
+	raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
+}
+
+static u32 pmu_read_counter(struct perf_event *event)
+{
+	struct arm_pmu *cci_pmu = to_arm_pmu(event->pmu);
+	struct hw_perf_event *hw_counter = &event->hw;
+	int idx = hw_counter->idx;
+	u32 value;
+
+	if (unlikely(!pmu_is_valid_counter(cci_pmu, idx))) {
+		dev_err(&cci_pmu->plat_device->dev, "Invalid CCI PMU counter %d\n", idx);
+		return 0;
+	}
+	value = pmu_read_register(idx, CCI_PMU_CNTR);
+
+	return value;
+}
+
+static void pmu_write_counter(struct perf_event *event, u32 value)
+{
+	struct arm_pmu *cci_pmu = to_arm_pmu(event->pmu);
+	struct hw_perf_event *hw_counter = &event->hw;
+	int idx = hw_counter->idx;
+
+	if (unlikely(!pmu_is_valid_counter(cci_pmu, idx)))
+		dev_err(&cci_pmu->plat_device->dev, "Invalid CCI PMU counter %d\n", idx);
+	else
+		pmu_write_register(value, idx, CCI_PMU_CNTR);
+}
+
+static int cci_pmu_init(struct arm_pmu *cci_pmu, struct platform_device *pdev)
+{
+	*cci_pmu = (struct arm_pmu){
+		.name             = PMU_NAME,
+		.max_period       = (1LLU << 32) - 1,
+		.get_hw_events    = pmu_get_hw_events,
+		.get_event_idx    = pmu_get_event_idx,
+		.map_event        = pmu_map_event,
+		.request_irq      = pmu_request_irq,
+		.handle_irq       = pmu_handle_irq,
+		.free_irq         = pmu_free_irq,
+		.enable           = pmu_enable_event,
+		.disable          = pmu_disable_event,
+		.start            = pmu_start,
+		.stop             = pmu_stop,
+		.read_counter     = pmu_read_counter,
+		.write_counter    = pmu_write_counter,
+	};
+
+	cci_pmu->plat_device = pdev;
+	cci_pmu->num_events = pmu_get_max_counters();
+
+	return armpmu_register(cci_pmu, -1);
+}
+
+static const struct of_device_id arm_cci_pmu_matches[] = {
+	{
+		.compatible = "arm,cci-400-pmu",
+	},
+	{},
+};
+
+static int cci_pmu_probe(struct platform_device *pdev)
+{
+	struct resource *res;
+	int i, ret, irq;
+
+	pmu = devm_kzalloc(&pdev->dev, sizeof(*pmu), GFP_KERNEL);
+	if (!pmu)
+		return -ENOMEM;
+
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	pmu->base = devm_ioremap_resource(&pdev->dev, res);
+	if (IS_ERR(pmu->base))
+		return -ENOMEM;
+
+	/*
+	 * CCI PMU has 5 overflow signals - one per counter; but some may be tied
+	 * together to a common interrupt.
+	 */
+	pmu->nr_irqs = 0;
+	for (i = 0; i < CCI_PMU_MAX_HW_EVENTS; i++) {
+		irq = platform_get_irq(pdev, i);
+		if (irq < 0)
+			break;
+
+		if (is_duplicate_irq(irq, pmu->irqs, pmu->nr_irqs))
+			continue;
+
+		pmu->irqs[pmu->nr_irqs++] = irq;
+	}
+
+	/*
+	 * Ensure that the device tree has as many interrupts as the number
+	 * of counters.
+	 */
+	if (i < CCI_PMU_MAX_HW_EVENTS) {
+		dev_warn(&pdev->dev, "In-correct number of interrupts: %d, should be %d\n",
+			i, CCI_PMU_MAX_HW_EVENTS);
+		return -EINVAL;
+	}
+
+	pmu->port_ranges = port_range_by_rev();
+	if (!pmu->port_ranges) {
+		dev_warn(&pdev->dev, "CCI PMU version not supported\n");
+		return -EINVAL;
+	}
+
+	pmu->cci_pmu = devm_kzalloc(&pdev->dev, sizeof(*(pmu->cci_pmu)), GFP_KERNEL);
+	if (!pmu->cci_pmu)
+		return -ENOMEM;
+
+	pmu->hw_events.events = pmu->events;
+	pmu->hw_events.used_mask = pmu->used_mask;
+	raw_spin_lock_init(&pmu->hw_events.pmu_lock);
+
+	ret = cci_pmu_init(pmu->cci_pmu, pdev);
+	if (ret)
+		return ret;
+
+	return 0;
+}
+
+static int cci_platform_probe(struct platform_device *pdev)
+{
+	if (!cci_probed())
+		return -ENODEV;
+
+	return of_platform_populate(pdev->dev.of_node, NULL, NULL, &pdev->dev);
+}
+
+#endif /* CONFIG_HW_PERF_EVENTS */
+
 struct cpu_port {
 	u64 mpidr;
 	u32 port;
@@ -120,7 +692,7 @@
 }
 EXPORT_SYMBOL_GPL(cci_ace_get_port);
 
-static void __init cci_ace_init_ports(void)
+static void cci_ace_init_ports(void)
 {
 	int port, cpu;
 	struct device_node *cpun;
@@ -386,7 +958,7 @@
 	{},
 };
 
-static int __init cci_probe(void)
+static int cci_probe(void)
 {
 	struct cci_nb_ports const *cci_config;
 	int ret, i, nb_ace = 0, nb_ace_lite = 0;
@@ -490,7 +1062,7 @@
 static int cci_init_status = -EAGAIN;
 static DEFINE_MUTEX(cci_probing);
 
-static int __init cci_init(void)
+static int cci_init(void)
 {
 	if (cci_init_status != -EAGAIN)
 		return cci_init_status;
@@ -502,18 +1074,55 @@
 	return cci_init_status;
 }
 
+#ifdef CONFIG_HW_PERF_EVENTS
+static struct platform_driver cci_pmu_driver = {
+	.driver = {
+		   .name = DRIVER_NAME_PMU,
+		   .of_match_table = arm_cci_pmu_matches,
+		  },
+	.probe = cci_pmu_probe,
+};
+
+static struct platform_driver cci_platform_driver = {
+	.driver = {
+		   .name = DRIVER_NAME,
+		   .of_match_table = arm_cci_matches,
+		  },
+	.probe = cci_platform_probe,
+};
+
+static int __init cci_platform_init(void)
+{
+	int ret;
+
+	ret = platform_driver_register(&cci_pmu_driver);
+	if (ret)
+		return ret;
+
+	return platform_driver_register(&cci_platform_driver);
+}
+
+#else
+
+static int __init cci_platform_init(void)
+{
+	return 0;
+}
+
+#endif
 /*
  * To sort out early init calls ordering a helper function is provided to
  * check if the CCI driver has beed initialized. Function check if the driver
  * has been initialized, if not it calls the init function that probes
  * the driver and updates the return value.
  */
-bool __init cci_probed(void)
+bool cci_probed(void)
 {
 	return cci_init() == 0;
 }
 EXPORT_SYMBOL_GPL(cci_probed);
 
 early_initcall(cci_init);
+core_initcall(cci_platform_init);
 MODULE_LICENSE("GPL");
 MODULE_DESCRIPTION("ARM CCI support");