Merge tag 'remoteproc-for-3.6' of git://git.kernel.org/pub/scm/linux/kernel/git/ohad/remoteproc

Pull remoteproc update from Ohad Ben-Cohen:
 - custom binary format support from Sjur Brændeland
 - groundwork for recovery and runtime pm support
 - some cleanups and API simplifications

Fix up conflicts in drivers/remoteproc/remoteproc_core.c due to clashes
with earlier cleanups by Sjur Brændeland (with part of the cleanups
moved into the new remoteproc_elf_loader.c file).

* tag 'remoteproc-for-3.6' of git://git.kernel.org/pub/scm/linux/kernel/git/ohad/remoteproc:
  MAINTAINERS: add remoteproc's git
  remoteproc: Support custom firmware handlers
  remoteproc: Move Elf related functions to separate file
  remoteproc: Add function rproc_get_boot_addr
  remoteproc: Pass struct fw to load_segments and find_rsc_table.
  remoteproc: adopt the driver core's alloc/add/del/put naming
  remoteproc: remove the get_by_name/put API
  remoteproc: support non-iommu carveout assignment
  remoteproc: simplify unregister/free interfaces
  remoteproc: remove the now-redundant kref
  remoteproc: maintain a generic child device for each rproc
  remoteproc: allocate vrings on demand, free when not needed
diff --git a/Documentation/remoteproc.txt b/Documentation/remoteproc.txt
index 70a048c..23a09b8 100644
--- a/Documentation/remoteproc.txt
+++ b/Documentation/remoteproc.txt
@@ -36,8 +36,7 @@
       Note: to use this function you should already have a valid rproc
       handle. There are several ways to achieve that cleanly (devres, pdata,
       the way remoteproc_rpmsg.c does this, or, if this becomes prevalent, we
-      might also consider using dev_archdata for this). See also
-      rproc_get_by_name() below.
+      might also consider using dev_archdata for this).
 
   void rproc_shutdown(struct rproc *rproc)
     - Power off a remote processor (previously booted with rproc_boot()).
@@ -51,30 +50,6 @@
         which means that the @rproc handle stays valid even after
         rproc_shutdown() returns, and users can still use it with a subsequent
         rproc_boot(), if needed.
-      - don't call rproc_shutdown() to unroll rproc_get_by_name(), exactly
-        because rproc_shutdown() _does not_ decrement the refcount of @rproc.
-        To decrement the refcount of @rproc, use rproc_put() (but _only_ if
-        you acquired @rproc using rproc_get_by_name()).
-
-  struct rproc *rproc_get_by_name(const char *name)
-    - Find an rproc handle using the remote processor's name, and then
-      boot it. If it's already powered on, then just immediately return
-      (successfully). Returns the rproc handle on success, and NULL on failure.
-      This function increments the remote processor's refcount, so always
-      use rproc_put() to decrement it back once rproc isn't needed anymore.
-      Note: currently rproc_get_by_name() and rproc_put() are not used anymore
-      by the rpmsg bus and its drivers. We need to scrutinize the use cases
-      that still need them, and see if we can migrate them to use the non
-      name-based boot/shutdown interface.
-
-  void rproc_put(struct rproc *rproc)
-    - Decrement @rproc's power refcount and shut it down if it reaches zero
-      (essentially by just calling rproc_shutdown), and then decrement @rproc's
-      validity refcount too.
-      After this function returns, @rproc may _not_ be used anymore, and its
-      handle should be considered invalid.
-      This function should be called _iff_ the @rproc handle was grabbed by
-      calling rproc_get_by_name().
 
 3. Typical usage
 
@@ -115,21 +90,21 @@
       This function should be used by rproc implementations during
       initialization of the remote processor.
       After creating an rproc handle using this function, and when ready,
-      implementations should then call rproc_register() to complete
+      implementations should then call rproc_add() to complete
       the registration of the remote processor.
       On success, the new rproc is returned, and on failure, NULL.
 
       Note: _never_ directly deallocate @rproc, even if it was not registered
-      yet. Instead, if you just need to unroll rproc_alloc(), use rproc_free().
+      yet. Instead, when you need to unroll rproc_alloc(), use rproc_put().
 
-  void rproc_free(struct rproc *rproc)
+  void rproc_put(struct rproc *rproc)
     - Free an rproc handle that was allocated by rproc_alloc.
-      This function should _only_ be used if @rproc was only allocated,
-      but not registered yet.
-      If @rproc was already successfully registered (by calling
-      rproc_register()), then use rproc_unregister() instead.
+      This function essentially unrolls rproc_alloc(), by decrementing the
+      rproc's refcount. It doesn't directly free rproc; that would happen
+      only if there are no other references to rproc and its refcount now
+      dropped to zero.
 
-  int rproc_register(struct rproc *rproc)
+  int rproc_add(struct rproc *rproc)
     - Register @rproc with the remoteproc framework, after it has been
       allocated with rproc_alloc().
       This is called by the platform-specific rproc implementation, whenever
@@ -142,20 +117,15 @@
       of registering this remote processor, additional virtio drivers might get
       probed.
 
-  int rproc_unregister(struct rproc *rproc)
-    - Unregister a remote processor, and decrement its refcount.
-      If its refcount drops to zero, then @rproc will be freed. If not,
-      it will be freed later once the last reference is dropped.
-
+  int rproc_del(struct rproc *rproc)
+    - Unroll rproc_add().
       This function should be called when the platform specific rproc
       implementation decides to remove the rproc device. it should
-      _only_ be called if a previous invocation of rproc_register()
+      _only_ be called if a previous invocation of rproc_add()
       has completed successfully.
 
-      After rproc_unregister() returns, @rproc is _not_ valid anymore and
-      it shouldn't be used. More specifically, don't call rproc_free()
-      or try to directly free @rproc after rproc_unregister() returns;
-      none of these are needed, and calling them is a bug.
+      After rproc_del() returns, @rproc is still valid, and its
+      last refcount should be decremented by calling rproc_put().
 
       Returns 0 on success and -EINVAL if @rproc isn't valid.
 
diff --git a/MAINTAINERS b/MAINTAINERS
index 48778c9..8f363b0 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -5725,6 +5725,7 @@
 
 REMOTE PROCESSOR (REMOTEPROC) SUBSYSTEM
 M:	Ohad Ben-Cohen <ohad@wizery.com>
+T:	git git://git.kernel.org/pub/scm/linux/kernel/git/ohad/remoteproc.git
 S:	Maintained
 F:	drivers/remoteproc/
 F:	Documentation/remoteproc.txt
diff --git a/drivers/remoteproc/Makefile b/drivers/remoteproc/Makefile
index 5445d9b..934ce6e 100644
--- a/drivers/remoteproc/Makefile
+++ b/drivers/remoteproc/Makefile
@@ -6,4 +6,5 @@
 remoteproc-y				:= remoteproc_core.o
 remoteproc-y				+= remoteproc_debugfs.o
 remoteproc-y				+= remoteproc_virtio.o
+remoteproc-y				+= remoteproc_elf_loader.o
 obj-$(CONFIG_OMAP_REMOTEPROC)		+= omap_remoteproc.o
diff --git a/drivers/remoteproc/omap_remoteproc.c b/drivers/remoteproc/omap_remoteproc.c
index de138e3..a1f7ac1 100644
--- a/drivers/remoteproc/omap_remoteproc.c
+++ b/drivers/remoteproc/omap_remoteproc.c
@@ -66,7 +66,7 @@
 {
 	mbox_msg_t msg = (mbox_msg_t) data;
 	struct omap_rproc *oproc = container_of(this, struct omap_rproc, nb);
-	struct device *dev = oproc->rproc->dev;
+	struct device *dev = oproc->rproc->dev.parent;
 	const char *name = oproc->rproc->name;
 
 	dev_dbg(dev, "mbox msg: 0x%x\n", msg);
@@ -92,12 +92,13 @@
 static void omap_rproc_kick(struct rproc *rproc, int vqid)
 {
 	struct omap_rproc *oproc = rproc->priv;
+	struct device *dev = rproc->dev.parent;
 	int ret;
 
 	/* send the index of the triggered virtqueue in the mailbox payload */
 	ret = omap_mbox_msg_send(oproc->mbox, vqid);
 	if (ret)
-		dev_err(rproc->dev, "omap_mbox_msg_send failed: %d\n", ret);
+		dev_err(dev, "omap_mbox_msg_send failed: %d\n", ret);
 }
 
 /*
@@ -110,7 +111,8 @@
 static int omap_rproc_start(struct rproc *rproc)
 {
 	struct omap_rproc *oproc = rproc->priv;
-	struct platform_device *pdev = to_platform_device(rproc->dev);
+	struct device *dev = rproc->dev.parent;
+	struct platform_device *pdev = to_platform_device(dev);
 	struct omap_rproc_pdata *pdata = pdev->dev.platform_data;
 	int ret;
 
@@ -120,7 +122,7 @@
 	oproc->mbox = omap_mbox_get(pdata->mbox_name, &oproc->nb);
 	if (IS_ERR(oproc->mbox)) {
 		ret = PTR_ERR(oproc->mbox);
-		dev_err(rproc->dev, "omap_mbox_get failed: %d\n", ret);
+		dev_err(dev, "omap_mbox_get failed: %d\n", ret);
 		return ret;
 	}
 
@@ -133,13 +135,13 @@
 	 */
 	ret = omap_mbox_msg_send(oproc->mbox, RP_MBOX_ECHO_REQUEST);
 	if (ret) {
-		dev_err(rproc->dev, "omap_mbox_get failed: %d\n", ret);
+		dev_err(dev, "omap_mbox_get failed: %d\n", ret);
 		goto put_mbox;
 	}
 
 	ret = pdata->device_enable(pdev);
 	if (ret) {
-		dev_err(rproc->dev, "omap_device_enable failed: %d\n", ret);
+		dev_err(dev, "omap_device_enable failed: %d\n", ret);
 		goto put_mbox;
 	}
 
@@ -153,7 +155,8 @@
 /* power off the remote processor */
 static int omap_rproc_stop(struct rproc *rproc)
 {
-	struct platform_device *pdev = to_platform_device(rproc->dev);
+	struct device *dev = rproc->dev.parent;
+	struct platform_device *pdev = to_platform_device(dev);
 	struct omap_rproc_pdata *pdata = pdev->dev.platform_data;
 	struct omap_rproc *oproc = rproc->priv;
 	int ret;
@@ -196,14 +199,14 @@
 
 	platform_set_drvdata(pdev, rproc);
 
-	ret = rproc_register(rproc);
+	ret = rproc_add(rproc);
 	if (ret)
 		goto free_rproc;
 
 	return 0;
 
 free_rproc:
-	rproc_free(rproc);
+	rproc_put(rproc);
 	return ret;
 }
 
@@ -211,7 +214,10 @@
 {
 	struct rproc *rproc = platform_get_drvdata(pdev);
 
-	return rproc_unregister(rproc);
+	rproc_del(rproc);
+	rproc_put(rproc);
+
+	return 0;
 }
 
 static struct platform_driver omap_rproc_driver = {
diff --git a/drivers/remoteproc/remoteproc_core.c b/drivers/remoteproc/remoteproc_core.c
index 66324ee..d5c2dbf 100644
--- a/drivers/remoteproc/remoteproc_core.c
+++ b/drivers/remoteproc/remoteproc_core.c
@@ -35,7 +35,7 @@
 #include <linux/debugfs.h>
 #include <linux/remoteproc.h>
 #include <linux/iommu.h>
-#include <linux/klist.h>
+#include <linux/idr.h>
 #include <linux/elf.h>
 #include <linux/virtio_ids.h>
 #include <linux/virtio_ring.h>
@@ -43,29 +43,13 @@
 
 #include "remoteproc_internal.h"
 
-static void klist_rproc_get(struct klist_node *n);
-static void klist_rproc_put(struct klist_node *n);
-
-/*
- * klist of the available remote processors.
- *
- * We need this in order to support name-based lookups (needed by the
- * rproc_get_by_name()).
- *
- * That said, we don't use rproc_get_by_name() at this point.
- * The use cases that do require its existence should be
- * scrutinized, and hopefully migrated to rproc_boot() using device-based
- * binding.
- *
- * If/when this materializes, we could drop the klist (and the by_name
- * API).
- */
-static DEFINE_KLIST(rprocs, klist_rproc_get, klist_rproc_put);
-
 typedef int (*rproc_handle_resources_t)(struct rproc *rproc,
 				struct resource_table *table, int len);
 typedef int (*rproc_handle_resource_t)(struct rproc *rproc, void *, int avail);
 
+/* Unique indices for remoteproc devices */
+static DEFINE_IDA(rproc_dev_index);
+
 /*
  * This is the IOMMU fault handler we register with the IOMMU API
  * (when relevant; not all remote processors access memory through
@@ -92,7 +76,7 @@
 static int rproc_enable_iommu(struct rproc *rproc)
 {
 	struct iommu_domain *domain;
-	struct device *dev = rproc->dev;
+	struct device *dev = rproc->dev.parent;
 	int ret;
 
 	/*
@@ -137,7 +121,7 @@
 static void rproc_disable_iommu(struct rproc *rproc)
 {
 	struct iommu_domain *domain = rproc->domain;
-	struct device *dev = rproc->dev;
+	struct device *dev = rproc->dev.parent;
 
 	if (!domain)
 		return;
@@ -165,7 +149,7 @@
  * but only on kernel direct mapped RAM memory. Instead, we're just using
  * here the output of the DMA API, which should be more correct.
  */
-static void *rproc_da_to_va(struct rproc *rproc, u64 da, int len)
+void *rproc_da_to_va(struct rproc *rproc, u64 da, int len)
 {
 	struct rproc_mem_entry *carveout;
 	void *ptr = NULL;
@@ -188,107 +172,67 @@
 
 	return ptr;
 }
+EXPORT_SYMBOL(rproc_da_to_va);
 
-/**
- * rproc_load_segments() - load firmware segments to memory
- * @rproc: remote processor which will be booted using these fw segments
- * @elf_data: the content of the ELF firmware image
- * @len: firmware size (in bytes)
- *
- * This function loads the firmware segments to memory, where the remote
- * processor expects them.
- *
- * Some remote processors will expect their code and data to be placed
- * in specific device addresses, and can't have them dynamically assigned.
- *
- * We currently support only those kind of remote processors, and expect
- * the program header's paddr member to contain those addresses. We then go
- * through the physically contiguous "carveout" memory regions which we
- * allocated (and mapped) earlier on behalf of the remote processor,
- * and "translate" device address to kernel addresses, so we can copy the
- * segments where they are expected.
- *
- * Currently we only support remote processors that required carveout
- * allocations and got them mapped onto their iommus. Some processors
- * might be different: they might not have iommus, and would prefer to
- * directly allocate memory for every segment/resource. This is not yet
- * supported, though.
- */
-static int
-rproc_load_segments(struct rproc *rproc, const u8 *elf_data, size_t len)
-{
-	struct device *dev = rproc->dev;
-	struct elf32_hdr *ehdr;
-	struct elf32_phdr *phdr;
-	int i, ret = 0;
-
-	ehdr = (struct elf32_hdr *)elf_data;
-	phdr = (struct elf32_phdr *)(elf_data + ehdr->e_phoff);
-
-	/* go through the available ELF segments */
-	for (i = 0; i < ehdr->e_phnum; i++, phdr++) {
-		u32 da = phdr->p_paddr;
-		u32 memsz = phdr->p_memsz;
-		u32 filesz = phdr->p_filesz;
-		u32 offset = phdr->p_offset;
-		void *ptr;
-
-		if (phdr->p_type != PT_LOAD)
-			continue;
-
-		dev_dbg(dev, "phdr: type %d da 0x%x memsz 0x%x filesz 0x%x\n",
-					phdr->p_type, da, memsz, filesz);
-
-		if (filesz > memsz) {
-			dev_err(dev, "bad phdr filesz 0x%x memsz 0x%x\n",
-							filesz, memsz);
-			ret = -EINVAL;
-			break;
-		}
-
-		if (offset + filesz > len) {
-			dev_err(dev, "truncated fw: need 0x%x avail 0x%zx\n",
-					offset + filesz, len);
-			ret = -EINVAL;
-			break;
-		}
-
-		/* grab the kernel address for this device address */
-		ptr = rproc_da_to_va(rproc, da, memsz);
-		if (!ptr) {
-			dev_err(dev, "bad phdr da 0x%x mem 0x%x\n", da, memsz);
-			ret = -EINVAL;
-			break;
-		}
-
-		/* put the segment where the remote processor expects it */
-		if (phdr->p_filesz)
-			memcpy(ptr, elf_data + phdr->p_offset, filesz);
-
-		/*
-		 * Zero out remaining memory for this segment.
-		 *
-		 * This isn't strictly required since dma_alloc_coherent already
-		 * did this for us. albeit harmless, we may consider removing
-		 * this.
-		 */
-		if (memsz > filesz)
-			memset(ptr + filesz, 0, memsz - filesz);
-	}
-
-	return ret;
-}
-
-static int
-__rproc_handle_vring(struct rproc_vdev *rvdev, struct fw_rsc_vdev *rsc, int i)
+int rproc_alloc_vring(struct rproc_vdev *rvdev, int i)
 {
 	struct rproc *rproc = rvdev->rproc;
-	struct device *dev = rproc->dev;
-	struct fw_rsc_vdev_vring *vring = &rsc->vring[i];
+	struct device *dev = &rproc->dev;
+	struct rproc_vring *rvring = &rvdev->vring[i];
 	dma_addr_t dma;
 	void *va;
 	int ret, size, notifyid;
 
+	/* actual size of vring (in bytes) */
+	size = PAGE_ALIGN(vring_size(rvring->len, rvring->align));
+
+	if (!idr_pre_get(&rproc->notifyids, GFP_KERNEL)) {
+		dev_err(dev, "idr_pre_get failed\n");
+		return -ENOMEM;
+	}
+
+	/*
+	 * Allocate non-cacheable memory for the vring. In the future
+	 * this call will also configure the IOMMU for us
+	 * TODO: let the rproc know the da of this vring
+	 */
+	va = dma_alloc_coherent(dev->parent, size, &dma, GFP_KERNEL);
+	if (!va) {
+		dev_err(dev->parent, "dma_alloc_coherent failed\n");
+		return -EINVAL;
+	}
+
+	/*
+	 * Assign an rproc-wide unique index for this vring
+	 * TODO: assign a notifyid for rvdev updates as well
+	 * TODO: let the rproc know the notifyid of this vring
+	 * TODO: support predefined notifyids (via resource table)
+	 */
+	ret = idr_get_new(&rproc->notifyids, rvring, &notifyid);
+	if (ret) {
+		dev_err(dev, "idr_get_new failed: %d\n", ret);
+		dma_free_coherent(dev->parent, size, va, dma);
+		return ret;
+	}
+
+	dev_dbg(dev, "vring%d: va %p dma %x size %x idr %d\n", i, va,
+					dma, size, notifyid);
+
+	rvring->va = va;
+	rvring->dma = dma;
+	rvring->notifyid = notifyid;
+
+	return 0;
+}
+
+static int
+rproc_parse_vring(struct rproc_vdev *rvdev, struct fw_rsc_vdev *rsc, int i)
+{
+	struct rproc *rproc = rvdev->rproc;
+	struct device *dev = &rproc->dev;
+	struct fw_rsc_vdev_vring *vring = &rsc->vring[i];
+	struct rproc_vring *rvring = &rvdev->vring[i];
+
 	dev_dbg(dev, "vdev rsc: vring%d: da %x, qsz %d, align %d\n",
 				i, vring->da, vring->num, vring->align);
 
@@ -305,62 +249,20 @@
 		return -EINVAL;
 	}
 
-	/* actual size of vring (in bytes) */
-	size = PAGE_ALIGN(vring_size(vring->num, vring->align));
-
-	if (!idr_pre_get(&rproc->notifyids, GFP_KERNEL)) {
-		dev_err(dev, "idr_pre_get failed\n");
-		return -ENOMEM;
-	}
-
-	/*
-	 * Allocate non-cacheable memory for the vring. In the future
-	 * this call will also configure the IOMMU for us
-	 */
-	va = dma_alloc_coherent(dev, size, &dma, GFP_KERNEL);
-	if (!va) {
-		dev_err(dev, "dma_alloc_coherent failed\n");
-		return -EINVAL;
-	}
-
-	/* assign an rproc-wide unique index for this vring */
-	/* TODO: assign a notifyid for rvdev updates as well */
-	ret = idr_get_new(&rproc->notifyids, &rvdev->vring[i], &notifyid);
-	if (ret) {
-		dev_err(dev, "idr_get_new failed: %d\n", ret);
-		dma_free_coherent(dev, size, va, dma);
-		return ret;
-	}
-
-	/* let the rproc know the da and notifyid of this vring */
-	/* TODO: expose this to remote processor */
-	vring->da = dma;
-	vring->notifyid = notifyid;
-
-	dev_dbg(dev, "vring%d: va %p dma %x size %x idr %d\n", i, va,
-					dma, size, notifyid);
-
-	rvdev->vring[i].len = vring->num;
-	rvdev->vring[i].align = vring->align;
-	rvdev->vring[i].va = va;
-	rvdev->vring[i].dma = dma;
-	rvdev->vring[i].notifyid = notifyid;
-	rvdev->vring[i].rvdev = rvdev;
+	rvring->len = vring->num;
+	rvring->align = vring->align;
+	rvring->rvdev = rvdev;
 
 	return 0;
 }
 
-static void __rproc_free_vrings(struct rproc_vdev *rvdev, int i)
+void rproc_free_vring(struct rproc_vring *rvring)
 {
-	struct rproc *rproc = rvdev->rproc;
+	int size = PAGE_ALIGN(vring_size(rvring->len, rvring->align));
+	struct rproc *rproc = rvring->rvdev->rproc;
 
-	for (i--; i >= 0; i--) {
-		struct rproc_vring *rvring = &rvdev->vring[i];
-		int size = PAGE_ALIGN(vring_size(rvring->len, rvring->align));
-
-		dma_free_coherent(rproc->dev, size, rvring->va, rvring->dma);
-		idr_remove(&rproc->notifyids, rvring->notifyid);
-	}
+	dma_free_coherent(rproc->dev.parent, size, rvring->va, rvring->dma);
+	idr_remove(&rproc->notifyids, rvring->notifyid);
 }
 
 /**
@@ -393,14 +295,14 @@
 static int rproc_handle_vdev(struct rproc *rproc, struct fw_rsc_vdev *rsc,
 								int avail)
 {
-	struct device *dev = rproc->dev;
+	struct device *dev = &rproc->dev;
 	struct rproc_vdev *rvdev;
 	int i, ret;
 
 	/* make sure resource isn't truncated */
 	if (sizeof(*rsc) + rsc->num_of_vrings * sizeof(struct fw_rsc_vdev_vring)
 			+ rsc->config_len > avail) {
-		dev_err(rproc->dev, "vdev rsc is truncated\n");
+		dev_err(dev, "vdev rsc is truncated\n");
 		return -EINVAL;
 	}
 
@@ -425,11 +327,11 @@
 
 	rvdev->rproc = rproc;
 
-	/* allocate the vrings */
+	/* parse the vrings */
 	for (i = 0; i < rsc->num_of_vrings; i++) {
-		ret = __rproc_handle_vring(rvdev, rsc, i);
+		ret = rproc_parse_vring(rvdev, rsc, i);
 		if (ret)
-			goto free_vrings;
+			goto free_rvdev;
 	}
 
 	/* remember the device features */
@@ -440,12 +342,11 @@
 	/* it is now safe to add the virtio device */
 	ret = rproc_add_virtio_dev(rvdev, rsc->id);
 	if (ret)
-		goto free_vrings;
+		goto free_rvdev;
 
 	return 0;
 
-free_vrings:
-	__rproc_free_vrings(rvdev, i);
+free_rvdev:
 	kfree(rvdev);
 	return ret;
 }
@@ -470,12 +371,12 @@
 								int avail)
 {
 	struct rproc_mem_entry *trace;
-	struct device *dev = rproc->dev;
+	struct device *dev = &rproc->dev;
 	void *ptr;
 	char name[15];
 
 	if (sizeof(*rsc) > avail) {
-		dev_err(rproc->dev, "trace rsc is truncated\n");
+		dev_err(dev, "trace rsc is truncated\n");
 		return -EINVAL;
 	}
 
@@ -552,6 +453,7 @@
 								int avail)
 {
 	struct rproc_mem_entry *mapping;
+	struct device *dev = &rproc->dev;
 	int ret;
 
 	/* no point in handling this resource without a valid iommu domain */
@@ -559,25 +461,25 @@
 		return -EINVAL;
 
 	if (sizeof(*rsc) > avail) {
-		dev_err(rproc->dev, "devmem rsc is truncated\n");
+		dev_err(dev, "devmem rsc is truncated\n");
 		return -EINVAL;
 	}
 
 	/* make sure reserved bytes are zeroes */
 	if (rsc->reserved) {
-		dev_err(rproc->dev, "devmem rsc has non zero reserved bytes\n");
+		dev_err(dev, "devmem rsc has non zero reserved bytes\n");
 		return -EINVAL;
 	}
 
 	mapping = kzalloc(sizeof(*mapping), GFP_KERNEL);
 	if (!mapping) {
-		dev_err(rproc->dev, "kzalloc mapping failed\n");
+		dev_err(dev, "kzalloc mapping failed\n");
 		return -ENOMEM;
 	}
 
 	ret = iommu_map(rproc->domain, rsc->da, rsc->pa, rsc->len, rsc->flags);
 	if (ret) {
-		dev_err(rproc->dev, "failed to map devmem: %d\n", ret);
+		dev_err(dev, "failed to map devmem: %d\n", ret);
 		goto out;
 	}
 
@@ -592,7 +494,7 @@
 	mapping->len = rsc->len;
 	list_add_tail(&mapping->node, &rproc->mappings);
 
-	dev_dbg(rproc->dev, "mapped devmem pa 0x%x, da 0x%x, len 0x%x\n",
+	dev_dbg(dev, "mapped devmem pa 0x%x, da 0x%x, len 0x%x\n",
 					rsc->pa, rsc->da, rsc->len);
 
 	return 0;
@@ -624,13 +526,13 @@
 				struct fw_rsc_carveout *rsc, int avail)
 {
 	struct rproc_mem_entry *carveout, *mapping;
-	struct device *dev = rproc->dev;
+	struct device *dev = &rproc->dev;
 	dma_addr_t dma;
 	void *va;
 	int ret;
 
 	if (sizeof(*rsc) > avail) {
-		dev_err(rproc->dev, "carveout rsc is truncated\n");
+		dev_err(dev, "carveout rsc is truncated\n");
 		return -EINVAL;
 	}
 
@@ -656,9 +558,9 @@
 		goto free_mapping;
 	}
 
-	va = dma_alloc_coherent(dev, rsc->len, &dma, GFP_KERNEL);
+	va = dma_alloc_coherent(dev->parent, rsc->len, &dma, GFP_KERNEL);
 	if (!va) {
-		dev_err(dev, "failed to dma alloc carveout: %d\n", rsc->len);
+		dev_err(dev->parent, "dma_alloc_coherent err: %d\n", rsc->len);
 		ret = -ENOMEM;
 		goto free_carv;
 	}
@@ -702,23 +604,27 @@
 		list_add_tail(&mapping->node, &rproc->mappings);
 
 		dev_dbg(dev, "carveout mapped 0x%x to 0x%x\n", rsc->da, dma);
-
-		/*
-		 * Some remote processors might need to know the pa
-		 * even though they are behind an IOMMU. E.g., OMAP4's
-		 * remote M3 processor needs this so it can control
-		 * on-chip hardware accelerators that are not behind
-		 * the IOMMU, and therefor must know the pa.
-		 *
-		 * Generally we don't want to expose physical addresses
-		 * if we don't have to (remote processors are generally
-		 * _not_ trusted), so we might want to do this only for
-		 * remote processor that _must_ have this (e.g. OMAP4's
-		 * dual M3 subsystem).
-		 */
-		rsc->pa = dma;
 	}
 
+	/*
+	 * Some remote processors might need to know the pa
+	 * even though they are behind an IOMMU. E.g., OMAP4's
+	 * remote M3 processor needs this so it can control
+	 * on-chip hardware accelerators that are not behind
+	 * the IOMMU, and therefor must know the pa.
+	 *
+	 * Generally we don't want to expose physical addresses
+	 * if we don't have to (remote processors are generally
+	 * _not_ trusted), so we might want to do this only for
+	 * remote processor that _must_ have this (e.g. OMAP4's
+	 * dual M3 subsystem).
+	 *
+	 * Non-IOMMU processors might also want to have this info.
+	 * In this case, the device address and the physical address
+	 * are the same.
+	 */
+	rsc->pa = dma;
+
 	carveout->va = va;
 	carveout->len = rsc->len;
 	carveout->dma = dma;
@@ -729,7 +635,7 @@
 	return 0;
 
 dma_free:
-	dma_free_coherent(dev, rsc->len, va, dma);
+	dma_free_coherent(dev->parent, rsc->len, va, dma);
 free_carv:
 	kfree(carveout);
 free_mapping:
@@ -752,7 +658,7 @@
 static int
 rproc_handle_boot_rsc(struct rproc *rproc, struct resource_table *table, int len)
 {
-	struct device *dev = rproc->dev;
+	struct device *dev = &rproc->dev;
 	rproc_handle_resource_t handler;
 	int ret = 0, i;
 
@@ -791,7 +697,7 @@
 static int
 rproc_handle_virtio_rsc(struct rproc *rproc, struct resource_table *table, int len)
 {
-	struct device *dev = rproc->dev;
+	struct device *dev = &rproc->dev;
 	int ret = 0, i;
 
 	for (i = 0; i < table->num; i++) {
@@ -822,85 +728,6 @@
 }
 
 /**
- * rproc_find_rsc_table() - find the resource table
- * @rproc: the rproc handle
- * @elf_data: the content of the ELF firmware image
- * @len: firmware size (in bytes)
- * @tablesz: place holder for providing back the table size
- *
- * This function finds the resource table inside the remote processor's
- * firmware. It is used both upon the registration of @rproc (in order
- * to look for and register the supported virito devices), and when the
- * @rproc is booted.
- *
- * Returns the pointer to the resource table if it is found, and write its
- * size into @tablesz. If a valid table isn't found, NULL is returned
- * (and @tablesz isn't set).
- */
-static struct resource_table *
-rproc_find_rsc_table(struct rproc *rproc, const u8 *elf_data, size_t len,
-							int *tablesz)
-{
-	struct elf32_hdr *ehdr;
-	struct elf32_shdr *shdr;
-	const char *name_table;
-	struct device *dev = rproc->dev;
-	struct resource_table *table = NULL;
-	int i;
-
-	ehdr = (struct elf32_hdr *)elf_data;
-	shdr = (struct elf32_shdr *)(elf_data + ehdr->e_shoff);
-	name_table = elf_data + shdr[ehdr->e_shstrndx].sh_offset;
-
-	/* look for the resource table and handle it */
-	for (i = 0; i < ehdr->e_shnum; i++, shdr++) {
-		int size = shdr->sh_size;
-		int offset = shdr->sh_offset;
-
-		if (strcmp(name_table + shdr->sh_name, ".resource_table"))
-			continue;
-
-		table = (struct resource_table *)(elf_data + offset);
-
-		/* make sure we have the entire table */
-		if (offset + size > len) {
-			dev_err(dev, "resource table truncated\n");
-			return NULL;
-		}
-
-		/* make sure table has at least the header */
-		if (sizeof(struct resource_table) > size) {
-			dev_err(dev, "header-less resource table\n");
-			return NULL;
-		}
-
-		/* we don't support any version beyond the first */
-		if (table->ver != 1) {
-			dev_err(dev, "unsupported fw ver: %d\n", table->ver);
-			return NULL;
-		}
-
-		/* make sure reserved bytes are zeroes */
-		if (table->reserved[0] || table->reserved[1]) {
-			dev_err(dev, "non zero reserved bytes\n");
-			return NULL;
-		}
-
-		/* make sure the offsets array isn't truncated */
-		if (table->num * sizeof(table->offset[0]) +
-				sizeof(struct resource_table) > size) {
-			dev_err(dev, "resource table incomplete\n");
-			return NULL;
-		}
-
-		*tablesz = shdr->sh_size;
-		break;
-	}
-
-	return table;
-}
-
-/**
  * rproc_resource_cleanup() - clean up and free all acquired resources
  * @rproc: rproc handle
  *
@@ -910,7 +737,7 @@
 static void rproc_resource_cleanup(struct rproc *rproc)
 {
 	struct rproc_mem_entry *entry, *tmp;
-	struct device *dev = rproc->dev;
+	struct device *dev = &rproc->dev;
 
 	/* clean up debugfs trace entries */
 	list_for_each_entry_safe(entry, tmp, &rproc->traces, node) {
@@ -922,7 +749,7 @@
 
 	/* clean up carveout allocations */
 	list_for_each_entry_safe(entry, tmp, &rproc->carveouts, node) {
-		dma_free_coherent(dev, entry->len, entry->va, entry->dma);
+		dma_free_coherent(dev->parent, entry->len, entry->va, entry->dma);
 		list_del(&entry->node);
 		kfree(entry);
 	}
@@ -943,74 +770,13 @@
 	}
 }
 
-/* make sure this fw image is sane */
-static int rproc_fw_sanity_check(struct rproc *rproc, const struct firmware *fw)
-{
-	const char *name = rproc->firmware;
-	struct device *dev = rproc->dev;
-	struct elf32_hdr *ehdr;
-	char class;
-
-	if (!fw) {
-		dev_err(dev, "failed to load %s\n", name);
-		return -EINVAL;
-	}
-
-	if (fw->size < sizeof(struct elf32_hdr)) {
-		dev_err(dev, "Image is too small\n");
-		return -EINVAL;
-	}
-
-	ehdr = (struct elf32_hdr *)fw->data;
-
-	/* We only support ELF32 at this point */
-	class = ehdr->e_ident[EI_CLASS];
-	if (class != ELFCLASS32) {
-		dev_err(dev, "Unsupported class: %d\n", class);
-		return -EINVAL;
-	}
-
-	/* We assume the firmware has the same endianess as the host */
-# ifdef __LITTLE_ENDIAN
-	if (ehdr->e_ident[EI_DATA] != ELFDATA2LSB) {
-# else /* BIG ENDIAN */
-	if (ehdr->e_ident[EI_DATA] != ELFDATA2MSB) {
-# endif
-		dev_err(dev, "Unsupported firmware endianess\n");
-		return -EINVAL;
-	}
-
-	if (fw->size < ehdr->e_shoff + sizeof(struct elf32_shdr)) {
-		dev_err(dev, "Image is too small\n");
-		return -EINVAL;
-	}
-
-	if (memcmp(ehdr->e_ident, ELFMAG, SELFMAG)) {
-		dev_err(dev, "Image is corrupted (bad magic)\n");
-		return -EINVAL;
-	}
-
-	if (ehdr->e_phnum == 0) {
-		dev_err(dev, "No loadable segments\n");
-		return -EINVAL;
-	}
-
-	if (ehdr->e_phoff > fw->size) {
-		dev_err(dev, "Firmware size is too small\n");
-		return -EINVAL;
-	}
-
-	return 0;
-}
-
 /*
  * take a firmware and boot a remote processor with it.
  */
 static int rproc_fw_boot(struct rproc *rproc, const struct firmware *fw)
 {
-	struct device *dev = rproc->dev;
+	struct device *dev = &rproc->dev;
 	const char *name = rproc->firmware;
-	struct elf32_hdr *ehdr;
 	struct resource_table *table;
 	int ret, tablesz;
 
@@ -1018,8 +784,6 @@
 	if (ret)
 		return ret;
 
-	ehdr = (struct elf32_hdr *)fw->data;
-
 	dev_info(dev, "Booting fw image %s, size %zd\n", name, fw->size);
 
 	/*
@@ -1032,15 +796,10 @@
 		return ret;
 	}
 
-	/*
-	 * The ELF entry point is the rproc's boot addr (though this is not
-	 * a configurable property of all remote processors: some will always
-	 * boot at a specific hardcoded address).
-	 */
-	rproc->bootaddr = ehdr->e_entry;
+	rproc->bootaddr = rproc_get_boot_addr(rproc, fw);
 
 	/* look for the resource table */
-	table = rproc_find_rsc_table(rproc, fw->data, fw->size, &tablesz);
+	table = rproc_find_rsc_table(rproc, fw, &tablesz);
 	if (!table) {
 		ret = -EINVAL;
 		goto clean_up;
@@ -1054,7 +813,7 @@
 	}
 
 	/* load the ELF segments to memory */
-	ret = rproc_load_segments(rproc, fw->data, fw->size);
+	ret = rproc_load_segments(rproc, fw);
 	if (ret) {
 		dev_err(dev, "Failed to load program segments: %d\n", ret);
 		goto clean_up;
@@ -1097,7 +856,7 @@
 		goto out;
 
 	/* look for the resource table */
-	table = rproc_find_rsc_table(rproc, fw->data, fw->size, &tablesz);
+	table = rproc_find_rsc_table(rproc, fw,  &tablesz);
 	if (!table)
 		goto out;
 
@@ -1108,7 +867,7 @@
 
 out:
 	release_firmware(fw);
-	/* allow rproc_unregister() contexts, if any, to proceed */
+	/* allow rproc_del() contexts, if any, to proceed */
 	complete_all(&rproc->firmware_loading_complete);
 }
 
@@ -1134,7 +893,7 @@
 		return -EINVAL;
 	}
 
-	dev = rproc->dev;
+	dev = &rproc->dev;
 
 	ret = mutex_lock_interruptible(&rproc->lock);
 	if (ret) {
@@ -1150,7 +909,7 @@
 	}
 
 	/* prevent underlying implementation from being removed */
-	if (!try_module_get(dev->driver->owner)) {
+	if (!try_module_get(dev->parent->driver->owner)) {
 		dev_err(dev, "%s: can't get owner\n", __func__);
 		ret = -EINVAL;
 		goto unlock_mutex;
@@ -1177,7 +936,7 @@
 
 downref_rproc:
 	if (ret) {
-		module_put(dev->driver->owner);
+		module_put(dev->parent->driver->owner);
 		atomic_dec(&rproc->power);
 	}
 unlock_mutex:
@@ -1204,14 +963,10 @@
  *   which means that the @rproc handle stays valid even after rproc_shutdown()
  *   returns, and users can still use it with a subsequent rproc_boot(), if
  *   needed.
- * - don't call rproc_shutdown() to unroll rproc_get_by_name(), exactly
- *   because rproc_shutdown() _does not_ decrement the refcount of @rproc.
- *   To decrement the refcount of @rproc, use rproc_put() (but _only_ if
- *   you acquired @rproc using rproc_get_by_name()).
  */
 void rproc_shutdown(struct rproc *rproc)
 {
-	struct device *dev = rproc->dev;
+	struct device *dev = &rproc->dev;
 	int ret;
 
 	ret = mutex_lock_interruptible(&rproc->lock);
@@ -1244,148 +999,12 @@
 out:
 	mutex_unlock(&rproc->lock);
 	if (!ret)
-		module_put(dev->driver->owner);
+		module_put(dev->parent->driver->owner);
 }
 EXPORT_SYMBOL(rproc_shutdown);
 
 /**
- * rproc_release() - completely deletes the existence of a remote processor
- * @kref: the rproc's kref
- *
- * This function should _never_ be called directly.
- *
- * The only reasonable location to use it is as an argument when kref_put'ing
- * @rproc's refcount.
- *
- * This way it will be called when no one holds a valid pointer to this @rproc
- * anymore (and obviously after it is removed from the rprocs klist).
- *
- * Note: this function is not static because rproc_vdev_release() needs it when
- * it decrements @rproc's refcount.
- */
-void rproc_release(struct kref *kref)
-{
-	struct rproc *rproc = container_of(kref, struct rproc, refcount);
-	struct rproc_vdev *rvdev, *rvtmp;
-
-	dev_info(rproc->dev, "removing %s\n", rproc->name);
-
-	rproc_delete_debug_dir(rproc);
-
-	/* clean up remote vdev entries */
-	list_for_each_entry_safe(rvdev, rvtmp, &rproc->rvdevs, node) {
-		__rproc_free_vrings(rvdev, RVDEV_NUM_VRINGS);
-		list_del(&rvdev->node);
-	}
-
-	/*
-	 * At this point no one holds a reference to rproc anymore,
-	 * so we can directly unroll rproc_alloc()
-	 */
-	rproc_free(rproc);
-}
-
-/* will be called when an rproc is added to the rprocs klist */
-static void klist_rproc_get(struct klist_node *n)
-{
-	struct rproc *rproc = container_of(n, struct rproc, node);
-
-	kref_get(&rproc->refcount);
-}
-
-/* will be called when an rproc is removed from the rprocs klist */
-static void klist_rproc_put(struct klist_node *n)
-{
-	struct rproc *rproc = container_of(n, struct rproc, node);
-
-	kref_put(&rproc->refcount, rproc_release);
-}
-
-static struct rproc *next_rproc(struct klist_iter *i)
-{
-	struct klist_node *n;
-
-	n = klist_next(i);
-	if (!n)
-		return NULL;
-
-	return container_of(n, struct rproc, node);
-}
-
-/**
- * rproc_get_by_name() - find a remote processor by name and boot it
- * @name: name of the remote processor
- *
- * Finds an rproc handle using the remote processor's name, and then
- * boot it. If it's already powered on, then just immediately return
- * (successfully).
- *
- * Returns the rproc handle on success, and NULL on failure.
- *
- * This function increments the remote processor's refcount, so always
- * use rproc_put() to decrement it back once rproc isn't needed anymore.
- *
- * Note: currently this function (and its counterpart rproc_put()) are not
- * being used. We need to scrutinize the use cases
- * that still need them, and see if we can migrate them to use the non
- * name-based boot/shutdown interface.
- */
-struct rproc *rproc_get_by_name(const char *name)
-{
-	struct rproc *rproc;
-	struct klist_iter i;
-	int ret;
-
-	/* find the remote processor, and upref its refcount */
-	klist_iter_init(&rprocs, &i);
-	while ((rproc = next_rproc(&i)) != NULL)
-		if (!strcmp(rproc->name, name)) {
-			kref_get(&rproc->refcount);
-			break;
-		}
-	klist_iter_exit(&i);
-
-	/* can't find this rproc ? */
-	if (!rproc) {
-		pr_err("can't find remote processor %s\n", name);
-		return NULL;
-	}
-
-	ret = rproc_boot(rproc);
-	if (ret < 0) {
-		kref_put(&rproc->refcount, rproc_release);
-		return NULL;
-	}
-
-	return rproc;
-}
-EXPORT_SYMBOL(rproc_get_by_name);
-
-/**
- * rproc_put() - decrement the refcount of a remote processor, and shut it down
- * @rproc: the remote processor
- *
- * This function tries to shutdown @rproc, and it then decrements its
- * refcount.
- *
- * After this function returns, @rproc may _not_ be used anymore, and its
- * handle should be considered invalid.
- *
- * This function should be called _iff_ the @rproc handle was grabbed by
- * calling rproc_get_by_name().
- */
-void rproc_put(struct rproc *rproc)
-{
-	/* try to power off the remote processor */
-	rproc_shutdown(rproc);
-
-	/* downref rproc's refcount */
-	kref_put(&rproc->refcount, rproc_release);
-}
-EXPORT_SYMBOL(rproc_put);
-
-/**
- * rproc_register() - register a remote processor
+ * rproc_add() - register a remote processor
  * @rproc: the remote processor handle to register
  *
  * Registers @rproc with the remoteproc framework, after it has been
@@ -1404,15 +1023,16 @@
  * of registering this remote processor, additional virtio drivers might be
  * probed.
  */
-int rproc_register(struct rproc *rproc)
+int rproc_add(struct rproc *rproc)
 {
-	struct device *dev = rproc->dev;
+	struct device *dev = &rproc->dev;
 	int ret = 0;
 
-	/* expose to rproc_get_by_name users */
-	klist_add_tail(&rproc->node, &rprocs);
+	ret = device_add(dev);
+	if (ret < 0)
+		return ret;
 
-	dev_info(rproc->dev, "%s is available\n", rproc->name);
+	dev_info(dev, "%s is available\n", rproc->name);
 
 	dev_info(dev, "Note: remoteproc is still under development and considered experimental.\n");
 	dev_info(dev, "THE BINARY FORMAT IS NOT YET FINALIZED, and backward compatibility isn't yet guaranteed.\n");
@@ -1420,7 +1040,7 @@
 	/* create debugfs entries */
 	rproc_create_debug_dir(rproc);
 
-	/* rproc_unregister() calls must wait until async loader completes */
+	/* rproc_del() calls must wait until async loader completes */
 	init_completion(&rproc->firmware_loading_complete);
 
 	/*
@@ -1437,12 +1057,42 @@
 	if (ret < 0) {
 		dev_err(dev, "request_firmware_nowait failed: %d\n", ret);
 		complete_all(&rproc->firmware_loading_complete);
-		klist_remove(&rproc->node);
 	}
 
 	return ret;
 }
-EXPORT_SYMBOL(rproc_register);
+EXPORT_SYMBOL(rproc_add);
+
+/**
+ * rproc_type_release() - release a remote processor instance
+ * @dev: the rproc's device
+ *
+ * This function should _never_ be called directly.
+ *
+ * It will be called by the driver core when no one holds a valid pointer
+ * to @dev anymore.
+ */
+static void rproc_type_release(struct device *dev)
+{
+	struct rproc *rproc = container_of(dev, struct rproc, dev);
+
+	dev_info(&rproc->dev, "releasing %s\n", rproc->name);
+
+	rproc_delete_debug_dir(rproc);
+
+	idr_remove_all(&rproc->notifyids);
+	idr_destroy(&rproc->notifyids);
+
+	if (rproc->index >= 0)
+		ida_simple_remove(&rproc_dev_index, rproc->index);
+
+	kfree(rproc);
+}
+
+static struct device_type rproc_type = {
+	.name		= "remoteproc",
+	.release	= rproc_type_release,
+};
 
 /**
  * rproc_alloc() - allocate a remote processor handle
@@ -1459,13 +1109,13 @@
  * of the remote processor.
  *
  * After creating an rproc handle using this function, and when ready,
- * implementations should then call rproc_register() to complete
+ * implementations should then call rproc_add() to complete
  * the registration of the remote processor.
  *
  * On success the new rproc is returned, and on failure, NULL.
  *
  * Note: _never_ directly deallocate @rproc, even if it was not registered
- * yet. Instead, if you just need to unroll rproc_alloc(), use rproc_free().
+ * yet. Instead, when you need to unroll rproc_alloc(), use rproc_put().
  */
 struct rproc *rproc_alloc(struct device *dev, const char *name,
 				const struct rproc_ops *ops,
@@ -1482,15 +1132,29 @@
 		return NULL;
 	}
 
-	rproc->dev = dev;
 	rproc->name = name;
 	rproc->ops = ops;
 	rproc->firmware = firmware;
 	rproc->priv = &rproc[1];
 
+	device_initialize(&rproc->dev);
+	rproc->dev.parent = dev;
+	rproc->dev.type = &rproc_type;
+
+	/* Assign a unique device index and name */
+	rproc->index = ida_simple_get(&rproc_dev_index, 0, 0, GFP_KERNEL);
+	if (rproc->index < 0) {
+		dev_err(dev, "ida_simple_get failed: %d\n", rproc->index);
+		put_device(&rproc->dev);
+		return NULL;
+	}
+
+	dev_set_name(&rproc->dev, "remoteproc%d", rproc->index);
+
 	atomic_set(&rproc->power, 0);
 
-	kref_init(&rproc->refcount);
+	/* Set ELF as the default fw_ops handler */
+	rproc->fw_ops = &rproc_elf_fw_ops;
 
 	mutex_init(&rproc->lock);
 
@@ -1508,47 +1172,38 @@
 EXPORT_SYMBOL(rproc_alloc);
 
 /**
- * rproc_free() - free an rproc handle that was allocated by rproc_alloc
+ * rproc_put() - unroll rproc_alloc()
  * @rproc: the remote processor handle
  *
- * This function should _only_ be used if @rproc was only allocated,
- * but not registered yet.
+ * This function decrements the rproc dev refcount.
  *
- * If @rproc was already successfully registered (by calling rproc_register()),
- * then use rproc_unregister() instead.
+ * If no one holds any reference to rproc anymore, then its refcount would
+ * now drop to zero, and it would be freed.
  */
-void rproc_free(struct rproc *rproc)
+void rproc_put(struct rproc *rproc)
 {
-	idr_remove_all(&rproc->notifyids);
-	idr_destroy(&rproc->notifyids);
-
-	kfree(rproc);
+	put_device(&rproc->dev);
 }
-EXPORT_SYMBOL(rproc_free);
+EXPORT_SYMBOL(rproc_put);
 
 /**
- * rproc_unregister() - unregister a remote processor
+ * rproc_del() - unregister a remote processor
  * @rproc: rproc handle to unregister
  *
- * Unregisters a remote processor, and decrements its refcount.
- * If its refcount drops to zero, then @rproc will be freed. If not,
- * it will be freed later once the last reference is dropped.
- *
  * This function should be called when the platform specific rproc
  * implementation decides to remove the rproc device. it should
- * _only_ be called if a previous invocation of rproc_register()
+ * _only_ be called if a previous invocation of rproc_add()
  * has completed successfully.
  *
- * After rproc_unregister() returns, @rproc is _not_ valid anymore and
- * it shouldn't be used. More specifically, don't call rproc_free()
- * or try to directly free @rproc after rproc_unregister() returns;
- * none of these are needed, and calling them is a bug.
+ * After rproc_del() returns, @rproc isn't freed yet, because
+ * of the outstanding reference created by rproc_alloc. To decrement that
+ * one last refcount, one still needs to call rproc_put().
  *
  * Returns 0 on success and -EINVAL if @rproc isn't valid.
  */
-int rproc_unregister(struct rproc *rproc)
+int rproc_del(struct rproc *rproc)
 {
-	struct rproc_vdev *rvdev;
+	struct rproc_vdev *rvdev, *tmp;
 
 	if (!rproc)
 		return -EINVAL;
@@ -1557,22 +1212,19 @@
 	wait_for_completion(&rproc->firmware_loading_complete);
 
 	/* clean up remote vdev entries */
-	list_for_each_entry(rvdev, &rproc->rvdevs, node)
+	list_for_each_entry_safe(rvdev, tmp, &rproc->rvdevs, node)
 		rproc_remove_virtio_dev(rvdev);
 
-	/* the rproc is downref'ed as soon as it's removed from the klist */
-	klist_del(&rproc->node);
-
-	/* the rproc will only be released after its refcount drops to zero */
-	kref_put(&rproc->refcount, rproc_release);
+	device_del(&rproc->dev);
 
 	return 0;
 }
-EXPORT_SYMBOL(rproc_unregister);
+EXPORT_SYMBOL(rproc_del);
 
 static int __init remoteproc_init(void)
 {
 	rproc_init_debugfs();
+
 	return 0;
 }
 module_init(remoteproc_init);
diff --git a/drivers/remoteproc/remoteproc_debugfs.c b/drivers/remoteproc/remoteproc_debugfs.c
index 85d31a6..0383385 100644
--- a/drivers/remoteproc/remoteproc_debugfs.c
+++ b/drivers/remoteproc/remoteproc_debugfs.c
@@ -124,7 +124,7 @@
 	tfile = debugfs_create_file(name, 0400, rproc->dbg_dir,
 						trace, &trace_rproc_ops);
 	if (!tfile) {
-		dev_err(rproc->dev, "failed to create debugfs trace entry\n");
+		dev_err(&rproc->dev, "failed to create debugfs trace entry\n");
 		return NULL;
 	}
 
@@ -141,7 +141,7 @@
 
 void rproc_create_debug_dir(struct rproc *rproc)
 {
-	struct device *dev = rproc->dev;
+	struct device *dev = &rproc->dev;
 
 	if (!rproc_dbg)
 		return;
diff --git a/drivers/remoteproc/remoteproc_elf_loader.c b/drivers/remoteproc/remoteproc_elf_loader.c
new file mode 100644
index 0000000..e1f89d6
--- /dev/null
+++ b/drivers/remoteproc/remoteproc_elf_loader.c
@@ -0,0 +1,295 @@
+/*
+ * Remote Processor Framework Elf loader
+ *
+ * Copyright (C) 2011 Texas Instruments, Inc.
+ * Copyright (C) 2011 Google, Inc.
+ *
+ * Ohad Ben-Cohen <ohad@wizery.com>
+ * Brian Swetland <swetland@google.com>
+ * Mark Grosen <mgrosen@ti.com>
+ * Fernando Guzman Lugo <fernando.lugo@ti.com>
+ * Suman Anna <s-anna@ti.com>
+ * Robert Tivy <rtivy@ti.com>
+ * Armando Uribe De Leon <x0095078@ti.com>
+ * Sjur Brændeland <sjur.brandeland@stericsson.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#define pr_fmt(fmt)    "%s: " fmt, __func__
+
+#include <linux/module.h>
+#include <linux/firmware.h>
+#include <linux/remoteproc.h>
+#include <linux/elf.h>
+
+#include "remoteproc_internal.h"
+
+/**
+ * rproc_elf_sanity_check() - Sanity Check ELF firmware image
+ * @rproc: the remote processor handle
+ * @fw: the ELF firmware image
+ *
+ * Make sure this fw image is sane.
+ */
+static int
+rproc_elf_sanity_check(struct rproc *rproc, const struct firmware *fw)
+{
+	const char *name = rproc->firmware;
+	struct device *dev = &rproc->dev;
+	struct elf32_hdr *ehdr;
+	char class;
+
+	if (!fw) {
+		dev_err(dev, "failed to load %s\n", name);
+		return -EINVAL;
+	}
+
+	if (fw->size < sizeof(struct elf32_hdr)) {
+		dev_err(dev, "Image is too small\n");
+		return -EINVAL;
+	}
+
+	ehdr = (struct elf32_hdr *)fw->data;
+
+	/* We only support ELF32 at this point */
+	class = ehdr->e_ident[EI_CLASS];
+	if (class != ELFCLASS32) {
+		dev_err(dev, "Unsupported class: %d\n", class);
+		return -EINVAL;
+	}
+
+	/* We assume the firmware has the same endianess as the host */
+# ifdef __LITTLE_ENDIAN
+	if (ehdr->e_ident[EI_DATA] != ELFDATA2LSB) {
+# else /* BIG ENDIAN */
+	if (ehdr->e_ident[EI_DATA] != ELFDATA2MSB) {
+# endif
+		dev_err(dev, "Unsupported firmware endianess\n");
+		return -EINVAL;
+	}
+
+	if (fw->size < ehdr->e_shoff + sizeof(struct elf32_shdr)) {
+		dev_err(dev, "Image is too small\n");
+		return -EINVAL;
+	}
+
+	if (memcmp(ehdr->e_ident, ELFMAG, SELFMAG)) {
+		dev_err(dev, "Image is corrupted (bad magic)\n");
+		return -EINVAL;
+	}
+
+	if (ehdr->e_phnum == 0) {
+		dev_err(dev, "No loadable segments\n");
+		return -EINVAL;
+	}
+
+	if (ehdr->e_phoff > fw->size) {
+		dev_err(dev, "Firmware size is too small\n");
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+/**
+ * rproc_elf_get_boot_addr() - Get rproc's boot address.
+ * @rproc: the remote processor handle
+ * @fw: the ELF firmware image
+ *
+ * This function returns the entry point address of the ELF
+ * image.
+ *
+ * Note that the boot address is not a configurable property of all remote
+ * processors. Some will always boot at a specific hard-coded address.
+ */
+static
+u32 rproc_elf_get_boot_addr(struct rproc *rproc, const struct firmware *fw)
+{
+	struct elf32_hdr *ehdr  = (struct elf32_hdr *)fw->data;
+
+	return ehdr->e_entry;
+}
+
+/**
+ * rproc_elf_load_segments() - load firmware segments to memory
+ * @rproc: remote processor which will be booted using these fw segments
+ * @fw: the ELF firmware image
+ *
+ * This function loads the firmware segments to memory, where the remote
+ * processor expects them.
+ *
+ * Some remote processors will expect their code and data to be placed
+ * in specific device addresses, and can't have them dynamically assigned.
+ *
+ * We currently support only those kind of remote processors, and expect
+ * the program header's paddr member to contain those addresses. We then go
+ * through the physically contiguous "carveout" memory regions which we
+ * allocated (and mapped) earlier on behalf of the remote processor,
+ * and "translate" device address to kernel addresses, so we can copy the
+ * segments where they are expected.
+ *
+ * Currently we only support remote processors that required carveout
+ * allocations and got them mapped onto their iommus. Some processors
+ * might be different: they might not have iommus, and would prefer to
+ * directly allocate memory for every segment/resource. This is not yet
+ * supported, though.
+ */
+static int
+rproc_elf_load_segments(struct rproc *rproc, const struct firmware *fw)
+{
+	struct device *dev = &rproc->dev;
+	struct elf32_hdr *ehdr;
+	struct elf32_phdr *phdr;
+	int i, ret = 0;
+	const u8 *elf_data = fw->data;
+
+	ehdr = (struct elf32_hdr *)elf_data;
+	phdr = (struct elf32_phdr *)(elf_data + ehdr->e_phoff);
+
+	/* go through the available ELF segments */
+	for (i = 0; i < ehdr->e_phnum; i++, phdr++) {
+		u32 da = phdr->p_paddr;
+		u32 memsz = phdr->p_memsz;
+		u32 filesz = phdr->p_filesz;
+		u32 offset = phdr->p_offset;
+		void *ptr;
+
+		if (phdr->p_type != PT_LOAD)
+			continue;
+
+		dev_dbg(dev, "phdr: type %d da 0x%x memsz 0x%x filesz 0x%x\n",
+					phdr->p_type, da, memsz, filesz);
+
+		if (filesz > memsz) {
+			dev_err(dev, "bad phdr filesz 0x%x memsz 0x%x\n",
+							filesz, memsz);
+			ret = -EINVAL;
+			break;
+		}
+
+		if (offset + filesz > fw->size) {
+			dev_err(dev, "truncated fw: need 0x%x avail 0x%zx\n",
+					offset + filesz, fw->size);
+			ret = -EINVAL;
+			break;
+		}
+
+		/* grab the kernel address for this device address */
+		ptr = rproc_da_to_va(rproc, da, memsz);
+		if (!ptr) {
+			dev_err(dev, "bad phdr da 0x%x mem 0x%x\n", da, memsz);
+			ret = -EINVAL;
+			break;
+		}
+
+		/* put the segment where the remote processor expects it */
+		if (phdr->p_filesz)
+			memcpy(ptr, elf_data + phdr->p_offset, filesz);
+
+		/*
+		 * Zero out remaining memory for this segment.
+		 *
+		 * This isn't strictly required since dma_alloc_coherent already
+		 * did this for us. albeit harmless, we may consider removing
+		 * this.
+		 */
+		if (memsz > filesz)
+			memset(ptr + filesz, 0, memsz - filesz);
+	}
+
+	return ret;
+}
+
+/**
+ * rproc_elf_find_rsc_table() - find the resource table
+ * @rproc: the rproc handle
+ * @fw: the ELF firmware image
+ * @tablesz: place holder for providing back the table size
+ *
+ * This function finds the resource table inside the remote processor's
+ * firmware. It is used both upon the registration of @rproc (in order
+ * to look for and register the supported virito devices), and when the
+ * @rproc is booted.
+ *
+ * Returns the pointer to the resource table if it is found, and write its
+ * size into @tablesz. If a valid table isn't found, NULL is returned
+ * (and @tablesz isn't set).
+ */
+static struct resource_table *
+rproc_elf_find_rsc_table(struct rproc *rproc, const struct firmware *fw,
+							int *tablesz)
+{
+	struct elf32_hdr *ehdr;
+	struct elf32_shdr *shdr;
+	const char *name_table;
+	struct device *dev = &rproc->dev;
+	struct resource_table *table = NULL;
+	int i;
+	const u8 *elf_data = fw->data;
+
+	ehdr = (struct elf32_hdr *)elf_data;
+	shdr = (struct elf32_shdr *)(elf_data + ehdr->e_shoff);
+	name_table = elf_data + shdr[ehdr->e_shstrndx].sh_offset;
+
+	/* look for the resource table and handle it */
+	for (i = 0; i < ehdr->e_shnum; i++, shdr++) {
+		int size = shdr->sh_size;
+		int offset = shdr->sh_offset;
+
+		if (strcmp(name_table + shdr->sh_name, ".resource_table"))
+			continue;
+
+		table = (struct resource_table *)(elf_data + offset);
+
+		/* make sure we have the entire table */
+		if (offset + size > fw->size) {
+			dev_err(dev, "resource table truncated\n");
+			return NULL;
+		}
+
+		/* make sure table has at least the header */
+		if (sizeof(struct resource_table) > size) {
+			dev_err(dev, "header-less resource table\n");
+			return NULL;
+		}
+
+		/* we don't support any version beyond the first */
+		if (table->ver != 1) {
+			dev_err(dev, "unsupported fw ver: %d\n", table->ver);
+			return NULL;
+		}
+
+		/* make sure reserved bytes are zeroes */
+		if (table->reserved[0] || table->reserved[1]) {
+			dev_err(dev, "non zero reserved bytes\n");
+			return NULL;
+		}
+
+		/* make sure the offsets array isn't truncated */
+		if (table->num * sizeof(table->offset[0]) +
+				sizeof(struct resource_table) > size) {
+			dev_err(dev, "resource table incomplete\n");
+			return NULL;
+		}
+
+		*tablesz = shdr->sh_size;
+		break;
+	}
+
+	return table;
+}
+
+const struct rproc_fw_ops rproc_elf_fw_ops = {
+	.load = rproc_elf_load_segments,
+	.find_rsc_table = rproc_elf_find_rsc_table,
+	.sanity_check = rproc_elf_sanity_check,
+	.get_boot_addr = rproc_elf_get_boot_addr
+};
diff --git a/drivers/remoteproc/remoteproc_internal.h b/drivers/remoteproc/remoteproc_internal.h
index 9f336d6..a690ebe 100644
--- a/drivers/remoteproc/remoteproc_internal.h
+++ b/drivers/remoteproc/remoteproc_internal.h
@@ -21,9 +21,27 @@
 #define REMOTEPROC_INTERNAL_H
 
 #include <linux/irqreturn.h>
+#include <linux/firmware.h>
 
 struct rproc;
 
+/**
+ * struct rproc_fw_ops - firmware format specific operations.
+ * @find_rsc_table:	finds the resource table inside the firmware image
+ * @load:		load firmeware to memory, where the remote processor
+ *			expects to find it
+ * @sanity_check:	sanity check the fw image
+ * @get_boot_addr:	get boot address to entry point specified in firmware
+ */
+struct rproc_fw_ops {
+	struct resource_table *(*find_rsc_table) (struct rproc *rproc,
+						const struct firmware *fw,
+						int *tablesz);
+	int (*load)(struct rproc *rproc, const struct firmware *fw);
+	int (*sanity_check)(struct rproc *rproc, const struct firmware *fw);
+	u32 (*get_boot_addr)(struct rproc *rproc, const struct firmware *fw);
+};
+
 /* from remoteproc_core.c */
 void rproc_release(struct kref *kref);
 irqreturn_t rproc_vq_interrupt(struct rproc *rproc, int vq_id);
@@ -41,4 +59,48 @@
 void rproc_init_debugfs(void);
 void rproc_exit_debugfs(void);
 
+void rproc_free_vring(struct rproc_vring *rvring);
+int rproc_alloc_vring(struct rproc_vdev *rvdev, int i);
+
+void *rproc_da_to_va(struct rproc *rproc, u64 da, int len);
+
+static inline
+int rproc_fw_sanity_check(struct rproc *rproc, const struct firmware *fw)
+{
+	if (rproc->fw_ops->sanity_check)
+		return rproc->fw_ops->sanity_check(rproc, fw);
+
+	return 0;
+}
+
+static inline
+u32 rproc_get_boot_addr(struct rproc *rproc, const struct firmware *fw)
+{
+	if (rproc->fw_ops->get_boot_addr)
+		return rproc->fw_ops->get_boot_addr(rproc, fw);
+
+	return 0;
+}
+
+static inline
+int rproc_load_segments(struct rproc *rproc, const struct firmware *fw)
+{
+	if (rproc->fw_ops->load)
+		return rproc->fw_ops->load(rproc, fw);
+
+	return -EINVAL;
+}
+
+static inline
+struct resource_table *rproc_find_rsc_table(struct rproc *rproc,
+				const struct firmware *fw, int *tablesz)
+{
+	if (rproc->fw_ops->find_rsc_table)
+		return rproc->fw_ops->find_rsc_table(rproc, fw, tablesz);
+
+	return NULL;
+}
+
+extern const struct rproc_fw_ops rproc_elf_fw_ops;
+
 #endif /* REMOTEPROC_INTERNAL_H */
diff --git a/drivers/remoteproc/remoteproc_virtio.c b/drivers/remoteproc/remoteproc_virtio.c
index ecf6121..3541b44 100644
--- a/drivers/remoteproc/remoteproc_virtio.c
+++ b/drivers/remoteproc/remoteproc_virtio.c
@@ -36,7 +36,7 @@
 	struct rproc *rproc = rvring->rvdev->rproc;
 	int notifyid = rvring->notifyid;
 
-	dev_dbg(rproc->dev, "kicking vq index: %d\n", notifyid);
+	dev_dbg(&rproc->dev, "kicking vq index: %d\n", notifyid);
 
 	rproc->ops->kick(rproc, notifyid);
 }
@@ -57,7 +57,7 @@
 {
 	struct rproc_vring *rvring;
 
-	dev_dbg(rproc->dev, "vq index %d is interrupted\n", notifyid);
+	dev_dbg(&rproc->dev, "vq index %d is interrupted\n", notifyid);
 
 	rvring = idr_find(&rproc->notifyids, notifyid);
 	if (!rvring || !rvring->vq)
@@ -74,17 +74,21 @@
 {
 	struct rproc_vdev *rvdev = vdev_to_rvdev(vdev);
 	struct rproc *rproc = vdev_to_rproc(vdev);
+	struct device *dev = &rproc->dev;
 	struct rproc_vring *rvring;
 	struct virtqueue *vq;
 	void *addr;
-	int len, size;
+	int len, size, ret;
 
 	/* we're temporarily limited to two virtqueues per rvdev */
 	if (id >= ARRAY_SIZE(rvdev->vring))
 		return ERR_PTR(-EINVAL);
 
-	rvring = &rvdev->vring[id];
+	ret = rproc_alloc_vring(rvdev, id);
+	if (ret)
+		return ERR_PTR(ret);
 
+	rvring = &rvdev->vring[id];
 	addr = rvring->va;
 	len = rvring->len;
 
@@ -92,7 +96,7 @@
 	size = vring_size(len, rvring->align);
 	memset(addr, 0, size);
 
-	dev_dbg(rproc->dev, "vring%d: va %p qsz %d notifyid %d\n",
+	dev_dbg(dev, "vring%d: va %p qsz %d notifyid %d\n",
 					id, addr, len, rvring->notifyid);
 
 	/*
@@ -102,7 +106,8 @@
 	vq = vring_new_virtqueue(len, rvring->align, vdev, false, addr,
 					rproc_virtio_notify, callback, name);
 	if (!vq) {
-		dev_err(rproc->dev, "vring_new_virtqueue %s failed\n", name);
+		dev_err(dev, "vring_new_virtqueue %s failed\n", name);
+		rproc_free_vring(rvring);
 		return ERR_PTR(-ENOMEM);
 	}
 
@@ -125,6 +130,7 @@
 		rvring = vq->priv;
 		rvring->vq = NULL;
 		vring_del_virtqueue(vq);
+		rproc_free_vring(rvring);
 	}
 }
 
@@ -147,7 +153,7 @@
 	/* now that the vqs are all set, boot the remote processor */
 	ret = rproc_boot(rproc);
 	if (ret) {
-		dev_err(rproc->dev, "rproc_boot() failed %d\n", ret);
+		dev_err(&rproc->dev, "rproc_boot() failed %d\n", ret);
 		goto error;
 	}
 
@@ -219,7 +225,7 @@
 
 /*
  * This function is called whenever vdev is released, and is responsible
- * to decrement the remote processor's refcount taken when vdev was
+ * to decrement the remote processor's refcount which was taken when vdev was
  * added.
  *
  * Never call this function directly; it will be called by the driver
@@ -228,9 +234,13 @@
 static void rproc_vdev_release(struct device *dev)
 {
 	struct virtio_device *vdev = dev_to_virtio(dev);
+	struct rproc_vdev *rvdev = vdev_to_rvdev(vdev);
 	struct rproc *rproc = vdev_to_rproc(vdev);
 
-	kref_put(&rproc->refcount, rproc_release);
+	list_del(&rvdev->node);
+	kfree(rvdev);
+
+	put_device(&rproc->dev);
 }
 
 /**
@@ -245,7 +255,7 @@
 int rproc_add_virtio_dev(struct rproc_vdev *rvdev, int id)
 {
 	struct rproc *rproc = rvdev->rproc;
-	struct device *dev = rproc->dev;
+	struct device *dev = &rproc->dev;
 	struct virtio_device *vdev = &rvdev->vdev;
 	int ret;
 
@@ -262,11 +272,11 @@
 	 * Therefore we must increment the rproc refcount here, and decrement
 	 * it _only_ when the vdev is released.
 	 */
-	kref_get(&rproc->refcount);
+	get_device(&rproc->dev);
 
 	ret = register_virtio_device(vdev);
 	if (ret) {
-		kref_put(&rproc->refcount, rproc_release);
+		put_device(&rproc->dev);
 		dev_err(dev, "failed to register vdev: %d\n", ret);
 		goto out;
 	}
diff --git a/drivers/rpmsg/virtio_rpmsg_bus.c b/drivers/rpmsg/virtio_rpmsg_bus.c
index f56c8ba..590cfaf 100644
--- a/drivers/rpmsg/virtio_rpmsg_bus.c
+++ b/drivers/rpmsg/virtio_rpmsg_bus.c
@@ -956,7 +956,8 @@
 	vrp->svq = vqs[1];
 
 	/* allocate coherent memory for the buffers */
-	bufs_va = dma_alloc_coherent(vdev->dev.parent, RPMSG_TOTAL_BUF_SPACE,
+	bufs_va = dma_alloc_coherent(vdev->dev.parent->parent,
+				RPMSG_TOTAL_BUF_SPACE,
 				&vrp->bufs_dma, GFP_KERNEL);
 	if (!bufs_va)
 		goto vqs_del;
diff --git a/include/linux/remoteproc.h b/include/linux/remoteproc.h
index f1ffabb..131b539 100644
--- a/include/linux/remoteproc.h
+++ b/include/linux/remoteproc.h
@@ -36,7 +36,6 @@
 #define REMOTEPROC_H
 
 #include <linux/types.h>
-#include <linux/kref.h>
 #include <linux/klist.h>
 #include <linux/mutex.h>
 #include <linux/virtio.h>
@@ -369,8 +368,8 @@
  * @firmware: name of firmware file to be loaded
  * @priv: private data which belongs to the platform-specific rproc module
  * @ops: platform-specific start/stop rproc handlers
- * @dev: underlying device
- * @refcount: refcount of users that have a valid pointer to this rproc
+ * @dev: virtual device for refcounting and common remoteproc behavior
+ * @fw_ops: firmware-specific handlers
  * @power: refcount of users who need this rproc powered up
  * @state: state of the device
  * @lock: lock which protects concurrent manipulations of the rproc
@@ -383,6 +382,7 @@
  * @bootaddr: address of first instruction to boot rproc with (optional)
  * @rvdevs: list of remote virtio devices
  * @notifyids: idr for dynamically assigning rproc-wide unique notify ids
+ * @index: index of this rproc device
  */
 struct rproc {
 	struct klist_node node;
@@ -391,8 +391,8 @@
 	const char *firmware;
 	void *priv;
 	const struct rproc_ops *ops;
-	struct device *dev;
-	struct kref refcount;
+	struct device dev;
+	const struct rproc_fw_ops *fw_ops;
 	atomic_t power;
 	unsigned int state;
 	struct mutex lock;
@@ -405,6 +405,7 @@
 	u32 bootaddr;
 	struct list_head rvdevs;
 	struct idr notifyids;
+	int index;
 };
 
 /* we currently support only two vrings per rvdev */
@@ -450,15 +451,12 @@
 	unsigned long gfeatures;
 };
 
-struct rproc *rproc_get_by_name(const char *name);
-void rproc_put(struct rproc *rproc);
-
 struct rproc *rproc_alloc(struct device *dev, const char *name,
 				const struct rproc_ops *ops,
 				const char *firmware, int len);
-void rproc_free(struct rproc *rproc);
-int rproc_register(struct rproc *rproc);
-int rproc_unregister(struct rproc *rproc);
+void rproc_put(struct rproc *rproc);
+int rproc_add(struct rproc *rproc);
+int rproc_del(struct rproc *rproc);
 
 int rproc_boot(struct rproc *rproc);
 void rproc_shutdown(struct rproc *rproc);