Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/sparc

Pull sparc updates from David Miller:
 "Just a couple of fixes/cleanups:

   - Correct NUMA latency calculations on sparc64, from Nitin Gupta.

   - ASI_ST_BLKINIT_MRU_S value was wrong, from Rob Gardner.

   - Fix non-faulting load handling of non-quad values, also from Rob
     Gardner.

   - Cleanup VISsave assembler, from Sam Ravnborg.

   - Fix iommu-common code so it doesn't emit ridiculous warnings on
     some architectures, particularly ARM"

* git://git.kernel.org/pub/scm/linux/kernel/git/davem/sparc:
  sparc64: Fix numa distance values
  sparc64: Don't restrict fp regs for no-fault loads
  iommu-common: Fix error code used in iommu_tbl_range_{alloc,free}().
  sparc64: use ENTRY/ENDPROC in VISsave
  sparc64: Fix incorrect ASI_ST_BLKINIT_MRU_S value
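
[Editorial note: the "Fix numa distance values" change below normalizes the raw
latencies read from the machine description so that a node's distance to itself
becomes LOCAL_DISTANCE, in the style of an ACPI SLIT table. The following is a
minimal userspace sketch of that normalization only, not kernel code; the
latency numbers are made up for illustration, and LOCAL_DISTANCE mirrors the
generic kernel default of 10.]

	/*
	 * Standalone sketch of the SLIT-style scaling that
	 * numa_parse_mdesc() applies to numa_latency[][]: each row is
	 * divided by the node's latency to itself and multiplied by
	 * LOCAL_DISTANCE, so __node_distance(i, i) reports 10 and remote
	 * distances scale relative to that.
	 */
	#include <stdio.h>

	#define MAX_NODES      2
	#define LOCAL_DISTANCE 10   /* matches the generic kernel default */

	int main(void)
	{
		/* Hypothetical raw latencies, as if read from "latency-groups". */
		unsigned long long latency[MAX_NODES][MAX_NODES] = {
			{ 100, 210 },
			{ 215, 105 },
		};
		int i, j;

		for (i = 0; i < MAX_NODES; i++) {
			unsigned long long self = latency[i][i];

			for (j = 0; j < MAX_NODES; j++)
				latency[i][j] = latency[i][j] * LOCAL_DISTANCE / self;
		}

		/* Prints 10, 21, 20, 10: local distance 10, remote roughly 2x. */
		for (i = 0; i < MAX_NODES; i++)
			for (j = 0; j < MAX_NODES; j++)
				printf("node_distance(%d,%d) = %llu\n",
				       i, j, latency[i][j]);
		return 0;
	}
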
diff --git a/arch/sparc/include/asm/topology_64.h b/arch/sparc/include/asm/topology_64.h
index 01d1704..bec481a 100644
--- a/arch/sparc/include/asm/topology_64.h
+++ b/arch/sparc/include/asm/topology_64.h
@@ -31,6 +31,9 @@
 	 cpu_all_mask : \
 	 cpumask_of_node(pcibus_to_node(bus)))
 
+int __node_distance(int, int);
+#define node_distance(a, b) __node_distance(a, b)
+
 #else /* CONFIG_NUMA */
 
 #include <asm-generic/topology.h>
diff --git a/arch/sparc/include/uapi/asm/asi.h b/arch/sparc/include/uapi/asm/asi.h
index aace6f3..7ad7203d 100644
--- a/arch/sparc/include/uapi/asm/asi.h
+++ b/arch/sparc/include/uapi/asm/asi.h
@@ -279,7 +279,7 @@
 				      * Most-Recently-Used, primary,
 				      * implicit
 				      */
-#define ASI_ST_BLKINIT_MRU_S	0xf2 /* (NG4) init-store, twin load,
+#define ASI_ST_BLKINIT_MRU_S	0xf3 /* (NG4) init-store, twin load,
 				      * Most-Recently-Used, secondary,
 				      * implicit
 				      */
diff --git a/arch/sparc/kernel/iommu.c b/arch/sparc/kernel/iommu.c
index 5320689..3768682 100644
--- a/arch/sparc/kernel/iommu.c
+++ b/arch/sparc/kernel/iommu.c
@@ -161,7 +161,7 @@
 
 	entry = iommu_tbl_range_alloc(dev, &iommu->tbl, npages, NULL,
 				      (unsigned long)(-1), 0);
-	if (unlikely(entry == DMA_ERROR_CODE))
+	if (unlikely(entry == IOMMU_ERROR_CODE))
 		return NULL;
 
 	return iommu->page_table + entry;
@@ -253,7 +253,7 @@
 	npages = IO_PAGE_ALIGN(size) >> IO_PAGE_SHIFT;
 	iommu = dev->archdata.iommu;
 
-	iommu_tbl_range_free(&iommu->tbl, dvma, npages, DMA_ERROR_CODE);
+	iommu_tbl_range_free(&iommu->tbl, dvma, npages, IOMMU_ERROR_CODE);
 
 	order = get_order(size);
 	if (order < 10)
@@ -426,7 +426,7 @@
 	iommu_free_ctx(iommu, ctx);
 	spin_unlock_irqrestore(&iommu->lock, flags);
 
-	iommu_tbl_range_free(&iommu->tbl, bus_addr, npages, DMA_ERROR_CODE);
+	iommu_tbl_range_free(&iommu->tbl, bus_addr, npages, IOMMU_ERROR_CODE);
 }
 
 static int dma_4u_map_sg(struct device *dev, struct scatterlist *sglist,
@@ -492,7 +492,7 @@
 					      &handle, (unsigned long)(-1), 0);
 
 		/* Handle failure */
-		if (unlikely(entry == DMA_ERROR_CODE)) {
+		if (unlikely(entry == IOMMU_ERROR_CODE)) {
 			if (printk_ratelimit())
 				printk(KERN_INFO "iommu_alloc failed, iommu %p paddr %lx"
 				       " npages %lx\n", iommu, paddr, npages);
@@ -571,7 +571,7 @@
 				iopte_make_dummy(iommu, base + j);
 
 			iommu_tbl_range_free(&iommu->tbl, vaddr, npages,
-					     DMA_ERROR_CODE);
+					     IOMMU_ERROR_CODE);
 
 			s->dma_address = DMA_ERROR_CODE;
 			s->dma_length = 0;
@@ -648,7 +648,7 @@
 			iopte_make_dummy(iommu, base + i);
 
 		iommu_tbl_range_free(&iommu->tbl, dma_handle, npages,
-				     DMA_ERROR_CODE);
+				     IOMMU_ERROR_CODE);
 		sg = sg_next(sg);
 	}
 
diff --git a/arch/sparc/kernel/ldc.c b/arch/sparc/kernel/ldc.c
index 1ae5eb1..59d5038 100644
--- a/arch/sparc/kernel/ldc.c
+++ b/arch/sparc/kernel/ldc.c
@@ -1953,7 +1953,7 @@
 
 	entry = iommu_tbl_range_alloc(NULL, &iommu->iommu_map_table,
 				      npages, NULL, (unsigned long)-1, 0);
-	if (unlikely(entry < 0))
+	if (unlikely(entry == IOMMU_ERROR_CODE))
 		return NULL;
 
 	return iommu->page_table + entry;
diff --git a/arch/sparc/kernel/pci_sun4v.c b/arch/sparc/kernel/pci_sun4v.c
index d2fe57d..836e8ce 100644
--- a/arch/sparc/kernel/pci_sun4v.c
+++ b/arch/sparc/kernel/pci_sun4v.c
@@ -159,7 +159,7 @@
 	entry = iommu_tbl_range_alloc(dev, &iommu->tbl, npages, NULL,
 				      (unsigned long)(-1), 0);
 
-	if (unlikely(entry == DMA_ERROR_CODE))
+	if (unlikely(entry == IOMMU_ERROR_CODE))
 		goto range_alloc_fail;
 
 	*dma_addrp = (iommu->tbl.table_map_base + (entry << IO_PAGE_SHIFT));
@@ -187,7 +187,7 @@
 	return ret;
 
 iommu_map_fail:
-	iommu_tbl_range_free(&iommu->tbl, *dma_addrp, npages, DMA_ERROR_CODE);
+	iommu_tbl_range_free(&iommu->tbl, *dma_addrp, npages, IOMMU_ERROR_CODE);
 
 range_alloc_fail:
 	free_pages(first_page, order);
@@ -226,7 +226,7 @@
 	devhandle = pbm->devhandle;
 	entry = ((dvma - iommu->tbl.table_map_base) >> IO_PAGE_SHIFT);
 	dma_4v_iommu_demap(&devhandle, entry, npages);
-	iommu_tbl_range_free(&iommu->tbl, dvma, npages, DMA_ERROR_CODE);
+	iommu_tbl_range_free(&iommu->tbl, dvma, npages, IOMMU_ERROR_CODE);
 	order = get_order(size);
 	if (order < 10)
 		free_pages((unsigned long)cpu, order);
@@ -256,7 +256,7 @@
 	entry = iommu_tbl_range_alloc(dev, &iommu->tbl, npages, NULL,
 				      (unsigned long)(-1), 0);
 
-	if (unlikely(entry == DMA_ERROR_CODE))
+	if (unlikely(entry == IOMMU_ERROR_CODE))
 		goto bad;
 
 	bus_addr = (iommu->tbl.table_map_base + (entry << IO_PAGE_SHIFT));
@@ -288,7 +288,7 @@
 	return DMA_ERROR_CODE;
 
 iommu_map_fail:
-	iommu_tbl_range_free(&iommu->tbl, bus_addr, npages, DMA_ERROR_CODE);
+	iommu_tbl_range_free(&iommu->tbl, bus_addr, npages, IOMMU_ERROR_CODE);
 	return DMA_ERROR_CODE;
 }
 
@@ -317,7 +317,7 @@
 	bus_addr &= IO_PAGE_MASK;
 	entry = (bus_addr - iommu->tbl.table_map_base) >> IO_PAGE_SHIFT;
 	dma_4v_iommu_demap(&devhandle, entry, npages);
-	iommu_tbl_range_free(&iommu->tbl, bus_addr, npages, DMA_ERROR_CODE);
+	iommu_tbl_range_free(&iommu->tbl, bus_addr, npages, IOMMU_ERROR_CODE);
 }
 
 static int dma_4v_map_sg(struct device *dev, struct scatterlist *sglist,
@@ -376,7 +376,7 @@
 					      &handle, (unsigned long)(-1), 0);
 
 		/* Handle failure */
-		if (unlikely(entry == DMA_ERROR_CODE)) {
+		if (unlikely(entry == IOMMU_ERROR_CODE)) {
 			if (printk_ratelimit())
 				printk(KERN_INFO "iommu_alloc failed, iommu %p paddr %lx"
 				       " npages %lx\n", iommu, paddr, npages);
@@ -451,7 +451,7 @@
 			npages = iommu_num_pages(s->dma_address, s->dma_length,
 						 IO_PAGE_SIZE);
 			iommu_tbl_range_free(&iommu->tbl, vaddr, npages,
-					     DMA_ERROR_CODE);
+					     IOMMU_ERROR_CODE);
 			/* XXX demap? XXX */
 			s->dma_address = DMA_ERROR_CODE;
 			s->dma_length = 0;
@@ -496,7 +496,7 @@
 		entry = ((dma_handle - tbl->table_map_base) >> shift);
 		dma_4v_iommu_demap(&devhandle, entry, npages);
 		iommu_tbl_range_free(&iommu->tbl, dma_handle, npages,
-				     DMA_ERROR_CODE);
+				     IOMMU_ERROR_CODE);
 		sg = sg_next(sg);
 	}
 
diff --git a/arch/sparc/kernel/unaligned_64.c b/arch/sparc/kernel/unaligned_64.c
index 62098a8..d89e97b 100644
--- a/arch/sparc/kernel/unaligned_64.c
+++ b/arch/sparc/kernel/unaligned_64.c
@@ -436,24 +436,26 @@
 int handle_ldf_stq(u32 insn, struct pt_regs *regs)
 {
 	unsigned long addr = compute_effective_address(regs, insn, 0);
-	int freg = ((insn >> 25) & 0x1e) | ((insn >> 20) & 0x20);
+	int freg;
 	struct fpustate *f = FPUSTATE;
 	int asi = decode_asi(insn, regs);
-	int flag = (freg < 32) ? FPRS_DL : FPRS_DU;
+	int flag;
 
 	perf_sw_event(PERF_COUNT_SW_EMULATION_FAULTS, 1, regs, 0);
 
 	save_and_clear_fpu();
 	current_thread_info()->xfsr[0] &= ~0x1c000;
-	if (freg & 3) {
-		current_thread_info()->xfsr[0] |= (6 << 14) /* invalid_fp_register */;
-		do_fpother(regs);
-		return 0;
-	}
 	if (insn & 0x200000) {
 		/* STQ */
 		u64 first = 0, second = 0;
 		
+		freg = ((insn >> 25) & 0x1e) | ((insn >> 20) & 0x20);
+		flag = (freg < 32) ? FPRS_DL : FPRS_DU;
+		if (freg & 3) {
+			current_thread_info()->xfsr[0] |= (6 << 14) /* invalid_fp_register */;
+			do_fpother(regs);
+			return 0;
+		}
 		if (current_thread_info()->fpsaved[0] & flag) {
 			first = *(u64 *)&f->regs[freg];
 			second = *(u64 *)&f->regs[freg+2];
@@ -513,6 +515,12 @@
 		case 0x100000: size = 4; break;
 		default: size = 2; break;
 		}
+		if (size == 1)
+			freg = (insn >> 25) & 0x1f;
+		else
+			freg = ((insn >> 25) & 0x1e) | ((insn >> 20) & 0x20);
+		flag = (freg < 32) ? FPRS_DL : FPRS_DU;
+
 		for (i = 0; i < size; i++)
 			data[i] = 0;
 		
diff --git a/arch/sparc/lib/VISsave.S b/arch/sparc/lib/VISsave.S
index a063d84..62c2647b 100644
--- a/arch/sparc/lib/VISsave.S
+++ b/arch/sparc/lib/VISsave.S
@@ -6,24 +6,23 @@
  * Copyright (C) 1998 Jakub Jelinek (jj@ultra.linux.cz)
  */
 
+#include <linux/linkage.h>
+
 #include <asm/asi.h>
 #include <asm/page.h>
 #include <asm/ptrace.h>
 #include <asm/visasm.h>
 #include <asm/thread_info.h>
 
-	.text
-	.globl		VISenter, VISenterhalf
-
 	/* On entry: %o5=current FPRS value, %g7 is callers address */
 	/* May clobber %o5, %g1, %g2, %g3, %g7, %icc, %xcc */
 
 	/* Nothing special need be done here to handle pre-emption, this
 	 * FPU save/restore mechanism is already preemption safe.
 	 */
-
+	.text
 	.align		32
-VISenter:
+ENTRY(VISenter)
 	ldub		[%g6 + TI_FPDEPTH], %g1
 	brnz,a,pn	%g1, 1f
 	 cmp		%g1, 1
@@ -79,3 +78,4 @@
 	.align		32
 80:	jmpl		%g7 + %g0, %g0
 	 nop
+ENDPROC(VISenter)
diff --git a/arch/sparc/mm/init_64.c b/arch/sparc/mm/init_64.c
index 4ac88b7..3025bd5 100644
--- a/arch/sparc/mm/init_64.c
+++ b/arch/sparc/mm/init_64.c
@@ -93,6 +93,8 @@
 static struct linux_prom64_registers pavail[MAX_BANKS];
 static int pavail_ents;
 
+u64 numa_latency[MAX_NUMNODES][MAX_NUMNODES];
+
 static int cmp_p64(const void *a, const void *b)
 {
 	const struct linux_prom64_registers *x = a, *y = b;
@@ -1157,6 +1159,48 @@
 	return NULL;
 }
 
+int __node_distance(int from, int to)
+{
+	if ((from >= MAX_NUMNODES) || (to >= MAX_NUMNODES)) {
+		pr_warn("Returning default NUMA distance value for %d->%d\n",
+			from, to);
+		return (from == to) ? LOCAL_DISTANCE : REMOTE_DISTANCE;
+	}
+	return numa_latency[from][to];
+}
+
+static int find_best_numa_node_for_mlgroup(struct mdesc_mlgroup *grp)
+{
+	int i;
+
+	for (i = 0; i < MAX_NUMNODES; i++) {
+		struct node_mem_mask *n = &node_masks[i];
+
+		if ((grp->mask == n->mask) && (grp->match == n->val))
+			break;
+	}
+	return i;
+}
+
+static void find_numa_latencies_for_group(struct mdesc_handle *md, u64 grp,
+					  int index)
+{
+	u64 arc;
+
+	mdesc_for_each_arc(arc, md, grp, MDESC_ARC_TYPE_FWD) {
+		int tnode;
+		u64 target = mdesc_arc_target(md, arc);
+		struct mdesc_mlgroup *m = find_mlgroup(target);
+
+		if (!m)
+			continue;
+		tnode = find_best_numa_node_for_mlgroup(m);
+		if (tnode == MAX_NUMNODES)
+			continue;
+		numa_latency[index][tnode] = m->latency;
+	}
+}
+
 static int __init numa_attach_mlgroup(struct mdesc_handle *md, u64 grp,
 				      int index)
 {
@@ -1220,9 +1264,16 @@
 static int __init numa_parse_mdesc(void)
 {
 	struct mdesc_handle *md = mdesc_grab();
-	int i, err, count;
+	int i, j, err, count;
 	u64 node;
 
+	/* Some sane defaults for numa latency values */
+	for (i = 0; i < MAX_NUMNODES; i++) {
+		for (j = 0; j < MAX_NUMNODES; j++)
+			numa_latency[i][j] = (i == j) ?
+				LOCAL_DISTANCE : REMOTE_DISTANCE;
+	}
+
 	node = mdesc_node_by_name(md, MDESC_NODE_NULL, "latency-groups");
 	if (node == MDESC_NODE_NULL) {
 		mdesc_release(md);
@@ -1245,6 +1296,23 @@
 		count++;
 	}
 
+	count = 0;
+	mdesc_for_each_node_by_name(md, node, "group") {
+		find_numa_latencies_for_group(md, node, count);
+		count++;
+	}
+
+	/* Normalize numa latency matrix according to ACPI SLIT spec. */
+	for (i = 0; i < MAX_NUMNODES; i++) {
+		u64 self_latency = numa_latency[i][i];
+
+		for (j = 0; j < MAX_NUMNODES; j++) {
+			numa_latency[i][j] =
+				(numa_latency[i][j] * LOCAL_DISTANCE) /
+				self_latency;
+		}
+	}
+
 	add_node_ranges();
 
 	for (i = 0; i < num_node_masks; i++) {
diff --git a/include/linux/iommu-common.h b/include/linux/iommu-common.h
index bbced83..376a27c 100644
--- a/include/linux/iommu-common.h
+++ b/include/linux/iommu-common.h
@@ -7,6 +7,7 @@
 
 #define IOMMU_POOL_HASHBITS     4
 #define IOMMU_NR_POOLS          (1 << IOMMU_POOL_HASHBITS)
+#define IOMMU_ERROR_CODE	(~(unsigned long) 0)
 
 struct iommu_pool {
 	unsigned long	start;
diff --git a/lib/iommu-common.c b/lib/iommu-common.c
index b1c93e9..858dc1a 100644
--- a/lib/iommu-common.c
+++ b/lib/iommu-common.c
@@ -11,10 +11,6 @@
 #include <linux/dma-mapping.h>
 #include <linux/hash.h>
 
-#ifndef	DMA_ERROR_CODE
-#define	DMA_ERROR_CODE (~(dma_addr_t)0x0)
-#endif
-
 static unsigned long iommu_large_alloc = 15;
 
 static	DEFINE_PER_CPU(unsigned int, iommu_hash_common);
@@ -123,7 +119,7 @@
 	/* Sanity check */
 	if (unlikely(npages == 0)) {
 		WARN_ON_ONCE(1);
-		return DMA_ERROR_CODE;
+		return IOMMU_ERROR_CODE;
 	}
 
 	if (largealloc) {
@@ -206,7 +202,7 @@
 			goto again;
 		} else {
 			/* give up */
-			n = DMA_ERROR_CODE;
+			n = IOMMU_ERROR_CODE;
 			goto bail;
 		}
 	}
@@ -259,7 +255,7 @@
 	unsigned long flags;
 	unsigned long shift = iommu->table_shift;
 
-	if (entry == DMA_ERROR_CODE) /* use default addr->entry mapping */
+	if (entry == IOMMU_ERROR_CODE) /* use default addr->entry mapping */
 		entry = (dma_addr - iommu->table_map_base) >> shift;
 	pool = get_pool(iommu, entry);