libnvdimm, pfn: add 'align' attribute, default to HPAGE_SIZE

When setting aside capacity for struct page it must be aligned to the
largest mapping size that is to be made available via DAX.  Make the
alignment configurable to enable support for 1GiB page-size mappings.

The offset for PFN_MODE_RAM may now be larger than SZ_8K, so fixup the
offset check in nvdimm_namespace_attach_pfn().

Reported-by: Toshi Kani <toshi.kani@hpe.com>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
diff --git a/drivers/nvdimm/nd.h b/drivers/nvdimm/nd.h
index 2ce428e..e4e9f9a 100644
--- a/drivers/nvdimm/nd.h
+++ b/drivers/nvdimm/nd.h
@@ -146,6 +146,7 @@
 	int id;
 	u8 *uuid;
 	struct device dev;
+	unsigned long align;
 	unsigned long npfns;
 	enum nd_pfn_mode mode;
 	struct nd_pfn_sb *pfn_sb;
diff --git a/drivers/nvdimm/pfn_devs.c b/drivers/nvdimm/pfn_devs.c
index 613ffcca..95ecd7b 100644
--- a/drivers/nvdimm/pfn_devs.c
+++ b/drivers/nvdimm/pfn_devs.c
@@ -103,6 +103,52 @@
 }
 static DEVICE_ATTR_RW(mode);
 
+static ssize_t align_show(struct device *dev,
+		struct device_attribute *attr, char *buf)
+{
+	struct nd_pfn *nd_pfn = to_nd_pfn(dev);
+
+	return sprintf(buf, "%lx\n", nd_pfn->align);
+}
+
+static ssize_t __align_store(struct nd_pfn *nd_pfn, const char *buf)
+{
+	unsigned long val;
+	int rc;
+
+	rc = kstrtoul(buf, 0, &val);
+	if (rc)
+		return rc;
+
+	if (!is_power_of_2(val) || val < PAGE_SIZE || val > SZ_1G)
+		return -EINVAL;
+
+	if (nd_pfn->dev.driver)
+		return -EBUSY;
+	else
+		nd_pfn->align = val;
+
+	return 0;
+}
+
+static ssize_t align_store(struct device *dev,
+		struct device_attribute *attr, const char *buf, size_t len)
+{
+	struct nd_pfn *nd_pfn = to_nd_pfn(dev);
+	ssize_t rc;
+
+	device_lock(dev);
+	nvdimm_bus_lock(dev);
+	rc = __align_store(nd_pfn, buf);
+	dev_dbg(dev, "%s: result: %zd wrote: %s%s", __func__,
+			rc, buf, buf[len - 1] == '\n' ? "" : "\n");
+	nvdimm_bus_unlock(dev);
+	device_unlock(dev);
+
+	return rc ? rc : len;
+}
+static DEVICE_ATTR_RW(align);
+
 static ssize_t uuid_show(struct device *dev,
 		struct device_attribute *attr, char *buf)
 {
@@ -164,6 +210,7 @@
 	&dev_attr_mode.attr,
 	&dev_attr_namespace.attr,
 	&dev_attr_uuid.attr,
+	&dev_attr_align.attr,
 	NULL,
 };
 
@@ -199,6 +246,7 @@
 	}
 
 	nd_pfn->mode = PFN_MODE_NONE;
+	nd_pfn->align = HPAGE_SIZE;
 	dev = &nd_pfn->dev;
 	dev_set_name(dev, "pfn%d.%d", nd_region->id, nd_pfn->id);
 	dev->parent = &nd_region->dev;
@@ -269,6 +317,12 @@
 			return -EINVAL;
 	}
 
+	if (nd_pfn->align > nvdimm_namespace_capacity(ndns)) {
+		dev_err(&nd_pfn->dev, "alignment: %lx exceeds capacity %llx\n",
+				nd_pfn->align, nvdimm_namespace_capacity(ndns));
+		return -EINVAL;
+	}
+
 	/*
 	 * These warnings are verbose because they can only trigger in
 	 * the case where the physical address alignment of the
@@ -283,6 +337,13 @@
 		return -EBUSY;
 	}
 
+	nd_pfn->align = 1UL << ilog2(offset);
+	if (!is_power_of_2(offset) || offset < PAGE_SIZE) {
+		dev_err(&nd_pfn->dev, "bad offset: %#llx dax disabled\n",
+				offset);
+		return -ENXIO;
+	}
+
 	return 0;
 }
 EXPORT_SYMBOL(nd_pfn_validate);
diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c
index 520c003..5ba351e 100644
--- a/drivers/nvdimm/pmem.c
+++ b/drivers/nvdimm/pmem.c
@@ -258,9 +258,9 @@
 	 * ->direct_access() to those that are included in the memmap.
 	 */
 	if (nd_pfn->mode == PFN_MODE_PMEM)
-		offset = ALIGN(SZ_8K + 64 * npfns, PMD_SIZE);
+		offset = ALIGN(SZ_8K + 64 * npfns, nd_pfn->align);
 	else if (nd_pfn->mode == PFN_MODE_RAM)
-		offset = SZ_8K;
+		offset = ALIGN(SZ_8K, nd_pfn->align);
 	else
 		goto err;
 
@@ -325,7 +325,7 @@
 	offset = le64_to_cpu(pfn_sb->dataoff);
 	nd_pfn->mode = le32_to_cpu(nd_pfn->pfn_sb->mode);
 	if (nd_pfn->mode == PFN_MODE_RAM) {
-		if (offset != SZ_8K)
+		if (offset < SZ_8K)
 			return -EINVAL;
 		nd_pfn->npfns = le64_to_cpu(pfn_sb->npfns);
 		altmap = NULL;