Matias Bjørling | ca06408 | 2015-10-29 17:57:29 +0900 | [diff] [blame^] | 1 | /* |
| 2 | * nvme-lightnvm.c - LightNVM NVMe device |
| 3 | * |
| 4 | * Copyright (C) 2014-2015 IT University of Copenhagen |
| 5 | * Initial release: Matias Bjorling <mb@lightnvm.io> |
| 6 | * |
| 7 | * This program is free software; you can redistribute it and/or |
| 8 | * modify it under the terms of the GNU General Public License version |
| 9 | * 2 as published by the Free Software Foundation. |
| 10 | * |
| 11 | * This program is distributed in the hope that it will be useful, but |
| 12 | * WITHOUT ANY WARRANTY; without even the implied warranty of |
| 13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
| 14 | * General Public License for more details. |
| 15 | * |
| 16 | * You should have received a copy of the GNU General Public License |
| 17 | * along with this program; see the file COPYING. If not, write to |
| 18 | * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, |
| 19 | * USA. |
| 20 | * |
| 21 | */ |
| 22 | |
| 23 | #include "nvme.h" |
| 24 | |
| 25 | #ifdef CONFIG_NVM |
| 26 | |
| 27 | #include <linux/nvme.h> |
| 28 | #include <linux/bitops.h> |
| 29 | #include <linux/lightnvm.h> |
| 30 | #include <linux/vmalloc.h> |
| 31 | |
/* Admin opcodes for the LightNVM commands built in this file. */
enum nvme_nvm_admin_opcode {
	nvme_nvm_admin_identity		= 0xe2,	/* see nvme_nvm_identity() */
	nvme_nvm_admin_get_l2p_tbl	= 0xea,	/* see nvme_nvm_get_l2p_tbl() */
	nvme_nvm_admin_get_bb_tbl	= 0xf2,	/* see nvme_nvm_get_bb_tbl() */
	nvme_nvm_admin_set_bb_tbl	= 0xf1,
};
| 38 | |
| 39 | struct nvme_nvm_hb_rw { |
| 40 | __u8 opcode; |
| 41 | __u8 flags; |
| 42 | __u16 command_id; |
| 43 | __le32 nsid; |
| 44 | __u64 rsvd2; |
| 45 | __le64 metadata; |
| 46 | __le64 prp1; |
| 47 | __le64 prp2; |
| 48 | __le64 spba; |
| 49 | __le16 length; |
| 50 | __le16 control; |
| 51 | __le32 dsmgmt; |
| 52 | __le64 slba; |
| 53 | }; |
| 54 | |
| 55 | struct nvme_nvm_ph_rw { |
| 56 | __u8 opcode; |
| 57 | __u8 flags; |
| 58 | __u16 command_id; |
| 59 | __le32 nsid; |
| 60 | __u64 rsvd2; |
| 61 | __le64 metadata; |
| 62 | __le64 prp1; |
| 63 | __le64 prp2; |
| 64 | __le64 spba; |
| 65 | __le16 length; |
| 66 | __le16 control; |
| 67 | __le32 dsmgmt; |
| 68 | __le64 resv; |
| 69 | }; |
| 70 | |
| 71 | struct nvme_nvm_identity { |
| 72 | __u8 opcode; |
| 73 | __u8 flags; |
| 74 | __u16 command_id; |
| 75 | __le32 nsid; |
| 76 | __u64 rsvd[2]; |
| 77 | __le64 prp1; |
| 78 | __le64 prp2; |
| 79 | __le32 chnl_off; |
| 80 | __u32 rsvd11[5]; |
| 81 | }; |
| 82 | |
| 83 | struct nvme_nvm_l2ptbl { |
| 84 | __u8 opcode; |
| 85 | __u8 flags; |
| 86 | __u16 command_id; |
| 87 | __le32 nsid; |
| 88 | __le32 cdw2[4]; |
| 89 | __le64 prp1; |
| 90 | __le64 prp2; |
| 91 | __le64 slba; |
| 92 | __le32 nlb; |
| 93 | __le16 cdw14[6]; |
| 94 | }; |
| 95 | |
| 96 | struct nvme_nvm_bbtbl { |
| 97 | __u8 opcode; |
| 98 | __u8 flags; |
| 99 | __u16 command_id; |
| 100 | __le32 nsid; |
| 101 | __u64 rsvd[2]; |
| 102 | __le64 prp1; |
| 103 | __le64 prp2; |
| 104 | __le32 prp1_len; |
| 105 | __le32 prp2_len; |
| 106 | __le32 lbb; |
| 107 | __u32 rsvd11[3]; |
| 108 | }; |
| 109 | |
| 110 | struct nvme_nvm_erase_blk { |
| 111 | __u8 opcode; |
| 112 | __u8 flags; |
| 113 | __u16 command_id; |
| 114 | __le32 nsid; |
| 115 | __u64 rsvd[2]; |
| 116 | __le64 prp1; |
| 117 | __le64 prp2; |
| 118 | __le64 spba; |
| 119 | __le16 length; |
| 120 | __le16 control; |
| 121 | __le32 dsmgmt; |
| 122 | __le64 resv; |
| 123 | }; |
| 124 | |
| 125 | struct nvme_nvm_command { |
| 126 | union { |
| 127 | struct nvme_common_command common; |
| 128 | struct nvme_nvm_identity identity; |
| 129 | struct nvme_nvm_hb_rw hb_rw; |
| 130 | struct nvme_nvm_ph_rw ph_rw; |
| 131 | struct nvme_nvm_l2ptbl l2p; |
| 132 | struct nvme_nvm_bbtbl get_bb; |
| 133 | struct nvme_nvm_bbtbl set_bb; |
| 134 | struct nvme_nvm_erase_blk erase; |
| 135 | }; |
| 136 | }; |
| 137 | |
| 138 | struct nvme_nvm_id_group { |
| 139 | __u8 mtype; |
| 140 | __u8 fmtype; |
| 141 | __le16 res16; |
| 142 | __u8 num_ch; |
| 143 | __u8 num_lun; |
| 144 | __u8 num_pln; |
| 145 | __le16 num_blk; |
| 146 | __le16 num_pg; |
| 147 | __le16 fpg_sz; |
| 148 | __le16 csecs; |
| 149 | __le16 sos; |
| 150 | __le32 trdt; |
| 151 | __le32 trdm; |
| 152 | __le32 tprt; |
| 153 | __le32 tprm; |
| 154 | __le32 tbet; |
| 155 | __le32 tbem; |
| 156 | __le32 mpos; |
| 157 | __le16 cpar; |
| 158 | __u8 reserved[913]; |
| 159 | } __packed; |
| 160 | |
| 161 | struct nvme_nvm_addr_format { |
| 162 | __u8 ch_offset; |
| 163 | __u8 ch_len; |
| 164 | __u8 lun_offset; |
| 165 | __u8 lun_len; |
| 166 | __u8 pln_offset; |
| 167 | __u8 pln_len; |
| 168 | __u8 blk_offset; |
| 169 | __u8 blk_len; |
| 170 | __u8 pg_offset; |
| 171 | __u8 pg_len; |
| 172 | __u8 sect_offset; |
| 173 | __u8 sect_len; |
| 174 | __u8 res[4]; |
| 175 | } __packed; |
| 176 | |
| 177 | struct nvme_nvm_id { |
| 178 | __u8 ver_id; |
| 179 | __u8 vmnt; |
| 180 | __u8 cgrps; |
| 181 | __u8 res[5]; |
| 182 | __le32 cap; |
| 183 | __le32 dom; |
| 184 | struct nvme_nvm_addr_format ppaf; |
| 185 | __u8 ppat; |
| 186 | __u8 resv[223]; |
| 187 | struct nvme_nvm_id_group groups[4]; |
| 188 | } __packed; |
| 189 | |
| 190 | /* |
| 191 | * Check we didn't inadvertently grow the command struct |
| 192 | */ |
| 193 | static inline void _nvme_nvm_check_size(void) |
| 194 | { |
| 195 | BUILD_BUG_ON(sizeof(struct nvme_nvm_identity) != 64); |
| 196 | BUILD_BUG_ON(sizeof(struct nvme_nvm_hb_rw) != 64); |
| 197 | BUILD_BUG_ON(sizeof(struct nvme_nvm_ph_rw) != 64); |
| 198 | BUILD_BUG_ON(sizeof(struct nvme_nvm_bbtbl) != 64); |
| 199 | BUILD_BUG_ON(sizeof(struct nvme_nvm_l2ptbl) != 64); |
| 200 | BUILD_BUG_ON(sizeof(struct nvme_nvm_erase_blk) != 64); |
| 201 | BUILD_BUG_ON(sizeof(struct nvme_nvm_id_group) != 960); |
| 202 | BUILD_BUG_ON(sizeof(struct nvme_nvm_addr_format) != 128); |
| 203 | BUILD_BUG_ON(sizeof(struct nvme_nvm_id) != 4096); |
| 204 | } |
| 205 | |
| 206 | static int init_grps(struct nvm_id *nvm_id, struct nvme_nvm_id *nvme_nvm_id) |
| 207 | { |
| 208 | struct nvme_nvm_id_group *src; |
| 209 | struct nvm_id_group *dst; |
| 210 | int i, end; |
| 211 | |
| 212 | end = min_t(u32, 4, nvm_id->cgrps); |
| 213 | |
| 214 | for (i = 0; i < end; i++) { |
| 215 | src = &nvme_nvm_id->groups[i]; |
| 216 | dst = &nvm_id->groups[i]; |
| 217 | |
| 218 | dst->mtype = src->mtype; |
| 219 | dst->fmtype = src->fmtype; |
| 220 | dst->num_ch = src->num_ch; |
| 221 | dst->num_lun = src->num_lun; |
| 222 | dst->num_pln = src->num_pln; |
| 223 | |
| 224 | dst->num_pg = le16_to_cpu(src->num_pg); |
| 225 | dst->num_blk = le16_to_cpu(src->num_blk); |
| 226 | dst->fpg_sz = le16_to_cpu(src->fpg_sz); |
| 227 | dst->csecs = le16_to_cpu(src->csecs); |
| 228 | dst->sos = le16_to_cpu(src->sos); |
| 229 | |
| 230 | dst->trdt = le32_to_cpu(src->trdt); |
| 231 | dst->trdm = le32_to_cpu(src->trdm); |
| 232 | dst->tprt = le32_to_cpu(src->tprt); |
| 233 | dst->tprm = le32_to_cpu(src->tprm); |
| 234 | dst->tbet = le32_to_cpu(src->tbet); |
| 235 | dst->tbem = le32_to_cpu(src->tbem); |
| 236 | dst->mpos = le32_to_cpu(src->mpos); |
| 237 | |
| 238 | dst->cpar = le16_to_cpu(src->cpar); |
| 239 | } |
| 240 | |
| 241 | return 0; |
| 242 | } |
| 243 | |
| 244 | static int nvme_nvm_identity(struct request_queue *q, struct nvm_id *nvm_id) |
| 245 | { |
| 246 | struct nvme_ns *ns = q->queuedata; |
| 247 | struct nvme_nvm_id *nvme_nvm_id; |
| 248 | struct nvme_nvm_command c = {}; |
| 249 | int ret; |
| 250 | |
| 251 | c.identity.opcode = nvme_nvm_admin_identity; |
| 252 | c.identity.nsid = cpu_to_le32(ns->ns_id); |
| 253 | c.identity.chnl_off = 0; |
| 254 | |
| 255 | nvme_nvm_id = kmalloc(sizeof(struct nvme_nvm_id), GFP_KERNEL); |
| 256 | if (!nvme_nvm_id) |
| 257 | return -ENOMEM; |
| 258 | |
| 259 | ret = nvme_submit_sync_cmd(q, (struct nvme_command *)&c, nvme_nvm_id, |
| 260 | sizeof(struct nvme_nvm_id)); |
| 261 | if (ret) { |
| 262 | ret = -EIO; |
| 263 | goto out; |
| 264 | } |
| 265 | |
| 266 | nvm_id->ver_id = nvme_nvm_id->ver_id; |
| 267 | nvm_id->vmnt = nvme_nvm_id->vmnt; |
| 268 | nvm_id->cgrps = nvme_nvm_id->cgrps; |
| 269 | nvm_id->cap = le32_to_cpu(nvme_nvm_id->cap); |
| 270 | nvm_id->dom = le32_to_cpu(nvme_nvm_id->dom); |
| 271 | |
| 272 | ret = init_grps(nvm_id, nvme_nvm_id); |
| 273 | out: |
| 274 | kfree(nvme_nvm_id); |
| 275 | return ret; |
| 276 | } |
| 277 | |
| 278 | static int nvme_nvm_get_l2p_tbl(struct request_queue *q, u64 slba, u32 nlb, |
| 279 | nvm_l2p_update_fn *update_l2p, void *priv) |
| 280 | { |
| 281 | struct nvme_ns *ns = q->queuedata; |
| 282 | struct nvme_dev *dev = ns->dev; |
| 283 | struct nvme_nvm_command c = {}; |
| 284 | u32 len = queue_max_hw_sectors(q) << 9; |
| 285 | u64 nlb_pr_rq = len / sizeof(u64); |
| 286 | u64 cmd_slba = slba; |
| 287 | void *entries; |
| 288 | int ret = 0; |
| 289 | |
| 290 | c.l2p.opcode = nvme_nvm_admin_get_l2p_tbl; |
| 291 | c.l2p.nsid = cpu_to_le32(ns->ns_id); |
| 292 | entries = kmalloc(len, GFP_KERNEL); |
| 293 | if (!entries) |
| 294 | return -ENOMEM; |
| 295 | |
| 296 | while (nlb) { |
| 297 | u32 cmd_nlb = min_t(u32, nlb_pr_rq, nlb); |
| 298 | |
| 299 | c.l2p.slba = cpu_to_le64(cmd_slba); |
| 300 | c.l2p.nlb = cpu_to_le32(cmd_nlb); |
| 301 | |
| 302 | ret = nvme_submit_sync_cmd(q, (struct nvme_command *)&c, |
| 303 | entries, len); |
| 304 | if (ret) { |
| 305 | dev_err(dev->dev, "L2P table transfer failed (%d)\n", |
| 306 | ret); |
| 307 | ret = -EIO; |
| 308 | goto out; |
| 309 | } |
| 310 | |
| 311 | if (update_l2p(cmd_slba, cmd_nlb, entries, priv)) { |
| 312 | ret = -EINTR; |
| 313 | goto out; |
| 314 | } |
| 315 | |
| 316 | cmd_slba += cmd_nlb; |
| 317 | nlb -= cmd_nlb; |
| 318 | } |
| 319 | |
| 320 | out: |
| 321 | kfree(entries); |
| 322 | return ret; |
| 323 | } |
| 324 | |
| 325 | static int nvme_nvm_get_bb_tbl(struct request_queue *q, int lunid, |
| 326 | unsigned int nr_blocks, |
| 327 | nvm_bb_update_fn *update_bbtbl, void *priv) |
| 328 | { |
| 329 | struct nvme_ns *ns = q->queuedata; |
| 330 | struct nvme_dev *dev = ns->dev; |
| 331 | struct nvme_nvm_command c = {}; |
| 332 | void *bb_bitmap; |
| 333 | u16 bb_bitmap_size; |
| 334 | int ret = 0; |
| 335 | |
| 336 | c.get_bb.opcode = nvme_nvm_admin_get_bb_tbl; |
| 337 | c.get_bb.nsid = cpu_to_le32(ns->ns_id); |
| 338 | c.get_bb.lbb = cpu_to_le32(lunid); |
| 339 | bb_bitmap_size = ((nr_blocks >> 15) + 1) * PAGE_SIZE; |
| 340 | bb_bitmap = kmalloc(bb_bitmap_size, GFP_KERNEL); |
| 341 | if (!bb_bitmap) |
| 342 | return -ENOMEM; |
| 343 | |
| 344 | bitmap_zero(bb_bitmap, nr_blocks); |
| 345 | |
| 346 | ret = nvme_submit_sync_cmd(q, (struct nvme_command *)&c, bb_bitmap, |
| 347 | bb_bitmap_size); |
| 348 | if (ret) { |
| 349 | dev_err(dev->dev, "get bad block table failed (%d)\n", ret); |
| 350 | ret = -EIO; |
| 351 | goto out; |
| 352 | } |
| 353 | |
| 354 | ret = update_bbtbl(lunid, bb_bitmap, nr_blocks, priv); |
| 355 | if (ret) { |
| 356 | ret = -EINTR; |
| 357 | goto out; |
| 358 | } |
| 359 | |
| 360 | out: |
| 361 | kfree(bb_bitmap); |
| 362 | return ret; |
| 363 | } |
| 364 | |
| 365 | static inline void nvme_nvm_rqtocmd(struct request *rq, struct nvm_rq *rqd, |
| 366 | struct nvme_ns *ns, struct nvme_nvm_command *c) |
| 367 | { |
| 368 | c->ph_rw.opcode = rqd->opcode; |
| 369 | c->ph_rw.nsid = cpu_to_le32(ns->ns_id); |
| 370 | c->ph_rw.spba = cpu_to_le64(rqd->ppa_addr.ppa); |
| 371 | c->ph_rw.control = cpu_to_le16(rqd->flags); |
| 372 | c->ph_rw.length = cpu_to_le16(rqd->nr_pages - 1); |
| 373 | |
| 374 | if (rqd->opcode == NVM_OP_HBWRITE || rqd->opcode == NVM_OP_HBREAD) |
| 375 | c->hb_rw.slba = cpu_to_le64(nvme_block_nr(ns, |
| 376 | rqd->bio->bi_iter.bi_sector)); |
| 377 | } |
| 378 | |
| 379 | static void nvme_nvm_end_io(struct request *rq, int error) |
| 380 | { |
| 381 | struct nvm_rq *rqd = rq->end_io_data; |
| 382 | struct nvm_dev *dev = rqd->dev; |
| 383 | |
| 384 | if (dev->mt->end_io(rqd, error)) |
| 385 | pr_err("nvme: err status: %x result: %lx\n", |
| 386 | rq->errors, (unsigned long)rq->special); |
| 387 | |
| 388 | kfree(rq->cmd); |
| 389 | blk_mq_free_request(rq); |
| 390 | } |
| 391 | |
| 392 | static int nvme_nvm_submit_io(struct request_queue *q, struct nvm_rq *rqd) |
| 393 | { |
| 394 | struct nvme_ns *ns = q->queuedata; |
| 395 | struct request *rq; |
| 396 | struct bio *bio = rqd->bio; |
| 397 | struct nvme_nvm_command *cmd; |
| 398 | |
| 399 | rq = blk_mq_alloc_request(q, bio_rw(bio), GFP_KERNEL, 0); |
| 400 | if (IS_ERR(rq)) |
| 401 | return -ENOMEM; |
| 402 | |
| 403 | cmd = kzalloc(sizeof(struct nvme_nvm_command), GFP_KERNEL); |
| 404 | if (!cmd) { |
| 405 | blk_mq_free_request(rq); |
| 406 | return -ENOMEM; |
| 407 | } |
| 408 | |
| 409 | rq->cmd_type = REQ_TYPE_DRV_PRIV; |
| 410 | rq->ioprio = bio_prio(bio); |
| 411 | |
| 412 | if (bio_has_data(bio)) |
| 413 | rq->nr_phys_segments = bio_phys_segments(q, bio); |
| 414 | |
| 415 | rq->__data_len = bio->bi_iter.bi_size; |
| 416 | rq->bio = rq->biotail = bio; |
| 417 | |
| 418 | nvme_nvm_rqtocmd(rq, rqd, ns, cmd); |
| 419 | |
| 420 | rq->cmd = (unsigned char *)cmd; |
| 421 | rq->cmd_len = sizeof(struct nvme_nvm_command); |
| 422 | rq->special = (void *)0; |
| 423 | |
| 424 | rq->end_io_data = rqd; |
| 425 | |
| 426 | blk_execute_rq_nowait(q, NULL, rq, 0, nvme_nvm_end_io); |
| 427 | |
| 428 | return 0; |
| 429 | } |
| 430 | |
| 431 | static int nvme_nvm_erase_block(struct request_queue *q, struct nvm_rq *rqd) |
| 432 | { |
| 433 | struct nvme_ns *ns = q->queuedata; |
| 434 | struct nvme_nvm_command c = {}; |
| 435 | |
| 436 | c.erase.opcode = NVM_OP_ERASE; |
| 437 | c.erase.nsid = cpu_to_le32(ns->ns_id); |
| 438 | c.erase.spba = cpu_to_le64(rqd->ppa_addr.ppa); |
| 439 | c.erase.length = cpu_to_le16(rqd->nr_pages - 1); |
| 440 | |
| 441 | return nvme_submit_sync_cmd(q, (struct nvme_command *)&c, NULL, 0); |
| 442 | } |
| 443 | |
| 444 | static void *nvme_nvm_create_dma_pool(struct request_queue *q, char *name) |
| 445 | { |
| 446 | struct nvme_ns *ns = q->queuedata; |
| 447 | struct nvme_dev *dev = ns->dev; |
| 448 | |
| 449 | return dma_pool_create(name, dev->dev, PAGE_SIZE, PAGE_SIZE, 0); |
| 450 | } |
| 451 | |
/* Destroy a pool previously handed out by nvme_nvm_create_dma_pool(). */
static void nvme_nvm_destroy_dma_pool(void *pool)
{
	dma_pool_destroy((struct dma_pool *)pool);
}
| 458 | |
| 459 | static void *nvme_nvm_dev_dma_alloc(struct request_queue *q, void *pool, |
| 460 | gfp_t mem_flags, dma_addr_t *dma_handler) |
| 461 | { |
| 462 | return dma_pool_alloc(pool, mem_flags, dma_handler); |
| 463 | } |
| 464 | |
| 465 | static void nvme_nvm_dev_dma_free(void *pool, void *ppa_list, |
| 466 | dma_addr_t dma_handler) |
| 467 | { |
| 468 | dma_pool_free(pool, ppa_list, dma_handler); |
| 469 | } |
| 470 | |
| 471 | static struct nvm_dev_ops nvme_nvm_dev_ops = { |
| 472 | .identity = nvme_nvm_identity, |
| 473 | |
| 474 | .get_l2p_tbl = nvme_nvm_get_l2p_tbl, |
| 475 | |
| 476 | .get_bb_tbl = nvme_nvm_get_bb_tbl, |
| 477 | |
| 478 | .submit_io = nvme_nvm_submit_io, |
| 479 | .erase_block = nvme_nvm_erase_block, |
| 480 | |
| 481 | .create_dma_pool = nvme_nvm_create_dma_pool, |
| 482 | .destroy_dma_pool = nvme_nvm_destroy_dma_pool, |
| 483 | .dev_dma_alloc = nvme_nvm_dev_dma_alloc, |
| 484 | .dev_dma_free = nvme_nvm_dev_dma_free, |
| 485 | |
| 486 | .max_phys_sect = 64, |
| 487 | }; |
| 488 | |
| 489 | int nvme_nvm_register(struct request_queue *q, char *disk_name) |
| 490 | { |
| 491 | return nvm_register(q, disk_name, &nvme_nvm_dev_ops); |
| 492 | } |
| 493 | |
/*
 * Unregister the device known as @disk_name via nvm_unregister().  @q is
 * accepted for symmetry with nvme_nvm_register() but is not used.
 */
void nvme_nvm_unregister(struct request_queue *q, char *disk_name)
{
	nvm_unregister(disk_name);
}
| 498 | |
| 499 | int nvme_nvm_ns_supported(struct nvme_ns *ns, struct nvme_id_ns *id) |
| 500 | { |
| 501 | struct nvme_dev *dev = ns->dev; |
| 502 | struct pci_dev *pdev = to_pci_dev(dev->dev); |
| 503 | |
| 504 | /* QEMU NVMe simulator - PCI ID + Vendor specific bit */ |
| 505 | if (pdev->vendor == PCI_VENDOR_ID_INTEL && pdev->device == 0x5845 && |
| 506 | id->vs[0] == 0x1) |
| 507 | return 1; |
| 508 | |
| 509 | /* CNEX Labs - PCI ID + Vendor specific bit */ |
| 510 | if (pdev->vendor == 0x1d1d && pdev->device == 0x2807 && |
| 511 | id->vs[0] == 0x1) |
| 512 | return 1; |
| 513 | |
| 514 | return 0; |
| 515 | } |
| 516 | #else |
/*
 * Stubs for builds without CONFIG_NVM: registration reports success without
 * doing anything, unregistration is a no-op, and no namespace is ever
 * reported as supported.
 */
int nvme_nvm_register(struct request_queue *q, char *disk_name)
{
	return 0;
}

void nvme_nvm_unregister(struct request_queue *q, char *disk_name)
{
}

int nvme_nvm_ns_supported(struct nvme_ns *ns, struct nvme_id_ns *id)
{
	return 0;
}
| 526 | #endif /* CONFIG_NVM */ |