| /* |
| * Copyright (C) 2015, SUSE |
| * |
| * This program is free software; you can redistribute it and/or modify |
| * it under the terms of the GNU General Public License as published by |
| * the Free Software Foundation; either version 2, or (at your option) |
| * any later version. |
| * |
| */ |
| |
| |
| #include <linux/module.h> |
| #include <linux/dlm.h> |
| #include <linux/sched.h> |
| #include "md.h" |
| #include "bitmap.h" |
| #include "md-cluster.h" |
| |
| #define LVB_SIZE 64 /* bytes allocated for each lock value block (sb_lvbptr); also the LVB length passed to dlm_new_lockspace() */ |
| |
| struct dlm_lock_resource { /* one DLM lock plus the state needed to request it synchronously */ |
| dlm_lockspace_t *ls; /* lockspace handed to dlm_lock()/dlm_unlock(); copied from md_cluster_info.lockspace */ |
| struct dlm_lksb lksb; /* DLM status block: sb_status is the request result, sb_lkid identifies the lock, sb_lvbptr is the optional LVB buffer */ |
| char *name; /* lock name (heap copy owned by this structure). */ |
| uint32_t flags; /* flags to pass to dlm_lock() */ |
| struct completion completion; /* completion for synchronized locking; signalled by sync_ast() */ |
| void (*bast)(void *arg, int mode); /* blocking AST function pointer; may be NULL */ |
| struct mddev *mddev; /* pointing back to mddev. */ |
| }; |
| |
| struct suspend_info { /* resync range read from another slot's bitmap lock; kept on md_cluster_info.suspend_list */ |
| int slot; /* zero-based index of the bitmap slot the range was read from */ |
| sector_t lo; /* resync_info.lo converted to CPU byte order */ |
| sector_t hi; /* resync_info.hi converted to CPU byte order */ |
| struct list_head list; /* link in md_cluster_info.suspend_list */ |
| }; |
| |
| struct resync_info { /* resync range as stored at the start of a bitmap lock's LVB */ |
| __le64 lo; /* little-endian; filled from a sector_t with cpu_to_le64() */ |
| __le64 hi; /* little-endian; read_resync_info() reports no range when this is zero */ |
| }; |
| |
| struct md_cluster_info { /* per-array cluster state, reached through mddev->cluster_info */ |
| /* dlm lock space and resources for clustered raid. */ |
| dlm_lockspace_t *lockspace; /* filled in by dlm_new_lockspace() in join() */ |
| int slot_number; /* slot reported by recover_done(); DLM counts from 1 */ |
| struct completion completion; /* completed by recover_done(); join() waits on it */ |
| struct dlm_lock_resource *sb_lock; /* the "cmd-super" lock resource */ |
| struct mutex sb_mutex; /* initialised in join(); NOTE(review): no user visible in this part of the file */ |
| struct dlm_lock_resource *bitmap_lockres; /* "bitmapNNNN" lock of our own slot, taken in PW by join() */ |
| struct list_head suspend_list; /* list of struct suspend_info */ |
| spinlock_t suspend_lock; /* taken around suspend_list updates */ |
| struct md_thread *recovery_thread; /* runs recover_bitmaps(); registered on first recover_slot() */ |
| unsigned long recovery_map; /* bit N set: zero-based slot N still has to be recovered */ |
| }; |
| |
| static void sync_ast(void *arg) |
| { |
| struct dlm_lock_resource *res; |
| |
| res = (struct dlm_lock_resource *) arg; |
| complete(&res->completion); |
| } |
| |
| static int dlm_lock_sync(struct dlm_lock_resource *res, int mode) |
| { |
| int ret = 0; |
| |
| init_completion(&res->completion); |
| ret = dlm_lock(res->ls, mode, &res->lksb, |
| res->flags, res->name, strlen(res->name), |
| 0, sync_ast, res, res->bast); |
| if (ret) |
| return ret; |
| wait_for_completion(&res->completion); |
| return res->lksb.sb_status; |
| } |
| |
| static int dlm_unlock_sync(struct dlm_lock_resource *res) |
| { |
| return dlm_lock_sync(res, DLM_LOCK_NL); |
| } |
| |
| static struct dlm_lock_resource *lockres_init(struct mddev *mddev, |
| char *name, void (*bastfn)(void *arg, int mode), int with_lvb) |
| { |
| struct dlm_lock_resource *res = NULL; |
| int ret, namelen; |
| struct md_cluster_info *cinfo = mddev->cluster_info; |
| |
| res = kzalloc(sizeof(struct dlm_lock_resource), GFP_KERNEL); |
| if (!res) |
| return NULL; |
| res->ls = cinfo->lockspace; |
| res->mddev = mddev; |
| namelen = strlen(name); |
| res->name = kzalloc(namelen + 1, GFP_KERNEL); |
| if (!res->name) { |
| pr_err("md-cluster: Unable to allocate resource name for resource %s\n", name); |
| goto out_err; |
| } |
| strlcpy(res->name, name, namelen + 1); |
| if (with_lvb) { |
| res->lksb.sb_lvbptr = kzalloc(LVB_SIZE, GFP_KERNEL); |
| if (!res->lksb.sb_lvbptr) { |
| pr_err("md-cluster: Unable to allocate LVB for resource %s\n", name); |
| goto out_err; |
| } |
| res->flags = DLM_LKF_VALBLK; |
| } |
| |
| if (bastfn) |
| res->bast = bastfn; |
| |
| res->flags |= DLM_LKF_EXPEDITE; |
| |
| ret = dlm_lock_sync(res, DLM_LOCK_NL); |
| if (ret) { |
| pr_err("md-cluster: Unable to lock NL on new lock resource %s\n", name); |
| goto out_err; |
| } |
| res->flags &= ~DLM_LKF_EXPEDITE; |
| res->flags |= DLM_LKF_CONVERT; |
| |
| return res; |
| out_err: |
| kfree(res->lksb.sb_lvbptr); |
| kfree(res->name); |
| kfree(res); |
| return NULL; |
| } |
| |
| static void lockres_free(struct dlm_lock_resource *res) |
| { |
| if (!res) |
| return; |
| |
| init_completion(&res->completion); |
| dlm_unlock(res->ls, res->lksb.sb_lkid, 0, &res->lksb, res); |
| wait_for_completion(&res->completion); |
| |
| kfree(res->name); |
| kfree(res->lksb.sb_lvbptr); |
| kfree(res); |
| } |
| |
/*
 * Format the 16 raw bytes at @src as lower-case hex in the 8-4-4-4-12 UUID
 * layout ("00112233-4455-6677-8899-aabbccddeeff").
 *
 * @dest must have room for 37 bytes (36 characters plus the terminating
 * NUL). Returns @dest.
 */
static char *pretty_uuid(char *dest, char *src)
{
	char *out = dest;
	int byte;

	for (byte = 0; byte < 16; byte++) {
		/* A dash goes in front of bytes 4, 6, 8 and 10. */
		switch (byte) {
		case 4:
		case 6:
		case 8:
		case 10:
			*out++ = '-';
			break;
		default:
			break;
		}
		/* sprintf() NUL-terminates, so the string is valid after each byte. */
		out += sprintf(out, "%02x", (unsigned char)src[byte]);
	}
	return dest;
}
| |
| static void add_resync_info(struct mddev *mddev, struct dlm_lock_resource *lockres, |
| sector_t lo, sector_t hi) |
| { |
| struct resync_info *ri; |
| |
| ri = (struct resync_info *)lockres->lksb.sb_lvbptr; |
| ri->lo = cpu_to_le64(lo); |
| ri->hi = cpu_to_le64(hi); |
| } |
| |
| static struct suspend_info *read_resync_info(struct mddev *mddev, struct dlm_lock_resource *lockres) |
| { |
| struct resync_info ri; |
| struct suspend_info *s = NULL; |
| sector_t hi = 0; |
| |
| dlm_lock_sync(lockres, DLM_LOCK_CR); |
| memcpy(&ri, lockres->lksb.sb_lvbptr, sizeof(struct resync_info)); |
| hi = le64_to_cpu(ri.hi); |
| if (ri.hi > 0) { |
| s = kzalloc(sizeof(struct suspend_info), GFP_KERNEL); |
| if (!s) |
| goto out; |
| s->hi = hi; |
| s->lo = le64_to_cpu(ri.lo); |
| } |
| dlm_unlock_sync(lockres); |
| out: |
| return s; |
| } |
| |
| void recover_bitmaps(struct md_thread *thread) |
| { |
| struct mddev *mddev = thread->mddev; |
| struct md_cluster_info *cinfo = mddev->cluster_info; |
| struct dlm_lock_resource *bm_lockres; |
| char str[64]; |
| int slot, ret; |
| struct suspend_info *s, *tmp; |
| sector_t lo, hi; |
| |
| while (cinfo->recovery_map) { |
| slot = fls64((u64)cinfo->recovery_map) - 1; |
| |
| /* Clear suspend_area associated with the bitmap */ |
| spin_lock_irq(&cinfo->suspend_lock); |
| list_for_each_entry_safe(s, tmp, &cinfo->suspend_list, list) |
| if (slot == s->slot) { |
| list_del(&s->list); |
| kfree(s); |
| } |
| spin_unlock_irq(&cinfo->suspend_lock); |
| |
| snprintf(str, 64, "bitmap%04d", slot); |
| bm_lockres = lockres_init(mddev, str, NULL, 1); |
| if (!bm_lockres) { |
| pr_err("md-cluster: Cannot initialize bitmaps\n"); |
| goto clear_bit; |
| } |
| |
| ret = dlm_lock_sync(bm_lockres, DLM_LOCK_PW); |
| if (ret) { |
| pr_err("md-cluster: Could not DLM lock %s: %d\n", |
| str, ret); |
| goto clear_bit; |
| } |
| ret = bitmap_copy_from_slot(mddev, slot, &lo, &hi); |
| if (ret) { |
| pr_err("md-cluster: Could not copy data from bitmap %d\n", slot); |
| goto dlm_unlock; |
| } |
| if (hi > 0) { |
| /* TODO:Wait for current resync to get over */ |
| set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); |
| if (lo < mddev->recovery_cp) |
| mddev->recovery_cp = lo; |
| md_check_recovery(mddev); |
| } |
| dlm_unlock: |
| dlm_unlock_sync(bm_lockres); |
| clear_bit: |
| clear_bit(slot, &cinfo->recovery_map); |
| } |
| } |
| |
| static void recover_prep(void *arg) /* .recover_prep entry of md_ls_ops; deliberately does nothing */ |
| { |
| } |
| |
| static void recover_slot(void *arg, struct dlm_slot *slot) |
| { |
| struct mddev *mddev = arg; |
| struct md_cluster_info *cinfo = mddev->cluster_info; |
| |
| pr_info("md-cluster: %s Node %d/%d down. My slot: %d. Initiating recovery.\n", |
| mddev->bitmap_info.cluster_name, |
| slot->nodeid, slot->slot, |
| cinfo->slot_number); |
| set_bit(slot->slot - 1, &cinfo->recovery_map); |
| if (!cinfo->recovery_thread) { |
| cinfo->recovery_thread = md_register_thread(recover_bitmaps, |
| mddev, "recover"); |
| if (!cinfo->recovery_thread) { |
| pr_warn("md-cluster: Could not create recovery thread\n"); |
| return; |
| } |
| } |
| md_wakeup_thread(cinfo->recovery_thread); |
| } |
| |
| static void recover_done(void *arg, struct dlm_slot *slots, |
| int num_slots, int our_slot, |
| uint32_t generation) |
| { |
| struct mddev *mddev = arg; |
| struct md_cluster_info *cinfo = mddev->cluster_info; |
| |
| cinfo->slot_number = our_slot; |
| complete(&cinfo->completion); |
| } |
| |
| static const struct dlm_lockspace_ops md_ls_ops = { /* callbacks handed to dlm_new_lockspace() in join() */ |
| .recover_prep = recover_prep, /* empty stub */ |
| .recover_slot = recover_slot, /* marks the reported slot in recovery_map and wakes the recovery thread */ |
| .recover_done = recover_done, /* records our slot number and completes cinfo->completion */ |
| }; |
| |
| static int gather_all_resync_info(struct mddev *mddev, int total_slots) |
| { |
| struct md_cluster_info *cinfo = mddev->cluster_info; |
| int i, ret = 0; |
| struct dlm_lock_resource *bm_lockres; |
| struct suspend_info *s; |
| char str[64]; |
| |
| |
| for (i = 0; i < total_slots; i++) { |
| memset(str, '\0', 64); |
| snprintf(str, 64, "bitmap%04d", i); |
| bm_lockres = lockres_init(mddev, str, NULL, 1); |
| if (!bm_lockres) |
| return -ENOMEM; |
| if (i == (cinfo->slot_number - 1)) |
| continue; |
| |
| bm_lockres->flags |= DLM_LKF_NOQUEUE; |
| ret = dlm_lock_sync(bm_lockres, DLM_LOCK_PW); |
| if (ret == -EAGAIN) { |
| memset(bm_lockres->lksb.sb_lvbptr, '\0', LVB_SIZE); |
| s = read_resync_info(mddev, bm_lockres); |
| if (s) { |
| pr_info("%s:%d Resync[%llu..%llu] in progress on %d\n", |
| __func__, __LINE__, |
| (unsigned long long) s->lo, |
| (unsigned long long) s->hi, i); |
| spin_lock_irq(&cinfo->suspend_lock); |
| s->slot = i; |
| list_add(&s->list, &cinfo->suspend_list); |
| spin_unlock_irq(&cinfo->suspend_lock); |
| } |
| ret = 0; |
| lockres_free(bm_lockres); |
| continue; |
| } |
| if (ret) |
| goto out; |
| /* TODO: Read the disk bitmap sb and check if it needs recovery */ |
| dlm_unlock_sync(bm_lockres); |
| lockres_free(bm_lockres); |
| } |
| out: |
| return ret; |
| } |
| |
| static int join(struct mddev *mddev, int nodes) |
| { |
| struct md_cluster_info *cinfo; |
| int ret, ops_rv; |
| char str[64]; |
| |
| if (!try_module_get(THIS_MODULE)) |
| return -ENOENT; |
| |
| cinfo = kzalloc(sizeof(struct md_cluster_info), GFP_KERNEL); |
| if (!cinfo) |
| return -ENOMEM; |
| |
| init_completion(&cinfo->completion); |
| |
| mutex_init(&cinfo->sb_mutex); |
| mddev->cluster_info = cinfo; |
| |
| memset(str, 0, 64); |
| pretty_uuid(str, mddev->uuid); |
| ret = dlm_new_lockspace(str, mddev->bitmap_info.cluster_name, |
| DLM_LSFL_FS, LVB_SIZE, |
| &md_ls_ops, mddev, &ops_rv, &cinfo->lockspace); |
| if (ret) |
| goto err; |
| wait_for_completion(&cinfo->completion); |
| if (nodes <= cinfo->slot_number) { |
| pr_err("md-cluster: Slot allotted(%d) greater than available slots(%d)", cinfo->slot_number - 1, |
| nodes); |
| ret = -ERANGE; |
| goto err; |
| } |
| cinfo->sb_lock = lockres_init(mddev, "cmd-super", |
| NULL, 0); |
| if (!cinfo->sb_lock) { |
| ret = -ENOMEM; |
| goto err; |
| } |
| |
| pr_info("md-cluster: Joined cluster %s slot %d\n", str, cinfo->slot_number); |
| snprintf(str, 64, "bitmap%04d", cinfo->slot_number - 1); |
| cinfo->bitmap_lockres = lockres_init(mddev, str, NULL, 1); |
| if (!cinfo->bitmap_lockres) |
| goto err; |
| if (dlm_lock_sync(cinfo->bitmap_lockres, DLM_LOCK_PW)) { |
| pr_err("Failed to get bitmap lock\n"); |
| ret = -EINVAL; |
| goto err; |
| } |
| |
| INIT_LIST_HEAD(&cinfo->suspend_list); |
| spin_lock_init(&cinfo->suspend_lock); |
| |
| ret = gather_all_resync_info(mddev, nodes); |
| if (ret) |
| goto err; |
| |
| return 0; |
| err: |
| lockres_free(cinfo->bitmap_lockres); |
| lockres_free(cinfo->sb_lock); |
| if (cinfo->lockspace) |
| dlm_release_lockspace(cinfo->lockspace, 2); |
| mddev->cluster_info = NULL; |
| kfree(cinfo); |
| module_put(THIS_MODULE); |
| return ret; |
| } |
| |
| static int leave(struct mddev *mddev) |
| { |
| struct md_cluster_info *cinfo = mddev->cluster_info; |
| |
| if (!cinfo) |
| return 0; |
| md_unregister_thread(&cinfo->recovery_thread); |
| lockres_free(cinfo->sb_lock); |
| lockres_free(cinfo->bitmap_lockres); |
| dlm_release_lockspace(cinfo->lockspace, 2); |
| return 0; |
| } |
| |
| /* slot_number(): Returns the MD slot number to use |
| * DLM starts the slot numbers from 1, wheras cluster-md |
| * wants the number to be from zero, so we deduct one |
| */ |
| static int slot_number(struct mddev *mddev) |
| { |
| struct md_cluster_info *cinfo = mddev->cluster_info; |
| |
| return cinfo->slot_number - 1; |
| } |
| |
| static void resync_info_update(struct mddev *mddev, sector_t lo, sector_t hi) |
| { |
| struct md_cluster_info *cinfo = mddev->cluster_info; |
| |
| add_resync_info(mddev, cinfo->bitmap_lockres, lo, hi); |
| /* Re-acquire the lock to refresh LVB */ |
| dlm_lock_sync(cinfo->bitmap_lockres, DLM_LOCK_PW); |
| } |
| |
| static struct md_cluster_operations cluster_ops = { /* operations table registered with md by cluster_init() */ |
| .join = join, /* create the lockspace and take this node's locks */ |
| .leave = leave, /* release the locks and the lockspace */ |
| .slot_number = slot_number, /* zero-based slot of this node */ |
| .resync_info_update = resync_info_update, /* store a resync range in our bitmap lock's LVB */ |
| }; |
| |
| static int __init cluster_init(void) |
| { |
| pr_warn("md-cluster: EXPERIMENTAL. Use with caution\n"); |
| pr_info("Registering Cluster MD functions\n"); |
| register_md_cluster_operations(&cluster_ops, THIS_MODULE); |
| return 0; |
| } |
| |
/* Module exit point: withdraw the operations registered by cluster_init(). */
static void cluster_exit(void)
{
	unregister_md_cluster_operations();
}
| |
| module_init(cluster_init); /* cluster_init() runs when the module is loaded */ |
| module_exit(cluster_exit); /* cluster_exit() runs when the module is removed */ |
| MODULE_LICENSE("GPL"); |
| MODULE_DESCRIPTION("Clustering support for MD"); |