blob: fe7e35cdc9d54d91a684fc5416ec39f470993b36 [file] [log] [blame]
Thomas Gleixnerd2912cb2019-06-04 10:11:33 +02001// SPDX-License-Identifier: GPL-2.0-only
Nathan Fontenot410bccf2010-09-10 09:42:36 +00002/*
3 * Support for Partition Mobility/Migration
4 *
5 * Copyright (C) 2010 Nathan Fontenot
6 * Copyright (C) 2010 IBM Corporation
Nathan Fontenot410bccf2010-09-10 09:42:36 +00007 */
8
Nathan Lynch494a66f2019-06-27 00:30:43 -05009
10#define pr_fmt(fmt) "mobility: " fmt
11
Nathan Lynche59a1752019-06-11 23:45:05 -050012#include <linux/cpu.h>
Nathan Fontenot410bccf2010-09-10 09:42:36 +000013#include <linux/kernel.h>
14#include <linux/kobject.h>
Nathan Lynch9327dc02020-12-07 15:51:44 -060015#include <linux/nmi.h>
Nathan Lynchccfb5bd2019-08-02 14:29:26 -050016#include <linux/sched.h>
Nathan Fontenot410bccf2010-09-10 09:42:36 +000017#include <linux/smp.h>
Paul Gortmakerb56eade2011-05-27 13:27:45 -040018#include <linux/stat.h>
Nathan Lynch9327dc02020-12-07 15:51:44 -060019#include <linux/stop_machine.h>
Nathan Fontenot410bccf2010-09-10 09:42:36 +000020#include <linux/completion.h>
21#include <linux/device.h>
22#include <linux/delay.h>
23#include <linux/slab.h>
Christophe Leroy5c35a022018-07-05 16:24:59 +000024#include <linux/stringify.h>
Nathan Fontenot410bccf2010-09-10 09:42:36 +000025
Michael Ellerman8e83e902014-07-16 12:02:43 +100026#include <asm/machdep.h>
Nathan Fontenot410bccf2010-09-10 09:42:36 +000027#include <asm/rtas.h>
28#include "pseries.h"
Nathan Lynche610a462019-06-11 23:45:06 -050029#include "../../kernel/cacheinfo.h"
Nathan Fontenot410bccf2010-09-10 09:42:36 +000030
31static struct kobject *mobility_kobj;
32
33struct update_props_workarea {
Tyrel Datwylerf6ff0412015-03-04 11:59:33 -080034 __be32 phandle;
35 __be32 state;
36 __be64 reserved;
37 __be32 nprops;
Tyrel Datwylerd0ef4402013-08-14 22:23:47 -070038} __packed;
Nathan Fontenot410bccf2010-09-10 09:42:36 +000039
40#define NODE_ACTION_MASK 0xff000000
41#define NODE_COUNT_MASK 0x00ffffff
42
43#define DELETE_DT_NODE 0x01000000
44#define UPDATE_DT_NODE 0x02000000
45#define ADD_DT_NODE 0x03000000
46
Nathan Fontenot762ec152013-04-24 05:47:11 +000047#define MIGRATION_SCOPE (1)
John Allen675d8ee2017-01-06 13:28:54 -060048#define PRRN_SCOPE -2
Nathan Fontenot762ec152013-04-24 05:47:11 +000049
50static int mobility_rtas_call(int token, char *buf, s32 scope)
Nathan Fontenot410bccf2010-09-10 09:42:36 +000051{
52 int rc;
53
54 spin_lock(&rtas_data_buf_lock);
55
56 memcpy(rtas_data_buf, buf, RTAS_DATA_BUF_SIZE);
Nathan Fontenot762ec152013-04-24 05:47:11 +000057 rc = rtas_call(token, 2, 1, NULL, rtas_data_buf, scope);
Nathan Fontenot410bccf2010-09-10 09:42:36 +000058 memcpy(buf, rtas_data_buf, RTAS_DATA_BUF_SIZE);
59
60 spin_unlock(&rtas_data_buf_lock);
61 return rc;
62}
63
Tyrel Datwylerf6ff0412015-03-04 11:59:33 -080064static int delete_dt_node(__be32 phandle)
Nathan Fontenot410bccf2010-09-10 09:42:36 +000065{
66 struct device_node *dn;
67
Tyrel Datwylerf6ff0412015-03-04 11:59:33 -080068 dn = of_find_node_by_phandle(be32_to_cpu(phandle));
Nathan Fontenot410bccf2010-09-10 09:42:36 +000069 if (!dn)
70 return -ENOENT;
71
Nathan Lynch5d8b1f92019-06-27 00:30:44 -050072 pr_debug("removing node %pOFfp\n", dn);
73
Nathan Fontenot410bccf2010-09-10 09:42:36 +000074 dlpar_detach_node(dn);
Tyrel Datwyler14cd820a2013-08-14 22:23:51 -070075 of_node_put(dn);
Nathan Fontenot410bccf2010-09-10 09:42:36 +000076 return 0;
77}
78
79static int update_dt_property(struct device_node *dn, struct property **prop,
80 const char *name, u32 vd, char *value)
81{
82 struct property *new_prop = *prop;
Nathan Fontenot410bccf2010-09-10 09:42:36 +000083 int more = 0;
84
85 /* A negative 'vd' value indicates that only part of the new property
86 * value is contained in the buffer and we need to call
87 * ibm,update-properties again to get the rest of the value.
88 *
89 * A negative value is also the two's compliment of the actual value.
90 */
91 if (vd & 0x80000000) {
92 vd = ~vd + 1;
93 more = 1;
94 }
95
96 if (new_prop) {
97 /* partial property fixup */
98 char *new_data = kzalloc(new_prop->length + vd, GFP_KERNEL);
99 if (!new_data)
100 return -ENOMEM;
101
102 memcpy(new_data, new_prop->value, new_prop->length);
103 memcpy(new_data + new_prop->length, value, vd);
104
105 kfree(new_prop->value);
106 new_prop->value = new_data;
107 new_prop->length += vd;
108 } else {
109 new_prop = kzalloc(sizeof(*new_prop), GFP_KERNEL);
110 if (!new_prop)
111 return -ENOMEM;
112
113 new_prop->name = kstrdup(name, GFP_KERNEL);
114 if (!new_prop->name) {
115 kfree(new_prop);
116 return -ENOMEM;
117 }
118
119 new_prop->length = vd;
120 new_prop->value = kzalloc(new_prop->length, GFP_KERNEL);
121 if (!new_prop->value) {
122 kfree(new_prop->name);
123 kfree(new_prop);
124 return -ENOMEM;
125 }
126
127 memcpy(new_prop->value, value, vd);
128 *prop = new_prop;
129 }
130
131 if (!more) {
Nathan Lynch5d8b1f92019-06-27 00:30:44 -0500132 pr_debug("updating node %pOF property %s\n", dn, name);
Nathan Fontenot79d1c712012-10-02 16:58:46 +0000133 of_update_property(dn, new_prop);
Tyrel Datwylerd8e533b2013-08-14 22:23:45 -0700134 *prop = NULL;
Nathan Fontenot410bccf2010-09-10 09:42:36 +0000135 }
136
137 return 0;
138}
139
Tyrel Datwylerf6ff0412015-03-04 11:59:33 -0800140static int update_dt_node(__be32 phandle, s32 scope)
Nathan Fontenot410bccf2010-09-10 09:42:36 +0000141{
142 struct update_props_workarea *upwa;
143 struct device_node *dn;
144 struct property *prop = NULL;
Tyrel Datwyler638a4052013-08-14 22:23:46 -0700145 int i, rc, rtas_rc;
Nathan Fontenot410bccf2010-09-10 09:42:36 +0000146 char *prop_data;
147 char *rtas_buf;
148 int update_properties_token;
Tyrel Datwylerf6ff0412015-03-04 11:59:33 -0800149 u32 nprops;
Nathan Fontenot2e9b7b02013-04-24 05:49:36 +0000150 u32 vd;
Nathan Fontenot410bccf2010-09-10 09:42:36 +0000151
152 update_properties_token = rtas_token("ibm,update-properties");
153 if (update_properties_token == RTAS_UNKNOWN_SERVICE)
154 return -EINVAL;
155
156 rtas_buf = kzalloc(RTAS_DATA_BUF_SIZE, GFP_KERNEL);
157 if (!rtas_buf)
158 return -ENOMEM;
159
Tyrel Datwylerf6ff0412015-03-04 11:59:33 -0800160 dn = of_find_node_by_phandle(be32_to_cpu(phandle));
Nathan Fontenot410bccf2010-09-10 09:42:36 +0000161 if (!dn) {
162 kfree(rtas_buf);
163 return -ENOENT;
164 }
165
166 upwa = (struct update_props_workarea *)&rtas_buf[0];
167 upwa->phandle = phandle;
168
169 do {
Tyrel Datwyler638a4052013-08-14 22:23:46 -0700170 rtas_rc = mobility_rtas_call(update_properties_token, rtas_buf,
Nathan Fontenot762ec152013-04-24 05:47:11 +0000171 scope);
Tyrel Datwyler638a4052013-08-14 22:23:46 -0700172 if (rtas_rc < 0)
Nathan Fontenot410bccf2010-09-10 09:42:36 +0000173 break;
174
175 prop_data = rtas_buf + sizeof(*upwa);
Tyrel Datwylerf6ff0412015-03-04 11:59:33 -0800176 nprops = be32_to_cpu(upwa->nprops);
Nathan Fontenot410bccf2010-09-10 09:42:36 +0000177
Tyrel Datwylerc8f5a572013-08-14 22:23:48 -0700178 /* On the first call to ibm,update-properties for a node the
179 * the first property value descriptor contains an empty
180 * property name, the property value length encoded as u32,
181 * and the property value is the node path being updated.
Nathan Fontenot2e9b7b02013-04-24 05:49:36 +0000182 */
Tyrel Datwylerc8f5a572013-08-14 22:23:48 -0700183 if (*prop_data == 0) {
184 prop_data++;
Tyrel Datwylerf6ff0412015-03-04 11:59:33 -0800185 vd = be32_to_cpu(*(__be32 *)prop_data);
Tyrel Datwylerc8f5a572013-08-14 22:23:48 -0700186 prop_data += vd + sizeof(vd);
Tyrel Datwylerf6ff0412015-03-04 11:59:33 -0800187 nprops--;
Tyrel Datwylerc8f5a572013-08-14 22:23:48 -0700188 }
Nathan Fontenot410bccf2010-09-10 09:42:36 +0000189
Tyrel Datwylerf6ff0412015-03-04 11:59:33 -0800190 for (i = 0; i < nprops; i++) {
Nathan Fontenot2e9b7b02013-04-24 05:49:36 +0000191 char *prop_name;
192
193 prop_name = prop_data;
Nathan Fontenot410bccf2010-09-10 09:42:36 +0000194 prop_data += strlen(prop_name) + 1;
Tyrel Datwylerf6ff0412015-03-04 11:59:33 -0800195 vd = be32_to_cpu(*(__be32 *)prop_data);
Nathan Fontenot2e9b7b02013-04-24 05:49:36 +0000196 prop_data += sizeof(vd);
Nathan Fontenot410bccf2010-09-10 09:42:36 +0000197
198 switch (vd) {
199 case 0x00000000:
200 /* name only property, nothing to do */
201 break;
202
203 case 0x80000000:
Suraj Jitindar Singh925e2d12016-04-28 15:34:55 +1000204 of_remove_property(dn, of_find_property(dn,
205 prop_name, NULL));
Nathan Fontenot410bccf2010-09-10 09:42:36 +0000206 prop = NULL;
207 break;
208
209 default:
210 rc = update_dt_property(dn, &prop, prop_name,
211 vd, prop_data);
212 if (rc) {
Nathan Lynch2d5be6f2020-12-07 15:51:41 -0600213 pr_err("updating %s property failed: %d\n",
214 prop_name, rc);
Nathan Fontenot410bccf2010-09-10 09:42:36 +0000215 }
216
217 prop_data += vd;
Nathan Lynchaa5e5c92020-12-07 15:51:40 -0600218 break;
Nathan Fontenot410bccf2010-09-10 09:42:36 +0000219 }
Nathan Lynchccfb5bd2019-08-02 14:29:26 -0500220
221 cond_resched();
Nathan Fontenot410bccf2010-09-10 09:42:36 +0000222 }
Nathan Lynchccfb5bd2019-08-02 14:29:26 -0500223
224 cond_resched();
Tyrel Datwyler638a4052013-08-14 22:23:46 -0700225 } while (rtas_rc == 1);
Nathan Fontenot410bccf2010-09-10 09:42:36 +0000226
227 of_node_put(dn);
228 kfree(rtas_buf);
229 return 0;
230}
231
Tyrel Datwylerf6ff0412015-03-04 11:59:33 -0800232static int add_dt_node(__be32 parent_phandle, __be32 drc_index)
Nathan Fontenot410bccf2010-09-10 09:42:36 +0000233{
234 struct device_node *dn;
235 struct device_node *parent_dn;
236 int rc;
237
Tyrel Datwylerf6ff0412015-03-04 11:59:33 -0800238 parent_dn = of_find_node_by_phandle(be32_to_cpu(parent_phandle));
Tyrel Datwyler8d5ff322013-08-14 22:23:50 -0700239 if (!parent_dn)
240 return -ENOENT;
241
242 dn = dlpar_configure_connector(drc_index, parent_dn);
Tyrel Datwylerb537ca62017-09-20 17:02:52 -0400243 if (!dn) {
244 of_node_put(parent_dn);
Nathan Fontenot410bccf2010-09-10 09:42:36 +0000245 return -ENOENT;
Tyrel Datwylerb537ca62017-09-20 17:02:52 -0400246 }
Nathan Fontenot410bccf2010-09-10 09:42:36 +0000247
Rob Herring215ee762017-08-21 10:16:49 -0500248 rc = dlpar_attach_node(dn, parent_dn);
Nathan Fontenot410bccf2010-09-10 09:42:36 +0000249 if (rc)
250 dlpar_free_cc_nodes(dn);
251
Nathan Lynch5d8b1f92019-06-27 00:30:44 -0500252 pr_debug("added node %pOFfp\n", dn);
253
Nathan Fontenot410bccf2010-09-10 09:42:36 +0000254 of_node_put(parent_dn);
255 return rc;
256}
257
Nathan Fontenot762ec152013-04-24 05:47:11 +0000258int pseries_devicetree_update(s32 scope)
Nathan Fontenot410bccf2010-09-10 09:42:36 +0000259{
260 char *rtas_buf;
Tyrel Datwylerf6ff0412015-03-04 11:59:33 -0800261 __be32 *data;
Nathan Fontenot410bccf2010-09-10 09:42:36 +0000262 int update_nodes_token;
263 int rc;
264
265 update_nodes_token = rtas_token("ibm,update-nodes");
266 if (update_nodes_token == RTAS_UNKNOWN_SERVICE)
Nathan Lynchb06a6712020-12-07 15:51:39 -0600267 return 0;
Nathan Fontenot410bccf2010-09-10 09:42:36 +0000268
269 rtas_buf = kzalloc(RTAS_DATA_BUF_SIZE, GFP_KERNEL);
270 if (!rtas_buf)
271 return -ENOMEM;
272
273 do {
Nathan Fontenot762ec152013-04-24 05:47:11 +0000274 rc = mobility_rtas_call(update_nodes_token, rtas_buf, scope);
Nathan Fontenot410bccf2010-09-10 09:42:36 +0000275 if (rc && rc != 1)
276 break;
277
Tyrel Datwylerf6ff0412015-03-04 11:59:33 -0800278 data = (__be32 *)rtas_buf + 4;
279 while (be32_to_cpu(*data) & NODE_ACTION_MASK) {
Nathan Fontenot410bccf2010-09-10 09:42:36 +0000280 int i;
Tyrel Datwylerf6ff0412015-03-04 11:59:33 -0800281 u32 action = be32_to_cpu(*data) & NODE_ACTION_MASK;
282 u32 node_count = be32_to_cpu(*data) & NODE_COUNT_MASK;
Nathan Fontenot410bccf2010-09-10 09:42:36 +0000283
284 data++;
285
286 for (i = 0; i < node_count; i++) {
Tyrel Datwylerf6ff0412015-03-04 11:59:33 -0800287 __be32 phandle = *data++;
288 __be32 drc_index;
Nathan Fontenot410bccf2010-09-10 09:42:36 +0000289
290 switch (action) {
291 case DELETE_DT_NODE:
292 delete_dt_node(phandle);
293 break;
294 case UPDATE_DT_NODE:
Nathan Fontenot762ec152013-04-24 05:47:11 +0000295 update_dt_node(phandle, scope);
Nathan Fontenot410bccf2010-09-10 09:42:36 +0000296 break;
297 case ADD_DT_NODE:
298 drc_index = *data++;
299 add_dt_node(phandle, drc_index);
300 break;
301 }
Nathan Lynchccfb5bd2019-08-02 14:29:26 -0500302
303 cond_resched();
Nathan Fontenot410bccf2010-09-10 09:42:36 +0000304 }
305 }
Nathan Lynchccfb5bd2019-08-02 14:29:26 -0500306
307 cond_resched();
Nathan Fontenot410bccf2010-09-10 09:42:36 +0000308 } while (rc == 1);
309
310 kfree(rtas_buf);
311 return rc;
312}
313
314void post_mobility_fixup(void)
315{
316 int rc;
Nathan Fontenot410bccf2010-09-10 09:42:36 +0000317
Nathan Lynchc3ae9782020-12-07 15:51:42 -0600318 rtas_activate_firmware();
Haren Myneni39a33b52014-02-19 12:56:52 -0800319
Nathan Lynche59a1752019-06-11 23:45:05 -0500320 /*
321 * We don't want CPUs to go online/offline while the device
322 * tree is being updated.
323 */
324 cpus_read_lock();
325
Nathan Lynche610a462019-06-11 23:45:06 -0500326 /*
327 * It's common for the destination firmware to replace cache
328 * nodes. Release all of the cacheinfo hierarchy's references
329 * before updating the device tree.
330 */
331 cacheinfo_teardown();
332
Haren Myneni39a33b52014-02-19 12:56:52 -0800333 rc = pseries_devicetree_update(MIGRATION_SCOPE);
334 if (rc)
Nathan Lynch2d5be6f2020-12-07 15:51:41 -0600335 pr_err("device tree update failed: %d\n", rc);
Nathan Fontenot410bccf2010-09-10 09:42:36 +0000336
Nathan Lynche610a462019-06-11 23:45:06 -0500337 cacheinfo_rebuild();
338
Nathan Lynche59a1752019-06-11 23:45:05 -0500339 cpus_read_unlock();
340
Daniel Axtensda631f72020-11-17 16:59:16 +1100341 /* Possibly switch to a new L1 flush type */
342 pseries_setup_security_mitigations();
Michael Ellerman921bc6c2018-03-14 19:40:42 -0300343
Kajol Jain373b3732020-05-25 16:13:07 +0530344 /* Reinitialise system information for hv-24x7 */
345 read_24x7_sys_info();
346
Nathan Fontenot410bccf2010-09-10 09:42:36 +0000347 return;
348}
349
Nathan Lynchd9213312020-12-07 15:51:43 -0600350static int poll_vasi_state(u64 handle, unsigned long *res)
351{
352 unsigned long retbuf[PLPAR_HCALL_BUFSIZE];
353 long hvrc;
354 int ret;
355
356 hvrc = plpar_hcall(H_VASI_STATE, retbuf, handle);
357 switch (hvrc) {
358 case H_SUCCESS:
359 ret = 0;
360 *res = retbuf[0];
361 break;
362 case H_PARAMETER:
363 ret = -EINVAL;
364 break;
365 case H_FUNCTION:
366 ret = -EOPNOTSUPP;
367 break;
368 case H_HARDWARE:
369 default:
370 pr_err("unexpected H_VASI_STATE result %ld\n", hvrc);
371 ret = -EIO;
372 break;
373 }
374 return ret;
375}
376
377static int wait_for_vasi_session_suspending(u64 handle)
378{
379 unsigned long state;
380 int ret;
381
382 /*
383 * Wait for transition from H_VASI_ENABLED to
384 * H_VASI_SUSPENDING. Treat anything else as an error.
385 */
386 while (true) {
387 ret = poll_vasi_state(handle, &state);
388
389 if (ret != 0 || state == H_VASI_SUSPENDING) {
390 break;
391 } else if (state == H_VASI_ENABLED) {
392 ssleep(1);
393 } else {
394 pr_err("unexpected H_VASI_STATE result %lu\n", state);
395 ret = -EIO;
396 break;
397 }
398 }
399
400 /*
401 * Proceed even if H_VASI_STATE is unavailable. If H_JOIN or
402 * ibm,suspend-me are also unimplemented, we'll recover then.
403 */
404 if (ret == -EOPNOTSUPP)
405 ret = 0;
406
407 return ret;
408}
409
Nathan Lynch9327dc02020-12-07 15:51:44 -0600410static void prod_single(unsigned int target_cpu)
411{
412 long hvrc;
413 int hwid;
414
415 hwid = get_hard_smp_processor_id(target_cpu);
416 hvrc = plpar_hcall_norets(H_PROD, hwid);
417 if (hvrc == H_SUCCESS)
418 return;
419 pr_err_ratelimited("H_PROD of CPU %u (hwid %d) error: %ld\n",
420 target_cpu, hwid, hvrc);
421}
422
423static void prod_others(void)
424{
425 unsigned int cpu;
426
427 for_each_online_cpu(cpu) {
428 if (cpu != smp_processor_id())
429 prod_single(cpu);
430 }
431}
432
433static u16 clamp_slb_size(void)
434{
435 u16 prev = mmu_slb_size;
436
437 slb_set_size(SLB_MIN_SIZE);
438
439 return prev;
440}
441
442static int do_suspend(void)
443{
444 u16 saved_slb_size;
445 int status;
446 int ret;
447
448 pr_info("calling ibm,suspend-me on CPU %i\n", smp_processor_id());
449
450 /*
451 * The destination processor model may have fewer SLB entries
452 * than the source. We reduce mmu_slb_size to a safe minimum
453 * before suspending in order to minimize the possibility of
454 * programming non-existent entries on the destination. If
455 * suspend fails, we restore it before returning. On success
456 * the OF reconfig path will update it from the new device
457 * tree after resuming on the destination.
458 */
459 saved_slb_size = clamp_slb_size();
460
461 ret = rtas_ibm_suspend_me(&status);
462 if (ret != 0) {
463 pr_err("ibm,suspend-me error: %d\n", status);
464 slb_set_size(saved_slb_size);
465 }
466
467 return ret;
468}
469
470static int do_join(void *arg)
471{
472 atomic_t *counter = arg;
473 long hvrc;
474 int ret;
475
476 /* Must ensure MSR.EE off for H_JOIN. */
477 hard_irq_disable();
478 hvrc = plpar_hcall_norets(H_JOIN);
479
480 switch (hvrc) {
481 case H_CONTINUE:
482 /*
483 * All other CPUs are offline or in H_JOIN. This CPU
484 * attempts the suspend.
485 */
486 ret = do_suspend();
487 break;
488 case H_SUCCESS:
489 /*
490 * The suspend is complete and this cpu has received a
491 * prod.
492 */
493 ret = 0;
494 break;
495 case H_BAD_MODE:
496 case H_HARDWARE:
497 default:
498 ret = -EIO;
499 pr_err_ratelimited("H_JOIN error %ld on CPU %i\n",
500 hvrc, smp_processor_id());
501 break;
502 }
503
504 if (atomic_inc_return(counter) == 1) {
505 pr_info("CPU %u waking all threads\n", smp_processor_id());
506 prod_others();
507 }
508 /*
509 * Execution may have been suspended for several seconds, so
510 * reset the watchdog.
511 */
512 touch_nmi_watchdog();
513 return ret;
514}
515
Nathan Lynch37cddc72020-12-07 15:51:45 -0600516/*
517 * Abort reason code byte 0. We use only the 'Migrating partition' value.
518 */
519enum vasi_aborting_entity {
520 ORCHESTRATOR = 1,
521 VSP_SOURCE = 2,
522 PARTITION_FIRMWARE = 3,
523 PLATFORM_FIRMWARE = 4,
524 VSP_TARGET = 5,
525 MIGRATING_PARTITION = 6,
526};
527
528static void pseries_cancel_migration(u64 handle, int err)
529{
530 u32 reason_code;
531 u32 detail;
532 u8 entity;
533 long hvrc;
534
535 entity = MIGRATING_PARTITION;
536 detail = abs(err) & 0xffffff;
537 reason_code = (entity << 24) | detail;
538
539 hvrc = plpar_hcall_norets(H_VASI_SIGNAL, handle,
540 H_VASI_SIGNAL_CANCEL, reason_code);
541 if (hvrc)
542 pr_err("H_VASI_SIGNAL error: %ld\n", hvrc);
543}
544
Nathan Lynchaeca35b2020-12-07 15:51:46 -0600545static int pseries_suspend(u64 handle)
546{
547 const unsigned int max_attempts = 5;
548 unsigned int retry_interval_ms = 1;
549 unsigned int attempt = 1;
550 int ret;
551
552 while (true) {
553 atomic_t counter = ATOMIC_INIT(0);
554 unsigned long vasi_state;
555 int vasi_err;
556
557 ret = stop_machine(do_join, &counter, cpu_online_mask);
558 if (ret == 0)
559 break;
560 /*
561 * Encountered an error. If the VASI stream is still
562 * in Suspending state, it's likely a transient
563 * condition related to some device in the partition
564 * and we can retry in the hope that the cause has
565 * cleared after some delay.
566 *
567 * A better design would allow drivers etc to prepare
568 * for the suspend and avoid conditions which prevent
569 * the suspend from succeeding. For now, we have this
570 * mitigation.
571 */
572 pr_notice("Partition suspend attempt %u of %u error: %d\n",
573 attempt, max_attempts, ret);
574
575 if (attempt == max_attempts)
576 break;
577
578 vasi_err = poll_vasi_state(handle, &vasi_state);
579 if (vasi_err == 0) {
580 if (vasi_state != H_VASI_SUSPENDING) {
581 pr_notice("VASI state %lu after failed suspend\n",
582 vasi_state);
583 break;
584 }
585 } else if (vasi_err != -EOPNOTSUPP) {
586 pr_err("VASI state poll error: %d", vasi_err);
587 break;
588 }
589
590 pr_notice("Will retry partition suspend after %u ms\n",
591 retry_interval_ms);
592
593 msleep(retry_interval_ms);
594 retry_interval_ms *= 10;
595 attempt++;
596 }
597
598 return ret;
599}
600
Nathan Lynch9327dc02020-12-07 15:51:44 -0600601static int pseries_migrate_partition(u64 handle)
602{
Nathan Lynch9327dc02020-12-07 15:51:44 -0600603 int ret;
604
605 ret = wait_for_vasi_session_suspending(handle);
606 if (ret)
607 return ret;
608
Nathan Lynchaeca35b2020-12-07 15:51:46 -0600609 ret = pseries_suspend(handle);
Nathan Lynch9327dc02020-12-07 15:51:44 -0600610 if (ret == 0)
611 post_mobility_fixup();
Nathan Lynch37cddc72020-12-07 15:51:45 -0600612 else
613 pseries_cancel_migration(handle, ret);
Nathan Lynch9327dc02020-12-07 15:51:44 -0600614
615 return ret;
616}
617
Greg Kroah-Hartman6f428092017-06-06 15:32:03 +0200618static ssize_t migration_store(struct class *class,
619 struct class_attribute *attr, const char *buf,
620 size_t count)
Nathan Fontenot410bccf2010-09-10 09:42:36 +0000621{
Nathan Fontenot410bccf2010-09-10 09:42:36 +0000622 u64 streamid;
623 int rc;
624
Daniel Walter1618bd52014-08-08 14:24:01 -0700625 rc = kstrtou64(buf, 0, &streamid);
Nathan Fontenot410bccf2010-09-10 09:42:36 +0000626 if (rc)
627 return rc;
628
Nathan Lynch9327dc02020-12-07 15:51:44 -0600629 rc = pseries_migrate_partition(streamid);
Nathan Lynchd9213312020-12-07 15:51:43 -0600630 if (rc)
631 return rc;
Nathan Fontenot410bccf2010-09-10 09:42:36 +0000632
Nathan Fontenot410bccf2010-09-10 09:42:36 +0000633 return count;
634}
635
Tyrel Datwyler288a2982015-03-04 18:25:38 -0800636/*
637 * Used by drmgr to determine the kernel behavior of the migration interface.
638 *
639 * Version 1: Performs all PAPR requirements for migration including
640 * firmware activation and device tree update.
641 */
642#define MIGRATION_API_VERSION 1
643
Greg Kroah-Hartman6f428092017-06-06 15:32:03 +0200644static CLASS_ATTR_WO(migration);
Russell Currey57ad583f2017-01-12 14:54:13 +1100645static CLASS_ATTR_STRING(api_version, 0444, __stringify(MIGRATION_API_VERSION));
Nathan Fontenot410bccf2010-09-10 09:42:36 +0000646
647static int __init mobility_sysfs_init(void)
648{
649 int rc;
650
651 mobility_kobj = kobject_create_and_add("mobility", kernel_kobj);
652 if (!mobility_kobj)
653 return -ENOMEM;
654
655 rc = sysfs_create_file(mobility_kobj, &class_attr_migration.attr);
Tyrel Datwyler288a2982015-03-04 18:25:38 -0800656 if (rc)
Nathan Lynch494a66f2019-06-27 00:30:43 -0500657 pr_err("unable to create migration sysfs file (%d)\n", rc);
Nathan Fontenot410bccf2010-09-10 09:42:36 +0000658
Tyrel Datwyler288a2982015-03-04 18:25:38 -0800659 rc = sysfs_create_file(mobility_kobj, &class_attr_api_version.attr.attr);
660 if (rc)
Nathan Lynch494a66f2019-06-27 00:30:43 -0500661 pr_err("unable to create api_version sysfs file (%d)\n", rc);
Tyrel Datwyler288a2982015-03-04 18:25:38 -0800662
663 return 0;
Nathan Fontenot410bccf2010-09-10 09:42:36 +0000664}
Michael Ellerman8e83e902014-07-16 12:02:43 +1000665machine_device_initcall(pseries, mobility_sysfs_init);