forked from torvalds/linux
-
Notifications
You must be signed in to change notification settings - Fork 0
/
cpu.c
3224 lines (2759 loc) · 79.1 KB
/
cpu.c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
/* CPU control.
* (C) 2001, 2002, 2003, 2004 Rusty Russell
*
* This code is licenced under the GPL.
*/
#include <linux/sched/mm.h>
#include <linux/proc_fs.h>
#include <linux/smp.h>
#include <linux/init.h>
#include <linux/notifier.h>
#include <linux/sched/signal.h>
#include <linux/sched/hotplug.h>
#include <linux/sched/isolation.h>
#include <linux/sched/task.h>
#include <linux/sched/smt.h>
#include <linux/unistd.h>
#include <linux/cpu.h>
#include <linux/oom.h>
#include <linux/rcupdate.h>
#include <linux/delay.h>
#include <linux/export.h>
#include <linux/bug.h>
#include <linux/kthread.h>
#include <linux/stop_machine.h>
#include <linux/mutex.h>
#include <linux/gfp.h>
#include <linux/suspend.h>
#include <linux/lockdep.h>
#include <linux/tick.h>
#include <linux/irq.h>
#include <linux/nmi.h>
#include <linux/smpboot.h>
#include <linux/relay.h>
#include <linux/slab.h>
#include <linux/scs.h>
#include <linux/percpu-rwsem.h>
#include <linux/cpuset.h>
#include <linux/random.h>
#include <linux/cc_platform.h>
#include <trace/events/power.h>
#define CREATE_TRACE_POINTS
#include <trace/events/cpuhp.h>
#include "smpboot.h"
/**
* struct cpuhp_cpu_state - Per cpu hotplug state storage
* @state: The current cpu state
* @target: The target state
* @fail: Current CPU hotplug callback state
* @thread: Pointer to the hotplug thread
* @should_run: Thread should execute
* @rollback: Perform a rollback
* @single: Single callback invocation
* @bringup: Single callback bringup or teardown selector
* @cpu: CPU number
* @node: Remote CPU node; for multi-instance, do a
* single entry callback for install/remove
* @last: For multi-instance rollback, remember how far we got
* @cb_state: The state for a single callback (install/uninstall)
* @result: Result of the operation
* @ap_sync_state: State for AP synchronization
* @done_up: Signal completion to the issuer of the task for cpu-up
* @done_down: Signal completion to the issuer of the task for cpu-down
*/
struct cpuhp_cpu_state {
enum cpuhp_state state;
enum cpuhp_state target;
enum cpuhp_state fail;
#ifdef CONFIG_SMP
struct task_struct *thread;
bool should_run;
bool rollback;
bool single;
bool bringup;
struct hlist_node *node;
struct hlist_node *last;
enum cpuhp_state cb_state;
int result;
atomic_t ap_sync_state;
struct completion done_up;
struct completion done_down;
#endif
};
static DEFINE_PER_CPU(struct cpuhp_cpu_state, cpuhp_state) = {
.fail = CPUHP_INVALID,
};
#ifdef CONFIG_SMP
cpumask_t cpus_booted_once_mask;
#endif
#if defined(CONFIG_LOCKDEP) && defined(CONFIG_SMP)
static struct lockdep_map cpuhp_state_up_map =
STATIC_LOCKDEP_MAP_INIT("cpuhp_state-up", &cpuhp_state_up_map);
static struct lockdep_map cpuhp_state_down_map =
STATIC_LOCKDEP_MAP_INIT("cpuhp_state-down", &cpuhp_state_down_map);
static inline void cpuhp_lock_acquire(bool bringup)
{
lock_map_acquire(bringup ? &cpuhp_state_up_map : &cpuhp_state_down_map);
}
static inline void cpuhp_lock_release(bool bringup)
{
lock_map_release(bringup ? &cpuhp_state_up_map : &cpuhp_state_down_map);
}
#else
static inline void cpuhp_lock_acquire(bool bringup) { }
static inline void cpuhp_lock_release(bool bringup) { }
#endif
/**
* struct cpuhp_step - Hotplug state machine step
* @name: Name of the step
* @startup: Startup function of the step
* @teardown: Teardown function of the step
* @cant_stop: Bringup/teardown can't be stopped at this step
* @multi_instance: State has multiple instances which get added afterwards
*/
struct cpuhp_step {
const char *name;
union {
int (*single)(unsigned int cpu);
int (*multi)(unsigned int cpu,
struct hlist_node *node);
} startup;
union {
int (*single)(unsigned int cpu);
int (*multi)(unsigned int cpu,
struct hlist_node *node);
} teardown;
/* private: */
struct hlist_head list;
/* public: */
bool cant_stop;
bool multi_instance;
};
static DEFINE_MUTEX(cpuhp_state_mutex);
static struct cpuhp_step cpuhp_hp_states[];
static struct cpuhp_step *cpuhp_get_step(enum cpuhp_state state)
{
return cpuhp_hp_states + state;
}
static bool cpuhp_step_empty(bool bringup, struct cpuhp_step *step)
{
return bringup ? !step->startup.single : !step->teardown.single;
}
/**
* cpuhp_invoke_callback - Invoke the callbacks for a given state
* @cpu: The cpu for which the callback should be invoked
* @state: The state to do callbacks for
* @bringup: True if the bringup callback should be invoked
* @node: For multi-instance, do a single entry callback for install/remove
* @lastp: For multi-instance rollback, remember how far we got
*
* Called from cpu hotplug and from the state register machinery.
*
* Return: %0 on success or a negative errno code
*/
static int cpuhp_invoke_callback(unsigned int cpu, enum cpuhp_state state,
bool bringup, struct hlist_node *node,
struct hlist_node **lastp)
{
struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
struct cpuhp_step *step = cpuhp_get_step(state);
int (*cbm)(unsigned int cpu, struct hlist_node *node);
int (*cb)(unsigned int cpu);
int ret, cnt;
if (st->fail == state) {
st->fail = CPUHP_INVALID;
return -EAGAIN;
}
if (cpuhp_step_empty(bringup, step)) {
WARN_ON_ONCE(1);
return 0;
}
if (!step->multi_instance) {
WARN_ON_ONCE(lastp && *lastp);
cb = bringup ? step->startup.single : step->teardown.single;
trace_cpuhp_enter(cpu, st->target, state, cb);
ret = cb(cpu);
trace_cpuhp_exit(cpu, st->state, state, ret);
return ret;
}
cbm = bringup ? step->startup.multi : step->teardown.multi;
/* Single invocation for instance add/remove */
if (node) {
WARN_ON_ONCE(lastp && *lastp);
trace_cpuhp_multi_enter(cpu, st->target, state, cbm, node);
ret = cbm(cpu, node);
trace_cpuhp_exit(cpu, st->state, state, ret);
return ret;
}
/* State transition. Invoke on all instances */
cnt = 0;
hlist_for_each(node, &step->list) {
if (lastp && node == *lastp)
break;
trace_cpuhp_multi_enter(cpu, st->target, state, cbm, node);
ret = cbm(cpu, node);
trace_cpuhp_exit(cpu, st->state, state, ret);
if (ret) {
if (!lastp)
goto err;
*lastp = node;
return ret;
}
cnt++;
}
if (lastp)
*lastp = NULL;
return 0;
err:
/* Rollback the instances if one failed */
cbm = !bringup ? step->startup.multi : step->teardown.multi;
if (!cbm)
return ret;
hlist_for_each(node, &step->list) {
if (!cnt--)
break;
trace_cpuhp_multi_enter(cpu, st->target, state, cbm, node);
ret = cbm(cpu, node);
trace_cpuhp_exit(cpu, st->state, state, ret);
/*
* Rollback must not fail,
*/
WARN_ON_ONCE(ret);
}
return ret;
}
#ifdef CONFIG_SMP
static bool cpuhp_is_ap_state(enum cpuhp_state state)
{
/*
* The extra check for CPUHP_TEARDOWN_CPU is only for documentation
* purposes as that state is handled explicitly in cpu_down.
*/
return state > CPUHP_BRINGUP_CPU && state != CPUHP_TEARDOWN_CPU;
}
static inline void wait_for_ap_thread(struct cpuhp_cpu_state *st, bool bringup)
{
struct completion *done = bringup ? &st->done_up : &st->done_down;
wait_for_completion(done);
}
static inline void complete_ap_thread(struct cpuhp_cpu_state *st, bool bringup)
{
struct completion *done = bringup ? &st->done_up : &st->done_down;
complete(done);
}
/*
* The former STARTING/DYING states, ran with IRQs disabled and must not fail.
*/
static bool cpuhp_is_atomic_state(enum cpuhp_state state)
{
return CPUHP_AP_IDLE_DEAD <= state && state < CPUHP_AP_ONLINE;
}
/* Synchronization state management */
enum cpuhp_sync_state {
SYNC_STATE_DEAD,
SYNC_STATE_KICKED,
SYNC_STATE_SHOULD_DIE,
SYNC_STATE_ALIVE,
SYNC_STATE_SHOULD_ONLINE,
SYNC_STATE_ONLINE,
};
#ifdef CONFIG_HOTPLUG_CORE_SYNC
/**
* cpuhp_ap_update_sync_state - Update synchronization state during bringup/teardown
* @state: The synchronization state to set
*
* No synchronization point. Just update of the synchronization state, but implies
* a full barrier so that the AP changes are visible before the control CPU proceeds.
*/
static inline void cpuhp_ap_update_sync_state(enum cpuhp_sync_state state)
{
atomic_t *st = this_cpu_ptr(&cpuhp_state.ap_sync_state);
(void)atomic_xchg(st, state);
}
void __weak arch_cpuhp_sync_state_poll(void) { cpu_relax(); }
static bool cpuhp_wait_for_sync_state(unsigned int cpu, enum cpuhp_sync_state state,
enum cpuhp_sync_state next_state)
{
atomic_t *st = per_cpu_ptr(&cpuhp_state.ap_sync_state, cpu);
ktime_t now, end, start = ktime_get();
int sync;
end = start + 10ULL * NSEC_PER_SEC;
sync = atomic_read(st);
while (1) {
if (sync == state) {
if (!atomic_try_cmpxchg(st, &sync, next_state))
continue;
return true;
}
now = ktime_get();
if (now > end) {
/* Timeout. Leave the state unchanged */
return false;
} else if (now - start < NSEC_PER_MSEC) {
/* Poll for one millisecond */
arch_cpuhp_sync_state_poll();
} else {
usleep_range_state(USEC_PER_MSEC, 2 * USEC_PER_MSEC, TASK_UNINTERRUPTIBLE);
}
sync = atomic_read(st);
}
return true;
}
#else /* CONFIG_HOTPLUG_CORE_SYNC */
static inline void cpuhp_ap_update_sync_state(enum cpuhp_sync_state state) { }
#endif /* !CONFIG_HOTPLUG_CORE_SYNC */
#ifdef CONFIG_HOTPLUG_CORE_SYNC_DEAD
/**
* cpuhp_ap_report_dead - Update synchronization state to DEAD
*
* No synchronization point. Just update of the synchronization state.
*/
void cpuhp_ap_report_dead(void)
{
cpuhp_ap_update_sync_state(SYNC_STATE_DEAD);
}
void __weak arch_cpuhp_cleanup_dead_cpu(unsigned int cpu) { }
/*
* Late CPU shutdown synchronization point. Cannot use cpuhp_state::done_down
* because the AP cannot issue complete() at this stage.
*/
static void cpuhp_bp_sync_dead(unsigned int cpu)
{
atomic_t *st = per_cpu_ptr(&cpuhp_state.ap_sync_state, cpu);
int sync = atomic_read(st);
do {
/* CPU can have reported dead already. Don't overwrite that! */
if (sync == SYNC_STATE_DEAD)
break;
} while (!atomic_try_cmpxchg(st, &sync, SYNC_STATE_SHOULD_DIE));
if (cpuhp_wait_for_sync_state(cpu, SYNC_STATE_DEAD, SYNC_STATE_DEAD)) {
/* CPU reached dead state. Invoke the cleanup function */
arch_cpuhp_cleanup_dead_cpu(cpu);
return;
}
/* No further action possible. Emit message and give up. */
pr_err("CPU%u failed to report dead state\n", cpu);
}
#else /* CONFIG_HOTPLUG_CORE_SYNC_DEAD */
static inline void cpuhp_bp_sync_dead(unsigned int cpu) { }
#endif /* !CONFIG_HOTPLUG_CORE_SYNC_DEAD */
#ifdef CONFIG_HOTPLUG_CORE_SYNC_FULL
/**
* cpuhp_ap_sync_alive - Synchronize AP with the control CPU once it is alive
*
* Updates the AP synchronization state to SYNC_STATE_ALIVE and waits
* for the BP to release it.
*/
void cpuhp_ap_sync_alive(void)
{
atomic_t *st = this_cpu_ptr(&cpuhp_state.ap_sync_state);
cpuhp_ap_update_sync_state(SYNC_STATE_ALIVE);
/* Wait for the control CPU to release it. */
while (atomic_read(st) != SYNC_STATE_SHOULD_ONLINE)
cpu_relax();
}
static bool cpuhp_can_boot_ap(unsigned int cpu)
{
atomic_t *st = per_cpu_ptr(&cpuhp_state.ap_sync_state, cpu);
int sync = atomic_read(st);
again:
switch (sync) {
case SYNC_STATE_DEAD:
/* CPU is properly dead */
break;
case SYNC_STATE_KICKED:
/* CPU did not come up in previous attempt */
break;
case SYNC_STATE_ALIVE:
/* CPU is stuck cpuhp_ap_sync_alive(). */
break;
default:
/* CPU failed to report online or dead and is in limbo state. */
return false;
}
/* Prepare for booting */
if (!atomic_try_cmpxchg(st, &sync, SYNC_STATE_KICKED))
goto again;
return true;
}
void __weak arch_cpuhp_cleanup_kick_cpu(unsigned int cpu) { }
/*
* Early CPU bringup synchronization point. Cannot use cpuhp_state::done_up
* because the AP cannot issue complete() so early in the bringup.
*/
static int cpuhp_bp_sync_alive(unsigned int cpu)
{
int ret = 0;
if (!IS_ENABLED(CONFIG_HOTPLUG_CORE_SYNC_FULL))
return 0;
if (!cpuhp_wait_for_sync_state(cpu, SYNC_STATE_ALIVE, SYNC_STATE_SHOULD_ONLINE)) {
pr_err("CPU%u failed to report alive state\n", cpu);
ret = -EIO;
}
/* Let the architecture cleanup the kick alive mechanics. */
arch_cpuhp_cleanup_kick_cpu(cpu);
return ret;
}
#else /* CONFIG_HOTPLUG_CORE_SYNC_FULL */
static inline int cpuhp_bp_sync_alive(unsigned int cpu) { return 0; }
static inline bool cpuhp_can_boot_ap(unsigned int cpu) { return true; }
#endif /* !CONFIG_HOTPLUG_CORE_SYNC_FULL */
/* Serializes the updates to cpu_online_mask, cpu_present_mask */
static DEFINE_MUTEX(cpu_add_remove_lock);
bool cpuhp_tasks_frozen;
EXPORT_SYMBOL_GPL(cpuhp_tasks_frozen);
/*
* The following two APIs (cpu_maps_update_begin/done) must be used when
* attempting to serialize the updates to cpu_online_mask & cpu_present_mask.
*/
void cpu_maps_update_begin(void)
{
mutex_lock(&cpu_add_remove_lock);
}
void cpu_maps_update_done(void)
{
mutex_unlock(&cpu_add_remove_lock);
}
/*
* If set, cpu_up and cpu_down will return -EBUSY and do nothing.
* Should always be manipulated under cpu_add_remove_lock
*/
static int cpu_hotplug_disabled;
#ifdef CONFIG_HOTPLUG_CPU
DEFINE_STATIC_PERCPU_RWSEM(cpu_hotplug_lock);
void cpus_read_lock(void)
{
percpu_down_read(&cpu_hotplug_lock);
}
EXPORT_SYMBOL_GPL(cpus_read_lock);
int cpus_read_trylock(void)
{
return percpu_down_read_trylock(&cpu_hotplug_lock);
}
EXPORT_SYMBOL_GPL(cpus_read_trylock);
void cpus_read_unlock(void)
{
percpu_up_read(&cpu_hotplug_lock);
}
EXPORT_SYMBOL_GPL(cpus_read_unlock);
void cpus_write_lock(void)
{
percpu_down_write(&cpu_hotplug_lock);
}
void cpus_write_unlock(void)
{
percpu_up_write(&cpu_hotplug_lock);
}
void lockdep_assert_cpus_held(void)
{
/*
* We can't have hotplug operations before userspace starts running,
* and some init codepaths will knowingly not take the hotplug lock.
* This is all valid, so mute lockdep until it makes sense to report
* unheld locks.
*/
if (system_state < SYSTEM_RUNNING)
return;
percpu_rwsem_assert_held(&cpu_hotplug_lock);
}
#ifdef CONFIG_LOCKDEP
int lockdep_is_cpus_held(void)
{
return percpu_rwsem_is_held(&cpu_hotplug_lock);
}
#endif
static void lockdep_acquire_cpus_lock(void)
{
rwsem_acquire(&cpu_hotplug_lock.dep_map, 0, 0, _THIS_IP_);
}
static void lockdep_release_cpus_lock(void)
{
rwsem_release(&cpu_hotplug_lock.dep_map, _THIS_IP_);
}
/*
* Wait for currently running CPU hotplug operations to complete (if any) and
* disable future CPU hotplug (from sysfs). The 'cpu_add_remove_lock' protects
* the 'cpu_hotplug_disabled' flag. The same lock is also acquired by the
* hotplug path before performing hotplug operations. So acquiring that lock
* guarantees mutual exclusion from any currently running hotplug operations.
*/
void cpu_hotplug_disable(void)
{
cpu_maps_update_begin();
cpu_hotplug_disabled++;
cpu_maps_update_done();
}
EXPORT_SYMBOL_GPL(cpu_hotplug_disable);
static void __cpu_hotplug_enable(void)
{
if (WARN_ONCE(!cpu_hotplug_disabled, "Unbalanced cpu hotplug enable\n"))
return;
cpu_hotplug_disabled--;
}
void cpu_hotplug_enable(void)
{
cpu_maps_update_begin();
__cpu_hotplug_enable();
cpu_maps_update_done();
}
EXPORT_SYMBOL_GPL(cpu_hotplug_enable);
#else
static void lockdep_acquire_cpus_lock(void)
{
}
static void lockdep_release_cpus_lock(void)
{
}
#endif /* CONFIG_HOTPLUG_CPU */
/*
* Architectures that need SMT-specific errata handling during SMT hotplug
* should override this.
*/
void __weak arch_smt_update(void) { }
#ifdef CONFIG_HOTPLUG_SMT
enum cpuhp_smt_control cpu_smt_control __read_mostly = CPU_SMT_ENABLED;
static unsigned int cpu_smt_max_threads __ro_after_init;
unsigned int cpu_smt_num_threads __read_mostly = UINT_MAX;
void __init cpu_smt_disable(bool force)
{
if (!cpu_smt_possible())
return;
if (force) {
pr_info("SMT: Force disabled\n");
cpu_smt_control = CPU_SMT_FORCE_DISABLED;
} else {
pr_info("SMT: disabled\n");
cpu_smt_control = CPU_SMT_DISABLED;
}
cpu_smt_num_threads = 1;
}
/*
* The decision whether SMT is supported can only be done after the full
* CPU identification. Called from architecture code.
*/
void __init cpu_smt_set_num_threads(unsigned int num_threads,
unsigned int max_threads)
{
WARN_ON(!num_threads || (num_threads > max_threads));
if (max_threads == 1)
cpu_smt_control = CPU_SMT_NOT_SUPPORTED;
cpu_smt_max_threads = max_threads;
/*
* If SMT has been disabled via the kernel command line or SMT is
* not supported, set cpu_smt_num_threads to 1 for consistency.
* If enabled, take the architecture requested number of threads
* to bring up into account.
*/
if (cpu_smt_control != CPU_SMT_ENABLED)
cpu_smt_num_threads = 1;
else if (num_threads < cpu_smt_num_threads)
cpu_smt_num_threads = num_threads;
}
static int __init smt_cmdline_disable(char *str)
{
cpu_smt_disable(str && !strcmp(str, "force"));
return 0;
}
early_param("nosmt", smt_cmdline_disable);
/*
* For Archicture supporting partial SMT states check if the thread is allowed.
* Otherwise this has already been checked through cpu_smt_max_threads when
* setting the SMT level.
*/
static inline bool cpu_smt_thread_allowed(unsigned int cpu)
{
#ifdef CONFIG_SMT_NUM_THREADS_DYNAMIC
return topology_smt_thread_allowed(cpu);
#else
return true;
#endif
}
static inline bool cpu_smt_allowed(unsigned int cpu)
{
if (cpu_smt_control == CPU_SMT_ENABLED && cpu_smt_thread_allowed(cpu))
return true;
if (topology_is_primary_thread(cpu))
return true;
/*
* On x86 it's required to boot all logical CPUs at least once so
* that the init code can get a chance to set CR4.MCE on each
* CPU. Otherwise, a broadcasted MCE observing CR4.MCE=0b on any
* core will shutdown the machine.
*/
return !cpumask_test_cpu(cpu, &cpus_booted_once_mask);
}
/* Returns true if SMT is supported and not forcefully (irreversibly) disabled */
bool cpu_smt_possible(void)
{
return cpu_smt_control != CPU_SMT_FORCE_DISABLED &&
cpu_smt_control != CPU_SMT_NOT_SUPPORTED;
}
EXPORT_SYMBOL_GPL(cpu_smt_possible);
#else
static inline bool cpu_smt_allowed(unsigned int cpu) { return true; }
#endif
static inline enum cpuhp_state
cpuhp_set_state(int cpu, struct cpuhp_cpu_state *st, enum cpuhp_state target)
{
enum cpuhp_state prev_state = st->state;
bool bringup = st->state < target;
st->rollback = false;
st->last = NULL;
st->target = target;
st->single = false;
st->bringup = bringup;
if (cpu_dying(cpu) != !bringup)
set_cpu_dying(cpu, !bringup);
return prev_state;
}
static inline void
cpuhp_reset_state(int cpu, struct cpuhp_cpu_state *st,
enum cpuhp_state prev_state)
{
bool bringup = !st->bringup;
st->target = prev_state;
/*
* Already rolling back. No need invert the bringup value or to change
* the current state.
*/
if (st->rollback)
return;
st->rollback = true;
/*
* If we have st->last we need to undo partial multi_instance of this
* state first. Otherwise start undo at the previous state.
*/
if (!st->last) {
if (st->bringup)
st->state--;
else
st->state++;
}
st->bringup = bringup;
if (cpu_dying(cpu) != !bringup)
set_cpu_dying(cpu, !bringup);
}
/* Regular hotplug invocation of the AP hotplug thread */
static void __cpuhp_kick_ap(struct cpuhp_cpu_state *st)
{
if (!st->single && st->state == st->target)
return;
st->result = 0;
/*
* Make sure the above stores are visible before should_run becomes
* true. Paired with the mb() above in cpuhp_thread_fun()
*/
smp_mb();
st->should_run = true;
wake_up_process(st->thread);
wait_for_ap_thread(st, st->bringup);
}
static int cpuhp_kick_ap(int cpu, struct cpuhp_cpu_state *st,
enum cpuhp_state target)
{
enum cpuhp_state prev_state;
int ret;
prev_state = cpuhp_set_state(cpu, st, target);
__cpuhp_kick_ap(st);
if ((ret = st->result)) {
cpuhp_reset_state(cpu, st, prev_state);
__cpuhp_kick_ap(st);
}
return ret;
}
static int bringup_wait_for_ap_online(unsigned int cpu)
{
struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
/* Wait for the CPU to reach CPUHP_AP_ONLINE_IDLE */
wait_for_ap_thread(st, true);
if (WARN_ON_ONCE((!cpu_online(cpu))))
return -ECANCELED;
/* Unpark the hotplug thread of the target cpu */
kthread_unpark(st->thread);
/*
* SMT soft disabling on X86 requires to bring the CPU out of the
* BIOS 'wait for SIPI' state in order to set the CR4.MCE bit. The
* CPU marked itself as booted_once in notify_cpu_starting() so the
* cpu_smt_allowed() check will now return false if this is not the
* primary sibling.
*/
if (!cpu_smt_allowed(cpu))
return -ECANCELED;
return 0;
}
#ifdef CONFIG_HOTPLUG_SPLIT_STARTUP
static int cpuhp_kick_ap_alive(unsigned int cpu)
{
if (!cpuhp_can_boot_ap(cpu))
return -EAGAIN;
return arch_cpuhp_kick_ap_alive(cpu, idle_thread_get(cpu));
}
static int cpuhp_bringup_ap(unsigned int cpu)
{
struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
int ret;
/*
* Some architectures have to walk the irq descriptors to
* setup the vector space for the cpu which comes online.
* Prevent irq alloc/free across the bringup.
*/
irq_lock_sparse();
ret = cpuhp_bp_sync_alive(cpu);
if (ret)
goto out_unlock;
ret = bringup_wait_for_ap_online(cpu);
if (ret)
goto out_unlock;
irq_unlock_sparse();
if (st->target <= CPUHP_AP_ONLINE_IDLE)
return 0;
return cpuhp_kick_ap(cpu, st, st->target);
out_unlock:
irq_unlock_sparse();
return ret;
}
#else
static int bringup_cpu(unsigned int cpu)
{
struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
struct task_struct *idle = idle_thread_get(cpu);
int ret;
if (!cpuhp_can_boot_ap(cpu))
return -EAGAIN;
/*
* Some architectures have to walk the irq descriptors to
* setup the vector space for the cpu which comes online.
*
* Prevent irq alloc/free across the bringup by acquiring the
* sparse irq lock. Hold it until the upcoming CPU completes the
* startup in cpuhp_online_idle() which allows to avoid
* intermediate synchronization points in the architecture code.
*/
irq_lock_sparse();
ret = __cpu_up(cpu, idle);
if (ret)
goto out_unlock;
ret = cpuhp_bp_sync_alive(cpu);
if (ret)
goto out_unlock;
ret = bringup_wait_for_ap_online(cpu);
if (ret)
goto out_unlock;
irq_unlock_sparse();
if (st->target <= CPUHP_AP_ONLINE_IDLE)
return 0;
return cpuhp_kick_ap(cpu, st, st->target);
out_unlock:
irq_unlock_sparse();
return ret;
}
#endif
static int finish_cpu(unsigned int cpu)
{
struct task_struct *idle = idle_thread_get(cpu);
struct mm_struct *mm = idle->active_mm;
/*
* idle_task_exit() will have switched to &init_mm, now
* clean up any remaining active_mm state.
*/
if (mm != &init_mm)
idle->active_mm = &init_mm;
mmdrop_lazy_tlb(mm);
return 0;
}
/*
* Hotplug state machine related functions
*/
/*
* Get the next state to run. Empty ones will be skipped. Returns true if a
* state must be run.
*
* st->state will be modified ahead of time, to match state_to_run, as if it
* has already ran.
*/
static bool cpuhp_next_state(bool bringup,
enum cpuhp_state *state_to_run,
struct cpuhp_cpu_state *st,
enum cpuhp_state target)
{
do {
if (bringup) {
if (st->state >= target)
return false;
*state_to_run = ++st->state;
} else {
if (st->state <= target)
return false;
*state_to_run = st->state--;
}
if (!cpuhp_step_empty(bringup, cpuhp_get_step(*state_to_run)))
break;
} while (true);
return true;
}
static int __cpuhp_invoke_callback_range(bool bringup,
unsigned int cpu,
struct cpuhp_cpu_state *st,
enum cpuhp_state target,
bool nofail)
{
enum cpuhp_state state;
int ret = 0;
while (cpuhp_next_state(bringup, &state, st, target)) {
int err;
err = cpuhp_invoke_callback(cpu, state, bringup, NULL, NULL);
if (!err)
continue;
if (nofail) {
pr_warn("CPU %u %s state %s (%d) failed (%d)\n",
cpu, bringup ? "UP" : "DOWN",
cpuhp_get_step(st->state)->name,
st->state, err);
ret = -1;
} else {
ret = err;
break;
}
}
return ret;
}
static inline int cpuhp_invoke_callback_range(bool bringup,
unsigned int cpu,
struct cpuhp_cpu_state *st,
enum cpuhp_state target)
{
return __cpuhp_invoke_callback_range(bringup, cpu, st, target, false);
}
static inline void cpuhp_invoke_callback_range_nofail(bool bringup,
unsigned int cpu,
struct cpuhp_cpu_state *st,
enum cpuhp_state target)
{
__cpuhp_invoke_callback_range(bringup, cpu, st, target, true);
}
static inline bool can_rollback_cpu(struct cpuhp_cpu_state *st)
{
if (IS_ENABLED(CONFIG_HOTPLUG_CPU))
return true;
/*
* When CPU hotplug is disabled, then taking the CPU down is not
* possible because takedown_cpu() and the architecture and
* subsystem specific mechanisms are not available. So the CPU
* which would be completely unplugged again needs to stay around
* in the current state.
*/
return st->state <= CPUHP_BRINGUP_CPU;
}
static int cpuhp_up_callbacks(unsigned int cpu, struct cpuhp_cpu_state *st,
enum cpuhp_state target)
{
enum cpuhp_state prev_state = st->state;