blob: 918e52df090e88ee0a03096bee651e6c3a4d6dce [file] [log] [blame]
Linus Torvalds1da177e2005-04-16 15:20:36 -07001/*
2 * linux/kernel/softirq.c
3 *
4 * Copyright (C) 1992 Linus Torvalds
5 *
6 * Rewritten. Old one was good in 2.2, but in 2.3 it was immoral. --ANK (990903)
7 */
8
9#include <linux/module.h>
10#include <linux/kernel_stat.h>
11#include <linux/interrupt.h>
12#include <linux/init.h>
13#include <linux/mm.h>
14#include <linux/notifier.h>
15#include <linux/percpu.h>
16#include <linux/cpu.h>
17#include <linux/kthread.h>
18#include <linux/rcupdate.h>
Andrew Morton78eef012006-03-22 00:08:16 -080019#include <linux/smp.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070020
21#include <asm/irq.h>
/*
   - No shared variables, all the data are CPU local.
   - If a softirq needs serialization, let it serialize itself
     by its own spinlocks.
   - Even if softirq is serialized, only local cpu is marked for
     execution. Hence, we get something sort of weak cpu binding.
     It is still not clear whether this results in better locality
     or not.

   Examples:
   - NET RX softirq. It is multithreaded and does not require
     any global serialization.
   - NET TX softirq. It kicks software netdevice queues, hence
     it is logically serialized per device, but this serialization
     is invisible to common code.
   - Tasklets: serialized wrt itself.
 */
39
40#ifndef __ARCH_IRQ_STAT
41irq_cpustat_t irq_stat[NR_CPUS] ____cacheline_aligned;
42EXPORT_SYMBOL(irq_stat);
43#endif
44
45static struct softirq_action softirq_vec[32] __cacheline_aligned_in_smp;
46
47static DEFINE_PER_CPU(struct task_struct *, ksoftirqd);
48
49/*
50 * we cannot loop indefinitely here to avoid userspace starvation,
51 * but we also don't want to introduce a worst case 1/HZ latency
52 * to the pending events, so lets the scheduler to balance
53 * the softirq load for us.
54 */
55static inline void wakeup_softirqd(void)
56{
57 /* Interrupts are disabled: no need to stop preemption */
58 struct task_struct *tsk = __get_cpu_var(ksoftirqd);
59
60 if (tsk && tsk->state != TASK_RUNNING)
61 wake_up_process(tsk);
62}
63
64/*
Ingo Molnarde30a2b2006-07-03 00:24:42 -070065 * This one is for softirq.c-internal use,
66 * where hardirqs are disabled legitimately:
67 */
Tim Chen3c829c32006-07-30 03:04:02 -070068#ifdef CONFIG_TRACE_IRQFLAGS
Ingo Molnarde30a2b2006-07-03 00:24:42 -070069static void __local_bh_disable(unsigned long ip)
70{
71 unsigned long flags;
72
73 WARN_ON_ONCE(in_irq());
74
75 raw_local_irq_save(flags);
76 add_preempt_count(SOFTIRQ_OFFSET);
77 /*
78 * Were softirqs turned off above:
79 */
80 if (softirq_count() == SOFTIRQ_OFFSET)
81 trace_softirqs_off(ip);
82 raw_local_irq_restore(flags);
83}
Tim Chen3c829c32006-07-30 03:04:02 -070084#else /* !CONFIG_TRACE_IRQFLAGS */
85static inline void __local_bh_disable(unsigned long ip)
86{
87 add_preempt_count(SOFTIRQ_OFFSET);
88 barrier();
89}
90#endif /* CONFIG_TRACE_IRQFLAGS */
Ingo Molnarde30a2b2006-07-03 00:24:42 -070091
/*
 * Disable softirq processing on this CPU, recording our caller's
 * address for the irq-flags tracer.
 */
void local_bh_disable(void)
{
	unsigned long caller = (unsigned long)__builtin_return_address(0);

	__local_bh_disable(caller);
}
EXPORT_SYMBOL(local_bh_disable);
98
99void __local_bh_enable(void)
100{
101 WARN_ON_ONCE(in_irq());
102
103 /*
104 * softirqs should never be enabled by __local_bh_enable(),
105 * it always nests inside local_bh_enable() sections:
106 */
107 WARN_ON_ONCE(softirq_count() == SOFTIRQ_OFFSET);
108
109 sub_preempt_count(SOFTIRQ_OFFSET);
110}
111EXPORT_SYMBOL_GPL(__local_bh_enable);
112
113/*
114 * Special-case - softirqs can safely be enabled in
115 * cond_resched_softirq(), or by __do_softirq(),
116 * without processing still-pending softirqs:
117 */
118void _local_bh_enable(void)
119{
120 WARN_ON_ONCE(in_irq());
121 WARN_ON_ONCE(!irqs_disabled());
122
123 if (softirq_count() == SOFTIRQ_OFFSET)
124 trace_softirqs_on((unsigned long)__builtin_return_address(0));
125 sub_preempt_count(SOFTIRQ_OFFSET);
126}
127
128EXPORT_SYMBOL(_local_bh_enable);
129
130void local_bh_enable(void)
131{
Tim Chen3c829c32006-07-30 03:04:02 -0700132#ifdef CONFIG_TRACE_IRQFLAGS
Ingo Molnarde30a2b2006-07-03 00:24:42 -0700133 unsigned long flags;
134
135 WARN_ON_ONCE(in_irq());
Tim Chen3c829c32006-07-30 03:04:02 -0700136#endif
Ingo Molnarde30a2b2006-07-03 00:24:42 -0700137 WARN_ON_ONCE(irqs_disabled());
138
Tim Chen3c829c32006-07-30 03:04:02 -0700139#ifdef CONFIG_TRACE_IRQFLAGS
Ingo Molnarde30a2b2006-07-03 00:24:42 -0700140 local_irq_save(flags);
Tim Chen3c829c32006-07-30 03:04:02 -0700141#endif
Ingo Molnarde30a2b2006-07-03 00:24:42 -0700142 /*
143 * Are softirqs going to be turned on now:
144 */
145 if (softirq_count() == SOFTIRQ_OFFSET)
146 trace_softirqs_on((unsigned long)__builtin_return_address(0));
147 /*
148 * Keep preemption disabled until we are done with
149 * softirq processing:
150 */
151 sub_preempt_count(SOFTIRQ_OFFSET - 1);
152
153 if (unlikely(!in_interrupt() && local_softirq_pending()))
154 do_softirq();
155
156 dec_preempt_count();
Tim Chen3c829c32006-07-30 03:04:02 -0700157#ifdef CONFIG_TRACE_IRQFLAGS
Ingo Molnarde30a2b2006-07-03 00:24:42 -0700158 local_irq_restore(flags);
Tim Chen3c829c32006-07-30 03:04:02 -0700159#endif
Ingo Molnarde30a2b2006-07-03 00:24:42 -0700160 preempt_check_resched();
161}
162EXPORT_SYMBOL(local_bh_enable);
163
164void local_bh_enable_ip(unsigned long ip)
165{
Tim Chen3c829c32006-07-30 03:04:02 -0700166#ifdef CONFIG_TRACE_IRQFLAGS
Ingo Molnarde30a2b2006-07-03 00:24:42 -0700167 unsigned long flags;
168
169 WARN_ON_ONCE(in_irq());
170
171 local_irq_save(flags);
Tim Chen3c829c32006-07-30 03:04:02 -0700172#endif
Ingo Molnarde30a2b2006-07-03 00:24:42 -0700173 /*
174 * Are softirqs going to be turned on now:
175 */
176 if (softirq_count() == SOFTIRQ_OFFSET)
177 trace_softirqs_on(ip);
178 /*
179 * Keep preemption disabled until we are done with
180 * softirq processing:
181 */
182 sub_preempt_count(SOFTIRQ_OFFSET - 1);
183
184 if (unlikely(!in_interrupt() && local_softirq_pending()))
185 do_softirq();
186
187 dec_preempt_count();
Tim Chen3c829c32006-07-30 03:04:02 -0700188#ifdef CONFIG_TRACE_IRQFLAGS
Ingo Molnarde30a2b2006-07-03 00:24:42 -0700189 local_irq_restore(flags);
Tim Chen3c829c32006-07-30 03:04:02 -0700190#endif
Ingo Molnarde30a2b2006-07-03 00:24:42 -0700191 preempt_check_resched();
192}
193EXPORT_SYMBOL(local_bh_enable_ip);
194
195/*
Linus Torvalds1da177e2005-04-16 15:20:36 -0700196 * We restart softirq processing MAX_SOFTIRQ_RESTART times,
197 * and we fall back to softirqd after that.
198 *
199 * This number has been established via experimentation.
200 * The two things to balance is latency against fairness -
201 * we want to handle softirqs as soon as possible, but they
202 * should not be able to lock up the box.
203 */
204#define MAX_SOFTIRQ_RESTART 10
205
206asmlinkage void __do_softirq(void)
207{
208 struct softirq_action *h;
209 __u32 pending;
210 int max_restart = MAX_SOFTIRQ_RESTART;
211 int cpu;
212
213 pending = local_softirq_pending();
Paul Mackerras829035fd2006-07-03 00:25:40 -0700214 account_system_vtime(current);
215
Ingo Molnarde30a2b2006-07-03 00:24:42 -0700216 __local_bh_disable((unsigned long)__builtin_return_address(0));
217 trace_softirq_enter();
Linus Torvalds1da177e2005-04-16 15:20:36 -0700218
Linus Torvalds1da177e2005-04-16 15:20:36 -0700219 cpu = smp_processor_id();
220restart:
221 /* Reset the pending bitmask before enabling irqs */
Andi Kleen3f744782005-09-12 18:49:24 +0200222 set_softirq_pending(0);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700223
Andrew Mortonc70f5d62005-07-30 10:22:49 -0700224 local_irq_enable();
Linus Torvalds1da177e2005-04-16 15:20:36 -0700225
226 h = softirq_vec;
227
228 do {
229 if (pending & 1) {
230 h->action(h);
231 rcu_bh_qsctr_inc(cpu);
232 }
233 h++;
234 pending >>= 1;
235 } while (pending);
236
Andrew Mortonc70f5d62005-07-30 10:22:49 -0700237 local_irq_disable();
Linus Torvalds1da177e2005-04-16 15:20:36 -0700238
239 pending = local_softirq_pending();
240 if (pending && --max_restart)
241 goto restart;
242
243 if (pending)
244 wakeup_softirqd();
245
Ingo Molnarde30a2b2006-07-03 00:24:42 -0700246 trace_softirq_exit();
Paul Mackerras829035fd2006-07-03 00:25:40 -0700247
248 account_system_vtime(current);
Ingo Molnarde30a2b2006-07-03 00:24:42 -0700249 _local_bh_enable();
Linus Torvalds1da177e2005-04-16 15:20:36 -0700250}
251
252#ifndef __ARCH_HAS_DO_SOFTIRQ
253
254asmlinkage void do_softirq(void)
255{
256 __u32 pending;
257 unsigned long flags;
258
259 if (in_interrupt())
260 return;
261
262 local_irq_save(flags);
263
264 pending = local_softirq_pending();
265
266 if (pending)
267 __do_softirq();
268
269 local_irq_restore(flags);
270}
271
272EXPORT_SYMBOL(do_softirq);
273
274#endif
275
Linus Torvalds1da177e2005-04-16 15:20:36 -0700276#ifdef __ARCH_IRQ_EXIT_IRQS_DISABLED
277# define invoke_softirq() __do_softirq()
278#else
279# define invoke_softirq() do_softirq()
280#endif
281
282/*
283 * Exit an interrupt context. Process softirqs if needed and possible:
284 */
285void irq_exit(void)
286{
287 account_system_vtime(current);
Ingo Molnarde30a2b2006-07-03 00:24:42 -0700288 trace_hardirq_exit();
Linus Torvalds1da177e2005-04-16 15:20:36 -0700289 sub_preempt_count(IRQ_EXIT_OFFSET);
290 if (!in_interrupt() && local_softirq_pending())
291 invoke_softirq();
292 preempt_enable_no_resched();
293}
294
295/*
296 * This function must run with irqs disabled!
297 */
298inline fastcall void raise_softirq_irqoff(unsigned int nr)
299{
300 __raise_softirq_irqoff(nr);
301
302 /*
303 * If we're in an interrupt or softirq, we're done
304 * (this also catches softirq-disabled code). We will
305 * actually run the softirq once we return from
306 * the irq or softirq.
307 *
308 * Otherwise we wake up ksoftirqd to make sure we
309 * schedule the softirq soon.
310 */
311 if (!in_interrupt())
312 wakeup_softirqd();
313}
314
315EXPORT_SYMBOL(raise_softirq_irqoff);
316
317void fastcall raise_softirq(unsigned int nr)
318{
319 unsigned long flags;
320
321 local_irq_save(flags);
322 raise_softirq_irqoff(nr);
323 local_irq_restore(flags);
324}
325
326void open_softirq(int nr, void (*action)(struct softirq_action*), void *data)
327{
328 softirq_vec[nr].data = data;
329 softirq_vec[nr].action = action;
330}
331
Linus Torvalds1da177e2005-04-16 15:20:36 -0700332/* Tasklets */
333struct tasklet_head
334{
335 struct tasklet_struct *list;
336};
337
338/* Some compilers disobey section attribute on statics when not
339 initialized -- RR */
340static DEFINE_PER_CPU(struct tasklet_head, tasklet_vec) = { NULL };
341static DEFINE_PER_CPU(struct tasklet_head, tasklet_hi_vec) = { NULL };
342
343void fastcall __tasklet_schedule(struct tasklet_struct *t)
344{
345 unsigned long flags;
346
347 local_irq_save(flags);
348 t->next = __get_cpu_var(tasklet_vec).list;
349 __get_cpu_var(tasklet_vec).list = t;
350 raise_softirq_irqoff(TASKLET_SOFTIRQ);
351 local_irq_restore(flags);
352}
353
354EXPORT_SYMBOL(__tasklet_schedule);
355
356void fastcall __tasklet_hi_schedule(struct tasklet_struct *t)
357{
358 unsigned long flags;
359
360 local_irq_save(flags);
361 t->next = __get_cpu_var(tasklet_hi_vec).list;
362 __get_cpu_var(tasklet_hi_vec).list = t;
363 raise_softirq_irqoff(HI_SOFTIRQ);
364 local_irq_restore(flags);
365}
366
367EXPORT_SYMBOL(__tasklet_hi_schedule);
368
369static void tasklet_action(struct softirq_action *a)
370{
371 struct tasklet_struct *list;
372
373 local_irq_disable();
374 list = __get_cpu_var(tasklet_vec).list;
375 __get_cpu_var(tasklet_vec).list = NULL;
376 local_irq_enable();
377
378 while (list) {
379 struct tasklet_struct *t = list;
380
381 list = list->next;
382
383 if (tasklet_trylock(t)) {
384 if (!atomic_read(&t->count)) {
385 if (!test_and_clear_bit(TASKLET_STATE_SCHED, &t->state))
386 BUG();
387 t->func(t->data);
388 tasklet_unlock(t);
389 continue;
390 }
391 tasklet_unlock(t);
392 }
393
394 local_irq_disable();
395 t->next = __get_cpu_var(tasklet_vec).list;
396 __get_cpu_var(tasklet_vec).list = t;
397 __raise_softirq_irqoff(TASKLET_SOFTIRQ);
398 local_irq_enable();
399 }
400}
401
402static void tasklet_hi_action(struct softirq_action *a)
403{
404 struct tasklet_struct *list;
405
406 local_irq_disable();
407 list = __get_cpu_var(tasklet_hi_vec).list;
408 __get_cpu_var(tasklet_hi_vec).list = NULL;
409 local_irq_enable();
410
411 while (list) {
412 struct tasklet_struct *t = list;
413
414 list = list->next;
415
416 if (tasklet_trylock(t)) {
417 if (!atomic_read(&t->count)) {
418 if (!test_and_clear_bit(TASKLET_STATE_SCHED, &t->state))
419 BUG();
420 t->func(t->data);
421 tasklet_unlock(t);
422 continue;
423 }
424 tasklet_unlock(t);
425 }
426
427 local_irq_disable();
428 t->next = __get_cpu_var(tasklet_hi_vec).list;
429 __get_cpu_var(tasklet_hi_vec).list = t;
430 __raise_softirq_irqoff(HI_SOFTIRQ);
431 local_irq_enable();
432 }
433}
434
435
436void tasklet_init(struct tasklet_struct *t,
437 void (*func)(unsigned long), unsigned long data)
438{
439 t->next = NULL;
440 t->state = 0;
441 atomic_set(&t->count, 0);
442 t->func = func;
443 t->data = data;
444}
445
446EXPORT_SYMBOL(tasklet_init);
447
448void tasklet_kill(struct tasklet_struct *t)
449{
450 if (in_interrupt())
451 printk("Attempt to kill tasklet from interrupt\n");
452
453 while (test_and_set_bit(TASKLET_STATE_SCHED, &t->state)) {
454 do
455 yield();
456 while (test_bit(TASKLET_STATE_SCHED, &t->state));
457 }
458 tasklet_unlock_wait(t);
459 clear_bit(TASKLET_STATE_SCHED, &t->state);
460}
461
462EXPORT_SYMBOL(tasklet_kill);
463
464void __init softirq_init(void)
465{
466 open_softirq(TASKLET_SOFTIRQ, tasklet_action, NULL);
467 open_softirq(HI_SOFTIRQ, tasklet_hi_action, NULL);
468}
469
470static int ksoftirqd(void * __bind_cpu)
471{
472 set_user_nice(current, 19);
473 current->flags |= PF_NOFREEZE;
474
475 set_current_state(TASK_INTERRUPTIBLE);
476
477 while (!kthread_should_stop()) {
478 preempt_disable();
479 if (!local_softirq_pending()) {
480 preempt_enable_no_resched();
481 schedule();
482 preempt_disable();
483 }
484
485 __set_current_state(TASK_RUNNING);
486
487 while (local_softirq_pending()) {
488 /* Preempt disable stops cpu going offline.
489 If already offline, we'll be on wrong CPU:
490 don't process */
491 if (cpu_is_offline((long)__bind_cpu))
492 goto wait_to_die;
493 do_softirq();
494 preempt_enable_no_resched();
495 cond_resched();
496 preempt_disable();
497 }
498 preempt_enable();
499 set_current_state(TASK_INTERRUPTIBLE);
500 }
501 __set_current_state(TASK_RUNNING);
502 return 0;
503
504wait_to_die:
505 preempt_enable();
506 /* Wait for kthread_stop */
507 set_current_state(TASK_INTERRUPTIBLE);
508 while (!kthread_should_stop()) {
509 schedule();
510 set_current_state(TASK_INTERRUPTIBLE);
511 }
512 __set_current_state(TASK_RUNNING);
513 return 0;
514}
515
516#ifdef CONFIG_HOTPLUG_CPU
517/*
518 * tasklet_kill_immediate is called to remove a tasklet which can already be
519 * scheduled for execution on @cpu.
520 *
521 * Unlike tasklet_kill, this function removes the tasklet
522 * _immediately_, even if the tasklet is in TASKLET_STATE_SCHED state.
523 *
524 * When this function is called, @cpu must be in the CPU_DEAD state.
525 */
526void tasklet_kill_immediate(struct tasklet_struct *t, unsigned int cpu)
527{
528 struct tasklet_struct **i;
529
530 BUG_ON(cpu_online(cpu));
531 BUG_ON(test_bit(TASKLET_STATE_RUN, &t->state));
532
533 if (!test_bit(TASKLET_STATE_SCHED, &t->state))
534 return;
535
536 /* CPU is dead, so no lock needed. */
537 for (i = &per_cpu(tasklet_vec, cpu).list; *i; i = &(*i)->next) {
538 if (*i == t) {
539 *i = t->next;
540 return;
541 }
542 }
543 BUG();
544}
545
546static void takeover_tasklets(unsigned int cpu)
547{
548 struct tasklet_struct **i;
549
550 /* CPU is dead, so no lock needed. */
551 local_irq_disable();
552
553 /* Find end, append list for that CPU. */
554 for (i = &__get_cpu_var(tasklet_vec).list; *i; i = &(*i)->next);
555 *i = per_cpu(tasklet_vec, cpu).list;
556 per_cpu(tasklet_vec, cpu).list = NULL;
557 raise_softirq_irqoff(TASKLET_SOFTIRQ);
558
559 for (i = &__get_cpu_var(tasklet_hi_vec).list; *i; i = &(*i)->next);
560 *i = per_cpu(tasklet_hi_vec, cpu).list;
561 per_cpu(tasklet_hi_vec, cpu).list = NULL;
562 raise_softirq_irqoff(HI_SOFTIRQ);
563
564 local_irq_enable();
565}
566#endif /* CONFIG_HOTPLUG_CPU */
567
Chandra Seetharaman8c78f302006-07-30 03:03:35 -0700568static int __cpuinit cpu_callback(struct notifier_block *nfb,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700569 unsigned long action,
570 void *hcpu)
571{
572 int hotcpu = (unsigned long)hcpu;
573 struct task_struct *p;
574
575 switch (action) {
576 case CPU_UP_PREPARE:
Linus Torvalds1da177e2005-04-16 15:20:36 -0700577 p = kthread_create(ksoftirqd, hcpu, "ksoftirqd/%d", hotcpu);
578 if (IS_ERR(p)) {
579 printk("ksoftirqd for %i failed\n", hotcpu);
580 return NOTIFY_BAD;
581 }
582 kthread_bind(p, hotcpu);
583 per_cpu(ksoftirqd, hotcpu) = p;
584 break;
585 case CPU_ONLINE:
586 wake_up_process(per_cpu(ksoftirqd, hotcpu));
587 break;
588#ifdef CONFIG_HOTPLUG_CPU
589 case CPU_UP_CANCELED:
Heiko Carstensfc75cdf2006-06-25 05:49:10 -0700590 if (!per_cpu(ksoftirqd, hotcpu))
591 break;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700592 /* Unbind so it can run. Fall thru. */
Heiko Carstensa4c4af72005-11-07 00:58:38 -0800593 kthread_bind(per_cpu(ksoftirqd, hotcpu),
594 any_online_cpu(cpu_online_map));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700595 case CPU_DEAD:
596 p = per_cpu(ksoftirqd, hotcpu);
597 per_cpu(ksoftirqd, hotcpu) = NULL;
598 kthread_stop(p);
599 takeover_tasklets(hotcpu);
600 break;
601#endif /* CONFIG_HOTPLUG_CPU */
602 }
603 return NOTIFY_OK;
604}
605
Chandra Seetharaman8c78f302006-07-30 03:03:35 -0700606static struct notifier_block __cpuinitdata cpu_nfb = {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700607 .notifier_call = cpu_callback
608};
609
610__init int spawn_ksoftirqd(void)
611{
612 void *cpu = (void *)(long)smp_processor_id();
Akinobu Mita07dccf32006-09-29 02:00:22 -0700613 int err = cpu_callback(&cpu_nfb, CPU_UP_PREPARE, cpu);
614
615 BUG_ON(err == NOTIFY_BAD);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700616 cpu_callback(&cpu_nfb, CPU_ONLINE, cpu);
617 register_cpu_notifier(&cpu_nfb);
618 return 0;
619}
Andrew Morton78eef012006-03-22 00:08:16 -0800620
#ifdef CONFIG_SMP
/*
 * Call a function on all processors.
 *
 * @func is first handed to smp_call_function() together with @info,
 * @retry and @wait, and is then called on the local CPU with
 * interrupts disabled.  Preemption is off for the whole sequence.
 *
 * Returns whatever smp_call_function() returned.
 */
int on_each_cpu(void (*func) (void *info), void *info, int retry, int wait)
{
	int ret;

	preempt_disable();
	ret = smp_call_function(func, info, retry, wait);

	/* Local invocation, with interrupts off */
	local_irq_disable();
	func(info);
	local_irq_enable();

	preempt_enable();
	return ret;
}
EXPORT_SYMBOL(on_each_cpu);
#endif