自学内容网 自学内容网

连续调用usleep_range导致的lockup

现象

crng_finalize_init后出现死锁

[    3.610168] random: crng init done
[   13.615447] suspected lockup task = kworker/0:3, pid: 42
[   13.620788] pc = 0x0
[   23.615447] suspected lockup task = kworker/0:3, pid: 42
[   23.620788] pc = 0x0
[   33.615447] suspected lockup task = kworker/0:3, pid: 42
[   33.620788] pc = 0x0
[   43.615447] suspected lockup task = kworker/0:3, pid: 42
[   43.620788] pc = 0x0

分析

在最后一句打印(crng init done)的位置添加栈回溯,看看是什么触发的

--- a/drivers/char/random.c
+++ b/drivers/char/random.c
@@ -829,6 +829,7 @@ static void crng_finalize_init(struct crng_state *crng)
        wake_up_interruptible(&crng_init_wait);
        kill_fasync(&fasync, SIGIO, POLL_IN);
        pr_notice("crng init done\n");
+       dump_stack();
        if (unseeded_warning.missed) {
                pr_notice("random: %d get_random_xx warning(s) missed "
                                "due to ratelimiting\n",

也就是说,在mmc初始化过程中发送request的时候触发了中断,crng_finalize_init正是在这次中断处理(add_interrupt_randomness)里被调用的

[    3.636016] Workqueue: events_freezable mmc_rescan
[    3.640838] [<c0013245>] (unwind_backtrace) from [<c00109b7>] (show_stack+0xb/0xc)
[    3.648468] [<c00109b7>] (show_stack) from [<c0208e49>] (crng_finalize_init.constprop.18+0xc9/0xfc)
[    3.657592] [<c0208e49>] (crng_finalize_init.constprop.18) from [<c0209285>] (crng_reseed.constprop.14+0xe9/0x174)
[    3.668029] [<c0209285>] (crng_reseed.constprop.14) from [<c020981b>] (add_interrupt_randomness+0x10b/0x160)
[    3.677917] [<c020981b>] (add_interrupt_randomness) from [<c004e5e7>] (handle_irq_event_percpu+0x1b/0x3c)
[    3.687561] [<c004e5e7>] (handle_irq_event_percpu) from [<c004e62f>] (handle_irq_event+0x27/0x3c)
[    3.696502] [<c004e62f>] (handle_irq_event) from [<c0050ae3>] (handle_level_irq+0x67/0xb8)
[    3.704833] [<c0050ae3>] (handle_level_irq) from [<c004dea3>] (generic_handle_irq+0x17/0x20)
[    3.713317] [<c004dea3>] (generic_handle_irq) from [<c004e0c9>] (__handle_domain_irq+0x35/0x70)
[    3.722076] [<c004e0c9>] (__handle_domain_irq) from [<c0009ad3>] (__irq_svc+0x53/0x94)
[    3.730072] Exception stack(0xc5de5d00 to 0xc5de5d48)
[    3.735137] 5d00: 00008001 ffff8c9d ffffff9c 00000000 c5c3d000 c066ac40 00000001 c5c3d810
[    3.743377] 5d20: c5c3d640 00000013 00000000 c5de5dc0 10624dd3 c5de5d50 c042a2b9 c02dbc96
[    3.751617] 5d40: a0000033 ffffffff
[    3.755126] [<c0009ad3>] (__irq_svc) from [<c02dbc96>] (sdhci_request+0xd2/0x270)
[    3.762664] [<c02dbc96>] (sdhci_request) from [<c02c97bb>] (mmc_start_request+0x5f/0x7c)
[    3.770812] [<c02c97bb>] (mmc_start_request) from [<c02c990d>] (mmc_wait_for_req+0x45/0x8c)
[    3.779235] [<c02c990d>] (mmc_wait_for_req) from [<c02d05a3>] (mmc_app_send_scr+0x8f/0xb8)
[    3.787567] [<c02d05a3>] (mmc_app_send_scr) from [<c02cfbfb>] (mmc_sd_setup_card+0x2f/0x250)
[    3.796051] [<c02cfbfb>] (mmc_sd_setup_card) from [<c02cfec9>] (mmc_sd_init_card+0xad/0x2fc)
[    3.804565] [<c02cfec9>] (mmc_sd_init_card) from [<c02d023f>] (mmc_attach_sd+0x77/0xf4)
[    3.812622] [<c02d023f>] (mmc_attach_sd) from [<c02caba5>] (mmc_rescan+0x21d/0x290)
[    3.820312] [<c02caba5>] (mmc_rescan) from [<c0034293>] (process_one_work+0x127/0x2e4)
[    3.828308] [<c0034293>] (process_one_work) from [<c003454b>] (worker_thread+0xfb/0x3e4)
[    3.836456] [<c003454b>] (worker_thread) from [<c0037a01>] (kthread+0x105/0x108)
[    3.843902] [<c0037a01>] (kthread) from [<c00090e9>] (ret_from_fork+0x11/0x28)
[    3.851165] Exception stack(0xc5de5fb0 to 0xc5de5ff8)
[    3.856262] 5fa0:                                     00000000 00000000 00000000 00000000
[    3.864471] 5fc0: 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000
[    3.872711] 5fe0: 00000000 00000000 00000000 00000000 00000013 00000000
[   13.875732] suspected lockup task = kworker/0:2, pid: 39
[   13.881072] pc = 0x0
[   23.875732] suspected lockup task = kworker/0:2, pid: 39
[   23.881072] pc = 0x0

添加打印,看看触发了什么中断,发现是不同的中断触发的,那就不是某个中断处理引发的问题

--- a/kernel/irq/irqdesc.c
+++ b/kernel/irq/irqdesc.c
@@ -664,7 +664,7 @@ int __handle_domain_irq(struct irq_domain *domain, unsigned int hwirq,
        struct pt_regs *old_regs = set_irq_regs(regs);
        unsigned int irq = hwirq;
        int ret = 0;
-
+       printk("__handle_domain_irq irq = %d", irq);
        irq_enter();

add_interrupt_randomness

在中断处理中,add_interrupt_randomness()函数利用设备两次中断的间隔时间作为噪声源将随机数据加入熵池。这个产生的随机值,可供usleep_range使用

handle_irq

分析下这个中断随机值的产生流程

/*
 * Run the flow handler for Linux interrupt number @irq.
 *
 * The descriptor is looked up with irq_to_desc(); when none exists the
 * call fails with -EINVAL. Otherwise the descriptor is handed to
 * generic_handle_irq_desc() and 0 is returned.
 */
int generic_handle_irq(unsigned int irq)
{
	struct irq_desc *d = irq_to_desc(irq);

	if (d) {
		generic_handle_irq_desc(d);
		return 0;
	}

	return -EINVAL;
}

/*
 * Invoke the flow handler stored in desc->handle_irq, passing the
 * descriptor itself. Neither desc nor the function pointer is checked
 * here, so the caller must supply a valid descriptor.
 */
static inline void generic_handle_irq_desc(struct irq_desc *desc)
{
desc->handle_irq(desc);
}

irq_set_chip_and_handler

中断控制器通过irq_set_chip_and_handler将上面的desc->handle_irq赋值为handle_level_irq,例如下面的mmp_irq_domain_map(原文此处引用了另一篇文章,链接已丢失)

/*
 * Set up a newly mapped interrupt: attach icu_irq_chip as its chip and
 * handle_level_irq as its flow handler.
 *
 * @d and @hw are not used by this implementation. Always returns 0.
 * NOTE(review): presumably installed as the irq_domain .map callback;
 * the ops table is not visible here, so confirm.
 */
static int mmp_irq_domain_map(struct irq_domain *d, unsigned int irq,
                              irq_hw_number_t hw)
{
        irq_set_chip_and_handler(irq, &icu_irq_chip, handle_level_irq);
        return 0;
}

//
/*
 * Flow handler that runs the actions attached to @desc.
 *
 * desc->lock is taken on entry and released on every exit path through
 * the out_unlock label. The handler body (handle_irq_event) is only
 * reached when irq_may_run() allows it, an action is installed and the
 * interrupt is not disabled.
 */
void handle_level_irq(struct irq_desc *desc)
{
raw_spin_lock(&desc->lock);
/* Mask the line first (per its name, mask_ack_irq also acks it). */
mask_ack_irq(desc);

/* Leave with the line still masked if the handler may not run now. */
if (!irq_may_run(desc))
goto out_unlock;

/* This invocation is handling the interrupt: drop replay/waiting state. */
desc->istate &= ~(IRQS_REPLAY | IRQS_WAITING);

/*
 * If it's disabled or no action is available,
 * keep it masked, mark it pending and get out of here.
 */
if (unlikely(!desc->action || irqd_irq_disabled(&desc->irq_data))) {
desc->istate |= IRQS_PENDING;
goto out_unlock;
}

/* Presumably bumps the per-CPU statistics for this irq (helper not shown). */
kstat_incr_irqs_this_cpu(desc);
/* Runs the actions; handle_irq_event drops and retakes desc->lock. */
handle_irq_event(desc);

/* NOTE(review): expected to unmask only when appropriate; helper not shown. */
cond_unmask_irq(desc);

out_unlock:
raw_spin_unlock(&desc->lock);
}

//
/*
 * Run the action handlers of @desc with desc->lock released.
 *
 * Must be entered with desc->lock held: the lock is dropped around
 * handle_irq_event_percpu() and taken again before returning, so the
 * caller still holds it afterwards. IRQD_IRQ_INPROGRESS is set for the
 * duration of the unlocked section.
 *
 * Returns whatever handle_irq_event_percpu() returned.
 */
irqreturn_t handle_irq_event(struct irq_desc *desc)
{
irqreturn_t ret;

/* This occurrence is being serviced now, so it is no longer pending. */
desc->istate &= ~IRQS_PENDING;
irqd_set(&desc->irq_data, IRQD_IRQ_INPROGRESS);
/* Handlers run without the descriptor lock held. */
raw_spin_unlock(&desc->lock);

ret = handle_irq_event_percpu(desc);

/* Retake the lock before clearing the in-progress marker. */
raw_spin_lock(&desc->lock);
irqd_clear(&desc->irq_data, IRQD_IRQ_INPROGRESS);
return ret;
}

handle_irq_event_percpu

__handle_irq_event_percpu负责执行中断处理函数;随后调用的add_interrupt_randomness函数利用设备两次中断的间隔时间作为噪声源,将随机数据加入熵池,供产生随机数使用

/*
 * Run all action handlers of @desc, then do the follow-up bookkeeping.
 *
 * After __handle_irq_event_percpu() has run the handlers, the irq number
 * and the flags it collected are passed to add_interrupt_randomness().
 * Unless noirqdebug is set, the result is also reported through
 * note_interrupt().
 *
 * Returns the combined handler result from __handle_irq_event_percpu().
 */
irqreturn_t handle_irq_event_percpu(struct irq_desc *desc)
{
	unsigned int action_flags = 0;
	irqreturn_t handled;

	handled = __handle_irq_event_percpu(desc, &action_flags);
	add_interrupt_randomness(desc->irq_data.irq, action_flags);

	if (noirqdebug)
		return handled;

	note_interrupt(desc, handled);
	return handled;
}

action->handler就是通过request_irq注册的中断处理函数,在下面的循环中被逐个调用

/*
 * Call the handler of every action attached to @desc.
 *
 * @desc:  descriptor whose actions are run
 * @flags: output; the flags of each action whose handler returned
 *         IRQ_HANDLED or (with a thread function set) IRQ_WAKE_THREAD
 *         are OR-ed into *flags
 *
 * Returns the bitwise OR of all handler return values, or IRQ_NONE when
 * no handler reported anything.
 */
irqreturn_t __handle_irq_event_percpu(struct irq_desc *desc, unsigned int *flags)
{
irqreturn_t retval = IRQ_NONE;
unsigned int irq = desc->irq_data.irq;
struct irqaction *action;

/* NOTE(review): presumably records a timestamp for this irq; helper not shown. */
record_irq_time(desc);

for_each_action_of_desc(desc, action) {
irqreturn_t res;

trace_irq_handler_entry(irq, action);
/* The handler registered for this action, called with its dev_id cookie. */
res = action->handler(irq, action->dev_id);
trace_irq_handler_exit(irq, action, res);

/* A handler must return with interrupts off; warn once and disable them again. */
if (WARN_ONCE(!irqs_disabled(),"irq %u handler %pS enabled interrupts\n",
      irq, action->handler))
local_irq_disable();

switch (res) {
case IRQ_WAKE_THREAD:
/*
 * Catch drivers which return WAKE_THREAD but
 * did not set up a thread function
 */
if (unlikely(!action->thread_fn)) {
warn_no_thread(irq, action);
break;
}

__irq_wake_thread(desc, action);

/* Fall through - to add to randomness */
case IRQ_HANDLED:
/* Collect the action's flags for the caller. */
*flags |= action->flags;
break;

default:
break;
}

/* Accumulate the results of all actions. */
retval |= res;
}

return retval;
}

结论

正常启动时,"crng init done"这句打印之后应该接着打印pcie相关的信息。最终发现是pcie主机驱动里添加复位操作的时候,连续调用usleep_range引起的死锁


原文地址:https://blog.csdn.net/weixin_44586903/article/details/140695931

免责声明:本站文章内容转载自网络资源,如本站内容侵犯了原著者的合法权益,可联系本站删除。更多内容请关注自学内容网(zxcms.com)!