不论请求来自Block层还是SCSI层,当请求完成时的入口只有一个:SCSI设备上报到内核的中断处理函数,请求完成逐级向上传递,直到应用层。和很多的request-response模型一样,IO请求的完成分为以下3类4种:
- 请求完成+请求成功
- 请求完成+请求失败+重试
- 请求完成+请求失败+错误处理
- 请求响应超时
Init
以megasas的PCIe RAID卡为例,其在SCSI子系统中的请求完成的初始化如下,
准备硬件中断handler和tasklet处理函数到megasas_instance_template:
static struct megasas_instance_template megasas_instance_template_gen2 = {
.service_isr = megasas_isr,
.tasklet = megasas_complete_cmd_dpc,
};
注册工作队列、tasklet和硬件中断handler,当磁盘完成SCSI请求时会上报该中断:
megasas_probe_one()
INIT_WORK(&instance->work_init, process_fw_state_change_wq);
megasas_init_fw()
tasklet_init(&instance->isr_tasklet, instance->instancet->tasklet);
request_irq(service_isr)
系统还为Block层的request的完成注册了softirq:
blk_softirq_init()
INIT_LIST_HEAD(&per_cpu(blk_cpu_done, i));
open_softirq(BLOCK_SOFTIRQ, blk_done_softirq);
register_hotcpu_notifier(&blk_cpu_notifier);
做好了准备工作,开始讨论请求完成的这三种情况
Complete
当磁盘完成SCSI请求时会上报中断,内核最终会执行注册的handler,请求完成的传递路径如下所示,经由**硬件中断 -> work_struct + tasklet -> 软件中断(-> 完成量)**逐层传递。
megasas_isr()
megasas_deplete_reply_queue()
some work in top half
schedule_work(&instance->work_init)
tasklet_schedule(&instance->isr_tasklet)
系统在合适的时机调度到该tasklet,其流程如下:
megasas_complete_cmd_dpc()
megasas_complete_cmd()
cmd->scmd->result = //设置result,在blk_done_softirq()中需要读取执行结果
cmd->scmd->scsi_done(cmd->scmd); //scsi_request_fn()中构造scsi_cmnd时注册为scsi_done()
blk_complete_request()
__blk_complete_request()
raise_softirq_irqoff(BLOCK_SOFTIRQ)
Success
作为该softirq的handler的blk_done_softirq()的流程如下:
blk_done_softirq()
rq->q->softirq_done_fn(rq); //scsi_softirq_done()
disposition = scsi_decide_disposition(cmd)
host_byte(scmd->result) //读取result
case SUCCESS:
scsi_finish_command()
scsi_cmd_to_driver()
drv->done(cmd) //sd_done()
sd_dif_complete() //对开启DIF保护的请求,将保护信息中的参考标签(ref tag)由物理扇区号重新映射回Block层期望的虚拟扇区号
scsi_io_completion() //Completion processing for block device I/O requests
scsi_end_request()
blk_update_request()
req_bio_endio()
blk_finish_request()
req->end_io() //blk_end_sync_rq()
complete(waiting) //唤醒等待该完成量的线程(即发出SCSI层命令的线程),发自Block层的命令不需要
scsi_release_buffers(cmd);
scsi_free_sgtable() //回收SG数据
scsi_put_command(cmd);
scsi_run_queue()
__scsi_queue_insert()
Failed – retry
blk_done_softirq()
rq->q->softirq_done_fn(rq); //scsi_softirq_done()
disposition = scsi_decide_disposition(cmd)
host_byte(scmd->result) //读取result
case NEEDS_RETRY:
case ADD_TO_MLQUEUE:
scsi_queue_insert()
__scsi_queue_insert()
blk_requeue_request(q, cmd->request) //进入Block层
blk_delete_timer(rq)
blk_clear_rq_complete(rq)
elv_requeue_request(q, rq)
kblockd_schedule_work(&device->requeue_work)
queue_work(kblockd_workqueue, work)
Failed – eh
scsi_host_alloc()
shost->ehandler = kthread_run(scsi_error_handler, shost);
blk_done_softirq()
rq->q->softirq_done_fn(rq); //scsi_softirq_done()
disposition = scsi_decide_disposition(cmd)
host_byte(scmd->result) //读取result
default:
scsi_eh_scmd_add()
scsi_host_set_state()
list_add_tail(&scmd->eh_entry, &shost->eh_cmd_q);
scsi_eh_wakeup(shost)
wake_up_process(shost->ehandler)
scsi_finish_command() //仅当scsi_eh_scmd_add()失败(返回0)时才执行
scsi_error_handler()
while(true)
shost->transportt->eh_strategy_handler(shost);
scsi_unjam_host(shost);
scsi_eh_get_sense()
list_for_each_entry_safe()
scsi_request_sense(scmd)
scsi_send_eh_cmnd()
scsi_eh_prep_cmnd()
scmd->scsi_done = scsi_eh_done
shost->hostt->queuecommand(shost, scmd)
scsi_eh_restore_cmnd(scmd, &ses)
wait_for_completion_timeout(&done, timeout)
scsi_eh_completed_normally(scmd)
scsi_eh_restore_cmnd(scmd, &ses)
scsi_decide_disposition(scmd)
scsi_eh_finish_cmd(scmd, done_q)
list_move_tail(&scmd->eh_entry, done_q)
scsi_eh_abort_cmds(&eh_work_q, &eh_done_q)
scsi_eh_ready_devs(shost, &eh_work_q, &eh_done_q);
scsi_eh_flush_done_q(&eh_done_q);
可以看到,该中断处理函数的工作需要借助request_queue中已经注册好的回调函数,比如SCSI子系统注册的scsi_softirq_done()与sd_done()。此外,参照前文,对于发自SCSI子系统的SCSI命令,其发送线程会等待完成量waiting;在request的回调函数blk_end_sync_rq()中,该完成量被完成,等待的线程随即被唤醒。相比之下,发自Block层的SCSI命令不会等待这个完成量。
Timeout
static struct scsi_host_template megasas_template = {
.eh_timed_out = megasas_reset_timer,
};
megasas_reset_timer()
instance->host->can_queue = instance->throttlequeuedepth;
instance->last_time = jiffies;
instance->flag |= MEGASAS_FW_BUSY;