Linux SCSI 子系统 V

不论请求来自Block层还是SCSI层,当请求完成时的入口只有一个:SCSI设备上报到内核的中断处理函数,请求完成逐级向上传递,直到应用层。和很多的request-response模型一样,IO请求的完成分为以下3类4种:

  1. 请求完成+请求成功
  2. 请求完成+请求失败+重试
  3. 请求完成+请求失败+错误处理
  4. 请求响应超时

Init

以megasas的PCIe RAID卡为例,其在SCSI子系统中与请求完成相关的初始化如下:

准备硬件中断handler和tasklet处理函数到megasas_instance_template:

static struct megasas_instance_template megasas_instance_template_gen2 = {
  .service_isr = megasas_isr,     
  .tasklet = megasas_complete_cmd_dpc,
};

初始化工作项(work_struct)和tasklet,并注册硬件中断handler,当磁盘完成SCSI请求时会上报该中断:

megasas_probe_one()
	INIT_WORK(&instance->work_init, process_fw_state_change_wq);
	megasas_init_fw()
		tasklet_init(&instance->isr_tasklet, instance->instancet->tasklet);
	request_irq(service_isr)

系统还为Block层的request的完成注册了softirq:

blk_softirq_init()
	INIT_LIST_HEAD(&per_cpu(blk_cpu_done, i));
	open_softirq(BLOCK_SOFTIRQ, blk_done_softirq);
	register_hotcpu_notifier(&blk_cpu_notifier);

做好了准备工作,下面开始讨论请求完成的这3类4种情况。

Complete

当磁盘完成SCSI请求时会上报中断,内核最终会执行注册的handler,请求完成的传递路径如下所示,经由**硬件中断 -> work_struct + tasklet -> 软件中断(-> 完成量)**逐层传递。

megasas_isr()
	megasas_deplete_reply_queue()
		some work in top half
		schedule_work(&instance->work_init)
		tasklet_schedule(&instance->isr_tasklet)

系统在合适的时机调度到该tasklet,其流程如下:

megasas_complete_cmd_dpc()
	megasas_complete_cmd()
		cmd->scmd->result =					//设置result,在blk_done_softirq()中需要读取执行结果
		cmd->scmd->scsi_done(cmd->scmd);	//scsi_request_fn()中构造scsi_cmnd时注册为scsi_done()
		blk_complete_request()
			__blk_complete_request()
				raise_softirq_irqoff(BLOCK_SOFTIRQ)

Success

作为该softirq的handler的blk_done_softirq()的流程如下:

blk_done_softirq()
    rq->q->softirq_done_fn(rq);     //scsi_softirq_done()
    	disposition = scsi_decide_disposition(cmd)
    		host_byte(scmd->result)			//读取result
    	case SUCCESS:
        scsi_finish_command()
            scsi_cmd_to_driver()
            drv->done(cmd)          //sd_done()
            	sd_dif_complete()	//回收bio内存
            scsi_io_completion()    //Completion processing for block device I/O requests
            scsi_end_request()
                blk_update_request()
              		req_bio_endio()
                blk_finish_request()
                    req->end_io()   		//blk_end_sync_rq()
                        complete(waiting)	//唤醒队列中发自SCSI层的命令,发自Block层的命令不需要
          		scsi_release_buffers(cmd);
          			scsi_free_sgtable()		//回收SG数据
          		scsi_put_command(cmd);
                scsi_run_queue()
            __scsi_queue_insert()

Failed – retry

blk_done_softirq()
    rq->q->softirq_done_fn(rq);     //scsi_softirq_done()
        disposition = scsi_decide_disposition(cmd)
    		host_byte(scmd->result)			//读取result
    	case NEEDS_RETRY:
    	case ADD_TO_MLQUEUE:
    		scsi_queue_insert()
    			__scsi_queue_insert()
    			blk_requeue_request(q, cmd->request)	//进入Block层
    				blk_delete_timer(rq)
    				blk_clear_rq_complete(rq)
    				elv_requeue_request(q, rq)
    			kblockd_schedule_work(&device->requeue_work)
    				queue_work(kblockd_workqueue, work)

Failed – eh

scsi_host_alloc()    
	shost->ehandler = kthread_run(scsi_error_handler, shost);
blk_done_softirq()
    rq->q->softirq_done_fn(rq);     //scsi_softirq_done()
        disposition = scsi_decide_disposition(cmd)
    		host_byte(scmd->result)			//读取result
    	default:
        	scsi_eh_scmd_add()
        		scsi_host_set_state()
        		list_add_tail(&scmd->eh_entry, &shost->eh_cmd_q);
        		scsi_eh_wakeup(shost)
        			wakeup(shost->ehandler)
        	scsi_finish_command()
scsi_error_handler()
	while(true)
	shost->transportt->eh_strategy_handler(shost);
	scsi_unjam_host(shost);
		scsi_eh_get_sense()
			list_for_each_entry_safe()
			scsi_request_sense(scmd)
				scsi_send_eh_cmnd()
					scsi_eh_prep_cmnd()
					scmd->scsi_done = scsi_eh_done
					shost->hostt->queuecommand(shost, scmd)
					scsi_eh_restore_cmnd(scmd, &ses)
					wait_for_completion_timeout(&done, timeout)
					scsi_eh_completed_normally(scmd)
					scsi_eh_restore_cmnd(scmd, &ses)
			scsi_decide_disposition(scmd)
			scsi_eh_finish_cmd(scmd, done_q)
				list_move_tail(&scmd->eh_entry, done_q)
		scsi_eh_abort_cmds(&eh_work_q, &eh_done_q)
		scsi_eh_ready_devs(shost, &eh_work_q, &eh_done_q);
		scsi_eh_flush_done_q(&eh_done_q);

可以看到,该中断处理函数的工作需要借助request_queue中已经注册好的回调函数,比如在SCSI子系统中注册的scsi_softirq_done()与sd_done()。此外,参照前文,对于发自SCSI子系统的SCSI命令,其发送线程都会等待完成量waiting的完成;这里,在request的回调函数blk_end_sync_rq()中,该完成量被完成,其线程得以被唤醒。相比之下,发自Block层的SCSI命令就不会等待这个完成量。

Timeout

static struct scsi_host_template megasas_template = {
	.eh_timed_out = megasas_reset_timer
}
megasas_reset_timer()	
	instance->host->can_queue = instance->throttlequeuedepth;
	instance->last_time = jiffies;
	instance->flag |= MEGASAS_FW_BUSY;

Leave a Reply

This site uses Akismet to reduce spam. Learn how your comment data is processed.