本文聚焦于sysfs Inside Tree的另一个层次的问题:对总线设备的描述和管理。对设备的管理,主要集中于sysfs Inside Tree中的5个概念:device,bus,driver,class和interface,本文也围绕这几个概念展开。
在硬件系统中,计算机通过形形色色的总线将设备组织成一个树状结构,这棵树的根就是CPU核。在软件的世界,同样有一棵树用于描述硬件信息。很容易想到,软件拓扑要严格的对应硬件结构才会清晰易懂,但是易懂并不是设备管理的目标(至少不是唯一的目标),所以实际的Device Driver子系统在此基础上充分发挥了代码重用,面向对象,合理的数据结构等设计方式,使整个Device Driver子系统既保持了与硬件变化相适配的软件数据结构,又提供了高效的管理与驱动方式。

当我们说一个总线的时候,其硬件实质就是一个“控制器+线”,而这个控制器又是挂载到另外一条总线(Parent)上的,所以如果站在另外一条总线(Parent)上看,这条总线又是一个Device;既然是Device,那么就要有对应的Driver来提供诸如硬件寄存器读写等驱动方法……所以,在Device Driver子系统中,Bus、Device、Driver并不是完全独立的,而是你中有我,我中有你的关系。无论是什么关系,显然,它们在内核中都是对kobject的再次封装,服从sysfs的组织和管理。
bus_type
对于一种总线(包括但不限于硬件上的总线),内核使用bus_type对其进行封装。从总线控制器的角度,既然所有的设备都是挂接在这条总线上的,那么就要遵守一些公共的协议,比如设备的上下线,设备的固有属性等等,这些公共的部分都被抽象出来通过bus_type管理。
//include/linux/device.h 104 struct bus_type { 105 const char *name; 106 const char *dev_name; 107 struct device *dev_root; 108 struct device_attribute *dev_attrs; /* use dev_groups instead */ 109 const struct attribute_group **bus_groups; 110 const struct attribute_group **dev_groups; 111 const struct attribute_group **drv_groups; 113 int (*match)(struct device *dev, struct device_driver *drv); 114 int (*uevent)(struct device *dev, struct kobj_uevent_env *env); 115 int (*probe)(struct device *dev); 116 int (*remove)(struct device *dev); 117 void (*shutdown)(struct device *dev); 119 int (*online)(struct device *dev); 120 int (*offline)(struct device *dev); 122 int (*suspend)(struct device *dev, pm_message_t state); 123 int (*resume)(struct device *dev); 125 const struct dev_pm_ops *pm; 127 const struct iommu_ops *iommu_ops; 129 struct subsys_private *p; 130 struct lock_class_key lock_key; 131 };
–105–>总线名,
–106–>设备名,用于设备命名的标准化,比如sdb, sdc etc
–107–>设备的根节点,是该总线上所有Device的默认Parent,是总线本身也是一个设备的体现
–108–>总线设备的默认属性
–109–>总线的默认属性组
–110–>总线设备的默认属性组
–111–>总线设备驱动的默认属性组
–113–>总线设备与总线设备驱动的匹配方法,当一个总线设备或总线设备驱动被加载时,回调该方法尝试匹配
–114–>总线设备发生变化时,回调该方法触发uevent事件
–115–>总线设备和总线设备驱动匹配时,回调该方法将总线设备对象传入总线设备驱动的probe()以初始化
–116–>当一个总线设备被从总线移除时,回调该方法
–117–>系统ShutDown时回调该方法停止总线设备
–119–>回调该方法使总线设备上线
–120–>回调该方法使总线设备下线
–122–>回调该方法使总线设备休眠
–123–>回调该方法唤醒休眠的总线设备
–125–>总线的PowerManagement操作方法集,回调设备驱动的PowerManagement方法
–127–>IOMMU方法集
–129–>总线的私有数据(struct subsys_private),由驱动核心(driver core)维护和使用,总线设备驱动不应直接访问
//drivers/base/base.h 28 struct subsys_private { 29 struct kset subsys; 30 struct kset *devices_kset; 31 struct list_head interfaces; 32 struct mutex mutex; 34 struct kset *drivers_kset; 35 struct klist klist_devices; 36 struct klist klist_drivers; 37 struct blocking_notifier_head bus_notifier; 38 unsigned int drivers_autoprobe:1; 39 struct bus_type *bus; 41 struct kset glue_dirs; 42 struct class *class; 43 };
–29–>对应/sys/xxx/下的一个目录,xxx可以是class或bus等sys下一级目录
–30–>对应/sys/xxx/###/devices
–31–>关联的interface
–34–>对应/sys/xxx/###/drivers
–38–>如果为1, 表示总线类型支持驱动自动探测设备
bus_register()
873 int bus_register(struct bus_type *bus) 874 { 875 int retval; 876 struct subsys_private *priv; 877 struct lock_class_key *key = &bus->lock_key; 879 priv = kzalloc(sizeof(struct subsys_private), GFP_KERNEL); 883 priv->bus = bus; 884 bus->p = priv; 886 BLOCKING_INIT_NOTIFIER_HEAD(&priv->bus_notifier); 888 retval = kobject_set_name(&priv->subsys.kobj, "%s", bus->name); 892 priv->subsys.kobj.kset = bus_kset; 893 priv->subsys.kobj.ktype = &bus_ktype; 894 priv->drivers_autoprobe = 1; 896 retval = kset_register(&priv->subsys); 900 retval = bus_create_file(bus, &bus_attr_uevent); 904 priv->devices_kset = kset_create_and_add("devices", NULL, &priv->subsys.kobj); 911 priv->drivers_kset = kset_create_and_add("drivers", NULL, &priv->subsys.kobj); 923 retval = add_probe_files(bus); 927 retval = bus_add_groups(bus, bus->bus_groups); 932 return 0; 948 }
–883-884–>建立bus_type和subsys_private的关系
–904–>创建/sys/bus/###/devices
–911–>创建/sys/bus/###/drivers
–923–>创建/sys/bus/###/drivers_probe和/sys/bus/###/drivers_autoprobe。前者是一个只写文件,向其中写入该总线上某个设备的名字,会使总线为该设备重新探测(匹配)驱动。后者可以读写总线subsys_private中的drivers_autoprobe域
device_type
device是内核对于一个设备的抽象,这里设备不仅包括真实接入系统的KeyBoard等真实设备,还包括主板上的诸多控制器,以及虚拟总线上一个没有对应物理实体的虚拟设备。
//include/linux/device.h 501 struct device_type { 502 const char *name; 503 const struct attribute_group **groups; 504 int (*uevent)(struct device *dev, struct kobj_uevent_env *env); 505 char *(*devnode)(struct device *dev, umode_t *mode, kuid_t *uid, kgid_t *gid); 507 void (*release)(struct device *dev); 509 const struct dev_pm_ops *pm; 510 }; 513 struct device_attribute { 514 struct attribute attr; 515 ssize_t (*show)(struct device *dev, struct device_attribute *attr, char *buf); 517 ssize_t (*store)(struct device *dev, struct device_attribute *attr, const char *buf, size_t count); 519 }; 539 #define DEVICE_ATTR(_name, _mode, _show, _store) \ 540 struct device_attribute dev_attr_##_name = __ATTR(_name, _mode, _show, _store) 556 #define DEVICE_ATTR_IGNORE_LOCKDEP(_name, _mode, _show, _store) \ 557 struct device_attribute dev_attr_##_name = \ 558 __ATTR_IGNORE_LOCKDEP(_name, _mode, _show, _store)
–539-558–>device属性构造宏
//drivers/base/base.h 71 struct device_private { 72 struct klist klist_children; 73 struct klist_node knode_parent; 74 struct klist_node knode_driver; 75 struct klist_node knode_bus; 76 struct list_head deferred_probe; 77 struct device *device; 78 };
//include/linux/device.h 730 struct device { 731 struct device *parent; 733 struct device_private *p; 735 struct kobject kobj; 736 const char *init_name; /* initial name of the device */ 737 const struct device_type *type; 743 struct bus_type *bus; /* type of bus device is on */ 744 struct device_driver *driver; /* which driver has allocated this device */ 746 void *platform_data; /* Platform specific data, device core doesn't touch it */ 748 void *driver_data; /* Driver data, set and get with dev_set/get_drvdata */ 750 struct dev_pm_info power; 751 struct dev_pm_domain *pm_domain; 758 int numa_node; /* NUMA node this device is close to */ 760 u64 *dma_mask; /* dma mask (if dma'able device) */ 761 u64 coherent_dma_mask; 766 unsigned long dma_pfn_offset; 768 struct device_dma_parameters *dma_parms; 770 struct list_head dma_pools; /* dma pools (if dma'ble) */ 772 struct dma_coherent_mem *dma_mem; /* internal for coherent mem override */ 775 struct cma *cma_area; /* contiguous memory area for dma allocations */ 778 /* arch specific additions */ 779 struct dev_archdata archdata; 781 struct device_node *of_node; /* associated device tree node */ 782 struct acpi_dev_node acpi_node; /* associated ACPI device node */ 784 dev_t devt; /* dev_t, creates the sysfs "dev" */ 785 u32 id; /* device instance */ 787 spinlock_t devres_lock; 788 struct list_head devres_head; 790 struct klist_node knode_class; 791 struct class *class; 792 const struct attribute_group **groups; /* optional groups */ 794 void (*release)(struct device *dev); 795 struct iommu_group *iommu_group; 797 bool offline_disabled:1; 798 bool offline:1; 799 };
–731–>设备的父设备,通常是总线控制器
–733–>设备的私有数据
–735–>设备的内嵌kobject
–736–>设备的初始名
–737–>设备类型XXX
–743–>设备依附总线的类型
–744–>驱动该设备的驱动对象
–746–>BSP相关信息
–748–>驱动相关数据
–750–>电源管理相关信息
–751–>PowerManagement相关的回调方法
–758–>在NUMA系统中,离设备较近的NUMA节点
–760–>可DMA设备的DMA掩码
–761–>可DMA设备的coherent DMA掩码
–766–>DMA内存在RAM中间的相对位置
–768–>为IOMMU代码设置段边界
–770–>可DMA设备的DMA池
–772–>用于相干内存覆写XXX
–775–>连续分配的DMA内存空间
–779–>CPU架构相关信息
–781–>关联的”设备树”节点
–782–>关联的ACPI设备节点
–784–>设备号,如果major非0,则根据该编号在sys下创建dev属性文件
–785–>XXX
–788–>设备资源列表
–790–>用于将设备添加到class的node
–791–>设备归属的class
–792–>可选的设备属性组
–794–>相关资源已经释放后,回调该接口释放device对象
–795–>设备所属的IOMMU组
–797–>离线去使能,如果为1,设备不会被offline
–798–>offline标志,回调bus_type->offline()后会将该位置位
device_register()
//drivers/base/core.c 1125 int device_register(struct device *dev) 1126 { 1127 device_initialize(dev); 1128 return device_add(dev); 1129 }
device_register()–>device_initialize()
//drivers/base/core.c 653 void device_initialize(struct device *dev) 654 { 655 dev->kobj.kset = devices_kset; 656 kobject_init(&dev->kobj, &device_ktype); 657 INIT_LIST_HEAD(&dev->dma_pools); 658 mutex_init(&dev->mutex); 659 lockdep_set_novalidate_class(&dev->mutex); 660 spin_lock_init(&dev->devres_lock); 661 INIT_LIST_HEAD(&dev->devres_head); 662 device_pm_init(dev); 663 set_dev_node(dev, -1); 664 }
–655–>devices_kset是在__init devices_init()中创建的,对应/sys/devices,该kset在创建之初就已经封装了device_uevent_ops,可以为dev所用
–656–>kobj_type device_ktype中已经封装了device_release以及dev_sysfs_ops方法
–662–>XXX
–663–>XXX
device_register()–>device_add()
//drivers/base/core.c 963 int device_add(struct device *dev) 964 { 1001 parent = get_device(dev->parent); 1002 kobj = get_device_parent(dev, parent); 1003 if (kobj) 1004 dev->kobj.parent = kobj; 1030 error = bus_add_device(dev); 1036 device_pm_add(dev); 1058 bus_probe_device(dev); 1104 }
–1001-1004–>获取设备的parent
–1020-1033–>填充device实例注册到sysfs
–1030–>根据device->device_private->knode_bus(即dev->p->knode_bus)将device添加到Bus
–1036–>将device添加到PowerManagement核心
–1058–>探测总线上的设备
device_register()–>device_add()–>get_device_parent()
该函数用来根据device的Parent-Child关系获取kobject的Parent-Child,影响设置的因素有三个:
- device->class
- device->parent
- device->parent->class
而依据device->parent, device在sysfs中有如下位置:
- /sys/devices/…/parent_device/device_name
- /sys/devices/…/parent_device_name/class_name/device_name
- /sys/devices/virtual/class_name/device_name
//drivers/base/core.c 729 static struct kobject *get_device_parent(struct device *dev, struct device *parent) 731 { 732 if (dev->class) { 751 if (parent == NULL) 752 parent_kobj = virtual_device_parent(dev); 753 else if (parent->class && !dev->class->ns_type) 754 return &parent->kobj; 755 else 756 parent_kobj = &parent->kobj; 762 list_for_each_entry(k, &dev->class->p->glue_dirs.list, entry) 763 if (k->parent == parent_kobj) { 764 kobj = kobject_get(k); 765 break; 766 } 777 return k; 778 } 781 if (!parent && dev->bus && dev->bus->dev_root) 782 return &dev->bus->dev_root->kobj; 784 if (parent) 785 return &parent->kobj; 786 return NULL; 787 }
–752–>device->class && device->parent==NULL, for “/sys/devices/virtual/class_name/device_name”
–753–>device->class && parent->class && !dev->class->ns_type, 直接返回&parent->kobj, for “/sys/devices/…/parent_device/device_name”
–756–>device->class 的其余情况, 在parent下查找(或创建)以class命名的glue目录, for “/sys/devices/…/parent_device_name/class_name/device_name”
device_register()–>device_add()–>bus_probe_device()–>device_attach()
bus_probe_device()是匹配的入口函数,其核心在于device_attach()
//drivers/base/dd.c 429 int device_attach(struct device *dev) 430 { 434 if (dev->driver) { 439 ret = device_bind_driver(dev); 440 if (ret == 0) 441 ret = 1; 442 else { 443 dev->driver = NULL; 444 ret = 0; 445 } 446 } else { 447 ret = bus_for_each_drv(dev->bus, NULL, dev, __device_attach); 448 pm_request_idle(dev); 449 } 452 return ret; 453 }
–434-439–>如果device中指定了driver,则根据其指定的绑定
–446-447–>如果没有指定,则遍历bus,匹配到driver再绑定,匹配的方法是__device_attach()
405 static int __device_attach(struct device_driver *drv, void *data) 406 { 407 struct device *dev = data; 409 if (!driver_match_device(drv, dev)) 410 return 0; 412 return driver_probe_device(drv, dev); 413 }
device_register()–>device_add()–>bus_probe_device()–>device_attach()–>__device_attach()–>driver_match_device()
driver_match_device()是__device_attach()的前部分工作。只有一句return drv->bus->match ? drv->bus->match(dev, drv) : 1;
即回调bus_type->match(dev,drv)。该函数在匹配成功之后返回1, 并对device的相关域赋值。如果没有定义match()回调函数,直接返回1, 认为是总线类型自动匹配了设备。
device_register()–>device_add()–>bus_probe_device()–>device_attach()–>__device_attach()–>driver_probe_device()–>really_probe()
匹配成功之后就要执行really_probe()进行绑定以及探测。
//drivers/base/dd.c 278 static int really_probe(struct device *dev, struct device_driver *drv) 279 { 280 int ret = 0; 288 dev->driver = drv; 295 if (driver_sysfs_add(dev)) { 299 } 301 if (dev->bus->probe) { 302 ret = dev->bus->probe(dev); 305 } else if (drv->probe) { 306 ret = drv->probe(dev); 309 } 311 driver_bound(dev); 312 ret = 1; 347 return ret; 348 }
–288–>完成初步绑定
–295–>在sysfs中加载device->driver
–301-309–>依情况,回调bus->probe()或driver->probe(),
–311–>核心是klist_add_tail(&dev->p->knode_driver, &dev->driver->p->klist_devices);,即将device和driver剩余的连接件相连。
device_driver
Driver VS Device的关系是1 : n的
//include/linux/device.h 265 struct driver_attribute { 266 struct attribute attr; 267 ssize_t (*show)(struct device_driver *driver, char *buf); 268 ssize_t (*store)(struct device_driver *driver, const char *buf, size_t count); 270 }; 271 272 #define DRIVER_ATTR(_name, _mode, _show, _store) \ 273 struct driver_attribute driver_attr_##_name = __ATTR(_name, _mode, _show, _store) 278 #define DRIVER_ATTR_WO(_name) \ 279 struct driver_attribute driver_attr_##_name = __ATTR_WO(_name)
–265-270–>driver属性结构,VS device_attribute
–272-279–>driver属性构造宏, VS DEVICE_ATTR
//include/linux/device.h 229 struct device_driver { 230 const char *name; 231 struct bus_type *bus; 233 struct module *owner; 234 const char *mod_name; /* used for built-in modules */ 236 bool suppress_bind_attrs; /* disables bind/unbind via sysfs */ 238 const struct of_device_id *of_match_table; 239 const struct acpi_device_id *acpi_match_table; 241 int (*probe) (struct device *dev); 242 int (*remove) (struct device *dev); 243 void (*shutdown) (struct device *dev); 244 int (*suspend) (struct device *dev, pm_message_t state); 245 int (*resume) (struct device *dev); 246 const struct attribute_group **groups; 248 const struct dev_pm_ops *pm; 250 struct driver_private *p; 251 };
–230–>设备驱动名
–231–>设备驱动依附的总线
–233–>模块的owner
–234–>XXX
–236–>通过sysfs去使能bind/unbind
–238–>通过设备树(open firmware)描述设备信息时的匹配表
–239–>ACPI匹配表
–241–>一旦和设备匹配成功,总线的probe方法会以设备device对象为参数回调该方法
–242–>一旦设备从总线移除,总线的remove方法会以设备device对象为参数回调该方法
–243–>系统ShutDown时静默设备回调的方法
–244–>休眠设备时回调的方法
–245–>唤醒休眠设备时回调的方法
–246–>由Driver核心自动创建的默认属性
–248–>设备使用PowerManagement方法集
–250–>驱动的私有数据(struct driver_private),由驱动核心使用
driver_register()
147 int driver_register(struct device_driver *drv) 148 { 149 int ret; 150 struct device_driver *other; 160 other = driver_find(drv->name, drv->bus); 167 ret = bus_add_driver(drv); 170 ret = driver_add_groups(drv, drv->groups); 175 kobject_uevent(&drv->p->kobj, KOBJ_ADD); 177 return ret; 178 }
–160–>根据name查找该总线上是否已经注册了同名的driver,如果已经存在,返回-EBUSY
–167–>将driver注册到内核
–170–>为driver->driver_private->kobj添加属性组
–175–>构建KOBJ_ADD事件通知用户层
driver_register()–>bus_add_driver()
666 int bus_add_driver(struct device_driver *drv) 667 { 668 struct bus_type *bus; 669 struct driver_private *priv; 672 bus = bus_get(drv->bus); 678 priv = kzalloc(sizeof(*priv), GFP_KERNEL); 683 klist_init(&priv->klist_devices, NULL, NULL); 684 priv->driver = drv; 685 drv->p = priv; 686 priv->kobj.kset = bus->p->drivers_kset; 687 error = kobject_init_and_add(&priv->kobj, &driver_ktype, NULL,"%s", drv->name); 692 klist_add_tail(&priv->knode_bus, &bus->p->klist_drivers); 693 if (drv->bus->p->drivers_autoprobe) { 694 error = driver_attach(drv);} 698 module_add_driver(drv->owner, drv); 700 error = driver_create_file(drv, &driver_attr_uevent); 705 error = driver_add_groups(drv, bus->drv_groups); 712 if (!drv->suppress_bind_attrs) { 713 error = add_bind_files(drv); 719 } 721 return 0; 730 }
–678-685–>构造driver_private对象并与device_driver对象相连
–686–>将bus_type->subsys_private->drivers_kset赋值给driver_private->kobj.kset,使该driver归属于总线的drivers目录
–687–>注册device_driver对象到sysfs
–692–>建立driver_private和bus_type->subsys_private->klist_drivers的联系
–693–>如果总线使能了自动探测(bus->p->drivers_autoprobe为1),则调用driver_attach()为该driver匹配设备
–705–>为device_driver添加属性组
–712–>如果允许绑定属性,则调用add_bind_files()在sysfs下创建相应文件
driver_register()–>bus_add_driver()->driver_attach()->__driver_attach()
driver_attach()只有一句return bus_for_each_dev(drv->bus, NULL, drv, __driver_attach);
即以__driver_attach()为回调方法遍历总线上的device,寻求可与该driver绑定的device,其实现和device_attach()基本相同XXX(why there’re diff)
456 static int __driver_attach(struct device *dev, void *data) 457 { 458 struct device_driver *drv = data; 470 if (!driver_match_device(drv, dev)) 471 return 0; 476 if (!dev->driver) 477 driver_probe_device(drv, dev); 482 return 0; 483 }
class
一个class实例对应/sys/class/###的一个目录,和所有/sys/class下的目录一样,在用户态看到该目录里都是指向/sys/devices/下某设备的符号链接。
//include/linux/device.h 352 struct class { 353 const char *name; 354 struct module *owner; 355 356 struct class_attribute *class_attrs; 357 const struct attribute_group **dev_groups; 358 struct kobject *dev_kobj; 359 360 int (*dev_uevent)(struct device *dev, struct kobj_uevent_env *env); 361 char *(*devnode)(struct device *dev, umode_t *mode); 362 363 void (*class_release)(struct class *class); 364 void (*dev_release)(struct device *dev); 365 366 int (*suspend)(struct device *dev, pm_message_t state); 367 int (*resume)(struct device *dev); 368 369 const struct kobj_ns_type_operations *ns_type; 370 const void *(*namespace)(struct device *dev); 371 372 const struct dev_pm_ops *pm; 373 374 struct subsys_private *p; 375 };
–353–>class名
–356–>class的默认属性及其操作方法
–357–>class的默认属性组
–358–>指向kobject的指针(并非内嵌对象),未设置时在__class_register()中默认指向sysfs_dev_char_kobj
–360–>触发uevent事件时的回调函数
–361–>XXX
–363–>销毁class时回调函数
–364–>销毁device时回调函数
–366–>休眠device时回调函数
–367–>唤醒device时回调函数
–369–>XXX
–370–>XXX
–372–>默认的设备PowerManagement方法集
__class_register()
//drivers/base/class.c 165 int __class_register(struct class *cls, struct lock_class_key *key) 166 { 167 struct subsys_private *cp; 168 int error; 172 cp = kzalloc(sizeof(*cp), GFP_KERNEL); 175 ... 179 error = kobject_set_name(&cp->subsys.kobj, "%s", cls->name); 186 if (!cls->dev_kobj) 187 cls->dev_kobj = sysfs_dev_char_kobj; 194 cp->subsys.kobj.kset = class_kset; 196 cp->subsys.kobj.ktype = &class_ktype; 197 cp->class = cls; 198 cls->p = cp; 199 200 error = kset_register(&cp->subsys); 205 error = add_class_attrs(class_get(cls)); 206 class_put(cls); 207 return error; 208 }
–172-175–>部分初始化subsys_private结构
–179–>根据class->name配置subsys_private的kobject
–186–>XXX
–194-196–>继续初始化cp,设置其kobject所属的kset(class_kset)和ktype(class_ktype)
–197-198–>建立subsys_private和class的联系
–200–>注册subsys_private->kset到内核
–205–>注册class->class_attribute到内核
class_interface
每个类都有一个interface链表,当设备被添加到一个class时,会调用这个interface链表上的所有add_dev();即便是先将设备添加到class而后注册interface,也会在interface被注册到class的时候对所有归属于该class的设备进行add_dev()。通过这种方式,可以实现设备“热插拔”的效果。
device_add() { if (dev->class) list_for_each_entry(class_intf,&dev->class->p->interfaces, node) if (class_intf->add_dev) class_intf->add_dev(dev, class_intf); }
//include/linux/device.h 468 struct class_interface { 469 struct list_head node; 470 struct class *class; 471 472 int (*add_dev) (struct device *, struct class_interface *); 473 void (*remove_dev) (struct device *, struct class_interface *); 474 };
–469–>链入所属类的接口列表的连接件
–470–>所属类的指针
–472–>在interface被注册到class,或者device被添加到interface所在的class时,调用此函数向interface添加device。参见”drivers/base/core.c/device_add() +1073”
–473–>在interface从class注销,或者device被从interface所在的class移除时,调用此函数从interface移除device
class_interface_register()
439 int class_interface_register(struct class_interface *class_intf) 440 { 441 struct class *parent; 442 struct class_dev_iter iter; 443 struct device *dev; 448 parent = class_get(class_intf->class); 453 list_add_tail(&class_intf->node, &parent->p->interfaces); 454 if (class_intf->add_dev) { 455 class_dev_iter_init(&iter, parent, NULL, NULL); 456 while ((dev = class_dev_iter_next(&iter))) 457 class_intf->add_dev(dev, class_intf); 458 class_dev_iter_exit(&iter); 459 } 462 return 0; 463 }
–454-459–>如果interface提供了add_dev的方法,则对class下的所有设备调用该方法。