这一篇主要学习网络设备驱动的框架性内容,具体的实例分析可以参考《Linux 驱动框架---dm9000分析》。Linux 对网络设备驱动的定义分为四层:网络协议接口层,对上是IP、ARP等网络协议,因为网络协议相对复杂且不会有特别大的变动,所以由内核来实现;网络设备接口层,实际上就是对网络设备操作的封装,封装成一个结构体由驱动工程师来填充内容,从而做到抽象;设备驱动功能层,其实就是网络设备接口层封装好的接口的具体实现,是操作硬件设备完成指定动作的软件部分;网络设备与媒介层(MAC和PHY硬件部分)。网络协议接口层就是对上提供的发送和接收接口:发送接口接受上层协议下发的数据(已经使用struct sk_buff 数据结构封装),然后调用网络设备接口层驱动硬件完成数据发送;接收接口则是在物理层接收完数据后,同样用struct sk_buff 结构封装,再交给网络协议层。驱动框架的简单层级结构如下:

Linux 驱动框架---net驱动框架

 网络协议接口层对网络层提供了两个主要接口,分别用于数据的发送(int dev_queue_xmit(struct sk_buff *skb))和接收(int netif_rx(struct sk_buff *skb))。

数据发送

数据发送接口由协议栈(响应应用程序的发送请求)在内核空间主动调用。这一部分由内核实现,主要工作是把数据包放入net_device 上的一个发送队列(queue),然后经过合适的调度调用网络设备接口层,进而进入设备驱动功能层完成数据包的发送。

/*
 * dev_queue_xmit - transmit entry point offered to the protocol layers.
 * @skb: buffer to send.
 *
 * Forwards to __dev_queue_xmit() with a NULL accel_priv and returns its
 * result unchanged.
 */
int dev_queue_xmit(struct sk_buff *skb)
{
    return __dev_queue_xmit(skb, NULL);
}
=======》
static int __dev_queue_xmit(struct sk_buff *skb, void *accel_priv) { struct net_device *dev = skb->dev; struct netdev_queue *txq; struct Qdisc *q; int rc = -ENOMEM; skb_reset_mac_header(skb); /* Disable soft irqs for various locks below. Also * stops preemption for RCU. */ rcu_read_lock_bh(); skb_update_prio(skb); txq = netdev_pick_tx(dev, skb, accel_priv); q = rcu_dereference_bh(txq->qdisc); #ifdef CONFIG_NET_CLS_ACT skb->tc_verd = SET_TC_AT(skb->tc_verd, AT_EGRESS); #endif trace_net_dev_queue(skb); if (q->enqueue) { rc = __dev_xmit_skb(skb, q, dev, txq); goto out; } /* The device has no queue. Common case for software devices: loopback, all the sorts of tunnels... Really, it is unlikely that netif_tx_lock protection is necessary here. (f.e. loopback and IP tunnels are clean ignoring statistics counters.) However, it is possible, that they rely on protection made by us here. Check this and shot the lock. It is not prone from deadlocks. Either shot noqueue qdisc, it is even simpler 8) */ if (dev->flags & IFF_UP) { int cpu = smp_processor_id(); /* ok because BHs are off */ if (txq->xmit_lock_owner != cpu) { if (__this_cpu_read(xmit_recursion) > RECURSION_LIMIT) goto recursion_alert; HARD_TX_LOCK(dev, txq, cpu); if (!netif_xmit_stopped(txq)) { __this_cpu_inc(xmit_recursion); rc = dev_hard_start_xmit(skb, dev, txq); __this_cpu_dec(xmit_recursion); if (dev_xmit_complete(rc)) { HARD_TX_UNLOCK(dev, txq); goto out; } } HARD_TX_UNLOCK(dev, txq); net_crit_ratelimited("Virtual device %s asks to queue packet!\n", dev->name); } else { /* Recursion is detected! It is possible, * unfortunately */ recursion_alert: net_crit_ratelimited("Dead loop on virtual device %s, fix it urgently!\n", dev->name); } } rc = -ENETDOWN; rcu_read_unlock_bh(); atomic_long_inc(&dev->tx_dropped); kfree_skb(skb); return rc; out: rcu_read_unlock_bh(); return rc; }

数据接收

数据接收接口同样由内核实现,是内核留给驱动层上报已接收数据的入口。网络设备驱动在中断或轮询中收到数据包后,需要将其封装成一个套接字缓冲区(struct sk_buff),然后通过内核数据接收接口上报给内核上层。

/*
 * netif_rx - receive entry point drivers use to hand a buffer to the stack.
 * @skb: received buffer.
 *
 * Calls trace_netif_rx_entry(skb), then returns whatever
 * netif_rx_internal() returns.
 */
int netif_rx(struct sk_buff *skb)
{
    trace_netif_rx_entry(skb);

    return netif_rx_internal(skb);
}
/*
 * netif_rx_internal - place a received buffer on a per-CPU backlog queue.
 * @skb: received buffer.
 *
 * Return: the value returned by enqueue_to_backlog().
 */
static int netif_rx_internal(struct sk_buff *skb)
{
    int ret;

    net_timestamp_check(netdev_tstamp_prequeue, skb);

    trace_netif_rx(skb);
#ifdef CONFIG_RPS
    /* With RPS compiled in and the rps_needed key set, get_rps_cpu()
     * chooses the CPU whose backlog receives the buffer.
     */
    if (static_key_false(&rps_needed)) {
        struct rps_dev_flow voidflow, *rflow = &voidflow;
        int cpu;

        preempt_disable();
        rcu_read_lock();

        cpu = get_rps_cpu(skb->dev, skb, &rflow);
        /* A negative result means no CPU was chosen: use the current one. */
        if (cpu < 0)
            cpu = smp_processor_id();

        ret = enqueue_to_backlog(skb, cpu, &rflow->last_qtail);

        rcu_read_unlock();
        preempt_enable();
    } else
#endif
    /* Without CONFIG_RPS this block is unconditional; with it, this is the
     * else branch of the rps_needed test above.
     */
    {
        unsigned int qtail;
        /* Enqueue on the CPU returned by get_cpu(); put_cpu() pairs with it. */
        ret = enqueue_to_backlog(skb, get_cpu(), &qtail);
        put_cpu();
    }
    return ret;
}

由上面的代码可以看出来,struct sk_buff 在网络驱动中是一个非常重要的数据结构。网络数据的整个传输过程就是对这个数据结构的访问、修改和读取的过程,所以先来简单了解一下这个数据结构:

/*
 * struct sk_buff - socket buffer.
 *
 * The packet container handed to dev_queue_xmit() on transmit and to
 * netif_rx() on receive. Several members exist only under the CONFIG_*
 * options that guard them, so the layout depends on the kernel
 * configuration.
 */
struct sk_buff {
    /* These two members must be first. */
    struct sk_buff        *next;
    struct sk_buff        *prev;

    union {
        ktime_t        tstamp;
        struct skb_mstamp skb_mstamp;
    };

    struct sock        *sk;
    struct net_device    *dev;

    /*
     * This is the control buffer. It is free to use for every
     * layer. Please put your private variables there. If you
     * want to keep them across layers you have to do a skb_clone()
     * first. This is owned by whoever has the skb queued ATM.
     */
    char            cb[48] __aligned(8);

    unsigned long        _skb_refdst;
#ifdef CONFIG_XFRM
    struct    sec_path    *sp;
#endif
    unsigned int        len,
                data_len;
    __u16            mac_len,
                hdr_len;
    union {
        __wsum        csum;
        struct {
            __u16    csum_start;
            __u16    csum_offset;
        };
    };
    __u32            priority;
    kmemcheck_bitfield_begin(flags1);
    __u8            ignore_df:1,
                cloned:1,
                ip_summed:2,
                nohdr:1,
                nfctinfo:3;
    __u8            pkt_type:3,
                fclone:2,
                ipvs_property:1,
                peeked:1,
                nf_trace:1;
    kmemcheck_bitfield_end(flags1);
    __be16            protocol;

    void            (*destructor)(struct sk_buff *skb);
#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
    struct nf_conntrack    *nfct;
#endif
#ifdef CONFIG_BRIDGE_NETFILTER
    struct nf_bridge_info    *nf_bridge;
#endif

    int            skb_iif;

    __u32            hash;

    __be16            vlan_proto;
    __u16            vlan_tci;

#ifdef CONFIG_NET_SCHED
    __u16            tc_index;    /* traffic control index */
#ifdef CONFIG_NET_CLS_ACT
    __u16            tc_verd;    /* traffic control verdict */
#endif
#endif

    __u16            queue_mapping;
    kmemcheck_bitfield_begin(flags2);
#ifdef CONFIG_IPV6_NDISC_NODETYPE
    __u8            ndisc_nodetype:2;
#endif
    __u8            pfmemalloc:1;
    __u8            ooo_okay:1;
    __u8            l4_hash:1;
    __u8            wifi_acked_valid:1;
    __u8            wifi_acked:1;
    __u8            no_fcs:1;
    __u8            head_frag:1;
    /* Encapsulation protocol and NIC drivers should use
     * this flag to indicate to each other if the skb contains
     * encapsulated packet or not and maybe use the inner packet
     * headers if needed
     */
    __u8            encapsulation:1;
    __u8            encap_hdr_csum:1;
    __u8            csum_valid:1;
    __u8            csum_complete_sw:1;
    /* 3/5 bit hole (depending on ndisc_nodetype presence) */
    kmemcheck_bitfield_end(flags2);

#if defined CONFIG_NET_DMA || defined CONFIG_NET_RX_BUSY_POLL
    union {
        unsigned int    napi_id;
        dma_cookie_t    dma_cookie;
    };
#endif
#ifdef CONFIG_NETWORK_SECMARK
    __u32            secmark;
#endif
    union {
        __u32        mark;
        __u32        dropcount;
        __u32        reserved_tailroom;
    };

    __be16            inner_protocol;
    __u16            inner_transport_header;
    __u16            inner_network_header;
    __u16            inner_mac_header;
    __u16            transport_header;
    __u16            network_header;
    __u16            mac_header;
    /* These elements must be at the end, see alloc_skb() for details.  */
    /* head, data, tail and end are the four data-related members; data and
     * tail are the ones moved by skb_put()/skb_push()/skb_pull()/skb_reserve().
     */
    sk_buff_data_t        tail;
    sk_buff_data_t        end;
    unsigned char        *head,
                *data;
    unsigned int        truesize;
    atomic_t        users;
};

成员还是比较多的,但是内核提供了对应的接口函数来直观地操作它。其中与数据相关的重要成员有四个:head、data、tail、end,它们与这个结构的关系如下图:

 Linux 驱动框架---net驱动框架

 

 这里只是框架性地记录了网络驱动分层相关的内容,具体的实现细节请参考实例分析。除此之外,与之相关的驱动接口有如下几个。

申请套接字缓冲区

/* Allocate a socket buffer whose data area is len bytes; priority is the
 * gfp_t allocation priority. */
struct sk_buff *alloc_skb(unsigned int len, gfp_t priority);
/* Allocation variant that takes no priority argument. */
struct sk_buff *dev_alloc_skb(unsigned int len);

其中len为数据缓冲区的大小,对应的释放接口有:

/* Each release function takes the buffer to free, not a length. */
void kfree_skb(struct sk_buff *skb);
void dev_kfree_skb(struct sk_buff *skb);
void dev_kfree_skb_irq(struct sk_buff *skb);
/* Wraps the two variants above, adding a check for whether the caller is
 * in interrupt context. */
void dev_kfree_skb_any(struct sk_buff *skb);

数据包操作

/* skb->tail advances toward the end of the buffer by len; skb->len grows by len. */
unsigned char *skb_put(struct sk_buff *skb, unsigned int len);
/* skb->data moves toward the start of the buffer by len; skb->len grows by len. */
unsigned char *skb_push(struct sk_buff *skb, unsigned int len);
/* skb->data advances toward the end of the buffer by len; skb->len shrinks by len. */
unsigned char *skb_pull(struct sk_buff *skb, unsigned int len);
/* skb->data and skb->tail both advance toward the end of the buffer by len.
 * Returns nothing. */
void skb_reserve(struct sk_buff *skb, int len);

上面这些接口会在驱动中调用,具体驱动分析时再详细学习。

网络设备接口

内核将网络设备接口抽象封装成了一个结构体(struct net_device),但是这个结构十分庞大且复杂,其中涵盖了统计、属性、配置等内容,定义如下:

  1 struct net_device {
  2 
  3     /*
  4      * This is the first field of the "visible" part of this structure
  5      * (i.e. as seen by users in the "Space.c" file).  It is the name
  6      * of the interface.
  7      */
  8     char            name[IFNAMSIZ];
  9 
 10     /* device name hash chain, please keep it close to name[] */
 11     struct hlist_node    name_hlist;
 12 
 13     /* snmp alias */
 14     char             *ifalias;
 15 
 16     /*
 17      *    I/O specific fields
 18      *    FIXME: Merge these and struct ifmap into one
 19      */
 20     unsigned long        mem_end;    /* shared mem end    */
 21     unsigned long        mem_start;    /* shared mem start    */
 22     unsigned long        base_addr;    /* device I/O address    */
 23     int            irq;        /* device IRQ number    */
 24 
 25     /*
 26      *    Some hardware also needs these fields, but they are not
 27      *    part of the usual set specified in Space.c.
 28      */
 29 
 30     unsigned long        state;
 31 
 32     struct list_head    dev_list;
 33     struct list_head    napi_list;
 34     struct list_head    unreg_list;
 35     struct list_head    close_list;
 36 
 37     /* directly linked devices, like slaves for bonding */
 38     struct {
 39         struct list_head upper;
 40         struct list_head lower;
 41     } adj_list;
 42 
 43     /* all linked devices, *including* neighbours */
 44     struct {
 45         struct list_head upper;
 46         struct list_head lower;
 47     } all_adj_list;
 48 
 49 
 50     /* currently active device features */
 51     netdev_features_t    features;
 52     /* user-changeable features */
 53     netdev_features_t    hw_features;
 54     /* user-requested features */
 55     netdev_features_t    wanted_features;
 56     /* mask of features inheritable by VLAN devices */
 57     netdev_features_t    vlan_features;
 58     /* mask of features inherited by encapsulating devices
 59      * This field indicates what encapsulation offloads
 60      * the hardware is capable of doing, and drivers will
 61      * need to set them appropriately.
 62      */
 63     netdev_features_t    hw_enc_features;
 64     /* mask of fetures inheritable by MPLS */
 65     netdev_features_t    mpls_features;
 66 
 67     /* Interface index. Unique device identifier    */
 68     int            ifindex;
 69     int            iflink;
 70 
 71     struct net_device_stats    stats;
 72 
 73     /* dropped packets by core network, Do not use this in drivers */
 74     atomic_long_t        rx_dropped;
 75     atomic_long_t        tx_dropped;
 76 
 77     /* Stats to monitor carrier on<->off transitions */
 78     atomic_t        carrier_changes;
 79 
 80 #ifdef CONFIG_WIRELESS_EXT
 81     /* List of functions to handle Wireless Extensions (instead of ioctl).
 82      * See <net/iw_handler.h> for details. Jean II */
 83     const struct iw_handler_def *    wireless_handlers;
 84     /* Instance data managed by the core of Wireless Extensions. */
 85     struct iw_public_data *    wireless_data;
 86 #endif
 87     /* Management operations */
 88     const struct net_device_ops *netdev_ops;
 89     const struct ethtool_ops *ethtool_ops;
 90     const struct forwarding_accel_ops *fwd_ops;
 91 
 92     /* Hardware header description */
 93     const struct header_ops *header_ops;
 94 
 95     unsigned int        flags;    /* interface flags (a la BSD)    */
 96     unsigned int        priv_flags; /* Like 'flags' but invisible to userspace.
 97                          * See if.h for definitions. */
 98     unsigned short        gflags;
 99     unsigned short        padded;    /* How much padding added by alloc_netdev() */
100 
101     unsigned char        operstate; /* RFC2863 operstate */
102     unsigned char        link_mode; /* mapping policy to operstate */
103 
104     unsigned char        if_port;    /* Selectable AUI, TP,..*/
105     unsigned char        dma;        /* DMA channel        */
106 
107     unsigned int        mtu;    /* interface MTU value        */
108     unsigned short        type;    /* interface hardware type    */
109     unsigned short        hard_header_len;    /* hardware hdr length    */
110 
111     /* extra head- and tailroom the hardware may need, but not in all cases
112      * can this be guaranteed, especially tailroom. Some cases also use
113      * LL_MAX_HEADER instead to allocate the skb.
114      */
115     unsigned short        needed_headroom;
116     unsigned short        needed_tailroom;
117 
118     /* Interface address info. */
119     unsigned char        perm_addr[MAX_ADDR_LEN]; /* permanent hw address */
120     unsigned char        addr_assign_type; /* hw address assignment type */
121     unsigned char        addr_len;    /* hardware address length    */
122     unsigned short        neigh_priv_len;
123     unsigned short          dev_id;        /* Used to differentiate devices
124                          * that share the same link
125                          * layer address
126                          */
127     unsigned short          dev_port;    /* Used to differentiate
128                          * devices that share the same
129                          * function
130                          */
131     spinlock_t        addr_list_lock;
132     struct netdev_hw_addr_list    uc;    /* Unicast mac addresses */
133     struct netdev_hw_addr_list    mc;    /* Multicast mac addresses */
134     struct netdev_hw_addr_list    dev_addrs; /* list of device
135                             * hw addresses
136                             */
137 #ifdef CONFIG_SYSFS
138     struct kset        *queues_kset;
139 #endif
140 
141     bool            uc_promisc;
142     unsigned int        promiscuity;
143     unsigned int        allmulti;
144 
145 
146     /* Protocol specific pointers */
147 
148 #if IS_ENABLED(CONFIG_VLAN_8021Q)
149     struct vlan_info __rcu    *vlan_info;    /* VLAN info */
150 #endif
151 #if IS_ENABLED(CONFIG_NET_DSA)
152     struct dsa_switch_tree    *dsa_ptr;    /* dsa specific data */
153 #endif
154 #if IS_ENABLED(CONFIG_TIPC)
155     struct tipc_bearer __rcu *tipc_ptr;    /* TIPC specific data */
156 #endif
157     void             *atalk_ptr;    /* AppleTalk link     */
158     struct in_device __rcu    *ip_ptr;    /* IPv4 specific data    */
159     struct dn_dev __rcu     *dn_ptr;        /* DECnet specific data */
160     struct inet6_dev __rcu    *ip6_ptr;       /* IPv6 specific data */
161     void            *ax25_ptr;    /* AX.25 specific data */
162     struct wireless_dev    *ieee80211_ptr;    /* IEEE 802.11 specific data,
163                            assign before registering */
164 
165 /*
166  * Cache lines mostly used on receive path (including eth_type_trans())
167  */
168     unsigned long        last_rx;    /* Time of last Rx */
169 
170     /* Interface address info used in eth_type_trans() */
171     unsigned char        *dev_addr;    /* hw address, (before bcast
172                            because most packets are
173                            unicast) */
174 
175 
176 #ifdef CONFIG_SYSFS
177     struct netdev_rx_queue    *_rx;
178 
179     /* Number of RX queues allocated at register_netdev() time */
180     unsigned int        num_rx_queues;
181 
182     /* Number of RX queues currently active in device */
183     unsigned int        real_num_rx_queues;
184 
185 #endif
186 
187     rx_handler_func_t __rcu    *rx_handler;
188     void __rcu        *rx_handler_data;
189 
190     struct netdev_queue __rcu *ingress_queue;
191     unsigned char        broadcast[MAX_ADDR_LEN];    /* hw bcast add    */
192 
193 
194 /*
195  * Cache lines mostly used on transmit path
196  */
197     struct netdev_queue    *_tx ____cacheline_aligned_in_smp;
198 
199     /* Number of TX queues allocated at alloc_netdev_mq() time  */
200     unsigned int        num_tx_queues;
201 
202     /* Number of TX queues currently active in device  */
203     unsigned int        real_num_tx_queues;
204 
205     /* root qdisc from userspace point of view */
206     struct Qdisc        *qdisc;
207 
208     unsigned long        tx_queue_len;    /* Max frames per queue allowed */
209     spinlock_t        tx_global_lock;
210 
211 #ifdef CONFIG_XPS
212     struct xps_dev_maps __rcu *xps_maps;
213 #endif
214 #ifdef CONFIG_RFS_ACCEL
215     /* CPU reverse-mapping for RX completion interrupts, indexed
216      * by RX queue number.  Assigned by driver.  This must only be
217      * set if the ndo_rx_flow_steer operation is defined. */
218     struct cpu_rmap        *rx_cpu_rmap;
219 #endif
220 
221     /* These may be needed for future network-power-down code. */
222 
223     /*
224      * trans_start here is expensive for high speed devices on SMP,
225      * please use netdev_queue->trans_start instead.
226      */
227     unsigned long        trans_start;    /* Time (in jiffies) of last Tx    */
228 
229     int            watchdog_timeo; /* used by dev_watchdog() */
230     struct timer_list    watchdog_timer;
231 
232     /* Number of references to this device */
233     int __percpu        *pcpu_refcnt;
234 
235     /* delayed register/unregister */
236     struct list_head    todo_list;
237     /* device index hash chain */
238     struct hlist_node    index_hlist;
239 
240     struct list_head    link_watch_list;
241 
242     /* register/unregister state machine */
243     enum { NETREG_UNINITIALIZED=0,
244            NETREG_REGISTERED,    /* completed register_netdevice */
245            NETREG_UNREGISTERING,    /* called unregister_netdevice */
246            NETREG_UNREGISTERED,    /* completed unregister todo */
247            NETREG_RELEASED,        /* called free_netdev */
248            NETREG_DUMMY,        /* dummy device for NAPI poll */
249     } reg_state:8;
250 
251     bool dismantle; /* device is going do be freed */
252 
253     enum {
254         RTNL_LINK_INITIALIZED,
255         RTNL_LINK_INITIALIZING,
256     } rtnl_link_state:16;
257 
258     /* Called from unregister, can be used to call free_netdev */
259     void (*destructor)(struct net_device *dev);
260 
261 #ifdef CONFIG_NETPOLL
262     struct netpoll_info __rcu    *npinfo;
263 #endif
264 
265 #ifdef CONFIG_NET_NS
266     /* Network namespace this network device is inside */
267     struct net        *nd_net;
268 #endif
269 
270     /* mid-layer private */
271     union {
272         void                *ml_priv;
273         struct pcpu_lstats __percpu    *lstats; /* loopback stats */
274         struct pcpu_sw_netstats __percpu    *tstats;
275         struct pcpu_dstats __percpu    *dstats; /* dummy stats */
276         struct pcpu_vstats __percpu    *vstats; /* veth stats */
277     };
278     /* GARP */
279     struct garp_port __rcu    *garp_port;
280     /* MRP */
281     struct mrp_port __rcu    *mrp_port;
282 
283     /* class/net/name entry */
284     struct device        dev;
285     /* space for optional device, statistics, and wireless sysfs groups */
286     const struct attribute_group *sysfs_groups[4];
287     /* space for optional per-rx queue attributes */
288     const struct attribute_group *sysfs_rx_queue_group;
289 
290     /* rtnetlink link ops */
291     const struct rtnl_link_ops *rtnl_link_ops;
292 
293     /* for setting kernel sock attribute on TCP connection setup */
294 #define GSO_MAX_SIZE        65536
295     unsigned int        gso_max_size;
296 #define GSO_MAX_SEGS        65535
297     u16            gso_max_segs;
298 
299 #ifdef CONFIG_DCB
300     /* Data Center Bridging netlink ops */
301     const struct dcbnl_rtnl_ops *dcbnl_ops;
302 #endif
303     u8 num_tc;
304     struct netdev_tc_txq tc_to_txq[TC_MAX_QUEUE];
305     u8 prio_tc_map[TC_BITMASK + 1];
306 
307 #if IS_ENABLED(CONFIG_FCOE)
308     /* max exchange id for FCoE LRO by ddp */
309     unsigned int        fcoe_ddp_xid;
310 #endif
311 #if IS_ENABLED(CONFIG_CGROUP_NET_PRIO)
312     struct netprio_map __rcu *priomap;
313 #endif
314     /* phy device may attach itself for hardware timestamping */
315     struct phy_device *phydev;
316 
317     struct lock_class_key *qdisc_tx_busylock;
318 
319     /* group the device belongs to */
320     int group;
321 
322     struct pm_qos_request    pm_qos_req;
323 };
struct net_device

相关文章: