Linux 网卡驱动架构，详解网卡驱动架构

1.Linux网络子系统

系统调用接口层

为应用程序提供访问网络子系统的统一方法。

协议无关层

提供通用的方法来使用传输层协议。

协议栈的实现

实现具体的网络协议

设备无关层

协议与设备驱动之间通信的通用接口

设备驱动程序

2.重要数据结构

2.1 网卡描述结构

在Linux内核中，每个网卡都由一个net_device结构来描述，其中的一些重要成员有：

char name[IFNAMSIZ]

设备名,如：eth%d

unsigned long base_addr

I/O 基地址

const struct net_device_ops *netdev_ops;

记录了网卡所支持的操作

2.2 网卡操作集合

类似于字符设备驱动中的file_operations结构，net_device_ops结构记录了网卡所支持的操作。

/*
 * Operations table for the DM9000 driver: binds each net_device_ops hook
 * to the function that implements it.
 */
static const struct net_device_ops dm9000_netdev_ops = {
	/* dm9000_* handlers */
	.ndo_open		= dm9000_open,
	.ndo_stop		= dm9000_stop,
	.ndo_start_xmit		= dm9000_start_xmit,
	.ndo_do_ioctl		= dm9000_ioctl,

	/* eth_* handlers */
	.ndo_set_mac_address	= eth_mac_addr,
	.ndo_validate_addr	= eth_validate_addr,
};

2.3 网络数据包

Linux内核中的每个网络数据包都由一个套接字缓冲区结构 struct sk_buff 描述，即一个sk_buff结构就是一个网络包,指向sk_buff的指针通常被称做skb。

3.网卡驱动架构分析

cs89x0.c:

/* cs89x0.c: A Crystal Semiconductor (Now Cirrus Logic) CS89[02]0
 * driver for linux.
 */

/*
 * net_device_ops table for the cs89x0 driver: the entry points through
 * which the interface's operations are reached.
 */
static const struct net_device_ops net_ops = {
	/* bring-up / tear-down */
	.ndo_open		= net_open,
	.ndo_stop		= net_close,

	/* transmit path */
	.ndo_start_xmit		= net_send_packet,
	.ndo_tx_timeout		= net_timeout,

	/* statistics and configuration */
	.ndo_get_stats		= net_get_stats,
	.ndo_set_multicast_list	= set_multicast_list,
	.ndo_set_mac_address	= set_mac_address,
	.ndo_change_mtu		= eth_change_mtu,
	.ndo_validate_addr	= eth_validate_addr,

#ifdef CONFIG_NET_POLL_CONTROLLER
	.ndo_poll_controller	= net_poll_controller,
#endif
};

static netdev_tx_t net_send_packet(struct sk_buff *skb,struct net_device *dev) //网卡的发送函数

{

struct net_local *lp = netdev_priv(dev);

unsigned long flags;

if (net_debug > 3) {

printk("%s: sent %d byte packet of type %xn",

dev->name, skb->len,

(skb->data[ETH_ALEN+ETH_ALEN] << 8) | skb->data[ETH_ALEN+ETH_ALEN+1]);

}

/* keep the upload from being interrupted, since we

ask the chip to start transmitting before the

whole packet has been completely uploaded. */

spin_lock_irqsave(&lp->lock, flags);

netif_stop_queue(dev); //驱动程序通知网络子系统暂停数据包传输，从来进行实现流量控制。

/* initiate a transmit sequence */

writeword(dev->base_addr, TX_CMD_PORT, lp->send_cmd);

writeword(dev->base_addr, TX_LEN_PORT, skb->len);

/* Test to see if the chip has allocated memory for the packet */

if ((readreg(dev, PP_BusST) & READY_FOR_TX_NOW) == 0) {

* It hasn't. But that shouldn't happen since

* we're waiting for TxOk, so return 1 and requeue this packet.

spin_unlock_irqrestore(&lp->lock, flags);

if (net_debug) printk("cs89x0: Tx buffer not free!n");

return NETDEV_TX_BUSY;

}

/* Write the contents of the packet */

writewords(dev->base_addr, TX_FRAME_PORT,skb->data,(skb->len+1) >>1); //将skb的内容写入寄存器，发送

spin_unlock_irqrestore(&lp->lock, flags);

dev->stats.tx_bytes += skb->len;

dev_kfree_skb (skb); //释放skb

* We DO NOT call netif_wake_queue() here.

* We also DO NOT call netif_start_queue().

* Either of these would cause another bottom half run through

* net_send_packet() before this packet has fully gone out. That causes

* us to hit the "Gasp!" above and the send is rescheduled. it runs like

* a dog. We just return and wait for the Tx completion interrupt handler

* to restart the netdevice layer

return NETDEV_TX_OK;

}

/* The typical workload of the driver:

Handle the network interface interrupts. */

static irqreturn_t net_interrupt(int irq, void *dev_id) //网卡中断处理函数

{

struct net_device *dev = dev_id;

struct net_local *lp;

int ioaddr, status;

int handled = 0;

ioaddr = dev->base_addr;

lp = netdev_priv(dev);

/* we MUST read all the events out of the ISQ, otherwise we'll never

get interrupted again. As a consequence, we can't have any limit

on the number of times we loop in the interrupt handler. The

hardware guarantees that eventually we'll run out of events. Of

course, if you're on a slow machine, and packets are arriving

faster than you can read them off, you're screwed. Hasta la

vista, */

while ((status = readword(dev->base_addr, ISQ_PORT))) {

if (net_debug > 4)printk("%s: event=%04xn", dev->name, status);

handled = 1;

switch(status & ISQ_EVENT_MASK) {

case ISQ_RECEIVER_EVENT:

/* Got a packet(s). */

net_rx(dev);

break;

case ISQ_TRANSMITTER_EVENT:

dev->stats.tx_packets++;

netif_wake_queue(dev);/* Inform upper layers. */ //发送中停止设备，在这里被唤醒

if ((status & (TX_OK |

TX_LOST_CRS |

TX_SQE_ERROR |

TX_LATE_COL |

TX_16_COL)) != TX_OK) {

if ((status & TX_OK) == 0)

dev->stats.tx_errors++;

if (status & TX_LOST_CRS)

dev->stats.tx_carrier_errors++;

if (status & TX_SQE_ERROR)

dev->stats.tx_heartbeat_errors++;

if (status & TX_LATE_COL)

dev->stats.tx_window_errors++;

if (status & TX_16_COL)

dev->stats.tx_aborted_errors++;

}

break;

case ISQ_BUFFER_EVENT:

if (status & READY_FOR_TX) {

/* we tried to transmit a packet earlier,

but inexplicably ran out of buffers.

That shouldn't happen since we only ever

load one packet. Shrug. Do the right

thing anyway. */

netif_wake_queue(dev);/* Inform upper layers. */

}

if (status & TX_UNDERRUN) {

if (net_debug > 0) printk("%s: transmit underrunn", dev->name);

lp->send_underrun++;

if (lp->send_underrun == 3) lp->send_cmd = TX_AFTER_381;

else if (lp->send_underrun == 6) lp->send_cmd = TX_AFTER_ALL;

/* transmit cycle is done, although

frame wasn't transmitted - this

avoids having to wait for the upper

layers to timeout on us, in the

event of a tx underrun */

netif_wake_queue(dev);/* Inform upper layers. */

}

#if ALLOW_DMA

if (lp->use_dma && (status & RX_DMA)) {

int count = readreg(dev, PP_DmaFrameCnt);

while(count) {

if (net_debug > 5)

printk("%s: receiving %d DMA framesn", dev->name, count);

if (net_debug > 2 && count >1)

printk("%s: receiving %d DMA framesn", dev->name, count);

dma_rx(dev);

if (--count == 0)

count = readreg(dev, PP_DmaFrameCnt);

if (net_debug > 2 && count > 0)

printk("%s: continuing with %d DMA framesn", dev->name, count);

}

#endif

break;

case ISQ_RX_MISS_EVENT:

dev->stats.rx_missed_errors += (status >> 6);

break;

case ISQ_TX_COL_EVENT:

dev->stats.collisions += (status >> 6);

break;

}

return IRQ_RETVAL(handled);

}

...........

/*
 * net_rx - receive one frame from the chip and pass it up the stack.
 * @dev: network device the frame arrived on
 *
 * We have a good packet(s), get it/them out of the buffers.  Reads the
 * receive status and length from the RX frame port; on a status without
 * RX_OK the error is counted and the function returns.  Otherwise an skb
 * is allocated, filled from the RX frame port and handed to netif_rx().
 * If the allocation fails, rx_dropped is incremented and the function
 * returns.
 */
static void
net_rx(struct net_device *dev)
{
	struct sk_buff *skb;
	int status, length;

	int ioaddr = dev->base_addr;

	/* Two consecutive reads of the same port: status first, then length. */
	status = readword(ioaddr, RX_FRAME_PORT);
	length = readword(ioaddr, RX_FRAME_PORT);

	if ((status & RX_OK) == 0) {
		count_rx_errors(status, dev);
		return;
	}

	/* Malloc up new buffer; the 2 extra bytes are consumed by skb_reserve() below. */
	skb = dev_alloc_skb(length + 2);
	if (skb == NULL) {
#if 0		/* Again, this seems a cruel thing to do */
		printk(KERN_WARNING "%s: Memory squeeze, dropping packet.\n", dev->name);
#endif
		dev->stats.rx_dropped++;
		return;
	}
	skb_reserve(skb, 2);	/* longword align L3 header */

	/* Copy the frame out of the chip as length/2 whole 16-bit words ... */
	readwords(ioaddr, RX_FRAME_PORT, skb_put(skb, length), length >> 1);
	/* ... and take the final byte of an odd-length frame from one more word read. */
	if (length & 1)
		skb->data[length-1] = readword(ioaddr, RX_FRAME_PORT);

	if (net_debug > 3) {
		printk("%s: received %d byte packet of type %x\n",
		       dev->name, length,
		       (skb->data[ETH_ALEN+ETH_ALEN] << 8) | skb->data[ETH_ALEN+ETH_ALEN+1]);
	}

	skb->protocol = eth_type_trans(skb, dev);
	/* Hand the received packet to the protocol stack. */
	netif_rx(skb);
	dev->stats.rx_packets++;
	dev->stats.rx_bytes += length;
}

#if ALLOW_DMA

/*
 * release_dma_buff - free the driver's DMA buffer, if one is allocated.
 * @lp: driver-private state holding dma_buff and dmasize
 *
 * Frees the pages behind lp->dma_buff (sized as lp->dmasize KiB) and
 * clears the pointer, so calling it again is a no-op.
 */
static void release_dma_buff(struct net_local *lp)
{
	if (lp->dma_buff) {
		free_pages((unsigned long)(lp->dma_buff), get_order(lp->dmasize * 1024));
		lp->dma_buff = NULL;
	}
}

#endif

/*
 * net_close - the inverse routine to net_open().
 * @dev: network device being shut down
 *
 * Stops the transmit queue, writes 0 to the RX, TX, buffer and bus-control
 * configuration registers, and releases the interrupt line.  With
 * ALLOW_DMA, the DMA channel and DMA buffer are released as well when DMA
 * was in use.  No statistics are updated here.
 *
 * Always returns 0.
 */
static int
net_close(struct net_device *dev)
{
	/* Registers cleared on close, in the order they are written. */
	const int cfg_regs[] = { PP_RxCFG, PP_TxCFG, PP_BufCFG, PP_BusCTL };
	unsigned int idx;
#if ALLOW_DMA
	struct net_local *lp = netdev_priv(dev);
#endif

	netif_stop_queue(dev);

	for (idx = 0; idx < sizeof(cfg_regs) / sizeof(cfg_regs[0]); idx++)
		writereg(dev, cfg_regs[idx], 0);

	free_irq(dev->irq, dev);

#if ALLOW_DMA
	if (lp->use_dma && lp->dma) {
		free_dma(dev->dma);
		release_dma_buff(lp);
	}
#endif

	return 0;
}

........

/*
 * init_module - module entry point: allocate and set up the cs89x0 device.
 *
 * Allocates an Ethernet net_device with room for struct net_local, fills
 * it in from the irq/io/media/duplex/DMA settings, validates the I/O base
 * and DMA size, and then calls cs89x0_probe1() to finish initialisation.
 * On success the device is remembered in dev_cs89x0.
 *
 * Returns 0 on success, -ENOMEM if the device cannot be allocated, -EPERM
 * if io is 0 or the DMA size is invalid, -ENXIO if io is 0x1ff or below,
 * or whatever error cs89x0_probe1() reports.  The net_device is freed on
 * every failure path after allocation.
 */
int __init init_module(void)
{
	/* Allocate the net_device together with the driver-private area. */
	struct net_device *dev = alloc_etherdev(sizeof(struct net_local));
	struct net_local *lp;
	int ret = 0;

#if DEBUGGING
	net_debug = debug;
#else
	debug = 0;
#endif
	if (!dev)
		return -ENOMEM;

	/* Seed the freshly allocated device from the irq and io settings. */
	dev->irq = irq;
	dev->base_addr = io;
	lp = netdev_priv(dev);

#if ALLOW_DMA
	if (use_dma) {
		lp->use_dma = use_dma;
		lp->dma = dma;
		lp->dmasize = dmasize;
	}
#endif

	spin_lock_init(&lp->lock);

	/* boy, they'd better get these right */
	if (!strcmp(media, "aui"))
		lp->adapter_cnf = A_CNF_MEDIA_AUI | A_CNF_AUI;
	else if (!strcmp(media, "bnc"))
		lp->adapter_cnf = A_CNF_MEDIA_10B_2 | A_CNF_10B_2;
	else	/* "rj45", and also the fallback for any other value */
		lp->adapter_cnf = A_CNF_MEDIA_10B_T | A_CNF_10B_T;

	if (duplex == -1)
		lp->auto_neg_cnf = AUTO_NEG_ENABLE;

	if (io == 0) {
		printk(KERN_ERR "cs89x0.c: Module autoprobing not allowed.\n");
		printk(KERN_ERR "cs89x0.c: Append io=0xNNN\n");
		ret = -EPERM;
		goto out;
	} else if (io <= 0x1ff) {
		ret = -ENXIO;
		goto out;
	}

#if ALLOW_DMA
	if (use_dma && dmasize != 16 && dmasize != 64) {
		printk(KERN_ERR "cs89x0.c: dma size must be either 16K or 64K, not %dK\n", dmasize);
		ret = -EPERM;
		goto out;
	}
#endif

	/*
	 * Initialisation continues in cs89x0_probe1(), which is defined
	 * elsewhere; per the original annotation it performs the hardware
	 * setup, obtains the MAC address and registers the device
	 * (register_netdev).
	 */
	ret = cs89x0_probe1(dev, io, 1);
	if (ret)
		goto out;

	dev_cs89x0 = dev;
	return 0;

out:
	free_netdev(dev);
	return ret;
}

/*
 * cleanup_module - module exit point: tear down the device recorded in
 * dev_cs89x0.
 *
 * Unregisters the net_device, writes PP_ChipID to the chip's ADD_PORT,
 * releases the NETCARD_IO_EXTENT-sized I/O region at the device's base
 * address, and finally frees the net_device.
 */
void __exit
cleanup_module(void)
{
	struct net_device *ndev = dev_cs89x0;

	unregister_netdev(ndev);

	writeword(ndev->base_addr, ADD_PORT, PP_ChipID);
	release_region(ndev->base_addr, NETCARD_IO_EXTENT);

	free_netdev(ndev);
}

#endif /* MODULE */

/*
 * Local variables:
 *  version-control: t
 *  kept-new-versions: 5
 *  c-indent-level: 8
 *  tab-width: 8
 * End:
 */