The virtnet_poll function is registered during virtnet_probe; its call path is:
virtnet_probe
    init_vqs
        virtnet_alloc_queues
            for (i = 0; i < vi->max_queue_pairs; ++i)
                netif_napi_add(vi->dev, &vi->rq[i].napi, virtnet_poll, napi_weight);
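The napi_weight argument passed here is the driver's default NAPI poll budget. As a point of reference, in the mainline driver it is defined roughly as follows (a sketch; the exact definition may differ between kernel versions):

// Sketch: default NAPI weight in drivers/net/virtio_net.c
static int napi_weight = NAPI_POLL_WEIGHT;   // typically 64
module_param(napi_weight, int, 0444);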
Its implementation is as follows:
static int virtnet_poll(struct napi_struct *napi, int budget)
{
    struct receive_queue *rq =
        container_of(napi, struct receive_queue, napi);
    unsigned int received;

    // return the chain descriptors on the send queue's used ring to the descriptor table
    virtnet_poll_cleantx(rq);

    // receive network packets
    received = virtnet_receive(rq, budget);

    /* Out of packets? */
    if (received < budget)
        virtqueue_napi_complete(napi, rq->vq, received);

    return received;
}
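When fewer packets than the budget were processed, the poll routine finishes the NAPI cycle and re-arms the virtqueue callback (interrupt). A sketch of what virtqueue_napi_complete does in this driver, assuming a 4.x-era kernel:

// Sketch (4.x-era virtio_net.c): complete NAPI and re-enable the virtqueue callback,
// re-scheduling NAPI if more buffers raced in while interrupts were still off.
static void virtqueue_napi_complete(struct napi_struct *napi,
                                    struct virtqueue *vq, int processed)
{
    int opaque;

    opaque = virtqueue_enable_cb_prepare(vq);      // re-enable callbacks, remember the position
    if (napi_complete_done(napi, processed)) {
        if (unlikely(virtqueue_poll(vq, opaque)))  // did new buffers arrive meanwhile?
            virtqueue_napi_schedule(napi, vq);     // poll again instead of waiting for an interrupt
    } else {
        virtqueue_disable_cb(vq);                  // NAPI is still scheduled, keep callbacks off
    }
}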
Before receiving packets, the driver first calls virtnet_poll_cleantx to reclaim the descriptor-table entries of the paired send queue:
static void virtnet_poll_cleantx(struct receive_queue *rq)
{
    struct virtnet_info *vi = rq->vq->vdev->priv;
    unsigned int index = vq2rxq(rq->vq); // index of the send queue paired with this receive queue
    struct send_queue *sq = &vi->sq[index];
    struct netdev_queue *txq = netdev_get_tx_queue(vi->dev, index);

    if (!sq->napi.weight)
        return;

    if (__netif_tx_trylock(txq)) {
        free_old_xmit_skbs(sq); // release descriptors of packets already transmitted
        __netif_tx_unlock(txq);
    }

    if (sq->vq->num_free >= 2 + MAX_SKB_FRAGS)
        netif_tx_wake_queue(txq);
}
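vq2rxq maps a virtqueue back to its queue-pair index. Because receive and transmit virtqueues are interleaved (rx0 = vq0, tx0 = vq1, rx1 = vq2, ...), a receive queue and its paired send queue share the same index. A sketch of these helpers from the driver:

// Sketch: queue-pair index helpers in virtio_net.c
static int vq2txq(struct virtqueue *vq)
{
    return (vq->index - 1) / 2;   // tx queues sit at odd virtqueue indices
}

static int vq2rxq(struct virtqueue *vq)
{
    return vq->index / 2;         // rx queues sit at even virtqueue indices
}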
free_old_xmit_skbs loops over virtqueue_get_buf to reclaim every completed packet:
static void free_old_xmit_skbs(struct send_queue *sq)
{
    struct sk_buff *skb;
    unsigned int len;
    struct virtnet_info *vi = sq->vq->vdev->priv;
    struct virtnet_stats *stats = this_cpu_ptr(vi->stats);
    unsigned int packets = 0;
    unsigned int bytes = 0;

    while ((skb = virtqueue_get_buf(sq->vq, &len)) != NULL) {
        pr_debug("Sent skb %p\n", skb);

        bytes += skb->len;
        packets++;

        dev_kfree_skb_any(skb); // free the skb
    }

    /* Avoid overhead when no packets have been processed
     * happens when called speculatively from start_xmit.
     */
    if (!packets)
        return;

    u64_stats_update_begin(&stats->tx_syncp);
    stats->tx_bytes += bytes;
    stats->tx_packets += packets;
    u64_stats_update_end(&stats->tx_syncp);
}
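In kernels that provide virtqueue_get_buf_ctx, virtqueue_get_buf is essentially a thin wrapper around it (sketch for reference):

// Sketch: virtqueue_get_buf simply delegates, passing a NULL ctx
void *virtqueue_get_buf(struct virtqueue *_vq, unsigned int *len)
{
    return virtqueue_get_buf_ctx(_vq, len, NULL);
}
EXPORT_SYMBOL_GPL(virtqueue_get_buf);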
virtqueue_get_buf_ctx is where a used-ring entry is actually consumed:
void *virtqueue_get_buf_ctx(struct virtqueue *_vq, unsigned int *len,
                            void **ctx)
{
    struct vring_virtqueue *vq = to_vvq(_vq);
    void *ret;
    unsigned int i;
    u16 last_used;

    START_USE(vq);

    if (unlikely(vq->broken)) {
        END_USE(vq);
        return NULL;
    }

    // compare our last_used_idx with the used ring's idx to check
    // whether the used ring still holds buffers we have not consumed
    if (!more_used(vq)) {
        pr_debug("No more buffers in queue\n");
        END_USE(vq);
        return NULL;
    }

    /* Only get used array entries after they have been exposed by host. */
    virtio_rmb(vq->weak_barriers);

    // index of the used-ring entry to consume
    last_used = (vq->last_used_idx & (vq->vring.num - 1));
    // the used entry's id field is the head index of the chain in the descriptor table
    i = virtio32_to_cpu(_vq->vdev, vq->vring.used->ring[last_used].id);
    // actual length of this packet: len is the total length across all descriptors in the chain
    *len = virtio32_to_cpu(_vq->vdev, vq->vring.used->ring[last_used].len);

    if (unlikely(i >= vq->vring.num)) {
        BAD_RING(vq, "id %u out of range\n", i);
        return NULL;
    }
    if (unlikely(!vq->desc_state[i].data)) {
        BAD_RING(vq, "id %u is not a head!\n", i);
        return NULL;
    }

    /* detach_buf clears data, so grab it now. */
    // pointer to the buffer at the head of the descriptor chain (a GVA)
    ret = vq->desc_state[i].data;
    // the buffer has been retrieved, so release the descriptor chain
    // (the end of the chain is found via the NEXT flag)
    detach_buf(vq, i, ctx);
    vq->last_used_idx++; // this used entry has been consumed

    /* If we expect an interrupt for the next entry, tell host
     * by writing event index and flush out the write before
     * the read in the next get_buf call. */
    // if the driver still wants interrupts, publish the event index here
    if (!(vq->avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT))
        virtio_store_mb(vq->weak_barriers,
                        &vring_used_event(&vq->vring),
                        cpu_to_virtio16(_vq->vdev, vq->last_used_idx)); // tell the device how far we have consumed

#ifdef DEBUG
    vq->last_add_time_valid = false;
#endif

    END_USE(vq);
    return ret; // return the guest virtual address of the buffer
}
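The more_used check at the top compares the driver's consumer index with the index the device publishes in the used ring. Roughly, for split-ring kernels of this era (sketch):

// Sketch: is there at least one unconsumed entry on the used ring?
static inline bool more_used(const struct vring_virtqueue *vq)
{
    return vq->last_used_idx != virtio16_to_cpu(vq->vq.vdev,
                                                vq->vring.used->idx);
}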
detach_buf walks the descriptor chain, unmapping each descriptor entry and returning the chain to the free list:
static void detach_buf(struct vring_virtqueue *vq, unsigned int head,
                       void **ctx)
{
    unsigned int i, j;
    __virtio16 nextflag = cpu_to_virtio16(vq->vq.vdev, VRING_DESC_F_NEXT);

    /* Clear data ptr. */
    vq->desc_state[head].data = NULL;

    /* Put back on free list: unmap first-level descriptors and find end */
    i = head;

    while (vq->vring.desc[i].flags & nextflag) {
        vring_unmap_one(vq, &vq->vring.desc[i]);
        i = virtio16_to_cpu(vq->vq.vdev, vq->vring.desc[i].next);
        vq->vq.num_free++;
    }

    vring_unmap_one(vq, &vq->vring.desc[i]);
    vq->vring.desc[i].next = cpu_to_virtio16(vq->vq.vdev, vq->free_head); // link the chain onto the current free-list head
    vq->free_head = head; // the reclaimed chain head becomes the new free-list head

    /* Plus final descriptor */
    vq->vq.num_free++;

    if (vq->indirect) { ...
    } else if (ctx) {
        *ctx = vq->desc_state[head].indir_desc;
    }
}
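vring_unmap_one undoes the DMA mapping that was set up when the buffer was added. A rough sketch for kernels where the virtio DMA API is in use:

// Sketch: undo the DMA mapping of a single descriptor (only when the DMA API is used)
static void vring_unmap_one(const struct vring_virtqueue *vq,
                            struct vring_desc *desc)
{
    u16 flags;

    if (!vring_use_dma_api(vq->vq.vdev))
        return;                                   // device uses guest-physical addresses directly

    flags = virtio16_to_cpu(vq->vq.vdev, desc->flags);

    if (flags & VRING_DESC_F_INDIRECT) {
        dma_unmap_single(vring_dma_dev(vq),
                         virtio64_to_cpu(vq->vq.vdev, desc->addr),
                         virtio32_to_cpu(vq->vq.vdev, desc->len),
                         (flags & VRING_DESC_F_WRITE) ?
                         DMA_FROM_DEVICE : DMA_TO_DEVICE);
    } else {
        dma_unmap_page(vring_dma_dev(vq),
                       virtio64_to_cpu(vq->vq.vdev, desc->addr),
                       virtio32_to_cpu(vq->vq.vdev, desc->len),
                       (flags & VRING_DESC_F_WRITE) ?
                       DMA_FROM_DEVICE : DMA_TO_DEVICE);
    }
}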
Once the send queue's descriptor chains have been reclaimed, packet reception starts:
static int virtnet_receive(struct receive_queue *rq, int budget)
{
    struct virtnet_info *vi = rq->vq->vdev->priv;
    unsigned int len, received = 0, bytes = 0;
    void *buf;
    struct virtnet_stats *stats = this_cpu_ptr(vi->stats);

    if (vi->mergeable_rx_bufs) {
        void *ctx;

        while (received < budget &&
               (buf = virtqueue_get_buf_ctx(rq->vq, &len, &ctx))) {
            bytes += receive_buf(vi, rq, buf, len, ctx);
            received++;
        }
    } else {
        // reclaim a chain descriptor from the receive queue's used ring;
        // buf is a GVA and can be used directly inside the guest
        while (received < budget &&
               (buf = virtqueue_get_buf(rq->vq, &len)) != NULL) {
            bytes += receive_buf(vi, rq, buf, len, NULL); // process the received packet data
            received++;
        }
    }

    if (rq->vq->num_free > virtqueue_get_vring_size(rq->vq) / 2) {
        if (!try_fill_recv(vi, rq, GFP_ATOMIC))
            schedule_delayed_work(&vi->refill, 0);
    }

    u64_stats_update_begin(&stats->rx_syncp);
    stats->rx_bytes += bytes;
    stats->rx_packets += received;
    u64_stats_update_end(&stats->rx_syncp);

    return received;
}
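When more than half of the receive ring is empty, the driver reposts fresh buffers; if allocation fails (for example under GFP_ATOMIC memory pressure), the refill is retried later from a delayed work item. A rough sketch of try_fill_recv, hedged against version differences:

// Sketch: repost receive buffers until the ring is full or allocation fails
static bool try_fill_recv(struct virtnet_info *vi, struct receive_queue *rq,
                          gfp_t gfp)
{
    int err;
    bool oom;

    do {
        if (vi->mergeable_rx_bufs)
            err = add_recvbuf_mergeable(vi, rq, gfp);
        else if (vi->big_packets)
            err = add_recvbuf_big(vi, rq, gfp);
        else
            err = add_recvbuf_small(vi, rq, gfp);

        oom = err == -ENOMEM;
        if (err)
            break;
    } while (rq->vq->num_free);

    virtqueue_kick(rq->vq);     // notify the device that new buffers are available
    return !oom;                // false => caller schedules the delayed refill work
}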
In receive_buf, the contents of the buffer are assembled into an skb and finally handed to the network protocol stack, completing reception of the packet.
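For reference, the hand-off at the end of receive_buf looks roughly like the excerpt below (a sketch only; the skb itself is built earlier in receive_small / receive_big / receive_mergeable, depending on the negotiated features):

    // Sketch: tail of receive_buf() after the skb has been assembled
    skb->protocol = eth_type_trans(skb, dev);   // fill skb->protocol, strip the Ethernet header
    napi_gro_receive(&rq->napi, skb);           // hand the packet to the stack through GRO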