Analysis of the virtio-net receive function virtnet_poll

2024-10-14 09:40:26

The virtnet_poll function is registered during virtnet_probe. Its call path is:

virtnet_probe
  init_vqs
    virtnet_alloc_queues
      for (i = 0; i < vi->max_queue_pairs; ++i)
        netif_napi_add(vi->dev, &vi->rq[i].napi, virtnet_poll, napi_weight);
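
virtnet_poll runs in NAPI context, so it only executes after the RX virtqueue's callback schedules NAPI. As a reminder of that other half of the picture, here is a simplified sketch of skb_recv_done and virtqueue_napi_schedule from the same driver (details vary across kernel versions):

// RX virtqueue callback: runs when the device signals new used buffers
static void skb_recv_done(struct virtqueue *rvq)
{
	struct virtnet_info *vi = rvq->vdev->priv;
	struct receive_queue *rq = &vi->rq[vq2rxq(rvq)];

	virtqueue_napi_schedule(&rq->napi, rvq);
}

static void virtqueue_napi_schedule(struct napi_struct *napi,
				    struct virtqueue *vq)
{
	if (napi_schedule_prep(napi)) {
		// Suppress further callbacks while we are polling
		virtqueue_disable_cb(vq);
		__napi_schedule(napi); // eventually invokes virtnet_poll
	}
}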

Its implementation is as follows:

static int virtnet_poll(struct napi_struct *napi, int budget)
{
	struct receive_queue *rq =
		container_of(napi, struct receive_queue, napi);
	unsigned int received;

	// Return the paired send queue's used-ring chain descriptors
	// to the descriptor table
	virtnet_poll_cleantx(rq);

	// Receive network packets
	received = virtnet_receive(rq, budget);

	/* Out of packets? */
	if (received < budget)
		virtqueue_napi_complete(napi, rq->vq, received);

	return received;
}
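
virtqueue_napi_complete re-enables virtqueue callbacks when the budget was not exhausted. A sketch of its logic in this era of virtio_net.c (hedged; the exact code differs between kernel versions):

static void virtqueue_napi_complete(struct napi_struct *napi,
				    struct virtqueue *vq, int processed)
{
	int opaque;

	// Prepare to re-enable the device callback (interrupt)
	opaque = virtqueue_enable_cb_prepare(vq);
	if (napi_complete_done(napi, processed) &&
	    // Buffers may have arrived before the callback was re-armed;
	    // if so, poll again rather than wait for an interrupt
	    unlikely(virtqueue_poll(vq, opaque)))
		virtqueue_napi_schedule(napi, vq);
}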

Before receiving packets, the driver first calls virtnet_poll_cleantx to reclaim the descriptor entries of the corresponding send queue:

static void virtnet_poll_cleantx(struct receive_queue *rq)
{
	struct virtnet_info *vi = rq->vq->vdev->priv;
	unsigned int index = vq2rxq(rq->vq); // queue-pair index: selects the TX queue paired with this RX queue
	struct send_queue *sq = &vi->sq[index];
	struct netdev_queue *txq = netdev_get_tx_queue(vi->dev, index);

	if (!sq->napi.weight)
		return;

	if (__netif_tx_trylock(txq)) {
		free_old_xmit_skbs(sq); // reclaim descriptors of already-transmitted skbs
		__netif_tx_unlock(txq);
	}

	if (sq->vq->num_free >= 2 + MAX_SKB_FRAGS)
		netif_tx_wake_queue(txq);
}
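
The vq2rxq conversion works because virtio-net lays out its virtqueues in a fixed interleaved order: queue pair n uses virtqueue index 2n for RX and 2n + 1 for TX (a control queue, if present, comes last). The helpers are one-liners (a sketch matching the upstream driver):

static int vq2txq(struct virtqueue *vq)
{
	return (vq->index - 1) / 2; // TX vq of pair n sits at index 2n + 1
}

static int vq2rxq(struct virtqueue *vq)
{
	return vq->index / 2;       // RX vq of pair n sits at index 2n
}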

free_old_xmit_skbs loops over virtqueue_get_buf to reclaim every completed transmit buffer:

static void free_old_xmit_skbs(struct send_queue *sq)
{
	struct sk_buff *skb;
	unsigned int len;
	struct virtnet_info *vi = sq->vq->vdev->priv;
	struct virtnet_stats *stats = this_cpu_ptr(vi->stats);
	unsigned int packets = 0;
	unsigned int bytes = 0;

	while ((skb = virtqueue_get_buf(sq->vq, &len)) != NULL) {
		pr_debug("Sent skb %p\n", skb);

		bytes += skb->len;
		packets++;

		dev_kfree_skb_any(skb); // free the skb
	}

	/* Avoid overhead when no packets have been processed
	 * happens when called speculatively from start_xmit.
	 */
	if (!packets)
		return;

	u64_stats_update_begin(&stats->tx_syncp);
	stats->tx_bytes += bytes;
	stats->tx_packets += packets;
	u64_stats_update_end(&stats->tx_syncp);
}
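
The u64_stats_update_begin/end pair lets these 64-bit counters be updated and read without tearing on 32-bit machines. For completeness, the reader side (for example in an ndo_get_stats64 handler) looks roughly like this hedged sketch of standard u64_stats_sync usage:

unsigned int start;
u64 tx_bytes, tx_packets;

do {
	start = u64_stats_fetch_begin(&stats->tx_syncp);
	tx_bytes   = stats->tx_bytes;
	tx_packets = stats->tx_packets;
} while (u64_stats_fetch_retry(&stats->tx_syncp, start)); // retry if a writer raced with us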

virtqueue_get_buf is a thin wrapper that calls virtqueue_get_buf_ctx with a NULL ctx:

void *virtqueue_get_buf_ctx(struct virtqueue *_vq, unsigned int *len,
			    void **ctx)
{
	struct vring_virtqueue *vq = to_vvq(_vq);
	void *ret;
	unsigned int i;
	u16 last_used;

	START_USE(vq);

	if (unlikely(vq->broken)) {
		END_USE(vq);
		return NULL;
	}
	// Compare last_used_idx against the used ring's idx to decide
	// whether there are still unconsumed buffers in the used ring
	if (!more_used(vq)) {
		pr_debug("No more buffers in queue\n");
		END_USE(vq);
		return NULL;
	}

	/* Only get used array entries after they have been exposed by host. */
	virtio_rmb(vq->weak_barriers);

	// Index of the used-ring entry to consume
	last_used = (vq->last_used_idx & (vq->vring.num - 1));
	// Head index into the descriptor table, taken from the used element
	i = virtio32_to_cpu(_vq->vdev, vq->vring.used->ring[last_used].id);
	// Total length written by the device across the whole desc chain
	*len = virtio32_to_cpu(_vq->vdev, vq->vring.used->ring[last_used].len);

	if (unlikely(i >= vq->vring.num)) {
		BAD_RING(vq, "id %u out of range\n", i);
		return NULL;
	}
	if (unlikely(!vq->desc_state[i].data)) {
		BAD_RING(vq, "id %u is not a head!\n", i);
		return NULL;
	}

	/* detach_buf clears data, so grab it now. */
	// Grab the data pointer (a guest virtual address) for this desc chain
	ret = vq->desc_state[i].data;
	// The buffer has been extracted; detach the desc chain and return it
	// to the free list (the end of the chain is found via the NEXT flag)
	detach_buf(vq, i, ctx);
	vq->last_used_idx++; // this used entry has been consumed
	/* If we expect an interrupt for the next entry, tell host
	 * by writing event index and flush out the write before
	 * the read in the next get_buf call. */
	// If the driver still wants interrupts, publish the event index
	if (!(vq->avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT))
		virtio_store_mb(vq->weak_barriers,
				&vring_used_event(&vq->vring),
				cpu_to_virtio16(_vq->vdev, vq->last_used_idx)); // tell the device how far we have consumed

#ifdef DEBUG
	vq->last_add_time_valid = false;
#endif

	END_USE(vq);
	return ret; // guest virtual address of the buffer
}
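
more_used, used above as the emptiness check, simply compares the driver's consumption cursor with the index the device publishes in the used ring (a sketch from the split-ring implementation in virtio_ring.c):

static inline bool more_used(const struct vring_virtqueue *vq)
{
	// The device advances vring.used->idx as it adds entries;
	// the driver advances last_used_idx as it consumes them
	return vq->last_used_idx !=
	       virtio16_to_cpu(vq->vq.vdev, vq->vring.used->idx);
}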

detach_buf unmaps each descriptor entry in the chain and puts the chain back on the free list:

static void detach_buf(struct vring_virtqueue *vq, unsigned int head,
		       void **ctx)
{
	unsigned int i, j;
	__virtio16 nextflag = cpu_to_virtio16(vq->vq.vdev, VRING_DESC_F_NEXT);

	/* Clear data ptr. */
	vq->desc_state[head].data = NULL;

	/* Put back on free list: unmap first-level descriptors and find end */
	i = head;

	while (vq->vring.desc[i].flags & nextflag) {
		vring_unmap_one(vq, &vq->vring.desc[i]);
		i = virtio16_to_cpu(vq->vq.vdev, vq->vring.desc[i].next);
		vq->vq.num_free++;
	}

	vring_unmap_one(vq, &vq->vring.desc[i]);
	vq->vring.desc[i].next = cpu_to_virtio16(vq->vq.vdev, vq->free_head); // link the chain in front of the current free list
	vq->free_head = head; // the chain head becomes the new free-list head

	/* Plus final descriptor */
	vq->vq.num_free++;

	if (vq->indirect) { ...
	} else if (ctx) {
		*ctx = vq->desc_state[head].indir_desc;
	}
}
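
This free-list walk works because every split-ring descriptor carries a next field. For reference, the descriptor layout from include/uapi/linux/virtio_ring.h:

struct vring_desc {
	__virtio64 addr;  /* guest-physical address of the buffer */
	__virtio32 len;   /* length of the buffer */
	__virtio16 flags; /* VRING_DESC_F_NEXT / _WRITE / _INDIRECT */
	__virtio16 next;  /* index of the next descriptor in the chain */
};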

With the send queue's desc chains reclaimed, packet reception begins:

static int virtnet_receive(struct receive_queue *rq, int budget)
{
	struct virtnet_info *vi = rq->vq->vdev->priv;
	unsigned int len, received = 0, bytes = 0;
	void *buf;
	struct virtnet_stats *stats = this_cpu_ptr(vi->stats);

	if (vi->mergeable_rx_bufs) {
		void *ctx;

		while (received < budget &&
		       (buf = virtqueue_get_buf_ctx(rq->vq, &len, &ctx))) { 
			bytes += receive_buf(vi, rq, buf, len, ctx);
			received++;
		}
	} else {
		// Reclaim the receive queue's used-ring chain descriptors;
		// buf is a guest virtual address, directly usable in the guest
		while (received < budget &&
		       (buf = virtqueue_get_buf(rq->vq, &len)) != NULL) {
			bytes += receive_buf(vi, rq, buf, len, NULL); // process the received packet
			received++;
		}
	}

	if (rq->vq->num_free > virtqueue_get_vring_size(rq->vq) / 2) {
		if (!try_fill_recv(vi, rq, GFP_ATOMIC))
			schedule_delayed_work(&vi->refill, 0);
	}

	u64_stats_update_begin(&stats->rx_syncp);
	stats->rx_bytes += bytes;
	stats->rx_packets += received;
	u64_stats_update_end(&stats->rx_syncp);

	return received;
}
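
When the GFP_ATOMIC refill in try_fill_recv fails, the deferred work retries in process context, where it may sleep. A simplified sketch of refill_work (hedged; the NAPI enable/disable details differ between kernel versions):

static void refill_work(struct work_struct *work)
{
	struct virtnet_info *vi =
		container_of(work, struct virtnet_info, refill.work);
	bool still_empty;
	int i;

	for (i = 0; i < vi->curr_queue_pairs; i++) {
		struct receive_queue *rq = &vi->rq[i];

		// Disable NAPI so the refill cannot race with the poll loop;
		// GFP_KERNEL allocations may sleep, unlike GFP_ATOMIC
		napi_disable(&rq->napi);
		still_empty = !try_fill_recv(vi, rq, GFP_KERNEL);
		virtnet_napi_enable(rq->vq, &rq->napi);

		// If the ring is still empty, try again later
		if (still_empty)
			schedule_delayed_work(&vi->refill, HZ / 2);
	}
}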

In receive_buf, the contents of buf are assembled into an skb and handed up to the network stack, completing the receive path.
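
In simplified form, once receive_buf has built the skb (stripping the virtio-net header, merging buffers, setting checksum hints), the tail of that path looks like this hedged sketch:

// Hypothetical minimal tail of the RX path, assuming skb is fully built
skb->protocol = eth_type_trans(skb, vi->dev); // resolve the L2 protocol and set skb->dev
napi_gro_receive(&rq->napi, skb);             // hand the packet to GRO and the stack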
