The virtnet_poll function is registered during virtnet_probe; its call path is:
virtnet_probe
    init_vqs
        virtnet_alloc_queues
            for (i = 0; i < vi->max_queue_pairs; ++i)
                netif_napi_add(vi->dev, &vi->rq[i].napi, virtnet_poll, napi_weight);
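The napi_weight argument passed here is the driver's default NAPI poll budget. As a point of reference, in the mainline driver it is defined roughly as follows (a sketch; the exact definition may differ between kernel versions):

// Sketch: default NAPI weight in drivers/net/virtio_net.c
static int napi_weight = NAPI_POLL_WEIGHT;   // typically 64
module_param(napi_weight, int, 0444);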
Its implementation is as follows:
static int virtnet_poll(struct napi_struct *napi, int budget)
{
    struct receive_queue *rq =
        container_of(napi, struct receive_queue, napi);
    unsigned int received;

    // return the chain descriptors on the send queue's used ring to the descriptor table
    virtnet_poll_cleantx(rq);

    // receive network packets
    received = virtnet_receive(rq, budget);

    /* Out of packets? */
    if (received < budget)
        virtqueue_napi_complete(napi, rq->vq, received);

    return received;
}
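When fewer packets than the budget were processed, the poll routine finishes the NAPI cycle and re-arms the virtqueue callback (interrupt). A sketch of what virtqueue_napi_complete does in this driver, assuming a 4.x-era kernel:

// Sketch (4.x-era virtio_net.c): complete NAPI and re-enable the virtqueue callback,
// re-scheduling NAPI if more buffers raced in while interrupts were still off.
static void virtqueue_napi_complete(struct napi_struct *napi,
                                    struct virtqueue *vq, int processed)
{
    int opaque;

    opaque = virtqueue_enable_cb_prepare(vq);      // re-enable callbacks, remember the position
    if (napi_complete_done(napi, processed)) {
        if (unlikely(virtqueue_poll(vq, opaque)))  // did new buffers arrive meanwhile?
            virtqueue_napi_schedule(napi, vq);     // poll again instead of waiting for an interrupt
    } else {
        virtqueue_disable_cb(vq);                  // NAPI is still scheduled, keep callbacks off
    }
}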
Before receiving packets, the driver first calls virtnet_poll_cleantx to reclaim the descriptor-table entries of the paired send queue:
static void virtnet_poll_cleantx(struct receive_queue *rq)
{
    struct virtnet_info *vi = rq->vq->vdev->priv;
    unsigned int index = vq2rxq(rq->vq); // index of the send queue paired with this receive queue
    struct send_queue *sq = &vi->sq[index];
    struct netdev_queue *txq = netdev_get_tx_queue(vi->dev, index);

    if (!sq->napi.weight)
        return;

    if (__netif_tx_trylock(txq)) {
        free_old_xmit_skbs(sq); // release descriptors of packets already transmitted
        __netif_tx_unlock(txq);
    }

    if (sq->vq->num_free >= 2 + MAX_SKB_FRAGS)
        netif_tx_wake_queue(txq);
}
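vq2rxq maps a virtqueue back to its queue-pair index. Because receive and transmit virtqueues are interleaved (rx0 = vq0, tx0 = vq1, rx1 = vq2, ...), a receive queue and its paired send queue share the same index. A sketch of these helpers from the driver:

// Sketch: queue-pair index helpers in virtio_net.c
static int vq2txq(struct virtqueue *vq)
{
    return (vq->index - 1) / 2;   // tx queues sit at odd virtqueue indices
}

static int vq2rxq(struct virtqueue *vq)
{
    return vq->index / 2;         // rx queues sit at even virtqueue indices
}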
free_old_xmit_skbs loops over virtqueue_get_buf to reclaim every completed packet:
static void free_old_xmit_skbs(struct send_queue *sq)
{
    struct sk_buff *skb;
    unsigned int len;
    struct virtnet_info *vi = sq->vq->vdev->priv;
    struct virtnet_stats *stats = this_cpu_ptr(vi->stats);
    unsigned int packets = 0;
    unsigned int bytes = 0;

    while ((skb = virtqueue_get_buf(sq->vq, &len)) != NULL) {
        pr_debug("Sent skb %p\n", skb);

        bytes += skb->len;
        packets++;

        dev_kfree_skb_any(skb); // free the skb
    }

    /* Avoid overhead when no packets have been processed
     * happens when called speculatively from start_xmit.
     */
    if (!packets)
        return;

    u64_stats_update_begin(&stats->tx_syncp);
    stats->tx_bytes += bytes;
    stats->tx_packets += packets;
    u64_stats_update_end(&stats->tx_syncp);
}
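In kernels that provide virtqueue_get_buf_ctx, virtqueue_get_buf is essentially a thin wrapper around it (sketch for reference):

// Sketch: virtqueue_get_buf simply delegates, passing a NULL ctx
void *virtqueue_get_buf(struct virtqueue *_vq, unsigned int *len)
{
    return virtqueue_get_buf_ctx(_vq, len, NULL);
}
EXPORT_SYMBOL_GPL(virtqueue_get_buf);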
virtqueue_get_buf_ctx is where a used-ring entry is actually consumed:
void *virtqueue_get_buf_ctx(struct virtqueue *_vq, unsigned int *len,
                            void **ctx)
{
    struct vring_virtqueue *vq = to_vvq(_vq);
    void *ret;
    unsigned int i;
    u16 last_used;

    START_USE(vq);

    if (unlikely(vq->broken)) {
        END_USE(vq);
        return NULL;
    }

    // compare our last_used_idx with the used ring's idx to check
    // whether the used ring still holds buffers we have not consumed
    if (!more_used(vq)) {
        pr_debug("No more buffers in queue\n");
        END_USE(vq);
        return NULL;
    }

    /* Only get used array entries after they have been exposed by host. */
    virtio_rmb(vq->weak_barriers);

    // index of the used-ring entry to consume
    last_used = (vq->last_used_idx & (vq->vring.num - 1));
    // the used entry's id field is the head index of the chain in the descriptor table
    i = virtio32_to_cpu(_vq->vdev, vq->vring.used->ring[last_used].id);
    // actual length of this packet: len is the total length across all descriptors in the chain
    *len = virtio32_to_cpu(_vq->vdev, vq->vring.used->ring[last_used].len);

    if (unlikely(i >= vq->vring.num)) {
        BAD_RING(vq, "id %u out of range\n", i);
        return NULL;
    }
    if (unlikely(!vq->desc_state[i].data)) {
        BAD_RING(vq, "id %u is not a head!\n", i);
        return NULL;
    }

    /* detach_buf clears data, so grab it now. */
    // pointer to the buffer at the head of the descriptor chain (a GVA)
    ret = vq->desc_state[i].data;
    // the buffer has been retrieved, so release the descriptor chain
    // (the end of the chain is found via the NEXT flag)
    detach_buf(vq, i, ctx);
    vq->last_used_idx++; // this used entry has been consumed

    /* If we expect an interrupt for the next entry, tell host
     * by writing event index and flush out the write before
     * the read in the next get_buf call. */
    // if the driver still wants interrupts, publish the event index here
    if (!(vq->avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT))
        virtio_store_mb(vq->weak_barriers,
                        &vring_used_event(&vq->vring),
                        cpu_to_virtio16(_vq->vdev, vq->last_used_idx)); // tell the device how far we have consumed

#ifdef DEBUG
    vq->last_add_time_valid = false;
#endif

    END_USE(vq);
    return ret; // return the guest virtual address of the buffer
}
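The more_used check at the top compares the driver's consumer index with the index the device publishes in the used ring. Roughly, for split-ring kernels of this era (sketch):

// Sketch: is there at least one unconsumed entry on the used ring?
static inline bool more_used(const struct vring_virtqueue *vq)
{
    return vq->last_used_idx != virtio16_to_cpu(vq->vq.vdev,
                                                vq->vring.used->idx);
}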
detach_buf walks the descriptor chain, unmapping each descriptor entry and returning the chain to the free list:
static void detach_buf(struct vring_virtqueue *vq, unsigned int head,
                       void **ctx)
{
    unsigned int i, j;
    __virtio16 nextflag = cpu_to_virtio16(vq->vq.vdev, VRING_DESC_F_NEXT);

    /* Clear data ptr. */
    vq->desc_state[head].data = NULL;

    /* Put back on free list: unmap first-level descriptors and find end */
    i = head;

    while (vq->vring.desc[i].flags & nextflag) {
        vring_unmap_one(vq, &vq->vring.desc[i]);
        i = virtio16_to_cpu(vq->vq.vdev, vq->vring.desc[i].next);
        vq->vq.num_free++;
    }

    vring_unmap_one(vq, &vq->vring.desc[i]);
    vq->vring.desc[i].next = cpu_to_virtio16(vq->vq.vdev, vq->free_head); // link the chain onto the current free-list head
    vq->free_head = head; // the reclaimed chain head becomes the new free-list head

    /* Plus final descriptor */
    vq->vq.num_free++;

    if (vq->indirect) { ...
    } else if (ctx) {
        *ctx = vq->desc_state[head].indir_desc;
    }
}
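vring_unmap_one undoes the DMA mapping that was set up when the buffer was added. A rough sketch for kernels where the virtio DMA API is in use:

// Sketch: undo the DMA mapping of a single descriptor (only when the DMA API is used)
static void vring_unmap_one(const struct vring_virtqueue *vq,
                            struct vring_desc *desc)
{
    u16 flags;

    if (!vring_use_dma_api(vq->vq.vdev))
        return;                                   // device uses guest-physical addresses directly

    flags = virtio16_to_cpu(vq->vq.vdev, desc->flags);

    if (flags & VRING_DESC_F_INDIRECT) {
        dma_unmap_single(vring_dma_dev(vq),
                         virtio64_to_cpu(vq->vq.vdev, desc->addr),
                         virtio32_to_cpu(vq->vq.vdev, desc->len),
                         (flags & VRING_DESC_F_WRITE) ?
                         DMA_FROM_DEVICE : DMA_TO_DEVICE);
    } else {
        dma_unmap_page(vring_dma_dev(vq),
                       virtio64_to_cpu(vq->vq.vdev, desc->addr),
                       virtio32_to_cpu(vq->vq.vdev, desc->len),
                       (flags & VRING_DESC_F_WRITE) ?
                       DMA_FROM_DEVICE : DMA_TO_DEVICE);
    }
}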
Once the send queue's descriptor chains have been reclaimed, packet reception starts:
static int virtnet_receive(struct receive_queue *rq, int budget)
{
    struct virtnet_info *vi = rq->vq->vdev->priv;
    unsigned int len, received = 0, bytes = 0;
    void *buf;
    struct virtnet_stats *stats = this_cpu_ptr(vi->stats);

    if (vi->mergeable_rx_bufs) {
        void *ctx;

        while (received < budget &&
               (buf = virtqueue_get_buf_ctx(rq->vq, &len, &ctx))) {
            bytes += receive_buf(vi, rq, buf, len, ctx);
            received++;
        }
    } else {
        // reclaim a chain descriptor from the receive queue's used ring;
        // buf is a GVA and can be used directly inside the guest
        while (received < budget &&
               (buf = virtqueue_get_buf(rq->vq, &len)) != NULL) {
            bytes += receive_buf(vi, rq, buf, len, NULL); // process the received packet data
            received++;
        }
    }

    if (rq->vq->num_free > virtqueue_get_vring_size(rq->vq) / 2) {
        if (!try_fill_recv(vi, rq, GFP_ATOMIC))
            schedule_delayed_work(&vi->refill, 0);
    }

    u64_stats_update_begin(&stats->rx_syncp);
    stats->rx_bytes += bytes;
    stats->rx_packets += received;
    u64_stats_update_end(&stats->rx_syncp);

    return received;
}
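When more than half of the receive ring is empty, the driver reposts fresh buffers; if allocation fails (for example under GFP_ATOMIC memory pressure), the refill is retried later from a delayed work item. A rough sketch of try_fill_recv, hedged against version differences:

// Sketch: repost receive buffers until the ring is full or allocation fails
static bool try_fill_recv(struct virtnet_info *vi, struct receive_queue *rq,
                          gfp_t gfp)
{
    int err;
    bool oom;

    do {
        if (vi->mergeable_rx_bufs)
            err = add_recvbuf_mergeable(vi, rq, gfp);
        else if (vi->big_packets)
            err = add_recvbuf_big(vi, rq, gfp);
        else
            err = add_recvbuf_small(vi, rq, gfp);

        oom = err == -ENOMEM;
        if (err)
            break;
    } while (rq->vq->num_free);

    virtqueue_kick(rq->vq);     // notify the device that new buffers are available
    return !oom;                // false => caller schedules the delayed refill work
}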
In receive_buf, the contents of the buffer are assembled into an skb and finally handed to the network protocol stack, completing reception of the packet.
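For reference, the hand-off at the end of receive_buf looks roughly like the excerpt below (a sketch only; the skb itself is built earlier in receive_small / receive_big / receive_mergeable, depending on the negotiated features):

    // Sketch: tail of receive_buf() after the skb has been assembled
    skb->protocol = eth_type_trans(skb, dev);   // fill skb->protocol, strip the Ethernet header
    napi_gro_receive(&rq->napi, skb);           // hand the packet to the stack through GRO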