From cc229884d3f77ec3b1240e467e0236c3e0647c0c Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Tue, 9 Jul 2013 13:19:18 +0300 Subject: [PATCH 1/2] virtio: support unlocked queue poll This adds a way to check ring empty state after enable_cb outside any locks. Will be used by virtio_net. Note: there's room for more optimization: caller is likely to have a memory barrier already, which means we might be able to get rid of a barrier here. Deferring this optimization until we do some benchmarking. Signed-off-by: Michael S. Tsirkin Signed-off-by: David S. Miller --- drivers/virtio/virtio_ring.c | 70 ++++++++++++++++++++++++++---------- include/linux/virtio.h | 4 +++ 2 files changed, 55 insertions(+), 19 deletions(-) diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c index 5217baf5528c..37d58f84dc50 100644 --- a/drivers/virtio/virtio_ring.c +++ b/drivers/virtio/virtio_ring.c @@ -606,6 +606,55 @@ void virtqueue_disable_cb(struct virtqueue *_vq) } EXPORT_SYMBOL_GPL(virtqueue_disable_cb); +/** + * virtqueue_enable_cb_prepare - restart callbacks after disable_cb + * @vq: the struct virtqueue we're talking about. + * + * This re-enables callbacks; it returns current queue state + * in an opaque unsigned value. This value should be later tested by + * virtqueue_poll, to detect a possible race between the driver checking for + * more work, and enabling callbacks. + * + * Caller must ensure we don't call this with other virtqueue + * operations at the same time (except where noted). + */ +unsigned virtqueue_enable_cb_prepare(struct virtqueue *_vq) +{ + struct vring_virtqueue *vq = to_vvq(_vq); + u16 last_used_idx; + + START_USE(vq); + + /* We optimistically turn back on interrupts, then check if there was + * more to do. */ + /* Depending on the VIRTIO_RING_F_EVENT_IDX feature, we need to + * either clear the flags bit or point the event index at the next + * entry. Always do both to keep code simple. */ + vq->vring.avail->flags &= ~VRING_AVAIL_F_NO_INTERRUPT; + vring_used_event(&vq->vring) = last_used_idx = vq->last_used_idx; + END_USE(vq); + return last_used_idx; +} +EXPORT_SYMBOL_GPL(virtqueue_enable_cb_prepare); + +/** + * virtqueue_poll - query pending used buffers + * @vq: the struct virtqueue we're talking about. + * @last_used_idx: virtqueue state (from call to virtqueue_enable_cb_prepare). + * + * Returns "true" if there are pending used buffers in the queue. + * + * This does not need to be serialized. + */ +bool virtqueue_poll(struct virtqueue *_vq, unsigned last_used_idx) +{ + struct vring_virtqueue *vq = to_vvq(_vq); + + virtio_mb(vq->weak_barriers); + return (u16)last_used_idx != vq->vring.used->idx; +} +EXPORT_SYMBOL_GPL(virtqueue_poll); + /** * virtqueue_enable_cb - restart callbacks after disable_cb. * @vq: the struct virtqueue we're talking about. @@ -619,25 +668,8 @@ EXPORT_SYMBOL_GPL(virtqueue_disable_cb); */ bool virtqueue_enable_cb(struct virtqueue *_vq) { - struct vring_virtqueue *vq = to_vvq(_vq); - - START_USE(vq); - - /* We optimistically turn back on interrupts, then check if there was - * more to do. */ - /* Depending on the VIRTIO_RING_F_EVENT_IDX feature, we need to - * either clear the flags bit or point the event index at the next - * entry. Always do both to keep code simple. */ - vq->vring.avail->flags &= ~VRING_AVAIL_F_NO_INTERRUPT; - vring_used_event(&vq->vring) = vq->last_used_idx; - virtio_mb(vq->weak_barriers); - if (unlikely(more_used(vq))) { - END_USE(vq); - return false; - } - - END_USE(vq); - return true; + unsigned last_used_idx = virtqueue_enable_cb_prepare(_vq); + return !virtqueue_poll(_vq, last_used_idx); } EXPORT_SYMBOL_GPL(virtqueue_enable_cb); diff --git a/include/linux/virtio.h b/include/linux/virtio.h index 9ff8645b7e0b..72398eea6e86 100644 --- a/include/linux/virtio.h +++ b/include/linux/virtio.h @@ -70,6 +70,10 @@ void virtqueue_disable_cb(struct virtqueue *vq); bool virtqueue_enable_cb(struct virtqueue *vq); +unsigned virtqueue_enable_cb_prepare(struct virtqueue *vq); + +bool virtqueue_poll(struct virtqueue *vq, unsigned); + bool virtqueue_enable_cb_delayed(struct virtqueue *vq); void *virtqueue_detach_unused_buf(struct virtqueue *vq); From cbdadbbf0c790f79350a8f36029208944c5487d0 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Tue, 9 Jul 2013 08:13:04 +0300 Subject: [PATCH 2/2] virtio_net: fix race in RX VQ processing virtio net called virtqueue_enable_cq on RX path after napi_complete, so with NAPI_STATE_SCHED clear - outside the implicit napi lock. This violates the requirement to synchronize virtqueue_enable_cq wrt virtqueue_add_buf. In particular, used event can move backwards, causing us to lose interrupts. In a debug build, this can trigger panic within START_USE. Jason Wang reports that he can trigger the races artificially, by adding udelay() in virtqueue_enable_cb() after virtio_mb(). However, we must call napi_complete to clear NAPI_STATE_SCHED before polling the virtqueue for used buffers, otherwise napi_schedule_prep in a callback will fail, causing us to lose RX events. To fix, call virtqueue_enable_cb_prepare with NAPI_STATE_SCHED set (under napi lock), later call virtqueue_poll with NAPI_STATE_SCHED clear (outside the lock). Reported-by: Jason Wang Tested-by: Jason Wang Acked-by: Jason Wang Signed-off-by: Michael S. Tsirkin Signed-off-by: David S. Miller --- drivers/net/virtio_net.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index c9e00387d999..42d670a468f8 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -602,7 +602,7 @@ static int virtnet_poll(struct napi_struct *napi, int budget) container_of(napi, struct receive_queue, napi); struct virtnet_info *vi = rq->vq->vdev->priv; void *buf; - unsigned int len, received = 0; + unsigned int r, len, received = 0; again: while (received < budget && @@ -619,8 +619,9 @@ again: /* Out of packets? */ if (received < budget) { + r = virtqueue_enable_cb_prepare(rq->vq); napi_complete(napi); - if (unlikely(!virtqueue_enable_cb(rq->vq)) && + if (unlikely(virtqueue_poll(rq->vq, r)) && napi_schedule_prep(napi)) { virtqueue_disable_cb(rq->vq); __napi_schedule(napi);