[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[PATCH 6/7] block/nbd: decouple reconnect from drain
From: |
Roman Kagan |
Subject: |
[PATCH 6/7] block/nbd: decouple reconnect from drain |
Date: |
Mon, 15 Mar 2021 09:06:10 +0300 |
The reconnection logic doesn't need to stop while in a drained section.
Moreover it has to be active during the drained section, as the requests
that were caught in-flight with the connection to the server broken can
only usefully get drained if the connection is restored. Otherwise such
requests can only either stall resulting in a deadlock (before
8c517de24a), or be aborted defeating the purpose of the reconnection
machinery (after 8c517de24a).
Since the pieces of the reconnection logic are now properly migrated
from one aio_context to another, it appears safe to just stop messing
with the drained section in the reconnection code.
Fixes: 5ad81b4946 ("nbd: Restrict connection_co reentrance")
Fixes: 8c517de24a ("block/nbd: fix drain dead-lock because of nbd
reconnect-delay")
Signed-off-by: Roman Kagan <rvkagan@yandex-team.ru>
---
block/nbd.c | 79 +++--------------------------------------------------
1 file changed, 4 insertions(+), 75 deletions(-)
diff --git a/block/nbd.c b/block/nbd.c
index a3eb9b9079..a5a9e4aca5 100644
--- a/block/nbd.c
+++ b/block/nbd.c
@@ -117,8 +117,6 @@ typedef struct BDRVNBDState {
Coroutine *connection_co;
Coroutine *teardown_co;
QemuCoSleepState *connection_co_sleep_ns_state;
- bool drained;
- bool wait_drained_end;
int in_flight;
NBDClientState state;
int connect_status;
@@ -311,12 +309,6 @@ static void nbd_client_attach_aio_context_bh(void *opaque)
qemu_mutex_unlock(&thr->mutex);
if (s->connection_co) {
- /*
- * The node is still drained, so we know the coroutine has yielded in
- * nbd_read_eof(), the only place where bs->in_flight can reach 0, or
- * it is entered for the first time. Both places are safe for entering
- * the coroutine.
- */
qemu_aio_coroutine_enter(bs->aio_context, s->connection_co);
}
bdrv_dec_in_flight(bs);
@@ -344,37 +336,6 @@ static void nbd_client_attach_aio_context(BlockDriverState
*bs,
aio_wait_bh_oneshot(new_context, nbd_client_attach_aio_context_bh, bs);
}
-static void coroutine_fn nbd_client_co_drain_begin(BlockDriverState *bs)
-{
- BDRVNBDState *s = (BDRVNBDState *)bs->opaque;
-
- s->drained = true;
- if (s->connection_co_sleep_ns_state) {
- qemu_co_sleep_wake(s->connection_co_sleep_ns_state);
- }
-
- nbd_co_establish_connection_cancel(bs, false);
-
- reconnect_delay_timer_del(s);
-
- if (qatomic_load_acquire(&s->state) == NBD_CLIENT_CONNECTING_WAIT) {
- s->state = NBD_CLIENT_CONNECTING_NOWAIT;
- qemu_co_queue_restart_all(&s->free_sema);
- }
-}
-
-static void coroutine_fn nbd_client_co_drain_end(BlockDriverState *bs)
-{
- BDRVNBDState *s = (BDRVNBDState *)bs->opaque;
-
- s->drained = false;
- if (s->wait_drained_end) {
- s->wait_drained_end = false;
- aio_co_wake(s->connection_co);
- }
-}
-
-
static void nbd_teardown_connection(BlockDriverState *bs)
{
BDRVNBDState *s = (BDRVNBDState *)bs->opaque;
@@ -686,16 +647,6 @@ static coroutine_fn void
nbd_reconnect_attempt(BDRVNBDState *s)
ret = nbd_client_handshake(s->bs, &local_err);
- if (s->drained) {
- s->wait_drained_end = true;
- while (s->drained) {
- /*
- * We may be entered once from nbd_client_attach_aio_context_bh
- * and then from nbd_client_co_drain_end. So here is a loop.
- */
- qemu_coroutine_yield();
- }
- }
bdrv_inc_in_flight(s->bs);
out:
@@ -724,26 +675,10 @@ static coroutine_fn void
nbd_co_reconnect_loop(BDRVNBDState *s)
nbd_reconnect_attempt(s);
while (nbd_client_connecting(s)) {
- if (s->drained) {
- bdrv_dec_in_flight(s->bs);
- s->wait_drained_end = true;
- while (s->drained) {
- /*
- * We may be entered once from nbd_client_attach_aio_context_bh
- * and then from nbd_client_co_drain_end. So here is a loop.
- */
- qemu_coroutine_yield();
- }
- bdrv_inc_in_flight(s->bs);
- } else {
- qemu_co_sleep_ns_wakeable(QEMU_CLOCK_REALTIME, timeout,
- &s->connection_co_sleep_ns_state);
- if (s->drained) {
- continue;
- }
- if (timeout < max_timeout) {
- timeout *= 2;
- }
+ qemu_co_sleep_ns_wakeable(QEMU_CLOCK_REALTIME, timeout,
+ &s->connection_co_sleep_ns_state);
+ if (timeout < max_timeout) {
+ timeout *= 2;
}
nbd_reconnect_attempt(s);
@@ -2548,8 +2483,6 @@ static BlockDriver bdrv_nbd = {
.bdrv_getlength = nbd_getlength,
.bdrv_detach_aio_context = nbd_client_detach_aio_context,
.bdrv_attach_aio_context = nbd_client_attach_aio_context,
- .bdrv_co_drain_begin = nbd_client_co_drain_begin,
- .bdrv_co_drain_end = nbd_client_co_drain_end,
.bdrv_refresh_filename = nbd_refresh_filename,
.bdrv_co_block_status = nbd_client_co_block_status,
.bdrv_dirname = nbd_dirname,
@@ -2577,8 +2510,6 @@ static BlockDriver bdrv_nbd_tcp = {
.bdrv_getlength = nbd_getlength,
.bdrv_detach_aio_context = nbd_client_detach_aio_context,
.bdrv_attach_aio_context = nbd_client_attach_aio_context,
- .bdrv_co_drain_begin = nbd_client_co_drain_begin,
- .bdrv_co_drain_end = nbd_client_co_drain_end,
.bdrv_refresh_filename = nbd_refresh_filename,
.bdrv_co_block_status = nbd_client_co_block_status,
.bdrv_dirname = nbd_dirname,
@@ -2606,8 +2537,6 @@ static BlockDriver bdrv_nbd_unix = {
.bdrv_getlength = nbd_getlength,
.bdrv_detach_aio_context = nbd_client_detach_aio_context,
.bdrv_attach_aio_context = nbd_client_attach_aio_context,
- .bdrv_co_drain_begin = nbd_client_co_drain_begin,
- .bdrv_co_drain_end = nbd_client_co_drain_end,
.bdrv_refresh_filename = nbd_refresh_filename,
.bdrv_co_block_status = nbd_client_co_block_status,
.bdrv_dirname = nbd_dirname,
--
2.30.2
[PATCH 5/7] block/nbd: better document a case in nbd_co_establish_connection, Roman Kagan, 2021/03/15
[PATCH 3/7] block/nbd: assert attach/detach runs in the proper context, Roman Kagan, 2021/03/15
[PATCH 4/7] block/nbd: transfer reconnection stuff across aio_context switch, Roman Kagan, 2021/03/15
[PATCH 1/7] block/nbd: avoid touching freed connect_thread, Roman Kagan, 2021/03/15