Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 10 additions & 1 deletion src/cart/crt_rpc.c
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
/*
* (C) Copyright 2016-2024 Intel Corporation.
* (C) Copyright 2025 Hewlett Packard Enterprise Development LP
* (C) Copyright 2025-2026 Hewlett Packard Enterprise Development LP
*
* SPDX-License-Identifier: BSD-2-Clause-Patent
*/
Expand Down Expand Up @@ -1992,3 +1992,12 @@ crt_req_get_proto_ver(crt_rpc_t *req)
{
return (req->cr_opc & CRT_PROTO_VER_MASK) >> 16;
}

/**
 * Log a caller-supplied message for an RPC through the RPC_INFO macro.
 *
 * \param[in] req	Public RPC handle. It is converted to its enclosing
 *			struct crt_rpc_priv via the crp_pub member. Nothing is
 *			logged when it is NULL.
 * \param[in] msg	Message text to log, followed by a newline. A NULL
 *			pointer is logged as an empty string.
 */
void
crt_rpc_dump(crt_rpc_t *req, const char *msg)
{
	struct crt_rpc_priv *rpc_priv;

	/* container_of() on NULL does not yield a valid struct crt_rpc_priv. */
	if (req == NULL)
		return;

	rpc_priv = container_of(req, struct crt_rpc_priv, crp_pub);

	/* Passing NULL for a "%s" conversion is undefined behaviour; use "". */
	RPC_INFO(rpc_priv, "%s\n", msg != NULL ? msg : "");
}
4 changes: 2 additions & 2 deletions src/dtx/dtx_rpc.c
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
/**
* (C) Copyright 2019-2024 Intel Corporation.
* (C) Copyright 2025 Hewlett Packard Enterprise Development LP
* (C) Copyright 2025-2026 Hewlett Packard Enterprise Development LP
*
* SPDX-License-Identifier: BSD-2-Clause-Patent
*/
Expand Down Expand Up @@ -743,7 +743,7 @@ dtx_rpc(struct ds_cont_child *cont,d_list_t *dti_list, struct dtx_entry **dtes,
dca->dca_steps = length;

/* Use helper ULT to handle DTX RPC if there are enough helper XS. */
if (dss_has_enough_helper()) {
if (0 && dss_has_enough_helper()) {
rc = ABT_eventual_create(0, &dca->dca_chore_eventual);
if (rc != ABT_SUCCESS) {
D_ERROR("failed to create eventual: %d\n", rc);
Expand Down
10 changes: 9 additions & 1 deletion src/include/cart/api.h
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
/*
* (C) Copyright 2016-2024 Intel Corporation.
* (C) Copyright 2025 Hewlett Packard Enterprise Development LP
* (C) Copyright 2025-2026 Hewlett Packard Enterprise Development LP
*
* SPDX-License-Identifier: BSD-2-Clause-Patent
*/
Expand Down Expand Up @@ -2286,6 +2286,14 @@ int crt_group_secondary_modify(crt_group_t *grp, d_rank_list_t *sec_ranks,
d_rank_list_t *prim_ranks, crt_group_mod_op_t op,
uint32_t version);

/**
* Dump CaRT RPC information: emit an RPC_INFO log entry for the given RPC
* that carries the caller-supplied message.
*
* \param[in] req The rpc structure.
* \param[in] msg The message to be logged; it is written as a plain string
*                (not interpreted as a format) followed by a newline.
*/
void crt_rpc_dump(crt_rpc_t *req, const char *msg);

/**
* Initialize swim on the specified context index.
*
Expand Down
14 changes: 8 additions & 6 deletions src/object/cli_obj.c
Original file line number Diff line number Diff line change
Expand Up @@ -1725,7 +1725,7 @@ dc_obj_retry_delay(tse_task_t *task, uint32_t opc, int err, uint32_t *retry_cnt,
/* Randomly delay [1, max_delay - 5] for DER_OVERLOAD_RETRY case. */
if (err == -DER_OVERLOAD_RETRY) {
delay = daos_rpc_rand_delay(timeout_sec) << 20;
} else if (++(*retry_cnt) > 1) {
} else if (++(*retry_cnt) > 1 || obj_is_modification_opc(opc)) {
/* Randomly delay [31 ~ 1023] us if it is not the first retried object RPC. */
delay = (d_rand() | ((1 << 5) - 1)) & ((1 << 10) - 1);
/* Rebuild is being established on the server side, wait a bit longer */
Expand All @@ -1739,16 +1739,18 @@ dc_obj_retry_delay(tse_task_t *task, uint32_t opc, int err, uint32_t *retry_cnt,
delay <<= 8;
break;
case DAOS_OBJ_RPC_CPD:
/* 8 times of the delay for compounded RPC. */
delay <<= 3;
delay <<= (*retry_cnt + 3);
break;
default:
if (obj_is_modification_opc(opc))
delay <<= (*retry_cnt + 1);
else
delay <<= (*retry_cnt - 1);
break;
}

/* Increase delay after multiple times retry. */
if (*retry_cnt >= 5)
delay <<= 1;
if (*retry_cnt > 10 || delay > 3000000)
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think we can simply multiply the delay by 10 on each retry, starting from 10 us, so that after 6 retries we end up retrying every 10 seconds.
Let's first see whether that helps.

delay = 3000000 + ((d_rand() | ((1 << 5) - 1)) & ((1 << 10) - 1));
}
}

Expand Down
8 changes: 6 additions & 2 deletions src/object/srv_obj.c
Original file line number Diff line number Diff line change
Expand Up @@ -2883,14 +2883,18 @@ ds_obj_tgt_update_handler(crt_rpc_t *rpc)
* Pre-allocate DTX entry for handling resend under such case.
*/
rc = obj_local_rw(rpc, &ioc, dth);
if (rc != 0)
if (rc != 0) {
DL_CDEBUG(
rc == -DER_INPROGRESS || rc == -DER_TX_RESTART || rc == -DER_ALREADY ||
(rc == -DER_EXIST &&
(orw->orw_api_flags & (DAOS_COND_DKEY_INSERT | DAOS_COND_AKEY_INSERT))) ||
(rc == -DER_NONEXIST &&
(orw->orw_api_flags & (DAOS_COND_DKEY_UPDATE | DAOS_COND_AKEY_UPDATE))),
DB_IO, DLOG_ERR, rc, DF_UOID, DP_UOID(orw->orw_oid));
DB_IO, DLOG_ERR, rc, "tgt_update " DF_UOID " with TX " DF_DTI,
DP_UOID(orw->orw_oid), DP_DTI(&orw->orw_dti));
if (unlikely(rc == -DER_AGAIN))
crt_rpc_dump(rpc, "tgt_update");
}

out:
if (dth != NULL)
Expand Down
6 changes: 4 additions & 2 deletions src/vos/vos_dtx.c
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
/**
* (C) Copyright 2019-2024 Intel Corporation.
* (C) Copyright 2025 Hewlett Packard Enterprise Development LP
* (C) Copyright 2025-2026 Hewlett Packard Enterprise Development LP
* (C) Copyright 2025 Google LLC
*
* SPDX-License-Identifier: BSD-2-Clause-Patent
Expand Down Expand Up @@ -1523,8 +1523,10 @@ vos_dtx_validation(struct dtx_handle *dth)
/* The DTX has been aborted at some point. Return -DER_AGAIN so that the related client
* retries later without triggering dtx_abort().
*/
if (dth->dth_aborted || rc == DTX_ST_ABORTED || rc == DTX_ST_ABORTING)
if (dth->dth_aborted || rc == DTX_ST_ABORTED || rc == DTX_ST_ABORTING) {
D_WARN("Current DTX " DF_DTI " is aborted: %d\n", DP_DTI(&dth->dth_xid), rc);
return -DER_AGAIN;
}

return rc > 0 ? 0 : rc;
}
Expand Down
5 changes: 4 additions & 1 deletion src/vos/vos_obj_cache.c
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
/**
* (C) Copyright 2016-2024 Intel Corporation.
* (C) Copyright 2026 Hewlett Packard Enterprise Development LP
*
* SPDX-License-Identifier: BSD-2-Clause-Patent
*/
Expand Down Expand Up @@ -360,8 +361,10 @@ vos_obj_hold(struct daos_lru_cache *occ, struct vos_container *cont,
obj = container_of(lret, struct vos_object, obj_llink);
}

if (obj->obj_zombie)
if (obj->obj_zombie) {
D_WARN("Hit zombie obj " DF_UOID ", need to retry\n", DP_UOID(oid));
D_GOTO(failed, rc = -DER_AGAIN);
}

if (intent == DAOS_INTENT_KILL && !(flags & VOS_OBJ_KILL_DKEY)) {
if (obj != &obj_local) {
Expand Down
Loading