Skip to content

Commit cd459ce

Browse files
committed
prov/shm: add SAR buffer locking
There seems to be a possible hang/race condition in the shm provider where a SAR buffer is marked READY before its data has been saved. Add a lock when progressing into the SAR buffer until we determine a better solution.

Signed-off-by: aingerson <alexia.ingerson@intel.com>
1 parent d74b029 commit cd459ce

1 file changed

Lines changed: 7 additions & 0 deletions

File tree

prov/shm/src/smr_progress.c

Lines changed: 7 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -82,6 +82,10 @@ static int smr_progress_resp_entry(struct smr_ep *ep, struct smr_resp *resp,
82 82
sar_msg->sar[1].status == SMR_SAR_FREE)
83 83
break;
84 84

85+
if (peer_smr != ep->region) {
86+
if (fastlock_tryacquire(&peer_smr->lock))
87+
return -FI_EAGAIN;
88+
}
85 89
if (pending->cmd.msg.hdr.op == ofi_op_read_req)
86 90
smr_try_progress_from_sar(sar_msg, resp,
87 91
&pending->cmd, pending->iface,
@@ -94,6 +98,9 @@ static int smr_progress_resp_entry(struct smr_ep *ep, struct smr_resp *resp,
94 98
pending->device, pending->iov,
95 99
pending->iov_count, &pending->bytes_done,
96 100
&pending->next);
101+
if (peer_smr != ep->region)
102+
fastlock_release(&peer_smr->lock);
103+
97 104
if (pending->bytes_done != pending->cmd.msg.hdr.size ||
98 105
sar_msg->sar[0].status != SMR_SAR_FREE ||
99 106
sar_msg->sar[1].status != SMR_SAR_FREE)

0 commit comments

Comments
 (0)