Skip to content

Commit 443654d

Browse files
author
tac0turtle
committed
use retry instead of failure
1 parent 5db30d0 commit 443654d

File tree

4 files changed

+40
-19
lines changed

4 files changed

+40
-19
lines changed

block/metrics_helpers.go

Lines changed: 19 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,13 @@ import (
55
"time"
66
)
77

8+
// DA metric modes
9+
const (
10+
DAModeRetry = "retry"
11+
DAModeSuccess = "success"
12+
DAModeFail = "fail"
13+
)
14+
815
// MetricsTimer helps track operation duration
916
type MetricsTimer struct {
1017
start time.Time
@@ -75,21 +82,25 @@ func (m *Manager) recordError(errorType string, recoverable bool) {
7582
}
7683
}
7784

78-
// recordDAMetrics records DA-related metrics
79-
func (m *Manager) recordDAMetrics(operation string, success bool) {
85+
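// recordDAMetrics records DA-related metrics for the "submission" or "retrieval" operation; the mode selects the counter: "retry" counts an attempt, "success" a success, and "fail" a failure.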
86+
func (m *Manager) recordDAMetrics(operation string, mode string) {
8087
switch operation {
8188
case "submission":
82-
m.metrics.DASubmissionAttempts.Add(1)
83-
if success {
89+
switch mode {
90+
case "retry":
91+
m.metrics.DASubmissionAttempts.Add(1)
92+
case "success":
8493
m.metrics.DASubmissionSuccesses.Add(1)
85-
} else {
94+
case "fail":
8695
m.metrics.DASubmissionFailures.Add(1)
8796
}
8897
case "retrieval":
89-
m.metrics.DARetrievalAttempts.Add(1)
90-
if success {
98+
switch mode {
99+
case "retry":
100+
m.metrics.DARetrievalAttempts.Add(1)
101+
case "success":
91102
m.metrics.DARetrievalSuccesses.Add(1)
92-
} else {
103+
case "fail":
93104
m.metrics.DARetrievalFailures.Add(1)
94105
}
95106
}

block/metrics_test.go

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -141,11 +141,13 @@ func TestMetricsHelpers(t *testing.T) {
141141
})
142142

143143
t.Run("recordDAMetrics", func(t *testing.T) {
144-
// Should not panic
145-
m.recordDAMetrics("submission", true)
146-
m.recordDAMetrics("submission", false)
147-
m.recordDAMetrics("retrieval", true)
148-
m.recordDAMetrics("retrieval", false)
144+
// Should not panic with three modes: retry, success, fail
145+
m.recordDAMetrics("submission", DAModeRetry)
146+
m.recordDAMetrics("submission", DAModeSuccess)
147+
m.recordDAMetrics("submission", DAModeFail)
148+
m.recordDAMetrics("retrieval", DAModeRetry)
149+
m.recordDAMetrics("retrieval", DAModeSuccess)
150+
m.recordDAMetrics("retrieval", DAModeFail)
149151
})
150152

151153
t.Run("recordBlockProductionMetrics", func(t *testing.T) {

block/retriever.go

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -73,7 +73,7 @@ func (m *Manager) processNextDAHeaderAndData(ctx context.Context) error {
7373
blobsResp, fetchErr := m.fetchBlobs(ctx, daHeight)
7474
if fetchErr == nil {
7575
// Record successful DA retrieval
76-
m.recordDAMetrics("retrieval", true)
76+
m.recordDAMetrics("retrieval", DAModeSuccess)
7777

7878
if blobsResp.Code == coreda.StatusNotFound {
7979
m.logger.Debug("no blob data found", "daHeight", daHeight, "reason", blobsResp.Message)
@@ -215,13 +215,15 @@ func (m *Manager) fetchBlobs(ctx context.Context, daHeight uint64) (coreda.Resul
215215
ctx, cancel := context.WithTimeout(ctx, dAefetcherTimeout)
216216
defer cancel()
217217

218-
// Record DA retrieval attempt
219-
m.recordDAMetrics("retrieval", false)
218+
// Record DA retrieval attempt (the "retry" mode increments the attempts counter)
219+
m.recordDAMetrics("retrieval", DAModeRetry)
220220

221221
// TODO: preserve the original error instead of creating a new one; building a fresh error here loses context.
222222
blobsRes := types.RetrieveWithHelpers(ctx, m.da, m.logger, daHeight, []byte(m.genesis.ChainID))
223223
switch blobsRes.Code {
224224
case coreda.StatusError:
225+
// Record failed DA retrieval
226+
m.recordDAMetrics("retrieval", DAModeFail)
225227
err = fmt.Errorf("failed to retrieve block: %s", blobsRes.Message)
226228
case coreda.StatusHeightFromFuture:
227229
// Keep the root cause intact for callers that may rely on errors.Is/As.

block/submitter.go

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -114,16 +114,16 @@ func submitToDA[T any](
114114

115115
submitctx, submitCtxCancel := context.WithTimeout(ctx, 60*time.Second)
116116

117-
// Record DA submission attempt
118-
m.recordDAMetrics("submission", false)
117+
// Record DA submission attempt (the "retry" mode increments the attempts counter)
118+
m.recordDAMetrics("submission", DAModeRetry)
119119

120120
res := types.SubmitWithHelpers(submitctx, m.da, m.logger, currMarshaled, gasPrice, nil)
121121
submitCtxCancel()
122122

123123
switch res.Code {
124124
case coreda.StatusSuccess:
125125
// Record successful DA submission
126-
m.recordDAMetrics("submission", true)
126+
m.recordDAMetrics("submission", DAModeSuccess)
127127

128128
m.logger.Info(fmt.Sprintf("successfully submitted %s to DA layer", itemType), "gasPrice", gasPrice, "count", res.SubmittedCount)
129129
if res.SubmittedCount == uint64(remLen) {
@@ -144,6 +144,8 @@ func submitToDA[T any](
144144
m.logger.Debug("resetting DA layer submission options", "backoff", backoff, "gasPrice", gasPrice)
145145
case coreda.StatusNotIncludedInBlock, coreda.StatusAlreadyInMempool:
146146
m.logger.Error("DA layer submission failed", "error", res.Message, "attempt", attempt)
147+
// Record failed DA submission (will retry)
148+
m.recordDAMetrics("submission", DAModeFail)
147149
backoff = m.config.DA.BlockTime.Duration * time.Duration(m.config.DA.MempoolTTL)
148150
if m.gasMultiplier > 0 && gasPrice != -1 {
149151
gasPrice = gasPrice * m.gasMultiplier
@@ -156,11 +158,15 @@ func submitToDA[T any](
156158
fallthrough
157159
default:
158160
m.logger.Error("DA layer submission failed", "error", res.Message, "attempt", attempt)
161+
// Record failed DA submission (will retry)
162+
m.recordDAMetrics("submission", DAModeFail)
159163
backoff = m.exponentialBackoff(backoff)
160164
}
161165
attempt++
162166
}
163167
if !submittedAll {
168+
// Record final failure after all retries are exhausted
169+
m.recordDAMetrics("submission", DAModeFail)
164170
// If not all items are submitted, the remaining items will be retried in the next submission loop.
165171
return fmt.Errorf("failed to submit all %s(s) to DA layer, submitted %d items (%d left) after %d attempts", itemType, numSubmitted, remLen, attempt)
166172
}

0 commit comments

Comments
 (0)