Skip to content

Commit 237a252

Browse files
authored
Merge pull request #38 from tass500/feat/iter-32-step-timeout-cancel
Feat/iter 32 step timeout cancel
2 parents b7ea322 + 77a8d51 commit 237a252

5 files changed

Lines changed: 273 additions & 27 deletions

File tree

backend/LowCodePlatform.Backend.Tests/WorkflowRunEndpointsTests.cs

Lines changed: 102 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -98,6 +98,108 @@ private static async Task AuthenticateAsync(HttpClient client, string tenantSlug
9898
client.DefaultRequestHeaders.Authorization = new AuthenticationHeaderValue("Bearer", token);
9999
}
100100

101+
// Verifies the step-level timeout: a workflow whose only step is a 200 ms delay with
// timeoutMs = 50 is created and started through the HTTP API, and the run is then
// expected to report state "failed" with errorCode "workflow_step_timed_out".
// NOTE(review): the temp database files created below are not deleted in this method;
// confirm cleanup happens elsewhere (e.g. when the factory is disposed).
[Fact]
102+
public async Task Workflow_step_timeoutMs_should_fail_step_and_run_with_timeout_error()
103+
{
104+
// Unique file names per test run so parallel tests do not share database files.
var mgmtDbPath = Path.Combine(Path.GetTempPath(), $"lcp-test-mgmt-{Guid.NewGuid():N}.db");
105+
var tenantDbPath = Path.Combine(Path.GetTempPath(), $"lcp-test-tenant-t1-{Guid.NewGuid():N}.db");
106+
107+
var managementCs = $"Data Source={mgmtDbPath}";
108+
var tenantCs = $"Data Source={tenantDbPath}";
109+
110+
await InitializeDatabasesAsync(managementCs, "t1", tenantCs, CancellationToken.None);
111+
112+
await using var factory = new TestAppFactory("t1", mgmtDbPath, tenantDbPath);
113+
using var client = CreateTenantClient(factory, "t1");
114+
115+
await AuthenticateAsync(client, "t1");
116+
117+
var createReq = new
118+
{
119+
name = "wf-timeout",
120+
// The delay (200 ms) is longer than the step timeout (50 ms), so the step must time out.
definitionJson = "{\"steps\":[{\"type\":\"delay\",\"ms\":200,\"timeoutMs\":50}]}"
121+
};
122+
using var createResp = await client.PostAsJsonAsync("/api/workflows", createReq);
123+
Assert.Equal(HttpStatusCode.OK, createResp.StatusCode);
124+
125+
var created = await createResp.Content.ReadFromJsonAsync<Dictionary<string, object?>>();
126+
Assert.NotNull(created);
127+
var wfId = Guid.Parse(created!["workflowDefinitionId"]!.ToString()!);
128+
129+
// Start a run of the workflow; the response carries the id of the created run.
using var startResp = await client.PostAsync($"/api/workflows/{wfId}/runs", content: null);
130+
Assert.Equal(HttpStatusCode.OK, startResp.StatusCode);
131+
132+
var startPayload = await startResp.Content.ReadFromJsonAsync<Dictionary<string, object?>>();
133+
Assert.NotNull(startPayload);
134+
var runId = Guid.Parse(startPayload!["workflowRunId"]!.ToString()!);
135+
136+
// Fetch the run details and check the recorded outcome.
using var getResp = await client.GetAsync($"/api/workflows/runs/{runId}");
137+
Assert.Equal(HttpStatusCode.OK, getResp.StatusCode);
138+
139+
var details = await getResp.Content.ReadFromJsonAsync<Dictionary<string, object?>>();
140+
Assert.NotNull(details);
141+
142+
Assert.Equal("failed", details!["state"]?.ToString());
143+
Assert.Equal("workflow_step_timed_out", details!["errorCode"]?.ToString());
144+
}
145+
146+
// Verifies cancellation handling: the client starts a run of a workflow with a single
// 200 ms delay step, cancels the HTTP request after 50 ms, and then lists the runs of
// the workflow, expecting the first listed run to have state "canceled" and
// errorCode "canceled".
// NOTE(review): this test depends on wall-clock timing (50 ms cancel, fixed 150 ms wait);
// it may be flaky on a slow or heavily loaded machine. Polling the list endpoint until
// the run reaches a final state would be more robust.
[Fact]
147+
public async Task Workflow_run_start_cancellation_should_mark_run_canceled_in_db()
148+
{
149+
// Unique file names per test run so parallel tests do not share database files.
var mgmtDbPath = Path.Combine(Path.GetTempPath(), $"lcp-test-mgmt-{Guid.NewGuid():N}.db");
150+
var tenantDbPath = Path.Combine(Path.GetTempPath(), $"lcp-test-tenant-t1-{Guid.NewGuid():N}.db");
151+
152+
var managementCs = $"Data Source={mgmtDbPath}";
153+
var tenantCs = $"Data Source={tenantDbPath}";
154+
155+
await InitializeDatabasesAsync(managementCs, "t1", tenantCs, CancellationToken.None);
156+
157+
await using var factory = new TestAppFactory("t1", mgmtDbPath, tenantDbPath);
158+
using var client = CreateTenantClient(factory, "t1");
159+
160+
await AuthenticateAsync(client, "t1");
161+
162+
var createReq = new
163+
{
164+
name = "wf-cancel",
165+
// No timeoutMs here: the step should only end early because the request is canceled.
definitionJson = "{\"steps\":[{\"type\":\"delay\",\"ms\":200}]}"
166+
};
167+
using var createResp = await client.PostAsJsonAsync("/api/workflows", createReq);
168+
Assert.Equal(HttpStatusCode.OK, createResp.StatusCode);
169+
170+
var created = await createResp.Content.ReadFromJsonAsync<Dictionary<string, object?>>();
171+
Assert.NotNull(created);
172+
var wfId = Guid.Parse(created!["workflowDefinitionId"]!.ToString()!);
173+
174+
// The token source cancels itself 50 ms after creation, i.e. while the 200 ms delay
// step is presumably still running on the server.
using var cts = new CancellationTokenSource(TimeSpan.FromMilliseconds(50));
175+
try
176+
{
177+
using var _ = await client.PostAsync($"/api/workflows/{wfId}/runs", content: null, cancellationToken: cts.Token);
178+
}
179+
catch (OperationCanceledException)
180+
{
181+
// expected: client canceled the request
182+
}
183+
184+
// Give the server time to observe the cancellation and persist the final run state.
await Task.Delay(150);
185+
186+
using var listResp = await client.GetAsync($"/api/workflows/{wfId}/runs");
187+
Assert.Equal(HttpStatusCode.OK, listResp.StatusCode);
188+
189+
var list = await listResp.Content.ReadFromJsonAsync<Dictionary<string, object?>>();
190+
Assert.NotNull(list);
191+
192+
// The dictionary values are typed as object?, so "items" is serialized again and
// re-parsed to obtain a JsonElement that can be navigated.
var itemsJson = JsonSerializer.Serialize(list!["items"]);
193+
using var doc = JsonDocument.Parse(itemsJson);
194+
var itemsArr = doc.RootElement;
195+
Assert.Equal(JsonValueKind.Array, itemsArr.ValueKind);
196+
Assert.True(itemsArr.GetArrayLength() >= 1);
197+
198+
// NOTE(review): assumes the list endpoint returns the most recent run first — confirm.
var latest = itemsArr[0];
199+
Assert.Equal("canceled", latest.GetProperty("state").GetString());
200+
Assert.Equal("canceled", latest.GetProperty("errorCode").GetString());
201+
}
202+
101203
[Fact]
102204
public async Task Workflow_run_start_and_status_should_work()
103205
{

backend/Services/WorkflowRunnerService.cs

Lines changed: 132 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,8 @@ public sealed class WorkflowRunnerService
1313

1414
// Retry settings for a single step. MaxAttempts bounds the attempt loop in
// ExecuteStepAsync; the per-attempt delay is derived from this record by
// GetRetryDelayMs (presumably from DelayMs, BackoffFactor and MaxDelayMs — confirm there).
private sealed record RetryPolicy(int MaxAttempts, int DelayMs, double BackoffFactor, int? MaxDelayMs);
1515

16+
// Execution policy for a single step: its retry settings plus an optional per-attempt
// timeout in milliseconds. A null TimeoutMs means no timeout is applied to the step.
private sealed record StepPolicy(RetryPolicy Retry, int? TimeoutMs);
17+
1618
private readonly PlatformDbContext _db;
1719

1820
public WorkflowRunnerService(PlatformDbContext db)
@@ -76,6 +78,33 @@ private static void ExecuteRequireAsync(WorkflowStepRun step, JsonObject context
7678
}
7779
}
7880

81+
/// <summary>
/// Builds the <c>StepPolicy</c> for a step from its configuration JSON.
/// The retry part always comes from <c>ParseRetryPolicy</c>; the timeout is taken from
/// the top-level "timeoutMs" property when it is a 32-bit integer of at least 1.
/// </summary>
/// <param name="stepConfigJson">Step configuration JSON; may be null, empty or whitespace.</param>
/// <returns>
/// A policy whose <c>TimeoutMs</c> is null when the configuration is missing, cannot be
/// read, or holds no usable "timeoutMs" value.
/// </returns>
private static StepPolicy ParseStepPolicy(string? stepConfigJson)
82+
{
83+
// Retry settings are resolved first so that every return path below carries them.
var retry = ParseRetryPolicy(stepConfigJson);
84+
if (string.IsNullOrWhiteSpace(stepConfigJson))
85+
return new StepPolicy(Retry: retry, TimeoutMs: null);
86+
87+
try
88+
{
89+
using var doc = JsonDocument.Parse(stepConfigJson);
90+
var root = doc.RootElement;
91+
92+
int? timeoutMs = null;
93+
// Zero, negative and non-Int32 values are ignored, leaving the timeout unset.
if (root.TryGetProperty("timeoutMs", out var timeoutEl)
94+
&& timeoutEl.TryGetInt32(out var parsed)
95+
&& parsed >= 1)
96+
{
97+
timeoutMs = parsed;
98+
}
99+
100+
return new StepPolicy(Retry: retry, TimeoutMs: timeoutMs);
101+
}
102+
// NOTE(review): this bare catch treats every failure as "no timeout". Besides malformed
// JSON, that includes the exceptions System.Text.Json is documented to throw when the
// root is not an object (TryGetProperty) or the value is not a number (TryGetInt32).
// Explicit ValueKind checks plus a narrower catch would avoid hiding unrelated errors.
catch
103+
{
104+
return new StepPolicy(Retry: retry, TimeoutMs: null);
105+
}
106+
}
107+
79108
private static void ExecuteSetAsync(WorkflowStepRun step)
80109
{
81110
if (string.IsNullOrWhiteSpace(step.StepConfigJson))
@@ -673,32 +702,72 @@ public async Task<WorkflowRun> StartAsync(WorkflowDefinition wf, string traceId,
673702
_db.WorkflowRuns.Add(run);
674703
await _db.SaveChangesAsync(ct);
675704

676-
foreach (var step in run.Steps.OrderBy(x => x.StepKey))
705+
try
677706
{
678-
await ExecuteStepAsync(run, step, context, ct);
679-
await _db.SaveChangesAsync(ct);
680-
681-
if (step.State == WorkflowStepRunStates.Succeeded && !string.IsNullOrWhiteSpace(step.OutputJson))
707+
foreach (var step in run.Steps.OrderBy(x => x.StepKey))
682708
{
683-
try
709+
await ExecuteStepAsync(run, step, context, ct);
710+
await _db.SaveChangesAsync(ct);
711+
712+
if (step.State == WorkflowStepRunStates.Succeeded && !string.IsNullOrWhiteSpace(step.OutputJson))
713+
{
714+
try
715+
{
716+
context[step.StepKey] = JsonNode.Parse(step.OutputJson);
717+
}
718+
catch
719+
{
720+
// ignore invalid output json; step succeeded but context won't have it
721+
}
722+
}
723+
724+
if (step.State == WorkflowStepRunStates.Failed)
684725
{
685-
context[step.StepKey] = JsonNode.Parse(step.OutputJson);
726+
run.State = WorkflowRunStates.Failed;
727+
run.ErrorCode = step.LastErrorCode;
728+
run.ErrorMessage = step.LastErrorMessage;
729+
run.FinishedAtUtc = DateTime.UtcNow;
730+
await _db.SaveChangesAsync(ct);
731+
return run;
686732
}
687-
catch
733+
734+
if (step.State == WorkflowStepRunStates.Canceled)
688735
{
689-
// ignore invalid output json; step succeeded but context won't have it
736+
run.State = WorkflowRunStates.Canceled;
737+
run.ErrorCode = step.LastErrorCode;
738+
run.ErrorMessage = step.LastErrorMessage;
739+
run.FinishedAtUtc = DateTime.UtcNow;
740+
await _db.SaveChangesAsync(ct);
741+
return run;
690742
}
691743
}
744+
}
745+
catch (OperationCanceledException) when (ct.IsCancellationRequested)
746+
{
747+
var now = DateTime.UtcNow;
748+
run.State = WorkflowRunStates.Canceled;
749+
run.ErrorCode = "canceled";
750+
run.ErrorMessage = "Canceled.";
751+
run.FinishedAtUtc = now;
692752

693-
if (step.State == WorkflowStepRunStates.Failed)
753+
foreach (var step in run.Steps.Where(x => x.State is WorkflowStepRunStates.Pending or WorkflowStepRunStates.Running))
694754
{
695-
run.State = WorkflowRunStates.Failed;
696-
run.ErrorCode = step.LastErrorCode;
697-
run.ErrorMessage = step.LastErrorMessage;
698-
run.FinishedAtUtc = DateTime.UtcNow;
699-
await _db.SaveChangesAsync(ct);
700-
return run;
755+
step.State = WorkflowStepRunStates.Canceled;
756+
step.FinishedAtUtc ??= now;
757+
step.LastErrorCode ??= "canceled";
758+
step.LastErrorMessage ??= "Canceled.";
759+
}
760+
761+
try
762+
{
763+
await _db.SaveChangesAsync(CancellationToken.None);
701764
}
765+
catch
766+
{
767+
// ignore
768+
}
769+
770+
throw;
702771
}
703772

704773
run.State = WorkflowRunStates.Succeeded;
@@ -759,17 +828,17 @@ private static List<WorkflowStepRun> BuildSteps(WorkflowDefinition wf)
759828

760829
private async Task ExecuteStepAsync(WorkflowRun run, WorkflowStepRun step, JsonObject context, CancellationToken ct)
761830
{
762-
var policy = ParseRetryPolicy(step.StepConfigJson);
831+
var policy = ParseStepPolicy(step.StepConfigJson);
763832

764833
var startedAt = DateTime.UtcNow;
765834
step.StartedAtUtc = startedAt;
766835

767836
Exception? lastEx = null;
768-
for (var attemptNumber = 1; attemptNumber <= policy.MaxAttempts; attemptNumber += 1)
837+
for (var attemptNumber = 1; attemptNumber <= policy.Retry.MaxAttempts; attemptNumber += 1)
769838
{
770839
ct.ThrowIfCancellationRequested();
771840

772-
var delayMs = GetRetryDelayMs(policy, attemptNumber);
841+
var delayMs = GetRetryDelayMs(policy.Retry, attemptNumber);
773842
if (delayMs > 0)
774843
await Task.Delay(delayMs, ct);
775844

@@ -779,21 +848,59 @@ private async Task ExecuteStepAsync(WorkflowRun run, WorkflowStepRun step, JsonO
779848
step.LastErrorMessage = null;
780849
step.OutputJson = null;
781850

851+
CancellationToken attemptCt = ct;
852+
CancellationTokenSource? timeoutCts = null;
853+
if (policy.TimeoutMs is not null)
854+
{
855+
timeoutCts = CancellationTokenSource.CreateLinkedTokenSource(ct);
856+
timeoutCts.CancelAfter(policy.TimeoutMs.Value);
857+
attemptCt = timeoutCts.Token;
858+
}
859+
782860
try
783861
{
784862
InterpolateStepConfigJson(step, context);
785863

786-
await ExecuteStepBodyAsync(step, context, ct);
864+
await ExecuteStepBodyAsync(step, context, attemptCt);
787865

788866
step.State = WorkflowStepRunStates.Succeeded;
789867
step.FinishedAtUtc = DateTime.UtcNow;
790868
return;
791869
}
870+
catch (OperationCanceledException oce)
871+
{
872+
lastEx = oce;
873+
874+
if (ct.IsCancellationRequested)
875+
{
876+
step.State = WorkflowStepRunStates.Canceled;
877+
step.FinishedAtUtc = DateTime.UtcNow;
878+
step.LastErrorCode ??= "canceled";
879+
step.LastErrorMessage ??= "Canceled.";
880+
881+
run.ErrorCode = step.LastErrorCode;
882+
run.ErrorMessage = step.LastErrorMessage;
883+
return;
884+
}
885+
886+
// timeout (linked token canceled)
887+
step.LastErrorCode ??= "workflow_step_timed_out";
888+
step.LastErrorMessage ??= $"Step timed out after {policy.TimeoutMs} ms.";
889+
890+
if (attemptNumber < policy.Retry.MaxAttempts)
891+
continue;
892+
893+
step.State = WorkflowStepRunStates.Failed;
894+
step.FinishedAtUtc = DateTime.UtcNow;
895+
run.ErrorCode = step.LastErrorCode;
896+
run.ErrorMessage = step.LastErrorMessage;
897+
return;
898+
}
792899
catch (Exception ex)
793900
{
794901
lastEx = ex;
795902

796-
if (attemptNumber < policy.MaxAttempts)
903+
if (attemptNumber < policy.Retry.MaxAttempts)
797904
continue;
798905

799906
step.State = WorkflowStepRunStates.Failed;
@@ -805,6 +912,10 @@ private async Task ExecuteStepAsync(WorkflowRun run, WorkflowStepRun step, JsonO
805912
run.ErrorMessage = step.LastErrorMessage;
806913
return;
807914
}
915+
finally
916+
{
917+
timeoutCts?.Dispose();
918+
}
808919
}
809920

810921
step.State = WorkflowStepRunStates.Failed;

docs/live/02_allapot.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ Drift-proof observability egy greenfield lowcode platformban.
2121
- `require`
2222
- `domainCommand`
2323
- `unstable`
24-
- Jelenlegi fókusz (roadmap): Iteráció 31 — step-level retry/backoff
24+
- Jelenlegi fókusz (roadmap): Iteráció 32 — step timeout / cancellation hardening
2525

2626
- **Frontend (Angular)**
2727
- Drift-proof “now”: kliens oldali **`serverNowOffsetMs`** kalibráció `serverTimeUtc` alapján.

0 commit comments

Comments
 (0)