Skip to content

Commit 4914aae

Browse files
authored
Fix TempLocationManagerTest flakiness on JDK 8 (#10513)
1 parent 71d8c26 commit 4914aae

File tree

1 file changed

+71
-47
lines changed

1 file changed

+71
-47
lines changed

internal-api/src/test/java/datadog/trace/util/TempLocationManagerTest.java

Lines changed: 71 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -21,10 +21,10 @@
2121
import java.util.List;
2222
import java.util.Properties;
2323
import java.util.UUID;
24-
import java.util.concurrent.Phaser;
24+
import java.util.concurrent.CountDownLatch;
2525
import java.util.concurrent.TimeUnit;
26-
import java.util.concurrent.TimeoutException;
2726
import java.util.concurrent.atomic.AtomicBoolean;
27+
import java.util.concurrent.atomic.AtomicReference;
2828
import java.util.concurrent.locks.LockSupport;
2929
import java.util.stream.Stream;
3030
import org.junit.jupiter.api.Test;
@@ -126,12 +126,18 @@ void testCleanup(String subPath) throws Exception {
126126
@ParameterizedTest
127127
@ValueSource(strings = {"preVisitDirectory", "visitFile", "postVisitDirectory"})
128128
void testConcurrentCleanup(String section) throws Exception {
129-
/* This test simulates concurrent cleanup
130-
It utilizes a special hook to create synchronization points in the filetree walking routine,
131-
allowing to delete the files at various points of execution.
132-
The test makes sure that the cleanup is not interrupted and the file and directory being deleted
133-
stays deleted.
134-
*/
129+
/*
130+
* This test simulates concurrent cleanup.
131+
* It utilizes a special hook to create synchronization points in the filetree walking routine,
132+
* allowing to delete the files at various points of execution.
133+
* The test makes sure that the cleanup is not interrupted and the file and directory being
134+
* deleted stays deleted.
135+
*
136+
* Synchronization uses CountDownLatch for deterministic coordination:
137+
* 1. Cleanup thread reaches hook point, signals via hookReached latch
138+
* 2. Main thread deletes file, signals via proceedSignal latch
139+
* 3. Cleanup thread continues and completes
140+
*/
135141
Path baseDir =
136142
Files.createTempDirectory(
137143
"ddprof-test-",
@@ -146,20 +152,32 @@ void testConcurrentCleanup(String section) throws Exception {
146152
fakeTempDir.toFile().deleteOnExit();
147153
fakeTempFile.toFile().deleteOnExit();
148154

149-
Phaser phaser = new Phaser(2);
150-
151-
Duration phaserTimeout =
152-
Duration.ofSeconds(5); // wait at most 5 seconds for phaser coordination
155+
CountDownLatch hookReached = new CountDownLatch(1);
156+
CountDownLatch proceedSignal = new CountDownLatch(1);
153157
AtomicBoolean withTimeout = new AtomicBoolean(false);
158+
AtomicReference<Throwable> cleanupError = new AtomicReference<>();
159+
154160
TempLocationManager.CleanupHook blocker =
155161
new TempLocationManager.CleanupHook() {
162+
private void syncPoint(boolean timeout) {
163+
withTimeout.compareAndSet(false, timeout);
164+
hookReached.countDown();
165+
try {
166+
if (!proceedSignal.await(30, TimeUnit.SECONDS)) {
167+
cleanupError.set(
168+
new AssertionError("Cleanup thread: proceed signal timeout after 30s"));
169+
}
170+
} catch (InterruptedException e) {
171+
Thread.currentThread().interrupt();
172+
cleanupError.set(e);
173+
}
174+
}
175+
156176
@Override
157177
public FileVisitResult preVisitDirectory(
158178
Path dir, BasicFileAttributes attrs, boolean timeout) throws IOException {
159179
if (section.equals("preVisitDirectory") && dir.equals(fakeTempDir)) {
160-
withTimeout.compareAndSet(false, timeout);
161-
arriveOrAwaitAdvance(phaser, phaserTimeout);
162-
arriveOrAwaitAdvance(phaser, phaserTimeout);
180+
syncPoint(timeout);
163181
}
164182
return null;
165183
}
@@ -168,9 +186,7 @@ public FileVisitResult preVisitDirectory(
168186
public FileVisitResult visitFile(Path file, BasicFileAttributes attrs, boolean timeout)
169187
throws IOException {
170188
if (section.equals("visitFile") && file.equals(fakeTempFile)) {
171-
withTimeout.compareAndSet(false, timeout);
172-
arriveOrAwaitAdvance(phaser, phaserTimeout);
173-
arriveOrAwaitAdvance(phaser, phaserTimeout);
189+
syncPoint(timeout);
174190
}
175191
return null;
176192
}
@@ -179,33 +195,51 @@ public FileVisitResult visitFile(Path file, BasicFileAttributes attrs, boolean t
179195
public FileVisitResult postVisitDirectory(Path dir, IOException exc, boolean timeout)
180196
throws IOException {
181197
if (section.equals("postVisitDirectory") && dir.equals(fakeTempDir)) {
182-
withTimeout.compareAndSet(false, timeout);
183-
arriveOrAwaitAdvance(phaser, phaserTimeout);
184-
arriveOrAwaitAdvance(phaser, phaserTimeout);
198+
syncPoint(timeout);
185199
}
186200
return null;
187201
}
188202
};
189203

190204
TempLocationManager mgr = instance(baseDir, true, blocker);
191205

192-
// wait for the cleanup start
193-
if (arriveOrAwaitAdvance(phaser, phaserTimeout)) {
194-
Files.deleteIfExists(fakeTempFile);
195-
assertTrue(arriveOrAwaitAdvance(phaser, phaserTimeout));
206+
// Wait for cleanup to reach the synchronization point
207+
boolean reached = hookReached.await(30, TimeUnit.SECONDS);
208+
assertTrue(reached, "Cleanup thread should reach hook within 30 seconds");
209+
210+
// Now we know cleanup is paused at the hook - safe to delete
211+
Files.deleteIfExists(fakeTempFile);
212+
213+
// Signal cleanup to proceed
214+
proceedSignal.countDown();
215+
216+
// Wait for cleanup to complete
217+
assertTrue(mgr.waitForCleanup(30, TimeUnit.SECONDS), "Cleanup should complete within 30s");
218+
219+
// Check for errors in cleanup thread
220+
Throwable error = cleanupError.get();
221+
if (error != null) {
222+
throw new AssertionError("Cleanup thread encountered error", error);
196223
}
197-
assertFalse(
198-
withTimeout.get()); // if the cleaner times out it does not make sense to continue here
199-
mgr.waitForCleanup(30, TimeUnit.SECONDS);
200224

225+
assertFalse(withTimeout.get(), "Cleanup should not have timed out");
201226
assertFalse(Files.exists(fakeTempFile));
202227
assertFalse(Files.exists(fakeTempDir));
203228
}
204229

205230
@ParameterizedTest
206231
@MethodSource("timeoutTestArguments")
207232
void testCleanupWithTimeout(boolean shouldSucceed, String section) throws Exception {
208-
long timeoutMs = 10;
233+
/*
234+
* Test that cleanup correctly handles timeout conditions.
235+
* Uses a hook that introduces delays at specific points in the file tree walk.
236+
*
237+
* Timing strategy (more generous for JDK 8 compatibility):
238+
* - Base delay: 100ms (was 10ms - too tight for JDK 8 timer resolution)
239+
* - Success case: 500ms timeout with 100ms delays → plenty of margin
240+
* - Failure case: 20ms timeout with 100ms delays → guaranteed timeout
241+
*/
242+
long delayMs = 100;
209243
AtomicBoolean withTimeout = new AtomicBoolean(false);
210244
TempLocationManager.CleanupHook delayer =
211245
new TempLocationManager.CleanupHook() {
@@ -214,7 +248,7 @@ public FileVisitResult preVisitDirectory(
214248
Path dir, BasicFileAttributes attrs, boolean timeout) throws IOException {
215249
withTimeout.compareAndSet(false, timeout);
216250
if (section.equals("preVisitDirectory")) {
217-
waitFor(Duration.ofMillis(timeoutMs));
251+
waitFor(Duration.ofMillis(delayMs));
218252
}
219253
return TempLocationManager.CleanupHook.super.preVisitDirectory(dir, attrs, timeout);
220254
}
@@ -224,7 +258,7 @@ public FileVisitResult visitFileFailed(Path file, IOException exc, boolean timeo
224258
throws IOException {
225259
withTimeout.compareAndSet(false, timeout);
226260
if (section.equals("visitFileFailed")) {
227-
waitFor(Duration.ofMillis(timeoutMs));
261+
waitFor(Duration.ofMillis(delayMs));
228262
}
229263
return TempLocationManager.CleanupHook.super.visitFileFailed(file, exc, timeout);
230264
}
@@ -234,7 +268,7 @@ public FileVisitResult postVisitDirectory(Path dir, IOException exc, boolean tim
234268
throws IOException {
235269
withTimeout.compareAndSet(false, timeout);
236270
if (section.equals("postVisitDirectory")) {
237-
waitFor(Duration.ofMillis(timeoutMs));
271+
waitFor(Duration.ofMillis(delayMs));
238272
}
239273
return TempLocationManager.CleanupHook.super.postVisitDirectory(dir, exc, timeout);
240274
}
@@ -244,7 +278,7 @@ public FileVisitResult visitFile(Path file, BasicFileAttributes attrs, boolean t
244278
throws IOException {
245279
withTimeout.compareAndSet(false, timeout);
246280
if (section.equals("visitFile")) {
247-
waitFor(Duration.ofMillis(timeoutMs));
281+
waitFor(Duration.ofMillis(delayMs));
248282
}
249283
return TempLocationManager.CleanupHook.super.visitFile(file, attrs, timeout);
250284
}
@@ -259,8 +293,10 @@ public FileVisitResult visitFile(Path file, BasicFileAttributes attrs, boolean t
259293
Files.createDirectories(otherTempdir);
260294
Files.createFile(mytempdir.resolve("dummy"));
261295
Files.createFile(otherTempdir.resolve("dummy"));
296+
// Success: 500ms timeout (5 * 100ms) with 100ms delays → should complete
297+
// Failure: 20ms timeout (0.2 * 100ms) with 100ms delays → should timeout
262298
boolean rslt =
263-
instance.cleanup((long) (timeoutMs * (shouldSucceed ? 20 : 0.5d)), TimeUnit.MILLISECONDS);
299+
instance.cleanup((long) (delayMs * (shouldSucceed ? 5 : 0.2d)), TimeUnit.MILLISECONDS);
264300
assertEquals(shouldSucceed, rslt);
265301
assertNotEquals(shouldSucceed, withTimeout.get()); // timeout = !shouldSucceed
266302
}
@@ -276,8 +312,7 @@ private static Stream<Arguments> timeoutTestArguments() {
276312
}
277313

278314
private TempLocationManager instance(
279-
Path baseDir, boolean withStartupCleanup, TempLocationManager.CleanupHook cleanupHook)
280-
throws IOException {
315+
Path baseDir, boolean withStartupCleanup, TempLocationManager.CleanupHook cleanupHook) {
281316
Properties props = new Properties();
282317
props.put(ProfilingConfig.PROFILING_TEMP_DIR, baseDir.toString());
283318
props.put(
@@ -297,15 +332,4 @@ private void waitFor(Duration timeout) {
297332
}
298333
}
299334
}
300-
301-
private boolean arriveOrAwaitAdvance(Phaser phaser, Duration timeout) {
302-
try {
303-
System.err.println("===> waiting " + phaser.getPhase());
304-
phaser.awaitAdvanceInterruptibly(phaser.arrive(), timeout.toMillis(), TimeUnit.MILLISECONDS);
305-
System.err.println("===> done waiting " + phaser.getPhase());
306-
return true;
307-
} catch (InterruptedException | TimeoutException ignored) {
308-
return false;
309-
}
310-
}
311335
}

0 commit comments

Comments
 (0)