Skip to content

Commit 936832d

Browse files
authored
Handle journal_v2 file creation failure due to OOM (netdata#19965)
1 parent c688d51 commit 936832d

4 files changed

Lines changed: 18 additions & 12 deletions

File tree

src/database/engine/cache.c

Lines changed: 7 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -2552,7 +2552,7 @@ void pgc_open_cache_to_journal_v2(PGC *cache, Word_t section, unsigned datafile_
25522552
pgc_queue_unlock(cache, &cache->hot);
25532553

25542554
// callback
2555-
cb(section, datafile_fileno, type, JudyL_metrics, JudyL_extents_pos, count_of_unique_extents, count_of_unique_metrics, count_of_unique_pages, data);
2555+
bool success = cb(section, datafile_fileno, type, JudyL_metrics, JudyL_extents_pos, count_of_unique_extents, count_of_unique_metrics, count_of_unique_pages, data);
25562556

25572557
{
25582558
Pvoid_t *PValue1;
@@ -2567,12 +2567,14 @@ void pgc_open_cache_to_journal_v2(PGC *cache, Word_t section, unsigned datafile_
25672567
while ((PValue2 = JudyLFirstThenNext(mi->JudyL_pages_by_start_time, &start_time, &start_time_first))) {
25682568
struct jv2_page_info *pi = *PValue2;
25692569

2570-
// balance-parents: transition from hot to clean directly
25712570
yield_the_processor(); // do not lock too aggressively
2572-
page_set_clean(cache, pi->page, true, false, PGC_QUEUE_LOCK_PRIO_LOW);
2573-
page_transition_unlock(cache, pi->page);
2574-
page_release(cache, pi->page, true);
2571+
if (likely(success))
2572+
page_set_clean(cache, pi->page, true, false, PGC_QUEUE_LOCK_PRIO_LOW);
2573+
else
2574+
page_flag_clear(pi->page, PGC_PAGE_IS_BEING_MIGRATED_TO_V2);
25752575

2576+
page_transition_unlock(cache, pi->page);
2577+
page_release(cache, pi->page, success);
25762578
// before balance-parents:
25772579
// page_transition_unlock(cache, pi->page);
25782580
// pgc_page_hot_to_dirty_and_release(cache, pi->page, true);

src/database/engine/cache.h

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -227,7 +227,7 @@ int64_t pgc_get_wanted_cache_size(PGC *cache);
227227
void pgc_page_hot_set_end_time_s(PGC *cache, PGC_PAGE *page, time_t end_time_s, size_t additional_bytes);
228228
bool pgc_page_to_clean_evict_or_release(PGC *cache, PGC_PAGE *page);
229229

230-
typedef void (*migrate_to_v2_callback)(Word_t section, unsigned datafile_fileno, uint8_t type, Pvoid_t JudyL_metrics, Pvoid_t JudyL_extents_pos, size_t count_of_unique_extents, size_t count_of_unique_metrics, size_t count_of_unique_pages, void *data);
230+
typedef bool (*migrate_to_v2_callback)(Word_t section, unsigned datafile_fileno, uint8_t type, Pvoid_t JudyL_metrics, Pvoid_t JudyL_extents_pos, size_t count_of_unique_extents, size_t count_of_unique_metrics, size_t count_of_unique_pages, void *data);
231231
void pgc_open_cache_to_journal_v2(PGC *cache, Word_t section, unsigned datafile_fileno, uint8_t type, migrate_to_v2_callback cb, void *data);
232232
void pgc_open_evict_clean_pages_of_datafile(PGC *cache, struct rrdengine_datafile *datafile);
233233
size_t pgc_count_clean_pages_having_data_ptr(PGC *cache, Word_t section, void *ptr);

src/database/engine/journalfile.c

Lines changed: 9 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -1293,7 +1293,7 @@ static void *journalfile_v2_write_descriptors(struct journal_v2_header *j2_heade
12931293
// startup : if the migration is done during agent startup
12941294
// this will allow us to optimize certain things
12951295

1296-
void journalfile_migrate_to_v2_callback(Word_t section, unsigned datafile_fileno __maybe_unused, uint8_t type __maybe_unused,
1296+
bool journalfile_migrate_to_v2_callback(Word_t section, unsigned datafile_fileno __maybe_unused, uint8_t type __maybe_unused,
12971297
Pvoid_t JudyL_metrics, Pvoid_t JudyL_extents_pos,
12981298
size_t number_of_extents, size_t number_of_metrics, size_t number_of_pages, void *user_data)
12991299
{
@@ -1346,8 +1346,10 @@ void journalfile_migrate_to_v2_callback(Word_t section, unsigned datafile_fileno
13461346

13471347
int fd_v2;
13481348
uint8_t *data_start = nd_mmap_advanced(path, total_file_size, MAP_SHARED, 0, false, true, &fd_v2);
1349-
if(!data_start)
1350-
out_of_memory(__FUNCTION__, total_file_size, path);
1349+
if(!data_start) {
1350+
nd_log_daemon(NDLP_WARNING, "DBENGINE: Failed to allocate %"PRIu64" bytes of memory for journal file '%s'. Will retry later", total_file_size, path);
1351+
return false;
1352+
}
13511353

13521354
memset(data_start, 0, extent_offset);
13531355

@@ -1492,7 +1494,7 @@ void journalfile_migrate_to_v2_callback(Word_t section, unsigned datafile_fileno
14921494
internal_error(true, "DBENGINE: ACTIVATING NEW INDEX JNL %llu", (now_monotonic_usec() - start_loading) / USEC_PER_MS);
14931495
ctx_current_disk_space_increase(ctx, total_file_size);
14941496
freez(uuid_list);
1495-
return;
1497+
return true;
14961498
}
14971499
else {
14981500
netdata_log_info("DBENGINE: failed to build index '%s', file will be skipped", path);
@@ -1506,7 +1508,7 @@ void journalfile_migrate_to_v2_callback(Word_t section, unsigned datafile_fileno
15061508
freez(uuid_list);
15071509

15081510
if (likely(resize_file_to == total_file_size))
1509-
return;
1511+
return true;
15101512

15111513
int ret = truncate(path, (long) resize_file_to);
15121514
if (ret < 0) {
@@ -1516,6 +1518,8 @@ void journalfile_migrate_to_v2_callback(Word_t section, unsigned datafile_fileno
15161518
}
15171519
else
15181520
ctx_current_disk_space_increase(ctx, resize_file_to);
1521+
1522+
return true;
15191523
}
15201524

15211525
int journalfile_load(struct rrdengine_instance *ctx, struct rrdengine_journalfile *journalfile,

src/database/engine/journalfile.h

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -264,7 +264,7 @@ int journalfile_load(struct rrdengine_instance *ctx, struct rrdengine_journalfil
264264
struct rrdengine_datafile *datafile);
265265
void journalfile_v2_populate_retention_to_mrg(struct rrdengine_instance *ctx, struct rrdengine_journalfile *journalfile);
266266

267-
void journalfile_migrate_to_v2_callback(Word_t section, unsigned datafile_fileno __maybe_unused, uint8_t type __maybe_unused,
267+
bool journalfile_migrate_to_v2_callback(Word_t section, unsigned datafile_fileno __maybe_unused, uint8_t type __maybe_unused,
268268
Pvoid_t JudyL_metrics, Pvoid_t JudyL_extents_pos,
269269
size_t number_of_extents, size_t number_of_metrics, size_t number_of_pages, void *user_data);
270270

0 commit comments

Comments
 (0)