Skip to content

Commit 7f8dbdd

Browse files
committed
fix merge
1 parent cc5560c commit 7f8dbdd

3 files changed

Lines changed: 22 additions & 4 deletions

File tree

NEWS.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -56,7 +56,7 @@
5656

5757
5. The data.table test suite is a bit more robust to lacking UTF-8 support via a new `requires_utf8` argument to `test()` to skip tests when UTF-8 support is not available, [#7336](https://github.com/Rdatatable/data.table/issues/7336). Thanks @MichaelChirico for the suggestion and @ben-schwen for the implementation.
5858

59-
4. Grouping operations with constant `list()` expressions in `j` are now optimized to avoid per-group allocation overhead, [#712](https://github.com/Rdatatable/data.table/issues/712). Thanks @macrakis for the report and @ben-schwen for the fix.
59+
6. Grouping operations with constant `list()` expressions in `j` are now optimized to avoid per-group allocation overhead, [#712](https://github.com/Rdatatable/data.table/issues/712). Thanks @macrakis for the report and @ben-schwen for the fix.
6060

6161
## data.table [v1.18.0](https://github.com/Rdatatable/data.table/milestone/37?closed=1) 23 December 2025
6262

R/data.table.R

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -189,6 +189,18 @@ replace_dot_alias = function(e) {
189189
list(jsub=jsub, jvnames=jvnames, funi=funi+1L)
190190
}
191191

192+
# Unwrap a single-argument list() call in j to avoid per-group allocation overhead, #712
# e.g., list(1) -> 1
# Returns the unwrapped argument, or NULL when no optimization is possible
.optimize_constant_list = function(jsub) {
  # only list(<arg>) with exactly one argument is a candidate
  if (!(jsub %iscall% "list" && length(jsub) == 2L)) return(NULL)
  # leave list(NULL), a wrapped call, or anything is_constantish() rejects untouched;
  # jsub[[2L]] is deliberately re-extracted rather than bound to a local name
  if (is.null(jsub[[2L]]) || is.call(jsub[[2L]]) || !is_constantish(jsub[[2L]])) return(NULL)
  jsub[[2L]]
}
203+
192204
# Optimize .SD subsetting patterns like .SD[1], head(.SD), first(.SD)
193205
# return NULL for no optimization possible
194206
.optimize_sd_subset = function(jsub, sdvars, SDenv, envir) {
@@ -505,6 +517,12 @@ replace_dot_alias = function(e) {
505517
return(list(GForce=FALSE, jsub=jsub, jvnames=jvnames))
506518
}
507519

520+
# Step 0: Unwrap constant list() to avoid per-group allocation, #712
521+
if (!is.null(unwrapped_consts <- .optimize_constant_list(jsub))) {
522+
if (verbose) catf("Optimized j from '%s' to bare constant '%s'\n", deparse(jsub), deparse(unwrapped_consts, width.cutoff=200L, nlines=1L))
523+
jsub = unwrapped_consts
524+
}
525+
508526
# Step 1: Apply lapply(.SD) optimization
509527
lapply_result = .optimize_lapply(jsub, jvnames, sdvars, SDenv, verbose, envir)
510528
jsub = lapply_result$jsub

inst/tests/tests.Rraw

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -21487,7 +21487,7 @@ test(2362.53, optimize=0:2, dt[, list(lapply(.SD, sum), list()), b, verbose=TRUE
2148721487

2148821488
# dt[, j=list(var), by] is slower than dt[, j=var, by], #712
2148921489
dt = data.table(x=rep(1:3, 2L), y=1L)
21490-
test(2363.1, dt[, .(1), by=x, verbose=TRUE], dt[, 1, by=x], output="lapply optimization changed j from")
21490+
test(2363.1, dt[, .(1), by=x, verbose=TRUE], dt[, 1, by=x], output="Optimized j from.*to bare constant")
2149121491
dt = data.table(x=1:5, key="x")
21492-
test(2363.2, dt[dt, list(1), by=.EACHI, verbose=TRUE], dt[dt, 1, by=.EACHI], output="lapply optimization changed j from")
21493-
test(2363.3, dt[dt, list(x), by=.EACHI, verbose=TRUE], dt[dt, x, by=.EACHI], output="lapply optimization changed j from")
21492+
test(2363.2, dt[dt, list(1), by=.EACHI, verbose=TRUE], dt[dt, 1, by=.EACHI], output="Optimized j from.*to bare constant")
21493+
test(2363.3, dt[dt, list(x), by=.EACHI, verbose=TRUE], dt[dt, x, by=.EACHI], output="Optimized j from.*to bare constant")

0 commit comments

Comments
 (0)