Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
344 changes: 40 additions & 304 deletions extensions/functions_datetime.yaml

Large diffs are not rendered by default.

30 changes: 0 additions & 30 deletions grammar/FuncTestCaseParser.g4
Original file line number Diff line number Diff line change
Expand Up @@ -56,9 +56,6 @@ argument
| stringArg
| decimalArg
| dateArg
| timeArg
| timestampArg
| timestampTzArg
| intervalYearArg
| intervalDayArg
| intervalCompoundArg
Expand Down Expand Up @@ -161,18 +158,6 @@ dateArg
: DateLiteral DoubleColon dateType
;

timeArg
: TimeLiteral DoubleColon timeType
;

timestampArg
: TimestampLiteral DoubleColon timestampType
;

timestampTzArg
: TimestampTzLiteral DoubleColon timestampTZType
;

intervalYearArg
: IntervalYearLiteral DoubleColon intervalYearType
;
Expand Down Expand Up @@ -254,10 +239,7 @@ scalarType
| floatType #float
| stringType #string
| binaryType #binary
| timestampType #timestamp
| timestampTZType #timestampTz
| dateType #date
| timeType #time
| intervalYearType #intervalYear
| UUID isnull=QMark? #uuid
| UserDefined Identifier isnull=QMark? #userDefined
Expand Down Expand Up @@ -287,18 +269,6 @@ dateType
: Date isnull=QMark?
;

timeType
: Time isnull=QMark?
;

timestampType
: (Ts | Timestamp) isnull=QMark?
;

timestampTZType
: (TsTZ | Timestamp_TZ) isnull=QMark?
;

intervalYearType
: (IYear | Interval_Year) isnull=QMark?
;
Expand Down
5 changes: 0 additions & 5 deletions grammar/SubstraitLexer.g4
Original file line number Diff line number Diff line change
Expand Up @@ -27,10 +27,7 @@ FP32 : 'FP32';
FP64 : 'FP64';
String : 'STRING';
Binary : 'BINARY';
Timestamp: 'TIMESTAMP';
Timestamp_TZ: 'TIMESTAMP_TZ';
Date : 'DATE';
Time : 'TIME';
Interval_Year: 'INTERVAL_YEAR';
Interval_Day: 'INTERVAL_DAY';
Interval_Compound: 'INTERVAL_COMPOUND';
Expand All @@ -52,8 +49,6 @@ UserDefined: 'U!';
Bool: 'BOOL';
Str: 'STR';
VBin: 'VBIN';
Ts: 'TS';
TsTZ: 'TSTZ';
IYear: 'IYEAR';
IDay: 'IDAY';
ICompound: 'ICOMPOUND';
Expand Down
3 changes: 0 additions & 3 deletions grammar/SubstraitType.g4
Original file line number Diff line number Diff line change
Expand Up @@ -20,10 +20,7 @@ scalarType
| FP64 #fp64
| String #string
| Binary #binary
| Timestamp #timestamp
| Timestamp_TZ #timestampTz
| Date #date
| Time #time
| Interval_Year #intervalYear
| UUID #uuid
;
Expand Down
11 changes: 2 additions & 9 deletions proto/substrait/algebra.proto
Original file line number Diff line number Diff line change
Expand Up @@ -997,6 +997,8 @@ message Expression {
}

message Literal {
reserved 14, 17, 27;

oneof literal_type {
bool boolean = 1;
int32 i8 = 2;
Expand All @@ -1007,14 +1009,8 @@ message Expression {
double fp64 = 11;
string string = 12;
bytes binary = 13;
// Timestamp in units of microseconds since the UNIX epoch.
// Deprecated in favor of `precision_timestamp`.
int64 timestamp = 14 [deprecated = true];
// Date in units of days since the UNIX epoch.
int32 date = 16;
// Time in units of microseconds past midnight.
// Deprecated in favor of `precision_time`.
int64 time = 17 [deprecated = true];
IntervalYearToMonth interval_year_to_month = 19;
IntervalDayToSecond interval_day_to_second = 20;
IntervalCompound interval_compound = 36;
Expand All @@ -1028,9 +1024,6 @@ message Expression {
PrecisionTimestamp precision_timestamp_tz = 35;
Struct struct = 25;
Map map = 26;
// Timestamp in units of microseconds since the UNIX epoch.
// Deprecated in favor of `precision_timestamp_tz`
int64 timestamp_tz = 27 [deprecated = true];
bytes uuid = 28;
Type null = 29; // a typed null literal
List list = 30;
Expand Down
23 changes: 2 additions & 21 deletions proto/substrait/type.proto
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@ option java_package = "io.substrait.proto";
//
// The value 0 represents the system-preferred variation and is a valid reference value.
message Type {
reserved 14, 17, 29;

oneof kind {
Boolean bool = 1;
I8 i8 = 2;
Expand All @@ -25,16 +27,10 @@ message Type {
FP64 fp64 = 11;
String string = 12;
Binary binary = 13;
// Deprecated in favor of `PrecisionTimestamp precision_timestamp`
Timestamp timestamp = 14 [deprecated = true];
Date date = 16;
// Deprecated in favor of `PrecisionTime precision_time`
Time time = 17 [deprecated = true];
IntervalYear interval_year = 19;
IntervalDay interval_day = 20;
IntervalCompound interval_compound = 35;
// Deprecated in favor of `PrecisionTimestampTZ precision_timestamp_tz`
TimestampTZ timestamp_tz = 29 [deprecated = true];
UUID uuid = 32;

FixedChar fixed_char = 21;
Expand Down Expand Up @@ -113,26 +109,11 @@ message Type {
Nullability nullability = 2;
}

message Timestamp {
uint32 type_variation_reference = 1;
Nullability nullability = 2;
}

message Date {
uint32 type_variation_reference = 1;
Nullability nullability = 2;
}

message Time {
uint32 type_variation_reference = 1;
Nullability nullability = 2;
}

message TimestampTZ {
uint32 type_variation_reference = 1;
Nullability nullability = 2;
}

// An interval consisting of years and months
message IntervalYear {
uint32 type_variation_reference = 1;
Expand Down
5 changes: 1 addition & 4 deletions site/docs/extensions/index.md
Original file line number Diff line number Diff line change
Expand Up @@ -98,10 +98,7 @@ A function signature uniquely identifies a function implementation within a sing
| string | str |
| binary | vbin |
| boolean | bool |
| timestamp | ts |
| timestamp_tz | tstz |
| date | date |
| time | time |
| interval_year | iyear |
| interval_day | iday |
| interval_compound | icompound |
Expand All @@ -126,7 +123,7 @@ A function signature uniquely identifies a function implementation within a sing
| ------------------------------------------------- | ------------------- |
| `add(optional enumeration, i8, i8) => i8` | `add:i8_i8` |
| `avg(fp32) => fp32` | `avg:fp32` |
| `extract(required enumeration, timestamp) => i64` | `extract:req_ts` |
| `extract(required enumeration, precision_timestamp<6>) => i64` | `extract:req_pts` |
| `sum(any1) => any1` | `sum:any` |
| `concat(str...) => str` | `concat:str` |
| `transform(list<any1>, func<any1 -> any2>) => list<any2>` | `transform:list_func` |
Expand Down
3 changes: 0 additions & 3 deletions site/docs/types/type_classes.md
Original file line number Diff line number Diff line change
Expand Up @@ -21,10 +21,7 @@ Simple type classes are those that don't support any form of configuration. For
| fp64 | An 8-byte double-precision floating point number with the same range and precision as defined for the [IEEE 754 64-bit floating-point format](https://standards.ieee.org/ieee/754/6210/). | `double`
| string | A unicode string of text, [0..2,147,483,647] UTF-8 bytes in length. | `string`
| binary | A binary value, [0..2,147,483,647] bytes in length. | `binary`
| timestamp | A naive timestamp with microsecond precision. Does not include timezone information and can thus not be unambiguously mapped to a moment on the timeline without context. Similar to naive datetime in Python. | `int64` microseconds since 1970-01-01 00:00:00.000000 (in an unspecified timezone)
| timestamp_tz | A timezone-aware timestamp with microsecond precision. Similar to timezone-aware datetime in Python. | `int64` microseconds since 1970-01-01 00:00:00.000000 UTC
| date | A date within [1000-01-01..9999-12-31]. | `int32` days since `1970-01-01`
| time | A time since the beginning of any day. Range of [0..86,399,999,999] microseconds; leap seconds need not be supported. | `int64` microseconds past midnight
| interval_year | Interval year to month. Supports a range of [-10,000..10,000] years with month precision (= [-120,000..120,000] months). Usually stored as separate integers for years and months, but only the total number of months is significant, i.e. `1y 0m` is considered equal to `0y 12m` or `1001y -12000m`. | `int32` years and `int32` months, with the added constraint that each component can never independently specify more than 10,000 years, even if the components have opposite signs (e.g. `-10000y 200000m` is **not** allowed)
| uuid | A universally-unique identifier composed of 128 bits. Typically presented to users in the following hexadecimal format: `c48ffa9e-64f4-44cb-ae47-152b4e60e77b`. Any 128-bit value is allowed, without specific adherence to RFC4122. | 16-byte `binary`

Expand Down
2 changes: 1 addition & 1 deletion site/docs/types/type_parsing.md
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ Structs are unique from other types because they have an arbitrary number of par

```
// Struct
struct?<string, i8, i32?, timestamp_tz>
struct?<string, i8, i32?, precision_timestamp_tz<6>>

// Named structs are not yet supported in the text format.
```
Expand Down
21 changes: 9 additions & 12 deletions tests/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -110,9 +110,9 @@ enum_value := <identifier>::enum
result := <substrait_error> | <literal> | <enum_value>
options := <option>, <option>, ... <option>
option := <option_name>:<option_value>
literal_value := string | integer | decimal | float | boolean | date | time | timestamp | timestamp_tz | interval year | interval days | null
literal_value := string | integer | decimal | float | boolean | date | interval year | interval days | null
datatype := <basic_type> | <parametrized_type> | <complex_type>
basic_type := bool | i8 | i16 | i32 | i64 | f32 | f64 | str | date | time | ts | tstz | iyear | vbin | <parametrized_type>
basic_type := bool | i8 | i16 | i32 | i64 | f32 | f64 | str | date | iyear | vbin | <parametrized_type>
parametrized_type := fchar<int> | vchar<int> | dec<int,int> | fbin<int> | iday<int> | icompound<int> | pt<int> | pts<int> | ptstz<int> | func<params -> datatype>
params := datatype | (datatype(, datatype)*)
complex_type := <struct> | <list> | <map>
Expand Down Expand Up @@ -156,9 +156,9 @@ Integers are represented as sequences of digits. Negative numbers are preceded b
All date and time literals use ISO 8601 format:

- **date**: `YYYY-MM-DD`, example: `2021-01-01`
- **time**: `HH:MM:SS[.fraction]`, example: `12:00:00.000`
- **timestamp**: `YYYY-MM-DD HH:MM:SS[.fraction]`, example: `2021-01-01 12:00:00`
- **timestamp_tz**: `YYYY-MM-DD HH:MM:SS[.fraction]±HH:MM`, example: `2021-01-01 12:00:00+05:30`
- **precision_time**: `HH:MM:SS[.fraction]`, example: `12:00:00.000`
- **precision_timestamp**: `YYYY-MM-DD HH:MM:SS[.fraction]`, example: `2021-01-01 12:00:00`
- **precision_timestamp_tz**: `YYYY-MM-DD HH:MM:SS[.fraction]±HH:MM`, example: `2021-01-01 12:00:00+05:30`
- **interval year**: `'P[n]Y[n]M'`, example: `'P2Y3M'` (2 years, 3 months)
- **interval days**: `'P[n]DT[n]H[n]M[n]S'`, example: `'P2DT3H2M9S'` (2 days, 3 hours, 2 minutes, 9 seconds)
ex2: `'P1DT2H3M4.45S'::iday<3>` (1 day, 2 hours, 3 minutes, 4 seconds, 450 milliseconds)
Expand Down Expand Up @@ -186,15 +186,12 @@ Use short names listed in https://substrait.io/extensions/#function-signature-co
- **vchar**: Variable-length string `varchar<N>`
- **vbin**: Variable-length binary `binary`
- **fbin**: Fixed-length binary `fixedbinary<N>`
- **date**: Date
- **time**: Time
- **ts**: Timestamp
- **tstz**: Timestamp with timezone
- **iyear**: Interval year
- **iday**: Interval days
- **iday**: Interval days `interval_day<P>`
- **icompound**: Interval compound
- **pt**: Precision Time
- **pts**: Precision Timestamp
- **ptstz**: Precision Timestamp with timezone
- **pt**: Precision Time `precision_time<P>`
- **pts**: Precision Timestamp `precision_timestamp<P>`
- **ptstz**: Precision Timestamp with timezone `precision_timestamp_tz<P>`


### Nullability
Expand Down
8 changes: 4 additions & 4 deletions tests/baseline.json
Original file line number Diff line number Diff line change
Expand Up @@ -2,15 +2,15 @@
"registry": {
"extension_count": 15,
"dependency_count": 15,
"function_count": 173,
"function_count": 174,
"num_aggregate_functions": 28,
"num_scalar_functions": 170,
"num_window_functions": 11,
"num_function_overloads": 533
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why did this one go up so much?

Copy link
Copy Markdown
Member Author

@nielspardon nielspardon Mar 20, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I've been asking Claude for an explanation and it gave a very detailed one:

When the deprecated time, timestamp, and timestamp_tz types were removed, they were replaced with parameterized types like `precision_time<P>` and `precision_timestamp<P>`.

The massive increase in overloads (533 → 4,482) is due to parametric type expansion. Previously, functions like extract(HOUR, x: timestamp) had a single overload. Now with `precision_timestamp<P>`, the type system generates separate overloads for each supported precision value (microsecond, millisecond, etc.), causing a combinatorial explosion in the number of overload signatures.

For example:

Old: 1 overload for extract(HOUR, x: timestamp)
New: Multiple overloads for extract(HOUR, x: precision_timestamp<3>), extract(HOUR, x: precision_timestamp<6>), etc.
This pattern repeated across many datetime functions explains the ~8.4× increase. The later jump from 4,482 → 8,466 in commit 63f3fd5 likely comes from additional function additions or test coverage adjustments.

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hmm... if that is the case, what do you think about only deleting the old impls in this PR, and we can add new ones if we need in a subsequent one?

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm only adding new impls when there would be no equivalent impls after removing the old ones. This PR retains functional parity with the state before the change: if there was an impl with time, timestamp, or timestamp_tz before, and there wouldn't be one after removing all impls using those types, then I'm adding the equivalent precision impl. If we do these in separate PRs, then we risk having a Substrait release that has less functionality than before.

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I have no idea what's going on here. The explanation that Claude Code gave does not match the current code on main. Currently, num_function_overloads is essentially the total number of impls across all extension YAMLs. I rebased this PR branch on the latest main, and num_function_overloads now decreases from 533 to 498, which sounds more reasonable than the previous 8,466.

"num_function_overloads": 498
},
"coverage": {
"total_test_count": 1168,
"num_function_variants": 533,
"total_test_count": 1177,
"num_function_variants": 498,
"num_covered_function_variants": 245
}
}
12 changes: 6 additions & 6 deletions tests/cases/datetime/add_datetime.test
Original file line number Diff line number Diff line change
@@ -1,16 +1,16 @@
### SUBSTRAIT_SCALAR_TEST: v1.0
### SUBSTRAIT_INCLUDE: '/extensions/functions_datetime.yaml'

# timestamps: examples using the timestamp types
add('2016-12-31T13:30:15'::ts, 'P5D'::iday) = '2017-01-05T13:30:15'::ts
add('2016-12-01T13:30:15'::ts, 'P5Y'::iyear) = '2021-12-01T13:30:15'::ts
add('2016-12-01T13:30:15'::ts, 'PT5H'::iday) = '2016-12-01T18:30:15'::ts
# timestamps: examples using the precision_timestamp types
add('2016-12-31T13:30:15'::pts<6>, 'P5D'::iday<6>) = '2017-01-05T13:30:15'::pts<6>
add('2016-12-01T13:30:15'::pts<6>, 'P5Y'::iyear) = '2021-12-01T13:30:15'::pts<6>
add('2016-12-01T13:30:15'::pts<6>, 'PT5H'::iday<6>) = '2016-12-01T18:30:15'::pts<6>

# date_to_timestamp: examples using the date types and resulting in a timestamp
add('2020-12-31'::date, 'P5D'::iday) = '2021-01-05T00:00:00'::ts
add('2020-12-31'::date, 'P5D'::iday<6>) = '2021-01-05T00:00:00'::pts<6>
add('2020-12-31'::date, 'P5Y'::iyear) = '2025-12-31'::date
add('2020-12-31'::date, 'P5M'::iyear) = '2021-05-31'::date

# null_input: examples with null args or return
add(null::date?, 'P5D'::iday) = null::ts?
add(null::date?, 'P5D'::iday<6>) = null::pts?<6>
add(null::date?, 'P5Y'::iyear) = null::date?
44 changes: 22 additions & 22 deletions tests/cases/datetime/extract.test
Original file line number Diff line number Diff line change
@@ -1,31 +1,31 @@
### SUBSTRAIT_SCALAR_TEST: v1.0
### SUBSTRAIT_INCLUDE: '/extensions/functions_datetime.yaml'

# timestamps: examples using the timestamp and timestamptz types
extract(YEAR::enum, '2016-12-31T13:30:15'::ts) = 2016::i64
extract(ISO_YEAR::enum, '2016-01-01T13:30:15'::ts) = 2015::i64
extract(QUARTER::enum, '2016-12-31T13:30:15'::ts) = 4::i64
extract(MONTH::enum, '2016-12-31T13:30:15'::ts) = 12::i64
extract(ISO_WEEK::enum, '2016-12-31T13:30:15'::ts) = 52::i64
extract(DAY::enum, '2016-12-31T13:30:15'::ts) = 31::i64
extract(SUNDAY_DAY_OF_WEEK::enum, '2016-12-25T13:30:15'::ts) = 7::i64
extract(MONDAY_DAY_OF_WEEK::enum, '2016-12-25T13:30:15'::ts) = 0::i64
extract(DAY_OF_YEAR::enum, '2016-12-25T13:30:15'::ts) = 360::i64
extract(HOUR::enum, '2016-12-31T13:30:15'::ts) = 13::i64
extract(MINUTE::enum, '2016-12-31T13:30:15'::ts) = 30::i64
extract(SECOND::enum, '2016-12-31T13:30:15'::ts) = 15::i64
extract(MILLISECOND::enum, '2016-12-31T13:30:15'::ts) = 15000::i64
extract(MICROSECOND::enum, '2016-12-31T13:30:15.220000'::ts) = 15220000::i64
extract(UNIX_TIME::enum, '2016-12-31T13:30:15'::ts) = 1483191015::i64
# timestamps: examples using the precision_timestamp and precision_timestamp_tz types
extract(YEAR::enum, '2016-12-31T13:30:15'::pts<6>) = 2016::i64
extract(ISO_YEAR::enum, '2016-01-01T13:30:15'::pts<6>) = 2015::i64
extract(QUARTER::enum, '2016-12-31T13:30:15'::pts<6>) = 4::i64
extract(MONTH::enum, '2016-12-31T13:30:15'::pts<6>) = 12::i64
extract(ISO_WEEK::enum, '2016-12-31T13:30:15'::pts<6>) = 52::i64
extract(DAY::enum, '2016-12-31T13:30:15'::pts<6>) = 31::i64
extract(SUNDAY_DAY_OF_WEEK::enum, '2016-12-25T13:30:15'::pts<6>) = 7::i64
extract(MONDAY_DAY_OF_WEEK::enum, '2016-12-25T13:30:15'::pts<6>) = 0::i64
extract(DAY_OF_YEAR::enum, '2016-12-25T13:30:15'::pts<6>) = 360::i64
extract(HOUR::enum, '2016-12-31T13:30:15'::pts<6>) = 13::i64
extract(MINUTE::enum, '2016-12-31T13:30:15'::pts<6>) = 30::i64
extract(SECOND::enum, '2016-12-31T13:30:15'::pts<6>) = 15::i64
extract(MILLISECOND::enum, '2016-12-31T13:30:15'::pts<6>) = 15000::i64
extract(MICROSECOND::enum, '2016-12-31T13:30:15.220000'::pts<6>) = 15220000::i64
extract(UNIX_TIME::enum, '2016-12-31T13:30:15'::pts<6>) = 1483191015::i64

# date: examples using the date type
extract(YEAR::enum, '2020-12-31'::date) = 2020::i64
extract(MONTH::enum, '2020-12-31'::date) = 12::i64
extract(DAY::enum, '2020-12-31'::date) = 31::i64

# time: examples using the time type
extract(HOUR::enum, '01:02:03'::time) = 1::i64
extract(MINUTE::enum, '01:02:03'::time) = 2::i64
extract(SECOND::enum, '01:02:03'::time) = 3::i64
extract(MILLISECOND::enum, '01:02:03.155'::time) = 3155::i64
extract(MICROSECOND::enum, '01:02:03.45'::time) = 3450000::i64
# time: examples using the precision_time type
extract(HOUR::enum, '01:02:03'::pt<6>) = 1::i64
extract(MINUTE::enum, '01:02:03'::pt<6>) = 2::i64
extract(SECOND::enum, '01:02:03'::pt<6>) = 3::i64
extract(MILLISECOND::enum, '01:02:03.155'::pt<6>) = 3155::i64
extract(MICROSECOND::enum, '01:02:03.45'::pt<6>) = 3450000::i64
14 changes: 7 additions & 7 deletions tests/cases/datetime/gt_datetime.test
Original file line number Diff line number Diff line change
@@ -1,13 +1,13 @@
### SUBSTRAIT_SCALAR_TEST: v1.0
### SUBSTRAIT_INCLUDE: '/extensions/functions_datetime.yaml'

# timestamps: examples using the timestamp type
gt('2016-12-31T13:30:15'::ts, '2017-12-31T13:30:15'::ts) = false::bool
gt('2018-12-31T13:30:15'::ts, '2017-12-31T13:30:15'::ts) = true::bool
# timestamps: examples using the precision_timestamp type
gt('2016-12-31T13:30:15'::pts<6>, '2017-12-31T13:30:15'::pts<6>) = false::bool
gt('2018-12-31T13:30:15'::pts<6>, '2017-12-31T13:30:15'::pts<6>) = true::bool

# timestamp_tz: examples using the timestamp_tz type
gt('1999-01-08T01:05:05-08:00'::tstz, '1999-01-08T04:05:06-05:00'::tstz) = false::bool
gt('1999-01-08T01:05:07-08:00'::tstz, '1999-01-08T04:05:06-05:00'::tstz) = true::bool
# timestamp_tz: examples using the precision_timestamp_tz type
gt('1999-01-08T01:05:05-08:00'::ptstz<6>, '1999-01-08T04:05:06-05:00'::ptstz<6>) = false::bool
gt('1999-01-08T01:05:07-08:00'::ptstz<6>, '1999-01-08T04:05:06-05:00'::ptstz<6>) = true::bool

# date: examples using the date type
gt('2020-12-30'::date, '2020-12-31'::date) = false::bool
Expand All @@ -22,4 +22,4 @@ gt('P7Y'::iyear, 'P6Y'::iyear) = true::bool
# null_input: examples with null args
gt(null::iday?, 'P5D'::iday) = null::bool?
gt(null::date?, '2020-12-30'::date) = null::bool?
gt(null::ts?, '2018-12-31T13:30:15'::ts) = null::bool?
gt(null::pts?<6>, '2018-12-31T13:30:15'::pts<6>) = null::bool?
Loading
Loading