Skip to content

Commit d9dcb34

Browse files
committed
Allow blob inline fields without blob-field
1 parent 7a0ad33 commit d9dcb34

9 files changed

Lines changed: 81 additions & 114 deletions

File tree

docs/content/append-table/blob.md

Lines changed: 5 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -105,7 +105,7 @@ This allows one table to mix raw-data BLOB fields, descriptor-only BLOB fields,
105105
<td>No</td>
106106
<td style="word-wrap: break-word;">(none)</td>
107107
<td>String</td>
108-
<td>Specifies column names that should be stored as blob type. This is used when you want to treat a BYTES column as a BLOB.</td>
108+
<td>Specifies column names that should be stored as blob type. This is used when you want to treat a BYTES column as a BLOB. Fields listed in <code>blob-descriptor-field</code> or <code>blob-view-field</code> are also treated as BLOB fields.</td>
109109
</tr>
110110
<tr>
111111
<td><h5>blob-as-descriptor</h5></td>
@@ -120,7 +120,7 @@ This allows one table to mix raw-data BLOB fields, descriptor-only BLOB fields,
120120
<td style="word-wrap: break-word;">(none)</td>
121121
<td>String</td>
122122
<td>
123-
Comma-separated BLOB field names stored as serialized <code>BlobDescriptor</code> bytes inline in normal data files.
123+
Comma-separated field names treated as BLOB fields and stored as serialized <code>BlobDescriptor</code> bytes inline in normal data files.
124124
By default, all blob fields store blob bytes in separate <code>.blob</code> files.
125125
If configured, one table can mix:
126126
some BLOB fields in <code>.blob</code> files and some as descriptor references.
@@ -132,9 +132,9 @@ This allows one table to mix raw-data BLOB fields, descriptor-only BLOB fields,
132132
<td style="word-wrap: break-word;">(none)</td>
133133
<td>String</td>
134134
<td>
135-
Comma-separated BLOB field names stored as serialized <code>BlobViewStruct</code> bytes inline in normal data files.
135+
Comma-separated field names treated as BLOB fields and stored as serialized <code>BlobViewStruct</code> bytes inline in normal data files.
136136
The field values reference BLOB values in upstream tables and are resolved at read time.
137-
This option must be a subset of <code>blob-field</code> and must not overlap with <code>blob-descriptor-field</code>.
137+
This option must not overlap with <code>blob-descriptor-field</code>.
138138
</td>
139139
</tr>
140140
<tr>
@@ -300,7 +300,7 @@ Blob view is useful when a downstream table should reference BLOB values already
300300
Blob view requires:
301301

302302
- the upstream table to have row tracking enabled, so each row has a stable `_ROW_ID`
303-
- the downstream field to be listed in both `blob-field` and `blob-view-field`
303+
- the downstream field to be listed in `blob-view-field`
304304
- writes to provide a serialized `BlobViewStruct`; in Flink SQL, use the built-in `sys.blob_view` function
305305

306306
The Flink SQL function signature is:
@@ -335,7 +335,6 @@ CREATE TABLE image_view_table (
335335
) WITH (
336336
'row-tracking.enabled' = 'true',
337337
'data-evolution.enabled' = 'true',
338-
'blob-field' = 'image_ref',
339338
'blob-view-field' = 'image_ref'
340339
);
341340

docs/layouts/shortcodes/generated/core_configuration.html

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -66,7 +66,7 @@
6666
<td><h5>blob-descriptor-field</h5></td>
6767
<td style="word-wrap: break-word;">(none)</td>
6868
<td>String</td>
69-
<td>Comma-separated BLOB field names, selected from blob-field, to store as serialized BlobDescriptor bytes inline in data files.</td>
69+
<td>Comma-separated field names to treat as BLOB fields and store as serialized BlobDescriptor bytes inline in data files.</td>
7070
</tr>
7171
<tr>
7272
<td><h5>blob-external-storage-field</h5></td>
@@ -84,13 +84,13 @@
8484
<td><h5>blob-field</h5></td>
8585
<td style="word-wrap: break-word;">(none)</td>
8686
<td>String</td>
87-
<td>Specifies column names that should be stored as blob type. This is used when you want to treat a BYTES column as a BLOB.</td>
87+
<td>Specifies column names that should be stored as blob type. This is used when you want to treat a BYTES column as a BLOB. Fields listed in blob-descriptor-field or blob-view-field are also treated as BLOB fields.</td>
8888
</tr>
8989
<tr>
9090
<td><h5>blob-view-field</h5></td>
9191
<td style="word-wrap: break-word;">(none)</td>
9292
<td>String</td>
93-
<td>Comma-separated BLOB field names, selected from blob-field, to store as serialized BlobViewStruct bytes inline in data files and resolve from upstream tables at read time.</td>
93+
<td>Comma-separated field names to treat as BLOB fields and store as serialized BlobViewStruct bytes inline in data files and resolve from upstream tables at read time.</td>
9494
</tr>
9595
<tr>
9696
<td><h5>blob.split-by-file-size</h5></td>

paimon-api/src/main/java/org/apache/paimon/CoreOptions.java

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2277,7 +2277,9 @@ public InlineElement getDescription() {
22772277
.noDefaultValue()
22782278
.withDescription(
22792279
"Specifies column names that should be stored as blob type. "
2280-
+ "This is used when you want to treat a BYTES column as a BLOB.");
2280+
+ "This is used when you want to treat a BYTES column as a BLOB. "
2281+
+ "Fields listed in blob-descriptor-field or blob-view-field "
2282+
+ "are also treated as BLOB fields.");
22812283

22822284
@Immutable
22832285
public static final ConfigOption<String> BLOB_DESCRIPTOR_FIELD =
@@ -2286,7 +2288,7 @@ public InlineElement getDescription() {
22862288
.noDefaultValue()
22872289
.withFallbackKeys("blob.stored-descriptor-fields")
22882290
.withDescription(
2289-
"Comma-separated BLOB field names, selected from blob-field, to store "
2291+
"Comma-separated field names to treat as BLOB fields and store "
22902292
+ "as serialized BlobDescriptor bytes inline in data files.");
22912293

22922294
@Immutable
@@ -2295,7 +2297,7 @@ public InlineElement getDescription() {
22952297
.stringType()
22962298
.noDefaultValue()
22972299
.withDescription(
2298-
"Comma-separated BLOB field names, selected from blob-field, to store "
2300+
"Comma-separated field names to treat as BLOB fields and store "
22992301
+ "as serialized BlobViewStruct bytes inline in data files and "
23002302
+ "resolve from upstream tables at read time.");
23012303

paimon-core/src/main/java/org/apache/paimon/schema/SchemaValidation.java

Lines changed: 6 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -165,11 +165,10 @@ public static void validateTableSchema(TableSchema schema) {
165165
FileFormat fileFormat =
166166
FileFormat.fromIdentifier(options.formatType(), new Options(schema.options()));
167167
RowType tableRowType = new RowType(schema.fields());
168-
Set<String> blobFields = validateBlobFields(tableRowType, options);
169-
Set<String> blobDescriptorFields =
170-
validateBlobDescriptorFields(tableRowType, options, blobFields);
168+
validateBlobFields(tableRowType, options);
169+
Set<String> blobDescriptorFields = validateBlobDescriptorFields(tableRowType, options);
171170
Set<String> blobViewFields =
172-
validateBlobViewFields(tableRowType, options, blobFields, blobDescriptorFields);
171+
validateBlobViewFields(tableRowType, options, blobDescriptorFields);
173172
Set<String> blobInlineFields = new HashSet<>(blobDescriptorFields);
174173
blobInlineFields.addAll(blobViewFields);
175174
validateBlobExternalStorageFields(tableRowType, options, blobDescriptorFields);
@@ -719,7 +718,7 @@ private static void validateRowTracking(TableSchema schema, CoreOptions options)
719718
}
720719
}
721720

722-
private static Set<String> validateBlobFields(RowType rowType, CoreOptions options) {
721+
private static void validateBlobFields(RowType rowType, CoreOptions options) {
723722
Set<String> blobFieldNames =
724723
rowType.getFields().stream()
725724
.filter(field -> field.type().getTypeRoot() == DataTypeRoot.BLOB)
@@ -735,11 +734,10 @@ private static Set<String> validateBlobFields(RowType rowType, CoreOptions optio
735734
field,
736735
CoreOptions.BLOB_FIELD.key());
737736
}
738-
return configured;
739737
}
740738

741739
private static Set<String> validateBlobDescriptorFields(
742-
RowType rowType, CoreOptions options, Set<String> blobFields) {
740+
RowType rowType, CoreOptions options) {
743741
Set<String> blobFieldNames =
744742
rowType.getFields().stream()
745743
.filter(field -> field.type().getTypeRoot() == DataTypeRoot.BLOB)
@@ -752,21 +750,12 @@ private static Set<String> validateBlobDescriptorFields(
752750
"Field '%s' in '%s' must be a BLOB field in table schema.",
753751
field,
754752
CoreOptions.BLOB_DESCRIPTOR_FIELD.key());
755-
checkArgument(
756-
blobFields.contains(field),
757-
"Field '%s' in '%s' must also be in '%s'.",
758-
field,
759-
CoreOptions.BLOB_DESCRIPTOR_FIELD.key(),
760-
CoreOptions.BLOB_FIELD.key());
761753
}
762754
return configured;
763755
}
764756

765757
private static Set<String> validateBlobViewFields(
766-
RowType rowType,
767-
CoreOptions options,
768-
Set<String> blobFields,
769-
Set<String> blobDescriptorFields) {
758+
RowType rowType, CoreOptions options, Set<String> blobDescriptorFields) {
770759
Set<String> blobFieldNames =
771760
rowType.getFields().stream()
772761
.filter(field -> field.type().getTypeRoot() == DataTypeRoot.BLOB)
@@ -779,12 +768,6 @@ private static Set<String> validateBlobViewFields(
779768
"Field '%s' in '%s' must be a BLOB field in table schema.",
780769
field,
781770
CoreOptions.BLOB_VIEW_FIELD.key());
782-
checkArgument(
783-
blobFields.contains(field),
784-
"Field '%s' in '%s' must also be in '%s'.",
785-
field,
786-
CoreOptions.BLOB_VIEW_FIELD.key(),
787-
CoreOptions.BLOB_FIELD.key());
788771
checkArgument(
789772
!blobDescriptorFields.contains(field),
790773
"Field '%s' in '%s' can not also be in '%s'.",

paimon-core/src/test/java/org/apache/paimon/append/BlobTableTest.java

Lines changed: 25 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,7 @@
5151
import org.apache.paimon.table.source.ReadBuilder;
5252
import org.apache.paimon.table.system.RowTrackingTable;
5353
import org.apache.paimon.types.DataField;
54+
import org.apache.paimon.types.DataTypeRoot;
5455
import org.apache.paimon.types.DataTypes;
5556
import org.apache.paimon.types.RowType;
5657
import org.apache.paimon.utils.Range;
@@ -497,49 +498,32 @@ public void testExternalStorageFieldMustBeSubsetOfDescriptorField() {
497498
}
498499

499500
@Test
500-
public void testBlobViewFieldMustBeSubsetOfBlobField() {
501-
assertThatThrownBy(
502-
() -> {
503-
Schema.Builder schemaBuilder = Schema.newBuilder();
504-
schemaBuilder.column("f0", DataTypes.INT());
505-
schemaBuilder.column("f1", DataTypes.STRING());
506-
schemaBuilder.column("f2", DataTypes.BLOB());
507-
schemaBuilder.option(CoreOptions.TARGET_FILE_SIZE.key(), "25 MB");
508-
schemaBuilder.option(CoreOptions.ROW_TRACKING_ENABLED.key(), "true");
509-
schemaBuilder.option(CoreOptions.DATA_EVOLUTION_ENABLED.key(), "true");
510-
schemaBuilder.option(CoreOptions.BLOB_VIEW_FIELD.key(), "f2");
511-
catalog.createTable(identifier(), schemaBuilder.build(), true);
512-
})
513-
.hasRootCauseInstanceOf(IllegalArgumentException.class)
514-
.hasRootCauseMessage(
515-
"Field 'f2' in '"
516-
+ CoreOptions.BLOB_VIEW_FIELD.key()
517-
+ "' must also be in '"
518-
+ CoreOptions.BLOB_FIELD.key()
519-
+ "'.");
501+
public void testBlobInlineFieldCanDeclareBlobWithoutBlobField() throws Exception {
502+
assertCreateBlobInlineFieldWithoutBlobField(
503+
"blob_descriptor_without_blob_field", CoreOptions.BLOB_DESCRIPTOR_FIELD.key());
504+
assertCreateBlobInlineFieldWithoutBlobField(
505+
"blob_view_without_blob_field", CoreOptions.BLOB_VIEW_FIELD.key());
520506
}
521507

522-
@Test
523-
public void testBlobDescriptorFieldMustBeSubsetOfBlobField() {
524-
assertThatThrownBy(
525-
() -> {
526-
Schema.Builder schemaBuilder = Schema.newBuilder();
527-
schemaBuilder.column("f0", DataTypes.INT());
528-
schemaBuilder.column("f1", DataTypes.STRING());
529-
schemaBuilder.column("f2", DataTypes.BLOB());
530-
schemaBuilder.option(CoreOptions.TARGET_FILE_SIZE.key(), "25 MB");
531-
schemaBuilder.option(CoreOptions.ROW_TRACKING_ENABLED.key(), "true");
532-
schemaBuilder.option(CoreOptions.DATA_EVOLUTION_ENABLED.key(), "true");
533-
schemaBuilder.option(CoreOptions.BLOB_DESCRIPTOR_FIELD.key(), "f2");
534-
catalog.createTable(identifier(), schemaBuilder.build(), true);
535-
})
536-
.hasRootCauseInstanceOf(IllegalArgumentException.class)
537-
.hasRootCauseMessage(
538-
"Field 'f2' in '"
539-
+ CoreOptions.BLOB_DESCRIPTOR_FIELD.key()
540-
+ "' must also be in '"
541-
+ CoreOptions.BLOB_FIELD.key()
542-
+ "'.");
508+
private void assertCreateBlobInlineFieldWithoutBlobField(String tableName, String optionKey)
509+
throws Exception {
510+
Schema.Builder schemaBuilder = Schema.newBuilder();
511+
schemaBuilder.column("f0", DataTypes.INT());
512+
schemaBuilder.column("f1", DataTypes.STRING());
513+
schemaBuilder.column("f2", DataTypes.BLOB());
514+
schemaBuilder.option(CoreOptions.TARGET_FILE_SIZE.key(), "25 MB");
515+
schemaBuilder.option(CoreOptions.ROW_TRACKING_ENABLED.key(), "true");
516+
schemaBuilder.option(CoreOptions.DATA_EVOLUTION_ENABLED.key(), "true");
517+
schemaBuilder.option(optionKey, "f2");
518+
519+
catalog.createTable(identifier(tableName), schemaBuilder.build(), true);
520+
521+
assertThat(
522+
catalog.getTable(identifier(tableName))
523+
.rowType()
524+
.getTypeAt(2)
525+
.is(DataTypeRoot.BLOB))
526+
.isTrue();
543527
}
544528

545529
@Test

paimon-flink/paimon-flink-common/src/main/java/org/apache/paimon/flink/FlinkCatalog.java

Lines changed: 15 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -129,6 +129,7 @@
129129
import java.util.Arrays;
130130
import java.util.Collections;
131131
import java.util.HashMap;
132+
import java.util.HashSet;
132133
import java.util.LinkedHashMap;
133134
import java.util.List;
134135
import java.util.Map;
@@ -1091,11 +1092,7 @@ public static Schema fromCatalogTable(CatalogBaseTable catalogTable) {
10911092

10921093
Map<String, String> options = new HashMap<>(catalogTable.getOptions());
10931094
List<String> blobFields = CoreOptions.blobField(options);
1094-
Set<String> blobDescriptorFields = new CoreOptions(options).blobDescriptorField();
1095-
List<String> blobViewFields = CoreOptions.blobViewField(options);
1096-
validateSecondaryBlobFields(
1097-
blobFields, blobDescriptorFields, CoreOptions.BLOB_DESCRIPTOR_FIELD.key());
1098-
validateSecondaryBlobFields(blobFields, blobViewFields, CoreOptions.BLOB_VIEW_FIELD.key());
1095+
Set<String> blobTypeFields = blobTypeFields(options);
10991096
if (!blobFields.isEmpty()) {
11001097
checkArgument(
11011098
options.containsKey(CoreOptions.DATA_EVOLUTION_ENABLED.key()),
@@ -1124,30 +1121,29 @@ public static Schema fromCatalogTable(CatalogBaseTable catalogTable) {
11241121
field ->
11251122
schemaBuilder.column(
11261123
field.getName(),
1127-
resolveDataType(field.getName(), field.getType(), options),
1124+
resolveDataType(
1125+
field.getName(),
1126+
field.getType(),
1127+
options,
1128+
blobTypeFields),
11281129
columnComments.get(field.getName())));
11291130

11301131
return schemaBuilder.build();
11311132
}
11321133

1133-
private static void validateSecondaryBlobFields(
1134-
List<String> blobFields, Iterable<String> secondaryBlobFields, String optionKey) {
1135-
for (String secondaryBlobField : secondaryBlobFields) {
1136-
checkArgument(
1137-
blobFields.contains(secondaryBlobField),
1138-
"Field '%s' in '%s' must also be in '%s'.",
1139-
secondaryBlobField,
1140-
optionKey,
1141-
CoreOptions.BLOB_FIELD.key());
1142-
}
1134+
private static Set<String> blobTypeFields(Map<String, String> options) {
1135+
Set<String> blobTypeFields = new HashSet<>(CoreOptions.blobField(options));
1136+
blobTypeFields.addAll(new CoreOptions(options).blobDescriptorField());
1137+
blobTypeFields.addAll(CoreOptions.blobViewField(options));
1138+
return blobTypeFields;
11431139
}
11441140

11451141
private static org.apache.paimon.types.DataType resolveDataType(
11461142
String fieldName,
11471143
org.apache.flink.table.types.logical.LogicalType logicalType,
1148-
Map<String, String> options) {
1149-
List<String> blobFields = CoreOptions.blobField(options);
1150-
if (blobFields.contains(fieldName)) {
1144+
Map<String, String> options,
1145+
Set<String> blobTypeFields) {
1146+
if (blobTypeFields.contains(fieldName)) {
11511147
return toBlobType(logicalType);
11521148
}
11531149
Set<String> vectorFields = CoreOptions.vectorField(options);

0 commit comments

Comments
 (0)