Skip to content

Commit aee8a5f

Browse files
committed
[590] Add Iceberg Glue Catalog Sync implementation
1 parent 79c68de commit aee8a5f

37 files changed

Lines changed: 3442 additions & 15 deletions

pom.xml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,7 @@
5151
<module>xtable-hudi-support</module>
5252
<module>xtable-core</module>
5353
<module>xtable-utilities</module>
54+
<module>xtable-aws</module>
5455
</modules>
5556

5657
<properties>
@@ -386,6 +387,11 @@
386387
<version>${aws.version}</version>
387388
<scope>runtime</scope>
388389
</dependency>
390+
<dependency>
391+
<groupId>software.amazon.awssdk</groupId>
392+
<artifactId>glue</artifactId>
393+
<version>${aws.version}</version>
394+
</dependency>
389395

390396
<!-- Protobuf -->
391397
<dependency>

xtable-api/src/main/java/org/apache/xtable/model/exception/ErrorCode.java

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,8 @@ public enum ErrorCode {
3131
UNSUPPORTED_SCHEMA_TYPE(10007),
3232
UNSUPPORTED_FEATURE(10008),
3333
PARSE_EXCEPTION(10009),
34-
CATALOG_REFRESH_EXCEPTION(10010);
34+
CATALOG_REFRESH_EXCEPTION(10010),
35+
CATALOG_SYNC_GENERIC_EXCEPTION(10011);
3536

3637
private final int errorCode;
3738

xtable-api/src/main/java/org/apache/xtable/model/storage/CatalogType.java

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,4 +25,5 @@
2525
*/
2626
public class CatalogType {
2727
public static final String STORAGE = "STORAGE";
28+
public static final String GLUE = "GLUE";
2829
}

xtable-api/src/main/java/org/apache/xtable/spi/extractor/CatalogConversionSource.java

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,9 @@
1818

1919
package org.apache.xtable.spi.extractor;
2020

21+
import org.apache.hadoop.conf.Configuration;
22+
23+
import org.apache.xtable.conversion.ExternalCatalogConfig;
2124
import org.apache.xtable.conversion.SourceTable;
2225
import org.apache.xtable.model.catalog.CatalogTableIdentifier;
2326

@@ -32,4 +35,7 @@ public interface CatalogConversionSource {
3235

3336
/** Returns the {@link org.apache.xtable.model.storage.CatalogType} for the catalog conversion */
3437
String getCatalogType();
38+
39+
/** Initializes the ConversionSource with provided configuration */
40+
void init(ExternalCatalogConfig catalogConfig, Configuration configuration);
3541
}

xtable-api/src/main/java/org/apache/xtable/spi/sync/CatalogSync.java

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -56,13 +56,15 @@ public SyncResult syncTable(
5656
try {
5757
results.add(syncCatalog(catalogSyncClient, tableIdentifier, table));
5858
log.info(
59-
"Catalog sync is successful for table {} with format {} using catalogSync {}",
59+
"Catalog sync is successful for table {} with base path {} and format {} using catalogSync {}",
60+
tableIdentifier.getId(),
6061
table.getBasePath(),
6162
table.getTableFormat(),
6263
catalogSyncClient.getClass().getName());
6364
} catch (Exception e) {
6465
log.error(
65-
"Catalog sync failed for table {} with format {} using catalogSync {}",
66+
"Catalog sync failed for table {} with base path {} and format {} using catalogSync {}",
67+
tableIdentifier.getId(),
6668
table.getBasePath(),
6769
table.getTableFormat(),
6870
catalogSyncClient.getClass().getName());
@@ -83,6 +85,12 @@ private <TABLE> CatalogSyncStatus syncCatalog(
8385
CatalogSyncClient<TABLE> catalogSyncClient,
8486
CatalogTableIdentifier tableIdentifier,
8587
InternalTable table) {
88+
log.info(
89+
"Running catalog sync for table {} with base path {} and format {} using catalogSync {}",
90+
tableIdentifier.getId(),
91+
table.getBasePath(),
92+
table.getTableFormat(),
93+
catalogSyncClient.getClass().getName());
8694
if (!catalogSyncClient.hasDatabase(tableIdentifier)) {
8795
catalogSyncClient.createDatabase(tableIdentifier);
8896
}

xtable-api/src/main/java/org/apache/xtable/spi/sync/CatalogSyncClient.java

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,9 @@
1818

1919
package org.apache.xtable.spi.sync;
2020

21+
import org.apache.hadoop.conf.Configuration;
22+
23+
import org.apache.xtable.conversion.ExternalCatalogConfig;
2124
import org.apache.xtable.model.InternalTable;
2225
import org.apache.xtable.model.catalog.CatalogTableIdentifier;
2326

@@ -71,4 +74,7 @@ void refreshTable(
7174

7275
/** Drops a table from the catalog. */
7376
void dropTable(InternalTable table, CatalogTableIdentifier tableIdentifier);
77+
78+
/** Initializes the client with provided configuration */
79+
void init(ExternalCatalogConfig catalogConfig, String tableFormat, Configuration configuration);
7480
}

xtable-aws/pom.xml

Lines changed: 107 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,107 @@
1+
<?xml version="1.0" encoding="UTF-8"?>
2+
<!--
3+
~ Licensed to the Apache Software Foundation (ASF) under one or more
4+
~ contributor license agreements. See the NOTICE file distributed with
5+
~ this work for additional information regarding copyright ownership.
6+
~ The ASF licenses this file to You under the Apache License, Version 2.0
7+
~ (the "License"); you may not use this file except in compliance with
8+
~ the License. You may obtain a copy of the License at
9+
~
10+
~ http://www.apache.org/licenses/LICENSE-2.0
11+
~
12+
~ Unless required by applicable law or agreed to in writing, software
13+
~ distributed under the License is distributed on an "AS IS" BASIS,
14+
~ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15+
~ See the License for the specific language governing permissions and
16+
~ limitations under the License.
17+
-->
18+
<project xmlns="http://maven.apache.org/POM/4.0.0"
19+
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
20+
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
21+
<modelVersion>4.0.0</modelVersion>
22+
<parent>
23+
<groupId>org.apache.xtable</groupId>
24+
<artifactId>xtable</artifactId>
25+
<version>0.2.0-SNAPSHOT</version>
26+
</parent>
27+
28+
<artifactId>xtable-aws</artifactId>
29+
<name>XTable AWS</name>
30+
31+
<dependencies>
32+
33+
<dependency>
34+
<groupId>org.apache.xtable</groupId>
35+
<artifactId>xtable-core_${scala.binary.version}</artifactId>
36+
<version>${project.version}</version>
37+
</dependency>
38+
39+
<!-- Hadoop dependencies -->
40+
<dependency>
41+
<groupId>org.apache.hadoop</groupId>
42+
<artifactId>hadoop-common</artifactId>
43+
<scope>provided</scope>
44+
</dependency>
45+
46+
<!-- AWS Glue dependencies -->
47+
<dependency>
48+
<groupId>software.amazon.awssdk</groupId>
49+
<artifactId>glue</artifactId>
50+
</dependency>
51+
52+
<!-- Junit -->
53+
<dependency>
54+
<groupId>org.junit.jupiter</groupId>
55+
<artifactId>junit-jupiter-api</artifactId>
56+
<scope>test</scope>
57+
</dependency>
58+
<dependency>
59+
<groupId>org.junit.jupiter</groupId>
60+
<artifactId>junit-jupiter-params</artifactId>
61+
<scope>test</scope>
62+
</dependency>
63+
<dependency>
64+
<groupId>org.junit.jupiter</groupId>
65+
<artifactId>junit-jupiter-engine</artifactId>
66+
<scope>test</scope>
67+
</dependency>
68+
69+
<!-- Mockito -->
70+
<dependency>
71+
<groupId>org.mockito</groupId>
72+
<artifactId>mockito-core</artifactId>
73+
<scope>test</scope>
74+
</dependency>
75+
<dependency>
76+
<groupId>org.mockito</groupId>
77+
<artifactId>mockito-junit-jupiter</artifactId>
78+
</dependency>
79+
80+
<dependency>
81+
<groupId>org.apache.xtable</groupId>
82+
<artifactId>xtable-core_${scala.binary.version}</artifactId>
83+
<version>${project.version}</version>
84+
<classifier>tests</classifier>
85+
<type>test-jar</type>
86+
<scope>test</scope>
87+
</dependency>
88+
</dependencies>
89+
90+
<build>
91+
<plugins>
92+
<plugin>
93+
<groupId>org.apache.maven.plugins</groupId>
94+
<artifactId>maven-shade-plugin</artifactId>
95+
</plugin>
96+
<plugin>
97+
<groupId>org.apache.maven.plugins</groupId>
98+
<artifactId>maven-deploy-plugin</artifactId>
99+
<version>${maven-deploy-plugin.version}</version>
100+
<configuration>
101+
<skip>true</skip>
102+
</configuration>
103+
</plugin>
104+
</plugins>
105+
</build>
106+
107+
</project>
Lines changed: 80 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,80 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one
3+
* or more contributor license agreements. See the NOTICE file
4+
* distributed with this work for additional information
5+
* regarding copyright ownership. The ASF licenses this file
6+
* to you under the Apache License, Version 2.0 (the
7+
* "License"); you may not use this file except in compliance
8+
* with the License. You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing, software
13+
* distributed under the License is distributed on an "AS IS" BASIS,
14+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15+
* See the License for the specific language governing permissions and
16+
* limitations under the License.
17+
*/
18+
19+
package org.apache.xtable.glue;
20+
21+
import java.util.Map;
22+
23+
import org.apache.commons.lang3.StringUtils;
24+
25+
import org.apache.xtable.exception.ConfigurationException;
26+
import org.apache.xtable.reflection.ReflectionUtils;
27+
28+
import software.amazon.awssdk.auth.credentials.AwsCredentialsProvider;
29+
import software.amazon.awssdk.auth.credentials.DefaultCredentialsProvider;
30+
import software.amazon.awssdk.regions.Region;
31+
import software.amazon.awssdk.services.glue.GlueClient;
32+
import software.amazon.awssdk.services.glue.GlueClientBuilder;
33+
34+
/**
35+
* Factory class for creating and configuring instances of {@link GlueClient} with settings provided
36+
* by {@link GlueCatalogConfig}.
37+
*
38+
* <p>This factory is responsible for setting the AWS region and credentials for the Glue client. If
39+
* a custom credentials provider class is specified in {@code GlueCatalogConfig}, it will use
40+
* reflection to instantiate the provider; otherwise, it defaults to the standard AWS credentials
41+
* provider.
42+
*/
43+
public class DefaultGlueClientFactory extends GlueClientFactory {
44+
45+
public DefaultGlueClientFactory(GlueCatalogConfig glueConfig) {
46+
super(glueConfig);
47+
}
48+
49+
public GlueClient getGlueClient() {
50+
GlueClientBuilder builder = GlueClient.builder();
51+
if (!StringUtils.isEmpty(glueConfig.getRegion())) {
52+
builder.region(Region.of(glueConfig.getRegion()));
53+
}
54+
55+
AwsCredentialsProvider credentialsProvider;
56+
if (!StringUtils.isEmpty(glueConfig.getClientCredentialsProviderClass())) {
57+
String className = glueConfig.getClientCredentialsProviderClass();
58+
try {
59+
credentialsProvider =
60+
ReflectionUtils.createInstanceOfClassFromStaticMethod(
61+
className,
62+
"create",
63+
new Class<?>[] {Map.class},
64+
new Object[] {glueConfig.getClientCredentialsProviderConfigs()});
65+
} catch (ConfigurationException e) {
66+
// retry credentialsProvider creation without arguments if not a ClassNotFoundException
67+
if (e.getCause() instanceof ClassNotFoundException) {
68+
throw e;
69+
}
70+
credentialsProvider =
71+
ReflectionUtils.createInstanceOfClassFromStaticMethod(className, "create");
72+
}
73+
} else {
74+
credentialsProvider = DefaultCredentialsProvider.create();
75+
}
76+
77+
builder.credentialsProvider(credentialsProvider);
78+
return builder.build();
79+
}
80+
}
Lines changed: 91 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,91 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one
3+
* or more contributor license agreements. See the NOTICE file
4+
* distributed with this work for additional information
5+
* regarding copyright ownership. The ASF licenses this file
6+
* to you under the Apache License, Version 2.0 (the
7+
* "License"); you may not use this file except in compliance
8+
* with the License. You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing, software
13+
* distributed under the License is distributed on an "AS IS" BASIS,
14+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15+
* See the License for the specific language governing permissions and
16+
* limitations under the License.
17+
*/
18+
19+
package org.apache.xtable.glue;
20+
21+
import java.util.Collections;
22+
import java.util.Map;
23+
import java.util.stream.Collectors;
24+
25+
import lombok.AccessLevel;
26+
import lombok.EqualsAndHashCode;
27+
import lombok.Getter;
28+
import lombok.RequiredArgsConstructor;
29+
import lombok.ToString;
30+
31+
import com.fasterxml.jackson.annotation.JsonProperty;
32+
import com.fasterxml.jackson.databind.DeserializationFeature;
33+
import com.fasterxml.jackson.databind.ObjectMapper;
34+
35+
/** Configurations for setting up Glue client and running Glue catalog operations */
36+
@Getter
37+
@EqualsAndHashCode
38+
@ToString
39+
@RequiredArgsConstructor(access = AccessLevel.PRIVATE)
40+
public class GlueCatalogConfig {
41+
42+
private static final ObjectMapper OBJECT_MAPPER =
43+
new ObjectMapper().configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false);
44+
45+
public static final String CLIENT_CREDENTIAL_PROVIDER_PROP_PREFIX =
46+
"externalCatalog.glue.credentials.provider.";
47+
48+
@JsonProperty("externalCatalog.glue.catalogId")
49+
private final String catalogId;
50+
51+
@JsonProperty("externalCatalog.glue.region")
52+
private final String region;
53+
54+
@JsonProperty("externalCatalog.glue.credentialsProviderClass")
55+
private final String clientCredentialsProviderClass;
56+
57+
/**
58+
* In case a credentialsProviderClass is configured and require additional properties for
59+
* instantiation, those properties should start with {@link
60+
* #CLIENT_CREDENTIAL_PROVIDER_PROP_PREFIX}.
61+
*
62+
* <p>For ex: if credentialsProviderClass requires `accessKey` and `secretAccessKey`, they should
63+
* be configured using below keys:
64+
* <li>externalCatalog.glue.credentials.provider.accessKey
65+
* <li>externalCatalog.glue.credentials.provider.secretAccessKey
66+
*/
67+
private Map<String, String> clientCredentialsProviderConfigs;
68+
69+
/** Creates GlueCatalogConfig from given key-value map */
70+
public static GlueCatalogConfig of(Map<String, String> properties) {
71+
try {
72+
GlueCatalogConfig cfg = OBJECT_MAPPER.convertValue(properties, GlueCatalogConfig.class);
73+
cfg.clientCredentialsProviderConfigs =
74+
propertiesWithPrefix(properties, CLIENT_CREDENTIAL_PROVIDER_PROP_PREFIX);
75+
return cfg;
76+
} catch (IllegalArgumentException e) {
77+
throw new RuntimeException(e);
78+
}
79+
}
80+
81+
private static Map<String, String> propertiesWithPrefix(
82+
Map<String, String> properties, String prefix) {
83+
if (properties == null || properties.isEmpty()) {
84+
return Collections.emptyMap();
85+
}
86+
87+
return properties.entrySet().stream()
88+
.filter(e -> e.getKey().startsWith(prefix))
89+
.collect(Collectors.toMap(e -> e.getKey().replaceFirst(prefix, ""), Map.Entry::getValue));
90+
}
91+
}

0 commit comments

Comments
 (0)