Skip to content

Commit

Permalink
[#4927] improvement(catalog): extract a common HMS module (#4928)
Browse files Browse the repository at this point in the history
### What changes were proposed in this pull request?

Extract a common Hive Metastore (HMS) module (`catalogs:hive-metastore-common`) so that HMS client pooling and related utilities are shared across catalogs instead of being duplicated in `catalog-hive`.

### Why are the changes needed?

Fix: #4927 

### Does this PR introduce _any_ user-facing change?

No. This is an internal refactoring; public interfaces and user-facing behavior are unchanged.

### How was this patch tested?

Covered by the existing test suite; CI passed.
  • Loading branch information
mchades authored Sep 18, 2024
1 parent 61fe0ee commit 3433967
Show file tree
Hide file tree
Showing 34 changed files with 401 additions and 203 deletions.
4 changes: 2 additions & 2 deletions build.gradle.kts
Original file line number Diff line number Diff line change
Expand Up @@ -743,7 +743,7 @@ tasks {
if (!it.name.startsWith("catalog") &&
!it.name.startsWith("authorization") &&
!it.name.startsWith("client") && !it.name.startsWith("filesystem") && !it.name.startsWith("spark") && !it.name.startsWith("iceberg") && it.name != "trino-connector" &&
it.name != "integration-test" && it.name != "bundled-catalog" && !it.name.startsWith("flink")
it.name != "integration-test" && it.name != "hive-metastore-common" && !it.name.startsWith("flink")
) {
from(it.configurations.runtimeClasspath)
into("distribution/package/libs")
Expand All @@ -762,7 +762,7 @@ tasks {
!it.name.startsWith("integration-test") &&
!it.name.startsWith("flink") &&
!it.name.startsWith("trino-connector") &&
it.name != "bundled-catalog"
it.name != "hive-metastore-common"
) {
dependsOn("${it.name}:build")
from("${it.name}/build/libs")
Expand Down
33 changes: 6 additions & 27 deletions catalogs/catalog-hive/build.gradle.kts
Original file line number Diff line number Diff line change
Expand Up @@ -36,44 +36,19 @@ dependencies {
implementation(project(":catalogs:catalog-common")) {
exclude("*")
}
implementation(project(":catalogs:hive-metastore-common"))
implementation(project(":core")) {
exclude("*")
}

implementation(libs.caffeine)
implementation(libs.commons.collections3)
implementation(libs.commons.configuration1)
implementation(libs.htrace.core4)
implementation(libs.commons.io)
implementation(libs.guava)
implementation(libs.hadoop2.auth) {
exclude("*")
}
implementation(libs.hive2.exec) {
artifact {
classifier = "core"
}
exclude("com.google.code.findbugs", "jsr305")
exclude("com.google.protobuf")
exclude("org.apache.avro")
exclude("org.apache.ant")
exclude("org.apache.calcite")
exclude("org.apache.calcite.avatica")
exclude("org.apache.curator")
exclude("org.apache.derby")
exclude("org.apache.hadoop", "hadoop-yarn-server-resourcemanager")
exclude("org.apache.hive", "hive-llap-tez")
exclude("org.apache.hive", "hive-vector-code-gen")
exclude("org.apache.ivy")
exclude("org.apache.logging.log4j")
exclude("org.apache.zookeeper")
exclude("org.codehaus.groovy", "groovy-all")
exclude("org.datanucleus", "datanucleus-core")
exclude("org.eclipse.jetty.aggregate", "jetty-all")
exclude("org.eclipse.jetty.orbit", "javax.servlet")
exclude("org.openjdk.jol")
exclude("org.pentaho")
exclude("org.slf4j")
}
implementation(libs.woodstox.core)
implementation(libs.hive2.metastore) {
exclude("ant")
Expand Down Expand Up @@ -115,6 +90,7 @@ dependencies {
annotationProcessor(libs.immutables.value)
annotationProcessor(libs.lombok)

testImplementation(project(":catalogs:hive-metastore-common", "testArtifacts"))
testImplementation(project(":common"))
testImplementation(project(":clients:client-java"))
testImplementation(project(":integration-test-common", "testArtifacts"))
Expand All @@ -125,6 +101,9 @@ dependencies {
testImplementation(libs.bundles.jersey)
testImplementation(libs.bundles.log4j)
testImplementation(libs.hadoop2.hdfs)
testImplementation(libs.hadoop2.mapreduce.client.core) {
exclude("*")
}
testImplementation(libs.hive2.common) {
exclude("org.eclipse.jetty.aggregate", "jetty-all")
exclude("org.eclipse.jetty.orbit", "javax.servlet")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,6 @@
*/
package org.apache.gravitino.catalog.hive;

import static org.apache.gravitino.catalog.hive.HiveCatalogPropertiesMeta.CLIENT_POOL_CACHE_EVICTION_INTERVAL_MS;
import static org.apache.gravitino.catalog.hive.HiveCatalogPropertiesMeta.CLIENT_POOL_SIZE;
import static org.apache.gravitino.catalog.hive.HiveCatalogPropertiesMeta.LIST_ALL_TABLES;
import static org.apache.gravitino.catalog.hive.HiveCatalogPropertiesMeta.METASTORE_URIS;
import static org.apache.gravitino.catalog.hive.HiveCatalogPropertiesMeta.PRINCIPAL;
Expand All @@ -28,8 +26,8 @@
import static org.apache.gravitino.catalog.hive.HiveTable.TABLE_TYPE_PROP;
import static org.apache.gravitino.catalog.hive.HiveTablePropertiesMetadata.COMMENT;
import static org.apache.gravitino.catalog.hive.HiveTablePropertiesMetadata.TABLE_TYPE;
import static org.apache.gravitino.catalog.hive.converter.HiveDataTypeConverter.CONVERTER;
import static org.apache.gravitino.connector.BaseCatalog.CATALOG_BYPASS_PREFIX;
import static org.apache.gravitino.hive.converter.HiveDataTypeConverter.CONVERTER;
import static org.apache.hadoop.hive.metastore.TableType.EXTERNAL_TABLE;

import com.google.common.annotations.VisibleForTesting;
Expand Down Expand Up @@ -71,6 +69,7 @@
import org.apache.gravitino.exceptions.NonEmptySchemaException;
import org.apache.gravitino.exceptions.SchemaAlreadyExistsException;
import org.apache.gravitino.exceptions.TableAlreadyExistsException;
import org.apache.gravitino.hive.CachedClientPool;
import org.apache.gravitino.meta.AuditInfo;
import org.apache.gravitino.rel.Column;
import org.apache.gravitino.rel.Table;
Expand Down Expand Up @@ -167,8 +166,7 @@ public void initialize(

initKerberosIfNecessary(conf, hadoopConf);

this.clientPool =
new CachedClientPool(getClientPoolSize(conf), hiveConf, getCacheEvictionInterval(conf));
this.clientPool = new CachedClientPool(hiveConf, conf);

this.listAllTables = enableListAllTables(conf);
}
Expand Down Expand Up @@ -282,19 +280,6 @@ private void refreshKerberosConfig() {
}
}

@VisibleForTesting
int getClientPoolSize(Map<String, String> conf) {
return (int)
propertiesMetadata.catalogPropertiesMetadata().getOrDefault(conf, CLIENT_POOL_SIZE);
}

long getCacheEvictionInterval(Map<String, String> conf) {
return (long)
propertiesMetadata
.catalogPropertiesMetadata()
.getOrDefault(conf, CLIENT_POOL_CACHE_EVICTION_INTERVAL_MS);
}

boolean enableListAllTables(Map<String, String> conf) {
return (boolean)
propertiesMetadata.catalogPropertiesMetadata().getOrDefault(conf, LIST_ALL_TABLES);
Expand Down Expand Up @@ -372,7 +357,6 @@ public HiveSchema createSchema(
.withName(ident.name())
.withComment(comment)
.withProperties(properties)
.withConf(hiveConf)
.withAuditInfo(
AuditInfo.builder()
.withCreator(UserGroupInformation.getCurrentUser().getUserName())
Expand Down Expand Up @@ -413,7 +397,7 @@ public HiveSchema createSchema(
public HiveSchema loadSchema(NameIdentifier ident) throws NoSuchSchemaException {
try {
Database database = clientPool.run(client -> client.getDatabase(ident.name()));
HiveSchema hiveSchema = HiveSchema.fromHiveDB(database, hiveConf);
HiveSchema hiveSchema = HiveSchema.fromHiveDB(database);

LOG.info("Loaded Hive schema (database) {} from Hive Metastore ", ident.name());
return hiveSchema;
Expand Down Expand Up @@ -477,7 +461,7 @@ public HiveSchema alterSchema(NameIdentifier ident, SchemaChange... changes)
});

LOG.info("Altered Hive schema (database) {} in Hive Metastore", ident.name());
return HiveSchema.fromHiveDB(alteredDatabase, hiveConf);
return HiveSchema.fromHiveDB(alteredDatabase);

} catch (NoSuchObjectException e) {
throw new NoSuchSchemaException(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,24 +21,19 @@

import com.google.common.collect.ImmutableMap;
import java.util.Map;
import java.util.concurrent.TimeUnit;
import org.apache.gravitino.connector.AuthorizationPropertiesMeta;
import org.apache.gravitino.connector.BaseCatalogPropertiesMetadata;
import org.apache.gravitino.connector.PropertyEntry;
import org.apache.gravitino.hive.ClientPropertiesMetadata;

public class HiveCatalogPropertiesMeta extends BaseCatalogPropertiesMetadata {

public static final String CLIENT_POOL_SIZE = HiveConstants.CLIENT_POOL_SIZE;
public static final int DEFAULT_CLIENT_POOL_SIZE = 1;

public static final String METASTORE_URIS = HiveConstants.METASTORE_URIS;

public static final String CLIENT_POOL_CACHE_EVICTION_INTERVAL_MS =
HiveConstants.CLIENT_POOL_CACHE_EVICTION_INTERVAL_MS;

public static final long DEFAULT_CLIENT_POOL_CACHE_EVICTION_INTERVAL_MS =
TimeUnit.MINUTES.toMillis(5);

public static final String IMPERSONATION_ENABLE = HiveConstants.IMPERSONATION_ENABLE;

public static final boolean DEFAULT_IMPERSONATION_ENABLE = false;
Expand All @@ -55,6 +50,9 @@ public class HiveCatalogPropertiesMeta extends BaseCatalogPropertiesMetadata {

public static final boolean DEFAULT_LIST_ALL_TABLES = false;

private static final ClientPropertiesMetadata CLIENT_PROPERTIES_METADATA =
new ClientPropertiesMetadata();

private static final Map<String, PropertyEntry<?>> HIVE_CATALOG_PROPERTY_ENTRIES =
ImmutableMap.<String, PropertyEntry<?>>builder()
.put(
Expand All @@ -64,22 +62,6 @@ public class HiveCatalogPropertiesMeta extends BaseCatalogPropertiesMetadata {
"The Hive metastore URIs",
false /* immutable */,
false /* hidden */))
.put(
CLIENT_POOL_SIZE,
PropertyEntry.integerOptionalPropertyEntry(
CLIENT_POOL_SIZE,
"The maximum number of Hive metastore clients in the pool for Gravitino",
false /* immutable */,
DEFAULT_CLIENT_POOL_SIZE,
false /* hidden */))
.put(
CLIENT_POOL_CACHE_EVICTION_INTERVAL_MS,
PropertyEntry.longOptionalPropertyEntry(
CLIENT_POOL_CACHE_EVICTION_INTERVAL_MS,
"The cache pool eviction interval",
false /* immutable */,
DEFAULT_CLIENT_POOL_CACHE_EVICTION_INTERVAL_MS,
false /* hidden */))
.put(
IMPERSONATION_ENABLE,
PropertyEntry.booleanPropertyEntry(
Expand Down Expand Up @@ -129,6 +111,7 @@ public class HiveCatalogPropertiesMeta extends BaseCatalogPropertiesMetadata {
false /* hidden */,
false /* reserved */))
.putAll(AuthorizationPropertiesMeta.RANGER_AUTHORIZATION_PROPERTY_ENTRIES)
.putAll(CLIENT_PROPERTIES_METADATA.propertyEntries())
.build();

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,25 +24,21 @@
import lombok.ToString;
import org.apache.gravitino.connector.BaseSchema;
import org.apache.gravitino.meta.AuditInfo;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hive.metastore.api.Database;
import org.apache.hadoop.hive.metastore.api.PrincipalType;

/** Represents an Apache Hive Schema (Database) entity in the Hive Metastore catalog. */
@ToString
public class HiveSchema extends BaseSchema {
private Configuration conf;

private HiveSchema() {}

/**
* Creates a new HiveSchema instance from a Database and a Builder.
*
* @param db The Database representing the HiveSchema.
* @param hiveConf The HiveConf used to construct the HiveSchema.
* @return A new HiveSchema instance.
*/
public static HiveSchema fromHiveDB(Database db, Configuration hiveConf) {
public static HiveSchema fromHiveDB(Database db) {
Map<String, String> properties = buildSchemaProperties(db);

// Get audit info from Hive's Database object. Because Hive's database doesn't store create
Expand All @@ -55,7 +51,6 @@ public static HiveSchema fromHiveDB(Database db, Configuration hiveConf) {
.withComment(db.getDescription())
.withProperties(properties)
.withAuditInfo(auditInfoBuilder.build())
.withConf(hiveConf)
.build();
}

Expand Down Expand Up @@ -98,19 +93,6 @@ public Database toHiveDB() {
/** A builder class for constructing HiveSchema instances. */
public static class Builder extends BaseSchemaBuilder<Builder, HiveSchema> {

protected Configuration conf;

/**
* Sets the Configuration to be used for building the HiveSchema.
*
* @param conf The Configuration.
* @return The Builder instance.
*/
public Builder withConf(Configuration conf) {
this.conf = conf;
return this;
}

/** Creates a new instance of {@link Builder}. */
private Builder() {}

Expand All @@ -126,7 +108,6 @@ protected HiveSchema internalBuild() {
hiveSchema.comment = comment;
hiveSchema.properties = properties;
hiveSchema.auditInfo = auditInfo;
hiveSchema.conf = conf;

return hiveSchema;
}
Expand Down
Loading

0 comments on commit 3433967

Please sign in to comment.