Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add data compaction policy validator #1238

Merged
merged 8 commits into from
Mar 25, 2025
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.polaris.core.policy.validator;

import com.fasterxml.jackson.annotation.JsonCreator;
import com.fasterxml.jackson.annotation.JsonProperty;
import com.fasterxml.jackson.databind.annotation.JsonDeserialize;
import java.util.Map;

/**
 * Jackson-bound content model for a data compaction policy.
 *
 * <p>{@code enable} is the single creator property and is declared as required. {@code version}
 * and {@code config} are ordinary mutable properties that stay {@code null} until a setter is
 * called.
 */
public class DataCompactionPolicy {
  @JsonDeserialize(using = StrictBooleanDeserializer.class)
  private Boolean enable;

  private String version;
  private Map<String, String> config;

  /**
   * Creates a policy with the given enablement flag.
   *
   * @param enable value of the required {@code enable} JSON property
   */
  @JsonCreator
  public DataCompactionPolicy(@JsonProperty(value = "enable", required = true) boolean enable) {
    // Explicit boxing; equivalent to assigning the primitive directly.
    this.enable = Boolean.valueOf(enable);
  }

  /** Returns the policy schema version, or {@code null} if none has been set. */
  public String getVersion() {
    return this.version;
  }

  /** Sets the policy schema version. */
  public void setVersion(String newVersion) {
    this.version = newVersion;
  }

  /** Returns the stored enablement flag. */
  public Boolean enabled() {
    return this.enable;
  }

  /** Replaces the stored enablement flag. */
  public void setEnabled(Boolean newValue) {
    this.enable = newValue;
  }

  /** Returns the config map, or {@code null} if none has been set. */
  public Map<String, String> getConfig() {
    return this.config;
  }

  /** Sets the config map; the given reference is stored as-is. */
  public void setConfig(Map<String, String> newConfig) {
    this.config = newConfig;
  }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.polaris.core.policy.validator;

import static org.apache.polaris.core.entity.PolarisEntityType.CATALOG;
import static org.apache.polaris.core.entity.PolarisEntityType.ICEBERG_TABLE_LIKE;
import static org.apache.polaris.core.entity.PolarisEntityType.NAMESPACE;

import com.google.common.base.Strings;
import java.util.Set;
import org.apache.polaris.core.entity.PolarisEntitySubType;
import org.apache.polaris.core.entity.PolarisEntityType;

public class DataCompactionPolicyValidator implements PolicyValidator<DataCompactionPolicy> {
static final DataCompactionPolicyValidator INSTANCE = new DataCompactionPolicyValidator();

private static final String DEFAULT_POLICY_SCHEMA_VERSION = "2025-02-03";
private static final Set<String> POLICY_SCHEMA_VERSIONS = Set.of(DEFAULT_POLICY_SCHEMA_VERSION);
private static final Set<PolarisEntityType> ATTACHABLE_ENTITY_TYPES =
Set.of(CATALOG, NAMESPACE, ICEBERG_TABLE_LIKE);

@Override
public DataCompactionPolicy parse(String content) {
if (Strings.isNullOrEmpty(content)) {
throw new InvalidPolicyException("Policy is empty");
}

try {
var policy = PolicyValidatorUtil.MAPPER.readValue(content, DataCompactionPolicy.class);
if (policy == null) {
throw new InvalidPolicyException("Invalid policy");
}

if (Strings.isNullOrEmpty(policy.getVersion())) {
policy.setVersion(DEFAULT_POLICY_SCHEMA_VERSION);
}

if (!POLICY_SCHEMA_VERSIONS.contains(policy.getVersion())) {
throw new InvalidPolicyException("Invalid policy version: " + policy.getVersion());
}

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This may be a follow-up: do we need to validate data compaction configs (if they are present)? For example, target_file_size_bytes needs to be a value larger than 0.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We cannot know the semantics of the entries within the config map. A user can give a string to target_file_size_bytes and it should still be acceptable, as target_file_size_bytes doesn't mean anything in the schema.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks for the explanation! That makes sense: we only validate fields like max_orphan_file_age_in_days, which is in orphan-file-removal's schema.

return policy;
} catch (Exception e) {
throw new InvalidPolicyException(e);
}
}

/**
 * Reports whether a data compaction policy may be attached to an entity of the given kind.
 *
 * <p>An entity type outside {@code ATTACHABLE_ENTITY_TYPES} (including {@code null}) is rejected.
 * A table-like entity is accepted only when its subtype is {@link PolarisEntitySubType#TABLE};
 * the subtype is not consulted for any other entity type.
 *
 * @param entityType the type of the target entity; may be {@code null}
 * @param entitySubType the subtype of the target entity
 * @return {@code true} if attachment is allowed, {@code false} otherwise
 */
@Override
public boolean canAttach(PolarisEntityType entityType, PolarisEntitySubType entitySubType) {
  // The null check comes first so the set lookup is never handed a null element.
  if (entityType == null || !ATTACHABLE_ENTITY_TYPES.contains(entityType)) {
    return false;
  }

  // Only table-like entities are further restricted by subtype.
  return entityType != ICEBERG_TABLE_LIKE || entitySubType == PolarisEntitySubType.TABLE;
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.polaris.core.policy.validator;

import com.google.common.base.Preconditions;
import org.apache.polaris.core.entity.PolarisEntity;
import org.apache.polaris.core.policy.PolicyEntity;
import org.apache.polaris.core.policy.PredefinedPolicyTypes;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
* Validates a given {@link PolicyEntity} against its defined policy type.
*
* <p>This class maps the policy type code from the {@code PolicyEntity} to a predefined policy
* type, then delegates parsing/validation to a specific validator implementation.
*/
public class GenericPolicyValidator {
private static final Logger LOGGER = LoggerFactory.getLogger(GenericPolicyValidator.class);

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Now that we have "generic tables", maybe a keyword other than "generic" would be a good idea.

Copy link
Contributor Author

@flyrain flyrain Mar 25, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

How about a name like PolicyValidators? It is a common Java practice to gather static functionality for a group of objects this way.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah, that sounds good to me. Or it's almost like a factory, right?

/**
* Validates the given policy.
*
* @param policy the policy entity to validate
* @throws InvalidPolicyException if the policy type is unknown or unsupported, or if the policy
* content is invalid
*/
public static void validate(PolicyEntity policy) {
Preconditions.checkNotNull(policy, "Policy must not be null");

var type = PredefinedPolicyTypes.fromCode(policy.getPolicyTypeCode());
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
var type = PredefinedPolicyTypes.fromCode(policy.getPolicyTypeCode());
var type = policy.getPolicyType()

I think we can utilize the getPolicyType() here. The method will return a type or throw an error for corrupted PolicyEntity. So we can remove the Preconditions check below

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That's my first version actually, but the issue is that Java switch clause doesn't support a regular class. Here is the error message:

Incompatible types. Found: 'org.apache.polaris.core.policy.PolicyType', required: 'char, byte, short, int, Character, Byte, Short, Integer, String, or an enum'

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Got it, thanks for the explanation!

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Wondering if we should use the name of the policy type in the switch clause, as customized types won't be represented as an enum.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think for custom type we should rely on the default case to load their corresponding validator impl?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

we still need to distinguish the different customized types, right?

Preconditions.checkArgument(type != null, "Unknown policy type: " + policy.getPolicyTypeCode());

switch (type) {
case DATA_COMPACTION:
DataCompactionPolicyValidator.INSTANCE.parse(policy.getContent());
break;

// To support additional policy types in the future, add cases here.
case METADATA_COMPACTION:
case SNAPSHOT_RETENTION:
case ORPHAN_FILE_REMOVAL:
default:
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Just checking my understanding: in the future, when we add support for custom types, we will need to load the custom type's validator here?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

yeah, something like this:

      ctor = DynConstructors.builder(PolicyValidator.class).impl(impl).buildChecked();
      policyValidator = ctor.newInstance();

throw new InvalidPolicyException("Unsupported policy type: " + type.getName());
}

LOGGER.info("Policy validated successfully: {}", type.getName());
}

/**
* Determines whether the given policy can be attached to the specified target entity.
*
* @param policy the policy entity to check
* @param targetEntity the target Polaris entity to attach the policy to
* @return {@code true} if the policy is attachable to the target entity; {@code false} otherwise
*/
public static boolean canAttach(PolicyEntity policy, PolarisEntity targetEntity) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Just brainstorming about the name: maybe "isAttachable"?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The method’s primary role is to decide whether a policy is capable of being attached to a target, "canAttach" can be appropriate. I think isAttachable is more suitable if the method only takes a policy entity. WDYT?

Preconditions.checkNotNull(policy, "Policy must not be null");
Preconditions.checkNotNull(targetEntity, "Target entity must not be null");

var policyType = PredefinedPolicyTypes.fromCode(policy.getPolicyTypeCode());
Preconditions.checkArgument(
policyType != null, "Unknown policy type: " + policy.getPolicyTypeCode());
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
var policyType = PredefinedPolicyTypes.fromCode(policy.getPolicyTypeCode());
Preconditions.checkArgument(
policyType != null, "Unknown policy type: " + policy.getPolicyTypeCode());
var type = policy.getPolicyType()

same here


switch (policyType) {
case DATA_COMPACTION:
return DataCompactionPolicyValidator.INSTANCE.canAttach(
targetEntity.getType(), targetEntity.getSubType());
// To support additional policy types in the future, add cases here.
case METADATA_COMPACTION:
case SNAPSHOT_RETENTION:
case ORPHAN_FILE_REMOVAL:
default:
LOGGER.warn("Attachment not supported for policy type: {}", policyType.getName());
return false;
}
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.polaris.core.policy.validator;

import org.apache.polaris.core.exceptions.PolarisException;

/** Signals that a policy is invalid or violates a defined rule. */
public class InvalidPolicyException extends PolarisException {
  /**
   * Wraps an underlying failure using the fixed message {@code "Invalid policy"}.
   *
   * @param cause the failure that made the policy invalid
   */
  public InvalidPolicyException(final Throwable cause) {
    this("Invalid policy", cause);
  }

  /**
   * Creates an exception carrying only a description of the problem.
   *
   * @param message description of why the policy is invalid
   */
  public InvalidPolicyException(final String message) {
    super(message);
  }

  /**
   * Creates an exception carrying both a description and the underlying failure.
   *
   * @param message description of why the policy is invalid
   * @param cause the failure that made the policy invalid
   */
  public InvalidPolicyException(final String message, final Throwable cause) {
    super(message, cause);
  }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.polaris.core.policy.validator;

import org.apache.polaris.core.entity.PolarisEntitySubType;
import org.apache.polaris.core.entity.PolarisEntityType;

/**
* Validates and parses a given policy content string against its defined schema.
*
* @param <T> the type of policy object to be returned after validation
*/
public interface PolicyValidator<T> {

/**
* Parses and validates the provided policy content.
*
* @param content the policy content to parse and validate
* @return a policy object of type T if the content is valid
* @throws InvalidPolicyException if the content does not meet the required policy rules
*/
T parse(String content) throws InvalidPolicyException;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It looks like this is primarily a parser, not a validator if it's turning a string into a policy type.

Also, is there not a shared supertype for policies? Even a marker interface might be a good idea.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Also, is there not a shared supertype for policies? Even a marker interface might be a good idea.

Yeah, that's another option I thought of. I'm fine with either way. Are you suggesting to use a new policy interface to avoid type parameters()?

Copy link
Contributor

@eric-maynard eric-maynard Mar 25, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I was imagining something like:

public interface Policy<T extends Policy<T>> {
    T fromString(String content);
}

This way I can just write my implementations like this:

public class CompactionPolicy implements Policy<CompactionPolicy> {
  public CompactionPolicy fromString(...)
}

public class RbacPolicy implements Policy<RbacPolicy> {
  public RbacPolicy fromString(...)
}

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It looks like this is primarily a parser, not a validator if it's turning a string into a policy type.

Separated the parsing and validating in the new commit. The parsing is with the policy content class now.

Added a marker interface, PolicyContent. fromString() is a static method, so I didn't use this pattern (https://en.wikipedia.org/wiki/Curiously_recurring_template_pattern). We could adopt a pattern like Policy<T extends Policy<T>> once we need an inheritable method that returns a subtype.


/**
 * Determines whether the policy is attachable to a target entity.
 *
 * <p>The method receives only the target's {@link PolarisEntityType} and {@link
 * PolarisEntitySubType}; no policy content is passed in, so the answer describes which kinds of
 * entity the policy may be attached to.
 *
 * @param entityType the type of the target entity
 * @param entitySubType the subtype of the target entity
 * @return {@code true} if the policy can be attached to the target entity; {@code false}
 *     otherwise
 */
boolean canAttach(PolarisEntityType entityType, PolarisEntitySubType entitySubType);
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.polaris.core.policy.validator;

import com.fasterxml.jackson.databind.DeserializationFeature;
import com.fasterxml.jackson.databind.ObjectMapper;

/**
 * Holds the Jackson {@link ObjectMapper} shared by the policy validators in this package.
 *
 * <p>The class exposes static members only, so it is {@code final} and has a private constructor
 * to prevent instantiation and subclassing.
 */
final class PolicyValidatorUtil {
  /** Mapper configured to reject missing or explicitly-null creator properties. */
  static final ObjectMapper MAPPER = configureMapper();

  private PolicyValidatorUtil() {
    // Static members only; not meant to be instantiated.
  }

  /**
   * Builds the mapper with strict handling of creator (constructor) properties.
   *
   * @return a newly configured {@link ObjectMapper}
   */
  private static ObjectMapper configureMapper() {
    ObjectMapper mapper = new ObjectMapper();
    // Fails if a required field (in the constructor) is missing
    mapper.configure(DeserializationFeature.FAIL_ON_MISSING_CREATOR_PROPERTIES, true);
    // Fails if a required field is present but explicitly null, e.g., {"enable": null}
    mapper.configure(DeserializationFeature.FAIL_ON_NULL_CREATOR_PROPERTIES, true);
    return mapper;
  }
}
Loading