forked from apache/iceberg
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Spark: Implement copy-on-write DELETE (apache#1862)
- Loading branch information
1 parent
7d7a51d
commit af5f600
Showing 14 changed files with 1,250 additions and 17 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,39 @@ | ||
/* | ||
* Licensed to the Apache Software Foundation (ASF) under one | ||
* or more contributor license agreements. See the NOTICE file | ||
* distributed with this work for additional information | ||
* regarding copyright ownership. The ASF licenses this file | ||
* to you under the Apache License, Version 2.0 (the | ||
* "License"); you may not use this file except in compliance | ||
* with the License. You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, | ||
* software distributed under the License is distributed on an | ||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY | ||
* KIND, either express or implied. See the License for the | ||
* specific language governing permissions and limitations | ||
* under the License. | ||
*/ | ||
|
||
package org.apache.iceberg; | ||
|
||
/**
 * The isolation level applied to operations on a table.
 * <p>
 * There are two supported levels: serializable and snapshot isolation. Under either level,
 * every operation reads a consistent view of the table and readers only observe data that
 * has already been committed. Serializable is the strongest isolation level in databases,
 * whereas snapshot isolation is beneficial for environments with many concurrent writers.
 * <p>
 * With the serializable level, an in-progress UPDATE/DELETE/MERGE operation is guaranteed to
 * fail when a concurrent transaction commits a new file that might contain rows matching
 * the condition of that UPDATE/DELETE/MERGE. For instance, when an update is running over
 * a subset of rows and a concurrent transaction adds a file with records that potentially
 * match the update condition, the update must fail under serializable isolation but may
 * still commit under snapshot isolation.
 */
public enum IsolationLevel {
  SERIALIZABLE,
  SNAPSHOT
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
68 changes: 68 additions & 0 deletions
68
spark3-extensions/src/test/java/org/apache/iceberg/spark/extensions/Employee.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,68 @@ | ||
/* | ||
* Licensed to the Apache Software Foundation (ASF) under one | ||
* or more contributor license agreements. See the NOTICE file | ||
* distributed with this work for additional information | ||
* regarding copyright ownership. The ASF licenses this file | ||
* to you under the Apache License, Version 2.0 (the | ||
* "License"); you may not use this file except in compliance | ||
* with the License. You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, | ||
* software distributed under the License is distributed on an | ||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY | ||
* KIND, either express or implied. See the License for the | ||
* specific language governing permissions and limitations | ||
* under the License. | ||
*/ | ||
|
||
package org.apache.iceberg.spark.extensions; | ||
|
||
import java.util.Objects; | ||
|
||
public class Employee { | ||
private Integer id; | ||
private String dep; | ||
|
||
public Employee() { | ||
} | ||
|
||
public Employee(Integer id, String dep) { | ||
this.id = id; | ||
this.dep = dep; | ||
} | ||
|
||
public Integer getId() { | ||
return id; | ||
} | ||
|
||
public void setId(Integer id) { | ||
this.id = id; | ||
} | ||
|
||
public String getDep() { | ||
return dep; | ||
} | ||
|
||
public void setDep(String dep) { | ||
this.dep = dep; | ||
} | ||
|
||
@Override | ||
public boolean equals(Object other) { | ||
if (this == other) { | ||
return true; | ||
} else if (other == null || getClass() != other.getClass()) { | ||
return false; | ||
} | ||
|
||
Employee employee = (Employee) other; | ||
return Objects.equals(id, employee.id) && Objects.equals(dep, employee.dep); | ||
} | ||
|
||
@Override | ||
public int hashCode() { | ||
return Objects.hash(id, dep); | ||
} | ||
} |
79 changes: 79 additions & 0 deletions
79
...ns/src/test/java/org/apache/iceberg/spark/extensions/SparkRowLevelOperationsTestBase.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,79 @@ | ||
/* | ||
* Licensed to the Apache Software Foundation (ASF) under one | ||
* or more contributor license agreements. See the NOTICE file | ||
* distributed with this work for additional information | ||
* regarding copyright ownership. The ASF licenses this file | ||
* to you under the Apache License, Version 2.0 (the | ||
* "License"); you may not use this file except in compliance | ||
* with the License. You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, | ||
* software distributed under the License is distributed on an | ||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY | ||
* KIND, either express or implied. See the License for the | ||
* specific language governing permissions and limitations | ||
* under the License. | ||
*/ | ||
|
||
package org.apache.iceberg.spark.extensions; | ||
|
||
import java.util.Map; | ||
import java.util.Random; | ||
import java.util.concurrent.ThreadLocalRandom; | ||
import org.apache.iceberg.relocated.com.google.common.collect.ImmutableMap; | ||
import org.apache.iceberg.spark.SparkCatalog; | ||
import org.apache.iceberg.spark.SparkSessionCatalog; | ||
import org.junit.runner.RunWith; | ||
import org.junit.runners.Parameterized; | ||
import org.junit.runners.Parameterized.Parameters; | ||
|
||
@RunWith(Parameterized.class) | ||
public abstract class SparkRowLevelOperationsTestBase extends SparkExtensionsTestBase { | ||
|
||
private static final Random RANDOM = ThreadLocalRandom.current(); | ||
|
||
protected final String fileFormat; | ||
protected final boolean vectorized; | ||
|
||
public SparkRowLevelOperationsTestBase(String catalogName, String implementation, | ||
Map<String, String> config, String fileFormat, | ||
boolean vectorized) { | ||
super(catalogName, implementation, config); | ||
this.fileFormat = fileFormat; | ||
this.vectorized = vectorized; | ||
} | ||
|
||
@Parameters(name = "catalogName = {0}, implementation = {1}, config = {2}, format = {3}, vectorized = {4}") | ||
public static Object[][] parameters() { | ||
return new Object[][] { | ||
{ "testhive", SparkCatalog.class.getName(), | ||
ImmutableMap.of( | ||
"type", "hive", | ||
"default-namespace", "default" | ||
), | ||
"orc", | ||
true | ||
}, | ||
{ "testhadoop", SparkCatalog.class.getName(), | ||
ImmutableMap.of( | ||
"type", "hadoop" | ||
), | ||
"parquet", | ||
RANDOM.nextBoolean() | ||
}, | ||
{ "spark_catalog", SparkSessionCatalog.class.getName(), | ||
ImmutableMap.of( | ||
"type", "hive", | ||
"default-namespace", "default", | ||
"clients", "1", | ||
"parquet-enabled", "false", | ||
"cache-enabled", "false" // Spark will delete tables using v1, leaving the cache out of sync | ||
), | ||
"avro", | ||
false | ||
} | ||
}; | ||
} | ||
} |
37 changes: 37 additions & 0 deletions
37
...3-extensions/src/test/java/org/apache/iceberg/spark/extensions/TestCopyOnWriteDelete.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,37 @@ | ||
/* | ||
* Licensed to the Apache Software Foundation (ASF) under one | ||
* or more contributor license agreements. See the NOTICE file | ||
* distributed with this work for additional information | ||
* regarding copyright ownership. The ASF licenses this file | ||
* to you under the Apache License, Version 2.0 (the | ||
* "License"); you may not use this file except in compliance | ||
* with the License. You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, | ||
* software distributed under the License is distributed on an | ||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY | ||
* KIND, either express or implied. See the License for the | ||
* specific language governing permissions and limitations | ||
* under the License. | ||
*/ | ||
|
||
package org.apache.iceberg.spark.extensions; | ||
|
||
import java.util.Map; | ||
import org.apache.iceberg.TableProperties; | ||
import org.apache.iceberg.relocated.com.google.common.collect.ImmutableMap; | ||
|
||
public class TestCopyOnWriteDelete extends TestDelete { | ||
|
||
public TestCopyOnWriteDelete(String catalogName, String implementation, Map<String, String> config, | ||
String fileFormat, Boolean vectorized) { | ||
super(catalogName, implementation, config, fileFormat, vectorized); | ||
} | ||
|
||
@Override | ||
protected Map<String, String> extraTableProperties() { | ||
return ImmutableMap.of(TableProperties.DELETE_MODE, "copy-on-write"); | ||
} | ||
} |
Oops, something went wrong.