Skip to content

Commit

Permalink
[CALCITE-3827] Reduce the time complexity of finding in-edges of a ve…
Browse files Browse the repository at this point in the history
…rtex in the graph (Liya Fan)

Add benchmark to measure performance difference;
Place inward & outward edges in VertexInfo;
Switch algorithms when removing vertices.

Based on benchmarking, change threshold to 35%, and convert
collection to HashSet; add benchmarking README. (Julian Hyde)
  • Loading branch information
liyafan82 authored and julianhyde committed Apr 28, 2020
1 parent 650ce46 commit 0e00d65
Show file tree
Hide file tree
Showing 5 changed files with 454 additions and 26 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -61,8 +61,8 @@ public E addEdge(V vertex, V targetVertex, Object... attributes) {
if (info == null) {
throw new IllegalArgumentException("no vertex " + vertex);
}
final VertexInfo<V, E> info2 = vertexMap.get(targetVertex);
if (info2 == null) {
final VertexInfo<V, E> targetInfo = vertexMap.get(targetVertex);
if (targetInfo == null) {
throw new IllegalArgumentException("no vertex " + targetVertex);
}
@SuppressWarnings("unchecked")
Expand All @@ -71,6 +71,7 @@ public E addEdge(V vertex, V targetVertex, Object... attributes) {
final E edge = f.createEdge(vertex, targetVertex, attributes);
if (edges.add(edge)) {
info.outEdges.add(edge);
targetInfo.inEdges.add(edge);
return edge;
} else {
return null;
Expand All @@ -86,18 +87,31 @@ public Iterable<E> getEdges(V source, final V target) {
/** Removes all edges from a given vertex to another.
* Returns whether any were removed. */
public boolean removeEdge(V source, V target) {
final VertexInfo<V, E> info = vertexMap.get(source);
List<E> outEdges = info.outEdges;
int removeCount = 0;
// remove out edges
final List<E> outEdges = vertexMap.get(source).outEdges;
int removeOutCount = 0;
for (int i = 0, size = outEdges.size(); i < size; i++) {
E edge = outEdges.get(i);
if (edge.target.equals(target)) {
outEdges.remove(i);
edges.remove(edge);
++removeCount;
++removeOutCount;
}
}
return removeCount > 0;

// remove in edges
final List<E> inEdges = vertexMap.get(target).inEdges;
int removeInCount = 0;
for (int i = 0, size = inEdges.size(); i < size; i++) {
E edge = inEdges.get(i);
if (edge.source.equals(source)) {
inEdges.remove(i);
++removeInCount;
}
}

assert removeOutCount == removeInCount;
return removeOutCount > 0;
}

/** Factory for edges that have attributes.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.HashSet;
import java.util.LinkedHashMap;
import java.util.LinkedHashSet;
import java.util.List;
Expand Down Expand Up @@ -95,13 +96,14 @@ public E addEdge(V vertex, V targetVertex) {
if (info == null) {
throw new IllegalArgumentException("no vertex " + vertex);
}
final VertexInfo<V, E> info2 = vertexMap.get(targetVertex);
if (info2 == null) {
final VertexInfo<V, E> targetInfo = vertexMap.get(targetVertex);
if (targetInfo == null) {
throw new IllegalArgumentException("no vertex " + targetVertex);
}
final E edge = edgeFactory.createEdge(vertex, targetVertex);
if (edges.add(edge)) {
info.outEdges.add(edge);
targetInfo.inEdges.add(edge);
return edge;
} else {
return null;
Expand All @@ -120,28 +122,89 @@ public E getEdge(V source, V target) {
}

public boolean removeEdge(V source, V target) {
final VertexInfo<V, E> info = vertexMap.get(source);
List<E> outEdges = info.outEdges;
// remove out edges
final List<E> outEdges = vertexMap.get(source).outEdges;
boolean outRemoved = false;
for (int i = 0, size = outEdges.size(); i < size; i++) {
E edge = outEdges.get(i);
if (edge.target.equals(target)) {
outEdges.remove(i);
edges.remove(edge);
return true;
outRemoved = true;
break;
}
}

// remove in edges
final List<E> inEdges = vertexMap.get(target).inEdges;
boolean inRemoved = false;
for (int i = 0, size = inEdges.size(); i < size; i++) {
E edge = inEdges.get(i);
if (edge.source.equals(source)) {
inEdges.remove(i);
inRemoved = true;
break;
}
}
return false;
assert outRemoved == inRemoved;
return outRemoved;
}

public Set<V> vertexSet() {
return vertexMap.keySet();
}

/** Removes every vertex in {@code collection} from this graph, choosing
 * between two strategies according to how large {@code collection} is
 * relative to the number of vertices currently in the graph. */
public void removeAllVertices(Collection<V> collection) {
  // Fraction of the graph's vertices above which the 'majority' algorithm
  // is used instead of the 'minority' one.
  final float majorityFraction = 0.35f;
  final int cutoff = (int) (vertexMap.size() * majorityFraction);

  if (collection.size() <= cutoff) {
    // Small request: no conversion needed.
    removeMinorityVertices(collection);
    return;
  }

  // Large request: make sure we hold a Set so that contains() is fast.
  final Set<V> vertexSet;
  if (collection instanceof Set) {
    vertexSet = (Set<V>) collection;
  } else {
    vertexSet = new HashSet<>(collection);
  }

  // Dropping duplicates may have shrunk the set, so compare again.
  if (vertexSet.size() > cutoff) {
    removeMajorityVertices(vertexSet);
  } else {
    removeMinorityVertices(vertexSet);
  }
}

/** Variant of {@link #removeAllVertices(Collection)} intended for the case
 * where {@code collection} is a small fraction of the graph's vertices.
 *
 * <p>Vertices in {@code collection} that are not in the graph are ignored.
 *
 * <p>NOTE(review): only the per-vertex {@code inEdges}/{@code outEdges} lists
 * are pruned here; confirm whether any graph-wide edge collection also needs
 * updating. */
private void removeMinorityVertices(Collection<V> collection) {
  for (V vertex : collection) {
    final VertexInfo<V, E> vertexInfo = vertexMap.get(vertex);
    if (vertexInfo != null) {
      // For each edge arriving at the vertex, drop the matching entries
      // from the out-edge list of the vertex at the other end.
      for (E inEdge : vertexInfo.inEdges) {
        final V predecessor = (V) inEdge.source;
        vertexMap.get(predecessor).outEdges
            .removeIf(e -> e.target.equals(vertex));
      }

      // For each edge leaving the vertex, drop the matching entries
      // from the in-edge list of the vertex at the other end.
      for (E outEdge : vertexInfo.outEdges) {
        final V successor = (V) outEdge.target;
        vertexMap.get(successor).inEdges
            .removeIf(e -> e.source.equals(vertex));
      }
    }
  }
  // Finally remove the vertices themselves.
  vertexMap.keySet().removeAll(collection);
}

/** Implementation of {@link #removeAllVertices(Collection)} that is efficient
* if {@code vertexSet} is a large fraction of the set of vertices in the
* graph. */
private void removeMajorityVertices(Set<V> vertexSet) {
vertexMap.keySet().removeAll(vertexSet);
for (VertexInfo<V, E> info : vertexMap.values()) {
//noinspection SuspiciousMethodCalls
info.outEdges.removeIf(next -> collection.contains(next.target));
info.outEdges.removeIf(e -> vertexSet.contains((V) e.target));
info.inEdges.removeIf(e -> vertexSet.contains((V) e.source));
}
}

Expand All @@ -150,15 +213,7 @@ public List<E> getOutwardEdges(V source) {
}

public List<E> getInwardEdges(V target) {
final ArrayList<E> list = new ArrayList<>();
for (VertexInfo<V, E> info : vertexMap.values()) {
for (E edge : info.outEdges) {
if (edge.target.equals(target)) {
list.add(edge);
}
}
}
return list;
return vertexMap.get(target).inEdges;
}

final V source(E edge) {
Expand All @@ -172,12 +227,13 @@ final V target(E edge) {
}

/**
* Information about an edge.
* Information about a vertex.
*
* @param <V> Vertex type
* @param <E> Edge type
*/
static class VertexInfo<V, E> {
public List<E> outEdges = new ArrayList<>();
final List<E> outEdges = new ArrayList<>();
final List<E> inEdges = new ArrayList<>();
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,8 @@ public interface DirectedGraph<V, E> {

Set<V> vertexSet();

/** Removes from this graph all vertices that are in {@code collection},
* and the edges into and out of those vertices. */
void removeAllVertices(Collection<V> collection);

List<E> getOutwardEdges(V source);
Expand Down
76 changes: 76 additions & 0 deletions ubenchmark/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
<!--
{% comment %}
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to you under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
{% endcomment %}
-->
# Micro-benchmarks

This directory, `ubenchmark`, contains micro-benchmarks written using
the [jmh](https://openjdk.java.net/projects/code-tools/jmh/) framework.

The benchmarks are tools for development and are not distributed as
Calcite artifacts. (Besides, jmh's license does not allow that.)

## Running all benchmarks from the command line

To run all benchmarks:

```bash
$ cd calcite
$ ./gradlew :ubenchmark:jmh
```

## Running one benchmark from the command line

To run just one benchmark, modify `ubenchmark/build.gradle.kts` and add the
following task:

```kotlin
jmh {
    include = listOf("removeAllVertices.*Benchmark")
}
```

and run

```bash
$ ./gradlew :ubenchmark:jmh
```

as before. In this case, `removeAllVertices.*Benchmark` is a
regular expression that matches a few methods -- benchmarks -- in
`class DefaultDirectedGraphBenchmark`.

The `jmh-gradle-plugin` has
[many other options](https://github.com/melix/jmh-gradle-plugin#configuration-options)
but you will need to translate them from Groovy syntax to our Kotlin syntax.

## Recording results

When you have run the benchmarks, please record them in the relevant JIRA
case and link them here:

* ParserBenchmark:
[459](https://issues.apache.org/jira/browse/CALCITE-459),
[1012](https://issues.apache.org/jira/browse/CALCITE-1012)
* ArrayListTest:
[3878](https://issues.apache.org/jira/browse/CALCITE-3878)
* DefaultDirectedGraphBenchmark:
[3827](https://issues.apache.org/jira/browse/CALCITE-3827)
* RelNodeBenchmark:
[3836](https://issues.apache.org/jira/browse/CALCITE-3836)
* ReflectVisitorDispatcherTest:
[3873](https://issues.apache.org/jira/browse/CALCITE-3873)
Loading

0 comments on commit 0e00d65

Please sign in to comment.