Skip to content

Commit

Permalink
[FLINK-4963] [gelly] Tabulate edge direction for directed VertexMetrics
Browse files Browse the repository at this point in the history
The current implementation simply counts edges. We can do one better and
tabulate unidirectional (u:v but no v:u) and bidirectional edges (u:v
and v:u).

This is effectively the 'dyadic census'.

This commit also makes edge metrics distinct from vertex metrics.
Previously, EdgeMetrics had always been a superset of VertexMetrics.

This closes apache#2725
  • Loading branch information
greghogan committed Nov 8, 2016
1 parent 6a6eeb9 commit f025c45
Show file tree
Hide file tree
Showing 10 changed files with 300 additions and 518 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.flink.graph;

import org.apache.flink.api.common.accumulators.Accumulator;
import org.apache.flink.api.common.io.RichOutputFormat;
import org.apache.flink.api.java.ExecutionEnvironment;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.util.AbstractID;

import java.io.IOException;
import java.io.Serializable;

/**
 * A {@link GraphAnalytic} computes over a DataSet and returns the results via
 * Flink accumulators. This computation is cheaply performed in a terminating
 * {@link RichOutputFormat}.
 *
 * This class simplifies the creation of analytic helpers by providing pass-through
 * methods for adding and getting accumulators. Each accumulator name is prefixed
 * with a random string since Flink accumulators share a per-job global namespace.
 * This class also provides empty implementations of {@link RichOutputFormat#open}
 * and {@link RichOutputFormat#close}.
 *
 * @param <T> element type
 */
public abstract class AnalyticHelper<T>
extends RichOutputFormat<T> {

	private static final String SEPARATOR = "-";

	// Unique per-instance prefix; final because it must stay stable between
	// addAccumulator (at job runtime) and getAccumulator (after job completion).
	private final String id = new AbstractID().toString();

	@Override
	public void configure(Configuration parameters) {}

	@Override
	public void open(int taskNumber, int numTasks) throws IOException {}

	/**
	 * Adds an accumulator by prepending the given name with a random string.
	 *
	 * @param name The name of the accumulator
	 * @param accumulator The accumulator
	 * @param <V> Type of values that are added to the accumulator
	 * @param <A> Type of the accumulator result as it will be reported to the client
	 */
	public <V, A extends Serializable> void addAccumulator(String name, Accumulator<V, A> accumulator) {
		getRuntimeContext().addAccumulator(id + SEPARATOR + name, accumulator);
	}

	/**
	 * Gets the accumulator with the given name. Returns {@code null}, if no accumulator with
	 * that name was produced.
	 *
	 * @param env The execution environment from which the last job execution result is read
	 * @param accumulatorName The name of the accumulator
	 * @param <A> The generic type of the accumulator value
	 * @return The value of the accumulator with the given name
	 */
	public <A> A getAccumulator(ExecutionEnvironment env, String accumulatorName) {
		return env.getLastJobExecutionResult().getAccumulatorResult(id + SEPARATOR + accumulatorName);
	}
}
Original file line number Diff line number Diff line change
Expand Up @@ -20,17 +20,14 @@

import org.apache.commons.lang3.builder.EqualsBuilder;
import org.apache.commons.lang3.builder.HashCodeBuilder;
import org.apache.flink.api.common.JobExecutionResult;
import org.apache.flink.api.common.accumulators.DoubleCounter;
import org.apache.flink.api.common.accumulators.LongCounter;
import org.apache.flink.api.common.io.RichOutputFormat;
import org.apache.flink.api.java.DataSet;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.graph.AbstractGraphAnalytic;
import org.apache.flink.graph.Graph;
import org.apache.flink.graph.AnalyticHelper;
import org.apache.flink.graph.library.clustering.directed.AverageClusteringCoefficient.Result;
import org.apache.flink.types.CopyableValue;
import org.apache.flink.util.AbstractID;

import java.io.IOException;

Expand All @@ -47,7 +44,11 @@
public class AverageClusteringCoefficient<K extends Comparable<K> & CopyableValue<K>, VV, EV>
extends AbstractGraphAnalytic<K, VV, EV, Result> {

private String id = new AbstractID().toString();
private static final String VERTEX_COUNT = "vertexCount";

private static final String SUM_OF_LOCAL_CLUSTERING_COEFFICIENT = "sumOfLocalClusteringCoefficient";

private AverageClusteringCoefficientHelper<K> averageClusteringCoefficientHelper;

// Optional configuration
private int littleParallelism = PARALLELISM_DEFAULT;
Expand Down Expand Up @@ -80,19 +81,19 @@ public AverageClusteringCoefficient<K, VV, EV> run(Graph<K, VV, EV> input)
.run(new LocalClusteringCoefficient<K, VV, EV>()
.setLittleParallelism(littleParallelism));

averageClusteringCoefficientHelper = new AverageClusteringCoefficientHelper<>();

localClusteringCoefficient
.output(new AverageClusteringCoefficientHelper<K>(id))
.output(averageClusteringCoefficientHelper)
.name("Average clustering coefficient");

return this;
}

@Override
public Result getResult() {
JobExecutionResult res = env.getLastJobExecutionResult();

long vertexCount = res.getAccumulatorResult(id + "-0");
double sumOfLocalClusteringCoefficient = res.getAccumulatorResult(id + "-1");
long vertexCount = averageClusteringCoefficientHelper.getAccumulator(env, VERTEX_COUNT);
double sumOfLocalClusteringCoefficient = averageClusteringCoefficientHelper.getAccumulator(env, SUM_OF_LOCAL_CLUSTERING_COEFFICIENT);

return new Result(vertexCount, sumOfLocalClusteringCoefficient);
}
Expand All @@ -103,28 +104,10 @@ public Result getResult() {
* @param <T> ID type
*/
private static class AverageClusteringCoefficientHelper<T>
extends RichOutputFormat<LocalClusteringCoefficient.Result<T>> {
private final String id;

extends AnalyticHelper<LocalClusteringCoefficient.Result<T>> {
private long vertexCount;
private double sumOfLocalClusteringCoefficient;

/**
* The unique id is required because Flink's accumulator namespace is
* shared among all operators.
*
* @param id unique string used for accumulator names
*/
public AverageClusteringCoefficientHelper(String id) {
this.id = id;
}

@Override
public void configure(Configuration parameters) {}

@Override
public void open(int taskNumber, int numTasks) throws IOException {}

@Override
public void writeRecord(LocalClusteringCoefficient.Result<T> record) throws IOException {
vertexCount++;
Expand All @@ -138,8 +121,8 @@ public void writeRecord(LocalClusteringCoefficient.Result<T> record) throws IOEx

@Override
public void close() throws IOException {
getRuntimeContext().addAccumulator(id + "-0", new LongCounter(vertexCount));
getRuntimeContext().addAccumulator(id + "-1", new DoubleCounter(sumOfLocalClusteringCoefficient));
addAccumulator(VERTEX_COUNT, new LongCounter(vertexCount));
addAccumulator(SUM_OF_LOCAL_CLUSTERING_COEFFICIENT, new DoubleCounter(sumOfLocalClusteringCoefficient));
}
}

Expand All @@ -148,7 +131,6 @@ public void close() throws IOException {
*/
public static class Result {
private long vertexCount;

private double averageLocalClusteringCoefficient;

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,17 +20,14 @@

import org.apache.commons.lang3.builder.EqualsBuilder;
import org.apache.commons.lang3.builder.HashCodeBuilder;
import org.apache.flink.api.common.JobExecutionResult;
import org.apache.flink.api.common.accumulators.DoubleCounter;
import org.apache.flink.api.common.accumulators.LongCounter;
import org.apache.flink.api.common.io.RichOutputFormat;
import org.apache.flink.api.java.DataSet;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.graph.AbstractGraphAnalytic;
import org.apache.flink.graph.Graph;
import org.apache.flink.graph.AnalyticHelper;
import org.apache.flink.graph.library.clustering.undirected.AverageClusteringCoefficient.Result;
import org.apache.flink.types.CopyableValue;
import org.apache.flink.util.AbstractID;

import java.io.IOException;

Expand All @@ -47,7 +44,11 @@
public class AverageClusteringCoefficient<K extends Comparable<K> & CopyableValue<K>, VV, EV>
extends AbstractGraphAnalytic<K, VV, EV, Result> {

private String id = new AbstractID().toString();
private static final String VERTEX_COUNT = "vertexCount";

private static final String SUM_OF_LOCAL_CLUSTERING_COEFFICIENT = "sumOfLocalClusteringCoefficient";

private AverageClusteringCoefficientHelper<K> averageClusteringCoefficientHelper;

// Optional configuration
private int littleParallelism = PARALLELISM_DEFAULT;
Expand Down Expand Up @@ -80,19 +81,19 @@ public AverageClusteringCoefficient<K, VV, EV> run(Graph<K, VV, EV> input)
.run(new LocalClusteringCoefficient<K, VV, EV>()
.setLittleParallelism(littleParallelism));

averageClusteringCoefficientHelper = new AverageClusteringCoefficientHelper<>();

localClusteringCoefficient
.output(new AverageClusteringCoefficientHelper<K>(id))
.output(averageClusteringCoefficientHelper)
.name("Average clustering coefficient");

return this;
}

@Override
public Result getResult() {
JobExecutionResult res = env.getLastJobExecutionResult();

long vertexCount = res.getAccumulatorResult(id + "-0");
double sumOfLocalClusteringCoefficient = res.getAccumulatorResult(id + "-1");
long vertexCount = averageClusteringCoefficientHelper.getAccumulator(env, VERTEX_COUNT);
double sumOfLocalClusteringCoefficient = averageClusteringCoefficientHelper.getAccumulator(env, SUM_OF_LOCAL_CLUSTERING_COEFFICIENT);

return new Result(vertexCount, sumOfLocalClusteringCoefficient);
}
Expand All @@ -103,28 +104,10 @@ public Result getResult() {
* @param <T> ID type
*/
private static class AverageClusteringCoefficientHelper<T>
extends RichOutputFormat<LocalClusteringCoefficient.Result<T>> {
private final String id;

extends AnalyticHelper<LocalClusteringCoefficient.Result<T>> {
private long vertexCount;
private double sumOfLocalClusteringCoefficient;

/**
* The unique id is required because Flink's accumulator namespace is
* shared among all operators.
*
* @param id unique string used for accumulator names
*/
public AverageClusteringCoefficientHelper(String id) {
this.id = id;
}

@Override
public void configure(Configuration parameters) {}

@Override
public void open(int taskNumber, int numTasks) throws IOException {}

@Override
public void writeRecord(LocalClusteringCoefficient.Result<T> record) throws IOException {
vertexCount++;
Expand All @@ -138,8 +121,8 @@ public void writeRecord(LocalClusteringCoefficient.Result<T> record) throws IOEx

@Override
public void close() throws IOException {
getRuntimeContext().addAccumulator(id + "-0", new LongCounter(vertexCount));
getRuntimeContext().addAccumulator(id + "-1", new DoubleCounter(sumOfLocalClusteringCoefficient));
addAccumulator(VERTEX_COUNT, new LongCounter(vertexCount));
addAccumulator(SUM_OF_LOCAL_CLUSTERING_COEFFICIENT, new DoubleCounter(sumOfLocalClusteringCoefficient));
}
}

Expand All @@ -148,7 +131,6 @@ public void close() throws IOException {
*/
public static class Result {
private long vertexCount;

private double averageLocalClusteringCoefficient;

/**
Expand Down
Loading

0 comments on commit f025c45

Please sign in to comment.