Skip to content

Commit

Permalink
Fix Write transform primitive display data
Browse files Browse the repository at this point in the history
(cherry picked from commit ea8019e)
  • Loading branch information
swegner committed May 16, 2016
1 parent 8b2bbce commit 057c841
Show file tree
Hide file tree
Showing 2 changed files with 145 additions and 0 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,140 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.beam.runners.dataflow.io;

import static org.apache.beam.sdk.transforms.display.DisplayDataMatchers.hasDisplayItem;
import static org.apache.beam.sdk.transforms.display.DisplayDataMatchers.hasValue;

import static org.hamcrest.Matchers.hasItem;
import static org.hamcrest.Matchers.startsWith;
import static org.junit.Assert.assertThat;

import org.apache.beam.runners.dataflow.DataflowPipelineRunner;
import org.apache.beam.runners.dataflow.testing.TestDataflowPipelineOptions;
import org.apache.beam.runners.dataflow.transforms.DataflowDisplayDataEvaluator;
import org.apache.beam.sdk.Pipeline;
import org.apache.beam.sdk.io.TextIO;
import org.apache.beam.sdk.options.PipelineOptionsFactory;
import org.apache.beam.sdk.transforms.display.DisplayData;
import org.apache.beam.sdk.transforms.display.DisplayDataEvaluator;
import org.apache.beam.sdk.util.GcsUtil;
import org.apache.beam.sdk.util.TestCredential;
import org.apache.beam.sdk.util.gcsfs.GcsPath;

import com.google.common.collect.ImmutableList;

import org.junit.Test;
import org.junit.runner.RunWith;
import org.junit.runners.JUnit4;
import org.mockito.Mockito;
import org.mockito.invocation.InvocationOnMock;
import org.mockito.stubbing.Answer;

import java.io.IOException;
import java.nio.channels.FileChannel;
import java.nio.channels.SeekableByteChannel;
import java.nio.file.Files;
import java.nio.file.StandardOpenOption;
import java.util.List;
import java.util.Set;

/**
* {@link DataflowPipelineRunner} specific tests for TextIO Read and Write transforms.
*/
@RunWith(JUnit4.class)
public class DataflowTextIOTest {
private TestDataflowPipelineOptions buildTestPipelineOptions() {
TestDataflowPipelineOptions options =
PipelineOptionsFactory.as(TestDataflowPipelineOptions.class);
options.setGcpCredential(new TestCredential());
return options;
}

private GcsUtil buildMockGcsUtil() throws IOException {
GcsUtil mockGcsUtil = Mockito.mock(GcsUtil.class);

// Any request to open gets a new bogus channel
Mockito
.when(mockGcsUtil.open(Mockito.any(GcsPath.class)))
.then(new Answer<SeekableByteChannel>() {
@Override
public SeekableByteChannel answer(InvocationOnMock invocation) throws Throwable {
return FileChannel.open(
Files.createTempFile("channel-", ".tmp"),
StandardOpenOption.CREATE, StandardOpenOption.DELETE_ON_CLOSE);
}
});

// Any request for expansion returns a list containing the original GcsPath
// This is required to pass validation that occurs in TextIO during apply()
Mockito
.when(mockGcsUtil.expand(Mockito.any(GcsPath.class)))
.then(new Answer<List<GcsPath>>() {
@Override
public List<GcsPath> answer(InvocationOnMock invocation) throws Throwable {
return ImmutableList.of((GcsPath) invocation.getArguments()[0]);
}
});

return mockGcsUtil;
}

/**
* This tests a few corner cases that should not crash.
*/
@Test
public void testGoodWildcards() throws Exception {
TestDataflowPipelineOptions options = buildTestPipelineOptions();
options.setGcsUtil(buildMockGcsUtil());

Pipeline pipeline = Pipeline.create(options);

applyRead(pipeline, "gs://bucket/foo");
applyRead(pipeline, "gs://bucket/foo/");
applyRead(pipeline, "gs://bucket/foo/*");
applyRead(pipeline, "gs://bucket/foo/?");
applyRead(pipeline, "gs://bucket/foo/[0-9]");
applyRead(pipeline, "gs://bucket/foo/*baz*");
applyRead(pipeline, "gs://bucket/foo/*baz?");
applyRead(pipeline, "gs://bucket/foo/[0-9]baz?");
applyRead(pipeline, "gs://bucket/foo/baz/*");
applyRead(pipeline, "gs://bucket/foo/baz/*wonka*");
applyRead(pipeline, "gs://bucket/foo/*baz/wonka*");
applyRead(pipeline, "gs://bucket/foo*/baz");
applyRead(pipeline, "gs://bucket/foo?/baz");
applyRead(pipeline, "gs://bucket/foo[0-9]/baz");

// Check that running doesn't fail.
pipeline.run();
}

private void applyRead(Pipeline pipeline, String path) {
pipeline.apply("Read(" + path + ")", TextIO.Read.from(path));
}

@Test
public void testPrimitiveDisplayData() {
DisplayDataEvaluator evaluator = DataflowDisplayDataEvaluator.create();

TextIO.Write.Bound<?> write = TextIO.Write.to("foobar");

Set<DisplayData> displayData = evaluator.displayDataForPrimitiveTransforms(write);
assertThat("TextIO.Write should include the file prefix in its primitive display data",
displayData, hasItem(hasDisplayItem(hasValue(startsWith("foobar")))));
}
}
5 changes: 5 additions & 0 deletions sdk/src/main/java/com/google/cloud/dataflow/sdk/io/Write.java
Original file line number Diff line number Diff line change
Expand Up @@ -201,6 +201,11 @@ public void finishBundle(Context c) throws Exception {
c.output(result);
}
}

@Override
public void populateDisplayData(DisplayData.Builder builder) {
Write.Bound.this.populateDisplayData(builder);
}
}).withSideInputs(writeOperationView))
.setCoder(writeOperation.getWriterResultCoder());

Expand Down

0 comments on commit 057c841

Please sign in to comment.