Skip to content

Commit

Permalink
http_archive: verify that unicode characters are OK in tar archives
Browse files Browse the repository at this point in the history
Add a test verifying that http_archive can extract a tar archive
containing unicode characters. While such files cannot be referred
to by labels, it is still important that the archive can be extracted.
Also fix that use case on Darwin by re-encoding the file name string
appropriately, so that the java.nio.file.Files standard library encodes it
back to the octets we had in the first place.

Work-around for bazelbuild#1653, showing that http_archive from @bazel_tools can
be used; however, the issue still remains for zip archives.

Change-Id: If944203bf618c21705af676347d8591ab015d559
PiperOrigin-RevId: 183987726
  • Loading branch information
aehlig authored and Copybara-Service committed Jan 31, 2018
1 parent 0fba306 commit 7e6837c
Show file tree
Hide file tree
Showing 2 changed files with 46 additions and 7 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -17,19 +17,19 @@
import com.google.common.base.Optional;
import com.google.devtools.build.lib.bazel.repository.DecompressorValue.Decompressor;
import com.google.devtools.build.lib.rules.repository.RepositoryFunction.RepositoryFunctionException;
import com.google.devtools.build.lib.util.OS;
import com.google.devtools.build.lib.vfs.FileSystemUtils;
import com.google.devtools.build.lib.vfs.Path;
import com.google.devtools.build.lib.vfs.PathFragment;
import com.google.devtools.build.skyframe.SkyFunctionException.Transience;

import java.util.Date;
import org.apache.commons.compress.archivers.tar.TarArchiveEntry;
import org.apache.commons.compress.archivers.tar.TarArchiveInputStream;

import java.io.IOException;
import java.io.InputStream;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.nio.file.StandardCopyOption;
import java.util.Date;
import org.apache.commons.compress.archivers.tar.TarArchiveEntry;
import org.apache.commons.compress.archivers.tar.TarArchiveInputStream;

/**
* Common code for unarchiving a compressed TAR file.
Expand Down Expand Up @@ -79,8 +79,20 @@ public Path decompress(DecompressorDescriptor descriptor) throws RepositoryFunct
filename, descriptor.repositoryPath().getRelative(linkName));
}
} else {
Files.copy(
tarStream, filename.getPathFile().toPath(), StandardCopyOption.REPLACE_EXISTING);
if (OS.getCurrent() == OS.DARWIN) {
// On Darwin, Files interprets file names as UTF-8, regardless of the standard
// encoding, so we have to create a unicode string that, when encoded as UTF-8, gives
// the same octets back; in this way, we can have Files.copy behave consistently
// with the file name interpretation of com.google.devtools.build.lib.vfs.
String filenameForFiles =
new String(
filename.getPathFile().toPath().toString().getBytes("ISO-8859-1"), "UTF-8");
Files.copy(
tarStream, Paths.get(filenameForFiles), StandardCopyOption.REPLACE_EXISTING);
} else {
Files.copy(
tarStream, filename.getPathFile().toPath(), StandardCopyOption.REPLACE_EXISTING);
}
filename.chmod(entry.getMode());

// This can only be done on real files, not links, or it will skip the reader to
Expand Down
27 changes: 27 additions & 0 deletions src/test/shell/bazel/external_integration_test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -949,6 +949,33 @@ EOF
|| fail 'Expected @ext//:foo and //:foo not to conflict'
}

function test_unicode_characters_tar() {
  # Verify that a tar archive containing a file whose name holds the UTF-8
  # encoding of a non-ASCII unicode character can be fetched and extracted by
  # http_archive. Only hello.txt is built; a successful build shows that the
  # archive, including the unicode-named entry, was unpacked without error.
  WRKDIR=$(mktemp -d "${TEST_TMPDIR}/testXXXXXX") \
    || fail "could not create a working directory"
  # Guard the directory changes so that a failure cannot make the commands
  # below (in particular 'rm -rf ext') run in an unexpected directory.
  cd "${WRKDIR}" || fail "could not cd to ${WRKDIR}"
  mkdir ext
  # F0 9F 8D 83 is the UTF-8 encoding of the 'LEAF FLUTTERING IN WIND'
  # (U+1F343) unicode symbol.
  echo 'leaves' > ext/$'unicode-\xF0\x9F\x8D\x83.txt'
  echo 'Hello World' > ext/hello.txt
  echo 'exports_files(["hello.txt"])' > ext/BUILD
  tar cvf ext.tar ext
  # Remove the sources so that the build can only succeed via the archive.
  rm -rf ext

  mkdir main
  cd main || fail "could not cd to main"
  # The here-document is intentionally unquoted: ${WRKDIR} must be expanded so
  # that the file:// URL points at the archive created above.
  cat > WORKSPACE <<EOF
load("@bazel_tools//tools/build_defs/repo:http.bzl", "http_archive")
http_archive(
name="ext",
strip_prefix="ext",
urls=["file://${WRKDIR}/ext.tar"],
)
EOF
  bazel build '@ext//:hello.txt' || fail "expected success"
}

function test_missing_build() {
mkdir ext
echo foo> ext/foo
Expand Down

0 comments on commit 7e6837c

Please sign in to comment.