Skip to content

Commit

Permalink
Use at-testset in unicode tests. (JuliaLang#20445)
Browse files: browse the repository at this point in the history
  • Loading branch information
pkofod authored and StefanKarpinski committed Feb 6, 2017
1 parent 7f34449 commit 0fa0c83
Show file tree
Hide file tree
Showing 3 changed files with 238 additions and 214 deletions.
10 changes: 6 additions & 4 deletions test/unicode/UnicodeError.jl
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
# This file is a part of Julia. License is MIT: http://julialang.org/license

let io = IOBuffer()
show(io, UnicodeError(Base.UTF_ERR_SHORT, 1, 10))
check = "UnicodeError: invalid UTF-8 sequence starting at index 1 (0xa missing one or more continuation bytes)"
@test String(take!(io)) == check
# Check the text that `show` writes for a UnicodeError constructed from
# Base.UTF_ERR_SHORT with the arguments 1 and 10.
@testset "invalid utf8" begin
let io = IOBuffer()
show(io, UnicodeError(Base.UTF_ERR_SHORT, 1, 10))
# The expected message mentions index 1 and 0xa (10 in hex), matching the
# two numeric arguments passed to the constructor above.
check = "UnicodeError: invalid UTF-8 sequence starting at index 1 (0xa missing one or more continuation bytes)"
@test String(take!(io)) == check
end
end
70 changes: 37 additions & 33 deletions test/unicode/utf8.jl
Original file line number Diff line number Diff line change
@@ -1,43 +1,47 @@
# This file is a part of Julia. License is MIT: http://julialang.org/license

## Test for CESU-8 sequences

let ch = 0x10000
for hi = 0xd800:0xdbff
for lo = 0xdc00:0xdfff
@test convert(String, Vector{UInt8}(String(Char[hi, lo]))) == string(Char(ch))
ch += 1
# For every pair (hi, lo) with hi in 0xd800:0xdbff (UTF-16 high surrogates) and
# lo in 0xdc00:0xdfff (UTF-16 low surrogates), the bytes of String(Char[hi, lo])
# must convert to the one-character string for Char(ch).
@testset "cesu8 input" begin
# ch starts at 0x10000 and is incremented once per (hi, lo) pair, in loop order.
let ch = 0x10000
for hi = 0xd800:0xdbff
for lo = 0xdc00:0xdfff
@test convert(String, Vector{UInt8}(String(Char[hi, lo]))) == string(Char(ch))
ch += 1
end
end
end
end


let str = String(b"this is a test\xed\x80")
@test next(str, 15) == ('\ufffd', 16)
@test_throws BoundsError getindex(str, 0:3)
@test_throws BoundsError getindex(str, 17:18)
@test_throws BoundsError getindex(str, 2:17)
@test_throws UnicodeError getindex(str, 16:17)
@test string(Char(0x110000)) == "\ufffd"
@test convert(String, b"this is a test\xed\x80\x80") == "this is a test\ud000"
# Iteration and range indexing on a 16-byte string whose last two bytes
# (\xed\x80) are a three-byte UTF-8 lead followed by only one continuation byte.
@testset "string indexing" begin
let str = String(b"this is a test\xed\x80")
# "this is a test" occupies bytes 1-14, so index 15 is the \xed byte; the
# expected result is the replacement character and next index 16.
@test next(str, 15) == ('\ufffd', 16)
# Range indexing: the exception type expected for each range is pinned below.
@test_throws BoundsError getindex(str, 0:3)
@test_throws BoundsError getindex(str, 17:18)
@test_throws BoundsError getindex(str, 2:17)
@test_throws UnicodeError getindex(str, 16:17)
# Does not use str: 0x110000 is above the largest Unicode code point
# (0x10ffff) and is expected to stringify as U+FFFD.
@test string(Char(0x110000)) == "\ufffd"
end
end

## Reverse of String
@test reverse("") == ""
@test reverse("a") == "a"
@test reverse("abc") == "cba"
@test reverse("xyz\uff\u800\uffff\U10ffff") == "\U10ffff\uffff\u800\uffzyx"
for str in [
b"xyz\xc1",
b"xyz\xd0",
b"xyz\xe0",
b"xyz\xed\x80",
b"xyz\xf0",
b"xyz\xf0\x80",
b"xyz\xf0\x80\x80"
]
@test_throws UnicodeError reverse(String(str))
# `reverse` on well-formed strings, followed by inputs on which it is expected
# to throw UnicodeError.
@testset "string reverse" begin
@test reverse("") == ""
@test reverse("a") == "a"
@test reverse("abc") == "cba"
# Mixes characters that encode to 1, 2, 3 and 4 bytes in UTF-8; the expected
# result reverses whole characters.
@test reverse("xyz\uff\u800\uffff\U10ffff") == "\U10ffff\uffff\u800\uffzyx"
# Each input is "xyz" followed by a lead byte that lacks its full set of
# continuation bytes (or, for \xc1, a byte that never occurs in valid UTF-8).
for str in [
b"xyz\xc1",
b"xyz\xd0",
b"xyz\xe0",
b"xyz\xed\x80",
b"xyz\xf0",
b"xyz\xf0\x80",
b"xyz\xf0\x80\x80"
]
@test_throws UnicodeError reverse(String(str))
end
end

## Specifically check UTF-8 string whose lead byte is same as a surrogate
@test convert(String, b"\xed\x9f\xbf") == "\ud7ff"
# Conversion of byte strings containing three-byte UTF-8 sequences led by 0xed.
@testset "string convert" begin
# \xed\x80\x80 is the UTF-8 encoding of U+D000.
@test convert(String, b"this is a test\xed\x80\x80") == "this is a test\ud000"
## Specifically check a UTF-8 string whose lead byte (0xed) is the same as that of
## an encoded surrogate; \xed\x9f\xbf is U+D7FF, the last code point before the
## surrogate range starts at U+D800.
@test convert(String, b"\xed\x9f\xbf") == "\ud7ff"
end
Loading

0 comments on commit 0fa0c83

Please sign in to comment.