Skip to content

Commit

Permalink
Use ICU to implement basic string functions.
Browse files Browse the repository at this point in the history
Swift SVN r25803
  • Loading branch information
stormbrew committed Mar 6, 2015
1 parent 39c7b6c commit dd92de7
Show file tree
Hide file tree
Showing 7 changed files with 446 additions and 45 deletions.
29 changes: 29 additions & 0 deletions cmake/modules/FindICU.cmake
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
# Find libicu's libraries
#
# Usage: find_package(ICU [REQUIRED] COMPONENTS <component>...)
#
# For each requested component <C> (upper-cased, e.g. UC, I18N) this module
# sets, when pkg-config knows the `icu-<c>` package:
#   ICU_<C>_DEFINITIONS   - extra compiler flags reported by pkg-config
#   ICU_<C>_INCLUDE_DIR   - cache entry: directory containing `unicode/`
#   ICU_<C>_INCLUDE_DIRS  - same directory, under the plural name
#   ICU_<C>_LIBRARY       - cache entry: path to the `icu<c>` library
#   ICU_<C>_LIBRARIES     - same library, under the plural name
# ICU_FOUND is set by find_package_handle_standard_args once every
# ICU_<C>_INCLUDE_DIR / ICU_<C>_LIBRARIES pair has been resolved.

include(FindPackageHandleStandardArgs)

find_package(PkgConfig)

set(ICU_REQUIRED)
foreach(MODULE ${ICU_FIND_COMPONENTS})
  string(TOUPPER "${MODULE}" MODULE)
  string(TOLOWER "${MODULE}" module)
  list(APPEND ICU_REQUIRED
    ICU_${MODULE}_INCLUDE_DIR ICU_${MODULE}_LIBRARIES)

  # pkg_check_modules only exists when the PkgConfig module located a
  # pkg-config executable; without this guard a missing pkg-config aborts the
  # configure step with "unknown command" instead of a normal
  # "ICU not found" diagnostic.
  if(PKG_CONFIG_FOUND)
    pkg_check_modules(PC_ICU_${MODULE} QUIET icu-${module})
  endif()

  # Test the variable by name; `if(${VAR})` would dereference it twice.
  if(PC_ICU_${MODULE}_FOUND)
    set(ICU_${MODULE}_DEFINITIONS ${PC_ICU_${MODULE}_CFLAGS_OTHER})

    find_path(ICU_${MODULE}_INCLUDE_DIR unicode
      HINTS ${PC_ICU_${MODULE}_INCLUDEDIR} ${PC_ICU_${MODULE}_INCLUDE_DIRS})
    # Publish the plural name as well; consumers conventionally read
    # <PKG>_INCLUDE_DIRS, and the singular cache entry stays available.
    set(ICU_${MODULE}_INCLUDE_DIRS ${ICU_${MODULE}_INCLUDE_DIR})

    find_library(ICU_${MODULE}_LIBRARY NAMES icu${module}
      HINTS ${PC_ICU_${MODULE}_LIBDIR} ${PC_ICU_${MODULE}_LIBRARY_DIRS})
    set(ICU_${MODULE}_LIBRARIES ${ICU_${MODULE}_LIBRARY})
  endif()
endforeach()

find_package_handle_standard_args(ICU DEFAULT_MSG ${ICU_REQUIRED})
mark_as_advanced(${ICU_REQUIRED})
161 changes: 118 additions & 43 deletions stdlib/core/String.swift
Original file line number Diff line number Diff line change
Expand Up @@ -302,36 +302,14 @@ public func ==(lhs: String, rhs: String) -> Bool {
UnsafeMutablePointer(rhs._core.startASCII),
rhs._core.count) == 0
}
#if _runtime(_ObjC)
// Note: this operation should be consistent with equality comparison of
// Character.
return _stdlib_compareNSStringDeterministicUnicodeCollation(
lhs._bridgeToObjectiveCImpl(), rhs._bridgeToObjectiveCImpl()) == 0
#else
// FIXME: Actually implement. For now, all strings are unequal.
// rdar://problem/18878343
return false
#endif
return lhs._compareString(rhs) == 0
}

extension String : Comparable {
}

extension String {
@inline(never) @semantics("stdlib_binary_only") // Hide the CF dependency
public // @testable
func _lessThanUTF16(rhs: String) -> Bool {
#if _runtime(_ObjC)
return _stdlib_compareNSStringDeterministicUnicodeCollation(
self._stdlib_binary_bridgeToObjectiveCImpl(),
rhs._stdlib_binary_bridgeToObjectiveCImpl()) < 0
#else
// FIXME: Actually implement. For now, all strings are unequal
// rdar://problem/18878343
return false
#endif
}

/// This is consistent with Foundation, but incorrect as defined by Unicode.
/// Unicode weights some ASCII punctuation in a different order than ASCII
/// value. Such as:
Expand All @@ -342,24 +320,75 @@ extension String {
/// 0027 ; [*02F8.0020.0002] # APOSTROPHE
/// precondition: both self and rhs are ASCII strings
public // @testable
func _lessThanASCII(rhs: String) -> Bool {
let compare = memcmp(
func _compareASCII(rhs: String) -> Int {
var compare = Int(memcmp(
UnsafeMutablePointer(self._core.startASCII),
UnsafeMutablePointer(rhs._core.startASCII),
min(self._core.count, rhs._core.count))
min(self._core.count, rhs._core.count)))
if compare == 0 {
return self._core.count < rhs._core.count
} else {
return compare < 0
compare = self._core.count - rhs._core.count
}
// This efficiently normalizes the result to -1, 0, or 1 to match the
// behaviour of NSString's compare function.
return (compare > 0 ? 1 : 0) - (compare < 0 ? 1 : 0)
}
#endif

/// Orders `self` against `rhs` using deterministic Unicode collation.
///
/// Callers in this file treat the result as zero when the strings compare
/// equal and negative when `self` orders before `rhs`.
@inline(never) @semantics("stdlib_binary_only") // Hide the CF/ICU dependency
public // @testable
func _compareDeterministicUnicodeCollation(rhs: String) -> Int {
  // Note: this operation should be consistent with equality comparison of
  // Character.
#if _runtime(_ObjC)
  let ordering = _stdlib_compareNSStringDeterministicUnicodeCollation(
    _bridgeToObjectiveCImpl(), rhs._bridgeToObjectiveCImpl())
  return Int(ordering)
#else
  // Pick the runtime entry point that matches the storage of each operand.
  if _core.isASCII {
    let selfBytes = UnsafePointer<Int8>(_core.startASCII)

    if rhs._core.isASCII {
      // ASCII vs. ASCII.
      let otherBytes = UnsafePointer<Int8>(rhs._core.startASCII)
      let ordering = _swift_stdlib_unicode_compare_utf8_utf8(
        selfBytes, Int32(_core.count),
        otherBytes, Int32(rhs._core.count))
      return Int(ordering)
    }

    // ASCII vs. UTF-16.
    let otherUnits = UnsafePointer<UTF16.CodeUnit>(rhs._core.startUTF16)
    let ordering = _swift_stdlib_unicode_compare_utf8_utf16(
      selfBytes, Int32(_core.count),
      otherUnits, Int32(rhs._core.count))
    return Int(ordering)
  }

  if rhs._core.isASCII {
    // UTF-16 vs. ASCII: swap the operands and negate the answer so the
    // mixed-width comparison above is reused.
    return -rhs._compareDeterministicUnicodeCollation(self)
  }

  // UTF-16 vs. UTF-16.
  let selfUnits = UnsafePointer<UTF16.CodeUnit>(_core.startUTF16)
  let otherUnits = UnsafePointer<UTF16.CodeUnit>(rhs._core.startUTF16)
  let ordering = _swift_stdlib_unicode_compare_utf16_utf16(
    selfUnits, Int32(_core.count),
    otherUnits, Int32(rhs._core.count))
  return Int(ordering)
#endif
}

/// Three-way comparison entry point used by the `==` and `<` operators.
///
/// Dispatches to `_compareASCII` when both operands are ASCII on Objective-C
/// runtimes, and to `_compareDeterministicUnicodeCollation` otherwise.
public // @testable
func _compareString(rhs: String) -> Int {
#if _runtime(_ObjC)
  // The ASCII shortcut is applied on objc runtimes only. Everywhere else even
  // ASCII operands go through the unicode collation algorithm.
  let bothASCII = _core.isASCII && rhs._core.isASCII
  if bothASCII {
    return _compareASCII(rhs)
  }
#endif
  return _compareDeterministicUnicodeCollation(rhs)
}
}

public func <(lhs: String, rhs: String) -> Bool {
if lhs._core.isASCII && rhs._core.isASCII {
return lhs._lessThanASCII(rhs)
}
return lhs._lessThanUTF16(rhs)
return lhs._compareString(rhs) < 0
}

// Support for copy-on-write
Expand Down Expand Up @@ -404,14 +433,14 @@ extension String : Hashable {
/// different invocations of the same program. Do not persist the
/// hash value across program runs.
public var hashValue: Int {
#if _runtime(_ObjC)
// Mix random bits into NSString's hash so that clients don't rely on
// Swift.String.hashValue and NSString.hash being the same.
#if arch(i386) || arch(arm)
let hashOffset = Int(bitPattern: 0x88dd_cc21)
#else
let hashOffset = Int(bitPattern: 0x429b_1266_88dd_cc21)
#endif
#if _runtime(_ObjC)
// FIXME(performance): constructing a temporary NSString is extremely
// wasteful and inefficient.
let cocoaString = unsafeBitCast(
Expand All @@ -424,9 +453,15 @@ extension String : Hashable {
return hashOffset ^ _stdlib_NSStringNFDHashValue(cocoaString)
}
#else
// FIXME: Actually implement. For now, all strings have the same hash.
// rdar://problem/18878343
return hashOffset
if self._core.isASCII {
return _swift_stdlib_unicode_hash_ascii(
UnsafeMutablePointer<Int8>(_core.startASCII),
Int32(_core.count))
} else {
return _swift_stdlib_unicode_hash(
UnsafeMutablePointer<UInt16>(_core.startUTF16),
Int32(_core.count))
}
#endif
}
}
Expand Down Expand Up @@ -844,6 +879,50 @@ func _stdlib_NSStringLowercaseString(str: AnyObject) -> _CocoaStringType

@asmname("swift_stdlib_NSStringUppercaseString")
func _stdlib_NSStringUppercaseString(str: AnyObject) -> _CocoaStringType
#else
/// Returns a lowercased copy of `str`, produced by the runtime's
/// `_swift_stdlib_unicode_strToLower` entry point.
///
/// The conversion is first attempted into a buffer with the same number of
/// UTF-16 code units as the input. The value returned by the runtime call is
/// taken as the required length; when it differs from the input length the
/// conversion is repeated into a buffer of exactly that size.
///
/// NOTE(review): reads `str._core.startUTF16`, so this presumably expects
/// `str` to have UTF-16 storage; confirm against callers.
internal func _nativeUnicodeLowercaseString(str: String) -> String {
  var buffer = _StringBuffer(
    capacity: str._core.count, initialSize: str._core.count, elementWidth: 2)

  // Try to write it out to the same length.
  var dest = UnsafeMutablePointer<UTF16.CodeUnit>(buffer.start)
  let z = _swift_stdlib_unicode_strToLower(
    dest, Int32(str._core.count),
    str._core.startUTF16, Int32(str._core.count))
  let correctSize = Int(z)

  // If a different amount of space is needed, do it again with the correct
  // buffer size.
  if correctSize != str._core.count {
    buffer = _StringBuffer(
      capacity: correctSize, initialSize: correctSize, elementWidth: 2)
    // Re-derive the destination from the new buffer. The previous pointer
    // refers to the storage that was just replaced, so writing through it
    // would leave the returned buffer unfilled.
    dest = UnsafeMutablePointer<UTF16.CodeUnit>(buffer.start)
    _swift_stdlib_unicode_strToLower(
      dest, Int32(correctSize), str._core.startUTF16, Int32(str._core.count))
  }

  return String(_storage: buffer)
}

/// Returns an uppercased copy of `str`, produced by the runtime's
/// `_swift_stdlib_unicode_strToUpper` entry point.
///
/// The conversion is first attempted into a buffer with the same number of
/// UTF-16 code units as the input. The value returned by the runtime call is
/// taken as the required length; when it differs from the input length the
/// conversion is repeated into a buffer of exactly that size.
///
/// NOTE(review): reads `str._core.startUTF16`, so this presumably expects
/// `str` to have UTF-16 storage; confirm against callers.
internal func _nativeUnicodeUppercaseString(str: String) -> String {
  var buffer = _StringBuffer(
    capacity: str._core.count, initialSize: str._core.count, elementWidth: 2)

  // Try to write it out to the same length.
  var dest = UnsafeMutablePointer<UTF16.CodeUnit>(buffer.start)
  let z = _swift_stdlib_unicode_strToUpper(
    dest, Int32(str._core.count),
    str._core.startUTF16, Int32(str._core.count))
  let correctSize = Int(z)

  // If a different amount of space is needed, do it again with the correct
  // buffer size.
  if correctSize != str._core.count {
    buffer = _StringBuffer(
      capacity: correctSize, initialSize: correctSize, elementWidth: 2)
    // Re-derive the destination from the new buffer. The previous pointer
    // refers to the storage that was just replaced, so writing through it
    // would leave the returned buffer unfilled.
    dest = UnsafeMutablePointer<UTF16.CodeUnit>(buffer.start)
    _swift_stdlib_unicode_strToUpper(
      dest, Int32(correctSize), str._core.startUTF16, Int32(str._core.count))
  }

  return String(_storage: buffer)
}
#endif

// Unicode algorithms
Expand Down Expand Up @@ -904,9 +983,7 @@ extension String {
return _cocoaStringToSwiftString_NonASCII(
_stdlib_NSStringLowercaseString(self._bridgeToObjectiveCImpl()))
#else
// FIXME: Actually implement. For now, don't change case.
// rdar://problem/18878343
return self
return _nativeUnicodeLowercaseString(self)
#endif
}

Expand All @@ -933,9 +1010,7 @@ extension String {
return _cocoaStringToSwiftString_NonASCII(
_stdlib_NSStringUppercaseString(self._bridgeToObjectiveCImpl()))
#else
// FIXME: Actually implement. For now, don't change case.
// rdar://problem/18878343
return self
return _nativeUnicodeUppercaseString(self)
#endif
}
}
Expand Down
17 changes: 16 additions & 1 deletion stdlib/runtime/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -25,14 +25,27 @@ if (SWIFT_RUNTIME_ENABLE_DTRACE)
endif()

set(swift_runtime_objc_sources)
if("${CMAKE_SYSTEM_NAME}" STREQUAL "Darwin")
set(swift_runtime_unicode_normalization_sources)
set(swift_runtime_private_link_libraries)
if("${SWIFT_HOST_VARIANT_SDK}" STREQUAL "OSX")
set(swift_runtime_objc_sources
Availability.mm
SwiftObject.mm
SwiftNativeNSXXXBase.mm.gyb
Reflection.mm)
set(LLVM_OPTIONAL_SOURCES
UnicodeNormalization.cpp)
else()
# Non-OSX SDKs: build the ICU-backed Unicode support into the runtime and
# link it against the ICU `uc` and `i18n` components.
find_package(ICU REQUIRED COMPONENTS uc i18n)
set(swift_runtime_unicode_normalization_sources
    UnicodeNormalization.cpp)
set(swift_runtime_private_link_libraries
    ${ICU_UC_LIBRARIES} ${ICU_I18N_LIBRARIES})
# Header search paths must be the include directories reported by FindICU
# (ICU_<C>_INCLUDE_DIR), not the library paths.
include_directories(
    ${ICU_UC_INCLUDE_DIR} ${ICU_I18N_INCLUDE_DIR})
endif()


add_swift_library(swiftRuntime IS_STDLIB IS_STDLIB_CORE
Casting.cpp
Demangle.cpp
Expand All @@ -52,7 +65,9 @@ add_swift_library(swiftRuntime IS_STDLIB IS_STDLIB_CORE
${swift_runtime_objc_sources}
${swift_runtime_dtrace_sources}
${swift_runtime_leaks_sources}
${swift_runtime_unicode_normalization_sources}
C_COMPILE_FLAGS ${swift_runtime_compile_flags}
PRIVATE_LINK_LIBRARIES ${swift_runtime_private_link_libraries}
INSTALL_IN_COMPONENT stdlib)

foreach(sdk ${SWIFT_CONFIGURED_SDKS})
Expand Down
Loading

0 comments on commit dd92de7

Please sign in to comment.