Skip to content

Commit

Permalink
Merge branch 'swift' into main-dev
Browse files Browse the repository at this point in the history
Co-authored-by: Vatsal Manot <[email protected]>
  • Loading branch information
ashvardanian and vmanot committed Feb 5, 2024
2 parents ff6a660 + 1c4ffda commit 46e957c
Show file tree
Hide file tree
Showing 3 changed files with 81 additions and 56 deletions.
5 changes: 4 additions & 1 deletion Package.swift
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,10 @@ import PackageDescription
let package = Package(
name: "StringZilla",
products: [
.library(name: "StringZilla", targets: ["StringZillaC", "StringZilla"])
.library(
name: "StringZilla",
targets: ["StringZillaC", "StringZilla"]
)
],
targets: [
.target(
Expand Down
1 change: 1 addition & 0 deletions include/stringzilla/spm-fix.c
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@

131 changes: 76 additions & 55 deletions swift/StringProtocol+StringZilla.swift
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,6 @@
// - Stable pointer into a C string without copying it? Aug 2021
// https://forums.swift.org/t/stable-pointer-into-a-c-string-without-copying-it/51244/1

import Foundation
import StringZillaC

// We need to link the standard libraries.
Expand All @@ -25,10 +24,12 @@ import Darwin.C
#endif

/// Protocol defining a single-byte data type.
protocol SingleByte {}
fileprivate protocol SingleByte {}

extension UInt8: SingleByte {}
extension Int8: SingleByte {} // This would match `CChar` as well.

@usableFromInline
enum StringZillaError: Error {
case contiguousStorageUnavailable
case memoryAllocationFailed
Expand All @@ -51,47 +52,52 @@ enum StringZillaError: Error {
/// https://developer.apple.com/documentation/swift/stringprotocol/withcstring(_:)
/// https://developer.apple.com/documentation/swift/stringprotocol/withcstring(encodedas:_:)
/// https://developer.apple.com/documentation/swift/stringprotocol/data(using:allowlossyconversion:)
public protocol SZViewable {
associatedtype SZIndex

public protocol StringZillaViewable: Collection {
// `Index` (inherited from `Collection`) is the type that represents a position in the collection.
///
/// Executes a closure with a pointer to the string's UTF8 C representation and its length.
///
/// - Parameters:
/// - body: A closure that takes a pointer to a C string and its length.
/// - Throws: Can throw an error.
/// - Returns: Returns a value of type R, which is the result of the closure.
func szScope<R>(_ body: (sz_cptr_t, sz_size_t) throws -> R) rethrows -> R
func withStringZillaScope<R>(_ body: (sz_cptr_t, sz_size_t) throws -> R) rethrows -> R

/// Calculates the offset index for a given byte pointer relative to a start pointer.
///
/// - Parameters:
/// - bytePointer: A pointer to the byte for which the offset is calculated.
/// - startPointer: The starting pointer for the calculation, previously obtained from `withStringZillaScope`.
/// - Returns: The calculated index offset.
func szOffset(forByte bytePointer: sz_cptr_t, after startPointer: sz_cptr_t) -> SZIndex
func stringZillaByteOffset(forByte bytePointer: sz_cptr_t, after startPointer: sz_cptr_t) -> Index
}

extension String: SZViewable {
public typealias SZIndex = String.Index
extension String: StringZillaViewable {
public typealias Index = String.Index

public func szScope<R>(_ body: (sz_cptr_t, sz_size_t) throws -> R) rethrows -> R {
let cLength = sz_size_t(self.lengthOfBytes(using: .utf8))
@_transparent
public func withStringZillaScope<R>(_ body: (sz_cptr_t, sz_size_t) throws -> R) rethrows -> R {
let cLength = sz_size_t(utf8.count)
return try self.withCString { cString in
try body(cString, cLength)
}
}

public func szOffset(forByte bytePointer: sz_cptr_t, after startPointer: sz_cptr_t) -> SZIndex {
return self.index(self.startIndex, offsetBy: bytePointer - startPointer)
@_transparent
public func stringZillaByteOffset(forByte bytePointer: sz_cptr_t, after startPointer: sz_cptr_t) -> Index {
self.utf8.index(self.utf8.startIndex, offsetBy: bytePointer - startPointer)
}
}

extension Substring.UTF8View: SZViewable {
public typealias SZIndex = Substring.UTF8View.Index
extension Substring.UTF8View: StringZillaViewable {
public typealias Index = Substring.UTF8View.Index

/// Executes a closure with a pointer to the UTF8View's contiguous storage of single-byte elements (UTF-8 code units).
/// - Parameters:
/// - body: A closure that takes a pointer to the contiguous storage and its size.
/// - Throws: An error if the storage is not contiguous.
public func szScope<R>(_ body: (sz_cptr_t, sz_size_t) throws -> R) rethrows -> R {
@_transparent
public func withStringZillaScope<R>(_ body: (sz_cptr_t, sz_size_t) throws -> R) rethrows -> R {
return try withContiguousStorageIfAvailable { bufferPointer -> R in
let cLength = sz_size_t(bufferPointer.count)
let cString = UnsafeRawPointer(bufferPointer.baseAddress!).assumingMemoryBound(to: CChar.self)
Expand All @@ -106,19 +112,20 @@ extension Substring.UTF8View: SZViewable {
/// - bytePointer: A pointer to the byte for which the offset is calculated.
/// - startPointer: The starting pointer for the calculation, previously obtained from `withStringZillaScope`.
/// - Returns: The calculated index offset.
public func szOffset(forByte bytePointer: sz_cptr_t, after startPointer: sz_cptr_t) -> SZIndex {
@_transparent
public func stringZillaByteOffset(forByte bytePointer: sz_cptr_t, after startPointer: sz_cptr_t) -> Index {
return self.index(self.startIndex, offsetBy: bytePointer - startPointer)
}
}

extension String.UTF8View: SZViewable {
public typealias SZIndex = String.UTF8View.Index
extension String.UTF8View: StringZillaViewable {
public typealias Index = String.UTF8View.Index

/// Executes a closure with a pointer to the UTF8View's contiguous storage of single-byte elements (UTF-8 code units).
/// - Parameters:
/// - body: A closure that takes a pointer to the contiguous storage and its size.
/// - Throws: An error if the storage is not contiguous.
public func szScope<R>(_ body: (sz_cptr_t, sz_size_t) throws -> R) rethrows -> R {
public func withStringZillaScope<R>(_ body: (sz_cptr_t, sz_size_t) throws -> R) rethrows -> R {
return try withContiguousStorageIfAvailable { bufferPointer -> R in
let cLength = sz_size_t(bufferPointer.count)
let cString = UnsafeRawPointer(bufferPointer.baseAddress!).assumingMemoryBound(to: CChar.self)
Expand All @@ -133,22 +140,24 @@ extension String.UTF8View: SZViewable {
/// - bytePointer: A pointer to the byte for which the offset is calculated.
/// - startPointer: The starting pointer for the calculation, previously obtained from `withStringZillaScope`.
/// - Returns: The calculated index offset.
public func szOffset(forByte bytePointer: sz_cptr_t, after startPointer: sz_cptr_t) -> SZIndex {
public func stringZillaByteOffset(forByte bytePointer: sz_cptr_t, after startPointer: sz_cptr_t) -> Index {
return self.index(self.startIndex, offsetBy: bytePointer - startPointer)
}
}

public extension SZViewable {
public extension StringZillaViewable {

/// Finds the first occurrence of the specified substring within the receiver.
/// - Parameter needle: The substring to search for.
/// - Returns: The index of the found occurrence, or `nil` if not found.
func findFirst(substring needle: any SZViewable) -> SZIndex? {
var result: SZIndex?
szScope { hPointer, hLength in
needle.szScope { nPointer, nLength in
@_specialize(where Self == String, S == String)
@_specialize(where Self == String.UTF8View, S == String.UTF8View)
func findFirst<S: StringZillaViewable>(substring needle: S) -> Index? {
var result: Index?
withStringZillaScope { hPointer, hLength in
needle.withStringZillaScope { nPointer, nLength in
if let matchPointer = sz_find(hPointer, hLength, nPointer, nLength) {
result = self.szOffset(forByte: matchPointer, after: hPointer)
result = self.stringZillaByteOffset(forByte: matchPointer, after: hPointer)
}
}
}
Expand All @@ -158,12 +167,14 @@ public extension SZViewable {
/// Finds the last occurrence of the specified substring within the receiver.
/// - Parameter needle: The substring to search for.
/// - Returns: The index of the found occurrence, or `nil` if not found.
func findLast(substring needle: any SZViewable) -> SZIndex? {
var result: SZIndex?
szScope { hPointer, hLength in
needle.szScope { nPointer, nLength in
@_specialize(where Self == String, S == String)
@_specialize(where Self == String.UTF8View, S == String.UTF8View)
func findLast<S: StringZillaViewable>(substring needle: S) -> Index? {
var result: Index?
withStringZillaScope { hPointer, hLength in
needle.withStringZillaScope { nPointer, nLength in
if let matchPointer = sz_rfind(hPointer, hLength, nPointer, nLength) {
result = self.szOffset(forByte: matchPointer, after: hPointer)
result = self.stringZillaByteOffset(forByte: matchPointer, after: hPointer)
}
}
}
Expand All @@ -173,12 +184,14 @@ public extension SZViewable {
/// Finds the first occurrence of the specified character-set members within the receiver.
/// - Parameter characters: A string-like collection of characters to match.
/// - Returns: The index of the found occurrence, or `nil` if not found.
func findFirst(characterFrom characters: any SZViewable) -> SZIndex? {
var result: SZIndex?
szScope { hPointer, hLength in
characters.szScope { nPointer, nLength in
@_specialize(where Self == String, S == String)
@_specialize(where Self == String.UTF8View, S == String.UTF8View)
func findFirst<S: StringZillaViewable>(characterFrom characters: S) -> Index? {
var result: Index?
withStringZillaScope { hPointer, hLength in
characters.withStringZillaScope { nPointer, nLength in
if let matchPointer = sz_find_char_from(hPointer, hLength, nPointer, nLength) {
result = self.szOffset(forByte: matchPointer, after: hPointer)
result = self.stringZillaByteOffset(forByte: matchPointer, after: hPointer)
}
}
}
Expand All @@ -188,12 +201,14 @@ public extension SZViewable {
/// Finds the last occurrence of the specified character-set members within the receiver.
/// - Parameter characters: A string-like collection of characters to match.
/// - Returns: The index of the found occurrence, or `nil` if not found.
func findLast(characterFrom characters: any SZViewable) -> SZIndex? {
var result: SZIndex?
szScope { hPointer, hLength in
characters.szScope { nPointer, nLength in
@_specialize(where Self == String, S == String)
@_specialize(where Self == String.UTF8View, S == String.UTF8View)
func findLast<S: StringZillaViewable>(characterFrom characters: S) -> Index? {
var result: Index?
withStringZillaScope { hPointer, hLength in
characters.withStringZillaScope { nPointer, nLength in
if let matchPointer = sz_rfind_char_from(hPointer, hLength, nPointer, nLength) {
result = self.szOffset(forByte: matchPointer, after: hPointer)
result = self.stringZillaByteOffset(forByte: matchPointer, after: hPointer)
}
}
}
Expand All @@ -203,12 +218,14 @@ public extension SZViewable {
/// Finds the first occurrence of a character outside of the given character-set within the receiver.
/// - Parameter characters: A string-like collection of characters to exclude.
/// - Returns: The index of the found occurrence, or `nil` if not found.
func findFirst(characterNotFrom characters: any SZViewable) -> SZIndex? {
var result: SZIndex?
szScope { hPointer, hLength in
characters.szScope { nPointer, nLength in
@_specialize(where Self == String, S == String)
@_specialize(where Self == String.UTF8View, S == String.UTF8View)
func findFirst<S: StringZillaViewable>(characterNotFrom characters: S) -> Index? {
var result: Index?
withStringZillaScope { hPointer, hLength in
characters.withStringZillaScope { nPointer, nLength in
if let matchPointer = sz_find_char_not_from(hPointer, hLength, nPointer, nLength) {
result = self.szOffset(forByte: matchPointer, after: hPointer)
result = self.stringZillaByteOffset(forByte: matchPointer, after: hPointer)
}
}
}
Expand All @@ -218,12 +235,14 @@ public extension SZViewable {
/// Finds the last occurrence of a character outside of the given character-set within the receiver.
/// - Parameter characters: A string-like collection of characters to exclude.
/// - Returns: The index of the found occurrence, or `nil` if not found.
func findLast(characterNotFrom characters: any SZViewable) -> SZIndex? {
var result: SZIndex?
szScope { hPointer, hLength in
characters.szScope { nPointer, nLength in
@_specialize(where Self == String, S == String)
@_specialize(where Self == String.UTF8View, S == String.UTF8View)
func findLast<S: StringZillaViewable>(characterNotFrom characters: S) -> Index? {
var result: Index?
withStringZillaScope { hPointer, hLength in
characters.withStringZillaScope { nPointer, nLength in
if let matchPointer = sz_rfind_char_not_from(hPointer, hLength, nPointer, nLength) {
result = self.szOffset(forByte: matchPointer, after: hPointer)
result = self.stringZillaByteOffset(forByte: matchPointer, after: hPointer)
}
}
}
Expand All @@ -234,13 +253,15 @@ public extension SZViewable {
/// - Parameter other: A string-like collection to compute the edit distance from.
/// - Returns: The edit distance, as an unsigned integer.
/// - Throws: If a memory allocation error has happened.
func editDistance(from other: any SZViewable, bound: UInt64 = 0) throws -> UInt64? {
@_specialize(where Self == String, S == String)
@_specialize(where Self == String.UTF8View, S == String.UTF8View)
func editDistance<S: StringZillaViewable>(from other: S, bound: UInt64 = 0) throws -> UInt64? {
var result: UInt64?

// Use a do-catch block to handle potential errors
do {
try szScope { hPointer, hLength in
try other.szScope { nPointer, nLength in
try withStringZillaScope { hPointer, hLength in
try other.withStringZillaScope { nPointer, nLength in
result = UInt64(sz_edit_distance(hPointer, hLength, nPointer, nLength, sz_size_t(bound), nil))
if result == SZ_SIZE_MAX {
result = nil
Expand Down

0 comments on commit 46e957c

Please sign in to comment.