Skip to content

Commit

Permalink
F53OSCMessage: interpret left/right curly quotes as plain quotes
Browse files Browse the repository at this point in the history
- Fixes #37 to handle strings from text editors that automatically replace plain quotation marks (U+0022) with "smart" quotes (U+201C and U+201D)
  • Loading branch information
balord committed Feb 11, 2022
1 parent 68398a1 commit f916c3e
Show file tree
Hide file tree
Showing 2 changed files with 82 additions and 7 deletions.
71 changes: 64 additions & 7 deletions Sources/F53OSC/F53OSCMessage.m
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,45 @@

NS_ASSUME_NONNULL_BEGIN

// Placeholder tokens that stand in for backslash-escaped quotation marks while a
// message string is being parsed. Each token must be a non-empty string that is
// distinct from the other two; an empty token would make the tokenizing step
// delete the escaped character, and the detokenizing step could not restore it.
// Not trying to be perfect here; we just use unlikely characters (code points
// from the Unicode Private Use Area, written as escapes so they stay visible).
NSString * const QUOTE_CHAR_TOKEN_PLAIN = @"\uE000"; // QUOTATION MARK " (U+0022)
NSString * const QUOTE_CHAR_TOKEN_LEFT_DOUBLE = @"\uE001"; // LEFT DOUBLE QUOTATION MARK “ (U+201C)
NSString * const QUOTE_CHAR_TOKEN_RIGHT_DOUBLE = @"\uE002"; // RIGHT DOUBLE QUOTATION MARK ” (U+201D)

#pragma mark - NSString category

// File-private NSString helpers for handling backslash-escaped quotation marks.
@interface NSString (F53OSCMessage)
// Returns a copy of the receiver in which every backslash-escaped quotation mark
// (\" U+0022, \“ U+201C, \” U+201D) is replaced by its QUOTE_CHAR_TOKEN_* placeholder.
- (NSString *)escapedCharacterTokenizedString;
// Returns a copy of the receiver in which every QUOTE_CHAR_TOKEN_* placeholder is
// replaced by the quotation mark it stands for, without the escaping backslash.
- (NSString *)detokenizedUnescapedString;
@end

@implementation NSString (F53OSCMessage)

// Replaces each backslash-escaped quotation mark in the receiver with the
// placeholder token for that mark, so that later quote handling does not treat
// it as a delimiter. Returns the resulting string; the receiver is not modified.
- (NSString *)escapedCharacterTokenizedString
{
    // Parallel tables: escapedMarks[i] is swapped for placeholderTokens[i], in order.
    NSArray<NSString *> *escapedMarks = @[ @"\\\"", @"\\\u201C", @"\\\u201D" ];
    NSArray<NSString *> *placeholderTokens = @[ QUOTE_CHAR_TOKEN_PLAIN,
                                                QUOTE_CHAR_TOKEN_LEFT_DOUBLE,
                                                QUOTE_CHAR_TOKEN_RIGHT_DOUBLE ];

    NSString *tokenized = self;
    for ( NSUInteger i = 0; i < escapedMarks.count; i++ )
    {
        tokenized = [tokenized stringByReplacingOccurrencesOfString:escapedMarks[i]
                                                         withString:placeholderTokens[i]];
    }
    return tokenized;
}

// Reverses the tokenizing step: each placeholder token in the receiver is
// replaced with the bare quotation mark it represents (the escaping backslash
// is not restored). Returns the resulting string; the receiver is not modified.
- (NSString *)detokenizedUnescapedString
{
    // Parallel tables: placeholderTokens[i] is swapped for quotationMarks[i], in order.
    NSArray<NSString *> *placeholderTokens = @[ QUOTE_CHAR_TOKEN_PLAIN,
                                                QUOTE_CHAR_TOKEN_LEFT_DOUBLE,
                                                QUOTE_CHAR_TOKEN_RIGHT_DOUBLE ];
    NSArray<NSString *> *quotationMarks = @[ @"\"", @"\u201C", @"\u201D" ];

    NSString *detokenized = self;
    for ( NSUInteger i = 0; i < placeholderTokens.count; i++ )
    {
        detokenized = [detokenized stringByReplacingOccurrencesOfString:placeholderTokens[i]
                                                             withString:quotationMarks[i]];
    }
    return detokenized;
}

@end


@interface F53OSCMessage ()

@property (strong, nullable) NSArray<NSString *> *addressPartsCache;
Expand All @@ -47,6 +86,7 @@ @implementation F53OSCMessage

static NSCharacterSet *LEGAL_ADDRESS_CHARACTERS = nil;
static NSCharacterSet *LEGAL_METHOD_CHARACTERS = nil;
static NSCharacterSet *QUOTATION_MARK_CHARACTERS = nil;
static NSNumberFormatter *NUMBER_FORMATTER = nil;

+ (void) initialize
Expand All @@ -56,6 +96,10 @@ + (void) initialize
NSString *legalAddressChars = [NSString stringWithFormat:@"%@/*?[]{,}", [F53OSCServer validCharsForOSCMethod]];
LEGAL_ADDRESS_CHARACTERS = [NSCharacterSet characterSetWithCharactersInString:legalAddressChars];
LEGAL_METHOD_CHARACTERS = [NSCharacterSet characterSetWithCharactersInString:[F53OSCServer validCharsForOSCMethod]];

// Unless escaped with a `\`, +messageWithString: interprets all characters in this set as plain " (U+0022) quotation marks:
// " plain (U+0022), “ curly left double (U+201C), ” curly right double (U+201D)
QUOTATION_MARK_CHARACTERS = [NSCharacterSet characterSetWithCharactersInString:@"\"\u201C\u201D"];
}
if ( !NUMBER_FORMATTER )
{
Expand Down Expand Up @@ -130,11 +174,15 @@ + (nullable F53OSCMessage *) messageWithString:(NSString *)qscString

// Pull out arguments...

// Create a working copy and place a token for each escaped " character.
NSString *QUOTE_CHAR_TOKEN = @""; // not trying to be perfect here; we just use an unlikely character
// Create a working copy.
NSString *workingArguments = [qscString substringFromIndex:[address length]];
workingArguments = [workingArguments stringByReplacingOccurrencesOfString:@"\\\"" withString:QUOTE_CHAR_TOKEN];

// Place a token for each escaped quotation mark character.
workingArguments = [workingArguments escapedCharacterTokenizedString];

// After all escaped quotation mark characters are tokenized, conform all allowed quotation mark characters to the plain " (U+0022) mark.
workingArguments = [[workingArguments componentsSeparatedByCharactersInSet:QUOTATION_MARK_CHARACTERS] componentsJoinedByString:@"\""];

// The remaining " characters signify quoted string arguments; they should be paired up.
NSArray<NSString *> *splitOnQuotes = [workingArguments componentsSeparatedByString:@"\""];
if ( [splitOnQuotes count] % 2 != 1 )
Expand All @@ -151,6 +199,9 @@ + (nullable F53OSCMessage *) messageWithString:(NSString *)qscString
// Place a token for the quote we just pulled.
NSString *extractedQuote = [NSString stringWithFormat:@"\"%@\"", quotedString];
NSRange rangeOfFirstOccurrence = [workingArguments rangeOfString:extractedQuote];
if ( rangeOfFirstOccurrence.location == NSNotFound || rangeOfFirstOccurrence.length == 0 )
continue;

workingArguments = [workingArguments stringByReplacingOccurrencesOfString:extractedQuote
withString:QUOTED_STRING_TOKEN
options:0
Expand All @@ -176,13 +227,15 @@ + (nullable F53OSCMessage *) messageWithString:(NSString *)qscString
return nil;

NSString *quotedString = [quotedStrings objectAtIndex:quotedStringIndex];
NSString *detokenized = [quotedString stringByReplacingOccurrencesOfString:QUOTE_CHAR_TOKEN withString:@"\""];
NSString *detokenized = [quotedString detokenizedUnescapedString];
[finalArgs addObject:detokenized]; // quoted OSC string
quotedStringIndex++;
}
else if ( [arg isEqual:QUOTE_CHAR_TOKEN] )
else if ( [arg isEqual:QUOTE_CHAR_TOKEN_PLAIN] ||
[arg isEqual:QUOTE_CHAR_TOKEN_LEFT_DOUBLE] ||
[arg isEqual:QUOTE_CHAR_TOKEN_RIGHT_DOUBLE] )
{
[finalArgs addObject:@"\""]; // single character OSC string - 's'
[finalArgs addObject:[arg detokenizedUnescapedString]]; // single character OSC string - 's'
}
else if ( [arg hasPrefix:@"#blob"] )
{
Expand Down Expand Up @@ -228,11 +281,15 @@ + (nullable F53OSCMessage *) messageWithString:(NSString *)qscString
NSNumber *number = [NUMBER_FORMATTER numberFromString:arg];
if ( number != nil )
{
// unquoted argument was successfully formatted as a number
[finalArgs addObject:number]; // OSC int or float - 'i' or 'f'
}
else
{
[finalArgs addObject:[arg stringByReplacingOccurrencesOfString:QUOTE_CHAR_TOKEN withString:@"\""]]; // unquoted OSC string - 's'
// If all other conditions above were not satisfied,
// handle unquoted argument as a string anyway.
NSString *detokenized = [arg detokenizedUnescapedString];
[finalArgs addObject:detokenized]; // unquoted OSC string - 's'
}
}
}
Expand Down
18 changes: 18 additions & 0 deletions Tests/F53OSCTests/F53OSCMessageTests.m
Original file line number Diff line number Diff line change
Expand Up @@ -1379,6 +1379,7 @@ - (void) testThat_F53OSCMessageParsesQuotationMarks
{
// given
NSDictionary<NSString *, NSArray<id> *> *stringsAndArgs = @{
// QUOTATION MARK (plain) U+0022
@"/test/1a \"a\"" : @[ @"a" ],
@"/test/1b \"a b\"" : @[ @"a b" ],
@"/test/1c \"a\" \"b\"" : @[ @"a", @"b" ],
Expand All @@ -1391,6 +1392,23 @@ - (void) testThat_F53OSCMessageParsesQuotationMarks
@"/test/1h \"a \" b \" c\"" : @[ @"a ", @"b", @" c" ],
// interior escaped quotes, valid - single string arg includes quote characters
@"/test/1j \"a \\\"b\\\" c\"" : @[ @"a \"b\" c" ],
@"/test/1k \\\"" : @[ @"\"" ],

// LEFT/RIGHT DOUBLE QUOTATION MARK (curly) U+201C U+201D
@"/test/2a “a”" : @[ @"a" ],
@"/test/2b \u201ca b\u201d" : @[ @"a b" ],
@"/test/2c “a” “b”" : @[ @"a", @"b" ],
@"/test/2d “a” 1 “b”" : @[ @"a", @1, @"b" ],
@"/test/2e “a b” 1 “c d”" : @[ @"a b", @1, @"c d" ],
@"/test/2f “a b” 1.2 “c d”" : @[ @"a b", @1.2, @"c d" ],
// interior quotes with improper spacing around arg 2, malformed - fails
@"/test/2g \"a “b” c\"" : @[],
// interior non-escaped quotes with proper spacing around arg 2, valid - A and C args include extra spaces
@"/test/2h \"a “ b ” c\"" : @[ @"a ", @"b", @" c" ],
// interior escaped quotes, valid - single string arg includes quote characters
@"/test/2j \"a \\“b\\” c\"" : @[ @"a “b” c" ],
@"/test/2k \\" : @[ @"" ],
@"/test/2l \\" : @[ @"" ],
};

for (NSString *string in stringsAndArgs)
Expand Down

0 comments on commit f916c3e

Please sign in to comment.