From 4d7f52e7ddd9eb600c5e15622286b127e18a28a1 Mon Sep 17 00:00:00 2001
From: Matthew Flatt <mflatt@racket-lang.org>
Date: Fri, 25 Dec 2020 08:00:32 -0700
Subject: [PATCH] expose WTF-8 converters on all platforms

---
 .../scribblings/reference/bytes.scrbl         |  41 +++--
 .../scribblings/reference/reference.scrbl     |   6 +
 .../tests/racket/unicode.rktl                 |  69 ++++---
 racket/src/bc/src/string.c                    | 174 +++++++++---------
 racket/src/cs/schemified/io.scm               |  50 +++--
 racket/src/io/converter/main.rkt              |  23 +--
 racket/src/io/converter/utf-8.rkt             |  28 +--
 racket/src/io/demo.rkt                        |   5 +-
 8 files changed, 215 insertions(+), 181 deletions(-)

diff --git a/pkgs/racket-doc/scribblings/reference/bytes.scrbl b/pkgs/racket-doc/scribblings/reference/bytes.scrbl
index 597c9d2c31a..19d1616b2bc 100644
--- a/pkgs/racket-doc/scribblings/reference/bytes.scrbl
+++ b/pkgs/racket-doc/scribblings/reference/bytes.scrbl
@@ -453,30 +453,44 @@ Certain encoding combinations are always available:
  @item{@racket[(bytes-open-converter "platform-UTF-8" "platform-UTF-16")]
    --- converts UTF-8 to UTF-16 on @|AllUnix|, where each UTF-16
    code unit is a sequence of two bytes ordered by the current
-   platform's endianness. On Windows, the input can include
-   encodings that are not valid UTF-8, but which naturally extend the
-   UTF-8 encoding to support unpaired surrogate code units, and the
-   output is a sequence of UTF-16 code units (as little-endian byte
-   pairs), potentially including unpaired surrogates.}
+   platform's endianness. On Windows, the conversion is the same
+   as @racket[(bytes-open-converter "WTF-8" "WTF-16")] to support
+   unpaired surrogate code units.}
 
  @item{@racket[(bytes-open-converter "platform-UTF-8-permissive" "platform-UTF-16")]
    --- like @racket[(bytes-open-converter "platform-UTF-8" "platform-UTF-16")],
    but an input byte that is not part of a valid UTF-8 encoding
    sequence (or valid for the unpaired-surrogate extension on
-   Windows) is effectively replaced with @racket[(char->integer #\?)].}
+   Windows) is effectively replaced with @racketvalfont{#\uFFFD}.}
 
  @item{@racket[(bytes-open-converter "platform-UTF-16" "platform-UTF-8")]
    --- converts UTF-16 (bytes ordered by the current platform's
-   endianness) to UTF-8 on @|AllUnix|. On Windows, the input can
-   include UTF-16 code units that are unpaired surrogates, and the
-   corresponding output includes an encoding of each surrogate in a
-   natural extension of UTF-8. On @|AllUnix|, surrogates are
+   endianness) to UTF-8 on @|AllUnix|. On Windows, the conversion
+   is the same as @racket[(bytes-open-converter "WTF-16" "WTF-8")]
+   to support unpaired surrogates. On @|AllUnix|, surrogates are
    assumed to be paired: a pair of bytes with the bits @code{#xD800}
    starts a surrogate pair, and the @code{#x03FF} bits are used from
    the pair and following pair (independent of the value of the
    @code{#xDC00} bits). On all platforms, performance may be poor
    when decoding from an odd offset within an input byte string.}
 
+ @item{@racket[(bytes-open-converter "WTF-8" "WTF-16")]
+   --- converts the WTF-8 @cite["Sapin18"] superset of UTF-8 to a
+   superset of UTF-16 to support unpaired surrogate code units, where
+   each UTF-16 code unit is a sequence of two bytes ordered by the
+   current platform's endianness.}
+
+ @item{@racket[(bytes-open-converter "WTF-8-permissive" "WTF-16")]
+   --- like @racket[(bytes-open-converter "WTF-8" "WTF-16")],
+   but an input byte that is not part of a valid WTF-8 encoding
+   sequence is effectively replaced with @racketvalfont{#\uFFFD}.}
+
+ @item{@racket[(bytes-open-converter "WTF-16" "WTF-8")]
+   --- converts the WTF-16 @cite["Sapin18"] superset of UTF-16 to the
+   WTF-8 superset of UTF-8. The input can include UTF-16 code units
+   that are unpaired surrogates, and the corresponding output includes
+   an encoding of each surrogate in a natural extension of UTF-8.}
+
  ]
 
 A newly opened byte converter is registered with the current custodian
@@ -501,7 +515,12 @@ current executable's directory at run time, and the DLL must either
 supply @tt{_errno} or link to @filepath{msvcrt.dll} for @tt{_errno};
 otherwise, only the guaranteed combinations are available.
 
-Use @racket[bytes-convert] with the result to convert byte strings.}
+Use @racket[bytes-convert] with the result to convert byte strings.
+
+@history[#:changed "7.9.0.17" @elem{Added built-in converters for
+                                    @racket["WTF-8"],
+                                    @racket["WTF-8-permissive"], and
+                                    @racket["WTF-16"].}]}
 
 
 @defproc[(bytes-close-converter [converter bytes-converter?]) void]{
diff --git a/pkgs/racket-doc/scribblings/reference/reference.scrbl b/pkgs/racket-doc/scribblings/reference/reference.scrbl
index 334d0575a1a..e98cad657a6 100644
--- a/pkgs/racket-doc/scribblings/reference/reference.scrbl
+++ b/pkgs/racket-doc/scribblings/reference/reference.scrbl
@@ -209,6 +209,12 @@ The @racketmodname[racket] library combines
              #:url "https://doi.org/10.1017/CBO9780511574962"
              #:date "1999")
 
+  (bib-entry #:key "Sapin18"
+             #:author "Simon Sapin"
+             #:title "The WTF-8 Encoding"
+             #:url "http://simonsapin.github.io/wtf-8/"
+             #:date "2018")
+
   (bib-entry #:key "Shan04"
              #:author "Ken Shan"
              #:title "Shift to Control"
diff --git a/pkgs/racket-test-core/tests/racket/unicode.rktl b/pkgs/racket-test-core/tests/racket/unicode.rktl
index 7658ad86ac0..bc328309a80 100644
--- a/pkgs/racket-test-core/tests/racket/unicode.rktl
+++ b/pkgs/racket-test-core/tests/racket/unicode.rktl
@@ -889,11 +889,14 @@
 		      (go (lambda (n p) (read-n n p 1)))
 		      (go (lambda (n p) (read-n n p 2))))))
 		;; Test UTF-16
-		(let ([c (bytes-open-converter "platform-UTF-8" "platform-UTF-16")])
+		(for ([c (list (bytes-open-converter "platform-UTF-8" "platform-UTF-16")
+                               (bytes-open-converter "WTF-8" "WTF-16"))]
+                      [wtf? (list (eq? 'windows (system-type))
+                                  #t)])
 		  (let-values ([(s2 n status) (bytes-convert c s)])
 		    (case parse-status
 		      [(surrogate1 surrogate2)
-		       (if (eq? (system-type) 'windows)
+		       (if wtf?
 			   (begin
 			     (if (eq? parse-status 'surrogate1)
 				 (test 'aborts 'status status)
@@ -975,20 +978,23 @@
 	    basic-utf-8-tests))
 
 ;; Further UTF-16 tests
-(let ([c (bytes-open-converter "platform-UTF-16" "platform-UTF-8")])
+(for ([c (list (bytes-open-converter "platform-UTF-16" "platform-UTF-8")
+               (bytes-open-converter "WTF-16" "WTF-8"))]
+      [wtf? (list (eq? 'windows (system-type))
+                  #t)])
   (let-values ([(s n status) (bytes-convert c (bytes-append
 					       (integer->integer-bytes #xD800 2 #f)
 					       (integer->integer-bytes #xDC00 2 #f)))])
     (test-values (list #"" 0 'aborts)
 		 (lambda () (bytes-convert c (integer->integer-bytes #xD800 2 #f) )))
-    ;; Windows: unpaired surrogates allowed:
-    (when (eq? 'windows (system-type))
+    ;; WTF: unpaired surrogates allowed:
+    (when wtf?
       (test-values (list #"" 0 'aborts)
 		   (lambda () (bytes-convert c (integer->integer-bytes #xD8FF 2 #f))))
       (test-values (list #"\355\277\277" 2 'complete)
 		   (lambda () (bytes-convert c (integer->integer-bytes #xDFFF 2 #f)))))
-    ;; Non-windows: after #xD800 bits, surrogate pair is assumed
-    (unless (eq? 'windows (system-type))
+    ;; UTF: after #xD800 bits, surrogate pair is assumed
+    (unless wtf?
       (test-values (list #"" 0 'aborts)
 		   (lambda () (bytes-convert c (integer->integer-bytes #xD800 2 #f))))
       (test-values (list #"" 0 'aborts)
@@ -1027,29 +1033,32 @@
   (test-values '(#"" complete)
                (lambda () (bytes-convert-end c))))
 
-(when (eq? (system-type) 'windows)
-  (let ([c (bytes-open-converter "platform-UTF-8-permissive" "platform-UTF-16")])
-    ;; Check that we use all 6 bytes of #"\355\240\200\355\260\200" or none
-    (test-values (list 12 6 'complete)
-		 (lambda ()
-		   (bytes-convert c #"\355\240\200\355\260\200" 0 6 (make-bytes 12))))
-    ;; If we can't look all the way to the end, reliably abort without writing:
-    (let ([s (make-bytes 12 (char->integer #\x))])
-      (let loop ([n 1])
-	(unless (= n 6)
-	  (test-values (list 0 0 'aborts)
-		       (lambda ()
-		         (bytes-convert c #"\355\240\200\355\260\200" 0 n s)))
-	  (test #"xxxxxxxxxxxx" values s) ; no writes to bytes string
-	  (loop (add1 n)))))
-    (let ([s (make-bytes 12 (char->integer #\x))])
-      (let loop ([n 0])
-	(unless (= n 12)
-	  (test-values (list 0 0 'continues)
-		       (lambda ()
-			 (bytes-convert c #"\355\240\200\355\260\200" 0 6 (make-bytes n))))
-	  (test #"xxxxxxxxxxxx" values s) ; no writes to bytes string
-	  (loop (add1 n)))))))
+(for ([c (append
+          (if (eq? (system-type) 'windows)
+              (list (bytes-open-converter "platform-UTF-8-permissive" "platform-UTF-16"))
+              null)
+          (list (bytes-open-converter "WTF-8-permissive" "WTF-16")))])
+  ;; Check that we use all 6 bytes of #"\355\240\200\355\260\200" or none
+  (test-values (list 12 6 'complete)
+               (lambda ()
+                 (bytes-convert c #"\355\240\200\355\260\200" 0 6 (make-bytes 12))))
+  ;; If we can't look all the way to the end, reliably abort without writing:
+  (let ([s (make-bytes 12 (char->integer #\x))])
+    (let loop ([n 1])
+      (unless (= n 6)
+        (test-values (list 0 0 'aborts)
+                     (lambda ()
+                       (bytes-convert c #"\355\240\200\355\260\200" 0 n s)))
+        (test #"xxxxxxxxxxxx" values s) ; no writes to bytes string
+        (loop (add1 n)))))
+  (let ([s (make-bytes 12 (char->integer #\x))])
+    (let loop ([n 0])
+      (unless (= n 12)
+        (test-values (list 0 0 'continues)
+                     (lambda ()
+                       (bytes-convert c #"\355\240\200\355\260\200" 0 6 (make-bytes n))))
+        (test #"xxxxxxxxxxxx" values s) ; no writes to bytes string
+        (loop (add1 n))))))
 
 ;; Seems like this sort of thing should be covered above, and maybe it
 ;;  it after some other corrections. But just in case:
diff --git a/racket/src/bc/src/string.c b/racket/src/bc/src/string.c
index de007f66b73..81a344c30d6 100644
--- a/racket/src/bc/src/string.c
+++ b/racket/src/bc/src/string.c
@@ -29,6 +29,12 @@
 # define mzLOCALE_IS_UTF_8(s) (!(rktio_convert_properties(scheme_rktio) & RKTIO_CONVERTER_SUPPORTED))
 #endif
 
+#ifdef WINDOWS_UNICODE_SUPPORT
+# define WIN_UTF16_AS_WTF16(utf16) (utf16) /* Windows build: pass the flag through, so UTF-16 requests run in WTF mode */
+#else
+# define WIN_UTF16_AS_WTF16(utf16) 0 /* other builds: always 0, so only explicit "WTF-*" converters set wtf */
+#endif
+
 #define mzICONV_KIND 0
 #define mzUTF8_KIND 1
 #define mzUTF8_TO_UTF16_KIND 2
@@ -39,7 +45,7 @@ typedef struct Scheme_Converter {
   short closed;
   short kind;
   rktio_converter_t *cd;
-  int permissive;
+  int permissive, wtf;
   Scheme_Custodian_Reference *mref;
 } Scheme_Converter;
 
@@ -154,13 +160,13 @@ static int mz_char_strcmp_ci(const char *who, const mzchar *str1, intptr_t l1, c
 static int mz_strcmp(const char *who, unsigned char *str1, intptr_t l1, unsigned char *str2, intptr_t l2);
 
 XFORM_NONGCING static intptr_t utf8_decode_x(const unsigned char *s, intptr_t start, intptr_t end,
-					unsigned int *us, intptr_t dstart, intptr_t dend,
-					intptr_t *ipos, intptr_t *jpos,
-					char compact, char utf16,
-					int *state, int might_continue, int permissive);
+                                             unsigned int *us, intptr_t dstart, intptr_t dend,
+                                             intptr_t *ipos, intptr_t *jpos,
+                                             char compact, char utf16,
+                                             int *state, int might_continue, int permissive, int wtf);
 XFORM_NONGCING static intptr_t utf8_encode_x(const unsigned int *us, intptr_t start, intptr_t end,
-					unsigned char *s, intptr_t dstart, intptr_t dend,
-					intptr_t *_ipos, intptr_t *_opos, char utf16);
+                                             unsigned char *s, intptr_t dstart, intptr_t dend,
+                                             intptr_t *_ipos, intptr_t *_opos, char utf16, int wtf);
 
 static char *string_to_from_locale(int to_bytes,
 				   char *in, intptr_t delta, intptr_t len,
@@ -1199,7 +1205,7 @@ do_byte_string_to_char_string(const char *who,
 		       NULL, 0, -1,
 		       NULL, NULL, 0, 0,
 		       NULL, 0, 
-		       (perm > -1) ? 0xD800 : 0);
+		       (perm > -1) ? 0xD800 : 0, 0);
   if (ulen < 0) {
     scheme_contract_error(who,
                           "string is not a well-formed UTF-8 encoding",
@@ -1212,7 +1218,7 @@ do_byte_string_to_char_string(const char *who,
 		v, 0, -1,
 		NULL, NULL, 0, 0,
 		NULL, 0, 
-		(perm > -1) ? 0xD800 : 0);
+		(perm > -1) ? 0xD800 : 0, 0);
   
   if (perm > -1) {
     for (i = 0; i < ulen; i++) {
@@ -1562,7 +1568,7 @@ byte_string_utf8_index(int argc, Scheme_Object *argv[])
   result = utf8_decode_x((unsigned char *)chars, istart, ifinish,
 			 NULL, 0, pos,
 			 &ipos, &opos,
-			 0, 0, NULL, 0, perm ? 1 : 0);
+			 0, 0, NULL, 0, perm ? 1 : 0, 0);
 
   if (((result < 0) && (result != -3))
       || ((ipos == ifinish) && (opos <= pos)))
@@ -1610,7 +1616,7 @@ byte_string_utf8_ref(int argc, Scheme_Object *argv[])
     utf8_decode_x((unsigned char *)chars, istart, ifinish,
 		  NULL, 0, pos,
 		  &ipos, &opos,
-		  0, 0, NULL, 0, perm ? 1 : 0);
+		  0, 0, NULL, 0, perm ? 1 : 0, 0);
     if (opos < pos)
       return scheme_false;
     istart = ipos;
@@ -1619,7 +1625,7 @@ byte_string_utf8_ref(int argc, Scheme_Object *argv[])
   utf8_decode_x((unsigned char *)chars, istart, ifinish,
 		us, 0, 1,
 		&ipos, &opos,
-		0, 0, NULL, 0, perm ? 0xFFFFFF : 0);
+		0, 0, NULL, 0, perm ? 0xFFFFFF : 0, 0);
 
   if (opos < 1)
     return scheme_false;
@@ -2771,7 +2777,7 @@ static char *do_convert(rktio_converter_t *cd,
 	r = utf8_decode_x((unsigned char *)in, id + dip, iilen,
 			  (unsigned int *)out, (od + dop) >> 2, iolen >> 2,
 			  &ipos, &opos,
-			  0, 0, NULL, 0, 0);
+			  0, 0, NULL, 0, 0, 0);
 	
 	opos <<= 2;
 	dop = (opos - od);
@@ -4334,7 +4340,7 @@ Scheme_Object *scheme_open_converter(const char *from_e, const char *to_e)
   Scheme_Converter *c;
   rktio_converter_t *cd;
   int kind;
-  int permissive;
+  int permissive, wtf;
   int need_regis = 1;
   Scheme_Custodian_Reference *mref;
 
@@ -4354,6 +4360,7 @@ Scheme_Object *scheme_open_converter(const char *from_e, const char *to_e)
       permissive = 0;
     cd = NULL;
     need_regis = 0;
+    wtf = 0;
   } else if ((!strcmp(from_e, "platform-UTF-8")
 	      || !strcmp(from_e, "platform-UTF-8-permissive"))
 	     && !strcmp(to_e, "platform-UTF-16")) {
@@ -4364,12 +4371,32 @@ Scheme_Object *scheme_open_converter(const char *from_e, const char *to_e)
       permissive = 0;
     cd = NULL;
     need_regis = 0;
+    wtf = WIN_UTF16_AS_WTF16(1);
+  } else if ((!strcmp(from_e, "WTF-8")
+	      || !strcmp(from_e, "WTF-8-permissive"))
+	     && !strcmp(to_e, "WTF-16")) {
+    kind = mzUTF8_TO_UTF16_KIND;
+    if (!strcmp(from_e, "WTF-8-permissive"))
+      permissive = 0xFFFD;
+    else
+      permissive = 0;
+    cd = NULL;
+    need_regis = 0;
+    wtf = 1;
   } else if (!strcmp(from_e, "platform-UTF-16")
 	     && !strcmp(to_e, "platform-UTF-8")) {
     kind = mzUTF16_TO_UTF8_KIND;
     permissive = 0;
     cd = NULL;
     need_regis = 0;
+    wtf = WIN_UTF16_AS_WTF16(1);
+  } else if (!strcmp(from_e, "WTF-16")
+	     && !strcmp(to_e, "WTF-8")) {
+    kind = mzUTF16_TO_UTF8_KIND;
+    permissive = 0;
+    cd = NULL;
+    need_regis = 0;
+    wtf = 1;
   } else {
     char *tmp_from_e = NULL, *tmp_to_e = NULL;
 
@@ -4397,6 +4424,7 @@ Scheme_Object *scheme_open_converter(const char *from_e, const char *to_e)
 
     kind = mzICONV_KIND;
     permissive = 0;
+    wtf = 0;
   }
 
   c = MALLOC_ONE_TAGGED(Scheme_Converter);
@@ -4404,6 +4432,7 @@ Scheme_Object *scheme_open_converter(const char *from_e, const char *to_e)
   c->closed = 0;
   c->kind = kind;
   c->permissive = permissive;
+  c->wtf = wtf;
   c->cd = cd;
   if (!need_regis)
     mref = NULL;
@@ -4537,7 +4566,7 @@ static Scheme_Object *convert_one(const char *who, int opos, int argc, Scheme_Ob
 
     status = utf8_encode_x((const unsigned int *)instr, istart >> 1, ifinish >> 1,
 			   (unsigned char *)r, ostart, ofinish,
-			   &amt_read, &amt_wrote, 1);
+			   &amt_read, &amt_wrote, 1, c->wtf);
     
     amt_read -= (istart >> 1);
 
@@ -4547,7 +4576,7 @@ static Scheme_Object *convert_one(const char *who, int opos, int argc, Scheme_Ob
 	r = (char *)scheme_malloc_atomic(amt_wrote + 1);
 	utf8_encode_x((const unsigned int *)instr, istart >> 1, ifinish >> 1,
 		      (unsigned char *)r, ostart, ofinish,
-		      NULL, NULL, 1);
+		      NULL, NULL, 1, c->wtf);
 	r[amt_wrote] = 0;
       }
       amt_read <<= 1;
@@ -4600,7 +4629,7 @@ static Scheme_Object *convert_one(const char *who, int opos, int argc, Scheme_Ob
       status = utf8_decode_x((unsigned char *)instr, istart, ifinish,
 			     (unsigned int *)r, _ostart, _ofinish,
 			     &amt_read, &amt_wrote,
-			     1, utf16, NULL, 1, c->permissive);
+			     1, utf16, NULL, 1, c->permissive, c->wtf);
       
       if (utf16) {
 	_ostart <<= 1;
@@ -4624,7 +4653,7 @@ static Scheme_Object *convert_one(const char *who, int opos, int argc, Scheme_Ob
 	    utf8_decode_x((unsigned char *)instr, istart, ifinish,
 			  (unsigned int *)r, ostart, _ofinish,
 			  NULL, NULL,
-			  1, utf16, NULL, 1, c->permissive);
+			  1, utf16, NULL, 1, c->permissive, c->wtf);
 	    r[amt_wrote] = 0;
 	  }
 	} else if (!r)
@@ -4721,7 +4750,7 @@ static intptr_t utf8_decode_x(const unsigned char *s, intptr_t start, intptr_t e
                               unsigned int *us, intptr_t dstart, intptr_t dend,
                               intptr_t *ipos, intptr_t *jpos,
                               char compact, char utf16, int *_state,
-                              int might_continue, int permissive)
+                              int might_continue, int permissive, int wtf)
      /* Results:
 	non-negative => translation complete, = number of produced chars
 	-1 => input ended in middle of encoding (only if might_continue)
@@ -4731,8 +4760,8 @@ static intptr_t utf8_decode_x(const unsigned char *s, intptr_t start, intptr_t e
 	ipos & jpos are filled with ending positions (between [d]start
 	and [d]end) before return, unless they are NULL.
 
-	compact => UTF-8 to UTF-8 or UTF-16 --- the latter if utf16
-	for Windows for utf16, decode extended UTF-8 that allows surrogates
+	compact => UTF-8 to UTF-8 or UTF-16 --- the latter if utf16;
+	for utf16 and wtf, decode extended UTF-8 that allows surrogates
 
 	_state provides initial state and is filled with ending state;
 	when it's not NULL, the us must be NULL
@@ -4742,16 +4771,13 @@ static intptr_t utf8_decode_x(const unsigned char *s, intptr_t start, intptr_t e
 	permissive is non-zero => use permissive as value for bad byte
 	sequences. When generating UTF-8, this must be an ASCII character
         or U+FFFD. */
-
 {
   intptr_t i, j, oki;
   int failmode = -3, state;
   int init_doki;
   int nextbits, v;
   unsigned int sc;
-# ifdef WINDOWS_UNICODE_SUPPORT
   int pending_surrogate = 0;
-# endif
 
   if (_state) {
     state = (*_state) & 0x7;
@@ -4817,12 +4843,10 @@ static intptr_t utf8_decode_x(const unsigned char *s, intptr_t start, intptr_t e
 	    /* We finished. One last check: */
 	    if ((((v >= 0xD800) && (v <= 0xDFFF))
 		 || (v > 0x10FFFF))
-# ifdef WINDOWS_UNICODE_SUPPORT
-		&& (!utf16
-		    /* If UTF-16 for Windows, just apply upper-limit check */
-		    || (v > 0x10FFFF))
-# endif
-		) {
+		&& (!wtf
+                    || !utf16
+                    /* If WTF-16, just apply upper-limit check */
+                    || (v > 0x10FFFF))) {
 	      /* UTF-16 surrogates or other illegal code units */
 	      if (permissive) {
 		v = permissive;
@@ -4902,14 +4926,12 @@ static intptr_t utf8_decode_x(const unsigned char *s, intptr_t start, intptr_t e
       if (compact) {
 	if (utf16) {
 	  if (v > 0xFFFF) {
-# ifdef WINDOWS_UNICODE_SUPPORT
 	    if (pending_surrogate) {
 	      if (us)
 		((unsigned short *)us)[j] = pending_surrogate;
 	      j++; /* Accept previously written unpaired surrogate */
 	      pending_surrogate = 0;
 	    }
-# endif
 	    if (j + 1 >= dend)
 	      break;
 	    if (us) {
@@ -4918,8 +4940,7 @@ static intptr_t utf8_decode_x(const unsigned char *s, intptr_t start, intptr_t e
 	      ((unsigned short *)us)[j+1] = 0xDC00 | (v & 0x3FF);
 	    }
 	    j++;
-	  } else {
-# ifdef WINDOWS_UNICODE_SUPPORT
+	  } else if (wtf) {
 	    /* We allow a surrogate by itself, but don't allow
 	       a 0xDC00 after a 0xD800, otherwise multiple encodings can
 	       map to the same thing. */
@@ -4977,10 +4998,9 @@ static intptr_t utf8_decode_x(const unsigned char *s, intptr_t start, intptr_t e
 	      --j; /* don't accept unpaired surrogate, yet */
 	    else if (us)
 	      ((unsigned short *)us)[j] = v;
-# else
+          } else {
 	    if (us)
 	      ((unsigned short *)us)[j] = v;
-# endif
 	  }
 	} else {
 	  intptr_t delta;
@@ -5031,7 +5051,6 @@ static intptr_t utf8_decode_x(const unsigned char *s, intptr_t start, intptr_t e
       failmode = -1;
       i = end - 1; /* to ensure that failmode is returned */
     } else if (permissive) {
-# ifdef WINDOWS_UNICODE_SUPPORT
       if (pending_surrogate) {
         /* Unpaired surrogate before permissive replacements */
         if (utf16 && (j < dend)) {
@@ -5041,7 +5060,6 @@ static intptr_t utf8_decode_x(const unsigned char *s, intptr_t start, intptr_t e
         }
         pending_surrogate = 0;
       }
-#endif
       for (i = oki; i < end; i++) {
 	if (j < dend) {
 	  if (us) {
@@ -5061,7 +5079,6 @@ static intptr_t utf8_decode_x(const unsigned char *s, intptr_t start, intptr_t e
     }
   }
 
-# ifdef WINDOWS_UNICODE_SUPPORT
   if (pending_surrogate) {
     if (!might_continue) {
       /* Accept unpaired surrogate at end of input */
@@ -5074,7 +5091,6 @@ static intptr_t utf8_decode_x(const unsigned char *s, intptr_t start, intptr_t e
       oki -= 3;
     }
   }
-#endif
 
   if (ipos)
     *ipos = oki;
@@ -5084,12 +5100,10 @@ static intptr_t utf8_decode_x(const unsigned char *s, intptr_t start, intptr_t e
   if (i < end)
     return failmode;
 
-# ifdef WINDOWS_UNICODE_SUPPORT
   if (pending_surrogate) {
     /* input must have ended right after surrogate */
     return -1;
   }
-#endif
 
   return j - dstart;
 }
@@ -5099,7 +5113,7 @@ intptr_t scheme_utf8_decode(const unsigned char *s, intptr_t start, intptr_t end
                             intptr_t *ipos, char utf16, int permissive)
 {
   return utf8_decode_x(s, start, end, us, dstart, dend,
-		       ipos, NULL, utf16, utf16, NULL, 0, permissive);
+		       ipos, NULL, utf16, utf16, NULL, 0, permissive, WIN_UTF16_AS_WTF16(utf16));
 }
 
 intptr_t scheme_utf8_decode_offset_prefix(const unsigned char *s, intptr_t start, intptr_t end,
@@ -5107,7 +5121,7 @@ intptr_t scheme_utf8_decode_offset_prefix(const unsigned char *s, intptr_t start
                                           intptr_t *ipos, char utf16, int permissive)
 {
   return utf8_decode_x(s, start, end, us, dstart, dend,
-		       ipos, NULL, utf16, utf16, NULL, 1, permissive);
+		       ipos, NULL, utf16, utf16, NULL, 1, permissive, WIN_UTF16_AS_WTF16(utf16));
 }
 
 intptr_t scheme_utf8_decode_as_prefix(const unsigned char *s, intptr_t start, intptr_t end,
@@ -5117,13 +5131,13 @@ intptr_t scheme_utf8_decode_as_prefix(const unsigned char *s, intptr_t start, in
 {
   intptr_t opos;
   utf8_decode_x(s, start, end, us, dstart, dend,
-		ipos, &opos, utf16, utf16, NULL, 1, permissive);
+		ipos, &opos, utf16, utf16, NULL, 1, permissive, WIN_UTF16_AS_WTF16(utf16));
   return opos - dstart;
 }
 
 intptr_t scheme_utf8_decode_all(const unsigned char *s, intptr_t len, unsigned int *us, int permissive)
 {
-  return utf8_decode_x(s, 0, len, us, 0, -1, NULL, NULL, 0, 0, NULL, 0, permissive);
+  return utf8_decode_x(s, 0, len, us, 0, -1, NULL, NULL, 0, 0, NULL, 0, permissive, 0);
 }
 
 intptr_t scheme_utf8_decode_prefix(const unsigned char *s, intptr_t len, unsigned int *us, int permissive)
@@ -5142,7 +5156,7 @@ intptr_t scheme_utf8_decode_prefix(const unsigned char *s, intptr_t len, unsigne
       return len;
   }
 
-  return utf8_decode_x(s, 0, len, us, 0, -1, NULL, NULL, 0, 0, NULL, 1, permissive);
+  return utf8_decode_x(s, 0, len, us, 0, -1, NULL, NULL, 0, 0, NULL, 1, permissive, 0);
 }
 
 mzchar *scheme_utf8_decode_to_buffer_len(const unsigned char *s, intptr_t len,
@@ -5152,7 +5166,7 @@ mzchar *scheme_utf8_decode_to_buffer_len(const unsigned char *s, intptr_t len,
 
   ulen = utf8_decode_x(s, 0, len, NULL, 0, -1,
 		       NULL, NULL, 0, 0,
-		       NULL, 0, 0);
+		       NULL, 0, 0, 0);
   if (ulen < 0)
     return NULL;
   if (ulen + 1 > blen) {
@@ -5160,7 +5174,7 @@ mzchar *scheme_utf8_decode_to_buffer_len(const unsigned char *s, intptr_t len,
   }
   utf8_decode_x(s, 0, len, buf, 0, -1,
 		NULL, NULL, 0, 0,
-		NULL, 0, 0);
+		NULL, 0, 0, 0);
   buf[ulen] = 0;
   *_ulen = ulen;
   return buf;
@@ -5193,14 +5207,14 @@ intptr_t scheme_utf8_decode_count(const unsigned char *s, intptr_t start, intptr
 		NULL, 0, -1,
 		NULL, &pos,
 		0, 0, _state,
-		might_continue, permissive);
+		might_continue, permissive, 0);
 
   return pos;
 }
 
 static intptr_t utf8_encode_x(const unsigned int *us, intptr_t start, intptr_t end,
-			 unsigned char *s, intptr_t dstart, intptr_t dend,
-			 intptr_t *_ipos, intptr_t *_opos, char utf16)
+                              unsigned char *s, intptr_t dstart, intptr_t dend,
+                              intptr_t *_ipos, intptr_t *_opos, char utf16, int wtf)
   /* Results:
         -1 => input ended in the middle of an encoding - only when utf16 and _opos
 	non-negative => reports number of bytes/code-units produced */
@@ -5222,30 +5236,23 @@ static intptr_t utf8_encode_x(const unsigned int *us, intptr_t start, intptr_t e
              end and _opos is 0. The well-formedness assumption was
              probably not a good idea, but note that it's explicitly
              documented to behave that way. */
-# ifdef WINDOWS_UNICODE_SUPPORT
-#  define UNPAIRED_MASK 0xFC00
-# else
-#  define UNPAIRED_MASK 0xF800
-# endif
-	  if (((i + 1) == end) && ((wc & UNPAIRED_MASK) == 0xD800) && _opos) {
+# define UNPAIRED_MASK(wtf) ((wtf) ? 0xFC00 : 0xF800) /* masked value is compared to 0xD800: wtf matches D800-DBFF only, otherwise D800-DFFF */
+	  if (((i + 1) == end) && ((wc & UNPAIRED_MASK(wtf)) == 0xD800) && _opos) {
 	    /* Ended in the middle of a surrogate pair */
 	    *_opos = j;
 	    if (_ipos)
 	      *_ipos = i;
 	    return -1;
 	  }
-# ifdef WINDOWS_UNICODE_SUPPORT
-	  if ((wc & 0xFC00) != 0xD800) {
+	  if (wtf && ((wc & 0xFC00) != 0xD800)) {
 	    /* Count as one */
-	  } else if ((i + 1 >= end)
-		     || (((((unsigned short *)us)[i+1]) & 0xFC00) != 0xDC00)) {
-	  } else 
-# endif
-	    {
-	      i++;
-	      wc = ((wc & 0x3FF) << 10) + ((((unsigned short *)us)[i]) & 0x3FF);
-	      wc += 0x10000;
-	    }
+	  } else if (wtf && ((i + 1 >= end)
+                             || (((((unsigned short *)us)[i+1]) & 0xFC00) != 0xDC00))) {
+	  } else {
+            i++;
+            wc = ((wc & 0x3FF) << 10) + ((((unsigned short *)us)[i]) & 0x3FF);
+            wc += 0x10000;
+          }
 	}
       } else {
 	wc = us[i];
@@ -5279,26 +5286,23 @@ static intptr_t utf8_encode_x(const unsigned int *us, intptr_t start, intptr_t e
 	  /* Unparse surrogates. We assume that the surrogates are
 	     well formed on non-Windows platforms, but when _opos,
 	     we detect ending in the middle of an surrogate pair. */
-	  if (((i + 1) == end) && ((wc & UNPAIRED_MASK) == 0xD800) && _opos) {
+	  if (((i + 1) == end) && ((wc & UNPAIRED_MASK(wtf)) == 0xD800) && _opos) {
 	    /* Ended in the middle of a surrogate pair */
 	    *_opos = j;
 	    if (_ipos)
 	      *_ipos = i;
 	    return -1;
 	  }
-# ifdef WINDOWS_UNICODE_SUPPORT
-	  if ((wc & 0xFC00) != 0xD800) {
+	  if (wtf && ((wc & 0xFC00) != 0xD800)) {
 	    /* Let the misplaced surrogate through */
-	  } else if ((i + 1 >= end)
-		     || (((((unsigned short *)us)[i+1]) & 0xFC00) != 0xDC00)) {
+	  } else if (wtf && ((i + 1 >= end)
+                             || (((((unsigned short *)us)[i+1]) & 0xFC00) != 0xDC00))) {
 	    /* Let the misplaced surrogate through */
-	  } else
-# endif
-	    {
-	      i++;
-	      wc = ((wc & 0x3FF) << 10) + ((((unsigned short *)us)[i]) & 0x3FF);
-	      wc += 0x10000;
-	    }
+	  } else {
+            i++;
+            wc = ((wc & 0x3FF) << 10) + ((((unsigned short *)us)[i]) & 0x3FF);
+            wc += 0x10000;
+          }
 	}
       } else {
 	wc = us[i];
@@ -5360,12 +5364,12 @@ intptr_t scheme_utf8_encode(const unsigned int *us, intptr_t start, intptr_t end
 {
   return utf8_encode_x(us, start, end,
 		       s, dstart, -1,
-		       NULL, NULL, utf16);
+		       NULL, NULL, utf16, WIN_UTF16_AS_WTF16(utf16));
 }
 
 intptr_t scheme_utf8_encode_all(const unsigned int *us, intptr_t len, unsigned char *s)
 {
-  return utf8_encode_x(us, 0, len, s, 0, -1, NULL, NULL, 0 /* utf16 */);
+  return utf8_encode_x(us, 0, len, s, 0, -1, NULL, NULL, 0 /* utf16 */, 0);
 }
 
 char *scheme_utf8_encode_to_buffer_len(const mzchar *s, intptr_t len,
@@ -5389,11 +5393,11 @@ char *scheme_utf8_encode_to_buffer_len(const mzchar *s, intptr_t len,
     }
   }
 
-  slen = utf8_encode_x(s, 0, len, NULL, 0, -1, NULL, NULL, 0);
+  slen = utf8_encode_x(s, 0, len, NULL, 0, -1, NULL, NULL, 0, 0);
   if (slen + 1 > blen) {
     buf = (char *)scheme_malloc_atomic(slen + 1);
   }
-  utf8_encode_x(s, 0, len, (unsigned char *)buf, 0, -1, NULL, NULL, 0);
+  utf8_encode_x(s, 0, len, (unsigned char *)buf, 0, -1, NULL, NULL, 0, 0);
   buf[slen] = 0;
   *_slen = slen;
   return buf;
diff --git a/racket/src/cs/schemified/io.scm b/racket/src/cs/schemified/io.scm
index 9102eb0158b..5a4d7a23d1f 100644
--- a/racket/src/cs/schemified/io.scm
+++ b/racket/src/cs/schemified/io.scm
@@ -14846,9 +14846,9 @@
            (if (let ((or-part_0 (eq? from_0 'utf-16)))
                  (if or-part_0
                    or-part_0
-                   (let ((or-part_1 (eq? from_0 'utf-16-ish)))
+                   (let ((or-part_1 (eq? from_0 'wtf-16)))
                      (if or-part_1 or-part_1 (eq? from_0 'utf-16-assume)))))
-             (let ((temp33_0 (eq? from_0 'utf-16-ish)))
+             (let ((temp33_0 (eq? from_0 'wtf-16)))
                (let ((temp34_0 (eq? from_0 'utf-16-assume)))
                  (utf-16-ish-reencode!.1
                   temp34_0
@@ -14861,19 +14861,17 @@
                   dest-end_0)))
              (let ((or-part_0 (eq? from_0 'utf-8-permissive)))
                (let ((temp41_0
-                      (if or-part_0
-                        or-part_0
-                        (eq? from_0 'utf-8-ish-permissive))))
+                      (if or-part_0 or-part_0 (eq? from_0 'wtf-8-permissive))))
                  (let ((temp42_0
-                        (let ((or-part_1 (eq? from_0 'utf-8-ish)))
+                        (let ((or-part_1 (eq? from_0 'wtf-8)))
                           (if or-part_1
                             or-part_1
-                            (eq? from_0 'utf-8-ish-permissive)))))
+                            (eq? from_0 'wtf-8-permissive)))))
                    (let ((temp43_0
                           (let ((or-part_1 (eq? to_0 'utf-16)))
                             (if or-part_1
                               or-part_1
-                              (let ((or-part_2 (eq? to_0 'utf-16-ish)))
+                              (let ((or-part_2 (eq? to_0 'wtf-16)))
                                 (if or-part_2
                                   or-part_2
                                   (eq? to_0 'utf-16-assume)))))))
@@ -14902,7 +14900,7 @@
 (define utf-8-ish-reencode!.1
   (|#%name|
    utf-8-ish-reencode!
-   (lambda (from-utf-8-ish?3_0
+   (lambda (from-wtf-8?3_0
             permissive?2_0
             to-utf-16?4_0
             in-bstr8_0
@@ -15111,7 +15109,7 @@
                                        (if (if (>= next-accum_0 55296)
                                              (<= next-accum_0 57343)
                                              #f)
-                                         (if from-utf-8-ish?3_0
+                                         (if from-wtf-8?3_0
                                            (if (= i_0 in-end10_0)
                                              (let ((app_0
                                                     (- base-i_0 in-start9_0)))
@@ -15439,7 +15437,7 @@
   (|#%name|
    utf-16-ish-reencode!
    (lambda (assume-paired-surrogates?16_0
-            from-utf-16-ish?15_0
+            from-wtf-16?15_0
             in-bstr19_0
             in-start20_0
             in-end21_0
@@ -15693,10 +15691,10 @@
                                                        v2_0
                                                        1023))))))
                                              (continue_0 v3_0 (+ i_0 4)))
-                                           (if from-utf-16-ish?15_0
+                                           (if from-wtf-16?15_0
                                              (continue_0 v_0 (+ i_0 2))
                                              (done_0 'error)))))))
-                                 (if from-utf-16-ish?15_0
+                                 (if from-wtf-16?15_0
                                    (continue_0 v_0 (+ i_0 2))
                                    (done_0 'error)))
                                (continue_0 v_0 (+ i_0 2)))))))))))))))
@@ -15806,10 +15804,10 @@
          'bytes-converter
          'custodian-reference))))))
 (define windows? (eq? 'windows (system-type)))
-(define platform-utf-8 (if windows? 'utf-8-ish 'utf-8))
+(define platform-utf-8 (if windows? 'wtf-8 'utf-8))
 (define platform-utf-8-permissive
-  (if windows? 'utf-8-ish-permissive 'utf-8-permissive))
-(define platform-utf-16 (if windows? 'utf-16-ish 'utf-16-assume))
+  (if windows? 'wtf-8-permissive 'utf-8-permissive))
+(define platform-utf-16 (if windows? 'wtf-16 'utf-16-assume))
 (define bytes-open-converter-in-custodian
   (lambda (who_0 cust_0 from-str_0 to-str_0)
     (begin
@@ -15843,23 +15841,21 @@
                 (bytes-converter1.1
                  (utf-8-converter1.1 platform-utf-16 platform-utf-8)
                  #f)
-                (if (if (string=? from-str_0 "UTF-8-ish")
-                      (string=? to-str_0 "UTF-16-ish")
+                (if (if (string=? from-str_0 "WTF-8")
+                      (string=? to-str_0 "WTF-16")
                       #f)
-                  (bytes-converter1.1
-                   (utf-8-converter1.1 'utf-8-ish 'utf-16-ish)
-                   #f)
-                  (if (if (string=? from-str_0 "UTF-8-ish-permissive")
-                        (string=? to-str_0 "UTF-16-ish")
+                  (bytes-converter1.1 (utf-8-converter1.1 'wtf-8 'wtf-16) #f)
+                  (if (if (string=? from-str_0 "WTF-8-permissive")
+                        (string=? to-str_0 "WTF-16")
                         #f)
                     (bytes-converter1.1
-                     (utf-8-converter1.1 'utf-8-ish-permissive 'utf-16-ish)
+                     (utf-8-converter1.1 'wtf-8-permissive 'wtf-16)
                      #f)
-                    (if (if (string=? from-str_0 "UTF-16-ish")
-                          (string=? to-str_0 "UTF-8-ish")
+                    (if (if (string=? from-str_0 "WTF-16")
+                          (string=? to-str_0 "WTF-8")
                           #f)
                       (bytes-converter1.1
-                       (utf-8-converter1.1 'utf-16-ish 'utf-8-ish)
+                       (utf-8-converter1.1 'wtf-16 'wtf-8)
                        #f)
                       (if (if (let ((or-part_0
                                      (if (string=? from-str_0 "UTF-8")
diff --git a/racket/src/io/converter/main.rkt b/racket/src/io/converter/main.rkt
index a56250789ce..b61259de954 100644
--- a/racket/src/io/converter/main.rkt
+++ b/racket/src/io/converter/main.rkt
@@ -24,9 +24,9 @@
 ;; intended for converting to and from arbitrary 16-byte sequences,
 ;; which is useful for encoding Windows paths.
 (define windows? (eq? 'windows (system-type)))
-(define platform-utf-8 (if windows? 'utf-8-ish 'utf-8))
-(define platform-utf-8-permissive (if windows? 'utf-8-ish-permissive 'utf-8-permissive))
-(define platform-utf-16 (if windows? 'utf-16-ish 'utf-16-assume))
+(define platform-utf-8 (if windows? 'wtf-8 'utf-8))
+(define platform-utf-8-permissive (if windows? 'wtf-8-permissive 'utf-8-permissive))
+(define platform-utf-16 (if windows? 'wtf-16 'utf-16-assume))
 
 (define (bytes-open-converter-in-custodian who cust from-str to-str)
   (check who string? from-str)
@@ -47,17 +47,18 @@
     [(and (string=? from-str "platform-UTF-16") (string=? to-str "platform-UTF-8"))
      (bytes-converter (utf-8-converter platform-utf-16 platform-utf-8)
                       #f)]
-    ;; "UTF-8-ish" is also known as "WTF-8".
-    ;; "UTF-16-ish" is similar to UTF-16, but allows unpaired surrogates --- which is still
+    ;; WTF-16 is similar to UTF-16, but allows unpaired surrogates --- which is still
     ;; different from UCS-2, since paired surrogates are decoded as in UTF-16.
-    [(and (string=? from-str "UTF-8-ish") (string=? to-str "UTF-16-ish"))
-     (bytes-converter (utf-8-converter 'utf-8-ish 'utf-16-ish)
+    ;; WTF-8 is the analogous extension of UTF-8 that can encode unpaired surrogates;
+    ;; encoding each half of a surrogate pair separately is specifically disallowed.
+    [(and (string=? from-str "WTF-8") (string=? to-str "WTF-16"))
+     (bytes-converter (utf-8-converter 'wtf-8 'wtf-16)
                       #f)]
-    [(and (string=? from-str "UTF-8-ish-permissive") (string=? to-str "UTF-16-ish"))
-     (bytes-converter (utf-8-converter 'utf-8-ish-permissive 'utf-16-ish)
+    [(and (string=? from-str "WTF-8-permissive") (string=? to-str "WTF-16"))
+     (bytes-converter (utf-8-converter 'wtf-8-permissive 'wtf-16)
                       #f)]
-    [(and (string=? from-str "UTF-16-ish") (string=? to-str "UTF-8-ish"))
-     (bytes-converter (utf-8-converter 'utf-16-ish 'utf-8-ish)
+    [(and (string=? from-str "WTF-16") (string=? to-str "WTF-8"))
+     (bytes-converter (utf-8-converter 'wtf-16 'wtf-8)
                       #f)]
     [(and (or (and (string=? from-str "UTF-8") (string=? to-str ""))
               (and (string=? from-str "") (string=? to-str "UTF-8")))
diff --git a/racket/src/io/converter/utf-8.rkt b/racket/src/io/converter/utf-8.rkt
index bc09ce2916d..22fefc352e1 100644
--- a/racket/src/io/converter/utf-8.rkt
+++ b/racket/src/io/converter/utf-8.rkt
@@ -19,20 +19,20 @@
   (define to (utf-8-converter-to c))
   (define-values (in-consumed out-produced status)
     (if (or (eq? from 'utf-16)
-            (eq? from 'utf-16-ish)
+            (eq? from 'wtf-16)
             (eq? from 'utf-16-assume))
         (utf-16-ish-reencode! src src-start src-end
                               dest dest-start dest-end
-                              #:from-utf-16-ish? (eq? from 'utf-16-ish)
+                              #:from-wtf-16? (eq? from 'wtf-16)
                               #:assume-paired-surrogates? (eq? from 'utf-16-assume))
         (utf-8-ish-reencode! src src-start src-end
                              dest dest-start dest-end
                              #:permissive? (or (eq? from 'utf-8-permissive)
-                                               (eq? from 'utf-8-ish-permissive))
-                             #:from-utf-8-ish? (or (eq? from 'utf-8-ish)
-                                                   (eq? from 'utf-8-ish-permissive))
+                                               (eq? from 'wtf-8-permissive))
+                             #:from-wtf-8? (or (eq? from 'wtf-8)
+                                                   (eq? from 'wtf-8-permissive))
                              #:to-utf-16? (or (eq? to 'utf-16)
-                                              (eq? to 'utf-16-ish)
+                                              (eq? to 'wtf-16)
                                               (eq? to 'utf-16-assume)))))
   (values in-consumed
           out-produced
@@ -44,17 +44,17 @@
 
 ;; Similar to `utf-8-decode` in "../string/utf-8-decode.rkt", but
 ;; "decodes" back to a byte string either as UTF-8 or UTF-16, and also
-;; supports a "utf-8-ish" encoding that allows unpaired surrogates.
+;; supports a WTF-8 encoding that allows unpaired surrogates.
 ;;
 ;; There's a lot of similarly to the implementation of `utf-8-decode`,
 ;; but with enough differences to make abstraction difficult.
 (define (utf-8-ish-reencode! in-bstr in-start in-end
                              out-bstr out-start out-end
                              #:permissive? permissive?
-                             #:from-utf-8-ish? from-utf-8-ish?
+                             #:from-wtf-8? from-wtf-8?
                              #:to-utf-16? to-utf-16?)
   (let loop ([i in-start] [j out-start] [base-i in-start] [accum 0] [remaining 0]
-                          ;; for '-ish' mode to UTF-16:
+                          ;; for WTF-8 mode to WTF-16:
                           [pending-surrogate #f])
 
     ;; Used to write a pending surrogate before continuing to write other:
@@ -196,7 +196,7 @@
                  [(and (v . >= . #xD800)
                        (v . <= . #xDFFF))
                   (cond
-                    [from-utf-8-ish?
+                    [from-wtf-8?
                      ;; Assuming `to-utf-16?`...
                      ;; Allow an unpaired surrogate, but make sure it's really unpaired
                      (cond
@@ -268,7 +268,7 @@
                                   (- j out-start)
                                   'continues)]))]
                     [else
-                     ;; For UTF-8-to-UTF-8 with no "-ish" corrections, we can just copy
+                     ;; For UTF-8-to-UTF-8 (no WTF-8), we can just copy
                      ;; the input encoding bytes to the output bytes
                      (define next-i (add1 i))
                      (let loop ([from-i base-i] [to-j j])
@@ -329,7 +329,7 @@
 ;; Converts UTF-16 into UTF-8
 (define (utf-16-ish-reencode! in-bstr in-start in-end
                               out-bstr out-start out-end
-                              #:from-utf-16-ish? from-utf-16-ish?
+                              #:from-wtf-16? from-wtf-16?
                               #:assume-paired-surrogates? assume-paired-surrogates?)
   (let loop ([i in-start] [j out-start])
     (define (done status)
@@ -378,7 +378,7 @@
                                  (bitwise-ior (arithmetic-shift (bitwise-and v #x3FF) 10)
                                               (bitwise-and v2 #x3FF))))
                    (continue v3 (+ i 4))]
-                  [from-utf-16-ish?
+                  [from-wtf-16?
                    ;; continue anyway as as unpaired surrogate
                    (continue v (+ i 2))]
                   [else
@@ -386,7 +386,7 @@
             [else
              ;; unpaired surrogate
              (cond
-               [from-utf-16-ish?
+               [from-wtf-16?
                 ;; continue anyway
                 (continue v (+ i 2))]
                [else (done 'error)])])]
diff --git a/racket/src/io/demo.rkt b/racket/src/io/demo.rkt
index 8c784127d41..7c33decef89 100644
--- a/racket/src/io/demo.rkt
+++ b/racket/src/io/demo.rkt
@@ -574,7 +574,7 @@
         (call-with-values (lambda () (bytes-convert c #"\360\220\220\200")) list))
   (test (void) (bytes-close-converter c)))
 
-(let ([c (bytes-open-converter "UTF-8-ish" "UTF-16-ish")])
+(let ([c (bytes-open-converter "WTF-8" "WTF-16")])
   (test `(,(reorder #"A\0\200\0") 3 complete)
         (call-with-values (lambda () (bytes-convert c #"A\302\200")) list))
   (test `(,(reorder #"A\0") 1 error)
@@ -586,7 +586,6 @@
   (test `(,(reorder #"\1\334") 3 complete)
         (call-with-values (lambda () (bytes-convert c #"\355\260\201")) list))
   ;; surrogate pair where each is separately encoded, high before low
-  (log-error "here")
   (test `(,(reorder #"") 0 error)
         (call-with-values (lambda () (bytes-convert c #"\355\240\200\355\260\201")) list))
   ;; surrogate pair where each is separately encoded, low before high
@@ -599,7 +598,7 @@
         (call-with-values (lambda () (bytes-convert c #"\360\220\220\200")) list))
   (test (void) (bytes-close-converter c)))
 
-(let ([c (bytes-open-converter "UTF-16-ish" "UTF-8-ish")])
+(let ([c (bytes-open-converter "WTF-16" "WTF-8")])
   (test `(#"A\302\200" 4 complete)
         (call-with-values (lambda () (bytes-convert c (reorder #"A\0\200\0"))) list))
   ;; unpaired high surrogate