diff --git a/doc/.vuepress/config.js b/doc/.vuepress/config.js index 0cda87317..250fd10e2 100644 --- a/doc/.vuepress/config.js +++ b/doc/.vuepress/config.js @@ -149,6 +149,7 @@ module.exports = { path: '/reference/std/misc/', children: [ 'misc/', + 'misc/decimal', 'misc/list', 'misc/list-builder', 'misc/alist', diff --git a/doc/reference/std/misc/decimal.md b/doc/reference/std/misc/decimal.md new file mode 100644 index 000000000..1069582d3 --- /dev/null +++ b/doc/reference/std/misc/decimal.md @@ -0,0 +1,235 @@ +# Decimal Numbers + +The `:std/misc/decimal` library provides support for +arbitrary-precision decimal numbers and conversion between them and strings. +This can notably be important for handling financial data without losing precision. + +::: tip To use bindings from this module +``` Scheme +(import :std/misc/decimal) +``` +::: + +Decimal numbers are "just" a subset of rational numbers, and so +all arithmetic operations are already implemented by Gerbil Scheme's +underlying generic number arithmetics (itself handled by Gambit Scheme). +The main remaining issue then is parsing and printing, +conversion to and from strings, since going through +regular floating point parsing and printing will drop decimals. + +Acknowledgements: The code was written with +inspiration from two Common Lisp libraries, +QUUX (see the [snapshot at QITAB](https://qitab.common-lisp.dev/) and +[wu-decimal](https://github.com/Wukix/wu-decimal), +with its own design and implementation improvements. + +## decimal? +``` Scheme +(decimal? x) -> bool +``` + +Given any value `x`, return true if that object is a decimal number, +i.e. a rational number that is not a floating-point number. + +::: tip Examples: +``` Scheme +> (decimal? 13/10) +#t +> (decimal? 1.3) +#f +> (decimal? 13/125) +#t +> (decimal? 'foo) +#f +``` +::: + +## parse-decimal +``` Scheme +(parse-decimal + input + sign-allowed?: (sign-allowed? #t) + decimal-mark: (decimal-mark #\.) + group-separator: (group-separator_ #f) + exponent-allowed: (exponent-allowed_ #f)) -> decimal +``` + +`parse-decimal` expects and parses a decimal number on an `input`, +with the options specifed via keyword arguments. + +The `input` will be cast to a `BufferedStringReader` using +[`open-buffered-string-reader`](../stdio.md#open-buffered-string-reader). +`parse-decimal` will then side-effect this reader as it parses, +and finally return the decimal number, +or raises a `parse-error` (from `:std/parser/base`). + +The keyword arguments `decimal-mark` and `group-separator` are each a character or false, +and specify optional allowed decimal mark and group separator characters, +to support for different (typically cultural) numerical conventions. +For convenience, `group-separator` can also be `#t`, designating the comma character `#\,`. +These two arguments cannot designate the same character. +If `decimal-mark` is false, then and you can only parse integer numbers before the exponent, +and so can only parse integers if `exponent-allowed` is false (the default), +though you can still use exponents if allowed to denote a fractional number (a weird use case). + +The boolean `sign-allowed` controls whether a `+` or `-` sign is accepted or must be omitted. + +`exponent-allowed` is a boolean or a string controlling exponent notation. +Exponent notation follows the syntax for Scheme floats, +with the exception that the exponent marker must be `#\e` or `#\E` when `exponent-allowed` is `#t`, +or the exponent marker must be `char=` to some element of `exponent-allowed` +when `exponent-allowed` is a string. +Exponents, when allowed, can always be signed. + +It is up to the caller to provide an actual `BufferedStringReader` +and process any leading or trailing whitespace and check for `#!eof` +before and/or after calling `parse-decimal`. + +`: PeekableStringReader sign-allowed?:Bool decimal-mark:Char group-separator:(Or Char Bool) exponent-allowed:(or Bool String) -> Decimal` + +You may use utilities from [:std/text/basic-parsers](../text/basic-parsers) +to parse decimals as part of something bigger, or just use `string->decimal` below. + +## string->decimal +``` Scheme +(string->decimal s + sign-allowed?: (sign-allowed? #t) + decimal-mark: (decimal-mark #\.) + group-separator: (group-separator #f) + exponent-allowed: (exponent-allowed #f) + allow-leading-whitespace?: (allow-leading-whitespace? #f) + allow-trailing-whitespace?: (allow-trailing-whitespace? #f) + start: (start 0) + end: (end #f)) +``` +Parse a decimal number from given string `s`. + +The `start` and `end` arguments specify which slice of the string to use +as an interval `[start, end)` of the string indexes +(if `end` is unspecified or false, it designates the length of the string). + +The `sign-allowed?`, `decimal-mark`, `group-separator` and `exponent-allowed` +arguments are as per `parse-decimal`. + +The `allow-leading-whitespace?` (respectively `allow-trailing-whitespace?`) +arguments specify whether whitespace is allowed to be parsed and skipped +before (respectively after) the decimal number as part of the string: + - if the value is `#f` (the default) then no whitespace is allowed + before or after the decimal; + - if the value is `#t`, then any of the strict whitespaces + (space, tab, newline, return) is accepted; + - if the value is a procedure, then this procedure is assumed to be + a unary predicate accepting any character or the `#!eof` marker and + returning true if its argument is a character considered whitespace + for the purpose of skipping before to parse a decimal + (include digits and signs in the list at your peril); + see [:std/text/char-set](../text/char-set.md) + for other whitespace predicates. + +## write-decimal +``` Scheme +(write-decimal number (port (current-output-port)) + scale: (scale #f) + width: (width #f) + integral-digits: (integral-digits #f) + fractional-digits: (fractional-digits #f) + pad: (pad_ #f) + always-decimal?: (always-decimal? #f) + always-sign?: (always-sign? #f) + decimal-mark: (decimal-mark #\.) + precision-loss-behavior: (precision-loss-behavior 'error)) +``` + +Write a decimal `number` to the specified `port` with the given keyword options. + +The `port` is designated as per [`with-output`](../ports.md#with-output). + +The keyword options are as follow: + - An integer `scale` (or `#f` meaning `0`, the default), such that + the number actually printed is the argument `number` notionally multiplied + by ten to that `scale` (default `#f`). + - A natural integer `width` within which to fit the number + or `#f` (the default) for no limitation. + - A minimum number of `integral-digits` to display + or `#f` (the default, same as `0`) for no minimum; + the minimum can notably be 1 to force `#\0` to be printed + before a decimal point even if there are already fractional digits after. + - A minimum number of `fractional-digits` to display + or `#f` (the default, same as `0`) for no minimum; + the minimum can notably be 1 to force `#\0` to be printed + after a decimal point even if there are already integral digits before. + - A character `pad` to print when left-padding for desired width + or `#f` (the default, same as `#\space`); + - A boolean `always-decimal?` (defaults to `#f`) for whether + a decimal mark will always be printed even for integers. + - A boolean `always-sign?` (defaults to `#f`) for whether + a sign will always be printed even for positive numbers. + - A character `decimal-mark` to use as the decimal mark. + - A symbol `precision-loss-behavior` for the behavior on precision loss, + one of `error` (the default), `truncate` or `round`, + in case the digits cannot fit in the space. + +Note that even if `precision-loss-behavior` is `truncate` or `round`, +`write-decimal` may throw an error if the integral part of the `number` is +too large to fit within the given width. + +## decimal->string +``` Scheme +(decimal->string number + scale: (scale #f) width: (width #f) + integral-digits: (integral-digits #f) fractional-digits: (fractional-digits #f) + pad: (pad #f) always-decimal?: (always-decimal? #f) always-sign?: (always-sign? #f) + decimal-mark: (decimal-mark #\.) + precision-loss-behavior: (precision-loss-behavior 'error)) +``` + +`decimal->string` converts a decimal `number` to a string by +calling `write-decimal` with the same number and options. + +## LossOfPrecision LossOfPrecision? + +An error class and its recognizer predicate, for the sake of handling cases when +printing a decimal number results in loss of precision. + +## power-of-5 +``` Scheme +(power-of-5 x) -> nat or false +``` +If `x` is an exact integer that is the `n`th power of 5, return `n`, +otherwise return false. + +## find-decimal-multiplier +``` Scheme +(find-decimal-multiplier d) -> (values integer integer) +``` +Given a positive integer `d`, the reduced denominator of a decimal number, +thus a number of the form `2**m * 5**n`, +compute `c` such that `c*d = c*(2^m*5^n) = 10^max(m,n)` +and returns the two values `c` and `max(m,n)`, +respectively the multiplier required to make the denominator a power of 10, +and which power of 10 you will thus have reached. + +## count-significant-digits +``` Scheme +(count-significant-digits n) -> nat +``` + +Count the number of significant digits to represent the natural integer `n`. + +Exception: for `0`, return `0`, which defies convention for writing integers, +but is the right thing in the context of figuring out how many decimals to use + +## decimal->digits-exponent +``` Scheme +(decimal->digits-exponent decimal) -> (values integer integer) +``` +Given a decimal number `decimal`, return two values: + - the absolute smallest integer with all its non-zero digits, of same sign as decimal. + - the power of ten by which the decimal had to be multiplied to get this integer + (can be positive, zero or negative). + +## digits-exponent->decimal +``` Scheme +(digits-exponent->decimal digits exponent) -> decimal +``` +Given an integer `digits` and an `exponent`, multiply `digits` by 10 to the given power. diff --git a/src/std/build-spec.ss b/src/std/build-spec.ss index 4667f6ec0..004805b57 100644 --- a/src/std/build-spec.ss +++ b/src/std/build-spec.ss @@ -119,7 +119,7 @@ (gxc: "srfi/srfi-135/macros" (extra-inputs: ("srfi/srfi-135/macros.scm"))) (gxc: "srfi/srfi-135/text" (extra-inputs: ("srfi/srfi-135/text.scm"))) "srfi/135" - (gxc: "srfi/141" (extra-inputs: ("srfi/srfi-141.scm"))) + (gxc: "srfi/141") (gxc: "srfi/143" (extra-inputs: ("srfi/srfi-143/carries.scm"))) (gxc: "srfi/144" (extra-inputs: ("srfi/srfi-144/144.constants.scm" "srfi/srfi-144/144.body0.scm" "srfi/srfi-144/144.body.scm" "srfi/srfi-144/144.special.scm"))) diff --git a/src/std/misc/decimal-test.ss b/src/std/misc/decimal-test.ss index beb901e4e..72b7b62bd 100644 --- a/src/std/misc/decimal-test.ss +++ b/src/std/misc/decimal-test.ss @@ -1,63 +1,117 @@ (export decimal-test) (import - :std/srfi/13 - :std/iter :std/misc/decimal - :std/misc/string - :std/parser/base - :std/test) + (only-in :std/iter for in-range) + (only-in :std/parser/base parse-error?) + (only-in :std/error ContractViolation?) + (only-in :std/sugar defrule) + (only-in :std/text/char-set char-ascii-alphabetic?) + (only-in :std/test test-suite test-case check check-exception)) (def decimal-test (test-suite "test suite for std/misc/decimal" + (test-case "power-of-5" + (for (i (in-range 1000)) + (check (power-of-5 (expt 5 i)) => i) + (check (power-of-5 (1- (expt 5 i))) => #f) + (check (power-of-5 (1+ (expt 5 i))) => #f)) + (check (power-of-5 (* (expt 2 2000) (expt 5 880))) => #f) + (check (power-of-5 (* 256 (expt 5 4400))) => #f)) + (test-case "decimal?" + (defrule (checks res val ...) + (begin (check (decimal? val) => res) ...)) + (checks #t 0 1 -5 42 1/10 1/4 -1/5 3/125 1/1000) + (checks #f 0.0 'foo "1" -1.2 +inf.0 1/42 .001 #() (void) '())) (test-case "parse decimal" - (for-each - (match <> ([r . a] (check-equal? (apply string->decimal a) r))) - '((0 "0") - (0 "0.0") - (0 "0.") - (0 ".0") - (157/50 "3.14") - (317000 "317000") - (3170000 "3.17e6" exponent-allowed: #t) - (317/100000000 "3.17e-6" exponent-allowed: #t) - (-317/100000000 "-3.17e-6" exponent-allowed: #t)))) + (defrule (checks (res str . opts) ...) + (begin (check (string->decimal str . opts) => res) ...)) + (checks + (0 "0") + (0 "0.0") + (0 "0.") + (0 ".0") + ((/ (expt 10 100)) (string-append "." (make-string 99 #\0) "1")) + ((/ (expt 10 1000)) (string-append "." (make-string 999 #\0) "1")) + (157/50 "3.14") + (157/50 "+3.14") + (-157/50 "-3.14") + (317000 "317000") + (1234/1000 "1,234" decimal-mark: #\,) + (1234567890 "1,234,567,890" group-separator: #t) + (1234567890 "1.234.567.890" group-separator: #\. decimal-mark: #f) + (1234/1000 "1234e-3" decimal-mark: #f exponent-allowed: #t) + (1234567890123/1000 "1.234.567.890,123" group-separator: #\. decimal-mark: #\,) + (3170000 "3.17e6" exponent-allowed: #t) + (317/100000000 "3.17e-6" exponent-allowed: #t) + (317/100000000 "3.17e-6" exponent-allowed: #t sign-allowed?: #f) + (-317/100000000 "-3.17e-6" exponent-allowed: #t) + (157/50 " 3.14 " allow-leading-whitespace?: #t allow-trailing-whitespace?: #t) + (157/50 "PI3.14JUNK" + allow-leading-whitespace?: char-ascii-alphabetic? + allow-trailing-whitespace?: char-ascii-alphabetic?))) (test-case "parse decimal error" - (for-each - (lambda (a) (check-exception (apply string->decimal a) parse-error?)) - '((".") - ("3.17e6") - ("3.17e" exponent-allowed: #t) - ("-3.17e-6" exponent-allowed: #t sign-allowed?: #f) - ("-")))) + (defrule (checks (err str . opts) ...) + (begin (check-exception (string->decimal str . opts) err) ...)) + (checks + (parse-error? ".") + (parse-error? "+3.17" sign-allowed?: #f) + (parse-error? "-3.17" sign-allowed?: #f) + (parse-error? " 1.0") + (parse-error? "1.0 ") + (parse-error? "1.234.567") + (parse-error? "1,234,567,890") + (ContractViolation? "1" group-separator: #\.) + (parse-error? "1,234,567,890" decimal-mark: #\,) + (parse-error? "1,234,567,890" group-separator: #f) + (parse-error? "3.17e6") ;; exponent not allowed by default + (parse-error? "3.17e" exponent-allowed: #t) + (parse-error? "-3.17e-6" exponent-allowed: #t sign-allowed?: #f) + (parse-error? "-"))) + (test-case "count-significant-digits" + (defrule (checks res val ...) (begin (check (count-significant-digits val) => res) ...)) + (checks 1 0 1 2 3 4 5 6 7 8 9) + (checks 2 10 11 42 69 98 99) + (checks 3 100 101 128 256 512 666 998 999) + (checks 4 1000 1001 1024 1729 8192 9998 9999) + (checks 5 10000 10001 55555 99999) + (for (i (in-range 1000)) + (unless (zero? i) (check (count-significant-digits (1- (expt 10 i))) => i)) + (check (count-significant-digits (expt 10 i)) => (1+ i)))) + (test-case "find-decimal-multiplier" + (defrule (checks (d c m) ...) + (begin (check (values->list (find-decimal-multiplier d)) => [c m]) ...)) + (checks (5 2 1) (2 5 1) (12500 8 5))) + (test-case "decimal->digits-exponent, digits-exponent->decimal" + (defrule (checks (n d e) ...) + (begin (check (values->list (decimal->digits-exponent n)) => [d e]) ... + (check (digits-exponent->decimal d e) => n) ...)) + (checks (0 0 0) (1 1 0) (1/1000 1 -3) (1000 1 3) + (314 314 0) (157/50 314 -2) (31400 314 2) + (123400000 1234 5) (1234/100000000 1234 -8))) (test-case "print decimal" - (for-each - (match <> ([r n . a] - (def x (apply decimal->string n a)) - (check-equal? x r))) - '(("0" 0) - ("3.14" 157/50) - ("42." 42 always-decimal?: #t) - ("1.0" 1 fractional-digits: 1) - ("007" 7 width: 3 pad: #\0) - ("10" 10 width: 2 pad: #\0) ;; <--- known bug; the original ported code always assumed a . - ("10" 10 integral-digits: 2) - ("0010" 10 integral-digits: 4) - ("+317000" 317000 always-sign?: #t) - ("-.00000317" -317/100000000 integral-digits: 0) - ("-0.00000317" -317/100000000 integral-digits: 1) - ("-.00000317" -317 scale: -8) - ("31.700" 317 scale: -1 fractional-digits: 3) - (".31" 317 scale: -3 width: 3 precision-loss-behavior: truncate) - (".32" 317 scale: -3 width: 3 precision-loss-behavior: round) - ("3.14" 314 scale: -2) - (" 3.140" 314 width: 20 scale: -2 fractional-digits: 3) - (" 3.14" 314/1000 width: 6 scale: 1 fractional-digits: 2) - (".314" 314/1000)))) + (defrule (checks (s d . a) ...) + (begin (check (decimal->string d . a) => s) ...)) + (checks + ("0" 0) + ("1000" 1000) + ("3.14" 157/50) + ("42." 42 always-decimal?: #t) + ("1.0" 1 fractional-digits: 1) + ("007" 7 width: 3 pad: #\0) + ("10" 10 width: 2 pad: #\0) ;; <--- known bug; the original ported code always assumed a . + ("10" 10 integral-digits: 2) + ("0010" 10 integral-digits: 4) + ("+317000" 317000 always-sign?: #t) + ("-.00000317" -317/100000000 integral-digits: 0) + ("-0.00000317" -317/100000000 integral-digits: 1) + ("-.00000317" -317 scale: -8) + ("31.700" 317 scale: -1 fractional-digits: 3) + (".31" 317 scale: -3 width: 3 precision-loss-behavior: 'truncate) + (".32" 317 scale: -3 width: 3 precision-loss-behavior: 'round) + ("3.14" 314 scale: -2) + (" 3.140" 314 width: 20 scale: -2 fractional-digits: 3) + (" 3.14" 314/1000 width: 6 scale: 1 fractional-digits: 2) + (".314" 314/1000))) (test-case "print decimal error" - (check-exception (decimal->string 317/100 width: 3) LossOfPrecision?)) - (test-case "power-of-5?" - (for (i (in-range 1000)) - (check (power-of-5? (expt 5 i)) => #t) - (check (power-of-5? (1- (expt 5 i))) => #f) - (check (power-of-5? (1+ (expt 5 i))) => #f))))) + (check-exception (decimal->string 317/100 width: 3) LossOfPrecision?)))) diff --git a/src/std/misc/decimal.ss b/src/std/misc/decimal.ss index 1b00b3d40..0a9e904b1 100644 --- a/src/std/misc/decimal.ss +++ b/src/std/misc/decimal.ss @@ -1,40 +1,52 @@ -(import - :gerbil/gambit - :std/srfi/141 - :std/error - :std/contract - :std/io - :std/iter - :std/misc/number - :std/parser/base - :std/sugar - :std/text/basic-parsers - :std/text/basic-printers - :std/text/char-set - :std/values) +(export + decimal? + parse-decimal + string->decimal + write-decimal + decimal->string + LossOfPrecision + LossOfPrecision? + power-of-5 + find-decimal-multiplier + count-significant-digits + decimal->digits-exponent + digits-exponent->decimal) -(export decimal? parse-decimal string->decimal write-decimal decimal->string - LossOfPrecision LossOfPrecision? - power-of-5? - find-decimal-multiplier - count-significant-digits nat->significant-digits - decimal->digits-exponent digits-exponent->decimal) +(import + (only-in :std/srfi/141 round/ truncate/ floor/) + (only-in :std/error check-argument raise-bad-argument + deferror-class raise/context exception-context) + (only-in :std/contract using) + (only-in :std/io PeekableStringReader open-buffered-string-reader + PeekableStringReader-read-char PeekableStringReader-peek-char) + (only-in :std/misc/number decrement! nat? integer-part + integer-log factor-out-powers factor-out-powers-of-2) + (only-in :std/misc/ports with-output) + (only-in :std/parser/base raise-parse-error) + (only-in :std/sugar syntax-eval) + (only-in :std/text/basic-parsers parse-and-skip-any-whitespace parse-eof) + (only-in :std/text/basic-printers write-n-chars) + (only-in :std/text/char-set digit-char char-ascii-digit char-strict-whitespace?) + (only-in :std/values first-value)) ;; : Any -> Bool (def (decimal? x) - (and (rational? x) - (power-of-5? (first-value (factor-out-powers-of-2 (denominator x)))))) + (or (exact-integer? x) + (and (##ratnum? x) + (power-of-5 (first-value (factor-out-powers-of-2 (denominator x)))) + #t))) -;; : Integer -> Bool -(def (power-of-5? n) +;; : Integer -> (OrFalse Nat) +(def (power-of-5 n) (and (exact-integer? n) (positive? n) - (if (< (integer-length n) 1024) ;; number small enough to be converter to double float + (if (< (integer-length n) 1024) ;; number small enough to be converted to double float (let (l (integer-part (round (log n 5)))) ;; no loss of precision below 440 - (= n (expt 5 l))) - (let*-values (((p) (expt 5 440)) ;; largest power of five under 2**1023 - ((l) (integer-log n p)) - ((q r) (floor/ n (expt p l)))) - (and (zero? r) (power-of-5? q)))))) ;; reduce to the simpler problem above + (and (= n (expt 5 l)) l)) + (let*-values (((p) (syntax-eval (expt 5 440))) ;; largest power of five under 2**1023 + ((q k) (factor-out-powers n p))) + (and (< q p) + (let (l (power-of-5 q)) + (and l (+ l (* 440 k))))))))) ;; `parse-decimal` expects and parses a decimal number on the PeekableStringReader. ;; The character parameters `decimal-mark` and `group-separator` provide @@ -58,7 +70,7 @@ exponent-allowed: (exponent-allowed_ #f)) (def reader (PeekableStringReader (open-buffered-string-reader pre-reader))) (check-argument (boolean? sign-allowed?) "boolean" sign-allowed?) - (check-argument (char? decimal-mark) "char" decimal-mark) + (check-argument (or (char? decimal-mark) (boolean? decimal-mark)) "char or boolean" decimal-mark) (check-argument (or (boolean? group-separator_) (char? group-separator_)) "boolean or char" group-separator_) (check-argument (or (boolean? exponent-allowed_) (string? exponent-allowed_)) @@ -176,12 +188,18 @@ end: (end_ #f)) (def l (string-length s)) (def end (or end_ l)) + (def (make-space? allow-whitespace?) + (cond + ((eq? allow-whitespace? #t) char-strict-whitespace?) + ((procedure? allow-whitespace?) allow-whitespace?) + (else (raise-bad-argument string->decimal "allow-*-whitespace? to be #t or a character predicate" + allow-whitespace?)))) (call-with-input-string (if (and (zero? start) (= end l)) s (substring s start end)) (lambda (port) (def reader (PeekableStringReader (open-buffered-string-reader port))) (when allow-leading-whitespace? - (parse-and-skip-any-whitespace reader)) + (parse-and-skip-any-whitespace reader (make-space? allow-leading-whitespace?))) (begin0 (parse-decimal reader sign-allowed?: sign-allowed? @@ -189,63 +207,57 @@ group-separator: group-separator exponent-allowed: exponent-allowed) (when allow-trailing-whitespace? - (parse-and-skip-any-whitespace reader)) + (parse-and-skip-any-whitespace reader (make-space? allow-trailing-whitespace?))) (parse-eof reader))))) -;; Given an integer d of the form 2^m*5^n (reduced denominator of a decimal number), +;; Given a positive integer d of the form 2^m*5^n (reduced denominator of a decimal number), ;; compute c such that c*d = c*(2^m*5^n) = 10^max(m,n). ;; Returns c and max(m,n). -;; : Nat -> Nat Nat +;; : Nat+ -> Nat+ Nat (def (find-decimal-multiplier d) (define-values (5^n m) (factor-out-powers-of-2 d)) - (def n (integer-log 5^n 5)) + (def n (power-of-5 5^n)) + (check-argument n "divisor of a power of 10" d) ;; We check that the answer is correct before returning it to the caller. - (check-argument (= d (* (arithmetic-shift 1 m) (expt 5 n))) "divisor of power of 10" d) (if (> m n) (values (expt 5 (- m n)) m) (values (arithmetic-shift 1 (- n m)) n))) ;; Count the number of significant digits to represent this natural integer. ;; For 0, return 0. -;; TODO: document and check the limit conditions of validity of the algorithm ;; : Nat -> Nat (def (count-significant-digits n) (check-argument (nat? n) "natural" n) - (if (zero? n) - 0 - (let (l0 (integer-part (log n 10))) - (let loop ((l l0) (a (quotient n (expt 10 l0)))) - (if (< a 1) l (loop (1+ l) (quotient a 10))))))) - -;; Converts an integer into a base 10 string. The sign is ignored. -;; For 0 becomes an empty string "" rather than "0". -;; : Nat -> String -(def (nat->significant-digits n) - (check-argument (nat? n) "natural" n) - (let* ((digit-count (count-significant-digits n)) - (str (make-string digit-count)) - (remainder 0)) - (let loop ((a n) - (i (1- digit-count))) - (if (negative? i) str - (let-values (((q r) (truncate/ a 10))) - (string-set! str i (digit-char r 10)) - (loop q (1- i))))))) + (cond + ((zero? n) 1) ;; special case: 0 requires 1 digit to display + ;; We'd like to use the below formula for small enough numbers, except that + ;; the floating point approximation is sometimes slightly off: + ;; for instance (log 1000 10) = 2.9999999999999996 instead of 3. + ;; TODO: use the formula as a heuristic then adjust by ±1 as needed, + ;; and it should be faster than integer-log + #;((< (integer-length n) 1024) (1+ (integer-part (log n 10)))) + (else (1+ (integer-log n 10))))) ;; Given a decimal number, return -;; - The absolute smallest integer with all its digits -;; - the non-negative power of ten by which the decimal had to be multiplied to get this integer. -;; Maybe we should find the valuation of the decimal in base 10, -;; and return the least, negative, number when appropriate? -;; : Decimal -> Integer Nat +;; - The absolute smallest integer with all its digits, excluding the all-zeros to the right and left +;; - the power of ten by which the decimal had to be multiplied to get this integer. +;; : Decimal -> Integer Integer (def (decimal->digits-exponent decimal) - (defvalues (c m) (find-decimal-multiplier (denominator decimal))) - (values (* (numerator decimal) c) m)) + (cond + ((zero? decimal) (values 0 0)) + ((exact-integer? decimal) + (let*-values (((r 2s) (factor-out-powers-of-2 decimal)) + ((_ 5s) (factor-out-powers r 5)) + ((m) (min 2s 5s))) + (values (/ decimal (expt 10 m)) m))) + (else + (let-values (((c m) (find-decimal-multiplier (denominator decimal)))) + (values (* (numerator decimal) c) (- m)))))) -;; From an integer number for the digits and an exponent for the negative powers of 10, +;; From an integer number for the digits and an exponent for a power of 10, ;; return a decimal number. (def (digits-exponent->decimal digits exponent) - (/ digits (expt 10 exponent))) + (* digits (expt 10 exponent))) ;; Attempted print operation would lose precision. See precision-loss-behavior. (deferror-class LossOfPrecision ()) @@ -270,24 +282,26 @@ ;; always-decimal?:Bool \ ;; decimal-mark:Char \ ;; precision-loss-behavior:(Enum error truncate round) -> String -(def (%decimal->digits n scale: (scale #f) width: (width #f) - integral-digits: (integral-digits #f) - fractional-digits: (fractional-digits #f) +(def (%decimal->digits n scale: (scale_ #f) width: (width #f) + integral-digits: (integral-digits_ #f) + fractional-digits: (fractional-digits_ #f) always-decimal?: (always-decimal? #f) decimal-mark: (decimal-mark #\.) precision-loss-behavior: (precision-loss-behavior 'error)) - (unless integral-digits (set! integral-digits 0)) - (unless fractional-digits (set! fractional-digits 0)) + (def scale (or scale_ 0)) + (def integral-digits (or integral-digits_ 0)) + (def fractional-digits (or fractional-digits_ 0)) (let/cc return + ;; special case: zero always shows a "0" even if no digits are explicitly required. (when (and (zero? n) (zero? integral-digits) (zero? fractional-digits)) - (return (if always-decimal? "0." "0"))) + (return (if always-decimal? (list->string [#\0 decimal-mark]) "0"))) ;; Integer with the significant digits, number of fractional digits in it (defvalues (all-digits denominator-power) (decimal->digits-exponent n)) ;; Total number of significant digits in number (def digit-count (count-significant-digits all-digits)) ;; Where is the decimal mark relative to the start of the significant digits (0: just before) - (def decimal-mark-index (- (+ digit-count (or scale 0)) denominator-power)) + (def decimal-mark-index (+ digit-count scale denominator-power)) ;; How many 0s to add in front of the digits for the integer part? (def integral-left-padding (max 0 (- integral-digits (max 0 decimal-mark-index)))) ;; How many digits to copy from all-digits for the integer part? @@ -323,7 +337,7 @@ (def (digits) ;; A string with the significant digits - (def significant-digits (nat->significant-digits all-digits)) + (def significant-digits (##exact-int->string all-digits)) ;; The target string (def string (make-string effective-length #\0)) (string-copy! string integral-left-padding significant-digits 0 integral-digits-copied) @@ -335,8 +349,8 @@ string) (def (disallowed-loss-of-precision) - (raise (LossOfPrecision "loss of precision" irritants: [n] - where: (exception-context disallowed-loss-of-precision)))) + (raise/context (LossOfPrecision "loss of precision" irritants: [n] + where: (exception-context disallowed-loss-of-precision)))) (cond ((>= 0 extra-width) @@ -394,25 +408,25 @@ always-sign?: (always-sign? #f) decimal-mark: (decimal-mark #\.) precision-loss-behavior: (precision-loss-behavior 'error)) - (def pad (or pad_ #\space)) - (def spaceleft width) - (when (and width (or always-sign? (> 0 number))) - (decrement! spaceleft)) - (def digits (%decimal->digits (abs number) scale: scale width: spaceleft - integral-digits: integral-digits - fractional-digits: fractional-digits - always-decimal?: always-decimal? - decimal-mark: decimal-mark - precision-loss-behavior: precision-loss-behavior)) - (when width - (decrement! spaceleft (string-length digits)) - (write-n-chars spaceleft pad port)) - (if (> 0 number) - (write-char #\- port) - (when always-sign? - (write-char #\+ port))) - (display digits port) - (void)) + (with-output (port) + (def pad (or pad_ #\space)) + (def spaceleft width) + (when (and width (or (negative? number) (and always-sign? (not (zero? number))))) + (decrement! spaceleft)) + (def digits (%decimal->digits (abs number) scale: scale width: spaceleft + integral-digits: integral-digits + fractional-digits: fractional-digits + always-decimal?: always-decimal? + decimal-mark: decimal-mark + precision-loss-behavior: precision-loss-behavior)) + (when width + (decrement! spaceleft (string-length digits)) + (write-n-chars spaceleft pad port)) + (cond + ((negative? number) (write-char #\- port)) + ((zero? number) (void)) + (always-sign? (write-char #\+ port))) + (display digits port))) ;; Given ;; - a decimal number, diff --git a/src/std/text/char-set.ss b/src/std/text/char-set.ss index 7b11a37b5..b2cc73b70 100644 --- a/src/std/text/char-set.ss +++ b/src/std/text/char-set.ss @@ -67,7 +67,7 @@ ;; Whitespace as defined by C, C++ and Python. ;; : Codepoint -> Bool (def-codepoint (ascii-whitespace? c) - (or (codepoint-ascii-whitespace? c) + (or (codepoint-strict-whitespace? c) (= c #x0B) ;; #\vtab (vertical tab) C'\v' (= c #x0C))) ;; #\page (page break, form feed) C'\f'