-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathsummaries.rkt
129 lines (98 loc) · 3.67 KB
/
summaries.rkt
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
#lang racket
(provide (all-defined-out))
(provide median mean stddev variance)
;;
;; summaries.rkt - some techniques for computing summary statistics
;;
(require plot)
(require math/base)
(require math/statistics)
;;
;; Summary Statistics, computed either from the posterior or from samples
;;
;; Intervals of defined boundaries, based on the grid posterior
;; compute the mass of probability that satisfies pred
(define (pr-mass-posterior pred? p-grid posterior)
(define relevant-mass
(for/list ([p p-grid]
[d posterior]
#:when (pred? p))
d))
(/ (sum relevant-mass)
(sum posterior)))
#;
(pr-mass-posterior (λ (p) (< p 0.5)) p-grid posterior)
;; Intervals of defined boundaries, based on the samples
;; compute the mass of probability that satisfies pred
(define (pr-mass-samples pred samples)
(/ (count pred samples)
(length samples)))
#;(pr-mass-samples (λ (p) (< p 0.5)) samples)
#;(pr-mass-samples (λ (p) (< 0.5 p 0.75)) samples)
;; DEPRECATED!: quantile is already defined in math/statistics.
;; produce the UPPER boundary of the lower q percent of probability mass
;; (the LOWER boundary is 0).
(define (my-quantile q < samples)
(let ([pos (inexact->exact (ceiling (* q (length samples))))])
(list-ref (sort samples <) pos)))
;; lower and upper boundaries of the *middle* q percent of probability mass
(define (compatibility-interval samples q)
(let* ([split (/ q 2)]
[lo (- 1/2 split)]
[hi (+ 1/2 split)])
(for/list ([p (list lo hi)])
(quantile p < samples)
#;(list p (quantile p < samples)))))
;; Highest Posterior Density (HPD) Interval
;; calculate the minimum interval that contains q percent of probability mass
(define (hpdi samples q)
;; WHY WHY use multiple return values?!?
(define-values (lo hi) (real-hpd-interval q samples))
(list lo hi))
;; calculate the loss (wrt loss function loss-fn) for point p-guess against
;; the grid posterior (p-grid,posterior)
(define (calculate-loss p-guess p-grid posterior loss-fn)
(/ (sum (map (λ (x y) (* (loss-fn p-guess x) y)) p-grid posterior))
(sum posterior)))
;; approximate a point estimate with respect to the given loss-function
;; for the approximate posterior (pgrid,posterior)
(define (point-estimate loss-fn p-grid posterior)
(argmin (λ (pg) (calculate-loss pg p-grid posterior loss-fn))
p-grid))
;; Example loss functions
;; absolute loss: corresponds to the mean (expected value) of the posterior
(define (absolute-loss pg p) (abs (- pg p)))
;; quadratic loss: corresponds to the median of the posterior
(define (quadratic-loss pg p) (sqr (- pg p)))
;; 0-1 loss: corresponds to *a* mode of the posterior (not necessarily unique)
(define (0-1-loss pg p) (if (= pg p) 0 1))
;;
;; Maximum a posterior (for a posterior function)
;;
;; Using nlopt (wrapper for NLopt library) to find map.
(module maximize racket
(require nlopt)
(provide maximize-posterior)
(define (maximize-posterior post)
(define-values
(fst snd)
(optimize/args post 1
#:maximize #t
#:bounds '((0.0 . 1.0))
#:method 'GN_DIRECT_L))
;; no idea what the first value is about!
snd)
) ; module maximize
(require (submod "." maximize))
;;
;; Sampling to simulate prediction
;;
;; code was very specialized...
;; Custom histogram plotting ('cause the plot density plotter
;; is sometimes weird)
(define (mk-hist-coords samples)
(let-values ([(x* y*) (count-samples samples)])
(let ([pre-coords (map vector x* y*)])
(sort pre-coords < #:key (λ (v) (vector-ref v 0))))))
(define (plot-simple-hist samples)
(plot (discrete-histogram (mk-hist-coords samples))))