-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathEXPRISK.G
121 lines (121 loc) · 6.17 KB
/
EXPRISK.G
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
/* Test program (see also EXPREG.TST): output file = exprisk.out reset;
let y = 8 5 22 16; let n = 106 120 98 85; _clevel = .9;
let x[4,2] = 1 0 0 0 1 1 0 1; t2 = 1000000; let bnames = tolbutam agegt55;
let yname = death; call exprisk(x,y,n,0,1,0,bnames,yname);
call expriskp(x,y,n,0,0,t2,0,1,0,bnames,yname,0);
call exprisk(x~prodr(x),y,n,0,1,0,bnames|"tol*age",yname);
call expriskp(x~prodr(x),y,n,0,0,t2,0,1,0,bnames|"TOL*AGE",yname,0); end; */
/* This procedure does ML exponential risk regression.
Inputs: x = design matrix,
y = vector of case counts,
n = vector of totals (if scalar, all totals will be set to this n),
rep = vector of repetion counts (0 if none),
const = 1 if you wish the program to add a constant to x, 0 if not;
if the first j (>1) columns of x represent a complete set
of indicators for one covariate, const = j will result in
weighted-mean centering of the remaining cols(x)-j covariates,
where the weights are those obtained from the final iteration,
which "approximately diagonalizes" the covariance submatrix
of the j indicator-coefficient (intercept) estimates.
offset = vector of offsets (0 if no offsets),
bnames = vector of coefficient names
(set to 0 if no printed output is desired),
yname = scalar name of outcome (may be 0 if no printed output).
Outputs: b = coefficient estimates,
bcov = inverse expected-information covariance matrix,
bcovo = inverse observed-information covariance matrix,
dev = residual deviance for model,
rss = residual weighted sum of squares,
df = residual degrees of freedom.
Globals (may be set by user from calling program; otherwise defaults are used):
_binit = vector of initial values for b (default is weighted-least
squares estimates; if user-specified and constant is
requested, it must include a value for the constant as its
first element)
_bcriter = convergence criterion for b (default is .001)
_dcriter = convergence criterion for deviance (default is .1)
_maxit = maximum number of iterations (default is 30)
*/
proc (6) = exprisk(x,y,n,rep,const,offset,bnames,yname);
declare matrix _binit = 0; declare matrix _bcriter = .001;
declare matrix _dcriter = .01; declare matrix _maxit = 30;
local t0,neq0,nr,np,df,llsat,b,bold,bcov,
dev,devold,iter,cnv,eta,p,mu,w,bcovo,rss,mcc;
t0 = date; bnames = 0$+bnames; yname = 0$+yname;
if sumc(rep); y = y.*rep; n = n.*rep; endif;
/* Delete empty covariate patterns: */ neq0 = n .eq 0;
if sumc(neq0); x = delif(x,neq0); y = delif(y,neq0); n = delif(n,neq0);
if rows(offset) eq rows(neq0); offset = delif(offset,neq0); endif;
endif;
nr = rows(x);
if bnames[1]; print; format /rds 3,0;
" PROC EXPRISK.G: ML exponential risk regression for "$+yname$+".";
if rows(bnames) ne cols(x);
"INPUT ERROR: No. names not equal to no. of regressors."; end;
endif;
format 5,0; sumc(n);; " total count,";; 2*nr;; " fitted cells,";;
" and ";; format 5,2; sumc(n)/(2*nr);; " average count.";
endif;
if const eq 1; /* add constant: */ x = ones(nr,1)~x; endif;
/* No. parameters: */ np = cols(x);
if rank(x) lt np; "DESIGN MATRIX RANK DEFICIENT IN EXPRISK.G";
retp(0,0,0,0,0); endif;
/* residual degrees of freedom: */ df = nr - np;
if rows(n) eq 1; n = n*ones(nr,1); endif;
/* Saturated loglikelihood + n'ln(n) - sumc(ln(combin(n,y))): */
llsat = y'ln(y./n + (y.==0)) + (n - y)'ln(1 - y./n + (y.==n));
/* Initialize beta, deviance, convergence indicator, counter: */
if rows(_binit) eq np; b = _binit;
else; /* start with WLS estimates: */
p = (y + sumc(y)/sumc(n))./(n + 1);
w = n.*p./(1-p);
trap 1; bcov = invpd(moment(sqrt(w).*x,0));
if scalerr(bcov); "SINGULARITY INITIALIZING EXPRISK.G";
retp(0,0,0,0,0); endif; trap 0;
b = bcov*(x'(w.*(ln(p)-offset)));
endif;
dev = 0; cnv = 0; iter = 0;
/* Begin iterative reweighted LS estimation */
do until cnv or (iter ge _maxit); iter = iter + 1;
/* linear predictors: */ eta = offset + x*b;
/* expected proportions: */ p = exp(eta);
/* expected counts: */ mu = n.*p;
devold = dev; dev = 2*(llsat - (y'eta + (n - y)'ln(1 - p + (p .ge 1))));
if bnames[1]; "Deviance at iteration ";;
format /rds 4,0; iter;; ":";; format 12,3; dev; endif;
/* weight vector: */ w = mu./(1-p);
trap 1; bcov = invpd(moment(sqrt(w).*x,0));
if scalerr(bcov); "SINGULARITY IN EXPRISK.G";
retp(0,0,0,0,0); endif; trap 0;
/* Gauss-Newton step: */ bold = b; b = b + bcov*(x'((y-mu)./(1-p)));
cnv = prodc(abs(b - bold) .lt _bcriter) and (abs(devold - dev) lt _dcriter);
endo;
if iter ge _maxit;
"WARNING: No convergence after ";; format 4,0; iter;; " iterations";
endif;
/* weighted sum of squared residuals: */ rss = (y-mu)'((y-mu)./(mu.*(1-p)));
if const gt 1 and np gt const; local k; k = seqa(const+1,1,np-const);
/* center all covariates after the first j, then recompute bcov & b: */
x[.,k] = x[.,k] - (w/sumc(w))'x[.,k];
bcov = invpd(moment(sqrt(w).*x,0)); b = bcov*(x'(w.*(eta + y./mu - 1)));
endif;
/* invert observed information: */ bcovo = invpd(x'(((1 + (p - y./n)).*w).*x));
if bnames[1]; format 10,4;
"Number of observations with nonzero weight:";; format 10,0; nr;
"Number of coefficients :";; np;
"With standard errors from inverse of expected information matrix:";
rreport(b,sqrt(diag(bcov)),bnames,yname,-1,-1,df,1);
"With standard errors from inverse of observed information matrix:";
mcc = meanc(y)|meanc(n-y);
rreport(b,sqrt(diag(bcovo)),bnames,yname,dev,rss,df,1+(minc(mcc) ge 3));
"The mean case and mean noncase counts are ";; format 6,1; mcc';
if minc(mcc) lt 5;
"The minimum of the mean case & mean noncase counts is less than 5.";
if minc(mcc) ge 3;
"Hence the above deviance and rss tests of fit may be invalid.";
endif;
endif;
"Total run time: ";; etstr(ethsec(t0,date));
endif;
retp(b,bcov,bcovo,dev,rss,df);
endp;