-
Notifications
You must be signed in to change notification settings - Fork 29
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
6 changed files
with
761 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,82 @@ | ||
--- | ||
title: "CampaignEffects" | ||
author: "John Mount" | ||
date: "March 24, 2015" | ||
output: html_document | ||
--- | ||
|
||
```{r} | ||
source('functions.R') | ||
``` | ||
|
||
```{r} | ||
# use case 1: user enters two success probability estimates, two success valuations | ||
# and optionally overrides errorProbability, and relativeError | ||
# we compute an experiment plan and show a typical expected outcome | ||
tabExperiment <- data.frame( | ||
Label=c('campaign1','campaign2'), | ||
Probability=c(0.05,0.025), | ||
ValueSuccess=c(2,3)) | ||
errorProbability=0.05 | ||
relativeError=0.2 | ||
# end of user input | ||
tabExperiment$expectedValue <- tabExperiment$Probability*tabExperiment$ValueSuccess | ||
print(tabExperiment) | ||
sizes <- heuristicPowerPlan(tabExperiment) | ||
print(sizes) | ||
tabTypical <- typicalTable(tabExperiment,sizes) | ||
print(tabTypical) | ||
tabGraphs <- sampleGraph(tabExperiment,sizes) | ||
plotSample(tabGraphs) | ||
computeProbsGES(tabExperiment,tabGraphs) | ||
g2 = build2dProbGraph(tabGraphs,'campaign1','campaign2') | ||
ggplot() + | ||
geom_contour(data=g2,aes(x=v1,y=v2,z=d)) + | ||
xlab('campaign1') + ylab('campaign2') + | ||
geom_abline(slope=1) + coord_fixed() + expand_limits(x = 0, y = 0) | ||
``` | ||
|
||
|
||
|
||
|
||
```{r} | ||
# use case 2: user enters an experiment result | ||
tabValue <- data.frame(Label=c('Campaign$2','Campaign$4'), | ||
Actions=c(10000,10000), | ||
Successes=c(412,255), | ||
ValueSuccess=c(2,4)) | ||
# end of user input | ||
tabValue$Probability <- tabValue$Successes/tabValue$Actions | ||
tabValue$expectedValue <- tabValue$Probability*tabValue$ValueSuccess | ||
print(tabValue) | ||
valueGraphs <- posteriorGraph(tabValue) | ||
plotPosterior(valueGraphs) | ||
computeProbsGEP(tabValue,valueGraphs$graph) | ||
g = build2dProbGraph(valueGraphs$graph,'Campaign$2','Campaign$4') | ||
ggplot() + | ||
geom_contour(data=g,aes(x=v1,y=v2,z=d)) + | ||
xlab('Campaign$2') + ylab('Campaign$4') + | ||
geom_abline(slope=1) + coord_fixed() + expand_limits(x = 0, y = 0) | ||
``` | ||
|
||
|
||
```{r} | ||
# stacked example | ||
tabValue <- data.frame(Label=c('Campaign1','Campaign2'), | ||
Actions=c(10,10), | ||
Successes=c(1,1), | ||
ValueSuccess=c(1,1)) | ||
# end of user input | ||
tabValue$Probability <- tabValue$Successes/tabValue$Actions | ||
tabValue$expectedValue <- tabValue$Probability*tabValue$ValueSuccess | ||
print(tabValue) | ||
valueGraphs <- posteriorGraph(tabValue) | ||
plotPosterior(valueGraphs) | ||
computeProbsGEP(tabValue,valueGraphs$graph) | ||
g = build2dProbGraph(valueGraphs$graph,'Campaign1','Campaign2') | ||
ggplot() + | ||
geom_contour(data=g,aes(x=v1,y=v2,z=d)) + | ||
xlab('Campaign1') + ylab('Campaign2') + | ||
geom_abline(slope=1) + coord_fixed() + expand_limits(x = 0, y = 0) | ||
``` |
Large diffs are not rendered by default.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,13 @@ | ||
Version: 1.0 | ||
|
||
RestoreWorkspace: Default | ||
SaveWorkspace: Default | ||
AlwaysSaveHistory: Default | ||
|
||
EnableCodeIndexing: Yes | ||
UseSpacesForTab: Yes | ||
NumSpacesForTab: 2 | ||
Encoding: UTF-8 | ||
|
||
RnwWeave: Sweave | ||
LaTeX: pdfLaTeX |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,251 @@ | ||
# libraries and functions | ||
#library('ggplot2') | ||
|
||
|
||
# compute approximate experiment sizes | ||
# see http://www.win-vector.com/blog/2013/12/sample-size-and-power-for-rare-events/ | ||
# valueTab: table with at least columns (Probability,ValueSuccess) | ||
heuristicPowerPlan <- function(valueTab,errorProbability=0.05,relativeError=0.2) { | ||
meanValues <- valueTab$Probability*valueTab$ValueSuccess | ||
big <- max(meanValues) | ||
slop <- pmin(relativeError*big/valueTab$ValueSuccess,1) | ||
ceiling(-log(errorProbability)*valueTab$Probability/(slop^2)) | ||
} | ||
|
||
|
||
# build a typical table (assume parameters are dead-on) | ||
# valueTab: table with at least columns (Label,Probability,ValueSuccess) | ||
typicalTable <- function(valueTab,sizes) { | ||
n <- nrow(valueTab) | ||
sizes <- round(sizes) | ||
Successes <- vapply(seq_len(n), | ||
function(i) {rbinom(1, | ||
size=sizes[i], | ||
prob=valueTab$Probability[i])}, | ||
numeric(1)) | ||
data.frame(Label=valueTab$Label, | ||
Actions=sizes, | ||
Successes=Successes, | ||
ValueSuccess=valueTab$ValueSuccess) | ||
} | ||
|
||
|
||
# compute probilities of observations given parameters | ||
# valueTab: table with at least columns (Label,Probability,ValueSuccess) | ||
sampleGraph <- function(tab,sizes,epsilon=1.0e-3) { | ||
sList <- lapply(seq_len(nrow(tab)), | ||
function(i) { | ||
lowWater <- max(0,floor(qbinom(epsilon, | ||
size=sizes[i], | ||
prob=tab$Probability[i]))) | ||
highWater <- min(sizes[i],ceiling(qbinom(1.0-epsilon, | ||
size=sizes[i], | ||
prob=tab$Probability[i]))) | ||
if(highWater-lowWater>500) { | ||
counts <- sort(unique(round(seq(lowWater,highWater,length.out=201)))) | ||
} else { | ||
counts <- lowWater:highWater | ||
} | ||
nc <- length(counts) | ||
# successes observed | ||
probs <- dbinom(counts, | ||
size=sizes[i], | ||
prob=tab$Probability[i]) | ||
data.frame(density=sizes[i]*probs/tab$ValueSuccess[i], | ||
value=counts*tab$ValueSuccess[i]/sizes[i], | ||
Label=tab$Label[i]) | ||
}) | ||
do.call('rbind',sList) | ||
} | ||
|
||
# given a single intensity plot possible observations | ||
# planTab: table with at least columns (Label,Probability,ValueSuccess) | ||
# p: sampleGraph(planTab) | ||
computeProbsGES <- function(planTab,p,computeLoss=FALSE) { | ||
# compute some odds | ||
planTab$e <- planTab$Probability*planTab$ValueSuccess | ||
deals <- sort(unique(p$Label)) | ||
combs <- combn(deals,2) | ||
pG <- c() | ||
for(j in seq_len(ncol(combs))) { | ||
d1 <- combs[1,j] | ||
d2 <- combs[2,j] | ||
g1 <- p[p$Label==d1,,drop=FALSE] | ||
g2 <- p[p$Label==d2,,drop=FALSE] | ||
e1 <- planTab$e[planTab$Label==d1] | ||
e2 <- planTab$e[planTab$Label==d2] | ||
oneBest <- e1>=e2 | ||
absE <- abs(e1-e2) | ||
totProb <- 0 | ||
totInd <- 0 | ||
totLoss <- 0 | ||
for(i1 in 1:nrow(g1)) { | ||
probii <- g1[i1,'density']*g2[,'density'] | ||
condii <- g1[i1,'value']>=g2[,'value'] | ||
# idea for loass is: each time the wrong campaign seems best | ||
# we lose the ideal difference in expecations | ||
lossii <- ifelse(condii==oneBest,0,absE) | ||
totLoss <- totLoss + sum(lossii*probii) | ||
totProb <- totProb + sum(probii) | ||
totInd <- totInd + sum(ifelse(condii,probii,0)) | ||
} | ||
p1Greater2 <- totInd/totProb | ||
campaignSwitchExposure <- totLoss/totProb | ||
pG <- rbind(pG,data.frame(Deal1=d1, | ||
Deal2=d2, | ||
p1Greater2=p1Greater2 | ||
)) | ||
if(computeLoss) { | ||
pG$campaignSwitchExposure=campaignSwitchExposure | ||
} | ||
} | ||
pG | ||
} | ||
|
||
|
||
wQuantile <- function(values,weights,cut) { | ||
if(cut<=0) { | ||
return(min(values)) | ||
} | ||
if(cut>=1) { | ||
return(max(values)) | ||
} | ||
perm <- order(values) | ||
values <- values[perm] | ||
weights <- weights[perm] | ||
total <- sum(weights) | ||
running <- cumsum(weights) | ||
idx <- match(TRUE,running>=cut*total) | ||
if(is.na(idx)) { | ||
return(max(values)) | ||
} | ||
values[idx] | ||
} | ||
|
||
plotSample <- function(g) { | ||
ggplot() + | ||
geom_line(data=g,aes(x=value,y=density,color=Label)) + | ||
xlab('Expected Value per Action (dollars)') + | ||
scale_x_continuous(limits = c(wQuantile(g$value,g$density,0.001), | ||
wQuantile(g$value,g$density,0.999))) + | ||
ggtitle('distribution of possible outcomes') | ||
} | ||
|
||
# build detailed curves of posterior intensity estimates | ||
# tab: an experiment table with our standard columns (Label,Actions,Successes,ValueSuccess) | ||
posteriorGraph <- function(tab,epsilon=1.0e-4) { | ||
p <- c() | ||
s <- c() | ||
for(i in seq_len(nrow(tab))) { | ||
# ai, bi posterior beta paremers assume prior a0=0.5,b0=0.5 (Jeffreys) | ||
ai <- 0.5 + tab$Successes[i] | ||
bi <- 0.5 + tab$Actions[i] - tab$Successes[i] | ||
lowWater <- qbeta(epsilon,shape1=ai,shape2=bi) | ||
highWater <- qbeta(1-epsilon,shape1=ai,shape2=bi) | ||
intensities <- seq(lowWater,highWater,(highWater-lowWater)/201) | ||
pi <- data.frame(density=dbeta(intensities,shape1=ai,shape2=bi)/tab$ValueSuccess[i], | ||
value=intensities*tab$ValueSuccess[i], | ||
Label=tab$Label[i]) | ||
p <- rbind(p,pi) | ||
medIntensity <- qbeta(0.5,shape1=ai,shape2=bi) | ||
meanIntensity <- (ai/(ai+bi)) | ||
modeIntensity <- ((ai-1)/(ai+bi-2)) | ||
si <- data.frame(median=medIntensity*tab$ValueSuccess[i], | ||
mean=meanIntensity*tab$ValueSuccess[i], | ||
mode=modeIntensity*tab$ValueSuccess[i], | ||
Label=tab$Label[i]) | ||
si$medianY <- dbeta(medIntensity,shape1=ai,shape2=bi)/tab$ValueSuccess[i] | ||
si$meanY <- dbeta(meanIntensity,shape1=ai,shape2=bi)/tab$ValueSuccess[i] | ||
si$modeY <- dbeta(modeIntensity,shape1=ai,shape2=bi)/tab$ValueSuccess[i] | ||
s <- rbind(s,si) | ||
} | ||
list(graph=p,summary=s) | ||
} | ||
|
||
plotPosterior <- function(p) { | ||
ggplot() + | ||
geom_line(data=p$graph,aes(x=value,y=density,color=Label)) + | ||
xlab('Action To Success Value (dollars)') + | ||
geom_vline(data=p$summary,aes(xintercept=median,color=Label)) + | ||
geom_point(data=p$summary,aes(x=mean,y=meanY,color=Label),shape=3) + | ||
geom_point(data=p$summary,aes(x=mode,y=modeY,color=Label),sahpe=2) + | ||
scale_x_continuous(limits = c(wQuantile(p$graph$value,p$graph$density,0.001), | ||
wQuantile(p$graph$value,p$graph$density,0.999))) + | ||
ggtitle('Bayesian posterior action to success value estimates') | ||
} | ||
|
||
# given a single outcome plot posterior probabilities of unknown intensities | ||
# tab: an experiment table with our standard columns (Label,Actions,Successes,ValueSuccess) | ||
# p: posteriorGraph(tab) | ||
computeProbsGEP <- function(tab,p,computeLoss=TRUE) { | ||
# compute some odds | ||
tab$e <- tab$Successes*tab$ValueSuccess/tab$Actions | ||
deals <- sort(unique(p$Label)) | ||
combs <- combn(deals,2) | ||
pG <- c() | ||
for(j in seq_len(ncol(combs))) { | ||
d1 <- combs[1,j] | ||
d2 <- combs[2,j] | ||
g1 <- p[p$Label==d1,,drop=FALSE] | ||
g2 <- p[p$Label==d2,,drop=FALSE] | ||
e1 <- tab$e[tab$Label==d1] | ||
e2 <- tab$e[tab$Label==d2] | ||
oneBest <- e1>=e2 | ||
totProb <- 0 | ||
totInd <- 0 | ||
totLoss <- 0 | ||
for(i1 in 1:nrow(g1)) { | ||
probii <- g1[i1,'density']*g2[,'density'] | ||
condii <- g1[i1,'value']>=g2[,'value'] | ||
totProb <- totProb + sum(probii) | ||
totInd <- totInd + sum(ifelse(condii,probii,0)) | ||
# idea for loss is: each time the unknown intensity of the unchosen | ||
# campaign is not the highest we lose the current difference. | ||
lossii <- ifelse(condii==oneBest,0,abs(g1[i1,'value']-g2[,'value'])) | ||
totLoss <- totLoss + sum(lossii*probii) | ||
} | ||
p1Greater2 <- totInd/totProb | ||
expectedDecisionLoss <- totLoss/totProb | ||
pG <- rbind(pG,data.frame(Deal1=d1, | ||
Deal2=d2, | ||
p1Greater2=p1Greater2 | ||
)) | ||
if(computeLoss) { | ||
pG$expectedDecisionLoss=expectedDecisionLoss | ||
} | ||
} | ||
pG | ||
} | ||
|
||
# p: a posterior graph | ||
# l1,l2: Labels | ||
build2dProbGraph <- function(p,l1,l2) { | ||
g1 <- p[p$Label==l1,,drop=FALSE] | ||
g2 <- p[p$Label==l2,,drop=FALSE] | ||
n1 <- nrow(g1) | ||
n2 <- nrow(g2) | ||
d_i1=integer(n1*n2) | ||
d_i2=integer(n1*n2) | ||
d_v1=numeric(n1*n2) | ||
d_v2=numeric(n1*n2) | ||
d_d=numeric(n1*n2) | ||
idx <- 1 | ||
for(i1 in 1:n1) { | ||
v1 <- g1[i1,'value'] | ||
for(i2 in 1:n2) { | ||
v2 <- g2[i2,'value'] | ||
d_i1[idx] <- i1 | ||
d_i2[idx] <- i2 | ||
d_v1[idx] <- v1 | ||
d_v2[idx] <- v2 | ||
d_d[idx] <- g1[i1,'density']*g2[i2,'density'] | ||
idx <- idx + 1 | ||
} | ||
} | ||
data.frame(i1=d_i1, | ||
i2=d_i2, | ||
v1=d_v1, | ||
v2=d_v2, | ||
d=d_d) | ||
} | ||
|
Oops, something went wrong.