diff --git a/gpmivm/demClassificationOneIvm1.m b/gpmivm/demClassificationOneIvm1.m
new file mode 100644
index 00000000..5b6e4f12
--- /dev/null
+++ b/gpmivm/demClassificationOneIvm1.m
@@ -0,0 +1,70 @@
+% DEMCLASSIFICATIONONEIVM1 Test IVM code on a toy feature selection.
+
+% IVM
+
+% Fix seeds
+randn('seed', 1e5);
+rand('seed', 1e5);
+
+dataSetName = 'classificationOne';
+experimentNo = 1;
+
+% load data
+[X, y] = mapLoadData(dataSetName);
+
+
+% Set up model
+options = ivmOptions;
+options.display = 2;
+options.numActive = 100;
+% Use a combination of an MLP and linear ARD kernel.
+options.kern = {'mlpard', 'linard', 'white'};
+
+model = ivmCreate(size(X, 1), size(y, 2), X, y, options);
+
+% Constrain the ARD parameters in the MLP and linear kernels to be the same.
+model.kern = cmpndTieParameters(model.kern, {[4, 7], [5, 8]});
+
+if options.display > 1
+  ivm3dPlot(model, 'ivmContour', []);
+end
+for i = 1:options.extIters
+
+  % Select the active set.
+  model = ivmOptimiseIvm(model, options.display);
+  % Plot the data.
+  if options.display > 1
+    ivm3dPlot(model, 'ivmContour', i);
+  end
+  % Optimise the kernel parameters.
+  model = ivmOptimiseKernel(model, options.display, options.kernIters);
+end
+model = ivmOptimiseIvm(model, options.display);
+if options.display > 1
+  ivm3dPlot(model, 'ivmContour', i);
+end
+% display active points.
+model = ivmOptimiseIvm(model, options.display);
+
+% Display the final model.
+ivmDisplay(model);
+
+% Save the results.
+capName = dataSetName;
+capName(1) = upper(capName(1));
+[kern, noise, ivmInfo] = ivmDeconstruct(model);
+save(['dem' capName num2str(experimentNo) '.mat'], ...
+     'kern', ...
+     'noise', ...
+     'ivmInfo');
+
+if exist('printDiagram') && printDiagram
+  ivmPrintPlot(model, 'ivmContour', [], [], [], capName, experimentNo);
+end
diff --git a/gpmivm/demClassificationTwoIvm1.m b/gpmivm/demClassificationTwoIvm1.m
new file mode 100644
index 00000000..e7d154ff
--- /dev/null
+++ b/gpmivm/demClassificationTwoIvm1.m
@@ -0,0 +1,66 @@
+% DEMCLASSIFICATIONTWOIVM1 IVM for classification on a data-set sampled from a GP
+
+% IVM
+
+% Fix seeds
+randn('seed', 1e5);
+rand('seed', 1e5);
+
+dataSetName = 'classificationTwo';
+experimentNo = 1;
+
+% load data
+[X, y] = mapLoadData(dataSetName);
+
+
+% Set up model
+options = ivmOptions;
+options.display = 2;
+options.numActive = 200;
+options.kern = {'rbf', 'white'};
+
+model = ivmCreate(size(X, 1), size(y, 2), X, y, options);
+
+if options.display > 1
+  ivm3dPlot(model, 'ivmContour', []);
+end
+for i = 1:options.extIters
+
+  % Select the active set.
+  model = ivmOptimiseIvm(model, options.display);
+  % Plot the data.
+  if options.display > 1
+    ivm3dPlot(model, 'ivmContour', i);
+  end
+  % Optimise the kernel parameters.
+  model = ivmOptimiseKernel(model, options.display, options.kernIters);
+end
+model = ivmOptimiseIvm(model, options.display);
+if options.display > 1
+  ivm3dPlot(model, 'ivmContour', i);
+end
+% display active points.
+model = ivmOptimiseIvm(model, options.display);
+
+% Display the final model.
+ivmDisplay(model);
+
+% Save the results.
+capName = dataSetName;
+capName(1) = upper(capName(1));
+[kern, noise, ivmInfo] = ivmDeconstruct(model);
+save(['dem' capName num2str(experimentNo) '.mat'], ...
+     'kern', ...
+     'noise', ...
+     'ivmInfo');
+
+if exist('printDiagram') && printDiagram
+  ivmPrintPlot(model, 'ivmContour', [], [], [], capName, experimentNo);
+end
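The two demos above save the learnt model in deconstructed form rather than as a model structure. A minimal sketch of reloading one of the saved files, assuming ivmReconstruct (the counterpart of ivmDeconstruct, named in its SEEALSO later in this changeset) takes the kernel, noise and IVM information together with the original data:

load demClassificationTwo1.mat  % provides kern, noise and ivmInfo
[X, y] = mapLoadData('classificationTwo');
% Signature assumed from the ivmDeconstruct/ivmReconstruct pairing.
model = ivmReconstruct(kern, noise, ivmInfo, X, y);
[mu, varSigma] = ivmPosteriorMeanVar(model, X);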
diff --git a/gpmivm/demEP1.m b/gpmivm/demEP1.m
new file mode 100644
index 00000000..be9f4b9a
--- /dev/null
+++ b/gpmivm/demEP1.m
@@ -0,0 +1,60 @@
+% DEMEP1 Demonstrate expectation propagation on a toy data set.
+
+% IVM
+
+% Fix seeds
+randn('seed', 1e5);
+rand('seed', 1e5);
+
+dataSetName = 'classificationOne';
+experimentNo = 2;
+
+% load data
+[X, y] = mapLoadData(dataSetName);
+
+
+
+
+options = ivmOptions;
+options.kern = {'mlp', 'white'};
+options.display = 2;
+options.numActive = 100;
+
+model = ivmCreate(size(X, 1), size(y, 2), X, y, options);
+
+if options.display > 1
+  ivm3dPlot(model, 'ivmContour', []);
+end
+for i = 1:4
+  model = ivmSelectPoints(model, options.display);
+  % Plot the data.
+  if options.display > 1
+    ivm3dPlot(model, 'ivmContour', i);
+  end
+  model = ivmOptimiseKernel(model, options.display, options.kernIters);
+end
+if options.display > 1
+  ivm3dPlot(model, 'ivmContour', i);
+end
+% display active points.
+model = ivmSelectPoints(model, options.display);
+
+
+
+% Display the final model.
+ivmDisplay(model);
+
+% Save the results.
+capName = dataSetName;
+capName(1) = upper(capName(1));
+[kern, noise, ivmInfo] = ivmDeconstruct(model);
+save(['dem' capName num2str(experimentNo) '.mat'], ...
+     'kern', ...
+     'noise', ...
+     'ivmInfo');
+
+if exist('printDiagram') && printDiagram
+  ivmPrintPlot(model, 'ivmContour', [], [], [], capName, experimentNo);
+end
diff --git a/gpmivm/demOrderedOneIvm1.m b/gpmivm/demOrderedOneIvm1.m
new file mode 100644
index 00000000..1f25fefa
--- /dev/null
+++ b/gpmivm/demOrderedOneIvm1.m
@@ -0,0 +1,80 @@
+% DEMORDEREDONEIVM1 Run a demonstration of the ordered categories noise model (linear data).
+
+% IVM
+% Fix seeds
+randn('seed', 1e5);
+rand('seed', 1e5);
+
+dataSetName = 'orderedOne';
+experimentNo = 1;
+
+% load data
+[X, y] = mapLoadData(dataSetName);
+
+
+
+% Set up model
+options = ivmOptions;
+options.noise = 'ordered';
+% Learn the noise model for the ordered categorical case.
+options.noiseIters = 100;
+options.display = 2;
+% Use a kernel consisting of an RBF ard kernel, a linear ard kernel and a
+% bias term.
+options.kern = {'rbfard', 'linard', 'bias', 'white'};
+
+
+model = ivmCreate(size(X, 1), size(y, 2), X, y, options);
+
+% Constrain the ARD parameters in the RBF and linear kernels to be the same.
+model.kern = cmpndTieParameters(model.kern, {[3, 6], [4, 7]});
+% Do some plotting
+if options.display > 1
+  ivm3dPlot(model, 'ivmContour', []);
+end
+for i = 1:options.extIters
+  % Select active set.
+  model = ivmOptimiseIvm(model, options.display);
+  if options.display > 1
+    ivm3dPlot(model, 'ivmContour', i);
+  end
+  % Optimise the noise model.
+  model = ivmOptimiseNoise(model, options.display, options.noiseIters);
+
+  % Select active set.
+  model = ivmOptimiseIvm(model, options.display);
+  if options.display > 1
+    ivm3dPlot(model, 'ivmContour', i);
+  end
+  % Optimise kernel parameters
+  model = ivmOptimiseKernel(model, options.display, options.kernIters);
+end
+% Select active set.
+model = ivmOptimiseIvm(model, options.display);
+if options.display > 1
+  ivm3dPlot(model, 'ivmContour', i);
+end
+% Display active points
+model = ivmOptimiseIvm(model, options.display);
+% Display parameters of end model.
+ivmDisplay(model);
+
+% Save the results.
+capName = dataSetName;
+capName(1) = upper(capName(1));
+[kern, noise, ivmInfo] = ivmDeconstruct(model);
+save(['dem' capName num2str(experimentNo) '.mat'], ...
+     'kern', ...
+     'noise', ...
+     'ivmInfo');
+
+if exist('printDiagram') && printDiagram
+  ivmPrintPlot(model, 'ivmContour', [], [], [], capName, experimentNo);
+end
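The cmpndTieParameters calls here and in demClassificationOneIvm1 tie parameters by their position in the compound kernel's parameter vector. A sketch for inspecting that ordering before choosing tie indices; the names output of kernExtractParam is assumed to be supported by the accompanying KERN toolbox, and X stands for the training input:

kern = kernCreate(X, {'rbfard', 'linard', 'bias', 'white'});
[params, names] = kernExtractParam(kern);  % names output assumed
for k = 1:length(params)
  fprintf('%d: %s = %2.4f\n', k, names{k}, params(k));
end
% {[3, 6], [4, 7]} above ties the two ARD input scales across the
% RBF and linear components.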
diff --git a/gpmivm/demOrderedTwoIvm1.m b/gpmivm/demOrderedTwoIvm1.m
new file mode 100644
index 00000000..bc43907d
--- /dev/null
+++ b/gpmivm/demOrderedTwoIvm1.m
@@ -0,0 +1,72 @@
+% DEMORDEREDTWOIVM1 Run a demonstration of the ordered categories noise model (circular data).
+
+% IVM
+
+randn('seed', 1e5);
+rand('seed', 1e5);
+
+dataSetName = 'orderedTwo';
+experimentNo = 1;
+
+% load data
+[X, y] = mapLoadData(dataSetName);
+
+
+% Set up model
+options = ivmOptions;
+options.noise = 'ordered';
+% Learn the noise model for the ordered categorical case.
+options.noiseIters = 100;
+options.display = 2;
+% Use a kernel consisting of an RBF ard kernel, a linear ard kernel and a
+% bias term.
+options.kern = {'rbfard', 'linard', 'bias', 'white'};
+
+model = ivmCreate(size(X, 1), size(y, 2), X, y, options);
+
+% Constrain the ARD parameters in the RBF and linear kernels to be the same.
+model.kern = cmpndTieParameters(model.kern, {[3, 6], [4, 7]});
+% Do some plotting
+if options.display > 1
+  ivm3dPlot(model, 'ivmContour', []);
+end
+for i = 1:options.extIters
+  % Select active set.
+  model = ivmOptimiseIvm(model, options.display);
+  if options.display > 1
+    ivm3dPlot(model, 'ivmContour', i);
+  end
+  % Optimise the noise model.
+  model = ivmOptimiseNoise(model, options.display, options.noiseIters);
+
+  % Select active set.
+  model = ivmOptimiseIvm(model, options.display);
+  if options.display > 1
+    ivm3dPlot(model, 'ivmContour', i);
+  end
+  % Optimise kernel parameters
+  model = ivmOptimiseKernel(model, options.display, options.kernIters);
+end
+% Select active set.
+model = ivmOptimiseIvm(model, options.display);
+if options.display > 1
+  ivm3dPlot(model, 'ivmContour', i);
+end
+% Display active points
+model = ivmOptimiseIvm(model, options.display);
+% Display parameters of end model.
+ivmDisplay(model);
+
+
+% Save the results.
+capName = dataSetName;
+capName(1) = upper(capName(1));
+[kern, noise, ivmInfo] = ivmDeconstruct(model);
+save(['dem' capName num2str(experimentNo) '.mat'], ...
+     'kern', ...
+     'noise', ...
+     'ivmInfo');
+
+if exist('printDiagram') && printDiagram
+  ivmPrintPlot(model, 'ivmContour', [], [], [], capName, experimentNo);
+end
diff --git a/ivm/demProbit1.m b/gpmivm/demProbit1.m
similarity index 100%
rename from ivm/demProbit1.m
rename to gpmivm/demProbit1.m
diff --git a/gpmivm/demRegressionOneIvm1.m b/gpmivm/demRegressionOneIvm1.m
new file mode 100644
index 00000000..90fc3e4d
--- /dev/null
+++ b/gpmivm/demRegressionOneIvm1.m
@@ -0,0 +1,70 @@
+% DEMREGRESSIONONEIVM1 The data-set is sampled from a GP with known parameters.
+
+% IVM
+
+randn('seed', 1e5);
+rand('seed', 1e5);
+
+dataSetName = 'regressionOne';
+experimentNo = 1;
+
+% load data
+[X, y] = mapLoadData(dataSetName);
+
+
+% Set up model
+options = ivmOptions;
+options.noise = 'gaussian';
+% Use a Gaussian noise model for the regression case.
+options.display = 2;
+options.numActive = 50;
+% Use a kernel consisting of an RBF ard kernel, a linear ard kernel and a
+% white noise term.
+options.kern = {'rbfard', 'linard', 'white'};
+
+model = ivmCreate(size(X, 1), size(y, 2), X, y, options);
+
+model.kern = cmpndTieParameters(model.kern, {[3, 6], [4, 7]});
+if options.display > 1
+  [h1, h2] = ivm3dPlot(model, 'mesh', []);
+  drawnow
+end
+
+for i = 1:options.extIters
+  % Plot the data.
+  % Select the active set.
+  model = ivmOptimiseIvm(model, options.display);
+  if options.display > 1
+    delete(h2)
+    [h1, h2] = ivm3dPlot(model, 'mesh', i);
+    drawnow
+  end
+  % Optimise kernel parameters.
+  model = ivmOptimiseKernel(model, options.display, options.kernIters);
+
+end
+model = ivmOptimiseIvm(model, options.display);
+if options.display > 1
+  delete(h2)
+  [h1, h2] = ivm3dPlot(model, 'mesh', i);
+end
+% Show the active points.
+model = ivmOptimiseIvm(model, options.display);
+
+ivmDisplay(model);
+
+% Save the results.
+capName = dataSetName;
+capName(1) = upper(capName(1));
+[kern, noise, ivmInfo] = ivmDeconstruct(model);
+save(['dem' capName num2str(experimentNo) '.mat'], ...
+     'kern', ...
+     'noise', ...
+     'ivmInfo');
+
+if exist('printDiagram') && printDiagram
+  ivmPrintPlot(model, 'mesh', [], [], [], capName, experimentNo);
+end
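demRegressionOneIvm1 only loads training data, so no test score is reported. A hedged sketch of how the Gaussian-noise model would be scored on held-out data (XTest and yTest are hypothetical here):

[mu, varSigma] = ivmPosteriorMeanVar(model, XTest);
rmse = sqrt(mean((yTest - mu).^2));
% Negative log predictive density under the Gaussian noise model.
nlpd = 0.5*mean(log(2*pi*varSigma) + (yTest - mu).^2./varSigma);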
diff --git a/ivm/demRegressionTwoIvm1.m b/gpmivm/demRegressionTwoIvm1.m
similarity index 100%
rename from ivm/demRegressionTwoIvm1.m
rename to gpmivm/demRegressionTwoIvm1.m
diff --git a/gpmivm/demThreeFiveIvm1.m b/gpmivm/demThreeFiveIvm1.m
new file mode 100644
index 00000000..ec33a098
--- /dev/null
+++ b/gpmivm/demThreeFiveIvm1.m
@@ -0,0 +1,135 @@
+% DEMTHREEFIVEIVM1 Try the IVM & NCNM on 3 vs 5.
+
+% IVM
+
+
+rand('seed', 1e5);
+randn('seed', 1e5);
+
+dataSetName = 'threeFive';
+experimentNo = 1;
+
+[X, y, XTest, yTest] = mapLoadData(dataSetName);
+
+% IVM Set up
+
+optionsIvm = ivmOptions;
+dVal = 200;
+optionsIvm.display = 0;
+optionsIvm.kernIters = 400;
+optionsIvm.extIters = 15;
+optionsIvm.kern = {'rbf', 'white'};
+
+% SVM Set up
+optionsSvm = svmlopt;
+optionsSvm.Kernel = 2;
+
+
+% Use this prior for NCNM process variances.
+prior.type = 'gamma';
+prior = priorParamInit(prior);
+prior.a = 1;
+prior.b = 1;
+num = 6;
+probLabelled = [0.2 0.1 0.05 0.025 0.0125];
+
+for i = 1:10
+  for j = 1:length(probLabelled)
+    rand('seed', i*1e5);
+    randn('seed', i*1e5);
+
+    prob = probLabelled(j);
+    unlabelled = find(rand(size(y))>prob);
+
+    % Create labelled only data-sets.
+    ylab = y;
+    Xlab = X;
+    ylab(unlabelled) = [];
+    Xlab(unlabelled, :) = [];
+    yunlab = y;
+    % Run IVM on labelled data.
+    optionsIvm.noise = 'probit';
+    optionsIvm.numActive = min([dVal length(ylab)]);
+
+    % Initialise the model.
+    model = ivmCreate(size(Xlab, 1), size(ylab, 2), Xlab, ylab, optionsIvm);
+
+    model.noise.bias = 0;
+    % Optimise the IVM but don't change noise params.
+    model = ivmOptimise(model, optionsIvm);
+    % Select data-points in an IVM with those kernel parameters.
+    model = ivmOptimiseIvm(model, optionsIvm.display);
+    % Display the final model.
+    ivmDisplay(model);
+    [mu, varSigma] = ivmPosteriorMeanVar(model, XTest);
+    areaIvm(i, j) = rocCurve(mu, yTest);
+    fprintf('IVM %d, %d: Area under ROC curve: %2.4f\n', i, j, areaIvm(i, j));
+    % Store the results.
+    [kernIvm(i, j), noiseIvm(i, j), ivmInfoIvm(i, j)] = ivmDeconstruct(model);
+
+    % Run NCNM Ivm on data.
+    optionsIvm.noise = 'ncnm';
+    optionsIvm.numActive = dVal;
+    yunlab(unlabelled) = NaN;
+    % Initialise NCNM
+    model = ivmCreate(size(X, 1), size(yunlab, 2), X, yunlab, optionsIvm);
+    model.noise.bias = 0;
+    model.noise.width = 1;
+    prior.index = 2;
+    model.kern.comp{1}.priors(1) = prior;
+    prior.index = 1;
+    model.kern.comp{2}.priors(1) = prior;
+    % Optimise the NCNM but don't change noise params.
+    model = ivmOptimise(model, optionsIvm);
+    % Select data-points in an NCNM with those kernel parameters.
+    model = ivmOptimiseIvm(model, optionsIvm.display);
+    % Display the final model.
+    ivmDisplay(model);
+    [mu, varSigma] = ivmPosteriorMeanVar(model, XTest);
+    areaNCNM(i, j) = rocCurve(mu, yTest);
+    fprintf('NCNM %d, %d: Area under ROC curve: %2.4f\n', i, j, areaNCNM(i, j));
+    % Store the results.
+    [kernNCNM(i, j), noiseNCNM(i, j), ivmInfoNCNM(i, j)] = ivmDeconstruct(model);
+
+    % Run SVM on labelled data.
+    optionsSvm.KernelParam = kernIvm(i, j).comp{1}.inverseWidth/2;
+    model = svml([dataSetName num2str(i) num2str(j)], optionsSvm);
+    model = svmltrain(model, Xlab, ylab);
+    [alpha, Xactive] = svmlread(model.fname);
+    % Fix bug in svmlread which doesn't recognise size of Xactive
+    missingCols = size(XTest, 2) - size(Xactive, 2);
+    Xactive = [Xactive zeros(size(Xactive, 1), missingCols)];
+    missingRows = size(alpha, 1) - size(Xactive, 1);
+    Xactive = [Xactive; zeros(missingRows, size(Xactive, 2))];
+    % Make output predictions.
+    testOut = XTest*Xactive'*alpha;
+    areaSVM(i, j) = rocCurve(testOut, yTest);
+    fprintf('SVM %d, %d, Area under ROC curve: %2.4f\n', i, j, areaSVM(i, j));
+
+    % Run Transductive SVM on data.
+    yunlab(unlabelled) = 0;
+    optionsSvm.KernelParam = max([kernNCNM(i, j).comp{1}.inverseWidth kernIvm(i, j).comp{1}.inverseWidth])/2;
+    model = svml([dataSetName num2str(i) num2str(j)], optionsSvm);
+    model = svmltrain(model, X, yunlab);
+    [alpha, Xactive] = svmlread(model.fname);
+    % Fix bug in svmlread which doesn't recognise size of Xactive
+    missingCols = size(XTest, 2) - size(Xactive, 2);
+    Xactive = [Xactive zeros(size(Xactive, 1), missingCols)];
+    missingRows = size(alpha, 1) - size(Xactive, 1);
+    Xactive = [Xactive; zeros(missingRows, size(Xactive, 2))];
+    % Make output predictions.
+    testOut = XTest*Xactive'*alpha;
+    areaTSVM(i, j) = rocCurve(testOut, yTest);
+    fprintf('T-SVM %d, %d, Area under ROC curve: %2.4f\n', i, j, areaTSVM(i, j));
+    capName = dataSetName;
+    capName(1) = upper(capName(1));
+    save(['dem' capName 'Ivm1.mat'], ...
+         'kernIvm', 'noiseIvm', ...
+         'ivmInfoIvm', 'areaIvm', ...
+         'kernNCNM', 'noiseNCNM', ...
+         'ivmInfoNCNM', 'areaNCNM', ...
+         'probLabelled', 'areaSVM', 'areaTSVM');
+  end
+end
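rocCurve is used above as a scalar score. For reference, a sketch of the quantity it is assumed to return: the area under the ROC curve computed from decision values mu and test labels yTest in {-1, 1} (ties between decision values are ignored in this sketch):

[void, order] = sort(mu, 'descend');
labels = yTest(order) == 1;
truePos = [0; cumsum(labels)/sum(labels)];
falsePos = [0; cumsum(~labels)/sum(~labels)];
area = trapz(falsePos, truePos);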
diff --git a/gpmivm/demThreeFiveResults.m b/gpmivm/demThreeFiveResults.m
new file mode 100644
index 00000000..d8a635eb
--- /dev/null
+++ b/gpmivm/demThreeFiveResults.m
@@ -0,0 +1,26 @@
+% DEMTHREEFIVERESULTS Plot results from the three vs five experiments.
+%
+% Recreates the three versus five classification experiments
+% presented in the NIPS paper.
+
+% IVM
+
+load demThreeFiveIvm1
+clf
+hold on
+lineA = errorbar(probLabelled, mean(areaIvm), std(areaIvm), 'rx-');
+set(lineA, 'linewidth', 2);
+lineB = errorbar(probLabelled, mean(areaNCNM), std(areaNCNM), 'bo:');
+set(lineB, 'linewidth', 2);
+lineC = errorbar(probLabelled, mean(areaSVM), std(areaSVM), 'gx-.');
+set(lineC, 'linewidth', 2);
+lineD = errorbar(probLabelled, mean(areaTSVM), std(areaTSVM), 'mo--');
+set(lineD, 'linewidth', 2);
+set(gca, 'xscale', 'log')
+set(gca, 'xlim', [0.01 0.2])
+set(gca, 'ylim', [0.75 1])
+set(gca, 'fontname', 'times')
+set(gca, 'fontsize', 20)
+set(gca, 'ytick', [0.7 0.8 0.9 1])
+xlabel('prob. of label present')
+ylabel('area under ROC curve')
diff --git a/gpmivm/demUnlabelledIvm2.m b/gpmivm/demUnlabelledIvm2.m
new file mode 100644
index 00000000..96ce0a82
--- /dev/null
+++ b/gpmivm/demUnlabelledIvm2.m
@@ -0,0 +1,78 @@
+% DEMUNLABELLEDIVM2 Test IVM code on the toy crescent data.
+%
+% Recreates the toy crescent data example shown in the NIPS paper.
+
+% IVM
+
+
+randn('seed', 1e6);
+rand('seed', 1e6);
+
+% Generate a toy data-set
+dataSetName = 'unlabelledOne';
+experimentNo = 2;
+
+labelledProb = 0.1;
+[X, y] = mapLoadData(['semi:' dataSetName ':' num2str(labelledProb)], 1e6);
+
+ind = find(isnan(y));
+X(ind, :) = [];
+y(ind, :) = [];
+
+options = ivmOptions;
+options.noise = 'probit';
+options.kern = {'rbf', 'white'};
+options.display = 2;
+options.numActive = 100;
+prior = [];
+
+% Initialise the model.
+model = ivmCreate(size(X, 1), size(y, 2), X, y, options);
+
+prior.type = 'gamma';
+prior = priorParamInit(prior);
+prior.a = 1;
+prior.b = 1;
+prior.index = 2;
+model.kern.comp{1}.priors(1) = prior;
+prior.index = 1;
+model.kern.comp{2}.priors(1) = prior;
+if options.display > 1
+  ivm3dPlot(model, 'ncnmContour', []); %incnmTwoDPlot(model, i);
+end
+for i = 1:15
+
+  % Plot the data.
+  % Select the active set.
+  model = ivmOptimiseIvm(model, options.display);
+  if options.display > 1
+    ivm3dPlot(model, 'ncnmContour', i); %incnmTwoDPlot(model, i);
+  end
+  % Optimise the kernel parameters.
+  model = ivmOptimiseKernel(model, options.display, options.kernIters);
+  ivmDisplay(model);
+
+end
+model = ivmOptimiseIvm(model, options.display);
+if options.display > 1
+  ivm3dPlot(model, 'ncnmContour', i); %incnmTwoDPlot(model, i);
+end
+model = ivmOptimiseIvm(model, options.display);
+% Display the final model.
+ivmDisplay(model);
+
+% Save the results.
+capName = dataSetName;
+capName(1) = upper(capName(1));
+[kern, noise, ivmInfo] = ivmDeconstruct(model);
+save(['dem' capName num2str(experimentNo) '.mat'], ...
+     'kern', ...
+     'noise', ...
+     'ivmInfo');
+
+if exist('printDiagram') && printDiagram
+  ivmPrintPlot(model, 'ivmContour', [], [], [], capName, experimentNo);
+end
diff --git a/gpmivm/demUnlabelledOneIvm1.m b/gpmivm/demUnlabelledOneIvm1.m
new file mode 100644
index 00000000..1f10406f
--- /dev/null
+++ b/gpmivm/demUnlabelledOneIvm1.m
@@ -0,0 +1,74 @@
+% DEMUNLABELLEDONEIVM1 Test IVM code on the toy crescent data.
+%
+% Recreates the toy crescent data example shown in the NIPS paper.
+
+% IVM
+
+
+randn('seed', 1e6);
+rand('seed', 1e6);
+
+% Generate a toy data-set
+dataSetName = 'unlabelledOne';
+experimentNo = 1;
+
+labelledProb = 0.1;
+[X, y] = mapLoadData(['semi:' dataSetName ':' num2str(labelledProb)], 1e6);
+
+options = ivmOptions;
+options.noise = 'ncnm';
+options.kern = {'rbf', 'white'};
+options.display = 2;
+options.numActive = 100;
+prior = [];
+
+% Initialise the model.
+model = ivmCreate(size(X, 1), size(y, 2), X, y, options);
+
+prior.type = 'gamma';
+prior = priorParamInit(prior);
+prior.a = 1;
+prior.b = 1;
+prior.index = 2;
+model.kern.comp{1}.priors(1) = prior;
+prior.index = 1;
+model.kern.comp{2}.priors(1) = prior;
+if options.display > 1
+  ivm3dPlot(model, 'ncnmContour', []); %incnmTwoDPlot(model, i);
+end
+for i = 1:15
+
+  % Plot the data.
+  % Select the active set.
+  model = ivmOptimiseIvm(model, options.display);
+  if options.display > 1
+    ivm3dPlot(model, 'ncnmContour', i); %incnmTwoDPlot(model, i);
+  end
+  % Optimise the kernel parameters.
+  model = ivmOptimiseKernel(model, options.display, options.kernIters);
+  ivmDisplay(model);
+
+end
+model = ivmOptimiseIvm(model, options.display);
+if options.display > 1
+  ivm3dPlot(model, 'ncnmContour', i); %incnmTwoDPlot(model, i);
+end
+model = ivmOptimiseIvm(model, options.display);
+% Display the final model.
+ivmDisplay(model);
+
+% Save the results.
+capName = dataSetName;
+capName(1) = upper(capName(1));
+[kern, noise, ivmInfo] = ivmDeconstruct(model);
+save(['dem' capName num2str(experimentNo) '.mat'], ...
+     'kern', ...
+     'noise', ...
+     'ivmInfo');
+
+if exist('printDiagram') && printDiagram
+  ivmPrintPlot(model, 'ncnmContour', [], [], [], capName, experimentNo);
+end
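In the NCNM demo above, NaN targets returned by the 'semi:' loader mark unlabelled points. A small sketch of checking the fit on the labelled subset after training; model.noise.bias is assumed to hold the decision bias as in the 3-vs-5 experiment:

[mu, varSigma] = ivmPosteriorMeanVar(model, X);
yPred = sign(mu + model.noise.bias);
labelled = ~isnan(y);
trainErr = 1 - sum(yPred(labelled) == y(labelled))/sum(labelled);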
diff --git a/ivm/demUnlabelledOneIvm2.m b/gpmivm/demUnlabelledOneIvm2.m
similarity index 100%
rename from ivm/demUnlabelledOneIvm2.m
rename to gpmivm/demUnlabelledOneIvm2.m
diff --git a/gpmivm/demUspsIvm1.m b/gpmivm/demUspsIvm1.m
new file mode 100644
index 00000000..ed439767
--- /dev/null
+++ b/gpmivm/demUspsIvm1.m
@@ -0,0 +1,60 @@
+% DEMUSPSIVM1 Try the IVM on the USPS digits data with RBF kernel.
+
+% IVM
+
+dataSetName = 'usps';
+experimentNo = 1;
+
+randn('seed', 1e5);
+rand('seed', 1e5);
+
+[X, y, XTest, yTest] = mapLoadData(dataSetName);
+
+capitalName = dataSetName;
+capitalName(1) = upper(capitalName(1));
+
+options = ivmOptions;
+options.kern = {'rbf', 'lin', 'bias', 'white'};
+options.numActive = 500;
+
+mu = zeros(size(yTest));
+varSigma = zeros(size(yTest));
+
+tic
+% Learn an IVM for each digit
+for trainData = 0:9
+  index = trainData+1;
+
+  % Train the IVM.
+  model = ivmRun(X, y(:, index), options);
+
+  % Make prediction for this digit.
+  [mu(:, index), varSigma(:, index)] = ivmPosteriorMeanVar(model, XTest);
+  mu(:, index) = mu(:, index) + model.noise.bias;
+  yPred = sign(mu(:, index));
+  testError(index) = 1-sum(yPred==yTest(:, index))/size(yTest, 1);
+  fprintf('Digit %d, test error %2.4f\n', trainData, testError(index));
+
+  % Deconstruct IVM for saving.
+  [kernStore{index}, noiseStore{index}, ...
+   ivmInfoStore{index}] = ivmDeconstruct(model);
+  save(['dem' capitalName num2str(experimentNo)], 'testError', ...
+       'ivmInfoStore', 'kernStore', 'noiseStore')
+end
+overallTime = toc;

+% Make prediction for all digits.
+[void, yPred] = max(mu, [], 2);
+[void, yTest] = max(yTest, [], 2);
+yPred = yPred - 1;
+yTest = yTest - 1;
+overallError = 1 - sum(yPred == yTest)/size(yTest, 1);
+
+confusMat = zeros(10);
+for i = 1:length(yPred)
+  confusMat(yPred(i)+1, yTest(i)+1) = confusMat(yPred(i)+1, yTest(i)+1) + 1;
+end
+save(['dem' capitalName num2str(experimentNo)], 'testError', ...
+     'ivmInfoStore', 'kernStore', ...
+     'noiseStore', 'overallError', ...
+     'confusMat', 'overallTime');
diff --git a/gpmivm/demUspsIvm2.m b/gpmivm/demUspsIvm2.m
new file mode 100644
index 00000000..9d190ef0
--- /dev/null
+++ b/gpmivm/demUspsIvm2.m
@@ -0,0 +1,60 @@
+% DEMUSPSIVM2 Try the IVM on the USPS digits data with MLP kernel.
+
+% IVM
+
+dataSetName = 'usps';
+experimentNo = 2;
+
+randn('seed', 1e5);
+rand('seed', 1e5);
+
+[X, y, XTest, yTest] = mapLoadData(dataSetName);
+
+capitalName = dataSetName;
+capitalName(1) = upper(capitalName(1));
+
+options = ivmOptions;
+options.kern = {'mlp', 'lin', 'bias', 'white'};
+options.numActive = 500;
+
+mu = zeros(size(yTest));
+varSigma = zeros(size(yTest));
+
+tic
+% Learn an IVM for each digit
+for trainData = 0:9
+  index = trainData+1;
+
+  % Train the IVM.
+  model = ivmRun(X, y(:, index), options);
+
+  % Make prediction for this digit.
+  [mu(:, index), varSigma(:, index)] = ivmPosteriorMeanVar(model, XTest);
+  mu(:, index) = mu(:, index) + model.noise.bias;
+  yPred = sign(mu(:, index));
+  testError(index) = 1-sum(yPred==yTest(:, index))/size(yTest, 1);
+  fprintf('Digit %d, test error %2.4f\n', trainData, testError(index));
+
+  % Deconstruct IVM for saving.
+  [kernStore{index}, noiseStore{index}, ...
+   ivmInfoStore{index}] = ivmDeconstruct(model);
+  save(['dem' capitalName num2str(experimentNo)], 'testError', ...
+       'ivmInfoStore', 'kernStore', 'noiseStore')
+end
+overallTime = toc;
+
+% Make prediction for all digits.
+[void, yPred] = max(mu, [], 2);
+[void, yTest] = max(yTest, [], 2);
+yPred = yPred - 1;
+yTest = yTest - 1;
+overallError = 1 - sum(yPred == yTest)/size(yTest, 1);
+
+confusMat = zeros(10);
+for i = 1:length(yPred)
+  confusMat(yPred(i)+1, yTest(i)+1) = confusMat(yPred(i)+1, yTest(i)+1) + 1;
+end
+save(['dem' capitalName num2str(experimentNo)], 'testError', ...
+     'ivmInfoStore', 'kernStore', ...
+     'noiseStore', 'overallError', ...
+     'confusMat', 'overallTime');
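Both USPS demos train ten independent one-against-the-rest classifiers. The target matrix is assumed to be one-hot encoded with values in {-1, 1} and one column per digit, which is why the per-digit predictions can be combined by taking the column with the largest mean. A quick sketch of checking that encoding:

digit = 3;
fprintf('digit %d: %d positive examples\n', digit, ...
        sum(y(:, digit+1) == 1));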
diff --git a/gpmivm/demUspsIvm3.m b/gpmivm/demUspsIvm3.m
new file mode 100644
index 00000000..832785cf
--- /dev/null
+++ b/gpmivm/demUspsIvm3.m
@@ -0,0 +1,73 @@
+% DEMUSPSIVM3 Try the ARD IVM on some digits data.
+
+% IVM
+
+dataSetName = 'usps';
+experimentNo = 3;
+
+randn('seed', 1e5);
+rand('seed', 1e5);
+
+[X, y, XTest, yTest] = mapLoadData(dataSetName);
+
+capitalName = dataSetName;
+capitalName(1) = upper(capitalName(1));
+
+options = ivmOptions;
+options.kern = {'rbfard', 'lin', 'bias', 'white'};
+options.numActive = 500;
+
+mu = zeros(size(yTest));
+varSigma = zeros(size(yTest));
+
+a = [ones(4) repmat(2, 4, 4) ...
+     repmat(3, 4, 4) repmat(4, 4, 4); ...
+     repmat(5, 4, 4) repmat(6, 4, 4) ...
+     repmat(7, 4, 4) repmat(8, 4, 4); ...
+     repmat(9, 4, 4) repmat(10, 4, 4) ...
+     repmat(11, 4, 4) repmat(12, 4, 4); ...
+     repmat(13, 4, 4) repmat(14, 4, 4) ...
+     repmat(15, 4, 4) repmat(16, 4, 4)];
+for i = 1:16
+  ardPos = find(a == i)';
+  tieIndices{i} = [2+ardPos];
+end
+
+tic
+% Learn an IVM for each digit
+for trainData = 0:9
+  index = trainData+1;
+
+  % Train the IVM.
+  model = ivmRun(X, y(:, index), options);
+
+  % Make prediction for this digit.
+  [mu(:, index), varSigma(:, index)] = ivmPosteriorMeanVar(model, XTest);
+  mu(:, index) = mu(:, index) + model.noise.bias;
+  yPred = sign(mu(:, index));
+  testError(index) = 1-sum(yPred==yTest(:, index))/size(yTest, 1);
+  fprintf('Digit %d, test error %2.4f\n', trainData, testError(index));
+
+  % Deconstruct IVM for saving.
+  [kernStore{index}, noiseStore{index}, ...
+   ivmInfoStore{index}] = ivmDeconstruct(model);
+  save(['dem' capitalName num2str(experimentNo)], 'testError', ...
+       'ivmInfoStore', 'kernStore', 'noiseStore')
+end
+overallTime = toc;
+
+% Make prediction for all digits.
+[void, yPred] = max(mu, [], 2);
+[void, yTest] = max(yTest, [], 2);
+yPred = yPred - 1;
+yTest = yTest - 1;
+overallError = 1 - sum(yPred == yTest)/size(yTest, 1);
+
+confusMat = zeros(10);
+for i = 1:length(yPred)
+  confusMat(yPred(i)+1, yTest(i)+1) = confusMat(yPred(i)+1, yTest(i)+1) + 1;
+end
+save(['dem' capitalName num2str(experimentNo)], 'testError', ...
+     'ivmInfoStore', 'kernStore', ...
+     'noiseStore', 'overallError', ...
+     'confusMat', 'overallTime');
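The tieIndices cell array built in demUspsIvm3 groups the rbfard input scales into sixteen 4x4 tiles of the 16x16 digit images (the offset of 2 is assumed to skip the kernel's inverse width and variance parameters); the script constructs the groups but does not pass them on in the code shown here. A quick consistency check:

for k = 1:16
  assert(numel(tieIndices{k}) == 16);  % 16 pixels per tile
end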
diff --git a/gpmivm/ivm.m b/gpmivm/ivm.m
new file mode 100644
index 00000000..217b7b8f
--- /dev/null
+++ b/gpmivm/ivm.m
@@ -0,0 +1,54 @@
+function model = ivm(X, y, kernelType, noiseType, selectionCriterion, d)
+
+% IVM Initialise an IVM model.
+% FORMAT
+% DESC this function is now deprecated, please use ivmCreate.
+%
+% SEEALSO : ivmCreate
+
+% IVM
+
+model.type = 'ivm';
+model.terminate = 0;
+model.epUpdate = 0;
+
+model.d = d;
+
+model.X = X;
+model.y = y;
+
+model.m = [];
+model.beta = [];
+
+model.nu = zeros(size(y));
+model.g = zeros(size(y));
+
+model.kern = kernCreate(X, kernelType);
+
+model.varSigma = zeros(size(y));
+model.mu = zeros(size(y));
+
+model.I = [];
+model.J = [];
+
+model.noise = noiseCreate(noiseType, y);
+
+if model.noise.spherical
+  model.Sigma.M = [];
+  model.Sigma.L = [];
+else
+  for i = 1:size(y, 2)
+    model.Sigma(i).M = [];
+    model.Sigma(i).L = [];
+  end
+end
+model.selectionCriterion = selectionCriterion;
+
+
+switch selectionCriterion
+ case 'none'
+  numData = size(X, 1);
+  model.I = (1:numData);
+ otherwise
+
+end
diff --git a/gpmivm/ivm3dPlot.m b/gpmivm/ivm3dPlot.m
new file mode 100644
index 00000000..1822e9c1
--- /dev/null
+++ b/gpmivm/ivm3dPlot.m
@@ -0,0 +1,61 @@
+function [h1, h2] = ivm3dPlot(model, plotType, iter, X, y)
+
+% IVM3DPLOT Make a 3-D or contour plot of the IVM.
+% FORMAT
+% DESC makes a 3-D or contour plot from an IVM model to show
+% results.
+% ARG model : the model from which the plot is generated.
+% ARG plotType : type of plot to be used, options include
+% 'ncnmContour', which gives the contours at the edge of the null
+% category region for the null category noise model, and
+% 'ivmContour'.
+% ARG iter : iteration number, if given it is used as the title of
+% the plot.
+% ARG X : optional argument, if given it is the plotted input
+% locations, otherwise model.X is used.
+% ARG y : optional argument, if given it is the plotted target
+% locations, otherwise model.y is used.
+% RETURN h1 : a handle to the data in the plot.
+% RETURN h2 : a handle to any contours or functions plotted that
+% are derived from the IVM.
+%
+% SEEALSO : noise3dPlot, noisePointPlot, ivmMeshVals, ivmCreate
+%
+% COPYRIGHT : Neil D. Lawrence, 2004, 2005
+
+% IVM
+
+h2 = [];
+lineWidth = 2;
+if nargin < 4, X = []; end
+if nargin < 5, y = []; end
+if isempty(X)
+  X = model.X;
+end
+if isempty(y)
+  y = model.y;
+end
+clf
+h1 = noisePointPlot(model.noise, X, y, 'times', 20, 10, lineWidth);
+
+if ~isempty(iter)
+  title(['Iteration ' num2str(iter)])
+end
+
+if length(model.I)>0
+  % It's probably learnt something.
+  xlim = get(gca, 'xlim');
+  ylim = get(gca, 'ylim');
+  [CX, CY, CZ, CZVar] = ivmMeshVals(model, xlim, ylim, 60);
+  switch plotType
+   case {'ivmContour', 'ncnmContour'}
+    threeDargs = cell(1);
+    threeDargs{1} = 2;
+   otherwise
+    threeDargs = cell(0);
+  end
+  h2 = noise3dPlot(model.noise, plotType, CX, CY, CZ, CZVar, threeDargs{:});
+end
+drawnow
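A minimal usage sketch for ivm3dPlot; passing an empty iteration number suppresses the title, which is how the demos above can call it before their optimisation loops begin:

[h1, h2] = ivm3dPlot(model, 'ivmContour', []);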
diff --git a/gpmivm/ivmAddPoint.m b/gpmivm/ivmAddPoint.m
new file mode 100644
index 00000000..8309e3e6
--- /dev/null
+++ b/gpmivm/ivmAddPoint.m
@@ -0,0 +1,33 @@
+function model = ivmAddPoint(model, i)
+
+% IVMADDPOINT Add a point into the IVM representation.
+% FORMAT
+% DESC incorporates the ith point from the data set into the IVM
+% model.
+% ARG model : the model to which the point is to be added.
+% ARG index : the index of the point in the training data which is
+% to be added.
+% RETURN model : the returned model with the point added in.
+%
+% SEEALSO : ivmUpdateSites, ivmUpdateM, ivmUpdateNuG, ivmSelectPoint,
+% ivmRemovePoint, ivmCreate
+%
+% COPYRIGHT : Neil D. Lawrence, 2004, 2005
+
+% IVM
+
+index = find(model.J == i);
+if isempty(index)
+  error(['Point ' num2str(i) ' is not in inactive set'])
+end
+
+%/~model = ivmUpdateNuG(model, i);
+%~/
+model = ivmUpdateSites(model, i);
+model = ivmUpdateM(model, i);
+
+% Remove point from the non-active set and place in the active.
+model.J(index) = [];
+model.I = [model.I; i];
+
+model = ivmUpdateNuG(model, model.J);
diff --git a/gpmivm/ivmApproxGradX.m b/gpmivm/ivmApproxGradX.m
new file mode 100644
index 00000000..2a34217e
--- /dev/null
+++ b/gpmivm/ivmApproxGradX.m
@@ -0,0 +1,34 @@
+function g = ivmApproxGradX(model, x, m, beta);
+
+% IVMAPPROXGRADX Returns the gradient of the approximate log-likelihood wrt x.
+% FORMAT
+% DESC returns the gradient of the approximate log likelihood for
+% the IVM with respect to a given input position.
+% ARG model : the model for which the gradient is being computed.
+% ARG x : the input location for which the gradient is to be
+% computed.
+% ARG m : the output position where the gradient is to be computed.
+% ARG beta : the output noise level for which the gradient is being
+% computed.
+% RETURN g : the gradient of the log likelihood with respect to x.
+%
+% SEEALSO : ivmPosteriorMeanVar, ivmPosteriorGradMeanVar
+%
+% COPYRIGHT : Neil D. Lawrence, 2004, 2005

+% IVM
+
+[mu, varsigma] = ivmPosteriorMeanVar(model, x);
+[dmu, dvs] = ivmPosteriorGradMeanVar(model, x);
+
+
+D = size(m, 2);
+nu = 1./(1./beta+varsigma);
+dlnZ_dmu = zeros(size(nu));
+for i = 1:D
+  dlnZ_dmu(:, i) = m(:, i) - mu(:, i) - model.noise.bias(i);
+end
+dlnZ_dmu = dlnZ_dmu.*nu;
+dlnZ_dvs = 0.5*(dlnZ_dmu.*dlnZ_dmu - nu);
+
+g = dlnZ_dmu*dmu' + dlnZ_dvs*dvs';
diff --git a/gpmivm/ivmApproxLogLikeKernGrad.m b/gpmivm/ivmApproxLogLikeKernGrad.m
new file mode 100644
index 00000000..57ceb0b0
--- /dev/null
+++ b/gpmivm/ivmApproxLogLikeKernGrad.m
@@ -0,0 +1,34 @@
+function g = ivmApproxLogLikeKernGrad(model)
+
+% IVMAPPROXLOGLIKEKERNGRAD Gradient of the approximate likelihood wrt kernel parameters.
+% FORMAT
+% DESC computes the gradient of the approximate log likelihood with
+% respect to any kernel parameters.
+% ARG model : the model for which the gradients are to be computed.
+% RETURN g : gradient of the log likelihood with respect to the
+% kernel parameters
+%
+% SEEALSO : kernGradient, ivmCovarianceGradient, ivmCreate
+%
+% COPYRIGHT : Neil D. Lawrence, 2004, 2005
+
+% IVM
+
+x = model.X(model.I, :);
+m = model.m(model.I, :);
+K = kernCompute(model.kern, x);
+g = zeros(1, model.kern.nParams);
+
+if model.noise.spherical
+  % there is only one value for all beta
+  invK = pdinv(K+diag(1./model.beta(model.I, 1)));
+end
+
+for j = 1:size(m, 2)
+  if ~model.noise.spherical
+    invK = pdinv(K+diag(1./model.beta(model.I, j)));
+  end
+  fhandle = str2func([model.type 'CovarianceGradient']);
+  covGrad = fhandle(invK, m(:, j));
+  g = g + kernGradient(model.kern, x, covGrad);
+end
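ivmApproxLogLikeKernGrad is exercised through the objective/gradient pair defined later in this changeset (ivmKernelObjective and ivmKernelGradient). A sketch of a finite-difference check on the returned gradient for a trained model:

params = kernExtractParam(model.kern);
step = 1e-6;
g = ivmKernelGradient(params, model);
gfd = zeros(size(g));
for k = 1:length(params)
  p = params; p(k) = p(k) + step;
  fPlus = ivmKernelObjective(p, model);
  p = params; p(k) = p(k) - step;
  fMinus = ivmKernelObjective(p, model);
  gfd(k) = (fPlus - fMinus)/(2*step);  % central difference
end
fprintf('max gradient discrepancy: %g\n', max(abs(g - gfd)));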
diff --git a/gpmivm/ivmApproxLogLikelihood.m b/gpmivm/ivmApproxLogLikelihood.m
new file mode 100644
index 00000000..f6fc0a0f
--- /dev/null
+++ b/gpmivm/ivmApproxLogLikelihood.m
@@ -0,0 +1,37 @@
+function L = ivmApproxLogLikelihood(model);
+
+% IVMAPPROXLOGLIKELIHOOD Return the approximate log-likelihood for the IVM.
+% FORMAT
+% DESC evaluates the approximate log likelihood for the IVM. The
+% approximate log likelihood involves only those points in the
+% active set. It is basically the approximate likelihood for EP
+% (see e.g. Kuss and Rasmussen's JMLR paper), but it is missing the
+% terms that are directly dependent on the noise model.
+% ARG model : the IVM model for which the approximate log
+% likelihood is to be computed.
+% RETURN L : the approximate log likelihood of the active set.
+%
+% COPYRIGHT : Neil D. Lawrence, 2005, 2004
+%
+% SEEALSO : ivmApproxLogLikeKernGrad, ivmApproxGradX
+
+% IVM
+
+x = model.X(model.I, :);
+m = model.m(model.I, :);
+K = kernCompute(model.kern, x);
+L = 0;
+
+if model.noise.spherical
+  % there is only one value for all beta
+  [invK, UC] = pdinv(K+diag(1./model.beta(model.I, 1)));
+  logDetTerm = logdet(K, UC);
+end
+
+for i = 1:size(m, 2)
+  if ~model.noise.spherical
+    [invK, UC] = pdinv(K+diag(1./model.beta(model.I, i)));
+    logDetTerm = logdet(K, UC);
+  end
+  L = L - .5*logDetTerm - .5*m(:, i)'*invK*m(:, i);
+end
diff --git a/gpmivm/ivmComputeInfoChange.m b/gpmivm/ivmComputeInfoChange.m
new file mode 100644
index 00000000..fb0bd857
--- /dev/null
+++ b/gpmivm/ivmComputeInfoChange.m
@@ -0,0 +1,50 @@
+function delta = ivmComputeInfoChange(model, add)
+
+% IVMCOMPUTEINFOCHANGE Compute the information change associated with each point.
+% FORMAT
+% DESC computes the information change associated with adding or
+% deleting a point. This function is used to select which points
+% are added or deleted.
+% ARG model : the model for which the information change is to be
+% computed.
+% ARG add : flag which states whether the points are to be added or
+% subtracted. Set to true if points are to be added, false
+% otherwise (default value is true).
+% RETURN delta : vector of information change values associated
+% with the different points.
+%
+% SEEALSO : ivmCreate, ivmSelectPoint
+%
+% COPYRIGHT : Neil D. Lawrence, 2004, 2005, 2007
+
+% IVM
+
+if nargin < 2
+  add = true;
+end
+
+if add
+  switch model.selectionCriterion
+   case {'entropy', 'rentropy'}
+    if model.noise.spherical
+      % Noise model leads to constant values for beta.
+      delta = -.5*size(model.y, 2).*sum(log2(1-model.varSigma(model.J, 1).* ...
+                                             model.nu(model.J, 1)+1e-300), 2);
+    else
+      delta = -.5*sum(log2(1-model.varSigma(model.J, :).* ...
+                           model.nu(model.J, :)+1e-300), 2);
+    end
+   otherwise
+    error(['Selection criterion ' model.selectionCriterion ' not yet implemented'])
+  end
+else
+
+  switch model.selectionCriterion
+   case {'entropy', 'rentropy'}
+    delta = .5*sum(log2(1-model.varSigma(model.I, :).*model.beta(model.I, :)+1e-300), 2);
+   otherwise
+    error(['Selection criterion ' model.selectionCriterion ' not yet implemented'])
+
+  end
+end
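For the entropy criterion above, the gain from including point n is -0.5 log2(1 - varSigma_n nu_n) bits per output dimension, summed across outputs. A one-line numerical example:

nu = 0.5; varSigma = 1.2;
delta = -0.5*log2(1 - varSigma*nu);  % about 0.66 bits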
diff --git a/gpmivm/ivmComputeLandM.m b/gpmivm/ivmComputeLandM.m
new file mode 100644
index 00000000..c32a794d
--- /dev/null
+++ b/gpmivm/ivmComputeLandM.m
@@ -0,0 +1,31 @@
+function model = ivmComputeLandM(model)
+
+% IVMCOMPUTELANDM Compute the L and M matrix.
+% FORMAT
+% DESC computes the matrices L and M from a given active set and
+% site parameters. Normally L and M are developed in a sequential
+% way, as points are added. This function creates them directly
+% from a given active set, for use, e.g. when loading in a saved
+% model from file (ivmReconstruct).
+% ARG model : the model for which we are recomputing L and M.
+% RETURN model : the model with the updated L and M values.
+%
+% COPYRIGHT : Neil D. Lawrence, 2005
+%
+% SEEALSO : ivmReconstruct
+
+% IVM
+
+if model.noise.spherical
+  model.Sigma.L = chol(model.kern.Kstore(model.I, :) ...
+                       + diag(1./model.beta(model.I)))';
+  model.Sigma.Linv = eye(size(model.Sigma.L))/model.Sigma.L;
+  model.Sigma.M = model.Sigma.Linv*model.kern.Kstore';
+else
+  for i = 1:size(model.y, 2)
+    model.Sigma(i).L = chol(model.kern.Kstore(model.I, :) ...
+                            + diag(1./model.beta(model.I, i)))';
+    model.Sigma(i).Linv = eye(size(model.Sigma(i).L))/model.Sigma(i).L;
+    model.Sigma(i).M = model.Sigma(i).Linv*model.kern.Kstore';
+  end
+end
diff --git a/gpmivm/ivmContour.m b/gpmivm/ivmContour.m
new file mode 100644
index 00000000..5efa4943
--- /dev/null
+++ b/gpmivm/ivmContour.m
@@ -0,0 +1,25 @@
+function h = ivmContour(X, Y, Z, lineWidth)
+
+% IVMCONTOUR Special contour plot showing decision boundary.
+% FORMAT
+% DESC plots a contour plot with a red solid line at 0.5 and blue
+% dashed lines at 0.25 and 0.75.
+% ARG X : input X locations (from e.g. meshgrid).
+% ARG Y : input Y locations.
+% ARG Z : input Z locations.
+% ARG lineWidth : width of the lines to use.
+% RETURN h : handle to the contour lines.
+%
+% SEEALSO : contour, ivmMeshVals
+%
+% COPYRIGHT : Neil D. Lawrence, 2005
+
+% IVM
+
+[void, clines1] = contour(X, Y, Z, [0.25 0.75], 'b--');
+[void, clines2] = contour(X, Y, Z, [0.5 0.5], 'r-');
+if ~isempty(clines1)
+  set(clines1, 'linewidth', lineWidth)
+end
+h = [clines1; clines2];
+if ~isempty(clines2)
+  set(clines2, 'linewidth', lineWidth)
+end
diff --git a/gpmivm/ivmCovarianceGradient.m b/gpmivm/ivmCovarianceGradient.m
new file mode 100644
index 00000000..8cf2f45f
--- /dev/null
+++ b/gpmivm/ivmCovarianceGradient.m
@@ -0,0 +1,23 @@
+function g = ivmCovarianceGradient(invK, m)
+
+% IVMCOVARIANCEGRADIENT The gradient of the likelihood approximation wrt the covariance.
+% FORMAT
+% DESC returns the gradient of the log likelihood approximation
+% with respect to the covariance function (i.e. kernel) evaluated
+% at the active points.
+% ARG invK : the inverse of the covariance function evaluated at
+% the active points.
+% ARG m : the difference between the target and the mean value.
+% RETURN g : the gradient of the log likelihood approximation with
+% respect to the covariance function elements.
+%
+% SEEALSO : ivmCreate
+%
+% COPYRIGHT : Neil D. Lawrence, 2004, 2005
+
+% IVM
+
+invKm = invK*m;
+
+g = -invK + invKm*invKm';
+g = g*.5;
diff --git a/gpmivm/ivmCreate.m b/gpmivm/ivmCreate.m
new file mode 100644
index 00000000..6f24f7e2
--- /dev/null
+++ b/gpmivm/ivmCreate.m
@@ -0,0 +1,81 @@
+function model = ivmCreate(inputDim, outputDim, X, y, options);
+
+% IVMCREATE Create an IVM model with the IVM sparse approximation.
+% The IVM stands for Informative Vector Machine. The IVM is a
+% sparse Gaussian process approximation which uses information
+% theoretic criteria for selection of an active set. Unlike most
+% sparse Gaussian process methods (but like the SVM) the IVM is a
+% compression scheme. In other words the IVM can be recreated using
+% only those data points considered to be 'informative vectors'. We
+% refer to these points as the active set.
+%
+% FORMAT
+% DESC creates a Gaussian process model structure using the IVM
+% approximation, with default parameter settings as specified by
+% the options structure. This function replaces the deprecated ivm
+% function.
+% ARG inputDim : input data dimension.
+% ARG outputDim : the number of processes (i.e. output data dimension).
+% ARG X : the input data matrix.
+% ARG y : the target (output) data.
+% ARG options : options structure as defined by ivmOptions.m.
+% RETURN model : model structure containing the IVM.
+%
+% SEEALSO : ivmOptions, modelCreate
+%
+% COPYRIGHT : Neil D. Lawrence, 2005, 2006, 2007
+
+% IVM
+
+
+model.type = 'ivm';
+model.terminate = 0;
+model.epUpdate = 0;
+
+model.d = options.numActive;
+
+model.X = X;
+model.y = y;
+
+model.m = [];
+model.beta = [];
+
+model.nu = zeros(size(y));
+model.g = zeros(size(y));
+
+if isstruct(options.kern)
+  model.kern = options.kern;
+else
+  model.kern = kernCreate(model.X, options.kern);
+end
+
+model.varSigma = zeros(size(y));
+model.mu = zeros(size(y));
+
+model.I = [];
+model.J = [];
+
+if isstruct(options.noise)
+  model.noise = options.noise;
+else
+  model.noise = noiseCreate(options.noise, model.y);
+end
+
+if model.noise.spherical
+  model.Sigma.M = [];
+  model.Sigma.L = [];
+else
+  for i = 1:size(y, 2)
+    model.Sigma(i).M = [];
+    model.Sigma(i).L = [];
+  end
+end
+model.selectionCriterion = options.selectionCriterion;
+
+
+switch options.selectionCriterion
+ case 'none'
+  numData = size(X, 1);
+  model.I = (1:numData);
+ otherwise
+
+end
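A minimal usage sketch for ivmCreate, following the pattern of the demos earlier in this changeset (X and y stand for any training set):

options = ivmOptions;
options.kern = {'rbf', 'white'};
options.numActive = 50;
model = ivmCreate(size(X, 1), size(y, 2), X, y, options);
for iter = 1:options.extIters
  model = ivmOptimiseIvm(model, options.display);
  model = ivmOptimiseKernel(model, options.display, options.kernIters);
end
ivmDisplay(model);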
diff --git a/gpmivm/ivmDeconstruct.m b/gpmivm/ivmDeconstruct.m
new file mode 100644
index 00000000..290f0893
--- /dev/null
+++ b/gpmivm/ivmDeconstruct.m
@@ -0,0 +1,27 @@
+function [kern, noise, ivmInfo] = ivmDeconstruct(model)
+
+% IVMDECONSTRUCT Break IVM in pieces for saving.
+% FORMAT
+% DESC takes an IVM model structure and breaks it into component
+% parts for saving.
+% ARG model : the model that needs to be saved.
+% RETURN kern : the kernel component of the IVM model.
+% RETURN noise : the noise component of the IVM model.
+% RETURN ivmInfo : a structure containing the other information
+% from the IVM: what the active set is, what the inactive set is
+% and what the site parameters are.
+%
+% SEEALSO : ivmReconstruct
+%
+% COPYRIGHT : Neil D. Lawrence, 2005
+
+% IVM
+
+kern = rmfield(model.kern, 'Kstore');
+kern = rmfield(kern, 'diagK');
+noise = model.noise;
+ivmInfo.I = model.I;
+ivmInfo.J = model.J;
+ivmInfo.m = model.m;
+ivmInfo.beta = model.beta;
+ivmInfo.type = 'ivm';
diff --git a/gpmivm/ivmDisplay.m b/gpmivm/ivmDisplay.m
new file mode 100644
index 00000000..5c00a88e
--- /dev/null
+++ b/gpmivm/ivmDisplay.m
@@ -0,0 +1,32 @@
+function ivmDisplay(model, spacing)
+
+% IVMDISPLAY Display parameters of an IVM model.
+% FORMAT
+% DESC displays the parameters of an IVM model.
+% ARG model : the model to display.
+%
+% FORMAT does the same as above, but indents the display according
+% to the amount specified.
+% ARG model : the IVM model to display.
+% ARG spacing : how many spaces to indent the display of the kernel by.
+%
+% SEEALSO : gaussianNoiseParamInit, modelDisplay, noiseDisplay
+%
+% COPYRIGHT : Neil D. Lawrence, 2004, 2005, 2006, 2007
+
+% IVM
+
+if nargin > 1
+  spacing = repmat(32, 1, spacing);  % 32 is ASCII space.
+else
+  spacing = [];
+end
+spacing = char(spacing);
+fprintf(spacing);
+fprintf('IVM Model:\n')
+fprintf(spacing);
+fprintf(' Noise Model:\n')
+noiseDisplay(model.noise, length(spacing)+2);
+fprintf(spacing);
+fprintf(' Kernel:\n');
+kernDisplay(model.kern, length(spacing)+2);
diff --git a/gpmivm/ivmDowndateM.m b/gpmivm/ivmDowndateM.m
new file mode 100644
index 00000000..d26f4c70
--- /dev/null
+++ b/gpmivm/ivmDowndateM.m
@@ -0,0 +1,94 @@
+function model = ivmDowndateM(model, index)
+
+% IVMDOWNDATEM Remove point from M, L, mu and varSigma.
+% FORMAT
+% DESC removes the given data point from the IVM representations,
+% in particular the matrices M and L, as well as from mu and
+% varSigma. Unfortunately this operation can be numerically
+% unstable: if it is done too many times these representations must
+% be recomputed (see ivmComputeLandM).
+% ARG model : the model from which the point is to be removed.
+% ARG index : the index of the point to remove from the model.
+%
+% SEEALSO : ivmRemovePoint, ivmEpUpdatePoint, ivmComputeLandM,
+% ivmUpdateM, rocholhFactorise, rocholFactorise, rocholTransMultiply,
+% rocholForeSub
+%
+% COPYRIGHT : Neil D. Lawrence, 2005
+
+% IVM
+
+d = model.d;
+k = find(model.I == index);
+for c = 1:length(model.Sigma)
+  % Compute the kth row of Sigma
+  a = model.Sigma(c).M(:, index);
+  s = model.kern.Kstore(:, k)' - a'*model.Sigma(c).M;
+  % Compute the three vectors that form Lambda.
+
+  %sLambda_k = rocholFactorise(model.Sigma(c).Linv(k:end, k));
+  v = model.Sigma(c).Linv(k:end, k);
+%  v = v/eps;
+%  sLambda_k = rocholFactorise(v);
+  sLambda_k = rocholhFactorise(v);
+
+  model.Sigma(c).M(k:end, :) = ...
+      rocholForeSub(sLambda_k, ...
+                    model.Sigma(c).M(k:end, :));
+
+  model.Sigma(c).M(k, :) = [];
+  t2 = model.Sigma(c).Linv(k, 1:k);
+  if sLambda_k.n > 1
+    slambda_11 = sLambda_k.u(1)*sLambda_k.v(1);
+    slambda_21 = sLambda_k.v*sLambda_k.u(1);
+    slambda_21(1) = [];
+    sLambda_22 = sLambda_k;
+    sLambda_22.s(1) = [];
+    sLambda_22.u(1) = [];
+    sLambda_22.v(1) = [];
+    sLambda_22.n = sLambda_22.n-1;
+    T3prime = rocholForeSub(sLambda_22, model.Sigma(c).Linv(k+1:end, :));
+    T3prime(:, 1:k) = T3prime(:, 1:k) ...
+        - rocholForeSub(sLambda_22, slambda_21*1/slambda_11)*t2;
+    v = T3prime(:, k+1:end)\T3prime(:, k);
+    sVtilde = rocholFactorise(v);
+    V = rocholTransMultiply(sVtilde, T3prime(:, k+1:end)')';
+    T33inv = rocholTransMultiply(sLambda_22, model.Sigma(c).L(k+1:end, k+1:end)')';
+    invV = rocholForeSub(sVtilde, T33inv);
+    % invV = V\eye(size(V));
+    model.Sigma(c).Linv(end, :) = [];
+    model.Sigma(c).Linv(:, end) = [];
+    model.Sigma(c).Linv(k:end, :) = [T3prime(:, 1:k-1) V];
+    model.Sigma(c).L(end, :) = [];
+    model.Sigma(c).L(:, end) = [];
+    model.Sigma(c).L(k:end, :) = [-invV*T3prime(:, 1:k-1)*model.Sigma(c).L(1:k-1, 1:k-1) invV];
+  else
+    model.Sigma(c).L(:, end) = [];
+    model.Sigma(c).L(end, :) = [];
+    model.Sigma(c).Linv(:, end) = [];
+    model.Sigma(c).Linv(end, :) = [];
+  end
+  model.varSigma(:, c) = model.varSigma(:, c) + ((model.nu(index, c)*s).*s)';
+  %/~
+  if any(model.varSigma(:, c)<0)
+    warning('Variance less than zero')
+  end
+  %~/
+  model.mu(:, c) = model.mu(:, c) + model.g(index, c)*s';
+
+end
+if length(model.Sigma)==1 & size(model.y, 2)>1
+  for c = 2:size(model.y, 2)
+    model.varSigma(:, c) = model.varSigma(:, c) ...
+        - ((model.nu(index, c)*s).*s)';
+    model.mu(:, c) = model.mu(:, c) + model.g(index, c)*s';
+    %/~
+    if any(model.varSigma(:, c)<0)
+      warning('Variance less than zero')
+    end
+    %~/
+  end
+end
+model.kern.Kstore(:, k) = [];
diff --git a/gpmivm/ivmDowndateNuG.m b/gpmivm/ivmDowndateNuG.m
new file mode 100644
index 00000000..4d0a5afb
--- /dev/null
+++ b/gpmivm/ivmDowndateNuG.m
@@ -0,0 +1,25 @@
+function model = ivmDowndateNuG(model, index)
+
+% IVMDOWNDATENUG Downdate nu and g parameters associated with noise model.
+% FORMAT
+% DESC removes a given data point from the nu and g
+% representations.
+% ARG model : the model from which the point is to be removed.
+% ARG index : the index of the point to be removed.
+% RETURN model : the model with the point removed.
+%
+% SEEALSO : ivmRemovePoint, ivmEpUpdatePoint, ivmDowndateM
+%
+% COPYRIGHT : Neil D. Lawrence, 2005
+
+% IVM
+
+if isfield(model.kern, 'whiteVariance')
+  subTerm = model.varSigma(index, :)-model.kern.whiteVariance;
+else
+  subTerm = model.varSigma(index, :);
+end
+subTerm = model.varSigma(index, :);
+model.nu(index, :) = 1./(1./model.beta(index, :) - subTerm);
+model.g(index, :) = model.nu(index, :).*(model.mu(index, :) - model.m(index, :));
+
diff --git a/gpmivm/ivmDowndateSites.m b/gpmivm/ivmDowndateSites.m
new file mode 100644
index 00000000..beb085a0
--- /dev/null
+++ b/gpmivm/ivmDowndateSites.m
@@ -0,0 +1,18 @@
+function model = ivmDowndateSites(model, index)
+
+% IVMDOWNDATESITES Downdate site parameters.
+% FORMAT
+% DESC remove a given data point from the site parameters.
+% ARG model : the IVM structure from which the point is to be
+% removed.
+% ARG index : the index of the point to be removed.
+% RETURN model : the model with the given point removed.
+%
+% COPYRIGHT : Neil D. Lawrence, 2005
+%
+% SEEALSO : ivmEpUpdateM, ivmRemovePoint
+
+% IVM
+
+model.m(index, :) = 0;
+model.beta(index, :) = 0;
diff --git a/gpmivm/ivmEpLogLikelihood.m b/gpmivm/ivmEpLogLikelihood.m
new file mode 100644
index 00000000..75b6593a
--- /dev/null
+++ b/gpmivm/ivmEpLogLikelihood.m
@@ -0,0 +1,53 @@
+function L = ivmEpLogLikelihood(model, x, y);
+
+% IVMEPLOGLIKELIHOOD Return the EP approximation to the log-likelihood.
+% FORMAT
+% DESC computes the EP approximation to the log likelihood as given
+% in the JMLR paper of Kuss & Rasmussen.
+% ARG model : the IVM model for which the approximation is to be
+% computed.
+% RETURN L : the log likelihood of the data according to the EP
+% approximation.
+%
+% COPYRIGHT : Neil D. Lawrence, 2006
+%
+% SEEALSO : ivmCreate, ivmApproxLogLikelihood
+
+% IVM
+
+if nargin < 3
+  % This implies evaluate for the training data.
+  mu = model.mu;
+  varsigma = model.varSigma;
+  y = model.y;
+else
+  [mu, varsigma] = ivmPosteriorMeanVar(model, x);
+end
+
+if model.noise.spherical
+else
+  varSigSlash = ...
+      1./(1./model.varSigma(model.I, :) - model.beta(model.I, :));
+  muSlash = ...
+      (model.mu(model.I, :)./model.varSigma(model.I, :) ...
+       - model.m(model.I, :).*model.beta(model.I, :)).*varSigSlash;
+
+  L = 0.5*(sum(sum(log(varSigSlash+1./model.beta(model.I, :))))...
+           + sum(sum((model.m(model.I, :) - muSlash)...
+                     .*(model.m(model.I, :)-muSlash) ...
+                     ./((varSigSlash+1./model.beta(model.I, :))))));
+end
+L = L + noiseLogLikelihood(model.noise, muSlash, varSigSlash, ...
+                           y(model.I, :));
+
+L = L + ivmApproxLogLikelihood(model);
+
+% check if there is a prior over kernel parameters
+if isfield(model.kern, 'priors')
+  fhandle = str2func([model.kern.type 'KernExpandParams']);
+  params = fhandle(model.kern);
+  for i = 1:length(model.kern.priors)
+    index = model.kern.priors(i).index;
+    L = L + priorLogProb(model.kern.priors(i), params(index));
+  end
+end
diff --git a/gpmivm/ivmEpUpdateM.m b/gpmivm/ivmEpUpdateM.m
new file mode 100644
index 00000000..bd50f9cf
--- /dev/null
+++ b/gpmivm/ivmEpUpdateM.m
@@ -0,0 +1,145 @@
+function model = ivmEpUpdateM(model, index)
+
+% IVMEPUPDATEM Update matrix M, L, varSigma and mu for EP.
+% FORMAT
+% DESC performs an EP update on the IVM model's representations for
+% a given point.
+% ARG model : the model for which the update will be done.
+% ARG index : the index of the point for which the update will take
+% place.
+%
+% SEEALSO : ivmEpUpdatePoint, ivmDowndateSites
+%
+% COPYRIGHT : Neil D. Lawrence, 2006
+
+% IVM
+
+d = length(model.I);
+k = find(model.I == index);
+if k == d % This point has just been included so EP update is irrelevant.
+  return;
+end
+kvector = model.kern.Kstore(:, k);
+for c = 1:length(model.Sigma)
+  % Compute the kth row of Sigma
+  a = model.Sigma(c).M(:, index);
+  s = kvector' - a'*model.Sigma(c).M;
+
+  v = model.Sigma(c).Linv(k:end, k);
+  sLambda_k = rocholhFactorise(v);
+
+  % Update M
+  model.Sigma(c).M(k:end, :) = ...
+      rocholForeSub(sLambda_k, ...
+                    model.Sigma(c).M(k:end, :));
+  model.Sigma(c).M(k:end-1) = model.Sigma(c).M(k+1:end);
+
+  % Update L
+  t2 = model.Sigma(c).Linv(k, 1:k);
+  slambda_11 = sLambda_k.u(1)*sLambda_k.v(1);
+  slambda_21 = sLambda_k.v*sLambda_k.u(1);
+  slambda_21(1) = [];
+  sLambda_22 = sLambda_k;
+  sLambda_22.s(1) = [];
+  sLambda_22.u(1) = [];
+  sLambda_22.v(1) = [];
+  sLambda_22.n = sLambda_22.n-1;
+  T3prime = rocholForeSub(sLambda_22, model.Sigma(c).Linv(k+1:end, :));
+  T3prime(:, 1:k) = T3prime(:, 1:k) ...
+      - rocholForeSub(sLambda_22, slambda_21*1/slambda_11)*t2;
+  v = T3prime(:, k+1:end)\T3prime(:, k);
+  sVtilde = rocholFactorise(v);
+  V = rocholTransMultiply(sVtilde, T3prime(:, k+1:end)')';
+  T33inv = rocholTransMultiply(sLambda_22, model.Sigma(c).L(k+1:end, k+1:end)')';
+  invV = rocholForeSub(sVtilde, T33inv);
+  model.Sigma(c).Linv(k:end-1, 1:end-1) = [T3prime(:, 1:k-1) V];
+  model.Sigma(c).L(k:end-1, 1:end-1) = [-invV*T3prime(:, 1:k-1)* ...
+                      model.Sigma(c).L(1:k-1, 1:k-1) invV];
+  %/~
+  oldVarSigma = model.varSigma;
+  %~/
+  model.varSigma(:, c) = model.varSigma(:, c) + ((model.nu(index, c)*s).*s)';
+  %/~
+  if any(model.varSigma(:, c)<0)
+    warning('Variance less than zero')
+  end
+  %~/
+  model.mu(:, c) = model.mu(:, c) + model.g(index, c)*s';
+
+end
+if length(model.Sigma)==1 & size(model.y, 2)>1
+  for c = 2:size(model.y, 2)
+    model.varSigma(:, c) = model.varSigma(:, c) ...
+        - ((model.nu(index, c)*s).*s)';
+    model.mu(:, c) = model.mu(:, c) + model.g(index, c)*s';
+    %/~
+    if any(model.varSigma(:, c)<0)
+      warning('Variance less than zero')
+    end
+    %~/
+  end
+end
+
+%model = ivmDowndateSites(model, index);
+%model.J = [model.J index];
+
+% Ensure nu and g now accurately reflect mu and varsigma.
+model = ivmUpdateNuG(model, index);
+% Update the site parameter.
+model = ivmUpdateSites(model, index);
+d = model.d;
+
+% Compute the values of M for the new point
+for c = 1:length(model.Sigma)
+  a = model.Sigma(c).M(1:end-1, index);
+  s = kvector' - a'*model.Sigma(c).M(1:end-1, :);
+  lValInv = sqrt(model.nu(index, c));
+  %/~
+  % If nu is so low then the included data-point isn't really useful.
+  if lValInv < 1e-16
+    warning(['Square root of nu is ' num2str(lValInv)])
+  end
+  %~/
+  model.Sigma(c).M(end, :) = lValInv*s;
+  ainv = (-a*lValInv)'/tril(model.Sigma(c).L(1:end-1, 1:end-1));
+  model.Sigma(c).L(end, :) = [a' 1/lValInv];
+  model.Sigma(c).Linv(end, :) = [ainv lValInv];
+  % make sure Matlab knows they are lower triangular.
+  model.Sigma(c).L = tril(model.Sigma(c).L);
+  model.Sigma(c).Linv = tril(model.Sigma(c).Linv);
+  %/~
+  oldVarSigma = model.varSigma(:, c);
+  if any(model.varSigma(:, c)<0)
+    warning('Variance less than zero')
+  end
+  %~/
+  model.varSigma(:, c) = model.varSigma(:, c) - ((model.nu(index, c)*s).*s)';
+  %/~
+  if any(model.varSigma(:, c)<0)
+    warning('Variance less than zero')
+  end
+  %~/
+  model.mu(:, c) = model.mu(:, c) + model.g(index, c)*s';
+end
+if length(model.Sigma)==1 & size(model.y, 2)>1
+  for c = 2:size(model.y, 2)
+    model.varSigma(:, c) = model.varSigma(:, c) ...
+        - ((model.nu(index, c)*s).*s)';
+    model.mu(:, c) = model.mu(:, c) + model.g(index, c)*s';
+    %/~
+    if any(model.varSigma(:, c)<0)
+      warning('Variance less than zero')
+    end
+    %~/
+  end
+end
+% Swap the columns of the kernel matrix about.
+model.kern.Kstore(:, k:end-1) = model.kern.Kstore(:, k+1:end);
+model.kern.Kstore(:, end) = kvector;
+
+% Move point to end of active set.
+model.I(k:end-1) = model.I(k+1:end);
+model.I(end) = index;
diff --git a/gpmivm/ivmEpUpdatePoint.m b/gpmivm/ivmEpUpdatePoint.m
new file mode 100644
index 00000000..e2d6ed95
--- /dev/null
+++ b/gpmivm/ivmEpUpdatePoint.m
@@ -0,0 +1,27 @@
+function model = ivmEpUpdatePoint(model, i)
+
+% IVMEPUPDATEPOINT Do an EP update of a point.
+% FORMAT
+% DESC performs an EP update for a given point in an IVM model. Be
+% careful when using because repeated EP updates can be unstable.
+% ARG model : the model for which the EP update is to apply.
+% ARG i : the index of the point which is being updated.
+% RETURN model : the returned model with the given point updated.
+%
+% COPYRIGHT : Neil D. Lawrence, 2005, 2006
+%
+% SEEALSO : ivmDowndateNuG, ivmEpUpdateM
+
+% IVM
+
+index = find(model.I == i);
+if isempty(index)
+  error(['Point ' num2str(i) ' is not in active set'])
+end
+
+%/~
+model = ivmUpdateNuG(model, i);
+%~/
+% Set nu ready for the point removal.
+%model = ivmDowndateNuG(model, i);
+model = ivmEpUpdateM(model, i);
diff --git a/gpmivm/ivmGradX.m b/gpmivm/ivmGradX.m
new file mode 100644
index 00000000..43138faf
--- /dev/null
+++ b/gpmivm/ivmGradX.m
@@ -0,0 +1,22 @@
+function g = ivmGradX(model, x, y);
+
+% IVMGRADX Returns the gradient of the log-likelihood wrt x.
+% FORMAT
+% DESC returns the gradient of the approximate log likelihood with
+% respect to an input location x. This is used for optimising with
+% respect to x in the GP-LVM.
+% ARG model : the model for which the gradient is being computed.
+% ARG x : the input location where the gradient is to be evaluated.
+% ARG y : the target location where the gradient is being
+% evaluated.
+%
+% SEEALSO : ivmPosteriorMeanVar, ivmPosteriorGradMeanVar
+%
+% COPYRIGHT : Neil D. Lawrence, 2004, 2005
+
+% IVM
+
+[mu, varsigma] = ivmPosteriorMeanVar(model, x);
+[dmu, dvs] = ivmPosteriorGradMeanVar(model, x);
+
+g = noiseGradX(model.noise, mu, varsigma, dmu, dvs, y);
Lawrence, 2004, 2005 + +% IVM + +[mu, varsigma] = ivmPosteriorMeanVar(model, x); +[dmu, dvs] = ivmPosteriorGradMeanVar(model, x); + +g = noiseGradX(model.noise, mu, varsigma, dmu, dvs, y); diff --git a/gpmivm/ivmGunnarData.m b/gpmivm/ivmGunnarData.m new file mode 100644 index 00000000..050fac5f --- /dev/null +++ b/gpmivm/ivmGunnarData.m @@ -0,0 +1,70 @@ +function model = ivmGunnarData(dataSet, dataNum, kernelType, invWidth, dVal) + +% IVMGUNNARDATA Script for running experiments on Gunnar data. +% Note that for this script to work, you need to have +% placed Gunnar Raetsch's benchmark data sets in the correct +% directory under the DATASETS toolbox (see +% http://ida.first.fraunhofer.de/projects/bench/benchmarks.htm for +% the data). +% FORMAT +% DESC is a script for running experiments on Gunnar Raetsch's +% benchmark data sets. +% ARG dataSet : the data set to be trained on. +% ARG dataNum : the partition to be trained. +% ARG kernelType : the kernel type to be used. +% ARG invWidth : inverse width of the kernel being used. +% ARG dVal : active set size to be used. +% RETURN model : the model learnt on the data. +% +% SEEALSO : mapLoadData +% +% COPYRIGHT : Neil D. Lawrence, 2005 + +% IVM + +% Load the data +HOME = getenv('HOME'); +fprintf('Dataset: %s, number %d, inverse width %2.4f\n', dataSet, dataNum, invWidth) +fs = filesep; +baseDir = [HOME filesep 'datasets' filesep 'gunnar' filesep dataSet filesep]; +[X, y, Xtest, ytest] = mapLoadData(['gunnar' ':' dataSet ':' ... + num2str(dataNum)]); + +% Get the default options +options = ivmOptions; +% Modify kernel type and number of active points. +options.kern = kernelType; +options.numActive = dVal; +options.display = 0; +model = ivmCreate(size(X, 1), size(y, 2), X, y, options); +model.kern.comp{1}.inverseWidth = invWidth; +fprintf('Initial model:\n'); +ivmDisplay(model); + +% Do 15 iterations +for i = 1:15 + % Select the active set. + model = ivmOptimiseIvm(model, options.display); + % Optimise the kernel parameters. + model = ivmOptimiseKernel(model, options.display, 100); + fprintf('Iteration %d\n', i); + ivmDisplay(model); +end +model = ivmOptimiseIvm(model, options.display); +% Display the final model. +fprintf('Final model:\n'); +ivmDisplay(model); + +yPred = ivmOut(model, Xtest); +classError = 1- sum(ytest ==yPred)/length(ytest); + +ll = ivmApproxLogLikelihood(model); +fprintf('Test Error %2.4f\n', classError); +fprintf('Model likelihood %2.4f\n', ll); +invWidthStr = num2str(invWidth); +ind = find(invWidthStr==46); +invWidthStr(ind) = 'p'; +[kern, noise, ivmInfo] = ivmDeconstruct(model); +save(['ivm' dataSet num2str(dataNum) kernelType{1} invWidthStr], 'classError', 'll', ... + 'kern', 'noise', 'ivmInfo') + diff --git a/gpmivm/ivmInit.m b/gpmivm/ivmInit.m new file mode 100644 index 00000000..f2884efd --- /dev/null +++ b/gpmivm/ivmInit.m @@ -0,0 +1,73 @@ +function model = ivmInit(model, d) + +% IVMINIT Initialise the IVM model. +% FORMAT +% DESC sets up some initial matrices and vectors in the IVM +% representation in preparation for learning. +% ARG model : the model to be set up. +% ARG d : optional value of the active set size (if not given, +% model.d is used). +% RETURN model : model with various matrices set up for learning. +% +% SEEALSO : ivmOptimiseIvm, ivmSelectPoints +% +% COPYRIGHT : Neil D. Lawrence, 2004, 2005 + +% IVM + +if nargin < 2 + d = model.d; +end + +% Get number of data. +numData = size(model.y, 1); +numOut = size(model.y, 2); + +% Initialise kernel storage. 
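+% (Kstore holds the covariances between every data point and the active
+% set; a column is filled in each time a point is included, see ivmUpdateM.)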
+model.kern.Kstore = zeros(numData, d); + +% Initialise Indices +switch model.selectionCriterion + case 'none' + model.I = 1:size(model.X, 1); + otherwise + model.I = []; +end + +model.terminate = 0; + +% Initialise parameters +model.kern.diagK = kernDiagCompute(model.kern, model.X); + +model.m = sparse(zeros(numData, numOut)); +model.beta = sparse(zeros(numData, numOut)); + +model.varSigma = repmat(model.kern.diagK, 1, numOut); +model.mu = zeros(numData, numOut); + +model.g = zeros(numData, numOut); +model.nu = zeros(numData, numOut); + +%/~ +% % Initialise site precision and mean +% switch model.noise.type + +% case {'probit', 'multiprobit'} + +% model.u = zeros(numData, numOut); +% model.c = zeros(numData, numOut); + +% case 'heaviside' +% model.u = zeros(numData, numOut); +% model.c = zeros(numData, numOut); + +% case 'ordered' +% model.u = zeros(numData, numOut); +% model.c = zeros(numData, numOut); + + + +% end +%~/ +model = ivmUpdateNuG(model, 1:numData); + diff --git a/gpmivm/ivmKernelGradient.m b/gpmivm/ivmKernelGradient.m new file mode 100644 index 00000000..6303240a --- /dev/null +++ b/gpmivm/ivmKernelGradient.m @@ -0,0 +1,30 @@ +function g = ivmKernelGradient(params, model) + +% IVMKERNELGRADIENT Gradient of likelihood approximation wrt kernel parameters. +% FORMAT +% DESC returns the gradient of the approximate log likelihood with +% respect to the kernel parameters. +% ARG params : the current parameters of the kernel. +% ARG model : the model structure being optimised. +% RETURN g : the gradient of the approximate log likelihood with +% respect to the kernel parameters +% +% SEEALSO : scg, conjgrad, quasinew, kernExpandParam, +% ivmOptimiseKernel, kernPriorGradient, ivmApproxLogLikeKernGrad, +% ivmKernelObjective +% +% COPYRIGHT : Neil D. Lawrence, 2004, 2005 + +% IVM + +%/~ +if any(isnan(params)) + warning('Parameter is NaN') +end +%~/ + +model.kern = kernExpandParam(model.kern, params); +g = ivmApproxLogLikeKernGrad(model); +g = g + kernPriorGradient(model.kern); +g = -g; + diff --git a/gpmivm/ivmKernelObjective.m b/gpmivm/ivmKernelObjective.m new file mode 100644 index 00000000..ccadb321 --- /dev/null +++ b/gpmivm/ivmKernelObjective.m @@ -0,0 +1,31 @@ +function f = ivmKernelObjective(params, model) + +% IVMKERNELOBJECTIVE Compute the negative of the IVM log likelihood approximation. +% FORMAT + +% DESC computes the IVM negative log likelihood approximation at a +% given set of kernel parameters. Adiditonally if there is any +% regularisation present on the kernel parameter it computes the prior +% probability and adds it in. +% ARG params : the parameter values where the obective is to be +% evaluated. +% ARG model : the model structure for which the objective is to be +% evaluated. +% +% SEEALSO : ivmKernelGradient, ivmOptimiseKernel, kernExpandParam, +% ivmApproxLogLikelihood, kernPriorLogProb +% +% COPYRIGHT : Neil D. Lawrence, 2004, 2005 + +% IVM + +%/~ +if any(isnan(params)) + warning('Parameter is NaN') +end +%~/ + +model.kern = kernExpandParam(model.kern, params); +f = ivmApproxLogLikelihood(model); +f = f + kernPriorLogProb(model.kern); +f = -f; diff --git a/gpmivm/ivmLogLikelihood.m b/gpmivm/ivmLogLikelihood.m new file mode 100644 index 00000000..3a4cbb19 --- /dev/null +++ b/gpmivm/ivmLogLikelihood.m @@ -0,0 +1,37 @@ +function L = ivmLogLikelihood(model, x, y); + +% IVMLOGLIKELIHOOD Return the log-likelihood for the IVM. +% FORMAT +% DESC computes the log likelihood of the given data points under +% the IVM and the given noise model. 
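+% If the input and target locations are omitted, the likelihood is
+% evaluated at the model's stored training data.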
+% ARG model : the model for which the log likelihood is computed. +% ARG x : the input locations. +% ARG y : the target values. +% +% SEEALSO : ivmNegLogLikelihood, ivmPosteriorMeanVar, +% noiseLogLikelihood +% +% COPYRIGHT : Neil D. Lawrence + +% IVM + +if nargin < 3 + % This implies evaluate for the training data. + mu = model.mu; + varsigma = model.varSigma; + y = model.y; +else + [mu, varsigma] = ivmPosteriorMeanVar(model, x); +end + +L = noiseLogLikelihood(model.noise, mu, varsigma, y); + +% check if there is a prior over kernel parameters +if isfield(model.kern, 'priors') + fhandle = str2func([model.kern.type 'KernExpandParams']); + params = fhandle(model.kern); + for i = 1:length(model.kern.priors) + index = model.kern.priors(i).index; + L = L + priorLogProb(model.kern.priors(i), params(index)); + end +end diff --git a/gpmivm/ivmMeshVals.m b/gpmivm/ivmMeshVals.m new file mode 100644 index 00000000..51bbd15f --- /dev/null +++ b/gpmivm/ivmMeshVals.m @@ -0,0 +1,27 @@ +function [X, Y, Z, varZ] = ivmMeshVals(model, limx, limy, number) + +% IVMMESHVALS Give the output of the IVM for contour plot display. +% FORMAT +% DESC gives the output of the IVM for plotting as a contour plot. +% ARG model : the model to be plotted. +% ARG limx : the x limits of the contour plot. +% ARG limy : the y limits of the contour plot. +% ARG number : the number of points to use along each side of the +% grid for the contour plot. +% RETURN X : the matrix of X values for the contour plot. +% RETURN Y : the matrix of Y values for the contour plot. +% RETURN Z : the matrix of Z values for the contour plot. +% +% SEEALSO : ivmContour, ncnmContour +% +% COPYRIGHT : Neil D. Lawrence, 2005, 2004 + +% IVM + +x = linspace(limx(1), limx(2), number); +y = linspace(limy(1), limy(2), number); +[X, Y] = meshgrid(x, y); + +[Z, varZ] = ivmPosteriorMeanVar(model, [X(:) Y(:)]); +Z = reshape(Z, size(X)); +varZ = reshape(varZ, size(X)); diff --git a/gpmivm/ivmNegGradientNoise.m b/gpmivm/ivmNegGradientNoise.m new file mode 100644 index 00000000..0ad30737 --- /dev/null +++ b/gpmivm/ivmNegGradientNoise.m @@ -0,0 +1,22 @@ +function g = ivmNegGradientNoise(params, model) + +% IVMNEGGRADIENTNOISE Wrapper function for calling noise param gradients. +% FORMAT +% DESC is a wrapper function that returns the negative gradients of +% the log likelihood with respect to the noise parameters for +% optimisation in the NETLAB style. +% ARG params : the parameters where the gradients are to be +% computed. +% ARG model : the model for which the gradients are to be computed. +% RETURN g : the negative gradients of the log likelihood with respect to +% the noise parameters. +% +% SEEALSO : noiseGradientParam, noiseExpandParam +% +% COPYRIGHT : Neil D. Lawrence, 2004, 2005 +% + +% IVM + +model.noise = noiseExpandParam(model.noise, params); +g = - noiseGradientParam(model.noise, model.mu, model.varSigma, model.y); diff --git a/gpmivm/ivmNegLogLikelihood.m b/gpmivm/ivmNegLogLikelihood.m new file mode 100644 index 00000000..5745bbe5 --- /dev/null +++ b/gpmivm/ivmNegLogLikelihood.m @@ -0,0 +1,19 @@ +function e = ivmNegLogLikelihood(params, model) + +% IVMNEGLOGLIKELIHOOD Wrapper function for calling IVM likelihood. +% FORMAT +% DESC is a wrapper function for calling the IVM log likelihood. +% ARG param : the parameters where the log likelihood is to be +% evaluated. +% ARG model : the model structure for which the log likelihood is +% being evaluated. +% ARG e : the negative log likelihood of the model. +% +% COPYRIGHT : Neil D. 
Lawrence, 2005 +% +% SEEALSO : noiseExpandParam, ivmLogLikelihood + +% IVM + +model.noise = noiseExpandParam(model.noise, params); +e = - ivmLogLikelihood(model); diff --git a/gpmivm/ivmOptimise.m b/gpmivm/ivmOptimise.m new file mode 100644 index 00000000..88e744de --- /dev/null +++ b/gpmivm/ivmOptimise.m @@ -0,0 +1,30 @@ +function model = ivmOptimise(model, options); + +% IVMOPTIMISE Optimise the IVM. +% FORMAT +% DESC optimises an IVM by iterating between selecting points and +% optimising kernel and noise parameters. +% ARG model : the model to be optimised. +% ARG options : options structure as returned by ivmOptions. +% +% SEEALSO : ivmOptimiseIvm, ivmOptimiseKernel, ivmOptimiseNoise +% +% COPYRIGHT : Neil D. Lawrence, 2005, 2006 + +% IVM + +for i = 1:options.extIters + if options.kernIters + % Update the kernel if required. + model = ivmOptimiseIvm(model, options.display); + model = ivmOptimiseKernel(model, options.display, options.kernIters); + end + if options.noiseIters + % Update the noise model if required. + model = ivmOptimiseIvm(model, options.display); + model = ivmOptimiseNoise(model, options.display, options.noiseIters); + end + if options.display + ivmDisplay(model); + end +end diff --git a/gpmivm/ivmOptimiseKernel.m b/gpmivm/ivmOptimiseKernel.m new file mode 100644 index 00000000..7832338b --- /dev/null +++ b/gpmivm/ivmOptimiseKernel.m @@ -0,0 +1,34 @@ +function model = ivmOptimiseKernel(model, display, iters); + +% IVMOPTIMISEKERNEL Optimise the kernel parameters. +% FORMAT +% DESC optimises the kernel parameters of the IVM model. +% ARG model : the model for which the kernel parameters are to be +% optimised. +% ARG display : how much to display during optimisation (defaults +% to level 1). +% ARG iters : how many iterations of optimisation to use (defaults +% to 500). +% +% SEEALSO : optimiseParams, defaultOptions, ivmKernelObjective, ivmKernelGradient +% +% COPYRIGHT : Neil D. Lawrence, 2004, 2005 + +% IVM + +if nargin < 3 + iters = 500; + if nargin < 2 + display = 1; + end +end +options = defaultOptions; +if display + options(1) = 1; +end +options(14) = iters; + + +model = optimiseParams('kern', 'scg', 'ivmKernelObjective', ... + 'ivmKernelGradient', options, model); + diff --git a/gpmivm/ivmOptimiseNoise.m b/gpmivm/ivmOptimiseNoise.m new file mode 100644 index 00000000..b73a1780 --- /dev/null +++ b/gpmivm/ivmOptimiseNoise.m @@ -0,0 +1,33 @@ +function model = ivmOptimiseNoise(model, display, iters); + +% IVMOPTIMISENOISE Optimise the noise parameters. +% FORMAT +% DESC optimises the noise parameters in the IVM model. +% ARG model : the model for which the noise parameters are to be +% optimised. +% ARG display : how much to display during optimisation (defaults +% to level 1). +% ARG iters : how many iterations of optimisation to use (defaults +% to 500). +% +% SEEALSO : optimiseParams, defaultOptions, ivmNegLogLikelihood, ivmNegGradientNoise +% +% COPYRIGHT : Neil D. Lawrence, 2004, 2005 + +% IVM + + +if nargin < 3 + iters = 500; + if nargin < 2 + display = 1; + end +end +options = defaultOptions; +if display + options(1) = 1; +end +options(14) = iters; + +model = optimiseParams('noise', 'scg', 'ivmNegLogLikelihood', ... + 'ivmNegGradientNoise', options, model); diff --git a/gpmivm/ivmOptions.m b/gpmivm/ivmOptions.m new file mode 100644 index 00000000..d52ee5d0 --- /dev/null +++ b/gpmivm/ivmOptions.m @@ -0,0 +1,24 @@ +function options = ivmOptions(varargin) + +% IVMOPTIONS Return default options for IVM model. 
+% FORMAT +% DESC returns the default options in a structure for a IVM model. +% RETURN options : structure containing the default options for the +% given approximation type. +% +% SEEALSO : ivmCreate +% +% COPYRIGHT : Neil D. Lawrence, 2006, 2005 + +% IVM + +% bog-standard kernel. +options.kern = {'rbf', 'bias', 'white'}; +options.numActive = 100; +options.noise = 'probit'; +options.selectionCriterion = 'entropy'; +options.display = 0; +options.kernIters = 100; +options.noiseIters = 0; +options.extIters = 8; + diff --git a/gpmivm/ivmOut.m b/gpmivm/ivmOut.m new file mode 100644 index 00000000..05c03e52 --- /dev/null +++ b/gpmivm/ivmOut.m @@ -0,0 +1,26 @@ +function y = ivmOut(model, x); + +% IVMOUT Evaluate the output of an IVM model. +% FORMAT +% DESC evaluates the output of a given IVM model. +% ARG model : the model for which the output is being evaluated. +% ARG x : the input position for which the output is required. +% RETURN y : the output of the GP model. The function checks if +% there is a noise model associated with the GP, if there is, it is +% used, otherwise the mean of gpPosteriorMeanVar is returned. +% +% SEEALSO : ivmCreate, ivmPosteriorMeanVar, noiseOut +% +% COPYRIGHT : Neil D. Lawrence, 2003, 2005 + +% IVM + +if nargin < 2 + % This implies evaluate for the training data. + mu = model.mu; + varsigma = model.varSigma; +else + [mu, varsigma] = ivmPosteriorMeanVar(model, x); +end + +y = noiseOut(model.noise, mu, varsigma); diff --git a/gpmivm/ivmPosteriorGradMeanVar.m b/gpmivm/ivmPosteriorGradMeanVar.m new file mode 100644 index 00000000..0aad1ab4 --- /dev/null +++ b/gpmivm/ivmPosteriorGradMeanVar.m @@ -0,0 +1,44 @@ +function [gmu, gsigmavar] = ivmPosteriorGradMeanVar(model, X); + +% IVMPOSTERIORGRADMEANVAR Gradient of mean and variances of the posterior wrt X. +% FORMAT +% DESC computes the gradient of the mean and variances of the +% posterior distribution of a IVM with respect to the +% input locations. +% ARG model : the model for which gradients are to be computed. +% ARG X : the input locations where gradients are to be computed. +% RETURN gmu : the gradient of the posterior mean with respect to +% the input locations. +% RETURN gsigmavar : the gradient of the posterior variances with +% respect to the input locations. +% +% SEEALSO : ivmCreate, ivmPosteriorMeanVar, gpPosteriorGradMeanVar +% +% COPYRIGHT : Neil D. Lawrence, 2003, 2004 + +% IVM + +D = size(model.y, 2); +if size(X, 1) > 1 + error('This function only handles one data-point at a time') +end + +gX = kernGradX(model.kern, X, model.X(model.I, :)); +kX = kernCompute(model.kern, X, model.X(model.I, :))'; +diaggK = kernDiagGradX(model.kern, X); + + +gmu = zeros(size(X, 2), D); +gsigmavar = zeros(size(X, 2), D); + +if length(model.Sigma)>1 + for i = 1:D + Kinvgk = model.Sigma(i).Linv'*(model.Sigma(i).Linv*gX); + gsigmavar(:, i) = diaggK' - 2*Kinvgk'*kX; + gmu(:, i) = Kinvgk'*model.m(model.I, i); + end +else + Kinvgk = model.Sigma.Linv'*(model.Sigma.Linv*gX); + gsigmavar = repmat(diaggK' - 2*Kinvgk'*kX, 1, D); + gmu = Kinvgk'*full(model.m(model.I, :)); +end diff --git a/gpmivm/ivmPosteriorMeanVar.m b/gpmivm/ivmPosteriorMeanVar.m new file mode 100644 index 00000000..dec09e1c --- /dev/null +++ b/gpmivm/ivmPosteriorMeanVar.m @@ -0,0 +1,53 @@ +function [mu, varsigma] = ivmPosteriorMeanVar(model, X); + +% IVMPOSTERIORMEANVAR Mean and variances of the posterior at points given by X. +% FORMAT +% DESC returns the posterior mean and variance for a given set of +% points. 
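+% The computation uses the stored active set representation: the
+% variances are the prior variances minus the squared column norms of
+% Linv*kX, and the means are formed from the site means model.m on the
+% active set.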
+% ARG model : the model for which the posterior will be computed. +% ARG x : the input positions for which the posterior will be +% computed. +% RETURN mu : the mean of the posterior distribution. +% RETURN sigma : the variances of the posterior distributions. +% +% SEEALSO : ivmCreate, ivmPosteriorGradMeanVar +% +% COPYRIGHT : Neil D. Lawrence, 2004, 2005 + +% IVM + +D = size(model.y, 2); +numData = size(X, 1); + +varsigma = zeros(numData, D); +mu = zeros(numData, D); + +% Compute kernel for new point. +kX = kernCompute(model.kern, X, model.X(model.I, :))'; + +% Compute diagonal of kernel for new point. +diagK = kernDiagCompute(model.kern, X); + +if length(model.Sigma) > 1 + for i = 1:D + Lk = model.Sigma(i).Linv*kX; + Kinvk = model.Sigma(i).Linv'*Lk; + for n = 1:numData + varsigma(n, i) = diagK(n) - Lk(:, n)'*Lk(:, n); + if any(varsigma(n, :) < 0) + warning('Varsigma less than zero'); + end + end + mu(:, i) = Kinvk'*model.m(model.I, i); + end +else + Lk = model.Sigma.Linv*kX; + Kinvk = model.Sigma.Linv'*Lk; + for n = 1:numData + varsigma(n, :) = repmat(diagK(n) - Lk(:, n)'*Lk(:, n), 1, D); + if varsigma(n, 1) < 0 + warning('Varsigma less than zero'); + end + end + mu = Kinvk'*full(model.m(model.I, :)); +end diff --git a/gpmivm/ivmPrintPlot.m b/gpmivm/ivmPrintPlot.m new file mode 100644 index 00000000..0cd75b03 --- /dev/null +++ b/gpmivm/ivmPrintPlot.m @@ -0,0 +1,37 @@ +function [h1, h2] = ivmPrintPlot(model, plotType, iter, X, ... + y, capName, experimentNo) + +% IVMPRINTPLOT Make a 3-D or contour plot of the IVM. +% FORMAT +% DESC makes a 3-D or contour plot from an IVM model to show +% results and prints it out to various directories (in eps and png form). +% ARG model : the model from which the plot is generated. +% ARG plotType : type of plot to be used, options include +% 'ncnmContour', which gives the contours at the edge of the null +% category region for the null category noise model, and +% 'ivmContour'. +% ARG iter : iteration number, if give it is used at the title of +% the plot. +% ARG X : optional argument, if given it is the plotted input +% locations, otherwise model.X is used. +% ARG y : optional argument, if given it is the plotted target +% locations, otherwise model.y is used. +% ARG capName : the name of the saved plots. +% ARG experimentNo : the experiment number to assign to the files. +% RETURN h1 : a handle to the data in the plot. +% RETURN h2 : a handle to any contours, or functions plotted +% derived from the IVM. +% +% SEEALSO : noise3dPlot, noisePointPlot, ivmMeshVals, ivmCreate +% +% COPYRIGHT : Neil D. Lawrence, 2007 + +% IVM + + +ivm3dPlot(model, plotType, iter, X, y); +% display active points. +model = ivmOptimiseIvm(model, 2); +fileName = ['dem' capName 'Ivm' num2str(experimentNo)]; + +printPlot(fileName, '../tex/diagrams', '../html'); diff --git a/gpmivm/ivmReadFromFID.m b/gpmivm/ivmReadFromFID.m new file mode 100644 index 00000000..94838f10 --- /dev/null +++ b/gpmivm/ivmReadFromFID.m @@ -0,0 +1,54 @@ +function [model, origI] = ivmReadFromFID(FID, varargin) + +% IVMREADFROMFID Load from a FID produced by the C++ implementation. +% FORMAT +% DESC loads in from a file stream the data format produced by the +% C++ implementation of the IVM. +% ARG FID : the file ID from where the data is loaded. +% RETURN model : the model loaded in from the file. +% model. +% RETURN I : the indices of the active points in the original data set. +% +% COPYRIGHT : Neil D. 
Lawrence, 2007, 2008 +% +% SEEALSO : ivmReadFromFile + +% IVM + +numData = readIntFromFID(FID, 'numData'); +numProcesses = readIntFromFID(FID, 'outputDim'); +numFeatures = readIntFromFID(FID, 'inputDim'); +activeSetSize = readIntFromFID(FID, 'activeSetSize'); + +kern = modelReadFromFID(FID); +noise = modelReadFromFID(FID); + +X = zeros(activeSetSize, numFeatures); +y = zeros(activeSetSize, numProcesses); +m = zeros(activeSetSize, numProcesses); +beta = zeros(activeSetSize, numProcesses); + +origI = zeros(activeSetSize, 1); + +lineStr = getline(FID); +tokens = tokenise(lineStr, '='); +if(length(tokens)~=2 | ~strcmp(tokens{1}, 'activeSet')) + error('Incorrect file format.') +end +tokens = tokenise(tokens{2}, ' '); +if(length(tokens)~=activeSetSize) + error('Incorrect file format.') +end +for i = 1:activeSetSize + origI(i) = str2num(tokens{i}); +end +y = modelReadFromFID(FID); +X = modelReadFromFID(FID); +m = modelReadFromFID(FID); +beta = modelReadFromFID(FID); +ivmInfo.J = []; +ivmInfo.I = 1:activeSetSize; +ivmInfo.m = m; +ivmInfo.beta = beta; + +model = ivmReconstruct(kern, noise, ivmInfo, X, y); diff --git a/gpmivm/ivmReadFromFile.m b/gpmivm/ivmReadFromFile.m new file mode 100644 index 00000000..183caf4d --- /dev/null +++ b/gpmivm/ivmReadFromFile.m @@ -0,0 +1,23 @@ +function [model, I] = ivmReadFromFile(fileName) + +% IVMREADFROMFILE Load a file produced by the C++ implementation. +% FORMAT +% DESC loads an IVM model from a file produced by the C++ +% implementation of the IVM. +% ARG fileName : the file name written by the C++ software. +% RETURN model : a MATLAB IVM model structure containing the +% model from the file. +% RETURN I : the indices of the active points in the original data set. +% +% SEEALSO : ivmReadFromFID +% +% COPYRIGHT : Neil D. Lawrence, 2007 + +% IVM + +FID = fopen(fileName); +if FID==-1 + error(['Cannot find file ' fileName]) +end +[model, I] = ivmReadFromFID(FID); +fclose(FID); diff --git a/gpmivm/ivmReconstruct.m b/gpmivm/ivmReconstruct.m new file mode 100644 index 00000000..ed9a3341 --- /dev/null +++ b/gpmivm/ivmReconstruct.m @@ -0,0 +1,49 @@ +function model = ivmReconstruct(kern, noise, ivmInfo, X, y) + +% IVMRECONSTRUCT Reconstruct an IVM form component parts. +% FORMAT +% DESC takes component parts of an IVM model and reconstructs the +% IVM model. The component parts are normally retrieved from a +% saved file. +% ARG kern : a kernel structure for the IVM. +% ARG noise : a noise structure for the IVM. +% ARG ivmInfo : the active set and the inactive set of the IVM as +% well as the site parameters, stored in a structure. +% ARG X : the input training data for the IVM. +% ARG y : the output target training data for the IVM. +% RETURN model : an IVM model structure that combines the component +% parts. +% +% SEEALSO : ivmDeconstruct, ivmCreate, ivmComputeLandM +% +% COPYRIGHT : Neil D. Lawrence, 2005, 2006 + +% IVM + +model.type = 'ivm'; + +model.X = X; +model.y = y; + +model.kern = kern; +model.noise = noise; + +model.I = ivmInfo.I; +model.d = length(model.I); +model.J = ivmInfo.J; + +model.m = ivmInfo.m; +model.beta = ivmInfo.beta; + +model.kern.Kstore = kernCompute(model.kern, model.X, ... + model.X(model.I, :)); +if isfield(model.kern, 'whiteVariance') + model.kern.Kstore(model.I, 1:model.d) = ... + model.kern.Kstore(model.I, 1:model.d) ... 
+ + model.kern.whiteVariance; +end +model.kern.diagK = kernDiagCompute(model.kern, model.X); + +model = ivmComputeLandM(model); +model.d = length(model.I); +[model.mu, model.varSigma] = ivmPosteriorMeanVar(model, X); diff --git a/gpmivm/ivmRemovePoint.m b/gpmivm/ivmRemovePoint.m new file mode 100644 index 00000000..9f36d804 --- /dev/null +++ b/gpmivm/ivmRemovePoint.m @@ -0,0 +1,30 @@ + function model = ivmRemovePoint(model, i) + +% IVMREMOVEPOINT Removes a given point from the IVM. +% FORMAT +% DESC removes a given point from the IVM. +% ARG model : the model from which the point is to be removed. +% ARG ind : the index of the point to be removed. +% RETURN model : the model with the point removed. +% +% SEEALSO : ivmDowndateNuG, ivmDowndateM, ivmDowndateSites, +% ivmSelectPoints +% +% COPYRIGHT : Neil D. Lawrence, 2005, 2006 + +% IVM + +index = find(model.I == i); +if isempty(index) + error(['Point ' num2str(i) ' is not in active set']); +end + + +model = ivmDowndateNuG(model, i); +model = ivmDowndateM(model, i); +model = ivmDowndateSites(model, i); + +model.I(index) = []; +model.J = [model.J i]; + +model = ivmUpdateNuG(model, i); diff --git a/gpmivm/ivmRun.m b/gpmivm/ivmRun.m new file mode 100644 index 00000000..5f84cd25 --- /dev/null +++ b/gpmivm/ivmRun.m @@ -0,0 +1,45 @@ +function model = ivmRun(XTrain, yTrain, options, kernelTieStructure, noiseTieStructure) + +% IVMRUN Run the IVM on a given data set. +% FORMAT +% DESC takes an input data matrix, a target data matrix and an options +% structure (as defined by ivmOptions). It then creates and +% optimises an IVM using ivmOptimise. +% ARG X : the input training data. +% ARG y : the target training data. +% ARG options : options structure (as defined by ivmOptions). +% +% FORMAT +% DESC is as above but includes the facility to 'tie' some of the +% kernel an noise parameters together. +% ARG X : the input training data. +% ARG y : the target training data. +% ARG options : options structure (as defined by ivmOptions). +% ARG kernelTie : structure which speficies which kernel parameters +% are forced to be equal to each other (see cmpndTieParameters). +% ARG noiseTie : structure which speficies which noise parameters +% are forced to be equal to each other (see cmpndTieParameters). +% +% SEEALSO : ivmCreate, ivmOptimise, ivmOptions, cmpndTieParameters +% +% COPYRIGHT : Neil D. Lawrence, 2004, 2005, 2006, 2007 + +% IVM + +% Intitalise IVM +model = ivmCreate(size(XTrain, 1), size(yTrain, 2), XTrain, yTrain, options); + +if nargin > 8 & ~isempty(noiseTieStructure); + % Some of the noise parameters are constrained equal to each other + model.noise = cmpndTieParameters(model.noise, noiseTieStructure); +end +if nargin > 7 & ~isempty(kernelTieStructure); + % Some of the kernel parameters are constrained to equal each other. + model.kern = cmpndTieParameters(model.kern, kernelTieStructure); +end + +% Optimise the parameters +model = ivmOptimise(model, options); + +% Select data-points in an IVM with the given parameters. +model = ivmOptimiseIvm(model, options.display); diff --git a/gpmivm/ivmSelectPoint.m b/gpmivm/ivmSelectPoint.m new file mode 100644 index 00000000..be9fb418 --- /dev/null +++ b/gpmivm/ivmSelectPoint.m @@ -0,0 +1,61 @@ +function [indexSelect, infoChange] = ivmSelectPoint(model, add); + +% IVMSELECTPOINT Choose a point for inclusion or removal. +% FORMAT +% DESC identifies the next point for inclusion or removal. +% ARG model : IVM structure for which the next point is being +% selected. 
+% ARG add : flag which indicates whether or not we are adding a +% point. If we are not adding we are assumed to be removing a point +% (default is true). +% +% SEEALSO : ivmOptimiseIvm, ivmSelectPoints, ivmCreate +% +% COPYRIGHT : Neil D. Lawrence, 2004, 2005, 2007 + +% IVM + +if nargin < 2 + % If add is 1, then we are including a point. + add = true; +end + +switch model.selectionCriterion + case 'random' + if add + indexSelect = ceil(rand(1)*length(model.J)); + infoChange = -.5*sum(log2(1-model.varSigma(indexSelect, :).* ... + model.nu(indexSelect, :)), 2); + else + indexSelect = ceil(rand(1)*length(model.I)); + infoChange = -.5*sum(log2(1-model.varSigma(indexSelect, :).* ... + model.beta(indexSelect, :)+1e-300), 2); + end + case 'entropy' + delta = ivmComputeInfoChange(model, add); + [infoChange, indexSelect] = max(delta); + numSelect = sum(delta==infoChange); + if numSelect>1 + index1 = find(delta==infoChange); + index1Select = ceil(rand(1)*numSelect); + indexSelect = index1(index1Select); + end + case 'rentropy' + % entropy with first point random + if length(model.I) + % if point is already selected select another. + delta = ivmComputeInfoChange(model, add); + [infoChange, indexSelect] = max(delta); + else + % otherwise select one randomly + if add + indexSelect = ceil(rand(1)*length(model.J)); + infoChange = -.5*sum(log2(1-model.varSigma(indexSelect, :)* ... + model.nu(indexSelect, :)), 2); + else + indexSelect = ceil(rand(1)*length(model.I)); + infoChange = -.5*sum(log2(1-model.varSigma(indexSelect, :)* ... + model.beta(indexSelect, :)+1e-300), 2); + end + end +end diff --git a/gpmivm/ivmSelectPoints.m b/gpmivm/ivmSelectPoints.m new file mode 100644 index 00000000..c758a7a5 --- /dev/null +++ b/gpmivm/ivmSelectPoints.m @@ -0,0 +1,82 @@ +function model = ivmSelectPoints(model, display) + +% IVMSELECTPOINTS Selects the point for an IVM. +% FORMAT +% DESC is an alternative selection approach to ivmOptimiseIvm. It +% performs EP style updates and point swaps as well as point +% selection. +% ARG model : model for which the points are being selected. +% ARG display : whether or not to display the details of the +% optimisaiton (set to 0, 1 or 2, default value is 1). +% RETURN model : model structure with the active set selected. +% +% SEEALSO : ivmOptimiseIvm, ivmSelectPoint, ivmAddPoint, +% ivmSelectVisualise, ivmRemovePoint, ivmEpUpdatePoint +% +% COPYRIGHT : Neil D. Lawrence, 2005, 2006 + +% IVM + +tol = 1e-3; +if nargin < 2 + display = 1; +end + +model = ivmInit(model); +numData = size(model.X, 1); +model.J = 1:numData; + +% Set first infoChange to NaN +infoChange(1) = NaN; + +dVal = model.d; + +for k = 1:dVal + + [indexSelect, infoChange(k)] = ivmSelectPoint(model); + dataIndexSelect = model.J(indexSelect); + model = ivmAddPoint(model, dataIndexSelect); + if display + ivmSelectVisualise(model, display, k, dataIndexSelect); + end +end +if length(model.J)>1 & ~strcmp(model.selectionCriterion, 'random') + deltaInfo = tol+1; + while abs(deltaInfo) > tol + [indexSelectRemove, infoChangeRemove] = ivmSelectPoint(model, 0); + dataIndexSelectRemove = model.I(indexSelectRemove); + model = ivmRemovePoint(model, dataIndexSelectRemove); + [indexSelect, infoChangeAdd] = ivmSelectPoint(model, 1); + dataIndexSelectAdd = model.J(indexSelect); + model = ivmAddPoint(model, dataIndexSelectAdd); + deltaInfo = infoChangeRemove + infoChangeAdd; + fprintf('Swapping inclusion %d: point %d for point %d, change %2.4f.\n', ... + indexSelectRemove, ... 
+ dataIndexSelectRemove, dataIndexSelectAdd, deltaInfo); + end +end +lengthIndex = length(model.I); +betaChange = 1; +oldBeta = model.beta; +counter = 0; +updateCount = 0; +while betaChange > tol + I = model.I; + updateCount = updateCount + 1; + for i = I' + counter = counter + 1; + model = ivmEpUpdatePoint(model, i); + if ~rem(counter, 100) + model = ivmComputeLandM(model); + [model.mu, model.varSigma] = ivmPosteriorMeanVar(model, model.X); + model = ivmUpdateNuG(model); + fprintf('EP Recompute\n') + end + end + model = ivmComputeLandM(model); + [model.mu, model.varSigma] = ivmPosteriorMeanVar(model, model.X); + model = ivmUpdateNuG(model); + betaChange = full(max(abs(model.beta - oldBeta))); + fprintf('EP Update %d, beta change %2.4f\n', updateCount, betaChange) + oldBeta = model.beta; +end diff --git a/gpmivm/ivmSelectVisualise.m b/gpmivm/ivmSelectVisualise.m new file mode 100644 index 00000000..24782b03 --- /dev/null +++ b/gpmivm/ivmSelectVisualise.m @@ -0,0 +1,81 @@ +function ivmSelectVisualise(model, display, k, dataIndexSelect) + +% IVMSELECTVISUALISE Visualise the selected point. +% FORMAT +% DESC visualises the IVM learning by showing which point has been +% selected. +% ARG model : the model for which the visualisation is occuring. +% ARG display : the verbosity level (whether or not to display in a +% plot, 1 - don't plot, 2 - attempt to plot). +% ARG k : the point inclusion number that we are displaying. +% ARG index : the index of the data point that is being included. +% +% COPYRIGHT : Neil D. Lawrence, 2005 +% +% SEEALSO : ivmCreate, ivmLogLikelihood, ivmOptions + +% IVM + +logLikelihood = ivmLogLikelihood(model); +fprintf('%ith inclusion, remaining log Likelihood %2.4f', ... + k, logLikelihood) +switch model.noise.type + case {'probit'} + falsePositives(k) = 0; + truePositives(k) = 0; + for i = 1:size(model.y, 2) + falsePositives(k) = falsePositives(k) ... + + sum(... + sign(model.mu(:, i)+model.noise.bias(i)) ... + ~=model.y(:, i) & model.y(:, i)==-1); + truePositives(k) = truePositives(k) ... + + sum(... + sign(model.mu(:, i)+model.noise.bias(i)) ... + ~=model.y(:, i) & model.y(:, i)==1); + end + fprintf(', falsePos %2.4f, truePos %2.4f\n', ... + sum(falsePositives(k))./sum(sum(model.y==-1)), ... + sum(truePositives(k))./sum(sum(model.y==1))); + otherwise + fprintf('\n'); +end +if display > 1 + if size(model.X, 2) == 2 + type = model.noise.type; + if strcmp(type, 'cmpnd') + type = model.noise.comp{1}.type; + end + switch type + case {'probit', 'ordered', 'ncnm'} + figure(1) + a = plot(model.X(dataIndexSelect, 1), ... + model.X(dataIndexSelect, 2), '.', ... + 'markersize', 20); + if ~isoctave + set(a, 'eraseMode', 'xor'); + end + drawnow + case {'gaussian', 'ngauss'} + figure(1) + a = plot3(model.X(dataIndexSelect, 1), ... + model.X(dataIndexSelect, 2), model.y(dataIndexSelect), ... + '.', 'markersize', 20); + if ~isoctave + set(a, 'erasemode', 'xor'); + end + % xlim = get(gca, 'xlim'); + % labelGap = (xlim(2) - xlim(1)) * 0.025; + % b = text(model.X(dataIndexSelect, 1)+labelGap, ... + % model.X(dataIndexSelect, 2), model.y(dataIndexSelect), ... 
+ % num2str(k), 'erasemode', 'xor'); + drawnow + end + else + subplot(10, 10, rem(k-1, 100)+1); + image(round(reshape(model.X(dataIndexSelect, :), 20, 20)*64)) + axis image + axis off + drawnow + end + +end diff --git a/gpmivm/ivmUpdateM.m b/gpmivm/ivmUpdateM.m new file mode 100644 index 00000000..7cf0c0e2 --- /dev/null +++ b/gpmivm/ivmUpdateM.m @@ -0,0 +1,78 @@ +function model = ivmUpdateM(model, index) + +% IVMUPDATEM Update matrix M, L, v and mu. +% FORMAT +% DESC updates the stored representations in the IVM (M, L, +% v and mu) given a new data point. +% ARG model : the model for which the represenations are to be +% updated. +% ARG index : the index of the data point that is being included. +% RETURN model : the returned model with all the representations up +% to date. +% +% SEEALSO : ivmCreate, ivmAddPoint, kernCompute +% +% COPYRIGHT : Neil D. Lawrence, 2005 +% IVM + +activePoint = length(model.I)+1; + +% Compute the kernel(s) at the new point. +model.kern.Kstore(:, activePoint) = kernCompute(model.kern, model.X, ... + model.X(index, :)); +if isfield(model.kern, 'whiteVariance') + model.kern.Kstore(index, activePoint) = model.kern.Kstore(index, activePoint) ... + + model.kern.whiteVariance; +end +% Compute the values of M for the new point +for c = 1:length(model.Sigma) + if length(model.I) > 0 + a = model.Sigma(c).M(:, index); + s = model.kern.Kstore(:, activePoint)' - a'*model.Sigma(c).M; + lValInv = sqrt(model.nu(index, c)); + %/~ + % If Nu is so low then the included data-point isn't really useful. + if lValInv < 1e-16 + warning(['Square root of nu is ' num2str(lValInv)]) + end + %~/ + model.Sigma(c).M = [model.Sigma(c).M; lValInv*s]; + ainv = (-a*lValInv)'/model.Sigma(c).L; + model.Sigma(c).L = [[model.Sigma(c).L; a'] [zeros(length(model.I),1); 1/lValInv]]; + model.Sigma(c).Linv = [[model.Sigma(c).Linv; ainv] [zeros(length(model.I),1); lValInv]]; + else + s = model.kern.Kstore(:, 1)'; + lValInv = sqrt(model.nu(index, c)); + model.Sigma(c).M = [lValInv*s]; + + model.Sigma(c).L = 1/lValInv; + model.Sigma(c).Linv = lValInv; + end + + %/~ + oldVarSigma = model.varSigma(:, c); + %~/ + model.varSigma(:, c) = model.varSigma(:, c) - ((model.nu(index, c)*s).*s)'; + %/~ + if any(model.varSigma(:, c)<0) + minVar = min(model.varSigma(:, c)); + warning(['Minimum variance ' num2str(minVar)]) + model.varSigma(find(model.varSigma(:, c) < 0), c) = 0; + end + % Seems like this variance can go as low as 1e-13 in, for example, demRegression1.m + %~/ + model.mu(:, c) = model.mu(:, c) + model.g(index, c)*s'; +end +if length(model.Sigma)==1 & size(model.y, 2)>1 + for c = 2:size(model.y, 2) + model.varSigma(:, c) = model.varSigma(:, c) ... + - ((model.nu(index, c)*s).*s)'; + model.mu(:, c) = model.mu(:, c) + model.g(index, c)*s'; + %/~ + if any(model.varSigma(:, c)<0) + minVar = min(model.varSigma(:, c)); + warning(['Minimum variance ' num2str(minVar)]) + end + %~/ + end +end diff --git a/gpmivm/ivmUpdateNuG.m b/gpmivm/ivmUpdateNuG.m new file mode 100644 index 00000000..703aad29 --- /dev/null +++ b/gpmivm/ivmUpdateNuG.m @@ -0,0 +1,34 @@ +function model = ivmUpdateNuG(model, index) + +% IVMUPDATENUG Update nu and g parameters associated with noise model. +% FORMAT +% DESC updates the parameter vectors nu and g which depend on the +% noise model used. +% ARG model : the model being updated. +% ARG ind : the index of the point that has been included. +% RETURN model : the model structure with nu and g updated. 
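+% The update itself is delegated to noiseUpdateNuG; for log concave
+% noise models nu should remain non-negative (see the check below).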
+% +% SEEALSO : ivmCreate, ivmAddPoint, ivmEpUpdateM, ivmInit, ivmRemovePoint, +% ivmSelectPoints, +% +% COPYRIGHT : Neil D. Lawrence, 2004, 2005, 2006 + +% IVM + +if nargin < 2 + index = 1:size(model.y, 1); +end + +[model.g(index, :), model.nu(index, :)] = ... + noiseUpdateNuG(model.noise, ... + model.mu(index, :), model.varSigma(index, :), ... + model.y(index, :)); + + +if strcmp(model.noise.type, 'cmpnd') & any(model.nu(index, :)< 0) + if model.noise.logconcave + warning('nu less than zero in log concave model.') + else + fprintf('nu less than zero\n') + end +end diff --git a/gpmivm/ivmUpdateSites.m b/gpmivm/ivmUpdateSites.m new file mode 100644 index 00000000..11d05b81 --- /dev/null +++ b/gpmivm/ivmUpdateSites.m @@ -0,0 +1,35 @@ +function model = ivmUpdateSites(model, index) + +% IVMUPDATESITES Update site parameters. +% FORMAT +% DESC updates the site parameters in the IVM for a given index +% point. +% ARG model : the model for which the site parameters are being +% updated. +% ARG index : the indices of the site parameters to be updated. +% RETURN model : the model structure with the site parameters up to +% date. +% +% COPYRIGHT : Neil D. Lawrence, 2004, 2005 +% +% SEEALSO : ivmCreate, ivmAddPoint, ivmEpUpdateM + +% IVM + +[model.m(index, :), model.beta(index, :)] = ... + noiseUpdateSites(model.noise, ... + model.g(index, :), model.nu(index, :), ... + model.mu(index, :), model.varSigma(index, :), ... + model.y(index, :)); + + +if any(model.beta<0) + if model.noise.logconcave + error('Beta less than zero for log concave model.') + else + indices = find(model.beta < 0); + model.beta(indices) = 1e-6; + fprintf('Beta less than zero .... fixing to 1e-6.\n') + %model.terminate = 1; + end +end diff --git a/gpmivm/ivmUspsResults.m b/gpmivm/ivmUspsResults.m new file mode 100644 index 00000000..7ab48bbd --- /dev/null +++ b/gpmivm/ivmUspsResults.m @@ -0,0 +1,50 @@ +% IVMUSPSRESULTS Summarise the USPS result files in LaTeX. + +% IVM + +try + load ivmUspsResults +catch + [void, id] = lasterr; + if strcmp(id, 'MATLAB:load:couldNotReadFile') + [X, y, XTest, yTest] = mapLoadData('usps'); + [void, yTest] = max(yTest, [], 2); + yTest = yTest - 1; + + for expNo = 1:2 + for seed = 1:10 + mu = zeros(size(yTest)); + for digit = 0:9 + load(['demUsps' num2str(digit) '_' num2str(expNo) '_' ... + num2str(seed*1e5)]); + + testEr(expNo, seed, digit+1) = testError; + model = ivmReconstruct(kernStore, noiseStore, ivmInfoStore, ... + X, y(:, digit+1)); + mu(:, digit+1) = ivmPosteriorMeanVar(model, XTest); + mu(:, digit+1) = mu(:, digit+1) + model.noise.bias; + end + [void, yPred] = max(mu, [], 2); + yPred = yPred - 1; + overallError(expNo, seed) = 1 - sum(yPred == yTest)/size(yTest, 1); + + end + end + save('ivmUspsResults', 'testEr', 'overallError'); + else + error(lasterr) + end +end + +% Now make the table +meanEr = squeeze(mean(testEr*100, 2)); +varEr = squeeze(var(testEr*100, [], 2)); +meanOverEr = mean(overallError*100, 2); +varOverEr = var(overallError*100, [], 2); +for i = 1:2 + for j = 1:10 + fprintf(['$%s \\pm %1.2f$'], numsf2str(meanEr(i, j), 3), sqrt(varEr(i, j))); + fprintf(' & '); + end + fprintf(['$%s \\pm %1.2f$\n'], numsf2str(meanOverEr(i), 3), sqrt(varOverEr(i))) +end diff --git a/gpmivm/ivmVirtual.m b/gpmivm/ivmVirtual.m new file mode 100644 index 00000000..365977c5 --- /dev/null +++ b/gpmivm/ivmVirtual.m @@ -0,0 +1,38 @@ +function [X, y] = ivmVirtual(origX, origy, invariance) + +% IVMVIRTUAL Create virtual data points with the specified invariance. 
+% One advantage of a sparse compression scheme (like the IVM and +% SVM, but unlike the RVM) is you can impose invariances on your +% data by only placing invariances on the 'active set'. This is a +% simple helper function to create a new data set (typically from +% an active set) by imposing a simple invariance. +% +% FORMAT +% DESC creates virtual data points for use with a 'virtual IVM'. +% ARG X : input data set on which we wish to make the transformation +% IVM + +switch invariance + case 'translate' + baseVal = min(min(origX)); + X = zeros(size(origX, 1)*5, size(origX, 2)); + y = zeros(size(origy, 1)*5, size(origy, 2)); + for j = 1:size(origX, 1) + xPoint = origX(j, :); + X(5*j-4, :) = xPoint; + Xmat = reshape(xPoint, 16, 16); + XmatU = [Xmat(2:end, :); repmat(baseVal, 1, 16)]; + X(5*j-3, :) = XmatU(:)'; + XmatD = [repmat(baseVal, 1, 16); Xmat(1:end-1, :)]; + X(5*j-2, :) = XmatD(:)'; + XmatL = [Xmat(:, 2:end) repmat(baseVal, 16, 1)]; + X(5*j-1, :) = XmatL(:)'; + XmatR = [repmat(baseVal, 16, 1) Xmat(:, 1:end-1)]; + X(5*j, :) = XmatR(:)'; + for i = 0:4 + y(5*j-i, :) = origy(j, :); + end + end + otherwise + error('That invariance is not yet implemented') +end diff --git a/gpmivm/ncnmContour.m b/gpmivm/ncnmContour.m new file mode 100644 index 00000000..496f3b3a --- /dev/null +++ b/gpmivm/ncnmContour.m @@ -0,0 +1,20 @@ +function h = ncnmContour(X, Y, Z, lineWidth) + +% NCNMCONTOUR Special contour plot showing null category region. +% FORMAT +% DESC produces a special contour plot for the null category noise +% model which places lines at the center and edges of the null +% category region. +% ARG X : input X locations for showing contours. +% ARG Y : input Y locations for showing contours. +% ARG Z : output of the null category noise model. +% ARG lineWidth : width of contour lines. +% RETURN H : handle to contour lines. + +% IVM + +% It's probably learnt something. +[void, clines1] =contour(X, Y, Z, [-0.5 0.5], 'b--'); +[void, clines2] =contour(X, Y, Z, [0 0], 'r-'); +h = [clines1; clines2]; +set(h, 'linewidth', lineWidth) diff --git a/gpmivm/vivmRunDataSet.m b/gpmivm/vivmRunDataSet.m new file mode 100644 index 00000000..777c1047 --- /dev/null +++ b/gpmivm/vivmRunDataSet.m @@ -0,0 +1,58 @@ +function vivmRunDataSet(dataSetName, invariance, experimentNo, ... + selectionCriterion, dVal, seedVal); + +% VIVMRUNDATASET Try the virtual IVM on a data set and save the results. + +% IVM + +if nargin < 6 + seedVal = []; + randn('seed', 1e5) + rand('seed', 1e5) +else + randn('seed', seedVal); + rand('seed', seedVal); +end + +jobId = getenv('PBS_JOBID'); +jobName = getenv('PBS_JOBNAME'); +fprintf('Seed %2.0e\n', seedVal); + +if isempty(seedVal) + [X, y, XTest, yTest] = mapLoadData(dataSetName); +else + [X, y, XTest, yTest] = mapLoadData(dataSetName, seedVal); +end + +capitalName = dataSetName; +capitalName(1) = upper(capitalName(1)); + +options = ivmOptions; +options.extIters = 0; % don't optimise parameters +mu = zeros(size(yTest)); +varSigma = zeros(size(yTest)); + +load(['dem' capitalName '_' ... + num2str(experimentNo) '_' ... + num2str(seedVal) '.mat']) + +% Create new training set with virtual SVs. +origIndex = ivmInfoStore.I; +[X, y] = ivmVirtual(X(ivmInfoStore.I, :), y(ivmInfoStore.I, ... + :), invariance); +tic +model = ivmRun(X, y, kernStore, ... + noiseStore, selectionCriterion, dVal, ... 
+ options); + +runTime = toc; +if ~isempty(XTest) & ~isempty(yTest); + yPred = ivmOut(model, XTest); + testError = 1-sum(yPred==yTest)/size(yTest, 1); + fprintf('Data set %s, test error %2.4f\n', dataSetName, testError); +end +[kern, noise, ivmInfo] = ivmDeconstruct(model); +save(['dem' capitalName '_' invariance '_' num2str(experimentNo) '_d' num2str(dVal) '_seed' num2str(seedVal)], 'testError', ... + 'ivmInfo', 'kern', 'noise', 'runTime', 'jobId', ... + 'jobName', 'origIndex') + diff --git a/gpmivm/vivmRunDataSetLearn.m b/gpmivm/vivmRunDataSetLearn.m new file mode 100644 index 00000000..2887eda8 --- /dev/null +++ b/gpmivm/vivmRunDataSetLearn.m @@ -0,0 +1,58 @@ +function vivmRunDataSetLearn(dataSetName, invariance, experimentNo, ... + selectionCriterion, dVal, seedVal); + +% VIVMRUNDATASETLEARN Try the virtual IVM on a data set and save the results. + +% IVM + +if nargin < 6 + seedVal = []; + randn('seed', 1e5) + rand('seed', 1e5) +else + randn('seed', seedVal); + rand('seed', seedVal); +end + +jobId = getenv('PBS_JOBID'); +jobName = getenv('PBS_JOBNAME'); +fprintf('Seed %2.0e\n', seedVal); + +if isempty(seedVal) + [X, y, XTest, yTest] = mapLoadData(dataSetName); +else + [X, y, XTest, yTest] = mapLoadData(dataSetName, seedVal); +end + +capitalName = dataSetName; +capitalName(1) = upper(capitalName(1)); + +options = ivmOptions; + +mu = zeros(size(yTest)); +varSigma = zeros(size(yTest)); + +load(['dem' capitalName '_' ... + num2str(experimentNo) '_' ... + num2str(seedVal) '.mat']) + +% Create new training set with virtual SVs. +origIndex = ivmInfoStore.I; +[X, y] = ivmVirtual(X(ivmInfoStore.I, :), y(ivmInfoStore.I, ... + :), invariance); +tic +model = ivmRun(X, y, kernStore, ... + noiseStore, selectionCriterion, dVal, ... + options); + +runTime = toc; +if ~isempty(XTest) & ~isempty(yTest); + yPred = ivmOut(model, XTest); + testError = 1-sum(yPred==yTest)/size(yTest, 1); + fprintf('Data set %s, test error %2.4f\n', dataSetName, testError); +end +[kernStore, noiseStore, ivmInfoStore] = ivmDeconstruct(model); +save(['dem' capitalName '_learn_' invariance '_' num2str(experimentNo) '_d' num2str(dVal) '_seed' num2str(seedVal)], 'testError', ... + 'ivmInfo', 'kern', 'noise', 'runTime', 'jobId', ... + 'jobName', 'origIndex') + diff --git a/gpmivm/vivmUspsResults.m b/gpmivm/vivmUspsResults.m new file mode 100644 index 00000000..1255dbef --- /dev/null +++ b/gpmivm/vivmUspsResults.m @@ -0,0 +1,53 @@ +% VIVMUSPSRESULTS Summarise the USPS result files in LaTeX. + +% IVM + +invariance = 'translate'; +dVal = 1000; +try + load vivmUspsResults +catch + [void, id] = lasterr; + if strcmp(id, 'MATLAB:load:couldNotReadFile') + [origX, origy, XTest, yTest] = mapLoadData('usps'); + [void, yTest] = max(yTest, [], 2); + yTest = yTest - 1; + + for expNo = 1 + for seed = 1:10 + mu = zeros(size(yTest)); + for digit = 0:9 + load(['demUsps' num2str(digit) '_' invariance '_' num2str(expNo) '_d' ... + num2str(dVal) '_seed' num2str(seed*1e5)]); + [X, y] = ivmVirtual(origX(origIndex, :), origy(origIndex, :), invariance); + + testEr(seed, digit+1) = testError; + model = ivmReconstruct(kern, noise, ivmInfo, ... 
+ X, y(:, digit+1));
+ mu(:, digit+1) = ivmPosteriorMeanVar(model, XTest);
+ mu(:, digit+1) = mu(:, digit+1) + model.noise.bias;
+ end
+ [void, yPred] = max(mu, [], 2);
+ yPred = yPred - 1;
+ overallError(seed) = 1 - sum(yPred == yTest)/size(yTest, 1);
+ save('vivmUspsResults', 'testEr', 'overallError');
+
+ end
+ end
+ else
+ error(lasterr)
+ end
+end
+
+% Now make the table
+meanEr = squeeze(mean(testEr*100, 1));
+varEr = squeeze(var(testEr*100, [], 1));
+meanOverEr = mean(overallError*100);
+varOverEr = var(overallError*100);
+for i = 1:1
+ for j = 1:10
+ fprintf(['$%s \\pm %1.2f$'], numsf2str(meanEr(i, j), 3), sqrt(varEr(i, j)));
+ fprintf(' & ');
+ end
+ fprintf(['$%s \\pm %1.2f$\\\n'], numsf2str(meanOverEr(i), 3), sqrt(varOverEr(i)))
+end
diff --git a/ivm/README.md b/ivm/README.md
new file mode 100644
index 00000000..16ffdc76
--- /dev/null
+++ b/ivm/README.md
@@ -0,0 +1,174 @@
+
+IVM Software
+============
+
+This page describes examples of how to use the Informative Vector Machine (IVM) software.
+
+Release Information
+-------------------
+
+The current release is 0.4.
+
+As well as downloading the IVM software, you need to obtain the toolboxes specified below.
+
+For core functionality
+
+- [netlab](https://github.com/sods/netlab) mainly used for optimization utilities (like scg).
+- [prior](https://github.com/SheffieldML/prior) prior distributions.
+- [optimi](https://github.com/SheffieldML/optimi) optimization constraint mappings.
+- [kern](https://github.com/SheffieldML/kern) covariance functions.
+- [ndlutil](https://github.com/SheffieldML/ndlutil) various utility functions.
+- [noise](https://github.com/SheffieldML/noise) noise models.
+- [mltools](https://github.com/SheffieldML/mltools) machine learning models.
+- [rochol](https://github.com/SheffieldML/rochol) rank one Cholesky update.
+- [Carl Rasmussen's minimize](http://learning.eng.cam.ac.uk/carl/code/minimize/)
+
+For loading in data for the demos
+
+- [datasets](https://github.com/SheffieldML/datasets/)
+
+For the motion capture experiments
+
+- [mocap](https://github.com/SheffieldML/mocap/)
+
+For the demThreeFive example, which compares with [SVM light](http://svmlight.joachims.org/), you need Anton Schwaighofer's SVML interface
+
+- [svml](https://github.com/sods/svml/)
+
+Examples
+--------
+
+`demClassification1`
+--------------------
+
+The first example given is `demClassification1`, a simple classification data set where only one direction of the input is relevant in determining the decision boundary. An ARD MLP kernel is used in combination with a linear kernel. The ARD parameters in the linear and MLP kernels are constrained to be the same by the line:
+
+```matlab
+% Constrain the ARD parameters in the MLP and linear kernels to be the same.
+model.kern = cmpndTieParameters(model.kern, {[4, 7], [5, 8]});
+```
+
+The resulting classification is shown below.
+
+![](./diagrams/demClassificationOne1.png)
+
+Decision boundary from the `demClassification1.m` example. Positive class is red circles, negative class green crosses and active points are yellow dots. Decision boundary shown in red, contours at 0.25 and 0.75 probability shown in blue.
+
+`demClassification2`
+--------------------
+
+The second example learns a Gaussian process classifier given data that is sampled from a Gaussian process. The code is `demClassification2`. The underlying Gaussian process is based on an RBF kernel with inverse width 10. The IVM learns an inverse width of 15 and gives the classification shown below.
+
+![](./diagrams/demClassificationTwo2.png)
+
+Decision boundary from the `demClassification2.m` example. Positive class is red circles, negative class green crosses and active points are yellow dots. Decision boundary shown in red, contours at 0.25 and 0.75 probability shown in blue.
+
+`demClassification3`
+--------------------
+
+This example is similar to `demClassification2`, only now there is a null category region in the data (a region of low data density between the classes). The example is for comparison with the null category noise model.
+
+![](./diagrams/demClassificationThree3.png)
+
+Decision boundary from the `demClassification3.m` example. Positive class is red circles, negative class green crosses and active points are yellow dots. Decision boundary shown in red, contours at 0.25 and 0.75 probability shown in blue.
+
+`demOrdered1`
+-------------
+
+In this example the ordered categorical noise model is used (ordinal regression). The data is a simple data set for which a linear one dimensional model suffices. The IVM is given a combination of an RBF and linear kernel with ARD. For the ordered categorical case there are several parameters associated with the noise model (in particular the category widths); these are learnt too. The model learns that the system is linear and that only one direction is important. The resulting classification is given below.
+
+![](./diagrams/demOrderedOne1.png)
+
+Decision boundary from the `demOrdered1.m` example. Class 0 - red cross, Class 1 - green circles, Class 2 - blue crosses, Class 3 - cyan asterisks, Class 4 - pink squares, Class 5 - yellow diamonds, Class 6 - red triangles. Active points are yellow dots; note that, because the kernel is linear, the most informative points tend to be at the extrema. Decision boundaries shown in red, contours at 0.25 and 0.75 probability shown in blue.
+
+`demOrdered2`
+-------------
+
+Another example with the ordered categorical noise model; here the data is radial, with the categories lying along the radius of a circle. The IVM is given a combination of an RBF and linear kernel with ARD. Again there are several parameters associated with the noise model, and these are learnt using `ivmOptimiseNoise`. The resulting classification is given below.
+
+![](./diagrams/demOrderedTwo2.png)
+
+Decision boundary from the `demOrdered2.m` example. Class 0 - red cross, Class 1 - green circles, Class 2 - blue crosses, Class 3 - cyan asterisks, Class 4 - pink squares, Class 5 - yellow diamonds, Class 6 - red triangles. Active points are yellow dots. Decision boundaries shown in red, contours at 0.25 and 0.75 probability shown in blue.
+
+`demRegression1`
+----------------
+
+In this example the Gaussian noise model is used (standard regression). The data is sampled from a Gaussian process and only one input dimension is important. The IVM is given a combination of an RBF and linear kernel with ARD. The resulting regression is given below.
+
+![](./diagrams/demRegressionOne1.png)
+
+Regression from the example `demRegression1.m`. Targets are red dots and active points are yellow dots.
+
+`demRegression2`
+----------------
+
+A second example with Gaussian noise, again sampled from a Gaussian process, but this time with differing length scales.
+
+![](./diagrams/demRegressionTwo2.png)
+
+Regression from the example `demRegression2.m`. Targets are red dots and active points are yellow dots.
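+
+All of the demos above share the same skeleton: build an options
+structure, create the model, then alternate active set selection with
+kernel (and possibly noise) parameter optimisation. A minimal sketch of
+that loop, following `demClassification3.m` (an illustration of the
+pattern, not an additional demo in the release):
+
+```matlab
+% Minimal IVM demo skeleton (sketch based on demClassification3.m).
+[X, y] = mapLoadData('classificationThree');
+
+options = ivmOptions;  % defaults: {'rbf', 'bias', 'white'} kernel, probit noise
+model = ivmCreate(size(X, 1), size(y, 2), X, y, options);
+
+for i = 1:options.extIters
+  % Select the active set, then optimise the kernel parameters.
+  model = ivmOptimiseIvm(model, options.display);
+  model = ivmOptimiseKernel(model, options.display, options.kernIters);
+end
+model = ivmOptimiseIvm(model, options.display);
+ivmDisplay(model);
+```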
+
+Benchmark Data Sets
+-------------------
+
+The function `ivmGunnarData` allows you to test the IVM on Gunnar Raetsch's benchmark data sets. Download the data sets [from here](http://ida.first.fraunhofer.de/projects/bench/benchmarks.htm) and expand the ringnorm data set into '$DATASETSDIRECTORY/gunnar/ringnorm'. Then run the following:
+
+```matlab
+>> ivmGunnarData('ringnorm', 1, {'rbf', 'bias', 'white'}, 1, 100) ...
+Final model: IVM Model:
+Noise Model: Probit
+bias on process 1: 0.0439
+Probit Sigma2: 0.0000
+Kernel: Compound
+kernel: RBF inverse width: 0.0866 (length scale 3.3984)
+RBF variance: 1.2350
+Bias Variance: 8.2589
+White Noise Variance: 0.0000
+Test Error 0.0183
+Model likelihood -56.7120
+```
+
+You can try any of the data sets by replacing ringnorm with the relevant data set. Note that they don't all work with only 100 active points as in the example above; for example, the 'banana' data set needs 200 active points to get a reasonable result:
+
+```matlab
+>> ivmGunnarData('banana', 1, {'rbf', 'bias', 'white'}, 1, 200) ...
+Final model: IVM Model:
+Noise Model: Probit
+bias on process 1: 0.1067
+Probit Sigma2: 0.0000
+Kernel: Compound
+kernel: RBF inverse width: 1.6411 (length scale 0.7806)
+RBF variance: 0.2438
+Bias Variance: 0.0000
+White Noise Variance: 0.0148
+Test Error 0.1129
+Model likelihood 175.3588
+```
+
+![](./diagrams/demBanana1.png)
+
+Decision boundary from the banana example. Positive class is red circles, negative class green crosses and active points are yellow dots. Decision boundary shown in red, contours at 0.25 and 0.75 probability shown in blue.
+
+Null Category Noise Model
+-------------------------
+
+Examples
+--------
+
+The toy data example in the papers can be recreated using:
+
+```
+>> demUnlabelled1
+```
+
+and leads to the decision boundary given below. A standard IVM-based classifier can be run on the data using
+
+```
+>> demUnlabelled2
+```
+
+![](./diagrams/demUnlabelledOne1.png)![](./diagrams/demUnlabelledOne2.png)
+
+The null category noise model run on toy data. *Top*: using the null category, the true nature of the decision boundary is recovered. *Bottom*: the standard IVM does not recover the true decision boundary.
+
+The USPS digit classification example given in the NIPS paper can be re-run with:
+
+```
+>> demThreeFive
+```
+
+Be aware that this code can take some time to run. The results, in the form of averaged area under the ROC curve against probability of a missing label, can be plotted using
+
+```
+>> demThreeFiveResults
+```
+
+Plot of average area under the ROC curve against probability of the label being present. The red line is the standard IVM-based classifier, the blue dotted line is the null category noise model based classifier, the green dash-dot line is a standard SVM and the mauve dashed line is the transductive SVM.
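+
+Saving and Reloading Models
+---------------------------
+
+Each demo saves its results by splitting the model into component parts
+with `ivmDeconstruct`; a saved model can be reassembled with
+`ivmReconstruct` without re-running the active set selection. A minimal
+sketch (it assumes `demClassification3` has already been run, so that
+`demClassificationThree3.mat` exists):
+
+```matlab
+% Rebuild a saved IVM and compute predictions (sketch).
+[X, y] = mapLoadData('classificationThree');
+load('demClassificationThree3.mat');  % provides kern, noise and ivmInfo
+model = ivmReconstruct(kern, noise, ivmInfo, X, y);
+yPred = ivmOut(model, X);             % predictions through the noise model
+```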
+
+Page updated on Sat Jan 13 01:05:38 2007
diff --git a/ivm/additionalfiles.txt b/ivm/additionalfiles.txt new file mode 100644 index 00000000..3877c352 --- /dev/null +++ b/ivm/additionalfiles.txt @@ -0,0 +1,14 @@
+# These are files that should be packaged for the IVM software
+dir: html
+~/mlprojects/ivm/html/index.html
+~/mlprojects/ivm/html/demBanana1.png
+~/mlprojects/ivm/html/demClassificationOne1.png
+~/mlprojects/ivm/html/demClassificationTwo2.png
+~/mlprojects/ivm/html/demClassificationThree3.png
+~/mlprojects/ivm/html/demOrderedOne1.png
+~/mlprojects/ivm/html/demOrderedTwo2.png
+~/mlprojects/ivm/html/demRegressionOne1.png
+~/mlprojects/ivm/html/demRegressionTwo2.png
+~/mlprojects/ivm/html/demUnlabelledOne1.png
+~/mlprojects/ivm/html/demUnlabelledOne2.png
+~/mlprojects/ivm/html/demThreeFive.png
\ No newline at end of file
diff --git a/ivm/demClassification3.m b/ivm/demClassification3.m new file mode 100644 index 00000000..31fe2caf --- /dev/null +++ b/ivm/demClassification3.m @@ -0,0 +1,65 @@
+% DEMCLASSIFICATION3 IVM for classification on a data-set sampled from a GP with null category.
+
+% IVM
+
+% Fix seeds
+randn('seed', 1e5);
+rand('seed', 1e5);
+
+dataSetName = 'classificationThree';
+experimentNo = 3;
+
+% Load data.
+[X, y] = mapLoadData(dataSetName);
+
+% Set up model.
+options = ivmOptions;
+options.display = 2;
+options.kern = {'rbf', 'white'};
+
+model = ivmCreate(size(X, 1), size(y, 2), X, y, options);
+
+if options.display > 1
+  ivm3dPlot(model, 'ivmContour', 1);
+end
+for i = 1:options.extIters
+  % Select the active set.
+  model = ivmOptimiseIVM(model, options.display);
+  % Plot the data.
+  if options.display > 1
+    ivm3dPlot(model, 'ivmContour', i);
+  end
+  % Optimise the kernel parameters.
+  model = ivmOptimiseKernel(model, options.display, options.kernIters);
+end
+model = ivmOptimiseIVM(model, options.display);
+if options.display > 1
+  ivm3dPlot(model, 'ivmContour', i);
+end
+% Display active points.
+model = ivmOptimiseIVM(model, options.display);
+
+% Display the final model.
+ivmDisplay(model);
+
+% Save the results.
+capName = dataSetName;
+capName(1) = upper(capName(1));
+[kern, noise, ivmInfo] = ivmDeconstruct(model);
+save(['dem' capName num2str(experimentNo) '.mat'], ...
+     'kern', ...
+     'noise', ...
+     'ivmInfo');
+
+if exist('printDiagram') && printDiagram
+  ivmPrintPlot(model, 'ivmContour', [], [], [], capName, experimentNo);
+end
diff --git a/ivm/demIonosphere1.m b/ivm/demIonosphere1.m new file mode 100644 index 00000000..67818d73 --- /dev/null +++ b/ivm/demIonosphere1.m @@ -0,0 +1,29 @@
+% DEMIONOSPHERE1 Demonstrate the IVM with EP style point selection on the ionosphere data.
+
+% Fix seeds.
+randn('seed', 1e6);
+rand('seed', 1e6);
+
+% Load the ionosphere data.
+[X, y] = mapLoadData('ionosphere');
+noiseModel = 'probit';
+selectionCriterion = 'random';
+
+kernelType = 'rbf';
+prior = 0;
+display = 0;
+dVal = 200;
+
+model = ivm(X, y, kernelType, noiseModel, selectionCriterion, dVal);
+model.kern.inverseWidth = 1/(exp(1)*exp(1));
+model.kern.variance = exp(4)*exp(4);
+model.noise.sigma2 = 1;
+%for i = 1:4
+%  model = ivmSelectPoints(model, display);
+%  model = ivmOptimiseKernel(model, display, 100);
+%end
+model = ivmSelectPoints(model, display);
diff --git a/ivm/demRegressionTwoIvm.m b/ivm/demRegressionTwoIvm.m new file mode 100644 index 00000000..e89214f8 --- /dev/null +++ b/ivm/demRegressionTwoIvm.m @@ -0,0 +1,69 @@
+% DEMREGRESSIONTWOIVM The data-set is sampled from a GP with known parameters.
+
+% IVM
+
+% Fix seeds
+randn('seed', 1e5);
+rand('seed', 1e5);
+
+dataSetName = 'regressionTwo';
+experimentNo = 2;
+
+% Load data.
+[X, y] = mapLoadData(dataSetName);
+
+% Set up model.
+options = ivmOptions;
+options.noise = 'gaussian';
+% Standard Gaussian noise model (regression).
+options.display = 2;
+% Use a kernel consisting of an RBF ARD kernel, a linear ARD kernel and
+% white noise.
+options.kern = {'rbfard', 'linard', 'white'};
+
+model = ivmCreate(size(X, 1), size(y, 2), X, y, options);
+
+% Tie the ARD parameters of the RBF and linear kernels together.
+model.kern = cmpndTieParameters(model.kern, {[3, 6], [4, 7]});
+if options.display > 1
+  [h1, h2] = ivm3dPlot(model, 'mesh', 1);
+  drawnow
+end
+
+for i = 1:options.extIters
+  % Select the active set.
+  model = ivmOptimiseIVM(model, options.display);
+  % Plot the data.
+  if options.display > 1
+    delete(h2)
+    [h1, h2] = ivm3dPlot(model, 'mesh', i);
+    drawnow
+  end
+  % Optimise kernel parameters.
+  model = ivmOptimiseKernel(model, options.display, options.kernIters);
+end
+model = ivmOptimiseIVM(model, options.display);
+if options.display > 1
+  delete(h2)
+  [h1, h2] = ivm3dPlot(model, 'mesh', i);
+end
+% Show the active points.
+model = ivmOptimiseIVM(model, options.display);
+
+ivmDisplay(model);
+
+% Save the results.
+capName = dataSetName;
+capName(1) = upper(capName(1));
+[kern, noise, ivmInfo] = ivmDeconstruct(model);
+save(['dem' capName num2str(experimentNo) '.mat'], ...
+     'kern', ...
+     'noise', ...
+     'ivmInfo');
+
+if exist('printDiagram') && printDiagram
+  ivmPrintPlot(model, 'mesh', [], [], [], capName, experimentNo);
+end
diff --git a/ivm/diagrams/demBanana1.png b/ivm/diagrams/demBanana1.png new file mode 100644 index 00000000..87a012d4 Binary files /dev/null and b/ivm/diagrams/demBanana1.png differ
diff --git a/ivm/diagrams/demClassificationOne1.png b/ivm/diagrams/demClassificationOne1.png new file mode 100644 index 00000000..074695e7 Binary files /dev/null and b/ivm/diagrams/demClassificationOne1.png differ
diff --git a/ivm/diagrams/demClassificationOne2.png b/ivm/diagrams/demClassificationOne2.png new file mode 100644 index 00000000..a9bafccd Binary files /dev/null and b/ivm/diagrams/demClassificationOne2.png differ
diff --git a/ivm/diagrams/demClassificationThree3.png b/ivm/diagrams/demClassificationThree3.png new file mode 100644 index 00000000..1ef25abb Binary files /dev/null and b/ivm/diagrams/demClassificationThree3.png differ
diff --git a/ivm/diagrams/demClassificationTwo2.png b/ivm/diagrams/demClassificationTwo2.png new file mode 100644 index 00000000..c4dcaa48 Binary files /dev/null and b/ivm/diagrams/demClassificationTwo2.png differ
diff --git a/ivm/diagrams/demOrderedOne1.png b/ivm/diagrams/demOrderedOne1.png new file mode 100644 index 00000000..e0091ee0 Binary files /dev/null and b/ivm/diagrams/demOrderedOne1.png differ
diff --git a/ivm/diagrams/demOrderedTwo2.png b/ivm/diagrams/demOrderedTwo2.png new file mode 100644 index 00000000..674494ed Binary files /dev/null and b/ivm/diagrams/demOrderedTwo2.png differ
diff --git a/ivm/diagrams/demRegressionFour4.png b/ivm/diagrams/demRegressionFour4.png new file mode 100644 index 00000000..299b0cf5 Binary files /dev/null and b/ivm/diagrams/demRegressionFour4.png differ
diff --git a/ivm/diagrams/demRegressionOne1.png b/ivm/diagrams/demRegressionOne1.png new file mode 100644 index 00000000..332142bd Binary files /dev/null and b/ivm/diagrams/demRegressionOne1.png differ
diff --git a/ivm/diagrams/demRegressionThree3.png b/ivm/diagrams/demRegressionThree3.png new file mode 100644 index 00000000..cf671ccb Binary files /dev/null and b/ivm/diagrams/demRegressionThree3.png differ
diff --git a/ivm/diagrams/demRegressionTwo2.png b/ivm/diagrams/demRegressionTwo2.png new file mode 100644 index 00000000..3f3b5438 Binary files /dev/null and b/ivm/diagrams/demRegressionTwo2.png differ
diff --git a/ivm/diagrams/demThreeFive.png b/ivm/diagrams/demThreeFive.png new file mode 100644 index 00000000..4f840494 Binary files /dev/null and b/ivm/diagrams/demThreeFive.png differ
diff --git a/ivm/diagrams/demUnlabelledOne1.png b/ivm/diagrams/demUnlabelledOne1.png new file mode 100644 index 00000000..bf46bbf6 Binary files /dev/null and b/ivm/diagrams/demUnlabelledOne1.png differ
diff --git a/ivm/diagrams/demUnlabelledOne2.png b/ivm/diagrams/demUnlabelledOne2.png new file mode 100644 index 00000000..308e60e0 Binary files /dev/null and b/ivm/diagrams/demUnlabelledOne2.png differ
diff --git a/ivm/ivm.txt b/ivm/ivm.txt new file mode 100644 index 00000000..2d176eaf --- /dev/null +++ b/ivm/ivm.txt @@ -0,0 +1 @@
+The informative vector machine. Sparse Gaussian processes with information-theoretic data point selection.
diff --git a/ivm/ivmEpOptimise.m b/ivm/ivmEpOptimise.m new file mode 100644 index 00000000..eab5fcfd --- /dev/null +++ b/ivm/ivmEpOptimise.m @@ -0,0 +1,19 @@
+function model = ivmEpOptimise(model, prior, display, innerIters, ...
+                               outerIters, optimiseNoise)
+
+% IVMEPOPTIMISE Optimise the IVM making use of point removal.
+
+if nargin < 6
+  optimiseNoise = 1;
+end
+% Run IVM, alternating EP style point selection with parameter optimisation.
+for i = 1:outerIters
+  model = ivmSelectPoints(model, display);
+  model = ivmOptimiseKernel(model, prior, display, innerIters);
+  if optimiseNoise
+    model = ivmSelectPoints(model, display);
+    model = ivmOptimiseNoise(model, prior, display, innerIters);
+  end
+  ivmDisplay(model);
+end
diff --git a/ivm/ivmGunnar.sh b/ivm/ivmGunnar.sh new file mode 100644 index 00000000..b21ad863 --- /dev/null +++ b/ivm/ivmGunnar.sh @@ -0,0 +1,92 @@
+#!/bin/bash
+
+# Script to launch many IVM jobs at a PBS managed cluster.
+
+function createMatlabFile
+{
+  # Create matlab file $1 calling ivmGunnarData on data set $2, data
+  # number $3, kernel $4, inverse width $5, active set size $6.
+  # NB: the heredoc body is an assumed form of the generated call.
+  cat > $1 <<EOF
+ivmGunnarData($2, $3, $4, $5, $6)
+EOF
+  echo exit >> $1
+}
+
+function createScriptFile
+{
+  # Create submission script $1 for job name $2 running matlab file $3.
+  # NB: the PBS preamble in the heredoc is an assumed minimal header.
+  cat > $1 <<EOF
+#!/bin/bash
+#PBS -N $2
+EOF
+  echo "" >> $1
+  echo "echo Running on " >> $1
+  echo "date" >> $1
+  echo "echo Using commands " >> $1
+  echo "cat $3" >> $1
+  echo "unset DISPLAY" >> $1
+  echo "/usr/local/bin/matlab < $3" >> $1
+}
+
+NAME=ivmGunnarDataRBF
+RUNNAME=$NAME
+KERN="{'rbf', 'white', 'bias'}"
+
+DATASETS=thyroid,100:titanic,100:heart,100:breast-cancer,100:banana,100:ringnorm,100:twonorm,100:waveform,100:diabetis,200:flare-solar,400:german,400:splice,400:image,400
+DATANUM=`seq -s: 1 5`
+INVWIDTHPARAMS=0.1:1:10
+
+NOISE=\'probit\'
+IFS=:
+echo Creating run scripts ...
+INDEX=$((0))
+for dataNum in $DATANUM
+do
+  for datasetDval in $DATASETS
+  do
+    for invWidth in $INVWIDTHPARAMS
+    do
+      dVal=${datasetDval##*,}
+      dataset=${datasetDval%%,*}
+      INDEX=$(($INDEX+1))
+      MFILE=$NAME$INDEX.m
+      SFILE=$NAME$INDEX.sh
+      echo $MFILE
+      echo $SFILE
+      createMatlabFile "$MFILE" "'${dataset}'" "$dataNum" "$KERN" "$invWidth" "$dVal"
+      createScriptFile "$SFILE" "$RUNNAME" "$MFILE"
+    done
+  done
+done
+echo Starting jobs ...
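+
+# Second pass: recompute the same INDEX sequence as above so that each
+# generated script file is submitted to the queue in the order it was
+# created.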
+INDEX=$((0))
+for dataNum in $DATANUM
+do
+  for datasetDval in $DATASETS
+  do
+    for invWidth in $INVWIDTHPARAMS
+    do
+      INDEX=$(($INDEX+1))
+      SFILE=$NAME$INDEX.sh
+      qsub $SFILE
+      sleep 0.1
+    done
+  done
+done
+echo Remember to clear the scripts later
diff --git a/ivm/ivmLikelihoods.m b/ivm/ivmLikelihoods.m new file mode 100644 index 00000000..4f899255 --- /dev/null +++ b/ivm/ivmLikelihoods.m @@ -0,0 +1,26 @@
+function L = ivmLikelihoods(model, x, y)
+
+% IVMLIKELIHOODS Return the likelihood for each point for the IVM.
+% FORMAT
+% DESC computes the likelihood of each given point under the given
+% IVM model structure.
+% ARG model : the IVM for which the likelihoods are to be computed.
+% ARG x : the input points where the likelihoods are to be
+% computed.
+% ARG y : the target points where the likelihoods are to be
+% computed.
+%
+% SEEALSO : noiseLikelihood, ivmPosteriorMeanVar
+%
+% COPYRIGHT : Neil D. Lawrence, 2005
+
+if nargin < 3
+  % This implies evaluation for the training data.
+  mu = model.mu;
+  varsigma = model.varSigma;
+  y = model.y;
+else
+  [mu, varsigma] = ivmPosteriorMeanVar(model, x);
+end
+
+L = noiseLikelihood(model.noise, mu, varsigma, y);
diff --git a/ivm/ivmLoadData.m b/ivm/ivmLoadData.m new file mode 100644 index 00000000..f050d041 --- /dev/null +++ b/ivm/ivmLoadData.m @@ -0,0 +1,300 @@
+function [X, y, XTest, yTest] = ivmLoadData(dataset, seedVal)
+
+% IVMLOADDATA Load a dataset.
+
+if nargin < 2
+  seedVal = 1e5;
+end
+randn('seed', seedVal)
+rand('seed', seedVal)
+XTest = [];
+yTest = [];
+switch dataset
+ case 'pumadynSeeger'
+  data = load('Dataset.data');
+  ind = randperm(size(data, 1));
+  indTr = ind(1:7168);
+  indTe = ind(7169:end);
+  X = data(indTr, 1:end-1);
+  y = data(indTr, end);
+  XTest = data(indTe, 1:end-1);
+  yTest = data(indTe, end);
+  Xscale = sqrt(var(X));
+  for j = 1:length(Xscale)
+    X(:, j) = X(:, j)/Xscale(j);
+    XTest(:, j) = XTest(:, j)/Xscale(j);
+  end
+  % Remove the linear trend fitted on the training data, then rescale.
+  augX = [X ones(size(X, 1), 1)];
+  w = inv(augX'*augX)*augX'*y;
+  y = y - augX*w;
+  augTestX = [XTest ones(size(XTest, 1), 1)];
+  yTest = yTest - augTestX*w;
+  yscale = sqrt(var(y));
+  y = y/yscale;
+  yTest = yTest/yscale;
+
+ case 'pumadyn'
+  % Data is variance 1, no need to normalise.
+  data = load('Dataset.data');
+  ind = randperm(size(data, 1));
+  indTr = ind(1:7168);
+  indTe = ind(7169:end);
+  X = data(indTr, 1:end-1);
+  y = data(indTr, end);
+  XTest = data(indTe, 1:end-1);
+  yTest = data(indTe, end);
+
+ case 'usps'
+  load usps_train
+  X = ALL_DATA;
+  range = min(ALL_T):max(ALL_T);
+  for i = 1:length(range)
+    y(:, i) = (ALL_T == range(i))*2 - 1;
+  end
+  if nargout > 2
+    load usps_test
+    XTest = ALL_DATA;
+    range = min(ALL_T):max(ALL_T);
+    for i = 1:length(range)
+      yTest(:, i) = (ALL_T == range(i))*2 - 1;
+    end
+  end
+
+ case {'usps0', 'usps1', 'usps2', 'usps3', 'usps4', 'usps5', 'usps6', 'usps7', 'usps8', 'usps9'}
+  digitNo = str2num(dataset(end));
+  load usps_train
+  X = ALL_DATA;
+  range = min(ALL_T):max(ALL_T);
+  for i = 1:length(range)
+    y(:, i) = (ALL_T == range(i))*2 - 1;
+  end
+  if nargout > 2
+    load usps_test
+    XTest = ALL_DATA;
+    range = min(ALL_T):max(ALL_T);
+    for i = 1:length(range)
+      yTest(:, i) = (ALL_T == range(i))*2 - 1;
+    end
+  end
+  y = y(:, digitNo+1);
+  if nargout > 2
+    yTest = yTest(:, digitNo+1);
+  end
+
+ case 'regressionOne'
+  try
+    load regressionOneData.mat
+  catch
+    [void, errid] = lasterr;
+    if strcmp(errid, 'MATLAB:load:couldNotReadFile')
+      numIn = 2;
+      N = 500;
+      X = zeros(N, numIn);
+      X(1:floor(N/2), :) = ...
+          randn(floor(N/2), numIn)*.5+1;
+      X(floor(N/2)+1:end, :) = ...
+          randn(ceil(N/2), numIn)*.5-1;
+      kern = kernCreate(X, 'rbfard');
+      kern.variance = 1;
+      kern.inverseWidth = 20;
+      kern.inputScales = [0 0.999];
+
+      K = kernCompute(kern, X);
+      y = real(gaussSamp(K, 1)') + randn(N, 1)*0.01;
+
+      save('regressionOneData.mat', 'numIn', 'N', 'X', 'y')
+    else
+      error(lasterr);
+    end
+  end
+
+ case 'regressionTwo'
+  try
+    load regressionTwoData.mat
+  catch
+    [void, errid] = lasterr;
+    if strcmp(errid, 'MATLAB:load:couldNotReadFile')
+      numIn = 2;
+      N = 500;
+      X = zeros(N, numIn);
+      X(1:floor(N/2), :) = ...
+          randn(floor(N/2), numIn)*.5+1;
+      X(floor(N/2)+1:end, :) = ...
+          randn(ceil(N/2), numIn)*.5-1;
+      kern = kernCreate(X, 'rbfard');
+      kern.variance = 1;
+      kern.inverseWidth = 20;
+      kern.inputScales = [0.999 .2];
+
+      K = kernCompute(kern, X);
+      y = real(gaussSamp(K, 1)') + randn(N, 1)*0.01;
+
+      save('regressionTwoData.mat', 'numIn', 'N', 'X', 'y')
+    else
+      error(lasterr)
+    end
+  end
+
+ case 'regressionThree'
+  try
+    load regressionThreeData.mat
+  catch
+    [void, errid] = lasterr;
+    if strcmp(errid, 'MATLAB:load:couldNotReadFile')
+      numIn = 2;
+      N = 500;
+      X = zeros(N, numIn);
+      X(1:floor(N/2), :) = ...
+          randn(floor(N/2), numIn)*.5+1;
+      X(floor(N/2)+1:end, :) = ...
+          randn(ceil(N/2), numIn)*.5-1;
+      kern = kernCreate(X, 'lin');
+      kern.variance = 1;
+
+      K = kernCompute(kern, X);
+      y = real(gaussSamp(K, 1)') + randn(N, 1)*0.01;
+      save('regressionThreeData.mat', 'numIn', 'N', 'X', 'y')
+    else
+      error(lasterr);
+    end
+  end
+
+ case 'regressionFour'
+  try
+    load regressionFourData.mat
+  catch
+    [void, errid] = lasterr;
+    if strcmp(errid, 'MATLAB:load:couldNotReadFile')
+      numIn = 2;
+      N = 500;
+      X = zeros(N, numIn);
+      X(1:floor(N/2), :) = ...
+          randn(floor(N/2), numIn)*.5+1;
+      X(floor(N/2)+1:end, :) = ...
+          randn(ceil(N/2), numIn)*.5-1;
+      kern = kernCreate(X, 'mlp');
+      kern.variance = 1;
+      kern.weightVariance = 1;
+      kern.biasVariance = 1;
+      K = kernCompute(kern, X);
+      y = real(gaussSamp(K, 1)') + randn(N, 1)*0.01;
+      save('regressionFourData.mat', 'numIn', 'N', 'X', 'y')
+    else
+      error(lasterr);
+    end
+  end
+
+ case 'classificationTwo'
+  try
+    load classificationTwoData.mat
+  catch
+    [void, errid] = lasterr;
+    if strcmp(errid, 'MATLAB:load:couldNotReadFile')
+      numIn = 2;
+      N = 500;
+
+      X = rand(N, numIn);
+
+      kern = kernCreate(X, 'rbf');
+      kern.variance = 10;
+      kern.inverseWidth = 10;
+
+      K = kernCompute(kern, X);
+      u = real(gaussSamp(K, 1)');
+
+      p = cumGaussian(u);
+      y = 2*(rand(size(u))>p)-1;
+      save('classificationTwoData.mat', 'numIn', 'N', 'X', 'u', 'y')
+    else
+      error(lasterr);
+    end
+  end
+
+ case 'classificationThree'
+  try
+    load classificationThreeData.mat
+  catch
+    [void, errid] = lasterr;
+    if strcmp(errid, 'MATLAB:load:couldNotReadFile')
+      numIn = 2;
+      N = 500;
+
+      X = rand(N, numIn);
+
+      kern = kernCreate(X, 'rbf');
+      kern.variance = 10;
+      kern.inverseWidth = 10;
+
+      K = kernCompute(kern, X);
+      u = real(gaussSamp(K, 1)');
+      a = 3;
+      pMinus = cumGaussian(u-a/2);
+      pPlus = cumGaussian(-u-a/2);
+      % Sample a label for each point: -1, 1 or null. Null category
+      % points are removed, leaving a low density region between the
+      % classes.
+      p = rand(size(u));
+      indMinus = find(p<pMinus);
+      indPlus = find(p>pMinus & p<pMinus+pPlus);
+      indNone = find(p>pMinus+pPlus);
+      y = zeros(N, 1);
+      y(indPlus) = 1;
+      y(indMinus) = -1;
+      y(indNone, :) = [];
+      X(indNone, :) = [];
+      save('classificationThreeData.mat', 'numIn', 'N', 'X', 'u', 'y')
+    else
+      error(lasterr);
+    end
+  end
+
+ case 'orderedOne'
+  dataPerCat = 30;
+  spacing = 3;
+
+  % Generate a toy data-set of (linear) ordered categories.
+  X = [randn(dataPerCat,2)-[zeros(dataPerCat, 1) repmat(3*spacing, dataPerCat, 1)]; ...
+       randn(dataPerCat,2)-[zeros(dataPerCat, 1) repmat(2*spacing, dataPerCat, 1)]; ...
+       randn(dataPerCat,2)-[zeros(dataPerCat, 1) repmat(spacing, dataPerCat, 1)]; ...
+       randn(dataPerCat, 2); ...
+       randn(dataPerCat,2)+[zeros(dataPerCat, 1) repmat(spacing, dataPerCat, 1)]; ...
+       randn(dataPerCat,2)+[zeros(dataPerCat, 1) repmat(2*spacing, dataPerCat, 1)]; ...
+       randn(dataPerCat,2)+[zeros(dataPerCat, 1) repmat(3*spacing, dataPerCat, 1)]];
+  y = [zeros(dataPerCat, 1); ...
+       repmat(1, dataPerCat, 1); repmat(2, dataPerCat, 1); ...
+       repmat(3, dataPerCat, 1); repmat(4, dataPerCat, 1); ...
+       repmat(5, dataPerCat, 1); repmat(6, dataPerCat, 1)];
+
+ case 'orderedTwo'
+  dataPerCat = 30;
+  spacing = 3;
+
+  % Generate a toy data-set of (radial) ordered categories.
+  thetaR = [randn(dataPerCat,2)-[zeros(dataPerCat, 1) repmat(3*spacing, dataPerCat, 1)]; ...
+            randn(dataPerCat,2)-[zeros(dataPerCat, 1) repmat(2*spacing, dataPerCat, 1)]; ...
+            randn(dataPerCat,2)-[zeros(dataPerCat, 1) repmat(spacing, dataPerCat, 1)]; ...
+            randn(dataPerCat, 2); ...
+            randn(dataPerCat,2)+[zeros(dataPerCat, 1) repmat(spacing, dataPerCat, 1)]; ...
+            randn(dataPerCat,2)+[zeros(dataPerCat, 1) repmat(2*spacing, dataPerCat, 1)]; ...
+            randn(dataPerCat,2)+[zeros(dataPerCat, 1) repmat(3*spacing, dataPerCat, 1)]];
+  thetaR(:, 2) = thetaR(:, 2) + 15;
+  % Map the angle/radius pairs into the plane.
+  X = [sin(thetaR(:, 1)).*thetaR(:, 2) cos(thetaR(:, 1)).*thetaR(:, 2)];
+  y = [zeros(dataPerCat, 1); ...
+       repmat(1, dataPerCat, 1); repmat(2, dataPerCat, 1); ...
+       repmat(3, dataPerCat, 1); repmat(4, dataPerCat, 1); ...
+       repmat(5, dataPerCat, 1); repmat(6, dataPerCat, 1)];
+
+ case 'mouthData'
+  % Not yet implemented.
+
+end
diff --git a/ivm/ivmOptimiseIVM.m b/ivm/ivmOptimiseIVM.m new file mode 100644 index 00000000..3f0e028d --- /dev/null +++ b/ivm/ivmOptimiseIVM.m @@ -0,0 +1,61 @@
+function model = ivmOptimiseIVM(model, display)
+
+% IVMOPTIMISEIVM Selects the points for an IVM model.
+% FORMAT
+% DESC selects points in an IVM model for inclusion in the active
+% set.
+% ARG model : the model for which the points are to be selected.
+% ARG display : the display level, should be 0, 1 or 2. The higher
+% the display, the more verbose.
+% RETURN model : the model with the active set selected.
+%
+% SEEALSO : ivmInit, ivmSelectVisualise, ivmAddPoint,
+% ivmSelectPoint, ivmEpUpdatePoint
+%
+% COPYRIGHT : Neil D. Lawrence, 2003, 2004, 2005
+
+% IVM
+
+if nargin < 2
+  display = 1;
+end
+tol = 1e-4;
+model = ivmInit(model);
+numData = size(model.X, 1);
+
+if model.d > numData
+  warning('Active set size is larger than data-set, resetting to data-set size');
+  model.d = numData;
+end
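+
+% The loop below greedily grows the active set: at each step
+% ivmSelectPoint scores the candidate points remaining in model.J and
+% returns the associated information change, and ivmAddPoint moves the
+% selected point into the active set.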
+% Start with all data-points inactive.
+model.J = 1:numData;
+
+% Set first infoChange to NaN.
+infoChange(1) = NaN;
+dVal = model.d;
+for k = 1:dVal
+  [indexSelect, infoChange(k)] = ivmSelectPoint(model);
+  dataIndexSelect = model.J(indexSelect);
+  model = ivmAddPoint(model, dataIndexSelect);
+  if display
+    ivmSelectVisualise(model, display, k, dataIndexSelect);
+  end
+end
+if model.epUpdate
+  % Refine the site parameters with EP sweeps over the active set until
+  % the beta parameters stop changing.
+  betaChange = 1;
+  oldBeta = model.beta;
+  counter = 0;
+  while betaChange > tol
+    I = model.I';
+    counter = counter + 1;
+    for i = I
+      model = ivmEpUpdatePoint(model, i);
+    end
+    betaChange = full(max(abs(model.beta - oldBeta)));
+    fprintf('EP Update %d, beta change %2.4f\n', counter, betaChange)
+    oldBeta = model.beta;
+  end
+end
diff --git a/ivm/ivmToolboxes.m b/ivm/ivmToolboxes.m new file mode 100644 index 00000000..19e9e2a4 --- /dev/null +++ b/ivm/ivmToolboxes.m @@ -0,0 +1,12 @@
+% IVMTOOLBOXES Load in the toolboxes for the IVM.
+
+importLatest('netlab');
+importLatest('kern');
+importLatest('prior');
+importLatest('svml');
+importLatest('optimi');
+importLatest('rochol');
+importLatest('ndlutil');
+importLatest('mltools');
+importLatest('datasets');
+importLatest('noise');
diff --git a/ivm/ivmTwoDPlot.m b/ivm/ivmTwoDPlot.m new file mode 100644 index 00000000..3c006c74 --- /dev/null +++ b/ivm/ivmTwoDPlot.m @@ -0,0 +1,38 @@
+function ivmTwoDPlot(model, iter, X, y)
+
+% IVMTWODPLOT Make a 2-D plot of the IVM.
+
+if nargin < 4
+  X = model.X;
+  y = model.y;
+end
+clf
+markerSize = 10;
+lineWidth = 2;
+title(['Iteration ' num2str(iter)])
+pointsNeg = plot(X(find(y(:, 1)==-1), 1), ...
+                 X(find(y(:, 1)==-1), 2), ...
+                 'gx', 'erasemode', 'xor', ...
+                 'markersize', markerSize+2, ...
+                 'linewidth', lineWidth);
+hold on
+pointsPos = plot(X(find(y(:, 1)==1), 1), ...
+                 X(find(y(:, 1)==1), 2), 'ro', ...
+                 'erasemode', 'xor', ...
+                 'markersize', markerSize, ...
+                 'linewidth', lineWidth);
+
+% Unlabelled points (targets are NaN).
+pointUn = plot(X(find(isnan(y(:, 1))), 1), ...
+               X(find(isnan(y(:, 1))), 2), 'm.', ...
+               'erasemode', 'xor', 'markersize', 8);
+set(gca, 'fontname', 'times')
+set(gca, 'fontsize', 20);
+
+if length(model.I)>0
+  % It's probably learnt something, so plot the decision boundary.
+  xlim = get(gca, 'xlim');
+  ylim = get(gca, 'ylim');
+  [CX, CY, CZ, CZVar] = ivmMeshVals(model, xlim, ylim, 30);
+  noise3dPlot(model.noise, 'ivmContour', CX, CY, CZ, CZVar, lineWidth);
+end
+drawnow
\ No newline at end of file
diff --git a/ivm/readme.txt b/ivm/readme.txt new file mode 100644 index 00000000..55ca3037 --- /dev/null +++ b/ivm/readme.txt @@ -0,0 +1,77 @@
+Changes in vs 0.41
+------------------
+
+Fixed bug where, for multiple outputs, the IVM 'random' point inclusion wasn't computing the entropy change in ivmSelectPoint.
+
+Changes in vs 0.4
+-----------------
+
+The toolbox has been brought into line with other toolboxes on the site. Now the ivm model is created using ivmCreate, and all the model options are set in ivmOptions. The files are now better described. The null category noise model examples are now integrated with the IVM (the main ncnm code is in the NOISE toolbox).
+
+Changes in vs 0.33
+------------------
+
+The code now relies upon the datasets toolbox for loading the datasets used. The EP updates have now been fixed so that the site parameters can be refined.
+
+Changes in vs 0.32
+------------------
+
+This code no longer works under MATLAB 6.1; it requires MATLAB 7.0 or higher to run.
+
+This version of the software now relies on the following toolboxes:
+
+KERN vs 0.14
+------------
+
+This toolbox implements the different kernels.
+IVM interacts with this toolbox through an interface which involves files starting with kern.
+
+NOISE vs 0.12
+-------------
+
+This toolbox implements the different noise models. IVM interacts with this toolbox through an interface which involves files starting with noise.
+
+NDLUTIL vs 0.12
+---------------
+
+This toolbox implements some generic functions which could be used beyond the ivm toolbox, for example sigmoid.m and cumGaussian.m.
+
+OPTIMI vs 0.12
+--------------
+
+This toolbox implements functions which allow non-linear transformations between parameters to be optimised. For example it allows variances to be optimised in log space.
+
+PRIOR vs 0.12
+-------------
+
+This toolbox allows priors to be placed over parameters; at the moment this is used so that MAP solutions can be found for the parameters rather than type-II maximum likelihood. The priors were written for the Null Category Noise Model (see NCNM toolbox) so that an exponential prior could be placed over the process variances. The rest of its functionality has not been tested.
+
+ROCHOL vs 0.12
+--------------
+
+This toolbox implements the rank one Cholesky updates. These are needed when points are removed or EP updates are applied to selected points.
+
+Changes in vs 0.31
+------------------
+
+The options are now held in a structure whose values are set in ivmOptions.m.
+There were some missing files in the last release; these have now been added.
+
+The EP updates are currently unstable numerically and should be used with caution.
+
+Demos
+-----
+
+Six toy demos are provided: demClassification1, demClassification2, demRegression1, demRegression2, demOrdered1 and demOrdered2. Each pair runs a different noise model. They display their results as they run, and therefore they don't use the ivmRun function, which is the normal recommended way of running the code. A sketch of a typical session is given at the end of this file.
+
+Three large scale experiments are provided on the USPS data-set, demUsps1-3. They use three different types of kernel.
+
+General comments
+----------------
+
+Since version 0.22 the code is far more modular. This was done in an effort to improve its readability and reduce the need for re-writes, but it may be slower than the previous version as a result.
+
+Yet to be implemented functionality still includes:
+
+Multi-class noise models (which will probably be done mostly in the NOISE toolbox) and randomised greedy selection.
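+
+Typical session
+---------------
+
+As a rough sketch of a session (the data set, kernel choice and option
+values here are illustrative, not a prescription):
+
+  [X, y] = ivmLoadData('classificationTwo');
+  options = ivmOptions;
+  options.kern = {'rbf', 'white'};
+  model = ivmCreate(size(X, 1), size(y, 2), X, y, options);
+  for i = 1:options.extIters
+    model = ivmOptimiseIVM(model, options.display);
+    model = ivmOptimiseKernel(model, options.display, options.kernIters);
+  end
+  ivmDisplay(model);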