
Commit

git-svn-id: https://pmtk3.googlecode.com/svn/trunk@2791 b6abd7f4-f95b-11de-aa3c-59de0406b4f5
[email protected] committed Jan 4, 2012
1 parent a9df674 commit 48a2c07
Showing 25 changed files with 252 additions and 140 deletions.
Binary file added bishop-gibbs-gauss.pdf
17 changes: 2 additions & 15 deletions config-local.txt
@@ -1,17 +1,4 @@
% This file stores hard coded paths and configuration variables
% Use getConfigValue(varname) to access a value, e.g.
% getConfigValue('PMTKsupportLink')
%
% If a config-local.txt file is found, its values will override these.
%
% PMTKmetaDirs meta, docs, tmp
% PMTKred #990000
% PMTKauthors Kevin Murphy, Matt Dunham
% PMTKcodeDirs toolbox, demos, localUtil, matlabTools
% PMTKgvizPath C:\Program Files\Graphviz2.26\bin
% PMTKlocalWikiPath C:\path\to\pmtk3Wiki
% PMTKlocalDataPath /Users/kpmurphy/GoogleCode/pmtkdata
% PMTKlocalSupportPath /Users/kpmurphy/GoogleCode/pmtksupport
% PMTKpmlBookSource /Users/kpmurphy/Dropbox/MLbook/Text
% PMTKpmlFigures /Users/kpmurphy/Dropbox/MLbook/Figures
% PMTKlightSpeedDir lightspeed2.3
% PMTKgvizPath C:\Program Files\Graphviz2.26\bin
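The comment block above names getConfigValue as the accessor for these settings; a minimal usage sketch for reading one of the listed values from a demo script (the string return type and an error-free lookup are assumptions, not documented behaviour):

% Minimal sketch: read config values set in config.txt / config-local.txt
% (assumes getConfigValue returns the value as a string)
gvizPath = getConfigValue('PMTKgvizPath');      % e.g. 'C:\Program Files\Graphviz2.26\bin'
dataPath = getConfigValue('PMTKlocalDataPath'); % local copy of pmtkdata
fprintf('Graphviz binaries expected in %s\n', gvizPath);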

6 changes: 4 additions & 2 deletions config.txt
@@ -4,10 +4,11 @@
% If a config-local.txt file is found, its values will override these.
%
%
% Used by initPmtk3.m (UPDATE THESE DIRECTORIES TO MATCH YOUR INSTALLATION)
% Used by initPmtk3.m
% PMTKgvizPath C:\Program Files\Graphviz2.26\bin
% PMTKlocalDataPath path/to/data
% PMTKlocalSupportPath path/to/support
% PMTKlightSpeedDir lightspeed2.3
%
% Used by generateAuthorReport, pmtkTagReport, etc
% PMTKred #990000
@@ -16,7 +17,8 @@
% PMTKmetaDirs meta, docs, tmp
%
% Used by pmlFigureCodeReport
% PMTKpmlBookSource C:\path\to\PML\Text
% PMTKpmlBookSource /Users/kpmurphy/Dropbox/MLbook/Text
% PMTKpmlFigures /Users/kpmurphy/Dropbox/MLbook/Figures
%
% used by publishDemos.m, generateSynopses.m
% PMTKlocalWikiPath C:\path\to\pmtk3Wiki
10 changes: 7 additions & 3 deletions demos/binaryFaDemoNewsgroups.m
@@ -1,6 +1,8 @@
% Demo of factor analysis applied to the binary newsgroups bag-of-words data
% We compute a 2d embedding

%PMTKreallySlow

% This file is from pmtk3.googlecode.com

requireStatsToolbox; % cmdscale
@@ -45,12 +47,13 @@
m = m + 1;
methods(m).modelname = 'JJ';
methods(m).fitFn = @(data) binaryFAfit(data, K, 'maxIter', 6, ...
'verbose', truesize, 'computeLoglik', false);
'verbose', true, 'computeLoglik', false);
methods(m).infFn = @(model, labels) binaryFAinferLatent(model, labels);
methods(m).nlatent = K;
end

Ks = [];

%Ks = [];
for kk=1:numel(Ks)
K = Ks(kk);
m = m + 1;
Expand All @@ -61,6 +64,7 @@
methods(m).nlatent = K;
end


Nmethods = numel(methods);
for m=1:Nmethods
fitFn = methods(m).fitFn;
Expand All @@ -87,7 +91,7 @@
end
ndx = 1:1:D;
for d=ndx(:)'
text(mdsCoords(d,1), mdsCoords(d,2), wordlist{d}, 'fontsize', 10);
%text(mdsCoords(d,1), mdsCoords(d,2), wordlist{d}, 'fontsize', 10);
end
nlatent = methods(m).nlatent;
title(sprintf('L=%d, N=%d, method = %s', nlatent, N, methodname))
117 changes: 117 additions & 0 deletions demos/catFAdemo.m
@@ -0,0 +1,117 @@
function catFAdemo()
% Factor analysis with categorical and continuous data
% We reproduce the demo from
% http://www.cs.ubc.ca/~emtiyaz/software/mixedDataFA.html
% This just checks that the code runs; it is not intrinsically interesting.

clear all
setSeed(16)
% generate data - data is stored along columns not rows!
[trainData,testData,simParams] = makeSimDataMixedDataFA(100);
nClass = simParams.nClass;
% introduce missing variables in train data
missProb = 0.1;
trainData.continuousTruth = trainData.continuous;
trainData.discreteTruth = trainData.discrete;
[D,N] = size(trainData.continuous);
miss = rand(D,N)<missProb;
trainData.continuous(miss) = NaN;
[D,N] = size(trainData.discrete);
miss = rand(D,N)<missProb;
trainData.discrete(miss) = NaN;


Dz = 2;
[model, loglikTrace] = catFAfit(trainData.discrete', trainData.continuous', Dz);

[mu, Sigma, loglik] = catFAinferLatent(model, testData.discrete', testData.continuous')

[predD, predC] = catFApredictMissing(model,testData.discrete', testData.continuous')


end

function [trainData, testData, params] = makeSimDataMixedDataFA(N)
% [TRAINDATA, TESTDATA, PARAMS] = makeSimDataMixedDataFA(N) makes simulated data
% for mixedDataFA with N data points
%
% Written by Emtiyaz, CS, UBC
% Modified on June 09, 2010

missProb = 0.3;
Dz = 2;
mean_ = [5 -5]';% -5 -5]';
covMat = eye(Dz);
z = repmat(mean_,1,N) + chol(covMat)*randn(Dz,N);
Dc = 5;
nClass = 2*ones(10,1);%[3 2 4];
noiseCovMat = 0.01*eye(Dc);%abs(diag(randn(Dc,1)));
Bc = rand(Dc,Dz);

% generate data
yc = Bc*z + chol(noiseCovMat)*randn(Dc,N);
%yc = yc - repmat(mean(yc,2), 1, N);

Bm = [];
for c = 1:length(nClass)
Bmc = rand(nClass(c)-1,Dz);
p = [exp(Bmc*z); ones(1,N)];
pMult = p./repmat(sum(p,1),nClass(c),1);
yd(c,:) = sum(repmat([1:nClass(c)],N,1).*mnrnd(1,pMult'),2);
Bm = [Bm; Bmc];
end

% model parameters structure
params.nClass = nClass;
params.mean = mean_;
params.covMat = covMat;
params.noiseCovMat = noiseCovMat;
params.betaMult = Bm;
params.betaCont = Bc;

% split test and train data
ratio = .7;
[trainData, testData] = splitData(yc,yd,ratio);

%{
% introduce missing variables in test data
testData.continuousTruth = testData.continuous;
testData.discreteTruth = testData.discrete;
[D,N] = size(testData.continuous);
miss = rand(D,N)<missProb;
testData.continuous(miss) = NaN;
[D,N] = size(testData.discrete);
miss = rand(D,N)<missProb;
testData.discrete(miss) = NaN;
%}


end

function [trainData, testData, idx] = splitData(yc, yd, ratio)
% splits data into training and testing set
% yc is the continuous data, yd is discrete data
% ratio is the split ratio

[Dc,Nc] = size(yc);
[Dd,Nd] = size(yd);
N = max(Nc,Nd);
nTrain = ceil(ratio*N);
idx = randperm(N);
if Dc>0
testData.continuous = yc(:,idx(nTrain+1:end));
trainData.continuous = yc(:,idx(1:nTrain));
else
testData.continuous = [];
trainData.continuous = [];
end
if Dd>0
testData.discrete = yd(:,idx(nTrain+1:end));
trainData.discrete = yd(:,idx(1:nTrain));
else
testData.discrete = [];
trainData.discrete = [];
end

end
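The catFA* functions introduced in this file follow a fit / infer / predict pattern; a minimal usage sketch on user-supplied matrices, mirroring the calls in catFAdemo above (the variable names and the one-case-per-row layout of the inputs are illustrative assumptions, matching the transposed calls in the demo):

% discreteData:   N x Dd matrix of categorical values (NaN marks missing entries)
% continuousData: N x Dc matrix of real values (NaN marks missing entries)
Dz = 2;                                                  % number of latent factors
[model, loglikTrace] = catFAfit(discreteData, continuousData, Dz);
[mu, Sigma, loglik]  = catFAinferLatent(model, discreteData, continuousData);
[predD, predC]       = catFApredictMissing(model, discreteData, continuousData);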

4 changes: 4 additions & 0 deletions demos/classificationShootout.m
@@ -22,6 +22,10 @@
nDataSets = numel(dataSets);

methods = {'SVM', 'RVM', 'SMLR', 'RMLR'};
if ~svmInstalled
methods = {'SVM', 'RVM', 'SMLR', 'RMLR'};
end

nMethods = numel(methods);
results = cell(nDataSets, nMethods);
for i=1:nDataSets
3 changes: 3 additions & 0 deletions demos/classificationShootoutCvLambdaOnly.m
@@ -23,6 +23,9 @@
nDataSets = numel(dataSets);

methods = {'SVM', 'RVM', 'SMLR', 'RMLR'};
if ~svmInstalled
methods = {'SVM', 'RVM', 'SMLR', 'RMLR'};
end
nMethods = numel(methods);
results = cell(nDataSets, nMethods);
for i=1:nDataSets
12 changes: 6 additions & 6 deletions demos/demoMinfuncHighdim.m
@@ -9,23 +9,22 @@ function demoMinfuncHighdim()
%methods = {'sd', 'cg', 'bb'};
methods = {'sd', 'cg', 'bb', 'newton', 'newtoncg', 'bfgs', 'lbfgs'};
%methods = {'sd', 'cg', 'bb', 'newton', 'newtoncg', 'bfgs', 'lbfgs', 'newton0', 'newton0lbfgs'};
[styles] = plotColors();
[styles, colors, symbols, plotstyles] = plotColors();

for i=1:length(methods)
names{i} = methods{i}(1:min(5,length(methods{i})))
end
names = {'sd', 'cg', 'bb', 'n', 'ncg', 'bfgs', 'lbfgs'};


seed = 1; setSeed(seed);
dims = [10 100 500];
dims = [10 100];
clear str
for j=1:length(dims)
d = dims(j);
x0 = randn(d,1);
figure;
for i=1:length(methods)
hold on
[fx(i,j), fcount(i,j),tim(i,j)] = helper(x0, methods{i}, styles{i});
[fx(i,j), fcount(i,j),tim(i,j)] = helper(x0, methods{i}, plotstyles{i});

str{i} = sprintf('%s (f* = %5.2f, %d feval, %3.2f sec)', ...
names{i}, fx(i,j), fcount(i,j), tim(i,j));
@@ -55,6 +54,7 @@ function demoMinfuncHighdim()
bar(tim(:,j))
set(gca,'xticklabel',names)
title(sprintf('time d=%d', d))
drawnow
end
printPmtkFigure minfuncRosenBar

@@ -75,7 +75,7 @@ function demoMinfuncHighdim()
options.display = 'none';
options.maxFunEvals = 500;
options.tolFun = 1e-2;
options.outputFn = @optimstore;
options.outputFcn = @optimstore;
options.Method = method;
options.HessianModify = 1;
if strcmp(method, 'newtoncg')
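The helper above only sets up the minFunc options (Method, maxFunEvals, outputFcn); the optimization call itself is collapsed in this hunk. A sketch of how minFunc is conventionally invoked with such an options struct (the exact call made by helper is an assumption here):

% Conventional minFunc call on the extended Rosenbrock objective;
% rosenbrock returns [f, g, H] and minFunc uses as many outputs as the chosen Method needs.
[x, fval, exitflag, output] = minFunc(@rosenbrock, x0, options);
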
2 changes: 1 addition & 1 deletion demos/demoRosenConstrained.m
@@ -9,7 +9,7 @@ function demoRosenConstrained()
% minimize 2d rosenbrock s.t. x1^2 + x2^2 <= 1
% Example from p1-8 of Mathworks Optimization Toolbox manual

requiresOptimToolbox
requireOptimToolbox

xstart = [-1 2];
% Hessian is ignored by quasi-Newton so we use interior point
20 changes: 11 additions & 9 deletions demos/demoRosenHighDim.m
@@ -3,14 +3,14 @@
%%
%PMTKinteractive
%PMTKneedsOptimToolbox


% This file is from pmtk3.googlecode.com

% "A note on the extended rosenbrock function" Evol. Comp. 2006
% claims that for d=4 to 30 dims there are 2 local minima, at [1,1,...,1]
% and near [-1,1,...,1].
% Let us verify this for d=4 and d=5

% This file is from pmtk3.googlecode.com


%xstart = [-0.77565923 0.61309337 0.38206285 0.14597202]';
xstart = [-0.96205109 0.93573953 0.88071386 0.77787813 0.60509438]';
[f g H] = rosenbrock(xstart);
@@ -22,21 +22,25 @@
% So the claim seems dubious...
%
%%

requireOptimToolbox
x = rand(10,1);
[f g H] = rosenbrock(x);
figure;spy(H)
title(sprintf('sparsity pattern of Hessian for extended Rosenbrock'))

printPmtkFigure rosen10dSpy


% Now compare speed of using Hessian or approximating it

d = 200;
d = 20; % 200;
seed = 0;
setSeed(seed);
xstart = 2*rand(d,1)-1;
opts = optimset('display', 'off', 'DerivativeCheck', 'on');
opts = optimset('display', 'off', 'DerivativeCheck', 'off');
[f g H] = rosenbrock(xstart);

clear options
options{1} = optimset(opts, 'GradObj', 'on', 'Hessian', 'on'); % analytic Hessian
options{2} = optimset(opts, 'GradObj', 'on', 'Hessian', []); % dense numerical Hessian
options{3} = optimset(opts, 'GradObj', 'on', 'HessPattern', H); % sparse numerical Hessian
@@ -52,5 +56,3 @@

final
t

printPmtkFigure rosen10dSpy
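The three optimset configurations above correspond to an analytic Hessian, a dense numerical Hessian, and a sparse numerical Hessian via HessPattern; the timing loop that uses them is collapsed in this hunk. A sketch of how such a comparison could be run with fminunc (the loop body is an assumption, not the file's actual code):

% Time each Hessian strategy on the same starting point (needs the Optimization Toolbox)
t = zeros(1, numel(options));
final = zeros(1, numel(options));
for k = 1:numel(options)
    tic;
    [xk, fk] = fminunc(@rosenbrock, xstart, options{k});
    t(k) = toc;
    final(k) = fk;   % final objective value reached with option set k
end
final
t
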
1 change: 1 addition & 0 deletions demos/discrimAnalysisHeightWeightDemo.m
@@ -4,6 +4,7 @@

% This file is from pmtk3.googlecode.com

clear all
rawdata = loadData('heightWeight');
data.Y = rawdata(:,1); % 1=male, 2=female
data.X = [rawdata(:,2) rawdata(:,3)]; % height, weight
1 change: 1 addition & 0 deletions demos/gpcDemo2d.m
@@ -13,6 +13,7 @@
x = [x1 x2]';
y = [repmat(-1,1,n1) repmat(1,1,n2)]';
[t1 t2] = meshgrid(-4:0.1:4,-4:0.1:4);
t = [t1(:) t2(:)]; % test

% training
loghyper = [0; 0]; % initial guess
6 changes: 3 additions & 3 deletions demos/hclustYeastDemo.m
@@ -1,12 +1,11 @@
%% Hierarchical Clustering Demo
%PMTKneedsStatsToolbox cluster, pdist, linkage
%PMTKneedsBioToolbox clustergram
%%

% This file is from pmtk3.googlecode.com

requireStatsToolbox
requireBioinfoToolbox
%requireBioinfoToolbox
loadData('yeastData310') % 'X', 'genes', 'times');

corrDist = pdist(X, 'corr');
@@ -22,11 +21,12 @@
printPmtkFigure('clusterYeastHier16')


if bioinfoToolboxInstalled
figure(5);clf
clustergram(X(:,2:end),'RowLabels',genes, 'ColumnLabels',times(2:end))
title('hierarchical clustering')
printPmtkFigure('clusterYeastRowPerm')

end

figure(6); clf
dendrogram(linkage(corrDist, 'average'));
2 changes: 2 additions & 0 deletions demos/hmmNbestDemo.m
@@ -3,6 +3,8 @@
% Nilsson and Goldberger, IJCAI 2001
% We do the example in sec 3.2

%PMTKinprogress

initDist = [0.6; 0.4];
transmat = [0.6 0.4; 0.2 0.8];
obsmat = [0.9 0.1; 0.3 0.7];
