load_forecasting.m

%% Data Driven Fitting with High Dimensional Data
% Copyright (c) 2011, The MathWorks, Inc.

%% Format my data

% Clean Up
clear all
clc

% Import my data
ds = dataset('xlsfile', 'EnergyData.xlsx');

% plot Load = f(time)

plot(ds.SystemLoad(1:336))

%% Divide my data into a test set and a training set
% 2004 - 2007 = training set
% 2008 = test set

[yr, mo, da] = datevec(ds.Date);

Test = ds(yr == 2008, :);
Training = ds(yr ~= 2008, :);

Training_Y = Training.SystemLoad;
Training_X = double(Training(:,3:end));


%%  Create a Load Forecasting Model using a Boosted Decision Tree

t = RegressionTree.template('Surrogate','on');

rens = fitensemble(Training_X, Training_Y, 'LSBoost', 1000, t, ...
    'CategoricalPredictors', [3 4 5],...
    'PredictorNames', Training.Properties.VarNames(3:end))

%% Generate Predictions from the test set

% Create a test set
Test_Y = Test.SystemLoad;
Test_X = double(Test(:,3:end));

% Generate Predictions
Forecast_Load = predict(rens,Test_X);

% Use a simple chart to show predicted versus actual for two weeks of data
figure
plot(Test_Y(1:336), 'b');
hold on
plot(Forecast_Load(1:336), 'r');

%  Use a scatter plot to show Predicted versus Residuals
figure

scatter(Test_Y, Forecast_Load - Test_Y, '.', 'r');
refline(0,1)
xlabel('Predicted')
ylabel('Residuals')

%% Inspect some of the methods associated with "rens"

methods(rens)

display = dataset;
display.Variable = rens.PredictorNames';
display.Importance = predictorImportance(rens)';
sortrows(display, 'Importance', 'descend')

loss(rens, Training_X, Training_Y)

%%  Evaluate Goodness of Fit

% Errors = Predicted - observed
Errors = predict(rens,Test_X) - Test_Y;

% Inspect the Errors
figure
histfit(Errors)
figure
normplot(Errors)

%%  Segment the data into groups

% By Hour
figure;
boxplot(Errors, Test.Hour, 'plotstyle', 'compact');
xlabel('Hour'); ylabel('Errors');
title('Breakdown of Errors by hour');

% By Weekday
figure
boxplot(Errors, Test.Weekday, 'labels', {'Sun','Mon','Tue','Wed','Thu','Fri','Sat'});
ylabel('Percent Error Statistics');
title('Breakdown of Errors by weekday');

% By Month
figure
boxplot(Errors, datestr(Test.Date,'mmm'));
ylabel('Percent Error Statistics');
title('Breakdown of Errors by month');


%%  Use Industry Specific Error Metrics

Error_Percentage = (abs(Errors)./Test_Y) * 100;

fL = reshape(Forecast_Load, 24, length(Forecast_Load)/24)';
tY = reshape(Test_Y, 24, length(Test_Y)/24)';
peakerrpct = abs(max(tY,[],2) - max(fL,[],2))./max(tY,[],2) * 100;

MAE = mean(abs(Error_Percentage));
MAPE = mean(Error_Percentage(~isinf(Error_Percentage)));

fprintf('Mean Absolute Percent Error (MAPE): %0.2f%% \nMean Absolute Error (MAE): %0.2f MWh\nDaily Peak MAPE: %0.2f%%\n',...
    MAPE, MAE, mean(peakerrpct))

%%  Switch algorithms

tic
rens2 = fitensemble(Training_X, Training_Y, 'Bag', 100, 'Tree', ...
    'type', 'regression',...
    'CategoricalPredictors', [3 4 5],...
    'PredictorNames', Training.Properties.VarNames(3:end))
toc

%%  Contrast Models

Forecast_Load2 = predict(rens2,Test_X);

% Use a simple chart to show predicted versus actual for two weeks of data
plot(Test_Y(1:336), 'b');
hold on
plot(Forecast_Load(1:336), 'r');
plot(Forecast_Load2(1:336), 'k');

residuals2 = Forecast_Load2 - Test_Y;

figure
histfit(residuals2)

Error_Percentage2 = (abs(residuals2)./Test_Y) * 100;

fL2 = reshape(Forecast_Load2, 24, length(Forecast_Load2)/24)';
peakerrpct2 = abs(max(tY,[],2) - max(fL2,[],2))./max(tY,[],2) * 100;

MAE2 = mean(abs(Error_Percentage2));
MAPE2 = mean(Error_Percentage2(~isinf(Error_Percentage2)));

fprintf('Mean Absolute Percent Error (MAPE):  %0.2f%% \nMean Absolute Error (MAE): %0.2f MWh\nDaily Peak MAPE: %0.2f%%\n',...
    MAPE, MAE, mean(peakerrpct))

fprintf('Mean Absolute Percent Error 2: (MAPE): %0.2f%% \nMean Absolute Error 2 (MAE): %0.2f MWh\nDaily Peak MAPE 2: %0.2f%%\n',...
    MAPE2, MAE2, mean(peakerrpct2))

%% Neural Network

tic

net = newfit(Training_X', Training_Y', 20);
net.performFcn = 'mae';
net = train(net, Training_X', Training_Y');

toc

Forecast_Load3 = sim(net, Test_X')';

% Use a simple chart to show predicted versus actual for two weeks of data
figure
plot(Test_Y(1:336), 'b');
hold on
plot(Forecast_Load(1:336), 'r');
plot(Forecast_Load2(1:336), 'k');
plot(Forecast_Load3(1:336), 'm');

residuals3 = Forecast_Load3 - Test_Y;

figure
histfit(residuals3)

Error_Percentage3 = (abs(residuals3)./Test_Y) * 100;

fL3 = reshape(Forecast_Load3, 24, length(Forecast_Load3)/24)';
peakerrpct3 = abs(max(tY,[],2) - max(fL3,[],2))./max(tY,[],2) * 100;

MAE3 = mean(abs(Error_Percentage3));
MAPE3 = mean(Error_Percentage3(~isinf(Error_Percentage3)));

fprintf('Mean Absolute Percent Error (MAPE): %0.2f%% \nMean Absolute Error (MAE): %0.2f MWh\nDaily Peak MAPE: %0.2f%%\n',...
    MAPE, MAE, mean(peakerrpct))

fprintf('Mean Absolute Percent Error 2 (MAPE): %0.2f%% \nMean Absolute Error 2 (MAE): %0.2f MWh\nDaily Peak MAPE 2: %0.2f%%\n',...
    MAPE2, MAE2, mean(peakerrpct2))

fprintf('Mean Absolute Percent Error 3 (MAPE): %0.2f%% \nMean Absolute Error 3 (MAE): %0.2f MWh\nDaily Peak MAPE 3: %0.2f%%\n',...
    MAPE3, MAE3, mean(peakerrpct3))