-
Notifications
You must be signed in to change notification settings - Fork 1
/
Project 4-Retail Analysis.sas
136 lines (104 loc) · 4.45 KB
/
Project 4-Retail Analysis.sas
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
/* Retail Analysis */
/* Step 1: Import the data */
/* Based on generated IMPORT code. */
/* Source File: Project 04_Retail Analysis_Dataset.xlsx */
/* Source Path: /folders/myshortcuts/myFolders */

/* %web_drop_table is not defined in this file - presumably supplied by the */
/* SAS Studio session that generated this code, to remove any earlier copy  */
/* of the target table before re-importing. TODO confirm.                    */
%web_drop_table(TRAINING.RetailAnalysis);

/* Fileref pointing at the Excel workbook to load. */
filename reffile '/folders/myshortcuts/myFolders/Project 04_Retail Analysis_Dataset.xlsx';

/* Read the workbook into TRAINING.RetailAnalysis, taking variable names */
/* from the first row of the sheet.                                      */
proc import datafile=reffile
            out=TRAINING.RetailAnalysis
            dbms=xlsx;
    getnames=yes;
run;

/* List the imported variables and their attributes. */
proc contents data=TRAINING.RetailAnalysis;
run;
/* Step 2: Compute the Total Sales */
/* Rewrites TRAINING.RetailAnalysis in place, adding Total_Sales as the */
/* product of the quantity and sales columns for every row.             */
data Training.RetailAnalysis;
    set Training.RetailAnalysis;
    Total_Sales = quantity * sales;
run;
/* Step 3: Model Total_Sales (dependent variable) against the other independent variables */
/* Starting model: all four candidate predictors. */
proc reg data=training.retailanalysis;
    model Total_Sales = Quantity Discount Profit Shipping_Cost;
run;
/* Shipping_Cost is a fixed multiple (0.1) of Profit, so it adds no independent */
/* information to the model; refit without it.                                  */
proc reg data=training.retailanalysis;
    model Total_Sales = Quantity Discount Profit;
run;
/* Discount was found to be insignificant in the previous fit, so it is */
/* omitted; refit with Quantity and Profit only.                        */
proc reg data=training.retailanalysis;
    model Total_Sales = Quantity Profit;
run;
/* From the above model we can conclude that */
/* 1) The independent variables Quantity & Profit have p-values < 0.05, hence both are significant */
/* 2) The model explains 87.81% of the variance in Total_Sales (adjusted R-square value). */
/* 3) The linear equation is Total_Sales = -333.042 + 166.88 * Quantity + 4.1 * Profit */
/* From the above equation we can predict Total_Sales values for given Quantity & Profit */
/* Step 4: Checking the predicted values in the output dataset */
/* Refit the Quantity + Profit model and save the input rows together with   */
/* the fitted values (variable Total_Sales_Predicted) in PredictedSales.     */
/* PredictedSales is a one-level name, so it lands in the session's default  */
/* library (normally WORK).                                                  */
proc reg data=training.retailanalysis;
    model Total_Sales = Quantity Profit;
    output out=PredictedSales p=Total_Sales_Predicted;
run;
/* From the above output predicted values, we can notice that Product2 has -ve predictions. */
/* The model is therefore refitted separately for each product.                             */

/*
 * fit_product_model: fit Total_Sales = Quantity + Profit on the rows of
 * training.retailanalysis belonging to a single product.
 *
 * Parameter:
 *   product - value of the Products column to keep (e.g. Product2). It is
 *             also used as the suffix of the output dataset name, so it
 *             must be a valid SAS name fragment.
 *
 * Output:
 *   PredictedSales_<product> - the selected rows plus the fitted values in
 *   Total_Sales_Predicted. Each product gets its own dataset; previously
 *   every run wrote to PredictedSales, so each run overwrote the one before
 *   it (and the whole-data predictions from Step 4) and only the last
 *   product's predictions were left at the end of the program.
 */
%macro fit_product_model(product);
    proc reg data=training.retailanalysis;
        /* Double quotes so the macro variable resolves inside the literal. */
        where Products = "&product";
        model Total_Sales = Quantity Profit;
        output out=PredictedSales_&product p=Total_Sales_Predicted;
    run;
    /* PROC REG is interactive: RUN alone does not end it, QUIT does. */
    quit;
%mend fit_product_model;

/* Running the model only for Product2 */
%fit_product_model(Product2)
/* We find that all the variables are insignificant and the accuracy is 56.65% */

/* Running the model only for Product1 */
%fit_product_model(Product1)
/* We find that all the variables are insignificant and the accuracy is 42.46% */

/* Running the model only for Product3 */
%fit_product_model(Product3)
/* We find that all the variables are significant and the accuracy is 100% */

/* Running the model only for Product4 */
%fit_product_model(Product4)
/* We find that one of the variables is significant and the other is insignificant but the accuracy is 100% */

/* NOTE(review): for Product5 to Product9 the fit is reported as 100% while   */
/* significance cannot be determined. That combination usually means the      */
/* subset has too few rows to leave any error degrees of freedom - confirm    */
/* the per-product row counts before relying on these fits.                   */

/* Running the model only for Product5 */
%fit_product_model(Product5)
/* We find that we cannot say if the variables are significant or not but the accuracy is 100% */

/* Running the model only for Product6 */
%fit_product_model(Product6)
/* We find that we cannot say if the variables are significant or not but the accuracy is 100% */

/* Running the model only for Product7 */
%fit_product_model(Product7)
/* We find that we cannot say if the variables are significant or not but the accuracy is 100% */

/* Running the model only for Product8 */
%fit_product_model(Product8)
/* We find that we cannot say if the variables are significant or not but the accuracy is 100% */

/* Running the model only for Product9 */
%fit_product_model(Product9)
/* We find that we cannot say if the variables are significant or not but the accuracy is 100% */