forked from TheAlgorithms/Python
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
* add forecasting code * add statsmodel * sort import * sort import fix * fixing black * sort requirement * optimize code * try with limited data * sort again * sort fix * sort fix * delete warning and black * add code for forecasting * use black * add more hints to describe * add doctest * finding whitespace * fixing doctest * delete * revert back * revert back * revert back again * revert back again * revert back again * try trimming whitespace * try adding doctypeand etc * fixing reviews * deleting all the space * fixing the build * delete x * add description for safety checker * deleting subscription integer * fix docthint * make def to use function parameters and return values * make def to use function parameters and return values * type hints on data safety checker * optimize code * Update run.py Co-authored-by: FVFYK3GEHV22 <[email protected]> Co-authored-by: Christian Clauss <[email protected]>
- Loading branch information
1 parent
b97529d
commit 12c6980
Showing
4 changed files
with
271 additions
and
0 deletions.
There are no files selected for viewing
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,114 @@ | ||
total_user,total_events,days | ||
18231,0.0,1 | ||
22621,1.0,2 | ||
15675,0.0,3 | ||
23583,1.0,4 | ||
68351,5.0,5 | ||
34338,3.0,6 | ||
19238,0.0,0 | ||
24192,0.0,1 | ||
70349,0.0,2 | ||
103510,0.0,3 | ||
128355,1.0,4 | ||
148484,6.0,5 | ||
153489,3.0,6 | ||
162667,1.0,0 | ||
311430,3.0,1 | ||
435663,7.0,2 | ||
273526,0.0,3 | ||
628588,2.0,4 | ||
454989,13.0,5 | ||
539040,3.0,6 | ||
52974,1.0,0 | ||
103451,2.0,1 | ||
810020,5.0,2 | ||
580982,3.0,3 | ||
216515,0.0,4 | ||
134694,10.0,5 | ||
93563,1.0,6 | ||
55432,1.0,0 | ||
169634,1.0,1 | ||
254908,4.0,2 | ||
315285,3.0,3 | ||
191764,0.0,4 | ||
514284,7.0,5 | ||
181214,4.0,6 | ||
78459,2.0,0 | ||
161620,3.0,1 | ||
245610,4.0,2 | ||
326722,5.0,3 | ||
214578,0.0,4 | ||
312365,5.0,5 | ||
232454,4.0,6 | ||
178368,1.0,0 | ||
97152,1.0,1 | ||
222813,4.0,2 | ||
285852,4.0,3 | ||
192149,1.0,4 | ||
142241,1.0,5 | ||
173011,2.0,6 | ||
56488,3.0,0 | ||
89572,2.0,1 | ||
356082,2.0,2 | ||
172799,0.0,3 | ||
142300,1.0,4 | ||
78432,2.0,5 | ||
539023,9.0,6 | ||
62389,1.0,0 | ||
70247,1.0,1 | ||
89229,0.0,2 | ||
94583,1.0,3 | ||
102455,0.0,4 | ||
129270,0.0,5 | ||
311409,1.0,6 | ||
1837026,0.0,0 | ||
361824,0.0,1 | ||
111379,2.0,2 | ||
76337,2.0,3 | ||
96747,0.0,4 | ||
92058,0.0,5 | ||
81929,2.0,6 | ||
143423,0.0,0 | ||
82939,0.0,1 | ||
74403,1.0,2 | ||
68234,0.0,3 | ||
94556,1.0,4 | ||
80311,0.0,5 | ||
75283,3.0,6 | ||
77724,0.0,0 | ||
49229,2.0,1 | ||
65708,2.0,2 | ||
273864,1.0,3 | ||
1711281,0.0,4 | ||
1900253,5.0,5 | ||
343071,1.0,6 | ||
1551326,0.0,0 | ||
56636,1.0,1 | ||
272782,2.0,2 | ||
1785678,0.0,3 | ||
241866,0.0,4 | ||
461904,0.0,5 | ||
2191901,2.0,6 | ||
102925,0.0,0 | ||
242778,1.0,1 | ||
298608,0.0,2 | ||
322458,10.0,3 | ||
216027,9.0,4 | ||
916052,12.0,5 | ||
193278,12.0,6 | ||
263207,8.0,0 | ||
672948,10.0,1 | ||
281909,1.0,2 | ||
384562,1.0,3 | ||
1027375,2.0,4 | ||
828905,9.0,5 | ||
624188,22.0,6 | ||
392218,8.0,0 | ||
292581,10.0,1 | ||
299869,12.0,2 | ||
769455,20.0,3 | ||
316443,8.0,4 | ||
1212864,24.0,5 | ||
1397338,28.0,6 | ||
223249,8.0,0 | ||
191264,14.0,1 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,156 @@ | ||
""" | ||
This code was originally written for forecasting, but it has been adapted here
to work as a safety checker for data.
For example: you run an online shop and, for some reason, some data is missing
(the amount of data you received is not what you expected). In that case this
checker can be used.
Notes:
    1. Of course a plain statistical method could be used instead, but in this
       case the data is quite erratic and there is only a little of it.
    2. Of course you can also modify this code for real forecasting, e.g. the
       sales of the next 3 months; just adjust it to your own purpose.
""" | ||
|
||
import numpy as np | ||
import pandas as pd | ||
from sklearn.preprocessing import Normalizer | ||
from sklearn.svm import SVR | ||
from statsmodels.tsa.statespace.sarimax import SARIMAX | ||
|
||
|
||
def linear_regression_prediction(
    train_dt: list, train_usr: list, train_mtch: list, test_dt: list, test_mtch: list
) -> float:
    """
    First method: linear regression (ordinary least squares)

    Fits ``user = b0 + b1 * date + b2 * match`` on the training data using the
    normal equation, then evaluates the fitted model on the first test sample.

    input : train_dt, train_usr, train_mtch - training dates, total users and
            total events, as equal-length lists of numbers
            test_dt, test_mtch - test dates and events (only index 0 is used)
    output : absolute value of the predicted total user for the first test sample

    >>> n = linear_regression_prediction([2,3,4,5], [5,3,4,6], [3,1,2,4], [2,1], [2,2])
    >>> bool(abs(n - 4.0) < 1e-6)  # tolerance because of floating point rounding
    True
    """
    # Design matrix: a leading 1 for the intercept, then the date and event count.
    x = np.array([[1, date, match] for date, match in zip(train_dt, train_mtch)])
    y = np.array(train_usr)
    # Normal equation: beta = (X^T X)^-1 X^T y
    beta = np.linalg.inv(x.T @ x) @ x.T @ y
    # Every feature is multiplied by its own coefficient; the event count was
    # previously *added* to beta[2], which ignored the fitted slope.
    return abs(beta[0] + test_dt[0] * beta[1] + test_mtch[0] * beta[2])
|
||
|
||
def sarimax_predictor(train_user: list, train_match: list, test_match: list) -> float:
    """
    Second method: SARIMAX
    SARIMAX is a statistical method that learns the pattern of previous
    observations in order to predict future data.

    input : train_user - total users used as the observed series
            train_match - total events, passed to the model as exogenous data
            test_match - total events of the sample to predict
    output : first element of the prediction returned by the fitted model

    >>> sarimax_predictor([4,2,6,8], [3,1,2,4], [2])
    6.6666671111109626
    """
    fitted = SARIMAX(
        train_user,
        exog=train_match,
        order=(1, 2, 1),
        seasonal_order=(1, 1, 0, 7),
    ).fit(disp=False, maxiter=600, method="nm")
    # NOTE(review): the prediction window is start=1 .. end=len(test_match), which
    # looks like positions inside the training range rather than steps past its
    # end - confirm this is the intended forecast window.
    prediction = fitted.predict(1, len(test_match), exog=[test_match])
    return prediction[0]
|
||
|
||
def support_vector_regressor(x_train: list, x_test: list, train_user: list) -> float:
    """
    Third method: support vector regressor
    SVR follows the same principles as the SVM (support vector machine) used
    for classification, with a few minor differences; the main one is that it
    is better suited to regression.

    input : x_train - training features, a list of [date, total_event] pairs
            x_test - features of the sample(s) to predict, same layout
            train_user - total users matching each row of x_train
    output : prediction for the first row of x_test

    >>> support_vector_regressor([[5,2],[1,5],[6,2]], [[3,2]], [2,1,4])
    1.634932078116079
    """
    model = SVR(kernel="rbf", C=1, gamma=0.1, epsilon=0.1)
    predictions = model.fit(x_train, train_user).predict(x_test)
    return predictions[0]
|
||
|
||
def interquartile_range_checker(train_user: list) -> float:
    """
    Optional method: interquartile range
    This method can be used to check whether some data is an outlier or not.

    input : train_user - sequence of total users (numbers); it is not modified
    output : the low limit ``q1 - 0.1 * (q3 - q1)`` as a float

    >>> interquartile_range_checker([1,2,3,4,5,6,7,8,9,10])
    2.8
    >>> data = [3, 1, 2]
    >>> _ = interquartile_range_checker(data)
    >>> data
    [3, 1, 2]
    """
    # np.percentile does not need sorted input, so the caller's list is left
    # untouched (it used to be sorted in place as a side effect).
    q1, q3 = np.percentile(train_user, [25, 75])
    iqr = q3 - q1
    # Convert the NumPy scalar to a built-in float to match the annotation.
    return float(q1 - iqr * 0.1)
|
||
|
||
def data_safety_checker(list_vote: list, actual_result: float) -> None:
    """
    Used to review all the votes (list of predictions)
    and compare them to the actual result.

    A vote counts as "safe" when the prediction is above the actual result or
    within 0.1 of it (compared by absolute value); otherwise it counts as
    "not safe". The data is reported safe only when safe votes outnumber the
    not-safe ones.

    input : list_vote - list of predictions
            actual_result - the observed value; a one-element list or array
            is also accepted
    output : prints whether it's safe or not
    raises : ValueError if actual_result holds more than one value

    >>> data_safety_checker([2,3,4],5.0)
    Today's data is not safe.
    >>> data_safety_checker([10,10,10,0,0],5.0)
    Today's data is safe.
    """
    # Unwrap a size-1 list/array into a plain float so the comparisons below
    # are scalar; abs() on a list would otherwise raise TypeError.
    actual = np.asarray(actual_result, dtype=float).item()
    safe = 0
    not_safe = 0
    for vote in list_vote:
        if vote > actual or abs(abs(vote) - abs(actual)) <= 0.1:
            # Increment the safe counter (it used to be reset to not_safe + 1).
            safe += 1
        else:
            not_safe += 1
    print(f"Today's data is {'not ' if safe <= not_safe else ''}safe.")
|
||
|
||
# data_input_df = pd.read_csv("ex_data.csv", header=None)
data_input = [[18231, 0.0, 1], [22621, 1.0, 2], [15675, 0.0, 3], [23583, 1.0, 4]]
data_input_df = pd.DataFrame(data_input, columns=["total_user", "total_even", "days"])

# data columns = total users in a day, how many online events were held that day,
# and which day of the week it is (sunday-saturday)

# start normalization
# NOTE(review): Normalizer rescales each row (sample) to unit norm rather than
# each column - confirm that this is the intended kind of normalization.
normalize_df = Normalizer().fit_transform(data_input_df.values)
# split data
total_date = normalize_df[:, 2].tolist()
total_user = normalize_df[:, 0].tolist()
total_match = normalize_df[:, 1].tolist()

# for svr (input variable = total date and total match)
x = normalize_df[:, [1, 2]].tolist()
x_train = x[:-1]
x_test = x[-1:]

# for linear regression & sarimax: every row but the last one is training data
trn_date = total_date[:-1]
trn_user = total_user[:-1]
trn_match = total_match[:-1]

# the last row is "today", the sample being checked
tst_date = total_date[-1:]
tst_user = total_user[-1:]
tst_match = total_match[-1:]

# voting system with forecasting
res_vote = [
    linear_regression_prediction(trn_date, trn_user, trn_match, tst_date, tst_match),
    sarimax_predictor(trn_user, trn_match, tst_match),
    support_vector_regressor(x_train, x_test, trn_user),
]

# check the safety of today's data; the checker compares against a single
# number, so pass the value itself rather than the one-element list
data_safety_checker(res_vote, tst_user[0])
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -11,6 +11,7 @@ qiskit | |
requests | ||
scikit-fuzzy | ||
sklearn | ||
statsmodels | ||
sympy | ||
tensorflow | ||
xgboost |