setwd("~/R/Forecasting")
rm(list=ls())

library(readxl)
my_data <- read_excel("fore.xls")
#forecasting package
library(fpp2)
names(my_data) <- c("DATE", "Sales.Per.Day")

#time series data
Y <- ts(my_data[,2], start = c(1992,1), end = c(2017,12), frequency = 12)

#preliminary analysis
autoplot(Y) + ggtitle("Time Plot: Real US Retail Sales per Day") +
  ylab("Millions of 2017 Dollars") +
  xlab("Time")

#invetigate transformations
#taking the first difference to remove the trend
DY <- diff(Y)
autoplot(DY) + ggtitle("Time Plot: Change in Real US Retail Sales per Day") +
  ylab("Millions of 2017 Dollars") +
  xlab("Time")

#series appears trend-stationary, using to invetigate seasonality
ggseasonplot(DY) + ggtitle("Change in Daily Retail Sales") +
   ylab("Millions of 2017 Dollars")

#FORECASTING
fit <- snaive(DY) #residual sd=287.20
print(summary(fit))
## 
## Forecast method: Seasonal naive method
## 
## Model Information:
## Call: snaive(y = DY) 
## 
## Residual sd: 287.2028 
## 
## Error measures:
##                     ME     RMSE     MAE      MPE     MAPE MASE       ACF1
## Training set 0.2295881 286.7222 230.275 62.66182 133.1789    1 -0.4482568
## 
## Forecasts:
##          Point Forecast       Lo 80       Hi 80       Lo 95      Hi 95
## Jan 2018    -3811.03393 -4178.48323 -3443.58463 -4372.99913 -3249.0687
## Feb 2018     1155.10474   787.65544  1522.55404   593.13954  1717.0699
## Mar 2018      625.41825   257.96895   992.86755    63.45305  1187.3835
## Apr 2018      -80.68609  -448.13539   286.76321  -642.65129   481.2791
## May 2018      467.01692    99.56762   834.46622   -94.94828  1028.9821
## Jun 2018       76.13812  -291.31118   443.58742  -485.82708   638.1033
## Jul 2018     -678.48382 -1045.93312  -311.03452 -1240.44902  -116.5186
## Aug 2018      435.82699    68.37769   803.27629  -126.13821   997.7922
## Sep 2018     -245.12985  -612.57915   122.31945  -807.09505   316.8354
## Oct 2018     -277.76288  -645.21218    89.68642  -839.72808   284.2023
## Nov 2018     1244.60359   877.15429  1612.05289   682.63839  1806.5688
## Dec 2018     1274.41523   906.96593  1641.86453   712.45003  1836.3804
## Jan 2019    -3811.03393 -4330.68571 -3291.38215 -4605.77274 -3016.2951
## Feb 2019     1155.10474   635.45296  1674.75652   360.36593  1949.8436
## Mar 2019      625.41825   105.76647  1145.07003  -169.32056  1420.1571
## Apr 2019      -80.68609  -600.33787   438.96569  -875.42490   714.0527
## May 2019      467.01692   -52.63486   986.66870  -327.72189  1261.7557
## Jun 2019       76.13812  -443.51366   595.78990  -718.60069   870.8769
## Jul 2019     -678.48382 -1198.13560  -158.83204 -1473.22263   116.2550
## Aug 2019      435.82699   -83.82479   955.47877  -358.91182  1230.5658
## Sep 2019     -245.12985  -764.78163   274.52193 -1039.86866   549.6090
## Oct 2019     -277.76288  -797.41466   241.88890 -1072.50169   516.9759
## Nov 2019     1244.60359   724.95181  1764.25537   449.86478  2039.3424
## Dec 2019     1274.41523   754.76345  1794.06701   479.67642  2069.1540
##          Point Forecast       Lo 80       Hi 80       Lo 95      Hi 95
## Jan 2018    -3811.03393 -4178.48323 -3443.58463 -4372.99913 -3249.0687
## Feb 2018     1155.10474   787.65544  1522.55404   593.13954  1717.0699
## Mar 2018      625.41825   257.96895   992.86755    63.45305  1187.3835
## Apr 2018      -80.68609  -448.13539   286.76321  -642.65129   481.2791
## May 2018      467.01692    99.56762   834.46622   -94.94828  1028.9821
## Jun 2018       76.13812  -291.31118   443.58742  -485.82708   638.1033
## Jul 2018     -678.48382 -1045.93312  -311.03452 -1240.44902  -116.5186
## Aug 2018      435.82699    68.37769   803.27629  -126.13821   997.7922
## Sep 2018     -245.12985  -612.57915   122.31945  -807.09505   316.8354
## Oct 2018     -277.76288  -645.21218    89.68642  -839.72808   284.2023
## Nov 2018     1244.60359   877.15429  1612.05289   682.63839  1806.5688
## Dec 2018     1274.41523   906.96593  1641.86453   712.45003  1836.3804
## Jan 2019    -3811.03393 -4330.68571 -3291.38215 -4605.77274 -3016.2951
## Feb 2019     1155.10474   635.45296  1674.75652   360.36593  1949.8436
## Mar 2019      625.41825   105.76647  1145.07003  -169.32056  1420.1571
## Apr 2019      -80.68609  -600.33787   438.96569  -875.42490   714.0527
## May 2019      467.01692   -52.63486   986.66870  -327.72189  1261.7557
## Jun 2019       76.13812  -443.51366   595.78990  -718.60069   870.8769
## Jul 2019     -678.48382 -1198.13560  -158.83204 -1473.22263   116.2550
## Aug 2019      435.82699   -83.82479   955.47877  -358.91182  1230.5658
## Sep 2019     -245.12985  -764.78163   274.52193 -1039.86866   549.6090
## Oct 2019     -277.76288  -797.41466   241.88890 -1072.50169   516.9759
## Nov 2019     1244.60359   724.95181  1764.25537   449.86478  2039.3424
## Dec 2019     1274.41523   754.76345  1794.06701   479.67642  2069.1540
checkresiduals(fit)

## 
##  Ljung-Box test
## 
## data:  Residuals from Seasonal naive method
## Q* = 528.64, df = 24, p-value < 2.2e-16
## 
## Model df: 0.   Total lags used: 24
#fit ETS method
fit_ets <- ets(Y) #residual sd=218.53
print(summary(fit_ets))
## ETS(A,Ad,A) 
## 
## Call:
##  ets(y = Y) 
## 
##   Smoothing parameters:
##     alpha = 0.3367 
##     beta  = 0.0509 
##     gamma = 1e-04 
##     phi   = 0.9239 
## 
##   Initial states:
##     l = 8500.8769 
##     b = 46.1867 
##     s = 1839.564 207.5063 -364.48 -311.8963 140.9642 -104.2368
##            346.4814 268.3047 29.2376 -120.8496 -487.0592 -1443.536
## 
##   sigma:  218.5271
## 
##      AIC     AICc      BIC 
## 5171.768 5174.103 5239.142 
## 
## Training set error measures:
##                    ME     RMSE      MAE       MPE     MAPE      MASE
## Training set 19.13743 212.4903 169.1264 0.1517995 1.468075 0.4592544
##                     ACF1
## Training set -0.04447342
##                    ME     RMSE      MAE       MPE     MAPE      MASE
## Training set 19.13743 212.4903 169.1264 0.1517995 1.468075 0.4592544
##                     ACF1
## Training set -0.04447342
checkresiduals(fit_ets)

## 
##  Ljung-Box test
## 
## data:  Residuals from ETS(A,Ad,A)
## Q* = 336.95, df = 7, p-value < 2.2e-16
## 
## Model df: 17.   Total lags used: 24
#fit an ARIMA model --> no constant mean because not stationary
fit_arima <- auto.arima(Y,d=1,D=1, stepwise=FALSE, approximation=FALSE, trace=TRUE)
## 
##  ARIMA(0,1,0)(0,1,0)[12]                    : 4234.33
##  ARIMA(0,1,0)(0,1,1)[12]                    : 4206.675
##  ARIMA(0,1,0)(0,1,2)[12]                    : Inf
##  ARIMA(0,1,0)(1,1,0)[12]                    : 4235.79
##  ARIMA(0,1,0)(1,1,1)[12]                    : Inf
##  ARIMA(0,1,0)(1,1,2)[12]                    : Inf
##  ARIMA(0,1,0)(2,1,0)[12]                    : 4209.277
##  ARIMA(0,1,0)(2,1,1)[12]                    : 4115.485
##  ARIMA(0,1,0)(2,1,2)[12]                    : 4088.988
##  ARIMA(0,1,1)(0,1,0)[12]                    : 4144.06
##  ARIMA(0,1,1)(0,1,1)[12]                    : 4103.804
##  ARIMA(0,1,1)(0,1,2)[12]                    : Inf
##  ARIMA(0,1,1)(1,1,0)[12]                    : 4140.586
##  ARIMA(0,1,1)(1,1,1)[12]                    : Inf
##  ARIMA(0,1,1)(1,1,2)[12]                    : Inf
##  ARIMA(0,1,1)(2,1,0)[12]                    : 4119.189
##  ARIMA(0,1,1)(2,1,1)[12]                    : 4048.338
##  ARIMA(0,1,1)(2,1,2)[12]                    : 4034.027
##  ARIMA(0,1,2)(0,1,0)[12]                    : 4139.995
##  ARIMA(0,1,2)(0,1,1)[12]                    : 4090.479
##  ARIMA(0,1,2)(0,1,2)[12]                    : Inf
##  ARIMA(0,1,2)(1,1,0)[12]                    : 4131.796
##  ARIMA(0,1,2)(1,1,1)[12]                    : Inf
##  ARIMA(0,1,2)(1,1,2)[12]                    : Inf
##  ARIMA(0,1,2)(2,1,0)[12]                    : 4110.627
##  ARIMA(0,1,2)(2,1,1)[12]                    : 4049.062
##  ARIMA(0,1,3)(0,1,0)[12]                    : 4133.029
##  ARIMA(0,1,3)(0,1,1)[12]                    : 4083.647
##  ARIMA(0,1,3)(0,1,2)[12]                    : Inf
##  ARIMA(0,1,3)(1,1,0)[12]                    : 4124.32
##  ARIMA(0,1,3)(1,1,1)[12]                    : Inf
##  ARIMA(0,1,3)(2,1,0)[12]                    : 4104.443
##  ARIMA(0,1,4)(0,1,0)[12]                    : 4132.002
##  ARIMA(0,1,4)(0,1,1)[12]                    : 4071.057
##  ARIMA(0,1,4)(1,1,0)[12]                    : 4117.738
##  ARIMA(0,1,5)(0,1,0)[12]                    : 4116.248
##  ARIMA(1,1,0)(0,1,0)[12]                    : 4168.769
##  ARIMA(1,1,0)(0,1,1)[12]                    : 4132.503
##  ARIMA(1,1,0)(0,1,2)[12]                    : Inf
##  ARIMA(1,1,0)(1,1,0)[12]                    : 4167.36
##  ARIMA(1,1,0)(1,1,1)[12]                    : Inf
##  ARIMA(1,1,0)(1,1,2)[12]                    : Inf
##  ARIMA(1,1,0)(2,1,0)[12]                    : 4143.088
##  ARIMA(1,1,0)(2,1,1)[12]                    : 4067.393
##  ARIMA(1,1,0)(2,1,2)[12]                    : 4050.932
##  ARIMA(1,1,1)(0,1,0)[12]                    : 4142.908
##  ARIMA(1,1,1)(0,1,1)[12]                    : 4099.13
##  ARIMA(1,1,1)(0,1,2)[12]                    : Inf
##  ARIMA(1,1,1)(1,1,0)[12]                    : 4137.637
##  ARIMA(1,1,1)(1,1,1)[12]                    : Inf
##  ARIMA(1,1,1)(1,1,2)[12]                    : Inf
##  ARIMA(1,1,1)(2,1,0)[12]                    : 4116.441
##  ARIMA(1,1,1)(2,1,1)[12]                    : 4049.668
##  ARIMA(1,1,2)(0,1,0)[12]                    : 4135.243
##  ARIMA(1,1,2)(0,1,1)[12]                    : 4090.861
##  ARIMA(1,1,2)(0,1,2)[12]                    : Inf
##  ARIMA(1,1,2)(1,1,0)[12]                    : 4128.696
##  ARIMA(1,1,2)(1,1,1)[12]                    : Inf
##  ARIMA(1,1,2)(2,1,0)[12]                    : 4109.272
##  ARIMA(1,1,3)(0,1,0)[12]                    : 4121.554
##  ARIMA(1,1,3)(0,1,1)[12]                    : 4070.949
##  ARIMA(1,1,3)(1,1,0)[12]                    : 4112.423
##  ARIMA(1,1,4)(0,1,0)[12]                    : 4122.975
##  ARIMA(2,1,0)(0,1,0)[12]                    : 4124.729
##  ARIMA(2,1,0)(0,1,1)[12]                    : 4060.086
##  ARIMA(2,1,0)(0,1,2)[12]                    : Inf
##  ARIMA(2,1,0)(1,1,0)[12]                    : 4107.027
##  ARIMA(2,1,0)(1,1,1)[12]                    : Inf
##  ARIMA(2,1,0)(1,1,2)[12]                    : Inf
##  ARIMA(2,1,0)(2,1,0)[12]                    : 4086.905
##  ARIMA(2,1,0)(2,1,1)[12]                    : 4037.413
##  ARIMA(2,1,1)(0,1,0)[12]                    : 4122.818
##  ARIMA(2,1,1)(0,1,1)[12]                    : 4049.617
##  ARIMA(2,1,1)(0,1,2)[12]                    : Inf
##  ARIMA(2,1,1)(1,1,0)[12]                    : 4099.755
##  ARIMA(2,1,1)(1,1,1)[12]                    : Inf
##  ARIMA(2,1,1)(2,1,0)[12]                    : 4078.748
##  ARIMA(2,1,2)(0,1,0)[12]                    : 4124.685
##  ARIMA(2,1,2)(0,1,1)[12]                    : Inf
##  ARIMA(2,1,2)(1,1,0)[12]                    : Inf
##  ARIMA(2,1,3)(0,1,0)[12]                    : Inf
##  ARIMA(3,1,0)(0,1,0)[12]                    : 4123.541
##  ARIMA(3,1,0)(0,1,1)[12]                    : 4048.053
##  ARIMA(3,1,0)(0,1,2)[12]                    : Inf
##  ARIMA(3,1,0)(1,1,0)[12]                    : 4099.023
##  ARIMA(3,1,0)(1,1,1)[12]                    : Inf
##  ARIMA(3,1,0)(2,1,0)[12]                    : 4077.069
##  ARIMA(3,1,1)(0,1,0)[12]                    : 4124.836
##  ARIMA(3,1,1)(0,1,1)[12]                    : 4049.618
##  ARIMA(3,1,1)(1,1,0)[12]                    : 4100.6
##  ARIMA(3,1,2)(0,1,0)[12]                    : 4126.946
##  ARIMA(4,1,0)(0,1,0)[12]                    : 4123.317
##  ARIMA(4,1,0)(0,1,1)[12]                    : 4049.134
##  ARIMA(4,1,0)(1,1,0)[12]                    : 4100.037
##  ARIMA(4,1,1)(0,1,0)[12]                    : 4125.332
##  ARIMA(5,1,0)(0,1,0)[12]                    : 4124.931
## 
## 
## 
##  Best model: ARIMA(0,1,1)(2,1,2)[12]
print(summary(fit_arima))
## Series: Y 
## ARIMA(0,1,1)(2,1,2)[12] 
## 
## Coefficients:
##           ma1    sar1     sar2     sma1    sma2
##       -0.4755  0.7710  -0.6078  -1.2284  0.4903
## s.e.   0.0492  0.0747   0.0733   0.0790  0.0857
## 
## sigma^2 estimated as 39100:  log likelihood=-2010.87
## AIC=4033.74   AICc=4034.03   BIC=4055.94
## 
## Training set error measures:
##                    ME    RMSE      MAE         MPE    MAPE      MASE
## Training set 2.471835 191.949 146.7799 0.004848292 1.24794 0.3985735
##                      ACF1
## Training set -0.004843104
##                    ME    RMSE      MAE         MPE    MAPE      MASE
## Training set 2.471835 191.949 146.7799 0.004848292 1.24794 0.3985735
##                      ACF1
## Training set -0.004843104
checkresiduals(fit_arima)

## 
##  Ljung-Box test
## 
## data:  Residuals from ARIMA(0,1,1)(2,1,2)[12]
## Q* = 76.851, df = 19, p-value = 6.448e-09
## 
## Model df: 5.   Total lags used: 24
sqrt(39100) #residual sd=197.74
#FORECAST
fcst <- forecast(fit_arima,h=36) #h=months in the future
autoplot(fcst, include = 60, ylab = "Sales") #include=number of months (5 years)

print(summary(fcst))
## 
## Forecast method: ARIMA(0,1,1)(2,1,2)[12]
## 
## Model Information:
## Series: Y 
## ARIMA(0,1,1)(2,1,2)[12] 
## 
## Coefficients:
##           ma1    sar1     sar2     sma1    sma2
##       -0.4755  0.7710  -0.6078  -1.2284  0.4903
## s.e.   0.0492  0.0747   0.0733   0.0790  0.0857
## 
## sigma^2 estimated as 39100:  log likelihood=-2010.87
## AIC=4033.74   AICc=4034.03   BIC=4055.94
## 
## Error measures:
##                    ME    RMSE      MAE         MPE    MAPE      MASE
## Training set 2.471835 191.949 146.7799 0.004848292 1.24794 0.3985735
##                      ACF1
## Training set -0.004843104
## 
## Forecasts:
##          Point Forecast    Lo 80    Hi 80    Lo 95    Hi 95
## Jan 2018       12805.26 12551.85 13058.67 12417.70 13192.82
## Feb 2018       13942.45 13656.30 14228.60 13504.82 14380.08
## Mar 2018       14407.37 14091.86 14722.88 13924.84 14889.90
## Apr 2018       14433.40 14091.04 14775.76 13909.80 14957.00
## May 2018       14877.65 14510.39 15244.90 14315.98 15439.31
## Jun 2018       14728.60 14338.03 15119.16 14131.28 15325.91
## Jul 2018       14292.20 13879.64 14704.76 13661.25 14923.16
## Aug 2018       14687.52 14254.08 15120.96 14024.63 15350.41
## Sep 2018       14248.64 13795.28 14702.00 13555.28 14941.99
## Oct 2018       14210.96 13738.52 14683.40 13488.43 14933.50
## Nov 2018       15165.12 14674.34 15655.90 14414.54 15915.71
## Dec 2018       16465.52 15957.06 16973.98 15687.90 17243.14
## Jan 2019       13332.47 12756.57 13908.36 12451.71 14213.22
## Feb 2019       14462.31 13851.00 15073.61 13527.40 15397.22
## Mar 2019       14843.44 14198.67 15488.21 13857.35 15829.53
## Apr 2019       14988.69 14312.10 15665.27 13953.94 16023.44
## May 2019       15348.27 14641.30 16055.24 14267.06 16429.48
## Jun 2019       15152.73 14416.63 15888.82 14026.96 16278.49
## Jul 2019       14891.16 14127.04 15655.28 13722.54 16059.78
## Aug 2019       15180.54 14389.39 15971.69 13970.58 16390.50
## Sep 2019       14660.28 13842.99 15477.56 13410.35 15910.20
## Oct 2019       14702.01 13859.40 15544.61 13413.35 15990.66
## Nov 2019       15425.46 14558.27 16292.66 14099.20 16751.72
## Dec 2019       16927.73 16036.63 17818.83 15564.91 18290.55
## Jan 2020       13620.14 12701.46 14538.82 12215.14 15025.14
## Feb 2020       14755.21 13811.78 15698.64 13312.36 16198.06
## Mar 2020       15169.28 14201.74 16136.82 13689.56 16649.01
## Apr 2020       15341.60 14350.53 16332.67 13825.89 16857.31
## May 2020       15649.73 14635.68 16663.79 14098.88 17200.59
## Jun 2020       15555.20 14518.68 16591.73 13969.98 17140.43
## Jul 2020       15281.30 14222.78 16339.82 13662.44 16900.17
## Aug 2020       15513.62 14433.55 16593.68 13861.80 17165.44
## Sep 2020       15048.36 13947.17 16149.55 13364.23 16732.49
## Oct 2020       15005.40 13883.48 16127.32 13289.57 16721.23
## Nov 2020       15727.49 14585.22 16869.77 13980.54 17474.45
## Dec 2020       17369.62 16207.35 18531.89 15592.08 19147.16
##          Point Forecast    Lo 80    Hi 80    Lo 95    Hi 95
## Jan 2018       12805.26 12551.85 13058.67 12417.70 13192.82
## Feb 2018       13942.45 13656.30 14228.60 13504.82 14380.08
## Mar 2018       14407.37 14091.86 14722.88 13924.84 14889.90
## Apr 2018       14433.40 14091.04 14775.76 13909.80 14957.00
## May 2018       14877.65 14510.39 15244.90 14315.98 15439.31
## Jun 2018       14728.60 14338.03 15119.16 14131.28 15325.91
## Jul 2018       14292.20 13879.64 14704.76 13661.25 14923.16
## Aug 2018       14687.52 14254.08 15120.96 14024.63 15350.41
## Sep 2018       14248.64 13795.28 14702.00 13555.28 14941.99
## Oct 2018       14210.96 13738.52 14683.40 13488.43 14933.50
## Nov 2018       15165.12 14674.34 15655.90 14414.54 15915.71
## Dec 2018       16465.52 15957.06 16973.98 15687.90 17243.14
## Jan 2019       13332.47 12756.57 13908.36 12451.71 14213.22
## Feb 2019       14462.31 13851.00 15073.61 13527.40 15397.22
## Mar 2019       14843.44 14198.67 15488.21 13857.35 15829.53
## Apr 2019       14988.69 14312.10 15665.27 13953.94 16023.44
## May 2019       15348.27 14641.30 16055.24 14267.06 16429.48
## Jun 2019       15152.73 14416.63 15888.82 14026.96 16278.49
## Jul 2019       14891.16 14127.04 15655.28 13722.54 16059.78
## Aug 2019       15180.54 14389.39 15971.69 13970.58 16390.50
## Sep 2019       14660.28 13842.99 15477.56 13410.35 15910.20
## Oct 2019       14702.01 13859.40 15544.61 13413.35 15990.66
## Nov 2019       15425.46 14558.27 16292.66 14099.20 16751.72
## Dec 2019       16927.73 16036.63 17818.83 15564.91 18290.55
## Jan 2020       13620.14 12701.46 14538.82 12215.14 15025.14
## Feb 2020       14755.21 13811.78 15698.64 13312.36 16198.06
## Mar 2020       15169.28 14201.74 16136.82 13689.56 16649.01
## Apr 2020       15341.60 14350.53 16332.67 13825.89 16857.31
## May 2020       15649.73 14635.68 16663.79 14098.88 17200.59
## Jun 2020       15555.20 14518.68 16591.73 13969.98 17140.43
## Jul 2020       15281.30 14222.78 16339.82 13662.44 16900.17
## Aug 2020       15513.62 14433.55 16593.68 13861.80 17165.44
## Sep 2020       15048.36 13947.17 16149.55 13364.23 16732.49
## Oct 2020       15005.40 13883.48 16127.32 13289.57 16721.23
## Nov 2020       15727.49 14585.22 16869.77 13980.54 17474.45
## Dec 2020       17369.62 16207.35 18531.89 15592.08 19147.16