# Series de Tiempo#
#Integrantes: Marybel y Dermin#
setwd("C:/Series de tiempo/Surco/Semana I/Bases")
ventas<- read.csv2("SalesRMBase.csv", header=T, na.strings=-999.)
ventas
## t Value
## 1 1 310188
## 2 2 299412
## 3 3 328526
## 4 4 329854
## 5 5 347728
## 6 6 344405
## 7 7 348071
## 8 8 353392
## 9 9 324646
## 10 10 338610
## 11 11 339363
## 12 12 400281
## 13 13 314557
## 14 14 310925
## 15 15 360679
## 16 16 356333
## 17 17 365605
## 18 18 358604
## 19 19 361939
## 20 20 362627
## 21 21 345999
## 22 22 355209
## 23 23 365828
## 24 24 426663
## 25 25 335587
## 26 26 337314
## 27 27 387088
## 28 28 380775
## 29 29 391999
## 30 30 388700
## 31 31 384682
## 32 32 394609
## 33 33 375025
## 34 34 379482
## 35 35 391220
## 36 36 451821
## 37 37 355184
## 38 38 372401
## 39 39 414149
## 40 40 392949
## 41 41 418608
## 42 42 400975
## 43 43 396026
## 44 44 417922
## 45 45 385609
## 46 46 399400
## 47 47 411065
## 48 48 462102
## 49 49 375587
## 50 50 373987
## 51 51 421719
## 52 52 408544
## 53 53 437188
## 54 54 414714
## 55 55 422410
## 56 56 435005
## 57 57 396213
## 58 58 415700
## 59 59 423786
## 60 60 476910
## 61 61 383054
## 62 62 380019
## 63 63 432651
## 64 64 431396
## 65 65 459231
## 66 66 433282
## 67 67 443281
## 68 68 451366
## 69 69 421424
## 70 70 438457
## 71 71 439165
## 72 72 502330
## 73 73 398027
## 74 74 388063
## 75 75 445970
## 76 76 439637
## 77 77 464785
## 78 78 449794
## 79 79 459533
## 80 80 457905
## 81 81 432782
## 82 82 446593
## 83 83 446773
## 84 84 519625
## 85 85 402018
## 86 86 415183
## 87 87 461190
## 88 88 451435
## 89 89 470425
## 90 90 465058
## 91 91 462195
## 92 92 472032
## 93 93 448870
## 94 94 453318
## 95 95 467956
## 96 96 540506
## 97 97 422066
## 98 98 418520
## 99 99 483667
## 100 100 466647
## 101 101 495849
## 102 102 483003
## 103 103 476520
## 104 104 491564
## 105 105 470724
## 106 106 477120
## 107 107 499276
## 108 108 559854
## 109 109 444701
## 110 110 436018
## 111 111 509325
## 112 112 481516
## 113 113 529261
## 114 114 508292
## 115 115 506514
## 116 116 521962
## 117 117 478595
## 118 118 505194
## 119 119 520956
## 120 120 559289
## 121 121 458089
## 122 122 443708
## 123 123 515694
## 124 124 509413
## 125 125 547130
## 126 126 518273
## 127 127 532103
## 128 128 545247
## 129 129 496074
## 130 130 525539
## 131 131 535352
## 132 132 591380
### Confirmamos la base de datos y las variables###
getwd()
## [1] "C:/Series de tiempo/Surco/Semana I/Bases"
list.files()
## [1] "003semana1.P.html"
## [2] "003semana1.P.md"
## [3] "01 Enve.csv"
## [4] "01 Enverga.xlsx"
## [5] "01 Failtime B.xlsx"
## [6] "02 Failtime.csv"
## [7] "03 USPopu Clase.xlsx"
## [8] "03 USPopu E.xlsx"
## [9] "03 USPopu.csv"
## [10] "04 PIBC.csv"
## [11] "05 DepAutos.csv"
## [12] "05 Depreciacion Autos.xlsx"
## [13] "06 White Rock Clase Estacionalidad.xlsx"
## [14] "06 WhiteR.csv"
## [15] "06 WhiteR.xlsx"
## [16] "07 Airpass Class Dic 07.xlsx"
## [17] "07 Airpass.csv"
## [18] "07 Airpass.xlsx"
## [19] "Actividad1.ST.Rmd"
## [20] "actv1ST.html"
## [21] "actv1ST.R"
## [22] "actv1ST.spin.R"
## [23] "actv1ST.spin.Rmd"
## [24] "Otros Clase.xlsx"
## [25] "Primer Asignacion Dic 06.xlsx"
## [26] "SalesRMBase.csv"
## [27] "SpotD.csv"
## [28] "SpotDuty Class Dic 07.xlsx"
## [29] "SpotDuty.csv"
## [30] "Willow Beetle.xlsx"
dim(ventas)
## [1] 132 2
names(ventas)
## [1] "t" "Value"
### se grafica vetas vs tiempo medido en meses###
plot(ventas$Value~ventas$t, col="red", lty=2, type="l",
ylab="Ventas", xlab="Mes")
### Se grafica la funciĂ³n lineal que se ajusta a la pendiente descrita por: ventas vs t. este procedimiento se realiza en excel##
yo <- 332132 + 1478*ventas$t
lines(ventas$t, yo, col="blue", lwd=2)

dim(ventas)
## [1] 132 2
names(ventas)
## [1] "t" "Value"
### Linear Model on time
moda.1<-lm(Value~t,data=ventas)
summary(moda.1)
##
## Call:
## lm(formula = Value ~ t, data = ventas)
##
## Residuals:
## Min 1Q Median 3Q Max
## -68745 -13098 1710 11007 68093
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 332131.56 4855.24 68.41 <2e-16 ***
## t 1478.05 63.35 23.33 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 27730 on 130 degrees of freedom
## Multiple R-squared: 0.8072, Adjusted R-squared: 0.8057
## F-statistic: 544.4 on 1 and 130 DF, p-value: < 2.2e-16
AIC(moda.1)
## [1] 3079.403
##inserta una columna con los valores de predicciĂ³n en el modelo 1##
ventas$Value_pred_lineal <- predict(moda.1)
head(ventas)
## t Value Value_pred_lineal
## 1 1 310188 333609.6
## 2 2 299412 335087.7
## 3 3 328526 336565.7
## 4 4 329854 338043.7
## 5 5 347728 339521.8
## 6 6 344405 340999.8
## valor para dic de 2020 con el modelo ajustado##
predict(moda.1, newdata = data.frame(t = 144))
## 1
## 544970.3
#install.packages("ggpubr")#
library(ggpubr)
## Warning: package 'ggpubr' was built under R version 4.5.2
## Cargando paquete requerido: ggplot2
ggqqplot(moda.1$residuals)

shapiro.test(moda.1$residuals)
##
## Shapiro-Wilk normality test
##
## data: moda.1$residuals
## W = 0.95269, p-value = 0.0001623
#install.packages("lmtest")#
library(lmtest)
## Warning: package 'lmtest' was built under R version 4.5.2
## Cargando paquete requerido: zoo
## Warning: package 'zoo' was built under R version 4.5.2
##
## Adjuntando el paquete: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
dwtest(moda.1)
##
## Durbin-Watson test
##
## data: moda.1
## DW = 2.0718, p-value = 0.6281
## alternative hypothesis: true autocorrelation is greater than 0
## tratando de identificar tendencias ##
mod_tendencia <- lm(Value ~ t, data = ventas)
summary(mod_tendencia)
##
## Call:
## lm(formula = Value ~ t, data = ventas)
##
## Residuals:
## Min 1Q Median 3Q Max
## -68745 -13098 1710 11007 68093
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 332131.56 4855.24 68.41 <2e-16 ***
## t 1478.05 63.35 23.33 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 27730 on 130 degrees of freedom
## Multiple R-squared: 0.8072, Adjusted R-squared: 0.8057
## F-statistic: 544.4 on 1 and 130 DF, p-value: < 2.2e-16
spec <- spectrum(ventas$Value, plot = TRUE)

frecuencia <- spec$freq[which.max(spec$spec)]
T <- 1 / frecuencia
omega <- 2*pi / T
omega
## [1] 1.582432
mod_armonico <- lm(
Value ~ t + sin(omega*t) + cos(omega*t),
data = ventas
)
summary(mod_armonico)
##
## Call:
## lm(formula = Value ~ t + sin(omega * t) + cos(omega * t), data = ventas)
##
## Residuals:
## Min 1Q Median 3Q Max
## -58354 -16366 -1734 15886 56595
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 332483.48 4253.06 78.175 < 2e-16 ***
## t 1471.91 55.49 26.524 < 2e-16 ***
## sin(omega * t) 7391.28 2978.77 2.481 0.0144 *
## cos(omega * t) 17836.12 3001.77 5.942 2.5e-08 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 24290 on 128 degrees of freedom
## Multiple R-squared: 0.8544, Adjusted R-squared: 0.851
## F-statistic: 250.4 on 3 and 128 DF, p-value: < 2.2e-16
AIC(mod_armonico)
## [1] 3046.36
shapiro.test(residuals(mod_armonico))
##
## Shapiro-Wilk normality test
##
## data: residuals(mod_armonico)
## W = 0.98913, p-value = 0.3885
dwtest(mod_armonico)
##
## Durbin-Watson test
##
## data: mod_armonico
## DW = 2.1363, p-value = 0.7546
## alternative hypothesis: true autocorrelation is greater than 0
plot(ventas$t, ventas$Value, type="l", col="red",
ylab="Ventas", xlab="Mes")
lines(ventas$t, fitted(mod_armonico), lty=2, col="blue")

ggqqplot(mod_armonico$residuals)

## Otra aproximaciĂ³n ##
##periodica ##
mod_armonico2 <- lm(
Value ~ t +
sin(omega*t) + cos(omega*t) +
sin(2*omega*t) + cos(2*omega*t) +
sin(3*omega*t) + cos(3*omega*t),
data = ventas
)
summary(mod_armonico2)
##
## Call:
## lm(formula = Value ~ t + sin(omega * t) + cos(omega * t) + sin(2 *
## omega * t) + cos(2 * omega * t) + sin(3 * omega * t) + cos(3 *
## omega * t), data = ventas)
##
## Residuals:
## Min 1Q Median 3Q Max
## -57858 -15833 -438 13920 56097
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 332342.19 4188.22 79.352 < 2e-16 ***
## t 1474.08 54.65 26.974 < 2e-16 ***
## sin(omega * t) 7383.60 2933.60 2.517 0.0131 *
## cos(omega * t) 17637.04 2956.95 5.965 2.38e-08 ***
## sin(2 * omega * t) 5098.63 2910.99 1.752 0.0823 .
## cos(2 * omega * t) -46.68 2978.63 -0.016 0.9875
## sin(3 * omega * t) 722.45 2933.64 0.246 0.8059
## cos(3 * omega * t) -6546.44 2956.82 -2.214 0.0287 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 23920 on 124 degrees of freedom
## Multiple R-squared: 0.8633, Adjusted R-squared: 0.8555
## F-statistic: 111.8 on 7 and 124 DF, p-value: < 2.2e-16
AIC(mod_armonico2)
## [1] 3046.072
plot(ventas$t, ventas$Value, type="l", col="red",
ylab="Ventas", xlab="Mes")
lines(ventas$t, fitted(mod_armonico2), lty=2, col="blue")

ggqqplot(mod_armonico2$residuals)

AIC(mod_armonico)
## [1] 3046.36
AIC(mod_armonico2)
## [1] 3046.072
## nuevamente ##
#install.packages("forecast")#
library(forecast)
## Warning: package 'forecast' was built under R version 4.5.2
## Registered S3 method overwritten by 'quantmod':
## method from
## as.zoo.data.frame zoo
##
## Adjuntando el paquete: 'forecast'
## The following object is masked from 'package:ggpubr':
##
## gghistogram
# 1. Convertir la serie a objeto de tiempo con frecuencia 12
Ventas_ts <- ts(ventas$Value, frequency = 12)
Ventas_ts
## Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov
## 1 310188 299412 328526 329854 347728 344405 348071 353392 324646 338610 339363
## 2 314557 310925 360679 356333 365605 358604 361939 362627 345999 355209 365828
## 3 335587 337314 387088 380775 391999 388700 384682 394609 375025 379482 391220
## 4 355184 372401 414149 392949 418608 400975 396026 417922 385609 399400 411065
## 5 375587 373987 421719 408544 437188 414714 422410 435005 396213 415700 423786
## 6 383054 380019 432651 431396 459231 433282 443281 451366 421424 438457 439165
## 7 398027 388063 445970 439637 464785 449794 459533 457905 432782 446593 446773
## 8 402018 415183 461190 451435 470425 465058 462195 472032 448870 453318 467956
## 9 422066 418520 483667 466647 495849 483003 476520 491564 470724 477120 499276
## 10 444701 436018 509325 481516 529261 508292 506514 521962 478595 505194 520956
## 11 458089 443708 515694 509413 547130 518273 532103 545247 496074 525539 535352
## Dec
## 1 400281
## 2 426663
## 3 451821
## 4 462102
## 5 476910
## 6 502330
## 7 519625
## 8 540506
## 9 559854
## 10 559289
## 11 591380
# 2. GrĂ¡fica de la serie
plot(Ventas_ts, col = "red", main = "Serie de Ventas", ylab = "Ventas")

# 3. DescomposiciĂ³n clĂ¡sica
decomp <- decompose(Ventas_ts)
plot(decomp)

# 4. Ajuste automĂ¡tico SARIMA
mod_arima <- auto.arima(Ventas_ts, seasonal = TRUE)
summary(mod_arima)
## Series: Ventas_ts
## ARIMA(3,0,0)(2,1,1)[12] with drift
##
## Coefficients:
## ar1 ar2 ar3 sar1 sar2 sma1 drift
## 0.2062 0.1474 0.2048 0.3771 -0.3016 -0.5088 1455.9955
## s.e. 0.0958 0.0934 0.1011 0.1669 0.1111 0.1578 70.8907
##
## sigma^2 = 48737317: log likelihood = -1230.94
## AIC=2477.87 AICc=2479.17 BIC=2500.17
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set 260.3766 6459.264 4986.204 0.03749127 1.160237 0.2776984
## ACF1
## Training set 0.01718056
# 5. DiagnĂ³stico de residuos
checkresiduals(mod_arima)

##
## Ljung-Box test
##
## data: Residuals from ARIMA(3,0,0)(2,1,1)[12] with drift
## Q* = 26.054, df = 18, p-value = 0.09854
##
## Model df: 6. Total lags used: 24
# 6. PronĂ³stico a 12 meses
forecast_12 <- forecast(mod_arima, h = 12)
ggqqplot(mod_arima$residuals)
## Don't know how to automatically pick scale for object of type <ts>. Defaulting
## to continuous.
## Don't know how to automatically pick scale for object of type <ts>. Defaulting
## to continuous.
## Don't know how to automatically pick scale for object of type <ts>. Defaulting
## to continuous.

plot(forecast_12)

mod_arima <- auto.arima(ts(ventas$Value, frequency = 12), seasonal = TRUE)
summary(mod_arima)
## Series: ts(ventas$Value, frequency = 12)
## ARIMA(3,0,0)(2,1,1)[12] with drift
##
## Coefficients:
## ar1 ar2 ar3 sar1 sar2 sma1 drift
## 0.2062 0.1474 0.2048 0.3771 -0.3016 -0.5088 1455.9955
## s.e. 0.0958 0.0934 0.1011 0.1669 0.1111 0.1578 70.8907
##
## sigma^2 = 48737317: log likelihood = -1230.94
## AIC=2477.87 AICc=2479.17 BIC=2500.17
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set 260.3766 6459.264 4986.204 0.03749127 1.160237 0.2776984
## ACF1
## Training set 0.01718056
AIC(mod_arima)
## [1] 2477.874