actv1ST.R

# Series de Tiempo#
#Integrantes: Marybel y Dermin#
setwd("C:/Series de tiempo/Surco/Semana I/Bases")
ventas<- read.csv2("SalesRMBase.csv", header=T, na.strings=-999.)
ventas

##       t  Value
## 1     1 310188
## 2     2 299412
## 3     3 328526
## 4     4 329854
## 5     5 347728
## 6     6 344405
## 7     7 348071
## 8     8 353392
## 9     9 324646
## 10   10 338610
## 11   11 339363
## 12   12 400281
## 13   13 314557
## 14   14 310925
## 15   15 360679
## 16   16 356333
## 17   17 365605
## 18   18 358604
## 19   19 361939
## 20   20 362627
## 21   21 345999
## 22   22 355209
## 23   23 365828
## 24   24 426663
## 25   25 335587
## 26   26 337314
## 27   27 387088
## 28   28 380775
## 29   29 391999
## 30   30 388700
## 31   31 384682
## 32   32 394609
## 33   33 375025
## 34   34 379482
## 35   35 391220
## 36   36 451821
## 37   37 355184
## 38   38 372401
## 39   39 414149
## 40   40 392949
## 41   41 418608
## 42   42 400975
## 43   43 396026
## 44   44 417922
## 45   45 385609
## 46   46 399400
## 47   47 411065
## 48   48 462102
## 49   49 375587
## 50   50 373987
## 51   51 421719
## 52   52 408544
## 53   53 437188
## 54   54 414714
## 55   55 422410
## 56   56 435005
## 57   57 396213
## 58   58 415700
## 59   59 423786
## 60   60 476910
## 61   61 383054
## 62   62 380019
## 63   63 432651
## 64   64 431396
## 65   65 459231
## 66   66 433282
## 67   67 443281
## 68   68 451366
## 69   69 421424
## 70   70 438457
## 71   71 439165
## 72   72 502330
## 73   73 398027
## 74   74 388063
## 75   75 445970
## 76   76 439637
## 77   77 464785
## 78   78 449794
## 79   79 459533
## 80   80 457905
## 81   81 432782
## 82   82 446593
## 83   83 446773
## 84   84 519625
## 85   85 402018
## 86   86 415183
## 87   87 461190
## 88   88 451435
## 89   89 470425
## 90   90 465058
## 91   91 462195
## 92   92 472032
## 93   93 448870
## 94   94 453318
## 95   95 467956
## 96   96 540506
## 97   97 422066
## 98   98 418520
## 99   99 483667
## 100 100 466647
## 101 101 495849
## 102 102 483003
## 103 103 476520
## 104 104 491564
## 105 105 470724
## 106 106 477120
## 107 107 499276
## 108 108 559854
## 109 109 444701
## 110 110 436018
## 111 111 509325
## 112 112 481516
## 113 113 529261
## 114 114 508292
## 115 115 506514
## 116 116 521962
## 117 117 478595
## 118 118 505194
## 119 119 520956
## 120 120 559289
## 121 121 458089
## 122 122 443708
## 123 123 515694
## 124 124 509413
## 125 125 547130
## 126 126 518273
## 127 127 532103
## 128 128 545247
## 129 129 496074
## 130 130 525539
## 131 131 535352
## 132 132 591380

### Confirmamos la base de datos y las variables###
getwd()

## [1] "C:/Series de tiempo/Surco/Semana I/Bases"

list.files()

##  [1] "003semana1.P.html"                      
##  [2] "003semana1.P.md"                        
##  [3] "01 Enve.csv"                            
##  [4] "01 Enverga.xlsx"                        
##  [5] "01 Failtime B.xlsx"                     
##  [6] "02 Failtime.csv"                        
##  [7] "03 USPopu Clase.xlsx"                   
##  [8] "03 USPopu E.xlsx"                       
##  [9] "03 USPopu.csv"                          
## [10] "04 PIBC.csv"                            
## [11] "05 DepAutos.csv"                        
## [12] "05 Depreciacion Autos.xlsx"             
## [13] "06 White Rock Clase Estacionalidad.xlsx"
## [14] "06 WhiteR.csv"                          
## [15] "06 WhiteR.xlsx"                         
## [16] "07 Airpass Class Dic 07.xlsx"           
## [17] "07 Airpass.csv"                         
## [18] "07 Airpass.xlsx"                        
## [19] "Actividad1.ST.Rmd"                      
## [20] "actv1ST.html"                           
## [21] "actv1ST.R"                              
## [22] "actv1ST.spin.R"                         
## [23] "actv1ST.spin.Rmd"                       
## [24] "Otros Clase.xlsx"                       
## [25] "Primer Asignacion Dic 06.xlsx"          
## [26] "SalesRMBase.csv"                        
## [27] "SpotD.csv"                              
## [28] "SpotDuty Class Dic 07.xlsx"             
## [29] "SpotDuty.csv"                           
## [30] "Willow Beetle.xlsx"

dim(ventas)

## [1] 132   2

names(ventas)

## [1] "t"     "Value"

### se grafica vetas vs tiempo medido en meses###
plot(ventas$Value~ventas$t, col="red", lty=2, type="l", 
     ylab="Ventas", xlab="Mes")

### Se grafica la función lineal que se ajusta a la pendiente descrita por: ventas vs t.  este procedimiento se realiza en excel##

yo <- 332132 + 1478*ventas$t 

lines(ventas$t, yo, col="blue", lwd=2)

dim(ventas)

## [1] 132   2

names(ventas)

## [1] "t"     "Value"

### Linear Model on time 
moda.1<-lm(Value~t,data=ventas)
summary(moda.1)

## 
## Call:
## lm(formula = Value ~ t, data = ventas)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -68745 -13098   1710  11007  68093 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 332131.56    4855.24   68.41   <2e-16 ***
## t             1478.05      63.35   23.33   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 27730 on 130 degrees of freedom
## Multiple R-squared:  0.8072, Adjusted R-squared:  0.8057 
## F-statistic: 544.4 on 1 and 130 DF,  p-value: < 2.2e-16

AIC(moda.1)

## [1] 3079.403

##inserta una columna con los valores de predicción en el modelo 1##
ventas$Value_pred_lineal <- predict(moda.1)
head(ventas)

##   t  Value Value_pred_lineal
## 1 1 310188          333609.6
## 2 2 299412          335087.7
## 3 3 328526          336565.7
## 4 4 329854          338043.7
## 5 5 347728          339521.8
## 6 6 344405          340999.8

## valor para dic de 2020 con el modelo ajustado##
predict(moda.1, newdata = data.frame(t = 144))

##        1 
## 544970.3

#install.packages("ggpubr")#
library(ggpubr)

## Warning: package 'ggpubr' was built under R version 4.5.2

## Cargando paquete requerido: ggplot2

ggqqplot(moda.1$residuals)

shapiro.test(moda.1$residuals)

## 
##  Shapiro-Wilk normality test
## 
## data:  moda.1$residuals
## W = 0.95269, p-value = 0.0001623

#install.packages("lmtest")#
library(lmtest)

## Warning: package 'lmtest' was built under R version 4.5.2

## Cargando paquete requerido: zoo

## Warning: package 'zoo' was built under R version 4.5.2

## 
## Adjuntando el paquete: 'zoo'

## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric

dwtest(moda.1)

## 
##  Durbin-Watson test
## 
## data:  moda.1
## DW = 2.0718, p-value = 0.6281
## alternative hypothesis: true autocorrelation is greater than 0

## tratando de identificar tendencias ##
mod_tendencia <- lm(Value ~ t, data = ventas)
summary(mod_tendencia)

## 
## Call:
## lm(formula = Value ~ t, data = ventas)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -68745 -13098   1710  11007  68093 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 332131.56    4855.24   68.41   <2e-16 ***
## t             1478.05      63.35   23.33   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 27730 on 130 degrees of freedom
## Multiple R-squared:  0.8072, Adjusted R-squared:  0.8057 
## F-statistic: 544.4 on 1 and 130 DF,  p-value: < 2.2e-16

spec <- spectrum(ventas$Value, plot = TRUE)

frecuencia <- spec$freq[which.max(spec$spec)]
T <- 1 / frecuencia
omega <- 2*pi / T
omega

## [1] 1.582432

mod_armonico <- lm(
  Value ~ t + sin(omega*t) + cos(omega*t),
  data = ventas
)

summary(mod_armonico)

## 
## Call:
## lm(formula = Value ~ t + sin(omega * t) + cos(omega * t), data = ventas)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -58354 -16366  -1734  15886  56595 
## 
## Coefficients:
##                 Estimate Std. Error t value Pr(>|t|)    
## (Intercept)    332483.48    4253.06  78.175  < 2e-16 ***
## t                1471.91      55.49  26.524  < 2e-16 ***
## sin(omega * t)   7391.28    2978.77   2.481   0.0144 *  
## cos(omega * t)  17836.12    3001.77   5.942  2.5e-08 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 24290 on 128 degrees of freedom
## Multiple R-squared:  0.8544, Adjusted R-squared:  0.851 
## F-statistic: 250.4 on 3 and 128 DF,  p-value: < 2.2e-16

AIC(mod_armonico)

## [1] 3046.36

shapiro.test(residuals(mod_armonico))

## 
##  Shapiro-Wilk normality test
## 
## data:  residuals(mod_armonico)
## W = 0.98913, p-value = 0.3885

dwtest(mod_armonico)

## 
##  Durbin-Watson test
## 
## data:  mod_armonico
## DW = 2.1363, p-value = 0.7546
## alternative hypothesis: true autocorrelation is greater than 0

plot(ventas$t, ventas$Value, type="l", col="red",
     ylab="Ventas", xlab="Mes")

lines(ventas$t, fitted(mod_armonico), lty=2, col="blue")

ggqqplot(mod_armonico$residuals)

## Otra aproximación ##
##periodica ##
mod_armonico2 <- lm(
  Value ~ t +
    sin(omega*t) + cos(omega*t) +
    sin(2*omega*t) + cos(2*omega*t) +
    sin(3*omega*t) + cos(3*omega*t),
  data = ventas
)

summary(mod_armonico2)

## 
## Call:
## lm(formula = Value ~ t + sin(omega * t) + cos(omega * t) + sin(2 * 
##     omega * t) + cos(2 * omega * t) + sin(3 * omega * t) + cos(3 * 
##     omega * t), data = ventas)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -57858 -15833   -438  13920  56097 
## 
## Coefficients:
##                     Estimate Std. Error t value Pr(>|t|)    
## (Intercept)        332342.19    4188.22  79.352  < 2e-16 ***
## t                    1474.08      54.65  26.974  < 2e-16 ***
## sin(omega * t)       7383.60    2933.60   2.517   0.0131 *  
## cos(omega * t)      17637.04    2956.95   5.965 2.38e-08 ***
## sin(2 * omega * t)   5098.63    2910.99   1.752   0.0823 .  
## cos(2 * omega * t)    -46.68    2978.63  -0.016   0.9875    
## sin(3 * omega * t)    722.45    2933.64   0.246   0.8059    
## cos(3 * omega * t)  -6546.44    2956.82  -2.214   0.0287 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 23920 on 124 degrees of freedom
## Multiple R-squared:  0.8633, Adjusted R-squared:  0.8555 
## F-statistic: 111.8 on 7 and 124 DF,  p-value: < 2.2e-16

AIC(mod_armonico2)

## [1] 3046.072

plot(ventas$t, ventas$Value, type="l", col="red",
     ylab="Ventas", xlab="Mes")

lines(ventas$t, fitted(mod_armonico2), lty=2, col="blue")

ggqqplot(mod_armonico2$residuals)

AIC(mod_armonico)

## [1] 3046.36

AIC(mod_armonico2)

## [1] 3046.072

## nuevamente ##
#install.packages("forecast")#
library(forecast)

## Warning: package 'forecast' was built under R version 4.5.2

## Registered S3 method overwritten by 'quantmod':
##   method            from
##   as.zoo.data.frame zoo

## 
## Adjuntando el paquete: 'forecast'

## The following object is masked from 'package:ggpubr':
## 
##     gghistogram

# 1. Convertir la serie a objeto de tiempo con frecuencia 12
Ventas_ts <- ts(ventas$Value, frequency = 12)
Ventas_ts

##       Jan    Feb    Mar    Apr    May    Jun    Jul    Aug    Sep    Oct    Nov
## 1  310188 299412 328526 329854 347728 344405 348071 353392 324646 338610 339363
## 2  314557 310925 360679 356333 365605 358604 361939 362627 345999 355209 365828
## 3  335587 337314 387088 380775 391999 388700 384682 394609 375025 379482 391220
## 4  355184 372401 414149 392949 418608 400975 396026 417922 385609 399400 411065
## 5  375587 373987 421719 408544 437188 414714 422410 435005 396213 415700 423786
## 6  383054 380019 432651 431396 459231 433282 443281 451366 421424 438457 439165
## 7  398027 388063 445970 439637 464785 449794 459533 457905 432782 446593 446773
## 8  402018 415183 461190 451435 470425 465058 462195 472032 448870 453318 467956
## 9  422066 418520 483667 466647 495849 483003 476520 491564 470724 477120 499276
## 10 444701 436018 509325 481516 529261 508292 506514 521962 478595 505194 520956
## 11 458089 443708 515694 509413 547130 518273 532103 545247 496074 525539 535352
##       Dec
## 1  400281
## 2  426663
## 3  451821
## 4  462102
## 5  476910
## 6  502330
## 7  519625
## 8  540506
## 9  559854
## 10 559289
## 11 591380

# 2. Gráfica de la serie
plot(Ventas_ts, col = "red", main = "Serie de Ventas", ylab = "Ventas")

# 3. Descomposición clásica
decomp <- decompose(Ventas_ts)
plot(decomp)

# 4. Ajuste automático SARIMA
mod_arima <- auto.arima(Ventas_ts, seasonal = TRUE)

summary(mod_arima)

## Series: Ventas_ts 
## ARIMA(3,0,0)(2,1,1)[12] with drift 
## 
## Coefficients:
##          ar1     ar2     ar3    sar1     sar2     sma1      drift
##       0.2062  0.1474  0.2048  0.3771  -0.3016  -0.5088  1455.9955
## s.e.  0.0958  0.0934  0.1011  0.1669   0.1111   0.1578    70.8907
## 
## sigma^2 = 48737317:  log likelihood = -1230.94
## AIC=2477.87   AICc=2479.17   BIC=2500.17
## 
## Training set error measures:
##                    ME     RMSE      MAE        MPE     MAPE      MASE
## Training set 260.3766 6459.264 4986.204 0.03749127 1.160237 0.2776984
##                    ACF1
## Training set 0.01718056

# 5. Diagnóstico de residuos
checkresiduals(mod_arima)

## 
##  Ljung-Box test
## 
## data:  Residuals from ARIMA(3,0,0)(2,1,1)[12] with drift
## Q* = 26.054, df = 18, p-value = 0.09854
## 
## Model df: 6.   Total lags used: 24

# 6. Pronóstico a 12 meses
forecast_12 <- forecast(mod_arima, h = 12)
ggqqplot(mod_arima$residuals)

## Don't know how to automatically pick scale for object of type <ts>. Defaulting
## to continuous.

## Don't know how to automatically pick scale for object of type <ts>. Defaulting
## to continuous.
## Don't know how to automatically pick scale for object of type <ts>. Defaulting
## to continuous.

plot(forecast_12)

mod_arima <- auto.arima(ts(ventas$Value, frequency = 12), seasonal = TRUE)
summary(mod_arima)

## Series: ts(ventas$Value, frequency = 12) 
## ARIMA(3,0,0)(2,1,1)[12] with drift 
## 
## Coefficients:
##          ar1     ar2     ar3    sar1     sar2     sma1      drift
##       0.2062  0.1474  0.2048  0.3771  -0.3016  -0.5088  1455.9955
## s.e.  0.0958  0.0934  0.1011  0.1669   0.1111   0.1578    70.8907
## 
## sigma^2 = 48737317:  log likelihood = -1230.94
## AIC=2477.87   AICc=2479.17   BIC=2500.17
## 
## Training set error measures:
##                    ME     RMSE      MAE        MPE     MAPE      MASE
## Training set 260.3766 6459.264 4986.204 0.03749127 1.160237 0.2776984
##                    ACF1
## Training set 0.01718056

AIC(mod_arima)

## [1] 2477.874

actv1ST.R

DERMIN

2025-12-05