#install.packages("forecast")
library(forecast)

## Registered S3 method overwritten by 'quantmod':
##   method            from
##   as.zoo.data.frame zoo

#install.packages("tidyverse")
library(tidyverse)

## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.1     ✔ tibble    3.2.1
## ✔ lubridate 1.9.4     ✔ tidyr     1.3.1
## ✔ purrr     1.0.4

## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors

#install.packages("ggplot2")
library(ggplot2)

#install.packages("maps")
library(maps)

## 
## Attaching package: 'maps'
## 
## The following object is masked from 'package:purrr':
## 
##     map

#install.packages("readxl")
library(readxl)

Importar la base de datos

# Load the state population data set from its local CSV file.
# file.choose()  # uncomment to locate the file interactively
poblacion <- read.csv(
  file = "/Users/nahomi/Desktop/GENERACIÓN DE ESCENARIOS /MODULO1/population.csv"
)

Entender la base de datos

# Summary statistics for every column of the data set.
summary(poblacion)

##     state                year        population      
##  Length:6020        Min.   :1900   Min.   :   43000  
##  Class :character   1st Qu.:1930   1st Qu.:  901483  
##  Mode  :character   Median :1960   Median : 2359000  
##                     Mean   :1960   Mean   : 3726003  
##                     3rd Qu.:1990   3rd Qu.: 4541883  
##                     Max.   :2019   Max.   :39512223

# Compact view of the structure: dimensions, column names and column types.
str(poblacion)

## 'data.frame':    6020 obs. of  3 variables:
##  $ state     : chr  "AK" "AK" "AK" "AK" ...
##  $ year      : int  1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 ...
##  $ population: int  135000 158000 189000 205000 215000 222000 224000 231000 224000 224000 ...

# First rows of the data set.
head(poblacion)

##   state year population
## 1    AK 1950     135000
## 2    AK 1951     158000
## 3    AK 1952     189000
## 4    AK 1953     205000
## 5    AK 1954     215000
## 6    AK 1955     222000

Serie de tiempo en Texas

# Keep only the Texas rows, ordered by year so that the series built below is
# in chronological order.
poblacion_texas <- poblacion %>%
  filter(state == "TX") %>%
  arrange(year)

# Line chart of the historical Texas population.
ggplot(poblacion_texas, aes(x = year, y = population)) +
  geom_line() +
  labs(title = "Población de Texas", x = "Año", y = "Poblacion")

# Annual series (one observation per year). The start year is read from the
# data: the previous hard-coded 1990 shifted the time index, so the 10-step
# forecast was labelled 2110-2119 although the data end in 2019.
ts_texas <- ts(
  poblacion_texas$population,
  start = min(poblacion_texas$year),
  frequency = 1
)

# Let auto.arima() select the model and report the fit.
arima_texas <- auto.arima(ts_texas)
summary(arima_texas)

## Series: ts_texas 
## ARIMA(0,2,2) 
## 
## Coefficients:
##           ma1      ma2
##       -0.5950  -0.1798
## s.e.   0.0913   0.0951
## 
## sigma^2 = 1.031e+10:  log likelihood = -1527.14
## AIC=3060.28   AICc=3060.5   BIC=3068.6
## 
## Training set error measures:
##                    ME     RMSE      MAE       MPE      MAPE      MASE
## Training set 12147.62 99818.31 59257.39 0.1046163 0.5686743 0.2672197
##                     ACF1
## Training set -0.02136734

# Forecast the next 10 years with a 95% prediction interval.
pronostico_texas <- forecast(arima_texas, level = 95, h = 10)
pronostico_texas

##      Point Forecast    Lo 95    Hi 95
## 2020       29398472 29199487 29597457
## 2021       29806827 29463665 30149990
## 2022       30215183 29742956 30687410
## 2023       30623538 30024100 31222977
## 2024       31031894 30303359 31760429
## 2025       31440249 30579246 32301253
## 2026       31848605 30851090 32846119
## 2027       32256960 31118581 33395339
## 2028       32665316 31381587 33949044
## 2029       33073671 31640070 34507272

# Plot the history together with the forecast and its interval.
plot(pronostico_texas, main = "Población en Texas")

PT 2 EJERCICIO CON MAPA 17 FEB

#install.packages("forecast")
library(forecast)

#install.packages("tidyverse")
library(tidyverse)

#install.packages("maps")  
library(maps)

# Load the state population data set from its local CSV file.
# file.choose()  # uncomment to locate the file interactively
poblacion <- read.csv(
  file = "/Users/nahomi/Desktop/GENERACIÓN DE ESCENARIOS /MODULO1/population.csv"
)

# Summary statistics for every column of the data set.
summary(poblacion)

##     state                year        population      
##  Length:6020        Min.   :1900   Min.   :   43000  
##  Class :character   1st Qu.:1930   1st Qu.:  901483  
##  Mode  :character   Median :1960   Median : 2359000  
##                     Mean   :1960   Mean   : 3726003  
##                     3rd Qu.:1990   3rd Qu.: 4541883  
##                     Max.   :2019   Max.   :39512223

# Compact view of the structure: dimensions, column names and column types.
str(poblacion)

## 'data.frame':    6020 obs. of  3 variables:
##  $ state     : chr  "AK" "AK" "AK" "AK" ...
##  $ year      : int  1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 ...
##  $ population: int  135000 158000 189000 205000 215000 222000 224000 231000 224000 224000 ...

# First rows of the data set.
head(poblacion)

##   state year population
## 1    AK 1950     135000
## 2    AK 1951     158000
## 3    AK 1952     189000
## 4    AK 1953     205000
## 5    AK 1954     215000
## 6    AK 1955     222000

Serie de tiempo en Texas

# Keep only the Texas rows, ordered by year so that the series built below is
# in chronological order.
poblacion_texas <- poblacion %>%
  filter(state == "TX") %>%
  arrange(year)

# Line chart of the historical Texas population.
ggplot(poblacion_texas, aes(x = year, y = population)) +
  geom_line() +
  labs(title = "Población de Texas", x = "Año", y = "Población")

# Annual series (one observation per year). The start year is read from the
# data: the previous hard-coded 1990 shifted the time index, so the 10-step
# forecast was labelled 2110-2119 although the data end in 2019.
ts_texas <- ts(
  poblacion_texas$population,
  start = min(poblacion_texas$year),
  frequency = 1
)

# Let auto.arima() select the model and report the fit.
arima_texas <- auto.arima(ts_texas)
summary(arima_texas)

## Series: ts_texas 
## ARIMA(0,2,2) 
## 
## Coefficients:
##           ma1      ma2
##       -0.5950  -0.1798
## s.e.   0.0913   0.0951
## 
## sigma^2 = 1.031e+10:  log likelihood = -1527.14
## AIC=3060.28   AICc=3060.5   BIC=3068.6
## 
## Training set error measures:
##                    ME     RMSE      MAE       MPE      MAPE      MASE
## Training set 12147.62 99818.31 59257.39 0.1046163 0.5686743 0.2672197
##                     ACF1
## Training set -0.02136734

# Forecast the next 10 years with a 95% prediction interval.
pronostico_texas <- forecast(arima_texas, level = 95, h = 10)
pronostico_texas

##      Point Forecast    Lo 95    Hi 95
## 2020       29398472 29199487 29597457
## 2021       29806827 29463665 30149990
## 2022       30215183 29742956 30687410
## 2023       30623538 30024100 31222977
## 2024       31031894 30303359 31760429
## 2025       31440249 30579246 32301253
## 2026       31848605 30851090 32846119
## 2027       32256960 31118581 33395339
## 2028       32665316 31381587 33949044
## 2029       33073671 31640070 34507272

# Plot the history together with the forecast and its interval.
plot(pronostico_texas, main = "Población en Texas")

Crear mapa

# Goal of the exercise: a map of the USA per decade, with a blue-red gradient
# of each state's population from 1950 to 2050.
# NOTE(review): the code below does not implement that gradient yet; it only
# draws the state outlines and fills Texas in blue and New York in red.
#
# The calls are namespace-qualified because `map` is also exported by purrr
# (attached through tidyverse); a bare map() only reaches the maps package
# when maps is attached last.
maps::map(database = "state")
maps::map(
  database = "state", regions = "texas",
  col = "blue", fill = TRUE, add = TRUE
)
maps::map(
  database = "state", regions = "new york",
  col = "red", fill = TRUE, add = TRUE
)
title(main = "Pronóstico de Población de EE.UU.")

EJERCICIO CLASE 18 FEB

Las ventas de leche saborizada

# Read the historical flavored-milk sales workbook into `ventas`.
ventas <- read_excel(
  path = "/Users/nahomi/Desktop/GENERACIÓN DE ESCENARIOS /MODULO1/Ventas_Históricas_Lechitas.xlsx"
)

1. Modelo AUTO.ARIMA

# Monthly sales series (12 observations per year) indexed from January 2017.
ts_ventas <- ts(ventas[["Ventas"]], frequency = 12, start = c(2017, 1))
autoplot(ts_ventas) +
  labs(
    title = "Ventas de Leche Saborizada Hershey's",
    x = "Tiempo",
    y = "Miles de Dólares"
  )

# Let auto.arima() select the model and report the fit.
arima_ventas <- auto.arima(ts_ventas)
summary(arima_ventas)

## Series: ts_ventas 
## ARIMA(1,0,0)(1,1,0)[12] with drift 
## 
## Coefficients:
##          ar1     sar1     drift
##       0.6383  -0.5517  288.8979
## s.e.  0.1551   0.2047   14.5026
## 
## sigma^2 = 202701:  log likelihood = -181.5
## AIC=371   AICc=373.11   BIC=375.72
## 
## Training set error measures:
##                    ME    RMSE    MAE        MPE      MAPE       MASE      ACF1
## Training set 25.22158 343.864 227.17 0.08059932 0.7069542 0.06491044 0.2081026

# Forecast the next 12 months with a 95% prediction interval and show it.
pronostico_ventas <- forecast(
  arima_ventas,
  h = 12,
  level = 95
)
pronostico_ventas

##          Point Forecast    Lo 95    Hi 95
## Jan 2020       35498.90 34616.48 36381.32
## Feb 2020       34202.17 33155.28 35249.05
## Mar 2020       36703.01 35596.10 37809.92
## Apr 2020       36271.90 35141.44 37402.36
## May 2020       37121.98 35982.07 38261.90
## Jun 2020       37102.65 35958.90 38246.40
## Jul 2020       37151.04 36005.73 38296.34
## Aug 2020       38564.64 37418.70 39710.58
## Sep 2020       38755.22 37609.03 39901.42
## Oct 2020       39779.02 38632.72 40925.32
## Nov 2020       38741.63 37595.28 39887.97
## Dec 2020       38645.86 37499.50 39792.22

# Plot the observed series followed by the forecast and its interval.
autoplot(pronostico_ventas) +
  labs(
    title = "Pronóstico de Ventas 2020 de Leche Saborizada Hershey's",
    x = "Tiempo",
    y = "Miles de Dólares"
  )

2. Modelo Regresión Lineal

# Month index 1..n used as the regressor. It is derived from the number of
# rows instead of a fixed 36 so it always matches the length of the data.
ventas$mes <- seq_len(nrow(ventas))

# Simple linear trend: sales as a function of the month index.
regresion_ventas <- lm(Ventas ~ mes, data = ventas)
summary(regresion_ventas)

## 
## Call:
## lm(formula = Ventas ~ mes, data = ventas)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -2075.79  -326.41    33.74   458.40  1537.04 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 24894.67     275.03   90.52   <2e-16 ***
## mes           298.37      12.96   23.02   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 808 on 34 degrees of freedom
## Multiple R-squared:  0.9397, Adjusted R-squared:  0.9379 
## F-statistic: 529.8 on 1 and 34 DF,  p-value: < 2.2e-16

# Month indices for the 12 months that follow the observed data. They are
# computed from the number of observed rows rather than hard-coded as 37:48.
siguiente_anio <- data.frame(mes = nrow(ventas) + seq_len(12))

# Point predictions of the linear trend for those months.
prediccion_regresion <- predict(regresion_ventas, newdata = siguiente_anio)
prediccion_regresion

##        1        2        3        4        5        6        7        8 
## 35934.49 36232.86 36531.23 36829.61 37127.98 37426.35 37724.73 38023.10 
##        9       10       11       12 
## 38321.47 38619.85 38918.22 39216.59

# Scatter plot of the observed sales, the fitted trend line (blue) and the
# predicted months (red). The axis limits are widened to include the
# predictions; with the default limits (computed from the observed data only)
# the predicted points fall outside the plot region and are not drawn.
plot(
  ventas$mes, ventas$Ventas,
  main = "Pronóstico de Ventas 2020 de Leche Saborizada Hershey's",
  xlab = "Tiempo", ylab = "Miles de Dólares",
  xlim = range(ventas$mes, siguiente_anio$mes),
  ylim = range(ventas$Ventas, prediccion_regresion)
)
abline(regresion_ventas, col = "blue")
points(siguiente_anio$mes, prediccion_regresion, col = "red")

# Fitted values of the linear regression for the observed months.
predicciones_reales <- predict(regresion_ventas, newdata = ventas)

# Mean absolute percentage error of those fitted values, in percent.
MAPE <- 100 * mean(
  abs((ventas$Ventas - predicciones_reales) / ventas$Ventas)
)
MAPE

## [1] 2.011297

## 1. ¿Cuál es el modelo que mejor se adapta a la serie?
# El modelo que mejor se adapta a la serie es el **SARIMA**, con un MAPE de 0.71%, frente al 2.01% de la regresión lineal.  

## 3. 
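# Para el siguiente año, la proyección de ventas (en miles de dólares) es la siguiente. El escenario esperado es el pronóstico puntual del modelo SARIMA; los escenarios pesimista y optimista son los límites inferior y superior de su intervalo del 95%:  
# | Mes | Escenario esperado | Escenario pesimista | Escenario optimista |
# | --- | --- | --- | --- |
# | Ene 2020 | 35498.90 | 34616.48 | 36381.32 |
# | Feb 2020 | 34202.17 | 33155.28 | 35249.05 |
# | Mar 2020 | 36703.01 | 35596.10 | 37809.92 |
# | Abr 2020 | 36271.90 | 35141.44 | 37402.36 |
# | May 2020 | 37121.98 | 35982.07 | 38261.90 |
# | Jun 2020 | 37102.65 | 35958.90 | 38246.40 |
# | Jul 2020 | 37151.04 | 36005.73 | 38296.34 |
# | Ago 2020 | 38564.64 | 37418.70 | 39710.58 |
# | Sep 2020 | 38755.22 | 37609.03 | 39901.42 |
# | Oct 2020 | 39779.02 | 38632.72 | 40925.32 |
# | Nov 2020 | 38741.63 | 37595.28 | 39887.97 |
# | Dic 2020 | 38645.86 | 37499.50 | 39792.22 |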

3. Conclusiones

# Load the sales-per-year table and draw one line per year across the months.
ventas_por_anio <- read.csv(
  file = "/Users/nahomi/Desktop/GENERACIÓN DE ESCENARIOS /MODULO1/ventas_por_anio.csv"
)
ggplot(
  data = ventas_por_anio,
  mapping = aes(x = mes, y = ventas, colour = as.factor(anio), group = anio)
) +
  geom_line() +
  labs(
    title = "Ventas de Leche Saborizada Hershey's por Año",
    x = "Mes",
    y = "Miles de Dólares"
  )

# Nuestra recomendación es realizar campañas publicitarias para aumentar el consumo de leche saborizada Hershey's en el primer semestre del año.

Actividad 2. Generación de escenarios futuros con modelos de pronósticos en series de tiempo.

Nahomi Yolanda Miguel Pulido A01656834

2025-02-17

Importar la base de datos

Entender la base de datos

Serie de tiempo en Texas

PT 2 EJERCICIO CON MAPA 17 FEB

Serie de tiempo en Texas

Crear mapa

EJERCICIO CLASE 18 FEB

Las ventas de leche saborizada

1. Modelo AUTO.ARIMA

2. Modelo Regresión Lineal

3. Conclusiones