# install.packages("forecast")
library(forecast)
# install.packages("tidyverse")
library(tidyverse)
# install.packages("ggplot2")
library(ggplot2)
poblacion <- read.csv("C:\\Users\\gamas\\Downloads\\population.csv")
summary(poblacion)
## state year population
## Length:6020 Min. :1900 Min. : 43000
## Class :character 1st Qu.:1930 1st Qu.: 901483
## Mode :character Median :1960 Median : 2359000
## Mean :1960 Mean : 3726003
## 3rd Qu.:1990 3rd Qu.: 4541883
## Max. :2019 Max. :39512223
str(poblacion)
## 'data.frame': 6020 obs. of 3 variables:
## $ state : chr "AK" "AK" "AK" "AK" ...
## $ year : int 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 ...
## $ population: int 135000 158000 189000 205000 215000 222000 224000 231000 224000 224000 ...
head(poblacion)
## state year population
## 1 AK 1950 135000
## 2 AK 1951 158000
## 3 AK 1952 189000
## 4 AK 1953 205000
## 5 AK 1954 215000
## 6 AK 1955 222000
poblacion_texas <- poblacion %>% filter(state=="TX")
ggplot(poblacion_texas, aes(x=year, y=population)) +
geom_line() +
labs(title="Población de Texas", x = "Año",
y="Población")
ts_texas <- ts(poblacion_texas$population, start=1900, frequency=1) #Serie de tiempo anual
ts_texas <- ts(poblacion_texas$population, start=1900, frequency=4) #Serie de tiempo trimestral
ts_texas <- ts(poblacion_texas$population, start=1900, frequency=12) #Serie de tiempo mensual
arima_texas <- auto.arima(ts_texas)
summary(arima_texas)
## Series: ts_texas
## ARIMA(0,2,2)
##
## Coefficients:
## ma1 ma2
## -0.5950 -0.1798
## s.e. 0.0913 0.0951
##
## sigma^2 = 1.031e+10: log likelihood = -1527.14
## AIC=3060.28 AICc=3060.5 BIC=3068.6
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set 12147.62 99818.31 59257.39 0.1046163 0.5686743 0.02296775
## ACF1
## Training set -0.02136734
pronostico_texas <- forecast(arima_texas, level=95, h=10)
pronostico_texas
## Point Forecast Lo 95 Hi 95
## Jan 1910 29398472 29199487 29597457
## Feb 1910 29806827 29463665 30149990
## Mar 1910 30215183 29742956 30687410
## Apr 1910 30623538 30024100 31222977
## May 1910 31031894 30303359 31760429
## Jun 1910 31440249 30579246 32301253
## Jul 1910 31848605 30851090 32846119
## Aug 1910 32256960 31118581 33395339
## Sep 1910 32665316 31381587 33949044
## Oct 1910 33073671 31640070 34507272
plot(pronostico_texas, main="Población de Texas")
#install.packages("maps")
library(maps)
##
## Attaching package: 'maps'
## The following object is masked from 'package:purrr':
##
## map
poblacion <- read.csv("C:\\Users\\gamas\\Downloads\\population.csv")
summary(poblacion)
## state year population
## Length:6020 Min. :1900 Min. : 43000
## Class :character 1st Qu.:1930 1st Qu.: 901483
## Mode :character Median :1960 Median : 2359000
## Mean :1960 Mean : 3726003
## 3rd Qu.:1990 3rd Qu.: 4541883
## Max. :2019 Max. :39512223
str(poblacion)
## 'data.frame': 6020 obs. of 3 variables:
## $ state : chr "AK" "AK" "AK" "AK" ...
## $ year : int 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 ...
## $ population: int 135000 158000 189000 205000 215000 222000 224000 231000 224000 224000 ...
head(poblacion)
## state year population
## 1 AK 1950 135000
## 2 AK 1951 158000
## 3 AK 1952 189000
## 4 AK 1953 205000
## 5 AK 1954 215000
## 6 AK 1955 222000
map(database="state")
map(database="state", regions="Texas", col="red", fill=TRUE, add=TRUE)
map(database="state", regions="New York", col="green", fill=TRUE, add=TRUE)
title(main = "Pronóstico de Población en EE.UU.")
# install.packages("forecast")
library(forecast)
# install.packages("tidyverse")
library(tidyverse)
# install.packages("ggplot2")
library(ggplot2)
ventas <- read.csv("C:\\Users\\gamas\\Downloads\\lechitas1.csv")
ts_ventas <- ts(ventas$Ventas, start=c(2017,1), frequency=12)
autoplot(ts_ventas) + labs(title="Ventas de Leche Saborizada Hershey's", x="Tiempo", y="Miles de Dólares")
arima_ventas <- auto.arima(ts_ventas)
summary(arima_ventas)
## Series: ts_ventas
## ARIMA(1,0,0)(1,1,0)[12] with drift
##
## Coefficients:
## ar1 sar1 drift
## 0.6383 -0.5517 288.8980
## s.e. 0.1551 0.2047 14.5026
##
## sigma^2 = 202700: log likelihood = -181.5
## AIC=371 AICc=373.11 BIC=375.72
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set 25.22163 343.863 227.1699 0.08059942 0.7069541 0.06491041
## ACF1
## Training set 0.2081043
pronostico_ventas <- forecast(arima_ventas, level=95, h=10)
pronostico_ventas
## Point Forecast Lo 95 Hi 95
## Jan 2020 35498.90 34616.48 36381.32
## Feb 2020 34202.17 33155.29 35249.05
## Mar 2020 36703.01 35596.10 37809.92
## Apr 2020 36271.90 35141.44 37402.36
## May 2020 37121.98 35982.07 38261.90
## Jun 2020 37102.65 35958.91 38246.40
## Jul 2020 37151.04 36005.74 38296.35
## Aug 2020 38564.65 37418.71 39710.59
## Sep 2020 38755.23 37609.03 39901.42
## Oct 2020 39779.03 38632.73 40925.33
autoplot(pronostico_ventas) + labs(title="Pronóstico de ventas 2020 de Leche Saborizada Hershey's", x="Tiempo", y="Miles de Dólares")
ventas$Mes <- 1:36
regresion_ventas <- lm(Ventas ~ Mes, data=ventas)
summary(regresion_ventas)
##
## Call:
## lm(formula = Ventas ~ Mes, data = ventas)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2075.79 -326.41 33.74 458.41 1537.04
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 24894.67 275.03 90.52 <2e-16 ***
## Mes 298.37 12.96 23.02 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 808 on 34 degrees of freedom
## Multiple R-squared: 0.9397, Adjusted R-squared: 0.9379
## F-statistic: 529.8 on 1 and 34 DF, p-value: < 2.2e-16
next_year <- data.frame(Mes=37:48)
prediccion_regresion <- predict(regresion_ventas, next_year)
prediccion_regresion
## 1 2 3 4 5 6 7 8
## 35934.49 36232.86 36531.23 36829.61 37127.98 37426.35 37724.73 38023.10
## 9 10 11 12
## 38321.47 38619.85 38918.22 39216.59
plot(ventas$Mes, ventas$Ventas, data=ventas, main="Pronóstico de ventas 2020 de Leche Saborizada Hershey's", xlab="Tiempo", ylab="Miles de Dólares")
## Warning in plot.window(...): "data" is not a graphical parameter
## Warning in plot.xy(xy, type, ...): "data" is not a graphical parameter
## Warning in axis(side = side, at = at, labels = labels, ...): "data" is not a
## graphical parameter
## Warning in axis(side = side, at = at, labels = labels, ...): "data" is not a
## graphical parameter
## Warning in box(...): "data" is not a graphical parameter
## Warning in title(...): "data" is not a graphical parameter
abline(regresion_ventas, col="blue")
points(next_year$Mes, prediccion_regresion, col="red")
predicciones_reales <- predict(regresion_ventas, ventas)
predicciones_reales
## 1 2 3 4 5 6 7 8
## 25193.04 25491.42 25789.79 26088.16 26386.54 26684.91 26983.28 27281.66
## 9 10 11 12 13 14 15 16
## 27580.03 27878.40 28176.78 28475.15 28773.52 29071.90 29370.27 29668.64
## 17 18 19 20 21 22 23 24
## 29967.02 30265.39 30563.76 30862.14 31160.51 31458.89 31757.26 32055.63
## 25 26 27 28 29 30 31 32
## 32354.01 32652.38 32950.75 33249.13 33547.50 33845.87 34144.25 34442.62
## 33 34 35 36
## 34740.99 35039.37 35337.74 35636.11
MAPE <- mean(abs((ventas$Ventas -
predicciones_reales)/ventas$Ventas))*100
MAPE
## [1] 2.011298
El mejor modelo que se adapta a la serie es el SARIMA con un MAPE de 0.70%, comparado con la Regresión Lineal con su MAPE de 2.01%.
Para el siguiente año, la proyección de ventas es la siguiente:
#file.choose()
ventas_por_anio <- read.csv("C:\\Users\\gamas\\Downloads\\ventas_por_anios.csv")
ggplot(ventas_por_anio, aes(x=mes, y=ventas,
col=as.factor(anio), group=anio)) +
geom_line() +
labs(title = "Ventas de Leche saborizada Hershey's por Año",
x="Mes", y="Miles de Dólares")
Nuestra recomendación sería realizar campañas publicitarias para aumentar el consumo de leche saborizada Hershey’s en el primer semestre del año.