# Load libraries
library(dplyr)    # %>%, group_by(), select(), summarise(), filter()
library(ggplot2)  # ggplot() and friends
library(forecast) # forecast() for the ARIMA fits
#Análisis de la base de datos
####### Load Data #######
avocado <- read.csv("avocado.csv")
####### Processing ######
str(avocado)
## 'data.frame': 18249 obs. of 13 variables:
## $ Date : chr "27/12/2015" "20/12/2015" "13/12/2015" "06/12/2015" ...
## $ AveragePrice: num 1.33 1.35 0.93 1.08 1.28 1.26 0.99 0.98 1.02 1.07 ...
## $ Total.Volume: num 64237 54877 118220 78992 51040 ...
## $ X4046 : num 1037 674 795 1132 941 ...
## $ X4225 : num 54455 44639 109150 71976 43838 ...
## $ X4770 : num 48.2 58.3 130.5 72.6 75.8 ...
## $ Total.Bags : num 8697 9506 8145 5811 6184 ...
## $ Small.Bags : num 8604 9408 8042 5677 5986 ...
## $ Large.Bags : num 93.2 97.5 103.1 133.8 197.7 ...
## $ XLarge.Bags : num 0 0 0 0 0 0 0 0 0 0 ...
## $ type : chr "conventional" "conventional" "conventional" "conventional" ...
## $ year : int 2015 2015 2015 2015 2015 2015 2015 2015 2015 2015 ...
## $ region : chr "Albany" "Albany" "Albany" "Albany" ...

# Year is used as a discrete grouping/colour variable, so store it as a factor.
avocado$year <- as.factor(avocado$year)

# Dates arrive as day/month/year strings (see the str() output above).
avocado$Date <- as.Date(avocado$Date, format = "%d/%m/%Y")

# Derive the month name from the numeric month instead of months(): months()
# returns names in the session locale, so matching them against the English
# month.name levels would yield NA for every row under a non-English locale.
avocado$month <- factor(
  month.name[as.POSIXlt(avocado$Date)$mon + 1L],
  levels = month.name
)

# Mean price per year / month / avocado type. `.groups = "drop_last"` is the
# default grouping of the result (still grouped by year and month), stated
# explicitly so summarise() no longer prints its informational message.
grouped <- avocado %>%
  group_by(year, month, type) %>%
  select(year, month, type, AveragePrice) %>%
  summarise(averagePrice = mean(AveragePrice), .groups = "drop_last")
# Basic visualisation: one line per year of the monthly mean price, with one
# panel per avocado type.
options(repr.plot.width = 12, repr.plot.height = 5)
ggplot(
  data = grouped,
  aes(x = month, y = averagePrice, colour = year, group = year)
) +
  labs(
    colour = "Year",
    x = "Month",
    y = "Average Price",
    title = "Line Plot - Average monthly prices of avocado by avocado type for each year"
  ) +
  geom_line() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  # Facet on the bare column name so the variable is looked up in the plot's
  # data; `grouped$type` bypassed that lookup and tied the facets to the row
  # order of an outside vector.
  facet_grid(. ~ type)
# Weekly conventional-avocado prices for the national forecast.
#
# The forecast below is labelled "Average Price across the US" and holds out
# observations 141:169, i.e. it expects one row per week. Filtering on `type`
# alone keeps one row per week *per region*, so the "series" interleaved every
# region. Restrict it to the national aggregate rows, as the previously
# commented-out code intended ("Isolate rows only containing totalus").
is_conventional <- avocado$type == "conventional"
is_total_us <- tolower(trimws(avocado$region)) == "totalus"

if (any(is_conventional & is_total_us, na.rm = TRUE)) {
  avocado_price <- avocado[which(is_conventional & is_total_us), ]
} else {
  # No national aggregate in this data set: fall back to the former behaviour
  # (all conventional rows) but make the limitation visible.
  warning(
    "No 'TotalUS' rows found in avocado$region; using every conventional ",
    "row, so the national series mixes all regions.",
    call. = FALSE
  )
  avocado_price <- avocado[which(is_conventional), ]
}

# Chronological order; Date is already of class Date at this point, so no
# re-parsing is needed.
avocado_price <- avocado_price[order(avocado_price$Date), ]
## Split the data set
# Region labels that are handled separately from the remaining ("city") rows.
major_regions <- c(
  "California", "West", "Plains", "SouthCentral",
  "GreatLakes", "Northeast", "Midsouth", "Southeast"
)

# Every row whose region is not one of `major_regions`. The explicit
# !is.na() keeps the result identical to the former chain of `!=` filters,
# which dropped rows with a missing region.
# NOTE(review): a national aggregate label such as "TotalUS", if present in
# the data, is not excluded here and therefore stays in avocadoCities.
avocadoCities <- avocado %>%
  filter(!is.na(region), !(region %in% major_regions))

# One data frame per region; the regional forecasting code further down reads
# these objects, so they are kept as separate variables.
avocadoRegionsC <- avocado %>% filter(region == "California")
avocadoRegionsW <- avocado %>% filter(region == "West")
avocadoRegionsP <- avocado %>% filter(region == "Plains")
avocadoRegionsSC <- avocado %>% filter(region == "SouthCentral")
avocadoRegionsG <- avocado %>% filter(region == "GreatLakes")
avocadoRegionsN <- avocado %>% filter(region == "Northeast")
avocadoRegionsM <- avocado %>% filter(region == "Midsouth")
avocadoRegionsS <- avocado %>% filter(region == "Southeast")

# All regional rows stacked in the same order as before.
avocadoRegions <- rbind(
  avocadoRegionsC, avocadoRegionsW, avocadoRegionsP, avocadoRegionsSC,
  avocadoRegionsG, avocadoRegionsN, avocadoRegionsM, avocadoRegionsS
)
##Coordenadas para elaboración de mapa de calor
# En esta sección analizamos una primera aproximación al precio promedio de venta del aguacate a lo largo del tiempo, comparando el aguacate orgánico con el convencional. Podemos observar que el aguacate más caro es, por naturaleza, el orgánico, y que el precio más alto se da en septiembre de 2017, lo cual puede indicar una escasez de producción o una alta demanda en aquel momento. Una vez que tenemos esta aproximación de cómo se comportan nuestros datos, podemos proceder a realizar los análisis de regresión.
# En esta sección manipulamos los datos para que se adapten a nuestras visualizaciones.
# National weekly price series; frequency = 52 declares a yearly cycle in weeks.
week_price <- ts(na.omit(avocado_price$AveragePrice), frequency = 52)
decomp <- stl(week_price, s.window = "periodic")
# `differences` spelled out in full (the former `difference =` only worked
# through partial argument matching).
week_price.diff <- diff(avocado_price$AveragePrice, differences = 1)

# Train on the first 140 observations and compare against the following 29.
n_train <- 140L
horizon <- 29L
train_idx <- seq_len(n_train)
holdout_idx <- n_train + seq_len(horizon)

# Baseline model: ARIMA(0,1,0)(0,1,1) with a 52-week seasonal period.
fit.week <- arima(
  week_price[train_idx],
  order = c(0, 1, 0),
  seasonal = list(order = c(0, 1, 1), period = 52)
)
pre.week <- predict(fit.week, n.ahead = horizon)
# Held-out prices vs. baseline predictions. Titles now state the orders that
# are actually fitted; the previous ones named models that are not fitted here.
matplot(
  seq_len(horizon),
  cbind(week_price[holdout_idx], pre.week$pred),
  type = "l",
  xlab = "Weeks",
  ylab = "Average Price across the US ($)",
  main = "Baseline ARIMA(0,1,0)(0,1,1)[52]"
)
# The baseline plot is not a good fit, so a manually specified ARIMA is tried
# below to see how the results change.
fweekcast <- forecast(fit.week, h = horizon)
plot(fweekcast, main = expression("ARIMA(0,1,0)(0,1,1)"[52]))
# Overlay the full observed series (re-indexed 1..n to match the forecast axis).
lines(ts(week_price))

# Manual model: ARIMA(3,0,0)(0,1,1) with a 52-week seasonal period.
man.fit.week <- arima(
  week_price[train_idx],
  order = c(3, 0, 0),
  seasonal = list(order = c(0, 1, 1), period = 52)
)
man.pre.week <- predict(man.fit.week, n.ahead = horizon)
matplot(
  seq_len(horizon),
  cbind(week_price[holdout_idx], man.pre.week$pred),
  type = "l",
  xlab = "Weeks",
  ylab = "Average Price across the US ($)",
  main = "Manual ARIMA"
)
fmanweek1 <- forecast(man.fit.week, h = horizon)
plot(fmanweek1, main = expression("ARIMA(3,0,0)(0,1,1)"[52]))
lines(ts(week_price))
#' Forecast weekly conventional-avocado prices for one region
#'
#' Keeps the conventional rows of `region_data`, orders them by date, fits an
#' ARIMA(3,0,0)(0,1,1) model with a 52-week seasonal period on the first
#' `n_train` prices and forecasts the next `horizon` weeks. Draws two plots:
#' held-out prices against the predictions, and the forecast with the observed
#' series overlaid.
#'
#' The baseline ARIMA(0,1,0) fit, the STL decomposition and the differenced
#' series that were previously recomputed for every region are not produced
#' here: none of them was plotted or exported.
#'
#' @param region_data Data frame with columns `type`, `Date` (class Date) and
#'   `AveragePrice` for a single region.
#' @param region_name Label used in plot titles and axis labels.
#' @param lat,lon Coordinates stored with every forecast row for the heat map,
#'   in signed decimal degrees.
#' @param n_train Number of leading observations used to fit the model.
#' @param horizon Number of weeks to forecast and to hold out for comparison.
#' @return The forecast as a data frame (`as.data.frame()` of the forecast
#'   object) with two extra columns, `lat` and `lon`.
forecast_region_prices <- function(region_data, region_name, lat, lon,
                                   n_train = 140L, horizon = 29L) {
  conventional <- region_data[which(region_data$type == "conventional"), ]
  conventional <- conventional[order(conventional$Date), ]
  prices <- as.numeric(na.omit(conventional$AveragePrice))

  if (length(prices) < n_train + horizon) {
    stop(
      region_name, ": need at least ", n_train + horizon,
      " weekly prices but found ", length(prices), ".",
      call. = FALSE
    )
  }

  train <- prices[seq_len(n_train)]
  holdout <- prices[n_train + seq_len(horizon)]

  fit <- arima(
    train,
    order = c(3, 0, 0),
    seasonal = list(order = c(0, 1, 1), period = 52)
  )
  # Attach the training series to the fit: forecast() otherwise tries to
  # re-evaluate the expression passed to arima(), which is a local variable
  # here and may not resolve (or may resolve to an unrelated object).
  fit$x <- as.ts(train)

  predicted <- predict(fit, n.ahead = horizon)
  matplot(
    seq_len(horizon),
    cbind(holdout, predicted$pred),
    type = "l",
    xlab = "Weeks",
    ylab = paste0("Average Price in ", region_name, " ($)"),
    main = paste0("Manual ARIMA - ", region_name)
  )

  region_forecast <- forecast(fit, h = horizon)
  # The title states the orders that are actually fitted.
  plot(
    region_forecast,
    main = as.expression(
      bquote(.(region_name) * ": " * "ARIMA(3,0,0)(0,1,1)"[52])
    )
  )
  # Overlay the full observed series, indexed 1..n like the forecast axis.
  lines(ts(prices))

  result <- as.data.frame(region_forecast)
  result$lat <- lat
  result$lon <- lon
  result
}

# Replace each region's raw rows with its forecast table.
# Longitudes are negative: all of these locations are in the western
# hemisphere, and the former positive values would place them in Asia on a
# map that uses signed decimal degrees.
# NOTE(review): if the heat-map consumer already negates longitude, revert
# the sign here.
avocadoRegionsC <- forecast_region_prices(
  avocadoRegionsC, "California", lat = 38.5816, lon = -121.4944
)
avocadoRegionsW <- forecast_region_prices(
  avocadoRegionsW, "West", lat = 39.5994, lon = -110.8107
)
avocadoRegionsP <- forecast_region_prices(
  avocadoRegionsP, "Plains", lat = 43.9695, lon = -99.9018
)
avocadoRegionsSC <- forecast_region_prices(
  avocadoRegionsSC, "SouthCentral", lat = 34.6036, lon = -98.3959
)
avocadoRegionsG <- forecast_region_prices(
  avocadoRegionsG, "GreatLakes", lat = 41.8781, lon = -87.6298
)
avocadoRegionsN <- forecast_region_prices(
  avocadoRegionsN, "Northeast", lat = 40.7831, lon = -73.9712
)
avocadoRegionsM <- forecast_region_prices(
  avocadoRegionsM, "Midsouth", lat = 36.1627, lon = -86.7816
)
avocadoRegionsS <- forecast_region_prices(
  avocadoRegionsS, "Southeast", lat = 33.0362, lon = -85.0322
)

# Stack the forecasts pairwise, left to right, exactly as the former chain of
# rbind() calls did, so the de-duplicated row names written to the CSV are
# unchanged.
avocadoRegions <- Reduce(
  rbind,
  list(
    avocadoRegionsC, avocadoRegionsW, avocadoRegionsP, avocadoRegionsSC,
    avocadoRegionsG, avocadoRegionsN, avocadoRegionsM, avocadoRegionsS
  )
)
write.csv(avocadoRegions, "avocadoRegions.csv")
# Aunque South Central está cerca de México, no consume tanto aguacate como otras regiones más lejanas, como el noreste de Estados Unidos.
# En este caso es muy importante hablar de la estacionalidad, sobre todo en eventos en los que el aguacate se consume mucho: por ejemplo, en febrero, durante la temporada del Super Bowl en todo el mundo, o en septiembre, en el caso de México, con todas las festividades del mes patrio. Por lo tanto, nuestro modelo ARIMA también pronostica ventas altas para estas fechas en las zonas correspondientes.