#Instalación de Librerías

#Análisis de la base de datos

####### Load Data #######
# Read the raw avocado sales data from the working directory.
avocado <- read.csv("avocado.csv")
####### Processing ######
# Inspect the column types before converting them (console output kept below).
str(avocado)
## 'data.frame':    18249 obs. of  13 variables:
##  $ Date        : chr  "27/12/2015" "20/12/2015" "13/12/2015" "06/12/2015" ...
##  $ AveragePrice: num  1.33 1.35 0.93 1.08 1.28 1.26 0.99 0.98 1.02 1.07 ...
##  $ Total.Volume: num  64237 54877 118220 78992 51040 ...
##  $ X4046       : num  1037 674 795 1132 941 ...
##  $ X4225       : num  54455 44639 109150 71976 43838 ...
##  $ X4770       : num  48.2 58.3 130.5 72.6 75.8 ...
##  $ Total.Bags  : num  8697 9506 8145 5811 6184 ...
##  $ Small.Bags  : num  8604 9408 8042 5677 5986 ...
##  $ Large.Bags  : num  93.2 97.5 103.1 133.8 197.7 ...
##  $ XLarge.Bags : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ type        : chr  "conventional" "conventional" "conventional" "conventional" ...
##  $ year        : int  2015 2015 2015 2015 2015 2015 2015 2015 2015 2015 ...
##  $ region      : chr  "Albany" "Albany" "Albany" "Albany" ...
# Year is used as a categorical variable (plot colour / grouping key).
avocado$year <- as.factor(avocado$year)
# Dates arrive as day/month/year strings.
avocado$Date <- as.Date(avocado$Date, "%d/%m/%Y")
# Build the month factor from the numeric month instead of months():
# months() returns names in the session locale, so on a non-English system
# none of them would match the English levels in month.name and every value
# would become NA.
avocado$month <- factor(
  month.name[as.integer(format(avocado$Date, "%m"))],
  levels = month.name
)

# Mean price per year, month and avocado type.
# No select() is needed: summarise() only returns the grouping columns plus
# the summary. `.groups = "drop"` returns an ungrouped table, so later steps
# do not inherit a leftover year/month grouping and no grouping message is
# printed.
grouped <- avocado %>%
  group_by(year, month, type) %>%
  summarise(averagePrice = mean(AveragePrice), .groups = "drop")

#Visualización básica

# Plot-size options under the repr.* namespace (presumably read by a
# notebook front end; they have no effect on plain graphics devices).
options(repr.plot.width = 12, repr.plot.height = 5)

# One line per year of the monthly mean price, with one panel per avocado type.
# The facet refers to the `type` column of the plot data rather than to
# `grouped$type`, so the panels always follow the data given to ggplot().
ggplot(
  data = grouped,
  aes(x = month, y = averagePrice, colour = year, group = year)
) +
  geom_line() +
  facet_grid(. ~ type) +
  labs(
    colour = "Year",
    x = "Month",
    y = "Average Price",
    title = "Line Plot - Average monthly prices of avocado by avocado type for each year"
  ) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

# Nationwide series of conventional avocados used by the first ARIMA models.
# Only the "TotalUS" aggregate rows are kept: without this filter the rows of
# every region are interleaved, so consecutive observations are not
# consecutive weeks and the fixed 140 + 29 split used further down does not
# describe a single weekly series. The match ignores case and surrounding
# whitespace.
avocado_price <- avocado[
  tolower(trimws(avocado$region)) == "totalus" &
    avocado$type == "conventional",
]
# Date is already a Date column, so sort chronologically on it directly.
avocado_price <- avocado_price[order(avocado_price$Date), ]

##División de base de datos

# Everything except the eight multi-state regional aggregates, in one pass.
# Columns are referenced through dplyr's data mask (bare `region`) instead of
# `avocado$region`. Rows with a missing region are dropped explicitly, which
# is what the chained `!=` comparisons did implicitly.
avocadoCities <- avocado %>%
  filter(
    !is.na(region),
    !region %in% c(
      "California", "West", "Plains", "SouthCentral",
      "GreatLakes", "Northeast", "Midsouth", "Southeast"
    )
  )

# One data frame per regional aggregate. `region` is resolved inside the data
# being filtered (dplyr data masking) rather than through `avocado$region`.
avocadoRegionsC <- avocado %>% filter(region == "California")
avocadoRegionsW <- avocado %>% filter(region == "West")
avocadoRegionsP <- avocado %>% filter(region == "Plains")
avocadoRegionsSC <- avocado %>% filter(region == "SouthCentral")
avocadoRegionsG <- avocado %>% filter(region == "GreatLakes")
avocadoRegionsN <- avocado %>% filter(region == "Northeast")
avocadoRegionsM <- avocado %>% filter(region == "Midsouth")
avocadoRegionsS <- avocado %>% filter(region == "Southeast")

# Stack the eight regional subsets in a single call (same row order as
# binding them one after another).
avocadoRegions <- rbind(
  avocadoRegionsC, avocadoRegionsW, avocadoRegionsP, avocadoRegionsSC,
  avocadoRegionsG, avocadoRegionsN, avocadoRegionsM, avocadoRegionsS
)

#rm(avocadoRegionsW)
#rm(avocadoRegionsP)
#rm(avocadoRegionsSC)
#rm(avocadoRegionsG)
#rm(avocadoRegionsN)
#rm(avocadoRegionsM)
#rm(avocadoRegionsS)

##Coordenadas para elaboración de mapa de calor

En esta sección analizamos una primera aproximación del precio promedio de venta del aguacate a lo largo del tiempo, comparando el aguacate orgánico con el convencional. Podemos observar que el aguacate más caro es, por naturaleza, el orgánico, y que el precio más alto se da en septiembre de 2017, lo cual puede indicar alguna escasez de producción o una alta demanda en aquel tiempo. Una vez que tenemos esta aproximación de cómo se comportan nuestros datos, podemos proceder a realizar los análisis de regresiones.

En esta sección manipulamos los datos para que se puedan adaptar a nuestras visualizaciones.

# Price series with a seasonal period of 52 observations; missing prices are
# removed first.
week_price <- ts(na.omit(avocado_price$AveragePrice), frequency = 52)
# Loess-based seasonal decomposition with a periodic seasonal component.
decomp <- stl(week_price, s.window = "periodic")
# First differences of the raw prices. The argument name is spelled out in
# full (`differences`) instead of relying on partial argument matching.
week_price.diff <- diff(avocado_price$AveragePrice, differences = 1)

autoARIMA Forecast plot (cancelled)

# Baseline seasonal model, order (0,1,0) x (0,1,1) with period 52, fitted on
# the first 140 observations.
fit.week <- arima(
  week_price[1:140],
  order = c(0, 1, 0),
  seasonal = list(order = c(0, 1, 1), period = 52)
)
# Predictions for the 29 observations that follow the training window.
pre.week <- predict(object = fit.week, n.ahead = 29)
# Held-out actual prices (first column) against the predictions (second).
matplot(
  x = 1:29,
  y = cbind(week_price[141:169], pre.week$pred),
  type = "l",
  xlab = "Weeks",
  ylab = "Average Price across the US ($)",
  main = "autoARIMA"
)

# Podemos observar que este gráfico no es el más óptimo; por lo tanto, utilizaremos el ARIMA manual para observar cómo cambian los resultados.

# 29-step forecast from the baseline model, with the observed series drawn on
# top. The title states the orders of the model stored in fit.week,
# (0,1,0)(0,1,1) with period 52, so the plot is labelled with the model that
# produced it.
fweekcast <- forecast(fit.week, h = 29)
{
  plot(fweekcast, main = expression("auto.ARIMA(0,1,0)(0,1,1)"[52]))
  lines(ts(week_price))
}

Manual ARIMA

# Manually specified seasonal model, order (3,0,0) x (0,1,1) with period 52,
# fitted on the first 140 observations.
man.fit.week <- arima(
  week_price[1:140],
  order = c(3, 0, 0),
  seasonal = list(order = c(0, 1, 1), period = 52)
)
# Predictions for the 29 observations that follow the training window.
man.pre.week <- predict(object = man.fit.week, n.ahead = 29)
# Held-out actual prices (first column) against the predictions (second).
matplot(
  x = 1:29,
  y = cbind(week_price[141:169], man.pre.week$pred),
  type = "l",
  xlab = "Weeks",
  ylab = "Average Price across the US ($)",
  main = "Manual ARIMA"
)

# 29-step forecast from the manual model, with the observed series drawn on
# top. The title states the orders of the model stored in man.fit.week,
# (3,0,0)(0,1,1) with period 52, so the plot is labelled with the model that
# produced it.
fmanweek1 <- forecast(man.fit.week, h = 29)
{
  plot(fmanweek1, main = expression("ARIMA(3,0,0)(0,1,1)"[52]))
  lines(ts(week_price))
}

Como podemos observar, el ARIMA manual resultó más satisfactorio, ya que su margen de error es mucho menor que el del modelo anterior.

Ahora un ARIMA para todas las regiones

California

# California: conventional avocados only, sorted chronologically (Date is
# already a Date column, so it is not re-parsed).
avocado_price <- avocadoRegionsC[avocadoRegionsC$type == "conventional", ]
avocado_price <- avocado_price[order(avocado_price$Date), ]

# Price series (seasonal period 52), its decomposition and first differences.
week_price <- ts(na.omit(avocado_price$AveragePrice), frequency = 52)
decomp <- stl(week_price, s.window = "periodic")
week_price.diff <- diff(avocado_price$AveragePrice, differences = 1)

# Baseline model (0,1,0)(0,1,1)[52]; its predictions are not plotted in this
# section.
fit.week <- arima(
  week_price[1:140],
  order = c(0, 1, 0),
  seasonal = list(order = c(0, 1, 1), period = 52)
)
pre.week <- predict(fit.week, n.ahead = 29)

# Manual model (3,0,0)(0,1,1)[52]: train on the first 140 observations and
# compare the next 29 actual prices with the predictions.
man.fit.week <- arima(
  week_price[1:140],
  order = c(3, 0, 0),
  seasonal = list(order = c(0, 1, 1), period = 52)
)
man.pre.week <- predict(man.fit.week, n.ahead = 29)
matplot(
  1:29, cbind(week_price[141:169], man.pre.week$pred),
  type = "l", xlab = "Weeks",
  ylab = "Average Price - California ($)", main = "Manual ARIMA"
)

# Forecast plot, titled with the orders that were actually fitted.
fmanweek1 <- forecast(man.fit.week, h = 29)
{
  plot(fmanweek1, main = expression("ARIMA(3,0,0)(0,1,1)"[52]))
  lines(ts(week_price))
}

# Replace the regional subset with its forecast table and attach the map
# coordinates. Longitude is negative because the region lies west of the
# prime meridian; a positive value would put it in the eastern hemisphere.
avocadoRegionsC <- as.data.frame(fmanweek1)
avocadoRegionsC$lat <- 38.5816
avocadoRegionsC$lon <- -121.4944

West

# West: conventional avocados only, sorted chronologically (Date is already
# a Date column, so it is not re-parsed).
avocado_price <- avocadoRegionsW[avocadoRegionsW$type == "conventional", ]
avocado_price <- avocado_price[order(avocado_price$Date), ]

# Price series (seasonal period 52), its decomposition and first differences.
week_price <- ts(na.omit(avocado_price$AveragePrice), frequency = 52)
decomp <- stl(week_price, s.window = "periodic")
week_price.diff <- diff(avocado_price$AveragePrice, differences = 1)

# Baseline model (0,1,0)(0,1,1)[52]; its predictions are not plotted in this
# section.
fit.week <- arima(
  week_price[1:140],
  order = c(0, 1, 0),
  seasonal = list(order = c(0, 1, 1), period = 52)
)
pre.week <- predict(fit.week, n.ahead = 29)

# Manual model (3,0,0)(0,1,1)[52]: train on the first 140 observations and
# compare the next 29 actual prices with the predictions.
man.fit.week <- arima(
  week_price[1:140],
  order = c(3, 0, 0),
  seasonal = list(order = c(0, 1, 1), period = 52)
)
man.pre.week <- predict(man.fit.week, n.ahead = 29)
matplot(
  1:29, cbind(week_price[141:169], man.pre.week$pred),
  type = "l", xlab = "Weeks",
  ylab = "Average Price - West ($)", main = "Manual ARIMA"
)

# Forecast plot, titled with the orders that were actually fitted.
fmanweek1 <- forecast(man.fit.week, h = 29)
{
  plot(fmanweek1, main = expression("ARIMA(3,0,0)(0,1,1)"[52]))
  lines(ts(week_price))
}

# Replace the regional subset with its forecast table and attach the map
# coordinates. Longitude is negative because the region lies west of the
# prime meridian; a positive value would put it in the eastern hemisphere.
avocadoRegionsW <- as.data.frame(fmanweek1)
avocadoRegionsW$lat <- 39.5994
avocadoRegionsW$lon <- -110.8107

Plains

# Plains: conventional avocados only, sorted chronologically (Date is already
# a Date column, so it is not re-parsed).
avocado_price <- avocadoRegionsP[avocadoRegionsP$type == "conventional", ]
avocado_price <- avocado_price[order(avocado_price$Date), ]

# Price series (seasonal period 52), its decomposition and first differences.
week_price <- ts(na.omit(avocado_price$AveragePrice), frequency = 52)
decomp <- stl(week_price, s.window = "periodic")
week_price.diff <- diff(avocado_price$AveragePrice, differences = 1)

# Baseline model (0,1,0)(0,1,1)[52]; its predictions are not plotted in this
# section.
fit.week <- arima(
  week_price[1:140],
  order = c(0, 1, 0),
  seasonal = list(order = c(0, 1, 1), period = 52)
)
pre.week <- predict(fit.week, n.ahead = 29)

# Manual model (3,0,0)(0,1,1)[52]: train on the first 140 observations and
# compare the next 29 actual prices with the predictions.
man.fit.week <- arima(
  week_price[1:140],
  order = c(3, 0, 0),
  seasonal = list(order = c(0, 1, 1), period = 52)
)
man.pre.week <- predict(man.fit.week, n.ahead = 29)
matplot(
  1:29, cbind(week_price[141:169], man.pre.week$pred),
  type = "l", xlab = "Weeks",
  ylab = "Average Price - Plains ($)", main = "Manual ARIMA"
)

# Forecast plot, titled with the orders that were actually fitted.
fmanweek1 <- forecast(man.fit.week, h = 29)
{
  plot(fmanweek1, main = expression("ARIMA(3,0,0)(0,1,1)"[52]))
  lines(ts(week_price))
}

# Replace the regional subset with its forecast table and attach the map
# coordinates. Longitude is negative because the region lies west of the
# prime meridian; a positive value would put it in the eastern hemisphere.
avocadoRegionsP <- as.data.frame(fmanweek1)
avocadoRegionsP$lat <- 43.9695
avocadoRegionsP$lon <- -99.9018

South Central

# South Central: conventional avocados only, sorted chronologically (Date is
# already a Date column, so it is not re-parsed).
avocado_price <- avocadoRegionsSC[avocadoRegionsSC$type == "conventional", ]
avocado_price <- avocado_price[order(avocado_price$Date), ]

# Price series (seasonal period 52), its decomposition and first differences.
week_price <- ts(na.omit(avocado_price$AveragePrice), frequency = 52)
decomp <- stl(week_price, s.window = "periodic")
week_price.diff <- diff(avocado_price$AveragePrice, differences = 1)

# Baseline model (0,1,0)(0,1,1)[52]; its predictions are not plotted in this
# section.
fit.week <- arima(
  week_price[1:140],
  order = c(0, 1, 0),
  seasonal = list(order = c(0, 1, 1), period = 52)
)
pre.week <- predict(fit.week, n.ahead = 29)

# Manual model (3,0,0)(0,1,1)[52]: train on the first 140 observations and
# compare the next 29 actual prices with the predictions.
man.fit.week <- arima(
  week_price[1:140],
  order = c(3, 0, 0),
  seasonal = list(order = c(0, 1, 1), period = 52)
)
man.pre.week <- predict(man.fit.week, n.ahead = 29)
matplot(
  1:29, cbind(week_price[141:169], man.pre.week$pred),
  type = "l", xlab = "Weeks",
  ylab = "Average Price - South Central ($)", main = "Manual ARIMA"
)

# Forecast plot, titled with the orders that were actually fitted.
fmanweek1 <- forecast(man.fit.week, h = 29)
{
  plot(fmanweek1, main = expression("ARIMA(3,0,0)(0,1,1)"[52]))
  lines(ts(week_price))
}

# Replace the regional subset with its forecast table and attach the map
# coordinates. Longitude is negative because the region lies west of the
# prime meridian; a positive value would put it in the eastern hemisphere.
avocadoRegionsSC <- as.data.frame(fmanweek1)
avocadoRegionsSC$lat <- 34.6036
avocadoRegionsSC$lon <- -98.3959

Great Lakes

# Great Lakes: conventional avocados only, sorted chronologically (Date is
# already a Date column, so it is not re-parsed).
avocado_price <- avocadoRegionsG[avocadoRegionsG$type == "conventional", ]
avocado_price <- avocado_price[order(avocado_price$Date), ]

# Price series (seasonal period 52), its decomposition and first differences.
week_price <- ts(na.omit(avocado_price$AveragePrice), frequency = 52)
decomp <- stl(week_price, s.window = "periodic")
week_price.diff <- diff(avocado_price$AveragePrice, differences = 1)

# Baseline model (0,1,0)(0,1,1)[52]; its predictions are not plotted in this
# section.
fit.week <- arima(
  week_price[1:140],
  order = c(0, 1, 0),
  seasonal = list(order = c(0, 1, 1), period = 52)
)
pre.week <- predict(fit.week, n.ahead = 29)

# Manual model (3,0,0)(0,1,1)[52]: train on the first 140 observations and
# compare the next 29 actual prices with the predictions.
man.fit.week <- arima(
  week_price[1:140],
  order = c(3, 0, 0),
  seasonal = list(order = c(0, 1, 1), period = 52)
)
man.pre.week <- predict(man.fit.week, n.ahead = 29)
matplot(
  1:29, cbind(week_price[141:169], man.pre.week$pred),
  type = "l", xlab = "Weeks",
  ylab = "Average Price - Great Lakes ($)", main = "Manual ARIMA"
)

# Forecast plot, titled with the orders that were actually fitted.
fmanweek1 <- forecast(man.fit.week, h = 29)
{
  plot(fmanweek1, main = expression("ARIMA(3,0,0)(0,1,1)"[52]))
  lines(ts(week_price))
}

# Replace the regional subset with its forecast table and attach the map
# coordinates. Longitude is negative because the region lies west of the
# prime meridian; a positive value would put it in the eastern hemisphere.
avocadoRegionsG <- as.data.frame(fmanweek1)
avocadoRegionsG$lat <- 41.8781
avocadoRegionsG$lon <- -87.6298

Northeast

# Northeast: conventional avocados only, sorted chronologically (Date is
# already a Date column, so it is not re-parsed).
avocado_price <- avocadoRegionsN[avocadoRegionsN$type == "conventional", ]
avocado_price <- avocado_price[order(avocado_price$Date), ]

# Price series (seasonal period 52), its decomposition and first differences.
week_price <- ts(na.omit(avocado_price$AveragePrice), frequency = 52)
decomp <- stl(week_price, s.window = "periodic")
week_price.diff <- diff(avocado_price$AveragePrice, differences = 1)

# Baseline model (0,1,0)(0,1,1)[52]; its predictions are not plotted in this
# section.
fit.week <- arima(
  week_price[1:140],
  order = c(0, 1, 0),
  seasonal = list(order = c(0, 1, 1), period = 52)
)
pre.week <- predict(fit.week, n.ahead = 29)

# Manual model (3,0,0)(0,1,1)[52]: train on the first 140 observations and
# compare the next 29 actual prices with the predictions.
man.fit.week <- arima(
  week_price[1:140],
  order = c(3, 0, 0),
  seasonal = list(order = c(0, 1, 1), period = 52)
)
man.pre.week <- predict(man.fit.week, n.ahead = 29)
matplot(
  1:29, cbind(week_price[141:169], man.pre.week$pred),
  type = "l", xlab = "Weeks",
  ylab = "Average Price - Northeast ($)", main = "Manual ARIMA"
)

# Forecast plot, titled with the orders that were actually fitted.
fmanweek1 <- forecast(man.fit.week, h = 29)
{
  plot(fmanweek1, main = expression("ARIMA(3,0,0)(0,1,1)"[52]))
  lines(ts(week_price))
}

# Replace the regional subset with its forecast table and attach the map
# coordinates. Longitude is negative because the region lies west of the
# prime meridian; a positive value would put it in the eastern hemisphere.
avocadoRegionsN <- as.data.frame(fmanweek1)
avocadoRegionsN$lat <- 40.7831
avocadoRegionsN$lon <- -73.9712

Midsouth

# Midsouth: conventional avocados only, sorted chronologically (Date is
# already a Date column, so it is not re-parsed).
avocado_price <- avocadoRegionsM[avocadoRegionsM$type == "conventional", ]
avocado_price <- avocado_price[order(avocado_price$Date), ]

# Price series (seasonal period 52), its decomposition and first differences.
week_price <- ts(na.omit(avocado_price$AveragePrice), frequency = 52)
decomp <- stl(week_price, s.window = "periodic")
week_price.diff <- diff(avocado_price$AveragePrice, differences = 1)

# Baseline model (0,1,0)(0,1,1)[52]; its predictions are not plotted in this
# section.
fit.week <- arima(
  week_price[1:140],
  order = c(0, 1, 0),
  seasonal = list(order = c(0, 1, 1), period = 52)
)
pre.week <- predict(fit.week, n.ahead = 29)

# Manual model (3,0,0)(0,1,1)[52]: train on the first 140 observations and
# compare the next 29 actual prices with the predictions.
man.fit.week <- arima(
  week_price[1:140],
  order = c(3, 0, 0),
  seasonal = list(order = c(0, 1, 1), period = 52)
)
man.pre.week <- predict(man.fit.week, n.ahead = 29)
matplot(
  1:29, cbind(week_price[141:169], man.pre.week$pred),
  type = "l", xlab = "Weeks",
  ylab = "Average Price - Midsouth ($)", main = "Manual ARIMA"
)

# Forecast plot, titled with the orders that were actually fitted.
fmanweek1 <- forecast(man.fit.week, h = 29)
{
  plot(fmanweek1, main = expression("ARIMA(3,0,0)(0,1,1)"[52]))
  lines(ts(week_price))
}

# Replace the regional subset with its forecast table and attach the map
# coordinates. Longitude is negative because the region lies west of the
# prime meridian; a positive value would put it in the eastern hemisphere.
avocadoRegionsM <- as.data.frame(fmanweek1)
avocadoRegionsM$lat <- 36.1627
avocadoRegionsM$lon <- -86.7816

Southeast

# Southeast: conventional avocados only, sorted chronologically (Date is
# already a Date column, so it is not re-parsed).
avocado_price <- avocadoRegionsS[avocadoRegionsS$type == "conventional", ]
avocado_price <- avocado_price[order(avocado_price$Date), ]

# Price series (seasonal period 52), its decomposition and first differences.
week_price <- ts(na.omit(avocado_price$AveragePrice), frequency = 52)
decomp <- stl(week_price, s.window = "periodic")
week_price.diff <- diff(avocado_price$AveragePrice, differences = 1)

# Baseline model (0,1,0)(0,1,1)[52]; its predictions are not plotted in this
# section.
fit.week <- arima(
  week_price[1:140],
  order = c(0, 1, 0),
  seasonal = list(order = c(0, 1, 1), period = 52)
)
pre.week <- predict(fit.week, n.ahead = 29)

# Manual model (3,0,0)(0,1,1)[52]: train on the first 140 observations and
# compare the next 29 actual prices with the predictions.
man.fit.week <- arima(
  week_price[1:140],
  order = c(3, 0, 0),
  seasonal = list(order = c(0, 1, 1), period = 52)
)
man.pre.week <- predict(man.fit.week, n.ahead = 29)
matplot(
  1:29, cbind(week_price[141:169], man.pre.week$pred),
  type = "l", xlab = "Weeks",
  ylab = "Average Price - Southeast ($)", main = "Manual ARIMA"
)

# Forecast plot, titled with the orders that were actually fitted.
fmanweek1 <- forecast(man.fit.week, h = 29)
{
  plot(fmanweek1, main = expression("ARIMA(3,0,0)(0,1,1)"[52]))
  lines(ts(week_price))
}

# Replace the regional subset with its forecast table and attach the map
# coordinates. Longitude is negative because the region lies west of the
# prime meridian; a positive value would put it in the eastern hemisphere.
avocadoRegionsS <- as.data.frame(fmanweek1)
avocadoRegionsS$lat <- 33.0362
avocadoRegionsS$lon <- -85.0322
# Stack the eight regional forecast tables in one call, keeping the order
# California, West, Plains, South Central, Great Lakes, Northeast, Midsouth,
# Southeast.
avocadoRegions <- rbind(
  avocadoRegionsC,
  avocadoRegionsW,
  avocadoRegionsP,
  avocadoRegionsSC,
  avocadoRegionsG,
  avocadoRegionsN,
  avocadoRegionsM,
  avocadoRegionsS
)

# Export the combined table (row names are written as the first column).
write.csv(avocadoRegions, file = "avocadoRegions.csv")

Aunque South Central está cerca de México, esta región no consume tanto aguacate como otras regiones más lejanas, como el noreste de Estados Unidos.

Pronóstico de ventas

En este caso, es muy importante hablar de la estacionalidad, sobre todo en eventos donde el aguacate se consume mucho: por ejemplo, en febrero, durante la temporada del Super Bowl en todo el mundo, o en septiembre, en el caso de México y todas las festividades del mes patrio. Por lo tanto, nuestro modelo ARIMA también pronostica ventas altas para estas fechas en las zonas correspondientes.

RECOMENDACIONES

  1. Analizar las futuras exigencias de mercado por zona sobre todo para establecer precios accesibles dependiendo de la comunidad y su rango de ingresos.
  2. Establecer estrategias de mercadotecnia y difusión para la promoción de la frescura de productos, y diferentes recetas u opciones viables que pueden hacer para distribuirse.
  3. Utilizar modelos de pronóstico sobre todo para observar las capacidades que tenemos de obtener proveedores razonables y que las redes logísticas sean mucho más viables.