#load libraries
library(tidyverse)
## Warning: package 'tidyverse' was built under R version 4.2.3
## Warning: package 'ggplot2' was built under R version 4.2.3
## Warning: package 'tibble' was built under R version 4.2.3
## Warning: package 'purrr' was built under R version 4.2.3
## Warning: package 'dplyr' was built under R version 4.2.3
## Warning: package 'lubridate' was built under R version 4.2.3
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.3 ✔ readr 2.1.4
## ✔ forcats 1.0.0 ✔ stringr 1.5.0
## ✔ ggplot2 3.5.1 ✔ tibble 3.2.1
## ✔ lubridate 1.9.3 ✔ tidyr 1.3.0
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(skimr)
## Warning: package 'skimr' was built under R version 4.2.3
library(GGally)
## Warning: package 'GGally' was built under R version 4.2.3
## Registered S3 method overwritten by 'GGally':
## method from
## +.gg ggplot2
library(viridis)
## Warning: package 'viridis' was built under R version 4.2.3
## Loading required package: viridisLite
## Warning: package 'viridisLite' was built under R version 4.2.3
library(caret)
## Warning: package 'caret' was built under R version 4.2.3
## Loading required package: lattice
##
## Attaching package: 'caret'
##
## The following object is masked from 'package:purrr':
##
## lift
library(e1071)
## Warning: package 'e1071' was built under R version 4.2.3
library(rpart)
library(xgboost)
## Warning: package 'xgboost' was built under R version 4.2.3
##
## Attaching package: 'xgboost'
##
## The following object is masked from 'package:dplyr':
##
## slice
library(corrplot)
## Warning: package 'corrplot' was built under R version 4.2.3
## corrplot 0.92 loaded
library(corrgram)
## Warning: package 'corrgram' was built under R version 4.2.3
##
## Attaching package: 'corrgram'
##
## The following object is masked from 'package:lattice':
##
## panel.fill
library(ggplot2)
library(ggthemes)
## Warning: package 'ggthemes' was built under R version 4.2.3
library(psych)
##
## Attaching package: 'psych'
##
## The following objects are masked from 'package:ggplot2':
##
## %+%, alpha
library(scales)
## Warning: package 'scales' was built under R version 4.2.3
##
## Attaching package: 'scales'
##
## The following objects are masked from 'package:psych':
##
## alpha, rescale
##
## The following object is masked from 'package:viridis':
##
## viridis_pal
##
## The following object is masked from 'package:purrr':
##
## discard
##
## The following object is masked from 'package:readr':
##
## col_factor
library(treemap)
## Warning: package 'treemap' was built under R version 4.2.3
library(repr)
## Warning: package 'repr' was built under R version 4.2.3
library(cowplot)
## Warning: package 'cowplot' was built under R version 4.2.3
##
## Attaching package: 'cowplot'
##
## The following object is masked from 'package:ggthemes':
##
## theme_map
##
## The following object is masked from 'package:lubridate':
##
## stamp
library(magrittr)
##
## Attaching package: 'magrittr'
##
## The following object is masked from 'package:purrr':
##
## set_names
##
## The following object is masked from 'package:tidyr':
##
## extract
library(ggpubr)
## Warning: package 'ggpubr' was built under R version 4.2.3
##
## Attaching package: 'ggpubr'
##
## The following object is masked from 'package:cowplot':
##
## get_legend
library(RColorBrewer)
library(plotrix)
##
## Attaching package: 'plotrix'
##
## The following object is masked from 'package:scales':
##
## rescale
##
## The following object is masked from 'package:psych':
##
## rescale
library(ggrepel)
## Warning: package 'ggrepel' was built under R version 4.2.3
library(lubridate)
library(gridExtra)
## Warning: package 'gridExtra' was built under R version 4.2.3
##
## Attaching package: 'gridExtra'
##
## The following object is masked from 'package:dplyr':
##
## combine
library(tibbletime)
## Warning: package 'tibbletime' was built under R version 4.2.3
##
## Attaching package: 'tibbletime'
##
## The following object is masked from 'package:stats':
##
## filter
library(reshape2)
## Warning: package 'reshape2' was built under R version 4.2.3
##
## Attaching package: 'reshape2'
##
## The following object is masked from 'package:tidyr':
##
## smiths
library(tidyr)
library(ggpubr)
library(grid)
library(smooth)
## Warning: package 'smooth' was built under R version 4.2.3
## Loading required package: greybox
## Warning: package 'greybox' was built under R version 4.2.3
## Registered S3 method overwritten by 'greybox':
## method from
## print.pcor lava
## Package "greybox", v2.0.0 loaded.
##
##
## Attaching package: 'greybox'
##
## The following object is masked from 'package:caret':
##
## MAE
##
## The following object is masked from 'package:lubridate':
##
## hm
##
## The following object is masked from 'package:tidyr':
##
## spread
##
## This is package "smooth", v4.0.1
library(forecast)
## Warning: package 'forecast' was built under R version 4.2.3
## Registered S3 method overwritten by 'quantmod':
## method from
## as.zoo.data.frame zoo
##
## Attaching package: 'forecast'
##
## The following object is masked from 'package:ggpubr':
##
## gghistogram
library(fpp2)
## Warning: package 'fpp2' was built under R version 4.2.3
## ── Attaching packages ────────────────────────────────────────────── fpp2 2.5 ──
## ✔ fma 2.5 ✔ expsmooth 2.3
## Warning: package 'fma' was built under R version 4.2.3
## Warning: package 'expsmooth' was built under R version 4.2.3
## ── Conflicts ───────────────────────────────────────────────── fpp2_conflicts ──
## ✖ psych::%+%() masks ggplot2::%+%()
## ✖ scales::alpha() masks psych::alpha(), ggplot2::alpha()
## ✖ scales::discard() masks purrr::discard()
## ✖ magrittr::extract() masks tidyr::extract()
## ✖ forecast::gghistogram() masks ggpubr::gghistogram()
## ✖ caret::lift() masks purrr::lift()
## ✖ magrittr::set_names() masks purrr::set_names()
df <- read.csv("avocado.csv")
avo <- df
levels(df$type)
## NULL
dim(df)
## [1] 18249 14
str(df)
## 'data.frame': 18249 obs. of 14 variables:
## $ X : int 0 1 2 3 4 5 6 7 8 9 ...
## $ Date : chr "2015-12-27" "2015-12-20" "2015-12-13" "2015-12-06" ...
## $ AveragePrice: num 1.33 1.35 0.93 1.08 1.28 1.26 0.99 0.98 1.02 1.07 ...
## $ Total.Volume: num 64237 54877 118220 78992 51040 ...
## $ X4046 : num 1037 674 795 1132 941 ...
## $ X4225 : num 54455 44639 109150 71976 43838 ...
## $ X4770 : num 48.2 58.3 130.5 72.6 75.8 ...
## $ Total.Bags : num 8697 9506 8145 5811 6184 ...
## $ Small.Bags : num 8604 9408 8042 5677 5986 ...
## $ Large.Bags : num 93.2 97.5 103.1 133.8 197.7 ...
## $ XLarge.Bags : num 0 0 0 0 0 0 0 0 0 0 ...
## $ type : chr "conventional" "conventional" "conventional" "conventional" ...
## $ year : int 2015 2015 2015 2015 2015 2015 2015 2015 2015 2015 ...
## $ region : chr "Albany" "Albany" "Albany" "Albany" ...
ggplot(df, aes(x = AveragePrice, fill = type)) +
geom_density() + facet_wrap(~type) + theme_minimal() +
theme(plot.title = element_text(hjust = 0.5), legend.position = "bottom") + labs(title = "Avocado Price by TYpe")+scale_fill_brewer(palette = "Set2")
vol_type <- df %>% group_by(type) %>% summarise(avg.vol = mean(Total.Volume)) %>% mutate(pct = prop.table(avg.vol) * 100)
vol_type
## # A tibble: 2 × 3
## type avg.vol pct
## <chr> <dbl> <dbl>
## 1 conventional 1653213. 97.2
## 2 organic 47811. 2.81
#ggplot(df, aes(x = date, y = value)) + geom_line(aes(color = variable), size = 1) +
#scale_color_manual(values = c("#00AFBB", "#E7B800")) +
#theme_minimal()
#change date from factor to date
df$Date <- as.Date(df$Date,"%Y-%m-%d")
class(df$Date)
## [1] "Date"
#sort the dates
df <- df[order(as.Date(df$Date, format = "%Y-%m-%d")), ]
price_trend <- df %>% select(Date, AveragePrice, type) %>%
ggplot(aes(x = Date, y = AveragePrice)) + geom_area(aes(color = type, fill = type), alpha = 0.3, position = position_dodge(0.8)) +
theme_minimal() + scale_color_manual(values = c("#ED7921", "#62BE51")) +
scale_fill_manual(values = c("#FD833E", "#B8FC5F"))
price_trend
#face wrap for each product
ggplot(df, aes(x = Date, y = AveragePrice, col = type)) +
geom_line() + facet_wrap(~ type) + theme_minimal() +
theme(legend.position = "bottom")
#filter by type
organic <- df %>% select(Date, AveragePrice, type, Total.Volume) %>%
filter(type == "organic")
conventional <- df %>% select(Date, AveragePrice, type, Total.Volume) %>%
filter(type == "conventional")
organic <- as_tbl_time(organic, index=Date)
organic <- as_period(organic, '1 month')
#conventional avacados
conventional <- as_tbl_time(conventional, index=Date)
conventional <- as_period(conventional, '1 month')
#Relationship between Prices and Total Volume
#monthly avocadoes price
conventional_monthly <- conventional %>%
ggplot(aes(x= Date, y = AveragePrice)) + geom_line(color = "#7FB3D5") +
theme_economist()+
theme(plot.title = element_text(hjust = 0.5), plot.background = element_rect(fill = "#D5D8DC")) +
labs(title = "Conventional Avocados") + geom_hline(yintercept = max(conventional$AveragePrice), linetype = "dashed", color = "red") +
geom_hline(yintercept = min(conventional$AveragePrice), linetype = "dashed", color = "blue")
conventional_monthly
#Volume chart
conventional_volume <- conventional %>%
ggplot(aes(x = Date, y = Total.Volume)) + geom_bar(stat = "identity", fill="#7FB3D5", color="black") + theme_economist() +
theme(plot.title = element_text(hjust = 0.5), plot.background = element_rect(fill = "#D5D8DC")) +
geom_smooth(method = "loess", color = "red")
conventional_volume
## `geom_smooth()` using formula = 'y ~ x'
organic_monthly <- organic %>%
ggplot(aes(x = Date, y = AveragePrice)) + geom_line(color="#58D68D" ) + theme_economist() +
theme(plot.title = element_text(hjust = 0.5), plot.background = element_rect(fill = "#D5D8DC")) +
labs(title = "Organic Avacado") + geom_hline(yintercept = max(organic$AveragePrice), linetype = "dashed", color = "red") +
geom_hline(yintercept = min(organic$AveragePrice), linetype = "dashed", color = "blue")
organic_volume <- organic %>%
ggplot(aes(x = Date, y = Total.Volume)) + geom_bar(stat = "identity", fill="#7FB3D5", color="black") + theme_economist() +
theme(plot.title = element_text(hjust = 0.5), plot.background = element_rect(fill = "#D5D8DC")) +
geom_smooth(method = "loess", color = "red")
plot_grid(conventional_monthly, organic_monthly, conventional_volume, organic_volume, nrow = 2, ncol = 2)
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
seasonal_df <- avo
seasonal_df$month_year <- format(as.Date(avo$Date), "%Y-%m")
seasonal_df$month <- format(as.Date(avo$Date), "%m")
seasonal_df$year <- format(as.Date(avo$Date), "%Y")
seasonal_df$monthabb <- sapply(seasonal_df$month, function(x) month.abb[as.numeric(x)])
seasonal_df$monthabb = factor(seasonal_df$monthabb, levels = month.abb)
#check seasonal patterns with conventional avocados
ggplot(seasonal_df, aes(x = AveragePrice, fill = as.factor(year))) +
geom_density(alpha = 0.5) +
theme_economist() +
facet_wrap(~year) + theme(plot.title = element_text(hjust = 0.5), plot.background = element_rect(fill = "#F9E79F")) +
guides(fill = F) +
labs(title = "Distribution of Prices by year", x = "Average PRice", y = "Density") +
scale_fill_manual(values = c("#2E64FE", "#40FF00", "#FE642E", "#FE2E2E"))
## Warning: The `<scale>` argument of `guides()` cannot be `FALSE`. Use "none" instead as
## of ggplot2 3.3.4.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
#Detecting seasonality patterns
conv_patterns <- seasonal_df %>% select(monthabb, AveragePrice, type) %>% filter(type == "conventional") %>%
group_by(monthabb) %>%
summarize(avg = mean(AveragePrice)) %>%
ggplot(aes(x = monthabb, y=avg)) +
geom_point(color = "#F35D5D", aes(size = avg)) + geom_line(group = 1, color = "#7FB3D5") +
theme_economist() +
theme(legend.position = "none", plot.title = element_text(hjust = 0.5), plot.background = element_rect(fill = "#F9E79F")) +
labs(title = "Conventional Avocados", c = "Month", y = "Average Price")
org_patterns <- seasonal_df %>% select(monthabb, AveragePrice, type) %>% filter(type == "organic") %>%
group_by(monthabb) %>%
summarize(avg = mean(AveragePrice)) %>%
ggplot(aes(x = monthabb, y=avg)) +
geom_point(color = "#F35D5D", aes(size = avg)) + geom_line(group = 1, color = "#7FB3D5") +
theme_economist() +
theme(legend.position = "none", plot.title = element_text(hjust = 0.5), plot.background = element_rect(fill = "#F9E79F")) +
labs(title = "Organic Avocados", c = "Month", y = "Average Price")
plot_grid(conv_patterns, org_patterns, nrow = 2)
#chekc if seasonality pattern is maintained by each year
conv_pat_yearly <- seasonal_df %>% select(year, monthabb, AveragePrice, type) %>% filter(type == "conventional", year == c("2015", "2016","2017")) %>%
group_by(year, monthabb) %>% summarize(avg = mean(AveragePrice)) %>%
ggplot(aes(x = monthabb, y = avg)) + geom_point(color = "#5D6D7E") +
geom_line(group = 1, color = "#F7DC6F") + facet_wrap(~as.factor(year)) +
theme_minimal() +
theme(plot.title = element_text(hjust = 0.5), plot.background = element_rect(fill = "#F4F6F7"), axis.text.x = element_text(angle = 90)) +
labs(title = "Seasonal Fluctuations \n Conventional Avocados", x = "Month", y = "Average Price")
## `summarise()` has grouped output by 'year'. You can override using the
## `.groups` argument.
org_pat_yearly <- seasonal_df %>% select(year, monthabb, AveragePrice, type) %>% filter(type == "organic", year == c("2015", "2016","2017")) %>%
group_by(year, monthabb) %>% summarize(avg = mean(AveragePrice)) %>%
ggplot(aes(x = monthabb, y = avg)) + geom_point(color = "#5D6D7E") +
geom_line(group = 1, color = "#F7DC6F") + facet_wrap(~as.factor(year)) +
theme_minimal() +
theme(plot.title = element_text(hjust = 0.5), plot.background = element_rect(fill = "#F4F6F7"), axis.text.x = element_text(angle = 90)) +
labs(title = "Seasonal Fluctuations \n Organic Avocados", x = "Month", y = "Average Price")
## `summarise()` has grouped output by 'year'. You can override using the
## `.groups` argument.
plot_grid(conv_pat_yearly, org_pat_yearly, nrow = 2)
# Measuring standard deviation per month through each year by type of avocado.
std_conv <- seasonal_df %>% select(year, monthabb, AveragePrice, type) %>% filter(type == "conventional", year == c("2015", "2016", "2017")) %>% group_by(year, monthabb) %>%
summarize(std = sd(AveragePrice)) %>%
ggplot(aes(x = monthabb, y = std)) +
geom_point(aes(size = std), col = "#5A96C6") +
geom_segment(aes(x = monthabb,
xend = monthabb,
y = min(std),
yend = max(std)), linetype = "dashed", size = 0.1) +
coord_flip() + facet_wrap(~year) +theme_tufte()+
theme(plot.title = element_text(hjust = 0.5), plot.background = element_rect(fill = "#F4F6F7"), legend.position = "none") +
labs(title = "Conventional Avocados \n Price Volatility", x = "Months", y = "Standard Deviation")
## `summarise()` has grouped output by 'year'. You can override using the
## `.groups` argument.
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
std_org <- seasonal_df %>% select(year, monthabb, AveragePrice, type) %>% filter(type == "organic", year == c("2015", "2016", "2017")) %>% group_by(year, monthabb) %>%
summarize(std = sd(AveragePrice)) %>%
ggplot(aes(x = monthabb, y = std)) +
geom_point(aes(size = std), col = "#5AC67C") + #draw points
geom_segment(aes(x = monthabb,
xend = monthabb,
y = min(std),
yend = max(std)), linetype = "dashed", size = 0.1) +
coord_flip() + facet_wrap(~year) +theme_tufte()+
theme(plot.title = element_text(hjust = 0.5), plot.background = element_rect(fill = "#F4F6F7"), legend.position = "none") +
labs(title = "Organic Avocados \n Price Volatility", x = "Months", y = "Standard Deviation")
## `summarise()` has grouped output by 'year'. You can override using the
## `.groups` argument.
plot_grid(std_conv, std_org, ncol = 2)
# Let's have a closer look how the price changes per month.
# filter by type and year
options(repr.plot.width=10, repr.plot.height=8)
se <- function(x) sqrt(var(x) / length(x))
conv <- seasonal_df %>% select(year, monthabb, AveragePrice, type) %>%
filter(type == "conventional", year == c("2015", "2016", "2017")) %>%
group_by(year, monthabb) %>%
ggplot(aes(x = monthabb, y = AveragePrice, fill = monthabb), color = "white") + geom_bar(width = 1, stat = "identity") +
geom_errorbar(aes(ymin = AveragePrice - se(AveragePrice),
ymax = AveragePrice + se(AveragePrice),
color = monthabb),
width = 0.2) +
scale_y_continuous(breaks = 0:nlevels(seasonal_df$monthabb))+
facet_wrap(~year) + theme_minimal() +
theme(axis.ticks = element_blank(),
axis.text.y = element_blank(),
axis.title = element_blank(),
axis.line = element_blank(),
plot.background = element_rect(fill = "#FFF1E0"),
legend.position = "none", plot.title = element_text(hjust = 0.5))+ coord_polar() +
labs(title="Seasonal cycle \n Conventional Avocados") +
scale_fill_manual(values=c('#57FCE0', '#57A6FC', '#3C546E', '#4AFA76', '#95CFA4', '#C0E436', '#F2A42D', '#F25F2D', '#F2442D',
'#AB4949', '#4950AB', '#4974AB'))
orga <- seasonal_df %>% select(year, monthabb, AveragePrice, type) %>%
filter(type == "organic", year == c("2015", "2016", "2017")) %>%
group_by(year, monthabb) %>%
ggplot(aes(x = monthabb, y = AveragePrice, fill = monthabb), color = "white") + geom_bar(width = 1, stat = "identity") +
geom_errorbar(aes(ymin = AveragePrice - se(AveragePrice),
ymax = AveragePrice + se(AveragePrice),
color = monthabb),
width = 0.2) +
scale_y_continuous(breaks = 0:nlevels(seasonal_df$monthabb))+
facet_wrap(~year) + theme_minimal() +
theme(axis.ticks = element_blank(),
axis.text.y = element_blank(),
axis.title = element_blank(),
axis.line = element_blank(),
plot.background = element_rect(fill = "#FFF1E0"),
legend.position = "none", plot.title = element_text(hjust = 0.5))+ coord_polar() +
labs(title="Seasonal cycle \n Organic Avocados") +
scale_fill_manual(values=c('#57FCE0', '#57A6FC', '#3C546E', '#4AFA76', '#95CFA4', '#C0E436', '#F2A42D', '#F25F2D', '#F2442D',
'#AB4949', '#4950AB', '#4974AB'))
plot_grid(conv, orga, nrow = 2)
grid.arrange(conv, orga, nrow = 2)
r_avg <- seasonal_df %>% group_by(year, monthabb) %>% select(type, year, monthabb, AveragePrice) %>%
filter(type == "conventional", year == c("2015", "2016", "2017")) %>%
group_by(year, monthabb) %>%
summarize(avg = mean(AveragePrice))
## Warning: There were 2 warnings in `filter()`.
## The first warning was:
## ℹ In argument: `year == c("2015", "2016", "2017")`.
## ℹ In group 12: `year = "2015"`, `monthabb = Dec`.
## Caused by warning in `year == c("2015", "2016", "2017")`:
## ! longer object length is not a multiple of shorter object length
## ℹ Run `dplyr::last_dplyr_warnings()` to see the 1 remaining warning.
## `summarise()` has grouped output by 'year'. You can override using the
## `.groups` argument.
structured_data <- spread_(r_avg, key = "year", value = "avg")
## Warning: `spread_()` was deprecated in tidyr 1.2.0.
## ℹ Please use `spread()` instead.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
colnames(structured_data) <- c("Months", "First_year", "Second_year", "Third_year")
structured_data$First_pct <- NA
structured_data$Second_pct <- NA
structured_data$First_pct <- (structured_data$Second_year - structured_data$First_year) / structured_data$First_year
structured_data$Second_pct <- (structured_data$Third_year - structured_data$Second_year) / structured_data$Second_year
structured_data <- structured_data %>%
mutate(first_cond = ifelse(First_pct > 0, "Positive", "Negative"),
second_cond = ifelse(Second_pct > 0, "Positive", "Negative"))
firstp_chage <- ggplot(structured_data) +
geom_segment(aes(x = Months, xend = Months, y = First_year, yend = Second_year), color = "#6E6A6A") +
geom_point(aes(x = Months, y = First_year), color = "#F74B4B", size = 3) +
geom_point(aes(x = Months, y=Second_year), color = "#36ACD7", size = 3) +
coord_flip()+theme_economist()+
theme(
legend.position = "top",
plot.title = element_text(hjust = 0.5),
plot.background = element_rect(fill = "#F4F6F7")) +
labs(title="Conventional Avocado Price changes \n (2015 - 2016)", x="Months", y="Price",
caption="Red: Year of 2015, Blue: Year of 2016")
secondp_chage <- ggplot(structured_data) +
geom_segment(aes(x = Months, xend = Months, y = Second_year, yend = Third_year), color = "#6E6A6A") +
geom_point(aes(x = Months, y = Second_year), color = "#36ACD7", size = 3) +
geom_point(aes(x = Months, y=Third_year), color = "#58FA58", size = 3) +
coord_flip()+theme_economist()+
theme(
legend.position = "top",
plot.title = element_text(hjust = 0.5),
plot.background = element_rect(fill = "#F4F6F7")) +
labs(title="Conventional Avocado Price changes \n (2016 - 2017)", x="Months", y="Price",
caption="Blue: Year of 2016, Green: Year of 2017")
grid.arrange(firstp_chage, secondp_chage, ncol = 2)
first_pct_dif <- structured_data %>%
select(Months, First_pct, first_cond) %>%
ggplot(aes(fill = first_cond))+geom_bar(stat = "identity", aes(x = Months, y=round(First_pct, 2) * 100), color = "black") + theme_economist()+
theme(axis.text.x = element_text(angle = 90), plot.background = element_rect(fill = "#F4F6F7"), legend.position = "bottom") +
labs(x ="Month", y = "% Difference") +
guides(fill = guide_legend(title = "Diff Status")) +
scale_fill_manual(values = c("#FB4D42", "#ADE175"))
second_pct_dif <- structured_data %>%
select(Months, Second_pct, second_cond) %>%
ggplot(aes(fill = second_cond))+geom_bar(stat = "identity", aes(x = Months, y=round(Second_pct, 2) * 100), color = "black") + theme_economist()+
theme(axis.text.x = element_text(angle = 90), plot.background = element_rect(fill = "#F4F6F7"), legend.position = "bottom") +
labs(x ="Month", y = "% Difference") +
guides(fill = guide_legend(title = "Diff Status")) +
scale_fill_manual(values = c("#FB4D42", "#ADE175"))
plot_grid(first_pct_dif, second_pct_dif, ncol = 2)
#organic avacados
r_avg_org <- seasonal_df %>% group_by(year, monthabb) %>% select(type, year, monthabb, AveragePrice) %>%
filter(type == "organic", year == c("2015", "2016", "2017")) %>%
group_by(year, monthabb) %>%
summarize(avg=mean(AveragePrice))
## Warning: There were 2 warnings in `filter()`.
## The first warning was:
## ℹ In argument: `year == c("2015", "2016", "2017")`.
## ℹ In group 12: `year = "2015"`, `monthabb = Dec`.
## Caused by warning in `year == c("2015", "2016", "2017")`:
## ! longer object length is not a multiple of shorter object length
## ℹ Run `dplyr::last_dplyr_warnings()` to see the 1 remaining warning.
## `summarise()` has grouped output by 'year'. You can override using the
## `.groups` argument.
structured_data_org <- spread_(r_avg_org, key="year", value="avg")
## Warning: `spread_()` was deprecated in tidyr 1.2.0.
## ℹ Please use `spread()` instead.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
colnames(structured_data_org) <- c("Months", "First_year", "Second_year", "Third_year")
structured_data_org$first_pct <- NA
structured_data_org$second_pct <- NA
structured_data_org$first_pct <- (structured_data_org$Second_year - structured_data_org$First_year) / structured_data$First_year
structured_data_org$second_pct <- (structured_data_org$Third_year - structured_data_org$Second_year) / structured_data$Second_year
structured_data_org <- structured_data_org %>%
mutate(first_cond = ifelse(first_pct > 0, "Positive", "Negative"),
second_cond = ifelse(second_pct > 0, "Positive", "Negative"))
firstp_change_org <- ggplot(structured_data_org) +
geom_segment(aes(x = Months,
xend = Months,
y = First_year,
yend = Second_year), color = "#6E6A6A") +
geom_point(aes(x = Months, y = First_year), color = "#F74B4B", size = 3) +
geom_point(aes(x = Months, y = Second_year), color = "#36ACD7", size = 3) +
coord_flip() + theme_economist()+
theme(
legend.position = "top", plot.title = element_text(hjust = 0.5),
plot.background = element_rect(fill = "#DCFCE6")
) +
labs(title = "Organic Avocado Price changes \n (2015 - 2016)", x = "Months", y = "Price", caption = "Red: year of 2015 Blue: Year of 2016")
secondp_change_org <- ggplot(structured_data_org)+
geom_segment(aes(x = Months, xend = Months, y=Second_year, yend = Third_year), color = "#6E6A6A")+
geom_point(aes(x = Months, y = Second_year), color = "#36ACD7", size = 3)+
geom_point(aes(x = Months, y = Third_year), color = "#58FA58", size = 3) +
coord_flip() + theme_economist() +
theme(legend.position = "top", plot.title = element_text(hjust = 0.5),
plot.background = element_rect(fill = "#DCFCE6")) +
labs(title = "Organic Avocado Price changes \n (2016 - 2017)", x = "Months", y = "Price", caption = "Blue: Year of 2016, Green: Year of 2017")
plot_grid(firstp_change_org, secondp_change_org, ncol = 2)
first_pct_dif_org <- structured_data_org %>%
select(Months, first_pct, first_cond) %>%
ggplot(aes(fill = first_cond)) + geom_bar(stat = "identity", aes(x = Months, y = round(first_pct, 2) * 100), color = "black") +theme_economist() +
theme(axis.text.x = element_text(angle = 90), plot.background = element_rect(fill = "#DCFCE6"), legend.position = "bottom") +
labs(x = "Month", y = "% Difference") +
guides(fill = guide_legend(title = "Diff Status")) +
scale_fill_manual(values = c("#FB4D42", "#ADE175"))
second_pct_dif_org <- structured_data_org %>%
select(Months, second_pct, second_cond) %>%
ggplot(aes(fill = second_cond)) + geom_bar(stat = "identity", aes(x = Months, y = round(second_pct, 2) * 100), color = "black") +theme_economist() +
theme(axis.text.x = element_text(angle = 90), plot.background = element_rect(fill = "#DCFCE6"), legend.position = "bottom") +
labs(x = "Month", y = "% Difference") +
guides(fill = guide_legend(title = "Diff Status")) +
scale_fill_manual(values = c("#FB4D42", "#ADE175"))
plot_grid(first_pct_dif_org, second_pct_dif_org, nrow = 2)
#create a seasonal columna dn plot a point line chart by each year
seasonal_df$season <- ifelse(seasonal_df$month %in% c("03", "04", "05"),
"Spring", ifelse(seasonal_df$month %in% c("06", "07", "08"), "Summer",
ifelse(seasonal_df$month %in% c("09", "10", "11"), "Fall", "Winter")))
seasonality.plot.conventional <- seasonal_df %>% select(season, year, AveragePrice, type) %>%
filter(type == "conventional", year == c("2015", "2016", "2017")) %>%
group_by(season, year) %>%
summarize(avg = mean(AveragePrice)) %>%
ggplot(aes(x = season, y = avg, color= season)) +
geom_point(size = 3) +
geom_segment(aes(x = season, xend = season, y = 0, yend = avg)) +
coord_flip() +
facet_wrap(~as.factor(year)) + theme_minimal() +
theme(plot.title = element_text(hjust = 0.5), plot.background = element_rect(fill = "#F4F6F7")) +
scale_color_manual(values = c("#a06a31", "#9bd16b", "#d1706b", "#3bbf9e")) +
labs(title = "Conventional Avocados by Season" , x = "Season", y = "Average Price") + geom_text(aes(x = season, y = 0.01, label = paste0("$", round(avg, 2))), hjust = -0.5, vjust = -0.5, size = 4, color = "black", fontface = "italic", angle = 360)
## `summarise()` has grouped output by 'season'. You can override using the
## `.groups` argument.
seasonality.plot.org <- seasonal_df %>% select(season, year, AveragePrice, type) %>% filter(type == "organic", year == c("2015", "2016", "2017")) %>% group_by(season, year) %>%
summarize(avg = mean(AveragePrice)) %>% ggplot(aes(x = season, y = avg, color = season)) + geom_point(size = 3) +
geom_segment(aes(x = season, xend = season, y = 0, yend = avg))+
coord_flip() + facet_wrap(~as.factor(year)) + theme_minimal() +
theme(plot.title = element_text(hjust = 0.5), plot.background = element_rect("#F4F6F7")) +
scale_color_manual(values=c("#a06a31", "#9bd16b", "#d1706b", "#3bbf9e")) +
labs(title = "Organic Avacoados by Season", x = "Season", y = "Average Price") +
geom_text(aes(x = season, y = 0.01, label = paste0("$", round(avg, 2))),
hjust = -0.5, vjust = -0.5, size = 4, color = "black", fontface = "italic", angle= 360)
## `summarise()` has grouped output by 'season'. You can override using the
## `.groups` argument.
plot_grid(seasonality.plot.conventional, seasonality.plot.org, ncol = 2)
#Regional Analysis
regions_monthabb <- seasonal_df %>%
select(monthabb, Total.Volume) %>% group_by(monthabb) %>%
summarize(avg.volume = mean(Total.Volume)) %>%
ggplot(aes(x = monthabb, y = avg.volume)) + geom_bar(stat ="identity", fill = "#81FF5F") +
coord_flip() + theme_minimal() +
theme(plot.title = element_text(hjust = 0.5), plot.background = element_rect(fill = "#F4F6F7"))
price_monthabb <- seasonal_df %>% select(monthabb, AveragePrice) %>%
group_by(monthabb) %>% group_by(avg.price = mean(AveragePrice)) %>%
ggplot(aes(x = monthabb, y = avg.price)) + geom_bar(stat = "identity", fill = "#FF685F") + coord_flip() + theme_minimal() +
theme(legend.position = "none", plot.title = element_text(hjust = 0.5), plot.background = element_rect("#F4F6F7"))
#the higher the price the lower the volume
seasonal_df$volume_price_diff <- seasonal_df$Total.Volume - seasonal_df$AveragePrice
#if the month is higher there is higher volume and thus a lower price
volprice_diff <- seasonal_df %>%
group_by(year, monthabb) %>%
select(year, monthabb, type, volume_price_diff) %>%
filter(year == c("2015", "2016", "2017")) %>%
summarize(avg.diff = mean(volume_price_diff)) %>%
ggplot(aes(x = monthabb, y = avg.diff, group = year, color = year)) +
geom_area(aes(fill = year)) + theme_minimal() +
theme(
plot.title = element_text(hjust = 0.5), plot.background = element_rect(fill = "#F4F6F7"),
legend.position = "bottom", legend.background = element_rect(fill = "#FFF9F5", size = .5, linetype = "solid", color = "black")) +
scale_fill_manual(values=c("#6EB3EA", "#89F058", "#FBA31C")) + scale_color_manual(values=c("#407EAF","#68B842", "#F1711E"))
## Warning: There were 2 warnings in `filter()`.
## The first warning was:
## ℹ In argument: `year == c("2015", "2016", "2017")`.
## ℹ In group 12: `year = "2015"`, `monthabb = Dec`.
## Caused by warning in `year == c("2015", "2016", "2017")`:
## ! longer object length is not a multiple of shorter object length
## ℹ Run `dplyr::last_dplyr_warnings()` to see the 1 remaining warning.
## `summarise()` has grouped output by 'year'. You can override using the
## `.groups` argument.
## Warning: The `size` argument of `element_rect()` is deprecated as of ggplot2 3.4.0.
## ℹ Please use the `linewidth` argument instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
# pushViewport(viewport(layout = grid.layout(2,2)))
# vplayout <- function(x,y) viewport(layout.pos.row = x, layout.pos.col = y)
#
# print(regions_monthabb, vp = vplayout(1,1))
# print(price_monthabb, vp = vplayout(1,2))
# print(volprice_diff, vp = vplayout(2, 1:2))
volume_conv <- seasonal_df %>%
select(year, monthabb, Total.Volume, region, type) %>%
filter(type == "conventional", year == c("2015", "2016", "2017"),
region == c("Northeast", "SouthCentral", "MidSouth", "Southeast", "West")) %>%
group_by(year, monthabb, region) %>%
summarize(avg.vol = mean(Total.Volume)) %>%
ggplot(aes(x = monthabb, y= avg.vol)) + geom_point(color = "#5D6D7E") +
geom_line(group = 1, color = "#819ff7")+
facet_grid(region ~ year) +
theme_economist() +
theme(plot.title = element_text(hjust = 0.5), plot.background = element_rect(fill = "#F2F5A9"), axis.text.x = element_text(angle = 90)) +
labs(title="Market Volume \n Convenctional Avocados", x="Month", y="Average Volume")
## Warning: There was 1 warning in `filter()`.
## ℹ In argument: `==...`.
## Caused by warning in `region == c("Northeast", "SouthCentral", "MidSouth", "Southeast", "West")`:
## ! longer object length is not a multiple of shorter object length
## `summarise()` has grouped output by 'year', 'monthabb'. You can override using
## the `.groups` argument.
volume_conv
volume_org <- seasonal_df %>%
select(year, monthabb, Total.Volume, region, type) %>%
filter(type == "organic", year == c("2015", "2016", "2017"),
region == c("Northeast", "SouthCentral", "MidSouth", "Southeast", "West")) %>%
group_by(year, monthabb, region) %>%
summarize(avg.vol = mean(Total.Volume)) %>%
ggplot(aes(x = monthabb, y= avg.vol)) + geom_point(color = "#5D6D7E") +
geom_line(group = 1, color = "#FE642E")+
facet_grid(region ~ year) +
theme_economist() +
theme(plot.title = element_text(hjust = 0.5), plot.background = element_rect(fill = "#F2F5A9"), axis.text.x = element_text(angle = 90)) +
labs(title="Market Volume \n Organic Avocados", x="Month", y="Average Volume")
## Warning: There was 1 warning in `filter()`.
## ℹ In argument: `==...`.
## Caused by warning in `region == c("Northeast", "SouthCentral", "MidSouth", "Southeast", "West")`:
## ! longer object length is not a multiple of shorter object length
## `summarise()` has grouped output by 'year', 'monthabb'. You can override using
## the `.groups` argument.
volume_org
#Using the Autoplot library for Time Series: The autoplot library allows us to see patterns between the different years. In the chart below we can see the following: Prices for 2017 were the highest overall. There is an increase in avocado prices for both organic and conventional types. The lowest price occured in the year of 2015
conv.price <- seasonal_df %>% select(type, year, monthabb, AveragePrice) %>%
filter(type == "conventional", year == c("2015", "2016", "2017")) %>%
group_by(year, monthabb) %>%
summarize(avg = mean(AveragePrice))
## `summarise()` has grouped output by 'year'. You can override using the
## `.groups` argument.
org.price <- seasonal_df %>% select(type, year, monthabb, AveragePrice) %>%
filter(type == "organic", year == c("2015", "2016", "2017")) %>%
group_by(year, monthabb) %>%
summarize(avg = mean(AveragePrice))
## `summarise()` has grouped output by 'year'. You can override using the
## `.groups` argument.
conv.price <- ts(conv.price$avg, start = 2015, frequency = 12)
org.price <- ts(org.price$avg, start = 2015, frequency = 12)
conv.plot <- autoplot(conv.price, color = "#48a4ff") +
theme_economist() + theme(plot.title = element_text(hjust = 0.5), plot.background = element_rect(fill = "#F9E79F")) +
labs(title = "Average Price by Month \n Conventional Avocados", y = "Average Price")
org.plot <- autoplot(org.price, color = "#48a4ff") +
theme_economist() + theme(plot.title = element_text(hjust = 0.5), plot.background = element_rect(fill = "#F9E79F")) +
labs(title = "Average Price by Month \n organic Avocados", y = "Average Price")
byyear.plot.conv <- ggseasonplot(conv.price, year.labels = TRUE, year.labels.left = TRUE) + theme_economist() +
theme(plot.title = element_text(hjust = 0.5), plot.background = element_rect(fill = "#F9E79F")) +
labs(title = "Average convetional A price by year \n for each month", y = "Average Price") +
scale_color_manual(values = c("#407EAF","#68B842", "#F1711E"))
byyear.plot.org <- ggseasonplot(org.price, year.labels = TRUE, year.labels.left = TRUE) + theme_economist() +
theme(plot.title = element_text(hjust = 0.5), plot.background = element_rect(fill = "#F9E79F")) +
labs(title = "Average Organic A price by year \n for each month", y = "Average Price") +
scale_color_manual(values = c("#407EAF","#68B842", "#F1711E"))
plot_grid(conv.plot, byyear.plot.conv, org.plot, byyear.plot.org, nrow = 2, ncol = 2)
polar.conv <- ggseasonplot(conv.price, polar = TRUE) +
ylab("Average Avocado Price") +
ggtitle("Conventional Avocados \n Polar Plot") + theme_minimal() +
theme(plot.title = element_text(hjust = 0.5), plot.background = element_rect(fill = "#F4F6F7"), legend.position = "bottom", legend.background = element_rect(fill = "#FFF9F5", size = 0.5, linetype = "solid", color = "black")) +
scale_color_manual(values = c("#407EAF","#68B842", "#F1711E"))
polar.conv
polar.org <- ggseasonplot(org.price, polar = TRUE) +
ylab("Average Avocado Price") +
ggtitle("Organic Avocados \n Polar Plot") + theme_minimal() +
theme(plot.title = element_text(hjust = 0.5), plot.background = element_rect(fill = "#F4F6F7"), legend.position = "bottom", legend.background = element_rect(fill = "#FFF9F5", size = 0.5, linetype = "solid", color = "black")) +
scale_color_manual(values = c("#407EAF","#68B842", "#F1711E"))
polar.org
plot_grid(polar.conv, polar.org, ncol = 2)
mothly_conv <- ggsubseriesplot(conv.price) +
labs(title = "Conventional Avocados", x = "Months", y = "Average Price") +
theme_economist() +
theme(plot.title = element_text(hjust = 0.5), plot.background = element_rect(fill = "#FEF5c7")) +
annotate("text", x = c(6), y = c(1.35), label = c("Upward trend \n (June - October"), color = "black", size = 2.5, angle = 360, fontface = "bold") +
geom_segment(aes(x = c(6), y = 1.3, xend = c(6.5), yend = 1.2), color = "#d6665e", size = 1, arrow = arrow(length = unit(0.35, "cm")))
mothly_org <- ggsubseriesplot(org.price) +
labs(title = "Organic Avocados", x = "Months", y = "Average Price") +
theme_economist() +
theme(plot.title = element_text(hjust = 0.5), plot.background = element_rect(fill = "#FEF5c7")) +
annotate("text", x = c(6), y = c(1.35), label = c("Upward trend \n (June - October"), color = "black", size = 2.5, angle = 360, fontface = "bold") +
geom_segment(aes(x = c(6), y = 1.3, xend = c(6.5), yend = 1.2), color = "#d6665e", size = 1, arrow = arrow(length = unit(0.35, "cm")))
plot_grid(mothly_conv, mothly_org, nrow = 2)
seasonality_trend_conv <- window(conv.price, start = 2015)
conv_plot_trends <- autoplot(seasonality_trend_conv) + theme_economist() +
theme(plot.title = element_text(hjust = 0.5, size = 12), plot.background = element_rect(fill = "#F4f6f7")) +
labs(x = "year", y = "Average price", title = "Conventional Avocados")
autocoor_conv <- ggAcf(conv.price, lag = 12) +
theme(plot.title = element_text(hjust = 0.5, size = 12), plot.background = element_rect(fill = "#f4f6f7")) +
labs(title = "Autocorrelation for \n Conventional Avocados")
seasonality_trend_org <- window(org.price, start = 2015)
org_plot_trends <- autoplot(seasonality_trend_org) + theme_economist() +
theme(plot.title = element_text(hjust = 0.5, size = 12), plot.background = element_rect(fill = "#F4f6f7")) +
labs(x = "year", y = "Average price", title = "Organic Avocados")
autocoor_org <- ggAcf(org.price, lag = 12) +
theme(plot.title = element_text(hjust = 0.5, size = 12), plot.background = element_rect(fill = "#f4f6f7")) +
labs(title = "Autocorrelation for \n Organic Avocados")
plot_grid(conv_plot_trends, autocoor_conv, org_plot_trends, autocoor_org, ncol = 2, nrow = 2)
#average price by week
weekly_df <- avo
weekly_df$week <- format(as.Date(avo$Date), "%w")
conv.price.weekly <- weekly_df %>% select(type, year, AveragePrice) %>%
filter(type == "conventional", year == c("2015", "2016", "2017"))
org.price.weekly <- weekly_df %>% select(type, year, AveragePrice) %>%
filter(type == "organic", year == c("2015", "2016", "2017"))
weekly_conv_price <- ts(conv.price.weekly$AveragePrice, start = 2015, frequency = 12)
weekly_org_price <- ts(org.price.weekly$AveragePrice, start = 2015, frequency = 12)
weekly_trends_conv <- window(weekly_conv_price, start = 2015)
weekly_trends_org <- window(weekly_org_price, start = 2015)
conv_plot_weekly <- autoplot(weekly_trends_conv) + theme_economist() +
theme(plot.title = element_text(hjust = 0.5, size = 12), plot.background = element_rect(fill = "#fff1e0")) +
labs(x = "Time", y = "Average Price", title = "Conventional Avocados \n (Weekly TIme Series)")
org_plot_weekly <- autoplot(weekly_trends_org) + theme_economist() +
theme(plot.title = element_text(hjust = 0.5, size = 12), plot.background = element_rect(fill = "#fff1e0")) +
labs(x = "Time", y = "Average Price", title = "Organic Avocados \n (Weekly TIme Series)")
autocoor_conv <- ggAcf(weekly_conv_price, lag = 156, fill = "#48a4ff") + theme_economist() +
theme(plot.title = element_text(hjust = 0.5, size = 12), plot.background = element_rect(fill = "#fff1e0")) +
labs(title = "Autocorrelations by Weekly Legs")
## Warning in ggplot2::geom_segment(lineend = "butt", ...): Ignoring unknown
## parameters: `fill`
autocoor_org <- ggAcf(weekly_org_price, lag = 156, fill = "#48a4ff") + theme_economist() +
theme(plot.title = element_text(hjust = 0.5, size = 12), plot.background = element_rect(fill = "#fff1e0")) +
labs(title = "Autocorrelations by Weekly Legs")
## Warning in ggplot2::geom_segment(lineend = "butt", ...): Ignoring unknown
## parameters: `fill`
plot_grid(conv_plot_weekly, autocoor_conv, org_plot_weekly, autocoor_org, ncol = 2, nrow = 2)
#Various Forecasting Methods: In this section we will use various models
to come up with accurate predictions in order to see what will the
upcoming patterns will be for avocado prices.
The list of models we will use include:
Smoothing Moving Average Seasonal Naive Method Drift Method ARIMA
#using smoothing average
sma_conv <- sma(conventional$AveragePrice, h = 10, silent = FALSE) +
theme_economist()
## Order 1 - -14.0265; Order 20 - 2.6142; Order 39 - 3.2892
## Order 1 - -14.0265; Order 10 - -3.447; Order 20 - 2.6142
## Order 1 - -14.0265; Order 5 - -8.9552; Order 10 - -3.447
## Order 1 - -14.0265; Order 3 - -15.9464; Order 5 - -8.9552
## Order 1 - -14.0265; Order 2 - -17.4924; Order 3 - -15.9464
library(fpp2)
conv <- df %>% select(Date, AveragePrice, type) %>% filter(type == "conventional")
org <- df %>% select(Date, AveragePrice, type) %>% filter(type == "organic")
#conventional avocados
conventional <- as_tbl_time(conv, index = Date)
conventional <- as_period(conventional, '1 month')
conventional$type <- NULL
#organic avocados
organic <- as_tbl_time(org, index = Date)
organic <- as_period(organic, '1 month')
organic$type <- NULL
conv_ts <- ts(conventional[,2], start = c(2015, 1), frequency = 12)
org_ts <- ts(organic[,2], start = c(2015, 1), frequency = 12)
#the differnce from month to month
#to remove the trend take the difference
difference_conv <- diff(conv_ts)
main_diff <- autoplot(difference_conv) + theme_minimal()
seasonality_diff <- ggseasonplot(difference_conv) + theme_minimal()
plot_grid(main_diff, seasonality_diff, nrow = 2)
#ARIMA Model
# Y has trend unlike difference, it will take the difference behind the scenes d=1
# Stepwise will only use some models instead of all possible combinations
# approximation uses the model that approximates the best result to save time
arima_model_cv <- auto.arima(conv_ts, d=1, D=1, stepwise = FALSE, approximation = FALSE, trace = TRUE)
##
## ARIMA(0,1,0)(0,1,0)[12] : 17.51894
## ARIMA(0,1,0)(0,1,1)[12] : Inf
## ARIMA(0,1,0)(1,1,0)[12] : 13.65754
## ARIMA(0,1,0)(1,1,1)[12] : Inf
## ARIMA(0,1,1)(0,1,0)[12] : 11.44504
## ARIMA(0,1,1)(0,1,1)[12] : Inf
## ARIMA(0,1,1)(1,1,0)[12] : 7.926653
## ARIMA(0,1,1)(1,1,1)[12] : Inf
## ARIMA(0,1,2)(0,1,0)[12] : 13.85906
## ARIMA(0,1,2)(0,1,1)[12] : Inf
## ARIMA(0,1,2)(1,1,0)[12] : 10.60394
## ARIMA(0,1,2)(1,1,1)[12] : Inf
## ARIMA(0,1,3)(0,1,0)[12] : Inf
## ARIMA(0,1,3)(0,1,1)[12] : Inf
## ARIMA(0,1,3)(1,1,0)[12] : Inf
## ARIMA(0,1,3)(1,1,1)[12] : Inf
## ARIMA(0,1,4)(0,1,0)[12] : Inf
## ARIMA(0,1,4)(0,1,1)[12] : Inf
## ARIMA(0,1,4)(1,1,0)[12] : Inf
## ARIMA(0,1,5)(0,1,0)[12] : Inf
## ARIMA(1,1,0)(0,1,0)[12] : 15.40448
## ARIMA(1,1,0)(0,1,1)[12] : Inf
## ARIMA(1,1,0)(1,1,0)[12] : 10.08229
## ARIMA(1,1,0)(1,1,1)[12] : Inf
## ARIMA(1,1,1)(0,1,0)[12] : 13.96476
## ARIMA(1,1,1)(0,1,1)[12] : Inf
## ARIMA(1,1,1)(1,1,0)[12] : 10.64378
## ARIMA(1,1,1)(1,1,1)[12] : Inf
## ARIMA(1,1,2)(0,1,0)[12] : Inf
## ARIMA(1,1,2)(0,1,1)[12] : Inf
## ARIMA(1,1,2)(1,1,0)[12] : Inf
## ARIMA(1,1,2)(1,1,1)[12] : Inf
## ARIMA(1,1,3)(0,1,0)[12] : Inf
## ARIMA(1,1,3)(0,1,1)[12] : Inf
## ARIMA(1,1,3)(1,1,0)[12] : Inf
## ARIMA(1,1,4)(0,1,0)[12] : Inf
## ARIMA(2,1,0)(0,1,0)[12] : 12.20614
## ARIMA(2,1,0)(0,1,1)[12] : Inf
## ARIMA(2,1,0)(1,1,0)[12] : 10.50862
## ARIMA(2,1,0)(1,1,1)[12] : Inf
## ARIMA(2,1,1)(0,1,0)[12] : 14.37757
## ARIMA(2,1,1)(0,1,1)[12] : Inf
## ARIMA(2,1,1)(1,1,0)[12] : 13.47267
## ARIMA(2,1,1)(1,1,1)[12] : Inf
## ARIMA(2,1,2)(0,1,0)[12] : 15.41755
## ARIMA(2,1,2)(0,1,1)[12] : Inf
## ARIMA(2,1,2)(1,1,0)[12] : Inf
## ARIMA(2,1,3)(0,1,0)[12] : Inf
## ARIMA(3,1,0)(0,1,0)[12] : 15.01092
## ARIMA(3,1,0)(0,1,1)[12] : Inf
## ARIMA(3,1,0)(1,1,0)[12] : 13.58839
## ARIMA(3,1,0)(1,1,1)[12] : Inf
## ARIMA(3,1,1)(0,1,0)[12] : 18.00893
## ARIMA(3,1,1)(0,1,1)[12] : Inf
## ARIMA(3,1,1)(1,1,0)[12] : 16.8641
## ARIMA(3,1,2)(0,1,0)[12] : Inf
## ARIMA(4,1,0)(0,1,0)[12] : 16.62101
## ARIMA(4,1,0)(0,1,1)[12] : Inf
## ARIMA(4,1,0)(1,1,0)[12] : 15.52644
## ARIMA(4,1,1)(0,1,0)[12] : 18.49889
## ARIMA(5,1,0)(0,1,0)[12] : 18.45644
##
##
##
## Best model: ARIMA(0,1,1)(1,1,0)[12]
arima_model_org <- auto.arima(org_ts, d=1, D=1, stepwise = FALSE, approximation = FALSE, trace = TRUE)
##
## ARIMA(0,1,0)(0,1,0)[12] : 2.521273
## ARIMA(0,1,0)(0,1,1)[12] : Inf
## ARIMA(0,1,0)(1,1,0)[12] : -2.28856
## ARIMA(0,1,0)(1,1,1)[12] : Inf
## ARIMA(0,1,1)(0,1,0)[12] : 4.237894
## ARIMA(0,1,1)(0,1,1)[12] : Inf
## ARIMA(0,1,1)(1,1,0)[12] : -0.9972336
## ARIMA(0,1,1)(1,1,1)[12] : Inf
## ARIMA(0,1,2)(0,1,0)[12] : 6.536184
## ARIMA(0,1,2)(0,1,1)[12] : Inf
## ARIMA(0,1,2)(1,1,0)[12] : 1.305819
## ARIMA(0,1,2)(1,1,1)[12] : Inf
## ARIMA(0,1,3)(0,1,0)[12] : 9.325564
## ARIMA(0,1,3)(0,1,1)[12] : Inf
## ARIMA(0,1,3)(1,1,0)[12] : 3.595351
## ARIMA(0,1,3)(1,1,1)[12] : Inf
## ARIMA(0,1,4)(0,1,0)[12] : Inf
## ARIMA(0,1,4)(0,1,1)[12] : Inf
## ARIMA(0,1,4)(1,1,0)[12] : 7.006226
## ARIMA(0,1,5)(0,1,0)[12] : Inf
## ARIMA(1,1,0)(0,1,0)[12] : 4.34258
## ARIMA(1,1,0)(0,1,1)[12] : Inf
## ARIMA(1,1,0)(1,1,0)[12] : -0.7000006
## ARIMA(1,1,0)(1,1,1)[12] : Inf
## ARIMA(1,1,1)(0,1,0)[12] : Inf
## ARIMA(1,1,1)(0,1,1)[12] : Inf
## ARIMA(1,1,1)(1,1,0)[12] : Inf
## ARIMA(1,1,1)(1,1,1)[12] : Inf
## ARIMA(1,1,2)(0,1,0)[12] : 8.908865
## ARIMA(1,1,2)(0,1,1)[12] : Inf
## ARIMA(1,1,2)(1,1,0)[12] : Inf
## ARIMA(1,1,2)(1,1,1)[12] : Inf
## ARIMA(1,1,3)(0,1,0)[12] : 11.89595
## ARIMA(1,1,3)(0,1,1)[12] : Inf
## ARIMA(1,1,3)(1,1,0)[12] : 7.010278
## ARIMA(1,1,4)(0,1,0)[12] : Inf
## ARIMA(2,1,0)(0,1,0)[12] : 6.613931
## ARIMA(2,1,0)(0,1,1)[12] : Inf
## ARIMA(2,1,0)(1,1,0)[12] : 1.897622
## ARIMA(2,1,0)(1,1,1)[12] : Inf
## ARIMA(2,1,1)(0,1,0)[12] : 8.612471
## ARIMA(2,1,1)(0,1,1)[12] : Inf
## ARIMA(2,1,1)(1,1,0)[12] : Inf
## ARIMA(2,1,1)(1,1,1)[12] : Inf
## ARIMA(2,1,2)(0,1,0)[12] : Inf
## ARIMA(2,1,2)(0,1,1)[12] : Inf
## ARIMA(2,1,2)(1,1,0)[12] : Inf
## ARIMA(2,1,3)(0,1,0)[12] : 14.56648
## ARIMA(3,1,0)(0,1,0)[12] : 9.038767
## ARIMA(3,1,0)(0,1,1)[12] : Inf
## ARIMA(3,1,0)(1,1,0)[12] : Inf
## ARIMA(3,1,0)(1,1,1)[12] : Inf
## ARIMA(3,1,1)(0,1,0)[12] : 11.40005
## ARIMA(3,1,1)(0,1,1)[12] : Inf
## ARIMA(3,1,1)(1,1,0)[12] : Inf
## ARIMA(3,1,2)(0,1,0)[12] : Inf
## ARIMA(4,1,0)(0,1,0)[12] : 10.26243
## ARIMA(4,1,0)(0,1,1)[12] : Inf
## ARIMA(4,1,0)(1,1,0)[12] : Inf
## ARIMA(4,1,1)(0,1,0)[12] : Inf
## ARIMA(5,1,0)(0,1,0)[12] : 12.89994
##
##
##
## Best model: ARIMA(0,1,0)(1,1,0)[12]
print(summary(arima_model_cv))
## Series: conv_ts
## ARIMA(0,1,1)(1,1,0)[12]
##
## Coefficients:
## ma1 sar1
## -0.5852 -0.5753
## s.e. 0.1547 0.1753
##
## sigma^2 = 0.05354: log likelihood = -0.42
## AIC=6.84 AICc=7.93 BIC=10.61
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set -0.001571836 0.181512 0.1213104 -1.236511 9.331712 0.4580951
## ACF1
## Training set -0.04030268
checkresiduals(arima_model_cv) + theme_minimal()
##
## Ljung-Box test
##
## data: Residuals from ARIMA(0,1,1)(1,1,0)[12]
## Q* = 5.909, df = 6, p-value = 0.4335
##
## Model df: 2. Total lags used: 8
## NULL
conv_forecase_sn <- autoplot(conv_ts) +
autolayer(meanf(conv_ts, h = 24), series = "Mean", PI = FALSE) +
autolayer(naive(conv_ts, h = 24), series = "Naive", PI = FALSE) +
autolayer(snaive(conv_ts, h = 24), series = "Seasonal naive", PI = FALSE) +
ggtitle("Conventional Avocado \n Seasonal Naive Method") +
xlab("Date") +ylab("Price") +
scale_color_manual(values = c("#FA5858", "#00BFFF", "#FF8000")) +
guides(color = guide_legend(title = "Forecast")) + theme_economist() +
theme(plot.title = element_text(hjust = 0.5), plot.background = element_rect(fill = "#f4f6f7"))
org_forecase_sn <- autoplot(org_ts) +
autolayer(meanf(org_ts, h = 24), series = "Mean", PI = FALSE) +
autolayer(naive(org_ts, h = 24), series = "Naive", PI = FALSE) +
autolayer(snaive(org_ts, h = 24), series = "Seasonal naive", PI = FALSE) +
ggtitle("Organic Avocado \n Seasonal Naive Method") +
xlab("Date") +ylab("Price") +
scale_color_manual(values = c("#FA5858", "#00BFFF", "#FF8000")) +
guides(color = guide_legend(title = "Forecast")) + theme_economist() +
theme(plot.title = element_text(hjust = 0.5), plot.background = element_rect(fill = "#f4f6f7"))
plot_grid(conv_forecase_sn, org_forecase_sn, nrow = 2)
conv_forecast_dr <- autoplot(conv_ts) +
autolayer(meanf(conv_ts, h = 24),
series = "Mean", PI = F) +
autolayer(naive(conv_ts, h = 24),
series = "Narive", PI = F) +
autolayer(rwf(conv_ts, drift = T, h = 24),
series = "Drift", PI = F) +
ggtitle("Conventional Avocado \n Drift Method") +
xlab("Date") + ylab("Price") + scale_color_manual(values=c("#ffff24", "#98fb98", "#ff6347")) +
guides(colour=guide_legend(title="Forecast")) + theme_economist() +
theme(plot.title=element_text(hjust=0.5), plot.background=element_rect(fill="#F4F6F7"))
org_forecast_dr <- autoplot(org_ts) +
autolayer(meanf(org_ts, h = 24),
series = "Mean", PI = F) +
autolayer(naive(org_ts, h = 24),
series = "Narive", PI = F) +
autolayer(rwf(org_ts, drift = T, h = 24),
series = "Drift", PI = F) +
ggtitle("Organic Avocado \n Drift Method") +
xlab("Date") + ylab("Price") + scale_color_manual(values=c("#ffff24", "#98fb98", "#ff6347")) +
guides(colour=guide_legend(title="Forecast")) + theme_economist() +
theme(plot.title=element_text(hjust=0.5), plot.background=element_rect(fill="#F4F6F7"))
plot_grid(conv_forecast_dr, org_forecast_dr, nrow = 2)
Understanding Residuals: Residuals is not only used to understand the difference ebtween our forecast model and actual values, it also tells us some potential for “abnormal” movements during certain period.
rescv_nv <- residuals(naive(conv_ts))
p1 <- autoplot(rescv_nv, color = "#bbff6c") + xlab("Day") + ylab("") +
ggtitle("Residuals from naive method \n Conventional Avocados") + theme_economist() +
theme(plot.title=element_text(hjust=0.5, color="white"), plot.background=element_rect(fill="#0D7680"),
axis.text.x=element_text(colour="white"), axis.text.y=element_text(colour="white"),
axis.title=element_text(colour="white"))
rescorg_nv <- residuals(naive(org_ts))
p2 <- autoplot(rescorg_nv, color = "#bbff6c") + xlab("Day") + ylab("") +
ggtitle("Residuals from naive method \n Organic Avocados") + theme_economist() +
theme(plot.title=element_text(hjust=0.5, color="white"), plot.background=element_rect(fill="#0D7680"),
axis.text.x=element_text(colour="white"), axis.text.y=element_text(colour="white"),
axis.title=element_text(colour="white"))
plot_grid(p1, p2, nrow = 2)
sqrt(0.05354)
## [1] 0.2313871
forecast_cv <- forecast(arima_model_cv, h=24)
# Include means including the last 60 months in order to see closer the forecast.
autoplot(forecast_cv, include=60) + theme_minimal() + theme(plot.title=element_text(hjust=0.5), plot.background=element_rect(fill="#F4F6F7"),
legend.position="bottom", legend.background = element_rect(fill="#FFF9F5",
size=0.5, linetype="solid",
colour ="black")) +
labs(title="Forecasting using ARIMA model \n Conventional Avocados", x="Date", y="Price")
print(summary(forecast_cv))
##
## Forecast method: ARIMA(0,1,1)(1,1,0)[12]
##
## Model Information:
## Series: conv_ts
## ARIMA(0,1,1)(1,1,0)[12]
##
## Coefficients:
## ma1 sar1
## -0.5852 -0.5753
## s.e. 0.1547 0.1753
##
## sigma^2 = 0.05354: log likelihood = -0.42
## AIC=6.84 AICc=7.93 BIC=10.61
##
## Error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set -0.001571836 0.181512 0.1213104 -1.236511 9.331712 0.4580951
## ACF1
## Training set -0.04030268
##
## Forecasts:
## Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
## Apr 2018 1.0988345 0.8023048 1.395364 0.64533143 1.552338
## May 2018 1.1386783 0.8176500 1.459707 0.64770795 1.629649
## Jun 2018 1.4894880 1.1457026 1.833273 0.96371357 2.015262
## Jul 2018 1.4012636 1.0361366 1.766391 0.84285011 1.959677
## Aug 2018 1.4230392 1.0377510 1.808327 0.83379179 2.012287
## Sep 2018 1.4977124 1.0932668 1.902158 0.87916625 2.116259
## Oct 2018 1.5082522 1.0855165 1.930988 0.86173368 2.154771
## Nov 2018 1.5475562 1.1072895 1.987823 0.87422638 2.220886
## Dec 2018 1.3635790 0.9064531 1.820705 0.66446529 2.062693
## Jan 2019 1.2474001 0.7740151 1.720785 0.52342021 1.971380
## Feb 2019 1.2164342 0.7273303 1.705538 0.46841433 1.964454
## Mar 2019 1.0593318 0.5549987 1.563665 0.28802089 1.830643
## Apr 2019 1.3204562 0.7580294 1.882883 0.46029855 2.180614
## May 2019 1.2510859 0.6619903 1.840181 0.35014192 2.152030
## Jun 2019 1.5323956 0.9177875 2.147004 0.59243353 2.472358
## Jul 2019 1.4143855 0.7752824 2.053489 0.43696160 2.391809
## Aug 2019 1.4063754 0.7436821 2.069069 0.39287343 2.419877
## Sep 2019 1.5704057 0.8849336 2.255878 0.52206651 2.618745
## Oct 2019 1.5346123 0.8270942 2.242130 0.45255684 2.616668
## Nov 2019 1.5110354 0.7821380 2.239933 0.39628308 2.625788
## Dec 2019 1.3005820 0.5509147 2.050249 0.15406485 2.447099
## Jan 2020 1.1016650 0.3317880 1.871542 -0.07576024 2.279090
## Feb 2020 1.0309849 0.2414153 1.820555 -0.17655767 2.238527
## Mar 2020 0.9930253 0.1842423 1.801808 -0.24390149 2.229952
sqrt(0.05354)
## [1] 0.2313871
forecast_org <- forecast(arima_model_org, h=24)
# Include means including the last 60 months in order to see closer the forecast.
autoplot(forecast_org, include=60) + theme_minimal() + theme(plot.title=element_text(hjust=0.5), plot.background=element_rect(fill="#d0f0c0"),
legend.position="bottom", legend.background = element_rect(fill="#FFF9F5",
size=0.5, linetype="solid",
colour ="black")) +
labs(title="Forecasting using ARIMA model \n Organic Avocados", x="Date", y="Price")
print(summary(forecast_org))
##
## Forecast method: ARIMA(0,1,0)(1,1,0)[12]
##
## Model Information:
## Series: org_ts
## ARIMA(0,1,0)(1,1,0)[12]
##
## Coefficients:
## sar1
## -0.6507
## s.e. 0.1612
##
## sigma^2 = 0.03635: log likelihood = 3.41
## AIC=-2.81 AICc=-2.29 BIC=-0.29
##
## Error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set -0.00997634 0.1526389 0.09720952 -0.9593879 5.739336 0.3899936
## ACF1
## Training set -0.1943216
##
## Forecasts:
## Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
## Apr 2018 1.252732 1.00840915 1.497054 0.879072602 1.626391
## May 2018 1.371436 1.02591219 1.716961 0.843002687 1.899870
## Jun 2018 1.113831 0.69065147 1.537010 0.466634001 1.761027
## Jul 2018 1.227520 0.73887478 1.716165 0.480201688 1.974838
## Aug 2018 1.366225 0.81990280 1.912546 0.530697491 2.201752
## Sep 2018 1.412253 0.81378729 2.010718 0.496978748 2.327527
## Oct 2018 1.269042 0.62262564 1.915459 0.280433304 2.257651
## Nov 2018 1.367747 0.67669858 2.058796 0.310879576 2.424615
## Dec 2018 1.372818 0.63985006 2.105785 0.251840414 2.493795
## Jan 2019 1.342677 0.57006076 2.115292 0.161062680 2.524290
## Feb 2019 1.238085 0.42775848 2.048411 -0.001202315 2.477372
## Mar 2019 1.302198 0.45583954 2.148556 0.007804594 2.596591
## Apr 2019 1.235828 0.32753338 2.144123 -0.153288939 2.624946
## May 2019 1.231742 0.26547194 2.198012 -0.246040426 2.709524
## Jun 2019 1.037649 0.01669071 2.058607 -0.523771769 2.599069
## Jul 2019 1.337642 0.26478012 2.410504 -0.303158692 2.978443
## Aug 2019 1.353555 0.23118728 2.475924 -0.362958569 3.070070
## Sep 2019 1.382647 0.21286579 2.552428 -0.406378973 3.171673
## Oct 2019 1.065835 -0.14951098 2.281181 -0.792876251 2.924546
## Nov 2019 1.041748 -0.21751465 2.301011 -0.884128235 2.967625
## Dec 2019 1.004477 -0.29722214 2.306176 -0.986300122 2.995254
## Jan 2020 1.059020 -0.28377550 2.401815 -0.994608289 3.112648
## Feb 2020 1.009472 -0.37319780 2.392142 -1.105139090 3.124084
## Mar 2020 1.005838 -0.41558851 2.427265 -1.168046383 3.179723