Total honey production in United States decreasing every year and in 2006 U.S authorities gave more attention to that problem and asked researchers to analyze honey production datas to figure out problem in state based and forecast possible scenarios, projections related to future condition of honey industry and demand. Generally, main cause of this production decrease is price and it is hard to compute with foreign exporters from China, Argentina, Mexico and Turkey. Also, there are some other factors like climate changes, air polution, peoples migration to big cities from small town and decrease in farmer number, increase in life standards of people… To sum up, we will focus on honey production from 1998 to 2012 by checking variables of supply, price and stocks since expiry date for honey is long lasting there are stocked honeys too and farmers tries to sell it with higher price when supply decrease or demand increase which results in giher selling price for bee producers.
Data’s main source is United States National Agricultural Statistics Service and it collects data in two ways. As first way they are using december agricultural survey results and as second way they are is using country agricultural survey results and they are publishing this data sets online for free.
Keeping the following as the main points to be analyzed:
Which city is the most productive with honey production and which is least?
Whats the change, Trend of honey production from 1998 to 2012?
Which states produced most honey ?
What will be the future of honey production ?
suppressMessages(library(lubridate))
suppressMessages(library(tidyverse))
suppressMessages(library(cowplot))
suppressMessages(library(scales))
suppressMessages(library(DataCombine))
suppressMessages(library(stargazer))
suppressMessages(library(sandwich))
suppressMessages(library(dyn) )
suppressMessages(library(lmtest))
suppressMessages(library(data.table))
suppressMessages(library(tidyr))
suppressMessages(library(readr))
suppressMessages(library(stringr))
suppressMessages(library(forecast))
suppressMessages(library(gridExtra))
dir <- "../Data/"
func <- "../Codes/"
output <- "../Output/"
source(paste0(func, "theme_bg.R"))
source(paste0(func, "da_helper_functions.R"))
#Load Data
suppressMessages(honeyfinaldf <- read_csv(paste(dir, "honeyproduction.csv", sep="")))
honeyfinaldf <- data.frame(honeyfinaldf)
#adding a new variable consumption
honeyfinaldf$consumption <- honeyfinaldf$totalprod-honeyfinaldf$stocks
suppressMessages(glimpse(honeyfinaldf))
## Observations: 626
## Variables: 9
## $ state <chr> "AL", "AZ", "AR", "CA", "CO", "FL", "GA", "HI", "I...
## $ numcol <dbl> 16000, 55000, 53000, 450000, 27000, 230000, 75000,...
## $ yieldpercol <dbl> 71, 60, 65, 83, 72, 98, 56, 118, 50, 71, 92, 78, 4...
## $ totalprod <dbl> 1136000, 3300000, 3445000, 37350000, 1944000, 2254...
## $ stocks <dbl> 159000, 1485000, 1688000, 12326000, 1594000, 45080...
## $ priceperlb <dbl> 0.72, 0.64, 0.59, 0.62, 0.70, 0.64, 0.69, 0.77, 0....
## $ prodvalue <dbl> 818000, 2112000, 2033000, 23157000, 1361000, 14426...
## $ year <dbl> 1998, 1998, 1998, 1998, 1998, 1998, 1998, 1998, 19...
## $ consumption <dbl> 977000, 1815000, 1757000, 25024000, 350000, 180320...
# Save working file
write.csv(honeyfinaldf, paste(output, "honeyproduction.csv", sep=""), row.names = FALSE)
colSums(is.na(honeyfinaldf))
## state numcol yieldpercol totalprod stocks priceperlb
## 0 0 0 0 0 0
## prodvalue year consumption
## 0 0 0
table(honeyfinaldf$year)
##
## 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012
## 43 43 43 44 44 44 41 41 41 41 41 40 40 40 40
change <- honeyfinaldf %>%
group_by(year) %>%
summarise(YearTotal=sum(totalprod))
ggplot(data=change, aes(x=year, y=YearTotal/1000000)) +
geom_smooth(method = "lm") +
geom_line(color = "red", size = 2) +
geom_point() +
scale_x_continuous(breaks=seq(1998, 2012, 2)) +
ylab("Production (in Million lb)") +
xlab("Years") +
theme_bg()+
background_grid(major = "xy", minor="y", size.major = 0.2)
productionperstate <- honeyfinaldf %>%
group_by(state) %>%
summarise(sumprod=sum(totalprod)) %>%
arrange(desc(sumprod)) %>%
head(10) %>%
mutate(percentage=round(sumprod/sum(sumprod)*100,2))
ggplot(data=productionperstate, aes(x=reorder(state,sumprod),y=sumprod/1000000))+
geom_col(aes(fill=state),show.legend=F)+
geom_text(aes(label=percentage))+
coord_flip()+
labs(x="States in the USA",y="Total Honey Produced (in Million lb)",title="Total Honey Produced by Each State")
productnperstate <- honeyfinaldf %>%
group_by(state) %>%
summarise(sumprod=sum(totalprod)) %>%
arrange(sumprod) %>%
head(10) %>%
mutate(percentage=round(sumprod/sum(sumprod)*100,2))
ggplot(data=productnperstate, aes(x=reorder(state,sumprod),y=sumprod/1000000))+
geom_col(aes(fill=state),show.legend=F)+
geom_text(aes(label=percentage))+
coord_flip()+
labs(x="States in the USA",y="Total Honey Produced (in Million lb)",title="Total Honey Produced by Each State")
# Top 10 states producing most honey in 1998
productionnper1998 <- honeyfinaldf %>%
filter(year==1998) %>%
group_by(state) %>%
summarise(sumprod=sum(totalprod)) %>%
arrange(desc(sumprod)) %>%
head(10) %>%
mutate(percentage=round(sumprod/sum(sumprod)*100,2))
y1998 <- ggplot(data = productionnper1998, aes(x=reorder(state,sumprod),y=sumprod/1000000, fill = state)) +
geom_bar(stat = "identity") +
guides(fill = FALSE) +
xlab("State") +
ylab("Total Honey Produced (in Million lb)") +
ggtitle("Honey Production in 1998") +
coord_flip()
# Top 10 states producing most honey in 2012
productionnper2012 <- honeyfinaldf %>%
filter(year==2012) %>%
group_by(state) %>%
summarise(sumprod=sum(totalprod)) %>%
arrange(desc(sumprod)) %>%
head(10) %>%
mutate(percentage=round(sumprod/sum(sumprod)*100,2))
y2012 <- ggplot(data = productionnper2012, aes(x=reorder(state,sumprod),y=sumprod/1000000, fill = state)) +
geom_bar(stat = "identity") +
guides(fill = FALSE) +
xlab("State") +
ylab("Total Honey Produced (in Million lb)") +
ggtitle("Honey Production in 2012") +
coord_flip()
grid.arrange(y1998, y2012, ncol=2)
change <- honeyfinaldf %>%
group_by(year) %>%
summarise(YearTotal=sum(totalprod))
ggplot(data=change, aes(x=year, y=YearTotal/1000000)) +
geom_smooth(method = "lm") +
geom_line(color = "red", size = 2) +
geom_point() +
scale_x_continuous(breaks=seq(1998, 2012, 2)) +
ylab("Production (in Million lb)") +
xlab("Years") +
theme_bg()+
background_grid(major = "xy", minor="y", size.major = 0.2)
price <- honeyfinaldf %>%
group_by(year) %>%
summarise(YearAvgPrice=mean(priceperlb))
ggplot(data=price, aes(x=year, y=YearAvgPrice)) +
geom_smooth(method = "lm") +
geom_line(color = "red", size = 2) +
geom_point() +
scale_x_continuous(breaks=seq(1998, 2012, 2)) +
ylab("Prices (Avg)") +
xlab("Years") +
theme_bg()+
background_grid(major = "xy", minor="y", size.major = 0.2)
ProductionPerYear <- honeyfinaldf %>%
group_by(year) %>%
summarize(TotalConsumption=sum(consumption))
ggplot(ProductionPerYear,aes(x=year,y=TotalConsumption/1000000)) +
geom_smooth(method = "lm") +
geom_line(color="red", size=2) +
geom_point() +
scale_x_continuous(breaks=seq(1998, 2012, 2)) +
labs(x="Year", y="Total Consumption (in Million lb)", title="Total Consumption for years(1998-2012)") +
theme_bg()+
background_grid(major = "xy", minor="y", size.major = 0.2)
hny <- honeyfinaldf %>%
group_by(year) %>%
summarise(totalproduction=sum(totalprod), VoP=sum(prodvalue))
ggplot(hny) +
geom_smooth(aes(x=year,y=totalproduction/1000000), method = "lm", color="blue", size=1, se=FALSE) +
geom_smooth(aes(x=year,y=VoP/1000000), method = "lm", color="darkblue", size=1, se=FALSE) +
geom_line(aes(x=year,y=totalproduction/1000000), color="red", size=2) +
geom_line(aes(x=year,y=VoP/1000000), color="green", size=2) +
geom_point(aes(x=year,y=totalproduction/1000000)) +
geom_point(aes(x=year,y=VoP/1000000)) +
scale_x_continuous(breaks=seq(1998, 2012, 2)) +
labs(x="Year", y="", title="Total Honey Production Vs. Value of Production") +
theme_bg()+
background_grid(major = "xy", minor="y", size.major = 0.2)
honey <- honeyfinaldf %>%
group_by(year) %>%
summarise(total_prod=sum(totalprod))
honey_production <- ts(honey$total_prod, start=1998, frequency=1)
plot(honey_production/1000000, ylab="Honey Production (in Million lb)")
ets2013 <- forecast(honey_production, h=1)
plot(ets2013, ylab="Honey Production")
naive2013 <- forecast(naive(honey_production, h=1))
plot(naive2013, ylab="Honey Production")
ses2013 <- forecast(ses(honey_production, h = 1))
plot(ses2013, ylab="Honey Production")
arima2013 <- forecast(auto.arima(honey_production), h=1)
tbats2013 <- forecast(tbats(honey_production), h=1)
nnetar2013 <- forecast(nnetar(honey_production), h=1)
rbindlist(list(
data.table(method = 'naive', as.data.frame(accuracy(naive2013))),
data.table(method = 'ses', as.data.frame(accuracy(ses2013))),
data.table(method = 'ets', as.data.frame(accuracy(ets2013))),
data.table(method = 'arima', as.data.frame(accuracy(arima2013))),
data.table(method = 'tbats', as.data.frame(accuracy(tbats2013))),
data.table(method = 'nnetar', as.data.frame(accuracy(nnetar2013)))))
## method ME RMSE MAE MPE MAPE MASE
## 1: naive -5.615143e+06 18619687 16216857 -3.7520688 9.580399 1.0000000
## 2: ses -5.222306e+06 17688262 15333611 -3.8247471 8.993452 0.9455353
## 3: ets -5.166540e+06 17777015 15371532 -3.8073382 9.011067 0.9478737
## 4: arima -5.226165e+06 17988415 15150368 -3.4952642 8.948372 0.9342358
## 5: tbats 1.939466e+06 11654869 9195955 0.4851627 5.230874 0.5670615
## 6: nnetar 4.302547e+01 13252495 11534284 -0.6100512 6.821363 0.7112528
## ACF1
## 1: -0.48898070
## 2: -0.15065170
## 3: -0.13812952
## 4: -0.50069873
## 5: -0.08123227
## 6: -0.17337599