Crop Yields Viz

Data source: Tidy Tuesday Global crop yields

Load packages

library(tidyverse)
library(readr)
library(Hmisc)
library(skimr)

Get data

key_crop <- readr::read_csv('https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2020/2020-09-01/key_crop_yields.csv')
fertilizer <- readr::read_csv('https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2020/2020-09-01/cereal_crop_yield_vs_fertilizer_application.csv')
tractors <- readr::read_csv('https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2020/2020-09-01/cereal_yields_vs_tractor_inputs_in_agriculture.csv')
land_use <- readr::read_csv('https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2020/2020-09-01/land_use_vs_yield_change_in_cereal_production.csv')
arable_land <- readr::read_csv('https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2020/2020-09-01/arable_land_pin.csv')

Part 1 key crop yield

skim(key_crop)

Data summary
Name	key_crop
Number of rows	13075
Number of columns	14
_______________________
Column type frequency:
character	2
numeric	12
________________________
Group variables	None

Variable type: character

skim_variable	n_missing	complete_rate	min	max	empty	n_unique	whitespace
Entity	0	1.00	4	39	0	249	0
Code	1919	0.85	3	8	0	214	0

Variable type: numeric

skim_variable	n_missing	complete_rate	mean	sd	p0	p25	p50	p75	p100	hist
Year	0	1.00	1990.37	16.73	1961.00	1976.00	1991.00	2005.00	2018.00	▇▆▇▇▇
Wheat (tonnes per hectare)	4974	0.62	2.43	1.69	0.00	1.23	1.99	3.12	10.67	▇▅▂▁▁
Rice (tonnes per hectare)	4604	0.65	3.16	1.85	0.20	1.77	2.74	4.16	10.68	▇▇▃▁▁
Maize (tonnes per hectare)	2301	0.82	3.02	3.13	0.03	1.14	1.83	3.92	36.76	▇▁▁▁▁
Soybeans (tonnes per hectare)	7114	0.46	1.45	0.75	0.00	0.86	1.33	1.90	5.95	▇▇▂▁▁
Potatoes (tonnes per hectare)	3059	0.77	15.40	9.29	0.84	8.64	13.41	20.05	75.30	▇▅▁▁▁
Beans (tonnes per hectare)	5066	0.61	1.09	0.82	0.03	0.59	0.83	1.35	9.18	▇▁▁▁▁
Peas (tonnes per hectare)	6840	0.48	1.48	1.01	0.04	0.72	1.15	1.99	7.16	▇▃▁▁▁
Cassava (tonnes per hectare)	5887	0.55	9.34	5.11	1.00	5.55	8.67	11.99	38.58	▇▇▁▁▁
Barley (tonnes per hectare)	6342	0.51	2.23	1.50	0.09	1.05	1.88	3.02	9.15	▇▆▂▁▁
Cocoa beans (tonnes per hectare)	8466	0.35	0.39	0.28	0.00	0.24	0.36	0.49	3.43	▇▁▁▁▁
Bananas (tonnes per hectare)	4166	0.68	15.20	12.08	0.66	5.94	11.78	20.79	77.59	▇▃▁▁▁

long_crops <- key_crop %>% 
  pivot_longer(cols = 4:last_col(),
               names_to = "crop", 
               values_to = "crop_production") %>% 
  mutate(crop = str_remove_all(crop, " \\(tonnes per hectare\\)")) %>% 
  set_names(nm = names(.) %>% tolower())

colnames(long_crops)

## [1] "entity"          "code"            "year"            "crop"           
## [5] "crop_production"

Load packages for visualization

library(viridis)
library(hrbrthemes)
library(waffle)
library(magrittr)
library(ggridges)
library(ggsci)
library(ggpubr)
library(gridExtra)

Global key crop production throughout the years

year_prod = long_crops %>% group_by(year) %>% tally(crop_production) %>% rename(total_production=n) %>% as.data.frame()
ggplot(year_prod, aes(x=year, y= total_production, group=1)) + geom_line() + theme_light() + labs(y="Tons per hectare", x="Year", title= "Global key crop production throughout the years")

year_crop = long_crops %>% group_by(year,crop) %>% tally(crop_production) %>% rename(total_production=n) %>% as.data.frame()
ggplot(year_crop, aes(x=year, y= total_production, group=crop, fill=crop)) + geom_area() + theme_minimal() + scale_x_continuous(breaks = c(1961,2018)) + theme(panel.grid.minor.x = element_blank(), panel.grid.minor.y = element_blank()) + facet_wrap(~crop) + scale_fill_simpsons() + theme(legend.position="none", axis.text.x=element_text(angle=45), axis.ticks.x = element_blank(), axis.ticks.y = element_blank()) + labs(y="Production", x="Year", title = "Global key crops production (1961-2018)")

Distribution of yearly production (1961-2018)

ggplot(year_crop, aes(x=total_production, y=fct_rev(crop), fill= crop)) +  geom_density_ridges() +
  theme_light() + 
  theme(legend.position = "none", panel.grid.major.y=element_blank(), panel.grid.major.x=element_blank(), panel.grid.minor.x = element_blank(), panel.border=element_blank()) + scale_fill_simpsons() + labs(y="", x="Tons per hectare", title="Distribution of yearly production (1961-2018)")

Production by crop type

median_values = long_crops %>% filter(!is.na(crop_production)) %>% group_by(crop) %>% summarise(med= median(crop_production, na.rm=TRUE))

ggplot(long_crops, aes(x = fct_rev(crop), y = crop_production)) +
  geom_boxplot(aes(color = crop)) +
  guides(fill = FALSE, color = FALSE) +
  coord_flip() +
  labs(y = "Tons per hectare", x = "", title = "Production by crop type") +
  theme_classic() +
  scale_color_simpsons() +
  scale_fill_simpsons()

Global key crops in 2018

s1 = long_crops %>% filter(year== 2018) %>% group_by(crop) %>% tally(crop_production) %>% mutate(prop= n/sum(n)) %>% rename(fre=n) %>% as.data.frame()
s2 = c('Bananas' = 3066, 'Barley' = 390, 'Beans' = 225, 'Cassava' = 1412, 'Maize' = 1002, 'Peas'= 211, 'Potatoes' = 3883, 'Rice' = 626, 'Soybeans' = 232, 'Wheat' = 487, 'Cocoa beans' = 35)
waffle1= waffle(s2/35, rows=10, size=0.5, colors = c("#FED439FF", "#709AE1FF", "#8A9197FF", "#D2AF81FF","#FD7446FF", "#D5E4A2FF", "#197EC0FF", "#C80813FF", "#46732EFF", "#71D0F5FF", "#370355FF"), title="Global key crops in 2018") 
waffle1

###Fertilizers

colnames(fertilizer)

## [1] "Entity"                                         
## [2] "Code"                                           
## [3] "Year"                                           
## [4] "Cereal yield (tonnes per hectare)"              
## [5] "Nitrogen fertilizer use (kilograms per hectare)"

names(fertilizer)[4] ='cereal'
names(fertilizer)[5] = 'nitrogen'
f2 = fertilizer %>% filter_at(vars(Year,cereal,nitrogen), all_vars(!is.na(.))) %>% mutate(nc = (nitrogen/cereal))

#scatterplot
fv1 = ggscatter(f2,x="cereal",y="nitrogen", add= "reg.line", conf.int=TRUE, cor.coef = TRUE, size = 1, alpha=0.6, add.params = list(color= "#457b9d", fill= "grey"), cor.method ="pearson",color="orange") + labs(x="Cereal yield (t/ha)", y= "Nitrogen use (kg/ha)", title = "Cereal yield and nitrogen fertilizer use") + theme(axis.title = element_text(size = 8), plot.title = element_text(size = 11), axis.text = element_text(size=9))

#nitrogen use by year
fy = f2 %>% group_by(Year) %>% summarise_at(vars(cereal, nitrogen, nc),sum) %>% as.data.frame()
fv2 = ggplot(fy, aes(x=Year, y= nitrogen, group=1)) + geom_line() + theme_light() + labs(y="Nitrogen (kg/ha)", x="Year", title= "Nitrogen fertlizer use") + theme(axis.title = element_text(size = 9), plot.title = element_text(size = 11))

#cereal yield by year
fv3 = ggplot(fy, aes(x=Year, y= cereal, group=1)) + geom_line() + theme_light() + labs(y="Cereal (t/ha)", x="Year", title= "Cereal yield") + theme(axis.title = element_text(size = 9), plot.title = element_text(size = 11))

#nitrogen/cereal
fv4 = ggplot(fy, aes(x=Year, y= nc, group=1)) + geom_line() + theme_light() + labs(y="Nitrogen (kg/ha)", x="Year", title= "Nitrogen fertilizer use per cereal yield (t/ha)") + theme(axis.title = element_text(size = 9), plot.title = element_text(size = 11))

ggarrange(fv3, fv2, fv1, fv4)