Data source: Tidy Tuesday Global crop yields

Load packages

library(tidyverse)
library(readr)
library(Hmisc)
library(skimr)

Get data

key_crop <- readr::read_csv('https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2020/2020-09-01/key_crop_yields.csv')
fertilizer <- readr::read_csv('https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2020/2020-09-01/cereal_crop_yield_vs_fertilizer_application.csv')
tractors <- readr::read_csv('https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2020/2020-09-01/cereal_yields_vs_tractor_inputs_in_agriculture.csv')
land_use <- readr::read_csv('https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2020/2020-09-01/land_use_vs_yield_change_in_cereal_production.csv')
arable_land <- readr::read_csv('https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2020/2020-09-01/arable_land_pin.csv')

Part 1 key crop yield

skim(key_crop)
Data summary
Name key_crop
Number of rows 13075
Number of columns 14
_______________________
Column type frequency:
character 2
numeric 12
________________________
Group variables None

Variable type: character

skim_variable n_missing complete_rate min max empty n_unique whitespace
Entity 0 1.00 4 39 0 249 0
Code 1919 0.85 3 8 0 214 0

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
Year 0 1.00 1990.37 16.73 1961.00 1976.00 1991.00 2005.00 2018.00 ▇▆▇▇▇
Wheat (tonnes per hectare) 4974 0.62 2.43 1.69 0.00 1.23 1.99 3.12 10.67 ▇▅▂▁▁
Rice (tonnes per hectare) 4604 0.65 3.16 1.85 0.20 1.77 2.74 4.16 10.68 ▇▇▃▁▁
Maize (tonnes per hectare) 2301 0.82 3.02 3.13 0.03 1.14 1.83 3.92 36.76 ▇▁▁▁▁
Soybeans (tonnes per hectare) 7114 0.46 1.45 0.75 0.00 0.86 1.33 1.90 5.95 ▇▇▂▁▁
Potatoes (tonnes per hectare) 3059 0.77 15.40 9.29 0.84 8.64 13.41 20.05 75.30 ▇▅▁▁▁
Beans (tonnes per hectare) 5066 0.61 1.09 0.82 0.03 0.59 0.83 1.35 9.18 ▇▁▁▁▁
Peas (tonnes per hectare) 6840 0.48 1.48 1.01 0.04 0.72 1.15 1.99 7.16 ▇▃▁▁▁
Cassava (tonnes per hectare) 5887 0.55 9.34 5.11 1.00 5.55 8.67 11.99 38.58 ▇▇▁▁▁
Barley (tonnes per hectare) 6342 0.51 2.23 1.50 0.09 1.05 1.88 3.02 9.15 ▇▆▂▁▁
Cocoa beans (tonnes per hectare) 8466 0.35 0.39 0.28 0.00 0.24 0.36 0.49 3.43 ▇▁▁▁▁
Bananas (tonnes per hectare) 4166 0.68 15.20 12.08 0.66 5.94 11.78 20.79 77.59 ▇▃▁▁▁
long_crops <- key_crop %>% 
  pivot_longer(cols = 4:last_col(),
               names_to = "crop", 
               values_to = "crop_production") %>% 
  mutate(crop = str_remove_all(crop, " \\(tonnes per hectare\\)")) %>% 
  set_names(nm = names(.) %>% tolower())
colnames(long_crops)
## [1] "entity"          "code"            "year"            "crop"           
## [5] "crop_production"

Load packages for visualization

library(viridis)
library(hrbrthemes)
library(waffle)
library(magrittr)
library(ggridges)
library(ggsci)
library(ggpubr)
library(gridExtra)

Global key crop production throughout the years

year_prod = long_crops %>% group_by(year) %>% tally(crop_production) %>% rename(total_production=n) %>% as.data.frame()
ggplot(year_prod, aes(x=year, y= total_production, group=1)) + geom_line() + theme_light() + labs(y="Tons per hectare", x="Year", title= "Global key crop production throughout the years")

year_crop = long_crops %>% group_by(year,crop) %>% tally(crop_production) %>% rename(total_production=n) %>% as.data.frame()
ggplot(year_crop, aes(x=year, y= total_production, group=crop, fill=crop)) + geom_area() + theme_minimal() + scale_x_continuous(breaks = c(1961,2018)) + theme(panel.grid.minor.x = element_blank(), panel.grid.minor.y = element_blank()) + facet_wrap(~crop) + scale_fill_simpsons() + theme(legend.position="none", axis.text.x=element_text(angle=45), axis.ticks.x = element_blank(), axis.ticks.y = element_blank()) + labs(y="Production", x="Year", title = "Global key crops production (1961-2018)")

Distribution of yearly production (1961-2018)

ggplot(year_crop, aes(x=total_production, y=fct_rev(crop), fill= crop)) +  geom_density_ridges() +
  theme_light() + 
  theme(legend.position = "none", panel.grid.major.y=element_blank(), panel.grid.major.x=element_blank(), panel.grid.minor.x = element_blank(), panel.border=element_blank()) + scale_fill_simpsons() + labs(y="", x="Tons per hectare", title="Distribution of yearly production (1961-2018)")

Production by crop type

median_values = long_crops %>% filter(!is.na(crop_production)) %>% group_by(crop) %>% summarise(med= median(crop_production, na.rm=TRUE))

ggplot(long_crops, aes(x = fct_rev(crop), y = crop_production)) +
  geom_boxplot(aes(color = crop)) +
  guides(fill = FALSE, color = FALSE) +
  coord_flip() +
  labs(y = "Tons per hectare", x = "", title = "Production by crop type") +
  theme_classic() +
  scale_color_simpsons() +
  scale_fill_simpsons()

Global key crops in 2018

s1 = long_crops %>% filter(year== 2018) %>% group_by(crop) %>% tally(crop_production) %>% mutate(prop= n/sum(n)) %>% rename(fre=n) %>% as.data.frame()
s2 = c('Bananas' = 3066, 'Barley' = 390, 'Beans' = 225, 'Cassava' = 1412, 'Maize' = 1002, 'Peas'= 211, 'Potatoes' = 3883, 'Rice' = 626, 'Soybeans' = 232, 'Wheat' = 487, 'Cocoa beans' = 35)
waffle1= waffle(s2/35, rows=10, size=0.5, colors = c("#FED439FF", "#709AE1FF", "#8A9197FF", "#D2AF81FF","#FD7446FF", "#D5E4A2FF", "#197EC0FF", "#C80813FF", "#46732EFF", "#71D0F5FF", "#370355FF"), title="Global key crops in 2018") 
waffle1

###Fertilizers

colnames(fertilizer)
## [1] "Entity"                                         
## [2] "Code"                                           
## [3] "Year"                                           
## [4] "Cereal yield (tonnes per hectare)"              
## [5] "Nitrogen fertilizer use (kilograms per hectare)"
names(fertilizer)[4] ='cereal'
names(fertilizer)[5] = 'nitrogen'
f2 = fertilizer %>% filter_at(vars(Year,cereal,nitrogen), all_vars(!is.na(.))) %>% mutate(nc = (nitrogen/cereal))
#scatterplot
fv1 = ggscatter(f2,x="cereal",y="nitrogen", add= "reg.line", conf.int=TRUE, cor.coef = TRUE, size = 1, alpha=0.6, add.params = list(color= "#457b9d", fill= "grey"), cor.method ="pearson",color="orange") + labs(x="Cereal yield (t/ha)", y= "Nitrogen use (kg/ha)", title = "Cereal yield and nitrogen fertilizer use") + theme(axis.title = element_text(size = 8), plot.title = element_text(size = 11), axis.text = element_text(size=9))

#nitrogen use by year
fy = f2 %>% group_by(Year) %>% summarise_at(vars(cereal, nitrogen, nc),sum) %>% as.data.frame()
fv2 = ggplot(fy, aes(x=Year, y= nitrogen, group=1)) + geom_line() + theme_light() + labs(y="Nitrogen (kg/ha)", x="Year", title= "Nitrogen fertlizer use") + theme(axis.title = element_text(size = 9), plot.title = element_text(size = 11))

#cereal yield by year
fv3 = ggplot(fy, aes(x=Year, y= cereal, group=1)) + geom_line() + theme_light() + labs(y="Cereal (t/ha)", x="Year", title= "Cereal yield") + theme(axis.title = element_text(size = 9), plot.title = element_text(size = 11))

#nitrogen/cereal
fv4 = ggplot(fy, aes(x=Year, y= nc, group=1)) + geom_line() + theme_light() + labs(y="Nitrogen (kg/ha)", x="Year", title= "Nitrogen fertilizer use per cereal yield (t/ha)") + theme(axis.title = element_text(size = 9), plot.title = element_text(size = 11))

ggarrange(fv3, fv2, fv1, fv4)