Data source: Tidy Tuesday Global crop yields
Load packages
library(tidyverse)
library(readr)
library(Hmisc)
library(skimr)
Get data
key_crop <- readr::read_csv('https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2020/2020-09-01/key_crop_yields.csv')
fertilizer <- readr::read_csv('https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2020/2020-09-01/cereal_crop_yield_vs_fertilizer_application.csv')
tractors <- readr::read_csv('https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2020/2020-09-01/cereal_yields_vs_tractor_inputs_in_agriculture.csv')
land_use <- readr::read_csv('https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2020/2020-09-01/land_use_vs_yield_change_in_cereal_production.csv')
arable_land <- readr::read_csv('https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2020/2020-09-01/arable_land_pin.csv')
Part 1 key crop yield
skim(key_crop)
| Name | key_crop |
| Number of rows | 13075 |
| Number of columns | 14 |
| _______________________ | |
| Column type frequency: | |
| character | 2 |
| numeric | 12 |
| ________________________ | |
| Group variables | None |
Variable type: character
| skim_variable | n_missing | complete_rate | min | max | empty | n_unique | whitespace |
|---|---|---|---|---|---|---|---|
| Entity | 0 | 1.00 | 4 | 39 | 0 | 249 | 0 |
| Code | 1919 | 0.85 | 3 | 8 | 0 | 214 | 0 |
Variable type: numeric
| skim_variable | n_missing | complete_rate | mean | sd | p0 | p25 | p50 | p75 | p100 | hist |
|---|---|---|---|---|---|---|---|---|---|---|
| Year | 0 | 1.00 | 1990.37 | 16.73 | 1961.00 | 1976.00 | 1991.00 | 2005.00 | 2018.00 | ▇▆▇▇▇ |
| Wheat (tonnes per hectare) | 4974 | 0.62 | 2.43 | 1.69 | 0.00 | 1.23 | 1.99 | 3.12 | 10.67 | ▇▅▂▁▁ |
| Rice (tonnes per hectare) | 4604 | 0.65 | 3.16 | 1.85 | 0.20 | 1.77 | 2.74 | 4.16 | 10.68 | ▇▇▃▁▁ |
| Maize (tonnes per hectare) | 2301 | 0.82 | 3.02 | 3.13 | 0.03 | 1.14 | 1.83 | 3.92 | 36.76 | ▇▁▁▁▁ |
| Soybeans (tonnes per hectare) | 7114 | 0.46 | 1.45 | 0.75 | 0.00 | 0.86 | 1.33 | 1.90 | 5.95 | ▇▇▂▁▁ |
| Potatoes (tonnes per hectare) | 3059 | 0.77 | 15.40 | 9.29 | 0.84 | 8.64 | 13.41 | 20.05 | 75.30 | ▇▅▁▁▁ |
| Beans (tonnes per hectare) | 5066 | 0.61 | 1.09 | 0.82 | 0.03 | 0.59 | 0.83 | 1.35 | 9.18 | ▇▁▁▁▁ |
| Peas (tonnes per hectare) | 6840 | 0.48 | 1.48 | 1.01 | 0.04 | 0.72 | 1.15 | 1.99 | 7.16 | ▇▃▁▁▁ |
| Cassava (tonnes per hectare) | 5887 | 0.55 | 9.34 | 5.11 | 1.00 | 5.55 | 8.67 | 11.99 | 38.58 | ▇▇▁▁▁ |
| Barley (tonnes per hectare) | 6342 | 0.51 | 2.23 | 1.50 | 0.09 | 1.05 | 1.88 | 3.02 | 9.15 | ▇▆▂▁▁ |
| Cocoa beans (tonnes per hectare) | 8466 | 0.35 | 0.39 | 0.28 | 0.00 | 0.24 | 0.36 | 0.49 | 3.43 | ▇▁▁▁▁ |
| Bananas (tonnes per hectare) | 4166 | 0.68 | 15.20 | 12.08 | 0.66 | 5.94 | 11.78 | 20.79 | 77.59 | ▇▃▁▁▁ |
long_crops <- key_crop %>%
pivot_longer(cols = 4:last_col(),
names_to = "crop",
values_to = "crop_production") %>%
mutate(crop = str_remove_all(crop, " \\(tonnes per hectare\\)")) %>%
set_names(nm = names(.) %>% tolower())
colnames(long_crops)
## [1] "entity" "code" "year" "crop"
## [5] "crop_production"
Load packages for visualization
library(viridis)
library(hrbrthemes)
library(waffle)
library(magrittr)
library(ggridges)
library(ggsci)
library(ggpubr)
library(gridExtra)
Global key crop production throughout the years
year_prod = long_crops %>% group_by(year) %>% tally(crop_production) %>% rename(total_production=n) %>% as.data.frame()
ggplot(year_prod, aes(x=year, y= total_production, group=1)) + geom_line() + theme_light() + labs(y="Tons per hectare", x="Year", title= "Global key crop production throughout the years")
year_crop = long_crops %>% group_by(year,crop) %>% tally(crop_production) %>% rename(total_production=n) %>% as.data.frame()
ggplot(year_crop, aes(x=year, y= total_production, group=crop, fill=crop)) + geom_area() + theme_minimal() + scale_x_continuous(breaks = c(1961,2018)) + theme(panel.grid.minor.x = element_blank(), panel.grid.minor.y = element_blank()) + facet_wrap(~crop) + scale_fill_simpsons() + theme(legend.position="none", axis.text.x=element_text(angle=45), axis.ticks.x = element_blank(), axis.ticks.y = element_blank()) + labs(y="Production", x="Year", title = "Global key crops production (1961-2018)")
Distribution of yearly production (1961-2018)
ggplot(year_crop, aes(x=total_production, y=fct_rev(crop), fill= crop)) + geom_density_ridges() +
theme_light() +
theme(legend.position = "none", panel.grid.major.y=element_blank(), panel.grid.major.x=element_blank(), panel.grid.minor.x = element_blank(), panel.border=element_blank()) + scale_fill_simpsons() + labs(y="", x="Tons per hectare", title="Distribution of yearly production (1961-2018)")
Production by crop type
median_values = long_crops %>% filter(!is.na(crop_production)) %>% group_by(crop) %>% summarise(med= median(crop_production, na.rm=TRUE))
ggplot(long_crops, aes(x = fct_rev(crop), y = crop_production)) +
geom_boxplot(aes(color = crop)) +
guides(fill = FALSE, color = FALSE) +
coord_flip() +
labs(y = "Tons per hectare", x = "", title = "Production by crop type") +
theme_classic() +
scale_color_simpsons() +
scale_fill_simpsons()
Global key crops in 2018
s1 = long_crops %>% filter(year== 2018) %>% group_by(crop) %>% tally(crop_production) %>% mutate(prop= n/sum(n)) %>% rename(fre=n) %>% as.data.frame()
s2 = c('Bananas' = 3066, 'Barley' = 390, 'Beans' = 225, 'Cassava' = 1412, 'Maize' = 1002, 'Peas'= 211, 'Potatoes' = 3883, 'Rice' = 626, 'Soybeans' = 232, 'Wheat' = 487, 'Cocoa beans' = 35)
waffle1= waffle(s2/35, rows=10, size=0.5, colors = c("#FED439FF", "#709AE1FF", "#8A9197FF", "#D2AF81FF","#FD7446FF", "#D5E4A2FF", "#197EC0FF", "#C80813FF", "#46732EFF", "#71D0F5FF", "#370355FF"), title="Global key crops in 2018")
waffle1
###Fertilizers
colnames(fertilizer)
## [1] "Entity"
## [2] "Code"
## [3] "Year"
## [4] "Cereal yield (tonnes per hectare)"
## [5] "Nitrogen fertilizer use (kilograms per hectare)"
names(fertilizer)[4] ='cereal'
names(fertilizer)[5] = 'nitrogen'
f2 = fertilizer %>% filter_at(vars(Year,cereal,nitrogen), all_vars(!is.na(.))) %>% mutate(nc = (nitrogen/cereal))
#scatterplot
fv1 = ggscatter(f2,x="cereal",y="nitrogen", add= "reg.line", conf.int=TRUE, cor.coef = TRUE, size = 1, alpha=0.6, add.params = list(color= "#457b9d", fill= "grey"), cor.method ="pearson",color="orange") + labs(x="Cereal yield (t/ha)", y= "Nitrogen use (kg/ha)", title = "Cereal yield and nitrogen fertilizer use") + theme(axis.title = element_text(size = 8), plot.title = element_text(size = 11), axis.text = element_text(size=9))
#nitrogen use by year
fy = f2 %>% group_by(Year) %>% summarise_at(vars(cereal, nitrogen, nc),sum) %>% as.data.frame()
fv2 = ggplot(fy, aes(x=Year, y= nitrogen, group=1)) + geom_line() + theme_light() + labs(y="Nitrogen (kg/ha)", x="Year", title= "Nitrogen fertlizer use") + theme(axis.title = element_text(size = 9), plot.title = element_text(size = 11))
#cereal yield by year
fv3 = ggplot(fy, aes(x=Year, y= cereal, group=1)) + geom_line() + theme_light() + labs(y="Cereal (t/ha)", x="Year", title= "Cereal yield") + theme(axis.title = element_text(size = 9), plot.title = element_text(size = 11))
#nitrogen/cereal
fv4 = ggplot(fy, aes(x=Year, y= nc, group=1)) + geom_line() + theme_light() + labs(y="Nitrogen (kg/ha)", x="Year", title= "Nitrogen fertilizer use per cereal yield (t/ha)") + theme(axis.title = element_text(size = 9), plot.title = element_text(size = 11))
ggarrange(fv3, fv2, fv1, fv4)