Mapping crop yield estimations during the period 1969-2022

The database corresponds to the estimations of Sorghum crop yield in Argentina during the period 1969-2022. The database as well as the shape files can be downloaded from public sources. See: https://www.ign.gob.ar/NuestrasActividades/SIG.

Load required libraries

library(ggplot2)
library(sf)
## Linking to GEOS 3.12.1, GDAL 3.8.4, PROJ 9.3.1; sf_use_s2() is TRUE
library(readxl)
library(broom)
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ lubridate 1.9.3     ✔ tibble    3.2.1
## ✔ purrr     1.0.2     ✔ tidyr     1.3.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(sp)
library(tmap)
## Breaking News: tmap 3.x is retiring. Please test v4, e.g. with
## remotes::install_github('r-tmap/tmap')
library(mapview)
library(doBy)
## 
## Adjuntando el paquete: 'doBy'
## 
## The following object is masked from 'package:dplyr':
## 
##     order_by
library(prettymapr)
library(patchwork)
library(knitr)
library(ggspatial)
library(leaflet)

Load shape files and data frames containing the variables to be plotted

departamentos_sh <- read_sf("./departamentos/departamento.shp")
head(departamentos_sh)

The shape file contains a column named in1, this column must be present in our data frame too in order to be able to merge both files. We also need to load the data frames that contain the information we want to plot. In this case we are interested in studying the changes in the crop yield of sorghum in Argentina from 1969 to 2022. We load the database and split it by periods of ten years each:

estimaciones<-read_xlsx("estimaciones.xlsx", sheet = 1)

periodo1<-read_xlsx("estimaciones.xlsx", sheet = 2) # 2012-2022
periodo2<-read_xlsx("estimaciones.xlsx", sheet = 3) # 2001-2011
periodo3<-read_xlsx("estimaciones.xlsx", sheet = 4) # 1990-2000
periodo4<-read_xlsx("estimaciones.xlsx", sheet = 5) # 1979-1989
periodo5<-read_xlsx("estimaciones.xlsx", sheet = 6) # 1969-1978

The functions head() and str() can be used to look at the first rows and structure of the data frame in order to see if the column of interest is correctly settled, in this case, both as character. Note that if this column is settled as an integer, the merge won´t work.

head(departamentos_sh) #look at the shape file
str(departamentos_sh) # in1 is a character, OK
## sf [529 × 9] (S3: sf/tbl_df/tbl/data.frame)
##  $ gid     : num [1:529] 552 529 530 531 532 533 534 535 536 537 ...
##  $ objeto  : chr [1:529] "Departamento" "Departamento" "Departamento" "Departamento" ...
##  $ fna     : chr [1:529] "Partido de Adolfo Gonzales Chaves" "Departamento Concordia" "Departamento Federal" "Departamento Gualeguaychú" ...
##  $ gna     : chr [1:529] "Partido" "Departamento" "Departamento" "Departamento" ...
##  $ nam     : chr [1:529] "Adolfo Gonzales Chaves" "Concordia" "Federal" "Gualeguaychú" ...
##  $ in1     : chr [1:529] "06014" "30015" "30035" "30056" ...
##  $ fdc     : chr [1:529] "ARBA - Gerencia de Servicios Catastrales" "ATER - Direc. de Catastro" "ATER - Direc. de Catastro" "ATER - Direc. de Catastro" ...
##  $ sag     : chr [1:529] "IGN" "IGN" "IGN" "IGN" ...
##  $ geometry:sfc_MULTIPOLYGON of length 529; first list element: List of 1
##   ..$ :List of 1
##   .. ..$ : num [1:1272, 1:2] -60.3 -60.3 -60.3 -60.3 -60.3 ...
##   ..- attr(*, "class")= chr [1:3] "XY" "MULTIPOLYGON" "sfg"
##  - attr(*, "sf_column")= chr "geometry"
##  - attr(*, "agr")= Factor w/ 3 levels "constant","aggregate",..: NA NA NA NA NA NA NA NA
##   ..- attr(*, "names")= chr [1:8] "gid" "objeto" "fna" "gna" ...
head(periodo1) # look at the data frame that contains the variables of interest
str(periodo1) #in1 is a character, OK
## tibble [2,400 × 13] (S3: tbl_df/tbl/data.frame)
##  $ Cultivo       : chr [1:2400] "Sorgo" "Sorgo" "Sorgo" "Sorgo" ...
##  $ Campania      : chr [1:2400] "2012/13" "2013/14" "2014/15" "2015/16" ...
##  $ anio          : num [1:2400] 2013 2014 2015 2016 2017 ...
##  $ provincia     : chr [1:2400] "BUENOS AIRES" "BUENOS AIRES" "BUENOS AIRES" "BUENOS AIRES" ...
##  $ Departamento  : chr [1:2400] "25 DE MAYO" "25 DE MAYO" "25 DE MAYO" "25 DE MAYO" ...
##  $ idProvincia   : num [1:2400] 6 6 6 6 6 6 6 6 6 6 ...
##  $ idDepartamento: num [1:2400] 854 854 854 854 854 854 854 854 854 854 ...
##  $ in1           : chr [1:2400] "06854" "06854" "06854" "06854" ...
##  $ periodo       : chr [1:2400] "periodo1" "periodo1" "periodo1" "periodo1" ...
##  $ SupSembrada   : num [1:2400] 1200 4000 2000 1100 1500 ...
##  $ SupCosechada  : num [1:2400] 1200 4000 2000 1100 1500 556 561 565 627 700 ...
##  $ Produccion    : num [1:2400] 7680 14800 11000 4070 5550 ...
##  $ Rendimiento   : num [1:2400] 6400 3700 5500 3700 3700 ...

Ir order to plot the map we first need to merge the shape file to the data frame containing the variables of interest. For this, we use the inner_join() from the dplyr package (Wickham et al., 2023) function where the shared columns need to be specified. In this case, the columns are in1 and in1. It´s very important the order in which the parameters are entered in the script. The shape file must be first in order to get a final shape file with the data frame columns added.

dfm1<-inner_join(x=departamentos_sh, y=periodo1, by=c("in1"="in1"))
dfm2<-inner_join(x=departamentos_sh, y=periodo2, by=c("in1"="in1"))
dfm3<-inner_join(x=departamentos_sh, y=periodo3, by=c("in1"="in1"))
dfm4<-inner_join(x=departamentos_sh, y=periodo4, by=c("in1"="in1"))
dfm5<-inner_join(x=departamentos_sh, y=periodo5, by=c("in1"="in1"))

We look at the resulting shape file:

head(dfm1)

Plot the maps

Once the files are merged we are ready to plot the maps using the ggplot2 package. In this case we plot the first period, use the dfm1 dataset:

ggp1<-ggplot(data = dfm1)+
  geom_sf(aes(fill=Rendimiento))+
  scale_fill_gradient("Rendimiento\n(kg/ha)", high = "red", low = "white",
                      guide=guide_legend(direction='vertical',
                                         title.position='top',
                                         title.hjust = .5,
                                         label.hjust = .5,
                                         label.position = 'bottom',
                                         keywidth = 1,
                                         keyheight = 1))+
  labs(title="Estimaciones Sorgo (2012-2022)")+
  annotation_north_arrow(location = "tr", height = unit(1, "cm"),
                         width = unit(1, "cm"))+
       #subtitle='Population density',
       #caption=c('Source: Vanina Maguire'))+
  theme_void()+
  theme(title=element_text(face='bold'),legend.position = 'right')

ggp1

Second period, use the dfm2 dataset:

ggp2<-ggplot(data = dfm2)+
  geom_sf(aes(fill=Rendimiento))+
  scale_fill_gradient("Rendimiento\n(kg/ha)", high = "red", low = "white",
                      guide=guide_legend(direction='vertical',
                                         title.position='top',
                                         title.hjust = .5,
                                         label.hjust = .5,
                                         label.position = 'bottom',
                                         keywidth = 1,
                                         keyheight = 1))+
  labs(title="Estimaciones Sorgo (2001-2011)")+
  annotation_north_arrow(location = "tr", height = unit(1, "cm"),
                         width = unit(1, "cm"))+
       #subtitle='Population density',
       #caption=c('Source: Vanina Maguire'))+
  theme_void()+
  theme(title=element_text(face='bold'),legend.position = 'right')

ggp2

Third period, use the dfm3 dataset:

ggp3<-ggplot(data = dfm3)+
  geom_sf(aes(fill=Rendimiento))+
  scale_fill_gradient("Rendimiento\n(kg/ha)", high = "red", low = "white",
                      guide=guide_legend(direction='vertical',
                                         title.position='top',
                                         title.hjust = .5,
                                         label.hjust = .5,
                                         label.position = 'bottom',
                                         keywidth = 1,
                                         keyheight = 1))+
  labs(title="Estimaciones Sorgo (1990-2000)")+
  annotation_north_arrow(location = "tr", height = unit(1, "cm"),
                         width = unit(1, "cm"))+
       #subtitle='Population density',
       #caption=c('Source: Vanina Maguire'))+
  theme_void()+
  theme(title=element_text(face='bold'),legend.position = 'right')

ggp3

Fourth period, use the dfm4 dataset:

ggp4<-ggplot(data = dfm4)+
  geom_sf(aes(fill=Rendimiento))+
  scale_fill_gradient("Rendimiento\n(kg/ha)", high = "red", low = "white",
                      guide=guide_legend(direction='vertical',
                                         title.position='top',
                                         title.hjust = .5,
                                         label.hjust = .5,
                                         label.position = 'bottom',
                                         keywidth = 1,
                                         keyheight = 1))+
  labs(title="Estimaciones Sorgo (1979-1989)")+
  annotation_north_arrow(location = "tr", height = unit(1, "cm"),
                         width = unit(1, "cm"))+
       #subtitle='Population density',
       #caption=c('Source: Vanina Maguire'))+
  theme_void()+
  theme(title=element_text(face='bold'),legend.position = 'right')

ggp4

Fifth period, use the dfm5 dataset:

ggp5<-ggplot(data = dfm5)+
  geom_sf(aes(fill=Rendimiento))+
  scale_fill_gradient("Rendimiento\n(kg/ha)", high = "red", low = "white",
                      guide=guide_legend(direction='vertical',
                                         title.position='top',
                                         title.hjust = .5,
                                         label.hjust = .5,
                                         label.position = 'bottom',
                                         keywidth = 1,
                                         keyheight = 1))+
  labs(title="Estimaciones Sorgo (1969-1978)")+
  annotation_north_arrow(location = "tr", height = unit(1, "cm"),
                         width = unit(1, "cm"))+
       #subtitle='Population density',
       #caption=c('Source: Vanina Maguire'))+
  theme_void()+
  theme(title=element_text(face='bold'),legend.position = 'right')

ggp5

Finally, with the patchwork package we can arrange the plots:

library(patchwork) # for plot arrange
ggp1 + ggp2

ggp3 + ggp4

ggp5

References

Wickham, H., François, R., Henry, L., Müller, K., & Vaughan, D. (2023). dplyr: A Grammar of Data Manipulation. https://CRAN.R-project.org/package=dplyr